xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*    regexec.c
2  */
3 
4 /*
5  * 	One Ring to rule them all, One Ring to find them
6  &
7  *     [p.v of _The Lord of the Rings_, opening poem]
8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
10  */
11 
12 /* This file contains functions for executing a regular expression.  See
13  * also regcomp.c which funnily enough, contains functions for compiling
14  * a regular expression.
15  *
16  * This file is also copied at build time to ext/re/re_exec.c, where
17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
18  * This causes the main functions to be compiled under new names and with
19  * debugging support added, which makes "use re 'debug'" work.
20  */
21 
22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
23  * confused with the original package (see point 3 below).  Thanks, Henry!
24  */
25 
26 /* Additional note: this code is very heavily munged from Henry's version
27  * in places.  In some spots I've traded clarity for efficiency, so don't
28  * blame Henry for some of the lack of readability.
29  */
30 
31 /* The names of the functions have been changed from regcomp and
32  * regexec to  pregcomp and pregexec in order to avoid conflicts
33  * with the POSIX routines of the same names.
34 */
35 
36 #ifdef PERL_EXT_RE_BUILD
37 #include "re_top.h"
38 #endif
39 
40 /*
41  * pregcomp and pregexec -- regsub and regerror are not used in perl
42  *
43  *	Copyright (c) 1986 by University of Toronto.
44  *	Written by Henry Spencer.  Not derived from licensed software.
45  *
46  *	Permission is granted to anyone to use this software for any
47  *	purpose on any computer system, and to redistribute it freely,
48  *	subject to the following restrictions:
49  *
50  *	1. The author is not responsible for the consequences of use of
51  *		this software, no matter how awful, even if they arise
52  *		from defects in it.
53  *
54  *	2. The origin of this software must not be misrepresented, either
55  *		by explicit claim or by omission.
56  *
57  *	3. Altered versions must be plainly marked as such, and must not
58  *		be misrepresented as being the original software.
59  *
60  ****    Alterations to Henry's code are...
61  ****
62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
64  ****    by Larry Wall and others
65  ****
66  ****    You may distribute under the terms of either the GNU General Public
67  ****    License or the Artistic License, as specified in the README file.
68  *
69  * Beware that some of this code is subtly aware of the way operator
70  * precedence is structured in regular expressions.  Serious changes in
71  * regular-expression syntax might require a total rethink.
72  */
73 #include "EXTERN.h"
74 #define PERL_IN_REGEXEC_C
75 #include "perl.h"
76 
77 #ifdef PERL_IN_XSUB_RE
78 #  include "re_comp.h"
79 #else
80 #  include "regcomp.h"
81 #endif
82 
83 #define RF_tainted	1		/* tainted information used? */
84 #define RF_warned	2		/* warned about big count? */
85 
86 #define RF_utf8		8		/* Pattern contains multibyte chars? */
87 
88 #define UTF ((PL_reg_flags & RF_utf8) != 0)
89 
90 #define RS_init		1		/* eval environment created */
91 #define RS_set		2		/* replsv value is set */
92 
93 #ifndef STATIC
94 #define	STATIC	static
95 #endif
96 
97 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
98 
99 /*
100  * Forwards.
101  */
102 
103 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
104 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
105 
106 #define HOPc(pos,off) \
107 	(char *)(PL_reg_match_utf8 \
108 	    ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
109 	    : (U8*)(pos + off))
110 #define HOPBACKc(pos, off) \
111 	(char*)(PL_reg_match_utf8\
112 	    ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
113 	    : (pos - off >= PL_bostr)		\
114 		? (U8*)pos - off		\
115 		: NULL)
116 
117 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
118 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
119 
120 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
121     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
122 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
123 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
124 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
125 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
126 
127 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
128 
129 /* for use after a quantifier and before an EXACT-like node -- japhy */
130 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
131 #define JUMPABLE(rn) (      \
132     OP(rn) == OPEN ||       \
133     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
134     OP(rn) == EVAL ||   \
135     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
136     OP(rn) == PLUS || OP(rn) == MINMOD || \
137     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
138     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
139 )
140 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
141 
142 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
143 
144 #if 0
145 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
146    we don't need this definition. */
147 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
148 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
149 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
150 
151 #else
152 /* ... so we use this as its faster. */
153 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
154 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
155 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
156 
157 #endif
158 
159 /*
160   Search for mandatory following text node; for lookahead, the text must
161   follow but for lookbehind (rn->flags != 0) we skip to the next step.
162 */
163 #define FIND_NEXT_IMPT(rn) STMT_START { \
164     while (JUMPABLE(rn)) { \
165 	const OPCODE type = OP(rn); \
166 	if (type == SUSPEND || PL_regkind[type] == CURLY) \
167 	    rn = NEXTOPER(NEXTOPER(rn)); \
168 	else if (type == PLUS) \
169 	    rn = NEXTOPER(rn); \
170 	else if (type == IFMATCH) \
171 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
172 	else rn += NEXT_OFF(rn); \
173     } \
174 } STMT_END
175 
176 
177 static void restore_pos(pTHX_ void *arg);
178 
179 STATIC CHECKPOINT
180 S_regcppush(pTHX_ I32 parenfloor)
181 {
182     dVAR;
183     const int retval = PL_savestack_ix;
184 #define REGCP_PAREN_ELEMS 4
185     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
186     int p;
187     GET_RE_DEBUG_FLAGS_DECL;
188 
189     if (paren_elems_to_push < 0)
190 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
191 
192 #define REGCP_OTHER_ELEMS 7
193     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
194 
195     for (p = PL_regsize; p > parenfloor; p--) {
196 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
197 	SSPUSHINT(PL_regoffs[p].end);
198 	SSPUSHINT(PL_regoffs[p].start);
199 	SSPUSHPTR(PL_reg_start_tmp[p]);
200 	SSPUSHINT(p);
201 	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
202 	  "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
203 		      (UV)p, (IV)PL_regoffs[p].start,
204 		      (IV)(PL_reg_start_tmp[p] - PL_bostr),
205 		      (IV)PL_regoffs[p].end
206 	));
207     }
208 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
209     SSPUSHPTR(PL_regoffs);
210     SSPUSHINT(PL_regsize);
211     SSPUSHINT(*PL_reglastparen);
212     SSPUSHINT(*PL_reglastcloseparen);
213     SSPUSHPTR(PL_reginput);
214 #define REGCP_FRAME_ELEMS 2
215 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
216  * are needed for the regexp context stack bookkeeping. */
217     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
219 
220     return retval;
221 }
222 
223 /* These are needed since we do not localize EVAL nodes: */
224 #define REGCP_SET(cp)                                           \
225     DEBUG_STATE_r(                                              \
226             PerlIO_printf(Perl_debug_log,		        \
227 	        "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
228 	        (IV)PL_savestack_ix));                          \
229     cp = PL_savestack_ix
230 
231 #define REGCP_UNWIND(cp)                                        \
232     DEBUG_STATE_r(                                              \
233         if (cp != PL_savestack_ix) 		                \
234     	    PerlIO_printf(Perl_debug_log,		        \
235 		"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
236 	        (IV)(cp), (IV)PL_savestack_ix));                \
237     regcpblow(cp)
238 
239 STATIC char *
240 S_regcppop(pTHX_ const regexp *rex)
241 {
242     dVAR;
243     U32 i;
244     char *input;
245     GET_RE_DEBUG_FLAGS_DECL;
246 
247     PERL_ARGS_ASSERT_REGCPPOP;
248 
249     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
250     i = SSPOPINT;
251     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
252     i = SSPOPINT; /* Parentheses elements to pop. */
253     input = (char *) SSPOPPTR;
254     *PL_reglastcloseparen = SSPOPINT;
255     *PL_reglastparen = SSPOPINT;
256     PL_regsize = SSPOPINT;
257     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
258 
259 
260     /* Now restore the parentheses context. */
261     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
262 	 i > 0; i -= REGCP_PAREN_ELEMS) {
263 	I32 tmps;
264 	U32 paren = (U32)SSPOPINT;
265 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
266 	PL_regoffs[paren].start = SSPOPINT;
267 	tmps = SSPOPINT;
268 	if (paren <= *PL_reglastparen)
269 	    PL_regoffs[paren].end = tmps;
270 	DEBUG_BUFFERS_r(
271 	    PerlIO_printf(Perl_debug_log,
272 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
273 			  (UV)paren, (IV)PL_regoffs[paren].start,
274 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
275 			  (IV)PL_regoffs[paren].end,
276 			  (paren > *PL_reglastparen ? "(no)" : ""));
277 	);
278     }
279     DEBUG_BUFFERS_r(
280 	if (*PL_reglastparen + 1 <= rex->nparens) {
281 	    PerlIO_printf(Perl_debug_log,
282 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
283 			  (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
284 	}
285     );
286 #if 1
287     /* It would seem that the similar code in regtry()
288      * already takes care of this, and in fact it is in
289      * a better location to since this code can #if 0-ed out
290      * but the code in regtry() is needed or otherwise tests
291      * requiring null fields (pat.t#187 and split.t#{13,14}
292      * (as of patchlevel 7877)  will fail.  Then again,
293      * this code seems to be necessary or otherwise
294      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
295      * --jhi updated by dapm */
296     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
297 	if (i > PL_regsize)
298 	    PL_regoffs[i].start = -1;
299 	PL_regoffs[i].end = -1;
300     }
301 #endif
302     return input;
303 }
304 
305 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
306 
307 /*
308  * pregexec and friends
309  */
310 
311 #ifndef PERL_IN_XSUB_RE
312 /*
313  - pregexec - match a regexp against a string
314  */
315 I32
316 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
317 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318 /* strend: pointer to null at end of string */
319 /* strbeg: real beginning of string */
320 /* minend: end of match must be >=minend after stringarg. */
321 /* nosave: For optimizations. */
322 {
323     PERL_ARGS_ASSERT_PREGEXEC;
324 
325     return
326 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
327 		      nosave ? 0 : REXEC_COPY_STR);
328 }
329 #endif
330 
331 /*
332  * Need to implement the following flags for reg_anch:
333  *
334  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
335  * USE_INTUIT_ML
336  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
337  * INTUIT_AUTORITATIVE_ML
338  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
339  * INTUIT_ONCE_ML
340  *
341  * Another flag for this function: SECOND_TIME (so that float substrs
342  * with giant delta may be not rechecked).
343  */
344 
345 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346 
347 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
348    Otherwise, only SvCUR(sv) is used to get strbeg. */
349 
350 /* XXXX We assume that strpos is strbeg unless sv. */
351 
352 /* XXXX Some places assume that there is a fixed substring.
353 	An update may be needed if optimizer marks as "INTUITable"
354 	RExen without fixed substrings.  Similarly, it is assumed that
355 	lengths of all the strings are no more than minlen, thus they
356 	cannot come from lookahead.
357 	(Or minlen should take into account lookahead.)
358   NOTE: Some of this comment is not correct. minlen does now take account
359   of lookahead/behind. Further research is required. -- demerphq
360 
361 */
362 
363 /* A failure to find a constant substring means that there is no need to make
364    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
365    finding a substring too deep into the string means that less calls to
366    regtry() should be needed.
367 
368    REx compiler's optimizer found 4 possible hints:
369 	a) Anchored substring;
370 	b) Fixed substring;
371 	c) Whether we are anchored (beginning-of-line or \G);
372 	d) First node (of those at offset 0) which may distingush positions;
373    We use a)b)d) and multiline-part of c), and try to find a position in the
374    string which does not contradict any of them.
375  */
376 
377 /* Most of decisions we do here should have been done at compile time.
378    The nodes of the REx which we used for the search should have been
379    deleted from the finite automaton. */
380 
381 char *
382 Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
383 		     char *strend, const U32 flags, re_scream_pos_data *data)
384 {
385     dVAR;
386     register I32 start_shift = 0;
387     /* Should be nonnegative! */
388     register I32 end_shift   = 0;
389     register char *s;
390     register SV *check;
391     char *strbeg;
392     char *t;
393     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
394     I32 ml_anch;
395     register char *other_last = NULL;	/* other substr checked before this */
396     char *check_at = NULL;		/* check substr found at this pos */
397     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
398     RXi_GET_DECL(prog,progi);
399 #ifdef DEBUGGING
400     const char * const i_strpos = strpos;
401 #endif
402     GET_RE_DEBUG_FLAGS_DECL;
403 
404     PERL_ARGS_ASSERT_RE_INTUIT_START;
405 
406     RX_MATCH_UTF8_set(prog,do_utf8);
407 
408     if (RX_UTF8(prog)) {
409 	PL_reg_flags |= RF_utf8;
410     }
411     DEBUG_EXECUTE_r(
412         debug_start_match(prog, do_utf8, strpos, strend,
413             sv ? "Guessing start of match in sv for"
414                : "Guessing start of match in string for");
415 	      );
416 
417     /* CHR_DIST() would be more correct here but it makes things slow. */
418     if (prog->minlen > strend - strpos) {
419 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
420 			      "String too short... [re_intuit_start]\n"));
421 	goto fail;
422     }
423 
424     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
425     PL_regeol = strend;
426     if (do_utf8) {
427 	if (!prog->check_utf8 && prog->check_substr)
428 	    to_utf8_substr(prog);
429 	check = prog->check_utf8;
430     } else {
431 	if (!prog->check_substr && prog->check_utf8)
432 	    to_byte_substr(prog);
433 	check = prog->check_substr;
434     }
435     if (check == &PL_sv_undef) {
436 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
437 		"Non-utf8 string cannot match utf8 check string\n"));
438 	goto fail;
439     }
440     if (prog->extflags & RXf_ANCH) {	/* Match at beg-of-str or after \n */
441 	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
442 		     || ( (prog->extflags & RXf_ANCH_BOL)
443 			  && !multiline ) );	/* Check after \n? */
444 
445 	if (!ml_anch) {
446 	  if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
447 		&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
448 	       /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
449 	       && sv && !SvROK(sv)
450 	       && (strpos != strbeg)) {
451 	      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
452 	      goto fail;
453 	  }
454 	  if (prog->check_offset_min == prog->check_offset_max &&
455 	      !(prog->extflags & RXf_CANY_SEEN)) {
456 	    /* Substring at constant offset from beg-of-str... */
457 	    I32 slen;
458 
459 	    s = HOP3c(strpos, prog->check_offset_min, strend);
460 
461 	    if (SvTAIL(check)) {
462 		slen = SvCUR(check);	/* >= 1 */
463 
464 		if ( strend - s > slen || strend - s < slen - 1
465 		     || (strend - s == slen && strend[-1] != '\n')) {
466 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
467 		    goto fail_finish;
468 		}
469 		/* Now should match s[0..slen-2] */
470 		slen--;
471 		if (slen && (*SvPVX_const(check) != *s
472 			     || (slen > 1
473 				 && memNE(SvPVX_const(check), s, slen)))) {
474 		  report_neq:
475 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
476 		    goto fail_finish;
477 		}
478 	    }
479 	    else if (*SvPVX_const(check) != *s
480 		     || ((slen = SvCUR(check)) > 1
481 			 && memNE(SvPVX_const(check), s, slen)))
482 		goto report_neq;
483 	    check_at = s;
484 	    goto success_at_start;
485 	  }
486 	}
487 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
488 	s = strpos;
489 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
490 	end_shift = prog->check_end_shift;
491 
492 	if (!ml_anch) {
493 	    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
494 					 - (SvTAIL(check) != 0);
495 	    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
496 
497 	    if (end_shift < eshift)
498 		end_shift = eshift;
499 	}
500     }
501     else {				/* Can match at random position */
502 	ml_anch = 0;
503 	s = strpos;
504 	start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
505 	end_shift = prog->check_end_shift;
506 
507 	/* end shift should be non negative here */
508     }
509 
510 #ifdef QDEBUGGING	/* 7/99: reports of failure (with the older version) */
511     if (end_shift < 0)
512 	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
513 		   (IV)end_shift, RX_PRECOMP(prog));
514 #endif
515 
516   restart:
517     /* Find a possible match in the region s..strend by looking for
518        the "check" substring in the region corrected by start/end_shift. */
519 
520     {
521         I32 srch_start_shift = start_shift;
522         I32 srch_end_shift = end_shift;
523         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
524 	    srch_end_shift -= ((strbeg - s) - srch_start_shift);
525 	    srch_start_shift = strbeg - s;
526 	}
527     DEBUG_OPTIMISE_MORE_r({
528         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
529             (IV)prog->check_offset_min,
530             (IV)srch_start_shift,
531             (IV)srch_end_shift,
532             (IV)prog->check_end_shift);
533     });
534 
535     if (flags & REXEC_SCREAM) {
536 	I32 p = -1;			/* Internal iterator of scream. */
537 	I32 * const pp = data ? data->scream_pos : &p;
538 
539 	if (PL_screamfirst[BmRARE(check)] >= 0
540 	    || ( BmRARE(check) == '\n'
541 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
542 		 && SvTAIL(check) ))
543 	    s = screaminstr(sv, check,
544 			    srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
545 	else
546 	    goto fail_finish;
547 	/* we may be pointing at the wrong string */
548 	if (s && RXp_MATCH_COPIED(prog))
549 	    s = strbeg + (s - SvPVX_const(sv));
550 	if (data)
551 	    *data->scream_olds = s;
552     }
553     else {
554         U8* start_point;
555         U8* end_point;
556         if (prog->extflags & RXf_CANY_SEEN) {
557             start_point= (U8*)(s + srch_start_shift);
558             end_point= (U8*)(strend - srch_end_shift);
559         } else {
560 	    start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
561             end_point= HOP3(strend, -srch_end_shift, strbeg);
562 	}
563 	DEBUG_OPTIMISE_MORE_r({
564             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
565                 (int)(end_point - start_point),
566                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
567                 start_point);
568         });
569 
570 	s = fbm_instr( start_point, end_point,
571 		      check, multiline ? FBMrf_MULTILINE : 0);
572     }
573     }
574     /* Update the count-of-usability, remove useless subpatterns,
575 	unshift s.  */
576 
577     DEBUG_EXECUTE_r({
578         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
579             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
580         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
581 			  (s ? "Found" : "Did not find"),
582 	    (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
583 	        ? "anchored" : "floating"),
584 	    quoted,
585 	    RE_SV_TAIL(check),
586 	    (s ? " at offset " : "...\n") );
587     });
588 
589     if (!s)
590 	goto fail_finish;
591     /* Finish the diagnostic message */
592     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
593 
594     /* XXX dmq: first branch is for positive lookbehind...
595        Our check string is offset from the beginning of the pattern.
596        So we need to do any stclass tests offset forward from that
597        point. I think. :-(
598      */
599 
600 
601 
602     check_at=s;
603 
604 
605     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
606        Start with the other substr.
607        XXXX no SCREAM optimization yet - and a very coarse implementation
608        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
609 		*always* match.  Probably should be marked during compile...
610        Probably it is right to do no SCREAM here...
611      */
612 
613     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
614                 : (prog->float_substr && prog->anchored_substr))
615     {
616 	/* Take into account the "other" substring. */
617 	/* XXXX May be hopelessly wrong for UTF... */
618 	if (!other_last)
619 	    other_last = strpos;
620 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
621 	  do_other_anchored:
622 	    {
623 		char * const last = HOP3c(s, -start_shift, strbeg);
624 		char *last1, *last2;
625 		char * const saved_s = s;
626 		SV* must;
627 
628 		t = s - prog->check_offset_max;
629 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
630 		    && (!do_utf8
631 			|| ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
632 			    && t > strpos)))
633 		    NOOP;
634 		else
635 		    t = strpos;
636 		t = HOP3c(t, prog->anchored_offset, strend);
637 		if (t < other_last)	/* These positions already checked */
638 		    t = other_last;
639 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
640 		if (last < last1)
641 		    last1 = last;
642                 /* XXXX It is not documented what units *_offsets are in.
643                    We assume bytes, but this is clearly wrong.
644                    Meaning this code needs to be carefully reviewed for errors.
645                    dmq.
646                   */
647 
648 		/* On end-of-str: see comment below. */
649 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
650 		if (must == &PL_sv_undef) {
651 		    s = (char*)NULL;
652 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
653 		}
654 		else
655 		    s = fbm_instr(
656 			(unsigned char*)t,
657 			HOP3(HOP3(last1, prog->anchored_offset, strend)
658 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
659 			must,
660 			multiline ? FBMrf_MULTILINE : 0
661 		    );
662                 DEBUG_EXECUTE_r({
663                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
664                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
665                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
666 			(s ? "Found" : "Contradicts"),
667                         quoted, RE_SV_TAIL(must));
668                 });
669 
670 
671 		if (!s) {
672 		    if (last1 >= last2) {
673 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
674 						", giving up...\n"));
675 			goto fail_finish;
676 		    }
677 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
678 			", trying floating at offset %ld...\n",
679 			(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
680 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
681 		    s = HOP3c(last, 1, strend);
682 		    goto restart;
683 		}
684 		else {
685 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
686 			  (long)(s - i_strpos)));
687 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
688 		    other_last = HOP3c(s, 1, strend);
689 		    s = saved_s;
690 		    if (t == strpos)
691 			goto try_at_start;
692 		    goto try_at_offset;
693 		}
694 	    }
695 	}
696 	else {		/* Take into account the floating substring. */
697 	    char *last, *last1;
698 	    char * const saved_s = s;
699 	    SV* must;
700 
701 	    t = HOP3c(s, -start_shift, strbeg);
702 	    last1 = last =
703 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
704 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
705 		last = HOP3c(t, prog->float_max_offset, strend);
706 	    s = HOP3c(t, prog->float_min_offset, strend);
707 	    if (s < other_last)
708 		s = other_last;
709  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
710 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
711 	    /* fbm_instr() takes into account exact value of end-of-str
712 	       if the check is SvTAIL(ed).  Since false positives are OK,
713 	       and end-of-str is not later than strend we are OK. */
714 	    if (must == &PL_sv_undef) {
715 		s = (char*)NULL;
716 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
717 	    }
718 	    else
719 		s = fbm_instr((unsigned char*)s,
720 			      (unsigned char*)last + SvCUR(must)
721 				  - (SvTAIL(must)!=0),
722 			      must, multiline ? FBMrf_MULTILINE : 0);
723 	    DEBUG_EXECUTE_r({
724 	        RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
725 	            SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
726 	        PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
727 		    (s ? "Found" : "Contradicts"),
728 		    quoted, RE_SV_TAIL(must));
729             });
730 	    if (!s) {
731 		if (last1 == last) {
732 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
733 					    ", giving up...\n"));
734 		    goto fail_finish;
735 		}
736 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
737 		    ", trying anchored starting at offset %ld...\n",
738 		    (long)(saved_s + 1 - i_strpos)));
739 		other_last = last;
740 		s = HOP3c(t, 1, strend);
741 		goto restart;
742 	    }
743 	    else {
744 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
745 		      (long)(s - i_strpos)));
746 		other_last = s; /* Fix this later. --Hugo */
747 		s = saved_s;
748 		if (t == strpos)
749 		    goto try_at_start;
750 		goto try_at_offset;
751 	    }
752 	}
753     }
754 
755 
756     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
757 
758     DEBUG_OPTIMISE_MORE_r(
759         PerlIO_printf(Perl_debug_log,
760             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
761             (IV)prog->check_offset_min,
762             (IV)prog->check_offset_max,
763             (IV)(s-strpos),
764             (IV)(t-strpos),
765             (IV)(t-s),
766             (IV)(strend-strpos)
767         )
768     );
769 
770     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
771         && (!do_utf8
772 	    || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
773 		 && t > strpos)))
774     {
775 	/* Fixed substring is found far enough so that the match
776 	   cannot start at strpos. */
777       try_at_offset:
778 	if (ml_anch && t[-1] != '\n') {
779 	    /* Eventually fbm_*() should handle this, but often
780 	       anchored_offset is not 0, so this check will not be wasted. */
781 	    /* XXXX In the code below we prefer to look for "^" even in
782 	       presence of anchored substrings.  And we search even
783 	       beyond the found float position.  These pessimizations
784 	       are historical artefacts only.  */
785 	  find_anchor:
786 	    while (t < strend - prog->minlen) {
787 		if (*t == '\n') {
788 		    if (t < check_at - prog->check_offset_min) {
789 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
790 			    /* Since we moved from the found position,
791 			       we definitely contradict the found anchored
792 			       substr.  Due to the above check we do not
793 			       contradict "check" substr.
794 			       Thus we can arrive here only if check substr
795 			       is float.  Redo checking for "other"=="fixed".
796 			     */
797 			    strpos = t + 1;
798 			    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
799 				PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
800 			    goto do_other_anchored;
801 			}
802 			/* We don't contradict the found floating substring. */
803 			/* XXXX Why not check for STCLASS? */
804 			s = t + 1;
805 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
806 			    PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
807 			goto set_useful;
808 		    }
809 		    /* Position contradicts check-string */
810 		    /* XXXX probably better to look for check-string
811 		       than for "\n", so one should lower the limit for t? */
812 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
813 			PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
814 		    other_last = strpos = s = t + 1;
815 		    goto restart;
816 		}
817 		t++;
818 	    }
819 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
820 			PL_colors[0], PL_colors[1]));
821 	    goto fail_finish;
822 	}
823 	else {
824 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
825 			PL_colors[0], PL_colors[1]));
826 	}
827 	s = t;
828       set_useful:
829 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
830     }
831     else {
832 	/* The found string does not prohibit matching at strpos,
833 	   - no optimization of calling REx engine can be performed,
834 	   unless it was an MBOL and we are not after MBOL,
835 	   or a future STCLASS check will fail this. */
836       try_at_start:
837 	/* Even in this situation we may use MBOL flag if strpos is offset
838 	   wrt the start of the string. */
839 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
840 	    && (strpos != strbeg) && strpos[-1] != '\n'
841 	    /* May be due to an implicit anchor of m{.*foo}  */
842 	    && !(prog->intflags & PREGf_IMPLICIT))
843 	{
844 	    t = strpos;
845 	    goto find_anchor;
846 	}
847 	DEBUG_EXECUTE_r( if (ml_anch)
848 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
849 			  (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
850 	);
851       success_at_start:
852 	if (!(prog->intflags & PREGf_NAUGHTY)	/* XXXX If strpos moved? */
853 	    && (do_utf8 ? (
854 		prog->check_utf8		/* Could be deleted already */
855 		&& --BmUSEFUL(prog->check_utf8) < 0
856 		&& (prog->check_utf8 == prog->float_utf8)
857 	    ) : (
858 		prog->check_substr		/* Could be deleted already */
859 		&& --BmUSEFUL(prog->check_substr) < 0
860 		&& (prog->check_substr == prog->float_substr)
861 	    )))
862 	{
863 	    /* If flags & SOMETHING - do not do it many times on the same match */
864 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
865 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
866 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
867 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
868 	    prog->check_substr = prog->check_utf8 = NULL;	/* disable */
869 	    prog->float_substr = prog->float_utf8 = NULL;	/* clear */
870 	    check = NULL;			/* abort */
871 	    s = strpos;
872 	    /* XXXX This is a remnant of the old implementation.  It
873 	            looks wasteful, since now INTUIT can use many
874 	            other heuristics. */
875 	    prog->extflags &= ~RXf_USE_INTUIT;
876 	}
877 	else
878 	    s = strpos;
879     }
880 
881     /* Last resort... */
882     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
883     /* trie stclasses are too expensive to use here, we are better off to
884        leave it to regmatch itself */
885     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
886 	/* minlen == 0 is possible if regstclass is \b or \B,
887 	   and the fixed substr is ''$.
888 	   Since minlen is already taken into account, s+1 is before strend;
889 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
890 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
891 	   regstclass does not come from lookahead...  */
892 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
893 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
894         const U8* const str = (U8*)STRING(progi->regstclass);
895         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
896 		    ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
897 		    : 1);
898 	char * endpos;
899 	if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
900             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
901         else if (prog->float_substr || prog->float_utf8)
902 	    endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
903         else
904             endpos= strend;
905 
906         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
907 				      (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
908 
909 	t = s;
910         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
911 	if (!s) {
912 #ifdef DEBUGGING
913 	    const char *what = NULL;
914 #endif
915 	    if (endpos == strend) {
916 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
917 				"Could not match STCLASS...\n") );
918 		goto fail;
919 	    }
920 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
921 				   "This position contradicts STCLASS...\n") );
922 	    if ((prog->extflags & RXf_ANCH) && !ml_anch)
923 		goto fail;
924 	    /* Contradict one of substrings */
925 	    if (prog->anchored_substr || prog->anchored_utf8) {
926 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
927 		    DEBUG_EXECUTE_r( what = "anchored" );
928 		  hop_and_restart:
929 		    s = HOP3c(t, 1, strend);
930 		    if (s + start_shift + end_shift > strend) {
931 			/* XXXX Should be taken into account earlier? */
932 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
933 					       "Could not match STCLASS...\n") );
934 			goto fail;
935 		    }
936 		    if (!check)
937 			goto giveup;
938 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
939 				"Looking for %s substr starting at offset %ld...\n",
940 				 what, (long)(s + start_shift - i_strpos)) );
941 		    goto restart;
942 		}
943 		/* Have both, check_string is floating */
944 		if (t + start_shift >= check_at) /* Contradicts floating=check */
945 		    goto retry_floating_check;
946 		/* Recheck anchored substring, but not floating... */
947 		s = check_at;
948 		if (!check)
949 		    goto giveup;
950 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
951 			  "Looking for anchored substr starting at offset %ld...\n",
952 			  (long)(other_last - i_strpos)) );
953 		goto do_other_anchored;
954 	    }
955 	    /* Another way we could have checked stclass at the
956                current position only: */
957 	    if (ml_anch) {
958 		s = t = t + 1;
959 		if (!check)
960 		    goto giveup;
961 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
962 			  "Looking for /%s^%s/m starting at offset %ld...\n",
963 			  PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
964 		goto try_at_offset;
965 	    }
966 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
967 		goto fail;
968 	    /* Check is floating subtring. */
969 	  retry_floating_check:
970 	    t = check_at - start_shift;
971 	    DEBUG_EXECUTE_r( what = "floating" );
972 	    goto hop_and_restart;
973 	}
974 	if (t != s) {
975             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
976 			"By STCLASS: moving %ld --> %ld\n",
977                                   (long)(t - i_strpos), (long)(s - i_strpos))
978                    );
979         }
980         else {
981             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
982                                   "Does not contradict STCLASS...\n");
983                    );
984         }
985     }
986   giveup:
987     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
988 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
989 			  PL_colors[5], (long)(s - i_strpos)) );
990     return s;
991 
992   fail_finish:				/* Substring not found */
993     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
994 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
995   fail:
996     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
997 			  PL_colors[4], PL_colors[5]));
998     return NULL;
999 }
1000 
1001 #define DECL_TRIE_TYPE(scan) \
1002     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1003 		    trie_type = (scan->flags != EXACT) \
1004 		              ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1005                               : (do_utf8 ? trie_utf8 : trie_plain)
1006 
1007 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1008 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1009     switch (trie_type) {                                                    \
1010     case trie_utf8_fold:                                                    \
1011 	if ( foldlen>0 ) {                                                  \
1012 	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1013 	    foldlen -= len;                                                 \
1014 	    uscan += len;                                                   \
1015 	    len=0;                                                          \
1016 	} else {                                                            \
1017 	    uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1018 	    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1019 	    foldlen -= UNISKIP( uvc );                                      \
1020 	    uscan = foldbuf + UNISKIP( uvc );                               \
1021 	}                                                                   \
1022 	break;                                                              \
1023     case trie_latin_utf8_fold:                                              \
1024 	if ( foldlen>0 ) {                                                  \
1025 	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1026 	    foldlen -= len;                                                 \
1027 	    uscan += len;                                                   \
1028 	    len=0;                                                          \
1029 	} else {                                                            \
1030 	    len = 1;                                                        \
1031 	    uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1032 	    foldlen -= UNISKIP( uvc );                                      \
1033 	    uscan = foldbuf + UNISKIP( uvc );                               \
1034 	}                                                                   \
1035 	break;                                                              \
1036     case trie_utf8:                                                         \
1037 	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1038 	break;                                                              \
1039     case trie_plain:                                                        \
1040 	uvc = (UV)*uc;                                                      \
1041 	len = 1;                                                            \
1042     }                                                                       \
1043     if (uvc < 256) {                                                        \
1044 	charid = trie->charmap[ uvc ];                                      \
1045     }                                                                       \
1046     else {                                                                  \
1047 	charid = 0;                                                         \
1048 	if (widecharmap) {                                                  \
1049 	    SV** const svpp = hv_fetch(widecharmap,                         \
1050 			(char*)&uvc, sizeof(UV), 0);                        \
1051 	    if (svpp)                                                       \
1052 		charid = (U16)SvIV(*svpp);                                  \
1053 	}                                                                   \
1054     }                                                                       \
1055 } STMT_END
1056 
1057 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1058 {                                                      \
1059     char *my_strend= (char *)strend;                   \
1060     if ( (CoNd)                                        \
1061 	 && (ln == len ||                              \
1062 	     !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1063 			m, NULL, ln, (bool)UTF))       \
1064 	 && (!reginfo || regtry(reginfo, &s)) )        \
1065 	goto got_it;                                   \
1066     else {                                             \
1067 	 U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1068 	 uvchr_to_utf8(tmpbuf, c);                     \
1069 	 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1070 	 if ( f != c                                   \
1071 	      && (f == c1 || f == c2)                  \
1072 	      && (ln == len ||                         \
1073 	        !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1074 			      m, NULL, ln, (bool)UTF)) \
1075 	      && (!reginfo || regtry(reginfo, &s)) )   \
1076 	      goto got_it;                             \
1077     }                                                  \
1078 }                                                      \
1079 s += len
1080 
1081 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1082 STMT_START {                                              \
1083     while (s <= e) {                                      \
1084 	if ( (CoNd)                                       \
1085 	     && (ln == 1 || !(OP(c) == EXACTF             \
1086 			      ? ibcmp(s, m, ln)           \
1087 			      : ibcmp_locale(s, m, ln)))  \
1088 	     && (!reginfo || regtry(reginfo, &s)) )        \
1089 	    goto got_it;                                  \
1090 	s++;                                              \
1091     }                                                     \
1092 } STMT_END
1093 
1094 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1095 STMT_START {                                          \
1096     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1097 	CoDe                                          \
1098 	s += uskip;                                   \
1099     }                                                 \
1100 } STMT_END
1101 
1102 #define REXEC_FBC_SCAN(CoDe)                          \
1103 STMT_START {                                          \
1104     while (s < strend) {                              \
1105 	CoDe                                          \
1106 	s++;                                          \
1107     }                                                 \
1108 } STMT_END
1109 
1110 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1111 REXEC_FBC_UTF8_SCAN(                                  \
1112     if (CoNd) {                                       \
1113 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1114 	    goto got_it;                              \
1115 	else                                          \
1116 	    tmp = doevery;                            \
1117     }                                                 \
1118     else                                              \
1119 	tmp = 1;                                      \
1120 )
1121 
1122 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1123 REXEC_FBC_SCAN(                                       \
1124     if (CoNd) {                                       \
1125 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1126 	    goto got_it;                              \
1127 	else                                          \
1128 	    tmp = doevery;                            \
1129     }                                                 \
1130     else                                              \
1131 	tmp = 1;                                      \
1132 )
1133 
1134 #define REXEC_FBC_TRYIT               \
1135 if ((!reginfo || regtry(reginfo, &s))) \
1136     goto got_it
1137 
1138 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1139     if (do_utf8) {                                             \
1140 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1141     }                                                          \
1142     else {                                                     \
1143 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1144     }                                                          \
1145     break
1146 
1147 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1148     if (do_utf8) {                                             \
1149 	UtFpReLoAd;                                            \
1150 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1151     }                                                          \
1152     else {                                                     \
1153 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1154     }                                                          \
1155     break
1156 
1157 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1158     PL_reg_flags |= RF_tainted;                                \
1159     if (do_utf8) {                                             \
1160 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1161     }                                                          \
1162     else {                                                     \
1163 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1164     }                                                          \
1165     break
1166 
1167 #define DUMP_EXEC_POS(li,s,doutf8) \
1168     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1169 
1170 /* We know what class REx starts with.  Try to find this position... */
1171 /* if reginfo is NULL, its a dryrun */
1172 /* annoyingly all the vars in this routine have different names from their counterparts
1173    in regmatch. /grrr */
1174 
1175 STATIC char *
1176 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1177     const char *strend, regmatch_info *reginfo)
1178 {
1179 	dVAR;
1180 	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1181 	char *m;
1182 	STRLEN ln;
1183 	STRLEN lnc;
1184 	register STRLEN uskip;
1185 	unsigned int c1;
1186 	unsigned int c2;
1187 	char *e;
1188 	register I32 tmp = 1;	/* Scratch variable? */
1189 	register const bool do_utf8 = PL_reg_match_utf8;
1190         RXi_GET_DECL(prog,progi);
1191 
1192 	PERL_ARGS_ASSERT_FIND_BYCLASS;
1193 
1194 	/* We know what class it must start with. */
1195 	switch (OP(c)) {
1196 	case ANYOF:
1197 	    if (do_utf8) {
1198 		 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1199 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
1200 			  reginclass(prog, c, (U8*)s, 0, do_utf8) :
1201 			  REGINCLASS(prog, c, (U8*)s));
1202 	    }
1203 	    else {
1204 		 while (s < strend) {
1205 		      STRLEN skip = 1;
1206 
1207 		      if (REGINCLASS(prog, c, (U8*)s) ||
1208 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1209 			   /* The assignment of 2 is intentional:
1210 			    * for the folded sharp s, the skip is 2. */
1211 			   (skip = SHARP_S_SKIP))) {
1212 			   if (tmp && (!reginfo || regtry(reginfo, &s)))
1213 				goto got_it;
1214 			   else
1215 				tmp = doevery;
1216 		      }
1217 		      else
1218 			   tmp = 1;
1219 		      s += skip;
1220 		 }
1221 	    }
1222 	    break;
1223 	case CANY:
1224 	    REXEC_FBC_SCAN(
1225 	        if (tmp && (!reginfo || regtry(reginfo, &s)))
1226 		    goto got_it;
1227 		else
1228 		    tmp = doevery;
1229 	    );
1230 	    break;
1231 	case EXACTF:
1232 	    m   = STRING(c);
1233 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1234 	    lnc = (I32) ln;	/* length to match in characters */
1235 	    if (UTF) {
1236 	        STRLEN ulen1, ulen2;
1237 		U8 *sm = (U8 *) m;
1238 		U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1239 		U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1240 		/* used by commented-out code below */
1241 		/*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1242 
1243                 /* XXX: Since the node will be case folded at compile
1244                    time this logic is a little odd, although im not
1245                    sure that its actually wrong. --dmq */
1246 
1247 		c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1248 		c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1249 
1250 		/* XXX: This is kinda strange. to_utf8_XYZ returns the
1251                    codepoint of the first character in the converted
1252                    form, yet originally we did the extra step.
1253                    No tests fail by commenting this code out however
1254                    so Ive left it out. -- dmq.
1255 
1256 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1257 				    0, uniflags);
1258 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1259 				    0, uniflags);
1260                 */
1261 
1262 		lnc = 0;
1263 		while (sm < ((U8 *) m + ln)) {
1264 		    lnc++;
1265 		    sm += UTF8SKIP(sm);
1266 		}
1267 	    }
1268 	    else {
1269 		c1 = *(U8*)m;
1270 		c2 = PL_fold[c1];
1271 	    }
1272 	    goto do_exactf;
1273 	case EXACTFL:
1274 	    m   = STRING(c);
1275 	    ln  = STR_LEN(c);
1276 	    lnc = (I32) ln;
1277 	    c1 = *(U8*)m;
1278 	    c2 = PL_fold_locale[c1];
1279 	  do_exactf:
1280 	    e = HOP3c(strend, -((I32)lnc), s);
1281 
1282 	    if (!reginfo && e < s)
1283 		e = s;			/* Due to minlen logic of intuit() */
1284 
1285 	    /* The idea in the EXACTF* cases is to first find the
1286 	     * first character of the EXACTF* node and then, if
1287 	     * necessary, case-insensitively compare the full
1288 	     * text of the node.  The c1 and c2 are the first
1289 	     * characters (though in Unicode it gets a bit
1290 	     * more complicated because there are more cases
1291 	     * than just upper and lower: one needs to use
1292 	     * the so-called folding case for case-insensitive
1293 	     * matching (called "loose matching" in Unicode).
1294 	     * ibcmp_utf8() will do just that. */
1295 
1296 	    if (do_utf8 || UTF) {
1297 	        UV c, f;
1298 	        U8 tmpbuf [UTF8_MAXBYTES+1];
1299 		STRLEN len = 1;
1300 		STRLEN foldlen;
1301 		const U32 uniflags = UTF8_ALLOW_DEFAULT;
1302 		if (c1 == c2) {
1303 		    /* Upper and lower of 1st char are equal -
1304 		     * probably not a "letter". */
1305 		    while (s <= e) {
1306 		        if (do_utf8) {
1307 		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1308 					   uniflags);
1309                         } else {
1310                             c = *((U8*)s);
1311                         }
1312 			REXEC_FBC_EXACTISH_CHECK(c == c1);
1313 		    }
1314 		}
1315 		else {
1316 		    while (s <= e) {
1317 		        if (do_utf8) {
1318 		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1319 					   uniflags);
1320                         } else {
1321                             c = *((U8*)s);
1322                         }
1323 
1324 			/* Handle some of the three Greek sigmas cases.
1325 			 * Note that not all the possible combinations
1326 			 * are handled here: some of them are handled
1327 			 * by the standard folding rules, and some of
1328 			 * them (the character class or ANYOF cases)
1329 			 * are handled during compiletime in
1330 			 * regexec.c:S_regclass(). */
1331 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1332 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1333 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1334 
1335 			REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1336 		    }
1337 		}
1338 	    }
1339 	    else {
1340 	        /* Neither pattern nor string are UTF8 */
1341 		if (c1 == c2)
1342 		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1343 		else
1344 		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1345 	    }
1346 	    break;
1347 	case BOUNDL:
1348 	    PL_reg_flags |= RF_tainted;
1349 	    /* FALL THROUGH */
1350 	case BOUND:
1351 	    if (do_utf8) {
1352 		if (s == PL_bostr)
1353 		    tmp = '\n';
1354 		else {
1355 		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1356 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1357 		}
1358 		tmp = ((OP(c) == BOUND ?
1359 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1360 		LOAD_UTF8_CHARCLASS_ALNUM();
1361 		REXEC_FBC_UTF8_SCAN(
1362 		    if (tmp == !(OP(c) == BOUND ?
1363 				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1364 				 isALNUM_LC_utf8((U8*)s)))
1365 		    {
1366 			tmp = !tmp;
1367 			REXEC_FBC_TRYIT;
1368 		}
1369 		);
1370 	    }
1371 	    else {
1372 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1373 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1374 		REXEC_FBC_SCAN(
1375 		    if (tmp ==
1376 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1377 			tmp = !tmp;
1378 			REXEC_FBC_TRYIT;
1379 		}
1380 		);
1381 	    }
1382 	    if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1383 		goto got_it;
1384 	    break;
1385 	case NBOUNDL:
1386 	    PL_reg_flags |= RF_tainted;
1387 	    /* FALL THROUGH */
1388 	case NBOUND:
1389 	    if (do_utf8) {
1390 		if (s == PL_bostr)
1391 		    tmp = '\n';
1392 		else {
1393 		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1394 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1395 		}
1396 		tmp = ((OP(c) == NBOUND ?
1397 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1398 		LOAD_UTF8_CHARCLASS_ALNUM();
1399 		REXEC_FBC_UTF8_SCAN(
1400 		    if (tmp == !(OP(c) == NBOUND ?
1401 				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1402 				 isALNUM_LC_utf8((U8*)s)))
1403 			tmp = !tmp;
1404 		    else REXEC_FBC_TRYIT;
1405 		);
1406 	    }
1407 	    else {
1408 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1409 		tmp = ((OP(c) == NBOUND ?
1410 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1411 		REXEC_FBC_SCAN(
1412 		    if (tmp ==
1413 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1414 			tmp = !tmp;
1415 		    else REXEC_FBC_TRYIT;
1416 		);
1417 	    }
1418 	    if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1419 		goto got_it;
1420 	    break;
1421 	case ALNUM:
1422 	    REXEC_FBC_CSCAN_PRELOAD(
1423 		LOAD_UTF8_CHARCLASS_ALNUM(),
1424 		swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1425 		isALNUM(*s)
1426 	    );
1427 	case ALNUML:
1428 	    REXEC_FBC_CSCAN_TAINT(
1429 		isALNUM_LC_utf8((U8*)s),
1430 		isALNUM_LC(*s)
1431 	    );
1432 	case NALNUM:
1433 	    REXEC_FBC_CSCAN_PRELOAD(
1434 		LOAD_UTF8_CHARCLASS_ALNUM(),
1435 		!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1436 		!isALNUM(*s)
1437 	    );
1438 	case NALNUML:
1439 	    REXEC_FBC_CSCAN_TAINT(
1440 		!isALNUM_LC_utf8((U8*)s),
1441 		!isALNUM_LC(*s)
1442 	    );
1443 	case SPACE:
1444 	    REXEC_FBC_CSCAN_PRELOAD(
1445 		LOAD_UTF8_CHARCLASS_SPACE(),
1446 		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1447 		isSPACE(*s)
1448 	    );
1449 	case SPACEL:
1450 	    REXEC_FBC_CSCAN_TAINT(
1451 		*s == ' ' || isSPACE_LC_utf8((U8*)s),
1452 		isSPACE_LC(*s)
1453 	    );
1454 	case NSPACE:
1455 	    REXEC_FBC_CSCAN_PRELOAD(
1456 		LOAD_UTF8_CHARCLASS_SPACE(),
1457 		!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1458 		!isSPACE(*s)
1459 	    );
1460 	case NSPACEL:
1461 	    REXEC_FBC_CSCAN_TAINT(
1462 		!(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1463 		!isSPACE_LC(*s)
1464 	    );
1465 	case DIGIT:
1466 	    REXEC_FBC_CSCAN_PRELOAD(
1467 		LOAD_UTF8_CHARCLASS_DIGIT(),
1468 		swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1469 		isDIGIT(*s)
1470 	    );
1471 	case DIGITL:
1472 	    REXEC_FBC_CSCAN_TAINT(
1473 		isDIGIT_LC_utf8((U8*)s),
1474 		isDIGIT_LC(*s)
1475 	    );
1476 	case NDIGIT:
1477 	    REXEC_FBC_CSCAN_PRELOAD(
1478 		LOAD_UTF8_CHARCLASS_DIGIT(),
1479 		!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1480 		!isDIGIT(*s)
1481 	    );
1482 	case NDIGITL:
1483 	    REXEC_FBC_CSCAN_TAINT(
1484 		!isDIGIT_LC_utf8((U8*)s),
1485 		!isDIGIT_LC(*s)
1486 	    );
1487 	case LNBREAK:
1488 	    REXEC_FBC_CSCAN(
1489 		is_LNBREAK_utf8(s),
1490 		is_LNBREAK_latin1(s)
1491 	    );
1492 	case VERTWS:
1493 	    REXEC_FBC_CSCAN(
1494 		is_VERTWS_utf8(s),
1495 		is_VERTWS_latin1(s)
1496 	    );
1497 	case NVERTWS:
1498 	    REXEC_FBC_CSCAN(
1499 		!is_VERTWS_utf8(s),
1500 		!is_VERTWS_latin1(s)
1501 	    );
1502 	case HORIZWS:
1503 	    REXEC_FBC_CSCAN(
1504 		is_HORIZWS_utf8(s),
1505 		is_HORIZWS_latin1(s)
1506 	    );
1507 	case NHORIZWS:
1508 	    REXEC_FBC_CSCAN(
1509 		!is_HORIZWS_utf8(s),
1510 		!is_HORIZWS_latin1(s)
1511 	    );
1512 	case AHOCORASICKC:
1513 	case AHOCORASICK:
1514 	    {
1515 	        DECL_TRIE_TYPE(c);
1516                 /* what trie are we using right now */
1517         	reg_ac_data *aho
1518         	    = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1519         	reg_trie_data *trie
1520 		    = (reg_trie_data*)progi->data->data[ aho->trie ];
1521 		HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1522 
1523 		const char *last_start = strend - trie->minlen;
1524 #ifdef DEBUGGING
1525 		const char *real_start = s;
1526 #endif
1527 		STRLEN maxlen = trie->maxlen;
1528 		SV *sv_points;
1529 		U8 **points; /* map of where we were in the input string
1530 		                when reading a given char. For ASCII this
1531 		                is unnecessary overhead as the relationship
1532 		                is always 1:1, but for Unicode, especially
1533 		                case folded Unicode this is not true. */
1534 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1535 		U8 *bitmap=NULL;
1536 
1537 
1538                 GET_RE_DEBUG_FLAGS_DECL;
1539 
1540                 /* We can't just allocate points here. We need to wrap it in
1541                  * an SV so it gets freed properly if there is a croak while
1542                  * running the match */
1543                 ENTER;
1544 	        SAVETMPS;
1545                 sv_points=newSV(maxlen * sizeof(U8 *));
1546                 SvCUR_set(sv_points,
1547                     maxlen * sizeof(U8 *));
1548                 SvPOK_on(sv_points);
1549                 sv_2mortal(sv_points);
1550                 points=(U8**)SvPV_nolen(sv_points );
1551                 if ( trie_type != trie_utf8_fold
1552                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1553                 {
1554                     if (trie->bitmap)
1555                         bitmap=(U8*)trie->bitmap;
1556                     else
1557                         bitmap=(U8*)ANYOF_BITMAP(c);
1558                 }
1559                 /* this is the Aho-Corasick algorithm modified a touch
1560                    to include special handling for long "unknown char"
1561                    sequences. The basic idea being that we use AC as long
1562                    as we are dealing with a possible matching char, when
1563                    we encounter an unknown char (and we have not encountered
1564                    an accepting state) we scan forward until we find a legal
1565                    starting char.
1566                    AC matching is basically that of trie matching, except
1567                    that when we encounter a failing transition, we fall back
1568                    to the current states "fail state", and try the current char
1569                    again, a process we repeat until we reach the root state,
1570                    state 1, or a legal transition. If we fail on the root state
1571                    then we can either terminate if we have reached an accepting
1572                    state previously, or restart the entire process from the beginning
1573                    if we have not.
1574 
1575                  */
1576                 while (s <= last_start) {
1577                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1578                     U8 *uc = (U8*)s;
1579                     U16 charid = 0;
1580                     U32 base = 1;
1581                     U32 state = 1;
1582                     UV uvc = 0;
1583                     STRLEN len = 0;
1584                     STRLEN foldlen = 0;
1585                     U8 *uscan = (U8*)NULL;
1586                     U8 *leftmost = NULL;
1587 #ifdef DEBUGGING
1588                     U32 accepted_word= 0;
1589 #endif
1590                     U32 pointpos = 0;
1591 
1592                     while ( state && uc <= (U8*)strend ) {
1593                         int failed=0;
1594                         U32 word = aho->states[ state ].wordnum;
1595 
1596                         if( state==1 ) {
1597                             if ( bitmap ) {
1598                                 DEBUG_TRIE_EXECUTE_r(
1599                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1600                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1601                                             (char *)uc, do_utf8 );
1602                                         PerlIO_printf( Perl_debug_log,
1603                                             " Scanning for legal start char...\n");
1604                                     }
1605                                 );
1606                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1607                                     uc++;
1608                                 }
1609                                 s= (char *)uc;
1610                             }
1611                             if (uc >(U8*)last_start) break;
1612                         }
1613 
1614                         if ( word ) {
1615                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1616                             if (!leftmost || lpos < leftmost) {
1617                                 DEBUG_r(accepted_word=word);
1618                                 leftmost= lpos;
1619                             }
1620                             if (base==0) break;
1621 
1622                         }
1623                         points[pointpos++ % maxlen]= uc;
1624 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1625 					     uscan, len, uvc, charid, foldlen,
1626 					     foldbuf, uniflags);
1627                         DEBUG_TRIE_EXECUTE_r({
1628                             dump_exec_pos( (char *)uc, c, strend, real_start,
1629                                 s,   do_utf8 );
1630                             PerlIO_printf(Perl_debug_log,
1631                                 " Charid:%3u CP:%4"UVxf" ",
1632                                  charid, uvc);
1633                         });
1634 
1635                         do {
1636 #ifdef DEBUGGING
1637                             word = aho->states[ state ].wordnum;
1638 #endif
1639                             base = aho->states[ state ].trans.base;
1640 
1641                             DEBUG_TRIE_EXECUTE_r({
1642                                 if (failed)
1643                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1644                                         s,   do_utf8 );
1645                                 PerlIO_printf( Perl_debug_log,
1646                                     "%sState: %4"UVxf", word=%"UVxf,
1647                                     failed ? " Fail transition to " : "",
1648                                     (UV)state, (UV)word);
1649                             });
1650                             if ( base ) {
1651                                 U32 tmp;
1652                                 if (charid &&
1653                                      (base + charid > trie->uniquecharcount )
1654                                      && (base + charid - 1 - trie->uniquecharcount
1655                                             < trie->lasttrans)
1656                                      && trie->trans[base + charid - 1 -
1657                                             trie->uniquecharcount].check == state
1658                                      && (tmp=trie->trans[base + charid - 1 -
1659                                         trie->uniquecharcount ].next))
1660                                 {
1661                                     DEBUG_TRIE_EXECUTE_r(
1662                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1663                                     state = tmp;
1664                                     break;
1665                                 }
1666                                 else {
1667                                     DEBUG_TRIE_EXECUTE_r(
1668                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1669                                     failed = 1;
1670                                     state = aho->fail[state];
1671                                 }
1672                             }
1673                             else {
1674                                 /* we must be accepting here */
1675                                 DEBUG_TRIE_EXECUTE_r(
1676                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1677                                 failed = 1;
1678                                 break;
1679                             }
1680                         } while(state);
1681                         uc += len;
1682                         if (failed) {
1683                             if (leftmost)
1684                                 break;
1685                             if (!state) state = 1;
1686                         }
1687                     }
1688                     if ( aho->states[ state ].wordnum ) {
1689                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1690                         if (!leftmost || lpos < leftmost) {
1691                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1692                             leftmost = lpos;
1693                         }
1694                     }
1695                     if (leftmost) {
1696                         s = (char*)leftmost;
1697                         DEBUG_TRIE_EXECUTE_r({
1698                             PerlIO_printf(
1699                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1700                                 (UV)accepted_word, (IV)(s - real_start)
1701                             );
1702                         });
1703                         if (!reginfo || regtry(reginfo, &s)) {
1704                             FREETMPS;
1705 		            LEAVE;
1706                             goto got_it;
1707                         }
1708                         s = HOPc(s,1);
1709                         DEBUG_TRIE_EXECUTE_r({
1710                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1711                         });
1712                     } else {
1713                         DEBUG_TRIE_EXECUTE_r(
1714                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1715                         break;
1716                     }
1717                 }
1718                 FREETMPS;
1719                 LEAVE;
1720 	    }
1721 	    break;
1722 	default:
1723 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1724 	    break;
1725 	}
1726 	return 0;
1727       got_it:
1728 	return s;
1729 }
1730 
1731 static void
1732 S_swap_match_buff (pTHX_ regexp *prog)
1733 {
1734     regexp_paren_pair *t;
1735 
1736     PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1737 
1738     if (!prog->swap) {
1739     /* We have to be careful. If the previous successful match
1740        was from this regex we don't want a subsequent paritally
1741        successful match to clobber the old results.
1742        So when we detect this possibility we add a swap buffer
1743        to the re, and switch the buffer each match. If we fail
1744        we switch it back, otherwise we leave it swapped.
1745     */
1746         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1747     }
1748     t = prog->swap;
1749     prog->swap = prog->offs;
1750     prog->offs = t;
1751 }
1752 
1753 
1754 /*
1755  - regexec_flags - match a regexp against a string
1756  */
1757 I32
1758 Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1759 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1760 /* strend: pointer to null at end of string */
1761 /* strbeg: real beginning of string */
1762 /* minend: end of match must be >=minend after stringarg. */
1763 /* data: May be used for some additional optimizations.
1764          Currently its only used, with a U32 cast, for transmitting
1765          the ganch offset when doing a /g match. This will change */
1766 /* nosave: For optimizations. */
1767 {
1768     dVAR;
1769     /*register*/ char *s;
1770     register regnode *c;
1771     /*register*/ char *startpos = stringarg;
1772     I32 minlen;		/* must match at least this many chars */
1773     I32 dontbother = 0;	/* how many characters not to try at end */
1774     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1775     I32 scream_pos = -1;		/* Internal iterator of scream. */
1776     char *scream_olds = NULL;
1777     const bool do_utf8 = (bool)DO_UTF8(sv);
1778     I32 multiline;
1779     RXi_GET_DECL(prog,progi);
1780     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1781     bool swap_on_fail = 0;
1782     GET_RE_DEBUG_FLAGS_DECL;
1783 
1784     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1785     PERL_UNUSED_ARG(data);
1786 
1787     /* Be paranoid... */
1788     if (prog == NULL || startpos == NULL) {
1789 	Perl_croak(aTHX_ "NULL regexp parameter");
1790 	return 0;
1791     }
1792 
1793     multiline = prog->extflags & RXf_PMf_MULTILINE;
1794     reginfo.prog = prog;
1795 
1796     RX_MATCH_UTF8_set(prog, do_utf8);
1797     DEBUG_EXECUTE_r(
1798         debug_start_match(prog, do_utf8, startpos, strend,
1799         "Matching");
1800     );
1801 
1802     minlen = prog->minlen;
1803 
1804     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1805         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1806 			      "String too short [regexec_flags]...\n"));
1807 	goto phooey;
1808     }
1809 
1810 
1811     /* Check validity of program. */
1812     if (UCHARAT(progi->program) != REG_MAGIC) {
1813 	Perl_croak(aTHX_ "corrupted regexp program");
1814     }
1815 
1816     PL_reg_flags = 0;
1817     PL_reg_eval_set = 0;
1818     PL_reg_maxiter = 0;
1819 
1820     if (RX_UTF8(prog))
1821 	PL_reg_flags |= RF_utf8;
1822 
1823     /* Mark beginning of line for ^ and lookbehind. */
1824     reginfo.bol = startpos; /* XXX not used ??? */
1825     PL_bostr  = strbeg;
1826     reginfo.sv = sv;
1827 
1828     /* Mark end of line for $ (and such) */
1829     PL_regeol = strend;
1830 
1831     /* see how far we have to get to not match where we matched before */
1832     reginfo.till = startpos+minend;
1833 
1834     /* If there is a "must appear" string, look for it. */
1835     s = startpos;
1836 
1837     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1838 	MAGIC *mg;
1839 
1840 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1841 	    reginfo.ganch = startpos + prog->gofs;
1842 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1843 		  && SvMAGIC(sv)
1844 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1845 		  && mg->mg_len >= 0) {
1846 	    reginfo.ganch = strbeg + mg->mg_len;	/* Defined pos() */
1847 	    if (prog->extflags & RXf_ANCH_GPOS) {
1848 	        if (s > reginfo.ganch)
1849 		    goto phooey;
1850 		s = reginfo.ganch - prog->gofs;
1851 	    }
1852 	}
1853 	else if (data) {
1854 	    reginfo.ganch = strbeg + PTR2UV(data);
1855 	} else				/* pos() not defined */
1856 	    reginfo.ganch = strbeg;
1857     }
1858     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1859         swap_on_fail = 1;
1860         swap_match_buff(prog); /* do we need a save destructor here for
1861                                   eval dies? */
1862     }
1863     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1864 	re_scream_pos_data d;
1865 
1866 	d.scream_olds = &scream_olds;
1867 	d.scream_pos = &scream_pos;
1868 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1869 	if (!s) {
1870 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1871 	    goto phooey;	/* not present */
1872 	}
1873     }
1874 
1875 
1876 
1877     /* Simplest case:  anchored match need be tried only once. */
1878     /*  [unless only anchor is BOL and multiline is set] */
1879     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1880 	if (s == startpos && regtry(&reginfo, &startpos))
1881 	    goto got_it;
1882 	else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1883 		 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1884 	{
1885 	    char *end;
1886 
1887 	    if (minlen)
1888 		dontbother = minlen - 1;
1889 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1890 	    /* for multiline we only have to try after newlines */
1891 	    if (prog->check_substr || prog->check_utf8) {
1892 		if (s == startpos)
1893 		    goto after_try;
1894 		while (1) {
1895 		    if (regtry(&reginfo, &s))
1896 			goto got_it;
1897 		  after_try:
1898 		    if (s > end)
1899 			goto phooey;
1900 		    if (prog->extflags & RXf_USE_INTUIT) {
1901 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1902 			if (!s)
1903 			    goto phooey;
1904 		    }
1905 		    else
1906 			s++;
1907 		}
1908 	    } else {
1909 		if (s > startpos)
1910 		    s--;
1911 		while (s < end) {
1912 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1913 			if (regtry(&reginfo, &s))
1914 			    goto got_it;
1915 		    }
1916 		}
1917 	    }
1918 	}
1919 	goto phooey;
1920     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1921     {
1922         /* the warning about reginfo.ganch being used without intialization
1923            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1924            and we only enter this block when the same bit is set. */
1925         char *tmp_s = reginfo.ganch - prog->gofs;
1926 	if (regtry(&reginfo, &tmp_s))
1927 	    goto got_it;
1928 	goto phooey;
1929     }
1930 
1931     /* Messy cases:  unanchored match. */
1932     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1933 	/* we have /x+whatever/ */
1934 	/* it must be a one character string (XXXX Except UTF?) */
1935 	char ch;
1936 #ifdef DEBUGGING
1937 	int did_match = 0;
1938 #endif
1939 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1940 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1941 	ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1942 
1943 	if (do_utf8) {
1944 	    REXEC_FBC_SCAN(
1945 		if (*s == ch) {
1946 		    DEBUG_EXECUTE_r( did_match = 1 );
1947 		    if (regtry(&reginfo, &s)) goto got_it;
1948 		    s += UTF8SKIP(s);
1949 		    while (s < strend && *s == ch)
1950 			s += UTF8SKIP(s);
1951 		}
1952 	    );
1953 	}
1954 	else {
1955 	    REXEC_FBC_SCAN(
1956 		if (*s == ch) {
1957 		    DEBUG_EXECUTE_r( did_match = 1 );
1958 		    if (regtry(&reginfo, &s)) goto got_it;
1959 		    s++;
1960 		    while (s < strend && *s == ch)
1961 			s++;
1962 		}
1963 	    );
1964 	}
1965 	DEBUG_EXECUTE_r(if (!did_match)
1966 		PerlIO_printf(Perl_debug_log,
1967                                   "Did not find anchored character...\n")
1968                );
1969     }
1970     else if (prog->anchored_substr != NULL
1971 	      || prog->anchored_utf8 != NULL
1972 	      || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1973 		  && prog->float_max_offset < strend - s)) {
1974 	SV *must;
1975 	I32 back_max;
1976 	I32 back_min;
1977 	char *last;
1978 	char *last1;		/* Last position checked before */
1979 #ifdef DEBUGGING
1980 	int did_match = 0;
1981 #endif
1982 	if (prog->anchored_substr || prog->anchored_utf8) {
1983 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1984 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1985 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1986 	    back_max = back_min = prog->anchored_offset;
1987 	} else {
1988 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1989 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1990 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1991 	    back_max = prog->float_max_offset;
1992 	    back_min = prog->float_min_offset;
1993 	}
1994 
1995 
1996 	if (must == &PL_sv_undef)
1997 	    /* could not downgrade utf8 check substring, so must fail */
1998 	    goto phooey;
1999 
2000         if (back_min<0) {
2001 	    last = strend;
2002 	} else {
2003             last = HOP3c(strend,	/* Cannot start after this */
2004         	  -(I32)(CHR_SVLEN(must)
2005         		 - (SvTAIL(must) != 0) + back_min), strbeg);
2006         }
2007 	if (s > PL_bostr)
2008 	    last1 = HOPc(s, -1);
2009 	else
2010 	    last1 = s - 1;	/* bogus */
2011 
2012 	/* XXXX check_substr already used to find "s", can optimize if
2013 	   check_substr==must. */
2014 	scream_pos = -1;
2015 	dontbother = end_shift;
2016 	strend = HOPc(strend, -dontbother);
2017 	while ( (s <= last) &&
2018 		((flags & REXEC_SCREAM)
2019 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2020 				    end_shift, &scream_pos, 0))
2021 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2022 				  (unsigned char*)strend, must,
2023 				  multiline ? FBMrf_MULTILINE : 0))) ) {
2024 	    /* we may be pointing at the wrong string */
2025 	    if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2026 		s = strbeg + (s - SvPVX_const(sv));
2027 	    DEBUG_EXECUTE_r( did_match = 1 );
2028 	    if (HOPc(s, -back_max) > last1) {
2029 		last1 = HOPc(s, -back_min);
2030 		s = HOPc(s, -back_max);
2031 	    }
2032 	    else {
2033 		char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2034 
2035 		last1 = HOPc(s, -back_min);
2036 		s = t;
2037 	    }
2038 	    if (do_utf8) {
2039 		while (s <= last1) {
2040 		    if (regtry(&reginfo, &s))
2041 			goto got_it;
2042 		    s += UTF8SKIP(s);
2043 		}
2044 	    }
2045 	    else {
2046 		while (s <= last1) {
2047 		    if (regtry(&reginfo, &s))
2048 			goto got_it;
2049 		    s++;
2050 		}
2051 	    }
2052 	}
2053 	DEBUG_EXECUTE_r(if (!did_match) {
2054             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2055                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2056             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2057 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
2058 			       ? "anchored" : "floating"),
2059                 quoted, RE_SV_TAIL(must));
2060         });
2061 	goto phooey;
2062     }
2063     else if ( (c = progi->regstclass) ) {
2064 	if (minlen) {
2065 	    const OPCODE op = OP(progi->regstclass);
2066 	    /* don't bother with what can't match */
2067 	    if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2068 	        strend = HOPc(strend, -(minlen - 1));
2069 	}
2070 	DEBUG_EXECUTE_r({
2071 	    SV * const prop = sv_newmortal();
2072 	    regprop(prog, prop, c);
2073 	    {
2074 		RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2075 		    s,strend-s,60);
2076 		PerlIO_printf(Perl_debug_log,
2077 		    "Matching stclass %.*s against %s (%d chars)\n",
2078 		    (int)SvCUR(prop), SvPVX_const(prop),
2079 		     quoted, (int)(strend - s));
2080 	    }
2081 	});
2082         if (find_byclass(prog, c, s, strend, &reginfo))
2083 	    goto got_it;
2084 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2085     }
2086     else {
2087 	dontbother = 0;
2088 	if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2089 	    /* Trim the end. */
2090 	    char *last;
2091 	    SV* float_real;
2092 
2093 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2094 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2095 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2096 
2097 	    if (flags & REXEC_SCREAM) {
2098 		last = screaminstr(sv, float_real, s - strbeg,
2099 				   end_shift, &scream_pos, 1); /* last one */
2100 		if (!last)
2101 		    last = scream_olds; /* Only one occurrence. */
2102 		/* we may be pointing at the wrong string */
2103 		else if (RXp_MATCH_COPIED(prog))
2104 		    s = strbeg + (s - SvPVX_const(sv));
2105 	    }
2106 	    else {
2107 		STRLEN len;
2108                 const char * const little = SvPV_const(float_real, len);
2109 
2110 		if (SvTAIL(float_real)) {
2111 		    if (memEQ(strend - len + 1, little, len - 1))
2112 			last = strend - len + 1;
2113 		    else if (!multiline)
2114 			last = memEQ(strend - len, little, len)
2115 			    ? strend - len : NULL;
2116 		    else
2117 			goto find_last;
2118 		} else {
2119 		  find_last:
2120 		    if (len)
2121 			last = rninstr(s, strend, little, little + len);
2122 		    else
2123 			last = strend;	/* matching "$" */
2124 		}
2125 	    }
2126 	    if (last == NULL) {
2127 		DEBUG_EXECUTE_r(
2128 		    PerlIO_printf(Perl_debug_log,
2129 			"%sCan't trim the tail, match fails (should not happen)%s\n",
2130 	                PL_colors[4], PL_colors[5]));
2131 		goto phooey; /* Should not happen! */
2132 	    }
2133 	    dontbother = strend - last + prog->float_min_offset;
2134 	}
2135 	if (minlen && (dontbother < minlen))
2136 	    dontbother = minlen - 1;
2137 	strend -= dontbother; 		   /* this one's always in bytes! */
2138 	/* We don't know much -- general case. */
2139 	if (do_utf8) {
2140 	    for (;;) {
2141 		if (regtry(&reginfo, &s))
2142 		    goto got_it;
2143 		if (s >= strend)
2144 		    break;
2145 		s += UTF8SKIP(s);
2146 	    };
2147 	}
2148 	else {
2149 	    do {
2150 		if (regtry(&reginfo, &s))
2151 		    goto got_it;
2152 	    } while (s++ < strend);
2153 	}
2154     }
2155 
2156     /* Failure. */
2157     goto phooey;
2158 
2159 got_it:
2160     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2161 
2162     if (PL_reg_eval_set)
2163 	restore_pos(aTHX_ prog);
2164     if (RXp_PAREN_NAMES(prog))
2165         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2166 
2167     /* make sure $`, $&, $', and $digit will work later */
2168     if ( !(flags & REXEC_NOT_FIRST) ) {
2169 	RX_MATCH_COPY_FREE(prog);
2170 	if (flags & REXEC_COPY_STR) {
2171 	    const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2172 #ifdef PERL_OLD_COPY_ON_WRITE
2173 	    if ((SvIsCOW(sv)
2174 		 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2175 		if (DEBUG_C_TEST) {
2176 		    PerlIO_printf(Perl_debug_log,
2177 				  "Copy on write: regexp capture, type %d\n",
2178 				  (int) SvTYPE(sv));
2179 		}
2180 		prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2181 		prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2182 		assert (SvPOKp(prog->saved_copy));
2183 	    } else
2184 #endif
2185 	    {
2186 		RX_MATCH_COPIED_on(prog);
2187 		s = savepvn(strbeg, i);
2188 		prog->subbeg = s;
2189 	    }
2190 	    prog->sublen = i;
2191 	}
2192 	else {
2193 	    prog->subbeg = strbeg;
2194 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2195 	}
2196     }
2197 
2198     return 1;
2199 
2200 phooey:
2201     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2202 			  PL_colors[4], PL_colors[5]));
2203     if (PL_reg_eval_set)
2204 	restore_pos(aTHX_ prog);
2205     if (swap_on_fail)
2206         /* we failed :-( roll it back */
2207         swap_match_buff(prog);
2208 
2209     return 0;
2210 }
2211 
2212 
2213 /*
2214  - regtry - try match at specific point
2215  */
2216 STATIC I32			/* 0 failure, 1 success */
2217 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2218 {
2219     dVAR;
2220     CHECKPOINT lastcp;
2221     regexp *prog = reginfo->prog;
2222     RXi_GET_DECL(prog,progi);
2223     GET_RE_DEBUG_FLAGS_DECL;
2224 
2225     PERL_ARGS_ASSERT_REGTRY;
2226 
2227     reginfo->cutpoint=NULL;
2228 
2229     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2230 	MAGIC *mg;
2231 
2232 	PL_reg_eval_set = RS_init;
2233 	DEBUG_EXECUTE_r(DEBUG_s(
2234 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2235 			  (IV)(PL_stack_sp - PL_stack_base));
2236 	    ));
2237 	SAVESTACK_CXPOS();
2238 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2239 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2240 	SAVETMPS;
2241 	/* Apparently this is not needed, judging by wantarray. */
2242 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2243 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2244 
2245 	if (reginfo->sv) {
2246 	    /* Make $_ available to executed code. */
2247 	    if (reginfo->sv != DEFSV) {
2248 		SAVE_DEFSV;
2249 		DEFSV_set(reginfo->sv);
2250 	    }
2251 
2252 	    if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2253 		  && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2254 		/* prepare for quick setting of pos */
2255 #ifdef PERL_OLD_COPY_ON_WRITE
2256 		if (SvIsCOW(reginfo->sv))
2257 		    sv_force_normal_flags(reginfo->sv, 0);
2258 #endif
2259 		mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2260 				 &PL_vtbl_mglob, NULL, 0);
2261 		mg->mg_len = -1;
2262 	    }
2263 	    PL_reg_magic    = mg;
2264 	    PL_reg_oldpos   = mg->mg_len;
2265 	    SAVEDESTRUCTOR_X(restore_pos, prog);
2266         }
2267         if (!PL_reg_curpm) {
2268 	    Newxz(PL_reg_curpm, 1, PMOP);
2269 #ifdef USE_ITHREADS
2270             {
2271 		SV* const repointer = newSViv(0);
2272                 /* this regexp is also owned by the new PL_reg_curpm, which
2273 		   will try to free it.  */
2274                 av_push(PL_regex_padav,repointer);
2275                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2276                 PL_regex_pad = AvARRAY(PL_regex_padav);
2277             }
2278 #endif
2279         }
2280 #ifdef USE_ITHREADS
2281 	/* It seems that non-ithreads works both with and without this code.
2282 	   So for efficiency reasons it seems best not to have the code
2283 	   compiled when it is not needed.  */
2284 	/* This is safe against NULLs: */
2285 	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2286 	/* PM_reg_curpm owns a reference to this regexp.  */
2287 	ReREFCNT_inc(prog);
2288 #endif
2289 	PM_SETRE(PL_reg_curpm, prog);
2290 	PL_reg_oldcurpm = PL_curpm;
2291 	PL_curpm = PL_reg_curpm;
2292 	if (RXp_MATCH_COPIED(prog)) {
2293 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2294 		since it may be needed if this match fails.  Thus
2295 		$` inside (?{}) could fail... */
2296 	    PL_reg_oldsaved = prog->subbeg;
2297 	    PL_reg_oldsavedlen = prog->sublen;
2298 #ifdef PERL_OLD_COPY_ON_WRITE
2299 	    PL_nrs = prog->saved_copy;
2300 #endif
2301 	    RXp_MATCH_COPIED_off(prog);
2302 	}
2303 	else
2304 	    PL_reg_oldsaved = NULL;
2305 	prog->subbeg = PL_bostr;
2306 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2307     }
2308     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2309     prog->offs[0].start = *startpos - PL_bostr;
2310     PL_reginput = *startpos;
2311     PL_reglastparen = &prog->lastparen;
2312     PL_reglastcloseparen = &prog->lastcloseparen;
2313     prog->lastparen = 0;
2314     prog->lastcloseparen = 0;
2315     PL_regsize = 0;
2316     PL_regoffs = prog->offs;
2317     if (PL_reg_start_tmpl <= prog->nparens) {
2318 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2319         if(PL_reg_start_tmp)
2320             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2321         else
2322             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2323     }
2324 
2325     /* XXXX What this code is doing here?!!!  There should be no need
2326        to do this again and again, PL_reglastparen should take care of
2327        this!  --ilya*/
2328 
2329     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2330      * Actually, the code in regcppop() (which Ilya may be meaning by
2331      * PL_reglastparen), is not needed at all by the test suite
2332      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2333      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2334      * Meanwhile, this code *is* needed for the
2335      * above-mentioned test suite tests to succeed.  The common theme
2336      * on those tests seems to be returning null fields from matches.
2337      * --jhi updated by dapm */
2338 #if 1
2339     if (prog->nparens) {
2340 	regexp_paren_pair *pp = PL_regoffs;
2341 	register I32 i;
2342 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2343 	    ++pp;
2344 	    pp->start = -1;
2345 	    pp->end = -1;
2346 	}
2347     }
2348 #endif
2349     REGCP_SET(lastcp);
2350     if (regmatch(reginfo, progi->program + 1)) {
2351 	PL_regoffs[0].end = PL_reginput - PL_bostr;
2352 	return 1;
2353     }
2354     if (reginfo->cutpoint)
2355         *startpos= reginfo->cutpoint;
2356     REGCP_UNWIND(lastcp);
2357     return 0;
2358 }
2359 
2360 
2361 #define sayYES goto yes
2362 #define sayNO goto no
2363 #define sayNO_SILENT goto no_silent
2364 
2365 /* we dont use STMT_START/END here because it leads to
2366    "unreachable code" warnings, which are bogus, but distracting. */
2367 #define CACHEsayNO \
2368     if (ST.cache_mask) \
2369        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2370     sayNO
2371 
2372 /* this is used to determine how far from the left messages like
2373    'failed...' are printed. It should be set such that messages
2374    are inline with the regop output that created them.
2375 */
2376 #define REPORT_CODE_OFF 32
2377 
2378 
2379 /* Make sure there is a test for this +1 options in re_tests */
2380 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2381 
2382 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2383 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2384 
2385 #define SLAB_FIRST(s) (&(s)->states[0])
2386 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2387 
2388 /* grab a new slab and return the first slot in it */
2389 
2390 STATIC regmatch_state *
2391 S_push_slab(pTHX)
2392 {
2393 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2394     dMY_CXT;
2395 #endif
2396     regmatch_slab *s = PL_regmatch_slab->next;
2397     if (!s) {
2398 	Newx(s, 1, regmatch_slab);
2399 	s->prev = PL_regmatch_slab;
2400 	s->next = NULL;
2401 	PL_regmatch_slab->next = s;
2402     }
2403     PL_regmatch_slab = s;
2404     return SLAB_FIRST(s);
2405 }
2406 
2407 
2408 /* push a new state then goto it */
2409 
2410 #define PUSH_STATE_GOTO(state, node) \
2411     scan = node; \
2412     st->resume_state = state; \
2413     goto push_state;
2414 
2415 /* push a new state with success backtracking, then goto it */
2416 
2417 #define PUSH_YES_STATE_GOTO(state, node) \
2418     scan = node; \
2419     st->resume_state = state; \
2420     goto push_yes_state;
2421 
2422 
2423 
2424 /*
2425 
2426 regmatch() - main matching routine
2427 
2428 This is basically one big switch statement in a loop. We execute an op,
2429 set 'next' to point the next op, and continue. If we come to a point which
2430 we may need to backtrack to on failure such as (A|B|C), we push a
2431 backtrack state onto the backtrack stack. On failure, we pop the top
2432 state, and re-enter the loop at the state indicated. If there are no more
2433 states to pop, we return failure.
2434 
2435 Sometimes we also need to backtrack on success; for example /A+/, where
2436 after successfully matching one A, we need to go back and try to
2437 match another one; similarly for lookahead assertions: if the assertion
2438 completes successfully, we backtrack to the state just before the assertion
2439 and then carry on.  In these cases, the pushed state is marked as
2440 'backtrack on success too'. This marking is in fact done by a chain of
2441 pointers, each pointing to the previous 'yes' state. On success, we pop to
2442 the nearest yes state, discarding any intermediate failure-only states.
2443 Sometimes a yes state is pushed just to force some cleanup code to be
2444 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2445 it to free the inner regex.
2446 
2447 Note that failure backtracking rewinds the cursor position, while
2448 success backtracking leaves it alone.
2449 
2450 A pattern is complete when the END op is executed, while a subpattern
2451 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2452 ops trigger the "pop to last yes state if any, otherwise return true"
2453 behaviour.
2454 
2455 A common convention in this function is to use A and B to refer to the two
2456 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2457 the subpattern to be matched possibly multiple times, while B is the entire
2458 rest of the pattern. Variable and state names reflect this convention.
2459 
2460 The states in the main switch are the union of ops and failure/success of
2461 substates associated with with that op.  For example, IFMATCH is the op
2462 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2463 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2464 successfully matched A and IFMATCH_A_fail is a state saying that we have
2465 just failed to match A. Resume states always come in pairs. The backtrack
2466 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2467 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2468 on success or failure.
2469 
2470 The struct that holds a backtracking state is actually a big union, with
2471 one variant for each major type of op. The variable st points to the
2472 top-most backtrack struct. To make the code clearer, within each
2473 block of code we #define ST to alias the relevant union.
2474 
2475 Here's a concrete example of a (vastly oversimplified) IFMATCH
2476 implementation:
2477 
2478     switch (state) {
2479     ....
2480 
2481 #define ST st->u.ifmatch
2482 
2483     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2484 	ST.foo = ...; // some state we wish to save
2485 	...
2486 	// push a yes backtrack state with a resume value of
2487 	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2488 	// first node of A:
2489 	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2490 	// NOTREACHED
2491 
2492     case IFMATCH_A: // we have successfully executed A; now continue with B
2493 	next = B;
2494 	bar = ST.foo; // do something with the preserved value
2495 	break;
2496 
2497     case IFMATCH_A_fail: // A failed, so the assertion failed
2498 	...;   // do some housekeeping, then ...
2499 	sayNO; // propagate the failure
2500 
2501 #undef ST
2502 
2503     ...
2504     }
2505 
2506 For any old-timers reading this who are familiar with the old recursive
2507 approach, the code above is equivalent to:
2508 
2509     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2510     {
2511 	int foo = ...
2512 	...
2513 	if (regmatch(A)) {
2514 	    next = B;
2515 	    bar = foo;
2516 	    break;
2517 	}
2518 	...;   // do some housekeeping, then ...
2519 	sayNO; // propagate the failure
2520     }
2521 
2522 The topmost backtrack state, pointed to by st, is usually free. If you
2523 want to claim it, populate any ST.foo fields in it with values you wish to
2524 save, then do one of
2525 
2526 	PUSH_STATE_GOTO(resume_state, node);
2527 	PUSH_YES_STATE_GOTO(resume_state, node);
2528 
2529 which sets that backtrack state's resume value to 'resume_state', pushes a
2530 new free entry to the top of the backtrack stack, then goes to 'node'.
2531 On backtracking, the free slot is popped, and the saved state becomes the
2532 new free state. An ST.foo field in this new top state can be temporarily
2533 accessed to retrieve values, but once the main loop is re-entered, it
2534 becomes available for reuse.
2535 
2536 Note that the depth of the backtrack stack constantly increases during the
2537 left-to-right execution of the pattern, rather than going up and down with
2538 the pattern nesting. For example the stack is at its maximum at Z at the
2539 end of the pattern, rather than at X in the following:
2540 
2541     /(((X)+)+)+....(Y)+....Z/
2542 
2543 The only exceptions to this are lookahead/behind assertions and the cut,
2544 (?>A), which pop all the backtrack states associated with A before
2545 continuing.
2546 
2547 Bascktrack state structs are allocated in slabs of about 4K in size.
2548 PL_regmatch_state and st always point to the currently active state,
2549 and PL_regmatch_slab points to the slab currently containing
2550 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2551 allocated, and is never freed until interpreter destruction. When the slab
2552 is full, a new one is allocated and chained to the end. At exit from
2553 regmatch(), slabs allocated since entry are freed.
2554 
2555 */
2556 
2557 
2558 #define DEBUG_STATE_pp(pp)				    \
2559     DEBUG_STATE_r({					    \
2560 	DUMP_EXEC_POS(locinput, scan, do_utf8);		    \
2561 	PerlIO_printf(Perl_debug_log,			    \
2562 	    "    %*s"pp" %s%s%s%s%s\n",			    \
2563 	    depth*2, "",				    \
2564 	    PL_reg_name[st->resume_state],                     \
2565 	    ((st==yes_state||st==mark_state) ? "[" : ""),   \
2566 	    ((st==yes_state) ? "Y" : ""),                   \
2567 	    ((st==mark_state) ? "M" : ""),                  \
2568 	    ((st==yes_state||st==mark_state) ? "]" : "")    \
2569 	);                                                  \
2570     });
2571 
2572 
2573 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2574 
2575 #ifdef DEBUGGING
2576 
2577 STATIC void
2578 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2579     const char *start, const char *end, const char *blurb)
2580 {
2581     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2582 
2583     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2584 
2585     if (!PL_colorset)
2586             reginitcolors();
2587     {
2588         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2589             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2590 
2591         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2592             start, end - start, 60);
2593 
2594         PerlIO_printf(Perl_debug_log,
2595             "%s%s REx%s %s against %s\n",
2596 		       PL_colors[4], blurb, PL_colors[5], s0, s1);
2597 
2598         if (do_utf8||utf8_pat)
2599             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2600                 utf8_pat ? "pattern" : "",
2601                 utf8_pat && do_utf8 ? " and " : "",
2602                 do_utf8 ? "string" : ""
2603             );
2604     }
2605 }
2606 
2607 STATIC void
2608 S_dump_exec_pos(pTHX_ const char *locinput,
2609                       const regnode *scan,
2610                       const char *loc_regeol,
2611                       const char *loc_bostr,
2612                       const char *loc_reg_starttry,
2613                       const bool do_utf8)
2614 {
2615     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2616     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2617     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2618     /* The part of the string before starttry has one color
2619        (pref0_len chars), between starttry and current
2620        position another one (pref_len - pref0_len chars),
2621        after the current position the third one.
2622        We assume that pref0_len <= pref_len, otherwise we
2623        decrease pref0_len.  */
2624     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2625 	? (5 + taill) - l : locinput - loc_bostr;
2626     int pref0_len;
2627 
2628     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2629 
2630     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2631 	pref_len++;
2632     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2633     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2634 	l = ( loc_regeol - locinput > (5 + taill) - pref_len
2635 	      ? (5 + taill) - pref_len : loc_regeol - locinput);
2636     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2637 	l--;
2638     if (pref0_len < 0)
2639 	pref0_len = 0;
2640     if (pref0_len > pref_len)
2641 	pref0_len = pref_len;
2642     {
2643 	const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2644 
2645 	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2646 	    (locinput - pref_len),pref0_len, 60, 4, 5);
2647 
2648 	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2649 		    (locinput - pref_len + pref0_len),
2650 		    pref_len - pref0_len, 60, 2, 3);
2651 
2652 	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2653 		    locinput, loc_regeol - locinput, 10, 0, 1);
2654 
2655 	const STRLEN tlen=len0+len1+len2;
2656 	PerlIO_printf(Perl_debug_log,
2657 		    "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2658 		    (IV)(locinput - loc_bostr),
2659 		    len0, s0,
2660 		    len1, s1,
2661 		    (docolor ? "" : "> <"),
2662 		    len2, s2,
2663 		    (int)(tlen > 19 ? 0 :  19 - tlen),
2664 		    "");
2665     }
2666 }
2667 
2668 #endif
2669 
2670 /* reg_check_named_buff_matched()
2671  * Checks to see if a named buffer has matched. The data array of
2672  * buffer numbers corresponding to the buffer is expected to reside
2673  * in the regexp->data->data array in the slot stored in the ARG() of
2674  * node involved. Note that this routine doesn't actually care about the
2675  * name, that information is not preserved from compilation to execution.
2676  * Returns the index of the leftmost defined buffer with the given name
2677  * or 0 if non of the buffers matched.
2678  */
2679 STATIC I32
2680 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2681 {
2682     I32 n;
2683     RXi_GET_DECL(rex,rexi);
2684     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2685     I32 *nums=(I32*)SvPVX(sv_dat);
2686 
2687     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2688 
2689     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2690         if ((I32)*PL_reglastparen >= nums[n] &&
2691             PL_regoffs[nums[n]].end != -1)
2692         {
2693             return nums[n];
2694         }
2695     }
2696     return 0;
2697 }
2698 
2699 
2700 /* free all slabs above current one  - called during LEAVE_SCOPE */
2701 
2702 STATIC void
2703 S_clear_backtrack_stack(pTHX_ void *p)
2704 {
2705     regmatch_slab *s = PL_regmatch_slab->next;
2706     PERL_UNUSED_ARG(p);
2707 
2708     if (!s)
2709 	return;
2710     PL_regmatch_slab->next = NULL;
2711     while (s) {
2712 	regmatch_slab * const osl = s;
2713 	s = s->next;
2714 	Safefree(osl);
2715     }
2716 }
2717 
2718 
2719 #define SETREX(Re1,Re2) \
2720     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2721     Re1 = (Re2)
2722 
2723 STATIC I32			/* 0 failure, 1 success */
2724 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2725 {
2726 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2727     dMY_CXT;
2728 #endif
2729     dVAR;
2730     register const bool do_utf8 = PL_reg_match_utf8;
2731     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2732     regexp *rex = reginfo->prog;
2733     RXi_GET_DECL(rex,rexi);
2734     I32	oldsave;
2735     /* the current state. This is a cached copy of PL_regmatch_state */
2736     register regmatch_state *st;
2737     /* cache heavy used fields of st in registers */
2738     register regnode *scan;
2739     register regnode *next;
2740     register U32 n = 0;	/* general value; init to avoid compiler warning */
2741     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2742     register char *locinput = PL_reginput;
2743     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2744 
2745     bool result = 0;	    /* return value of S_regmatch */
2746     int depth = 0;	    /* depth of backtrack stack */
2747     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2748     const U32 max_nochange_depth =
2749         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2750         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2751     regmatch_state *yes_state = NULL; /* state to pop to on success of
2752 							    subpattern */
2753     /* mark_state piggy backs on the yes_state logic so that when we unwind
2754        the stack on success we can update the mark_state as we go */
2755     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2756     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2757     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2758     U32 state_num;
2759     bool no_final = 0;      /* prevent failure from backtracking? */
2760     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2761     char *startpoint = PL_reginput;
2762     SV *popmark = NULL;     /* are we looking for a mark? */
2763     SV *sv_commit = NULL;   /* last mark name seen in failure */
2764     SV *sv_yes_mark = NULL; /* last mark name we have seen
2765                                during a successfull match */
2766     U32 lastopen = 0;       /* last open we saw */
2767     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2768     SV* const oreplsv = GvSV(PL_replgv);
2769     /* these three flags are set by various ops to signal information to
2770      * the very next op. They have a useful lifetime of exactly one loop
2771      * iteration, and are not preserved or restored by state pushes/pops
2772      */
2773     bool sw = 0;	    /* the condition value in (?(cond)a|b) */
2774     bool minmod = 0;	    /* the next "{n,m}" is a "{n,m}?" */
2775     int logical = 0;	    /* the following EVAL is:
2776 				0: (?{...})
2777 				1: (?(?{...})X|Y)
2778 				2: (??{...})
2779 			       or the following IFMATCH/UNLESSM is:
2780 			        false: plain (?=foo)
2781 				true:  used as a condition: (?(?=foo))
2782 			    */
2783 #ifdef DEBUGGING
2784     GET_RE_DEBUG_FLAGS_DECL;
2785 #endif
2786 
2787     PERL_ARGS_ASSERT_REGMATCH;
2788 
2789     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2790 	    PerlIO_printf(Perl_debug_log,"regmatch start\n");
2791     }));
2792     /* on first ever call to regmatch, allocate first slab */
2793     if (!PL_regmatch_slab) {
2794 	Newx(PL_regmatch_slab, 1, regmatch_slab);
2795 	PL_regmatch_slab->prev = NULL;
2796 	PL_regmatch_slab->next = NULL;
2797 	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2798     }
2799 
2800     oldsave = PL_savestack_ix;
2801     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2802     SAVEVPTR(PL_regmatch_slab);
2803     SAVEVPTR(PL_regmatch_state);
2804 
2805     /* grab next free state slot */
2806     st = ++PL_regmatch_state;
2807     if (st >  SLAB_LAST(PL_regmatch_slab))
2808 	st = PL_regmatch_state = S_push_slab(aTHX);
2809 
2810     /* Note that nextchr is a byte even in UTF */
2811     nextchr = UCHARAT(locinput);
2812     scan = prog;
2813     while (scan != NULL) {
2814 
2815         DEBUG_EXECUTE_r( {
2816 	    SV * const prop = sv_newmortal();
2817 	    regnode *rnext=regnext(scan);
2818 	    DUMP_EXEC_POS( locinput, scan, do_utf8 );
2819 	    regprop(rex, prop, scan);
2820 
2821 	    PerlIO_printf(Perl_debug_log,
2822 		    "%3"IVdf":%*s%s(%"IVdf")\n",
2823 		    (IV)(scan - rexi->program), depth*2, "",
2824 		    SvPVX_const(prop),
2825 		    (PL_regkind[OP(scan)] == END || !rnext) ?
2826 		        0 : (IV)(rnext - rexi->program));
2827 	});
2828 
2829 	next = scan + NEXT_OFF(scan);
2830 	if (next == scan)
2831 	    next = NULL;
2832 	state_num = OP(scan);
2833 
2834       reenter_switch:
2835 
2836 	assert(PL_reglastparen == &rex->lastparen);
2837 	assert(PL_reglastcloseparen == &rex->lastcloseparen);
2838 	assert(PL_regoffs == rex->offs);
2839 
2840 	switch (state_num) {
2841 	case BOL:
2842 	    if (locinput == PL_bostr)
2843 	    {
2844 		/* reginfo->till = reginfo->bol; */
2845 		break;
2846 	    }
2847 	    sayNO;
2848 	case MBOL:
2849 	    if (locinput == PL_bostr ||
2850 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2851 	    {
2852 		break;
2853 	    }
2854 	    sayNO;
2855 	case SBOL:
2856 	    if (locinput == PL_bostr)
2857 		break;
2858 	    sayNO;
2859 	case GPOS:
2860 	    if (locinput == reginfo->ganch)
2861 		break;
2862 	    sayNO;
2863 
2864 	case KEEPS:
2865 	    /* update the startpoint */
2866 	    st->u.keeper.val = PL_regoffs[0].start;
2867 	    PL_reginput = locinput;
2868 	    PL_regoffs[0].start = locinput - PL_bostr;
2869 	    PUSH_STATE_GOTO(KEEPS_next, next);
2870 	    /*NOT-REACHED*/
2871 	case KEEPS_next_fail:
2872 	    /* rollback the start point change */
2873 	    PL_regoffs[0].start = st->u.keeper.val;
2874 	    sayNO_SILENT;
2875 	    /*NOT-REACHED*/
2876 	case EOL:
2877 		goto seol;
2878 	case MEOL:
2879 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2880 		sayNO;
2881 	    break;
2882 	case SEOL:
2883 	  seol:
2884 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2885 		sayNO;
2886 	    if (PL_regeol - locinput > 1)
2887 		sayNO;
2888 	    break;
2889 	case EOS:
2890 	    if (PL_regeol != locinput)
2891 		sayNO;
2892 	    break;
2893 	case SANY:
2894 	    if (!nextchr && locinput >= PL_regeol)
2895 		sayNO;
2896  	    if (do_utf8) {
2897 	        locinput += PL_utf8skip[nextchr];
2898 		if (locinput > PL_regeol)
2899  		    sayNO;
2900  		nextchr = UCHARAT(locinput);
2901  	    }
2902  	    else
2903  		nextchr = UCHARAT(++locinput);
2904 	    break;
2905 	case CANY:
2906 	    if (!nextchr && locinput >= PL_regeol)
2907 		sayNO;
2908 	    nextchr = UCHARAT(++locinput);
2909 	    break;
2910 	case REG_ANY:
2911 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2912 		sayNO;
2913 	    if (do_utf8) {
2914 		locinput += PL_utf8skip[nextchr];
2915 		if (locinput > PL_regeol)
2916 		    sayNO;
2917 		nextchr = UCHARAT(locinput);
2918 	    }
2919 	    else
2920 		nextchr = UCHARAT(++locinput);
2921 	    break;
2922 
2923 #undef  ST
2924 #define ST st->u.trie
2925         case TRIEC:
2926             /* In this case the charclass data is available inline so
2927                we can fail fast without a lot of extra overhead.
2928              */
2929             if (scan->flags == EXACT || !do_utf8) {
2930                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2931                     DEBUG_EXECUTE_r(
2932                         PerlIO_printf(Perl_debug_log,
2933                     	          "%*s  %sfailed to match trie start class...%s\n",
2934                     	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2935                     );
2936                     sayNO_SILENT;
2937                     /* NOTREACHED */
2938                 }
2939             }
2940             /* FALL THROUGH */
2941 	case TRIE:
2942 	    {
2943                 /* what type of TRIE am I? (utf8 makes this contextual) */
2944                 DECL_TRIE_TYPE(scan);
2945 
2946                 /* what trie are we using right now */
2947 		reg_trie_data * const trie
2948         	    = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2949 		HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2950                 U32 state = trie->startstate;
2951 
2952         	if (trie->bitmap && trie_type != trie_utf8_fold &&
2953         	    !TRIE_BITMAP_TEST(trie,*locinput)
2954         	) {
2955         	    if (trie->states[ state ].wordnum) {
2956         	         DEBUG_EXECUTE_r(
2957                             PerlIO_printf(Perl_debug_log,
2958                         	          "%*s  %smatched empty string...%s\n",
2959                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2960                         );
2961         	        break;
2962         	    } else {
2963         	        DEBUG_EXECUTE_r(
2964                             PerlIO_printf(Perl_debug_log,
2965                         	          "%*s  %sfailed to match trie start class...%s\n",
2966                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2967                         );
2968         	        sayNO_SILENT;
2969         	   }
2970                 }
2971 
2972             {
2973 		U8 *uc = ( U8* )locinput;
2974 
2975 		STRLEN len = 0;
2976 		STRLEN foldlen = 0;
2977 		U8 *uscan = (U8*)NULL;
2978 		STRLEN bufflen=0;
2979 		SV *sv_accept_buff = NULL;
2980 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2981 
2982 	    	ST.accepted = 0; /* how many accepting states we have seen */
2983 		ST.B = next;
2984 		ST.jump = trie->jump;
2985 		ST.me = scan;
2986 	        /*
2987         	   traverse the TRIE keeping track of all accepting states
2988         	   we transition through until we get to a failing node.
2989         	*/
2990 
2991 		while ( state && uc <= (U8*)PL_regeol ) {
2992                     U32 base = trie->states[ state ].trans.base;
2993                     UV uvc = 0;
2994                     U16 charid;
2995                     /* We use charid to hold the wordnum as we don't use it
2996                        for charid until after we have done the wordnum logic.
2997                        We define an alias just so that the wordnum logic reads
2998                        more naturally. */
2999 
3000 #define got_wordnum charid
3001                     got_wordnum = trie->states[ state ].wordnum;
3002 
3003 		    if ( got_wordnum ) {
3004 			if ( ! ST.accepted ) {
3005 			    ENTER;
3006 			    SAVETMPS; /* XXX is this necessary? dmq */
3007 			    bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3008 			    sv_accept_buff=newSV(bufflen *
3009 					    sizeof(reg_trie_accepted) - 1);
3010 			    SvCUR_set(sv_accept_buff, 0);
3011 			    SvPOK_on(sv_accept_buff);
3012 			    sv_2mortal(sv_accept_buff);
3013 			    SAVETMPS;
3014 			    ST.accept_buff =
3015 				(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3016 			}
3017 			do {
3018 			    if (ST.accepted >= bufflen) {
3019 				bufflen *= 2;
3020 				ST.accept_buff =(reg_trie_accepted*)
3021 				    SvGROW(sv_accept_buff,
3022 				       	bufflen * sizeof(reg_trie_accepted));
3023 			    }
3024 			    SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3025 				+ sizeof(reg_trie_accepted));
3026 
3027 
3028 			    ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3029 			    ST.accept_buff[ST.accepted].endpos = uc;
3030 			    ++ST.accepted;
3031 		        } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3032 		    }
3033 #undef got_wordnum
3034 
3035 		    DEBUG_TRIE_EXECUTE_r({
3036 		                DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3037 			        PerlIO_printf( Perl_debug_log,
3038 			            "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3039 			            2+depth * 2, "", PL_colors[4],
3040 			            (UV)state, (UV)ST.accepted );
3041 		    });
3042 
3043 		    if ( base ) {
3044 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3045 					     uscan, len, uvc, charid, foldlen,
3046 					     foldbuf, uniflags);
3047 
3048 			if (charid &&
3049 			     (base + charid > trie->uniquecharcount )
3050 			     && (base + charid - 1 - trie->uniquecharcount
3051 				    < trie->lasttrans)
3052 			     && trie->trans[base + charid - 1 -
3053 				    trie->uniquecharcount].check == state)
3054 			{
3055 			    state = trie->trans[base + charid - 1 -
3056 				trie->uniquecharcount ].next;
3057 			}
3058 			else {
3059 			    state = 0;
3060 			}
3061 			uc += len;
3062 
3063 		    }
3064 		    else {
3065 			state = 0;
3066 		    }
3067 		    DEBUG_TRIE_EXECUTE_r(
3068 		        PerlIO_printf( Perl_debug_log,
3069 		            "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3070 		            charid, uvc, (UV)state, PL_colors[5] );
3071 		    );
3072 		}
3073 		if (!ST.accepted )
3074 		   sayNO;
3075 
3076 		DEBUG_EXECUTE_r(
3077 		    PerlIO_printf( Perl_debug_log,
3078 			"%*s  %sgot %"IVdf" possible matches%s\n",
3079 			REPORT_CODE_OFF + depth * 2, "",
3080 			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3081 		);
3082 	    }}
3083             goto trie_first_try; /* jump into the fail handler */
3084 	    /* NOTREACHED */
3085 	case TRIE_next_fail: /* we failed - try next alterative */
3086             if ( ST.jump) {
3087                 REGCP_UNWIND(ST.cp);
3088 	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
3089 		    PL_regoffs[n].end = -1;
3090 	        *PL_reglastparen = n;
3091 	    }
3092           trie_first_try:
3093             if (do_cutgroup) {
3094                 do_cutgroup = 0;
3095                 no_final = 0;
3096             }
3097 
3098             if ( ST.jump) {
3099                 ST.lastparen = *PL_reglastparen;
3100 	        REGCP_SET(ST.cp);
3101             }
3102 	    if ( ST.accepted == 1 ) {
3103 		/* only one choice left - just continue */
3104 		DEBUG_EXECUTE_r({
3105 		    AV *const trie_words
3106 			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3107 		    SV ** const tmp = av_fetch( trie_words,
3108 		        ST.accept_buff[ 0 ].wordnum-1, 0 );
3109 		    SV *sv= tmp ? sv_newmortal() : NULL;
3110 
3111 		    PerlIO_printf( Perl_debug_log,
3112 			"%*s  %sonly one match left: #%d <%s>%s\n",
3113 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3114 			ST.accept_buff[ 0 ].wordnum,
3115 			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3116 	                        PL_colors[0], PL_colors[1],
3117 	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3118                             )
3119 			: "not compiled under -Dr",
3120 			PL_colors[5] );
3121 		});
3122 		PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3123 		/* in this case we free tmps/leave before we call regmatch
3124 		   as we wont be using accept_buff again. */
3125 
3126 		locinput = PL_reginput;
3127 		nextchr = UCHARAT(locinput);
3128     		if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3129     		    scan = ST.B;
3130     		else
3131     		    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3132 		if (!has_cutgroup) {
3133 		    FREETMPS;
3134 		    LEAVE;
3135                 } else {
3136                     ST.accepted--;
3137                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3138                 }
3139 
3140 		continue; /* execute rest of RE */
3141 	    }
3142 
3143 	    if ( !ST.accepted-- ) {
3144 	        DEBUG_EXECUTE_r({
3145 		    PerlIO_printf( Perl_debug_log,
3146 			"%*s  %sTRIE failed...%s\n",
3147 			REPORT_CODE_OFF+depth*2, "",
3148 			PL_colors[4],
3149 			PL_colors[5] );
3150 		});
3151 		FREETMPS;
3152 		LEAVE;
3153 		sayNO_SILENT;
3154 		/*NOTREACHED*/
3155 	    }
3156 
3157 	    /*
3158 	       There are at least two accepting states left.  Presumably
3159 	       the number of accepting states is going to be low,
3160 	       typically two. So we simply scan through to find the one
3161 	       with lowest wordnum.  Once we find it, we swap the last
3162 	       state into its place and decrement the size. We then try to
3163 	       match the rest of the pattern at the point where the word
3164 	       ends. If we succeed, control just continues along the
3165 	       regex; if we fail we return here to try the next accepting
3166 	       state
3167 	     */
3168 
3169 	    {
3170 		U32 best = 0;
3171 		U32 cur;
3172 		for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3173 		    DEBUG_TRIE_EXECUTE_r(
3174 			PerlIO_printf( Perl_debug_log,
3175 			    "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3176 			    REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3177 			    (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3178 			    ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3179 		    );
3180 
3181 		    if (ST.accept_buff[cur].wordnum <
3182 			    ST.accept_buff[best].wordnum)
3183 			best = cur;
3184 		}
3185 
3186 		DEBUG_EXECUTE_r({
3187 		    AV *const trie_words
3188 			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3189 		    SV ** const tmp = av_fetch( trie_words,
3190 		        ST.accept_buff[ best ].wordnum - 1, 0 );
3191 		    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3192 		                    ST.B :
3193 		                    ST.me + ST.jump[ST.accept_buff[best].wordnum];
3194 		    SV *sv= tmp ? sv_newmortal() : NULL;
3195 
3196 		    PerlIO_printf( Perl_debug_log,
3197 		        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3198 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3199 			ST.accept_buff[best].wordnum,
3200 			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3201 	                        PL_colors[0], PL_colors[1],
3202 	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3203                             ) : "not compiled under -Dr",
3204 			    REG_NODE_NUM(nextop),
3205 			PL_colors[5] );
3206 		});
3207 
3208 		if ( best<ST.accepted ) {
3209 		    reg_trie_accepted tmp = ST.accept_buff[ best ];
3210 		    ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3211 		    ST.accept_buff[ ST.accepted ] = tmp;
3212 		    best = ST.accepted;
3213 		}
3214 		PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3215 		if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3216 		    scan = ST.B;
3217 		} else {
3218 		    scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3219                 }
3220                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3221                 /* NOTREACHED */
3222 	    }
3223 	    /* NOTREACHED */
3224         case TRIE_next:
3225 	    /* we dont want to throw this away, see bug 57042*/
3226 	    if (oreplsv != GvSV(PL_replgv))
3227 		sv_setsv(oreplsv, GvSV(PL_replgv));
3228             FREETMPS;
3229 	    LEAVE;
3230 	    sayYES;
3231 #undef  ST
3232 
3233 	case EXACT: {
3234 	    char *s = STRING(scan);
3235 	    ln = STR_LEN(scan);
3236 	    if (do_utf8 != UTF) {
3237 		/* The target and the pattern have differing utf8ness. */
3238 		char *l = locinput;
3239 		const char * const e = s + ln;
3240 
3241 		if (do_utf8) {
3242 		    /* The target is utf8, the pattern is not utf8. */
3243 		    while (s < e) {
3244 			STRLEN ulen;
3245 			if (l >= PL_regeol)
3246 			     sayNO;
3247 			if (NATIVE_TO_UNI(*(U8*)s) !=
3248 			    utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3249 					    uniflags))
3250 			     sayNO;
3251 			l += ulen;
3252 			s ++;
3253 		    }
3254 		}
3255 		else {
3256 		    /* The target is not utf8, the pattern is utf8. */
3257 		    while (s < e) {
3258 			STRLEN ulen;
3259 			if (l >= PL_regeol)
3260 			    sayNO;
3261 			if (NATIVE_TO_UNI(*((U8*)l)) !=
3262 			    utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3263 					   uniflags))
3264 			    sayNO;
3265 			s += ulen;
3266 			l ++;
3267 		    }
3268 		}
3269 		locinput = l;
3270 		nextchr = UCHARAT(locinput);
3271 		break;
3272 	    }
3273 	    /* The target and the pattern have the same utf8ness. */
3274 	    /* Inline the first character, for speed. */
3275 	    if (UCHARAT(s) != nextchr)
3276 		sayNO;
3277 	    if (PL_regeol - locinput < ln)
3278 		sayNO;
3279 	    if (ln > 1 && memNE(s, locinput, ln))
3280 		sayNO;
3281 	    locinput += ln;
3282 	    nextchr = UCHARAT(locinput);
3283 	    break;
3284 	    }
3285 	case EXACTFL:
3286 	    PL_reg_flags |= RF_tainted;
3287 	    /* FALL THROUGH */
3288 	case EXACTF: {
3289 	    char * const s = STRING(scan);
3290 	    ln = STR_LEN(scan);
3291 
3292 	    if (do_utf8 || UTF) {
3293 	      /* Either target or the pattern are utf8. */
3294 		const char * const l = locinput;
3295 		char *e = PL_regeol;
3296 
3297 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3298 			       l, &e, 0,  do_utf8)) {
3299 		     /* One more case for the sharp s:
3300 		      * pack("U0U*", 0xDF) =~ /ss/i,
3301 		      * the 0xC3 0x9F are the UTF-8
3302 		      * byte sequence for the U+00DF. */
3303 
3304 		     if (!(do_utf8 &&
3305 		           toLOWER(s[0]) == 's' &&
3306 			   ln >= 2 &&
3307 			   toLOWER(s[1]) == 's' &&
3308 			   (U8)l[0] == 0xC3 &&
3309 			   e - l >= 2 &&
3310 			   (U8)l[1] == 0x9F))
3311 			  sayNO;
3312 		}
3313 		locinput = e;
3314 		nextchr = UCHARAT(locinput);
3315 		break;
3316 	    }
3317 
3318 	    /* Neither the target and the pattern are utf8. */
3319 
3320 	    /* Inline the first character, for speed. */
3321 	    if (UCHARAT(s) != nextchr &&
3322 		UCHARAT(s) != ((OP(scan) == EXACTF)
3323 			       ? PL_fold : PL_fold_locale)[nextchr])
3324 		sayNO;
3325 	    if (PL_regeol - locinput < ln)
3326 		sayNO;
3327 	    if (ln > 1 && (OP(scan) == EXACTF
3328 			   ? ibcmp(s, locinput, ln)
3329 			   : ibcmp_locale(s, locinput, ln)))
3330 		sayNO;
3331 	    locinput += ln;
3332 	    nextchr = UCHARAT(locinput);
3333 	    break;
3334 	    }
3335 	case ANYOF:
3336 	    if (do_utf8) {
3337 	        STRLEN inclasslen = PL_regeol - locinput;
3338 
3339 	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3340 		    goto anyof_fail;
3341 		if (locinput >= PL_regeol)
3342 		    sayNO;
3343 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3344 		nextchr = UCHARAT(locinput);
3345 		break;
3346 	    }
3347 	    else {
3348 		if (nextchr < 0)
3349 		    nextchr = UCHARAT(locinput);
3350 		if (!REGINCLASS(rex, scan, (U8*)locinput))
3351 		    goto anyof_fail;
3352 		if (!nextchr && locinput >= PL_regeol)
3353 		    sayNO;
3354 		nextchr = UCHARAT(++locinput);
3355 		break;
3356 	    }
3357 	anyof_fail:
3358 	    /* If we might have the case of the German sharp s
3359 	     * in a casefolding Unicode character class. */
3360 
3361 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3362 		 locinput += SHARP_S_SKIP;
3363 		 nextchr = UCHARAT(locinput);
3364 	    }
3365 	    else
3366 		 sayNO;
3367 	    break;
3368 	case ALNUML:
3369 	    PL_reg_flags |= RF_tainted;
3370 	    /* FALL THROUGH */
3371 	case ALNUM:
3372 	    if (!nextchr)
3373 		sayNO;
3374 	    if (do_utf8) {
3375 		LOAD_UTF8_CHARCLASS_ALNUM();
3376 		if (!(OP(scan) == ALNUM
3377 		      ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3378 		      : isALNUM_LC_utf8((U8*)locinput)))
3379 		{
3380 		    sayNO;
3381 		}
3382 		locinput += PL_utf8skip[nextchr];
3383 		nextchr = UCHARAT(locinput);
3384 		break;
3385 	    }
3386 	    if (!(OP(scan) == ALNUM
3387 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3388 		sayNO;
3389 	    nextchr = UCHARAT(++locinput);
3390 	    break;
3391 	case NALNUML:
3392 	    PL_reg_flags |= RF_tainted;
3393 	    /* FALL THROUGH */
3394 	case NALNUM:
3395 	    if (!nextchr && locinput >= PL_regeol)
3396 		sayNO;
3397 	    if (do_utf8) {
3398 		LOAD_UTF8_CHARCLASS_ALNUM();
3399 		if (OP(scan) == NALNUM
3400 		    ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3401 		    : isALNUM_LC_utf8((U8*)locinput))
3402 		{
3403 		    sayNO;
3404 		}
3405 		locinput += PL_utf8skip[nextchr];
3406 		nextchr = UCHARAT(locinput);
3407 		break;
3408 	    }
3409 	    if (OP(scan) == NALNUM
3410 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
3411 		sayNO;
3412 	    nextchr = UCHARAT(++locinput);
3413 	    break;
3414 	case BOUNDL:
3415 	case NBOUNDL:
3416 	    PL_reg_flags |= RF_tainted;
3417 	    /* FALL THROUGH */
3418 	case BOUND:
3419 	case NBOUND:
3420 	    /* was last char in word? */
3421 	    if (do_utf8) {
3422 		if (locinput == PL_bostr)
3423 		    ln = '\n';
3424 		else {
3425 		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3426 
3427 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3428 		}
3429 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3430 		    ln = isALNUM_uni(ln);
3431 		    LOAD_UTF8_CHARCLASS_ALNUM();
3432 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3433 		}
3434 		else {
3435 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3436 		    n = isALNUM_LC_utf8((U8*)locinput);
3437 		}
3438 	    }
3439 	    else {
3440 		ln = (locinput != PL_bostr) ?
3441 		    UCHARAT(locinput - 1) : '\n';
3442 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3443 		    ln = isALNUM(ln);
3444 		    n = isALNUM(nextchr);
3445 		}
3446 		else {
3447 		    ln = isALNUM_LC(ln);
3448 		    n = isALNUM_LC(nextchr);
3449 		}
3450 	    }
3451 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3452 				    OP(scan) == BOUNDL))
3453 		    sayNO;
3454 	    break;
3455 	case SPACEL:
3456 	    PL_reg_flags |= RF_tainted;
3457 	    /* FALL THROUGH */
3458 	case SPACE:
3459 	    if (!nextchr)
3460 		sayNO;
3461 	    if (do_utf8) {
3462 		if (UTF8_IS_CONTINUED(nextchr)) {
3463 		    LOAD_UTF8_CHARCLASS_SPACE();
3464 		    if (!(OP(scan) == SPACE
3465 			  ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3466 			  : isSPACE_LC_utf8((U8*)locinput)))
3467 		    {
3468 			sayNO;
3469 		    }
3470 		    locinput += PL_utf8skip[nextchr];
3471 		    nextchr = UCHARAT(locinput);
3472 		    break;
3473 		}
3474 		if (!(OP(scan) == SPACE
3475 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3476 		    sayNO;
3477 		nextchr = UCHARAT(++locinput);
3478 	    }
3479 	    else {
3480 		if (!(OP(scan) == SPACE
3481 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3482 		    sayNO;
3483 		nextchr = UCHARAT(++locinput);
3484 	    }
3485 	    break;
3486 	case NSPACEL:
3487 	    PL_reg_flags |= RF_tainted;
3488 	    /* FALL THROUGH */
3489 	case NSPACE:
3490 	    if (!nextchr && locinput >= PL_regeol)
3491 		sayNO;
3492 	    if (do_utf8) {
3493 		LOAD_UTF8_CHARCLASS_SPACE();
3494 		if (OP(scan) == NSPACE
3495 		    ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3496 		    : isSPACE_LC_utf8((U8*)locinput))
3497 		{
3498 		    sayNO;
3499 		}
3500 		locinput += PL_utf8skip[nextchr];
3501 		nextchr = UCHARAT(locinput);
3502 		break;
3503 	    }
3504 	    if (OP(scan) == NSPACE
3505 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
3506 		sayNO;
3507 	    nextchr = UCHARAT(++locinput);
3508 	    break;
3509 	case DIGITL:
3510 	    PL_reg_flags |= RF_tainted;
3511 	    /* FALL THROUGH */
3512 	case DIGIT:
3513 	    if (!nextchr)
3514 		sayNO;
3515 	    if (do_utf8) {
3516 		LOAD_UTF8_CHARCLASS_DIGIT();
3517 		if (!(OP(scan) == DIGIT
3518 		      ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3519 		      : isDIGIT_LC_utf8((U8*)locinput)))
3520 		{
3521 		    sayNO;
3522 		}
3523 		locinput += PL_utf8skip[nextchr];
3524 		nextchr = UCHARAT(locinput);
3525 		break;
3526 	    }
3527 	    if (!(OP(scan) == DIGIT
3528 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3529 		sayNO;
3530 	    nextchr = UCHARAT(++locinput);
3531 	    break;
3532 	case NDIGITL:
3533 	    PL_reg_flags |= RF_tainted;
3534 	    /* FALL THROUGH */
3535 	case NDIGIT:
3536 	    if (!nextchr && locinput >= PL_regeol)
3537 		sayNO;
3538 	    if (do_utf8) {
3539 		LOAD_UTF8_CHARCLASS_DIGIT();
3540 		if (OP(scan) == NDIGIT
3541 		    ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3542 		    : isDIGIT_LC_utf8((U8*)locinput))
3543 		{
3544 		    sayNO;
3545 		}
3546 		locinput += PL_utf8skip[nextchr];
3547 		nextchr = UCHARAT(locinput);
3548 		break;
3549 	    }
3550 	    if (OP(scan) == NDIGIT
3551 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3552 		sayNO;
3553 	    nextchr = UCHARAT(++locinput);
3554 	    break;
3555 	case CLUMP:
3556 	    if (locinput >= PL_regeol)
3557 		sayNO;
3558 	    if  (do_utf8) {
3559 		LOAD_UTF8_CHARCLASS_MARK();
3560 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3561 		    sayNO;
3562 		locinput += PL_utf8skip[nextchr];
3563 		while (locinput < PL_regeol &&
3564 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3565 		    locinput += UTF8SKIP(locinput);
3566 		if (locinput > PL_regeol)
3567 		    sayNO;
3568 	    }
3569 	    else
3570 	       locinput++;
3571 	    nextchr = UCHARAT(locinput);
3572 	    break;
3573 
3574 	case NREFFL:
3575 	{
3576 	    char *s;
3577 	    char type;
3578 	    PL_reg_flags |= RF_tainted;
3579 	    /* FALL THROUGH */
3580 	case NREF:
3581 	case NREFF:
3582 	    type = OP(scan);
3583 	    n = reg_check_named_buff_matched(rex,scan);
3584 
3585             if ( n ) {
3586                 type = REF + ( type - NREF );
3587                 goto do_ref;
3588             } else {
3589                 sayNO;
3590             }
3591             /* unreached */
3592 	case REFFL:
3593 	    PL_reg_flags |= RF_tainted;
3594 	    /* FALL THROUGH */
3595         case REF:
3596 	case REFF:
3597 	    n = ARG(scan);  /* which paren pair */
3598 	    type = OP(scan);
3599 	  do_ref:
3600 	    ln = PL_regoffs[n].start;
3601 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3602 	    if (*PL_reglastparen < n || ln == -1)
3603 		sayNO;			/* Do not match unless seen CLOSEn. */
3604 	    if (ln == PL_regoffs[n].end)
3605 		break;
3606 
3607 	    s = PL_bostr + ln;
3608 	    if (do_utf8 && type != REF) {	/* REF can do byte comparison */
3609 		char *l = locinput;
3610 		const char *e = PL_bostr + PL_regoffs[n].end;
3611 		/*
3612 		 * Note that we can't do the "other character" lookup trick as
3613 		 * in the 8-bit case (no pun intended) because in Unicode we
3614 		 * have to map both upper and title case to lower case.
3615 		 */
3616 		if (type == REFF) {
3617 		    while (s < e) {
3618 			STRLEN ulen1, ulen2;
3619 			U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3620 			U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3621 
3622 			if (l >= PL_regeol)
3623 			    sayNO;
3624 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3625 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3626 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3627 			    sayNO;
3628 			s += ulen1;
3629 			l += ulen2;
3630 		    }
3631 		}
3632 		locinput = l;
3633 		nextchr = UCHARAT(locinput);
3634 		break;
3635 	    }
3636 
3637 	    /* Inline the first character, for speed. */
3638 	    if (UCHARAT(s) != nextchr &&
3639 		(type == REF ||
3640 		 (UCHARAT(s) != (type == REFF
3641 				  ? PL_fold : PL_fold_locale)[nextchr])))
3642 		sayNO;
3643 	    ln = PL_regoffs[n].end - ln;
3644 	    if (locinput + ln > PL_regeol)
3645 		sayNO;
3646 	    if (ln > 1 && (type == REF
3647 			   ? memNE(s, locinput, ln)
3648 			   : (type == REFF
3649 			      ? ibcmp(s, locinput, ln)
3650 			      : ibcmp_locale(s, locinput, ln))))
3651 		sayNO;
3652 	    locinput += ln;
3653 	    nextchr = UCHARAT(locinput);
3654 	    break;
3655 	}
3656 	case NOTHING:
3657 	case TAIL:
3658 	    break;
3659 	case BACK:
3660 	    break;
3661 
3662 #undef  ST
3663 #define ST st->u.eval
3664 	{
3665 	    SV *ret;
3666             regexp *re;
3667             regexp_internal *rei;
3668             regnode *startpoint;
3669 
3670 	case GOSTART:
3671 	case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3672 	    if (cur_eval && cur_eval->locinput==locinput) {
3673                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3674                     Perl_croak(aTHX_ "Infinite recursion in regex");
3675                 if ( ++nochange_depth > max_nochange_depth )
3676                     Perl_croak(aTHX_
3677                         "Pattern subroutine nesting without pos change"
3678                         " exceeded limit in regex");
3679             } else {
3680                 nochange_depth = 0;
3681             }
3682             re = rex;
3683             rei = rexi;
3684             (void)ReREFCNT_inc(rex);
3685             if (OP(scan)==GOSUB) {
3686                 startpoint = scan + ARG2L(scan);
3687                 ST.close_paren = ARG(scan);
3688             } else {
3689                 startpoint = rei->program+1;
3690                 ST.close_paren = 0;
3691             }
3692             goto eval_recurse_doit;
3693             /* NOTREACHED */
3694         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3695             if (cur_eval && cur_eval->locinput==locinput) {
3696 		if ( ++nochange_depth > max_nochange_depth )
3697                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3698             } else {
3699                 nochange_depth = 0;
3700             }
3701 	    {
3702 		/* execute the code in the {...} */
3703 		dSP;
3704 		SV ** const before = SP;
3705 		OP_4tree * const oop = PL_op;
3706 		COP * const ocurcop = PL_curcop;
3707 		PAD *old_comppad;
3708 
3709 		n = ARG(scan);
3710 		PL_op = (OP_4tree*)rexi->data->data[n];
3711 		DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3712 		    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3713 		PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3714 		PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3715 
3716                 if (sv_yes_mark) {
3717                     SV *sv_mrk = get_sv("REGMARK", 1);
3718                     sv_setsv(sv_mrk, sv_yes_mark);
3719                 }
3720 
3721 		CALLRUNOPS(aTHX);			/* Scalar context. */
3722 		SPAGAIN;
3723 		if (SP == before)
3724 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3725 		else {
3726 		    ret = POPs;
3727 		    PUTBACK;
3728 		}
3729 
3730 		PL_op = oop;
3731 		PAD_RESTORE_LOCAL(old_comppad);
3732 		PL_curcop = ocurcop;
3733 		if (!logical) {
3734 		    /* /(?{...})/ */
3735 		    sv_setsv(save_scalar(PL_replgv), ret);
3736 		    break;
3737 		}
3738 	    }
3739 	    if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3740 		logical = 0;
3741 		{
3742 		    /* extract RE object from returned value; compiling if
3743 		     * necessary */
3744 
3745 		    MAGIC *mg = NULL;
3746 		    const SV *sv;
3747 		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3748 			mg = mg_find(sv, PERL_MAGIC_qr);
3749 		    else if (SvSMAGICAL(ret)) {
3750 			if (SvGMAGICAL(ret))
3751 			    sv_unmagic(ret, PERL_MAGIC_qr);
3752 			else
3753 			    mg = mg_find(ret, PERL_MAGIC_qr);
3754 		    }
3755 
3756 		    if (mg) {
3757 			re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3758 		    }
3759 		    else {
3760 			U32 pm_flags = 0;
3761 			const I32 osize = PL_regsize;
3762 
3763 			if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3764 			re = CALLREGCOMP(ret, pm_flags);
3765 			if (!(SvFLAGS(ret)
3766 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3767 				| SVs_GMG)))
3768 			    sv_magic(ret,MUTABLE_SV(ReREFCNT_inc(re)),
3769 					PERL_MAGIC_qr,0,0);
3770 			PL_regsize = osize;
3771 		    }
3772 		}
3773                 RXp_MATCH_COPIED_off(re);
3774                 re->subbeg = rex->subbeg;
3775                 re->sublen = rex->sublen;
3776 		rei = RXi_GET(re);
3777                 DEBUG_EXECUTE_r(
3778                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3779                         "Matching embedded");
3780 		);
3781 		startpoint = rei->program + 1;
3782                	ST.close_paren = 0; /* only used for GOSUB */
3783                	/* borrowed from regtry */
3784                 if (PL_reg_start_tmpl <= re->nparens) {
3785                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3786                     if(PL_reg_start_tmp)
3787                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3788                     else
3789                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3790                 }
3791 
3792         eval_recurse_doit: /* Share code with GOSUB below this line */
3793 		/* run the pattern returned from (??{...}) */
3794 		ST.cp = regcppush(0);	/* Save *all* the positions. */
3795 		REGCP_SET(ST.lastcp);
3796 
3797 		PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3798 
3799 		/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3800 		PL_reglastparen = &re->lastparen;
3801 		PL_reglastcloseparen = &re->lastcloseparen;
3802 		re->lastparen = 0;
3803 		re->lastcloseparen = 0;
3804 
3805 		PL_reginput = locinput;
3806 		PL_regsize = 0;
3807 
3808 		/* XXXX This is too dramatic a measure... */
3809 		PL_reg_maxiter = 0;
3810 
3811 		ST.toggle_reg_flags = PL_reg_flags;
3812 		if (RX_UTF8(re))
3813 		    PL_reg_flags |= RF_utf8;
3814 		else
3815 		    PL_reg_flags &= ~RF_utf8;
3816 		ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3817 
3818 		ST.prev_rex = rex;
3819 		ST.prev_curlyx = cur_curlyx;
3820 		SETREX(rex,re);
3821 		rexi = rei;
3822 		cur_curlyx = NULL;
3823 		ST.B = next;
3824 		ST.prev_eval = cur_eval;
3825 		cur_eval = st;
3826 		/* now continue from first node in postoned RE */
3827 		PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3828 		/* NOTREACHED */
3829 	    }
3830 	    /* logical is 1,   /(?(?{...})X|Y)/ */
3831 	    sw = (bool)SvTRUE(ret);
3832 	    logical = 0;
3833 	    break;
3834 	}
3835 
3836 	case EVAL_AB: /* cleanup after a successful (??{A})B */
3837 	    /* note: this is called twice; first after popping B, then A */
3838 	    PL_reg_flags ^= ST.toggle_reg_flags;
3839 	    ReREFCNT_dec(rex);
3840 	    SETREX(rex,ST.prev_rex);
3841 	    rexi = RXi_GET(rex);
3842 	    regcpblow(ST.cp);
3843 	    cur_eval = ST.prev_eval;
3844 	    cur_curlyx = ST.prev_curlyx;
3845 
3846 	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3847 	    PL_reglastparen = &rex->lastparen;
3848 	    PL_reglastcloseparen = &rex->lastcloseparen;
3849 	    /* also update PL_regoffs */
3850 	    PL_regoffs = rex->offs;
3851 
3852 	    /* XXXX This is too dramatic a measure... */
3853 	    PL_reg_maxiter = 0;
3854             if ( nochange_depth )
3855 	        nochange_depth--;
3856 	    sayYES;
3857 
3858 
3859 	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3860 	    /* note: this is called twice; first after popping B, then A */
3861 	    PL_reg_flags ^= ST.toggle_reg_flags;
3862 	    ReREFCNT_dec(rex);
3863 	    SETREX(rex,ST.prev_rex);
3864 	    rexi = RXi_GET(rex);
3865 	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3866 	    PL_reglastparen = &rex->lastparen;
3867 	    PL_reglastcloseparen = &rex->lastcloseparen;
3868 
3869 	    PL_reginput = locinput;
3870 	    REGCP_UNWIND(ST.lastcp);
3871 	    regcppop(rex);
3872 	    cur_eval = ST.prev_eval;
3873 	    cur_curlyx = ST.prev_curlyx;
3874 	    /* XXXX This is too dramatic a measure... */
3875 	    PL_reg_maxiter = 0;
3876 	    if ( nochange_depth )
3877 	        nochange_depth--;
3878 	    sayNO_SILENT;
3879 #undef ST
3880 
3881 	case OPEN:
3882 	    n = ARG(scan);  /* which paren pair */
3883 	    PL_reg_start_tmp[n] = locinput;
3884 	    if (n > PL_regsize)
3885 		PL_regsize = n;
3886             lastopen = n;
3887 	    break;
3888 	case CLOSE:
3889 	    n = ARG(scan);  /* which paren pair */
3890 	    PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3891 	    PL_regoffs[n].end = locinput - PL_bostr;
3892 	    /*if (n > PL_regsize)
3893 		PL_regsize = n;*/
3894 	    if (n > *PL_reglastparen)
3895 		*PL_reglastparen = n;
3896 	    *PL_reglastcloseparen = n;
3897             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3898 	        goto fake_end;
3899 	    }
3900 	    break;
3901         case ACCEPT:
3902             if (ARG(scan)){
3903                 regnode *cursor;
3904                 for (cursor=scan;
3905                      cursor && OP(cursor)!=END;
3906                      cursor=regnext(cursor))
3907                 {
3908                     if ( OP(cursor)==CLOSE ){
3909                         n = ARG(cursor);
3910                         if ( n <= lastopen ) {
3911                             PL_regoffs[n].start
3912 				= PL_reg_start_tmp[n] - PL_bostr;
3913                             PL_regoffs[n].end = locinput - PL_bostr;
3914                             /*if (n > PL_regsize)
3915                             PL_regsize = n;*/
3916                             if (n > *PL_reglastparen)
3917                                 *PL_reglastparen = n;
3918                             *PL_reglastcloseparen = n;
3919                             if ( n == ARG(scan) || (cur_eval &&
3920                                 cur_eval->u.eval.close_paren == n))
3921                                 break;
3922                         }
3923                     }
3924                 }
3925             }
3926 	    goto fake_end;
3927 	    /*NOTREACHED*/
3928 	case GROUPP:
3929 	    n = ARG(scan);  /* which paren pair */
3930 	    sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3931 	    break;
3932 	case NGROUPP:
3933 	    /* reg_check_named_buff_matched returns 0 for no match */
3934 	    sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3935 	    break;
3936         case INSUBP:
3937             n = ARG(scan);
3938             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3939             break;
3940         case DEFINEP:
3941             sw = 0;
3942             break;
3943 	case IFTHEN:
3944 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3945 	    if (sw)
3946 		next = NEXTOPER(NEXTOPER(scan));
3947 	    else {
3948 		next = scan + ARG(scan);
3949 		if (OP(next) == IFTHEN) /* Fake one. */
3950 		    next = NEXTOPER(NEXTOPER(next));
3951 	    }
3952 	    break;
3953 	case LOGICAL:
3954 	    logical = scan->flags;
3955 	    break;
3956 
3957 /*******************************************************************
3958 
3959 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3960 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3961 STAR/PLUS/CURLY/CURLYN are used instead.)
3962 
3963 A*B is compiled as <CURLYX><A><WHILEM><B>
3964 
3965 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3966 state, which contains the current count, initialised to -1. It also sets
3967 cur_curlyx to point to this state, with any previous value saved in the
3968 state block.
3969 
3970 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3971 since the pattern may possibly match zero times (i.e. it's a while {} loop
3972 rather than a do {} while loop).
3973 
3974 Each entry to WHILEM represents a successful match of A. The count in the
3975 CURLYX block is incremented, another WHILEM state is pushed, and execution
3976 passes to A or B depending on greediness and the current count.
3977 
3978 For example, if matching against the string a1a2a3b (where the aN are
3979 substrings that match /A/), then the match progresses as follows: (the
3980 pushed states are interspersed with the bits of strings matched so far):
3981 
3982     <CURLYX cnt=-1>
3983     <CURLYX cnt=0><WHILEM>
3984     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3985     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3986     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3987     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3988 
3989 (Contrast this with something like CURLYM, which maintains only a single
3990 backtrack state:
3991 
3992     <CURLYM cnt=0> a1
3993     a1 <CURLYM cnt=1> a2
3994     a1 a2 <CURLYM cnt=2> a3
3995     a1 a2 a3 <CURLYM cnt=3> b
3996 )
3997 
3998 Each WHILEM state block marks a point to backtrack to upon partial failure
3999 of A or B, and also contains some minor state data related to that
4000 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4001 overall state, such as the count, and pointers to the A and B ops.
4002 
4003 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4004 must always point to the *current* CURLYX block, the rules are:
4005 
4006 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4007 and set cur_curlyx to point the new block.
4008 
4009 When popping the CURLYX block after a successful or unsuccessful match,
4010 restore the previous cur_curlyx.
4011 
4012 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4013 to the outer one saved in the CURLYX block.
4014 
4015 When popping the WHILEM block after a successful or unsuccessful B match,
4016 restore the previous cur_curlyx.
4017 
4018 Here's an example for the pattern (AI* BI)*BO
4019 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4020 
4021 cur_
4022 curlyx backtrack stack
4023 ------ ---------------
4024 NULL
4025 CO     <CO prev=NULL> <WO>
4026 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4027 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4028 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4029 
4030 At this point the pattern succeeds, and we work back down the stack to
4031 clean up, restoring as we go:
4032 
4033 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4034 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4035 CO     <CO prev=NULL> <WO>
4036 NULL
4037 
4038 *******************************************************************/
4039 
4040 #define ST st->u.curlyx
4041 
4042 	case CURLYX:    /* start of /A*B/  (for complex A) */
4043 	{
4044 	    /* No need to save/restore up to this paren */
4045 	    I32 parenfloor = scan->flags;
4046 
4047 	    assert(next); /* keep Coverity happy */
4048 	    if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4049 		next += ARG(next);
4050 
4051 	    /* XXXX Probably it is better to teach regpush to support
4052 	       parenfloor > PL_regsize... */
4053 	    if (parenfloor > (I32)*PL_reglastparen)
4054 		parenfloor = *PL_reglastparen; /* Pessimization... */
4055 
4056 	    ST.prev_curlyx= cur_curlyx;
4057 	    cur_curlyx = st;
4058 	    ST.cp = PL_savestack_ix;
4059 
4060 	    /* these fields contain the state of the current curly.
4061 	     * they are accessed by subsequent WHILEMs */
4062 	    ST.parenfloor = parenfloor;
4063 	    ST.min = ARG1(scan);
4064 	    ST.max = ARG2(scan);
4065 	    ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4066 	    ST.B = next;
4067 	    ST.minmod = minmod;
4068 	    minmod = 0;
4069 	    ST.count = -1;	/* this will be updated by WHILEM */
4070 	    ST.lastloc = NULL;  /* this will be updated by WHILEM */
4071 
4072 	    PL_reginput = locinput;
4073 	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4074 	    /* NOTREACHED */
4075 	}
4076 
4077 	case CURLYX_end: /* just finished matching all of A*B */
4078 	    cur_curlyx = ST.prev_curlyx;
4079 	    sayYES;
4080 	    /* NOTREACHED */
4081 
4082 	case CURLYX_end_fail: /* just failed to match all of A*B */
4083 	    regcpblow(ST.cp);
4084 	    cur_curlyx = ST.prev_curlyx;
4085 	    sayNO;
4086 	    /* NOTREACHED */
4087 
4088 
4089 #undef ST
4090 #define ST st->u.whilem
4091 
4092 	case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4093 	{
4094 	    /* see the discussion above about CURLYX/WHILEM */
4095 	    I32 n;
4096 	    assert(cur_curlyx); /* keep Coverity happy */
4097 	    n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4098 	    ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4099 	    ST.cache_offset = 0;
4100 	    ST.cache_mask = 0;
4101 
4102 	    PL_reginput = locinput;
4103 
4104 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4105 		  "%*s  whilem: matched %ld out of %ld..%ld\n",
4106 		  REPORT_CODE_OFF+depth*2, "", (long)n,
4107 		  (long)cur_curlyx->u.curlyx.min,
4108 		  (long)cur_curlyx->u.curlyx.max)
4109 	    );
4110 
4111 	    /* First just match a string of min A's. */
4112 
4113 	    if (n < cur_curlyx->u.curlyx.min) {
4114 		cur_curlyx->u.curlyx.lastloc = locinput;
4115 		PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4116 		/* NOTREACHED */
4117 	    }
4118 
4119 	    /* If degenerate A matches "", assume A done. */
4120 
4121 	    if (locinput == cur_curlyx->u.curlyx.lastloc) {
4122 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4123 		   "%*s  whilem: empty match detected, trying continuation...\n",
4124 		   REPORT_CODE_OFF+depth*2, "")
4125 		);
4126 		goto do_whilem_B_max;
4127 	    }
4128 
4129 	    /* super-linear cache processing */
4130 
4131 	    if (scan->flags) {
4132 
4133 		if (!PL_reg_maxiter) {
4134 		    /* start the countdown: Postpone detection until we
4135 		     * know the match is not *that* much linear. */
4136 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4137 		    /* possible overflow for long strings and many CURLYX's */
4138 		    if (PL_reg_maxiter < 0)
4139 			PL_reg_maxiter = I32_MAX;
4140 		    PL_reg_leftiter = PL_reg_maxiter;
4141 		}
4142 
4143 		if (PL_reg_leftiter-- == 0) {
4144 		    /* initialise cache */
4145 		    const I32 size = (PL_reg_maxiter + 7)/8;
4146 		    if (PL_reg_poscache) {
4147 			if ((I32)PL_reg_poscache_size < size) {
4148 			    Renew(PL_reg_poscache, size, char);
4149 			    PL_reg_poscache_size = size;
4150 			}
4151 			Zero(PL_reg_poscache, size, char);
4152 		    }
4153 		    else {
4154 			PL_reg_poscache_size = size;
4155 			Newxz(PL_reg_poscache, size, char);
4156 		    }
4157 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4158       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4159 			      PL_colors[4], PL_colors[5])
4160 		    );
4161 		}
4162 
4163 		if (PL_reg_leftiter < 0) {
4164 		    /* have we already failed at this position? */
4165 		    I32 offset, mask;
4166 		    offset  = (scan->flags & 0xf) - 1
4167 		  		+ (locinput - PL_bostr)  * (scan->flags>>4);
4168 		    mask    = 1 << (offset % 8);
4169 		    offset /= 8;
4170 		    if (PL_reg_poscache[offset] & mask) {
4171 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4172 			    "%*s  whilem: (cache) already tried at this position...\n",
4173 			    REPORT_CODE_OFF+depth*2, "")
4174 			);
4175 			sayNO; /* cache records failure */
4176 		    }
4177 		    ST.cache_offset = offset;
4178 		    ST.cache_mask   = mask;
4179 		}
4180 	    }
4181 
4182 	    /* Prefer B over A for minimal matching. */
4183 
4184 	    if (cur_curlyx->u.curlyx.minmod) {
4185 		ST.save_curlyx = cur_curlyx;
4186 		cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4187 		ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4188 		REGCP_SET(ST.lastcp);
4189 		PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4190 		/* NOTREACHED */
4191 	    }
4192 
4193 	    /* Prefer A over B for maximal matching. */
4194 
4195 	    if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4196 		ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4197 		cur_curlyx->u.curlyx.lastloc = locinput;
4198 		REGCP_SET(ST.lastcp);
4199 		PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4200 		/* NOTREACHED */
4201 	    }
4202 	    goto do_whilem_B_max;
4203 	}
4204 	/* NOTREACHED */
4205 
4206 	case WHILEM_B_min: /* just matched B in a minimal match */
4207 	case WHILEM_B_max: /* just matched B in a maximal match */
4208 	    cur_curlyx = ST.save_curlyx;
4209 	    sayYES;
4210 	    /* NOTREACHED */
4211 
4212 	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4213 	    cur_curlyx = ST.save_curlyx;
4214 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4215 	    cur_curlyx->u.curlyx.count--;
4216 	    CACHEsayNO;
4217 	    /* NOTREACHED */
4218 
4219 	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4220 	    REGCP_UNWIND(ST.lastcp);
4221 	    regcppop(rex);
4222 	    /* FALL THROUGH */
4223 	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4224 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4225 	    cur_curlyx->u.curlyx.count--;
4226 	    CACHEsayNO;
4227 	    /* NOTREACHED */
4228 
4229 	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4230 	    REGCP_UNWIND(ST.lastcp);
4231 	    regcppop(rex);	/* Restore some previous $<digit>s? */
4232 	    PL_reginput = locinput;
4233 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4234 		"%*s  whilem: failed, trying continuation...\n",
4235 		REPORT_CODE_OFF+depth*2, "")
4236 	    );
4237 	  do_whilem_B_max:
4238 	    if (cur_curlyx->u.curlyx.count >= REG_INFTY
4239 		&& ckWARN(WARN_REGEXP)
4240 		&& !(PL_reg_flags & RF_warned))
4241 	    {
4242 		PL_reg_flags |= RF_warned;
4243 		Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4244 		     "Complex regular subexpression recursion",
4245 		     REG_INFTY - 1);
4246 	    }
4247 
4248 	    /* now try B */
4249 	    ST.save_curlyx = cur_curlyx;
4250 	    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4251 	    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4252 	    /* NOTREACHED */
4253 
4254 	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4255 	    cur_curlyx = ST.save_curlyx;
4256 	    REGCP_UNWIND(ST.lastcp);
4257 	    regcppop(rex);
4258 
4259 	    if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4260 		/* Maximum greed exceeded */
4261 		if (cur_curlyx->u.curlyx.count >= REG_INFTY
4262 		    && ckWARN(WARN_REGEXP)
4263 		    && !(PL_reg_flags & RF_warned))
4264 		{
4265 		    PL_reg_flags |= RF_warned;
4266 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4267 			"%s limit (%d) exceeded",
4268 			"Complex regular subexpression recursion",
4269 			REG_INFTY - 1);
4270 		}
4271 		cur_curlyx->u.curlyx.count--;
4272 		CACHEsayNO;
4273 	    }
4274 
4275 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4276 		"%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4277 	    );
4278 	    /* Try grabbing another A and see if it helps. */
4279 	    PL_reginput = locinput;
4280 	    cur_curlyx->u.curlyx.lastloc = locinput;
4281 	    ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4282 	    REGCP_SET(ST.lastcp);
4283 	    PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4284 	    /* NOTREACHED */
4285 
4286 #undef  ST
4287 #define ST st->u.branch
4288 
4289 	case BRANCHJ:	    /*  /(...|A|...)/ with long next pointer */
4290 	    next = scan + ARG(scan);
4291 	    if (next == scan)
4292 		next = NULL;
4293 	    scan = NEXTOPER(scan);
4294 	    /* FALL THROUGH */
4295 
4296 	case BRANCH:	    /*  /(...|A|...)/ */
4297 	    scan = NEXTOPER(scan); /* scan now points to inner node */
4298 	    ST.lastparen = *PL_reglastparen;
4299 	    ST.next_branch = next;
4300 	    REGCP_SET(ST.cp);
4301 	    PL_reginput = locinput;
4302 
4303 	    /* Now go into the branch */
4304 	    if (has_cutgroup) {
4305 	        PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4306 	    } else {
4307 	        PUSH_STATE_GOTO(BRANCH_next, scan);
4308 	    }
4309 	    /* NOTREACHED */
4310         case CUTGROUP:
4311             PL_reginput = locinput;
4312             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4313                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4314             PUSH_STATE_GOTO(CUTGROUP_next,next);
4315             /* NOTREACHED */
4316         case CUTGROUP_next_fail:
4317             do_cutgroup = 1;
4318             no_final = 1;
4319             if (st->u.mark.mark_name)
4320                 sv_commit = st->u.mark.mark_name;
4321             sayNO;
4322             /* NOTREACHED */
4323         case BRANCH_next:
4324             sayYES;
4325             /* NOTREACHED */
4326 	case BRANCH_next_fail: /* that branch failed; try the next, if any */
4327 	    if (do_cutgroup) {
4328 	        do_cutgroup = 0;
4329 	        no_final = 0;
4330 	    }
4331 	    REGCP_UNWIND(ST.cp);
4332 	    for (n = *PL_reglastparen; n > ST.lastparen; n--)
4333 		PL_regoffs[n].end = -1;
4334 	    *PL_reglastparen = n;
4335 	    /*dmq: *PL_reglastcloseparen = n; */
4336 	    scan = ST.next_branch;
4337 	    /* no more branches? */
4338 	    if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4339 	        DEBUG_EXECUTE_r({
4340 		    PerlIO_printf( Perl_debug_log,
4341 			"%*s  %sBRANCH failed...%s\n",
4342 			REPORT_CODE_OFF+depth*2, "",
4343 			PL_colors[4],
4344 			PL_colors[5] );
4345 		});
4346 		sayNO_SILENT;
4347             }
4348 	    continue; /* execute next BRANCH[J] op */
4349 	    /* NOTREACHED */
4350 
4351 	case MINMOD:
4352 	    minmod = 1;
4353 	    break;
4354 
4355 #undef  ST
4356 #define ST st->u.curlym
4357 
4358 	case CURLYM:	/* /A{m,n}B/ where A is fixed-length */
4359 
4360 	    /* This is an optimisation of CURLYX that enables us to push
4361 	     * only a single backtracking state, no matter how many matches
4362 	     * there are in {m,n}. It relies on the pattern being constant
4363 	     * length, with no parens to influence future backrefs
4364 	     */
4365 
4366 	    ST.me = scan;
4367 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4368 
4369 	    /* if paren positive, emulate an OPEN/CLOSE around A */
4370 	    if (ST.me->flags) {
4371 		U32 paren = ST.me->flags;
4372 		if (paren > PL_regsize)
4373 		    PL_regsize = paren;
4374 		if (paren > *PL_reglastparen)
4375 		    *PL_reglastparen = paren;
4376 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
4377 	    }
4378 	    ST.A = scan;
4379 	    ST.B = next;
4380 	    ST.alen = 0;
4381 	    ST.count = 0;
4382 	    ST.minmod = minmod;
4383 	    minmod = 0;
4384 	    ST.c1 = CHRTEST_UNINIT;
4385 	    REGCP_SET(ST.cp);
4386 
4387 	    if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4388 		goto curlym_do_B;
4389 
4390 	  curlym_do_A: /* execute the A in /A{m,n}B/  */
4391 	    PL_reginput = locinput;
4392 	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4393 	    /* NOTREACHED */
4394 
4395 	case CURLYM_A: /* we've just matched an A */
4396 	    locinput = st->locinput;
4397 	    nextchr = UCHARAT(locinput);
4398 
4399 	    ST.count++;
4400 	    /* after first match, determine A's length: u.curlym.alen */
4401 	    if (ST.count == 1) {
4402 		if (PL_reg_match_utf8) {
4403 		    char *s = locinput;
4404 		    while (s < PL_reginput) {
4405 			ST.alen++;
4406 			s += UTF8SKIP(s);
4407 		    }
4408 		}
4409 		else {
4410 		    ST.alen = PL_reginput - locinput;
4411 		}
4412 		if (ST.alen == 0)
4413 		    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4414 	    }
4415 	    DEBUG_EXECUTE_r(
4416 		PerlIO_printf(Perl_debug_log,
4417 			  "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4418 			  (int)(REPORT_CODE_OFF+(depth*2)), "",
4419 			  (IV) ST.count, (IV)ST.alen)
4420 	    );
4421 
4422 	    locinput = PL_reginput;
4423 
4424 	    if (cur_eval && cur_eval->u.eval.close_paren &&
4425 	        cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4426 	        goto fake_end;
4427 
4428 	    {
4429 		I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4430 		if ( max == REG_INFTY || ST.count < max )
4431 		    goto curlym_do_A; /* try to match another A */
4432 	    }
4433 	    goto curlym_do_B; /* try to match B */
4434 
4435 	case CURLYM_A_fail: /* just failed to match an A */
4436 	    REGCP_UNWIND(ST.cp);
4437 
4438 	    if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4439 	        || (cur_eval && cur_eval->u.eval.close_paren &&
4440 	            cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4441 		sayNO;
4442 
4443 	  curlym_do_B: /* execute the B in /A{m,n}B/  */
4444 	    PL_reginput = locinput;
4445 	    if (ST.c1 == CHRTEST_UNINIT) {
4446 		/* calculate c1 and c2 for possible match of 1st char
4447 		 * following curly */
4448 		ST.c1 = ST.c2 = CHRTEST_VOID;
4449 		if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4450 		    regnode *text_node = ST.B;
4451 		    if (! HAS_TEXT(text_node))
4452 			FIND_NEXT_IMPT(text_node);
4453 	            /* this used to be
4454 
4455 	                (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4456 
4457 	            	But the former is redundant in light of the latter.
4458 
4459 	            	if this changes back then the macro for
4460 	            	IS_TEXT and friends need to change.
4461 	             */
4462 		    if (PL_regkind[OP(text_node)] == EXACT)
4463 		    {
4464 
4465 			ST.c1 = (U8)*STRING(text_node);
4466 			ST.c2 =
4467 			    (IS_TEXTF(text_node))
4468 			    ? PL_fold[ST.c1]
4469 			    : (IS_TEXTFL(text_node))
4470 				? PL_fold_locale[ST.c1]
4471 				: ST.c1;
4472 		    }
4473 		}
4474 	    }
4475 
4476 	    DEBUG_EXECUTE_r(
4477 		PerlIO_printf(Perl_debug_log,
4478 		    "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4479 		    (int)(REPORT_CODE_OFF+(depth*2)),
4480 		    "", (IV)ST.count)
4481 		);
4482 	    if (ST.c1 != CHRTEST_VOID
4483 		    && UCHARAT(PL_reginput) != ST.c1
4484 		    && UCHARAT(PL_reginput) != ST.c2)
4485 	    {
4486 		/* simulate B failing */
4487 		DEBUG_OPTIMISE_r(
4488 		    PerlIO_printf(Perl_debug_log,
4489 		        "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4490 		        (int)(REPORT_CODE_OFF+(depth*2)),"",
4491 		        (IV)ST.c1,(IV)ST.c2
4492 		));
4493 		state_num = CURLYM_B_fail;
4494 		goto reenter_switch;
4495 	    }
4496 
4497 	    if (ST.me->flags) {
4498 		/* mark current A as captured */
4499 		I32 paren = ST.me->flags;
4500 		if (ST.count) {
4501 		    PL_regoffs[paren].start
4502 			= HOPc(PL_reginput, -ST.alen) - PL_bostr;
4503 		    PL_regoffs[paren].end = PL_reginput - PL_bostr;
4504 		    /*dmq: *PL_reglastcloseparen = paren; */
4505 		}
4506 		else
4507 		    PL_regoffs[paren].end = -1;
4508 		if (cur_eval && cur_eval->u.eval.close_paren &&
4509 		    cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4510 		{
4511 		    if (ST.count)
4512 	                goto fake_end;
4513 	            else
4514 	                sayNO;
4515 	        }
4516 	    }
4517 
4518 	    PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4519 	    /* NOTREACHED */
4520 
4521 	case CURLYM_B_fail: /* just failed to match a B */
4522 	    REGCP_UNWIND(ST.cp);
4523 	    if (ST.minmod) {
4524 		I32 max = ARG2(ST.me);
4525 		if (max != REG_INFTY && ST.count == max)
4526 		    sayNO;
4527 		goto curlym_do_A; /* try to match a further A */
4528 	    }
4529 	    /* backtrack one A */
4530 	    if (ST.count == ARG1(ST.me) /* min */)
4531 		sayNO;
4532 	    ST.count--;
4533 	    locinput = HOPc(locinput, -ST.alen);
4534 	    goto curlym_do_B; /* try to match B */
4535 
4536 #undef ST
4537 #define ST st->u.curly
4538 
4539 #define CURLY_SETPAREN(paren, success) \
4540     if (paren) { \
4541 	if (success) { \
4542 	    PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4543 	    PL_regoffs[paren].end = locinput - PL_bostr; \
4544 	    *PL_reglastcloseparen = paren; \
4545 	} \
4546 	else \
4547 	    PL_regoffs[paren].end = -1; \
4548     }
4549 
4550 	case STAR:		/*  /A*B/ where A is width 1 */
4551 	    ST.paren = 0;
4552 	    ST.min = 0;
4553 	    ST.max = REG_INFTY;
4554 	    scan = NEXTOPER(scan);
4555 	    goto repeat;
4556 	case PLUS:		/*  /A+B/ where A is width 1 */
4557 	    ST.paren = 0;
4558 	    ST.min = 1;
4559 	    ST.max = REG_INFTY;
4560 	    scan = NEXTOPER(scan);
4561 	    goto repeat;
4562 	case CURLYN:		/*  /(A){m,n}B/ where A is width 1 */
4563 	    ST.paren = scan->flags;	/* Which paren to set */
4564 	    if (ST.paren > PL_regsize)
4565 		PL_regsize = ST.paren;
4566 	    if (ST.paren > *PL_reglastparen)
4567 		*PL_reglastparen = ST.paren;
4568 	    ST.min = ARG1(scan);  /* min to match */
4569 	    ST.max = ARG2(scan);  /* max to match */
4570 	    if (cur_eval && cur_eval->u.eval.close_paren &&
4571 	        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4572 	        ST.min=1;
4573 	        ST.max=1;
4574 	    }
4575             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4576 	    goto repeat;
4577 	case CURLY:		/*  /A{m,n}B/ where A is width 1 */
4578 	    ST.paren = 0;
4579 	    ST.min = ARG1(scan);  /* min to match */
4580 	    ST.max = ARG2(scan);  /* max to match */
4581 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4582 	  repeat:
4583 	    /*
4584 	    * Lookahead to avoid useless match attempts
4585 	    * when we know what character comes next.
4586 	    *
4587 	    * Used to only do .*x and .*?x, but now it allows
4588 	    * for )'s, ('s and (?{ ... })'s to be in the way
4589 	    * of the quantifier and the EXACT-like node.  -- japhy
4590 	    */
4591 
4592 	    if (ST.min > ST.max) /* XXX make this a compile-time check? */
4593 		sayNO;
4594 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
4595 		U8 *s;
4596 		regnode *text_node = next;
4597 
4598 		if (! HAS_TEXT(text_node))
4599 		    FIND_NEXT_IMPT(text_node);
4600 
4601 		if (! HAS_TEXT(text_node))
4602 		    ST.c1 = ST.c2 = CHRTEST_VOID;
4603 		else {
4604 		    if ( PL_regkind[OP(text_node)] != EXACT ) {
4605 			ST.c1 = ST.c2 = CHRTEST_VOID;
4606 			goto assume_ok_easy;
4607 		    }
4608 		    else
4609 			s = (U8*)STRING(text_node);
4610 
4611                     /*  Currently we only get here when
4612 
4613                         PL_rekind[OP(text_node)] == EXACT
4614 
4615                         if this changes back then the macro for IS_TEXT and
4616                         friends need to change. */
4617 		    if (!UTF) {
4618 			ST.c2 = ST.c1 = *s;
4619 			if (IS_TEXTF(text_node))
4620 			    ST.c2 = PL_fold[ST.c1];
4621 			else if (IS_TEXTFL(text_node))
4622 			    ST.c2 = PL_fold_locale[ST.c1];
4623 		    }
4624 		    else { /* UTF */
4625 			if (IS_TEXTF(text_node)) {
4626 			     STRLEN ulen1, ulen2;
4627 			     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4628 			     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4629 
4630 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4631 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4632 #ifdef EBCDIC
4633 			     ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4634 						    ckWARN(WARN_UTF8) ?
4635                                                     0 : UTF8_ALLOW_ANY);
4636 			     ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4637                                                     ckWARN(WARN_UTF8) ?
4638                                                     0 : UTF8_ALLOW_ANY);
4639 #else
4640 			     ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4641 						    uniflags);
4642 			     ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4643 						    uniflags);
4644 #endif
4645 			}
4646 			else {
4647 			    ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4648 						     uniflags);
4649 			}
4650 		    }
4651 		}
4652 	    }
4653 	    else
4654 		ST.c1 = ST.c2 = CHRTEST_VOID;
4655 	assume_ok_easy:
4656 
4657 	    ST.A = scan;
4658 	    ST.B = next;
4659 	    PL_reginput = locinput;
4660 	    if (minmod) {
4661 		minmod = 0;
4662 		if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4663 		    sayNO;
4664 		ST.count = ST.min;
4665 		locinput = PL_reginput;
4666 		REGCP_SET(ST.cp);
4667 		if (ST.c1 == CHRTEST_VOID)
4668 		    goto curly_try_B_min;
4669 
4670 		ST.oldloc = locinput;
4671 
4672 		/* set ST.maxpos to the furthest point along the
4673 		 * string that could possibly match */
4674 		if  (ST.max == REG_INFTY) {
4675 		    ST.maxpos = PL_regeol - 1;
4676 		    if (do_utf8)
4677 			while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4678 			    ST.maxpos--;
4679 		}
4680 		else if (do_utf8) {
4681 		    int m = ST.max - ST.min;
4682 		    for (ST.maxpos = locinput;
4683 			 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4684 			ST.maxpos += UTF8SKIP(ST.maxpos);
4685 		}
4686 		else {
4687 		    ST.maxpos = locinput + ST.max - ST.min;
4688 		    if (ST.maxpos >= PL_regeol)
4689 			ST.maxpos = PL_regeol - 1;
4690 		}
4691 		goto curly_try_B_min_known;
4692 
4693 	    }
4694 	    else {
4695 		ST.count = regrepeat(rex, ST.A, ST.max, depth);
4696 		locinput = PL_reginput;
4697 		if (ST.count < ST.min)
4698 		    sayNO;
4699 		if ((ST.count > ST.min)
4700 		    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4701 		{
4702 		    /* A{m,n} must come at the end of the string, there's
4703 		     * no point in backing off ... */
4704 		    ST.min = ST.count;
4705 		    /* ...except that $ and \Z can match before *and* after
4706 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4707 		       We may back off by one in this case. */
4708 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4709 			ST.min--;
4710 		}
4711 		REGCP_SET(ST.cp);
4712 		goto curly_try_B_max;
4713 	    }
4714 	    /* NOTREACHED */
4715 
4716 
4717 	case CURLY_B_min_known_fail:
4718 	    /* failed to find B in a non-greedy match where c1,c2 valid */
4719 	    if (ST.paren && ST.count)
4720 		PL_regoffs[ST.paren].end = -1;
4721 
4722 	    PL_reginput = locinput;	/* Could be reset... */
4723 	    REGCP_UNWIND(ST.cp);
4724 	    /* Couldn't or didn't -- move forward. */
4725 	    ST.oldloc = locinput;
4726 	    if (do_utf8)
4727 		locinput += UTF8SKIP(locinput);
4728 	    else
4729 		locinput++;
4730 	    ST.count++;
4731 	  curly_try_B_min_known:
4732 	     /* find the next place where 'B' could work, then call B */
4733 	    {
4734 		int n;
4735 		if (do_utf8) {
4736 		    n = (ST.oldloc == locinput) ? 0 : 1;
4737 		    if (ST.c1 == ST.c2) {
4738 			STRLEN len;
4739 			/* set n to utf8_distance(oldloc, locinput) */
4740 			while (locinput <= ST.maxpos &&
4741 			       utf8n_to_uvchr((U8*)locinput,
4742 					      UTF8_MAXBYTES, &len,
4743 					      uniflags) != (UV)ST.c1) {
4744 			    locinput += len;
4745 			    n++;
4746 			}
4747 		    }
4748 		    else {
4749 			/* set n to utf8_distance(oldloc, locinput) */
4750 			while (locinput <= ST.maxpos) {
4751 			    STRLEN len;
4752 			    const UV c = utf8n_to_uvchr((U8*)locinput,
4753 						  UTF8_MAXBYTES, &len,
4754 						  uniflags);
4755 			    if (c == (UV)ST.c1 || c == (UV)ST.c2)
4756 				break;
4757 			    locinput += len;
4758 			    n++;
4759 			}
4760 		    }
4761 		}
4762 		else {
4763 		    if (ST.c1 == ST.c2) {
4764 			while (locinput <= ST.maxpos &&
4765 			       UCHARAT(locinput) != ST.c1)
4766 			    locinput++;
4767 		    }
4768 		    else {
4769 			while (locinput <= ST.maxpos
4770 			       && UCHARAT(locinput) != ST.c1
4771 			       && UCHARAT(locinput) != ST.c2)
4772 			    locinput++;
4773 		    }
4774 		    n = locinput - ST.oldloc;
4775 		}
4776 		if (locinput > ST.maxpos)
4777 		    sayNO;
4778 		/* PL_reginput == oldloc now */
4779 		if (n) {
4780 		    ST.count += n;
4781 		    if (regrepeat(rex, ST.A, n, depth) < n)
4782 			sayNO;
4783 		}
4784 		PL_reginput = locinput;
4785 		CURLY_SETPAREN(ST.paren, ST.count);
4786 		if (cur_eval && cur_eval->u.eval.close_paren &&
4787 		    cur_eval->u.eval.close_paren == (U32)ST.paren) {
4788 		    goto fake_end;
4789 	        }
4790 		PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4791 	    }
4792 	    /* NOTREACHED */
4793 
4794 
4795 	case CURLY_B_min_fail:
4796 	    /* failed to find B in a non-greedy match where c1,c2 invalid */
4797 	    if (ST.paren && ST.count)
4798 		PL_regoffs[ST.paren].end = -1;
4799 
4800 	    REGCP_UNWIND(ST.cp);
4801 	    /* failed -- move forward one */
4802 	    PL_reginput = locinput;
4803 	    if (regrepeat(rex, ST.A, 1, depth)) {
4804 		ST.count++;
4805 		locinput = PL_reginput;
4806 		if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4807 			ST.count > 0)) /* count overflow ? */
4808 		{
4809 		  curly_try_B_min:
4810 		    CURLY_SETPAREN(ST.paren, ST.count);
4811 		    if (cur_eval && cur_eval->u.eval.close_paren &&
4812 		        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4813                         goto fake_end;
4814                     }
4815 		    PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4816 		}
4817 	    }
4818 	    sayNO;
4819 	    /* NOTREACHED */
4820 
4821 
4822 	curly_try_B_max:
4823 	    /* a successful greedy match: now try to match B */
4824             if (cur_eval && cur_eval->u.eval.close_paren &&
4825                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4826                 goto fake_end;
4827             }
4828 	    {
4829 		UV c = 0;
4830 		if (ST.c1 != CHRTEST_VOID)
4831 		    c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4832 					   UTF8_MAXBYTES, 0, uniflags)
4833 				: (UV) UCHARAT(PL_reginput);
4834 		/* If it could work, try it. */
4835 		if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4836 		    CURLY_SETPAREN(ST.paren, ST.count);
4837 		    PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4838 		    /* NOTREACHED */
4839 		}
4840 	    }
4841 	    /* FALL THROUGH */
4842 	case CURLY_B_max_fail:
4843 	    /* failed to find B in a greedy match */
4844 	    if (ST.paren && ST.count)
4845 		PL_regoffs[ST.paren].end = -1;
4846 
4847 	    REGCP_UNWIND(ST.cp);
4848 	    /*  back up. */
4849 	    if (--ST.count < ST.min)
4850 		sayNO;
4851 	    PL_reginput = locinput = HOPc(locinput, -1);
4852 	    goto curly_try_B_max;
4853 
4854 #undef ST
4855 
4856 	case END:
4857 	    fake_end:
4858 	    if (cur_eval) {
4859 		/* we've just finished A in /(??{A})B/; now continue with B */
4860 		I32 tmpix;
4861 		st->u.eval.toggle_reg_flags
4862 			    = cur_eval->u.eval.toggle_reg_flags;
4863 		PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4864 
4865 		st->u.eval.prev_rex = rex;		/* inner */
4866 		SETREX(rex,cur_eval->u.eval.prev_rex);
4867 		rexi = RXi_GET(rex);
4868 		cur_curlyx = cur_eval->u.eval.prev_curlyx;
4869 		ReREFCNT_inc(rex);
4870 		st->u.eval.cp = regcppush(0);	/* Save *all* the positions. */
4871 
4872 		/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4873 		PL_reglastparen = &rex->lastparen;
4874 		PL_reglastcloseparen = &rex->lastcloseparen;
4875 
4876 		REGCP_SET(st->u.eval.lastcp);
4877 		PL_reginput = locinput;
4878 
4879 		/* Restore parens of the outer rex without popping the
4880 		 * savestack */
4881 		tmpix = PL_savestack_ix;
4882 		PL_savestack_ix = cur_eval->u.eval.lastcp;
4883 		regcppop(rex);
4884 		PL_savestack_ix = tmpix;
4885 
4886 		st->u.eval.prev_eval = cur_eval;
4887 		cur_eval = cur_eval->u.eval.prev_eval;
4888 		DEBUG_EXECUTE_r(
4889 		    PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4890 				      REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4891                 if ( nochange_depth )
4892 	            nochange_depth--;
4893 
4894                 PUSH_YES_STATE_GOTO(EVAL_AB,
4895 			st->u.eval.prev_eval->u.eval.B); /* match B */
4896 	    }
4897 
4898 	    if (locinput < reginfo->till) {
4899 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4900 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4901 				      PL_colors[4],
4902 				      (long)(locinput - PL_reg_starttry),
4903 				      (long)(reginfo->till - PL_reg_starttry),
4904 				      PL_colors[5]));
4905 
4906 		sayNO_SILENT;		/* Cannot match: too short. */
4907 	    }
4908 	    PL_reginput = locinput;	/* put where regtry can find it */
4909 	    sayYES;			/* Success! */
4910 
4911 	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4912 	    DEBUG_EXECUTE_r(
4913 	    PerlIO_printf(Perl_debug_log,
4914 		"%*s  %ssubpattern success...%s\n",
4915 		REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4916 	    PL_reginput = locinput;	/* put where regtry can find it */
4917 	    sayYES;			/* Success! */
4918 
4919 #undef  ST
4920 #define ST st->u.ifmatch
4921 
4922 	case SUSPEND:	/* (?>A) */
4923 	    ST.wanted = 1;
4924 	    PL_reginput = locinput;
4925 	    goto do_ifmatch;
4926 
4927 	case UNLESSM:	/* -ve lookaround: (?!A), or with flags, (?<!A) */
4928 	    ST.wanted = 0;
4929 	    goto ifmatch_trivial_fail_test;
4930 
4931 	case IFMATCH:	/* +ve lookaround: (?=A), or with flags, (?<=A) */
4932 	    ST.wanted = 1;
4933 	  ifmatch_trivial_fail_test:
4934 	    if (scan->flags) {
4935 		char * const s = HOPBACKc(locinput, scan->flags);
4936 		if (!s) {
4937 		    /* trivial fail */
4938 		    if (logical) {
4939 			logical = 0;
4940 			sw = 1 - (bool)ST.wanted;
4941 		    }
4942 		    else if (ST.wanted)
4943 			sayNO;
4944 		    next = scan + ARG(scan);
4945 		    if (next == scan)
4946 			next = NULL;
4947 		    break;
4948 		}
4949 		PL_reginput = s;
4950 	    }
4951 	    else
4952 		PL_reginput = locinput;
4953 
4954 	  do_ifmatch:
4955 	    ST.me = scan;
4956 	    ST.logical = logical;
4957 	    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4958 
4959 	    /* execute body of (?...A) */
4960 	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4961 	    /* NOTREACHED */
4962 
4963 	case IFMATCH_A_fail: /* body of (?...A) failed */
4964 	    ST.wanted = !ST.wanted;
4965 	    /* FALL THROUGH */
4966 
4967 	case IFMATCH_A: /* body of (?...A) succeeded */
4968 	    if (ST.logical) {
4969 		sw = (bool)ST.wanted;
4970 	    }
4971 	    else if (!ST.wanted)
4972 		sayNO;
4973 
4974 	    if (OP(ST.me) == SUSPEND)
4975 		locinput = PL_reginput;
4976 	    else {
4977 		locinput = PL_reginput = st->locinput;
4978 		nextchr = UCHARAT(locinput);
4979 	    }
4980 	    scan = ST.me + ARG(ST.me);
4981 	    if (scan == ST.me)
4982 		scan = NULL;
4983 	    continue; /* execute B */
4984 
4985 #undef ST
4986 
4987 	case LONGJMP:
4988 	    next = scan + ARG(scan);
4989 	    if (next == scan)
4990 		next = NULL;
4991 	    break;
4992 	case COMMIT:
4993 	    reginfo->cutpoint = PL_regeol;
4994 	    /* FALLTHROUGH */
4995 	case PRUNE:
4996 	    PL_reginput = locinput;
4997 	    if (!scan->flags)
4998 	        sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4999 	    PUSH_STATE_GOTO(COMMIT_next,next);
5000 	    /* NOTREACHED */
5001 	case COMMIT_next_fail:
5002 	    no_final = 1;
5003 	    /* FALLTHROUGH */
5004 	case OPFAIL:
5005 	    sayNO;
5006 	    /* NOTREACHED */
5007 
5008 #define ST st->u.mark
5009         case MARKPOINT:
5010             ST.prev_mark = mark_state;
5011             ST.mark_name = sv_commit = sv_yes_mark
5012                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5013             mark_state = st;
5014             ST.mark_loc = PL_reginput = locinput;
5015             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5016             /* NOTREACHED */
5017         case MARKPOINT_next:
5018             mark_state = ST.prev_mark;
5019             sayYES;
5020             /* NOTREACHED */
5021         case MARKPOINT_next_fail:
5022             if (popmark && sv_eq(ST.mark_name,popmark))
5023             {
5024                 if (ST.mark_loc > startpoint)
5025 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5026                 popmark = NULL; /* we found our mark */
5027                 sv_commit = ST.mark_name;
5028 
5029                 DEBUG_EXECUTE_r({
5030                         PerlIO_printf(Perl_debug_log,
5031 		            "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5032 		            REPORT_CODE_OFF+depth*2, "",
5033 		            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5034 		});
5035             }
5036             mark_state = ST.prev_mark;
5037             sv_yes_mark = mark_state ?
5038                 mark_state->u.mark.mark_name : NULL;
5039             sayNO;
5040             /* NOTREACHED */
5041         case SKIP:
5042             PL_reginput = locinput;
5043             if (scan->flags) {
5044                 /* (*SKIP) : if we fail we cut here*/
5045                 ST.mark_name = NULL;
5046                 ST.mark_loc = locinput;
5047                 PUSH_STATE_GOTO(SKIP_next,next);
5048             } else {
5049                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5050                    otherwise do nothing.  Meaning we need to scan
5051                  */
5052                 regmatch_state *cur = mark_state;
5053                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5054 
5055                 while (cur) {
5056                     if ( sv_eq( cur->u.mark.mark_name,
5057                                 find ) )
5058                     {
5059                         ST.mark_name = find;
5060                         PUSH_STATE_GOTO( SKIP_next, next );
5061                     }
5062                     cur = cur->u.mark.prev_mark;
5063                 }
5064             }
5065             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5066             break;
5067 	case SKIP_next_fail:
5068 	    if (ST.mark_name) {
5069 	        /* (*CUT:NAME) - Set up to search for the name as we
5070 	           collapse the stack*/
5071 	        popmark = ST.mark_name;
5072 	    } else {
5073 	        /* (*CUT) - No name, we cut here.*/
5074 	        if (ST.mark_loc > startpoint)
5075 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5076 	        /* but we set sv_commit to latest mark_name if there
5077 	           is one so they can test to see how things lead to this
5078 	           cut */
5079                 if (mark_state)
5080                     sv_commit=mark_state->u.mark.mark_name;
5081             }
5082             no_final = 1;
5083             sayNO;
5084             /* NOTREACHED */
5085 #undef ST
5086         case FOLDCHAR:
5087             n = ARG(scan);
5088             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5089                 locinput += ln;
5090             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5091                 sayNO;
5092             } else  {
5093                 U8 folded[UTF8_MAXBYTES_CASE+1];
5094                 STRLEN foldlen;
5095                 const char * const l = locinput;
5096                 char *e = PL_regeol;
5097                 to_uni_fold(n, folded, &foldlen);
5098 
5099 		if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5100                 	       l, &e, 0,  do_utf8)) {
5101                         sayNO;
5102                 }
5103                 locinput = e;
5104             }
5105             nextchr = UCHARAT(locinput);
5106             break;
5107         case LNBREAK:
5108             if ((n=is_LNBREAK(locinput,do_utf8))) {
5109                 locinput += n;
5110                 nextchr = UCHARAT(locinput);
5111             } else
5112                 sayNO;
5113             break;
5114 
5115 #define CASE_CLASS(nAmE)                              \
5116         case nAmE:                                    \
5117             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5118                 locinput += n;                        \
5119                 nextchr = UCHARAT(locinput);          \
5120             } else                                    \
5121                 sayNO;                                \
5122             break;                                    \
5123         case N##nAmE:                                 \
5124             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5125                 sayNO;                                \
5126             } else {                                  \
5127                 locinput += UTF8SKIP(locinput);       \
5128                 nextchr = UCHARAT(locinput);          \
5129             }                                         \
5130             break
5131 
5132         CASE_CLASS(VERTWS);
5133         CASE_CLASS(HORIZWS);
5134 #undef CASE_CLASS
5135 
5136 	default:
5137 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5138 			  PTR2UV(scan), OP(scan));
5139 	    Perl_croak(aTHX_ "regexp memory corruption");
5140 
5141 	} /* end switch */
5142 
5143         /* switch break jumps here */
5144 	scan = next; /* prepare to execute the next op and ... */
5145 	continue;    /* ... jump back to the top, reusing st */
5146 	/* NOTREACHED */
5147 
5148       push_yes_state:
5149 	/* push a state that backtracks on success */
5150 	st->u.yes.prev_yes_state = yes_state;
5151 	yes_state = st;
5152 	/* FALL THROUGH */
5153       push_state:
5154 	/* push a new regex state, then continue at scan  */
5155 	{
5156 	    regmatch_state *newst;
5157 
5158 	    DEBUG_STACK_r({
5159 	        regmatch_state *cur = st;
5160 	        regmatch_state *curyes = yes_state;
5161 	        int curd = depth;
5162 	        regmatch_slab *slab = PL_regmatch_slab;
5163                 for (;curd > -1;cur--,curd--) {
5164                     if (cur < SLAB_FIRST(slab)) {
5165                 	slab = slab->prev;
5166                 	cur = SLAB_LAST(slab);
5167                     }
5168                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5169                         REPORT_CODE_OFF + 2 + depth * 2,"",
5170                         curd, PL_reg_name[cur->resume_state],
5171                         (curyes == cur) ? "yes" : ""
5172                     );
5173                     if (curyes == cur)
5174 	                curyes = cur->u.yes.prev_yes_state;
5175                 }
5176             } else
5177                 DEBUG_STATE_pp("push")
5178             );
5179 	    depth++;
5180 	    st->locinput = locinput;
5181 	    newst = st+1;
5182 	    if (newst >  SLAB_LAST(PL_regmatch_slab))
5183 		newst = S_push_slab(aTHX);
5184 	    PL_regmatch_state = newst;
5185 
5186 	    locinput = PL_reginput;
5187 	    nextchr = UCHARAT(locinput);
5188 	    st = newst;
5189 	    continue;
5190 	    /* NOTREACHED */
5191 	}
5192     }
5193 
5194     /*
5195     * We get here only if there's trouble -- normally "case END" is
5196     * the terminating point.
5197     */
5198     Perl_croak(aTHX_ "corrupted regexp pointers");
5199     /*NOTREACHED*/
5200     sayNO;
5201 
5202 yes:
5203     if (yes_state) {
5204 	/* we have successfully completed a subexpression, but we must now
5205 	 * pop to the state marked by yes_state and continue from there */
5206 	assert(st != yes_state);
5207 #ifdef DEBUGGING
5208 	while (st != yes_state) {
5209 	    st--;
5210 	    if (st < SLAB_FIRST(PL_regmatch_slab)) {
5211 		PL_regmatch_slab = PL_regmatch_slab->prev;
5212 		st = SLAB_LAST(PL_regmatch_slab);
5213 	    }
5214 	    DEBUG_STATE_r({
5215 	        if (no_final) {
5216 	            DEBUG_STATE_pp("pop (no final)");
5217 	        } else {
5218 	            DEBUG_STATE_pp("pop (yes)");
5219 	        }
5220 	    });
5221 	    depth--;
5222 	}
5223 #else
5224 	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5225 	    || yes_state > SLAB_LAST(PL_regmatch_slab))
5226 	{
5227 	    /* not in this slab, pop slab */
5228 	    depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5229 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5230 	    st = SLAB_LAST(PL_regmatch_slab);
5231 	}
5232 	depth -= (st - yes_state);
5233 #endif
5234 	st = yes_state;
5235 	yes_state = st->u.yes.prev_yes_state;
5236 	PL_regmatch_state = st;
5237 
5238         if (no_final) {
5239             locinput= st->locinput;
5240             nextchr = UCHARAT(locinput);
5241         }
5242 	state_num = st->resume_state + no_final;
5243 	goto reenter_switch;
5244     }
5245 
5246     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5247 			  PL_colors[4], PL_colors[5]));
5248 
5249     if (PL_reg_eval_set) {
5250 	/* each successfully executed (?{...}) block does the equivalent of
5251 	 *   local $^R = do {...}
5252 	 * When popping the save stack, all these locals would be undone;
5253 	 * bypass this by setting the outermost saved $^R to the latest
5254 	 * value */
5255 	if (oreplsv != GvSV(PL_replgv))
5256 	    sv_setsv(oreplsv, GvSV(PL_replgv));
5257     }
5258     result = 1;
5259     goto final_exit;
5260 
5261 no:
5262     DEBUG_EXECUTE_r(
5263 	PerlIO_printf(Perl_debug_log,
5264             "%*s  %sfailed...%s\n",
5265             REPORT_CODE_OFF+depth*2, "",
5266             PL_colors[4], PL_colors[5])
5267 	);
5268 
5269 no_silent:
5270     if (no_final) {
5271         if (yes_state) {
5272             goto yes;
5273         } else {
5274             goto final_exit;
5275         }
5276     }
5277     if (depth) {
5278 	/* there's a previous state to backtrack to */
5279 	st--;
5280 	if (st < SLAB_FIRST(PL_regmatch_slab)) {
5281 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5282 	    st = SLAB_LAST(PL_regmatch_slab);
5283 	}
5284 	PL_regmatch_state = st;
5285 	locinput= st->locinput;
5286 	nextchr = UCHARAT(locinput);
5287 
5288 	DEBUG_STATE_pp("pop");
5289 	depth--;
5290 	if (yes_state == st)
5291 	    yes_state = st->u.yes.prev_yes_state;
5292 
5293 	state_num = st->resume_state + 1; /* failure = success + 1 */
5294 	goto reenter_switch;
5295     }
5296     result = 0;
5297 
5298   final_exit:
5299     if (rex->intflags & PREGf_VERBARG_SEEN) {
5300         SV *sv_err = get_sv("REGERROR", 1);
5301         SV *sv_mrk = get_sv("REGMARK", 1);
5302         if (result) {
5303             sv_commit = &PL_sv_no;
5304             if (!sv_yes_mark)
5305                 sv_yes_mark = &PL_sv_yes;
5306         } else {
5307             if (!sv_commit)
5308                 sv_commit = &PL_sv_yes;
5309             sv_yes_mark = &PL_sv_no;
5310         }
5311         sv_setsv(sv_err, sv_commit);
5312         sv_setsv(sv_mrk, sv_yes_mark);
5313     }
5314 
5315     /* clean up; in particular, free all slabs above current one */
5316     LEAVE_SCOPE(oldsave);
5317 
5318     return result;
5319 }
5320 
5321 /*
5322  - regrepeat - repeatedly match something simple, report how many
5323  */
5324 /*
5325  * [This routine now assumes that it will only match on things of length 1.
5326  * That was true before, but now we assume scan - reginput is the count,
5327  * rather than incrementing count on every character.  [Er, except utf8.]]
5328  */
5329 STATIC I32
5330 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5331 {
5332     dVAR;
5333     register char *scan;
5334     register I32 c;
5335     register char *loceol = PL_regeol;
5336     register I32 hardcount = 0;
5337     register bool do_utf8 = PL_reg_match_utf8;
5338 #ifndef DEBUGGING
5339     PERL_UNUSED_ARG(depth);
5340 #endif
5341 
5342     PERL_ARGS_ASSERT_REGREPEAT;
5343 
5344     scan = PL_reginput;
5345     if (max == REG_INFTY)
5346 	max = I32_MAX;
5347     else if (max < loceol - scan)
5348 	loceol = scan + max;
5349     switch (OP(p)) {
5350     case REG_ANY:
5351 	if (do_utf8) {
5352 	    loceol = PL_regeol;
5353 	    while (scan < loceol && hardcount < max && *scan != '\n') {
5354 		scan += UTF8SKIP(scan);
5355 		hardcount++;
5356 	    }
5357 	} else {
5358 	    while (scan < loceol && *scan != '\n')
5359 		scan++;
5360 	}
5361 	break;
5362     case SANY:
5363         if (do_utf8) {
5364 	    loceol = PL_regeol;
5365 	    while (scan < loceol && hardcount < max) {
5366 	        scan += UTF8SKIP(scan);
5367 		hardcount++;
5368 	    }
5369 	}
5370 	else
5371 	    scan = loceol;
5372 	break;
5373     case CANY:
5374 	scan = loceol;
5375 	break;
5376     case EXACT:		/* length of string is 1 */
5377 	c = (U8)*STRING(p);
5378 	while (scan < loceol && UCHARAT(scan) == c)
5379 	    scan++;
5380 	break;
5381     case EXACTF:	/* length of string is 1 */
5382 	c = (U8)*STRING(p);
5383 	while (scan < loceol &&
5384 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5385 	    scan++;
5386 	break;
5387     case EXACTFL:	/* length of string is 1 */
5388 	PL_reg_flags |= RF_tainted;
5389 	c = (U8)*STRING(p);
5390 	while (scan < loceol &&
5391 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5392 	    scan++;
5393 	break;
5394     case ANYOF:
5395 	if (do_utf8) {
5396 	    loceol = PL_regeol;
5397 	    while (hardcount < max && scan < loceol &&
5398 		   reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5399 		scan += UTF8SKIP(scan);
5400 		hardcount++;
5401 	    }
5402 	} else {
5403 	    while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5404 		scan++;
5405 	}
5406 	break;
5407     case ALNUM:
5408 	if (do_utf8) {
5409 	    loceol = PL_regeol;
5410 	    LOAD_UTF8_CHARCLASS_ALNUM();
5411 	    while (hardcount < max && scan < loceol &&
5412 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5413 		scan += UTF8SKIP(scan);
5414 		hardcount++;
5415 	    }
5416 	} else {
5417 	    while (scan < loceol && isALNUM(*scan))
5418 		scan++;
5419 	}
5420 	break;
5421     case ALNUML:
5422 	PL_reg_flags |= RF_tainted;
5423 	if (do_utf8) {
5424 	    loceol = PL_regeol;
5425 	    while (hardcount < max && scan < loceol &&
5426 		   isALNUM_LC_utf8((U8*)scan)) {
5427 		scan += UTF8SKIP(scan);
5428 		hardcount++;
5429 	    }
5430 	} else {
5431 	    while (scan < loceol && isALNUM_LC(*scan))
5432 		scan++;
5433 	}
5434 	break;
5435     case NALNUM:
5436 	if (do_utf8) {
5437 	    loceol = PL_regeol;
5438 	    LOAD_UTF8_CHARCLASS_ALNUM();
5439 	    while (hardcount < max && scan < loceol &&
5440 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5441 		scan += UTF8SKIP(scan);
5442 		hardcount++;
5443 	    }
5444 	} else {
5445 	    while (scan < loceol && !isALNUM(*scan))
5446 		scan++;
5447 	}
5448 	break;
5449     case NALNUML:
5450 	PL_reg_flags |= RF_tainted;
5451 	if (do_utf8) {
5452 	    loceol = PL_regeol;
5453 	    while (hardcount < max && scan < loceol &&
5454 		   !isALNUM_LC_utf8((U8*)scan)) {
5455 		scan += UTF8SKIP(scan);
5456 		hardcount++;
5457 	    }
5458 	} else {
5459 	    while (scan < loceol && !isALNUM_LC(*scan))
5460 		scan++;
5461 	}
5462 	break;
5463     case SPACE:
5464 	if (do_utf8) {
5465 	    loceol = PL_regeol;
5466 	    LOAD_UTF8_CHARCLASS_SPACE();
5467 	    while (hardcount < max && scan < loceol &&
5468 		   (*scan == ' ' ||
5469 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5470 		scan += UTF8SKIP(scan);
5471 		hardcount++;
5472 	    }
5473 	} else {
5474 	    while (scan < loceol && isSPACE(*scan))
5475 		scan++;
5476 	}
5477 	break;
5478     case SPACEL:
5479 	PL_reg_flags |= RF_tainted;
5480 	if (do_utf8) {
5481 	    loceol = PL_regeol;
5482 	    while (hardcount < max && scan < loceol &&
5483 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5484 		scan += UTF8SKIP(scan);
5485 		hardcount++;
5486 	    }
5487 	} else {
5488 	    while (scan < loceol && isSPACE_LC(*scan))
5489 		scan++;
5490 	}
5491 	break;
5492     case NSPACE:
5493 	if (do_utf8) {
5494 	    loceol = PL_regeol;
5495 	    LOAD_UTF8_CHARCLASS_SPACE();
5496 	    while (hardcount < max && scan < loceol &&
5497 		   !(*scan == ' ' ||
5498 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5499 		scan += UTF8SKIP(scan);
5500 		hardcount++;
5501 	    }
5502 	} else {
5503 	    while (scan < loceol && !isSPACE(*scan))
5504 		scan++;
5505 	}
5506 	break;
5507     case NSPACEL:
5508 	PL_reg_flags |= RF_tainted;
5509 	if (do_utf8) {
5510 	    loceol = PL_regeol;
5511 	    while (hardcount < max && scan < loceol &&
5512 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5513 		scan += UTF8SKIP(scan);
5514 		hardcount++;
5515 	    }
5516 	} else {
5517 	    while (scan < loceol && !isSPACE_LC(*scan))
5518 		scan++;
5519 	}
5520 	break;
5521     case DIGIT:
5522 	if (do_utf8) {
5523 	    loceol = PL_regeol;
5524 	    LOAD_UTF8_CHARCLASS_DIGIT();
5525 	    while (hardcount < max && scan < loceol &&
5526 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5527 		scan += UTF8SKIP(scan);
5528 		hardcount++;
5529 	    }
5530 	} else {
5531 	    while (scan < loceol && isDIGIT(*scan))
5532 		scan++;
5533 	}
5534 	break;
5535     case NDIGIT:
5536 	if (do_utf8) {
5537 	    loceol = PL_regeol;
5538 	    LOAD_UTF8_CHARCLASS_DIGIT();
5539 	    while (hardcount < max && scan < loceol &&
5540 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5541 		scan += UTF8SKIP(scan);
5542 		hardcount++;
5543 	    }
5544 	} else {
5545 	    while (scan < loceol && !isDIGIT(*scan))
5546 		scan++;
5547 	}
5548     case LNBREAK:
5549         if (do_utf8) {
5550 	    loceol = PL_regeol;
5551 	    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5552 		scan += c;
5553 		hardcount++;
5554 	    }
5555 	} else {
5556 	    /*
5557 	      LNBREAK can match two latin chars, which is ok,
5558 	      because we have a null terminated string, but we
5559 	      have to use hardcount in this situation
5560 	    */
5561 	    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5562 		scan+=c;
5563 		hardcount++;
5564 	    }
5565 	}
5566 	break;
5567     case HORIZWS:
5568         if (do_utf8) {
5569 	    loceol = PL_regeol;
5570 	    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5571 		scan += c;
5572 		hardcount++;
5573 	    }
5574 	} else {
5575 	    while (scan < loceol && is_HORIZWS_latin1(scan))
5576 		scan++;
5577 	}
5578 	break;
5579     case NHORIZWS:
5580         if (do_utf8) {
5581 	    loceol = PL_regeol;
5582 	    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5583 		scan += UTF8SKIP(scan);
5584 		hardcount++;
5585 	    }
5586 	} else {
5587 	    while (scan < loceol && !is_HORIZWS_latin1(scan))
5588 		scan++;
5589 
5590 	}
5591 	break;
5592     case VERTWS:
5593         if (do_utf8) {
5594 	    loceol = PL_regeol;
5595 	    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5596 		scan += c;
5597 		hardcount++;
5598 	    }
5599 	} else {
5600 	    while (scan < loceol && is_VERTWS_latin1(scan))
5601 		scan++;
5602 
5603 	}
5604 	break;
5605     case NVERTWS:
5606         if (do_utf8) {
5607 	    loceol = PL_regeol;
5608 	    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5609 		scan += UTF8SKIP(scan);
5610 		hardcount++;
5611 	    }
5612 	} else {
5613 	    while (scan < loceol && !is_VERTWS_latin1(scan))
5614 		scan++;
5615 
5616 	}
5617 	break;
5618 
5619     default:		/* Called on something of 0 width. */
5620 	break;		/* So match right here or not at all. */
5621     }
5622 
5623     if (hardcount)
5624 	c = hardcount;
5625     else
5626 	c = scan - PL_reginput;
5627     PL_reginput = scan;
5628 
5629     DEBUG_r({
5630 	GET_RE_DEBUG_FLAGS_DECL;
5631 	DEBUG_EXECUTE_r({
5632 	    SV * const prop = sv_newmortal();
5633 	    regprop(prog, prop, p);
5634 	    PerlIO_printf(Perl_debug_log,
5635 			"%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5636 			REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5637 	});
5638     });
5639 
5640     return(c);
5641 }
5642 
5643 
5644 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5645 /*
5646 - regclass_swash - prepare the utf8 swash
5647 */
5648 
5649 SV *
5650 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5651 {
5652     dVAR;
5653     SV *sw  = NULL;
5654     SV *si  = NULL;
5655     SV *alt = NULL;
5656     RXi_GET_DECL(prog,progi);
5657     const struct reg_data * const data = prog ? progi->data : NULL;
5658 
5659     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5660 
5661     if (data && data->count) {
5662 	const U32 n = ARG(node);
5663 
5664 	if (data->what[n] == 's') {
5665 	    SV * const rv = MUTABLE_SV(data->data[n]);
5666 	    AV * const av = MUTABLE_AV(SvRV(rv));
5667 	    SV **const ary = AvARRAY(av);
5668 	    SV **a, **b;
5669 
5670 	    /* See the end of regcomp.c:S_regclass() for
5671 	     * documentation of these array elements. */
5672 
5673 	    si = *ary;
5674 	    a  = SvROK(ary[1]) ? &ary[1] : NULL;
5675 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5676 
5677 	    if (a)
5678 		sw = *a;
5679 	    else if (si && doinit) {
5680 		sw = swash_init("utf8", "", si, 1, 0);
5681 		(void)av_store(av, 1, sw);
5682 	    }
5683 	    if (b)
5684 	        alt = *b;
5685 	}
5686     }
5687 
5688     if (listsvp)
5689 	*listsvp = si;
5690     if (altsvp)
5691 	*altsvp  = alt;
5692 
5693     return sw;
5694 }
5695 #endif
5696 
5697 /*
5698  - reginclass - determine if a character falls into a character class
5699 
5700   The n is the ANYOF regnode, the p is the target string, lenp
5701   is pointer to the maximum length of how far to go in the p
5702   (if the lenp is zero, UTF8SKIP(p) is used),
5703   do_utf8 tells whether the target string is in UTF-8.
5704 
5705  */
5706 
5707 STATIC bool
5708 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5709 {
5710     dVAR;
5711     const char flags = ANYOF_FLAGS(n);
5712     bool match = FALSE;
5713     UV c = *p;
5714     STRLEN len = 0;
5715     STRLEN plen;
5716 
5717     PERL_ARGS_ASSERT_REGINCLASS;
5718 
5719     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5720 	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5721 		(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5722 		/* see [perl #37836] for UTF8_ALLOW_ANYUV */
5723 	if (len == (STRLEN)-1)
5724 	    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5725     }
5726 
5727     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5728     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5729         if (lenp)
5730 	    *lenp = 0;
5731 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
5732 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5733 		match = TRUE;
5734 	}
5735 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5736 	    match = TRUE;
5737 	if (!match) {
5738 	    AV *av;
5739 	    SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5740 
5741 	    if (sw) {
5742 		U8 * utf8_p;
5743 		if (do_utf8) {
5744 		    utf8_p = (U8 *) p;
5745 		} else {
5746 		    STRLEN len = 1;
5747 		    utf8_p = bytes_to_utf8(p, &len);
5748 		}
5749 		if (swash_fetch(sw, utf8_p, 1))
5750 		    match = TRUE;
5751 		else if (flags & ANYOF_FOLD) {
5752 		    if (!match && lenp && av) {
5753 		        I32 i;
5754 			for (i = 0; i <= av_len(av); i++) {
5755 			    SV* const sv = *av_fetch(av, i, FALSE);
5756 			    STRLEN len;
5757 			    const char * const s = SvPV_const(sv, len);
5758 			    if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5759 			        *lenp = len;
5760 				match = TRUE;
5761 				break;
5762 			    }
5763 			}
5764 		    }
5765 		    if (!match) {
5766 		        U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5767 
5768 			STRLEN tmplen;
5769 			to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5770 			if (swash_fetch(sw, tmpbuf, 1))
5771 			    match = TRUE;
5772 		    }
5773 		}
5774 
5775 		/* If we allocated a string above, free it */
5776 		if (! do_utf8) Safefree(utf8_p);
5777 	    }
5778 	}
5779 	if (match && lenp && *lenp == 0)
5780 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
5781     }
5782     if (!match && c < 256) {
5783 	if (ANYOF_BITMAP_TEST(n, c))
5784 	    match = TRUE;
5785 	else if (flags & ANYOF_FOLD) {
5786 	    U8 f;
5787 
5788 	    if (flags & ANYOF_LOCALE) {
5789 		PL_reg_flags |= RF_tainted;
5790 		f = PL_fold_locale[c];
5791 	    }
5792 	    else
5793 		f = PL_fold[c];
5794 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
5795 		match = TRUE;
5796 	}
5797 
5798 	if (!match && (flags & ANYOF_CLASS)) {
5799 	    PL_reg_flags |= RF_tainted;
5800 	    if (
5801 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5802 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5803 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5804 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5805 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5806 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5807 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5808 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5809 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5810 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5811 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5812 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5813 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5814 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5815 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5816 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5817 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5818 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5819 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5820 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5821 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5822 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5823 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5824 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5825 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5826 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5827 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5828 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5829 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5830 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5831 		) /* How's that for a conditional? */
5832 	    {
5833 		match = TRUE;
5834 	    }
5835 	}
5836     }
5837 
5838     return (flags & ANYOF_INVERT) ? !match : match;
5839 }
5840 
5841 STATIC U8 *
5842 S_reghop3(U8 *s, I32 off, const U8* lim)
5843 {
5844     dVAR;
5845 
5846     PERL_ARGS_ASSERT_REGHOP3;
5847 
5848     if (off >= 0) {
5849 	while (off-- && s < lim) {
5850 	    /* XXX could check well-formedness here */
5851 	    s += UTF8SKIP(s);
5852 	}
5853     }
5854     else {
5855         while (off++ && s > lim) {
5856             s--;
5857             if (UTF8_IS_CONTINUED(*s)) {
5858                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5859                     s--;
5860 	    }
5861             /* XXX could check well-formedness here */
5862 	}
5863     }
5864     return s;
5865 }
5866 
5867 #ifdef XXX_dmq
5868 /* there are a bunch of places where we use two reghop3's that should
5869    be replaced with this routine. but since thats not done yet
5870    we ifdef it out - dmq
5871 */
5872 STATIC U8 *
5873 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5874 {
5875     dVAR;
5876 
5877     PERL_ARGS_ASSERT_REGHOP4;
5878 
5879     if (off >= 0) {
5880         while (off-- && s < rlim) {
5881             /* XXX could check well-formedness here */
5882             s += UTF8SKIP(s);
5883         }
5884     }
5885     else {
5886         while (off++ && s > llim) {
5887             s--;
5888             if (UTF8_IS_CONTINUED(*s)) {
5889                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5890                     s--;
5891             }
5892             /* XXX could check well-formedness here */
5893         }
5894     }
5895     return s;
5896 }
5897 #endif
5898 
5899 STATIC U8 *
5900 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5901 {
5902     dVAR;
5903 
5904     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5905 
5906     if (off >= 0) {
5907 	while (off-- && s < lim) {
5908 	    /* XXX could check well-formedness here */
5909 	    s += UTF8SKIP(s);
5910 	}
5911 	if (off >= 0)
5912 	    return NULL;
5913     }
5914     else {
5915         while (off++ && s > lim) {
5916             s--;
5917             if (UTF8_IS_CONTINUED(*s)) {
5918                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5919                     s--;
5920 	    }
5921             /* XXX could check well-formedness here */
5922 	}
5923 	if (off <= 0)
5924 	    return NULL;
5925     }
5926     return s;
5927 }
5928 
5929 static void
5930 restore_pos(pTHX_ void *arg)
5931 {
5932     dVAR;
5933     regexp * const rex = (regexp *)arg;
5934     if (PL_reg_eval_set) {
5935 	if (PL_reg_oldsaved) {
5936 	    rex->subbeg = PL_reg_oldsaved;
5937 	    rex->sublen = PL_reg_oldsavedlen;
5938 #ifdef PERL_OLD_COPY_ON_WRITE
5939 	    rex->saved_copy = PL_nrs;
5940 #endif
5941 	    RXp_MATCH_COPIED_on(rex);
5942 	}
5943 	PL_reg_magic->mg_len = PL_reg_oldpos;
5944 	PL_reg_eval_set = 0;
5945 	PL_curpm = PL_reg_oldcurpm;
5946     }
5947 }
5948 
5949 STATIC void
5950 S_to_utf8_substr(pTHX_ register regexp *prog)
5951 {
5952     int i = 1;
5953 
5954     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5955 
5956     do {
5957 	if (prog->substrs->data[i].substr
5958 	    && !prog->substrs->data[i].utf8_substr) {
5959 	    SV* const sv = newSVsv(prog->substrs->data[i].substr);
5960 	    prog->substrs->data[i].utf8_substr = sv;
5961 	    sv_utf8_upgrade(sv);
5962 	    if (SvVALID(prog->substrs->data[i].substr)) {
5963 		const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5964 		if (flags & FBMcf_TAIL) {
5965 		    /* Trim the trailing \n that fbm_compile added last
5966 		       time.  */
5967 		    SvCUR_set(sv, SvCUR(sv) - 1);
5968 		    /* Whilst this makes the SV technically "invalid" (as its
5969 		       buffer is no longer followed by "\0") when fbm_compile()
5970 		       adds the "\n" back, a "\0" is restored.  */
5971 		}
5972 		fbm_compile(sv, flags);
5973 	    }
5974 	    if (prog->substrs->data[i].substr == prog->check_substr)
5975 		prog->check_utf8 = sv;
5976 	}
5977     } while (i--);
5978 }
5979 
5980 STATIC void
5981 S_to_byte_substr(pTHX_ register regexp *prog)
5982 {
5983     dVAR;
5984     int i = 1;
5985 
5986     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
5987 
5988     do {
5989 	if (prog->substrs->data[i].utf8_substr
5990 	    && !prog->substrs->data[i].substr) {
5991 	    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5992 	    if (sv_utf8_downgrade(sv, TRUE)) {
5993 		if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5994 		    const U8 flags
5995 			= BmFLAGS(prog->substrs->data[i].utf8_substr);
5996 		    if (flags & FBMcf_TAIL) {
5997 			/* Trim the trailing \n that fbm_compile added last
5998 			   time.  */
5999 			SvCUR_set(sv, SvCUR(sv) - 1);
6000 		    }
6001 		    fbm_compile(sv, flags);
6002 		}
6003 	    } else {
6004 		SvREFCNT_dec(sv);
6005 		sv = &PL_sv_undef;
6006 	    }
6007 	    prog->substrs->data[i].substr = sv;
6008 	    if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6009 		prog->check_substr = sv;
6010 	}
6011     } while (i--);
6012 }
6013 
6014 /*
6015  * Local variables:
6016  * c-indentation-style: bsd
6017  * c-basic-offset: 4
6018  * indent-tabs-mode: t
6019  * End:
6020  *
6021  * ex: set ts=8 sts=4 sw=4 noet:
6022  */
6023