xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision db3296cf5c1dd9058ceecc3a29fe4aaa0bd26000)
1 /*    regexec.c
2  */
3 
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7 
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9  * confused with the original package (see point 3 below).  Thanks, Henry!
10  */
11 
12 /* Additional note: this code is very heavily munged from Henry's version
13  * in places.  In some spots I've traded clarity for efficiency, so don't
14  * blame Henry for some of the lack of readability.
15  */
16 
17 /* The names of the functions have been changed from regcomp and
18  * regexec to  pregcomp and pregexec in order to avoid conflicts
19  * with the POSIX routines of the same names.
20 */
21 
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 #  ifndef PERL_IN_XSUB_RE
25 #    define PERL_IN_XSUB_RE
26 #  endif
27 /* need access to debugger hooks */
28 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29 #    define DEBUGGING
30 #  endif
31 #endif
32 
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 #  define Perl_regexec_flags my_regexec
36 #  define Perl_regdump my_regdump
37 #  define Perl_regprop my_regprop
38 #  define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 #  define Perl_pregexec my_pregexec
41 #  define Perl_reginitcolors my_reginitcolors
42 #  define Perl_regclass_swash my_regclass_swash
43 
44 #  define PERL_NO_GET_CONTEXT
45 #endif
46 
47 /*SUPPRESS 112*/
48 /*
49  * pregcomp and pregexec -- regsub and regerror are not used in perl
50  *
51  *	Copyright (c) 1986 by University of Toronto.
52  *	Written by Henry Spencer.  Not derived from licensed software.
53  *
54  *	Permission is granted to anyone to use this software for any
55  *	purpose on any computer system, and to redistribute it freely,
56  *	subject to the following restrictions:
57  *
58  *	1. The author is not responsible for the consequences of use of
59  *		this software, no matter how awful, even if they arise
60  *		from defects in it.
61  *
62  *	2. The origin of this software must not be misrepresented, either
63  *		by explicit claim or by omission.
64  *
65  *	3. Altered versions must be plainly marked as such, and must not
66  *		be misrepresented as being the original software.
67  *
68  ****    Alterations to Henry's code are...
69  ****
70  ****    Copyright (c) 1991-2002, Larry Wall
71  ****
72  ****    You may distribute under the terms of either the GNU General Public
73  ****    License or the Artistic License, as specified in the README file.
74  *
75  * Beware that some of this code is subtly aware of the way operator
76  * precedence is structured in regular expressions.  Serious changes in
77  * regular-expression syntax might require a total rethink.
78  */
79 #include "EXTERN.h"
80 #define PERL_IN_REGEXEC_C
81 #include "perl.h"
82 
83 #include "regcomp.h"
84 
85 #define RF_tainted	1		/* tainted information used? */
86 #define RF_warned	2		/* warned about big count? */
87 #define RF_evaled	4		/* Did an EVAL with setting? */
88 #define RF_utf8		8		/* String contains multibyte chars? */
89 
90 #define UTF ((PL_reg_flags & RF_utf8) != 0)
91 
92 #define RS_init		1		/* eval environment created */
93 #define RS_set		2		/* replsv value is set */
94 
95 #ifndef STATIC
96 #define	STATIC	static
97 #endif
98 
99 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
100 
101 /*
102  * Forwards.
103  */
104 
105 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
106 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
107 
108 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
109 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
110 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
111 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
112 #define HOPc(pos,off) ((char*)HOP(pos,off))
113 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
114 
115 #define HOPBACK(pos, off) (		\
116     (PL_reg_match_utf8)			\
117 	? reghopmaybe((U8*)pos, -off)	\
118     : (pos - off >= PL_bostr)		\
119 	? (U8*)(pos - off)		\
120     : (U8*)NULL				\
121 )
122 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
123 
124 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
125 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
126 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
127 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
128 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
129 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
130 
131 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
132 
133 /* for use after a quantifier and before an EXACT-like node -- japhy */
134 #define JUMPABLE(rn) ( \
135     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
136     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
137     OP(rn) == PLUS || OP(rn) == MINMOD || \
138     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
139 )
140 
141 #define HAS_TEXT(rn) ( \
142     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
143 )
144 
145 /*
146   Search for mandatory following text node; for lookahead, the text must
147   follow but for lookbehind (rn->flags != 0) we skip to the next step.
148 */
149 #define FIND_NEXT_IMPT(rn) STMT_START { \
150     while (JUMPABLE(rn)) \
151 	if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
152 	    rn = NEXTOPER(NEXTOPER(rn)); \
153 	else if (OP(rn) == PLUS) \
154 	    rn = NEXTOPER(rn); \
155 	else if (OP(rn) == IFMATCH) \
156 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
157 	else rn += NEXT_OFF(rn); \
158 } STMT_END
159 
160 static void restore_pos(pTHX_ void *arg);
161 
162 STATIC CHECKPOINT
163 S_regcppush(pTHX_ I32 parenfloor)
164 {
165     int retval = PL_savestack_ix;
166 #define REGCP_PAREN_ELEMS 4
167     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
168     int p;
169 
170     if (paren_elems_to_push < 0)
171 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
172 
173 #define REGCP_OTHER_ELEMS 6
174     SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
175     for (p = PL_regsize; p > parenfloor; p--) {
176 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
177 	SSPUSHINT(PL_regendp[p]);
178 	SSPUSHINT(PL_regstartp[p]);
179 	SSPUSHPTR(PL_reg_start_tmp[p]);
180 	SSPUSHINT(p);
181     }
182 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
183     SSPUSHINT(PL_regsize);
184     SSPUSHINT(*PL_reglastparen);
185     SSPUSHINT(*PL_reglastcloseparen);
186     SSPUSHPTR(PL_reginput);
187 #define REGCP_FRAME_ELEMS 2
188 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
189  * are needed for the regexp context stack bookkeeping. */
190     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
191     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
192 
193     return retval;
194 }
195 
196 /* These are needed since we do not localize EVAL nodes: */
197 #  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,		\
198 			     "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
199 			     (IV)PL_savestack_ix)); cp = PL_savestack_ix
200 
201 #  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?		\
202 				PerlIO_printf(Perl_debug_log,		\
203 				"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
204 				(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
205 
206 STATIC char *
207 S_regcppop(pTHX)
208 {
209     I32 i;
210     U32 paren = 0;
211     char *input;
212     I32 tmps;
213 
214     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
215     i = SSPOPINT;
216     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
217     i = SSPOPINT; /* Parentheses elements to pop. */
218     input = (char *) SSPOPPTR;
219     *PL_reglastcloseparen = SSPOPINT;
220     *PL_reglastparen = SSPOPINT;
221     PL_regsize = SSPOPINT;
222 
223     /* Now restore the parentheses context. */
224     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
225 	 i > 0; i -= REGCP_PAREN_ELEMS) {
226 	paren = (U32)SSPOPINT;
227 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
228 	PL_regstartp[paren] = SSPOPINT;
229 	tmps = SSPOPINT;
230 	if (paren <= *PL_reglastparen)
231 	    PL_regendp[paren] = tmps;
232 	DEBUG_r(
233 	    PerlIO_printf(Perl_debug_log,
234 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
235 			  (UV)paren, (IV)PL_regstartp[paren],
236 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
237 			  (IV)PL_regendp[paren],
238 			  (paren > *PL_reglastparen ? "(no)" : ""));
239 	);
240     }
241     DEBUG_r(
242 	if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
243 	    PerlIO_printf(Perl_debug_log,
244 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
245 			  (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
246 	}
247     );
248 #if 1
249     /* It would seem that the similar code in regtry()
250      * already takes care of this, and in fact it is in
251      * a better location to since this code can #if 0-ed out
252      * but the code in regtry() is needed or otherwise tests
253      * requiring null fields (pat.t#187 and split.t#{13,14}
254      * (as of patchlevel 7877)  will fail.  Then again,
255      * this code seems to be necessary or otherwise
256      * building DynaLoader will fail:
257      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
258      * --jhi */
259     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
260 	if ((I32)paren > PL_regsize)
261 	    PL_regstartp[paren] = -1;
262 	PL_regendp[paren] = -1;
263     }
264 #endif
265     return input;
266 }
267 
268 STATIC char *
269 S_regcp_set_to(pTHX_ I32 ss)
270 {
271     I32 tmp = PL_savestack_ix;
272 
273     PL_savestack_ix = ss;
274     regcppop();
275     PL_savestack_ix = tmp;
276     return Nullch;
277 }
278 
279 typedef struct re_cc_state
280 {
281     I32 ss;
282     regnode *node;
283     struct re_cc_state *prev;
284     CURCUR *cc;
285     regexp *re;
286 } re_cc_state;
287 
288 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
289 
290 #define TRYPAREN(paren, n, input) {				\
291     if (paren) {						\
292 	if (n) {						\
293 	    PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;	\
294 	    PL_regendp[paren] = input - PL_bostr;		\
295 	}							\
296 	else							\
297 	    PL_regendp[paren] = -1;				\
298     }								\
299     if (regmatch(next))						\
300 	sayYES;							\
301     if (paren && n)						\
302 	PL_regendp[paren] = -1;					\
303 }
304 
305 
306 /*
307  * pregexec and friends
308  */
309 
310 /*
311  - pregexec - match a regexp against a string
312  */
313 I32
314 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
315 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
316 /* strend: pointer to null at end of string */
317 /* strbeg: real beginning of string */
318 /* minend: end of match must be >=minend after stringarg. */
319 /* nosave: For optimizations. */
320 {
321     return
322 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
323 		      nosave ? 0 : REXEC_COPY_STR);
324 }
325 
326 STATIC void
327 S_cache_re(pTHX_ regexp *prog)
328 {
329     PL_regprecomp = prog->precomp;		/* Needed for FAIL. */
330 #ifdef DEBUGGING
331     PL_regprogram = prog->program;
332 #endif
333     PL_regnpar = prog->nparens;
334     PL_regdata = prog->data;
335     PL_reg_re = prog;
336 }
337 
338 /*
339  * Need to implement the following flags for reg_anch:
340  *
341  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
342  * USE_INTUIT_ML
343  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
344  * INTUIT_AUTORITATIVE_ML
345  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
346  * INTUIT_ONCE_ML
347  *
348  * Another flag for this function: SECOND_TIME (so that float substrs
349  * with giant delta may be not rechecked).
350  */
351 
352 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
353 
354 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
355    Otherwise, only SvCUR(sv) is used to get strbeg. */
356 
357 /* XXXX We assume that strpos is strbeg unless sv. */
358 
359 /* XXXX Some places assume that there is a fixed substring.
360 	An update may be needed if optimizer marks as "INTUITable"
361 	RExen without fixed substrings.  Similarly, it is assumed that
362 	lengths of all the strings are no more than minlen, thus they
363 	cannot come from lookahead.
364 	(Or minlen should take into account lookahead.) */
365 
366 /* A failure to find a constant substring means that there is no need to make
367    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
368    finding a substring too deep into the string means that less calls to
369    regtry() should be needed.
370 
371    REx compiler's optimizer found 4 possible hints:
372 	a) Anchored substring;
373 	b) Fixed substring;
374 	c) Whether we are anchored (beginning-of-line or \G);
375 	d) First node (of those at offset 0) which may distingush positions;
376    We use a)b)d) and multiline-part of c), and try to find a position in the
377    string which does not contradict any of them.
378  */
379 
380 /* Most of decisions we do here should have been done at compile time.
381    The nodes of the REx which we used for the search should have been
382    deleted from the finite automaton. */
383 
384 char *
385 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
386 		     char *strend, U32 flags, re_scream_pos_data *data)
387 {
388     register I32 start_shift = 0;
389     /* Should be nonnegative! */
390     register I32 end_shift   = 0;
391     register char *s;
392     register SV *check;
393     char *strbeg;
394     char *t;
395     int do_utf8 = sv ? SvUTF8(sv) : 0;	/* if no sv we have to assume bytes */
396     I32 ml_anch;
397     register char *other_last = Nullch;	/* other substr checked before this */
398     char *check_at = Nullch;		/* check substr found at this pos */
399 #ifdef DEBUGGING
400     char *i_strpos = strpos;
401     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
402 #endif
403 
404     if (prog->reganch & ROPT_UTF8) {
405 	DEBUG_r(PerlIO_printf(Perl_debug_log,
406 			      "UTF-8 regex...\n"));
407 	PL_reg_flags |= RF_utf8;
408     }
409 
410     DEBUG_r({
411 	 char *s   = PL_reg_match_utf8 ?
412 	                 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
413 	                 strpos;
414 	 int   len = PL_reg_match_utf8 ?
415 	                 strlen(s) : strend - strpos;
416 	 if (!PL_colorset)
417 	      reginitcolors();
418 	 if (PL_reg_match_utf8)
419 	     DEBUG_r(PerlIO_printf(Perl_debug_log,
420 				   "UTF-8 target...\n"));
421 	 PerlIO_printf(Perl_debug_log,
422 		       "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
423 		       PL_colors[4],PL_colors[5],PL_colors[0],
424 		       prog->precomp,
425 		       PL_colors[1],
426 		       (strlen(prog->precomp) > 60 ? "..." : ""),
427 		       PL_colors[0],
428 		       (int)(len > 60 ? 60 : len),
429 		       s, PL_colors[1],
430 		       (len > 60 ? "..." : "")
431 	      );
432     });
433 
434     /* CHR_DIST() would be more correct here but it makes things slow. */
435     if (prog->minlen > strend - strpos) {
436 	DEBUG_r(PerlIO_printf(Perl_debug_log,
437 			      "String too short... [re_intuit_start]\n"));
438 	goto fail;
439     }
440     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
441     PL_regeol = strend;
442     if (do_utf8) {
443 	if (!prog->check_utf8 && prog->check_substr)
444 	    to_utf8_substr(prog);
445 	check = prog->check_utf8;
446     } else {
447 	if (!prog->check_substr && prog->check_utf8)
448 	    to_byte_substr(prog);
449 	check = prog->check_substr;
450     }
451    if (check == &PL_sv_undef) {
452 	DEBUG_r(PerlIO_printf(Perl_debug_log,
453 		"Non-utf string cannot match utf check string\n"));
454 	goto fail;
455     }
456     if (prog->reganch & ROPT_ANCH) {	/* Match at beg-of-str or after \n */
457 	ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
458 		     || ( (prog->reganch & ROPT_ANCH_BOL)
459 			  && !PL_multiline ) );	/* Check after \n? */
460 
461 	if (!ml_anch) {
462 	  if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
463 				  | ROPT_IMPLICIT)) /* not a real BOL */
464 	       /* SvCUR is not set on references: SvRV and SvPVX overlap */
465 	       && sv && !SvROK(sv)
466 	       && (strpos != strbeg)) {
467 	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
468 	      goto fail;
469 	  }
470 	  if (prog->check_offset_min == prog->check_offset_max &&
471 	      !(prog->reganch & ROPT_CANY_SEEN)) {
472 	    /* Substring at constant offset from beg-of-str... */
473 	    I32 slen;
474 
475 	    s = HOP3c(strpos, prog->check_offset_min, strend);
476 	    if (SvTAIL(check)) {
477 		slen = SvCUR(check);	/* >= 1 */
478 
479 		if ( strend - s > slen || strend - s < slen - 1
480 		     || (strend - s == slen && strend[-1] != '\n')) {
481 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
482 		    goto fail_finish;
483 		}
484 		/* Now should match s[0..slen-2] */
485 		slen--;
486 		if (slen && (*SvPVX(check) != *s
487 			     || (slen > 1
488 				 && memNE(SvPVX(check), s, slen)))) {
489 		  report_neq:
490 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
491 		    goto fail_finish;
492 		}
493 	    }
494 	    else if (*SvPVX(check) != *s
495 		     || ((slen = SvCUR(check)) > 1
496 			 && memNE(SvPVX(check), s, slen)))
497 		goto report_neq;
498 	    goto success_at_start;
499 	  }
500 	}
501 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
502 	s = strpos;
503 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
504 	end_shift = prog->minlen - start_shift -
505 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
506 	if (!ml_anch) {
507 	    I32 end = prog->check_offset_max + CHR_SVLEN(check)
508 					 - (SvTAIL(check) != 0);
509 	    I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
510 
511 	    if (end_shift < eshift)
512 		end_shift = eshift;
513 	}
514     }
515     else {				/* Can match at random position */
516 	ml_anch = 0;
517 	s = strpos;
518 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
519 	/* Should be nonnegative! */
520 	end_shift = prog->minlen - start_shift -
521 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
522     }
523 
524 #ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
525     if (end_shift < 0)
526 	Perl_croak(aTHX_ "panic: end_shift");
527 #endif
528 
529   restart:
530     /* Find a possible match in the region s..strend by looking for
531        the "check" substring in the region corrected by start/end_shift. */
532     if (flags & REXEC_SCREAM) {
533 	I32 p = -1;			/* Internal iterator of scream. */
534 	I32 *pp = data ? data->scream_pos : &p;
535 
536 	if (PL_screamfirst[BmRARE(check)] >= 0
537 	    || ( BmRARE(check) == '\n'
538 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
539 		 && SvTAIL(check) ))
540 	    s = screaminstr(sv, check,
541 			    start_shift + (s - strbeg), end_shift, pp, 0);
542 	else
543 	    goto fail_finish;
544 	if (data)
545 	    *data->scream_olds = s;
546     }
547     else if (prog->reganch & ROPT_CANY_SEEN)
548 	s = fbm_instr((U8*)(s + start_shift),
549 		      (U8*)(strend - end_shift),
550 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
551     else
552 	s = fbm_instr(HOP3(s, start_shift, strend),
553 		      HOP3(strend, -end_shift, strbeg),
554 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
555 
556     /* Update the count-of-usability, remove useless subpatterns,
557 	unshift s.  */
558 
559     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
560 			  (s ? "Found" : "Did not find"),
561 			  (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
562 			  PL_colors[0],
563 			  (int)(SvCUR(check) - (SvTAIL(check)!=0)),
564 			  SvPVX(check),
565 			  PL_colors[1], (SvTAIL(check) ? "$" : ""),
566 			  (s ? " at offset " : "...\n") ) );
567 
568     if (!s)
569 	goto fail_finish;
570 
571     check_at = s;
572 
573     /* Finish the diagnostic message */
574     DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
575 
576     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
577        Start with the other substr.
578        XXXX no SCREAM optimization yet - and a very coarse implementation
579        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
580 		*always* match.  Probably should be marked during compile...
581        Probably it is right to do no SCREAM here...
582      */
583 
584     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
585 	/* Take into account the "other" substring. */
586 	/* XXXX May be hopelessly wrong for UTF... */
587 	if (!other_last)
588 	    other_last = strpos;
589 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
590 	  do_other_anchored:
591 	    {
592 		char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
593 		char *s1 = s;
594 		SV* must;
595 
596 		t = s - prog->check_offset_max;
597 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
598 		    && (!(prog->reganch & ROPT_UTF8)
599 			|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
600 			    && t > strpos)))
601 		    /* EMPTY */;
602 		else
603 		    t = strpos;
604 		t = HOP3c(t, prog->anchored_offset, strend);
605 		if (t < other_last)	/* These positions already checked */
606 		    t = other_last;
607 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
608 		if (last < last1)
609 		    last1 = last;
610  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
611 		/* On end-of-str: see comment below. */
612 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
613 		if (must == &PL_sv_undef) {
614 		    s = (char*)NULL;
615 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
616 		}
617 		else
618 		    s = fbm_instr(
619 			(unsigned char*)t,
620 			HOP3(HOP3(last1, prog->anchored_offset, strend)
621 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
622 			must,
623 			PL_multiline ? FBMrf_MULTILINE : 0
624 		    );
625 		DEBUG_r(PerlIO_printf(Perl_debug_log,
626 			"%s anchored substr `%s%.*s%s'%s",
627 			(s ? "Found" : "Contradicts"),
628 			PL_colors[0],
629 			  (int)(SvCUR(must)
630 			  - (SvTAIL(must)!=0)),
631 			  SvPVX(must),
632 			  PL_colors[1], (SvTAIL(must) ? "$" : "")));
633 		if (!s) {
634 		    if (last1 >= last2) {
635 			DEBUG_r(PerlIO_printf(Perl_debug_log,
636 						", giving up...\n"));
637 			goto fail_finish;
638 		    }
639 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
640 			", trying floating at offset %ld...\n",
641 			(long)(HOP3c(s1, 1, strend) - i_strpos)));
642 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
643 		    s = HOP3c(last, 1, strend);
644 		    goto restart;
645 		}
646 		else {
647 		    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
648 			  (long)(s - i_strpos)));
649 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
650 		    other_last = HOP3c(s, 1, strend);
651 		    s = s1;
652 		    if (t == strpos)
653 			goto try_at_start;
654 		    goto try_at_offset;
655 		}
656 	    }
657 	}
658 	else {		/* Take into account the floating substring. */
659 	    char *last, *last1;
660 	    char *s1 = s;
661 	    SV* must;
662 
663 	    t = HOP3c(s, -start_shift, strbeg);
664 	    last1 = last =
665 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
666 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
667 		last = HOP3c(t, prog->float_max_offset, strend);
668 	    s = HOP3c(t, prog->float_min_offset, strend);
669 	    if (s < other_last)
670 		s = other_last;
671  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
672 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
673 	    /* fbm_instr() takes into account exact value of end-of-str
674 	       if the check is SvTAIL(ed).  Since false positives are OK,
675 	       and end-of-str is not later than strend we are OK. */
676 	    if (must == &PL_sv_undef) {
677 		s = (char*)NULL;
678 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
679 	    }
680 	    else
681 		s = fbm_instr((unsigned char*)s,
682 			      (unsigned char*)last + SvCUR(must)
683 				  - (SvTAIL(must)!=0),
684 			      must, PL_multiline ? FBMrf_MULTILINE : 0);
685 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
686 		    (s ? "Found" : "Contradicts"),
687 		    PL_colors[0],
688 		      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
689 		      SvPVX(must),
690 		      PL_colors[1], (SvTAIL(must) ? "$" : "")));
691 	    if (!s) {
692 		if (last1 == last) {
693 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
694 					    ", giving up...\n"));
695 		    goto fail_finish;
696 		}
697 		DEBUG_r(PerlIO_printf(Perl_debug_log,
698 		    ", trying anchored starting at offset %ld...\n",
699 		    (long)(s1 + 1 - i_strpos)));
700 		other_last = last;
701 		s = HOP3c(t, 1, strend);
702 		goto restart;
703 	    }
704 	    else {
705 		DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
706 		      (long)(s - i_strpos)));
707 		other_last = s; /* Fix this later. --Hugo */
708 		s = s1;
709 		if (t == strpos)
710 		    goto try_at_start;
711 		goto try_at_offset;
712 	    }
713 	}
714     }
715 
716     t = s - prog->check_offset_max;
717     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
718         && (!(prog->reganch & ROPT_UTF8)
719 	    || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
720 		 && t > strpos))) {
721 	/* Fixed substring is found far enough so that the match
722 	   cannot start at strpos. */
723       try_at_offset:
724 	if (ml_anch && t[-1] != '\n') {
725 	    /* Eventually fbm_*() should handle this, but often
726 	       anchored_offset is not 0, so this check will not be wasted. */
727 	    /* XXXX In the code below we prefer to look for "^" even in
728 	       presence of anchored substrings.  And we search even
729 	       beyond the found float position.  These pessimizations
730 	       are historical artefacts only.  */
731 	  find_anchor:
732 	    while (t < strend - prog->minlen) {
733 		if (*t == '\n') {
734 		    if (t < check_at - prog->check_offset_min) {
735 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
736 			    /* Since we moved from the found position,
737 			       we definitely contradict the found anchored
738 			       substr.  Due to the above check we do not
739 			       contradict "check" substr.
740 			       Thus we can arrive here only if check substr
741 			       is float.  Redo checking for "other"=="fixed".
742 			     */
743 			    strpos = t + 1;
744 			    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
745 				PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
746 			    goto do_other_anchored;
747 			}
748 			/* We don't contradict the found floating substring. */
749 			/* XXXX Why not check for STCLASS? */
750 			s = t + 1;
751 			DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
752 			    PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
753 			goto set_useful;
754 		    }
755 		    /* Position contradicts check-string */
756 		    /* XXXX probably better to look for check-string
757 		       than for "\n", so one should lower the limit for t? */
758 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
759 			PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
760 		    other_last = strpos = s = t + 1;
761 		    goto restart;
762 		}
763 		t++;
764 	    }
765 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
766 			PL_colors[0],PL_colors[1]));
767 	    goto fail_finish;
768 	}
769 	else {
770 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
771 			PL_colors[0],PL_colors[1]));
772 	}
773 	s = t;
774       set_useful:
775 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
776     }
777     else {
778 	/* The found string does not prohibit matching at strpos,
779 	   - no optimization of calling REx engine can be performed,
780 	   unless it was an MBOL and we are not after MBOL,
781 	   or a future STCLASS check will fail this. */
782       try_at_start:
783 	/* Even in this situation we may use MBOL flag if strpos is offset
784 	   wrt the start of the string. */
785 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
786 	    && (strpos != strbeg) && strpos[-1] != '\n'
787 	    /* May be due to an implicit anchor of m{.*foo}  */
788 	    && !(prog->reganch & ROPT_IMPLICIT))
789 	{
790 	    t = strpos;
791 	    goto find_anchor;
792 	}
793 	DEBUG_r( if (ml_anch)
794 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
795 			(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
796 	);
797       success_at_start:
798 	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
799 	    && (do_utf8 ? (
800 		prog->check_utf8		/* Could be deleted already */
801 		&& --BmUSEFUL(prog->check_utf8) < 0
802 		&& (prog->check_utf8 == prog->float_utf8)
803 	    ) : (
804 		prog->check_substr		/* Could be deleted already */
805 		&& --BmUSEFUL(prog->check_substr) < 0
806 		&& (prog->check_substr == prog->float_substr)
807 	    )))
808 	{
809 	    /* If flags & SOMETHING - do not do it many times on the same match */
810 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
811 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
812 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
813 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
814 	    prog->check_substr = prog->check_utf8 = Nullsv;	/* disable */
815 	    prog->float_substr = prog->float_utf8 = Nullsv;	/* clear */
816 	    check = Nullsv;			/* abort */
817 	    s = strpos;
818 	    /* XXXX This is a remnant of the old implementation.  It
819 	            looks wasteful, since now INTUIT can use many
820 	            other heuristics. */
821 	    prog->reganch &= ~RE_USE_INTUIT;
822 	}
823 	else
824 	    s = strpos;
825     }
826 
827     /* Last resort... */
828     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
829     if (prog->regstclass) {
830 	/* minlen == 0 is possible if regstclass is \b or \B,
831 	   and the fixed substr is ''$.
832 	   Since minlen is already taken into account, s+1 is before strend;
833 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
834 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
835 	   regstclass does not come from lookahead...  */
836 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
837 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
838 	U8* str = (U8*)STRING(prog->regstclass);
839 	int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
840 		    ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
841 		    : 1);
842 	char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
843 		? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
844 		: (prog->float_substr || prog->float_utf8
845 		   ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
846 			   cl_l, strend)
847 		   : strend);
848 	char *startpos = strbeg;
849 
850 	t = s;
851 	if (prog->reganch & ROPT_UTF8) {
852 	    PL_regdata = prog->data;
853 	    PL_bostr = startpos;
854 	}
855 	s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
856 	if (!s) {
857 #ifdef DEBUGGING
858 	    char *what = 0;
859 #endif
860 	    if (endpos == strend) {
861 		DEBUG_r( PerlIO_printf(Perl_debug_log,
862 				"Could not match STCLASS...\n") );
863 		goto fail;
864 	    }
865 	    DEBUG_r( PerlIO_printf(Perl_debug_log,
866 				   "This position contradicts STCLASS...\n") );
867 	    if ((prog->reganch & ROPT_ANCH) && !ml_anch)
868 		goto fail;
869 	    /* Contradict one of substrings */
870 	    if (prog->anchored_substr || prog->anchored_utf8) {
871 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
872 		    DEBUG_r( what = "anchored" );
873 		  hop_and_restart:
874 		    s = HOP3c(t, 1, strend);
875 		    if (s + start_shift + end_shift > strend) {
876 			/* XXXX Should be taken into account earlier? */
877 			DEBUG_r( PerlIO_printf(Perl_debug_log,
878 					       "Could not match STCLASS...\n") );
879 			goto fail;
880 		    }
881 		    if (!check)
882 			goto giveup;
883 		    DEBUG_r( PerlIO_printf(Perl_debug_log,
884 				"Looking for %s substr starting at offset %ld...\n",
885 				 what, (long)(s + start_shift - i_strpos)) );
886 		    goto restart;
887 		}
888 		/* Have both, check_string is floating */
889 		if (t + start_shift >= check_at) /* Contradicts floating=check */
890 		    goto retry_floating_check;
891 		/* Recheck anchored substring, but not floating... */
892 		s = check_at;
893 		if (!check)
894 		    goto giveup;
895 		DEBUG_r( PerlIO_printf(Perl_debug_log,
896 			  "Looking for anchored substr starting at offset %ld...\n",
897 			  (long)(other_last - i_strpos)) );
898 		goto do_other_anchored;
899 	    }
900 	    /* Another way we could have checked stclass at the
901                current position only: */
902 	    if (ml_anch) {
903 		s = t = t + 1;
904 		if (!check)
905 		    goto giveup;
906 		DEBUG_r( PerlIO_printf(Perl_debug_log,
907 			  "Looking for /%s^%s/m starting at offset %ld...\n",
908 			  PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
909 		goto try_at_offset;
910 	    }
911 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
912 		goto fail;
913 	    /* Check is floating subtring. */
914 	  retry_floating_check:
915 	    t = check_at - start_shift;
916 	    DEBUG_r( what = "floating" );
917 	    goto hop_and_restart;
918 	}
919 	if (t != s) {
920             DEBUG_r(PerlIO_printf(Perl_debug_log,
921 			"By STCLASS: moving %ld --> %ld\n",
922                                   (long)(t - i_strpos), (long)(s - i_strpos))
923                    );
924         }
925         else {
926             DEBUG_r(PerlIO_printf(Perl_debug_log,
927                                   "Does not contradict STCLASS...\n");
928                    );
929         }
930     }
931   giveup:
932     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
933 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
934 			  PL_colors[5], (long)(s - i_strpos)) );
935     return s;
936 
937   fail_finish:				/* Substring not found */
938     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
939 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
940   fail:
941     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
942 			  PL_colors[4],PL_colors[5]));
943     return Nullch;
944 }
945 
946 /* We know what class REx starts with.  Try to find this position... */
947 STATIC char *
948 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
949 {
950 	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
951 	char *m;
952 	STRLEN ln;
953 	unsigned int c1;
954 	unsigned int c2;
955 	char *e;
956 	register I32 tmp = 1;	/* Scratch variable? */
957 	register bool do_utf8 = PL_reg_match_utf8;
958 
959 	/* We know what class it must start with. */
960 	switch (OP(c)) {
961 	case ANYOF:
962 	    while (s < strend) {
963 		STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
964 
965 		if (do_utf8 ?
966 		    reginclass(c, (U8*)s, 0, do_utf8) :
967 		    REGINCLASS(c, (U8*)s) ||
968 		    (ANYOF_FOLD_SHARP_S(c, s, strend) &&
969 		     /* The assignment of 2 is intentional:
970 		      * for the sharp s, the skip is 2. */
971 		     (skip = SHARP_S_SKIP)
972 		     )) {
973 		    if (tmp && (norun || regtry(prog, s)))
974 			goto got_it;
975 		    else
976 			tmp = doevery;
977 		}
978 		else
979 		    tmp = 1;
980 		s += skip;
981 	    }
982 	    break;
983 	case CANY:
984 	    while (s < strend) {
985 	        if (tmp && (norun || regtry(prog, s)))
986 		    goto got_it;
987 		else
988 		    tmp = doevery;
989 		s++;
990 	    }
991 	    break;
992 	case EXACTF:
993 	    m = STRING(c);
994 	    ln = STR_LEN(c);
995 	    if (UTF) {
996 	        STRLEN ulen1, ulen2;
997 		U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
998 		U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
999 
1000 		to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1001 		to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1002 
1003 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1004 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1005 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1006 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1007 	    }
1008 	    else {
1009 		c1 = *(U8*)m;
1010 		c2 = PL_fold[c1];
1011 	    }
1012 	    goto do_exactf;
1013 	case EXACTFL:
1014 	    m = STRING(c);
1015 	    ln = STR_LEN(c);
1016 	    c1 = *(U8*)m;
1017 	    c2 = PL_fold_locale[c1];
1018 	  do_exactf:
1019 	    e = HOP3c(strend, -(I32)ln, s);
1020 
1021 	    if (norun && e < s)
1022 		e = s;			/* Due to minlen logic of intuit() */
1023 
1024 	    /* The idea in the EXACTF* cases is to first find the
1025 	     * first character of the EXACTF* node and then, if
1026 	     * necessary, case-insensitively compare the full
1027 	     * text of the node.  The c1 and c2 are the first
1028 	     * characters (though in Unicode it gets a bit
1029 	     * more complicated because there are more cases
1030 	     * than just upper and lower: one needs to use
1031 	     * the so-called folding case for case-insensitive
1032 	     * matching (called "loose matching" in Unicode).
1033 	     * ibcmp_utf8() will do just that. */
1034 
1035 	    if (do_utf8) {
1036 	        UV c, f;
1037 	        U8 tmpbuf [UTF8_MAXLEN+1];
1038 		U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1039 		STRLEN len, foldlen;
1040 
1041 		if (c1 == c2) {
1042 		    while (s <= e) {
1043 		        c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1044 					   ckWARN(WARN_UTF8) ?
1045 					   0 : UTF8_ALLOW_ANY);
1046 			if ( c == c1
1047 			     && (ln == len ||
1048 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1049 					    m, (char **)0, ln, (bool)UTF))
1050 			     && (norun || regtry(prog, s)) )
1051 			    goto got_it;
1052 			else {
1053 			     uvchr_to_utf8(tmpbuf, c);
1054 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1055 			     if ( f != c
1056 				  && (f == c1 || f == c2)
1057 				  && (ln == foldlen ||
1058 				      !ibcmp_utf8((char *) foldbuf,
1059 						  (char **)0, foldlen, do_utf8,
1060 						  m,
1061 						  (char **)0, ln, (bool)UTF))
1062 				  && (norun || regtry(prog, s)) )
1063 				  goto got_it;
1064 			}
1065 			s += len;
1066 		    }
1067 		}
1068 		else {
1069 		    while (s <= e) {
1070 		      c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1071 					   ckWARN(WARN_UTF8) ?
1072 					   0 : UTF8_ALLOW_ANY);
1073 
1074 			/* Handle some of the three Greek sigmas cases.
1075 			 * Note that not all the possible combinations
1076 			 * are handled here: some of them are handled
1077 			 * by the standard folding rules, and some of
1078 			 * them (the character class or ANYOF cases)
1079 			 * are handled during compiletime in
1080 			 * regexec.c:S_regclass(). */
1081 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1082 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1083 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1084 
1085 			if ( (c == c1 || c == c2)
1086 			     && (ln == len ||
1087 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1088 					    m, (char **)0, ln, (bool)UTF))
1089 			     && (norun || regtry(prog, s)) )
1090 			    goto got_it;
1091 			else {
1092 			     uvchr_to_utf8(tmpbuf, c);
1093 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1094 			     if ( f != c
1095 				  && (f == c1 || f == c2)
1096 				  && (ln == foldlen ||
1097 				      !ibcmp_utf8((char *) foldbuf,
1098 						  (char **)0, foldlen, do_utf8,
1099 						  m,
1100 						  (char **)0, ln, (bool)UTF))
1101 				  && (norun || regtry(prog, s)) )
1102 				  goto got_it;
1103 			}
1104 			s += len;
1105 		    }
1106 		}
1107 	    }
1108 	    else {
1109 		if (c1 == c2)
1110 		    while (s <= e) {
1111 			if ( *(U8*)s == c1
1112 			     && (ln == 1 || !(OP(c) == EXACTF
1113 					      ? ibcmp(s, m, ln)
1114 					      : ibcmp_locale(s, m, ln)))
1115 			     && (norun || regtry(prog, s)) )
1116 			    goto got_it;
1117 			s++;
1118 		    }
1119 		else
1120 		    while (s <= e) {
1121 			if ( (*(U8*)s == c1 || *(U8*)s == c2)
1122 			     && (ln == 1 || !(OP(c) == EXACTF
1123 					      ? ibcmp(s, m, ln)
1124 					      : ibcmp_locale(s, m, ln)))
1125 			     && (norun || regtry(prog, s)) )
1126 			    goto got_it;
1127 			s++;
1128 		    }
1129 	    }
1130 	    break;
1131 	case BOUNDL:
1132 	    PL_reg_flags |= RF_tainted;
1133 	    /* FALL THROUGH */
1134 	case BOUND:
1135 	    if (do_utf8) {
1136 		if (s == PL_bostr)
1137 		    tmp = '\n';
1138 		else {
1139 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1140 
1141 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1142 		}
1143 		tmp = ((OP(c) == BOUND ?
1144 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1145 		LOAD_UTF8_CHARCLASS(alnum,"a");
1146 		while (s < strend) {
1147 		    if (tmp == !(OP(c) == BOUND ?
1148 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1149 				 isALNUM_LC_utf8((U8*)s)))
1150 		    {
1151 			tmp = !tmp;
1152 			if ((norun || regtry(prog, s)))
1153 			    goto got_it;
1154 		    }
1155 		    s += UTF8SKIP(s);
1156 		}
1157 	    }
1158 	    else {
1159 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1160 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1161 		while (s < strend) {
1162 		    if (tmp ==
1163 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1164 			tmp = !tmp;
1165 			if ((norun || regtry(prog, s)))
1166 			    goto got_it;
1167 		    }
1168 		    s++;
1169 		}
1170 	    }
1171 	    if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1172 		goto got_it;
1173 	    break;
1174 	case NBOUNDL:
1175 	    PL_reg_flags |= RF_tainted;
1176 	    /* FALL THROUGH */
1177 	case NBOUND:
1178 	    if (do_utf8) {
1179 		if (s == PL_bostr)
1180 		    tmp = '\n';
1181 		else {
1182 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1183 
1184 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1185 		}
1186 		tmp = ((OP(c) == NBOUND ?
1187 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1188 		LOAD_UTF8_CHARCLASS(alnum,"a");
1189 		while (s < strend) {
1190 		    if (tmp == !(OP(c) == NBOUND ?
1191 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1192 				 isALNUM_LC_utf8((U8*)s)))
1193 			tmp = !tmp;
1194 		    else if ((norun || regtry(prog, s)))
1195 			goto got_it;
1196 		    s += UTF8SKIP(s);
1197 		}
1198 	    }
1199 	    else {
1200 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1201 		tmp = ((OP(c) == NBOUND ?
1202 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1203 		while (s < strend) {
1204 		    if (tmp ==
1205 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1206 			tmp = !tmp;
1207 		    else if ((norun || regtry(prog, s)))
1208 			goto got_it;
1209 		    s++;
1210 		}
1211 	    }
1212 	    if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1213 		goto got_it;
1214 	    break;
1215 	case ALNUM:
1216 	    if (do_utf8) {
1217 		LOAD_UTF8_CHARCLASS(alnum,"a");
1218 		while (s < strend) {
1219 		    if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1220 			if (tmp && (norun || regtry(prog, s)))
1221 			    goto got_it;
1222 			else
1223 			    tmp = doevery;
1224 		    }
1225 		    else
1226 			tmp = 1;
1227 		    s += UTF8SKIP(s);
1228 		}
1229 	    }
1230 	    else {
1231 		while (s < strend) {
1232 		    if (isALNUM(*s)) {
1233 			if (tmp && (norun || regtry(prog, s)))
1234 			    goto got_it;
1235 			else
1236 			    tmp = doevery;
1237 		    }
1238 		    else
1239 			tmp = 1;
1240 		    s++;
1241 		}
1242 	    }
1243 	    break;
1244 	case ALNUML:
1245 	    PL_reg_flags |= RF_tainted;
1246 	    if (do_utf8) {
1247 		while (s < strend) {
1248 		    if (isALNUM_LC_utf8((U8*)s)) {
1249 			if (tmp && (norun || regtry(prog, s)))
1250 			    goto got_it;
1251 			else
1252 			    tmp = doevery;
1253 		    }
1254 		    else
1255 			tmp = 1;
1256 		    s += UTF8SKIP(s);
1257 		}
1258 	    }
1259 	    else {
1260 		while (s < strend) {
1261 		    if (isALNUM_LC(*s)) {
1262 			if (tmp && (norun || regtry(prog, s)))
1263 			    goto got_it;
1264 			else
1265 			    tmp = doevery;
1266 		    }
1267 		    else
1268 			tmp = 1;
1269 		    s++;
1270 		}
1271 	    }
1272 	    break;
1273 	case NALNUM:
1274 	    if (do_utf8) {
1275 		LOAD_UTF8_CHARCLASS(alnum,"a");
1276 		while (s < strend) {
1277 		    if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1278 			if (tmp && (norun || regtry(prog, s)))
1279 			    goto got_it;
1280 			else
1281 			    tmp = doevery;
1282 		    }
1283 		    else
1284 			tmp = 1;
1285 		    s += UTF8SKIP(s);
1286 		}
1287 	    }
1288 	    else {
1289 		while (s < strend) {
1290 		    if (!isALNUM(*s)) {
1291 			if (tmp && (norun || regtry(prog, s)))
1292 			    goto got_it;
1293 			else
1294 			    tmp = doevery;
1295 		    }
1296 		    else
1297 			tmp = 1;
1298 		    s++;
1299 		}
1300 	    }
1301 	    break;
1302 	case NALNUML:
1303 	    PL_reg_flags |= RF_tainted;
1304 	    if (do_utf8) {
1305 		while (s < strend) {
1306 		    if (!isALNUM_LC_utf8((U8*)s)) {
1307 			if (tmp && (norun || regtry(prog, s)))
1308 			    goto got_it;
1309 			else
1310 			    tmp = doevery;
1311 		    }
1312 		    else
1313 			tmp = 1;
1314 		    s += UTF8SKIP(s);
1315 		}
1316 	    }
1317 	    else {
1318 		while (s < strend) {
1319 		    if (!isALNUM_LC(*s)) {
1320 			if (tmp && (norun || regtry(prog, s)))
1321 			    goto got_it;
1322 			else
1323 			    tmp = doevery;
1324 		    }
1325 		    else
1326 			tmp = 1;
1327 		    s++;
1328 		}
1329 	    }
1330 	    break;
1331 	case SPACE:
1332 	    if (do_utf8) {
1333 		LOAD_UTF8_CHARCLASS(space," ");
1334 		while (s < strend) {
1335 		    if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1336 			if (tmp && (norun || regtry(prog, s)))
1337 			    goto got_it;
1338 			else
1339 			    tmp = doevery;
1340 		    }
1341 		    else
1342 			tmp = 1;
1343 		    s += UTF8SKIP(s);
1344 		}
1345 	    }
1346 	    else {
1347 		while (s < strend) {
1348 		    if (isSPACE(*s)) {
1349 			if (tmp && (norun || regtry(prog, s)))
1350 			    goto got_it;
1351 			else
1352 			    tmp = doevery;
1353 		    }
1354 		    else
1355 			tmp = 1;
1356 		    s++;
1357 		}
1358 	    }
1359 	    break;
1360 	case SPACEL:
1361 	    PL_reg_flags |= RF_tainted;
1362 	    if (do_utf8) {
1363 		while (s < strend) {
1364 		    if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1365 			if (tmp && (norun || regtry(prog, s)))
1366 			    goto got_it;
1367 			else
1368 			    tmp = doevery;
1369 		    }
1370 		    else
1371 			tmp = 1;
1372 		    s += UTF8SKIP(s);
1373 		}
1374 	    }
1375 	    else {
1376 		while (s < strend) {
1377 		    if (isSPACE_LC(*s)) {
1378 			if (tmp && (norun || regtry(prog, s)))
1379 			    goto got_it;
1380 			else
1381 			    tmp = doevery;
1382 		    }
1383 		    else
1384 			tmp = 1;
1385 		    s++;
1386 		}
1387 	    }
1388 	    break;
1389 	case NSPACE:
1390 	    if (do_utf8) {
1391 		LOAD_UTF8_CHARCLASS(space," ");
1392 		while (s < strend) {
1393 		    if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1394 			if (tmp && (norun || regtry(prog, s)))
1395 			    goto got_it;
1396 			else
1397 			    tmp = doevery;
1398 		    }
1399 		    else
1400 			tmp = 1;
1401 		    s += UTF8SKIP(s);
1402 		}
1403 	    }
1404 	    else {
1405 		while (s < strend) {
1406 		    if (!isSPACE(*s)) {
1407 			if (tmp && (norun || regtry(prog, s)))
1408 			    goto got_it;
1409 			else
1410 			    tmp = doevery;
1411 		    }
1412 		    else
1413 			tmp = 1;
1414 		    s++;
1415 		}
1416 	    }
1417 	    break;
1418 	case NSPACEL:
1419 	    PL_reg_flags |= RF_tainted;
1420 	    if (do_utf8) {
1421 		while (s < strend) {
1422 		    if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1423 			if (tmp && (norun || regtry(prog, s)))
1424 			    goto got_it;
1425 			else
1426 			    tmp = doevery;
1427 		    }
1428 		    else
1429 			tmp = 1;
1430 		    s += UTF8SKIP(s);
1431 		}
1432 	    }
1433 	    else {
1434 		while (s < strend) {
1435 		    if (!isSPACE_LC(*s)) {
1436 			if (tmp && (norun || regtry(prog, s)))
1437 			    goto got_it;
1438 			else
1439 			    tmp = doevery;
1440 		    }
1441 		    else
1442 			tmp = 1;
1443 		    s++;
1444 		}
1445 	    }
1446 	    break;
1447 	case DIGIT:
1448 	    if (do_utf8) {
1449 		LOAD_UTF8_CHARCLASS(digit,"0");
1450 		while (s < strend) {
1451 		    if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1452 			if (tmp && (norun || regtry(prog, s)))
1453 			    goto got_it;
1454 			else
1455 			    tmp = doevery;
1456 		    }
1457 		    else
1458 			tmp = 1;
1459 		    s += UTF8SKIP(s);
1460 		}
1461 	    }
1462 	    else {
1463 		while (s < strend) {
1464 		    if (isDIGIT(*s)) {
1465 			if (tmp && (norun || regtry(prog, s)))
1466 			    goto got_it;
1467 			else
1468 			    tmp = doevery;
1469 		    }
1470 		    else
1471 			tmp = 1;
1472 		    s++;
1473 		}
1474 	    }
1475 	    break;
1476 	case DIGITL:
1477 	    PL_reg_flags |= RF_tainted;
1478 	    if (do_utf8) {
1479 		while (s < strend) {
1480 		    if (isDIGIT_LC_utf8((U8*)s)) {
1481 			if (tmp && (norun || regtry(prog, s)))
1482 			    goto got_it;
1483 			else
1484 			    tmp = doevery;
1485 		    }
1486 		    else
1487 			tmp = 1;
1488 		    s += UTF8SKIP(s);
1489 		}
1490 	    }
1491 	    else {
1492 		while (s < strend) {
1493 		    if (isDIGIT_LC(*s)) {
1494 			if (tmp && (norun || regtry(prog, s)))
1495 			    goto got_it;
1496 			else
1497 			    tmp = doevery;
1498 		    }
1499 		    else
1500 			tmp = 1;
1501 		    s++;
1502 		}
1503 	    }
1504 	    break;
1505 	case NDIGIT:
1506 	    if (do_utf8) {
1507 		LOAD_UTF8_CHARCLASS(digit,"0");
1508 		while (s < strend) {
1509 		    if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1510 			if (tmp && (norun || regtry(prog, s)))
1511 			    goto got_it;
1512 			else
1513 			    tmp = doevery;
1514 		    }
1515 		    else
1516 			tmp = 1;
1517 		    s += UTF8SKIP(s);
1518 		}
1519 	    }
1520 	    else {
1521 		while (s < strend) {
1522 		    if (!isDIGIT(*s)) {
1523 			if (tmp && (norun || regtry(prog, s)))
1524 			    goto got_it;
1525 			else
1526 			    tmp = doevery;
1527 		    }
1528 		    else
1529 			tmp = 1;
1530 		    s++;
1531 		}
1532 	    }
1533 	    break;
1534 	case NDIGITL:
1535 	    PL_reg_flags |= RF_tainted;
1536 	    if (do_utf8) {
1537 		while (s < strend) {
1538 		    if (!isDIGIT_LC_utf8((U8*)s)) {
1539 			if (tmp && (norun || regtry(prog, s)))
1540 			    goto got_it;
1541 			else
1542 			    tmp = doevery;
1543 		    }
1544 		    else
1545 			tmp = 1;
1546 		    s += UTF8SKIP(s);
1547 		}
1548 	    }
1549 	    else {
1550 		while (s < strend) {
1551 		    if (!isDIGIT_LC(*s)) {
1552 			if (tmp && (norun || regtry(prog, s)))
1553 			    goto got_it;
1554 			else
1555 			    tmp = doevery;
1556 		    }
1557 		    else
1558 			tmp = 1;
1559 		    s++;
1560 		}
1561 	    }
1562 	    break;
1563 	default:
1564 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1565 	    break;
1566 	}
1567 	return 0;
1568       got_it:
1569 	return s;
1570 }
1571 
1572 /*
1573  - regexec_flags - match a regexp against a string
1574  */
1575 I32
1576 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1577 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1578 /* strend: pointer to null at end of string */
1579 /* strbeg: real beginning of string */
1580 /* minend: end of match must be >=minend after stringarg. */
1581 /* data: May be used for some additional optimizations. */
1582 /* nosave: For optimizations. */
1583 {
1584     register char *s;
1585     register regnode *c;
1586     register char *startpos = stringarg;
1587     I32 minlen;		/* must match at least this many chars */
1588     I32 dontbother = 0;	/* how many characters not to try at end */
1589     /* I32 start_shift = 0; */		/* Offset of the start to find
1590 					 constant substr. */		/* CC */
1591     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1592     I32 scream_pos = -1;		/* Internal iterator of scream. */
1593     char *scream_olds;
1594     SV* oreplsv = GvSV(PL_replgv);
1595     bool do_utf8 = DO_UTF8(sv);
1596 #ifdef DEBUGGING
1597     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1598     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1599 #endif
1600 
1601     PL_regcc = 0;
1602 
1603     cache_re(prog);
1604 #ifdef DEBUGGING
1605     PL_regnarrate = DEBUG_r_TEST;
1606 #endif
1607 
1608     /* Be paranoid... */
1609     if (prog == NULL || startpos == NULL) {
1610 	Perl_croak(aTHX_ "NULL regexp parameter");
1611 	return 0;
1612     }
1613 
1614     minlen = prog->minlen;
1615     if (strend - startpos < minlen) {
1616         DEBUG_r(PerlIO_printf(Perl_debug_log,
1617 			      "String too short [regexec_flags]...\n"));
1618 	goto phooey;
1619     }
1620 
1621     /* Check validity of program. */
1622     if (UCHARAT(prog->program) != REG_MAGIC) {
1623 	Perl_croak(aTHX_ "corrupted regexp program");
1624     }
1625 
1626     PL_reg_flags = 0;
1627     PL_reg_eval_set = 0;
1628     PL_reg_maxiter = 0;
1629 
1630     if (prog->reganch & ROPT_UTF8)
1631 	PL_reg_flags |= RF_utf8;
1632 
1633     /* Mark beginning of line for ^ and lookbehind. */
1634     PL_regbol = startpos;
1635     PL_bostr  = strbeg;
1636     PL_reg_sv = sv;
1637 
1638     /* Mark end of line for $ (and such) */
1639     PL_regeol = strend;
1640 
1641     /* see how far we have to get to not match where we matched before */
1642     PL_regtill = startpos+minend;
1643 
1644     /* We start without call_cc context.  */
1645     PL_reg_call_cc = 0;
1646 
1647     /* If there is a "must appear" string, look for it. */
1648     s = startpos;
1649 
1650     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1651 	MAGIC *mg;
1652 
1653 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1654 	    PL_reg_ganch = startpos;
1655 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1656 		  && SvMAGIC(sv)
1657 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1658 		  && mg->mg_len >= 0) {
1659 	    PL_reg_ganch = strbeg + mg->mg_len;	/* Defined pos() */
1660 	    if (prog->reganch & ROPT_ANCH_GPOS) {
1661 	        if (s > PL_reg_ganch)
1662 		    goto phooey;
1663 		s = PL_reg_ganch;
1664 	    }
1665 	}
1666 	else				/* pos() not defined */
1667 	    PL_reg_ganch = strbeg;
1668     }
1669 
1670     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1671 	re_scream_pos_data d;
1672 
1673 	d.scream_olds = &scream_olds;
1674 	d.scream_pos = &scream_pos;
1675 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1676 	if (!s) {
1677 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1678 	    goto phooey;	/* not present */
1679 	}
1680     }
1681 
1682     DEBUG_r({
1683 	 char *s0   = UTF ?
1684 	   pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1685 			  UNI_DISPLAY_REGEX) :
1686 	   prog->precomp;
1687 	 int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1688 	 char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1689 					       UNI_DISPLAY_REGEX) : startpos;
1690 	 int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1691 	 if (!PL_colorset)
1692 	     reginitcolors();
1693 	 PerlIO_printf(Perl_debug_log,
1694 		       "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1695 		       PL_colors[4],PL_colors[5],PL_colors[0],
1696 		       len0, len0, s0,
1697 		       PL_colors[1],
1698 		       len0 > 60 ? "..." : "",
1699 		       PL_colors[0],
1700 		       (int)(len1 > 60 ? 60 : len1),
1701 		       s1, PL_colors[1],
1702 		       (len1 > 60 ? "..." : "")
1703 	      );
1704     });
1705 
1706     /* Simplest case:  anchored match need be tried only once. */
1707     /*  [unless only anchor is BOL and multiline is set] */
1708     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1709 	if (s == startpos && regtry(prog, startpos))
1710 	    goto got_it;
1711 	else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1712 		 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1713 	{
1714 	    char *end;
1715 
1716 	    if (minlen)
1717 		dontbother = minlen - 1;
1718 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1719 	    /* for multiline we only have to try after newlines */
1720 	    if (prog->check_substr || prog->check_utf8) {
1721 		if (s == startpos)
1722 		    goto after_try;
1723 		while (1) {
1724 		    if (regtry(prog, s))
1725 			goto got_it;
1726 		  after_try:
1727 		    if (s >= end)
1728 			goto phooey;
1729 		    if (prog->reganch & RE_USE_INTUIT) {
1730 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1731 			if (!s)
1732 			    goto phooey;
1733 		    }
1734 		    else
1735 			s++;
1736 		}
1737 	    } else {
1738 		if (s > startpos)
1739 		    s--;
1740 		while (s < end) {
1741 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1742 			if (regtry(prog, s))
1743 			    goto got_it;
1744 		    }
1745 		}
1746 	    }
1747 	}
1748 	goto phooey;
1749     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1750 	if (regtry(prog, PL_reg_ganch))
1751 	    goto got_it;
1752 	goto phooey;
1753     }
1754 
1755     /* Messy cases:  unanchored match. */
1756     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1757 	/* we have /x+whatever/ */
1758 	/* it must be a one character string (XXXX Except UTF?) */
1759 	char ch;
1760 #ifdef DEBUGGING
1761 	int did_match = 0;
1762 #endif
1763 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1764 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1765 	ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1766 
1767 	if (do_utf8) {
1768 	    while (s < strend) {
1769 		if (*s == ch) {
1770 		    DEBUG_r( did_match = 1 );
1771 		    if (regtry(prog, s)) goto got_it;
1772 		    s += UTF8SKIP(s);
1773 		    while (s < strend && *s == ch)
1774 			s += UTF8SKIP(s);
1775 		}
1776 		s += UTF8SKIP(s);
1777 	    }
1778 	}
1779 	else {
1780 	    while (s < strend) {
1781 		if (*s == ch) {
1782 		    DEBUG_r( did_match = 1 );
1783 		    if (regtry(prog, s)) goto got_it;
1784 		    s++;
1785 		    while (s < strend && *s == ch)
1786 			s++;
1787 		}
1788 		s++;
1789 	    }
1790 	}
1791 	DEBUG_r(if (!did_match)
1792 		PerlIO_printf(Perl_debug_log,
1793                                   "Did not find anchored character...\n")
1794                );
1795     }
1796     /*SUPPRESS 560*/
1797     else if (prog->anchored_substr != Nullsv
1798 	      || prog->anchored_utf8 != Nullsv
1799 	      || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1800 		  && prog->float_max_offset < strend - s)) {
1801 	SV *must;
1802 	I32 back_max;
1803 	I32 back_min;
1804 	char *last;
1805 	char *last1;		/* Last position checked before */
1806 #ifdef DEBUGGING
1807 	int did_match = 0;
1808 #endif
1809 	if (prog->anchored_substr || prog->anchored_utf8) {
1810 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1811 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1812 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1813 	    back_max = back_min = prog->anchored_offset;
1814 	} else {
1815 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1816 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1817 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1818 	    back_max = prog->float_max_offset;
1819 	    back_min = prog->float_min_offset;
1820 	}
1821 	if (must == &PL_sv_undef)
1822 	    /* could not downgrade utf8 check substring, so must fail */
1823 	    goto phooey;
1824 
1825 	last = HOP3c(strend,	/* Cannot start after this */
1826 			  -(I32)(CHR_SVLEN(must)
1827 				 - (SvTAIL(must) != 0) + back_min), strbeg);
1828 
1829 	if (s > PL_bostr)
1830 	    last1 = HOPc(s, -1);
1831 	else
1832 	    last1 = s - 1;	/* bogus */
1833 
1834 	/* XXXX check_substr already used to find `s', can optimize if
1835 	   check_substr==must. */
1836 	scream_pos = -1;
1837 	dontbother = end_shift;
1838 	strend = HOPc(strend, -dontbother);
1839 	while ( (s <= last) &&
1840 		((flags & REXEC_SCREAM)
1841 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1842 				    end_shift, &scream_pos, 0))
1843 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1844 				  (unsigned char*)strend, must,
1845 				  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1846 	    DEBUG_r( did_match = 1 );
1847 	    if (HOPc(s, -back_max) > last1) {
1848 		last1 = HOPc(s, -back_min);
1849 		s = HOPc(s, -back_max);
1850 	    }
1851 	    else {
1852 		char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1853 
1854 		last1 = HOPc(s, -back_min);
1855 		s = t;
1856 	    }
1857 	    if (do_utf8) {
1858 		while (s <= last1) {
1859 		    if (regtry(prog, s))
1860 			goto got_it;
1861 		    s += UTF8SKIP(s);
1862 		}
1863 	    }
1864 	    else {
1865 		while (s <= last1) {
1866 		    if (regtry(prog, s))
1867 			goto got_it;
1868 		    s++;
1869 		}
1870 	    }
1871 	}
1872 	DEBUG_r(if (!did_match)
1873                     PerlIO_printf(Perl_debug_log,
1874                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1875 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
1876 			       ? "anchored" : "floating"),
1877 			      PL_colors[0],
1878 			      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1879 			      SvPVX(must),
1880                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1881                );
1882 	goto phooey;
1883     }
1884     else if ((c = prog->regstclass)) {
1885 	if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1886 	    /* don't bother with what can't match */
1887 	    strend = HOPc(strend, -(minlen - 1));
1888 	DEBUG_r({
1889 	    SV *prop = sv_newmortal();
1890 	    char *s0;
1891 	    char *s1;
1892 	    int len0;
1893 	    int len1;
1894 
1895 	    regprop(prop, c);
1896 	    s0 = UTF ?
1897 	      pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1898 			     UNI_DISPLAY_REGEX) :
1899 	      SvPVX(prop);
1900 	    len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1901 	    s1 = UTF ?
1902 	      sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1903 	    len1 = UTF ? SvCUR(dsv1) : strend - s;
1904 	    PerlIO_printf(Perl_debug_log,
1905 			  "Matching stclass `%*.*s' against `%*.*s'\n",
1906 			  len0, len0, s0,
1907 			  len1, len1, s1);
1908 	});
1909   	if (find_byclass(prog, c, s, strend, startpos, 0))
1910 	    goto got_it;
1911 	DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1912     }
1913     else {
1914 	dontbother = 0;
1915 	if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1916 	    /* Trim the end. */
1917 	    char *last;
1918 	    SV* float_real;
1919 
1920 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1921 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1922 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1923 
1924 	    if (flags & REXEC_SCREAM) {
1925 		last = screaminstr(sv, float_real, s - strbeg,
1926 				   end_shift, &scream_pos, 1); /* last one */
1927 		if (!last)
1928 		    last = scream_olds; /* Only one occurrence. */
1929 	    }
1930 	    else {
1931 		STRLEN len;
1932 		char *little = SvPV(float_real, len);
1933 
1934 		if (SvTAIL(float_real)) {
1935 		    if (memEQ(strend - len + 1, little, len - 1))
1936 			last = strend - len + 1;
1937 		    else if (!PL_multiline)
1938 			last = memEQ(strend - len, little, len)
1939 			    ? strend - len : Nullch;
1940 		    else
1941 			goto find_last;
1942 		} else {
1943 		  find_last:
1944 		    if (len)
1945 			last = rninstr(s, strend, little, little + len);
1946 		    else
1947 			last = strend;	/* matching `$' */
1948 		}
1949 	    }
1950 	    if (last == NULL) {
1951 		DEBUG_r(PerlIO_printf(Perl_debug_log,
1952 				      "%sCan't trim the tail, match fails (should not happen)%s\n",
1953 				      PL_colors[4],PL_colors[5]));
1954 		goto phooey; /* Should not happen! */
1955 	    }
1956 	    dontbother = strend - last + prog->float_min_offset;
1957 	}
1958 	if (minlen && (dontbother < minlen))
1959 	    dontbother = minlen - 1;
1960 	strend -= dontbother; 		   /* this one's always in bytes! */
1961 	/* We don't know much -- general case. */
1962 	if (do_utf8) {
1963 	    for (;;) {
1964 		if (regtry(prog, s))
1965 		    goto got_it;
1966 		if (s >= strend)
1967 		    break;
1968 		s += UTF8SKIP(s);
1969 	    };
1970 	}
1971 	else {
1972 	    do {
1973 		if (regtry(prog, s))
1974 		    goto got_it;
1975 	    } while (s++ < strend);
1976 	}
1977     }
1978 
1979     /* Failure. */
1980     goto phooey;
1981 
1982 got_it:
1983     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1984 
1985     if (PL_reg_eval_set) {
1986 	/* Preserve the current value of $^R */
1987 	if (oreplsv != GvSV(PL_replgv))
1988 	    sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1989 						  restored, the value remains
1990 						  the same. */
1991 	restore_pos(aTHX_ 0);
1992     }
1993 
1994     /* make sure $`, $&, $', and $digit will work later */
1995     if ( !(flags & REXEC_NOT_FIRST) ) {
1996 	if (RX_MATCH_COPIED(prog)) {
1997 	    Safefree(prog->subbeg);
1998 	    RX_MATCH_COPIED_off(prog);
1999 	}
2000 	if (flags & REXEC_COPY_STR) {
2001 	    I32 i = PL_regeol - startpos + (stringarg - strbeg);
2002 
2003 	    s = savepvn(strbeg, i);
2004 	    prog->subbeg = s;
2005 	    prog->sublen = i;
2006 	    RX_MATCH_COPIED_on(prog);
2007 	}
2008 	else {
2009 	    prog->subbeg = strbeg;
2010 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2011 	}
2012     }
2013 
2014     return 1;
2015 
2016 phooey:
2017     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2018 			  PL_colors[4],PL_colors[5]));
2019     if (PL_reg_eval_set)
2020 	restore_pos(aTHX_ 0);
2021     return 0;
2022 }
2023 
2024 /*
2025  - regtry - try match at specific point
2026  */
2027 STATIC I32			/* 0 failure, 1 success */
2028 S_regtry(pTHX_ regexp *prog, char *startpos)
2029 {
2030     register I32 i;
2031     register I32 *sp;
2032     register I32 *ep;
2033     CHECKPOINT lastcp;
2034 
2035 #ifdef DEBUGGING
2036     PL_regindent = 0;	/* XXXX Not good when matches are reenterable... */
2037 #endif
2038     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2039 	MAGIC *mg;
2040 
2041 	PL_reg_eval_set = RS_init;
2042 	DEBUG_r(DEBUG_s(
2043 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2044 			  (IV)(PL_stack_sp - PL_stack_base));
2045 	    ));
2046 	SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2047 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2048 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2049 	SAVETMPS;
2050 	/* Apparently this is not needed, judging by wantarray. */
2051 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2052 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2053 
2054 	if (PL_reg_sv) {
2055 	    /* Make $_ available to executed code. */
2056 	    if (PL_reg_sv != DEFSV) {
2057 		/* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2058 		SAVESPTR(DEFSV);
2059 		DEFSV = PL_reg_sv;
2060 	    }
2061 
2062 	    if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2063 		  && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2064 		/* prepare for quick setting of pos */
2065 		sv_magic(PL_reg_sv, (SV*)0,
2066 			PERL_MAGIC_regex_global, Nullch, 0);
2067 		mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2068 		mg->mg_len = -1;
2069 	    }
2070 	    PL_reg_magic    = mg;
2071 	    PL_reg_oldpos   = mg->mg_len;
2072 	    SAVEDESTRUCTOR_X(restore_pos, 0);
2073         }
2074         if (!PL_reg_curpm) {
2075 	    Newz(22,PL_reg_curpm, 1, PMOP);
2076 #ifdef USE_ITHREADS
2077             {
2078                 SV* repointer = newSViv(0);
2079                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2080                 SvFLAGS(repointer) |= SVf_BREAK;
2081                 av_push(PL_regex_padav,repointer);
2082                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2083                 PL_regex_pad = AvARRAY(PL_regex_padav);
2084             }
2085 #endif
2086         }
2087 	PM_SETRE(PL_reg_curpm, prog);
2088 	PL_reg_oldcurpm = PL_curpm;
2089 	PL_curpm = PL_reg_curpm;
2090 	if (RX_MATCH_COPIED(prog)) {
2091 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2092 		since it may be needed if this match fails.  Thus
2093 		$` inside (?{}) could fail... */
2094 	    PL_reg_oldsaved = prog->subbeg;
2095 	    PL_reg_oldsavedlen = prog->sublen;
2096 	    RX_MATCH_COPIED_off(prog);
2097 	}
2098 	else
2099 	    PL_reg_oldsaved = Nullch;
2100 	prog->subbeg = PL_bostr;
2101 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2102     }
2103     prog->startp[0] = startpos - PL_bostr;
2104     PL_reginput = startpos;
2105     PL_regstartp = prog->startp;
2106     PL_regendp = prog->endp;
2107     PL_reglastparen = &prog->lastparen;
2108     PL_reglastcloseparen = &prog->lastcloseparen;
2109     prog->lastparen = 0;
2110     PL_regsize = 0;
2111     DEBUG_r(PL_reg_starttry = startpos);
2112     if (PL_reg_start_tmpl <= prog->nparens) {
2113 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2114         if(PL_reg_start_tmp)
2115             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2116         else
2117             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2118     }
2119 
2120     /* XXXX What this code is doing here?!!!  There should be no need
2121        to do this again and again, PL_reglastparen should take care of
2122        this!  --ilya*/
2123 
2124     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2125      * Actually, the code in regcppop() (which Ilya may be meaning by
2126      * PL_reglastparen), is not needed at all by the test suite
2127      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2128      * enough, for building DynaLoader, or otherwise this
2129      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2130      * will happen.  Meanwhile, this code *is* needed for the
2131      * above-mentioned test suite tests to succeed.  The common theme
2132      * on those tests seems to be returning null fields from matches.
2133      * --jhi */
2134 #if 1
2135     sp = prog->startp;
2136     ep = prog->endp;
2137     if (prog->nparens) {
2138 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2139 	    *++sp = -1;
2140 	    *++ep = -1;
2141 	}
2142     }
2143 #endif
2144     REGCP_SET(lastcp);
2145     if (regmatch(prog->program + 1)) {
2146 	prog->endp[0] = PL_reginput - PL_bostr;
2147 	return 1;
2148     }
2149     REGCP_UNWIND(lastcp);
2150     return 0;
2151 }
2152 
2153 #define RE_UNWIND_BRANCH	1
2154 #define RE_UNWIND_BRANCHJ	2
2155 
2156 union re_unwind_t;
2157 
2158 typedef struct {		/* XX: makes sense to enlarge it... */
2159     I32 type;
2160     I32 prev;
2161     CHECKPOINT lastcp;
2162 } re_unwind_generic_t;
2163 
2164 typedef struct {
2165     I32 type;
2166     I32 prev;
2167     CHECKPOINT lastcp;
2168     I32 lastparen;
2169     regnode *next;
2170     char *locinput;
2171     I32 nextchr;
2172 #ifdef DEBUGGING
2173     int regindent;
2174 #endif
2175 } re_unwind_branch_t;
2176 
2177 typedef union re_unwind_t {
2178     I32 type;
2179     re_unwind_generic_t generic;
2180     re_unwind_branch_t branch;
2181 } re_unwind_t;
2182 
2183 #define sayYES goto yes
2184 #define sayNO goto no
2185 #define sayNO_ANYOF goto no_anyof
2186 #define sayYES_FINAL goto yes_final
2187 #define sayYES_LOUD  goto yes_loud
2188 #define sayNO_FINAL  goto no_final
2189 #define sayNO_SILENT goto do_no
2190 #define saySAME(x) if (x) goto yes; else goto no
2191 
2192 #define REPORT_CODE_OFF 24
2193 
2194 /*
2195  - regmatch - main matching routine
2196  *
2197  * Conceptually the strategy is simple:  check to see whether the current
2198  * node matches, call self recursively to see whether the rest matches,
2199  * and then act accordingly.  In practice we make some effort to avoid
2200  * recursion, in particular by going through "ordinary" nodes (that don't
2201  * need to know whether the rest of the match failed) by a loop instead of
2202  * by recursion.
2203  */
2204 /* [lwall] I've hoisted the register declarations to the outer block in order to
2205  * maybe save a little bit of pushing and popping on the stack.  It also takes
2206  * advantage of machines that use a register save mask on subroutine entry.
2207  */
2208 STATIC I32			/* 0 failure, 1 success */
2209 S_regmatch(pTHX_ regnode *prog)
2210 {
2211     register regnode *scan;	/* Current node. */
2212     regnode *next;		/* Next node. */
2213     regnode *inner;		/* Next node in internal branch. */
2214     register I32 nextchr;	/* renamed nextchr - nextchar colides with
2215 				   function of same name */
2216     register I32 n;		/* no or next */
2217     register I32 ln = 0;	/* len or last */
2218     register char *s = Nullch;	/* operand or save */
2219     register char *locinput = PL_reginput;
2220     register I32 c1 = 0, c2 = 0, paren;	/* case fold search, parenth */
2221     int minmod = 0, sw = 0, logical = 0;
2222     I32 unwind = 0;
2223 #if 0
2224     I32 firstcp = PL_savestack_ix;
2225 #endif
2226     register bool do_utf8 = PL_reg_match_utf8;
2227 #ifdef DEBUGGING
2228     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2229     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2230     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2231 #endif
2232 
2233 #ifdef DEBUGGING
2234     PL_regindent++;
2235 #endif
2236 
2237     /* Note that nextchr is a byte even in UTF */
2238     nextchr = UCHARAT(locinput);
2239     scan = prog;
2240     while (scan != NULL) {
2241 
2242         DEBUG_r( {
2243 	    SV *prop = sv_newmortal();
2244 	    int docolor = *PL_colors[0];
2245 	    int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2246 	    int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2247 	    /* The part of the string before starttry has one color
2248 	       (pref0_len chars), between starttry and current
2249 	       position another one (pref_len - pref0_len chars),
2250 	       after the current position the third one.
2251 	       We assume that pref0_len <= pref_len, otherwise we
2252 	       decrease pref0_len.  */
2253 	    int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2254 		? (5 + taill) - l : locinput - PL_bostr;
2255 	    int pref0_len;
2256 
2257 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2258 		pref_len++;
2259 	    pref0_len = pref_len  - (locinput - PL_reg_starttry);
2260 	    if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2261 		l = ( PL_regeol - locinput > (5 + taill) - pref_len
2262 		      ? (5 + taill) - pref_len : PL_regeol - locinput);
2263 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2264 		l--;
2265 	    if (pref0_len < 0)
2266 		pref0_len = 0;
2267 	    if (pref0_len > pref_len)
2268 		pref0_len = pref_len;
2269 	    regprop(prop, scan);
2270 	    {
2271 	      char *s0 =
2272 		do_utf8 ?
2273 		pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2274 			       pref0_len, 60, UNI_DISPLAY_REGEX) :
2275 		locinput - pref_len;
2276 	      int len0 = do_utf8 ? strlen(s0) : pref0_len;
2277 	      char *s1 = do_utf8 ?
2278 		pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2279 			       pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2280 		locinput - pref_len + pref0_len;
2281 	      int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2282 	      char *s2 = do_utf8 ?
2283 		pv_uni_display(dsv2, (U8*)locinput,
2284 			       PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2285 		locinput;
2286 	      int len2 = do_utf8 ? strlen(s2) : l;
2287 	      PerlIO_printf(Perl_debug_log,
2288 			    "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2289 			    (IV)(locinput - PL_bostr),
2290 			    PL_colors[4],
2291 			    len0, s0,
2292 			    PL_colors[5],
2293 			    PL_colors[2],
2294 			    len1, s1,
2295 			    PL_colors[3],
2296 			    (docolor ? "" : "> <"),
2297 			    PL_colors[0],
2298 			    len2, s2,
2299 			    PL_colors[1],
2300 			    15 - l - pref_len + 1,
2301 			    "",
2302 			    (IV)(scan - PL_regprogram), PL_regindent*2, "",
2303 			    SvPVX(prop));
2304 	    }
2305 	});
2306 
2307 	next = scan + NEXT_OFF(scan);
2308 	if (next == scan)
2309 	    next = NULL;
2310 
2311 	switch (OP(scan)) {
2312 	case BOL:
2313 	    if (locinput == PL_bostr || (PL_multiline &&
2314 		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2315 	    {
2316 		/* regtill = regbol; */
2317 		break;
2318 	    }
2319 	    sayNO;
2320 	case MBOL:
2321 	    if (locinput == PL_bostr ||
2322 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2323 	    {
2324 		break;
2325 	    }
2326 	    sayNO;
2327 	case SBOL:
2328 	    if (locinput == PL_bostr)
2329 		break;
2330 	    sayNO;
2331 	case GPOS:
2332 	    if (locinput == PL_reg_ganch)
2333 		break;
2334 	    sayNO;
2335 	case EOL:
2336 	    if (PL_multiline)
2337 		goto meol;
2338 	    else
2339 		goto seol;
2340 	case MEOL:
2341 	  meol:
2342 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2343 		sayNO;
2344 	    break;
2345 	case SEOL:
2346 	  seol:
2347 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2348 		sayNO;
2349 	    if (PL_regeol - locinput > 1)
2350 		sayNO;
2351 	    break;
2352 	case EOS:
2353 	    if (PL_regeol != locinput)
2354 		sayNO;
2355 	    break;
2356 	case SANY:
2357 	    if (!nextchr && locinput >= PL_regeol)
2358 		sayNO;
2359  	    if (do_utf8) {
2360 	        locinput += PL_utf8skip[nextchr];
2361 		if (locinput > PL_regeol)
2362  		    sayNO;
2363  		nextchr = UCHARAT(locinput);
2364  	    }
2365  	    else
2366  		nextchr = UCHARAT(++locinput);
2367 	    break;
2368 	case CANY:
2369 	    if (!nextchr && locinput >= PL_regeol)
2370 		sayNO;
2371 	    nextchr = UCHARAT(++locinput);
2372 	    break;
2373 	case REG_ANY:
2374 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2375 		sayNO;
2376 	    if (do_utf8) {
2377 		locinput += PL_utf8skip[nextchr];
2378 		if (locinput > PL_regeol)
2379 		    sayNO;
2380 		nextchr = UCHARAT(locinput);
2381 	    }
2382 	    else
2383 		nextchr = UCHARAT(++locinput);
2384 	    break;
2385 	case EXACT:
2386 	    s = STRING(scan);
2387 	    ln = STR_LEN(scan);
2388 	    if (do_utf8 != UTF) {
2389 		/* The target and the pattern have differing utf8ness. */
2390 		char *l = locinput;
2391 		char *e = s + ln;
2392 		STRLEN ulen;
2393 
2394 		if (do_utf8) {
2395 		    /* The target is utf8, the pattern is not utf8. */
2396 		    while (s < e) {
2397 			if (l >= PL_regeol)
2398 			     sayNO;
2399 			if (NATIVE_TO_UNI(*(U8*)s) !=
2400 			    utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2401 					   ckWARN(WARN_UTF8) ?
2402 					   0 : UTF8_ALLOW_ANY))
2403 			     sayNO;
2404 			l += ulen;
2405 			s ++;
2406 		    }
2407 		}
2408 		else {
2409 		    /* The target is not utf8, the pattern is utf8. */
2410 		    while (s < e) {
2411 			if (l >= PL_regeol)
2412 			    sayNO;
2413 			if (NATIVE_TO_UNI(*((U8*)l)) !=
2414 			    utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2415 					   ckWARN(WARN_UTF8) ?
2416 					   0 : UTF8_ALLOW_ANY))
2417 			    sayNO;
2418 			s += ulen;
2419 			l ++;
2420 		    }
2421 		}
2422 		locinput = l;
2423 		nextchr = UCHARAT(locinput);
2424 		break;
2425 	    }
2426 	    /* The target and the pattern have the same utf8ness. */
2427 	    /* Inline the first character, for speed. */
2428 	    if (UCHARAT(s) != nextchr)
2429 		sayNO;
2430 	    if (PL_regeol - locinput < ln)
2431 		sayNO;
2432 	    if (ln > 1 && memNE(s, locinput, ln))
2433 		sayNO;
2434 	    locinput += ln;
2435 	    nextchr = UCHARAT(locinput);
2436 	    break;
2437 	case EXACTFL:
2438 	    PL_reg_flags |= RF_tainted;
2439 	    /* FALL THROUGH */
2440 	case EXACTF:
2441 	    s = STRING(scan);
2442 	    ln = STR_LEN(scan);
2443 
2444 	    if (do_utf8 || UTF) {
2445 	      /* Either target or the pattern are utf8. */
2446 		char *l = locinput;
2447 		char *e = PL_regeol;
2448 
2449 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2450 			       l, &e, 0,  do_utf8)) {
2451 		     /* One more case for the sharp s:
2452 		      * pack("U0U*", 0xDF) =~ /ss/i,
2453 		      * the 0xC3 0x9F are the UTF-8
2454 		      * byte sequence for the U+00DF. */
2455 		     if (!(do_utf8 &&
2456 			   toLOWER(s[0]) == 's' &&
2457 			   ln >= 2 &&
2458 			   toLOWER(s[1]) == 's' &&
2459 			   (U8)l[0] == 0xC3 &&
2460 			   e - l >= 2 &&
2461 			   (U8)l[1] == 0x9F))
2462 			  sayNO;
2463 		}
2464 		locinput = e;
2465 		nextchr = UCHARAT(locinput);
2466 		break;
2467 	    }
2468 
2469 	    /* Neither the target and the pattern are utf8. */
2470 
2471 	    /* Inline the first character, for speed. */
2472 	    if (UCHARAT(s) != nextchr &&
2473 		UCHARAT(s) != ((OP(scan) == EXACTF)
2474 			       ? PL_fold : PL_fold_locale)[nextchr])
2475 		sayNO;
2476 	    if (PL_regeol - locinput < ln)
2477 		sayNO;
2478 	    if (ln > 1 && (OP(scan) == EXACTF
2479 			   ? ibcmp(s, locinput, ln)
2480 			   : ibcmp_locale(s, locinput, ln)))
2481 		sayNO;
2482 	    locinput += ln;
2483 	    nextchr = UCHARAT(locinput);
2484 	    break;
2485 	case ANYOF:
2486 	    if (do_utf8) {
2487 	        STRLEN inclasslen = PL_regeol - locinput;
2488 
2489 	        if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2490 		    sayNO_ANYOF;
2491 		if (locinput >= PL_regeol)
2492 		    sayNO;
2493 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2494 		nextchr = UCHARAT(locinput);
2495 		break;
2496 	    }
2497 	    else {
2498 		if (nextchr < 0)
2499 		    nextchr = UCHARAT(locinput);
2500 		if (!REGINCLASS(scan, (U8*)locinput))
2501 		    sayNO_ANYOF;
2502 		if (!nextchr && locinput >= PL_regeol)
2503 		    sayNO;
2504 		nextchr = UCHARAT(++locinput);
2505 		break;
2506 	    }
2507 	no_anyof:
2508 	    /* If we might have the case of the German sharp s
2509 	     * in a casefolding Unicode character class. */
2510 
2511 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2512 		 locinput += SHARP_S_SKIP;
2513 		 nextchr = UCHARAT(locinput);
2514 	    }
2515 	    else
2516 		 sayNO;
2517 	    break;
2518 	case ALNUML:
2519 	    PL_reg_flags |= RF_tainted;
2520 	    /* FALL THROUGH */
2521 	case ALNUM:
2522 	    if (!nextchr)
2523 		sayNO;
2524 	    if (do_utf8) {
2525 		LOAD_UTF8_CHARCLASS(alnum,"a");
2526 		if (!(OP(scan) == ALNUM
2527 		      ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2528 		      : isALNUM_LC_utf8((U8*)locinput)))
2529 		{
2530 		    sayNO;
2531 		}
2532 		locinput += PL_utf8skip[nextchr];
2533 		nextchr = UCHARAT(locinput);
2534 		break;
2535 	    }
2536 	    if (!(OP(scan) == ALNUM
2537 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2538 		sayNO;
2539 	    nextchr = UCHARAT(++locinput);
2540 	    break;
2541 	case NALNUML:
2542 	    PL_reg_flags |= RF_tainted;
2543 	    /* FALL THROUGH */
2544 	case NALNUM:
2545 	    if (!nextchr && locinput >= PL_regeol)
2546 		sayNO;
2547 	    if (do_utf8) {
2548 		LOAD_UTF8_CHARCLASS(alnum,"a");
2549 		if (OP(scan) == NALNUM
2550 		    ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2551 		    : isALNUM_LC_utf8((U8*)locinput))
2552 		{
2553 		    sayNO;
2554 		}
2555 		locinput += PL_utf8skip[nextchr];
2556 		nextchr = UCHARAT(locinput);
2557 		break;
2558 	    }
2559 	    if (OP(scan) == NALNUM
2560 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
2561 		sayNO;
2562 	    nextchr = UCHARAT(++locinput);
2563 	    break;
2564 	case BOUNDL:
2565 	case NBOUNDL:
2566 	    PL_reg_flags |= RF_tainted;
2567 	    /* FALL THROUGH */
2568 	case BOUND:
2569 	case NBOUND:
2570 	    /* was last char in word? */
2571 	    if (do_utf8) {
2572 		if (locinput == PL_bostr)
2573 		    ln = '\n';
2574 		else {
2575 		    U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2576 
2577 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2578 		}
2579 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2580 		    ln = isALNUM_uni(ln);
2581 		    LOAD_UTF8_CHARCLASS(alnum,"a");
2582 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2583 		}
2584 		else {
2585 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2586 		    n = isALNUM_LC_utf8((U8*)locinput);
2587 		}
2588 	    }
2589 	    else {
2590 		ln = (locinput != PL_bostr) ?
2591 		    UCHARAT(locinput - 1) : '\n';
2592 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2593 		    ln = isALNUM(ln);
2594 		    n = isALNUM(nextchr);
2595 		}
2596 		else {
2597 		    ln = isALNUM_LC(ln);
2598 		    n = isALNUM_LC(nextchr);
2599 		}
2600 	    }
2601 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2602 				    OP(scan) == BOUNDL))
2603 		    sayNO;
2604 	    break;
2605 	case SPACEL:
2606 	    PL_reg_flags |= RF_tainted;
2607 	    /* FALL THROUGH */
2608 	case SPACE:
2609 	    if (!nextchr)
2610 		sayNO;
2611 	    if (do_utf8) {
2612 		if (UTF8_IS_CONTINUED(nextchr)) {
2613 		    LOAD_UTF8_CHARCLASS(space," ");
2614 		    if (!(OP(scan) == SPACE
2615 			  ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2616 			  : isSPACE_LC_utf8((U8*)locinput)))
2617 		    {
2618 			sayNO;
2619 		    }
2620 		    locinput += PL_utf8skip[nextchr];
2621 		    nextchr = UCHARAT(locinput);
2622 		    break;
2623 		}
2624 		if (!(OP(scan) == SPACE
2625 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2626 		    sayNO;
2627 		nextchr = UCHARAT(++locinput);
2628 	    }
2629 	    else {
2630 		if (!(OP(scan) == SPACE
2631 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2632 		    sayNO;
2633 		nextchr = UCHARAT(++locinput);
2634 	    }
2635 	    break;
2636 	case NSPACEL:
2637 	    PL_reg_flags |= RF_tainted;
2638 	    /* FALL THROUGH */
2639 	case NSPACE:
2640 	    if (!nextchr && locinput >= PL_regeol)
2641 		sayNO;
2642 	    if (do_utf8) {
2643 		LOAD_UTF8_CHARCLASS(space," ");
2644 		if (OP(scan) == NSPACE
2645 		    ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2646 		    : isSPACE_LC_utf8((U8*)locinput))
2647 		{
2648 		    sayNO;
2649 		}
2650 		locinput += PL_utf8skip[nextchr];
2651 		nextchr = UCHARAT(locinput);
2652 		break;
2653 	    }
2654 	    if (OP(scan) == NSPACE
2655 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
2656 		sayNO;
2657 	    nextchr = UCHARAT(++locinput);
2658 	    break;
2659 	case DIGITL:
2660 	    PL_reg_flags |= RF_tainted;
2661 	    /* FALL THROUGH */
2662 	case DIGIT:
2663 	    if (!nextchr)
2664 		sayNO;
2665 	    if (do_utf8) {
2666 		LOAD_UTF8_CHARCLASS(digit,"0");
2667 		if (!(OP(scan) == DIGIT
2668 		      ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2669 		      : isDIGIT_LC_utf8((U8*)locinput)))
2670 		{
2671 		    sayNO;
2672 		}
2673 		locinput += PL_utf8skip[nextchr];
2674 		nextchr = UCHARAT(locinput);
2675 		break;
2676 	    }
2677 	    if (!(OP(scan) == DIGIT
2678 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2679 		sayNO;
2680 	    nextchr = UCHARAT(++locinput);
2681 	    break;
2682 	case NDIGITL:
2683 	    PL_reg_flags |= RF_tainted;
2684 	    /* FALL THROUGH */
2685 	case NDIGIT:
2686 	    if (!nextchr && locinput >= PL_regeol)
2687 		sayNO;
2688 	    if (do_utf8) {
2689 		LOAD_UTF8_CHARCLASS(digit,"0");
2690 		if (OP(scan) == NDIGIT
2691 		    ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2692 		    : isDIGIT_LC_utf8((U8*)locinput))
2693 		{
2694 		    sayNO;
2695 		}
2696 		locinput += PL_utf8skip[nextchr];
2697 		nextchr = UCHARAT(locinput);
2698 		break;
2699 	    }
2700 	    if (OP(scan) == NDIGIT
2701 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2702 		sayNO;
2703 	    nextchr = UCHARAT(++locinput);
2704 	    break;
2705 	case CLUMP:
2706 	    if (locinput >= PL_regeol)
2707 		sayNO;
2708 	    if  (do_utf8) {
2709 		LOAD_UTF8_CHARCLASS(mark,"~");
2710 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2711 		    sayNO;
2712 		locinput += PL_utf8skip[nextchr];
2713 		while (locinput < PL_regeol &&
2714 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2715 		    locinput += UTF8SKIP(locinput);
2716 		if (locinput > PL_regeol)
2717 		    sayNO;
2718 	    }
2719 	    else
2720 	       locinput++;
2721 	    nextchr = UCHARAT(locinput);
2722 	    break;
2723 	case REFFL:
2724 	    PL_reg_flags |= RF_tainted;
2725 	    /* FALL THROUGH */
2726         case REF:
2727 	case REFF:
2728 	    n = ARG(scan);  /* which paren pair */
2729 	    ln = PL_regstartp[n];
2730 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2731 	    if ((I32)*PL_reglastparen < n || ln == -1)
2732 		sayNO;			/* Do not match unless seen CLOSEn. */
2733 	    if (ln == PL_regendp[n])
2734 		break;
2735 
2736 	    s = PL_bostr + ln;
2737 	    if (do_utf8 && OP(scan) != REF) {	/* REF can do byte comparison */
2738 		char *l = locinput;
2739 		char *e = PL_bostr + PL_regendp[n];
2740 		/*
2741 		 * Note that we can't do the "other character" lookup trick as
2742 		 * in the 8-bit case (no pun intended) because in Unicode we
2743 		 * have to map both upper and title case to lower case.
2744 		 */
2745 		if (OP(scan) == REFF) {
2746 		    STRLEN ulen1, ulen2;
2747 		    U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2748 		    U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2749 		    while (s < e) {
2750 			if (l >= PL_regeol)
2751 			    sayNO;
2752 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2753 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2754 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2755 			    sayNO;
2756 			s += ulen1;
2757 			l += ulen2;
2758 		    }
2759 		}
2760 		locinput = l;
2761 		nextchr = UCHARAT(locinput);
2762 		break;
2763 	    }
2764 
2765 	    /* Inline the first character, for speed. */
2766 	    if (UCHARAT(s) != nextchr &&
2767 		(OP(scan) == REF ||
2768 		 (UCHARAT(s) != ((OP(scan) == REFF
2769 				  ? PL_fold : PL_fold_locale)[nextchr]))))
2770 		sayNO;
2771 	    ln = PL_regendp[n] - ln;
2772 	    if (locinput + ln > PL_regeol)
2773 		sayNO;
2774 	    if (ln > 1 && (OP(scan) == REF
2775 			   ? memNE(s, locinput, ln)
2776 			   : (OP(scan) == REFF
2777 			      ? ibcmp(s, locinput, ln)
2778 			      : ibcmp_locale(s, locinput, ln))))
2779 		sayNO;
2780 	    locinput += ln;
2781 	    nextchr = UCHARAT(locinput);
2782 	    break;
2783 
2784 	case NOTHING:
2785 	case TAIL:
2786 	    break;
2787 	case BACK:
2788 	    break;
2789 	case EVAL:
2790 	{
2791 	    dSP;
2792 	    OP_4tree *oop = PL_op;
2793 	    COP *ocurcop = PL_curcop;
2794 	    SV **ocurpad = PL_curpad;
2795 	    SV *ret;
2796 
2797 	    n = ARG(scan);
2798 	    PL_op = (OP_4tree*)PL_regdata->data[n];
2799 	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2800 	    PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2801 	    PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2802 
2803 	    {
2804 		SV **before = SP;
2805 		CALLRUNOPS(aTHX);			/* Scalar context. */
2806 		SPAGAIN;
2807 		if (SP == before)
2808 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2809 		else {
2810 		    ret = POPs;
2811 		    PUTBACK;
2812 		}
2813 	    }
2814 
2815 	    PL_op = oop;
2816 	    PL_curpad = ocurpad;
2817 	    PL_curcop = ocurcop;
2818 	    if (logical) {
2819 		if (logical == 2) {	/* Postponed subexpression. */
2820 		    regexp *re;
2821 		    MAGIC *mg = Null(MAGIC*);
2822 		    re_cc_state state;
2823 		    CHECKPOINT cp, lastcp;
2824 
2825 		    if(SvROK(ret) || SvRMAGICAL(ret)) {
2826 			SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2827 
2828 			if(SvMAGICAL(sv))
2829 			    mg = mg_find(sv, PERL_MAGIC_qr);
2830 		    }
2831 		    if (mg) {
2832 			re = (regexp *)mg->mg_obj;
2833 			(void)ReREFCNT_inc(re);
2834 		    }
2835 		    else {
2836 			STRLEN len;
2837 			char *t = SvPV(ret, len);
2838 			PMOP pm;
2839 			char *oprecomp = PL_regprecomp;
2840 			I32 osize = PL_regsize;
2841 			I32 onpar = PL_regnpar;
2842 
2843 			Zero(&pm, 1, PMOP);
2844 			re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2845 			if (!(SvFLAGS(ret)
2846 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2847 			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
2848 					PERL_MAGIC_qr,0,0);
2849 			PL_regprecomp = oprecomp;
2850 			PL_regsize = osize;
2851 			PL_regnpar = onpar;
2852 		    }
2853 		    DEBUG_r(
2854 			PerlIO_printf(Perl_debug_log,
2855 				      "Entering embedded `%s%.60s%s%s'\n",
2856 				      PL_colors[0],
2857 				      re->precomp,
2858 				      PL_colors[1],
2859 				      (strlen(re->precomp) > 60 ? "..." : ""))
2860 			);
2861 		    state.node = next;
2862 		    state.prev = PL_reg_call_cc;
2863 		    state.cc = PL_regcc;
2864 		    state.re = PL_reg_re;
2865 
2866 		    PL_regcc = 0;
2867 
2868 		    cp = regcppush(0);	/* Save *all* the positions. */
2869 		    REGCP_SET(lastcp);
2870 		    cache_re(re);
2871 		    state.ss = PL_savestack_ix;
2872 		    *PL_reglastparen = 0;
2873 		    *PL_reglastcloseparen = 0;
2874 		    PL_reg_call_cc = &state;
2875 		    PL_reginput = locinput;
2876 
2877 		    /* XXXX This is too dramatic a measure... */
2878 		    PL_reg_maxiter = 0;
2879 
2880 		    if (regmatch(re->program + 1)) {
2881 			/* Even though we succeeded, we need to restore
2882 			   global variables, since we may be wrapped inside
2883 			   SUSPEND, thus the match may be not finished yet. */
2884 
2885 			/* XXXX Do this only if SUSPENDed? */
2886 			PL_reg_call_cc = state.prev;
2887 			PL_regcc = state.cc;
2888 			PL_reg_re = state.re;
2889 			cache_re(PL_reg_re);
2890 
2891 			/* XXXX This is too dramatic a measure... */
2892 			PL_reg_maxiter = 0;
2893 
2894 			/* These are needed even if not SUSPEND. */
2895 			ReREFCNT_dec(re);
2896 			regcpblow(cp);
2897 			sayYES;
2898 		    }
2899 		    ReREFCNT_dec(re);
2900 		    REGCP_UNWIND(lastcp);
2901 		    regcppop();
2902 		    PL_reg_call_cc = state.prev;
2903 		    PL_regcc = state.cc;
2904 		    PL_reg_re = state.re;
2905 		    cache_re(PL_reg_re);
2906 
2907 		    /* XXXX This is too dramatic a measure... */
2908 		    PL_reg_maxiter = 0;
2909 
2910 		    logical = 0;
2911 		    sayNO;
2912 		}
2913 		sw = SvTRUE(ret);
2914 		logical = 0;
2915 	    }
2916 	    else
2917 		sv_setsv(save_scalar(PL_replgv), ret);
2918 	    break;
2919 	}
2920 	case OPEN:
2921 	    n = ARG(scan);  /* which paren pair */
2922 	    PL_reg_start_tmp[n] = locinput;
2923 	    if (n > PL_regsize)
2924 		PL_regsize = n;
2925 	    break;
2926 	case CLOSE:
2927 	    n = ARG(scan);  /* which paren pair */
2928 	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2929 	    PL_regendp[n] = locinput - PL_bostr;
2930 	    if (n > (I32)*PL_reglastparen)
2931 		*PL_reglastparen = n;
2932 	    *PL_reglastcloseparen = n;
2933 	    break;
2934 	case GROUPP:
2935 	    n = ARG(scan);  /* which paren pair */
2936 	    sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2937 	    break;
2938 	case IFTHEN:
2939 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2940 	    if (sw)
2941 		next = NEXTOPER(NEXTOPER(scan));
2942 	    else {
2943 		next = scan + ARG(scan);
2944 		if (OP(next) == IFTHEN) /* Fake one. */
2945 		    next = NEXTOPER(NEXTOPER(next));
2946 	    }
2947 	    break;
2948 	case LOGICAL:
2949 	    logical = scan->flags;
2950 	    break;
2951 /*******************************************************************
2952  PL_regcc contains infoblock about the innermost (...)* loop, and
2953  a pointer to the next outer infoblock.
2954 
2955  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2956 
2957    1) After matching X, regnode for CURLYX is processed;
2958 
2959    2) This regnode creates infoblock on the stack, and calls
2960       regmatch() recursively with the starting point at WHILEM node;
2961 
2962    3) Each hit of WHILEM node tries to match A and Z (in the order
2963       depending on the current iteration, min/max of {min,max} and
2964       greediness).  The information about where are nodes for "A"
2965       and "Z" is read from the infoblock, as is info on how many times "A"
2966       was already matched, and greediness.
2967 
2968    4) After A matches, the same WHILEM node is hit again.
2969 
2970    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2971       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
2972       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2973       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
2974       of the external loop.
2975 
2976  Currently present infoblocks form a tree with a stem formed by PL_curcc
2977  and whatever it mentions via ->next, and additional attached trees
2978  corresponding to temporarily unset infoblocks as in "5" above.
2979 
2980  In the following picture infoblocks for outer loop of
2981  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
2982  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
2983  infoblocks are drawn below the "reset" infoblock.
2984 
2985  In fact in the picture below we do not show failed matches for Z and T
2986  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
2987  more obvious *why* one needs to *temporary* unset infoblocks.]
2988 
2989   Matched	REx position	InfoBlocks	Comment
2990   		(Y(A)*?Z)*?T	x
2991   		Y(A)*?Z)*?T	x <- O
2992   Y		(A)*?Z)*?T	x <- O
2993   Y		A)*?Z)*?T	x <- O <- I
2994   YA		)*?Z)*?T	x <- O <- I
2995   YA		A)*?Z)*?T	x <- O <- I
2996   YAA		)*?Z)*?T	x <- O <- I
2997   YAA		Z)*?T		x <- O		# Temporary unset I
2998 				     I
2999 
3000   YAAZ		Y(A)*?Z)*?T	x <- O
3001 				     I
3002 
3003   YAAZY		(A)*?Z)*?T	x <- O
3004 				     I
3005 
3006   YAAZY		A)*?Z)*?T	x <- O <- I
3007 				     I
3008 
3009   YAAZYA	)*?Z)*?T	x <- O <- I
3010 				     I
3011 
3012   YAAZYA	Z)*?T		x <- O		# Temporary unset I
3013 				     I,I
3014 
3015   YAAZYAZ	)*?T		x <- O
3016 				     I,I
3017 
3018   YAAZYAZ	T		x		# Temporary unset O
3019 				O
3020 				I,I
3021 
3022   YAAZYAZT			x
3023 				O
3024 				I,I
3025  *******************************************************************/
3026 	case CURLYX: {
3027 		CURCUR cc;
3028 		CHECKPOINT cp = PL_savestack_ix;
3029 		/* No need to save/restore up to this paren */
3030 		I32 parenfloor = scan->flags;
3031 
3032 		if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3033 		    next += ARG(next);
3034 		cc.oldcc = PL_regcc;
3035 		PL_regcc = &cc;
3036 		/* XXXX Probably it is better to teach regpush to support
3037 		   parenfloor > PL_regsize... */
3038 		if (parenfloor > (I32)*PL_reglastparen)
3039 		    parenfloor = *PL_reglastparen; /* Pessimization... */
3040 		cc.parenfloor = parenfloor;
3041 		cc.cur = -1;
3042 		cc.min = ARG1(scan);
3043 		cc.max  = ARG2(scan);
3044 		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3045 		cc.next = next;
3046 		cc.minmod = minmod;
3047 		cc.lastloc = 0;
3048 		PL_reginput = locinput;
3049 		n = regmatch(PREVOPER(next));	/* start on the WHILEM */
3050 		regcpblow(cp);
3051 		PL_regcc = cc.oldcc;
3052 		saySAME(n);
3053 	    }
3054 	    /* NOT REACHED */
3055 	case WHILEM: {
3056 		/*
3057 		 * This is really hard to understand, because after we match
3058 		 * what we're trying to match, we must make sure the rest of
3059 		 * the REx is going to match for sure, and to do that we have
3060 		 * to go back UP the parse tree by recursing ever deeper.  And
3061 		 * if it fails, we have to reset our parent's current state
3062 		 * that we can try again after backing off.
3063 		 */
3064 
3065 		CHECKPOINT cp, lastcp;
3066 		CURCUR* cc = PL_regcc;
3067 		char *lastloc = cc->lastloc; /* Detection of 0-len. */
3068 
3069 		n = cc->cur + 1;	/* how many we know we matched */
3070 		PL_reginput = locinput;
3071 
3072 		DEBUG_r(
3073 		    PerlIO_printf(Perl_debug_log,
3074 				  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3075 				  REPORT_CODE_OFF+PL_regindent*2, "",
3076 				  (long)n, (long)cc->min,
3077 				  (long)cc->max, PTR2UV(cc))
3078 		    );
3079 
3080 		/* If degenerate scan matches "", assume scan done. */
3081 
3082 		if (locinput == cc->lastloc && n >= cc->min) {
3083 		    PL_regcc = cc->oldcc;
3084 		    if (PL_regcc)
3085 			ln = PL_regcc->cur;
3086 		    DEBUG_r(
3087 			PerlIO_printf(Perl_debug_log,
3088 			   "%*s  empty match detected, try continuation...\n",
3089 			   REPORT_CODE_OFF+PL_regindent*2, "")
3090 			);
3091 		    if (regmatch(cc->next))
3092 			sayYES;
3093 		    if (PL_regcc)
3094 			PL_regcc->cur = ln;
3095 		    PL_regcc = cc;
3096 		    sayNO;
3097 		}
3098 
3099 		/* First just match a string of min scans. */
3100 
3101 		if (n < cc->min) {
3102 		    cc->cur = n;
3103 		    cc->lastloc = locinput;
3104 		    if (regmatch(cc->scan))
3105 			sayYES;
3106 		    cc->cur = n - 1;
3107 		    cc->lastloc = lastloc;
3108 		    sayNO;
3109 		}
3110 
3111 		if (scan->flags) {
3112 		    /* Check whether we already were at this position.
3113 			Postpone detection until we know the match is not
3114 			*that* much linear. */
3115 		if (!PL_reg_maxiter) {
3116 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3117 		    PL_reg_leftiter = PL_reg_maxiter;
3118 		}
3119 		if (PL_reg_leftiter-- == 0) {
3120 		    I32 size = (PL_reg_maxiter + 7)/8;
3121 		    if (PL_reg_poscache) {
3122 			if ((I32)PL_reg_poscache_size < size) {
3123 			    Renew(PL_reg_poscache, size, char);
3124 			    PL_reg_poscache_size = size;
3125 			}
3126 			Zero(PL_reg_poscache, size, char);
3127 		    }
3128 		    else {
3129 			PL_reg_poscache_size = size;
3130 			Newz(29, PL_reg_poscache, size, char);
3131 		    }
3132 		    DEBUG_r(
3133 			PerlIO_printf(Perl_debug_log,
3134 	      "%sDetected a super-linear match, switching on caching%s...\n",
3135 				      PL_colors[4], PL_colors[5])
3136 			);
3137 		}
3138 		if (PL_reg_leftiter < 0) {
3139 		    I32 o = locinput - PL_bostr, b;
3140 
3141 		    o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3142 		    b = o % 8;
3143 		    o /= 8;
3144 		    if (PL_reg_poscache[o] & (1<<b)) {
3145 		    DEBUG_r(
3146 			PerlIO_printf(Perl_debug_log,
3147 				      "%*s  already tried at this position...\n",
3148 				      REPORT_CODE_OFF+PL_regindent*2, "")
3149 			);
3150 			sayNO_SILENT;
3151 		    }
3152 		    PL_reg_poscache[o] |= (1<<b);
3153 		}
3154 		}
3155 
3156 		/* Prefer next over scan for minimal matching. */
3157 
3158 		if (cc->minmod) {
3159 		    PL_regcc = cc->oldcc;
3160 		    if (PL_regcc)
3161 			ln = PL_regcc->cur;
3162 		    cp = regcppush(cc->parenfloor);
3163 		    REGCP_SET(lastcp);
3164 		    if (regmatch(cc->next)) {
3165 			regcpblow(cp);
3166 			sayYES;	/* All done. */
3167 		    }
3168 		    REGCP_UNWIND(lastcp);
3169 		    regcppop();
3170 		    if (PL_regcc)
3171 			PL_regcc->cur = ln;
3172 		    PL_regcc = cc;
3173 
3174 		    if (n >= cc->max) {	/* Maximum greed exceeded? */
3175 			if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3176 			    && !(PL_reg_flags & RF_warned)) {
3177 			    PL_reg_flags |= RF_warned;
3178 			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3179 				 "Complex regular subexpression recursion",
3180 				 REG_INFTY - 1);
3181 			}
3182 			sayNO;
3183 		    }
3184 
3185 		    DEBUG_r(
3186 			PerlIO_printf(Perl_debug_log,
3187 				      "%*s  trying longer...\n",
3188 				      REPORT_CODE_OFF+PL_regindent*2, "")
3189 			);
3190 		    /* Try scanning more and see if it helps. */
3191 		    PL_reginput = locinput;
3192 		    cc->cur = n;
3193 		    cc->lastloc = locinput;
3194 		    cp = regcppush(cc->parenfloor);
3195 		    REGCP_SET(lastcp);
3196 		    if (regmatch(cc->scan)) {
3197 			regcpblow(cp);
3198 			sayYES;
3199 		    }
3200 		    REGCP_UNWIND(lastcp);
3201 		    regcppop();
3202 		    cc->cur = n - 1;
3203 		    cc->lastloc = lastloc;
3204 		    sayNO;
3205 		}
3206 
3207 		/* Prefer scan over next for maximal matching. */
3208 
3209 		if (n < cc->max) {	/* More greed allowed? */
3210 		    cp = regcppush(cc->parenfloor);
3211 		    cc->cur = n;
3212 		    cc->lastloc = locinput;
3213 		    REGCP_SET(lastcp);
3214 		    if (regmatch(cc->scan)) {
3215 			regcpblow(cp);
3216 			sayYES;
3217 		    }
3218 		    REGCP_UNWIND(lastcp);
3219 		    regcppop();		/* Restore some previous $<digit>s? */
3220 		    PL_reginput = locinput;
3221 		    DEBUG_r(
3222 			PerlIO_printf(Perl_debug_log,
3223 				      "%*s  failed, try continuation...\n",
3224 				      REPORT_CODE_OFF+PL_regindent*2, "")
3225 			);
3226 		}
3227 		if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3228 			&& !(PL_reg_flags & RF_warned)) {
3229 		    PL_reg_flags |= RF_warned;
3230 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3231 			 "Complex regular subexpression recursion",
3232 			 REG_INFTY - 1);
3233 		}
3234 
3235 		/* Failed deeper matches of scan, so see if this one works. */
3236 		PL_regcc = cc->oldcc;
3237 		if (PL_regcc)
3238 		    ln = PL_regcc->cur;
3239 		if (regmatch(cc->next))
3240 		    sayYES;
3241 		if (PL_regcc)
3242 		    PL_regcc->cur = ln;
3243 		PL_regcc = cc;
3244 		cc->cur = n - 1;
3245 		cc->lastloc = lastloc;
3246 		sayNO;
3247 	    }
3248 	    /* NOT REACHED */
3249 	case BRANCHJ:
3250 	    next = scan + ARG(scan);
3251 	    if (next == scan)
3252 		next = NULL;
3253 	    inner = NEXTOPER(NEXTOPER(scan));
3254 	    goto do_branch;
3255 	case BRANCH:
3256 	    inner = NEXTOPER(scan);
3257 	  do_branch:
3258 	    {
3259 		c1 = OP(scan);
3260 		if (OP(next) != c1)	/* No choice. */
3261 		    next = inner;	/* Avoid recursion. */
3262 		else {
3263 		    I32 lastparen = *PL_reglastparen;
3264 		    I32 unwind1;
3265 		    re_unwind_branch_t *uw;
3266 
3267 		    /* Put unwinding data on stack */
3268 		    unwind1 = SSNEWt(1,re_unwind_branch_t);
3269 		    uw = SSPTRt(unwind1,re_unwind_branch_t);
3270 		    uw->prev = unwind;
3271 		    unwind = unwind1;
3272 		    uw->type = ((c1 == BRANCH)
3273 				? RE_UNWIND_BRANCH
3274 				: RE_UNWIND_BRANCHJ);
3275 		    uw->lastparen = lastparen;
3276 		    uw->next = next;
3277 		    uw->locinput = locinput;
3278 		    uw->nextchr = nextchr;
3279 #ifdef DEBUGGING
3280 		    uw->regindent = ++PL_regindent;
3281 #endif
3282 
3283 		    REGCP_SET(uw->lastcp);
3284 
3285 		    /* Now go into the first branch */
3286 		    next = inner;
3287 		}
3288 	    }
3289 	    break;
3290 	case MINMOD:
3291 	    minmod = 1;
3292 	    break;
3293 	case CURLYM:
3294 	{
3295 	    I32 l = 0;
3296 	    CHECKPOINT lastcp;
3297 
3298 	    /* We suppose that the next guy does not need
3299 	       backtracking: in particular, it is of constant length,
3300 	       and has no parenths to influence future backrefs. */
3301 	    ln = ARG1(scan);  /* min to match */
3302 	    n  = ARG2(scan);  /* max to match */
3303 	    paren = scan->flags;
3304 	    if (paren) {
3305 		if (paren > PL_regsize)
3306 		    PL_regsize = paren;
3307 		if (paren > (I32)*PL_reglastparen)
3308 		    *PL_reglastparen = paren;
3309 	    }
3310 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3311 	    if (paren)
3312 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
3313 	    PL_reginput = locinput;
3314 	    if (minmod) {
3315 		minmod = 0;
3316 		if (ln && regrepeat_hard(scan, ln, &l) < ln)
3317 		    sayNO;
3318 		/* if we matched something zero-length we don't need to
3319 		   backtrack - capturing parens are already defined, so
3320 		   the caveat in the maximal case doesn't apply
3321 
3322 		   XXXX if ln == 0, we can redo this check first time
3323 		   through the following loop
3324 		*/
3325 		if (ln && l == 0)
3326 		    n = ln;	/* don't backtrack */
3327 		locinput = PL_reginput;
3328 		if (HAS_TEXT(next) || JUMPABLE(next)) {
3329 		    regnode *text_node = next;
3330 
3331 		    if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3332 
3333 		    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3334 		    else {
3335 			if (PL_regkind[(U8)OP(text_node)] == REF) {
3336 			    I32 n, ln;
3337 			    n = ARG(text_node);  /* which paren pair */
3338 			    ln = PL_regstartp[n];
3339 			    /* assume yes if we haven't seen CLOSEn */
3340 			    if (
3341 				(I32)*PL_reglastparen < n ||
3342 				ln == -1 ||
3343 				ln == PL_regendp[n]
3344 			    ) {
3345 				c1 = c2 = -1000;
3346 				goto assume_ok_MM;
3347 			    }
3348 			    c1 = *(PL_bostr + ln);
3349 			}
3350 			else { c1 = (U8)*STRING(text_node); }
3351 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3352 			    c2 = PL_fold[c1];
3353 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3354 			    c2 = PL_fold_locale[c1];
3355 			else
3356 			    c2 = c1;
3357 		    }
3358 		}
3359 		else
3360 		    c1 = c2 = -1000;
3361 	    assume_ok_MM:
3362 		REGCP_SET(lastcp);
3363 		/* This may be improved if l == 0.  */
3364 		while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3365 		    /* If it could work, try it. */
3366 		    if (c1 == -1000 ||
3367 			UCHARAT(PL_reginput) == c1 ||
3368 			UCHARAT(PL_reginput) == c2)
3369 		    {
3370 			if (paren) {
3371 			    if (ln) {
3372 				PL_regstartp[paren] =
3373 				    HOPc(PL_reginput, -l) - PL_bostr;
3374 				PL_regendp[paren] = PL_reginput - PL_bostr;
3375 			    }
3376 			    else
3377 				PL_regendp[paren] = -1;
3378 			}
3379 			if (regmatch(next))
3380 			    sayYES;
3381 			REGCP_UNWIND(lastcp);
3382 		    }
3383 		    /* Couldn't or didn't -- move forward. */
3384 		    PL_reginput = locinput;
3385 		    if (regrepeat_hard(scan, 1, &l)) {
3386 			ln++;
3387 			locinput = PL_reginput;
3388 		    }
3389 		    else
3390 			sayNO;
3391 		}
3392 	    }
3393 	    else {
3394 		n = regrepeat_hard(scan, n, &l);
3395 		/* if we matched something zero-length we don't need to
3396 		   backtrack, unless the minimum count is zero and we
3397 		   are capturing the result - in that case the capture
3398 		   being defined or not may affect later execution
3399 		*/
3400 		if (n != 0 && l == 0 && !(paren && ln == 0))
3401 		    ln = n;	/* don't backtrack */
3402 		locinput = PL_reginput;
3403 		DEBUG_r(
3404 		    PerlIO_printf(Perl_debug_log,
3405 				  "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3406 				  (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3407 				  (IV) n, (IV)l)
3408 		    );
3409 		if (n >= ln) {
3410 		    if (HAS_TEXT(next) || JUMPABLE(next)) {
3411 			regnode *text_node = next;
3412 
3413 			if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3414 
3415 			if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3416 			else {
3417 			    if (PL_regkind[(U8)OP(text_node)] == REF) {
3418 				I32 n, ln;
3419 				n = ARG(text_node);  /* which paren pair */
3420 				ln = PL_regstartp[n];
3421 				/* assume yes if we haven't seen CLOSEn */
3422 				if (
3423 				    (I32)*PL_reglastparen < n ||
3424 				    ln == -1 ||
3425 				    ln == PL_regendp[n]
3426 				) {
3427 				    c1 = c2 = -1000;
3428 				    goto assume_ok_REG;
3429 				}
3430 				c1 = *(PL_bostr + ln);
3431 			    }
3432 			    else { c1 = (U8)*STRING(text_node); }
3433 
3434 			    if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3435 				c2 = PL_fold[c1];
3436 			    else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3437 				c2 = PL_fold_locale[c1];
3438 			    else
3439 				c2 = c1;
3440 			}
3441 		    }
3442 		    else
3443 			c1 = c2 = -1000;
3444 		}
3445 	    assume_ok_REG:
3446 		REGCP_SET(lastcp);
3447 		while (n >= ln) {
3448 		    /* If it could work, try it. */
3449 		    if (c1 == -1000 ||
3450 			UCHARAT(PL_reginput) == c1 ||
3451 			UCHARAT(PL_reginput) == c2)
3452 		    {
3453 			DEBUG_r(
3454 				PerlIO_printf(Perl_debug_log,
3455 					      "%*s  trying tail with n=%"IVdf"...\n",
3456 					      (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3457 			    );
3458 			if (paren) {
3459 			    if (n) {
3460 				PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3461 				PL_regendp[paren] = PL_reginput - PL_bostr;
3462 			    }
3463 			    else
3464 				PL_regendp[paren] = -1;
3465 			}
3466 			if (regmatch(next))
3467 			    sayYES;
3468 			REGCP_UNWIND(lastcp);
3469 		    }
3470 		    /* Couldn't or didn't -- back up. */
3471 		    n--;
3472 		    locinput = HOPc(locinput, -l);
3473 		    PL_reginput = locinput;
3474 		}
3475 	    }
3476 	    sayNO;
3477 	    break;
3478 	}
3479 	case CURLYN:
3480 	    paren = scan->flags;	/* Which paren to set */
3481 	    if (paren > PL_regsize)
3482 		PL_regsize = paren;
3483 	    if (paren > (I32)*PL_reglastparen)
3484 		*PL_reglastparen = paren;
3485 	    ln = ARG1(scan);  /* min to match */
3486 	    n  = ARG2(scan);  /* max to match */
3487             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3488 	    goto repeat;
3489 	case CURLY:
3490 	    paren = 0;
3491 	    ln = ARG1(scan);  /* min to match */
3492 	    n  = ARG2(scan);  /* max to match */
3493 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3494 	    goto repeat;
3495 	case STAR:
3496 	    ln = 0;
3497 	    n = REG_INFTY;
3498 	    scan = NEXTOPER(scan);
3499 	    paren = 0;
3500 	    goto repeat;
3501 	case PLUS:
3502 	    ln = 1;
3503 	    n = REG_INFTY;
3504 	    scan = NEXTOPER(scan);
3505 	    paren = 0;
3506 	  repeat:
3507 	    /*
3508 	    * Lookahead to avoid useless match attempts
3509 	    * when we know what character comes next.
3510 	    */
3511 
3512 	    /*
3513 	    * Used to only do .*x and .*?x, but now it allows
3514 	    * for )'s, ('s and (?{ ... })'s to be in the way
3515 	    * of the quantifier and the EXACT-like node.  -- japhy
3516 	    */
3517 
3518 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
3519 		U8 *s;
3520 		regnode *text_node = next;
3521 
3522 		if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3523 
3524 		if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3525 		else {
3526 		    if (PL_regkind[(U8)OP(text_node)] == REF) {
3527 			I32 n, ln;
3528 			n = ARG(text_node);  /* which paren pair */
3529 			ln = PL_regstartp[n];
3530 			/* assume yes if we haven't seen CLOSEn */
3531 			if (
3532 			    (I32)*PL_reglastparen < n ||
3533 			    ln == -1 ||
3534 			    ln == PL_regendp[n]
3535 			) {
3536 			    c1 = c2 = -1000;
3537 			    goto assume_ok_easy;
3538 			}
3539 			s = (U8*)PL_bostr + ln;
3540 		    }
3541 		    else { s = (U8*)STRING(text_node); }
3542 
3543 		    if (!UTF) {
3544 			c2 = c1 = *s;
3545 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3546 			    c2 = PL_fold[c1];
3547 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3548 			    c2 = PL_fold_locale[c1];
3549 		    }
3550 		    else { /* UTF */
3551 			if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3552 			     STRLEN ulen1, ulen2;
3553 			     U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3554 			     U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3555 
3556 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3557 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3558 
3559 			     c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
3560 						 ckWARN(WARN_UTF8) ?
3561 						 0 : UTF8_ALLOW_ANY);
3562 			     c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
3563 						 ckWARN(WARN_UTF8) ?
3564 						 0 : UTF8_ALLOW_ANY);
3565 			}
3566 			else {
3567 			    c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
3568 						     ckWARN(WARN_UTF8) ?
3569 						     0 : UTF8_ALLOW_ANY);
3570 			}
3571 		    }
3572 		}
3573 	    }
3574 	    else
3575 		c1 = c2 = -1000;
3576 	assume_ok_easy:
3577 	    PL_reginput = locinput;
3578 	    if (minmod) {
3579 		CHECKPOINT lastcp;
3580 		minmod = 0;
3581 		if (ln && regrepeat(scan, ln) < ln)
3582 		    sayNO;
3583 		locinput = PL_reginput;
3584 		REGCP_SET(lastcp);
3585 		if (c1 != -1000) {
3586 		    char *e; /* Should not check after this */
3587 		    char *old = locinput;
3588 		    int count = 0;
3589 
3590 		    if  (n == REG_INFTY) {
3591 			e = PL_regeol - 1;
3592 			if (do_utf8)
3593 			    while (UTF8_IS_CONTINUATION(*(U8*)e))
3594 				e--;
3595 		    }
3596 		    else if (do_utf8) {
3597 			int m = n - ln;
3598 			for (e = locinput;
3599 			     m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3600 			    e += UTF8SKIP(e);
3601 		    }
3602 		    else {
3603 			e = locinput + n - ln;
3604 			if (e >= PL_regeol)
3605 			    e = PL_regeol - 1;
3606 		    }
3607 		    while (1) {
3608 			/* Find place 'next' could work */
3609 			if (!do_utf8) {
3610 			    if (c1 == c2) {
3611 				while (locinput <= e &&
3612 				       UCHARAT(locinput) != c1)
3613 				    locinput++;
3614 			    } else {
3615 				while (locinput <= e
3616 				       && UCHARAT(locinput) != c1
3617 				       && UCHARAT(locinput) != c2)
3618 				    locinput++;
3619 			    }
3620 			    count = locinput - old;
3621 			}
3622 			else {
3623 			    STRLEN len;
3624 			    if (c1 == c2) {
3625 				/* count initialised to
3626 				 * utf8_distance(old, locinput) */
3627 				while (locinput <= e &&
3628 				       utf8n_to_uvchr((U8*)locinput,
3629 						      UTF8_MAXLEN, &len,
3630 						      ckWARN(WARN_UTF8) ?
3631 						      0 : UTF8_ALLOW_ANY) != (UV)c1) {
3632 				    locinput += len;
3633 				    count++;
3634 				}
3635 			    } else {
3636 				/* count initialised to
3637 				 * utf8_distance(old, locinput) */
3638 				while (locinput <= e) {
3639 				    UV c = utf8n_to_uvchr((U8*)locinput,
3640 							  UTF8_MAXLEN, &len,
3641 							  ckWARN(WARN_UTF8) ?
3642 							  0 : UTF8_ALLOW_ANY);
3643 				    if (c == (UV)c1 || c == (UV)c2)
3644 					break;
3645 				    locinput += len;
3646 				    count++;
3647 				}
3648 			    }
3649 			}
3650 			if (locinput > e)
3651 			    sayNO;
3652 			/* PL_reginput == old now */
3653 			if (locinput != old) {
3654 			    ln = 1;	/* Did some */
3655 			    if (regrepeat(scan, count) < count)
3656 				sayNO;
3657 			}
3658 			/* PL_reginput == locinput now */
3659 			TRYPAREN(paren, ln, locinput);
3660 			PL_reginput = locinput;	/* Could be reset... */
3661 			REGCP_UNWIND(lastcp);
3662 			/* Couldn't or didn't -- move forward. */
3663 			old = locinput;
3664 			if (do_utf8)
3665 			    locinput += UTF8SKIP(locinput);
3666 			else
3667 			    locinput++;
3668 			count = 1;
3669 		    }
3670 		}
3671 		else
3672 		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3673 		    UV c;
3674 		    if (c1 != -1000) {
3675 			if (do_utf8)
3676 			    c = utf8n_to_uvchr((U8*)PL_reginput,
3677 					       UTF8_MAXLEN, 0,
3678 					       ckWARN(WARN_UTF8) ?
3679 					       0 : UTF8_ALLOW_ANY);
3680 			else
3681 			    c = UCHARAT(PL_reginput);
3682 			/* If it could work, try it. */
3683 		        if (c == (UV)c1 || c == (UV)c2)
3684 		        {
3685 			    TRYPAREN(paren, n, PL_reginput);
3686 			    REGCP_UNWIND(lastcp);
3687 		        }
3688 		    }
3689 		    /* If it could work, try it. */
3690 		    else if (c1 == -1000)
3691 		    {
3692 			TRYPAREN(paren, n, PL_reginput);
3693 			REGCP_UNWIND(lastcp);
3694 		    }
3695 		    /* Couldn't or didn't -- move forward. */
3696 		    PL_reginput = locinput;
3697 		    if (regrepeat(scan, 1)) {
3698 			ln++;
3699 			locinput = PL_reginput;
3700 		    }
3701 		    else
3702 			sayNO;
3703 		}
3704 	    }
3705 	    else {
3706 		CHECKPOINT lastcp;
3707 		n = regrepeat(scan, n);
3708 		locinput = PL_reginput;
3709 		if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3710 		    ((!PL_multiline && OP(next) != MEOL) ||
3711 			OP(next) == SEOL || OP(next) == EOS))
3712 		{
3713 		    ln = n;			/* why back off? */
3714 		    /* ...because $ and \Z can match before *and* after
3715 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3716 		       We should back off by one in this case. */
3717 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3718 			ln--;
3719 		}
3720 		REGCP_SET(lastcp);
3721 		if (paren) {
3722 		    UV c = 0;
3723 		    while (n >= ln) {
3724 			if (c1 != -1000) {
3725 			    if (do_utf8)
3726 				c = utf8n_to_uvchr((U8*)PL_reginput,
3727 						   UTF8_MAXLEN, 0,
3728 						   ckWARN(WARN_UTF8) ?
3729 						   0 : UTF8_ALLOW_ANY);
3730 			    else
3731 				c = UCHARAT(PL_reginput);
3732 			}
3733 			/* If it could work, try it. */
3734 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3735 			    {
3736 				TRYPAREN(paren, n, PL_reginput);
3737 				REGCP_UNWIND(lastcp);
3738 			    }
3739 			/* Couldn't or didn't -- back up. */
3740 			n--;
3741 			PL_reginput = locinput = HOPc(locinput, -1);
3742 		    }
3743 		}
3744 		else {
3745 		    UV c = 0;
3746 		    while (n >= ln) {
3747 			if (c1 != -1000) {
3748 			    if (do_utf8)
3749 				c = utf8n_to_uvchr((U8*)PL_reginput,
3750 						   UTF8_MAXLEN, 0,
3751 						   ckWARN(WARN_UTF8) ?
3752 						   0 : UTF8_ALLOW_ANY);
3753 			    else
3754 				c = UCHARAT(PL_reginput);
3755 			}
3756 			/* If it could work, try it. */
3757 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3758 			    {
3759 				TRYPAREN(paren, n, PL_reginput);
3760 				REGCP_UNWIND(lastcp);
3761 			    }
3762 			/* Couldn't or didn't -- back up. */
3763 			n--;
3764 			PL_reginput = locinput = HOPc(locinput, -1);
3765 		    }
3766 		}
3767 	    }
3768 	    sayNO;
3769 	    break;
3770 	case END:
3771 	    if (PL_reg_call_cc) {
3772 		re_cc_state *cur_call_cc = PL_reg_call_cc;
3773 		CURCUR *cctmp = PL_regcc;
3774 		regexp *re = PL_reg_re;
3775 		CHECKPOINT cp, lastcp;
3776 
3777 		cp = regcppush(0);	/* Save *all* the positions. */
3778 		REGCP_SET(lastcp);
3779 		regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3780 						    the caller. */
3781 		PL_reginput = locinput;	/* Make position available to
3782 					   the callcc. */
3783 		cache_re(PL_reg_call_cc->re);
3784 		PL_regcc = PL_reg_call_cc->cc;
3785 		PL_reg_call_cc = PL_reg_call_cc->prev;
3786 		if (regmatch(cur_call_cc->node)) {
3787 		    PL_reg_call_cc = cur_call_cc;
3788 		    regcpblow(cp);
3789 		    sayYES;
3790 		}
3791 		REGCP_UNWIND(lastcp);
3792 		regcppop();
3793 		PL_reg_call_cc = cur_call_cc;
3794 		PL_regcc = cctmp;
3795 		PL_reg_re = re;
3796 		cache_re(re);
3797 
3798 		DEBUG_r(
3799 		    PerlIO_printf(Perl_debug_log,
3800 				  "%*s  continuation failed...\n",
3801 				  REPORT_CODE_OFF+PL_regindent*2, "")
3802 		    );
3803 		sayNO_SILENT;
3804 	    }
3805 	    if (locinput < PL_regtill) {
3806 		DEBUG_r(PerlIO_printf(Perl_debug_log,
3807 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3808 				      PL_colors[4],
3809 				      (long)(locinput - PL_reg_starttry),
3810 				      (long)(PL_regtill - PL_reg_starttry),
3811 				      PL_colors[5]));
3812 		sayNO_FINAL;		/* Cannot match: too short. */
3813 	    }
3814 	    PL_reginput = locinput;	/* put where regtry can find it */
3815 	    sayYES_FINAL;		/* Success! */
3816 	case SUCCEED:
3817 	    PL_reginput = locinput;	/* put where regtry can find it */
3818 	    sayYES_LOUD;		/* Success! */
3819 	case SUSPEND:
3820 	    n = 1;
3821 	    PL_reginput = locinput;
3822 	    goto do_ifmatch;
3823 	case UNLESSM:
3824 	    n = 0;
3825 	    if (scan->flags) {
3826 		s = HOPBACKc(locinput, scan->flags);
3827 		if (!s)
3828 		    goto say_yes;
3829 		PL_reginput = s;
3830 	    }
3831 	    else
3832 		PL_reginput = locinput;
3833 	    goto do_ifmatch;
3834 	case IFMATCH:
3835 	    n = 1;
3836 	    if (scan->flags) {
3837 		s = HOPBACKc(locinput, scan->flags);
3838 		if (!s)
3839 		    goto say_no;
3840 		PL_reginput = s;
3841 	    }
3842 	    else
3843 		PL_reginput = locinput;
3844 
3845 	  do_ifmatch:
3846 	    inner = NEXTOPER(NEXTOPER(scan));
3847 	    if (regmatch(inner) != n) {
3848 	      say_no:
3849 		if (logical) {
3850 		    logical = 0;
3851 		    sw = 0;
3852 		    goto do_longjump;
3853 		}
3854 		else
3855 		    sayNO;
3856 	    }
3857 	  say_yes:
3858 	    if (logical) {
3859 		logical = 0;
3860 		sw = 1;
3861 	    }
3862 	    if (OP(scan) == SUSPEND) {
3863 		locinput = PL_reginput;
3864 		nextchr = UCHARAT(locinput);
3865 	    }
3866 	    /* FALL THROUGH. */
3867 	case LONGJMP:
3868 	  do_longjump:
3869 	    next = scan + ARG(scan);
3870 	    if (next == scan)
3871 		next = NULL;
3872 	    break;
3873 	default:
3874 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3875 			  PTR2UV(scan), OP(scan));
3876 	    Perl_croak(aTHX_ "regexp memory corruption");
3877 	}
3878       reenter:
3879 	scan = next;
3880     }
3881 
3882     /*
3883     * We get here only if there's trouble -- normally "case END" is
3884     * the terminating point.
3885     */
3886     Perl_croak(aTHX_ "corrupted regexp pointers");
3887     /*NOTREACHED*/
3888     sayNO;
3889 
3890 yes_loud:
3891     DEBUG_r(
3892 	PerlIO_printf(Perl_debug_log,
3893 		      "%*s  %scould match...%s\n",
3894 		      REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3895 	);
3896     goto yes;
3897 yes_final:
3898     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3899 			  PL_colors[4],PL_colors[5]));
3900 yes:
3901 #ifdef DEBUGGING
3902     PL_regindent--;
3903 #endif
3904 
3905 #if 0					/* Breaks $^R */
3906     if (unwind)
3907 	regcpblow(firstcp);
3908 #endif
3909     return 1;
3910 
3911 no:
3912     DEBUG_r(
3913 	PerlIO_printf(Perl_debug_log,
3914 		      "%*s  %sfailed...%s\n",
3915 		      REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3916 	);
3917     goto do_no;
3918 no_final:
3919 do_no:
3920     if (unwind) {
3921 	re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3922 
3923 	switch (uw->type) {
3924 	case RE_UNWIND_BRANCH:
3925 	case RE_UNWIND_BRANCHJ:
3926 	{
3927 	    re_unwind_branch_t *uwb = &(uw->branch);
3928 	    I32 lastparen = uwb->lastparen;
3929 
3930 	    REGCP_UNWIND(uwb->lastcp);
3931 	    for (n = *PL_reglastparen; n > lastparen; n--)
3932 		PL_regendp[n] = -1;
3933 	    *PL_reglastparen = n;
3934 	    scan = next = uwb->next;
3935 	    if ( !scan ||
3936 		 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3937 			      ? BRANCH : BRANCHJ) ) {		/* Failure */
3938 		unwind = uwb->prev;
3939 #ifdef DEBUGGING
3940 		PL_regindent--;
3941 #endif
3942 		goto do_no;
3943 	    }
3944 	    /* Have more choice yet.  Reuse the same uwb.  */
3945 	    /*SUPPRESS 560*/
3946 	    if ((n = (uwb->type == RE_UNWIND_BRANCH
3947 		      ? NEXT_OFF(next) : ARG(next))))
3948 		next += n;
3949 	    else
3950 		next = NULL;	/* XXXX Needn't unwinding in this case... */
3951 	    uwb->next = next;
3952 	    next = NEXTOPER(scan);
3953 	    if (uwb->type == RE_UNWIND_BRANCHJ)
3954 		next = NEXTOPER(next);
3955 	    locinput = uwb->locinput;
3956 	    nextchr = uwb->nextchr;
3957 #ifdef DEBUGGING
3958 	    PL_regindent = uwb->regindent;
3959 #endif
3960 
3961 	    goto reenter;
3962 	}
3963 	/* NOT REACHED */
3964 	default:
3965 	    Perl_croak(aTHX_ "regexp unwind memory corruption");
3966 	}
3967 	/* NOT REACHED */
3968     }
3969 #ifdef DEBUGGING
3970     PL_regindent--;
3971 #endif
3972     return 0;
3973 }
3974 
3975 /*
3976  - regrepeat - repeatedly match something simple, report how many
3977  */
3978 /*
3979  * [This routine now assumes that it will only match on things of length 1.
3980  * That was true before, but now we assume scan - reginput is the count,
3981  * rather than incrementing count on every character.  [Er, except utf8.]]
3982  */
3983 STATIC I32
3984 S_regrepeat(pTHX_ regnode *p, I32 max)
3985 {
3986     register char *scan;
3987     register I32 c;
3988     register char *loceol = PL_regeol;
3989     register I32 hardcount = 0;
3990     register bool do_utf8 = PL_reg_match_utf8;
3991 
3992     scan = PL_reginput;
3993     if (max == REG_INFTY)
3994 	max = I32_MAX;
3995     else if (max < loceol - scan)
3996       loceol = scan + max;
3997     switch (OP(p)) {
3998     case REG_ANY:
3999 	if (do_utf8) {
4000 	    loceol = PL_regeol;
4001 	    while (scan < loceol && hardcount < max && *scan != '\n') {
4002 		scan += UTF8SKIP(scan);
4003 		hardcount++;
4004 	    }
4005 	} else {
4006 	    while (scan < loceol && *scan != '\n')
4007 		scan++;
4008 	}
4009 	break;
4010     case SANY:
4011         if (do_utf8) {
4012 	    loceol = PL_regeol;
4013 	    while (scan < loceol && hardcount < max) {
4014 	        scan += UTF8SKIP(scan);
4015 		hardcount++;
4016 	    }
4017 	}
4018 	else
4019 	    scan = loceol;
4020 	break;
4021     case CANY:
4022 	scan = loceol;
4023 	break;
4024     case EXACT:		/* length of string is 1 */
4025 	c = (U8)*STRING(p);
4026 	while (scan < loceol && UCHARAT(scan) == c)
4027 	    scan++;
4028 	break;
4029     case EXACTF:	/* length of string is 1 */
4030 	c = (U8)*STRING(p);
4031 	while (scan < loceol &&
4032 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4033 	    scan++;
4034 	break;
4035     case EXACTFL:	/* length of string is 1 */
4036 	PL_reg_flags |= RF_tainted;
4037 	c = (U8)*STRING(p);
4038 	while (scan < loceol &&
4039 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4040 	    scan++;
4041 	break;
4042     case ANYOF:
4043 	if (do_utf8) {
4044 	    loceol = PL_regeol;
4045 	    while (hardcount < max && scan < loceol &&
4046 		   reginclass(p, (U8*)scan, 0, do_utf8)) {
4047 		scan += UTF8SKIP(scan);
4048 		hardcount++;
4049 	    }
4050 	} else {
4051 	    while (scan < loceol && REGINCLASS(p, (U8*)scan))
4052 		scan++;
4053 	}
4054 	break;
4055     case ALNUM:
4056 	if (do_utf8) {
4057 	    loceol = PL_regeol;
4058 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4059 	    while (hardcount < max && scan < loceol &&
4060 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4061 		scan += UTF8SKIP(scan);
4062 		hardcount++;
4063 	    }
4064 	} else {
4065 	    while (scan < loceol && isALNUM(*scan))
4066 		scan++;
4067 	}
4068 	break;
4069     case ALNUML:
4070 	PL_reg_flags |= RF_tainted;
4071 	if (do_utf8) {
4072 	    loceol = PL_regeol;
4073 	    while (hardcount < max && scan < loceol &&
4074 		   isALNUM_LC_utf8((U8*)scan)) {
4075 		scan += UTF8SKIP(scan);
4076 		hardcount++;
4077 	    }
4078 	} else {
4079 	    while (scan < loceol && isALNUM_LC(*scan))
4080 		scan++;
4081 	}
4082 	break;
4083     case NALNUM:
4084 	if (do_utf8) {
4085 	    loceol = PL_regeol;
4086 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4087 	    while (hardcount < max && scan < loceol &&
4088 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4089 		scan += UTF8SKIP(scan);
4090 		hardcount++;
4091 	    }
4092 	} else {
4093 	    while (scan < loceol && !isALNUM(*scan))
4094 		scan++;
4095 	}
4096 	break;
4097     case NALNUML:
4098 	PL_reg_flags |= RF_tainted;
4099 	if (do_utf8) {
4100 	    loceol = PL_regeol;
4101 	    while (hardcount < max && scan < loceol &&
4102 		   !isALNUM_LC_utf8((U8*)scan)) {
4103 		scan += UTF8SKIP(scan);
4104 		hardcount++;
4105 	    }
4106 	} else {
4107 	    while (scan < loceol && !isALNUM_LC(*scan))
4108 		scan++;
4109 	}
4110 	break;
4111     case SPACE:
4112 	if (do_utf8) {
4113 	    loceol = PL_regeol;
4114 	    LOAD_UTF8_CHARCLASS(space," ");
4115 	    while (hardcount < max && scan < loceol &&
4116 		   (*scan == ' ' ||
4117 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4118 		scan += UTF8SKIP(scan);
4119 		hardcount++;
4120 	    }
4121 	} else {
4122 	    while (scan < loceol && isSPACE(*scan))
4123 		scan++;
4124 	}
4125 	break;
4126     case SPACEL:
4127 	PL_reg_flags |= RF_tainted;
4128 	if (do_utf8) {
4129 	    loceol = PL_regeol;
4130 	    while (hardcount < max && scan < loceol &&
4131 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4132 		scan += UTF8SKIP(scan);
4133 		hardcount++;
4134 	    }
4135 	} else {
4136 	    while (scan < loceol && isSPACE_LC(*scan))
4137 		scan++;
4138 	}
4139 	break;
4140     case NSPACE:
4141 	if (do_utf8) {
4142 	    loceol = PL_regeol;
4143 	    LOAD_UTF8_CHARCLASS(space," ");
4144 	    while (hardcount < max && scan < loceol &&
4145 		   !(*scan == ' ' ||
4146 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4147 		scan += UTF8SKIP(scan);
4148 		hardcount++;
4149 	    }
4150 	} else {
4151 	    while (scan < loceol && !isSPACE(*scan))
4152 		scan++;
4153 	    break;
4154 	}
4155     case NSPACEL:
4156 	PL_reg_flags |= RF_tainted;
4157 	if (do_utf8) {
4158 	    loceol = PL_regeol;
4159 	    while (hardcount < max && scan < loceol &&
4160 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4161 		scan += UTF8SKIP(scan);
4162 		hardcount++;
4163 	    }
4164 	} else {
4165 	    while (scan < loceol && !isSPACE_LC(*scan))
4166 		scan++;
4167 	}
4168 	break;
4169     case DIGIT:
4170 	if (do_utf8) {
4171 	    loceol = PL_regeol;
4172 	    LOAD_UTF8_CHARCLASS(digit,"0");
4173 	    while (hardcount < max && scan < loceol &&
4174 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4175 		scan += UTF8SKIP(scan);
4176 		hardcount++;
4177 	    }
4178 	} else {
4179 	    while (scan < loceol && isDIGIT(*scan))
4180 		scan++;
4181 	}
4182 	break;
4183     case NDIGIT:
4184 	if (do_utf8) {
4185 	    loceol = PL_regeol;
4186 	    LOAD_UTF8_CHARCLASS(digit,"0");
4187 	    while (hardcount < max && scan < loceol &&
4188 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4189 		scan += UTF8SKIP(scan);
4190 		hardcount++;
4191 	    }
4192 	} else {
4193 	    while (scan < loceol && !isDIGIT(*scan))
4194 		scan++;
4195 	}
4196 	break;
4197     default:		/* Called on something of 0 width. */
4198 	break;		/* So match right here or not at all. */
4199     }
4200 
4201     if (hardcount)
4202 	c = hardcount;
4203     else
4204 	c = scan - PL_reginput;
4205     PL_reginput = scan;
4206 
4207     DEBUG_r(
4208 	{
4209 		SV *prop = sv_newmortal();
4210 
4211 		regprop(prop, p);
4212 		PerlIO_printf(Perl_debug_log,
4213 			      "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4214 			      REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4215 	});
4216 
4217     return(c);
4218 }
4219 
4220 /*
4221  - regrepeat_hard - repeatedly match something, report total lenth and length
4222  *
4223  * The repeater is supposed to have constant length.
4224  */
4225 
4226 STATIC I32
4227 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4228 {
4229     register char *scan = Nullch;
4230     register char *start;
4231     register char *loceol = PL_regeol;
4232     I32 l = 0;
4233     I32 count = 0, res = 1;
4234 
4235     if (!max)
4236 	return 0;
4237 
4238     start = PL_reginput;
4239     if (PL_reg_match_utf8) {
4240 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4241 	    if (!count++) {
4242 		l = 0;
4243 		while (start < PL_reginput) {
4244 		    l++;
4245 		    start += UTF8SKIP(start);
4246 		}
4247 		*lp = l;
4248 		if (l == 0)
4249 		    return max;
4250 	    }
4251 	    if (count == max)
4252 		return count;
4253 	}
4254     }
4255     else {
4256 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4257 	    if (!count++) {
4258 		*lp = l = PL_reginput - start;
4259 		if (max != REG_INFTY && l*max < loceol - scan)
4260 		    loceol = scan + l*max;
4261 		if (l == 0)
4262 		    return max;
4263 	    }
4264 	}
4265     }
4266     if (!res)
4267 	PL_reginput = scan;
4268 
4269     return count;
4270 }
4271 
4272 /*
4273 - regclass_swash - prepare the utf8 swash
4274 */
4275 
4276 SV *
4277 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4278 {
4279     SV *sw  = NULL;
4280     SV *si  = NULL;
4281     SV *alt = NULL;
4282 
4283     if (PL_regdata && PL_regdata->count) {
4284 	U32 n = ARG(node);
4285 
4286 	if (PL_regdata->what[n] == 's') {
4287 	    SV *rv = (SV*)PL_regdata->data[n];
4288 	    AV *av = (AV*)SvRV((SV*)rv);
4289 	    SV **ary = AvARRAY(av);
4290 	    SV **a, **b;
4291 
4292 	    /* See the end of regcomp.c:S_reglass() for
4293 	     * documentation of these array elements. */
4294 
4295 	    si = *ary;
4296 	    a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4297 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4298 
4299 	    if (a)
4300 		sw = *a;
4301 	    else if (si && doinit) {
4302 		sw = swash_init("utf8", "", si, 1, 0);
4303 		(void)av_store(av, 1, sw);
4304 	    }
4305 	    if (b)
4306 	        alt = *b;
4307 	}
4308     }
4309 
4310     if (listsvp)
4311 	*listsvp = si;
4312     if (altsvp)
4313 	*altsvp  = alt;
4314 
4315     return sw;
4316 }
4317 
4318 /*
4319  - reginclass - determine if a character falls into a character class
4320 
4321   The n is the ANYOF regnode, the p is the target string, lenp
4322   is pointer to the maximum length of how far to go in the p
4323   (if the lenp is zero, UTF8SKIP(p) is used),
4324   do_utf8 tells whether the target string is in UTF-8.
4325 
4326  */
4327 
4328 STATIC bool
4329 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4330 {
4331     char flags = ANYOF_FLAGS(n);
4332     bool match = FALSE;
4333     UV c = *p;
4334     STRLEN len = 0;
4335     STRLEN plen;
4336 
4337     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4338 	 c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
4339 			    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4340 
4341     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4342     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4343         if (lenp)
4344 	    *lenp = 0;
4345 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
4346 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4347 		match = TRUE;
4348 	}
4349 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4350 	    match = TRUE;
4351 	if (!match) {
4352 	    AV *av;
4353 	    SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4354 
4355 	    if (sw) {
4356 		if (swash_fetch(sw, p, do_utf8))
4357 		    match = TRUE;
4358 		else if (flags & ANYOF_FOLD) {
4359 		    if (!match && lenp && av) {
4360 		        I32 i;
4361 
4362 			for (i = 0; i <= av_len(av); i++) {
4363 			    SV* sv = *av_fetch(av, i, FALSE);
4364 			    STRLEN len;
4365 			    char *s = SvPV(sv, len);
4366 
4367 			    if (len <= plen && memEQ(s, (char*)p, len)) {
4368 			        *lenp = len;
4369 				match = TRUE;
4370 				break;
4371 			    }
4372 			}
4373 		    }
4374 		    if (!match) {
4375 		        U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4376 			STRLEN tmplen;
4377 
4378 		        to_utf8_fold(p, tmpbuf, &tmplen);
4379 			if (swash_fetch(sw, tmpbuf, do_utf8))
4380 			    match = TRUE;
4381 		    }
4382 		}
4383 	    }
4384 	}
4385 	if (match && lenp && *lenp == 0)
4386 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
4387     }
4388     if (!match && c < 256) {
4389 	if (ANYOF_BITMAP_TEST(n, c))
4390 	    match = TRUE;
4391 	else if (flags & ANYOF_FOLD) {
4392 	    U8 f;
4393 
4394 	    if (flags & ANYOF_LOCALE) {
4395 		PL_reg_flags |= RF_tainted;
4396 		f = PL_fold_locale[c];
4397 	    }
4398 	    else
4399 		f = PL_fold[c];
4400 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
4401 		match = TRUE;
4402 	}
4403 
4404 	if (!match && (flags & ANYOF_CLASS)) {
4405 	    PL_reg_flags |= RF_tainted;
4406 	    if (
4407 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4408 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4409 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4410 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4411 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4412 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4413 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4414 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4415 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4416 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4417 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4418 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4419 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4420 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4421 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4422 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4423 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4424 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4425 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4426 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4427 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4428 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4429 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4430 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4431 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4432 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4433 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4434 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4435 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4436 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4437 		) /* How's that for a conditional? */
4438 	    {
4439 		match = TRUE;
4440 	    }
4441 	}
4442     }
4443 
4444     return (flags & ANYOF_INVERT) ? !match : match;
4445 }
4446 
4447 STATIC U8 *
4448 S_reghop(pTHX_ U8 *s, I32 off)
4449 {
4450     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4451 }
4452 
4453 STATIC U8 *
4454 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4455 {
4456     if (off >= 0) {
4457 	while (off-- && s < lim) {
4458 	    /* XXX could check well-formedness here */
4459 	    s += UTF8SKIP(s);
4460 	}
4461     }
4462     else {
4463 	while (off++) {
4464 	    if (s > lim) {
4465 		s--;
4466 		if (UTF8_IS_CONTINUED(*s)) {
4467 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4468 			s--;
4469 		}
4470 		/* XXX could check well-formedness here */
4471 	    }
4472 	}
4473     }
4474     return s;
4475 }
4476 
4477 STATIC U8 *
4478 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4479 {
4480     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4481 }
4482 
4483 STATIC U8 *
4484 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4485 {
4486     if (off >= 0) {
4487 	while (off-- && s < lim) {
4488 	    /* XXX could check well-formedness here */
4489 	    s += UTF8SKIP(s);
4490 	}
4491 	if (off >= 0)
4492 	    return 0;
4493     }
4494     else {
4495 	while (off++) {
4496 	    if (s > lim) {
4497 		s--;
4498 		if (UTF8_IS_CONTINUED(*s)) {
4499 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4500 			s--;
4501 		}
4502 		/* XXX could check well-formedness here */
4503 	    }
4504 	    else
4505 		break;
4506 	}
4507 	if (off <= 0)
4508 	    return 0;
4509     }
4510     return s;
4511 }
4512 
4513 static void
4514 restore_pos(pTHX_ void *arg)
4515 {
4516     if (PL_reg_eval_set) {
4517 	if (PL_reg_oldsaved) {
4518 	    PL_reg_re->subbeg = PL_reg_oldsaved;
4519 	    PL_reg_re->sublen = PL_reg_oldsavedlen;
4520 	    RX_MATCH_COPIED_on(PL_reg_re);
4521 	}
4522 	PL_reg_magic->mg_len = PL_reg_oldpos;
4523 	PL_reg_eval_set = 0;
4524 	PL_curpm = PL_reg_oldcurpm;
4525     }
4526 }
4527 
4528 STATIC void
4529 S_to_utf8_substr(pTHX_ register regexp *prog)
4530 {
4531     SV* sv;
4532     if (prog->float_substr && !prog->float_utf8) {
4533 	prog->float_utf8 = sv = NEWSV(117, 0);
4534 	SvSetSV(sv, prog->float_substr);
4535 	sv_utf8_upgrade(sv);
4536 	if (SvTAIL(prog->float_substr))
4537 	    SvTAIL_on(sv);
4538 	if (prog->float_substr == prog->check_substr)
4539 	    prog->check_utf8 = sv;
4540     }
4541     if (prog->anchored_substr && !prog->anchored_utf8) {
4542 	prog->anchored_utf8 = sv = NEWSV(118, 0);
4543 	SvSetSV(sv, prog->anchored_substr);
4544 	sv_utf8_upgrade(sv);
4545 	if (SvTAIL(prog->anchored_substr))
4546 	    SvTAIL_on(sv);
4547 	if (prog->anchored_substr == prog->check_substr)
4548 	    prog->check_utf8 = sv;
4549     }
4550 }
4551 
4552 STATIC void
4553 S_to_byte_substr(pTHX_ register regexp *prog)
4554 {
4555     SV* sv;
4556     if (prog->float_utf8 && !prog->float_substr) {
4557 	prog->float_substr = sv = NEWSV(117, 0);
4558 	SvSetSV(sv, prog->float_utf8);
4559 	if (sv_utf8_downgrade(sv, TRUE)) {
4560 	    if (SvTAIL(prog->float_utf8))
4561 		SvTAIL_on(sv);
4562 	} else {
4563 	    SvREFCNT_dec(sv);
4564 	    prog->float_substr = sv = &PL_sv_undef;
4565 	}
4566 	if (prog->float_utf8 == prog->check_utf8)
4567 	    prog->check_substr = sv;
4568     }
4569     if (prog->anchored_utf8 && !prog->anchored_substr) {
4570 	prog->anchored_substr = sv = NEWSV(118, 0);
4571 	SvSetSV(sv, prog->anchored_utf8);
4572 	if (sv_utf8_downgrade(sv, TRUE)) {
4573 	    if (SvTAIL(prog->anchored_utf8))
4574 		SvTAIL_on(sv);
4575 	} else {
4576 	    SvREFCNT_dec(sv);
4577 	    prog->anchored_substr = sv = &PL_sv_undef;
4578 	}
4579 	if (prog->anchored_utf8 == prog->check_utf8)
4580 	    prog->check_substr = sv;
4581     }
4582 }
4583