xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision b725ae7711052a2233e31a66fefb8a752c388d7a)
1 /*    regexec.c
2  */
3 
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7 
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9  * confused with the original package (see point 3 below).  Thanks, Henry!
10  */
11 
12 /* Additional note: this code is very heavily munged from Henry's version
13  * in places.  In some spots I've traded clarity for efficiency, so don't
14  * blame Henry for some of the lack of readability.
15  */
16 
17 /* The names of the functions have been changed from regcomp and
18  * regexec to  pregcomp and pregexec in order to avoid conflicts
19  * with the POSIX routines of the same names.
20 */
21 
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 #  ifndef PERL_IN_XSUB_RE
25 #    define PERL_IN_XSUB_RE
26 #  endif
27 /* need access to debugger hooks */
28 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29 #    define DEBUGGING
30 #  endif
31 #endif
32 
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 #  define Perl_regexec_flags my_regexec
36 #  define Perl_regdump my_regdump
37 #  define Perl_regprop my_regprop
38 #  define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 #  define Perl_pregexec my_pregexec
41 #  define Perl_reginitcolors my_reginitcolors
42 #  define Perl_regclass_swash my_regclass_swash
43 
44 #  define PERL_NO_GET_CONTEXT
45 #endif
46 
47 /*SUPPRESS 112*/
48 /*
49  * pregcomp and pregexec -- regsub and regerror are not used in perl
50  *
51  *	Copyright (c) 1986 by University of Toronto.
52  *	Written by Henry Spencer.  Not derived from licensed software.
53  *
54  *	Permission is granted to anyone to use this software for any
55  *	purpose on any computer system, and to redistribute it freely,
56  *	subject to the following restrictions:
57  *
58  *	1. The author is not responsible for the consequences of use of
59  *		this software, no matter how awful, even if they arise
60  *		from defects in it.
61  *
62  *	2. The origin of this software must not be misrepresented, either
63  *		by explicit claim or by omission.
64  *
65  *	3. Altered versions must be plainly marked as such, and must not
66  *		be misrepresented as being the original software.
67  *
68  ****    Alterations to Henry's code are...
69  ****
70  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
71  ****    2000, 2001, 2002, 2003, by Larry Wall and others
72  ****
73  ****    You may distribute under the terms of either the GNU General Public
74  ****    License or the Artistic License, as specified in the README file.
75  *
76  * Beware that some of this code is subtly aware of the way operator
77  * precedence is structured in regular expressions.  Serious changes in
78  * regular-expression syntax might require a total rethink.
79  */
80 #include "EXTERN.h"
81 #define PERL_IN_REGEXEC_C
82 #include "perl.h"
83 
84 #include "regcomp.h"
85 
86 #define RF_tainted	1		/* tainted information used? */
87 #define RF_warned	2		/* warned about big count? */
88 #define RF_evaled	4		/* Did an EVAL with setting? */
89 #define RF_utf8		8		/* String contains multibyte chars? */
90 #define RF_false	16		/* odd number of nested negatives */
91 
92 #define UTF ((PL_reg_flags & RF_utf8) != 0)
93 
94 #define RS_init		1		/* eval environment created */
95 #define RS_set		2		/* replsv value is set */
96 
97 #ifndef STATIC
98 #define	STATIC	static
99 #endif
100 
101 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
102 
103 /*
104  * Forwards.
105  */
106 
107 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
108 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
109 
110 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
111 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
112 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
113 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
114 #define HOPc(pos,off) ((char*)HOP(pos,off))
115 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
116 
117 #define HOPBACK(pos, off) (		\
118     (PL_reg_match_utf8)			\
119 	? reghopmaybe((U8*)pos, -off)	\
120     : (pos - off >= PL_bostr)		\
121 	? (U8*)(pos - off)		\
122     : (U8*)NULL				\
123 )
124 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
125 
126 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
127 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
128 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
129 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
130 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
131 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
132 
133 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
134 
135 /* for use after a quantifier and before an EXACT-like node -- japhy */
136 #define JUMPABLE(rn) ( \
137     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
138     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
139     OP(rn) == PLUS || OP(rn) == MINMOD || \
140     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
141 )
142 
143 #define HAS_TEXT(rn) ( \
144     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
145 )
146 
147 /*
148   Search for mandatory following text node; for lookahead, the text must
149   follow but for lookbehind (rn->flags != 0) we skip to the next step.
150 */
151 #define FIND_NEXT_IMPT(rn) STMT_START { \
152     while (JUMPABLE(rn)) \
153 	if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
154 	    rn = NEXTOPER(NEXTOPER(rn)); \
155 	else if (OP(rn) == PLUS) \
156 	    rn = NEXTOPER(rn); \
157 	else if (OP(rn) == IFMATCH) \
158 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
159 	else rn += NEXT_OFF(rn); \
160 } STMT_END
161 
162 static void restore_pos(pTHX_ void *arg);
163 
164 STATIC CHECKPOINT
165 S_regcppush(pTHX_ I32 parenfloor)
166 {
167     int retval = PL_savestack_ix;
168 #define REGCP_PAREN_ELEMS 4
169     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
170     int p;
171 
172     if (paren_elems_to_push < 0)
173 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
174 
175 #define REGCP_OTHER_ELEMS 6
176     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
177     for (p = PL_regsize; p > parenfloor; p--) {
178 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
179 	SSPUSHINT(PL_regendp[p]);
180 	SSPUSHINT(PL_regstartp[p]);
181 	SSPUSHPTR(PL_reg_start_tmp[p]);
182 	SSPUSHINT(p);
183     }
184 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
185     SSPUSHINT(PL_regsize);
186     SSPUSHINT(*PL_reglastparen);
187     SSPUSHINT(*PL_reglastcloseparen);
188     SSPUSHPTR(PL_reginput);
189 #define REGCP_FRAME_ELEMS 2
190 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
191  * are needed for the regexp context stack bookkeeping. */
192     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
193     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
194 
195     return retval;
196 }
197 
198 /* These are needed since we do not localize EVAL nodes: */
199 #  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,		\
200 			     "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
201 			     (IV)PL_savestack_ix)); cp = PL_savestack_ix
202 
203 #  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?		\
204 				PerlIO_printf(Perl_debug_log,		\
205 				"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
206 				(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
207 
208 STATIC char *
209 S_regcppop(pTHX)
210 {
211     I32 i;
212     U32 paren = 0;
213     char *input;
214     I32 tmps;
215 
216     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
217     i = SSPOPINT;
218     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
219     i = SSPOPINT; /* Parentheses elements to pop. */
220     input = (char *) SSPOPPTR;
221     *PL_reglastcloseparen = SSPOPINT;
222     *PL_reglastparen = SSPOPINT;
223     PL_regsize = SSPOPINT;
224 
225     /* Now restore the parentheses context. */
226     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
227 	 i > 0; i -= REGCP_PAREN_ELEMS) {
228 	paren = (U32)SSPOPINT;
229 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
230 	PL_regstartp[paren] = SSPOPINT;
231 	tmps = SSPOPINT;
232 	if (paren <= *PL_reglastparen)
233 	    PL_regendp[paren] = tmps;
234 	DEBUG_r(
235 	    PerlIO_printf(Perl_debug_log,
236 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
237 			  (UV)paren, (IV)PL_regstartp[paren],
238 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
239 			  (IV)PL_regendp[paren],
240 			  (paren > *PL_reglastparen ? "(no)" : ""));
241 	);
242     }
243     DEBUG_r(
244 	if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
245 	    PerlIO_printf(Perl_debug_log,
246 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
247 			  (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
248 	}
249     );
250 #if 1
251     /* It would seem that the similar code in regtry()
252      * already takes care of this, and in fact it is in
253      * a better location to since this code can #if 0-ed out
254      * but the code in regtry() is needed or otherwise tests
255      * requiring null fields (pat.t#187 and split.t#{13,14}
256      * (as of patchlevel 7877)  will fail.  Then again,
257      * this code seems to be necessary or otherwise
258      * building DynaLoader will fail:
259      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
260      * --jhi */
261     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
262 	if ((I32)paren > PL_regsize)
263 	    PL_regstartp[paren] = -1;
264 	PL_regendp[paren] = -1;
265     }
266 #endif
267     return input;
268 }
269 
270 STATIC char *
271 S_regcp_set_to(pTHX_ I32 ss)
272 {
273     I32 tmp = PL_savestack_ix;
274 
275     PL_savestack_ix = ss;
276     regcppop();
277     PL_savestack_ix = tmp;
278     return Nullch;
279 }
280 
281 typedef struct re_cc_state
282 {
283     I32 ss;
284     regnode *node;
285     struct re_cc_state *prev;
286     CURCUR *cc;
287     regexp *re;
288 } re_cc_state;
289 
290 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
291 
292 #define TRYPAREN(paren, n, input) {				\
293     if (paren) {						\
294 	if (n) {						\
295 	    PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;	\
296 	    PL_regendp[paren] = input - PL_bostr;		\
297 	}							\
298 	else							\
299 	    PL_regendp[paren] = -1;				\
300     }								\
301     if (regmatch(next))						\
302 	sayYES;							\
303     if (paren && n)						\
304 	PL_regendp[paren] = -1;					\
305 }
306 
307 
308 /*
309  * pregexec and friends
310  */
311 
312 /*
313  - pregexec - match a regexp against a string
314  */
315 I32
316 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
317 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318 /* strend: pointer to null at end of string */
319 /* strbeg: real beginning of string */
320 /* minend: end of match must be >=minend after stringarg. */
321 /* nosave: For optimizations. */
322 {
323     return
324 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
325 		      nosave ? 0 : REXEC_COPY_STR);
326 }
327 
328 STATIC void
329 S_cache_re(pTHX_ regexp *prog)
330 {
331     PL_regprecomp = prog->precomp;		/* Needed for FAIL. */
332 #ifdef DEBUGGING
333     PL_regprogram = prog->program;
334 #endif
335     PL_regnpar = prog->nparens;
336     PL_regdata = prog->data;
337     PL_reg_re = prog;
338 }
339 
340 /*
341  * Need to implement the following flags for reg_anch:
342  *
343  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
344  * USE_INTUIT_ML
345  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
346  * INTUIT_AUTORITATIVE_ML
347  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
348  * INTUIT_ONCE_ML
349  *
350  * Another flag for this function: SECOND_TIME (so that float substrs
351  * with giant delta may be not rechecked).
352  */
353 
354 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
355 
356 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
357    Otherwise, only SvCUR(sv) is used to get strbeg. */
358 
359 /* XXXX We assume that strpos is strbeg unless sv. */
360 
361 /* XXXX Some places assume that there is a fixed substring.
362 	An update may be needed if optimizer marks as "INTUITable"
363 	RExen without fixed substrings.  Similarly, it is assumed that
364 	lengths of all the strings are no more than minlen, thus they
365 	cannot come from lookahead.
366 	(Or minlen should take into account lookahead.) */
367 
368 /* A failure to find a constant substring means that there is no need to make
369    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
370    finding a substring too deep into the string means that less calls to
371    regtry() should be needed.
372 
373    REx compiler's optimizer found 4 possible hints:
374 	a) Anchored substring;
375 	b) Fixed substring;
376 	c) Whether we are anchored (beginning-of-line or \G);
377 	d) First node (of those at offset 0) which may distingush positions;
378    We use a)b)d) and multiline-part of c), and try to find a position in the
379    string which does not contradict any of them.
380  */
381 
382 /* Most of decisions we do here should have been done at compile time.
383    The nodes of the REx which we used for the search should have been
384    deleted from the finite automaton. */
385 
386 char *
387 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
388 		     char *strend, U32 flags, re_scream_pos_data *data)
389 {
390     register I32 start_shift = 0;
391     /* Should be nonnegative! */
392     register I32 end_shift   = 0;
393     register char *s;
394     register SV *check;
395     char *strbeg;
396     char *t;
397     int do_utf8 = sv ? SvUTF8(sv) : 0;	/* if no sv we have to assume bytes */
398     I32 ml_anch;
399     register char *other_last = Nullch;	/* other substr checked before this */
400     char *check_at = Nullch;		/* check substr found at this pos */
401 #ifdef DEBUGGING
402     char *i_strpos = strpos;
403     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
404 #endif
405     RX_MATCH_UTF8_set(prog,do_utf8);
406 
407     if (prog->reganch & ROPT_UTF8) {
408 	DEBUG_r(PerlIO_printf(Perl_debug_log,
409 			      "UTF-8 regex...\n"));
410 	PL_reg_flags |= RF_utf8;
411     }
412 
413     DEBUG_r({
414 	 char *s   = PL_reg_match_utf8 ?
415 	                 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
416 	                 strpos;
417 	 int   len = PL_reg_match_utf8 ?
418 	                 strlen(s) : strend - strpos;
419 	 if (!PL_colorset)
420 	      reginitcolors();
421 	 if (PL_reg_match_utf8)
422 	     DEBUG_r(PerlIO_printf(Perl_debug_log,
423 				   "UTF-8 target...\n"));
424 	 PerlIO_printf(Perl_debug_log,
425 		       "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
426 		       PL_colors[4],PL_colors[5],PL_colors[0],
427 		       prog->precomp,
428 		       PL_colors[1],
429 		       (strlen(prog->precomp) > 60 ? "..." : ""),
430 		       PL_colors[0],
431 		       (int)(len > 60 ? 60 : len),
432 		       s, PL_colors[1],
433 		       (len > 60 ? "..." : "")
434 	      );
435     });
436 
437     /* CHR_DIST() would be more correct here but it makes things slow. */
438     if (prog->minlen > strend - strpos) {
439 	DEBUG_r(PerlIO_printf(Perl_debug_log,
440 			      "String too short... [re_intuit_start]\n"));
441 	goto fail;
442     }
443     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
444     PL_regeol = strend;
445     if (do_utf8) {
446 	if (!prog->check_utf8 && prog->check_substr)
447 	    to_utf8_substr(prog);
448 	check = prog->check_utf8;
449     } else {
450 	if (!prog->check_substr && prog->check_utf8)
451 	    to_byte_substr(prog);
452 	check = prog->check_substr;
453     }
454    if (check == &PL_sv_undef) {
455 	DEBUG_r(PerlIO_printf(Perl_debug_log,
456 		"Non-utf string cannot match utf check string\n"));
457 	goto fail;
458     }
459     if (prog->reganch & ROPT_ANCH) {	/* Match at beg-of-str or after \n */
460 	ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
461 		     || ( (prog->reganch & ROPT_ANCH_BOL)
462 			  && !PL_multiline ) );	/* Check after \n? */
463 
464 	if (!ml_anch) {
465 	  if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
466 				  | ROPT_IMPLICIT)) /* not a real BOL */
467 	       /* SvCUR is not set on references: SvRV and SvPVX overlap */
468 	       && sv && !SvROK(sv)
469 	       && (strpos != strbeg)) {
470 	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
471 	      goto fail;
472 	  }
473 	  if (prog->check_offset_min == prog->check_offset_max &&
474 	      !(prog->reganch & ROPT_CANY_SEEN)) {
475 	    /* Substring at constant offset from beg-of-str... */
476 	    I32 slen;
477 
478 	    s = HOP3c(strpos, prog->check_offset_min, strend);
479 	    if (SvTAIL(check)) {
480 		slen = SvCUR(check);	/* >= 1 */
481 
482 		if ( strend - s > slen || strend - s < slen - 1
483 		     || (strend - s == slen && strend[-1] != '\n')) {
484 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
485 		    goto fail_finish;
486 		}
487 		/* Now should match s[0..slen-2] */
488 		slen--;
489 		if (slen && (*SvPVX(check) != *s
490 			     || (slen > 1
491 				 && memNE(SvPVX(check), s, slen)))) {
492 		  report_neq:
493 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
494 		    goto fail_finish;
495 		}
496 	    }
497 	    else if (*SvPVX(check) != *s
498 		     || ((slen = SvCUR(check)) > 1
499 			 && memNE(SvPVX(check), s, slen)))
500 		goto report_neq;
501 	    goto success_at_start;
502 	  }
503 	}
504 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
505 	s = strpos;
506 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 	end_shift = prog->minlen - start_shift -
508 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
509 	if (!ml_anch) {
510 	    I32 end = prog->check_offset_max + CHR_SVLEN(check)
511 					 - (SvTAIL(check) != 0);
512 	    I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
513 
514 	    if (end_shift < eshift)
515 		end_shift = eshift;
516 	}
517     }
518     else {				/* Can match at random position */
519 	ml_anch = 0;
520 	s = strpos;
521 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
522 	/* Should be nonnegative! */
523 	end_shift = prog->minlen - start_shift -
524 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
525     }
526 
527 #ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
528     if (end_shift < 0)
529 	Perl_croak(aTHX_ "panic: end_shift");
530 #endif
531 
532   restart:
533     /* Find a possible match in the region s..strend by looking for
534        the "check" substring in the region corrected by start/end_shift. */
535     if (flags & REXEC_SCREAM) {
536 	I32 p = -1;			/* Internal iterator of scream. */
537 	I32 *pp = data ? data->scream_pos : &p;
538 
539 	if (PL_screamfirst[BmRARE(check)] >= 0
540 	    || ( BmRARE(check) == '\n'
541 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
542 		 && SvTAIL(check) ))
543 	    s = screaminstr(sv, check,
544 			    start_shift + (s - strbeg), end_shift, pp, 0);
545 	else
546 	    goto fail_finish;
547 	/* we may be pointing at the wrong string */
548 	if (s && RX_MATCH_COPIED(prog))
549 	    s = strbeg + (s - SvPVX(sv));
550 	if (data)
551 	    *data->scream_olds = s;
552     }
553     else if (prog->reganch & ROPT_CANY_SEEN)
554 	s = fbm_instr((U8*)(s + start_shift),
555 		      (U8*)(strend - end_shift),
556 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
557     else
558 	s = fbm_instr(HOP3(s, start_shift, strend),
559 		      HOP3(strend, -end_shift, strbeg),
560 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
561 
562     /* Update the count-of-usability, remove useless subpatterns,
563 	unshift s.  */
564 
565     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
566 			  (s ? "Found" : "Did not find"),
567 			  (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
568 			  PL_colors[0],
569 			  (int)(SvCUR(check) - (SvTAIL(check)!=0)),
570 			  SvPVX(check),
571 			  PL_colors[1], (SvTAIL(check) ? "$" : ""),
572 			  (s ? " at offset " : "...\n") ) );
573 
574     if (!s)
575 	goto fail_finish;
576 
577     check_at = s;
578 
579     /* Finish the diagnostic message */
580     DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
581 
582     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
583        Start with the other substr.
584        XXXX no SCREAM optimization yet - and a very coarse implementation
585        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
586 		*always* match.  Probably should be marked during compile...
587        Probably it is right to do no SCREAM here...
588      */
589 
590     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
591 	/* Take into account the "other" substring. */
592 	/* XXXX May be hopelessly wrong for UTF... */
593 	if (!other_last)
594 	    other_last = strpos;
595 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
596 	  do_other_anchored:
597 	    {
598 		char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
599 		char *s1 = s;
600 		SV* must;
601 
602 		t = s - prog->check_offset_max;
603 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
604 		    && (!do_utf8
605 			|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
606 			    && t > strpos)))
607 		    /* EMPTY */;
608 		else
609 		    t = strpos;
610 		t = HOP3c(t, prog->anchored_offset, strend);
611 		if (t < other_last)	/* These positions already checked */
612 		    t = other_last;
613 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
614 		if (last < last1)
615 		    last1 = last;
616  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
617 		/* On end-of-str: see comment below. */
618 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
619 		if (must == &PL_sv_undef) {
620 		    s = (char*)NULL;
621 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
622 		}
623 		else
624 		    s = fbm_instr(
625 			(unsigned char*)t,
626 			HOP3(HOP3(last1, prog->anchored_offset, strend)
627 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
628 			must,
629 			PL_multiline ? FBMrf_MULTILINE : 0
630 		    );
631 		DEBUG_r(PerlIO_printf(Perl_debug_log,
632 			"%s anchored substr `%s%.*s%s'%s",
633 			(s ? "Found" : "Contradicts"),
634 			PL_colors[0],
635 			  (int)(SvCUR(must)
636 			  - (SvTAIL(must)!=0)),
637 			  SvPVX(must),
638 			  PL_colors[1], (SvTAIL(must) ? "$" : "")));
639 		if (!s) {
640 		    if (last1 >= last2) {
641 			DEBUG_r(PerlIO_printf(Perl_debug_log,
642 						", giving up...\n"));
643 			goto fail_finish;
644 		    }
645 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
646 			", trying floating at offset %ld...\n",
647 			(long)(HOP3c(s1, 1, strend) - i_strpos)));
648 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
649 		    s = HOP3c(last, 1, strend);
650 		    goto restart;
651 		}
652 		else {
653 		    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
654 			  (long)(s - i_strpos)));
655 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
656 		    other_last = HOP3c(s, 1, strend);
657 		    s = s1;
658 		    if (t == strpos)
659 			goto try_at_start;
660 		    goto try_at_offset;
661 		}
662 	    }
663 	}
664 	else {		/* Take into account the floating substring. */
665 	    char *last, *last1;
666 	    char *s1 = s;
667 	    SV* must;
668 
669 	    t = HOP3c(s, -start_shift, strbeg);
670 	    last1 = last =
671 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
672 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
673 		last = HOP3c(t, prog->float_max_offset, strend);
674 	    s = HOP3c(t, prog->float_min_offset, strend);
675 	    if (s < other_last)
676 		s = other_last;
677  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
678 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
679 	    /* fbm_instr() takes into account exact value of end-of-str
680 	       if the check is SvTAIL(ed).  Since false positives are OK,
681 	       and end-of-str is not later than strend we are OK. */
682 	    if (must == &PL_sv_undef) {
683 		s = (char*)NULL;
684 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
685 	    }
686 	    else
687 		s = fbm_instr((unsigned char*)s,
688 			      (unsigned char*)last + SvCUR(must)
689 				  - (SvTAIL(must)!=0),
690 			      must, PL_multiline ? FBMrf_MULTILINE : 0);
691 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
692 		    (s ? "Found" : "Contradicts"),
693 		    PL_colors[0],
694 		      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
695 		      SvPVX(must),
696 		      PL_colors[1], (SvTAIL(must) ? "$" : "")));
697 	    if (!s) {
698 		if (last1 == last) {
699 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
700 					    ", giving up...\n"));
701 		    goto fail_finish;
702 		}
703 		DEBUG_r(PerlIO_printf(Perl_debug_log,
704 		    ", trying anchored starting at offset %ld...\n",
705 		    (long)(s1 + 1 - i_strpos)));
706 		other_last = last;
707 		s = HOP3c(t, 1, strend);
708 		goto restart;
709 	    }
710 	    else {
711 		DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
712 		      (long)(s - i_strpos)));
713 		other_last = s; /* Fix this later. --Hugo */
714 		s = s1;
715 		if (t == strpos)
716 		    goto try_at_start;
717 		goto try_at_offset;
718 	    }
719 	}
720     }
721 
722     t = s - prog->check_offset_max;
723     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
724         && (!do_utf8
725 	    || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
726 		 && t > strpos))) {
727 	/* Fixed substring is found far enough so that the match
728 	   cannot start at strpos. */
729       try_at_offset:
730 	if (ml_anch && t[-1] != '\n') {
731 	    /* Eventually fbm_*() should handle this, but often
732 	       anchored_offset is not 0, so this check will not be wasted. */
733 	    /* XXXX In the code below we prefer to look for "^" even in
734 	       presence of anchored substrings.  And we search even
735 	       beyond the found float position.  These pessimizations
736 	       are historical artefacts only.  */
737 	  find_anchor:
738 	    while (t < strend - prog->minlen) {
739 		if (*t == '\n') {
740 		    if (t < check_at - prog->check_offset_min) {
741 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
742 			    /* Since we moved from the found position,
743 			       we definitely contradict the found anchored
744 			       substr.  Due to the above check we do not
745 			       contradict "check" substr.
746 			       Thus we can arrive here only if check substr
747 			       is float.  Redo checking for "other"=="fixed".
748 			     */
749 			    strpos = t + 1;
750 			    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
751 				PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
752 			    goto do_other_anchored;
753 			}
754 			/* We don't contradict the found floating substring. */
755 			/* XXXX Why not check for STCLASS? */
756 			s = t + 1;
757 			DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
758 			    PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
759 			goto set_useful;
760 		    }
761 		    /* Position contradicts check-string */
762 		    /* XXXX probably better to look for check-string
763 		       than for "\n", so one should lower the limit for t? */
764 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
765 			PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
766 		    other_last = strpos = s = t + 1;
767 		    goto restart;
768 		}
769 		t++;
770 	    }
771 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
772 			PL_colors[0],PL_colors[1]));
773 	    goto fail_finish;
774 	}
775 	else {
776 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
777 			PL_colors[0],PL_colors[1]));
778 	}
779 	s = t;
780       set_useful:
781 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
782     }
783     else {
784 	/* The found string does not prohibit matching at strpos,
785 	   - no optimization of calling REx engine can be performed,
786 	   unless it was an MBOL and we are not after MBOL,
787 	   or a future STCLASS check will fail this. */
788       try_at_start:
789 	/* Even in this situation we may use MBOL flag if strpos is offset
790 	   wrt the start of the string. */
791 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
792 	    && (strpos != strbeg) && strpos[-1] != '\n'
793 	    /* May be due to an implicit anchor of m{.*foo}  */
794 	    && !(prog->reganch & ROPT_IMPLICIT))
795 	{
796 	    t = strpos;
797 	    goto find_anchor;
798 	}
799 	DEBUG_r( if (ml_anch)
800 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
801 			(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
802 	);
803       success_at_start:
804 	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
805 	    && (do_utf8 ? (
806 		prog->check_utf8		/* Could be deleted already */
807 		&& --BmUSEFUL(prog->check_utf8) < 0
808 		&& (prog->check_utf8 == prog->float_utf8)
809 	    ) : (
810 		prog->check_substr		/* Could be deleted already */
811 		&& --BmUSEFUL(prog->check_substr) < 0
812 		&& (prog->check_substr == prog->float_substr)
813 	    )))
814 	{
815 	    /* If flags & SOMETHING - do not do it many times on the same match */
816 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
817 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
818 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
819 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
820 	    prog->check_substr = prog->check_utf8 = Nullsv;	/* disable */
821 	    prog->float_substr = prog->float_utf8 = Nullsv;	/* clear */
822 	    check = Nullsv;			/* abort */
823 	    s = strpos;
824 	    /* XXXX This is a remnant of the old implementation.  It
825 	            looks wasteful, since now INTUIT can use many
826 	            other heuristics. */
827 	    prog->reganch &= ~RE_USE_INTUIT;
828 	}
829 	else
830 	    s = strpos;
831     }
832 
833     /* Last resort... */
834     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
835     if (prog->regstclass) {
836 	/* minlen == 0 is possible if regstclass is \b or \B,
837 	   and the fixed substr is ''$.
838 	   Since minlen is already taken into account, s+1 is before strend;
839 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
840 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
841 	   regstclass does not come from lookahead...  */
842 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
843 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
844 	U8* str = (U8*)STRING(prog->regstclass);
845 	int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
846 		    ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
847 		    : 1);
848 	char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
849 		? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
850 		: (prog->float_substr || prog->float_utf8
851 		   ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
852 			   cl_l, strend)
853 		   : strend);
854 	char *startpos = strbeg;
855 
856 	t = s;
857 	cache_re(prog);
858 	s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
859 	if (!s) {
860 #ifdef DEBUGGING
861 	    char *what = 0;
862 #endif
863 	    if (endpos == strend) {
864 		DEBUG_r( PerlIO_printf(Perl_debug_log,
865 				"Could not match STCLASS...\n") );
866 		goto fail;
867 	    }
868 	    DEBUG_r( PerlIO_printf(Perl_debug_log,
869 				   "This position contradicts STCLASS...\n") );
870 	    if ((prog->reganch & ROPT_ANCH) && !ml_anch)
871 		goto fail;
872 	    /* Contradict one of substrings */
873 	    if (prog->anchored_substr || prog->anchored_utf8) {
874 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
875 		    DEBUG_r( what = "anchored" );
876 		  hop_and_restart:
877 		    s = HOP3c(t, 1, strend);
878 		    if (s + start_shift + end_shift > strend) {
879 			/* XXXX Should be taken into account earlier? */
880 			DEBUG_r( PerlIO_printf(Perl_debug_log,
881 					       "Could not match STCLASS...\n") );
882 			goto fail;
883 		    }
884 		    if (!check)
885 			goto giveup;
886 		    DEBUG_r( PerlIO_printf(Perl_debug_log,
887 				"Looking for %s substr starting at offset %ld...\n",
888 				 what, (long)(s + start_shift - i_strpos)) );
889 		    goto restart;
890 		}
891 		/* Have both, check_string is floating */
892 		if (t + start_shift >= check_at) /* Contradicts floating=check */
893 		    goto retry_floating_check;
894 		/* Recheck anchored substring, but not floating... */
895 		s = check_at;
896 		if (!check)
897 		    goto giveup;
898 		DEBUG_r( PerlIO_printf(Perl_debug_log,
899 			  "Looking for anchored substr starting at offset %ld...\n",
900 			  (long)(other_last - i_strpos)) );
901 		goto do_other_anchored;
902 	    }
903 	    /* Another way we could have checked stclass at the
904                current position only: */
905 	    if (ml_anch) {
906 		s = t = t + 1;
907 		if (!check)
908 		    goto giveup;
909 		DEBUG_r( PerlIO_printf(Perl_debug_log,
910 			  "Looking for /%s^%s/m starting at offset %ld...\n",
911 			  PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
912 		goto try_at_offset;
913 	    }
914 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
915 		goto fail;
916 	    /* Check is floating subtring. */
917 	  retry_floating_check:
918 	    t = check_at - start_shift;
919 	    DEBUG_r( what = "floating" );
920 	    goto hop_and_restart;
921 	}
922 	if (t != s) {
923             DEBUG_r(PerlIO_printf(Perl_debug_log,
924 			"By STCLASS: moving %ld --> %ld\n",
925                                   (long)(t - i_strpos), (long)(s - i_strpos))
926                    );
927         }
928         else {
929             DEBUG_r(PerlIO_printf(Perl_debug_log,
930                                   "Does not contradict STCLASS...\n");
931                    );
932         }
933     }
934   giveup:
935     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
936 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
937 			  PL_colors[5], (long)(s - i_strpos)) );
938     return s;
939 
940   fail_finish:				/* Substring not found */
941     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
942 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
943   fail:
944     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
945 			  PL_colors[4],PL_colors[5]));
946     return Nullch;
947 }
948 
949 /* We know what class REx starts with.  Try to find this position... */
950 STATIC char *
951 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
952 {
953 	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
954 	char *m;
955 	STRLEN ln;
956 	STRLEN lnc;
957 	unsigned int c1;
958 	unsigned int c2;
959 	char *e;
960 	register I32 tmp = 1;	/* Scratch variable? */
961 	register bool do_utf8 = PL_reg_match_utf8;
962 
963 	/* We know what class it must start with. */
964 	switch (OP(c)) {
965 	case ANYOF:
966 	    if (do_utf8) {
967 		 while (s < strend) {
968 		      if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
969 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
970 			  reginclass(c, (U8*)s, 0, do_utf8) :
971 			  REGINCLASS(c, (U8*)s)) {
972 			   if (tmp && (norun || regtry(prog, s)))
973 				goto got_it;
974 			   else
975 				tmp = doevery;
976 		      }
977 		      else
978 			   tmp = 1;
979 		      s += UTF8SKIP(s);
980 		 }
981 	    }
982 	    else {
983 		 while (s < strend) {
984 		      STRLEN skip = 1;
985 
986 		      if (REGINCLASS(c, (U8*)s) ||
987 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
988 			   /* The assignment of 2 is intentional:
989 			    * for the folded sharp s, the skip is 2. */
990 			   (skip = SHARP_S_SKIP))) {
991 			   if (tmp && (norun || regtry(prog, s)))
992 				goto got_it;
993 			   else
994 				tmp = doevery;
995 		      }
996 		      else
997 			   tmp = 1;
998 		      s += skip;
999 		 }
1000 	    }
1001 	    break;
1002 	case CANY:
1003 	    while (s < strend) {
1004 	        if (tmp && (norun || regtry(prog, s)))
1005 		    goto got_it;
1006 		else
1007 		    tmp = doevery;
1008 		s++;
1009 	    }
1010 	    break;
1011 	case EXACTF:
1012 	    m   = STRING(c);
1013 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1014 	    lnc = (I32) ln;	/* length to match in characters */
1015 	    if (UTF) {
1016 	        STRLEN ulen1, ulen2;
1017 		U8 *sm = (U8 *) m;
1018 		U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
1019 		U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
1020 
1021 		to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1022 		to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1023 
1024 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1025 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1026 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1027 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1028 		lnc = 0;
1029 		while (sm < ((U8 *) m + ln)) {
1030 		    lnc++;
1031 		    sm += UTF8SKIP(sm);
1032 		}
1033 	    }
1034 	    else {
1035 		c1 = *(U8*)m;
1036 		c2 = PL_fold[c1];
1037 	    }
1038 	    goto do_exactf;
1039 	case EXACTFL:
1040 	    m   = STRING(c);
1041 	    ln  = STR_LEN(c);
1042 	    lnc = (I32) ln;
1043 	    c1 = *(U8*)m;
1044 	    c2 = PL_fold_locale[c1];
1045 	  do_exactf:
1046 	    e = HOP3c(strend, -((I32)lnc), s);
1047 
1048 	    if (norun && e < s)
1049 		e = s;			/* Due to minlen logic of intuit() */
1050 
1051 	    /* The idea in the EXACTF* cases is to first find the
1052 	     * first character of the EXACTF* node and then, if
1053 	     * necessary, case-insensitively compare the full
1054 	     * text of the node.  The c1 and c2 are the first
1055 	     * characters (though in Unicode it gets a bit
1056 	     * more complicated because there are more cases
1057 	     * than just upper and lower: one needs to use
1058 	     * the so-called folding case for case-insensitive
1059 	     * matching (called "loose matching" in Unicode).
1060 	     * ibcmp_utf8() will do just that. */
1061 
1062 	    if (do_utf8) {
1063 	        UV c, f;
1064 	        U8 tmpbuf [UTF8_MAXLEN+1];
1065 		U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1066 		STRLEN len, foldlen;
1067 
1068 		if (c1 == c2) {
1069 		    /* Upper and lower of 1st char are equal -
1070 		     * probably not a "letter". */
1071 		    while (s <= e) {
1072 		        c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1073 					   ckWARN(WARN_UTF8) ?
1074 					   0 : UTF8_ALLOW_ANY);
1075 			if ( c == c1
1076 			     && (ln == len ||
1077 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1078 					    m, (char **)0, ln, (bool)UTF))
1079 			     && (norun || regtry(prog, s)) )
1080 			    goto got_it;
1081 			else {
1082 			     uvchr_to_utf8(tmpbuf, c);
1083 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1084 			     if ( f != c
1085 				  && (f == c1 || f == c2)
1086 				  && (ln == foldlen ||
1087 				      !ibcmp_utf8((char *) foldbuf,
1088 						  (char **)0, foldlen, do_utf8,
1089 						  m,
1090 						  (char **)0, ln, (bool)UTF))
1091 				  && (norun || regtry(prog, s)) )
1092 				  goto got_it;
1093 			}
1094 			s += len;
1095 		    }
1096 		}
1097 		else {
1098 		    while (s <= e) {
1099 		      c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1100 					   ckWARN(WARN_UTF8) ?
1101 					   0 : UTF8_ALLOW_ANY);
1102 
1103 			/* Handle some of the three Greek sigmas cases.
1104 			 * Note that not all the possible combinations
1105 			 * are handled here: some of them are handled
1106 			 * by the standard folding rules, and some of
1107 			 * them (the character class or ANYOF cases)
1108 			 * are handled during compiletime in
1109 			 * regexec.c:S_regclass(). */
1110 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1111 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1112 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1113 
1114 			if ( (c == c1 || c == c2)
1115 			     && (ln == len ||
1116 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1117 					    m, (char **)0, ln, (bool)UTF))
1118 			     && (norun || regtry(prog, s)) )
1119 			    goto got_it;
1120 			else {
1121 			     uvchr_to_utf8(tmpbuf, c);
1122 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1123 			     if ( f != c
1124 				  && (f == c1 || f == c2)
1125 				  && (ln == foldlen ||
1126 				      !ibcmp_utf8((char *) foldbuf,
1127 						  (char **)0, foldlen, do_utf8,
1128 						  m,
1129 						  (char **)0, ln, (bool)UTF))
1130 				  && (norun || regtry(prog, s)) )
1131 				  goto got_it;
1132 			}
1133 			s += len;
1134 		    }
1135 		}
1136 	    }
1137 	    else {
1138 		if (c1 == c2)
1139 		    while (s <= e) {
1140 			if ( *(U8*)s == c1
1141 			     && (ln == 1 || !(OP(c) == EXACTF
1142 					      ? ibcmp(s, m, ln)
1143 					      : ibcmp_locale(s, m, ln)))
1144 			     && (norun || regtry(prog, s)) )
1145 			    goto got_it;
1146 			s++;
1147 		    }
1148 		else
1149 		    while (s <= e) {
1150 			if ( (*(U8*)s == c1 || *(U8*)s == c2)
1151 			     && (ln == 1 || !(OP(c) == EXACTF
1152 					      ? ibcmp(s, m, ln)
1153 					      : ibcmp_locale(s, m, ln)))
1154 			     && (norun || regtry(prog, s)) )
1155 			    goto got_it;
1156 			s++;
1157 		    }
1158 	    }
1159 	    break;
1160 	case BOUNDL:
1161 	    PL_reg_flags |= RF_tainted;
1162 	    /* FALL THROUGH */
1163 	case BOUND:
1164 	    if (do_utf8) {
1165 		if (s == PL_bostr)
1166 		    tmp = '\n';
1167 		else {
1168 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1169 
1170 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1171 		}
1172 		tmp = ((OP(c) == BOUND ?
1173 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1174 		LOAD_UTF8_CHARCLASS(alnum,"a");
1175 		while (s < strend) {
1176 		    if (tmp == !(OP(c) == BOUND ?
1177 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1178 				 isALNUM_LC_utf8((U8*)s)))
1179 		    {
1180 			tmp = !tmp;
1181 			if ((norun || regtry(prog, s)))
1182 			    goto got_it;
1183 		    }
1184 		    s += UTF8SKIP(s);
1185 		}
1186 	    }
1187 	    else {
1188 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1189 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1190 		while (s < strend) {
1191 		    if (tmp ==
1192 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1193 			tmp = !tmp;
1194 			if ((norun || regtry(prog, s)))
1195 			    goto got_it;
1196 		    }
1197 		    s++;
1198 		}
1199 	    }
1200 	    if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1201 		goto got_it;
1202 	    break;
1203 	case NBOUNDL:
1204 	    PL_reg_flags |= RF_tainted;
1205 	    /* FALL THROUGH */
1206 	case NBOUND:
1207 	    if (do_utf8) {
1208 		if (s == PL_bostr)
1209 		    tmp = '\n';
1210 		else {
1211 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1212 
1213 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1214 		}
1215 		tmp = ((OP(c) == NBOUND ?
1216 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1217 		LOAD_UTF8_CHARCLASS(alnum,"a");
1218 		while (s < strend) {
1219 		    if (tmp == !(OP(c) == NBOUND ?
1220 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1221 				 isALNUM_LC_utf8((U8*)s)))
1222 			tmp = !tmp;
1223 		    else if ((norun || regtry(prog, s)))
1224 			goto got_it;
1225 		    s += UTF8SKIP(s);
1226 		}
1227 	    }
1228 	    else {
1229 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1230 		tmp = ((OP(c) == NBOUND ?
1231 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1232 		while (s < strend) {
1233 		    if (tmp ==
1234 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1235 			tmp = !tmp;
1236 		    else if ((norun || regtry(prog, s)))
1237 			goto got_it;
1238 		    s++;
1239 		}
1240 	    }
1241 	    if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1242 		goto got_it;
1243 	    break;
1244 	case ALNUM:
1245 	    if (do_utf8) {
1246 		LOAD_UTF8_CHARCLASS(alnum,"a");
1247 		while (s < strend) {
1248 		    if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1249 			if (tmp && (norun || regtry(prog, s)))
1250 			    goto got_it;
1251 			else
1252 			    tmp = doevery;
1253 		    }
1254 		    else
1255 			tmp = 1;
1256 		    s += UTF8SKIP(s);
1257 		}
1258 	    }
1259 	    else {
1260 		while (s < strend) {
1261 		    if (isALNUM(*s)) {
1262 			if (tmp && (norun || regtry(prog, s)))
1263 			    goto got_it;
1264 			else
1265 			    tmp = doevery;
1266 		    }
1267 		    else
1268 			tmp = 1;
1269 		    s++;
1270 		}
1271 	    }
1272 	    break;
1273 	case ALNUML:
1274 	    PL_reg_flags |= RF_tainted;
1275 	    if (do_utf8) {
1276 		while (s < strend) {
1277 		    if (isALNUM_LC_utf8((U8*)s)) {
1278 			if (tmp && (norun || regtry(prog, s)))
1279 			    goto got_it;
1280 			else
1281 			    tmp = doevery;
1282 		    }
1283 		    else
1284 			tmp = 1;
1285 		    s += UTF8SKIP(s);
1286 		}
1287 	    }
1288 	    else {
1289 		while (s < strend) {
1290 		    if (isALNUM_LC(*s)) {
1291 			if (tmp && (norun || regtry(prog, s)))
1292 			    goto got_it;
1293 			else
1294 			    tmp = doevery;
1295 		    }
1296 		    else
1297 			tmp = 1;
1298 		    s++;
1299 		}
1300 	    }
1301 	    break;
1302 	case NALNUM:
1303 	    if (do_utf8) {
1304 		LOAD_UTF8_CHARCLASS(alnum,"a");
1305 		while (s < strend) {
1306 		    if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1307 			if (tmp && (norun || regtry(prog, s)))
1308 			    goto got_it;
1309 			else
1310 			    tmp = doevery;
1311 		    }
1312 		    else
1313 			tmp = 1;
1314 		    s += UTF8SKIP(s);
1315 		}
1316 	    }
1317 	    else {
1318 		while (s < strend) {
1319 		    if (!isALNUM(*s)) {
1320 			if (tmp && (norun || regtry(prog, s)))
1321 			    goto got_it;
1322 			else
1323 			    tmp = doevery;
1324 		    }
1325 		    else
1326 			tmp = 1;
1327 		    s++;
1328 		}
1329 	    }
1330 	    break;
1331 	case NALNUML:
1332 	    PL_reg_flags |= RF_tainted;
1333 	    if (do_utf8) {
1334 		while (s < strend) {
1335 		    if (!isALNUM_LC_utf8((U8*)s)) {
1336 			if (tmp && (norun || regtry(prog, s)))
1337 			    goto got_it;
1338 			else
1339 			    tmp = doevery;
1340 		    }
1341 		    else
1342 			tmp = 1;
1343 		    s += UTF8SKIP(s);
1344 		}
1345 	    }
1346 	    else {
1347 		while (s < strend) {
1348 		    if (!isALNUM_LC(*s)) {
1349 			if (tmp && (norun || regtry(prog, s)))
1350 			    goto got_it;
1351 			else
1352 			    tmp = doevery;
1353 		    }
1354 		    else
1355 			tmp = 1;
1356 		    s++;
1357 		}
1358 	    }
1359 	    break;
1360 	case SPACE:
1361 	    if (do_utf8) {
1362 		LOAD_UTF8_CHARCLASS(space," ");
1363 		while (s < strend) {
1364 		    if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1365 			if (tmp && (norun || regtry(prog, s)))
1366 			    goto got_it;
1367 			else
1368 			    tmp = doevery;
1369 		    }
1370 		    else
1371 			tmp = 1;
1372 		    s += UTF8SKIP(s);
1373 		}
1374 	    }
1375 	    else {
1376 		while (s < strend) {
1377 		    if (isSPACE(*s)) {
1378 			if (tmp && (norun || regtry(prog, s)))
1379 			    goto got_it;
1380 			else
1381 			    tmp = doevery;
1382 		    }
1383 		    else
1384 			tmp = 1;
1385 		    s++;
1386 		}
1387 	    }
1388 	    break;
1389 	case SPACEL:
1390 	    PL_reg_flags |= RF_tainted;
1391 	    if (do_utf8) {
1392 		while (s < strend) {
1393 		    if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1394 			if (tmp && (norun || regtry(prog, s)))
1395 			    goto got_it;
1396 			else
1397 			    tmp = doevery;
1398 		    }
1399 		    else
1400 			tmp = 1;
1401 		    s += UTF8SKIP(s);
1402 		}
1403 	    }
1404 	    else {
1405 		while (s < strend) {
1406 		    if (isSPACE_LC(*s)) {
1407 			if (tmp && (norun || regtry(prog, s)))
1408 			    goto got_it;
1409 			else
1410 			    tmp = doevery;
1411 		    }
1412 		    else
1413 			tmp = 1;
1414 		    s++;
1415 		}
1416 	    }
1417 	    break;
1418 	case NSPACE:
1419 	    if (do_utf8) {
1420 		LOAD_UTF8_CHARCLASS(space," ");
1421 		while (s < strend) {
1422 		    if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1423 			if (tmp && (norun || regtry(prog, s)))
1424 			    goto got_it;
1425 			else
1426 			    tmp = doevery;
1427 		    }
1428 		    else
1429 			tmp = 1;
1430 		    s += UTF8SKIP(s);
1431 		}
1432 	    }
1433 	    else {
1434 		while (s < strend) {
1435 		    if (!isSPACE(*s)) {
1436 			if (tmp && (norun || regtry(prog, s)))
1437 			    goto got_it;
1438 			else
1439 			    tmp = doevery;
1440 		    }
1441 		    else
1442 			tmp = 1;
1443 		    s++;
1444 		}
1445 	    }
1446 	    break;
1447 	case NSPACEL:
1448 	    PL_reg_flags |= RF_tainted;
1449 	    if (do_utf8) {
1450 		while (s < strend) {
1451 		    if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1452 			if (tmp && (norun || regtry(prog, s)))
1453 			    goto got_it;
1454 			else
1455 			    tmp = doevery;
1456 		    }
1457 		    else
1458 			tmp = 1;
1459 		    s += UTF8SKIP(s);
1460 		}
1461 	    }
1462 	    else {
1463 		while (s < strend) {
1464 		    if (!isSPACE_LC(*s)) {
1465 			if (tmp && (norun || regtry(prog, s)))
1466 			    goto got_it;
1467 			else
1468 			    tmp = doevery;
1469 		    }
1470 		    else
1471 			tmp = 1;
1472 		    s++;
1473 		}
1474 	    }
1475 	    break;
1476 	case DIGIT:
1477 	    if (do_utf8) {
1478 		LOAD_UTF8_CHARCLASS(digit,"0");
1479 		while (s < strend) {
1480 		    if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1481 			if (tmp && (norun || regtry(prog, s)))
1482 			    goto got_it;
1483 			else
1484 			    tmp = doevery;
1485 		    }
1486 		    else
1487 			tmp = 1;
1488 		    s += UTF8SKIP(s);
1489 		}
1490 	    }
1491 	    else {
1492 		while (s < strend) {
1493 		    if (isDIGIT(*s)) {
1494 			if (tmp && (norun || regtry(prog, s)))
1495 			    goto got_it;
1496 			else
1497 			    tmp = doevery;
1498 		    }
1499 		    else
1500 			tmp = 1;
1501 		    s++;
1502 		}
1503 	    }
1504 	    break;
1505 	case DIGITL:
1506 	    PL_reg_flags |= RF_tainted;
1507 	    if (do_utf8) {
1508 		while (s < strend) {
1509 		    if (isDIGIT_LC_utf8((U8*)s)) {
1510 			if (tmp && (norun || regtry(prog, s)))
1511 			    goto got_it;
1512 			else
1513 			    tmp = doevery;
1514 		    }
1515 		    else
1516 			tmp = 1;
1517 		    s += UTF8SKIP(s);
1518 		}
1519 	    }
1520 	    else {
1521 		while (s < strend) {
1522 		    if (isDIGIT_LC(*s)) {
1523 			if (tmp && (norun || regtry(prog, s)))
1524 			    goto got_it;
1525 			else
1526 			    tmp = doevery;
1527 		    }
1528 		    else
1529 			tmp = 1;
1530 		    s++;
1531 		}
1532 	    }
1533 	    break;
1534 	case NDIGIT:
1535 	    if (do_utf8) {
1536 		LOAD_UTF8_CHARCLASS(digit,"0");
1537 		while (s < strend) {
1538 		    if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1539 			if (tmp && (norun || regtry(prog, s)))
1540 			    goto got_it;
1541 			else
1542 			    tmp = doevery;
1543 		    }
1544 		    else
1545 			tmp = 1;
1546 		    s += UTF8SKIP(s);
1547 		}
1548 	    }
1549 	    else {
1550 		while (s < strend) {
1551 		    if (!isDIGIT(*s)) {
1552 			if (tmp && (norun || regtry(prog, s)))
1553 			    goto got_it;
1554 			else
1555 			    tmp = doevery;
1556 		    }
1557 		    else
1558 			tmp = 1;
1559 		    s++;
1560 		}
1561 	    }
1562 	    break;
1563 	case NDIGITL:
1564 	    PL_reg_flags |= RF_tainted;
1565 	    if (do_utf8) {
1566 		while (s < strend) {
1567 		    if (!isDIGIT_LC_utf8((U8*)s)) {
1568 			if (tmp && (norun || regtry(prog, s)))
1569 			    goto got_it;
1570 			else
1571 			    tmp = doevery;
1572 		    }
1573 		    else
1574 			tmp = 1;
1575 		    s += UTF8SKIP(s);
1576 		}
1577 	    }
1578 	    else {
1579 		while (s < strend) {
1580 		    if (!isDIGIT_LC(*s)) {
1581 			if (tmp && (norun || regtry(prog, s)))
1582 			    goto got_it;
1583 			else
1584 			    tmp = doevery;
1585 		    }
1586 		    else
1587 			tmp = 1;
1588 		    s++;
1589 		}
1590 	    }
1591 	    break;
1592 	default:
1593 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1594 	    break;
1595 	}
1596 	return 0;
1597       got_it:
1598 	return s;
1599 }
1600 
1601 /*
1602  - regexec_flags - match a regexp against a string
1603  */
1604 I32
1605 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1606 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1607 /* strend: pointer to null at end of string */
1608 /* strbeg: real beginning of string */
1609 /* minend: end of match must be >=minend after stringarg. */
1610 /* data: May be used for some additional optimizations. */
1611 /* nosave: For optimizations. */
1612 {
1613     register char *s;
1614     register regnode *c;
1615     register char *startpos = stringarg;
1616     I32 minlen;		/* must match at least this many chars */
1617     I32 dontbother = 0;	/* how many characters not to try at end */
1618     /* I32 start_shift = 0; */		/* Offset of the start to find
1619 					 constant substr. */		/* CC */
1620     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1621     I32 scream_pos = -1;		/* Internal iterator of scream. */
1622     char *scream_olds;
1623     SV* oreplsv = GvSV(PL_replgv);
1624     bool do_utf8 = DO_UTF8(sv);
1625 #ifdef DEBUGGING
1626     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1627     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1628 #endif
1629     RX_MATCH_UTF8_set(prog,do_utf8);
1630 
1631     PL_regcc = 0;
1632 
1633     cache_re(prog);
1634 #ifdef DEBUGGING
1635     PL_regnarrate = DEBUG_r_TEST;
1636 #endif
1637 
1638     /* Be paranoid... */
1639     if (prog == NULL || startpos == NULL) {
1640 	Perl_croak(aTHX_ "NULL regexp parameter");
1641 	return 0;
1642     }
1643 
1644     minlen = prog->minlen;
1645     if (strend - startpos < minlen) {
1646         DEBUG_r(PerlIO_printf(Perl_debug_log,
1647 			      "String too short [regexec_flags]...\n"));
1648 	goto phooey;
1649     }
1650 
1651     /* Check validity of program. */
1652     if (UCHARAT(prog->program) != REG_MAGIC) {
1653 	Perl_croak(aTHX_ "corrupted regexp program");
1654     }
1655 
1656     PL_reg_flags = 0;
1657     PL_reg_eval_set = 0;
1658     PL_reg_maxiter = 0;
1659 
1660     if (prog->reganch & ROPT_UTF8)
1661 	PL_reg_flags |= RF_utf8;
1662 
1663     /* Mark beginning of line for ^ and lookbehind. */
1664     PL_regbol = startpos;
1665     PL_bostr  = strbeg;
1666     PL_reg_sv = sv;
1667 
1668     /* Mark end of line for $ (and such) */
1669     PL_regeol = strend;
1670 
1671     /* see how far we have to get to not match where we matched before */
1672     PL_regtill = startpos+minend;
1673 
1674     /* We start without call_cc context.  */
1675     PL_reg_call_cc = 0;
1676 
1677     /* If there is a "must appear" string, look for it. */
1678     s = startpos;
1679 
1680     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1681 	MAGIC *mg;
1682 
1683 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1684 	    PL_reg_ganch = startpos;
1685 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1686 		  && SvMAGIC(sv)
1687 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1688 		  && mg->mg_len >= 0) {
1689 	    PL_reg_ganch = strbeg + mg->mg_len;	/* Defined pos() */
1690 	    if (prog->reganch & ROPT_ANCH_GPOS) {
1691 	        if (s > PL_reg_ganch)
1692 		    goto phooey;
1693 		s = PL_reg_ganch;
1694 	    }
1695 	}
1696 	else				/* pos() not defined */
1697 	    PL_reg_ganch = strbeg;
1698     }
1699 
1700     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1701 	re_scream_pos_data d;
1702 
1703 	d.scream_olds = &scream_olds;
1704 	d.scream_pos = &scream_pos;
1705 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1706 	if (!s) {
1707 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1708 	    goto phooey;	/* not present */
1709 	}
1710     }
1711 
1712     DEBUG_r({
1713 	 char *s0   = UTF ?
1714 	   pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1715 			  UNI_DISPLAY_REGEX) :
1716 	   prog->precomp;
1717 	 int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1718 	 char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1719 					       UNI_DISPLAY_REGEX) : startpos;
1720 	 int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1721 	 if (!PL_colorset)
1722 	     reginitcolors();
1723 	 PerlIO_printf(Perl_debug_log,
1724 		       "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1725 		       PL_colors[4],PL_colors[5],PL_colors[0],
1726 		       len0, len0, s0,
1727 		       PL_colors[1],
1728 		       len0 > 60 ? "..." : "",
1729 		       PL_colors[0],
1730 		       (int)(len1 > 60 ? 60 : len1),
1731 		       s1, PL_colors[1],
1732 		       (len1 > 60 ? "..." : "")
1733 	      );
1734     });
1735 
1736     /* Simplest case:  anchored match need be tried only once. */
1737     /*  [unless only anchor is BOL and multiline is set] */
1738     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1739 	if (s == startpos && regtry(prog, startpos))
1740 	    goto got_it;
1741 	else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1742 		 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1743 	{
1744 	    char *end;
1745 
1746 	    if (minlen)
1747 		dontbother = minlen - 1;
1748 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1749 	    /* for multiline we only have to try after newlines */
1750 	    if (prog->check_substr || prog->check_utf8) {
1751 		if (s == startpos)
1752 		    goto after_try;
1753 		while (1) {
1754 		    if (regtry(prog, s))
1755 			goto got_it;
1756 		  after_try:
1757 		    if (s >= end)
1758 			goto phooey;
1759 		    if (prog->reganch & RE_USE_INTUIT) {
1760 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1761 			if (!s)
1762 			    goto phooey;
1763 		    }
1764 		    else
1765 			s++;
1766 		}
1767 	    } else {
1768 		if (s > startpos)
1769 		    s--;
1770 		while (s < end) {
1771 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1772 			if (regtry(prog, s))
1773 			    goto got_it;
1774 		    }
1775 		}
1776 	    }
1777 	}
1778 	goto phooey;
1779     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1780 	if (regtry(prog, PL_reg_ganch))
1781 	    goto got_it;
1782 	goto phooey;
1783     }
1784 
1785     /* Messy cases:  unanchored match. */
1786     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1787 	/* we have /x+whatever/ */
1788 	/* it must be a one character string (XXXX Except UTF?) */
1789 	char ch;
1790 #ifdef DEBUGGING
1791 	int did_match = 0;
1792 #endif
1793 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1794 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1795 	ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1796 
1797 	if (do_utf8) {
1798 	    while (s < strend) {
1799 		if (*s == ch) {
1800 		    DEBUG_r( did_match = 1 );
1801 		    if (regtry(prog, s)) goto got_it;
1802 		    s += UTF8SKIP(s);
1803 		    while (s < strend && *s == ch)
1804 			s += UTF8SKIP(s);
1805 		}
1806 		s += UTF8SKIP(s);
1807 	    }
1808 	}
1809 	else {
1810 	    while (s < strend) {
1811 		if (*s == ch) {
1812 		    DEBUG_r( did_match = 1 );
1813 		    if (regtry(prog, s)) goto got_it;
1814 		    s++;
1815 		    while (s < strend && *s == ch)
1816 			s++;
1817 		}
1818 		s++;
1819 	    }
1820 	}
1821 	DEBUG_r(if (!did_match)
1822 		PerlIO_printf(Perl_debug_log,
1823                                   "Did not find anchored character...\n")
1824                );
1825     }
1826     /*SUPPRESS 560*/
1827     else if (prog->anchored_substr != Nullsv
1828 	      || prog->anchored_utf8 != Nullsv
1829 	      || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1830 		  && prog->float_max_offset < strend - s)) {
1831 	SV *must;
1832 	I32 back_max;
1833 	I32 back_min;
1834 	char *last;
1835 	char *last1;		/* Last position checked before */
1836 #ifdef DEBUGGING
1837 	int did_match = 0;
1838 #endif
1839 	if (prog->anchored_substr || prog->anchored_utf8) {
1840 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1841 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1842 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1843 	    back_max = back_min = prog->anchored_offset;
1844 	} else {
1845 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1846 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1847 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1848 	    back_max = prog->float_max_offset;
1849 	    back_min = prog->float_min_offset;
1850 	}
1851 	if (must == &PL_sv_undef)
1852 	    /* could not downgrade utf8 check substring, so must fail */
1853 	    goto phooey;
1854 
1855 	last = HOP3c(strend,	/* Cannot start after this */
1856 			  -(I32)(CHR_SVLEN(must)
1857 				 - (SvTAIL(must) != 0) + back_min), strbeg);
1858 
1859 	if (s > PL_bostr)
1860 	    last1 = HOPc(s, -1);
1861 	else
1862 	    last1 = s - 1;	/* bogus */
1863 
1864 	/* XXXX check_substr already used to find `s', can optimize if
1865 	   check_substr==must. */
1866 	scream_pos = -1;
1867 	dontbother = end_shift;
1868 	strend = HOPc(strend, -dontbother);
1869 	while ( (s <= last) &&
1870 		((flags & REXEC_SCREAM)
1871 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1872 				    end_shift, &scream_pos, 0))
1873 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1874 				  (unsigned char*)strend, must,
1875 				  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1876 	    /* we may be pointing at the wrong string */
1877 	    if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1878 		s = strbeg + (s - SvPVX(sv));
1879 	    DEBUG_r( did_match = 1 );
1880 	    if (HOPc(s, -back_max) > last1) {
1881 		last1 = HOPc(s, -back_min);
1882 		s = HOPc(s, -back_max);
1883 	    }
1884 	    else {
1885 		char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1886 
1887 		last1 = HOPc(s, -back_min);
1888 		s = t;
1889 	    }
1890 	    if (do_utf8) {
1891 		while (s <= last1) {
1892 		    if (regtry(prog, s))
1893 			goto got_it;
1894 		    s += UTF8SKIP(s);
1895 		}
1896 	    }
1897 	    else {
1898 		while (s <= last1) {
1899 		    if (regtry(prog, s))
1900 			goto got_it;
1901 		    s++;
1902 		}
1903 	    }
1904 	}
1905 	DEBUG_r(if (!did_match)
1906                     PerlIO_printf(Perl_debug_log,
1907                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1908 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
1909 			       ? "anchored" : "floating"),
1910 			      PL_colors[0],
1911 			      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1912 			      SvPVX(must),
1913                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1914                );
1915 	goto phooey;
1916     }
1917     else if ((c = prog->regstclass)) {
1918 	if (minlen) {
1919 	    I32 op = (U8)OP(prog->regstclass);
1920 	    /* don't bother with what can't match */
1921 	    if (PL_regkind[op] != EXACT && op != CANY)
1922 	        strend = HOPc(strend, -(minlen - 1));
1923 	}
1924 	DEBUG_r({
1925 	    SV *prop = sv_newmortal();
1926 	    char *s0;
1927 	    char *s1;
1928 	    int len0;
1929 	    int len1;
1930 
1931 	    regprop(prop, c);
1932 	    s0 = UTF ?
1933 	      pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1934 			     UNI_DISPLAY_REGEX) :
1935 	      SvPVX(prop);
1936 	    len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1937 	    s1 = UTF ?
1938 	      sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1939 	    len1 = UTF ? SvCUR(dsv1) : strend - s;
1940 	    PerlIO_printf(Perl_debug_log,
1941 			  "Matching stclass `%*.*s' against `%*.*s'\n",
1942 			  len0, len0, s0,
1943 			  len1, len1, s1);
1944 	});
1945   	if (find_byclass(prog, c, s, strend, startpos, 0))
1946 	    goto got_it;
1947 	DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1948     }
1949     else {
1950 	dontbother = 0;
1951 	if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1952 	    /* Trim the end. */
1953 	    char *last;
1954 	    SV* float_real;
1955 
1956 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1957 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1958 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1959 
1960 	    if (flags & REXEC_SCREAM) {
1961 		last = screaminstr(sv, float_real, s - strbeg,
1962 				   end_shift, &scream_pos, 1); /* last one */
1963 		if (!last)
1964 		    last = scream_olds; /* Only one occurrence. */
1965 		/* we may be pointing at the wrong string */
1966 		else if (RX_MATCH_COPIED(prog))
1967 		    s = strbeg + (s - SvPVX(sv));
1968 	    }
1969 	    else {
1970 		STRLEN len;
1971 		char *little = SvPV(float_real, len);
1972 
1973 		if (SvTAIL(float_real)) {
1974 		    if (memEQ(strend - len + 1, little, len - 1))
1975 			last = strend - len + 1;
1976 		    else if (!PL_multiline)
1977 			last = memEQ(strend - len, little, len)
1978 			    ? strend - len : Nullch;
1979 		    else
1980 			goto find_last;
1981 		} else {
1982 		  find_last:
1983 		    if (len)
1984 			last = rninstr(s, strend, little, little + len);
1985 		    else
1986 			last = strend;	/* matching `$' */
1987 		}
1988 	    }
1989 	    if (last == NULL) {
1990 		DEBUG_r(PerlIO_printf(Perl_debug_log,
1991 				      "%sCan't trim the tail, match fails (should not happen)%s\n",
1992 				      PL_colors[4],PL_colors[5]));
1993 		goto phooey; /* Should not happen! */
1994 	    }
1995 	    dontbother = strend - last + prog->float_min_offset;
1996 	}
1997 	if (minlen && (dontbother < minlen))
1998 	    dontbother = minlen - 1;
1999 	strend -= dontbother; 		   /* this one's always in bytes! */
2000 	/* We don't know much -- general case. */
2001 	if (do_utf8) {
2002 	    for (;;) {
2003 		if (regtry(prog, s))
2004 		    goto got_it;
2005 		if (s >= strend)
2006 		    break;
2007 		s += UTF8SKIP(s);
2008 	    };
2009 	}
2010 	else {
2011 	    do {
2012 		if (regtry(prog, s))
2013 		    goto got_it;
2014 	    } while (s++ < strend);
2015 	}
2016     }
2017 
2018     /* Failure. */
2019     goto phooey;
2020 
2021 got_it:
2022     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2023 
2024     if (PL_reg_eval_set) {
2025 	/* Preserve the current value of $^R */
2026 	if (oreplsv != GvSV(PL_replgv))
2027 	    sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2028 						  restored, the value remains
2029 						  the same. */
2030 	restore_pos(aTHX_ 0);
2031     }
2032 
2033     /* make sure $`, $&, $', and $digit will work later */
2034     if ( !(flags & REXEC_NOT_FIRST) ) {
2035 	if (RX_MATCH_COPIED(prog)) {
2036 	    Safefree(prog->subbeg);
2037 	    RX_MATCH_COPIED_off(prog);
2038 	}
2039 	if (flags & REXEC_COPY_STR) {
2040 	    I32 i = PL_regeol - startpos + (stringarg - strbeg);
2041 
2042 	    s = savepvn(strbeg, i);
2043 	    prog->subbeg = s;
2044 	    prog->sublen = i;
2045 	    RX_MATCH_COPIED_on(prog);
2046 	}
2047 	else {
2048 	    prog->subbeg = strbeg;
2049 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2050 	}
2051     }
2052 
2053     return 1;
2054 
2055 phooey:
2056     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2057 			  PL_colors[4],PL_colors[5]));
2058     if (PL_reg_eval_set)
2059 	restore_pos(aTHX_ 0);
2060     return 0;
2061 }
2062 
2063 /*
2064  - regtry - try match at specific point
2065  */
2066 STATIC I32			/* 0 failure, 1 success */
2067 S_regtry(pTHX_ regexp *prog, char *startpos)
2068 {
2069     register I32 i;
2070     register I32 *sp;
2071     register I32 *ep;
2072     CHECKPOINT lastcp;
2073 
2074 #ifdef DEBUGGING
2075     PL_regindent = 0;	/* XXXX Not good when matches are reenterable... */
2076 #endif
2077     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2078 	MAGIC *mg;
2079 
2080 	PL_reg_eval_set = RS_init;
2081 	DEBUG_r(DEBUG_s(
2082 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2083 			  (IV)(PL_stack_sp - PL_stack_base));
2084 	    ));
2085 	SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2086 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2087 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2088 	SAVETMPS;
2089 	/* Apparently this is not needed, judging by wantarray. */
2090 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2091 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2092 
2093 	if (PL_reg_sv) {
2094 	    /* Make $_ available to executed code. */
2095 	    if (PL_reg_sv != DEFSV) {
2096 		/* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2097 		SAVESPTR(DEFSV);
2098 		DEFSV = PL_reg_sv;
2099 	    }
2100 
2101 	    if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2102 		  && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2103 		/* prepare for quick setting of pos */
2104 		sv_magic(PL_reg_sv, (SV*)0,
2105 			PERL_MAGIC_regex_global, Nullch, 0);
2106 		mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2107 		mg->mg_len = -1;
2108 	    }
2109 	    PL_reg_magic    = mg;
2110 	    PL_reg_oldpos   = mg->mg_len;
2111 	    SAVEDESTRUCTOR_X(restore_pos, 0);
2112         }
2113         if (!PL_reg_curpm) {
2114 	    Newz(22,PL_reg_curpm, 1, PMOP);
2115 #ifdef USE_ITHREADS
2116             {
2117                 SV* repointer = newSViv(0);
2118                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2119                 SvFLAGS(repointer) |= SVf_BREAK;
2120                 av_push(PL_regex_padav,repointer);
2121                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2122                 PL_regex_pad = AvARRAY(PL_regex_padav);
2123             }
2124 #endif
2125         }
2126 	PM_SETRE(PL_reg_curpm, prog);
2127 	PL_reg_oldcurpm = PL_curpm;
2128 	PL_curpm = PL_reg_curpm;
2129 	if (RX_MATCH_COPIED(prog)) {
2130 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2131 		since it may be needed if this match fails.  Thus
2132 		$` inside (?{}) could fail... */
2133 	    PL_reg_oldsaved = prog->subbeg;
2134 	    PL_reg_oldsavedlen = prog->sublen;
2135 	    RX_MATCH_COPIED_off(prog);
2136 	}
2137 	else
2138 	    PL_reg_oldsaved = Nullch;
2139 	prog->subbeg = PL_bostr;
2140 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2141     }
2142     prog->startp[0] = startpos - PL_bostr;
2143     PL_reginput = startpos;
2144     PL_regstartp = prog->startp;
2145     PL_regendp = prog->endp;
2146     PL_reglastparen = &prog->lastparen;
2147     PL_reglastcloseparen = &prog->lastcloseparen;
2148     prog->lastparen = 0;
2149     prog->lastcloseparen = 0;
2150     PL_regsize = 0;
2151     DEBUG_r(PL_reg_starttry = startpos);
2152     if (PL_reg_start_tmpl <= prog->nparens) {
2153 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2154         if(PL_reg_start_tmp)
2155             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2156         else
2157             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2158     }
2159 
2160     /* XXXX What this code is doing here?!!!  There should be no need
2161        to do this again and again, PL_reglastparen should take care of
2162        this!  --ilya*/
2163 
2164     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2165      * Actually, the code in regcppop() (which Ilya may be meaning by
2166      * PL_reglastparen), is not needed at all by the test suite
2167      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2168      * enough, for building DynaLoader, or otherwise this
2169      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2170      * will happen.  Meanwhile, this code *is* needed for the
2171      * above-mentioned test suite tests to succeed.  The common theme
2172      * on those tests seems to be returning null fields from matches.
2173      * --jhi */
2174 #if 1
2175     sp = prog->startp;
2176     ep = prog->endp;
2177     if (prog->nparens) {
2178 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2179 	    *++sp = -1;
2180 	    *++ep = -1;
2181 	}
2182     }
2183 #endif
2184     REGCP_SET(lastcp);
2185     if (regmatch(prog->program + 1)) {
2186 	prog->endp[0] = PL_reginput - PL_bostr;
2187 	return 1;
2188     }
2189     REGCP_UNWIND(lastcp);
2190     return 0;
2191 }
2192 
2193 #define RE_UNWIND_BRANCH	1
2194 #define RE_UNWIND_BRANCHJ	2
2195 
2196 union re_unwind_t;
2197 
2198 typedef struct {		/* XX: makes sense to enlarge it... */
2199     I32 type;
2200     I32 prev;
2201     CHECKPOINT lastcp;
2202 } re_unwind_generic_t;
2203 
2204 typedef struct {
2205     I32 type;
2206     I32 prev;
2207     CHECKPOINT lastcp;
2208     I32 lastparen;
2209     regnode *next;
2210     char *locinput;
2211     I32 nextchr;
2212 #ifdef DEBUGGING
2213     int regindent;
2214 #endif
2215 } re_unwind_branch_t;
2216 
2217 typedef union re_unwind_t {
2218     I32 type;
2219     re_unwind_generic_t generic;
2220     re_unwind_branch_t branch;
2221 } re_unwind_t;
2222 
2223 #define sayYES goto yes
2224 #define sayNO goto no
2225 #define sayNO_ANYOF goto no_anyof
2226 #define sayYES_FINAL goto yes_final
2227 #define sayYES_LOUD  goto yes_loud
2228 #define sayNO_FINAL  goto no_final
2229 #define sayNO_SILENT goto do_no
2230 #define saySAME(x) if (x) goto yes; else goto no
2231 
2232 #define REPORT_CODE_OFF 24
2233 
2234 /*
2235  - regmatch - main matching routine
2236  *
2237  * Conceptually the strategy is simple:  check to see whether the current
2238  * node matches, call self recursively to see whether the rest matches,
2239  * and then act accordingly.  In practice we make some effort to avoid
2240  * recursion, in particular by going through "ordinary" nodes (that don't
2241  * need to know whether the rest of the match failed) by a loop instead of
2242  * by recursion.
2243  */
2244 /* [lwall] I've hoisted the register declarations to the outer block in order to
2245  * maybe save a little bit of pushing and popping on the stack.  It also takes
2246  * advantage of machines that use a register save mask on subroutine entry.
2247  */
2248 STATIC I32			/* 0 failure, 1 success */
2249 S_regmatch(pTHX_ regnode *prog)
2250 {
2251     register regnode *scan;	/* Current node. */
2252     regnode *next;		/* Next node. */
2253     regnode *inner;		/* Next node in internal branch. */
2254     register I32 nextchr;	/* renamed nextchr - nextchar colides with
2255 				   function of same name */
2256     register I32 n;		/* no or next */
2257     register I32 ln = 0;	/* len or last */
2258     register char *s = Nullch;	/* operand or save */
2259     register char *locinput = PL_reginput;
2260     register I32 c1 = 0, c2 = 0, paren;	/* case fold search, parenth */
2261     int minmod = 0, sw = 0, logical = 0;
2262     I32 unwind = 0;
2263 #if 0
2264     I32 firstcp = PL_savestack_ix;
2265 #endif
2266     register bool do_utf8 = PL_reg_match_utf8;
2267 #ifdef DEBUGGING
2268     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2269     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2270     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2271 #endif
2272 
2273 #ifdef DEBUGGING
2274     PL_regindent++;
2275 #endif
2276 
2277     /* Note that nextchr is a byte even in UTF */
2278     nextchr = UCHARAT(locinput);
2279     scan = prog;
2280     while (scan != NULL) {
2281 
2282         DEBUG_r( {
2283 	    SV *prop = sv_newmortal();
2284 	    int docolor = *PL_colors[0];
2285 	    int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2286 	    int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2287 	    /* The part of the string before starttry has one color
2288 	       (pref0_len chars), between starttry and current
2289 	       position another one (pref_len - pref0_len chars),
2290 	       after the current position the third one.
2291 	       We assume that pref0_len <= pref_len, otherwise we
2292 	       decrease pref0_len.  */
2293 	    int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2294 		? (5 + taill) - l : locinput - PL_bostr;
2295 	    int pref0_len;
2296 
2297 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2298 		pref_len++;
2299 	    pref0_len = pref_len  - (locinput - PL_reg_starttry);
2300 	    if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2301 		l = ( PL_regeol - locinput > (5 + taill) - pref_len
2302 		      ? (5 + taill) - pref_len : PL_regeol - locinput);
2303 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2304 		l--;
2305 	    if (pref0_len < 0)
2306 		pref0_len = 0;
2307 	    if (pref0_len > pref_len)
2308 		pref0_len = pref_len;
2309 	    regprop(prop, scan);
2310 	    {
2311 	      char *s0 =
2312 		do_utf8 && OP(scan) != CANY ?
2313 		pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2314 			       pref0_len, 60, UNI_DISPLAY_REGEX) :
2315 		locinput - pref_len;
2316 	      int len0 = do_utf8 ? strlen(s0) : pref0_len;
2317 	      char *s1 = do_utf8 && OP(scan) != CANY ?
2318 		pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2319 			       pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2320 		locinput - pref_len + pref0_len;
2321 	      int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2322 	      char *s2 = do_utf8 && OP(scan) != CANY ?
2323 		pv_uni_display(dsv2, (U8*)locinput,
2324 			       PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2325 		locinput;
2326 	      int len2 = do_utf8 ? strlen(s2) : l;
2327 	      PerlIO_printf(Perl_debug_log,
2328 			    "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2329 			    (IV)(locinput - PL_bostr),
2330 			    PL_colors[4],
2331 			    len0, s0,
2332 			    PL_colors[5],
2333 			    PL_colors[2],
2334 			    len1, s1,
2335 			    PL_colors[3],
2336 			    (docolor ? "" : "> <"),
2337 			    PL_colors[0],
2338 			    len2, s2,
2339 			    PL_colors[1],
2340 			    15 - l - pref_len + 1,
2341 			    "",
2342 			    (IV)(scan - PL_regprogram), PL_regindent*2, "",
2343 			    SvPVX(prop));
2344 	    }
2345 	});
2346 
2347 	next = scan + NEXT_OFF(scan);
2348 	if (next == scan)
2349 	    next = NULL;
2350 
2351 	switch (OP(scan)) {
2352 	case BOL:
2353 	    if (locinput == PL_bostr || (PL_multiline &&
2354 		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2355 	    {
2356 		/* regtill = regbol; */
2357 		break;
2358 	    }
2359 	    sayNO;
2360 	case MBOL:
2361 	    if (locinput == PL_bostr ||
2362 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2363 	    {
2364 		break;
2365 	    }
2366 	    sayNO;
2367 	case SBOL:
2368 	    if (locinput == PL_bostr)
2369 		break;
2370 	    sayNO;
2371 	case GPOS:
2372 	    if (locinput == PL_reg_ganch)
2373 		break;
2374 	    sayNO;
2375 	case EOL:
2376 	    if (PL_multiline)
2377 		goto meol;
2378 	    else
2379 		goto seol;
2380 	case MEOL:
2381 	  meol:
2382 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2383 		sayNO;
2384 	    break;
2385 	case SEOL:
2386 	  seol:
2387 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2388 		sayNO;
2389 	    if (PL_regeol - locinput > 1)
2390 		sayNO;
2391 	    break;
2392 	case EOS:
2393 	    if (PL_regeol != locinput)
2394 		sayNO;
2395 	    break;
2396 	case SANY:
2397 	    if (!nextchr && locinput >= PL_regeol)
2398 		sayNO;
2399  	    if (do_utf8) {
2400 	        locinput += PL_utf8skip[nextchr];
2401 		if (locinput > PL_regeol)
2402  		    sayNO;
2403  		nextchr = UCHARAT(locinput);
2404  	    }
2405  	    else
2406  		nextchr = UCHARAT(++locinput);
2407 	    break;
2408 	case CANY:
2409 	    if (!nextchr && locinput >= PL_regeol)
2410 		sayNO;
2411 	    nextchr = UCHARAT(++locinput);
2412 	    break;
2413 	case REG_ANY:
2414 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2415 		sayNO;
2416 	    if (do_utf8) {
2417 		locinput += PL_utf8skip[nextchr];
2418 		if (locinput > PL_regeol)
2419 		    sayNO;
2420 		nextchr = UCHARAT(locinput);
2421 	    }
2422 	    else
2423 		nextchr = UCHARAT(++locinput);
2424 	    break;
2425 	case EXACT:
2426 	    s = STRING(scan);
2427 	    ln = STR_LEN(scan);
2428 	    if (do_utf8 != UTF) {
2429 		/* The target and the pattern have differing utf8ness. */
2430 		char *l = locinput;
2431 		char *e = s + ln;
2432 		STRLEN ulen;
2433 
2434 		if (do_utf8) {
2435 		    /* The target is utf8, the pattern is not utf8. */
2436 		    while (s < e) {
2437 			if (l >= PL_regeol)
2438 			     sayNO;
2439 			if (NATIVE_TO_UNI(*(U8*)s) !=
2440 			    utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2441 					   ckWARN(WARN_UTF8) ?
2442 					   0 : UTF8_ALLOW_ANY))
2443 			     sayNO;
2444 			l += ulen;
2445 			s ++;
2446 		    }
2447 		}
2448 		else {
2449 		    /* The target is not utf8, the pattern is utf8. */
2450 		    while (s < e) {
2451 			if (l >= PL_regeol)
2452 			    sayNO;
2453 			if (NATIVE_TO_UNI(*((U8*)l)) !=
2454 			    utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2455 					   ckWARN(WARN_UTF8) ?
2456 					   0 : UTF8_ALLOW_ANY))
2457 			    sayNO;
2458 			s += ulen;
2459 			l ++;
2460 		    }
2461 		}
2462 		locinput = l;
2463 		nextchr = UCHARAT(locinput);
2464 		break;
2465 	    }
2466 	    /* The target and the pattern have the same utf8ness. */
2467 	    /* Inline the first character, for speed. */
2468 	    if (UCHARAT(s) != nextchr)
2469 		sayNO;
2470 	    if (PL_regeol - locinput < ln)
2471 		sayNO;
2472 	    if (ln > 1 && memNE(s, locinput, ln))
2473 		sayNO;
2474 	    locinput += ln;
2475 	    nextchr = UCHARAT(locinput);
2476 	    break;
2477 	case EXACTFL:
2478 	    PL_reg_flags |= RF_tainted;
2479 	    /* FALL THROUGH */
2480 	case EXACTF:
2481 	    s = STRING(scan);
2482 	    ln = STR_LEN(scan);
2483 
2484 	    if (do_utf8 || UTF) {
2485 	      /* Either target or the pattern are utf8. */
2486 		char *l = locinput;
2487 		char *e = PL_regeol;
2488 
2489 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2490 			       l, &e, 0,  do_utf8)) {
2491 		     /* One more case for the sharp s:
2492 		      * pack("U0U*", 0xDF) =~ /ss/i,
2493 		      * the 0xC3 0x9F are the UTF-8
2494 		      * byte sequence for the U+00DF. */
2495 		     if (!(do_utf8 &&
2496 			   toLOWER(s[0]) == 's' &&
2497 			   ln >= 2 &&
2498 			   toLOWER(s[1]) == 's' &&
2499 			   (U8)l[0] == 0xC3 &&
2500 			   e - l >= 2 &&
2501 			   (U8)l[1] == 0x9F))
2502 			  sayNO;
2503 		}
2504 		locinput = e;
2505 		nextchr = UCHARAT(locinput);
2506 		break;
2507 	    }
2508 
2509 	    /* Neither the target and the pattern are utf8. */
2510 
2511 	    /* Inline the first character, for speed. */
2512 	    if (UCHARAT(s) != nextchr &&
2513 		UCHARAT(s) != ((OP(scan) == EXACTF)
2514 			       ? PL_fold : PL_fold_locale)[nextchr])
2515 		sayNO;
2516 	    if (PL_regeol - locinput < ln)
2517 		sayNO;
2518 	    if (ln > 1 && (OP(scan) == EXACTF
2519 			   ? ibcmp(s, locinput, ln)
2520 			   : ibcmp_locale(s, locinput, ln)))
2521 		sayNO;
2522 	    locinput += ln;
2523 	    nextchr = UCHARAT(locinput);
2524 	    break;
2525 	case ANYOF:
2526 	    if (do_utf8) {
2527 	        STRLEN inclasslen = PL_regeol - locinput;
2528 
2529 	        if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2530 		    sayNO_ANYOF;
2531 		if (locinput >= PL_regeol)
2532 		    sayNO;
2533 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2534 		nextchr = UCHARAT(locinput);
2535 		break;
2536 	    }
2537 	    else {
2538 		if (nextchr < 0)
2539 		    nextchr = UCHARAT(locinput);
2540 		if (!REGINCLASS(scan, (U8*)locinput))
2541 		    sayNO_ANYOF;
2542 		if (!nextchr && locinput >= PL_regeol)
2543 		    sayNO;
2544 		nextchr = UCHARAT(++locinput);
2545 		break;
2546 	    }
2547 	no_anyof:
2548 	    /* If we might have the case of the German sharp s
2549 	     * in a casefolding Unicode character class. */
2550 
2551 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2552 		 locinput += SHARP_S_SKIP;
2553 		 nextchr = UCHARAT(locinput);
2554 	    }
2555 	    else
2556 		 sayNO;
2557 	    break;
2558 	case ALNUML:
2559 	    PL_reg_flags |= RF_tainted;
2560 	    /* FALL THROUGH */
2561 	case ALNUM:
2562 	    if (!nextchr)
2563 		sayNO;
2564 	    if (do_utf8) {
2565 		LOAD_UTF8_CHARCLASS(alnum,"a");
2566 		if (!(OP(scan) == ALNUM
2567 		      ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2568 		      : isALNUM_LC_utf8((U8*)locinput)))
2569 		{
2570 		    sayNO;
2571 		}
2572 		locinput += PL_utf8skip[nextchr];
2573 		nextchr = UCHARAT(locinput);
2574 		break;
2575 	    }
2576 	    if (!(OP(scan) == ALNUM
2577 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2578 		sayNO;
2579 	    nextchr = UCHARAT(++locinput);
2580 	    break;
2581 	case NALNUML:
2582 	    PL_reg_flags |= RF_tainted;
2583 	    /* FALL THROUGH */
2584 	case NALNUM:
2585 	    if (!nextchr && locinput >= PL_regeol)
2586 		sayNO;
2587 	    if (do_utf8) {
2588 		LOAD_UTF8_CHARCLASS(alnum,"a");
2589 		if (OP(scan) == NALNUM
2590 		    ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2591 		    : isALNUM_LC_utf8((U8*)locinput))
2592 		{
2593 		    sayNO;
2594 		}
2595 		locinput += PL_utf8skip[nextchr];
2596 		nextchr = UCHARAT(locinput);
2597 		break;
2598 	    }
2599 	    if (OP(scan) == NALNUM
2600 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
2601 		sayNO;
2602 	    nextchr = UCHARAT(++locinput);
2603 	    break;
2604 	case BOUNDL:
2605 	case NBOUNDL:
2606 	    PL_reg_flags |= RF_tainted;
2607 	    /* FALL THROUGH */
2608 	case BOUND:
2609 	case NBOUND:
2610 	    /* was last char in word? */
2611 	    if (do_utf8) {
2612 		if (locinput == PL_bostr)
2613 		    ln = '\n';
2614 		else {
2615 		    U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2616 
2617 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2618 		}
2619 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2620 		    ln = isALNUM_uni(ln);
2621 		    LOAD_UTF8_CHARCLASS(alnum,"a");
2622 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2623 		}
2624 		else {
2625 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2626 		    n = isALNUM_LC_utf8((U8*)locinput);
2627 		}
2628 	    }
2629 	    else {
2630 		ln = (locinput != PL_bostr) ?
2631 		    UCHARAT(locinput - 1) : '\n';
2632 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2633 		    ln = isALNUM(ln);
2634 		    n = isALNUM(nextchr);
2635 		}
2636 		else {
2637 		    ln = isALNUM_LC(ln);
2638 		    n = isALNUM_LC(nextchr);
2639 		}
2640 	    }
2641 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2642 				    OP(scan) == BOUNDL))
2643 		    sayNO;
2644 	    break;
2645 	case SPACEL:
2646 	    PL_reg_flags |= RF_tainted;
2647 	    /* FALL THROUGH */
2648 	case SPACE:
2649 	    if (!nextchr)
2650 		sayNO;
2651 	    if (do_utf8) {
2652 		if (UTF8_IS_CONTINUED(nextchr)) {
2653 		    LOAD_UTF8_CHARCLASS(space," ");
2654 		    if (!(OP(scan) == SPACE
2655 			  ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2656 			  : isSPACE_LC_utf8((U8*)locinput)))
2657 		    {
2658 			sayNO;
2659 		    }
2660 		    locinput += PL_utf8skip[nextchr];
2661 		    nextchr = UCHARAT(locinput);
2662 		    break;
2663 		}
2664 		if (!(OP(scan) == SPACE
2665 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2666 		    sayNO;
2667 		nextchr = UCHARAT(++locinput);
2668 	    }
2669 	    else {
2670 		if (!(OP(scan) == SPACE
2671 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2672 		    sayNO;
2673 		nextchr = UCHARAT(++locinput);
2674 	    }
2675 	    break;
2676 	case NSPACEL:
2677 	    PL_reg_flags |= RF_tainted;
2678 	    /* FALL THROUGH */
2679 	case NSPACE:
2680 	    if (!nextchr && locinput >= PL_regeol)
2681 		sayNO;
2682 	    if (do_utf8) {
2683 		LOAD_UTF8_CHARCLASS(space," ");
2684 		if (OP(scan) == NSPACE
2685 		    ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2686 		    : isSPACE_LC_utf8((U8*)locinput))
2687 		{
2688 		    sayNO;
2689 		}
2690 		locinput += PL_utf8skip[nextchr];
2691 		nextchr = UCHARAT(locinput);
2692 		break;
2693 	    }
2694 	    if (OP(scan) == NSPACE
2695 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
2696 		sayNO;
2697 	    nextchr = UCHARAT(++locinput);
2698 	    break;
2699 	case DIGITL:
2700 	    PL_reg_flags |= RF_tainted;
2701 	    /* FALL THROUGH */
2702 	case DIGIT:
2703 	    if (!nextchr)
2704 		sayNO;
2705 	    if (do_utf8) {
2706 		LOAD_UTF8_CHARCLASS(digit,"0");
2707 		if (!(OP(scan) == DIGIT
2708 		      ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2709 		      : isDIGIT_LC_utf8((U8*)locinput)))
2710 		{
2711 		    sayNO;
2712 		}
2713 		locinput += PL_utf8skip[nextchr];
2714 		nextchr = UCHARAT(locinput);
2715 		break;
2716 	    }
2717 	    if (!(OP(scan) == DIGIT
2718 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2719 		sayNO;
2720 	    nextchr = UCHARAT(++locinput);
2721 	    break;
2722 	case NDIGITL:
2723 	    PL_reg_flags |= RF_tainted;
2724 	    /* FALL THROUGH */
2725 	case NDIGIT:
2726 	    if (!nextchr && locinput >= PL_regeol)
2727 		sayNO;
2728 	    if (do_utf8) {
2729 		LOAD_UTF8_CHARCLASS(digit,"0");
2730 		if (OP(scan) == NDIGIT
2731 		    ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2732 		    : isDIGIT_LC_utf8((U8*)locinput))
2733 		{
2734 		    sayNO;
2735 		}
2736 		locinput += PL_utf8skip[nextchr];
2737 		nextchr = UCHARAT(locinput);
2738 		break;
2739 	    }
2740 	    if (OP(scan) == NDIGIT
2741 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2742 		sayNO;
2743 	    nextchr = UCHARAT(++locinput);
2744 	    break;
2745 	case CLUMP:
2746 	    if (locinput >= PL_regeol)
2747 		sayNO;
2748 	    if  (do_utf8) {
2749 		LOAD_UTF8_CHARCLASS(mark,"~");
2750 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2751 		    sayNO;
2752 		locinput += PL_utf8skip[nextchr];
2753 		while (locinput < PL_regeol &&
2754 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2755 		    locinput += UTF8SKIP(locinput);
2756 		if (locinput > PL_regeol)
2757 		    sayNO;
2758 	    }
2759 	    else
2760 	       locinput++;
2761 	    nextchr = UCHARAT(locinput);
2762 	    break;
2763 	case REFFL:
2764 	    PL_reg_flags |= RF_tainted;
2765 	    /* FALL THROUGH */
2766         case REF:
2767 	case REFF:
2768 	    n = ARG(scan);  /* which paren pair */
2769 	    ln = PL_regstartp[n];
2770 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2771 	    if ((I32)*PL_reglastparen < n || ln == -1)
2772 		sayNO;			/* Do not match unless seen CLOSEn. */
2773 	    if (ln == PL_regendp[n])
2774 		break;
2775 
2776 	    s = PL_bostr + ln;
2777 	    if (do_utf8 && OP(scan) != REF) {	/* REF can do byte comparison */
2778 		char *l = locinput;
2779 		char *e = PL_bostr + PL_regendp[n];
2780 		/*
2781 		 * Note that we can't do the "other character" lookup trick as
2782 		 * in the 8-bit case (no pun intended) because in Unicode we
2783 		 * have to map both upper and title case to lower case.
2784 		 */
2785 		if (OP(scan) == REFF) {
2786 		    STRLEN ulen1, ulen2;
2787 		    U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2788 		    U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2789 		    while (s < e) {
2790 			if (l >= PL_regeol)
2791 			    sayNO;
2792 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2793 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2794 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2795 			    sayNO;
2796 			s += ulen1;
2797 			l += ulen2;
2798 		    }
2799 		}
2800 		locinput = l;
2801 		nextchr = UCHARAT(locinput);
2802 		break;
2803 	    }
2804 
2805 	    /* Inline the first character, for speed. */
2806 	    if (UCHARAT(s) != nextchr &&
2807 		(OP(scan) == REF ||
2808 		 (UCHARAT(s) != ((OP(scan) == REFF
2809 				  ? PL_fold : PL_fold_locale)[nextchr]))))
2810 		sayNO;
2811 	    ln = PL_regendp[n] - ln;
2812 	    if (locinput + ln > PL_regeol)
2813 		sayNO;
2814 	    if (ln > 1 && (OP(scan) == REF
2815 			   ? memNE(s, locinput, ln)
2816 			   : (OP(scan) == REFF
2817 			      ? ibcmp(s, locinput, ln)
2818 			      : ibcmp_locale(s, locinput, ln))))
2819 		sayNO;
2820 	    locinput += ln;
2821 	    nextchr = UCHARAT(locinput);
2822 	    break;
2823 
2824 	case NOTHING:
2825 	case TAIL:
2826 	    break;
2827 	case BACK:
2828 	    break;
2829 	case EVAL:
2830 	{
2831 	    dSP;
2832 	    OP_4tree *oop = PL_op;
2833 	    COP *ocurcop = PL_curcop;
2834 	    PAD *old_comppad;
2835 	    SV *ret;
2836 	    struct regexp *oreg = PL_reg_re;
2837 
2838 	    n = ARG(scan);
2839 	    PL_op = (OP_4tree*)PL_regdata->data[n];
2840 	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2841 	    PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2842 	    PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2843 
2844 	    {
2845 		SV **before = SP;
2846 		CALLRUNOPS(aTHX);			/* Scalar context. */
2847 		SPAGAIN;
2848 		if (SP == before)
2849 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2850 		else {
2851 		    ret = POPs;
2852 		    PUTBACK;
2853 		}
2854 	    }
2855 
2856 	    PL_op = oop;
2857 	    PAD_RESTORE_LOCAL(old_comppad);
2858 	    PL_curcop = ocurcop;
2859 	    if (logical) {
2860 		if (logical == 2) {	/* Postponed subexpression. */
2861 		    regexp *re;
2862 		    MAGIC *mg = Null(MAGIC*);
2863 		    re_cc_state state;
2864 		    CHECKPOINT cp, lastcp;
2865                     int toggleutf;
2866 		    register SV *sv;
2867 
2868 		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2869 			mg = mg_find(sv, PERL_MAGIC_qr);
2870 		    else if (SvSMAGICAL(ret)) {
2871 			if (SvGMAGICAL(ret))
2872 			    sv_unmagic(ret, PERL_MAGIC_qr);
2873 			else
2874 			    mg = mg_find(ret, PERL_MAGIC_qr);
2875 		    }
2876 
2877 		    if (mg) {
2878 			re = (regexp *)mg->mg_obj;
2879 			(void)ReREFCNT_inc(re);
2880 		    }
2881 		    else {
2882 			STRLEN len;
2883 			char *t = SvPV(ret, len);
2884 			PMOP pm;
2885 			char *oprecomp = PL_regprecomp;
2886 			I32 osize = PL_regsize;
2887 			I32 onpar = PL_regnpar;
2888 
2889 			Zero(&pm, 1, PMOP);
2890                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2891 			re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2892 			if (!(SvFLAGS(ret)
2893 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2894 				| SVs_GMG)))
2895 			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
2896 					PERL_MAGIC_qr,0,0);
2897 			PL_regprecomp = oprecomp;
2898 			PL_regsize = osize;
2899 			PL_regnpar = onpar;
2900 		    }
2901 		    DEBUG_r(
2902 			PerlIO_printf(Perl_debug_log,
2903 				      "Entering embedded `%s%.60s%s%s'\n",
2904 				      PL_colors[0],
2905 				      re->precomp,
2906 				      PL_colors[1],
2907 				      (strlen(re->precomp) > 60 ? "..." : ""))
2908 			);
2909 		    state.node = next;
2910 		    state.prev = PL_reg_call_cc;
2911 		    state.cc = PL_regcc;
2912 		    state.re = PL_reg_re;
2913 
2914 		    PL_regcc = 0;
2915 
2916 		    cp = regcppush(0);	/* Save *all* the positions. */
2917 		    REGCP_SET(lastcp);
2918 		    cache_re(re);
2919 		    state.ss = PL_savestack_ix;
2920 		    *PL_reglastparen = 0;
2921 		    *PL_reglastcloseparen = 0;
2922 		    PL_reg_call_cc = &state;
2923 		    PL_reginput = locinput;
2924 		    toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2925 				((re->reganch & ROPT_UTF8) != 0);
2926 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2927 
2928 		    /* XXXX This is too dramatic a measure... */
2929 		    PL_reg_maxiter = 0;
2930 
2931 		    if (regmatch(re->program + 1)) {
2932 			/* Even though we succeeded, we need to restore
2933 			   global variables, since we may be wrapped inside
2934 			   SUSPEND, thus the match may be not finished yet. */
2935 
2936 			/* XXXX Do this only if SUSPENDed? */
2937 			PL_reg_call_cc = state.prev;
2938 			PL_regcc = state.cc;
2939 			PL_reg_re = state.re;
2940 			cache_re(PL_reg_re);
2941 			if (toggleutf) PL_reg_flags ^= RF_utf8;
2942 
2943 			/* XXXX This is too dramatic a measure... */
2944 			PL_reg_maxiter = 0;
2945 
2946 			/* These are needed even if not SUSPEND. */
2947 			ReREFCNT_dec(re);
2948 			regcpblow(cp);
2949 			sayYES;
2950 		    }
2951 		    ReREFCNT_dec(re);
2952 		    REGCP_UNWIND(lastcp);
2953 		    regcppop();
2954 		    PL_reg_call_cc = state.prev;
2955 		    PL_regcc = state.cc;
2956 		    PL_reg_re = state.re;
2957 		    cache_re(PL_reg_re);
2958 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2959 
2960 		    /* XXXX This is too dramatic a measure... */
2961 		    PL_reg_maxiter = 0;
2962 
2963 		    logical = 0;
2964 		    sayNO;
2965 		}
2966 		sw = SvTRUE(ret);
2967 		logical = 0;
2968 	    }
2969 	    else {
2970 		sv_setsv(save_scalar(PL_replgv), ret);
2971 		cache_re(oreg);
2972 	    }
2973 	    break;
2974 	}
2975 	case OPEN:
2976 	    n = ARG(scan);  /* which paren pair */
2977 	    PL_reg_start_tmp[n] = locinput;
2978 	    if (n > PL_regsize)
2979 		PL_regsize = n;
2980 	    break;
2981 	case CLOSE:
2982 	    n = ARG(scan);  /* which paren pair */
2983 	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2984 	    PL_regendp[n] = locinput - PL_bostr;
2985 	    if (n > (I32)*PL_reglastparen)
2986 		*PL_reglastparen = n;
2987 	    *PL_reglastcloseparen = n;
2988 	    break;
2989 	case GROUPP:
2990 	    n = ARG(scan);  /* which paren pair */
2991 	    sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2992 	    break;
2993 	case IFTHEN:
2994 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2995 	    if (sw)
2996 		next = NEXTOPER(NEXTOPER(scan));
2997 	    else {
2998 		next = scan + ARG(scan);
2999 		if (OP(next) == IFTHEN) /* Fake one. */
3000 		    next = NEXTOPER(NEXTOPER(next));
3001 	    }
3002 	    break;
3003 	case LOGICAL:
3004 	    logical = scan->flags;
3005 	    break;
3006 /*******************************************************************
3007  PL_regcc contains infoblock about the innermost (...)* loop, and
3008  a pointer to the next outer infoblock.
3009 
3010  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3011 
3012    1) After matching X, regnode for CURLYX is processed;
3013 
3014    2) This regnode creates infoblock on the stack, and calls
3015       regmatch() recursively with the starting point at WHILEM node;
3016 
3017    3) Each hit of WHILEM node tries to match A and Z (in the order
3018       depending on the current iteration, min/max of {min,max} and
3019       greediness).  The information about where are nodes for "A"
3020       and "Z" is read from the infoblock, as is info on how many times "A"
3021       was already matched, and greediness.
3022 
3023    4) After A matches, the same WHILEM node is hit again.
3024 
3025    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3026       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3027       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3028       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3029       of the external loop.
3030 
3031  Currently present infoblocks form a tree with a stem formed by PL_curcc
3032  and whatever it mentions via ->next, and additional attached trees
3033  corresponding to temporarily unset infoblocks as in "5" above.
3034 
3035  In the following picture infoblocks for outer loop of
3036  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3037  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3038  infoblocks are drawn below the "reset" infoblock.
3039 
3040  In fact in the picture below we do not show failed matches for Z and T
3041  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3042  more obvious *why* one needs to *temporary* unset infoblocks.]
3043 
3044   Matched	REx position	InfoBlocks	Comment
3045   		(Y(A)*?Z)*?T	x
3046   		Y(A)*?Z)*?T	x <- O
3047   Y		(A)*?Z)*?T	x <- O
3048   Y		A)*?Z)*?T	x <- O <- I
3049   YA		)*?Z)*?T	x <- O <- I
3050   YA		A)*?Z)*?T	x <- O <- I
3051   YAA		)*?Z)*?T	x <- O <- I
3052   YAA		Z)*?T		x <- O		# Temporary unset I
3053 				     I
3054 
3055   YAAZ		Y(A)*?Z)*?T	x <- O
3056 				     I
3057 
3058   YAAZY		(A)*?Z)*?T	x <- O
3059 				     I
3060 
3061   YAAZY		A)*?Z)*?T	x <- O <- I
3062 				     I
3063 
3064   YAAZYA	)*?Z)*?T	x <- O <- I
3065 				     I
3066 
3067   YAAZYA	Z)*?T		x <- O		# Temporary unset I
3068 				     I,I
3069 
3070   YAAZYAZ	)*?T		x <- O
3071 				     I,I
3072 
3073   YAAZYAZ	T		x		# Temporary unset O
3074 				O
3075 				I,I
3076 
3077   YAAZYAZT			x
3078 				O
3079 				I,I
3080  *******************************************************************/
3081 	case CURLYX: {
3082 		CURCUR cc;
3083 		CHECKPOINT cp = PL_savestack_ix;
3084 		/* No need to save/restore up to this paren */
3085 		I32 parenfloor = scan->flags;
3086 
3087 		if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3088 		    next += ARG(next);
3089 		cc.oldcc = PL_regcc;
3090 		PL_regcc = &cc;
3091 		/* XXXX Probably it is better to teach regpush to support
3092 		   parenfloor > PL_regsize... */
3093 		if (parenfloor > (I32)*PL_reglastparen)
3094 		    parenfloor = *PL_reglastparen; /* Pessimization... */
3095 		cc.parenfloor = parenfloor;
3096 		cc.cur = -1;
3097 		cc.min = ARG1(scan);
3098 		cc.max  = ARG2(scan);
3099 		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3100 		cc.next = next;
3101 		cc.minmod = minmod;
3102 		cc.lastloc = 0;
3103 		PL_reginput = locinput;
3104 		n = regmatch(PREVOPER(next));	/* start on the WHILEM */
3105 		regcpblow(cp);
3106 		PL_regcc = cc.oldcc;
3107 		saySAME(n);
3108 	    }
3109 	    /* NOT REACHED */
3110 	case WHILEM: {
3111 		/*
3112 		 * This is really hard to understand, because after we match
3113 		 * what we're trying to match, we must make sure the rest of
3114 		 * the REx is going to match for sure, and to do that we have
3115 		 * to go back UP the parse tree by recursing ever deeper.  And
3116 		 * if it fails, we have to reset our parent's current state
3117 		 * that we can try again after backing off.
3118 		 */
3119 
3120 		CHECKPOINT cp, lastcp;
3121 		CURCUR* cc = PL_regcc;
3122 		char *lastloc = cc->lastloc; /* Detection of 0-len. */
3123 
3124 		n = cc->cur + 1;	/* how many we know we matched */
3125 		PL_reginput = locinput;
3126 
3127 		DEBUG_r(
3128 		    PerlIO_printf(Perl_debug_log,
3129 				  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3130 				  REPORT_CODE_OFF+PL_regindent*2, "",
3131 				  (long)n, (long)cc->min,
3132 				  (long)cc->max, PTR2UV(cc))
3133 		    );
3134 
3135 		/* If degenerate scan matches "", assume scan done. */
3136 
3137 		if (locinput == cc->lastloc && n >= cc->min) {
3138 		    PL_regcc = cc->oldcc;
3139 		    if (PL_regcc)
3140 			ln = PL_regcc->cur;
3141 		    DEBUG_r(
3142 			PerlIO_printf(Perl_debug_log,
3143 			   "%*s  empty match detected, try continuation...\n",
3144 			   REPORT_CODE_OFF+PL_regindent*2, "")
3145 			);
3146 		    if (regmatch(cc->next))
3147 			sayYES;
3148 		    if (PL_regcc)
3149 			PL_regcc->cur = ln;
3150 		    PL_regcc = cc;
3151 		    sayNO;
3152 		}
3153 
3154 		/* First just match a string of min scans. */
3155 
3156 		if (n < cc->min) {
3157 		    cc->cur = n;
3158 		    cc->lastloc = locinput;
3159 		    if (regmatch(cc->scan))
3160 			sayYES;
3161 		    cc->cur = n - 1;
3162 		    cc->lastloc = lastloc;
3163 		    sayNO;
3164 		}
3165 
3166 		if (scan->flags) {
3167 		    /* Check whether we already were at this position.
3168 			Postpone detection until we know the match is not
3169 			*that* much linear. */
3170 		if (!PL_reg_maxiter) {
3171 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3172 		    PL_reg_leftiter = PL_reg_maxiter;
3173 		}
3174 		if (PL_reg_leftiter-- == 0) {
3175 		    I32 size = (PL_reg_maxiter + 7)/8;
3176 		    if (PL_reg_poscache) {
3177 			if ((I32)PL_reg_poscache_size < size) {
3178 			    Renew(PL_reg_poscache, size, char);
3179 			    PL_reg_poscache_size = size;
3180 			}
3181 			Zero(PL_reg_poscache, size, char);
3182 		    }
3183 		    else {
3184 			PL_reg_poscache_size = size;
3185 			Newz(29, PL_reg_poscache, size, char);
3186 		    }
3187 		    DEBUG_r(
3188 			PerlIO_printf(Perl_debug_log,
3189 	      "%sDetected a super-linear match, switching on caching%s...\n",
3190 				      PL_colors[4], PL_colors[5])
3191 			);
3192 		}
3193 		if (PL_reg_leftiter < 0) {
3194 		    I32 o = locinput - PL_bostr, b;
3195 
3196 		    o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3197 		    b = o % 8;
3198 		    o /= 8;
3199 		    if (PL_reg_poscache[o] & (1<<b)) {
3200 		    DEBUG_r(
3201 			PerlIO_printf(Perl_debug_log,
3202 				      "%*s  already tried at this position...\n",
3203 				      REPORT_CODE_OFF+PL_regindent*2, "")
3204 			);
3205 			if (PL_reg_flags & RF_false)
3206 			    sayYES;
3207 			else
3208 			    sayNO_SILENT;
3209 		    }
3210 		    PL_reg_poscache[o] |= (1<<b);
3211 		}
3212 		}
3213 
3214 		/* Prefer next over scan for minimal matching. */
3215 
3216 		if (cc->minmod) {
3217 		    PL_regcc = cc->oldcc;
3218 		    if (PL_regcc)
3219 			ln = PL_regcc->cur;
3220 		    cp = regcppush(cc->parenfloor);
3221 		    REGCP_SET(lastcp);
3222 		    if (regmatch(cc->next)) {
3223 			regcpblow(cp);
3224 			sayYES;	/* All done. */
3225 		    }
3226 		    REGCP_UNWIND(lastcp);
3227 		    regcppop();
3228 		    if (PL_regcc)
3229 			PL_regcc->cur = ln;
3230 		    PL_regcc = cc;
3231 
3232 		    if (n >= cc->max) {	/* Maximum greed exceeded? */
3233 			if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3234 			    && !(PL_reg_flags & RF_warned)) {
3235 			    PL_reg_flags |= RF_warned;
3236 			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3237 				 "Complex regular subexpression recursion",
3238 				 REG_INFTY - 1);
3239 			}
3240 			sayNO;
3241 		    }
3242 
3243 		    DEBUG_r(
3244 			PerlIO_printf(Perl_debug_log,
3245 				      "%*s  trying longer...\n",
3246 				      REPORT_CODE_OFF+PL_regindent*2, "")
3247 			);
3248 		    /* Try scanning more and see if it helps. */
3249 		    PL_reginput = locinput;
3250 		    cc->cur = n;
3251 		    cc->lastloc = locinput;
3252 		    cp = regcppush(cc->parenfloor);
3253 		    REGCP_SET(lastcp);
3254 		    if (regmatch(cc->scan)) {
3255 			regcpblow(cp);
3256 			sayYES;
3257 		    }
3258 		    REGCP_UNWIND(lastcp);
3259 		    regcppop();
3260 		    cc->cur = n - 1;
3261 		    cc->lastloc = lastloc;
3262 		    sayNO;
3263 		}
3264 
3265 		/* Prefer scan over next for maximal matching. */
3266 
3267 		if (n < cc->max) {	/* More greed allowed? */
3268 		    cp = regcppush(cc->parenfloor);
3269 		    cc->cur = n;
3270 		    cc->lastloc = locinput;
3271 		    REGCP_SET(lastcp);
3272 		    if (regmatch(cc->scan)) {
3273 			regcpblow(cp);
3274 			sayYES;
3275 		    }
3276 		    REGCP_UNWIND(lastcp);
3277 		    regcppop();		/* Restore some previous $<digit>s? */
3278 		    PL_reginput = locinput;
3279 		    DEBUG_r(
3280 			PerlIO_printf(Perl_debug_log,
3281 				      "%*s  failed, try continuation...\n",
3282 				      REPORT_CODE_OFF+PL_regindent*2, "")
3283 			);
3284 		}
3285 		if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3286 			&& !(PL_reg_flags & RF_warned)) {
3287 		    PL_reg_flags |= RF_warned;
3288 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3289 			 "Complex regular subexpression recursion",
3290 			 REG_INFTY - 1);
3291 		}
3292 
3293 		/* Failed deeper matches of scan, so see if this one works. */
3294 		PL_regcc = cc->oldcc;
3295 		if (PL_regcc)
3296 		    ln = PL_regcc->cur;
3297 		if (regmatch(cc->next))
3298 		    sayYES;
3299 		if (PL_regcc)
3300 		    PL_regcc->cur = ln;
3301 		PL_regcc = cc;
3302 		cc->cur = n - 1;
3303 		cc->lastloc = lastloc;
3304 		sayNO;
3305 	    }
3306 	    /* NOT REACHED */
3307 	case BRANCHJ:
3308 	    next = scan + ARG(scan);
3309 	    if (next == scan)
3310 		next = NULL;
3311 	    inner = NEXTOPER(NEXTOPER(scan));
3312 	    goto do_branch;
3313 	case BRANCH:
3314 	    inner = NEXTOPER(scan);
3315 	  do_branch:
3316 	    {
3317 		c1 = OP(scan);
3318 		if (OP(next) != c1)	/* No choice. */
3319 		    next = inner;	/* Avoid recursion. */
3320 		else {
3321 		    I32 lastparen = *PL_reglastparen;
3322 		    I32 unwind1;
3323 		    re_unwind_branch_t *uw;
3324 
3325 		    /* Put unwinding data on stack */
3326 		    unwind1 = SSNEWt(1,re_unwind_branch_t);
3327 		    uw = SSPTRt(unwind1,re_unwind_branch_t);
3328 		    uw->prev = unwind;
3329 		    unwind = unwind1;
3330 		    uw->type = ((c1 == BRANCH)
3331 				? RE_UNWIND_BRANCH
3332 				: RE_UNWIND_BRANCHJ);
3333 		    uw->lastparen = lastparen;
3334 		    uw->next = next;
3335 		    uw->locinput = locinput;
3336 		    uw->nextchr = nextchr;
3337 #ifdef DEBUGGING
3338 		    uw->regindent = ++PL_regindent;
3339 #endif
3340 
3341 		    REGCP_SET(uw->lastcp);
3342 
3343 		    /* Now go into the first branch */
3344 		    next = inner;
3345 		}
3346 	    }
3347 	    break;
3348 	case MINMOD:
3349 	    minmod = 1;
3350 	    break;
3351 	case CURLYM:
3352 	{
3353 	    I32 l = 0;
3354 	    CHECKPOINT lastcp;
3355 
3356 	    /* We suppose that the next guy does not need
3357 	       backtracking: in particular, it is of constant length,
3358 	       and has no parenths to influence future backrefs. */
3359 	    ln = ARG1(scan);  /* min to match */
3360 	    n  = ARG2(scan);  /* max to match */
3361 	    paren = scan->flags;
3362 	    if (paren) {
3363 		if (paren > PL_regsize)
3364 		    PL_regsize = paren;
3365 		if (paren > (I32)*PL_reglastparen)
3366 		    *PL_reglastparen = paren;
3367 	    }
3368 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3369 	    if (paren)
3370 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
3371 	    PL_reginput = locinput;
3372 	    if (minmod) {
3373 		minmod = 0;
3374 		if (ln && regrepeat_hard(scan, ln, &l) < ln)
3375 		    sayNO;
3376 		/* if we matched something zero-length we don't need to
3377 		   backtrack - capturing parens are already defined, so
3378 		   the caveat in the maximal case doesn't apply
3379 
3380 		   XXXX if ln == 0, we can redo this check first time
3381 		   through the following loop
3382 		*/
3383 		if (ln && l == 0)
3384 		    n = ln;	/* don't backtrack */
3385 		locinput = PL_reginput;
3386 		if (HAS_TEXT(next) || JUMPABLE(next)) {
3387 		    regnode *text_node = next;
3388 
3389 		    if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3390 
3391 		    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3392 		    else {
3393 			if (PL_regkind[(U8)OP(text_node)] == REF) {
3394 			    c1 = c2 = -1000;
3395 			    goto assume_ok_MM;
3396 			}
3397 			else { c1 = (U8)*STRING(text_node); }
3398 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3399 			    c2 = PL_fold[c1];
3400 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3401 			    c2 = PL_fold_locale[c1];
3402 			else
3403 			    c2 = c1;
3404 		    }
3405 		}
3406 		else
3407 		    c1 = c2 = -1000;
3408 	    assume_ok_MM:
3409 		REGCP_SET(lastcp);
3410 		/* This may be improved if l == 0.  */
3411 		while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3412 		    /* If it could work, try it. */
3413 		    if (c1 == -1000 ||
3414 			UCHARAT(PL_reginput) == c1 ||
3415 			UCHARAT(PL_reginput) == c2)
3416 		    {
3417 			if (paren) {
3418 			    if (ln) {
3419 				PL_regstartp[paren] =
3420 				    HOPc(PL_reginput, -l) - PL_bostr;
3421 				PL_regendp[paren] = PL_reginput - PL_bostr;
3422 			    }
3423 			    else
3424 				PL_regendp[paren] = -1;
3425 			}
3426 			if (regmatch(next))
3427 			    sayYES;
3428 			REGCP_UNWIND(lastcp);
3429 		    }
3430 		    /* Couldn't or didn't -- move forward. */
3431 		    PL_reginput = locinput;
3432 		    if (regrepeat_hard(scan, 1, &l)) {
3433 			ln++;
3434 			locinput = PL_reginput;
3435 		    }
3436 		    else
3437 			sayNO;
3438 		}
3439 	    }
3440 	    else {
3441 		n = regrepeat_hard(scan, n, &l);
3442 		/* if we matched something zero-length we don't need to
3443 		   backtrack, unless the minimum count is zero and we
3444 		   are capturing the result - in that case the capture
3445 		   being defined or not may affect later execution
3446 		*/
3447 		if (n != 0 && l == 0 && !(paren && ln == 0))
3448 		    ln = n;	/* don't backtrack */
3449 		locinput = PL_reginput;
3450 		DEBUG_r(
3451 		    PerlIO_printf(Perl_debug_log,
3452 				  "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3453 				  (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3454 				  (IV) n, (IV)l)
3455 		    );
3456 		if (n >= ln) {
3457 		    if (HAS_TEXT(next) || JUMPABLE(next)) {
3458 			regnode *text_node = next;
3459 
3460 			if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3461 
3462 			if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3463 			else {
3464 			    if (PL_regkind[(U8)OP(text_node)] == REF) {
3465 				c1 = c2 = -1000;
3466 				goto assume_ok_REG;
3467 			    }
3468 			    else { c1 = (U8)*STRING(text_node); }
3469 
3470 			    if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3471 				c2 = PL_fold[c1];
3472 			    else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3473 				c2 = PL_fold_locale[c1];
3474 			    else
3475 				c2 = c1;
3476 			}
3477 		    }
3478 		    else
3479 			c1 = c2 = -1000;
3480 		}
3481 	    assume_ok_REG:
3482 		REGCP_SET(lastcp);
3483 		while (n >= ln) {
3484 		    /* If it could work, try it. */
3485 		    if (c1 == -1000 ||
3486 			UCHARAT(PL_reginput) == c1 ||
3487 			UCHARAT(PL_reginput) == c2)
3488 		    {
3489 			DEBUG_r(
3490 				PerlIO_printf(Perl_debug_log,
3491 					      "%*s  trying tail with n=%"IVdf"...\n",
3492 					      (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3493 			    );
3494 			if (paren) {
3495 			    if (n) {
3496 				PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3497 				PL_regendp[paren] = PL_reginput - PL_bostr;
3498 			    }
3499 			    else
3500 				PL_regendp[paren] = -1;
3501 			}
3502 			if (regmatch(next))
3503 			    sayYES;
3504 			REGCP_UNWIND(lastcp);
3505 		    }
3506 		    /* Couldn't or didn't -- back up. */
3507 		    n--;
3508 		    locinput = HOPc(locinput, -l);
3509 		    PL_reginput = locinput;
3510 		}
3511 	    }
3512 	    sayNO;
3513 	    break;
3514 	}
3515 	case CURLYN:
3516 	    paren = scan->flags;	/* Which paren to set */
3517 	    if (paren > PL_regsize)
3518 		PL_regsize = paren;
3519 	    if (paren > (I32)*PL_reglastparen)
3520 		*PL_reglastparen = paren;
3521 	    ln = ARG1(scan);  /* min to match */
3522 	    n  = ARG2(scan);  /* max to match */
3523             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3524 	    goto repeat;
3525 	case CURLY:
3526 	    paren = 0;
3527 	    ln = ARG1(scan);  /* min to match */
3528 	    n  = ARG2(scan);  /* max to match */
3529 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3530 	    goto repeat;
3531 	case STAR:
3532 	    ln = 0;
3533 	    n = REG_INFTY;
3534 	    scan = NEXTOPER(scan);
3535 	    paren = 0;
3536 	    goto repeat;
3537 	case PLUS:
3538 	    ln = 1;
3539 	    n = REG_INFTY;
3540 	    scan = NEXTOPER(scan);
3541 	    paren = 0;
3542 	  repeat:
3543 	    /*
3544 	    * Lookahead to avoid useless match attempts
3545 	    * when we know what character comes next.
3546 	    */
3547 
3548 	    /*
3549 	    * Used to only do .*x and .*?x, but now it allows
3550 	    * for )'s, ('s and (?{ ... })'s to be in the way
3551 	    * of the quantifier and the EXACT-like node.  -- japhy
3552 	    */
3553 
3554 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
3555 		U8 *s;
3556 		regnode *text_node = next;
3557 
3558 		if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3559 
3560 		if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3561 		else {
3562 		    if (PL_regkind[(U8)OP(text_node)] == REF) {
3563 			c1 = c2 = -1000;
3564 			goto assume_ok_easy;
3565 		    }
3566 		    else { s = (U8*)STRING(text_node); }
3567 
3568 		    if (!UTF) {
3569 			c2 = c1 = *s;
3570 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3571 			    c2 = PL_fold[c1];
3572 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3573 			    c2 = PL_fold_locale[c1];
3574 		    }
3575 		    else { /* UTF */
3576 			if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3577 			     STRLEN ulen1, ulen2;
3578 			     U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3579 			     U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3580 
3581 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3582 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3583 
3584 			     c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
3585 						 ckWARN(WARN_UTF8) ?
3586 						 0 : UTF8_ALLOW_ANY);
3587 			     c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
3588 						 ckWARN(WARN_UTF8) ?
3589 						 0 : UTF8_ALLOW_ANY);
3590 			}
3591 			else {
3592 			    c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
3593 						     ckWARN(WARN_UTF8) ?
3594 						     0 : UTF8_ALLOW_ANY);
3595 			}
3596 		    }
3597 		}
3598 	    }
3599 	    else
3600 		c1 = c2 = -1000;
3601 	assume_ok_easy:
3602 	    PL_reginput = locinput;
3603 	    if (minmod) {
3604 		CHECKPOINT lastcp;
3605 		minmod = 0;
3606 		if (ln && regrepeat(scan, ln) < ln)
3607 		    sayNO;
3608 		locinput = PL_reginput;
3609 		REGCP_SET(lastcp);
3610 		if (c1 != -1000) {
3611 		    char *e; /* Should not check after this */
3612 		    char *old = locinput;
3613 		    int count = 0;
3614 
3615 		    if  (n == REG_INFTY) {
3616 			e = PL_regeol - 1;
3617 			if (do_utf8)
3618 			    while (UTF8_IS_CONTINUATION(*(U8*)e))
3619 				e--;
3620 		    }
3621 		    else if (do_utf8) {
3622 			int m = n - ln;
3623 			for (e = locinput;
3624 			     m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3625 			    e += UTF8SKIP(e);
3626 		    }
3627 		    else {
3628 			e = locinput + n - ln;
3629 			if (e >= PL_regeol)
3630 			    e = PL_regeol - 1;
3631 		    }
3632 		    while (1) {
3633 			/* Find place 'next' could work */
3634 			if (!do_utf8) {
3635 			    if (c1 == c2) {
3636 				while (locinput <= e &&
3637 				       UCHARAT(locinput) != c1)
3638 				    locinput++;
3639 			    } else {
3640 				while (locinput <= e
3641 				       && UCHARAT(locinput) != c1
3642 				       && UCHARAT(locinput) != c2)
3643 				    locinput++;
3644 			    }
3645 			    count = locinput - old;
3646 			}
3647 			else {
3648 			    STRLEN len;
3649 			    if (c1 == c2) {
3650 				/* count initialised to
3651 				 * utf8_distance(old, locinput) */
3652 				while (locinput <= e &&
3653 				       utf8n_to_uvchr((U8*)locinput,
3654 						      UTF8_MAXLEN, &len,
3655 						      ckWARN(WARN_UTF8) ?
3656 						      0 : UTF8_ALLOW_ANY) != (UV)c1) {
3657 				    locinput += len;
3658 				    count++;
3659 				}
3660 			    } else {
3661 				/* count initialised to
3662 				 * utf8_distance(old, locinput) */
3663 				while (locinput <= e) {
3664 				    UV c = utf8n_to_uvchr((U8*)locinput,
3665 							  UTF8_MAXLEN, &len,
3666 							  ckWARN(WARN_UTF8) ?
3667 							  0 : UTF8_ALLOW_ANY);
3668 				    if (c == (UV)c1 || c == (UV)c2)
3669 					break;
3670 				    locinput += len;
3671 				    count++;
3672 				}
3673 			    }
3674 			}
3675 			if (locinput > e)
3676 			    sayNO;
3677 			/* PL_reginput == old now */
3678 			if (locinput != old) {
3679 			    ln = 1;	/* Did some */
3680 			    if (regrepeat(scan, count) < count)
3681 				sayNO;
3682 			}
3683 			/* PL_reginput == locinput now */
3684 			TRYPAREN(paren, ln, locinput);
3685 			PL_reginput = locinput;	/* Could be reset... */
3686 			REGCP_UNWIND(lastcp);
3687 			/* Couldn't or didn't -- move forward. */
3688 			old = locinput;
3689 			if (do_utf8)
3690 			    locinput += UTF8SKIP(locinput);
3691 			else
3692 			    locinput++;
3693 			count = 1;
3694 		    }
3695 		}
3696 		else
3697 		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3698 		    UV c;
3699 		    if (c1 != -1000) {
3700 			if (do_utf8)
3701 			    c = utf8n_to_uvchr((U8*)PL_reginput,
3702 					       UTF8_MAXLEN, 0,
3703 					       ckWARN(WARN_UTF8) ?
3704 					       0 : UTF8_ALLOW_ANY);
3705 			else
3706 			    c = UCHARAT(PL_reginput);
3707 			/* If it could work, try it. */
3708 		        if (c == (UV)c1 || c == (UV)c2)
3709 		        {
3710 			    TRYPAREN(paren, ln, PL_reginput);
3711 			    REGCP_UNWIND(lastcp);
3712 		        }
3713 		    }
3714 		    /* If it could work, try it. */
3715 		    else if (c1 == -1000)
3716 		    {
3717 			TRYPAREN(paren, ln, PL_reginput);
3718 			REGCP_UNWIND(lastcp);
3719 		    }
3720 		    /* Couldn't or didn't -- move forward. */
3721 		    PL_reginput = locinput;
3722 		    if (regrepeat(scan, 1)) {
3723 			ln++;
3724 			locinput = PL_reginput;
3725 		    }
3726 		    else
3727 			sayNO;
3728 		}
3729 	    }
3730 	    else {
3731 		CHECKPOINT lastcp;
3732 		n = regrepeat(scan, n);
3733 		locinput = PL_reginput;
3734 		if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3735 		    ((!PL_multiline && OP(next) != MEOL) ||
3736 			OP(next) == SEOL || OP(next) == EOS))
3737 		{
3738 		    ln = n;			/* why back off? */
3739 		    /* ...because $ and \Z can match before *and* after
3740 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3741 		       We should back off by one in this case. */
3742 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3743 			ln--;
3744 		}
3745 		REGCP_SET(lastcp);
3746 		if (paren) {
3747 		    UV c = 0;
3748 		    while (n >= ln) {
3749 			if (c1 != -1000) {
3750 			    if (do_utf8)
3751 				c = utf8n_to_uvchr((U8*)PL_reginput,
3752 						   UTF8_MAXLEN, 0,
3753 						   ckWARN(WARN_UTF8) ?
3754 						   0 : UTF8_ALLOW_ANY);
3755 			    else
3756 				c = UCHARAT(PL_reginput);
3757 			}
3758 			/* If it could work, try it. */
3759 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3760 			    {
3761 				TRYPAREN(paren, n, PL_reginput);
3762 				REGCP_UNWIND(lastcp);
3763 			    }
3764 			/* Couldn't or didn't -- back up. */
3765 			n--;
3766 			PL_reginput = locinput = HOPc(locinput, -1);
3767 		    }
3768 		}
3769 		else {
3770 		    UV c = 0;
3771 		    while (n >= ln) {
3772 			if (c1 != -1000) {
3773 			    if (do_utf8)
3774 				c = utf8n_to_uvchr((U8*)PL_reginput,
3775 						   UTF8_MAXLEN, 0,
3776 						   ckWARN(WARN_UTF8) ?
3777 						   0 : UTF8_ALLOW_ANY);
3778 			    else
3779 				c = UCHARAT(PL_reginput);
3780 			}
3781 			/* If it could work, try it. */
3782 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3783 			    {
3784 				TRYPAREN(paren, n, PL_reginput);
3785 				REGCP_UNWIND(lastcp);
3786 			    }
3787 			/* Couldn't or didn't -- back up. */
3788 			n--;
3789 			PL_reginput = locinput = HOPc(locinput, -1);
3790 		    }
3791 		}
3792 	    }
3793 	    sayNO;
3794 	    break;
3795 	case END:
3796 	    if (PL_reg_call_cc) {
3797 		re_cc_state *cur_call_cc = PL_reg_call_cc;
3798 		CURCUR *cctmp = PL_regcc;
3799 		regexp *re = PL_reg_re;
3800 		CHECKPOINT cp, lastcp;
3801 
3802 		cp = regcppush(0);	/* Save *all* the positions. */
3803 		REGCP_SET(lastcp);
3804 		regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3805 						    the caller. */
3806 		PL_reginput = locinput;	/* Make position available to
3807 					   the callcc. */
3808 		cache_re(PL_reg_call_cc->re);
3809 		PL_regcc = PL_reg_call_cc->cc;
3810 		PL_reg_call_cc = PL_reg_call_cc->prev;
3811 		if (regmatch(cur_call_cc->node)) {
3812 		    PL_reg_call_cc = cur_call_cc;
3813 		    regcpblow(cp);
3814 		    sayYES;
3815 		}
3816 		REGCP_UNWIND(lastcp);
3817 		regcppop();
3818 		PL_reg_call_cc = cur_call_cc;
3819 		PL_regcc = cctmp;
3820 		PL_reg_re = re;
3821 		cache_re(re);
3822 
3823 		DEBUG_r(
3824 		    PerlIO_printf(Perl_debug_log,
3825 				  "%*s  continuation failed...\n",
3826 				  REPORT_CODE_OFF+PL_regindent*2, "")
3827 		    );
3828 		sayNO_SILENT;
3829 	    }
3830 	    if (locinput < PL_regtill) {
3831 		DEBUG_r(PerlIO_printf(Perl_debug_log,
3832 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3833 				      PL_colors[4],
3834 				      (long)(locinput - PL_reg_starttry),
3835 				      (long)(PL_regtill - PL_reg_starttry),
3836 				      PL_colors[5]));
3837 		sayNO_FINAL;		/* Cannot match: too short. */
3838 	    }
3839 	    PL_reginput = locinput;	/* put where regtry can find it */
3840 	    sayYES_FINAL;		/* Success! */
3841 	case SUCCEED:
3842 	    PL_reginput = locinput;	/* put where regtry can find it */
3843 	    sayYES_LOUD;		/* Success! */
3844 	case SUSPEND:
3845 	    n = 1;
3846 	    PL_reginput = locinput;
3847 	    goto do_ifmatch;
3848 	case UNLESSM:
3849 	    n = 0;
3850 	    if (scan->flags) {
3851 		s = HOPBACKc(locinput, scan->flags);
3852 		if (!s)
3853 		    goto say_yes;
3854 		PL_reginput = s;
3855 	    }
3856 	    else
3857 		PL_reginput = locinput;
3858 	    PL_reg_flags ^= RF_false;
3859 	    goto do_ifmatch;
3860 	case IFMATCH:
3861 	    n = 1;
3862 	    if (scan->flags) {
3863 		s = HOPBACKc(locinput, scan->flags);
3864 		if (!s)
3865 		    goto say_no;
3866 		PL_reginput = s;
3867 	    }
3868 	    else
3869 		PL_reginput = locinput;
3870 
3871 	  do_ifmatch:
3872 	    inner = NEXTOPER(NEXTOPER(scan));
3873 	    if (regmatch(inner) != n) {
3874 		if (n == 0)
3875 		    PL_reg_flags ^= RF_false;
3876 	      say_no:
3877 		if (logical) {
3878 		    logical = 0;
3879 		    sw = 0;
3880 		    goto do_longjump;
3881 		}
3882 		else
3883 		    sayNO;
3884 	    }
3885 	    if (n == 0)
3886 		PL_reg_flags ^= RF_false;
3887 	  say_yes:
3888 	    if (logical) {
3889 		logical = 0;
3890 		sw = 1;
3891 	    }
3892 	    if (OP(scan) == SUSPEND) {
3893 		locinput = PL_reginput;
3894 		nextchr = UCHARAT(locinput);
3895 	    }
3896 	    /* FALL THROUGH. */
3897 	case LONGJMP:
3898 	  do_longjump:
3899 	    next = scan + ARG(scan);
3900 	    if (next == scan)
3901 		next = NULL;
3902 	    break;
3903 	default:
3904 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3905 			  PTR2UV(scan), OP(scan));
3906 	    Perl_croak(aTHX_ "regexp memory corruption");
3907 	}
3908       reenter:
3909 	scan = next;
3910     }
3911 
3912     /*
3913     * We get here only if there's trouble -- normally "case END" is
3914     * the terminating point.
3915     */
3916     Perl_croak(aTHX_ "corrupted regexp pointers");
3917     /*NOTREACHED*/
3918     sayNO;
3919 
3920 yes_loud:
3921     DEBUG_r(
3922 	PerlIO_printf(Perl_debug_log,
3923 		      "%*s  %scould match...%s\n",
3924 		      REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3925 	);
3926     goto yes;
3927 yes_final:
3928     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3929 			  PL_colors[4],PL_colors[5]));
3930 yes:
3931 #ifdef DEBUGGING
3932     PL_regindent--;
3933 #endif
3934 
3935 #if 0					/* Breaks $^R */
3936     if (unwind)
3937 	regcpblow(firstcp);
3938 #endif
3939     return 1;
3940 
3941 no:
3942     DEBUG_r(
3943 	PerlIO_printf(Perl_debug_log,
3944 		      "%*s  %sfailed...%s\n",
3945 		      REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3946 	);
3947     goto do_no;
3948 no_final:
3949 do_no:
3950     if (unwind) {
3951 	re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3952 
3953 	switch (uw->type) {
3954 	case RE_UNWIND_BRANCH:
3955 	case RE_UNWIND_BRANCHJ:
3956 	{
3957 	    re_unwind_branch_t *uwb = &(uw->branch);
3958 	    I32 lastparen = uwb->lastparen;
3959 
3960 	    REGCP_UNWIND(uwb->lastcp);
3961 	    for (n = *PL_reglastparen; n > lastparen; n--)
3962 		PL_regendp[n] = -1;
3963 	    *PL_reglastparen = n;
3964 	    scan = next = uwb->next;
3965 	    if ( !scan ||
3966 		 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3967 			      ? BRANCH : BRANCHJ) ) {		/* Failure */
3968 		unwind = uwb->prev;
3969 #ifdef DEBUGGING
3970 		PL_regindent--;
3971 #endif
3972 		goto do_no;
3973 	    }
3974 	    /* Have more choice yet.  Reuse the same uwb.  */
3975 	    /*SUPPRESS 560*/
3976 	    if ((n = (uwb->type == RE_UNWIND_BRANCH
3977 		      ? NEXT_OFF(next) : ARG(next))))
3978 		next += n;
3979 	    else
3980 		next = NULL;	/* XXXX Needn't unwinding in this case... */
3981 	    uwb->next = next;
3982 	    next = NEXTOPER(scan);
3983 	    if (uwb->type == RE_UNWIND_BRANCHJ)
3984 		next = NEXTOPER(next);
3985 	    locinput = uwb->locinput;
3986 	    nextchr = uwb->nextchr;
3987 #ifdef DEBUGGING
3988 	    PL_regindent = uwb->regindent;
3989 #endif
3990 
3991 	    goto reenter;
3992 	}
3993 	/* NOT REACHED */
3994 	default:
3995 	    Perl_croak(aTHX_ "regexp unwind memory corruption");
3996 	}
3997 	/* NOT REACHED */
3998     }
3999 #ifdef DEBUGGING
4000     PL_regindent--;
4001 #endif
4002     return 0;
4003 }
4004 
4005 /*
4006  - regrepeat - repeatedly match something simple, report how many
4007  */
4008 /*
4009  * [This routine now assumes that it will only match on things of length 1.
4010  * That was true before, but now we assume scan - reginput is the count,
4011  * rather than incrementing count on every character.  [Er, except utf8.]]
4012  */
4013 STATIC I32
4014 S_regrepeat(pTHX_ regnode *p, I32 max)
4015 {
4016     register char *scan;
4017     register I32 c;
4018     register char *loceol = PL_regeol;
4019     register I32 hardcount = 0;
4020     register bool do_utf8 = PL_reg_match_utf8;
4021 
4022     scan = PL_reginput;
4023     if (max == REG_INFTY)
4024 	max = I32_MAX;
4025     else if (max < loceol - scan)
4026       loceol = scan + max;
4027     switch (OP(p)) {
4028     case REG_ANY:
4029 	if (do_utf8) {
4030 	    loceol = PL_regeol;
4031 	    while (scan < loceol && hardcount < max && *scan != '\n') {
4032 		scan += UTF8SKIP(scan);
4033 		hardcount++;
4034 	    }
4035 	} else {
4036 	    while (scan < loceol && *scan != '\n')
4037 		scan++;
4038 	}
4039 	break;
4040     case SANY:
4041         if (do_utf8) {
4042 	    loceol = PL_regeol;
4043 	    while (scan < loceol && hardcount < max) {
4044 	        scan += UTF8SKIP(scan);
4045 		hardcount++;
4046 	    }
4047 	}
4048 	else
4049 	    scan = loceol;
4050 	break;
4051     case CANY:
4052 	scan = loceol;
4053 	break;
4054     case EXACT:		/* length of string is 1 */
4055 	c = (U8)*STRING(p);
4056 	while (scan < loceol && UCHARAT(scan) == c)
4057 	    scan++;
4058 	break;
4059     case EXACTF:	/* length of string is 1 */
4060 	c = (U8)*STRING(p);
4061 	while (scan < loceol &&
4062 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4063 	    scan++;
4064 	break;
4065     case EXACTFL:	/* length of string is 1 */
4066 	PL_reg_flags |= RF_tainted;
4067 	c = (U8)*STRING(p);
4068 	while (scan < loceol &&
4069 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4070 	    scan++;
4071 	break;
4072     case ANYOF:
4073 	if (do_utf8) {
4074 	    loceol = PL_regeol;
4075 	    while (hardcount < max && scan < loceol &&
4076 		   reginclass(p, (U8*)scan, 0, do_utf8)) {
4077 		scan += UTF8SKIP(scan);
4078 		hardcount++;
4079 	    }
4080 	} else {
4081 	    while (scan < loceol && REGINCLASS(p, (U8*)scan))
4082 		scan++;
4083 	}
4084 	break;
4085     case ALNUM:
4086 	if (do_utf8) {
4087 	    loceol = PL_regeol;
4088 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4089 	    while (hardcount < max && scan < loceol &&
4090 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4091 		scan += UTF8SKIP(scan);
4092 		hardcount++;
4093 	    }
4094 	} else {
4095 	    while (scan < loceol && isALNUM(*scan))
4096 		scan++;
4097 	}
4098 	break;
4099     case ALNUML:
4100 	PL_reg_flags |= RF_tainted;
4101 	if (do_utf8) {
4102 	    loceol = PL_regeol;
4103 	    while (hardcount < max && scan < loceol &&
4104 		   isALNUM_LC_utf8((U8*)scan)) {
4105 		scan += UTF8SKIP(scan);
4106 		hardcount++;
4107 	    }
4108 	} else {
4109 	    while (scan < loceol && isALNUM_LC(*scan))
4110 		scan++;
4111 	}
4112 	break;
4113     case NALNUM:
4114 	if (do_utf8) {
4115 	    loceol = PL_regeol;
4116 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4117 	    while (hardcount < max && scan < loceol &&
4118 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4119 		scan += UTF8SKIP(scan);
4120 		hardcount++;
4121 	    }
4122 	} else {
4123 	    while (scan < loceol && !isALNUM(*scan))
4124 		scan++;
4125 	}
4126 	break;
4127     case NALNUML:
4128 	PL_reg_flags |= RF_tainted;
4129 	if (do_utf8) {
4130 	    loceol = PL_regeol;
4131 	    while (hardcount < max && scan < loceol &&
4132 		   !isALNUM_LC_utf8((U8*)scan)) {
4133 		scan += UTF8SKIP(scan);
4134 		hardcount++;
4135 	    }
4136 	} else {
4137 	    while (scan < loceol && !isALNUM_LC(*scan))
4138 		scan++;
4139 	}
4140 	break;
4141     case SPACE:
4142 	if (do_utf8) {
4143 	    loceol = PL_regeol;
4144 	    LOAD_UTF8_CHARCLASS(space," ");
4145 	    while (hardcount < max && scan < loceol &&
4146 		   (*scan == ' ' ||
4147 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4148 		scan += UTF8SKIP(scan);
4149 		hardcount++;
4150 	    }
4151 	} else {
4152 	    while (scan < loceol && isSPACE(*scan))
4153 		scan++;
4154 	}
4155 	break;
4156     case SPACEL:
4157 	PL_reg_flags |= RF_tainted;
4158 	if (do_utf8) {
4159 	    loceol = PL_regeol;
4160 	    while (hardcount < max && scan < loceol &&
4161 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4162 		scan += UTF8SKIP(scan);
4163 		hardcount++;
4164 	    }
4165 	} else {
4166 	    while (scan < loceol && isSPACE_LC(*scan))
4167 		scan++;
4168 	}
4169 	break;
4170     case NSPACE:
4171 	if (do_utf8) {
4172 	    loceol = PL_regeol;
4173 	    LOAD_UTF8_CHARCLASS(space," ");
4174 	    while (hardcount < max && scan < loceol &&
4175 		   !(*scan == ' ' ||
4176 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4177 		scan += UTF8SKIP(scan);
4178 		hardcount++;
4179 	    }
4180 	} else {
4181 	    while (scan < loceol && !isSPACE(*scan))
4182 		scan++;
4183 	    break;
4184 	}
4185     case NSPACEL:
4186 	PL_reg_flags |= RF_tainted;
4187 	if (do_utf8) {
4188 	    loceol = PL_regeol;
4189 	    while (hardcount < max && scan < loceol &&
4190 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4191 		scan += UTF8SKIP(scan);
4192 		hardcount++;
4193 	    }
4194 	} else {
4195 	    while (scan < loceol && !isSPACE_LC(*scan))
4196 		scan++;
4197 	}
4198 	break;
4199     case DIGIT:
4200 	if (do_utf8) {
4201 	    loceol = PL_regeol;
4202 	    LOAD_UTF8_CHARCLASS(digit,"0");
4203 	    while (hardcount < max && scan < loceol &&
4204 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4205 		scan += UTF8SKIP(scan);
4206 		hardcount++;
4207 	    }
4208 	} else {
4209 	    while (scan < loceol && isDIGIT(*scan))
4210 		scan++;
4211 	}
4212 	break;
4213     case NDIGIT:
4214 	if (do_utf8) {
4215 	    loceol = PL_regeol;
4216 	    LOAD_UTF8_CHARCLASS(digit,"0");
4217 	    while (hardcount < max && scan < loceol &&
4218 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4219 		scan += UTF8SKIP(scan);
4220 		hardcount++;
4221 	    }
4222 	} else {
4223 	    while (scan < loceol && !isDIGIT(*scan))
4224 		scan++;
4225 	}
4226 	break;
4227     default:		/* Called on something of 0 width. */
4228 	break;		/* So match right here or not at all. */
4229     }
4230 
4231     if (hardcount)
4232 	c = hardcount;
4233     else
4234 	c = scan - PL_reginput;
4235     PL_reginput = scan;
4236 
4237     DEBUG_r(
4238 	{
4239 		SV *prop = sv_newmortal();
4240 
4241 		regprop(prop, p);
4242 		PerlIO_printf(Perl_debug_log,
4243 			      "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4244 			      REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4245 	});
4246 
4247     return(c);
4248 }
4249 
4250 /*
4251  - regrepeat_hard - repeatedly match something, report total lenth and length
4252  *
4253  * The repeater is supposed to have constant length.
4254  */
4255 
4256 STATIC I32
4257 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4258 {
4259     register char *scan = Nullch;
4260     register char *start;
4261     register char *loceol = PL_regeol;
4262     I32 l = 0;
4263     I32 count = 0, res = 1;
4264 
4265     if (!max)
4266 	return 0;
4267 
4268     start = PL_reginput;
4269     if (PL_reg_match_utf8) {
4270 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4271 	    if (!count++) {
4272 		l = 0;
4273 		while (start < PL_reginput) {
4274 		    l++;
4275 		    start += UTF8SKIP(start);
4276 		}
4277 		*lp = l;
4278 		if (l == 0)
4279 		    return max;
4280 	    }
4281 	    if (count == max)
4282 		return count;
4283 	}
4284     }
4285     else {
4286 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4287 	    if (!count++) {
4288 		*lp = l = PL_reginput - start;
4289 		if (max != REG_INFTY && l*max < loceol - scan)
4290 		    loceol = scan + l*max;
4291 		if (l == 0)
4292 		    return max;
4293 	    }
4294 	}
4295     }
4296     if (!res)
4297 	PL_reginput = scan;
4298 
4299     return count;
4300 }
4301 
4302 /*
4303 - regclass_swash - prepare the utf8 swash
4304 */
4305 
4306 SV *
4307 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4308 {
4309     SV *sw  = NULL;
4310     SV *si  = NULL;
4311     SV *alt = NULL;
4312 
4313     if (PL_regdata && PL_regdata->count) {
4314 	U32 n = ARG(node);
4315 
4316 	if (PL_regdata->what[n] == 's') {
4317 	    SV *rv = (SV*)PL_regdata->data[n];
4318 	    AV *av = (AV*)SvRV((SV*)rv);
4319 	    SV **ary = AvARRAY(av);
4320 	    SV **a, **b;
4321 
4322 	    /* See the end of regcomp.c:S_reglass() for
4323 	     * documentation of these array elements. */
4324 
4325 	    si = *ary;
4326 	    a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4327 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4328 
4329 	    if (a)
4330 		sw = *a;
4331 	    else if (si && doinit) {
4332 		sw = swash_init("utf8", "", si, 1, 0);
4333 		(void)av_store(av, 1, sw);
4334 	    }
4335 	    if (b)
4336 	        alt = *b;
4337 	}
4338     }
4339 
4340     if (listsvp)
4341 	*listsvp = si;
4342     if (altsvp)
4343 	*altsvp  = alt;
4344 
4345     return sw;
4346 }
4347 
4348 /*
4349  - reginclass - determine if a character falls into a character class
4350 
4351   The n is the ANYOF regnode, the p is the target string, lenp
4352   is pointer to the maximum length of how far to go in the p
4353   (if the lenp is zero, UTF8SKIP(p) is used),
4354   do_utf8 tells whether the target string is in UTF-8.
4355 
4356  */
4357 
4358 STATIC bool
4359 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4360 {
4361     char flags = ANYOF_FLAGS(n);
4362     bool match = FALSE;
4363     UV c = *p;
4364     STRLEN len = 0;
4365     STRLEN plen;
4366 
4367     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4368 	 c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
4369 			    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4370 
4371     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4372     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4373         if (lenp)
4374 	    *lenp = 0;
4375 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
4376 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4377 		match = TRUE;
4378 	}
4379 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4380 	    match = TRUE;
4381 	if (!match) {
4382 	    AV *av;
4383 	    SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4384 
4385 	    if (sw) {
4386 		if (swash_fetch(sw, p, do_utf8))
4387 		    match = TRUE;
4388 		else if (flags & ANYOF_FOLD) {
4389 		    if (!match && lenp && av) {
4390 		        I32 i;
4391 
4392 			for (i = 0; i <= av_len(av); i++) {
4393 			    SV* sv = *av_fetch(av, i, FALSE);
4394 			    STRLEN len;
4395 			    char *s = SvPV(sv, len);
4396 
4397 			    if (len <= plen && memEQ(s, (char*)p, len)) {
4398 			        *lenp = len;
4399 				match = TRUE;
4400 				break;
4401 			    }
4402 			}
4403 		    }
4404 		    if (!match) {
4405 		        U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4406 			STRLEN tmplen;
4407 
4408 		        to_utf8_fold(p, tmpbuf, &tmplen);
4409 			if (swash_fetch(sw, tmpbuf, do_utf8))
4410 			    match = TRUE;
4411 		    }
4412 		}
4413 	    }
4414 	}
4415 	if (match && lenp && *lenp == 0)
4416 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
4417     }
4418     if (!match && c < 256) {
4419 	if (ANYOF_BITMAP_TEST(n, c))
4420 	    match = TRUE;
4421 	else if (flags & ANYOF_FOLD) {
4422 	    U8 f;
4423 
4424 	    if (flags & ANYOF_LOCALE) {
4425 		PL_reg_flags |= RF_tainted;
4426 		f = PL_fold_locale[c];
4427 	    }
4428 	    else
4429 		f = PL_fold[c];
4430 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
4431 		match = TRUE;
4432 	}
4433 
4434 	if (!match && (flags & ANYOF_CLASS)) {
4435 	    PL_reg_flags |= RF_tainted;
4436 	    if (
4437 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4438 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4439 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4440 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4441 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4442 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4443 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4444 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4445 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4446 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4447 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4448 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4449 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4450 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4451 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4452 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4453 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4454 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4455 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4456 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4457 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4458 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4459 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4460 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4461 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4462 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4463 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4464 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4465 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4466 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4467 		) /* How's that for a conditional? */
4468 	    {
4469 		match = TRUE;
4470 	    }
4471 	}
4472     }
4473 
4474     return (flags & ANYOF_INVERT) ? !match : match;
4475 }
4476 
4477 STATIC U8 *
4478 S_reghop(pTHX_ U8 *s, I32 off)
4479 {
4480     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4481 }
4482 
4483 STATIC U8 *
4484 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4485 {
4486     if (off >= 0) {
4487 	while (off-- && s < lim) {
4488 	    /* XXX could check well-formedness here */
4489 	    s += UTF8SKIP(s);
4490 	}
4491     }
4492     else {
4493 	while (off++) {
4494 	    if (s > lim) {
4495 		s--;
4496 		if (UTF8_IS_CONTINUED(*s)) {
4497 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4498 			s--;
4499 		}
4500 		/* XXX could check well-formedness here */
4501 	    }
4502 	}
4503     }
4504     return s;
4505 }
4506 
4507 STATIC U8 *
4508 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4509 {
4510     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4511 }
4512 
4513 STATIC U8 *
4514 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4515 {
4516     if (off >= 0) {
4517 	while (off-- && s < lim) {
4518 	    /* XXX could check well-formedness here */
4519 	    s += UTF8SKIP(s);
4520 	}
4521 	if (off >= 0)
4522 	    return 0;
4523     }
4524     else {
4525 	while (off++) {
4526 	    if (s > lim) {
4527 		s--;
4528 		if (UTF8_IS_CONTINUED(*s)) {
4529 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4530 			s--;
4531 		}
4532 		/* XXX could check well-formedness here */
4533 	    }
4534 	    else
4535 		break;
4536 	}
4537 	if (off <= 0)
4538 	    return 0;
4539     }
4540     return s;
4541 }
4542 
4543 static void
4544 restore_pos(pTHX_ void *arg)
4545 {
4546     if (PL_reg_eval_set) {
4547 	if (PL_reg_oldsaved) {
4548 	    PL_reg_re->subbeg = PL_reg_oldsaved;
4549 	    PL_reg_re->sublen = PL_reg_oldsavedlen;
4550 	    RX_MATCH_COPIED_on(PL_reg_re);
4551 	}
4552 	PL_reg_magic->mg_len = PL_reg_oldpos;
4553 	PL_reg_eval_set = 0;
4554 	PL_curpm = PL_reg_oldcurpm;
4555     }
4556 }
4557 
4558 STATIC void
4559 S_to_utf8_substr(pTHX_ register regexp *prog)
4560 {
4561     SV* sv;
4562     if (prog->float_substr && !prog->float_utf8) {
4563 	prog->float_utf8 = sv = NEWSV(117, 0);
4564 	SvSetSV(sv, prog->float_substr);
4565 	sv_utf8_upgrade(sv);
4566 	if (SvTAIL(prog->float_substr))
4567 	    SvTAIL_on(sv);
4568 	if (prog->float_substr == prog->check_substr)
4569 	    prog->check_utf8 = sv;
4570     }
4571     if (prog->anchored_substr && !prog->anchored_utf8) {
4572 	prog->anchored_utf8 = sv = NEWSV(118, 0);
4573 	SvSetSV(sv, prog->anchored_substr);
4574 	sv_utf8_upgrade(sv);
4575 	if (SvTAIL(prog->anchored_substr))
4576 	    SvTAIL_on(sv);
4577 	if (prog->anchored_substr == prog->check_substr)
4578 	    prog->check_utf8 = sv;
4579     }
4580 }
4581 
4582 STATIC void
4583 S_to_byte_substr(pTHX_ register regexp *prog)
4584 {
4585     SV* sv;
4586     if (prog->float_utf8 && !prog->float_substr) {
4587 	prog->float_substr = sv = NEWSV(117, 0);
4588 	SvSetSV(sv, prog->float_utf8);
4589 	if (sv_utf8_downgrade(sv, TRUE)) {
4590 	    if (SvTAIL(prog->float_utf8))
4591 		SvTAIL_on(sv);
4592 	} else {
4593 	    SvREFCNT_dec(sv);
4594 	    prog->float_substr = sv = &PL_sv_undef;
4595 	}
4596 	if (prog->float_utf8 == prog->check_utf8)
4597 	    prog->check_substr = sv;
4598     }
4599     if (prog->anchored_utf8 && !prog->anchored_substr) {
4600 	prog->anchored_substr = sv = NEWSV(118, 0);
4601 	SvSetSV(sv, prog->anchored_utf8);
4602 	if (sv_utf8_downgrade(sv, TRUE)) {
4603 	    if (SvTAIL(prog->anchored_utf8))
4604 		SvTAIL_on(sv);
4605 	} else {
4606 	    SvREFCNT_dec(sv);
4607 	    prog->anchored_substr = sv = &PL_sv_undef;
4608 	}
4609 	if (prog->anchored_utf8 == prog->check_utf8)
4610 	    prog->check_substr = sv;
4611     }
4612 }
4613