xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/regexec.c (revision 0:68f95e015346)
1 /*    regexec.c
2  */
3 
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7 
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9  * confused with the original package (see point 3 below).  Thanks, Henry!
10  */
11 
12 /* Additional note: this code is very heavily munged from Henry's version
13  * in places.  In some spots I've traded clarity for efficiency, so don't
14  * blame Henry for some of the lack of readability.
15  */
16 
17 /* The names of the functions have been changed from regcomp and
18  * regexec to  pregcomp and pregexec in order to avoid conflicts
19  * with the POSIX routines of the same names.
20 */
21 
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 #  ifndef PERL_IN_XSUB_RE
25 #    define PERL_IN_XSUB_RE
26 #  endif
27 /* need access to debugger hooks */
28 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29 #    define DEBUGGING
30 #  endif
31 #endif
32 
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 #  define Perl_regexec_flags my_regexec
36 #  define Perl_regdump my_regdump
37 #  define Perl_regprop my_regprop
38 #  define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 #  define Perl_pregexec my_pregexec
41 #  define Perl_reginitcolors my_reginitcolors
42 #  define Perl_regclass_swash my_regclass_swash
43 
44 #  define PERL_NO_GET_CONTEXT
45 #endif
46 
47 /*SUPPRESS 112*/
48 /*
49  * pregcomp and pregexec -- regsub and regerror are not used in perl
50  *
51  *	Copyright (c) 1986 by University of Toronto.
52  *	Written by Henry Spencer.  Not derived from licensed software.
53  *
54  *	Permission is granted to anyone to use this software for any
55  *	purpose on any computer system, and to redistribute it freely,
56  *	subject to the following restrictions:
57  *
58  *	1. The author is not responsible for the consequences of use of
59  *		this software, no matter how awful, even if they arise
60  *		from defects in it.
61  *
62  *	2. The origin of this software must not be misrepresented, either
63  *		by explicit claim or by omission.
64  *
65  *	3. Altered versions must be plainly marked as such, and must not
66  *		be misrepresented as being the original software.
67  *
68  ****    Alterations to Henry's code are...
69  ****
70  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
71  ****    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
72  ****
73  ****    You may distribute under the terms of either the GNU General Public
74  ****    License or the Artistic License, as specified in the README file.
75  *
76  * Beware that some of this code is subtly aware of the way operator
77  * precedence is structured in regular expressions.  Serious changes in
78  * regular-expression syntax might require a total rethink.
79  */
80 #include "EXTERN.h"
81 #define PERL_IN_REGEXEC_C
82 #include "perl.h"
83 
84 #include "regcomp.h"
85 
86 #define RF_tainted	1		/* tainted information used? */
87 #define RF_warned	2		/* warned about big count? */
88 #define RF_evaled	4		/* Did an EVAL with setting? */
89 #define RF_utf8		8		/* String contains multibyte chars? */
90 #define RF_false	16		/* odd number of nested negatives */
91 
92 #define UTF ((PL_reg_flags & RF_utf8) != 0)
93 
94 #define RS_init		1		/* eval environment created */
95 #define RS_set		2		/* replsv value is set */
96 
97 #ifndef STATIC
98 #define	STATIC	static
99 #endif
100 
101 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
102 
103 /*
104  * Forwards.
105  */
106 
107 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
108 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
109 
110 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
111 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
112 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
113 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
114 #define HOPc(pos,off) ((char*)HOP(pos,off))
115 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
116 
117 #define HOPBACK(pos, off) (		\
118     (PL_reg_match_utf8)			\
119 	? reghopmaybe((U8*)pos, -off)	\
120     : (pos - off >= PL_bostr)		\
121 	? (U8*)(pos - off)		\
122     : (U8*)NULL				\
123 )
124 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
125 
126 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
127 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
128 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
129 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
130 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
131 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
132 
133 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
134 
135 /* for use after a quantifier and before an EXACT-like node -- japhy */
136 #define JUMPABLE(rn) ( \
137     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
138     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
139     OP(rn) == PLUS || OP(rn) == MINMOD || \
140     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
141 )
142 
143 #define HAS_TEXT(rn) ( \
144     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
145 )
146 
147 /*
148   Search for mandatory following text node; for lookahead, the text must
149   follow but for lookbehind (rn->flags != 0) we skip to the next step.
150 */
151 #define FIND_NEXT_IMPT(rn) STMT_START { \
152     while (JUMPABLE(rn)) \
153 	if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
154 	    rn = NEXTOPER(NEXTOPER(rn)); \
155 	else if (OP(rn) == PLUS) \
156 	    rn = NEXTOPER(rn); \
157 	else if (OP(rn) == IFMATCH) \
158 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
159 	else rn += NEXT_OFF(rn); \
160 } STMT_END
161 
162 static void restore_pos(pTHX_ void *arg);
163 
164 STATIC CHECKPOINT
S_regcppush(pTHX_ I32 parenfloor)165 S_regcppush(pTHX_ I32 parenfloor)
166 {
167     int retval = PL_savestack_ix;
168 #define REGCP_PAREN_ELEMS 4
169     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
170     int p;
171 
172     if (paren_elems_to_push < 0)
173 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
174 
175 #define REGCP_OTHER_ELEMS 6
176     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
177     for (p = PL_regsize; p > parenfloor; p--) {
178 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
179 	SSPUSHINT(PL_regendp[p]);
180 	SSPUSHINT(PL_regstartp[p]);
181 	SSPUSHPTR(PL_reg_start_tmp[p]);
182 	SSPUSHINT(p);
183     }
184 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
185     SSPUSHINT(PL_regsize);
186     SSPUSHINT(*PL_reglastparen);
187     SSPUSHINT(*PL_reglastcloseparen);
188     SSPUSHPTR(PL_reginput);
189 #define REGCP_FRAME_ELEMS 2
190 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
191  * are needed for the regexp context stack bookkeeping. */
192     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
193     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
194 
195     return retval;
196 }
197 
198 /* These are needed since we do not localize EVAL nodes: */
199 #  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,		\
200 			     "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
201 			     (IV)PL_savestack_ix)); cp = PL_savestack_ix
202 
203 #  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?		\
204 				PerlIO_printf(Perl_debug_log,		\
205 				"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
206 				(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
207 
208 STATIC char *
S_regcppop(pTHX)209 S_regcppop(pTHX)
210 {
211     I32 i;
212     U32 paren = 0;
213     char *input;
214     I32 tmps;
215 
216     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
217     i = SSPOPINT;
218     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
219     i = SSPOPINT; /* Parentheses elements to pop. */
220     input = (char *) SSPOPPTR;
221     *PL_reglastcloseparen = SSPOPINT;
222     *PL_reglastparen = SSPOPINT;
223     PL_regsize = SSPOPINT;
224 
225     /* Now restore the parentheses context. */
226     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
227 	 i > 0; i -= REGCP_PAREN_ELEMS) {
228 	paren = (U32)SSPOPINT;
229 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
230 	PL_regstartp[paren] = SSPOPINT;
231 	tmps = SSPOPINT;
232 	if (paren <= *PL_reglastparen)
233 	    PL_regendp[paren] = tmps;
234 	DEBUG_r(
235 	    PerlIO_printf(Perl_debug_log,
236 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
237 			  (UV)paren, (IV)PL_regstartp[paren],
238 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
239 			  (IV)PL_regendp[paren],
240 			  (paren > *PL_reglastparen ? "(no)" : ""));
241 	);
242     }
243     DEBUG_r(
244 	if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
245 	    PerlIO_printf(Perl_debug_log,
246 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
247 			  (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
248 	}
249     );
250 #if 1
251     /* It would seem that the similar code in regtry()
252      * already takes care of this, and in fact it is in
253      * a better location to since this code can #if 0-ed out
254      * but the code in regtry() is needed or otherwise tests
255      * requiring null fields (pat.t#187 and split.t#{13,14}
256      * (as of patchlevel 7877)  will fail.  Then again,
257      * this code seems to be necessary or otherwise
258      * building DynaLoader will fail:
259      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
260      * --jhi */
261     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
262 	if ((I32)paren > PL_regsize)
263 	    PL_regstartp[paren] = -1;
264 	PL_regendp[paren] = -1;
265     }
266 #endif
267     return input;
268 }
269 
270 STATIC char *
S_regcp_set_to(pTHX_ I32 ss)271 S_regcp_set_to(pTHX_ I32 ss)
272 {
273     I32 tmp = PL_savestack_ix;
274 
275     PL_savestack_ix = ss;
276     regcppop();
277     PL_savestack_ix = tmp;
278     return Nullch;
279 }
280 
281 typedef struct re_cc_state
282 {
283     I32 ss;
284     regnode *node;
285     struct re_cc_state *prev;
286     CURCUR *cc;
287     regexp *re;
288 } re_cc_state;
289 
290 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
291 
292 #define TRYPAREN(paren, n, input) {				\
293     if (paren) {						\
294 	if (n) {						\
295 	    PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;	\
296 	    PL_regendp[paren] = input - PL_bostr;		\
297 	}							\
298 	else							\
299 	    PL_regendp[paren] = -1;				\
300     }								\
301     if (regmatch(next))						\
302 	sayYES;							\
303     if (paren && n)						\
304 	PL_regendp[paren] = -1;					\
305 }
306 
307 
308 /*
309  * pregexec and friends
310  */
311 
312 /*
313  - pregexec - match a regexp against a string
314  */
315 I32
Perl_pregexec(pTHX_ register regexp * prog,char * stringarg,register char * strend,char * strbeg,I32 minend,SV * screamer,U32 nosave)316 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
317 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318 /* strend: pointer to null at end of string */
319 /* strbeg: real beginning of string */
320 /* minend: end of match must be >=minend after stringarg. */
321 /* nosave: For optimizations. */
322 {
323     return
324 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
325 		      nosave ? 0 : REXEC_COPY_STR);
326 }
327 
328 STATIC void
S_cache_re(pTHX_ regexp * prog)329 S_cache_re(pTHX_ regexp *prog)
330 {
331     PL_regprecomp = prog->precomp;		/* Needed for FAIL. */
332 #ifdef DEBUGGING
333     PL_regprogram = prog->program;
334 #endif
335     PL_regnpar = prog->nparens;
336     PL_regdata = prog->data;
337     PL_reg_re = prog;
338 }
339 
340 /*
341  * Need to implement the following flags for reg_anch:
342  *
343  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
344  * USE_INTUIT_ML
345  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
346  * INTUIT_AUTORITATIVE_ML
347  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
348  * INTUIT_ONCE_ML
349  *
350  * Another flag for this function: SECOND_TIME (so that float substrs
351  * with giant delta may be not rechecked).
352  */
353 
354 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
355 
356 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
357    Otherwise, only SvCUR(sv) is used to get strbeg. */
358 
359 /* XXXX We assume that strpos is strbeg unless sv. */
360 
361 /* XXXX Some places assume that there is a fixed substring.
362 	An update may be needed if optimizer marks as "INTUITable"
363 	RExen without fixed substrings.  Similarly, it is assumed that
364 	lengths of all the strings are no more than minlen, thus they
365 	cannot come from lookahead.
366 	(Or minlen should take into account lookahead.) */
367 
368 /* A failure to find a constant substring means that there is no need to make
369    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
370    finding a substring too deep into the string means that less calls to
371    regtry() should be needed.
372 
373    REx compiler's optimizer found 4 possible hints:
374 	a) Anchored substring;
375 	b) Fixed substring;
376 	c) Whether we are anchored (beginning-of-line or \G);
377 	d) First node (of those at offset 0) which may distingush positions;
378    We use a)b)d) and multiline-part of c), and try to find a position in the
379    string which does not contradict any of them.
380  */
381 
382 /* Most of decisions we do here should have been done at compile time.
383    The nodes of the REx which we used for the search should have been
384    deleted from the finite automaton. */
385 
386 char *
Perl_re_intuit_start(pTHX_ regexp * prog,SV * sv,char * strpos,char * strend,U32 flags,re_scream_pos_data * data)387 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
388 		     char *strend, U32 flags, re_scream_pos_data *data)
389 {
390     register I32 start_shift = 0;
391     /* Should be nonnegative! */
392     register I32 end_shift   = 0;
393     register char *s;
394     register SV *check;
395     char *strbeg;
396     char *t;
397     int do_utf8 = sv ? SvUTF8(sv) : 0;	/* if no sv we have to assume bytes */
398     I32 ml_anch;
399     register char *other_last = Nullch;	/* other substr checked before this */
400     char *check_at = Nullch;		/* check substr found at this pos */
401 #ifdef DEBUGGING
402     char *i_strpos = strpos;
403     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
404 #endif
405     RX_MATCH_UTF8_set(prog,do_utf8);
406 
407     if (prog->reganch & ROPT_UTF8) {
408 	DEBUG_r(PerlIO_printf(Perl_debug_log,
409 			      "UTF-8 regex...\n"));
410 	PL_reg_flags |= RF_utf8;
411     }
412 
413     DEBUG_r({
414 	 char *s   = PL_reg_match_utf8 ?
415 	                 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
416 	                 strpos;
417 	 int   len = PL_reg_match_utf8 ?
418 	                 strlen(s) : strend - strpos;
419 	 if (!PL_colorset)
420 	      reginitcolors();
421 	 if (PL_reg_match_utf8)
422 	     DEBUG_r(PerlIO_printf(Perl_debug_log,
423 				   "UTF-8 target...\n"));
424 	 PerlIO_printf(Perl_debug_log,
425 		       "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
426 		       PL_colors[4],PL_colors[5],PL_colors[0],
427 		       prog->precomp,
428 		       PL_colors[1],
429 		       (strlen(prog->precomp) > 60 ? "..." : ""),
430 		       PL_colors[0],
431 		       (int)(len > 60 ? 60 : len),
432 		       s, PL_colors[1],
433 		       (len > 60 ? "..." : "")
434 	      );
435     });
436 
437     /* CHR_DIST() would be more correct here but it makes things slow. */
438     if (prog->minlen > strend - strpos) {
439 	DEBUG_r(PerlIO_printf(Perl_debug_log,
440 			      "String too short... [re_intuit_start]\n"));
441 	goto fail;
442     }
443     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
444     PL_regeol = strend;
445     if (do_utf8) {
446 	if (!prog->check_utf8 && prog->check_substr)
447 	    to_utf8_substr(prog);
448 	check = prog->check_utf8;
449     } else {
450 	if (!prog->check_substr && prog->check_utf8)
451 	    to_byte_substr(prog);
452 	check = prog->check_substr;
453     }
454    if (check == &PL_sv_undef) {
455 	DEBUG_r(PerlIO_printf(Perl_debug_log,
456 		"Non-utf string cannot match utf check string\n"));
457 	goto fail;
458     }
459     if (prog->reganch & ROPT_ANCH) {	/* Match at beg-of-str or after \n */
460 	ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
461 		     || ( (prog->reganch & ROPT_ANCH_BOL)
462 			  && !PL_multiline ) );	/* Check after \n? */
463 
464 	if (!ml_anch) {
465 	  if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
466 				  | ROPT_IMPLICIT)) /* not a real BOL */
467 	       /* SvCUR is not set on references: SvRV and SvPVX overlap */
468 	       && sv && !SvROK(sv)
469 	       && (strpos != strbeg)) {
470 	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
471 	      goto fail;
472 	  }
473 	  if (prog->check_offset_min == prog->check_offset_max &&
474 	      !(prog->reganch & ROPT_CANY_SEEN)) {
475 	    /* Substring at constant offset from beg-of-str... */
476 	    I32 slen;
477 
478 	    s = HOP3c(strpos, prog->check_offset_min, strend);
479 	    if (SvTAIL(check)) {
480 		slen = SvCUR(check);	/* >= 1 */
481 
482 		if ( strend - s > slen || strend - s < slen - 1
483 		     || (strend - s == slen && strend[-1] != '\n')) {
484 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
485 		    goto fail_finish;
486 		}
487 		/* Now should match s[0..slen-2] */
488 		slen--;
489 		if (slen && (*SvPVX(check) != *s
490 			     || (slen > 1
491 				 && memNE(SvPVX(check), s, slen)))) {
492 		  report_neq:
493 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
494 		    goto fail_finish;
495 		}
496 	    }
497 	    else if (*SvPVX(check) != *s
498 		     || ((slen = SvCUR(check)) > 1
499 			 && memNE(SvPVX(check), s, slen)))
500 		goto report_neq;
501 	    goto success_at_start;
502 	  }
503 	}
504 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
505 	s = strpos;
506 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 	end_shift = prog->minlen - start_shift -
508 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
509 	if (!ml_anch) {
510 	    I32 end = prog->check_offset_max + CHR_SVLEN(check)
511 					 - (SvTAIL(check) != 0);
512 	    I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
513 
514 	    if (end_shift < eshift)
515 		end_shift = eshift;
516 	}
517     }
518     else {				/* Can match at random position */
519 	ml_anch = 0;
520 	s = strpos;
521 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
522 	/* Should be nonnegative! */
523 	end_shift = prog->minlen - start_shift -
524 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
525     }
526 
527 #ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
528     if (end_shift < 0)
529 	Perl_croak(aTHX_ "panic: end_shift");
530 #endif
531 
532   restart:
533     /* Find a possible match in the region s..strend by looking for
534        the "check" substring in the region corrected by start/end_shift. */
535     if (flags & REXEC_SCREAM) {
536 	I32 p = -1;			/* Internal iterator of scream. */
537 	I32 *pp = data ? data->scream_pos : &p;
538 
539 	if (PL_screamfirst[BmRARE(check)] >= 0
540 	    || ( BmRARE(check) == '\n'
541 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
542 		 && SvTAIL(check) ))
543 	    s = screaminstr(sv, check,
544 			    start_shift + (s - strbeg), end_shift, pp, 0);
545 	else
546 	    goto fail_finish;
547 	/* we may be pointing at the wrong string */
548 	if (s && RX_MATCH_COPIED(prog))
549 	    s = strbeg + (s - SvPVX(sv));
550 	if (data)
551 	    *data->scream_olds = s;
552     }
553     else if (prog->reganch & ROPT_CANY_SEEN)
554 	s = fbm_instr((U8*)(s + start_shift),
555 		      (U8*)(strend - end_shift),
556 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
557     else
558 	s = fbm_instr(HOP3(s, start_shift, strend),
559 		      HOP3(strend, -end_shift, strbeg),
560 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
561 
562     /* Update the count-of-usability, remove useless subpatterns,
563 	unshift s.  */
564 
565     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
566 			  (s ? "Found" : "Did not find"),
567 			  (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
568 			  PL_colors[0],
569 			  (int)(SvCUR(check) - (SvTAIL(check)!=0)),
570 			  SvPVX(check),
571 			  PL_colors[1], (SvTAIL(check) ? "$" : ""),
572 			  (s ? " at offset " : "...\n") ) );
573 
574     if (!s)
575 	goto fail_finish;
576 
577     check_at = s;
578 
579     /* Finish the diagnostic message */
580     DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
581 
582     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
583        Start with the other substr.
584        XXXX no SCREAM optimization yet - and a very coarse implementation
585        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
586 		*always* match.  Probably should be marked during compile...
587        Probably it is right to do no SCREAM here...
588      */
589 
590     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
591 	/* Take into account the "other" substring. */
592 	/* XXXX May be hopelessly wrong for UTF... */
593 	if (!other_last)
594 	    other_last = strpos;
595 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
596 	  do_other_anchored:
597 	    {
598 		char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
599 		char *s1 = s;
600 		SV* must;
601 
602 		t = s - prog->check_offset_max;
603 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
604 		    && (!do_utf8
605 			|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
606 			    && t > strpos)))
607 		    /* EMPTY */;
608 		else
609 		    t = strpos;
610 		t = HOP3c(t, prog->anchored_offset, strend);
611 		if (t < other_last)	/* These positions already checked */
612 		    t = other_last;
613 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
614 		if (last < last1)
615 		    last1 = last;
616  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
617 		/* On end-of-str: see comment below. */
618 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
619 		if (must == &PL_sv_undef) {
620 		    s = (char*)NULL;
621 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
622 		}
623 		else
624 		    s = fbm_instr(
625 			(unsigned char*)t,
626 			HOP3(HOP3(last1, prog->anchored_offset, strend)
627 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
628 			must,
629 			PL_multiline ? FBMrf_MULTILINE : 0
630 		    );
631 		DEBUG_r(PerlIO_printf(Perl_debug_log,
632 			"%s anchored substr `%s%.*s%s'%s",
633 			(s ? "Found" : "Contradicts"),
634 			PL_colors[0],
635 			  (int)(SvCUR(must)
636 			  - (SvTAIL(must)!=0)),
637 			  SvPVX(must),
638 			  PL_colors[1], (SvTAIL(must) ? "$" : "")));
639 		if (!s) {
640 		    if (last1 >= last2) {
641 			DEBUG_r(PerlIO_printf(Perl_debug_log,
642 						", giving up...\n"));
643 			goto fail_finish;
644 		    }
645 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
646 			", trying floating at offset %ld...\n",
647 			(long)(HOP3c(s1, 1, strend) - i_strpos)));
648 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
649 		    s = HOP3c(last, 1, strend);
650 		    goto restart;
651 		}
652 		else {
653 		    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
654 			  (long)(s - i_strpos)));
655 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
656 		    other_last = HOP3c(s, 1, strend);
657 		    s = s1;
658 		    if (t == strpos)
659 			goto try_at_start;
660 		    goto try_at_offset;
661 		}
662 	    }
663 	}
664 	else {		/* Take into account the floating substring. */
665 	    char *last, *last1;
666 	    char *s1 = s;
667 	    SV* must;
668 
669 	    t = HOP3c(s, -start_shift, strbeg);
670 	    last1 = last =
671 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
672 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
673 		last = HOP3c(t, prog->float_max_offset, strend);
674 	    s = HOP3c(t, prog->float_min_offset, strend);
675 	    if (s < other_last)
676 		s = other_last;
677  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
678 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
679 	    /* fbm_instr() takes into account exact value of end-of-str
680 	       if the check is SvTAIL(ed).  Since false positives are OK,
681 	       and end-of-str is not later than strend we are OK. */
682 	    if (must == &PL_sv_undef) {
683 		s = (char*)NULL;
684 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
685 	    }
686 	    else
687 		s = fbm_instr((unsigned char*)s,
688 			      (unsigned char*)last + SvCUR(must)
689 				  - (SvTAIL(must)!=0),
690 			      must, PL_multiline ? FBMrf_MULTILINE : 0);
691 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
692 		    (s ? "Found" : "Contradicts"),
693 		    PL_colors[0],
694 		      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
695 		      SvPVX(must),
696 		      PL_colors[1], (SvTAIL(must) ? "$" : "")));
697 	    if (!s) {
698 		if (last1 == last) {
699 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
700 					    ", giving up...\n"));
701 		    goto fail_finish;
702 		}
703 		DEBUG_r(PerlIO_printf(Perl_debug_log,
704 		    ", trying anchored starting at offset %ld...\n",
705 		    (long)(s1 + 1 - i_strpos)));
706 		other_last = last;
707 		s = HOP3c(t, 1, strend);
708 		goto restart;
709 	    }
710 	    else {
711 		DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
712 		      (long)(s - i_strpos)));
713 		other_last = s; /* Fix this later. --Hugo */
714 		s = s1;
715 		if (t == strpos)
716 		    goto try_at_start;
717 		goto try_at_offset;
718 	    }
719 	}
720     }
721 
722     t = s - prog->check_offset_max;
723     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
724         && (!do_utf8
725 	    || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
726 		 && t > strpos))) {
727 	/* Fixed substring is found far enough so that the match
728 	   cannot start at strpos. */
729       try_at_offset:
730 	if (ml_anch && t[-1] != '\n') {
731 	    /* Eventually fbm_*() should handle this, but often
732 	       anchored_offset is not 0, so this check will not be wasted. */
733 	    /* XXXX In the code below we prefer to look for "^" even in
734 	       presence of anchored substrings.  And we search even
735 	       beyond the found float position.  These pessimizations
736 	       are historical artefacts only.  */
737 	  find_anchor:
738 	    while (t < strend - prog->minlen) {
739 		if (*t == '\n') {
740 		    if (t < check_at - prog->check_offset_min) {
741 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
742 			    /* Since we moved from the found position,
743 			       we definitely contradict the found anchored
744 			       substr.  Due to the above check we do not
745 			       contradict "check" substr.
746 			       Thus we can arrive here only if check substr
747 			       is float.  Redo checking for "other"=="fixed".
748 			     */
749 			    strpos = t + 1;
750 			    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
751 				PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
752 			    goto do_other_anchored;
753 			}
754 			/* We don't contradict the found floating substring. */
755 			/* XXXX Why not check for STCLASS? */
756 			s = t + 1;
757 			DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
758 			    PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
759 			goto set_useful;
760 		    }
761 		    /* Position contradicts check-string */
762 		    /* XXXX probably better to look for check-string
763 		       than for "\n", so one should lower the limit for t? */
764 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
765 			PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
766 		    other_last = strpos = s = t + 1;
767 		    goto restart;
768 		}
769 		t++;
770 	    }
771 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
772 			PL_colors[0],PL_colors[1]));
773 	    goto fail_finish;
774 	}
775 	else {
776 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
777 			PL_colors[0],PL_colors[1]));
778 	}
779 	s = t;
780       set_useful:
781 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
782     }
783     else {
784 	/* The found string does not prohibit matching at strpos,
785 	   - no optimization of calling REx engine can be performed,
786 	   unless it was an MBOL and we are not after MBOL,
787 	   or a future STCLASS check will fail this. */
788       try_at_start:
789 	/* Even in this situation we may use MBOL flag if strpos is offset
790 	   wrt the start of the string. */
791 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
792 	    && (strpos != strbeg) && strpos[-1] != '\n'
793 	    /* May be due to an implicit anchor of m{.*foo}  */
794 	    && !(prog->reganch & ROPT_IMPLICIT))
795 	{
796 	    t = strpos;
797 	    goto find_anchor;
798 	}
799 	DEBUG_r( if (ml_anch)
800 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
801 			(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
802 	);
803       success_at_start:
804 	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
805 	    && (do_utf8 ? (
806 		prog->check_utf8		/* Could be deleted already */
807 		&& --BmUSEFUL(prog->check_utf8) < 0
808 		&& (prog->check_utf8 == prog->float_utf8)
809 	    ) : (
810 		prog->check_substr		/* Could be deleted already */
811 		&& --BmUSEFUL(prog->check_substr) < 0
812 		&& (prog->check_substr == prog->float_substr)
813 	    )))
814 	{
815 	    /* If flags & SOMETHING - do not do it many times on the same match */
816 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
817 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
818 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
819 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
820 	    prog->check_substr = prog->check_utf8 = Nullsv;	/* disable */
821 	    prog->float_substr = prog->float_utf8 = Nullsv;	/* clear */
822 	    check = Nullsv;			/* abort */
823 	    s = strpos;
824 	    /* XXXX This is a remnant of the old implementation.  It
825 	            looks wasteful, since now INTUIT can use many
826 	            other heuristics. */
827 	    prog->reganch &= ~RE_USE_INTUIT;
828 	}
829 	else
830 	    s = strpos;
831     }
832 
833     /* Last resort... */
834     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
835     if (prog->regstclass) {
836 	/* minlen == 0 is possible if regstclass is \b or \B,
837 	   and the fixed substr is ''$.
838 	   Since minlen is already taken into account, s+1 is before strend;
839 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
840 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
841 	   regstclass does not come from lookahead...  */
842 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
843 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
844 	U8* str = (U8*)STRING(prog->regstclass);
845 	int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
846 		    ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
847 		    : 1);
848 	char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
849 		? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
850 		: (prog->float_substr || prog->float_utf8
851 		   ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
852 			   cl_l, strend)
853 		   : strend);
854 	char *startpos = strbeg;
855 
856 	t = s;
857 	cache_re(prog);
858 	s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
859 	if (!s) {
860 #ifdef DEBUGGING
861 	    char *what = 0;
862 #endif
863 	    if (endpos == strend) {
864 		DEBUG_r( PerlIO_printf(Perl_debug_log,
865 				"Could not match STCLASS...\n") );
866 		goto fail;
867 	    }
868 	    DEBUG_r( PerlIO_printf(Perl_debug_log,
869 				   "This position contradicts STCLASS...\n") );
870 	    if ((prog->reganch & ROPT_ANCH) && !ml_anch)
871 		goto fail;
872 	    /* Contradict one of substrings */
873 	    if (prog->anchored_substr || prog->anchored_utf8) {
874 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
875 		    DEBUG_r( what = "anchored" );
876 		  hop_and_restart:
877 		    s = HOP3c(t, 1, strend);
878 		    if (s + start_shift + end_shift > strend) {
879 			/* XXXX Should be taken into account earlier? */
880 			DEBUG_r( PerlIO_printf(Perl_debug_log,
881 					       "Could not match STCLASS...\n") );
882 			goto fail;
883 		    }
884 		    if (!check)
885 			goto giveup;
886 		    DEBUG_r( PerlIO_printf(Perl_debug_log,
887 				"Looking for %s substr starting at offset %ld...\n",
888 				 what, (long)(s + start_shift - i_strpos)) );
889 		    goto restart;
890 		}
891 		/* Have both, check_string is floating */
892 		if (t + start_shift >= check_at) /* Contradicts floating=check */
893 		    goto retry_floating_check;
894 		/* Recheck anchored substring, but not floating... */
895 		s = check_at;
896 		if (!check)
897 		    goto giveup;
898 		DEBUG_r( PerlIO_printf(Perl_debug_log,
899 			  "Looking for anchored substr starting at offset %ld...\n",
900 			  (long)(other_last - i_strpos)) );
901 		goto do_other_anchored;
902 	    }
903 	    /* Another way we could have checked stclass at the
904                current position only: */
905 	    if (ml_anch) {
906 		s = t = t + 1;
907 		if (!check)
908 		    goto giveup;
909 		DEBUG_r( PerlIO_printf(Perl_debug_log,
910 			  "Looking for /%s^%s/m starting at offset %ld...\n",
911 			  PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
912 		goto try_at_offset;
913 	    }
914 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
915 		goto fail;
916 	    /* Check is floating subtring. */
917 	  retry_floating_check:
918 	    t = check_at - start_shift;
919 	    DEBUG_r( what = "floating" );
920 	    goto hop_and_restart;
921 	}
922 	if (t != s) {
923             DEBUG_r(PerlIO_printf(Perl_debug_log,
924 			"By STCLASS: moving %ld --> %ld\n",
925                                   (long)(t - i_strpos), (long)(s - i_strpos))
926                    );
927         }
928         else {
929             DEBUG_r(PerlIO_printf(Perl_debug_log,
930                                   "Does not contradict STCLASS...\n");
931                    );
932         }
933     }
934   giveup:
935     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
936 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
937 			  PL_colors[5], (long)(s - i_strpos)) );
938     return s;
939 
940   fail_finish:				/* Substring not found */
941     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
942 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
943   fail:
944     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
945 			  PL_colors[4],PL_colors[5]));
946     return Nullch;
947 }
948 
949 /* We know what class REx starts with.  Try to find this position... */
950 STATIC char *
S_find_byclass(pTHX_ regexp * prog,regnode * c,char * s,char * strend,char * startpos,I32 norun)951 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
952 {
953 	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
954 	char *m;
955 	STRLEN ln;
956 	STRLEN lnc;
957 	register STRLEN uskip;
958 	unsigned int c1;
959 	unsigned int c2;
960 	char *e;
961 	register I32 tmp = 1;	/* Scratch variable? */
962 	register bool do_utf8 = PL_reg_match_utf8;
963 
964 	/* We know what class it must start with. */
965 	switch (OP(c)) {
966 	case ANYOF:
967 	    if (do_utf8) {
968 		 while (s + (uskip = UTF8SKIP(s)) <= strend) {
969 		      if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
970 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
971 			  reginclass(c, (U8*)s, 0, do_utf8) :
972 			  REGINCLASS(c, (U8*)s)) {
973 			   if (tmp && (norun || regtry(prog, s)))
974 				goto got_it;
975 			   else
976 				tmp = doevery;
977 		      }
978 		      else
979 			   tmp = 1;
980 		      s += uskip;
981 		 }
982 	    }
983 	    else {
984 		 while (s < strend) {
985 		      STRLEN skip = 1;
986 
987 		      if (REGINCLASS(c, (U8*)s) ||
988 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
989 			   /* The assignment of 2 is intentional:
990 			    * for the folded sharp s, the skip is 2. */
991 			   (skip = SHARP_S_SKIP))) {
992 			   if (tmp && (norun || regtry(prog, s)))
993 				goto got_it;
994 			   else
995 				tmp = doevery;
996 		      }
997 		      else
998 			   tmp = 1;
999 		      s += skip;
1000 		 }
1001 	    }
1002 	    break;
1003 	case CANY:
1004 	    while (s < strend) {
1005 	        if (tmp && (norun || regtry(prog, s)))
1006 		    goto got_it;
1007 		else
1008 		    tmp = doevery;
1009 		s++;
1010 	    }
1011 	    break;
1012 	case EXACTF:
1013 	    m   = STRING(c);
1014 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1015 	    lnc = (I32) ln;	/* length to match in characters */
1016 	    if (UTF) {
1017 	        STRLEN ulen1, ulen2;
1018 		U8 *sm = (U8 *) m;
1019 		U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
1020 		U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
1021 
1022 		to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1023 		to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1024 
1025 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1026 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1027 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1028 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1029 		lnc = 0;
1030 		while (sm < ((U8 *) m + ln)) {
1031 		    lnc++;
1032 		    sm += UTF8SKIP(sm);
1033 		}
1034 	    }
1035 	    else {
1036 		c1 = *(U8*)m;
1037 		c2 = PL_fold[c1];
1038 	    }
1039 	    goto do_exactf;
1040 	case EXACTFL:
1041 	    m   = STRING(c);
1042 	    ln  = STR_LEN(c);
1043 	    lnc = (I32) ln;
1044 	    c1 = *(U8*)m;
1045 	    c2 = PL_fold_locale[c1];
1046 	  do_exactf:
1047 	    e = HOP3c(strend, -((I32)lnc), s);
1048 
1049 	    if (norun && e < s)
1050 		e = s;			/* Due to minlen logic of intuit() */
1051 
1052 	    /* The idea in the EXACTF* cases is to first find the
1053 	     * first character of the EXACTF* node and then, if
1054 	     * necessary, case-insensitively compare the full
1055 	     * text of the node.  The c1 and c2 are the first
1056 	     * characters (though in Unicode it gets a bit
1057 	     * more complicated because there are more cases
1058 	     * than just upper and lower: one needs to use
1059 	     * the so-called folding case for case-insensitive
1060 	     * matching (called "loose matching" in Unicode).
1061 	     * ibcmp_utf8() will do just that. */
1062 
1063 	    if (do_utf8) {
1064 	        UV c, f;
1065 	        U8 tmpbuf [UTF8_MAXLEN+1];
1066 		U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1067 		STRLEN len, foldlen;
1068 
1069 		if (c1 == c2) {
1070 		    /* Upper and lower of 1st char are equal -
1071 		     * probably not a "letter". */
1072 		    while (s <= e) {
1073 		        c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1074 					   ckWARN(WARN_UTF8) ?
1075 					   0 : UTF8_ALLOW_ANY);
1076 			if ( c == c1
1077 			     && (ln == len ||
1078 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1079 					    m, (char **)0, ln, (bool)UTF))
1080 			     && (norun || regtry(prog, s)) )
1081 			    goto got_it;
1082 			else {
1083 			     uvchr_to_utf8(tmpbuf, c);
1084 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1085 			     if ( f != c
1086 				  && (f == c1 || f == c2)
1087 				  && (ln == foldlen ||
1088 				      !ibcmp_utf8((char *) foldbuf,
1089 						  (char **)0, foldlen, do_utf8,
1090 						  m,
1091 						  (char **)0, ln, (bool)UTF))
1092 				  && (norun || regtry(prog, s)) )
1093 				  goto got_it;
1094 			}
1095 			s += len;
1096 		    }
1097 		}
1098 		else {
1099 		    while (s <= e) {
1100 		      c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1101 					   ckWARN(WARN_UTF8) ?
1102 					   0 : UTF8_ALLOW_ANY);
1103 
1104 			/* Handle some of the three Greek sigmas cases.
1105 			 * Note that not all the possible combinations
1106 			 * are handled here: some of them are handled
1107 			 * by the standard folding rules, and some of
1108 			 * them (the character class or ANYOF cases)
1109 			 * are handled during compiletime in
1110 			 * regexec.c:S_regclass(). */
1111 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1112 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1113 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1114 
1115 			if ( (c == c1 || c == c2)
1116 			     && (ln == len ||
1117 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1118 					    m, (char **)0, ln, (bool)UTF))
1119 			     && (norun || regtry(prog, s)) )
1120 			    goto got_it;
1121 			else {
1122 			     uvchr_to_utf8(tmpbuf, c);
1123 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1124 			     if ( f != c
1125 				  && (f == c1 || f == c2)
1126 				  && (ln == foldlen ||
1127 				      !ibcmp_utf8((char *) foldbuf,
1128 						  (char **)0, foldlen, do_utf8,
1129 						  m,
1130 						  (char **)0, ln, (bool)UTF))
1131 				  && (norun || regtry(prog, s)) )
1132 				  goto got_it;
1133 			}
1134 			s += len;
1135 		    }
1136 		}
1137 	    }
1138 	    else {
1139 		if (c1 == c2)
1140 		    while (s <= e) {
1141 			if ( *(U8*)s == c1
1142 			     && (ln == 1 || !(OP(c) == EXACTF
1143 					      ? ibcmp(s, m, ln)
1144 					      : ibcmp_locale(s, m, ln)))
1145 			     && (norun || regtry(prog, s)) )
1146 			    goto got_it;
1147 			s++;
1148 		    }
1149 		else
1150 		    while (s <= e) {
1151 			if ( (*(U8*)s == c1 || *(U8*)s == c2)
1152 			     && (ln == 1 || !(OP(c) == EXACTF
1153 					      ? ibcmp(s, m, ln)
1154 					      : ibcmp_locale(s, m, ln)))
1155 			     && (norun || regtry(prog, s)) )
1156 			    goto got_it;
1157 			s++;
1158 		    }
1159 	    }
1160 	    break;
1161 	case BOUNDL:
1162 	    PL_reg_flags |= RF_tainted;
1163 	    /* FALL THROUGH */
1164 	case BOUND:
1165 	    if (do_utf8) {
1166 		if (s == PL_bostr)
1167 		    tmp = '\n';
1168 		else {
1169 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1170 
1171 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1172 		}
1173 		tmp = ((OP(c) == BOUND ?
1174 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1175 		LOAD_UTF8_CHARCLASS(alnum,"a");
1176 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1177 		    if (tmp == !(OP(c) == BOUND ?
1178 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1179 				 isALNUM_LC_utf8((U8*)s)))
1180 		    {
1181 			tmp = !tmp;
1182 			if ((norun || regtry(prog, s)))
1183 			    goto got_it;
1184 		    }
1185 		    s += uskip;
1186 		}
1187 	    }
1188 	    else {
1189 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1190 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1191 		while (s < strend) {
1192 		    if (tmp ==
1193 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1194 			tmp = !tmp;
1195 			if ((norun || regtry(prog, s)))
1196 			    goto got_it;
1197 		    }
1198 		    s++;
1199 		}
1200 	    }
1201 	    if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1202 		goto got_it;
1203 	    break;
1204 	case NBOUNDL:
1205 	    PL_reg_flags |= RF_tainted;
1206 	    /* FALL THROUGH */
1207 	case NBOUND:
1208 	    if (do_utf8) {
1209 		if (s == PL_bostr)
1210 		    tmp = '\n';
1211 		else {
1212 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1213 
1214 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1215 		}
1216 		tmp = ((OP(c) == NBOUND ?
1217 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1218 		LOAD_UTF8_CHARCLASS(alnum,"a");
1219 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1220 		    if (tmp == !(OP(c) == NBOUND ?
1221 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1222 				 isALNUM_LC_utf8((U8*)s)))
1223 			tmp = !tmp;
1224 		    else if ((norun || regtry(prog, s)))
1225 			goto got_it;
1226 		    s += uskip;
1227 		}
1228 	    }
1229 	    else {
1230 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1231 		tmp = ((OP(c) == NBOUND ?
1232 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1233 		while (s < strend) {
1234 		    if (tmp ==
1235 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1236 			tmp = !tmp;
1237 		    else if ((norun || regtry(prog, s)))
1238 			goto got_it;
1239 		    s++;
1240 		}
1241 	    }
1242 	    if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1243 		goto got_it;
1244 	    break;
1245 	case ALNUM:
1246 	    if (do_utf8) {
1247 		LOAD_UTF8_CHARCLASS(alnum,"a");
1248 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1249 		    if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1250 			if (tmp && (norun || regtry(prog, s)))
1251 			    goto got_it;
1252 			else
1253 			    tmp = doevery;
1254 		    }
1255 		    else
1256 			tmp = 1;
1257 		    s += uskip;
1258 		}
1259 	    }
1260 	    else {
1261 		while (s < strend) {
1262 		    if (isALNUM(*s)) {
1263 			if (tmp && (norun || regtry(prog, s)))
1264 			    goto got_it;
1265 			else
1266 			    tmp = doevery;
1267 		    }
1268 		    else
1269 			tmp = 1;
1270 		    s++;
1271 		}
1272 	    }
1273 	    break;
1274 	case ALNUML:
1275 	    PL_reg_flags |= RF_tainted;
1276 	    if (do_utf8) {
1277 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1278 		    if (isALNUM_LC_utf8((U8*)s)) {
1279 			if (tmp && (norun || regtry(prog, s)))
1280 			    goto got_it;
1281 			else
1282 			    tmp = doevery;
1283 		    }
1284 		    else
1285 			tmp = 1;
1286 		    s += uskip;
1287 		}
1288 	    }
1289 	    else {
1290 		while (s < strend) {
1291 		    if (isALNUM_LC(*s)) {
1292 			if (tmp && (norun || regtry(prog, s)))
1293 			    goto got_it;
1294 			else
1295 			    tmp = doevery;
1296 		    }
1297 		    else
1298 			tmp = 1;
1299 		    s++;
1300 		}
1301 	    }
1302 	    break;
1303 	case NALNUM:
1304 	    if (do_utf8) {
1305 		LOAD_UTF8_CHARCLASS(alnum,"a");
1306 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1307 		    if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1308 			if (tmp && (norun || regtry(prog, s)))
1309 			    goto got_it;
1310 			else
1311 			    tmp = doevery;
1312 		    }
1313 		    else
1314 			tmp = 1;
1315 		    s += uskip;
1316 		}
1317 	    }
1318 	    else {
1319 		while (s < strend) {
1320 		    if (!isALNUM(*s)) {
1321 			if (tmp && (norun || regtry(prog, s)))
1322 			    goto got_it;
1323 			else
1324 			    tmp = doevery;
1325 		    }
1326 		    else
1327 			tmp = 1;
1328 		    s++;
1329 		}
1330 	    }
1331 	    break;
1332 	case NALNUML:
1333 	    PL_reg_flags |= RF_tainted;
1334 	    if (do_utf8) {
1335 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1336 		    if (!isALNUM_LC_utf8((U8*)s)) {
1337 			if (tmp && (norun || regtry(prog, s)))
1338 			    goto got_it;
1339 			else
1340 			    tmp = doevery;
1341 		    }
1342 		    else
1343 			tmp = 1;
1344 		    s += uskip;
1345 		}
1346 	    }
1347 	    else {
1348 		while (s < strend) {
1349 		    if (!isALNUM_LC(*s)) {
1350 			if (tmp && (norun || regtry(prog, s)))
1351 			    goto got_it;
1352 			else
1353 			    tmp = doevery;
1354 		    }
1355 		    else
1356 			tmp = 1;
1357 		    s++;
1358 		}
1359 	    }
1360 	    break;
1361 	case SPACE:
1362 	    if (do_utf8) {
1363 		LOAD_UTF8_CHARCLASS(space," ");
1364 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1365 		    if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1366 			if (tmp && (norun || regtry(prog, s)))
1367 			    goto got_it;
1368 			else
1369 			    tmp = doevery;
1370 		    }
1371 		    else
1372 			tmp = 1;
1373 		    s += uskip;
1374 		}
1375 	    }
1376 	    else {
1377 		while (s < strend) {
1378 		    if (isSPACE(*s)) {
1379 			if (tmp && (norun || regtry(prog, s)))
1380 			    goto got_it;
1381 			else
1382 			    tmp = doevery;
1383 		    }
1384 		    else
1385 			tmp = 1;
1386 		    s++;
1387 		}
1388 	    }
1389 	    break;
1390 	case SPACEL:
1391 	    PL_reg_flags |= RF_tainted;
1392 	    if (do_utf8) {
1393 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1394 		    if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1395 			if (tmp && (norun || regtry(prog, s)))
1396 			    goto got_it;
1397 			else
1398 			    tmp = doevery;
1399 		    }
1400 		    else
1401 			tmp = 1;
1402 		    s += uskip;
1403 		}
1404 	    }
1405 	    else {
1406 		while (s < strend) {
1407 		    if (isSPACE_LC(*s)) {
1408 			if (tmp && (norun || regtry(prog, s)))
1409 			    goto got_it;
1410 			else
1411 			    tmp = doevery;
1412 		    }
1413 		    else
1414 			tmp = 1;
1415 		    s++;
1416 		}
1417 	    }
1418 	    break;
1419 	case NSPACE:
1420 	    if (do_utf8) {
1421 		LOAD_UTF8_CHARCLASS(space," ");
1422 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1423 		    if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1424 			if (tmp && (norun || regtry(prog, s)))
1425 			    goto got_it;
1426 			else
1427 			    tmp = doevery;
1428 		    }
1429 		    else
1430 			tmp = 1;
1431 		    s += uskip;
1432 		}
1433 	    }
1434 	    else {
1435 		while (s < strend) {
1436 		    if (!isSPACE(*s)) {
1437 			if (tmp && (norun || regtry(prog, s)))
1438 			    goto got_it;
1439 			else
1440 			    tmp = doevery;
1441 		    }
1442 		    else
1443 			tmp = 1;
1444 		    s++;
1445 		}
1446 	    }
1447 	    break;
1448 	case NSPACEL:
1449 	    PL_reg_flags |= RF_tainted;
1450 	    if (do_utf8) {
1451 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1452 		    if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1453 			if (tmp && (norun || regtry(prog, s)))
1454 			    goto got_it;
1455 			else
1456 			    tmp = doevery;
1457 		    }
1458 		    else
1459 			tmp = 1;
1460 		    s += uskip;
1461 		}
1462 	    }
1463 	    else {
1464 		while (s < strend) {
1465 		    if (!isSPACE_LC(*s)) {
1466 			if (tmp && (norun || regtry(prog, s)))
1467 			    goto got_it;
1468 			else
1469 			    tmp = doevery;
1470 		    }
1471 		    else
1472 			tmp = 1;
1473 		    s++;
1474 		}
1475 	    }
1476 	    break;
1477 	case DIGIT:
1478 	    if (do_utf8) {
1479 		LOAD_UTF8_CHARCLASS(digit,"0");
1480 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1481 		    if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1482 			if (tmp && (norun || regtry(prog, s)))
1483 			    goto got_it;
1484 			else
1485 			    tmp = doevery;
1486 		    }
1487 		    else
1488 			tmp = 1;
1489 		    s += uskip;
1490 		}
1491 	    }
1492 	    else {
1493 		while (s < strend) {
1494 		    if (isDIGIT(*s)) {
1495 			if (tmp && (norun || regtry(prog, s)))
1496 			    goto got_it;
1497 			else
1498 			    tmp = doevery;
1499 		    }
1500 		    else
1501 			tmp = 1;
1502 		    s++;
1503 		}
1504 	    }
1505 	    break;
1506 	case DIGITL:
1507 	    PL_reg_flags |= RF_tainted;
1508 	    if (do_utf8) {
1509 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1510 		    if (isDIGIT_LC_utf8((U8*)s)) {
1511 			if (tmp && (norun || regtry(prog, s)))
1512 			    goto got_it;
1513 			else
1514 			    tmp = doevery;
1515 		    }
1516 		    else
1517 			tmp = 1;
1518 		    s += uskip;
1519 		}
1520 	    }
1521 	    else {
1522 		while (s < strend) {
1523 		    if (isDIGIT_LC(*s)) {
1524 			if (tmp && (norun || regtry(prog, s)))
1525 			    goto got_it;
1526 			else
1527 			    tmp = doevery;
1528 		    }
1529 		    else
1530 			tmp = 1;
1531 		    s++;
1532 		}
1533 	    }
1534 	    break;
1535 	case NDIGIT:
1536 	    if (do_utf8) {
1537 		LOAD_UTF8_CHARCLASS(digit,"0");
1538 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1539 		    if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1540 			if (tmp && (norun || regtry(prog, s)))
1541 			    goto got_it;
1542 			else
1543 			    tmp = doevery;
1544 		    }
1545 		    else
1546 			tmp = 1;
1547 		    s += uskip;
1548 		}
1549 	    }
1550 	    else {
1551 		while (s < strend) {
1552 		    if (!isDIGIT(*s)) {
1553 			if (tmp && (norun || regtry(prog, s)))
1554 			    goto got_it;
1555 			else
1556 			    tmp = doevery;
1557 		    }
1558 		    else
1559 			tmp = 1;
1560 		    s++;
1561 		}
1562 	    }
1563 	    break;
1564 	case NDIGITL:
1565 	    PL_reg_flags |= RF_tainted;
1566 	    if (do_utf8) {
1567 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1568 		    if (!isDIGIT_LC_utf8((U8*)s)) {
1569 			if (tmp && (norun || regtry(prog, s)))
1570 			    goto got_it;
1571 			else
1572 			    tmp = doevery;
1573 		    }
1574 		    else
1575 			tmp = 1;
1576 		    s += uskip;
1577 		}
1578 	    }
1579 	    else {
1580 		while (s < strend) {
1581 		    if (!isDIGIT_LC(*s)) {
1582 			if (tmp && (norun || regtry(prog, s)))
1583 			    goto got_it;
1584 			else
1585 			    tmp = doevery;
1586 		    }
1587 		    else
1588 			tmp = 1;
1589 		    s++;
1590 		}
1591 	    }
1592 	    break;
1593 	default:
1594 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1595 	    break;
1596 	}
1597 	return 0;
1598       got_it:
1599 	return s;
1600 }
1601 
1602 /*
1603  - regexec_flags - match a regexp against a string
1604  */
1605 I32
Perl_regexec_flags(pTHX_ register regexp * prog,char * stringarg,register char * strend,char * strbeg,I32 minend,SV * sv,void * data,U32 flags)1606 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1607 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1608 /* strend: pointer to null at end of string */
1609 /* strbeg: real beginning of string */
1610 /* minend: end of match must be >=minend after stringarg. */
1611 /* data: May be used for some additional optimizations. */
1612 /* nosave: For optimizations. */
1613 {
1614     register char *s;
1615     register regnode *c;
1616     register char *startpos = stringarg;
1617     I32 minlen;		/* must match at least this many chars */
1618     I32 dontbother = 0;	/* how many characters not to try at end */
1619     /* I32 start_shift = 0; */		/* Offset of the start to find
1620 					 constant substr. */		/* CC */
1621     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1622     I32 scream_pos = -1;		/* Internal iterator of scream. */
1623     char *scream_olds;
1624     SV* oreplsv = GvSV(PL_replgv);
1625     bool do_utf8 = DO_UTF8(sv);
1626 #ifdef DEBUGGING
1627     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1628     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1629 #endif
1630     RX_MATCH_UTF8_set(prog,do_utf8);
1631 
1632     PL_regcc = 0;
1633 
1634     cache_re(prog);
1635 #ifdef DEBUGGING
1636     PL_regnarrate = DEBUG_r_TEST;
1637 #endif
1638 
1639     /* Be paranoid... */
1640     if (prog == NULL || startpos == NULL) {
1641 	Perl_croak(aTHX_ "NULL regexp parameter");
1642 	return 0;
1643     }
1644 
1645     minlen = prog->minlen;
1646     if (strend - startpos < minlen) {
1647         DEBUG_r(PerlIO_printf(Perl_debug_log,
1648 			      "String too short [regexec_flags]...\n"));
1649 	goto phooey;
1650     }
1651 
1652     /* Check validity of program. */
1653     if (UCHARAT(prog->program) != REG_MAGIC) {
1654 	Perl_croak(aTHX_ "corrupted regexp program");
1655     }
1656 
1657     PL_reg_flags = 0;
1658     PL_reg_eval_set = 0;
1659     PL_reg_maxiter = 0;
1660 
1661     if (prog->reganch & ROPT_UTF8)
1662 	PL_reg_flags |= RF_utf8;
1663 
1664     /* Mark beginning of line for ^ and lookbehind. */
1665     PL_regbol = startpos;
1666     PL_bostr  = strbeg;
1667     PL_reg_sv = sv;
1668 
1669     /* Mark end of line for $ (and such) */
1670     PL_regeol = strend;
1671 
1672     /* see how far we have to get to not match where we matched before */
1673     PL_regtill = startpos+minend;
1674 
1675     /* We start without call_cc context.  */
1676     PL_reg_call_cc = 0;
1677 
1678     /* If there is a "must appear" string, look for it. */
1679     s = startpos;
1680 
1681     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1682 	MAGIC *mg;
1683 
1684 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1685 	    PL_reg_ganch = startpos;
1686 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1687 		  && SvMAGIC(sv)
1688 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1689 		  && mg->mg_len >= 0) {
1690 	    PL_reg_ganch = strbeg + mg->mg_len;	/* Defined pos() */
1691 	    if (prog->reganch & ROPT_ANCH_GPOS) {
1692 	        if (s > PL_reg_ganch)
1693 		    goto phooey;
1694 		s = PL_reg_ganch;
1695 	    }
1696 	}
1697 	else				/* pos() not defined */
1698 	    PL_reg_ganch = strbeg;
1699     }
1700 
1701     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1702 	re_scream_pos_data d;
1703 
1704 	d.scream_olds = &scream_olds;
1705 	d.scream_pos = &scream_pos;
1706 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1707 	if (!s) {
1708 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1709 	    goto phooey;	/* not present */
1710 	}
1711     }
1712 
1713     DEBUG_r({
1714 	 char *s0   = UTF ?
1715 	   pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1716 			  UNI_DISPLAY_REGEX) :
1717 	   prog->precomp;
1718 	 int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1719 	 char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1720 					       UNI_DISPLAY_REGEX) : startpos;
1721 	 int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1722 	 if (!PL_colorset)
1723 	     reginitcolors();
1724 	 PerlIO_printf(Perl_debug_log,
1725 		       "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1726 		       PL_colors[4],PL_colors[5],PL_colors[0],
1727 		       len0, len0, s0,
1728 		       PL_colors[1],
1729 		       len0 > 60 ? "..." : "",
1730 		       PL_colors[0],
1731 		       (int)(len1 > 60 ? 60 : len1),
1732 		       s1, PL_colors[1],
1733 		       (len1 > 60 ? "..." : "")
1734 	      );
1735     });
1736 
1737     /* Simplest case:  anchored match need be tried only once. */
1738     /*  [unless only anchor is BOL and multiline is set] */
1739     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1740 	if (s == startpos && regtry(prog, startpos))
1741 	    goto got_it;
1742 	else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1743 		 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1744 	{
1745 	    char *end;
1746 
1747 	    if (minlen)
1748 		dontbother = minlen - 1;
1749 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1750 	    /* for multiline we only have to try after newlines */
1751 	    if (prog->check_substr || prog->check_utf8) {
1752 		if (s == startpos)
1753 		    goto after_try;
1754 		while (1) {
1755 		    if (regtry(prog, s))
1756 			goto got_it;
1757 		  after_try:
1758 		    if (s >= end)
1759 			goto phooey;
1760 		    if (prog->reganch & RE_USE_INTUIT) {
1761 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1762 			if (!s)
1763 			    goto phooey;
1764 		    }
1765 		    else
1766 			s++;
1767 		}
1768 	    } else {
1769 		if (s > startpos)
1770 		    s--;
1771 		while (s < end) {
1772 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1773 			if (regtry(prog, s))
1774 			    goto got_it;
1775 		    }
1776 		}
1777 	    }
1778 	}
1779 	goto phooey;
1780     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1781 	if (regtry(prog, PL_reg_ganch))
1782 	    goto got_it;
1783 	goto phooey;
1784     }
1785 
1786     /* Messy cases:  unanchored match. */
1787     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1788 	/* we have /x+whatever/ */
1789 	/* it must be a one character string (XXXX Except UTF?) */
1790 	char ch;
1791 #ifdef DEBUGGING
1792 	int did_match = 0;
1793 #endif
1794 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1795 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1796 	ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1797 
1798 	if (do_utf8) {
1799 	    while (s < strend) {
1800 		if (*s == ch) {
1801 		    DEBUG_r( did_match = 1 );
1802 		    if (regtry(prog, s)) goto got_it;
1803 		    s += UTF8SKIP(s);
1804 		    while (s < strend && *s == ch)
1805 			s += UTF8SKIP(s);
1806 		}
1807 		s += UTF8SKIP(s);
1808 	    }
1809 	}
1810 	else {
1811 	    while (s < strend) {
1812 		if (*s == ch) {
1813 		    DEBUG_r( did_match = 1 );
1814 		    if (regtry(prog, s)) goto got_it;
1815 		    s++;
1816 		    while (s < strend && *s == ch)
1817 			s++;
1818 		}
1819 		s++;
1820 	    }
1821 	}
1822 	DEBUG_r(if (!did_match)
1823 		PerlIO_printf(Perl_debug_log,
1824                                   "Did not find anchored character...\n")
1825                );
1826     }
1827     /*SUPPRESS 560*/
1828     else if (prog->anchored_substr != Nullsv
1829 	      || prog->anchored_utf8 != Nullsv
1830 	      || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1831 		  && prog->float_max_offset < strend - s)) {
1832 	SV *must;
1833 	I32 back_max;
1834 	I32 back_min;
1835 	char *last;
1836 	char *last1;		/* Last position checked before */
1837 #ifdef DEBUGGING
1838 	int did_match = 0;
1839 #endif
1840 	if (prog->anchored_substr || prog->anchored_utf8) {
1841 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1842 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1843 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1844 	    back_max = back_min = prog->anchored_offset;
1845 	} else {
1846 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1847 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1848 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1849 	    back_max = prog->float_max_offset;
1850 	    back_min = prog->float_min_offset;
1851 	}
1852 	if (must == &PL_sv_undef)
1853 	    /* could not downgrade utf8 check substring, so must fail */
1854 	    goto phooey;
1855 
1856 	last = HOP3c(strend,	/* Cannot start after this */
1857 			  -(I32)(CHR_SVLEN(must)
1858 				 - (SvTAIL(must) != 0) + back_min), strbeg);
1859 
1860 	if (s > PL_bostr)
1861 	    last1 = HOPc(s, -1);
1862 	else
1863 	    last1 = s - 1;	/* bogus */
1864 
1865 	/* XXXX check_substr already used to find `s', can optimize if
1866 	   check_substr==must. */
1867 	scream_pos = -1;
1868 	dontbother = end_shift;
1869 	strend = HOPc(strend, -dontbother);
1870 	while ( (s <= last) &&
1871 		((flags & REXEC_SCREAM)
1872 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1873 				    end_shift, &scream_pos, 0))
1874 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1875 				  (unsigned char*)strend, must,
1876 				  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1877 	    /* we may be pointing at the wrong string */
1878 	    if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1879 		s = strbeg + (s - SvPVX(sv));
1880 	    DEBUG_r( did_match = 1 );
1881 	    if (HOPc(s, -back_max) > last1) {
1882 		last1 = HOPc(s, -back_min);
1883 		s = HOPc(s, -back_max);
1884 	    }
1885 	    else {
1886 		char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1887 
1888 		last1 = HOPc(s, -back_min);
1889 		s = t;
1890 	    }
1891 	    if (do_utf8) {
1892 		while (s <= last1) {
1893 		    if (regtry(prog, s))
1894 			goto got_it;
1895 		    s += UTF8SKIP(s);
1896 		}
1897 	    }
1898 	    else {
1899 		while (s <= last1) {
1900 		    if (regtry(prog, s))
1901 			goto got_it;
1902 		    s++;
1903 		}
1904 	    }
1905 	}
1906 	DEBUG_r(if (!did_match)
1907                     PerlIO_printf(Perl_debug_log,
1908                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1909 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
1910 			       ? "anchored" : "floating"),
1911 			      PL_colors[0],
1912 			      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1913 			      SvPVX(must),
1914                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1915                );
1916 	goto phooey;
1917     }
1918     else if ((c = prog->regstclass)) {
1919 	if (minlen) {
1920 	    I32 op = (U8)OP(prog->regstclass);
1921 	    /* don't bother with what can't match */
1922 	    if (PL_regkind[op] != EXACT && op != CANY)
1923 	        strend = HOPc(strend, -(minlen - 1));
1924 	}
1925 	DEBUG_r({
1926 	    SV *prop = sv_newmortal();
1927 	    char *s0;
1928 	    char *s1;
1929 	    int len0;
1930 	    int len1;
1931 
1932 	    regprop(prop, c);
1933 	    s0 = UTF ?
1934 	      pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1935 			     UNI_DISPLAY_REGEX) :
1936 	      SvPVX(prop);
1937 	    len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1938 	    s1 = UTF ?
1939 	      sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1940 	    len1 = UTF ? SvCUR(dsv1) : strend - s;
1941 	    PerlIO_printf(Perl_debug_log,
1942 			  "Matching stclass `%*.*s' against `%*.*s'\n",
1943 			  len0, len0, s0,
1944 			  len1, len1, s1);
1945 	});
1946   	if (find_byclass(prog, c, s, strend, startpos, 0))
1947 	    goto got_it;
1948 	DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1949     }
1950     else {
1951 	dontbother = 0;
1952 	if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1953 	    /* Trim the end. */
1954 	    char *last;
1955 	    SV* float_real;
1956 
1957 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1958 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1959 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1960 
1961 	    if (flags & REXEC_SCREAM) {
1962 		last = screaminstr(sv, float_real, s - strbeg,
1963 				   end_shift, &scream_pos, 1); /* last one */
1964 		if (!last)
1965 		    last = scream_olds; /* Only one occurrence. */
1966 		/* we may be pointing at the wrong string */
1967 		else if (RX_MATCH_COPIED(prog))
1968 		    s = strbeg + (s - SvPVX(sv));
1969 	    }
1970 	    else {
1971 		STRLEN len;
1972 		char *little = SvPV(float_real, len);
1973 
1974 		if (SvTAIL(float_real)) {
1975 		    if (memEQ(strend - len + 1, little, len - 1))
1976 			last = strend - len + 1;
1977 		    else if (!PL_multiline)
1978 			last = memEQ(strend - len, little, len)
1979 			    ? strend - len : Nullch;
1980 		    else
1981 			goto find_last;
1982 		} else {
1983 		  find_last:
1984 		    if (len)
1985 			last = rninstr(s, strend, little, little + len);
1986 		    else
1987 			last = strend;	/* matching `$' */
1988 		}
1989 	    }
1990 	    if (last == NULL) {
1991 		DEBUG_r(PerlIO_printf(Perl_debug_log,
1992 				      "%sCan't trim the tail, match fails (should not happen)%s\n",
1993 				      PL_colors[4],PL_colors[5]));
1994 		goto phooey; /* Should not happen! */
1995 	    }
1996 	    dontbother = strend - last + prog->float_min_offset;
1997 	}
1998 	if (minlen && (dontbother < minlen))
1999 	    dontbother = minlen - 1;
2000 	strend -= dontbother; 		   /* this one's always in bytes! */
2001 	/* We don't know much -- general case. */
2002 	if (do_utf8) {
2003 	    for (;;) {
2004 		if (regtry(prog, s))
2005 		    goto got_it;
2006 		if (s >= strend)
2007 		    break;
2008 		s += UTF8SKIP(s);
2009 	    };
2010 	}
2011 	else {
2012 	    do {
2013 		if (regtry(prog, s))
2014 		    goto got_it;
2015 	    } while (s++ < strend);
2016 	}
2017     }
2018 
2019     /* Failure. */
2020     goto phooey;
2021 
2022 got_it:
2023     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2024 
2025     if (PL_reg_eval_set) {
2026 	/* Preserve the current value of $^R */
2027 	if (oreplsv != GvSV(PL_replgv))
2028 	    sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2029 						  restored, the value remains
2030 						  the same. */
2031 	restore_pos(aTHX_ 0);
2032     }
2033 
2034     /* make sure $`, $&, $', and $digit will work later */
2035     if ( !(flags & REXEC_NOT_FIRST) ) {
2036 	if (RX_MATCH_COPIED(prog)) {
2037 	    Safefree(prog->subbeg);
2038 	    RX_MATCH_COPIED_off(prog);
2039 	}
2040 	if (flags & REXEC_COPY_STR) {
2041 	    I32 i = PL_regeol - startpos + (stringarg - strbeg);
2042 
2043 	    s = savepvn(strbeg, i);
2044 	    prog->subbeg = s;
2045 	    prog->sublen = i;
2046 	    RX_MATCH_COPIED_on(prog);
2047 	}
2048 	else {
2049 	    prog->subbeg = strbeg;
2050 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2051 	}
2052     }
2053 
2054     return 1;
2055 
2056 phooey:
2057     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2058 			  PL_colors[4],PL_colors[5]));
2059     if (PL_reg_eval_set)
2060 	restore_pos(aTHX_ 0);
2061     return 0;
2062 }
2063 
2064 /*
2065  - regtry - try match at specific point
2066  */
2067 STATIC I32			/* 0 failure, 1 success */
S_regtry(pTHX_ regexp * prog,char * startpos)2068 S_regtry(pTHX_ regexp *prog, char *startpos)
2069 {
2070     register I32 i;
2071     register I32 *sp;
2072     register I32 *ep;
2073     CHECKPOINT lastcp;
2074 
2075 #ifdef DEBUGGING
2076     PL_regindent = 0;	/* XXXX Not good when matches are reenterable... */
2077 #endif
2078     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2079 	MAGIC *mg;
2080 
2081 	PL_reg_eval_set = RS_init;
2082 	DEBUG_r(DEBUG_s(
2083 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2084 			  (IV)(PL_stack_sp - PL_stack_base));
2085 	    ));
2086 	SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2087 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2088 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2089 	SAVETMPS;
2090 	/* Apparently this is not needed, judging by wantarray. */
2091 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2092 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2093 
2094 	if (PL_reg_sv) {
2095 	    /* Make $_ available to executed code. */
2096 	    if (PL_reg_sv != DEFSV) {
2097 		/* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2098 		SAVESPTR(DEFSV);
2099 		DEFSV = PL_reg_sv;
2100 	    }
2101 
2102 	    if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2103 		  && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2104 		/* prepare for quick setting of pos */
2105 		sv_magic(PL_reg_sv, (SV*)0,
2106 			PERL_MAGIC_regex_global, Nullch, 0);
2107 		mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2108 		mg->mg_len = -1;
2109 	    }
2110 	    PL_reg_magic    = mg;
2111 	    PL_reg_oldpos   = mg->mg_len;
2112 	    SAVEDESTRUCTOR_X(restore_pos, 0);
2113         }
2114         if (!PL_reg_curpm) {
2115 	    Newz(22,PL_reg_curpm, 1, PMOP);
2116 #ifdef USE_ITHREADS
2117             {
2118                 SV* repointer = newSViv(0);
2119                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2120                 SvFLAGS(repointer) |= SVf_BREAK;
2121                 av_push(PL_regex_padav,repointer);
2122                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2123                 PL_regex_pad = AvARRAY(PL_regex_padav);
2124             }
2125 #endif
2126         }
2127 	PM_SETRE(PL_reg_curpm, prog);
2128 	PL_reg_oldcurpm = PL_curpm;
2129 	PL_curpm = PL_reg_curpm;
2130 	if (RX_MATCH_COPIED(prog)) {
2131 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2132 		since it may be needed if this match fails.  Thus
2133 		$` inside (?{}) could fail... */
2134 	    PL_reg_oldsaved = prog->subbeg;
2135 	    PL_reg_oldsavedlen = prog->sublen;
2136 	    RX_MATCH_COPIED_off(prog);
2137 	}
2138 	else
2139 	    PL_reg_oldsaved = Nullch;
2140 	prog->subbeg = PL_bostr;
2141 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2142     }
2143     prog->startp[0] = startpos - PL_bostr;
2144     PL_reginput = startpos;
2145     PL_regstartp = prog->startp;
2146     PL_regendp = prog->endp;
2147     PL_reglastparen = &prog->lastparen;
2148     PL_reglastcloseparen = &prog->lastcloseparen;
2149     prog->lastparen = 0;
2150     prog->lastcloseparen = 0;
2151     PL_regsize = 0;
2152     DEBUG_r(PL_reg_starttry = startpos);
2153     if (PL_reg_start_tmpl <= prog->nparens) {
2154 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2155         if(PL_reg_start_tmp)
2156             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2157         else
2158             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2159     }
2160 
2161     /* XXXX What this code is doing here?!!!  There should be no need
2162        to do this again and again, PL_reglastparen should take care of
2163        this!  --ilya*/
2164 
2165     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2166      * Actually, the code in regcppop() (which Ilya may be meaning by
2167      * PL_reglastparen), is not needed at all by the test suite
2168      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2169      * enough, for building DynaLoader, or otherwise this
2170      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2171      * will happen.  Meanwhile, this code *is* needed for the
2172      * above-mentioned test suite tests to succeed.  The common theme
2173      * on those tests seems to be returning null fields from matches.
2174      * --jhi */
2175 #if 1
2176     sp = prog->startp;
2177     ep = prog->endp;
2178     if (prog->nparens) {
2179 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2180 	    *++sp = -1;
2181 	    *++ep = -1;
2182 	}
2183     }
2184 #endif
2185     REGCP_SET(lastcp);
2186     if (regmatch(prog->program + 1)) {
2187 	prog->endp[0] = PL_reginput - PL_bostr;
2188 	return 1;
2189     }
2190     REGCP_UNWIND(lastcp);
2191     return 0;
2192 }
2193 
2194 #define RE_UNWIND_BRANCH	1
2195 #define RE_UNWIND_BRANCHJ	2
2196 
2197 union re_unwind_t;
2198 
2199 typedef struct {		/* XX: makes sense to enlarge it... */
2200     I32 type;
2201     I32 prev;
2202     CHECKPOINT lastcp;
2203 } re_unwind_generic_t;
2204 
2205 typedef struct {
2206     I32 type;
2207     I32 prev;
2208     CHECKPOINT lastcp;
2209     I32 lastparen;
2210     regnode *next;
2211     char *locinput;
2212     I32 nextchr;
2213 #ifdef DEBUGGING
2214     int regindent;
2215 #endif
2216 } re_unwind_branch_t;
2217 
2218 typedef union re_unwind_t {
2219     I32 type;
2220     re_unwind_generic_t generic;
2221     re_unwind_branch_t branch;
2222 } re_unwind_t;
2223 
2224 #define sayYES goto yes
2225 #define sayNO goto no
2226 #define sayNO_ANYOF goto no_anyof
2227 #define sayYES_FINAL goto yes_final
2228 #define sayYES_LOUD  goto yes_loud
2229 #define sayNO_FINAL  goto no_final
2230 #define sayNO_SILENT goto do_no
2231 #define saySAME(x) if (x) goto yes; else goto no
2232 
2233 #define REPORT_CODE_OFF 24
2234 
2235 /*
2236  - regmatch - main matching routine
2237  *
2238  * Conceptually the strategy is simple:  check to see whether the current
2239  * node matches, call self recursively to see whether the rest matches,
2240  * and then act accordingly.  In practice we make some effort to avoid
2241  * recursion, in particular by going through "ordinary" nodes (that don't
2242  * need to know whether the rest of the match failed) by a loop instead of
2243  * by recursion.
2244  */
2245 /* [lwall] I've hoisted the register declarations to the outer block in order to
2246  * maybe save a little bit of pushing and popping on the stack.  It also takes
2247  * advantage of machines that use a register save mask on subroutine entry.
2248  */
2249 STATIC I32			/* 0 failure, 1 success */
S_regmatch(pTHX_ regnode * prog)2250 S_regmatch(pTHX_ regnode *prog)
2251 {
2252     register regnode *scan;	/* Current node. */
2253     regnode *next;		/* Next node. */
2254     regnode *inner;		/* Next node in internal branch. */
2255     register I32 nextchr;	/* renamed nextchr - nextchar colides with
2256 				   function of same name */
2257     register I32 n;		/* no or next */
2258     register I32 ln = 0;	/* len or last */
2259     register char *s = Nullch;	/* operand or save */
2260     register char *locinput = PL_reginput;
2261     register I32 c1 = 0, c2 = 0, paren;	/* case fold search, parenth */
2262     int minmod = 0, sw = 0, logical = 0;
2263     I32 unwind = 0;
2264 #if 0
2265     I32 firstcp = PL_savestack_ix;
2266 #endif
2267     register bool do_utf8 = PL_reg_match_utf8;
2268 #ifdef DEBUGGING
2269     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2270     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2271     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2272 #endif
2273 
2274 #ifdef DEBUGGING
2275     PL_regindent++;
2276 #endif
2277 
2278     /* Note that nextchr is a byte even in UTF */
2279     nextchr = UCHARAT(locinput);
2280     scan = prog;
2281     while (scan != NULL) {
2282 
2283         DEBUG_r( {
2284 	    SV *prop = sv_newmortal();
2285 	    int docolor = *PL_colors[0];
2286 	    int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2287 	    int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2288 	    /* The part of the string before starttry has one color
2289 	       (pref0_len chars), between starttry and current
2290 	       position another one (pref_len - pref0_len chars),
2291 	       after the current position the third one.
2292 	       We assume that pref0_len <= pref_len, otherwise we
2293 	       decrease pref0_len.  */
2294 	    int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2295 		? (5 + taill) - l : locinput - PL_bostr;
2296 	    int pref0_len;
2297 
2298 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2299 		pref_len++;
2300 	    pref0_len = pref_len  - (locinput - PL_reg_starttry);
2301 	    if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2302 		l = ( PL_regeol - locinput > (5 + taill) - pref_len
2303 		      ? (5 + taill) - pref_len : PL_regeol - locinput);
2304 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2305 		l--;
2306 	    if (pref0_len < 0)
2307 		pref0_len = 0;
2308 	    if (pref0_len > pref_len)
2309 		pref0_len = pref_len;
2310 	    regprop(prop, scan);
2311 	    {
2312 	      char *s0 =
2313 		do_utf8 && OP(scan) != CANY ?
2314 		pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2315 			       pref0_len, 60, UNI_DISPLAY_REGEX) :
2316 		locinput - pref_len;
2317 	      int len0 = do_utf8 ? strlen(s0) : pref0_len;
2318 	      char *s1 = do_utf8 && OP(scan) != CANY ?
2319 		pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2320 			       pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2321 		locinput - pref_len + pref0_len;
2322 	      int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2323 	      char *s2 = do_utf8 && OP(scan) != CANY ?
2324 		pv_uni_display(dsv2, (U8*)locinput,
2325 			       PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2326 		locinput;
2327 	      int len2 = do_utf8 ? strlen(s2) : l;
2328 	      PerlIO_printf(Perl_debug_log,
2329 			    "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2330 			    (IV)(locinput - PL_bostr),
2331 			    PL_colors[4],
2332 			    len0, s0,
2333 			    PL_colors[5],
2334 			    PL_colors[2],
2335 			    len1, s1,
2336 			    PL_colors[3],
2337 			    (docolor ? "" : "> <"),
2338 			    PL_colors[0],
2339 			    len2, s2,
2340 			    PL_colors[1],
2341 			    15 - l - pref_len + 1,
2342 			    "",
2343 			    (IV)(scan - PL_regprogram), PL_regindent*2, "",
2344 			    SvPVX(prop));
2345 	    }
2346 	});
2347 
2348 	next = scan + NEXT_OFF(scan);
2349 	if (next == scan)
2350 	    next = NULL;
2351 
2352 	switch (OP(scan)) {
2353 	case BOL:
2354 	    if (locinput == PL_bostr || (PL_multiline &&
2355 		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2356 	    {
2357 		/* regtill = regbol; */
2358 		break;
2359 	    }
2360 	    sayNO;
2361 	case MBOL:
2362 	    if (locinput == PL_bostr ||
2363 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2364 	    {
2365 		break;
2366 	    }
2367 	    sayNO;
2368 	case SBOL:
2369 	    if (locinput == PL_bostr)
2370 		break;
2371 	    sayNO;
2372 	case GPOS:
2373 	    if (locinput == PL_reg_ganch)
2374 		break;
2375 	    sayNO;
2376 	case EOL:
2377 	    if (PL_multiline)
2378 		goto meol;
2379 	    else
2380 		goto seol;
2381 	case MEOL:
2382 	  meol:
2383 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2384 		sayNO;
2385 	    break;
2386 	case SEOL:
2387 	  seol:
2388 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2389 		sayNO;
2390 	    if (PL_regeol - locinput > 1)
2391 		sayNO;
2392 	    break;
2393 	case EOS:
2394 	    if (PL_regeol != locinput)
2395 		sayNO;
2396 	    break;
2397 	case SANY:
2398 	    if (!nextchr && locinput >= PL_regeol)
2399 		sayNO;
2400  	    if (do_utf8) {
2401 	        locinput += PL_utf8skip[nextchr];
2402 		if (locinput > PL_regeol)
2403  		    sayNO;
2404  		nextchr = UCHARAT(locinput);
2405  	    }
2406  	    else
2407  		nextchr = UCHARAT(++locinput);
2408 	    break;
2409 	case CANY:
2410 	    if (!nextchr && locinput >= PL_regeol)
2411 		sayNO;
2412 	    nextchr = UCHARAT(++locinput);
2413 	    break;
2414 	case REG_ANY:
2415 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2416 		sayNO;
2417 	    if (do_utf8) {
2418 		locinput += PL_utf8skip[nextchr];
2419 		if (locinput > PL_regeol)
2420 		    sayNO;
2421 		nextchr = UCHARAT(locinput);
2422 	    }
2423 	    else
2424 		nextchr = UCHARAT(++locinput);
2425 	    break;
2426 	case EXACT:
2427 	    s = STRING(scan);
2428 	    ln = STR_LEN(scan);
2429 	    if (do_utf8 != UTF) {
2430 		/* The target and the pattern have differing utf8ness. */
2431 		char *l = locinput;
2432 		char *e = s + ln;
2433 		STRLEN ulen;
2434 
2435 		if (do_utf8) {
2436 		    /* The target is utf8, the pattern is not utf8. */
2437 		    while (s < e) {
2438 			if (l >= PL_regeol)
2439 			     sayNO;
2440 			if (NATIVE_TO_UNI(*(U8*)s) !=
2441 			    utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2442 					   ckWARN(WARN_UTF8) ?
2443 					   0 : UTF8_ALLOW_ANY))
2444 			     sayNO;
2445 			l += ulen;
2446 			s ++;
2447 		    }
2448 		}
2449 		else {
2450 		    /* The target is not utf8, the pattern is utf8. */
2451 		    while (s < e) {
2452 			if (l >= PL_regeol)
2453 			    sayNO;
2454 			if (NATIVE_TO_UNI(*((U8*)l)) !=
2455 			    utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2456 					   ckWARN(WARN_UTF8) ?
2457 					   0 : UTF8_ALLOW_ANY))
2458 			    sayNO;
2459 			s += ulen;
2460 			l ++;
2461 		    }
2462 		}
2463 		locinput = l;
2464 		nextchr = UCHARAT(locinput);
2465 		break;
2466 	    }
2467 	    /* The target and the pattern have the same utf8ness. */
2468 	    /* Inline the first character, for speed. */
2469 	    if (UCHARAT(s) != nextchr)
2470 		sayNO;
2471 	    if (PL_regeol - locinput < ln)
2472 		sayNO;
2473 	    if (ln > 1 && memNE(s, locinput, ln))
2474 		sayNO;
2475 	    locinput += ln;
2476 	    nextchr = UCHARAT(locinput);
2477 	    break;
2478 	case EXACTFL:
2479 	    PL_reg_flags |= RF_tainted;
2480 	    /* FALL THROUGH */
2481 	case EXACTF:
2482 	    s = STRING(scan);
2483 	    ln = STR_LEN(scan);
2484 
2485 	    if (do_utf8 || UTF) {
2486 	      /* Either target or the pattern are utf8. */
2487 		char *l = locinput;
2488 		char *e = PL_regeol;
2489 
2490 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2491 			       l, &e, 0,  do_utf8)) {
2492 		     /* One more case for the sharp s:
2493 		      * pack("U0U*", 0xDF) =~ /ss/i,
2494 		      * the 0xC3 0x9F are the UTF-8
2495 		      * byte sequence for the U+00DF. */
2496 		     if (!(do_utf8 &&
2497 			   toLOWER(s[0]) == 's' &&
2498 			   ln >= 2 &&
2499 			   toLOWER(s[1]) == 's' &&
2500 			   (U8)l[0] == 0xC3 &&
2501 			   e - l >= 2 &&
2502 			   (U8)l[1] == 0x9F))
2503 			  sayNO;
2504 		}
2505 		locinput = e;
2506 		nextchr = UCHARAT(locinput);
2507 		break;
2508 	    }
2509 
2510 	    /* Neither the target and the pattern are utf8. */
2511 
2512 	    /* Inline the first character, for speed. */
2513 	    if (UCHARAT(s) != nextchr &&
2514 		UCHARAT(s) != ((OP(scan) == EXACTF)
2515 			       ? PL_fold : PL_fold_locale)[nextchr])
2516 		sayNO;
2517 	    if (PL_regeol - locinput < ln)
2518 		sayNO;
2519 	    if (ln > 1 && (OP(scan) == EXACTF
2520 			   ? ibcmp(s, locinput, ln)
2521 			   : ibcmp_locale(s, locinput, ln)))
2522 		sayNO;
2523 	    locinput += ln;
2524 	    nextchr = UCHARAT(locinput);
2525 	    break;
2526 	case ANYOF:
2527 	    if (do_utf8) {
2528 	        STRLEN inclasslen = PL_regeol - locinput;
2529 
2530 	        if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2531 		    sayNO_ANYOF;
2532 		if (locinput >= PL_regeol)
2533 		    sayNO;
2534 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2535 		nextchr = UCHARAT(locinput);
2536 		break;
2537 	    }
2538 	    else {
2539 		if (nextchr < 0)
2540 		    nextchr = UCHARAT(locinput);
2541 		if (!REGINCLASS(scan, (U8*)locinput))
2542 		    sayNO_ANYOF;
2543 		if (!nextchr && locinput >= PL_regeol)
2544 		    sayNO;
2545 		nextchr = UCHARAT(++locinput);
2546 		break;
2547 	    }
2548 	no_anyof:
2549 	    /* If we might have the case of the German sharp s
2550 	     * in a casefolding Unicode character class. */
2551 
2552 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2553 		 locinput += SHARP_S_SKIP;
2554 		 nextchr = UCHARAT(locinput);
2555 	    }
2556 	    else
2557 		 sayNO;
2558 	    break;
2559 	case ALNUML:
2560 	    PL_reg_flags |= RF_tainted;
2561 	    /* FALL THROUGH */
2562 	case ALNUM:
2563 	    if (!nextchr)
2564 		sayNO;
2565 	    if (do_utf8) {
2566 		LOAD_UTF8_CHARCLASS(alnum,"a");
2567 		if (!(OP(scan) == ALNUM
2568 		      ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2569 		      : isALNUM_LC_utf8((U8*)locinput)))
2570 		{
2571 		    sayNO;
2572 		}
2573 		locinput += PL_utf8skip[nextchr];
2574 		nextchr = UCHARAT(locinput);
2575 		break;
2576 	    }
2577 	    if (!(OP(scan) == ALNUM
2578 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2579 		sayNO;
2580 	    nextchr = UCHARAT(++locinput);
2581 	    break;
2582 	case NALNUML:
2583 	    PL_reg_flags |= RF_tainted;
2584 	    /* FALL THROUGH */
2585 	case NALNUM:
2586 	    if (!nextchr && locinput >= PL_regeol)
2587 		sayNO;
2588 	    if (do_utf8) {
2589 		LOAD_UTF8_CHARCLASS(alnum,"a");
2590 		if (OP(scan) == NALNUM
2591 		    ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2592 		    : isALNUM_LC_utf8((U8*)locinput))
2593 		{
2594 		    sayNO;
2595 		}
2596 		locinput += PL_utf8skip[nextchr];
2597 		nextchr = UCHARAT(locinput);
2598 		break;
2599 	    }
2600 	    if (OP(scan) == NALNUM
2601 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
2602 		sayNO;
2603 	    nextchr = UCHARAT(++locinput);
2604 	    break;
2605 	case BOUNDL:
2606 	case NBOUNDL:
2607 	    PL_reg_flags |= RF_tainted;
2608 	    /* FALL THROUGH */
2609 	case BOUND:
2610 	case NBOUND:
2611 	    /* was last char in word? */
2612 	    if (do_utf8) {
2613 		if (locinput == PL_bostr)
2614 		    ln = '\n';
2615 		else {
2616 		    U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2617 
2618 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2619 		}
2620 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2621 		    ln = isALNUM_uni(ln);
2622 		    LOAD_UTF8_CHARCLASS(alnum,"a");
2623 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2624 		}
2625 		else {
2626 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2627 		    n = isALNUM_LC_utf8((U8*)locinput);
2628 		}
2629 	    }
2630 	    else {
2631 		ln = (locinput != PL_bostr) ?
2632 		    UCHARAT(locinput - 1) : '\n';
2633 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2634 		    ln = isALNUM(ln);
2635 		    n = isALNUM(nextchr);
2636 		}
2637 		else {
2638 		    ln = isALNUM_LC(ln);
2639 		    n = isALNUM_LC(nextchr);
2640 		}
2641 	    }
2642 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2643 				    OP(scan) == BOUNDL))
2644 		    sayNO;
2645 	    break;
2646 	case SPACEL:
2647 	    PL_reg_flags |= RF_tainted;
2648 	    /* FALL THROUGH */
2649 	case SPACE:
2650 	    if (!nextchr)
2651 		sayNO;
2652 	    if (do_utf8) {
2653 		if (UTF8_IS_CONTINUED(nextchr)) {
2654 		    LOAD_UTF8_CHARCLASS(space," ");
2655 		    if (!(OP(scan) == SPACE
2656 			  ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2657 			  : isSPACE_LC_utf8((U8*)locinput)))
2658 		    {
2659 			sayNO;
2660 		    }
2661 		    locinput += PL_utf8skip[nextchr];
2662 		    nextchr = UCHARAT(locinput);
2663 		    break;
2664 		}
2665 		if (!(OP(scan) == SPACE
2666 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2667 		    sayNO;
2668 		nextchr = UCHARAT(++locinput);
2669 	    }
2670 	    else {
2671 		if (!(OP(scan) == SPACE
2672 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2673 		    sayNO;
2674 		nextchr = UCHARAT(++locinput);
2675 	    }
2676 	    break;
2677 	case NSPACEL:
2678 	    PL_reg_flags |= RF_tainted;
2679 	    /* FALL THROUGH */
2680 	case NSPACE:
2681 	    if (!nextchr && locinput >= PL_regeol)
2682 		sayNO;
2683 	    if (do_utf8) {
2684 		LOAD_UTF8_CHARCLASS(space," ");
2685 		if (OP(scan) == NSPACE
2686 		    ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2687 		    : isSPACE_LC_utf8((U8*)locinput))
2688 		{
2689 		    sayNO;
2690 		}
2691 		locinput += PL_utf8skip[nextchr];
2692 		nextchr = UCHARAT(locinput);
2693 		break;
2694 	    }
2695 	    if (OP(scan) == NSPACE
2696 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
2697 		sayNO;
2698 	    nextchr = UCHARAT(++locinput);
2699 	    break;
2700 	case DIGITL:
2701 	    PL_reg_flags |= RF_tainted;
2702 	    /* FALL THROUGH */
2703 	case DIGIT:
2704 	    if (!nextchr)
2705 		sayNO;
2706 	    if (do_utf8) {
2707 		LOAD_UTF8_CHARCLASS(digit,"0");
2708 		if (!(OP(scan) == DIGIT
2709 		      ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2710 		      : isDIGIT_LC_utf8((U8*)locinput)))
2711 		{
2712 		    sayNO;
2713 		}
2714 		locinput += PL_utf8skip[nextchr];
2715 		nextchr = UCHARAT(locinput);
2716 		break;
2717 	    }
2718 	    if (!(OP(scan) == DIGIT
2719 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2720 		sayNO;
2721 	    nextchr = UCHARAT(++locinput);
2722 	    break;
2723 	case NDIGITL:
2724 	    PL_reg_flags |= RF_tainted;
2725 	    /* FALL THROUGH */
2726 	case NDIGIT:
2727 	    if (!nextchr && locinput >= PL_regeol)
2728 		sayNO;
2729 	    if (do_utf8) {
2730 		LOAD_UTF8_CHARCLASS(digit,"0");
2731 		if (OP(scan) == NDIGIT
2732 		    ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2733 		    : isDIGIT_LC_utf8((U8*)locinput))
2734 		{
2735 		    sayNO;
2736 		}
2737 		locinput += PL_utf8skip[nextchr];
2738 		nextchr = UCHARAT(locinput);
2739 		break;
2740 	    }
2741 	    if (OP(scan) == NDIGIT
2742 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2743 		sayNO;
2744 	    nextchr = UCHARAT(++locinput);
2745 	    break;
2746 	case CLUMP:
2747 	    if (locinput >= PL_regeol)
2748 		sayNO;
2749 	    if  (do_utf8) {
2750 		LOAD_UTF8_CHARCLASS(mark,"~");
2751 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2752 		    sayNO;
2753 		locinput += PL_utf8skip[nextchr];
2754 		while (locinput < PL_regeol &&
2755 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2756 		    locinput += UTF8SKIP(locinput);
2757 		if (locinput > PL_regeol)
2758 		    sayNO;
2759 	    }
2760 	    else
2761 	       locinput++;
2762 	    nextchr = UCHARAT(locinput);
2763 	    break;
2764 	case REFFL:
2765 	    PL_reg_flags |= RF_tainted;
2766 	    /* FALL THROUGH */
2767         case REF:
2768 	case REFF:
2769 	    n = ARG(scan);  /* which paren pair */
2770 	    ln = PL_regstartp[n];
2771 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2772 	    if ((I32)*PL_reglastparen < n || ln == -1)
2773 		sayNO;			/* Do not match unless seen CLOSEn. */
2774 	    if (ln == PL_regendp[n])
2775 		break;
2776 
2777 	    s = PL_bostr + ln;
2778 	    if (do_utf8 && OP(scan) != REF) {	/* REF can do byte comparison */
2779 		char *l = locinput;
2780 		char *e = PL_bostr + PL_regendp[n];
2781 		/*
2782 		 * Note that we can't do the "other character" lookup trick as
2783 		 * in the 8-bit case (no pun intended) because in Unicode we
2784 		 * have to map both upper and title case to lower case.
2785 		 */
2786 		if (OP(scan) == REFF) {
2787 		    STRLEN ulen1, ulen2;
2788 		    U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2789 		    U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2790 		    while (s < e) {
2791 			if (l >= PL_regeol)
2792 			    sayNO;
2793 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2794 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2795 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2796 			    sayNO;
2797 			s += ulen1;
2798 			l += ulen2;
2799 		    }
2800 		}
2801 		locinput = l;
2802 		nextchr = UCHARAT(locinput);
2803 		break;
2804 	    }
2805 
2806 	    /* Inline the first character, for speed. */
2807 	    if (UCHARAT(s) != nextchr &&
2808 		(OP(scan) == REF ||
2809 		 (UCHARAT(s) != ((OP(scan) == REFF
2810 				  ? PL_fold : PL_fold_locale)[nextchr]))))
2811 		sayNO;
2812 	    ln = PL_regendp[n] - ln;
2813 	    if (locinput + ln > PL_regeol)
2814 		sayNO;
2815 	    if (ln > 1 && (OP(scan) == REF
2816 			   ? memNE(s, locinput, ln)
2817 			   : (OP(scan) == REFF
2818 			      ? ibcmp(s, locinput, ln)
2819 			      : ibcmp_locale(s, locinput, ln))))
2820 		sayNO;
2821 	    locinput += ln;
2822 	    nextchr = UCHARAT(locinput);
2823 	    break;
2824 
2825 	case NOTHING:
2826 	case TAIL:
2827 	    break;
2828 	case BACK:
2829 	    break;
2830 	case EVAL:
2831 	{
2832 	    dSP;
2833 	    OP_4tree *oop = PL_op;
2834 	    COP *ocurcop = PL_curcop;
2835 	    PAD *old_comppad;
2836 	    SV *ret;
2837 	    struct regexp *oreg = PL_reg_re;
2838 
2839 	    n = ARG(scan);
2840 	    PL_op = (OP_4tree*)PL_regdata->data[n];
2841 	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2842 	    PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2843 	    PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2844 
2845 	    {
2846 		SV **before = SP;
2847 		CALLRUNOPS(aTHX);			/* Scalar context. */
2848 		SPAGAIN;
2849 		if (SP == before)
2850 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2851 		else {
2852 		    ret = POPs;
2853 		    PUTBACK;
2854 		}
2855 	    }
2856 
2857 	    PL_op = oop;
2858 	    PAD_RESTORE_LOCAL(old_comppad);
2859 	    PL_curcop = ocurcop;
2860 	    if (logical) {
2861 		if (logical == 2) {	/* Postponed subexpression. */
2862 		    regexp *re;
2863 		    MAGIC *mg = Null(MAGIC*);
2864 		    re_cc_state state;
2865 		    CHECKPOINT cp, lastcp;
2866                     int toggleutf;
2867 		    register SV *sv;
2868 
2869 		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2870 			mg = mg_find(sv, PERL_MAGIC_qr);
2871 		    else if (SvSMAGICAL(ret)) {
2872 			if (SvGMAGICAL(ret))
2873 			    sv_unmagic(ret, PERL_MAGIC_qr);
2874 			else
2875 			    mg = mg_find(ret, PERL_MAGIC_qr);
2876 		    }
2877 
2878 		    if (mg) {
2879 			re = (regexp *)mg->mg_obj;
2880 			(void)ReREFCNT_inc(re);
2881 		    }
2882 		    else {
2883 			STRLEN len;
2884 			char *t = SvPV(ret, len);
2885 			PMOP pm;
2886 			char *oprecomp = PL_regprecomp;
2887 			I32 osize = PL_regsize;
2888 			I32 onpar = PL_regnpar;
2889 
2890 			Zero(&pm, 1, PMOP);
2891                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2892 			re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2893 			if (!(SvFLAGS(ret)
2894 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2895 				| SVs_GMG)))
2896 			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
2897 					PERL_MAGIC_qr,0,0);
2898 			PL_regprecomp = oprecomp;
2899 			PL_regsize = osize;
2900 			PL_regnpar = onpar;
2901 		    }
2902 		    DEBUG_r(
2903 			PerlIO_printf(Perl_debug_log,
2904 				      "Entering embedded `%s%.60s%s%s'\n",
2905 				      PL_colors[0],
2906 				      re->precomp,
2907 				      PL_colors[1],
2908 				      (strlen(re->precomp) > 60 ? "..." : ""))
2909 			);
2910 		    state.node = next;
2911 		    state.prev = PL_reg_call_cc;
2912 		    state.cc = PL_regcc;
2913 		    state.re = PL_reg_re;
2914 
2915 		    PL_regcc = 0;
2916 
2917 		    cp = regcppush(0);	/* Save *all* the positions. */
2918 		    REGCP_SET(lastcp);
2919 		    cache_re(re);
2920 		    state.ss = PL_savestack_ix;
2921 		    *PL_reglastparen = 0;
2922 		    *PL_reglastcloseparen = 0;
2923 		    PL_reg_call_cc = &state;
2924 		    PL_reginput = locinput;
2925 		    toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2926 				((re->reganch & ROPT_UTF8) != 0);
2927 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2928 
2929 		    /* XXXX This is too dramatic a measure... */
2930 		    PL_reg_maxiter = 0;
2931 
2932 		    if (regmatch(re->program + 1)) {
2933 			/* Even though we succeeded, we need to restore
2934 			   global variables, since we may be wrapped inside
2935 			   SUSPEND, thus the match may be not finished yet. */
2936 
2937 			/* XXXX Do this only if SUSPENDed? */
2938 			PL_reg_call_cc = state.prev;
2939 			PL_regcc = state.cc;
2940 			PL_reg_re = state.re;
2941 			cache_re(PL_reg_re);
2942 			if (toggleutf) PL_reg_flags ^= RF_utf8;
2943 
2944 			/* XXXX This is too dramatic a measure... */
2945 			PL_reg_maxiter = 0;
2946 
2947 			/* These are needed even if not SUSPEND. */
2948 			ReREFCNT_dec(re);
2949 			regcpblow(cp);
2950 			sayYES;
2951 		    }
2952 		    ReREFCNT_dec(re);
2953 		    REGCP_UNWIND(lastcp);
2954 		    regcppop();
2955 		    PL_reg_call_cc = state.prev;
2956 		    PL_regcc = state.cc;
2957 		    PL_reg_re = state.re;
2958 		    cache_re(PL_reg_re);
2959 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2960 
2961 		    /* XXXX This is too dramatic a measure... */
2962 		    PL_reg_maxiter = 0;
2963 
2964 		    logical = 0;
2965 		    sayNO;
2966 		}
2967 		sw = SvTRUE(ret);
2968 		logical = 0;
2969 	    }
2970 	    else {
2971 		sv_setsv(save_scalar(PL_replgv), ret);
2972 		cache_re(oreg);
2973 	    }
2974 	    break;
2975 	}
2976 	case OPEN:
2977 	    n = ARG(scan);  /* which paren pair */
2978 	    PL_reg_start_tmp[n] = locinput;
2979 	    if (n > PL_regsize)
2980 		PL_regsize = n;
2981 	    break;
2982 	case CLOSE:
2983 	    n = ARG(scan);  /* which paren pair */
2984 	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2985 	    PL_regendp[n] = locinput - PL_bostr;
2986 	    if (n > (I32)*PL_reglastparen)
2987 		*PL_reglastparen = n;
2988 	    *PL_reglastcloseparen = n;
2989 	    break;
2990 	case GROUPP:
2991 	    n = ARG(scan);  /* which paren pair */
2992 	    sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2993 	    break;
2994 	case IFTHEN:
2995 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2996 	    if (sw)
2997 		next = NEXTOPER(NEXTOPER(scan));
2998 	    else {
2999 		next = scan + ARG(scan);
3000 		if (OP(next) == IFTHEN) /* Fake one. */
3001 		    next = NEXTOPER(NEXTOPER(next));
3002 	    }
3003 	    break;
3004 	case LOGICAL:
3005 	    logical = scan->flags;
3006 	    break;
3007 /*******************************************************************
3008  PL_regcc contains infoblock about the innermost (...)* loop, and
3009  a pointer to the next outer infoblock.
3010 
3011  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3012 
3013    1) After matching X, regnode for CURLYX is processed;
3014 
3015    2) This regnode creates infoblock on the stack, and calls
3016       regmatch() recursively with the starting point at WHILEM node;
3017 
3018    3) Each hit of WHILEM node tries to match A and Z (in the order
3019       depending on the current iteration, min/max of {min,max} and
3020       greediness).  The information about where are nodes for "A"
3021       and "Z" is read from the infoblock, as is info on how many times "A"
3022       was already matched, and greediness.
3023 
3024    4) After A matches, the same WHILEM node is hit again.
3025 
3026    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3027       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3028       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3029       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3030       of the external loop.
3031 
3032  Currently present infoblocks form a tree with a stem formed by PL_curcc
3033  and whatever it mentions via ->next, and additional attached trees
3034  corresponding to temporarily unset infoblocks as in "5" above.
3035 
3036  In the following picture infoblocks for outer loop of
3037  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3038  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3039  infoblocks are drawn below the "reset" infoblock.
3040 
3041  In fact in the picture below we do not show failed matches for Z and T
3042  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3043  more obvious *why* one needs to *temporary* unset infoblocks.]
3044 
3045   Matched	REx position	InfoBlocks	Comment
3046   		(Y(A)*?Z)*?T	x
3047   		Y(A)*?Z)*?T	x <- O
3048   Y		(A)*?Z)*?T	x <- O
3049   Y		A)*?Z)*?T	x <- O <- I
3050   YA		)*?Z)*?T	x <- O <- I
3051   YA		A)*?Z)*?T	x <- O <- I
3052   YAA		)*?Z)*?T	x <- O <- I
3053   YAA		Z)*?T		x <- O		# Temporary unset I
3054 				     I
3055 
3056   YAAZ		Y(A)*?Z)*?T	x <- O
3057 				     I
3058 
3059   YAAZY		(A)*?Z)*?T	x <- O
3060 				     I
3061 
3062   YAAZY		A)*?Z)*?T	x <- O <- I
3063 				     I
3064 
3065   YAAZYA	)*?Z)*?T	x <- O <- I
3066 				     I
3067 
3068   YAAZYA	Z)*?T		x <- O		# Temporary unset I
3069 				     I,I
3070 
3071   YAAZYAZ	)*?T		x <- O
3072 				     I,I
3073 
3074   YAAZYAZ	T		x		# Temporary unset O
3075 				O
3076 				I,I
3077 
3078   YAAZYAZT			x
3079 				O
3080 				I,I
3081  *******************************************************************/
3082 	case CURLYX: {
3083 		CURCUR cc;
3084 		CHECKPOINT cp = PL_savestack_ix;
3085 		/* No need to save/restore up to this paren */
3086 		I32 parenfloor = scan->flags;
3087 
3088 		if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3089 		    next += ARG(next);
3090 		cc.oldcc = PL_regcc;
3091 		PL_regcc = &cc;
3092 		/* XXXX Probably it is better to teach regpush to support
3093 		   parenfloor > PL_regsize... */
3094 		if (parenfloor > (I32)*PL_reglastparen)
3095 		    parenfloor = *PL_reglastparen; /* Pessimization... */
3096 		cc.parenfloor = parenfloor;
3097 		cc.cur = -1;
3098 		cc.min = ARG1(scan);
3099 		cc.max  = ARG2(scan);
3100 		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3101 		cc.next = next;
3102 		cc.minmod = minmod;
3103 		cc.lastloc = 0;
3104 		PL_reginput = locinput;
3105 		n = regmatch(PREVOPER(next));	/* start on the WHILEM */
3106 		regcpblow(cp);
3107 		PL_regcc = cc.oldcc;
3108 		saySAME(n);
3109 	    }
3110 	    /* NOT REACHED */
3111 	case WHILEM: {
3112 		/*
3113 		 * This is really hard to understand, because after we match
3114 		 * what we're trying to match, we must make sure the rest of
3115 		 * the REx is going to match for sure, and to do that we have
3116 		 * to go back UP the parse tree by recursing ever deeper.  And
3117 		 * if it fails, we have to reset our parent's current state
3118 		 * that we can try again after backing off.
3119 		 */
3120 
3121 		CHECKPOINT cp, lastcp;
3122 		CURCUR* cc = PL_regcc;
3123 		char *lastloc = cc->lastloc; /* Detection of 0-len. */
3124 
3125 		n = cc->cur + 1;	/* how many we know we matched */
3126 		PL_reginput = locinput;
3127 
3128 		DEBUG_r(
3129 		    PerlIO_printf(Perl_debug_log,
3130 				  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3131 				  REPORT_CODE_OFF+PL_regindent*2, "",
3132 				  (long)n, (long)cc->min,
3133 				  (long)cc->max, PTR2UV(cc))
3134 		    );
3135 
3136 		/* If degenerate scan matches "", assume scan done. */
3137 
3138 		if (locinput == cc->lastloc && n >= cc->min) {
3139 		    PL_regcc = cc->oldcc;
3140 		    if (PL_regcc)
3141 			ln = PL_regcc->cur;
3142 		    DEBUG_r(
3143 			PerlIO_printf(Perl_debug_log,
3144 			   "%*s  empty match detected, try continuation...\n",
3145 			   REPORT_CODE_OFF+PL_regindent*2, "")
3146 			);
3147 		    if (regmatch(cc->next))
3148 			sayYES;
3149 		    if (PL_regcc)
3150 			PL_regcc->cur = ln;
3151 		    PL_regcc = cc;
3152 		    sayNO;
3153 		}
3154 
3155 		/* First just match a string of min scans. */
3156 
3157 		if (n < cc->min) {
3158 		    cc->cur = n;
3159 		    cc->lastloc = locinput;
3160 		    if (regmatch(cc->scan))
3161 			sayYES;
3162 		    cc->cur = n - 1;
3163 		    cc->lastloc = lastloc;
3164 		    sayNO;
3165 		}
3166 
3167 		if (scan->flags) {
3168 		    /* Check whether we already were at this position.
3169 			Postpone detection until we know the match is not
3170 			*that* much linear. */
3171 		if (!PL_reg_maxiter) {
3172 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3173 		    PL_reg_leftiter = PL_reg_maxiter;
3174 		}
3175 		if (PL_reg_leftiter-- == 0) {
3176 		    I32 size = (PL_reg_maxiter + 7)/8;
3177 		    if (PL_reg_poscache) {
3178 			if ((I32)PL_reg_poscache_size < size) {
3179 			    Renew(PL_reg_poscache, size, char);
3180 			    PL_reg_poscache_size = size;
3181 			}
3182 			Zero(PL_reg_poscache, size, char);
3183 		    }
3184 		    else {
3185 			PL_reg_poscache_size = size;
3186 			Newz(29, PL_reg_poscache, size, char);
3187 		    }
3188 		    DEBUG_r(
3189 			PerlIO_printf(Perl_debug_log,
3190 	      "%sDetected a super-linear match, switching on caching%s...\n",
3191 				      PL_colors[4], PL_colors[5])
3192 			);
3193 		}
3194 		if (PL_reg_leftiter < 0) {
3195 		    I32 o = locinput - PL_bostr, b;
3196 
3197 		    o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3198 		    b = o % 8;
3199 		    o /= 8;
3200 		    if (PL_reg_poscache[o] & (1<<b)) {
3201 		    DEBUG_r(
3202 			PerlIO_printf(Perl_debug_log,
3203 				      "%*s  already tried at this position...\n",
3204 				      REPORT_CODE_OFF+PL_regindent*2, "")
3205 			);
3206 			if (PL_reg_flags & RF_false)
3207 			    sayYES;
3208 			else
3209 			    sayNO_SILENT;
3210 		    }
3211 		    PL_reg_poscache[o] |= (1<<b);
3212 		}
3213 		}
3214 
3215 		/* Prefer next over scan for minimal matching. */
3216 
3217 		if (cc->minmod) {
3218 		    PL_regcc = cc->oldcc;
3219 		    if (PL_regcc)
3220 			ln = PL_regcc->cur;
3221 		    cp = regcppush(cc->parenfloor);
3222 		    REGCP_SET(lastcp);
3223 		    if (regmatch(cc->next)) {
3224 			regcpblow(cp);
3225 			sayYES;	/* All done. */
3226 		    }
3227 		    REGCP_UNWIND(lastcp);
3228 		    regcppop();
3229 		    if (PL_regcc)
3230 			PL_regcc->cur = ln;
3231 		    PL_regcc = cc;
3232 
3233 		    if (n >= cc->max) {	/* Maximum greed exceeded? */
3234 			if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3235 			    && !(PL_reg_flags & RF_warned)) {
3236 			    PL_reg_flags |= RF_warned;
3237 			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3238 				 "Complex regular subexpression recursion",
3239 				 REG_INFTY - 1);
3240 			}
3241 			sayNO;
3242 		    }
3243 
3244 		    DEBUG_r(
3245 			PerlIO_printf(Perl_debug_log,
3246 				      "%*s  trying longer...\n",
3247 				      REPORT_CODE_OFF+PL_regindent*2, "")
3248 			);
3249 		    /* Try scanning more and see if it helps. */
3250 		    PL_reginput = locinput;
3251 		    cc->cur = n;
3252 		    cc->lastloc = locinput;
3253 		    cp = regcppush(cc->parenfloor);
3254 		    REGCP_SET(lastcp);
3255 		    if (regmatch(cc->scan)) {
3256 			regcpblow(cp);
3257 			sayYES;
3258 		    }
3259 		    REGCP_UNWIND(lastcp);
3260 		    regcppop();
3261 		    cc->cur = n - 1;
3262 		    cc->lastloc = lastloc;
3263 		    sayNO;
3264 		}
3265 
3266 		/* Prefer scan over next for maximal matching. */
3267 
3268 		if (n < cc->max) {	/* More greed allowed? */
3269 		    cp = regcppush(cc->parenfloor);
3270 		    cc->cur = n;
3271 		    cc->lastloc = locinput;
3272 		    REGCP_SET(lastcp);
3273 		    if (regmatch(cc->scan)) {
3274 			regcpblow(cp);
3275 			sayYES;
3276 		    }
3277 		    REGCP_UNWIND(lastcp);
3278 		    regcppop();		/* Restore some previous $<digit>s? */
3279 		    PL_reginput = locinput;
3280 		    DEBUG_r(
3281 			PerlIO_printf(Perl_debug_log,
3282 				      "%*s  failed, try continuation...\n",
3283 				      REPORT_CODE_OFF+PL_regindent*2, "")
3284 			);
3285 		}
3286 		if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3287 			&& !(PL_reg_flags & RF_warned)) {
3288 		    PL_reg_flags |= RF_warned;
3289 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3290 			 "Complex regular subexpression recursion",
3291 			 REG_INFTY - 1);
3292 		}
3293 
3294 		/* Failed deeper matches of scan, so see if this one works. */
3295 		PL_regcc = cc->oldcc;
3296 		if (PL_regcc)
3297 		    ln = PL_regcc->cur;
3298 		if (regmatch(cc->next))
3299 		    sayYES;
3300 		if (PL_regcc)
3301 		    PL_regcc->cur = ln;
3302 		PL_regcc = cc;
3303 		cc->cur = n - 1;
3304 		cc->lastloc = lastloc;
3305 		sayNO;
3306 	    }
3307 	    /* NOT REACHED */
3308 	case BRANCHJ:
3309 	    next = scan + ARG(scan);
3310 	    if (next == scan)
3311 		next = NULL;
3312 	    inner = NEXTOPER(NEXTOPER(scan));
3313 	    goto do_branch;
3314 	case BRANCH:
3315 	    inner = NEXTOPER(scan);
3316 	  do_branch:
3317 	    {
3318 		c1 = OP(scan);
3319 		if (OP(next) != c1)	/* No choice. */
3320 		    next = inner;	/* Avoid recursion. */
3321 		else {
3322 		    I32 lastparen = *PL_reglastparen;
3323 		    I32 unwind1;
3324 		    re_unwind_branch_t *uw;
3325 
3326 		    /* Put unwinding data on stack */
3327 		    unwind1 = SSNEWt(1,re_unwind_branch_t);
3328 		    uw = SSPTRt(unwind1,re_unwind_branch_t);
3329 		    uw->prev = unwind;
3330 		    unwind = unwind1;
3331 		    uw->type = ((c1 == BRANCH)
3332 				? RE_UNWIND_BRANCH
3333 				: RE_UNWIND_BRANCHJ);
3334 		    uw->lastparen = lastparen;
3335 		    uw->next = next;
3336 		    uw->locinput = locinput;
3337 		    uw->nextchr = nextchr;
3338 #ifdef DEBUGGING
3339 		    uw->regindent = ++PL_regindent;
3340 #endif
3341 
3342 		    REGCP_SET(uw->lastcp);
3343 
3344 		    /* Now go into the first branch */
3345 		    next = inner;
3346 		}
3347 	    }
3348 	    break;
3349 	case MINMOD:
3350 	    minmod = 1;
3351 	    break;
3352 	case CURLYM:
3353 	{
3354 	    I32 l = 0;
3355 	    CHECKPOINT lastcp;
3356 
3357 	    /* We suppose that the next guy does not need
3358 	       backtracking: in particular, it is of constant length,
3359 	       and has no parenths to influence future backrefs. */
3360 	    ln = ARG1(scan);  /* min to match */
3361 	    n  = ARG2(scan);  /* max to match */
3362 	    paren = scan->flags;
3363 	    if (paren) {
3364 		if (paren > PL_regsize)
3365 		    PL_regsize = paren;
3366 		if (paren > (I32)*PL_reglastparen)
3367 		    *PL_reglastparen = paren;
3368 	    }
3369 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3370 	    if (paren)
3371 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
3372 	    PL_reginput = locinput;
3373 	    if (minmod) {
3374 		minmod = 0;
3375 		if (ln && regrepeat_hard(scan, ln, &l) < ln)
3376 		    sayNO;
3377 		/* if we matched something zero-length we don't need to
3378 		   backtrack - capturing parens are already defined, so
3379 		   the caveat in the maximal case doesn't apply
3380 
3381 		   XXXX if ln == 0, we can redo this check first time
3382 		   through the following loop
3383 		*/
3384 		if (ln && l == 0)
3385 		    n = ln;	/* don't backtrack */
3386 		locinput = PL_reginput;
3387 		if (HAS_TEXT(next) || JUMPABLE(next)) {
3388 		    regnode *text_node = next;
3389 
3390 		    if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3391 
3392 		    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3393 		    else {
3394 			if (PL_regkind[(U8)OP(text_node)] == REF) {
3395 			    c1 = c2 = -1000;
3396 			    goto assume_ok_MM;
3397 			}
3398 			else { c1 = (U8)*STRING(text_node); }
3399 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3400 			    c2 = PL_fold[c1];
3401 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3402 			    c2 = PL_fold_locale[c1];
3403 			else
3404 			    c2 = c1;
3405 		    }
3406 		}
3407 		else
3408 		    c1 = c2 = -1000;
3409 	    assume_ok_MM:
3410 		REGCP_SET(lastcp);
3411 		/* This may be improved if l == 0.  */
3412 		while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3413 		    /* If it could work, try it. */
3414 		    if (c1 == -1000 ||
3415 			UCHARAT(PL_reginput) == c1 ||
3416 			UCHARAT(PL_reginput) == c2)
3417 		    {
3418 			if (paren) {
3419 			    if (ln) {
3420 				PL_regstartp[paren] =
3421 				    HOPc(PL_reginput, -l) - PL_bostr;
3422 				PL_regendp[paren] = PL_reginput - PL_bostr;
3423 			    }
3424 			    else
3425 				PL_regendp[paren] = -1;
3426 			}
3427 			if (regmatch(next))
3428 			    sayYES;
3429 			REGCP_UNWIND(lastcp);
3430 		    }
3431 		    /* Couldn't or didn't -- move forward. */
3432 		    PL_reginput = locinput;
3433 		    if (regrepeat_hard(scan, 1, &l)) {
3434 			ln++;
3435 			locinput = PL_reginput;
3436 		    }
3437 		    else
3438 			sayNO;
3439 		}
3440 	    }
3441 	    else {
3442 		n = regrepeat_hard(scan, n, &l);
3443 		/* if we matched something zero-length we don't need to
3444 		   backtrack, unless the minimum count is zero and we
3445 		   are capturing the result - in that case the capture
3446 		   being defined or not may affect later execution
3447 		*/
3448 		if (n != 0 && l == 0 && !(paren && ln == 0))
3449 		    ln = n;	/* don't backtrack */
3450 		locinput = PL_reginput;
3451 		DEBUG_r(
3452 		    PerlIO_printf(Perl_debug_log,
3453 				  "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3454 				  (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3455 				  (IV) n, (IV)l)
3456 		    );
3457 		if (n >= ln) {
3458 		    if (HAS_TEXT(next) || JUMPABLE(next)) {
3459 			regnode *text_node = next;
3460 
3461 			if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3462 
3463 			if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3464 			else {
3465 			    if (PL_regkind[(U8)OP(text_node)] == REF) {
3466 				c1 = c2 = -1000;
3467 				goto assume_ok_REG;
3468 			    }
3469 			    else { c1 = (U8)*STRING(text_node); }
3470 
3471 			    if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3472 				c2 = PL_fold[c1];
3473 			    else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3474 				c2 = PL_fold_locale[c1];
3475 			    else
3476 				c2 = c1;
3477 			}
3478 		    }
3479 		    else
3480 			c1 = c2 = -1000;
3481 		}
3482 	    assume_ok_REG:
3483 		REGCP_SET(lastcp);
3484 		while (n >= ln) {
3485 		    /* If it could work, try it. */
3486 		    if (c1 == -1000 ||
3487 			UCHARAT(PL_reginput) == c1 ||
3488 			UCHARAT(PL_reginput) == c2)
3489 		    {
3490 			DEBUG_r(
3491 				PerlIO_printf(Perl_debug_log,
3492 					      "%*s  trying tail with n=%"IVdf"...\n",
3493 					      (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3494 			    );
3495 			if (paren) {
3496 			    if (n) {
3497 				PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3498 				PL_regendp[paren] = PL_reginput - PL_bostr;
3499 			    }
3500 			    else
3501 				PL_regendp[paren] = -1;
3502 			}
3503 			if (regmatch(next))
3504 			    sayYES;
3505 			REGCP_UNWIND(lastcp);
3506 		    }
3507 		    /* Couldn't or didn't -- back up. */
3508 		    n--;
3509 		    locinput = HOPc(locinput, -l);
3510 		    PL_reginput = locinput;
3511 		}
3512 	    }
3513 	    sayNO;
3514 	    break;
3515 	}
3516 	case CURLYN:
3517 	    paren = scan->flags;	/* Which paren to set */
3518 	    if (paren > PL_regsize)
3519 		PL_regsize = paren;
3520 	    if (paren > (I32)*PL_reglastparen)
3521 		*PL_reglastparen = paren;
3522 	    ln = ARG1(scan);  /* min to match */
3523 	    n  = ARG2(scan);  /* max to match */
3524             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3525 	    goto repeat;
3526 	case CURLY:
3527 	    paren = 0;
3528 	    ln = ARG1(scan);  /* min to match */
3529 	    n  = ARG2(scan);  /* max to match */
3530 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3531 	    goto repeat;
3532 	case STAR:
3533 	    ln = 0;
3534 	    n = REG_INFTY;
3535 	    scan = NEXTOPER(scan);
3536 	    paren = 0;
3537 	    goto repeat;
3538 	case PLUS:
3539 	    ln = 1;
3540 	    n = REG_INFTY;
3541 	    scan = NEXTOPER(scan);
3542 	    paren = 0;
3543 	  repeat:
3544 	    /*
3545 	    * Lookahead to avoid useless match attempts
3546 	    * when we know what character comes next.
3547 	    */
3548 
3549 	    /*
3550 	    * Used to only do .*x and .*?x, but now it allows
3551 	    * for )'s, ('s and (?{ ... })'s to be in the way
3552 	    * of the quantifier and the EXACT-like node.  -- japhy
3553 	    */
3554 
3555 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
3556 		U8 *s;
3557 		regnode *text_node = next;
3558 
3559 		if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3560 
3561 		if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3562 		else {
3563 		    if (PL_regkind[(U8)OP(text_node)] == REF) {
3564 			c1 = c2 = -1000;
3565 			goto assume_ok_easy;
3566 		    }
3567 		    else { s = (U8*)STRING(text_node); }
3568 
3569 		    if (!UTF) {
3570 			c2 = c1 = *s;
3571 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3572 			    c2 = PL_fold[c1];
3573 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3574 			    c2 = PL_fold_locale[c1];
3575 		    }
3576 		    else { /* UTF */
3577 			if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3578 			     STRLEN ulen1, ulen2;
3579 			     U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3580 			     U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3581 
3582 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3583 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3584 
3585 			     c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
3586 						 ckWARN(WARN_UTF8) ?
3587 						 0 : UTF8_ALLOW_ANY);
3588 			     c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
3589 						 ckWARN(WARN_UTF8) ?
3590 						 0 : UTF8_ALLOW_ANY);
3591 			}
3592 			else {
3593 			    c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
3594 						     ckWARN(WARN_UTF8) ?
3595 						     0 : UTF8_ALLOW_ANY);
3596 			}
3597 		    }
3598 		}
3599 	    }
3600 	    else
3601 		c1 = c2 = -1000;
3602 	assume_ok_easy:
3603 	    PL_reginput = locinput;
3604 	    if (minmod) {
3605 		CHECKPOINT lastcp;
3606 		minmod = 0;
3607 		if (ln && regrepeat(scan, ln) < ln)
3608 		    sayNO;
3609 		locinput = PL_reginput;
3610 		REGCP_SET(lastcp);
3611 		if (c1 != -1000) {
3612 		    char *e; /* Should not check after this */
3613 		    char *old = locinput;
3614 		    int count = 0;
3615 
3616 		    if  (n == REG_INFTY) {
3617 			e = PL_regeol - 1;
3618 			if (do_utf8)
3619 			    while (UTF8_IS_CONTINUATION(*(U8*)e))
3620 				e--;
3621 		    }
3622 		    else if (do_utf8) {
3623 			int m = n - ln;
3624 			for (e = locinput;
3625 			     m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3626 			    e += UTF8SKIP(e);
3627 		    }
3628 		    else {
3629 			e = locinput + n - ln;
3630 			if (e >= PL_regeol)
3631 			    e = PL_regeol - 1;
3632 		    }
3633 		    while (1) {
3634 			/* Find place 'next' could work */
3635 			if (!do_utf8) {
3636 			    if (c1 == c2) {
3637 				while (locinput <= e &&
3638 				       UCHARAT(locinput) != c1)
3639 				    locinput++;
3640 			    } else {
3641 				while (locinput <= e
3642 				       && UCHARAT(locinput) != c1
3643 				       && UCHARAT(locinput) != c2)
3644 				    locinput++;
3645 			    }
3646 			    count = locinput - old;
3647 			}
3648 			else {
3649 			    STRLEN len;
3650 			    if (c1 == c2) {
3651 				/* count initialised to
3652 				 * utf8_distance(old, locinput) */
3653 				while (locinput <= e &&
3654 				       utf8n_to_uvchr((U8*)locinput,
3655 						      UTF8_MAXLEN, &len,
3656 						      ckWARN(WARN_UTF8) ?
3657 						      0 : UTF8_ALLOW_ANY) != (UV)c1) {
3658 				    locinput += len;
3659 				    count++;
3660 				}
3661 			    } else {
3662 				/* count initialised to
3663 				 * utf8_distance(old, locinput) */
3664 				while (locinput <= e) {
3665 				    UV c = utf8n_to_uvchr((U8*)locinput,
3666 							  UTF8_MAXLEN, &len,
3667 							  ckWARN(WARN_UTF8) ?
3668 							  0 : UTF8_ALLOW_ANY);
3669 				    if (c == (UV)c1 || c == (UV)c2)
3670 					break;
3671 				    locinput += len;
3672 				    count++;
3673 				}
3674 			    }
3675 			}
3676 			if (locinput > e)
3677 			    sayNO;
3678 			/* PL_reginput == old now */
3679 			if (locinput != old) {
3680 			    ln = 1;	/* Did some */
3681 			    if (regrepeat(scan, count) < count)
3682 				sayNO;
3683 			}
3684 			/* PL_reginput == locinput now */
3685 			TRYPAREN(paren, ln, locinput);
3686 			PL_reginput = locinput;	/* Could be reset... */
3687 			REGCP_UNWIND(lastcp);
3688 			/* Couldn't or didn't -- move forward. */
3689 			old = locinput;
3690 			if (do_utf8)
3691 			    locinput += UTF8SKIP(locinput);
3692 			else
3693 			    locinput++;
3694 			count = 1;
3695 		    }
3696 		}
3697 		else
3698 		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3699 		    UV c;
3700 		    if (c1 != -1000) {
3701 			if (do_utf8)
3702 			    c = utf8n_to_uvchr((U8*)PL_reginput,
3703 					       UTF8_MAXLEN, 0,
3704 					       ckWARN(WARN_UTF8) ?
3705 					       0 : UTF8_ALLOW_ANY);
3706 			else
3707 			    c = UCHARAT(PL_reginput);
3708 			/* If it could work, try it. */
3709 		        if (c == (UV)c1 || c == (UV)c2)
3710 		        {
3711 			    TRYPAREN(paren, ln, PL_reginput);
3712 			    REGCP_UNWIND(lastcp);
3713 		        }
3714 		    }
3715 		    /* If it could work, try it. */
3716 		    else if (c1 == -1000)
3717 		    {
3718 			TRYPAREN(paren, ln, PL_reginput);
3719 			REGCP_UNWIND(lastcp);
3720 		    }
3721 		    /* Couldn't or didn't -- move forward. */
3722 		    PL_reginput = locinput;
3723 		    if (regrepeat(scan, 1)) {
3724 			ln++;
3725 			locinput = PL_reginput;
3726 		    }
3727 		    else
3728 			sayNO;
3729 		}
3730 	    }
3731 	    else {
3732 		CHECKPOINT lastcp;
3733 		n = regrepeat(scan, n);
3734 		locinput = PL_reginput;
3735 		if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3736 		    ((!PL_multiline && OP(next) != MEOL) ||
3737 			OP(next) == SEOL || OP(next) == EOS))
3738 		{
3739 		    ln = n;			/* why back off? */
3740 		    /* ...because $ and \Z can match before *and* after
3741 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3742 		       We should back off by one in this case. */
3743 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3744 			ln--;
3745 		}
3746 		REGCP_SET(lastcp);
3747 		if (paren) {
3748 		    UV c = 0;
3749 		    while (n >= ln) {
3750 			if (c1 != -1000) {
3751 			    if (do_utf8)
3752 				c = utf8n_to_uvchr((U8*)PL_reginput,
3753 						   UTF8_MAXLEN, 0,
3754 						   ckWARN(WARN_UTF8) ?
3755 						   0 : UTF8_ALLOW_ANY);
3756 			    else
3757 				c = UCHARAT(PL_reginput);
3758 			}
3759 			/* If it could work, try it. */
3760 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3761 			    {
3762 				TRYPAREN(paren, n, PL_reginput);
3763 				REGCP_UNWIND(lastcp);
3764 			    }
3765 			/* Couldn't or didn't -- back up. */
3766 			n--;
3767 			PL_reginput = locinput = HOPc(locinput, -1);
3768 		    }
3769 		}
3770 		else {
3771 		    UV c = 0;
3772 		    while (n >= ln) {
3773 			if (c1 != -1000) {
3774 			    if (do_utf8)
3775 				c = utf8n_to_uvchr((U8*)PL_reginput,
3776 						   UTF8_MAXLEN, 0,
3777 						   ckWARN(WARN_UTF8) ?
3778 						   0 : UTF8_ALLOW_ANY);
3779 			    else
3780 				c = UCHARAT(PL_reginput);
3781 			}
3782 			/* If it could work, try it. */
3783 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3784 			    {
3785 				TRYPAREN(paren, n, PL_reginput);
3786 				REGCP_UNWIND(lastcp);
3787 			    }
3788 			/* Couldn't or didn't -- back up. */
3789 			n--;
3790 			PL_reginput = locinput = HOPc(locinput, -1);
3791 		    }
3792 		}
3793 	    }
3794 	    sayNO;
3795 	    break;
3796 	case END:
3797 	    if (PL_reg_call_cc) {
3798 		re_cc_state *cur_call_cc = PL_reg_call_cc;
3799 		CURCUR *cctmp = PL_regcc;
3800 		regexp *re = PL_reg_re;
3801 		CHECKPOINT cp, lastcp;
3802 
3803 		cp = regcppush(0);	/* Save *all* the positions. */
3804 		REGCP_SET(lastcp);
3805 		regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3806 						    the caller. */
3807 		PL_reginput = locinput;	/* Make position available to
3808 					   the callcc. */
3809 		cache_re(PL_reg_call_cc->re);
3810 		PL_regcc = PL_reg_call_cc->cc;
3811 		PL_reg_call_cc = PL_reg_call_cc->prev;
3812 		if (regmatch(cur_call_cc->node)) {
3813 		    PL_reg_call_cc = cur_call_cc;
3814 		    regcpblow(cp);
3815 		    sayYES;
3816 		}
3817 		REGCP_UNWIND(lastcp);
3818 		regcppop();
3819 		PL_reg_call_cc = cur_call_cc;
3820 		PL_regcc = cctmp;
3821 		PL_reg_re = re;
3822 		cache_re(re);
3823 
3824 		DEBUG_r(
3825 		    PerlIO_printf(Perl_debug_log,
3826 				  "%*s  continuation failed...\n",
3827 				  REPORT_CODE_OFF+PL_regindent*2, "")
3828 		    );
3829 		sayNO_SILENT;
3830 	    }
3831 	    if (locinput < PL_regtill) {
3832 		DEBUG_r(PerlIO_printf(Perl_debug_log,
3833 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3834 				      PL_colors[4],
3835 				      (long)(locinput - PL_reg_starttry),
3836 				      (long)(PL_regtill - PL_reg_starttry),
3837 				      PL_colors[5]));
3838 		sayNO_FINAL;		/* Cannot match: too short. */
3839 	    }
3840 	    PL_reginput = locinput;	/* put where regtry can find it */
3841 	    sayYES_FINAL;		/* Success! */
3842 	case SUCCEED:
3843 	    PL_reginput = locinput;	/* put where regtry can find it */
3844 	    sayYES_LOUD;		/* Success! */
3845 	case SUSPEND:
3846 	    n = 1;
3847 	    PL_reginput = locinput;
3848 	    goto do_ifmatch;
3849 	case UNLESSM:
3850 	    n = 0;
3851 	    if (scan->flags) {
3852 		s = HOPBACKc(locinput, scan->flags);
3853 		if (!s)
3854 		    goto say_yes;
3855 		PL_reginput = s;
3856 	    }
3857 	    else
3858 		PL_reginput = locinput;
3859 	    PL_reg_flags ^= RF_false;
3860 	    goto do_ifmatch;
3861 	case IFMATCH:
3862 	    n = 1;
3863 	    if (scan->flags) {
3864 		s = HOPBACKc(locinput, scan->flags);
3865 		if (!s)
3866 		    goto say_no;
3867 		PL_reginput = s;
3868 	    }
3869 	    else
3870 		PL_reginput = locinput;
3871 
3872 	  do_ifmatch:
3873 	    inner = NEXTOPER(NEXTOPER(scan));
3874 	    if (regmatch(inner) != n) {
3875 		if (n == 0)
3876 		    PL_reg_flags ^= RF_false;
3877 	      say_no:
3878 		if (logical) {
3879 		    logical = 0;
3880 		    sw = 0;
3881 		    goto do_longjump;
3882 		}
3883 		else
3884 		    sayNO;
3885 	    }
3886 	    if (n == 0)
3887 		PL_reg_flags ^= RF_false;
3888 	  say_yes:
3889 	    if (logical) {
3890 		logical = 0;
3891 		sw = 1;
3892 	    }
3893 	    if (OP(scan) == SUSPEND) {
3894 		locinput = PL_reginput;
3895 		nextchr = UCHARAT(locinput);
3896 	    }
3897 	    /* FALL THROUGH. */
3898 	case LONGJMP:
3899 	  do_longjump:
3900 	    next = scan + ARG(scan);
3901 	    if (next == scan)
3902 		next = NULL;
3903 	    break;
3904 	default:
3905 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3906 			  PTR2UV(scan), OP(scan));
3907 	    Perl_croak(aTHX_ "regexp memory corruption");
3908 	}
3909       reenter:
3910 	scan = next;
3911     }
3912 
3913     /*
3914     * We get here only if there's trouble -- normally "case END" is
3915     * the terminating point.
3916     */
3917     Perl_croak(aTHX_ "corrupted regexp pointers");
3918     /*NOTREACHED*/
3919     sayNO;
3920 
3921 yes_loud:
3922     DEBUG_r(
3923 	PerlIO_printf(Perl_debug_log,
3924 		      "%*s  %scould match...%s\n",
3925 		      REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3926 	);
3927     goto yes;
3928 yes_final:
3929     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3930 			  PL_colors[4],PL_colors[5]));
3931 yes:
3932 #ifdef DEBUGGING
3933     PL_regindent--;
3934 #endif
3935 
3936 #if 0					/* Breaks $^R */
3937     if (unwind)
3938 	regcpblow(firstcp);
3939 #endif
3940     return 1;
3941 
3942 no:
3943     DEBUG_r(
3944 	PerlIO_printf(Perl_debug_log,
3945 		      "%*s  %sfailed...%s\n",
3946 		      REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3947 	);
3948     goto do_no;
3949 no_final:
3950 do_no:
3951     if (unwind) {
3952 	re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3953 
3954 	switch (uw->type) {
3955 	case RE_UNWIND_BRANCH:
3956 	case RE_UNWIND_BRANCHJ:
3957 	{
3958 	    re_unwind_branch_t *uwb = &(uw->branch);
3959 	    I32 lastparen = uwb->lastparen;
3960 
3961 	    REGCP_UNWIND(uwb->lastcp);
3962 	    for (n = *PL_reglastparen; n > lastparen; n--)
3963 		PL_regendp[n] = -1;
3964 	    *PL_reglastparen = n;
3965 	    scan = next = uwb->next;
3966 	    if ( !scan ||
3967 		 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3968 			      ? BRANCH : BRANCHJ) ) {		/* Failure */
3969 		unwind = uwb->prev;
3970 #ifdef DEBUGGING
3971 		PL_regindent--;
3972 #endif
3973 		goto do_no;
3974 	    }
3975 	    /* Have more choice yet.  Reuse the same uwb.  */
3976 	    /*SUPPRESS 560*/
3977 	    if ((n = (uwb->type == RE_UNWIND_BRANCH
3978 		      ? NEXT_OFF(next) : ARG(next))))
3979 		next += n;
3980 	    else
3981 		next = NULL;	/* XXXX Needn't unwinding in this case... */
3982 	    uwb->next = next;
3983 	    next = NEXTOPER(scan);
3984 	    if (uwb->type == RE_UNWIND_BRANCHJ)
3985 		next = NEXTOPER(next);
3986 	    locinput = uwb->locinput;
3987 	    nextchr = uwb->nextchr;
3988 #ifdef DEBUGGING
3989 	    PL_regindent = uwb->regindent;
3990 #endif
3991 
3992 	    goto reenter;
3993 	}
3994 	/* NOT REACHED */
3995 	default:
3996 	    Perl_croak(aTHX_ "regexp unwind memory corruption");
3997 	}
3998 	/* NOT REACHED */
3999     }
4000 #ifdef DEBUGGING
4001     PL_regindent--;
4002 #endif
4003     return 0;
4004 }
4005 
4006 /*
4007  - regrepeat - repeatedly match something simple, report how many
4008  */
4009 /*
4010  * [This routine now assumes that it will only match on things of length 1.
4011  * That was true before, but now we assume scan - reginput is the count,
4012  * rather than incrementing count on every character.  [Er, except utf8.]]
4013  */
4014 STATIC I32
S_regrepeat(pTHX_ regnode * p,I32 max)4015 S_regrepeat(pTHX_ regnode *p, I32 max)
4016 {
4017     register char *scan;
4018     register I32 c;
4019     register char *loceol = PL_regeol;
4020     register I32 hardcount = 0;
4021     register bool do_utf8 = PL_reg_match_utf8;
4022 
4023     scan = PL_reginput;
4024     if (max == REG_INFTY)
4025 	max = I32_MAX;
4026     else if (max < loceol - scan)
4027       loceol = scan + max;
4028     switch (OP(p)) {
4029     case REG_ANY:
4030 	if (do_utf8) {
4031 	    loceol = PL_regeol;
4032 	    while (scan < loceol && hardcount < max && *scan != '\n') {
4033 		scan += UTF8SKIP(scan);
4034 		hardcount++;
4035 	    }
4036 	} else {
4037 	    while (scan < loceol && *scan != '\n')
4038 		scan++;
4039 	}
4040 	break;
4041     case SANY:
4042         if (do_utf8) {
4043 	    loceol = PL_regeol;
4044 	    while (scan < loceol && hardcount < max) {
4045 	        scan += UTF8SKIP(scan);
4046 		hardcount++;
4047 	    }
4048 	}
4049 	else
4050 	    scan = loceol;
4051 	break;
4052     case CANY:
4053 	scan = loceol;
4054 	break;
4055     case EXACT:		/* length of string is 1 */
4056 	c = (U8)*STRING(p);
4057 	while (scan < loceol && UCHARAT(scan) == c)
4058 	    scan++;
4059 	break;
4060     case EXACTF:	/* length of string is 1 */
4061 	c = (U8)*STRING(p);
4062 	while (scan < loceol &&
4063 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4064 	    scan++;
4065 	break;
4066     case EXACTFL:	/* length of string is 1 */
4067 	PL_reg_flags |= RF_tainted;
4068 	c = (U8)*STRING(p);
4069 	while (scan < loceol &&
4070 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4071 	    scan++;
4072 	break;
4073     case ANYOF:
4074 	if (do_utf8) {
4075 	    loceol = PL_regeol;
4076 	    while (hardcount < max && scan < loceol &&
4077 		   reginclass(p, (U8*)scan, 0, do_utf8)) {
4078 		scan += UTF8SKIP(scan);
4079 		hardcount++;
4080 	    }
4081 	} else {
4082 	    while (scan < loceol && REGINCLASS(p, (U8*)scan))
4083 		scan++;
4084 	}
4085 	break;
4086     case ALNUM:
4087 	if (do_utf8) {
4088 	    loceol = PL_regeol;
4089 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4090 	    while (hardcount < max && scan < loceol &&
4091 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4092 		scan += UTF8SKIP(scan);
4093 		hardcount++;
4094 	    }
4095 	} else {
4096 	    while (scan < loceol && isALNUM(*scan))
4097 		scan++;
4098 	}
4099 	break;
4100     case ALNUML:
4101 	PL_reg_flags |= RF_tainted;
4102 	if (do_utf8) {
4103 	    loceol = PL_regeol;
4104 	    while (hardcount < max && scan < loceol &&
4105 		   isALNUM_LC_utf8((U8*)scan)) {
4106 		scan += UTF8SKIP(scan);
4107 		hardcount++;
4108 	    }
4109 	} else {
4110 	    while (scan < loceol && isALNUM_LC(*scan))
4111 		scan++;
4112 	}
4113 	break;
4114     case NALNUM:
4115 	if (do_utf8) {
4116 	    loceol = PL_regeol;
4117 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4118 	    while (hardcount < max && scan < loceol &&
4119 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4120 		scan += UTF8SKIP(scan);
4121 		hardcount++;
4122 	    }
4123 	} else {
4124 	    while (scan < loceol && !isALNUM(*scan))
4125 		scan++;
4126 	}
4127 	break;
4128     case NALNUML:
4129 	PL_reg_flags |= RF_tainted;
4130 	if (do_utf8) {
4131 	    loceol = PL_regeol;
4132 	    while (hardcount < max && scan < loceol &&
4133 		   !isALNUM_LC_utf8((U8*)scan)) {
4134 		scan += UTF8SKIP(scan);
4135 		hardcount++;
4136 	    }
4137 	} else {
4138 	    while (scan < loceol && !isALNUM_LC(*scan))
4139 		scan++;
4140 	}
4141 	break;
4142     case SPACE:
4143 	if (do_utf8) {
4144 	    loceol = PL_regeol;
4145 	    LOAD_UTF8_CHARCLASS(space," ");
4146 	    while (hardcount < max && scan < loceol &&
4147 		   (*scan == ' ' ||
4148 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4149 		scan += UTF8SKIP(scan);
4150 		hardcount++;
4151 	    }
4152 	} else {
4153 	    while (scan < loceol && isSPACE(*scan))
4154 		scan++;
4155 	}
4156 	break;
4157     case SPACEL:
4158 	PL_reg_flags |= RF_tainted;
4159 	if (do_utf8) {
4160 	    loceol = PL_regeol;
4161 	    while (hardcount < max && scan < loceol &&
4162 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4163 		scan += UTF8SKIP(scan);
4164 		hardcount++;
4165 	    }
4166 	} else {
4167 	    while (scan < loceol && isSPACE_LC(*scan))
4168 		scan++;
4169 	}
4170 	break;
4171     case NSPACE:
4172 	if (do_utf8) {
4173 	    loceol = PL_regeol;
4174 	    LOAD_UTF8_CHARCLASS(space," ");
4175 	    while (hardcount < max && scan < loceol &&
4176 		   !(*scan == ' ' ||
4177 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4178 		scan += UTF8SKIP(scan);
4179 		hardcount++;
4180 	    }
4181 	} else {
4182 	    while (scan < loceol && !isSPACE(*scan))
4183 		scan++;
4184 	    break;
4185 	}
4186     case NSPACEL:
4187 	PL_reg_flags |= RF_tainted;
4188 	if (do_utf8) {
4189 	    loceol = PL_regeol;
4190 	    while (hardcount < max && scan < loceol &&
4191 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4192 		scan += UTF8SKIP(scan);
4193 		hardcount++;
4194 	    }
4195 	} else {
4196 	    while (scan < loceol && !isSPACE_LC(*scan))
4197 		scan++;
4198 	}
4199 	break;
4200     case DIGIT:
4201 	if (do_utf8) {
4202 	    loceol = PL_regeol;
4203 	    LOAD_UTF8_CHARCLASS(digit,"0");
4204 	    while (hardcount < max && scan < loceol &&
4205 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4206 		scan += UTF8SKIP(scan);
4207 		hardcount++;
4208 	    }
4209 	} else {
4210 	    while (scan < loceol && isDIGIT(*scan))
4211 		scan++;
4212 	}
4213 	break;
4214     case NDIGIT:
4215 	if (do_utf8) {
4216 	    loceol = PL_regeol;
4217 	    LOAD_UTF8_CHARCLASS(digit,"0");
4218 	    while (hardcount < max && scan < loceol &&
4219 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4220 		scan += UTF8SKIP(scan);
4221 		hardcount++;
4222 	    }
4223 	} else {
4224 	    while (scan < loceol && !isDIGIT(*scan))
4225 		scan++;
4226 	}
4227 	break;
4228     default:		/* Called on something of 0 width. */
4229 	break;		/* So match right here or not at all. */
4230     }
4231 
4232     if (hardcount)
4233 	c = hardcount;
4234     else
4235 	c = scan - PL_reginput;
4236     PL_reginput = scan;
4237 
4238     DEBUG_r(
4239 	{
4240 		SV *prop = sv_newmortal();
4241 
4242 		regprop(prop, p);
4243 		PerlIO_printf(Perl_debug_log,
4244 			      "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4245 			      REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4246 	});
4247 
4248     return(c);
4249 }
4250 
4251 /*
4252  - regrepeat_hard - repeatedly match something, report total lenth and length
4253  *
4254  * The repeater is supposed to have constant length.
4255  */
4256 
4257 STATIC I32
S_regrepeat_hard(pTHX_ regnode * p,I32 max,I32 * lp)4258 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4259 {
4260     register char *scan = Nullch;
4261     register char *start;
4262     register char *loceol = PL_regeol;
4263     I32 l = 0;
4264     I32 count = 0, res = 1;
4265 
4266     if (!max)
4267 	return 0;
4268 
4269     start = PL_reginput;
4270     if (PL_reg_match_utf8) {
4271 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4272 	    if (!count++) {
4273 		l = 0;
4274 		while (start < PL_reginput) {
4275 		    l++;
4276 		    start += UTF8SKIP(start);
4277 		}
4278 		*lp = l;
4279 		if (l == 0)
4280 		    return max;
4281 	    }
4282 	    if (count == max)
4283 		return count;
4284 	}
4285     }
4286     else {
4287 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4288 	    if (!count++) {
4289 		*lp = l = PL_reginput - start;
4290 		if (max != REG_INFTY && l*max < loceol - scan)
4291 		    loceol = scan + l*max;
4292 		if (l == 0)
4293 		    return max;
4294 	    }
4295 	}
4296     }
4297     if (!res)
4298 	PL_reginput = scan;
4299 
4300     return count;
4301 }
4302 
4303 /*
4304 - regclass_swash - prepare the utf8 swash
4305 */
4306 
4307 SV *
Perl_regclass_swash(pTHX_ register regnode * node,bool doinit,SV ** listsvp,SV ** altsvp)4308 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4309 {
4310     SV *sw  = NULL;
4311     SV *si  = NULL;
4312     SV *alt = NULL;
4313 
4314     if (PL_regdata && PL_regdata->count) {
4315 	U32 n = ARG(node);
4316 
4317 	if (PL_regdata->what[n] == 's') {
4318 	    SV *rv = (SV*)PL_regdata->data[n];
4319 	    AV *av = (AV*)SvRV((SV*)rv);
4320 	    SV **ary = AvARRAY(av);
4321 	    SV **a, **b;
4322 
4323 	    /* See the end of regcomp.c:S_reglass() for
4324 	     * documentation of these array elements. */
4325 
4326 	    si = *ary;
4327 	    a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4328 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4329 
4330 	    if (a)
4331 		sw = *a;
4332 	    else if (si && doinit) {
4333 		sw = swash_init("utf8", "", si, 1, 0);
4334 		(void)av_store(av, 1, sw);
4335 	    }
4336 	    if (b)
4337 	        alt = *b;
4338 	}
4339     }
4340 
4341     if (listsvp)
4342 	*listsvp = si;
4343     if (altsvp)
4344 	*altsvp  = alt;
4345 
4346     return sw;
4347 }
4348 
4349 /*
4350  - reginclass - determine if a character falls into a character class
4351 
4352   The n is the ANYOF regnode, the p is the target string, lenp
4353   is pointer to the maximum length of how far to go in the p
4354   (if the lenp is zero, UTF8SKIP(p) is used),
4355   do_utf8 tells whether the target string is in UTF-8.
4356 
4357  */
4358 
4359 STATIC bool
S_reginclass(pTHX_ register regnode * n,register U8 * p,STRLEN * lenp,register bool do_utf8)4360 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4361 {
4362     char flags = ANYOF_FLAGS(n);
4363     bool match = FALSE;
4364     UV c = *p;
4365     STRLEN len = 0;
4366     STRLEN plen;
4367 
4368     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4369 	 c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
4370 			    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4371 
4372     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4373     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4374         if (lenp)
4375 	    *lenp = 0;
4376 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
4377 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4378 		match = TRUE;
4379 	}
4380 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4381 	    match = TRUE;
4382 	if (!match) {
4383 	    AV *av;
4384 	    SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4385 
4386 	    if (sw) {
4387 		if (swash_fetch(sw, p, do_utf8))
4388 		    match = TRUE;
4389 		else if (flags & ANYOF_FOLD) {
4390 		    if (!match && lenp && av) {
4391 		        I32 i;
4392 
4393 			for (i = 0; i <= av_len(av); i++) {
4394 			    SV* sv = *av_fetch(av, i, FALSE);
4395 			    STRLEN len;
4396 			    char *s = SvPV(sv, len);
4397 
4398 			    if (len <= plen && memEQ(s, (char*)p, len)) {
4399 			        *lenp = len;
4400 				match = TRUE;
4401 				break;
4402 			    }
4403 			}
4404 		    }
4405 		    if (!match) {
4406 		        U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4407 			STRLEN tmplen;
4408 
4409 		        to_utf8_fold(p, tmpbuf, &tmplen);
4410 			if (swash_fetch(sw, tmpbuf, do_utf8))
4411 			    match = TRUE;
4412 		    }
4413 		}
4414 	    }
4415 	}
4416 	if (match && lenp && *lenp == 0)
4417 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
4418     }
4419     if (!match && c < 256) {
4420 	if (ANYOF_BITMAP_TEST(n, c))
4421 	    match = TRUE;
4422 	else if (flags & ANYOF_FOLD) {
4423 	    U8 f;
4424 
4425 	    if (flags & ANYOF_LOCALE) {
4426 		PL_reg_flags |= RF_tainted;
4427 		f = PL_fold_locale[c];
4428 	    }
4429 	    else
4430 		f = PL_fold[c];
4431 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
4432 		match = TRUE;
4433 	}
4434 
4435 	if (!match && (flags & ANYOF_CLASS)) {
4436 	    PL_reg_flags |= RF_tainted;
4437 	    if (
4438 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4439 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4440 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4441 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4442 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4443 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4444 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4445 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4446 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4447 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4448 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4449 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4450 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4451 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4452 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4453 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4454 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4455 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4456 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4457 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4458 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4459 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4460 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4461 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4462 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4463 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4464 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4465 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4466 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4467 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4468 		) /* How's that for a conditional? */
4469 	    {
4470 		match = TRUE;
4471 	    }
4472 	}
4473     }
4474 
4475     return (flags & ANYOF_INVERT) ? !match : match;
4476 }
4477 
4478 STATIC U8 *
S_reghop(pTHX_ U8 * s,I32 off)4479 S_reghop(pTHX_ U8 *s, I32 off)
4480 {
4481     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4482 }
4483 
4484 STATIC U8 *
S_reghop3(pTHX_ U8 * s,I32 off,U8 * lim)4485 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4486 {
4487     if (off >= 0) {
4488 	while (off-- && s < lim) {
4489 	    /* XXX could check well-formedness here */
4490 	    s += UTF8SKIP(s);
4491 	}
4492     }
4493     else {
4494 	while (off++) {
4495 	    if (s > lim) {
4496 		s--;
4497 		if (UTF8_IS_CONTINUED(*s)) {
4498 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4499 			s--;
4500 		}
4501 		/* XXX could check well-formedness here */
4502 	    }
4503 	}
4504     }
4505     return s;
4506 }
4507 
4508 STATIC U8 *
S_reghopmaybe(pTHX_ U8 * s,I32 off)4509 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4510 {
4511     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4512 }
4513 
4514 STATIC U8 *
S_reghopmaybe3(pTHX_ U8 * s,I32 off,U8 * lim)4515 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4516 {
4517     if (off >= 0) {
4518 	while (off-- && s < lim) {
4519 	    /* XXX could check well-formedness here */
4520 	    s += UTF8SKIP(s);
4521 	}
4522 	if (off >= 0)
4523 	    return 0;
4524     }
4525     else {
4526 	while (off++) {
4527 	    if (s > lim) {
4528 		s--;
4529 		if (UTF8_IS_CONTINUED(*s)) {
4530 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4531 			s--;
4532 		}
4533 		/* XXX could check well-formedness here */
4534 	    }
4535 	    else
4536 		break;
4537 	}
4538 	if (off <= 0)
4539 	    return 0;
4540     }
4541     return s;
4542 }
4543 
4544 static void
restore_pos(pTHX_ void * arg)4545 restore_pos(pTHX_ void *arg)
4546 {
4547     if (PL_reg_eval_set) {
4548 	if (PL_reg_oldsaved) {
4549 	    PL_reg_re->subbeg = PL_reg_oldsaved;
4550 	    PL_reg_re->sublen = PL_reg_oldsavedlen;
4551 	    RX_MATCH_COPIED_on(PL_reg_re);
4552 	}
4553 	PL_reg_magic->mg_len = PL_reg_oldpos;
4554 	PL_reg_eval_set = 0;
4555 	PL_curpm = PL_reg_oldcurpm;
4556     }
4557 }
4558 
4559 STATIC void
S_to_utf8_substr(pTHX_ register regexp * prog)4560 S_to_utf8_substr(pTHX_ register regexp *prog)
4561 {
4562     SV* sv;
4563     if (prog->float_substr && !prog->float_utf8) {
4564 	prog->float_utf8 = sv = NEWSV(117, 0);
4565 	SvSetSV(sv, prog->float_substr);
4566 	sv_utf8_upgrade(sv);
4567 	if (SvTAIL(prog->float_substr))
4568 	    SvTAIL_on(sv);
4569 	if (prog->float_substr == prog->check_substr)
4570 	    prog->check_utf8 = sv;
4571     }
4572     if (prog->anchored_substr && !prog->anchored_utf8) {
4573 	prog->anchored_utf8 = sv = NEWSV(118, 0);
4574 	SvSetSV(sv, prog->anchored_substr);
4575 	sv_utf8_upgrade(sv);
4576 	if (SvTAIL(prog->anchored_substr))
4577 	    SvTAIL_on(sv);
4578 	if (prog->anchored_substr == prog->check_substr)
4579 	    prog->check_utf8 = sv;
4580     }
4581 }
4582 
4583 STATIC void
S_to_byte_substr(pTHX_ register regexp * prog)4584 S_to_byte_substr(pTHX_ register regexp *prog)
4585 {
4586     SV* sv;
4587     if (prog->float_utf8 && !prog->float_substr) {
4588 	prog->float_substr = sv = NEWSV(117, 0);
4589 	SvSetSV(sv, prog->float_utf8);
4590 	if (sv_utf8_downgrade(sv, TRUE)) {
4591 	    if (SvTAIL(prog->float_utf8))
4592 		SvTAIL_on(sv);
4593 	} else {
4594 	    SvREFCNT_dec(sv);
4595 	    prog->float_substr = sv = &PL_sv_undef;
4596 	}
4597 	if (prog->float_utf8 == prog->check_utf8)
4598 	    prog->check_substr = sv;
4599     }
4600     if (prog->anchored_utf8 && !prog->anchored_substr) {
4601 	prog->anchored_substr = sv = NEWSV(118, 0);
4602 	SvSetSV(sv, prog->anchored_utf8);
4603 	if (sv_utf8_downgrade(sv, TRUE)) {
4604 	    if (SvTAIL(prog->anchored_utf8))
4605 		SvTAIL_on(sv);
4606 	} else {
4607 	    SvREFCNT_dec(sv);
4608 	    prog->anchored_substr = sv = &PL_sv_undef;
4609 	}
4610 	if (prog->anchored_utf8 == prog->check_utf8)
4611 	    prog->check_substr = sv;
4612     }
4613 }
4614