xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision daf88648c0e349d5c02e1504293082072c981640)
1 /*    regexec.c
2  */
3 
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7 
8 /* This file contains functions for executing a regular expression.  See
9  * also regcomp.c which funnily enough, contains functions for compiling
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_exec.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16 
17  */
18 
19 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
20  * confused with the original package (see point 3 below).  Thanks, Henry!
21  */
22 
23 /* Additional note: this code is very heavily munged from Henry's version
24  * in places.  In some spots I've traded clarity for efficiency, so don't
25  * blame Henry for some of the lack of readability.
26  */
27 
28 /* The names of the functions have been changed from regcomp and
29  * regexec to  pregcomp and pregexec in order to avoid conflicts
30  * with the POSIX routines of the same names.
31 */
32 
33 #ifdef PERL_EXT_RE_BUILD
34 /* need to replace pregcomp et al, so enable that */
35 #  ifndef PERL_IN_XSUB_RE
36 #    define PERL_IN_XSUB_RE
37 #  endif
38 /* need access to debugger hooks */
39 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
40 #    define DEBUGGING
41 #  endif
42 #endif
43 
44 #ifdef PERL_IN_XSUB_RE
45 /* We *really* need to overwrite these symbols: */
46 #  define Perl_regexec_flags my_regexec
47 #  define Perl_regdump my_regdump
48 #  define Perl_regprop my_regprop
49 #  define Perl_re_intuit_start my_re_intuit_start
50 /* *These* symbols are masked to allow static link. */
51 #  define Perl_pregexec my_pregexec
52 #  define Perl_reginitcolors my_reginitcolors
53 #  define Perl_regclass_swash my_regclass_swash
54 
55 #  define PERL_NO_GET_CONTEXT
56 #endif
57 
58 /*
59  * pregcomp and pregexec -- regsub and regerror are not used in perl
60  *
61  *	Copyright (c) 1986 by University of Toronto.
62  *	Written by Henry Spencer.  Not derived from licensed software.
63  *
64  *	Permission is granted to anyone to use this software for any
65  *	purpose on any computer system, and to redistribute it freely,
66  *	subject to the following restrictions:
67  *
68  *	1. The author is not responsible for the consequences of use of
69  *		this software, no matter how awful, even if they arise
70  *		from defects in it.
71  *
72  *	2. The origin of this software must not be misrepresented, either
73  *		by explicit claim or by omission.
74  *
75  *	3. Altered versions must be plainly marked as such, and must not
76  *		be misrepresented as being the original software.
77  *
78  ****    Alterations to Henry's code are...
79  ****
80  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
81  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
82  ****
83  ****    You may distribute under the terms of either the GNU General Public
84  ****    License or the Artistic License, as specified in the README file.
85  *
86  * Beware that some of this code is subtly aware of the way operator
87  * precedence is structured in regular expressions.  Serious changes in
88  * regular-expression syntax might require a total rethink.
89  */
90 #include "EXTERN.h"
91 #define PERL_IN_REGEXEC_C
92 #include "perl.h"
93 
94 #include "regcomp.h"
95 
96 #define RF_tainted	1		/* tainted information used? */
97 #define RF_warned	2		/* warned about big count? */
98 #define RF_evaled	4		/* Did an EVAL with setting? */
99 #define RF_utf8		8		/* String contains multibyte chars? */
100 
101 #define UTF ((PL_reg_flags & RF_utf8) != 0)
102 
103 #define RS_init		1		/* eval environment created */
104 #define RS_set		2		/* replsv value is set */
105 
106 #ifndef STATIC
107 #define	STATIC	static
108 #endif
109 
110 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
111 
112 /*
113  * Forwards.
114  */
115 
116 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
117 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
118 
119 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
120 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
121 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
122 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
123 #define HOPc(pos,off) ((char*)HOP(pos,off))
124 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
125 
126 #define HOPBACK(pos, off) (		\
127     (PL_reg_match_utf8)			\
128 	? reghopmaybe((U8*)pos, -off)	\
129     : (pos - off >= PL_bostr)		\
130 	? (U8*)(pos - off)		\
131     : (U8*)NULL				\
132 )
133 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
134 
135 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
136 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
137 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
138 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
139 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
140 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
141 
142 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
143     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((U8*)str); assert(ok); LEAVE; } } STMT_END
144 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
145 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
146 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
147 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
148 
149 /* for use after a quantifier and before an EXACT-like node -- japhy */
150 #define JUMPABLE(rn) ( \
151     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
152     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
153     OP(rn) == PLUS || OP(rn) == MINMOD || \
154     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
155 )
156 
157 #define HAS_TEXT(rn) ( \
158     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
159 )
160 
161 /*
162   Search for mandatory following text node; for lookahead, the text must
163   follow but for lookbehind (rn->flags != 0) we skip to the next step.
164 */
165 #define FIND_NEXT_IMPT(rn) STMT_START { \
166     while (JUMPABLE(rn)) \
167 	if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
168 	    rn = NEXTOPER(NEXTOPER(rn)); \
169 	else if (OP(rn) == PLUS) \
170 	    rn = NEXTOPER(rn); \
171 	else if (OP(rn) == IFMATCH) \
172 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
173 	else rn += NEXT_OFF(rn); \
174 } STMT_END
175 
176 static void restore_pos(pTHX_ void *arg);
177 
178 STATIC CHECKPOINT
179 S_regcppush(pTHX_ I32 parenfloor)
180 {
181     const int retval = PL_savestack_ix;
182 #define REGCP_PAREN_ELEMS 4
183     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
184     int p;
185 
186     if (paren_elems_to_push < 0)
187 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
188 
189 #define REGCP_OTHER_ELEMS 6
190     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
191     for (p = PL_regsize; p > parenfloor; p--) {
192 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
193 	SSPUSHINT(PL_regendp[p]);
194 	SSPUSHINT(PL_regstartp[p]);
195 	SSPUSHPTR(PL_reg_start_tmp[p]);
196 	SSPUSHINT(p);
197     }
198 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
199     SSPUSHINT(PL_regsize);
200     SSPUSHINT(*PL_reglastparen);
201     SSPUSHINT(*PL_reglastcloseparen);
202     SSPUSHPTR(PL_reginput);
203 #define REGCP_FRAME_ELEMS 2
204 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
205  * are needed for the regexp context stack bookkeeping. */
206     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
207     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
208 
209     return retval;
210 }
211 
212 /* These are needed since we do not localize EVAL nodes: */
213 #  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,		\
214 			     "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
215 			     (IV)PL_savestack_ix)); cp = PL_savestack_ix
216 
217 #  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?		\
218 				PerlIO_printf(Perl_debug_log,		\
219 				"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
220 				(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
221 
222 STATIC char *
223 S_regcppop(pTHX)
224 {
225     I32 i;
226     U32 paren = 0;
227     char *input;
228 
229     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
230     i = SSPOPINT;
231     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
232     i = SSPOPINT; /* Parentheses elements to pop. */
233     input = (char *) SSPOPPTR;
234     *PL_reglastcloseparen = SSPOPINT;
235     *PL_reglastparen = SSPOPINT;
236     PL_regsize = SSPOPINT;
237 
238     /* Now restore the parentheses context. */
239     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 	 i > 0; i -= REGCP_PAREN_ELEMS) {
241 	I32 tmps;
242 	paren = (U32)SSPOPINT;
243 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
244 	PL_regstartp[paren] = SSPOPINT;
245 	tmps = SSPOPINT;
246 	if (paren <= *PL_reglastparen)
247 	    PL_regendp[paren] = tmps;
248 	DEBUG_r(
249 	    PerlIO_printf(Perl_debug_log,
250 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
251 			  (UV)paren, (IV)PL_regstartp[paren],
252 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
253 			  (IV)PL_regendp[paren],
254 			  (paren > *PL_reglastparen ? "(no)" : ""));
255 	);
256     }
257     DEBUG_r(
258 	if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
259 	    PerlIO_printf(Perl_debug_log,
260 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
261 			  (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
262 	}
263     );
264 #if 1
265     /* It would seem that the similar code in regtry()
266      * already takes care of this, and in fact it is in
267      * a better location to since this code can #if 0-ed out
268      * but the code in regtry() is needed or otherwise tests
269      * requiring null fields (pat.t#187 and split.t#{13,14}
270      * (as of patchlevel 7877)  will fail.  Then again,
271      * this code seems to be necessary or otherwise
272      * building DynaLoader will fail:
273      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
274      * --jhi */
275     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
276 	if ((I32)paren > PL_regsize)
277 	    PL_regstartp[paren] = -1;
278 	PL_regendp[paren] = -1;
279     }
280 #endif
281     return input;
282 }
283 
284 STATIC char *
285 S_regcp_set_to(pTHX_ I32 ss)
286 {
287     const I32 tmp = PL_savestack_ix;
288 
289     PL_savestack_ix = ss;
290     regcppop();
291     PL_savestack_ix = tmp;
292     return Nullch;
293 }
294 
295 typedef struct re_cc_state
296 {
297     I32 ss;
298     regnode *node;
299     struct re_cc_state *prev;
300     CURCUR *cc;
301     regexp *re;
302 } re_cc_state;
303 
304 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
305 
306 #define TRYPAREN(paren, n, input) {				\
307     if (paren) {						\
308 	if (n) {						\
309 	    PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;	\
310 	    PL_regendp[paren] = input - PL_bostr;		\
311 	}							\
312 	else							\
313 	    PL_regendp[paren] = -1;				\
314     }								\
315     if (regmatch(next))						\
316 	sayYES;							\
317     if (paren && n)						\
318 	PL_regendp[paren] = -1;					\
319 }
320 
321 
322 /*
323  * pregexec and friends
324  */
325 
326 /*
327  - pregexec - match a regexp against a string
328  */
329 I32
330 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
331 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
332 /* strend: pointer to null at end of string */
333 /* strbeg: real beginning of string */
334 /* minend: end of match must be >=minend after stringarg. */
335 /* nosave: For optimizations. */
336 {
337     return
338 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
339 		      nosave ? 0 : REXEC_COPY_STR);
340 }
341 
342 STATIC void
343 S_cache_re(pTHX_ regexp *prog)
344 {
345     PL_regprecomp = prog->precomp;		/* Needed for FAIL. */
346 #ifdef DEBUGGING
347     PL_regprogram = prog->program;
348 #endif
349     PL_regnpar = prog->nparens;
350     PL_regdata = prog->data;
351     PL_reg_re = prog;
352 }
353 
354 /*
355  * Need to implement the following flags for reg_anch:
356  *
357  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
358  * USE_INTUIT_ML
359  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
360  * INTUIT_AUTORITATIVE_ML
361  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
362  * INTUIT_ONCE_ML
363  *
364  * Another flag for this function: SECOND_TIME (so that float substrs
365  * with giant delta may be not rechecked).
366  */
367 
368 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
369 
370 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
371    Otherwise, only SvCUR(sv) is used to get strbeg. */
372 
373 /* XXXX We assume that strpos is strbeg unless sv. */
374 
375 /* XXXX Some places assume that there is a fixed substring.
376 	An update may be needed if optimizer marks as "INTUITable"
377 	RExen without fixed substrings.  Similarly, it is assumed that
378 	lengths of all the strings are no more than minlen, thus they
379 	cannot come from lookahead.
380 	(Or minlen should take into account lookahead.) */
381 
382 /* A failure to find a constant substring means that there is no need to make
383    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
384    finding a substring too deep into the string means that less calls to
385    regtry() should be needed.
386 
387    REx compiler's optimizer found 4 possible hints:
388 	a) Anchored substring;
389 	b) Fixed substring;
390 	c) Whether we are anchored (beginning-of-line or \G);
391 	d) First node (of those at offset 0) which may distingush positions;
392    We use a)b)d) and multiline-part of c), and try to find a position in the
393    string which does not contradict any of them.
394  */
395 
396 /* Most of decisions we do here should have been done at compile time.
397    The nodes of the REx which we used for the search should have been
398    deleted from the finite automaton. */
399 
400 char *
401 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
402 		     char *strend, U32 flags, re_scream_pos_data *data)
403 {
404     register I32 start_shift = 0;
405     /* Should be nonnegative! */
406     register I32 end_shift   = 0;
407     register char *s;
408     register SV *check;
409     char *strbeg;
410     char *t;
411     const int do_utf8 = sv ? SvUTF8(sv) : 0;	/* if no sv we have to assume bytes */
412     I32 ml_anch;
413     register char *other_last = Nullch;	/* other substr checked before this */
414     char *check_at = Nullch;		/* check substr found at this pos */
415 #ifdef DEBUGGING
416     const char * const i_strpos = strpos;
417     SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
418 #endif
419     RX_MATCH_UTF8_set(prog,do_utf8);
420 
421     if (prog->reganch & ROPT_UTF8) {
422 	DEBUG_r(PerlIO_printf(Perl_debug_log,
423 			      "UTF-8 regex...\n"));
424 	PL_reg_flags |= RF_utf8;
425     }
426 
427     DEBUG_r({
428 	 const char *s   = PL_reg_match_utf8 ?
429 	                 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
430 	                 strpos;
431 	 const int   len = PL_reg_match_utf8 ?
432 	                 strlen(s) : strend - strpos;
433 	 if (!PL_colorset)
434 	      reginitcolors();
435 	 if (PL_reg_match_utf8)
436 	     DEBUG_r(PerlIO_printf(Perl_debug_log,
437 				   "UTF-8 target...\n"));
438 	 PerlIO_printf(Perl_debug_log,
439 		       "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
440 		       PL_colors[4],PL_colors[5],PL_colors[0],
441 		       prog->precomp,
442 		       PL_colors[1],
443 		       (strlen(prog->precomp) > 60 ? "..." : ""),
444 		       PL_colors[0],
445 		       (int)(len > 60 ? 60 : len),
446 		       s, PL_colors[1],
447 		       (len > 60 ? "..." : "")
448 	      );
449     });
450 
451     /* CHR_DIST() would be more correct here but it makes things slow. */
452     if (prog->minlen > strend - strpos) {
453 	DEBUG_r(PerlIO_printf(Perl_debug_log,
454 			      "String too short... [re_intuit_start]\n"));
455 	goto fail;
456     }
457     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
458     PL_regeol = strend;
459     if (do_utf8) {
460 	if (!prog->check_utf8 && prog->check_substr)
461 	    to_utf8_substr(prog);
462 	check = prog->check_utf8;
463     } else {
464 	if (!prog->check_substr && prog->check_utf8)
465 	    to_byte_substr(prog);
466 	check = prog->check_substr;
467     }
468    if (check == &PL_sv_undef) {
469 	DEBUG_r(PerlIO_printf(Perl_debug_log,
470 		"Non-utf string cannot match utf check string\n"));
471 	goto fail;
472     }
473     if (prog->reganch & ROPT_ANCH) {	/* Match at beg-of-str or after \n */
474 	ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
475 		     || ( (prog->reganch & ROPT_ANCH_BOL)
476 			  && !PL_multiline ) );	/* Check after \n? */
477 
478 	if (!ml_anch) {
479 	  if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
480 				  | ROPT_IMPLICIT)) /* not a real BOL */
481 	       /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
482 	       && sv && !SvROK(sv)
483 	       && (strpos != strbeg)) {
484 	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
485 	      goto fail;
486 	  }
487 	  if (prog->check_offset_min == prog->check_offset_max &&
488 	      !(prog->reganch & ROPT_CANY_SEEN)) {
489 	    /* Substring at constant offset from beg-of-str... */
490 	    I32 slen;
491 
492 	    s = HOP3c(strpos, prog->check_offset_min, strend);
493 	    if (SvTAIL(check)) {
494 		slen = SvCUR(check);	/* >= 1 */
495 
496 		if ( strend - s > slen || strend - s < slen - 1
497 		     || (strend - s == slen && strend[-1] != '\n')) {
498 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
499 		    goto fail_finish;
500 		}
501 		/* Now should match s[0..slen-2] */
502 		slen--;
503 		if (slen && (*SvPVX_const(check) != *s
504 			     || (slen > 1
505 				 && memNE(SvPVX_const(check), s, slen)))) {
506 		  report_neq:
507 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
508 		    goto fail_finish;
509 		}
510 	    }
511 	    else if (*SvPVX_const(check) != *s
512 		     || ((slen = SvCUR(check)) > 1
513 			 && memNE(SvPVX_const(check), s, slen)))
514 		goto report_neq;
515 	    check_at = s;
516 	    goto success_at_start;
517 	  }
518 	}
519 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
520 	s = strpos;
521 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
522 	end_shift = prog->minlen - start_shift -
523 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
524 	if (!ml_anch) {
525 	    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
526 					 - (SvTAIL(check) != 0);
527 	    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
528 
529 	    if (end_shift < eshift)
530 		end_shift = eshift;
531 	}
532     }
533     else {				/* Can match at random position */
534 	ml_anch = 0;
535 	s = strpos;
536 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
537 	/* Should be nonnegative! */
538 	end_shift = prog->minlen - start_shift -
539 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
540     }
541 
542 #ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
543     if (end_shift < 0)
544 	Perl_croak(aTHX_ "panic: end_shift");
545 #endif
546 
547   restart:
548     /* Find a possible match in the region s..strend by looking for
549        the "check" substring in the region corrected by start/end_shift. */
550     if (flags & REXEC_SCREAM) {
551 	I32 p = -1;			/* Internal iterator of scream. */
552 	I32 * const pp = data ? data->scream_pos : &p;
553 
554 	if (PL_screamfirst[BmRARE(check)] >= 0
555 	    || ( BmRARE(check) == '\n'
556 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
557 		 && SvTAIL(check) ))
558 	    s = screaminstr(sv, check,
559 			    start_shift + (s - strbeg), end_shift, pp, 0);
560 	else
561 	    goto fail_finish;
562 	/* we may be pointing at the wrong string */
563 	if (s && RX_MATCH_COPIED(prog))
564 	    s = strbeg + (s - SvPVX_const(sv));
565 	if (data)
566 	    *data->scream_olds = s;
567     }
568     else if (prog->reganch & ROPT_CANY_SEEN)
569 	s = fbm_instr((U8*)(s + start_shift),
570 		      (U8*)(strend - end_shift),
571 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
572     else
573 	s = fbm_instr(HOP3(s, start_shift, strend),
574 		      HOP3(strend, -end_shift, strbeg),
575 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
576 
577     /* Update the count-of-usability, remove useless subpatterns,
578 	unshift s.  */
579 
580 	/* FIXME - DEBUG_EXECUTE_r if that is merged to maint.  */
581     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
582 			  (s ? "Found" : "Did not find"),
583 			  (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
584 			  PL_colors[0],
585 			  (int)(SvCUR(check) - (SvTAIL(check)!=0)),
586 			  SvPVX_const(check),
587 			  PL_colors[1], (SvTAIL(check) ? "$" : ""),
588 			  (s ? " at offset " : "...\n") ) );
589 
590     if (!s)
591 	goto fail_finish;
592 
593     check_at = s;
594 
595     /* Finish the diagnostic message */
596     DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
597 
598     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
599        Start with the other substr.
600        XXXX no SCREAM optimization yet - and a very coarse implementation
601        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
602 		*always* match.  Probably should be marked during compile...
603        Probably it is right to do no SCREAM here...
604      */
605 
606     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
607 	/* Take into account the "other" substring. */
608 	/* XXXX May be hopelessly wrong for UTF... */
609 	if (!other_last)
610 	    other_last = strpos;
611 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
612 	  do_other_anchored:
613 	    {
614 		char * const last = HOP3c(s, -start_shift, strbeg);
615 		char *last1, *last2;
616 		char *s1 = s;
617 		SV* must;
618 
619 		t = s - prog->check_offset_max;
620 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
621 		    && (!do_utf8
622 			|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
623 			    && t > strpos)))
624 		    /* EMPTY */;
625 		else
626 		    t = strpos;
627 		t = HOP3c(t, prog->anchored_offset, strend);
628 		if (t < other_last)	/* These positions already checked */
629 		    t = other_last;
630 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
631 		if (last < last1)
632 		    last1 = last;
633  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
634 		/* On end-of-str: see comment below. */
635 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
636 		if (must == &PL_sv_undef) {
637 		    s = (char*)NULL;
638 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
639 		}
640 		else
641 		    s = fbm_instr(
642 			(unsigned char*)t,
643 			HOP3(HOP3(last1, prog->anchored_offset, strend)
644 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
645 			must,
646 			PL_multiline ? FBMrf_MULTILINE : 0
647 		    );
648 		DEBUG_r(PerlIO_printf(Perl_debug_log,
649 			"%s anchored substr \"%s%.*s%s\"%s",
650 			(s ? "Found" : "Contradicts"),
651 			PL_colors[0],
652 			  (int)(SvCUR(must)
653 			  - (SvTAIL(must)!=0)),
654 			  SvPVX_const(must),
655 			  PL_colors[1], (SvTAIL(must) ? "$" : "")));
656 		if (!s) {
657 		    if (last1 >= last2) {
658 			DEBUG_r(PerlIO_printf(Perl_debug_log,
659 						", giving up...\n"));
660 			goto fail_finish;
661 		    }
662 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
663 			", trying floating at offset %ld...\n",
664 			(long)(HOP3c(s1, 1, strend) - i_strpos)));
665 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
666 		    s = HOP3c(last, 1, strend);
667 		    goto restart;
668 		}
669 		else {
670 		    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
671 			  (long)(s - i_strpos)));
672 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
673 		    other_last = HOP3c(s, 1, strend);
674 		    s = s1;
675 		    if (t == strpos)
676 			goto try_at_start;
677 		    goto try_at_offset;
678 		}
679 	    }
680 	}
681 	else {		/* Take into account the floating substring. */
682 	    char *last, *last1;
683 	    char *s1 = s;
684 	    SV* must;
685 
686 	    t = HOP3c(s, -start_shift, strbeg);
687 	    last1 = last =
688 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
689 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
690 		last = HOP3c(t, prog->float_max_offset, strend);
691 	    s = HOP3c(t, prog->float_min_offset, strend);
692 	    if (s < other_last)
693 		s = other_last;
694  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
695 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
696 	    /* fbm_instr() takes into account exact value of end-of-str
697 	       if the check is SvTAIL(ed).  Since false positives are OK,
698 	       and end-of-str is not later than strend we are OK. */
699 	    if (must == &PL_sv_undef) {
700 		s = (char*)NULL;
701 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
702 	    }
703 	    else
704 		s = fbm_instr((unsigned char*)s,
705 			      (unsigned char*)last + SvCUR(must)
706 				  - (SvTAIL(must)!=0),
707 			      must, PL_multiline ? FBMrf_MULTILINE : 0);
708 	    /* FIXME - DEBUG_EXECUTE_r if that is merged to maint  */
709 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
710 		    (s ? "Found" : "Contradicts"),
711 		    PL_colors[0],
712 		      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
713 		      SvPVX_const(must),
714 		      PL_colors[1], (SvTAIL(must) ? "$" : "")));
715 	    if (!s) {
716 		if (last1 == last) {
717 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
718 					    ", giving up...\n"));
719 		    goto fail_finish;
720 		}
721 		DEBUG_r(PerlIO_printf(Perl_debug_log,
722 		    ", trying anchored starting at offset %ld...\n",
723 		    (long)(s1 + 1 - i_strpos)));
724 		other_last = last;
725 		s = HOP3c(t, 1, strend);
726 		goto restart;
727 	    }
728 	    else {
729 		DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
730 		      (long)(s - i_strpos)));
731 		other_last = s; /* Fix this later. --Hugo */
732 		s = s1;
733 		if (t == strpos)
734 		    goto try_at_start;
735 		goto try_at_offset;
736 	    }
737 	}
738     }
739 
740     t = s - prog->check_offset_max;
741     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
742         && (!do_utf8
743 	    || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
744 		 && t > strpos))) {
745 	/* Fixed substring is found far enough so that the match
746 	   cannot start at strpos. */
747       try_at_offset:
748 	if (ml_anch && t[-1] != '\n') {
749 	    /* Eventually fbm_*() should handle this, but often
750 	       anchored_offset is not 0, so this check will not be wasted. */
751 	    /* XXXX In the code below we prefer to look for "^" even in
752 	       presence of anchored substrings.  And we search even
753 	       beyond the found float position.  These pessimizations
754 	       are historical artefacts only.  */
755 	  find_anchor:
756 	    while (t < strend - prog->minlen) {
757 		if (*t == '\n') {
758 		    if (t < check_at - prog->check_offset_min) {
759 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
760 			    /* Since we moved from the found position,
761 			       we definitely contradict the found anchored
762 			       substr.  Due to the above check we do not
763 			       contradict "check" substr.
764 			       Thus we can arrive here only if check substr
765 			       is float.  Redo checking for "other"=="fixed".
766 			     */
767 			    strpos = t + 1;
768 			    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
769 				PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
770 			    goto do_other_anchored;
771 			}
772 			/* We don't contradict the found floating substring. */
773 			/* XXXX Why not check for STCLASS? */
774 			s = t + 1;
775 			DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
776 			    PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
777 			goto set_useful;
778 		    }
779 		    /* Position contradicts check-string */
780 		    /* XXXX probably better to look for check-string
781 		       than for "\n", so one should lower the limit for t? */
782 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
783 			PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
784 		    other_last = strpos = s = t + 1;
785 		    goto restart;
786 		}
787 		t++;
788 	    }
789 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
790 			PL_colors[0],PL_colors[1]));
791 	    goto fail_finish;
792 	}
793 	else {
794 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
795 			PL_colors[0],PL_colors[1]));
796 	}
797 	s = t;
798       set_useful:
799 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
800     }
801     else {
802 	/* The found string does not prohibit matching at strpos,
803 	   - no optimization of calling REx engine can be performed,
804 	   unless it was an MBOL and we are not after MBOL,
805 	   or a future STCLASS check will fail this. */
806       try_at_start:
807 	/* Even in this situation we may use MBOL flag if strpos is offset
808 	   wrt the start of the string. */
809 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
810 	    && (strpos != strbeg) && strpos[-1] != '\n'
811 	    /* May be due to an implicit anchor of m{.*foo}  */
812 	    && !(prog->reganch & ROPT_IMPLICIT))
813 	{
814 	    t = strpos;
815 	    goto find_anchor;
816 	}
817 	DEBUG_r( if (ml_anch)
818 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
819 			(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
820 	);
821       success_at_start:
822 	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
823 	    && (do_utf8 ? (
824 		prog->check_utf8		/* Could be deleted already */
825 		&& --BmUSEFUL(prog->check_utf8) < 0
826 		&& (prog->check_utf8 == prog->float_utf8)
827 	    ) : (
828 		prog->check_substr		/* Could be deleted already */
829 		&& --BmUSEFUL(prog->check_substr) < 0
830 		&& (prog->check_substr == prog->float_substr)
831 	    )))
832 	{
833 	    /* If flags & SOMETHING - do not do it many times on the same match */
834 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
835 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
836 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
837 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
838 	    prog->check_substr = prog->check_utf8 = Nullsv;	/* disable */
839 	    prog->float_substr = prog->float_utf8 = Nullsv;	/* clear */
840 	    check = Nullsv;			/* abort */
841 	    s = strpos;
842 	    /* XXXX This is a remnant of the old implementation.  It
843 	            looks wasteful, since now INTUIT can use many
844 	            other heuristics. */
845 	    prog->reganch &= ~RE_USE_INTUIT;
846 	}
847 	else
848 	    s = strpos;
849     }
850 
851     /* Last resort... */
852     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
853     if (prog->regstclass) {
854 	/* minlen == 0 is possible if regstclass is \b or \B,
855 	   and the fixed substr is ''$.
856 	   Since minlen is already taken into account, s+1 is before strend;
857 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
858 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
859 	   regstclass does not come from lookahead...  */
860 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
861 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
862         const U8* const str = (U8*)STRING(prog->regstclass);
863         const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
864 			  ? CHR_DIST((U8 *)str+STR_LEN(prog->regstclass),
865 				     (U8 *)str)
866 		    : 1);
867 	const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
868 		? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
869 		: (prog->float_substr || prog->float_utf8
870 		   ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
871 			   cl_l, strend)
872 		   : strend);
873 
874 	t = s;
875 	cache_re(prog);
876         s = find_byclass(prog, prog->regstclass, s, endpos, 1);
877 	if (!s) {
878 #ifdef DEBUGGING
879 	    const char *what = 0;
880 #endif
881 	    if (endpos == strend) {
882 		DEBUG_r( PerlIO_printf(Perl_debug_log,
883 				"Could not match STCLASS...\n") );
884 		goto fail;
885 	    }
886 	    DEBUG_r( PerlIO_printf(Perl_debug_log,
887 				   "This position contradicts STCLASS...\n") );
888 	    if ((prog->reganch & ROPT_ANCH) && !ml_anch)
889 		goto fail;
890 	    /* Contradict one of substrings */
891 	    if (prog->anchored_substr || prog->anchored_utf8) {
892 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
893 		    DEBUG_r( what = "anchored" );
894 		  hop_and_restart:
895 		    s = HOP3c(t, 1, strend);
896 		    if (s + start_shift + end_shift > strend) {
897 			/* XXXX Should be taken into account earlier? */
898 			DEBUG_r( PerlIO_printf(Perl_debug_log,
899 					       "Could not match STCLASS...\n") );
900 			goto fail;
901 		    }
902 		    if (!check)
903 			goto giveup;
904 		    DEBUG_r( PerlIO_printf(Perl_debug_log,
905 				"Looking for %s substr starting at offset %ld...\n",
906 				 what, (long)(s + start_shift - i_strpos)) );
907 		    goto restart;
908 		}
909 		/* Have both, check_string is floating */
910 		if (t + start_shift >= check_at) /* Contradicts floating=check */
911 		    goto retry_floating_check;
912 		/* Recheck anchored substring, but not floating... */
913 		s = check_at;
914 		if (!check)
915 		    goto giveup;
916 		DEBUG_r( PerlIO_printf(Perl_debug_log,
917 			  "Looking for anchored substr starting at offset %ld...\n",
918 			  (long)(other_last - i_strpos)) );
919 		goto do_other_anchored;
920 	    }
921 	    /* Another way we could have checked stclass at the
922                current position only: */
923 	    if (ml_anch) {
924 		s = t = t + 1;
925 		if (!check)
926 		    goto giveup;
927 		DEBUG_r( PerlIO_printf(Perl_debug_log,
928 			  "Looking for /%s^%s/m starting at offset %ld...\n",
929 			  PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
930 		goto try_at_offset;
931 	    }
932 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
933 		goto fail;
934 	    /* Check is floating subtring. */
935 	  retry_floating_check:
936 	    t = check_at - start_shift;
937 	    DEBUG_r( what = "floating" );
938 	    goto hop_and_restart;
939 	}
940 	if (t != s) {
941             DEBUG_r(PerlIO_printf(Perl_debug_log,
942 			"By STCLASS: moving %ld --> %ld\n",
943                                   (long)(t - i_strpos), (long)(s - i_strpos))
944                    );
945         }
946         else {
947             DEBUG_r(PerlIO_printf(Perl_debug_log,
948                                   "Does not contradict STCLASS...\n");
949                    );
950         }
951     }
952   giveup:
953     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
954 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
955 			  PL_colors[5], (long)(s - i_strpos)) );
956     return s;
957 
958   fail_finish:				/* Substring not found */
959     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
960 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
961   fail:
962     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
963 			  PL_colors[4],PL_colors[5]));
964     return Nullch;
965 }
966 
967 /* We know what class REx starts with.  Try to find this position... */
968 STATIC char *
969 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
970 {
971 	const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
972 	char *m;
973 	STRLEN ln;
974 	STRLEN lnc;
975 	register STRLEN uskip;
976 	unsigned int c1;
977 	unsigned int c2;
978 	char *e;
979 	register I32 tmp = 1;	/* Scratch variable? */
980 	register const bool do_utf8 = PL_reg_match_utf8;
981 
982 	/* We know what class it must start with. */
983 	switch (OP(c)) {
984 	case ANYOF:
985 	    if (do_utf8) {
986 		 while (s + (uskip = UTF8SKIP(s)) <= strend) {
987 		      if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
988 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
989 			  reginclass(c, (U8*)s, 0, do_utf8) :
990 			  REGINCLASS(c, (U8*)s)) {
991 			   if (tmp && (norun || regtry(prog, s)))
992 				goto got_it;
993 			   else
994 				tmp = doevery;
995 		      }
996 		      else
997 			   tmp = 1;
998 		      s += uskip;
999 		 }
1000 	    }
1001 	    else {
1002 		 while (s < strend) {
1003 		      STRLEN skip = 1;
1004 
1005 		      if (REGINCLASS(c, (U8*)s) ||
1006 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1007 			   /* The assignment of 2 is intentional:
1008 			    * for the folded sharp s, the skip is 2. */
1009 			   (skip = SHARP_S_SKIP))) {
1010 			   if (tmp && (norun || regtry(prog, s)))
1011 				goto got_it;
1012 			   else
1013 				tmp = doevery;
1014 		      }
1015 		      else
1016 			   tmp = 1;
1017 		      s += skip;
1018 		 }
1019 	    }
1020 	    break;
1021 	case CANY:
1022 	    while (s < strend) {
1023 	        if (tmp && (norun || regtry(prog, s)))
1024 		    goto got_it;
1025 		else
1026 		    tmp = doevery;
1027 		s++;
1028 	    }
1029 	    break;
1030 	case EXACTF:
1031 	    m   = STRING(c);
1032 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1033 	    lnc = (I32) ln;	/* length to match in characters */
1034 	    if (UTF) {
1035 	        STRLEN ulen1, ulen2;
1036 		U8 *sm = (U8 *) m;
1037 		U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1038 		U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1039 		const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
1040 
1041 		to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1042 		to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1043 
1044 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1045 				    0, uniflags);
1046 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1047 				    0, uniflags);
1048 		lnc = 0;
1049 		while (sm < ((U8 *) m + ln)) {
1050 		    lnc++;
1051 		    sm += UTF8SKIP(sm);
1052 		}
1053 	    }
1054 	    else {
1055 		c1 = *(U8*)m;
1056 		c2 = PL_fold[c1];
1057 	    }
1058 	    goto do_exactf;
1059 	case EXACTFL:
1060 	    m   = STRING(c);
1061 	    ln  = STR_LEN(c);
1062 	    lnc = (I32) ln;
1063 	    c1 = *(U8*)m;
1064 	    c2 = PL_fold_locale[c1];
1065 	  do_exactf:
1066 	    e = HOP3c(strend, -((I32)lnc), s);
1067 
1068 	    if (norun && e < s)
1069 		e = s;			/* Due to minlen logic of intuit() */
1070 
1071 	    /* The idea in the EXACTF* cases is to first find the
1072 	     * first character of the EXACTF* node and then, if
1073 	     * necessary, case-insensitively compare the full
1074 	     * text of the node.  The c1 and c2 are the first
1075 	     * characters (though in Unicode it gets a bit
1076 	     * more complicated because there are more cases
1077 	     * than just upper and lower: one needs to use
1078 	     * the so-called folding case for case-insensitive
1079 	     * matching (called "loose matching" in Unicode).
1080 	     * ibcmp_utf8() will do just that. */
1081 
1082 	    if (do_utf8) {
1083 	        UV c, f;
1084 	        U8 tmpbuf [UTF8_MAXBYTES+1];
1085 		STRLEN len, foldlen;
1086 		const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
1087 		if (c1 == c2) {
1088 		    /* Upper and lower of 1st char are equal -
1089 		     * probably not a "letter". */
1090 		    while (s <= e) {
1091 		        c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1092 					   uniflags);
1093 			if ( c == c1
1094 			     && (ln == len ||
1095 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1096 					    m, (char **)0, ln, (bool)UTF))
1097 			     && (norun || regtry(prog, s)) )
1098 			    goto got_it;
1099 			else {
1100 			     U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1101 			     uvchr_to_utf8(tmpbuf, c);
1102 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1103 			     if ( f != c
1104 				  && (f == c1 || f == c2)
1105 				  && (ln == foldlen ||
1106 				      !ibcmp_utf8((char *) foldbuf,
1107 						  (char **)0, foldlen, do_utf8,
1108 						  m,
1109 						  (char **)0, ln, (bool)UTF))
1110 				  && (norun || regtry(prog, s)) )
1111 				  goto got_it;
1112 			}
1113 			s += len;
1114 		    }
1115 		}
1116 		else {
1117 		    while (s <= e) {
1118 		      c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1119 					   uniflags);
1120 
1121 			/* Handle some of the three Greek sigmas cases.
1122 			 * Note that not all the possible combinations
1123 			 * are handled here: some of them are handled
1124 			 * by the standard folding rules, and some of
1125 			 * them (the character class or ANYOF cases)
1126 			 * are handled during compiletime in
1127 			 * regexec.c:S_regclass(). */
1128 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1129 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1130 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1131 
1132 			if ( (c == c1 || c == c2)
1133 			     && (ln == len ||
1134 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1135 					    m, (char **)0, ln, (bool)UTF))
1136 			     && (norun || regtry(prog, s)) )
1137 			    goto got_it;
1138 			else {
1139 			     U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1140 			     uvchr_to_utf8(tmpbuf, c);
1141 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1142 			     if ( f != c
1143 				  && (f == c1 || f == c2)
1144 				  && (ln == foldlen ||
1145 				      !ibcmp_utf8((char *) foldbuf,
1146 						  (char **)0, foldlen, do_utf8,
1147 						  m,
1148 						  (char **)0, ln, (bool)UTF))
1149 				  && (norun || regtry(prog, s)) )
1150 				  goto got_it;
1151 			}
1152 			s += len;
1153 		    }
1154 		}
1155 	    }
1156 	    else {
1157 		if (c1 == c2)
1158 		    while (s <= e) {
1159 			if ( *(U8*)s == c1
1160 			     && (ln == 1 || !(OP(c) == EXACTF
1161 					      ? ibcmp(s, m, ln)
1162 					      : ibcmp_locale(s, m, ln)))
1163 			     && (norun || regtry(prog, s)) )
1164 			    goto got_it;
1165 			s++;
1166 		    }
1167 		else
1168 		    while (s <= e) {
1169 			if ( (*(U8*)s == c1 || *(U8*)s == c2)
1170 			     && (ln == 1 || !(OP(c) == EXACTF
1171 					      ? ibcmp(s, m, ln)
1172 					      : ibcmp_locale(s, m, ln)))
1173 			     && (norun || regtry(prog, s)) )
1174 			    goto got_it;
1175 			s++;
1176 		    }
1177 	    }
1178 	    break;
1179 	case BOUNDL:
1180 	    PL_reg_flags |= RF_tainted;
1181 	    /* FALL THROUGH */
1182 	case BOUND:
1183 	    if (do_utf8) {
1184 		if (s == PL_bostr)
1185 		    tmp = '\n';
1186 		else {
1187 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1188 
1189 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1190 		}
1191 		tmp = ((OP(c) == BOUND ?
1192 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1193 		LOAD_UTF8_CHARCLASS_ALNUM();
1194 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1195 		    if (tmp == !(OP(c) == BOUND ?
1196 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1197 				 isALNUM_LC_utf8((U8*)s)))
1198 		    {
1199 			tmp = !tmp;
1200 			if ((norun || regtry(prog, s)))
1201 			    goto got_it;
1202 		    }
1203 		    s += uskip;
1204 		}
1205 	    }
1206 	    else {
1207 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1208 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1209 		while (s < strend) {
1210 		    if (tmp ==
1211 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1212 			tmp = !tmp;
1213 			if ((norun || regtry(prog, s)))
1214 			    goto got_it;
1215 		    }
1216 		    s++;
1217 		}
1218 	    }
1219 	    if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1220 		goto got_it;
1221 	    break;
1222 	case NBOUNDL:
1223 	    PL_reg_flags |= RF_tainted;
1224 	    /* FALL THROUGH */
1225 	case NBOUND:
1226 	    if (do_utf8) {
1227 		if (s == PL_bostr)
1228 		    tmp = '\n';
1229 		else {
1230 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1231 
1232 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1233 		}
1234 		tmp = ((OP(c) == NBOUND ?
1235 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1236 		LOAD_UTF8_CHARCLASS_ALNUM();
1237 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1238 		    if (tmp == !(OP(c) == NBOUND ?
1239 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1240 				 isALNUM_LC_utf8((U8*)s)))
1241 			tmp = !tmp;
1242 		    else if ((norun || regtry(prog, s)))
1243 			goto got_it;
1244 		    s += uskip;
1245 		}
1246 	    }
1247 	    else {
1248 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1249 		tmp = ((OP(c) == NBOUND ?
1250 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1251 		while (s < strend) {
1252 		    if (tmp ==
1253 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1254 			tmp = !tmp;
1255 		    else if ((norun || regtry(prog, s)))
1256 			goto got_it;
1257 		    s++;
1258 		}
1259 	    }
1260 	    if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1261 		goto got_it;
1262 	    break;
1263 	case ALNUM:
1264 	    if (do_utf8) {
1265 		LOAD_UTF8_CHARCLASS_ALNUM();
1266 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1267 		    if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1268 			if (tmp && (norun || regtry(prog, s)))
1269 			    goto got_it;
1270 			else
1271 			    tmp = doevery;
1272 		    }
1273 		    else
1274 			tmp = 1;
1275 		    s += uskip;
1276 		}
1277 	    }
1278 	    else {
1279 		while (s < strend) {
1280 		    if (isALNUM(*s)) {
1281 			if (tmp && (norun || regtry(prog, s)))
1282 			    goto got_it;
1283 			else
1284 			    tmp = doevery;
1285 		    }
1286 		    else
1287 			tmp = 1;
1288 		    s++;
1289 		}
1290 	    }
1291 	    break;
1292 	case ALNUML:
1293 	    PL_reg_flags |= RF_tainted;
1294 	    if (do_utf8) {
1295 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1296 		    if (isALNUM_LC_utf8((U8*)s)) {
1297 			if (tmp && (norun || regtry(prog, s)))
1298 			    goto got_it;
1299 			else
1300 			    tmp = doevery;
1301 		    }
1302 		    else
1303 			tmp = 1;
1304 		    s += uskip;
1305 		}
1306 	    }
1307 	    else {
1308 		while (s < strend) {
1309 		    if (isALNUM_LC(*s)) {
1310 			if (tmp && (norun || regtry(prog, s)))
1311 			    goto got_it;
1312 			else
1313 			    tmp = doevery;
1314 		    }
1315 		    else
1316 			tmp = 1;
1317 		    s++;
1318 		}
1319 	    }
1320 	    break;
1321 	case NALNUM:
1322 	    if (do_utf8) {
1323 		LOAD_UTF8_CHARCLASS_ALNUM();
1324 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1325 		    if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1326 			if (tmp && (norun || regtry(prog, s)))
1327 			    goto got_it;
1328 			else
1329 			    tmp = doevery;
1330 		    }
1331 		    else
1332 			tmp = 1;
1333 		    s += uskip;
1334 		}
1335 	    }
1336 	    else {
1337 		while (s < strend) {
1338 		    if (!isALNUM(*s)) {
1339 			if (tmp && (norun || regtry(prog, s)))
1340 			    goto got_it;
1341 			else
1342 			    tmp = doevery;
1343 		    }
1344 		    else
1345 			tmp = 1;
1346 		    s++;
1347 		}
1348 	    }
1349 	    break;
1350 	case NALNUML:
1351 	    PL_reg_flags |= RF_tainted;
1352 	    if (do_utf8) {
1353 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1354 		    if (!isALNUM_LC_utf8((U8*)s)) {
1355 			if (tmp && (norun || regtry(prog, s)))
1356 			    goto got_it;
1357 			else
1358 			    tmp = doevery;
1359 		    }
1360 		    else
1361 			tmp = 1;
1362 		    s += uskip;
1363 		}
1364 	    }
1365 	    else {
1366 		while (s < strend) {
1367 		    if (!isALNUM_LC(*s)) {
1368 			if (tmp && (norun || regtry(prog, s)))
1369 			    goto got_it;
1370 			else
1371 			    tmp = doevery;
1372 		    }
1373 		    else
1374 			tmp = 1;
1375 		    s++;
1376 		}
1377 	    }
1378 	    break;
1379 	case SPACE:
1380 	    if (do_utf8) {
1381 		LOAD_UTF8_CHARCLASS_SPACE();
1382 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1383 		    if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1384 			if (tmp && (norun || regtry(prog, s)))
1385 			    goto got_it;
1386 			else
1387 			    tmp = doevery;
1388 		    }
1389 		    else
1390 			tmp = 1;
1391 		    s += uskip;
1392 		}
1393 	    }
1394 	    else {
1395 		while (s < strend) {
1396 		    if (isSPACE(*s)) {
1397 			if (tmp && (norun || regtry(prog, s)))
1398 			    goto got_it;
1399 			else
1400 			    tmp = doevery;
1401 		    }
1402 		    else
1403 			tmp = 1;
1404 		    s++;
1405 		}
1406 	    }
1407 	    break;
1408 	case SPACEL:
1409 	    PL_reg_flags |= RF_tainted;
1410 	    if (do_utf8) {
1411 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1412 		    if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1413 			if (tmp && (norun || regtry(prog, s)))
1414 			    goto got_it;
1415 			else
1416 			    tmp = doevery;
1417 		    }
1418 		    else
1419 			tmp = 1;
1420 		    s += uskip;
1421 		}
1422 	    }
1423 	    else {
1424 		while (s < strend) {
1425 		    if (isSPACE_LC(*s)) {
1426 			if (tmp && (norun || regtry(prog, s)))
1427 			    goto got_it;
1428 			else
1429 			    tmp = doevery;
1430 		    }
1431 		    else
1432 			tmp = 1;
1433 		    s++;
1434 		}
1435 	    }
1436 	    break;
1437 	case NSPACE:
1438 	    if (do_utf8) {
1439 		LOAD_UTF8_CHARCLASS_SPACE();
1440 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1441 		    if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1442 			if (tmp && (norun || regtry(prog, s)))
1443 			    goto got_it;
1444 			else
1445 			    tmp = doevery;
1446 		    }
1447 		    else
1448 			tmp = 1;
1449 		    s += uskip;
1450 		}
1451 	    }
1452 	    else {
1453 		while (s < strend) {
1454 		    if (!isSPACE(*s)) {
1455 			if (tmp && (norun || regtry(prog, s)))
1456 			    goto got_it;
1457 			else
1458 			    tmp = doevery;
1459 		    }
1460 		    else
1461 			tmp = 1;
1462 		    s++;
1463 		}
1464 	    }
1465 	    break;
1466 	case NSPACEL:
1467 	    PL_reg_flags |= RF_tainted;
1468 	    if (do_utf8) {
1469 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1470 		    if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1471 			if (tmp && (norun || regtry(prog, s)))
1472 			    goto got_it;
1473 			else
1474 			    tmp = doevery;
1475 		    }
1476 		    else
1477 			tmp = 1;
1478 		    s += uskip;
1479 		}
1480 	    }
1481 	    else {
1482 		while (s < strend) {
1483 		    if (!isSPACE_LC(*s)) {
1484 			if (tmp && (norun || regtry(prog, s)))
1485 			    goto got_it;
1486 			else
1487 			    tmp = doevery;
1488 		    }
1489 		    else
1490 			tmp = 1;
1491 		    s++;
1492 		}
1493 	    }
1494 	    break;
1495 	case DIGIT:
1496 	    if (do_utf8) {
1497 		LOAD_UTF8_CHARCLASS_DIGIT();
1498 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1499 		    if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1500 			if (tmp && (norun || regtry(prog, s)))
1501 			    goto got_it;
1502 			else
1503 			    tmp = doevery;
1504 		    }
1505 		    else
1506 			tmp = 1;
1507 		    s += uskip;
1508 		}
1509 	    }
1510 	    else {
1511 		while (s < strend) {
1512 		    if (isDIGIT(*s)) {
1513 			if (tmp && (norun || regtry(prog, s)))
1514 			    goto got_it;
1515 			else
1516 			    tmp = doevery;
1517 		    }
1518 		    else
1519 			tmp = 1;
1520 		    s++;
1521 		}
1522 	    }
1523 	    break;
1524 	case DIGITL:
1525 	    PL_reg_flags |= RF_tainted;
1526 	    if (do_utf8) {
1527 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1528 		    if (isDIGIT_LC_utf8((U8*)s)) {
1529 			if (tmp && (norun || regtry(prog, s)))
1530 			    goto got_it;
1531 			else
1532 			    tmp = doevery;
1533 		    }
1534 		    else
1535 			tmp = 1;
1536 		    s += uskip;
1537 		}
1538 	    }
1539 	    else {
1540 		while (s < strend) {
1541 		    if (isDIGIT_LC(*s)) {
1542 			if (tmp && (norun || regtry(prog, s)))
1543 			    goto got_it;
1544 			else
1545 			    tmp = doevery;
1546 		    }
1547 		    else
1548 			tmp = 1;
1549 		    s++;
1550 		}
1551 	    }
1552 	    break;
1553 	case NDIGIT:
1554 	    if (do_utf8) {
1555 		LOAD_UTF8_CHARCLASS_DIGIT();
1556 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1557 		    if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1558 			if (tmp && (norun || regtry(prog, s)))
1559 			    goto got_it;
1560 			else
1561 			    tmp = doevery;
1562 		    }
1563 		    else
1564 			tmp = 1;
1565 		    s += uskip;
1566 		}
1567 	    }
1568 	    else {
1569 		while (s < strend) {
1570 		    if (!isDIGIT(*s)) {
1571 			if (tmp && (norun || regtry(prog, s)))
1572 			    goto got_it;
1573 			else
1574 			    tmp = doevery;
1575 		    }
1576 		    else
1577 			tmp = 1;
1578 		    s++;
1579 		}
1580 	    }
1581 	    break;
1582 	case NDIGITL:
1583 	    PL_reg_flags |= RF_tainted;
1584 	    if (do_utf8) {
1585 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1586 		    if (!isDIGIT_LC_utf8((U8*)s)) {
1587 			if (tmp && (norun || regtry(prog, s)))
1588 			    goto got_it;
1589 			else
1590 			    tmp = doevery;
1591 		    }
1592 		    else
1593 			tmp = 1;
1594 		    s += uskip;
1595 		}
1596 	    }
1597 	    else {
1598 		while (s < strend) {
1599 		    if (!isDIGIT_LC(*s)) {
1600 			if (tmp && (norun || regtry(prog, s)))
1601 			    goto got_it;
1602 			else
1603 			    tmp = doevery;
1604 		    }
1605 		    else
1606 			tmp = 1;
1607 		    s++;
1608 		}
1609 	    }
1610 	    break;
1611 	default:
1612 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1613 	    break;
1614 	}
1615 	return 0;
1616       got_it:
1617 	return s;
1618 }
1619 
1620 /*
1621  - regexec_flags - match a regexp against a string
1622  */
1623 I32
1624 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1625 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1626 /* strend: pointer to null at end of string */
1627 /* strbeg: real beginning of string */
1628 /* minend: end of match must be >=minend after stringarg. */
1629 /* data: May be used for some additional optimizations. */
1630 /* nosave: For optimizations. */
1631 {
1632     register char *s;
1633     register regnode *c;
1634     register char *startpos = stringarg;
1635     I32 minlen;		/* must match at least this many chars */
1636     I32 dontbother = 0;	/* how many characters not to try at end */
1637     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1638     I32 scream_pos = -1;		/* Internal iterator of scream. */
1639     char *scream_olds;
1640     SV* oreplsv = GvSV(PL_replgv);
1641     const bool do_utf8 = DO_UTF8(sv);
1642 #ifdef DEBUGGING
1643     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1644     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1645 #endif
1646     PERL_UNUSED_ARG(data);
1647     RX_MATCH_UTF8_set(prog,do_utf8);
1648 
1649     PL_regcc = 0;
1650 
1651     cache_re(prog);
1652 #ifdef DEBUGGING
1653     PL_regnarrate = DEBUG_r_TEST;
1654 #endif
1655 
1656     /* Be paranoid... */
1657     if (prog == NULL || startpos == NULL) {
1658 	Perl_croak(aTHX_ "NULL regexp parameter");
1659 	return 0;
1660     }
1661 
1662     minlen = prog->minlen;
1663     if (strend - startpos < minlen) {
1664         DEBUG_r(PerlIO_printf(Perl_debug_log,
1665 			      "String too short [regexec_flags]...\n"));
1666 	goto phooey;
1667     }
1668 
1669     /* Check validity of program. */
1670     if (UCHARAT(prog->program) != REG_MAGIC) {
1671 	Perl_croak(aTHX_ "corrupted regexp program");
1672     }
1673 
1674     PL_reg_flags = 0;
1675     PL_reg_eval_set = 0;
1676     PL_reg_maxiter = 0;
1677 
1678     if (prog->reganch & ROPT_UTF8)
1679 	PL_reg_flags |= RF_utf8;
1680 
1681     /* Mark beginning of line for ^ and lookbehind. */
1682     PL_regbol = startpos;
1683     PL_bostr  = strbeg;
1684     PL_reg_sv = sv;
1685 
1686     /* Mark end of line for $ (and such) */
1687     PL_regeol = strend;
1688 
1689     /* see how far we have to get to not match where we matched before */
1690     PL_regtill = startpos+minend;
1691 
1692     /* We start without call_cc context.  */
1693     PL_reg_call_cc = 0;
1694 
1695     /* If there is a "must appear" string, look for it. */
1696     s = startpos;
1697 
1698     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1699 	MAGIC *mg;
1700 
1701 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1702 	    PL_reg_ganch = startpos;
1703 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1704 		  && SvMAGIC(sv)
1705 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1706 		  && mg->mg_len >= 0) {
1707 	    PL_reg_ganch = strbeg + mg->mg_len;	/* Defined pos() */
1708 	    if (prog->reganch & ROPT_ANCH_GPOS) {
1709 	        if (s > PL_reg_ganch)
1710 		    goto phooey;
1711 		s = PL_reg_ganch;
1712 	    }
1713 	}
1714 	else				/* pos() not defined */
1715 	    PL_reg_ganch = strbeg;
1716     }
1717 
1718     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1719 	re_scream_pos_data d;
1720 
1721 	d.scream_olds = &scream_olds;
1722 	d.scream_pos = &scream_pos;
1723 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1724 	if (!s) {
1725 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1726 	    goto phooey;	/* not present */
1727 	}
1728     }
1729 
1730     DEBUG_r({
1731 	const char * const s0   = UTF
1732 	    ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1733 			  UNI_DISPLAY_REGEX)
1734 	    : prog->precomp;
1735 	const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1736 	const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1737 					       UNI_DISPLAY_REGEX) : startpos;
1738 	const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1739 	 if (!PL_colorset)
1740 	     reginitcolors();
1741 	 PerlIO_printf(Perl_debug_log,
1742 		       "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
1743 		       PL_colors[4],PL_colors[5],PL_colors[0],
1744 		       len0, len0, s0,
1745 		       PL_colors[1],
1746 		       len0 > 60 ? "..." : "",
1747 		       PL_colors[0],
1748 		       (int)(len1 > 60 ? 60 : len1),
1749 		       s1, PL_colors[1],
1750 		       (len1 > 60 ? "..." : "")
1751 	      );
1752     });
1753 
1754     /* Simplest case:  anchored match need be tried only once. */
1755     /*  [unless only anchor is BOL and multiline is set] */
1756     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1757 	if (s == startpos && regtry(prog, startpos))
1758 	    goto got_it;
1759 	else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1760 		 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1761 	{
1762 	    char *end;
1763 
1764 	    if (minlen)
1765 		dontbother = minlen - 1;
1766 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1767 	    /* for multiline we only have to try after newlines */
1768 	    if (prog->check_substr || prog->check_utf8) {
1769 		if (s == startpos)
1770 		    goto after_try;
1771 		while (1) {
1772 		    if (regtry(prog, s))
1773 			goto got_it;
1774 		  after_try:
1775 		    if (s >= end)
1776 			goto phooey;
1777 		    if (prog->reganch & RE_USE_INTUIT) {
1778 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1779 			if (!s)
1780 			    goto phooey;
1781 		    }
1782 		    else
1783 			s++;
1784 		}
1785 	    } else {
1786 		if (s > startpos)
1787 		    s--;
1788 		while (s < end) {
1789 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1790 			if (regtry(prog, s))
1791 			    goto got_it;
1792 		    }
1793 		}
1794 	    }
1795 	}
1796 	goto phooey;
1797     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1798 	if (regtry(prog, PL_reg_ganch))
1799 	    goto got_it;
1800 	goto phooey;
1801     }
1802 
1803     /* Messy cases:  unanchored match. */
1804     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1805 	/* we have /x+whatever/ */
1806 	/* it must be a one character string (XXXX Except UTF?) */
1807 	char ch;
1808 #ifdef DEBUGGING
1809 	int did_match = 0;
1810 #endif
1811 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1812 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1813 	ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1814 
1815 	if (do_utf8) {
1816 	    while (s < strend) {
1817 		if (*s == ch) {
1818 		    DEBUG_r( did_match = 1 );
1819 		    if (regtry(prog, s)) goto got_it;
1820 		    s += UTF8SKIP(s);
1821 		    while (s < strend && *s == ch)
1822 			s += UTF8SKIP(s);
1823 		}
1824 		s += UTF8SKIP(s);
1825 	    }
1826 	}
1827 	else {
1828 	    while (s < strend) {
1829 		if (*s == ch) {
1830 		    DEBUG_r( did_match = 1 );
1831 		    if (regtry(prog, s)) goto got_it;
1832 		    s++;
1833 		    while (s < strend && *s == ch)
1834 			s++;
1835 		}
1836 		s++;
1837 	    }
1838 	}
1839 	DEBUG_r(if (!did_match)
1840 		PerlIO_printf(Perl_debug_log,
1841                                   "Did not find anchored character...\n")
1842                );
1843     }
1844     else if (prog->anchored_substr != Nullsv
1845 	      || prog->anchored_utf8 != Nullsv
1846 	      || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1847 		  && prog->float_max_offset < strend - s)) {
1848 	SV *must;
1849 	I32 back_max;
1850 	I32 back_min;
1851 	char *last;
1852 	char *last1;		/* Last position checked before */
1853 #ifdef DEBUGGING
1854 	int did_match = 0;
1855 #endif
1856 	if (prog->anchored_substr || prog->anchored_utf8) {
1857 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1858 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1859 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1860 	    back_max = back_min = prog->anchored_offset;
1861 	} else {
1862 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1863 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1864 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1865 	    back_max = prog->float_max_offset;
1866 	    back_min = prog->float_min_offset;
1867 	}
1868 	if (must == &PL_sv_undef)
1869 	    /* could not downgrade utf8 check substring, so must fail */
1870 	    goto phooey;
1871 
1872 	last = HOP3c(strend,	/* Cannot start after this */
1873 			  -(I32)(CHR_SVLEN(must)
1874 				 - (SvTAIL(must) != 0) + back_min), strbeg);
1875 
1876 	if (s > PL_bostr)
1877 	    last1 = HOPc(s, -1);
1878 	else
1879 	    last1 = s - 1;	/* bogus */
1880 
1881 	/* XXXX check_substr already used to find "s", can optimize if
1882 	   check_substr==must. */
1883 	scream_pos = -1;
1884 	dontbother = end_shift;
1885 	strend = HOPc(strend, -dontbother);
1886 	while ( (s <= last) &&
1887 		((flags & REXEC_SCREAM)
1888 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1889 				    end_shift, &scream_pos, 0))
1890 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1891 				  (unsigned char*)strend, must,
1892 				  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1893 	    /* we may be pointing at the wrong string */
1894 	    if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1895 		s = strbeg + (s - SvPVX_const(sv));
1896 	    DEBUG_r( did_match = 1 );
1897 	    if (HOPc(s, -back_max) > last1) {
1898 		last1 = HOPc(s, -back_min);
1899 		s = HOPc(s, -back_max);
1900 	    }
1901 	    else {
1902 		char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1903 
1904 		last1 = HOPc(s, -back_min);
1905 		s = t;
1906 	    }
1907 	    if (do_utf8) {
1908 		while (s <= last1) {
1909 		    if (regtry(prog, s))
1910 			goto got_it;
1911 		    s += UTF8SKIP(s);
1912 		}
1913 	    }
1914 	    else {
1915 		while (s <= last1) {
1916 		    if (regtry(prog, s))
1917 			goto got_it;
1918 		    s++;
1919 		}
1920 	    }
1921 	}
1922 	DEBUG_r(if (!did_match)
1923                     PerlIO_printf(Perl_debug_log,
1924                                   "Did not find %s substr \"%s%.*s%s\"%s...\n",
1925 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
1926 			       ? "anchored" : "floating"),
1927 			      PL_colors[0],
1928 			      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1929 			      SvPVX_const(must),
1930                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1931                );
1932 	goto phooey;
1933     }
1934     else if ((c = prog->regstclass)) {
1935 	if (minlen) {
1936 	    I32 op = (U8)OP(prog->regstclass);
1937 	    /* don't bother with what can't match */
1938 	    if (PL_regkind[op] != EXACT && op != CANY)
1939 	        strend = HOPc(strend, -(minlen - 1));
1940 	}
1941 	DEBUG_r({
1942 	    SV *prop = sv_newmortal();
1943 	    const char *s0;
1944 	    const char *s1;
1945 	    int len0;
1946 	    int len1;
1947 
1948 	    regprop(prop, c);
1949 	    s0 = UTF ?
1950 	      pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
1951 			     UNI_DISPLAY_REGEX) :
1952 	      SvPVX_const(prop);
1953 	    len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1954 	    s1 = UTF ?
1955 	      sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1956 	    len1 = UTF ? SvCUR(dsv1) : strend - s;
1957 	    PerlIO_printf(Perl_debug_log,
1958 			  "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
1959 			  len0, len0, s0,
1960 			  len1, len1, s1);
1961 	});
1962         if (find_byclass(prog, c, s, strend, 0))
1963 	    goto got_it;
1964 	DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1965     }
1966     else {
1967 	dontbother = 0;
1968 	if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1969 	    /* Trim the end. */
1970 	    char *last;
1971 	    SV* float_real;
1972 
1973 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1974 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1975 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1976 
1977 	    if (flags & REXEC_SCREAM) {
1978 		last = screaminstr(sv, float_real, s - strbeg,
1979 				   end_shift, &scream_pos, 1); /* last one */
1980 		if (!last)
1981 		    last = scream_olds; /* Only one occurrence. */
1982 		/* we may be pointing at the wrong string */
1983 		else if (RX_MATCH_COPIED(prog))
1984 		    s = strbeg + (s - SvPVX_const(sv));
1985 	    }
1986 	    else {
1987 		STRLEN len;
1988                 const char * const little = SvPV_const(float_real, len);
1989 
1990 		if (SvTAIL(float_real)) {
1991 		    if (memEQ(strend - len + 1, little, len - 1))
1992 			last = strend - len + 1;
1993 		    else if (!PL_multiline)
1994 			last = memEQ(strend - len, little, len)
1995 			    ? strend - len : Nullch;
1996 		    else
1997 			goto find_last;
1998 		} else {
1999 		  find_last:
2000 		    if (len)
2001 			last = rninstr(s, strend, little, little + len);
2002 		    else
2003 			last = strend;	/* matching "$" */
2004 		}
2005 	    }
2006 	    if (last == NULL) {
2007 		DEBUG_r(PerlIO_printf(Perl_debug_log,
2008 				      "%sCan't trim the tail, match fails (should not happen)%s\n",
2009 				      PL_colors[4],PL_colors[5]));
2010 		goto phooey; /* Should not happen! */
2011 	    }
2012 	    dontbother = strend - last + prog->float_min_offset;
2013 	}
2014 	if (minlen && (dontbother < minlen))
2015 	    dontbother = minlen - 1;
2016 	strend -= dontbother; 		   /* this one's always in bytes! */
2017 	/* We don't know much -- general case. */
2018 	if (do_utf8) {
2019 	    for (;;) {
2020 		if (regtry(prog, s))
2021 		    goto got_it;
2022 		if (s >= strend)
2023 		    break;
2024 		s += UTF8SKIP(s);
2025 	    };
2026 	}
2027 	else {
2028 	    do {
2029 		if (regtry(prog, s))
2030 		    goto got_it;
2031 	    } while (s++ < strend);
2032 	}
2033     }
2034 
2035     /* Failure. */
2036     goto phooey;
2037 
2038 got_it:
2039     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2040 
2041     if (PL_reg_eval_set) {
2042 	/* Preserve the current value of $^R */
2043 	if (oreplsv != GvSV(PL_replgv))
2044 	    sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2045 						  restored, the value remains
2046 						  the same. */
2047 	restore_pos(aTHX_ 0);
2048     }
2049 
2050     /* make sure $`, $&, $', and $digit will work later */
2051     if ( !(flags & REXEC_NOT_FIRST) ) {
2052 	if (RX_MATCH_COPIED(prog)) {
2053 	    Safefree(prog->subbeg);
2054 	    RX_MATCH_COPIED_off(prog);
2055 	}
2056 	if (flags & REXEC_COPY_STR) {
2057 	    I32 i = PL_regeol - startpos + (stringarg - strbeg);
2058 
2059 	    s = savepvn(strbeg, i);
2060 	    prog->subbeg = s;
2061 	    prog->sublen = i;
2062 	    RX_MATCH_COPIED_on(prog);
2063 	}
2064 	else {
2065 	    prog->subbeg = strbeg;
2066 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2067 	}
2068     }
2069 
2070     return 1;
2071 
2072 phooey:
2073     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2074 			  PL_colors[4],PL_colors[5]));
2075     if (PL_reg_eval_set)
2076 	restore_pos(aTHX_ 0);
2077     return 0;
2078 }
2079 
2080 /*
2081  - regtry - try match at specific point
2082  */
2083 STATIC I32			/* 0 failure, 1 success */
2084 S_regtry(pTHX_ regexp *prog, char *startpos)
2085 {
2086     register I32 i;
2087     register I32 *sp;
2088     register I32 *ep;
2089     CHECKPOINT lastcp;
2090 
2091 #ifdef DEBUGGING
2092     PL_regindent = 0;	/* XXXX Not good when matches are reenterable... */
2093 #endif
2094     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2095 	MAGIC *mg;
2096 
2097 	PL_reg_eval_set = RS_init;
2098 	DEBUG_r(DEBUG_s(
2099 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2100 			  (IV)(PL_stack_sp - PL_stack_base));
2101 	    ));
2102 	SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2103 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2104 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2105 	SAVETMPS;
2106 	/* Apparently this is not needed, judging by wantarray. */
2107 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2108 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2109 
2110 	if (PL_reg_sv) {
2111 	    /* Make $_ available to executed code. */
2112 	    if (PL_reg_sv != DEFSV) {
2113 		/* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2114 		SAVESPTR(DEFSV);
2115 		DEFSV = PL_reg_sv;
2116 	    }
2117 
2118 	    if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2119 		  && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2120 		/* prepare for quick setting of pos */
2121 		sv_magic(PL_reg_sv, (SV*)0,
2122 			PERL_MAGIC_regex_global, Nullch, 0);
2123 		mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2124 		mg->mg_len = -1;
2125 	    }
2126 	    PL_reg_magic    = mg;
2127 	    PL_reg_oldpos   = mg->mg_len;
2128 	    SAVEDESTRUCTOR_X(restore_pos, 0);
2129         }
2130         if (!PL_reg_curpm) {
2131 	    Newxz(PL_reg_curpm, 1, PMOP);
2132 #ifdef USE_ITHREADS
2133             {
2134                 SV* repointer = newSViv(0);
2135                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2136                 SvFLAGS(repointer) |= SVf_BREAK;
2137                 av_push(PL_regex_padav,repointer);
2138                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2139                 PL_regex_pad = AvARRAY(PL_regex_padav);
2140             }
2141 #endif
2142         }
2143 	PM_SETRE(PL_reg_curpm, prog);
2144 	PL_reg_oldcurpm = PL_curpm;
2145 	PL_curpm = PL_reg_curpm;
2146 	if (RX_MATCH_COPIED(prog)) {
2147 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2148 		since it may be needed if this match fails.  Thus
2149 		$` inside (?{}) could fail... */
2150 	    PL_reg_oldsaved = prog->subbeg;
2151 	    PL_reg_oldsavedlen = prog->sublen;
2152 	    RX_MATCH_COPIED_off(prog);
2153 	}
2154 	else
2155 	    PL_reg_oldsaved = Nullch;
2156 	prog->subbeg = PL_bostr;
2157 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2158     }
2159     prog->startp[0] = startpos - PL_bostr;
2160     PL_reginput = startpos;
2161     PL_regstartp = prog->startp;
2162     PL_regendp = prog->endp;
2163     PL_reglastparen = &prog->lastparen;
2164     PL_reglastcloseparen = &prog->lastcloseparen;
2165     prog->lastparen = 0;
2166     prog->lastcloseparen = 0;
2167     PL_regsize = 0;
2168     DEBUG_r(PL_reg_starttry = startpos);
2169     if (PL_reg_start_tmpl <= prog->nparens) {
2170 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2171         if(PL_reg_start_tmp)
2172             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2173         else
2174             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2175     }
2176 
2177     /* XXXX What this code is doing here?!!!  There should be no need
2178        to do this again and again, PL_reglastparen should take care of
2179        this!  --ilya*/
2180 
2181     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2182      * Actually, the code in regcppop() (which Ilya may be meaning by
2183      * PL_reglastparen), is not needed at all by the test suite
2184      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2185      * enough, for building DynaLoader, or otherwise this
2186      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2187      * will happen.  Meanwhile, this code *is* needed for the
2188      * above-mentioned test suite tests to succeed.  The common theme
2189      * on those tests seems to be returning null fields from matches.
2190      * --jhi */
2191 #if 1
2192     sp = prog->startp;
2193     ep = prog->endp;
2194     if (prog->nparens) {
2195 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2196 	    *++sp = -1;
2197 	    *++ep = -1;
2198 	}
2199     }
2200 #endif
2201     REGCP_SET(lastcp);
2202     if (regmatch(prog->program + 1)) {
2203 	prog->endp[0] = PL_reginput - PL_bostr;
2204 	return 1;
2205     }
2206     REGCP_UNWIND(lastcp);
2207     return 0;
2208 }
2209 
2210 #define RE_UNWIND_BRANCH	1
2211 #define RE_UNWIND_BRANCHJ	2
2212 
2213 union re_unwind_t;
2214 
2215 typedef struct {		/* XX: makes sense to enlarge it... */
2216     I32 type;
2217     I32 prev;
2218     CHECKPOINT lastcp;
2219 } re_unwind_generic_t;
2220 
2221 typedef struct {
2222     I32 type;
2223     I32 prev;
2224     CHECKPOINT lastcp;
2225     I32 lastparen;
2226     regnode *next;
2227     char *locinput;
2228     I32 nextchr;
2229 #ifdef DEBUGGING
2230     int regindent;
2231 #endif
2232 } re_unwind_branch_t;
2233 
2234 typedef union re_unwind_t {
2235     I32 type;
2236     re_unwind_generic_t generic;
2237     re_unwind_branch_t branch;
2238 } re_unwind_t;
2239 
2240 #define sayYES goto yes
2241 #define sayNO goto no
2242 #define sayNO_ANYOF goto no_anyof
2243 #define sayYES_FINAL goto yes_final
2244 #define sayYES_LOUD  goto yes_loud
2245 #define sayNO_FINAL  goto no_final
2246 #define sayNO_SILENT goto do_no
2247 #define saySAME(x) if (x) goto yes; else goto no
2248 
2249 #define POSCACHE_SUCCESS 0	/* caching success rather than failure */
2250 #define POSCACHE_SEEN 1		/* we know what we're caching */
2251 #define POSCACHE_START 2	/* the real cache: this bit maps to pos 0 */
2252 #define CACHEsayYES STMT_START { \
2253     if (cache_offset | cache_bit) { \
2254 	if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2255 	    PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2256         else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 	    /* cache records failure, but this is success */ \
2258 	    DEBUG_r( \
2259 		PerlIO_printf(Perl_debug_log, \
2260 		    "%*s  (remove success from failure cache)\n", \
2261 		    REPORT_CODE_OFF+PL_regindent*2, "") \
2262 	    ); \
2263 	    PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2264 	} \
2265     } \
2266     sayYES; \
2267 } STMT_END
2268 #define CACHEsayNO STMT_START { \
2269     if (cache_offset | cache_bit) { \
2270 	if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2271 	    PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2272         else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2273 	    /* cache records success, but this is failure */ \
2274 	    DEBUG_r( \
2275 		PerlIO_printf(Perl_debug_log, \
2276 		    "%*s  (remove failure from success cache)\n", \
2277 		    REPORT_CODE_OFF+PL_regindent*2, "") \
2278 	    ); \
2279 	    PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2280 	} \
2281     } \
2282     sayNO; \
2283 } STMT_END
2284 
2285 #define REPORT_CODE_OFF 24
2286 
2287 /*
2288  - regmatch - main matching routine
2289  *
2290  * Conceptually the strategy is simple:  check to see whether the current
2291  * node matches, call self recursively to see whether the rest matches,
2292  * and then act accordingly.  In practice we make some effort to avoid
2293  * recursion, in particular by going through "ordinary" nodes (that don't
2294  * need to know whether the rest of the match failed) by a loop instead of
2295  * by recursion.
2296  */
2297 /* [lwall] I've hoisted the register declarations to the outer block in order to
2298  * maybe save a little bit of pushing and popping on the stack.  It also takes
2299  * advantage of machines that use a register save mask on subroutine entry.
2300  */
2301 STATIC I32			/* 0 failure, 1 success */
2302 S_regmatch(pTHX_ regnode *prog)
2303 {
2304     register regnode *scan;	/* Current node. */
2305     regnode *next;		/* Next node. */
2306     regnode *inner;		/* Next node in internal branch. */
2307     register I32 nextchr;	/* renamed nextchr - nextchar colides with
2308 				   function of same name */
2309     register I32 n;		/* no or next */
2310     register I32 ln = 0;	/* len or last */
2311     register char *s = Nullch;	/* operand or save */
2312     register char *locinput = PL_reginput;
2313     register I32 c1 = 0, c2 = 0, paren;	/* case fold search, parenth */
2314     int minmod = 0, sw = 0, logical = 0;
2315     I32 unwind = 0;
2316 #if 0
2317     I32 firstcp = PL_savestack_ix;
2318 #endif
2319     register const bool do_utf8 = PL_reg_match_utf8;
2320 #ifdef DEBUGGING
2321     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2322     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2323     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2324 #endif
2325     U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2326 
2327 #ifdef DEBUGGING
2328     PL_regindent++;
2329 #endif
2330 
2331     /* Note that nextchr is a byte even in UTF */
2332     nextchr = UCHARAT(locinput);
2333     scan = prog;
2334     while (scan != NULL) {
2335 
2336         DEBUG_r( {
2337 	    SV *prop = sv_newmortal();
2338 	    const int docolor = *PL_colors[0];
2339 	    const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2340 	    int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2341 	    /* The part of the string before starttry has one color
2342 	       (pref0_len chars), between starttry and current
2343 	       position another one (pref_len - pref0_len chars),
2344 	       after the current position the third one.
2345 	       We assume that pref0_len <= pref_len, otherwise we
2346 	       decrease pref0_len.  */
2347 	    int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2348 		? (5 + taill) - l : locinput - PL_bostr;
2349 	    int pref0_len;
2350 
2351 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2352 		pref_len++;
2353 	    pref0_len = pref_len  - (locinput - PL_reg_starttry);
2354 	    if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2355 		l = ( PL_regeol - locinput > (5 + taill) - pref_len
2356 		      ? (5 + taill) - pref_len : PL_regeol - locinput);
2357 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2358 		l--;
2359 	    if (pref0_len < 0)
2360 		pref0_len = 0;
2361 	    if (pref0_len > pref_len)
2362 		pref0_len = pref_len;
2363 	    regprop(prop, scan);
2364 	    {
2365 	      const char * const s0 =
2366 		do_utf8 && OP(scan) != CANY ?
2367 		pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2368 			       pref0_len, 60, UNI_DISPLAY_REGEX) :
2369 		locinput - pref_len;
2370 	      const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2371 	      const char * const s1 = do_utf8 && OP(scan) != CANY ?
2372 		pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2373 			       pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2374 		locinput - pref_len + pref0_len;
2375 	      const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2376 	      const char * const s2 = do_utf8 && OP(scan) != CANY ?
2377 		pv_uni_display(dsv2, (U8*)locinput,
2378 			       PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2379 		locinput;
2380 	      const int len2 = do_utf8 ? strlen(s2) : l;
2381 	      PerlIO_printf(Perl_debug_log,
2382 			    "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2383 			    (IV)(locinput - PL_bostr),
2384 			    PL_colors[4],
2385 			    len0, s0,
2386 			    PL_colors[5],
2387 			    PL_colors[2],
2388 			    len1, s1,
2389 			    PL_colors[3],
2390 			    (docolor ? "" : "> <"),
2391 			    PL_colors[0],
2392 			    len2, s2,
2393 			    PL_colors[1],
2394 			    15 - l - pref_len + 1,
2395 			    "",
2396 			    (IV)(scan - PL_regprogram), PL_regindent*2, "",
2397 			    SvPVX_const(prop));
2398 	    }
2399 	});
2400 
2401 	next = scan + NEXT_OFF(scan);
2402 	if (next == scan)
2403 	    next = NULL;
2404 
2405 	switch (OP(scan)) {
2406 	case BOL:
2407 	    if (locinput == PL_bostr || (PL_multiline &&
2408 		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2409 	    {
2410 		/* regtill = regbol; */
2411 		break;
2412 	    }
2413 	    sayNO;
2414 	case MBOL:
2415 	    if (locinput == PL_bostr ||
2416 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2417 	    {
2418 		break;
2419 	    }
2420 	    sayNO;
2421 	case SBOL:
2422 	    if (locinput == PL_bostr)
2423 		break;
2424 	    sayNO;
2425 	case GPOS:
2426 	    if (locinput == PL_reg_ganch)
2427 		break;
2428 	    sayNO;
2429 	case EOL:
2430 	    if (PL_multiline)
2431 		goto meol;
2432 	    else
2433 		goto seol;
2434 	case MEOL:
2435 	  meol:
2436 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2437 		sayNO;
2438 	    break;
2439 	case SEOL:
2440 	  seol:
2441 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2442 		sayNO;
2443 	    if (PL_regeol - locinput > 1)
2444 		sayNO;
2445 	    break;
2446 	case EOS:
2447 	    if (PL_regeol != locinput)
2448 		sayNO;
2449 	    break;
2450 	case SANY:
2451 	    if (!nextchr && locinput >= PL_regeol)
2452 		sayNO;
2453  	    if (do_utf8) {
2454 	        locinput += PL_utf8skip[nextchr];
2455 		if (locinput > PL_regeol)
2456  		    sayNO;
2457  		nextchr = UCHARAT(locinput);
2458  	    }
2459  	    else
2460  		nextchr = UCHARAT(++locinput);
2461 	    break;
2462 	case CANY:
2463 	    if (!nextchr && locinput >= PL_regeol)
2464 		sayNO;
2465 	    nextchr = UCHARAT(++locinput);
2466 	    break;
2467 	case REG_ANY:
2468 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2469 		sayNO;
2470 	    if (do_utf8) {
2471 		locinput += PL_utf8skip[nextchr];
2472 		if (locinput > PL_regeol)
2473 		    sayNO;
2474 		nextchr = UCHARAT(locinput);
2475 	    }
2476 	    else
2477 		nextchr = UCHARAT(++locinput);
2478 	    break;
2479 	case EXACT:
2480 	    s = STRING(scan);
2481 	    ln = STR_LEN(scan);
2482 	    if (do_utf8 != UTF) {
2483 		/* The target and the pattern have differing utf8ness. */
2484 		char *l = locinput;
2485 		const char *e = s + ln;
2486 
2487 		if (do_utf8) {
2488 		    /* The target is utf8, the pattern is not utf8. */
2489 		    while (s < e) {
2490 			STRLEN ulen;
2491 			if (l >= PL_regeol)
2492 			     sayNO;
2493 			if (NATIVE_TO_UNI(*(U8*)s) !=
2494 			    utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2495 					    uniflags))
2496 			     sayNO;
2497 			l += ulen;
2498 			s ++;
2499 		    }
2500 		}
2501 		else {
2502 		    /* The target is not utf8, the pattern is utf8. */
2503 		    while (s < e) {
2504 			STRLEN ulen;
2505 			if (l >= PL_regeol)
2506 			    sayNO;
2507 			if (NATIVE_TO_UNI(*((U8*)l)) !=
2508 			    utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2509 					   uniflags))
2510 			    sayNO;
2511 			s += ulen;
2512 			l ++;
2513 		    }
2514 		}
2515 		locinput = l;
2516 		nextchr = UCHARAT(locinput);
2517 		break;
2518 	    }
2519 	    /* The target and the pattern have the same utf8ness. */
2520 	    /* Inline the first character, for speed. */
2521 	    if (UCHARAT(s) != nextchr)
2522 		sayNO;
2523 	    if (PL_regeol - locinput < ln)
2524 		sayNO;
2525 	    if (ln > 1 && memNE(s, locinput, ln))
2526 		sayNO;
2527 	    locinput += ln;
2528 	    nextchr = UCHARAT(locinput);
2529 	    break;
2530 	case EXACTFL:
2531 	    PL_reg_flags |= RF_tainted;
2532 	    /* FALL THROUGH */
2533 	case EXACTF:
2534 	    s = STRING(scan);
2535 	    ln = STR_LEN(scan);
2536 
2537 	    if (do_utf8 || UTF) {
2538 	      /* Either target or the pattern are utf8. */
2539 		char *l = locinput;
2540 		char *e = PL_regeol;
2541 
2542 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2543 			       l, &e, 0,  do_utf8)) {
2544 		     /* One more case for the sharp s:
2545 		      * pack("U0U*", 0xDF) =~ /ss/i,
2546 		      * the 0xC3 0x9F are the UTF-8
2547 		      * byte sequence for the U+00DF. */
2548 		     if (!(do_utf8 &&
2549 			   toLOWER(s[0]) == 's' &&
2550 			   ln >= 2 &&
2551 			   toLOWER(s[1]) == 's' &&
2552 			   (U8)l[0] == 0xC3 &&
2553 			   e - l >= 2 &&
2554 			   (U8)l[1] == 0x9F))
2555 			  sayNO;
2556 		}
2557 		locinput = e;
2558 		nextchr = UCHARAT(locinput);
2559 		break;
2560 	    }
2561 
2562 	    /* Neither the target and the pattern are utf8. */
2563 
2564 	    /* Inline the first character, for speed. */
2565 	    if (UCHARAT(s) != nextchr &&
2566 		UCHARAT(s) != ((OP(scan) == EXACTF)
2567 			       ? PL_fold : PL_fold_locale)[nextchr])
2568 		sayNO;
2569 	    if (PL_regeol - locinput < ln)
2570 		sayNO;
2571 	    if (ln > 1 && (OP(scan) == EXACTF
2572 			   ? ibcmp(s, locinput, ln)
2573 			   : ibcmp_locale(s, locinput, ln)))
2574 		sayNO;
2575 	    locinput += ln;
2576 	    nextchr = UCHARAT(locinput);
2577 	    break;
2578 	case ANYOF:
2579 	    if (do_utf8) {
2580 	        STRLEN inclasslen = PL_regeol - locinput;
2581 
2582 	        if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2583 		    sayNO_ANYOF;
2584 		if (locinput >= PL_regeol)
2585 		    sayNO;
2586 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2587 		nextchr = UCHARAT(locinput);
2588 		break;
2589 	    }
2590 	    else {
2591 		if (nextchr < 0)
2592 		    nextchr = UCHARAT(locinput);
2593 		if (!REGINCLASS(scan, (U8*)locinput))
2594 		    sayNO_ANYOF;
2595 		if (!nextchr && locinput >= PL_regeol)
2596 		    sayNO;
2597 		nextchr = UCHARAT(++locinput);
2598 		break;
2599 	    }
2600 	no_anyof:
2601 	    /* If we might have the case of the German sharp s
2602 	     * in a casefolding Unicode character class. */
2603 
2604 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2605 		 locinput += SHARP_S_SKIP;
2606 		 nextchr = UCHARAT(locinput);
2607 	    }
2608 	    else
2609 		 sayNO;
2610 	    break;
2611 	case ALNUML:
2612 	    PL_reg_flags |= RF_tainted;
2613 	    /* FALL THROUGH */
2614 	case ALNUM:
2615 	    if (!nextchr)
2616 		sayNO;
2617 	    if (do_utf8) {
2618 		LOAD_UTF8_CHARCLASS_ALNUM();
2619 		if (!(OP(scan) == ALNUM
2620 		      ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2621 		      : isALNUM_LC_utf8((U8*)locinput)))
2622 		{
2623 		    sayNO;
2624 		}
2625 		locinput += PL_utf8skip[nextchr];
2626 		nextchr = UCHARAT(locinput);
2627 		break;
2628 	    }
2629 	    if (!(OP(scan) == ALNUM
2630 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2631 		sayNO;
2632 	    nextchr = UCHARAT(++locinput);
2633 	    break;
2634 	case NALNUML:
2635 	    PL_reg_flags |= RF_tainted;
2636 	    /* FALL THROUGH */
2637 	case NALNUM:
2638 	    if (!nextchr && locinput >= PL_regeol)
2639 		sayNO;
2640 	    if (do_utf8) {
2641 		LOAD_UTF8_CHARCLASS_ALNUM();
2642 		if (OP(scan) == NALNUM
2643 		    ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2644 		    : isALNUM_LC_utf8((U8*)locinput))
2645 		{
2646 		    sayNO;
2647 		}
2648 		locinput += PL_utf8skip[nextchr];
2649 		nextchr = UCHARAT(locinput);
2650 		break;
2651 	    }
2652 	    if (OP(scan) == NALNUM
2653 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
2654 		sayNO;
2655 	    nextchr = UCHARAT(++locinput);
2656 	    break;
2657 	case BOUNDL:
2658 	case NBOUNDL:
2659 	    PL_reg_flags |= RF_tainted;
2660 	    /* FALL THROUGH */
2661 	case BOUND:
2662 	case NBOUND:
2663 	    /* was last char in word? */
2664 	    if (do_utf8) {
2665 		if (locinput == PL_bostr)
2666 		    ln = '\n';
2667 		else {
2668 		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2669 
2670 		    ln = utf8n_to_uvchr((U8 *)r, UTF8SKIP(r), 0, 0);
2671 		}
2672 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2673 		    ln = isALNUM_uni(ln);
2674 		    LOAD_UTF8_CHARCLASS_ALNUM();
2675 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2676 		}
2677 		else {
2678 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2679 		    n = isALNUM_LC_utf8((U8*)locinput);
2680 		}
2681 	    }
2682 	    else {
2683 		ln = (locinput != PL_bostr) ?
2684 		    UCHARAT(locinput - 1) : '\n';
2685 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2686 		    ln = isALNUM(ln);
2687 		    n = isALNUM(nextchr);
2688 		}
2689 		else {
2690 		    ln = isALNUM_LC(ln);
2691 		    n = isALNUM_LC(nextchr);
2692 		}
2693 	    }
2694 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2695 				    OP(scan) == BOUNDL))
2696 		    sayNO;
2697 	    break;
2698 	case SPACEL:
2699 	    PL_reg_flags |= RF_tainted;
2700 	    /* FALL THROUGH */
2701 	case SPACE:
2702 	    if (!nextchr)
2703 		sayNO;
2704 	    if (do_utf8) {
2705 		if (UTF8_IS_CONTINUED(nextchr)) {
2706 		    LOAD_UTF8_CHARCLASS_SPACE();
2707 		    if (!(OP(scan) == SPACE
2708 			  ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2709 			  : isSPACE_LC_utf8((U8*)locinput)))
2710 		    {
2711 			sayNO;
2712 		    }
2713 		    locinput += PL_utf8skip[nextchr];
2714 		    nextchr = UCHARAT(locinput);
2715 		    break;
2716 		}
2717 		if (!(OP(scan) == SPACE
2718 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2719 		    sayNO;
2720 		nextchr = UCHARAT(++locinput);
2721 	    }
2722 	    else {
2723 		if (!(OP(scan) == SPACE
2724 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2725 		    sayNO;
2726 		nextchr = UCHARAT(++locinput);
2727 	    }
2728 	    break;
2729 	case NSPACEL:
2730 	    PL_reg_flags |= RF_tainted;
2731 	    /* FALL THROUGH */
2732 	case NSPACE:
2733 	    if (!nextchr && locinput >= PL_regeol)
2734 		sayNO;
2735 	    if (do_utf8) {
2736 		LOAD_UTF8_CHARCLASS_SPACE();
2737 		if (OP(scan) == NSPACE
2738 		    ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2739 		    : isSPACE_LC_utf8((U8*)locinput))
2740 		{
2741 		    sayNO;
2742 		}
2743 		locinput += PL_utf8skip[nextchr];
2744 		nextchr = UCHARAT(locinput);
2745 		break;
2746 	    }
2747 	    if (OP(scan) == NSPACE
2748 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
2749 		sayNO;
2750 	    nextchr = UCHARAT(++locinput);
2751 	    break;
2752 	case DIGITL:
2753 	    PL_reg_flags |= RF_tainted;
2754 	    /* FALL THROUGH */
2755 	case DIGIT:
2756 	    if (!nextchr)
2757 		sayNO;
2758 	    if (do_utf8) {
2759 		LOAD_UTF8_CHARCLASS_DIGIT();
2760 		if (!(OP(scan) == DIGIT
2761 		      ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2762 		      : isDIGIT_LC_utf8((U8*)locinput)))
2763 		{
2764 		    sayNO;
2765 		}
2766 		locinput += PL_utf8skip[nextchr];
2767 		nextchr = UCHARAT(locinput);
2768 		break;
2769 	    }
2770 	    if (!(OP(scan) == DIGIT
2771 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2772 		sayNO;
2773 	    nextchr = UCHARAT(++locinput);
2774 	    break;
2775 	case NDIGITL:
2776 	    PL_reg_flags |= RF_tainted;
2777 	    /* FALL THROUGH */
2778 	case NDIGIT:
2779 	    if (!nextchr && locinput >= PL_regeol)
2780 		sayNO;
2781 	    if (do_utf8) {
2782 		LOAD_UTF8_CHARCLASS_DIGIT();
2783 		if (OP(scan) == NDIGIT
2784 		    ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2785 		    : isDIGIT_LC_utf8((U8*)locinput))
2786 		{
2787 		    sayNO;
2788 		}
2789 		locinput += PL_utf8skip[nextchr];
2790 		nextchr = UCHARAT(locinput);
2791 		break;
2792 	    }
2793 	    if (OP(scan) == NDIGIT
2794 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2795 		sayNO;
2796 	    nextchr = UCHARAT(++locinput);
2797 	    break;
2798 	case CLUMP:
2799 	    if (locinput >= PL_regeol)
2800 		sayNO;
2801 	    if  (do_utf8) {
2802 		LOAD_UTF8_CHARCLASS_MARK();
2803 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2804 		    sayNO;
2805 		locinput += PL_utf8skip[nextchr];
2806 		while (locinput < PL_regeol &&
2807 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2808 		    locinput += UTF8SKIP(locinput);
2809 		if (locinput > PL_regeol)
2810 		    sayNO;
2811 	    }
2812 	    else
2813 	       locinput++;
2814 	    nextchr = UCHARAT(locinput);
2815 	    break;
2816 	case REFFL:
2817 	    PL_reg_flags |= RF_tainted;
2818 	    /* FALL THROUGH */
2819         case REF:
2820 	case REFF:
2821 	    n = ARG(scan);  /* which paren pair */
2822 	    ln = PL_regstartp[n];
2823 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2824 	    if ((I32)*PL_reglastparen < n || ln == -1)
2825 		sayNO;			/* Do not match unless seen CLOSEn. */
2826 	    if (ln == PL_regendp[n])
2827 		break;
2828 
2829 	    s = PL_bostr + ln;
2830 	    if (do_utf8 && OP(scan) != REF) {	/* REF can do byte comparison */
2831 		char *l = locinput;
2832 		const char *e = PL_bostr + PL_regendp[n];
2833 		/*
2834 		 * Note that we can't do the "other character" lookup trick as
2835 		 * in the 8-bit case (no pun intended) because in Unicode we
2836 		 * have to map both upper and title case to lower case.
2837 		 */
2838 		if (OP(scan) == REFF) {
2839 		    while (s < e) {
2840 			STRLEN ulen1, ulen2;
2841 			U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
2842 			U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
2843 
2844 			if (l >= PL_regeol)
2845 			    sayNO;
2846 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2847 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2848 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2849 			    sayNO;
2850 			s += ulen1;
2851 			l += ulen2;
2852 		    }
2853 		}
2854 		locinput = l;
2855 		nextchr = UCHARAT(locinput);
2856 		break;
2857 	    }
2858 
2859 	    /* Inline the first character, for speed. */
2860 	    if (UCHARAT(s) != nextchr &&
2861 		(OP(scan) == REF ||
2862 		 (UCHARAT(s) != ((OP(scan) == REFF
2863 				  ? PL_fold : PL_fold_locale)[nextchr]))))
2864 		sayNO;
2865 	    ln = PL_regendp[n] - ln;
2866 	    if (locinput + ln > PL_regeol)
2867 		sayNO;
2868 	    if (ln > 1 && (OP(scan) == REF
2869 			   ? memNE(s, locinput, ln)
2870 			   : (OP(scan) == REFF
2871 			      ? ibcmp(s, locinput, ln)
2872 			      : ibcmp_locale(s, locinput, ln))))
2873 		sayNO;
2874 	    locinput += ln;
2875 	    nextchr = UCHARAT(locinput);
2876 	    break;
2877 
2878 	case NOTHING:
2879 	case TAIL:
2880 	    break;
2881 	case BACK:
2882 	    break;
2883 	case EVAL:
2884 	{
2885 	    dSP;
2886 	    OP_4tree *oop = PL_op;
2887 	    COP *ocurcop = PL_curcop;
2888 	    PAD *old_comppad;
2889 	    SV *ret;
2890 	    struct regexp *oreg = PL_reg_re;
2891 
2892 	    n = ARG(scan);
2893 	    PL_op = (OP_4tree*)PL_regdata->data[n];
2894 	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2895 	    PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2896 	    PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2897 
2898 	    {
2899 		SV **before = SP;
2900 		CALLRUNOPS(aTHX);			/* Scalar context. */
2901 		SPAGAIN;
2902 		if (SP == before)
2903 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2904 		else {
2905 		    ret = POPs;
2906 		    PUTBACK;
2907 		}
2908 	    }
2909 
2910 	    PL_op = oop;
2911 	    PAD_RESTORE_LOCAL(old_comppad);
2912 	    PL_curcop = ocurcop;
2913 	    if (logical) {
2914 		if (logical == 2) {	/* Postponed subexpression. */
2915 		    regexp *re;
2916 		    MAGIC *mg = Null(MAGIC*);
2917 		    re_cc_state state;
2918 		    CHECKPOINT cp, lastcp;
2919                     int toggleutf;
2920 		    register SV *sv;
2921 
2922 		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2923 			mg = mg_find(sv, PERL_MAGIC_qr);
2924 		    else if (SvSMAGICAL(ret)) {
2925 			if (SvGMAGICAL(ret))
2926 			    sv_unmagic(ret, PERL_MAGIC_qr);
2927 			else
2928 			    mg = mg_find(ret, PERL_MAGIC_qr);
2929 		    }
2930 
2931 		    if (mg) {
2932 			re = (regexp *)mg->mg_obj;
2933 			(void)ReREFCNT_inc(re);
2934 		    }
2935 		    else {
2936 			STRLEN len;
2937 			const char *t = SvPV_const(ret, len);
2938 			PMOP pm;
2939 			char * const oprecomp = PL_regprecomp;
2940 			const I32 osize = PL_regsize;
2941 			const I32 onpar = PL_regnpar;
2942 
2943 			Zero(&pm, 1, PMOP);
2944                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2945 			re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
2946 			if (!(SvFLAGS(ret)
2947 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2948 				| SVs_GMG)))
2949 			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
2950 					PERL_MAGIC_qr,0,0);
2951 			PL_regprecomp = oprecomp;
2952 			PL_regsize = osize;
2953 			PL_regnpar = onpar;
2954 		    }
2955 		    DEBUG_r(
2956 			PerlIO_printf(Perl_debug_log,
2957 				      "Entering embedded \"%s%.60s%s%s\"\n",
2958 				      PL_colors[0],
2959 				      re->precomp,
2960 				      PL_colors[1],
2961 				      (strlen(re->precomp) > 60 ? "..." : ""))
2962 			);
2963 		    state.node = next;
2964 		    state.prev = PL_reg_call_cc;
2965 		    state.cc = PL_regcc;
2966 		    state.re = PL_reg_re;
2967 
2968 		    PL_regcc = 0;
2969 
2970 		    cp = regcppush(0);	/* Save *all* the positions. */
2971 		    REGCP_SET(lastcp);
2972 		    cache_re(re);
2973 		    state.ss = PL_savestack_ix;
2974 		    *PL_reglastparen = 0;
2975 		    *PL_reglastcloseparen = 0;
2976 		    PL_reg_call_cc = &state;
2977 		    PL_reginput = locinput;
2978 		    toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2979 				((re->reganch & ROPT_UTF8) != 0);
2980 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2981 
2982 		    /* XXXX This is too dramatic a measure... */
2983 		    PL_reg_maxiter = 0;
2984 
2985 		    if (regmatch(re->program + 1)) {
2986 			/* Even though we succeeded, we need to restore
2987 			   global variables, since we may be wrapped inside
2988 			   SUSPEND, thus the match may be not finished yet. */
2989 
2990 			/* XXXX Do this only if SUSPENDed? */
2991 			PL_reg_call_cc = state.prev;
2992 			PL_regcc = state.cc;
2993 			PL_reg_re = state.re;
2994 			cache_re(PL_reg_re);
2995 			if (toggleutf) PL_reg_flags ^= RF_utf8;
2996 
2997 			/* XXXX This is too dramatic a measure... */
2998 			PL_reg_maxiter = 0;
2999 
3000 			/* These are needed even if not SUSPEND. */
3001 			ReREFCNT_dec(re);
3002 			regcpblow(cp);
3003 			sayYES;
3004 		    }
3005 		    ReREFCNT_dec(re);
3006 		    REGCP_UNWIND(lastcp);
3007 		    regcppop();
3008 		    PL_reg_call_cc = state.prev;
3009 		    PL_regcc = state.cc;
3010 		    PL_reg_re = state.re;
3011 		    cache_re(PL_reg_re);
3012 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
3013 
3014 		    /* XXXX This is too dramatic a measure... */
3015 		    PL_reg_maxiter = 0;
3016 
3017 		    logical = 0;
3018 		    sayNO;
3019 		}
3020 		sw = SvTRUE(ret);
3021 		logical = 0;
3022 	    }
3023 	    else {
3024 		sv_setsv(save_scalar(PL_replgv), ret);
3025 		cache_re(oreg);
3026 	    }
3027 	    break;
3028 	}
3029 	case OPEN:
3030 	    n = ARG(scan);  /* which paren pair */
3031 	    PL_reg_start_tmp[n] = locinput;
3032 	    if (n > PL_regsize)
3033 		PL_regsize = n;
3034 	    break;
3035 	case CLOSE:
3036 	    n = ARG(scan);  /* which paren pair */
3037 	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3038 	    PL_regendp[n] = locinput - PL_bostr;
3039 	    if (n > (I32)*PL_reglastparen)
3040 		*PL_reglastparen = n;
3041 	    *PL_reglastcloseparen = n;
3042 	    break;
3043 	case GROUPP:
3044 	    n = ARG(scan);  /* which paren pair */
3045 	    sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3046 	    break;
3047 	case IFTHEN:
3048 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3049 	    if (sw)
3050 		next = NEXTOPER(NEXTOPER(scan));
3051 	    else {
3052 		next = scan + ARG(scan);
3053 		if (OP(next) == IFTHEN) /* Fake one. */
3054 		    next = NEXTOPER(NEXTOPER(next));
3055 	    }
3056 	    break;
3057 	case LOGICAL:
3058 	    logical = scan->flags;
3059 	    break;
3060 /*******************************************************************
3061  PL_regcc contains infoblock about the innermost (...)* loop, and
3062  a pointer to the next outer infoblock.
3063 
3064  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3065 
3066    1) After matching X, regnode for CURLYX is processed;
3067 
3068    2) This regnode creates infoblock on the stack, and calls
3069       regmatch() recursively with the starting point at WHILEM node;
3070 
3071    3) Each hit of WHILEM node tries to match A and Z (in the order
3072       depending on the current iteration, min/max of {min,max} and
3073       greediness).  The information about where are nodes for "A"
3074       and "Z" is read from the infoblock, as is info on how many times "A"
3075       was already matched, and greediness.
3076 
3077    4) After A matches, the same WHILEM node is hit again.
3078 
3079    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3080       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3081       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3082       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3083       of the external loop.
3084 
3085  Currently present infoblocks form a tree with a stem formed by PL_curcc
3086  and whatever it mentions via ->next, and additional attached trees
3087  corresponding to temporarily unset infoblocks as in "5" above.
3088 
3089  In the following picture infoblocks for outer loop of
3090  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3091  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3092  infoblocks are drawn below the "reset" infoblock.
3093 
3094  In fact in the picture below we do not show failed matches for Z and T
3095  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3096  more obvious *why* one needs to *temporary* unset infoblocks.]
3097 
3098   Matched	REx position	InfoBlocks	Comment
3099   		(Y(A)*?Z)*?T	x
3100   		Y(A)*?Z)*?T	x <- O
3101   Y		(A)*?Z)*?T	x <- O
3102   Y		A)*?Z)*?T	x <- O <- I
3103   YA		)*?Z)*?T	x <- O <- I
3104   YA		A)*?Z)*?T	x <- O <- I
3105   YAA		)*?Z)*?T	x <- O <- I
3106   YAA		Z)*?T		x <- O		# Temporary unset I
3107 				     I
3108 
3109   YAAZ		Y(A)*?Z)*?T	x <- O
3110 				     I
3111 
3112   YAAZY		(A)*?Z)*?T	x <- O
3113 				     I
3114 
3115   YAAZY		A)*?Z)*?T	x <- O <- I
3116 				     I
3117 
3118   YAAZYA	)*?Z)*?T	x <- O <- I
3119 				     I
3120 
3121   YAAZYA	Z)*?T		x <- O		# Temporary unset I
3122 				     I,I
3123 
3124   YAAZYAZ	)*?T		x <- O
3125 				     I,I
3126 
3127   YAAZYAZ	T		x		# Temporary unset O
3128 				O
3129 				I,I
3130 
3131   YAAZYAZT			x
3132 				O
3133 				I,I
3134  *******************************************************************/
3135 	case CURLYX: {
3136 		CURCUR cc;
3137 		CHECKPOINT cp = PL_savestack_ix;
3138 		/* No need to save/restore up to this paren */
3139 		I32 parenfloor = scan->flags;
3140 
3141 		if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3142 		    next += ARG(next);
3143 		cc.oldcc = PL_regcc;
3144 		PL_regcc = &cc;
3145 		/* XXXX Probably it is better to teach regpush to support
3146 		   parenfloor > PL_regsize... */
3147 		if (parenfloor > (I32)*PL_reglastparen)
3148 		    parenfloor = *PL_reglastparen; /* Pessimization... */
3149 		cc.parenfloor = parenfloor;
3150 		cc.cur = -1;
3151 		cc.min = ARG1(scan);
3152 		cc.max  = ARG2(scan);
3153 		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3154 		cc.next = next;
3155 		cc.minmod = minmod;
3156 		cc.lastloc = 0;
3157 		PL_reginput = locinput;
3158 		n = regmatch(PREVOPER(next));	/* start on the WHILEM */
3159 		regcpblow(cp);
3160 		PL_regcc = cc.oldcc;
3161 		saySAME(n);
3162 	    }
3163 	    /* NOT REACHED */
3164 	case WHILEM: {
3165 		/*
3166 		 * This is really hard to understand, because after we match
3167 		 * what we're trying to match, we must make sure the rest of
3168 		 * the REx is going to match for sure, and to do that we have
3169 		 * to go back UP the parse tree by recursing ever deeper.  And
3170 		 * if it fails, we have to reset our parent's current state
3171 		 * that we can try again after backing off.
3172 		 */
3173 
3174 		CHECKPOINT cp, lastcp;
3175 		CURCUR* cc = PL_regcc;
3176 		char *lastloc = cc->lastloc; /* Detection of 0-len. */
3177 		I32 cache_offset = 0, cache_bit = 0;
3178 
3179 		n = cc->cur + 1;	/* how many we know we matched */
3180 		PL_reginput = locinput;
3181 
3182 		DEBUG_r(
3183 		    PerlIO_printf(Perl_debug_log,
3184 				  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3185 				  REPORT_CODE_OFF+PL_regindent*2, "",
3186 				  (long)n, (long)cc->min,
3187 				  (long)cc->max, PTR2UV(cc))
3188 		    );
3189 
3190 		/* If degenerate scan matches "", assume scan done. */
3191 
3192 		if (locinput == cc->lastloc && n >= cc->min) {
3193 		    PL_regcc = cc->oldcc;
3194 		    if (PL_regcc)
3195 			ln = PL_regcc->cur;
3196 		    DEBUG_r(
3197 			PerlIO_printf(Perl_debug_log,
3198 			   "%*s  empty match detected, try continuation...\n",
3199 			   REPORT_CODE_OFF+PL_regindent*2, "")
3200 			);
3201 		    if (regmatch(cc->next))
3202 			sayYES;
3203 		    if (PL_regcc)
3204 			PL_regcc->cur = ln;
3205 		    PL_regcc = cc;
3206 		    sayNO;
3207 		}
3208 
3209 		/* First just match a string of min scans. */
3210 
3211 		if (n < cc->min) {
3212 		    cc->cur = n;
3213 		    cc->lastloc = locinput;
3214 		    if (regmatch(cc->scan))
3215 			sayYES;
3216 		    cc->cur = n - 1;
3217 		    cc->lastloc = lastloc;
3218 		    sayNO;
3219 		}
3220 
3221 		if (scan->flags) {
3222 		    /* Check whether we already were at this position.
3223 			Postpone detection until we know the match is not
3224 			*that* much linear. */
3225 		if (!PL_reg_maxiter) {
3226 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3227 		    PL_reg_leftiter = PL_reg_maxiter;
3228 		}
3229 		if (PL_reg_leftiter-- == 0) {
3230 		    const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
3231 		    if (PL_reg_poscache) {
3232 			if ((I32)PL_reg_poscache_size < size) {
3233 			    Renew(PL_reg_poscache, size, char);
3234 			    PL_reg_poscache_size = size;
3235 			}
3236 			Zero(PL_reg_poscache, size, char);
3237 		    }
3238 		    else {
3239 			PL_reg_poscache_size = size;
3240 			Newxz(PL_reg_poscache, size, char);
3241 		    }
3242 		    DEBUG_r(
3243 			PerlIO_printf(Perl_debug_log,
3244 	      "%sDetected a super-linear match, switching on caching%s...\n",
3245 				      PL_colors[4], PL_colors[5])
3246 			);
3247 		}
3248 		if (PL_reg_leftiter < 0) {
3249 		    cache_offset = locinput - PL_bostr;
3250 
3251 		    cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3252 			    + cache_offset * (scan->flags>>4);
3253 		    cache_bit = cache_offset % 8;
3254 		    cache_offset /= 8;
3255 		    if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
3256 		    DEBUG_r(
3257 			PerlIO_printf(Perl_debug_log,
3258 				      "%*s  already tried at this position...\n",
3259 				      REPORT_CODE_OFF+PL_regindent*2, "")
3260 			);
3261 			if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3262 			    /* cache records success */
3263 			    sayYES;
3264 			else
3265 			    /* cache records failure */
3266 			    sayNO_SILENT;
3267 		    }
3268 		    PL_reg_poscache[cache_offset] |= (1<<cache_bit);
3269 		}
3270 		}
3271 
3272 		/* Prefer next over scan for minimal matching. */
3273 
3274 		if (cc->minmod) {
3275 		    PL_regcc = cc->oldcc;
3276 		    if (PL_regcc)
3277 			ln = PL_regcc->cur;
3278 		    cp = regcppush(cc->parenfloor);
3279 		    REGCP_SET(lastcp);
3280 		    if (regmatch(cc->next)) {
3281 			regcpblow(cp);
3282 			CACHEsayYES;	/* All done. */
3283 		    }
3284 		    REGCP_UNWIND(lastcp);
3285 		    regcppop();
3286 		    if (PL_regcc)
3287 			PL_regcc->cur = ln;
3288 		    PL_regcc = cc;
3289 
3290 		    if (n >= cc->max) {	/* Maximum greed exceeded? */
3291 			if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3292 			    && !(PL_reg_flags & RF_warned)) {
3293 			    PL_reg_flags |= RF_warned;
3294 			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3295 				 "Complex regular subexpression recursion",
3296 				 REG_INFTY - 1);
3297 			}
3298 			CACHEsayNO;
3299 		    }
3300 
3301 		    DEBUG_r(
3302 			PerlIO_printf(Perl_debug_log,
3303 				      "%*s  trying longer...\n",
3304 				      REPORT_CODE_OFF+PL_regindent*2, "")
3305 			);
3306 		    /* Try scanning more and see if it helps. */
3307 		    PL_reginput = locinput;
3308 		    cc->cur = n;
3309 		    cc->lastloc = locinput;
3310 		    cp = regcppush(cc->parenfloor);
3311 		    REGCP_SET(lastcp);
3312 		    if (regmatch(cc->scan)) {
3313 			regcpblow(cp);
3314 			CACHEsayYES;
3315 		    }
3316 		    REGCP_UNWIND(lastcp);
3317 		    regcppop();
3318 		    cc->cur = n - 1;
3319 		    cc->lastloc = lastloc;
3320 		    CACHEsayNO;
3321 		}
3322 
3323 		/* Prefer scan over next for maximal matching. */
3324 
3325 		if (n < cc->max) {	/* More greed allowed? */
3326 		    cp = regcppush(cc->parenfloor);
3327 		    cc->cur = n;
3328 		    cc->lastloc = locinput;
3329 		    REGCP_SET(lastcp);
3330 		    if (regmatch(cc->scan)) {
3331 			regcpblow(cp);
3332 			CACHEsayYES;
3333 		    }
3334 		    REGCP_UNWIND(lastcp);
3335 		    regcppop();		/* Restore some previous $<digit>s? */
3336 		    PL_reginput = locinput;
3337 		    DEBUG_r(
3338 			PerlIO_printf(Perl_debug_log,
3339 				      "%*s  failed, try continuation...\n",
3340 				      REPORT_CODE_OFF+PL_regindent*2, "")
3341 			);
3342 		}
3343 		if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3344 			&& !(PL_reg_flags & RF_warned)) {
3345 		    PL_reg_flags |= RF_warned;
3346 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3347 			 "Complex regular subexpression recursion",
3348 			 REG_INFTY - 1);
3349 		}
3350 
3351 		/* Failed deeper matches of scan, so see if this one works. */
3352 		PL_regcc = cc->oldcc;
3353 		if (PL_regcc)
3354 		    ln = PL_regcc->cur;
3355 		if (regmatch(cc->next))
3356 		    CACHEsayYES;
3357 		if (PL_regcc)
3358 		    PL_regcc->cur = ln;
3359 		PL_regcc = cc;
3360 		cc->cur = n - 1;
3361 		cc->lastloc = lastloc;
3362 		CACHEsayNO;
3363 	    }
3364 	    /* NOT REACHED */
3365 	case BRANCHJ:
3366 	    next = scan + ARG(scan);
3367 	    if (next == scan)
3368 		next = NULL;
3369 	    inner = NEXTOPER(NEXTOPER(scan));
3370 	    goto do_branch;
3371 	case BRANCH:
3372 	    inner = NEXTOPER(scan);
3373 	  do_branch:
3374 	    {
3375 		c1 = OP(scan);
3376 		if (OP(next) != c1)	/* No choice. */
3377 		    next = inner;	/* Avoid recursion. */
3378 		else {
3379 		    const I32 lastparen = *PL_reglastparen;
3380 		    I32 unwind1;
3381 		    re_unwind_branch_t *uw;
3382 
3383 		    /* Put unwinding data on stack */
3384 		    unwind1 = SSNEWt(1,re_unwind_branch_t);
3385 		    uw = SSPTRt(unwind1,re_unwind_branch_t);
3386 		    uw->prev = unwind;
3387 		    unwind = unwind1;
3388 		    uw->type = ((c1 == BRANCH)
3389 				? RE_UNWIND_BRANCH
3390 				: RE_UNWIND_BRANCHJ);
3391 		    uw->lastparen = lastparen;
3392 		    uw->next = next;
3393 		    uw->locinput = locinput;
3394 		    uw->nextchr = nextchr;
3395 #ifdef DEBUGGING
3396 		    uw->regindent = ++PL_regindent;
3397 #endif
3398 
3399 		    REGCP_SET(uw->lastcp);
3400 
3401 		    /* Now go into the first branch */
3402 		    next = inner;
3403 		}
3404 	    }
3405 	    break;
3406 	case MINMOD:
3407 	    minmod = 1;
3408 	    break;
3409 	case CURLYM:
3410 	{
3411 	    I32 l = 0;
3412 	    CHECKPOINT lastcp;
3413 
3414 	    /* We suppose that the next guy does not need
3415 	       backtracking: in particular, it is of constant non-zero length,
3416 	       and has no parenths to influence future backrefs. */
3417 	    ln = ARG1(scan);  /* min to match */
3418 	    n  = ARG2(scan);  /* max to match */
3419 	    paren = scan->flags;
3420 	    if (paren) {
3421 		if (paren > PL_regsize)
3422 		    PL_regsize = paren;
3423 		if (paren > (I32)*PL_reglastparen)
3424 		    *PL_reglastparen = paren;
3425 	    }
3426 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3427 	    if (paren)
3428 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
3429 	    PL_reginput = locinput;
3430 	    if (minmod) {
3431 		minmod = 0;
3432 		if (ln && regrepeat_hard(scan, ln, &l) < ln)
3433 		    sayNO;
3434 		locinput = PL_reginput;
3435 		if (HAS_TEXT(next) || JUMPABLE(next)) {
3436 		    regnode *text_node = next;
3437 
3438 		    if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3439 
3440 		    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3441 		    else {
3442 			if (PL_regkind[(U8)OP(text_node)] == REF) {
3443 			    c1 = c2 = -1000;
3444 			    goto assume_ok_MM;
3445 			}
3446 			else { c1 = (U8)*STRING(text_node); }
3447 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3448 			    c2 = PL_fold[c1];
3449 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3450 			    c2 = PL_fold_locale[c1];
3451 			else
3452 			    c2 = c1;
3453 		    }
3454 		}
3455 		else
3456 		    c1 = c2 = -1000;
3457 	    assume_ok_MM:
3458 		REGCP_SET(lastcp);
3459 		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3460 		    /* If it could work, try it. */
3461 		    if (c1 == -1000 ||
3462 			UCHARAT(PL_reginput) == c1 ||
3463 			UCHARAT(PL_reginput) == c2)
3464 		    {
3465 			if (paren) {
3466 			    if (ln) {
3467 				PL_regstartp[paren] =
3468 				    HOPc(PL_reginput, -l) - PL_bostr;
3469 				PL_regendp[paren] = PL_reginput - PL_bostr;
3470 			    }
3471 			    else
3472 				PL_regendp[paren] = -1;
3473 			}
3474 			if (regmatch(next))
3475 			    sayYES;
3476 			REGCP_UNWIND(lastcp);
3477 		    }
3478 		    /* Couldn't or didn't -- move forward. */
3479 		    PL_reginput = locinput;
3480 		    if (regrepeat_hard(scan, 1, &l)) {
3481 			ln++;
3482 			locinput = PL_reginput;
3483 		    }
3484 		    else
3485 			sayNO;
3486 		}
3487 	    }
3488 	    else {
3489 		n = regrepeat_hard(scan, n, &l);
3490 		locinput = PL_reginput;
3491 		DEBUG_r(
3492 		    PerlIO_printf(Perl_debug_log,
3493 				  "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3494 				  (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3495 				  (IV) n, (IV)l)
3496 		    );
3497 		if (n >= ln) {
3498 		    if (HAS_TEXT(next) || JUMPABLE(next)) {
3499 			regnode *text_node = next;
3500 
3501 			if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3502 
3503 			if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3504 			else {
3505 			    if (PL_regkind[(U8)OP(text_node)] == REF) {
3506 				c1 = c2 = -1000;
3507 				goto assume_ok_REG;
3508 			    }
3509 			    else { c1 = (U8)*STRING(text_node); }
3510 
3511 			    if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3512 				c2 = PL_fold[c1];
3513 			    else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3514 				c2 = PL_fold_locale[c1];
3515 			    else
3516 				c2 = c1;
3517 			}
3518 		    }
3519 		    else
3520 			c1 = c2 = -1000;
3521 		}
3522 	    assume_ok_REG:
3523 		REGCP_SET(lastcp);
3524 		while (n >= ln) {
3525 		    /* If it could work, try it. */
3526 		    if (c1 == -1000 ||
3527 			UCHARAT(PL_reginput) == c1 ||
3528 			UCHARAT(PL_reginput) == c2)
3529 		    {
3530 			DEBUG_r(
3531 				PerlIO_printf(Perl_debug_log,
3532 					      "%*s  trying tail with n=%"IVdf"...\n",
3533 					      (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3534 			    );
3535 			if (paren) {
3536 			    if (n) {
3537 				PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3538 				PL_regendp[paren] = PL_reginput - PL_bostr;
3539 			    }
3540 			    else
3541 				PL_regendp[paren] = -1;
3542 			}
3543 			if (regmatch(next))
3544 			    sayYES;
3545 			REGCP_UNWIND(lastcp);
3546 		    }
3547 		    /* Couldn't or didn't -- back up. */
3548 		    n--;
3549 		    locinput = HOPc(locinput, -l);
3550 		    PL_reginput = locinput;
3551 		}
3552 	    }
3553 	    sayNO;
3554 	    break;
3555 	}
3556 	case CURLYN:
3557 	    paren = scan->flags;	/* Which paren to set */
3558 	    if (paren > PL_regsize)
3559 		PL_regsize = paren;
3560 	    if (paren > (I32)*PL_reglastparen)
3561 		*PL_reglastparen = paren;
3562 	    ln = ARG1(scan);  /* min to match */
3563 	    n  = ARG2(scan);  /* max to match */
3564             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3565 	    goto repeat;
3566 	case CURLY:
3567 	    paren = 0;
3568 	    ln = ARG1(scan);  /* min to match */
3569 	    n  = ARG2(scan);  /* max to match */
3570 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3571 	    goto repeat;
3572 	case STAR:
3573 	    ln = 0;
3574 	    n = REG_INFTY;
3575 	    scan = NEXTOPER(scan);
3576 	    paren = 0;
3577 	    goto repeat;
3578 	case PLUS:
3579 	    ln = 1;
3580 	    n = REG_INFTY;
3581 	    scan = NEXTOPER(scan);
3582 	    paren = 0;
3583 	  repeat:
3584 	    /*
3585 	    * Lookahead to avoid useless match attempts
3586 	    * when we know what character comes next.
3587 	    */
3588 
3589 	    /*
3590 	    * Used to only do .*x and .*?x, but now it allows
3591 	    * for )'s, ('s and (?{ ... })'s to be in the way
3592 	    * of the quantifier and the EXACT-like node.  -- japhy
3593 	    */
3594 
3595 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
3596 		U8 *s;
3597 		regnode *text_node = next;
3598 
3599 		if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3600 
3601 		if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3602 		else {
3603 		    if (PL_regkind[(U8)OP(text_node)] == REF) {
3604 			c1 = c2 = -1000;
3605 			goto assume_ok_easy;
3606 		    }
3607 		    else { s = (U8*)STRING(text_node); }
3608 
3609 		    if (!UTF) {
3610 			c2 = c1 = *s;
3611 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3612 			    c2 = PL_fold[c1];
3613 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3614 			    c2 = PL_fold_locale[c1];
3615 		    }
3616 		    else { /* UTF */
3617 			if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3618 			     STRLEN ulen1, ulen2;
3619 			     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3620 			     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3621 
3622 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3623 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3624 
3625 			     c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
3626 						 uniflags);
3627 			     c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
3628 						 uniflags);
3629 			}
3630 			else {
3631 			    c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
3632 						     uniflags);
3633 			}
3634 		    }
3635 		}
3636 	    }
3637 	    else
3638 		c1 = c2 = -1000;
3639 	assume_ok_easy:
3640 	    PL_reginput = locinput;
3641 	    if (minmod) {
3642 		CHECKPOINT lastcp;
3643 		minmod = 0;
3644 		if (ln && regrepeat(scan, ln) < ln)
3645 		    sayNO;
3646 		locinput = PL_reginput;
3647 		REGCP_SET(lastcp);
3648 		if (c1 != -1000) {
3649 		    char *e; /* Should not check after this */
3650 		    char *old = locinput;
3651 		    int count = 0;
3652 
3653 		    if  (n == REG_INFTY) {
3654 			e = PL_regeol - 1;
3655 			if (do_utf8)
3656 			    while (UTF8_IS_CONTINUATION(*(U8*)e))
3657 				e--;
3658 		    }
3659 		    else if (do_utf8) {
3660 			int m = n - ln;
3661 			for (e = locinput;
3662 			     m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3663 			    e += UTF8SKIP(e);
3664 		    }
3665 		    else {
3666 			e = locinput + n - ln;
3667 			if (e >= PL_regeol)
3668 			    e = PL_regeol - 1;
3669 		    }
3670 		    while (1) {
3671 			/* Find place 'next' could work */
3672 			if (!do_utf8) {
3673 			    if (c1 == c2) {
3674 				while (locinput <= e &&
3675 				       UCHARAT(locinput) != c1)
3676 				    locinput++;
3677 			    } else {
3678 				while (locinput <= e
3679 				       && UCHARAT(locinput) != c1
3680 				       && UCHARAT(locinput) != c2)
3681 				    locinput++;
3682 			    }
3683 			    count = locinput - old;
3684 			}
3685 			else {
3686 			    if (c1 == c2) {
3687 				STRLEN len;
3688 				/* count initialised to
3689 				 * utf8_distance(old, locinput) */
3690 				while (locinput <= e &&
3691 				       utf8n_to_uvchr((U8*)locinput,
3692 						      UTF8_MAXBYTES, &len,
3693 						      uniflags) != (UV)c1) {
3694 				    locinput += len;
3695 				    count++;
3696 				}
3697 			    } else {
3698 				STRLEN len;
3699 				/* count initialised to
3700 				 * utf8_distance(old, locinput) */
3701 				while (locinput <= e) {
3702 				    UV c = utf8n_to_uvchr((U8*)locinput,
3703 							  UTF8_MAXBYTES, &len,
3704 							  uniflags);
3705 				    if (c == (UV)c1 || c == (UV)c2)
3706 					break;
3707 				    locinput += len;
3708 				    count++;
3709 				}
3710 			    }
3711 			}
3712 			if (locinput > e)
3713 			    sayNO;
3714 			/* PL_reginput == old now */
3715 			if (locinput != old) {
3716 			    ln = 1;	/* Did some */
3717 			    if (regrepeat(scan, count) < count)
3718 				sayNO;
3719 			}
3720 			/* PL_reginput == locinput now */
3721 			TRYPAREN(paren, ln, locinput);
3722 			PL_reginput = locinput;	/* Could be reset... */
3723 			REGCP_UNWIND(lastcp);
3724 			/* Couldn't or didn't -- move forward. */
3725 			old = locinput;
3726 			if (do_utf8)
3727 			    locinput += UTF8SKIP(locinput);
3728 			else
3729 			    locinput++;
3730 			count = 1;
3731 		    }
3732 		}
3733 		else
3734 		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3735 		    UV c;
3736 		    if (c1 != -1000) {
3737 			if (do_utf8)
3738 			    c = utf8n_to_uvchr((U8*)PL_reginput,
3739 					       UTF8_MAXBYTES, 0,
3740 					       uniflags);
3741 			else
3742 			    c = UCHARAT(PL_reginput);
3743 			/* If it could work, try it. */
3744 		        if (c == (UV)c1 || c == (UV)c2)
3745 		        {
3746 			    TRYPAREN(paren, ln, PL_reginput);
3747 			    REGCP_UNWIND(lastcp);
3748 		        }
3749 		    }
3750 		    /* If it could work, try it. */
3751 		    else if (c1 == -1000)
3752 		    {
3753 			TRYPAREN(paren, ln, PL_reginput);
3754 			REGCP_UNWIND(lastcp);
3755 		    }
3756 		    /* Couldn't or didn't -- move forward. */
3757 		    PL_reginput = locinput;
3758 		    if (regrepeat(scan, 1)) {
3759 			ln++;
3760 			locinput = PL_reginput;
3761 		    }
3762 		    else
3763 			sayNO;
3764 		}
3765 	    }
3766 	    else {
3767 		CHECKPOINT lastcp;
3768 		n = regrepeat(scan, n);
3769 		locinput = PL_reginput;
3770 		if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3771 		    ((!PL_multiline && OP(next) != MEOL) ||
3772 			OP(next) == SEOL || OP(next) == EOS))
3773 		{
3774 		    ln = n;			/* why back off? */
3775 		    /* ...because $ and \Z can match before *and* after
3776 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3777 		       We should back off by one in this case. */
3778 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3779 			ln--;
3780 		}
3781 		REGCP_SET(lastcp);
3782 		if (paren) {
3783 		    UV c = 0;
3784 		    while (n >= ln) {
3785 			if (c1 != -1000) {
3786 			    if (do_utf8)
3787 				c = utf8n_to_uvchr((U8*)PL_reginput,
3788 						   UTF8_MAXBYTES, 0,
3789 						   uniflags);
3790 			    else
3791 				c = UCHARAT(PL_reginput);
3792 			}
3793 			/* If it could work, try it. */
3794 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3795 			    {
3796 				TRYPAREN(paren, n, PL_reginput);
3797 				REGCP_UNWIND(lastcp);
3798 			    }
3799 			/* Couldn't or didn't -- back up. */
3800 			n--;
3801 			PL_reginput = locinput = HOPc(locinput, -1);
3802 		    }
3803 		}
3804 		else {
3805 		    UV c = 0;
3806 		    while (n >= ln) {
3807 			if (c1 != -1000) {
3808 			    if (do_utf8)
3809 				c = utf8n_to_uvchr((U8*)PL_reginput,
3810 						   UTF8_MAXBYTES, 0,
3811 						   uniflags);
3812 			    else
3813 				c = UCHARAT(PL_reginput);
3814 			}
3815 			/* If it could work, try it. */
3816 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3817 			    {
3818 				TRYPAREN(paren, n, PL_reginput);
3819 				REGCP_UNWIND(lastcp);
3820 			    }
3821 			/* Couldn't or didn't -- back up. */
3822 			n--;
3823 			PL_reginput = locinput = HOPc(locinput, -1);
3824 		    }
3825 		}
3826 	    }
3827 	    sayNO;
3828 	    break;
3829 	case END:
3830 	    if (PL_reg_call_cc) {
3831 		re_cc_state *cur_call_cc = PL_reg_call_cc;
3832 		CURCUR *cctmp = PL_regcc;
3833 		regexp *re = PL_reg_re;
3834 		CHECKPOINT cp, lastcp;
3835 
3836 		cp = regcppush(0);	/* Save *all* the positions. */
3837 		REGCP_SET(lastcp);
3838 		regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3839 						    the caller. */
3840 		PL_reginput = locinput;	/* Make position available to
3841 					   the callcc. */
3842 		cache_re(PL_reg_call_cc->re);
3843 		PL_regcc = PL_reg_call_cc->cc;
3844 		PL_reg_call_cc = PL_reg_call_cc->prev;
3845 		if (regmatch(cur_call_cc->node)) {
3846 		    PL_reg_call_cc = cur_call_cc;
3847 		    regcpblow(cp);
3848 		    sayYES;
3849 		}
3850 		REGCP_UNWIND(lastcp);
3851 		regcppop();
3852 		PL_reg_call_cc = cur_call_cc;
3853 		PL_regcc = cctmp;
3854 		PL_reg_re = re;
3855 		cache_re(re);
3856 
3857 		DEBUG_r(
3858 		    PerlIO_printf(Perl_debug_log,
3859 				  "%*s  continuation failed...\n",
3860 				  REPORT_CODE_OFF+PL_regindent*2, "")
3861 		    );
3862 		sayNO_SILENT;
3863 	    }
3864 	    if (locinput < PL_regtill) {
3865 		DEBUG_r(PerlIO_printf(Perl_debug_log,
3866 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3867 				      PL_colors[4],
3868 				      (long)(locinput - PL_reg_starttry),
3869 				      (long)(PL_regtill - PL_reg_starttry),
3870 				      PL_colors[5]));
3871 		sayNO_FINAL;		/* Cannot match: too short. */
3872 	    }
3873 	    PL_reginput = locinput;	/* put where regtry can find it */
3874 	    sayYES_FINAL;		/* Success! */
3875 	case SUCCEED:
3876 	    PL_reginput = locinput;	/* put where regtry can find it */
3877 	    sayYES_LOUD;		/* Success! */
3878 	case SUSPEND:
3879 	    n = 1;
3880 	    PL_reginput = locinput;
3881 	    goto do_ifmatch;
3882 	case UNLESSM:
3883 	    n = 0;
3884 	    if (scan->flags) {
3885 		s = HOPBACKc(locinput, scan->flags);
3886 		if (!s)
3887 		    goto say_yes;
3888 		PL_reginput = s;
3889 	    }
3890 	    else
3891 		PL_reginput = locinput;
3892 	    goto do_ifmatch;
3893 	case IFMATCH:
3894 	    n = 1;
3895 	    if (scan->flags) {
3896 		s = HOPBACKc(locinput, scan->flags);
3897 		if (!s)
3898 		    goto say_no;
3899 		PL_reginput = s;
3900 	    }
3901 	    else
3902 		PL_reginput = locinput;
3903 
3904 	  do_ifmatch:
3905 	    inner = NEXTOPER(NEXTOPER(scan));
3906 	    if (regmatch(inner) != n) {
3907 	      say_no:
3908 		if (logical) {
3909 		    logical = 0;
3910 		    sw = 0;
3911 		    goto do_longjump;
3912 		}
3913 		else
3914 		    sayNO;
3915 	    }
3916 	  say_yes:
3917 	    if (logical) {
3918 		logical = 0;
3919 		sw = 1;
3920 	    }
3921 	    if (OP(scan) == SUSPEND) {
3922 		locinput = PL_reginput;
3923 		nextchr = UCHARAT(locinput);
3924 	    }
3925 	    /* FALL THROUGH. */
3926 	case LONGJMP:
3927 	  do_longjump:
3928 	    next = scan + ARG(scan);
3929 	    if (next == scan)
3930 		next = NULL;
3931 	    break;
3932 	default:
3933 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3934 			  PTR2UV(scan), OP(scan));
3935 	    Perl_croak(aTHX_ "regexp memory corruption");
3936 	}
3937       reenter:
3938 	scan = next;
3939     }
3940 
3941     /*
3942     * We get here only if there's trouble -- normally "case END" is
3943     * the terminating point.
3944     */
3945     Perl_croak(aTHX_ "corrupted regexp pointers");
3946     /*NOTREACHED*/
3947     sayNO;
3948 
3949 yes_loud:
3950     DEBUG_r(
3951 	PerlIO_printf(Perl_debug_log,
3952 		      "%*s  %scould match...%s\n",
3953 		      REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3954 	);
3955     goto yes;
3956 yes_final:
3957     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3958 			  PL_colors[4],PL_colors[5]));
3959 yes:
3960 #ifdef DEBUGGING
3961     PL_regindent--;
3962 #endif
3963 
3964 #if 0					/* Breaks $^R */
3965     if (unwind)
3966 	regcpblow(firstcp);
3967 #endif
3968     return 1;
3969 
3970 no:
3971     DEBUG_r(
3972 	PerlIO_printf(Perl_debug_log,
3973 		      "%*s  %sfailed...%s\n",
3974 		      REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3975 	);
3976     goto do_no;
3977 no_final:
3978 do_no:
3979     if (unwind) {
3980 	re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3981 
3982 	switch (uw->type) {
3983 	case RE_UNWIND_BRANCH:
3984 	case RE_UNWIND_BRANCHJ:
3985 	{
3986 	    re_unwind_branch_t *uwb = &(uw->branch);
3987 	    const I32 lastparen = uwb->lastparen;
3988 
3989 	    REGCP_UNWIND(uwb->lastcp);
3990 	    for (n = *PL_reglastparen; n > lastparen; n--)
3991 		PL_regendp[n] = -1;
3992 	    *PL_reglastparen = n;
3993 	    scan = next = uwb->next;
3994 	    if ( !scan ||
3995 		 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3996 			      ? BRANCH : BRANCHJ) ) {		/* Failure */
3997 		unwind = uwb->prev;
3998 #ifdef DEBUGGING
3999 		PL_regindent--;
4000 #endif
4001 		goto do_no;
4002 	    }
4003 	    /* Have more choice yet.  Reuse the same uwb.  */
4004 	    if ((n = (uwb->type == RE_UNWIND_BRANCH
4005 		      ? NEXT_OFF(next) : ARG(next))))
4006 		next += n;
4007 	    else
4008 		next = NULL;	/* XXXX Needn't unwinding in this case... */
4009 	    uwb->next = next;
4010 	    next = NEXTOPER(scan);
4011 	    if (uwb->type == RE_UNWIND_BRANCHJ)
4012 		next = NEXTOPER(next);
4013 	    locinput = uwb->locinput;
4014 	    nextchr = uwb->nextchr;
4015 #ifdef DEBUGGING
4016 	    PL_regindent = uwb->regindent;
4017 #endif
4018 
4019 	    goto reenter;
4020 	}
4021 	/* NOT REACHED */
4022 	default:
4023 	    Perl_croak(aTHX_ "regexp unwind memory corruption");
4024 	}
4025 	/* NOT REACHED */
4026     }
4027 #ifdef DEBUGGING
4028     PL_regindent--;
4029 #endif
4030     return 0;
4031 }
4032 
4033 /*
4034  - regrepeat - repeatedly match something simple, report how many
4035  */
4036 /*
4037  * [This routine now assumes that it will only match on things of length 1.
4038  * That was true before, but now we assume scan - reginput is the count,
4039  * rather than incrementing count on every character.  [Er, except utf8.]]
4040  */
4041 STATIC I32
4042 S_regrepeat(pTHX_ const regnode *p, I32 max)
4043 {
4044     register char *scan;
4045     register I32 c;
4046     register char *loceol = PL_regeol;
4047     register I32 hardcount = 0;
4048     register bool do_utf8 = PL_reg_match_utf8;
4049 
4050     scan = PL_reginput;
4051     if (max == REG_INFTY)
4052 	max = I32_MAX;
4053     else if (max < loceol - scan)
4054       loceol = scan + max;
4055     switch (OP(p)) {
4056     case REG_ANY:
4057 	if (do_utf8) {
4058 	    loceol = PL_regeol;
4059 	    while (scan < loceol && hardcount < max && *scan != '\n') {
4060 		scan += UTF8SKIP(scan);
4061 		hardcount++;
4062 	    }
4063 	} else {
4064 	    while (scan < loceol && *scan != '\n')
4065 		scan++;
4066 	}
4067 	break;
4068     case SANY:
4069         if (do_utf8) {
4070 	    loceol = PL_regeol;
4071 	    while (scan < loceol && hardcount < max) {
4072 	        scan += UTF8SKIP(scan);
4073 		hardcount++;
4074 	    }
4075 	}
4076 	else
4077 	    scan = loceol;
4078 	break;
4079     case CANY:
4080 	scan = loceol;
4081 	break;
4082     case EXACT:		/* length of string is 1 */
4083 	c = (U8)*STRING(p);
4084 	while (scan < loceol && UCHARAT(scan) == c)
4085 	    scan++;
4086 	break;
4087     case EXACTF:	/* length of string is 1 */
4088 	c = (U8)*STRING(p);
4089 	while (scan < loceol &&
4090 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4091 	    scan++;
4092 	break;
4093     case EXACTFL:	/* length of string is 1 */
4094 	PL_reg_flags |= RF_tainted;
4095 	c = (U8)*STRING(p);
4096 	while (scan < loceol &&
4097 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4098 	    scan++;
4099 	break;
4100     case ANYOF:
4101 	if (do_utf8) {
4102 	    loceol = PL_regeol;
4103 	    while (hardcount < max && scan < loceol &&
4104 		   reginclass(p, (U8*)scan, 0, do_utf8)) {
4105 		scan += UTF8SKIP(scan);
4106 		hardcount++;
4107 	    }
4108 	} else {
4109 	    while (scan < loceol && REGINCLASS(p, (U8*)scan))
4110 		scan++;
4111 	}
4112 	break;
4113     case ALNUM:
4114 	if (do_utf8) {
4115 	    loceol = PL_regeol;
4116 	    LOAD_UTF8_CHARCLASS_ALNUM();
4117 	    while (hardcount < max && scan < loceol &&
4118 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4119 		scan += UTF8SKIP(scan);
4120 		hardcount++;
4121 	    }
4122 	} else {
4123 	    while (scan < loceol && isALNUM(*scan))
4124 		scan++;
4125 	}
4126 	break;
4127     case ALNUML:
4128 	PL_reg_flags |= RF_tainted;
4129 	if (do_utf8) {
4130 	    loceol = PL_regeol;
4131 	    while (hardcount < max && scan < loceol &&
4132 		   isALNUM_LC_utf8((U8*)scan)) {
4133 		scan += UTF8SKIP(scan);
4134 		hardcount++;
4135 	    }
4136 	} else {
4137 	    while (scan < loceol && isALNUM_LC(*scan))
4138 		scan++;
4139 	}
4140 	break;
4141     case NALNUM:
4142 	if (do_utf8) {
4143 	    loceol = PL_regeol;
4144 	    LOAD_UTF8_CHARCLASS_ALNUM();
4145 	    while (hardcount < max && scan < loceol &&
4146 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4147 		scan += UTF8SKIP(scan);
4148 		hardcount++;
4149 	    }
4150 	} else {
4151 	    while (scan < loceol && !isALNUM(*scan))
4152 		scan++;
4153 	}
4154 	break;
4155     case NALNUML:
4156 	PL_reg_flags |= RF_tainted;
4157 	if (do_utf8) {
4158 	    loceol = PL_regeol;
4159 	    while (hardcount < max && scan < loceol &&
4160 		   !isALNUM_LC_utf8((U8*)scan)) {
4161 		scan += UTF8SKIP(scan);
4162 		hardcount++;
4163 	    }
4164 	} else {
4165 	    while (scan < loceol && !isALNUM_LC(*scan))
4166 		scan++;
4167 	}
4168 	break;
4169     case SPACE:
4170 	if (do_utf8) {
4171 	    loceol = PL_regeol;
4172 	    LOAD_UTF8_CHARCLASS_SPACE();
4173 	    while (hardcount < max && scan < loceol &&
4174 		   (*scan == ' ' ||
4175 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4176 		scan += UTF8SKIP(scan);
4177 		hardcount++;
4178 	    }
4179 	} else {
4180 	    while (scan < loceol && isSPACE(*scan))
4181 		scan++;
4182 	}
4183 	break;
4184     case SPACEL:
4185 	PL_reg_flags |= RF_tainted;
4186 	if (do_utf8) {
4187 	    loceol = PL_regeol;
4188 	    while (hardcount < max && scan < loceol &&
4189 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4190 		scan += UTF8SKIP(scan);
4191 		hardcount++;
4192 	    }
4193 	} else {
4194 	    while (scan < loceol && isSPACE_LC(*scan))
4195 		scan++;
4196 	}
4197 	break;
4198     case NSPACE:
4199 	if (do_utf8) {
4200 	    loceol = PL_regeol;
4201 	    LOAD_UTF8_CHARCLASS_SPACE();
4202 	    while (hardcount < max && scan < loceol &&
4203 		   !(*scan == ' ' ||
4204 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4205 		scan += UTF8SKIP(scan);
4206 		hardcount++;
4207 	    }
4208 	} else {
4209 	    while (scan < loceol && !isSPACE(*scan))
4210 		scan++;
4211 	    break;
4212 	}
4213     case NSPACEL:
4214 	PL_reg_flags |= RF_tainted;
4215 	if (do_utf8) {
4216 	    loceol = PL_regeol;
4217 	    while (hardcount < max && scan < loceol &&
4218 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4219 		scan += UTF8SKIP(scan);
4220 		hardcount++;
4221 	    }
4222 	} else {
4223 	    while (scan < loceol && !isSPACE_LC(*scan))
4224 		scan++;
4225 	}
4226 	break;
4227     case DIGIT:
4228 	if (do_utf8) {
4229 	    loceol = PL_regeol;
4230 	    LOAD_UTF8_CHARCLASS_DIGIT();
4231 	    while (hardcount < max && scan < loceol &&
4232 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4233 		scan += UTF8SKIP(scan);
4234 		hardcount++;
4235 	    }
4236 	} else {
4237 	    while (scan < loceol && isDIGIT(*scan))
4238 		scan++;
4239 	}
4240 	break;
4241     case NDIGIT:
4242 	if (do_utf8) {
4243 	    loceol = PL_regeol;
4244 	    LOAD_UTF8_CHARCLASS_DIGIT();
4245 	    while (hardcount < max && scan < loceol &&
4246 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4247 		scan += UTF8SKIP(scan);
4248 		hardcount++;
4249 	    }
4250 	} else {
4251 	    while (scan < loceol && !isDIGIT(*scan))
4252 		scan++;
4253 	}
4254 	break;
4255     default:		/* Called on something of 0 width. */
4256 	break;		/* So match right here or not at all. */
4257     }
4258 
4259     if (hardcount)
4260 	c = hardcount;
4261     else
4262 	c = scan - PL_reginput;
4263     PL_reginput = scan;
4264 
4265     DEBUG_r(
4266 	{
4267 		SV *prop = sv_newmortal();
4268 
4269 		regprop(prop, (regnode *)p);
4270 		PerlIO_printf(Perl_debug_log,
4271 			      "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4272 			      REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
4273 	});
4274 
4275     return(c);
4276 }
4277 
4278 /*
4279  - regrepeat_hard - repeatedly match something, report total lenth and length
4280  *
4281  * The repeater is supposed to have constant non-zero length.
4282  */
4283 
4284 STATIC I32
4285 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4286 {
4287     register char *scan = Nullch;
4288     register char *start;
4289     register char *loceol = PL_regeol;
4290     I32 l = 0;
4291     I32 count = 0, res = 1;
4292 
4293     if (!max)
4294 	return 0;
4295 
4296     start = PL_reginput;
4297     if (PL_reg_match_utf8) {
4298 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4299 	    if (!count++) {
4300 		l = 0;
4301 		while (start < PL_reginput) {
4302 		    l++;
4303 		    start += UTF8SKIP(start);
4304 		}
4305 		*lp = l;
4306 		if (l == 0)
4307 		    return max;
4308 	    }
4309 	    if (count == max)
4310 		return count;
4311 	}
4312     }
4313     else {
4314 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4315 	    if (!count++) {
4316 		*lp = l = PL_reginput - start;
4317 		if (max != REG_INFTY && l*max < loceol - scan)
4318 		    loceol = scan + l*max;
4319 		if (l == 0)
4320 		    return max;
4321 	    }
4322 	}
4323     }
4324     if (!res)
4325 	PL_reginput = scan;
4326 
4327     return count;
4328 }
4329 
4330 /*
4331 - regclass_swash - prepare the utf8 swash
4332 */
4333 
4334 SV *
4335 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4336 {
4337     SV *sw  = NULL;
4338     SV *si  = NULL;
4339     SV *alt = NULL;
4340 
4341     if (PL_regdata && PL_regdata->count) {
4342 	const U32 n = ARG(node);
4343 
4344 	if (PL_regdata->what[n] == 's') {
4345 	    SV * const rv = (SV*)PL_regdata->data[n];
4346 	    AV * const av = (AV*)SvRV((SV*)rv);
4347 	    SV **const ary = AvARRAY(av);
4348 	    SV **a, **b;
4349 
4350 	    /* See the end of regcomp.c:S_reglass() for
4351 	     * documentation of these array elements. */
4352 
4353 	    si = *ary;
4354 	    a  = SvROK(ary[1]) ? &ary[1] : 0;
4355 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4356 
4357 	    if (a)
4358 		sw = *a;
4359 	    else if (si && doinit) {
4360 		sw = swash_init("utf8", "", si, 1, 0);
4361 		(void)av_store(av, 1, sw);
4362 	    }
4363 	    if (b)
4364 	        alt = *b;
4365 	}
4366     }
4367 
4368     if (listsvp)
4369 	*listsvp = si;
4370     if (altsvp)
4371 	*altsvp  = alt;
4372 
4373     return sw;
4374 }
4375 
4376 /*
4377  - reginclass - determine if a character falls into a character class
4378 
4379   The n is the ANYOF regnode, the p is the target string, lenp
4380   is pointer to the maximum length of how far to go in the p
4381   (if the lenp is zero, UTF8SKIP(p) is used),
4382   do_utf8 tells whether the target string is in UTF-8.
4383 
4384  */
4385 
4386 STATIC bool
4387 S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
4388 {
4389     const char flags = ANYOF_FLAGS(n);
4390     bool match = FALSE;
4391     UV c = *p;
4392     STRLEN len = 0;
4393     STRLEN plen;
4394 
4395     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4396 	c = utf8n_to_uvchr((U8 *)p, UTF8_MAXBYTES, &len,
4397 			    ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4398 					UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
4399 	if (len == (STRLEN)-1)
4400 	    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4401     }
4402 
4403     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4404     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4405         if (lenp)
4406 	    *lenp = 0;
4407 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
4408 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4409 		match = TRUE;
4410 	}
4411 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4412 	    match = TRUE;
4413 	if (!match) {
4414 	    AV *av;
4415 	    SV * const sw = regclass_swash((regnode *)n, TRUE, 0, (SV**)&av);
4416 
4417 	    if (sw) {
4418 		if (swash_fetch(sw, (U8 *)p, do_utf8))
4419 		    match = TRUE;
4420 		else if (flags & ANYOF_FOLD) {
4421 		    if (!match && lenp && av) {
4422 		        I32 i;
4423 			for (i = 0; i <= av_len(av); i++) {
4424 			    SV* const sv = *av_fetch(av, i, FALSE);
4425 			    STRLEN len;
4426 			    const char * const s = SvPV_const(sv, len);
4427 
4428 			    if (len <= plen && memEQ(s, (char*)p, len)) {
4429 			        *lenp = len;
4430 				match = TRUE;
4431 				break;
4432 			    }
4433 			}
4434 		    }
4435 		    if (!match) {
4436 		        U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4437 			STRLEN tmplen;
4438 
4439 		        to_utf8_fold((U8 *)p, tmpbuf, &tmplen);
4440 			if (swash_fetch(sw, tmpbuf, do_utf8))
4441 			    match = TRUE;
4442 		    }
4443 		}
4444 	    }
4445 	}
4446 	if (match && lenp && *lenp == 0)
4447 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
4448     }
4449     if (!match && c < 256) {
4450 	if (ANYOF_BITMAP_TEST(n, c))
4451 	    match = TRUE;
4452 	else if (flags & ANYOF_FOLD) {
4453 	    U8 f;
4454 
4455 	    if (flags & ANYOF_LOCALE) {
4456 		PL_reg_flags |= RF_tainted;
4457 		f = PL_fold_locale[c];
4458 	    }
4459 	    else
4460 		f = PL_fold[c];
4461 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
4462 		match = TRUE;
4463 	}
4464 
4465 	if (!match && (flags & ANYOF_CLASS)) {
4466 	    PL_reg_flags |= RF_tainted;
4467 	    if (
4468 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4469 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4470 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4471 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4472 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4473 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4474 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4475 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4476 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4477 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4478 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4479 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4480 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4481 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4482 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4483 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4484 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4485 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4486 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4487 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4488 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4489 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4490 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4491 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4492 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4493 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4494 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4495 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4496 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4497 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4498 		) /* How's that for a conditional? */
4499 	    {
4500 		match = TRUE;
4501 	    }
4502 	}
4503     }
4504 
4505     return (flags & ANYOF_INVERT) ? !match : match;
4506 }
4507 
4508 STATIC U8 *
4509 S_reghop(pTHX_ U8 *s, I32 off)
4510 {
4511     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4512 }
4513 
4514 STATIC U8 *
4515 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4516 {
4517     if (off >= 0) {
4518 	while (off-- && s < lim) {
4519 	    /* XXX could check well-formedness here */
4520 	    s += UTF8SKIP(s);
4521 	}
4522     }
4523     else {
4524 	while (off++) {
4525 	    if (s > lim) {
4526 		s--;
4527 		if (UTF8_IS_CONTINUED(*s)) {
4528 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4529 			s--;
4530 		}
4531 		/* XXX could check well-formedness here */
4532 	    }
4533 	}
4534     }
4535     return s;
4536 }
4537 
4538 STATIC U8 *
4539 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4540 {
4541     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4542 }
4543 
4544 STATIC U8 *
4545 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4546 {
4547     if (off >= 0) {
4548 	while (off-- && s < lim) {
4549 	    /* XXX could check well-formedness here */
4550 	    s += UTF8SKIP(s);
4551 	}
4552 	if (off >= 0)
4553 	    return 0;
4554     }
4555     else {
4556 	while (off++) {
4557 	    if (s > lim) {
4558 		s--;
4559 		if (UTF8_IS_CONTINUED(*s)) {
4560 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4561 			s--;
4562 		}
4563 		/* XXX could check well-formedness here */
4564 	    }
4565 	    else
4566 		break;
4567 	}
4568 	if (off <= 0)
4569 	    return 0;
4570     }
4571     return s;
4572 }
4573 
4574 static void
4575 restore_pos(pTHX_ void *arg)
4576 {
4577     PERL_UNUSED_ARG(arg);
4578     if (PL_reg_eval_set) {
4579 	if (PL_reg_oldsaved) {
4580 	    PL_reg_re->subbeg = PL_reg_oldsaved;
4581 	    PL_reg_re->sublen = PL_reg_oldsavedlen;
4582 	    RX_MATCH_COPIED_on(PL_reg_re);
4583 	}
4584 	PL_reg_magic->mg_len = PL_reg_oldpos;
4585 	PL_reg_eval_set = 0;
4586 	PL_curpm = PL_reg_oldcurpm;
4587     }
4588 }
4589 
4590 STATIC void
4591 S_to_utf8_substr(pTHX_ register regexp *prog)
4592 {
4593     if (prog->float_substr && !prog->float_utf8) {
4594 	SV* sv;
4595 	prog->float_utf8 = sv = newSVsv(prog->float_substr);
4596 	sv_utf8_upgrade(sv);
4597 	if (SvTAIL(prog->float_substr))
4598 	    SvTAIL_on(sv);
4599 	if (prog->float_substr == prog->check_substr)
4600 	    prog->check_utf8 = sv;
4601     }
4602     if (prog->anchored_substr && !prog->anchored_utf8) {
4603 	SV* sv;
4604 	prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
4605 	sv_utf8_upgrade(sv);
4606 	if (SvTAIL(prog->anchored_substr))
4607 	    SvTAIL_on(sv);
4608 	if (prog->anchored_substr == prog->check_substr)
4609 	    prog->check_utf8 = sv;
4610     }
4611 }
4612 
4613 STATIC void
4614 S_to_byte_substr(pTHX_ register regexp *prog)
4615 {
4616     if (prog->float_utf8 && !prog->float_substr) {
4617 	SV* sv;
4618 	prog->float_substr = sv = newSVsv(prog->float_utf8);
4619 	if (sv_utf8_downgrade(sv, TRUE)) {
4620 	    if (SvTAIL(prog->float_utf8))
4621 		SvTAIL_on(sv);
4622 	} else {
4623 	    SvREFCNT_dec(sv);
4624 	    prog->float_substr = sv = &PL_sv_undef;
4625 	}
4626 	if (prog->float_utf8 == prog->check_utf8)
4627 	    prog->check_substr = sv;
4628     }
4629     if (prog->anchored_utf8 && !prog->anchored_substr) {
4630 	SV* sv;
4631 	prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
4632 	if (sv_utf8_downgrade(sv, TRUE)) {
4633 	    if (SvTAIL(prog->anchored_utf8))
4634 		SvTAIL_on(sv);
4635 	} else {
4636 	    SvREFCNT_dec(sv);
4637 	    prog->anchored_substr = sv = &PL_sv_undef;
4638 	}
4639 	if (prog->anchored_utf8 == prog->check_utf8)
4640 	    prog->check_substr = sv;
4641     }
4642 }
4643 
4644 /*
4645  * Local variables:
4646  * c-indentation-style: bsd
4647  * c-basic-offset: 4
4648  * indent-tabs-mode: t
4649  * End:
4650  *
4651  * ex: set ts=8 sts=4 sw=4 noet:
4652  */
4653