xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/regexec.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*    regexec.c
2*0Sstevel@tonic-gate  */
3*0Sstevel@tonic-gate 
4*0Sstevel@tonic-gate /*
5*0Sstevel@tonic-gate  * "One Ring to rule them all, One Ring to find them..."
6*0Sstevel@tonic-gate  */
7*0Sstevel@tonic-gate 
8*0Sstevel@tonic-gate /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9*0Sstevel@tonic-gate  * confused with the original package (see point 3 below).  Thanks, Henry!
10*0Sstevel@tonic-gate  */
11*0Sstevel@tonic-gate 
12*0Sstevel@tonic-gate /* Additional note: this code is very heavily munged from Henry's version
13*0Sstevel@tonic-gate  * in places.  In some spots I've traded clarity for efficiency, so don't
14*0Sstevel@tonic-gate  * blame Henry for some of the lack of readability.
15*0Sstevel@tonic-gate  */
16*0Sstevel@tonic-gate 
17*0Sstevel@tonic-gate /* The names of the functions have been changed from regcomp and
18*0Sstevel@tonic-gate  * regexec to  pregcomp and pregexec in order to avoid conflicts
19*0Sstevel@tonic-gate  * with the POSIX routines of the same names.
20*0Sstevel@tonic-gate */
21*0Sstevel@tonic-gate 
22*0Sstevel@tonic-gate #ifdef PERL_EXT_RE_BUILD
23*0Sstevel@tonic-gate /* need to replace pregcomp et al, so enable that */
24*0Sstevel@tonic-gate #  ifndef PERL_IN_XSUB_RE
25*0Sstevel@tonic-gate #    define PERL_IN_XSUB_RE
26*0Sstevel@tonic-gate #  endif
27*0Sstevel@tonic-gate /* need access to debugger hooks */
28*0Sstevel@tonic-gate #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29*0Sstevel@tonic-gate #    define DEBUGGING
30*0Sstevel@tonic-gate #  endif
31*0Sstevel@tonic-gate #endif
32*0Sstevel@tonic-gate 
33*0Sstevel@tonic-gate #ifdef PERL_IN_XSUB_RE
34*0Sstevel@tonic-gate /* We *really* need to overwrite these symbols: */
35*0Sstevel@tonic-gate #  define Perl_regexec_flags my_regexec
36*0Sstevel@tonic-gate #  define Perl_regdump my_regdump
37*0Sstevel@tonic-gate #  define Perl_regprop my_regprop
38*0Sstevel@tonic-gate #  define Perl_re_intuit_start my_re_intuit_start
39*0Sstevel@tonic-gate /* *These* symbols are masked to allow static link. */
40*0Sstevel@tonic-gate #  define Perl_pregexec my_pregexec
41*0Sstevel@tonic-gate #  define Perl_reginitcolors my_reginitcolors
42*0Sstevel@tonic-gate #  define Perl_regclass_swash my_regclass_swash
43*0Sstevel@tonic-gate 
44*0Sstevel@tonic-gate #  define PERL_NO_GET_CONTEXT
45*0Sstevel@tonic-gate #endif
46*0Sstevel@tonic-gate 
47*0Sstevel@tonic-gate /*SUPPRESS 112*/
48*0Sstevel@tonic-gate /*
49*0Sstevel@tonic-gate  * pregcomp and pregexec -- regsub and regerror are not used in perl
50*0Sstevel@tonic-gate  *
51*0Sstevel@tonic-gate  *	Copyright (c) 1986 by University of Toronto.
52*0Sstevel@tonic-gate  *	Written by Henry Spencer.  Not derived from licensed software.
53*0Sstevel@tonic-gate  *
54*0Sstevel@tonic-gate  *	Permission is granted to anyone to use this software for any
55*0Sstevel@tonic-gate  *	purpose on any computer system, and to redistribute it freely,
56*0Sstevel@tonic-gate  *	subject to the following restrictions:
57*0Sstevel@tonic-gate  *
58*0Sstevel@tonic-gate  *	1. The author is not responsible for the consequences of use of
59*0Sstevel@tonic-gate  *		this software, no matter how awful, even if they arise
60*0Sstevel@tonic-gate  *		from defects in it.
61*0Sstevel@tonic-gate  *
62*0Sstevel@tonic-gate  *	2. The origin of this software must not be misrepresented, either
63*0Sstevel@tonic-gate  *		by explicit claim or by omission.
64*0Sstevel@tonic-gate  *
65*0Sstevel@tonic-gate  *	3. Altered versions must be plainly marked as such, and must not
66*0Sstevel@tonic-gate  *		be misrepresented as being the original software.
67*0Sstevel@tonic-gate  *
68*0Sstevel@tonic-gate  ****    Alterations to Henry's code are...
69*0Sstevel@tonic-gate  ****
70*0Sstevel@tonic-gate  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
71*0Sstevel@tonic-gate  ****    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
72*0Sstevel@tonic-gate  ****
73*0Sstevel@tonic-gate  ****    You may distribute under the terms of either the GNU General Public
74*0Sstevel@tonic-gate  ****    License or the Artistic License, as specified in the README file.
75*0Sstevel@tonic-gate  *
76*0Sstevel@tonic-gate  * Beware that some of this code is subtly aware of the way operator
77*0Sstevel@tonic-gate  * precedence is structured in regular expressions.  Serious changes in
78*0Sstevel@tonic-gate  * regular-expression syntax might require a total rethink.
79*0Sstevel@tonic-gate  */
80*0Sstevel@tonic-gate #include "EXTERN.h"
81*0Sstevel@tonic-gate #define PERL_IN_REGEXEC_C
82*0Sstevel@tonic-gate #include "perl.h"
83*0Sstevel@tonic-gate 
84*0Sstevel@tonic-gate #include "regcomp.h"
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate #define RF_tainted	1		/* tainted information used? */
87*0Sstevel@tonic-gate #define RF_warned	2		/* warned about big count? */
88*0Sstevel@tonic-gate #define RF_evaled	4		/* Did an EVAL with setting? */
89*0Sstevel@tonic-gate #define RF_utf8		8		/* String contains multibyte chars? */
90*0Sstevel@tonic-gate #define RF_false	16		/* odd number of nested negatives */
91*0Sstevel@tonic-gate 
92*0Sstevel@tonic-gate #define UTF ((PL_reg_flags & RF_utf8) != 0)
93*0Sstevel@tonic-gate 
94*0Sstevel@tonic-gate #define RS_init		1		/* eval environment created */
95*0Sstevel@tonic-gate #define RS_set		2		/* replsv value is set */
96*0Sstevel@tonic-gate 
97*0Sstevel@tonic-gate #ifndef STATIC
98*0Sstevel@tonic-gate #define	STATIC	static
99*0Sstevel@tonic-gate #endif
100*0Sstevel@tonic-gate 
101*0Sstevel@tonic-gate #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
102*0Sstevel@tonic-gate 
103*0Sstevel@tonic-gate /*
104*0Sstevel@tonic-gate  * Forwards.
105*0Sstevel@tonic-gate  */
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
108*0Sstevel@tonic-gate #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
109*0Sstevel@tonic-gate 
110*0Sstevel@tonic-gate #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
111*0Sstevel@tonic-gate #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
112*0Sstevel@tonic-gate #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
113*0Sstevel@tonic-gate #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
114*0Sstevel@tonic-gate #define HOPc(pos,off) ((char*)HOP(pos,off))
115*0Sstevel@tonic-gate #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
116*0Sstevel@tonic-gate 
117*0Sstevel@tonic-gate #define HOPBACK(pos, off) (		\
118*0Sstevel@tonic-gate     (PL_reg_match_utf8)			\
119*0Sstevel@tonic-gate 	? reghopmaybe((U8*)pos, -off)	\
120*0Sstevel@tonic-gate     : (pos - off >= PL_bostr)		\
121*0Sstevel@tonic-gate 	? (U8*)(pos - off)		\
122*0Sstevel@tonic-gate     : (U8*)NULL				\
123*0Sstevel@tonic-gate )
124*0Sstevel@tonic-gate #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
125*0Sstevel@tonic-gate 
126*0Sstevel@tonic-gate #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
127*0Sstevel@tonic-gate #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
128*0Sstevel@tonic-gate #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
129*0Sstevel@tonic-gate #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
130*0Sstevel@tonic-gate #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
131*0Sstevel@tonic-gate #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
132*0Sstevel@tonic-gate 
133*0Sstevel@tonic-gate #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate /* for use after a quantifier and before an EXACT-like node -- japhy */
136*0Sstevel@tonic-gate #define JUMPABLE(rn) ( \
137*0Sstevel@tonic-gate     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
138*0Sstevel@tonic-gate     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
139*0Sstevel@tonic-gate     OP(rn) == PLUS || OP(rn) == MINMOD || \
140*0Sstevel@tonic-gate     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
141*0Sstevel@tonic-gate )
142*0Sstevel@tonic-gate 
143*0Sstevel@tonic-gate #define HAS_TEXT(rn) ( \
144*0Sstevel@tonic-gate     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
145*0Sstevel@tonic-gate )
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate /*
148*0Sstevel@tonic-gate   Search for mandatory following text node; for lookahead, the text must
149*0Sstevel@tonic-gate   follow but for lookbehind (rn->flags != 0) we skip to the next step.
150*0Sstevel@tonic-gate */
151*0Sstevel@tonic-gate #define FIND_NEXT_IMPT(rn) STMT_START { \
152*0Sstevel@tonic-gate     while (JUMPABLE(rn)) \
153*0Sstevel@tonic-gate 	if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
154*0Sstevel@tonic-gate 	    rn = NEXTOPER(NEXTOPER(rn)); \
155*0Sstevel@tonic-gate 	else if (OP(rn) == PLUS) \
156*0Sstevel@tonic-gate 	    rn = NEXTOPER(rn); \
157*0Sstevel@tonic-gate 	else if (OP(rn) == IFMATCH) \
158*0Sstevel@tonic-gate 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
159*0Sstevel@tonic-gate 	else rn += NEXT_OFF(rn); \
160*0Sstevel@tonic-gate } STMT_END
161*0Sstevel@tonic-gate 
162*0Sstevel@tonic-gate static void restore_pos(pTHX_ void *arg);
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate STATIC CHECKPOINT
S_regcppush(pTHX_ I32 parenfloor)165*0Sstevel@tonic-gate S_regcppush(pTHX_ I32 parenfloor)
166*0Sstevel@tonic-gate {
167*0Sstevel@tonic-gate     int retval = PL_savestack_ix;
168*0Sstevel@tonic-gate #define REGCP_PAREN_ELEMS 4
169*0Sstevel@tonic-gate     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
170*0Sstevel@tonic-gate     int p;
171*0Sstevel@tonic-gate 
172*0Sstevel@tonic-gate     if (paren_elems_to_push < 0)
173*0Sstevel@tonic-gate 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
174*0Sstevel@tonic-gate 
175*0Sstevel@tonic-gate #define REGCP_OTHER_ELEMS 6
176*0Sstevel@tonic-gate     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
177*0Sstevel@tonic-gate     for (p = PL_regsize; p > parenfloor; p--) {
178*0Sstevel@tonic-gate /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
179*0Sstevel@tonic-gate 	SSPUSHINT(PL_regendp[p]);
180*0Sstevel@tonic-gate 	SSPUSHINT(PL_regstartp[p]);
181*0Sstevel@tonic-gate 	SSPUSHPTR(PL_reg_start_tmp[p]);
182*0Sstevel@tonic-gate 	SSPUSHINT(p);
183*0Sstevel@tonic-gate     }
184*0Sstevel@tonic-gate /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
185*0Sstevel@tonic-gate     SSPUSHINT(PL_regsize);
186*0Sstevel@tonic-gate     SSPUSHINT(*PL_reglastparen);
187*0Sstevel@tonic-gate     SSPUSHINT(*PL_reglastcloseparen);
188*0Sstevel@tonic-gate     SSPUSHPTR(PL_reginput);
189*0Sstevel@tonic-gate #define REGCP_FRAME_ELEMS 2
190*0Sstevel@tonic-gate /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
191*0Sstevel@tonic-gate  * are needed for the regexp context stack bookkeeping. */
192*0Sstevel@tonic-gate     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
193*0Sstevel@tonic-gate     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate     return retval;
196*0Sstevel@tonic-gate }
197*0Sstevel@tonic-gate 
198*0Sstevel@tonic-gate /* These are needed since we do not localize EVAL nodes: */
199*0Sstevel@tonic-gate #  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,		\
200*0Sstevel@tonic-gate 			     "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
201*0Sstevel@tonic-gate 			     (IV)PL_savestack_ix)); cp = PL_savestack_ix
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate #  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?		\
204*0Sstevel@tonic-gate 				PerlIO_printf(Perl_debug_log,		\
205*0Sstevel@tonic-gate 				"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
206*0Sstevel@tonic-gate 				(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
207*0Sstevel@tonic-gate 
208*0Sstevel@tonic-gate STATIC char *
S_regcppop(pTHX)209*0Sstevel@tonic-gate S_regcppop(pTHX)
210*0Sstevel@tonic-gate {
211*0Sstevel@tonic-gate     I32 i;
212*0Sstevel@tonic-gate     U32 paren = 0;
213*0Sstevel@tonic-gate     char *input;
214*0Sstevel@tonic-gate     I32 tmps;
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
217*0Sstevel@tonic-gate     i = SSPOPINT;
218*0Sstevel@tonic-gate     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
219*0Sstevel@tonic-gate     i = SSPOPINT; /* Parentheses elements to pop. */
220*0Sstevel@tonic-gate     input = (char *) SSPOPPTR;
221*0Sstevel@tonic-gate     *PL_reglastcloseparen = SSPOPINT;
222*0Sstevel@tonic-gate     *PL_reglastparen = SSPOPINT;
223*0Sstevel@tonic-gate     PL_regsize = SSPOPINT;
224*0Sstevel@tonic-gate 
225*0Sstevel@tonic-gate     /* Now restore the parentheses context. */
226*0Sstevel@tonic-gate     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
227*0Sstevel@tonic-gate 	 i > 0; i -= REGCP_PAREN_ELEMS) {
228*0Sstevel@tonic-gate 	paren = (U32)SSPOPINT;
229*0Sstevel@tonic-gate 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
230*0Sstevel@tonic-gate 	PL_regstartp[paren] = SSPOPINT;
231*0Sstevel@tonic-gate 	tmps = SSPOPINT;
232*0Sstevel@tonic-gate 	if (paren <= *PL_reglastparen)
233*0Sstevel@tonic-gate 	    PL_regendp[paren] = tmps;
234*0Sstevel@tonic-gate 	DEBUG_r(
235*0Sstevel@tonic-gate 	    PerlIO_printf(Perl_debug_log,
236*0Sstevel@tonic-gate 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
237*0Sstevel@tonic-gate 			  (UV)paren, (IV)PL_regstartp[paren],
238*0Sstevel@tonic-gate 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
239*0Sstevel@tonic-gate 			  (IV)PL_regendp[paren],
240*0Sstevel@tonic-gate 			  (paren > *PL_reglastparen ? "(no)" : ""));
241*0Sstevel@tonic-gate 	);
242*0Sstevel@tonic-gate     }
243*0Sstevel@tonic-gate     DEBUG_r(
244*0Sstevel@tonic-gate 	if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
245*0Sstevel@tonic-gate 	    PerlIO_printf(Perl_debug_log,
246*0Sstevel@tonic-gate 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
247*0Sstevel@tonic-gate 			  (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
248*0Sstevel@tonic-gate 	}
249*0Sstevel@tonic-gate     );
250*0Sstevel@tonic-gate #if 1
251*0Sstevel@tonic-gate     /* It would seem that the similar code in regtry()
252*0Sstevel@tonic-gate      * already takes care of this, and in fact it is in
253*0Sstevel@tonic-gate      * a better location to since this code can #if 0-ed out
254*0Sstevel@tonic-gate      * but the code in regtry() is needed or otherwise tests
255*0Sstevel@tonic-gate      * requiring null fields (pat.t#187 and split.t#{13,14}
256*0Sstevel@tonic-gate      * (as of patchlevel 7877)  will fail.  Then again,
257*0Sstevel@tonic-gate      * this code seems to be necessary or otherwise
258*0Sstevel@tonic-gate      * building DynaLoader will fail:
259*0Sstevel@tonic-gate      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
260*0Sstevel@tonic-gate      * --jhi */
261*0Sstevel@tonic-gate     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
262*0Sstevel@tonic-gate 	if ((I32)paren > PL_regsize)
263*0Sstevel@tonic-gate 	    PL_regstartp[paren] = -1;
264*0Sstevel@tonic-gate 	PL_regendp[paren] = -1;
265*0Sstevel@tonic-gate     }
266*0Sstevel@tonic-gate #endif
267*0Sstevel@tonic-gate     return input;
268*0Sstevel@tonic-gate }
269*0Sstevel@tonic-gate 
270*0Sstevel@tonic-gate STATIC char *
S_regcp_set_to(pTHX_ I32 ss)271*0Sstevel@tonic-gate S_regcp_set_to(pTHX_ I32 ss)
272*0Sstevel@tonic-gate {
273*0Sstevel@tonic-gate     I32 tmp = PL_savestack_ix;
274*0Sstevel@tonic-gate 
275*0Sstevel@tonic-gate     PL_savestack_ix = ss;
276*0Sstevel@tonic-gate     regcppop();
277*0Sstevel@tonic-gate     PL_savestack_ix = tmp;
278*0Sstevel@tonic-gate     return Nullch;
279*0Sstevel@tonic-gate }
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate typedef struct re_cc_state
282*0Sstevel@tonic-gate {
283*0Sstevel@tonic-gate     I32 ss;
284*0Sstevel@tonic-gate     regnode *node;
285*0Sstevel@tonic-gate     struct re_cc_state *prev;
286*0Sstevel@tonic-gate     CURCUR *cc;
287*0Sstevel@tonic-gate     regexp *re;
288*0Sstevel@tonic-gate } re_cc_state;
289*0Sstevel@tonic-gate 
290*0Sstevel@tonic-gate #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate #define TRYPAREN(paren, n, input) {				\
293*0Sstevel@tonic-gate     if (paren) {						\
294*0Sstevel@tonic-gate 	if (n) {						\
295*0Sstevel@tonic-gate 	    PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;	\
296*0Sstevel@tonic-gate 	    PL_regendp[paren] = input - PL_bostr;		\
297*0Sstevel@tonic-gate 	}							\
298*0Sstevel@tonic-gate 	else							\
299*0Sstevel@tonic-gate 	    PL_regendp[paren] = -1;				\
300*0Sstevel@tonic-gate     }								\
301*0Sstevel@tonic-gate     if (regmatch(next))						\
302*0Sstevel@tonic-gate 	sayYES;							\
303*0Sstevel@tonic-gate     if (paren && n)						\
304*0Sstevel@tonic-gate 	PL_regendp[paren] = -1;					\
305*0Sstevel@tonic-gate }
306*0Sstevel@tonic-gate 
307*0Sstevel@tonic-gate 
308*0Sstevel@tonic-gate /*
309*0Sstevel@tonic-gate  * pregexec and friends
310*0Sstevel@tonic-gate  */
311*0Sstevel@tonic-gate 
312*0Sstevel@tonic-gate /*
313*0Sstevel@tonic-gate  - pregexec - match a regexp against a string
314*0Sstevel@tonic-gate  */
315*0Sstevel@tonic-gate I32
Perl_pregexec(pTHX_ register regexp * prog,char * stringarg,register char * strend,char * strbeg,I32 minend,SV * screamer,U32 nosave)316*0Sstevel@tonic-gate Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
317*0Sstevel@tonic-gate 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318*0Sstevel@tonic-gate /* strend: pointer to null at end of string */
319*0Sstevel@tonic-gate /* strbeg: real beginning of string */
320*0Sstevel@tonic-gate /* minend: end of match must be >=minend after stringarg. */
321*0Sstevel@tonic-gate /* nosave: For optimizations. */
322*0Sstevel@tonic-gate {
323*0Sstevel@tonic-gate     return
324*0Sstevel@tonic-gate 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
325*0Sstevel@tonic-gate 		      nosave ? 0 : REXEC_COPY_STR);
326*0Sstevel@tonic-gate }
327*0Sstevel@tonic-gate 
328*0Sstevel@tonic-gate STATIC void
S_cache_re(pTHX_ regexp * prog)329*0Sstevel@tonic-gate S_cache_re(pTHX_ regexp *prog)
330*0Sstevel@tonic-gate {
331*0Sstevel@tonic-gate     PL_regprecomp = prog->precomp;		/* Needed for FAIL. */
332*0Sstevel@tonic-gate #ifdef DEBUGGING
333*0Sstevel@tonic-gate     PL_regprogram = prog->program;
334*0Sstevel@tonic-gate #endif
335*0Sstevel@tonic-gate     PL_regnpar = prog->nparens;
336*0Sstevel@tonic-gate     PL_regdata = prog->data;
337*0Sstevel@tonic-gate     PL_reg_re = prog;
338*0Sstevel@tonic-gate }
339*0Sstevel@tonic-gate 
340*0Sstevel@tonic-gate /*
341*0Sstevel@tonic-gate  * Need to implement the following flags for reg_anch:
342*0Sstevel@tonic-gate  *
343*0Sstevel@tonic-gate  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
344*0Sstevel@tonic-gate  * USE_INTUIT_ML
345*0Sstevel@tonic-gate  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
346*0Sstevel@tonic-gate  * INTUIT_AUTORITATIVE_ML
347*0Sstevel@tonic-gate  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
348*0Sstevel@tonic-gate  * INTUIT_ONCE_ML
349*0Sstevel@tonic-gate  *
350*0Sstevel@tonic-gate  * Another flag for this function: SECOND_TIME (so that float substrs
351*0Sstevel@tonic-gate  * with giant delta may be not rechecked).
352*0Sstevel@tonic-gate  */
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
355*0Sstevel@tonic-gate 
356*0Sstevel@tonic-gate /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
357*0Sstevel@tonic-gate    Otherwise, only SvCUR(sv) is used to get strbeg. */
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate /* XXXX We assume that strpos is strbeg unless sv. */
360*0Sstevel@tonic-gate 
361*0Sstevel@tonic-gate /* XXXX Some places assume that there is a fixed substring.
362*0Sstevel@tonic-gate 	An update may be needed if optimizer marks as "INTUITable"
363*0Sstevel@tonic-gate 	RExen without fixed substrings.  Similarly, it is assumed that
364*0Sstevel@tonic-gate 	lengths of all the strings are no more than minlen, thus they
365*0Sstevel@tonic-gate 	cannot come from lookahead.
366*0Sstevel@tonic-gate 	(Or minlen should take into account lookahead.) */
367*0Sstevel@tonic-gate 
368*0Sstevel@tonic-gate /* A failure to find a constant substring means that there is no need to make
369*0Sstevel@tonic-gate    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
370*0Sstevel@tonic-gate    finding a substring too deep into the string means that less calls to
371*0Sstevel@tonic-gate    regtry() should be needed.
372*0Sstevel@tonic-gate 
373*0Sstevel@tonic-gate    REx compiler's optimizer found 4 possible hints:
374*0Sstevel@tonic-gate 	a) Anchored substring;
375*0Sstevel@tonic-gate 	b) Fixed substring;
376*0Sstevel@tonic-gate 	c) Whether we are anchored (beginning-of-line or \G);
377*0Sstevel@tonic-gate 	d) First node (of those at offset 0) which may distingush positions;
378*0Sstevel@tonic-gate    We use a)b)d) and multiline-part of c), and try to find a position in the
379*0Sstevel@tonic-gate    string which does not contradict any of them.
380*0Sstevel@tonic-gate  */
381*0Sstevel@tonic-gate 
382*0Sstevel@tonic-gate /* Most of decisions we do here should have been done at compile time.
383*0Sstevel@tonic-gate    The nodes of the REx which we used for the search should have been
384*0Sstevel@tonic-gate    deleted from the finite automaton. */
385*0Sstevel@tonic-gate 
386*0Sstevel@tonic-gate char *
Perl_re_intuit_start(pTHX_ regexp * prog,SV * sv,char * strpos,char * strend,U32 flags,re_scream_pos_data * data)387*0Sstevel@tonic-gate Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
388*0Sstevel@tonic-gate 		     char *strend, U32 flags, re_scream_pos_data *data)
389*0Sstevel@tonic-gate {
390*0Sstevel@tonic-gate     register I32 start_shift = 0;
391*0Sstevel@tonic-gate     /* Should be nonnegative! */
392*0Sstevel@tonic-gate     register I32 end_shift   = 0;
393*0Sstevel@tonic-gate     register char *s;
394*0Sstevel@tonic-gate     register SV *check;
395*0Sstevel@tonic-gate     char *strbeg;
396*0Sstevel@tonic-gate     char *t;
397*0Sstevel@tonic-gate     int do_utf8 = sv ? SvUTF8(sv) : 0;	/* if no sv we have to assume bytes */
398*0Sstevel@tonic-gate     I32 ml_anch;
399*0Sstevel@tonic-gate     register char *other_last = Nullch;	/* other substr checked before this */
400*0Sstevel@tonic-gate     char *check_at = Nullch;		/* check substr found at this pos */
401*0Sstevel@tonic-gate #ifdef DEBUGGING
402*0Sstevel@tonic-gate     char *i_strpos = strpos;
403*0Sstevel@tonic-gate     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
404*0Sstevel@tonic-gate #endif
405*0Sstevel@tonic-gate     RX_MATCH_UTF8_set(prog,do_utf8);
406*0Sstevel@tonic-gate 
407*0Sstevel@tonic-gate     if (prog->reganch & ROPT_UTF8) {
408*0Sstevel@tonic-gate 	DEBUG_r(PerlIO_printf(Perl_debug_log,
409*0Sstevel@tonic-gate 			      "UTF-8 regex...\n"));
410*0Sstevel@tonic-gate 	PL_reg_flags |= RF_utf8;
411*0Sstevel@tonic-gate     }
412*0Sstevel@tonic-gate 
413*0Sstevel@tonic-gate     DEBUG_r({
414*0Sstevel@tonic-gate 	 char *s   = PL_reg_match_utf8 ?
415*0Sstevel@tonic-gate 	                 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
416*0Sstevel@tonic-gate 	                 strpos;
417*0Sstevel@tonic-gate 	 int   len = PL_reg_match_utf8 ?
418*0Sstevel@tonic-gate 	                 strlen(s) : strend - strpos;
419*0Sstevel@tonic-gate 	 if (!PL_colorset)
420*0Sstevel@tonic-gate 	      reginitcolors();
421*0Sstevel@tonic-gate 	 if (PL_reg_match_utf8)
422*0Sstevel@tonic-gate 	     DEBUG_r(PerlIO_printf(Perl_debug_log,
423*0Sstevel@tonic-gate 				   "UTF-8 target...\n"));
424*0Sstevel@tonic-gate 	 PerlIO_printf(Perl_debug_log,
425*0Sstevel@tonic-gate 		       "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
426*0Sstevel@tonic-gate 		       PL_colors[4],PL_colors[5],PL_colors[0],
427*0Sstevel@tonic-gate 		       prog->precomp,
428*0Sstevel@tonic-gate 		       PL_colors[1],
429*0Sstevel@tonic-gate 		       (strlen(prog->precomp) > 60 ? "..." : ""),
430*0Sstevel@tonic-gate 		       PL_colors[0],
431*0Sstevel@tonic-gate 		       (int)(len > 60 ? 60 : len),
432*0Sstevel@tonic-gate 		       s, PL_colors[1],
433*0Sstevel@tonic-gate 		       (len > 60 ? "..." : "")
434*0Sstevel@tonic-gate 	      );
435*0Sstevel@tonic-gate     });
436*0Sstevel@tonic-gate 
437*0Sstevel@tonic-gate     /* CHR_DIST() would be more correct here but it makes things slow. */
438*0Sstevel@tonic-gate     if (prog->minlen > strend - strpos) {
439*0Sstevel@tonic-gate 	DEBUG_r(PerlIO_printf(Perl_debug_log,
440*0Sstevel@tonic-gate 			      "String too short... [re_intuit_start]\n"));
441*0Sstevel@tonic-gate 	goto fail;
442*0Sstevel@tonic-gate     }
443*0Sstevel@tonic-gate     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
444*0Sstevel@tonic-gate     PL_regeol = strend;
445*0Sstevel@tonic-gate     if (do_utf8) {
446*0Sstevel@tonic-gate 	if (!prog->check_utf8 && prog->check_substr)
447*0Sstevel@tonic-gate 	    to_utf8_substr(prog);
448*0Sstevel@tonic-gate 	check = prog->check_utf8;
449*0Sstevel@tonic-gate     } else {
450*0Sstevel@tonic-gate 	if (!prog->check_substr && prog->check_utf8)
451*0Sstevel@tonic-gate 	    to_byte_substr(prog);
452*0Sstevel@tonic-gate 	check = prog->check_substr;
453*0Sstevel@tonic-gate     }
454*0Sstevel@tonic-gate    if (check == &PL_sv_undef) {
455*0Sstevel@tonic-gate 	DEBUG_r(PerlIO_printf(Perl_debug_log,
456*0Sstevel@tonic-gate 		"Non-utf string cannot match utf check string\n"));
457*0Sstevel@tonic-gate 	goto fail;
458*0Sstevel@tonic-gate     }
459*0Sstevel@tonic-gate     if (prog->reganch & ROPT_ANCH) {	/* Match at beg-of-str or after \n */
460*0Sstevel@tonic-gate 	ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
461*0Sstevel@tonic-gate 		     || ( (prog->reganch & ROPT_ANCH_BOL)
462*0Sstevel@tonic-gate 			  && !PL_multiline ) );	/* Check after \n? */
463*0Sstevel@tonic-gate 
464*0Sstevel@tonic-gate 	if (!ml_anch) {
465*0Sstevel@tonic-gate 	  if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
466*0Sstevel@tonic-gate 				  | ROPT_IMPLICIT)) /* not a real BOL */
467*0Sstevel@tonic-gate 	       /* SvCUR is not set on references: SvRV and SvPVX overlap */
468*0Sstevel@tonic-gate 	       && sv && !SvROK(sv)
469*0Sstevel@tonic-gate 	       && (strpos != strbeg)) {
470*0Sstevel@tonic-gate 	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
471*0Sstevel@tonic-gate 	      goto fail;
472*0Sstevel@tonic-gate 	  }
473*0Sstevel@tonic-gate 	  if (prog->check_offset_min == prog->check_offset_max &&
474*0Sstevel@tonic-gate 	      !(prog->reganch & ROPT_CANY_SEEN)) {
475*0Sstevel@tonic-gate 	    /* Substring at constant offset from beg-of-str... */
476*0Sstevel@tonic-gate 	    I32 slen;
477*0Sstevel@tonic-gate 
478*0Sstevel@tonic-gate 	    s = HOP3c(strpos, prog->check_offset_min, strend);
479*0Sstevel@tonic-gate 	    if (SvTAIL(check)) {
480*0Sstevel@tonic-gate 		slen = SvCUR(check);	/* >= 1 */
481*0Sstevel@tonic-gate 
482*0Sstevel@tonic-gate 		if ( strend - s > slen || strend - s < slen - 1
483*0Sstevel@tonic-gate 		     || (strend - s == slen && strend[-1] != '\n')) {
484*0Sstevel@tonic-gate 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
485*0Sstevel@tonic-gate 		    goto fail_finish;
486*0Sstevel@tonic-gate 		}
487*0Sstevel@tonic-gate 		/* Now should match s[0..slen-2] */
488*0Sstevel@tonic-gate 		slen--;
489*0Sstevel@tonic-gate 		if (slen && (*SvPVX(check) != *s
490*0Sstevel@tonic-gate 			     || (slen > 1
491*0Sstevel@tonic-gate 				 && memNE(SvPVX(check), s, slen)))) {
492*0Sstevel@tonic-gate 		  report_neq:
493*0Sstevel@tonic-gate 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
494*0Sstevel@tonic-gate 		    goto fail_finish;
495*0Sstevel@tonic-gate 		}
496*0Sstevel@tonic-gate 	    }
497*0Sstevel@tonic-gate 	    else if (*SvPVX(check) != *s
498*0Sstevel@tonic-gate 		     || ((slen = SvCUR(check)) > 1
499*0Sstevel@tonic-gate 			 && memNE(SvPVX(check), s, slen)))
500*0Sstevel@tonic-gate 		goto report_neq;
501*0Sstevel@tonic-gate 	    goto success_at_start;
502*0Sstevel@tonic-gate 	  }
503*0Sstevel@tonic-gate 	}
504*0Sstevel@tonic-gate 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
505*0Sstevel@tonic-gate 	s = strpos;
506*0Sstevel@tonic-gate 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507*0Sstevel@tonic-gate 	end_shift = prog->minlen - start_shift -
508*0Sstevel@tonic-gate 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
509*0Sstevel@tonic-gate 	if (!ml_anch) {
510*0Sstevel@tonic-gate 	    I32 end = prog->check_offset_max + CHR_SVLEN(check)
511*0Sstevel@tonic-gate 					 - (SvTAIL(check) != 0);
512*0Sstevel@tonic-gate 	    I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
513*0Sstevel@tonic-gate 
514*0Sstevel@tonic-gate 	    if (end_shift < eshift)
515*0Sstevel@tonic-gate 		end_shift = eshift;
516*0Sstevel@tonic-gate 	}
517*0Sstevel@tonic-gate     }
518*0Sstevel@tonic-gate     else {				/* Can match at random position */
519*0Sstevel@tonic-gate 	ml_anch = 0;
520*0Sstevel@tonic-gate 	s = strpos;
521*0Sstevel@tonic-gate 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
522*0Sstevel@tonic-gate 	/* Should be nonnegative! */
523*0Sstevel@tonic-gate 	end_shift = prog->minlen - start_shift -
524*0Sstevel@tonic-gate 	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
525*0Sstevel@tonic-gate     }
526*0Sstevel@tonic-gate 
527*0Sstevel@tonic-gate #ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
528*0Sstevel@tonic-gate     if (end_shift < 0)
529*0Sstevel@tonic-gate 	Perl_croak(aTHX_ "panic: end_shift");
530*0Sstevel@tonic-gate #endif
531*0Sstevel@tonic-gate 
532*0Sstevel@tonic-gate   restart:
533*0Sstevel@tonic-gate     /* Find a possible match in the region s..strend by looking for
534*0Sstevel@tonic-gate        the "check" substring in the region corrected by start/end_shift. */
535*0Sstevel@tonic-gate     if (flags & REXEC_SCREAM) {
536*0Sstevel@tonic-gate 	I32 p = -1;			/* Internal iterator of scream. */
537*0Sstevel@tonic-gate 	I32 *pp = data ? data->scream_pos : &p;
538*0Sstevel@tonic-gate 
539*0Sstevel@tonic-gate 	if (PL_screamfirst[BmRARE(check)] >= 0
540*0Sstevel@tonic-gate 	    || ( BmRARE(check) == '\n'
541*0Sstevel@tonic-gate 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
542*0Sstevel@tonic-gate 		 && SvTAIL(check) ))
543*0Sstevel@tonic-gate 	    s = screaminstr(sv, check,
544*0Sstevel@tonic-gate 			    start_shift + (s - strbeg), end_shift, pp, 0);
545*0Sstevel@tonic-gate 	else
546*0Sstevel@tonic-gate 	    goto fail_finish;
547*0Sstevel@tonic-gate 	/* we may be pointing at the wrong string */
548*0Sstevel@tonic-gate 	if (s && RX_MATCH_COPIED(prog))
549*0Sstevel@tonic-gate 	    s = strbeg + (s - SvPVX(sv));
550*0Sstevel@tonic-gate 	if (data)
551*0Sstevel@tonic-gate 	    *data->scream_olds = s;
552*0Sstevel@tonic-gate     }
553*0Sstevel@tonic-gate     else if (prog->reganch & ROPT_CANY_SEEN)
554*0Sstevel@tonic-gate 	s = fbm_instr((U8*)(s + start_shift),
555*0Sstevel@tonic-gate 		      (U8*)(strend - end_shift),
556*0Sstevel@tonic-gate 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
557*0Sstevel@tonic-gate     else
558*0Sstevel@tonic-gate 	s = fbm_instr(HOP3(s, start_shift, strend),
559*0Sstevel@tonic-gate 		      HOP3(strend, -end_shift, strbeg),
560*0Sstevel@tonic-gate 		      check, PL_multiline ? FBMrf_MULTILINE : 0);
561*0Sstevel@tonic-gate 
562*0Sstevel@tonic-gate     /* Update the count-of-usability, remove useless subpatterns,
563*0Sstevel@tonic-gate 	unshift s.  */
564*0Sstevel@tonic-gate 
565*0Sstevel@tonic-gate     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
566*0Sstevel@tonic-gate 			  (s ? "Found" : "Did not find"),
567*0Sstevel@tonic-gate 			  (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
568*0Sstevel@tonic-gate 			  PL_colors[0],
569*0Sstevel@tonic-gate 			  (int)(SvCUR(check) - (SvTAIL(check)!=0)),
570*0Sstevel@tonic-gate 			  SvPVX(check),
571*0Sstevel@tonic-gate 			  PL_colors[1], (SvTAIL(check) ? "$" : ""),
572*0Sstevel@tonic-gate 			  (s ? " at offset " : "...\n") ) );
573*0Sstevel@tonic-gate 
574*0Sstevel@tonic-gate     if (!s)
575*0Sstevel@tonic-gate 	goto fail_finish;
576*0Sstevel@tonic-gate 
577*0Sstevel@tonic-gate     check_at = s;
578*0Sstevel@tonic-gate 
579*0Sstevel@tonic-gate     /* Finish the diagnostic message */
580*0Sstevel@tonic-gate     DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
581*0Sstevel@tonic-gate 
582*0Sstevel@tonic-gate     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
583*0Sstevel@tonic-gate        Start with the other substr.
584*0Sstevel@tonic-gate        XXXX no SCREAM optimization yet - and a very coarse implementation
585*0Sstevel@tonic-gate        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
586*0Sstevel@tonic-gate 		*always* match.  Probably should be marked during compile...
587*0Sstevel@tonic-gate        Probably it is right to do no SCREAM here...
588*0Sstevel@tonic-gate      */
589*0Sstevel@tonic-gate 
590*0Sstevel@tonic-gate     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
591*0Sstevel@tonic-gate 	/* Take into account the "other" substring. */
592*0Sstevel@tonic-gate 	/* XXXX May be hopelessly wrong for UTF... */
593*0Sstevel@tonic-gate 	if (!other_last)
594*0Sstevel@tonic-gate 	    other_last = strpos;
595*0Sstevel@tonic-gate 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
596*0Sstevel@tonic-gate 	  do_other_anchored:
597*0Sstevel@tonic-gate 	    {
598*0Sstevel@tonic-gate 		char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
599*0Sstevel@tonic-gate 		char *s1 = s;
600*0Sstevel@tonic-gate 		SV* must;
601*0Sstevel@tonic-gate 
602*0Sstevel@tonic-gate 		t = s - prog->check_offset_max;
603*0Sstevel@tonic-gate 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
604*0Sstevel@tonic-gate 		    && (!do_utf8
605*0Sstevel@tonic-gate 			|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
606*0Sstevel@tonic-gate 			    && t > strpos)))
607*0Sstevel@tonic-gate 		    /* EMPTY */;
608*0Sstevel@tonic-gate 		else
609*0Sstevel@tonic-gate 		    t = strpos;
610*0Sstevel@tonic-gate 		t = HOP3c(t, prog->anchored_offset, strend);
611*0Sstevel@tonic-gate 		if (t < other_last)	/* These positions already checked */
612*0Sstevel@tonic-gate 		    t = other_last;
613*0Sstevel@tonic-gate 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
614*0Sstevel@tonic-gate 		if (last < last1)
615*0Sstevel@tonic-gate 		    last1 = last;
616*0Sstevel@tonic-gate  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
617*0Sstevel@tonic-gate 		/* On end-of-str: see comment below. */
618*0Sstevel@tonic-gate 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
619*0Sstevel@tonic-gate 		if (must == &PL_sv_undef) {
620*0Sstevel@tonic-gate 		    s = (char*)NULL;
621*0Sstevel@tonic-gate 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
622*0Sstevel@tonic-gate 		}
623*0Sstevel@tonic-gate 		else
624*0Sstevel@tonic-gate 		    s = fbm_instr(
625*0Sstevel@tonic-gate 			(unsigned char*)t,
626*0Sstevel@tonic-gate 			HOP3(HOP3(last1, prog->anchored_offset, strend)
627*0Sstevel@tonic-gate 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
628*0Sstevel@tonic-gate 			must,
629*0Sstevel@tonic-gate 			PL_multiline ? FBMrf_MULTILINE : 0
630*0Sstevel@tonic-gate 		    );
631*0Sstevel@tonic-gate 		DEBUG_r(PerlIO_printf(Perl_debug_log,
632*0Sstevel@tonic-gate 			"%s anchored substr `%s%.*s%s'%s",
633*0Sstevel@tonic-gate 			(s ? "Found" : "Contradicts"),
634*0Sstevel@tonic-gate 			PL_colors[0],
635*0Sstevel@tonic-gate 			  (int)(SvCUR(must)
636*0Sstevel@tonic-gate 			  - (SvTAIL(must)!=0)),
637*0Sstevel@tonic-gate 			  SvPVX(must),
638*0Sstevel@tonic-gate 			  PL_colors[1], (SvTAIL(must) ? "$" : "")));
639*0Sstevel@tonic-gate 		if (!s) {
640*0Sstevel@tonic-gate 		    if (last1 >= last2) {
641*0Sstevel@tonic-gate 			DEBUG_r(PerlIO_printf(Perl_debug_log,
642*0Sstevel@tonic-gate 						", giving up...\n"));
643*0Sstevel@tonic-gate 			goto fail_finish;
644*0Sstevel@tonic-gate 		    }
645*0Sstevel@tonic-gate 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
646*0Sstevel@tonic-gate 			", trying floating at offset %ld...\n",
647*0Sstevel@tonic-gate 			(long)(HOP3c(s1, 1, strend) - i_strpos)));
648*0Sstevel@tonic-gate 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
649*0Sstevel@tonic-gate 		    s = HOP3c(last, 1, strend);
650*0Sstevel@tonic-gate 		    goto restart;
651*0Sstevel@tonic-gate 		}
652*0Sstevel@tonic-gate 		else {
653*0Sstevel@tonic-gate 		    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
654*0Sstevel@tonic-gate 			  (long)(s - i_strpos)));
655*0Sstevel@tonic-gate 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
656*0Sstevel@tonic-gate 		    other_last = HOP3c(s, 1, strend);
657*0Sstevel@tonic-gate 		    s = s1;
658*0Sstevel@tonic-gate 		    if (t == strpos)
659*0Sstevel@tonic-gate 			goto try_at_start;
660*0Sstevel@tonic-gate 		    goto try_at_offset;
661*0Sstevel@tonic-gate 		}
662*0Sstevel@tonic-gate 	    }
663*0Sstevel@tonic-gate 	}
664*0Sstevel@tonic-gate 	else {		/* Take into account the floating substring. */
665*0Sstevel@tonic-gate 	    char *last, *last1;
666*0Sstevel@tonic-gate 	    char *s1 = s;
667*0Sstevel@tonic-gate 	    SV* must;
668*0Sstevel@tonic-gate 
669*0Sstevel@tonic-gate 	    t = HOP3c(s, -start_shift, strbeg);
670*0Sstevel@tonic-gate 	    last1 = last =
671*0Sstevel@tonic-gate 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
672*0Sstevel@tonic-gate 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
673*0Sstevel@tonic-gate 		last = HOP3c(t, prog->float_max_offset, strend);
674*0Sstevel@tonic-gate 	    s = HOP3c(t, prog->float_min_offset, strend);
675*0Sstevel@tonic-gate 	    if (s < other_last)
676*0Sstevel@tonic-gate 		s = other_last;
677*0Sstevel@tonic-gate  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
678*0Sstevel@tonic-gate 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
679*0Sstevel@tonic-gate 	    /* fbm_instr() takes into account exact value of end-of-str
680*0Sstevel@tonic-gate 	       if the check is SvTAIL(ed).  Since false positives are OK,
681*0Sstevel@tonic-gate 	       and end-of-str is not later than strend we are OK. */
682*0Sstevel@tonic-gate 	    if (must == &PL_sv_undef) {
683*0Sstevel@tonic-gate 		s = (char*)NULL;
684*0Sstevel@tonic-gate 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
685*0Sstevel@tonic-gate 	    }
686*0Sstevel@tonic-gate 	    else
687*0Sstevel@tonic-gate 		s = fbm_instr((unsigned char*)s,
688*0Sstevel@tonic-gate 			      (unsigned char*)last + SvCUR(must)
689*0Sstevel@tonic-gate 				  - (SvTAIL(must)!=0),
690*0Sstevel@tonic-gate 			      must, PL_multiline ? FBMrf_MULTILINE : 0);
691*0Sstevel@tonic-gate 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
692*0Sstevel@tonic-gate 		    (s ? "Found" : "Contradicts"),
693*0Sstevel@tonic-gate 		    PL_colors[0],
694*0Sstevel@tonic-gate 		      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
695*0Sstevel@tonic-gate 		      SvPVX(must),
696*0Sstevel@tonic-gate 		      PL_colors[1], (SvTAIL(must) ? "$" : "")));
697*0Sstevel@tonic-gate 	    if (!s) {
698*0Sstevel@tonic-gate 		if (last1 == last) {
699*0Sstevel@tonic-gate 		    DEBUG_r(PerlIO_printf(Perl_debug_log,
700*0Sstevel@tonic-gate 					    ", giving up...\n"));
701*0Sstevel@tonic-gate 		    goto fail_finish;
702*0Sstevel@tonic-gate 		}
703*0Sstevel@tonic-gate 		DEBUG_r(PerlIO_printf(Perl_debug_log,
704*0Sstevel@tonic-gate 		    ", trying anchored starting at offset %ld...\n",
705*0Sstevel@tonic-gate 		    (long)(s1 + 1 - i_strpos)));
706*0Sstevel@tonic-gate 		other_last = last;
707*0Sstevel@tonic-gate 		s = HOP3c(t, 1, strend);
708*0Sstevel@tonic-gate 		goto restart;
709*0Sstevel@tonic-gate 	    }
710*0Sstevel@tonic-gate 	    else {
711*0Sstevel@tonic-gate 		DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
712*0Sstevel@tonic-gate 		      (long)(s - i_strpos)));
713*0Sstevel@tonic-gate 		other_last = s; /* Fix this later. --Hugo */
714*0Sstevel@tonic-gate 		s = s1;
715*0Sstevel@tonic-gate 		if (t == strpos)
716*0Sstevel@tonic-gate 		    goto try_at_start;
717*0Sstevel@tonic-gate 		goto try_at_offset;
718*0Sstevel@tonic-gate 	    }
719*0Sstevel@tonic-gate 	}
720*0Sstevel@tonic-gate     }
721*0Sstevel@tonic-gate 
722*0Sstevel@tonic-gate     t = s - prog->check_offset_max;
723*0Sstevel@tonic-gate     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
724*0Sstevel@tonic-gate         && (!do_utf8
725*0Sstevel@tonic-gate 	    || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
726*0Sstevel@tonic-gate 		 && t > strpos))) {
727*0Sstevel@tonic-gate 	/* Fixed substring is found far enough so that the match
728*0Sstevel@tonic-gate 	   cannot start at strpos. */
729*0Sstevel@tonic-gate       try_at_offset:
730*0Sstevel@tonic-gate 	if (ml_anch && t[-1] != '\n') {
731*0Sstevel@tonic-gate 	    /* Eventually fbm_*() should handle this, but often
732*0Sstevel@tonic-gate 	       anchored_offset is not 0, so this check will not be wasted. */
733*0Sstevel@tonic-gate 	    /* XXXX In the code below we prefer to look for "^" even in
734*0Sstevel@tonic-gate 	       presence of anchored substrings.  And we search even
735*0Sstevel@tonic-gate 	       beyond the found float position.  These pessimizations
736*0Sstevel@tonic-gate 	       are historical artefacts only.  */
737*0Sstevel@tonic-gate 	  find_anchor:
738*0Sstevel@tonic-gate 	    while (t < strend - prog->minlen) {
739*0Sstevel@tonic-gate 		if (*t == '\n') {
740*0Sstevel@tonic-gate 		    if (t < check_at - prog->check_offset_min) {
741*0Sstevel@tonic-gate 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
742*0Sstevel@tonic-gate 			    /* Since we moved from the found position,
743*0Sstevel@tonic-gate 			       we definitely contradict the found anchored
744*0Sstevel@tonic-gate 			       substr.  Due to the above check we do not
745*0Sstevel@tonic-gate 			       contradict "check" substr.
746*0Sstevel@tonic-gate 			       Thus we can arrive here only if check substr
747*0Sstevel@tonic-gate 			       is float.  Redo checking for "other"=="fixed".
748*0Sstevel@tonic-gate 			     */
749*0Sstevel@tonic-gate 			    strpos = t + 1;
750*0Sstevel@tonic-gate 			    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
751*0Sstevel@tonic-gate 				PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
752*0Sstevel@tonic-gate 			    goto do_other_anchored;
753*0Sstevel@tonic-gate 			}
754*0Sstevel@tonic-gate 			/* We don't contradict the found floating substring. */
755*0Sstevel@tonic-gate 			/* XXXX Why not check for STCLASS? */
756*0Sstevel@tonic-gate 			s = t + 1;
757*0Sstevel@tonic-gate 			DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
758*0Sstevel@tonic-gate 			    PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
759*0Sstevel@tonic-gate 			goto set_useful;
760*0Sstevel@tonic-gate 		    }
761*0Sstevel@tonic-gate 		    /* Position contradicts check-string */
762*0Sstevel@tonic-gate 		    /* XXXX probably better to look for check-string
763*0Sstevel@tonic-gate 		       than for "\n", so one should lower the limit for t? */
764*0Sstevel@tonic-gate 		    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
765*0Sstevel@tonic-gate 			PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
766*0Sstevel@tonic-gate 		    other_last = strpos = s = t + 1;
767*0Sstevel@tonic-gate 		    goto restart;
768*0Sstevel@tonic-gate 		}
769*0Sstevel@tonic-gate 		t++;
770*0Sstevel@tonic-gate 	    }
771*0Sstevel@tonic-gate 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
772*0Sstevel@tonic-gate 			PL_colors[0],PL_colors[1]));
773*0Sstevel@tonic-gate 	    goto fail_finish;
774*0Sstevel@tonic-gate 	}
775*0Sstevel@tonic-gate 	else {
776*0Sstevel@tonic-gate 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
777*0Sstevel@tonic-gate 			PL_colors[0],PL_colors[1]));
778*0Sstevel@tonic-gate 	}
779*0Sstevel@tonic-gate 	s = t;
780*0Sstevel@tonic-gate       set_useful:
781*0Sstevel@tonic-gate 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
782*0Sstevel@tonic-gate     }
783*0Sstevel@tonic-gate     else {
784*0Sstevel@tonic-gate 	/* The found string does not prohibit matching at strpos,
785*0Sstevel@tonic-gate 	   - no optimization of calling REx engine can be performed,
786*0Sstevel@tonic-gate 	   unless it was an MBOL and we are not after MBOL,
787*0Sstevel@tonic-gate 	   or a future STCLASS check will fail this. */
788*0Sstevel@tonic-gate       try_at_start:
789*0Sstevel@tonic-gate 	/* Even in this situation we may use MBOL flag if strpos is offset
790*0Sstevel@tonic-gate 	   wrt the start of the string. */
791*0Sstevel@tonic-gate 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
792*0Sstevel@tonic-gate 	    && (strpos != strbeg) && strpos[-1] != '\n'
793*0Sstevel@tonic-gate 	    /* May be due to an implicit anchor of m{.*foo}  */
794*0Sstevel@tonic-gate 	    && !(prog->reganch & ROPT_IMPLICIT))
795*0Sstevel@tonic-gate 	{
796*0Sstevel@tonic-gate 	    t = strpos;
797*0Sstevel@tonic-gate 	    goto find_anchor;
798*0Sstevel@tonic-gate 	}
799*0Sstevel@tonic-gate 	DEBUG_r( if (ml_anch)
800*0Sstevel@tonic-gate 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
801*0Sstevel@tonic-gate 			(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
802*0Sstevel@tonic-gate 	);
803*0Sstevel@tonic-gate       success_at_start:
804*0Sstevel@tonic-gate 	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
805*0Sstevel@tonic-gate 	    && (do_utf8 ? (
806*0Sstevel@tonic-gate 		prog->check_utf8		/* Could be deleted already */
807*0Sstevel@tonic-gate 		&& --BmUSEFUL(prog->check_utf8) < 0
808*0Sstevel@tonic-gate 		&& (prog->check_utf8 == prog->float_utf8)
809*0Sstevel@tonic-gate 	    ) : (
810*0Sstevel@tonic-gate 		prog->check_substr		/* Could be deleted already */
811*0Sstevel@tonic-gate 		&& --BmUSEFUL(prog->check_substr) < 0
812*0Sstevel@tonic-gate 		&& (prog->check_substr == prog->float_substr)
813*0Sstevel@tonic-gate 	    )))
814*0Sstevel@tonic-gate 	{
815*0Sstevel@tonic-gate 	    /* If flags & SOMETHING - do not do it many times on the same match */
816*0Sstevel@tonic-gate 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
817*0Sstevel@tonic-gate 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
818*0Sstevel@tonic-gate 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
819*0Sstevel@tonic-gate 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
820*0Sstevel@tonic-gate 	    prog->check_substr = prog->check_utf8 = Nullsv;	/* disable */
821*0Sstevel@tonic-gate 	    prog->float_substr = prog->float_utf8 = Nullsv;	/* clear */
822*0Sstevel@tonic-gate 	    check = Nullsv;			/* abort */
823*0Sstevel@tonic-gate 	    s = strpos;
824*0Sstevel@tonic-gate 	    /* XXXX This is a remnant of the old implementation.  It
825*0Sstevel@tonic-gate 	            looks wasteful, since now INTUIT can use many
826*0Sstevel@tonic-gate 	            other heuristics. */
827*0Sstevel@tonic-gate 	    prog->reganch &= ~RE_USE_INTUIT;
828*0Sstevel@tonic-gate 	}
829*0Sstevel@tonic-gate 	else
830*0Sstevel@tonic-gate 	    s = strpos;
831*0Sstevel@tonic-gate     }
832*0Sstevel@tonic-gate 
833*0Sstevel@tonic-gate     /* Last resort... */
834*0Sstevel@tonic-gate     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
835*0Sstevel@tonic-gate     if (prog->regstclass) {
836*0Sstevel@tonic-gate 	/* minlen == 0 is possible if regstclass is \b or \B,
837*0Sstevel@tonic-gate 	   and the fixed substr is ''$.
838*0Sstevel@tonic-gate 	   Since minlen is already taken into account, s+1 is before strend;
839*0Sstevel@tonic-gate 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
840*0Sstevel@tonic-gate 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
841*0Sstevel@tonic-gate 	   regstclass does not come from lookahead...  */
842*0Sstevel@tonic-gate 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
843*0Sstevel@tonic-gate 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
844*0Sstevel@tonic-gate 	U8* str = (U8*)STRING(prog->regstclass);
845*0Sstevel@tonic-gate 	int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
846*0Sstevel@tonic-gate 		    ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
847*0Sstevel@tonic-gate 		    : 1);
848*0Sstevel@tonic-gate 	char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
849*0Sstevel@tonic-gate 		? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
850*0Sstevel@tonic-gate 		: (prog->float_substr || prog->float_utf8
851*0Sstevel@tonic-gate 		   ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
852*0Sstevel@tonic-gate 			   cl_l, strend)
853*0Sstevel@tonic-gate 		   : strend);
854*0Sstevel@tonic-gate 	char *startpos = strbeg;
855*0Sstevel@tonic-gate 
856*0Sstevel@tonic-gate 	t = s;
857*0Sstevel@tonic-gate 	cache_re(prog);
858*0Sstevel@tonic-gate 	s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
859*0Sstevel@tonic-gate 	if (!s) {
860*0Sstevel@tonic-gate #ifdef DEBUGGING
861*0Sstevel@tonic-gate 	    char *what = 0;
862*0Sstevel@tonic-gate #endif
863*0Sstevel@tonic-gate 	    if (endpos == strend) {
864*0Sstevel@tonic-gate 		DEBUG_r( PerlIO_printf(Perl_debug_log,
865*0Sstevel@tonic-gate 				"Could not match STCLASS...\n") );
866*0Sstevel@tonic-gate 		goto fail;
867*0Sstevel@tonic-gate 	    }
868*0Sstevel@tonic-gate 	    DEBUG_r( PerlIO_printf(Perl_debug_log,
869*0Sstevel@tonic-gate 				   "This position contradicts STCLASS...\n") );
870*0Sstevel@tonic-gate 	    if ((prog->reganch & ROPT_ANCH) && !ml_anch)
871*0Sstevel@tonic-gate 		goto fail;
872*0Sstevel@tonic-gate 	    /* Contradict one of substrings */
873*0Sstevel@tonic-gate 	    if (prog->anchored_substr || prog->anchored_utf8) {
874*0Sstevel@tonic-gate 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
875*0Sstevel@tonic-gate 		    DEBUG_r( what = "anchored" );
876*0Sstevel@tonic-gate 		  hop_and_restart:
877*0Sstevel@tonic-gate 		    s = HOP3c(t, 1, strend);
878*0Sstevel@tonic-gate 		    if (s + start_shift + end_shift > strend) {
879*0Sstevel@tonic-gate 			/* XXXX Should be taken into account earlier? */
880*0Sstevel@tonic-gate 			DEBUG_r( PerlIO_printf(Perl_debug_log,
881*0Sstevel@tonic-gate 					       "Could not match STCLASS...\n") );
882*0Sstevel@tonic-gate 			goto fail;
883*0Sstevel@tonic-gate 		    }
884*0Sstevel@tonic-gate 		    if (!check)
885*0Sstevel@tonic-gate 			goto giveup;
886*0Sstevel@tonic-gate 		    DEBUG_r( PerlIO_printf(Perl_debug_log,
887*0Sstevel@tonic-gate 				"Looking for %s substr starting at offset %ld...\n",
888*0Sstevel@tonic-gate 				 what, (long)(s + start_shift - i_strpos)) );
889*0Sstevel@tonic-gate 		    goto restart;
890*0Sstevel@tonic-gate 		}
891*0Sstevel@tonic-gate 		/* Have both, check_string is floating */
892*0Sstevel@tonic-gate 		if (t + start_shift >= check_at) /* Contradicts floating=check */
893*0Sstevel@tonic-gate 		    goto retry_floating_check;
894*0Sstevel@tonic-gate 		/* Recheck anchored substring, but not floating... */
895*0Sstevel@tonic-gate 		s = check_at;
896*0Sstevel@tonic-gate 		if (!check)
897*0Sstevel@tonic-gate 		    goto giveup;
898*0Sstevel@tonic-gate 		DEBUG_r( PerlIO_printf(Perl_debug_log,
899*0Sstevel@tonic-gate 			  "Looking for anchored substr starting at offset %ld...\n",
900*0Sstevel@tonic-gate 			  (long)(other_last - i_strpos)) );
901*0Sstevel@tonic-gate 		goto do_other_anchored;
902*0Sstevel@tonic-gate 	    }
903*0Sstevel@tonic-gate 	    /* Another way we could have checked stclass at the
904*0Sstevel@tonic-gate                current position only: */
905*0Sstevel@tonic-gate 	    if (ml_anch) {
906*0Sstevel@tonic-gate 		s = t = t + 1;
907*0Sstevel@tonic-gate 		if (!check)
908*0Sstevel@tonic-gate 		    goto giveup;
909*0Sstevel@tonic-gate 		DEBUG_r( PerlIO_printf(Perl_debug_log,
910*0Sstevel@tonic-gate 			  "Looking for /%s^%s/m starting at offset %ld...\n",
911*0Sstevel@tonic-gate 			  PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
912*0Sstevel@tonic-gate 		goto try_at_offset;
913*0Sstevel@tonic-gate 	    }
914*0Sstevel@tonic-gate 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
915*0Sstevel@tonic-gate 		goto fail;
916*0Sstevel@tonic-gate 	    /* Check is floating subtring. */
917*0Sstevel@tonic-gate 	  retry_floating_check:
918*0Sstevel@tonic-gate 	    t = check_at - start_shift;
919*0Sstevel@tonic-gate 	    DEBUG_r( what = "floating" );
920*0Sstevel@tonic-gate 	    goto hop_and_restart;
921*0Sstevel@tonic-gate 	}
922*0Sstevel@tonic-gate 	if (t != s) {
923*0Sstevel@tonic-gate             DEBUG_r(PerlIO_printf(Perl_debug_log,
924*0Sstevel@tonic-gate 			"By STCLASS: moving %ld --> %ld\n",
925*0Sstevel@tonic-gate                                   (long)(t - i_strpos), (long)(s - i_strpos))
926*0Sstevel@tonic-gate                    );
927*0Sstevel@tonic-gate         }
928*0Sstevel@tonic-gate         else {
929*0Sstevel@tonic-gate             DEBUG_r(PerlIO_printf(Perl_debug_log,
930*0Sstevel@tonic-gate                                   "Does not contradict STCLASS...\n");
931*0Sstevel@tonic-gate                    );
932*0Sstevel@tonic-gate         }
933*0Sstevel@tonic-gate     }
934*0Sstevel@tonic-gate   giveup:
935*0Sstevel@tonic-gate     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
936*0Sstevel@tonic-gate 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
937*0Sstevel@tonic-gate 			  PL_colors[5], (long)(s - i_strpos)) );
938*0Sstevel@tonic-gate     return s;
939*0Sstevel@tonic-gate 
940*0Sstevel@tonic-gate   fail_finish:				/* Substring not found */
941*0Sstevel@tonic-gate     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
942*0Sstevel@tonic-gate 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
943*0Sstevel@tonic-gate   fail:
944*0Sstevel@tonic-gate     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
945*0Sstevel@tonic-gate 			  PL_colors[4],PL_colors[5]));
946*0Sstevel@tonic-gate     return Nullch;
947*0Sstevel@tonic-gate }
948*0Sstevel@tonic-gate 
949*0Sstevel@tonic-gate /* We know what class REx starts with.  Try to find this position... */
950*0Sstevel@tonic-gate STATIC char *
S_find_byclass(pTHX_ regexp * prog,regnode * c,char * s,char * strend,char * startpos,I32 norun)951*0Sstevel@tonic-gate S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
952*0Sstevel@tonic-gate {
953*0Sstevel@tonic-gate 	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
954*0Sstevel@tonic-gate 	char *m;
955*0Sstevel@tonic-gate 	STRLEN ln;
956*0Sstevel@tonic-gate 	STRLEN lnc;
957*0Sstevel@tonic-gate 	register STRLEN uskip;
958*0Sstevel@tonic-gate 	unsigned int c1;
959*0Sstevel@tonic-gate 	unsigned int c2;
960*0Sstevel@tonic-gate 	char *e;
961*0Sstevel@tonic-gate 	register I32 tmp = 1;	/* Scratch variable? */
962*0Sstevel@tonic-gate 	register bool do_utf8 = PL_reg_match_utf8;
963*0Sstevel@tonic-gate 
964*0Sstevel@tonic-gate 	/* We know what class it must start with. */
965*0Sstevel@tonic-gate 	switch (OP(c)) {
966*0Sstevel@tonic-gate 	case ANYOF:
967*0Sstevel@tonic-gate 	    if (do_utf8) {
968*0Sstevel@tonic-gate 		 while (s + (uskip = UTF8SKIP(s)) <= strend) {
969*0Sstevel@tonic-gate 		      if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
970*0Sstevel@tonic-gate 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
971*0Sstevel@tonic-gate 			  reginclass(c, (U8*)s, 0, do_utf8) :
972*0Sstevel@tonic-gate 			  REGINCLASS(c, (U8*)s)) {
973*0Sstevel@tonic-gate 			   if (tmp && (norun || regtry(prog, s)))
974*0Sstevel@tonic-gate 				goto got_it;
975*0Sstevel@tonic-gate 			   else
976*0Sstevel@tonic-gate 				tmp = doevery;
977*0Sstevel@tonic-gate 		      }
978*0Sstevel@tonic-gate 		      else
979*0Sstevel@tonic-gate 			   tmp = 1;
980*0Sstevel@tonic-gate 		      s += uskip;
981*0Sstevel@tonic-gate 		 }
982*0Sstevel@tonic-gate 	    }
983*0Sstevel@tonic-gate 	    else {
984*0Sstevel@tonic-gate 		 while (s < strend) {
985*0Sstevel@tonic-gate 		      STRLEN skip = 1;
986*0Sstevel@tonic-gate 
987*0Sstevel@tonic-gate 		      if (REGINCLASS(c, (U8*)s) ||
988*0Sstevel@tonic-gate 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
989*0Sstevel@tonic-gate 			   /* The assignment of 2 is intentional:
990*0Sstevel@tonic-gate 			    * for the folded sharp s, the skip is 2. */
991*0Sstevel@tonic-gate 			   (skip = SHARP_S_SKIP))) {
992*0Sstevel@tonic-gate 			   if (tmp && (norun || regtry(prog, s)))
993*0Sstevel@tonic-gate 				goto got_it;
994*0Sstevel@tonic-gate 			   else
995*0Sstevel@tonic-gate 				tmp = doevery;
996*0Sstevel@tonic-gate 		      }
997*0Sstevel@tonic-gate 		      else
998*0Sstevel@tonic-gate 			   tmp = 1;
999*0Sstevel@tonic-gate 		      s += skip;
1000*0Sstevel@tonic-gate 		 }
1001*0Sstevel@tonic-gate 	    }
1002*0Sstevel@tonic-gate 	    break;
1003*0Sstevel@tonic-gate 	case CANY:
1004*0Sstevel@tonic-gate 	    while (s < strend) {
1005*0Sstevel@tonic-gate 	        if (tmp && (norun || regtry(prog, s)))
1006*0Sstevel@tonic-gate 		    goto got_it;
1007*0Sstevel@tonic-gate 		else
1008*0Sstevel@tonic-gate 		    tmp = doevery;
1009*0Sstevel@tonic-gate 		s++;
1010*0Sstevel@tonic-gate 	    }
1011*0Sstevel@tonic-gate 	    break;
1012*0Sstevel@tonic-gate 	case EXACTF:
1013*0Sstevel@tonic-gate 	    m   = STRING(c);
1014*0Sstevel@tonic-gate 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1015*0Sstevel@tonic-gate 	    lnc = (I32) ln;	/* length to match in characters */
1016*0Sstevel@tonic-gate 	    if (UTF) {
1017*0Sstevel@tonic-gate 	        STRLEN ulen1, ulen2;
1018*0Sstevel@tonic-gate 		U8 *sm = (U8 *) m;
1019*0Sstevel@tonic-gate 		U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
1020*0Sstevel@tonic-gate 		U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
1021*0Sstevel@tonic-gate 
1022*0Sstevel@tonic-gate 		to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1023*0Sstevel@tonic-gate 		to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1024*0Sstevel@tonic-gate 
1025*0Sstevel@tonic-gate 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1026*0Sstevel@tonic-gate 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1027*0Sstevel@tonic-gate 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1028*0Sstevel@tonic-gate 				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1029*0Sstevel@tonic-gate 		lnc = 0;
1030*0Sstevel@tonic-gate 		while (sm < ((U8 *) m + ln)) {
1031*0Sstevel@tonic-gate 		    lnc++;
1032*0Sstevel@tonic-gate 		    sm += UTF8SKIP(sm);
1033*0Sstevel@tonic-gate 		}
1034*0Sstevel@tonic-gate 	    }
1035*0Sstevel@tonic-gate 	    else {
1036*0Sstevel@tonic-gate 		c1 = *(U8*)m;
1037*0Sstevel@tonic-gate 		c2 = PL_fold[c1];
1038*0Sstevel@tonic-gate 	    }
1039*0Sstevel@tonic-gate 	    goto do_exactf;
1040*0Sstevel@tonic-gate 	case EXACTFL:
1041*0Sstevel@tonic-gate 	    m   = STRING(c);
1042*0Sstevel@tonic-gate 	    ln  = STR_LEN(c);
1043*0Sstevel@tonic-gate 	    lnc = (I32) ln;
1044*0Sstevel@tonic-gate 	    c1 = *(U8*)m;
1045*0Sstevel@tonic-gate 	    c2 = PL_fold_locale[c1];
1046*0Sstevel@tonic-gate 	  do_exactf:
1047*0Sstevel@tonic-gate 	    e = HOP3c(strend, -((I32)lnc), s);
1048*0Sstevel@tonic-gate 
1049*0Sstevel@tonic-gate 	    if (norun && e < s)
1050*0Sstevel@tonic-gate 		e = s;			/* Due to minlen logic of intuit() */
1051*0Sstevel@tonic-gate 
1052*0Sstevel@tonic-gate 	    /* The idea in the EXACTF* cases is to first find the
1053*0Sstevel@tonic-gate 	     * first character of the EXACTF* node and then, if
1054*0Sstevel@tonic-gate 	     * necessary, case-insensitively compare the full
1055*0Sstevel@tonic-gate 	     * text of the node.  The c1 and c2 are the first
1056*0Sstevel@tonic-gate 	     * characters (though in Unicode it gets a bit
1057*0Sstevel@tonic-gate 	     * more complicated because there are more cases
1058*0Sstevel@tonic-gate 	     * than just upper and lower: one needs to use
1059*0Sstevel@tonic-gate 	     * the so-called folding case for case-insensitive
1060*0Sstevel@tonic-gate 	     * matching (called "loose matching" in Unicode).
1061*0Sstevel@tonic-gate 	     * ibcmp_utf8() will do just that. */
1062*0Sstevel@tonic-gate 
1063*0Sstevel@tonic-gate 	    if (do_utf8) {
1064*0Sstevel@tonic-gate 	        UV c, f;
1065*0Sstevel@tonic-gate 	        U8 tmpbuf [UTF8_MAXLEN+1];
1066*0Sstevel@tonic-gate 		U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1067*0Sstevel@tonic-gate 		STRLEN len, foldlen;
1068*0Sstevel@tonic-gate 
1069*0Sstevel@tonic-gate 		if (c1 == c2) {
1070*0Sstevel@tonic-gate 		    /* Upper and lower of 1st char are equal -
1071*0Sstevel@tonic-gate 		     * probably not a "letter". */
1072*0Sstevel@tonic-gate 		    while (s <= e) {
1073*0Sstevel@tonic-gate 		        c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1074*0Sstevel@tonic-gate 					   ckWARN(WARN_UTF8) ?
1075*0Sstevel@tonic-gate 					   0 : UTF8_ALLOW_ANY);
1076*0Sstevel@tonic-gate 			if ( c == c1
1077*0Sstevel@tonic-gate 			     && (ln == len ||
1078*0Sstevel@tonic-gate 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1079*0Sstevel@tonic-gate 					    m, (char **)0, ln, (bool)UTF))
1080*0Sstevel@tonic-gate 			     && (norun || regtry(prog, s)) )
1081*0Sstevel@tonic-gate 			    goto got_it;
1082*0Sstevel@tonic-gate 			else {
1083*0Sstevel@tonic-gate 			     uvchr_to_utf8(tmpbuf, c);
1084*0Sstevel@tonic-gate 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1085*0Sstevel@tonic-gate 			     if ( f != c
1086*0Sstevel@tonic-gate 				  && (f == c1 || f == c2)
1087*0Sstevel@tonic-gate 				  && (ln == foldlen ||
1088*0Sstevel@tonic-gate 				      !ibcmp_utf8((char *) foldbuf,
1089*0Sstevel@tonic-gate 						  (char **)0, foldlen, do_utf8,
1090*0Sstevel@tonic-gate 						  m,
1091*0Sstevel@tonic-gate 						  (char **)0, ln, (bool)UTF))
1092*0Sstevel@tonic-gate 				  && (norun || regtry(prog, s)) )
1093*0Sstevel@tonic-gate 				  goto got_it;
1094*0Sstevel@tonic-gate 			}
1095*0Sstevel@tonic-gate 			s += len;
1096*0Sstevel@tonic-gate 		    }
1097*0Sstevel@tonic-gate 		}
1098*0Sstevel@tonic-gate 		else {
1099*0Sstevel@tonic-gate 		    while (s <= e) {
1100*0Sstevel@tonic-gate 		      c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1101*0Sstevel@tonic-gate 					   ckWARN(WARN_UTF8) ?
1102*0Sstevel@tonic-gate 					   0 : UTF8_ALLOW_ANY);
1103*0Sstevel@tonic-gate 
1104*0Sstevel@tonic-gate 			/* Handle some of the three Greek sigmas cases.
1105*0Sstevel@tonic-gate 			 * Note that not all the possible combinations
1106*0Sstevel@tonic-gate 			 * are handled here: some of them are handled
1107*0Sstevel@tonic-gate 			 * by the standard folding rules, and some of
1108*0Sstevel@tonic-gate 			 * them (the character class or ANYOF cases)
1109*0Sstevel@tonic-gate 			 * are handled during compiletime in
1110*0Sstevel@tonic-gate 			 * regexec.c:S_regclass(). */
1111*0Sstevel@tonic-gate 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1112*0Sstevel@tonic-gate 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1113*0Sstevel@tonic-gate 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1114*0Sstevel@tonic-gate 
1115*0Sstevel@tonic-gate 			if ( (c == c1 || c == c2)
1116*0Sstevel@tonic-gate 			     && (ln == len ||
1117*0Sstevel@tonic-gate 				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1118*0Sstevel@tonic-gate 					    m, (char **)0, ln, (bool)UTF))
1119*0Sstevel@tonic-gate 			     && (norun || regtry(prog, s)) )
1120*0Sstevel@tonic-gate 			    goto got_it;
1121*0Sstevel@tonic-gate 			else {
1122*0Sstevel@tonic-gate 			     uvchr_to_utf8(tmpbuf, c);
1123*0Sstevel@tonic-gate 			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1124*0Sstevel@tonic-gate 			     if ( f != c
1125*0Sstevel@tonic-gate 				  && (f == c1 || f == c2)
1126*0Sstevel@tonic-gate 				  && (ln == foldlen ||
1127*0Sstevel@tonic-gate 				      !ibcmp_utf8((char *) foldbuf,
1128*0Sstevel@tonic-gate 						  (char **)0, foldlen, do_utf8,
1129*0Sstevel@tonic-gate 						  m,
1130*0Sstevel@tonic-gate 						  (char **)0, ln, (bool)UTF))
1131*0Sstevel@tonic-gate 				  && (norun || regtry(prog, s)) )
1132*0Sstevel@tonic-gate 				  goto got_it;
1133*0Sstevel@tonic-gate 			}
1134*0Sstevel@tonic-gate 			s += len;
1135*0Sstevel@tonic-gate 		    }
1136*0Sstevel@tonic-gate 		}
1137*0Sstevel@tonic-gate 	    }
1138*0Sstevel@tonic-gate 	    else {
1139*0Sstevel@tonic-gate 		if (c1 == c2)
1140*0Sstevel@tonic-gate 		    while (s <= e) {
1141*0Sstevel@tonic-gate 			if ( *(U8*)s == c1
1142*0Sstevel@tonic-gate 			     && (ln == 1 || !(OP(c) == EXACTF
1143*0Sstevel@tonic-gate 					      ? ibcmp(s, m, ln)
1144*0Sstevel@tonic-gate 					      : ibcmp_locale(s, m, ln)))
1145*0Sstevel@tonic-gate 			     && (norun || regtry(prog, s)) )
1146*0Sstevel@tonic-gate 			    goto got_it;
1147*0Sstevel@tonic-gate 			s++;
1148*0Sstevel@tonic-gate 		    }
1149*0Sstevel@tonic-gate 		else
1150*0Sstevel@tonic-gate 		    while (s <= e) {
1151*0Sstevel@tonic-gate 			if ( (*(U8*)s == c1 || *(U8*)s == c2)
1152*0Sstevel@tonic-gate 			     && (ln == 1 || !(OP(c) == EXACTF
1153*0Sstevel@tonic-gate 					      ? ibcmp(s, m, ln)
1154*0Sstevel@tonic-gate 					      : ibcmp_locale(s, m, ln)))
1155*0Sstevel@tonic-gate 			     && (norun || regtry(prog, s)) )
1156*0Sstevel@tonic-gate 			    goto got_it;
1157*0Sstevel@tonic-gate 			s++;
1158*0Sstevel@tonic-gate 		    }
1159*0Sstevel@tonic-gate 	    }
1160*0Sstevel@tonic-gate 	    break;
1161*0Sstevel@tonic-gate 	case BOUNDL:
1162*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1163*0Sstevel@tonic-gate 	    /* FALL THROUGH */
1164*0Sstevel@tonic-gate 	case BOUND:
1165*0Sstevel@tonic-gate 	    if (do_utf8) {
1166*0Sstevel@tonic-gate 		if (s == PL_bostr)
1167*0Sstevel@tonic-gate 		    tmp = '\n';
1168*0Sstevel@tonic-gate 		else {
1169*0Sstevel@tonic-gate 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1170*0Sstevel@tonic-gate 
1171*0Sstevel@tonic-gate 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1172*0Sstevel@tonic-gate 		}
1173*0Sstevel@tonic-gate 		tmp = ((OP(c) == BOUND ?
1174*0Sstevel@tonic-gate 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1175*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(alnum,"a");
1176*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1177*0Sstevel@tonic-gate 		    if (tmp == !(OP(c) == BOUND ?
1178*0Sstevel@tonic-gate 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1179*0Sstevel@tonic-gate 				 isALNUM_LC_utf8((U8*)s)))
1180*0Sstevel@tonic-gate 		    {
1181*0Sstevel@tonic-gate 			tmp = !tmp;
1182*0Sstevel@tonic-gate 			if ((norun || regtry(prog, s)))
1183*0Sstevel@tonic-gate 			    goto got_it;
1184*0Sstevel@tonic-gate 		    }
1185*0Sstevel@tonic-gate 		    s += uskip;
1186*0Sstevel@tonic-gate 		}
1187*0Sstevel@tonic-gate 	    }
1188*0Sstevel@tonic-gate 	    else {
1189*0Sstevel@tonic-gate 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1190*0Sstevel@tonic-gate 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1191*0Sstevel@tonic-gate 		while (s < strend) {
1192*0Sstevel@tonic-gate 		    if (tmp ==
1193*0Sstevel@tonic-gate 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1194*0Sstevel@tonic-gate 			tmp = !tmp;
1195*0Sstevel@tonic-gate 			if ((norun || regtry(prog, s)))
1196*0Sstevel@tonic-gate 			    goto got_it;
1197*0Sstevel@tonic-gate 		    }
1198*0Sstevel@tonic-gate 		    s++;
1199*0Sstevel@tonic-gate 		}
1200*0Sstevel@tonic-gate 	    }
1201*0Sstevel@tonic-gate 	    if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1202*0Sstevel@tonic-gate 		goto got_it;
1203*0Sstevel@tonic-gate 	    break;
1204*0Sstevel@tonic-gate 	case NBOUNDL:
1205*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1206*0Sstevel@tonic-gate 	    /* FALL THROUGH */
1207*0Sstevel@tonic-gate 	case NBOUND:
1208*0Sstevel@tonic-gate 	    if (do_utf8) {
1209*0Sstevel@tonic-gate 		if (s == PL_bostr)
1210*0Sstevel@tonic-gate 		    tmp = '\n';
1211*0Sstevel@tonic-gate 		else {
1212*0Sstevel@tonic-gate 		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1213*0Sstevel@tonic-gate 
1214*0Sstevel@tonic-gate 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1215*0Sstevel@tonic-gate 		}
1216*0Sstevel@tonic-gate 		tmp = ((OP(c) == NBOUND ?
1217*0Sstevel@tonic-gate 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1218*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(alnum,"a");
1219*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1220*0Sstevel@tonic-gate 		    if (tmp == !(OP(c) == NBOUND ?
1221*0Sstevel@tonic-gate 				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1222*0Sstevel@tonic-gate 				 isALNUM_LC_utf8((U8*)s)))
1223*0Sstevel@tonic-gate 			tmp = !tmp;
1224*0Sstevel@tonic-gate 		    else if ((norun || regtry(prog, s)))
1225*0Sstevel@tonic-gate 			goto got_it;
1226*0Sstevel@tonic-gate 		    s += uskip;
1227*0Sstevel@tonic-gate 		}
1228*0Sstevel@tonic-gate 	    }
1229*0Sstevel@tonic-gate 	    else {
1230*0Sstevel@tonic-gate 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1231*0Sstevel@tonic-gate 		tmp = ((OP(c) == NBOUND ?
1232*0Sstevel@tonic-gate 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1233*0Sstevel@tonic-gate 		while (s < strend) {
1234*0Sstevel@tonic-gate 		    if (tmp ==
1235*0Sstevel@tonic-gate 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1236*0Sstevel@tonic-gate 			tmp = !tmp;
1237*0Sstevel@tonic-gate 		    else if ((norun || regtry(prog, s)))
1238*0Sstevel@tonic-gate 			goto got_it;
1239*0Sstevel@tonic-gate 		    s++;
1240*0Sstevel@tonic-gate 		}
1241*0Sstevel@tonic-gate 	    }
1242*0Sstevel@tonic-gate 	    if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1243*0Sstevel@tonic-gate 		goto got_it;
1244*0Sstevel@tonic-gate 	    break;
1245*0Sstevel@tonic-gate 	case ALNUM:
1246*0Sstevel@tonic-gate 	    if (do_utf8) {
1247*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(alnum,"a");
1248*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1249*0Sstevel@tonic-gate 		    if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1250*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1251*0Sstevel@tonic-gate 			    goto got_it;
1252*0Sstevel@tonic-gate 			else
1253*0Sstevel@tonic-gate 			    tmp = doevery;
1254*0Sstevel@tonic-gate 		    }
1255*0Sstevel@tonic-gate 		    else
1256*0Sstevel@tonic-gate 			tmp = 1;
1257*0Sstevel@tonic-gate 		    s += uskip;
1258*0Sstevel@tonic-gate 		}
1259*0Sstevel@tonic-gate 	    }
1260*0Sstevel@tonic-gate 	    else {
1261*0Sstevel@tonic-gate 		while (s < strend) {
1262*0Sstevel@tonic-gate 		    if (isALNUM(*s)) {
1263*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1264*0Sstevel@tonic-gate 			    goto got_it;
1265*0Sstevel@tonic-gate 			else
1266*0Sstevel@tonic-gate 			    tmp = doevery;
1267*0Sstevel@tonic-gate 		    }
1268*0Sstevel@tonic-gate 		    else
1269*0Sstevel@tonic-gate 			tmp = 1;
1270*0Sstevel@tonic-gate 		    s++;
1271*0Sstevel@tonic-gate 		}
1272*0Sstevel@tonic-gate 	    }
1273*0Sstevel@tonic-gate 	    break;
1274*0Sstevel@tonic-gate 	case ALNUML:
1275*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1276*0Sstevel@tonic-gate 	    if (do_utf8) {
1277*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1278*0Sstevel@tonic-gate 		    if (isALNUM_LC_utf8((U8*)s)) {
1279*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1280*0Sstevel@tonic-gate 			    goto got_it;
1281*0Sstevel@tonic-gate 			else
1282*0Sstevel@tonic-gate 			    tmp = doevery;
1283*0Sstevel@tonic-gate 		    }
1284*0Sstevel@tonic-gate 		    else
1285*0Sstevel@tonic-gate 			tmp = 1;
1286*0Sstevel@tonic-gate 		    s += uskip;
1287*0Sstevel@tonic-gate 		}
1288*0Sstevel@tonic-gate 	    }
1289*0Sstevel@tonic-gate 	    else {
1290*0Sstevel@tonic-gate 		while (s < strend) {
1291*0Sstevel@tonic-gate 		    if (isALNUM_LC(*s)) {
1292*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1293*0Sstevel@tonic-gate 			    goto got_it;
1294*0Sstevel@tonic-gate 			else
1295*0Sstevel@tonic-gate 			    tmp = doevery;
1296*0Sstevel@tonic-gate 		    }
1297*0Sstevel@tonic-gate 		    else
1298*0Sstevel@tonic-gate 			tmp = 1;
1299*0Sstevel@tonic-gate 		    s++;
1300*0Sstevel@tonic-gate 		}
1301*0Sstevel@tonic-gate 	    }
1302*0Sstevel@tonic-gate 	    break;
1303*0Sstevel@tonic-gate 	case NALNUM:
1304*0Sstevel@tonic-gate 	    if (do_utf8) {
1305*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(alnum,"a");
1306*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1307*0Sstevel@tonic-gate 		    if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1308*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1309*0Sstevel@tonic-gate 			    goto got_it;
1310*0Sstevel@tonic-gate 			else
1311*0Sstevel@tonic-gate 			    tmp = doevery;
1312*0Sstevel@tonic-gate 		    }
1313*0Sstevel@tonic-gate 		    else
1314*0Sstevel@tonic-gate 			tmp = 1;
1315*0Sstevel@tonic-gate 		    s += uskip;
1316*0Sstevel@tonic-gate 		}
1317*0Sstevel@tonic-gate 	    }
1318*0Sstevel@tonic-gate 	    else {
1319*0Sstevel@tonic-gate 		while (s < strend) {
1320*0Sstevel@tonic-gate 		    if (!isALNUM(*s)) {
1321*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1322*0Sstevel@tonic-gate 			    goto got_it;
1323*0Sstevel@tonic-gate 			else
1324*0Sstevel@tonic-gate 			    tmp = doevery;
1325*0Sstevel@tonic-gate 		    }
1326*0Sstevel@tonic-gate 		    else
1327*0Sstevel@tonic-gate 			tmp = 1;
1328*0Sstevel@tonic-gate 		    s++;
1329*0Sstevel@tonic-gate 		}
1330*0Sstevel@tonic-gate 	    }
1331*0Sstevel@tonic-gate 	    break;
1332*0Sstevel@tonic-gate 	case NALNUML:
1333*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1334*0Sstevel@tonic-gate 	    if (do_utf8) {
1335*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1336*0Sstevel@tonic-gate 		    if (!isALNUM_LC_utf8((U8*)s)) {
1337*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1338*0Sstevel@tonic-gate 			    goto got_it;
1339*0Sstevel@tonic-gate 			else
1340*0Sstevel@tonic-gate 			    tmp = doevery;
1341*0Sstevel@tonic-gate 		    }
1342*0Sstevel@tonic-gate 		    else
1343*0Sstevel@tonic-gate 			tmp = 1;
1344*0Sstevel@tonic-gate 		    s += uskip;
1345*0Sstevel@tonic-gate 		}
1346*0Sstevel@tonic-gate 	    }
1347*0Sstevel@tonic-gate 	    else {
1348*0Sstevel@tonic-gate 		while (s < strend) {
1349*0Sstevel@tonic-gate 		    if (!isALNUM_LC(*s)) {
1350*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1351*0Sstevel@tonic-gate 			    goto got_it;
1352*0Sstevel@tonic-gate 			else
1353*0Sstevel@tonic-gate 			    tmp = doevery;
1354*0Sstevel@tonic-gate 		    }
1355*0Sstevel@tonic-gate 		    else
1356*0Sstevel@tonic-gate 			tmp = 1;
1357*0Sstevel@tonic-gate 		    s++;
1358*0Sstevel@tonic-gate 		}
1359*0Sstevel@tonic-gate 	    }
1360*0Sstevel@tonic-gate 	    break;
1361*0Sstevel@tonic-gate 	case SPACE:
1362*0Sstevel@tonic-gate 	    if (do_utf8) {
1363*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(space," ");
1364*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1365*0Sstevel@tonic-gate 		    if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1366*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1367*0Sstevel@tonic-gate 			    goto got_it;
1368*0Sstevel@tonic-gate 			else
1369*0Sstevel@tonic-gate 			    tmp = doevery;
1370*0Sstevel@tonic-gate 		    }
1371*0Sstevel@tonic-gate 		    else
1372*0Sstevel@tonic-gate 			tmp = 1;
1373*0Sstevel@tonic-gate 		    s += uskip;
1374*0Sstevel@tonic-gate 		}
1375*0Sstevel@tonic-gate 	    }
1376*0Sstevel@tonic-gate 	    else {
1377*0Sstevel@tonic-gate 		while (s < strend) {
1378*0Sstevel@tonic-gate 		    if (isSPACE(*s)) {
1379*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1380*0Sstevel@tonic-gate 			    goto got_it;
1381*0Sstevel@tonic-gate 			else
1382*0Sstevel@tonic-gate 			    tmp = doevery;
1383*0Sstevel@tonic-gate 		    }
1384*0Sstevel@tonic-gate 		    else
1385*0Sstevel@tonic-gate 			tmp = 1;
1386*0Sstevel@tonic-gate 		    s++;
1387*0Sstevel@tonic-gate 		}
1388*0Sstevel@tonic-gate 	    }
1389*0Sstevel@tonic-gate 	    break;
1390*0Sstevel@tonic-gate 	case SPACEL:
1391*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1392*0Sstevel@tonic-gate 	    if (do_utf8) {
1393*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1394*0Sstevel@tonic-gate 		    if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1395*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1396*0Sstevel@tonic-gate 			    goto got_it;
1397*0Sstevel@tonic-gate 			else
1398*0Sstevel@tonic-gate 			    tmp = doevery;
1399*0Sstevel@tonic-gate 		    }
1400*0Sstevel@tonic-gate 		    else
1401*0Sstevel@tonic-gate 			tmp = 1;
1402*0Sstevel@tonic-gate 		    s += uskip;
1403*0Sstevel@tonic-gate 		}
1404*0Sstevel@tonic-gate 	    }
1405*0Sstevel@tonic-gate 	    else {
1406*0Sstevel@tonic-gate 		while (s < strend) {
1407*0Sstevel@tonic-gate 		    if (isSPACE_LC(*s)) {
1408*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1409*0Sstevel@tonic-gate 			    goto got_it;
1410*0Sstevel@tonic-gate 			else
1411*0Sstevel@tonic-gate 			    tmp = doevery;
1412*0Sstevel@tonic-gate 		    }
1413*0Sstevel@tonic-gate 		    else
1414*0Sstevel@tonic-gate 			tmp = 1;
1415*0Sstevel@tonic-gate 		    s++;
1416*0Sstevel@tonic-gate 		}
1417*0Sstevel@tonic-gate 	    }
1418*0Sstevel@tonic-gate 	    break;
1419*0Sstevel@tonic-gate 	case NSPACE:
1420*0Sstevel@tonic-gate 	    if (do_utf8) {
1421*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(space," ");
1422*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1423*0Sstevel@tonic-gate 		    if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1424*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1425*0Sstevel@tonic-gate 			    goto got_it;
1426*0Sstevel@tonic-gate 			else
1427*0Sstevel@tonic-gate 			    tmp = doevery;
1428*0Sstevel@tonic-gate 		    }
1429*0Sstevel@tonic-gate 		    else
1430*0Sstevel@tonic-gate 			tmp = 1;
1431*0Sstevel@tonic-gate 		    s += uskip;
1432*0Sstevel@tonic-gate 		}
1433*0Sstevel@tonic-gate 	    }
1434*0Sstevel@tonic-gate 	    else {
1435*0Sstevel@tonic-gate 		while (s < strend) {
1436*0Sstevel@tonic-gate 		    if (!isSPACE(*s)) {
1437*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1438*0Sstevel@tonic-gate 			    goto got_it;
1439*0Sstevel@tonic-gate 			else
1440*0Sstevel@tonic-gate 			    tmp = doevery;
1441*0Sstevel@tonic-gate 		    }
1442*0Sstevel@tonic-gate 		    else
1443*0Sstevel@tonic-gate 			tmp = 1;
1444*0Sstevel@tonic-gate 		    s++;
1445*0Sstevel@tonic-gate 		}
1446*0Sstevel@tonic-gate 	    }
1447*0Sstevel@tonic-gate 	    break;
1448*0Sstevel@tonic-gate 	case NSPACEL:
1449*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1450*0Sstevel@tonic-gate 	    if (do_utf8) {
1451*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1452*0Sstevel@tonic-gate 		    if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1453*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1454*0Sstevel@tonic-gate 			    goto got_it;
1455*0Sstevel@tonic-gate 			else
1456*0Sstevel@tonic-gate 			    tmp = doevery;
1457*0Sstevel@tonic-gate 		    }
1458*0Sstevel@tonic-gate 		    else
1459*0Sstevel@tonic-gate 			tmp = 1;
1460*0Sstevel@tonic-gate 		    s += uskip;
1461*0Sstevel@tonic-gate 		}
1462*0Sstevel@tonic-gate 	    }
1463*0Sstevel@tonic-gate 	    else {
1464*0Sstevel@tonic-gate 		while (s < strend) {
1465*0Sstevel@tonic-gate 		    if (!isSPACE_LC(*s)) {
1466*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1467*0Sstevel@tonic-gate 			    goto got_it;
1468*0Sstevel@tonic-gate 			else
1469*0Sstevel@tonic-gate 			    tmp = doevery;
1470*0Sstevel@tonic-gate 		    }
1471*0Sstevel@tonic-gate 		    else
1472*0Sstevel@tonic-gate 			tmp = 1;
1473*0Sstevel@tonic-gate 		    s++;
1474*0Sstevel@tonic-gate 		}
1475*0Sstevel@tonic-gate 	    }
1476*0Sstevel@tonic-gate 	    break;
1477*0Sstevel@tonic-gate 	case DIGIT:
1478*0Sstevel@tonic-gate 	    if (do_utf8) {
1479*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(digit,"0");
1480*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1481*0Sstevel@tonic-gate 		    if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1482*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1483*0Sstevel@tonic-gate 			    goto got_it;
1484*0Sstevel@tonic-gate 			else
1485*0Sstevel@tonic-gate 			    tmp = doevery;
1486*0Sstevel@tonic-gate 		    }
1487*0Sstevel@tonic-gate 		    else
1488*0Sstevel@tonic-gate 			tmp = 1;
1489*0Sstevel@tonic-gate 		    s += uskip;
1490*0Sstevel@tonic-gate 		}
1491*0Sstevel@tonic-gate 	    }
1492*0Sstevel@tonic-gate 	    else {
1493*0Sstevel@tonic-gate 		while (s < strend) {
1494*0Sstevel@tonic-gate 		    if (isDIGIT(*s)) {
1495*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1496*0Sstevel@tonic-gate 			    goto got_it;
1497*0Sstevel@tonic-gate 			else
1498*0Sstevel@tonic-gate 			    tmp = doevery;
1499*0Sstevel@tonic-gate 		    }
1500*0Sstevel@tonic-gate 		    else
1501*0Sstevel@tonic-gate 			tmp = 1;
1502*0Sstevel@tonic-gate 		    s++;
1503*0Sstevel@tonic-gate 		}
1504*0Sstevel@tonic-gate 	    }
1505*0Sstevel@tonic-gate 	    break;
1506*0Sstevel@tonic-gate 	case DIGITL:
1507*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1508*0Sstevel@tonic-gate 	    if (do_utf8) {
1509*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1510*0Sstevel@tonic-gate 		    if (isDIGIT_LC_utf8((U8*)s)) {
1511*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1512*0Sstevel@tonic-gate 			    goto got_it;
1513*0Sstevel@tonic-gate 			else
1514*0Sstevel@tonic-gate 			    tmp = doevery;
1515*0Sstevel@tonic-gate 		    }
1516*0Sstevel@tonic-gate 		    else
1517*0Sstevel@tonic-gate 			tmp = 1;
1518*0Sstevel@tonic-gate 		    s += uskip;
1519*0Sstevel@tonic-gate 		}
1520*0Sstevel@tonic-gate 	    }
1521*0Sstevel@tonic-gate 	    else {
1522*0Sstevel@tonic-gate 		while (s < strend) {
1523*0Sstevel@tonic-gate 		    if (isDIGIT_LC(*s)) {
1524*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1525*0Sstevel@tonic-gate 			    goto got_it;
1526*0Sstevel@tonic-gate 			else
1527*0Sstevel@tonic-gate 			    tmp = doevery;
1528*0Sstevel@tonic-gate 		    }
1529*0Sstevel@tonic-gate 		    else
1530*0Sstevel@tonic-gate 			tmp = 1;
1531*0Sstevel@tonic-gate 		    s++;
1532*0Sstevel@tonic-gate 		}
1533*0Sstevel@tonic-gate 	    }
1534*0Sstevel@tonic-gate 	    break;
1535*0Sstevel@tonic-gate 	case NDIGIT:
1536*0Sstevel@tonic-gate 	    if (do_utf8) {
1537*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(digit,"0");
1538*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1539*0Sstevel@tonic-gate 		    if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1540*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1541*0Sstevel@tonic-gate 			    goto got_it;
1542*0Sstevel@tonic-gate 			else
1543*0Sstevel@tonic-gate 			    tmp = doevery;
1544*0Sstevel@tonic-gate 		    }
1545*0Sstevel@tonic-gate 		    else
1546*0Sstevel@tonic-gate 			tmp = 1;
1547*0Sstevel@tonic-gate 		    s += uskip;
1548*0Sstevel@tonic-gate 		}
1549*0Sstevel@tonic-gate 	    }
1550*0Sstevel@tonic-gate 	    else {
1551*0Sstevel@tonic-gate 		while (s < strend) {
1552*0Sstevel@tonic-gate 		    if (!isDIGIT(*s)) {
1553*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1554*0Sstevel@tonic-gate 			    goto got_it;
1555*0Sstevel@tonic-gate 			else
1556*0Sstevel@tonic-gate 			    tmp = doevery;
1557*0Sstevel@tonic-gate 		    }
1558*0Sstevel@tonic-gate 		    else
1559*0Sstevel@tonic-gate 			tmp = 1;
1560*0Sstevel@tonic-gate 		    s++;
1561*0Sstevel@tonic-gate 		}
1562*0Sstevel@tonic-gate 	    }
1563*0Sstevel@tonic-gate 	    break;
1564*0Sstevel@tonic-gate 	case NDIGITL:
1565*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
1566*0Sstevel@tonic-gate 	    if (do_utf8) {
1567*0Sstevel@tonic-gate 		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1568*0Sstevel@tonic-gate 		    if (!isDIGIT_LC_utf8((U8*)s)) {
1569*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1570*0Sstevel@tonic-gate 			    goto got_it;
1571*0Sstevel@tonic-gate 			else
1572*0Sstevel@tonic-gate 			    tmp = doevery;
1573*0Sstevel@tonic-gate 		    }
1574*0Sstevel@tonic-gate 		    else
1575*0Sstevel@tonic-gate 			tmp = 1;
1576*0Sstevel@tonic-gate 		    s += uskip;
1577*0Sstevel@tonic-gate 		}
1578*0Sstevel@tonic-gate 	    }
1579*0Sstevel@tonic-gate 	    else {
1580*0Sstevel@tonic-gate 		while (s < strend) {
1581*0Sstevel@tonic-gate 		    if (!isDIGIT_LC(*s)) {
1582*0Sstevel@tonic-gate 			if (tmp && (norun || regtry(prog, s)))
1583*0Sstevel@tonic-gate 			    goto got_it;
1584*0Sstevel@tonic-gate 			else
1585*0Sstevel@tonic-gate 			    tmp = doevery;
1586*0Sstevel@tonic-gate 		    }
1587*0Sstevel@tonic-gate 		    else
1588*0Sstevel@tonic-gate 			tmp = 1;
1589*0Sstevel@tonic-gate 		    s++;
1590*0Sstevel@tonic-gate 		}
1591*0Sstevel@tonic-gate 	    }
1592*0Sstevel@tonic-gate 	    break;
1593*0Sstevel@tonic-gate 	default:
1594*0Sstevel@tonic-gate 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1595*0Sstevel@tonic-gate 	    break;
1596*0Sstevel@tonic-gate 	}
1597*0Sstevel@tonic-gate 	return 0;
1598*0Sstevel@tonic-gate       got_it:
1599*0Sstevel@tonic-gate 	return s;
1600*0Sstevel@tonic-gate }
1601*0Sstevel@tonic-gate 
1602*0Sstevel@tonic-gate /*
1603*0Sstevel@tonic-gate  - regexec_flags - match a regexp against a string
1604*0Sstevel@tonic-gate  */
1605*0Sstevel@tonic-gate I32
Perl_regexec_flags(pTHX_ register regexp * prog,char * stringarg,register char * strend,char * strbeg,I32 minend,SV * sv,void * data,U32 flags)1606*0Sstevel@tonic-gate Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1607*0Sstevel@tonic-gate 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1608*0Sstevel@tonic-gate /* strend: pointer to null at end of string */
1609*0Sstevel@tonic-gate /* strbeg: real beginning of string */
1610*0Sstevel@tonic-gate /* minend: end of match must be >=minend after stringarg. */
1611*0Sstevel@tonic-gate /* data: May be used for some additional optimizations. */
1612*0Sstevel@tonic-gate /* nosave: For optimizations. */
1613*0Sstevel@tonic-gate {
1614*0Sstevel@tonic-gate     register char *s;
1615*0Sstevel@tonic-gate     register regnode *c;
1616*0Sstevel@tonic-gate     register char *startpos = stringarg;
1617*0Sstevel@tonic-gate     I32 minlen;		/* must match at least this many chars */
1618*0Sstevel@tonic-gate     I32 dontbother = 0;	/* how many characters not to try at end */
1619*0Sstevel@tonic-gate     /* I32 start_shift = 0; */		/* Offset of the start to find
1620*0Sstevel@tonic-gate 					 constant substr. */		/* CC */
1621*0Sstevel@tonic-gate     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1622*0Sstevel@tonic-gate     I32 scream_pos = -1;		/* Internal iterator of scream. */
1623*0Sstevel@tonic-gate     char *scream_olds;
1624*0Sstevel@tonic-gate     SV* oreplsv = GvSV(PL_replgv);
1625*0Sstevel@tonic-gate     bool do_utf8 = DO_UTF8(sv);
1626*0Sstevel@tonic-gate #ifdef DEBUGGING
1627*0Sstevel@tonic-gate     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1628*0Sstevel@tonic-gate     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1629*0Sstevel@tonic-gate #endif
1630*0Sstevel@tonic-gate     RX_MATCH_UTF8_set(prog,do_utf8);
1631*0Sstevel@tonic-gate 
1632*0Sstevel@tonic-gate     PL_regcc = 0;
1633*0Sstevel@tonic-gate 
1634*0Sstevel@tonic-gate     cache_re(prog);
1635*0Sstevel@tonic-gate #ifdef DEBUGGING
1636*0Sstevel@tonic-gate     PL_regnarrate = DEBUG_r_TEST;
1637*0Sstevel@tonic-gate #endif
1638*0Sstevel@tonic-gate 
1639*0Sstevel@tonic-gate     /* Be paranoid... */
1640*0Sstevel@tonic-gate     if (prog == NULL || startpos == NULL) {
1641*0Sstevel@tonic-gate 	Perl_croak(aTHX_ "NULL regexp parameter");
1642*0Sstevel@tonic-gate 	return 0;
1643*0Sstevel@tonic-gate     }
1644*0Sstevel@tonic-gate 
1645*0Sstevel@tonic-gate     minlen = prog->minlen;
1646*0Sstevel@tonic-gate     if (strend - startpos < minlen) {
1647*0Sstevel@tonic-gate         DEBUG_r(PerlIO_printf(Perl_debug_log,
1648*0Sstevel@tonic-gate 			      "String too short [regexec_flags]...\n"));
1649*0Sstevel@tonic-gate 	goto phooey;
1650*0Sstevel@tonic-gate     }
1651*0Sstevel@tonic-gate 
1652*0Sstevel@tonic-gate     /* Check validity of program. */
1653*0Sstevel@tonic-gate     if (UCHARAT(prog->program) != REG_MAGIC) {
1654*0Sstevel@tonic-gate 	Perl_croak(aTHX_ "corrupted regexp program");
1655*0Sstevel@tonic-gate     }
1656*0Sstevel@tonic-gate 
1657*0Sstevel@tonic-gate     PL_reg_flags = 0;
1658*0Sstevel@tonic-gate     PL_reg_eval_set = 0;
1659*0Sstevel@tonic-gate     PL_reg_maxiter = 0;
1660*0Sstevel@tonic-gate 
1661*0Sstevel@tonic-gate     if (prog->reganch & ROPT_UTF8)
1662*0Sstevel@tonic-gate 	PL_reg_flags |= RF_utf8;
1663*0Sstevel@tonic-gate 
1664*0Sstevel@tonic-gate     /* Mark beginning of line for ^ and lookbehind. */
1665*0Sstevel@tonic-gate     PL_regbol = startpos;
1666*0Sstevel@tonic-gate     PL_bostr  = strbeg;
1667*0Sstevel@tonic-gate     PL_reg_sv = sv;
1668*0Sstevel@tonic-gate 
1669*0Sstevel@tonic-gate     /* Mark end of line for $ (and such) */
1670*0Sstevel@tonic-gate     PL_regeol = strend;
1671*0Sstevel@tonic-gate 
1672*0Sstevel@tonic-gate     /* see how far we have to get to not match where we matched before */
1673*0Sstevel@tonic-gate     PL_regtill = startpos+minend;
1674*0Sstevel@tonic-gate 
1675*0Sstevel@tonic-gate     /* We start without call_cc context.  */
1676*0Sstevel@tonic-gate     PL_reg_call_cc = 0;
1677*0Sstevel@tonic-gate 
1678*0Sstevel@tonic-gate     /* If there is a "must appear" string, look for it. */
1679*0Sstevel@tonic-gate     s = startpos;
1680*0Sstevel@tonic-gate 
1681*0Sstevel@tonic-gate     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1682*0Sstevel@tonic-gate 	MAGIC *mg;
1683*0Sstevel@tonic-gate 
1684*0Sstevel@tonic-gate 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1685*0Sstevel@tonic-gate 	    PL_reg_ganch = startpos;
1686*0Sstevel@tonic-gate 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1687*0Sstevel@tonic-gate 		  && SvMAGIC(sv)
1688*0Sstevel@tonic-gate 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1689*0Sstevel@tonic-gate 		  && mg->mg_len >= 0) {
1690*0Sstevel@tonic-gate 	    PL_reg_ganch = strbeg + mg->mg_len;	/* Defined pos() */
1691*0Sstevel@tonic-gate 	    if (prog->reganch & ROPT_ANCH_GPOS) {
1692*0Sstevel@tonic-gate 	        if (s > PL_reg_ganch)
1693*0Sstevel@tonic-gate 		    goto phooey;
1694*0Sstevel@tonic-gate 		s = PL_reg_ganch;
1695*0Sstevel@tonic-gate 	    }
1696*0Sstevel@tonic-gate 	}
1697*0Sstevel@tonic-gate 	else				/* pos() not defined */
1698*0Sstevel@tonic-gate 	    PL_reg_ganch = strbeg;
1699*0Sstevel@tonic-gate     }
1700*0Sstevel@tonic-gate 
1701*0Sstevel@tonic-gate     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1702*0Sstevel@tonic-gate 	re_scream_pos_data d;
1703*0Sstevel@tonic-gate 
1704*0Sstevel@tonic-gate 	d.scream_olds = &scream_olds;
1705*0Sstevel@tonic-gate 	d.scream_pos = &scream_pos;
1706*0Sstevel@tonic-gate 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1707*0Sstevel@tonic-gate 	if (!s) {
1708*0Sstevel@tonic-gate 	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1709*0Sstevel@tonic-gate 	    goto phooey;	/* not present */
1710*0Sstevel@tonic-gate 	}
1711*0Sstevel@tonic-gate     }
1712*0Sstevel@tonic-gate 
1713*0Sstevel@tonic-gate     DEBUG_r({
1714*0Sstevel@tonic-gate 	 char *s0   = UTF ?
1715*0Sstevel@tonic-gate 	   pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1716*0Sstevel@tonic-gate 			  UNI_DISPLAY_REGEX) :
1717*0Sstevel@tonic-gate 	   prog->precomp;
1718*0Sstevel@tonic-gate 	 int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1719*0Sstevel@tonic-gate 	 char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1720*0Sstevel@tonic-gate 					       UNI_DISPLAY_REGEX) : startpos;
1721*0Sstevel@tonic-gate 	 int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1722*0Sstevel@tonic-gate 	 if (!PL_colorset)
1723*0Sstevel@tonic-gate 	     reginitcolors();
1724*0Sstevel@tonic-gate 	 PerlIO_printf(Perl_debug_log,
1725*0Sstevel@tonic-gate 		       "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1726*0Sstevel@tonic-gate 		       PL_colors[4],PL_colors[5],PL_colors[0],
1727*0Sstevel@tonic-gate 		       len0, len0, s0,
1728*0Sstevel@tonic-gate 		       PL_colors[1],
1729*0Sstevel@tonic-gate 		       len0 > 60 ? "..." : "",
1730*0Sstevel@tonic-gate 		       PL_colors[0],
1731*0Sstevel@tonic-gate 		       (int)(len1 > 60 ? 60 : len1),
1732*0Sstevel@tonic-gate 		       s1, PL_colors[1],
1733*0Sstevel@tonic-gate 		       (len1 > 60 ? "..." : "")
1734*0Sstevel@tonic-gate 	      );
1735*0Sstevel@tonic-gate     });
1736*0Sstevel@tonic-gate 
1737*0Sstevel@tonic-gate     /* Simplest case:  anchored match need be tried only once. */
1738*0Sstevel@tonic-gate     /*  [unless only anchor is BOL and multiline is set] */
1739*0Sstevel@tonic-gate     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1740*0Sstevel@tonic-gate 	if (s == startpos && regtry(prog, startpos))
1741*0Sstevel@tonic-gate 	    goto got_it;
1742*0Sstevel@tonic-gate 	else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1743*0Sstevel@tonic-gate 		 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1744*0Sstevel@tonic-gate 	{
1745*0Sstevel@tonic-gate 	    char *end;
1746*0Sstevel@tonic-gate 
1747*0Sstevel@tonic-gate 	    if (minlen)
1748*0Sstevel@tonic-gate 		dontbother = minlen - 1;
1749*0Sstevel@tonic-gate 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1750*0Sstevel@tonic-gate 	    /* for multiline we only have to try after newlines */
1751*0Sstevel@tonic-gate 	    if (prog->check_substr || prog->check_utf8) {
1752*0Sstevel@tonic-gate 		if (s == startpos)
1753*0Sstevel@tonic-gate 		    goto after_try;
1754*0Sstevel@tonic-gate 		while (1) {
1755*0Sstevel@tonic-gate 		    if (regtry(prog, s))
1756*0Sstevel@tonic-gate 			goto got_it;
1757*0Sstevel@tonic-gate 		  after_try:
1758*0Sstevel@tonic-gate 		    if (s >= end)
1759*0Sstevel@tonic-gate 			goto phooey;
1760*0Sstevel@tonic-gate 		    if (prog->reganch & RE_USE_INTUIT) {
1761*0Sstevel@tonic-gate 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1762*0Sstevel@tonic-gate 			if (!s)
1763*0Sstevel@tonic-gate 			    goto phooey;
1764*0Sstevel@tonic-gate 		    }
1765*0Sstevel@tonic-gate 		    else
1766*0Sstevel@tonic-gate 			s++;
1767*0Sstevel@tonic-gate 		}
1768*0Sstevel@tonic-gate 	    } else {
1769*0Sstevel@tonic-gate 		if (s > startpos)
1770*0Sstevel@tonic-gate 		    s--;
1771*0Sstevel@tonic-gate 		while (s < end) {
1772*0Sstevel@tonic-gate 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1773*0Sstevel@tonic-gate 			if (regtry(prog, s))
1774*0Sstevel@tonic-gate 			    goto got_it;
1775*0Sstevel@tonic-gate 		    }
1776*0Sstevel@tonic-gate 		}
1777*0Sstevel@tonic-gate 	    }
1778*0Sstevel@tonic-gate 	}
1779*0Sstevel@tonic-gate 	goto phooey;
1780*0Sstevel@tonic-gate     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1781*0Sstevel@tonic-gate 	if (regtry(prog, PL_reg_ganch))
1782*0Sstevel@tonic-gate 	    goto got_it;
1783*0Sstevel@tonic-gate 	goto phooey;
1784*0Sstevel@tonic-gate     }
1785*0Sstevel@tonic-gate 
1786*0Sstevel@tonic-gate     /* Messy cases:  unanchored match. */
1787*0Sstevel@tonic-gate     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1788*0Sstevel@tonic-gate 	/* we have /x+whatever/ */
1789*0Sstevel@tonic-gate 	/* it must be a one character string (XXXX Except UTF?) */
1790*0Sstevel@tonic-gate 	char ch;
1791*0Sstevel@tonic-gate #ifdef DEBUGGING
1792*0Sstevel@tonic-gate 	int did_match = 0;
1793*0Sstevel@tonic-gate #endif
1794*0Sstevel@tonic-gate 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1795*0Sstevel@tonic-gate 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1796*0Sstevel@tonic-gate 	ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1797*0Sstevel@tonic-gate 
1798*0Sstevel@tonic-gate 	if (do_utf8) {
1799*0Sstevel@tonic-gate 	    while (s < strend) {
1800*0Sstevel@tonic-gate 		if (*s == ch) {
1801*0Sstevel@tonic-gate 		    DEBUG_r( did_match = 1 );
1802*0Sstevel@tonic-gate 		    if (regtry(prog, s)) goto got_it;
1803*0Sstevel@tonic-gate 		    s += UTF8SKIP(s);
1804*0Sstevel@tonic-gate 		    while (s < strend && *s == ch)
1805*0Sstevel@tonic-gate 			s += UTF8SKIP(s);
1806*0Sstevel@tonic-gate 		}
1807*0Sstevel@tonic-gate 		s += UTF8SKIP(s);
1808*0Sstevel@tonic-gate 	    }
1809*0Sstevel@tonic-gate 	}
1810*0Sstevel@tonic-gate 	else {
1811*0Sstevel@tonic-gate 	    while (s < strend) {
1812*0Sstevel@tonic-gate 		if (*s == ch) {
1813*0Sstevel@tonic-gate 		    DEBUG_r( did_match = 1 );
1814*0Sstevel@tonic-gate 		    if (regtry(prog, s)) goto got_it;
1815*0Sstevel@tonic-gate 		    s++;
1816*0Sstevel@tonic-gate 		    while (s < strend && *s == ch)
1817*0Sstevel@tonic-gate 			s++;
1818*0Sstevel@tonic-gate 		}
1819*0Sstevel@tonic-gate 		s++;
1820*0Sstevel@tonic-gate 	    }
1821*0Sstevel@tonic-gate 	}
1822*0Sstevel@tonic-gate 	DEBUG_r(if (!did_match)
1823*0Sstevel@tonic-gate 		PerlIO_printf(Perl_debug_log,
1824*0Sstevel@tonic-gate                                   "Did not find anchored character...\n")
1825*0Sstevel@tonic-gate                );
1826*0Sstevel@tonic-gate     }
1827*0Sstevel@tonic-gate     /*SUPPRESS 560*/
1828*0Sstevel@tonic-gate     else if (prog->anchored_substr != Nullsv
1829*0Sstevel@tonic-gate 	      || prog->anchored_utf8 != Nullsv
1830*0Sstevel@tonic-gate 	      || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1831*0Sstevel@tonic-gate 		  && prog->float_max_offset < strend - s)) {
1832*0Sstevel@tonic-gate 	SV *must;
1833*0Sstevel@tonic-gate 	I32 back_max;
1834*0Sstevel@tonic-gate 	I32 back_min;
1835*0Sstevel@tonic-gate 	char *last;
1836*0Sstevel@tonic-gate 	char *last1;		/* Last position checked before */
1837*0Sstevel@tonic-gate #ifdef DEBUGGING
1838*0Sstevel@tonic-gate 	int did_match = 0;
1839*0Sstevel@tonic-gate #endif
1840*0Sstevel@tonic-gate 	if (prog->anchored_substr || prog->anchored_utf8) {
1841*0Sstevel@tonic-gate 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1842*0Sstevel@tonic-gate 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1843*0Sstevel@tonic-gate 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1844*0Sstevel@tonic-gate 	    back_max = back_min = prog->anchored_offset;
1845*0Sstevel@tonic-gate 	} else {
1846*0Sstevel@tonic-gate 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1847*0Sstevel@tonic-gate 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1848*0Sstevel@tonic-gate 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1849*0Sstevel@tonic-gate 	    back_max = prog->float_max_offset;
1850*0Sstevel@tonic-gate 	    back_min = prog->float_min_offset;
1851*0Sstevel@tonic-gate 	}
1852*0Sstevel@tonic-gate 	if (must == &PL_sv_undef)
1853*0Sstevel@tonic-gate 	    /* could not downgrade utf8 check substring, so must fail */
1854*0Sstevel@tonic-gate 	    goto phooey;
1855*0Sstevel@tonic-gate 
1856*0Sstevel@tonic-gate 	last = HOP3c(strend,	/* Cannot start after this */
1857*0Sstevel@tonic-gate 			  -(I32)(CHR_SVLEN(must)
1858*0Sstevel@tonic-gate 				 - (SvTAIL(must) != 0) + back_min), strbeg);
1859*0Sstevel@tonic-gate 
1860*0Sstevel@tonic-gate 	if (s > PL_bostr)
1861*0Sstevel@tonic-gate 	    last1 = HOPc(s, -1);
1862*0Sstevel@tonic-gate 	else
1863*0Sstevel@tonic-gate 	    last1 = s - 1;	/* bogus */
1864*0Sstevel@tonic-gate 
1865*0Sstevel@tonic-gate 	/* XXXX check_substr already used to find `s', can optimize if
1866*0Sstevel@tonic-gate 	   check_substr==must. */
1867*0Sstevel@tonic-gate 	scream_pos = -1;
1868*0Sstevel@tonic-gate 	dontbother = end_shift;
1869*0Sstevel@tonic-gate 	strend = HOPc(strend, -dontbother);
1870*0Sstevel@tonic-gate 	while ( (s <= last) &&
1871*0Sstevel@tonic-gate 		((flags & REXEC_SCREAM)
1872*0Sstevel@tonic-gate 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1873*0Sstevel@tonic-gate 				    end_shift, &scream_pos, 0))
1874*0Sstevel@tonic-gate 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1875*0Sstevel@tonic-gate 				  (unsigned char*)strend, must,
1876*0Sstevel@tonic-gate 				  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1877*0Sstevel@tonic-gate 	    /* we may be pointing at the wrong string */
1878*0Sstevel@tonic-gate 	    if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1879*0Sstevel@tonic-gate 		s = strbeg + (s - SvPVX(sv));
1880*0Sstevel@tonic-gate 	    DEBUG_r( did_match = 1 );
1881*0Sstevel@tonic-gate 	    if (HOPc(s, -back_max) > last1) {
1882*0Sstevel@tonic-gate 		last1 = HOPc(s, -back_min);
1883*0Sstevel@tonic-gate 		s = HOPc(s, -back_max);
1884*0Sstevel@tonic-gate 	    }
1885*0Sstevel@tonic-gate 	    else {
1886*0Sstevel@tonic-gate 		char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1887*0Sstevel@tonic-gate 
1888*0Sstevel@tonic-gate 		last1 = HOPc(s, -back_min);
1889*0Sstevel@tonic-gate 		s = t;
1890*0Sstevel@tonic-gate 	    }
1891*0Sstevel@tonic-gate 	    if (do_utf8) {
1892*0Sstevel@tonic-gate 		while (s <= last1) {
1893*0Sstevel@tonic-gate 		    if (regtry(prog, s))
1894*0Sstevel@tonic-gate 			goto got_it;
1895*0Sstevel@tonic-gate 		    s += UTF8SKIP(s);
1896*0Sstevel@tonic-gate 		}
1897*0Sstevel@tonic-gate 	    }
1898*0Sstevel@tonic-gate 	    else {
1899*0Sstevel@tonic-gate 		while (s <= last1) {
1900*0Sstevel@tonic-gate 		    if (regtry(prog, s))
1901*0Sstevel@tonic-gate 			goto got_it;
1902*0Sstevel@tonic-gate 		    s++;
1903*0Sstevel@tonic-gate 		}
1904*0Sstevel@tonic-gate 	    }
1905*0Sstevel@tonic-gate 	}
1906*0Sstevel@tonic-gate 	DEBUG_r(if (!did_match)
1907*0Sstevel@tonic-gate                     PerlIO_printf(Perl_debug_log,
1908*0Sstevel@tonic-gate                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1909*0Sstevel@tonic-gate 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
1910*0Sstevel@tonic-gate 			       ? "anchored" : "floating"),
1911*0Sstevel@tonic-gate 			      PL_colors[0],
1912*0Sstevel@tonic-gate 			      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1913*0Sstevel@tonic-gate 			      SvPVX(must),
1914*0Sstevel@tonic-gate                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1915*0Sstevel@tonic-gate                );
1916*0Sstevel@tonic-gate 	goto phooey;
1917*0Sstevel@tonic-gate     }
1918*0Sstevel@tonic-gate     else if ((c = prog->regstclass)) {
1919*0Sstevel@tonic-gate 	if (minlen) {
1920*0Sstevel@tonic-gate 	    I32 op = (U8)OP(prog->regstclass);
1921*0Sstevel@tonic-gate 	    /* don't bother with what can't match */
1922*0Sstevel@tonic-gate 	    if (PL_regkind[op] != EXACT && op != CANY)
1923*0Sstevel@tonic-gate 	        strend = HOPc(strend, -(minlen - 1));
1924*0Sstevel@tonic-gate 	}
1925*0Sstevel@tonic-gate 	DEBUG_r({
1926*0Sstevel@tonic-gate 	    SV *prop = sv_newmortal();
1927*0Sstevel@tonic-gate 	    char *s0;
1928*0Sstevel@tonic-gate 	    char *s1;
1929*0Sstevel@tonic-gate 	    int len0;
1930*0Sstevel@tonic-gate 	    int len1;
1931*0Sstevel@tonic-gate 
1932*0Sstevel@tonic-gate 	    regprop(prop, c);
1933*0Sstevel@tonic-gate 	    s0 = UTF ?
1934*0Sstevel@tonic-gate 	      pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1935*0Sstevel@tonic-gate 			     UNI_DISPLAY_REGEX) :
1936*0Sstevel@tonic-gate 	      SvPVX(prop);
1937*0Sstevel@tonic-gate 	    len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1938*0Sstevel@tonic-gate 	    s1 = UTF ?
1939*0Sstevel@tonic-gate 	      sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1940*0Sstevel@tonic-gate 	    len1 = UTF ? SvCUR(dsv1) : strend - s;
1941*0Sstevel@tonic-gate 	    PerlIO_printf(Perl_debug_log,
1942*0Sstevel@tonic-gate 			  "Matching stclass `%*.*s' against `%*.*s'\n",
1943*0Sstevel@tonic-gate 			  len0, len0, s0,
1944*0Sstevel@tonic-gate 			  len1, len1, s1);
1945*0Sstevel@tonic-gate 	});
1946*0Sstevel@tonic-gate   	if (find_byclass(prog, c, s, strend, startpos, 0))
1947*0Sstevel@tonic-gate 	    goto got_it;
1948*0Sstevel@tonic-gate 	DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1949*0Sstevel@tonic-gate     }
1950*0Sstevel@tonic-gate     else {
1951*0Sstevel@tonic-gate 	dontbother = 0;
1952*0Sstevel@tonic-gate 	if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1953*0Sstevel@tonic-gate 	    /* Trim the end. */
1954*0Sstevel@tonic-gate 	    char *last;
1955*0Sstevel@tonic-gate 	    SV* float_real;
1956*0Sstevel@tonic-gate 
1957*0Sstevel@tonic-gate 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1958*0Sstevel@tonic-gate 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1959*0Sstevel@tonic-gate 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1960*0Sstevel@tonic-gate 
1961*0Sstevel@tonic-gate 	    if (flags & REXEC_SCREAM) {
1962*0Sstevel@tonic-gate 		last = screaminstr(sv, float_real, s - strbeg,
1963*0Sstevel@tonic-gate 				   end_shift, &scream_pos, 1); /* last one */
1964*0Sstevel@tonic-gate 		if (!last)
1965*0Sstevel@tonic-gate 		    last = scream_olds; /* Only one occurrence. */
1966*0Sstevel@tonic-gate 		/* we may be pointing at the wrong string */
1967*0Sstevel@tonic-gate 		else if (RX_MATCH_COPIED(prog))
1968*0Sstevel@tonic-gate 		    s = strbeg + (s - SvPVX(sv));
1969*0Sstevel@tonic-gate 	    }
1970*0Sstevel@tonic-gate 	    else {
1971*0Sstevel@tonic-gate 		STRLEN len;
1972*0Sstevel@tonic-gate 		char *little = SvPV(float_real, len);
1973*0Sstevel@tonic-gate 
1974*0Sstevel@tonic-gate 		if (SvTAIL(float_real)) {
1975*0Sstevel@tonic-gate 		    if (memEQ(strend - len + 1, little, len - 1))
1976*0Sstevel@tonic-gate 			last = strend - len + 1;
1977*0Sstevel@tonic-gate 		    else if (!PL_multiline)
1978*0Sstevel@tonic-gate 			last = memEQ(strend - len, little, len)
1979*0Sstevel@tonic-gate 			    ? strend - len : Nullch;
1980*0Sstevel@tonic-gate 		    else
1981*0Sstevel@tonic-gate 			goto find_last;
1982*0Sstevel@tonic-gate 		} else {
1983*0Sstevel@tonic-gate 		  find_last:
1984*0Sstevel@tonic-gate 		    if (len)
1985*0Sstevel@tonic-gate 			last = rninstr(s, strend, little, little + len);
1986*0Sstevel@tonic-gate 		    else
1987*0Sstevel@tonic-gate 			last = strend;	/* matching `$' */
1988*0Sstevel@tonic-gate 		}
1989*0Sstevel@tonic-gate 	    }
1990*0Sstevel@tonic-gate 	    if (last == NULL) {
1991*0Sstevel@tonic-gate 		DEBUG_r(PerlIO_printf(Perl_debug_log,
1992*0Sstevel@tonic-gate 				      "%sCan't trim the tail, match fails (should not happen)%s\n",
1993*0Sstevel@tonic-gate 				      PL_colors[4],PL_colors[5]));
1994*0Sstevel@tonic-gate 		goto phooey; /* Should not happen! */
1995*0Sstevel@tonic-gate 	    }
1996*0Sstevel@tonic-gate 	    dontbother = strend - last + prog->float_min_offset;
1997*0Sstevel@tonic-gate 	}
1998*0Sstevel@tonic-gate 	if (minlen && (dontbother < minlen))
1999*0Sstevel@tonic-gate 	    dontbother = minlen - 1;
2000*0Sstevel@tonic-gate 	strend -= dontbother; 		   /* this one's always in bytes! */
2001*0Sstevel@tonic-gate 	/* We don't know much -- general case. */
2002*0Sstevel@tonic-gate 	if (do_utf8) {
2003*0Sstevel@tonic-gate 	    for (;;) {
2004*0Sstevel@tonic-gate 		if (regtry(prog, s))
2005*0Sstevel@tonic-gate 		    goto got_it;
2006*0Sstevel@tonic-gate 		if (s >= strend)
2007*0Sstevel@tonic-gate 		    break;
2008*0Sstevel@tonic-gate 		s += UTF8SKIP(s);
2009*0Sstevel@tonic-gate 	    };
2010*0Sstevel@tonic-gate 	}
2011*0Sstevel@tonic-gate 	else {
2012*0Sstevel@tonic-gate 	    do {
2013*0Sstevel@tonic-gate 		if (regtry(prog, s))
2014*0Sstevel@tonic-gate 		    goto got_it;
2015*0Sstevel@tonic-gate 	    } while (s++ < strend);
2016*0Sstevel@tonic-gate 	}
2017*0Sstevel@tonic-gate     }
2018*0Sstevel@tonic-gate 
2019*0Sstevel@tonic-gate     /* Failure. */
2020*0Sstevel@tonic-gate     goto phooey;
2021*0Sstevel@tonic-gate 
2022*0Sstevel@tonic-gate got_it:
2023*0Sstevel@tonic-gate     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2024*0Sstevel@tonic-gate 
2025*0Sstevel@tonic-gate     if (PL_reg_eval_set) {
2026*0Sstevel@tonic-gate 	/* Preserve the current value of $^R */
2027*0Sstevel@tonic-gate 	if (oreplsv != GvSV(PL_replgv))
2028*0Sstevel@tonic-gate 	    sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2029*0Sstevel@tonic-gate 						  restored, the value remains
2030*0Sstevel@tonic-gate 						  the same. */
2031*0Sstevel@tonic-gate 	restore_pos(aTHX_ 0);
2032*0Sstevel@tonic-gate     }
2033*0Sstevel@tonic-gate 
2034*0Sstevel@tonic-gate     /* make sure $`, $&, $', and $digit will work later */
2035*0Sstevel@tonic-gate     if ( !(flags & REXEC_NOT_FIRST) ) {
2036*0Sstevel@tonic-gate 	if (RX_MATCH_COPIED(prog)) {
2037*0Sstevel@tonic-gate 	    Safefree(prog->subbeg);
2038*0Sstevel@tonic-gate 	    RX_MATCH_COPIED_off(prog);
2039*0Sstevel@tonic-gate 	}
2040*0Sstevel@tonic-gate 	if (flags & REXEC_COPY_STR) {
2041*0Sstevel@tonic-gate 	    I32 i = PL_regeol - startpos + (stringarg - strbeg);
2042*0Sstevel@tonic-gate 
2043*0Sstevel@tonic-gate 	    s = savepvn(strbeg, i);
2044*0Sstevel@tonic-gate 	    prog->subbeg = s;
2045*0Sstevel@tonic-gate 	    prog->sublen = i;
2046*0Sstevel@tonic-gate 	    RX_MATCH_COPIED_on(prog);
2047*0Sstevel@tonic-gate 	}
2048*0Sstevel@tonic-gate 	else {
2049*0Sstevel@tonic-gate 	    prog->subbeg = strbeg;
2050*0Sstevel@tonic-gate 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2051*0Sstevel@tonic-gate 	}
2052*0Sstevel@tonic-gate     }
2053*0Sstevel@tonic-gate 
2054*0Sstevel@tonic-gate     return 1;
2055*0Sstevel@tonic-gate 
2056*0Sstevel@tonic-gate phooey:
2057*0Sstevel@tonic-gate     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2058*0Sstevel@tonic-gate 			  PL_colors[4],PL_colors[5]));
2059*0Sstevel@tonic-gate     if (PL_reg_eval_set)
2060*0Sstevel@tonic-gate 	restore_pos(aTHX_ 0);
2061*0Sstevel@tonic-gate     return 0;
2062*0Sstevel@tonic-gate }
2063*0Sstevel@tonic-gate 
2064*0Sstevel@tonic-gate /*
2065*0Sstevel@tonic-gate  - regtry - try match at specific point
2066*0Sstevel@tonic-gate  */
2067*0Sstevel@tonic-gate STATIC I32			/* 0 failure, 1 success */
S_regtry(pTHX_ regexp * prog,char * startpos)2068*0Sstevel@tonic-gate S_regtry(pTHX_ regexp *prog, char *startpos)
2069*0Sstevel@tonic-gate {
2070*0Sstevel@tonic-gate     register I32 i;
2071*0Sstevel@tonic-gate     register I32 *sp;
2072*0Sstevel@tonic-gate     register I32 *ep;
2073*0Sstevel@tonic-gate     CHECKPOINT lastcp;
2074*0Sstevel@tonic-gate 
2075*0Sstevel@tonic-gate #ifdef DEBUGGING
2076*0Sstevel@tonic-gate     PL_regindent = 0;	/* XXXX Not good when matches are reenterable... */
2077*0Sstevel@tonic-gate #endif
2078*0Sstevel@tonic-gate     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2079*0Sstevel@tonic-gate 	MAGIC *mg;
2080*0Sstevel@tonic-gate 
2081*0Sstevel@tonic-gate 	PL_reg_eval_set = RS_init;
2082*0Sstevel@tonic-gate 	DEBUG_r(DEBUG_s(
2083*0Sstevel@tonic-gate 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2084*0Sstevel@tonic-gate 			  (IV)(PL_stack_sp - PL_stack_base));
2085*0Sstevel@tonic-gate 	    ));
2086*0Sstevel@tonic-gate 	SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2087*0Sstevel@tonic-gate 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2088*0Sstevel@tonic-gate 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2089*0Sstevel@tonic-gate 	SAVETMPS;
2090*0Sstevel@tonic-gate 	/* Apparently this is not needed, judging by wantarray. */
2091*0Sstevel@tonic-gate 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2092*0Sstevel@tonic-gate 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2093*0Sstevel@tonic-gate 
2094*0Sstevel@tonic-gate 	if (PL_reg_sv) {
2095*0Sstevel@tonic-gate 	    /* Make $_ available to executed code. */
2096*0Sstevel@tonic-gate 	    if (PL_reg_sv != DEFSV) {
2097*0Sstevel@tonic-gate 		/* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2098*0Sstevel@tonic-gate 		SAVESPTR(DEFSV);
2099*0Sstevel@tonic-gate 		DEFSV = PL_reg_sv;
2100*0Sstevel@tonic-gate 	    }
2101*0Sstevel@tonic-gate 
2102*0Sstevel@tonic-gate 	    if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2103*0Sstevel@tonic-gate 		  && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2104*0Sstevel@tonic-gate 		/* prepare for quick setting of pos */
2105*0Sstevel@tonic-gate 		sv_magic(PL_reg_sv, (SV*)0,
2106*0Sstevel@tonic-gate 			PERL_MAGIC_regex_global, Nullch, 0);
2107*0Sstevel@tonic-gate 		mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2108*0Sstevel@tonic-gate 		mg->mg_len = -1;
2109*0Sstevel@tonic-gate 	    }
2110*0Sstevel@tonic-gate 	    PL_reg_magic    = mg;
2111*0Sstevel@tonic-gate 	    PL_reg_oldpos   = mg->mg_len;
2112*0Sstevel@tonic-gate 	    SAVEDESTRUCTOR_X(restore_pos, 0);
2113*0Sstevel@tonic-gate         }
2114*0Sstevel@tonic-gate         if (!PL_reg_curpm) {
2115*0Sstevel@tonic-gate 	    Newz(22,PL_reg_curpm, 1, PMOP);
2116*0Sstevel@tonic-gate #ifdef USE_ITHREADS
2117*0Sstevel@tonic-gate             {
2118*0Sstevel@tonic-gate                 SV* repointer = newSViv(0);
2119*0Sstevel@tonic-gate                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2120*0Sstevel@tonic-gate                 SvFLAGS(repointer) |= SVf_BREAK;
2121*0Sstevel@tonic-gate                 av_push(PL_regex_padav,repointer);
2122*0Sstevel@tonic-gate                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2123*0Sstevel@tonic-gate                 PL_regex_pad = AvARRAY(PL_regex_padav);
2124*0Sstevel@tonic-gate             }
2125*0Sstevel@tonic-gate #endif
2126*0Sstevel@tonic-gate         }
2127*0Sstevel@tonic-gate 	PM_SETRE(PL_reg_curpm, prog);
2128*0Sstevel@tonic-gate 	PL_reg_oldcurpm = PL_curpm;
2129*0Sstevel@tonic-gate 	PL_curpm = PL_reg_curpm;
2130*0Sstevel@tonic-gate 	if (RX_MATCH_COPIED(prog)) {
2131*0Sstevel@tonic-gate 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2132*0Sstevel@tonic-gate 		since it may be needed if this match fails.  Thus
2133*0Sstevel@tonic-gate 		$` inside (?{}) could fail... */
2134*0Sstevel@tonic-gate 	    PL_reg_oldsaved = prog->subbeg;
2135*0Sstevel@tonic-gate 	    PL_reg_oldsavedlen = prog->sublen;
2136*0Sstevel@tonic-gate 	    RX_MATCH_COPIED_off(prog);
2137*0Sstevel@tonic-gate 	}
2138*0Sstevel@tonic-gate 	else
2139*0Sstevel@tonic-gate 	    PL_reg_oldsaved = Nullch;
2140*0Sstevel@tonic-gate 	prog->subbeg = PL_bostr;
2141*0Sstevel@tonic-gate 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2142*0Sstevel@tonic-gate     }
2143*0Sstevel@tonic-gate     prog->startp[0] = startpos - PL_bostr;
2144*0Sstevel@tonic-gate     PL_reginput = startpos;
2145*0Sstevel@tonic-gate     PL_regstartp = prog->startp;
2146*0Sstevel@tonic-gate     PL_regendp = prog->endp;
2147*0Sstevel@tonic-gate     PL_reglastparen = &prog->lastparen;
2148*0Sstevel@tonic-gate     PL_reglastcloseparen = &prog->lastcloseparen;
2149*0Sstevel@tonic-gate     prog->lastparen = 0;
2150*0Sstevel@tonic-gate     prog->lastcloseparen = 0;
2151*0Sstevel@tonic-gate     PL_regsize = 0;
2152*0Sstevel@tonic-gate     DEBUG_r(PL_reg_starttry = startpos);
2153*0Sstevel@tonic-gate     if (PL_reg_start_tmpl <= prog->nparens) {
2154*0Sstevel@tonic-gate 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2155*0Sstevel@tonic-gate         if(PL_reg_start_tmp)
2156*0Sstevel@tonic-gate             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2157*0Sstevel@tonic-gate         else
2158*0Sstevel@tonic-gate             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2159*0Sstevel@tonic-gate     }
2160*0Sstevel@tonic-gate 
2161*0Sstevel@tonic-gate     /* XXXX What this code is doing here?!!!  There should be no need
2162*0Sstevel@tonic-gate        to do this again and again, PL_reglastparen should take care of
2163*0Sstevel@tonic-gate        this!  --ilya*/
2164*0Sstevel@tonic-gate 
2165*0Sstevel@tonic-gate     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2166*0Sstevel@tonic-gate      * Actually, the code in regcppop() (which Ilya may be meaning by
2167*0Sstevel@tonic-gate      * PL_reglastparen), is not needed at all by the test suite
2168*0Sstevel@tonic-gate      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2169*0Sstevel@tonic-gate      * enough, for building DynaLoader, or otherwise this
2170*0Sstevel@tonic-gate      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2171*0Sstevel@tonic-gate      * will happen.  Meanwhile, this code *is* needed for the
2172*0Sstevel@tonic-gate      * above-mentioned test suite tests to succeed.  The common theme
2173*0Sstevel@tonic-gate      * on those tests seems to be returning null fields from matches.
2174*0Sstevel@tonic-gate      * --jhi */
2175*0Sstevel@tonic-gate #if 1
2176*0Sstevel@tonic-gate     sp = prog->startp;
2177*0Sstevel@tonic-gate     ep = prog->endp;
2178*0Sstevel@tonic-gate     if (prog->nparens) {
2179*0Sstevel@tonic-gate 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2180*0Sstevel@tonic-gate 	    *++sp = -1;
2181*0Sstevel@tonic-gate 	    *++ep = -1;
2182*0Sstevel@tonic-gate 	}
2183*0Sstevel@tonic-gate     }
2184*0Sstevel@tonic-gate #endif
2185*0Sstevel@tonic-gate     REGCP_SET(lastcp);
2186*0Sstevel@tonic-gate     if (regmatch(prog->program + 1)) {
2187*0Sstevel@tonic-gate 	prog->endp[0] = PL_reginput - PL_bostr;
2188*0Sstevel@tonic-gate 	return 1;
2189*0Sstevel@tonic-gate     }
2190*0Sstevel@tonic-gate     REGCP_UNWIND(lastcp);
2191*0Sstevel@tonic-gate     return 0;
2192*0Sstevel@tonic-gate }
2193*0Sstevel@tonic-gate 
2194*0Sstevel@tonic-gate #define RE_UNWIND_BRANCH	1
2195*0Sstevel@tonic-gate #define RE_UNWIND_BRANCHJ	2
2196*0Sstevel@tonic-gate 
2197*0Sstevel@tonic-gate union re_unwind_t;
2198*0Sstevel@tonic-gate 
2199*0Sstevel@tonic-gate typedef struct {		/* XX: makes sense to enlarge it... */
2200*0Sstevel@tonic-gate     I32 type;
2201*0Sstevel@tonic-gate     I32 prev;
2202*0Sstevel@tonic-gate     CHECKPOINT lastcp;
2203*0Sstevel@tonic-gate } re_unwind_generic_t;
2204*0Sstevel@tonic-gate 
2205*0Sstevel@tonic-gate typedef struct {
2206*0Sstevel@tonic-gate     I32 type;
2207*0Sstevel@tonic-gate     I32 prev;
2208*0Sstevel@tonic-gate     CHECKPOINT lastcp;
2209*0Sstevel@tonic-gate     I32 lastparen;
2210*0Sstevel@tonic-gate     regnode *next;
2211*0Sstevel@tonic-gate     char *locinput;
2212*0Sstevel@tonic-gate     I32 nextchr;
2213*0Sstevel@tonic-gate #ifdef DEBUGGING
2214*0Sstevel@tonic-gate     int regindent;
2215*0Sstevel@tonic-gate #endif
2216*0Sstevel@tonic-gate } re_unwind_branch_t;
2217*0Sstevel@tonic-gate 
2218*0Sstevel@tonic-gate typedef union re_unwind_t {
2219*0Sstevel@tonic-gate     I32 type;
2220*0Sstevel@tonic-gate     re_unwind_generic_t generic;
2221*0Sstevel@tonic-gate     re_unwind_branch_t branch;
2222*0Sstevel@tonic-gate } re_unwind_t;
2223*0Sstevel@tonic-gate 
2224*0Sstevel@tonic-gate #define sayYES goto yes
2225*0Sstevel@tonic-gate #define sayNO goto no
2226*0Sstevel@tonic-gate #define sayNO_ANYOF goto no_anyof
2227*0Sstevel@tonic-gate #define sayYES_FINAL goto yes_final
2228*0Sstevel@tonic-gate #define sayYES_LOUD  goto yes_loud
2229*0Sstevel@tonic-gate #define sayNO_FINAL  goto no_final
2230*0Sstevel@tonic-gate #define sayNO_SILENT goto do_no
2231*0Sstevel@tonic-gate #define saySAME(x) if (x) goto yes; else goto no
2232*0Sstevel@tonic-gate 
2233*0Sstevel@tonic-gate #define REPORT_CODE_OFF 24
2234*0Sstevel@tonic-gate 
2235*0Sstevel@tonic-gate /*
2236*0Sstevel@tonic-gate  - regmatch - main matching routine
2237*0Sstevel@tonic-gate  *
2238*0Sstevel@tonic-gate  * Conceptually the strategy is simple:  check to see whether the current
2239*0Sstevel@tonic-gate  * node matches, call self recursively to see whether the rest matches,
2240*0Sstevel@tonic-gate  * and then act accordingly.  In practice we make some effort to avoid
2241*0Sstevel@tonic-gate  * recursion, in particular by going through "ordinary" nodes (that don't
2242*0Sstevel@tonic-gate  * need to know whether the rest of the match failed) by a loop instead of
2243*0Sstevel@tonic-gate  * by recursion.
2244*0Sstevel@tonic-gate  */
2245*0Sstevel@tonic-gate /* [lwall] I've hoisted the register declarations to the outer block in order to
2246*0Sstevel@tonic-gate  * maybe save a little bit of pushing and popping on the stack.  It also takes
2247*0Sstevel@tonic-gate  * advantage of machines that use a register save mask on subroutine entry.
2248*0Sstevel@tonic-gate  */
2249*0Sstevel@tonic-gate STATIC I32			/* 0 failure, 1 success */
S_regmatch(pTHX_ regnode * prog)2250*0Sstevel@tonic-gate S_regmatch(pTHX_ regnode *prog)
2251*0Sstevel@tonic-gate {
2252*0Sstevel@tonic-gate     register regnode *scan;	/* Current node. */
2253*0Sstevel@tonic-gate     regnode *next;		/* Next node. */
2254*0Sstevel@tonic-gate     regnode *inner;		/* Next node in internal branch. */
2255*0Sstevel@tonic-gate     register I32 nextchr;	/* renamed nextchr - nextchar colides with
2256*0Sstevel@tonic-gate 				   function of same name */
2257*0Sstevel@tonic-gate     register I32 n;		/* no or next */
2258*0Sstevel@tonic-gate     register I32 ln = 0;	/* len or last */
2259*0Sstevel@tonic-gate     register char *s = Nullch;	/* operand or save */
2260*0Sstevel@tonic-gate     register char *locinput = PL_reginput;
2261*0Sstevel@tonic-gate     register I32 c1 = 0, c2 = 0, paren;	/* case fold search, parenth */
2262*0Sstevel@tonic-gate     int minmod = 0, sw = 0, logical = 0;
2263*0Sstevel@tonic-gate     I32 unwind = 0;
2264*0Sstevel@tonic-gate #if 0
2265*0Sstevel@tonic-gate     I32 firstcp = PL_savestack_ix;
2266*0Sstevel@tonic-gate #endif
2267*0Sstevel@tonic-gate     register bool do_utf8 = PL_reg_match_utf8;
2268*0Sstevel@tonic-gate #ifdef DEBUGGING
2269*0Sstevel@tonic-gate     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2270*0Sstevel@tonic-gate     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2271*0Sstevel@tonic-gate     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2272*0Sstevel@tonic-gate #endif
2273*0Sstevel@tonic-gate 
2274*0Sstevel@tonic-gate #ifdef DEBUGGING
2275*0Sstevel@tonic-gate     PL_regindent++;
2276*0Sstevel@tonic-gate #endif
2277*0Sstevel@tonic-gate 
2278*0Sstevel@tonic-gate     /* Note that nextchr is a byte even in UTF */
2279*0Sstevel@tonic-gate     nextchr = UCHARAT(locinput);
2280*0Sstevel@tonic-gate     scan = prog;
2281*0Sstevel@tonic-gate     while (scan != NULL) {
2282*0Sstevel@tonic-gate 
2283*0Sstevel@tonic-gate         DEBUG_r( {
2284*0Sstevel@tonic-gate 	    SV *prop = sv_newmortal();
2285*0Sstevel@tonic-gate 	    int docolor = *PL_colors[0];
2286*0Sstevel@tonic-gate 	    int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2287*0Sstevel@tonic-gate 	    int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2288*0Sstevel@tonic-gate 	    /* The part of the string before starttry has one color
2289*0Sstevel@tonic-gate 	       (pref0_len chars), between starttry and current
2290*0Sstevel@tonic-gate 	       position another one (pref_len - pref0_len chars),
2291*0Sstevel@tonic-gate 	       after the current position the third one.
2292*0Sstevel@tonic-gate 	       We assume that pref0_len <= pref_len, otherwise we
2293*0Sstevel@tonic-gate 	       decrease pref0_len.  */
2294*0Sstevel@tonic-gate 	    int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2295*0Sstevel@tonic-gate 		? (5 + taill) - l : locinput - PL_bostr;
2296*0Sstevel@tonic-gate 	    int pref0_len;
2297*0Sstevel@tonic-gate 
2298*0Sstevel@tonic-gate 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2299*0Sstevel@tonic-gate 		pref_len++;
2300*0Sstevel@tonic-gate 	    pref0_len = pref_len  - (locinput - PL_reg_starttry);
2301*0Sstevel@tonic-gate 	    if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2302*0Sstevel@tonic-gate 		l = ( PL_regeol - locinput > (5 + taill) - pref_len
2303*0Sstevel@tonic-gate 		      ? (5 + taill) - pref_len : PL_regeol - locinput);
2304*0Sstevel@tonic-gate 	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2305*0Sstevel@tonic-gate 		l--;
2306*0Sstevel@tonic-gate 	    if (pref0_len < 0)
2307*0Sstevel@tonic-gate 		pref0_len = 0;
2308*0Sstevel@tonic-gate 	    if (pref0_len > pref_len)
2309*0Sstevel@tonic-gate 		pref0_len = pref_len;
2310*0Sstevel@tonic-gate 	    regprop(prop, scan);
2311*0Sstevel@tonic-gate 	    {
2312*0Sstevel@tonic-gate 	      char *s0 =
2313*0Sstevel@tonic-gate 		do_utf8 && OP(scan) != CANY ?
2314*0Sstevel@tonic-gate 		pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2315*0Sstevel@tonic-gate 			       pref0_len, 60, UNI_DISPLAY_REGEX) :
2316*0Sstevel@tonic-gate 		locinput - pref_len;
2317*0Sstevel@tonic-gate 	      int len0 = do_utf8 ? strlen(s0) : pref0_len;
2318*0Sstevel@tonic-gate 	      char *s1 = do_utf8 && OP(scan) != CANY ?
2319*0Sstevel@tonic-gate 		pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2320*0Sstevel@tonic-gate 			       pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2321*0Sstevel@tonic-gate 		locinput - pref_len + pref0_len;
2322*0Sstevel@tonic-gate 	      int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2323*0Sstevel@tonic-gate 	      char *s2 = do_utf8 && OP(scan) != CANY ?
2324*0Sstevel@tonic-gate 		pv_uni_display(dsv2, (U8*)locinput,
2325*0Sstevel@tonic-gate 			       PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2326*0Sstevel@tonic-gate 		locinput;
2327*0Sstevel@tonic-gate 	      int len2 = do_utf8 ? strlen(s2) : l;
2328*0Sstevel@tonic-gate 	      PerlIO_printf(Perl_debug_log,
2329*0Sstevel@tonic-gate 			    "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2330*0Sstevel@tonic-gate 			    (IV)(locinput - PL_bostr),
2331*0Sstevel@tonic-gate 			    PL_colors[4],
2332*0Sstevel@tonic-gate 			    len0, s0,
2333*0Sstevel@tonic-gate 			    PL_colors[5],
2334*0Sstevel@tonic-gate 			    PL_colors[2],
2335*0Sstevel@tonic-gate 			    len1, s1,
2336*0Sstevel@tonic-gate 			    PL_colors[3],
2337*0Sstevel@tonic-gate 			    (docolor ? "" : "> <"),
2338*0Sstevel@tonic-gate 			    PL_colors[0],
2339*0Sstevel@tonic-gate 			    len2, s2,
2340*0Sstevel@tonic-gate 			    PL_colors[1],
2341*0Sstevel@tonic-gate 			    15 - l - pref_len + 1,
2342*0Sstevel@tonic-gate 			    "",
2343*0Sstevel@tonic-gate 			    (IV)(scan - PL_regprogram), PL_regindent*2, "",
2344*0Sstevel@tonic-gate 			    SvPVX(prop));
2345*0Sstevel@tonic-gate 	    }
2346*0Sstevel@tonic-gate 	});
2347*0Sstevel@tonic-gate 
2348*0Sstevel@tonic-gate 	next = scan + NEXT_OFF(scan);
2349*0Sstevel@tonic-gate 	if (next == scan)
2350*0Sstevel@tonic-gate 	    next = NULL;
2351*0Sstevel@tonic-gate 
2352*0Sstevel@tonic-gate 	switch (OP(scan)) {
2353*0Sstevel@tonic-gate 	case BOL:
2354*0Sstevel@tonic-gate 	    if (locinput == PL_bostr || (PL_multiline &&
2355*0Sstevel@tonic-gate 		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2356*0Sstevel@tonic-gate 	    {
2357*0Sstevel@tonic-gate 		/* regtill = regbol; */
2358*0Sstevel@tonic-gate 		break;
2359*0Sstevel@tonic-gate 	    }
2360*0Sstevel@tonic-gate 	    sayNO;
2361*0Sstevel@tonic-gate 	case MBOL:
2362*0Sstevel@tonic-gate 	    if (locinput == PL_bostr ||
2363*0Sstevel@tonic-gate 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2364*0Sstevel@tonic-gate 	    {
2365*0Sstevel@tonic-gate 		break;
2366*0Sstevel@tonic-gate 	    }
2367*0Sstevel@tonic-gate 	    sayNO;
2368*0Sstevel@tonic-gate 	case SBOL:
2369*0Sstevel@tonic-gate 	    if (locinput == PL_bostr)
2370*0Sstevel@tonic-gate 		break;
2371*0Sstevel@tonic-gate 	    sayNO;
2372*0Sstevel@tonic-gate 	case GPOS:
2373*0Sstevel@tonic-gate 	    if (locinput == PL_reg_ganch)
2374*0Sstevel@tonic-gate 		break;
2375*0Sstevel@tonic-gate 	    sayNO;
2376*0Sstevel@tonic-gate 	case EOL:
2377*0Sstevel@tonic-gate 	    if (PL_multiline)
2378*0Sstevel@tonic-gate 		goto meol;
2379*0Sstevel@tonic-gate 	    else
2380*0Sstevel@tonic-gate 		goto seol;
2381*0Sstevel@tonic-gate 	case MEOL:
2382*0Sstevel@tonic-gate 	  meol:
2383*0Sstevel@tonic-gate 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2384*0Sstevel@tonic-gate 		sayNO;
2385*0Sstevel@tonic-gate 	    break;
2386*0Sstevel@tonic-gate 	case SEOL:
2387*0Sstevel@tonic-gate 	  seol:
2388*0Sstevel@tonic-gate 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2389*0Sstevel@tonic-gate 		sayNO;
2390*0Sstevel@tonic-gate 	    if (PL_regeol - locinput > 1)
2391*0Sstevel@tonic-gate 		sayNO;
2392*0Sstevel@tonic-gate 	    break;
2393*0Sstevel@tonic-gate 	case EOS:
2394*0Sstevel@tonic-gate 	    if (PL_regeol != locinput)
2395*0Sstevel@tonic-gate 		sayNO;
2396*0Sstevel@tonic-gate 	    break;
2397*0Sstevel@tonic-gate 	case SANY:
2398*0Sstevel@tonic-gate 	    if (!nextchr && locinput >= PL_regeol)
2399*0Sstevel@tonic-gate 		sayNO;
2400*0Sstevel@tonic-gate  	    if (do_utf8) {
2401*0Sstevel@tonic-gate 	        locinput += PL_utf8skip[nextchr];
2402*0Sstevel@tonic-gate 		if (locinput > PL_regeol)
2403*0Sstevel@tonic-gate  		    sayNO;
2404*0Sstevel@tonic-gate  		nextchr = UCHARAT(locinput);
2405*0Sstevel@tonic-gate  	    }
2406*0Sstevel@tonic-gate  	    else
2407*0Sstevel@tonic-gate  		nextchr = UCHARAT(++locinput);
2408*0Sstevel@tonic-gate 	    break;
2409*0Sstevel@tonic-gate 	case CANY:
2410*0Sstevel@tonic-gate 	    if (!nextchr && locinput >= PL_regeol)
2411*0Sstevel@tonic-gate 		sayNO;
2412*0Sstevel@tonic-gate 	    nextchr = UCHARAT(++locinput);
2413*0Sstevel@tonic-gate 	    break;
2414*0Sstevel@tonic-gate 	case REG_ANY:
2415*0Sstevel@tonic-gate 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2416*0Sstevel@tonic-gate 		sayNO;
2417*0Sstevel@tonic-gate 	    if (do_utf8) {
2418*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2419*0Sstevel@tonic-gate 		if (locinput > PL_regeol)
2420*0Sstevel@tonic-gate 		    sayNO;
2421*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2422*0Sstevel@tonic-gate 	    }
2423*0Sstevel@tonic-gate 	    else
2424*0Sstevel@tonic-gate 		nextchr = UCHARAT(++locinput);
2425*0Sstevel@tonic-gate 	    break;
2426*0Sstevel@tonic-gate 	case EXACT:
2427*0Sstevel@tonic-gate 	    s = STRING(scan);
2428*0Sstevel@tonic-gate 	    ln = STR_LEN(scan);
2429*0Sstevel@tonic-gate 	    if (do_utf8 != UTF) {
2430*0Sstevel@tonic-gate 		/* The target and the pattern have differing utf8ness. */
2431*0Sstevel@tonic-gate 		char *l = locinput;
2432*0Sstevel@tonic-gate 		char *e = s + ln;
2433*0Sstevel@tonic-gate 		STRLEN ulen;
2434*0Sstevel@tonic-gate 
2435*0Sstevel@tonic-gate 		if (do_utf8) {
2436*0Sstevel@tonic-gate 		    /* The target is utf8, the pattern is not utf8. */
2437*0Sstevel@tonic-gate 		    while (s < e) {
2438*0Sstevel@tonic-gate 			if (l >= PL_regeol)
2439*0Sstevel@tonic-gate 			     sayNO;
2440*0Sstevel@tonic-gate 			if (NATIVE_TO_UNI(*(U8*)s) !=
2441*0Sstevel@tonic-gate 			    utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2442*0Sstevel@tonic-gate 					   ckWARN(WARN_UTF8) ?
2443*0Sstevel@tonic-gate 					   0 : UTF8_ALLOW_ANY))
2444*0Sstevel@tonic-gate 			     sayNO;
2445*0Sstevel@tonic-gate 			l += ulen;
2446*0Sstevel@tonic-gate 			s ++;
2447*0Sstevel@tonic-gate 		    }
2448*0Sstevel@tonic-gate 		}
2449*0Sstevel@tonic-gate 		else {
2450*0Sstevel@tonic-gate 		    /* The target is not utf8, the pattern is utf8. */
2451*0Sstevel@tonic-gate 		    while (s < e) {
2452*0Sstevel@tonic-gate 			if (l >= PL_regeol)
2453*0Sstevel@tonic-gate 			    sayNO;
2454*0Sstevel@tonic-gate 			if (NATIVE_TO_UNI(*((U8*)l)) !=
2455*0Sstevel@tonic-gate 			    utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2456*0Sstevel@tonic-gate 					   ckWARN(WARN_UTF8) ?
2457*0Sstevel@tonic-gate 					   0 : UTF8_ALLOW_ANY))
2458*0Sstevel@tonic-gate 			    sayNO;
2459*0Sstevel@tonic-gate 			s += ulen;
2460*0Sstevel@tonic-gate 			l ++;
2461*0Sstevel@tonic-gate 		    }
2462*0Sstevel@tonic-gate 		}
2463*0Sstevel@tonic-gate 		locinput = l;
2464*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2465*0Sstevel@tonic-gate 		break;
2466*0Sstevel@tonic-gate 	    }
2467*0Sstevel@tonic-gate 	    /* The target and the pattern have the same utf8ness. */
2468*0Sstevel@tonic-gate 	    /* Inline the first character, for speed. */
2469*0Sstevel@tonic-gate 	    if (UCHARAT(s) != nextchr)
2470*0Sstevel@tonic-gate 		sayNO;
2471*0Sstevel@tonic-gate 	    if (PL_regeol - locinput < ln)
2472*0Sstevel@tonic-gate 		sayNO;
2473*0Sstevel@tonic-gate 	    if (ln > 1 && memNE(s, locinput, ln))
2474*0Sstevel@tonic-gate 		sayNO;
2475*0Sstevel@tonic-gate 	    locinput += ln;
2476*0Sstevel@tonic-gate 	    nextchr = UCHARAT(locinput);
2477*0Sstevel@tonic-gate 	    break;
2478*0Sstevel@tonic-gate 	case EXACTFL:
2479*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2480*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2481*0Sstevel@tonic-gate 	case EXACTF:
2482*0Sstevel@tonic-gate 	    s = STRING(scan);
2483*0Sstevel@tonic-gate 	    ln = STR_LEN(scan);
2484*0Sstevel@tonic-gate 
2485*0Sstevel@tonic-gate 	    if (do_utf8 || UTF) {
2486*0Sstevel@tonic-gate 	      /* Either target or the pattern are utf8. */
2487*0Sstevel@tonic-gate 		char *l = locinput;
2488*0Sstevel@tonic-gate 		char *e = PL_regeol;
2489*0Sstevel@tonic-gate 
2490*0Sstevel@tonic-gate 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2491*0Sstevel@tonic-gate 			       l, &e, 0,  do_utf8)) {
2492*0Sstevel@tonic-gate 		     /* One more case for the sharp s:
2493*0Sstevel@tonic-gate 		      * pack("U0U*", 0xDF) =~ /ss/i,
2494*0Sstevel@tonic-gate 		      * the 0xC3 0x9F are the UTF-8
2495*0Sstevel@tonic-gate 		      * byte sequence for the U+00DF. */
2496*0Sstevel@tonic-gate 		     if (!(do_utf8 &&
2497*0Sstevel@tonic-gate 			   toLOWER(s[0]) == 's' &&
2498*0Sstevel@tonic-gate 			   ln >= 2 &&
2499*0Sstevel@tonic-gate 			   toLOWER(s[1]) == 's' &&
2500*0Sstevel@tonic-gate 			   (U8)l[0] == 0xC3 &&
2501*0Sstevel@tonic-gate 			   e - l >= 2 &&
2502*0Sstevel@tonic-gate 			   (U8)l[1] == 0x9F))
2503*0Sstevel@tonic-gate 			  sayNO;
2504*0Sstevel@tonic-gate 		}
2505*0Sstevel@tonic-gate 		locinput = e;
2506*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2507*0Sstevel@tonic-gate 		break;
2508*0Sstevel@tonic-gate 	    }
2509*0Sstevel@tonic-gate 
2510*0Sstevel@tonic-gate 	    /* Neither the target and the pattern are utf8. */
2511*0Sstevel@tonic-gate 
2512*0Sstevel@tonic-gate 	    /* Inline the first character, for speed. */
2513*0Sstevel@tonic-gate 	    if (UCHARAT(s) != nextchr &&
2514*0Sstevel@tonic-gate 		UCHARAT(s) != ((OP(scan) == EXACTF)
2515*0Sstevel@tonic-gate 			       ? PL_fold : PL_fold_locale)[nextchr])
2516*0Sstevel@tonic-gate 		sayNO;
2517*0Sstevel@tonic-gate 	    if (PL_regeol - locinput < ln)
2518*0Sstevel@tonic-gate 		sayNO;
2519*0Sstevel@tonic-gate 	    if (ln > 1 && (OP(scan) == EXACTF
2520*0Sstevel@tonic-gate 			   ? ibcmp(s, locinput, ln)
2521*0Sstevel@tonic-gate 			   : ibcmp_locale(s, locinput, ln)))
2522*0Sstevel@tonic-gate 		sayNO;
2523*0Sstevel@tonic-gate 	    locinput += ln;
2524*0Sstevel@tonic-gate 	    nextchr = UCHARAT(locinput);
2525*0Sstevel@tonic-gate 	    break;
2526*0Sstevel@tonic-gate 	case ANYOF:
2527*0Sstevel@tonic-gate 	    if (do_utf8) {
2528*0Sstevel@tonic-gate 	        STRLEN inclasslen = PL_regeol - locinput;
2529*0Sstevel@tonic-gate 
2530*0Sstevel@tonic-gate 	        if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2531*0Sstevel@tonic-gate 		    sayNO_ANYOF;
2532*0Sstevel@tonic-gate 		if (locinput >= PL_regeol)
2533*0Sstevel@tonic-gate 		    sayNO;
2534*0Sstevel@tonic-gate 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2535*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2536*0Sstevel@tonic-gate 		break;
2537*0Sstevel@tonic-gate 	    }
2538*0Sstevel@tonic-gate 	    else {
2539*0Sstevel@tonic-gate 		if (nextchr < 0)
2540*0Sstevel@tonic-gate 		    nextchr = UCHARAT(locinput);
2541*0Sstevel@tonic-gate 		if (!REGINCLASS(scan, (U8*)locinput))
2542*0Sstevel@tonic-gate 		    sayNO_ANYOF;
2543*0Sstevel@tonic-gate 		if (!nextchr && locinput >= PL_regeol)
2544*0Sstevel@tonic-gate 		    sayNO;
2545*0Sstevel@tonic-gate 		nextchr = UCHARAT(++locinput);
2546*0Sstevel@tonic-gate 		break;
2547*0Sstevel@tonic-gate 	    }
2548*0Sstevel@tonic-gate 	no_anyof:
2549*0Sstevel@tonic-gate 	    /* If we might have the case of the German sharp s
2550*0Sstevel@tonic-gate 	     * in a casefolding Unicode character class. */
2551*0Sstevel@tonic-gate 
2552*0Sstevel@tonic-gate 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2553*0Sstevel@tonic-gate 		 locinput += SHARP_S_SKIP;
2554*0Sstevel@tonic-gate 		 nextchr = UCHARAT(locinput);
2555*0Sstevel@tonic-gate 	    }
2556*0Sstevel@tonic-gate 	    else
2557*0Sstevel@tonic-gate 		 sayNO;
2558*0Sstevel@tonic-gate 	    break;
2559*0Sstevel@tonic-gate 	case ALNUML:
2560*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2561*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2562*0Sstevel@tonic-gate 	case ALNUM:
2563*0Sstevel@tonic-gate 	    if (!nextchr)
2564*0Sstevel@tonic-gate 		sayNO;
2565*0Sstevel@tonic-gate 	    if (do_utf8) {
2566*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(alnum,"a");
2567*0Sstevel@tonic-gate 		if (!(OP(scan) == ALNUM
2568*0Sstevel@tonic-gate 		      ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2569*0Sstevel@tonic-gate 		      : isALNUM_LC_utf8((U8*)locinput)))
2570*0Sstevel@tonic-gate 		{
2571*0Sstevel@tonic-gate 		    sayNO;
2572*0Sstevel@tonic-gate 		}
2573*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2574*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2575*0Sstevel@tonic-gate 		break;
2576*0Sstevel@tonic-gate 	    }
2577*0Sstevel@tonic-gate 	    if (!(OP(scan) == ALNUM
2578*0Sstevel@tonic-gate 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2579*0Sstevel@tonic-gate 		sayNO;
2580*0Sstevel@tonic-gate 	    nextchr = UCHARAT(++locinput);
2581*0Sstevel@tonic-gate 	    break;
2582*0Sstevel@tonic-gate 	case NALNUML:
2583*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2584*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2585*0Sstevel@tonic-gate 	case NALNUM:
2586*0Sstevel@tonic-gate 	    if (!nextchr && locinput >= PL_regeol)
2587*0Sstevel@tonic-gate 		sayNO;
2588*0Sstevel@tonic-gate 	    if (do_utf8) {
2589*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(alnum,"a");
2590*0Sstevel@tonic-gate 		if (OP(scan) == NALNUM
2591*0Sstevel@tonic-gate 		    ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2592*0Sstevel@tonic-gate 		    : isALNUM_LC_utf8((U8*)locinput))
2593*0Sstevel@tonic-gate 		{
2594*0Sstevel@tonic-gate 		    sayNO;
2595*0Sstevel@tonic-gate 		}
2596*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2597*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2598*0Sstevel@tonic-gate 		break;
2599*0Sstevel@tonic-gate 	    }
2600*0Sstevel@tonic-gate 	    if (OP(scan) == NALNUM
2601*0Sstevel@tonic-gate 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
2602*0Sstevel@tonic-gate 		sayNO;
2603*0Sstevel@tonic-gate 	    nextchr = UCHARAT(++locinput);
2604*0Sstevel@tonic-gate 	    break;
2605*0Sstevel@tonic-gate 	case BOUNDL:
2606*0Sstevel@tonic-gate 	case NBOUNDL:
2607*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2608*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2609*0Sstevel@tonic-gate 	case BOUND:
2610*0Sstevel@tonic-gate 	case NBOUND:
2611*0Sstevel@tonic-gate 	    /* was last char in word? */
2612*0Sstevel@tonic-gate 	    if (do_utf8) {
2613*0Sstevel@tonic-gate 		if (locinput == PL_bostr)
2614*0Sstevel@tonic-gate 		    ln = '\n';
2615*0Sstevel@tonic-gate 		else {
2616*0Sstevel@tonic-gate 		    U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2617*0Sstevel@tonic-gate 
2618*0Sstevel@tonic-gate 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2619*0Sstevel@tonic-gate 		}
2620*0Sstevel@tonic-gate 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2621*0Sstevel@tonic-gate 		    ln = isALNUM_uni(ln);
2622*0Sstevel@tonic-gate 		    LOAD_UTF8_CHARCLASS(alnum,"a");
2623*0Sstevel@tonic-gate 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2624*0Sstevel@tonic-gate 		}
2625*0Sstevel@tonic-gate 		else {
2626*0Sstevel@tonic-gate 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2627*0Sstevel@tonic-gate 		    n = isALNUM_LC_utf8((U8*)locinput);
2628*0Sstevel@tonic-gate 		}
2629*0Sstevel@tonic-gate 	    }
2630*0Sstevel@tonic-gate 	    else {
2631*0Sstevel@tonic-gate 		ln = (locinput != PL_bostr) ?
2632*0Sstevel@tonic-gate 		    UCHARAT(locinput - 1) : '\n';
2633*0Sstevel@tonic-gate 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2634*0Sstevel@tonic-gate 		    ln = isALNUM(ln);
2635*0Sstevel@tonic-gate 		    n = isALNUM(nextchr);
2636*0Sstevel@tonic-gate 		}
2637*0Sstevel@tonic-gate 		else {
2638*0Sstevel@tonic-gate 		    ln = isALNUM_LC(ln);
2639*0Sstevel@tonic-gate 		    n = isALNUM_LC(nextchr);
2640*0Sstevel@tonic-gate 		}
2641*0Sstevel@tonic-gate 	    }
2642*0Sstevel@tonic-gate 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2643*0Sstevel@tonic-gate 				    OP(scan) == BOUNDL))
2644*0Sstevel@tonic-gate 		    sayNO;
2645*0Sstevel@tonic-gate 	    break;
2646*0Sstevel@tonic-gate 	case SPACEL:
2647*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2648*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2649*0Sstevel@tonic-gate 	case SPACE:
2650*0Sstevel@tonic-gate 	    if (!nextchr)
2651*0Sstevel@tonic-gate 		sayNO;
2652*0Sstevel@tonic-gate 	    if (do_utf8) {
2653*0Sstevel@tonic-gate 		if (UTF8_IS_CONTINUED(nextchr)) {
2654*0Sstevel@tonic-gate 		    LOAD_UTF8_CHARCLASS(space," ");
2655*0Sstevel@tonic-gate 		    if (!(OP(scan) == SPACE
2656*0Sstevel@tonic-gate 			  ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2657*0Sstevel@tonic-gate 			  : isSPACE_LC_utf8((U8*)locinput)))
2658*0Sstevel@tonic-gate 		    {
2659*0Sstevel@tonic-gate 			sayNO;
2660*0Sstevel@tonic-gate 		    }
2661*0Sstevel@tonic-gate 		    locinput += PL_utf8skip[nextchr];
2662*0Sstevel@tonic-gate 		    nextchr = UCHARAT(locinput);
2663*0Sstevel@tonic-gate 		    break;
2664*0Sstevel@tonic-gate 		}
2665*0Sstevel@tonic-gate 		if (!(OP(scan) == SPACE
2666*0Sstevel@tonic-gate 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2667*0Sstevel@tonic-gate 		    sayNO;
2668*0Sstevel@tonic-gate 		nextchr = UCHARAT(++locinput);
2669*0Sstevel@tonic-gate 	    }
2670*0Sstevel@tonic-gate 	    else {
2671*0Sstevel@tonic-gate 		if (!(OP(scan) == SPACE
2672*0Sstevel@tonic-gate 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2673*0Sstevel@tonic-gate 		    sayNO;
2674*0Sstevel@tonic-gate 		nextchr = UCHARAT(++locinput);
2675*0Sstevel@tonic-gate 	    }
2676*0Sstevel@tonic-gate 	    break;
2677*0Sstevel@tonic-gate 	case NSPACEL:
2678*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2679*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2680*0Sstevel@tonic-gate 	case NSPACE:
2681*0Sstevel@tonic-gate 	    if (!nextchr && locinput >= PL_regeol)
2682*0Sstevel@tonic-gate 		sayNO;
2683*0Sstevel@tonic-gate 	    if (do_utf8) {
2684*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(space," ");
2685*0Sstevel@tonic-gate 		if (OP(scan) == NSPACE
2686*0Sstevel@tonic-gate 		    ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2687*0Sstevel@tonic-gate 		    : isSPACE_LC_utf8((U8*)locinput))
2688*0Sstevel@tonic-gate 		{
2689*0Sstevel@tonic-gate 		    sayNO;
2690*0Sstevel@tonic-gate 		}
2691*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2692*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2693*0Sstevel@tonic-gate 		break;
2694*0Sstevel@tonic-gate 	    }
2695*0Sstevel@tonic-gate 	    if (OP(scan) == NSPACE
2696*0Sstevel@tonic-gate 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
2697*0Sstevel@tonic-gate 		sayNO;
2698*0Sstevel@tonic-gate 	    nextchr = UCHARAT(++locinput);
2699*0Sstevel@tonic-gate 	    break;
2700*0Sstevel@tonic-gate 	case DIGITL:
2701*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2702*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2703*0Sstevel@tonic-gate 	case DIGIT:
2704*0Sstevel@tonic-gate 	    if (!nextchr)
2705*0Sstevel@tonic-gate 		sayNO;
2706*0Sstevel@tonic-gate 	    if (do_utf8) {
2707*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(digit,"0");
2708*0Sstevel@tonic-gate 		if (!(OP(scan) == DIGIT
2709*0Sstevel@tonic-gate 		      ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2710*0Sstevel@tonic-gate 		      : isDIGIT_LC_utf8((U8*)locinput)))
2711*0Sstevel@tonic-gate 		{
2712*0Sstevel@tonic-gate 		    sayNO;
2713*0Sstevel@tonic-gate 		}
2714*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2715*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2716*0Sstevel@tonic-gate 		break;
2717*0Sstevel@tonic-gate 	    }
2718*0Sstevel@tonic-gate 	    if (!(OP(scan) == DIGIT
2719*0Sstevel@tonic-gate 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2720*0Sstevel@tonic-gate 		sayNO;
2721*0Sstevel@tonic-gate 	    nextchr = UCHARAT(++locinput);
2722*0Sstevel@tonic-gate 	    break;
2723*0Sstevel@tonic-gate 	case NDIGITL:
2724*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2725*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2726*0Sstevel@tonic-gate 	case NDIGIT:
2727*0Sstevel@tonic-gate 	    if (!nextchr && locinput >= PL_regeol)
2728*0Sstevel@tonic-gate 		sayNO;
2729*0Sstevel@tonic-gate 	    if (do_utf8) {
2730*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(digit,"0");
2731*0Sstevel@tonic-gate 		if (OP(scan) == NDIGIT
2732*0Sstevel@tonic-gate 		    ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2733*0Sstevel@tonic-gate 		    : isDIGIT_LC_utf8((U8*)locinput))
2734*0Sstevel@tonic-gate 		{
2735*0Sstevel@tonic-gate 		    sayNO;
2736*0Sstevel@tonic-gate 		}
2737*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2738*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2739*0Sstevel@tonic-gate 		break;
2740*0Sstevel@tonic-gate 	    }
2741*0Sstevel@tonic-gate 	    if (OP(scan) == NDIGIT
2742*0Sstevel@tonic-gate 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2743*0Sstevel@tonic-gate 		sayNO;
2744*0Sstevel@tonic-gate 	    nextchr = UCHARAT(++locinput);
2745*0Sstevel@tonic-gate 	    break;
2746*0Sstevel@tonic-gate 	case CLUMP:
2747*0Sstevel@tonic-gate 	    if (locinput >= PL_regeol)
2748*0Sstevel@tonic-gate 		sayNO;
2749*0Sstevel@tonic-gate 	    if  (do_utf8) {
2750*0Sstevel@tonic-gate 		LOAD_UTF8_CHARCLASS(mark,"~");
2751*0Sstevel@tonic-gate 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2752*0Sstevel@tonic-gate 		    sayNO;
2753*0Sstevel@tonic-gate 		locinput += PL_utf8skip[nextchr];
2754*0Sstevel@tonic-gate 		while (locinput < PL_regeol &&
2755*0Sstevel@tonic-gate 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2756*0Sstevel@tonic-gate 		    locinput += UTF8SKIP(locinput);
2757*0Sstevel@tonic-gate 		if (locinput > PL_regeol)
2758*0Sstevel@tonic-gate 		    sayNO;
2759*0Sstevel@tonic-gate 	    }
2760*0Sstevel@tonic-gate 	    else
2761*0Sstevel@tonic-gate 	       locinput++;
2762*0Sstevel@tonic-gate 	    nextchr = UCHARAT(locinput);
2763*0Sstevel@tonic-gate 	    break;
2764*0Sstevel@tonic-gate 	case REFFL:
2765*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
2766*0Sstevel@tonic-gate 	    /* FALL THROUGH */
2767*0Sstevel@tonic-gate         case REF:
2768*0Sstevel@tonic-gate 	case REFF:
2769*0Sstevel@tonic-gate 	    n = ARG(scan);  /* which paren pair */
2770*0Sstevel@tonic-gate 	    ln = PL_regstartp[n];
2771*0Sstevel@tonic-gate 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2772*0Sstevel@tonic-gate 	    if ((I32)*PL_reglastparen < n || ln == -1)
2773*0Sstevel@tonic-gate 		sayNO;			/* Do not match unless seen CLOSEn. */
2774*0Sstevel@tonic-gate 	    if (ln == PL_regendp[n])
2775*0Sstevel@tonic-gate 		break;
2776*0Sstevel@tonic-gate 
2777*0Sstevel@tonic-gate 	    s = PL_bostr + ln;
2778*0Sstevel@tonic-gate 	    if (do_utf8 && OP(scan) != REF) {	/* REF can do byte comparison */
2779*0Sstevel@tonic-gate 		char *l = locinput;
2780*0Sstevel@tonic-gate 		char *e = PL_bostr + PL_regendp[n];
2781*0Sstevel@tonic-gate 		/*
2782*0Sstevel@tonic-gate 		 * Note that we can't do the "other character" lookup trick as
2783*0Sstevel@tonic-gate 		 * in the 8-bit case (no pun intended) because in Unicode we
2784*0Sstevel@tonic-gate 		 * have to map both upper and title case to lower case.
2785*0Sstevel@tonic-gate 		 */
2786*0Sstevel@tonic-gate 		if (OP(scan) == REFF) {
2787*0Sstevel@tonic-gate 		    STRLEN ulen1, ulen2;
2788*0Sstevel@tonic-gate 		    U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2789*0Sstevel@tonic-gate 		    U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2790*0Sstevel@tonic-gate 		    while (s < e) {
2791*0Sstevel@tonic-gate 			if (l >= PL_regeol)
2792*0Sstevel@tonic-gate 			    sayNO;
2793*0Sstevel@tonic-gate 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2794*0Sstevel@tonic-gate 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2795*0Sstevel@tonic-gate 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2796*0Sstevel@tonic-gate 			    sayNO;
2797*0Sstevel@tonic-gate 			s += ulen1;
2798*0Sstevel@tonic-gate 			l += ulen2;
2799*0Sstevel@tonic-gate 		    }
2800*0Sstevel@tonic-gate 		}
2801*0Sstevel@tonic-gate 		locinput = l;
2802*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
2803*0Sstevel@tonic-gate 		break;
2804*0Sstevel@tonic-gate 	    }
2805*0Sstevel@tonic-gate 
2806*0Sstevel@tonic-gate 	    /* Inline the first character, for speed. */
2807*0Sstevel@tonic-gate 	    if (UCHARAT(s) != nextchr &&
2808*0Sstevel@tonic-gate 		(OP(scan) == REF ||
2809*0Sstevel@tonic-gate 		 (UCHARAT(s) != ((OP(scan) == REFF
2810*0Sstevel@tonic-gate 				  ? PL_fold : PL_fold_locale)[nextchr]))))
2811*0Sstevel@tonic-gate 		sayNO;
2812*0Sstevel@tonic-gate 	    ln = PL_regendp[n] - ln;
2813*0Sstevel@tonic-gate 	    if (locinput + ln > PL_regeol)
2814*0Sstevel@tonic-gate 		sayNO;
2815*0Sstevel@tonic-gate 	    if (ln > 1 && (OP(scan) == REF
2816*0Sstevel@tonic-gate 			   ? memNE(s, locinput, ln)
2817*0Sstevel@tonic-gate 			   : (OP(scan) == REFF
2818*0Sstevel@tonic-gate 			      ? ibcmp(s, locinput, ln)
2819*0Sstevel@tonic-gate 			      : ibcmp_locale(s, locinput, ln))))
2820*0Sstevel@tonic-gate 		sayNO;
2821*0Sstevel@tonic-gate 	    locinput += ln;
2822*0Sstevel@tonic-gate 	    nextchr = UCHARAT(locinput);
2823*0Sstevel@tonic-gate 	    break;
2824*0Sstevel@tonic-gate 
2825*0Sstevel@tonic-gate 	case NOTHING:
2826*0Sstevel@tonic-gate 	case TAIL:
2827*0Sstevel@tonic-gate 	    break;
2828*0Sstevel@tonic-gate 	case BACK:
2829*0Sstevel@tonic-gate 	    break;
2830*0Sstevel@tonic-gate 	case EVAL:
2831*0Sstevel@tonic-gate 	{
2832*0Sstevel@tonic-gate 	    dSP;
2833*0Sstevel@tonic-gate 	    OP_4tree *oop = PL_op;
2834*0Sstevel@tonic-gate 	    COP *ocurcop = PL_curcop;
2835*0Sstevel@tonic-gate 	    PAD *old_comppad;
2836*0Sstevel@tonic-gate 	    SV *ret;
2837*0Sstevel@tonic-gate 	    struct regexp *oreg = PL_reg_re;
2838*0Sstevel@tonic-gate 
2839*0Sstevel@tonic-gate 	    n = ARG(scan);
2840*0Sstevel@tonic-gate 	    PL_op = (OP_4tree*)PL_regdata->data[n];
2841*0Sstevel@tonic-gate 	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2842*0Sstevel@tonic-gate 	    PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2843*0Sstevel@tonic-gate 	    PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2844*0Sstevel@tonic-gate 
2845*0Sstevel@tonic-gate 	    {
2846*0Sstevel@tonic-gate 		SV **before = SP;
2847*0Sstevel@tonic-gate 		CALLRUNOPS(aTHX);			/* Scalar context. */
2848*0Sstevel@tonic-gate 		SPAGAIN;
2849*0Sstevel@tonic-gate 		if (SP == before)
2850*0Sstevel@tonic-gate 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2851*0Sstevel@tonic-gate 		else {
2852*0Sstevel@tonic-gate 		    ret = POPs;
2853*0Sstevel@tonic-gate 		    PUTBACK;
2854*0Sstevel@tonic-gate 		}
2855*0Sstevel@tonic-gate 	    }
2856*0Sstevel@tonic-gate 
2857*0Sstevel@tonic-gate 	    PL_op = oop;
2858*0Sstevel@tonic-gate 	    PAD_RESTORE_LOCAL(old_comppad);
2859*0Sstevel@tonic-gate 	    PL_curcop = ocurcop;
2860*0Sstevel@tonic-gate 	    if (logical) {
2861*0Sstevel@tonic-gate 		if (logical == 2) {	/* Postponed subexpression. */
2862*0Sstevel@tonic-gate 		    regexp *re;
2863*0Sstevel@tonic-gate 		    MAGIC *mg = Null(MAGIC*);
2864*0Sstevel@tonic-gate 		    re_cc_state state;
2865*0Sstevel@tonic-gate 		    CHECKPOINT cp, lastcp;
2866*0Sstevel@tonic-gate                     int toggleutf;
2867*0Sstevel@tonic-gate 		    register SV *sv;
2868*0Sstevel@tonic-gate 
2869*0Sstevel@tonic-gate 		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2870*0Sstevel@tonic-gate 			mg = mg_find(sv, PERL_MAGIC_qr);
2871*0Sstevel@tonic-gate 		    else if (SvSMAGICAL(ret)) {
2872*0Sstevel@tonic-gate 			if (SvGMAGICAL(ret))
2873*0Sstevel@tonic-gate 			    sv_unmagic(ret, PERL_MAGIC_qr);
2874*0Sstevel@tonic-gate 			else
2875*0Sstevel@tonic-gate 			    mg = mg_find(ret, PERL_MAGIC_qr);
2876*0Sstevel@tonic-gate 		    }
2877*0Sstevel@tonic-gate 
2878*0Sstevel@tonic-gate 		    if (mg) {
2879*0Sstevel@tonic-gate 			re = (regexp *)mg->mg_obj;
2880*0Sstevel@tonic-gate 			(void)ReREFCNT_inc(re);
2881*0Sstevel@tonic-gate 		    }
2882*0Sstevel@tonic-gate 		    else {
2883*0Sstevel@tonic-gate 			STRLEN len;
2884*0Sstevel@tonic-gate 			char *t = SvPV(ret, len);
2885*0Sstevel@tonic-gate 			PMOP pm;
2886*0Sstevel@tonic-gate 			char *oprecomp = PL_regprecomp;
2887*0Sstevel@tonic-gate 			I32 osize = PL_regsize;
2888*0Sstevel@tonic-gate 			I32 onpar = PL_regnpar;
2889*0Sstevel@tonic-gate 
2890*0Sstevel@tonic-gate 			Zero(&pm, 1, PMOP);
2891*0Sstevel@tonic-gate                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2892*0Sstevel@tonic-gate 			re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2893*0Sstevel@tonic-gate 			if (!(SvFLAGS(ret)
2894*0Sstevel@tonic-gate 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2895*0Sstevel@tonic-gate 				| SVs_GMG)))
2896*0Sstevel@tonic-gate 			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
2897*0Sstevel@tonic-gate 					PERL_MAGIC_qr,0,0);
2898*0Sstevel@tonic-gate 			PL_regprecomp = oprecomp;
2899*0Sstevel@tonic-gate 			PL_regsize = osize;
2900*0Sstevel@tonic-gate 			PL_regnpar = onpar;
2901*0Sstevel@tonic-gate 		    }
2902*0Sstevel@tonic-gate 		    DEBUG_r(
2903*0Sstevel@tonic-gate 			PerlIO_printf(Perl_debug_log,
2904*0Sstevel@tonic-gate 				      "Entering embedded `%s%.60s%s%s'\n",
2905*0Sstevel@tonic-gate 				      PL_colors[0],
2906*0Sstevel@tonic-gate 				      re->precomp,
2907*0Sstevel@tonic-gate 				      PL_colors[1],
2908*0Sstevel@tonic-gate 				      (strlen(re->precomp) > 60 ? "..." : ""))
2909*0Sstevel@tonic-gate 			);
2910*0Sstevel@tonic-gate 		    state.node = next;
2911*0Sstevel@tonic-gate 		    state.prev = PL_reg_call_cc;
2912*0Sstevel@tonic-gate 		    state.cc = PL_regcc;
2913*0Sstevel@tonic-gate 		    state.re = PL_reg_re;
2914*0Sstevel@tonic-gate 
2915*0Sstevel@tonic-gate 		    PL_regcc = 0;
2916*0Sstevel@tonic-gate 
2917*0Sstevel@tonic-gate 		    cp = regcppush(0);	/* Save *all* the positions. */
2918*0Sstevel@tonic-gate 		    REGCP_SET(lastcp);
2919*0Sstevel@tonic-gate 		    cache_re(re);
2920*0Sstevel@tonic-gate 		    state.ss = PL_savestack_ix;
2921*0Sstevel@tonic-gate 		    *PL_reglastparen = 0;
2922*0Sstevel@tonic-gate 		    *PL_reglastcloseparen = 0;
2923*0Sstevel@tonic-gate 		    PL_reg_call_cc = &state;
2924*0Sstevel@tonic-gate 		    PL_reginput = locinput;
2925*0Sstevel@tonic-gate 		    toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2926*0Sstevel@tonic-gate 				((re->reganch & ROPT_UTF8) != 0);
2927*0Sstevel@tonic-gate 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2928*0Sstevel@tonic-gate 
2929*0Sstevel@tonic-gate 		    /* XXXX This is too dramatic a measure... */
2930*0Sstevel@tonic-gate 		    PL_reg_maxiter = 0;
2931*0Sstevel@tonic-gate 
2932*0Sstevel@tonic-gate 		    if (regmatch(re->program + 1)) {
2933*0Sstevel@tonic-gate 			/* Even though we succeeded, we need to restore
2934*0Sstevel@tonic-gate 			   global variables, since we may be wrapped inside
2935*0Sstevel@tonic-gate 			   SUSPEND, thus the match may be not finished yet. */
2936*0Sstevel@tonic-gate 
2937*0Sstevel@tonic-gate 			/* XXXX Do this only if SUSPENDed? */
2938*0Sstevel@tonic-gate 			PL_reg_call_cc = state.prev;
2939*0Sstevel@tonic-gate 			PL_regcc = state.cc;
2940*0Sstevel@tonic-gate 			PL_reg_re = state.re;
2941*0Sstevel@tonic-gate 			cache_re(PL_reg_re);
2942*0Sstevel@tonic-gate 			if (toggleutf) PL_reg_flags ^= RF_utf8;
2943*0Sstevel@tonic-gate 
2944*0Sstevel@tonic-gate 			/* XXXX This is too dramatic a measure... */
2945*0Sstevel@tonic-gate 			PL_reg_maxiter = 0;
2946*0Sstevel@tonic-gate 
2947*0Sstevel@tonic-gate 			/* These are needed even if not SUSPEND. */
2948*0Sstevel@tonic-gate 			ReREFCNT_dec(re);
2949*0Sstevel@tonic-gate 			regcpblow(cp);
2950*0Sstevel@tonic-gate 			sayYES;
2951*0Sstevel@tonic-gate 		    }
2952*0Sstevel@tonic-gate 		    ReREFCNT_dec(re);
2953*0Sstevel@tonic-gate 		    REGCP_UNWIND(lastcp);
2954*0Sstevel@tonic-gate 		    regcppop();
2955*0Sstevel@tonic-gate 		    PL_reg_call_cc = state.prev;
2956*0Sstevel@tonic-gate 		    PL_regcc = state.cc;
2957*0Sstevel@tonic-gate 		    PL_reg_re = state.re;
2958*0Sstevel@tonic-gate 		    cache_re(PL_reg_re);
2959*0Sstevel@tonic-gate 		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2960*0Sstevel@tonic-gate 
2961*0Sstevel@tonic-gate 		    /* XXXX This is too dramatic a measure... */
2962*0Sstevel@tonic-gate 		    PL_reg_maxiter = 0;
2963*0Sstevel@tonic-gate 
2964*0Sstevel@tonic-gate 		    logical = 0;
2965*0Sstevel@tonic-gate 		    sayNO;
2966*0Sstevel@tonic-gate 		}
2967*0Sstevel@tonic-gate 		sw = SvTRUE(ret);
2968*0Sstevel@tonic-gate 		logical = 0;
2969*0Sstevel@tonic-gate 	    }
2970*0Sstevel@tonic-gate 	    else {
2971*0Sstevel@tonic-gate 		sv_setsv(save_scalar(PL_replgv), ret);
2972*0Sstevel@tonic-gate 		cache_re(oreg);
2973*0Sstevel@tonic-gate 	    }
2974*0Sstevel@tonic-gate 	    break;
2975*0Sstevel@tonic-gate 	}
2976*0Sstevel@tonic-gate 	case OPEN:
2977*0Sstevel@tonic-gate 	    n = ARG(scan);  /* which paren pair */
2978*0Sstevel@tonic-gate 	    PL_reg_start_tmp[n] = locinput;
2979*0Sstevel@tonic-gate 	    if (n > PL_regsize)
2980*0Sstevel@tonic-gate 		PL_regsize = n;
2981*0Sstevel@tonic-gate 	    break;
2982*0Sstevel@tonic-gate 	case CLOSE:
2983*0Sstevel@tonic-gate 	    n = ARG(scan);  /* which paren pair */
2984*0Sstevel@tonic-gate 	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2985*0Sstevel@tonic-gate 	    PL_regendp[n] = locinput - PL_bostr;
2986*0Sstevel@tonic-gate 	    if (n > (I32)*PL_reglastparen)
2987*0Sstevel@tonic-gate 		*PL_reglastparen = n;
2988*0Sstevel@tonic-gate 	    *PL_reglastcloseparen = n;
2989*0Sstevel@tonic-gate 	    break;
2990*0Sstevel@tonic-gate 	case GROUPP:
2991*0Sstevel@tonic-gate 	    n = ARG(scan);  /* which paren pair */
2992*0Sstevel@tonic-gate 	    sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2993*0Sstevel@tonic-gate 	    break;
2994*0Sstevel@tonic-gate 	case IFTHEN:
2995*0Sstevel@tonic-gate 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2996*0Sstevel@tonic-gate 	    if (sw)
2997*0Sstevel@tonic-gate 		next = NEXTOPER(NEXTOPER(scan));
2998*0Sstevel@tonic-gate 	    else {
2999*0Sstevel@tonic-gate 		next = scan + ARG(scan);
3000*0Sstevel@tonic-gate 		if (OP(next) == IFTHEN) /* Fake one. */
3001*0Sstevel@tonic-gate 		    next = NEXTOPER(NEXTOPER(next));
3002*0Sstevel@tonic-gate 	    }
3003*0Sstevel@tonic-gate 	    break;
3004*0Sstevel@tonic-gate 	case LOGICAL:
3005*0Sstevel@tonic-gate 	    logical = scan->flags;
3006*0Sstevel@tonic-gate 	    break;
3007*0Sstevel@tonic-gate /*******************************************************************
3008*0Sstevel@tonic-gate  PL_regcc contains infoblock about the innermost (...)* loop, and
3009*0Sstevel@tonic-gate  a pointer to the next outer infoblock.
3010*0Sstevel@tonic-gate 
3011*0Sstevel@tonic-gate  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3012*0Sstevel@tonic-gate 
3013*0Sstevel@tonic-gate    1) After matching X, regnode for CURLYX is processed;
3014*0Sstevel@tonic-gate 
3015*0Sstevel@tonic-gate    2) This regnode creates infoblock on the stack, and calls
3016*0Sstevel@tonic-gate       regmatch() recursively with the starting point at WHILEM node;
3017*0Sstevel@tonic-gate 
3018*0Sstevel@tonic-gate    3) Each hit of WHILEM node tries to match A and Z (in the order
3019*0Sstevel@tonic-gate       depending on the current iteration, min/max of {min,max} and
3020*0Sstevel@tonic-gate       greediness).  The information about where are nodes for "A"
3021*0Sstevel@tonic-gate       and "Z" is read from the infoblock, as is info on how many times "A"
3022*0Sstevel@tonic-gate       was already matched, and greediness.
3023*0Sstevel@tonic-gate 
3024*0Sstevel@tonic-gate    4) After A matches, the same WHILEM node is hit again.
3025*0Sstevel@tonic-gate 
3026*0Sstevel@tonic-gate    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3027*0Sstevel@tonic-gate       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3028*0Sstevel@tonic-gate       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3029*0Sstevel@tonic-gate       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3030*0Sstevel@tonic-gate       of the external loop.
3031*0Sstevel@tonic-gate 
3032*0Sstevel@tonic-gate  Currently present infoblocks form a tree with a stem formed by PL_curcc
3033*0Sstevel@tonic-gate  and whatever it mentions via ->next, and additional attached trees
3034*0Sstevel@tonic-gate  corresponding to temporarily unset infoblocks as in "5" above.
3035*0Sstevel@tonic-gate 
3036*0Sstevel@tonic-gate  In the following picture infoblocks for outer loop of
3037*0Sstevel@tonic-gate  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3038*0Sstevel@tonic-gate  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3039*0Sstevel@tonic-gate  infoblocks are drawn below the "reset" infoblock.
3040*0Sstevel@tonic-gate 
3041*0Sstevel@tonic-gate  In fact in the picture below we do not show failed matches for Z and T
3042*0Sstevel@tonic-gate  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3043*0Sstevel@tonic-gate  more obvious *why* one needs to *temporary* unset infoblocks.]
3044*0Sstevel@tonic-gate 
3045*0Sstevel@tonic-gate   Matched	REx position	InfoBlocks	Comment
3046*0Sstevel@tonic-gate   		(Y(A)*?Z)*?T	x
3047*0Sstevel@tonic-gate   		Y(A)*?Z)*?T	x <- O
3048*0Sstevel@tonic-gate   Y		(A)*?Z)*?T	x <- O
3049*0Sstevel@tonic-gate   Y		A)*?Z)*?T	x <- O <- I
3050*0Sstevel@tonic-gate   YA		)*?Z)*?T	x <- O <- I
3051*0Sstevel@tonic-gate   YA		A)*?Z)*?T	x <- O <- I
3052*0Sstevel@tonic-gate   YAA		)*?Z)*?T	x <- O <- I
3053*0Sstevel@tonic-gate   YAA		Z)*?T		x <- O		# Temporary unset I
3054*0Sstevel@tonic-gate 				     I
3055*0Sstevel@tonic-gate 
3056*0Sstevel@tonic-gate   YAAZ		Y(A)*?Z)*?T	x <- O
3057*0Sstevel@tonic-gate 				     I
3058*0Sstevel@tonic-gate 
3059*0Sstevel@tonic-gate   YAAZY		(A)*?Z)*?T	x <- O
3060*0Sstevel@tonic-gate 				     I
3061*0Sstevel@tonic-gate 
3062*0Sstevel@tonic-gate   YAAZY		A)*?Z)*?T	x <- O <- I
3063*0Sstevel@tonic-gate 				     I
3064*0Sstevel@tonic-gate 
3065*0Sstevel@tonic-gate   YAAZYA	)*?Z)*?T	x <- O <- I
3066*0Sstevel@tonic-gate 				     I
3067*0Sstevel@tonic-gate 
3068*0Sstevel@tonic-gate   YAAZYA	Z)*?T		x <- O		# Temporary unset I
3069*0Sstevel@tonic-gate 				     I,I
3070*0Sstevel@tonic-gate 
3071*0Sstevel@tonic-gate   YAAZYAZ	)*?T		x <- O
3072*0Sstevel@tonic-gate 				     I,I
3073*0Sstevel@tonic-gate 
3074*0Sstevel@tonic-gate   YAAZYAZ	T		x		# Temporary unset O
3075*0Sstevel@tonic-gate 				O
3076*0Sstevel@tonic-gate 				I,I
3077*0Sstevel@tonic-gate 
3078*0Sstevel@tonic-gate   YAAZYAZT			x
3079*0Sstevel@tonic-gate 				O
3080*0Sstevel@tonic-gate 				I,I
3081*0Sstevel@tonic-gate  *******************************************************************/
3082*0Sstevel@tonic-gate 	case CURLYX: {
3083*0Sstevel@tonic-gate 		CURCUR cc;
3084*0Sstevel@tonic-gate 		CHECKPOINT cp = PL_savestack_ix;
3085*0Sstevel@tonic-gate 		/* No need to save/restore up to this paren */
3086*0Sstevel@tonic-gate 		I32 parenfloor = scan->flags;
3087*0Sstevel@tonic-gate 
3088*0Sstevel@tonic-gate 		if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3089*0Sstevel@tonic-gate 		    next += ARG(next);
3090*0Sstevel@tonic-gate 		cc.oldcc = PL_regcc;
3091*0Sstevel@tonic-gate 		PL_regcc = &cc;
3092*0Sstevel@tonic-gate 		/* XXXX Probably it is better to teach regpush to support
3093*0Sstevel@tonic-gate 		   parenfloor > PL_regsize... */
3094*0Sstevel@tonic-gate 		if (parenfloor > (I32)*PL_reglastparen)
3095*0Sstevel@tonic-gate 		    parenfloor = *PL_reglastparen; /* Pessimization... */
3096*0Sstevel@tonic-gate 		cc.parenfloor = parenfloor;
3097*0Sstevel@tonic-gate 		cc.cur = -1;
3098*0Sstevel@tonic-gate 		cc.min = ARG1(scan);
3099*0Sstevel@tonic-gate 		cc.max  = ARG2(scan);
3100*0Sstevel@tonic-gate 		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3101*0Sstevel@tonic-gate 		cc.next = next;
3102*0Sstevel@tonic-gate 		cc.minmod = minmod;
3103*0Sstevel@tonic-gate 		cc.lastloc = 0;
3104*0Sstevel@tonic-gate 		PL_reginput = locinput;
3105*0Sstevel@tonic-gate 		n = regmatch(PREVOPER(next));	/* start on the WHILEM */
3106*0Sstevel@tonic-gate 		regcpblow(cp);
3107*0Sstevel@tonic-gate 		PL_regcc = cc.oldcc;
3108*0Sstevel@tonic-gate 		saySAME(n);
3109*0Sstevel@tonic-gate 	    }
3110*0Sstevel@tonic-gate 	    /* NOT REACHED */
3111*0Sstevel@tonic-gate 	case WHILEM: {
3112*0Sstevel@tonic-gate 		/*
3113*0Sstevel@tonic-gate 		 * This is really hard to understand, because after we match
3114*0Sstevel@tonic-gate 		 * what we're trying to match, we must make sure the rest of
3115*0Sstevel@tonic-gate 		 * the REx is going to match for sure, and to do that we have
3116*0Sstevel@tonic-gate 		 * to go back UP the parse tree by recursing ever deeper.  And
3117*0Sstevel@tonic-gate 		 * if it fails, we have to reset our parent's current state
3118*0Sstevel@tonic-gate 		 * that we can try again after backing off.
3119*0Sstevel@tonic-gate 		 */
3120*0Sstevel@tonic-gate 
3121*0Sstevel@tonic-gate 		CHECKPOINT cp, lastcp;
3122*0Sstevel@tonic-gate 		CURCUR* cc = PL_regcc;
3123*0Sstevel@tonic-gate 		char *lastloc = cc->lastloc; /* Detection of 0-len. */
3124*0Sstevel@tonic-gate 
3125*0Sstevel@tonic-gate 		n = cc->cur + 1;	/* how many we know we matched */
3126*0Sstevel@tonic-gate 		PL_reginput = locinput;
3127*0Sstevel@tonic-gate 
3128*0Sstevel@tonic-gate 		DEBUG_r(
3129*0Sstevel@tonic-gate 		    PerlIO_printf(Perl_debug_log,
3130*0Sstevel@tonic-gate 				  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3131*0Sstevel@tonic-gate 				  REPORT_CODE_OFF+PL_regindent*2, "",
3132*0Sstevel@tonic-gate 				  (long)n, (long)cc->min,
3133*0Sstevel@tonic-gate 				  (long)cc->max, PTR2UV(cc))
3134*0Sstevel@tonic-gate 		    );
3135*0Sstevel@tonic-gate 
3136*0Sstevel@tonic-gate 		/* If degenerate scan matches "", assume scan done. */
3137*0Sstevel@tonic-gate 
3138*0Sstevel@tonic-gate 		if (locinput == cc->lastloc && n >= cc->min) {
3139*0Sstevel@tonic-gate 		    PL_regcc = cc->oldcc;
3140*0Sstevel@tonic-gate 		    if (PL_regcc)
3141*0Sstevel@tonic-gate 			ln = PL_regcc->cur;
3142*0Sstevel@tonic-gate 		    DEBUG_r(
3143*0Sstevel@tonic-gate 			PerlIO_printf(Perl_debug_log,
3144*0Sstevel@tonic-gate 			   "%*s  empty match detected, try continuation...\n",
3145*0Sstevel@tonic-gate 			   REPORT_CODE_OFF+PL_regindent*2, "")
3146*0Sstevel@tonic-gate 			);
3147*0Sstevel@tonic-gate 		    if (regmatch(cc->next))
3148*0Sstevel@tonic-gate 			sayYES;
3149*0Sstevel@tonic-gate 		    if (PL_regcc)
3150*0Sstevel@tonic-gate 			PL_regcc->cur = ln;
3151*0Sstevel@tonic-gate 		    PL_regcc = cc;
3152*0Sstevel@tonic-gate 		    sayNO;
3153*0Sstevel@tonic-gate 		}
3154*0Sstevel@tonic-gate 
3155*0Sstevel@tonic-gate 		/* First just match a string of min scans. */
3156*0Sstevel@tonic-gate 
3157*0Sstevel@tonic-gate 		if (n < cc->min) {
3158*0Sstevel@tonic-gate 		    cc->cur = n;
3159*0Sstevel@tonic-gate 		    cc->lastloc = locinput;
3160*0Sstevel@tonic-gate 		    if (regmatch(cc->scan))
3161*0Sstevel@tonic-gate 			sayYES;
3162*0Sstevel@tonic-gate 		    cc->cur = n - 1;
3163*0Sstevel@tonic-gate 		    cc->lastloc = lastloc;
3164*0Sstevel@tonic-gate 		    sayNO;
3165*0Sstevel@tonic-gate 		}
3166*0Sstevel@tonic-gate 
3167*0Sstevel@tonic-gate 		if (scan->flags) {
3168*0Sstevel@tonic-gate 		    /* Check whether we already were at this position.
3169*0Sstevel@tonic-gate 			Postpone detection until we know the match is not
3170*0Sstevel@tonic-gate 			*that* much linear. */
3171*0Sstevel@tonic-gate 		if (!PL_reg_maxiter) {
3172*0Sstevel@tonic-gate 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3173*0Sstevel@tonic-gate 		    PL_reg_leftiter = PL_reg_maxiter;
3174*0Sstevel@tonic-gate 		}
3175*0Sstevel@tonic-gate 		if (PL_reg_leftiter-- == 0) {
3176*0Sstevel@tonic-gate 		    I32 size = (PL_reg_maxiter + 7)/8;
3177*0Sstevel@tonic-gate 		    if (PL_reg_poscache) {
3178*0Sstevel@tonic-gate 			if ((I32)PL_reg_poscache_size < size) {
3179*0Sstevel@tonic-gate 			    Renew(PL_reg_poscache, size, char);
3180*0Sstevel@tonic-gate 			    PL_reg_poscache_size = size;
3181*0Sstevel@tonic-gate 			}
3182*0Sstevel@tonic-gate 			Zero(PL_reg_poscache, size, char);
3183*0Sstevel@tonic-gate 		    }
3184*0Sstevel@tonic-gate 		    else {
3185*0Sstevel@tonic-gate 			PL_reg_poscache_size = size;
3186*0Sstevel@tonic-gate 			Newz(29, PL_reg_poscache, size, char);
3187*0Sstevel@tonic-gate 		    }
3188*0Sstevel@tonic-gate 		    DEBUG_r(
3189*0Sstevel@tonic-gate 			PerlIO_printf(Perl_debug_log,
3190*0Sstevel@tonic-gate 	      "%sDetected a super-linear match, switching on caching%s...\n",
3191*0Sstevel@tonic-gate 				      PL_colors[4], PL_colors[5])
3192*0Sstevel@tonic-gate 			);
3193*0Sstevel@tonic-gate 		}
3194*0Sstevel@tonic-gate 		if (PL_reg_leftiter < 0) {
3195*0Sstevel@tonic-gate 		    I32 o = locinput - PL_bostr, b;
3196*0Sstevel@tonic-gate 
3197*0Sstevel@tonic-gate 		    o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3198*0Sstevel@tonic-gate 		    b = o % 8;
3199*0Sstevel@tonic-gate 		    o /= 8;
3200*0Sstevel@tonic-gate 		    if (PL_reg_poscache[o] & (1<<b)) {
3201*0Sstevel@tonic-gate 		    DEBUG_r(
3202*0Sstevel@tonic-gate 			PerlIO_printf(Perl_debug_log,
3203*0Sstevel@tonic-gate 				      "%*s  already tried at this position...\n",
3204*0Sstevel@tonic-gate 				      REPORT_CODE_OFF+PL_regindent*2, "")
3205*0Sstevel@tonic-gate 			);
3206*0Sstevel@tonic-gate 			if (PL_reg_flags & RF_false)
3207*0Sstevel@tonic-gate 			    sayYES;
3208*0Sstevel@tonic-gate 			else
3209*0Sstevel@tonic-gate 			    sayNO_SILENT;
3210*0Sstevel@tonic-gate 		    }
3211*0Sstevel@tonic-gate 		    PL_reg_poscache[o] |= (1<<b);
3212*0Sstevel@tonic-gate 		}
3213*0Sstevel@tonic-gate 		}
3214*0Sstevel@tonic-gate 
3215*0Sstevel@tonic-gate 		/* Prefer next over scan for minimal matching. */
3216*0Sstevel@tonic-gate 
3217*0Sstevel@tonic-gate 		if (cc->minmod) {
3218*0Sstevel@tonic-gate 		    PL_regcc = cc->oldcc;
3219*0Sstevel@tonic-gate 		    if (PL_regcc)
3220*0Sstevel@tonic-gate 			ln = PL_regcc->cur;
3221*0Sstevel@tonic-gate 		    cp = regcppush(cc->parenfloor);
3222*0Sstevel@tonic-gate 		    REGCP_SET(lastcp);
3223*0Sstevel@tonic-gate 		    if (regmatch(cc->next)) {
3224*0Sstevel@tonic-gate 			regcpblow(cp);
3225*0Sstevel@tonic-gate 			sayYES;	/* All done. */
3226*0Sstevel@tonic-gate 		    }
3227*0Sstevel@tonic-gate 		    REGCP_UNWIND(lastcp);
3228*0Sstevel@tonic-gate 		    regcppop();
3229*0Sstevel@tonic-gate 		    if (PL_regcc)
3230*0Sstevel@tonic-gate 			PL_regcc->cur = ln;
3231*0Sstevel@tonic-gate 		    PL_regcc = cc;
3232*0Sstevel@tonic-gate 
3233*0Sstevel@tonic-gate 		    if (n >= cc->max) {	/* Maximum greed exceeded? */
3234*0Sstevel@tonic-gate 			if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3235*0Sstevel@tonic-gate 			    && !(PL_reg_flags & RF_warned)) {
3236*0Sstevel@tonic-gate 			    PL_reg_flags |= RF_warned;
3237*0Sstevel@tonic-gate 			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3238*0Sstevel@tonic-gate 				 "Complex regular subexpression recursion",
3239*0Sstevel@tonic-gate 				 REG_INFTY - 1);
3240*0Sstevel@tonic-gate 			}
3241*0Sstevel@tonic-gate 			sayNO;
3242*0Sstevel@tonic-gate 		    }
3243*0Sstevel@tonic-gate 
3244*0Sstevel@tonic-gate 		    DEBUG_r(
3245*0Sstevel@tonic-gate 			PerlIO_printf(Perl_debug_log,
3246*0Sstevel@tonic-gate 				      "%*s  trying longer...\n",
3247*0Sstevel@tonic-gate 				      REPORT_CODE_OFF+PL_regindent*2, "")
3248*0Sstevel@tonic-gate 			);
3249*0Sstevel@tonic-gate 		    /* Try scanning more and see if it helps. */
3250*0Sstevel@tonic-gate 		    PL_reginput = locinput;
3251*0Sstevel@tonic-gate 		    cc->cur = n;
3252*0Sstevel@tonic-gate 		    cc->lastloc = locinput;
3253*0Sstevel@tonic-gate 		    cp = regcppush(cc->parenfloor);
3254*0Sstevel@tonic-gate 		    REGCP_SET(lastcp);
3255*0Sstevel@tonic-gate 		    if (regmatch(cc->scan)) {
3256*0Sstevel@tonic-gate 			regcpblow(cp);
3257*0Sstevel@tonic-gate 			sayYES;
3258*0Sstevel@tonic-gate 		    }
3259*0Sstevel@tonic-gate 		    REGCP_UNWIND(lastcp);
3260*0Sstevel@tonic-gate 		    regcppop();
3261*0Sstevel@tonic-gate 		    cc->cur = n - 1;
3262*0Sstevel@tonic-gate 		    cc->lastloc = lastloc;
3263*0Sstevel@tonic-gate 		    sayNO;
3264*0Sstevel@tonic-gate 		}
3265*0Sstevel@tonic-gate 
3266*0Sstevel@tonic-gate 		/* Prefer scan over next for maximal matching. */
3267*0Sstevel@tonic-gate 
3268*0Sstevel@tonic-gate 		if (n < cc->max) {	/* More greed allowed? */
3269*0Sstevel@tonic-gate 		    cp = regcppush(cc->parenfloor);
3270*0Sstevel@tonic-gate 		    cc->cur = n;
3271*0Sstevel@tonic-gate 		    cc->lastloc = locinput;
3272*0Sstevel@tonic-gate 		    REGCP_SET(lastcp);
3273*0Sstevel@tonic-gate 		    if (regmatch(cc->scan)) {
3274*0Sstevel@tonic-gate 			regcpblow(cp);
3275*0Sstevel@tonic-gate 			sayYES;
3276*0Sstevel@tonic-gate 		    }
3277*0Sstevel@tonic-gate 		    REGCP_UNWIND(lastcp);
3278*0Sstevel@tonic-gate 		    regcppop();		/* Restore some previous $<digit>s? */
3279*0Sstevel@tonic-gate 		    PL_reginput = locinput;
3280*0Sstevel@tonic-gate 		    DEBUG_r(
3281*0Sstevel@tonic-gate 			PerlIO_printf(Perl_debug_log,
3282*0Sstevel@tonic-gate 				      "%*s  failed, try continuation...\n",
3283*0Sstevel@tonic-gate 				      REPORT_CODE_OFF+PL_regindent*2, "")
3284*0Sstevel@tonic-gate 			);
3285*0Sstevel@tonic-gate 		}
3286*0Sstevel@tonic-gate 		if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3287*0Sstevel@tonic-gate 			&& !(PL_reg_flags & RF_warned)) {
3288*0Sstevel@tonic-gate 		    PL_reg_flags |= RF_warned;
3289*0Sstevel@tonic-gate 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3290*0Sstevel@tonic-gate 			 "Complex regular subexpression recursion",
3291*0Sstevel@tonic-gate 			 REG_INFTY - 1);
3292*0Sstevel@tonic-gate 		}
3293*0Sstevel@tonic-gate 
3294*0Sstevel@tonic-gate 		/* Failed deeper matches of scan, so see if this one works. */
3295*0Sstevel@tonic-gate 		PL_regcc = cc->oldcc;
3296*0Sstevel@tonic-gate 		if (PL_regcc)
3297*0Sstevel@tonic-gate 		    ln = PL_regcc->cur;
3298*0Sstevel@tonic-gate 		if (regmatch(cc->next))
3299*0Sstevel@tonic-gate 		    sayYES;
3300*0Sstevel@tonic-gate 		if (PL_regcc)
3301*0Sstevel@tonic-gate 		    PL_regcc->cur = ln;
3302*0Sstevel@tonic-gate 		PL_regcc = cc;
3303*0Sstevel@tonic-gate 		cc->cur = n - 1;
3304*0Sstevel@tonic-gate 		cc->lastloc = lastloc;
3305*0Sstevel@tonic-gate 		sayNO;
3306*0Sstevel@tonic-gate 	    }
3307*0Sstevel@tonic-gate 	    /* NOT REACHED */
3308*0Sstevel@tonic-gate 	case BRANCHJ:
3309*0Sstevel@tonic-gate 	    next = scan + ARG(scan);
3310*0Sstevel@tonic-gate 	    if (next == scan)
3311*0Sstevel@tonic-gate 		next = NULL;
3312*0Sstevel@tonic-gate 	    inner = NEXTOPER(NEXTOPER(scan));
3313*0Sstevel@tonic-gate 	    goto do_branch;
3314*0Sstevel@tonic-gate 	case BRANCH:
3315*0Sstevel@tonic-gate 	    inner = NEXTOPER(scan);
3316*0Sstevel@tonic-gate 	  do_branch:
3317*0Sstevel@tonic-gate 	    {
3318*0Sstevel@tonic-gate 		c1 = OP(scan);
3319*0Sstevel@tonic-gate 		if (OP(next) != c1)	/* No choice. */
3320*0Sstevel@tonic-gate 		    next = inner;	/* Avoid recursion. */
3321*0Sstevel@tonic-gate 		else {
3322*0Sstevel@tonic-gate 		    I32 lastparen = *PL_reglastparen;
3323*0Sstevel@tonic-gate 		    I32 unwind1;
3324*0Sstevel@tonic-gate 		    re_unwind_branch_t *uw;
3325*0Sstevel@tonic-gate 
3326*0Sstevel@tonic-gate 		    /* Put unwinding data on stack */
3327*0Sstevel@tonic-gate 		    unwind1 = SSNEWt(1,re_unwind_branch_t);
3328*0Sstevel@tonic-gate 		    uw = SSPTRt(unwind1,re_unwind_branch_t);
3329*0Sstevel@tonic-gate 		    uw->prev = unwind;
3330*0Sstevel@tonic-gate 		    unwind = unwind1;
3331*0Sstevel@tonic-gate 		    uw->type = ((c1 == BRANCH)
3332*0Sstevel@tonic-gate 				? RE_UNWIND_BRANCH
3333*0Sstevel@tonic-gate 				: RE_UNWIND_BRANCHJ);
3334*0Sstevel@tonic-gate 		    uw->lastparen = lastparen;
3335*0Sstevel@tonic-gate 		    uw->next = next;
3336*0Sstevel@tonic-gate 		    uw->locinput = locinput;
3337*0Sstevel@tonic-gate 		    uw->nextchr = nextchr;
3338*0Sstevel@tonic-gate #ifdef DEBUGGING
3339*0Sstevel@tonic-gate 		    uw->regindent = ++PL_regindent;
3340*0Sstevel@tonic-gate #endif
3341*0Sstevel@tonic-gate 
3342*0Sstevel@tonic-gate 		    REGCP_SET(uw->lastcp);
3343*0Sstevel@tonic-gate 
3344*0Sstevel@tonic-gate 		    /* Now go into the first branch */
3345*0Sstevel@tonic-gate 		    next = inner;
3346*0Sstevel@tonic-gate 		}
3347*0Sstevel@tonic-gate 	    }
3348*0Sstevel@tonic-gate 	    break;
3349*0Sstevel@tonic-gate 	case MINMOD:
3350*0Sstevel@tonic-gate 	    minmod = 1;
3351*0Sstevel@tonic-gate 	    break;
3352*0Sstevel@tonic-gate 	case CURLYM:
3353*0Sstevel@tonic-gate 	{
3354*0Sstevel@tonic-gate 	    I32 l = 0;
3355*0Sstevel@tonic-gate 	    CHECKPOINT lastcp;
3356*0Sstevel@tonic-gate 
3357*0Sstevel@tonic-gate 	    /* We suppose that the next guy does not need
3358*0Sstevel@tonic-gate 	       backtracking: in particular, it is of constant length,
3359*0Sstevel@tonic-gate 	       and has no parenths to influence future backrefs. */
3360*0Sstevel@tonic-gate 	    ln = ARG1(scan);  /* min to match */
3361*0Sstevel@tonic-gate 	    n  = ARG2(scan);  /* max to match */
3362*0Sstevel@tonic-gate 	    paren = scan->flags;
3363*0Sstevel@tonic-gate 	    if (paren) {
3364*0Sstevel@tonic-gate 		if (paren > PL_regsize)
3365*0Sstevel@tonic-gate 		    PL_regsize = paren;
3366*0Sstevel@tonic-gate 		if (paren > (I32)*PL_reglastparen)
3367*0Sstevel@tonic-gate 		    *PL_reglastparen = paren;
3368*0Sstevel@tonic-gate 	    }
3369*0Sstevel@tonic-gate 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3370*0Sstevel@tonic-gate 	    if (paren)
3371*0Sstevel@tonic-gate 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
3372*0Sstevel@tonic-gate 	    PL_reginput = locinput;
3373*0Sstevel@tonic-gate 	    if (minmod) {
3374*0Sstevel@tonic-gate 		minmod = 0;
3375*0Sstevel@tonic-gate 		if (ln && regrepeat_hard(scan, ln, &l) < ln)
3376*0Sstevel@tonic-gate 		    sayNO;
3377*0Sstevel@tonic-gate 		/* if we matched something zero-length we don't need to
3378*0Sstevel@tonic-gate 		   backtrack - capturing parens are already defined, so
3379*0Sstevel@tonic-gate 		   the caveat in the maximal case doesn't apply
3380*0Sstevel@tonic-gate 
3381*0Sstevel@tonic-gate 		   XXXX if ln == 0, we can redo this check first time
3382*0Sstevel@tonic-gate 		   through the following loop
3383*0Sstevel@tonic-gate 		*/
3384*0Sstevel@tonic-gate 		if (ln && l == 0)
3385*0Sstevel@tonic-gate 		    n = ln;	/* don't backtrack */
3386*0Sstevel@tonic-gate 		locinput = PL_reginput;
3387*0Sstevel@tonic-gate 		if (HAS_TEXT(next) || JUMPABLE(next)) {
3388*0Sstevel@tonic-gate 		    regnode *text_node = next;
3389*0Sstevel@tonic-gate 
3390*0Sstevel@tonic-gate 		    if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3391*0Sstevel@tonic-gate 
3392*0Sstevel@tonic-gate 		    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3393*0Sstevel@tonic-gate 		    else {
3394*0Sstevel@tonic-gate 			if (PL_regkind[(U8)OP(text_node)] == REF) {
3395*0Sstevel@tonic-gate 			    c1 = c2 = -1000;
3396*0Sstevel@tonic-gate 			    goto assume_ok_MM;
3397*0Sstevel@tonic-gate 			}
3398*0Sstevel@tonic-gate 			else { c1 = (U8)*STRING(text_node); }
3399*0Sstevel@tonic-gate 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3400*0Sstevel@tonic-gate 			    c2 = PL_fold[c1];
3401*0Sstevel@tonic-gate 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3402*0Sstevel@tonic-gate 			    c2 = PL_fold_locale[c1];
3403*0Sstevel@tonic-gate 			else
3404*0Sstevel@tonic-gate 			    c2 = c1;
3405*0Sstevel@tonic-gate 		    }
3406*0Sstevel@tonic-gate 		}
3407*0Sstevel@tonic-gate 		else
3408*0Sstevel@tonic-gate 		    c1 = c2 = -1000;
3409*0Sstevel@tonic-gate 	    assume_ok_MM:
3410*0Sstevel@tonic-gate 		REGCP_SET(lastcp);
3411*0Sstevel@tonic-gate 		/* This may be improved if l == 0.  */
3412*0Sstevel@tonic-gate 		while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3413*0Sstevel@tonic-gate 		    /* If it could work, try it. */
3414*0Sstevel@tonic-gate 		    if (c1 == -1000 ||
3415*0Sstevel@tonic-gate 			UCHARAT(PL_reginput) == c1 ||
3416*0Sstevel@tonic-gate 			UCHARAT(PL_reginput) == c2)
3417*0Sstevel@tonic-gate 		    {
3418*0Sstevel@tonic-gate 			if (paren) {
3419*0Sstevel@tonic-gate 			    if (ln) {
3420*0Sstevel@tonic-gate 				PL_regstartp[paren] =
3421*0Sstevel@tonic-gate 				    HOPc(PL_reginput, -l) - PL_bostr;
3422*0Sstevel@tonic-gate 				PL_regendp[paren] = PL_reginput - PL_bostr;
3423*0Sstevel@tonic-gate 			    }
3424*0Sstevel@tonic-gate 			    else
3425*0Sstevel@tonic-gate 				PL_regendp[paren] = -1;
3426*0Sstevel@tonic-gate 			}
3427*0Sstevel@tonic-gate 			if (regmatch(next))
3428*0Sstevel@tonic-gate 			    sayYES;
3429*0Sstevel@tonic-gate 			REGCP_UNWIND(lastcp);
3430*0Sstevel@tonic-gate 		    }
3431*0Sstevel@tonic-gate 		    /* Couldn't or didn't -- move forward. */
3432*0Sstevel@tonic-gate 		    PL_reginput = locinput;
3433*0Sstevel@tonic-gate 		    if (regrepeat_hard(scan, 1, &l)) {
3434*0Sstevel@tonic-gate 			ln++;
3435*0Sstevel@tonic-gate 			locinput = PL_reginput;
3436*0Sstevel@tonic-gate 		    }
3437*0Sstevel@tonic-gate 		    else
3438*0Sstevel@tonic-gate 			sayNO;
3439*0Sstevel@tonic-gate 		}
3440*0Sstevel@tonic-gate 	    }
3441*0Sstevel@tonic-gate 	    else {
3442*0Sstevel@tonic-gate 		n = regrepeat_hard(scan, n, &l);
3443*0Sstevel@tonic-gate 		/* if we matched something zero-length we don't need to
3444*0Sstevel@tonic-gate 		   backtrack, unless the minimum count is zero and we
3445*0Sstevel@tonic-gate 		   are capturing the result - in that case the capture
3446*0Sstevel@tonic-gate 		   being defined or not may affect later execution
3447*0Sstevel@tonic-gate 		*/
3448*0Sstevel@tonic-gate 		if (n != 0 && l == 0 && !(paren && ln == 0))
3449*0Sstevel@tonic-gate 		    ln = n;	/* don't backtrack */
3450*0Sstevel@tonic-gate 		locinput = PL_reginput;
3451*0Sstevel@tonic-gate 		DEBUG_r(
3452*0Sstevel@tonic-gate 		    PerlIO_printf(Perl_debug_log,
3453*0Sstevel@tonic-gate 				  "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3454*0Sstevel@tonic-gate 				  (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3455*0Sstevel@tonic-gate 				  (IV) n, (IV)l)
3456*0Sstevel@tonic-gate 		    );
3457*0Sstevel@tonic-gate 		if (n >= ln) {
3458*0Sstevel@tonic-gate 		    if (HAS_TEXT(next) || JUMPABLE(next)) {
3459*0Sstevel@tonic-gate 			regnode *text_node = next;
3460*0Sstevel@tonic-gate 
3461*0Sstevel@tonic-gate 			if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3462*0Sstevel@tonic-gate 
3463*0Sstevel@tonic-gate 			if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3464*0Sstevel@tonic-gate 			else {
3465*0Sstevel@tonic-gate 			    if (PL_regkind[(U8)OP(text_node)] == REF) {
3466*0Sstevel@tonic-gate 				c1 = c2 = -1000;
3467*0Sstevel@tonic-gate 				goto assume_ok_REG;
3468*0Sstevel@tonic-gate 			    }
3469*0Sstevel@tonic-gate 			    else { c1 = (U8)*STRING(text_node); }
3470*0Sstevel@tonic-gate 
3471*0Sstevel@tonic-gate 			    if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3472*0Sstevel@tonic-gate 				c2 = PL_fold[c1];
3473*0Sstevel@tonic-gate 			    else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3474*0Sstevel@tonic-gate 				c2 = PL_fold_locale[c1];
3475*0Sstevel@tonic-gate 			    else
3476*0Sstevel@tonic-gate 				c2 = c1;
3477*0Sstevel@tonic-gate 			}
3478*0Sstevel@tonic-gate 		    }
3479*0Sstevel@tonic-gate 		    else
3480*0Sstevel@tonic-gate 			c1 = c2 = -1000;
3481*0Sstevel@tonic-gate 		}
3482*0Sstevel@tonic-gate 	    assume_ok_REG:
3483*0Sstevel@tonic-gate 		REGCP_SET(lastcp);
3484*0Sstevel@tonic-gate 		while (n >= ln) {
3485*0Sstevel@tonic-gate 		    /* If it could work, try it. */
3486*0Sstevel@tonic-gate 		    if (c1 == -1000 ||
3487*0Sstevel@tonic-gate 			UCHARAT(PL_reginput) == c1 ||
3488*0Sstevel@tonic-gate 			UCHARAT(PL_reginput) == c2)
3489*0Sstevel@tonic-gate 		    {
3490*0Sstevel@tonic-gate 			DEBUG_r(
3491*0Sstevel@tonic-gate 				PerlIO_printf(Perl_debug_log,
3492*0Sstevel@tonic-gate 					      "%*s  trying tail with n=%"IVdf"...\n",
3493*0Sstevel@tonic-gate 					      (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3494*0Sstevel@tonic-gate 			    );
3495*0Sstevel@tonic-gate 			if (paren) {
3496*0Sstevel@tonic-gate 			    if (n) {
3497*0Sstevel@tonic-gate 				PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3498*0Sstevel@tonic-gate 				PL_regendp[paren] = PL_reginput - PL_bostr;
3499*0Sstevel@tonic-gate 			    }
3500*0Sstevel@tonic-gate 			    else
3501*0Sstevel@tonic-gate 				PL_regendp[paren] = -1;
3502*0Sstevel@tonic-gate 			}
3503*0Sstevel@tonic-gate 			if (regmatch(next))
3504*0Sstevel@tonic-gate 			    sayYES;
3505*0Sstevel@tonic-gate 			REGCP_UNWIND(lastcp);
3506*0Sstevel@tonic-gate 		    }
3507*0Sstevel@tonic-gate 		    /* Couldn't or didn't -- back up. */
3508*0Sstevel@tonic-gate 		    n--;
3509*0Sstevel@tonic-gate 		    locinput = HOPc(locinput, -l);
3510*0Sstevel@tonic-gate 		    PL_reginput = locinput;
3511*0Sstevel@tonic-gate 		}
3512*0Sstevel@tonic-gate 	    }
3513*0Sstevel@tonic-gate 	    sayNO;
3514*0Sstevel@tonic-gate 	    break;
3515*0Sstevel@tonic-gate 	}
3516*0Sstevel@tonic-gate 	case CURLYN:
3517*0Sstevel@tonic-gate 	    paren = scan->flags;	/* Which paren to set */
3518*0Sstevel@tonic-gate 	    if (paren > PL_regsize)
3519*0Sstevel@tonic-gate 		PL_regsize = paren;
3520*0Sstevel@tonic-gate 	    if (paren > (I32)*PL_reglastparen)
3521*0Sstevel@tonic-gate 		*PL_reglastparen = paren;
3522*0Sstevel@tonic-gate 	    ln = ARG1(scan);  /* min to match */
3523*0Sstevel@tonic-gate 	    n  = ARG2(scan);  /* max to match */
3524*0Sstevel@tonic-gate             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3525*0Sstevel@tonic-gate 	    goto repeat;
3526*0Sstevel@tonic-gate 	case CURLY:
3527*0Sstevel@tonic-gate 	    paren = 0;
3528*0Sstevel@tonic-gate 	    ln = ARG1(scan);  /* min to match */
3529*0Sstevel@tonic-gate 	    n  = ARG2(scan);  /* max to match */
3530*0Sstevel@tonic-gate 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3531*0Sstevel@tonic-gate 	    goto repeat;
3532*0Sstevel@tonic-gate 	case STAR:
3533*0Sstevel@tonic-gate 	    ln = 0;
3534*0Sstevel@tonic-gate 	    n = REG_INFTY;
3535*0Sstevel@tonic-gate 	    scan = NEXTOPER(scan);
3536*0Sstevel@tonic-gate 	    paren = 0;
3537*0Sstevel@tonic-gate 	    goto repeat;
3538*0Sstevel@tonic-gate 	case PLUS:
3539*0Sstevel@tonic-gate 	    ln = 1;
3540*0Sstevel@tonic-gate 	    n = REG_INFTY;
3541*0Sstevel@tonic-gate 	    scan = NEXTOPER(scan);
3542*0Sstevel@tonic-gate 	    paren = 0;
3543*0Sstevel@tonic-gate 	  repeat:
3544*0Sstevel@tonic-gate 	    /*
3545*0Sstevel@tonic-gate 	    * Lookahead to avoid useless match attempts
3546*0Sstevel@tonic-gate 	    * when we know what character comes next.
3547*0Sstevel@tonic-gate 	    */
3548*0Sstevel@tonic-gate 
3549*0Sstevel@tonic-gate 	    /*
3550*0Sstevel@tonic-gate 	    * Used to only do .*x and .*?x, but now it allows
3551*0Sstevel@tonic-gate 	    * for )'s, ('s and (?{ ... })'s to be in the way
3552*0Sstevel@tonic-gate 	    * of the quantifier and the EXACT-like node.  -- japhy
3553*0Sstevel@tonic-gate 	    */
3554*0Sstevel@tonic-gate 
3555*0Sstevel@tonic-gate 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
3556*0Sstevel@tonic-gate 		U8 *s;
3557*0Sstevel@tonic-gate 		regnode *text_node = next;
3558*0Sstevel@tonic-gate 
3559*0Sstevel@tonic-gate 		if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3560*0Sstevel@tonic-gate 
3561*0Sstevel@tonic-gate 		if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3562*0Sstevel@tonic-gate 		else {
3563*0Sstevel@tonic-gate 		    if (PL_regkind[(U8)OP(text_node)] == REF) {
3564*0Sstevel@tonic-gate 			c1 = c2 = -1000;
3565*0Sstevel@tonic-gate 			goto assume_ok_easy;
3566*0Sstevel@tonic-gate 		    }
3567*0Sstevel@tonic-gate 		    else { s = (U8*)STRING(text_node); }
3568*0Sstevel@tonic-gate 
3569*0Sstevel@tonic-gate 		    if (!UTF) {
3570*0Sstevel@tonic-gate 			c2 = c1 = *s;
3571*0Sstevel@tonic-gate 			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3572*0Sstevel@tonic-gate 			    c2 = PL_fold[c1];
3573*0Sstevel@tonic-gate 			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3574*0Sstevel@tonic-gate 			    c2 = PL_fold_locale[c1];
3575*0Sstevel@tonic-gate 		    }
3576*0Sstevel@tonic-gate 		    else { /* UTF */
3577*0Sstevel@tonic-gate 			if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3578*0Sstevel@tonic-gate 			     STRLEN ulen1, ulen2;
3579*0Sstevel@tonic-gate 			     U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3580*0Sstevel@tonic-gate 			     U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3581*0Sstevel@tonic-gate 
3582*0Sstevel@tonic-gate 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3583*0Sstevel@tonic-gate 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3584*0Sstevel@tonic-gate 
3585*0Sstevel@tonic-gate 			     c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
3586*0Sstevel@tonic-gate 						 ckWARN(WARN_UTF8) ?
3587*0Sstevel@tonic-gate 						 0 : UTF8_ALLOW_ANY);
3588*0Sstevel@tonic-gate 			     c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
3589*0Sstevel@tonic-gate 						 ckWARN(WARN_UTF8) ?
3590*0Sstevel@tonic-gate 						 0 : UTF8_ALLOW_ANY);
3591*0Sstevel@tonic-gate 			}
3592*0Sstevel@tonic-gate 			else {
3593*0Sstevel@tonic-gate 			    c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
3594*0Sstevel@tonic-gate 						     ckWARN(WARN_UTF8) ?
3595*0Sstevel@tonic-gate 						     0 : UTF8_ALLOW_ANY);
3596*0Sstevel@tonic-gate 			}
3597*0Sstevel@tonic-gate 		    }
3598*0Sstevel@tonic-gate 		}
3599*0Sstevel@tonic-gate 	    }
3600*0Sstevel@tonic-gate 	    else
3601*0Sstevel@tonic-gate 		c1 = c2 = -1000;
3602*0Sstevel@tonic-gate 	assume_ok_easy:
3603*0Sstevel@tonic-gate 	    PL_reginput = locinput;
3604*0Sstevel@tonic-gate 	    if (minmod) {
3605*0Sstevel@tonic-gate 		CHECKPOINT lastcp;
3606*0Sstevel@tonic-gate 		minmod = 0;
3607*0Sstevel@tonic-gate 		if (ln && regrepeat(scan, ln) < ln)
3608*0Sstevel@tonic-gate 		    sayNO;
3609*0Sstevel@tonic-gate 		locinput = PL_reginput;
3610*0Sstevel@tonic-gate 		REGCP_SET(lastcp);
3611*0Sstevel@tonic-gate 		if (c1 != -1000) {
3612*0Sstevel@tonic-gate 		    char *e; /* Should not check after this */
3613*0Sstevel@tonic-gate 		    char *old = locinput;
3614*0Sstevel@tonic-gate 		    int count = 0;
3615*0Sstevel@tonic-gate 
3616*0Sstevel@tonic-gate 		    if  (n == REG_INFTY) {
3617*0Sstevel@tonic-gate 			e = PL_regeol - 1;
3618*0Sstevel@tonic-gate 			if (do_utf8)
3619*0Sstevel@tonic-gate 			    while (UTF8_IS_CONTINUATION(*(U8*)e))
3620*0Sstevel@tonic-gate 				e--;
3621*0Sstevel@tonic-gate 		    }
3622*0Sstevel@tonic-gate 		    else if (do_utf8) {
3623*0Sstevel@tonic-gate 			int m = n - ln;
3624*0Sstevel@tonic-gate 			for (e = locinput;
3625*0Sstevel@tonic-gate 			     m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3626*0Sstevel@tonic-gate 			    e += UTF8SKIP(e);
3627*0Sstevel@tonic-gate 		    }
3628*0Sstevel@tonic-gate 		    else {
3629*0Sstevel@tonic-gate 			e = locinput + n - ln;
3630*0Sstevel@tonic-gate 			if (e >= PL_regeol)
3631*0Sstevel@tonic-gate 			    e = PL_regeol - 1;
3632*0Sstevel@tonic-gate 		    }
3633*0Sstevel@tonic-gate 		    while (1) {
3634*0Sstevel@tonic-gate 			/* Find place 'next' could work */
3635*0Sstevel@tonic-gate 			if (!do_utf8) {
3636*0Sstevel@tonic-gate 			    if (c1 == c2) {
3637*0Sstevel@tonic-gate 				while (locinput <= e &&
3638*0Sstevel@tonic-gate 				       UCHARAT(locinput) != c1)
3639*0Sstevel@tonic-gate 				    locinput++;
3640*0Sstevel@tonic-gate 			    } else {
3641*0Sstevel@tonic-gate 				while (locinput <= e
3642*0Sstevel@tonic-gate 				       && UCHARAT(locinput) != c1
3643*0Sstevel@tonic-gate 				       && UCHARAT(locinput) != c2)
3644*0Sstevel@tonic-gate 				    locinput++;
3645*0Sstevel@tonic-gate 			    }
3646*0Sstevel@tonic-gate 			    count = locinput - old;
3647*0Sstevel@tonic-gate 			}
3648*0Sstevel@tonic-gate 			else {
3649*0Sstevel@tonic-gate 			    STRLEN len;
3650*0Sstevel@tonic-gate 			    if (c1 == c2) {
3651*0Sstevel@tonic-gate 				/* count initialised to
3652*0Sstevel@tonic-gate 				 * utf8_distance(old, locinput) */
3653*0Sstevel@tonic-gate 				while (locinput <= e &&
3654*0Sstevel@tonic-gate 				       utf8n_to_uvchr((U8*)locinput,
3655*0Sstevel@tonic-gate 						      UTF8_MAXLEN, &len,
3656*0Sstevel@tonic-gate 						      ckWARN(WARN_UTF8) ?
3657*0Sstevel@tonic-gate 						      0 : UTF8_ALLOW_ANY) != (UV)c1) {
3658*0Sstevel@tonic-gate 				    locinput += len;
3659*0Sstevel@tonic-gate 				    count++;
3660*0Sstevel@tonic-gate 				}
3661*0Sstevel@tonic-gate 			    } else {
3662*0Sstevel@tonic-gate 				/* count initialised to
3663*0Sstevel@tonic-gate 				 * utf8_distance(old, locinput) */
3664*0Sstevel@tonic-gate 				while (locinput <= e) {
3665*0Sstevel@tonic-gate 				    UV c = utf8n_to_uvchr((U8*)locinput,
3666*0Sstevel@tonic-gate 							  UTF8_MAXLEN, &len,
3667*0Sstevel@tonic-gate 							  ckWARN(WARN_UTF8) ?
3668*0Sstevel@tonic-gate 							  0 : UTF8_ALLOW_ANY);
3669*0Sstevel@tonic-gate 				    if (c == (UV)c1 || c == (UV)c2)
3670*0Sstevel@tonic-gate 					break;
3671*0Sstevel@tonic-gate 				    locinput += len;
3672*0Sstevel@tonic-gate 				    count++;
3673*0Sstevel@tonic-gate 				}
3674*0Sstevel@tonic-gate 			    }
3675*0Sstevel@tonic-gate 			}
3676*0Sstevel@tonic-gate 			if (locinput > e)
3677*0Sstevel@tonic-gate 			    sayNO;
3678*0Sstevel@tonic-gate 			/* PL_reginput == old now */
3679*0Sstevel@tonic-gate 			if (locinput != old) {
3680*0Sstevel@tonic-gate 			    ln = 1;	/* Did some */
3681*0Sstevel@tonic-gate 			    if (regrepeat(scan, count) < count)
3682*0Sstevel@tonic-gate 				sayNO;
3683*0Sstevel@tonic-gate 			}
3684*0Sstevel@tonic-gate 			/* PL_reginput == locinput now */
3685*0Sstevel@tonic-gate 			TRYPAREN(paren, ln, locinput);
3686*0Sstevel@tonic-gate 			PL_reginput = locinput;	/* Could be reset... */
3687*0Sstevel@tonic-gate 			REGCP_UNWIND(lastcp);
3688*0Sstevel@tonic-gate 			/* Couldn't or didn't -- move forward. */
3689*0Sstevel@tonic-gate 			old = locinput;
3690*0Sstevel@tonic-gate 			if (do_utf8)
3691*0Sstevel@tonic-gate 			    locinput += UTF8SKIP(locinput);
3692*0Sstevel@tonic-gate 			else
3693*0Sstevel@tonic-gate 			    locinput++;
3694*0Sstevel@tonic-gate 			count = 1;
3695*0Sstevel@tonic-gate 		    }
3696*0Sstevel@tonic-gate 		}
3697*0Sstevel@tonic-gate 		else
3698*0Sstevel@tonic-gate 		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3699*0Sstevel@tonic-gate 		    UV c;
3700*0Sstevel@tonic-gate 		    if (c1 != -1000) {
3701*0Sstevel@tonic-gate 			if (do_utf8)
3702*0Sstevel@tonic-gate 			    c = utf8n_to_uvchr((U8*)PL_reginput,
3703*0Sstevel@tonic-gate 					       UTF8_MAXLEN, 0,
3704*0Sstevel@tonic-gate 					       ckWARN(WARN_UTF8) ?
3705*0Sstevel@tonic-gate 					       0 : UTF8_ALLOW_ANY);
3706*0Sstevel@tonic-gate 			else
3707*0Sstevel@tonic-gate 			    c = UCHARAT(PL_reginput);
3708*0Sstevel@tonic-gate 			/* If it could work, try it. */
3709*0Sstevel@tonic-gate 		        if (c == (UV)c1 || c == (UV)c2)
3710*0Sstevel@tonic-gate 		        {
3711*0Sstevel@tonic-gate 			    TRYPAREN(paren, ln, PL_reginput);
3712*0Sstevel@tonic-gate 			    REGCP_UNWIND(lastcp);
3713*0Sstevel@tonic-gate 		        }
3714*0Sstevel@tonic-gate 		    }
3715*0Sstevel@tonic-gate 		    /* If it could work, try it. */
3716*0Sstevel@tonic-gate 		    else if (c1 == -1000)
3717*0Sstevel@tonic-gate 		    {
3718*0Sstevel@tonic-gate 			TRYPAREN(paren, ln, PL_reginput);
3719*0Sstevel@tonic-gate 			REGCP_UNWIND(lastcp);
3720*0Sstevel@tonic-gate 		    }
3721*0Sstevel@tonic-gate 		    /* Couldn't or didn't -- move forward. */
3722*0Sstevel@tonic-gate 		    PL_reginput = locinput;
3723*0Sstevel@tonic-gate 		    if (regrepeat(scan, 1)) {
3724*0Sstevel@tonic-gate 			ln++;
3725*0Sstevel@tonic-gate 			locinput = PL_reginput;
3726*0Sstevel@tonic-gate 		    }
3727*0Sstevel@tonic-gate 		    else
3728*0Sstevel@tonic-gate 			sayNO;
3729*0Sstevel@tonic-gate 		}
3730*0Sstevel@tonic-gate 	    }
3731*0Sstevel@tonic-gate 	    else {
3732*0Sstevel@tonic-gate 		CHECKPOINT lastcp;
3733*0Sstevel@tonic-gate 		n = regrepeat(scan, n);
3734*0Sstevel@tonic-gate 		locinput = PL_reginput;
3735*0Sstevel@tonic-gate 		if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3736*0Sstevel@tonic-gate 		    ((!PL_multiline && OP(next) != MEOL) ||
3737*0Sstevel@tonic-gate 			OP(next) == SEOL || OP(next) == EOS))
3738*0Sstevel@tonic-gate 		{
3739*0Sstevel@tonic-gate 		    ln = n;			/* why back off? */
3740*0Sstevel@tonic-gate 		    /* ...because $ and \Z can match before *and* after
3741*0Sstevel@tonic-gate 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3742*0Sstevel@tonic-gate 		       We should back off by one in this case. */
3743*0Sstevel@tonic-gate 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3744*0Sstevel@tonic-gate 			ln--;
3745*0Sstevel@tonic-gate 		}
3746*0Sstevel@tonic-gate 		REGCP_SET(lastcp);
3747*0Sstevel@tonic-gate 		if (paren) {
3748*0Sstevel@tonic-gate 		    UV c = 0;
3749*0Sstevel@tonic-gate 		    while (n >= ln) {
3750*0Sstevel@tonic-gate 			if (c1 != -1000) {
3751*0Sstevel@tonic-gate 			    if (do_utf8)
3752*0Sstevel@tonic-gate 				c = utf8n_to_uvchr((U8*)PL_reginput,
3753*0Sstevel@tonic-gate 						   UTF8_MAXLEN, 0,
3754*0Sstevel@tonic-gate 						   ckWARN(WARN_UTF8) ?
3755*0Sstevel@tonic-gate 						   0 : UTF8_ALLOW_ANY);
3756*0Sstevel@tonic-gate 			    else
3757*0Sstevel@tonic-gate 				c = UCHARAT(PL_reginput);
3758*0Sstevel@tonic-gate 			}
3759*0Sstevel@tonic-gate 			/* If it could work, try it. */
3760*0Sstevel@tonic-gate 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3761*0Sstevel@tonic-gate 			    {
3762*0Sstevel@tonic-gate 				TRYPAREN(paren, n, PL_reginput);
3763*0Sstevel@tonic-gate 				REGCP_UNWIND(lastcp);
3764*0Sstevel@tonic-gate 			    }
3765*0Sstevel@tonic-gate 			/* Couldn't or didn't -- back up. */
3766*0Sstevel@tonic-gate 			n--;
3767*0Sstevel@tonic-gate 			PL_reginput = locinput = HOPc(locinput, -1);
3768*0Sstevel@tonic-gate 		    }
3769*0Sstevel@tonic-gate 		}
3770*0Sstevel@tonic-gate 		else {
3771*0Sstevel@tonic-gate 		    UV c = 0;
3772*0Sstevel@tonic-gate 		    while (n >= ln) {
3773*0Sstevel@tonic-gate 			if (c1 != -1000) {
3774*0Sstevel@tonic-gate 			    if (do_utf8)
3775*0Sstevel@tonic-gate 				c = utf8n_to_uvchr((U8*)PL_reginput,
3776*0Sstevel@tonic-gate 						   UTF8_MAXLEN, 0,
3777*0Sstevel@tonic-gate 						   ckWARN(WARN_UTF8) ?
3778*0Sstevel@tonic-gate 						   0 : UTF8_ALLOW_ANY);
3779*0Sstevel@tonic-gate 			    else
3780*0Sstevel@tonic-gate 				c = UCHARAT(PL_reginput);
3781*0Sstevel@tonic-gate 			}
3782*0Sstevel@tonic-gate 			/* If it could work, try it. */
3783*0Sstevel@tonic-gate 			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3784*0Sstevel@tonic-gate 			    {
3785*0Sstevel@tonic-gate 				TRYPAREN(paren, n, PL_reginput);
3786*0Sstevel@tonic-gate 				REGCP_UNWIND(lastcp);
3787*0Sstevel@tonic-gate 			    }
3788*0Sstevel@tonic-gate 			/* Couldn't or didn't -- back up. */
3789*0Sstevel@tonic-gate 			n--;
3790*0Sstevel@tonic-gate 			PL_reginput = locinput = HOPc(locinput, -1);
3791*0Sstevel@tonic-gate 		    }
3792*0Sstevel@tonic-gate 		}
3793*0Sstevel@tonic-gate 	    }
3794*0Sstevel@tonic-gate 	    sayNO;
3795*0Sstevel@tonic-gate 	    break;
3796*0Sstevel@tonic-gate 	case END:
3797*0Sstevel@tonic-gate 	    if (PL_reg_call_cc) {
3798*0Sstevel@tonic-gate 		re_cc_state *cur_call_cc = PL_reg_call_cc;
3799*0Sstevel@tonic-gate 		CURCUR *cctmp = PL_regcc;
3800*0Sstevel@tonic-gate 		regexp *re = PL_reg_re;
3801*0Sstevel@tonic-gate 		CHECKPOINT cp, lastcp;
3802*0Sstevel@tonic-gate 
3803*0Sstevel@tonic-gate 		cp = regcppush(0);	/* Save *all* the positions. */
3804*0Sstevel@tonic-gate 		REGCP_SET(lastcp);
3805*0Sstevel@tonic-gate 		regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3806*0Sstevel@tonic-gate 						    the caller. */
3807*0Sstevel@tonic-gate 		PL_reginput = locinput;	/* Make position available to
3808*0Sstevel@tonic-gate 					   the callcc. */
3809*0Sstevel@tonic-gate 		cache_re(PL_reg_call_cc->re);
3810*0Sstevel@tonic-gate 		PL_regcc = PL_reg_call_cc->cc;
3811*0Sstevel@tonic-gate 		PL_reg_call_cc = PL_reg_call_cc->prev;
3812*0Sstevel@tonic-gate 		if (regmatch(cur_call_cc->node)) {
3813*0Sstevel@tonic-gate 		    PL_reg_call_cc = cur_call_cc;
3814*0Sstevel@tonic-gate 		    regcpblow(cp);
3815*0Sstevel@tonic-gate 		    sayYES;
3816*0Sstevel@tonic-gate 		}
3817*0Sstevel@tonic-gate 		REGCP_UNWIND(lastcp);
3818*0Sstevel@tonic-gate 		regcppop();
3819*0Sstevel@tonic-gate 		PL_reg_call_cc = cur_call_cc;
3820*0Sstevel@tonic-gate 		PL_regcc = cctmp;
3821*0Sstevel@tonic-gate 		PL_reg_re = re;
3822*0Sstevel@tonic-gate 		cache_re(re);
3823*0Sstevel@tonic-gate 
3824*0Sstevel@tonic-gate 		DEBUG_r(
3825*0Sstevel@tonic-gate 		    PerlIO_printf(Perl_debug_log,
3826*0Sstevel@tonic-gate 				  "%*s  continuation failed...\n",
3827*0Sstevel@tonic-gate 				  REPORT_CODE_OFF+PL_regindent*2, "")
3828*0Sstevel@tonic-gate 		    );
3829*0Sstevel@tonic-gate 		sayNO_SILENT;
3830*0Sstevel@tonic-gate 	    }
3831*0Sstevel@tonic-gate 	    if (locinput < PL_regtill) {
3832*0Sstevel@tonic-gate 		DEBUG_r(PerlIO_printf(Perl_debug_log,
3833*0Sstevel@tonic-gate 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3834*0Sstevel@tonic-gate 				      PL_colors[4],
3835*0Sstevel@tonic-gate 				      (long)(locinput - PL_reg_starttry),
3836*0Sstevel@tonic-gate 				      (long)(PL_regtill - PL_reg_starttry),
3837*0Sstevel@tonic-gate 				      PL_colors[5]));
3838*0Sstevel@tonic-gate 		sayNO_FINAL;		/* Cannot match: too short. */
3839*0Sstevel@tonic-gate 	    }
3840*0Sstevel@tonic-gate 	    PL_reginput = locinput;	/* put where regtry can find it */
3841*0Sstevel@tonic-gate 	    sayYES_FINAL;		/* Success! */
3842*0Sstevel@tonic-gate 	case SUCCEED:
3843*0Sstevel@tonic-gate 	    PL_reginput = locinput;	/* put where regtry can find it */
3844*0Sstevel@tonic-gate 	    sayYES_LOUD;		/* Success! */
3845*0Sstevel@tonic-gate 	case SUSPEND:
3846*0Sstevel@tonic-gate 	    n = 1;
3847*0Sstevel@tonic-gate 	    PL_reginput = locinput;
3848*0Sstevel@tonic-gate 	    goto do_ifmatch;
3849*0Sstevel@tonic-gate 	case UNLESSM:
3850*0Sstevel@tonic-gate 	    n = 0;
3851*0Sstevel@tonic-gate 	    if (scan->flags) {
3852*0Sstevel@tonic-gate 		s = HOPBACKc(locinput, scan->flags);
3853*0Sstevel@tonic-gate 		if (!s)
3854*0Sstevel@tonic-gate 		    goto say_yes;
3855*0Sstevel@tonic-gate 		PL_reginput = s;
3856*0Sstevel@tonic-gate 	    }
3857*0Sstevel@tonic-gate 	    else
3858*0Sstevel@tonic-gate 		PL_reginput = locinput;
3859*0Sstevel@tonic-gate 	    PL_reg_flags ^= RF_false;
3860*0Sstevel@tonic-gate 	    goto do_ifmatch;
3861*0Sstevel@tonic-gate 	case IFMATCH:
3862*0Sstevel@tonic-gate 	    n = 1;
3863*0Sstevel@tonic-gate 	    if (scan->flags) {
3864*0Sstevel@tonic-gate 		s = HOPBACKc(locinput, scan->flags);
3865*0Sstevel@tonic-gate 		if (!s)
3866*0Sstevel@tonic-gate 		    goto say_no;
3867*0Sstevel@tonic-gate 		PL_reginput = s;
3868*0Sstevel@tonic-gate 	    }
3869*0Sstevel@tonic-gate 	    else
3870*0Sstevel@tonic-gate 		PL_reginput = locinput;
3871*0Sstevel@tonic-gate 
3872*0Sstevel@tonic-gate 	  do_ifmatch:
3873*0Sstevel@tonic-gate 	    inner = NEXTOPER(NEXTOPER(scan));
3874*0Sstevel@tonic-gate 	    if (regmatch(inner) != n) {
3875*0Sstevel@tonic-gate 		if (n == 0)
3876*0Sstevel@tonic-gate 		    PL_reg_flags ^= RF_false;
3877*0Sstevel@tonic-gate 	      say_no:
3878*0Sstevel@tonic-gate 		if (logical) {
3879*0Sstevel@tonic-gate 		    logical = 0;
3880*0Sstevel@tonic-gate 		    sw = 0;
3881*0Sstevel@tonic-gate 		    goto do_longjump;
3882*0Sstevel@tonic-gate 		}
3883*0Sstevel@tonic-gate 		else
3884*0Sstevel@tonic-gate 		    sayNO;
3885*0Sstevel@tonic-gate 	    }
3886*0Sstevel@tonic-gate 	    if (n == 0)
3887*0Sstevel@tonic-gate 		PL_reg_flags ^= RF_false;
3888*0Sstevel@tonic-gate 	  say_yes:
3889*0Sstevel@tonic-gate 	    if (logical) {
3890*0Sstevel@tonic-gate 		logical = 0;
3891*0Sstevel@tonic-gate 		sw = 1;
3892*0Sstevel@tonic-gate 	    }
3893*0Sstevel@tonic-gate 	    if (OP(scan) == SUSPEND) {
3894*0Sstevel@tonic-gate 		locinput = PL_reginput;
3895*0Sstevel@tonic-gate 		nextchr = UCHARAT(locinput);
3896*0Sstevel@tonic-gate 	    }
3897*0Sstevel@tonic-gate 	    /* FALL THROUGH. */
3898*0Sstevel@tonic-gate 	case LONGJMP:
3899*0Sstevel@tonic-gate 	  do_longjump:
3900*0Sstevel@tonic-gate 	    next = scan + ARG(scan);
3901*0Sstevel@tonic-gate 	    if (next == scan)
3902*0Sstevel@tonic-gate 		next = NULL;
3903*0Sstevel@tonic-gate 	    break;
3904*0Sstevel@tonic-gate 	default:
3905*0Sstevel@tonic-gate 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3906*0Sstevel@tonic-gate 			  PTR2UV(scan), OP(scan));
3907*0Sstevel@tonic-gate 	    Perl_croak(aTHX_ "regexp memory corruption");
3908*0Sstevel@tonic-gate 	}
3909*0Sstevel@tonic-gate       reenter:
3910*0Sstevel@tonic-gate 	scan = next;
3911*0Sstevel@tonic-gate     }
3912*0Sstevel@tonic-gate 
3913*0Sstevel@tonic-gate     /*
3914*0Sstevel@tonic-gate     * We get here only if there's trouble -- normally "case END" is
3915*0Sstevel@tonic-gate     * the terminating point.
3916*0Sstevel@tonic-gate     */
3917*0Sstevel@tonic-gate     Perl_croak(aTHX_ "corrupted regexp pointers");
3918*0Sstevel@tonic-gate     /*NOTREACHED*/
3919*0Sstevel@tonic-gate     sayNO;
3920*0Sstevel@tonic-gate 
3921*0Sstevel@tonic-gate yes_loud:
3922*0Sstevel@tonic-gate     DEBUG_r(
3923*0Sstevel@tonic-gate 	PerlIO_printf(Perl_debug_log,
3924*0Sstevel@tonic-gate 		      "%*s  %scould match...%s\n",
3925*0Sstevel@tonic-gate 		      REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3926*0Sstevel@tonic-gate 	);
3927*0Sstevel@tonic-gate     goto yes;
3928*0Sstevel@tonic-gate yes_final:
3929*0Sstevel@tonic-gate     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3930*0Sstevel@tonic-gate 			  PL_colors[4],PL_colors[5]));
3931*0Sstevel@tonic-gate yes:
3932*0Sstevel@tonic-gate #ifdef DEBUGGING
3933*0Sstevel@tonic-gate     PL_regindent--;
3934*0Sstevel@tonic-gate #endif
3935*0Sstevel@tonic-gate 
3936*0Sstevel@tonic-gate #if 0					/* Breaks $^R */
3937*0Sstevel@tonic-gate     if (unwind)
3938*0Sstevel@tonic-gate 	regcpblow(firstcp);
3939*0Sstevel@tonic-gate #endif
3940*0Sstevel@tonic-gate     return 1;
3941*0Sstevel@tonic-gate 
3942*0Sstevel@tonic-gate no:
3943*0Sstevel@tonic-gate     DEBUG_r(
3944*0Sstevel@tonic-gate 	PerlIO_printf(Perl_debug_log,
3945*0Sstevel@tonic-gate 		      "%*s  %sfailed...%s\n",
3946*0Sstevel@tonic-gate 		      REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3947*0Sstevel@tonic-gate 	);
3948*0Sstevel@tonic-gate     goto do_no;
3949*0Sstevel@tonic-gate no_final:
3950*0Sstevel@tonic-gate do_no:
3951*0Sstevel@tonic-gate     if (unwind) {
3952*0Sstevel@tonic-gate 	re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3953*0Sstevel@tonic-gate 
3954*0Sstevel@tonic-gate 	switch (uw->type) {
3955*0Sstevel@tonic-gate 	case RE_UNWIND_BRANCH:
3956*0Sstevel@tonic-gate 	case RE_UNWIND_BRANCHJ:
3957*0Sstevel@tonic-gate 	{
3958*0Sstevel@tonic-gate 	    re_unwind_branch_t *uwb = &(uw->branch);
3959*0Sstevel@tonic-gate 	    I32 lastparen = uwb->lastparen;
3960*0Sstevel@tonic-gate 
3961*0Sstevel@tonic-gate 	    REGCP_UNWIND(uwb->lastcp);
3962*0Sstevel@tonic-gate 	    for (n = *PL_reglastparen; n > lastparen; n--)
3963*0Sstevel@tonic-gate 		PL_regendp[n] = -1;
3964*0Sstevel@tonic-gate 	    *PL_reglastparen = n;
3965*0Sstevel@tonic-gate 	    scan = next = uwb->next;
3966*0Sstevel@tonic-gate 	    if ( !scan ||
3967*0Sstevel@tonic-gate 		 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3968*0Sstevel@tonic-gate 			      ? BRANCH : BRANCHJ) ) {		/* Failure */
3969*0Sstevel@tonic-gate 		unwind = uwb->prev;
3970*0Sstevel@tonic-gate #ifdef DEBUGGING
3971*0Sstevel@tonic-gate 		PL_regindent--;
3972*0Sstevel@tonic-gate #endif
3973*0Sstevel@tonic-gate 		goto do_no;
3974*0Sstevel@tonic-gate 	    }
3975*0Sstevel@tonic-gate 	    /* Have more choice yet.  Reuse the same uwb.  */
3976*0Sstevel@tonic-gate 	    /*SUPPRESS 560*/
3977*0Sstevel@tonic-gate 	    if ((n = (uwb->type == RE_UNWIND_BRANCH
3978*0Sstevel@tonic-gate 		      ? NEXT_OFF(next) : ARG(next))))
3979*0Sstevel@tonic-gate 		next += n;
3980*0Sstevel@tonic-gate 	    else
3981*0Sstevel@tonic-gate 		next = NULL;	/* XXXX Needn't unwinding in this case... */
3982*0Sstevel@tonic-gate 	    uwb->next = next;
3983*0Sstevel@tonic-gate 	    next = NEXTOPER(scan);
3984*0Sstevel@tonic-gate 	    if (uwb->type == RE_UNWIND_BRANCHJ)
3985*0Sstevel@tonic-gate 		next = NEXTOPER(next);
3986*0Sstevel@tonic-gate 	    locinput = uwb->locinput;
3987*0Sstevel@tonic-gate 	    nextchr = uwb->nextchr;
3988*0Sstevel@tonic-gate #ifdef DEBUGGING
3989*0Sstevel@tonic-gate 	    PL_regindent = uwb->regindent;
3990*0Sstevel@tonic-gate #endif
3991*0Sstevel@tonic-gate 
3992*0Sstevel@tonic-gate 	    goto reenter;
3993*0Sstevel@tonic-gate 	}
3994*0Sstevel@tonic-gate 	/* NOT REACHED */
3995*0Sstevel@tonic-gate 	default:
3996*0Sstevel@tonic-gate 	    Perl_croak(aTHX_ "regexp unwind memory corruption");
3997*0Sstevel@tonic-gate 	}
3998*0Sstevel@tonic-gate 	/* NOT REACHED */
3999*0Sstevel@tonic-gate     }
4000*0Sstevel@tonic-gate #ifdef DEBUGGING
4001*0Sstevel@tonic-gate     PL_regindent--;
4002*0Sstevel@tonic-gate #endif
4003*0Sstevel@tonic-gate     return 0;
4004*0Sstevel@tonic-gate }
4005*0Sstevel@tonic-gate 
4006*0Sstevel@tonic-gate /*
4007*0Sstevel@tonic-gate  - regrepeat - repeatedly match something simple, report how many
4008*0Sstevel@tonic-gate  */
4009*0Sstevel@tonic-gate /*
4010*0Sstevel@tonic-gate  * [This routine now assumes that it will only match on things of length 1.
4011*0Sstevel@tonic-gate  * That was true before, but now we assume scan - reginput is the count,
4012*0Sstevel@tonic-gate  * rather than incrementing count on every character.  [Er, except utf8.]]
4013*0Sstevel@tonic-gate  */
4014*0Sstevel@tonic-gate STATIC I32
S_regrepeat(pTHX_ regnode * p,I32 max)4015*0Sstevel@tonic-gate S_regrepeat(pTHX_ regnode *p, I32 max)
4016*0Sstevel@tonic-gate {
4017*0Sstevel@tonic-gate     register char *scan;
4018*0Sstevel@tonic-gate     register I32 c;
4019*0Sstevel@tonic-gate     register char *loceol = PL_regeol;
4020*0Sstevel@tonic-gate     register I32 hardcount = 0;
4021*0Sstevel@tonic-gate     register bool do_utf8 = PL_reg_match_utf8;
4022*0Sstevel@tonic-gate 
4023*0Sstevel@tonic-gate     scan = PL_reginput;
4024*0Sstevel@tonic-gate     if (max == REG_INFTY)
4025*0Sstevel@tonic-gate 	max = I32_MAX;
4026*0Sstevel@tonic-gate     else if (max < loceol - scan)
4027*0Sstevel@tonic-gate       loceol = scan + max;
4028*0Sstevel@tonic-gate     switch (OP(p)) {
4029*0Sstevel@tonic-gate     case REG_ANY:
4030*0Sstevel@tonic-gate 	if (do_utf8) {
4031*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4032*0Sstevel@tonic-gate 	    while (scan < loceol && hardcount < max && *scan != '\n') {
4033*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4034*0Sstevel@tonic-gate 		hardcount++;
4035*0Sstevel@tonic-gate 	    }
4036*0Sstevel@tonic-gate 	} else {
4037*0Sstevel@tonic-gate 	    while (scan < loceol && *scan != '\n')
4038*0Sstevel@tonic-gate 		scan++;
4039*0Sstevel@tonic-gate 	}
4040*0Sstevel@tonic-gate 	break;
4041*0Sstevel@tonic-gate     case SANY:
4042*0Sstevel@tonic-gate         if (do_utf8) {
4043*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4044*0Sstevel@tonic-gate 	    while (scan < loceol && hardcount < max) {
4045*0Sstevel@tonic-gate 	        scan += UTF8SKIP(scan);
4046*0Sstevel@tonic-gate 		hardcount++;
4047*0Sstevel@tonic-gate 	    }
4048*0Sstevel@tonic-gate 	}
4049*0Sstevel@tonic-gate 	else
4050*0Sstevel@tonic-gate 	    scan = loceol;
4051*0Sstevel@tonic-gate 	break;
4052*0Sstevel@tonic-gate     case CANY:
4053*0Sstevel@tonic-gate 	scan = loceol;
4054*0Sstevel@tonic-gate 	break;
4055*0Sstevel@tonic-gate     case EXACT:		/* length of string is 1 */
4056*0Sstevel@tonic-gate 	c = (U8)*STRING(p);
4057*0Sstevel@tonic-gate 	while (scan < loceol && UCHARAT(scan) == c)
4058*0Sstevel@tonic-gate 	    scan++;
4059*0Sstevel@tonic-gate 	break;
4060*0Sstevel@tonic-gate     case EXACTF:	/* length of string is 1 */
4061*0Sstevel@tonic-gate 	c = (U8)*STRING(p);
4062*0Sstevel@tonic-gate 	while (scan < loceol &&
4063*0Sstevel@tonic-gate 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4064*0Sstevel@tonic-gate 	    scan++;
4065*0Sstevel@tonic-gate 	break;
4066*0Sstevel@tonic-gate     case EXACTFL:	/* length of string is 1 */
4067*0Sstevel@tonic-gate 	PL_reg_flags |= RF_tainted;
4068*0Sstevel@tonic-gate 	c = (U8)*STRING(p);
4069*0Sstevel@tonic-gate 	while (scan < loceol &&
4070*0Sstevel@tonic-gate 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4071*0Sstevel@tonic-gate 	    scan++;
4072*0Sstevel@tonic-gate 	break;
4073*0Sstevel@tonic-gate     case ANYOF:
4074*0Sstevel@tonic-gate 	if (do_utf8) {
4075*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4076*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4077*0Sstevel@tonic-gate 		   reginclass(p, (U8*)scan, 0, do_utf8)) {
4078*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4079*0Sstevel@tonic-gate 		hardcount++;
4080*0Sstevel@tonic-gate 	    }
4081*0Sstevel@tonic-gate 	} else {
4082*0Sstevel@tonic-gate 	    while (scan < loceol && REGINCLASS(p, (U8*)scan))
4083*0Sstevel@tonic-gate 		scan++;
4084*0Sstevel@tonic-gate 	}
4085*0Sstevel@tonic-gate 	break;
4086*0Sstevel@tonic-gate     case ALNUM:
4087*0Sstevel@tonic-gate 	if (do_utf8) {
4088*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4089*0Sstevel@tonic-gate 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4090*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4091*0Sstevel@tonic-gate 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4092*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4093*0Sstevel@tonic-gate 		hardcount++;
4094*0Sstevel@tonic-gate 	    }
4095*0Sstevel@tonic-gate 	} else {
4096*0Sstevel@tonic-gate 	    while (scan < loceol && isALNUM(*scan))
4097*0Sstevel@tonic-gate 		scan++;
4098*0Sstevel@tonic-gate 	}
4099*0Sstevel@tonic-gate 	break;
4100*0Sstevel@tonic-gate     case ALNUML:
4101*0Sstevel@tonic-gate 	PL_reg_flags |= RF_tainted;
4102*0Sstevel@tonic-gate 	if (do_utf8) {
4103*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4104*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4105*0Sstevel@tonic-gate 		   isALNUM_LC_utf8((U8*)scan)) {
4106*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4107*0Sstevel@tonic-gate 		hardcount++;
4108*0Sstevel@tonic-gate 	    }
4109*0Sstevel@tonic-gate 	} else {
4110*0Sstevel@tonic-gate 	    while (scan < loceol && isALNUM_LC(*scan))
4111*0Sstevel@tonic-gate 		scan++;
4112*0Sstevel@tonic-gate 	}
4113*0Sstevel@tonic-gate 	break;
4114*0Sstevel@tonic-gate     case NALNUM:
4115*0Sstevel@tonic-gate 	if (do_utf8) {
4116*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4117*0Sstevel@tonic-gate 	    LOAD_UTF8_CHARCLASS(alnum,"a");
4118*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4119*0Sstevel@tonic-gate 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4120*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4121*0Sstevel@tonic-gate 		hardcount++;
4122*0Sstevel@tonic-gate 	    }
4123*0Sstevel@tonic-gate 	} else {
4124*0Sstevel@tonic-gate 	    while (scan < loceol && !isALNUM(*scan))
4125*0Sstevel@tonic-gate 		scan++;
4126*0Sstevel@tonic-gate 	}
4127*0Sstevel@tonic-gate 	break;
4128*0Sstevel@tonic-gate     case NALNUML:
4129*0Sstevel@tonic-gate 	PL_reg_flags |= RF_tainted;
4130*0Sstevel@tonic-gate 	if (do_utf8) {
4131*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4132*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4133*0Sstevel@tonic-gate 		   !isALNUM_LC_utf8((U8*)scan)) {
4134*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4135*0Sstevel@tonic-gate 		hardcount++;
4136*0Sstevel@tonic-gate 	    }
4137*0Sstevel@tonic-gate 	} else {
4138*0Sstevel@tonic-gate 	    while (scan < loceol && !isALNUM_LC(*scan))
4139*0Sstevel@tonic-gate 		scan++;
4140*0Sstevel@tonic-gate 	}
4141*0Sstevel@tonic-gate 	break;
4142*0Sstevel@tonic-gate     case SPACE:
4143*0Sstevel@tonic-gate 	if (do_utf8) {
4144*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4145*0Sstevel@tonic-gate 	    LOAD_UTF8_CHARCLASS(space," ");
4146*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4147*0Sstevel@tonic-gate 		   (*scan == ' ' ||
4148*0Sstevel@tonic-gate 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4149*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4150*0Sstevel@tonic-gate 		hardcount++;
4151*0Sstevel@tonic-gate 	    }
4152*0Sstevel@tonic-gate 	} else {
4153*0Sstevel@tonic-gate 	    while (scan < loceol && isSPACE(*scan))
4154*0Sstevel@tonic-gate 		scan++;
4155*0Sstevel@tonic-gate 	}
4156*0Sstevel@tonic-gate 	break;
4157*0Sstevel@tonic-gate     case SPACEL:
4158*0Sstevel@tonic-gate 	PL_reg_flags |= RF_tainted;
4159*0Sstevel@tonic-gate 	if (do_utf8) {
4160*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4161*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4162*0Sstevel@tonic-gate 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4163*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4164*0Sstevel@tonic-gate 		hardcount++;
4165*0Sstevel@tonic-gate 	    }
4166*0Sstevel@tonic-gate 	} else {
4167*0Sstevel@tonic-gate 	    while (scan < loceol && isSPACE_LC(*scan))
4168*0Sstevel@tonic-gate 		scan++;
4169*0Sstevel@tonic-gate 	}
4170*0Sstevel@tonic-gate 	break;
4171*0Sstevel@tonic-gate     case NSPACE:
4172*0Sstevel@tonic-gate 	if (do_utf8) {
4173*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4174*0Sstevel@tonic-gate 	    LOAD_UTF8_CHARCLASS(space," ");
4175*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4176*0Sstevel@tonic-gate 		   !(*scan == ' ' ||
4177*0Sstevel@tonic-gate 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4178*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4179*0Sstevel@tonic-gate 		hardcount++;
4180*0Sstevel@tonic-gate 	    }
4181*0Sstevel@tonic-gate 	} else {
4182*0Sstevel@tonic-gate 	    while (scan < loceol && !isSPACE(*scan))
4183*0Sstevel@tonic-gate 		scan++;
4184*0Sstevel@tonic-gate 	    break;
4185*0Sstevel@tonic-gate 	}
4186*0Sstevel@tonic-gate     case NSPACEL:
4187*0Sstevel@tonic-gate 	PL_reg_flags |= RF_tainted;
4188*0Sstevel@tonic-gate 	if (do_utf8) {
4189*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4190*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4191*0Sstevel@tonic-gate 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4192*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4193*0Sstevel@tonic-gate 		hardcount++;
4194*0Sstevel@tonic-gate 	    }
4195*0Sstevel@tonic-gate 	} else {
4196*0Sstevel@tonic-gate 	    while (scan < loceol && !isSPACE_LC(*scan))
4197*0Sstevel@tonic-gate 		scan++;
4198*0Sstevel@tonic-gate 	}
4199*0Sstevel@tonic-gate 	break;
4200*0Sstevel@tonic-gate     case DIGIT:
4201*0Sstevel@tonic-gate 	if (do_utf8) {
4202*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4203*0Sstevel@tonic-gate 	    LOAD_UTF8_CHARCLASS(digit,"0");
4204*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4205*0Sstevel@tonic-gate 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4206*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4207*0Sstevel@tonic-gate 		hardcount++;
4208*0Sstevel@tonic-gate 	    }
4209*0Sstevel@tonic-gate 	} else {
4210*0Sstevel@tonic-gate 	    while (scan < loceol && isDIGIT(*scan))
4211*0Sstevel@tonic-gate 		scan++;
4212*0Sstevel@tonic-gate 	}
4213*0Sstevel@tonic-gate 	break;
4214*0Sstevel@tonic-gate     case NDIGIT:
4215*0Sstevel@tonic-gate 	if (do_utf8) {
4216*0Sstevel@tonic-gate 	    loceol = PL_regeol;
4217*0Sstevel@tonic-gate 	    LOAD_UTF8_CHARCLASS(digit,"0");
4218*0Sstevel@tonic-gate 	    while (hardcount < max && scan < loceol &&
4219*0Sstevel@tonic-gate 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4220*0Sstevel@tonic-gate 		scan += UTF8SKIP(scan);
4221*0Sstevel@tonic-gate 		hardcount++;
4222*0Sstevel@tonic-gate 	    }
4223*0Sstevel@tonic-gate 	} else {
4224*0Sstevel@tonic-gate 	    while (scan < loceol && !isDIGIT(*scan))
4225*0Sstevel@tonic-gate 		scan++;
4226*0Sstevel@tonic-gate 	}
4227*0Sstevel@tonic-gate 	break;
4228*0Sstevel@tonic-gate     default:		/* Called on something of 0 width. */
4229*0Sstevel@tonic-gate 	break;		/* So match right here or not at all. */
4230*0Sstevel@tonic-gate     }
4231*0Sstevel@tonic-gate 
4232*0Sstevel@tonic-gate     if (hardcount)
4233*0Sstevel@tonic-gate 	c = hardcount;
4234*0Sstevel@tonic-gate     else
4235*0Sstevel@tonic-gate 	c = scan - PL_reginput;
4236*0Sstevel@tonic-gate     PL_reginput = scan;
4237*0Sstevel@tonic-gate 
4238*0Sstevel@tonic-gate     DEBUG_r(
4239*0Sstevel@tonic-gate 	{
4240*0Sstevel@tonic-gate 		SV *prop = sv_newmortal();
4241*0Sstevel@tonic-gate 
4242*0Sstevel@tonic-gate 		regprop(prop, p);
4243*0Sstevel@tonic-gate 		PerlIO_printf(Perl_debug_log,
4244*0Sstevel@tonic-gate 			      "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4245*0Sstevel@tonic-gate 			      REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4246*0Sstevel@tonic-gate 	});
4247*0Sstevel@tonic-gate 
4248*0Sstevel@tonic-gate     return(c);
4249*0Sstevel@tonic-gate }
4250*0Sstevel@tonic-gate 
4251*0Sstevel@tonic-gate /*
4252*0Sstevel@tonic-gate  - regrepeat_hard - repeatedly match something, report total lenth and length
4253*0Sstevel@tonic-gate  *
4254*0Sstevel@tonic-gate  * The repeater is supposed to have constant length.
4255*0Sstevel@tonic-gate  */
4256*0Sstevel@tonic-gate 
4257*0Sstevel@tonic-gate STATIC I32
S_regrepeat_hard(pTHX_ regnode * p,I32 max,I32 * lp)4258*0Sstevel@tonic-gate S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4259*0Sstevel@tonic-gate {
4260*0Sstevel@tonic-gate     register char *scan = Nullch;
4261*0Sstevel@tonic-gate     register char *start;
4262*0Sstevel@tonic-gate     register char *loceol = PL_regeol;
4263*0Sstevel@tonic-gate     I32 l = 0;
4264*0Sstevel@tonic-gate     I32 count = 0, res = 1;
4265*0Sstevel@tonic-gate 
4266*0Sstevel@tonic-gate     if (!max)
4267*0Sstevel@tonic-gate 	return 0;
4268*0Sstevel@tonic-gate 
4269*0Sstevel@tonic-gate     start = PL_reginput;
4270*0Sstevel@tonic-gate     if (PL_reg_match_utf8) {
4271*0Sstevel@tonic-gate 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4272*0Sstevel@tonic-gate 	    if (!count++) {
4273*0Sstevel@tonic-gate 		l = 0;
4274*0Sstevel@tonic-gate 		while (start < PL_reginput) {
4275*0Sstevel@tonic-gate 		    l++;
4276*0Sstevel@tonic-gate 		    start += UTF8SKIP(start);
4277*0Sstevel@tonic-gate 		}
4278*0Sstevel@tonic-gate 		*lp = l;
4279*0Sstevel@tonic-gate 		if (l == 0)
4280*0Sstevel@tonic-gate 		    return max;
4281*0Sstevel@tonic-gate 	    }
4282*0Sstevel@tonic-gate 	    if (count == max)
4283*0Sstevel@tonic-gate 		return count;
4284*0Sstevel@tonic-gate 	}
4285*0Sstevel@tonic-gate     }
4286*0Sstevel@tonic-gate     else {
4287*0Sstevel@tonic-gate 	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4288*0Sstevel@tonic-gate 	    if (!count++) {
4289*0Sstevel@tonic-gate 		*lp = l = PL_reginput - start;
4290*0Sstevel@tonic-gate 		if (max != REG_INFTY && l*max < loceol - scan)
4291*0Sstevel@tonic-gate 		    loceol = scan + l*max;
4292*0Sstevel@tonic-gate 		if (l == 0)
4293*0Sstevel@tonic-gate 		    return max;
4294*0Sstevel@tonic-gate 	    }
4295*0Sstevel@tonic-gate 	}
4296*0Sstevel@tonic-gate     }
4297*0Sstevel@tonic-gate     if (!res)
4298*0Sstevel@tonic-gate 	PL_reginput = scan;
4299*0Sstevel@tonic-gate 
4300*0Sstevel@tonic-gate     return count;
4301*0Sstevel@tonic-gate }
4302*0Sstevel@tonic-gate 
4303*0Sstevel@tonic-gate /*
4304*0Sstevel@tonic-gate - regclass_swash - prepare the utf8 swash
4305*0Sstevel@tonic-gate */
4306*0Sstevel@tonic-gate 
4307*0Sstevel@tonic-gate SV *
Perl_regclass_swash(pTHX_ register regnode * node,bool doinit,SV ** listsvp,SV ** altsvp)4308*0Sstevel@tonic-gate Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4309*0Sstevel@tonic-gate {
4310*0Sstevel@tonic-gate     SV *sw  = NULL;
4311*0Sstevel@tonic-gate     SV *si  = NULL;
4312*0Sstevel@tonic-gate     SV *alt = NULL;
4313*0Sstevel@tonic-gate 
4314*0Sstevel@tonic-gate     if (PL_regdata && PL_regdata->count) {
4315*0Sstevel@tonic-gate 	U32 n = ARG(node);
4316*0Sstevel@tonic-gate 
4317*0Sstevel@tonic-gate 	if (PL_regdata->what[n] == 's') {
4318*0Sstevel@tonic-gate 	    SV *rv = (SV*)PL_regdata->data[n];
4319*0Sstevel@tonic-gate 	    AV *av = (AV*)SvRV((SV*)rv);
4320*0Sstevel@tonic-gate 	    SV **ary = AvARRAY(av);
4321*0Sstevel@tonic-gate 	    SV **a, **b;
4322*0Sstevel@tonic-gate 
4323*0Sstevel@tonic-gate 	    /* See the end of regcomp.c:S_reglass() for
4324*0Sstevel@tonic-gate 	     * documentation of these array elements. */
4325*0Sstevel@tonic-gate 
4326*0Sstevel@tonic-gate 	    si = *ary;
4327*0Sstevel@tonic-gate 	    a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4328*0Sstevel@tonic-gate 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4329*0Sstevel@tonic-gate 
4330*0Sstevel@tonic-gate 	    if (a)
4331*0Sstevel@tonic-gate 		sw = *a;
4332*0Sstevel@tonic-gate 	    else if (si && doinit) {
4333*0Sstevel@tonic-gate 		sw = swash_init("utf8", "", si, 1, 0);
4334*0Sstevel@tonic-gate 		(void)av_store(av, 1, sw);
4335*0Sstevel@tonic-gate 	    }
4336*0Sstevel@tonic-gate 	    if (b)
4337*0Sstevel@tonic-gate 	        alt = *b;
4338*0Sstevel@tonic-gate 	}
4339*0Sstevel@tonic-gate     }
4340*0Sstevel@tonic-gate 
4341*0Sstevel@tonic-gate     if (listsvp)
4342*0Sstevel@tonic-gate 	*listsvp = si;
4343*0Sstevel@tonic-gate     if (altsvp)
4344*0Sstevel@tonic-gate 	*altsvp  = alt;
4345*0Sstevel@tonic-gate 
4346*0Sstevel@tonic-gate     return sw;
4347*0Sstevel@tonic-gate }
4348*0Sstevel@tonic-gate 
4349*0Sstevel@tonic-gate /*
4350*0Sstevel@tonic-gate  - reginclass - determine if a character falls into a character class
4351*0Sstevel@tonic-gate 
4352*0Sstevel@tonic-gate   The n is the ANYOF regnode, the p is the target string, lenp
4353*0Sstevel@tonic-gate   is pointer to the maximum length of how far to go in the p
4354*0Sstevel@tonic-gate   (if the lenp is zero, UTF8SKIP(p) is used),
4355*0Sstevel@tonic-gate   do_utf8 tells whether the target string is in UTF-8.
4356*0Sstevel@tonic-gate 
4357*0Sstevel@tonic-gate  */
4358*0Sstevel@tonic-gate 
4359*0Sstevel@tonic-gate STATIC bool
S_reginclass(pTHX_ register regnode * n,register U8 * p,STRLEN * lenp,register bool do_utf8)4360*0Sstevel@tonic-gate S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4361*0Sstevel@tonic-gate {
4362*0Sstevel@tonic-gate     char flags = ANYOF_FLAGS(n);
4363*0Sstevel@tonic-gate     bool match = FALSE;
4364*0Sstevel@tonic-gate     UV c = *p;
4365*0Sstevel@tonic-gate     STRLEN len = 0;
4366*0Sstevel@tonic-gate     STRLEN plen;
4367*0Sstevel@tonic-gate 
4368*0Sstevel@tonic-gate     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4369*0Sstevel@tonic-gate 	 c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
4370*0Sstevel@tonic-gate 			    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4371*0Sstevel@tonic-gate 
4372*0Sstevel@tonic-gate     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4373*0Sstevel@tonic-gate     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4374*0Sstevel@tonic-gate         if (lenp)
4375*0Sstevel@tonic-gate 	    *lenp = 0;
4376*0Sstevel@tonic-gate 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
4377*0Sstevel@tonic-gate 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4378*0Sstevel@tonic-gate 		match = TRUE;
4379*0Sstevel@tonic-gate 	}
4380*0Sstevel@tonic-gate 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4381*0Sstevel@tonic-gate 	    match = TRUE;
4382*0Sstevel@tonic-gate 	if (!match) {
4383*0Sstevel@tonic-gate 	    AV *av;
4384*0Sstevel@tonic-gate 	    SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4385*0Sstevel@tonic-gate 
4386*0Sstevel@tonic-gate 	    if (sw) {
4387*0Sstevel@tonic-gate 		if (swash_fetch(sw, p, do_utf8))
4388*0Sstevel@tonic-gate 		    match = TRUE;
4389*0Sstevel@tonic-gate 		else if (flags & ANYOF_FOLD) {
4390*0Sstevel@tonic-gate 		    if (!match && lenp && av) {
4391*0Sstevel@tonic-gate 		        I32 i;
4392*0Sstevel@tonic-gate 
4393*0Sstevel@tonic-gate 			for (i = 0; i <= av_len(av); i++) {
4394*0Sstevel@tonic-gate 			    SV* sv = *av_fetch(av, i, FALSE);
4395*0Sstevel@tonic-gate 			    STRLEN len;
4396*0Sstevel@tonic-gate 			    char *s = SvPV(sv, len);
4397*0Sstevel@tonic-gate 
4398*0Sstevel@tonic-gate 			    if (len <= plen && memEQ(s, (char*)p, len)) {
4399*0Sstevel@tonic-gate 			        *lenp = len;
4400*0Sstevel@tonic-gate 				match = TRUE;
4401*0Sstevel@tonic-gate 				break;
4402*0Sstevel@tonic-gate 			    }
4403*0Sstevel@tonic-gate 			}
4404*0Sstevel@tonic-gate 		    }
4405*0Sstevel@tonic-gate 		    if (!match) {
4406*0Sstevel@tonic-gate 		        U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4407*0Sstevel@tonic-gate 			STRLEN tmplen;
4408*0Sstevel@tonic-gate 
4409*0Sstevel@tonic-gate 		        to_utf8_fold(p, tmpbuf, &tmplen);
4410*0Sstevel@tonic-gate 			if (swash_fetch(sw, tmpbuf, do_utf8))
4411*0Sstevel@tonic-gate 			    match = TRUE;
4412*0Sstevel@tonic-gate 		    }
4413*0Sstevel@tonic-gate 		}
4414*0Sstevel@tonic-gate 	    }
4415*0Sstevel@tonic-gate 	}
4416*0Sstevel@tonic-gate 	if (match && lenp && *lenp == 0)
4417*0Sstevel@tonic-gate 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
4418*0Sstevel@tonic-gate     }
4419*0Sstevel@tonic-gate     if (!match && c < 256) {
4420*0Sstevel@tonic-gate 	if (ANYOF_BITMAP_TEST(n, c))
4421*0Sstevel@tonic-gate 	    match = TRUE;
4422*0Sstevel@tonic-gate 	else if (flags & ANYOF_FOLD) {
4423*0Sstevel@tonic-gate 	    U8 f;
4424*0Sstevel@tonic-gate 
4425*0Sstevel@tonic-gate 	    if (flags & ANYOF_LOCALE) {
4426*0Sstevel@tonic-gate 		PL_reg_flags |= RF_tainted;
4427*0Sstevel@tonic-gate 		f = PL_fold_locale[c];
4428*0Sstevel@tonic-gate 	    }
4429*0Sstevel@tonic-gate 	    else
4430*0Sstevel@tonic-gate 		f = PL_fold[c];
4431*0Sstevel@tonic-gate 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
4432*0Sstevel@tonic-gate 		match = TRUE;
4433*0Sstevel@tonic-gate 	}
4434*0Sstevel@tonic-gate 
4435*0Sstevel@tonic-gate 	if (!match && (flags & ANYOF_CLASS)) {
4436*0Sstevel@tonic-gate 	    PL_reg_flags |= RF_tainted;
4437*0Sstevel@tonic-gate 	    if (
4438*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4439*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4440*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4441*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4442*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4443*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4444*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4445*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4446*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4447*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4448*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4449*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4450*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4451*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4452*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4453*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4454*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4455*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4456*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4457*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4458*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4459*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4460*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4461*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4462*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4463*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4464*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4465*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4466*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4467*0Sstevel@tonic-gate 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4468*0Sstevel@tonic-gate 		) /* How's that for a conditional? */
4469*0Sstevel@tonic-gate 	    {
4470*0Sstevel@tonic-gate 		match = TRUE;
4471*0Sstevel@tonic-gate 	    }
4472*0Sstevel@tonic-gate 	}
4473*0Sstevel@tonic-gate     }
4474*0Sstevel@tonic-gate 
4475*0Sstevel@tonic-gate     return (flags & ANYOF_INVERT) ? !match : match;
4476*0Sstevel@tonic-gate }
4477*0Sstevel@tonic-gate 
4478*0Sstevel@tonic-gate STATIC U8 *
S_reghop(pTHX_ U8 * s,I32 off)4479*0Sstevel@tonic-gate S_reghop(pTHX_ U8 *s, I32 off)
4480*0Sstevel@tonic-gate {
4481*0Sstevel@tonic-gate     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4482*0Sstevel@tonic-gate }
4483*0Sstevel@tonic-gate 
4484*0Sstevel@tonic-gate STATIC U8 *
S_reghop3(pTHX_ U8 * s,I32 off,U8 * lim)4485*0Sstevel@tonic-gate S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4486*0Sstevel@tonic-gate {
4487*0Sstevel@tonic-gate     if (off >= 0) {
4488*0Sstevel@tonic-gate 	while (off-- && s < lim) {
4489*0Sstevel@tonic-gate 	    /* XXX could check well-formedness here */
4490*0Sstevel@tonic-gate 	    s += UTF8SKIP(s);
4491*0Sstevel@tonic-gate 	}
4492*0Sstevel@tonic-gate     }
4493*0Sstevel@tonic-gate     else {
4494*0Sstevel@tonic-gate 	while (off++) {
4495*0Sstevel@tonic-gate 	    if (s > lim) {
4496*0Sstevel@tonic-gate 		s--;
4497*0Sstevel@tonic-gate 		if (UTF8_IS_CONTINUED(*s)) {
4498*0Sstevel@tonic-gate 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4499*0Sstevel@tonic-gate 			s--;
4500*0Sstevel@tonic-gate 		}
4501*0Sstevel@tonic-gate 		/* XXX could check well-formedness here */
4502*0Sstevel@tonic-gate 	    }
4503*0Sstevel@tonic-gate 	}
4504*0Sstevel@tonic-gate     }
4505*0Sstevel@tonic-gate     return s;
4506*0Sstevel@tonic-gate }
4507*0Sstevel@tonic-gate 
4508*0Sstevel@tonic-gate STATIC U8 *
S_reghopmaybe(pTHX_ U8 * s,I32 off)4509*0Sstevel@tonic-gate S_reghopmaybe(pTHX_ U8 *s, I32 off)
4510*0Sstevel@tonic-gate {
4511*0Sstevel@tonic-gate     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4512*0Sstevel@tonic-gate }
4513*0Sstevel@tonic-gate 
4514*0Sstevel@tonic-gate STATIC U8 *
S_reghopmaybe3(pTHX_ U8 * s,I32 off,U8 * lim)4515*0Sstevel@tonic-gate S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4516*0Sstevel@tonic-gate {
4517*0Sstevel@tonic-gate     if (off >= 0) {
4518*0Sstevel@tonic-gate 	while (off-- && s < lim) {
4519*0Sstevel@tonic-gate 	    /* XXX could check well-formedness here */
4520*0Sstevel@tonic-gate 	    s += UTF8SKIP(s);
4521*0Sstevel@tonic-gate 	}
4522*0Sstevel@tonic-gate 	if (off >= 0)
4523*0Sstevel@tonic-gate 	    return 0;
4524*0Sstevel@tonic-gate     }
4525*0Sstevel@tonic-gate     else {
4526*0Sstevel@tonic-gate 	while (off++) {
4527*0Sstevel@tonic-gate 	    if (s > lim) {
4528*0Sstevel@tonic-gate 		s--;
4529*0Sstevel@tonic-gate 		if (UTF8_IS_CONTINUED(*s)) {
4530*0Sstevel@tonic-gate 		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4531*0Sstevel@tonic-gate 			s--;
4532*0Sstevel@tonic-gate 		}
4533*0Sstevel@tonic-gate 		/* XXX could check well-formedness here */
4534*0Sstevel@tonic-gate 	    }
4535*0Sstevel@tonic-gate 	    else
4536*0Sstevel@tonic-gate 		break;
4537*0Sstevel@tonic-gate 	}
4538*0Sstevel@tonic-gate 	if (off <= 0)
4539*0Sstevel@tonic-gate 	    return 0;
4540*0Sstevel@tonic-gate     }
4541*0Sstevel@tonic-gate     return s;
4542*0Sstevel@tonic-gate }
4543*0Sstevel@tonic-gate 
4544*0Sstevel@tonic-gate static void
restore_pos(pTHX_ void * arg)4545*0Sstevel@tonic-gate restore_pos(pTHX_ void *arg)
4546*0Sstevel@tonic-gate {
4547*0Sstevel@tonic-gate     if (PL_reg_eval_set) {
4548*0Sstevel@tonic-gate 	if (PL_reg_oldsaved) {
4549*0Sstevel@tonic-gate 	    PL_reg_re->subbeg = PL_reg_oldsaved;
4550*0Sstevel@tonic-gate 	    PL_reg_re->sublen = PL_reg_oldsavedlen;
4551*0Sstevel@tonic-gate 	    RX_MATCH_COPIED_on(PL_reg_re);
4552*0Sstevel@tonic-gate 	}
4553*0Sstevel@tonic-gate 	PL_reg_magic->mg_len = PL_reg_oldpos;
4554*0Sstevel@tonic-gate 	PL_reg_eval_set = 0;
4555*0Sstevel@tonic-gate 	PL_curpm = PL_reg_oldcurpm;
4556*0Sstevel@tonic-gate     }
4557*0Sstevel@tonic-gate }
4558*0Sstevel@tonic-gate 
4559*0Sstevel@tonic-gate STATIC void
S_to_utf8_substr(pTHX_ register regexp * prog)4560*0Sstevel@tonic-gate S_to_utf8_substr(pTHX_ register regexp *prog)
4561*0Sstevel@tonic-gate {
4562*0Sstevel@tonic-gate     SV* sv;
4563*0Sstevel@tonic-gate     if (prog->float_substr && !prog->float_utf8) {
4564*0Sstevel@tonic-gate 	prog->float_utf8 = sv = NEWSV(117, 0);
4565*0Sstevel@tonic-gate 	SvSetSV(sv, prog->float_substr);
4566*0Sstevel@tonic-gate 	sv_utf8_upgrade(sv);
4567*0Sstevel@tonic-gate 	if (SvTAIL(prog->float_substr))
4568*0Sstevel@tonic-gate 	    SvTAIL_on(sv);
4569*0Sstevel@tonic-gate 	if (prog->float_substr == prog->check_substr)
4570*0Sstevel@tonic-gate 	    prog->check_utf8 = sv;
4571*0Sstevel@tonic-gate     }
4572*0Sstevel@tonic-gate     if (prog->anchored_substr && !prog->anchored_utf8) {
4573*0Sstevel@tonic-gate 	prog->anchored_utf8 = sv = NEWSV(118, 0);
4574*0Sstevel@tonic-gate 	SvSetSV(sv, prog->anchored_substr);
4575*0Sstevel@tonic-gate 	sv_utf8_upgrade(sv);
4576*0Sstevel@tonic-gate 	if (SvTAIL(prog->anchored_substr))
4577*0Sstevel@tonic-gate 	    SvTAIL_on(sv);
4578*0Sstevel@tonic-gate 	if (prog->anchored_substr == prog->check_substr)
4579*0Sstevel@tonic-gate 	    prog->check_utf8 = sv;
4580*0Sstevel@tonic-gate     }
4581*0Sstevel@tonic-gate }
4582*0Sstevel@tonic-gate 
4583*0Sstevel@tonic-gate STATIC void
S_to_byte_substr(pTHX_ register regexp * prog)4584*0Sstevel@tonic-gate S_to_byte_substr(pTHX_ register regexp *prog)
4585*0Sstevel@tonic-gate {
4586*0Sstevel@tonic-gate     SV* sv;
4587*0Sstevel@tonic-gate     if (prog->float_utf8 && !prog->float_substr) {
4588*0Sstevel@tonic-gate 	prog->float_substr = sv = NEWSV(117, 0);
4589*0Sstevel@tonic-gate 	SvSetSV(sv, prog->float_utf8);
4590*0Sstevel@tonic-gate 	if (sv_utf8_downgrade(sv, TRUE)) {
4591*0Sstevel@tonic-gate 	    if (SvTAIL(prog->float_utf8))
4592*0Sstevel@tonic-gate 		SvTAIL_on(sv);
4593*0Sstevel@tonic-gate 	} else {
4594*0Sstevel@tonic-gate 	    SvREFCNT_dec(sv);
4595*0Sstevel@tonic-gate 	    prog->float_substr = sv = &PL_sv_undef;
4596*0Sstevel@tonic-gate 	}
4597*0Sstevel@tonic-gate 	if (prog->float_utf8 == prog->check_utf8)
4598*0Sstevel@tonic-gate 	    prog->check_substr = sv;
4599*0Sstevel@tonic-gate     }
4600*0Sstevel@tonic-gate     if (prog->anchored_utf8 && !prog->anchored_substr) {
4601*0Sstevel@tonic-gate 	prog->anchored_substr = sv = NEWSV(118, 0);
4602*0Sstevel@tonic-gate 	SvSetSV(sv, prog->anchored_utf8);
4603*0Sstevel@tonic-gate 	if (sv_utf8_downgrade(sv, TRUE)) {
4604*0Sstevel@tonic-gate 	    if (SvTAIL(prog->anchored_utf8))
4605*0Sstevel@tonic-gate 		SvTAIL_on(sv);
4606*0Sstevel@tonic-gate 	} else {
4607*0Sstevel@tonic-gate 	    SvREFCNT_dec(sv);
4608*0Sstevel@tonic-gate 	    prog->anchored_substr = sv = &PL_sv_undef;
4609*0Sstevel@tonic-gate 	}
4610*0Sstevel@tonic-gate 	if (prog->anchored_utf8 == prog->check_utf8)
4611*0Sstevel@tonic-gate 	    prog->check_substr = sv;
4612*0Sstevel@tonic-gate     }
4613*0Sstevel@tonic-gate }
4614