1*0Sstevel@tonic-gate /* regexec.c
2*0Sstevel@tonic-gate */
3*0Sstevel@tonic-gate
4*0Sstevel@tonic-gate /*
5*0Sstevel@tonic-gate * "One Ring to rule them all, One Ring to find them..."
6*0Sstevel@tonic-gate */
7*0Sstevel@tonic-gate
8*0Sstevel@tonic-gate /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9*0Sstevel@tonic-gate * confused with the original package (see point 3 below). Thanks, Henry!
10*0Sstevel@tonic-gate */
11*0Sstevel@tonic-gate
12*0Sstevel@tonic-gate /* Additional note: this code is very heavily munged from Henry's version
13*0Sstevel@tonic-gate * in places. In some spots I've traded clarity for efficiency, so don't
14*0Sstevel@tonic-gate * blame Henry for some of the lack of readability.
15*0Sstevel@tonic-gate */
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gate /* The names of the functions have been changed from regcomp and
18*0Sstevel@tonic-gate * regexec to pregcomp and pregexec in order to avoid conflicts
19*0Sstevel@tonic-gate * with the POSIX routines of the same names.
20*0Sstevel@tonic-gate */
21*0Sstevel@tonic-gate
22*0Sstevel@tonic-gate #ifdef PERL_EXT_RE_BUILD
23*0Sstevel@tonic-gate /* need to replace pregcomp et al, so enable that */
24*0Sstevel@tonic-gate # ifndef PERL_IN_XSUB_RE
25*0Sstevel@tonic-gate # define PERL_IN_XSUB_RE
26*0Sstevel@tonic-gate # endif
27*0Sstevel@tonic-gate /* need access to debugger hooks */
28*0Sstevel@tonic-gate # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29*0Sstevel@tonic-gate # define DEBUGGING
30*0Sstevel@tonic-gate # endif
31*0Sstevel@tonic-gate #endif
32*0Sstevel@tonic-gate
33*0Sstevel@tonic-gate #ifdef PERL_IN_XSUB_RE
34*0Sstevel@tonic-gate /* We *really* need to overwrite these symbols: */
35*0Sstevel@tonic-gate # define Perl_regexec_flags my_regexec
36*0Sstevel@tonic-gate # define Perl_regdump my_regdump
37*0Sstevel@tonic-gate # define Perl_regprop my_regprop
38*0Sstevel@tonic-gate # define Perl_re_intuit_start my_re_intuit_start
39*0Sstevel@tonic-gate /* *These* symbols are masked to allow static link. */
40*0Sstevel@tonic-gate # define Perl_pregexec my_pregexec
41*0Sstevel@tonic-gate # define Perl_reginitcolors my_reginitcolors
42*0Sstevel@tonic-gate # define Perl_regclass_swash my_regclass_swash
43*0Sstevel@tonic-gate
44*0Sstevel@tonic-gate # define PERL_NO_GET_CONTEXT
45*0Sstevel@tonic-gate #endif
46*0Sstevel@tonic-gate
47*0Sstevel@tonic-gate /*SUPPRESS 112*/
48*0Sstevel@tonic-gate /*
49*0Sstevel@tonic-gate * pregcomp and pregexec -- regsub and regerror are not used in perl
50*0Sstevel@tonic-gate *
51*0Sstevel@tonic-gate * Copyright (c) 1986 by University of Toronto.
52*0Sstevel@tonic-gate * Written by Henry Spencer. Not derived from licensed software.
53*0Sstevel@tonic-gate *
54*0Sstevel@tonic-gate * Permission is granted to anyone to use this software for any
55*0Sstevel@tonic-gate * purpose on any computer system, and to redistribute it freely,
56*0Sstevel@tonic-gate * subject to the following restrictions:
57*0Sstevel@tonic-gate *
58*0Sstevel@tonic-gate * 1. The author is not responsible for the consequences of use of
59*0Sstevel@tonic-gate * this software, no matter how awful, even if they arise
60*0Sstevel@tonic-gate * from defects in it.
61*0Sstevel@tonic-gate *
62*0Sstevel@tonic-gate * 2. The origin of this software must not be misrepresented, either
63*0Sstevel@tonic-gate * by explicit claim or by omission.
64*0Sstevel@tonic-gate *
65*0Sstevel@tonic-gate * 3. Altered versions must be plainly marked as such, and must not
66*0Sstevel@tonic-gate * be misrepresented as being the original software.
67*0Sstevel@tonic-gate *
68*0Sstevel@tonic-gate **** Alterations to Henry's code are...
69*0Sstevel@tonic-gate ****
70*0Sstevel@tonic-gate **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
71*0Sstevel@tonic-gate **** 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
72*0Sstevel@tonic-gate ****
73*0Sstevel@tonic-gate **** You may distribute under the terms of either the GNU General Public
74*0Sstevel@tonic-gate **** License or the Artistic License, as specified in the README file.
75*0Sstevel@tonic-gate *
76*0Sstevel@tonic-gate * Beware that some of this code is subtly aware of the way operator
77*0Sstevel@tonic-gate * precedence is structured in regular expressions. Serious changes in
78*0Sstevel@tonic-gate * regular-expression syntax might require a total rethink.
79*0Sstevel@tonic-gate */
80*0Sstevel@tonic-gate #include "EXTERN.h"
81*0Sstevel@tonic-gate #define PERL_IN_REGEXEC_C
82*0Sstevel@tonic-gate #include "perl.h"
83*0Sstevel@tonic-gate
84*0Sstevel@tonic-gate #include "regcomp.h"
85*0Sstevel@tonic-gate
86*0Sstevel@tonic-gate #define RF_tainted 1 /* tainted information used? */
87*0Sstevel@tonic-gate #define RF_warned 2 /* warned about big count? */
88*0Sstevel@tonic-gate #define RF_evaled 4 /* Did an EVAL with setting? */
89*0Sstevel@tonic-gate #define RF_utf8 8 /* String contains multibyte chars? */
90*0Sstevel@tonic-gate #define RF_false 16 /* odd number of nested negatives */
91*0Sstevel@tonic-gate
92*0Sstevel@tonic-gate #define UTF ((PL_reg_flags & RF_utf8) != 0)
93*0Sstevel@tonic-gate
94*0Sstevel@tonic-gate #define RS_init 1 /* eval environment created */
95*0Sstevel@tonic-gate #define RS_set 2 /* replsv value is set */
96*0Sstevel@tonic-gate
97*0Sstevel@tonic-gate #ifndef STATIC
98*0Sstevel@tonic-gate #define STATIC static
99*0Sstevel@tonic-gate #endif
100*0Sstevel@tonic-gate
101*0Sstevel@tonic-gate #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gate /*
104*0Sstevel@tonic-gate * Forwards.
105*0Sstevel@tonic-gate */
106*0Sstevel@tonic-gate
107*0Sstevel@tonic-gate #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
108*0Sstevel@tonic-gate #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
109*0Sstevel@tonic-gate
110*0Sstevel@tonic-gate #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
111*0Sstevel@tonic-gate #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
112*0Sstevel@tonic-gate #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
113*0Sstevel@tonic-gate #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
114*0Sstevel@tonic-gate #define HOPc(pos,off) ((char*)HOP(pos,off))
115*0Sstevel@tonic-gate #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
116*0Sstevel@tonic-gate
117*0Sstevel@tonic-gate #define HOPBACK(pos, off) ( \
118*0Sstevel@tonic-gate (PL_reg_match_utf8) \
119*0Sstevel@tonic-gate ? reghopmaybe((U8*)pos, -off) \
120*0Sstevel@tonic-gate : (pos - off >= PL_bostr) \
121*0Sstevel@tonic-gate ? (U8*)(pos - off) \
122*0Sstevel@tonic-gate : (U8*)NULL \
123*0Sstevel@tonic-gate )
124*0Sstevel@tonic-gate #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
125*0Sstevel@tonic-gate
126*0Sstevel@tonic-gate #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
127*0Sstevel@tonic-gate #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
128*0Sstevel@tonic-gate #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
129*0Sstevel@tonic-gate #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
130*0Sstevel@tonic-gate #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
131*0Sstevel@tonic-gate #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
132*0Sstevel@tonic-gate
133*0Sstevel@tonic-gate #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
134*0Sstevel@tonic-gate
135*0Sstevel@tonic-gate /* for use after a quantifier and before an EXACT-like node -- japhy */
136*0Sstevel@tonic-gate #define JUMPABLE(rn) ( \
137*0Sstevel@tonic-gate OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
138*0Sstevel@tonic-gate OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
139*0Sstevel@tonic-gate OP(rn) == PLUS || OP(rn) == MINMOD || \
140*0Sstevel@tonic-gate (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
141*0Sstevel@tonic-gate )
142*0Sstevel@tonic-gate
143*0Sstevel@tonic-gate #define HAS_TEXT(rn) ( \
144*0Sstevel@tonic-gate PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
145*0Sstevel@tonic-gate )
146*0Sstevel@tonic-gate
147*0Sstevel@tonic-gate /*
148*0Sstevel@tonic-gate Search for mandatory following text node; for lookahead, the text must
149*0Sstevel@tonic-gate follow but for lookbehind (rn->flags != 0) we skip to the next step.
150*0Sstevel@tonic-gate */
151*0Sstevel@tonic-gate #define FIND_NEXT_IMPT(rn) STMT_START { \
152*0Sstevel@tonic-gate while (JUMPABLE(rn)) \
153*0Sstevel@tonic-gate if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
154*0Sstevel@tonic-gate rn = NEXTOPER(NEXTOPER(rn)); \
155*0Sstevel@tonic-gate else if (OP(rn) == PLUS) \
156*0Sstevel@tonic-gate rn = NEXTOPER(rn); \
157*0Sstevel@tonic-gate else if (OP(rn) == IFMATCH) \
158*0Sstevel@tonic-gate rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
159*0Sstevel@tonic-gate else rn += NEXT_OFF(rn); \
160*0Sstevel@tonic-gate } STMT_END
161*0Sstevel@tonic-gate
162*0Sstevel@tonic-gate static void restore_pos(pTHX_ void *arg);
163*0Sstevel@tonic-gate
164*0Sstevel@tonic-gate STATIC CHECKPOINT
S_regcppush(pTHX_ I32 parenfloor)165*0Sstevel@tonic-gate S_regcppush(pTHX_ I32 parenfloor)
166*0Sstevel@tonic-gate {
167*0Sstevel@tonic-gate int retval = PL_savestack_ix;
168*0Sstevel@tonic-gate #define REGCP_PAREN_ELEMS 4
169*0Sstevel@tonic-gate int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
170*0Sstevel@tonic-gate int p;
171*0Sstevel@tonic-gate
172*0Sstevel@tonic-gate if (paren_elems_to_push < 0)
173*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
174*0Sstevel@tonic-gate
175*0Sstevel@tonic-gate #define REGCP_OTHER_ELEMS 6
176*0Sstevel@tonic-gate SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
177*0Sstevel@tonic-gate for (p = PL_regsize; p > parenfloor; p--) {
178*0Sstevel@tonic-gate /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
179*0Sstevel@tonic-gate SSPUSHINT(PL_regendp[p]);
180*0Sstevel@tonic-gate SSPUSHINT(PL_regstartp[p]);
181*0Sstevel@tonic-gate SSPUSHPTR(PL_reg_start_tmp[p]);
182*0Sstevel@tonic-gate SSPUSHINT(p);
183*0Sstevel@tonic-gate }
184*0Sstevel@tonic-gate /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
185*0Sstevel@tonic-gate SSPUSHINT(PL_regsize);
186*0Sstevel@tonic-gate SSPUSHINT(*PL_reglastparen);
187*0Sstevel@tonic-gate SSPUSHINT(*PL_reglastcloseparen);
188*0Sstevel@tonic-gate SSPUSHPTR(PL_reginput);
189*0Sstevel@tonic-gate #define REGCP_FRAME_ELEMS 2
190*0Sstevel@tonic-gate /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
191*0Sstevel@tonic-gate * are needed for the regexp context stack bookkeeping. */
192*0Sstevel@tonic-gate SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
193*0Sstevel@tonic-gate SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
194*0Sstevel@tonic-gate
195*0Sstevel@tonic-gate return retval;
196*0Sstevel@tonic-gate }
197*0Sstevel@tonic-gate
198*0Sstevel@tonic-gate /* These are needed since we do not localize EVAL nodes: */
199*0Sstevel@tonic-gate # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
200*0Sstevel@tonic-gate " Setting an EVAL scope, savestack=%"IVdf"\n", \
201*0Sstevel@tonic-gate (IV)PL_savestack_ix)); cp = PL_savestack_ix
202*0Sstevel@tonic-gate
203*0Sstevel@tonic-gate # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
204*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, \
205*0Sstevel@tonic-gate " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
206*0Sstevel@tonic-gate (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
207*0Sstevel@tonic-gate
208*0Sstevel@tonic-gate STATIC char *
S_regcppop(pTHX)209*0Sstevel@tonic-gate S_regcppop(pTHX)
210*0Sstevel@tonic-gate {
211*0Sstevel@tonic-gate I32 i;
212*0Sstevel@tonic-gate U32 paren = 0;
213*0Sstevel@tonic-gate char *input;
214*0Sstevel@tonic-gate I32 tmps;
215*0Sstevel@tonic-gate
216*0Sstevel@tonic-gate /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
217*0Sstevel@tonic-gate i = SSPOPINT;
218*0Sstevel@tonic-gate assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
219*0Sstevel@tonic-gate i = SSPOPINT; /* Parentheses elements to pop. */
220*0Sstevel@tonic-gate input = (char *) SSPOPPTR;
221*0Sstevel@tonic-gate *PL_reglastcloseparen = SSPOPINT;
222*0Sstevel@tonic-gate *PL_reglastparen = SSPOPINT;
223*0Sstevel@tonic-gate PL_regsize = SSPOPINT;
224*0Sstevel@tonic-gate
225*0Sstevel@tonic-gate /* Now restore the parentheses context. */
226*0Sstevel@tonic-gate for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
227*0Sstevel@tonic-gate i > 0; i -= REGCP_PAREN_ELEMS) {
228*0Sstevel@tonic-gate paren = (U32)SSPOPINT;
229*0Sstevel@tonic-gate PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
230*0Sstevel@tonic-gate PL_regstartp[paren] = SSPOPINT;
231*0Sstevel@tonic-gate tmps = SSPOPINT;
232*0Sstevel@tonic-gate if (paren <= *PL_reglastparen)
233*0Sstevel@tonic-gate PL_regendp[paren] = tmps;
234*0Sstevel@tonic-gate DEBUG_r(
235*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
236*0Sstevel@tonic-gate " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
237*0Sstevel@tonic-gate (UV)paren, (IV)PL_regstartp[paren],
238*0Sstevel@tonic-gate (IV)(PL_reg_start_tmp[paren] - PL_bostr),
239*0Sstevel@tonic-gate (IV)PL_regendp[paren],
240*0Sstevel@tonic-gate (paren > *PL_reglastparen ? "(no)" : ""));
241*0Sstevel@tonic-gate );
242*0Sstevel@tonic-gate }
243*0Sstevel@tonic-gate DEBUG_r(
244*0Sstevel@tonic-gate if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
245*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
246*0Sstevel@tonic-gate " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
247*0Sstevel@tonic-gate (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
248*0Sstevel@tonic-gate }
249*0Sstevel@tonic-gate );
250*0Sstevel@tonic-gate #if 1
251*0Sstevel@tonic-gate /* It would seem that the similar code in regtry()
252*0Sstevel@tonic-gate * already takes care of this, and in fact it is in
253*0Sstevel@tonic-gate * a better location to since this code can #if 0-ed out
254*0Sstevel@tonic-gate * but the code in regtry() is needed or otherwise tests
255*0Sstevel@tonic-gate * requiring null fields (pat.t#187 and split.t#{13,14}
256*0Sstevel@tonic-gate * (as of patchlevel 7877) will fail. Then again,
257*0Sstevel@tonic-gate * this code seems to be necessary or otherwise
258*0Sstevel@tonic-gate * building DynaLoader will fail:
259*0Sstevel@tonic-gate * "Error: '*' not in typemap in DynaLoader.xs, line 164"
260*0Sstevel@tonic-gate * --jhi */
261*0Sstevel@tonic-gate for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
262*0Sstevel@tonic-gate if ((I32)paren > PL_regsize)
263*0Sstevel@tonic-gate PL_regstartp[paren] = -1;
264*0Sstevel@tonic-gate PL_regendp[paren] = -1;
265*0Sstevel@tonic-gate }
266*0Sstevel@tonic-gate #endif
267*0Sstevel@tonic-gate return input;
268*0Sstevel@tonic-gate }
269*0Sstevel@tonic-gate
270*0Sstevel@tonic-gate STATIC char *
S_regcp_set_to(pTHX_ I32 ss)271*0Sstevel@tonic-gate S_regcp_set_to(pTHX_ I32 ss)
272*0Sstevel@tonic-gate {
273*0Sstevel@tonic-gate I32 tmp = PL_savestack_ix;
274*0Sstevel@tonic-gate
275*0Sstevel@tonic-gate PL_savestack_ix = ss;
276*0Sstevel@tonic-gate regcppop();
277*0Sstevel@tonic-gate PL_savestack_ix = tmp;
278*0Sstevel@tonic-gate return Nullch;
279*0Sstevel@tonic-gate }
280*0Sstevel@tonic-gate
281*0Sstevel@tonic-gate typedef struct re_cc_state
282*0Sstevel@tonic-gate {
283*0Sstevel@tonic-gate I32 ss;
284*0Sstevel@tonic-gate regnode *node;
285*0Sstevel@tonic-gate struct re_cc_state *prev;
286*0Sstevel@tonic-gate CURCUR *cc;
287*0Sstevel@tonic-gate regexp *re;
288*0Sstevel@tonic-gate } re_cc_state;
289*0Sstevel@tonic-gate
290*0Sstevel@tonic-gate #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
291*0Sstevel@tonic-gate
292*0Sstevel@tonic-gate #define TRYPAREN(paren, n, input) { \
293*0Sstevel@tonic-gate if (paren) { \
294*0Sstevel@tonic-gate if (n) { \
295*0Sstevel@tonic-gate PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
296*0Sstevel@tonic-gate PL_regendp[paren] = input - PL_bostr; \
297*0Sstevel@tonic-gate } \
298*0Sstevel@tonic-gate else \
299*0Sstevel@tonic-gate PL_regendp[paren] = -1; \
300*0Sstevel@tonic-gate } \
301*0Sstevel@tonic-gate if (regmatch(next)) \
302*0Sstevel@tonic-gate sayYES; \
303*0Sstevel@tonic-gate if (paren && n) \
304*0Sstevel@tonic-gate PL_regendp[paren] = -1; \
305*0Sstevel@tonic-gate }
306*0Sstevel@tonic-gate
307*0Sstevel@tonic-gate
308*0Sstevel@tonic-gate /*
309*0Sstevel@tonic-gate * pregexec and friends
310*0Sstevel@tonic-gate */
311*0Sstevel@tonic-gate
312*0Sstevel@tonic-gate /*
313*0Sstevel@tonic-gate - pregexec - match a regexp against a string
314*0Sstevel@tonic-gate */
315*0Sstevel@tonic-gate I32
Perl_pregexec(pTHX_ register regexp * prog,char * stringarg,register char * strend,char * strbeg,I32 minend,SV * screamer,U32 nosave)316*0Sstevel@tonic-gate Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
317*0Sstevel@tonic-gate char *strbeg, I32 minend, SV *screamer, U32 nosave)
318*0Sstevel@tonic-gate /* strend: pointer to null at end of string */
319*0Sstevel@tonic-gate /* strbeg: real beginning of string */
320*0Sstevel@tonic-gate /* minend: end of match must be >=minend after stringarg. */
321*0Sstevel@tonic-gate /* nosave: For optimizations. */
322*0Sstevel@tonic-gate {
323*0Sstevel@tonic-gate return
324*0Sstevel@tonic-gate regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
325*0Sstevel@tonic-gate nosave ? 0 : REXEC_COPY_STR);
326*0Sstevel@tonic-gate }
327*0Sstevel@tonic-gate
328*0Sstevel@tonic-gate STATIC void
S_cache_re(pTHX_ regexp * prog)329*0Sstevel@tonic-gate S_cache_re(pTHX_ regexp *prog)
330*0Sstevel@tonic-gate {
331*0Sstevel@tonic-gate PL_regprecomp = prog->precomp; /* Needed for FAIL. */
332*0Sstevel@tonic-gate #ifdef DEBUGGING
333*0Sstevel@tonic-gate PL_regprogram = prog->program;
334*0Sstevel@tonic-gate #endif
335*0Sstevel@tonic-gate PL_regnpar = prog->nparens;
336*0Sstevel@tonic-gate PL_regdata = prog->data;
337*0Sstevel@tonic-gate PL_reg_re = prog;
338*0Sstevel@tonic-gate }
339*0Sstevel@tonic-gate
340*0Sstevel@tonic-gate /*
341*0Sstevel@tonic-gate * Need to implement the following flags for reg_anch:
342*0Sstevel@tonic-gate *
343*0Sstevel@tonic-gate * USE_INTUIT_NOML - Useful to call re_intuit_start() first
344*0Sstevel@tonic-gate * USE_INTUIT_ML
345*0Sstevel@tonic-gate * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
346*0Sstevel@tonic-gate * INTUIT_AUTORITATIVE_ML
347*0Sstevel@tonic-gate * INTUIT_ONCE_NOML - Intuit can match in one location only.
348*0Sstevel@tonic-gate * INTUIT_ONCE_ML
349*0Sstevel@tonic-gate *
350*0Sstevel@tonic-gate * Another flag for this function: SECOND_TIME (so that float substrs
351*0Sstevel@tonic-gate * with giant delta may be not rechecked).
352*0Sstevel@tonic-gate */
353*0Sstevel@tonic-gate
354*0Sstevel@tonic-gate /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
355*0Sstevel@tonic-gate
356*0Sstevel@tonic-gate /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
357*0Sstevel@tonic-gate Otherwise, only SvCUR(sv) is used to get strbeg. */
358*0Sstevel@tonic-gate
359*0Sstevel@tonic-gate /* XXXX We assume that strpos is strbeg unless sv. */
360*0Sstevel@tonic-gate
361*0Sstevel@tonic-gate /* XXXX Some places assume that there is a fixed substring.
362*0Sstevel@tonic-gate An update may be needed if optimizer marks as "INTUITable"
363*0Sstevel@tonic-gate RExen without fixed substrings. Similarly, it is assumed that
364*0Sstevel@tonic-gate lengths of all the strings are no more than minlen, thus they
365*0Sstevel@tonic-gate cannot come from lookahead.
366*0Sstevel@tonic-gate (Or minlen should take into account lookahead.) */
367*0Sstevel@tonic-gate
368*0Sstevel@tonic-gate /* A failure to find a constant substring means that there is no need to make
369*0Sstevel@tonic-gate an expensive call to REx engine, thus we celebrate a failure. Similarly,
370*0Sstevel@tonic-gate finding a substring too deep into the string means that less calls to
371*0Sstevel@tonic-gate regtry() should be needed.
372*0Sstevel@tonic-gate
373*0Sstevel@tonic-gate REx compiler's optimizer found 4 possible hints:
374*0Sstevel@tonic-gate a) Anchored substring;
375*0Sstevel@tonic-gate b) Fixed substring;
376*0Sstevel@tonic-gate c) Whether we are anchored (beginning-of-line or \G);
377*0Sstevel@tonic-gate d) First node (of those at offset 0) which may distingush positions;
378*0Sstevel@tonic-gate We use a)b)d) and multiline-part of c), and try to find a position in the
379*0Sstevel@tonic-gate string which does not contradict any of them.
380*0Sstevel@tonic-gate */
381*0Sstevel@tonic-gate
382*0Sstevel@tonic-gate /* Most of decisions we do here should have been done at compile time.
383*0Sstevel@tonic-gate The nodes of the REx which we used for the search should have been
384*0Sstevel@tonic-gate deleted from the finite automaton. */
385*0Sstevel@tonic-gate
386*0Sstevel@tonic-gate char *
Perl_re_intuit_start(pTHX_ regexp * prog,SV * sv,char * strpos,char * strend,U32 flags,re_scream_pos_data * data)387*0Sstevel@tonic-gate Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
388*0Sstevel@tonic-gate char *strend, U32 flags, re_scream_pos_data *data)
389*0Sstevel@tonic-gate {
390*0Sstevel@tonic-gate register I32 start_shift = 0;
391*0Sstevel@tonic-gate /* Should be nonnegative! */
392*0Sstevel@tonic-gate register I32 end_shift = 0;
393*0Sstevel@tonic-gate register char *s;
394*0Sstevel@tonic-gate register SV *check;
395*0Sstevel@tonic-gate char *strbeg;
396*0Sstevel@tonic-gate char *t;
397*0Sstevel@tonic-gate int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
398*0Sstevel@tonic-gate I32 ml_anch;
399*0Sstevel@tonic-gate register char *other_last = Nullch; /* other substr checked before this */
400*0Sstevel@tonic-gate char *check_at = Nullch; /* check substr found at this pos */
401*0Sstevel@tonic-gate #ifdef DEBUGGING
402*0Sstevel@tonic-gate char *i_strpos = strpos;
403*0Sstevel@tonic-gate SV *dsv = PERL_DEBUG_PAD_ZERO(0);
404*0Sstevel@tonic-gate #endif
405*0Sstevel@tonic-gate RX_MATCH_UTF8_set(prog,do_utf8);
406*0Sstevel@tonic-gate
407*0Sstevel@tonic-gate if (prog->reganch & ROPT_UTF8) {
408*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
409*0Sstevel@tonic-gate "UTF-8 regex...\n"));
410*0Sstevel@tonic-gate PL_reg_flags |= RF_utf8;
411*0Sstevel@tonic-gate }
412*0Sstevel@tonic-gate
413*0Sstevel@tonic-gate DEBUG_r({
414*0Sstevel@tonic-gate char *s = PL_reg_match_utf8 ?
415*0Sstevel@tonic-gate sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
416*0Sstevel@tonic-gate strpos;
417*0Sstevel@tonic-gate int len = PL_reg_match_utf8 ?
418*0Sstevel@tonic-gate strlen(s) : strend - strpos;
419*0Sstevel@tonic-gate if (!PL_colorset)
420*0Sstevel@tonic-gate reginitcolors();
421*0Sstevel@tonic-gate if (PL_reg_match_utf8)
422*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
423*0Sstevel@tonic-gate "UTF-8 target...\n"));
424*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
425*0Sstevel@tonic-gate "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
426*0Sstevel@tonic-gate PL_colors[4],PL_colors[5],PL_colors[0],
427*0Sstevel@tonic-gate prog->precomp,
428*0Sstevel@tonic-gate PL_colors[1],
429*0Sstevel@tonic-gate (strlen(prog->precomp) > 60 ? "..." : ""),
430*0Sstevel@tonic-gate PL_colors[0],
431*0Sstevel@tonic-gate (int)(len > 60 ? 60 : len),
432*0Sstevel@tonic-gate s, PL_colors[1],
433*0Sstevel@tonic-gate (len > 60 ? "..." : "")
434*0Sstevel@tonic-gate );
435*0Sstevel@tonic-gate });
436*0Sstevel@tonic-gate
437*0Sstevel@tonic-gate /* CHR_DIST() would be more correct here but it makes things slow. */
438*0Sstevel@tonic-gate if (prog->minlen > strend - strpos) {
439*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
440*0Sstevel@tonic-gate "String too short... [re_intuit_start]\n"));
441*0Sstevel@tonic-gate goto fail;
442*0Sstevel@tonic-gate }
443*0Sstevel@tonic-gate strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
444*0Sstevel@tonic-gate PL_regeol = strend;
445*0Sstevel@tonic-gate if (do_utf8) {
446*0Sstevel@tonic-gate if (!prog->check_utf8 && prog->check_substr)
447*0Sstevel@tonic-gate to_utf8_substr(prog);
448*0Sstevel@tonic-gate check = prog->check_utf8;
449*0Sstevel@tonic-gate } else {
450*0Sstevel@tonic-gate if (!prog->check_substr && prog->check_utf8)
451*0Sstevel@tonic-gate to_byte_substr(prog);
452*0Sstevel@tonic-gate check = prog->check_substr;
453*0Sstevel@tonic-gate }
454*0Sstevel@tonic-gate if (check == &PL_sv_undef) {
455*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
456*0Sstevel@tonic-gate "Non-utf string cannot match utf check string\n"));
457*0Sstevel@tonic-gate goto fail;
458*0Sstevel@tonic-gate }
459*0Sstevel@tonic-gate if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
460*0Sstevel@tonic-gate ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
461*0Sstevel@tonic-gate || ( (prog->reganch & ROPT_ANCH_BOL)
462*0Sstevel@tonic-gate && !PL_multiline ) ); /* Check after \n? */
463*0Sstevel@tonic-gate
464*0Sstevel@tonic-gate if (!ml_anch) {
465*0Sstevel@tonic-gate if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
466*0Sstevel@tonic-gate | ROPT_IMPLICIT)) /* not a real BOL */
467*0Sstevel@tonic-gate /* SvCUR is not set on references: SvRV and SvPVX overlap */
468*0Sstevel@tonic-gate && sv && !SvROK(sv)
469*0Sstevel@tonic-gate && (strpos != strbeg)) {
470*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
471*0Sstevel@tonic-gate goto fail;
472*0Sstevel@tonic-gate }
473*0Sstevel@tonic-gate if (prog->check_offset_min == prog->check_offset_max &&
474*0Sstevel@tonic-gate !(prog->reganch & ROPT_CANY_SEEN)) {
475*0Sstevel@tonic-gate /* Substring at constant offset from beg-of-str... */
476*0Sstevel@tonic-gate I32 slen;
477*0Sstevel@tonic-gate
478*0Sstevel@tonic-gate s = HOP3c(strpos, prog->check_offset_min, strend);
479*0Sstevel@tonic-gate if (SvTAIL(check)) {
480*0Sstevel@tonic-gate slen = SvCUR(check); /* >= 1 */
481*0Sstevel@tonic-gate
482*0Sstevel@tonic-gate if ( strend - s > slen || strend - s < slen - 1
483*0Sstevel@tonic-gate || (strend - s == slen && strend[-1] != '\n')) {
484*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
485*0Sstevel@tonic-gate goto fail_finish;
486*0Sstevel@tonic-gate }
487*0Sstevel@tonic-gate /* Now should match s[0..slen-2] */
488*0Sstevel@tonic-gate slen--;
489*0Sstevel@tonic-gate if (slen && (*SvPVX(check) != *s
490*0Sstevel@tonic-gate || (slen > 1
491*0Sstevel@tonic-gate && memNE(SvPVX(check), s, slen)))) {
492*0Sstevel@tonic-gate report_neq:
493*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
494*0Sstevel@tonic-gate goto fail_finish;
495*0Sstevel@tonic-gate }
496*0Sstevel@tonic-gate }
497*0Sstevel@tonic-gate else if (*SvPVX(check) != *s
498*0Sstevel@tonic-gate || ((slen = SvCUR(check)) > 1
499*0Sstevel@tonic-gate && memNE(SvPVX(check), s, slen)))
500*0Sstevel@tonic-gate goto report_neq;
501*0Sstevel@tonic-gate goto success_at_start;
502*0Sstevel@tonic-gate }
503*0Sstevel@tonic-gate }
504*0Sstevel@tonic-gate /* Match is anchored, but substr is not anchored wrt beg-of-str. */
505*0Sstevel@tonic-gate s = strpos;
506*0Sstevel@tonic-gate start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507*0Sstevel@tonic-gate end_shift = prog->minlen - start_shift -
508*0Sstevel@tonic-gate CHR_SVLEN(check) + (SvTAIL(check) != 0);
509*0Sstevel@tonic-gate if (!ml_anch) {
510*0Sstevel@tonic-gate I32 end = prog->check_offset_max + CHR_SVLEN(check)
511*0Sstevel@tonic-gate - (SvTAIL(check) != 0);
512*0Sstevel@tonic-gate I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
513*0Sstevel@tonic-gate
514*0Sstevel@tonic-gate if (end_shift < eshift)
515*0Sstevel@tonic-gate end_shift = eshift;
516*0Sstevel@tonic-gate }
517*0Sstevel@tonic-gate }
518*0Sstevel@tonic-gate else { /* Can match at random position */
519*0Sstevel@tonic-gate ml_anch = 0;
520*0Sstevel@tonic-gate s = strpos;
521*0Sstevel@tonic-gate start_shift = prog->check_offset_min; /* okay to underestimate on CC */
522*0Sstevel@tonic-gate /* Should be nonnegative! */
523*0Sstevel@tonic-gate end_shift = prog->minlen - start_shift -
524*0Sstevel@tonic-gate CHR_SVLEN(check) + (SvTAIL(check) != 0);
525*0Sstevel@tonic-gate }
526*0Sstevel@tonic-gate
527*0Sstevel@tonic-gate #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
528*0Sstevel@tonic-gate if (end_shift < 0)
529*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: end_shift");
530*0Sstevel@tonic-gate #endif
531*0Sstevel@tonic-gate
532*0Sstevel@tonic-gate restart:
533*0Sstevel@tonic-gate /* Find a possible match in the region s..strend by looking for
534*0Sstevel@tonic-gate the "check" substring in the region corrected by start/end_shift. */
535*0Sstevel@tonic-gate if (flags & REXEC_SCREAM) {
536*0Sstevel@tonic-gate I32 p = -1; /* Internal iterator of scream. */
537*0Sstevel@tonic-gate I32 *pp = data ? data->scream_pos : &p;
538*0Sstevel@tonic-gate
539*0Sstevel@tonic-gate if (PL_screamfirst[BmRARE(check)] >= 0
540*0Sstevel@tonic-gate || ( BmRARE(check) == '\n'
541*0Sstevel@tonic-gate && (BmPREVIOUS(check) == SvCUR(check) - 1)
542*0Sstevel@tonic-gate && SvTAIL(check) ))
543*0Sstevel@tonic-gate s = screaminstr(sv, check,
544*0Sstevel@tonic-gate start_shift + (s - strbeg), end_shift, pp, 0);
545*0Sstevel@tonic-gate else
546*0Sstevel@tonic-gate goto fail_finish;
547*0Sstevel@tonic-gate /* we may be pointing at the wrong string */
548*0Sstevel@tonic-gate if (s && RX_MATCH_COPIED(prog))
549*0Sstevel@tonic-gate s = strbeg + (s - SvPVX(sv));
550*0Sstevel@tonic-gate if (data)
551*0Sstevel@tonic-gate *data->scream_olds = s;
552*0Sstevel@tonic-gate }
553*0Sstevel@tonic-gate else if (prog->reganch & ROPT_CANY_SEEN)
554*0Sstevel@tonic-gate s = fbm_instr((U8*)(s + start_shift),
555*0Sstevel@tonic-gate (U8*)(strend - end_shift),
556*0Sstevel@tonic-gate check, PL_multiline ? FBMrf_MULTILINE : 0);
557*0Sstevel@tonic-gate else
558*0Sstevel@tonic-gate s = fbm_instr(HOP3(s, start_shift, strend),
559*0Sstevel@tonic-gate HOP3(strend, -end_shift, strbeg),
560*0Sstevel@tonic-gate check, PL_multiline ? FBMrf_MULTILINE : 0);
561*0Sstevel@tonic-gate
562*0Sstevel@tonic-gate /* Update the count-of-usability, remove useless subpatterns,
563*0Sstevel@tonic-gate unshift s. */
564*0Sstevel@tonic-gate
565*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
566*0Sstevel@tonic-gate (s ? "Found" : "Did not find"),
567*0Sstevel@tonic-gate (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
568*0Sstevel@tonic-gate PL_colors[0],
569*0Sstevel@tonic-gate (int)(SvCUR(check) - (SvTAIL(check)!=0)),
570*0Sstevel@tonic-gate SvPVX(check),
571*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(check) ? "$" : ""),
572*0Sstevel@tonic-gate (s ? " at offset " : "...\n") ) );
573*0Sstevel@tonic-gate
574*0Sstevel@tonic-gate if (!s)
575*0Sstevel@tonic-gate goto fail_finish;
576*0Sstevel@tonic-gate
577*0Sstevel@tonic-gate check_at = s;
578*0Sstevel@tonic-gate
579*0Sstevel@tonic-gate /* Finish the diagnostic message */
580*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
581*0Sstevel@tonic-gate
582*0Sstevel@tonic-gate /* Got a candidate. Check MBOL anchoring, and the *other* substr.
583*0Sstevel@tonic-gate Start with the other substr.
584*0Sstevel@tonic-gate XXXX no SCREAM optimization yet - and a very coarse implementation
585*0Sstevel@tonic-gate XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
586*0Sstevel@tonic-gate *always* match. Probably should be marked during compile...
587*0Sstevel@tonic-gate Probably it is right to do no SCREAM here...
588*0Sstevel@tonic-gate */
589*0Sstevel@tonic-gate
590*0Sstevel@tonic-gate if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
591*0Sstevel@tonic-gate /* Take into account the "other" substring. */
592*0Sstevel@tonic-gate /* XXXX May be hopelessly wrong for UTF... */
593*0Sstevel@tonic-gate if (!other_last)
594*0Sstevel@tonic-gate other_last = strpos;
595*0Sstevel@tonic-gate if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
596*0Sstevel@tonic-gate do_other_anchored:
597*0Sstevel@tonic-gate {
598*0Sstevel@tonic-gate char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
599*0Sstevel@tonic-gate char *s1 = s;
600*0Sstevel@tonic-gate SV* must;
601*0Sstevel@tonic-gate
602*0Sstevel@tonic-gate t = s - prog->check_offset_max;
603*0Sstevel@tonic-gate if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
604*0Sstevel@tonic-gate && (!do_utf8
605*0Sstevel@tonic-gate || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
606*0Sstevel@tonic-gate && t > strpos)))
607*0Sstevel@tonic-gate /* EMPTY */;
608*0Sstevel@tonic-gate else
609*0Sstevel@tonic-gate t = strpos;
610*0Sstevel@tonic-gate t = HOP3c(t, prog->anchored_offset, strend);
611*0Sstevel@tonic-gate if (t < other_last) /* These positions already checked */
612*0Sstevel@tonic-gate t = other_last;
613*0Sstevel@tonic-gate last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
614*0Sstevel@tonic-gate if (last < last1)
615*0Sstevel@tonic-gate last1 = last;
616*0Sstevel@tonic-gate /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
617*0Sstevel@tonic-gate /* On end-of-str: see comment below. */
618*0Sstevel@tonic-gate must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
619*0Sstevel@tonic-gate if (must == &PL_sv_undef) {
620*0Sstevel@tonic-gate s = (char*)NULL;
621*0Sstevel@tonic-gate DEBUG_r(must = prog->anchored_utf8); /* for debug */
622*0Sstevel@tonic-gate }
623*0Sstevel@tonic-gate else
624*0Sstevel@tonic-gate s = fbm_instr(
625*0Sstevel@tonic-gate (unsigned char*)t,
626*0Sstevel@tonic-gate HOP3(HOP3(last1, prog->anchored_offset, strend)
627*0Sstevel@tonic-gate + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
628*0Sstevel@tonic-gate must,
629*0Sstevel@tonic-gate PL_multiline ? FBMrf_MULTILINE : 0
630*0Sstevel@tonic-gate );
631*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
632*0Sstevel@tonic-gate "%s anchored substr `%s%.*s%s'%s",
633*0Sstevel@tonic-gate (s ? "Found" : "Contradicts"),
634*0Sstevel@tonic-gate PL_colors[0],
635*0Sstevel@tonic-gate (int)(SvCUR(must)
636*0Sstevel@tonic-gate - (SvTAIL(must)!=0)),
637*0Sstevel@tonic-gate SvPVX(must),
638*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(must) ? "$" : "")));
639*0Sstevel@tonic-gate if (!s) {
640*0Sstevel@tonic-gate if (last1 >= last2) {
641*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
642*0Sstevel@tonic-gate ", giving up...\n"));
643*0Sstevel@tonic-gate goto fail_finish;
644*0Sstevel@tonic-gate }
645*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
646*0Sstevel@tonic-gate ", trying floating at offset %ld...\n",
647*0Sstevel@tonic-gate (long)(HOP3c(s1, 1, strend) - i_strpos)));
648*0Sstevel@tonic-gate other_last = HOP3c(last1, prog->anchored_offset+1, strend);
649*0Sstevel@tonic-gate s = HOP3c(last, 1, strend);
650*0Sstevel@tonic-gate goto restart;
651*0Sstevel@tonic-gate }
652*0Sstevel@tonic-gate else {
653*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
654*0Sstevel@tonic-gate (long)(s - i_strpos)));
655*0Sstevel@tonic-gate t = HOP3c(s, -prog->anchored_offset, strbeg);
656*0Sstevel@tonic-gate other_last = HOP3c(s, 1, strend);
657*0Sstevel@tonic-gate s = s1;
658*0Sstevel@tonic-gate if (t == strpos)
659*0Sstevel@tonic-gate goto try_at_start;
660*0Sstevel@tonic-gate goto try_at_offset;
661*0Sstevel@tonic-gate }
662*0Sstevel@tonic-gate }
663*0Sstevel@tonic-gate }
664*0Sstevel@tonic-gate else { /* Take into account the floating substring. */
665*0Sstevel@tonic-gate char *last, *last1;
666*0Sstevel@tonic-gate char *s1 = s;
667*0Sstevel@tonic-gate SV* must;
668*0Sstevel@tonic-gate
669*0Sstevel@tonic-gate t = HOP3c(s, -start_shift, strbeg);
670*0Sstevel@tonic-gate last1 = last =
671*0Sstevel@tonic-gate HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
672*0Sstevel@tonic-gate if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
673*0Sstevel@tonic-gate last = HOP3c(t, prog->float_max_offset, strend);
674*0Sstevel@tonic-gate s = HOP3c(t, prog->float_min_offset, strend);
675*0Sstevel@tonic-gate if (s < other_last)
676*0Sstevel@tonic-gate s = other_last;
677*0Sstevel@tonic-gate /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
678*0Sstevel@tonic-gate must = do_utf8 ? prog->float_utf8 : prog->float_substr;
679*0Sstevel@tonic-gate /* fbm_instr() takes into account exact value of end-of-str
680*0Sstevel@tonic-gate if the check is SvTAIL(ed). Since false positives are OK,
681*0Sstevel@tonic-gate and end-of-str is not later than strend we are OK. */
682*0Sstevel@tonic-gate if (must == &PL_sv_undef) {
683*0Sstevel@tonic-gate s = (char*)NULL;
684*0Sstevel@tonic-gate DEBUG_r(must = prog->float_utf8); /* for debug message */
685*0Sstevel@tonic-gate }
686*0Sstevel@tonic-gate else
687*0Sstevel@tonic-gate s = fbm_instr((unsigned char*)s,
688*0Sstevel@tonic-gate (unsigned char*)last + SvCUR(must)
689*0Sstevel@tonic-gate - (SvTAIL(must)!=0),
690*0Sstevel@tonic-gate must, PL_multiline ? FBMrf_MULTILINE : 0);
691*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
692*0Sstevel@tonic-gate (s ? "Found" : "Contradicts"),
693*0Sstevel@tonic-gate PL_colors[0],
694*0Sstevel@tonic-gate (int)(SvCUR(must) - (SvTAIL(must)!=0)),
695*0Sstevel@tonic-gate SvPVX(must),
696*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(must) ? "$" : "")));
697*0Sstevel@tonic-gate if (!s) {
698*0Sstevel@tonic-gate if (last1 == last) {
699*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
700*0Sstevel@tonic-gate ", giving up...\n"));
701*0Sstevel@tonic-gate goto fail_finish;
702*0Sstevel@tonic-gate }
703*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
704*0Sstevel@tonic-gate ", trying anchored starting at offset %ld...\n",
705*0Sstevel@tonic-gate (long)(s1 + 1 - i_strpos)));
706*0Sstevel@tonic-gate other_last = last;
707*0Sstevel@tonic-gate s = HOP3c(t, 1, strend);
708*0Sstevel@tonic-gate goto restart;
709*0Sstevel@tonic-gate }
710*0Sstevel@tonic-gate else {
711*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
712*0Sstevel@tonic-gate (long)(s - i_strpos)));
713*0Sstevel@tonic-gate other_last = s; /* Fix this later. --Hugo */
714*0Sstevel@tonic-gate s = s1;
715*0Sstevel@tonic-gate if (t == strpos)
716*0Sstevel@tonic-gate goto try_at_start;
717*0Sstevel@tonic-gate goto try_at_offset;
718*0Sstevel@tonic-gate }
719*0Sstevel@tonic-gate }
720*0Sstevel@tonic-gate }
721*0Sstevel@tonic-gate
722*0Sstevel@tonic-gate t = s - prog->check_offset_max;
723*0Sstevel@tonic-gate if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
724*0Sstevel@tonic-gate && (!do_utf8
725*0Sstevel@tonic-gate || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
726*0Sstevel@tonic-gate && t > strpos))) {
727*0Sstevel@tonic-gate /* Fixed substring is found far enough so that the match
728*0Sstevel@tonic-gate cannot start at strpos. */
729*0Sstevel@tonic-gate try_at_offset:
730*0Sstevel@tonic-gate if (ml_anch && t[-1] != '\n') {
731*0Sstevel@tonic-gate /* Eventually fbm_*() should handle this, but often
732*0Sstevel@tonic-gate anchored_offset is not 0, so this check will not be wasted. */
733*0Sstevel@tonic-gate /* XXXX In the code below we prefer to look for "^" even in
734*0Sstevel@tonic-gate presence of anchored substrings. And we search even
735*0Sstevel@tonic-gate beyond the found float position. These pessimizations
736*0Sstevel@tonic-gate are historical artefacts only. */
737*0Sstevel@tonic-gate find_anchor:
738*0Sstevel@tonic-gate while (t < strend - prog->minlen) {
739*0Sstevel@tonic-gate if (*t == '\n') {
740*0Sstevel@tonic-gate if (t < check_at - prog->check_offset_min) {
741*0Sstevel@tonic-gate if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
742*0Sstevel@tonic-gate /* Since we moved from the found position,
743*0Sstevel@tonic-gate we definitely contradict the found anchored
744*0Sstevel@tonic-gate substr. Due to the above check we do not
745*0Sstevel@tonic-gate contradict "check" substr.
746*0Sstevel@tonic-gate Thus we can arrive here only if check substr
747*0Sstevel@tonic-gate is float. Redo checking for "other"=="fixed".
748*0Sstevel@tonic-gate */
749*0Sstevel@tonic-gate strpos = t + 1;
750*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
751*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
752*0Sstevel@tonic-gate goto do_other_anchored;
753*0Sstevel@tonic-gate }
754*0Sstevel@tonic-gate /* We don't contradict the found floating substring. */
755*0Sstevel@tonic-gate /* XXXX Why not check for STCLASS? */
756*0Sstevel@tonic-gate s = t + 1;
757*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
758*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
759*0Sstevel@tonic-gate goto set_useful;
760*0Sstevel@tonic-gate }
761*0Sstevel@tonic-gate /* Position contradicts check-string */
762*0Sstevel@tonic-gate /* XXXX probably better to look for check-string
763*0Sstevel@tonic-gate than for "\n", so one should lower the limit for t? */
764*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
765*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
766*0Sstevel@tonic-gate other_last = strpos = s = t + 1;
767*0Sstevel@tonic-gate goto restart;
768*0Sstevel@tonic-gate }
769*0Sstevel@tonic-gate t++;
770*0Sstevel@tonic-gate }
771*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
772*0Sstevel@tonic-gate PL_colors[0],PL_colors[1]));
773*0Sstevel@tonic-gate goto fail_finish;
774*0Sstevel@tonic-gate }
775*0Sstevel@tonic-gate else {
776*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
777*0Sstevel@tonic-gate PL_colors[0],PL_colors[1]));
778*0Sstevel@tonic-gate }
779*0Sstevel@tonic-gate s = t;
780*0Sstevel@tonic-gate set_useful:
781*0Sstevel@tonic-gate ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
782*0Sstevel@tonic-gate }
783*0Sstevel@tonic-gate else {
784*0Sstevel@tonic-gate /* The found string does not prohibit matching at strpos,
785*0Sstevel@tonic-gate - no optimization of calling REx engine can be performed,
786*0Sstevel@tonic-gate unless it was an MBOL and we are not after MBOL,
787*0Sstevel@tonic-gate or a future STCLASS check will fail this. */
788*0Sstevel@tonic-gate try_at_start:
789*0Sstevel@tonic-gate /* Even in this situation we may use MBOL flag if strpos is offset
790*0Sstevel@tonic-gate wrt the start of the string. */
791*0Sstevel@tonic-gate if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
792*0Sstevel@tonic-gate && (strpos != strbeg) && strpos[-1] != '\n'
793*0Sstevel@tonic-gate /* May be due to an implicit anchor of m{.*foo} */
794*0Sstevel@tonic-gate && !(prog->reganch & ROPT_IMPLICIT))
795*0Sstevel@tonic-gate {
796*0Sstevel@tonic-gate t = strpos;
797*0Sstevel@tonic-gate goto find_anchor;
798*0Sstevel@tonic-gate }
799*0Sstevel@tonic-gate DEBUG_r( if (ml_anch)
800*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
801*0Sstevel@tonic-gate (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
802*0Sstevel@tonic-gate );
803*0Sstevel@tonic-gate success_at_start:
804*0Sstevel@tonic-gate if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
805*0Sstevel@tonic-gate && (do_utf8 ? (
806*0Sstevel@tonic-gate prog->check_utf8 /* Could be deleted already */
807*0Sstevel@tonic-gate && --BmUSEFUL(prog->check_utf8) < 0
808*0Sstevel@tonic-gate && (prog->check_utf8 == prog->float_utf8)
809*0Sstevel@tonic-gate ) : (
810*0Sstevel@tonic-gate prog->check_substr /* Could be deleted already */
811*0Sstevel@tonic-gate && --BmUSEFUL(prog->check_substr) < 0
812*0Sstevel@tonic-gate && (prog->check_substr == prog->float_substr)
813*0Sstevel@tonic-gate )))
814*0Sstevel@tonic-gate {
815*0Sstevel@tonic-gate /* If flags & SOMETHING - do not do it many times on the same match */
816*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
817*0Sstevel@tonic-gate SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
818*0Sstevel@tonic-gate if (do_utf8 ? prog->check_substr : prog->check_utf8)
819*0Sstevel@tonic-gate SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
820*0Sstevel@tonic-gate prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
821*0Sstevel@tonic-gate prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
822*0Sstevel@tonic-gate check = Nullsv; /* abort */
823*0Sstevel@tonic-gate s = strpos;
824*0Sstevel@tonic-gate /* XXXX This is a remnant of the old implementation. It
825*0Sstevel@tonic-gate looks wasteful, since now INTUIT can use many
826*0Sstevel@tonic-gate other heuristics. */
827*0Sstevel@tonic-gate prog->reganch &= ~RE_USE_INTUIT;
828*0Sstevel@tonic-gate }
829*0Sstevel@tonic-gate else
830*0Sstevel@tonic-gate s = strpos;
831*0Sstevel@tonic-gate }
832*0Sstevel@tonic-gate
833*0Sstevel@tonic-gate /* Last resort... */
834*0Sstevel@tonic-gate /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
835*0Sstevel@tonic-gate if (prog->regstclass) {
836*0Sstevel@tonic-gate /* minlen == 0 is possible if regstclass is \b or \B,
837*0Sstevel@tonic-gate and the fixed substr is ''$.
838*0Sstevel@tonic-gate Since minlen is already taken into account, s+1 is before strend;
839*0Sstevel@tonic-gate accidentally, minlen >= 1 guaranties no false positives at s + 1
840*0Sstevel@tonic-gate even for \b or \B. But (minlen? 1 : 0) below assumes that
841*0Sstevel@tonic-gate regstclass does not come from lookahead... */
842*0Sstevel@tonic-gate /* If regstclass takes bytelength more than 1: If charlength==1, OK.
843*0Sstevel@tonic-gate This leaves EXACTF only, which is dealt with in find_byclass(). */
844*0Sstevel@tonic-gate U8* str = (U8*)STRING(prog->regstclass);
845*0Sstevel@tonic-gate int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
846*0Sstevel@tonic-gate ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
847*0Sstevel@tonic-gate : 1);
848*0Sstevel@tonic-gate char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
849*0Sstevel@tonic-gate ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
850*0Sstevel@tonic-gate : (prog->float_substr || prog->float_utf8
851*0Sstevel@tonic-gate ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
852*0Sstevel@tonic-gate cl_l, strend)
853*0Sstevel@tonic-gate : strend);
854*0Sstevel@tonic-gate char *startpos = strbeg;
855*0Sstevel@tonic-gate
856*0Sstevel@tonic-gate t = s;
857*0Sstevel@tonic-gate cache_re(prog);
858*0Sstevel@tonic-gate s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
859*0Sstevel@tonic-gate if (!s) {
860*0Sstevel@tonic-gate #ifdef DEBUGGING
861*0Sstevel@tonic-gate char *what = 0;
862*0Sstevel@tonic-gate #endif
863*0Sstevel@tonic-gate if (endpos == strend) {
864*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log,
865*0Sstevel@tonic-gate "Could not match STCLASS...\n") );
866*0Sstevel@tonic-gate goto fail;
867*0Sstevel@tonic-gate }
868*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log,
869*0Sstevel@tonic-gate "This position contradicts STCLASS...\n") );
870*0Sstevel@tonic-gate if ((prog->reganch & ROPT_ANCH) && !ml_anch)
871*0Sstevel@tonic-gate goto fail;
872*0Sstevel@tonic-gate /* Contradict one of substrings */
873*0Sstevel@tonic-gate if (prog->anchored_substr || prog->anchored_utf8) {
874*0Sstevel@tonic-gate if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
875*0Sstevel@tonic-gate DEBUG_r( what = "anchored" );
876*0Sstevel@tonic-gate hop_and_restart:
877*0Sstevel@tonic-gate s = HOP3c(t, 1, strend);
878*0Sstevel@tonic-gate if (s + start_shift + end_shift > strend) {
879*0Sstevel@tonic-gate /* XXXX Should be taken into account earlier? */
880*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log,
881*0Sstevel@tonic-gate "Could not match STCLASS...\n") );
882*0Sstevel@tonic-gate goto fail;
883*0Sstevel@tonic-gate }
884*0Sstevel@tonic-gate if (!check)
885*0Sstevel@tonic-gate goto giveup;
886*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log,
887*0Sstevel@tonic-gate "Looking for %s substr starting at offset %ld...\n",
888*0Sstevel@tonic-gate what, (long)(s + start_shift - i_strpos)) );
889*0Sstevel@tonic-gate goto restart;
890*0Sstevel@tonic-gate }
891*0Sstevel@tonic-gate /* Have both, check_string is floating */
892*0Sstevel@tonic-gate if (t + start_shift >= check_at) /* Contradicts floating=check */
893*0Sstevel@tonic-gate goto retry_floating_check;
894*0Sstevel@tonic-gate /* Recheck anchored substring, but not floating... */
895*0Sstevel@tonic-gate s = check_at;
896*0Sstevel@tonic-gate if (!check)
897*0Sstevel@tonic-gate goto giveup;
898*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log,
899*0Sstevel@tonic-gate "Looking for anchored substr starting at offset %ld...\n",
900*0Sstevel@tonic-gate (long)(other_last - i_strpos)) );
901*0Sstevel@tonic-gate goto do_other_anchored;
902*0Sstevel@tonic-gate }
903*0Sstevel@tonic-gate /* Another way we could have checked stclass at the
904*0Sstevel@tonic-gate current position only: */
905*0Sstevel@tonic-gate if (ml_anch) {
906*0Sstevel@tonic-gate s = t = t + 1;
907*0Sstevel@tonic-gate if (!check)
908*0Sstevel@tonic-gate goto giveup;
909*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log,
910*0Sstevel@tonic-gate "Looking for /%s^%s/m starting at offset %ld...\n",
911*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
912*0Sstevel@tonic-gate goto try_at_offset;
913*0Sstevel@tonic-gate }
914*0Sstevel@tonic-gate if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
915*0Sstevel@tonic-gate goto fail;
916*0Sstevel@tonic-gate /* Check is floating subtring. */
917*0Sstevel@tonic-gate retry_floating_check:
918*0Sstevel@tonic-gate t = check_at - start_shift;
919*0Sstevel@tonic-gate DEBUG_r( what = "floating" );
920*0Sstevel@tonic-gate goto hop_and_restart;
921*0Sstevel@tonic-gate }
922*0Sstevel@tonic-gate if (t != s) {
923*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
924*0Sstevel@tonic-gate "By STCLASS: moving %ld --> %ld\n",
925*0Sstevel@tonic-gate (long)(t - i_strpos), (long)(s - i_strpos))
926*0Sstevel@tonic-gate );
927*0Sstevel@tonic-gate }
928*0Sstevel@tonic-gate else {
929*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
930*0Sstevel@tonic-gate "Does not contradict STCLASS...\n");
931*0Sstevel@tonic-gate );
932*0Sstevel@tonic-gate }
933*0Sstevel@tonic-gate }
934*0Sstevel@tonic-gate giveup:
935*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
936*0Sstevel@tonic-gate PL_colors[4], (check ? "Guessed" : "Giving up"),
937*0Sstevel@tonic-gate PL_colors[5], (long)(s - i_strpos)) );
938*0Sstevel@tonic-gate return s;
939*0Sstevel@tonic-gate
940*0Sstevel@tonic-gate fail_finish: /* Substring not found */
941*0Sstevel@tonic-gate if (prog->check_substr || prog->check_utf8) /* could be removed already */
942*0Sstevel@tonic-gate BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
943*0Sstevel@tonic-gate fail:
944*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
945*0Sstevel@tonic-gate PL_colors[4],PL_colors[5]));
946*0Sstevel@tonic-gate return Nullch;
947*0Sstevel@tonic-gate }
948*0Sstevel@tonic-gate
949*0Sstevel@tonic-gate /* We know what class REx starts with. Try to find this position... */
950*0Sstevel@tonic-gate STATIC char *
S_find_byclass(pTHX_ regexp * prog,regnode * c,char * s,char * strend,char * startpos,I32 norun)951*0Sstevel@tonic-gate S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
952*0Sstevel@tonic-gate {
953*0Sstevel@tonic-gate I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
954*0Sstevel@tonic-gate char *m;
955*0Sstevel@tonic-gate STRLEN ln;
956*0Sstevel@tonic-gate STRLEN lnc;
957*0Sstevel@tonic-gate register STRLEN uskip;
958*0Sstevel@tonic-gate unsigned int c1;
959*0Sstevel@tonic-gate unsigned int c2;
960*0Sstevel@tonic-gate char *e;
961*0Sstevel@tonic-gate register I32 tmp = 1; /* Scratch variable? */
962*0Sstevel@tonic-gate register bool do_utf8 = PL_reg_match_utf8;
963*0Sstevel@tonic-gate
964*0Sstevel@tonic-gate /* We know what class it must start with. */
965*0Sstevel@tonic-gate switch (OP(c)) {
966*0Sstevel@tonic-gate case ANYOF:
967*0Sstevel@tonic-gate if (do_utf8) {
968*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
969*0Sstevel@tonic-gate if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
970*0Sstevel@tonic-gate !UTF8_IS_INVARIANT((U8)s[0]) ?
971*0Sstevel@tonic-gate reginclass(c, (U8*)s, 0, do_utf8) :
972*0Sstevel@tonic-gate REGINCLASS(c, (U8*)s)) {
973*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
974*0Sstevel@tonic-gate goto got_it;
975*0Sstevel@tonic-gate else
976*0Sstevel@tonic-gate tmp = doevery;
977*0Sstevel@tonic-gate }
978*0Sstevel@tonic-gate else
979*0Sstevel@tonic-gate tmp = 1;
980*0Sstevel@tonic-gate s += uskip;
981*0Sstevel@tonic-gate }
982*0Sstevel@tonic-gate }
983*0Sstevel@tonic-gate else {
984*0Sstevel@tonic-gate while (s < strend) {
985*0Sstevel@tonic-gate STRLEN skip = 1;
986*0Sstevel@tonic-gate
987*0Sstevel@tonic-gate if (REGINCLASS(c, (U8*)s) ||
988*0Sstevel@tonic-gate (ANYOF_FOLD_SHARP_S(c, s, strend) &&
989*0Sstevel@tonic-gate /* The assignment of 2 is intentional:
990*0Sstevel@tonic-gate * for the folded sharp s, the skip is 2. */
991*0Sstevel@tonic-gate (skip = SHARP_S_SKIP))) {
992*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
993*0Sstevel@tonic-gate goto got_it;
994*0Sstevel@tonic-gate else
995*0Sstevel@tonic-gate tmp = doevery;
996*0Sstevel@tonic-gate }
997*0Sstevel@tonic-gate else
998*0Sstevel@tonic-gate tmp = 1;
999*0Sstevel@tonic-gate s += skip;
1000*0Sstevel@tonic-gate }
1001*0Sstevel@tonic-gate }
1002*0Sstevel@tonic-gate break;
1003*0Sstevel@tonic-gate case CANY:
1004*0Sstevel@tonic-gate while (s < strend) {
1005*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1006*0Sstevel@tonic-gate goto got_it;
1007*0Sstevel@tonic-gate else
1008*0Sstevel@tonic-gate tmp = doevery;
1009*0Sstevel@tonic-gate s++;
1010*0Sstevel@tonic-gate }
1011*0Sstevel@tonic-gate break;
1012*0Sstevel@tonic-gate case EXACTF:
1013*0Sstevel@tonic-gate m = STRING(c);
1014*0Sstevel@tonic-gate ln = STR_LEN(c); /* length to match in octets/bytes */
1015*0Sstevel@tonic-gate lnc = (I32) ln; /* length to match in characters */
1016*0Sstevel@tonic-gate if (UTF) {
1017*0Sstevel@tonic-gate STRLEN ulen1, ulen2;
1018*0Sstevel@tonic-gate U8 *sm = (U8 *) m;
1019*0Sstevel@tonic-gate U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
1020*0Sstevel@tonic-gate U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
1021*0Sstevel@tonic-gate
1022*0Sstevel@tonic-gate to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1023*0Sstevel@tonic-gate to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1024*0Sstevel@tonic-gate
1025*0Sstevel@tonic-gate c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1026*0Sstevel@tonic-gate 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1027*0Sstevel@tonic-gate c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1028*0Sstevel@tonic-gate 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1029*0Sstevel@tonic-gate lnc = 0;
1030*0Sstevel@tonic-gate while (sm < ((U8 *) m + ln)) {
1031*0Sstevel@tonic-gate lnc++;
1032*0Sstevel@tonic-gate sm += UTF8SKIP(sm);
1033*0Sstevel@tonic-gate }
1034*0Sstevel@tonic-gate }
1035*0Sstevel@tonic-gate else {
1036*0Sstevel@tonic-gate c1 = *(U8*)m;
1037*0Sstevel@tonic-gate c2 = PL_fold[c1];
1038*0Sstevel@tonic-gate }
1039*0Sstevel@tonic-gate goto do_exactf;
1040*0Sstevel@tonic-gate case EXACTFL:
1041*0Sstevel@tonic-gate m = STRING(c);
1042*0Sstevel@tonic-gate ln = STR_LEN(c);
1043*0Sstevel@tonic-gate lnc = (I32) ln;
1044*0Sstevel@tonic-gate c1 = *(U8*)m;
1045*0Sstevel@tonic-gate c2 = PL_fold_locale[c1];
1046*0Sstevel@tonic-gate do_exactf:
1047*0Sstevel@tonic-gate e = HOP3c(strend, -((I32)lnc), s);
1048*0Sstevel@tonic-gate
1049*0Sstevel@tonic-gate if (norun && e < s)
1050*0Sstevel@tonic-gate e = s; /* Due to minlen logic of intuit() */
1051*0Sstevel@tonic-gate
1052*0Sstevel@tonic-gate /* The idea in the EXACTF* cases is to first find the
1053*0Sstevel@tonic-gate * first character of the EXACTF* node and then, if
1054*0Sstevel@tonic-gate * necessary, case-insensitively compare the full
1055*0Sstevel@tonic-gate * text of the node. The c1 and c2 are the first
1056*0Sstevel@tonic-gate * characters (though in Unicode it gets a bit
1057*0Sstevel@tonic-gate * more complicated because there are more cases
1058*0Sstevel@tonic-gate * than just upper and lower: one needs to use
1059*0Sstevel@tonic-gate * the so-called folding case for case-insensitive
1060*0Sstevel@tonic-gate * matching (called "loose matching" in Unicode).
1061*0Sstevel@tonic-gate * ibcmp_utf8() will do just that. */
1062*0Sstevel@tonic-gate
1063*0Sstevel@tonic-gate if (do_utf8) {
1064*0Sstevel@tonic-gate UV c, f;
1065*0Sstevel@tonic-gate U8 tmpbuf [UTF8_MAXLEN+1];
1066*0Sstevel@tonic-gate U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1067*0Sstevel@tonic-gate STRLEN len, foldlen;
1068*0Sstevel@tonic-gate
1069*0Sstevel@tonic-gate if (c1 == c2) {
1070*0Sstevel@tonic-gate /* Upper and lower of 1st char are equal -
1071*0Sstevel@tonic-gate * probably not a "letter". */
1072*0Sstevel@tonic-gate while (s <= e) {
1073*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1074*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
1075*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
1076*0Sstevel@tonic-gate if ( c == c1
1077*0Sstevel@tonic-gate && (ln == len ||
1078*0Sstevel@tonic-gate ibcmp_utf8(s, (char **)0, 0, do_utf8,
1079*0Sstevel@tonic-gate m, (char **)0, ln, (bool)UTF))
1080*0Sstevel@tonic-gate && (norun || regtry(prog, s)) )
1081*0Sstevel@tonic-gate goto got_it;
1082*0Sstevel@tonic-gate else {
1083*0Sstevel@tonic-gate uvchr_to_utf8(tmpbuf, c);
1084*0Sstevel@tonic-gate f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1085*0Sstevel@tonic-gate if ( f != c
1086*0Sstevel@tonic-gate && (f == c1 || f == c2)
1087*0Sstevel@tonic-gate && (ln == foldlen ||
1088*0Sstevel@tonic-gate !ibcmp_utf8((char *) foldbuf,
1089*0Sstevel@tonic-gate (char **)0, foldlen, do_utf8,
1090*0Sstevel@tonic-gate m,
1091*0Sstevel@tonic-gate (char **)0, ln, (bool)UTF))
1092*0Sstevel@tonic-gate && (norun || regtry(prog, s)) )
1093*0Sstevel@tonic-gate goto got_it;
1094*0Sstevel@tonic-gate }
1095*0Sstevel@tonic-gate s += len;
1096*0Sstevel@tonic-gate }
1097*0Sstevel@tonic-gate }
1098*0Sstevel@tonic-gate else {
1099*0Sstevel@tonic-gate while (s <= e) {
1100*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1101*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
1102*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
1103*0Sstevel@tonic-gate
1104*0Sstevel@tonic-gate /* Handle some of the three Greek sigmas cases.
1105*0Sstevel@tonic-gate * Note that not all the possible combinations
1106*0Sstevel@tonic-gate * are handled here: some of them are handled
1107*0Sstevel@tonic-gate * by the standard folding rules, and some of
1108*0Sstevel@tonic-gate * them (the character class or ANYOF cases)
1109*0Sstevel@tonic-gate * are handled during compiletime in
1110*0Sstevel@tonic-gate * regexec.c:S_regclass(). */
1111*0Sstevel@tonic-gate if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1112*0Sstevel@tonic-gate c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1113*0Sstevel@tonic-gate c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1114*0Sstevel@tonic-gate
1115*0Sstevel@tonic-gate if ( (c == c1 || c == c2)
1116*0Sstevel@tonic-gate && (ln == len ||
1117*0Sstevel@tonic-gate ibcmp_utf8(s, (char **)0, 0, do_utf8,
1118*0Sstevel@tonic-gate m, (char **)0, ln, (bool)UTF))
1119*0Sstevel@tonic-gate && (norun || regtry(prog, s)) )
1120*0Sstevel@tonic-gate goto got_it;
1121*0Sstevel@tonic-gate else {
1122*0Sstevel@tonic-gate uvchr_to_utf8(tmpbuf, c);
1123*0Sstevel@tonic-gate f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1124*0Sstevel@tonic-gate if ( f != c
1125*0Sstevel@tonic-gate && (f == c1 || f == c2)
1126*0Sstevel@tonic-gate && (ln == foldlen ||
1127*0Sstevel@tonic-gate !ibcmp_utf8((char *) foldbuf,
1128*0Sstevel@tonic-gate (char **)0, foldlen, do_utf8,
1129*0Sstevel@tonic-gate m,
1130*0Sstevel@tonic-gate (char **)0, ln, (bool)UTF))
1131*0Sstevel@tonic-gate && (norun || regtry(prog, s)) )
1132*0Sstevel@tonic-gate goto got_it;
1133*0Sstevel@tonic-gate }
1134*0Sstevel@tonic-gate s += len;
1135*0Sstevel@tonic-gate }
1136*0Sstevel@tonic-gate }
1137*0Sstevel@tonic-gate }
1138*0Sstevel@tonic-gate else {
1139*0Sstevel@tonic-gate if (c1 == c2)
1140*0Sstevel@tonic-gate while (s <= e) {
1141*0Sstevel@tonic-gate if ( *(U8*)s == c1
1142*0Sstevel@tonic-gate && (ln == 1 || !(OP(c) == EXACTF
1143*0Sstevel@tonic-gate ? ibcmp(s, m, ln)
1144*0Sstevel@tonic-gate : ibcmp_locale(s, m, ln)))
1145*0Sstevel@tonic-gate && (norun || regtry(prog, s)) )
1146*0Sstevel@tonic-gate goto got_it;
1147*0Sstevel@tonic-gate s++;
1148*0Sstevel@tonic-gate }
1149*0Sstevel@tonic-gate else
1150*0Sstevel@tonic-gate while (s <= e) {
1151*0Sstevel@tonic-gate if ( (*(U8*)s == c1 || *(U8*)s == c2)
1152*0Sstevel@tonic-gate && (ln == 1 || !(OP(c) == EXACTF
1153*0Sstevel@tonic-gate ? ibcmp(s, m, ln)
1154*0Sstevel@tonic-gate : ibcmp_locale(s, m, ln)))
1155*0Sstevel@tonic-gate && (norun || regtry(prog, s)) )
1156*0Sstevel@tonic-gate goto got_it;
1157*0Sstevel@tonic-gate s++;
1158*0Sstevel@tonic-gate }
1159*0Sstevel@tonic-gate }
1160*0Sstevel@tonic-gate break;
1161*0Sstevel@tonic-gate case BOUNDL:
1162*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1163*0Sstevel@tonic-gate /* FALL THROUGH */
1164*0Sstevel@tonic-gate case BOUND:
1165*0Sstevel@tonic-gate if (do_utf8) {
1166*0Sstevel@tonic-gate if (s == PL_bostr)
1167*0Sstevel@tonic-gate tmp = '\n';
1168*0Sstevel@tonic-gate else {
1169*0Sstevel@tonic-gate U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1170*0Sstevel@tonic-gate
1171*0Sstevel@tonic-gate tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1172*0Sstevel@tonic-gate }
1173*0Sstevel@tonic-gate tmp = ((OP(c) == BOUND ?
1174*0Sstevel@tonic-gate isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1175*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
1176*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1177*0Sstevel@tonic-gate if (tmp == !(OP(c) == BOUND ?
1178*0Sstevel@tonic-gate swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1179*0Sstevel@tonic-gate isALNUM_LC_utf8((U8*)s)))
1180*0Sstevel@tonic-gate {
1181*0Sstevel@tonic-gate tmp = !tmp;
1182*0Sstevel@tonic-gate if ((norun || regtry(prog, s)))
1183*0Sstevel@tonic-gate goto got_it;
1184*0Sstevel@tonic-gate }
1185*0Sstevel@tonic-gate s += uskip;
1186*0Sstevel@tonic-gate }
1187*0Sstevel@tonic-gate }
1188*0Sstevel@tonic-gate else {
1189*0Sstevel@tonic-gate tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1190*0Sstevel@tonic-gate tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1191*0Sstevel@tonic-gate while (s < strend) {
1192*0Sstevel@tonic-gate if (tmp ==
1193*0Sstevel@tonic-gate !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1194*0Sstevel@tonic-gate tmp = !tmp;
1195*0Sstevel@tonic-gate if ((norun || regtry(prog, s)))
1196*0Sstevel@tonic-gate goto got_it;
1197*0Sstevel@tonic-gate }
1198*0Sstevel@tonic-gate s++;
1199*0Sstevel@tonic-gate }
1200*0Sstevel@tonic-gate }
1201*0Sstevel@tonic-gate if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1202*0Sstevel@tonic-gate goto got_it;
1203*0Sstevel@tonic-gate break;
1204*0Sstevel@tonic-gate case NBOUNDL:
1205*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1206*0Sstevel@tonic-gate /* FALL THROUGH */
1207*0Sstevel@tonic-gate case NBOUND:
1208*0Sstevel@tonic-gate if (do_utf8) {
1209*0Sstevel@tonic-gate if (s == PL_bostr)
1210*0Sstevel@tonic-gate tmp = '\n';
1211*0Sstevel@tonic-gate else {
1212*0Sstevel@tonic-gate U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1213*0Sstevel@tonic-gate
1214*0Sstevel@tonic-gate tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1215*0Sstevel@tonic-gate }
1216*0Sstevel@tonic-gate tmp = ((OP(c) == NBOUND ?
1217*0Sstevel@tonic-gate isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1218*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
1219*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1220*0Sstevel@tonic-gate if (tmp == !(OP(c) == NBOUND ?
1221*0Sstevel@tonic-gate swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1222*0Sstevel@tonic-gate isALNUM_LC_utf8((U8*)s)))
1223*0Sstevel@tonic-gate tmp = !tmp;
1224*0Sstevel@tonic-gate else if ((norun || regtry(prog, s)))
1225*0Sstevel@tonic-gate goto got_it;
1226*0Sstevel@tonic-gate s += uskip;
1227*0Sstevel@tonic-gate }
1228*0Sstevel@tonic-gate }
1229*0Sstevel@tonic-gate else {
1230*0Sstevel@tonic-gate tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1231*0Sstevel@tonic-gate tmp = ((OP(c) == NBOUND ?
1232*0Sstevel@tonic-gate isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1233*0Sstevel@tonic-gate while (s < strend) {
1234*0Sstevel@tonic-gate if (tmp ==
1235*0Sstevel@tonic-gate !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1236*0Sstevel@tonic-gate tmp = !tmp;
1237*0Sstevel@tonic-gate else if ((norun || regtry(prog, s)))
1238*0Sstevel@tonic-gate goto got_it;
1239*0Sstevel@tonic-gate s++;
1240*0Sstevel@tonic-gate }
1241*0Sstevel@tonic-gate }
1242*0Sstevel@tonic-gate if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1243*0Sstevel@tonic-gate goto got_it;
1244*0Sstevel@tonic-gate break;
1245*0Sstevel@tonic-gate case ALNUM:
1246*0Sstevel@tonic-gate if (do_utf8) {
1247*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
1248*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1249*0Sstevel@tonic-gate if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1250*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1251*0Sstevel@tonic-gate goto got_it;
1252*0Sstevel@tonic-gate else
1253*0Sstevel@tonic-gate tmp = doevery;
1254*0Sstevel@tonic-gate }
1255*0Sstevel@tonic-gate else
1256*0Sstevel@tonic-gate tmp = 1;
1257*0Sstevel@tonic-gate s += uskip;
1258*0Sstevel@tonic-gate }
1259*0Sstevel@tonic-gate }
1260*0Sstevel@tonic-gate else {
1261*0Sstevel@tonic-gate while (s < strend) {
1262*0Sstevel@tonic-gate if (isALNUM(*s)) {
1263*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1264*0Sstevel@tonic-gate goto got_it;
1265*0Sstevel@tonic-gate else
1266*0Sstevel@tonic-gate tmp = doevery;
1267*0Sstevel@tonic-gate }
1268*0Sstevel@tonic-gate else
1269*0Sstevel@tonic-gate tmp = 1;
1270*0Sstevel@tonic-gate s++;
1271*0Sstevel@tonic-gate }
1272*0Sstevel@tonic-gate }
1273*0Sstevel@tonic-gate break;
1274*0Sstevel@tonic-gate case ALNUML:
1275*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1276*0Sstevel@tonic-gate if (do_utf8) {
1277*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1278*0Sstevel@tonic-gate if (isALNUM_LC_utf8((U8*)s)) {
1279*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1280*0Sstevel@tonic-gate goto got_it;
1281*0Sstevel@tonic-gate else
1282*0Sstevel@tonic-gate tmp = doevery;
1283*0Sstevel@tonic-gate }
1284*0Sstevel@tonic-gate else
1285*0Sstevel@tonic-gate tmp = 1;
1286*0Sstevel@tonic-gate s += uskip;
1287*0Sstevel@tonic-gate }
1288*0Sstevel@tonic-gate }
1289*0Sstevel@tonic-gate else {
1290*0Sstevel@tonic-gate while (s < strend) {
1291*0Sstevel@tonic-gate if (isALNUM_LC(*s)) {
1292*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1293*0Sstevel@tonic-gate goto got_it;
1294*0Sstevel@tonic-gate else
1295*0Sstevel@tonic-gate tmp = doevery;
1296*0Sstevel@tonic-gate }
1297*0Sstevel@tonic-gate else
1298*0Sstevel@tonic-gate tmp = 1;
1299*0Sstevel@tonic-gate s++;
1300*0Sstevel@tonic-gate }
1301*0Sstevel@tonic-gate }
1302*0Sstevel@tonic-gate break;
1303*0Sstevel@tonic-gate case NALNUM:
1304*0Sstevel@tonic-gate if (do_utf8) {
1305*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
1306*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1307*0Sstevel@tonic-gate if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1308*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1309*0Sstevel@tonic-gate goto got_it;
1310*0Sstevel@tonic-gate else
1311*0Sstevel@tonic-gate tmp = doevery;
1312*0Sstevel@tonic-gate }
1313*0Sstevel@tonic-gate else
1314*0Sstevel@tonic-gate tmp = 1;
1315*0Sstevel@tonic-gate s += uskip;
1316*0Sstevel@tonic-gate }
1317*0Sstevel@tonic-gate }
1318*0Sstevel@tonic-gate else {
1319*0Sstevel@tonic-gate while (s < strend) {
1320*0Sstevel@tonic-gate if (!isALNUM(*s)) {
1321*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1322*0Sstevel@tonic-gate goto got_it;
1323*0Sstevel@tonic-gate else
1324*0Sstevel@tonic-gate tmp = doevery;
1325*0Sstevel@tonic-gate }
1326*0Sstevel@tonic-gate else
1327*0Sstevel@tonic-gate tmp = 1;
1328*0Sstevel@tonic-gate s++;
1329*0Sstevel@tonic-gate }
1330*0Sstevel@tonic-gate }
1331*0Sstevel@tonic-gate break;
1332*0Sstevel@tonic-gate case NALNUML:
1333*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1334*0Sstevel@tonic-gate if (do_utf8) {
1335*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1336*0Sstevel@tonic-gate if (!isALNUM_LC_utf8((U8*)s)) {
1337*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1338*0Sstevel@tonic-gate goto got_it;
1339*0Sstevel@tonic-gate else
1340*0Sstevel@tonic-gate tmp = doevery;
1341*0Sstevel@tonic-gate }
1342*0Sstevel@tonic-gate else
1343*0Sstevel@tonic-gate tmp = 1;
1344*0Sstevel@tonic-gate s += uskip;
1345*0Sstevel@tonic-gate }
1346*0Sstevel@tonic-gate }
1347*0Sstevel@tonic-gate else {
1348*0Sstevel@tonic-gate while (s < strend) {
1349*0Sstevel@tonic-gate if (!isALNUM_LC(*s)) {
1350*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1351*0Sstevel@tonic-gate goto got_it;
1352*0Sstevel@tonic-gate else
1353*0Sstevel@tonic-gate tmp = doevery;
1354*0Sstevel@tonic-gate }
1355*0Sstevel@tonic-gate else
1356*0Sstevel@tonic-gate tmp = 1;
1357*0Sstevel@tonic-gate s++;
1358*0Sstevel@tonic-gate }
1359*0Sstevel@tonic-gate }
1360*0Sstevel@tonic-gate break;
1361*0Sstevel@tonic-gate case SPACE:
1362*0Sstevel@tonic-gate if (do_utf8) {
1363*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," ");
1364*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1365*0Sstevel@tonic-gate if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1366*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1367*0Sstevel@tonic-gate goto got_it;
1368*0Sstevel@tonic-gate else
1369*0Sstevel@tonic-gate tmp = doevery;
1370*0Sstevel@tonic-gate }
1371*0Sstevel@tonic-gate else
1372*0Sstevel@tonic-gate tmp = 1;
1373*0Sstevel@tonic-gate s += uskip;
1374*0Sstevel@tonic-gate }
1375*0Sstevel@tonic-gate }
1376*0Sstevel@tonic-gate else {
1377*0Sstevel@tonic-gate while (s < strend) {
1378*0Sstevel@tonic-gate if (isSPACE(*s)) {
1379*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1380*0Sstevel@tonic-gate goto got_it;
1381*0Sstevel@tonic-gate else
1382*0Sstevel@tonic-gate tmp = doevery;
1383*0Sstevel@tonic-gate }
1384*0Sstevel@tonic-gate else
1385*0Sstevel@tonic-gate tmp = 1;
1386*0Sstevel@tonic-gate s++;
1387*0Sstevel@tonic-gate }
1388*0Sstevel@tonic-gate }
1389*0Sstevel@tonic-gate break;
1390*0Sstevel@tonic-gate case SPACEL:
1391*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1392*0Sstevel@tonic-gate if (do_utf8) {
1393*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1394*0Sstevel@tonic-gate if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1395*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1396*0Sstevel@tonic-gate goto got_it;
1397*0Sstevel@tonic-gate else
1398*0Sstevel@tonic-gate tmp = doevery;
1399*0Sstevel@tonic-gate }
1400*0Sstevel@tonic-gate else
1401*0Sstevel@tonic-gate tmp = 1;
1402*0Sstevel@tonic-gate s += uskip;
1403*0Sstevel@tonic-gate }
1404*0Sstevel@tonic-gate }
1405*0Sstevel@tonic-gate else {
1406*0Sstevel@tonic-gate while (s < strend) {
1407*0Sstevel@tonic-gate if (isSPACE_LC(*s)) {
1408*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1409*0Sstevel@tonic-gate goto got_it;
1410*0Sstevel@tonic-gate else
1411*0Sstevel@tonic-gate tmp = doevery;
1412*0Sstevel@tonic-gate }
1413*0Sstevel@tonic-gate else
1414*0Sstevel@tonic-gate tmp = 1;
1415*0Sstevel@tonic-gate s++;
1416*0Sstevel@tonic-gate }
1417*0Sstevel@tonic-gate }
1418*0Sstevel@tonic-gate break;
1419*0Sstevel@tonic-gate case NSPACE:
1420*0Sstevel@tonic-gate if (do_utf8) {
1421*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," ");
1422*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1423*0Sstevel@tonic-gate if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1424*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1425*0Sstevel@tonic-gate goto got_it;
1426*0Sstevel@tonic-gate else
1427*0Sstevel@tonic-gate tmp = doevery;
1428*0Sstevel@tonic-gate }
1429*0Sstevel@tonic-gate else
1430*0Sstevel@tonic-gate tmp = 1;
1431*0Sstevel@tonic-gate s += uskip;
1432*0Sstevel@tonic-gate }
1433*0Sstevel@tonic-gate }
1434*0Sstevel@tonic-gate else {
1435*0Sstevel@tonic-gate while (s < strend) {
1436*0Sstevel@tonic-gate if (!isSPACE(*s)) {
1437*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1438*0Sstevel@tonic-gate goto got_it;
1439*0Sstevel@tonic-gate else
1440*0Sstevel@tonic-gate tmp = doevery;
1441*0Sstevel@tonic-gate }
1442*0Sstevel@tonic-gate else
1443*0Sstevel@tonic-gate tmp = 1;
1444*0Sstevel@tonic-gate s++;
1445*0Sstevel@tonic-gate }
1446*0Sstevel@tonic-gate }
1447*0Sstevel@tonic-gate break;
1448*0Sstevel@tonic-gate case NSPACEL:
1449*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1450*0Sstevel@tonic-gate if (do_utf8) {
1451*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1452*0Sstevel@tonic-gate if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1453*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1454*0Sstevel@tonic-gate goto got_it;
1455*0Sstevel@tonic-gate else
1456*0Sstevel@tonic-gate tmp = doevery;
1457*0Sstevel@tonic-gate }
1458*0Sstevel@tonic-gate else
1459*0Sstevel@tonic-gate tmp = 1;
1460*0Sstevel@tonic-gate s += uskip;
1461*0Sstevel@tonic-gate }
1462*0Sstevel@tonic-gate }
1463*0Sstevel@tonic-gate else {
1464*0Sstevel@tonic-gate while (s < strend) {
1465*0Sstevel@tonic-gate if (!isSPACE_LC(*s)) {
1466*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1467*0Sstevel@tonic-gate goto got_it;
1468*0Sstevel@tonic-gate else
1469*0Sstevel@tonic-gate tmp = doevery;
1470*0Sstevel@tonic-gate }
1471*0Sstevel@tonic-gate else
1472*0Sstevel@tonic-gate tmp = 1;
1473*0Sstevel@tonic-gate s++;
1474*0Sstevel@tonic-gate }
1475*0Sstevel@tonic-gate }
1476*0Sstevel@tonic-gate break;
1477*0Sstevel@tonic-gate case DIGIT:
1478*0Sstevel@tonic-gate if (do_utf8) {
1479*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0");
1480*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1481*0Sstevel@tonic-gate if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1482*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1483*0Sstevel@tonic-gate goto got_it;
1484*0Sstevel@tonic-gate else
1485*0Sstevel@tonic-gate tmp = doevery;
1486*0Sstevel@tonic-gate }
1487*0Sstevel@tonic-gate else
1488*0Sstevel@tonic-gate tmp = 1;
1489*0Sstevel@tonic-gate s += uskip;
1490*0Sstevel@tonic-gate }
1491*0Sstevel@tonic-gate }
1492*0Sstevel@tonic-gate else {
1493*0Sstevel@tonic-gate while (s < strend) {
1494*0Sstevel@tonic-gate if (isDIGIT(*s)) {
1495*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1496*0Sstevel@tonic-gate goto got_it;
1497*0Sstevel@tonic-gate else
1498*0Sstevel@tonic-gate tmp = doevery;
1499*0Sstevel@tonic-gate }
1500*0Sstevel@tonic-gate else
1501*0Sstevel@tonic-gate tmp = 1;
1502*0Sstevel@tonic-gate s++;
1503*0Sstevel@tonic-gate }
1504*0Sstevel@tonic-gate }
1505*0Sstevel@tonic-gate break;
1506*0Sstevel@tonic-gate case DIGITL:
1507*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1508*0Sstevel@tonic-gate if (do_utf8) {
1509*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1510*0Sstevel@tonic-gate if (isDIGIT_LC_utf8((U8*)s)) {
1511*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1512*0Sstevel@tonic-gate goto got_it;
1513*0Sstevel@tonic-gate else
1514*0Sstevel@tonic-gate tmp = doevery;
1515*0Sstevel@tonic-gate }
1516*0Sstevel@tonic-gate else
1517*0Sstevel@tonic-gate tmp = 1;
1518*0Sstevel@tonic-gate s += uskip;
1519*0Sstevel@tonic-gate }
1520*0Sstevel@tonic-gate }
1521*0Sstevel@tonic-gate else {
1522*0Sstevel@tonic-gate while (s < strend) {
1523*0Sstevel@tonic-gate if (isDIGIT_LC(*s)) {
1524*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1525*0Sstevel@tonic-gate goto got_it;
1526*0Sstevel@tonic-gate else
1527*0Sstevel@tonic-gate tmp = doevery;
1528*0Sstevel@tonic-gate }
1529*0Sstevel@tonic-gate else
1530*0Sstevel@tonic-gate tmp = 1;
1531*0Sstevel@tonic-gate s++;
1532*0Sstevel@tonic-gate }
1533*0Sstevel@tonic-gate }
1534*0Sstevel@tonic-gate break;
1535*0Sstevel@tonic-gate case NDIGIT:
1536*0Sstevel@tonic-gate if (do_utf8) {
1537*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0");
1538*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1539*0Sstevel@tonic-gate if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1540*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1541*0Sstevel@tonic-gate goto got_it;
1542*0Sstevel@tonic-gate else
1543*0Sstevel@tonic-gate tmp = doevery;
1544*0Sstevel@tonic-gate }
1545*0Sstevel@tonic-gate else
1546*0Sstevel@tonic-gate tmp = 1;
1547*0Sstevel@tonic-gate s += uskip;
1548*0Sstevel@tonic-gate }
1549*0Sstevel@tonic-gate }
1550*0Sstevel@tonic-gate else {
1551*0Sstevel@tonic-gate while (s < strend) {
1552*0Sstevel@tonic-gate if (!isDIGIT(*s)) {
1553*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1554*0Sstevel@tonic-gate goto got_it;
1555*0Sstevel@tonic-gate else
1556*0Sstevel@tonic-gate tmp = doevery;
1557*0Sstevel@tonic-gate }
1558*0Sstevel@tonic-gate else
1559*0Sstevel@tonic-gate tmp = 1;
1560*0Sstevel@tonic-gate s++;
1561*0Sstevel@tonic-gate }
1562*0Sstevel@tonic-gate }
1563*0Sstevel@tonic-gate break;
1564*0Sstevel@tonic-gate case NDIGITL:
1565*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
1566*0Sstevel@tonic-gate if (do_utf8) {
1567*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) {
1568*0Sstevel@tonic-gate if (!isDIGIT_LC_utf8((U8*)s)) {
1569*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1570*0Sstevel@tonic-gate goto got_it;
1571*0Sstevel@tonic-gate else
1572*0Sstevel@tonic-gate tmp = doevery;
1573*0Sstevel@tonic-gate }
1574*0Sstevel@tonic-gate else
1575*0Sstevel@tonic-gate tmp = 1;
1576*0Sstevel@tonic-gate s += uskip;
1577*0Sstevel@tonic-gate }
1578*0Sstevel@tonic-gate }
1579*0Sstevel@tonic-gate else {
1580*0Sstevel@tonic-gate while (s < strend) {
1581*0Sstevel@tonic-gate if (!isDIGIT_LC(*s)) {
1582*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s)))
1583*0Sstevel@tonic-gate goto got_it;
1584*0Sstevel@tonic-gate else
1585*0Sstevel@tonic-gate tmp = doevery;
1586*0Sstevel@tonic-gate }
1587*0Sstevel@tonic-gate else
1588*0Sstevel@tonic-gate tmp = 1;
1589*0Sstevel@tonic-gate s++;
1590*0Sstevel@tonic-gate }
1591*0Sstevel@tonic-gate }
1592*0Sstevel@tonic-gate break;
1593*0Sstevel@tonic-gate default:
1594*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1595*0Sstevel@tonic-gate break;
1596*0Sstevel@tonic-gate }
1597*0Sstevel@tonic-gate return 0;
1598*0Sstevel@tonic-gate got_it:
1599*0Sstevel@tonic-gate return s;
1600*0Sstevel@tonic-gate }
1601*0Sstevel@tonic-gate
1602*0Sstevel@tonic-gate /*
1603*0Sstevel@tonic-gate - regexec_flags - match a regexp against a string
1604*0Sstevel@tonic-gate */
1605*0Sstevel@tonic-gate I32
Perl_regexec_flags(pTHX_ register regexp * prog,char * stringarg,register char * strend,char * strbeg,I32 minend,SV * sv,void * data,U32 flags)1606*0Sstevel@tonic-gate Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1607*0Sstevel@tonic-gate char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1608*0Sstevel@tonic-gate /* strend: pointer to null at end of string */
1609*0Sstevel@tonic-gate /* strbeg: real beginning of string */
1610*0Sstevel@tonic-gate /* minend: end of match must be >=minend after stringarg. */
1611*0Sstevel@tonic-gate /* data: May be used for some additional optimizations. */
1612*0Sstevel@tonic-gate /* nosave: For optimizations. */
1613*0Sstevel@tonic-gate {
1614*0Sstevel@tonic-gate register char *s;
1615*0Sstevel@tonic-gate register regnode *c;
1616*0Sstevel@tonic-gate register char *startpos = stringarg;
1617*0Sstevel@tonic-gate I32 minlen; /* must match at least this many chars */
1618*0Sstevel@tonic-gate I32 dontbother = 0; /* how many characters not to try at end */
1619*0Sstevel@tonic-gate /* I32 start_shift = 0; */ /* Offset of the start to find
1620*0Sstevel@tonic-gate constant substr. */ /* CC */
1621*0Sstevel@tonic-gate I32 end_shift = 0; /* Same for the end. */ /* CC */
1622*0Sstevel@tonic-gate I32 scream_pos = -1; /* Internal iterator of scream. */
1623*0Sstevel@tonic-gate char *scream_olds;
1624*0Sstevel@tonic-gate SV* oreplsv = GvSV(PL_replgv);
1625*0Sstevel@tonic-gate bool do_utf8 = DO_UTF8(sv);
1626*0Sstevel@tonic-gate #ifdef DEBUGGING
1627*0Sstevel@tonic-gate SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1628*0Sstevel@tonic-gate SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1629*0Sstevel@tonic-gate #endif
1630*0Sstevel@tonic-gate RX_MATCH_UTF8_set(prog,do_utf8);
1631*0Sstevel@tonic-gate
1632*0Sstevel@tonic-gate PL_regcc = 0;
1633*0Sstevel@tonic-gate
1634*0Sstevel@tonic-gate cache_re(prog);
1635*0Sstevel@tonic-gate #ifdef DEBUGGING
1636*0Sstevel@tonic-gate PL_regnarrate = DEBUG_r_TEST;
1637*0Sstevel@tonic-gate #endif
1638*0Sstevel@tonic-gate
1639*0Sstevel@tonic-gate /* Be paranoid... */
1640*0Sstevel@tonic-gate if (prog == NULL || startpos == NULL) {
1641*0Sstevel@tonic-gate Perl_croak(aTHX_ "NULL regexp parameter");
1642*0Sstevel@tonic-gate return 0;
1643*0Sstevel@tonic-gate }
1644*0Sstevel@tonic-gate
1645*0Sstevel@tonic-gate minlen = prog->minlen;
1646*0Sstevel@tonic-gate if (strend - startpos < minlen) {
1647*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
1648*0Sstevel@tonic-gate "String too short [regexec_flags]...\n"));
1649*0Sstevel@tonic-gate goto phooey;
1650*0Sstevel@tonic-gate }
1651*0Sstevel@tonic-gate
1652*0Sstevel@tonic-gate /* Check validity of program. */
1653*0Sstevel@tonic-gate if (UCHARAT(prog->program) != REG_MAGIC) {
1654*0Sstevel@tonic-gate Perl_croak(aTHX_ "corrupted regexp program");
1655*0Sstevel@tonic-gate }
1656*0Sstevel@tonic-gate
1657*0Sstevel@tonic-gate PL_reg_flags = 0;
1658*0Sstevel@tonic-gate PL_reg_eval_set = 0;
1659*0Sstevel@tonic-gate PL_reg_maxiter = 0;
1660*0Sstevel@tonic-gate
1661*0Sstevel@tonic-gate if (prog->reganch & ROPT_UTF8)
1662*0Sstevel@tonic-gate PL_reg_flags |= RF_utf8;
1663*0Sstevel@tonic-gate
1664*0Sstevel@tonic-gate /* Mark beginning of line for ^ and lookbehind. */
1665*0Sstevel@tonic-gate PL_regbol = startpos;
1666*0Sstevel@tonic-gate PL_bostr = strbeg;
1667*0Sstevel@tonic-gate PL_reg_sv = sv;
1668*0Sstevel@tonic-gate
1669*0Sstevel@tonic-gate /* Mark end of line for $ (and such) */
1670*0Sstevel@tonic-gate PL_regeol = strend;
1671*0Sstevel@tonic-gate
1672*0Sstevel@tonic-gate /* see how far we have to get to not match where we matched before */
1673*0Sstevel@tonic-gate PL_regtill = startpos+minend;
1674*0Sstevel@tonic-gate
1675*0Sstevel@tonic-gate /* We start without call_cc context. */
1676*0Sstevel@tonic-gate PL_reg_call_cc = 0;
1677*0Sstevel@tonic-gate
1678*0Sstevel@tonic-gate /* If there is a "must appear" string, look for it. */
1679*0Sstevel@tonic-gate s = startpos;
1680*0Sstevel@tonic-gate
1681*0Sstevel@tonic-gate if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1682*0Sstevel@tonic-gate MAGIC *mg;
1683*0Sstevel@tonic-gate
1684*0Sstevel@tonic-gate if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1685*0Sstevel@tonic-gate PL_reg_ganch = startpos;
1686*0Sstevel@tonic-gate else if (sv && SvTYPE(sv) >= SVt_PVMG
1687*0Sstevel@tonic-gate && SvMAGIC(sv)
1688*0Sstevel@tonic-gate && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1689*0Sstevel@tonic-gate && mg->mg_len >= 0) {
1690*0Sstevel@tonic-gate PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1691*0Sstevel@tonic-gate if (prog->reganch & ROPT_ANCH_GPOS) {
1692*0Sstevel@tonic-gate if (s > PL_reg_ganch)
1693*0Sstevel@tonic-gate goto phooey;
1694*0Sstevel@tonic-gate s = PL_reg_ganch;
1695*0Sstevel@tonic-gate }
1696*0Sstevel@tonic-gate }
1697*0Sstevel@tonic-gate else /* pos() not defined */
1698*0Sstevel@tonic-gate PL_reg_ganch = strbeg;
1699*0Sstevel@tonic-gate }
1700*0Sstevel@tonic-gate
1701*0Sstevel@tonic-gate if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1702*0Sstevel@tonic-gate re_scream_pos_data d;
1703*0Sstevel@tonic-gate
1704*0Sstevel@tonic-gate d.scream_olds = &scream_olds;
1705*0Sstevel@tonic-gate d.scream_pos = &scream_pos;
1706*0Sstevel@tonic-gate s = re_intuit_start(prog, sv, s, strend, flags, &d);
1707*0Sstevel@tonic-gate if (!s) {
1708*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1709*0Sstevel@tonic-gate goto phooey; /* not present */
1710*0Sstevel@tonic-gate }
1711*0Sstevel@tonic-gate }
1712*0Sstevel@tonic-gate
1713*0Sstevel@tonic-gate DEBUG_r({
1714*0Sstevel@tonic-gate char *s0 = UTF ?
1715*0Sstevel@tonic-gate pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1716*0Sstevel@tonic-gate UNI_DISPLAY_REGEX) :
1717*0Sstevel@tonic-gate prog->precomp;
1718*0Sstevel@tonic-gate int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1719*0Sstevel@tonic-gate char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1720*0Sstevel@tonic-gate UNI_DISPLAY_REGEX) : startpos;
1721*0Sstevel@tonic-gate int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1722*0Sstevel@tonic-gate if (!PL_colorset)
1723*0Sstevel@tonic-gate reginitcolors();
1724*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
1725*0Sstevel@tonic-gate "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1726*0Sstevel@tonic-gate PL_colors[4],PL_colors[5],PL_colors[0],
1727*0Sstevel@tonic-gate len0, len0, s0,
1728*0Sstevel@tonic-gate PL_colors[1],
1729*0Sstevel@tonic-gate len0 > 60 ? "..." : "",
1730*0Sstevel@tonic-gate PL_colors[0],
1731*0Sstevel@tonic-gate (int)(len1 > 60 ? 60 : len1),
1732*0Sstevel@tonic-gate s1, PL_colors[1],
1733*0Sstevel@tonic-gate (len1 > 60 ? "..." : "")
1734*0Sstevel@tonic-gate );
1735*0Sstevel@tonic-gate });
1736*0Sstevel@tonic-gate
1737*0Sstevel@tonic-gate /* Simplest case: anchored match need be tried only once. */
1738*0Sstevel@tonic-gate /* [unless only anchor is BOL and multiline is set] */
1739*0Sstevel@tonic-gate if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1740*0Sstevel@tonic-gate if (s == startpos && regtry(prog, startpos))
1741*0Sstevel@tonic-gate goto got_it;
1742*0Sstevel@tonic-gate else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1743*0Sstevel@tonic-gate || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1744*0Sstevel@tonic-gate {
1745*0Sstevel@tonic-gate char *end;
1746*0Sstevel@tonic-gate
1747*0Sstevel@tonic-gate if (minlen)
1748*0Sstevel@tonic-gate dontbother = minlen - 1;
1749*0Sstevel@tonic-gate end = HOP3c(strend, -dontbother, strbeg) - 1;
1750*0Sstevel@tonic-gate /* for multiline we only have to try after newlines */
1751*0Sstevel@tonic-gate if (prog->check_substr || prog->check_utf8) {
1752*0Sstevel@tonic-gate if (s == startpos)
1753*0Sstevel@tonic-gate goto after_try;
1754*0Sstevel@tonic-gate while (1) {
1755*0Sstevel@tonic-gate if (regtry(prog, s))
1756*0Sstevel@tonic-gate goto got_it;
1757*0Sstevel@tonic-gate after_try:
1758*0Sstevel@tonic-gate if (s >= end)
1759*0Sstevel@tonic-gate goto phooey;
1760*0Sstevel@tonic-gate if (prog->reganch & RE_USE_INTUIT) {
1761*0Sstevel@tonic-gate s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1762*0Sstevel@tonic-gate if (!s)
1763*0Sstevel@tonic-gate goto phooey;
1764*0Sstevel@tonic-gate }
1765*0Sstevel@tonic-gate else
1766*0Sstevel@tonic-gate s++;
1767*0Sstevel@tonic-gate }
1768*0Sstevel@tonic-gate } else {
1769*0Sstevel@tonic-gate if (s > startpos)
1770*0Sstevel@tonic-gate s--;
1771*0Sstevel@tonic-gate while (s < end) {
1772*0Sstevel@tonic-gate if (*s++ == '\n') { /* don't need PL_utf8skip here */
1773*0Sstevel@tonic-gate if (regtry(prog, s))
1774*0Sstevel@tonic-gate goto got_it;
1775*0Sstevel@tonic-gate }
1776*0Sstevel@tonic-gate }
1777*0Sstevel@tonic-gate }
1778*0Sstevel@tonic-gate }
1779*0Sstevel@tonic-gate goto phooey;
1780*0Sstevel@tonic-gate } else if (prog->reganch & ROPT_ANCH_GPOS) {
1781*0Sstevel@tonic-gate if (regtry(prog, PL_reg_ganch))
1782*0Sstevel@tonic-gate goto got_it;
1783*0Sstevel@tonic-gate goto phooey;
1784*0Sstevel@tonic-gate }
1785*0Sstevel@tonic-gate
1786*0Sstevel@tonic-gate /* Messy cases: unanchored match. */
1787*0Sstevel@tonic-gate if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1788*0Sstevel@tonic-gate /* we have /x+whatever/ */
1789*0Sstevel@tonic-gate /* it must be a one character string (XXXX Except UTF?) */
1790*0Sstevel@tonic-gate char ch;
1791*0Sstevel@tonic-gate #ifdef DEBUGGING
1792*0Sstevel@tonic-gate int did_match = 0;
1793*0Sstevel@tonic-gate #endif
1794*0Sstevel@tonic-gate if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1795*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1796*0Sstevel@tonic-gate ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1797*0Sstevel@tonic-gate
1798*0Sstevel@tonic-gate if (do_utf8) {
1799*0Sstevel@tonic-gate while (s < strend) {
1800*0Sstevel@tonic-gate if (*s == ch) {
1801*0Sstevel@tonic-gate DEBUG_r( did_match = 1 );
1802*0Sstevel@tonic-gate if (regtry(prog, s)) goto got_it;
1803*0Sstevel@tonic-gate s += UTF8SKIP(s);
1804*0Sstevel@tonic-gate while (s < strend && *s == ch)
1805*0Sstevel@tonic-gate s += UTF8SKIP(s);
1806*0Sstevel@tonic-gate }
1807*0Sstevel@tonic-gate s += UTF8SKIP(s);
1808*0Sstevel@tonic-gate }
1809*0Sstevel@tonic-gate }
1810*0Sstevel@tonic-gate else {
1811*0Sstevel@tonic-gate while (s < strend) {
1812*0Sstevel@tonic-gate if (*s == ch) {
1813*0Sstevel@tonic-gate DEBUG_r( did_match = 1 );
1814*0Sstevel@tonic-gate if (regtry(prog, s)) goto got_it;
1815*0Sstevel@tonic-gate s++;
1816*0Sstevel@tonic-gate while (s < strend && *s == ch)
1817*0Sstevel@tonic-gate s++;
1818*0Sstevel@tonic-gate }
1819*0Sstevel@tonic-gate s++;
1820*0Sstevel@tonic-gate }
1821*0Sstevel@tonic-gate }
1822*0Sstevel@tonic-gate DEBUG_r(if (!did_match)
1823*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
1824*0Sstevel@tonic-gate "Did not find anchored character...\n")
1825*0Sstevel@tonic-gate );
1826*0Sstevel@tonic-gate }
1827*0Sstevel@tonic-gate /*SUPPRESS 560*/
1828*0Sstevel@tonic-gate else if (prog->anchored_substr != Nullsv
1829*0Sstevel@tonic-gate || prog->anchored_utf8 != Nullsv
1830*0Sstevel@tonic-gate || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1831*0Sstevel@tonic-gate && prog->float_max_offset < strend - s)) {
1832*0Sstevel@tonic-gate SV *must;
1833*0Sstevel@tonic-gate I32 back_max;
1834*0Sstevel@tonic-gate I32 back_min;
1835*0Sstevel@tonic-gate char *last;
1836*0Sstevel@tonic-gate char *last1; /* Last position checked before */
1837*0Sstevel@tonic-gate #ifdef DEBUGGING
1838*0Sstevel@tonic-gate int did_match = 0;
1839*0Sstevel@tonic-gate #endif
1840*0Sstevel@tonic-gate if (prog->anchored_substr || prog->anchored_utf8) {
1841*0Sstevel@tonic-gate if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1842*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1843*0Sstevel@tonic-gate must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1844*0Sstevel@tonic-gate back_max = back_min = prog->anchored_offset;
1845*0Sstevel@tonic-gate } else {
1846*0Sstevel@tonic-gate if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1847*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1848*0Sstevel@tonic-gate must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1849*0Sstevel@tonic-gate back_max = prog->float_max_offset;
1850*0Sstevel@tonic-gate back_min = prog->float_min_offset;
1851*0Sstevel@tonic-gate }
1852*0Sstevel@tonic-gate if (must == &PL_sv_undef)
1853*0Sstevel@tonic-gate /* could not downgrade utf8 check substring, so must fail */
1854*0Sstevel@tonic-gate goto phooey;
1855*0Sstevel@tonic-gate
1856*0Sstevel@tonic-gate last = HOP3c(strend, /* Cannot start after this */
1857*0Sstevel@tonic-gate -(I32)(CHR_SVLEN(must)
1858*0Sstevel@tonic-gate - (SvTAIL(must) != 0) + back_min), strbeg);
1859*0Sstevel@tonic-gate
1860*0Sstevel@tonic-gate if (s > PL_bostr)
1861*0Sstevel@tonic-gate last1 = HOPc(s, -1);
1862*0Sstevel@tonic-gate else
1863*0Sstevel@tonic-gate last1 = s - 1; /* bogus */
1864*0Sstevel@tonic-gate
1865*0Sstevel@tonic-gate /* XXXX check_substr already used to find `s', can optimize if
1866*0Sstevel@tonic-gate check_substr==must. */
1867*0Sstevel@tonic-gate scream_pos = -1;
1868*0Sstevel@tonic-gate dontbother = end_shift;
1869*0Sstevel@tonic-gate strend = HOPc(strend, -dontbother);
1870*0Sstevel@tonic-gate while ( (s <= last) &&
1871*0Sstevel@tonic-gate ((flags & REXEC_SCREAM)
1872*0Sstevel@tonic-gate ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1873*0Sstevel@tonic-gate end_shift, &scream_pos, 0))
1874*0Sstevel@tonic-gate : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1875*0Sstevel@tonic-gate (unsigned char*)strend, must,
1876*0Sstevel@tonic-gate PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1877*0Sstevel@tonic-gate /* we may be pointing at the wrong string */
1878*0Sstevel@tonic-gate if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1879*0Sstevel@tonic-gate s = strbeg + (s - SvPVX(sv));
1880*0Sstevel@tonic-gate DEBUG_r( did_match = 1 );
1881*0Sstevel@tonic-gate if (HOPc(s, -back_max) > last1) {
1882*0Sstevel@tonic-gate last1 = HOPc(s, -back_min);
1883*0Sstevel@tonic-gate s = HOPc(s, -back_max);
1884*0Sstevel@tonic-gate }
1885*0Sstevel@tonic-gate else {
1886*0Sstevel@tonic-gate char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1887*0Sstevel@tonic-gate
1888*0Sstevel@tonic-gate last1 = HOPc(s, -back_min);
1889*0Sstevel@tonic-gate s = t;
1890*0Sstevel@tonic-gate }
1891*0Sstevel@tonic-gate if (do_utf8) {
1892*0Sstevel@tonic-gate while (s <= last1) {
1893*0Sstevel@tonic-gate if (regtry(prog, s))
1894*0Sstevel@tonic-gate goto got_it;
1895*0Sstevel@tonic-gate s += UTF8SKIP(s);
1896*0Sstevel@tonic-gate }
1897*0Sstevel@tonic-gate }
1898*0Sstevel@tonic-gate else {
1899*0Sstevel@tonic-gate while (s <= last1) {
1900*0Sstevel@tonic-gate if (regtry(prog, s))
1901*0Sstevel@tonic-gate goto got_it;
1902*0Sstevel@tonic-gate s++;
1903*0Sstevel@tonic-gate }
1904*0Sstevel@tonic-gate }
1905*0Sstevel@tonic-gate }
1906*0Sstevel@tonic-gate DEBUG_r(if (!did_match)
1907*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
1908*0Sstevel@tonic-gate "Did not find %s substr `%s%.*s%s'%s...\n",
1909*0Sstevel@tonic-gate ((must == prog->anchored_substr || must == prog->anchored_utf8)
1910*0Sstevel@tonic-gate ? "anchored" : "floating"),
1911*0Sstevel@tonic-gate PL_colors[0],
1912*0Sstevel@tonic-gate (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1913*0Sstevel@tonic-gate SvPVX(must),
1914*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(must) ? "$" : ""))
1915*0Sstevel@tonic-gate );
1916*0Sstevel@tonic-gate goto phooey;
1917*0Sstevel@tonic-gate }
1918*0Sstevel@tonic-gate else if ((c = prog->regstclass)) {
1919*0Sstevel@tonic-gate if (minlen) {
1920*0Sstevel@tonic-gate I32 op = (U8)OP(prog->regstclass);
1921*0Sstevel@tonic-gate /* don't bother with what can't match */
1922*0Sstevel@tonic-gate if (PL_regkind[op] != EXACT && op != CANY)
1923*0Sstevel@tonic-gate strend = HOPc(strend, -(minlen - 1));
1924*0Sstevel@tonic-gate }
1925*0Sstevel@tonic-gate DEBUG_r({
1926*0Sstevel@tonic-gate SV *prop = sv_newmortal();
1927*0Sstevel@tonic-gate char *s0;
1928*0Sstevel@tonic-gate char *s1;
1929*0Sstevel@tonic-gate int len0;
1930*0Sstevel@tonic-gate int len1;
1931*0Sstevel@tonic-gate
1932*0Sstevel@tonic-gate regprop(prop, c);
1933*0Sstevel@tonic-gate s0 = UTF ?
1934*0Sstevel@tonic-gate pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1935*0Sstevel@tonic-gate UNI_DISPLAY_REGEX) :
1936*0Sstevel@tonic-gate SvPVX(prop);
1937*0Sstevel@tonic-gate len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1938*0Sstevel@tonic-gate s1 = UTF ?
1939*0Sstevel@tonic-gate sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1940*0Sstevel@tonic-gate len1 = UTF ? SvCUR(dsv1) : strend - s;
1941*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
1942*0Sstevel@tonic-gate "Matching stclass `%*.*s' against `%*.*s'\n",
1943*0Sstevel@tonic-gate len0, len0, s0,
1944*0Sstevel@tonic-gate len1, len1, s1);
1945*0Sstevel@tonic-gate });
1946*0Sstevel@tonic-gate if (find_byclass(prog, c, s, strend, startpos, 0))
1947*0Sstevel@tonic-gate goto got_it;
1948*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1949*0Sstevel@tonic-gate }
1950*0Sstevel@tonic-gate else {
1951*0Sstevel@tonic-gate dontbother = 0;
1952*0Sstevel@tonic-gate if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1953*0Sstevel@tonic-gate /* Trim the end. */
1954*0Sstevel@tonic-gate char *last;
1955*0Sstevel@tonic-gate SV* float_real;
1956*0Sstevel@tonic-gate
1957*0Sstevel@tonic-gate if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1958*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1959*0Sstevel@tonic-gate float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1960*0Sstevel@tonic-gate
1961*0Sstevel@tonic-gate if (flags & REXEC_SCREAM) {
1962*0Sstevel@tonic-gate last = screaminstr(sv, float_real, s - strbeg,
1963*0Sstevel@tonic-gate end_shift, &scream_pos, 1); /* last one */
1964*0Sstevel@tonic-gate if (!last)
1965*0Sstevel@tonic-gate last = scream_olds; /* Only one occurrence. */
1966*0Sstevel@tonic-gate /* we may be pointing at the wrong string */
1967*0Sstevel@tonic-gate else if (RX_MATCH_COPIED(prog))
1968*0Sstevel@tonic-gate s = strbeg + (s - SvPVX(sv));
1969*0Sstevel@tonic-gate }
1970*0Sstevel@tonic-gate else {
1971*0Sstevel@tonic-gate STRLEN len;
1972*0Sstevel@tonic-gate char *little = SvPV(float_real, len);
1973*0Sstevel@tonic-gate
1974*0Sstevel@tonic-gate if (SvTAIL(float_real)) {
1975*0Sstevel@tonic-gate if (memEQ(strend - len + 1, little, len - 1))
1976*0Sstevel@tonic-gate last = strend - len + 1;
1977*0Sstevel@tonic-gate else if (!PL_multiline)
1978*0Sstevel@tonic-gate last = memEQ(strend - len, little, len)
1979*0Sstevel@tonic-gate ? strend - len : Nullch;
1980*0Sstevel@tonic-gate else
1981*0Sstevel@tonic-gate goto find_last;
1982*0Sstevel@tonic-gate } else {
1983*0Sstevel@tonic-gate find_last:
1984*0Sstevel@tonic-gate if (len)
1985*0Sstevel@tonic-gate last = rninstr(s, strend, little, little + len);
1986*0Sstevel@tonic-gate else
1987*0Sstevel@tonic-gate last = strend; /* matching `$' */
1988*0Sstevel@tonic-gate }
1989*0Sstevel@tonic-gate }
1990*0Sstevel@tonic-gate if (last == NULL) {
1991*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
1992*0Sstevel@tonic-gate "%sCan't trim the tail, match fails (should not happen)%s\n",
1993*0Sstevel@tonic-gate PL_colors[4],PL_colors[5]));
1994*0Sstevel@tonic-gate goto phooey; /* Should not happen! */
1995*0Sstevel@tonic-gate }
1996*0Sstevel@tonic-gate dontbother = strend - last + prog->float_min_offset;
1997*0Sstevel@tonic-gate }
1998*0Sstevel@tonic-gate if (minlen && (dontbother < minlen))
1999*0Sstevel@tonic-gate dontbother = minlen - 1;
2000*0Sstevel@tonic-gate strend -= dontbother; /* this one's always in bytes! */
2001*0Sstevel@tonic-gate /* We don't know much -- general case. */
2002*0Sstevel@tonic-gate if (do_utf8) {
2003*0Sstevel@tonic-gate for (;;) {
2004*0Sstevel@tonic-gate if (regtry(prog, s))
2005*0Sstevel@tonic-gate goto got_it;
2006*0Sstevel@tonic-gate if (s >= strend)
2007*0Sstevel@tonic-gate break;
2008*0Sstevel@tonic-gate s += UTF8SKIP(s);
2009*0Sstevel@tonic-gate };
2010*0Sstevel@tonic-gate }
2011*0Sstevel@tonic-gate else {
2012*0Sstevel@tonic-gate do {
2013*0Sstevel@tonic-gate if (regtry(prog, s))
2014*0Sstevel@tonic-gate goto got_it;
2015*0Sstevel@tonic-gate } while (s++ < strend);
2016*0Sstevel@tonic-gate }
2017*0Sstevel@tonic-gate }
2018*0Sstevel@tonic-gate
2019*0Sstevel@tonic-gate /* Failure. */
2020*0Sstevel@tonic-gate goto phooey;
2021*0Sstevel@tonic-gate
2022*0Sstevel@tonic-gate got_it:
2023*0Sstevel@tonic-gate RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2024*0Sstevel@tonic-gate
2025*0Sstevel@tonic-gate if (PL_reg_eval_set) {
2026*0Sstevel@tonic-gate /* Preserve the current value of $^R */
2027*0Sstevel@tonic-gate if (oreplsv != GvSV(PL_replgv))
2028*0Sstevel@tonic-gate sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2029*0Sstevel@tonic-gate restored, the value remains
2030*0Sstevel@tonic-gate the same. */
2031*0Sstevel@tonic-gate restore_pos(aTHX_ 0);
2032*0Sstevel@tonic-gate }
2033*0Sstevel@tonic-gate
2034*0Sstevel@tonic-gate /* make sure $`, $&, $', and $digit will work later */
2035*0Sstevel@tonic-gate if ( !(flags & REXEC_NOT_FIRST) ) {
2036*0Sstevel@tonic-gate if (RX_MATCH_COPIED(prog)) {
2037*0Sstevel@tonic-gate Safefree(prog->subbeg);
2038*0Sstevel@tonic-gate RX_MATCH_COPIED_off(prog);
2039*0Sstevel@tonic-gate }
2040*0Sstevel@tonic-gate if (flags & REXEC_COPY_STR) {
2041*0Sstevel@tonic-gate I32 i = PL_regeol - startpos + (stringarg - strbeg);
2042*0Sstevel@tonic-gate
2043*0Sstevel@tonic-gate s = savepvn(strbeg, i);
2044*0Sstevel@tonic-gate prog->subbeg = s;
2045*0Sstevel@tonic-gate prog->sublen = i;
2046*0Sstevel@tonic-gate RX_MATCH_COPIED_on(prog);
2047*0Sstevel@tonic-gate }
2048*0Sstevel@tonic-gate else {
2049*0Sstevel@tonic-gate prog->subbeg = strbeg;
2050*0Sstevel@tonic-gate prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2051*0Sstevel@tonic-gate }
2052*0Sstevel@tonic-gate }
2053*0Sstevel@tonic-gate
2054*0Sstevel@tonic-gate return 1;
2055*0Sstevel@tonic-gate
2056*0Sstevel@tonic-gate phooey:
2057*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2058*0Sstevel@tonic-gate PL_colors[4],PL_colors[5]));
2059*0Sstevel@tonic-gate if (PL_reg_eval_set)
2060*0Sstevel@tonic-gate restore_pos(aTHX_ 0);
2061*0Sstevel@tonic-gate return 0;
2062*0Sstevel@tonic-gate }
2063*0Sstevel@tonic-gate
2064*0Sstevel@tonic-gate /*
2065*0Sstevel@tonic-gate - regtry - try match at specific point
2066*0Sstevel@tonic-gate */
2067*0Sstevel@tonic-gate STATIC I32 /* 0 failure, 1 success */
S_regtry(pTHX_ regexp * prog,char * startpos)2068*0Sstevel@tonic-gate S_regtry(pTHX_ regexp *prog, char *startpos)
2069*0Sstevel@tonic-gate {
2070*0Sstevel@tonic-gate register I32 i;
2071*0Sstevel@tonic-gate register I32 *sp;
2072*0Sstevel@tonic-gate register I32 *ep;
2073*0Sstevel@tonic-gate CHECKPOINT lastcp;
2074*0Sstevel@tonic-gate
2075*0Sstevel@tonic-gate #ifdef DEBUGGING
2076*0Sstevel@tonic-gate PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2077*0Sstevel@tonic-gate #endif
2078*0Sstevel@tonic-gate if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2079*0Sstevel@tonic-gate MAGIC *mg;
2080*0Sstevel@tonic-gate
2081*0Sstevel@tonic-gate PL_reg_eval_set = RS_init;
2082*0Sstevel@tonic-gate DEBUG_r(DEBUG_s(
2083*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2084*0Sstevel@tonic-gate (IV)(PL_stack_sp - PL_stack_base));
2085*0Sstevel@tonic-gate ));
2086*0Sstevel@tonic-gate SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2087*0Sstevel@tonic-gate cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2088*0Sstevel@tonic-gate /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2089*0Sstevel@tonic-gate SAVETMPS;
2090*0Sstevel@tonic-gate /* Apparently this is not needed, judging by wantarray. */
2091*0Sstevel@tonic-gate /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2092*0Sstevel@tonic-gate cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2093*0Sstevel@tonic-gate
2094*0Sstevel@tonic-gate if (PL_reg_sv) {
2095*0Sstevel@tonic-gate /* Make $_ available to executed code. */
2096*0Sstevel@tonic-gate if (PL_reg_sv != DEFSV) {
2097*0Sstevel@tonic-gate /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2098*0Sstevel@tonic-gate SAVESPTR(DEFSV);
2099*0Sstevel@tonic-gate DEFSV = PL_reg_sv;
2100*0Sstevel@tonic-gate }
2101*0Sstevel@tonic-gate
2102*0Sstevel@tonic-gate if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2103*0Sstevel@tonic-gate && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2104*0Sstevel@tonic-gate /* prepare for quick setting of pos */
2105*0Sstevel@tonic-gate sv_magic(PL_reg_sv, (SV*)0,
2106*0Sstevel@tonic-gate PERL_MAGIC_regex_global, Nullch, 0);
2107*0Sstevel@tonic-gate mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2108*0Sstevel@tonic-gate mg->mg_len = -1;
2109*0Sstevel@tonic-gate }
2110*0Sstevel@tonic-gate PL_reg_magic = mg;
2111*0Sstevel@tonic-gate PL_reg_oldpos = mg->mg_len;
2112*0Sstevel@tonic-gate SAVEDESTRUCTOR_X(restore_pos, 0);
2113*0Sstevel@tonic-gate }
2114*0Sstevel@tonic-gate if (!PL_reg_curpm) {
2115*0Sstevel@tonic-gate Newz(22,PL_reg_curpm, 1, PMOP);
2116*0Sstevel@tonic-gate #ifdef USE_ITHREADS
2117*0Sstevel@tonic-gate {
2118*0Sstevel@tonic-gate SV* repointer = newSViv(0);
2119*0Sstevel@tonic-gate /* so we know which PL_regex_padav element is PL_reg_curpm */
2120*0Sstevel@tonic-gate SvFLAGS(repointer) |= SVf_BREAK;
2121*0Sstevel@tonic-gate av_push(PL_regex_padav,repointer);
2122*0Sstevel@tonic-gate PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2123*0Sstevel@tonic-gate PL_regex_pad = AvARRAY(PL_regex_padav);
2124*0Sstevel@tonic-gate }
2125*0Sstevel@tonic-gate #endif
2126*0Sstevel@tonic-gate }
2127*0Sstevel@tonic-gate PM_SETRE(PL_reg_curpm, prog);
2128*0Sstevel@tonic-gate PL_reg_oldcurpm = PL_curpm;
2129*0Sstevel@tonic-gate PL_curpm = PL_reg_curpm;
2130*0Sstevel@tonic-gate if (RX_MATCH_COPIED(prog)) {
2131*0Sstevel@tonic-gate /* Here is a serious problem: we cannot rewrite subbeg,
2132*0Sstevel@tonic-gate since it may be needed if this match fails. Thus
2133*0Sstevel@tonic-gate $` inside (?{}) could fail... */
2134*0Sstevel@tonic-gate PL_reg_oldsaved = prog->subbeg;
2135*0Sstevel@tonic-gate PL_reg_oldsavedlen = prog->sublen;
2136*0Sstevel@tonic-gate RX_MATCH_COPIED_off(prog);
2137*0Sstevel@tonic-gate }
2138*0Sstevel@tonic-gate else
2139*0Sstevel@tonic-gate PL_reg_oldsaved = Nullch;
2140*0Sstevel@tonic-gate prog->subbeg = PL_bostr;
2141*0Sstevel@tonic-gate prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2142*0Sstevel@tonic-gate }
2143*0Sstevel@tonic-gate prog->startp[0] = startpos - PL_bostr;
2144*0Sstevel@tonic-gate PL_reginput = startpos;
2145*0Sstevel@tonic-gate PL_regstartp = prog->startp;
2146*0Sstevel@tonic-gate PL_regendp = prog->endp;
2147*0Sstevel@tonic-gate PL_reglastparen = &prog->lastparen;
2148*0Sstevel@tonic-gate PL_reglastcloseparen = &prog->lastcloseparen;
2149*0Sstevel@tonic-gate prog->lastparen = 0;
2150*0Sstevel@tonic-gate prog->lastcloseparen = 0;
2151*0Sstevel@tonic-gate PL_regsize = 0;
2152*0Sstevel@tonic-gate DEBUG_r(PL_reg_starttry = startpos);
2153*0Sstevel@tonic-gate if (PL_reg_start_tmpl <= prog->nparens) {
2154*0Sstevel@tonic-gate PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2155*0Sstevel@tonic-gate if(PL_reg_start_tmp)
2156*0Sstevel@tonic-gate Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2157*0Sstevel@tonic-gate else
2158*0Sstevel@tonic-gate New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2159*0Sstevel@tonic-gate }
2160*0Sstevel@tonic-gate
2161*0Sstevel@tonic-gate /* XXXX What this code is doing here?!!! There should be no need
2162*0Sstevel@tonic-gate to do this again and again, PL_reglastparen should take care of
2163*0Sstevel@tonic-gate this! --ilya*/
2164*0Sstevel@tonic-gate
2165*0Sstevel@tonic-gate /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2166*0Sstevel@tonic-gate * Actually, the code in regcppop() (which Ilya may be meaning by
2167*0Sstevel@tonic-gate * PL_reglastparen), is not needed at all by the test suite
2168*0Sstevel@tonic-gate * (op/regexp, op/pat, op/split), but that code is needed, oddly
2169*0Sstevel@tonic-gate * enough, for building DynaLoader, or otherwise this
2170*0Sstevel@tonic-gate * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2171*0Sstevel@tonic-gate * will happen. Meanwhile, this code *is* needed for the
2172*0Sstevel@tonic-gate * above-mentioned test suite tests to succeed. The common theme
2173*0Sstevel@tonic-gate * on those tests seems to be returning null fields from matches.
2174*0Sstevel@tonic-gate * --jhi */
2175*0Sstevel@tonic-gate #if 1
2176*0Sstevel@tonic-gate sp = prog->startp;
2177*0Sstevel@tonic-gate ep = prog->endp;
2178*0Sstevel@tonic-gate if (prog->nparens) {
2179*0Sstevel@tonic-gate for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2180*0Sstevel@tonic-gate *++sp = -1;
2181*0Sstevel@tonic-gate *++ep = -1;
2182*0Sstevel@tonic-gate }
2183*0Sstevel@tonic-gate }
2184*0Sstevel@tonic-gate #endif
2185*0Sstevel@tonic-gate REGCP_SET(lastcp);
2186*0Sstevel@tonic-gate if (regmatch(prog->program + 1)) {
2187*0Sstevel@tonic-gate prog->endp[0] = PL_reginput - PL_bostr;
2188*0Sstevel@tonic-gate return 1;
2189*0Sstevel@tonic-gate }
2190*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
2191*0Sstevel@tonic-gate return 0;
2192*0Sstevel@tonic-gate }
2193*0Sstevel@tonic-gate
2194*0Sstevel@tonic-gate #define RE_UNWIND_BRANCH 1
2195*0Sstevel@tonic-gate #define RE_UNWIND_BRANCHJ 2
2196*0Sstevel@tonic-gate
2197*0Sstevel@tonic-gate union re_unwind_t;
2198*0Sstevel@tonic-gate
2199*0Sstevel@tonic-gate typedef struct { /* XX: makes sense to enlarge it... */
2200*0Sstevel@tonic-gate I32 type;
2201*0Sstevel@tonic-gate I32 prev;
2202*0Sstevel@tonic-gate CHECKPOINT lastcp;
2203*0Sstevel@tonic-gate } re_unwind_generic_t;
2204*0Sstevel@tonic-gate
2205*0Sstevel@tonic-gate typedef struct {
2206*0Sstevel@tonic-gate I32 type;
2207*0Sstevel@tonic-gate I32 prev;
2208*0Sstevel@tonic-gate CHECKPOINT lastcp;
2209*0Sstevel@tonic-gate I32 lastparen;
2210*0Sstevel@tonic-gate regnode *next;
2211*0Sstevel@tonic-gate char *locinput;
2212*0Sstevel@tonic-gate I32 nextchr;
2213*0Sstevel@tonic-gate #ifdef DEBUGGING
2214*0Sstevel@tonic-gate int regindent;
2215*0Sstevel@tonic-gate #endif
2216*0Sstevel@tonic-gate } re_unwind_branch_t;
2217*0Sstevel@tonic-gate
2218*0Sstevel@tonic-gate typedef union re_unwind_t {
2219*0Sstevel@tonic-gate I32 type;
2220*0Sstevel@tonic-gate re_unwind_generic_t generic;
2221*0Sstevel@tonic-gate re_unwind_branch_t branch;
2222*0Sstevel@tonic-gate } re_unwind_t;
2223*0Sstevel@tonic-gate
2224*0Sstevel@tonic-gate #define sayYES goto yes
2225*0Sstevel@tonic-gate #define sayNO goto no
2226*0Sstevel@tonic-gate #define sayNO_ANYOF goto no_anyof
2227*0Sstevel@tonic-gate #define sayYES_FINAL goto yes_final
2228*0Sstevel@tonic-gate #define sayYES_LOUD goto yes_loud
2229*0Sstevel@tonic-gate #define sayNO_FINAL goto no_final
2230*0Sstevel@tonic-gate #define sayNO_SILENT goto do_no
2231*0Sstevel@tonic-gate #define saySAME(x) if (x) goto yes; else goto no
2232*0Sstevel@tonic-gate
2233*0Sstevel@tonic-gate #define REPORT_CODE_OFF 24
2234*0Sstevel@tonic-gate
2235*0Sstevel@tonic-gate /*
2236*0Sstevel@tonic-gate - regmatch - main matching routine
2237*0Sstevel@tonic-gate *
2238*0Sstevel@tonic-gate * Conceptually the strategy is simple: check to see whether the current
2239*0Sstevel@tonic-gate * node matches, call self recursively to see whether the rest matches,
2240*0Sstevel@tonic-gate * and then act accordingly. In practice we make some effort to avoid
2241*0Sstevel@tonic-gate * recursion, in particular by going through "ordinary" nodes (that don't
2242*0Sstevel@tonic-gate * need to know whether the rest of the match failed) by a loop instead of
2243*0Sstevel@tonic-gate * by recursion.
2244*0Sstevel@tonic-gate */
2245*0Sstevel@tonic-gate /* [lwall] I've hoisted the register declarations to the outer block in order to
2246*0Sstevel@tonic-gate * maybe save a little bit of pushing and popping on the stack. It also takes
2247*0Sstevel@tonic-gate * advantage of machines that use a register save mask on subroutine entry.
2248*0Sstevel@tonic-gate */
2249*0Sstevel@tonic-gate STATIC I32 /* 0 failure, 1 success */
S_regmatch(pTHX_ regnode * prog)2250*0Sstevel@tonic-gate S_regmatch(pTHX_ regnode *prog)
2251*0Sstevel@tonic-gate {
2252*0Sstevel@tonic-gate register regnode *scan; /* Current node. */
2253*0Sstevel@tonic-gate regnode *next; /* Next node. */
2254*0Sstevel@tonic-gate regnode *inner; /* Next node in internal branch. */
2255*0Sstevel@tonic-gate register I32 nextchr; /* renamed nextchr - nextchar colides with
2256*0Sstevel@tonic-gate function of same name */
2257*0Sstevel@tonic-gate register I32 n; /* no or next */
2258*0Sstevel@tonic-gate register I32 ln = 0; /* len or last */
2259*0Sstevel@tonic-gate register char *s = Nullch; /* operand or save */
2260*0Sstevel@tonic-gate register char *locinput = PL_reginput;
2261*0Sstevel@tonic-gate register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2262*0Sstevel@tonic-gate int minmod = 0, sw = 0, logical = 0;
2263*0Sstevel@tonic-gate I32 unwind = 0;
2264*0Sstevel@tonic-gate #if 0
2265*0Sstevel@tonic-gate I32 firstcp = PL_savestack_ix;
2266*0Sstevel@tonic-gate #endif
2267*0Sstevel@tonic-gate register bool do_utf8 = PL_reg_match_utf8;
2268*0Sstevel@tonic-gate #ifdef DEBUGGING
2269*0Sstevel@tonic-gate SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2270*0Sstevel@tonic-gate SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2271*0Sstevel@tonic-gate SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2272*0Sstevel@tonic-gate #endif
2273*0Sstevel@tonic-gate
2274*0Sstevel@tonic-gate #ifdef DEBUGGING
2275*0Sstevel@tonic-gate PL_regindent++;
2276*0Sstevel@tonic-gate #endif
2277*0Sstevel@tonic-gate
2278*0Sstevel@tonic-gate /* Note that nextchr is a byte even in UTF */
2279*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2280*0Sstevel@tonic-gate scan = prog;
2281*0Sstevel@tonic-gate while (scan != NULL) {
2282*0Sstevel@tonic-gate
2283*0Sstevel@tonic-gate DEBUG_r( {
2284*0Sstevel@tonic-gate SV *prop = sv_newmortal();
2285*0Sstevel@tonic-gate int docolor = *PL_colors[0];
2286*0Sstevel@tonic-gate int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2287*0Sstevel@tonic-gate int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2288*0Sstevel@tonic-gate /* The part of the string before starttry has one color
2289*0Sstevel@tonic-gate (pref0_len chars), between starttry and current
2290*0Sstevel@tonic-gate position another one (pref_len - pref0_len chars),
2291*0Sstevel@tonic-gate after the current position the third one.
2292*0Sstevel@tonic-gate We assume that pref0_len <= pref_len, otherwise we
2293*0Sstevel@tonic-gate decrease pref0_len. */
2294*0Sstevel@tonic-gate int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2295*0Sstevel@tonic-gate ? (5 + taill) - l : locinput - PL_bostr;
2296*0Sstevel@tonic-gate int pref0_len;
2297*0Sstevel@tonic-gate
2298*0Sstevel@tonic-gate while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2299*0Sstevel@tonic-gate pref_len++;
2300*0Sstevel@tonic-gate pref0_len = pref_len - (locinput - PL_reg_starttry);
2301*0Sstevel@tonic-gate if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2302*0Sstevel@tonic-gate l = ( PL_regeol - locinput > (5 + taill) - pref_len
2303*0Sstevel@tonic-gate ? (5 + taill) - pref_len : PL_regeol - locinput);
2304*0Sstevel@tonic-gate while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2305*0Sstevel@tonic-gate l--;
2306*0Sstevel@tonic-gate if (pref0_len < 0)
2307*0Sstevel@tonic-gate pref0_len = 0;
2308*0Sstevel@tonic-gate if (pref0_len > pref_len)
2309*0Sstevel@tonic-gate pref0_len = pref_len;
2310*0Sstevel@tonic-gate regprop(prop, scan);
2311*0Sstevel@tonic-gate {
2312*0Sstevel@tonic-gate char *s0 =
2313*0Sstevel@tonic-gate do_utf8 && OP(scan) != CANY ?
2314*0Sstevel@tonic-gate pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2315*0Sstevel@tonic-gate pref0_len, 60, UNI_DISPLAY_REGEX) :
2316*0Sstevel@tonic-gate locinput - pref_len;
2317*0Sstevel@tonic-gate int len0 = do_utf8 ? strlen(s0) : pref0_len;
2318*0Sstevel@tonic-gate char *s1 = do_utf8 && OP(scan) != CANY ?
2319*0Sstevel@tonic-gate pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2320*0Sstevel@tonic-gate pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2321*0Sstevel@tonic-gate locinput - pref_len + pref0_len;
2322*0Sstevel@tonic-gate int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2323*0Sstevel@tonic-gate char *s2 = do_utf8 && OP(scan) != CANY ?
2324*0Sstevel@tonic-gate pv_uni_display(dsv2, (U8*)locinput,
2325*0Sstevel@tonic-gate PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2326*0Sstevel@tonic-gate locinput;
2327*0Sstevel@tonic-gate int len2 = do_utf8 ? strlen(s2) : l;
2328*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
2329*0Sstevel@tonic-gate "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2330*0Sstevel@tonic-gate (IV)(locinput - PL_bostr),
2331*0Sstevel@tonic-gate PL_colors[4],
2332*0Sstevel@tonic-gate len0, s0,
2333*0Sstevel@tonic-gate PL_colors[5],
2334*0Sstevel@tonic-gate PL_colors[2],
2335*0Sstevel@tonic-gate len1, s1,
2336*0Sstevel@tonic-gate PL_colors[3],
2337*0Sstevel@tonic-gate (docolor ? "" : "> <"),
2338*0Sstevel@tonic-gate PL_colors[0],
2339*0Sstevel@tonic-gate len2, s2,
2340*0Sstevel@tonic-gate PL_colors[1],
2341*0Sstevel@tonic-gate 15 - l - pref_len + 1,
2342*0Sstevel@tonic-gate "",
2343*0Sstevel@tonic-gate (IV)(scan - PL_regprogram), PL_regindent*2, "",
2344*0Sstevel@tonic-gate SvPVX(prop));
2345*0Sstevel@tonic-gate }
2346*0Sstevel@tonic-gate });
2347*0Sstevel@tonic-gate
2348*0Sstevel@tonic-gate next = scan + NEXT_OFF(scan);
2349*0Sstevel@tonic-gate if (next == scan)
2350*0Sstevel@tonic-gate next = NULL;
2351*0Sstevel@tonic-gate
2352*0Sstevel@tonic-gate switch (OP(scan)) {
2353*0Sstevel@tonic-gate case BOL:
2354*0Sstevel@tonic-gate if (locinput == PL_bostr || (PL_multiline &&
2355*0Sstevel@tonic-gate (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2356*0Sstevel@tonic-gate {
2357*0Sstevel@tonic-gate /* regtill = regbol; */
2358*0Sstevel@tonic-gate break;
2359*0Sstevel@tonic-gate }
2360*0Sstevel@tonic-gate sayNO;
2361*0Sstevel@tonic-gate case MBOL:
2362*0Sstevel@tonic-gate if (locinput == PL_bostr ||
2363*0Sstevel@tonic-gate ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2364*0Sstevel@tonic-gate {
2365*0Sstevel@tonic-gate break;
2366*0Sstevel@tonic-gate }
2367*0Sstevel@tonic-gate sayNO;
2368*0Sstevel@tonic-gate case SBOL:
2369*0Sstevel@tonic-gate if (locinput == PL_bostr)
2370*0Sstevel@tonic-gate break;
2371*0Sstevel@tonic-gate sayNO;
2372*0Sstevel@tonic-gate case GPOS:
2373*0Sstevel@tonic-gate if (locinput == PL_reg_ganch)
2374*0Sstevel@tonic-gate break;
2375*0Sstevel@tonic-gate sayNO;
2376*0Sstevel@tonic-gate case EOL:
2377*0Sstevel@tonic-gate if (PL_multiline)
2378*0Sstevel@tonic-gate goto meol;
2379*0Sstevel@tonic-gate else
2380*0Sstevel@tonic-gate goto seol;
2381*0Sstevel@tonic-gate case MEOL:
2382*0Sstevel@tonic-gate meol:
2383*0Sstevel@tonic-gate if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2384*0Sstevel@tonic-gate sayNO;
2385*0Sstevel@tonic-gate break;
2386*0Sstevel@tonic-gate case SEOL:
2387*0Sstevel@tonic-gate seol:
2388*0Sstevel@tonic-gate if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2389*0Sstevel@tonic-gate sayNO;
2390*0Sstevel@tonic-gate if (PL_regeol - locinput > 1)
2391*0Sstevel@tonic-gate sayNO;
2392*0Sstevel@tonic-gate break;
2393*0Sstevel@tonic-gate case EOS:
2394*0Sstevel@tonic-gate if (PL_regeol != locinput)
2395*0Sstevel@tonic-gate sayNO;
2396*0Sstevel@tonic-gate break;
2397*0Sstevel@tonic-gate case SANY:
2398*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol)
2399*0Sstevel@tonic-gate sayNO;
2400*0Sstevel@tonic-gate if (do_utf8) {
2401*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2402*0Sstevel@tonic-gate if (locinput > PL_regeol)
2403*0Sstevel@tonic-gate sayNO;
2404*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2405*0Sstevel@tonic-gate }
2406*0Sstevel@tonic-gate else
2407*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2408*0Sstevel@tonic-gate break;
2409*0Sstevel@tonic-gate case CANY:
2410*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol)
2411*0Sstevel@tonic-gate sayNO;
2412*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2413*0Sstevel@tonic-gate break;
2414*0Sstevel@tonic-gate case REG_ANY:
2415*0Sstevel@tonic-gate if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2416*0Sstevel@tonic-gate sayNO;
2417*0Sstevel@tonic-gate if (do_utf8) {
2418*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2419*0Sstevel@tonic-gate if (locinput > PL_regeol)
2420*0Sstevel@tonic-gate sayNO;
2421*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2422*0Sstevel@tonic-gate }
2423*0Sstevel@tonic-gate else
2424*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2425*0Sstevel@tonic-gate break;
2426*0Sstevel@tonic-gate case EXACT:
2427*0Sstevel@tonic-gate s = STRING(scan);
2428*0Sstevel@tonic-gate ln = STR_LEN(scan);
2429*0Sstevel@tonic-gate if (do_utf8 != UTF) {
2430*0Sstevel@tonic-gate /* The target and the pattern have differing utf8ness. */
2431*0Sstevel@tonic-gate char *l = locinput;
2432*0Sstevel@tonic-gate char *e = s + ln;
2433*0Sstevel@tonic-gate STRLEN ulen;
2434*0Sstevel@tonic-gate
2435*0Sstevel@tonic-gate if (do_utf8) {
2436*0Sstevel@tonic-gate /* The target is utf8, the pattern is not utf8. */
2437*0Sstevel@tonic-gate while (s < e) {
2438*0Sstevel@tonic-gate if (l >= PL_regeol)
2439*0Sstevel@tonic-gate sayNO;
2440*0Sstevel@tonic-gate if (NATIVE_TO_UNI(*(U8*)s) !=
2441*0Sstevel@tonic-gate utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2442*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
2443*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY))
2444*0Sstevel@tonic-gate sayNO;
2445*0Sstevel@tonic-gate l += ulen;
2446*0Sstevel@tonic-gate s ++;
2447*0Sstevel@tonic-gate }
2448*0Sstevel@tonic-gate }
2449*0Sstevel@tonic-gate else {
2450*0Sstevel@tonic-gate /* The target is not utf8, the pattern is utf8. */
2451*0Sstevel@tonic-gate while (s < e) {
2452*0Sstevel@tonic-gate if (l >= PL_regeol)
2453*0Sstevel@tonic-gate sayNO;
2454*0Sstevel@tonic-gate if (NATIVE_TO_UNI(*((U8*)l)) !=
2455*0Sstevel@tonic-gate utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2456*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
2457*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY))
2458*0Sstevel@tonic-gate sayNO;
2459*0Sstevel@tonic-gate s += ulen;
2460*0Sstevel@tonic-gate l ++;
2461*0Sstevel@tonic-gate }
2462*0Sstevel@tonic-gate }
2463*0Sstevel@tonic-gate locinput = l;
2464*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2465*0Sstevel@tonic-gate break;
2466*0Sstevel@tonic-gate }
2467*0Sstevel@tonic-gate /* The target and the pattern have the same utf8ness. */
2468*0Sstevel@tonic-gate /* Inline the first character, for speed. */
2469*0Sstevel@tonic-gate if (UCHARAT(s) != nextchr)
2470*0Sstevel@tonic-gate sayNO;
2471*0Sstevel@tonic-gate if (PL_regeol - locinput < ln)
2472*0Sstevel@tonic-gate sayNO;
2473*0Sstevel@tonic-gate if (ln > 1 && memNE(s, locinput, ln))
2474*0Sstevel@tonic-gate sayNO;
2475*0Sstevel@tonic-gate locinput += ln;
2476*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2477*0Sstevel@tonic-gate break;
2478*0Sstevel@tonic-gate case EXACTFL:
2479*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2480*0Sstevel@tonic-gate /* FALL THROUGH */
2481*0Sstevel@tonic-gate case EXACTF:
2482*0Sstevel@tonic-gate s = STRING(scan);
2483*0Sstevel@tonic-gate ln = STR_LEN(scan);
2484*0Sstevel@tonic-gate
2485*0Sstevel@tonic-gate if (do_utf8 || UTF) {
2486*0Sstevel@tonic-gate /* Either target or the pattern are utf8. */
2487*0Sstevel@tonic-gate char *l = locinput;
2488*0Sstevel@tonic-gate char *e = PL_regeol;
2489*0Sstevel@tonic-gate
2490*0Sstevel@tonic-gate if (ibcmp_utf8(s, 0, ln, (bool)UTF,
2491*0Sstevel@tonic-gate l, &e, 0, do_utf8)) {
2492*0Sstevel@tonic-gate /* One more case for the sharp s:
2493*0Sstevel@tonic-gate * pack("U0U*", 0xDF) =~ /ss/i,
2494*0Sstevel@tonic-gate * the 0xC3 0x9F are the UTF-8
2495*0Sstevel@tonic-gate * byte sequence for the U+00DF. */
2496*0Sstevel@tonic-gate if (!(do_utf8 &&
2497*0Sstevel@tonic-gate toLOWER(s[0]) == 's' &&
2498*0Sstevel@tonic-gate ln >= 2 &&
2499*0Sstevel@tonic-gate toLOWER(s[1]) == 's' &&
2500*0Sstevel@tonic-gate (U8)l[0] == 0xC3 &&
2501*0Sstevel@tonic-gate e - l >= 2 &&
2502*0Sstevel@tonic-gate (U8)l[1] == 0x9F))
2503*0Sstevel@tonic-gate sayNO;
2504*0Sstevel@tonic-gate }
2505*0Sstevel@tonic-gate locinput = e;
2506*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2507*0Sstevel@tonic-gate break;
2508*0Sstevel@tonic-gate }
2509*0Sstevel@tonic-gate
2510*0Sstevel@tonic-gate /* Neither the target and the pattern are utf8. */
2511*0Sstevel@tonic-gate
2512*0Sstevel@tonic-gate /* Inline the first character, for speed. */
2513*0Sstevel@tonic-gate if (UCHARAT(s) != nextchr &&
2514*0Sstevel@tonic-gate UCHARAT(s) != ((OP(scan) == EXACTF)
2515*0Sstevel@tonic-gate ? PL_fold : PL_fold_locale)[nextchr])
2516*0Sstevel@tonic-gate sayNO;
2517*0Sstevel@tonic-gate if (PL_regeol - locinput < ln)
2518*0Sstevel@tonic-gate sayNO;
2519*0Sstevel@tonic-gate if (ln > 1 && (OP(scan) == EXACTF
2520*0Sstevel@tonic-gate ? ibcmp(s, locinput, ln)
2521*0Sstevel@tonic-gate : ibcmp_locale(s, locinput, ln)))
2522*0Sstevel@tonic-gate sayNO;
2523*0Sstevel@tonic-gate locinput += ln;
2524*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2525*0Sstevel@tonic-gate break;
2526*0Sstevel@tonic-gate case ANYOF:
2527*0Sstevel@tonic-gate if (do_utf8) {
2528*0Sstevel@tonic-gate STRLEN inclasslen = PL_regeol - locinput;
2529*0Sstevel@tonic-gate
2530*0Sstevel@tonic-gate if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2531*0Sstevel@tonic-gate sayNO_ANYOF;
2532*0Sstevel@tonic-gate if (locinput >= PL_regeol)
2533*0Sstevel@tonic-gate sayNO;
2534*0Sstevel@tonic-gate locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2535*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2536*0Sstevel@tonic-gate break;
2537*0Sstevel@tonic-gate }
2538*0Sstevel@tonic-gate else {
2539*0Sstevel@tonic-gate if (nextchr < 0)
2540*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2541*0Sstevel@tonic-gate if (!REGINCLASS(scan, (U8*)locinput))
2542*0Sstevel@tonic-gate sayNO_ANYOF;
2543*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol)
2544*0Sstevel@tonic-gate sayNO;
2545*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2546*0Sstevel@tonic-gate break;
2547*0Sstevel@tonic-gate }
2548*0Sstevel@tonic-gate no_anyof:
2549*0Sstevel@tonic-gate /* If we might have the case of the German sharp s
2550*0Sstevel@tonic-gate * in a casefolding Unicode character class. */
2551*0Sstevel@tonic-gate
2552*0Sstevel@tonic-gate if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2553*0Sstevel@tonic-gate locinput += SHARP_S_SKIP;
2554*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2555*0Sstevel@tonic-gate }
2556*0Sstevel@tonic-gate else
2557*0Sstevel@tonic-gate sayNO;
2558*0Sstevel@tonic-gate break;
2559*0Sstevel@tonic-gate case ALNUML:
2560*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2561*0Sstevel@tonic-gate /* FALL THROUGH */
2562*0Sstevel@tonic-gate case ALNUM:
2563*0Sstevel@tonic-gate if (!nextchr)
2564*0Sstevel@tonic-gate sayNO;
2565*0Sstevel@tonic-gate if (do_utf8) {
2566*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
2567*0Sstevel@tonic-gate if (!(OP(scan) == ALNUM
2568*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2569*0Sstevel@tonic-gate : isALNUM_LC_utf8((U8*)locinput)))
2570*0Sstevel@tonic-gate {
2571*0Sstevel@tonic-gate sayNO;
2572*0Sstevel@tonic-gate }
2573*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2574*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2575*0Sstevel@tonic-gate break;
2576*0Sstevel@tonic-gate }
2577*0Sstevel@tonic-gate if (!(OP(scan) == ALNUM
2578*0Sstevel@tonic-gate ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2579*0Sstevel@tonic-gate sayNO;
2580*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2581*0Sstevel@tonic-gate break;
2582*0Sstevel@tonic-gate case NALNUML:
2583*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2584*0Sstevel@tonic-gate /* FALL THROUGH */
2585*0Sstevel@tonic-gate case NALNUM:
2586*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol)
2587*0Sstevel@tonic-gate sayNO;
2588*0Sstevel@tonic-gate if (do_utf8) {
2589*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
2590*0Sstevel@tonic-gate if (OP(scan) == NALNUM
2591*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2592*0Sstevel@tonic-gate : isALNUM_LC_utf8((U8*)locinput))
2593*0Sstevel@tonic-gate {
2594*0Sstevel@tonic-gate sayNO;
2595*0Sstevel@tonic-gate }
2596*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2597*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2598*0Sstevel@tonic-gate break;
2599*0Sstevel@tonic-gate }
2600*0Sstevel@tonic-gate if (OP(scan) == NALNUM
2601*0Sstevel@tonic-gate ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2602*0Sstevel@tonic-gate sayNO;
2603*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2604*0Sstevel@tonic-gate break;
2605*0Sstevel@tonic-gate case BOUNDL:
2606*0Sstevel@tonic-gate case NBOUNDL:
2607*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2608*0Sstevel@tonic-gate /* FALL THROUGH */
2609*0Sstevel@tonic-gate case BOUND:
2610*0Sstevel@tonic-gate case NBOUND:
2611*0Sstevel@tonic-gate /* was last char in word? */
2612*0Sstevel@tonic-gate if (do_utf8) {
2613*0Sstevel@tonic-gate if (locinput == PL_bostr)
2614*0Sstevel@tonic-gate ln = '\n';
2615*0Sstevel@tonic-gate else {
2616*0Sstevel@tonic-gate U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2617*0Sstevel@tonic-gate
2618*0Sstevel@tonic-gate ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2619*0Sstevel@tonic-gate }
2620*0Sstevel@tonic-gate if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2621*0Sstevel@tonic-gate ln = isALNUM_uni(ln);
2622*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
2623*0Sstevel@tonic-gate n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2624*0Sstevel@tonic-gate }
2625*0Sstevel@tonic-gate else {
2626*0Sstevel@tonic-gate ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2627*0Sstevel@tonic-gate n = isALNUM_LC_utf8((U8*)locinput);
2628*0Sstevel@tonic-gate }
2629*0Sstevel@tonic-gate }
2630*0Sstevel@tonic-gate else {
2631*0Sstevel@tonic-gate ln = (locinput != PL_bostr) ?
2632*0Sstevel@tonic-gate UCHARAT(locinput - 1) : '\n';
2633*0Sstevel@tonic-gate if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2634*0Sstevel@tonic-gate ln = isALNUM(ln);
2635*0Sstevel@tonic-gate n = isALNUM(nextchr);
2636*0Sstevel@tonic-gate }
2637*0Sstevel@tonic-gate else {
2638*0Sstevel@tonic-gate ln = isALNUM_LC(ln);
2639*0Sstevel@tonic-gate n = isALNUM_LC(nextchr);
2640*0Sstevel@tonic-gate }
2641*0Sstevel@tonic-gate }
2642*0Sstevel@tonic-gate if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2643*0Sstevel@tonic-gate OP(scan) == BOUNDL))
2644*0Sstevel@tonic-gate sayNO;
2645*0Sstevel@tonic-gate break;
2646*0Sstevel@tonic-gate case SPACEL:
2647*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2648*0Sstevel@tonic-gate /* FALL THROUGH */
2649*0Sstevel@tonic-gate case SPACE:
2650*0Sstevel@tonic-gate if (!nextchr)
2651*0Sstevel@tonic-gate sayNO;
2652*0Sstevel@tonic-gate if (do_utf8) {
2653*0Sstevel@tonic-gate if (UTF8_IS_CONTINUED(nextchr)) {
2654*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," ");
2655*0Sstevel@tonic-gate if (!(OP(scan) == SPACE
2656*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2657*0Sstevel@tonic-gate : isSPACE_LC_utf8((U8*)locinput)))
2658*0Sstevel@tonic-gate {
2659*0Sstevel@tonic-gate sayNO;
2660*0Sstevel@tonic-gate }
2661*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2662*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2663*0Sstevel@tonic-gate break;
2664*0Sstevel@tonic-gate }
2665*0Sstevel@tonic-gate if (!(OP(scan) == SPACE
2666*0Sstevel@tonic-gate ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2667*0Sstevel@tonic-gate sayNO;
2668*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2669*0Sstevel@tonic-gate }
2670*0Sstevel@tonic-gate else {
2671*0Sstevel@tonic-gate if (!(OP(scan) == SPACE
2672*0Sstevel@tonic-gate ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2673*0Sstevel@tonic-gate sayNO;
2674*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2675*0Sstevel@tonic-gate }
2676*0Sstevel@tonic-gate break;
2677*0Sstevel@tonic-gate case NSPACEL:
2678*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2679*0Sstevel@tonic-gate /* FALL THROUGH */
2680*0Sstevel@tonic-gate case NSPACE:
2681*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol)
2682*0Sstevel@tonic-gate sayNO;
2683*0Sstevel@tonic-gate if (do_utf8) {
2684*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," ");
2685*0Sstevel@tonic-gate if (OP(scan) == NSPACE
2686*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2687*0Sstevel@tonic-gate : isSPACE_LC_utf8((U8*)locinput))
2688*0Sstevel@tonic-gate {
2689*0Sstevel@tonic-gate sayNO;
2690*0Sstevel@tonic-gate }
2691*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2692*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2693*0Sstevel@tonic-gate break;
2694*0Sstevel@tonic-gate }
2695*0Sstevel@tonic-gate if (OP(scan) == NSPACE
2696*0Sstevel@tonic-gate ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2697*0Sstevel@tonic-gate sayNO;
2698*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2699*0Sstevel@tonic-gate break;
2700*0Sstevel@tonic-gate case DIGITL:
2701*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2702*0Sstevel@tonic-gate /* FALL THROUGH */
2703*0Sstevel@tonic-gate case DIGIT:
2704*0Sstevel@tonic-gate if (!nextchr)
2705*0Sstevel@tonic-gate sayNO;
2706*0Sstevel@tonic-gate if (do_utf8) {
2707*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0");
2708*0Sstevel@tonic-gate if (!(OP(scan) == DIGIT
2709*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2710*0Sstevel@tonic-gate : isDIGIT_LC_utf8((U8*)locinput)))
2711*0Sstevel@tonic-gate {
2712*0Sstevel@tonic-gate sayNO;
2713*0Sstevel@tonic-gate }
2714*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2715*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2716*0Sstevel@tonic-gate break;
2717*0Sstevel@tonic-gate }
2718*0Sstevel@tonic-gate if (!(OP(scan) == DIGIT
2719*0Sstevel@tonic-gate ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2720*0Sstevel@tonic-gate sayNO;
2721*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2722*0Sstevel@tonic-gate break;
2723*0Sstevel@tonic-gate case NDIGITL:
2724*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2725*0Sstevel@tonic-gate /* FALL THROUGH */
2726*0Sstevel@tonic-gate case NDIGIT:
2727*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol)
2728*0Sstevel@tonic-gate sayNO;
2729*0Sstevel@tonic-gate if (do_utf8) {
2730*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0");
2731*0Sstevel@tonic-gate if (OP(scan) == NDIGIT
2732*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2733*0Sstevel@tonic-gate : isDIGIT_LC_utf8((U8*)locinput))
2734*0Sstevel@tonic-gate {
2735*0Sstevel@tonic-gate sayNO;
2736*0Sstevel@tonic-gate }
2737*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2738*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2739*0Sstevel@tonic-gate break;
2740*0Sstevel@tonic-gate }
2741*0Sstevel@tonic-gate if (OP(scan) == NDIGIT
2742*0Sstevel@tonic-gate ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2743*0Sstevel@tonic-gate sayNO;
2744*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput);
2745*0Sstevel@tonic-gate break;
2746*0Sstevel@tonic-gate case CLUMP:
2747*0Sstevel@tonic-gate if (locinput >= PL_regeol)
2748*0Sstevel@tonic-gate sayNO;
2749*0Sstevel@tonic-gate if (do_utf8) {
2750*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(mark,"~");
2751*0Sstevel@tonic-gate if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2752*0Sstevel@tonic-gate sayNO;
2753*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr];
2754*0Sstevel@tonic-gate while (locinput < PL_regeol &&
2755*0Sstevel@tonic-gate swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2756*0Sstevel@tonic-gate locinput += UTF8SKIP(locinput);
2757*0Sstevel@tonic-gate if (locinput > PL_regeol)
2758*0Sstevel@tonic-gate sayNO;
2759*0Sstevel@tonic-gate }
2760*0Sstevel@tonic-gate else
2761*0Sstevel@tonic-gate locinput++;
2762*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2763*0Sstevel@tonic-gate break;
2764*0Sstevel@tonic-gate case REFFL:
2765*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
2766*0Sstevel@tonic-gate /* FALL THROUGH */
2767*0Sstevel@tonic-gate case REF:
2768*0Sstevel@tonic-gate case REFF:
2769*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */
2770*0Sstevel@tonic-gate ln = PL_regstartp[n];
2771*0Sstevel@tonic-gate PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2772*0Sstevel@tonic-gate if ((I32)*PL_reglastparen < n || ln == -1)
2773*0Sstevel@tonic-gate sayNO; /* Do not match unless seen CLOSEn. */
2774*0Sstevel@tonic-gate if (ln == PL_regendp[n])
2775*0Sstevel@tonic-gate break;
2776*0Sstevel@tonic-gate
2777*0Sstevel@tonic-gate s = PL_bostr + ln;
2778*0Sstevel@tonic-gate if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
2779*0Sstevel@tonic-gate char *l = locinput;
2780*0Sstevel@tonic-gate char *e = PL_bostr + PL_regendp[n];
2781*0Sstevel@tonic-gate /*
2782*0Sstevel@tonic-gate * Note that we can't do the "other character" lookup trick as
2783*0Sstevel@tonic-gate * in the 8-bit case (no pun intended) because in Unicode we
2784*0Sstevel@tonic-gate * have to map both upper and title case to lower case.
2785*0Sstevel@tonic-gate */
2786*0Sstevel@tonic-gate if (OP(scan) == REFF) {
2787*0Sstevel@tonic-gate STRLEN ulen1, ulen2;
2788*0Sstevel@tonic-gate U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2789*0Sstevel@tonic-gate U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2790*0Sstevel@tonic-gate while (s < e) {
2791*0Sstevel@tonic-gate if (l >= PL_regeol)
2792*0Sstevel@tonic-gate sayNO;
2793*0Sstevel@tonic-gate toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2794*0Sstevel@tonic-gate toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2795*0Sstevel@tonic-gate if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2796*0Sstevel@tonic-gate sayNO;
2797*0Sstevel@tonic-gate s += ulen1;
2798*0Sstevel@tonic-gate l += ulen2;
2799*0Sstevel@tonic-gate }
2800*0Sstevel@tonic-gate }
2801*0Sstevel@tonic-gate locinput = l;
2802*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2803*0Sstevel@tonic-gate break;
2804*0Sstevel@tonic-gate }
2805*0Sstevel@tonic-gate
2806*0Sstevel@tonic-gate /* Inline the first character, for speed. */
2807*0Sstevel@tonic-gate if (UCHARAT(s) != nextchr &&
2808*0Sstevel@tonic-gate (OP(scan) == REF ||
2809*0Sstevel@tonic-gate (UCHARAT(s) != ((OP(scan) == REFF
2810*0Sstevel@tonic-gate ? PL_fold : PL_fold_locale)[nextchr]))))
2811*0Sstevel@tonic-gate sayNO;
2812*0Sstevel@tonic-gate ln = PL_regendp[n] - ln;
2813*0Sstevel@tonic-gate if (locinput + ln > PL_regeol)
2814*0Sstevel@tonic-gate sayNO;
2815*0Sstevel@tonic-gate if (ln > 1 && (OP(scan) == REF
2816*0Sstevel@tonic-gate ? memNE(s, locinput, ln)
2817*0Sstevel@tonic-gate : (OP(scan) == REFF
2818*0Sstevel@tonic-gate ? ibcmp(s, locinput, ln)
2819*0Sstevel@tonic-gate : ibcmp_locale(s, locinput, ln))))
2820*0Sstevel@tonic-gate sayNO;
2821*0Sstevel@tonic-gate locinput += ln;
2822*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
2823*0Sstevel@tonic-gate break;
2824*0Sstevel@tonic-gate
2825*0Sstevel@tonic-gate case NOTHING:
2826*0Sstevel@tonic-gate case TAIL:
2827*0Sstevel@tonic-gate break;
2828*0Sstevel@tonic-gate case BACK:
2829*0Sstevel@tonic-gate break;
2830*0Sstevel@tonic-gate case EVAL:
2831*0Sstevel@tonic-gate {
2832*0Sstevel@tonic-gate dSP;
2833*0Sstevel@tonic-gate OP_4tree *oop = PL_op;
2834*0Sstevel@tonic-gate COP *ocurcop = PL_curcop;
2835*0Sstevel@tonic-gate PAD *old_comppad;
2836*0Sstevel@tonic-gate SV *ret;
2837*0Sstevel@tonic-gate struct regexp *oreg = PL_reg_re;
2838*0Sstevel@tonic-gate
2839*0Sstevel@tonic-gate n = ARG(scan);
2840*0Sstevel@tonic-gate PL_op = (OP_4tree*)PL_regdata->data[n];
2841*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2842*0Sstevel@tonic-gate PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2843*0Sstevel@tonic-gate PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2844*0Sstevel@tonic-gate
2845*0Sstevel@tonic-gate {
2846*0Sstevel@tonic-gate SV **before = SP;
2847*0Sstevel@tonic-gate CALLRUNOPS(aTHX); /* Scalar context. */
2848*0Sstevel@tonic-gate SPAGAIN;
2849*0Sstevel@tonic-gate if (SP == before)
2850*0Sstevel@tonic-gate ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
2851*0Sstevel@tonic-gate else {
2852*0Sstevel@tonic-gate ret = POPs;
2853*0Sstevel@tonic-gate PUTBACK;
2854*0Sstevel@tonic-gate }
2855*0Sstevel@tonic-gate }
2856*0Sstevel@tonic-gate
2857*0Sstevel@tonic-gate PL_op = oop;
2858*0Sstevel@tonic-gate PAD_RESTORE_LOCAL(old_comppad);
2859*0Sstevel@tonic-gate PL_curcop = ocurcop;
2860*0Sstevel@tonic-gate if (logical) {
2861*0Sstevel@tonic-gate if (logical == 2) { /* Postponed subexpression. */
2862*0Sstevel@tonic-gate regexp *re;
2863*0Sstevel@tonic-gate MAGIC *mg = Null(MAGIC*);
2864*0Sstevel@tonic-gate re_cc_state state;
2865*0Sstevel@tonic-gate CHECKPOINT cp, lastcp;
2866*0Sstevel@tonic-gate int toggleutf;
2867*0Sstevel@tonic-gate register SV *sv;
2868*0Sstevel@tonic-gate
2869*0Sstevel@tonic-gate if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2870*0Sstevel@tonic-gate mg = mg_find(sv, PERL_MAGIC_qr);
2871*0Sstevel@tonic-gate else if (SvSMAGICAL(ret)) {
2872*0Sstevel@tonic-gate if (SvGMAGICAL(ret))
2873*0Sstevel@tonic-gate sv_unmagic(ret, PERL_MAGIC_qr);
2874*0Sstevel@tonic-gate else
2875*0Sstevel@tonic-gate mg = mg_find(ret, PERL_MAGIC_qr);
2876*0Sstevel@tonic-gate }
2877*0Sstevel@tonic-gate
2878*0Sstevel@tonic-gate if (mg) {
2879*0Sstevel@tonic-gate re = (regexp *)mg->mg_obj;
2880*0Sstevel@tonic-gate (void)ReREFCNT_inc(re);
2881*0Sstevel@tonic-gate }
2882*0Sstevel@tonic-gate else {
2883*0Sstevel@tonic-gate STRLEN len;
2884*0Sstevel@tonic-gate char *t = SvPV(ret, len);
2885*0Sstevel@tonic-gate PMOP pm;
2886*0Sstevel@tonic-gate char *oprecomp = PL_regprecomp;
2887*0Sstevel@tonic-gate I32 osize = PL_regsize;
2888*0Sstevel@tonic-gate I32 onpar = PL_regnpar;
2889*0Sstevel@tonic-gate
2890*0Sstevel@tonic-gate Zero(&pm, 1, PMOP);
2891*0Sstevel@tonic-gate if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2892*0Sstevel@tonic-gate re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2893*0Sstevel@tonic-gate if (!(SvFLAGS(ret)
2894*0Sstevel@tonic-gate & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2895*0Sstevel@tonic-gate | SVs_GMG)))
2896*0Sstevel@tonic-gate sv_magic(ret,(SV*)ReREFCNT_inc(re),
2897*0Sstevel@tonic-gate PERL_MAGIC_qr,0,0);
2898*0Sstevel@tonic-gate PL_regprecomp = oprecomp;
2899*0Sstevel@tonic-gate PL_regsize = osize;
2900*0Sstevel@tonic-gate PL_regnpar = onpar;
2901*0Sstevel@tonic-gate }
2902*0Sstevel@tonic-gate DEBUG_r(
2903*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
2904*0Sstevel@tonic-gate "Entering embedded `%s%.60s%s%s'\n",
2905*0Sstevel@tonic-gate PL_colors[0],
2906*0Sstevel@tonic-gate re->precomp,
2907*0Sstevel@tonic-gate PL_colors[1],
2908*0Sstevel@tonic-gate (strlen(re->precomp) > 60 ? "..." : ""))
2909*0Sstevel@tonic-gate );
2910*0Sstevel@tonic-gate state.node = next;
2911*0Sstevel@tonic-gate state.prev = PL_reg_call_cc;
2912*0Sstevel@tonic-gate state.cc = PL_regcc;
2913*0Sstevel@tonic-gate state.re = PL_reg_re;
2914*0Sstevel@tonic-gate
2915*0Sstevel@tonic-gate PL_regcc = 0;
2916*0Sstevel@tonic-gate
2917*0Sstevel@tonic-gate cp = regcppush(0); /* Save *all* the positions. */
2918*0Sstevel@tonic-gate REGCP_SET(lastcp);
2919*0Sstevel@tonic-gate cache_re(re);
2920*0Sstevel@tonic-gate state.ss = PL_savestack_ix;
2921*0Sstevel@tonic-gate *PL_reglastparen = 0;
2922*0Sstevel@tonic-gate *PL_reglastcloseparen = 0;
2923*0Sstevel@tonic-gate PL_reg_call_cc = &state;
2924*0Sstevel@tonic-gate PL_reginput = locinput;
2925*0Sstevel@tonic-gate toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2926*0Sstevel@tonic-gate ((re->reganch & ROPT_UTF8) != 0);
2927*0Sstevel@tonic-gate if (toggleutf) PL_reg_flags ^= RF_utf8;
2928*0Sstevel@tonic-gate
2929*0Sstevel@tonic-gate /* XXXX This is too dramatic a measure... */
2930*0Sstevel@tonic-gate PL_reg_maxiter = 0;
2931*0Sstevel@tonic-gate
2932*0Sstevel@tonic-gate if (regmatch(re->program + 1)) {
2933*0Sstevel@tonic-gate /* Even though we succeeded, we need to restore
2934*0Sstevel@tonic-gate global variables, since we may be wrapped inside
2935*0Sstevel@tonic-gate SUSPEND, thus the match may be not finished yet. */
2936*0Sstevel@tonic-gate
2937*0Sstevel@tonic-gate /* XXXX Do this only if SUSPENDed? */
2938*0Sstevel@tonic-gate PL_reg_call_cc = state.prev;
2939*0Sstevel@tonic-gate PL_regcc = state.cc;
2940*0Sstevel@tonic-gate PL_reg_re = state.re;
2941*0Sstevel@tonic-gate cache_re(PL_reg_re);
2942*0Sstevel@tonic-gate if (toggleutf) PL_reg_flags ^= RF_utf8;
2943*0Sstevel@tonic-gate
2944*0Sstevel@tonic-gate /* XXXX This is too dramatic a measure... */
2945*0Sstevel@tonic-gate PL_reg_maxiter = 0;
2946*0Sstevel@tonic-gate
2947*0Sstevel@tonic-gate /* These are needed even if not SUSPEND. */
2948*0Sstevel@tonic-gate ReREFCNT_dec(re);
2949*0Sstevel@tonic-gate regcpblow(cp);
2950*0Sstevel@tonic-gate sayYES;
2951*0Sstevel@tonic-gate }
2952*0Sstevel@tonic-gate ReREFCNT_dec(re);
2953*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
2954*0Sstevel@tonic-gate regcppop();
2955*0Sstevel@tonic-gate PL_reg_call_cc = state.prev;
2956*0Sstevel@tonic-gate PL_regcc = state.cc;
2957*0Sstevel@tonic-gate PL_reg_re = state.re;
2958*0Sstevel@tonic-gate cache_re(PL_reg_re);
2959*0Sstevel@tonic-gate if (toggleutf) PL_reg_flags ^= RF_utf8;
2960*0Sstevel@tonic-gate
2961*0Sstevel@tonic-gate /* XXXX This is too dramatic a measure... */
2962*0Sstevel@tonic-gate PL_reg_maxiter = 0;
2963*0Sstevel@tonic-gate
2964*0Sstevel@tonic-gate logical = 0;
2965*0Sstevel@tonic-gate sayNO;
2966*0Sstevel@tonic-gate }
2967*0Sstevel@tonic-gate sw = SvTRUE(ret);
2968*0Sstevel@tonic-gate logical = 0;
2969*0Sstevel@tonic-gate }
2970*0Sstevel@tonic-gate else {
2971*0Sstevel@tonic-gate sv_setsv(save_scalar(PL_replgv), ret);
2972*0Sstevel@tonic-gate cache_re(oreg);
2973*0Sstevel@tonic-gate }
2974*0Sstevel@tonic-gate break;
2975*0Sstevel@tonic-gate }
2976*0Sstevel@tonic-gate case OPEN:
2977*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */
2978*0Sstevel@tonic-gate PL_reg_start_tmp[n] = locinput;
2979*0Sstevel@tonic-gate if (n > PL_regsize)
2980*0Sstevel@tonic-gate PL_regsize = n;
2981*0Sstevel@tonic-gate break;
2982*0Sstevel@tonic-gate case CLOSE:
2983*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */
2984*0Sstevel@tonic-gate PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2985*0Sstevel@tonic-gate PL_regendp[n] = locinput - PL_bostr;
2986*0Sstevel@tonic-gate if (n > (I32)*PL_reglastparen)
2987*0Sstevel@tonic-gate *PL_reglastparen = n;
2988*0Sstevel@tonic-gate *PL_reglastcloseparen = n;
2989*0Sstevel@tonic-gate break;
2990*0Sstevel@tonic-gate case GROUPP:
2991*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */
2992*0Sstevel@tonic-gate sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2993*0Sstevel@tonic-gate break;
2994*0Sstevel@tonic-gate case IFTHEN:
2995*0Sstevel@tonic-gate PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2996*0Sstevel@tonic-gate if (sw)
2997*0Sstevel@tonic-gate next = NEXTOPER(NEXTOPER(scan));
2998*0Sstevel@tonic-gate else {
2999*0Sstevel@tonic-gate next = scan + ARG(scan);
3000*0Sstevel@tonic-gate if (OP(next) == IFTHEN) /* Fake one. */
3001*0Sstevel@tonic-gate next = NEXTOPER(NEXTOPER(next));
3002*0Sstevel@tonic-gate }
3003*0Sstevel@tonic-gate break;
3004*0Sstevel@tonic-gate case LOGICAL:
3005*0Sstevel@tonic-gate logical = scan->flags;
3006*0Sstevel@tonic-gate break;
3007*0Sstevel@tonic-gate /*******************************************************************
3008*0Sstevel@tonic-gate PL_regcc contains infoblock about the innermost (...)* loop, and
3009*0Sstevel@tonic-gate a pointer to the next outer infoblock.
3010*0Sstevel@tonic-gate
3011*0Sstevel@tonic-gate Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3012*0Sstevel@tonic-gate
3013*0Sstevel@tonic-gate 1) After matching X, regnode for CURLYX is processed;
3014*0Sstevel@tonic-gate
3015*0Sstevel@tonic-gate 2) This regnode creates infoblock on the stack, and calls
3016*0Sstevel@tonic-gate regmatch() recursively with the starting point at WHILEM node;
3017*0Sstevel@tonic-gate
3018*0Sstevel@tonic-gate 3) Each hit of WHILEM node tries to match A and Z (in the order
3019*0Sstevel@tonic-gate depending on the current iteration, min/max of {min,max} and
3020*0Sstevel@tonic-gate greediness). The information about where are nodes for "A"
3021*0Sstevel@tonic-gate and "Z" is read from the infoblock, as is info on how many times "A"
3022*0Sstevel@tonic-gate was already matched, and greediness.
3023*0Sstevel@tonic-gate
3024*0Sstevel@tonic-gate 4) After A matches, the same WHILEM node is hit again.
3025*0Sstevel@tonic-gate
3026*0Sstevel@tonic-gate 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3027*0Sstevel@tonic-gate of the same pair. Thus when WHILEM tries to match Z, it temporarily
3028*0Sstevel@tonic-gate resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3029*0Sstevel@tonic-gate as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3030*0Sstevel@tonic-gate of the external loop.
3031*0Sstevel@tonic-gate
3032*0Sstevel@tonic-gate Currently present infoblocks form a tree with a stem formed by PL_curcc
3033*0Sstevel@tonic-gate and whatever it mentions via ->next, and additional attached trees
3034*0Sstevel@tonic-gate corresponding to temporarily unset infoblocks as in "5" above.
3035*0Sstevel@tonic-gate
3036*0Sstevel@tonic-gate In the following picture infoblocks for outer loop of
3037*0Sstevel@tonic-gate (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3038*0Sstevel@tonic-gate is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3039*0Sstevel@tonic-gate infoblocks are drawn below the "reset" infoblock.
3040*0Sstevel@tonic-gate
3041*0Sstevel@tonic-gate In fact in the picture below we do not show failed matches for Z and T
3042*0Sstevel@tonic-gate by WHILEM blocks. [We illustrate minimal matches, since for them it is
3043*0Sstevel@tonic-gate more obvious *why* one needs to *temporary* unset infoblocks.]
3044*0Sstevel@tonic-gate
3045*0Sstevel@tonic-gate Matched REx position InfoBlocks Comment
3046*0Sstevel@tonic-gate (Y(A)*?Z)*?T x
3047*0Sstevel@tonic-gate Y(A)*?Z)*?T x <- O
3048*0Sstevel@tonic-gate Y (A)*?Z)*?T x <- O
3049*0Sstevel@tonic-gate Y A)*?Z)*?T x <- O <- I
3050*0Sstevel@tonic-gate YA )*?Z)*?T x <- O <- I
3051*0Sstevel@tonic-gate YA A)*?Z)*?T x <- O <- I
3052*0Sstevel@tonic-gate YAA )*?Z)*?T x <- O <- I
3053*0Sstevel@tonic-gate YAA Z)*?T x <- O # Temporary unset I
3054*0Sstevel@tonic-gate I
3055*0Sstevel@tonic-gate
3056*0Sstevel@tonic-gate YAAZ Y(A)*?Z)*?T x <- O
3057*0Sstevel@tonic-gate I
3058*0Sstevel@tonic-gate
3059*0Sstevel@tonic-gate YAAZY (A)*?Z)*?T x <- O
3060*0Sstevel@tonic-gate I
3061*0Sstevel@tonic-gate
3062*0Sstevel@tonic-gate YAAZY A)*?Z)*?T x <- O <- I
3063*0Sstevel@tonic-gate I
3064*0Sstevel@tonic-gate
3065*0Sstevel@tonic-gate YAAZYA )*?Z)*?T x <- O <- I
3066*0Sstevel@tonic-gate I
3067*0Sstevel@tonic-gate
3068*0Sstevel@tonic-gate YAAZYA Z)*?T x <- O # Temporary unset I
3069*0Sstevel@tonic-gate I,I
3070*0Sstevel@tonic-gate
3071*0Sstevel@tonic-gate YAAZYAZ )*?T x <- O
3072*0Sstevel@tonic-gate I,I
3073*0Sstevel@tonic-gate
3074*0Sstevel@tonic-gate YAAZYAZ T x # Temporary unset O
3075*0Sstevel@tonic-gate O
3076*0Sstevel@tonic-gate I,I
3077*0Sstevel@tonic-gate
3078*0Sstevel@tonic-gate YAAZYAZT x
3079*0Sstevel@tonic-gate O
3080*0Sstevel@tonic-gate I,I
3081*0Sstevel@tonic-gate *******************************************************************/
3082*0Sstevel@tonic-gate case CURLYX: {
3083*0Sstevel@tonic-gate CURCUR cc;
3084*0Sstevel@tonic-gate CHECKPOINT cp = PL_savestack_ix;
3085*0Sstevel@tonic-gate /* No need to save/restore up to this paren */
3086*0Sstevel@tonic-gate I32 parenfloor = scan->flags;
3087*0Sstevel@tonic-gate
3088*0Sstevel@tonic-gate if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3089*0Sstevel@tonic-gate next += ARG(next);
3090*0Sstevel@tonic-gate cc.oldcc = PL_regcc;
3091*0Sstevel@tonic-gate PL_regcc = &cc;
3092*0Sstevel@tonic-gate /* XXXX Probably it is better to teach regpush to support
3093*0Sstevel@tonic-gate parenfloor > PL_regsize... */
3094*0Sstevel@tonic-gate if (parenfloor > (I32)*PL_reglastparen)
3095*0Sstevel@tonic-gate parenfloor = *PL_reglastparen; /* Pessimization... */
3096*0Sstevel@tonic-gate cc.parenfloor = parenfloor;
3097*0Sstevel@tonic-gate cc.cur = -1;
3098*0Sstevel@tonic-gate cc.min = ARG1(scan);
3099*0Sstevel@tonic-gate cc.max = ARG2(scan);
3100*0Sstevel@tonic-gate cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3101*0Sstevel@tonic-gate cc.next = next;
3102*0Sstevel@tonic-gate cc.minmod = minmod;
3103*0Sstevel@tonic-gate cc.lastloc = 0;
3104*0Sstevel@tonic-gate PL_reginput = locinput;
3105*0Sstevel@tonic-gate n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3106*0Sstevel@tonic-gate regcpblow(cp);
3107*0Sstevel@tonic-gate PL_regcc = cc.oldcc;
3108*0Sstevel@tonic-gate saySAME(n);
3109*0Sstevel@tonic-gate }
3110*0Sstevel@tonic-gate /* NOT REACHED */
3111*0Sstevel@tonic-gate case WHILEM: {
3112*0Sstevel@tonic-gate /*
3113*0Sstevel@tonic-gate * This is really hard to understand, because after we match
3114*0Sstevel@tonic-gate * what we're trying to match, we must make sure the rest of
3115*0Sstevel@tonic-gate * the REx is going to match for sure, and to do that we have
3116*0Sstevel@tonic-gate * to go back UP the parse tree by recursing ever deeper. And
3117*0Sstevel@tonic-gate * if it fails, we have to reset our parent's current state
3118*0Sstevel@tonic-gate * that we can try again after backing off.
3119*0Sstevel@tonic-gate */
3120*0Sstevel@tonic-gate
3121*0Sstevel@tonic-gate CHECKPOINT cp, lastcp;
3122*0Sstevel@tonic-gate CURCUR* cc = PL_regcc;
3123*0Sstevel@tonic-gate char *lastloc = cc->lastloc; /* Detection of 0-len. */
3124*0Sstevel@tonic-gate
3125*0Sstevel@tonic-gate n = cc->cur + 1; /* how many we know we matched */
3126*0Sstevel@tonic-gate PL_reginput = locinput;
3127*0Sstevel@tonic-gate
3128*0Sstevel@tonic-gate DEBUG_r(
3129*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3130*0Sstevel@tonic-gate "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3131*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "",
3132*0Sstevel@tonic-gate (long)n, (long)cc->min,
3133*0Sstevel@tonic-gate (long)cc->max, PTR2UV(cc))
3134*0Sstevel@tonic-gate );
3135*0Sstevel@tonic-gate
3136*0Sstevel@tonic-gate /* If degenerate scan matches "", assume scan done. */
3137*0Sstevel@tonic-gate
3138*0Sstevel@tonic-gate if (locinput == cc->lastloc && n >= cc->min) {
3139*0Sstevel@tonic-gate PL_regcc = cc->oldcc;
3140*0Sstevel@tonic-gate if (PL_regcc)
3141*0Sstevel@tonic-gate ln = PL_regcc->cur;
3142*0Sstevel@tonic-gate DEBUG_r(
3143*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3144*0Sstevel@tonic-gate "%*s empty match detected, try continuation...\n",
3145*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "")
3146*0Sstevel@tonic-gate );
3147*0Sstevel@tonic-gate if (regmatch(cc->next))
3148*0Sstevel@tonic-gate sayYES;
3149*0Sstevel@tonic-gate if (PL_regcc)
3150*0Sstevel@tonic-gate PL_regcc->cur = ln;
3151*0Sstevel@tonic-gate PL_regcc = cc;
3152*0Sstevel@tonic-gate sayNO;
3153*0Sstevel@tonic-gate }
3154*0Sstevel@tonic-gate
3155*0Sstevel@tonic-gate /* First just match a string of min scans. */
3156*0Sstevel@tonic-gate
3157*0Sstevel@tonic-gate if (n < cc->min) {
3158*0Sstevel@tonic-gate cc->cur = n;
3159*0Sstevel@tonic-gate cc->lastloc = locinput;
3160*0Sstevel@tonic-gate if (regmatch(cc->scan))
3161*0Sstevel@tonic-gate sayYES;
3162*0Sstevel@tonic-gate cc->cur = n - 1;
3163*0Sstevel@tonic-gate cc->lastloc = lastloc;
3164*0Sstevel@tonic-gate sayNO;
3165*0Sstevel@tonic-gate }
3166*0Sstevel@tonic-gate
3167*0Sstevel@tonic-gate if (scan->flags) {
3168*0Sstevel@tonic-gate /* Check whether we already were at this position.
3169*0Sstevel@tonic-gate Postpone detection until we know the match is not
3170*0Sstevel@tonic-gate *that* much linear. */
3171*0Sstevel@tonic-gate if (!PL_reg_maxiter) {
3172*0Sstevel@tonic-gate PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3173*0Sstevel@tonic-gate PL_reg_leftiter = PL_reg_maxiter;
3174*0Sstevel@tonic-gate }
3175*0Sstevel@tonic-gate if (PL_reg_leftiter-- == 0) {
3176*0Sstevel@tonic-gate I32 size = (PL_reg_maxiter + 7)/8;
3177*0Sstevel@tonic-gate if (PL_reg_poscache) {
3178*0Sstevel@tonic-gate if ((I32)PL_reg_poscache_size < size) {
3179*0Sstevel@tonic-gate Renew(PL_reg_poscache, size, char);
3180*0Sstevel@tonic-gate PL_reg_poscache_size = size;
3181*0Sstevel@tonic-gate }
3182*0Sstevel@tonic-gate Zero(PL_reg_poscache, size, char);
3183*0Sstevel@tonic-gate }
3184*0Sstevel@tonic-gate else {
3185*0Sstevel@tonic-gate PL_reg_poscache_size = size;
3186*0Sstevel@tonic-gate Newz(29, PL_reg_poscache, size, char);
3187*0Sstevel@tonic-gate }
3188*0Sstevel@tonic-gate DEBUG_r(
3189*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3190*0Sstevel@tonic-gate "%sDetected a super-linear match, switching on caching%s...\n",
3191*0Sstevel@tonic-gate PL_colors[4], PL_colors[5])
3192*0Sstevel@tonic-gate );
3193*0Sstevel@tonic-gate }
3194*0Sstevel@tonic-gate if (PL_reg_leftiter < 0) {
3195*0Sstevel@tonic-gate I32 o = locinput - PL_bostr, b;
3196*0Sstevel@tonic-gate
3197*0Sstevel@tonic-gate o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3198*0Sstevel@tonic-gate b = o % 8;
3199*0Sstevel@tonic-gate o /= 8;
3200*0Sstevel@tonic-gate if (PL_reg_poscache[o] & (1<<b)) {
3201*0Sstevel@tonic-gate DEBUG_r(
3202*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3203*0Sstevel@tonic-gate "%*s already tried at this position...\n",
3204*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "")
3205*0Sstevel@tonic-gate );
3206*0Sstevel@tonic-gate if (PL_reg_flags & RF_false)
3207*0Sstevel@tonic-gate sayYES;
3208*0Sstevel@tonic-gate else
3209*0Sstevel@tonic-gate sayNO_SILENT;
3210*0Sstevel@tonic-gate }
3211*0Sstevel@tonic-gate PL_reg_poscache[o] |= (1<<b);
3212*0Sstevel@tonic-gate }
3213*0Sstevel@tonic-gate }
3214*0Sstevel@tonic-gate
3215*0Sstevel@tonic-gate /* Prefer next over scan for minimal matching. */
3216*0Sstevel@tonic-gate
3217*0Sstevel@tonic-gate if (cc->minmod) {
3218*0Sstevel@tonic-gate PL_regcc = cc->oldcc;
3219*0Sstevel@tonic-gate if (PL_regcc)
3220*0Sstevel@tonic-gate ln = PL_regcc->cur;
3221*0Sstevel@tonic-gate cp = regcppush(cc->parenfloor);
3222*0Sstevel@tonic-gate REGCP_SET(lastcp);
3223*0Sstevel@tonic-gate if (regmatch(cc->next)) {
3224*0Sstevel@tonic-gate regcpblow(cp);
3225*0Sstevel@tonic-gate sayYES; /* All done. */
3226*0Sstevel@tonic-gate }
3227*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3228*0Sstevel@tonic-gate regcppop();
3229*0Sstevel@tonic-gate if (PL_regcc)
3230*0Sstevel@tonic-gate PL_regcc->cur = ln;
3231*0Sstevel@tonic-gate PL_regcc = cc;
3232*0Sstevel@tonic-gate
3233*0Sstevel@tonic-gate if (n >= cc->max) { /* Maximum greed exceeded? */
3234*0Sstevel@tonic-gate if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3235*0Sstevel@tonic-gate && !(PL_reg_flags & RF_warned)) {
3236*0Sstevel@tonic-gate PL_reg_flags |= RF_warned;
3237*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3238*0Sstevel@tonic-gate "Complex regular subexpression recursion",
3239*0Sstevel@tonic-gate REG_INFTY - 1);
3240*0Sstevel@tonic-gate }
3241*0Sstevel@tonic-gate sayNO;
3242*0Sstevel@tonic-gate }
3243*0Sstevel@tonic-gate
3244*0Sstevel@tonic-gate DEBUG_r(
3245*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3246*0Sstevel@tonic-gate "%*s trying longer...\n",
3247*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "")
3248*0Sstevel@tonic-gate );
3249*0Sstevel@tonic-gate /* Try scanning more and see if it helps. */
3250*0Sstevel@tonic-gate PL_reginput = locinput;
3251*0Sstevel@tonic-gate cc->cur = n;
3252*0Sstevel@tonic-gate cc->lastloc = locinput;
3253*0Sstevel@tonic-gate cp = regcppush(cc->parenfloor);
3254*0Sstevel@tonic-gate REGCP_SET(lastcp);
3255*0Sstevel@tonic-gate if (regmatch(cc->scan)) {
3256*0Sstevel@tonic-gate regcpblow(cp);
3257*0Sstevel@tonic-gate sayYES;
3258*0Sstevel@tonic-gate }
3259*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3260*0Sstevel@tonic-gate regcppop();
3261*0Sstevel@tonic-gate cc->cur = n - 1;
3262*0Sstevel@tonic-gate cc->lastloc = lastloc;
3263*0Sstevel@tonic-gate sayNO;
3264*0Sstevel@tonic-gate }
3265*0Sstevel@tonic-gate
3266*0Sstevel@tonic-gate /* Prefer scan over next for maximal matching. */
3267*0Sstevel@tonic-gate
3268*0Sstevel@tonic-gate if (n < cc->max) { /* More greed allowed? */
3269*0Sstevel@tonic-gate cp = regcppush(cc->parenfloor);
3270*0Sstevel@tonic-gate cc->cur = n;
3271*0Sstevel@tonic-gate cc->lastloc = locinput;
3272*0Sstevel@tonic-gate REGCP_SET(lastcp);
3273*0Sstevel@tonic-gate if (regmatch(cc->scan)) {
3274*0Sstevel@tonic-gate regcpblow(cp);
3275*0Sstevel@tonic-gate sayYES;
3276*0Sstevel@tonic-gate }
3277*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3278*0Sstevel@tonic-gate regcppop(); /* Restore some previous $<digit>s? */
3279*0Sstevel@tonic-gate PL_reginput = locinput;
3280*0Sstevel@tonic-gate DEBUG_r(
3281*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3282*0Sstevel@tonic-gate "%*s failed, try continuation...\n",
3283*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "")
3284*0Sstevel@tonic-gate );
3285*0Sstevel@tonic-gate }
3286*0Sstevel@tonic-gate if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3287*0Sstevel@tonic-gate && !(PL_reg_flags & RF_warned)) {
3288*0Sstevel@tonic-gate PL_reg_flags |= RF_warned;
3289*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3290*0Sstevel@tonic-gate "Complex regular subexpression recursion",
3291*0Sstevel@tonic-gate REG_INFTY - 1);
3292*0Sstevel@tonic-gate }
3293*0Sstevel@tonic-gate
3294*0Sstevel@tonic-gate /* Failed deeper matches of scan, so see if this one works. */
3295*0Sstevel@tonic-gate PL_regcc = cc->oldcc;
3296*0Sstevel@tonic-gate if (PL_regcc)
3297*0Sstevel@tonic-gate ln = PL_regcc->cur;
3298*0Sstevel@tonic-gate if (regmatch(cc->next))
3299*0Sstevel@tonic-gate sayYES;
3300*0Sstevel@tonic-gate if (PL_regcc)
3301*0Sstevel@tonic-gate PL_regcc->cur = ln;
3302*0Sstevel@tonic-gate PL_regcc = cc;
3303*0Sstevel@tonic-gate cc->cur = n - 1;
3304*0Sstevel@tonic-gate cc->lastloc = lastloc;
3305*0Sstevel@tonic-gate sayNO;
3306*0Sstevel@tonic-gate }
3307*0Sstevel@tonic-gate /* NOT REACHED */
3308*0Sstevel@tonic-gate case BRANCHJ:
3309*0Sstevel@tonic-gate next = scan + ARG(scan);
3310*0Sstevel@tonic-gate if (next == scan)
3311*0Sstevel@tonic-gate next = NULL;
3312*0Sstevel@tonic-gate inner = NEXTOPER(NEXTOPER(scan));
3313*0Sstevel@tonic-gate goto do_branch;
3314*0Sstevel@tonic-gate case BRANCH:
3315*0Sstevel@tonic-gate inner = NEXTOPER(scan);
3316*0Sstevel@tonic-gate do_branch:
3317*0Sstevel@tonic-gate {
3318*0Sstevel@tonic-gate c1 = OP(scan);
3319*0Sstevel@tonic-gate if (OP(next) != c1) /* No choice. */
3320*0Sstevel@tonic-gate next = inner; /* Avoid recursion. */
3321*0Sstevel@tonic-gate else {
3322*0Sstevel@tonic-gate I32 lastparen = *PL_reglastparen;
3323*0Sstevel@tonic-gate I32 unwind1;
3324*0Sstevel@tonic-gate re_unwind_branch_t *uw;
3325*0Sstevel@tonic-gate
3326*0Sstevel@tonic-gate /* Put unwinding data on stack */
3327*0Sstevel@tonic-gate unwind1 = SSNEWt(1,re_unwind_branch_t);
3328*0Sstevel@tonic-gate uw = SSPTRt(unwind1,re_unwind_branch_t);
3329*0Sstevel@tonic-gate uw->prev = unwind;
3330*0Sstevel@tonic-gate unwind = unwind1;
3331*0Sstevel@tonic-gate uw->type = ((c1 == BRANCH)
3332*0Sstevel@tonic-gate ? RE_UNWIND_BRANCH
3333*0Sstevel@tonic-gate : RE_UNWIND_BRANCHJ);
3334*0Sstevel@tonic-gate uw->lastparen = lastparen;
3335*0Sstevel@tonic-gate uw->next = next;
3336*0Sstevel@tonic-gate uw->locinput = locinput;
3337*0Sstevel@tonic-gate uw->nextchr = nextchr;
3338*0Sstevel@tonic-gate #ifdef DEBUGGING
3339*0Sstevel@tonic-gate uw->regindent = ++PL_regindent;
3340*0Sstevel@tonic-gate #endif
3341*0Sstevel@tonic-gate
3342*0Sstevel@tonic-gate REGCP_SET(uw->lastcp);
3343*0Sstevel@tonic-gate
3344*0Sstevel@tonic-gate /* Now go into the first branch */
3345*0Sstevel@tonic-gate next = inner;
3346*0Sstevel@tonic-gate }
3347*0Sstevel@tonic-gate }
3348*0Sstevel@tonic-gate break;
3349*0Sstevel@tonic-gate case MINMOD:
3350*0Sstevel@tonic-gate minmod = 1;
3351*0Sstevel@tonic-gate break;
3352*0Sstevel@tonic-gate case CURLYM:
3353*0Sstevel@tonic-gate {
3354*0Sstevel@tonic-gate I32 l = 0;
3355*0Sstevel@tonic-gate CHECKPOINT lastcp;
3356*0Sstevel@tonic-gate
3357*0Sstevel@tonic-gate /* We suppose that the next guy does not need
3358*0Sstevel@tonic-gate backtracking: in particular, it is of constant length,
3359*0Sstevel@tonic-gate and has no parenths to influence future backrefs. */
3360*0Sstevel@tonic-gate ln = ARG1(scan); /* min to match */
3361*0Sstevel@tonic-gate n = ARG2(scan); /* max to match */
3362*0Sstevel@tonic-gate paren = scan->flags;
3363*0Sstevel@tonic-gate if (paren) {
3364*0Sstevel@tonic-gate if (paren > PL_regsize)
3365*0Sstevel@tonic-gate PL_regsize = paren;
3366*0Sstevel@tonic-gate if (paren > (I32)*PL_reglastparen)
3367*0Sstevel@tonic-gate *PL_reglastparen = paren;
3368*0Sstevel@tonic-gate }
3369*0Sstevel@tonic-gate scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3370*0Sstevel@tonic-gate if (paren)
3371*0Sstevel@tonic-gate scan += NEXT_OFF(scan); /* Skip former OPEN. */
3372*0Sstevel@tonic-gate PL_reginput = locinput;
3373*0Sstevel@tonic-gate if (minmod) {
3374*0Sstevel@tonic-gate minmod = 0;
3375*0Sstevel@tonic-gate if (ln && regrepeat_hard(scan, ln, &l) < ln)
3376*0Sstevel@tonic-gate sayNO;
3377*0Sstevel@tonic-gate /* if we matched something zero-length we don't need to
3378*0Sstevel@tonic-gate backtrack - capturing parens are already defined, so
3379*0Sstevel@tonic-gate the caveat in the maximal case doesn't apply
3380*0Sstevel@tonic-gate
3381*0Sstevel@tonic-gate XXXX if ln == 0, we can redo this check first time
3382*0Sstevel@tonic-gate through the following loop
3383*0Sstevel@tonic-gate */
3384*0Sstevel@tonic-gate if (ln && l == 0)
3385*0Sstevel@tonic-gate n = ln; /* don't backtrack */
3386*0Sstevel@tonic-gate locinput = PL_reginput;
3387*0Sstevel@tonic-gate if (HAS_TEXT(next) || JUMPABLE(next)) {
3388*0Sstevel@tonic-gate regnode *text_node = next;
3389*0Sstevel@tonic-gate
3390*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3391*0Sstevel@tonic-gate
3392*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3393*0Sstevel@tonic-gate else {
3394*0Sstevel@tonic-gate if (PL_regkind[(U8)OP(text_node)] == REF) {
3395*0Sstevel@tonic-gate c1 = c2 = -1000;
3396*0Sstevel@tonic-gate goto assume_ok_MM;
3397*0Sstevel@tonic-gate }
3398*0Sstevel@tonic-gate else { c1 = (U8)*STRING(text_node); }
3399*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3400*0Sstevel@tonic-gate c2 = PL_fold[c1];
3401*0Sstevel@tonic-gate else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3402*0Sstevel@tonic-gate c2 = PL_fold_locale[c1];
3403*0Sstevel@tonic-gate else
3404*0Sstevel@tonic-gate c2 = c1;
3405*0Sstevel@tonic-gate }
3406*0Sstevel@tonic-gate }
3407*0Sstevel@tonic-gate else
3408*0Sstevel@tonic-gate c1 = c2 = -1000;
3409*0Sstevel@tonic-gate assume_ok_MM:
3410*0Sstevel@tonic-gate REGCP_SET(lastcp);
3411*0Sstevel@tonic-gate /* This may be improved if l == 0. */
3412*0Sstevel@tonic-gate while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3413*0Sstevel@tonic-gate /* If it could work, try it. */
3414*0Sstevel@tonic-gate if (c1 == -1000 ||
3415*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c1 ||
3416*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c2)
3417*0Sstevel@tonic-gate {
3418*0Sstevel@tonic-gate if (paren) {
3419*0Sstevel@tonic-gate if (ln) {
3420*0Sstevel@tonic-gate PL_regstartp[paren] =
3421*0Sstevel@tonic-gate HOPc(PL_reginput, -l) - PL_bostr;
3422*0Sstevel@tonic-gate PL_regendp[paren] = PL_reginput - PL_bostr;
3423*0Sstevel@tonic-gate }
3424*0Sstevel@tonic-gate else
3425*0Sstevel@tonic-gate PL_regendp[paren] = -1;
3426*0Sstevel@tonic-gate }
3427*0Sstevel@tonic-gate if (regmatch(next))
3428*0Sstevel@tonic-gate sayYES;
3429*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3430*0Sstevel@tonic-gate }
3431*0Sstevel@tonic-gate /* Couldn't or didn't -- move forward. */
3432*0Sstevel@tonic-gate PL_reginput = locinput;
3433*0Sstevel@tonic-gate if (regrepeat_hard(scan, 1, &l)) {
3434*0Sstevel@tonic-gate ln++;
3435*0Sstevel@tonic-gate locinput = PL_reginput;
3436*0Sstevel@tonic-gate }
3437*0Sstevel@tonic-gate else
3438*0Sstevel@tonic-gate sayNO;
3439*0Sstevel@tonic-gate }
3440*0Sstevel@tonic-gate }
3441*0Sstevel@tonic-gate else {
3442*0Sstevel@tonic-gate n = regrepeat_hard(scan, n, &l);
3443*0Sstevel@tonic-gate /* if we matched something zero-length we don't need to
3444*0Sstevel@tonic-gate backtrack, unless the minimum count is zero and we
3445*0Sstevel@tonic-gate are capturing the result - in that case the capture
3446*0Sstevel@tonic-gate being defined or not may affect later execution
3447*0Sstevel@tonic-gate */
3448*0Sstevel@tonic-gate if (n != 0 && l == 0 && !(paren && ln == 0))
3449*0Sstevel@tonic-gate ln = n; /* don't backtrack */
3450*0Sstevel@tonic-gate locinput = PL_reginput;
3451*0Sstevel@tonic-gate DEBUG_r(
3452*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3453*0Sstevel@tonic-gate "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3454*0Sstevel@tonic-gate (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3455*0Sstevel@tonic-gate (IV) n, (IV)l)
3456*0Sstevel@tonic-gate );
3457*0Sstevel@tonic-gate if (n >= ln) {
3458*0Sstevel@tonic-gate if (HAS_TEXT(next) || JUMPABLE(next)) {
3459*0Sstevel@tonic-gate regnode *text_node = next;
3460*0Sstevel@tonic-gate
3461*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3462*0Sstevel@tonic-gate
3463*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3464*0Sstevel@tonic-gate else {
3465*0Sstevel@tonic-gate if (PL_regkind[(U8)OP(text_node)] == REF) {
3466*0Sstevel@tonic-gate c1 = c2 = -1000;
3467*0Sstevel@tonic-gate goto assume_ok_REG;
3468*0Sstevel@tonic-gate }
3469*0Sstevel@tonic-gate else { c1 = (U8)*STRING(text_node); }
3470*0Sstevel@tonic-gate
3471*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3472*0Sstevel@tonic-gate c2 = PL_fold[c1];
3473*0Sstevel@tonic-gate else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3474*0Sstevel@tonic-gate c2 = PL_fold_locale[c1];
3475*0Sstevel@tonic-gate else
3476*0Sstevel@tonic-gate c2 = c1;
3477*0Sstevel@tonic-gate }
3478*0Sstevel@tonic-gate }
3479*0Sstevel@tonic-gate else
3480*0Sstevel@tonic-gate c1 = c2 = -1000;
3481*0Sstevel@tonic-gate }
3482*0Sstevel@tonic-gate assume_ok_REG:
3483*0Sstevel@tonic-gate REGCP_SET(lastcp);
3484*0Sstevel@tonic-gate while (n >= ln) {
3485*0Sstevel@tonic-gate /* If it could work, try it. */
3486*0Sstevel@tonic-gate if (c1 == -1000 ||
3487*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c1 ||
3488*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c2)
3489*0Sstevel@tonic-gate {
3490*0Sstevel@tonic-gate DEBUG_r(
3491*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3492*0Sstevel@tonic-gate "%*s trying tail with n=%"IVdf"...\n",
3493*0Sstevel@tonic-gate (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3494*0Sstevel@tonic-gate );
3495*0Sstevel@tonic-gate if (paren) {
3496*0Sstevel@tonic-gate if (n) {
3497*0Sstevel@tonic-gate PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3498*0Sstevel@tonic-gate PL_regendp[paren] = PL_reginput - PL_bostr;
3499*0Sstevel@tonic-gate }
3500*0Sstevel@tonic-gate else
3501*0Sstevel@tonic-gate PL_regendp[paren] = -1;
3502*0Sstevel@tonic-gate }
3503*0Sstevel@tonic-gate if (regmatch(next))
3504*0Sstevel@tonic-gate sayYES;
3505*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3506*0Sstevel@tonic-gate }
3507*0Sstevel@tonic-gate /* Couldn't or didn't -- back up. */
3508*0Sstevel@tonic-gate n--;
3509*0Sstevel@tonic-gate locinput = HOPc(locinput, -l);
3510*0Sstevel@tonic-gate PL_reginput = locinput;
3511*0Sstevel@tonic-gate }
3512*0Sstevel@tonic-gate }
3513*0Sstevel@tonic-gate sayNO;
3514*0Sstevel@tonic-gate break;
3515*0Sstevel@tonic-gate }
3516*0Sstevel@tonic-gate case CURLYN:
3517*0Sstevel@tonic-gate paren = scan->flags; /* Which paren to set */
3518*0Sstevel@tonic-gate if (paren > PL_regsize)
3519*0Sstevel@tonic-gate PL_regsize = paren;
3520*0Sstevel@tonic-gate if (paren > (I32)*PL_reglastparen)
3521*0Sstevel@tonic-gate *PL_reglastparen = paren;
3522*0Sstevel@tonic-gate ln = ARG1(scan); /* min to match */
3523*0Sstevel@tonic-gate n = ARG2(scan); /* max to match */
3524*0Sstevel@tonic-gate scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3525*0Sstevel@tonic-gate goto repeat;
3526*0Sstevel@tonic-gate case CURLY:
3527*0Sstevel@tonic-gate paren = 0;
3528*0Sstevel@tonic-gate ln = ARG1(scan); /* min to match */
3529*0Sstevel@tonic-gate n = ARG2(scan); /* max to match */
3530*0Sstevel@tonic-gate scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3531*0Sstevel@tonic-gate goto repeat;
3532*0Sstevel@tonic-gate case STAR:
3533*0Sstevel@tonic-gate ln = 0;
3534*0Sstevel@tonic-gate n = REG_INFTY;
3535*0Sstevel@tonic-gate scan = NEXTOPER(scan);
3536*0Sstevel@tonic-gate paren = 0;
3537*0Sstevel@tonic-gate goto repeat;
3538*0Sstevel@tonic-gate case PLUS:
3539*0Sstevel@tonic-gate ln = 1;
3540*0Sstevel@tonic-gate n = REG_INFTY;
3541*0Sstevel@tonic-gate scan = NEXTOPER(scan);
3542*0Sstevel@tonic-gate paren = 0;
3543*0Sstevel@tonic-gate repeat:
3544*0Sstevel@tonic-gate /*
3545*0Sstevel@tonic-gate * Lookahead to avoid useless match attempts
3546*0Sstevel@tonic-gate * when we know what character comes next.
3547*0Sstevel@tonic-gate */
3548*0Sstevel@tonic-gate
3549*0Sstevel@tonic-gate /*
3550*0Sstevel@tonic-gate * Used to only do .*x and .*?x, but now it allows
3551*0Sstevel@tonic-gate * for )'s, ('s and (?{ ... })'s to be in the way
3552*0Sstevel@tonic-gate * of the quantifier and the EXACT-like node. -- japhy
3553*0Sstevel@tonic-gate */
3554*0Sstevel@tonic-gate
3555*0Sstevel@tonic-gate if (HAS_TEXT(next) || JUMPABLE(next)) {
3556*0Sstevel@tonic-gate U8 *s;
3557*0Sstevel@tonic-gate regnode *text_node = next;
3558*0Sstevel@tonic-gate
3559*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3560*0Sstevel@tonic-gate
3561*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3562*0Sstevel@tonic-gate else {
3563*0Sstevel@tonic-gate if (PL_regkind[(U8)OP(text_node)] == REF) {
3564*0Sstevel@tonic-gate c1 = c2 = -1000;
3565*0Sstevel@tonic-gate goto assume_ok_easy;
3566*0Sstevel@tonic-gate }
3567*0Sstevel@tonic-gate else { s = (U8*)STRING(text_node); }
3568*0Sstevel@tonic-gate
3569*0Sstevel@tonic-gate if (!UTF) {
3570*0Sstevel@tonic-gate c2 = c1 = *s;
3571*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3572*0Sstevel@tonic-gate c2 = PL_fold[c1];
3573*0Sstevel@tonic-gate else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3574*0Sstevel@tonic-gate c2 = PL_fold_locale[c1];
3575*0Sstevel@tonic-gate }
3576*0Sstevel@tonic-gate else { /* UTF */
3577*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3578*0Sstevel@tonic-gate STRLEN ulen1, ulen2;
3579*0Sstevel@tonic-gate U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3580*0Sstevel@tonic-gate U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3581*0Sstevel@tonic-gate
3582*0Sstevel@tonic-gate to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3583*0Sstevel@tonic-gate to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3584*0Sstevel@tonic-gate
3585*0Sstevel@tonic-gate c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
3586*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3587*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3588*0Sstevel@tonic-gate c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
3589*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3590*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3591*0Sstevel@tonic-gate }
3592*0Sstevel@tonic-gate else {
3593*0Sstevel@tonic-gate c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
3594*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3595*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3596*0Sstevel@tonic-gate }
3597*0Sstevel@tonic-gate }
3598*0Sstevel@tonic-gate }
3599*0Sstevel@tonic-gate }
3600*0Sstevel@tonic-gate else
3601*0Sstevel@tonic-gate c1 = c2 = -1000;
3602*0Sstevel@tonic-gate assume_ok_easy:
3603*0Sstevel@tonic-gate PL_reginput = locinput;
3604*0Sstevel@tonic-gate if (minmod) {
3605*0Sstevel@tonic-gate CHECKPOINT lastcp;
3606*0Sstevel@tonic-gate minmod = 0;
3607*0Sstevel@tonic-gate if (ln && regrepeat(scan, ln) < ln)
3608*0Sstevel@tonic-gate sayNO;
3609*0Sstevel@tonic-gate locinput = PL_reginput;
3610*0Sstevel@tonic-gate REGCP_SET(lastcp);
3611*0Sstevel@tonic-gate if (c1 != -1000) {
3612*0Sstevel@tonic-gate char *e; /* Should not check after this */
3613*0Sstevel@tonic-gate char *old = locinput;
3614*0Sstevel@tonic-gate int count = 0;
3615*0Sstevel@tonic-gate
3616*0Sstevel@tonic-gate if (n == REG_INFTY) {
3617*0Sstevel@tonic-gate e = PL_regeol - 1;
3618*0Sstevel@tonic-gate if (do_utf8)
3619*0Sstevel@tonic-gate while (UTF8_IS_CONTINUATION(*(U8*)e))
3620*0Sstevel@tonic-gate e--;
3621*0Sstevel@tonic-gate }
3622*0Sstevel@tonic-gate else if (do_utf8) {
3623*0Sstevel@tonic-gate int m = n - ln;
3624*0Sstevel@tonic-gate for (e = locinput;
3625*0Sstevel@tonic-gate m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3626*0Sstevel@tonic-gate e += UTF8SKIP(e);
3627*0Sstevel@tonic-gate }
3628*0Sstevel@tonic-gate else {
3629*0Sstevel@tonic-gate e = locinput + n - ln;
3630*0Sstevel@tonic-gate if (e >= PL_regeol)
3631*0Sstevel@tonic-gate e = PL_regeol - 1;
3632*0Sstevel@tonic-gate }
3633*0Sstevel@tonic-gate while (1) {
3634*0Sstevel@tonic-gate /* Find place 'next' could work */
3635*0Sstevel@tonic-gate if (!do_utf8) {
3636*0Sstevel@tonic-gate if (c1 == c2) {
3637*0Sstevel@tonic-gate while (locinput <= e &&
3638*0Sstevel@tonic-gate UCHARAT(locinput) != c1)
3639*0Sstevel@tonic-gate locinput++;
3640*0Sstevel@tonic-gate } else {
3641*0Sstevel@tonic-gate while (locinput <= e
3642*0Sstevel@tonic-gate && UCHARAT(locinput) != c1
3643*0Sstevel@tonic-gate && UCHARAT(locinput) != c2)
3644*0Sstevel@tonic-gate locinput++;
3645*0Sstevel@tonic-gate }
3646*0Sstevel@tonic-gate count = locinput - old;
3647*0Sstevel@tonic-gate }
3648*0Sstevel@tonic-gate else {
3649*0Sstevel@tonic-gate STRLEN len;
3650*0Sstevel@tonic-gate if (c1 == c2) {
3651*0Sstevel@tonic-gate /* count initialised to
3652*0Sstevel@tonic-gate * utf8_distance(old, locinput) */
3653*0Sstevel@tonic-gate while (locinput <= e &&
3654*0Sstevel@tonic-gate utf8n_to_uvchr((U8*)locinput,
3655*0Sstevel@tonic-gate UTF8_MAXLEN, &len,
3656*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3657*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY) != (UV)c1) {
3658*0Sstevel@tonic-gate locinput += len;
3659*0Sstevel@tonic-gate count++;
3660*0Sstevel@tonic-gate }
3661*0Sstevel@tonic-gate } else {
3662*0Sstevel@tonic-gate /* count initialised to
3663*0Sstevel@tonic-gate * utf8_distance(old, locinput) */
3664*0Sstevel@tonic-gate while (locinput <= e) {
3665*0Sstevel@tonic-gate UV c = utf8n_to_uvchr((U8*)locinput,
3666*0Sstevel@tonic-gate UTF8_MAXLEN, &len,
3667*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3668*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3669*0Sstevel@tonic-gate if (c == (UV)c1 || c == (UV)c2)
3670*0Sstevel@tonic-gate break;
3671*0Sstevel@tonic-gate locinput += len;
3672*0Sstevel@tonic-gate count++;
3673*0Sstevel@tonic-gate }
3674*0Sstevel@tonic-gate }
3675*0Sstevel@tonic-gate }
3676*0Sstevel@tonic-gate if (locinput > e)
3677*0Sstevel@tonic-gate sayNO;
3678*0Sstevel@tonic-gate /* PL_reginput == old now */
3679*0Sstevel@tonic-gate if (locinput != old) {
3680*0Sstevel@tonic-gate ln = 1; /* Did some */
3681*0Sstevel@tonic-gate if (regrepeat(scan, count) < count)
3682*0Sstevel@tonic-gate sayNO;
3683*0Sstevel@tonic-gate }
3684*0Sstevel@tonic-gate /* PL_reginput == locinput now */
3685*0Sstevel@tonic-gate TRYPAREN(paren, ln, locinput);
3686*0Sstevel@tonic-gate PL_reginput = locinput; /* Could be reset... */
3687*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3688*0Sstevel@tonic-gate /* Couldn't or didn't -- move forward. */
3689*0Sstevel@tonic-gate old = locinput;
3690*0Sstevel@tonic-gate if (do_utf8)
3691*0Sstevel@tonic-gate locinput += UTF8SKIP(locinput);
3692*0Sstevel@tonic-gate else
3693*0Sstevel@tonic-gate locinput++;
3694*0Sstevel@tonic-gate count = 1;
3695*0Sstevel@tonic-gate }
3696*0Sstevel@tonic-gate }
3697*0Sstevel@tonic-gate else
3698*0Sstevel@tonic-gate while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3699*0Sstevel@tonic-gate UV c;
3700*0Sstevel@tonic-gate if (c1 != -1000) {
3701*0Sstevel@tonic-gate if (do_utf8)
3702*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)PL_reginput,
3703*0Sstevel@tonic-gate UTF8_MAXLEN, 0,
3704*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3705*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3706*0Sstevel@tonic-gate else
3707*0Sstevel@tonic-gate c = UCHARAT(PL_reginput);
3708*0Sstevel@tonic-gate /* If it could work, try it. */
3709*0Sstevel@tonic-gate if (c == (UV)c1 || c == (UV)c2)
3710*0Sstevel@tonic-gate {
3711*0Sstevel@tonic-gate TRYPAREN(paren, ln, PL_reginput);
3712*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3713*0Sstevel@tonic-gate }
3714*0Sstevel@tonic-gate }
3715*0Sstevel@tonic-gate /* If it could work, try it. */
3716*0Sstevel@tonic-gate else if (c1 == -1000)
3717*0Sstevel@tonic-gate {
3718*0Sstevel@tonic-gate TRYPAREN(paren, ln, PL_reginput);
3719*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3720*0Sstevel@tonic-gate }
3721*0Sstevel@tonic-gate /* Couldn't or didn't -- move forward. */
3722*0Sstevel@tonic-gate PL_reginput = locinput;
3723*0Sstevel@tonic-gate if (regrepeat(scan, 1)) {
3724*0Sstevel@tonic-gate ln++;
3725*0Sstevel@tonic-gate locinput = PL_reginput;
3726*0Sstevel@tonic-gate }
3727*0Sstevel@tonic-gate else
3728*0Sstevel@tonic-gate sayNO;
3729*0Sstevel@tonic-gate }
3730*0Sstevel@tonic-gate }
3731*0Sstevel@tonic-gate else {
3732*0Sstevel@tonic-gate CHECKPOINT lastcp;
3733*0Sstevel@tonic-gate n = regrepeat(scan, n);
3734*0Sstevel@tonic-gate locinput = PL_reginput;
3735*0Sstevel@tonic-gate if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3736*0Sstevel@tonic-gate ((!PL_multiline && OP(next) != MEOL) ||
3737*0Sstevel@tonic-gate OP(next) == SEOL || OP(next) == EOS))
3738*0Sstevel@tonic-gate {
3739*0Sstevel@tonic-gate ln = n; /* why back off? */
3740*0Sstevel@tonic-gate /* ...because $ and \Z can match before *and* after
3741*0Sstevel@tonic-gate newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3742*0Sstevel@tonic-gate We should back off by one in this case. */
3743*0Sstevel@tonic-gate if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3744*0Sstevel@tonic-gate ln--;
3745*0Sstevel@tonic-gate }
3746*0Sstevel@tonic-gate REGCP_SET(lastcp);
3747*0Sstevel@tonic-gate if (paren) {
3748*0Sstevel@tonic-gate UV c = 0;
3749*0Sstevel@tonic-gate while (n >= ln) {
3750*0Sstevel@tonic-gate if (c1 != -1000) {
3751*0Sstevel@tonic-gate if (do_utf8)
3752*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)PL_reginput,
3753*0Sstevel@tonic-gate UTF8_MAXLEN, 0,
3754*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3755*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3756*0Sstevel@tonic-gate else
3757*0Sstevel@tonic-gate c = UCHARAT(PL_reginput);
3758*0Sstevel@tonic-gate }
3759*0Sstevel@tonic-gate /* If it could work, try it. */
3760*0Sstevel@tonic-gate if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3761*0Sstevel@tonic-gate {
3762*0Sstevel@tonic-gate TRYPAREN(paren, n, PL_reginput);
3763*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3764*0Sstevel@tonic-gate }
3765*0Sstevel@tonic-gate /* Couldn't or didn't -- back up. */
3766*0Sstevel@tonic-gate n--;
3767*0Sstevel@tonic-gate PL_reginput = locinput = HOPc(locinput, -1);
3768*0Sstevel@tonic-gate }
3769*0Sstevel@tonic-gate }
3770*0Sstevel@tonic-gate else {
3771*0Sstevel@tonic-gate UV c = 0;
3772*0Sstevel@tonic-gate while (n >= ln) {
3773*0Sstevel@tonic-gate if (c1 != -1000) {
3774*0Sstevel@tonic-gate if (do_utf8)
3775*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)PL_reginput,
3776*0Sstevel@tonic-gate UTF8_MAXLEN, 0,
3777*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ?
3778*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY);
3779*0Sstevel@tonic-gate else
3780*0Sstevel@tonic-gate c = UCHARAT(PL_reginput);
3781*0Sstevel@tonic-gate }
3782*0Sstevel@tonic-gate /* If it could work, try it. */
3783*0Sstevel@tonic-gate if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3784*0Sstevel@tonic-gate {
3785*0Sstevel@tonic-gate TRYPAREN(paren, n, PL_reginput);
3786*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3787*0Sstevel@tonic-gate }
3788*0Sstevel@tonic-gate /* Couldn't or didn't -- back up. */
3789*0Sstevel@tonic-gate n--;
3790*0Sstevel@tonic-gate PL_reginput = locinput = HOPc(locinput, -1);
3791*0Sstevel@tonic-gate }
3792*0Sstevel@tonic-gate }
3793*0Sstevel@tonic-gate }
3794*0Sstevel@tonic-gate sayNO;
3795*0Sstevel@tonic-gate break;
3796*0Sstevel@tonic-gate case END:
3797*0Sstevel@tonic-gate if (PL_reg_call_cc) {
3798*0Sstevel@tonic-gate re_cc_state *cur_call_cc = PL_reg_call_cc;
3799*0Sstevel@tonic-gate CURCUR *cctmp = PL_regcc;
3800*0Sstevel@tonic-gate regexp *re = PL_reg_re;
3801*0Sstevel@tonic-gate CHECKPOINT cp, lastcp;
3802*0Sstevel@tonic-gate
3803*0Sstevel@tonic-gate cp = regcppush(0); /* Save *all* the positions. */
3804*0Sstevel@tonic-gate REGCP_SET(lastcp);
3805*0Sstevel@tonic-gate regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3806*0Sstevel@tonic-gate the caller. */
3807*0Sstevel@tonic-gate PL_reginput = locinput; /* Make position available to
3808*0Sstevel@tonic-gate the callcc. */
3809*0Sstevel@tonic-gate cache_re(PL_reg_call_cc->re);
3810*0Sstevel@tonic-gate PL_regcc = PL_reg_call_cc->cc;
3811*0Sstevel@tonic-gate PL_reg_call_cc = PL_reg_call_cc->prev;
3812*0Sstevel@tonic-gate if (regmatch(cur_call_cc->node)) {
3813*0Sstevel@tonic-gate PL_reg_call_cc = cur_call_cc;
3814*0Sstevel@tonic-gate regcpblow(cp);
3815*0Sstevel@tonic-gate sayYES;
3816*0Sstevel@tonic-gate }
3817*0Sstevel@tonic-gate REGCP_UNWIND(lastcp);
3818*0Sstevel@tonic-gate regcppop();
3819*0Sstevel@tonic-gate PL_reg_call_cc = cur_call_cc;
3820*0Sstevel@tonic-gate PL_regcc = cctmp;
3821*0Sstevel@tonic-gate PL_reg_re = re;
3822*0Sstevel@tonic-gate cache_re(re);
3823*0Sstevel@tonic-gate
3824*0Sstevel@tonic-gate DEBUG_r(
3825*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3826*0Sstevel@tonic-gate "%*s continuation failed...\n",
3827*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "")
3828*0Sstevel@tonic-gate );
3829*0Sstevel@tonic-gate sayNO_SILENT;
3830*0Sstevel@tonic-gate }
3831*0Sstevel@tonic-gate if (locinput < PL_regtill) {
3832*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log,
3833*0Sstevel@tonic-gate "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3834*0Sstevel@tonic-gate PL_colors[4],
3835*0Sstevel@tonic-gate (long)(locinput - PL_reg_starttry),
3836*0Sstevel@tonic-gate (long)(PL_regtill - PL_reg_starttry),
3837*0Sstevel@tonic-gate PL_colors[5]));
3838*0Sstevel@tonic-gate sayNO_FINAL; /* Cannot match: too short. */
3839*0Sstevel@tonic-gate }
3840*0Sstevel@tonic-gate PL_reginput = locinput; /* put where regtry can find it */
3841*0Sstevel@tonic-gate sayYES_FINAL; /* Success! */
3842*0Sstevel@tonic-gate case SUCCEED:
3843*0Sstevel@tonic-gate PL_reginput = locinput; /* put where regtry can find it */
3844*0Sstevel@tonic-gate sayYES_LOUD; /* Success! */
3845*0Sstevel@tonic-gate case SUSPEND:
3846*0Sstevel@tonic-gate n = 1;
3847*0Sstevel@tonic-gate PL_reginput = locinput;
3848*0Sstevel@tonic-gate goto do_ifmatch;
3849*0Sstevel@tonic-gate case UNLESSM:
3850*0Sstevel@tonic-gate n = 0;
3851*0Sstevel@tonic-gate if (scan->flags) {
3852*0Sstevel@tonic-gate s = HOPBACKc(locinput, scan->flags);
3853*0Sstevel@tonic-gate if (!s)
3854*0Sstevel@tonic-gate goto say_yes;
3855*0Sstevel@tonic-gate PL_reginput = s;
3856*0Sstevel@tonic-gate }
3857*0Sstevel@tonic-gate else
3858*0Sstevel@tonic-gate PL_reginput = locinput;
3859*0Sstevel@tonic-gate PL_reg_flags ^= RF_false;
3860*0Sstevel@tonic-gate goto do_ifmatch;
3861*0Sstevel@tonic-gate case IFMATCH:
3862*0Sstevel@tonic-gate n = 1;
3863*0Sstevel@tonic-gate if (scan->flags) {
3864*0Sstevel@tonic-gate s = HOPBACKc(locinput, scan->flags);
3865*0Sstevel@tonic-gate if (!s)
3866*0Sstevel@tonic-gate goto say_no;
3867*0Sstevel@tonic-gate PL_reginput = s;
3868*0Sstevel@tonic-gate }
3869*0Sstevel@tonic-gate else
3870*0Sstevel@tonic-gate PL_reginput = locinput;
3871*0Sstevel@tonic-gate
3872*0Sstevel@tonic-gate do_ifmatch:
3873*0Sstevel@tonic-gate inner = NEXTOPER(NEXTOPER(scan));
3874*0Sstevel@tonic-gate if (regmatch(inner) != n) {
3875*0Sstevel@tonic-gate if (n == 0)
3876*0Sstevel@tonic-gate PL_reg_flags ^= RF_false;
3877*0Sstevel@tonic-gate say_no:
3878*0Sstevel@tonic-gate if (logical) {
3879*0Sstevel@tonic-gate logical = 0;
3880*0Sstevel@tonic-gate sw = 0;
3881*0Sstevel@tonic-gate goto do_longjump;
3882*0Sstevel@tonic-gate }
3883*0Sstevel@tonic-gate else
3884*0Sstevel@tonic-gate sayNO;
3885*0Sstevel@tonic-gate }
3886*0Sstevel@tonic-gate if (n == 0)
3887*0Sstevel@tonic-gate PL_reg_flags ^= RF_false;
3888*0Sstevel@tonic-gate say_yes:
3889*0Sstevel@tonic-gate if (logical) {
3890*0Sstevel@tonic-gate logical = 0;
3891*0Sstevel@tonic-gate sw = 1;
3892*0Sstevel@tonic-gate }
3893*0Sstevel@tonic-gate if (OP(scan) == SUSPEND) {
3894*0Sstevel@tonic-gate locinput = PL_reginput;
3895*0Sstevel@tonic-gate nextchr = UCHARAT(locinput);
3896*0Sstevel@tonic-gate }
3897*0Sstevel@tonic-gate /* FALL THROUGH. */
3898*0Sstevel@tonic-gate case LONGJMP:
3899*0Sstevel@tonic-gate do_longjump:
3900*0Sstevel@tonic-gate next = scan + ARG(scan);
3901*0Sstevel@tonic-gate if (next == scan)
3902*0Sstevel@tonic-gate next = NULL;
3903*0Sstevel@tonic-gate break;
3904*0Sstevel@tonic-gate default:
3905*0Sstevel@tonic-gate PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3906*0Sstevel@tonic-gate PTR2UV(scan), OP(scan));
3907*0Sstevel@tonic-gate Perl_croak(aTHX_ "regexp memory corruption");
3908*0Sstevel@tonic-gate }
3909*0Sstevel@tonic-gate reenter:
3910*0Sstevel@tonic-gate scan = next;
3911*0Sstevel@tonic-gate }
3912*0Sstevel@tonic-gate
3913*0Sstevel@tonic-gate /*
3914*0Sstevel@tonic-gate * We get here only if there's trouble -- normally "case END" is
3915*0Sstevel@tonic-gate * the terminating point.
3916*0Sstevel@tonic-gate */
3917*0Sstevel@tonic-gate Perl_croak(aTHX_ "corrupted regexp pointers");
3918*0Sstevel@tonic-gate /*NOTREACHED*/
3919*0Sstevel@tonic-gate sayNO;
3920*0Sstevel@tonic-gate
3921*0Sstevel@tonic-gate yes_loud:
3922*0Sstevel@tonic-gate DEBUG_r(
3923*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3924*0Sstevel@tonic-gate "%*s %scould match...%s\n",
3925*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3926*0Sstevel@tonic-gate );
3927*0Sstevel@tonic-gate goto yes;
3928*0Sstevel@tonic-gate yes_final:
3929*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3930*0Sstevel@tonic-gate PL_colors[4],PL_colors[5]));
3931*0Sstevel@tonic-gate yes:
3932*0Sstevel@tonic-gate #ifdef DEBUGGING
3933*0Sstevel@tonic-gate PL_regindent--;
3934*0Sstevel@tonic-gate #endif
3935*0Sstevel@tonic-gate
3936*0Sstevel@tonic-gate #if 0 /* Breaks $^R */
3937*0Sstevel@tonic-gate if (unwind)
3938*0Sstevel@tonic-gate regcpblow(firstcp);
3939*0Sstevel@tonic-gate #endif
3940*0Sstevel@tonic-gate return 1;
3941*0Sstevel@tonic-gate
3942*0Sstevel@tonic-gate no:
3943*0Sstevel@tonic-gate DEBUG_r(
3944*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
3945*0Sstevel@tonic-gate "%*s %sfailed...%s\n",
3946*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3947*0Sstevel@tonic-gate );
3948*0Sstevel@tonic-gate goto do_no;
3949*0Sstevel@tonic-gate no_final:
3950*0Sstevel@tonic-gate do_no:
3951*0Sstevel@tonic-gate if (unwind) {
3952*0Sstevel@tonic-gate re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3953*0Sstevel@tonic-gate
3954*0Sstevel@tonic-gate switch (uw->type) {
3955*0Sstevel@tonic-gate case RE_UNWIND_BRANCH:
3956*0Sstevel@tonic-gate case RE_UNWIND_BRANCHJ:
3957*0Sstevel@tonic-gate {
3958*0Sstevel@tonic-gate re_unwind_branch_t *uwb = &(uw->branch);
3959*0Sstevel@tonic-gate I32 lastparen = uwb->lastparen;
3960*0Sstevel@tonic-gate
3961*0Sstevel@tonic-gate REGCP_UNWIND(uwb->lastcp);
3962*0Sstevel@tonic-gate for (n = *PL_reglastparen; n > lastparen; n--)
3963*0Sstevel@tonic-gate PL_regendp[n] = -1;
3964*0Sstevel@tonic-gate *PL_reglastparen = n;
3965*0Sstevel@tonic-gate scan = next = uwb->next;
3966*0Sstevel@tonic-gate if ( !scan ||
3967*0Sstevel@tonic-gate OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3968*0Sstevel@tonic-gate ? BRANCH : BRANCHJ) ) { /* Failure */
3969*0Sstevel@tonic-gate unwind = uwb->prev;
3970*0Sstevel@tonic-gate #ifdef DEBUGGING
3971*0Sstevel@tonic-gate PL_regindent--;
3972*0Sstevel@tonic-gate #endif
3973*0Sstevel@tonic-gate goto do_no;
3974*0Sstevel@tonic-gate }
3975*0Sstevel@tonic-gate /* Have more choice yet. Reuse the same uwb. */
3976*0Sstevel@tonic-gate /*SUPPRESS 560*/
3977*0Sstevel@tonic-gate if ((n = (uwb->type == RE_UNWIND_BRANCH
3978*0Sstevel@tonic-gate ? NEXT_OFF(next) : ARG(next))))
3979*0Sstevel@tonic-gate next += n;
3980*0Sstevel@tonic-gate else
3981*0Sstevel@tonic-gate next = NULL; /* XXXX Needn't unwinding in this case... */
3982*0Sstevel@tonic-gate uwb->next = next;
3983*0Sstevel@tonic-gate next = NEXTOPER(scan);
3984*0Sstevel@tonic-gate if (uwb->type == RE_UNWIND_BRANCHJ)
3985*0Sstevel@tonic-gate next = NEXTOPER(next);
3986*0Sstevel@tonic-gate locinput = uwb->locinput;
3987*0Sstevel@tonic-gate nextchr = uwb->nextchr;
3988*0Sstevel@tonic-gate #ifdef DEBUGGING
3989*0Sstevel@tonic-gate PL_regindent = uwb->regindent;
3990*0Sstevel@tonic-gate #endif
3991*0Sstevel@tonic-gate
3992*0Sstevel@tonic-gate goto reenter;
3993*0Sstevel@tonic-gate }
3994*0Sstevel@tonic-gate /* NOT REACHED */
3995*0Sstevel@tonic-gate default:
3996*0Sstevel@tonic-gate Perl_croak(aTHX_ "regexp unwind memory corruption");
3997*0Sstevel@tonic-gate }
3998*0Sstevel@tonic-gate /* NOT REACHED */
3999*0Sstevel@tonic-gate }
4000*0Sstevel@tonic-gate #ifdef DEBUGGING
4001*0Sstevel@tonic-gate PL_regindent--;
4002*0Sstevel@tonic-gate #endif
4003*0Sstevel@tonic-gate return 0;
4004*0Sstevel@tonic-gate }
4005*0Sstevel@tonic-gate
4006*0Sstevel@tonic-gate /*
4007*0Sstevel@tonic-gate - regrepeat - repeatedly match something simple, report how many
4008*0Sstevel@tonic-gate */
4009*0Sstevel@tonic-gate /*
4010*0Sstevel@tonic-gate * [This routine now assumes that it will only match on things of length 1.
4011*0Sstevel@tonic-gate * That was true before, but now we assume scan - reginput is the count,
4012*0Sstevel@tonic-gate * rather than incrementing count on every character. [Er, except utf8.]]
4013*0Sstevel@tonic-gate */
4014*0Sstevel@tonic-gate STATIC I32
S_regrepeat(pTHX_ regnode * p,I32 max)4015*0Sstevel@tonic-gate S_regrepeat(pTHX_ regnode *p, I32 max)
4016*0Sstevel@tonic-gate {
4017*0Sstevel@tonic-gate register char *scan;
4018*0Sstevel@tonic-gate register I32 c;
4019*0Sstevel@tonic-gate register char *loceol = PL_regeol;
4020*0Sstevel@tonic-gate register I32 hardcount = 0;
4021*0Sstevel@tonic-gate register bool do_utf8 = PL_reg_match_utf8;
4022*0Sstevel@tonic-gate
4023*0Sstevel@tonic-gate scan = PL_reginput;
4024*0Sstevel@tonic-gate if (max == REG_INFTY)
4025*0Sstevel@tonic-gate max = I32_MAX;
4026*0Sstevel@tonic-gate else if (max < loceol - scan)
4027*0Sstevel@tonic-gate loceol = scan + max;
4028*0Sstevel@tonic-gate switch (OP(p)) {
4029*0Sstevel@tonic-gate case REG_ANY:
4030*0Sstevel@tonic-gate if (do_utf8) {
4031*0Sstevel@tonic-gate loceol = PL_regeol;
4032*0Sstevel@tonic-gate while (scan < loceol && hardcount < max && *scan != '\n') {
4033*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4034*0Sstevel@tonic-gate hardcount++;
4035*0Sstevel@tonic-gate }
4036*0Sstevel@tonic-gate } else {
4037*0Sstevel@tonic-gate while (scan < loceol && *scan != '\n')
4038*0Sstevel@tonic-gate scan++;
4039*0Sstevel@tonic-gate }
4040*0Sstevel@tonic-gate break;
4041*0Sstevel@tonic-gate case SANY:
4042*0Sstevel@tonic-gate if (do_utf8) {
4043*0Sstevel@tonic-gate loceol = PL_regeol;
4044*0Sstevel@tonic-gate while (scan < loceol && hardcount < max) {
4045*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4046*0Sstevel@tonic-gate hardcount++;
4047*0Sstevel@tonic-gate }
4048*0Sstevel@tonic-gate }
4049*0Sstevel@tonic-gate else
4050*0Sstevel@tonic-gate scan = loceol;
4051*0Sstevel@tonic-gate break;
4052*0Sstevel@tonic-gate case CANY:
4053*0Sstevel@tonic-gate scan = loceol;
4054*0Sstevel@tonic-gate break;
4055*0Sstevel@tonic-gate case EXACT: /* length of string is 1 */
4056*0Sstevel@tonic-gate c = (U8)*STRING(p);
4057*0Sstevel@tonic-gate while (scan < loceol && UCHARAT(scan) == c)
4058*0Sstevel@tonic-gate scan++;
4059*0Sstevel@tonic-gate break;
4060*0Sstevel@tonic-gate case EXACTF: /* length of string is 1 */
4061*0Sstevel@tonic-gate c = (U8)*STRING(p);
4062*0Sstevel@tonic-gate while (scan < loceol &&
4063*0Sstevel@tonic-gate (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4064*0Sstevel@tonic-gate scan++;
4065*0Sstevel@tonic-gate break;
4066*0Sstevel@tonic-gate case EXACTFL: /* length of string is 1 */
4067*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4068*0Sstevel@tonic-gate c = (U8)*STRING(p);
4069*0Sstevel@tonic-gate while (scan < loceol &&
4070*0Sstevel@tonic-gate (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4071*0Sstevel@tonic-gate scan++;
4072*0Sstevel@tonic-gate break;
4073*0Sstevel@tonic-gate case ANYOF:
4074*0Sstevel@tonic-gate if (do_utf8) {
4075*0Sstevel@tonic-gate loceol = PL_regeol;
4076*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4077*0Sstevel@tonic-gate reginclass(p, (U8*)scan, 0, do_utf8)) {
4078*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4079*0Sstevel@tonic-gate hardcount++;
4080*0Sstevel@tonic-gate }
4081*0Sstevel@tonic-gate } else {
4082*0Sstevel@tonic-gate while (scan < loceol && REGINCLASS(p, (U8*)scan))
4083*0Sstevel@tonic-gate scan++;
4084*0Sstevel@tonic-gate }
4085*0Sstevel@tonic-gate break;
4086*0Sstevel@tonic-gate case ALNUM:
4087*0Sstevel@tonic-gate if (do_utf8) {
4088*0Sstevel@tonic-gate loceol = PL_regeol;
4089*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
4090*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4091*0Sstevel@tonic-gate swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4092*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4093*0Sstevel@tonic-gate hardcount++;
4094*0Sstevel@tonic-gate }
4095*0Sstevel@tonic-gate } else {
4096*0Sstevel@tonic-gate while (scan < loceol && isALNUM(*scan))
4097*0Sstevel@tonic-gate scan++;
4098*0Sstevel@tonic-gate }
4099*0Sstevel@tonic-gate break;
4100*0Sstevel@tonic-gate case ALNUML:
4101*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4102*0Sstevel@tonic-gate if (do_utf8) {
4103*0Sstevel@tonic-gate loceol = PL_regeol;
4104*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4105*0Sstevel@tonic-gate isALNUM_LC_utf8((U8*)scan)) {
4106*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4107*0Sstevel@tonic-gate hardcount++;
4108*0Sstevel@tonic-gate }
4109*0Sstevel@tonic-gate } else {
4110*0Sstevel@tonic-gate while (scan < loceol && isALNUM_LC(*scan))
4111*0Sstevel@tonic-gate scan++;
4112*0Sstevel@tonic-gate }
4113*0Sstevel@tonic-gate break;
4114*0Sstevel@tonic-gate case NALNUM:
4115*0Sstevel@tonic-gate if (do_utf8) {
4116*0Sstevel@tonic-gate loceol = PL_regeol;
4117*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a");
4118*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4119*0Sstevel@tonic-gate !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4120*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4121*0Sstevel@tonic-gate hardcount++;
4122*0Sstevel@tonic-gate }
4123*0Sstevel@tonic-gate } else {
4124*0Sstevel@tonic-gate while (scan < loceol && !isALNUM(*scan))
4125*0Sstevel@tonic-gate scan++;
4126*0Sstevel@tonic-gate }
4127*0Sstevel@tonic-gate break;
4128*0Sstevel@tonic-gate case NALNUML:
4129*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4130*0Sstevel@tonic-gate if (do_utf8) {
4131*0Sstevel@tonic-gate loceol = PL_regeol;
4132*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4133*0Sstevel@tonic-gate !isALNUM_LC_utf8((U8*)scan)) {
4134*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4135*0Sstevel@tonic-gate hardcount++;
4136*0Sstevel@tonic-gate }
4137*0Sstevel@tonic-gate } else {
4138*0Sstevel@tonic-gate while (scan < loceol && !isALNUM_LC(*scan))
4139*0Sstevel@tonic-gate scan++;
4140*0Sstevel@tonic-gate }
4141*0Sstevel@tonic-gate break;
4142*0Sstevel@tonic-gate case SPACE:
4143*0Sstevel@tonic-gate if (do_utf8) {
4144*0Sstevel@tonic-gate loceol = PL_regeol;
4145*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," ");
4146*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4147*0Sstevel@tonic-gate (*scan == ' ' ||
4148*0Sstevel@tonic-gate swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4149*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4150*0Sstevel@tonic-gate hardcount++;
4151*0Sstevel@tonic-gate }
4152*0Sstevel@tonic-gate } else {
4153*0Sstevel@tonic-gate while (scan < loceol && isSPACE(*scan))
4154*0Sstevel@tonic-gate scan++;
4155*0Sstevel@tonic-gate }
4156*0Sstevel@tonic-gate break;
4157*0Sstevel@tonic-gate case SPACEL:
4158*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4159*0Sstevel@tonic-gate if (do_utf8) {
4160*0Sstevel@tonic-gate loceol = PL_regeol;
4161*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4162*0Sstevel@tonic-gate (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4163*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4164*0Sstevel@tonic-gate hardcount++;
4165*0Sstevel@tonic-gate }
4166*0Sstevel@tonic-gate } else {
4167*0Sstevel@tonic-gate while (scan < loceol && isSPACE_LC(*scan))
4168*0Sstevel@tonic-gate scan++;
4169*0Sstevel@tonic-gate }
4170*0Sstevel@tonic-gate break;
4171*0Sstevel@tonic-gate case NSPACE:
4172*0Sstevel@tonic-gate if (do_utf8) {
4173*0Sstevel@tonic-gate loceol = PL_regeol;
4174*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," ");
4175*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4176*0Sstevel@tonic-gate !(*scan == ' ' ||
4177*0Sstevel@tonic-gate swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4178*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4179*0Sstevel@tonic-gate hardcount++;
4180*0Sstevel@tonic-gate }
4181*0Sstevel@tonic-gate } else {
4182*0Sstevel@tonic-gate while (scan < loceol && !isSPACE(*scan))
4183*0Sstevel@tonic-gate scan++;
4184*0Sstevel@tonic-gate break;
4185*0Sstevel@tonic-gate }
4186*0Sstevel@tonic-gate case NSPACEL:
4187*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4188*0Sstevel@tonic-gate if (do_utf8) {
4189*0Sstevel@tonic-gate loceol = PL_regeol;
4190*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4191*0Sstevel@tonic-gate !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4192*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4193*0Sstevel@tonic-gate hardcount++;
4194*0Sstevel@tonic-gate }
4195*0Sstevel@tonic-gate } else {
4196*0Sstevel@tonic-gate while (scan < loceol && !isSPACE_LC(*scan))
4197*0Sstevel@tonic-gate scan++;
4198*0Sstevel@tonic-gate }
4199*0Sstevel@tonic-gate break;
4200*0Sstevel@tonic-gate case DIGIT:
4201*0Sstevel@tonic-gate if (do_utf8) {
4202*0Sstevel@tonic-gate loceol = PL_regeol;
4203*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0");
4204*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4205*0Sstevel@tonic-gate swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4206*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4207*0Sstevel@tonic-gate hardcount++;
4208*0Sstevel@tonic-gate }
4209*0Sstevel@tonic-gate } else {
4210*0Sstevel@tonic-gate while (scan < loceol && isDIGIT(*scan))
4211*0Sstevel@tonic-gate scan++;
4212*0Sstevel@tonic-gate }
4213*0Sstevel@tonic-gate break;
4214*0Sstevel@tonic-gate case NDIGIT:
4215*0Sstevel@tonic-gate if (do_utf8) {
4216*0Sstevel@tonic-gate loceol = PL_regeol;
4217*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0");
4218*0Sstevel@tonic-gate while (hardcount < max && scan < loceol &&
4219*0Sstevel@tonic-gate !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4220*0Sstevel@tonic-gate scan += UTF8SKIP(scan);
4221*0Sstevel@tonic-gate hardcount++;
4222*0Sstevel@tonic-gate }
4223*0Sstevel@tonic-gate } else {
4224*0Sstevel@tonic-gate while (scan < loceol && !isDIGIT(*scan))
4225*0Sstevel@tonic-gate scan++;
4226*0Sstevel@tonic-gate }
4227*0Sstevel@tonic-gate break;
4228*0Sstevel@tonic-gate default: /* Called on something of 0 width. */
4229*0Sstevel@tonic-gate break; /* So match right here or not at all. */
4230*0Sstevel@tonic-gate }
4231*0Sstevel@tonic-gate
4232*0Sstevel@tonic-gate if (hardcount)
4233*0Sstevel@tonic-gate c = hardcount;
4234*0Sstevel@tonic-gate else
4235*0Sstevel@tonic-gate c = scan - PL_reginput;
4236*0Sstevel@tonic-gate PL_reginput = scan;
4237*0Sstevel@tonic-gate
4238*0Sstevel@tonic-gate DEBUG_r(
4239*0Sstevel@tonic-gate {
4240*0Sstevel@tonic-gate SV *prop = sv_newmortal();
4241*0Sstevel@tonic-gate
4242*0Sstevel@tonic-gate regprop(prop, p);
4243*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log,
4244*0Sstevel@tonic-gate "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
4245*0Sstevel@tonic-gate REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4246*0Sstevel@tonic-gate });
4247*0Sstevel@tonic-gate
4248*0Sstevel@tonic-gate return(c);
4249*0Sstevel@tonic-gate }
4250*0Sstevel@tonic-gate
4251*0Sstevel@tonic-gate /*
4252*0Sstevel@tonic-gate - regrepeat_hard - repeatedly match something, report total lenth and length
4253*0Sstevel@tonic-gate *
4254*0Sstevel@tonic-gate * The repeater is supposed to have constant length.
4255*0Sstevel@tonic-gate */
4256*0Sstevel@tonic-gate
4257*0Sstevel@tonic-gate STATIC I32
S_regrepeat_hard(pTHX_ regnode * p,I32 max,I32 * lp)4258*0Sstevel@tonic-gate S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4259*0Sstevel@tonic-gate {
4260*0Sstevel@tonic-gate register char *scan = Nullch;
4261*0Sstevel@tonic-gate register char *start;
4262*0Sstevel@tonic-gate register char *loceol = PL_regeol;
4263*0Sstevel@tonic-gate I32 l = 0;
4264*0Sstevel@tonic-gate I32 count = 0, res = 1;
4265*0Sstevel@tonic-gate
4266*0Sstevel@tonic-gate if (!max)
4267*0Sstevel@tonic-gate return 0;
4268*0Sstevel@tonic-gate
4269*0Sstevel@tonic-gate start = PL_reginput;
4270*0Sstevel@tonic-gate if (PL_reg_match_utf8) {
4271*0Sstevel@tonic-gate while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4272*0Sstevel@tonic-gate if (!count++) {
4273*0Sstevel@tonic-gate l = 0;
4274*0Sstevel@tonic-gate while (start < PL_reginput) {
4275*0Sstevel@tonic-gate l++;
4276*0Sstevel@tonic-gate start += UTF8SKIP(start);
4277*0Sstevel@tonic-gate }
4278*0Sstevel@tonic-gate *lp = l;
4279*0Sstevel@tonic-gate if (l == 0)
4280*0Sstevel@tonic-gate return max;
4281*0Sstevel@tonic-gate }
4282*0Sstevel@tonic-gate if (count == max)
4283*0Sstevel@tonic-gate return count;
4284*0Sstevel@tonic-gate }
4285*0Sstevel@tonic-gate }
4286*0Sstevel@tonic-gate else {
4287*0Sstevel@tonic-gate while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4288*0Sstevel@tonic-gate if (!count++) {
4289*0Sstevel@tonic-gate *lp = l = PL_reginput - start;
4290*0Sstevel@tonic-gate if (max != REG_INFTY && l*max < loceol - scan)
4291*0Sstevel@tonic-gate loceol = scan + l*max;
4292*0Sstevel@tonic-gate if (l == 0)
4293*0Sstevel@tonic-gate return max;
4294*0Sstevel@tonic-gate }
4295*0Sstevel@tonic-gate }
4296*0Sstevel@tonic-gate }
4297*0Sstevel@tonic-gate if (!res)
4298*0Sstevel@tonic-gate PL_reginput = scan;
4299*0Sstevel@tonic-gate
4300*0Sstevel@tonic-gate return count;
4301*0Sstevel@tonic-gate }
4302*0Sstevel@tonic-gate
4303*0Sstevel@tonic-gate /*
4304*0Sstevel@tonic-gate - regclass_swash - prepare the utf8 swash
4305*0Sstevel@tonic-gate */
4306*0Sstevel@tonic-gate
4307*0Sstevel@tonic-gate SV *
Perl_regclass_swash(pTHX_ register regnode * node,bool doinit,SV ** listsvp,SV ** altsvp)4308*0Sstevel@tonic-gate Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4309*0Sstevel@tonic-gate {
4310*0Sstevel@tonic-gate SV *sw = NULL;
4311*0Sstevel@tonic-gate SV *si = NULL;
4312*0Sstevel@tonic-gate SV *alt = NULL;
4313*0Sstevel@tonic-gate
4314*0Sstevel@tonic-gate if (PL_regdata && PL_regdata->count) {
4315*0Sstevel@tonic-gate U32 n = ARG(node);
4316*0Sstevel@tonic-gate
4317*0Sstevel@tonic-gate if (PL_regdata->what[n] == 's') {
4318*0Sstevel@tonic-gate SV *rv = (SV*)PL_regdata->data[n];
4319*0Sstevel@tonic-gate AV *av = (AV*)SvRV((SV*)rv);
4320*0Sstevel@tonic-gate SV **ary = AvARRAY(av);
4321*0Sstevel@tonic-gate SV **a, **b;
4322*0Sstevel@tonic-gate
4323*0Sstevel@tonic-gate /* See the end of regcomp.c:S_reglass() for
4324*0Sstevel@tonic-gate * documentation of these array elements. */
4325*0Sstevel@tonic-gate
4326*0Sstevel@tonic-gate si = *ary;
4327*0Sstevel@tonic-gate a = SvTYPE(ary[1]) == SVt_RV ? &ary[1] : 0;
4328*0Sstevel@tonic-gate b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4329*0Sstevel@tonic-gate
4330*0Sstevel@tonic-gate if (a)
4331*0Sstevel@tonic-gate sw = *a;
4332*0Sstevel@tonic-gate else if (si && doinit) {
4333*0Sstevel@tonic-gate sw = swash_init("utf8", "", si, 1, 0);
4334*0Sstevel@tonic-gate (void)av_store(av, 1, sw);
4335*0Sstevel@tonic-gate }
4336*0Sstevel@tonic-gate if (b)
4337*0Sstevel@tonic-gate alt = *b;
4338*0Sstevel@tonic-gate }
4339*0Sstevel@tonic-gate }
4340*0Sstevel@tonic-gate
4341*0Sstevel@tonic-gate if (listsvp)
4342*0Sstevel@tonic-gate *listsvp = si;
4343*0Sstevel@tonic-gate if (altsvp)
4344*0Sstevel@tonic-gate *altsvp = alt;
4345*0Sstevel@tonic-gate
4346*0Sstevel@tonic-gate return sw;
4347*0Sstevel@tonic-gate }
4348*0Sstevel@tonic-gate
4349*0Sstevel@tonic-gate /*
4350*0Sstevel@tonic-gate - reginclass - determine if a character falls into a character class
4351*0Sstevel@tonic-gate
4352*0Sstevel@tonic-gate The n is the ANYOF regnode, the p is the target string, lenp
4353*0Sstevel@tonic-gate is pointer to the maximum length of how far to go in the p
4354*0Sstevel@tonic-gate (if the lenp is zero, UTF8SKIP(p) is used),
4355*0Sstevel@tonic-gate do_utf8 tells whether the target string is in UTF-8.
4356*0Sstevel@tonic-gate
4357*0Sstevel@tonic-gate */
4358*0Sstevel@tonic-gate
4359*0Sstevel@tonic-gate STATIC bool
S_reginclass(pTHX_ register regnode * n,register U8 * p,STRLEN * lenp,register bool do_utf8)4360*0Sstevel@tonic-gate S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4361*0Sstevel@tonic-gate {
4362*0Sstevel@tonic-gate char flags = ANYOF_FLAGS(n);
4363*0Sstevel@tonic-gate bool match = FALSE;
4364*0Sstevel@tonic-gate UV c = *p;
4365*0Sstevel@tonic-gate STRLEN len = 0;
4366*0Sstevel@tonic-gate STRLEN plen;
4367*0Sstevel@tonic-gate
4368*0Sstevel@tonic-gate if (do_utf8 && !UTF8_IS_INVARIANT(c))
4369*0Sstevel@tonic-gate c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
4370*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4371*0Sstevel@tonic-gate
4372*0Sstevel@tonic-gate plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4373*0Sstevel@tonic-gate if (do_utf8 || (flags & ANYOF_UNICODE)) {
4374*0Sstevel@tonic-gate if (lenp)
4375*0Sstevel@tonic-gate *lenp = 0;
4376*0Sstevel@tonic-gate if (do_utf8 && !ANYOF_RUNTIME(n)) {
4377*0Sstevel@tonic-gate if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4378*0Sstevel@tonic-gate match = TRUE;
4379*0Sstevel@tonic-gate }
4380*0Sstevel@tonic-gate if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4381*0Sstevel@tonic-gate match = TRUE;
4382*0Sstevel@tonic-gate if (!match) {
4383*0Sstevel@tonic-gate AV *av;
4384*0Sstevel@tonic-gate SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4385*0Sstevel@tonic-gate
4386*0Sstevel@tonic-gate if (sw) {
4387*0Sstevel@tonic-gate if (swash_fetch(sw, p, do_utf8))
4388*0Sstevel@tonic-gate match = TRUE;
4389*0Sstevel@tonic-gate else if (flags & ANYOF_FOLD) {
4390*0Sstevel@tonic-gate if (!match && lenp && av) {
4391*0Sstevel@tonic-gate I32 i;
4392*0Sstevel@tonic-gate
4393*0Sstevel@tonic-gate for (i = 0; i <= av_len(av); i++) {
4394*0Sstevel@tonic-gate SV* sv = *av_fetch(av, i, FALSE);
4395*0Sstevel@tonic-gate STRLEN len;
4396*0Sstevel@tonic-gate char *s = SvPV(sv, len);
4397*0Sstevel@tonic-gate
4398*0Sstevel@tonic-gate if (len <= plen && memEQ(s, (char*)p, len)) {
4399*0Sstevel@tonic-gate *lenp = len;
4400*0Sstevel@tonic-gate match = TRUE;
4401*0Sstevel@tonic-gate break;
4402*0Sstevel@tonic-gate }
4403*0Sstevel@tonic-gate }
4404*0Sstevel@tonic-gate }
4405*0Sstevel@tonic-gate if (!match) {
4406*0Sstevel@tonic-gate U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4407*0Sstevel@tonic-gate STRLEN tmplen;
4408*0Sstevel@tonic-gate
4409*0Sstevel@tonic-gate to_utf8_fold(p, tmpbuf, &tmplen);
4410*0Sstevel@tonic-gate if (swash_fetch(sw, tmpbuf, do_utf8))
4411*0Sstevel@tonic-gate match = TRUE;
4412*0Sstevel@tonic-gate }
4413*0Sstevel@tonic-gate }
4414*0Sstevel@tonic-gate }
4415*0Sstevel@tonic-gate }
4416*0Sstevel@tonic-gate if (match && lenp && *lenp == 0)
4417*0Sstevel@tonic-gate *lenp = UNISKIP(NATIVE_TO_UNI(c));
4418*0Sstevel@tonic-gate }
4419*0Sstevel@tonic-gate if (!match && c < 256) {
4420*0Sstevel@tonic-gate if (ANYOF_BITMAP_TEST(n, c))
4421*0Sstevel@tonic-gate match = TRUE;
4422*0Sstevel@tonic-gate else if (flags & ANYOF_FOLD) {
4423*0Sstevel@tonic-gate U8 f;
4424*0Sstevel@tonic-gate
4425*0Sstevel@tonic-gate if (flags & ANYOF_LOCALE) {
4426*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4427*0Sstevel@tonic-gate f = PL_fold_locale[c];
4428*0Sstevel@tonic-gate }
4429*0Sstevel@tonic-gate else
4430*0Sstevel@tonic-gate f = PL_fold[c];
4431*0Sstevel@tonic-gate if (f != c && ANYOF_BITMAP_TEST(n, f))
4432*0Sstevel@tonic-gate match = TRUE;
4433*0Sstevel@tonic-gate }
4434*0Sstevel@tonic-gate
4435*0Sstevel@tonic-gate if (!match && (flags & ANYOF_CLASS)) {
4436*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted;
4437*0Sstevel@tonic-gate if (
4438*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4439*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4440*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4441*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4442*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4443*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4444*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4445*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4446*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4447*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4448*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4449*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4450*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4451*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4452*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4453*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4454*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4455*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4456*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4457*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4458*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4459*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4460*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4461*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4462*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4463*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4464*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4465*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4466*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4467*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4468*0Sstevel@tonic-gate ) /* How's that for a conditional? */
4469*0Sstevel@tonic-gate {
4470*0Sstevel@tonic-gate match = TRUE;
4471*0Sstevel@tonic-gate }
4472*0Sstevel@tonic-gate }
4473*0Sstevel@tonic-gate }
4474*0Sstevel@tonic-gate
4475*0Sstevel@tonic-gate return (flags & ANYOF_INVERT) ? !match : match;
4476*0Sstevel@tonic-gate }
4477*0Sstevel@tonic-gate
4478*0Sstevel@tonic-gate STATIC U8 *
S_reghop(pTHX_ U8 * s,I32 off)4479*0Sstevel@tonic-gate S_reghop(pTHX_ U8 *s, I32 off)
4480*0Sstevel@tonic-gate {
4481*0Sstevel@tonic-gate return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4482*0Sstevel@tonic-gate }
4483*0Sstevel@tonic-gate
4484*0Sstevel@tonic-gate STATIC U8 *
S_reghop3(pTHX_ U8 * s,I32 off,U8 * lim)4485*0Sstevel@tonic-gate S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4486*0Sstevel@tonic-gate {
4487*0Sstevel@tonic-gate if (off >= 0) {
4488*0Sstevel@tonic-gate while (off-- && s < lim) {
4489*0Sstevel@tonic-gate /* XXX could check well-formedness here */
4490*0Sstevel@tonic-gate s += UTF8SKIP(s);
4491*0Sstevel@tonic-gate }
4492*0Sstevel@tonic-gate }
4493*0Sstevel@tonic-gate else {
4494*0Sstevel@tonic-gate while (off++) {
4495*0Sstevel@tonic-gate if (s > lim) {
4496*0Sstevel@tonic-gate s--;
4497*0Sstevel@tonic-gate if (UTF8_IS_CONTINUED(*s)) {
4498*0Sstevel@tonic-gate while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4499*0Sstevel@tonic-gate s--;
4500*0Sstevel@tonic-gate }
4501*0Sstevel@tonic-gate /* XXX could check well-formedness here */
4502*0Sstevel@tonic-gate }
4503*0Sstevel@tonic-gate }
4504*0Sstevel@tonic-gate }
4505*0Sstevel@tonic-gate return s;
4506*0Sstevel@tonic-gate }
4507*0Sstevel@tonic-gate
4508*0Sstevel@tonic-gate STATIC U8 *
S_reghopmaybe(pTHX_ U8 * s,I32 off)4509*0Sstevel@tonic-gate S_reghopmaybe(pTHX_ U8 *s, I32 off)
4510*0Sstevel@tonic-gate {
4511*0Sstevel@tonic-gate return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4512*0Sstevel@tonic-gate }
4513*0Sstevel@tonic-gate
4514*0Sstevel@tonic-gate STATIC U8 *
S_reghopmaybe3(pTHX_ U8 * s,I32 off,U8 * lim)4515*0Sstevel@tonic-gate S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4516*0Sstevel@tonic-gate {
4517*0Sstevel@tonic-gate if (off >= 0) {
4518*0Sstevel@tonic-gate while (off-- && s < lim) {
4519*0Sstevel@tonic-gate /* XXX could check well-formedness here */
4520*0Sstevel@tonic-gate s += UTF8SKIP(s);
4521*0Sstevel@tonic-gate }
4522*0Sstevel@tonic-gate if (off >= 0)
4523*0Sstevel@tonic-gate return 0;
4524*0Sstevel@tonic-gate }
4525*0Sstevel@tonic-gate else {
4526*0Sstevel@tonic-gate while (off++) {
4527*0Sstevel@tonic-gate if (s > lim) {
4528*0Sstevel@tonic-gate s--;
4529*0Sstevel@tonic-gate if (UTF8_IS_CONTINUED(*s)) {
4530*0Sstevel@tonic-gate while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4531*0Sstevel@tonic-gate s--;
4532*0Sstevel@tonic-gate }
4533*0Sstevel@tonic-gate /* XXX could check well-formedness here */
4534*0Sstevel@tonic-gate }
4535*0Sstevel@tonic-gate else
4536*0Sstevel@tonic-gate break;
4537*0Sstevel@tonic-gate }
4538*0Sstevel@tonic-gate if (off <= 0)
4539*0Sstevel@tonic-gate return 0;
4540*0Sstevel@tonic-gate }
4541*0Sstevel@tonic-gate return s;
4542*0Sstevel@tonic-gate }
4543*0Sstevel@tonic-gate
4544*0Sstevel@tonic-gate static void
restore_pos(pTHX_ void * arg)4545*0Sstevel@tonic-gate restore_pos(pTHX_ void *arg)
4546*0Sstevel@tonic-gate {
4547*0Sstevel@tonic-gate if (PL_reg_eval_set) {
4548*0Sstevel@tonic-gate if (PL_reg_oldsaved) {
4549*0Sstevel@tonic-gate PL_reg_re->subbeg = PL_reg_oldsaved;
4550*0Sstevel@tonic-gate PL_reg_re->sublen = PL_reg_oldsavedlen;
4551*0Sstevel@tonic-gate RX_MATCH_COPIED_on(PL_reg_re);
4552*0Sstevel@tonic-gate }
4553*0Sstevel@tonic-gate PL_reg_magic->mg_len = PL_reg_oldpos;
4554*0Sstevel@tonic-gate PL_reg_eval_set = 0;
4555*0Sstevel@tonic-gate PL_curpm = PL_reg_oldcurpm;
4556*0Sstevel@tonic-gate }
4557*0Sstevel@tonic-gate }
4558*0Sstevel@tonic-gate
4559*0Sstevel@tonic-gate STATIC void
S_to_utf8_substr(pTHX_ register regexp * prog)4560*0Sstevel@tonic-gate S_to_utf8_substr(pTHX_ register regexp *prog)
4561*0Sstevel@tonic-gate {
4562*0Sstevel@tonic-gate SV* sv;
4563*0Sstevel@tonic-gate if (prog->float_substr && !prog->float_utf8) {
4564*0Sstevel@tonic-gate prog->float_utf8 = sv = NEWSV(117, 0);
4565*0Sstevel@tonic-gate SvSetSV(sv, prog->float_substr);
4566*0Sstevel@tonic-gate sv_utf8_upgrade(sv);
4567*0Sstevel@tonic-gate if (SvTAIL(prog->float_substr))
4568*0Sstevel@tonic-gate SvTAIL_on(sv);
4569*0Sstevel@tonic-gate if (prog->float_substr == prog->check_substr)
4570*0Sstevel@tonic-gate prog->check_utf8 = sv;
4571*0Sstevel@tonic-gate }
4572*0Sstevel@tonic-gate if (prog->anchored_substr && !prog->anchored_utf8) {
4573*0Sstevel@tonic-gate prog->anchored_utf8 = sv = NEWSV(118, 0);
4574*0Sstevel@tonic-gate SvSetSV(sv, prog->anchored_substr);
4575*0Sstevel@tonic-gate sv_utf8_upgrade(sv);
4576*0Sstevel@tonic-gate if (SvTAIL(prog->anchored_substr))
4577*0Sstevel@tonic-gate SvTAIL_on(sv);
4578*0Sstevel@tonic-gate if (prog->anchored_substr == prog->check_substr)
4579*0Sstevel@tonic-gate prog->check_utf8 = sv;
4580*0Sstevel@tonic-gate }
4581*0Sstevel@tonic-gate }
4582*0Sstevel@tonic-gate
4583*0Sstevel@tonic-gate STATIC void
S_to_byte_substr(pTHX_ register regexp * prog)4584*0Sstevel@tonic-gate S_to_byte_substr(pTHX_ register regexp *prog)
4585*0Sstevel@tonic-gate {
4586*0Sstevel@tonic-gate SV* sv;
4587*0Sstevel@tonic-gate if (prog->float_utf8 && !prog->float_substr) {
4588*0Sstevel@tonic-gate prog->float_substr = sv = NEWSV(117, 0);
4589*0Sstevel@tonic-gate SvSetSV(sv, prog->float_utf8);
4590*0Sstevel@tonic-gate if (sv_utf8_downgrade(sv, TRUE)) {
4591*0Sstevel@tonic-gate if (SvTAIL(prog->float_utf8))
4592*0Sstevel@tonic-gate SvTAIL_on(sv);
4593*0Sstevel@tonic-gate } else {
4594*0Sstevel@tonic-gate SvREFCNT_dec(sv);
4595*0Sstevel@tonic-gate prog->float_substr = sv = &PL_sv_undef;
4596*0Sstevel@tonic-gate }
4597*0Sstevel@tonic-gate if (prog->float_utf8 == prog->check_utf8)
4598*0Sstevel@tonic-gate prog->check_substr = sv;
4599*0Sstevel@tonic-gate }
4600*0Sstevel@tonic-gate if (prog->anchored_utf8 && !prog->anchored_substr) {
4601*0Sstevel@tonic-gate prog->anchored_substr = sv = NEWSV(118, 0);
4602*0Sstevel@tonic-gate SvSetSV(sv, prog->anchored_utf8);
4603*0Sstevel@tonic-gate if (sv_utf8_downgrade(sv, TRUE)) {
4604*0Sstevel@tonic-gate if (SvTAIL(prog->anchored_utf8))
4605*0Sstevel@tonic-gate SvTAIL_on(sv);
4606*0Sstevel@tonic-gate } else {
4607*0Sstevel@tonic-gate SvREFCNT_dec(sv);
4608*0Sstevel@tonic-gate prog->anchored_substr = sv = &PL_sv_undef;
4609*0Sstevel@tonic-gate }
4610*0Sstevel@tonic-gate if (prog->anchored_utf8 == prog->check_utf8)
4611*0Sstevel@tonic-gate prog->check_substr = sv;
4612*0Sstevel@tonic-gate }
4613*0Sstevel@tonic-gate }
4614