1*0Sstevel@tonic-gate /* regexec.c 2*0Sstevel@tonic-gate */ 3*0Sstevel@tonic-gate 4*0Sstevel@tonic-gate /* 5*0Sstevel@tonic-gate * "One Ring to rule them all, One Ring to find them..." 6*0Sstevel@tonic-gate */ 7*0Sstevel@tonic-gate 8*0Sstevel@tonic-gate /* NOTE: this is derived from Henry Spencer's regexp code, and should not 9*0Sstevel@tonic-gate * confused with the original package (see point 3 below). Thanks, Henry! 10*0Sstevel@tonic-gate */ 11*0Sstevel@tonic-gate 12*0Sstevel@tonic-gate /* Additional note: this code is very heavily munged from Henry's version 13*0Sstevel@tonic-gate * in places. In some spots I've traded clarity for efficiency, so don't 14*0Sstevel@tonic-gate * blame Henry for some of the lack of readability. 15*0Sstevel@tonic-gate */ 16*0Sstevel@tonic-gate 17*0Sstevel@tonic-gate /* The names of the functions have been changed from regcomp and 18*0Sstevel@tonic-gate * regexec to pregcomp and pregexec in order to avoid conflicts 19*0Sstevel@tonic-gate * with the POSIX routines of the same names. 20*0Sstevel@tonic-gate */ 21*0Sstevel@tonic-gate 22*0Sstevel@tonic-gate #ifdef PERL_EXT_RE_BUILD 23*0Sstevel@tonic-gate /* need to replace pregcomp et al, so enable that */ 24*0Sstevel@tonic-gate # ifndef PERL_IN_XSUB_RE 25*0Sstevel@tonic-gate # define PERL_IN_XSUB_RE 26*0Sstevel@tonic-gate # endif 27*0Sstevel@tonic-gate /* need access to debugger hooks */ 28*0Sstevel@tonic-gate # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING) 29*0Sstevel@tonic-gate # define DEBUGGING 30*0Sstevel@tonic-gate # endif 31*0Sstevel@tonic-gate #endif 32*0Sstevel@tonic-gate 33*0Sstevel@tonic-gate #ifdef PERL_IN_XSUB_RE 34*0Sstevel@tonic-gate /* We *really* need to overwrite these symbols: */ 35*0Sstevel@tonic-gate # define Perl_regexec_flags my_regexec 36*0Sstevel@tonic-gate # define Perl_regdump my_regdump 37*0Sstevel@tonic-gate # define Perl_regprop my_regprop 38*0Sstevel@tonic-gate # define Perl_re_intuit_start my_re_intuit_start 39*0Sstevel@tonic-gate /* *These* symbols are masked to allow static link. */ 40*0Sstevel@tonic-gate # define Perl_pregexec my_pregexec 41*0Sstevel@tonic-gate # define Perl_reginitcolors my_reginitcolors 42*0Sstevel@tonic-gate # define Perl_regclass_swash my_regclass_swash 43*0Sstevel@tonic-gate 44*0Sstevel@tonic-gate # define PERL_NO_GET_CONTEXT 45*0Sstevel@tonic-gate #endif 46*0Sstevel@tonic-gate 47*0Sstevel@tonic-gate /*SUPPRESS 112*/ 48*0Sstevel@tonic-gate /* 49*0Sstevel@tonic-gate * pregcomp and pregexec -- regsub and regerror are not used in perl 50*0Sstevel@tonic-gate * 51*0Sstevel@tonic-gate * Copyright (c) 1986 by University of Toronto. 52*0Sstevel@tonic-gate * Written by Henry Spencer. Not derived from licensed software. 53*0Sstevel@tonic-gate * 54*0Sstevel@tonic-gate * Permission is granted to anyone to use this software for any 55*0Sstevel@tonic-gate * purpose on any computer system, and to redistribute it freely, 56*0Sstevel@tonic-gate * subject to the following restrictions: 57*0Sstevel@tonic-gate * 58*0Sstevel@tonic-gate * 1. The author is not responsible for the consequences of use of 59*0Sstevel@tonic-gate * this software, no matter how awful, even if they arise 60*0Sstevel@tonic-gate * from defects in it. 61*0Sstevel@tonic-gate * 62*0Sstevel@tonic-gate * 2. The origin of this software must not be misrepresented, either 63*0Sstevel@tonic-gate * by explicit claim or by omission. 64*0Sstevel@tonic-gate * 65*0Sstevel@tonic-gate * 3. Altered versions must be plainly marked as such, and must not 66*0Sstevel@tonic-gate * be misrepresented as being the original software. 67*0Sstevel@tonic-gate * 68*0Sstevel@tonic-gate **** Alterations to Henry's code are... 69*0Sstevel@tonic-gate **** 70*0Sstevel@tonic-gate **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 71*0Sstevel@tonic-gate **** 2000, 2001, 2002, 2003, 2004, by Larry Wall and others 72*0Sstevel@tonic-gate **** 73*0Sstevel@tonic-gate **** You may distribute under the terms of either the GNU General Public 74*0Sstevel@tonic-gate **** License or the Artistic License, as specified in the README file. 75*0Sstevel@tonic-gate * 76*0Sstevel@tonic-gate * Beware that some of this code is subtly aware of the way operator 77*0Sstevel@tonic-gate * precedence is structured in regular expressions. Serious changes in 78*0Sstevel@tonic-gate * regular-expression syntax might require a total rethink. 79*0Sstevel@tonic-gate */ 80*0Sstevel@tonic-gate #include "EXTERN.h" 81*0Sstevel@tonic-gate #define PERL_IN_REGEXEC_C 82*0Sstevel@tonic-gate #include "perl.h" 83*0Sstevel@tonic-gate 84*0Sstevel@tonic-gate #include "regcomp.h" 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate #define RF_tainted 1 /* tainted information used? */ 87*0Sstevel@tonic-gate #define RF_warned 2 /* warned about big count? */ 88*0Sstevel@tonic-gate #define RF_evaled 4 /* Did an EVAL with setting? */ 89*0Sstevel@tonic-gate #define RF_utf8 8 /* String contains multibyte chars? */ 90*0Sstevel@tonic-gate #define RF_false 16 /* odd number of nested negatives */ 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gate #define UTF ((PL_reg_flags & RF_utf8) != 0) 93*0Sstevel@tonic-gate 94*0Sstevel@tonic-gate #define RS_init 1 /* eval environment created */ 95*0Sstevel@tonic-gate #define RS_set 2 /* replsv value is set */ 96*0Sstevel@tonic-gate 97*0Sstevel@tonic-gate #ifndef STATIC 98*0Sstevel@tonic-gate #define STATIC static 99*0Sstevel@tonic-gate #endif 100*0Sstevel@tonic-gate 101*0Sstevel@tonic-gate #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c))) 102*0Sstevel@tonic-gate 103*0Sstevel@tonic-gate /* 104*0Sstevel@tonic-gate * Forwards. 105*0Sstevel@tonic-gate */ 106*0Sstevel@tonic-gate 107*0Sstevel@tonic-gate #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv)) 108*0Sstevel@tonic-gate #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b) 109*0Sstevel@tonic-gate 110*0Sstevel@tonic-gate #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off)) 111*0Sstevel@tonic-gate #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off)) 112*0Sstevel@tonic-gate #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off)) 113*0Sstevel@tonic-gate #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off)) 114*0Sstevel@tonic-gate #define HOPc(pos,off) ((char*)HOP(pos,off)) 115*0Sstevel@tonic-gate #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off)) 116*0Sstevel@tonic-gate 117*0Sstevel@tonic-gate #define HOPBACK(pos, off) ( \ 118*0Sstevel@tonic-gate (PL_reg_match_utf8) \ 119*0Sstevel@tonic-gate ? reghopmaybe((U8*)pos, -off) \ 120*0Sstevel@tonic-gate : (pos - off >= PL_bostr) \ 121*0Sstevel@tonic-gate ? (U8*)(pos - off) \ 122*0Sstevel@tonic-gate : (U8*)NULL \ 123*0Sstevel@tonic-gate ) 124*0Sstevel@tonic-gate #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off) 125*0Sstevel@tonic-gate 126*0Sstevel@tonic-gate #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim)) 127*0Sstevel@tonic-gate #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim)) 128*0Sstevel@tonic-gate #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off)) 129*0Sstevel@tonic-gate #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off)) 130*0Sstevel@tonic-gate #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim)) 131*0Sstevel@tonic-gate #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim)) 132*0Sstevel@tonic-gate 133*0Sstevel@tonic-gate #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END 134*0Sstevel@tonic-gate 135*0Sstevel@tonic-gate /* for use after a quantifier and before an EXACT-like node -- japhy */ 136*0Sstevel@tonic-gate #define JUMPABLE(rn) ( \ 137*0Sstevel@tonic-gate OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \ 138*0Sstevel@tonic-gate OP(rn) == SUSPEND || OP(rn) == IFMATCH || \ 139*0Sstevel@tonic-gate OP(rn) == PLUS || OP(rn) == MINMOD || \ 140*0Sstevel@tonic-gate (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \ 141*0Sstevel@tonic-gate ) 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate #define HAS_TEXT(rn) ( \ 144*0Sstevel@tonic-gate PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \ 145*0Sstevel@tonic-gate ) 146*0Sstevel@tonic-gate 147*0Sstevel@tonic-gate /* 148*0Sstevel@tonic-gate Search for mandatory following text node; for lookahead, the text must 149*0Sstevel@tonic-gate follow but for lookbehind (rn->flags != 0) we skip to the next step. 150*0Sstevel@tonic-gate */ 151*0Sstevel@tonic-gate #define FIND_NEXT_IMPT(rn) STMT_START { \ 152*0Sstevel@tonic-gate while (JUMPABLE(rn)) \ 153*0Sstevel@tonic-gate if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \ 154*0Sstevel@tonic-gate rn = NEXTOPER(NEXTOPER(rn)); \ 155*0Sstevel@tonic-gate else if (OP(rn) == PLUS) \ 156*0Sstevel@tonic-gate rn = NEXTOPER(rn); \ 157*0Sstevel@tonic-gate else if (OP(rn) == IFMATCH) \ 158*0Sstevel@tonic-gate rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \ 159*0Sstevel@tonic-gate else rn += NEXT_OFF(rn); \ 160*0Sstevel@tonic-gate } STMT_END 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate static void restore_pos(pTHX_ void *arg); 163*0Sstevel@tonic-gate 164*0Sstevel@tonic-gate STATIC CHECKPOINT 165*0Sstevel@tonic-gate S_regcppush(pTHX_ I32 parenfloor) 166*0Sstevel@tonic-gate { 167*0Sstevel@tonic-gate int retval = PL_savestack_ix; 168*0Sstevel@tonic-gate #define REGCP_PAREN_ELEMS 4 169*0Sstevel@tonic-gate int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS; 170*0Sstevel@tonic-gate int p; 171*0Sstevel@tonic-gate 172*0Sstevel@tonic-gate if (paren_elems_to_push < 0) 173*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: paren_elems_to_push < 0"); 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate #define REGCP_OTHER_ELEMS 6 176*0Sstevel@tonic-gate SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS); 177*0Sstevel@tonic-gate for (p = PL_regsize; p > parenfloor; p--) { 178*0Sstevel@tonic-gate /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ 179*0Sstevel@tonic-gate SSPUSHINT(PL_regendp[p]); 180*0Sstevel@tonic-gate SSPUSHINT(PL_regstartp[p]); 181*0Sstevel@tonic-gate SSPUSHPTR(PL_reg_start_tmp[p]); 182*0Sstevel@tonic-gate SSPUSHINT(p); 183*0Sstevel@tonic-gate } 184*0Sstevel@tonic-gate /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */ 185*0Sstevel@tonic-gate SSPUSHINT(PL_regsize); 186*0Sstevel@tonic-gate SSPUSHINT(*PL_reglastparen); 187*0Sstevel@tonic-gate SSPUSHINT(*PL_reglastcloseparen); 188*0Sstevel@tonic-gate SSPUSHPTR(PL_reginput); 189*0Sstevel@tonic-gate #define REGCP_FRAME_ELEMS 2 190*0Sstevel@tonic-gate /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and 191*0Sstevel@tonic-gate * are needed for the regexp context stack bookkeeping. */ 192*0Sstevel@tonic-gate SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS); 193*0Sstevel@tonic-gate SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */ 194*0Sstevel@tonic-gate 195*0Sstevel@tonic-gate return retval; 196*0Sstevel@tonic-gate } 197*0Sstevel@tonic-gate 198*0Sstevel@tonic-gate /* These are needed since we do not localize EVAL nodes: */ 199*0Sstevel@tonic-gate # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \ 200*0Sstevel@tonic-gate " Setting an EVAL scope, savestack=%"IVdf"\n", \ 201*0Sstevel@tonic-gate (IV)PL_savestack_ix)); cp = PL_savestack_ix 202*0Sstevel@tonic-gate 203*0Sstevel@tonic-gate # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \ 204*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, \ 205*0Sstevel@tonic-gate " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \ 206*0Sstevel@tonic-gate (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp) 207*0Sstevel@tonic-gate 208*0Sstevel@tonic-gate STATIC char * 209*0Sstevel@tonic-gate S_regcppop(pTHX) 210*0Sstevel@tonic-gate { 211*0Sstevel@tonic-gate I32 i; 212*0Sstevel@tonic-gate U32 paren = 0; 213*0Sstevel@tonic-gate char *input; 214*0Sstevel@tonic-gate I32 tmps; 215*0Sstevel@tonic-gate 216*0Sstevel@tonic-gate /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */ 217*0Sstevel@tonic-gate i = SSPOPINT; 218*0Sstevel@tonic-gate assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */ 219*0Sstevel@tonic-gate i = SSPOPINT; /* Parentheses elements to pop. */ 220*0Sstevel@tonic-gate input = (char *) SSPOPPTR; 221*0Sstevel@tonic-gate *PL_reglastcloseparen = SSPOPINT; 222*0Sstevel@tonic-gate *PL_reglastparen = SSPOPINT; 223*0Sstevel@tonic-gate PL_regsize = SSPOPINT; 224*0Sstevel@tonic-gate 225*0Sstevel@tonic-gate /* Now restore the parentheses context. */ 226*0Sstevel@tonic-gate for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS); 227*0Sstevel@tonic-gate i > 0; i -= REGCP_PAREN_ELEMS) { 228*0Sstevel@tonic-gate paren = (U32)SSPOPINT; 229*0Sstevel@tonic-gate PL_reg_start_tmp[paren] = (char *) SSPOPPTR; 230*0Sstevel@tonic-gate PL_regstartp[paren] = SSPOPINT; 231*0Sstevel@tonic-gate tmps = SSPOPINT; 232*0Sstevel@tonic-gate if (paren <= *PL_reglastparen) 233*0Sstevel@tonic-gate PL_regendp[paren] = tmps; 234*0Sstevel@tonic-gate DEBUG_r( 235*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 236*0Sstevel@tonic-gate " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", 237*0Sstevel@tonic-gate (UV)paren, (IV)PL_regstartp[paren], 238*0Sstevel@tonic-gate (IV)(PL_reg_start_tmp[paren] - PL_bostr), 239*0Sstevel@tonic-gate (IV)PL_regendp[paren], 240*0Sstevel@tonic-gate (paren > *PL_reglastparen ? "(no)" : "")); 241*0Sstevel@tonic-gate ); 242*0Sstevel@tonic-gate } 243*0Sstevel@tonic-gate DEBUG_r( 244*0Sstevel@tonic-gate if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) { 245*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 246*0Sstevel@tonic-gate " restoring \\%"IVdf"..\\%"IVdf" to undef\n", 247*0Sstevel@tonic-gate (IV)(*PL_reglastparen + 1), (IV)PL_regnpar); 248*0Sstevel@tonic-gate } 249*0Sstevel@tonic-gate ); 250*0Sstevel@tonic-gate #if 1 251*0Sstevel@tonic-gate /* It would seem that the similar code in regtry() 252*0Sstevel@tonic-gate * already takes care of this, and in fact it is in 253*0Sstevel@tonic-gate * a better location to since this code can #if 0-ed out 254*0Sstevel@tonic-gate * but the code in regtry() is needed or otherwise tests 255*0Sstevel@tonic-gate * requiring null fields (pat.t#187 and split.t#{13,14} 256*0Sstevel@tonic-gate * (as of patchlevel 7877) will fail. Then again, 257*0Sstevel@tonic-gate * this code seems to be necessary or otherwise 258*0Sstevel@tonic-gate * building DynaLoader will fail: 259*0Sstevel@tonic-gate * "Error: '*' not in typemap in DynaLoader.xs, line 164" 260*0Sstevel@tonic-gate * --jhi */ 261*0Sstevel@tonic-gate for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) { 262*0Sstevel@tonic-gate if ((I32)paren > PL_regsize) 263*0Sstevel@tonic-gate PL_regstartp[paren] = -1; 264*0Sstevel@tonic-gate PL_regendp[paren] = -1; 265*0Sstevel@tonic-gate } 266*0Sstevel@tonic-gate #endif 267*0Sstevel@tonic-gate return input; 268*0Sstevel@tonic-gate } 269*0Sstevel@tonic-gate 270*0Sstevel@tonic-gate STATIC char * 271*0Sstevel@tonic-gate S_regcp_set_to(pTHX_ I32 ss) 272*0Sstevel@tonic-gate { 273*0Sstevel@tonic-gate I32 tmp = PL_savestack_ix; 274*0Sstevel@tonic-gate 275*0Sstevel@tonic-gate PL_savestack_ix = ss; 276*0Sstevel@tonic-gate regcppop(); 277*0Sstevel@tonic-gate PL_savestack_ix = tmp; 278*0Sstevel@tonic-gate return Nullch; 279*0Sstevel@tonic-gate } 280*0Sstevel@tonic-gate 281*0Sstevel@tonic-gate typedef struct re_cc_state 282*0Sstevel@tonic-gate { 283*0Sstevel@tonic-gate I32 ss; 284*0Sstevel@tonic-gate regnode *node; 285*0Sstevel@tonic-gate struct re_cc_state *prev; 286*0Sstevel@tonic-gate CURCUR *cc; 287*0Sstevel@tonic-gate regexp *re; 288*0Sstevel@tonic-gate } re_cc_state; 289*0Sstevel@tonic-gate 290*0Sstevel@tonic-gate #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */ 291*0Sstevel@tonic-gate 292*0Sstevel@tonic-gate #define TRYPAREN(paren, n, input) { \ 293*0Sstevel@tonic-gate if (paren) { \ 294*0Sstevel@tonic-gate if (n) { \ 295*0Sstevel@tonic-gate PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \ 296*0Sstevel@tonic-gate PL_regendp[paren] = input - PL_bostr; \ 297*0Sstevel@tonic-gate } \ 298*0Sstevel@tonic-gate else \ 299*0Sstevel@tonic-gate PL_regendp[paren] = -1; \ 300*0Sstevel@tonic-gate } \ 301*0Sstevel@tonic-gate if (regmatch(next)) \ 302*0Sstevel@tonic-gate sayYES; \ 303*0Sstevel@tonic-gate if (paren && n) \ 304*0Sstevel@tonic-gate PL_regendp[paren] = -1; \ 305*0Sstevel@tonic-gate } 306*0Sstevel@tonic-gate 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate /* 309*0Sstevel@tonic-gate * pregexec and friends 310*0Sstevel@tonic-gate */ 311*0Sstevel@tonic-gate 312*0Sstevel@tonic-gate /* 313*0Sstevel@tonic-gate - pregexec - match a regexp against a string 314*0Sstevel@tonic-gate */ 315*0Sstevel@tonic-gate I32 316*0Sstevel@tonic-gate Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend, 317*0Sstevel@tonic-gate char *strbeg, I32 minend, SV *screamer, U32 nosave) 318*0Sstevel@tonic-gate /* strend: pointer to null at end of string */ 319*0Sstevel@tonic-gate /* strbeg: real beginning of string */ 320*0Sstevel@tonic-gate /* minend: end of match must be >=minend after stringarg. */ 321*0Sstevel@tonic-gate /* nosave: For optimizations. */ 322*0Sstevel@tonic-gate { 323*0Sstevel@tonic-gate return 324*0Sstevel@tonic-gate regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL, 325*0Sstevel@tonic-gate nosave ? 0 : REXEC_COPY_STR); 326*0Sstevel@tonic-gate } 327*0Sstevel@tonic-gate 328*0Sstevel@tonic-gate STATIC void 329*0Sstevel@tonic-gate S_cache_re(pTHX_ regexp *prog) 330*0Sstevel@tonic-gate { 331*0Sstevel@tonic-gate PL_regprecomp = prog->precomp; /* Needed for FAIL. */ 332*0Sstevel@tonic-gate #ifdef DEBUGGING 333*0Sstevel@tonic-gate PL_regprogram = prog->program; 334*0Sstevel@tonic-gate #endif 335*0Sstevel@tonic-gate PL_regnpar = prog->nparens; 336*0Sstevel@tonic-gate PL_regdata = prog->data; 337*0Sstevel@tonic-gate PL_reg_re = prog; 338*0Sstevel@tonic-gate } 339*0Sstevel@tonic-gate 340*0Sstevel@tonic-gate /* 341*0Sstevel@tonic-gate * Need to implement the following flags for reg_anch: 342*0Sstevel@tonic-gate * 343*0Sstevel@tonic-gate * USE_INTUIT_NOML - Useful to call re_intuit_start() first 344*0Sstevel@tonic-gate * USE_INTUIT_ML 345*0Sstevel@tonic-gate * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer 346*0Sstevel@tonic-gate * INTUIT_AUTORITATIVE_ML 347*0Sstevel@tonic-gate * INTUIT_ONCE_NOML - Intuit can match in one location only. 348*0Sstevel@tonic-gate * INTUIT_ONCE_ML 349*0Sstevel@tonic-gate * 350*0Sstevel@tonic-gate * Another flag for this function: SECOND_TIME (so that float substrs 351*0Sstevel@tonic-gate * with giant delta may be not rechecked). 352*0Sstevel@tonic-gate */ 353*0Sstevel@tonic-gate 354*0Sstevel@tonic-gate /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */ 355*0Sstevel@tonic-gate 356*0Sstevel@tonic-gate /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend. 357*0Sstevel@tonic-gate Otherwise, only SvCUR(sv) is used to get strbeg. */ 358*0Sstevel@tonic-gate 359*0Sstevel@tonic-gate /* XXXX We assume that strpos is strbeg unless sv. */ 360*0Sstevel@tonic-gate 361*0Sstevel@tonic-gate /* XXXX Some places assume that there is a fixed substring. 362*0Sstevel@tonic-gate An update may be needed if optimizer marks as "INTUITable" 363*0Sstevel@tonic-gate RExen without fixed substrings. Similarly, it is assumed that 364*0Sstevel@tonic-gate lengths of all the strings are no more than minlen, thus they 365*0Sstevel@tonic-gate cannot come from lookahead. 366*0Sstevel@tonic-gate (Or minlen should take into account lookahead.) */ 367*0Sstevel@tonic-gate 368*0Sstevel@tonic-gate /* A failure to find a constant substring means that there is no need to make 369*0Sstevel@tonic-gate an expensive call to REx engine, thus we celebrate a failure. Similarly, 370*0Sstevel@tonic-gate finding a substring too deep into the string means that less calls to 371*0Sstevel@tonic-gate regtry() should be needed. 372*0Sstevel@tonic-gate 373*0Sstevel@tonic-gate REx compiler's optimizer found 4 possible hints: 374*0Sstevel@tonic-gate a) Anchored substring; 375*0Sstevel@tonic-gate b) Fixed substring; 376*0Sstevel@tonic-gate c) Whether we are anchored (beginning-of-line or \G); 377*0Sstevel@tonic-gate d) First node (of those at offset 0) which may distingush positions; 378*0Sstevel@tonic-gate We use a)b)d) and multiline-part of c), and try to find a position in the 379*0Sstevel@tonic-gate string which does not contradict any of them. 380*0Sstevel@tonic-gate */ 381*0Sstevel@tonic-gate 382*0Sstevel@tonic-gate /* Most of decisions we do here should have been done at compile time. 383*0Sstevel@tonic-gate The nodes of the REx which we used for the search should have been 384*0Sstevel@tonic-gate deleted from the finite automaton. */ 385*0Sstevel@tonic-gate 386*0Sstevel@tonic-gate char * 387*0Sstevel@tonic-gate Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, 388*0Sstevel@tonic-gate char *strend, U32 flags, re_scream_pos_data *data) 389*0Sstevel@tonic-gate { 390*0Sstevel@tonic-gate register I32 start_shift = 0; 391*0Sstevel@tonic-gate /* Should be nonnegative! */ 392*0Sstevel@tonic-gate register I32 end_shift = 0; 393*0Sstevel@tonic-gate register char *s; 394*0Sstevel@tonic-gate register SV *check; 395*0Sstevel@tonic-gate char *strbeg; 396*0Sstevel@tonic-gate char *t; 397*0Sstevel@tonic-gate int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */ 398*0Sstevel@tonic-gate I32 ml_anch; 399*0Sstevel@tonic-gate register char *other_last = Nullch; /* other substr checked before this */ 400*0Sstevel@tonic-gate char *check_at = Nullch; /* check substr found at this pos */ 401*0Sstevel@tonic-gate #ifdef DEBUGGING 402*0Sstevel@tonic-gate char *i_strpos = strpos; 403*0Sstevel@tonic-gate SV *dsv = PERL_DEBUG_PAD_ZERO(0); 404*0Sstevel@tonic-gate #endif 405*0Sstevel@tonic-gate RX_MATCH_UTF8_set(prog,do_utf8); 406*0Sstevel@tonic-gate 407*0Sstevel@tonic-gate if (prog->reganch & ROPT_UTF8) { 408*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 409*0Sstevel@tonic-gate "UTF-8 regex...\n")); 410*0Sstevel@tonic-gate PL_reg_flags |= RF_utf8; 411*0Sstevel@tonic-gate } 412*0Sstevel@tonic-gate 413*0Sstevel@tonic-gate DEBUG_r({ 414*0Sstevel@tonic-gate char *s = PL_reg_match_utf8 ? 415*0Sstevel@tonic-gate sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) : 416*0Sstevel@tonic-gate strpos; 417*0Sstevel@tonic-gate int len = PL_reg_match_utf8 ? 418*0Sstevel@tonic-gate strlen(s) : strend - strpos; 419*0Sstevel@tonic-gate if (!PL_colorset) 420*0Sstevel@tonic-gate reginitcolors(); 421*0Sstevel@tonic-gate if (PL_reg_match_utf8) 422*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 423*0Sstevel@tonic-gate "UTF-8 target...\n")); 424*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 425*0Sstevel@tonic-gate "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n", 426*0Sstevel@tonic-gate PL_colors[4],PL_colors[5],PL_colors[0], 427*0Sstevel@tonic-gate prog->precomp, 428*0Sstevel@tonic-gate PL_colors[1], 429*0Sstevel@tonic-gate (strlen(prog->precomp) > 60 ? "..." : ""), 430*0Sstevel@tonic-gate PL_colors[0], 431*0Sstevel@tonic-gate (int)(len > 60 ? 60 : len), 432*0Sstevel@tonic-gate s, PL_colors[1], 433*0Sstevel@tonic-gate (len > 60 ? "..." : "") 434*0Sstevel@tonic-gate ); 435*0Sstevel@tonic-gate }); 436*0Sstevel@tonic-gate 437*0Sstevel@tonic-gate /* CHR_DIST() would be more correct here but it makes things slow. */ 438*0Sstevel@tonic-gate if (prog->minlen > strend - strpos) { 439*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 440*0Sstevel@tonic-gate "String too short... [re_intuit_start]\n")); 441*0Sstevel@tonic-gate goto fail; 442*0Sstevel@tonic-gate } 443*0Sstevel@tonic-gate strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos; 444*0Sstevel@tonic-gate PL_regeol = strend; 445*0Sstevel@tonic-gate if (do_utf8) { 446*0Sstevel@tonic-gate if (!prog->check_utf8 && prog->check_substr) 447*0Sstevel@tonic-gate to_utf8_substr(prog); 448*0Sstevel@tonic-gate check = prog->check_utf8; 449*0Sstevel@tonic-gate } else { 450*0Sstevel@tonic-gate if (!prog->check_substr && prog->check_utf8) 451*0Sstevel@tonic-gate to_byte_substr(prog); 452*0Sstevel@tonic-gate check = prog->check_substr; 453*0Sstevel@tonic-gate } 454*0Sstevel@tonic-gate if (check == &PL_sv_undef) { 455*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 456*0Sstevel@tonic-gate "Non-utf string cannot match utf check string\n")); 457*0Sstevel@tonic-gate goto fail; 458*0Sstevel@tonic-gate } 459*0Sstevel@tonic-gate if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */ 460*0Sstevel@tonic-gate ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) 461*0Sstevel@tonic-gate || ( (prog->reganch & ROPT_ANCH_BOL) 462*0Sstevel@tonic-gate && !PL_multiline ) ); /* Check after \n? */ 463*0Sstevel@tonic-gate 464*0Sstevel@tonic-gate if (!ml_anch) { 465*0Sstevel@tonic-gate if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */ 466*0Sstevel@tonic-gate | ROPT_IMPLICIT)) /* not a real BOL */ 467*0Sstevel@tonic-gate /* SvCUR is not set on references: SvRV and SvPVX overlap */ 468*0Sstevel@tonic-gate && sv && !SvROK(sv) 469*0Sstevel@tonic-gate && (strpos != strbeg)) { 470*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); 471*0Sstevel@tonic-gate goto fail; 472*0Sstevel@tonic-gate } 473*0Sstevel@tonic-gate if (prog->check_offset_min == prog->check_offset_max && 474*0Sstevel@tonic-gate !(prog->reganch & ROPT_CANY_SEEN)) { 475*0Sstevel@tonic-gate /* Substring at constant offset from beg-of-str... */ 476*0Sstevel@tonic-gate I32 slen; 477*0Sstevel@tonic-gate 478*0Sstevel@tonic-gate s = HOP3c(strpos, prog->check_offset_min, strend); 479*0Sstevel@tonic-gate if (SvTAIL(check)) { 480*0Sstevel@tonic-gate slen = SvCUR(check); /* >= 1 */ 481*0Sstevel@tonic-gate 482*0Sstevel@tonic-gate if ( strend - s > slen || strend - s < slen - 1 483*0Sstevel@tonic-gate || (strend - s == slen && strend[-1] != '\n')) { 484*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n")); 485*0Sstevel@tonic-gate goto fail_finish; 486*0Sstevel@tonic-gate } 487*0Sstevel@tonic-gate /* Now should match s[0..slen-2] */ 488*0Sstevel@tonic-gate slen--; 489*0Sstevel@tonic-gate if (slen && (*SvPVX(check) != *s 490*0Sstevel@tonic-gate || (slen > 1 491*0Sstevel@tonic-gate && memNE(SvPVX(check), s, slen)))) { 492*0Sstevel@tonic-gate report_neq: 493*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n")); 494*0Sstevel@tonic-gate goto fail_finish; 495*0Sstevel@tonic-gate } 496*0Sstevel@tonic-gate } 497*0Sstevel@tonic-gate else if (*SvPVX(check) != *s 498*0Sstevel@tonic-gate || ((slen = SvCUR(check)) > 1 499*0Sstevel@tonic-gate && memNE(SvPVX(check), s, slen))) 500*0Sstevel@tonic-gate goto report_neq; 501*0Sstevel@tonic-gate goto success_at_start; 502*0Sstevel@tonic-gate } 503*0Sstevel@tonic-gate } 504*0Sstevel@tonic-gate /* Match is anchored, but substr is not anchored wrt beg-of-str. */ 505*0Sstevel@tonic-gate s = strpos; 506*0Sstevel@tonic-gate start_shift = prog->check_offset_min; /* okay to underestimate on CC */ 507*0Sstevel@tonic-gate end_shift = prog->minlen - start_shift - 508*0Sstevel@tonic-gate CHR_SVLEN(check) + (SvTAIL(check) != 0); 509*0Sstevel@tonic-gate if (!ml_anch) { 510*0Sstevel@tonic-gate I32 end = prog->check_offset_max + CHR_SVLEN(check) 511*0Sstevel@tonic-gate - (SvTAIL(check) != 0); 512*0Sstevel@tonic-gate I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end; 513*0Sstevel@tonic-gate 514*0Sstevel@tonic-gate if (end_shift < eshift) 515*0Sstevel@tonic-gate end_shift = eshift; 516*0Sstevel@tonic-gate } 517*0Sstevel@tonic-gate } 518*0Sstevel@tonic-gate else { /* Can match at random position */ 519*0Sstevel@tonic-gate ml_anch = 0; 520*0Sstevel@tonic-gate s = strpos; 521*0Sstevel@tonic-gate start_shift = prog->check_offset_min; /* okay to underestimate on CC */ 522*0Sstevel@tonic-gate /* Should be nonnegative! */ 523*0Sstevel@tonic-gate end_shift = prog->minlen - start_shift - 524*0Sstevel@tonic-gate CHR_SVLEN(check) + (SvTAIL(check) != 0); 525*0Sstevel@tonic-gate } 526*0Sstevel@tonic-gate 527*0Sstevel@tonic-gate #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */ 528*0Sstevel@tonic-gate if (end_shift < 0) 529*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: end_shift"); 530*0Sstevel@tonic-gate #endif 531*0Sstevel@tonic-gate 532*0Sstevel@tonic-gate restart: 533*0Sstevel@tonic-gate /* Find a possible match in the region s..strend by looking for 534*0Sstevel@tonic-gate the "check" substring in the region corrected by start/end_shift. */ 535*0Sstevel@tonic-gate if (flags & REXEC_SCREAM) { 536*0Sstevel@tonic-gate I32 p = -1; /* Internal iterator of scream. */ 537*0Sstevel@tonic-gate I32 *pp = data ? data->scream_pos : &p; 538*0Sstevel@tonic-gate 539*0Sstevel@tonic-gate if (PL_screamfirst[BmRARE(check)] >= 0 540*0Sstevel@tonic-gate || ( BmRARE(check) == '\n' 541*0Sstevel@tonic-gate && (BmPREVIOUS(check) == SvCUR(check) - 1) 542*0Sstevel@tonic-gate && SvTAIL(check) )) 543*0Sstevel@tonic-gate s = screaminstr(sv, check, 544*0Sstevel@tonic-gate start_shift + (s - strbeg), end_shift, pp, 0); 545*0Sstevel@tonic-gate else 546*0Sstevel@tonic-gate goto fail_finish; 547*0Sstevel@tonic-gate /* we may be pointing at the wrong string */ 548*0Sstevel@tonic-gate if (s && RX_MATCH_COPIED(prog)) 549*0Sstevel@tonic-gate s = strbeg + (s - SvPVX(sv)); 550*0Sstevel@tonic-gate if (data) 551*0Sstevel@tonic-gate *data->scream_olds = s; 552*0Sstevel@tonic-gate } 553*0Sstevel@tonic-gate else if (prog->reganch & ROPT_CANY_SEEN) 554*0Sstevel@tonic-gate s = fbm_instr((U8*)(s + start_shift), 555*0Sstevel@tonic-gate (U8*)(strend - end_shift), 556*0Sstevel@tonic-gate check, PL_multiline ? FBMrf_MULTILINE : 0); 557*0Sstevel@tonic-gate else 558*0Sstevel@tonic-gate s = fbm_instr(HOP3(s, start_shift, strend), 559*0Sstevel@tonic-gate HOP3(strend, -end_shift, strbeg), 560*0Sstevel@tonic-gate check, PL_multiline ? FBMrf_MULTILINE : 0); 561*0Sstevel@tonic-gate 562*0Sstevel@tonic-gate /* Update the count-of-usability, remove useless subpatterns, 563*0Sstevel@tonic-gate unshift s. */ 564*0Sstevel@tonic-gate 565*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s", 566*0Sstevel@tonic-gate (s ? "Found" : "Did not find"), 567*0Sstevel@tonic-gate (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"), 568*0Sstevel@tonic-gate PL_colors[0], 569*0Sstevel@tonic-gate (int)(SvCUR(check) - (SvTAIL(check)!=0)), 570*0Sstevel@tonic-gate SvPVX(check), 571*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(check) ? "$" : ""), 572*0Sstevel@tonic-gate (s ? " at offset " : "...\n") ) ); 573*0Sstevel@tonic-gate 574*0Sstevel@tonic-gate if (!s) 575*0Sstevel@tonic-gate goto fail_finish; 576*0Sstevel@tonic-gate 577*0Sstevel@tonic-gate check_at = s; 578*0Sstevel@tonic-gate 579*0Sstevel@tonic-gate /* Finish the diagnostic message */ 580*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) ); 581*0Sstevel@tonic-gate 582*0Sstevel@tonic-gate /* Got a candidate. Check MBOL anchoring, and the *other* substr. 583*0Sstevel@tonic-gate Start with the other substr. 584*0Sstevel@tonic-gate XXXX no SCREAM optimization yet - and a very coarse implementation 585*0Sstevel@tonic-gate XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will 586*0Sstevel@tonic-gate *always* match. Probably should be marked during compile... 587*0Sstevel@tonic-gate Probably it is right to do no SCREAM here... 588*0Sstevel@tonic-gate */ 589*0Sstevel@tonic-gate 590*0Sstevel@tonic-gate if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) { 591*0Sstevel@tonic-gate /* Take into account the "other" substring. */ 592*0Sstevel@tonic-gate /* XXXX May be hopelessly wrong for UTF... */ 593*0Sstevel@tonic-gate if (!other_last) 594*0Sstevel@tonic-gate other_last = strpos; 595*0Sstevel@tonic-gate if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) { 596*0Sstevel@tonic-gate do_other_anchored: 597*0Sstevel@tonic-gate { 598*0Sstevel@tonic-gate char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2; 599*0Sstevel@tonic-gate char *s1 = s; 600*0Sstevel@tonic-gate SV* must; 601*0Sstevel@tonic-gate 602*0Sstevel@tonic-gate t = s - prog->check_offset_max; 603*0Sstevel@tonic-gate if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */ 604*0Sstevel@tonic-gate && (!do_utf8 605*0Sstevel@tonic-gate || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos)) 606*0Sstevel@tonic-gate && t > strpos))) 607*0Sstevel@tonic-gate /* EMPTY */; 608*0Sstevel@tonic-gate else 609*0Sstevel@tonic-gate t = strpos; 610*0Sstevel@tonic-gate t = HOP3c(t, prog->anchored_offset, strend); 611*0Sstevel@tonic-gate if (t < other_last) /* These positions already checked */ 612*0Sstevel@tonic-gate t = other_last; 613*0Sstevel@tonic-gate last2 = last1 = HOP3c(strend, -prog->minlen, strbeg); 614*0Sstevel@tonic-gate if (last < last1) 615*0Sstevel@tonic-gate last1 = last; 616*0Sstevel@tonic-gate /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ 617*0Sstevel@tonic-gate /* On end-of-str: see comment below. */ 618*0Sstevel@tonic-gate must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr; 619*0Sstevel@tonic-gate if (must == &PL_sv_undef) { 620*0Sstevel@tonic-gate s = (char*)NULL; 621*0Sstevel@tonic-gate DEBUG_r(must = prog->anchored_utf8); /* for debug */ 622*0Sstevel@tonic-gate } 623*0Sstevel@tonic-gate else 624*0Sstevel@tonic-gate s = fbm_instr( 625*0Sstevel@tonic-gate (unsigned char*)t, 626*0Sstevel@tonic-gate HOP3(HOP3(last1, prog->anchored_offset, strend) 627*0Sstevel@tonic-gate + SvCUR(must), -(SvTAIL(must)!=0), strbeg), 628*0Sstevel@tonic-gate must, 629*0Sstevel@tonic-gate PL_multiline ? FBMrf_MULTILINE : 0 630*0Sstevel@tonic-gate ); 631*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 632*0Sstevel@tonic-gate "%s anchored substr `%s%.*s%s'%s", 633*0Sstevel@tonic-gate (s ? "Found" : "Contradicts"), 634*0Sstevel@tonic-gate PL_colors[0], 635*0Sstevel@tonic-gate (int)(SvCUR(must) 636*0Sstevel@tonic-gate - (SvTAIL(must)!=0)), 637*0Sstevel@tonic-gate SvPVX(must), 638*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(must) ? "$" : ""))); 639*0Sstevel@tonic-gate if (!s) { 640*0Sstevel@tonic-gate if (last1 >= last2) { 641*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 642*0Sstevel@tonic-gate ", giving up...\n")); 643*0Sstevel@tonic-gate goto fail_finish; 644*0Sstevel@tonic-gate } 645*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 646*0Sstevel@tonic-gate ", trying floating at offset %ld...\n", 647*0Sstevel@tonic-gate (long)(HOP3c(s1, 1, strend) - i_strpos))); 648*0Sstevel@tonic-gate other_last = HOP3c(last1, prog->anchored_offset+1, strend); 649*0Sstevel@tonic-gate s = HOP3c(last, 1, strend); 650*0Sstevel@tonic-gate goto restart; 651*0Sstevel@tonic-gate } 652*0Sstevel@tonic-gate else { 653*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", 654*0Sstevel@tonic-gate (long)(s - i_strpos))); 655*0Sstevel@tonic-gate t = HOP3c(s, -prog->anchored_offset, strbeg); 656*0Sstevel@tonic-gate other_last = HOP3c(s, 1, strend); 657*0Sstevel@tonic-gate s = s1; 658*0Sstevel@tonic-gate if (t == strpos) 659*0Sstevel@tonic-gate goto try_at_start; 660*0Sstevel@tonic-gate goto try_at_offset; 661*0Sstevel@tonic-gate } 662*0Sstevel@tonic-gate } 663*0Sstevel@tonic-gate } 664*0Sstevel@tonic-gate else { /* Take into account the floating substring. */ 665*0Sstevel@tonic-gate char *last, *last1; 666*0Sstevel@tonic-gate char *s1 = s; 667*0Sstevel@tonic-gate SV* must; 668*0Sstevel@tonic-gate 669*0Sstevel@tonic-gate t = HOP3c(s, -start_shift, strbeg); 670*0Sstevel@tonic-gate last1 = last = 671*0Sstevel@tonic-gate HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg); 672*0Sstevel@tonic-gate if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset) 673*0Sstevel@tonic-gate last = HOP3c(t, prog->float_max_offset, strend); 674*0Sstevel@tonic-gate s = HOP3c(t, prog->float_min_offset, strend); 675*0Sstevel@tonic-gate if (s < other_last) 676*0Sstevel@tonic-gate s = other_last; 677*0Sstevel@tonic-gate /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ 678*0Sstevel@tonic-gate must = do_utf8 ? prog->float_utf8 : prog->float_substr; 679*0Sstevel@tonic-gate /* fbm_instr() takes into account exact value of end-of-str 680*0Sstevel@tonic-gate if the check is SvTAIL(ed). Since false positives are OK, 681*0Sstevel@tonic-gate and end-of-str is not later than strend we are OK. */ 682*0Sstevel@tonic-gate if (must == &PL_sv_undef) { 683*0Sstevel@tonic-gate s = (char*)NULL; 684*0Sstevel@tonic-gate DEBUG_r(must = prog->float_utf8); /* for debug message */ 685*0Sstevel@tonic-gate } 686*0Sstevel@tonic-gate else 687*0Sstevel@tonic-gate s = fbm_instr((unsigned char*)s, 688*0Sstevel@tonic-gate (unsigned char*)last + SvCUR(must) 689*0Sstevel@tonic-gate - (SvTAIL(must)!=0), 690*0Sstevel@tonic-gate must, PL_multiline ? FBMrf_MULTILINE : 0); 691*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s", 692*0Sstevel@tonic-gate (s ? "Found" : "Contradicts"), 693*0Sstevel@tonic-gate PL_colors[0], 694*0Sstevel@tonic-gate (int)(SvCUR(must) - (SvTAIL(must)!=0)), 695*0Sstevel@tonic-gate SvPVX(must), 696*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(must) ? "$" : ""))); 697*0Sstevel@tonic-gate if (!s) { 698*0Sstevel@tonic-gate if (last1 == last) { 699*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 700*0Sstevel@tonic-gate ", giving up...\n")); 701*0Sstevel@tonic-gate goto fail_finish; 702*0Sstevel@tonic-gate } 703*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 704*0Sstevel@tonic-gate ", trying anchored starting at offset %ld...\n", 705*0Sstevel@tonic-gate (long)(s1 + 1 - i_strpos))); 706*0Sstevel@tonic-gate other_last = last; 707*0Sstevel@tonic-gate s = HOP3c(t, 1, strend); 708*0Sstevel@tonic-gate goto restart; 709*0Sstevel@tonic-gate } 710*0Sstevel@tonic-gate else { 711*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", 712*0Sstevel@tonic-gate (long)(s - i_strpos))); 713*0Sstevel@tonic-gate other_last = s; /* Fix this later. --Hugo */ 714*0Sstevel@tonic-gate s = s1; 715*0Sstevel@tonic-gate if (t == strpos) 716*0Sstevel@tonic-gate goto try_at_start; 717*0Sstevel@tonic-gate goto try_at_offset; 718*0Sstevel@tonic-gate } 719*0Sstevel@tonic-gate } 720*0Sstevel@tonic-gate } 721*0Sstevel@tonic-gate 722*0Sstevel@tonic-gate t = s - prog->check_offset_max; 723*0Sstevel@tonic-gate if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */ 724*0Sstevel@tonic-gate && (!do_utf8 725*0Sstevel@tonic-gate || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos)) 726*0Sstevel@tonic-gate && t > strpos))) { 727*0Sstevel@tonic-gate /* Fixed substring is found far enough so that the match 728*0Sstevel@tonic-gate cannot start at strpos. */ 729*0Sstevel@tonic-gate try_at_offset: 730*0Sstevel@tonic-gate if (ml_anch && t[-1] != '\n') { 731*0Sstevel@tonic-gate /* Eventually fbm_*() should handle this, but often 732*0Sstevel@tonic-gate anchored_offset is not 0, so this check will not be wasted. */ 733*0Sstevel@tonic-gate /* XXXX In the code below we prefer to look for "^" even in 734*0Sstevel@tonic-gate presence of anchored substrings. And we search even 735*0Sstevel@tonic-gate beyond the found float position. These pessimizations 736*0Sstevel@tonic-gate are historical artefacts only. */ 737*0Sstevel@tonic-gate find_anchor: 738*0Sstevel@tonic-gate while (t < strend - prog->minlen) { 739*0Sstevel@tonic-gate if (*t == '\n') { 740*0Sstevel@tonic-gate if (t < check_at - prog->check_offset_min) { 741*0Sstevel@tonic-gate if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) { 742*0Sstevel@tonic-gate /* Since we moved from the found position, 743*0Sstevel@tonic-gate we definitely contradict the found anchored 744*0Sstevel@tonic-gate substr. Due to the above check we do not 745*0Sstevel@tonic-gate contradict "check" substr. 746*0Sstevel@tonic-gate Thus we can arrive here only if check substr 747*0Sstevel@tonic-gate is float. Redo checking for "other"=="fixed". 748*0Sstevel@tonic-gate */ 749*0Sstevel@tonic-gate strpos = t + 1; 750*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n", 751*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset))); 752*0Sstevel@tonic-gate goto do_other_anchored; 753*0Sstevel@tonic-gate } 754*0Sstevel@tonic-gate /* We don't contradict the found floating substring. */ 755*0Sstevel@tonic-gate /* XXXX Why not check for STCLASS? */ 756*0Sstevel@tonic-gate s = t + 1; 757*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n", 758*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(s - i_strpos))); 759*0Sstevel@tonic-gate goto set_useful; 760*0Sstevel@tonic-gate } 761*0Sstevel@tonic-gate /* Position contradicts check-string */ 762*0Sstevel@tonic-gate /* XXXX probably better to look for check-string 763*0Sstevel@tonic-gate than for "\n", so one should lower the limit for t? */ 764*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n", 765*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos))); 766*0Sstevel@tonic-gate other_last = strpos = s = t + 1; 767*0Sstevel@tonic-gate goto restart; 768*0Sstevel@tonic-gate } 769*0Sstevel@tonic-gate t++; 770*0Sstevel@tonic-gate } 771*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n", 772*0Sstevel@tonic-gate PL_colors[0],PL_colors[1])); 773*0Sstevel@tonic-gate goto fail_finish; 774*0Sstevel@tonic-gate } 775*0Sstevel@tonic-gate else { 776*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n", 777*0Sstevel@tonic-gate PL_colors[0],PL_colors[1])); 778*0Sstevel@tonic-gate } 779*0Sstevel@tonic-gate s = t; 780*0Sstevel@tonic-gate set_useful: 781*0Sstevel@tonic-gate ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */ 782*0Sstevel@tonic-gate } 783*0Sstevel@tonic-gate else { 784*0Sstevel@tonic-gate /* The found string does not prohibit matching at strpos, 785*0Sstevel@tonic-gate - no optimization of calling REx engine can be performed, 786*0Sstevel@tonic-gate unless it was an MBOL and we are not after MBOL, 787*0Sstevel@tonic-gate or a future STCLASS check will fail this. */ 788*0Sstevel@tonic-gate try_at_start: 789*0Sstevel@tonic-gate /* Even in this situation we may use MBOL flag if strpos is offset 790*0Sstevel@tonic-gate wrt the start of the string. */ 791*0Sstevel@tonic-gate if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */ 792*0Sstevel@tonic-gate && (strpos != strbeg) && strpos[-1] != '\n' 793*0Sstevel@tonic-gate /* May be due to an implicit anchor of m{.*foo} */ 794*0Sstevel@tonic-gate && !(prog->reganch & ROPT_IMPLICIT)) 795*0Sstevel@tonic-gate { 796*0Sstevel@tonic-gate t = strpos; 797*0Sstevel@tonic-gate goto find_anchor; 798*0Sstevel@tonic-gate } 799*0Sstevel@tonic-gate DEBUG_r( if (ml_anch) 800*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n", 801*0Sstevel@tonic-gate (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]); 802*0Sstevel@tonic-gate ); 803*0Sstevel@tonic-gate success_at_start: 804*0Sstevel@tonic-gate if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */ 805*0Sstevel@tonic-gate && (do_utf8 ? ( 806*0Sstevel@tonic-gate prog->check_utf8 /* Could be deleted already */ 807*0Sstevel@tonic-gate && --BmUSEFUL(prog->check_utf8) < 0 808*0Sstevel@tonic-gate && (prog->check_utf8 == prog->float_utf8) 809*0Sstevel@tonic-gate ) : ( 810*0Sstevel@tonic-gate prog->check_substr /* Could be deleted already */ 811*0Sstevel@tonic-gate && --BmUSEFUL(prog->check_substr) < 0 812*0Sstevel@tonic-gate && (prog->check_substr == prog->float_substr) 813*0Sstevel@tonic-gate ))) 814*0Sstevel@tonic-gate { 815*0Sstevel@tonic-gate /* If flags & SOMETHING - do not do it many times on the same match */ 816*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n")); 817*0Sstevel@tonic-gate SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr); 818*0Sstevel@tonic-gate if (do_utf8 ? prog->check_substr : prog->check_utf8) 819*0Sstevel@tonic-gate SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8); 820*0Sstevel@tonic-gate prog->check_substr = prog->check_utf8 = Nullsv; /* disable */ 821*0Sstevel@tonic-gate prog->float_substr = prog->float_utf8 = Nullsv; /* clear */ 822*0Sstevel@tonic-gate check = Nullsv; /* abort */ 823*0Sstevel@tonic-gate s = strpos; 824*0Sstevel@tonic-gate /* XXXX This is a remnant of the old implementation. It 825*0Sstevel@tonic-gate looks wasteful, since now INTUIT can use many 826*0Sstevel@tonic-gate other heuristics. */ 827*0Sstevel@tonic-gate prog->reganch &= ~RE_USE_INTUIT; 828*0Sstevel@tonic-gate } 829*0Sstevel@tonic-gate else 830*0Sstevel@tonic-gate s = strpos; 831*0Sstevel@tonic-gate } 832*0Sstevel@tonic-gate 833*0Sstevel@tonic-gate /* Last resort... */ 834*0Sstevel@tonic-gate /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ 835*0Sstevel@tonic-gate if (prog->regstclass) { 836*0Sstevel@tonic-gate /* minlen == 0 is possible if regstclass is \b or \B, 837*0Sstevel@tonic-gate and the fixed substr is ''$. 838*0Sstevel@tonic-gate Since minlen is already taken into account, s+1 is before strend; 839*0Sstevel@tonic-gate accidentally, minlen >= 1 guaranties no false positives at s + 1 840*0Sstevel@tonic-gate even for \b or \B. But (minlen? 1 : 0) below assumes that 841*0Sstevel@tonic-gate regstclass does not come from lookahead... */ 842*0Sstevel@tonic-gate /* If regstclass takes bytelength more than 1: If charlength==1, OK. 843*0Sstevel@tonic-gate This leaves EXACTF only, which is dealt with in find_byclass(). */ 844*0Sstevel@tonic-gate U8* str = (U8*)STRING(prog->regstclass); 845*0Sstevel@tonic-gate int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT 846*0Sstevel@tonic-gate ? CHR_DIST(str+STR_LEN(prog->regstclass), str) 847*0Sstevel@tonic-gate : 1); 848*0Sstevel@tonic-gate char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch) 849*0Sstevel@tonic-gate ? HOP3c(s, (prog->minlen ? cl_l : 0), strend) 850*0Sstevel@tonic-gate : (prog->float_substr || prog->float_utf8 851*0Sstevel@tonic-gate ? HOP3c(HOP3c(check_at, -start_shift, strbeg), 852*0Sstevel@tonic-gate cl_l, strend) 853*0Sstevel@tonic-gate : strend); 854*0Sstevel@tonic-gate char *startpos = strbeg; 855*0Sstevel@tonic-gate 856*0Sstevel@tonic-gate t = s; 857*0Sstevel@tonic-gate cache_re(prog); 858*0Sstevel@tonic-gate s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); 859*0Sstevel@tonic-gate if (!s) { 860*0Sstevel@tonic-gate #ifdef DEBUGGING 861*0Sstevel@tonic-gate char *what = 0; 862*0Sstevel@tonic-gate #endif 863*0Sstevel@tonic-gate if (endpos == strend) { 864*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, 865*0Sstevel@tonic-gate "Could not match STCLASS...\n") ); 866*0Sstevel@tonic-gate goto fail; 867*0Sstevel@tonic-gate } 868*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, 869*0Sstevel@tonic-gate "This position contradicts STCLASS...\n") ); 870*0Sstevel@tonic-gate if ((prog->reganch & ROPT_ANCH) && !ml_anch) 871*0Sstevel@tonic-gate goto fail; 872*0Sstevel@tonic-gate /* Contradict one of substrings */ 873*0Sstevel@tonic-gate if (prog->anchored_substr || prog->anchored_utf8) { 874*0Sstevel@tonic-gate if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) { 875*0Sstevel@tonic-gate DEBUG_r( what = "anchored" ); 876*0Sstevel@tonic-gate hop_and_restart: 877*0Sstevel@tonic-gate s = HOP3c(t, 1, strend); 878*0Sstevel@tonic-gate if (s + start_shift + end_shift > strend) { 879*0Sstevel@tonic-gate /* XXXX Should be taken into account earlier? */ 880*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, 881*0Sstevel@tonic-gate "Could not match STCLASS...\n") ); 882*0Sstevel@tonic-gate goto fail; 883*0Sstevel@tonic-gate } 884*0Sstevel@tonic-gate if (!check) 885*0Sstevel@tonic-gate goto giveup; 886*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, 887*0Sstevel@tonic-gate "Looking for %s substr starting at offset %ld...\n", 888*0Sstevel@tonic-gate what, (long)(s + start_shift - i_strpos)) ); 889*0Sstevel@tonic-gate goto restart; 890*0Sstevel@tonic-gate } 891*0Sstevel@tonic-gate /* Have both, check_string is floating */ 892*0Sstevel@tonic-gate if (t + start_shift >= check_at) /* Contradicts floating=check */ 893*0Sstevel@tonic-gate goto retry_floating_check; 894*0Sstevel@tonic-gate /* Recheck anchored substring, but not floating... */ 895*0Sstevel@tonic-gate s = check_at; 896*0Sstevel@tonic-gate if (!check) 897*0Sstevel@tonic-gate goto giveup; 898*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, 899*0Sstevel@tonic-gate "Looking for anchored substr starting at offset %ld...\n", 900*0Sstevel@tonic-gate (long)(other_last - i_strpos)) ); 901*0Sstevel@tonic-gate goto do_other_anchored; 902*0Sstevel@tonic-gate } 903*0Sstevel@tonic-gate /* Another way we could have checked stclass at the 904*0Sstevel@tonic-gate current position only: */ 905*0Sstevel@tonic-gate if (ml_anch) { 906*0Sstevel@tonic-gate s = t = t + 1; 907*0Sstevel@tonic-gate if (!check) 908*0Sstevel@tonic-gate goto giveup; 909*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, 910*0Sstevel@tonic-gate "Looking for /%s^%s/m starting at offset %ld...\n", 911*0Sstevel@tonic-gate PL_colors[0],PL_colors[1], (long)(t - i_strpos)) ); 912*0Sstevel@tonic-gate goto try_at_offset; 913*0Sstevel@tonic-gate } 914*0Sstevel@tonic-gate if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */ 915*0Sstevel@tonic-gate goto fail; 916*0Sstevel@tonic-gate /* Check is floating subtring. */ 917*0Sstevel@tonic-gate retry_floating_check: 918*0Sstevel@tonic-gate t = check_at - start_shift; 919*0Sstevel@tonic-gate DEBUG_r( what = "floating" ); 920*0Sstevel@tonic-gate goto hop_and_restart; 921*0Sstevel@tonic-gate } 922*0Sstevel@tonic-gate if (t != s) { 923*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 924*0Sstevel@tonic-gate "By STCLASS: moving %ld --> %ld\n", 925*0Sstevel@tonic-gate (long)(t - i_strpos), (long)(s - i_strpos)) 926*0Sstevel@tonic-gate ); 927*0Sstevel@tonic-gate } 928*0Sstevel@tonic-gate else { 929*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 930*0Sstevel@tonic-gate "Does not contradict STCLASS...\n"); 931*0Sstevel@tonic-gate ); 932*0Sstevel@tonic-gate } 933*0Sstevel@tonic-gate } 934*0Sstevel@tonic-gate giveup: 935*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n", 936*0Sstevel@tonic-gate PL_colors[4], (check ? "Guessed" : "Giving up"), 937*0Sstevel@tonic-gate PL_colors[5], (long)(s - i_strpos)) ); 938*0Sstevel@tonic-gate return s; 939*0Sstevel@tonic-gate 940*0Sstevel@tonic-gate fail_finish: /* Substring not found */ 941*0Sstevel@tonic-gate if (prog->check_substr || prog->check_utf8) /* could be removed already */ 942*0Sstevel@tonic-gate BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */ 943*0Sstevel@tonic-gate fail: 944*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n", 945*0Sstevel@tonic-gate PL_colors[4],PL_colors[5])); 946*0Sstevel@tonic-gate return Nullch; 947*0Sstevel@tonic-gate } 948*0Sstevel@tonic-gate 949*0Sstevel@tonic-gate /* We know what class REx starts with. Try to find this position... */ 950*0Sstevel@tonic-gate STATIC char * 951*0Sstevel@tonic-gate S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun) 952*0Sstevel@tonic-gate { 953*0Sstevel@tonic-gate I32 doevery = (prog->reganch & ROPT_SKIP) == 0; 954*0Sstevel@tonic-gate char *m; 955*0Sstevel@tonic-gate STRLEN ln; 956*0Sstevel@tonic-gate STRLEN lnc; 957*0Sstevel@tonic-gate register STRLEN uskip; 958*0Sstevel@tonic-gate unsigned int c1; 959*0Sstevel@tonic-gate unsigned int c2; 960*0Sstevel@tonic-gate char *e; 961*0Sstevel@tonic-gate register I32 tmp = 1; /* Scratch variable? */ 962*0Sstevel@tonic-gate register bool do_utf8 = PL_reg_match_utf8; 963*0Sstevel@tonic-gate 964*0Sstevel@tonic-gate /* We know what class it must start with. */ 965*0Sstevel@tonic-gate switch (OP(c)) { 966*0Sstevel@tonic-gate case ANYOF: 967*0Sstevel@tonic-gate if (do_utf8) { 968*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 969*0Sstevel@tonic-gate if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) || 970*0Sstevel@tonic-gate !UTF8_IS_INVARIANT((U8)s[0]) ? 971*0Sstevel@tonic-gate reginclass(c, (U8*)s, 0, do_utf8) : 972*0Sstevel@tonic-gate REGINCLASS(c, (U8*)s)) { 973*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 974*0Sstevel@tonic-gate goto got_it; 975*0Sstevel@tonic-gate else 976*0Sstevel@tonic-gate tmp = doevery; 977*0Sstevel@tonic-gate } 978*0Sstevel@tonic-gate else 979*0Sstevel@tonic-gate tmp = 1; 980*0Sstevel@tonic-gate s += uskip; 981*0Sstevel@tonic-gate } 982*0Sstevel@tonic-gate } 983*0Sstevel@tonic-gate else { 984*0Sstevel@tonic-gate while (s < strend) { 985*0Sstevel@tonic-gate STRLEN skip = 1; 986*0Sstevel@tonic-gate 987*0Sstevel@tonic-gate if (REGINCLASS(c, (U8*)s) || 988*0Sstevel@tonic-gate (ANYOF_FOLD_SHARP_S(c, s, strend) && 989*0Sstevel@tonic-gate /* The assignment of 2 is intentional: 990*0Sstevel@tonic-gate * for the folded sharp s, the skip is 2. */ 991*0Sstevel@tonic-gate (skip = SHARP_S_SKIP))) { 992*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 993*0Sstevel@tonic-gate goto got_it; 994*0Sstevel@tonic-gate else 995*0Sstevel@tonic-gate tmp = doevery; 996*0Sstevel@tonic-gate } 997*0Sstevel@tonic-gate else 998*0Sstevel@tonic-gate tmp = 1; 999*0Sstevel@tonic-gate s += skip; 1000*0Sstevel@tonic-gate } 1001*0Sstevel@tonic-gate } 1002*0Sstevel@tonic-gate break; 1003*0Sstevel@tonic-gate case CANY: 1004*0Sstevel@tonic-gate while (s < strend) { 1005*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1006*0Sstevel@tonic-gate goto got_it; 1007*0Sstevel@tonic-gate else 1008*0Sstevel@tonic-gate tmp = doevery; 1009*0Sstevel@tonic-gate s++; 1010*0Sstevel@tonic-gate } 1011*0Sstevel@tonic-gate break; 1012*0Sstevel@tonic-gate case EXACTF: 1013*0Sstevel@tonic-gate m = STRING(c); 1014*0Sstevel@tonic-gate ln = STR_LEN(c); /* length to match in octets/bytes */ 1015*0Sstevel@tonic-gate lnc = (I32) ln; /* length to match in characters */ 1016*0Sstevel@tonic-gate if (UTF) { 1017*0Sstevel@tonic-gate STRLEN ulen1, ulen2; 1018*0Sstevel@tonic-gate U8 *sm = (U8 *) m; 1019*0Sstevel@tonic-gate U8 tmpbuf1[UTF8_MAXLEN_UCLC+1]; 1020*0Sstevel@tonic-gate U8 tmpbuf2[UTF8_MAXLEN_UCLC+1]; 1021*0Sstevel@tonic-gate 1022*0Sstevel@tonic-gate to_utf8_lower((U8*)m, tmpbuf1, &ulen1); 1023*0Sstevel@tonic-gate to_utf8_upper((U8*)m, tmpbuf2, &ulen2); 1024*0Sstevel@tonic-gate 1025*0Sstevel@tonic-gate c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC, 1026*0Sstevel@tonic-gate 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1027*0Sstevel@tonic-gate c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC, 1028*0Sstevel@tonic-gate 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1029*0Sstevel@tonic-gate lnc = 0; 1030*0Sstevel@tonic-gate while (sm < ((U8 *) m + ln)) { 1031*0Sstevel@tonic-gate lnc++; 1032*0Sstevel@tonic-gate sm += UTF8SKIP(sm); 1033*0Sstevel@tonic-gate } 1034*0Sstevel@tonic-gate } 1035*0Sstevel@tonic-gate else { 1036*0Sstevel@tonic-gate c1 = *(U8*)m; 1037*0Sstevel@tonic-gate c2 = PL_fold[c1]; 1038*0Sstevel@tonic-gate } 1039*0Sstevel@tonic-gate goto do_exactf; 1040*0Sstevel@tonic-gate case EXACTFL: 1041*0Sstevel@tonic-gate m = STRING(c); 1042*0Sstevel@tonic-gate ln = STR_LEN(c); 1043*0Sstevel@tonic-gate lnc = (I32) ln; 1044*0Sstevel@tonic-gate c1 = *(U8*)m; 1045*0Sstevel@tonic-gate c2 = PL_fold_locale[c1]; 1046*0Sstevel@tonic-gate do_exactf: 1047*0Sstevel@tonic-gate e = HOP3c(strend, -((I32)lnc), s); 1048*0Sstevel@tonic-gate 1049*0Sstevel@tonic-gate if (norun && e < s) 1050*0Sstevel@tonic-gate e = s; /* Due to minlen logic of intuit() */ 1051*0Sstevel@tonic-gate 1052*0Sstevel@tonic-gate /* The idea in the EXACTF* cases is to first find the 1053*0Sstevel@tonic-gate * first character of the EXACTF* node and then, if 1054*0Sstevel@tonic-gate * necessary, case-insensitively compare the full 1055*0Sstevel@tonic-gate * text of the node. The c1 and c2 are the first 1056*0Sstevel@tonic-gate * characters (though in Unicode it gets a bit 1057*0Sstevel@tonic-gate * more complicated because there are more cases 1058*0Sstevel@tonic-gate * than just upper and lower: one needs to use 1059*0Sstevel@tonic-gate * the so-called folding case for case-insensitive 1060*0Sstevel@tonic-gate * matching (called "loose matching" in Unicode). 1061*0Sstevel@tonic-gate * ibcmp_utf8() will do just that. */ 1062*0Sstevel@tonic-gate 1063*0Sstevel@tonic-gate if (do_utf8) { 1064*0Sstevel@tonic-gate UV c, f; 1065*0Sstevel@tonic-gate U8 tmpbuf [UTF8_MAXLEN+1]; 1066*0Sstevel@tonic-gate U8 foldbuf[UTF8_MAXLEN_FOLD+1]; 1067*0Sstevel@tonic-gate STRLEN len, foldlen; 1068*0Sstevel@tonic-gate 1069*0Sstevel@tonic-gate if (c1 == c2) { 1070*0Sstevel@tonic-gate /* Upper and lower of 1st char are equal - 1071*0Sstevel@tonic-gate * probably not a "letter". */ 1072*0Sstevel@tonic-gate while (s <= e) { 1073*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len, 1074*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 1075*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 1076*0Sstevel@tonic-gate if ( c == c1 1077*0Sstevel@tonic-gate && (ln == len || 1078*0Sstevel@tonic-gate ibcmp_utf8(s, (char **)0, 0, do_utf8, 1079*0Sstevel@tonic-gate m, (char **)0, ln, (bool)UTF)) 1080*0Sstevel@tonic-gate && (norun || regtry(prog, s)) ) 1081*0Sstevel@tonic-gate goto got_it; 1082*0Sstevel@tonic-gate else { 1083*0Sstevel@tonic-gate uvchr_to_utf8(tmpbuf, c); 1084*0Sstevel@tonic-gate f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); 1085*0Sstevel@tonic-gate if ( f != c 1086*0Sstevel@tonic-gate && (f == c1 || f == c2) 1087*0Sstevel@tonic-gate && (ln == foldlen || 1088*0Sstevel@tonic-gate !ibcmp_utf8((char *) foldbuf, 1089*0Sstevel@tonic-gate (char **)0, foldlen, do_utf8, 1090*0Sstevel@tonic-gate m, 1091*0Sstevel@tonic-gate (char **)0, ln, (bool)UTF)) 1092*0Sstevel@tonic-gate && (norun || regtry(prog, s)) ) 1093*0Sstevel@tonic-gate goto got_it; 1094*0Sstevel@tonic-gate } 1095*0Sstevel@tonic-gate s += len; 1096*0Sstevel@tonic-gate } 1097*0Sstevel@tonic-gate } 1098*0Sstevel@tonic-gate else { 1099*0Sstevel@tonic-gate while (s <= e) { 1100*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len, 1101*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 1102*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 1103*0Sstevel@tonic-gate 1104*0Sstevel@tonic-gate /* Handle some of the three Greek sigmas cases. 1105*0Sstevel@tonic-gate * Note that not all the possible combinations 1106*0Sstevel@tonic-gate * are handled here: some of them are handled 1107*0Sstevel@tonic-gate * by the standard folding rules, and some of 1108*0Sstevel@tonic-gate * them (the character class or ANYOF cases) 1109*0Sstevel@tonic-gate * are handled during compiletime in 1110*0Sstevel@tonic-gate * regexec.c:S_regclass(). */ 1111*0Sstevel@tonic-gate if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA || 1112*0Sstevel@tonic-gate c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) 1113*0Sstevel@tonic-gate c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA; 1114*0Sstevel@tonic-gate 1115*0Sstevel@tonic-gate if ( (c == c1 || c == c2) 1116*0Sstevel@tonic-gate && (ln == len || 1117*0Sstevel@tonic-gate ibcmp_utf8(s, (char **)0, 0, do_utf8, 1118*0Sstevel@tonic-gate m, (char **)0, ln, (bool)UTF)) 1119*0Sstevel@tonic-gate && (norun || regtry(prog, s)) ) 1120*0Sstevel@tonic-gate goto got_it; 1121*0Sstevel@tonic-gate else { 1122*0Sstevel@tonic-gate uvchr_to_utf8(tmpbuf, c); 1123*0Sstevel@tonic-gate f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); 1124*0Sstevel@tonic-gate if ( f != c 1125*0Sstevel@tonic-gate && (f == c1 || f == c2) 1126*0Sstevel@tonic-gate && (ln == foldlen || 1127*0Sstevel@tonic-gate !ibcmp_utf8((char *) foldbuf, 1128*0Sstevel@tonic-gate (char **)0, foldlen, do_utf8, 1129*0Sstevel@tonic-gate m, 1130*0Sstevel@tonic-gate (char **)0, ln, (bool)UTF)) 1131*0Sstevel@tonic-gate && (norun || regtry(prog, s)) ) 1132*0Sstevel@tonic-gate goto got_it; 1133*0Sstevel@tonic-gate } 1134*0Sstevel@tonic-gate s += len; 1135*0Sstevel@tonic-gate } 1136*0Sstevel@tonic-gate } 1137*0Sstevel@tonic-gate } 1138*0Sstevel@tonic-gate else { 1139*0Sstevel@tonic-gate if (c1 == c2) 1140*0Sstevel@tonic-gate while (s <= e) { 1141*0Sstevel@tonic-gate if ( *(U8*)s == c1 1142*0Sstevel@tonic-gate && (ln == 1 || !(OP(c) == EXACTF 1143*0Sstevel@tonic-gate ? ibcmp(s, m, ln) 1144*0Sstevel@tonic-gate : ibcmp_locale(s, m, ln))) 1145*0Sstevel@tonic-gate && (norun || regtry(prog, s)) ) 1146*0Sstevel@tonic-gate goto got_it; 1147*0Sstevel@tonic-gate s++; 1148*0Sstevel@tonic-gate } 1149*0Sstevel@tonic-gate else 1150*0Sstevel@tonic-gate while (s <= e) { 1151*0Sstevel@tonic-gate if ( (*(U8*)s == c1 || *(U8*)s == c2) 1152*0Sstevel@tonic-gate && (ln == 1 || !(OP(c) == EXACTF 1153*0Sstevel@tonic-gate ? ibcmp(s, m, ln) 1154*0Sstevel@tonic-gate : ibcmp_locale(s, m, ln))) 1155*0Sstevel@tonic-gate && (norun || regtry(prog, s)) ) 1156*0Sstevel@tonic-gate goto got_it; 1157*0Sstevel@tonic-gate s++; 1158*0Sstevel@tonic-gate } 1159*0Sstevel@tonic-gate } 1160*0Sstevel@tonic-gate break; 1161*0Sstevel@tonic-gate case BOUNDL: 1162*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1163*0Sstevel@tonic-gate /* FALL THROUGH */ 1164*0Sstevel@tonic-gate case BOUND: 1165*0Sstevel@tonic-gate if (do_utf8) { 1166*0Sstevel@tonic-gate if (s == PL_bostr) 1167*0Sstevel@tonic-gate tmp = '\n'; 1168*0Sstevel@tonic-gate else { 1169*0Sstevel@tonic-gate U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr); 1170*0Sstevel@tonic-gate 1171*0Sstevel@tonic-gate tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0); 1172*0Sstevel@tonic-gate } 1173*0Sstevel@tonic-gate tmp = ((OP(c) == BOUND ? 1174*0Sstevel@tonic-gate isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0); 1175*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 1176*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1177*0Sstevel@tonic-gate if (tmp == !(OP(c) == BOUND ? 1178*0Sstevel@tonic-gate swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) : 1179*0Sstevel@tonic-gate isALNUM_LC_utf8((U8*)s))) 1180*0Sstevel@tonic-gate { 1181*0Sstevel@tonic-gate tmp = !tmp; 1182*0Sstevel@tonic-gate if ((norun || regtry(prog, s))) 1183*0Sstevel@tonic-gate goto got_it; 1184*0Sstevel@tonic-gate } 1185*0Sstevel@tonic-gate s += uskip; 1186*0Sstevel@tonic-gate } 1187*0Sstevel@tonic-gate } 1188*0Sstevel@tonic-gate else { 1189*0Sstevel@tonic-gate tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; 1190*0Sstevel@tonic-gate tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); 1191*0Sstevel@tonic-gate while (s < strend) { 1192*0Sstevel@tonic-gate if (tmp == 1193*0Sstevel@tonic-gate !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { 1194*0Sstevel@tonic-gate tmp = !tmp; 1195*0Sstevel@tonic-gate if ((norun || regtry(prog, s))) 1196*0Sstevel@tonic-gate goto got_it; 1197*0Sstevel@tonic-gate } 1198*0Sstevel@tonic-gate s++; 1199*0Sstevel@tonic-gate } 1200*0Sstevel@tonic-gate } 1201*0Sstevel@tonic-gate if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) 1202*0Sstevel@tonic-gate goto got_it; 1203*0Sstevel@tonic-gate break; 1204*0Sstevel@tonic-gate case NBOUNDL: 1205*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1206*0Sstevel@tonic-gate /* FALL THROUGH */ 1207*0Sstevel@tonic-gate case NBOUND: 1208*0Sstevel@tonic-gate if (do_utf8) { 1209*0Sstevel@tonic-gate if (s == PL_bostr) 1210*0Sstevel@tonic-gate tmp = '\n'; 1211*0Sstevel@tonic-gate else { 1212*0Sstevel@tonic-gate U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr); 1213*0Sstevel@tonic-gate 1214*0Sstevel@tonic-gate tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0); 1215*0Sstevel@tonic-gate } 1216*0Sstevel@tonic-gate tmp = ((OP(c) == NBOUND ? 1217*0Sstevel@tonic-gate isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0); 1218*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 1219*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1220*0Sstevel@tonic-gate if (tmp == !(OP(c) == NBOUND ? 1221*0Sstevel@tonic-gate swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) : 1222*0Sstevel@tonic-gate isALNUM_LC_utf8((U8*)s))) 1223*0Sstevel@tonic-gate tmp = !tmp; 1224*0Sstevel@tonic-gate else if ((norun || regtry(prog, s))) 1225*0Sstevel@tonic-gate goto got_it; 1226*0Sstevel@tonic-gate s += uskip; 1227*0Sstevel@tonic-gate } 1228*0Sstevel@tonic-gate } 1229*0Sstevel@tonic-gate else { 1230*0Sstevel@tonic-gate tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; 1231*0Sstevel@tonic-gate tmp = ((OP(c) == NBOUND ? 1232*0Sstevel@tonic-gate isALNUM(tmp) : isALNUM_LC(tmp)) != 0); 1233*0Sstevel@tonic-gate while (s < strend) { 1234*0Sstevel@tonic-gate if (tmp == 1235*0Sstevel@tonic-gate !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) 1236*0Sstevel@tonic-gate tmp = !tmp; 1237*0Sstevel@tonic-gate else if ((norun || regtry(prog, s))) 1238*0Sstevel@tonic-gate goto got_it; 1239*0Sstevel@tonic-gate s++; 1240*0Sstevel@tonic-gate } 1241*0Sstevel@tonic-gate } 1242*0Sstevel@tonic-gate if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) 1243*0Sstevel@tonic-gate goto got_it; 1244*0Sstevel@tonic-gate break; 1245*0Sstevel@tonic-gate case ALNUM: 1246*0Sstevel@tonic-gate if (do_utf8) { 1247*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 1248*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1249*0Sstevel@tonic-gate if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) { 1250*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1251*0Sstevel@tonic-gate goto got_it; 1252*0Sstevel@tonic-gate else 1253*0Sstevel@tonic-gate tmp = doevery; 1254*0Sstevel@tonic-gate } 1255*0Sstevel@tonic-gate else 1256*0Sstevel@tonic-gate tmp = 1; 1257*0Sstevel@tonic-gate s += uskip; 1258*0Sstevel@tonic-gate } 1259*0Sstevel@tonic-gate } 1260*0Sstevel@tonic-gate else { 1261*0Sstevel@tonic-gate while (s < strend) { 1262*0Sstevel@tonic-gate if (isALNUM(*s)) { 1263*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1264*0Sstevel@tonic-gate goto got_it; 1265*0Sstevel@tonic-gate else 1266*0Sstevel@tonic-gate tmp = doevery; 1267*0Sstevel@tonic-gate } 1268*0Sstevel@tonic-gate else 1269*0Sstevel@tonic-gate tmp = 1; 1270*0Sstevel@tonic-gate s++; 1271*0Sstevel@tonic-gate } 1272*0Sstevel@tonic-gate } 1273*0Sstevel@tonic-gate break; 1274*0Sstevel@tonic-gate case ALNUML: 1275*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1276*0Sstevel@tonic-gate if (do_utf8) { 1277*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1278*0Sstevel@tonic-gate if (isALNUM_LC_utf8((U8*)s)) { 1279*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1280*0Sstevel@tonic-gate goto got_it; 1281*0Sstevel@tonic-gate else 1282*0Sstevel@tonic-gate tmp = doevery; 1283*0Sstevel@tonic-gate } 1284*0Sstevel@tonic-gate else 1285*0Sstevel@tonic-gate tmp = 1; 1286*0Sstevel@tonic-gate s += uskip; 1287*0Sstevel@tonic-gate } 1288*0Sstevel@tonic-gate } 1289*0Sstevel@tonic-gate else { 1290*0Sstevel@tonic-gate while (s < strend) { 1291*0Sstevel@tonic-gate if (isALNUM_LC(*s)) { 1292*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1293*0Sstevel@tonic-gate goto got_it; 1294*0Sstevel@tonic-gate else 1295*0Sstevel@tonic-gate tmp = doevery; 1296*0Sstevel@tonic-gate } 1297*0Sstevel@tonic-gate else 1298*0Sstevel@tonic-gate tmp = 1; 1299*0Sstevel@tonic-gate s++; 1300*0Sstevel@tonic-gate } 1301*0Sstevel@tonic-gate } 1302*0Sstevel@tonic-gate break; 1303*0Sstevel@tonic-gate case NALNUM: 1304*0Sstevel@tonic-gate if (do_utf8) { 1305*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 1306*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1307*0Sstevel@tonic-gate if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) { 1308*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1309*0Sstevel@tonic-gate goto got_it; 1310*0Sstevel@tonic-gate else 1311*0Sstevel@tonic-gate tmp = doevery; 1312*0Sstevel@tonic-gate } 1313*0Sstevel@tonic-gate else 1314*0Sstevel@tonic-gate tmp = 1; 1315*0Sstevel@tonic-gate s += uskip; 1316*0Sstevel@tonic-gate } 1317*0Sstevel@tonic-gate } 1318*0Sstevel@tonic-gate else { 1319*0Sstevel@tonic-gate while (s < strend) { 1320*0Sstevel@tonic-gate if (!isALNUM(*s)) { 1321*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1322*0Sstevel@tonic-gate goto got_it; 1323*0Sstevel@tonic-gate else 1324*0Sstevel@tonic-gate tmp = doevery; 1325*0Sstevel@tonic-gate } 1326*0Sstevel@tonic-gate else 1327*0Sstevel@tonic-gate tmp = 1; 1328*0Sstevel@tonic-gate s++; 1329*0Sstevel@tonic-gate } 1330*0Sstevel@tonic-gate } 1331*0Sstevel@tonic-gate break; 1332*0Sstevel@tonic-gate case NALNUML: 1333*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1334*0Sstevel@tonic-gate if (do_utf8) { 1335*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1336*0Sstevel@tonic-gate if (!isALNUM_LC_utf8((U8*)s)) { 1337*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1338*0Sstevel@tonic-gate goto got_it; 1339*0Sstevel@tonic-gate else 1340*0Sstevel@tonic-gate tmp = doevery; 1341*0Sstevel@tonic-gate } 1342*0Sstevel@tonic-gate else 1343*0Sstevel@tonic-gate tmp = 1; 1344*0Sstevel@tonic-gate s += uskip; 1345*0Sstevel@tonic-gate } 1346*0Sstevel@tonic-gate } 1347*0Sstevel@tonic-gate else { 1348*0Sstevel@tonic-gate while (s < strend) { 1349*0Sstevel@tonic-gate if (!isALNUM_LC(*s)) { 1350*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1351*0Sstevel@tonic-gate goto got_it; 1352*0Sstevel@tonic-gate else 1353*0Sstevel@tonic-gate tmp = doevery; 1354*0Sstevel@tonic-gate } 1355*0Sstevel@tonic-gate else 1356*0Sstevel@tonic-gate tmp = 1; 1357*0Sstevel@tonic-gate s++; 1358*0Sstevel@tonic-gate } 1359*0Sstevel@tonic-gate } 1360*0Sstevel@tonic-gate break; 1361*0Sstevel@tonic-gate case SPACE: 1362*0Sstevel@tonic-gate if (do_utf8) { 1363*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," "); 1364*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1365*0Sstevel@tonic-gate if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) { 1366*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1367*0Sstevel@tonic-gate goto got_it; 1368*0Sstevel@tonic-gate else 1369*0Sstevel@tonic-gate tmp = doevery; 1370*0Sstevel@tonic-gate } 1371*0Sstevel@tonic-gate else 1372*0Sstevel@tonic-gate tmp = 1; 1373*0Sstevel@tonic-gate s += uskip; 1374*0Sstevel@tonic-gate } 1375*0Sstevel@tonic-gate } 1376*0Sstevel@tonic-gate else { 1377*0Sstevel@tonic-gate while (s < strend) { 1378*0Sstevel@tonic-gate if (isSPACE(*s)) { 1379*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1380*0Sstevel@tonic-gate goto got_it; 1381*0Sstevel@tonic-gate else 1382*0Sstevel@tonic-gate tmp = doevery; 1383*0Sstevel@tonic-gate } 1384*0Sstevel@tonic-gate else 1385*0Sstevel@tonic-gate tmp = 1; 1386*0Sstevel@tonic-gate s++; 1387*0Sstevel@tonic-gate } 1388*0Sstevel@tonic-gate } 1389*0Sstevel@tonic-gate break; 1390*0Sstevel@tonic-gate case SPACEL: 1391*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1392*0Sstevel@tonic-gate if (do_utf8) { 1393*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1394*0Sstevel@tonic-gate if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { 1395*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1396*0Sstevel@tonic-gate goto got_it; 1397*0Sstevel@tonic-gate else 1398*0Sstevel@tonic-gate tmp = doevery; 1399*0Sstevel@tonic-gate } 1400*0Sstevel@tonic-gate else 1401*0Sstevel@tonic-gate tmp = 1; 1402*0Sstevel@tonic-gate s += uskip; 1403*0Sstevel@tonic-gate } 1404*0Sstevel@tonic-gate } 1405*0Sstevel@tonic-gate else { 1406*0Sstevel@tonic-gate while (s < strend) { 1407*0Sstevel@tonic-gate if (isSPACE_LC(*s)) { 1408*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1409*0Sstevel@tonic-gate goto got_it; 1410*0Sstevel@tonic-gate else 1411*0Sstevel@tonic-gate tmp = doevery; 1412*0Sstevel@tonic-gate } 1413*0Sstevel@tonic-gate else 1414*0Sstevel@tonic-gate tmp = 1; 1415*0Sstevel@tonic-gate s++; 1416*0Sstevel@tonic-gate } 1417*0Sstevel@tonic-gate } 1418*0Sstevel@tonic-gate break; 1419*0Sstevel@tonic-gate case NSPACE: 1420*0Sstevel@tonic-gate if (do_utf8) { 1421*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," "); 1422*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1423*0Sstevel@tonic-gate if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) { 1424*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1425*0Sstevel@tonic-gate goto got_it; 1426*0Sstevel@tonic-gate else 1427*0Sstevel@tonic-gate tmp = doevery; 1428*0Sstevel@tonic-gate } 1429*0Sstevel@tonic-gate else 1430*0Sstevel@tonic-gate tmp = 1; 1431*0Sstevel@tonic-gate s += uskip; 1432*0Sstevel@tonic-gate } 1433*0Sstevel@tonic-gate } 1434*0Sstevel@tonic-gate else { 1435*0Sstevel@tonic-gate while (s < strend) { 1436*0Sstevel@tonic-gate if (!isSPACE(*s)) { 1437*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1438*0Sstevel@tonic-gate goto got_it; 1439*0Sstevel@tonic-gate else 1440*0Sstevel@tonic-gate tmp = doevery; 1441*0Sstevel@tonic-gate } 1442*0Sstevel@tonic-gate else 1443*0Sstevel@tonic-gate tmp = 1; 1444*0Sstevel@tonic-gate s++; 1445*0Sstevel@tonic-gate } 1446*0Sstevel@tonic-gate } 1447*0Sstevel@tonic-gate break; 1448*0Sstevel@tonic-gate case NSPACEL: 1449*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1450*0Sstevel@tonic-gate if (do_utf8) { 1451*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1452*0Sstevel@tonic-gate if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { 1453*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1454*0Sstevel@tonic-gate goto got_it; 1455*0Sstevel@tonic-gate else 1456*0Sstevel@tonic-gate tmp = doevery; 1457*0Sstevel@tonic-gate } 1458*0Sstevel@tonic-gate else 1459*0Sstevel@tonic-gate tmp = 1; 1460*0Sstevel@tonic-gate s += uskip; 1461*0Sstevel@tonic-gate } 1462*0Sstevel@tonic-gate } 1463*0Sstevel@tonic-gate else { 1464*0Sstevel@tonic-gate while (s < strend) { 1465*0Sstevel@tonic-gate if (!isSPACE_LC(*s)) { 1466*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1467*0Sstevel@tonic-gate goto got_it; 1468*0Sstevel@tonic-gate else 1469*0Sstevel@tonic-gate tmp = doevery; 1470*0Sstevel@tonic-gate } 1471*0Sstevel@tonic-gate else 1472*0Sstevel@tonic-gate tmp = 1; 1473*0Sstevel@tonic-gate s++; 1474*0Sstevel@tonic-gate } 1475*0Sstevel@tonic-gate } 1476*0Sstevel@tonic-gate break; 1477*0Sstevel@tonic-gate case DIGIT: 1478*0Sstevel@tonic-gate if (do_utf8) { 1479*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0"); 1480*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1481*0Sstevel@tonic-gate if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) { 1482*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1483*0Sstevel@tonic-gate goto got_it; 1484*0Sstevel@tonic-gate else 1485*0Sstevel@tonic-gate tmp = doevery; 1486*0Sstevel@tonic-gate } 1487*0Sstevel@tonic-gate else 1488*0Sstevel@tonic-gate tmp = 1; 1489*0Sstevel@tonic-gate s += uskip; 1490*0Sstevel@tonic-gate } 1491*0Sstevel@tonic-gate } 1492*0Sstevel@tonic-gate else { 1493*0Sstevel@tonic-gate while (s < strend) { 1494*0Sstevel@tonic-gate if (isDIGIT(*s)) { 1495*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1496*0Sstevel@tonic-gate goto got_it; 1497*0Sstevel@tonic-gate else 1498*0Sstevel@tonic-gate tmp = doevery; 1499*0Sstevel@tonic-gate } 1500*0Sstevel@tonic-gate else 1501*0Sstevel@tonic-gate tmp = 1; 1502*0Sstevel@tonic-gate s++; 1503*0Sstevel@tonic-gate } 1504*0Sstevel@tonic-gate } 1505*0Sstevel@tonic-gate break; 1506*0Sstevel@tonic-gate case DIGITL: 1507*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1508*0Sstevel@tonic-gate if (do_utf8) { 1509*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1510*0Sstevel@tonic-gate if (isDIGIT_LC_utf8((U8*)s)) { 1511*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1512*0Sstevel@tonic-gate goto got_it; 1513*0Sstevel@tonic-gate else 1514*0Sstevel@tonic-gate tmp = doevery; 1515*0Sstevel@tonic-gate } 1516*0Sstevel@tonic-gate else 1517*0Sstevel@tonic-gate tmp = 1; 1518*0Sstevel@tonic-gate s += uskip; 1519*0Sstevel@tonic-gate } 1520*0Sstevel@tonic-gate } 1521*0Sstevel@tonic-gate else { 1522*0Sstevel@tonic-gate while (s < strend) { 1523*0Sstevel@tonic-gate if (isDIGIT_LC(*s)) { 1524*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1525*0Sstevel@tonic-gate goto got_it; 1526*0Sstevel@tonic-gate else 1527*0Sstevel@tonic-gate tmp = doevery; 1528*0Sstevel@tonic-gate } 1529*0Sstevel@tonic-gate else 1530*0Sstevel@tonic-gate tmp = 1; 1531*0Sstevel@tonic-gate s++; 1532*0Sstevel@tonic-gate } 1533*0Sstevel@tonic-gate } 1534*0Sstevel@tonic-gate break; 1535*0Sstevel@tonic-gate case NDIGIT: 1536*0Sstevel@tonic-gate if (do_utf8) { 1537*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0"); 1538*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1539*0Sstevel@tonic-gate if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) { 1540*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1541*0Sstevel@tonic-gate goto got_it; 1542*0Sstevel@tonic-gate else 1543*0Sstevel@tonic-gate tmp = doevery; 1544*0Sstevel@tonic-gate } 1545*0Sstevel@tonic-gate else 1546*0Sstevel@tonic-gate tmp = 1; 1547*0Sstevel@tonic-gate s += uskip; 1548*0Sstevel@tonic-gate } 1549*0Sstevel@tonic-gate } 1550*0Sstevel@tonic-gate else { 1551*0Sstevel@tonic-gate while (s < strend) { 1552*0Sstevel@tonic-gate if (!isDIGIT(*s)) { 1553*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1554*0Sstevel@tonic-gate goto got_it; 1555*0Sstevel@tonic-gate else 1556*0Sstevel@tonic-gate tmp = doevery; 1557*0Sstevel@tonic-gate } 1558*0Sstevel@tonic-gate else 1559*0Sstevel@tonic-gate tmp = 1; 1560*0Sstevel@tonic-gate s++; 1561*0Sstevel@tonic-gate } 1562*0Sstevel@tonic-gate } 1563*0Sstevel@tonic-gate break; 1564*0Sstevel@tonic-gate case NDIGITL: 1565*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 1566*0Sstevel@tonic-gate if (do_utf8) { 1567*0Sstevel@tonic-gate while (s + (uskip = UTF8SKIP(s)) <= strend) { 1568*0Sstevel@tonic-gate if (!isDIGIT_LC_utf8((U8*)s)) { 1569*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1570*0Sstevel@tonic-gate goto got_it; 1571*0Sstevel@tonic-gate else 1572*0Sstevel@tonic-gate tmp = doevery; 1573*0Sstevel@tonic-gate } 1574*0Sstevel@tonic-gate else 1575*0Sstevel@tonic-gate tmp = 1; 1576*0Sstevel@tonic-gate s += uskip; 1577*0Sstevel@tonic-gate } 1578*0Sstevel@tonic-gate } 1579*0Sstevel@tonic-gate else { 1580*0Sstevel@tonic-gate while (s < strend) { 1581*0Sstevel@tonic-gate if (!isDIGIT_LC(*s)) { 1582*0Sstevel@tonic-gate if (tmp && (norun || regtry(prog, s))) 1583*0Sstevel@tonic-gate goto got_it; 1584*0Sstevel@tonic-gate else 1585*0Sstevel@tonic-gate tmp = doevery; 1586*0Sstevel@tonic-gate } 1587*0Sstevel@tonic-gate else 1588*0Sstevel@tonic-gate tmp = 1; 1589*0Sstevel@tonic-gate s++; 1590*0Sstevel@tonic-gate } 1591*0Sstevel@tonic-gate } 1592*0Sstevel@tonic-gate break; 1593*0Sstevel@tonic-gate default: 1594*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c)); 1595*0Sstevel@tonic-gate break; 1596*0Sstevel@tonic-gate } 1597*0Sstevel@tonic-gate return 0; 1598*0Sstevel@tonic-gate got_it: 1599*0Sstevel@tonic-gate return s; 1600*0Sstevel@tonic-gate } 1601*0Sstevel@tonic-gate 1602*0Sstevel@tonic-gate /* 1603*0Sstevel@tonic-gate - regexec_flags - match a regexp against a string 1604*0Sstevel@tonic-gate */ 1605*0Sstevel@tonic-gate I32 1606*0Sstevel@tonic-gate Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend, 1607*0Sstevel@tonic-gate char *strbeg, I32 minend, SV *sv, void *data, U32 flags) 1608*0Sstevel@tonic-gate /* strend: pointer to null at end of string */ 1609*0Sstevel@tonic-gate /* strbeg: real beginning of string */ 1610*0Sstevel@tonic-gate /* minend: end of match must be >=minend after stringarg. */ 1611*0Sstevel@tonic-gate /* data: May be used for some additional optimizations. */ 1612*0Sstevel@tonic-gate /* nosave: For optimizations. */ 1613*0Sstevel@tonic-gate { 1614*0Sstevel@tonic-gate register char *s; 1615*0Sstevel@tonic-gate register regnode *c; 1616*0Sstevel@tonic-gate register char *startpos = stringarg; 1617*0Sstevel@tonic-gate I32 minlen; /* must match at least this many chars */ 1618*0Sstevel@tonic-gate I32 dontbother = 0; /* how many characters not to try at end */ 1619*0Sstevel@tonic-gate /* I32 start_shift = 0; */ /* Offset of the start to find 1620*0Sstevel@tonic-gate constant substr. */ /* CC */ 1621*0Sstevel@tonic-gate I32 end_shift = 0; /* Same for the end. */ /* CC */ 1622*0Sstevel@tonic-gate I32 scream_pos = -1; /* Internal iterator of scream. */ 1623*0Sstevel@tonic-gate char *scream_olds; 1624*0Sstevel@tonic-gate SV* oreplsv = GvSV(PL_replgv); 1625*0Sstevel@tonic-gate bool do_utf8 = DO_UTF8(sv); 1626*0Sstevel@tonic-gate #ifdef DEBUGGING 1627*0Sstevel@tonic-gate SV *dsv0 = PERL_DEBUG_PAD_ZERO(0); 1628*0Sstevel@tonic-gate SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); 1629*0Sstevel@tonic-gate #endif 1630*0Sstevel@tonic-gate RX_MATCH_UTF8_set(prog,do_utf8); 1631*0Sstevel@tonic-gate 1632*0Sstevel@tonic-gate PL_regcc = 0; 1633*0Sstevel@tonic-gate 1634*0Sstevel@tonic-gate cache_re(prog); 1635*0Sstevel@tonic-gate #ifdef DEBUGGING 1636*0Sstevel@tonic-gate PL_regnarrate = DEBUG_r_TEST; 1637*0Sstevel@tonic-gate #endif 1638*0Sstevel@tonic-gate 1639*0Sstevel@tonic-gate /* Be paranoid... */ 1640*0Sstevel@tonic-gate if (prog == NULL || startpos == NULL) { 1641*0Sstevel@tonic-gate Perl_croak(aTHX_ "NULL regexp parameter"); 1642*0Sstevel@tonic-gate return 0; 1643*0Sstevel@tonic-gate } 1644*0Sstevel@tonic-gate 1645*0Sstevel@tonic-gate minlen = prog->minlen; 1646*0Sstevel@tonic-gate if (strend - startpos < minlen) { 1647*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 1648*0Sstevel@tonic-gate "String too short [regexec_flags]...\n")); 1649*0Sstevel@tonic-gate goto phooey; 1650*0Sstevel@tonic-gate } 1651*0Sstevel@tonic-gate 1652*0Sstevel@tonic-gate /* Check validity of program. */ 1653*0Sstevel@tonic-gate if (UCHARAT(prog->program) != REG_MAGIC) { 1654*0Sstevel@tonic-gate Perl_croak(aTHX_ "corrupted regexp program"); 1655*0Sstevel@tonic-gate } 1656*0Sstevel@tonic-gate 1657*0Sstevel@tonic-gate PL_reg_flags = 0; 1658*0Sstevel@tonic-gate PL_reg_eval_set = 0; 1659*0Sstevel@tonic-gate PL_reg_maxiter = 0; 1660*0Sstevel@tonic-gate 1661*0Sstevel@tonic-gate if (prog->reganch & ROPT_UTF8) 1662*0Sstevel@tonic-gate PL_reg_flags |= RF_utf8; 1663*0Sstevel@tonic-gate 1664*0Sstevel@tonic-gate /* Mark beginning of line for ^ and lookbehind. */ 1665*0Sstevel@tonic-gate PL_regbol = startpos; 1666*0Sstevel@tonic-gate PL_bostr = strbeg; 1667*0Sstevel@tonic-gate PL_reg_sv = sv; 1668*0Sstevel@tonic-gate 1669*0Sstevel@tonic-gate /* Mark end of line for $ (and such) */ 1670*0Sstevel@tonic-gate PL_regeol = strend; 1671*0Sstevel@tonic-gate 1672*0Sstevel@tonic-gate /* see how far we have to get to not match where we matched before */ 1673*0Sstevel@tonic-gate PL_regtill = startpos+minend; 1674*0Sstevel@tonic-gate 1675*0Sstevel@tonic-gate /* We start without call_cc context. */ 1676*0Sstevel@tonic-gate PL_reg_call_cc = 0; 1677*0Sstevel@tonic-gate 1678*0Sstevel@tonic-gate /* If there is a "must appear" string, look for it. */ 1679*0Sstevel@tonic-gate s = startpos; 1680*0Sstevel@tonic-gate 1681*0Sstevel@tonic-gate if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ 1682*0Sstevel@tonic-gate MAGIC *mg; 1683*0Sstevel@tonic-gate 1684*0Sstevel@tonic-gate if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ 1685*0Sstevel@tonic-gate PL_reg_ganch = startpos; 1686*0Sstevel@tonic-gate else if (sv && SvTYPE(sv) >= SVt_PVMG 1687*0Sstevel@tonic-gate && SvMAGIC(sv) 1688*0Sstevel@tonic-gate && (mg = mg_find(sv, PERL_MAGIC_regex_global)) 1689*0Sstevel@tonic-gate && mg->mg_len >= 0) { 1690*0Sstevel@tonic-gate PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ 1691*0Sstevel@tonic-gate if (prog->reganch & ROPT_ANCH_GPOS) { 1692*0Sstevel@tonic-gate if (s > PL_reg_ganch) 1693*0Sstevel@tonic-gate goto phooey; 1694*0Sstevel@tonic-gate s = PL_reg_ganch; 1695*0Sstevel@tonic-gate } 1696*0Sstevel@tonic-gate } 1697*0Sstevel@tonic-gate else /* pos() not defined */ 1698*0Sstevel@tonic-gate PL_reg_ganch = strbeg; 1699*0Sstevel@tonic-gate } 1700*0Sstevel@tonic-gate 1701*0Sstevel@tonic-gate if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) { 1702*0Sstevel@tonic-gate re_scream_pos_data d; 1703*0Sstevel@tonic-gate 1704*0Sstevel@tonic-gate d.scream_olds = &scream_olds; 1705*0Sstevel@tonic-gate d.scream_pos = &scream_pos; 1706*0Sstevel@tonic-gate s = re_intuit_start(prog, sv, s, strend, flags, &d); 1707*0Sstevel@tonic-gate if (!s) { 1708*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n")); 1709*0Sstevel@tonic-gate goto phooey; /* not present */ 1710*0Sstevel@tonic-gate } 1711*0Sstevel@tonic-gate } 1712*0Sstevel@tonic-gate 1713*0Sstevel@tonic-gate DEBUG_r({ 1714*0Sstevel@tonic-gate char *s0 = UTF ? 1715*0Sstevel@tonic-gate pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60, 1716*0Sstevel@tonic-gate UNI_DISPLAY_REGEX) : 1717*0Sstevel@tonic-gate prog->precomp; 1718*0Sstevel@tonic-gate int len0 = UTF ? SvCUR(dsv0) : prog->prelen; 1719*0Sstevel@tonic-gate char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60, 1720*0Sstevel@tonic-gate UNI_DISPLAY_REGEX) : startpos; 1721*0Sstevel@tonic-gate int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos; 1722*0Sstevel@tonic-gate if (!PL_colorset) 1723*0Sstevel@tonic-gate reginitcolors(); 1724*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 1725*0Sstevel@tonic-gate "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n", 1726*0Sstevel@tonic-gate PL_colors[4],PL_colors[5],PL_colors[0], 1727*0Sstevel@tonic-gate len0, len0, s0, 1728*0Sstevel@tonic-gate PL_colors[1], 1729*0Sstevel@tonic-gate len0 > 60 ? "..." : "", 1730*0Sstevel@tonic-gate PL_colors[0], 1731*0Sstevel@tonic-gate (int)(len1 > 60 ? 60 : len1), 1732*0Sstevel@tonic-gate s1, PL_colors[1], 1733*0Sstevel@tonic-gate (len1 > 60 ? "..." : "") 1734*0Sstevel@tonic-gate ); 1735*0Sstevel@tonic-gate }); 1736*0Sstevel@tonic-gate 1737*0Sstevel@tonic-gate /* Simplest case: anchored match need be tried only once. */ 1738*0Sstevel@tonic-gate /* [unless only anchor is BOL and multiline is set] */ 1739*0Sstevel@tonic-gate if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { 1740*0Sstevel@tonic-gate if (s == startpos && regtry(prog, startpos)) 1741*0Sstevel@tonic-gate goto got_it; 1742*0Sstevel@tonic-gate else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) 1743*0Sstevel@tonic-gate || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ 1744*0Sstevel@tonic-gate { 1745*0Sstevel@tonic-gate char *end; 1746*0Sstevel@tonic-gate 1747*0Sstevel@tonic-gate if (minlen) 1748*0Sstevel@tonic-gate dontbother = minlen - 1; 1749*0Sstevel@tonic-gate end = HOP3c(strend, -dontbother, strbeg) - 1; 1750*0Sstevel@tonic-gate /* for multiline we only have to try after newlines */ 1751*0Sstevel@tonic-gate if (prog->check_substr || prog->check_utf8) { 1752*0Sstevel@tonic-gate if (s == startpos) 1753*0Sstevel@tonic-gate goto after_try; 1754*0Sstevel@tonic-gate while (1) { 1755*0Sstevel@tonic-gate if (regtry(prog, s)) 1756*0Sstevel@tonic-gate goto got_it; 1757*0Sstevel@tonic-gate after_try: 1758*0Sstevel@tonic-gate if (s >= end) 1759*0Sstevel@tonic-gate goto phooey; 1760*0Sstevel@tonic-gate if (prog->reganch & RE_USE_INTUIT) { 1761*0Sstevel@tonic-gate s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); 1762*0Sstevel@tonic-gate if (!s) 1763*0Sstevel@tonic-gate goto phooey; 1764*0Sstevel@tonic-gate } 1765*0Sstevel@tonic-gate else 1766*0Sstevel@tonic-gate s++; 1767*0Sstevel@tonic-gate } 1768*0Sstevel@tonic-gate } else { 1769*0Sstevel@tonic-gate if (s > startpos) 1770*0Sstevel@tonic-gate s--; 1771*0Sstevel@tonic-gate while (s < end) { 1772*0Sstevel@tonic-gate if (*s++ == '\n') { /* don't need PL_utf8skip here */ 1773*0Sstevel@tonic-gate if (regtry(prog, s)) 1774*0Sstevel@tonic-gate goto got_it; 1775*0Sstevel@tonic-gate } 1776*0Sstevel@tonic-gate } 1777*0Sstevel@tonic-gate } 1778*0Sstevel@tonic-gate } 1779*0Sstevel@tonic-gate goto phooey; 1780*0Sstevel@tonic-gate } else if (prog->reganch & ROPT_ANCH_GPOS) { 1781*0Sstevel@tonic-gate if (regtry(prog, PL_reg_ganch)) 1782*0Sstevel@tonic-gate goto got_it; 1783*0Sstevel@tonic-gate goto phooey; 1784*0Sstevel@tonic-gate } 1785*0Sstevel@tonic-gate 1786*0Sstevel@tonic-gate /* Messy cases: unanchored match. */ 1787*0Sstevel@tonic-gate if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) { 1788*0Sstevel@tonic-gate /* we have /x+whatever/ */ 1789*0Sstevel@tonic-gate /* it must be a one character string (XXXX Except UTF?) */ 1790*0Sstevel@tonic-gate char ch; 1791*0Sstevel@tonic-gate #ifdef DEBUGGING 1792*0Sstevel@tonic-gate int did_match = 0; 1793*0Sstevel@tonic-gate #endif 1794*0Sstevel@tonic-gate if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)) 1795*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog); 1796*0Sstevel@tonic-gate ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0]; 1797*0Sstevel@tonic-gate 1798*0Sstevel@tonic-gate if (do_utf8) { 1799*0Sstevel@tonic-gate while (s < strend) { 1800*0Sstevel@tonic-gate if (*s == ch) { 1801*0Sstevel@tonic-gate DEBUG_r( did_match = 1 ); 1802*0Sstevel@tonic-gate if (regtry(prog, s)) goto got_it; 1803*0Sstevel@tonic-gate s += UTF8SKIP(s); 1804*0Sstevel@tonic-gate while (s < strend && *s == ch) 1805*0Sstevel@tonic-gate s += UTF8SKIP(s); 1806*0Sstevel@tonic-gate } 1807*0Sstevel@tonic-gate s += UTF8SKIP(s); 1808*0Sstevel@tonic-gate } 1809*0Sstevel@tonic-gate } 1810*0Sstevel@tonic-gate else { 1811*0Sstevel@tonic-gate while (s < strend) { 1812*0Sstevel@tonic-gate if (*s == ch) { 1813*0Sstevel@tonic-gate DEBUG_r( did_match = 1 ); 1814*0Sstevel@tonic-gate if (regtry(prog, s)) goto got_it; 1815*0Sstevel@tonic-gate s++; 1816*0Sstevel@tonic-gate while (s < strend && *s == ch) 1817*0Sstevel@tonic-gate s++; 1818*0Sstevel@tonic-gate } 1819*0Sstevel@tonic-gate s++; 1820*0Sstevel@tonic-gate } 1821*0Sstevel@tonic-gate } 1822*0Sstevel@tonic-gate DEBUG_r(if (!did_match) 1823*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 1824*0Sstevel@tonic-gate "Did not find anchored character...\n") 1825*0Sstevel@tonic-gate ); 1826*0Sstevel@tonic-gate } 1827*0Sstevel@tonic-gate /*SUPPRESS 560*/ 1828*0Sstevel@tonic-gate else if (prog->anchored_substr != Nullsv 1829*0Sstevel@tonic-gate || prog->anchored_utf8 != Nullsv 1830*0Sstevel@tonic-gate || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) 1831*0Sstevel@tonic-gate && prog->float_max_offset < strend - s)) { 1832*0Sstevel@tonic-gate SV *must; 1833*0Sstevel@tonic-gate I32 back_max; 1834*0Sstevel@tonic-gate I32 back_min; 1835*0Sstevel@tonic-gate char *last; 1836*0Sstevel@tonic-gate char *last1; /* Last position checked before */ 1837*0Sstevel@tonic-gate #ifdef DEBUGGING 1838*0Sstevel@tonic-gate int did_match = 0; 1839*0Sstevel@tonic-gate #endif 1840*0Sstevel@tonic-gate if (prog->anchored_substr || prog->anchored_utf8) { 1841*0Sstevel@tonic-gate if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)) 1842*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog); 1843*0Sstevel@tonic-gate must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr; 1844*0Sstevel@tonic-gate back_max = back_min = prog->anchored_offset; 1845*0Sstevel@tonic-gate } else { 1846*0Sstevel@tonic-gate if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) 1847*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog); 1848*0Sstevel@tonic-gate must = do_utf8 ? prog->float_utf8 : prog->float_substr; 1849*0Sstevel@tonic-gate back_max = prog->float_max_offset; 1850*0Sstevel@tonic-gate back_min = prog->float_min_offset; 1851*0Sstevel@tonic-gate } 1852*0Sstevel@tonic-gate if (must == &PL_sv_undef) 1853*0Sstevel@tonic-gate /* could not downgrade utf8 check substring, so must fail */ 1854*0Sstevel@tonic-gate goto phooey; 1855*0Sstevel@tonic-gate 1856*0Sstevel@tonic-gate last = HOP3c(strend, /* Cannot start after this */ 1857*0Sstevel@tonic-gate -(I32)(CHR_SVLEN(must) 1858*0Sstevel@tonic-gate - (SvTAIL(must) != 0) + back_min), strbeg); 1859*0Sstevel@tonic-gate 1860*0Sstevel@tonic-gate if (s > PL_bostr) 1861*0Sstevel@tonic-gate last1 = HOPc(s, -1); 1862*0Sstevel@tonic-gate else 1863*0Sstevel@tonic-gate last1 = s - 1; /* bogus */ 1864*0Sstevel@tonic-gate 1865*0Sstevel@tonic-gate /* XXXX check_substr already used to find `s', can optimize if 1866*0Sstevel@tonic-gate check_substr==must. */ 1867*0Sstevel@tonic-gate scream_pos = -1; 1868*0Sstevel@tonic-gate dontbother = end_shift; 1869*0Sstevel@tonic-gate strend = HOPc(strend, -dontbother); 1870*0Sstevel@tonic-gate while ( (s <= last) && 1871*0Sstevel@tonic-gate ((flags & REXEC_SCREAM) 1872*0Sstevel@tonic-gate ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg, 1873*0Sstevel@tonic-gate end_shift, &scream_pos, 0)) 1874*0Sstevel@tonic-gate : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend), 1875*0Sstevel@tonic-gate (unsigned char*)strend, must, 1876*0Sstevel@tonic-gate PL_multiline ? FBMrf_MULTILINE : 0))) ) { 1877*0Sstevel@tonic-gate /* we may be pointing at the wrong string */ 1878*0Sstevel@tonic-gate if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog)) 1879*0Sstevel@tonic-gate s = strbeg + (s - SvPVX(sv)); 1880*0Sstevel@tonic-gate DEBUG_r( did_match = 1 ); 1881*0Sstevel@tonic-gate if (HOPc(s, -back_max) > last1) { 1882*0Sstevel@tonic-gate last1 = HOPc(s, -back_min); 1883*0Sstevel@tonic-gate s = HOPc(s, -back_max); 1884*0Sstevel@tonic-gate } 1885*0Sstevel@tonic-gate else { 1886*0Sstevel@tonic-gate char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1; 1887*0Sstevel@tonic-gate 1888*0Sstevel@tonic-gate last1 = HOPc(s, -back_min); 1889*0Sstevel@tonic-gate s = t; 1890*0Sstevel@tonic-gate } 1891*0Sstevel@tonic-gate if (do_utf8) { 1892*0Sstevel@tonic-gate while (s <= last1) { 1893*0Sstevel@tonic-gate if (regtry(prog, s)) 1894*0Sstevel@tonic-gate goto got_it; 1895*0Sstevel@tonic-gate s += UTF8SKIP(s); 1896*0Sstevel@tonic-gate } 1897*0Sstevel@tonic-gate } 1898*0Sstevel@tonic-gate else { 1899*0Sstevel@tonic-gate while (s <= last1) { 1900*0Sstevel@tonic-gate if (regtry(prog, s)) 1901*0Sstevel@tonic-gate goto got_it; 1902*0Sstevel@tonic-gate s++; 1903*0Sstevel@tonic-gate } 1904*0Sstevel@tonic-gate } 1905*0Sstevel@tonic-gate } 1906*0Sstevel@tonic-gate DEBUG_r(if (!did_match) 1907*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 1908*0Sstevel@tonic-gate "Did not find %s substr `%s%.*s%s'%s...\n", 1909*0Sstevel@tonic-gate ((must == prog->anchored_substr || must == prog->anchored_utf8) 1910*0Sstevel@tonic-gate ? "anchored" : "floating"), 1911*0Sstevel@tonic-gate PL_colors[0], 1912*0Sstevel@tonic-gate (int)(SvCUR(must) - (SvTAIL(must)!=0)), 1913*0Sstevel@tonic-gate SvPVX(must), 1914*0Sstevel@tonic-gate PL_colors[1], (SvTAIL(must) ? "$" : "")) 1915*0Sstevel@tonic-gate ); 1916*0Sstevel@tonic-gate goto phooey; 1917*0Sstevel@tonic-gate } 1918*0Sstevel@tonic-gate else if ((c = prog->regstclass)) { 1919*0Sstevel@tonic-gate if (minlen) { 1920*0Sstevel@tonic-gate I32 op = (U8)OP(prog->regstclass); 1921*0Sstevel@tonic-gate /* don't bother with what can't match */ 1922*0Sstevel@tonic-gate if (PL_regkind[op] != EXACT && op != CANY) 1923*0Sstevel@tonic-gate strend = HOPc(strend, -(minlen - 1)); 1924*0Sstevel@tonic-gate } 1925*0Sstevel@tonic-gate DEBUG_r({ 1926*0Sstevel@tonic-gate SV *prop = sv_newmortal(); 1927*0Sstevel@tonic-gate char *s0; 1928*0Sstevel@tonic-gate char *s1; 1929*0Sstevel@tonic-gate int len0; 1930*0Sstevel@tonic-gate int len1; 1931*0Sstevel@tonic-gate 1932*0Sstevel@tonic-gate regprop(prop, c); 1933*0Sstevel@tonic-gate s0 = UTF ? 1934*0Sstevel@tonic-gate pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60, 1935*0Sstevel@tonic-gate UNI_DISPLAY_REGEX) : 1936*0Sstevel@tonic-gate SvPVX(prop); 1937*0Sstevel@tonic-gate len0 = UTF ? SvCUR(dsv0) : SvCUR(prop); 1938*0Sstevel@tonic-gate s1 = UTF ? 1939*0Sstevel@tonic-gate sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s; 1940*0Sstevel@tonic-gate len1 = UTF ? SvCUR(dsv1) : strend - s; 1941*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 1942*0Sstevel@tonic-gate "Matching stclass `%*.*s' against `%*.*s'\n", 1943*0Sstevel@tonic-gate len0, len0, s0, 1944*0Sstevel@tonic-gate len1, len1, s1); 1945*0Sstevel@tonic-gate }); 1946*0Sstevel@tonic-gate if (find_byclass(prog, c, s, strend, startpos, 0)) 1947*0Sstevel@tonic-gate goto got_it; 1948*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n")); 1949*0Sstevel@tonic-gate } 1950*0Sstevel@tonic-gate else { 1951*0Sstevel@tonic-gate dontbother = 0; 1952*0Sstevel@tonic-gate if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) { 1953*0Sstevel@tonic-gate /* Trim the end. */ 1954*0Sstevel@tonic-gate char *last; 1955*0Sstevel@tonic-gate SV* float_real; 1956*0Sstevel@tonic-gate 1957*0Sstevel@tonic-gate if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) 1958*0Sstevel@tonic-gate do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog); 1959*0Sstevel@tonic-gate float_real = do_utf8 ? prog->float_utf8 : prog->float_substr; 1960*0Sstevel@tonic-gate 1961*0Sstevel@tonic-gate if (flags & REXEC_SCREAM) { 1962*0Sstevel@tonic-gate last = screaminstr(sv, float_real, s - strbeg, 1963*0Sstevel@tonic-gate end_shift, &scream_pos, 1); /* last one */ 1964*0Sstevel@tonic-gate if (!last) 1965*0Sstevel@tonic-gate last = scream_olds; /* Only one occurrence. */ 1966*0Sstevel@tonic-gate /* we may be pointing at the wrong string */ 1967*0Sstevel@tonic-gate else if (RX_MATCH_COPIED(prog)) 1968*0Sstevel@tonic-gate s = strbeg + (s - SvPVX(sv)); 1969*0Sstevel@tonic-gate } 1970*0Sstevel@tonic-gate else { 1971*0Sstevel@tonic-gate STRLEN len; 1972*0Sstevel@tonic-gate char *little = SvPV(float_real, len); 1973*0Sstevel@tonic-gate 1974*0Sstevel@tonic-gate if (SvTAIL(float_real)) { 1975*0Sstevel@tonic-gate if (memEQ(strend - len + 1, little, len - 1)) 1976*0Sstevel@tonic-gate last = strend - len + 1; 1977*0Sstevel@tonic-gate else if (!PL_multiline) 1978*0Sstevel@tonic-gate last = memEQ(strend - len, little, len) 1979*0Sstevel@tonic-gate ? strend - len : Nullch; 1980*0Sstevel@tonic-gate else 1981*0Sstevel@tonic-gate goto find_last; 1982*0Sstevel@tonic-gate } else { 1983*0Sstevel@tonic-gate find_last: 1984*0Sstevel@tonic-gate if (len) 1985*0Sstevel@tonic-gate last = rninstr(s, strend, little, little + len); 1986*0Sstevel@tonic-gate else 1987*0Sstevel@tonic-gate last = strend; /* matching `$' */ 1988*0Sstevel@tonic-gate } 1989*0Sstevel@tonic-gate } 1990*0Sstevel@tonic-gate if (last == NULL) { 1991*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 1992*0Sstevel@tonic-gate "%sCan't trim the tail, match fails (should not happen)%s\n", 1993*0Sstevel@tonic-gate PL_colors[4],PL_colors[5])); 1994*0Sstevel@tonic-gate goto phooey; /* Should not happen! */ 1995*0Sstevel@tonic-gate } 1996*0Sstevel@tonic-gate dontbother = strend - last + prog->float_min_offset; 1997*0Sstevel@tonic-gate } 1998*0Sstevel@tonic-gate if (minlen && (dontbother < minlen)) 1999*0Sstevel@tonic-gate dontbother = minlen - 1; 2000*0Sstevel@tonic-gate strend -= dontbother; /* this one's always in bytes! */ 2001*0Sstevel@tonic-gate /* We don't know much -- general case. */ 2002*0Sstevel@tonic-gate if (do_utf8) { 2003*0Sstevel@tonic-gate for (;;) { 2004*0Sstevel@tonic-gate if (regtry(prog, s)) 2005*0Sstevel@tonic-gate goto got_it; 2006*0Sstevel@tonic-gate if (s >= strend) 2007*0Sstevel@tonic-gate break; 2008*0Sstevel@tonic-gate s += UTF8SKIP(s); 2009*0Sstevel@tonic-gate }; 2010*0Sstevel@tonic-gate } 2011*0Sstevel@tonic-gate else { 2012*0Sstevel@tonic-gate do { 2013*0Sstevel@tonic-gate if (regtry(prog, s)) 2014*0Sstevel@tonic-gate goto got_it; 2015*0Sstevel@tonic-gate } while (s++ < strend); 2016*0Sstevel@tonic-gate } 2017*0Sstevel@tonic-gate } 2018*0Sstevel@tonic-gate 2019*0Sstevel@tonic-gate /* Failure. */ 2020*0Sstevel@tonic-gate goto phooey; 2021*0Sstevel@tonic-gate 2022*0Sstevel@tonic-gate got_it: 2023*0Sstevel@tonic-gate RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted); 2024*0Sstevel@tonic-gate 2025*0Sstevel@tonic-gate if (PL_reg_eval_set) { 2026*0Sstevel@tonic-gate /* Preserve the current value of $^R */ 2027*0Sstevel@tonic-gate if (oreplsv != GvSV(PL_replgv)) 2028*0Sstevel@tonic-gate sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is 2029*0Sstevel@tonic-gate restored, the value remains 2030*0Sstevel@tonic-gate the same. */ 2031*0Sstevel@tonic-gate restore_pos(aTHX_ 0); 2032*0Sstevel@tonic-gate } 2033*0Sstevel@tonic-gate 2034*0Sstevel@tonic-gate /* make sure $`, $&, $', and $digit will work later */ 2035*0Sstevel@tonic-gate if ( !(flags & REXEC_NOT_FIRST) ) { 2036*0Sstevel@tonic-gate if (RX_MATCH_COPIED(prog)) { 2037*0Sstevel@tonic-gate Safefree(prog->subbeg); 2038*0Sstevel@tonic-gate RX_MATCH_COPIED_off(prog); 2039*0Sstevel@tonic-gate } 2040*0Sstevel@tonic-gate if (flags & REXEC_COPY_STR) { 2041*0Sstevel@tonic-gate I32 i = PL_regeol - startpos + (stringarg - strbeg); 2042*0Sstevel@tonic-gate 2043*0Sstevel@tonic-gate s = savepvn(strbeg, i); 2044*0Sstevel@tonic-gate prog->subbeg = s; 2045*0Sstevel@tonic-gate prog->sublen = i; 2046*0Sstevel@tonic-gate RX_MATCH_COPIED_on(prog); 2047*0Sstevel@tonic-gate } 2048*0Sstevel@tonic-gate else { 2049*0Sstevel@tonic-gate prog->subbeg = strbeg; 2050*0Sstevel@tonic-gate prog->sublen = PL_regeol - strbeg; /* strend may have been modified */ 2051*0Sstevel@tonic-gate } 2052*0Sstevel@tonic-gate } 2053*0Sstevel@tonic-gate 2054*0Sstevel@tonic-gate return 1; 2055*0Sstevel@tonic-gate 2056*0Sstevel@tonic-gate phooey: 2057*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n", 2058*0Sstevel@tonic-gate PL_colors[4],PL_colors[5])); 2059*0Sstevel@tonic-gate if (PL_reg_eval_set) 2060*0Sstevel@tonic-gate restore_pos(aTHX_ 0); 2061*0Sstevel@tonic-gate return 0; 2062*0Sstevel@tonic-gate } 2063*0Sstevel@tonic-gate 2064*0Sstevel@tonic-gate /* 2065*0Sstevel@tonic-gate - regtry - try match at specific point 2066*0Sstevel@tonic-gate */ 2067*0Sstevel@tonic-gate STATIC I32 /* 0 failure, 1 success */ 2068*0Sstevel@tonic-gate S_regtry(pTHX_ regexp *prog, char *startpos) 2069*0Sstevel@tonic-gate { 2070*0Sstevel@tonic-gate register I32 i; 2071*0Sstevel@tonic-gate register I32 *sp; 2072*0Sstevel@tonic-gate register I32 *ep; 2073*0Sstevel@tonic-gate CHECKPOINT lastcp; 2074*0Sstevel@tonic-gate 2075*0Sstevel@tonic-gate #ifdef DEBUGGING 2076*0Sstevel@tonic-gate PL_regindent = 0; /* XXXX Not good when matches are reenterable... */ 2077*0Sstevel@tonic-gate #endif 2078*0Sstevel@tonic-gate if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) { 2079*0Sstevel@tonic-gate MAGIC *mg; 2080*0Sstevel@tonic-gate 2081*0Sstevel@tonic-gate PL_reg_eval_set = RS_init; 2082*0Sstevel@tonic-gate DEBUG_r(DEBUG_s( 2083*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n", 2084*0Sstevel@tonic-gate (IV)(PL_stack_sp - PL_stack_base)); 2085*0Sstevel@tonic-gate )); 2086*0Sstevel@tonic-gate SAVEI32(cxstack[cxstack_ix].blk_oldsp); 2087*0Sstevel@tonic-gate cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base; 2088*0Sstevel@tonic-gate /* Otherwise OP_NEXTSTATE will free whatever on stack now. */ 2089*0Sstevel@tonic-gate SAVETMPS; 2090*0Sstevel@tonic-gate /* Apparently this is not needed, judging by wantarray. */ 2091*0Sstevel@tonic-gate /* SAVEI8(cxstack[cxstack_ix].blk_gimme); 2092*0Sstevel@tonic-gate cxstack[cxstack_ix].blk_gimme = G_SCALAR; */ 2093*0Sstevel@tonic-gate 2094*0Sstevel@tonic-gate if (PL_reg_sv) { 2095*0Sstevel@tonic-gate /* Make $_ available to executed code. */ 2096*0Sstevel@tonic-gate if (PL_reg_sv != DEFSV) { 2097*0Sstevel@tonic-gate /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */ 2098*0Sstevel@tonic-gate SAVESPTR(DEFSV); 2099*0Sstevel@tonic-gate DEFSV = PL_reg_sv; 2100*0Sstevel@tonic-gate } 2101*0Sstevel@tonic-gate 2102*0Sstevel@tonic-gate if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) 2103*0Sstevel@tonic-gate && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) { 2104*0Sstevel@tonic-gate /* prepare for quick setting of pos */ 2105*0Sstevel@tonic-gate sv_magic(PL_reg_sv, (SV*)0, 2106*0Sstevel@tonic-gate PERL_MAGIC_regex_global, Nullch, 0); 2107*0Sstevel@tonic-gate mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global); 2108*0Sstevel@tonic-gate mg->mg_len = -1; 2109*0Sstevel@tonic-gate } 2110*0Sstevel@tonic-gate PL_reg_magic = mg; 2111*0Sstevel@tonic-gate PL_reg_oldpos = mg->mg_len; 2112*0Sstevel@tonic-gate SAVEDESTRUCTOR_X(restore_pos, 0); 2113*0Sstevel@tonic-gate } 2114*0Sstevel@tonic-gate if (!PL_reg_curpm) { 2115*0Sstevel@tonic-gate Newz(22,PL_reg_curpm, 1, PMOP); 2116*0Sstevel@tonic-gate #ifdef USE_ITHREADS 2117*0Sstevel@tonic-gate { 2118*0Sstevel@tonic-gate SV* repointer = newSViv(0); 2119*0Sstevel@tonic-gate /* so we know which PL_regex_padav element is PL_reg_curpm */ 2120*0Sstevel@tonic-gate SvFLAGS(repointer) |= SVf_BREAK; 2121*0Sstevel@tonic-gate av_push(PL_regex_padav,repointer); 2122*0Sstevel@tonic-gate PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav); 2123*0Sstevel@tonic-gate PL_regex_pad = AvARRAY(PL_regex_padav); 2124*0Sstevel@tonic-gate } 2125*0Sstevel@tonic-gate #endif 2126*0Sstevel@tonic-gate } 2127*0Sstevel@tonic-gate PM_SETRE(PL_reg_curpm, prog); 2128*0Sstevel@tonic-gate PL_reg_oldcurpm = PL_curpm; 2129*0Sstevel@tonic-gate PL_curpm = PL_reg_curpm; 2130*0Sstevel@tonic-gate if (RX_MATCH_COPIED(prog)) { 2131*0Sstevel@tonic-gate /* Here is a serious problem: we cannot rewrite subbeg, 2132*0Sstevel@tonic-gate since it may be needed if this match fails. Thus 2133*0Sstevel@tonic-gate $` inside (?{}) could fail... */ 2134*0Sstevel@tonic-gate PL_reg_oldsaved = prog->subbeg; 2135*0Sstevel@tonic-gate PL_reg_oldsavedlen = prog->sublen; 2136*0Sstevel@tonic-gate RX_MATCH_COPIED_off(prog); 2137*0Sstevel@tonic-gate } 2138*0Sstevel@tonic-gate else 2139*0Sstevel@tonic-gate PL_reg_oldsaved = Nullch; 2140*0Sstevel@tonic-gate prog->subbeg = PL_bostr; 2141*0Sstevel@tonic-gate prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */ 2142*0Sstevel@tonic-gate } 2143*0Sstevel@tonic-gate prog->startp[0] = startpos - PL_bostr; 2144*0Sstevel@tonic-gate PL_reginput = startpos; 2145*0Sstevel@tonic-gate PL_regstartp = prog->startp; 2146*0Sstevel@tonic-gate PL_regendp = prog->endp; 2147*0Sstevel@tonic-gate PL_reglastparen = &prog->lastparen; 2148*0Sstevel@tonic-gate PL_reglastcloseparen = &prog->lastcloseparen; 2149*0Sstevel@tonic-gate prog->lastparen = 0; 2150*0Sstevel@tonic-gate prog->lastcloseparen = 0; 2151*0Sstevel@tonic-gate PL_regsize = 0; 2152*0Sstevel@tonic-gate DEBUG_r(PL_reg_starttry = startpos); 2153*0Sstevel@tonic-gate if (PL_reg_start_tmpl <= prog->nparens) { 2154*0Sstevel@tonic-gate PL_reg_start_tmpl = prog->nparens*3/2 + 3; 2155*0Sstevel@tonic-gate if(PL_reg_start_tmp) 2156*0Sstevel@tonic-gate Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*); 2157*0Sstevel@tonic-gate else 2158*0Sstevel@tonic-gate New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*); 2159*0Sstevel@tonic-gate } 2160*0Sstevel@tonic-gate 2161*0Sstevel@tonic-gate /* XXXX What this code is doing here?!!! There should be no need 2162*0Sstevel@tonic-gate to do this again and again, PL_reglastparen should take care of 2163*0Sstevel@tonic-gate this! --ilya*/ 2164*0Sstevel@tonic-gate 2165*0Sstevel@tonic-gate /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code. 2166*0Sstevel@tonic-gate * Actually, the code in regcppop() (which Ilya may be meaning by 2167*0Sstevel@tonic-gate * PL_reglastparen), is not needed at all by the test suite 2168*0Sstevel@tonic-gate * (op/regexp, op/pat, op/split), but that code is needed, oddly 2169*0Sstevel@tonic-gate * enough, for building DynaLoader, or otherwise this 2170*0Sstevel@tonic-gate * "Error: '*' not in typemap in DynaLoader.xs, line 164" 2171*0Sstevel@tonic-gate * will happen. Meanwhile, this code *is* needed for the 2172*0Sstevel@tonic-gate * above-mentioned test suite tests to succeed. The common theme 2173*0Sstevel@tonic-gate * on those tests seems to be returning null fields from matches. 2174*0Sstevel@tonic-gate * --jhi */ 2175*0Sstevel@tonic-gate #if 1 2176*0Sstevel@tonic-gate sp = prog->startp; 2177*0Sstevel@tonic-gate ep = prog->endp; 2178*0Sstevel@tonic-gate if (prog->nparens) { 2179*0Sstevel@tonic-gate for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) { 2180*0Sstevel@tonic-gate *++sp = -1; 2181*0Sstevel@tonic-gate *++ep = -1; 2182*0Sstevel@tonic-gate } 2183*0Sstevel@tonic-gate } 2184*0Sstevel@tonic-gate #endif 2185*0Sstevel@tonic-gate REGCP_SET(lastcp); 2186*0Sstevel@tonic-gate if (regmatch(prog->program + 1)) { 2187*0Sstevel@tonic-gate prog->endp[0] = PL_reginput - PL_bostr; 2188*0Sstevel@tonic-gate return 1; 2189*0Sstevel@tonic-gate } 2190*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 2191*0Sstevel@tonic-gate return 0; 2192*0Sstevel@tonic-gate } 2193*0Sstevel@tonic-gate 2194*0Sstevel@tonic-gate #define RE_UNWIND_BRANCH 1 2195*0Sstevel@tonic-gate #define RE_UNWIND_BRANCHJ 2 2196*0Sstevel@tonic-gate 2197*0Sstevel@tonic-gate union re_unwind_t; 2198*0Sstevel@tonic-gate 2199*0Sstevel@tonic-gate typedef struct { /* XX: makes sense to enlarge it... */ 2200*0Sstevel@tonic-gate I32 type; 2201*0Sstevel@tonic-gate I32 prev; 2202*0Sstevel@tonic-gate CHECKPOINT lastcp; 2203*0Sstevel@tonic-gate } re_unwind_generic_t; 2204*0Sstevel@tonic-gate 2205*0Sstevel@tonic-gate typedef struct { 2206*0Sstevel@tonic-gate I32 type; 2207*0Sstevel@tonic-gate I32 prev; 2208*0Sstevel@tonic-gate CHECKPOINT lastcp; 2209*0Sstevel@tonic-gate I32 lastparen; 2210*0Sstevel@tonic-gate regnode *next; 2211*0Sstevel@tonic-gate char *locinput; 2212*0Sstevel@tonic-gate I32 nextchr; 2213*0Sstevel@tonic-gate #ifdef DEBUGGING 2214*0Sstevel@tonic-gate int regindent; 2215*0Sstevel@tonic-gate #endif 2216*0Sstevel@tonic-gate } re_unwind_branch_t; 2217*0Sstevel@tonic-gate 2218*0Sstevel@tonic-gate typedef union re_unwind_t { 2219*0Sstevel@tonic-gate I32 type; 2220*0Sstevel@tonic-gate re_unwind_generic_t generic; 2221*0Sstevel@tonic-gate re_unwind_branch_t branch; 2222*0Sstevel@tonic-gate } re_unwind_t; 2223*0Sstevel@tonic-gate 2224*0Sstevel@tonic-gate #define sayYES goto yes 2225*0Sstevel@tonic-gate #define sayNO goto no 2226*0Sstevel@tonic-gate #define sayNO_ANYOF goto no_anyof 2227*0Sstevel@tonic-gate #define sayYES_FINAL goto yes_final 2228*0Sstevel@tonic-gate #define sayYES_LOUD goto yes_loud 2229*0Sstevel@tonic-gate #define sayNO_FINAL goto no_final 2230*0Sstevel@tonic-gate #define sayNO_SILENT goto do_no 2231*0Sstevel@tonic-gate #define saySAME(x) if (x) goto yes; else goto no 2232*0Sstevel@tonic-gate 2233*0Sstevel@tonic-gate #define REPORT_CODE_OFF 24 2234*0Sstevel@tonic-gate 2235*0Sstevel@tonic-gate /* 2236*0Sstevel@tonic-gate - regmatch - main matching routine 2237*0Sstevel@tonic-gate * 2238*0Sstevel@tonic-gate * Conceptually the strategy is simple: check to see whether the current 2239*0Sstevel@tonic-gate * node matches, call self recursively to see whether the rest matches, 2240*0Sstevel@tonic-gate * and then act accordingly. In practice we make some effort to avoid 2241*0Sstevel@tonic-gate * recursion, in particular by going through "ordinary" nodes (that don't 2242*0Sstevel@tonic-gate * need to know whether the rest of the match failed) by a loop instead of 2243*0Sstevel@tonic-gate * by recursion. 2244*0Sstevel@tonic-gate */ 2245*0Sstevel@tonic-gate /* [lwall] I've hoisted the register declarations to the outer block in order to 2246*0Sstevel@tonic-gate * maybe save a little bit of pushing and popping on the stack. It also takes 2247*0Sstevel@tonic-gate * advantage of machines that use a register save mask on subroutine entry. 2248*0Sstevel@tonic-gate */ 2249*0Sstevel@tonic-gate STATIC I32 /* 0 failure, 1 success */ 2250*0Sstevel@tonic-gate S_regmatch(pTHX_ regnode *prog) 2251*0Sstevel@tonic-gate { 2252*0Sstevel@tonic-gate register regnode *scan; /* Current node. */ 2253*0Sstevel@tonic-gate regnode *next; /* Next node. */ 2254*0Sstevel@tonic-gate regnode *inner; /* Next node in internal branch. */ 2255*0Sstevel@tonic-gate register I32 nextchr; /* renamed nextchr - nextchar colides with 2256*0Sstevel@tonic-gate function of same name */ 2257*0Sstevel@tonic-gate register I32 n; /* no or next */ 2258*0Sstevel@tonic-gate register I32 ln = 0; /* len or last */ 2259*0Sstevel@tonic-gate register char *s = Nullch; /* operand or save */ 2260*0Sstevel@tonic-gate register char *locinput = PL_reginput; 2261*0Sstevel@tonic-gate register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */ 2262*0Sstevel@tonic-gate int minmod = 0, sw = 0, logical = 0; 2263*0Sstevel@tonic-gate I32 unwind = 0; 2264*0Sstevel@tonic-gate #if 0 2265*0Sstevel@tonic-gate I32 firstcp = PL_savestack_ix; 2266*0Sstevel@tonic-gate #endif 2267*0Sstevel@tonic-gate register bool do_utf8 = PL_reg_match_utf8; 2268*0Sstevel@tonic-gate #ifdef DEBUGGING 2269*0Sstevel@tonic-gate SV *dsv0 = PERL_DEBUG_PAD_ZERO(0); 2270*0Sstevel@tonic-gate SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); 2271*0Sstevel@tonic-gate SV *dsv2 = PERL_DEBUG_PAD_ZERO(2); 2272*0Sstevel@tonic-gate #endif 2273*0Sstevel@tonic-gate 2274*0Sstevel@tonic-gate #ifdef DEBUGGING 2275*0Sstevel@tonic-gate PL_regindent++; 2276*0Sstevel@tonic-gate #endif 2277*0Sstevel@tonic-gate 2278*0Sstevel@tonic-gate /* Note that nextchr is a byte even in UTF */ 2279*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2280*0Sstevel@tonic-gate scan = prog; 2281*0Sstevel@tonic-gate while (scan != NULL) { 2282*0Sstevel@tonic-gate 2283*0Sstevel@tonic-gate DEBUG_r( { 2284*0Sstevel@tonic-gate SV *prop = sv_newmortal(); 2285*0Sstevel@tonic-gate int docolor = *PL_colors[0]; 2286*0Sstevel@tonic-gate int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */ 2287*0Sstevel@tonic-gate int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput); 2288*0Sstevel@tonic-gate /* The part of the string before starttry has one color 2289*0Sstevel@tonic-gate (pref0_len chars), between starttry and current 2290*0Sstevel@tonic-gate position another one (pref_len - pref0_len chars), 2291*0Sstevel@tonic-gate after the current position the third one. 2292*0Sstevel@tonic-gate We assume that pref0_len <= pref_len, otherwise we 2293*0Sstevel@tonic-gate decrease pref0_len. */ 2294*0Sstevel@tonic-gate int pref_len = (locinput - PL_bostr) > (5 + taill) - l 2295*0Sstevel@tonic-gate ? (5 + taill) - l : locinput - PL_bostr; 2296*0Sstevel@tonic-gate int pref0_len; 2297*0Sstevel@tonic-gate 2298*0Sstevel@tonic-gate while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len))) 2299*0Sstevel@tonic-gate pref_len++; 2300*0Sstevel@tonic-gate pref0_len = pref_len - (locinput - PL_reg_starttry); 2301*0Sstevel@tonic-gate if (l + pref_len < (5 + taill) && l < PL_regeol - locinput) 2302*0Sstevel@tonic-gate l = ( PL_regeol - locinput > (5 + taill) - pref_len 2303*0Sstevel@tonic-gate ? (5 + taill) - pref_len : PL_regeol - locinput); 2304*0Sstevel@tonic-gate while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l))) 2305*0Sstevel@tonic-gate l--; 2306*0Sstevel@tonic-gate if (pref0_len < 0) 2307*0Sstevel@tonic-gate pref0_len = 0; 2308*0Sstevel@tonic-gate if (pref0_len > pref_len) 2309*0Sstevel@tonic-gate pref0_len = pref_len; 2310*0Sstevel@tonic-gate regprop(prop, scan); 2311*0Sstevel@tonic-gate { 2312*0Sstevel@tonic-gate char *s0 = 2313*0Sstevel@tonic-gate do_utf8 && OP(scan) != CANY ? 2314*0Sstevel@tonic-gate pv_uni_display(dsv0, (U8*)(locinput - pref_len), 2315*0Sstevel@tonic-gate pref0_len, 60, UNI_DISPLAY_REGEX) : 2316*0Sstevel@tonic-gate locinput - pref_len; 2317*0Sstevel@tonic-gate int len0 = do_utf8 ? strlen(s0) : pref0_len; 2318*0Sstevel@tonic-gate char *s1 = do_utf8 && OP(scan) != CANY ? 2319*0Sstevel@tonic-gate pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len), 2320*0Sstevel@tonic-gate pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) : 2321*0Sstevel@tonic-gate locinput - pref_len + pref0_len; 2322*0Sstevel@tonic-gate int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len; 2323*0Sstevel@tonic-gate char *s2 = do_utf8 && OP(scan) != CANY ? 2324*0Sstevel@tonic-gate pv_uni_display(dsv2, (U8*)locinput, 2325*0Sstevel@tonic-gate PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) : 2326*0Sstevel@tonic-gate locinput; 2327*0Sstevel@tonic-gate int len2 = do_utf8 ? strlen(s2) : l; 2328*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 2329*0Sstevel@tonic-gate "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n", 2330*0Sstevel@tonic-gate (IV)(locinput - PL_bostr), 2331*0Sstevel@tonic-gate PL_colors[4], 2332*0Sstevel@tonic-gate len0, s0, 2333*0Sstevel@tonic-gate PL_colors[5], 2334*0Sstevel@tonic-gate PL_colors[2], 2335*0Sstevel@tonic-gate len1, s1, 2336*0Sstevel@tonic-gate PL_colors[3], 2337*0Sstevel@tonic-gate (docolor ? "" : "> <"), 2338*0Sstevel@tonic-gate PL_colors[0], 2339*0Sstevel@tonic-gate len2, s2, 2340*0Sstevel@tonic-gate PL_colors[1], 2341*0Sstevel@tonic-gate 15 - l - pref_len + 1, 2342*0Sstevel@tonic-gate "", 2343*0Sstevel@tonic-gate (IV)(scan - PL_regprogram), PL_regindent*2, "", 2344*0Sstevel@tonic-gate SvPVX(prop)); 2345*0Sstevel@tonic-gate } 2346*0Sstevel@tonic-gate }); 2347*0Sstevel@tonic-gate 2348*0Sstevel@tonic-gate next = scan + NEXT_OFF(scan); 2349*0Sstevel@tonic-gate if (next == scan) 2350*0Sstevel@tonic-gate next = NULL; 2351*0Sstevel@tonic-gate 2352*0Sstevel@tonic-gate switch (OP(scan)) { 2353*0Sstevel@tonic-gate case BOL: 2354*0Sstevel@tonic-gate if (locinput == PL_bostr || (PL_multiline && 2355*0Sstevel@tonic-gate (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') ) 2356*0Sstevel@tonic-gate { 2357*0Sstevel@tonic-gate /* regtill = regbol; */ 2358*0Sstevel@tonic-gate break; 2359*0Sstevel@tonic-gate } 2360*0Sstevel@tonic-gate sayNO; 2361*0Sstevel@tonic-gate case MBOL: 2362*0Sstevel@tonic-gate if (locinput == PL_bostr || 2363*0Sstevel@tonic-gate ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n')) 2364*0Sstevel@tonic-gate { 2365*0Sstevel@tonic-gate break; 2366*0Sstevel@tonic-gate } 2367*0Sstevel@tonic-gate sayNO; 2368*0Sstevel@tonic-gate case SBOL: 2369*0Sstevel@tonic-gate if (locinput == PL_bostr) 2370*0Sstevel@tonic-gate break; 2371*0Sstevel@tonic-gate sayNO; 2372*0Sstevel@tonic-gate case GPOS: 2373*0Sstevel@tonic-gate if (locinput == PL_reg_ganch) 2374*0Sstevel@tonic-gate break; 2375*0Sstevel@tonic-gate sayNO; 2376*0Sstevel@tonic-gate case EOL: 2377*0Sstevel@tonic-gate if (PL_multiline) 2378*0Sstevel@tonic-gate goto meol; 2379*0Sstevel@tonic-gate else 2380*0Sstevel@tonic-gate goto seol; 2381*0Sstevel@tonic-gate case MEOL: 2382*0Sstevel@tonic-gate meol: 2383*0Sstevel@tonic-gate if ((nextchr || locinput < PL_regeol) && nextchr != '\n') 2384*0Sstevel@tonic-gate sayNO; 2385*0Sstevel@tonic-gate break; 2386*0Sstevel@tonic-gate case SEOL: 2387*0Sstevel@tonic-gate seol: 2388*0Sstevel@tonic-gate if ((nextchr || locinput < PL_regeol) && nextchr != '\n') 2389*0Sstevel@tonic-gate sayNO; 2390*0Sstevel@tonic-gate if (PL_regeol - locinput > 1) 2391*0Sstevel@tonic-gate sayNO; 2392*0Sstevel@tonic-gate break; 2393*0Sstevel@tonic-gate case EOS: 2394*0Sstevel@tonic-gate if (PL_regeol != locinput) 2395*0Sstevel@tonic-gate sayNO; 2396*0Sstevel@tonic-gate break; 2397*0Sstevel@tonic-gate case SANY: 2398*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol) 2399*0Sstevel@tonic-gate sayNO; 2400*0Sstevel@tonic-gate if (do_utf8) { 2401*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2402*0Sstevel@tonic-gate if (locinput > PL_regeol) 2403*0Sstevel@tonic-gate sayNO; 2404*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2405*0Sstevel@tonic-gate } 2406*0Sstevel@tonic-gate else 2407*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2408*0Sstevel@tonic-gate break; 2409*0Sstevel@tonic-gate case CANY: 2410*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol) 2411*0Sstevel@tonic-gate sayNO; 2412*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2413*0Sstevel@tonic-gate break; 2414*0Sstevel@tonic-gate case REG_ANY: 2415*0Sstevel@tonic-gate if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') 2416*0Sstevel@tonic-gate sayNO; 2417*0Sstevel@tonic-gate if (do_utf8) { 2418*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2419*0Sstevel@tonic-gate if (locinput > PL_regeol) 2420*0Sstevel@tonic-gate sayNO; 2421*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2422*0Sstevel@tonic-gate } 2423*0Sstevel@tonic-gate else 2424*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2425*0Sstevel@tonic-gate break; 2426*0Sstevel@tonic-gate case EXACT: 2427*0Sstevel@tonic-gate s = STRING(scan); 2428*0Sstevel@tonic-gate ln = STR_LEN(scan); 2429*0Sstevel@tonic-gate if (do_utf8 != UTF) { 2430*0Sstevel@tonic-gate /* The target and the pattern have differing utf8ness. */ 2431*0Sstevel@tonic-gate char *l = locinput; 2432*0Sstevel@tonic-gate char *e = s + ln; 2433*0Sstevel@tonic-gate STRLEN ulen; 2434*0Sstevel@tonic-gate 2435*0Sstevel@tonic-gate if (do_utf8) { 2436*0Sstevel@tonic-gate /* The target is utf8, the pattern is not utf8. */ 2437*0Sstevel@tonic-gate while (s < e) { 2438*0Sstevel@tonic-gate if (l >= PL_regeol) 2439*0Sstevel@tonic-gate sayNO; 2440*0Sstevel@tonic-gate if (NATIVE_TO_UNI(*(U8*)s) != 2441*0Sstevel@tonic-gate utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen, 2442*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 2443*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY)) 2444*0Sstevel@tonic-gate sayNO; 2445*0Sstevel@tonic-gate l += ulen; 2446*0Sstevel@tonic-gate s ++; 2447*0Sstevel@tonic-gate } 2448*0Sstevel@tonic-gate } 2449*0Sstevel@tonic-gate else { 2450*0Sstevel@tonic-gate /* The target is not utf8, the pattern is utf8. */ 2451*0Sstevel@tonic-gate while (s < e) { 2452*0Sstevel@tonic-gate if (l >= PL_regeol) 2453*0Sstevel@tonic-gate sayNO; 2454*0Sstevel@tonic-gate if (NATIVE_TO_UNI(*((U8*)l)) != 2455*0Sstevel@tonic-gate utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen, 2456*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 2457*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY)) 2458*0Sstevel@tonic-gate sayNO; 2459*0Sstevel@tonic-gate s += ulen; 2460*0Sstevel@tonic-gate l ++; 2461*0Sstevel@tonic-gate } 2462*0Sstevel@tonic-gate } 2463*0Sstevel@tonic-gate locinput = l; 2464*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2465*0Sstevel@tonic-gate break; 2466*0Sstevel@tonic-gate } 2467*0Sstevel@tonic-gate /* The target and the pattern have the same utf8ness. */ 2468*0Sstevel@tonic-gate /* Inline the first character, for speed. */ 2469*0Sstevel@tonic-gate if (UCHARAT(s) != nextchr) 2470*0Sstevel@tonic-gate sayNO; 2471*0Sstevel@tonic-gate if (PL_regeol - locinput < ln) 2472*0Sstevel@tonic-gate sayNO; 2473*0Sstevel@tonic-gate if (ln > 1 && memNE(s, locinput, ln)) 2474*0Sstevel@tonic-gate sayNO; 2475*0Sstevel@tonic-gate locinput += ln; 2476*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2477*0Sstevel@tonic-gate break; 2478*0Sstevel@tonic-gate case EXACTFL: 2479*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2480*0Sstevel@tonic-gate /* FALL THROUGH */ 2481*0Sstevel@tonic-gate case EXACTF: 2482*0Sstevel@tonic-gate s = STRING(scan); 2483*0Sstevel@tonic-gate ln = STR_LEN(scan); 2484*0Sstevel@tonic-gate 2485*0Sstevel@tonic-gate if (do_utf8 || UTF) { 2486*0Sstevel@tonic-gate /* Either target or the pattern are utf8. */ 2487*0Sstevel@tonic-gate char *l = locinput; 2488*0Sstevel@tonic-gate char *e = PL_regeol; 2489*0Sstevel@tonic-gate 2490*0Sstevel@tonic-gate if (ibcmp_utf8(s, 0, ln, (bool)UTF, 2491*0Sstevel@tonic-gate l, &e, 0, do_utf8)) { 2492*0Sstevel@tonic-gate /* One more case for the sharp s: 2493*0Sstevel@tonic-gate * pack("U0U*", 0xDF) =~ /ss/i, 2494*0Sstevel@tonic-gate * the 0xC3 0x9F are the UTF-8 2495*0Sstevel@tonic-gate * byte sequence for the U+00DF. */ 2496*0Sstevel@tonic-gate if (!(do_utf8 && 2497*0Sstevel@tonic-gate toLOWER(s[0]) == 's' && 2498*0Sstevel@tonic-gate ln >= 2 && 2499*0Sstevel@tonic-gate toLOWER(s[1]) == 's' && 2500*0Sstevel@tonic-gate (U8)l[0] == 0xC3 && 2501*0Sstevel@tonic-gate e - l >= 2 && 2502*0Sstevel@tonic-gate (U8)l[1] == 0x9F)) 2503*0Sstevel@tonic-gate sayNO; 2504*0Sstevel@tonic-gate } 2505*0Sstevel@tonic-gate locinput = e; 2506*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2507*0Sstevel@tonic-gate break; 2508*0Sstevel@tonic-gate } 2509*0Sstevel@tonic-gate 2510*0Sstevel@tonic-gate /* Neither the target and the pattern are utf8. */ 2511*0Sstevel@tonic-gate 2512*0Sstevel@tonic-gate /* Inline the first character, for speed. */ 2513*0Sstevel@tonic-gate if (UCHARAT(s) != nextchr && 2514*0Sstevel@tonic-gate UCHARAT(s) != ((OP(scan) == EXACTF) 2515*0Sstevel@tonic-gate ? PL_fold : PL_fold_locale)[nextchr]) 2516*0Sstevel@tonic-gate sayNO; 2517*0Sstevel@tonic-gate if (PL_regeol - locinput < ln) 2518*0Sstevel@tonic-gate sayNO; 2519*0Sstevel@tonic-gate if (ln > 1 && (OP(scan) == EXACTF 2520*0Sstevel@tonic-gate ? ibcmp(s, locinput, ln) 2521*0Sstevel@tonic-gate : ibcmp_locale(s, locinput, ln))) 2522*0Sstevel@tonic-gate sayNO; 2523*0Sstevel@tonic-gate locinput += ln; 2524*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2525*0Sstevel@tonic-gate break; 2526*0Sstevel@tonic-gate case ANYOF: 2527*0Sstevel@tonic-gate if (do_utf8) { 2528*0Sstevel@tonic-gate STRLEN inclasslen = PL_regeol - locinput; 2529*0Sstevel@tonic-gate 2530*0Sstevel@tonic-gate if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8)) 2531*0Sstevel@tonic-gate sayNO_ANYOF; 2532*0Sstevel@tonic-gate if (locinput >= PL_regeol) 2533*0Sstevel@tonic-gate sayNO; 2534*0Sstevel@tonic-gate locinput += inclasslen ? inclasslen : UTF8SKIP(locinput); 2535*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2536*0Sstevel@tonic-gate break; 2537*0Sstevel@tonic-gate } 2538*0Sstevel@tonic-gate else { 2539*0Sstevel@tonic-gate if (nextchr < 0) 2540*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2541*0Sstevel@tonic-gate if (!REGINCLASS(scan, (U8*)locinput)) 2542*0Sstevel@tonic-gate sayNO_ANYOF; 2543*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol) 2544*0Sstevel@tonic-gate sayNO; 2545*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2546*0Sstevel@tonic-gate break; 2547*0Sstevel@tonic-gate } 2548*0Sstevel@tonic-gate no_anyof: 2549*0Sstevel@tonic-gate /* If we might have the case of the German sharp s 2550*0Sstevel@tonic-gate * in a casefolding Unicode character class. */ 2551*0Sstevel@tonic-gate 2552*0Sstevel@tonic-gate if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) { 2553*0Sstevel@tonic-gate locinput += SHARP_S_SKIP; 2554*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2555*0Sstevel@tonic-gate } 2556*0Sstevel@tonic-gate else 2557*0Sstevel@tonic-gate sayNO; 2558*0Sstevel@tonic-gate break; 2559*0Sstevel@tonic-gate case ALNUML: 2560*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2561*0Sstevel@tonic-gate /* FALL THROUGH */ 2562*0Sstevel@tonic-gate case ALNUM: 2563*0Sstevel@tonic-gate if (!nextchr) 2564*0Sstevel@tonic-gate sayNO; 2565*0Sstevel@tonic-gate if (do_utf8) { 2566*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 2567*0Sstevel@tonic-gate if (!(OP(scan) == ALNUM 2568*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8) 2569*0Sstevel@tonic-gate : isALNUM_LC_utf8((U8*)locinput))) 2570*0Sstevel@tonic-gate { 2571*0Sstevel@tonic-gate sayNO; 2572*0Sstevel@tonic-gate } 2573*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2574*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2575*0Sstevel@tonic-gate break; 2576*0Sstevel@tonic-gate } 2577*0Sstevel@tonic-gate if (!(OP(scan) == ALNUM 2578*0Sstevel@tonic-gate ? isALNUM(nextchr) : isALNUM_LC(nextchr))) 2579*0Sstevel@tonic-gate sayNO; 2580*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2581*0Sstevel@tonic-gate break; 2582*0Sstevel@tonic-gate case NALNUML: 2583*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2584*0Sstevel@tonic-gate /* FALL THROUGH */ 2585*0Sstevel@tonic-gate case NALNUM: 2586*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol) 2587*0Sstevel@tonic-gate sayNO; 2588*0Sstevel@tonic-gate if (do_utf8) { 2589*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 2590*0Sstevel@tonic-gate if (OP(scan) == NALNUM 2591*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8) 2592*0Sstevel@tonic-gate : isALNUM_LC_utf8((U8*)locinput)) 2593*0Sstevel@tonic-gate { 2594*0Sstevel@tonic-gate sayNO; 2595*0Sstevel@tonic-gate } 2596*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2597*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2598*0Sstevel@tonic-gate break; 2599*0Sstevel@tonic-gate } 2600*0Sstevel@tonic-gate if (OP(scan) == NALNUM 2601*0Sstevel@tonic-gate ? isALNUM(nextchr) : isALNUM_LC(nextchr)) 2602*0Sstevel@tonic-gate sayNO; 2603*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2604*0Sstevel@tonic-gate break; 2605*0Sstevel@tonic-gate case BOUNDL: 2606*0Sstevel@tonic-gate case NBOUNDL: 2607*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2608*0Sstevel@tonic-gate /* FALL THROUGH */ 2609*0Sstevel@tonic-gate case BOUND: 2610*0Sstevel@tonic-gate case NBOUND: 2611*0Sstevel@tonic-gate /* was last char in word? */ 2612*0Sstevel@tonic-gate if (do_utf8) { 2613*0Sstevel@tonic-gate if (locinput == PL_bostr) 2614*0Sstevel@tonic-gate ln = '\n'; 2615*0Sstevel@tonic-gate else { 2616*0Sstevel@tonic-gate U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr); 2617*0Sstevel@tonic-gate 2618*0Sstevel@tonic-gate ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0); 2619*0Sstevel@tonic-gate } 2620*0Sstevel@tonic-gate if (OP(scan) == BOUND || OP(scan) == NBOUND) { 2621*0Sstevel@tonic-gate ln = isALNUM_uni(ln); 2622*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 2623*0Sstevel@tonic-gate n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8); 2624*0Sstevel@tonic-gate } 2625*0Sstevel@tonic-gate else { 2626*0Sstevel@tonic-gate ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln)); 2627*0Sstevel@tonic-gate n = isALNUM_LC_utf8((U8*)locinput); 2628*0Sstevel@tonic-gate } 2629*0Sstevel@tonic-gate } 2630*0Sstevel@tonic-gate else { 2631*0Sstevel@tonic-gate ln = (locinput != PL_bostr) ? 2632*0Sstevel@tonic-gate UCHARAT(locinput - 1) : '\n'; 2633*0Sstevel@tonic-gate if (OP(scan) == BOUND || OP(scan) == NBOUND) { 2634*0Sstevel@tonic-gate ln = isALNUM(ln); 2635*0Sstevel@tonic-gate n = isALNUM(nextchr); 2636*0Sstevel@tonic-gate } 2637*0Sstevel@tonic-gate else { 2638*0Sstevel@tonic-gate ln = isALNUM_LC(ln); 2639*0Sstevel@tonic-gate n = isALNUM_LC(nextchr); 2640*0Sstevel@tonic-gate } 2641*0Sstevel@tonic-gate } 2642*0Sstevel@tonic-gate if (((!ln) == (!n)) == (OP(scan) == BOUND || 2643*0Sstevel@tonic-gate OP(scan) == BOUNDL)) 2644*0Sstevel@tonic-gate sayNO; 2645*0Sstevel@tonic-gate break; 2646*0Sstevel@tonic-gate case SPACEL: 2647*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2648*0Sstevel@tonic-gate /* FALL THROUGH */ 2649*0Sstevel@tonic-gate case SPACE: 2650*0Sstevel@tonic-gate if (!nextchr) 2651*0Sstevel@tonic-gate sayNO; 2652*0Sstevel@tonic-gate if (do_utf8) { 2653*0Sstevel@tonic-gate if (UTF8_IS_CONTINUED(nextchr)) { 2654*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," "); 2655*0Sstevel@tonic-gate if (!(OP(scan) == SPACE 2656*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8) 2657*0Sstevel@tonic-gate : isSPACE_LC_utf8((U8*)locinput))) 2658*0Sstevel@tonic-gate { 2659*0Sstevel@tonic-gate sayNO; 2660*0Sstevel@tonic-gate } 2661*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2662*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2663*0Sstevel@tonic-gate break; 2664*0Sstevel@tonic-gate } 2665*0Sstevel@tonic-gate if (!(OP(scan) == SPACE 2666*0Sstevel@tonic-gate ? isSPACE(nextchr) : isSPACE_LC(nextchr))) 2667*0Sstevel@tonic-gate sayNO; 2668*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2669*0Sstevel@tonic-gate } 2670*0Sstevel@tonic-gate else { 2671*0Sstevel@tonic-gate if (!(OP(scan) == SPACE 2672*0Sstevel@tonic-gate ? isSPACE(nextchr) : isSPACE_LC(nextchr))) 2673*0Sstevel@tonic-gate sayNO; 2674*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2675*0Sstevel@tonic-gate } 2676*0Sstevel@tonic-gate break; 2677*0Sstevel@tonic-gate case NSPACEL: 2678*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2679*0Sstevel@tonic-gate /* FALL THROUGH */ 2680*0Sstevel@tonic-gate case NSPACE: 2681*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol) 2682*0Sstevel@tonic-gate sayNO; 2683*0Sstevel@tonic-gate if (do_utf8) { 2684*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," "); 2685*0Sstevel@tonic-gate if (OP(scan) == NSPACE 2686*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8) 2687*0Sstevel@tonic-gate : isSPACE_LC_utf8((U8*)locinput)) 2688*0Sstevel@tonic-gate { 2689*0Sstevel@tonic-gate sayNO; 2690*0Sstevel@tonic-gate } 2691*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2692*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2693*0Sstevel@tonic-gate break; 2694*0Sstevel@tonic-gate } 2695*0Sstevel@tonic-gate if (OP(scan) == NSPACE 2696*0Sstevel@tonic-gate ? isSPACE(nextchr) : isSPACE_LC(nextchr)) 2697*0Sstevel@tonic-gate sayNO; 2698*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2699*0Sstevel@tonic-gate break; 2700*0Sstevel@tonic-gate case DIGITL: 2701*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2702*0Sstevel@tonic-gate /* FALL THROUGH */ 2703*0Sstevel@tonic-gate case DIGIT: 2704*0Sstevel@tonic-gate if (!nextchr) 2705*0Sstevel@tonic-gate sayNO; 2706*0Sstevel@tonic-gate if (do_utf8) { 2707*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0"); 2708*0Sstevel@tonic-gate if (!(OP(scan) == DIGIT 2709*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8) 2710*0Sstevel@tonic-gate : isDIGIT_LC_utf8((U8*)locinput))) 2711*0Sstevel@tonic-gate { 2712*0Sstevel@tonic-gate sayNO; 2713*0Sstevel@tonic-gate } 2714*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2715*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2716*0Sstevel@tonic-gate break; 2717*0Sstevel@tonic-gate } 2718*0Sstevel@tonic-gate if (!(OP(scan) == DIGIT 2719*0Sstevel@tonic-gate ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) 2720*0Sstevel@tonic-gate sayNO; 2721*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2722*0Sstevel@tonic-gate break; 2723*0Sstevel@tonic-gate case NDIGITL: 2724*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2725*0Sstevel@tonic-gate /* FALL THROUGH */ 2726*0Sstevel@tonic-gate case NDIGIT: 2727*0Sstevel@tonic-gate if (!nextchr && locinput >= PL_regeol) 2728*0Sstevel@tonic-gate sayNO; 2729*0Sstevel@tonic-gate if (do_utf8) { 2730*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0"); 2731*0Sstevel@tonic-gate if (OP(scan) == NDIGIT 2732*0Sstevel@tonic-gate ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8) 2733*0Sstevel@tonic-gate : isDIGIT_LC_utf8((U8*)locinput)) 2734*0Sstevel@tonic-gate { 2735*0Sstevel@tonic-gate sayNO; 2736*0Sstevel@tonic-gate } 2737*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2738*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2739*0Sstevel@tonic-gate break; 2740*0Sstevel@tonic-gate } 2741*0Sstevel@tonic-gate if (OP(scan) == NDIGIT 2742*0Sstevel@tonic-gate ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) 2743*0Sstevel@tonic-gate sayNO; 2744*0Sstevel@tonic-gate nextchr = UCHARAT(++locinput); 2745*0Sstevel@tonic-gate break; 2746*0Sstevel@tonic-gate case CLUMP: 2747*0Sstevel@tonic-gate if (locinput >= PL_regeol) 2748*0Sstevel@tonic-gate sayNO; 2749*0Sstevel@tonic-gate if (do_utf8) { 2750*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(mark,"~"); 2751*0Sstevel@tonic-gate if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8)) 2752*0Sstevel@tonic-gate sayNO; 2753*0Sstevel@tonic-gate locinput += PL_utf8skip[nextchr]; 2754*0Sstevel@tonic-gate while (locinput < PL_regeol && 2755*0Sstevel@tonic-gate swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8)) 2756*0Sstevel@tonic-gate locinput += UTF8SKIP(locinput); 2757*0Sstevel@tonic-gate if (locinput > PL_regeol) 2758*0Sstevel@tonic-gate sayNO; 2759*0Sstevel@tonic-gate } 2760*0Sstevel@tonic-gate else 2761*0Sstevel@tonic-gate locinput++; 2762*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2763*0Sstevel@tonic-gate break; 2764*0Sstevel@tonic-gate case REFFL: 2765*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 2766*0Sstevel@tonic-gate /* FALL THROUGH */ 2767*0Sstevel@tonic-gate case REF: 2768*0Sstevel@tonic-gate case REFF: 2769*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */ 2770*0Sstevel@tonic-gate ln = PL_regstartp[n]; 2771*0Sstevel@tonic-gate PL_reg_leftiter = PL_reg_maxiter; /* Void cache */ 2772*0Sstevel@tonic-gate if ((I32)*PL_reglastparen < n || ln == -1) 2773*0Sstevel@tonic-gate sayNO; /* Do not match unless seen CLOSEn. */ 2774*0Sstevel@tonic-gate if (ln == PL_regendp[n]) 2775*0Sstevel@tonic-gate break; 2776*0Sstevel@tonic-gate 2777*0Sstevel@tonic-gate s = PL_bostr + ln; 2778*0Sstevel@tonic-gate if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */ 2779*0Sstevel@tonic-gate char *l = locinput; 2780*0Sstevel@tonic-gate char *e = PL_bostr + PL_regendp[n]; 2781*0Sstevel@tonic-gate /* 2782*0Sstevel@tonic-gate * Note that we can't do the "other character" lookup trick as 2783*0Sstevel@tonic-gate * in the 8-bit case (no pun intended) because in Unicode we 2784*0Sstevel@tonic-gate * have to map both upper and title case to lower case. 2785*0Sstevel@tonic-gate */ 2786*0Sstevel@tonic-gate if (OP(scan) == REFF) { 2787*0Sstevel@tonic-gate STRLEN ulen1, ulen2; 2788*0Sstevel@tonic-gate U8 tmpbuf1[UTF8_MAXLEN_UCLC+1]; 2789*0Sstevel@tonic-gate U8 tmpbuf2[UTF8_MAXLEN_UCLC+1]; 2790*0Sstevel@tonic-gate while (s < e) { 2791*0Sstevel@tonic-gate if (l >= PL_regeol) 2792*0Sstevel@tonic-gate sayNO; 2793*0Sstevel@tonic-gate toLOWER_utf8((U8*)s, tmpbuf1, &ulen1); 2794*0Sstevel@tonic-gate toLOWER_utf8((U8*)l, tmpbuf2, &ulen2); 2795*0Sstevel@tonic-gate if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1)) 2796*0Sstevel@tonic-gate sayNO; 2797*0Sstevel@tonic-gate s += ulen1; 2798*0Sstevel@tonic-gate l += ulen2; 2799*0Sstevel@tonic-gate } 2800*0Sstevel@tonic-gate } 2801*0Sstevel@tonic-gate locinput = l; 2802*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2803*0Sstevel@tonic-gate break; 2804*0Sstevel@tonic-gate } 2805*0Sstevel@tonic-gate 2806*0Sstevel@tonic-gate /* Inline the first character, for speed. */ 2807*0Sstevel@tonic-gate if (UCHARAT(s) != nextchr && 2808*0Sstevel@tonic-gate (OP(scan) == REF || 2809*0Sstevel@tonic-gate (UCHARAT(s) != ((OP(scan) == REFF 2810*0Sstevel@tonic-gate ? PL_fold : PL_fold_locale)[nextchr])))) 2811*0Sstevel@tonic-gate sayNO; 2812*0Sstevel@tonic-gate ln = PL_regendp[n] - ln; 2813*0Sstevel@tonic-gate if (locinput + ln > PL_regeol) 2814*0Sstevel@tonic-gate sayNO; 2815*0Sstevel@tonic-gate if (ln > 1 && (OP(scan) == REF 2816*0Sstevel@tonic-gate ? memNE(s, locinput, ln) 2817*0Sstevel@tonic-gate : (OP(scan) == REFF 2818*0Sstevel@tonic-gate ? ibcmp(s, locinput, ln) 2819*0Sstevel@tonic-gate : ibcmp_locale(s, locinput, ln)))) 2820*0Sstevel@tonic-gate sayNO; 2821*0Sstevel@tonic-gate locinput += ln; 2822*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 2823*0Sstevel@tonic-gate break; 2824*0Sstevel@tonic-gate 2825*0Sstevel@tonic-gate case NOTHING: 2826*0Sstevel@tonic-gate case TAIL: 2827*0Sstevel@tonic-gate break; 2828*0Sstevel@tonic-gate case BACK: 2829*0Sstevel@tonic-gate break; 2830*0Sstevel@tonic-gate case EVAL: 2831*0Sstevel@tonic-gate { 2832*0Sstevel@tonic-gate dSP; 2833*0Sstevel@tonic-gate OP_4tree *oop = PL_op; 2834*0Sstevel@tonic-gate COP *ocurcop = PL_curcop; 2835*0Sstevel@tonic-gate PAD *old_comppad; 2836*0Sstevel@tonic-gate SV *ret; 2837*0Sstevel@tonic-gate struct regexp *oreg = PL_reg_re; 2838*0Sstevel@tonic-gate 2839*0Sstevel@tonic-gate n = ARG(scan); 2840*0Sstevel@tonic-gate PL_op = (OP_4tree*)PL_regdata->data[n]; 2841*0Sstevel@tonic-gate DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); 2842*0Sstevel@tonic-gate PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]); 2843*0Sstevel@tonic-gate PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; 2844*0Sstevel@tonic-gate 2845*0Sstevel@tonic-gate { 2846*0Sstevel@tonic-gate SV **before = SP; 2847*0Sstevel@tonic-gate CALLRUNOPS(aTHX); /* Scalar context. */ 2848*0Sstevel@tonic-gate SPAGAIN; 2849*0Sstevel@tonic-gate if (SP == before) 2850*0Sstevel@tonic-gate ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */ 2851*0Sstevel@tonic-gate else { 2852*0Sstevel@tonic-gate ret = POPs; 2853*0Sstevel@tonic-gate PUTBACK; 2854*0Sstevel@tonic-gate } 2855*0Sstevel@tonic-gate } 2856*0Sstevel@tonic-gate 2857*0Sstevel@tonic-gate PL_op = oop; 2858*0Sstevel@tonic-gate PAD_RESTORE_LOCAL(old_comppad); 2859*0Sstevel@tonic-gate PL_curcop = ocurcop; 2860*0Sstevel@tonic-gate if (logical) { 2861*0Sstevel@tonic-gate if (logical == 2) { /* Postponed subexpression. */ 2862*0Sstevel@tonic-gate regexp *re; 2863*0Sstevel@tonic-gate MAGIC *mg = Null(MAGIC*); 2864*0Sstevel@tonic-gate re_cc_state state; 2865*0Sstevel@tonic-gate CHECKPOINT cp, lastcp; 2866*0Sstevel@tonic-gate int toggleutf; 2867*0Sstevel@tonic-gate register SV *sv; 2868*0Sstevel@tonic-gate 2869*0Sstevel@tonic-gate if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret))) 2870*0Sstevel@tonic-gate mg = mg_find(sv, PERL_MAGIC_qr); 2871*0Sstevel@tonic-gate else if (SvSMAGICAL(ret)) { 2872*0Sstevel@tonic-gate if (SvGMAGICAL(ret)) 2873*0Sstevel@tonic-gate sv_unmagic(ret, PERL_MAGIC_qr); 2874*0Sstevel@tonic-gate else 2875*0Sstevel@tonic-gate mg = mg_find(ret, PERL_MAGIC_qr); 2876*0Sstevel@tonic-gate } 2877*0Sstevel@tonic-gate 2878*0Sstevel@tonic-gate if (mg) { 2879*0Sstevel@tonic-gate re = (regexp *)mg->mg_obj; 2880*0Sstevel@tonic-gate (void)ReREFCNT_inc(re); 2881*0Sstevel@tonic-gate } 2882*0Sstevel@tonic-gate else { 2883*0Sstevel@tonic-gate STRLEN len; 2884*0Sstevel@tonic-gate char *t = SvPV(ret, len); 2885*0Sstevel@tonic-gate PMOP pm; 2886*0Sstevel@tonic-gate char *oprecomp = PL_regprecomp; 2887*0Sstevel@tonic-gate I32 osize = PL_regsize; 2888*0Sstevel@tonic-gate I32 onpar = PL_regnpar; 2889*0Sstevel@tonic-gate 2890*0Sstevel@tonic-gate Zero(&pm, 1, PMOP); 2891*0Sstevel@tonic-gate if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8; 2892*0Sstevel@tonic-gate re = CALLREGCOMP(aTHX_ t, t + len, &pm); 2893*0Sstevel@tonic-gate if (!(SvFLAGS(ret) 2894*0Sstevel@tonic-gate & (SVs_TEMP | SVs_PADTMP | SVf_READONLY 2895*0Sstevel@tonic-gate | SVs_GMG))) 2896*0Sstevel@tonic-gate sv_magic(ret,(SV*)ReREFCNT_inc(re), 2897*0Sstevel@tonic-gate PERL_MAGIC_qr,0,0); 2898*0Sstevel@tonic-gate PL_regprecomp = oprecomp; 2899*0Sstevel@tonic-gate PL_regsize = osize; 2900*0Sstevel@tonic-gate PL_regnpar = onpar; 2901*0Sstevel@tonic-gate } 2902*0Sstevel@tonic-gate DEBUG_r( 2903*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 2904*0Sstevel@tonic-gate "Entering embedded `%s%.60s%s%s'\n", 2905*0Sstevel@tonic-gate PL_colors[0], 2906*0Sstevel@tonic-gate re->precomp, 2907*0Sstevel@tonic-gate PL_colors[1], 2908*0Sstevel@tonic-gate (strlen(re->precomp) > 60 ? "..." : "")) 2909*0Sstevel@tonic-gate ); 2910*0Sstevel@tonic-gate state.node = next; 2911*0Sstevel@tonic-gate state.prev = PL_reg_call_cc; 2912*0Sstevel@tonic-gate state.cc = PL_regcc; 2913*0Sstevel@tonic-gate state.re = PL_reg_re; 2914*0Sstevel@tonic-gate 2915*0Sstevel@tonic-gate PL_regcc = 0; 2916*0Sstevel@tonic-gate 2917*0Sstevel@tonic-gate cp = regcppush(0); /* Save *all* the positions. */ 2918*0Sstevel@tonic-gate REGCP_SET(lastcp); 2919*0Sstevel@tonic-gate cache_re(re); 2920*0Sstevel@tonic-gate state.ss = PL_savestack_ix; 2921*0Sstevel@tonic-gate *PL_reglastparen = 0; 2922*0Sstevel@tonic-gate *PL_reglastcloseparen = 0; 2923*0Sstevel@tonic-gate PL_reg_call_cc = &state; 2924*0Sstevel@tonic-gate PL_reginput = locinput; 2925*0Sstevel@tonic-gate toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^ 2926*0Sstevel@tonic-gate ((re->reganch & ROPT_UTF8) != 0); 2927*0Sstevel@tonic-gate if (toggleutf) PL_reg_flags ^= RF_utf8; 2928*0Sstevel@tonic-gate 2929*0Sstevel@tonic-gate /* XXXX This is too dramatic a measure... */ 2930*0Sstevel@tonic-gate PL_reg_maxiter = 0; 2931*0Sstevel@tonic-gate 2932*0Sstevel@tonic-gate if (regmatch(re->program + 1)) { 2933*0Sstevel@tonic-gate /* Even though we succeeded, we need to restore 2934*0Sstevel@tonic-gate global variables, since we may be wrapped inside 2935*0Sstevel@tonic-gate SUSPEND, thus the match may be not finished yet. */ 2936*0Sstevel@tonic-gate 2937*0Sstevel@tonic-gate /* XXXX Do this only if SUSPENDed? */ 2938*0Sstevel@tonic-gate PL_reg_call_cc = state.prev; 2939*0Sstevel@tonic-gate PL_regcc = state.cc; 2940*0Sstevel@tonic-gate PL_reg_re = state.re; 2941*0Sstevel@tonic-gate cache_re(PL_reg_re); 2942*0Sstevel@tonic-gate if (toggleutf) PL_reg_flags ^= RF_utf8; 2943*0Sstevel@tonic-gate 2944*0Sstevel@tonic-gate /* XXXX This is too dramatic a measure... */ 2945*0Sstevel@tonic-gate PL_reg_maxiter = 0; 2946*0Sstevel@tonic-gate 2947*0Sstevel@tonic-gate /* These are needed even if not SUSPEND. */ 2948*0Sstevel@tonic-gate ReREFCNT_dec(re); 2949*0Sstevel@tonic-gate regcpblow(cp); 2950*0Sstevel@tonic-gate sayYES; 2951*0Sstevel@tonic-gate } 2952*0Sstevel@tonic-gate ReREFCNT_dec(re); 2953*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 2954*0Sstevel@tonic-gate regcppop(); 2955*0Sstevel@tonic-gate PL_reg_call_cc = state.prev; 2956*0Sstevel@tonic-gate PL_regcc = state.cc; 2957*0Sstevel@tonic-gate PL_reg_re = state.re; 2958*0Sstevel@tonic-gate cache_re(PL_reg_re); 2959*0Sstevel@tonic-gate if (toggleutf) PL_reg_flags ^= RF_utf8; 2960*0Sstevel@tonic-gate 2961*0Sstevel@tonic-gate /* XXXX This is too dramatic a measure... */ 2962*0Sstevel@tonic-gate PL_reg_maxiter = 0; 2963*0Sstevel@tonic-gate 2964*0Sstevel@tonic-gate logical = 0; 2965*0Sstevel@tonic-gate sayNO; 2966*0Sstevel@tonic-gate } 2967*0Sstevel@tonic-gate sw = SvTRUE(ret); 2968*0Sstevel@tonic-gate logical = 0; 2969*0Sstevel@tonic-gate } 2970*0Sstevel@tonic-gate else { 2971*0Sstevel@tonic-gate sv_setsv(save_scalar(PL_replgv), ret); 2972*0Sstevel@tonic-gate cache_re(oreg); 2973*0Sstevel@tonic-gate } 2974*0Sstevel@tonic-gate break; 2975*0Sstevel@tonic-gate } 2976*0Sstevel@tonic-gate case OPEN: 2977*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */ 2978*0Sstevel@tonic-gate PL_reg_start_tmp[n] = locinput; 2979*0Sstevel@tonic-gate if (n > PL_regsize) 2980*0Sstevel@tonic-gate PL_regsize = n; 2981*0Sstevel@tonic-gate break; 2982*0Sstevel@tonic-gate case CLOSE: 2983*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */ 2984*0Sstevel@tonic-gate PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; 2985*0Sstevel@tonic-gate PL_regendp[n] = locinput - PL_bostr; 2986*0Sstevel@tonic-gate if (n > (I32)*PL_reglastparen) 2987*0Sstevel@tonic-gate *PL_reglastparen = n; 2988*0Sstevel@tonic-gate *PL_reglastcloseparen = n; 2989*0Sstevel@tonic-gate break; 2990*0Sstevel@tonic-gate case GROUPP: 2991*0Sstevel@tonic-gate n = ARG(scan); /* which paren pair */ 2992*0Sstevel@tonic-gate sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1); 2993*0Sstevel@tonic-gate break; 2994*0Sstevel@tonic-gate case IFTHEN: 2995*0Sstevel@tonic-gate PL_reg_leftiter = PL_reg_maxiter; /* Void cache */ 2996*0Sstevel@tonic-gate if (sw) 2997*0Sstevel@tonic-gate next = NEXTOPER(NEXTOPER(scan)); 2998*0Sstevel@tonic-gate else { 2999*0Sstevel@tonic-gate next = scan + ARG(scan); 3000*0Sstevel@tonic-gate if (OP(next) == IFTHEN) /* Fake one. */ 3001*0Sstevel@tonic-gate next = NEXTOPER(NEXTOPER(next)); 3002*0Sstevel@tonic-gate } 3003*0Sstevel@tonic-gate break; 3004*0Sstevel@tonic-gate case LOGICAL: 3005*0Sstevel@tonic-gate logical = scan->flags; 3006*0Sstevel@tonic-gate break; 3007*0Sstevel@tonic-gate /******************************************************************* 3008*0Sstevel@tonic-gate PL_regcc contains infoblock about the innermost (...)* loop, and 3009*0Sstevel@tonic-gate a pointer to the next outer infoblock. 3010*0Sstevel@tonic-gate 3011*0Sstevel@tonic-gate Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM): 3012*0Sstevel@tonic-gate 3013*0Sstevel@tonic-gate 1) After matching X, regnode for CURLYX is processed; 3014*0Sstevel@tonic-gate 3015*0Sstevel@tonic-gate 2) This regnode creates infoblock on the stack, and calls 3016*0Sstevel@tonic-gate regmatch() recursively with the starting point at WHILEM node; 3017*0Sstevel@tonic-gate 3018*0Sstevel@tonic-gate 3) Each hit of WHILEM node tries to match A and Z (in the order 3019*0Sstevel@tonic-gate depending on the current iteration, min/max of {min,max} and 3020*0Sstevel@tonic-gate greediness). The information about where are nodes for "A" 3021*0Sstevel@tonic-gate and "Z" is read from the infoblock, as is info on how many times "A" 3022*0Sstevel@tonic-gate was already matched, and greediness. 3023*0Sstevel@tonic-gate 3024*0Sstevel@tonic-gate 4) After A matches, the same WHILEM node is hit again. 3025*0Sstevel@tonic-gate 3026*0Sstevel@tonic-gate 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX 3027*0Sstevel@tonic-gate of the same pair. Thus when WHILEM tries to match Z, it temporarily 3028*0Sstevel@tonic-gate resets PL_regcc, since this Y(A)*Z can be a part of some other loop: 3029*0Sstevel@tonic-gate as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node 3030*0Sstevel@tonic-gate of the external loop. 3031*0Sstevel@tonic-gate 3032*0Sstevel@tonic-gate Currently present infoblocks form a tree with a stem formed by PL_curcc 3033*0Sstevel@tonic-gate and whatever it mentions via ->next, and additional attached trees 3034*0Sstevel@tonic-gate corresponding to temporarily unset infoblocks as in "5" above. 3035*0Sstevel@tonic-gate 3036*0Sstevel@tonic-gate In the following picture infoblocks for outer loop of 3037*0Sstevel@tonic-gate (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block 3038*0Sstevel@tonic-gate is denoted by x. The matched string is YAAZYAZT. Temporarily postponed 3039*0Sstevel@tonic-gate infoblocks are drawn below the "reset" infoblock. 3040*0Sstevel@tonic-gate 3041*0Sstevel@tonic-gate In fact in the picture below we do not show failed matches for Z and T 3042*0Sstevel@tonic-gate by WHILEM blocks. [We illustrate minimal matches, since for them it is 3043*0Sstevel@tonic-gate more obvious *why* one needs to *temporary* unset infoblocks.] 3044*0Sstevel@tonic-gate 3045*0Sstevel@tonic-gate Matched REx position InfoBlocks Comment 3046*0Sstevel@tonic-gate (Y(A)*?Z)*?T x 3047*0Sstevel@tonic-gate Y(A)*?Z)*?T x <- O 3048*0Sstevel@tonic-gate Y (A)*?Z)*?T x <- O 3049*0Sstevel@tonic-gate Y A)*?Z)*?T x <- O <- I 3050*0Sstevel@tonic-gate YA )*?Z)*?T x <- O <- I 3051*0Sstevel@tonic-gate YA A)*?Z)*?T x <- O <- I 3052*0Sstevel@tonic-gate YAA )*?Z)*?T x <- O <- I 3053*0Sstevel@tonic-gate YAA Z)*?T x <- O # Temporary unset I 3054*0Sstevel@tonic-gate I 3055*0Sstevel@tonic-gate 3056*0Sstevel@tonic-gate YAAZ Y(A)*?Z)*?T x <- O 3057*0Sstevel@tonic-gate I 3058*0Sstevel@tonic-gate 3059*0Sstevel@tonic-gate YAAZY (A)*?Z)*?T x <- O 3060*0Sstevel@tonic-gate I 3061*0Sstevel@tonic-gate 3062*0Sstevel@tonic-gate YAAZY A)*?Z)*?T x <- O <- I 3063*0Sstevel@tonic-gate I 3064*0Sstevel@tonic-gate 3065*0Sstevel@tonic-gate YAAZYA )*?Z)*?T x <- O <- I 3066*0Sstevel@tonic-gate I 3067*0Sstevel@tonic-gate 3068*0Sstevel@tonic-gate YAAZYA Z)*?T x <- O # Temporary unset I 3069*0Sstevel@tonic-gate I,I 3070*0Sstevel@tonic-gate 3071*0Sstevel@tonic-gate YAAZYAZ )*?T x <- O 3072*0Sstevel@tonic-gate I,I 3073*0Sstevel@tonic-gate 3074*0Sstevel@tonic-gate YAAZYAZ T x # Temporary unset O 3075*0Sstevel@tonic-gate O 3076*0Sstevel@tonic-gate I,I 3077*0Sstevel@tonic-gate 3078*0Sstevel@tonic-gate YAAZYAZT x 3079*0Sstevel@tonic-gate O 3080*0Sstevel@tonic-gate I,I 3081*0Sstevel@tonic-gate *******************************************************************/ 3082*0Sstevel@tonic-gate case CURLYX: { 3083*0Sstevel@tonic-gate CURCUR cc; 3084*0Sstevel@tonic-gate CHECKPOINT cp = PL_savestack_ix; 3085*0Sstevel@tonic-gate /* No need to save/restore up to this paren */ 3086*0Sstevel@tonic-gate I32 parenfloor = scan->flags; 3087*0Sstevel@tonic-gate 3088*0Sstevel@tonic-gate if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */ 3089*0Sstevel@tonic-gate next += ARG(next); 3090*0Sstevel@tonic-gate cc.oldcc = PL_regcc; 3091*0Sstevel@tonic-gate PL_regcc = &cc; 3092*0Sstevel@tonic-gate /* XXXX Probably it is better to teach regpush to support 3093*0Sstevel@tonic-gate parenfloor > PL_regsize... */ 3094*0Sstevel@tonic-gate if (parenfloor > (I32)*PL_reglastparen) 3095*0Sstevel@tonic-gate parenfloor = *PL_reglastparen; /* Pessimization... */ 3096*0Sstevel@tonic-gate cc.parenfloor = parenfloor; 3097*0Sstevel@tonic-gate cc.cur = -1; 3098*0Sstevel@tonic-gate cc.min = ARG1(scan); 3099*0Sstevel@tonic-gate cc.max = ARG2(scan); 3100*0Sstevel@tonic-gate cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; 3101*0Sstevel@tonic-gate cc.next = next; 3102*0Sstevel@tonic-gate cc.minmod = minmod; 3103*0Sstevel@tonic-gate cc.lastloc = 0; 3104*0Sstevel@tonic-gate PL_reginput = locinput; 3105*0Sstevel@tonic-gate n = regmatch(PREVOPER(next)); /* start on the WHILEM */ 3106*0Sstevel@tonic-gate regcpblow(cp); 3107*0Sstevel@tonic-gate PL_regcc = cc.oldcc; 3108*0Sstevel@tonic-gate saySAME(n); 3109*0Sstevel@tonic-gate } 3110*0Sstevel@tonic-gate /* NOT REACHED */ 3111*0Sstevel@tonic-gate case WHILEM: { 3112*0Sstevel@tonic-gate /* 3113*0Sstevel@tonic-gate * This is really hard to understand, because after we match 3114*0Sstevel@tonic-gate * what we're trying to match, we must make sure the rest of 3115*0Sstevel@tonic-gate * the REx is going to match for sure, and to do that we have 3116*0Sstevel@tonic-gate * to go back UP the parse tree by recursing ever deeper. And 3117*0Sstevel@tonic-gate * if it fails, we have to reset our parent's current state 3118*0Sstevel@tonic-gate * that we can try again after backing off. 3119*0Sstevel@tonic-gate */ 3120*0Sstevel@tonic-gate 3121*0Sstevel@tonic-gate CHECKPOINT cp, lastcp; 3122*0Sstevel@tonic-gate CURCUR* cc = PL_regcc; 3123*0Sstevel@tonic-gate char *lastloc = cc->lastloc; /* Detection of 0-len. */ 3124*0Sstevel@tonic-gate 3125*0Sstevel@tonic-gate n = cc->cur + 1; /* how many we know we matched */ 3126*0Sstevel@tonic-gate PL_reginput = locinput; 3127*0Sstevel@tonic-gate 3128*0Sstevel@tonic-gate DEBUG_r( 3129*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3130*0Sstevel@tonic-gate "%*s %ld out of %ld..%ld cc=%"UVxf"\n", 3131*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "", 3132*0Sstevel@tonic-gate (long)n, (long)cc->min, 3133*0Sstevel@tonic-gate (long)cc->max, PTR2UV(cc)) 3134*0Sstevel@tonic-gate ); 3135*0Sstevel@tonic-gate 3136*0Sstevel@tonic-gate /* If degenerate scan matches "", assume scan done. */ 3137*0Sstevel@tonic-gate 3138*0Sstevel@tonic-gate if (locinput == cc->lastloc && n >= cc->min) { 3139*0Sstevel@tonic-gate PL_regcc = cc->oldcc; 3140*0Sstevel@tonic-gate if (PL_regcc) 3141*0Sstevel@tonic-gate ln = PL_regcc->cur; 3142*0Sstevel@tonic-gate DEBUG_r( 3143*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3144*0Sstevel@tonic-gate "%*s empty match detected, try continuation...\n", 3145*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "") 3146*0Sstevel@tonic-gate ); 3147*0Sstevel@tonic-gate if (regmatch(cc->next)) 3148*0Sstevel@tonic-gate sayYES; 3149*0Sstevel@tonic-gate if (PL_regcc) 3150*0Sstevel@tonic-gate PL_regcc->cur = ln; 3151*0Sstevel@tonic-gate PL_regcc = cc; 3152*0Sstevel@tonic-gate sayNO; 3153*0Sstevel@tonic-gate } 3154*0Sstevel@tonic-gate 3155*0Sstevel@tonic-gate /* First just match a string of min scans. */ 3156*0Sstevel@tonic-gate 3157*0Sstevel@tonic-gate if (n < cc->min) { 3158*0Sstevel@tonic-gate cc->cur = n; 3159*0Sstevel@tonic-gate cc->lastloc = locinput; 3160*0Sstevel@tonic-gate if (regmatch(cc->scan)) 3161*0Sstevel@tonic-gate sayYES; 3162*0Sstevel@tonic-gate cc->cur = n - 1; 3163*0Sstevel@tonic-gate cc->lastloc = lastloc; 3164*0Sstevel@tonic-gate sayNO; 3165*0Sstevel@tonic-gate } 3166*0Sstevel@tonic-gate 3167*0Sstevel@tonic-gate if (scan->flags) { 3168*0Sstevel@tonic-gate /* Check whether we already were at this position. 3169*0Sstevel@tonic-gate Postpone detection until we know the match is not 3170*0Sstevel@tonic-gate *that* much linear. */ 3171*0Sstevel@tonic-gate if (!PL_reg_maxiter) { 3172*0Sstevel@tonic-gate PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4); 3173*0Sstevel@tonic-gate PL_reg_leftiter = PL_reg_maxiter; 3174*0Sstevel@tonic-gate } 3175*0Sstevel@tonic-gate if (PL_reg_leftiter-- == 0) { 3176*0Sstevel@tonic-gate I32 size = (PL_reg_maxiter + 7)/8; 3177*0Sstevel@tonic-gate if (PL_reg_poscache) { 3178*0Sstevel@tonic-gate if ((I32)PL_reg_poscache_size < size) { 3179*0Sstevel@tonic-gate Renew(PL_reg_poscache, size, char); 3180*0Sstevel@tonic-gate PL_reg_poscache_size = size; 3181*0Sstevel@tonic-gate } 3182*0Sstevel@tonic-gate Zero(PL_reg_poscache, size, char); 3183*0Sstevel@tonic-gate } 3184*0Sstevel@tonic-gate else { 3185*0Sstevel@tonic-gate PL_reg_poscache_size = size; 3186*0Sstevel@tonic-gate Newz(29, PL_reg_poscache, size, char); 3187*0Sstevel@tonic-gate } 3188*0Sstevel@tonic-gate DEBUG_r( 3189*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3190*0Sstevel@tonic-gate "%sDetected a super-linear match, switching on caching%s...\n", 3191*0Sstevel@tonic-gate PL_colors[4], PL_colors[5]) 3192*0Sstevel@tonic-gate ); 3193*0Sstevel@tonic-gate } 3194*0Sstevel@tonic-gate if (PL_reg_leftiter < 0) { 3195*0Sstevel@tonic-gate I32 o = locinput - PL_bostr, b; 3196*0Sstevel@tonic-gate 3197*0Sstevel@tonic-gate o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4); 3198*0Sstevel@tonic-gate b = o % 8; 3199*0Sstevel@tonic-gate o /= 8; 3200*0Sstevel@tonic-gate if (PL_reg_poscache[o] & (1<<b)) { 3201*0Sstevel@tonic-gate DEBUG_r( 3202*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3203*0Sstevel@tonic-gate "%*s already tried at this position...\n", 3204*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "") 3205*0Sstevel@tonic-gate ); 3206*0Sstevel@tonic-gate if (PL_reg_flags & RF_false) 3207*0Sstevel@tonic-gate sayYES; 3208*0Sstevel@tonic-gate else 3209*0Sstevel@tonic-gate sayNO_SILENT; 3210*0Sstevel@tonic-gate } 3211*0Sstevel@tonic-gate PL_reg_poscache[o] |= (1<<b); 3212*0Sstevel@tonic-gate } 3213*0Sstevel@tonic-gate } 3214*0Sstevel@tonic-gate 3215*0Sstevel@tonic-gate /* Prefer next over scan for minimal matching. */ 3216*0Sstevel@tonic-gate 3217*0Sstevel@tonic-gate if (cc->minmod) { 3218*0Sstevel@tonic-gate PL_regcc = cc->oldcc; 3219*0Sstevel@tonic-gate if (PL_regcc) 3220*0Sstevel@tonic-gate ln = PL_regcc->cur; 3221*0Sstevel@tonic-gate cp = regcppush(cc->parenfloor); 3222*0Sstevel@tonic-gate REGCP_SET(lastcp); 3223*0Sstevel@tonic-gate if (regmatch(cc->next)) { 3224*0Sstevel@tonic-gate regcpblow(cp); 3225*0Sstevel@tonic-gate sayYES; /* All done. */ 3226*0Sstevel@tonic-gate } 3227*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3228*0Sstevel@tonic-gate regcppop(); 3229*0Sstevel@tonic-gate if (PL_regcc) 3230*0Sstevel@tonic-gate PL_regcc->cur = ln; 3231*0Sstevel@tonic-gate PL_regcc = cc; 3232*0Sstevel@tonic-gate 3233*0Sstevel@tonic-gate if (n >= cc->max) { /* Maximum greed exceeded? */ 3234*0Sstevel@tonic-gate if (ckWARN(WARN_REGEXP) && n >= REG_INFTY 3235*0Sstevel@tonic-gate && !(PL_reg_flags & RF_warned)) { 3236*0Sstevel@tonic-gate PL_reg_flags |= RF_warned; 3237*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded", 3238*0Sstevel@tonic-gate "Complex regular subexpression recursion", 3239*0Sstevel@tonic-gate REG_INFTY - 1); 3240*0Sstevel@tonic-gate } 3241*0Sstevel@tonic-gate sayNO; 3242*0Sstevel@tonic-gate } 3243*0Sstevel@tonic-gate 3244*0Sstevel@tonic-gate DEBUG_r( 3245*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3246*0Sstevel@tonic-gate "%*s trying longer...\n", 3247*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "") 3248*0Sstevel@tonic-gate ); 3249*0Sstevel@tonic-gate /* Try scanning more and see if it helps. */ 3250*0Sstevel@tonic-gate PL_reginput = locinput; 3251*0Sstevel@tonic-gate cc->cur = n; 3252*0Sstevel@tonic-gate cc->lastloc = locinput; 3253*0Sstevel@tonic-gate cp = regcppush(cc->parenfloor); 3254*0Sstevel@tonic-gate REGCP_SET(lastcp); 3255*0Sstevel@tonic-gate if (regmatch(cc->scan)) { 3256*0Sstevel@tonic-gate regcpblow(cp); 3257*0Sstevel@tonic-gate sayYES; 3258*0Sstevel@tonic-gate } 3259*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3260*0Sstevel@tonic-gate regcppop(); 3261*0Sstevel@tonic-gate cc->cur = n - 1; 3262*0Sstevel@tonic-gate cc->lastloc = lastloc; 3263*0Sstevel@tonic-gate sayNO; 3264*0Sstevel@tonic-gate } 3265*0Sstevel@tonic-gate 3266*0Sstevel@tonic-gate /* Prefer scan over next for maximal matching. */ 3267*0Sstevel@tonic-gate 3268*0Sstevel@tonic-gate if (n < cc->max) { /* More greed allowed? */ 3269*0Sstevel@tonic-gate cp = regcppush(cc->parenfloor); 3270*0Sstevel@tonic-gate cc->cur = n; 3271*0Sstevel@tonic-gate cc->lastloc = locinput; 3272*0Sstevel@tonic-gate REGCP_SET(lastcp); 3273*0Sstevel@tonic-gate if (regmatch(cc->scan)) { 3274*0Sstevel@tonic-gate regcpblow(cp); 3275*0Sstevel@tonic-gate sayYES; 3276*0Sstevel@tonic-gate } 3277*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3278*0Sstevel@tonic-gate regcppop(); /* Restore some previous $<digit>s? */ 3279*0Sstevel@tonic-gate PL_reginput = locinput; 3280*0Sstevel@tonic-gate DEBUG_r( 3281*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3282*0Sstevel@tonic-gate "%*s failed, try continuation...\n", 3283*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "") 3284*0Sstevel@tonic-gate ); 3285*0Sstevel@tonic-gate } 3286*0Sstevel@tonic-gate if (ckWARN(WARN_REGEXP) && n >= REG_INFTY 3287*0Sstevel@tonic-gate && !(PL_reg_flags & RF_warned)) { 3288*0Sstevel@tonic-gate PL_reg_flags |= RF_warned; 3289*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded", 3290*0Sstevel@tonic-gate "Complex regular subexpression recursion", 3291*0Sstevel@tonic-gate REG_INFTY - 1); 3292*0Sstevel@tonic-gate } 3293*0Sstevel@tonic-gate 3294*0Sstevel@tonic-gate /* Failed deeper matches of scan, so see if this one works. */ 3295*0Sstevel@tonic-gate PL_regcc = cc->oldcc; 3296*0Sstevel@tonic-gate if (PL_regcc) 3297*0Sstevel@tonic-gate ln = PL_regcc->cur; 3298*0Sstevel@tonic-gate if (regmatch(cc->next)) 3299*0Sstevel@tonic-gate sayYES; 3300*0Sstevel@tonic-gate if (PL_regcc) 3301*0Sstevel@tonic-gate PL_regcc->cur = ln; 3302*0Sstevel@tonic-gate PL_regcc = cc; 3303*0Sstevel@tonic-gate cc->cur = n - 1; 3304*0Sstevel@tonic-gate cc->lastloc = lastloc; 3305*0Sstevel@tonic-gate sayNO; 3306*0Sstevel@tonic-gate } 3307*0Sstevel@tonic-gate /* NOT REACHED */ 3308*0Sstevel@tonic-gate case BRANCHJ: 3309*0Sstevel@tonic-gate next = scan + ARG(scan); 3310*0Sstevel@tonic-gate if (next == scan) 3311*0Sstevel@tonic-gate next = NULL; 3312*0Sstevel@tonic-gate inner = NEXTOPER(NEXTOPER(scan)); 3313*0Sstevel@tonic-gate goto do_branch; 3314*0Sstevel@tonic-gate case BRANCH: 3315*0Sstevel@tonic-gate inner = NEXTOPER(scan); 3316*0Sstevel@tonic-gate do_branch: 3317*0Sstevel@tonic-gate { 3318*0Sstevel@tonic-gate c1 = OP(scan); 3319*0Sstevel@tonic-gate if (OP(next) != c1) /* No choice. */ 3320*0Sstevel@tonic-gate next = inner; /* Avoid recursion. */ 3321*0Sstevel@tonic-gate else { 3322*0Sstevel@tonic-gate I32 lastparen = *PL_reglastparen; 3323*0Sstevel@tonic-gate I32 unwind1; 3324*0Sstevel@tonic-gate re_unwind_branch_t *uw; 3325*0Sstevel@tonic-gate 3326*0Sstevel@tonic-gate /* Put unwinding data on stack */ 3327*0Sstevel@tonic-gate unwind1 = SSNEWt(1,re_unwind_branch_t); 3328*0Sstevel@tonic-gate uw = SSPTRt(unwind1,re_unwind_branch_t); 3329*0Sstevel@tonic-gate uw->prev = unwind; 3330*0Sstevel@tonic-gate unwind = unwind1; 3331*0Sstevel@tonic-gate uw->type = ((c1 == BRANCH) 3332*0Sstevel@tonic-gate ? RE_UNWIND_BRANCH 3333*0Sstevel@tonic-gate : RE_UNWIND_BRANCHJ); 3334*0Sstevel@tonic-gate uw->lastparen = lastparen; 3335*0Sstevel@tonic-gate uw->next = next; 3336*0Sstevel@tonic-gate uw->locinput = locinput; 3337*0Sstevel@tonic-gate uw->nextchr = nextchr; 3338*0Sstevel@tonic-gate #ifdef DEBUGGING 3339*0Sstevel@tonic-gate uw->regindent = ++PL_regindent; 3340*0Sstevel@tonic-gate #endif 3341*0Sstevel@tonic-gate 3342*0Sstevel@tonic-gate REGCP_SET(uw->lastcp); 3343*0Sstevel@tonic-gate 3344*0Sstevel@tonic-gate /* Now go into the first branch */ 3345*0Sstevel@tonic-gate next = inner; 3346*0Sstevel@tonic-gate } 3347*0Sstevel@tonic-gate } 3348*0Sstevel@tonic-gate break; 3349*0Sstevel@tonic-gate case MINMOD: 3350*0Sstevel@tonic-gate minmod = 1; 3351*0Sstevel@tonic-gate break; 3352*0Sstevel@tonic-gate case CURLYM: 3353*0Sstevel@tonic-gate { 3354*0Sstevel@tonic-gate I32 l = 0; 3355*0Sstevel@tonic-gate CHECKPOINT lastcp; 3356*0Sstevel@tonic-gate 3357*0Sstevel@tonic-gate /* We suppose that the next guy does not need 3358*0Sstevel@tonic-gate backtracking: in particular, it is of constant length, 3359*0Sstevel@tonic-gate and has no parenths to influence future backrefs. */ 3360*0Sstevel@tonic-gate ln = ARG1(scan); /* min to match */ 3361*0Sstevel@tonic-gate n = ARG2(scan); /* max to match */ 3362*0Sstevel@tonic-gate paren = scan->flags; 3363*0Sstevel@tonic-gate if (paren) { 3364*0Sstevel@tonic-gate if (paren > PL_regsize) 3365*0Sstevel@tonic-gate PL_regsize = paren; 3366*0Sstevel@tonic-gate if (paren > (I32)*PL_reglastparen) 3367*0Sstevel@tonic-gate *PL_reglastparen = paren; 3368*0Sstevel@tonic-gate } 3369*0Sstevel@tonic-gate scan = NEXTOPER(scan) + NODE_STEP_REGNODE; 3370*0Sstevel@tonic-gate if (paren) 3371*0Sstevel@tonic-gate scan += NEXT_OFF(scan); /* Skip former OPEN. */ 3372*0Sstevel@tonic-gate PL_reginput = locinput; 3373*0Sstevel@tonic-gate if (minmod) { 3374*0Sstevel@tonic-gate minmod = 0; 3375*0Sstevel@tonic-gate if (ln && regrepeat_hard(scan, ln, &l) < ln) 3376*0Sstevel@tonic-gate sayNO; 3377*0Sstevel@tonic-gate /* if we matched something zero-length we don't need to 3378*0Sstevel@tonic-gate backtrack - capturing parens are already defined, so 3379*0Sstevel@tonic-gate the caveat in the maximal case doesn't apply 3380*0Sstevel@tonic-gate 3381*0Sstevel@tonic-gate XXXX if ln == 0, we can redo this check first time 3382*0Sstevel@tonic-gate through the following loop 3383*0Sstevel@tonic-gate */ 3384*0Sstevel@tonic-gate if (ln && l == 0) 3385*0Sstevel@tonic-gate n = ln; /* don't backtrack */ 3386*0Sstevel@tonic-gate locinput = PL_reginput; 3387*0Sstevel@tonic-gate if (HAS_TEXT(next) || JUMPABLE(next)) { 3388*0Sstevel@tonic-gate regnode *text_node = next; 3389*0Sstevel@tonic-gate 3390*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); 3391*0Sstevel@tonic-gate 3392*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) c1 = c2 = -1000; 3393*0Sstevel@tonic-gate else { 3394*0Sstevel@tonic-gate if (PL_regkind[(U8)OP(text_node)] == REF) { 3395*0Sstevel@tonic-gate c1 = c2 = -1000; 3396*0Sstevel@tonic-gate goto assume_ok_MM; 3397*0Sstevel@tonic-gate } 3398*0Sstevel@tonic-gate else { c1 = (U8)*STRING(text_node); } 3399*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF) 3400*0Sstevel@tonic-gate c2 = PL_fold[c1]; 3401*0Sstevel@tonic-gate else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) 3402*0Sstevel@tonic-gate c2 = PL_fold_locale[c1]; 3403*0Sstevel@tonic-gate else 3404*0Sstevel@tonic-gate c2 = c1; 3405*0Sstevel@tonic-gate } 3406*0Sstevel@tonic-gate } 3407*0Sstevel@tonic-gate else 3408*0Sstevel@tonic-gate c1 = c2 = -1000; 3409*0Sstevel@tonic-gate assume_ok_MM: 3410*0Sstevel@tonic-gate REGCP_SET(lastcp); 3411*0Sstevel@tonic-gate /* This may be improved if l == 0. */ 3412*0Sstevel@tonic-gate while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */ 3413*0Sstevel@tonic-gate /* If it could work, try it. */ 3414*0Sstevel@tonic-gate if (c1 == -1000 || 3415*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c1 || 3416*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c2) 3417*0Sstevel@tonic-gate { 3418*0Sstevel@tonic-gate if (paren) { 3419*0Sstevel@tonic-gate if (ln) { 3420*0Sstevel@tonic-gate PL_regstartp[paren] = 3421*0Sstevel@tonic-gate HOPc(PL_reginput, -l) - PL_bostr; 3422*0Sstevel@tonic-gate PL_regendp[paren] = PL_reginput - PL_bostr; 3423*0Sstevel@tonic-gate } 3424*0Sstevel@tonic-gate else 3425*0Sstevel@tonic-gate PL_regendp[paren] = -1; 3426*0Sstevel@tonic-gate } 3427*0Sstevel@tonic-gate if (regmatch(next)) 3428*0Sstevel@tonic-gate sayYES; 3429*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3430*0Sstevel@tonic-gate } 3431*0Sstevel@tonic-gate /* Couldn't or didn't -- move forward. */ 3432*0Sstevel@tonic-gate PL_reginput = locinput; 3433*0Sstevel@tonic-gate if (regrepeat_hard(scan, 1, &l)) { 3434*0Sstevel@tonic-gate ln++; 3435*0Sstevel@tonic-gate locinput = PL_reginput; 3436*0Sstevel@tonic-gate } 3437*0Sstevel@tonic-gate else 3438*0Sstevel@tonic-gate sayNO; 3439*0Sstevel@tonic-gate } 3440*0Sstevel@tonic-gate } 3441*0Sstevel@tonic-gate else { 3442*0Sstevel@tonic-gate n = regrepeat_hard(scan, n, &l); 3443*0Sstevel@tonic-gate /* if we matched something zero-length we don't need to 3444*0Sstevel@tonic-gate backtrack, unless the minimum count is zero and we 3445*0Sstevel@tonic-gate are capturing the result - in that case the capture 3446*0Sstevel@tonic-gate being defined or not may affect later execution 3447*0Sstevel@tonic-gate */ 3448*0Sstevel@tonic-gate if (n != 0 && l == 0 && !(paren && ln == 0)) 3449*0Sstevel@tonic-gate ln = n; /* don't backtrack */ 3450*0Sstevel@tonic-gate locinput = PL_reginput; 3451*0Sstevel@tonic-gate DEBUG_r( 3452*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3453*0Sstevel@tonic-gate "%*s matched %"IVdf" times, len=%"IVdf"...\n", 3454*0Sstevel@tonic-gate (int)(REPORT_CODE_OFF+PL_regindent*2), "", 3455*0Sstevel@tonic-gate (IV) n, (IV)l) 3456*0Sstevel@tonic-gate ); 3457*0Sstevel@tonic-gate if (n >= ln) { 3458*0Sstevel@tonic-gate if (HAS_TEXT(next) || JUMPABLE(next)) { 3459*0Sstevel@tonic-gate regnode *text_node = next; 3460*0Sstevel@tonic-gate 3461*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); 3462*0Sstevel@tonic-gate 3463*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) c1 = c2 = -1000; 3464*0Sstevel@tonic-gate else { 3465*0Sstevel@tonic-gate if (PL_regkind[(U8)OP(text_node)] == REF) { 3466*0Sstevel@tonic-gate c1 = c2 = -1000; 3467*0Sstevel@tonic-gate goto assume_ok_REG; 3468*0Sstevel@tonic-gate } 3469*0Sstevel@tonic-gate else { c1 = (U8)*STRING(text_node); } 3470*0Sstevel@tonic-gate 3471*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF) 3472*0Sstevel@tonic-gate c2 = PL_fold[c1]; 3473*0Sstevel@tonic-gate else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) 3474*0Sstevel@tonic-gate c2 = PL_fold_locale[c1]; 3475*0Sstevel@tonic-gate else 3476*0Sstevel@tonic-gate c2 = c1; 3477*0Sstevel@tonic-gate } 3478*0Sstevel@tonic-gate } 3479*0Sstevel@tonic-gate else 3480*0Sstevel@tonic-gate c1 = c2 = -1000; 3481*0Sstevel@tonic-gate } 3482*0Sstevel@tonic-gate assume_ok_REG: 3483*0Sstevel@tonic-gate REGCP_SET(lastcp); 3484*0Sstevel@tonic-gate while (n >= ln) { 3485*0Sstevel@tonic-gate /* If it could work, try it. */ 3486*0Sstevel@tonic-gate if (c1 == -1000 || 3487*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c1 || 3488*0Sstevel@tonic-gate UCHARAT(PL_reginput) == c2) 3489*0Sstevel@tonic-gate { 3490*0Sstevel@tonic-gate DEBUG_r( 3491*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3492*0Sstevel@tonic-gate "%*s trying tail with n=%"IVdf"...\n", 3493*0Sstevel@tonic-gate (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n) 3494*0Sstevel@tonic-gate ); 3495*0Sstevel@tonic-gate if (paren) { 3496*0Sstevel@tonic-gate if (n) { 3497*0Sstevel@tonic-gate PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr; 3498*0Sstevel@tonic-gate PL_regendp[paren] = PL_reginput - PL_bostr; 3499*0Sstevel@tonic-gate } 3500*0Sstevel@tonic-gate else 3501*0Sstevel@tonic-gate PL_regendp[paren] = -1; 3502*0Sstevel@tonic-gate } 3503*0Sstevel@tonic-gate if (regmatch(next)) 3504*0Sstevel@tonic-gate sayYES; 3505*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3506*0Sstevel@tonic-gate } 3507*0Sstevel@tonic-gate /* Couldn't or didn't -- back up. */ 3508*0Sstevel@tonic-gate n--; 3509*0Sstevel@tonic-gate locinput = HOPc(locinput, -l); 3510*0Sstevel@tonic-gate PL_reginput = locinput; 3511*0Sstevel@tonic-gate } 3512*0Sstevel@tonic-gate } 3513*0Sstevel@tonic-gate sayNO; 3514*0Sstevel@tonic-gate break; 3515*0Sstevel@tonic-gate } 3516*0Sstevel@tonic-gate case CURLYN: 3517*0Sstevel@tonic-gate paren = scan->flags; /* Which paren to set */ 3518*0Sstevel@tonic-gate if (paren > PL_regsize) 3519*0Sstevel@tonic-gate PL_regsize = paren; 3520*0Sstevel@tonic-gate if (paren > (I32)*PL_reglastparen) 3521*0Sstevel@tonic-gate *PL_reglastparen = paren; 3522*0Sstevel@tonic-gate ln = ARG1(scan); /* min to match */ 3523*0Sstevel@tonic-gate n = ARG2(scan); /* max to match */ 3524*0Sstevel@tonic-gate scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE); 3525*0Sstevel@tonic-gate goto repeat; 3526*0Sstevel@tonic-gate case CURLY: 3527*0Sstevel@tonic-gate paren = 0; 3528*0Sstevel@tonic-gate ln = ARG1(scan); /* min to match */ 3529*0Sstevel@tonic-gate n = ARG2(scan); /* max to match */ 3530*0Sstevel@tonic-gate scan = NEXTOPER(scan) + NODE_STEP_REGNODE; 3531*0Sstevel@tonic-gate goto repeat; 3532*0Sstevel@tonic-gate case STAR: 3533*0Sstevel@tonic-gate ln = 0; 3534*0Sstevel@tonic-gate n = REG_INFTY; 3535*0Sstevel@tonic-gate scan = NEXTOPER(scan); 3536*0Sstevel@tonic-gate paren = 0; 3537*0Sstevel@tonic-gate goto repeat; 3538*0Sstevel@tonic-gate case PLUS: 3539*0Sstevel@tonic-gate ln = 1; 3540*0Sstevel@tonic-gate n = REG_INFTY; 3541*0Sstevel@tonic-gate scan = NEXTOPER(scan); 3542*0Sstevel@tonic-gate paren = 0; 3543*0Sstevel@tonic-gate repeat: 3544*0Sstevel@tonic-gate /* 3545*0Sstevel@tonic-gate * Lookahead to avoid useless match attempts 3546*0Sstevel@tonic-gate * when we know what character comes next. 3547*0Sstevel@tonic-gate */ 3548*0Sstevel@tonic-gate 3549*0Sstevel@tonic-gate /* 3550*0Sstevel@tonic-gate * Used to only do .*x and .*?x, but now it allows 3551*0Sstevel@tonic-gate * for )'s, ('s and (?{ ... })'s to be in the way 3552*0Sstevel@tonic-gate * of the quantifier and the EXACT-like node. -- japhy 3553*0Sstevel@tonic-gate */ 3554*0Sstevel@tonic-gate 3555*0Sstevel@tonic-gate if (HAS_TEXT(next) || JUMPABLE(next)) { 3556*0Sstevel@tonic-gate U8 *s; 3557*0Sstevel@tonic-gate regnode *text_node = next; 3558*0Sstevel@tonic-gate 3559*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); 3560*0Sstevel@tonic-gate 3561*0Sstevel@tonic-gate if (! HAS_TEXT(text_node)) c1 = c2 = -1000; 3562*0Sstevel@tonic-gate else { 3563*0Sstevel@tonic-gate if (PL_regkind[(U8)OP(text_node)] == REF) { 3564*0Sstevel@tonic-gate c1 = c2 = -1000; 3565*0Sstevel@tonic-gate goto assume_ok_easy; 3566*0Sstevel@tonic-gate } 3567*0Sstevel@tonic-gate else { s = (U8*)STRING(text_node); } 3568*0Sstevel@tonic-gate 3569*0Sstevel@tonic-gate if (!UTF) { 3570*0Sstevel@tonic-gate c2 = c1 = *s; 3571*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF) 3572*0Sstevel@tonic-gate c2 = PL_fold[c1]; 3573*0Sstevel@tonic-gate else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) 3574*0Sstevel@tonic-gate c2 = PL_fold_locale[c1]; 3575*0Sstevel@tonic-gate } 3576*0Sstevel@tonic-gate else { /* UTF */ 3577*0Sstevel@tonic-gate if (OP(text_node) == EXACTF || OP(text_node) == REFF) { 3578*0Sstevel@tonic-gate STRLEN ulen1, ulen2; 3579*0Sstevel@tonic-gate U8 tmpbuf1[UTF8_MAXLEN_UCLC+1]; 3580*0Sstevel@tonic-gate U8 tmpbuf2[UTF8_MAXLEN_UCLC+1]; 3581*0Sstevel@tonic-gate 3582*0Sstevel@tonic-gate to_utf8_lower((U8*)s, tmpbuf1, &ulen1); 3583*0Sstevel@tonic-gate to_utf8_upper((U8*)s, tmpbuf2, &ulen2); 3584*0Sstevel@tonic-gate 3585*0Sstevel@tonic-gate c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0, 3586*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3587*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3588*0Sstevel@tonic-gate c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0, 3589*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3590*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3591*0Sstevel@tonic-gate } 3592*0Sstevel@tonic-gate else { 3593*0Sstevel@tonic-gate c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0, 3594*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3595*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3596*0Sstevel@tonic-gate } 3597*0Sstevel@tonic-gate } 3598*0Sstevel@tonic-gate } 3599*0Sstevel@tonic-gate } 3600*0Sstevel@tonic-gate else 3601*0Sstevel@tonic-gate c1 = c2 = -1000; 3602*0Sstevel@tonic-gate assume_ok_easy: 3603*0Sstevel@tonic-gate PL_reginput = locinput; 3604*0Sstevel@tonic-gate if (minmod) { 3605*0Sstevel@tonic-gate CHECKPOINT lastcp; 3606*0Sstevel@tonic-gate minmod = 0; 3607*0Sstevel@tonic-gate if (ln && regrepeat(scan, ln) < ln) 3608*0Sstevel@tonic-gate sayNO; 3609*0Sstevel@tonic-gate locinput = PL_reginput; 3610*0Sstevel@tonic-gate REGCP_SET(lastcp); 3611*0Sstevel@tonic-gate if (c1 != -1000) { 3612*0Sstevel@tonic-gate char *e; /* Should not check after this */ 3613*0Sstevel@tonic-gate char *old = locinput; 3614*0Sstevel@tonic-gate int count = 0; 3615*0Sstevel@tonic-gate 3616*0Sstevel@tonic-gate if (n == REG_INFTY) { 3617*0Sstevel@tonic-gate e = PL_regeol - 1; 3618*0Sstevel@tonic-gate if (do_utf8) 3619*0Sstevel@tonic-gate while (UTF8_IS_CONTINUATION(*(U8*)e)) 3620*0Sstevel@tonic-gate e--; 3621*0Sstevel@tonic-gate } 3622*0Sstevel@tonic-gate else if (do_utf8) { 3623*0Sstevel@tonic-gate int m = n - ln; 3624*0Sstevel@tonic-gate for (e = locinput; 3625*0Sstevel@tonic-gate m >0 && e + UTF8SKIP(e) <= PL_regeol; m--) 3626*0Sstevel@tonic-gate e += UTF8SKIP(e); 3627*0Sstevel@tonic-gate } 3628*0Sstevel@tonic-gate else { 3629*0Sstevel@tonic-gate e = locinput + n - ln; 3630*0Sstevel@tonic-gate if (e >= PL_regeol) 3631*0Sstevel@tonic-gate e = PL_regeol - 1; 3632*0Sstevel@tonic-gate } 3633*0Sstevel@tonic-gate while (1) { 3634*0Sstevel@tonic-gate /* Find place 'next' could work */ 3635*0Sstevel@tonic-gate if (!do_utf8) { 3636*0Sstevel@tonic-gate if (c1 == c2) { 3637*0Sstevel@tonic-gate while (locinput <= e && 3638*0Sstevel@tonic-gate UCHARAT(locinput) != c1) 3639*0Sstevel@tonic-gate locinput++; 3640*0Sstevel@tonic-gate } else { 3641*0Sstevel@tonic-gate while (locinput <= e 3642*0Sstevel@tonic-gate && UCHARAT(locinput) != c1 3643*0Sstevel@tonic-gate && UCHARAT(locinput) != c2) 3644*0Sstevel@tonic-gate locinput++; 3645*0Sstevel@tonic-gate } 3646*0Sstevel@tonic-gate count = locinput - old; 3647*0Sstevel@tonic-gate } 3648*0Sstevel@tonic-gate else { 3649*0Sstevel@tonic-gate STRLEN len; 3650*0Sstevel@tonic-gate if (c1 == c2) { 3651*0Sstevel@tonic-gate /* count initialised to 3652*0Sstevel@tonic-gate * utf8_distance(old, locinput) */ 3653*0Sstevel@tonic-gate while (locinput <= e && 3654*0Sstevel@tonic-gate utf8n_to_uvchr((U8*)locinput, 3655*0Sstevel@tonic-gate UTF8_MAXLEN, &len, 3656*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3657*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY) != (UV)c1) { 3658*0Sstevel@tonic-gate locinput += len; 3659*0Sstevel@tonic-gate count++; 3660*0Sstevel@tonic-gate } 3661*0Sstevel@tonic-gate } else { 3662*0Sstevel@tonic-gate /* count initialised to 3663*0Sstevel@tonic-gate * utf8_distance(old, locinput) */ 3664*0Sstevel@tonic-gate while (locinput <= e) { 3665*0Sstevel@tonic-gate UV c = utf8n_to_uvchr((U8*)locinput, 3666*0Sstevel@tonic-gate UTF8_MAXLEN, &len, 3667*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3668*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3669*0Sstevel@tonic-gate if (c == (UV)c1 || c == (UV)c2) 3670*0Sstevel@tonic-gate break; 3671*0Sstevel@tonic-gate locinput += len; 3672*0Sstevel@tonic-gate count++; 3673*0Sstevel@tonic-gate } 3674*0Sstevel@tonic-gate } 3675*0Sstevel@tonic-gate } 3676*0Sstevel@tonic-gate if (locinput > e) 3677*0Sstevel@tonic-gate sayNO; 3678*0Sstevel@tonic-gate /* PL_reginput == old now */ 3679*0Sstevel@tonic-gate if (locinput != old) { 3680*0Sstevel@tonic-gate ln = 1; /* Did some */ 3681*0Sstevel@tonic-gate if (regrepeat(scan, count) < count) 3682*0Sstevel@tonic-gate sayNO; 3683*0Sstevel@tonic-gate } 3684*0Sstevel@tonic-gate /* PL_reginput == locinput now */ 3685*0Sstevel@tonic-gate TRYPAREN(paren, ln, locinput); 3686*0Sstevel@tonic-gate PL_reginput = locinput; /* Could be reset... */ 3687*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3688*0Sstevel@tonic-gate /* Couldn't or didn't -- move forward. */ 3689*0Sstevel@tonic-gate old = locinput; 3690*0Sstevel@tonic-gate if (do_utf8) 3691*0Sstevel@tonic-gate locinput += UTF8SKIP(locinput); 3692*0Sstevel@tonic-gate else 3693*0Sstevel@tonic-gate locinput++; 3694*0Sstevel@tonic-gate count = 1; 3695*0Sstevel@tonic-gate } 3696*0Sstevel@tonic-gate } 3697*0Sstevel@tonic-gate else 3698*0Sstevel@tonic-gate while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */ 3699*0Sstevel@tonic-gate UV c; 3700*0Sstevel@tonic-gate if (c1 != -1000) { 3701*0Sstevel@tonic-gate if (do_utf8) 3702*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)PL_reginput, 3703*0Sstevel@tonic-gate UTF8_MAXLEN, 0, 3704*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3705*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3706*0Sstevel@tonic-gate else 3707*0Sstevel@tonic-gate c = UCHARAT(PL_reginput); 3708*0Sstevel@tonic-gate /* If it could work, try it. */ 3709*0Sstevel@tonic-gate if (c == (UV)c1 || c == (UV)c2) 3710*0Sstevel@tonic-gate { 3711*0Sstevel@tonic-gate TRYPAREN(paren, ln, PL_reginput); 3712*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3713*0Sstevel@tonic-gate } 3714*0Sstevel@tonic-gate } 3715*0Sstevel@tonic-gate /* If it could work, try it. */ 3716*0Sstevel@tonic-gate else if (c1 == -1000) 3717*0Sstevel@tonic-gate { 3718*0Sstevel@tonic-gate TRYPAREN(paren, ln, PL_reginput); 3719*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3720*0Sstevel@tonic-gate } 3721*0Sstevel@tonic-gate /* Couldn't or didn't -- move forward. */ 3722*0Sstevel@tonic-gate PL_reginput = locinput; 3723*0Sstevel@tonic-gate if (regrepeat(scan, 1)) { 3724*0Sstevel@tonic-gate ln++; 3725*0Sstevel@tonic-gate locinput = PL_reginput; 3726*0Sstevel@tonic-gate } 3727*0Sstevel@tonic-gate else 3728*0Sstevel@tonic-gate sayNO; 3729*0Sstevel@tonic-gate } 3730*0Sstevel@tonic-gate } 3731*0Sstevel@tonic-gate else { 3732*0Sstevel@tonic-gate CHECKPOINT lastcp; 3733*0Sstevel@tonic-gate n = regrepeat(scan, n); 3734*0Sstevel@tonic-gate locinput = PL_reginput; 3735*0Sstevel@tonic-gate if (ln < n && PL_regkind[(U8)OP(next)] == EOL && 3736*0Sstevel@tonic-gate ((!PL_multiline && OP(next) != MEOL) || 3737*0Sstevel@tonic-gate OP(next) == SEOL || OP(next) == EOS)) 3738*0Sstevel@tonic-gate { 3739*0Sstevel@tonic-gate ln = n; /* why back off? */ 3740*0Sstevel@tonic-gate /* ...because $ and \Z can match before *and* after 3741*0Sstevel@tonic-gate newline at the end. Consider "\n\n" =~ /\n+\Z\n/. 3742*0Sstevel@tonic-gate We should back off by one in this case. */ 3743*0Sstevel@tonic-gate if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS) 3744*0Sstevel@tonic-gate ln--; 3745*0Sstevel@tonic-gate } 3746*0Sstevel@tonic-gate REGCP_SET(lastcp); 3747*0Sstevel@tonic-gate if (paren) { 3748*0Sstevel@tonic-gate UV c = 0; 3749*0Sstevel@tonic-gate while (n >= ln) { 3750*0Sstevel@tonic-gate if (c1 != -1000) { 3751*0Sstevel@tonic-gate if (do_utf8) 3752*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)PL_reginput, 3753*0Sstevel@tonic-gate UTF8_MAXLEN, 0, 3754*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3755*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3756*0Sstevel@tonic-gate else 3757*0Sstevel@tonic-gate c = UCHARAT(PL_reginput); 3758*0Sstevel@tonic-gate } 3759*0Sstevel@tonic-gate /* If it could work, try it. */ 3760*0Sstevel@tonic-gate if (c1 == -1000 || c == (UV)c1 || c == (UV)c2) 3761*0Sstevel@tonic-gate { 3762*0Sstevel@tonic-gate TRYPAREN(paren, n, PL_reginput); 3763*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3764*0Sstevel@tonic-gate } 3765*0Sstevel@tonic-gate /* Couldn't or didn't -- back up. */ 3766*0Sstevel@tonic-gate n--; 3767*0Sstevel@tonic-gate PL_reginput = locinput = HOPc(locinput, -1); 3768*0Sstevel@tonic-gate } 3769*0Sstevel@tonic-gate } 3770*0Sstevel@tonic-gate else { 3771*0Sstevel@tonic-gate UV c = 0; 3772*0Sstevel@tonic-gate while (n >= ln) { 3773*0Sstevel@tonic-gate if (c1 != -1000) { 3774*0Sstevel@tonic-gate if (do_utf8) 3775*0Sstevel@tonic-gate c = utf8n_to_uvchr((U8*)PL_reginput, 3776*0Sstevel@tonic-gate UTF8_MAXLEN, 0, 3777*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 3778*0Sstevel@tonic-gate 0 : UTF8_ALLOW_ANY); 3779*0Sstevel@tonic-gate else 3780*0Sstevel@tonic-gate c = UCHARAT(PL_reginput); 3781*0Sstevel@tonic-gate } 3782*0Sstevel@tonic-gate /* If it could work, try it. */ 3783*0Sstevel@tonic-gate if (c1 == -1000 || c == (UV)c1 || c == (UV)c2) 3784*0Sstevel@tonic-gate { 3785*0Sstevel@tonic-gate TRYPAREN(paren, n, PL_reginput); 3786*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3787*0Sstevel@tonic-gate } 3788*0Sstevel@tonic-gate /* Couldn't or didn't -- back up. */ 3789*0Sstevel@tonic-gate n--; 3790*0Sstevel@tonic-gate PL_reginput = locinput = HOPc(locinput, -1); 3791*0Sstevel@tonic-gate } 3792*0Sstevel@tonic-gate } 3793*0Sstevel@tonic-gate } 3794*0Sstevel@tonic-gate sayNO; 3795*0Sstevel@tonic-gate break; 3796*0Sstevel@tonic-gate case END: 3797*0Sstevel@tonic-gate if (PL_reg_call_cc) { 3798*0Sstevel@tonic-gate re_cc_state *cur_call_cc = PL_reg_call_cc; 3799*0Sstevel@tonic-gate CURCUR *cctmp = PL_regcc; 3800*0Sstevel@tonic-gate regexp *re = PL_reg_re; 3801*0Sstevel@tonic-gate CHECKPOINT cp, lastcp; 3802*0Sstevel@tonic-gate 3803*0Sstevel@tonic-gate cp = regcppush(0); /* Save *all* the positions. */ 3804*0Sstevel@tonic-gate REGCP_SET(lastcp); 3805*0Sstevel@tonic-gate regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of 3806*0Sstevel@tonic-gate the caller. */ 3807*0Sstevel@tonic-gate PL_reginput = locinput; /* Make position available to 3808*0Sstevel@tonic-gate the callcc. */ 3809*0Sstevel@tonic-gate cache_re(PL_reg_call_cc->re); 3810*0Sstevel@tonic-gate PL_regcc = PL_reg_call_cc->cc; 3811*0Sstevel@tonic-gate PL_reg_call_cc = PL_reg_call_cc->prev; 3812*0Sstevel@tonic-gate if (regmatch(cur_call_cc->node)) { 3813*0Sstevel@tonic-gate PL_reg_call_cc = cur_call_cc; 3814*0Sstevel@tonic-gate regcpblow(cp); 3815*0Sstevel@tonic-gate sayYES; 3816*0Sstevel@tonic-gate } 3817*0Sstevel@tonic-gate REGCP_UNWIND(lastcp); 3818*0Sstevel@tonic-gate regcppop(); 3819*0Sstevel@tonic-gate PL_reg_call_cc = cur_call_cc; 3820*0Sstevel@tonic-gate PL_regcc = cctmp; 3821*0Sstevel@tonic-gate PL_reg_re = re; 3822*0Sstevel@tonic-gate cache_re(re); 3823*0Sstevel@tonic-gate 3824*0Sstevel@tonic-gate DEBUG_r( 3825*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3826*0Sstevel@tonic-gate "%*s continuation failed...\n", 3827*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "") 3828*0Sstevel@tonic-gate ); 3829*0Sstevel@tonic-gate sayNO_SILENT; 3830*0Sstevel@tonic-gate } 3831*0Sstevel@tonic-gate if (locinput < PL_regtill) { 3832*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, 3833*0Sstevel@tonic-gate "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", 3834*0Sstevel@tonic-gate PL_colors[4], 3835*0Sstevel@tonic-gate (long)(locinput - PL_reg_starttry), 3836*0Sstevel@tonic-gate (long)(PL_regtill - PL_reg_starttry), 3837*0Sstevel@tonic-gate PL_colors[5])); 3838*0Sstevel@tonic-gate sayNO_FINAL; /* Cannot match: too short. */ 3839*0Sstevel@tonic-gate } 3840*0Sstevel@tonic-gate PL_reginput = locinput; /* put where regtry can find it */ 3841*0Sstevel@tonic-gate sayYES_FINAL; /* Success! */ 3842*0Sstevel@tonic-gate case SUCCEED: 3843*0Sstevel@tonic-gate PL_reginput = locinput; /* put where regtry can find it */ 3844*0Sstevel@tonic-gate sayYES_LOUD; /* Success! */ 3845*0Sstevel@tonic-gate case SUSPEND: 3846*0Sstevel@tonic-gate n = 1; 3847*0Sstevel@tonic-gate PL_reginput = locinput; 3848*0Sstevel@tonic-gate goto do_ifmatch; 3849*0Sstevel@tonic-gate case UNLESSM: 3850*0Sstevel@tonic-gate n = 0; 3851*0Sstevel@tonic-gate if (scan->flags) { 3852*0Sstevel@tonic-gate s = HOPBACKc(locinput, scan->flags); 3853*0Sstevel@tonic-gate if (!s) 3854*0Sstevel@tonic-gate goto say_yes; 3855*0Sstevel@tonic-gate PL_reginput = s; 3856*0Sstevel@tonic-gate } 3857*0Sstevel@tonic-gate else 3858*0Sstevel@tonic-gate PL_reginput = locinput; 3859*0Sstevel@tonic-gate PL_reg_flags ^= RF_false; 3860*0Sstevel@tonic-gate goto do_ifmatch; 3861*0Sstevel@tonic-gate case IFMATCH: 3862*0Sstevel@tonic-gate n = 1; 3863*0Sstevel@tonic-gate if (scan->flags) { 3864*0Sstevel@tonic-gate s = HOPBACKc(locinput, scan->flags); 3865*0Sstevel@tonic-gate if (!s) 3866*0Sstevel@tonic-gate goto say_no; 3867*0Sstevel@tonic-gate PL_reginput = s; 3868*0Sstevel@tonic-gate } 3869*0Sstevel@tonic-gate else 3870*0Sstevel@tonic-gate PL_reginput = locinput; 3871*0Sstevel@tonic-gate 3872*0Sstevel@tonic-gate do_ifmatch: 3873*0Sstevel@tonic-gate inner = NEXTOPER(NEXTOPER(scan)); 3874*0Sstevel@tonic-gate if (regmatch(inner) != n) { 3875*0Sstevel@tonic-gate if (n == 0) 3876*0Sstevel@tonic-gate PL_reg_flags ^= RF_false; 3877*0Sstevel@tonic-gate say_no: 3878*0Sstevel@tonic-gate if (logical) { 3879*0Sstevel@tonic-gate logical = 0; 3880*0Sstevel@tonic-gate sw = 0; 3881*0Sstevel@tonic-gate goto do_longjump; 3882*0Sstevel@tonic-gate } 3883*0Sstevel@tonic-gate else 3884*0Sstevel@tonic-gate sayNO; 3885*0Sstevel@tonic-gate } 3886*0Sstevel@tonic-gate if (n == 0) 3887*0Sstevel@tonic-gate PL_reg_flags ^= RF_false; 3888*0Sstevel@tonic-gate say_yes: 3889*0Sstevel@tonic-gate if (logical) { 3890*0Sstevel@tonic-gate logical = 0; 3891*0Sstevel@tonic-gate sw = 1; 3892*0Sstevel@tonic-gate } 3893*0Sstevel@tonic-gate if (OP(scan) == SUSPEND) { 3894*0Sstevel@tonic-gate locinput = PL_reginput; 3895*0Sstevel@tonic-gate nextchr = UCHARAT(locinput); 3896*0Sstevel@tonic-gate } 3897*0Sstevel@tonic-gate /* FALL THROUGH. */ 3898*0Sstevel@tonic-gate case LONGJMP: 3899*0Sstevel@tonic-gate do_longjump: 3900*0Sstevel@tonic-gate next = scan + ARG(scan); 3901*0Sstevel@tonic-gate if (next == scan) 3902*0Sstevel@tonic-gate next = NULL; 3903*0Sstevel@tonic-gate break; 3904*0Sstevel@tonic-gate default: 3905*0Sstevel@tonic-gate PerlIO_printf(Perl_error_log, "%"UVxf" %d\n", 3906*0Sstevel@tonic-gate PTR2UV(scan), OP(scan)); 3907*0Sstevel@tonic-gate Perl_croak(aTHX_ "regexp memory corruption"); 3908*0Sstevel@tonic-gate } 3909*0Sstevel@tonic-gate reenter: 3910*0Sstevel@tonic-gate scan = next; 3911*0Sstevel@tonic-gate } 3912*0Sstevel@tonic-gate 3913*0Sstevel@tonic-gate /* 3914*0Sstevel@tonic-gate * We get here only if there's trouble -- normally "case END" is 3915*0Sstevel@tonic-gate * the terminating point. 3916*0Sstevel@tonic-gate */ 3917*0Sstevel@tonic-gate Perl_croak(aTHX_ "corrupted regexp pointers"); 3918*0Sstevel@tonic-gate /*NOTREACHED*/ 3919*0Sstevel@tonic-gate sayNO; 3920*0Sstevel@tonic-gate 3921*0Sstevel@tonic-gate yes_loud: 3922*0Sstevel@tonic-gate DEBUG_r( 3923*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3924*0Sstevel@tonic-gate "%*s %scould match...%s\n", 3925*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5]) 3926*0Sstevel@tonic-gate ); 3927*0Sstevel@tonic-gate goto yes; 3928*0Sstevel@tonic-gate yes_final: 3929*0Sstevel@tonic-gate DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", 3930*0Sstevel@tonic-gate PL_colors[4],PL_colors[5])); 3931*0Sstevel@tonic-gate yes: 3932*0Sstevel@tonic-gate #ifdef DEBUGGING 3933*0Sstevel@tonic-gate PL_regindent--; 3934*0Sstevel@tonic-gate #endif 3935*0Sstevel@tonic-gate 3936*0Sstevel@tonic-gate #if 0 /* Breaks $^R */ 3937*0Sstevel@tonic-gate if (unwind) 3938*0Sstevel@tonic-gate regcpblow(firstcp); 3939*0Sstevel@tonic-gate #endif 3940*0Sstevel@tonic-gate return 1; 3941*0Sstevel@tonic-gate 3942*0Sstevel@tonic-gate no: 3943*0Sstevel@tonic-gate DEBUG_r( 3944*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 3945*0Sstevel@tonic-gate "%*s %sfailed...%s\n", 3946*0Sstevel@tonic-gate REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5]) 3947*0Sstevel@tonic-gate ); 3948*0Sstevel@tonic-gate goto do_no; 3949*0Sstevel@tonic-gate no_final: 3950*0Sstevel@tonic-gate do_no: 3951*0Sstevel@tonic-gate if (unwind) { 3952*0Sstevel@tonic-gate re_unwind_t *uw = SSPTRt(unwind,re_unwind_t); 3953*0Sstevel@tonic-gate 3954*0Sstevel@tonic-gate switch (uw->type) { 3955*0Sstevel@tonic-gate case RE_UNWIND_BRANCH: 3956*0Sstevel@tonic-gate case RE_UNWIND_BRANCHJ: 3957*0Sstevel@tonic-gate { 3958*0Sstevel@tonic-gate re_unwind_branch_t *uwb = &(uw->branch); 3959*0Sstevel@tonic-gate I32 lastparen = uwb->lastparen; 3960*0Sstevel@tonic-gate 3961*0Sstevel@tonic-gate REGCP_UNWIND(uwb->lastcp); 3962*0Sstevel@tonic-gate for (n = *PL_reglastparen; n > lastparen; n--) 3963*0Sstevel@tonic-gate PL_regendp[n] = -1; 3964*0Sstevel@tonic-gate *PL_reglastparen = n; 3965*0Sstevel@tonic-gate scan = next = uwb->next; 3966*0Sstevel@tonic-gate if ( !scan || 3967*0Sstevel@tonic-gate OP(scan) != (uwb->type == RE_UNWIND_BRANCH 3968*0Sstevel@tonic-gate ? BRANCH : BRANCHJ) ) { /* Failure */ 3969*0Sstevel@tonic-gate unwind = uwb->prev; 3970*0Sstevel@tonic-gate #ifdef DEBUGGING 3971*0Sstevel@tonic-gate PL_regindent--; 3972*0Sstevel@tonic-gate #endif 3973*0Sstevel@tonic-gate goto do_no; 3974*0Sstevel@tonic-gate } 3975*0Sstevel@tonic-gate /* Have more choice yet. Reuse the same uwb. */ 3976*0Sstevel@tonic-gate /*SUPPRESS 560*/ 3977*0Sstevel@tonic-gate if ((n = (uwb->type == RE_UNWIND_BRANCH 3978*0Sstevel@tonic-gate ? NEXT_OFF(next) : ARG(next)))) 3979*0Sstevel@tonic-gate next += n; 3980*0Sstevel@tonic-gate else 3981*0Sstevel@tonic-gate next = NULL; /* XXXX Needn't unwinding in this case... */ 3982*0Sstevel@tonic-gate uwb->next = next; 3983*0Sstevel@tonic-gate next = NEXTOPER(scan); 3984*0Sstevel@tonic-gate if (uwb->type == RE_UNWIND_BRANCHJ) 3985*0Sstevel@tonic-gate next = NEXTOPER(next); 3986*0Sstevel@tonic-gate locinput = uwb->locinput; 3987*0Sstevel@tonic-gate nextchr = uwb->nextchr; 3988*0Sstevel@tonic-gate #ifdef DEBUGGING 3989*0Sstevel@tonic-gate PL_regindent = uwb->regindent; 3990*0Sstevel@tonic-gate #endif 3991*0Sstevel@tonic-gate 3992*0Sstevel@tonic-gate goto reenter; 3993*0Sstevel@tonic-gate } 3994*0Sstevel@tonic-gate /* NOT REACHED */ 3995*0Sstevel@tonic-gate default: 3996*0Sstevel@tonic-gate Perl_croak(aTHX_ "regexp unwind memory corruption"); 3997*0Sstevel@tonic-gate } 3998*0Sstevel@tonic-gate /* NOT REACHED */ 3999*0Sstevel@tonic-gate } 4000*0Sstevel@tonic-gate #ifdef DEBUGGING 4001*0Sstevel@tonic-gate PL_regindent--; 4002*0Sstevel@tonic-gate #endif 4003*0Sstevel@tonic-gate return 0; 4004*0Sstevel@tonic-gate } 4005*0Sstevel@tonic-gate 4006*0Sstevel@tonic-gate /* 4007*0Sstevel@tonic-gate - regrepeat - repeatedly match something simple, report how many 4008*0Sstevel@tonic-gate */ 4009*0Sstevel@tonic-gate /* 4010*0Sstevel@tonic-gate * [This routine now assumes that it will only match on things of length 1. 4011*0Sstevel@tonic-gate * That was true before, but now we assume scan - reginput is the count, 4012*0Sstevel@tonic-gate * rather than incrementing count on every character. [Er, except utf8.]] 4013*0Sstevel@tonic-gate */ 4014*0Sstevel@tonic-gate STATIC I32 4015*0Sstevel@tonic-gate S_regrepeat(pTHX_ regnode *p, I32 max) 4016*0Sstevel@tonic-gate { 4017*0Sstevel@tonic-gate register char *scan; 4018*0Sstevel@tonic-gate register I32 c; 4019*0Sstevel@tonic-gate register char *loceol = PL_regeol; 4020*0Sstevel@tonic-gate register I32 hardcount = 0; 4021*0Sstevel@tonic-gate register bool do_utf8 = PL_reg_match_utf8; 4022*0Sstevel@tonic-gate 4023*0Sstevel@tonic-gate scan = PL_reginput; 4024*0Sstevel@tonic-gate if (max == REG_INFTY) 4025*0Sstevel@tonic-gate max = I32_MAX; 4026*0Sstevel@tonic-gate else if (max < loceol - scan) 4027*0Sstevel@tonic-gate loceol = scan + max; 4028*0Sstevel@tonic-gate switch (OP(p)) { 4029*0Sstevel@tonic-gate case REG_ANY: 4030*0Sstevel@tonic-gate if (do_utf8) { 4031*0Sstevel@tonic-gate loceol = PL_regeol; 4032*0Sstevel@tonic-gate while (scan < loceol && hardcount < max && *scan != '\n') { 4033*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4034*0Sstevel@tonic-gate hardcount++; 4035*0Sstevel@tonic-gate } 4036*0Sstevel@tonic-gate } else { 4037*0Sstevel@tonic-gate while (scan < loceol && *scan != '\n') 4038*0Sstevel@tonic-gate scan++; 4039*0Sstevel@tonic-gate } 4040*0Sstevel@tonic-gate break; 4041*0Sstevel@tonic-gate case SANY: 4042*0Sstevel@tonic-gate if (do_utf8) { 4043*0Sstevel@tonic-gate loceol = PL_regeol; 4044*0Sstevel@tonic-gate while (scan < loceol && hardcount < max) { 4045*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4046*0Sstevel@tonic-gate hardcount++; 4047*0Sstevel@tonic-gate } 4048*0Sstevel@tonic-gate } 4049*0Sstevel@tonic-gate else 4050*0Sstevel@tonic-gate scan = loceol; 4051*0Sstevel@tonic-gate break; 4052*0Sstevel@tonic-gate case CANY: 4053*0Sstevel@tonic-gate scan = loceol; 4054*0Sstevel@tonic-gate break; 4055*0Sstevel@tonic-gate case EXACT: /* length of string is 1 */ 4056*0Sstevel@tonic-gate c = (U8)*STRING(p); 4057*0Sstevel@tonic-gate while (scan < loceol && UCHARAT(scan) == c) 4058*0Sstevel@tonic-gate scan++; 4059*0Sstevel@tonic-gate break; 4060*0Sstevel@tonic-gate case EXACTF: /* length of string is 1 */ 4061*0Sstevel@tonic-gate c = (U8)*STRING(p); 4062*0Sstevel@tonic-gate while (scan < loceol && 4063*0Sstevel@tonic-gate (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c])) 4064*0Sstevel@tonic-gate scan++; 4065*0Sstevel@tonic-gate break; 4066*0Sstevel@tonic-gate case EXACTFL: /* length of string is 1 */ 4067*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4068*0Sstevel@tonic-gate c = (U8)*STRING(p); 4069*0Sstevel@tonic-gate while (scan < loceol && 4070*0Sstevel@tonic-gate (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c])) 4071*0Sstevel@tonic-gate scan++; 4072*0Sstevel@tonic-gate break; 4073*0Sstevel@tonic-gate case ANYOF: 4074*0Sstevel@tonic-gate if (do_utf8) { 4075*0Sstevel@tonic-gate loceol = PL_regeol; 4076*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4077*0Sstevel@tonic-gate reginclass(p, (U8*)scan, 0, do_utf8)) { 4078*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4079*0Sstevel@tonic-gate hardcount++; 4080*0Sstevel@tonic-gate } 4081*0Sstevel@tonic-gate } else { 4082*0Sstevel@tonic-gate while (scan < loceol && REGINCLASS(p, (U8*)scan)) 4083*0Sstevel@tonic-gate scan++; 4084*0Sstevel@tonic-gate } 4085*0Sstevel@tonic-gate break; 4086*0Sstevel@tonic-gate case ALNUM: 4087*0Sstevel@tonic-gate if (do_utf8) { 4088*0Sstevel@tonic-gate loceol = PL_regeol; 4089*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 4090*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4091*0Sstevel@tonic-gate swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) { 4092*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4093*0Sstevel@tonic-gate hardcount++; 4094*0Sstevel@tonic-gate } 4095*0Sstevel@tonic-gate } else { 4096*0Sstevel@tonic-gate while (scan < loceol && isALNUM(*scan)) 4097*0Sstevel@tonic-gate scan++; 4098*0Sstevel@tonic-gate } 4099*0Sstevel@tonic-gate break; 4100*0Sstevel@tonic-gate case ALNUML: 4101*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4102*0Sstevel@tonic-gate if (do_utf8) { 4103*0Sstevel@tonic-gate loceol = PL_regeol; 4104*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4105*0Sstevel@tonic-gate isALNUM_LC_utf8((U8*)scan)) { 4106*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4107*0Sstevel@tonic-gate hardcount++; 4108*0Sstevel@tonic-gate } 4109*0Sstevel@tonic-gate } else { 4110*0Sstevel@tonic-gate while (scan < loceol && isALNUM_LC(*scan)) 4111*0Sstevel@tonic-gate scan++; 4112*0Sstevel@tonic-gate } 4113*0Sstevel@tonic-gate break; 4114*0Sstevel@tonic-gate case NALNUM: 4115*0Sstevel@tonic-gate if (do_utf8) { 4116*0Sstevel@tonic-gate loceol = PL_regeol; 4117*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(alnum,"a"); 4118*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4119*0Sstevel@tonic-gate !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) { 4120*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4121*0Sstevel@tonic-gate hardcount++; 4122*0Sstevel@tonic-gate } 4123*0Sstevel@tonic-gate } else { 4124*0Sstevel@tonic-gate while (scan < loceol && !isALNUM(*scan)) 4125*0Sstevel@tonic-gate scan++; 4126*0Sstevel@tonic-gate } 4127*0Sstevel@tonic-gate break; 4128*0Sstevel@tonic-gate case NALNUML: 4129*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4130*0Sstevel@tonic-gate if (do_utf8) { 4131*0Sstevel@tonic-gate loceol = PL_regeol; 4132*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4133*0Sstevel@tonic-gate !isALNUM_LC_utf8((U8*)scan)) { 4134*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4135*0Sstevel@tonic-gate hardcount++; 4136*0Sstevel@tonic-gate } 4137*0Sstevel@tonic-gate } else { 4138*0Sstevel@tonic-gate while (scan < loceol && !isALNUM_LC(*scan)) 4139*0Sstevel@tonic-gate scan++; 4140*0Sstevel@tonic-gate } 4141*0Sstevel@tonic-gate break; 4142*0Sstevel@tonic-gate case SPACE: 4143*0Sstevel@tonic-gate if (do_utf8) { 4144*0Sstevel@tonic-gate loceol = PL_regeol; 4145*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," "); 4146*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4147*0Sstevel@tonic-gate (*scan == ' ' || 4148*0Sstevel@tonic-gate swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) { 4149*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4150*0Sstevel@tonic-gate hardcount++; 4151*0Sstevel@tonic-gate } 4152*0Sstevel@tonic-gate } else { 4153*0Sstevel@tonic-gate while (scan < loceol && isSPACE(*scan)) 4154*0Sstevel@tonic-gate scan++; 4155*0Sstevel@tonic-gate } 4156*0Sstevel@tonic-gate break; 4157*0Sstevel@tonic-gate case SPACEL: 4158*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4159*0Sstevel@tonic-gate if (do_utf8) { 4160*0Sstevel@tonic-gate loceol = PL_regeol; 4161*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4162*0Sstevel@tonic-gate (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { 4163*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4164*0Sstevel@tonic-gate hardcount++; 4165*0Sstevel@tonic-gate } 4166*0Sstevel@tonic-gate } else { 4167*0Sstevel@tonic-gate while (scan < loceol && isSPACE_LC(*scan)) 4168*0Sstevel@tonic-gate scan++; 4169*0Sstevel@tonic-gate } 4170*0Sstevel@tonic-gate break; 4171*0Sstevel@tonic-gate case NSPACE: 4172*0Sstevel@tonic-gate if (do_utf8) { 4173*0Sstevel@tonic-gate loceol = PL_regeol; 4174*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(space," "); 4175*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4176*0Sstevel@tonic-gate !(*scan == ' ' || 4177*0Sstevel@tonic-gate swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) { 4178*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4179*0Sstevel@tonic-gate hardcount++; 4180*0Sstevel@tonic-gate } 4181*0Sstevel@tonic-gate } else { 4182*0Sstevel@tonic-gate while (scan < loceol && !isSPACE(*scan)) 4183*0Sstevel@tonic-gate scan++; 4184*0Sstevel@tonic-gate break; 4185*0Sstevel@tonic-gate } 4186*0Sstevel@tonic-gate case NSPACEL: 4187*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4188*0Sstevel@tonic-gate if (do_utf8) { 4189*0Sstevel@tonic-gate loceol = PL_regeol; 4190*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4191*0Sstevel@tonic-gate !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { 4192*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4193*0Sstevel@tonic-gate hardcount++; 4194*0Sstevel@tonic-gate } 4195*0Sstevel@tonic-gate } else { 4196*0Sstevel@tonic-gate while (scan < loceol && !isSPACE_LC(*scan)) 4197*0Sstevel@tonic-gate scan++; 4198*0Sstevel@tonic-gate } 4199*0Sstevel@tonic-gate break; 4200*0Sstevel@tonic-gate case DIGIT: 4201*0Sstevel@tonic-gate if (do_utf8) { 4202*0Sstevel@tonic-gate loceol = PL_regeol; 4203*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0"); 4204*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4205*0Sstevel@tonic-gate swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) { 4206*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4207*0Sstevel@tonic-gate hardcount++; 4208*0Sstevel@tonic-gate } 4209*0Sstevel@tonic-gate } else { 4210*0Sstevel@tonic-gate while (scan < loceol && isDIGIT(*scan)) 4211*0Sstevel@tonic-gate scan++; 4212*0Sstevel@tonic-gate } 4213*0Sstevel@tonic-gate break; 4214*0Sstevel@tonic-gate case NDIGIT: 4215*0Sstevel@tonic-gate if (do_utf8) { 4216*0Sstevel@tonic-gate loceol = PL_regeol; 4217*0Sstevel@tonic-gate LOAD_UTF8_CHARCLASS(digit,"0"); 4218*0Sstevel@tonic-gate while (hardcount < max && scan < loceol && 4219*0Sstevel@tonic-gate !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) { 4220*0Sstevel@tonic-gate scan += UTF8SKIP(scan); 4221*0Sstevel@tonic-gate hardcount++; 4222*0Sstevel@tonic-gate } 4223*0Sstevel@tonic-gate } else { 4224*0Sstevel@tonic-gate while (scan < loceol && !isDIGIT(*scan)) 4225*0Sstevel@tonic-gate scan++; 4226*0Sstevel@tonic-gate } 4227*0Sstevel@tonic-gate break; 4228*0Sstevel@tonic-gate default: /* Called on something of 0 width. */ 4229*0Sstevel@tonic-gate break; /* So match right here or not at all. */ 4230*0Sstevel@tonic-gate } 4231*0Sstevel@tonic-gate 4232*0Sstevel@tonic-gate if (hardcount) 4233*0Sstevel@tonic-gate c = hardcount; 4234*0Sstevel@tonic-gate else 4235*0Sstevel@tonic-gate c = scan - PL_reginput; 4236*0Sstevel@tonic-gate PL_reginput = scan; 4237*0Sstevel@tonic-gate 4238*0Sstevel@tonic-gate DEBUG_r( 4239*0Sstevel@tonic-gate { 4240*0Sstevel@tonic-gate SV *prop = sv_newmortal(); 4241*0Sstevel@tonic-gate 4242*0Sstevel@tonic-gate regprop(prop, p); 4243*0Sstevel@tonic-gate PerlIO_printf(Perl_debug_log, 4244*0Sstevel@tonic-gate "%*s %s can match %"IVdf" times out of %"IVdf"...\n", 4245*0Sstevel@tonic-gate REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max); 4246*0Sstevel@tonic-gate }); 4247*0Sstevel@tonic-gate 4248*0Sstevel@tonic-gate return(c); 4249*0Sstevel@tonic-gate } 4250*0Sstevel@tonic-gate 4251*0Sstevel@tonic-gate /* 4252*0Sstevel@tonic-gate - regrepeat_hard - repeatedly match something, report total lenth and length 4253*0Sstevel@tonic-gate * 4254*0Sstevel@tonic-gate * The repeater is supposed to have constant length. 4255*0Sstevel@tonic-gate */ 4256*0Sstevel@tonic-gate 4257*0Sstevel@tonic-gate STATIC I32 4258*0Sstevel@tonic-gate S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp) 4259*0Sstevel@tonic-gate { 4260*0Sstevel@tonic-gate register char *scan = Nullch; 4261*0Sstevel@tonic-gate register char *start; 4262*0Sstevel@tonic-gate register char *loceol = PL_regeol; 4263*0Sstevel@tonic-gate I32 l = 0; 4264*0Sstevel@tonic-gate I32 count = 0, res = 1; 4265*0Sstevel@tonic-gate 4266*0Sstevel@tonic-gate if (!max) 4267*0Sstevel@tonic-gate return 0; 4268*0Sstevel@tonic-gate 4269*0Sstevel@tonic-gate start = PL_reginput; 4270*0Sstevel@tonic-gate if (PL_reg_match_utf8) { 4271*0Sstevel@tonic-gate while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) { 4272*0Sstevel@tonic-gate if (!count++) { 4273*0Sstevel@tonic-gate l = 0; 4274*0Sstevel@tonic-gate while (start < PL_reginput) { 4275*0Sstevel@tonic-gate l++; 4276*0Sstevel@tonic-gate start += UTF8SKIP(start); 4277*0Sstevel@tonic-gate } 4278*0Sstevel@tonic-gate *lp = l; 4279*0Sstevel@tonic-gate if (l == 0) 4280*0Sstevel@tonic-gate return max; 4281*0Sstevel@tonic-gate } 4282*0Sstevel@tonic-gate if (count == max) 4283*0Sstevel@tonic-gate return count; 4284*0Sstevel@tonic-gate } 4285*0Sstevel@tonic-gate } 4286*0Sstevel@tonic-gate else { 4287*0Sstevel@tonic-gate while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) { 4288*0Sstevel@tonic-gate if (!count++) { 4289*0Sstevel@tonic-gate *lp = l = PL_reginput - start; 4290*0Sstevel@tonic-gate if (max != REG_INFTY && l*max < loceol - scan) 4291*0Sstevel@tonic-gate loceol = scan + l*max; 4292*0Sstevel@tonic-gate if (l == 0) 4293*0Sstevel@tonic-gate return max; 4294*0Sstevel@tonic-gate } 4295*0Sstevel@tonic-gate } 4296*0Sstevel@tonic-gate } 4297*0Sstevel@tonic-gate if (!res) 4298*0Sstevel@tonic-gate PL_reginput = scan; 4299*0Sstevel@tonic-gate 4300*0Sstevel@tonic-gate return count; 4301*0Sstevel@tonic-gate } 4302*0Sstevel@tonic-gate 4303*0Sstevel@tonic-gate /* 4304*0Sstevel@tonic-gate - regclass_swash - prepare the utf8 swash 4305*0Sstevel@tonic-gate */ 4306*0Sstevel@tonic-gate 4307*0Sstevel@tonic-gate SV * 4308*0Sstevel@tonic-gate Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp) 4309*0Sstevel@tonic-gate { 4310*0Sstevel@tonic-gate SV *sw = NULL; 4311*0Sstevel@tonic-gate SV *si = NULL; 4312*0Sstevel@tonic-gate SV *alt = NULL; 4313*0Sstevel@tonic-gate 4314*0Sstevel@tonic-gate if (PL_regdata && PL_regdata->count) { 4315*0Sstevel@tonic-gate U32 n = ARG(node); 4316*0Sstevel@tonic-gate 4317*0Sstevel@tonic-gate if (PL_regdata->what[n] == 's') { 4318*0Sstevel@tonic-gate SV *rv = (SV*)PL_regdata->data[n]; 4319*0Sstevel@tonic-gate AV *av = (AV*)SvRV((SV*)rv); 4320*0Sstevel@tonic-gate SV **ary = AvARRAY(av); 4321*0Sstevel@tonic-gate SV **a, **b; 4322*0Sstevel@tonic-gate 4323*0Sstevel@tonic-gate /* See the end of regcomp.c:S_reglass() for 4324*0Sstevel@tonic-gate * documentation of these array elements. */ 4325*0Sstevel@tonic-gate 4326*0Sstevel@tonic-gate si = *ary; 4327*0Sstevel@tonic-gate a = SvTYPE(ary[1]) == SVt_RV ? &ary[1] : 0; 4328*0Sstevel@tonic-gate b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0; 4329*0Sstevel@tonic-gate 4330*0Sstevel@tonic-gate if (a) 4331*0Sstevel@tonic-gate sw = *a; 4332*0Sstevel@tonic-gate else if (si && doinit) { 4333*0Sstevel@tonic-gate sw = swash_init("utf8", "", si, 1, 0); 4334*0Sstevel@tonic-gate (void)av_store(av, 1, sw); 4335*0Sstevel@tonic-gate } 4336*0Sstevel@tonic-gate if (b) 4337*0Sstevel@tonic-gate alt = *b; 4338*0Sstevel@tonic-gate } 4339*0Sstevel@tonic-gate } 4340*0Sstevel@tonic-gate 4341*0Sstevel@tonic-gate if (listsvp) 4342*0Sstevel@tonic-gate *listsvp = si; 4343*0Sstevel@tonic-gate if (altsvp) 4344*0Sstevel@tonic-gate *altsvp = alt; 4345*0Sstevel@tonic-gate 4346*0Sstevel@tonic-gate return sw; 4347*0Sstevel@tonic-gate } 4348*0Sstevel@tonic-gate 4349*0Sstevel@tonic-gate /* 4350*0Sstevel@tonic-gate - reginclass - determine if a character falls into a character class 4351*0Sstevel@tonic-gate 4352*0Sstevel@tonic-gate The n is the ANYOF regnode, the p is the target string, lenp 4353*0Sstevel@tonic-gate is pointer to the maximum length of how far to go in the p 4354*0Sstevel@tonic-gate (if the lenp is zero, UTF8SKIP(p) is used), 4355*0Sstevel@tonic-gate do_utf8 tells whether the target string is in UTF-8. 4356*0Sstevel@tonic-gate 4357*0Sstevel@tonic-gate */ 4358*0Sstevel@tonic-gate 4359*0Sstevel@tonic-gate STATIC bool 4360*0Sstevel@tonic-gate S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8) 4361*0Sstevel@tonic-gate { 4362*0Sstevel@tonic-gate char flags = ANYOF_FLAGS(n); 4363*0Sstevel@tonic-gate bool match = FALSE; 4364*0Sstevel@tonic-gate UV c = *p; 4365*0Sstevel@tonic-gate STRLEN len = 0; 4366*0Sstevel@tonic-gate STRLEN plen; 4367*0Sstevel@tonic-gate 4368*0Sstevel@tonic-gate if (do_utf8 && !UTF8_IS_INVARIANT(c)) 4369*0Sstevel@tonic-gate c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len, 4370*0Sstevel@tonic-gate ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 4371*0Sstevel@tonic-gate 4372*0Sstevel@tonic-gate plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c)); 4373*0Sstevel@tonic-gate if (do_utf8 || (flags & ANYOF_UNICODE)) { 4374*0Sstevel@tonic-gate if (lenp) 4375*0Sstevel@tonic-gate *lenp = 0; 4376*0Sstevel@tonic-gate if (do_utf8 && !ANYOF_RUNTIME(n)) { 4377*0Sstevel@tonic-gate if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c)) 4378*0Sstevel@tonic-gate match = TRUE; 4379*0Sstevel@tonic-gate } 4380*0Sstevel@tonic-gate if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256) 4381*0Sstevel@tonic-gate match = TRUE; 4382*0Sstevel@tonic-gate if (!match) { 4383*0Sstevel@tonic-gate AV *av; 4384*0Sstevel@tonic-gate SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av); 4385*0Sstevel@tonic-gate 4386*0Sstevel@tonic-gate if (sw) { 4387*0Sstevel@tonic-gate if (swash_fetch(sw, p, do_utf8)) 4388*0Sstevel@tonic-gate match = TRUE; 4389*0Sstevel@tonic-gate else if (flags & ANYOF_FOLD) { 4390*0Sstevel@tonic-gate if (!match && lenp && av) { 4391*0Sstevel@tonic-gate I32 i; 4392*0Sstevel@tonic-gate 4393*0Sstevel@tonic-gate for (i = 0; i <= av_len(av); i++) { 4394*0Sstevel@tonic-gate SV* sv = *av_fetch(av, i, FALSE); 4395*0Sstevel@tonic-gate STRLEN len; 4396*0Sstevel@tonic-gate char *s = SvPV(sv, len); 4397*0Sstevel@tonic-gate 4398*0Sstevel@tonic-gate if (len <= plen && memEQ(s, (char*)p, len)) { 4399*0Sstevel@tonic-gate *lenp = len; 4400*0Sstevel@tonic-gate match = TRUE; 4401*0Sstevel@tonic-gate break; 4402*0Sstevel@tonic-gate } 4403*0Sstevel@tonic-gate } 4404*0Sstevel@tonic-gate } 4405*0Sstevel@tonic-gate if (!match) { 4406*0Sstevel@tonic-gate U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; 4407*0Sstevel@tonic-gate STRLEN tmplen; 4408*0Sstevel@tonic-gate 4409*0Sstevel@tonic-gate to_utf8_fold(p, tmpbuf, &tmplen); 4410*0Sstevel@tonic-gate if (swash_fetch(sw, tmpbuf, do_utf8)) 4411*0Sstevel@tonic-gate match = TRUE; 4412*0Sstevel@tonic-gate } 4413*0Sstevel@tonic-gate } 4414*0Sstevel@tonic-gate } 4415*0Sstevel@tonic-gate } 4416*0Sstevel@tonic-gate if (match && lenp && *lenp == 0) 4417*0Sstevel@tonic-gate *lenp = UNISKIP(NATIVE_TO_UNI(c)); 4418*0Sstevel@tonic-gate } 4419*0Sstevel@tonic-gate if (!match && c < 256) { 4420*0Sstevel@tonic-gate if (ANYOF_BITMAP_TEST(n, c)) 4421*0Sstevel@tonic-gate match = TRUE; 4422*0Sstevel@tonic-gate else if (flags & ANYOF_FOLD) { 4423*0Sstevel@tonic-gate U8 f; 4424*0Sstevel@tonic-gate 4425*0Sstevel@tonic-gate if (flags & ANYOF_LOCALE) { 4426*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4427*0Sstevel@tonic-gate f = PL_fold_locale[c]; 4428*0Sstevel@tonic-gate } 4429*0Sstevel@tonic-gate else 4430*0Sstevel@tonic-gate f = PL_fold[c]; 4431*0Sstevel@tonic-gate if (f != c && ANYOF_BITMAP_TEST(n, f)) 4432*0Sstevel@tonic-gate match = TRUE; 4433*0Sstevel@tonic-gate } 4434*0Sstevel@tonic-gate 4435*0Sstevel@tonic-gate if (!match && (flags & ANYOF_CLASS)) { 4436*0Sstevel@tonic-gate PL_reg_flags |= RF_tainted; 4437*0Sstevel@tonic-gate if ( 4438*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) || 4439*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) || 4440*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) || 4441*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) || 4442*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) || 4443*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) || 4444*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) || 4445*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) || 4446*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) || 4447*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) || 4448*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) || 4449*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) || 4450*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) || 4451*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) || 4452*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) || 4453*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) || 4454*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) || 4455*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) || 4456*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) || 4457*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) || 4458*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) || 4459*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) || 4460*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) || 4461*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) || 4462*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) || 4463*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) || 4464*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) || 4465*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) || 4466*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) || 4467*0Sstevel@tonic-gate (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c)) 4468*0Sstevel@tonic-gate ) /* How's that for a conditional? */ 4469*0Sstevel@tonic-gate { 4470*0Sstevel@tonic-gate match = TRUE; 4471*0Sstevel@tonic-gate } 4472*0Sstevel@tonic-gate } 4473*0Sstevel@tonic-gate } 4474*0Sstevel@tonic-gate 4475*0Sstevel@tonic-gate return (flags & ANYOF_INVERT) ? !match : match; 4476*0Sstevel@tonic-gate } 4477*0Sstevel@tonic-gate 4478*0Sstevel@tonic-gate STATIC U8 * 4479*0Sstevel@tonic-gate S_reghop(pTHX_ U8 *s, I32 off) 4480*0Sstevel@tonic-gate { 4481*0Sstevel@tonic-gate return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)); 4482*0Sstevel@tonic-gate } 4483*0Sstevel@tonic-gate 4484*0Sstevel@tonic-gate STATIC U8 * 4485*0Sstevel@tonic-gate S_reghop3(pTHX_ U8 *s, I32 off, U8* lim) 4486*0Sstevel@tonic-gate { 4487*0Sstevel@tonic-gate if (off >= 0) { 4488*0Sstevel@tonic-gate while (off-- && s < lim) { 4489*0Sstevel@tonic-gate /* XXX could check well-formedness here */ 4490*0Sstevel@tonic-gate s += UTF8SKIP(s); 4491*0Sstevel@tonic-gate } 4492*0Sstevel@tonic-gate } 4493*0Sstevel@tonic-gate else { 4494*0Sstevel@tonic-gate while (off++) { 4495*0Sstevel@tonic-gate if (s > lim) { 4496*0Sstevel@tonic-gate s--; 4497*0Sstevel@tonic-gate if (UTF8_IS_CONTINUED(*s)) { 4498*0Sstevel@tonic-gate while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s)) 4499*0Sstevel@tonic-gate s--; 4500*0Sstevel@tonic-gate } 4501*0Sstevel@tonic-gate /* XXX could check well-formedness here */ 4502*0Sstevel@tonic-gate } 4503*0Sstevel@tonic-gate } 4504*0Sstevel@tonic-gate } 4505*0Sstevel@tonic-gate return s; 4506*0Sstevel@tonic-gate } 4507*0Sstevel@tonic-gate 4508*0Sstevel@tonic-gate STATIC U8 * 4509*0Sstevel@tonic-gate S_reghopmaybe(pTHX_ U8 *s, I32 off) 4510*0Sstevel@tonic-gate { 4511*0Sstevel@tonic-gate return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)); 4512*0Sstevel@tonic-gate } 4513*0Sstevel@tonic-gate 4514*0Sstevel@tonic-gate STATIC U8 * 4515*0Sstevel@tonic-gate S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim) 4516*0Sstevel@tonic-gate { 4517*0Sstevel@tonic-gate if (off >= 0) { 4518*0Sstevel@tonic-gate while (off-- && s < lim) { 4519*0Sstevel@tonic-gate /* XXX could check well-formedness here */ 4520*0Sstevel@tonic-gate s += UTF8SKIP(s); 4521*0Sstevel@tonic-gate } 4522*0Sstevel@tonic-gate if (off >= 0) 4523*0Sstevel@tonic-gate return 0; 4524*0Sstevel@tonic-gate } 4525*0Sstevel@tonic-gate else { 4526*0Sstevel@tonic-gate while (off++) { 4527*0Sstevel@tonic-gate if (s > lim) { 4528*0Sstevel@tonic-gate s--; 4529*0Sstevel@tonic-gate if (UTF8_IS_CONTINUED(*s)) { 4530*0Sstevel@tonic-gate while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s)) 4531*0Sstevel@tonic-gate s--; 4532*0Sstevel@tonic-gate } 4533*0Sstevel@tonic-gate /* XXX could check well-formedness here */ 4534*0Sstevel@tonic-gate } 4535*0Sstevel@tonic-gate else 4536*0Sstevel@tonic-gate break; 4537*0Sstevel@tonic-gate } 4538*0Sstevel@tonic-gate if (off <= 0) 4539*0Sstevel@tonic-gate return 0; 4540*0Sstevel@tonic-gate } 4541*0Sstevel@tonic-gate return s; 4542*0Sstevel@tonic-gate } 4543*0Sstevel@tonic-gate 4544*0Sstevel@tonic-gate static void 4545*0Sstevel@tonic-gate restore_pos(pTHX_ void *arg) 4546*0Sstevel@tonic-gate { 4547*0Sstevel@tonic-gate if (PL_reg_eval_set) { 4548*0Sstevel@tonic-gate if (PL_reg_oldsaved) { 4549*0Sstevel@tonic-gate PL_reg_re->subbeg = PL_reg_oldsaved; 4550*0Sstevel@tonic-gate PL_reg_re->sublen = PL_reg_oldsavedlen; 4551*0Sstevel@tonic-gate RX_MATCH_COPIED_on(PL_reg_re); 4552*0Sstevel@tonic-gate } 4553*0Sstevel@tonic-gate PL_reg_magic->mg_len = PL_reg_oldpos; 4554*0Sstevel@tonic-gate PL_reg_eval_set = 0; 4555*0Sstevel@tonic-gate PL_curpm = PL_reg_oldcurpm; 4556*0Sstevel@tonic-gate } 4557*0Sstevel@tonic-gate } 4558*0Sstevel@tonic-gate 4559*0Sstevel@tonic-gate STATIC void 4560*0Sstevel@tonic-gate S_to_utf8_substr(pTHX_ register regexp *prog) 4561*0Sstevel@tonic-gate { 4562*0Sstevel@tonic-gate SV* sv; 4563*0Sstevel@tonic-gate if (prog->float_substr && !prog->float_utf8) { 4564*0Sstevel@tonic-gate prog->float_utf8 = sv = NEWSV(117, 0); 4565*0Sstevel@tonic-gate SvSetSV(sv, prog->float_substr); 4566*0Sstevel@tonic-gate sv_utf8_upgrade(sv); 4567*0Sstevel@tonic-gate if (SvTAIL(prog->float_substr)) 4568*0Sstevel@tonic-gate SvTAIL_on(sv); 4569*0Sstevel@tonic-gate if (prog->float_substr == prog->check_substr) 4570*0Sstevel@tonic-gate prog->check_utf8 = sv; 4571*0Sstevel@tonic-gate } 4572*0Sstevel@tonic-gate if (prog->anchored_substr && !prog->anchored_utf8) { 4573*0Sstevel@tonic-gate prog->anchored_utf8 = sv = NEWSV(118, 0); 4574*0Sstevel@tonic-gate SvSetSV(sv, prog->anchored_substr); 4575*0Sstevel@tonic-gate sv_utf8_upgrade(sv); 4576*0Sstevel@tonic-gate if (SvTAIL(prog->anchored_substr)) 4577*0Sstevel@tonic-gate SvTAIL_on(sv); 4578*0Sstevel@tonic-gate if (prog->anchored_substr == prog->check_substr) 4579*0Sstevel@tonic-gate prog->check_utf8 = sv; 4580*0Sstevel@tonic-gate } 4581*0Sstevel@tonic-gate } 4582*0Sstevel@tonic-gate 4583*0Sstevel@tonic-gate STATIC void 4584*0Sstevel@tonic-gate S_to_byte_substr(pTHX_ register regexp *prog) 4585*0Sstevel@tonic-gate { 4586*0Sstevel@tonic-gate SV* sv; 4587*0Sstevel@tonic-gate if (prog->float_utf8 && !prog->float_substr) { 4588*0Sstevel@tonic-gate prog->float_substr = sv = NEWSV(117, 0); 4589*0Sstevel@tonic-gate SvSetSV(sv, prog->float_utf8); 4590*0Sstevel@tonic-gate if (sv_utf8_downgrade(sv, TRUE)) { 4591*0Sstevel@tonic-gate if (SvTAIL(prog->float_utf8)) 4592*0Sstevel@tonic-gate SvTAIL_on(sv); 4593*0Sstevel@tonic-gate } else { 4594*0Sstevel@tonic-gate SvREFCNT_dec(sv); 4595*0Sstevel@tonic-gate prog->float_substr = sv = &PL_sv_undef; 4596*0Sstevel@tonic-gate } 4597*0Sstevel@tonic-gate if (prog->float_utf8 == prog->check_utf8) 4598*0Sstevel@tonic-gate prog->check_substr = sv; 4599*0Sstevel@tonic-gate } 4600*0Sstevel@tonic-gate if (prog->anchored_utf8 && !prog->anchored_substr) { 4601*0Sstevel@tonic-gate prog->anchored_substr = sv = NEWSV(118, 0); 4602*0Sstevel@tonic-gate SvSetSV(sv, prog->anchored_utf8); 4603*0Sstevel@tonic-gate if (sv_utf8_downgrade(sv, TRUE)) { 4604*0Sstevel@tonic-gate if (SvTAIL(prog->anchored_utf8)) 4605*0Sstevel@tonic-gate SvTAIL_on(sv); 4606*0Sstevel@tonic-gate } else { 4607*0Sstevel@tonic-gate SvREFCNT_dec(sv); 4608*0Sstevel@tonic-gate prog->anchored_substr = sv = &PL_sv_undef; 4609*0Sstevel@tonic-gate } 4610*0Sstevel@tonic-gate if (prog->anchored_utf8 == prog->check_utf8) 4611*0Sstevel@tonic-gate prog->check_substr = sv; 4612*0Sstevel@tonic-gate } 4613*0Sstevel@tonic-gate } 4614