1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 This is the lower layer of the Perl parser, managing characters and tokens. 27 28 =for apidoc AmnU|yy_parser *|PL_parser 29 30 Pointer to a structure encapsulating the state of the parsing operation 31 currently in progress. The pointer can be locally changed to perform 32 a nested parse without interfering with the state of an outer parse. 33 Individual members of C<PL_parser> have their own documentation. 34 35 =cut 36 */ 37 38 #include "EXTERN.h" 39 #define PERL_IN_TOKE_C 40 #include "perl.h" 41 #include "invlist_inline.h" 42 43 #define new_constant(a,b,c,d,e,f,g, h) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h) 45 46 #define pl_yylval (PL_parser->yylval) 47 48 /* XXX temporary backwards compatibility */ 49 #define PL_lex_brackets (PL_parser->lex_brackets) 50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 52 #define PL_lex_brackstack (PL_parser->lex_brackstack) 53 #define PL_lex_casemods (PL_parser->lex_casemods) 54 #define PL_lex_casestack (PL_parser->lex_casestack) 55 #define PL_lex_dojoin (PL_parser->lex_dojoin) 56 #define PL_lex_formbrack (PL_parser->lex_formbrack) 57 #define PL_lex_inpat (PL_parser->lex_inpat) 58 #define PL_lex_inwhat (PL_parser->lex_inwhat) 59 #define PL_lex_op (PL_parser->lex_op) 60 #define PL_lex_repl (PL_parser->lex_repl) 61 #define PL_lex_starts (PL_parser->lex_starts) 62 #define PL_lex_stuff (PL_parser->lex_stuff) 63 #define PL_multi_start (PL_parser->multi_start) 64 #define PL_multi_open (PL_parser->multi_open) 65 #define PL_multi_close (PL_parser->multi_close) 66 #define PL_preambled (PL_parser->preambled) 67 #define PL_linestr (PL_parser->linestr) 68 #define PL_expect (PL_parser->expect) 69 #define PL_copline (PL_parser->copline) 70 #define PL_bufptr (PL_parser->bufptr) 71 #define PL_oldbufptr (PL_parser->oldbufptr) 72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 73 #define PL_linestart (PL_parser->linestart) 74 #define PL_bufend (PL_parser->bufend) 75 #define PL_last_uni (PL_parser->last_uni) 76 #define PL_last_lop (PL_parser->last_lop) 77 #define PL_last_lop_op (PL_parser->last_lop_op) 78 #define PL_lex_state (PL_parser->lex_state) 79 #define PL_rsfp (PL_parser->rsfp) 80 #define PL_rsfp_filters (PL_parser->rsfp_filters) 81 #define PL_in_my (PL_parser->in_my) 82 #define PL_in_my_stash (PL_parser->in_my_stash) 83 #define PL_tokenbuf (PL_parser->tokenbuf) 84 #define PL_multi_end (PL_parser->multi_end) 85 #define PL_error_count (PL_parser->error_count) 86 87 # define PL_nexttoke (PL_parser->nexttoke) 88 # define PL_nexttype (PL_parser->nexttype) 89 # define PL_nextval (PL_parser->nextval) 90 91 92 #define SvEVALED(sv) \ 93 (SvTYPE(sv) >= SVt_PVNV \ 94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen) 95 96 static const char* const ident_too_long = "Identifier too long"; 97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'"; 98 99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 100 101 #define XENUMMASK 0x3f 102 #define XFAKEEOF 0x40 103 #define XFAKEBRACK 0x80 104 105 #ifdef USE_UTF8_SCRIPTS 106 # define UTF cBOOL(!IN_BYTES) 107 #else 108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 109 #endif 110 111 /* The maximum number of characters preceding the unrecognized one to display */ 112 #define UNRECOGNIZED_PRECEDE_COUNT 10 113 114 /* In variables named $^X, these are the legal values for X. 115 * 1999-02-27 mjd-perl-patch@plover.com */ 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x))) 117 118 #define SPACE_OR_TAB(c) isBLANK_A(c) 119 120 #define HEXFP_PEEK(s) \ 121 (((s[0] == '.') && \ 122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ 123 isALPHA_FOLD_EQ(s[0], 'p')) 124 125 /* LEX_* are values for PL_lex_state, the state of the lexer. 126 * They are arranged oddly so that the guard on the switch statement 127 * can get by with a single comparison (if the compiler is smart enough). 128 * 129 * These values refer to the various states within a sublex parse, 130 * i.e. within a double quotish string 131 */ 132 133 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 134 135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 140 141 /* at end of code, eg "$x" followed by: */ 142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 144 145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 146 string or after \E, $foo, etc */ 147 #define LEX_INTERPCONST 2 /* NOT USED */ 148 #define LEX_FORMLINE 1 /* expecting a format line */ 149 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only 151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof() 152 can also return it. 153 154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1, 155 other token values are 258 or higher (see perly.h), so -1 should be 156 a safe value here. 157 */ 158 #define YYL_RETRY (-1) 159 160 #ifdef DEBUGGING 161 static const char* const lex_state_names[] = { 162 "KNOWNEXT", 163 "FORMLINE", 164 "INTERPCONST", 165 "INTERPCONCAT", 166 "INTERPENDMAYBE", 167 "INTERPEND", 168 "INTERPSTART", 169 "INTERPPUSH", 170 "INTERPCASEMOD", 171 "INTERPNORMAL", 172 "NORMAL" 173 }; 174 #endif 175 176 #include "keywords.h" 177 178 /* CLINE is a macro that ensures PL_copline has a sane value */ 179 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 181 182 /* 183 * Convenience functions to return different tokens and prime the 184 * lexer for the next token. They all take an argument. 185 * 186 * TOKEN : generic token (used for '(', DOLSHARP, etc) 187 * OPERATOR : generic operator 188 * AOPERATOR : assignment operator 189 * PREBLOCK : beginning the block after an if, while, foreach, ... 190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 191 * PREREF : *EXPR where EXPR is not a simple identifier 192 * TERM : expression term 193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 194 * LOOPX : loop exiting command (goto, last, dump, etc) 195 * FTST : file test operator 196 * FUN0 : zero-argument function 197 * FUN0OP : zero-argument function, with its op created in this file 198 * FUN1 : not used, except for not, which isn't a UNIOP 199 * BOop : bitwise or or xor 200 * BAop : bitwise and 201 * BCop : bitwise complement 202 * SHop : shift operator 203 * PWop : power operator 204 * PMop : pattern-matching operator 205 * Aop : addition-level operator 206 * AopNOASSIGN : addition-level operator that is never part of .= 207 * Mop : multiplication-level operator 208 * ChEop : chaining equality-testing operator 209 * NCEop : non-chaining comparison operator at equality precedence 210 * ChRop : chaining relational operator <= != gt 211 * NCRop : non-chaining relational operator isa 212 * 213 * Also see LOP and lop() below. 214 */ 215 216 #ifdef DEBUGGING /* Serve -DT. */ 217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 218 #else 219 # define REPORT(retval) (retval) 220 #endif 221 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \ 231 pl_yylval.ival=f, \ 232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ 233 REPORT((int)LOOPEX)) 234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) 239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ 241 REPORT('~') 242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) 243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) 244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) 247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP)) 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP)) 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP)) 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP)) 252 253 /* This bit of chicanery makes a unary function followed by 254 * a parenthesis into a function with one argument, highest precedence. 255 * The UNIDOR macro is for unary functions that can be followed by the // 256 * operator (such as C<shift // 0>). 257 */ 258 #define UNI3(f,x,have_x) { \ 259 pl_yylval.ival = f; \ 260 if (have_x) PL_expect = x; \ 261 PL_bufptr = s; \ 262 PL_last_uni = PL_oldbufptr; \ 263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \ 264 if (*s == '(') \ 265 return REPORT( (int)FUNC1 ); \ 266 s = skipspace(s); \ 267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 268 } 269 #define UNI(f) UNI3(f,XTERM,1) 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 271 #define UNIPROTO(f,optional) { \ 272 if (optional) PL_last_uni = PL_oldbufptr; \ 273 OPERATOR(f); \ 274 } 275 276 #define UNIBRACK(f) UNI3(f,0,0) 277 278 /* grandfather return to old style */ 279 #define OLDLOP(f) \ 280 do { \ 281 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 282 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 283 pl_yylval.ival = (f); \ 284 PL_expect = XTERM; \ 285 PL_bufptr = s; \ 286 return (int)LSTOP; \ 287 } while(0) 288 289 #define COPLINE_INC_WITH_HERELINES \ 290 STMT_START { \ 291 CopLINE_inc(PL_curcop); \ 292 if (PL_parser->herelines) \ 293 CopLINE(PL_curcop) += PL_parser->herelines, \ 294 PL_parser->herelines = 0; \ 295 } STMT_END 296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there 297 * is no sublex_push to follow. */ 298 #define COPLINE_SET_FROM_MULTI_END \ 299 STMT_START { \ 300 CopLINE_set(PL_curcop, PL_multi_end); \ 301 if (PL_multi_end != PL_multi_start) \ 302 PL_parser->herelines = 0; \ 303 } STMT_END 304 305 306 /* A file-local structure for passing around information about subroutines and 307 * related definable words */ 308 struct code { 309 SV *sv; 310 CV *cv; 311 GV *gv, **gvp; 312 OP *rv2cv_op; 313 PADOFFSET off; 314 bool lex; 315 }; 316 317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; 318 319 #ifdef DEBUGGING 320 321 /* how to interpret the pl_yylval associated with the token */ 322 enum token_type { 323 TOKENTYPE_NONE, 324 TOKENTYPE_IVAL, 325 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 326 TOKENTYPE_PVAL, 327 TOKENTYPE_OPVAL 328 }; 329 330 static struct debug_tokens { 331 const int token; 332 enum token_type type; 333 const char *name; 334 } const debug_tokens[] = 335 { 336 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 337 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 338 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 339 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 340 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" }, 341 { ARROW, TOKENTYPE_NONE, "ARROW" }, 342 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 343 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 344 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 345 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" }, 346 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" }, 347 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 348 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 349 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 350 { DO, TOKENTYPE_NONE, "DO" }, 351 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 352 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 353 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 354 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 355 { ELSE, TOKENTYPE_NONE, "ELSE" }, 356 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 357 { FOR, TOKENTYPE_IVAL, "FOR" }, 358 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 359 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 360 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 361 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 362 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 363 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 364 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 365 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 366 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 367 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 368 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 369 { IF, TOKENTYPE_IVAL, "IF" }, 370 { LABEL, TOKENTYPE_OPVAL, "LABEL" }, 371 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 372 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 373 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 374 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 375 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 376 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 377 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 378 { MY, TOKENTYPE_IVAL, "MY" }, 379 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" }, 380 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" }, 381 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 382 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 383 { OROP, TOKENTYPE_IVAL, "OROP" }, 384 { OROR, TOKENTYPE_NONE, "OROR" }, 385 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 386 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 387 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 388 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 389 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" }, 390 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 391 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 392 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 393 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 394 { PREINC, TOKENTYPE_NONE, "PREINC" }, 395 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 396 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 397 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 398 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 399 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 400 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" }, 401 { SUB, TOKENTYPE_NONE, "SUB" }, 402 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" }, 403 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" }, 404 { THING, TOKENTYPE_OPVAL, "THING" }, 405 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 406 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 407 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 408 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 409 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 410 { USE, TOKENTYPE_IVAL, "USE" }, 411 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 412 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 413 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" }, 414 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 415 { 0, TOKENTYPE_NONE, NULL } 416 }; 417 418 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 419 420 STATIC int 421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 422 { 423 PERL_ARGS_ASSERT_TOKEREPORT; 424 425 if (DEBUG_T_TEST) { 426 const char *name = NULL; 427 enum token_type type = TOKENTYPE_NONE; 428 const struct debug_tokens *p; 429 SV* const report = newSVpvs("<== "); 430 431 for (p = debug_tokens; p->token; p++) { 432 if (p->token == (int)rv) { 433 name = p->name; 434 type = p->type; 435 break; 436 } 437 } 438 if (name) 439 Perl_sv_catpv(aTHX_ report, name); 440 else if (isGRAPH(rv)) 441 { 442 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 443 if ((char)rv == 'p') 444 sv_catpvs(report, " (pending identifier)"); 445 } 446 else if (!rv) 447 sv_catpvs(report, "EOF"); 448 else 449 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv); 450 switch (type) { 451 case TOKENTYPE_NONE: 452 break; 453 case TOKENTYPE_IVAL: 454 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival); 455 break; 456 case TOKENTYPE_OPNUM: 457 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 458 PL_op_name[lvalp->ival]); 459 break; 460 case TOKENTYPE_PVAL: 461 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 462 break; 463 case TOKENTYPE_OPVAL: 464 if (lvalp->opval) { 465 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 466 PL_op_name[lvalp->opval->op_type]); 467 if (lvalp->opval->op_type == OP_CONST) { 468 Perl_sv_catpvf(aTHX_ report, " %s", 469 SvPEEK(cSVOPx_sv(lvalp->opval))); 470 } 471 472 } 473 else 474 sv_catpvs(report, "(opval=null)"); 475 break; 476 } 477 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 478 }; 479 return (int)rv; 480 } 481 482 483 /* print the buffer with suitable escapes */ 484 485 STATIC void 486 S_printbuf(pTHX_ const char *const fmt, const char *const s) 487 { 488 SV* const tmp = newSVpvs(""); 489 490 PERL_ARGS_ASSERT_PRINTBUF; 491 492 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */ 493 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 494 GCC_DIAG_RESTORE_STMT; 495 SvREFCNT_dec(tmp); 496 } 497 498 #endif 499 500 /* 501 * S_ao 502 * 503 * This subroutine looks for an '=' next to the operator that has just been 504 * parsed and turns it into an ASSIGNOP if it finds one. 505 */ 506 507 STATIC int 508 S_ao(pTHX_ int toketype) 509 { 510 if (*PL_bufptr == '=') { 511 PL_bufptr++; 512 if (toketype == ANDAND) 513 pl_yylval.ival = OP_ANDASSIGN; 514 else if (toketype == OROR) 515 pl_yylval.ival = OP_ORASSIGN; 516 else if (toketype == DORDOR) 517 pl_yylval.ival = OP_DORASSIGN; 518 toketype = ASSIGNOP; 519 } 520 return REPORT(toketype); 521 } 522 523 /* 524 * S_no_op 525 * When Perl expects an operator and finds something else, no_op 526 * prints the warning. It always prints "<something> found where 527 * operator expected. It prints "Missing semicolon on previous line?" 528 * if the surprise occurs at the start of the line. "do you need to 529 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 530 * where the compiler doesn't know if foo is a method call or a function. 531 * It prints "Missing operator before end of line" if there's nothing 532 * after the missing operator, or "... before <...>" if there is something 533 * after the missing operator. 534 * 535 * PL_bufptr is expected to point to the start of the thing that was found, 536 * and s after the next token or partial token. 537 */ 538 539 STATIC void 540 S_no_op(pTHX_ const char *const what, char *s) 541 { 542 char * const oldbp = PL_bufptr; 543 const bool is_first = (PL_oldbufptr == PL_linestart); 544 545 PERL_ARGS_ASSERT_NO_OP; 546 547 if (!s) 548 s = oldbp; 549 else 550 PL_bufptr = s; 551 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 552 if (ckWARN_d(WARN_SYNTAX)) { 553 if (is_first) 554 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 555 "\t(Missing semicolon on previous line?)\n"); 556 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr, 557 PL_bufend, 558 UTF)) 559 { 560 const char *t; 561 for (t = PL_oldoldbufptr; 562 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); 563 t += UTF ? UTF8SKIP(t) : 1) 564 { 565 NOOP; 566 } 567 if (t < PL_bufptr && isSPACE(*t)) 568 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 569 "\t(Do you need to predeclare %" UTF8f "?)\n", 570 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); 571 } 572 else { 573 assert(s >= oldbp); 574 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 575 "\t(Missing operator before %" UTF8f "?)\n", 576 UTF8fARG(UTF, s - oldbp, oldbp)); 577 } 578 } 579 PL_bufptr = oldbp; 580 } 581 582 /* 583 * S_missingterm 584 * Complain about missing quote/regexp/heredoc terminator. 585 * If it's called with NULL then it cauterizes the line buffer. 586 * If we're in a delimited string and the delimiter is a control 587 * character, it's reformatted into a two-char sequence like ^C. 588 * This is fatal. 589 */ 590 591 STATIC void 592 S_missingterm(pTHX_ char *s, STRLEN len) 593 { 594 char tmpbuf[UTF8_MAXBYTES + 1]; 595 char q; 596 bool uni = FALSE; 597 SV *sv; 598 if (s) { 599 char * const nl = (char *) my_memrchr(s, '\n', len); 600 if (nl) { 601 *nl = '\0'; 602 len = nl - s; 603 } 604 uni = UTF; 605 } 606 else if (PL_multi_close < 32) { 607 *tmpbuf = '^'; 608 tmpbuf[1] = (char)toCTRL(PL_multi_close); 609 tmpbuf[2] = '\0'; 610 s = tmpbuf; 611 len = 2; 612 } 613 else { 614 if (LIKELY(PL_multi_close < 256)) { 615 *tmpbuf = (char)PL_multi_close; 616 tmpbuf[1] = '\0'; 617 len = 1; 618 } 619 else { 620 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); 621 *end = '\0'; 622 len = end - tmpbuf; 623 uni = TRUE; 624 } 625 s = tmpbuf; 626 } 627 q = memchr(s, '"', len) ? '\'' : '"'; 628 sv = sv_2mortal(newSVpvn(s, len)); 629 if (uni) 630 SvUTF8_on(sv); 631 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c" 632 " anywhere before EOF", q, SVfARG(sv), q); 633 } 634 635 #include "feature.h" 636 637 /* 638 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 639 * utf16-to-utf8-reversed. 640 */ 641 642 #ifdef PERL_CR_FILTER 643 static void 644 strip_return(SV *sv) 645 { 646 const char *s = SvPVX_const(sv); 647 const char * const e = s + SvCUR(sv); 648 649 PERL_ARGS_ASSERT_STRIP_RETURN; 650 651 /* outer loop optimized to do nothing if there are no CR-LFs */ 652 while (s < e) { 653 if (*s++ == '\r' && *s == '\n') { 654 /* hit a CR-LF, need to copy the rest */ 655 char *d = s - 1; 656 *d++ = *s++; 657 while (s < e) { 658 if (*s == '\r' && s[1] == '\n') 659 s++; 660 *d++ = *s++; 661 } 662 SvCUR(sv) -= s - d; 663 return; 664 } 665 } 666 } 667 668 STATIC I32 669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 670 { 671 const I32 count = FILTER_READ(idx+1, sv, maxlen); 672 if (count > 0 && !maxlen) 673 strip_return(sv); 674 return count; 675 } 676 #endif 677 678 /* 679 =for apidoc lex_start 680 681 Creates and initialises a new lexer/parser state object, supplying 682 a context in which to lex and parse from a new source of Perl code. 683 A pointer to the new state object is placed in L</PL_parser>. An entry 684 is made on the save stack so that upon unwinding, the new state object 685 will be destroyed and the former value of L</PL_parser> will be restored. 686 Nothing else need be done to clean up the parsing context. 687 688 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if 689 non-null, provides a string (in SV form) containing code to be parsed. 690 A copy of the string is made, so subsequent modification of C<line> 691 does not affect parsing. C<rsfp>, if non-null, provides an input stream 692 from which code will be read to be parsed. If both are non-null, the 693 code in C<line> comes first and must consist of complete lines of input, 694 and C<rsfp> supplies the remainder of the source. 695 696 The C<flags> parameter is reserved for future use. Currently it is only 697 used by perl internally, so extensions should always pass zero. 698 699 =cut 700 */ 701 702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 703 can share filters with the current parser. 704 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 705 caller, hence isn't owned by the parser, so shouldn't be closed on parser 706 destruction. This is used to handle the case of defaulting to reading the 707 script from the standard input because no filename was given on the command 708 line (without getting confused by situation where STDIN has been closed, so 709 the script handle is opened on fd 0) */ 710 711 void 712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 713 { 714 const char *s = NULL; 715 yy_parser *parser, *oparser; 716 717 if (flags && flags & ~LEX_START_FLAGS) 718 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 719 720 /* create and initialise a parser */ 721 722 Newxz(parser, 1, yy_parser); 723 parser->old_parser = oparser = PL_parser; 724 PL_parser = parser; 725 726 parser->stack = NULL; 727 parser->stack_max1 = NULL; 728 parser->ps = NULL; 729 730 /* on scope exit, free this parser and restore any outer one */ 731 SAVEPARSER(parser); 732 parser->saved_curcop = PL_curcop; 733 734 /* initialise lexer state */ 735 736 parser->nexttoke = 0; 737 parser->error_count = oparser ? oparser->error_count : 0; 738 parser->copline = parser->preambling = NOLINE; 739 parser->lex_state = LEX_NORMAL; 740 parser->expect = XSTATE; 741 parser->rsfp = rsfp; 742 parser->recheck_utf8_validity = TRUE; 743 parser->rsfp_filters = 744 !(flags & LEX_START_SAME_FILTER) || !oparser 745 ? NULL 746 : MUTABLE_AV(SvREFCNT_inc( 747 oparser->rsfp_filters 748 ? oparser->rsfp_filters 749 : (oparser->rsfp_filters = newAV()) 750 )); 751 752 Newx(parser->lex_brackstack, 120, char); 753 Newx(parser->lex_casestack, 12, char); 754 *parser->lex_casestack = '\0'; 755 Newxz(parser->lex_shared, 1, LEXSHARED); 756 757 if (line) { 758 STRLEN len; 759 const U8* first_bad_char_loc; 760 761 s = SvPV_const(line, len); 762 763 if ( SvUTF8(line) 764 && UNLIKELY(! is_utf8_string_loc((U8 *) s, 765 SvCUR(line), 766 &first_bad_char_loc))) 767 { 768 _force_out_malformed_utf8_message(first_bad_char_loc, 769 (U8 *) s + SvCUR(line), 770 0, 771 1 /* 1 means die */ ); 772 NOT_REACHED; /* NOTREACHED */ 773 } 774 775 parser->linestr = flags & LEX_START_COPIED 776 ? SvREFCNT_inc_simple_NN(line) 777 : newSVpvn_flags(s, len, SvUTF8(line)); 778 if (!rsfp) 779 sv_catpvs(parser->linestr, "\n;"); 780 } else { 781 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 782 } 783 784 parser->oldoldbufptr = 785 parser->oldbufptr = 786 parser->bufptr = 787 parser->linestart = SvPVX(parser->linestr); 788 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 789 parser->last_lop = parser->last_uni = NULL; 790 791 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 792 |LEX_DONT_CLOSE_RSFP)); 793 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 794 |LEX_DONT_CLOSE_RSFP)); 795 796 parser->in_pod = parser->filtered = 0; 797 } 798 799 800 /* delete a parser object */ 801 802 void 803 Perl_parser_free(pTHX_ const yy_parser *parser) 804 { 805 PERL_ARGS_ASSERT_PARSER_FREE; 806 807 PL_curcop = parser->saved_curcop; 808 SvREFCNT_dec(parser->linestr); 809 810 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 811 PerlIO_clearerr(parser->rsfp); 812 else if (parser->rsfp && (!parser->old_parser 813 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 814 PerlIO_close(parser->rsfp); 815 SvREFCNT_dec(parser->rsfp_filters); 816 SvREFCNT_dec(parser->lex_stuff); 817 SvREFCNT_dec(parser->lex_sub_repl); 818 819 Safefree(parser->lex_brackstack); 820 Safefree(parser->lex_casestack); 821 Safefree(parser->lex_shared); 822 PL_parser = parser->old_parser; 823 Safefree(parser); 824 } 825 826 void 827 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 828 { 829 I32 nexttoke = parser->nexttoke; 830 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 831 while (nexttoke--) { 832 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 833 && parser->nextval[nexttoke].opval 834 && parser->nextval[nexttoke].opval->op_slabbed 835 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 836 op_free(parser->nextval[nexttoke].opval); 837 parser->nextval[nexttoke].opval = NULL; 838 } 839 } 840 } 841 842 843 /* 844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr 845 846 Buffer scalar containing the chunk currently under consideration of the 847 text currently being lexed. This is always a plain string scalar (for 848 which C<SvPOK> is true). It is not intended to be used as a scalar by 849 normal scalar means; instead refer to the buffer directly by the pointer 850 variables described below. 851 852 The lexer maintains various C<char*> pointers to things in the 853 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 854 reallocated, all of these pointers must be updated. Don't attempt to 855 do this manually, but rather use L</lex_grow_linestr> if you need to 856 reallocate the buffer. 857 858 The content of the text chunk in the buffer is commonly exactly one 859 complete line of input, up to and including a newline terminator, 860 but there are situations where it is otherwise. The octets of the 861 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 862 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 863 flag on this scalar, which may disagree with it. 864 865 For direct examination of the buffer, the variable 866 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 868 of these pointers is usually preferable to examination of the scalar 869 through normal scalar means. 870 871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend 872 873 Direct pointer to the end of the chunk of text currently being lexed, the 874 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 875 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 876 always located at the end of the buffer, and does not count as part of 877 the buffer's contents. 878 879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr 880 881 Points to the current position of lexing inside the lexer buffer. 882 Characters around this point may be freely examined, within 883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 884 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 886 887 Lexing code (whether in the Perl core or not) moves this pointer past 888 the characters that it consumes. It is also expected to perform some 889 bookkeeping whenever a newline character is consumed. This movement 890 can be more conveniently performed by the function L</lex_read_to>, 891 which handles newlines appropriately. 892 893 Interpretation of the buffer's octets can be abstracted out by 894 using the slightly higher-level functions L</lex_peek_unichar> and 895 L</lex_read_unichar>. 896 897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart 898 899 Points to the start of the current line inside the lexer buffer. 900 This is useful for indicating at which column an error occurred, and 901 not much else. This must be updated by any lexing code that consumes 902 a newline; the function L</lex_read_to> handles this detail. 903 904 =cut 905 */ 906 907 /* 908 =for apidoc lex_bufutf8 909 910 Indicates whether the octets in the lexer buffer 911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 912 of Unicode characters. If not, they should be interpreted as Latin-1 913 characters. This is analogous to the C<SvUTF8> flag for scalars. 914 915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 916 contains valid UTF-8. Lexing code must be robust in the face of invalid 917 encoding. 918 919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 920 is significant, but not the whole story regarding the input character 921 encoding. Normally, when a file is being read, the scalar contains octets 922 and its C<SvUTF8> flag is off, but the octets should be interpreted as 923 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 924 however, the scalar may have the C<SvUTF8> flag on, and in this case its 925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 926 is in effect. This logic may change in the future; use this function 927 instead of implementing the logic yourself. 928 929 =cut 930 */ 931 932 bool 933 Perl_lex_bufutf8(pTHX) 934 { 935 return UTF; 936 } 937 938 /* 939 =for apidoc lex_grow_linestr 940 941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 942 at least C<len> octets (including terminating C<NUL>). Returns a 943 pointer to the reallocated buffer. This is necessary before making 944 any direct modification of the buffer that would increase its length. 945 L</lex_stuff_pvn> provides a more convenient way to insert text into 946 the buffer. 947 948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 949 this function updates all of the lexer's variables that point directly 950 into the buffer. 951 952 =cut 953 */ 954 955 char * 956 Perl_lex_grow_linestr(pTHX_ STRLEN len) 957 { 958 SV *linestr; 959 char *buf; 960 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 961 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 962 bool current; 963 964 linestr = PL_parser->linestr; 965 buf = SvPVX(linestr); 966 if (len <= SvLEN(linestr)) 967 return buf; 968 969 /* Is the lex_shared linestr SV the same as the current linestr SV? 970 * Only in this case does re_eval_start need adjusting, since it 971 * points within lex_shared->ls_linestr's buffer */ 972 current = ( !PL_parser->lex_shared->ls_linestr 973 || linestr == PL_parser->lex_shared->ls_linestr); 974 975 bufend_pos = PL_parser->bufend - buf; 976 bufptr_pos = PL_parser->bufptr - buf; 977 oldbufptr_pos = PL_parser->oldbufptr - buf; 978 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 979 linestart_pos = PL_parser->linestart - buf; 980 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 981 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 982 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ? 983 PL_parser->lex_shared->re_eval_start - buf : 0; 984 985 buf = sv_grow(linestr, len); 986 987 PL_parser->bufend = buf + bufend_pos; 988 PL_parser->bufptr = buf + bufptr_pos; 989 PL_parser->oldbufptr = buf + oldbufptr_pos; 990 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 991 PL_parser->linestart = buf + linestart_pos; 992 if (PL_parser->last_uni) 993 PL_parser->last_uni = buf + last_uni_pos; 994 if (PL_parser->last_lop) 995 PL_parser->last_lop = buf + last_lop_pos; 996 if (current && PL_parser->lex_shared->re_eval_start) 997 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 998 return buf; 999 } 1000 1001 /* 1002 =for apidoc lex_stuff_pvn 1003 1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1006 reallocating the buffer if necessary. This means that lexing code that 1007 runs later will see the characters as if they had appeared in the input. 1008 It is not recommended to do this as part of normal parsing, and most 1009 uses of this facility run the risk of the inserted characters being 1010 interpreted in an unintended manner. 1011 1012 The string to be inserted is represented by C<len> octets starting 1013 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1, 1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>. 1015 The characters are recoded for the lexer buffer, according to how the 1016 buffer is currently being interpreted (L</lex_bufutf8>). If a string 1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 1018 function is more convenient. 1019 1020 =for apidoc Amnh||LEX_STUFF_UTF8 1021 1022 =cut 1023 */ 1024 1025 void 1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1027 { 1028 dVAR; 1029 char *bufptr; 1030 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1031 if (flags & ~(LEX_STUFF_UTF8)) 1032 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1033 if (UTF) { 1034 if (flags & LEX_STUFF_UTF8) { 1035 goto plain_copy; 1036 } else { 1037 STRLEN highhalf = variant_under_utf8_count((U8 *) pv, 1038 (U8 *) pv + len); 1039 const char *p, *e = pv+len;; 1040 if (!highhalf) 1041 goto plain_copy; 1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1043 bufptr = PL_parser->bufptr; 1044 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1045 SvCUR_set(PL_parser->linestr, 1046 SvCUR(PL_parser->linestr) + len+highhalf); 1047 PL_parser->bufend += len+highhalf; 1048 for (p = pv; p != e; p++) { 1049 append_utf8_from_native_byte(*p, (U8 **) &bufptr); 1050 } 1051 } 1052 } else { 1053 if (flags & LEX_STUFF_UTF8) { 1054 STRLEN highhalf = 0; 1055 const char *p, *e = pv+len; 1056 for (p = pv; p != e; p++) { 1057 U8 c = (U8)*p; 1058 if (UTF8_IS_ABOVE_LATIN1(c)) { 1059 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1060 "non-Latin-1 character into Latin-1 input"); 1061 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1062 p++; 1063 highhalf++; 1064 } else assert(UTF8_IS_INVARIANT(c)); 1065 } 1066 if (!highhalf) 1067 goto plain_copy; 1068 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1069 bufptr = PL_parser->bufptr; 1070 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1071 SvCUR_set(PL_parser->linestr, 1072 SvCUR(PL_parser->linestr) + len-highhalf); 1073 PL_parser->bufend += len-highhalf; 1074 p = pv; 1075 while (p < e) { 1076 if (UTF8_IS_INVARIANT(*p)) { 1077 *bufptr++ = *p; 1078 p++; 1079 } 1080 else { 1081 assert(p < e -1 ); 1082 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); 1083 p += 2; 1084 } 1085 } 1086 } else { 1087 plain_copy: 1088 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1089 bufptr = PL_parser->bufptr; 1090 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1091 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1092 PL_parser->bufend += len; 1093 Copy(pv, bufptr, len, char); 1094 } 1095 } 1096 } 1097 1098 /* 1099 =for apidoc lex_stuff_pv 1100 1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1103 reallocating the buffer if necessary. This means that lexing code that 1104 runs later will see the characters as if they had appeared in the input. 1105 It is not recommended to do this as part of normal parsing, and most 1106 uses of this facility run the risk of the inserted characters being 1107 interpreted in an unintended manner. 1108 1109 The string to be inserted is represented by octets starting at C<pv> 1110 and continuing to the first nul. These octets are interpreted as either 1111 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1112 in C<flags>. The characters are recoded for the lexer buffer, according 1113 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1114 If it is not convenient to nul-terminate a string to be inserted, the 1115 L</lex_stuff_pvn> function is more appropriate. 1116 1117 =cut 1118 */ 1119 1120 void 1121 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1122 { 1123 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1124 lex_stuff_pvn(pv, strlen(pv), flags); 1125 } 1126 1127 /* 1128 =for apidoc lex_stuff_sv 1129 1130 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1131 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1132 reallocating the buffer if necessary. This means that lexing code that 1133 runs later will see the characters as if they had appeared in the input. 1134 It is not recommended to do this as part of normal parsing, and most 1135 uses of this facility run the risk of the inserted characters being 1136 interpreted in an unintended manner. 1137 1138 The string to be inserted is the string value of C<sv>. The characters 1139 are recoded for the lexer buffer, according to how the buffer is currently 1140 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1141 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1142 need to construct a scalar. 1143 1144 =cut 1145 */ 1146 1147 void 1148 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1149 { 1150 char *pv; 1151 STRLEN len; 1152 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1153 if (flags) 1154 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1155 pv = SvPV(sv, len); 1156 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1157 } 1158 1159 /* 1160 =for apidoc lex_unstuff 1161 1162 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1163 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened. 1164 This hides the discarded text from any lexing code that runs later, 1165 as if the text had never appeared. 1166 1167 This is not the normal way to consume lexed text. For that, use 1168 L</lex_read_to>. 1169 1170 =cut 1171 */ 1172 1173 void 1174 Perl_lex_unstuff(pTHX_ char *ptr) 1175 { 1176 char *buf, *bufend; 1177 STRLEN unstuff_len; 1178 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1179 buf = PL_parser->bufptr; 1180 if (ptr < buf) 1181 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1182 if (ptr == buf) 1183 return; 1184 bufend = PL_parser->bufend; 1185 if (ptr > bufend) 1186 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1187 unstuff_len = ptr - buf; 1188 Move(ptr, buf, bufend+1-ptr, char); 1189 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1190 PL_parser->bufend = bufend - unstuff_len; 1191 } 1192 1193 /* 1194 =for apidoc lex_read_to 1195 1196 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1197 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>, 1198 performing the correct bookkeeping whenever a newline character is passed. 1199 This is the normal way to consume lexed text. 1200 1201 Interpretation of the buffer's octets can be abstracted out by 1202 using the slightly higher-level functions L</lex_peek_unichar> and 1203 L</lex_read_unichar>. 1204 1205 =cut 1206 */ 1207 1208 void 1209 Perl_lex_read_to(pTHX_ char *ptr) 1210 { 1211 char *s; 1212 PERL_ARGS_ASSERT_LEX_READ_TO; 1213 s = PL_parser->bufptr; 1214 if (ptr < s || ptr > PL_parser->bufend) 1215 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1216 for (; s != ptr; s++) 1217 if (*s == '\n') { 1218 COPLINE_INC_WITH_HERELINES; 1219 PL_parser->linestart = s+1; 1220 } 1221 PL_parser->bufptr = ptr; 1222 } 1223 1224 /* 1225 =for apidoc lex_discard_to 1226 1227 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1228 up to C<ptr>. The remaining content of the buffer will be moved, and 1229 all pointers into the buffer updated appropriately. C<ptr> must not 1230 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1231 it is not permitted to discard text that has yet to be lexed. 1232 1233 Normally it is not necessarily to do this directly, because it suffices to 1234 use the implicit discarding behaviour of L</lex_next_chunk> and things 1235 based on it. However, if a token stretches across multiple lines, 1236 and the lexing code has kept multiple lines of text in the buffer for 1237 that purpose, then after completion of the token it would be wise to 1238 explicitly discard the now-unneeded earlier lines, to avoid future 1239 multi-line tokens growing the buffer without bound. 1240 1241 =cut 1242 */ 1243 1244 void 1245 Perl_lex_discard_to(pTHX_ char *ptr) 1246 { 1247 char *buf; 1248 STRLEN discard_len; 1249 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1250 buf = SvPVX(PL_parser->linestr); 1251 if (ptr < buf) 1252 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1253 if (ptr == buf) 1254 return; 1255 if (ptr > PL_parser->bufptr) 1256 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1257 discard_len = ptr - buf; 1258 if (PL_parser->oldbufptr < ptr) 1259 PL_parser->oldbufptr = ptr; 1260 if (PL_parser->oldoldbufptr < ptr) 1261 PL_parser->oldoldbufptr = ptr; 1262 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1263 PL_parser->last_uni = NULL; 1264 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1265 PL_parser->last_lop = NULL; 1266 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1267 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1268 PL_parser->bufend -= discard_len; 1269 PL_parser->bufptr -= discard_len; 1270 PL_parser->oldbufptr -= discard_len; 1271 PL_parser->oldoldbufptr -= discard_len; 1272 if (PL_parser->last_uni) 1273 PL_parser->last_uni -= discard_len; 1274 if (PL_parser->last_lop) 1275 PL_parser->last_lop -= discard_len; 1276 } 1277 1278 void 1279 Perl_notify_parser_that_changed_to_utf8(pTHX) 1280 { 1281 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from 1282 * off to on. At compile time, this has the effect of entering a 'use 1283 * utf8' section. This means that any input was not previously checked for 1284 * UTF-8 (because it was off), but now we do need to check it, or our 1285 * assumptions about the input being sane could be wrong, and we could 1286 * segfault. This routine just sets a flag so that the next time we look 1287 * at the input we do the well-formed UTF-8 check. If we aren't in the 1288 * proper phase, there may not be a parser object, but if there is, setting 1289 * the flag is harmless */ 1290 1291 if (PL_parser) { 1292 PL_parser->recheck_utf8_validity = TRUE; 1293 } 1294 } 1295 1296 /* 1297 =for apidoc lex_next_chunk 1298 1299 Reads in the next chunk of text to be lexed, appending it to 1300 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1301 looked to the end of the current chunk and wants to know more. It is 1302 usual, but not necessary, for lexing to have consumed the entirety of 1303 the current chunk at this time. 1304 1305 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1306 chunk (i.e., the current chunk has been entirely consumed), normally the 1307 current chunk will be discarded at the same time that the new chunk is 1308 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk 1309 will not be discarded. If the current chunk has not been entirely 1310 consumed, then it will not be discarded regardless of the flag. 1311 1312 Returns true if some new text was added to the buffer, or false if the 1313 buffer has reached the end of the input text. 1314 1315 =for apidoc Amnh||LEX_KEEP_PREVIOUS 1316 1317 =cut 1318 */ 1319 1320 #define LEX_FAKE_EOF 0x80000000 1321 #define LEX_NO_TERM 0x40000000 /* here-doc */ 1322 1323 bool 1324 Perl_lex_next_chunk(pTHX_ U32 flags) 1325 { 1326 SV *linestr; 1327 char *buf; 1328 STRLEN old_bufend_pos, new_bufend_pos; 1329 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1330 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1331 bool got_some_for_debugger = 0; 1332 bool got_some; 1333 1334 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1335 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1336 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1337 return FALSE; 1338 linestr = PL_parser->linestr; 1339 buf = SvPVX(linestr); 1340 if (!(flags & LEX_KEEP_PREVIOUS) 1341 && PL_parser->bufptr == PL_parser->bufend) 1342 { 1343 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1344 linestart_pos = 0; 1345 if (PL_parser->last_uni != PL_parser->bufend) 1346 PL_parser->last_uni = NULL; 1347 if (PL_parser->last_lop != PL_parser->bufend) 1348 PL_parser->last_lop = NULL; 1349 last_uni_pos = last_lop_pos = 0; 1350 *buf = 0; 1351 SvCUR_set(linestr, 0); 1352 } else { 1353 old_bufend_pos = PL_parser->bufend - buf; 1354 bufptr_pos = PL_parser->bufptr - buf; 1355 oldbufptr_pos = PL_parser->oldbufptr - buf; 1356 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1357 linestart_pos = PL_parser->linestart - buf; 1358 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1359 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1360 } 1361 if (flags & LEX_FAKE_EOF) { 1362 goto eof; 1363 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1364 got_some = 0; 1365 } else if (filter_gets(linestr, old_bufend_pos)) { 1366 got_some = 1; 1367 got_some_for_debugger = 1; 1368 } else if (flags & LEX_NO_TERM) { 1369 got_some = 0; 1370 } else { 1371 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1372 SvPVCLEAR(linestr); 1373 eof: 1374 /* End of real input. Close filehandle (unless it was STDIN), 1375 * then add implicit termination. 1376 */ 1377 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1378 PerlIO_clearerr(PL_parser->rsfp); 1379 else if (PL_parser->rsfp) 1380 (void)PerlIO_close(PL_parser->rsfp); 1381 PL_parser->rsfp = NULL; 1382 PL_parser->in_pod = PL_parser->filtered = 0; 1383 if (!PL_in_eval && PL_minus_p) { 1384 sv_catpvs(linestr, 1385 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1386 PL_minus_n = PL_minus_p = 0; 1387 } else if (!PL_in_eval && PL_minus_n) { 1388 sv_catpvs(linestr, /*{*/";}"); 1389 PL_minus_n = 0; 1390 } else 1391 sv_catpvs(linestr, ";"); 1392 got_some = 1; 1393 } 1394 buf = SvPVX(linestr); 1395 new_bufend_pos = SvCUR(linestr); 1396 PL_parser->bufend = buf + new_bufend_pos; 1397 PL_parser->bufptr = buf + bufptr_pos; 1398 1399 if (UTF) { 1400 const U8* first_bad_char_loc; 1401 if (UNLIKELY(! is_utf8_string_loc( 1402 (U8 *) PL_parser->bufptr, 1403 PL_parser->bufend - PL_parser->bufptr, 1404 &first_bad_char_loc))) 1405 { 1406 _force_out_malformed_utf8_message(first_bad_char_loc, 1407 (U8 *) PL_parser->bufend, 1408 0, 1409 1 /* 1 means die */ ); 1410 NOT_REACHED; /* NOTREACHED */ 1411 } 1412 } 1413 1414 PL_parser->oldbufptr = buf + oldbufptr_pos; 1415 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1416 PL_parser->linestart = buf + linestart_pos; 1417 if (PL_parser->last_uni) 1418 PL_parser->last_uni = buf + last_uni_pos; 1419 if (PL_parser->last_lop) 1420 PL_parser->last_lop = buf + last_lop_pos; 1421 if (PL_parser->preambling != NOLINE) { 1422 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1423 PL_parser->preambling = NOLINE; 1424 } 1425 if ( got_some_for_debugger 1426 && PERLDB_LINE_OR_SAVESRC 1427 && PL_curstash != PL_debstash) 1428 { 1429 /* debugger active and we're not compiling the debugger code, 1430 * so store the line into the debugger's array of lines 1431 */ 1432 update_debugger_info(NULL, buf+old_bufend_pos, 1433 new_bufend_pos-old_bufend_pos); 1434 } 1435 return got_some; 1436 } 1437 1438 /* 1439 =for apidoc lex_peek_unichar 1440 1441 Looks ahead one (Unicode) character in the text currently being lexed. 1442 Returns the codepoint (unsigned integer value) of the next character, 1443 or -1 if lexing has reached the end of the input text. To consume the 1444 peeked character, use L</lex_read_unichar>. 1445 1446 If the next character is in (or extends into) the next chunk of input 1447 text, the next chunk will be read in. Normally the current chunk will be 1448 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1449 bit set, then the current chunk will not be discarded. 1450 1451 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1452 is encountered, an exception is generated. 1453 1454 =cut 1455 */ 1456 1457 I32 1458 Perl_lex_peek_unichar(pTHX_ U32 flags) 1459 { 1460 dVAR; 1461 char *s, *bufend; 1462 if (flags & ~(LEX_KEEP_PREVIOUS)) 1463 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1464 s = PL_parser->bufptr; 1465 bufend = PL_parser->bufend; 1466 if (UTF) { 1467 U8 head; 1468 I32 unichar; 1469 STRLEN len, retlen; 1470 if (s == bufend) { 1471 if (!lex_next_chunk(flags)) 1472 return -1; 1473 s = PL_parser->bufptr; 1474 bufend = PL_parser->bufend; 1475 } 1476 head = (U8)*s; 1477 if (UTF8_IS_INVARIANT(head)) 1478 return head; 1479 if (UTF8_IS_START(head)) { 1480 len = UTF8SKIP(&head); 1481 while ((STRLEN)(bufend-s) < len) { 1482 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1483 break; 1484 s = PL_parser->bufptr; 1485 bufend = PL_parser->bufend; 1486 } 1487 } 1488 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1489 if (retlen == (STRLEN)-1) { 1490 _force_out_malformed_utf8_message((U8 *) s, 1491 (U8 *) bufend, 1492 0, 1493 1 /* 1 means die */ ); 1494 NOT_REACHED; /* NOTREACHED */ 1495 } 1496 return unichar; 1497 } else { 1498 if (s == bufend) { 1499 if (!lex_next_chunk(flags)) 1500 return -1; 1501 s = PL_parser->bufptr; 1502 } 1503 return (U8)*s; 1504 } 1505 } 1506 1507 /* 1508 =for apidoc lex_read_unichar 1509 1510 Reads the next (Unicode) character in the text currently being lexed. 1511 Returns the codepoint (unsigned integer value) of the character read, 1512 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1513 if lexing has reached the end of the input text. To non-destructively 1514 examine the next character, use L</lex_peek_unichar> instead. 1515 1516 If the next character is in (or extends into) the next chunk of input 1517 text, the next chunk will be read in. Normally the current chunk will be 1518 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1519 bit set, then the current chunk will not be discarded. 1520 1521 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1522 is encountered, an exception is generated. 1523 1524 =cut 1525 */ 1526 1527 I32 1528 Perl_lex_read_unichar(pTHX_ U32 flags) 1529 { 1530 I32 c; 1531 if (flags & ~(LEX_KEEP_PREVIOUS)) 1532 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1533 c = lex_peek_unichar(flags); 1534 if (c != -1) { 1535 if (c == '\n') 1536 COPLINE_INC_WITH_HERELINES; 1537 if (UTF) 1538 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1539 else 1540 ++(PL_parser->bufptr); 1541 } 1542 return c; 1543 } 1544 1545 /* 1546 =for apidoc lex_read_space 1547 1548 Reads optional spaces, in Perl style, in the text currently being 1549 lexed. The spaces may include ordinary whitespace characters and 1550 Perl-style comments. C<#line> directives are processed if encountered. 1551 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1552 at a non-space character (or the end of the input text). 1553 1554 If spaces extend into the next chunk of input text, the next chunk will 1555 be read in. Normally the current chunk will be discarded at the same 1556 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current 1557 chunk will not be discarded. 1558 1559 =cut 1560 */ 1561 1562 #define LEX_NO_INCLINE 0x40000000 1563 #define LEX_NO_NEXT_CHUNK 0x80000000 1564 1565 void 1566 Perl_lex_read_space(pTHX_ U32 flags) 1567 { 1568 char *s, *bufend; 1569 const bool can_incline = !(flags & LEX_NO_INCLINE); 1570 bool need_incline = 0; 1571 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1572 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1573 s = PL_parser->bufptr; 1574 bufend = PL_parser->bufend; 1575 while (1) { 1576 char c = *s; 1577 if (c == '#') { 1578 do { 1579 c = *++s; 1580 } while (!(c == '\n' || (c == 0 && s == bufend))); 1581 } else if (c == '\n') { 1582 s++; 1583 if (can_incline) { 1584 PL_parser->linestart = s; 1585 if (s == bufend) 1586 need_incline = 1; 1587 else 1588 incline(s, bufend); 1589 } 1590 } else if (isSPACE(c)) { 1591 s++; 1592 } else if (c == 0 && s == bufend) { 1593 bool got_more; 1594 line_t l; 1595 if (flags & LEX_NO_NEXT_CHUNK) 1596 break; 1597 PL_parser->bufptr = s; 1598 l = CopLINE(PL_curcop); 1599 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1600 got_more = lex_next_chunk(flags); 1601 CopLINE_set(PL_curcop, l); 1602 s = PL_parser->bufptr; 1603 bufend = PL_parser->bufend; 1604 if (!got_more) 1605 break; 1606 if (can_incline && need_incline && PL_parser->rsfp) { 1607 incline(s, bufend); 1608 need_incline = 0; 1609 } 1610 } else if (!c) { 1611 s++; 1612 } else { 1613 break; 1614 } 1615 } 1616 PL_parser->bufptr = s; 1617 } 1618 1619 /* 1620 1621 =for apidoc validate_proto 1622 1623 This function performs syntax checking on a prototype, C<proto>. 1624 If C<warn> is true, any illegal characters or mismatched brackets 1625 will trigger illegalproto warnings, declaring that they were 1626 detected in the prototype for C<name>. 1627 1628 The return value is C<true> if this is a valid prototype, and 1629 C<false> if it is not, regardless of whether C<warn> was C<true> or 1630 C<false>. 1631 1632 Note that C<NULL> is a valid C<proto> and will always return C<true>. 1633 1634 =cut 1635 1636 */ 1637 1638 bool 1639 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) 1640 { 1641 STRLEN len, origlen; 1642 char *p; 1643 bool bad_proto = FALSE; 1644 bool in_brackets = FALSE; 1645 bool after_slash = FALSE; 1646 char greedy_proto = ' '; 1647 bool proto_after_greedy_proto = FALSE; 1648 bool must_be_last = FALSE; 1649 bool underscore = FALSE; 1650 bool bad_proto_after_underscore = FALSE; 1651 1652 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1653 1654 if (!proto) 1655 return TRUE; 1656 1657 p = SvPV(proto, len); 1658 origlen = len; 1659 for (; len--; p++) { 1660 if (!isSPACE(*p)) { 1661 if (must_be_last) 1662 proto_after_greedy_proto = TRUE; 1663 if (underscore) { 1664 if (!memCHRs(";@%", *p)) 1665 bad_proto_after_underscore = TRUE; 1666 underscore = FALSE; 1667 } 1668 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') { 1669 bad_proto = TRUE; 1670 } 1671 else { 1672 if (*p == '[') 1673 in_brackets = TRUE; 1674 else if (*p == ']') 1675 in_brackets = FALSE; 1676 else if ((*p == '@' || *p == '%') 1677 && !after_slash 1678 && !in_brackets ) 1679 { 1680 must_be_last = TRUE; 1681 greedy_proto = *p; 1682 } 1683 else if (*p == '_') 1684 underscore = TRUE; 1685 } 1686 if (*p == '\\') 1687 after_slash = TRUE; 1688 else 1689 after_slash = FALSE; 1690 } 1691 } 1692 1693 if (warn) { 1694 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1695 p -= origlen; 1696 p = SvUTF8(proto) 1697 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1698 origlen, UNI_DISPLAY_ISPRINT) 1699 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1700 1701 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { 1702 SV *name2 = sv_2mortal(newSVsv(PL_curstname)); 1703 sv_catpvs(name2, "::"); 1704 sv_catsv(name2, (SV *)name); 1705 name = name2; 1706 } 1707 1708 if (proto_after_greedy_proto) 1709 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1710 "Prototype after '%c' for %" SVf " : %s", 1711 greedy_proto, SVfARG(name), p); 1712 if (in_brackets) 1713 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1714 "Missing ']' in prototype for %" SVf " : %s", 1715 SVfARG(name), p); 1716 if (bad_proto) 1717 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1718 "Illegal character in prototype for %" SVf " : %s", 1719 SVfARG(name), p); 1720 if (bad_proto_after_underscore) 1721 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1722 "Illegal character after '_' in prototype for %" SVf " : %s", 1723 SVfARG(name), p); 1724 } 1725 1726 return (! (proto_after_greedy_proto || bad_proto) ); 1727 } 1728 1729 /* 1730 * S_incline 1731 * This subroutine has nothing to do with tilting, whether at windmills 1732 * or pinball tables. Its name is short for "increment line". It 1733 * increments the current line number in CopLINE(PL_curcop) and checks 1734 * to see whether the line starts with a comment of the form 1735 * # line 500 "foo.pm" 1736 * If so, it sets the current line number and file to the values in the comment. 1737 */ 1738 1739 STATIC void 1740 S_incline(pTHX_ const char *s, const char *end) 1741 { 1742 const char *t; 1743 const char *n; 1744 const char *e; 1745 line_t line_num; 1746 UV uv; 1747 1748 PERL_ARGS_ASSERT_INCLINE; 1749 1750 assert(end >= s); 1751 1752 COPLINE_INC_WITH_HERELINES; 1753 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1754 && s+1 == PL_bufend && *s == ';') { 1755 /* fake newline in string eval */ 1756 CopLINE_dec(PL_curcop); 1757 return; 1758 } 1759 if (*s++ != '#') 1760 return; 1761 while (SPACE_OR_TAB(*s)) 1762 s++; 1763 if (memBEGINs(s, (STRLEN) (end - s), "line")) 1764 s += sizeof("line") - 1; 1765 else 1766 return; 1767 if (SPACE_OR_TAB(*s)) 1768 s++; 1769 else 1770 return; 1771 while (SPACE_OR_TAB(*s)) 1772 s++; 1773 if (!isDIGIT(*s)) 1774 return; 1775 1776 n = s; 1777 while (isDIGIT(*s)) 1778 s++; 1779 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1780 return; 1781 while (SPACE_OR_TAB(*s)) 1782 s++; 1783 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { 1784 s++; 1785 e = t + 1; 1786 } 1787 else { 1788 t = s; 1789 while (*t && !isSPACE(*t)) 1790 t++; 1791 e = t; 1792 } 1793 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1794 e++; 1795 if (*e != '\n' && *e != '\0') 1796 return; /* false alarm */ 1797 1798 if (!grok_atoUV(n, &uv, &e)) 1799 return; 1800 line_num = ((line_t)uv) - 1; 1801 1802 if (t - s > 0) { 1803 const STRLEN len = t - s; 1804 1805 if (!PL_rsfp && !PL_parser->filtered) { 1806 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1807 * to *{"::_<newfilename"} */ 1808 /* However, the long form of evals is only turned on by the 1809 debugger - usually they're "(eval %lu)" */ 1810 GV * const cfgv = CopFILEGV(PL_curcop); 1811 if (cfgv) { 1812 char smallbuf[128]; 1813 STRLEN tmplen2 = len; 1814 char *tmpbuf2; 1815 GV *gv2; 1816 1817 if (tmplen2 + 2 <= sizeof smallbuf) 1818 tmpbuf2 = smallbuf; 1819 else 1820 Newx(tmpbuf2, tmplen2 + 2, char); 1821 1822 tmpbuf2[0] = '_'; 1823 tmpbuf2[1] = '<'; 1824 1825 memcpy(tmpbuf2 + 2, s, tmplen2); 1826 tmplen2 += 2; 1827 1828 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1829 if (!isGV(gv2)) { 1830 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1831 /* adjust ${"::_<newfilename"} to store the new file name */ 1832 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1833 /* The line number may differ. If that is the case, 1834 alias the saved lines that are in the array. 1835 Otherwise alias the whole array. */ 1836 if (CopLINE(PL_curcop) == line_num) { 1837 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1838 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1839 } 1840 else if (GvAV(cfgv)) { 1841 AV * const av = GvAV(cfgv); 1842 const line_t start = CopLINE(PL_curcop)+1; 1843 SSize_t items = AvFILLp(av) - start; 1844 if (items > 0) { 1845 AV * const av2 = GvAVn(gv2); 1846 SV **svp = AvARRAY(av) + start; 1847 Size_t l = line_num+1; 1848 while (items-- && l < SSize_t_MAX && l == (line_t)l) 1849 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); 1850 } 1851 } 1852 } 1853 1854 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1855 } 1856 } 1857 CopFILE_free(PL_curcop); 1858 CopFILE_setn(PL_curcop, s, len); 1859 } 1860 CopLINE_set(PL_curcop, line_num); 1861 } 1862 1863 STATIC void 1864 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1865 { 1866 AV *av = CopFILEAVx(PL_curcop); 1867 if (av) { 1868 SV * sv; 1869 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 1870 else { 1871 sv = *av_fetch(av, 0, 1); 1872 SvUPGRADE(sv, SVt_PVMG); 1873 } 1874 if (!SvPOK(sv)) SvPVCLEAR(sv); 1875 if (orig_sv) 1876 sv_catsv(sv, orig_sv); 1877 else 1878 sv_catpvn(sv, buf, len); 1879 if (!SvIOK(sv)) { 1880 (void)SvIOK_on(sv); 1881 SvIV_set(sv, 0); 1882 } 1883 if (PL_parser->preambling == NOLINE) 1884 av_store(av, CopLINE(PL_curcop), sv); 1885 } 1886 } 1887 1888 /* 1889 * skipspace 1890 * Called to gobble the appropriate amount and type of whitespace. 1891 * Skips comments as well. 1892 * Returns the next character after the whitespace that is skipped. 1893 * 1894 * peekspace 1895 * Same thing, but look ahead without incrementing line numbers or 1896 * adjusting PL_linestart. 1897 */ 1898 1899 #define skipspace(s) skipspace_flags(s, 0) 1900 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE) 1901 1902 char * 1903 Perl_skipspace_flags(pTHX_ char *s, U32 flags) 1904 { 1905 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 1906 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1907 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) 1908 s++; 1909 } else { 1910 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1911 PL_bufptr = s; 1912 lex_read_space(flags | LEX_KEEP_PREVIOUS | 1913 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 1914 LEX_NO_NEXT_CHUNK : 0)); 1915 s = PL_bufptr; 1916 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1917 if (PL_linestart > PL_bufptr) 1918 PL_bufptr = PL_linestart; 1919 return s; 1920 } 1921 return s; 1922 } 1923 1924 /* 1925 * S_check_uni 1926 * Check the unary operators to ensure there's no ambiguity in how they're 1927 * used. An ambiguous piece of code would be: 1928 * rand + 5 1929 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1930 * the +5 is its argument. 1931 */ 1932 1933 STATIC void 1934 S_check_uni(pTHX) 1935 { 1936 const char *s; 1937 1938 if (PL_oldoldbufptr != PL_last_uni) 1939 return; 1940 while (isSPACE(*PL_last_uni)) 1941 PL_last_uni++; 1942 s = PL_last_uni; 1943 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') 1944 s += UTF ? UTF8SKIP(s) : 1; 1945 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) 1946 return; 1947 1948 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 1949 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous", 1950 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); 1951 } 1952 1953 /* 1954 * LOP : macro to build a list operator. Its behaviour has been replaced 1955 * with a subroutine, S_lop() for which LOP is just another name. 1956 */ 1957 1958 #define LOP(f,x) return lop(f,x,s) 1959 1960 /* 1961 * S_lop 1962 * Build a list operator (or something that might be one). The rules: 1963 * - if we have a next token, then it's a list operator (no parens) for 1964 * which the next token has already been parsed; e.g., 1965 * sort foo @args 1966 * sort foo (@args) 1967 * - if the next thing is an opening paren, then it's a function 1968 * - else it's a list operator 1969 */ 1970 1971 STATIC I32 1972 S_lop(pTHX_ I32 f, U8 x, char *s) 1973 { 1974 PERL_ARGS_ASSERT_LOP; 1975 1976 pl_yylval.ival = f; 1977 CLINE; 1978 PL_bufptr = s; 1979 PL_last_lop = PL_oldbufptr; 1980 PL_last_lop_op = (OPCODE)f; 1981 if (PL_nexttoke) 1982 goto lstop; 1983 PL_expect = x; 1984 if (*s == '(') 1985 return REPORT(FUNC); 1986 s = skipspace(s); 1987 if (*s == '(') 1988 return REPORT(FUNC); 1989 else { 1990 lstop: 1991 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 1992 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 1993 return REPORT(LSTOP); 1994 } 1995 } 1996 1997 /* 1998 * S_force_next 1999 * When the lexer realizes it knows the next token (for instance, 2000 * it is reordering tokens for the parser) then it can call S_force_next 2001 * to know what token to return the next time the lexer is called. Caller 2002 * will need to set PL_nextval[] and possibly PL_expect to ensure 2003 * the lexer handles the token correctly. 2004 */ 2005 2006 STATIC void 2007 S_force_next(pTHX_ I32 type) 2008 { 2009 #ifdef DEBUGGING 2010 if (DEBUG_T_TEST) { 2011 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2012 tokereport(type, &NEXTVAL_NEXTTOKE); 2013 } 2014 #endif 2015 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); 2016 PL_nexttype[PL_nexttoke] = type; 2017 PL_nexttoke++; 2018 } 2019 2020 /* 2021 * S_postderef 2022 * 2023 * This subroutine handles postfix deref syntax after the arrow has already 2024 * been emitted. @* $* etc. are emitted as two separate tokens right here. 2025 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 2026 * only the first, leaving yylex to find the next. 2027 */ 2028 2029 static int 2030 S_postderef(pTHX_ int const funny, char const next) 2031 { 2032 assert(funny == DOLSHARP || memCHRs("$@%&*", funny)); 2033 if (next == '*') { 2034 PL_expect = XOPERATOR; 2035 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 2036 assert('@' == funny || '$' == funny || DOLSHARP == funny); 2037 PL_lex_state = LEX_INTERPEND; 2038 if ('@' == funny) 2039 force_next(POSTJOIN); 2040 } 2041 force_next(next); 2042 PL_bufptr+=2; 2043 } 2044 else { 2045 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL 2046 && !PL_lex_brackets) 2047 PL_lex_dojoin = 2; 2048 PL_expect = XOPERATOR; 2049 PL_bufptr++; 2050 } 2051 return funny; 2052 } 2053 2054 void 2055 Perl_yyunlex(pTHX) 2056 { 2057 int yyc = PL_parser->yychar; 2058 if (yyc != YYEMPTY) { 2059 if (yyc) { 2060 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2061 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { 2062 PL_lex_allbrackets--; 2063 PL_lex_brackets--; 2064 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2065 } else if (yyc == '('/*)*/) { 2066 PL_lex_allbrackets--; 2067 yyc |= (2<<24); 2068 } 2069 force_next(yyc); 2070 } 2071 PL_parser->yychar = YYEMPTY; 2072 } 2073 } 2074 2075 STATIC SV * 2076 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2077 { 2078 SV * const sv = newSVpvn_utf8(start, len, 2079 ! IN_BYTES 2080 && UTF 2081 && len != 0 2082 && is_utf8_non_invariant_string((const U8*)start, len)); 2083 return sv; 2084 } 2085 2086 /* 2087 * S_force_word 2088 * When the lexer knows the next thing is a word (for instance, it has 2089 * just seen -> and it knows that the next char is a word char, then 2090 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2091 * lookahead. 2092 * 2093 * Arguments: 2094 * char *start : buffer position (must be within PL_linestr) 2095 * int token : PL_next* will be this type of bare word 2096 * (e.g., METHOD,BAREWORD) 2097 * int check_keyword : if true, Perl checks to make sure the word isn't 2098 * a keyword (do this if the word is a label, e.g. goto FOO) 2099 * int allow_pack : if true, : characters will also be allowed (require, 2100 * use, etc. do this) 2101 */ 2102 2103 STATIC char * 2104 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2105 { 2106 char *s; 2107 STRLEN len; 2108 2109 PERL_ARGS_ASSERT_FORCE_WORD; 2110 2111 start = skipspace(start); 2112 s = start; 2113 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 2114 || (allow_pack && *s == ':' && s[1] == ':') ) 2115 { 2116 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2117 if (check_keyword) { 2118 char *s2 = PL_tokenbuf; 2119 STRLEN len2 = len; 2120 if (allow_pack && memBEGINPs(s2, len, "CORE::")) { 2121 s2 += sizeof("CORE::") - 1; 2122 len2 -= sizeof("CORE::") - 1; 2123 } 2124 if (keyword(s2, len2, 0)) 2125 return start; 2126 } 2127 if (token == METHOD) { 2128 s = skipspace(s); 2129 if (*s == '(') 2130 PL_expect = XTERM; 2131 else { 2132 PL_expect = XOPERATOR; 2133 } 2134 } 2135 NEXTVAL_NEXTTOKE.opval 2136 = newSVOP(OP_CONST,0, 2137 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2138 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2139 force_next(token); 2140 } 2141 return s; 2142 } 2143 2144 /* 2145 * S_force_ident 2146 * Called when the lexer wants $foo *foo &foo etc, but the program 2147 * text only contains the "foo" portion. The first argument is a pointer 2148 * to the "foo", and the second argument is the type symbol to prefix. 2149 * Forces the next token to be a "BAREWORD". 2150 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2151 */ 2152 2153 STATIC void 2154 S_force_ident(pTHX_ const char *s, int kind) 2155 { 2156 PERL_ARGS_ASSERT_FORCE_IDENT; 2157 2158 if (s[0]) { 2159 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2160 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2161 UTF ? SVf_UTF8 : 0)); 2162 NEXTVAL_NEXTTOKE.opval = o; 2163 force_next(BAREWORD); 2164 if (kind) { 2165 o->op_private = OPpCONST_ENTERED; 2166 /* XXX see note in pp_entereval() for why we forgo typo 2167 warnings if the symbol must be introduced in an eval. 2168 GSAR 96-10-12 */ 2169 gv_fetchpvn_flags(s, len, 2170 (PL_in_eval ? GV_ADDMULTI 2171 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2172 kind == '$' ? SVt_PV : 2173 kind == '@' ? SVt_PVAV : 2174 kind == '%' ? SVt_PVHV : 2175 SVt_PVGV 2176 ); 2177 } 2178 } 2179 } 2180 2181 static void 2182 S_force_ident_maybe_lex(pTHX_ char pit) 2183 { 2184 NEXTVAL_NEXTTOKE.ival = pit; 2185 force_next('p'); 2186 } 2187 2188 NV 2189 Perl_str_to_version(pTHX_ SV *sv) 2190 { 2191 NV retval = 0.0; 2192 NV nshift = 1.0; 2193 STRLEN len; 2194 const char *start = SvPV_const(sv,len); 2195 const char * const end = start + len; 2196 const bool utf = cBOOL(SvUTF8(sv)); 2197 2198 PERL_ARGS_ASSERT_STR_TO_VERSION; 2199 2200 while (start < end) { 2201 STRLEN skip; 2202 UV n; 2203 if (utf) 2204 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2205 else { 2206 n = *(U8*)start; 2207 skip = 1; 2208 } 2209 retval += ((NV)n)/nshift; 2210 start += skip; 2211 nshift *= 1000; 2212 } 2213 return retval; 2214 } 2215 2216 /* 2217 * S_force_version 2218 * Forces the next token to be a version number. 2219 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2220 * and if "guessing" is TRUE, then no new token is created (and the caller 2221 * must use an alternative parsing method). 2222 */ 2223 2224 STATIC char * 2225 S_force_version(pTHX_ char *s, int guessing) 2226 { 2227 OP *version = NULL; 2228 char *d; 2229 2230 PERL_ARGS_ASSERT_FORCE_VERSION; 2231 2232 s = skipspace(s); 2233 2234 d = s; 2235 if (*d == 'v') 2236 d++; 2237 if (isDIGIT(*d)) { 2238 while (isDIGIT(*d) || *d == '_' || *d == '.') 2239 d++; 2240 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2241 SV *ver; 2242 s = scan_num(s, &pl_yylval); 2243 version = pl_yylval.opval; 2244 ver = cSVOPx(version)->op_sv; 2245 if (SvPOK(ver) && !SvNIOK(ver)) { 2246 SvUPGRADE(ver, SVt_PVNV); 2247 SvNV_set(ver, str_to_version(ver)); 2248 SvNOK_on(ver); /* hint that it is a version */ 2249 } 2250 } 2251 else if (guessing) { 2252 return s; 2253 } 2254 } 2255 2256 /* NOTE: The parser sees the package name and the VERSION swapped */ 2257 NEXTVAL_NEXTTOKE.opval = version; 2258 force_next(BAREWORD); 2259 2260 return s; 2261 } 2262 2263 /* 2264 * S_force_strict_version 2265 * Forces the next token to be a version number using strict syntax rules. 2266 */ 2267 2268 STATIC char * 2269 S_force_strict_version(pTHX_ char *s) 2270 { 2271 OP *version = NULL; 2272 const char *errstr = NULL; 2273 2274 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2275 2276 while (isSPACE(*s)) /* leading whitespace */ 2277 s++; 2278 2279 if (is_STRICT_VERSION(s,&errstr)) { 2280 SV *ver = newSV(0); 2281 s = (char *)scan_version(s, ver, 0); 2282 version = newSVOP(OP_CONST, 0, ver); 2283 } 2284 else if ((*s != ';' && *s != '{' && *s != '}' ) 2285 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) 2286 { 2287 PL_bufptr = s; 2288 if (errstr) 2289 yyerror(errstr); /* version required */ 2290 return s; 2291 } 2292 2293 /* NOTE: The parser sees the package name and the VERSION swapped */ 2294 NEXTVAL_NEXTTOKE.opval = version; 2295 force_next(BAREWORD); 2296 2297 return s; 2298 } 2299 2300 /* 2301 * S_tokeq 2302 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv', 2303 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is 2304 * unchanged, and a new SV containing the modified input is returned. 2305 */ 2306 2307 STATIC SV * 2308 S_tokeq(pTHX_ SV *sv) 2309 { 2310 char *s; 2311 char *send; 2312 char *d; 2313 SV *pv = sv; 2314 2315 PERL_ARGS_ASSERT_TOKEQ; 2316 2317 assert (SvPOK(sv)); 2318 assert (SvLEN(sv)); 2319 assert (!SvIsCOW(sv)); 2320 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2321 goto finish; 2322 s = SvPVX(sv); 2323 send = SvEND(sv); 2324 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2325 while (s < send && !(*s == '\\' && s[1] == '\\')) 2326 s++; 2327 if (s == send) 2328 goto finish; 2329 d = s; 2330 if ( PL_hints & HINT_NEW_STRING ) { 2331 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2332 SVs_TEMP | SvUTF8(sv)); 2333 } 2334 while (s < send) { 2335 if (*s == '\\') { 2336 if (s + 1 < send && (s[1] == '\\')) 2337 s++; /* all that, just for this */ 2338 } 2339 *d++ = *s++; 2340 } 2341 *d = '\0'; 2342 SvCUR_set(sv, d - SvPVX_const(sv)); 2343 finish: 2344 if ( PL_hints & HINT_NEW_STRING ) 2345 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL); 2346 return sv; 2347 } 2348 2349 /* 2350 * Now come three functions related to double-quote context, 2351 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2352 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2353 * interact with PL_lex_state, and create fake ( ... ) argument lists 2354 * to handle functions and concatenation. 2355 * For example, 2356 * "foo\lbar" 2357 * is tokenised as 2358 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2359 */ 2360 2361 /* 2362 * S_sublex_start 2363 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2364 * 2365 * Pattern matching will set PL_lex_op to the pattern-matching op to 2366 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2367 * 2368 * OP_CONST is easy--just make the new op and return. 2369 * 2370 * Everything else becomes a FUNC. 2371 * 2372 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we 2373 * had an OP_CONST. This just sets us up for a 2374 * call to S_sublex_push(). 2375 */ 2376 2377 STATIC I32 2378 S_sublex_start(pTHX) 2379 { 2380 const I32 op_type = pl_yylval.ival; 2381 2382 if (op_type == OP_NULL) { 2383 pl_yylval.opval = PL_lex_op; 2384 PL_lex_op = NULL; 2385 return THING; 2386 } 2387 if (op_type == OP_CONST) { 2388 SV *sv = PL_lex_stuff; 2389 PL_lex_stuff = NULL; 2390 sv = tokeq(sv); 2391 2392 if (SvTYPE(sv) == SVt_PVIV) { 2393 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2394 STRLEN len; 2395 const char * const p = SvPV_const(sv, len); 2396 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2397 SvREFCNT_dec(sv); 2398 sv = nsv; 2399 } 2400 pl_yylval.opval = newSVOP(op_type, 0, sv); 2401 return THING; 2402 } 2403 2404 PL_parser->lex_super_state = PL_lex_state; 2405 PL_parser->lex_sub_inwhat = (U16)op_type; 2406 PL_parser->lex_sub_op = PL_lex_op; 2407 PL_parser->sub_no_recover = FALSE; 2408 PL_parser->sub_error_count = PL_error_count; 2409 PL_lex_state = LEX_INTERPPUSH; 2410 2411 PL_expect = XTERM; 2412 if (PL_lex_op) { 2413 pl_yylval.opval = PL_lex_op; 2414 PL_lex_op = NULL; 2415 return PMFUNC; 2416 } 2417 else 2418 return FUNC; 2419 } 2420 2421 /* 2422 * S_sublex_push 2423 * Create a new scope to save the lexing state. The scope will be 2424 * ended in S_sublex_done. Returns a '(', starting the function arguments 2425 * to the uc, lc, etc. found before. 2426 * Sets PL_lex_state to LEX_INTERPCONCAT. 2427 */ 2428 2429 STATIC I32 2430 S_sublex_push(pTHX) 2431 { 2432 LEXSHARED *shared; 2433 const bool is_heredoc = PL_multi_close == '<'; 2434 ENTER; 2435 2436 PL_lex_state = PL_parser->lex_super_state; 2437 SAVEI8(PL_lex_dojoin); 2438 SAVEI32(PL_lex_brackets); 2439 SAVEI32(PL_lex_allbrackets); 2440 SAVEI32(PL_lex_formbrack); 2441 SAVEI8(PL_lex_fakeeof); 2442 SAVEI32(PL_lex_casemods); 2443 SAVEI32(PL_lex_starts); 2444 SAVEI8(PL_lex_state); 2445 SAVESPTR(PL_lex_repl); 2446 SAVEVPTR(PL_lex_inpat); 2447 SAVEI16(PL_lex_inwhat); 2448 if (is_heredoc) 2449 { 2450 SAVECOPLINE(PL_curcop); 2451 SAVEI32(PL_multi_end); 2452 SAVEI32(PL_parser->herelines); 2453 PL_parser->herelines = 0; 2454 } 2455 SAVEIV(PL_multi_close); 2456 SAVEPPTR(PL_bufptr); 2457 SAVEPPTR(PL_bufend); 2458 SAVEPPTR(PL_oldbufptr); 2459 SAVEPPTR(PL_oldoldbufptr); 2460 SAVEPPTR(PL_last_lop); 2461 SAVEPPTR(PL_last_uni); 2462 SAVEPPTR(PL_linestart); 2463 SAVESPTR(PL_linestr); 2464 SAVEGENERICPV(PL_lex_brackstack); 2465 SAVEGENERICPV(PL_lex_casestack); 2466 SAVEGENERICPV(PL_parser->lex_shared); 2467 SAVEBOOL(PL_parser->lex_re_reparsing); 2468 SAVEI32(PL_copline); 2469 2470 /* The here-doc parser needs to be able to peek into outer lexing 2471 scopes to find the body of the here-doc. So we put PL_linestr and 2472 PL_bufptr into lex_shared, to ‘share’ those values. 2473 */ 2474 PL_parser->lex_shared->ls_linestr = PL_linestr; 2475 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2476 2477 PL_linestr = PL_lex_stuff; 2478 PL_lex_repl = PL_parser->lex_sub_repl; 2479 PL_lex_stuff = NULL; 2480 PL_parser->lex_sub_repl = NULL; 2481 2482 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets 2483 set for an inner quote-like operator and then an error causes scope- 2484 popping. We must not have a PL_lex_stuff value left dangling, as 2485 that breaks assumptions elsewhere. See bug #123617. */ 2486 SAVEGENERICSV(PL_lex_stuff); 2487 SAVEGENERICSV(PL_parser->lex_sub_repl); 2488 2489 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2490 = SvPVX(PL_linestr); 2491 PL_bufend += SvCUR(PL_linestr); 2492 PL_last_lop = PL_last_uni = NULL; 2493 SAVEFREESV(PL_linestr); 2494 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2495 2496 PL_lex_dojoin = FALSE; 2497 PL_lex_brackets = PL_lex_formbrack = 0; 2498 PL_lex_allbrackets = 0; 2499 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2500 Newx(PL_lex_brackstack, 120, char); 2501 Newx(PL_lex_casestack, 12, char); 2502 PL_lex_casemods = 0; 2503 *PL_lex_casestack = '\0'; 2504 PL_lex_starts = 0; 2505 PL_lex_state = LEX_INTERPCONCAT; 2506 if (is_heredoc) 2507 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2508 PL_copline = NOLINE; 2509 2510 Newxz(shared, 1, LEXSHARED); 2511 shared->ls_prev = PL_parser->lex_shared; 2512 PL_parser->lex_shared = shared; 2513 2514 PL_lex_inwhat = PL_parser->lex_sub_inwhat; 2515 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2516 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2517 PL_lex_inpat = PL_parser->lex_sub_op; 2518 else 2519 PL_lex_inpat = NULL; 2520 2521 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2522 PL_in_eval &= ~EVAL_RE_REPARSING; 2523 2524 return SUBLEXSTART; 2525 } 2526 2527 /* 2528 * S_sublex_done 2529 * Restores lexer state after a S_sublex_push. 2530 */ 2531 2532 STATIC I32 2533 S_sublex_done(pTHX) 2534 { 2535 if (!PL_lex_starts++) { 2536 SV * const sv = newSVpvs(""); 2537 if (SvUTF8(PL_linestr)) 2538 SvUTF8_on(sv); 2539 PL_expect = XOPERATOR; 2540 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 2541 return THING; 2542 } 2543 2544 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2545 PL_lex_state = LEX_INTERPCASEMOD; 2546 return yylex(); 2547 } 2548 2549 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2550 assert(PL_lex_inwhat != OP_TRANSR); 2551 if (PL_lex_repl) { 2552 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2553 PL_linestr = PL_lex_repl; 2554 PL_lex_inpat = 0; 2555 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2556 PL_bufend += SvCUR(PL_linestr); 2557 PL_last_lop = PL_last_uni = NULL; 2558 PL_lex_dojoin = FALSE; 2559 PL_lex_brackets = 0; 2560 PL_lex_allbrackets = 0; 2561 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2562 PL_lex_casemods = 0; 2563 *PL_lex_casestack = '\0'; 2564 PL_lex_starts = 0; 2565 if (SvEVALED(PL_lex_repl)) { 2566 PL_lex_state = LEX_INTERPNORMAL; 2567 PL_lex_starts++; 2568 /* we don't clear PL_lex_repl here, so that we can check later 2569 whether this is an evalled subst; that means we rely on the 2570 logic to ensure sublex_done() is called again only via the 2571 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2572 } 2573 else { 2574 PL_lex_state = LEX_INTERPCONCAT; 2575 PL_lex_repl = NULL; 2576 } 2577 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2578 CopLINE(PL_curcop) += 2579 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines 2580 + PL_parser->herelines; 2581 PL_parser->herelines = 0; 2582 } 2583 return '/'; 2584 } 2585 else { 2586 const line_t l = CopLINE(PL_curcop); 2587 LEAVE; 2588 if (PL_parser->sub_error_count != PL_error_count) { 2589 if (PL_parser->sub_no_recover) { 2590 yyquit(); 2591 NOT_REACHED; 2592 } 2593 } 2594 if (PL_multi_close == '<') 2595 PL_parser->herelines += l - PL_multi_end; 2596 PL_bufend = SvPVX(PL_linestr); 2597 PL_bufend += SvCUR(PL_linestr); 2598 PL_expect = XOPERATOR; 2599 return SUBLEXEND; 2600 } 2601 } 2602 2603 HV * 2604 Perl_load_charnames(pTHX_ SV * char_name, const char * context, 2605 const STRLEN context_len, const char ** error_msg) 2606 { 2607 /* Load the official _charnames module if not already there. The 2608 * parameters are just to give info for any error messages generated: 2609 * char_name a name to look up which is the reason for loading this 2610 * context 'char_name' in the context in the input in which it appears 2611 * context_len how many bytes 'context' occupies 2612 * error_msg *error_msg will be set to any error 2613 * 2614 * Returns the ^H table if success; otherwise NULL */ 2615 2616 unsigned int i; 2617 HV * table; 2618 SV **cvp; 2619 SV * res; 2620 2621 PERL_ARGS_ASSERT_LOAD_CHARNAMES; 2622 2623 /* This loop is executed 1 1/2 times. On the first time through, if it 2624 * isn't already loaded, try loading it, and iterate just once to see if it 2625 * worked. */ 2626 for (i = 0; i < 2; i++) { 2627 table = GvHV(PL_hintgv); /* ^H */ 2628 2629 if ( table 2630 && (PL_hints & HINT_LOCALIZE_HH) 2631 && (cvp = hv_fetchs(table, "charnames", FALSE)) 2632 && SvOK(*cvp)) 2633 { 2634 return table; /* Quit if already loaded */ 2635 } 2636 2637 if (i == 0) { 2638 Perl_load_module(aTHX_ 2639 0, 2640 newSVpvs("_charnames"), 2641 2642 /* version parameter; no need to specify it, as if we get too early 2643 * a version, will fail anyway, not being able to find 'charnames' 2644 * */ 2645 NULL, 2646 newSVpvs(":full"), 2647 newSVpvs(":short"), 2648 NULL); 2649 } 2650 } 2651 2652 /* Here, it failed; new_constant will give appropriate error messages */ 2653 *error_msg = NULL; 2654 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2655 context, context_len, error_msg); 2656 SvREFCNT_dec(res); 2657 2658 return NULL; 2659 } 2660 2661 STATIC SV* 2662 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e) 2663 { 2664 /* This justs wraps get_and_check_backslash_N_name() to output any error 2665 * message it returns. */ 2666 2667 const char * error_msg = NULL; 2668 SV * result; 2669 2670 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER; 2671 2672 /* charnames doesn't work well if there have been errors found */ 2673 if (PL_error_count > 0) { 2674 return NULL; 2675 } 2676 2677 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg); 2678 2679 if (error_msg) { 2680 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0); 2681 } 2682 2683 return result; 2684 } 2685 2686 SV* 2687 Perl_get_and_check_backslash_N_name(pTHX_ const char* s, 2688 const char* const e, 2689 const bool is_utf8, 2690 const char ** error_msg) 2691 { 2692 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2693 * interior, hence to the "}". Finds what the name resolves to, returning 2694 * an SV* containing it; NULL if no valid one found. 2695 * 2696 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it 2697 * doesn't have to be. */ 2698 2699 SV* char_name; 2700 SV* res; 2701 HV * table; 2702 SV **cvp; 2703 SV *cv; 2704 SV *rv; 2705 HV *stash; 2706 2707 /* Points to the beginning of the \N{... so that any messages include the 2708 * context of what's failing*/ 2709 const char* context = s - 3; 2710 STRLEN context_len = e - context + 1; /* include all of \N{...} */ 2711 2712 dVAR; 2713 2714 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2715 2716 assert(e >= s); 2717 assert(s > (char *) 3); 2718 2719 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); 2720 2721 if (!SvCUR(char_name)) { 2722 SvREFCNT_dec_NN(char_name); 2723 /* diag_listed_as: Unknown charname '%s' */ 2724 *error_msg = Perl_form(aTHX_ "Unknown charname ''"); 2725 return NULL; 2726 } 2727 2728 /* Autoload the charnames module */ 2729 2730 table = load_charnames(char_name, context, context_len, error_msg); 2731 if (table == NULL) { 2732 return NULL; 2733 } 2734 2735 *error_msg = NULL; 2736 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2737 context, context_len, error_msg); 2738 if (*error_msg) { 2739 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name)); 2740 2741 SvREFCNT_dec(res); 2742 return NULL; 2743 } 2744 2745 /* See if the charnames handler is the Perl core's, and if so, we can skip 2746 * the validation needed for a user-supplied one, as Perl's does its own 2747 * validation. */ 2748 cvp = hv_fetchs(table, "charnames", FALSE); 2749 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2750 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2751 { 2752 const char * const name = HvNAME(stash); 2753 if (memEQs(name, HvNAMELEN(stash), "_charnames")) { 2754 return res; 2755 } 2756 } 2757 2758 /* Here, it isn't Perl's charname handler. We can't rely on a 2759 * user-supplied handler to validate the input name. For non-ut8 input, 2760 * look to see that the first character is legal. Then loop through the 2761 * rest checking that each is a continuation */ 2762 2763 /* This code makes the reasonable assumption that the only Latin1-range 2764 * characters that begin a character name alias are alphabetic, otherwise 2765 * would have to create a isCHARNAME_BEGIN macro */ 2766 2767 if (! is_utf8) { 2768 if (! isALPHAU(*s)) { 2769 goto bad_charname; 2770 } 2771 s++; 2772 while (s < e) { 2773 if (! isCHARNAME_CONT(*s)) { 2774 goto bad_charname; 2775 } 2776 if (*s == ' ' && *(s-1) == ' ') { 2777 goto multi_spaces; 2778 } 2779 s++; 2780 } 2781 } 2782 else { 2783 /* Similarly for utf8. For invariants can check directly; for other 2784 * Latin1, can calculate their code point and check; otherwise use an 2785 * inversion list */ 2786 if (UTF8_IS_INVARIANT(*s)) { 2787 if (! isALPHAU(*s)) { 2788 goto bad_charname; 2789 } 2790 s++; 2791 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2792 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) { 2793 goto bad_charname; 2794 } 2795 s += 2; 2796 } 2797 else { 2798 if (! _invlist_contains_cp(PL_utf8_charname_begin, 2799 utf8_to_uvchr_buf((U8 *) s, 2800 (U8 *) e, 2801 NULL))) 2802 { 2803 goto bad_charname; 2804 } 2805 s += UTF8SKIP(s); 2806 } 2807 2808 while (s < e) { 2809 if (UTF8_IS_INVARIANT(*s)) { 2810 if (! isCHARNAME_CONT(*s)) { 2811 goto bad_charname; 2812 } 2813 if (*s == ' ' && *(s-1) == ' ') { 2814 goto multi_spaces; 2815 } 2816 s++; 2817 } 2818 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2819 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) 2820 { 2821 goto bad_charname; 2822 } 2823 s += 2; 2824 } 2825 else { 2826 if (! _invlist_contains_cp(PL_utf8_charname_continue, 2827 utf8_to_uvchr_buf((U8 *) s, 2828 (U8 *) e, 2829 NULL))) 2830 { 2831 goto bad_charname; 2832 } 2833 s += UTF8SKIP(s); 2834 } 2835 } 2836 } 2837 if (*(s-1) == ' ') { 2838 /* diag_listed_as: charnames alias definitions may not contain 2839 trailing white-space; marked by <-- HERE in %s 2840 */ 2841 *error_msg = Perl_form(aTHX_ 2842 "charnames alias definitions may not contain trailing " 2843 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", 2844 (int)(s - context + 1), context, 2845 (int)(e - s + 1), s + 1); 2846 return NULL; 2847 } 2848 2849 if (SvUTF8(res)) { /* Don't accept malformed charname value */ 2850 const U8* first_bad_char_loc; 2851 STRLEN len; 2852 const char* const str = SvPV_const(res, len); 2853 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, 2854 &first_bad_char_loc))) 2855 { 2856 _force_out_malformed_utf8_message(first_bad_char_loc, 2857 (U8 *) PL_parser->bufend, 2858 0, 2859 0 /* 0 means don't die */ ); 2860 /* diag_listed_as: Malformed UTF-8 returned by \N{%s} 2861 immediately after '%s' */ 2862 *error_msg = Perl_form(aTHX_ 2863 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2864 (int) context_len, context, 2865 (int) ((char *) first_bad_char_loc - str), str); 2866 return NULL; 2867 } 2868 } 2869 2870 return res; 2871 2872 bad_charname: { 2873 2874 /* The final %.*s makes sure that should the trailing NUL be missing 2875 * that this print won't run off the end of the string */ 2876 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE 2877 in \N{%s} */ 2878 *error_msg = Perl_form(aTHX_ 2879 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 2880 (int)(s - context + 1), context, 2881 (int)(e - s + 1), s + 1); 2882 return NULL; 2883 } 2884 2885 multi_spaces: 2886 /* diag_listed_as: charnames alias definitions may not contain a 2887 sequence of multiple spaces; marked by <-- HERE 2888 in %s */ 2889 *error_msg = Perl_form(aTHX_ 2890 "charnames alias definitions may not contain a sequence of " 2891 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", 2892 (int)(s - context + 1), context, 2893 (int)(e - s + 1), s + 1); 2894 return NULL; 2895 } 2896 2897 /* 2898 scan_const 2899 2900 Extracts the next constant part of a pattern, double-quoted string, 2901 or transliteration. This is terrifying code. 2902 2903 For example, in parsing the double-quoted string "ab\x63$d", it would 2904 stop at the '$' and return an OP_CONST containing 'abc'. 2905 2906 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 2907 processing a pattern (PL_lex_inpat is true), a transliteration 2908 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 2909 2910 Returns a pointer to the character scanned up to. If this is 2911 advanced from the start pointer supplied (i.e. if anything was 2912 successfully parsed), will leave an OP_CONST for the substring scanned 2913 in pl_yylval. Caller must intuit reason for not parsing further 2914 by looking at the next characters herself. 2915 2916 In patterns: 2917 expand: 2918 \N{FOO} => \N{U+hex_for_character_FOO} 2919 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 2920 2921 pass through: 2922 all other \-char, including \N and \N{ apart from \N{ABC} 2923 2924 stops on: 2925 @ and $ where it appears to be a var, but not for $ as tail anchor 2926 \l \L \u \U \Q \E 2927 (?{ or (??{ 2928 2929 In transliterations: 2930 characters are VERY literal, except for - not at the start or end 2931 of the string, which indicates a range. However some backslash sequences 2932 are recognized: \r, \n, and the like 2933 \007 \o{}, \x{}, \N{} 2934 If all elements in the transliteration are below 256, 2935 scan_const expands the range to the full set of intermediate 2936 characters. If the range is in utf8, the hyphen is replaced with 2937 a certain range mark which will be handled by pmtrans() in op.c. 2938 2939 In double-quoted strings: 2940 backslashes: 2941 all those recognized in transliterations 2942 deprecated backrefs: \1 (in substitution replacements) 2943 case and quoting: \U \Q \E 2944 stops on @ and $ 2945 2946 scan_const does *not* construct ops to handle interpolated strings. 2947 It stops processing as soon as it finds an embedded $ or @ variable 2948 and leaves it to the caller to work out what's going on. 2949 2950 embedded arrays (whether in pattern or not) could be: 2951 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 2952 2953 $ in double-quoted strings must be the symbol of an embedded scalar. 2954 2955 $ in pattern could be $foo or could be tail anchor. Assumption: 2956 it's a tail anchor if $ is the last thing in the string, or if it's 2957 followed by one of "()| \r\n\t" 2958 2959 \1 (backreferences) are turned into $1 in substitutions 2960 2961 The structure of the code is 2962 while (there's a character to process) { 2963 handle transliteration ranges 2964 skip regexp comments /(?#comment)/ and codes /(?{code})/ 2965 skip #-initiated comments in //x patterns 2966 check for embedded arrays 2967 check for embedded scalars 2968 if (backslash) { 2969 deprecate \1 in substitution replacements 2970 handle string-changing backslashes \l \U \Q \E, etc. 2971 switch (what was escaped) { 2972 handle \- in a transliteration (becomes a literal -) 2973 if a pattern and not \N{, go treat as regular character 2974 handle \132 (octal characters) 2975 handle \x15 and \x{1234} (hex characters) 2976 handle \N{name} (named characters, also \N{3,5} in a pattern) 2977 handle \cV (control characters) 2978 handle printf-style backslashes (\f, \r, \n, etc) 2979 } (end switch) 2980 continue 2981 } (end if backslash) 2982 handle regular character 2983 } (end while character to read) 2984 2985 */ 2986 2987 STATIC char * 2988 S_scan_const(pTHX_ char *start) 2989 { 2990 char *send = PL_bufend; /* end of the constant */ 2991 SV *sv = newSV(send - start); /* sv for the constant. See note below 2992 on sizing. */ 2993 char *s = start; /* start of the constant */ 2994 char *d = SvPVX(sv); /* destination for copies */ 2995 bool dorange = FALSE; /* are we in a translit range? */ 2996 bool didrange = FALSE; /* did we just finish a range? */ 2997 bool in_charclass = FALSE; /* within /[...]/ */ 2998 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be 2999 UTF8? But, this can show as true 3000 when the source isn't utf8, as for 3001 example when it is entirely composed 3002 of hex constants */ 3003 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */ 3004 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the 3005 number of characters found so far 3006 that will expand (into 2 bytes) 3007 should we have to convert to 3008 UTF-8) */ 3009 SV *res; /* result from charnames */ 3010 STRLEN offset_to_max = 0; /* The offset in the output to where the range 3011 high-end character is temporarily placed */ 3012 3013 /* Does something require special handling in tr/// ? This avoids extra 3014 * work in a less likely case. As such, khw didn't feel it was worth 3015 * adding any branches to the more mainline code to handle this, which 3016 * means that this doesn't get set in some circumstances when things like 3017 * \x{100} get expanded out. As a result there needs to be extra testing 3018 * done in the tr code */ 3019 bool has_above_latin1 = FALSE; 3020 3021 /* Note on sizing: The scanned constant is placed into sv, which is 3022 * initialized by newSV() assuming one byte of output for every byte of 3023 * input. This routine expects newSV() to allocate an extra byte for a 3024 * trailing NUL, which this routine will append if it gets to the end of 3025 * the input. There may be more bytes of input than output (eg., \N{LATIN 3026 * CAPITAL LETTER A}), or more output than input if the constant ends up 3027 * recoded to utf8, but each time a construct is found that might increase 3028 * the needed size, SvGROW() is called. Its size parameter each time is 3029 * based on the best guess estimate at the time, namely the length used so 3030 * far, plus the length the current construct will occupy, plus room for 3031 * the trailing NUL, plus one byte for every input byte still unscanned */ 3032 3033 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 3034 before set */ 3035 #ifdef EBCDIC 3036 int backslash_N = 0; /* ? was the character from \N{} */ 3037 int non_portable_endpoint = 0; /* ? In a range is an endpoint 3038 platform-specific like \x65 */ 3039 #endif 3040 3041 PERL_ARGS_ASSERT_SCAN_CONST; 3042 3043 assert(PL_lex_inwhat != OP_TRANSR); 3044 3045 /* Protect sv from errors and fatal warnings. */ 3046 ENTER_with_name("scan_const"); 3047 SAVEFREESV(sv); 3048 3049 /* A bunch of code in the loop below assumes that if s[n] exists and is not 3050 * NUL, then s[n+1] exists. This assertion makes sure that assumption is 3051 * valid */ 3052 assert(*send == '\0'); 3053 3054 while (s < send 3055 || dorange /* Handle tr/// range at right edge of input */ 3056 ) { 3057 3058 /* get transliterations out of the way (they're most literal) */ 3059 if (PL_lex_inwhat == OP_TRANS) { 3060 3061 /* But there isn't any special handling necessary unless there is a 3062 * range, so for most cases we just drop down and handle the value 3063 * as any other. There are two exceptions. 3064 * 3065 * 1. A hyphen indicates that we are actually going to have a 3066 * range. In this case, skip the '-', set a flag, then drop 3067 * down to handle what should be the end range value. 3068 * 2. After we've handled that value, the next time through, that 3069 * flag is set and we fix up the range. 3070 * 3071 * Ranges entirely within Latin1 are expanded out entirely, in 3072 * order to make the transliteration a simple table look-up. 3073 * Ranges that extend above Latin1 have to be done differently, so 3074 * there is no advantage to expanding them here, so they are 3075 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is 3076 * a byte that can't occur in legal UTF-8, and hence can signify a 3077 * hyphen without any possible ambiguity. On EBCDIC machines, if 3078 * the range is expressed as Unicode, the Latin1 portion is 3079 * expanded out even if the range extends above Latin1. This is 3080 * because each code point in it has to be processed here 3081 * individually to get its native translation */ 3082 3083 if (! dorange) { 3084 3085 /* Here, we don't think we're in a range. If the new character 3086 * is not a hyphen; or if it is a hyphen, but it's too close to 3087 * either edge to indicate a range, or if we haven't output any 3088 * characters yet then it's a regular character. */ 3089 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) 3090 { 3091 3092 /* A regular character. Process like any other, but first 3093 * clear any flags */ 3094 didrange = FALSE; 3095 dorange = FALSE; 3096 #ifdef EBCDIC 3097 non_portable_endpoint = 0; 3098 backslash_N = 0; 3099 #endif 3100 /* The tests here for being above Latin1 and similar ones 3101 * in the following 'else' suffice to find all such 3102 * occurences in the constant, except those added by a 3103 * backslash escape sequence, like \x{100}. Mostly, those 3104 * set 'has_above_latin1' as appropriate */ 3105 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3106 has_above_latin1 = TRUE; 3107 } 3108 3109 /* Drops down to generic code to process current byte */ 3110 } 3111 else { /* Is a '-' in the context where it means a range */ 3112 if (didrange) { /* Something like y/A-C-Z// */ 3113 Perl_croak(aTHX_ "Ambiguous range in transliteration" 3114 " operator"); 3115 } 3116 3117 dorange = TRUE; 3118 3119 s++; /* Skip past the hyphen */ 3120 3121 /* d now points to where the end-range character will be 3122 * placed. Drop down to get that character. We'll finish 3123 * processing the range the next time through the loop */ 3124 3125 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3126 has_above_latin1 = TRUE; 3127 } 3128 3129 /* Drops down to generic code to process current byte */ 3130 } 3131 } /* End of not a range */ 3132 else { 3133 /* Here we have parsed a range. Now must handle it. At this 3134 * point: 3135 * 'sv' is a SV* that contains the output string we are 3136 * constructing. The final two characters in that string 3137 * are the range start and range end, in order. 3138 * 'd' points to just beyond the range end in the 'sv' string, 3139 * where we would next place something 3140 */ 3141 char * max_ptr; 3142 char * min_ptr; 3143 IV range_min; 3144 IV range_max; /* last character in range */ 3145 STRLEN grow; 3146 Size_t offset_to_min = 0; 3147 Size_t extras = 0; 3148 #ifdef EBCDIC 3149 bool convert_unicode; 3150 IV real_range_max = 0; 3151 #endif 3152 /* Get the code point values of the range ends. */ 3153 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1; 3154 offset_to_max = max_ptr - SvPVX_const(sv); 3155 if (d_is_utf8) { 3156 /* We know the utf8 is valid, because we just constructed 3157 * it ourselves in previous loop iterations */ 3158 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); 3159 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); 3160 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); 3161 3162 /* This compensates for not all code setting 3163 * 'has_above_latin1', so that we don't skip stuff that 3164 * should be executed */ 3165 if (range_max > 255) { 3166 has_above_latin1 = TRUE; 3167 } 3168 } 3169 else { 3170 min_ptr = max_ptr - 1; 3171 range_min = * (U8*) min_ptr; 3172 range_max = * (U8*) max_ptr; 3173 } 3174 3175 /* If the range is just a single code point, like tr/a-a/.../, 3176 * that code point is already in the output, twice. We can 3177 * just back up over the second instance and avoid all the rest 3178 * of the work. But if it is a variant character, it's been 3179 * counted twice, so decrement. (This unlikely scenario is 3180 * special cased, like the one for a range of 2 code points 3181 * below, only because the main-line code below needs a range 3182 * of 3 or more to work without special casing. Might as well 3183 * get it out of the way now.) */ 3184 if (UNLIKELY(range_max == range_min)) { 3185 d = max_ptr; 3186 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) { 3187 utf8_variant_count--; 3188 } 3189 goto range_done; 3190 } 3191 3192 #ifdef EBCDIC 3193 /* On EBCDIC platforms, we may have to deal with portable 3194 * ranges. These happen if at least one range endpoint is a 3195 * Unicode value (\N{...}), or if the range is a subset of 3196 * [A-Z] or [a-z], and both ends are literal characters, 3197 * like 'A', and not like \x{C1} */ 3198 convert_unicode = 3199 cBOOL(backslash_N) /* \N{} forces Unicode, 3200 hence portable range */ 3201 || ( ! non_portable_endpoint 3202 && (( isLOWER_A(range_min) && isLOWER_A(range_max)) 3203 || (isUPPER_A(range_min) && isUPPER_A(range_max)))); 3204 if (convert_unicode) { 3205 3206 /* Special handling is needed for these portable ranges. 3207 * They are defined to be in Unicode terms, which includes 3208 * all the Unicode code points between the end points. 3209 * Convert to Unicode to get the Unicode range. Later we 3210 * will convert each code point in the range back to 3211 * native. */ 3212 range_min = NATIVE_TO_UNI(range_min); 3213 range_max = NATIVE_TO_UNI(range_max); 3214 } 3215 #endif 3216 3217 if (range_min > range_max) { 3218 #ifdef EBCDIC 3219 if (convert_unicode) { 3220 /* Need to convert back to native for meaningful 3221 * messages for this platform */ 3222 range_min = UNI_TO_NATIVE(range_min); 3223 range_max = UNI_TO_NATIVE(range_max); 3224 } 3225 #endif 3226 /* Use the characters themselves for the error message if 3227 * ASCII printables; otherwise some visible representation 3228 * of them */ 3229 if (isPRINT_A(range_min) && isPRINT_A(range_max)) { 3230 Perl_croak(aTHX_ 3231 "Invalid range \"%c-%c\" in transliteration operator", 3232 (char)range_min, (char)range_max); 3233 } 3234 #ifdef EBCDIC 3235 else if (convert_unicode) { 3236 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3237 Perl_croak(aTHX_ 3238 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" 3239 UVXf "}\" in transliteration operator", 3240 range_min, range_max); 3241 } 3242 #endif 3243 else { 3244 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3245 Perl_croak(aTHX_ 3246 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" 3247 " in transliteration operator", 3248 range_min, range_max); 3249 } 3250 } 3251 3252 /* If the range is exactly two code points long, they are 3253 * already both in the output */ 3254 if (UNLIKELY(range_min + 1 == range_max)) { 3255 goto range_done; 3256 } 3257 3258 /* Here the range contains at least 3 code points */ 3259 3260 if (d_is_utf8) { 3261 3262 /* If everything in the transliteration is below 256, we 3263 * can avoid special handling later. A translation table 3264 * for each of those bytes is created by op.c. So we 3265 * expand out all ranges to their constituent code points. 3266 * But if we've encountered something above 255, the 3267 * expanding won't help, so skip doing that. But if it's 3268 * EBCDIC, we may have to look at each character below 256 3269 * if we have to convert to/from Unicode values */ 3270 if ( has_above_latin1 3271 #ifdef EBCDIC 3272 && (range_min > 255 || ! convert_unicode) 3273 #endif 3274 ) { 3275 const STRLEN off = d - SvPVX(sv); 3276 const STRLEN extra = 1 + (send - s) + 1; 3277 char *e; 3278 3279 /* Move the high character one byte to the right; then 3280 * insert between it and the range begin, an illegal 3281 * byte which serves to indicate this is a range (using 3282 * a '-' would be ambiguous). */ 3283 3284 if (off + extra > SvLEN(sv)) { 3285 d = off + SvGROW(sv, off + extra); 3286 max_ptr = d - off + offset_to_max; 3287 } 3288 3289 e = d++; 3290 while (e-- > max_ptr) { 3291 *(e + 1) = *e; 3292 } 3293 *(e + 1) = (char) RANGE_INDICATOR; 3294 goto range_done; 3295 } 3296 3297 /* Here, we're going to expand out the range. For EBCDIC 3298 * the range can extend above 255 (not so in ASCII), so 3299 * for EBCDIC, split it into the parts above and below 3300 * 255/256 */ 3301 #ifdef EBCDIC 3302 if (range_max > 255) { 3303 real_range_max = range_max; 3304 range_max = 255; 3305 } 3306 #endif 3307 } 3308 3309 /* Here we need to expand out the string to contain each 3310 * character in the range. Grow the output to handle this. 3311 * For non-UTF8, we need a byte for each code point in the 3312 * range, minus the three that we've already allocated for: the 3313 * hyphen, the min, and the max. For UTF-8, we need this 3314 * plus an extra byte for each code point that occupies two 3315 * bytes (is variant) when in UTF-8 (except we've already 3316 * allocated for the end points, including if they are 3317 * variants). For ASCII platforms and Unicode ranges on EBCDIC 3318 * platforms, it's easy to calculate a precise number. To 3319 * start, we count the variants in the range, which we need 3320 * elsewhere in this function anyway. (For the case where it 3321 * isn't easy to calculate, 'extras' has been initialized to 0, 3322 * and the calculation is done in a loop further down.) */ 3323 #ifdef EBCDIC 3324 if (convert_unicode) 3325 #endif 3326 { 3327 /* This is executed unconditionally on ASCII, and for 3328 * Unicode ranges on EBCDIC. Under these conditions, all 3329 * code points above a certain value are variant; and none 3330 * under that value are. We just need to find out how much 3331 * of the range is above that value. We don't count the 3332 * end points here, as they will already have been counted 3333 * as they were parsed. */ 3334 if (range_min >= UTF_CONTINUATION_MARK) { 3335 3336 /* The whole range is made up of variants */ 3337 extras = (range_max - 1) - (range_min + 1) + 1; 3338 } 3339 else if (range_max >= UTF_CONTINUATION_MARK) { 3340 3341 /* Only the higher portion of the range is variants */ 3342 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1; 3343 } 3344 3345 utf8_variant_count += extras; 3346 } 3347 3348 /* The base growth is the number of code points in the range, 3349 * not including the endpoints, which have already been sized 3350 * for (and output). We don't subtract for the hyphen, as it 3351 * has been parsed but not output, and the SvGROW below is 3352 * based only on what's been output plus what's left to parse. 3353 * */ 3354 grow = (range_max - 1) - (range_min + 1) + 1; 3355 3356 if (d_is_utf8) { 3357 #ifdef EBCDIC 3358 /* In some cases in EBCDIC, we haven't yet calculated a 3359 * precise amount needed for the UTF-8 variants. Just 3360 * assume the worst case, that everything will expand by a 3361 * byte */ 3362 if (! convert_unicode) { 3363 grow *= 2; 3364 } 3365 else 3366 #endif 3367 { 3368 /* Otherwise we know exactly how many variants there 3369 * are in the range. */ 3370 grow += extras; 3371 } 3372 } 3373 3374 /* Grow, but position the output to overwrite the range min end 3375 * point, because in some cases we overwrite that */ 3376 SvCUR_set(sv, d - SvPVX_const(sv)); 3377 offset_to_min = min_ptr - SvPVX_const(sv); 3378 3379 /* See Note on sizing above. */ 3380 d = offset_to_min + SvGROW(sv, SvCUR(sv) 3381 + (send - s) 3382 + grow 3383 + 1 /* Trailing NUL */ ); 3384 3385 /* Now, we can expand out the range. */ 3386 #ifdef EBCDIC 3387 if (convert_unicode) { 3388 SSize_t i; 3389 3390 /* Recall that the min and max are now in Unicode terms, so 3391 * we have to convert each character to its native 3392 * equivalent */ 3393 if (d_is_utf8) { 3394 for (i = range_min; i <= range_max; i++) { 3395 append_utf8_from_native_byte( 3396 LATIN1_TO_NATIVE((U8) i), 3397 (U8 **) &d); 3398 } 3399 } 3400 else { 3401 for (i = range_min; i <= range_max; i++) { 3402 *d++ = (char)LATIN1_TO_NATIVE((U8) i); 3403 } 3404 } 3405 } 3406 else 3407 #endif 3408 /* Always gets run for ASCII, and sometimes for EBCDIC. */ 3409 { 3410 /* Here, no conversions are necessary, which means that the 3411 * first character in the range is already in 'd' and 3412 * valid, so we can skip overwriting it */ 3413 if (d_is_utf8) { 3414 SSize_t i; 3415 d += UTF8SKIP(d); 3416 for (i = range_min + 1; i <= range_max; i++) { 3417 append_utf8_from_native_byte((U8) i, (U8 **) &d); 3418 } 3419 } 3420 else { 3421 SSize_t i; 3422 d++; 3423 assert(range_min + 1 <= range_max); 3424 for (i = range_min + 1; i < range_max; i++) { 3425 #ifdef EBCDIC 3426 /* In this case on EBCDIC, we haven't calculated 3427 * the variants. Do it here, as we go along */ 3428 if (! UVCHR_IS_INVARIANT(i)) { 3429 utf8_variant_count++; 3430 } 3431 #endif 3432 *d++ = (char)i; 3433 } 3434 3435 /* The range_max is done outside the loop so as to 3436 * avoid having to special case not incrementing 3437 * 'utf8_variant_count' on EBCDIC (it's already been 3438 * counted when originally parsed) */ 3439 *d++ = (char) range_max; 3440 } 3441 } 3442 3443 #ifdef EBCDIC 3444 /* If the original range extended above 255, add in that 3445 * portion. */ 3446 if (real_range_max) { 3447 *d++ = (char) UTF8_TWO_BYTE_HI(0x100); 3448 *d++ = (char) UTF8_TWO_BYTE_LO(0x100); 3449 if (real_range_max > 0x100) { 3450 if (real_range_max > 0x101) { 3451 *d++ = (char) RANGE_INDICATOR; 3452 } 3453 d = (char*)uvchr_to_utf8((U8*)d, real_range_max); 3454 } 3455 } 3456 #endif 3457 3458 range_done: 3459 /* mark the range as done, and continue */ 3460 didrange = TRUE; 3461 dorange = FALSE; 3462 #ifdef EBCDIC 3463 non_portable_endpoint = 0; 3464 backslash_N = 0; 3465 #endif 3466 continue; 3467 } /* End of is a range */ 3468 } /* End of transliteration. Joins main code after these else's */ 3469 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3470 char *s1 = s-1; 3471 int esc = 0; 3472 while (s1 >= start && *s1-- == '\\') 3473 esc = !esc; 3474 if (!esc) 3475 in_charclass = TRUE; 3476 } 3477 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3478 char *s1 = s-1; 3479 int esc = 0; 3480 while (s1 >= start && *s1-- == '\\') 3481 esc = !esc; 3482 if (!esc) 3483 in_charclass = FALSE; 3484 } 3485 /* skip for regexp comments /(?#comment)/, except for the last 3486 * char, which will be done separately. Stop on (?{..}) and 3487 * friends */ 3488 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3489 if (s[2] == '#') { 3490 if (s_is_utf8) { 3491 PERL_UINT_FAST8_T len = UTF8SKIP(s); 3492 3493 while (s + len < send && *s != ')') { 3494 Copy(s, d, len, U8); 3495 d += len; 3496 s += len; 3497 len = UTF8_SAFE_SKIP(s, send); 3498 } 3499 } 3500 else while (s+1 < send && *s != ')') { 3501 *d++ = *s++; 3502 } 3503 } 3504 else if (!PL_lex_casemods 3505 && ( s[2] == '{' /* This should match regcomp.c */ 3506 || (s[2] == '?' && s[3] == '{'))) 3507 { 3508 break; 3509 } 3510 } 3511 /* likewise skip #-initiated comments in //x patterns */ 3512 else if (*s == '#' 3513 && PL_lex_inpat 3514 && !in_charclass 3515 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) 3516 { 3517 while (s < send && *s != '\n') 3518 *d++ = *s++; 3519 } 3520 /* no further processing of single-quoted regex */ 3521 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3522 goto default_action; 3523 3524 /* check for embedded arrays 3525 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3526 */ 3527 else if (*s == '@' && s[1]) { 3528 if (UTF 3529 ? isIDFIRST_utf8_safe(s+1, send) 3530 : isWORDCHAR_A(s[1])) 3531 { 3532 break; 3533 } 3534 if (memCHRs(":'{$", s[1])) 3535 break; 3536 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3537 break; /* in regexp, neither @+ nor @- are interpolated */ 3538 } 3539 /* check for embedded scalars. only stop if we're sure it's a 3540 * variable. */ 3541 else if (*s == '$') { 3542 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3543 break; 3544 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) { 3545 if (s[1] == '\\') { 3546 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3547 "Possible unintended interpolation of $\\ in regex"); 3548 } 3549 break; /* in regexp, $ might be tail anchor */ 3550 } 3551 } 3552 3553 /* End of else if chain - OP_TRANS rejoin rest */ 3554 3555 if (UNLIKELY(s >= send)) { 3556 assert(s == send); 3557 break; 3558 } 3559 3560 /* backslashes */ 3561 if (*s == '\\' && s+1 < send) { 3562 char* e; /* Can be used for ending '}', etc. */ 3563 3564 s++; 3565 3566 /* warn on \1 - \9 in substitution replacements, but note that \11 3567 * is an octal; and \19 is \1 followed by '9' */ 3568 if (PL_lex_inwhat == OP_SUBST 3569 && !PL_lex_inpat 3570 && isDIGIT(*s) 3571 && *s != '0' 3572 && !isDIGIT(s[1])) 3573 { 3574 /* diag_listed_as: \%d better written as $%d */ 3575 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3576 *--s = '$'; 3577 break; 3578 } 3579 3580 /* string-change backslash escapes */ 3581 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) { 3582 --s; 3583 break; 3584 } 3585 /* In a pattern, process \N, but skip any other backslash escapes. 3586 * This is because we don't want to translate an escape sequence 3587 * into a meta symbol and have the regex compiler use the meta 3588 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3589 * in spite of this, we do have to process \N here while the proper 3590 * charnames handler is in scope. See bugs #56444 and #62056. 3591 * 3592 * There is a complication because \N in a pattern may also stand 3593 * for 'match a non-nl', and not mean a charname, in which case its 3594 * processing should be deferred to the regex compiler. To be a 3595 * charname it must be followed immediately by a '{', and not look 3596 * like \N followed by a curly quantifier, i.e., not something like 3597 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3598 * quantifier */ 3599 else if (PL_lex_inpat 3600 && (*s != 'N' 3601 || s[1] != '{' 3602 || regcurly(s + 1))) 3603 { 3604 *d++ = '\\'; 3605 goto default_action; 3606 } 3607 3608 switch (*s) { 3609 default: 3610 { 3611 if ((isALPHANUMERIC(*s))) 3612 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3613 "Unrecognized escape \\%c passed through", 3614 *s); 3615 /* default action is to copy the quoted character */ 3616 goto default_action; 3617 } 3618 3619 /* eg. \132 indicates the octal constant 0132 */ 3620 case '0': case '1': case '2': case '3': 3621 case '4': case '5': case '6': case '7': 3622 { 3623 I32 flags = PERL_SCAN_SILENT_ILLDIGIT 3624 | PERL_SCAN_NOTIFY_ILLDIGIT; 3625 STRLEN len = 3; 3626 uv = grok_oct(s, &len, &flags, NULL); 3627 s += len; 3628 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) 3629 && s < send 3630 && isDIGIT(*s) /* like \08, \178 */ 3631 && ckWARN(WARN_MISC)) 3632 { 3633 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s", 3634 form_alien_digit_msg(8, len, s, send, UTF, FALSE)); 3635 } 3636 } 3637 goto NUM_ESCAPE_INSERT; 3638 3639 /* eg. \o{24} indicates the octal constant \024 */ 3640 case 'o': 3641 { 3642 const char* error; 3643 3644 if (! grok_bslash_o(&s, send, 3645 &uv, &error, 3646 NULL, 3647 FALSE, /* Not strict */ 3648 FALSE, /* No illegal cp's */ 3649 UTF)) 3650 { 3651 yyerror(error); 3652 uv = 0; /* drop through to ensure range ends are set */ 3653 } 3654 goto NUM_ESCAPE_INSERT; 3655 } 3656 3657 /* eg. \x24 indicates the hex constant 0x24 */ 3658 case 'x': 3659 { 3660 const char* error; 3661 3662 if (! grok_bslash_x(&s, send, 3663 &uv, &error, 3664 NULL, 3665 FALSE, /* Not strict */ 3666 FALSE, /* No illegal cp's */ 3667 UTF)) 3668 { 3669 yyerror(error); 3670 uv = 0; /* drop through to ensure range ends are set */ 3671 } 3672 } 3673 3674 NUM_ESCAPE_INSERT: 3675 /* Insert oct or hex escaped character. */ 3676 3677 /* Here uv is the ordinal of the next character being added */ 3678 if (UVCHR_IS_INVARIANT(uv)) { 3679 *d++ = (char) uv; 3680 } 3681 else { 3682 if (!d_is_utf8 && uv > 255) { 3683 3684 /* Here, 'uv' won't fit unless we convert to UTF-8. 3685 * If we've only seen invariants so far, all we have to 3686 * do is turn on the flag */ 3687 if (utf8_variant_count == 0) { 3688 SvUTF8_on(sv); 3689 } 3690 else { 3691 SvCUR_set(sv, d - SvPVX_const(sv)); 3692 SvPOK_on(sv); 3693 *d = '\0'; 3694 3695 sv_utf8_upgrade_flags_grow( 3696 sv, 3697 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3698 3699 /* Since we're having to grow here, 3700 * make sure we have enough room for 3701 * this escape and a NUL, so the 3702 * code immediately below won't have 3703 * to actually grow again */ 3704 UVCHR_SKIP(uv) 3705 + (STRLEN)(send - s) + 1); 3706 d = SvPVX(sv) + SvCUR(sv); 3707 } 3708 3709 has_above_latin1 = TRUE; 3710 d_is_utf8 = TRUE; 3711 } 3712 3713 if (! d_is_utf8) { 3714 *d++ = (char)uv; 3715 utf8_variant_count++; 3716 } 3717 else { 3718 /* Usually, there will already be enough room in 'sv' 3719 * since such escapes are likely longer than any UTF-8 3720 * sequence they can end up as. This isn't the case on 3721 * EBCDIC where \x{40000000} contains 12 bytes, and the 3722 * UTF-8 for it contains 14. And, we have to allow for 3723 * a trailing NUL. It probably can't happen on ASCII 3724 * platforms, but be safe. See Note on sizing above. */ 3725 const STRLEN needed = d - SvPVX(sv) 3726 + UVCHR_SKIP(uv) 3727 + (send - s) 3728 + 1; 3729 if (UNLIKELY(needed > SvLEN(sv))) { 3730 SvCUR_set(sv, d - SvPVX_const(sv)); 3731 d = SvCUR(sv) + SvGROW(sv, needed); 3732 } 3733 3734 d = (char*) uvchr_to_utf8_flags((U8*)d, uv, 3735 (ckWARN(WARN_PORTABLE)) 3736 ? UNICODE_WARN_PERL_EXTENDED 3737 : 0); 3738 } 3739 } 3740 #ifdef EBCDIC 3741 non_portable_endpoint++; 3742 #endif 3743 continue; 3744 3745 case 'N': 3746 /* In a non-pattern \N must be like \N{U+0041}, or it can be a 3747 * named character, like \N{LATIN SMALL LETTER A}, or a named 3748 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND 3749 * GRAVE} (except y/// can't handle the latter, croaking). For 3750 * convenience all three forms are referred to as "named 3751 * characters" below. 3752 * 3753 * For patterns, \N also can mean to match a non-newline. Code 3754 * before this 'switch' statement should already have handled 3755 * this situation, and hence this code only has to deal with 3756 * the named character cases. 3757 * 3758 * For non-patterns, the named characters are converted to 3759 * their string equivalents. In patterns, named characters are 3760 * not converted to their ultimate forms for the same reasons 3761 * that other escapes aren't (mainly that the ultimate 3762 * character could be considered a meta-symbol by the regex 3763 * compiler). Instead, they are converted to the \N{U+...} 3764 * form to get the value from the charnames that is in effect 3765 * right now, while preserving the fact that it was a named 3766 * character, so that the regex compiler knows this. 3767 * 3768 * The structure of this section of code (besides checking for 3769 * errors and upgrading to utf8) is: 3770 * If the named character is of the form \N{U+...}, pass it 3771 * through if a pattern; otherwise convert the code point 3772 * to utf8 3773 * Otherwise must be some \N{NAME}: convert to 3774 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8 3775 * 3776 * Transliteration is an exception. The conversion to utf8 is 3777 * only done if the code point requires it to be representable. 3778 * 3779 * Here, 's' points to the 'N'; the test below is guaranteed to 3780 * succeed if we are being called on a pattern, as we already 3781 * know from a test above that the next character is a '{'. A 3782 * non-pattern \N must mean 'named character', which requires 3783 * braces */ 3784 s++; 3785 if (*s != '{') { 3786 yyerror("Missing braces on \\N{}"); 3787 *d++ = '\0'; 3788 continue; 3789 } 3790 s++; 3791 3792 /* If there is no matching '}', it is an error. */ 3793 if (! (e = (char *) memchr(s, '}', send - s))) { 3794 if (! PL_lex_inpat) { 3795 yyerror("Missing right brace on \\N{}"); 3796 } else { 3797 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3798 } 3799 yyquit(); /* Have exhausted the input. */ 3800 } 3801 3802 /* Here it looks like a named character */ 3803 3804 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3805 s += 2; /* Skip to next char after the 'U+' */ 3806 if (PL_lex_inpat) { 3807 3808 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ 3809 /* Check the syntax. */ 3810 const char *orig_s; 3811 orig_s = s - 5; 3812 if (!isXDIGIT(*s)) { 3813 bad_NU: 3814 yyerror( 3815 "Invalid hexadecimal number in \\N{U+...}" 3816 ); 3817 s = e + 1; 3818 *d++ = '\0'; 3819 continue; 3820 } 3821 while (++s < e) { 3822 if (isXDIGIT(*s)) 3823 continue; 3824 else if ((*s == '.' || *s == '_') 3825 && isXDIGIT(s[1])) 3826 continue; 3827 goto bad_NU; 3828 } 3829 3830 /* Pass everything through unchanged. 3831 * +1 is for the '}' */ 3832 Copy(orig_s, d, e - orig_s + 1, char); 3833 d += e - orig_s + 1; 3834 } 3835 else { /* Not a pattern: convert the hex to string */ 3836 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3837 | PERL_SCAN_SILENT_ILLDIGIT 3838 | PERL_SCAN_SILENT_OVERFLOW 3839 | PERL_SCAN_DISALLOW_PREFIX; 3840 STRLEN len = e - s; 3841 3842 uv = grok_hex(s, &len, &flags, NULL); 3843 if (len == 0 || (len != (STRLEN)(e - s))) 3844 goto bad_NU; 3845 3846 if ( uv > MAX_LEGAL_CP 3847 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX)) 3848 { 3849 yyerror(form_cp_too_large_msg(16, s, len, 0)); 3850 uv = 0; /* drop through to ensure range ends are 3851 set */ 3852 } 3853 3854 /* For non-tr///, if the destination is not in utf8, 3855 * unconditionally recode it to be so. This is 3856 * because \N{} implies Unicode semantics, and scalars 3857 * have to be in utf8 to guarantee those semantics. 3858 * tr/// doesn't care about Unicode rules, so no need 3859 * there to upgrade to UTF-8 for small enough code 3860 * points */ 3861 if (! d_is_utf8 && ( uv > 0xFF 3862 || PL_lex_inwhat != OP_TRANS)) 3863 { 3864 /* See Note on sizing above. */ 3865 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1; 3866 3867 SvCUR_set(sv, d - SvPVX_const(sv)); 3868 SvPOK_on(sv); 3869 *d = '\0'; 3870 3871 if (utf8_variant_count == 0) { 3872 SvUTF8_on(sv); 3873 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 3874 } 3875 else { 3876 sv_utf8_upgrade_flags_grow( 3877 sv, 3878 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3879 extra); 3880 d = SvPVX(sv) + SvCUR(sv); 3881 } 3882 3883 d_is_utf8 = TRUE; 3884 has_above_latin1 = TRUE; 3885 } 3886 3887 /* Add the (Unicode) code point to the output. */ 3888 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) { 3889 *d++ = (char) LATIN1_TO_NATIVE(uv); 3890 } 3891 else { 3892 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 3893 (ckWARN(WARN_PORTABLE)) 3894 ? UNICODE_WARN_PERL_EXTENDED 3895 : 0); 3896 } 3897 } 3898 } 3899 else /* Here is \N{NAME} but not \N{U+...}. */ 3900 if (! (res = get_and_check_backslash_N_name_wrapper(s, e))) 3901 { /* Failed. We should die eventually, but for now use a NUL 3902 to keep parsing */ 3903 *d++ = '\0'; 3904 } 3905 else { /* Successfully evaluated the name */ 3906 STRLEN len; 3907 const char *str = SvPV_const(res, len); 3908 if (PL_lex_inpat) { 3909 3910 if (! len) { /* The name resolved to an empty string */ 3911 const char empty_N[] = "\\N{_}"; 3912 Copy(empty_N, d, sizeof(empty_N) - 1, char); 3913 d += sizeof(empty_N) - 1; 3914 } 3915 else { 3916 /* In order to not lose information for the regex 3917 * compiler, pass the result in the specially made 3918 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3919 * the code points in hex of each character 3920 * returned by charnames */ 3921 3922 const char *str_end = str + len; 3923 const STRLEN off = d - SvPVX_const(sv); 3924 3925 if (! SvUTF8(res)) { 3926 /* For the non-UTF-8 case, we can determine the 3927 * exact length needed without having to parse 3928 * through the string. Each character takes up 3929 * 2 hex digits plus either a trailing dot or 3930 * the "}" */ 3931 const char initial_text[] = "\\N{U+"; 3932 const STRLEN initial_len = sizeof(initial_text) 3933 - 1; 3934 d = off + SvGROW(sv, off 3935 + 3 * len 3936 3937 /* +1 for trailing NUL */ 3938 + initial_len + 1 3939 3940 + (STRLEN)(send - e)); 3941 Copy(initial_text, d, initial_len, char); 3942 d += initial_len; 3943 while (str < str_end) { 3944 char hex_string[4]; 3945 int len = 3946 my_snprintf(hex_string, 3947 sizeof(hex_string), 3948 "%02X.", 3949 3950 /* The regex compiler is 3951 * expecting Unicode, not 3952 * native */ 3953 NATIVE_TO_LATIN1(*str)); 3954 PERL_MY_SNPRINTF_POST_GUARD(len, 3955 sizeof(hex_string)); 3956 Copy(hex_string, d, 3, char); 3957 d += 3; 3958 str++; 3959 } 3960 d--; /* Below, we will overwrite the final 3961 dot with a right brace */ 3962 } 3963 else { 3964 STRLEN char_length; /* cur char's byte length */ 3965 3966 /* and the number of bytes after this is 3967 * translated into hex digits */ 3968 STRLEN output_length; 3969 3970 /* 2 hex per byte; 2 chars for '\N'; 2 chars 3971 * for max('U+', '.'); and 1 for NUL */ 3972 char hex_string[2 * UTF8_MAXBYTES + 5]; 3973 3974 /* Get the first character of the result. */ 3975 U32 uv = utf8n_to_uvchr((U8 *) str, 3976 len, 3977 &char_length, 3978 UTF8_ALLOW_ANYUV); 3979 /* Convert first code point to Unicode hex, 3980 * including the boiler plate before it. */ 3981 output_length = 3982 my_snprintf(hex_string, sizeof(hex_string), 3983 "\\N{U+%X", 3984 (unsigned int) NATIVE_TO_UNI(uv)); 3985 3986 /* Make sure there is enough space to hold it */ 3987 d = off + SvGROW(sv, off 3988 + output_length 3989 + (STRLEN)(send - e) 3990 + 2); /* '}' + NUL */ 3991 /* And output it */ 3992 Copy(hex_string, d, output_length, char); 3993 d += output_length; 3994 3995 /* For each subsequent character, append dot and 3996 * its Unicode code point in hex */ 3997 while ((str += char_length) < str_end) { 3998 const STRLEN off = d - SvPVX_const(sv); 3999 U32 uv = utf8n_to_uvchr((U8 *) str, 4000 str_end - str, 4001 &char_length, 4002 UTF8_ALLOW_ANYUV); 4003 output_length = 4004 my_snprintf(hex_string, 4005 sizeof(hex_string), 4006 ".%X", 4007 (unsigned int) NATIVE_TO_UNI(uv)); 4008 4009 d = off + SvGROW(sv, off 4010 + output_length 4011 + (STRLEN)(send - e) 4012 + 2); /* '}' + NUL */ 4013 Copy(hex_string, d, output_length, char); 4014 d += output_length; 4015 } 4016 } 4017 4018 *d++ = '}'; /* Done. Add the trailing brace */ 4019 } 4020 } 4021 else { /* Here, not in a pattern. Convert the name to a 4022 * string. */ 4023 4024 if (PL_lex_inwhat == OP_TRANS) { 4025 str = SvPV_const(res, len); 4026 if (len > ((SvUTF8(res)) 4027 ? UTF8SKIP(str) 4028 : 1U)) 4029 { 4030 yyerror(Perl_form(aTHX_ 4031 "%.*s must not be a named sequence" 4032 " in transliteration operator", 4033 /* +1 to include the "}" */ 4034 (int) (e + 1 - start), start)); 4035 *d++ = '\0'; 4036 goto end_backslash_N; 4037 } 4038 4039 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) { 4040 has_above_latin1 = TRUE; 4041 } 4042 4043 } 4044 else if (! SvUTF8(res)) { 4045 /* Make sure \N{} return is UTF-8. This is because 4046 * \N{} implies Unicode semantics, and scalars have 4047 * to be in utf8 to guarantee those semantics; but 4048 * not needed in tr/// */ 4049 sv_utf8_upgrade_flags(res, 0); 4050 str = SvPV_const(res, len); 4051 } 4052 4053 /* Upgrade destination to be utf8 if this new 4054 * component is */ 4055 if (! d_is_utf8 && SvUTF8(res)) { 4056 /* See Note on sizing above. */ 4057 const STRLEN extra = len + (send - s) + 1; 4058 4059 SvCUR_set(sv, d - SvPVX_const(sv)); 4060 SvPOK_on(sv); 4061 *d = '\0'; 4062 4063 if (utf8_variant_count == 0) { 4064 SvUTF8_on(sv); 4065 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 4066 } 4067 else { 4068 sv_utf8_upgrade_flags_grow(sv, 4069 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4070 extra); 4071 d = SvPVX(sv) + SvCUR(sv); 4072 } 4073 d_is_utf8 = TRUE; 4074 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 4075 4076 /* See Note on sizing above. (NOTE: SvCUR() is not 4077 * set correctly here). */ 4078 const STRLEN extra = len + (send - e) + 1; 4079 const STRLEN off = d - SvPVX_const(sv); 4080 d = off + SvGROW(sv, off + extra); 4081 } 4082 Copy(str, d, len, char); 4083 d += len; 4084 } 4085 4086 SvREFCNT_dec(res); 4087 4088 } /* End \N{NAME} */ 4089 4090 end_backslash_N: 4091 #ifdef EBCDIC 4092 backslash_N++; /* \N{} is defined to be Unicode */ 4093 #endif 4094 s = e + 1; /* Point to just after the '}' */ 4095 continue; 4096 4097 /* \c is a control character */ 4098 case 'c': 4099 s++; 4100 if (s < send) { 4101 const char * message; 4102 4103 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) { 4104 yyerror(message); 4105 yyquit(); /* Have always immediately croaked on 4106 errors in this */ 4107 } 4108 d++; 4109 } 4110 else { 4111 yyerror("Missing control char name in \\c"); 4112 yyquit(); /* Are at end of input, no sense continuing */ 4113 } 4114 #ifdef EBCDIC 4115 non_portable_endpoint++; 4116 #endif 4117 break; 4118 4119 /* printf-style backslashes, formfeeds, newlines, etc */ 4120 case 'b': 4121 *d++ = '\b'; 4122 break; 4123 case 'n': 4124 *d++ = '\n'; 4125 break; 4126 case 'r': 4127 *d++ = '\r'; 4128 break; 4129 case 'f': 4130 *d++ = '\f'; 4131 break; 4132 case 't': 4133 *d++ = '\t'; 4134 break; 4135 case 'e': 4136 *d++ = ESC_NATIVE; 4137 break; 4138 case 'a': 4139 *d++ = '\a'; 4140 break; 4141 } /* end switch */ 4142 4143 s++; 4144 continue; 4145 } /* end if (backslash) */ 4146 4147 default_action: 4148 /* Just copy the input to the output, though we may have to convert 4149 * to/from UTF-8. 4150 * 4151 * If the input has the same representation in UTF-8 as not, it will be 4152 * a single byte, and we don't care about UTF8ness; just copy the byte */ 4153 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) { 4154 *d++ = *s++; 4155 } 4156 else if (! s_is_utf8 && ! d_is_utf8) { 4157 /* If neither source nor output is UTF-8, is also a single byte, 4158 * just copy it; but this byte counts should we later have to 4159 * convert to UTF-8 */ 4160 *d++ = *s++; 4161 utf8_variant_count++; 4162 } 4163 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */ 4164 const STRLEN len = UTF8SKIP(s); 4165 4166 /* We expect the source to have already been checked for 4167 * malformedness */ 4168 assert(isUTF8_CHAR((U8 *) s, (U8 *) send)); 4169 4170 Copy(s, d, len, U8); 4171 d += len; 4172 s += len; 4173 } 4174 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */ 4175 STRLEN need = send - s + 1; /* See Note on sizing above. */ 4176 4177 SvCUR_set(sv, d - SvPVX_const(sv)); 4178 SvPOK_on(sv); 4179 *d = '\0'; 4180 4181 if (utf8_variant_count == 0) { 4182 SvUTF8_on(sv); 4183 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need); 4184 } 4185 else { 4186 sv_utf8_upgrade_flags_grow(sv, 4187 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4188 need); 4189 d = SvPVX(sv) + SvCUR(sv); 4190 } 4191 d_is_utf8 = TRUE; 4192 goto default_action; /* Redo, having upgraded so both are UTF-8 */ 4193 } 4194 else { /* UTF8ness matters: convert this non-UTF8 source char to 4195 UTF-8 for output. It will occupy 2 bytes, but don't include 4196 the input byte since we haven't incremented 's' yet. See 4197 Note on sizing above. */ 4198 const STRLEN off = d - SvPVX(sv); 4199 const STRLEN extra = 2 + (send - s - 1) + 1; 4200 if (off + extra > SvLEN(sv)) { 4201 d = off + SvGROW(sv, off + extra); 4202 } 4203 *d++ = UTF8_EIGHT_BIT_HI(*s); 4204 *d++ = UTF8_EIGHT_BIT_LO(*s); 4205 s++; 4206 } 4207 } /* while loop to process each character */ 4208 4209 { 4210 const STRLEN off = d - SvPVX(sv); 4211 4212 /* See if room for the terminating NUL */ 4213 if (UNLIKELY(off >= SvLEN(sv))) { 4214 4215 #ifndef DEBUGGING 4216 4217 if (off > SvLEN(sv)) 4218 #endif 4219 Perl_croak(aTHX_ "panic: constant overflowed allocated space," 4220 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv)); 4221 4222 /* Whew! Here we don't have room for the terminating NUL, but 4223 * everything else so far has fit. It's not too late to grow 4224 * to fit the NUL and continue on. But it is a bug, as the code 4225 * above was supposed to have made room for this, so under 4226 * DEBUGGING builds, we panic anyway. */ 4227 d = off + SvGROW(sv, off + 1); 4228 } 4229 } 4230 4231 /* terminate the string and set up the sv */ 4232 *d = '\0'; 4233 SvCUR_set(sv, d - SvPVX_const(sv)); 4234 4235 SvPOK_on(sv); 4236 if (d_is_utf8) { 4237 SvUTF8_on(sv); 4238 } 4239 4240 /* shrink the sv if we allocated more than we used */ 4241 if (SvCUR(sv) + 5 < SvLEN(sv)) { 4242 SvPV_shrink_to_cur(sv); 4243 } 4244 4245 /* return the substring (via pl_yylval) only if we parsed anything */ 4246 if (s > start) { 4247 char *s2 = start; 4248 for (; s2 < s; s2++) { 4249 if (*s2 == '\n') 4250 COPLINE_INC_WITH_HERELINES; 4251 } 4252 SvREFCNT_inc_simple_void_NN(sv); 4253 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 4254 && ! PL_parser->lex_re_reparsing) 4255 { 4256 const char *const key = PL_lex_inpat ? "qr" : "q"; 4257 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 4258 const char *type; 4259 STRLEN typelen; 4260 4261 if (PL_lex_inwhat == OP_TRANS) { 4262 type = "tr"; 4263 typelen = 2; 4264 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 4265 type = "s"; 4266 typelen = 1; 4267 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 4268 type = "q"; 4269 typelen = 1; 4270 } else { 4271 type = "qq"; 4272 typelen = 2; 4273 } 4274 4275 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 4276 type, typelen, NULL); 4277 } 4278 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 4279 } 4280 LEAVE_with_name("scan_const"); 4281 return s; 4282 } 4283 4284 /* S_intuit_more 4285 * Returns TRUE if there's more to the expression (e.g., a subscript), 4286 * FALSE otherwise. 4287 * 4288 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 4289 * 4290 * ->[ and ->{ return TRUE 4291 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 4292 * { and [ outside a pattern are always subscripts, so return TRUE 4293 * if we're outside a pattern and it's not { or [, then return FALSE 4294 * if we're in a pattern and the first char is a { 4295 * {4,5} (any digits around the comma) returns FALSE 4296 * if we're in a pattern and the first char is a [ 4297 * [] returns FALSE 4298 * [SOMETHING] has a funky algorithm to decide whether it's a 4299 * character class or not. It has to deal with things like 4300 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 4301 * anything else returns TRUE 4302 */ 4303 4304 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 4305 4306 STATIC int 4307 S_intuit_more(pTHX_ char *s, char *e) 4308 { 4309 PERL_ARGS_ASSERT_INTUIT_MORE; 4310 4311 if (PL_lex_brackets) 4312 return TRUE; 4313 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 4314 return TRUE; 4315 if (*s == '-' && s[1] == '>' 4316 && FEATURE_POSTDEREF_QQ_IS_ENABLED 4317 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 4318 ||(s[2] == '@' && memCHRs("*[{",s[3])) )) 4319 return TRUE; 4320 if (*s != '{' && *s != '[') 4321 return FALSE; 4322 PL_parser->sub_no_recover = TRUE; 4323 if (!PL_lex_inpat) 4324 return TRUE; 4325 4326 /* In a pattern, so maybe we have {n,m}. */ 4327 if (*s == '{') { 4328 if (regcurly(s)) { 4329 return FALSE; 4330 } 4331 return TRUE; 4332 } 4333 4334 /* On the other hand, maybe we have a character class */ 4335 4336 s++; 4337 if (*s == ']' || *s == '^') 4338 return FALSE; 4339 else { 4340 /* this is terrifying, and it works */ 4341 int weight; 4342 char seen[256]; 4343 const char * const send = (char *) memchr(s, ']', e - s); 4344 unsigned char un_char, last_un_char; 4345 char tmpbuf[sizeof PL_tokenbuf * 4]; 4346 4347 if (!send) /* has to be an expression */ 4348 return TRUE; 4349 weight = 2; /* let's weigh the evidence */ 4350 4351 if (*s == '$') 4352 weight -= 3; 4353 else if (isDIGIT(*s)) { 4354 if (s[1] != ']') { 4355 if (isDIGIT(s[1]) && s[2] == ']') 4356 weight -= 10; 4357 } 4358 else 4359 weight -= 100; 4360 } 4361 Zero(seen,256,char); 4362 un_char = 255; 4363 for (; s < send; s++) { 4364 last_un_char = un_char; 4365 un_char = (unsigned char)*s; 4366 switch (*s) { 4367 case '@': 4368 case '&': 4369 case '$': 4370 weight -= seen[un_char] * 10; 4371 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { 4372 int len; 4373 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 4374 len = (int)strlen(tmpbuf); 4375 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 4376 UTF ? SVf_UTF8 : 0, SVt_PV)) 4377 weight -= 100; 4378 else 4379 weight -= 10; 4380 } 4381 else if (*s == '$' 4382 && s[1] 4383 && memCHRs("[#!%*<>()-=",s[1])) 4384 { 4385 if (/*{*/ memCHRs("])} =",s[2])) 4386 weight -= 10; 4387 else 4388 weight -= 1; 4389 } 4390 break; 4391 case '\\': 4392 un_char = 254; 4393 if (s[1]) { 4394 if (memCHRs("wds]",s[1])) 4395 weight += 100; 4396 else if (seen[(U8)'\''] || seen[(U8)'"']) 4397 weight += 1; 4398 else if (memCHRs("rnftbxcav",s[1])) 4399 weight += 40; 4400 else if (isDIGIT(s[1])) { 4401 weight += 40; 4402 while (s[1] && isDIGIT(s[1])) 4403 s++; 4404 } 4405 } 4406 else 4407 weight += 100; 4408 break; 4409 case '-': 4410 if (s[1] == '\\') 4411 weight += 50; 4412 if (memCHRs("aA01! ",last_un_char)) 4413 weight += 30; 4414 if (memCHRs("zZ79~",s[1])) 4415 weight += 30; 4416 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4417 weight -= 5; /* cope with negative subscript */ 4418 break; 4419 default: 4420 if (!isWORDCHAR(last_un_char) 4421 && !(last_un_char == '$' || last_un_char == '@' 4422 || last_un_char == '&') 4423 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4424 char *d = s; 4425 while (isALPHA(*s)) 4426 s++; 4427 if (keyword(d, s - d, 0)) 4428 weight -= 150; 4429 } 4430 if (un_char == last_un_char + 1) 4431 weight += 5; 4432 weight -= seen[un_char]; 4433 break; 4434 } 4435 seen[un_char]++; 4436 } 4437 if (weight >= 0) /* probably a character class */ 4438 return FALSE; 4439 } 4440 4441 return TRUE; 4442 } 4443 4444 /* 4445 * S_intuit_method 4446 * 4447 * Does all the checking to disambiguate 4448 * foo bar 4449 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4450 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 4451 * 4452 * First argument is the stuff after the first token, e.g. "bar". 4453 * 4454 * Not a method if foo is a filehandle. 4455 * Not a method if foo is a subroutine prototyped to take a filehandle. 4456 * Not a method if it's really "Foo $bar" 4457 * Method if it's "foo $bar" 4458 * Not a method if it's really "print foo $bar" 4459 * Method if it's really "foo package::" (interpreted as package->foo) 4460 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4461 * Not a method if bar is a filehandle or package, but is quoted with 4462 * => 4463 */ 4464 4465 STATIC int 4466 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) 4467 { 4468 char *s = start + (*start == '$'); 4469 char tmpbuf[sizeof PL_tokenbuf]; 4470 STRLEN len; 4471 GV* indirgv; 4472 /* Mustn't actually add anything to a symbol table. 4473 But also don't want to "initialise" any placeholder 4474 constants that might already be there into full 4475 blown PVGVs with attached PVCV. */ 4476 GV * const gv = 4477 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; 4478 4479 PERL_ARGS_ASSERT_INTUIT_METHOD; 4480 4481 if (!FEATURE_INDIRECT_IS_ENABLED) 4482 return 0; 4483 4484 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4485 return 0; 4486 if (cv && SvPOK(cv)) { 4487 const char *proto = CvPROTO(cv); 4488 if (proto) { 4489 while (*proto && (isSPACE(*proto) || *proto == ';')) 4490 proto++; 4491 if (*proto == '*') 4492 return 0; 4493 } 4494 } 4495 4496 if (*start == '$') { 4497 SSize_t start_off = start - SvPVX(PL_linestr); 4498 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY 4499 || isUPPER(*PL_tokenbuf)) 4500 return 0; 4501 /* this could be $# */ 4502 if (isSPACE(*s)) 4503 s = skipspace(s); 4504 PL_bufptr = SvPVX(PL_linestr) + start_off; 4505 PL_expect = XREF; 4506 return *s == '(' ? FUNCMETH : METHOD; 4507 } 4508 4509 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4510 /* start is the beginning of the possible filehandle/object, 4511 * and s is the end of it 4512 * tmpbuf is a copy of it (but with single quotes as double colons) 4513 */ 4514 4515 if (!keyword(tmpbuf, len, 0)) { 4516 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4517 len -= 2; 4518 tmpbuf[len] = '\0'; 4519 goto bare_package; 4520 } 4521 indirgv = gv_fetchpvn_flags(tmpbuf, len, 4522 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), 4523 SVt_PVCV); 4524 if (indirgv && SvTYPE(indirgv) != SVt_NULL 4525 && (!isGV(indirgv) || GvCVu(indirgv))) 4526 return 0; 4527 /* filehandle or package name makes it a method */ 4528 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4529 s = skipspace(s); 4530 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4531 return 0; /* no assumptions -- "=>" quotes bareword */ 4532 bare_package: 4533 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, 4534 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4535 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4536 PL_expect = XTERM; 4537 force_next(BAREWORD); 4538 PL_bufptr = s; 4539 return *s == '(' ? FUNCMETH : METHOD; 4540 } 4541 } 4542 return 0; 4543 } 4544 4545 /* Encoded script support. filter_add() effectively inserts a 4546 * 'pre-processing' function into the current source input stream. 4547 * Note that the filter function only applies to the current source file 4548 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4549 * 4550 * The datasv parameter (which may be NULL) can be used to pass 4551 * private data to this instance of the filter. The filter function 4552 * can recover the SV using the FILTER_DATA macro and use it to 4553 * store private buffers and state information. 4554 * 4555 * The supplied datasv parameter is upgraded to a PVIO type 4556 * and the IoDIRP/IoANY field is used to store the function pointer, 4557 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4558 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4559 * private use must be set using malloc'd pointers. 4560 */ 4561 4562 SV * 4563 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4564 { 4565 if (!funcp) 4566 return NULL; 4567 4568 if (!PL_parser) 4569 return NULL; 4570 4571 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4572 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4573 4574 if (!PL_rsfp_filters) 4575 PL_rsfp_filters = newAV(); 4576 if (!datasv) 4577 datasv = newSV(0); 4578 SvUPGRADE(datasv, SVt_PVIO); 4579 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4580 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4581 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4582 FPTR2DPTR(void *, IoANY(datasv)), 4583 SvPV_nolen(datasv))); 4584 av_unshift(PL_rsfp_filters, 1); 4585 av_store(PL_rsfp_filters, 0, datasv) ; 4586 if ( 4587 !PL_parser->filtered 4588 && PL_parser->lex_flags & LEX_EVALBYTES 4589 && PL_bufptr < PL_bufend 4590 ) { 4591 const char *s = PL_bufptr; 4592 while (s < PL_bufend) { 4593 if (*s == '\n') { 4594 SV *linestr = PL_parser->linestr; 4595 char *buf = SvPVX(linestr); 4596 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4597 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4598 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4599 STRLEN const linestart_pos = PL_parser->linestart - buf; 4600 STRLEN const last_uni_pos = 4601 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4602 STRLEN const last_lop_pos = 4603 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4604 av_push(PL_rsfp_filters, linestr); 4605 PL_parser->linestr = 4606 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4607 buf = SvPVX(PL_parser->linestr); 4608 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4609 PL_parser->bufptr = buf + bufptr_pos; 4610 PL_parser->oldbufptr = buf + oldbufptr_pos; 4611 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4612 PL_parser->linestart = buf + linestart_pos; 4613 if (PL_parser->last_uni) 4614 PL_parser->last_uni = buf + last_uni_pos; 4615 if (PL_parser->last_lop) 4616 PL_parser->last_lop = buf + last_lop_pos; 4617 SvLEN_set(linestr, SvCUR(linestr)); 4618 SvCUR_set(linestr, s - SvPVX(linestr)); 4619 PL_parser->filtered = 1; 4620 break; 4621 } 4622 s++; 4623 } 4624 } 4625 return(datasv); 4626 } 4627 4628 4629 /* Delete most recently added instance of this filter function. */ 4630 void 4631 Perl_filter_del(pTHX_ filter_t funcp) 4632 { 4633 SV *datasv; 4634 4635 PERL_ARGS_ASSERT_FILTER_DEL; 4636 4637 #ifdef DEBUGGING 4638 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4639 FPTR2DPTR(void*, funcp))); 4640 #endif 4641 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4642 return; 4643 /* if filter is on top of stack (usual case) just pop it off */ 4644 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4645 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4646 sv_free(av_pop(PL_rsfp_filters)); 4647 4648 return; 4649 } 4650 /* we need to search for the correct entry and clear it */ 4651 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4652 } 4653 4654 4655 /* Invoke the idxth filter function for the current rsfp. */ 4656 /* maxlen 0 = read one text line */ 4657 I32 4658 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4659 { 4660 filter_t funcp; 4661 I32 ret; 4662 SV *datasv = NULL; 4663 /* This API is bad. It should have been using unsigned int for maxlen. 4664 Not sure if we want to change the API, but if not we should sanity 4665 check the value here. */ 4666 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4667 4668 PERL_ARGS_ASSERT_FILTER_READ; 4669 4670 if (!PL_parser || !PL_rsfp_filters) 4671 return -1; 4672 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4673 /* Provide a default input filter to make life easy. */ 4674 /* Note that we append to the line. This is handy. */ 4675 DEBUG_P(PerlIO_printf(Perl_debug_log, 4676 "filter_read %d: from rsfp\n", idx)); 4677 if (correct_length) { 4678 /* Want a block */ 4679 int len ; 4680 const int old_len = SvCUR(buf_sv); 4681 4682 /* ensure buf_sv is large enough */ 4683 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4684 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4685 correct_length)) <= 0) { 4686 if (PerlIO_error(PL_rsfp)) 4687 return -1; /* error */ 4688 else 4689 return 0 ; /* end of file */ 4690 } 4691 SvCUR_set(buf_sv, old_len + len) ; 4692 SvPVX(buf_sv)[old_len + len] = '\0'; 4693 } else { 4694 /* Want a line */ 4695 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4696 if (PerlIO_error(PL_rsfp)) 4697 return -1; /* error */ 4698 else 4699 return 0 ; /* end of file */ 4700 } 4701 } 4702 return SvCUR(buf_sv); 4703 } 4704 /* Skip this filter slot if filter has been deleted */ 4705 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4706 DEBUG_P(PerlIO_printf(Perl_debug_log, 4707 "filter_read %d: skipped (filter deleted)\n", 4708 idx)); 4709 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4710 } 4711 if (SvTYPE(datasv) != SVt_PVIO) { 4712 if (correct_length) { 4713 /* Want a block */ 4714 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4715 if (!remainder) return 0; /* eof */ 4716 if (correct_length > remainder) correct_length = remainder; 4717 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4718 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4719 } else { 4720 /* Want a line */ 4721 const char *s = SvEND(datasv); 4722 const char *send = SvPVX(datasv) + SvLEN(datasv); 4723 while (s < send) { 4724 if (*s == '\n') { 4725 s++; 4726 break; 4727 } 4728 s++; 4729 } 4730 if (s == send) return 0; /* eof */ 4731 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4732 SvCUR_set(datasv, s-SvPVX(datasv)); 4733 } 4734 return SvCUR(buf_sv); 4735 } 4736 /* Get function pointer hidden within datasv */ 4737 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4738 DEBUG_P(PerlIO_printf(Perl_debug_log, 4739 "filter_read %d: via function %p (%s)\n", 4740 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4741 /* Call function. The function is expected to */ 4742 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4743 /* Return: <0:error, =0:eof, >0:not eof */ 4744 ENTER; 4745 save_scalar(PL_errgv); 4746 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); 4747 LEAVE; 4748 return ret; 4749 } 4750 4751 STATIC char * 4752 S_filter_gets(pTHX_ SV *sv, STRLEN append) 4753 { 4754 PERL_ARGS_ASSERT_FILTER_GETS; 4755 4756 #ifdef PERL_CR_FILTER 4757 if (!PL_rsfp_filters) { 4758 filter_add(S_cr_textfilter,NULL); 4759 } 4760 #endif 4761 if (PL_rsfp_filters) { 4762 if (!append) 4763 SvCUR_set(sv, 0); /* start with empty line */ 4764 if (FILTER_READ(0, sv, 0) > 0) 4765 return ( SvPVX(sv) ) ; 4766 else 4767 return NULL ; 4768 } 4769 else 4770 return (sv_gets(sv, PL_rsfp, append)); 4771 } 4772 4773 STATIC HV * 4774 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4775 { 4776 GV *gv; 4777 4778 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4779 4780 if (memEQs(pkgname, len, "__PACKAGE__")) 4781 return PL_curstash; 4782 4783 if (len > 2 4784 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') 4785 && (gv = gv_fetchpvn_flags(pkgname, 4786 len, 4787 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4788 { 4789 return GvHV(gv); /* Foo:: */ 4790 } 4791 4792 /* use constant CLASS => 'MyClass' */ 4793 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4794 if (gv && GvCV(gv)) { 4795 SV * const sv = cv_const_sv(GvCV(gv)); 4796 if (sv) 4797 return gv_stashsv(sv, 0); 4798 } 4799 4800 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4801 } 4802 4803 4804 STATIC char * 4805 S_tokenize_use(pTHX_ int is_use, char *s) { 4806 PERL_ARGS_ASSERT_TOKENIZE_USE; 4807 4808 if (PL_expect != XSTATE) 4809 /* diag_listed_as: "use" not allowed in expression */ 4810 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4811 is_use ? "use" : "no")); 4812 PL_expect = XTERM; 4813 s = skipspace(s); 4814 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4815 s = force_version(s, TRUE); 4816 if (*s == ';' || *s == '}' 4817 || (s = skipspace(s), (*s == ';' || *s == '}'))) { 4818 NEXTVAL_NEXTTOKE.opval = NULL; 4819 force_next(BAREWORD); 4820 } 4821 else if (*s == 'v') { 4822 s = force_word(s,BAREWORD,FALSE,TRUE); 4823 s = force_version(s, FALSE); 4824 } 4825 } 4826 else { 4827 s = force_word(s,BAREWORD,FALSE,TRUE); 4828 s = force_version(s, FALSE); 4829 } 4830 pl_yylval.ival = is_use; 4831 return s; 4832 } 4833 #ifdef DEBUGGING 4834 static const char* const exp_name[] = 4835 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4836 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", 4837 "SIGVAR", "TERMORDORDOR" 4838 }; 4839 #endif 4840 4841 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l) 4842 STATIC bool 4843 S_word_takes_any_delimiter(char *p, STRLEN len) 4844 { 4845 return (len == 1 && memCHRs("msyq", p[0])) 4846 || (len == 2 4847 && ((p[0] == 't' && p[1] == 'r') 4848 || (p[0] == 'q' && memCHRs("qwxr", p[1])))); 4849 } 4850 4851 static void 4852 S_check_scalar_slice(pTHX_ char *s) 4853 { 4854 s++; 4855 while (SPACE_OR_TAB(*s)) s++; 4856 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2, 4857 PL_bufend, 4858 UTF)) 4859 { 4860 return; 4861 } 4862 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) 4863 || (*s && memCHRs(" \t$#+-'\"", *s))) 4864 { 4865 s += UTF ? UTF8SKIP(s) : 1; 4866 } 4867 if (*s == '}' || *s == ']') 4868 pl_yylval.ival = OPpSLICEWARNING; 4869 } 4870 4871 #define lex_token_boundary() S_lex_token_boundary(aTHX) 4872 static void 4873 S_lex_token_boundary(pTHX) 4874 { 4875 PL_oldoldbufptr = PL_oldbufptr; 4876 PL_oldbufptr = PL_bufptr; 4877 } 4878 4879 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s) 4880 static char * 4881 S_vcs_conflict_marker(pTHX_ char *s) 4882 { 4883 lex_token_boundary(); 4884 PL_bufptr = s; 4885 yyerror("Version control conflict marker"); 4886 while (s < PL_bufend && *s != '\n') 4887 s++; 4888 return s; 4889 } 4890 4891 static int 4892 yyl_sigvar(pTHX_ char *s) 4893 { 4894 /* we expect the sigil and optional var name part of a 4895 * signature element here. Since a '$' is not necessarily 4896 * followed by a var name, handle it specially here; the general 4897 * yylex code would otherwise try to interpret whatever follows 4898 * as a var; e.g. ($, ...) would be seen as the var '$,' 4899 */ 4900 4901 U8 sigil; 4902 4903 s = skipspace(s); 4904 sigil = *s++; 4905 PL_bufptr = s; /* for error reporting */ 4906 switch (sigil) { 4907 case '$': 4908 case '@': 4909 case '%': 4910 /* spot stuff that looks like an prototype */ 4911 if (memCHRs("$:@%&*;\\[]", *s)) { 4912 yyerror("Illegal character following sigil in a subroutine signature"); 4913 break; 4914 } 4915 /* '$#' is banned, while '$ # comment' isn't */ 4916 if (*s == '#') { 4917 yyerror("'#' not allowed immediately following a sigil in a subroutine signature"); 4918 break; 4919 } 4920 s = skipspace(s); 4921 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 4922 char *dest = PL_tokenbuf + 1; 4923 /* read var name, including sigil, into PL_tokenbuf */ 4924 PL_tokenbuf[0] = sigil; 4925 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, 4926 0, cBOOL(UTF), FALSE, FALSE); 4927 *dest = '\0'; 4928 assert(PL_tokenbuf[1]); /* we have a variable name */ 4929 } 4930 else { 4931 *PL_tokenbuf = 0; 4932 PL_in_my = 0; 4933 } 4934 4935 s = skipspace(s); 4936 /* parse the = for the default ourselves to avoid '+=' etc being accepted here 4937 * as the ASSIGNOP, and exclude other tokens that start with = 4938 */ 4939 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) { 4940 /* save now to report with the same context as we did when 4941 * all ASSIGNOPS were accepted */ 4942 PL_oldbufptr = s; 4943 4944 ++s; 4945 NEXTVAL_NEXTTOKE.ival = 0; 4946 force_next(ASSIGNOP); 4947 PL_expect = XTERM; 4948 } 4949 else if (*s == ',' || *s == ')') { 4950 PL_expect = XOPERATOR; 4951 } 4952 else { 4953 /* make sure the context shows the unexpected character and 4954 * hopefully a bit more */ 4955 if (*s) ++s; 4956 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 4957 s++; 4958 PL_bufptr = s; /* for error reporting */ 4959 yyerror("Illegal operator following parameter in a subroutine signature"); 4960 PL_in_my = 0; 4961 } 4962 if (*PL_tokenbuf) { 4963 NEXTVAL_NEXTTOKE.ival = sigil; 4964 force_next('p'); /* force a signature pending identifier */ 4965 } 4966 break; 4967 4968 case ')': 4969 PL_expect = XBLOCK; 4970 break; 4971 case ',': /* handle ($a,,$b) */ 4972 break; 4973 4974 default: 4975 PL_in_my = 0; 4976 yyerror("A signature parameter must start with '$', '@' or '%'"); 4977 /* very crude error recovery: skip to likely next signature 4978 * element */ 4979 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 4980 s++; 4981 break; 4982 } 4983 4984 TOKEN(sigil); 4985 } 4986 4987 static int 4988 yyl_dollar(pTHX_ char *s) 4989 { 4990 CLINE; 4991 4992 if (PL_expect == XPOSTDEREF) { 4993 if (s[1] == '#') { 4994 s++; 4995 POSTDEREF(DOLSHARP); 4996 } 4997 POSTDEREF('$'); 4998 } 4999 5000 if ( s[1] == '#' 5001 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF) 5002 || memCHRs("{$:+-@", s[2]))) 5003 { 5004 PL_tokenbuf[0] = '@'; 5005 s = scan_ident(s + 1, PL_tokenbuf + 1, 5006 sizeof PL_tokenbuf - 1, FALSE); 5007 if (PL_expect == XOPERATOR) { 5008 char *d = s; 5009 if (PL_bufptr > s) { 5010 d = PL_bufptr-1; 5011 PL_bufptr = PL_oldbufptr; 5012 } 5013 no_op("Array length", d); 5014 } 5015 if (!PL_tokenbuf[1]) 5016 PREREF(DOLSHARP); 5017 PL_expect = XOPERATOR; 5018 force_ident_maybe_lex('#'); 5019 TOKEN(DOLSHARP); 5020 } 5021 5022 PL_tokenbuf[0] = '$'; 5023 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5024 if (PL_expect == XOPERATOR) { 5025 char *d = s; 5026 if (PL_bufptr > s) { 5027 d = PL_bufptr-1; 5028 PL_bufptr = PL_oldbufptr; 5029 } 5030 no_op("Scalar", d); 5031 } 5032 if (!PL_tokenbuf[1]) { 5033 if (s == PL_bufend) 5034 yyerror("Final $ should be \\$ or $name"); 5035 PREREF('$'); 5036 } 5037 5038 { 5039 const char tmp = *s; 5040 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 5041 s = skipspace(s); 5042 5043 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5044 && intuit_more(s, PL_bufend)) { 5045 if (*s == '[') { 5046 PL_tokenbuf[0] = '@'; 5047 if (ckWARN(WARN_SYNTAX)) { 5048 char *t = s+1; 5049 5050 while ( t < PL_bufend ) { 5051 if (isSPACE(*t)) { 5052 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t)); 5053 /* consumed one or more space chars */ 5054 } else if (*t == '$' || *t == '@') { 5055 /* could be more than one '$' like $$ref or @$ref */ 5056 do { t++; } while (t < PL_bufend && *t == '$'); 5057 5058 /* could be an abigail style identifier like $ foo */ 5059 while (t < PL_bufend && *t == ' ') t++; 5060 5061 /* strip off the name of the var */ 5062 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 5063 t += UTF ? UTF8SKIP(t) : 1; 5064 /* consumed a varname */ 5065 } else if (isDIGIT(*t)) { 5066 /* deal with hex constants like 0x11 */ 5067 if (t[0] == '0' && t[1] == 'x') { 5068 t += 2; 5069 while (t < PL_bufend && isXDIGIT(*t)) t++; 5070 } else { 5071 /* deal with decimal/octal constants like 1 and 0123 */ 5072 do { t++; } while (isDIGIT(*t)); 5073 if (t<PL_bufend && *t == '.') { 5074 do { t++; } while (isDIGIT(*t)); 5075 } 5076 } 5077 /* consumed a number */ 5078 } else { 5079 /* not a var nor a space nor a number */ 5080 break; 5081 } 5082 } 5083 if (t < PL_bufend && *t++ == ',') { 5084 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ 5085 while (t < PL_bufend && *t != ']') 5086 t++; 5087 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5088 "Multidimensional syntax %" UTF8f " not supported", 5089 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr)); 5090 } 5091 } 5092 } 5093 else if (*s == '{') { 5094 char *t; 5095 PL_tokenbuf[0] = '%'; 5096 if ( strEQ(PL_tokenbuf+1, "SIG") 5097 && ckWARN(WARN_SYNTAX) 5098 && (t = (char *) memchr(s, '}', PL_bufend - s)) 5099 && (t = (char *) memchr(t, '=', PL_bufend - t))) 5100 { 5101 char tmpbuf[sizeof PL_tokenbuf]; 5102 do { 5103 t++; 5104 } while (isSPACE(*t)); 5105 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { 5106 STRLEN len; 5107 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 5108 &len); 5109 while (isSPACE(*t)) 5110 t++; 5111 if ( *t == ';' 5112 && get_cvn_flags(tmpbuf, len, UTF 5113 ? SVf_UTF8 5114 : 0)) 5115 { 5116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5117 "You need to quote \"%" UTF8f "\"", 5118 UTF8fARG(UTF, len, tmpbuf)); 5119 } 5120 } 5121 } 5122 } 5123 } 5124 5125 PL_expect = XOPERATOR; 5126 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) { 5127 const bool islop = (PL_last_lop == PL_oldoldbufptr); 5128 if (!islop || PL_last_lop_op == OP_GREPSTART) 5129 PL_expect = XOPERATOR; 5130 else if (memCHRs("$@\"'`q", *s)) 5131 PL_expect = XTERM; /* e.g. print $fh "foo" */ 5132 else if ( memCHRs("&*<%", *s) 5133 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF)) 5134 { 5135 PL_expect = XTERM; /* e.g. print $fh &sub */ 5136 } 5137 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5138 char tmpbuf[sizeof PL_tokenbuf]; 5139 int t2; 5140 STRLEN len; 5141 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 5142 if ((t2 = keyword(tmpbuf, len, 0))) { 5143 /* binary operators exclude handle interpretations */ 5144 switch (t2) { 5145 case -KEY_x: 5146 case -KEY_eq: 5147 case -KEY_ne: 5148 case -KEY_gt: 5149 case -KEY_lt: 5150 case -KEY_ge: 5151 case -KEY_le: 5152 case -KEY_cmp: 5153 break; 5154 default: 5155 PL_expect = XTERM; /* e.g. print $fh length() */ 5156 break; 5157 } 5158 } 5159 else { 5160 PL_expect = XTERM; /* e.g. print $fh subr() */ 5161 } 5162 } 5163 else if (isDIGIT(*s)) 5164 PL_expect = XTERM; /* e.g. print $fh 3 */ 5165 else if (*s == '.' && isDIGIT(s[1])) 5166 PL_expect = XTERM; /* e.g. print $fh .3 */ 5167 else if ((*s == '?' || *s == '-' || *s == '+') 5168 && !isSPACE(s[1]) && s[1] != '=') 5169 PL_expect = XTERM; /* e.g. print $fh -1 */ 5170 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 5171 && s[1] != '/') 5172 PL_expect = XTERM; /* e.g. print $fh /.../ 5173 XXX except DORDOR operator 5174 */ 5175 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 5176 && s[2] != '=') 5177 PL_expect = XTERM; /* print $fh <<"EOF" */ 5178 } 5179 } 5180 force_ident_maybe_lex('$'); 5181 TOKEN('$'); 5182 } 5183 5184 static int 5185 yyl_sub(pTHX_ char *s, const int key) 5186 { 5187 char * const tmpbuf = PL_tokenbuf + 1; 5188 bool have_name, have_proto; 5189 STRLEN len; 5190 SV *format_name = NULL; 5191 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED; 5192 5193 SSize_t off = s-SvPVX(PL_linestr); 5194 char *d; 5195 5196 s = skipspace(s); /* can move PL_linestr */ 5197 5198 d = SvPVX(PL_linestr)+off; 5199 5200 SAVEBOOL(PL_parser->sig_seen); 5201 PL_parser->sig_seen = FALSE; 5202 5203 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 5204 || *s == '\'' 5205 || (*s == ':' && s[1] == ':')) 5206 { 5207 5208 PL_expect = XATTRBLOCK; 5209 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 5210 &len); 5211 if (key == KEY_format) 5212 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 5213 *PL_tokenbuf = '&'; 5214 if (memchr(tmpbuf, ':', len) || key != KEY_sub 5215 || pad_findmy_pvn( 5216 PL_tokenbuf, len + 1, 0 5217 ) != NOT_IN_PAD) 5218 sv_setpvn(PL_subname, tmpbuf, len); 5219 else { 5220 sv_setsv(PL_subname,PL_curstname); 5221 sv_catpvs(PL_subname,"::"); 5222 sv_catpvn(PL_subname,tmpbuf,len); 5223 } 5224 if (SvUTF8(PL_linestr)) 5225 SvUTF8_on(PL_subname); 5226 have_name = TRUE; 5227 5228 s = skipspace(d); 5229 } 5230 else { 5231 if (key == KEY_my || key == KEY_our || key==KEY_state) { 5232 *d = '\0'; 5233 /* diag_listed_as: Missing name in "%s sub" */ 5234 Perl_croak(aTHX_ 5235 "Missing name in \"%s\"", PL_bufptr); 5236 } 5237 PL_expect = XATTRTERM; 5238 sv_setpvs(PL_subname,"?"); 5239 have_name = FALSE; 5240 } 5241 5242 if (key == KEY_format) { 5243 if (format_name) { 5244 NEXTVAL_NEXTTOKE.opval 5245 = newSVOP(OP_CONST,0, format_name); 5246 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 5247 force_next(BAREWORD); 5248 } 5249 PREBLOCK(FORMAT); 5250 } 5251 5252 /* Look for a prototype */ 5253 if (*s == '(' && !is_sigsub) { 5254 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5255 if (!s) 5256 Perl_croak(aTHX_ "Prototype not terminated"); 5257 COPLINE_SET_FROM_MULTI_END; 5258 (void)validate_proto(PL_subname, PL_lex_stuff, 5259 ckWARN(WARN_ILLEGALPROTO), 0); 5260 have_proto = TRUE; 5261 5262 s = skipspace(s); 5263 } 5264 else 5265 have_proto = FALSE; 5266 5267 if ( !(*s == ':' && s[1] != ':') 5268 && (*s != '{' && *s != '(') && key != KEY_format) 5269 { 5270 assert(key == KEY_sub || key == KEY_AUTOLOAD || 5271 key == KEY_DESTROY || key == KEY_BEGIN || 5272 key == KEY_UNITCHECK || key == KEY_CHECK || 5273 key == KEY_INIT || key == KEY_END || 5274 key == KEY_my || key == KEY_state || 5275 key == KEY_our); 5276 if (!have_name) 5277 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 5278 else if (*s != ';' && *s != '}') 5279 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname)); 5280 } 5281 5282 if (have_proto) { 5283 NEXTVAL_NEXTTOKE.opval = 5284 newSVOP(OP_CONST, 0, PL_lex_stuff); 5285 PL_lex_stuff = NULL; 5286 force_next(THING); 5287 } 5288 if (!have_name) { 5289 if (PL_curstash) 5290 sv_setpvs(PL_subname, "__ANON__"); 5291 else 5292 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 5293 if (is_sigsub) 5294 TOKEN(ANON_SIGSUB); 5295 else 5296 TOKEN(ANONSUB); 5297 } 5298 force_ident_maybe_lex('&'); 5299 if (is_sigsub) 5300 TOKEN(SIGSUB); 5301 else 5302 TOKEN(SUB); 5303 } 5304 5305 static int 5306 yyl_interpcasemod(pTHX_ char *s) 5307 { 5308 #ifdef DEBUGGING 5309 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 5310 Perl_croak(aTHX_ 5311 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 5312 PL_bufptr, PL_bufend, *PL_bufptr); 5313 #endif 5314 5315 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 5316 /* if at a \E */ 5317 if (PL_lex_casemods) { 5318 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 5319 PL_lex_casestack[PL_lex_casemods] = '\0'; 5320 5321 if (PL_bufptr != PL_bufend 5322 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 5323 || oldmod == 'F')) { 5324 PL_bufptr += 2; 5325 PL_lex_state = LEX_INTERPCONCAT; 5326 } 5327 PL_lex_allbrackets--; 5328 return REPORT(')'); 5329 } 5330 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 5331 /* Got an unpaired \E */ 5332 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 5333 "Useless use of \\E"); 5334 } 5335 if (PL_bufptr != PL_bufend) 5336 PL_bufptr += 2; 5337 PL_lex_state = LEX_INTERPCONCAT; 5338 return yylex(); 5339 } 5340 else { 5341 DEBUG_T({ 5342 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n"); 5343 }); 5344 s = PL_bufptr + 1; 5345 if (s[1] == '\\' && s[2] == 'E') { 5346 PL_bufptr = s + 3; 5347 PL_lex_state = LEX_INTERPCONCAT; 5348 return yylex(); 5349 } 5350 else { 5351 I32 tmp; 5352 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u") 5353 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l")) 5354 { 5355 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 5356 } 5357 if ((*s == 'L' || *s == 'U' || *s == 'F') 5358 && (strpbrk(PL_lex_casestack, "LUF"))) 5359 { 5360 PL_lex_casestack[--PL_lex_casemods] = '\0'; 5361 PL_lex_allbrackets--; 5362 return REPORT(')'); 5363 } 5364 if (PL_lex_casemods > 10) 5365 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 5366 PL_lex_casestack[PL_lex_casemods++] = *s; 5367 PL_lex_casestack[PL_lex_casemods] = '\0'; 5368 PL_lex_state = LEX_INTERPCONCAT; 5369 NEXTVAL_NEXTTOKE.ival = 0; 5370 force_next((2<<24)|'('); 5371 if (*s == 'l') 5372 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 5373 else if (*s == 'u') 5374 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 5375 else if (*s == 'L') 5376 NEXTVAL_NEXTTOKE.ival = OP_LC; 5377 else if (*s == 'U') 5378 NEXTVAL_NEXTTOKE.ival = OP_UC; 5379 else if (*s == 'Q') 5380 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 5381 else if (*s == 'F') 5382 NEXTVAL_NEXTTOKE.ival = OP_FC; 5383 else 5384 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 5385 PL_bufptr = s + 1; 5386 } 5387 force_next(FUNC); 5388 if (PL_lex_starts) { 5389 s = PL_bufptr; 5390 PL_lex_starts = 0; 5391 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5392 if (PL_lex_casemods == 1 && PL_lex_inpat) 5393 TOKEN(','); 5394 else 5395 AopNOASSIGN(OP_CONCAT); 5396 } 5397 else 5398 return yylex(); 5399 } 5400 } 5401 5402 static int 5403 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword, 5404 GV **pgv, GV ***pgvp) 5405 { 5406 GV *ogv = NULL; /* override (winner) */ 5407 GV *hgv = NULL; /* hidden (loser) */ 5408 GV *gv = *pgv; 5409 5410 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 5411 CV *cv; 5412 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 5413 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 5414 SVt_PVCV)) 5415 && (cv = GvCVu(gv))) 5416 { 5417 if (GvIMPORTED_CV(gv)) 5418 ogv = gv; 5419 else if (! CvMETHOD(cv)) 5420 hgv = gv; 5421 } 5422 if (!ogv 5423 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE)) 5424 && (gv = **pgvp) 5425 && (isGV_with_GP(gv) 5426 ? GvCVu(gv) && GvIMPORTED_CV(gv) 5427 : SvPCS_IMPORTED(gv) 5428 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 5429 len, 0), 1))) 5430 { 5431 ogv = gv; 5432 } 5433 } 5434 5435 *pgv = gv; 5436 5437 if (ogv) { 5438 *orig_keyword = key; 5439 return 0; /* overridden by import or by GLOBAL */ 5440 } 5441 else if (gv && !*pgvp 5442 && -key==KEY_lock /* XXX generalizable kludge */ 5443 && GvCVu(gv)) 5444 { 5445 return 0; /* any sub overrides "weak" keyword */ 5446 } 5447 else { /* no override */ 5448 key = -key; 5449 if (key == KEY_dump) { 5450 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30"); 5451 } 5452 *pgv = NULL; 5453 *pgvp = 0; 5454 if (hgv && key != KEY_x) /* never ambiguous */ 5455 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 5456 "Ambiguous call resolved as CORE::%s(), " 5457 "qualify as such or use &", 5458 GvENAME(hgv)); 5459 return key; 5460 } 5461 } 5462 5463 static int 5464 yyl_qw(pTHX_ char *s, STRLEN len) 5465 { 5466 OP *words = NULL; 5467 5468 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5469 if (!s) 5470 missingterm(NULL, 0); 5471 5472 COPLINE_SET_FROM_MULTI_END; 5473 PL_expect = XOPERATOR; 5474 if (SvCUR(PL_lex_stuff)) { 5475 int warned_comma = !ckWARN(WARN_QW); 5476 int warned_comment = warned_comma; 5477 char *d = SvPV_force(PL_lex_stuff, len); 5478 while (len) { 5479 for (; isSPACE(*d) && len; --len, ++d) 5480 /**/; 5481 if (len) { 5482 SV *sv; 5483 const char *b = d; 5484 if (!warned_comma || !warned_comment) { 5485 for (; !isSPACE(*d) && len; --len, ++d) { 5486 if (!warned_comma && *d == ',') { 5487 Perl_warner(aTHX_ packWARN(WARN_QW), 5488 "Possible attempt to separate words with commas"); 5489 ++warned_comma; 5490 } 5491 else if (!warned_comment && *d == '#') { 5492 Perl_warner(aTHX_ packWARN(WARN_QW), 5493 "Possible attempt to put comments in qw() list"); 5494 ++warned_comment; 5495 } 5496 } 5497 } 5498 else { 5499 for (; !isSPACE(*d) && len; --len, ++d) 5500 /**/; 5501 } 5502 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 5503 words = op_append_elem(OP_LIST, words, 5504 newSVOP(OP_CONST, 0, tokeq(sv))); 5505 } 5506 } 5507 } 5508 if (!words) 5509 words = newNULLLIST(); 5510 SvREFCNT_dec_NN(PL_lex_stuff); 5511 PL_lex_stuff = NULL; 5512 PL_expect = XOPERATOR; 5513 pl_yylval.opval = sawparens(words); 5514 TOKEN(QWLIST); 5515 } 5516 5517 static int 5518 yyl_hyphen(pTHX_ char *s) 5519 { 5520 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5521 I32 ftst = 0; 5522 char tmp; 5523 5524 s++; 5525 PL_bufptr = s; 5526 tmp = *s++; 5527 5528 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5529 s++; 5530 5531 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { 5532 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); 5533 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5534 OPERATOR('-'); /* unary minus */ 5535 } 5536 switch (tmp) { 5537 case 'r': ftst = OP_FTEREAD; break; 5538 case 'w': ftst = OP_FTEWRITE; break; 5539 case 'x': ftst = OP_FTEEXEC; break; 5540 case 'o': ftst = OP_FTEOWNED; break; 5541 case 'R': ftst = OP_FTRREAD; break; 5542 case 'W': ftst = OP_FTRWRITE; break; 5543 case 'X': ftst = OP_FTREXEC; break; 5544 case 'O': ftst = OP_FTROWNED; break; 5545 case 'e': ftst = OP_FTIS; break; 5546 case 'z': ftst = OP_FTZERO; break; 5547 case 's': ftst = OP_FTSIZE; break; 5548 case 'f': ftst = OP_FTFILE; break; 5549 case 'd': ftst = OP_FTDIR; break; 5550 case 'l': ftst = OP_FTLINK; break; 5551 case 'p': ftst = OP_FTPIPE; break; 5552 case 'S': ftst = OP_FTSOCK; break; 5553 case 'u': ftst = OP_FTSUID; break; 5554 case 'g': ftst = OP_FTSGID; break; 5555 case 'k': ftst = OP_FTSVTX; break; 5556 case 'b': ftst = OP_FTBLK; break; 5557 case 'c': ftst = OP_FTCHR; break; 5558 case 't': ftst = OP_FTTTY; break; 5559 case 'T': ftst = OP_FTTEXT; break; 5560 case 'B': ftst = OP_FTBINARY; break; 5561 case 'M': case 'A': case 'C': 5562 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5563 switch (tmp) { 5564 case 'M': ftst = OP_FTMTIME; break; 5565 case 'A': ftst = OP_FTATIME; break; 5566 case 'C': ftst = OP_FTCTIME; break; 5567 default: break; 5568 } 5569 break; 5570 default: 5571 break; 5572 } 5573 if (ftst) { 5574 PL_last_uni = PL_oldbufptr; 5575 PL_last_lop_op = (OPCODE)ftst; 5576 DEBUG_T( { 5577 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp); 5578 } ); 5579 FTST(ftst); 5580 } 5581 else { 5582 /* Assume it was a minus followed by a one-letter named 5583 * subroutine call (or a -bareword), then. */ 5584 DEBUG_T( { 5585 PerlIO_printf(Perl_debug_log, 5586 "### '-%c' looked like a file test but was not\n", 5587 (int) tmp); 5588 } ); 5589 s = --PL_bufptr; 5590 } 5591 } 5592 { 5593 const char tmp = *s++; 5594 if (*s == tmp) { 5595 s++; 5596 if (PL_expect == XOPERATOR) 5597 TERM(POSTDEC); 5598 else 5599 OPERATOR(PREDEC); 5600 } 5601 else if (*s == '>') { 5602 s++; 5603 s = skipspace(s); 5604 if (((*s == '$' || *s == '&') && s[1] == '*') 5605 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5606 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1])) 5607 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5608 ) 5609 { 5610 PL_expect = XPOSTDEREF; 5611 TOKEN(ARROW); 5612 } 5613 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5614 s = force_word(s,METHOD,FALSE,TRUE); 5615 TOKEN(ARROW); 5616 } 5617 else if (*s == '$') 5618 OPERATOR(ARROW); 5619 else 5620 TERM(ARROW); 5621 } 5622 if (PL_expect == XOPERATOR) { 5623 if (*s == '=' 5624 && !PL_lex_allbrackets 5625 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5626 { 5627 s--; 5628 TOKEN(0); 5629 } 5630 Aop(OP_SUBTRACT); 5631 } 5632 else { 5633 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5634 check_uni(); 5635 OPERATOR('-'); /* unary minus */ 5636 } 5637 } 5638 } 5639 5640 static int 5641 yyl_plus(pTHX_ char *s) 5642 { 5643 const char tmp = *s++; 5644 if (*s == tmp) { 5645 s++; 5646 if (PL_expect == XOPERATOR) 5647 TERM(POSTINC); 5648 else 5649 OPERATOR(PREINC); 5650 } 5651 if (PL_expect == XOPERATOR) { 5652 if (*s == '=' 5653 && !PL_lex_allbrackets 5654 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5655 { 5656 s--; 5657 TOKEN(0); 5658 } 5659 Aop(OP_ADD); 5660 } 5661 else { 5662 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5663 check_uni(); 5664 OPERATOR('+'); 5665 } 5666 } 5667 5668 static int 5669 yyl_star(pTHX_ char *s) 5670 { 5671 if (PL_expect == XPOSTDEREF) 5672 POSTDEREF('*'); 5673 5674 if (PL_expect != XOPERATOR) { 5675 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5676 PL_expect = XOPERATOR; 5677 force_ident(PL_tokenbuf, '*'); 5678 if (!*PL_tokenbuf) 5679 PREREF('*'); 5680 TERM('*'); 5681 } 5682 5683 s++; 5684 if (*s == '*') { 5685 s++; 5686 if (*s == '=' && !PL_lex_allbrackets 5687 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5688 { 5689 s -= 2; 5690 TOKEN(0); 5691 } 5692 PWop(OP_POW); 5693 } 5694 5695 if (*s == '=' 5696 && !PL_lex_allbrackets 5697 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5698 { 5699 s--; 5700 TOKEN(0); 5701 } 5702 5703 Mop(OP_MULTIPLY); 5704 } 5705 5706 static int 5707 yyl_percent(pTHX_ char *s) 5708 { 5709 if (PL_expect == XOPERATOR) { 5710 if (s[1] == '=' 5711 && !PL_lex_allbrackets 5712 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5713 { 5714 TOKEN(0); 5715 } 5716 ++s; 5717 Mop(OP_MODULO); 5718 } 5719 else if (PL_expect == XPOSTDEREF) 5720 POSTDEREF('%'); 5721 5722 PL_tokenbuf[0] = '%'; 5723 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5724 pl_yylval.ival = 0; 5725 if (!PL_tokenbuf[1]) { 5726 PREREF('%'); 5727 } 5728 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5729 && intuit_more(s, PL_bufend)) { 5730 if (*s == '[') 5731 PL_tokenbuf[0] = '@'; 5732 } 5733 PL_expect = XOPERATOR; 5734 force_ident_maybe_lex('%'); 5735 TERM('%'); 5736 } 5737 5738 static int 5739 yyl_caret(pTHX_ char *s) 5740 { 5741 char *d = s; 5742 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED); 5743 if (bof && s[1] == '.') 5744 s++; 5745 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5746 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5747 { 5748 s = d; 5749 TOKEN(0); 5750 } 5751 s++; 5752 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); 5753 } 5754 5755 static int 5756 yyl_colon(pTHX_ char *s) 5757 { 5758 OP *attrs; 5759 5760 switch (PL_expect) { 5761 case XOPERATOR: 5762 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets)) 5763 break; 5764 PL_bufptr = s; /* update in case we back off */ 5765 if (*s == '=') { 5766 Perl_croak(aTHX_ 5767 "Use of := for an empty attribute list is not allowed"); 5768 } 5769 goto grabattrs; 5770 case XATTRBLOCK: 5771 PL_expect = XBLOCK; 5772 goto grabattrs; 5773 case XATTRTERM: 5774 PL_expect = XTERMBLOCK; 5775 grabattrs: 5776 /* NB: as well as parsing normal attributes, we also end up 5777 * here if there is something looking like attributes 5778 * following a signature (which is illegal, but used to be 5779 * legal in 5.20..5.26). If the latter, we still parse the 5780 * attributes so that error messages(s) are less confusing, 5781 * but ignore them (parser->sig_seen). 5782 */ 5783 s = skipspace(s); 5784 attrs = NULL; 5785 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5786 bool sig = PL_parser->sig_seen; 5787 I32 tmp; 5788 SV *sv; 5789 STRLEN len; 5790 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5791 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5792 if (tmp < 0) tmp = -tmp; 5793 switch (tmp) { 5794 case KEY_or: 5795 case KEY_and: 5796 case KEY_for: 5797 case KEY_foreach: 5798 case KEY_unless: 5799 case KEY_if: 5800 case KEY_while: 5801 case KEY_until: 5802 goto got_attrs; 5803 default: 5804 break; 5805 } 5806 } 5807 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5808 if (*d == '(') { 5809 d = scan_str(d,TRUE,TRUE,FALSE,NULL); 5810 if (!d) { 5811 if (attrs) 5812 op_free(attrs); 5813 sv_free(sv); 5814 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list"); 5815 } 5816 COPLINE_SET_FROM_MULTI_END; 5817 } 5818 if (PL_lex_stuff) { 5819 sv_catsv(sv, PL_lex_stuff); 5820 attrs = op_append_elem(OP_LIST, attrs, 5821 newSVOP(OP_CONST, 0, sv)); 5822 SvREFCNT_dec_NN(PL_lex_stuff); 5823 PL_lex_stuff = NULL; 5824 } 5825 else { 5826 /* NOTE: any CV attrs applied here need to be part of 5827 the CVf_BUILTIN_ATTRS define in cv.h! */ 5828 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) { 5829 sv_free(sv); 5830 if (!sig) 5831 CvLVALUE_on(PL_compcv); 5832 } 5833 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) { 5834 sv_free(sv); 5835 if (!sig) 5836 CvMETHOD_on(PL_compcv); 5837 } 5838 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) { 5839 sv_free(sv); 5840 if (!sig) { 5841 Perl_ck_warner_d(aTHX_ 5842 packWARN(WARN_EXPERIMENTAL__CONST_ATTR), 5843 ":const is experimental" 5844 ); 5845 CvANONCONST_on(PL_compcv); 5846 if (!CvANON(PL_compcv)) 5847 yyerror(":const is not permitted on named " 5848 "subroutines"); 5849 } 5850 } 5851 /* After we've set the flags, it could be argued that 5852 we don't need to do the attributes.pm-based setting 5853 process, and shouldn't bother appending recognized 5854 flags. To experiment with that, uncomment the 5855 following "else". (Note that's already been 5856 uncommented. That keeps the above-applied built-in 5857 attributes from being intercepted (and possibly 5858 rejected) by a package's attribute routines, but is 5859 justified by the performance win for the common case 5860 of applying only built-in attributes.) */ 5861 else 5862 attrs = op_append_elem(OP_LIST, attrs, 5863 newSVOP(OP_CONST, 0, 5864 sv)); 5865 } 5866 s = skipspace(d); 5867 if (*s == ':' && s[1] != ':') 5868 s = skipspace(s+1); 5869 else if (s == d) 5870 break; /* require real whitespace or :'s */ 5871 /* XXX losing whitespace on sequential attributes here */ 5872 } 5873 5874 if (*s != ';' 5875 && *s != '}' 5876 && !(PL_expect == XOPERATOR 5877 ? (*s == '=' || *s == ')') 5878 : (*s == '{' || *s == '('))) 5879 { 5880 const char q = ((*s == '\'') ? '"' : '\''); 5881 /* If here for an expression, and parsed no attrs, back off. */ 5882 if (PL_expect == XOPERATOR && !attrs) { 5883 s = PL_bufptr; 5884 break; 5885 } 5886 /* MUST advance bufptr here to avoid bogus "at end of line" 5887 context messages from yyerror(). 5888 */ 5889 PL_bufptr = s; 5890 yyerror( (const char *) 5891 (*s 5892 ? Perl_form(aTHX_ "Invalid separator character " 5893 "%c%c%c in attribute list", q, *s, q) 5894 : "Unterminated attribute list" ) ); 5895 if (attrs) 5896 op_free(attrs); 5897 OPERATOR(':'); 5898 } 5899 5900 got_attrs: 5901 if (PL_parser->sig_seen) { 5902 /* see comment about about sig_seen and parser error 5903 * handling */ 5904 if (attrs) 5905 op_free(attrs); 5906 Perl_croak(aTHX_ "Subroutine attributes must come " 5907 "before the signature"); 5908 } 5909 if (attrs) { 5910 NEXTVAL_NEXTTOKE.opval = attrs; 5911 force_next(THING); 5912 } 5913 TOKEN(COLONATTR); 5914 } 5915 5916 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 5917 s--; 5918 TOKEN(0); 5919 } 5920 5921 PL_lex_allbrackets--; 5922 OPERATOR(':'); 5923 } 5924 5925 static int 5926 yyl_subproto(pTHX_ char *s, CV *cv) 5927 { 5928 STRLEN protolen = CvPROTOLEN(cv); 5929 const char *proto = CvPROTO(cv); 5930 bool optional; 5931 5932 proto = S_strip_spaces(aTHX_ proto, &protolen); 5933 if (!protolen) 5934 TERM(FUNC0SUB); 5935 if ((optional = *proto == ';')) { 5936 do { 5937 proto++; 5938 } while (*proto == ';'); 5939 } 5940 5941 if ( 5942 ( 5943 ( 5944 *proto == '$' || *proto == '_' 5945 || *proto == '*' || *proto == '+' 5946 ) 5947 && proto[1] == '\0' 5948 ) 5949 || ( 5950 *proto == '\\' && proto[1] && proto[2] == '\0' 5951 ) 5952 ) { 5953 UNIPROTO(UNIOPSUB,optional); 5954 } 5955 5956 if (*proto == '\\' && proto[1] == '[') { 5957 const char *p = proto + 2; 5958 while(*p && *p != ']') 5959 ++p; 5960 if(*p == ']' && !p[1]) 5961 UNIPROTO(UNIOPSUB,optional); 5962 } 5963 5964 if (*proto == '&' && *s == '{') { 5965 if (PL_curstash) 5966 sv_setpvs(PL_subname, "__ANON__"); 5967 else 5968 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 5969 if (!PL_lex_allbrackets 5970 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 5971 { 5972 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 5973 } 5974 PREBLOCK(LSTOPSUB); 5975 } 5976 5977 return KEY_NULL; 5978 } 5979 5980 static int 5981 yyl_leftcurly(pTHX_ char *s, const U8 formbrack) 5982 { 5983 char *d; 5984 if (PL_lex_brackets > 100) { 5985 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5986 } 5987 5988 switch (PL_expect) { 5989 case XTERM: 5990 case XTERMORDORDOR: 5991 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5992 PL_lex_allbrackets++; 5993 OPERATOR(HASHBRACK); 5994 case XOPERATOR: 5995 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5996 s++; 5997 d = s; 5998 PL_tokenbuf[0] = '\0'; 5999 if (d < PL_bufend && *d == '-') { 6000 PL_tokenbuf[0] = '-'; 6001 d++; 6002 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6003 d++; 6004 } 6005 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) { 6006 STRLEN len; 6007 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6008 FALSE, &len); 6009 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6010 d++; 6011 if (*d == '}') { 6012 const char minus = (PL_tokenbuf[0] == '-'); 6013 s = force_word(s + minus, BAREWORD, FALSE, TRUE); 6014 if (minus) 6015 force_next('-'); 6016 } 6017 } 6018 /* FALLTHROUGH */ 6019 case XATTRTERM: 6020 case XTERMBLOCK: 6021 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6022 PL_lex_allbrackets++; 6023 PL_expect = XSTATE; 6024 break; 6025 case XATTRBLOCK: 6026 case XBLOCK: 6027 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 6028 PL_lex_allbrackets++; 6029 PL_expect = XSTATE; 6030 break; 6031 case XBLOCKTERM: 6032 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6033 PL_lex_allbrackets++; 6034 PL_expect = XSTATE; 6035 break; 6036 default: { 6037 const char *t; 6038 if (PL_oldoldbufptr == PL_last_lop) 6039 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6040 else 6041 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6042 PL_lex_allbrackets++; 6043 s = skipspace(s); 6044 if (*s == '}') { 6045 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6046 PL_expect = XTERM; 6047 /* This hack is to get the ${} in the message. */ 6048 PL_bufptr = s+1; 6049 yyerror("syntax error"); 6050 break; 6051 } 6052 OPERATOR(HASHBRACK); 6053 } 6054 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { 6055 /* ${...} or @{...} etc., but not print {...} 6056 * Skip the disambiguation and treat this as a block. 6057 */ 6058 goto block_expectation; 6059 } 6060 /* This hack serves to disambiguate a pair of curlies 6061 * as being a block or an anon hash. Normally, expectation 6062 * determines that, but in cases where we're not in a 6063 * position to expect anything in particular (like inside 6064 * eval"") we have to resolve the ambiguity. This code 6065 * covers the case where the first term in the curlies is a 6066 * quoted string. Most other cases need to be explicitly 6067 * disambiguated by prepending a "+" before the opening 6068 * curly in order to force resolution as an anon hash. 6069 * 6070 * XXX should probably propagate the outer expectation 6071 * into eval"" to rely less on this hack, but that could 6072 * potentially break current behavior of eval"". 6073 * GSAR 97-07-21 6074 */ 6075 t = s; 6076 if (*s == '\'' || *s == '"' || *s == '`') { 6077 /* common case: get past first string, handling escapes */ 6078 for (t++; t < PL_bufend && *t != *s;) 6079 if (*t++ == '\\') 6080 t++; 6081 t++; 6082 } 6083 else if (*s == 'q') { 6084 if (++t < PL_bufend 6085 && (!isWORDCHAR(*t) 6086 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6087 && !isWORDCHAR(*t)))) 6088 { 6089 /* skip q//-like construct */ 6090 const char *tmps; 6091 char open, close, term; 6092 I32 brackets = 1; 6093 6094 while (t < PL_bufend && isSPACE(*t)) 6095 t++; 6096 /* check for q => */ 6097 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6098 OPERATOR(HASHBRACK); 6099 } 6100 term = *t; 6101 open = term; 6102 if (term && (tmps = memCHRs("([{< )]}> )]}>",term))) 6103 term = tmps[5]; 6104 close = term; 6105 if (open == close) 6106 for (t++; t < PL_bufend; t++) { 6107 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6108 t++; 6109 else if (*t == open) 6110 break; 6111 } 6112 else { 6113 for (t++; t < PL_bufend; t++) { 6114 if (*t == '\\' && t+1 < PL_bufend) 6115 t++; 6116 else if (*t == close && --brackets <= 0) 6117 break; 6118 else if (*t == open) 6119 brackets++; 6120 } 6121 } 6122 t++; 6123 } 6124 else 6125 /* skip plain q word */ 6126 while ( t < PL_bufend 6127 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6128 { 6129 t += UTF ? UTF8SKIP(t) : 1; 6130 } 6131 } 6132 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) { 6133 t += UTF ? UTF8SKIP(t) : 1; 6134 while ( t < PL_bufend 6135 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6136 { 6137 t += UTF ? UTF8SKIP(t) : 1; 6138 } 6139 } 6140 while (t < PL_bufend && isSPACE(*t)) 6141 t++; 6142 /* if comma follows first term, call it an anon hash */ 6143 /* XXX it could be a comma expression with loop modifiers */ 6144 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6145 || (*t == '=' && t[1] == '>'))) 6146 OPERATOR(HASHBRACK); 6147 if (PL_expect == XREF) { 6148 block_expectation: 6149 /* If there is an opening brace or 'sub:', treat it 6150 as a term to make ${{...}}{k} and &{sub:attr...} 6151 dwim. Otherwise, treat it as a statement, so 6152 map {no strict; ...} works. 6153 */ 6154 s = skipspace(s); 6155 if (*s == '{') { 6156 PL_expect = XTERM; 6157 break; 6158 } 6159 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) { 6160 PL_bufptr = s; 6161 d = s + 3; 6162 d = skipspace(d); 6163 s = PL_bufptr; 6164 if (*d == ':') { 6165 PL_expect = XTERM; 6166 break; 6167 } 6168 } 6169 PL_expect = XSTATE; 6170 } 6171 else { 6172 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6173 PL_expect = XSTATE; 6174 } 6175 } 6176 break; 6177 } 6178 6179 pl_yylval.ival = CopLINE(PL_curcop); 6180 PL_copline = NOLINE; /* invalidate current command line number */ 6181 TOKEN(formbrack ? '=' : '{'); 6182 } 6183 6184 static int 6185 yyl_rightcurly(pTHX_ char *s, const U8 formbrack) 6186 { 6187 assert(s != PL_bufend); 6188 s++; 6189 6190 if (PL_lex_brackets <= 0) 6191 /* diag_listed_as: Unmatched right %s bracket */ 6192 yyerror("Unmatched right curly bracket"); 6193 else 6194 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6195 6196 PL_lex_allbrackets--; 6197 6198 if (PL_lex_state == LEX_INTERPNORMAL) { 6199 if (PL_lex_brackets == 0) { 6200 if (PL_expect & XFAKEBRACK) { 6201 PL_expect &= XENUMMASK; 6202 PL_lex_state = LEX_INTERPEND; 6203 PL_bufptr = s; 6204 return yylex(); /* ignore fake brackets */ 6205 } 6206 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6207 && SvEVALED(PL_lex_repl)) 6208 PL_lex_state = LEX_INTERPEND; 6209 else if (*s == '-' && s[1] == '>') 6210 PL_lex_state = LEX_INTERPENDMAYBE; 6211 else if (*s != '[' && *s != '{') 6212 PL_lex_state = LEX_INTERPEND; 6213 } 6214 } 6215 6216 if (PL_expect & XFAKEBRACK) { 6217 PL_expect &= XENUMMASK; 6218 PL_bufptr = s; 6219 return yylex(); /* ignore fake brackets */ 6220 } 6221 6222 force_next(formbrack ? '.' : '}'); 6223 if (formbrack) LEAVE_with_name("lex_format"); 6224 if (formbrack == 2) { /* means . where arguments were expected */ 6225 force_next(';'); 6226 TOKEN(FORMRBRACK); 6227 } 6228 6229 TOKEN(';'); 6230 } 6231 6232 static int 6233 yyl_ampersand(pTHX_ char *s) 6234 { 6235 if (PL_expect == XPOSTDEREF) 6236 POSTDEREF('&'); 6237 6238 s++; 6239 if (*s++ == '&') { 6240 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6241 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6242 s -= 2; 6243 TOKEN(0); 6244 } 6245 AOPERATOR(ANDAND); 6246 } 6247 s--; 6248 6249 if (PL_expect == XOPERATOR) { 6250 char *d; 6251 bool bof; 6252 if ( PL_bufptr == PL_linestart 6253 && ckWARN(WARN_SEMICOLON) 6254 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 6255 { 6256 CopLINE_dec(PL_curcop); 6257 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6258 CopLINE_inc(PL_curcop); 6259 } 6260 d = s; 6261 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6262 s++; 6263 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6264 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6265 s = d; 6266 s--; 6267 TOKEN(0); 6268 } 6269 if (d == s) 6270 BAop(bof ? OP_NBIT_AND : OP_BIT_AND); 6271 else 6272 BAop(OP_SBIT_AND); 6273 } 6274 6275 PL_tokenbuf[0] = '&'; 6276 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); 6277 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6278 6279 if (PL_tokenbuf[1]) 6280 force_ident_maybe_lex('&'); 6281 else 6282 PREREF('&'); 6283 6284 TERM('&'); 6285 } 6286 6287 static int 6288 yyl_verticalbar(pTHX_ char *s) 6289 { 6290 char *d; 6291 bool bof; 6292 6293 s++; 6294 if (*s++ == '|') { 6295 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6296 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6297 s -= 2; 6298 TOKEN(0); 6299 } 6300 AOPERATOR(OROR); 6301 } 6302 6303 s--; 6304 d = s; 6305 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6306 s++; 6307 6308 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6309 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6310 s = d - 1; 6311 TOKEN(0); 6312 } 6313 6314 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); 6315 } 6316 6317 static int 6318 yyl_bang(pTHX_ char *s) 6319 { 6320 const char tmp = *s++; 6321 if (tmp == '=') { 6322 /* was this !=~ where !~ was meant? 6323 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6324 6325 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6326 const char *t = s+1; 6327 6328 while (t < PL_bufend && isSPACE(*t)) 6329 ++t; 6330 6331 if (*t == '/' || *t == '?' 6332 || ((*t == 'm' || *t == 's' || *t == 'y') 6333 && !isWORDCHAR(t[1])) 6334 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6335 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6336 "!=~ should be !~"); 6337 } 6338 6339 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6340 s -= 2; 6341 TOKEN(0); 6342 } 6343 6344 ChEop(OP_NE); 6345 } 6346 6347 if (tmp == '~') 6348 PMop(OP_NOT); 6349 6350 s--; 6351 OPERATOR('!'); 6352 } 6353 6354 static int 6355 yyl_snail(pTHX_ char *s) 6356 { 6357 if (PL_expect == XPOSTDEREF) 6358 POSTDEREF('@'); 6359 PL_tokenbuf[0] = '@'; 6360 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6361 if (PL_expect == XOPERATOR) { 6362 char *d = s; 6363 if (PL_bufptr > s) { 6364 d = PL_bufptr-1; 6365 PL_bufptr = PL_oldbufptr; 6366 } 6367 no_op("Array", d); 6368 } 6369 pl_yylval.ival = 0; 6370 if (!PL_tokenbuf[1]) { 6371 PREREF('@'); 6372 } 6373 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6374 s = skipspace(s); 6375 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6376 && intuit_more(s, PL_bufend)) 6377 { 6378 if (*s == '{') 6379 PL_tokenbuf[0] = '%'; 6380 6381 /* Warn about @ where they meant $. */ 6382 if (*s == '[' || *s == '{') { 6383 if (ckWARN(WARN_SYNTAX)) { 6384 S_check_scalar_slice(aTHX_ s); 6385 } 6386 } 6387 } 6388 PL_expect = XOPERATOR; 6389 force_ident_maybe_lex('@'); 6390 TERM('@'); 6391 } 6392 6393 static int 6394 yyl_slash(pTHX_ char *s) 6395 { 6396 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { 6397 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6398 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6399 TOKEN(0); 6400 s += 2; 6401 AOPERATOR(DORDOR); 6402 } 6403 else if (PL_expect == XOPERATOR) { 6404 s++; 6405 if (*s == '=' && !PL_lex_allbrackets 6406 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6407 { 6408 s--; 6409 TOKEN(0); 6410 } 6411 Mop(OP_DIVIDE); 6412 } 6413 else { 6414 /* Disable warning on "study /blah/" */ 6415 if ( PL_oldoldbufptr == PL_last_uni 6416 && ( *PL_last_uni != 's' || s - PL_last_uni < 5 6417 || memNE(PL_last_uni, "study", 5) 6418 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF) 6419 )) 6420 check_uni(); 6421 s = scan_pat(s,OP_MATCH); 6422 TERM(sublex_start()); 6423 } 6424 } 6425 6426 static int 6427 yyl_leftsquare(pTHX_ char *s) 6428 { 6429 char tmp; 6430 6431 if (PL_lex_brackets > 100) 6432 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6433 PL_lex_brackstack[PL_lex_brackets++] = 0; 6434 PL_lex_allbrackets++; 6435 tmp = *s++; 6436 OPERATOR(tmp); 6437 } 6438 6439 static int 6440 yyl_rightsquare(pTHX_ char *s) 6441 { 6442 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6443 TOKEN(0); 6444 s++; 6445 if (PL_lex_brackets <= 0) 6446 /* diag_listed_as: Unmatched right %s bracket */ 6447 yyerror("Unmatched right square bracket"); 6448 else 6449 --PL_lex_brackets; 6450 PL_lex_allbrackets--; 6451 if (PL_lex_state == LEX_INTERPNORMAL) { 6452 if (PL_lex_brackets == 0) { 6453 if (*s == '-' && s[1] == '>') 6454 PL_lex_state = LEX_INTERPENDMAYBE; 6455 else if (*s != '[' && *s != '{') 6456 PL_lex_state = LEX_INTERPEND; 6457 } 6458 } 6459 TERM(']'); 6460 } 6461 6462 static int 6463 yyl_tilde(pTHX_ char *s) 6464 { 6465 bool bof; 6466 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) { 6467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6468 TOKEN(0); 6469 s += 2; 6470 Perl_ck_warner_d(aTHX_ 6471 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 6472 "Smartmatch is experimental"); 6473 NCEop(OP_SMARTMATCH); 6474 } 6475 s++; 6476 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { 6477 s++; 6478 BCop(OP_SCOMPLEMENT); 6479 } 6480 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); 6481 } 6482 6483 static int 6484 yyl_leftparen(pTHX_ char *s) 6485 { 6486 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 6487 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 6488 else 6489 PL_expect = XTERM; 6490 s = skipspace(s); 6491 PL_lex_allbrackets++; 6492 TOKEN('('); 6493 } 6494 6495 static int 6496 yyl_rightparen(pTHX_ char *s) 6497 { 6498 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 6499 TOKEN(0); 6500 s++; 6501 PL_lex_allbrackets--; 6502 s = skipspace(s); 6503 if (*s == '{') 6504 PREBLOCK(')'); 6505 TERM(')'); 6506 } 6507 6508 static int 6509 yyl_leftpointy(pTHX_ char *s) 6510 { 6511 char tmp; 6512 6513 if (PL_expect != XOPERATOR) { 6514 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) 6515 check_uni(); 6516 if (s[1] == '<' && s[2] != '>') 6517 s = scan_heredoc(s); 6518 else 6519 s = scan_inputsymbol(s); 6520 PL_expect = XOPERATOR; 6521 TOKEN(sublex_start()); 6522 } 6523 6524 s++; 6525 6526 tmp = *s++; 6527 if (tmp == '<') { 6528 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6529 s -= 2; 6530 TOKEN(0); 6531 } 6532 SHop(OP_LEFT_SHIFT); 6533 } 6534 if (tmp == '=') { 6535 tmp = *s++; 6536 if (tmp == '>') { 6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6538 s -= 3; 6539 TOKEN(0); 6540 } 6541 NCEop(OP_NCMP); 6542 } 6543 s--; 6544 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6545 s -= 2; 6546 TOKEN(0); 6547 } 6548 ChRop(OP_LE); 6549 } 6550 6551 s--; 6552 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6553 s--; 6554 TOKEN(0); 6555 } 6556 6557 ChRop(OP_LT); 6558 } 6559 6560 static int 6561 yyl_rightpointy(pTHX_ char *s) 6562 { 6563 const char tmp = *s++; 6564 6565 if (tmp == '>') { 6566 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6567 s -= 2; 6568 TOKEN(0); 6569 } 6570 SHop(OP_RIGHT_SHIFT); 6571 } 6572 else if (tmp == '=') { 6573 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6574 s -= 2; 6575 TOKEN(0); 6576 } 6577 ChRop(OP_GE); 6578 } 6579 6580 s--; 6581 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6582 s--; 6583 TOKEN(0); 6584 } 6585 6586 ChRop(OP_GT); 6587 } 6588 6589 static int 6590 yyl_sglquote(pTHX_ char *s) 6591 { 6592 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6593 if (!s) 6594 missingterm(NULL, 0); 6595 COPLINE_SET_FROM_MULTI_END; 6596 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6597 if (PL_expect == XOPERATOR) { 6598 no_op("String",s); 6599 } 6600 pl_yylval.ival = OP_CONST; 6601 TERM(sublex_start()); 6602 } 6603 6604 static int 6605 yyl_dblquote(pTHX_ char *s) 6606 { 6607 char *d; 6608 STRLEN len; 6609 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6610 DEBUG_T( { 6611 if (s) 6612 printbuf("### Saw string before %s\n", s); 6613 else 6614 PerlIO_printf(Perl_debug_log, 6615 "### Saw unterminated string\n"); 6616 } ); 6617 if (PL_expect == XOPERATOR) { 6618 no_op("String",s); 6619 } 6620 if (!s) 6621 missingterm(NULL, 0); 6622 pl_yylval.ival = OP_CONST; 6623 /* FIXME. I think that this can be const if char *d is replaced by 6624 more localised variables. */ 6625 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6626 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6627 pl_yylval.ival = OP_STRINGIFY; 6628 break; 6629 } 6630 } 6631 if (pl_yylval.ival == OP_CONST) 6632 COPLINE_SET_FROM_MULTI_END; 6633 TERM(sublex_start()); 6634 } 6635 6636 static int 6637 yyl_backtick(pTHX_ char *s) 6638 { 6639 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6640 DEBUG_T( { 6641 if (s) 6642 printbuf("### Saw backtick string before %s\n", s); 6643 else 6644 PerlIO_printf(Perl_debug_log, 6645 "### Saw unterminated backtick string\n"); 6646 } ); 6647 if (PL_expect == XOPERATOR) 6648 no_op("Backticks",s); 6649 if (!s) 6650 missingterm(NULL, 0); 6651 pl_yylval.ival = OP_BACKTICK; 6652 TERM(sublex_start()); 6653 } 6654 6655 static int 6656 yyl_backslash(pTHX_ char *s) 6657 { 6658 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s)) 6659 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6660 *s, *s); 6661 if (PL_expect == XOPERATOR) 6662 no_op("Backslash",s); 6663 OPERATOR(REFGEN); 6664 } 6665 6666 static void 6667 yyl_data_handle(pTHX) 6668 { 6669 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 6670 ? PL_curstash 6671 : PL_defstash; 6672 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1); 6673 6674 if (!isGV(gv)) 6675 gv_init(gv,stash,"DATA",4,0); 6676 6677 GvMULTI_on(gv); 6678 if (!GvIO(gv)) 6679 GvIOp(gv) = newIO(); 6680 IoIFP(GvIOp(gv)) = PL_rsfp; 6681 6682 /* Mark this internal pseudo-handle as clean */ 6683 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 6684 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 6685 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 6686 else 6687 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 6688 6689 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 6690 /* if the script was opened in binmode, we need to revert 6691 * it to text mode for compatibility; but only iff it has CRs 6692 * XXX this is a questionable hack at best. */ 6693 if (PL_bufend-PL_bufptr > 2 6694 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 6695 { 6696 Off_t loc = 0; 6697 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 6698 loc = PerlIO_tell(PL_rsfp); 6699 (void)PerlIO_seek(PL_rsfp, 0L, 0); 6700 } 6701 if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) { 6702 if (loc > 0) 6703 PerlIO_seek(PL_rsfp, loc, 0); 6704 } 6705 } 6706 #endif 6707 6708 #ifdef PERLIO_LAYERS 6709 if (!IN_BYTES) { 6710 if (UTF) 6711 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 6712 } 6713 #endif 6714 6715 PL_rsfp = NULL; 6716 } 6717 6718 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*) 6719 __attribute__noreturn__; 6720 6721 PERL_STATIC_NO_RET void 6722 yyl_croak_unrecognised(pTHX_ char *s) 6723 { 6724 SV *dsv = newSVpvs_flags("", SVs_TEMP); 6725 const char *c; 6726 char *d; 6727 STRLEN len; 6728 6729 if (UTF) { 6730 STRLEN skiplen = UTF8SKIP(s); 6731 STRLEN stravail = PL_bufend - s; 6732 c = sv_uni_display(dsv, newSVpvn_flags(s, 6733 skiplen > stravail ? stravail : skiplen, 6734 SVs_TEMP | SVf_UTF8), 6735 10, UNI_DISPLAY_ISPRINT); 6736 } 6737 else { 6738 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 6739 } 6740 6741 if (s >= PL_linestart) { 6742 d = PL_linestart; 6743 } 6744 else { 6745 /* somehow (probably due to a parse failure), PL_linestart has advanced 6746 * pass PL_bufptr, get a reasonable beginning of line 6747 */ 6748 d = s; 6749 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n') 6750 --d; 6751 } 6752 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d); 6753 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 6754 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT; 6755 } 6756 6757 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c, 6758 UTF8fARG(UTF, (s - d), d), 6759 (int) len + 1); 6760 } 6761 6762 static int 6763 yyl_require(pTHX_ char *s, I32 orig_keyword) 6764 { 6765 s = skipspace(s); 6766 if (isDIGIT(*s)) { 6767 s = force_version(s, FALSE); 6768 } 6769 else if (*s != 'v' || !isDIGIT(s[1]) 6770 || (s = force_version(s, TRUE), *s == 'v')) 6771 { 6772 *PL_tokenbuf = '\0'; 6773 s = force_word(s,BAREWORD,TRUE,TRUE); 6774 if (isIDFIRST_lazy_if_safe(PL_tokenbuf, 6775 PL_tokenbuf + sizeof(PL_tokenbuf), 6776 UTF)) 6777 { 6778 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 6779 GV_ADD | (UTF ? SVf_UTF8 : 0)); 6780 } 6781 else if (*s == '<') 6782 yyerror("<> at require-statement should be quotes"); 6783 } 6784 6785 if (orig_keyword == KEY_require) 6786 pl_yylval.ival = 1; 6787 else 6788 pl_yylval.ival = 0; 6789 6790 PL_expect = PL_nexttoke ? XOPERATOR : XTERM; 6791 PL_bufptr = s; 6792 PL_last_uni = PL_oldbufptr; 6793 PL_last_lop_op = OP_REQUIRE; 6794 s = skipspace(s); 6795 return REPORT( (int)REQUIRE ); 6796 } 6797 6798 static int 6799 yyl_foreach(pTHX_ char *s) 6800 { 6801 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 6802 return REPORT(0); 6803 pl_yylval.ival = CopLINE(PL_curcop); 6804 s = skipspace(s); 6805 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 6806 char *p = s; 6807 SSize_t s_off = s - SvPVX(PL_linestr); 6808 STRLEN len; 6809 6810 if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) { 6811 p += 2; 6812 } 6813 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) { 6814 p += 3; 6815 } 6816 6817 p = skipspace(p); 6818 /* skip optional package name, as in "for my abc $x (..)" */ 6819 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) { 6820 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 6821 p = skipspace(p); 6822 } 6823 if (*p != '$' && *p != '\\') 6824 Perl_croak(aTHX_ "Missing $ on loop variable"); 6825 6826 /* The buffer may have been reallocated, update s */ 6827 s = SvPVX(PL_linestr) + s_off; 6828 } 6829 OPERATOR(FOR); 6830 } 6831 6832 static int 6833 yyl_do(pTHX_ char *s, I32 orig_keyword) 6834 { 6835 s = skipspace(s); 6836 if (*s == '{') 6837 PRETERMBLOCK(DO); 6838 if (*s != '\'') { 6839 char *d; 6840 STRLEN len; 6841 *PL_tokenbuf = '&'; 6842 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6843 1, &len); 6844 if (len && memNEs(PL_tokenbuf+1, len, "CORE") 6845 && !keyword(PL_tokenbuf + 1, len, 0)) { 6846 SSize_t off = s-SvPVX(PL_linestr); 6847 d = skipspace(d); 6848 s = SvPVX(PL_linestr)+off; 6849 if (*d == '(') { 6850 force_ident_maybe_lex('&'); 6851 s = d; 6852 } 6853 } 6854 } 6855 if (orig_keyword == KEY_do) 6856 pl_yylval.ival = 1; 6857 else 6858 pl_yylval.ival = 0; 6859 OPERATOR(DO); 6860 } 6861 6862 static int 6863 yyl_my(pTHX_ char *s, I32 my) 6864 { 6865 if (PL_in_my) { 6866 PL_bufptr = s; 6867 yyerror(Perl_form(aTHX_ 6868 "Can't redeclare \"%s\" in \"%s\"", 6869 my == KEY_my ? "my" : 6870 my == KEY_state ? "state" : "our", 6871 PL_in_my == KEY_my ? "my" : 6872 PL_in_my == KEY_state ? "state" : "our")); 6873 } 6874 PL_in_my = (U16)my; 6875 s = skipspace(s); 6876 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 6877 STRLEN len; 6878 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 6879 if (memEQs(PL_tokenbuf, len, "sub")) 6880 return yyl_sub(aTHX_ s, my); 6881 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 6882 if (!PL_in_my_stash) { 6883 char tmpbuf[1024]; 6884 int i; 6885 PL_bufptr = s; 6886 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 6887 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf)); 6888 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 6889 } 6890 } 6891 else if (*s == '\\') { 6892 if (!FEATURE_MYREF_IS_ENABLED) 6893 Perl_croak(aTHX_ "The experimental declared_refs " 6894 "feature is not enabled"); 6895 Perl_ck_warner_d(aTHX_ 6896 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS), 6897 "Declaring references is experimental"); 6898 } 6899 OPERATOR(MY); 6900 } 6901 6902 static int yyl_try(pTHX_ char*); 6903 6904 static bool 6905 yyl_eol_needs_semicolon(pTHX_ char **ps) 6906 { 6907 char *s = *ps; 6908 if (PL_lex_state != LEX_NORMAL 6909 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) 6910 { 6911 const bool in_comment = *s == '#'; 6912 char *d; 6913 if (*s == '#' && s == PL_linestart && PL_in_eval 6914 && !PL_rsfp && !PL_parser->filtered) { 6915 /* handle eval qq[#line 1 "foo"\n ...] */ 6916 CopLINE_dec(PL_curcop); 6917 incline(s, PL_bufend); 6918 } 6919 d = s; 6920 while (d < PL_bufend && *d != '\n') 6921 d++; 6922 if (d < PL_bufend) 6923 d++; 6924 s = d; 6925 if (in_comment && d == PL_bufend 6926 && PL_lex_state == LEX_INTERPNORMAL 6927 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6928 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 6929 else 6930 incline(s, PL_bufend); 6931 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 6932 PL_lex_state = LEX_FORMLINE; 6933 force_next(FORMRBRACK); 6934 *ps = s; 6935 return TRUE; 6936 } 6937 } 6938 else { 6939 while (s < PL_bufend && *s != '\n') 6940 s++; 6941 if (s < PL_bufend) { 6942 s++; 6943 if (s < PL_bufend) 6944 incline(s, PL_bufend); 6945 } 6946 } 6947 *ps = s; 6948 return FALSE; 6949 } 6950 6951 static int 6952 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s) 6953 { 6954 char *d; 6955 6956 goto start; 6957 6958 do { 6959 fake_eof = 0; 6960 bof = cBOOL(PL_rsfp); 6961 start: 6962 6963 PL_bufptr = PL_bufend; 6964 COPLINE_INC_WITH_HERELINES; 6965 if (!lex_next_chunk(fake_eof)) { 6966 CopLINE_dec(PL_curcop); 6967 s = PL_bufptr; 6968 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 6969 } 6970 CopLINE_dec(PL_curcop); 6971 s = PL_bufptr; 6972 /* If it looks like the start of a BOM or raw UTF-16, 6973 * check if it in fact is. */ 6974 if (bof && PL_rsfp 6975 && ( *s == 0 6976 || *(U8*)s == BOM_UTF8_FIRST_BYTE 6977 || *(U8*)s >= 0xFE 6978 || s[1] == 0)) 6979 { 6980 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 6981 bof = (offset == (Off_t)SvCUR(PL_linestr)); 6982 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 6983 /* offset may include swallowed CR */ 6984 if (!bof) 6985 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 6986 #endif 6987 if (bof) { 6988 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 6989 s = swallow_bom((U8*)s); 6990 } 6991 } 6992 if (PL_parser->in_pod) { 6993 /* Incest with pod. */ 6994 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") 6995 && !isALPHA(s[4])) 6996 { 6997 SvPVCLEAR(PL_linestr); 6998 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 6999 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7000 PL_last_lop = PL_last_uni = NULL; 7001 PL_parser->in_pod = 0; 7002 } 7003 } 7004 if (PL_rsfp || PL_parser->filtered) 7005 incline(s, PL_bufend); 7006 } while (PL_parser->in_pod); 7007 7008 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 7009 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7010 PL_last_lop = PL_last_uni = NULL; 7011 if (CopLINE(PL_curcop) == 1) { 7012 while (s < PL_bufend && isSPACE(*s)) 7013 s++; 7014 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 7015 s++; 7016 d = NULL; 7017 if (!PL_in_eval) { 7018 if (*s == '#' && *(s+1) == '!') 7019 d = s + 2; 7020 #ifdef ALTERNATE_SHEBANG 7021 else { 7022 static char const as[] = ALTERNATE_SHEBANG; 7023 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 7024 d = s + (sizeof(as) - 1); 7025 } 7026 #endif /* ALTERNATE_SHEBANG */ 7027 } 7028 if (d) { 7029 char *ipath; 7030 char *ipathend; 7031 7032 while (isSPACE(*d)) 7033 d++; 7034 ipath = d; 7035 while (*d && !isSPACE(*d)) 7036 d++; 7037 ipathend = d; 7038 7039 #ifdef ARG_ZERO_IS_SCRIPT 7040 if (ipathend > ipath) { 7041 /* 7042 * HP-UX (at least) sets argv[0] to the script name, 7043 * which makes $^X incorrect. And Digital UNIX and Linux, 7044 * at least, set argv[0] to the basename of the Perl 7045 * interpreter. So, having found "#!", we'll set it right. 7046 */ 7047 SV* copfilesv = CopFILESV(PL_curcop); 7048 if (copfilesv) { 7049 SV * const x = 7050 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 7051 SVt_PV)); /* $^X */ 7052 assert(SvPOK(x) || SvGMAGICAL(x)); 7053 if (sv_eq(x, copfilesv)) { 7054 sv_setpvn(x, ipath, ipathend - ipath); 7055 SvSETMAGIC(x); 7056 } 7057 else { 7058 STRLEN blen; 7059 STRLEN llen; 7060 const char *bstart = SvPV_const(copfilesv, blen); 7061 const char * const lstart = SvPV_const(x, llen); 7062 if (llen < blen) { 7063 bstart += blen - llen; 7064 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 7065 sv_setpvn(x, ipath, ipathend - ipath); 7066 SvSETMAGIC(x); 7067 } 7068 } 7069 } 7070 } 7071 else { 7072 /* Anything to do if no copfilesv? */ 7073 } 7074 TAINT_NOT; /* $^X is always tainted, but that's OK */ 7075 } 7076 #endif /* ARG_ZERO_IS_SCRIPT */ 7077 7078 /* 7079 * Look for options. 7080 */ 7081 d = instr(s,"perl -"); 7082 if (!d) { 7083 d = instr(s,"perl"); 7084 #if defined(DOSISH) 7085 /* avoid getting into infinite loops when shebang 7086 * line contains "Perl" rather than "perl" */ 7087 if (!d) { 7088 for (d = ipathend-4; d >= ipath; --d) { 7089 if (isALPHA_FOLD_EQ(*d, 'p') 7090 && !ibcmp(d, "perl", 4)) 7091 { 7092 break; 7093 } 7094 } 7095 if (d < ipath) 7096 d = NULL; 7097 } 7098 #endif 7099 } 7100 #ifdef ALTERNATE_SHEBANG 7101 /* 7102 * If the ALTERNATE_SHEBANG on this system starts with a 7103 * character that can be part of a Perl expression, then if 7104 * we see it but not "perl", we're probably looking at the 7105 * start of Perl code, not a request to hand off to some 7106 * other interpreter. Similarly, if "perl" is there, but 7107 * not in the first 'word' of the line, we assume the line 7108 * contains the start of the Perl program. 7109 */ 7110 if (d && *s != '#') { 7111 const char *c = ipath; 7112 while (*c && !memCHRs("; \t\r\n\f\v#", *c)) 7113 c++; 7114 if (c < d) 7115 d = NULL; /* "perl" not in first word; ignore */ 7116 else 7117 *s = '#'; /* Don't try to parse shebang line */ 7118 } 7119 #endif /* ALTERNATE_SHEBANG */ 7120 if (!d 7121 && *s == '#' 7122 && ipathend > ipath 7123 && !PL_minus_c 7124 && !instr(s,"indir") 7125 && instr(PL_origargv[0],"perl")) 7126 { 7127 dVAR; 7128 char **newargv; 7129 7130 *ipathend = '\0'; 7131 s = ipathend + 1; 7132 while (s < PL_bufend && isSPACE(*s)) 7133 s++; 7134 if (s < PL_bufend) { 7135 Newx(newargv,PL_origargc+3,char*); 7136 newargv[1] = s; 7137 while (s < PL_bufend && !isSPACE(*s)) 7138 s++; 7139 *s = '\0'; 7140 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 7141 } 7142 else 7143 newargv = PL_origargv; 7144 newargv[0] = ipath; 7145 PERL_FPU_PRE_EXEC 7146 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 7147 PERL_FPU_POST_EXEC 7148 Perl_croak(aTHX_ "Can't exec %s", ipath); 7149 } 7150 if (d) { 7151 while (*d && !isSPACE(*d)) 7152 d++; 7153 while (SPACE_OR_TAB(*d)) 7154 d++; 7155 7156 if (*d++ == '-') { 7157 const bool switches_done = PL_doswitches; 7158 const U32 oldpdb = PL_perldb; 7159 const bool oldn = PL_minus_n; 7160 const bool oldp = PL_minus_p; 7161 const char *d1 = d; 7162 7163 do { 7164 bool baduni = FALSE; 7165 if (*d1 == 'C') { 7166 const char *d2 = d1 + 1; 7167 if (parse_unicode_opts((const char **)&d2) 7168 != PL_unicode) 7169 baduni = TRUE; 7170 } 7171 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { 7172 const char * const m = d1; 7173 while (*d1 && !isSPACE(*d1)) 7174 d1++; 7175 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 7176 (int)(d1 - m), m); 7177 } 7178 d1 = moreswitches(d1); 7179 } while (d1); 7180 if (PL_doswitches && !switches_done) { 7181 int argc = PL_origargc; 7182 char **argv = PL_origargv; 7183 do { 7184 argc--,argv++; 7185 } while (argc && argv[0][0] == '-' && argv[0][1]); 7186 init_argv_symbols(argc,argv); 7187 } 7188 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) 7189 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 7190 /* if we have already added "LINE: while (<>) {", 7191 we must not do it again */ 7192 { 7193 SvPVCLEAR(PL_linestr); 7194 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7195 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7196 PL_last_lop = PL_last_uni = NULL; 7197 PL_preambled = FALSE; 7198 if (PERLDB_LINE_OR_SAVESRC) 7199 (void)gv_fetchfile(PL_origfilename); 7200 return YYL_RETRY; 7201 } 7202 } 7203 } 7204 } 7205 } 7206 7207 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7208 PL_lex_state = LEX_FORMLINE; 7209 force_next(FORMRBRACK); 7210 TOKEN(';'); 7211 } 7212 7213 PL_bufptr = s; 7214 return YYL_RETRY; 7215 } 7216 7217 static int 7218 yyl_fatcomma(pTHX_ char *s, STRLEN len) 7219 { 7220 CLINE; 7221 pl_yylval.opval 7222 = newSVOP(OP_CONST, 0, 7223 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 7224 pl_yylval.opval->op_private = OPpCONST_BARE; 7225 TERM(BAREWORD); 7226 } 7227 7228 static int 7229 yyl_safe_bareword(pTHX_ char *s, const char lastchar) 7230 { 7231 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7232 && PL_parser->saw_infix_sigil) 7233 { 7234 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7235 "Operator or semicolon missing before %c%" UTF8f, 7236 lastchar, 7237 UTF8fARG(UTF, strlen(PL_tokenbuf), 7238 PL_tokenbuf)); 7239 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7240 "Ambiguous use of %c resolved as operator %c", 7241 lastchar, lastchar); 7242 } 7243 TOKEN(BAREWORD); 7244 } 7245 7246 static int 7247 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off) 7248 { 7249 if (sv) { 7250 op_free(rv2cv_op); 7251 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7252 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7253 if (SvTYPE(sv) == SVt_PVAV) 7254 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7255 pl_yylval.opval); 7256 else { 7257 pl_yylval.opval->op_private = 0; 7258 pl_yylval.opval->op_folded = 1; 7259 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7260 } 7261 TOKEN(BAREWORD); 7262 } 7263 7264 op_free(pl_yylval.opval); 7265 pl_yylval.opval = 7266 off ? newCVREF(0, rv2cv_op) : rv2cv_op; 7267 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7268 PL_last_lop = PL_oldbufptr; 7269 PL_last_lop_op = OP_ENTERSUB; 7270 7271 /* Is there a prototype? */ 7272 if (SvPOK(cv)) { 7273 int k = yyl_subproto(aTHX_ s, cv); 7274 if (k != KEY_NULL) 7275 return k; 7276 } 7277 7278 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7279 PL_expect = XTERM; 7280 force_next(off ? PRIVATEREF : BAREWORD); 7281 if (!PL_lex_allbrackets 7282 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7283 { 7284 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7285 } 7286 7287 TOKEN(NOAMP); 7288 } 7289 7290 /* Honour "reserved word" warnings, and enforce strict subs */ 7291 static void 7292 yyl_strictwarn_bareword(pTHX_ const char lastchar) 7293 { 7294 /* after "print" and similar functions (corresponding to 7295 * "F? L" in opcode.pl), whatever wasn't already parsed as 7296 * a filehandle should be subject to "strict subs". 7297 * Likewise for the optional indirect-object argument to system 7298 * or exec, which can't be a bareword */ 7299 if ((PL_last_lop_op == OP_PRINT 7300 || PL_last_lop_op == OP_PRTF 7301 || PL_last_lop_op == OP_SAY 7302 || PL_last_lop_op == OP_SYSTEM 7303 || PL_last_lop_op == OP_EXEC) 7304 && (PL_hints & HINT_STRICT_SUBS)) 7305 { 7306 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7307 } 7308 7309 if (lastchar != '-' && ckWARN(WARN_RESERVED)) { 7310 char *d = PL_tokenbuf; 7311 while (isLOWER(*d)) 7312 d++; 7313 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { 7314 /* PL_warn_reserved is constant */ 7315 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 7316 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7317 PL_tokenbuf); 7318 GCC_DIAG_RESTORE_STMT; 7319 } 7320 } 7321 } 7322 7323 static int 7324 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) 7325 { 7326 int pkgname = 0; 7327 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7328 bool safebw; 7329 bool no_op_error = FALSE; 7330 /* Use this var to track whether intuit_method has been 7331 called. intuit_method returns 0 or > 255. */ 7332 int key = 1; 7333 7334 if (PL_expect == XOPERATOR) { 7335 if (PL_bufptr == PL_linestart) { 7336 CopLINE_dec(PL_curcop); 7337 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7338 CopLINE_inc(PL_curcop); 7339 } 7340 else 7341 /* We want to call no_op with s pointing after the 7342 bareword, so defer it. But we want it to come 7343 before the Bad name croak. */ 7344 no_op_error = TRUE; 7345 } 7346 7347 /* Get the rest if it looks like a package qualifier */ 7348 7349 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7350 STRLEN morelen; 7351 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7352 TRUE, &morelen); 7353 if (no_op_error) { 7354 no_op("Bareword",s); 7355 no_op_error = FALSE; 7356 } 7357 if (!morelen) 7358 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", 7359 UTF8fARG(UTF, len, PL_tokenbuf), 7360 *s == '\'' ? "'" : "::"); 7361 len += morelen; 7362 pkgname = 1; 7363 } 7364 7365 if (no_op_error) 7366 no_op("Bareword",s); 7367 7368 /* See if the name is "Foo::", 7369 in which case Foo is a bareword 7370 (and a package name). */ 7371 7372 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { 7373 if (ckWARN(WARN_BAREWORD) 7374 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7375 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7376 "Bareword \"%" UTF8f 7377 "\" refers to nonexistent package", 7378 UTF8fARG(UTF, len, PL_tokenbuf)); 7379 len -= 2; 7380 PL_tokenbuf[len] = '\0'; 7381 c.gv = NULL; 7382 c.gvp = 0; 7383 safebw = TRUE; 7384 } 7385 else { 7386 safebw = FALSE; 7387 } 7388 7389 /* if we saw a global override before, get the right name */ 7390 7391 if (!c.sv) 7392 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); 7393 if (c.gvp) { 7394 SV *sv = newSVpvs("CORE::GLOBAL::"); 7395 sv_catsv(sv, c.sv); 7396 SvREFCNT_dec(c.sv); 7397 c.sv = sv; 7398 } 7399 7400 /* Presume this is going to be a bareword of some sort. */ 7401 CLINE; 7402 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv); 7403 pl_yylval.opval->op_private = OPpCONST_BARE; 7404 7405 /* And if "Foo::", then that's what it certainly is. */ 7406 if (safebw) 7407 return yyl_safe_bareword(aTHX_ s, lastchar); 7408 7409 if (!c.off) { 7410 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv)); 7411 const_op->op_private = OPpCONST_BARE; 7412 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); 7413 c.cv = c.lex 7414 ? isGV(c.gv) 7415 ? GvCV(c.gv) 7416 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV 7417 ? (CV *)SvRV(c.gv) 7418 : ((CV *)c.gv) 7419 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB); 7420 } 7421 7422 /* See if it's the indirect object for a list operator. */ 7423 7424 if (PL_oldoldbufptr 7425 && PL_oldoldbufptr < PL_bufptr 7426 && (PL_oldoldbufptr == PL_last_lop 7427 || PL_oldoldbufptr == PL_last_uni) 7428 && /* NO SKIPSPACE BEFORE HERE! */ 7429 (PL_expect == XREF 7430 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) 7431 == OA_FILEREF)) 7432 { 7433 bool immediate_paren = *s == '('; 7434 SSize_t s_off; 7435 7436 /* (Now we can afford to cross potential line boundary.) */ 7437 s = skipspace(s); 7438 7439 /* intuit_method() can indirectly call lex_next_chunk(), 7440 * invalidating s 7441 */ 7442 s_off = s - SvPVX(PL_linestr); 7443 /* Two barewords in a row may indicate method call. */ 7444 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 7445 || *s == '$') 7446 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7447 { 7448 /* the code at method: doesn't use s */ 7449 goto method; 7450 } 7451 s = SvPVX(PL_linestr) + s_off; 7452 7453 /* If not a declared subroutine, it's an indirect object. */ 7454 /* (But it's an indir obj regardless for sort.) */ 7455 /* Also, if "_" follows a filetest operator, it's a bareword */ 7456 7457 if ( 7458 ( !immediate_paren && (PL_last_lop_op == OP_SORT 7459 || (!c.cv 7460 && (PL_last_lop_op != OP_MAPSTART 7461 && PL_last_lop_op != OP_GREPSTART)))) 7462 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7463 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) 7464 == OA_FILESTATOP)) 7465 ) 7466 { 7467 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7468 yyl_strictwarn_bareword(aTHX_ lastchar); 7469 op_free(c.rv2cv_op); 7470 return yyl_safe_bareword(aTHX_ s, lastchar); 7471 } 7472 } 7473 7474 PL_expect = XOPERATOR; 7475 s = skipspace(s); 7476 7477 /* Is this a word before a => operator? */ 7478 if (*s == '=' && s[1] == '>' && !pkgname) { 7479 op_free(c.rv2cv_op); 7480 CLINE; 7481 if (c.gvp || (c.lex && !c.off)) { 7482 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7483 /* This is our own scalar, created a few lines 7484 above, so this is safe. */ 7485 SvREADONLY_off(c.sv); 7486 sv_setpv(c.sv, PL_tokenbuf); 7487 if (UTF && !IN_BYTES 7488 && is_utf8_string((U8*)PL_tokenbuf, len)) 7489 SvUTF8_on(c.sv); 7490 SvREADONLY_on(c.sv); 7491 } 7492 TERM(BAREWORD); 7493 } 7494 7495 /* If followed by a paren, it's certainly a subroutine. */ 7496 if (*s == '(') { 7497 CLINE; 7498 if (c.cv) { 7499 char *d = s + 1; 7500 while (SPACE_OR_TAB(*d)) 7501 d++; 7502 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv))) 7503 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off); 7504 } 7505 NEXTVAL_NEXTTOKE.opval = 7506 c.off ? c.rv2cv_op : pl_yylval.opval; 7507 if (c.off) 7508 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7509 else op_free(c.rv2cv_op), force_next(BAREWORD); 7510 pl_yylval.ival = 0; 7511 TOKEN('&'); 7512 } 7513 7514 /* If followed by var or block, call it a method (unless sub) */ 7515 7516 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) { 7517 op_free(c.rv2cv_op); 7518 PL_last_lop = PL_oldbufptr; 7519 PL_last_lop_op = OP_METHOD; 7520 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7521 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7522 PL_expect = XBLOCKTERM; 7523 PL_bufptr = s; 7524 return REPORT(METHOD); 7525 } 7526 7527 /* If followed by a bareword, see if it looks like indir obj. */ 7528 7529 if ( key == 1 7530 && !orig_keyword 7531 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') 7532 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7533 { 7534 method: 7535 if (c.lex && !c.off) { 7536 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7537 SvREADONLY_off(c.sv); 7538 sv_setpvn(c.sv, PL_tokenbuf, len); 7539 if (UTF && !IN_BYTES 7540 && is_utf8_string((U8*)PL_tokenbuf, len)) 7541 SvUTF8_on(c.sv); 7542 else SvUTF8_off(c.sv); 7543 } 7544 op_free(c.rv2cv_op); 7545 if (key == METHOD && !PL_lex_allbrackets 7546 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7547 { 7548 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7549 } 7550 return REPORT(key); 7551 } 7552 7553 /* Not a method, so call it a subroutine (if defined) */ 7554 7555 if (c.cv) { 7556 /* Check for a constant sub */ 7557 c.sv = cv_const_sv_or_av(c.cv); 7558 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off); 7559 } 7560 7561 /* Call it a bare word */ 7562 7563 if (PL_hints & HINT_STRICT_SUBS) 7564 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7565 else 7566 yyl_strictwarn_bareword(aTHX_ lastchar); 7567 7568 op_free(c.rv2cv_op); 7569 7570 return yyl_safe_bareword(aTHX_ s, lastchar); 7571 } 7572 7573 static int 7574 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) 7575 { 7576 switch (key) { 7577 default: /* not a keyword */ 7578 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7579 7580 case KEY___FILE__: 7581 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) ); 7582 7583 case KEY___LINE__: 7584 FUN0OP( 7585 newSVOP(OP_CONST, 0, 7586 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop))) 7587 ); 7588 7589 case KEY___PACKAGE__: 7590 FUN0OP( 7591 newSVOP(OP_CONST, 0, (PL_curstash 7592 ? newSVhek(HvNAME_HEK(PL_curstash)) 7593 : &PL_sv_undef)) 7594 ); 7595 7596 case KEY___DATA__: 7597 case KEY___END__: 7598 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) 7599 yyl_data_handle(aTHX); 7600 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s); 7601 7602 case KEY___SUB__: 7603 FUN0OP(CvCLONE(PL_compcv) 7604 ? newOP(OP_RUNCV, 0) 7605 : newPVOP(OP_RUNCV,0,NULL)); 7606 7607 case KEY_AUTOLOAD: 7608 case KEY_DESTROY: 7609 case KEY_BEGIN: 7610 case KEY_UNITCHECK: 7611 case KEY_CHECK: 7612 case KEY_INIT: 7613 case KEY_END: 7614 if (PL_expect == XSTATE) 7615 return yyl_sub(aTHX_ PL_bufptr, key); 7616 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7617 7618 case KEY_abs: 7619 UNI(OP_ABS); 7620 7621 case KEY_alarm: 7622 UNI(OP_ALARM); 7623 7624 case KEY_accept: 7625 LOP(OP_ACCEPT,XTERM); 7626 7627 case KEY_and: 7628 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7629 return REPORT(0); 7630 OPERATOR(ANDOP); 7631 7632 case KEY_atan2: 7633 LOP(OP_ATAN2,XTERM); 7634 7635 case KEY_bind: 7636 LOP(OP_BIND,XTERM); 7637 7638 case KEY_binmode: 7639 LOP(OP_BINMODE,XTERM); 7640 7641 case KEY_bless: 7642 LOP(OP_BLESS,XTERM); 7643 7644 case KEY_break: 7645 FUN0(OP_BREAK); 7646 7647 case KEY_chop: 7648 UNI(OP_CHOP); 7649 7650 case KEY_continue: 7651 /* We have to disambiguate the two senses of 7652 "continue". If the next token is a '{' then 7653 treat it as the start of a continue block; 7654 otherwise treat it as a control operator. 7655 */ 7656 s = skipspace(s); 7657 if (*s == '{') 7658 PREBLOCK(CONTINUE); 7659 else 7660 FUN0(OP_CONTINUE); 7661 7662 case KEY_chdir: 7663 /* may use HOME */ 7664 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7665 UNI(OP_CHDIR); 7666 7667 case KEY_close: 7668 UNI(OP_CLOSE); 7669 7670 case KEY_closedir: 7671 UNI(OP_CLOSEDIR); 7672 7673 case KEY_cmp: 7674 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7675 return REPORT(0); 7676 NCEop(OP_SCMP); 7677 7678 case KEY_caller: 7679 UNI(OP_CALLER); 7680 7681 case KEY_crypt: 7682 #ifdef FCRYPT 7683 if (!PL_cryptseen) { 7684 PL_cryptseen = TRUE; 7685 init_des(); 7686 } 7687 #endif 7688 LOP(OP_CRYPT,XTERM); 7689 7690 case KEY_chmod: 7691 LOP(OP_CHMOD,XTERM); 7692 7693 case KEY_chown: 7694 LOP(OP_CHOWN,XTERM); 7695 7696 case KEY_connect: 7697 LOP(OP_CONNECT,XTERM); 7698 7699 case KEY_chr: 7700 UNI(OP_CHR); 7701 7702 case KEY_cos: 7703 UNI(OP_COS); 7704 7705 case KEY_chroot: 7706 UNI(OP_CHROOT); 7707 7708 case KEY_default: 7709 PREBLOCK(DEFAULT); 7710 7711 case KEY_do: 7712 return yyl_do(aTHX_ s, orig_keyword); 7713 7714 case KEY_die: 7715 PL_hints |= HINT_BLOCK_SCOPE; 7716 LOP(OP_DIE,XTERM); 7717 7718 case KEY_defined: 7719 UNI(OP_DEFINED); 7720 7721 case KEY_delete: 7722 UNI(OP_DELETE); 7723 7724 case KEY_dbmopen: 7725 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7726 STR_WITH_LEN("NDBM_File::"), 7727 STR_WITH_LEN("DB_File::"), 7728 STR_WITH_LEN("GDBM_File::"), 7729 STR_WITH_LEN("SDBM_File::"), 7730 STR_WITH_LEN("ODBM_File::"), 7731 NULL); 7732 LOP(OP_DBMOPEN,XTERM); 7733 7734 case KEY_dbmclose: 7735 UNI(OP_DBMCLOSE); 7736 7737 case KEY_dump: 7738 LOOPX(OP_DUMP); 7739 7740 case KEY_else: 7741 PREBLOCK(ELSE); 7742 7743 case KEY_elsif: 7744 pl_yylval.ival = CopLINE(PL_curcop); 7745 OPERATOR(ELSIF); 7746 7747 case KEY_eq: 7748 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7749 return REPORT(0); 7750 ChEop(OP_SEQ); 7751 7752 case KEY_exists: 7753 UNI(OP_EXISTS); 7754 7755 case KEY_exit: 7756 UNI(OP_EXIT); 7757 7758 case KEY_eval: 7759 s = skipspace(s); 7760 if (*s == '{') { /* block eval */ 7761 PL_expect = XTERMBLOCK; 7762 UNIBRACK(OP_ENTERTRY); 7763 } 7764 else { /* string eval */ 7765 PL_expect = XTERM; 7766 UNIBRACK(OP_ENTEREVAL); 7767 } 7768 7769 case KEY_evalbytes: 7770 PL_expect = XTERM; 7771 UNIBRACK(-OP_ENTEREVAL); 7772 7773 case KEY_eof: 7774 UNI(OP_EOF); 7775 7776 case KEY_exp: 7777 UNI(OP_EXP); 7778 7779 case KEY_each: 7780 UNI(OP_EACH); 7781 7782 case KEY_exec: 7783 LOP(OP_EXEC,XREF); 7784 7785 case KEY_endhostent: 7786 FUN0(OP_EHOSTENT); 7787 7788 case KEY_endnetent: 7789 FUN0(OP_ENETENT); 7790 7791 case KEY_endservent: 7792 FUN0(OP_ESERVENT); 7793 7794 case KEY_endprotoent: 7795 FUN0(OP_EPROTOENT); 7796 7797 case KEY_endpwent: 7798 FUN0(OP_EPWENT); 7799 7800 case KEY_endgrent: 7801 FUN0(OP_EGRENT); 7802 7803 case KEY_for: 7804 case KEY_foreach: 7805 return yyl_foreach(aTHX_ s); 7806 7807 case KEY_formline: 7808 LOP(OP_FORMLINE,XTERM); 7809 7810 case KEY_fork: 7811 FUN0(OP_FORK); 7812 7813 case KEY_fc: 7814 UNI(OP_FC); 7815 7816 case KEY_fcntl: 7817 LOP(OP_FCNTL,XTERM); 7818 7819 case KEY_fileno: 7820 UNI(OP_FILENO); 7821 7822 case KEY_flock: 7823 LOP(OP_FLOCK,XTERM); 7824 7825 case KEY_gt: 7826 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7827 return REPORT(0); 7828 ChRop(OP_SGT); 7829 7830 case KEY_ge: 7831 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7832 return REPORT(0); 7833 ChRop(OP_SGE); 7834 7835 case KEY_grep: 7836 LOP(OP_GREPSTART, XREF); 7837 7838 case KEY_goto: 7839 LOOPX(OP_GOTO); 7840 7841 case KEY_gmtime: 7842 UNI(OP_GMTIME); 7843 7844 case KEY_getc: 7845 UNIDOR(OP_GETC); 7846 7847 case KEY_getppid: 7848 FUN0(OP_GETPPID); 7849 7850 case KEY_getpgrp: 7851 UNI(OP_GETPGRP); 7852 7853 case KEY_getpriority: 7854 LOP(OP_GETPRIORITY,XTERM); 7855 7856 case KEY_getprotobyname: 7857 UNI(OP_GPBYNAME); 7858 7859 case KEY_getprotobynumber: 7860 LOP(OP_GPBYNUMBER,XTERM); 7861 7862 case KEY_getprotoent: 7863 FUN0(OP_GPROTOENT); 7864 7865 case KEY_getpwent: 7866 FUN0(OP_GPWENT); 7867 7868 case KEY_getpwnam: 7869 UNI(OP_GPWNAM); 7870 7871 case KEY_getpwuid: 7872 UNI(OP_GPWUID); 7873 7874 case KEY_getpeername: 7875 UNI(OP_GETPEERNAME); 7876 7877 case KEY_gethostbyname: 7878 UNI(OP_GHBYNAME); 7879 7880 case KEY_gethostbyaddr: 7881 LOP(OP_GHBYADDR,XTERM); 7882 7883 case KEY_gethostent: 7884 FUN0(OP_GHOSTENT); 7885 7886 case KEY_getnetbyname: 7887 UNI(OP_GNBYNAME); 7888 7889 case KEY_getnetbyaddr: 7890 LOP(OP_GNBYADDR,XTERM); 7891 7892 case KEY_getnetent: 7893 FUN0(OP_GNETENT); 7894 7895 case KEY_getservbyname: 7896 LOP(OP_GSBYNAME,XTERM); 7897 7898 case KEY_getservbyport: 7899 LOP(OP_GSBYPORT,XTERM); 7900 7901 case KEY_getservent: 7902 FUN0(OP_GSERVENT); 7903 7904 case KEY_getsockname: 7905 UNI(OP_GETSOCKNAME); 7906 7907 case KEY_getsockopt: 7908 LOP(OP_GSOCKOPT,XTERM); 7909 7910 case KEY_getgrent: 7911 FUN0(OP_GGRENT); 7912 7913 case KEY_getgrnam: 7914 UNI(OP_GGRNAM); 7915 7916 case KEY_getgrgid: 7917 UNI(OP_GGRGID); 7918 7919 case KEY_getlogin: 7920 FUN0(OP_GETLOGIN); 7921 7922 case KEY_given: 7923 pl_yylval.ival = CopLINE(PL_curcop); 7924 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 7925 "given is experimental"); 7926 OPERATOR(GIVEN); 7927 7928 case KEY_glob: 7929 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM ); 7930 7931 case KEY_hex: 7932 UNI(OP_HEX); 7933 7934 case KEY_if: 7935 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7936 return REPORT(0); 7937 pl_yylval.ival = CopLINE(PL_curcop); 7938 OPERATOR(IF); 7939 7940 case KEY_index: 7941 LOP(OP_INDEX,XTERM); 7942 7943 case KEY_int: 7944 UNI(OP_INT); 7945 7946 case KEY_ioctl: 7947 LOP(OP_IOCTL,XTERM); 7948 7949 case KEY_isa: 7950 Perl_ck_warner_d(aTHX_ 7951 packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental"); 7952 NCRop(OP_ISA); 7953 7954 case KEY_join: 7955 LOP(OP_JOIN,XTERM); 7956 7957 case KEY_keys: 7958 UNI(OP_KEYS); 7959 7960 case KEY_kill: 7961 LOP(OP_KILL,XTERM); 7962 7963 case KEY_last: 7964 LOOPX(OP_LAST); 7965 7966 case KEY_lc: 7967 UNI(OP_LC); 7968 7969 case KEY_lcfirst: 7970 UNI(OP_LCFIRST); 7971 7972 case KEY_local: 7973 OPERATOR(LOCAL); 7974 7975 case KEY_length: 7976 UNI(OP_LENGTH); 7977 7978 case KEY_lt: 7979 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7980 return REPORT(0); 7981 ChRop(OP_SLT); 7982 7983 case KEY_le: 7984 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7985 return REPORT(0); 7986 ChRop(OP_SLE); 7987 7988 case KEY_localtime: 7989 UNI(OP_LOCALTIME); 7990 7991 case KEY_log: 7992 UNI(OP_LOG); 7993 7994 case KEY_link: 7995 LOP(OP_LINK,XTERM); 7996 7997 case KEY_listen: 7998 LOP(OP_LISTEN,XTERM); 7999 8000 case KEY_lock: 8001 UNI(OP_LOCK); 8002 8003 case KEY_lstat: 8004 UNI(OP_LSTAT); 8005 8006 case KEY_m: 8007 s = scan_pat(s,OP_MATCH); 8008 TERM(sublex_start()); 8009 8010 case KEY_map: 8011 LOP(OP_MAPSTART, XREF); 8012 8013 case KEY_mkdir: 8014 LOP(OP_MKDIR,XTERM); 8015 8016 case KEY_msgctl: 8017 LOP(OP_MSGCTL,XTERM); 8018 8019 case KEY_msgget: 8020 LOP(OP_MSGGET,XTERM); 8021 8022 case KEY_msgrcv: 8023 LOP(OP_MSGRCV,XTERM); 8024 8025 case KEY_msgsnd: 8026 LOP(OP_MSGSND,XTERM); 8027 8028 case KEY_our: 8029 case KEY_my: 8030 case KEY_state: 8031 return yyl_my(aTHX_ s, key); 8032 8033 case KEY_next: 8034 LOOPX(OP_NEXT); 8035 8036 case KEY_ne: 8037 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8038 return REPORT(0); 8039 ChEop(OP_SNE); 8040 8041 case KEY_no: 8042 s = tokenize_use(0, s); 8043 TOKEN(USE); 8044 8045 case KEY_not: 8046 if (*s == '(' || (s = skipspace(s), *s == '(')) 8047 FUN1(OP_NOT); 8048 else { 8049 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8050 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8051 OPERATOR(NOTOP); 8052 } 8053 8054 case KEY_open: 8055 s = skipspace(s); 8056 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 8057 const char *t; 8058 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 8059 for (t=d; isSPACE(*t);) 8060 t++; 8061 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8062 /* [perl #16184] */ 8063 && !(t[0] == '=' && t[1] == '>') 8064 && !(t[0] == ':' && t[1] == ':') 8065 && !keyword(s, d-s, 0) 8066 ) { 8067 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8068 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")", 8069 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 8070 } 8071 } 8072 LOP(OP_OPEN,XTERM); 8073 8074 case KEY_or: 8075 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8076 return REPORT(0); 8077 pl_yylval.ival = OP_OR; 8078 OPERATOR(OROP); 8079 8080 case KEY_ord: 8081 UNI(OP_ORD); 8082 8083 case KEY_oct: 8084 UNI(OP_OCT); 8085 8086 case KEY_opendir: 8087 LOP(OP_OPEN_DIR,XTERM); 8088 8089 case KEY_print: 8090 checkcomma(s,PL_tokenbuf,"filehandle"); 8091 LOP(OP_PRINT,XREF); 8092 8093 case KEY_printf: 8094 checkcomma(s,PL_tokenbuf,"filehandle"); 8095 LOP(OP_PRTF,XREF); 8096 8097 case KEY_prototype: 8098 UNI(OP_PROTOTYPE); 8099 8100 case KEY_push: 8101 LOP(OP_PUSH,XTERM); 8102 8103 case KEY_pop: 8104 UNIDOR(OP_POP); 8105 8106 case KEY_pos: 8107 UNIDOR(OP_POS); 8108 8109 case KEY_pack: 8110 LOP(OP_PACK,XTERM); 8111 8112 case KEY_package: 8113 s = force_word(s,BAREWORD,FALSE,TRUE); 8114 s = skipspace(s); 8115 s = force_strict_version(s); 8116 PREBLOCK(PACKAGE); 8117 8118 case KEY_pipe: 8119 LOP(OP_PIPE_OP,XTERM); 8120 8121 case KEY_q: 8122 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8123 if (!s) 8124 missingterm(NULL, 0); 8125 COPLINE_SET_FROM_MULTI_END; 8126 pl_yylval.ival = OP_CONST; 8127 TERM(sublex_start()); 8128 8129 case KEY_quotemeta: 8130 UNI(OP_QUOTEMETA); 8131 8132 case KEY_qw: 8133 return yyl_qw(aTHX_ s, len); 8134 8135 case KEY_qq: 8136 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8137 if (!s) 8138 missingterm(NULL, 0); 8139 pl_yylval.ival = OP_STRINGIFY; 8140 if (SvIVX(PL_lex_stuff) == '\'') 8141 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8142 TERM(sublex_start()); 8143 8144 case KEY_qr: 8145 s = scan_pat(s,OP_QR); 8146 TERM(sublex_start()); 8147 8148 case KEY_qx: 8149 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8150 if (!s) 8151 missingterm(NULL, 0); 8152 pl_yylval.ival = OP_BACKTICK; 8153 TERM(sublex_start()); 8154 8155 case KEY_return: 8156 OLDLOP(OP_RETURN); 8157 8158 case KEY_require: 8159 return yyl_require(aTHX_ s, orig_keyword); 8160 8161 case KEY_reset: 8162 UNI(OP_RESET); 8163 8164 case KEY_redo: 8165 LOOPX(OP_REDO); 8166 8167 case KEY_rename: 8168 LOP(OP_RENAME,XTERM); 8169 8170 case KEY_rand: 8171 UNI(OP_RAND); 8172 8173 case KEY_rmdir: 8174 UNI(OP_RMDIR); 8175 8176 case KEY_rindex: 8177 LOP(OP_RINDEX,XTERM); 8178 8179 case KEY_read: 8180 LOP(OP_READ,XTERM); 8181 8182 case KEY_readdir: 8183 UNI(OP_READDIR); 8184 8185 case KEY_readline: 8186 UNIDOR(OP_READLINE); 8187 8188 case KEY_readpipe: 8189 UNIDOR(OP_BACKTICK); 8190 8191 case KEY_rewinddir: 8192 UNI(OP_REWINDDIR); 8193 8194 case KEY_recv: 8195 LOP(OP_RECV,XTERM); 8196 8197 case KEY_reverse: 8198 LOP(OP_REVERSE,XTERM); 8199 8200 case KEY_readlink: 8201 UNIDOR(OP_READLINK); 8202 8203 case KEY_ref: 8204 UNI(OP_REF); 8205 8206 case KEY_s: 8207 s = scan_subst(s); 8208 if (pl_yylval.opval) 8209 TERM(sublex_start()); 8210 else 8211 TOKEN(1); /* force error */ 8212 8213 case KEY_say: 8214 checkcomma(s,PL_tokenbuf,"filehandle"); 8215 LOP(OP_SAY,XREF); 8216 8217 case KEY_chomp: 8218 UNI(OP_CHOMP); 8219 8220 case KEY_scalar: 8221 UNI(OP_SCALAR); 8222 8223 case KEY_select: 8224 LOP(OP_SELECT,XTERM); 8225 8226 case KEY_seek: 8227 LOP(OP_SEEK,XTERM); 8228 8229 case KEY_semctl: 8230 LOP(OP_SEMCTL,XTERM); 8231 8232 case KEY_semget: 8233 LOP(OP_SEMGET,XTERM); 8234 8235 case KEY_semop: 8236 LOP(OP_SEMOP,XTERM); 8237 8238 case KEY_send: 8239 LOP(OP_SEND,XTERM); 8240 8241 case KEY_setpgrp: 8242 LOP(OP_SETPGRP,XTERM); 8243 8244 case KEY_setpriority: 8245 LOP(OP_SETPRIORITY,XTERM); 8246 8247 case KEY_sethostent: 8248 UNI(OP_SHOSTENT); 8249 8250 case KEY_setnetent: 8251 UNI(OP_SNETENT); 8252 8253 case KEY_setservent: 8254 UNI(OP_SSERVENT); 8255 8256 case KEY_setprotoent: 8257 UNI(OP_SPROTOENT); 8258 8259 case KEY_setpwent: 8260 FUN0(OP_SPWENT); 8261 8262 case KEY_setgrent: 8263 FUN0(OP_SGRENT); 8264 8265 case KEY_seekdir: 8266 LOP(OP_SEEKDIR,XTERM); 8267 8268 case KEY_setsockopt: 8269 LOP(OP_SSOCKOPT,XTERM); 8270 8271 case KEY_shift: 8272 UNIDOR(OP_SHIFT); 8273 8274 case KEY_shmctl: 8275 LOP(OP_SHMCTL,XTERM); 8276 8277 case KEY_shmget: 8278 LOP(OP_SHMGET,XTERM); 8279 8280 case KEY_shmread: 8281 LOP(OP_SHMREAD,XTERM); 8282 8283 case KEY_shmwrite: 8284 LOP(OP_SHMWRITE,XTERM); 8285 8286 case KEY_shutdown: 8287 LOP(OP_SHUTDOWN,XTERM); 8288 8289 case KEY_sin: 8290 UNI(OP_SIN); 8291 8292 case KEY_sleep: 8293 UNI(OP_SLEEP); 8294 8295 case KEY_socket: 8296 LOP(OP_SOCKET,XTERM); 8297 8298 case KEY_socketpair: 8299 LOP(OP_SOCKPAIR,XTERM); 8300 8301 case KEY_sort: 8302 checkcomma(s,PL_tokenbuf,"subroutine name"); 8303 s = skipspace(s); 8304 PL_expect = XTERM; 8305 s = force_word(s,BAREWORD,TRUE,TRUE); 8306 LOP(OP_SORT,XREF); 8307 8308 case KEY_split: 8309 LOP(OP_SPLIT,XTERM); 8310 8311 case KEY_sprintf: 8312 LOP(OP_SPRINTF,XTERM); 8313 8314 case KEY_splice: 8315 LOP(OP_SPLICE,XTERM); 8316 8317 case KEY_sqrt: 8318 UNI(OP_SQRT); 8319 8320 case KEY_srand: 8321 UNI(OP_SRAND); 8322 8323 case KEY_stat: 8324 UNI(OP_STAT); 8325 8326 case KEY_study: 8327 UNI(OP_STUDY); 8328 8329 case KEY_substr: 8330 LOP(OP_SUBSTR,XTERM); 8331 8332 case KEY_format: 8333 case KEY_sub: 8334 return yyl_sub(aTHX_ s, key); 8335 8336 case KEY_system: 8337 LOP(OP_SYSTEM,XREF); 8338 8339 case KEY_symlink: 8340 LOP(OP_SYMLINK,XTERM); 8341 8342 case KEY_syscall: 8343 LOP(OP_SYSCALL,XTERM); 8344 8345 case KEY_sysopen: 8346 LOP(OP_SYSOPEN,XTERM); 8347 8348 case KEY_sysseek: 8349 LOP(OP_SYSSEEK,XTERM); 8350 8351 case KEY_sysread: 8352 LOP(OP_SYSREAD,XTERM); 8353 8354 case KEY_syswrite: 8355 LOP(OP_SYSWRITE,XTERM); 8356 8357 case KEY_tr: 8358 case KEY_y: 8359 s = scan_trans(s); 8360 TERM(sublex_start()); 8361 8362 case KEY_tell: 8363 UNI(OP_TELL); 8364 8365 case KEY_telldir: 8366 UNI(OP_TELLDIR); 8367 8368 case KEY_tie: 8369 LOP(OP_TIE,XTERM); 8370 8371 case KEY_tied: 8372 UNI(OP_TIED); 8373 8374 case KEY_time: 8375 FUN0(OP_TIME); 8376 8377 case KEY_times: 8378 FUN0(OP_TMS); 8379 8380 case KEY_truncate: 8381 LOP(OP_TRUNCATE,XTERM); 8382 8383 case KEY_uc: 8384 UNI(OP_UC); 8385 8386 case KEY_ucfirst: 8387 UNI(OP_UCFIRST); 8388 8389 case KEY_untie: 8390 UNI(OP_UNTIE); 8391 8392 case KEY_until: 8393 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8394 return REPORT(0); 8395 pl_yylval.ival = CopLINE(PL_curcop); 8396 OPERATOR(UNTIL); 8397 8398 case KEY_unless: 8399 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8400 return REPORT(0); 8401 pl_yylval.ival = CopLINE(PL_curcop); 8402 OPERATOR(UNLESS); 8403 8404 case KEY_unlink: 8405 LOP(OP_UNLINK,XTERM); 8406 8407 case KEY_undef: 8408 UNIDOR(OP_UNDEF); 8409 8410 case KEY_unpack: 8411 LOP(OP_UNPACK,XTERM); 8412 8413 case KEY_utime: 8414 LOP(OP_UTIME,XTERM); 8415 8416 case KEY_umask: 8417 UNIDOR(OP_UMASK); 8418 8419 case KEY_unshift: 8420 LOP(OP_UNSHIFT,XTERM); 8421 8422 case KEY_use: 8423 s = tokenize_use(1, s); 8424 TOKEN(USE); 8425 8426 case KEY_values: 8427 UNI(OP_VALUES); 8428 8429 case KEY_vec: 8430 LOP(OP_VEC,XTERM); 8431 8432 case KEY_when: 8433 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8434 return REPORT(0); 8435 pl_yylval.ival = CopLINE(PL_curcop); 8436 Perl_ck_warner_d(aTHX_ 8437 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8438 "when is experimental"); 8439 OPERATOR(WHEN); 8440 8441 case KEY_while: 8442 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8443 return REPORT(0); 8444 pl_yylval.ival = CopLINE(PL_curcop); 8445 OPERATOR(WHILE); 8446 8447 case KEY_warn: 8448 PL_hints |= HINT_BLOCK_SCOPE; 8449 LOP(OP_WARN,XTERM); 8450 8451 case KEY_wait: 8452 FUN0(OP_WAIT); 8453 8454 case KEY_waitpid: 8455 LOP(OP_WAITPID,XTERM); 8456 8457 case KEY_wantarray: 8458 FUN0(OP_WANTARRAY); 8459 8460 case KEY_write: 8461 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8462 * we use the same number on EBCDIC */ 8463 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8464 UNI(OP_ENTERWRITE); 8465 8466 case KEY_x: 8467 if (PL_expect == XOPERATOR) { 8468 if (*s == '=' && !PL_lex_allbrackets 8469 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8470 { 8471 return REPORT(0); 8472 } 8473 Mop(OP_REPEAT); 8474 } 8475 check_uni(); 8476 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 8477 8478 case KEY_xor: 8479 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8480 return REPORT(0); 8481 pl_yylval.ival = OP_XOR; 8482 OPERATOR(OROP); 8483 } 8484 } 8485 8486 static int 8487 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c) 8488 { 8489 I32 key = 0; 8490 I32 orig_keyword = 0; 8491 STRLEN olen = len; 8492 char *d = s; 8493 s += 2; 8494 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 8495 if ((*s == ':' && s[1] == ':') 8496 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 8497 { 8498 Copy(PL_bufptr, PL_tokenbuf, olen, char); 8499 return yyl_just_a_word(aTHX_ d, olen, 0, c); 8500 } 8501 if (!key) 8502 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", 8503 UTF8fARG(UTF, len, PL_tokenbuf)); 8504 if (key < 0) 8505 key = -key; 8506 else if (key == KEY_require || key == KEY_do 8507 || key == KEY_glob) 8508 /* that's a way to remember we saw "CORE::" */ 8509 orig_keyword = key; 8510 8511 /* Known to be a reserved word at this point */ 8512 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8513 } 8514 8515 static int 8516 yyl_keylookup(pTHX_ char *s, GV *gv) 8517 { 8518 dVAR; 8519 STRLEN len; 8520 bool anydelim; 8521 I32 key; 8522 struct code c = no_code; 8523 I32 orig_keyword = 0; 8524 char *d; 8525 8526 c.gv = gv; 8527 8528 PL_bufptr = s; 8529 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 8530 8531 /* Some keywords can be followed by any delimiter, including ':' */ 8532 anydelim = word_takes_any_delimiter(PL_tokenbuf, len); 8533 8534 /* x::* is just a word, unless x is "CORE" */ 8535 if (!anydelim && *s == ':' && s[1] == ':') { 8536 if (memEQs(PL_tokenbuf, len, "CORE")) 8537 return yyl_key_core(aTHX_ s, len, c); 8538 return yyl_just_a_word(aTHX_ s, len, 0, c); 8539 } 8540 8541 d = s; 8542 while (d < PL_bufend && isSPACE(*d)) 8543 d++; /* no comments skipped here, or s### is misparsed */ 8544 8545 /* Is this a word before a => operator? */ 8546 if (*d == '=' && d[1] == '>') { 8547 return yyl_fatcomma(aTHX_ s, len); 8548 } 8549 8550 /* Check for plugged-in keyword */ 8551 { 8552 OP *o; 8553 int result; 8554 char *saved_bufptr = PL_bufptr; 8555 PL_bufptr = s; 8556 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 8557 s = PL_bufptr; 8558 if (result == KEYWORD_PLUGIN_DECLINE) { 8559 /* not a plugged-in keyword */ 8560 PL_bufptr = saved_bufptr; 8561 } else if (result == KEYWORD_PLUGIN_STMT) { 8562 pl_yylval.opval = o; 8563 CLINE; 8564 if (!PL_nexttoke) PL_expect = XSTATE; 8565 return REPORT(PLUGSTMT); 8566 } else if (result == KEYWORD_PLUGIN_EXPR) { 8567 pl_yylval.opval = o; 8568 CLINE; 8569 if (!PL_nexttoke) PL_expect = XOPERATOR; 8570 return REPORT(PLUGEXPR); 8571 } else { 8572 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf); 8573 } 8574 } 8575 8576 /* Is this a label? */ 8577 if (!anydelim && PL_expect == XSTATE 8578 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 8579 s = d + 1; 8580 pl_yylval.opval = 8581 newSVOP(OP_CONST, 0, 8582 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); 8583 CLINE; 8584 TOKEN(LABEL); 8585 } 8586 8587 /* Check for lexical sub */ 8588 if (PL_expect != XOPERATOR) { 8589 char tmpbuf[sizeof PL_tokenbuf + 1]; 8590 *tmpbuf = '&'; 8591 Copy(PL_tokenbuf, tmpbuf+1, len, char); 8592 c.off = pad_findmy_pvn(tmpbuf, len+1, 0); 8593 if (c.off != NOT_IN_PAD) { 8594 assert(c.off); /* we assume this is boolean-true below */ 8595 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) { 8596 HV * const stash = PAD_COMPNAME_OURSTASH(c.off); 8597 HEK * const stashname = HvNAME_HEK(stash); 8598 c.sv = newSVhek(stashname); 8599 sv_catpvs(c.sv, "::"); 8600 sv_catpvn_flags(c.sv, PL_tokenbuf, len, 8601 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 8602 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv), 8603 SVt_PVCV); 8604 c.off = 0; 8605 if (!c.gv) { 8606 sv_free(c.sv); 8607 c.sv = NULL; 8608 return yyl_just_a_word(aTHX_ s, len, 0, c); 8609 } 8610 } 8611 else { 8612 c.rv2cv_op = newOP(OP_PADANY, 0); 8613 c.rv2cv_op->op_targ = c.off; 8614 c.cv = find_lexical_cv(c.off); 8615 } 8616 c.lex = TRUE; 8617 return yyl_just_a_word(aTHX_ s, len, 0, c); 8618 } 8619 c.off = 0; 8620 } 8621 8622 /* Check for built-in keyword */ 8623 key = keyword(PL_tokenbuf, len, 0); 8624 8625 if (key < 0) 8626 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp); 8627 8628 if (key && key != KEY___DATA__ && key != KEY___END__ 8629 && (!anydelim || *s != '#')) { 8630 /* no override, and not s### either; skipspace is safe here 8631 * check for => on following line */ 8632 bool arrow; 8633 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 8634 STRLEN soff = s - SvPVX(PL_linestr); 8635 s = peekspace(s); 8636 arrow = *s == '=' && s[1] == '>'; 8637 PL_bufptr = SvPVX(PL_linestr) + bufoff; 8638 s = SvPVX(PL_linestr) + soff; 8639 if (arrow) 8640 return yyl_fatcomma(aTHX_ s, len); 8641 } 8642 8643 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8644 } 8645 8646 static int 8647 yyl_try(pTHX_ char *s) 8648 { 8649 char *d; 8650 GV *gv = NULL; 8651 int tok; 8652 8653 retry: 8654 switch (*s) { 8655 default: 8656 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) { 8657 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 8658 return tok; 8659 goto retry_bufptr; 8660 } 8661 yyl_croak_unrecognised(aTHX_ s); 8662 8663 case 4: 8664 case 26: 8665 /* emulate EOF on ^D or ^Z */ 8666 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY) 8667 return tok; 8668 retry_bufptr: 8669 s = PL_bufptr; 8670 goto retry; 8671 8672 case 0: 8673 if ((!PL_rsfp || PL_lex_inwhat) 8674 && (!PL_parser->filtered || s+1 < PL_bufend)) { 8675 PL_last_uni = 0; 8676 PL_last_lop = 0; 8677 if (PL_lex_brackets 8678 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) 8679 { 8680 yyerror((const char *) 8681 (PL_lex_formbrack 8682 ? "Format not terminated" 8683 : "Missing right curly or square bracket")); 8684 } 8685 DEBUG_T({ 8686 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); 8687 }); 8688 TOKEN(0); 8689 } 8690 if (s++ < PL_bufend) 8691 goto retry; /* ignore stray nulls */ 8692 PL_last_uni = 0; 8693 PL_last_lop = 0; 8694 if (!PL_in_eval && !PL_preambled) { 8695 PL_preambled = TRUE; 8696 if (PL_perldb) { 8697 /* Generate a string of Perl code to load the debugger. 8698 * If PERL5DB is set, it will return the contents of that, 8699 * otherwise a compile-time require of perl5db.pl. */ 8700 8701 const char * const pdb = PerlEnv_getenv("PERL5DB"); 8702 8703 if (pdb) { 8704 sv_setpv(PL_linestr, pdb); 8705 sv_catpvs(PL_linestr,";"); 8706 } else { 8707 SETERRNO(0,SS_NORMAL); 8708 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 8709 } 8710 PL_parser->preambling = CopLINE(PL_curcop); 8711 } else 8712 SvPVCLEAR(PL_linestr); 8713 if (PL_preambleav) { 8714 SV **svp = AvARRAY(PL_preambleav); 8715 SV **const end = svp + AvFILLp(PL_preambleav); 8716 while(svp <= end) { 8717 sv_catsv(PL_linestr, *svp); 8718 ++svp; 8719 sv_catpvs(PL_linestr, ";"); 8720 } 8721 sv_free(MUTABLE_SV(PL_preambleav)); 8722 PL_preambleav = NULL; 8723 } 8724 if (PL_minus_E) 8725 sv_catpvs(PL_linestr, 8726 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 8727 if (PL_minus_n || PL_minus_p) { 8728 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 8729 if (PL_minus_l) 8730 sv_catpvs(PL_linestr,"chomp;"); 8731 if (PL_minus_a) { 8732 if (PL_minus_F) { 8733 if ( ( *PL_splitstr == '/' 8734 || *PL_splitstr == '\'' 8735 || *PL_splitstr == '"') 8736 && strchr(PL_splitstr + 1, *PL_splitstr)) 8737 { 8738 /* strchr is ok, because -F pattern can't contain 8739 * embeddded NULs */ 8740 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 8741 } 8742 else { 8743 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 8744 bytes can be used as quoting characters. :-) */ 8745 const char *splits = PL_splitstr; 8746 sv_catpvs(PL_linestr, "our @F=split(q\0"); 8747 do { 8748 /* Need to \ \s */ 8749 if (*splits == '\\') 8750 sv_catpvn(PL_linestr, splits, 1); 8751 sv_catpvn(PL_linestr, splits, 1); 8752 } while (*splits++); 8753 /* This loop will embed the trailing NUL of 8754 PL_linestr as the last thing it does before 8755 terminating. */ 8756 sv_catpvs(PL_linestr, ");"); 8757 } 8758 } 8759 else 8760 sv_catpvs(PL_linestr,"our @F=split(' ');"); 8761 } 8762 } 8763 sv_catpvs(PL_linestr, "\n"); 8764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 8765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 8766 PL_last_lop = PL_last_uni = NULL; 8767 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) 8768 update_debugger_info(PL_linestr, NULL, 0); 8769 goto retry; 8770 } 8771 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY) 8772 return tok; 8773 goto retry_bufptr; 8774 8775 case '\r': 8776 #ifdef PERL_STRICT_CR 8777 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 8778 Perl_croak(aTHX_ 8779 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 8780 #endif 8781 case ' ': case '\t': case '\f': case '\v': 8782 s++; 8783 goto retry; 8784 8785 case '#': 8786 case '\n': { 8787 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s); 8788 if (needs_semicolon) 8789 TOKEN(';'); 8790 else 8791 goto retry; 8792 } 8793 8794 case '-': 8795 return yyl_hyphen(aTHX_ s); 8796 8797 case '+': 8798 return yyl_plus(aTHX_ s); 8799 8800 case '*': 8801 return yyl_star(aTHX_ s); 8802 8803 case '%': 8804 return yyl_percent(aTHX_ s); 8805 8806 case '^': 8807 return yyl_caret(aTHX_ s); 8808 8809 case '[': 8810 return yyl_leftsquare(aTHX_ s); 8811 8812 case '~': 8813 return yyl_tilde(aTHX_ s); 8814 8815 case ',': 8816 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 8817 TOKEN(0); 8818 s++; 8819 OPERATOR(','); 8820 case ':': 8821 if (s[1] == ':') 8822 return yyl_just_a_word(aTHX_ s, 0, 0, no_code); 8823 return yyl_colon(aTHX_ s + 1); 8824 8825 case '(': 8826 return yyl_leftparen(aTHX_ s + 1); 8827 8828 case ';': 8829 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8830 TOKEN(0); 8831 CLINE; 8832 s++; 8833 PL_expect = XSTATE; 8834 TOKEN(';'); 8835 8836 case ')': 8837 return yyl_rightparen(aTHX_ s); 8838 8839 case ']': 8840 return yyl_rightsquare(aTHX_ s); 8841 8842 case '{': 8843 return yyl_leftcurly(aTHX_ s + 1, 0); 8844 8845 case '}': 8846 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 8847 TOKEN(0); 8848 return yyl_rightcurly(aTHX_ s, 0); 8849 8850 case '&': 8851 return yyl_ampersand(aTHX_ s); 8852 8853 case '|': 8854 return yyl_verticalbar(aTHX_ s); 8855 8856 case '=': 8857 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') 8858 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "=====")) 8859 { 8860 s = vcs_conflict_marker(s + 7); 8861 goto retry; 8862 } 8863 8864 s++; 8865 { 8866 const char tmp = *s++; 8867 if (tmp == '=') { 8868 if (!PL_lex_allbrackets 8869 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8870 { 8871 s -= 2; 8872 TOKEN(0); 8873 } 8874 ChEop(OP_EQ); 8875 } 8876 if (tmp == '>') { 8877 if (!PL_lex_allbrackets 8878 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 8879 { 8880 s -= 2; 8881 TOKEN(0); 8882 } 8883 OPERATOR(','); 8884 } 8885 if (tmp == '~') 8886 PMop(OP_MATCH); 8887 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 8888 && memCHRs("+-*/%.^&|<",tmp)) 8889 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 8890 "Reversed %c= operator",(int)tmp); 8891 s--; 8892 if (PL_expect == XSTATE 8893 && isALPHA(tmp) 8894 && (s == PL_linestart+1 || s[-2] == '\n') ) 8895 { 8896 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) 8897 || PL_lex_state != LEX_NORMAL) 8898 { 8899 d = PL_bufend; 8900 while (s < d) { 8901 if (*s++ == '\n') { 8902 incline(s, PL_bufend); 8903 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) 8904 { 8905 s = (char *) memchr(s,'\n', d - s); 8906 if (s) 8907 s++; 8908 else 8909 s = d; 8910 incline(s, PL_bufend); 8911 goto retry; 8912 } 8913 } 8914 } 8915 goto retry; 8916 } 8917 s = PL_bufend; 8918 PL_parser->in_pod = 1; 8919 goto retry; 8920 } 8921 } 8922 if (PL_expect == XBLOCK) { 8923 const char *t = s; 8924 #ifdef PERL_STRICT_CR 8925 while (SPACE_OR_TAB(*t)) 8926 #else 8927 while (SPACE_OR_TAB(*t) || *t == '\r') 8928 #endif 8929 t++; 8930 if (*t == '\n' || *t == '#') { 8931 ENTER_with_name("lex_format"); 8932 SAVEI8(PL_parser->form_lex_state); 8933 SAVEI32(PL_lex_formbrack); 8934 PL_parser->form_lex_state = PL_lex_state; 8935 PL_lex_formbrack = PL_lex_brackets + 1; 8936 PL_parser->sub_error_count = PL_error_count; 8937 return yyl_leftcurly(aTHX_ s, 1); 8938 } 8939 } 8940 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 8941 s--; 8942 TOKEN(0); 8943 } 8944 pl_yylval.ival = 0; 8945 OPERATOR(ASSIGNOP); 8946 8947 case '!': 8948 return yyl_bang(aTHX_ s + 1); 8949 8950 case '<': 8951 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') 8952 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<")) 8953 { 8954 s = vcs_conflict_marker(s + 7); 8955 goto retry; 8956 } 8957 return yyl_leftpointy(aTHX_ s); 8958 8959 case '>': 8960 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') 8961 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>")) 8962 { 8963 s = vcs_conflict_marker(s + 7); 8964 goto retry; 8965 } 8966 return yyl_rightpointy(aTHX_ s + 1); 8967 8968 case '$': 8969 return yyl_dollar(aTHX_ s); 8970 8971 case '@': 8972 return yyl_snail(aTHX_ s); 8973 8974 case '/': /* may be division, defined-or, or pattern */ 8975 return yyl_slash(aTHX_ s); 8976 8977 case '?': /* conditional */ 8978 s++; 8979 if (!PL_lex_allbrackets 8980 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) 8981 { 8982 s--; 8983 TOKEN(0); 8984 } 8985 PL_lex_allbrackets++; 8986 OPERATOR('?'); 8987 8988 case '.': 8989 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 8990 #ifdef PERL_STRICT_CR 8991 && s[1] == '\n' 8992 #else 8993 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 8994 #endif 8995 && (s == PL_linestart || s[-1] == '\n') ) 8996 { 8997 PL_expect = XSTATE; 8998 /* formbrack==2 means dot seen where arguments expected */ 8999 return yyl_rightcurly(aTHX_ s, 2); 9000 } 9001 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 9002 s += 3; 9003 OPERATOR(YADAYADA); 9004 } 9005 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 9006 char tmp = *s++; 9007 if (*s == tmp) { 9008 if (!PL_lex_allbrackets 9009 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) 9010 { 9011 s--; 9012 TOKEN(0); 9013 } 9014 s++; 9015 if (*s == tmp) { 9016 s++; 9017 pl_yylval.ival = OPf_SPECIAL; 9018 } 9019 else 9020 pl_yylval.ival = 0; 9021 OPERATOR(DOTDOT); 9022 } 9023 if (*s == '=' && !PL_lex_allbrackets 9024 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 9025 { 9026 s--; 9027 TOKEN(0); 9028 } 9029 Aop(OP_CONCAT); 9030 } 9031 /* FALLTHROUGH */ 9032 case '0': case '1': case '2': case '3': case '4': 9033 case '5': case '6': case '7': case '8': case '9': 9034 s = scan_num(s, &pl_yylval); 9035 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 9036 if (PL_expect == XOPERATOR) 9037 no_op("Number",s); 9038 TERM(THING); 9039 9040 case '\'': 9041 return yyl_sglquote(aTHX_ s); 9042 9043 case '"': 9044 return yyl_dblquote(aTHX_ s); 9045 9046 case '`': 9047 return yyl_backtick(aTHX_ s); 9048 9049 case '\\': 9050 return yyl_backslash(aTHX_ s + 1); 9051 9052 case 'v': 9053 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 9054 char *start = s + 2; 9055 while (isDIGIT(*start) || *start == '_') 9056 start++; 9057 if (*start == '.' && isDIGIT(start[1])) { 9058 s = scan_num(s, &pl_yylval); 9059 TERM(THING); 9060 } 9061 else if ((*start == ':' && start[1] == ':') 9062 || (PL_expect == XSTATE && *start == ':')) { 9063 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9064 return tok; 9065 goto retry_bufptr; 9066 } 9067 else if (PL_expect == XSTATE) { 9068 d = start; 9069 while (d < PL_bufend && isSPACE(*d)) d++; 9070 if (*d == ':') { 9071 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9072 return tok; 9073 goto retry_bufptr; 9074 } 9075 } 9076 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 9077 if (!isALPHA(*start) && (PL_expect == XTERM 9078 || PL_expect == XREF || PL_expect == XSTATE 9079 || PL_expect == XTERMORDORDOR)) { 9080 GV *const gv = gv_fetchpvn_flags(s, start - s, 9081 UTF ? SVf_UTF8 : 0, SVt_PVCV); 9082 if (!gv) { 9083 s = scan_num(s, &pl_yylval); 9084 TERM(THING); 9085 } 9086 } 9087 } 9088 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9089 return tok; 9090 goto retry_bufptr; 9091 9092 case 'x': 9093 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 9094 s++; 9095 Mop(OP_REPEAT); 9096 } 9097 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9098 return tok; 9099 goto retry_bufptr; 9100 9101 case '_': 9102 case 'a': case 'A': 9103 case 'b': case 'B': 9104 case 'c': case 'C': 9105 case 'd': case 'D': 9106 case 'e': case 'E': 9107 case 'f': case 'F': 9108 case 'g': case 'G': 9109 case 'h': case 'H': 9110 case 'i': case 'I': 9111 case 'j': case 'J': 9112 case 'k': case 'K': 9113 case 'l': case 'L': 9114 case 'm': case 'M': 9115 case 'n': case 'N': 9116 case 'o': case 'O': 9117 case 'p': case 'P': 9118 case 'q': case 'Q': 9119 case 'r': case 'R': 9120 case 's': case 'S': 9121 case 't': case 'T': 9122 case 'u': case 'U': 9123 case 'V': 9124 case 'w': case 'W': 9125 case 'X': 9126 case 'y': case 'Y': 9127 case 'z': case 'Z': 9128 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9129 return tok; 9130 goto retry_bufptr; 9131 } 9132 } 9133 9134 9135 /* 9136 yylex 9137 9138 Works out what to call the token just pulled out of the input 9139 stream. The yacc parser takes care of taking the ops we return and 9140 stitching them into a tree. 9141 9142 Returns: 9143 The type of the next token 9144 9145 Structure: 9146 Check if we have already built the token; if so, use it. 9147 Switch based on the current state: 9148 - if we have a case modifier in a string, deal with that 9149 - handle other cases of interpolation inside a string 9150 - scan the next line if we are inside a format 9151 In the normal state, switch on the next character: 9152 - default: 9153 if alphabetic, go to key lookup 9154 unrecognized character - croak 9155 - 0/4/26: handle end-of-line or EOF 9156 - cases for whitespace 9157 - \n and #: handle comments and line numbers 9158 - various operators, brackets and sigils 9159 - numbers 9160 - quotes 9161 - 'v': vstrings (or go to key lookup) 9162 - 'x' repetition operator (or go to key lookup) 9163 - other ASCII alphanumerics (key lookup begins here): 9164 word before => ? 9165 keyword plugin 9166 scan built-in keyword (but do nothing with it yet) 9167 check for statement label 9168 check for lexical subs 9169 return yyl_just_a_word if there is one 9170 see whether built-in keyword is overridden 9171 switch on keyword number: 9172 - default: return yyl_just_a_word: 9173 not a built-in keyword; handle bareword lookup 9174 disambiguate between method and sub call 9175 fall back to bareword 9176 - cases for built-in keywords 9177 */ 9178 9179 #ifdef NETWARE 9180 #define RSFP_FILENO (PL_rsfp) 9181 #else 9182 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp)) 9183 #endif 9184 9185 9186 int 9187 Perl_yylex(pTHX) 9188 { 9189 dVAR; 9190 char *s = PL_bufptr; 9191 9192 if (UNLIKELY(PL_parser->recheck_utf8_validity)) { 9193 const U8* first_bad_char_loc; 9194 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, 9195 PL_bufend - PL_bufptr, 9196 &first_bad_char_loc))) 9197 { 9198 _force_out_malformed_utf8_message(first_bad_char_loc, 9199 (U8 *) PL_bufend, 9200 0, 9201 1 /* 1 means die */ ); 9202 NOT_REACHED; /* NOTREACHED */ 9203 } 9204 PL_parser->recheck_utf8_validity = FALSE; 9205 } 9206 DEBUG_T( { 9207 SV* tmp = newSVpvs(""); 9208 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", 9209 (IV)CopLINE(PL_curcop), 9210 lex_state_names[PL_lex_state], 9211 exp_name[PL_expect], 9212 pv_display(tmp, s, strlen(s), 0, 60)); 9213 SvREFCNT_dec(tmp); 9214 } ); 9215 9216 /* when we've already built the next token, just pull it out of the queue */ 9217 if (PL_nexttoke) { 9218 PL_nexttoke--; 9219 pl_yylval = PL_nextval[PL_nexttoke]; 9220 { 9221 I32 next_type; 9222 next_type = PL_nexttype[PL_nexttoke]; 9223 if (next_type & (7<<24)) { 9224 if (next_type & (1<<24)) { 9225 if (PL_lex_brackets > 100) 9226 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 9227 PL_lex_brackstack[PL_lex_brackets++] = 9228 (char) ((next_type >> 16) & 0xff); 9229 } 9230 if (next_type & (2<<24)) 9231 PL_lex_allbrackets++; 9232 if (next_type & (4<<24)) 9233 PL_lex_allbrackets--; 9234 next_type &= 0xffff; 9235 } 9236 return REPORT(next_type == 'p' ? pending_ident() : next_type); 9237 } 9238 } 9239 9240 switch (PL_lex_state) { 9241 case LEX_NORMAL: 9242 case LEX_INTERPNORMAL: 9243 break; 9244 9245 /* interpolated case modifiers like \L \U, including \Q and \E. 9246 when we get here, PL_bufptr is at the \ 9247 */ 9248 case LEX_INTERPCASEMOD: 9249 /* handle \E or end of string */ 9250 return yyl_interpcasemod(aTHX_ s); 9251 9252 case LEX_INTERPPUSH: 9253 return REPORT(sublex_push()); 9254 9255 case LEX_INTERPSTART: 9256 if (PL_bufptr == PL_bufend) 9257 return REPORT(sublex_done()); 9258 DEBUG_T({ 9259 if(*PL_bufptr != '(') 9260 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); 9261 }); 9262 PL_expect = XTERM; 9263 /* for /@a/, we leave the joining for the regex engine to do 9264 * (unless we're within \Q etc) */ 9265 PL_lex_dojoin = (*PL_bufptr == '@' 9266 && (!PL_lex_inpat || PL_lex_casemods)); 9267 PL_lex_state = LEX_INTERPNORMAL; 9268 if (PL_lex_dojoin) { 9269 NEXTVAL_NEXTTOKE.ival = 0; 9270 force_next(','); 9271 force_ident("\"", '$'); 9272 NEXTVAL_NEXTTOKE.ival = 0; 9273 force_next('$'); 9274 NEXTVAL_NEXTTOKE.ival = 0; 9275 force_next((2<<24)|'('); 9276 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 9277 force_next(FUNC); 9278 } 9279 /* Convert (?{...}) and friends to 'do {...}' */ 9280 if (PL_lex_inpat && *PL_bufptr == '(') { 9281 PL_parser->lex_shared->re_eval_start = PL_bufptr; 9282 PL_bufptr += 2; 9283 if (*PL_bufptr != '{') 9284 PL_bufptr++; 9285 PL_expect = XTERMBLOCK; 9286 force_next(DO); 9287 } 9288 9289 if (PL_lex_starts++) { 9290 s = PL_bufptr; 9291 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9292 if (!PL_lex_casemods && PL_lex_inpat) 9293 TOKEN(','); 9294 else 9295 AopNOASSIGN(OP_CONCAT); 9296 } 9297 return yylex(); 9298 9299 case LEX_INTERPENDMAYBE: 9300 if (intuit_more(PL_bufptr, PL_bufend)) { 9301 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 9302 break; 9303 } 9304 /* FALLTHROUGH */ 9305 9306 case LEX_INTERPEND: 9307 if (PL_lex_dojoin) { 9308 const U8 dojoin_was = PL_lex_dojoin; 9309 PL_lex_dojoin = FALSE; 9310 PL_lex_state = LEX_INTERPCONCAT; 9311 PL_lex_allbrackets--; 9312 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN); 9313 } 9314 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 9315 && SvEVALED(PL_lex_repl)) 9316 { 9317 if (PL_bufptr != PL_bufend) 9318 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 9319 PL_lex_repl = NULL; 9320 } 9321 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 9322 re_eval_str. If the here-doc body’s length equals the previous 9323 value of re_eval_start, re_eval_start will now be null. So 9324 check re_eval_str as well. */ 9325 if (PL_parser->lex_shared->re_eval_start 9326 || PL_parser->lex_shared->re_eval_str) { 9327 SV *sv; 9328 if (*PL_bufptr != ')') 9329 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 9330 PL_bufptr++; 9331 /* having compiled a (?{..}) expression, return the original 9332 * text too, as a const */ 9333 if (PL_parser->lex_shared->re_eval_str) { 9334 sv = PL_parser->lex_shared->re_eval_str; 9335 PL_parser->lex_shared->re_eval_str = NULL; 9336 SvCUR_set(sv, 9337 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9338 SvPV_shrink_to_cur(sv); 9339 } 9340 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 9341 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9342 NEXTVAL_NEXTTOKE.opval = 9343 newSVOP(OP_CONST, 0, 9344 sv); 9345 force_next(THING); 9346 PL_parser->lex_shared->re_eval_start = NULL; 9347 PL_expect = XTERM; 9348 return REPORT(','); 9349 } 9350 9351 /* FALLTHROUGH */ 9352 case LEX_INTERPCONCAT: 9353 #ifdef DEBUGGING 9354 if (PL_lex_brackets) 9355 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 9356 (long) PL_lex_brackets); 9357 #endif 9358 if (PL_bufptr == PL_bufend) 9359 return REPORT(sublex_done()); 9360 9361 /* m'foo' still needs to be parsed for possible (?{...}) */ 9362 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 9363 SV *sv = newSVsv(PL_linestr); 9364 sv = tokeq(sv); 9365 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 9366 s = PL_bufend; 9367 } 9368 else { 9369 int save_error_count = PL_error_count; 9370 9371 s = scan_const(PL_bufptr); 9372 9373 /* Set flag if this was a pattern and there were errors. op.c will 9374 * refuse to compile a pattern with this flag set. Otherwise, we 9375 * could get segfaults, etc. */ 9376 if (PL_lex_inpat && PL_error_count > save_error_count) { 9377 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; 9378 } 9379 if (*s == '\\') 9380 PL_lex_state = LEX_INTERPCASEMOD; 9381 else 9382 PL_lex_state = LEX_INTERPSTART; 9383 } 9384 9385 if (s != PL_bufptr) { 9386 NEXTVAL_NEXTTOKE = pl_yylval; 9387 PL_expect = XTERM; 9388 force_next(THING); 9389 if (PL_lex_starts++) { 9390 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9391 if (!PL_lex_casemods && PL_lex_inpat) 9392 TOKEN(','); 9393 else 9394 AopNOASSIGN(OP_CONCAT); 9395 } 9396 else { 9397 PL_bufptr = s; 9398 return yylex(); 9399 } 9400 } 9401 9402 return yylex(); 9403 case LEX_FORMLINE: 9404 if (PL_parser->sub_error_count != PL_error_count) { 9405 /* There was an error parsing a formline, which tends to 9406 mess up the parser. 9407 Unlike interpolated sub-parsing, we can't treat any of 9408 these as recoverable, so no need to check sub_no_recover. 9409 */ 9410 yyquit(); 9411 } 9412 assert(PL_lex_formbrack); 9413 s = scan_formline(PL_bufptr); 9414 if (!PL_lex_formbrack) 9415 return yyl_rightcurly(aTHX_ s, 1); 9416 PL_bufptr = s; 9417 return yylex(); 9418 } 9419 9420 /* We really do *not* want PL_linestr ever becoming a COW. */ 9421 assert (!SvIsCOW(PL_linestr)); 9422 s = PL_bufptr; 9423 PL_oldoldbufptr = PL_oldbufptr; 9424 PL_oldbufptr = s; 9425 9426 if (PL_in_my == KEY_sigvar) { 9427 PL_parser->saw_infix_sigil = 0; 9428 return yyl_sigvar(aTHX_ s); 9429 } 9430 9431 { 9432 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil. 9433 On its return, we then need to set it to indicate whether the token 9434 we just encountered was an infix operator that (if we hadn't been 9435 expecting an operator) have been a sigil. 9436 */ 9437 bool expected_operator = (PL_expect == XOPERATOR); 9438 int ret = yyl_try(aTHX_ s); 9439 switch (pl_yylval.ival) { 9440 case OP_BIT_AND: 9441 case OP_MODULO: 9442 case OP_MULTIPLY: 9443 case OP_NBIT_AND: 9444 if (expected_operator) { 9445 PL_parser->saw_infix_sigil = 1; 9446 break; 9447 } 9448 /* FALLTHROUGH */ 9449 default: 9450 PL_parser->saw_infix_sigil = 0; 9451 } 9452 return ret; 9453 } 9454 } 9455 9456 9457 /* 9458 S_pending_ident 9459 9460 Looks up an identifier in the pad or in a package 9461 9462 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable 9463 rather than a plain pad var. 9464 9465 Returns: 9466 PRIVATEREF if this is a lexical name. 9467 BAREWORD if this belongs to a package. 9468 9469 Structure: 9470 if we're in a my declaration 9471 croak if they tried to say my($foo::bar) 9472 build the ops for a my() declaration 9473 if it's an access to a my() variable 9474 build ops for access to a my() variable 9475 if in a dq string, and they've said @foo and we can't find @foo 9476 warn 9477 build ops for a bareword 9478 */ 9479 9480 static int 9481 S_pending_ident(pTHX) 9482 { 9483 PADOFFSET tmp = 0; 9484 const char pit = (char)pl_yylval.ival; 9485 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 9486 /* All routes through this function want to know if there is a colon. */ 9487 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 9488 9489 DEBUG_T({ PerlIO_printf(Perl_debug_log, 9490 "### Pending identifier '%s'\n", PL_tokenbuf); }); 9491 assert(tokenbuf_len >= 2); 9492 9493 /* if we're in a my(), we can't allow dynamics here. 9494 $foo'bar has already been turned into $foo::bar, so 9495 just check for colons. 9496 9497 if it's a legal name, the OP is a PADANY. 9498 */ 9499 if (PL_in_my) { 9500 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 9501 if (has_colon) 9502 /* diag_listed_as: No package name allowed for variable %s 9503 in "our" */ 9504 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 9505 "%s %s in \"our\"", 9506 *PL_tokenbuf=='&' ? "subroutine" : "variable", 9507 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 9508 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 9509 } 9510 else { 9511 OP *o; 9512 if (has_colon) { 9513 /* "my" variable %s can't be in a package */ 9514 /* PL_no_myglob is constant */ 9515 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 9516 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 9517 PL_in_my == KEY_my ? "my" : "state", 9518 *PL_tokenbuf == '&' ? "subroutine" : "variable", 9519 PL_tokenbuf), 9520 UTF ? SVf_UTF8 : 0); 9521 GCC_DIAG_RESTORE_STMT; 9522 } 9523 9524 if (PL_in_my == KEY_sigvar) { 9525 /* A signature 'padop' needs in addition, an op_first to 9526 * point to a child sigdefelem, and an extra field to hold 9527 * the signature index. We can achieve both by using an 9528 * UNOP_AUX and (ab)using the op_aux field to hold the 9529 * index. If we ever need more fields, use a real malloced 9530 * aux strut instead. 9531 */ 9532 o = newUNOP_AUX(OP_ARGELEM, 0, NULL, 9533 INT2PTR(UNOP_AUX_item *, 9534 (PL_parser->sig_elems))); 9535 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV 9536 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV 9537 : OPpARGELEM_HV); 9538 } 9539 else 9540 o = newOP(OP_PADANY, 0); 9541 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 9542 UTF ? SVf_UTF8 : 0); 9543 if (PL_in_my == KEY_sigvar) 9544 PL_in_my = 0; 9545 9546 pl_yylval.opval = o; 9547 return PRIVATEREF; 9548 } 9549 } 9550 9551 /* 9552 build the ops for accesses to a my() variable. 9553 */ 9554 9555 if (!has_colon) { 9556 if (!PL_in_my) 9557 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 9558 0); 9559 if (tmp != NOT_IN_PAD) { 9560 /* might be an "our" variable" */ 9561 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9562 /* build ops for a bareword */ 9563 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9564 HEK * const stashname = HvNAME_HEK(stash); 9565 SV * const sym = newSVhek(stashname); 9566 sv_catpvs(sym, "::"); 9567 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 9568 pl_yylval.opval = newSVOP(OP_CONST, 0, sym); 9569 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9570 if (pit != '&') 9571 gv_fetchsv(sym, 9572 GV_ADDMULTI, 9573 ((PL_tokenbuf[0] == '$') ? SVt_PV 9574 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9575 : SVt_PVHV)); 9576 return BAREWORD; 9577 } 9578 9579 pl_yylval.opval = newOP(OP_PADANY, 0); 9580 pl_yylval.opval->op_targ = tmp; 9581 return PRIVATEREF; 9582 } 9583 } 9584 9585 /* 9586 Whine if they've said @foo or @foo{key} in a doublequoted string, 9587 and @foo (or %foo) isn't a variable we can find in the symbol 9588 table. 9589 */ 9590 if (ckWARN(WARN_AMBIGUOUS) 9591 && pit == '@' 9592 && PL_lex_state != LEX_NORMAL 9593 && !PL_lex_brackets) 9594 { 9595 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9596 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, 9597 SVt_PVAV); 9598 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 9599 ) 9600 { 9601 /* Downgraded from fatal to warning 20000522 mjd */ 9602 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9603 "Possible unintended interpolation of %" UTF8f 9604 " in string", 9605 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 9606 } 9607 } 9608 9609 /* build ops for a bareword */ 9610 pl_yylval.opval = newSVOP(OP_CONST, 0, 9611 newSVpvn_flags(PL_tokenbuf + 1, 9612 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9613 UTF ? SVf_UTF8 : 0 )); 9614 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9615 if (pit != '&') 9616 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9617 (PL_in_eval ? GV_ADDMULTI : GV_ADD) 9618 | ( UTF ? SVf_UTF8 : 0 ), 9619 ((PL_tokenbuf[0] == '$') ? SVt_PV 9620 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9621 : SVt_PVHV)); 9622 return BAREWORD; 9623 } 9624 9625 STATIC void 9626 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 9627 { 9628 PERL_ARGS_ASSERT_CHECKCOMMA; 9629 9630 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 9631 if (ckWARN(WARN_SYNTAX)) { 9632 int level = 1; 9633 const char *w; 9634 for (w = s+2; *w && level; w++) { 9635 if (*w == '(') 9636 ++level; 9637 else if (*w == ')') 9638 --level; 9639 } 9640 while (isSPACE(*w)) 9641 ++w; 9642 /* the list of chars below is for end of statements or 9643 * block / parens, boolean operators (&&, ||, //) and branch 9644 * constructs (or, and, if, until, unless, while, err, for). 9645 * Not a very solid hack... */ 9646 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w)) 9647 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9648 "%s (...) interpreted as function",name); 9649 } 9650 } 9651 while (s < PL_bufend && isSPACE(*s)) 9652 s++; 9653 if (*s == '(') 9654 s++; 9655 while (s < PL_bufend && isSPACE(*s)) 9656 s++; 9657 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 9658 const char * const w = s; 9659 s += UTF ? UTF8SKIP(s) : 1; 9660 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 9661 s += UTF ? UTF8SKIP(s) : 1; 9662 while (s < PL_bufend && isSPACE(*s)) 9663 s++; 9664 if (*s == ',') { 9665 GV* gv; 9666 if (keyword(w, s - w, 0)) 9667 return; 9668 9669 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 9670 if (gv && GvCVu(gv)) 9671 return; 9672 if (s - w <= 254) { 9673 PADOFFSET off; 9674 char tmpbuf[256]; 9675 Copy(w, tmpbuf+1, s - w, char); 9676 *tmpbuf = '&'; 9677 off = pad_findmy_pvn(tmpbuf, s-w+1, 0); 9678 if (off != NOT_IN_PAD) return; 9679 } 9680 Perl_croak(aTHX_ "No comma allowed after %s", what); 9681 } 9682 } 9683 } 9684 9685 /* S_new_constant(): do any overload::constant lookup. 9686 9687 Either returns sv, or mortalizes/frees sv and returns a new SV*. 9688 Best used as sv=new_constant(..., sv, ...). 9689 If s, pv are NULL, calls subroutine with one argument, 9690 and <type> is used with error messages only. 9691 <type> is assumed to be well formed UTF-8. 9692 9693 If error_msg is not NULL, *error_msg will be set to any error encountered. 9694 Otherwise yyerror() will be used to output it */ 9695 9696 STATIC SV * 9697 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 9698 SV *sv, SV *pv, const char *type, STRLEN typelen, 9699 const char ** error_msg) 9700 { 9701 dSP; 9702 HV * table = GvHV(PL_hintgv); /* ^H */ 9703 SV *res; 9704 SV *errsv = NULL; 9705 SV **cvp; 9706 SV *cv, *typesv; 9707 const char *why1 = "", *why2 = "", *why3 = ""; 9708 const char * optional_colon = ":"; /* Only some messages have a colon */ 9709 char *msg; 9710 9711 PERL_ARGS_ASSERT_NEW_CONSTANT; 9712 /* We assume that this is true: */ 9713 assert(type || s); 9714 9715 sv_2mortal(sv); /* Parent created it permanently */ 9716 9717 if ( ! table 9718 || ! (PL_hints & HINT_LOCALIZE_HH)) 9719 { 9720 why1 = "unknown"; 9721 optional_colon = ""; 9722 goto report; 9723 } 9724 9725 cvp = hv_fetch(table, key, keylen, FALSE); 9726 if (!cvp || !SvOK(*cvp)) { 9727 why1 = "$^H{"; 9728 why2 = key; 9729 why3 = "} is not defined"; 9730 goto report; 9731 } 9732 9733 cv = *cvp; 9734 if (!pv && s) 9735 pv = newSVpvn_flags(s, len, SVs_TEMP); 9736 if (type && pv) 9737 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 9738 else 9739 typesv = &PL_sv_undef; 9740 9741 PUSHSTACKi(PERLSI_OVERLOAD); 9742 ENTER ; 9743 SAVETMPS; 9744 9745 PUSHMARK(SP) ; 9746 EXTEND(sp, 3); 9747 if (pv) 9748 PUSHs(pv); 9749 PUSHs(sv); 9750 if (pv) 9751 PUSHs(typesv); 9752 PUTBACK; 9753 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 9754 9755 SPAGAIN ; 9756 9757 /* Check the eval first */ 9758 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 9759 STRLEN errlen; 9760 const char * errstr; 9761 sv_catpvs(errsv, "Propagated"); 9762 errstr = SvPV_const(errsv, errlen); 9763 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 9764 (void)POPs; 9765 res = SvREFCNT_inc_simple_NN(sv); 9766 } 9767 else { 9768 res = POPs; 9769 SvREFCNT_inc_simple_void_NN(res); 9770 } 9771 9772 PUTBACK ; 9773 FREETMPS ; 9774 LEAVE ; 9775 POPSTACK; 9776 9777 if (SvOK(res)) { 9778 return res; 9779 } 9780 9781 sv = res; 9782 (void)sv_2mortal(sv); 9783 9784 why1 = "Call to &{$^H{"; 9785 why2 = key; 9786 why3 = "}} did not return a defined value"; 9787 9788 report: 9789 9790 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s", 9791 (int)(type ? typelen : len), 9792 (type ? type: s), 9793 optional_colon, 9794 why1, why2, why3); 9795 if (error_msg) { 9796 *error_msg = msg; 9797 } 9798 else { 9799 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 9800 } 9801 return SvREFCNT_inc_simple_NN(sv); 9802 } 9803 9804 PERL_STATIC_INLINE void 9805 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, 9806 bool is_utf8, bool check_dollar, bool tick_warn) 9807 { 9808 int saw_tick = 0; 9809 const char *olds = *s; 9810 PERL_ARGS_ASSERT_PARSE_IDENT; 9811 9812 while (*s < PL_bufend) { 9813 if (*d >= e) 9814 Perl_croak(aTHX_ "%s", ident_too_long); 9815 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) { 9816 /* The UTF-8 case must come first, otherwise things 9817 * like c\N{COMBINING TILDE} would start failing, as the 9818 * isWORDCHAR_A case below would gobble the 'c' up. 9819 */ 9820 9821 char *t = *s + UTF8SKIP(*s); 9822 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) { 9823 t += UTF8SKIP(t); 9824 } 9825 if (*d + (t - *s) > e) 9826 Perl_croak(aTHX_ "%s", ident_too_long); 9827 Copy(*s, *d, t - *s, char); 9828 *d += t - *s; 9829 *s = t; 9830 } 9831 else if ( isWORDCHAR_A(**s) ) { 9832 do { 9833 *(*d)++ = *(*s)++; 9834 } while (isWORDCHAR_A(**s) && *d < e); 9835 } 9836 else if ( allow_package 9837 && **s == '\'' 9838 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8)) 9839 { 9840 *(*d)++ = ':'; 9841 *(*d)++ = ':'; 9842 (*s)++; 9843 saw_tick++; 9844 } 9845 else if (allow_package && **s == ':' && (*s)[1] == ':' 9846 /* Disallow things like Foo::$bar. For the curious, this is 9847 * the code path that triggers the "Bad name after" warning 9848 * when looking for barewords. 9849 */ 9850 && !(check_dollar && (*s)[2] == '$')) { 9851 *(*d)++ = *(*s)++; 9852 *(*d)++ = *(*s)++; 9853 } 9854 else 9855 break; 9856 } 9857 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL 9858 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) { 9859 char *this_d; 9860 char *d2; 9861 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */ 9862 d2 = this_d; 9863 SAVEFREEPV(this_d); 9864 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9865 "Old package separator used in string"); 9866 if (olds[-1] == '#') 9867 *d2++ = olds[-2]; 9868 *d2++ = olds[-1]; 9869 while (olds < *s) { 9870 if (*olds == '\'') { 9871 *d2++ = '\\'; 9872 *d2++ = *olds++; 9873 } 9874 else 9875 *d2++ = *olds++; 9876 } 9877 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9878 "\t(Did you mean \"%" UTF8f "\" instead?)\n", 9879 UTF8fARG(is_utf8, d2-this_d, this_d)); 9880 } 9881 return; 9882 } 9883 9884 /* Returns a NUL terminated string, with the length of the string written to 9885 *slp 9886 */ 9887 char * 9888 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 9889 { 9890 char *d = dest; 9891 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9892 bool is_utf8 = cBOOL(UTF); 9893 9894 PERL_ARGS_ASSERT_SCAN_WORD; 9895 9896 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE); 9897 *d = '\0'; 9898 *slp = d - dest; 9899 return s; 9900 } 9901 9902 /* Is the byte 'd' a legal single character identifier name? 'u' is true 9903 * iff Unicode semantics are to be used. The legal ones are any of: 9904 * a) all ASCII characters except: 9905 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; 9906 * 2) '{' 9907 * The final case currently doesn't get this far in the program, so we 9908 * don't test for it. If that were to change, it would be ok to allow it. 9909 * b) When not under Unicode rules, any upper Latin1 character 9910 * c) Otherwise, when unicode rules are used, all XIDS characters. 9911 * 9912 * Because all ASCII characters have the same representation whether 9913 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and 9914 * '{' without knowing if is UTF-8 or not. */ 9915 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \ 9916 (isGRAPH_A(*(s)) || ((is_utf8) \ 9917 ? isIDFIRST_utf8_safe(s, e) \ 9918 : (isGRAPH_L1(*s) \ 9919 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) 9920 9921 STATIC char * 9922 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 9923 { 9924 I32 herelines = PL_parser->herelines; 9925 SSize_t bracket = -1; 9926 char funny = *s++; 9927 char *d = dest; 9928 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9929 bool is_utf8 = cBOOL(UTF); 9930 I32 orig_copline = 0, tmp_copline = 0; 9931 9932 PERL_ARGS_ASSERT_SCAN_IDENT; 9933 9934 if (isSPACE(*s) || !*s) 9935 s = skipspace(s); 9936 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */ 9937 bool is_zero= *s == '0' ? TRUE : FALSE; 9938 char *digit_start= d; 9939 *d++ = *s++; 9940 while (s < PL_bufend && isDIGIT(*s)) { 9941 if (d >= e) 9942 Perl_croak(aTHX_ "%s", ident_too_long); 9943 *d++ = *s++; 9944 } 9945 if (is_zero && d - digit_start > 1) 9946 Perl_croak(aTHX_ ident_var_zero_multi_digit); 9947 } 9948 else { /* See if it is a "normal" identifier */ 9949 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); 9950 } 9951 *d = '\0'; 9952 d = dest; 9953 if (*d) { 9954 /* Either a digit variable, or parse_ident() found an identifier 9955 (anything valid as a bareword), so job done and return. */ 9956 if (PL_lex_state != LEX_NORMAL) 9957 PL_lex_state = LEX_INTERPENDMAYBE; 9958 return s; 9959 } 9960 9961 /* Here, it is not a run-of-the-mill identifier name */ 9962 9963 if (*s == '$' && s[1] 9964 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8) 9965 || isDIGIT_A((U8)s[1]) 9966 || s[1] == '$' 9967 || s[1] == '{' 9968 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) ) 9969 { 9970 /* Dereferencing a value in a scalar variable. 9971 The alternatives are different syntaxes for a scalar variable. 9972 Using ' as a leading package separator isn't allowed. :: is. */ 9973 return s; 9974 } 9975 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 9976 if (*s == '{') { 9977 bracket = s - SvPVX(PL_linestr); 9978 s++; 9979 orig_copline = CopLINE(PL_curcop); 9980 if (s < PL_bufend && isSPACE(*s)) { 9981 s = skipspace(s); 9982 } 9983 } 9984 if ((s <= PL_bufend - ((is_utf8) 9985 ? UTF8SKIP(s) 9986 : 1)) 9987 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8)) 9988 { 9989 if (is_utf8) { 9990 const STRLEN skip = UTF8SKIP(s); 9991 STRLEN i; 9992 d[skip] = '\0'; 9993 for ( i = 0; i < skip; i++ ) 9994 d[i] = *s++; 9995 } 9996 else { 9997 *d = *s++; 9998 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ 9999 if (isDIGIT(*d)) { 10000 bool is_zero= *d == '0' ? TRUE : FALSE; 10001 char *digit_start= d; 10002 while (s < PL_bufend && isDIGIT(*s)) { 10003 d++; 10004 if (d >= e) 10005 Perl_croak(aTHX_ "%s", ident_too_long); 10006 *d= *s++; 10007 } 10008 if (is_zero && d - digit_start > 1) 10009 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10010 } 10011 d[1] = '\0'; 10012 } 10013 } 10014 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 10015 if (*d == '^' && *s && isCONTROLVAR(*s)) { 10016 *d = toCTRL(*s); 10017 s++; 10018 } 10019 /* Warn about ambiguous code after unary operators if {...} notation isn't 10020 used. There's no difference in ambiguity; it's merely a heuristic 10021 about when not to warn. */ 10022 else if (ck_uni && bracket == -1) 10023 check_uni(); 10024 if (bracket != -1) { 10025 bool skip; 10026 char *s2; 10027 /* If we were processing {...} notation then... */ 10028 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) 10029 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 10030 && isWORDCHAR(*s)) 10031 ) { 10032 /* note we have to check for a normal identifier first, 10033 * as it handles utf8 symbols, and only after that has 10034 * been ruled out can we look at the caret words */ 10035 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) { 10036 /* if it starts as a valid identifier, assume that it is one. 10037 (the later check for } being at the expected point will trap 10038 cases where this doesn't pan out.) */ 10039 d += is_utf8 ? UTF8SKIP(d) : 1; 10040 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE); 10041 *d = '\0'; 10042 } 10043 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ 10044 d++; 10045 while (isWORDCHAR(*s) && d < e) { 10046 *d++ = *s++; 10047 } 10048 if (d >= e) 10049 Perl_croak(aTHX_ "%s", ident_too_long); 10050 *d = '\0'; 10051 } 10052 tmp_copline = CopLINE(PL_curcop); 10053 if (s < PL_bufend && isSPACE(*s)) { 10054 s = skipspace(s); 10055 } 10056 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 10057 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ 10058 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 10059 const char * const brack = 10060 (const char *) 10061 ((*s == '[') ? "[...]" : "{...}"); 10062 orig_copline = CopLINE(PL_curcop); 10063 CopLINE_set(PL_curcop, tmp_copline); 10064 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 10065 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10066 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 10067 funny, dest, brack, funny, dest, brack); 10068 CopLINE_set(PL_curcop, orig_copline); 10069 } 10070 bracket++; 10071 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 10072 PL_lex_allbrackets++; 10073 return s; 10074 } 10075 } 10076 10077 if ( !tmp_copline ) 10078 tmp_copline = CopLINE(PL_curcop); 10079 if ((skip = s < PL_bufend && isSPACE(*s))) { 10080 /* Avoid incrementing line numbers or resetting PL_linestart, 10081 in case we have to back up. */ 10082 STRLEN s_off = s - SvPVX(PL_linestr); 10083 s2 = peekspace(s); 10084 s = SvPVX(PL_linestr) + s_off; 10085 } 10086 else 10087 s2 = s; 10088 10089 /* Expect to find a closing } after consuming any trailing whitespace. 10090 */ 10091 if (*s2 == '}') { 10092 /* Now increment line numbers if applicable. */ 10093 if (skip) 10094 s = skipspace(s); 10095 s++; 10096 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 10097 PL_lex_state = LEX_INTERPEND; 10098 PL_expect = XREF; 10099 } 10100 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) { 10101 if (ckWARN(WARN_AMBIGUOUS) 10102 && (keyword(dest, d - dest, 0) 10103 || get_cvn_flags(dest, d - dest, is_utf8 10104 ? SVf_UTF8 10105 : 0))) 10106 { 10107 SV *tmp = newSVpvn_flags( dest, d - dest, 10108 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 10109 if (funny == '#') 10110 funny = '@'; 10111 orig_copline = CopLINE(PL_curcop); 10112 CopLINE_set(PL_curcop, tmp_copline); 10113 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10114 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf, 10115 funny, SVfARG(tmp), funny, SVfARG(tmp)); 10116 CopLINE_set(PL_curcop, orig_copline); 10117 } 10118 } 10119 } 10120 else { 10121 /* Didn't find the closing } at the point we expected, so restore 10122 state such that the next thing to process is the opening { and */ 10123 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 10124 CopLINE_set(PL_curcop, orig_copline); 10125 PL_parser->herelines = herelines; 10126 *dest = '\0'; 10127 PL_parser->sub_no_recover = TRUE; 10128 } 10129 } 10130 else if ( PL_lex_state == LEX_INTERPNORMAL 10131 && !PL_lex_brackets 10132 && !intuit_more(s, PL_bufend)) 10133 PL_lex_state = LEX_INTERPEND; 10134 return s; 10135 } 10136 10137 static bool 10138 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { 10139 10140 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag 10141 * found in the parse starting at 's', based on the subset that are valid 10142 * in this context input to this routine in 'valid_flags'. Advances s. 10143 * Returns TRUE if the input should be treated as a valid flag, so the next 10144 * char may be as well; otherwise FALSE. 'charset' should point to a NUL 10145 * upon first call on the current regex. This routine will set it to any 10146 * charset modifier found. The caller shouldn't change it. This way, 10147 * another charset modifier encountered in the parse can be detected as an 10148 * error, as we have decided to allow only one */ 10149 10150 const char c = **s; 10151 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 10152 10153 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 10154 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) { 10155 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 10156 UTF ? SVf_UTF8 : 0); 10157 (*s) += charlen; 10158 /* Pretend that it worked, so will continue processing before 10159 * dieing */ 10160 return TRUE; 10161 } 10162 return FALSE; 10163 } 10164 10165 switch (c) { 10166 10167 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); 10168 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 10169 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 10170 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 10171 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 10172 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 10173 case LOCALE_PAT_MOD: 10174 if (*charset) { 10175 goto multiple_charsets; 10176 } 10177 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 10178 *charset = c; 10179 break; 10180 case UNICODE_PAT_MOD: 10181 if (*charset) { 10182 goto multiple_charsets; 10183 } 10184 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 10185 *charset = c; 10186 break; 10187 case ASCII_RESTRICT_PAT_MOD: 10188 if (! *charset) { 10189 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 10190 } 10191 else { 10192 10193 /* Error if previous modifier wasn't an 'a', but if it was, see 10194 * if, and accept, a second occurrence (only) */ 10195 if (*charset != 'a' 10196 || get_regex_charset(*pmfl) 10197 != REGEX_ASCII_RESTRICTED_CHARSET) 10198 { 10199 goto multiple_charsets; 10200 } 10201 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 10202 } 10203 *charset = c; 10204 break; 10205 case DEPENDS_PAT_MOD: 10206 if (*charset) { 10207 goto multiple_charsets; 10208 } 10209 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 10210 *charset = c; 10211 break; 10212 } 10213 10214 (*s)++; 10215 return TRUE; 10216 10217 multiple_charsets: 10218 if (*charset != c) { 10219 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 10220 } 10221 else if (c == 'a') { 10222 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 10223 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 10224 } 10225 else { 10226 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 10227 } 10228 10229 /* Pretend that it worked, so will continue processing before dieing */ 10230 (*s)++; 10231 return TRUE; 10232 } 10233 10234 STATIC char * 10235 S_scan_pat(pTHX_ char *start, I32 type) 10236 { 10237 PMOP *pm; 10238 char *s; 10239 const char * const valid_flags = 10240 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 10241 char charset = '\0'; /* character set modifier */ 10242 unsigned int x_mod_count = 0; 10243 10244 PERL_ARGS_ASSERT_SCAN_PAT; 10245 10246 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); 10247 if (!s) 10248 Perl_croak(aTHX_ "Search pattern not terminated"); 10249 10250 pm = (PMOP*)newPMOP(type, 0); 10251 if (PL_multi_open == '?') { 10252 /* This is the only point in the code that sets PMf_ONCE: */ 10253 pm->op_pmflags |= PMf_ONCE; 10254 10255 /* Hence it's safe to do this bit of PMOP book-keeping here, which 10256 allows us to restrict the list needed by reset to just the ?? 10257 matches. */ 10258 assert(type != OP_TRANS); 10259 if (PL_curstash) { 10260 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 10261 U32 elements; 10262 if (!mg) { 10263 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 10264 0); 10265 } 10266 elements = mg->mg_len / sizeof(PMOP**); 10267 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 10268 ((PMOP**)mg->mg_ptr) [elements++] = pm; 10269 mg->mg_len = elements * sizeof(PMOP**); 10270 PmopSTASH_set(pm,PL_curstash); 10271 } 10272 } 10273 10274 /* if qr/...(?{..}).../, then need to parse the pattern within a new 10275 * anon CV. False positives like qr/[(?{]/ are harmless */ 10276 10277 if (type == OP_QR) { 10278 STRLEN len; 10279 char *e, *p = SvPV(PL_lex_stuff, len); 10280 e = p + len; 10281 for (; p < e; p++) { 10282 if (p[0] == '(' && p[1] == '?' 10283 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 10284 { 10285 pm->op_pmflags |= PMf_HAS_CV; 10286 break; 10287 } 10288 } 10289 pm->op_pmflags |= PMf_IS_QR; 10290 } 10291 10292 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), 10293 &s, &charset, &x_mod_count)) 10294 {}; 10295 /* issue a warning if /c is specified,but /g is not */ 10296 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 10297 { 10298 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 10299 "Use of /c modifier is meaningless without /g" ); 10300 } 10301 10302 PL_lex_op = (OP*)pm; 10303 pl_yylval.ival = OP_MATCH; 10304 return s; 10305 } 10306 10307 STATIC char * 10308 S_scan_subst(pTHX_ char *start) 10309 { 10310 char *s; 10311 PMOP *pm; 10312 I32 first_start; 10313 line_t first_line; 10314 line_t linediff = 0; 10315 I32 es = 0; 10316 char charset = '\0'; /* character set modifier */ 10317 unsigned int x_mod_count = 0; 10318 char *t; 10319 10320 PERL_ARGS_ASSERT_SCAN_SUBST; 10321 10322 pl_yylval.ival = OP_NULL; 10323 10324 s = scan_str(start, TRUE, FALSE, FALSE, &t); 10325 10326 if (!s) 10327 Perl_croak(aTHX_ "Substitution pattern not terminated"); 10328 10329 s = t; 10330 10331 first_start = PL_multi_start; 10332 first_line = CopLINE(PL_curcop); 10333 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10334 if (!s) { 10335 SvREFCNT_dec_NN(PL_lex_stuff); 10336 PL_lex_stuff = NULL; 10337 Perl_croak(aTHX_ "Substitution replacement not terminated"); 10338 } 10339 PL_multi_start = first_start; /* so whole substitution is taken together */ 10340 10341 pm = (PMOP*)newPMOP(OP_SUBST, 0); 10342 10343 10344 while (*s) { 10345 if (*s == EXEC_PAT_MOD) { 10346 s++; 10347 es++; 10348 } 10349 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), 10350 &s, &charset, &x_mod_count)) 10351 { 10352 break; 10353 } 10354 } 10355 10356 if ((pm->op_pmflags & PMf_CONTINUE)) { 10357 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 10358 } 10359 10360 if (es) { 10361 SV * const repl = newSVpvs(""); 10362 10363 PL_multi_end = 0; 10364 pm->op_pmflags |= PMf_EVAL; 10365 for (; es > 1; es--) { 10366 sv_catpvs(repl, "eval "); 10367 } 10368 sv_catpvs(repl, "do {"); 10369 sv_catsv(repl, PL_parser->lex_sub_repl); 10370 sv_catpvs(repl, "}"); 10371 SvREFCNT_dec(PL_parser->lex_sub_repl); 10372 PL_parser->lex_sub_repl = repl; 10373 } 10374 10375 10376 linediff = CopLINE(PL_curcop) - first_line; 10377 if (linediff) 10378 CopLINE_set(PL_curcop, first_line); 10379 10380 if (linediff || es) { 10381 /* the IVX field indicates that the replacement string is a s///e; 10382 * the NVX field indicates how many src code lines the replacement 10383 * spreads over */ 10384 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV); 10385 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff; 10386 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = 10387 cBOOL(es); 10388 } 10389 10390 PL_lex_op = (OP*)pm; 10391 pl_yylval.ival = OP_SUBST; 10392 return s; 10393 } 10394 10395 STATIC char * 10396 S_scan_trans(pTHX_ char *start) 10397 { 10398 char* s; 10399 OP *o; 10400 U8 squash; 10401 U8 del; 10402 U8 complement; 10403 bool nondestruct = 0; 10404 char *t; 10405 10406 PERL_ARGS_ASSERT_SCAN_TRANS; 10407 10408 pl_yylval.ival = OP_NULL; 10409 10410 s = scan_str(start,FALSE,FALSE,FALSE,&t); 10411 if (!s) 10412 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 10413 10414 s = t; 10415 10416 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10417 if (!s) { 10418 SvREFCNT_dec_NN(PL_lex_stuff); 10419 PL_lex_stuff = NULL; 10420 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 10421 } 10422 10423 complement = del = squash = 0; 10424 while (1) { 10425 switch (*s) { 10426 case 'c': 10427 complement = OPpTRANS_COMPLEMENT; 10428 break; 10429 case 'd': 10430 del = OPpTRANS_DELETE; 10431 break; 10432 case 's': 10433 squash = OPpTRANS_SQUASH; 10434 break; 10435 case 'r': 10436 nondestruct = 1; 10437 break; 10438 default: 10439 goto no_more; 10440 } 10441 s++; 10442 } 10443 no_more: 10444 10445 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 10446 o->op_private &= ~OPpTRANS_ALL; 10447 o->op_private |= del|squash|complement; 10448 10449 PL_lex_op = o; 10450 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 10451 10452 10453 return s; 10454 } 10455 10456 /* scan_heredoc 10457 Takes a pointer to the first < in <<FOO. 10458 Returns a pointer to the byte following <<FOO. 10459 10460 This function scans a heredoc, which involves different methods 10461 depending on whether we are in a string eval, quoted construct, etc. 10462 This is because PL_linestr could containing a single line of input, or 10463 a whole string being evalled, or the contents of the current quote- 10464 like operator. 10465 10466 The two basic methods are: 10467 - Steal lines from the input stream 10468 - Scan the heredoc in PL_linestr and remove it therefrom 10469 10470 In a file scope or filtered eval, the first method is used; in a 10471 string eval, the second. 10472 10473 In a quote-like operator, we have to choose between the two, 10474 depending on where we can find a newline. We peek into outer lex- 10475 ing scopes until we find one with a newline in it. If we reach the 10476 outermost lexing scope and it is a file, we use the stream method. 10477 Otherwise it is treated as an eval. 10478 */ 10479 10480 STATIC char * 10481 S_scan_heredoc(pTHX_ char *s) 10482 { 10483 I32 op_type = OP_SCALAR; 10484 I32 len; 10485 SV *tmpstr; 10486 char term; 10487 char *d; 10488 char *e; 10489 char *peek; 10490 char *indent = 0; 10491 I32 indent_len = 0; 10492 bool indented = FALSE; 10493 const bool infile = PL_rsfp || PL_parser->filtered; 10494 const line_t origline = CopLINE(PL_curcop); 10495 LEXSHARED *shared = PL_parser->lex_shared; 10496 10497 PERL_ARGS_ASSERT_SCAN_HEREDOC; 10498 10499 s += 2; 10500 d = PL_tokenbuf + 1; 10501 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 10502 *PL_tokenbuf = '\n'; 10503 peek = s; 10504 10505 if (*peek == '~') { 10506 indented = TRUE; 10507 peek++; s++; 10508 } 10509 10510 while (SPACE_OR_TAB(*peek)) 10511 peek++; 10512 10513 if (*peek == '`' || *peek == '\'' || *peek =='"') { 10514 s = peek; 10515 term = *s++; 10516 s = delimcpy(d, e, s, PL_bufend, term, &len); 10517 if (s == PL_bufend) 10518 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 10519 d += len; 10520 s++; 10521 } 10522 else { 10523 if (*s == '\\') 10524 /* <<\FOO is equivalent to <<'FOO' */ 10525 s++, term = '\''; 10526 else 10527 term = '"'; 10528 10529 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 10530 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); 10531 10532 peek = s; 10533 10534 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) { 10535 peek += UTF ? UTF8SKIP(peek) : 1; 10536 } 10537 10538 len = (peek - s >= e - d) ? (e - d) : (peek - s); 10539 Copy(s, d, len, char); 10540 s += len; 10541 d += len; 10542 } 10543 10544 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 10545 Perl_croak(aTHX_ "Delimiter for here document is too long"); 10546 10547 *d++ = '\n'; 10548 *d = '\0'; 10549 len = d - PL_tokenbuf; 10550 10551 #ifndef PERL_STRICT_CR 10552 d = (char *) memchr(s, '\r', PL_bufend - s); 10553 if (d) { 10554 char * const olds = s; 10555 s = d; 10556 while (s < PL_bufend) { 10557 if (*s == '\r') { 10558 *d++ = '\n'; 10559 if (*++s == '\n') 10560 s++; 10561 } 10562 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 10563 *d++ = *s++; 10564 s++; 10565 } 10566 else 10567 *d++ = *s++; 10568 } 10569 *d = '\0'; 10570 PL_bufend = d; 10571 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10572 s = olds; 10573 } 10574 #endif 10575 10576 tmpstr = newSV_type(SVt_PVIV); 10577 SvGROW(tmpstr, 80); 10578 if (term == '\'') { 10579 op_type = OP_CONST; 10580 SvIV_set(tmpstr, -1); 10581 } 10582 else if (term == '`') { 10583 op_type = OP_BACKTICK; 10584 SvIV_set(tmpstr, '\\'); 10585 } 10586 10587 PL_multi_start = origline + 1 + PL_parser->herelines; 10588 PL_multi_open = PL_multi_close = '<'; 10589 10590 /* inside a string eval or quote-like operator */ 10591 if (!infile || PL_lex_inwhat) { 10592 SV *linestr; 10593 char *bufend; 10594 char * const olds = s; 10595 PERL_CONTEXT * const cx = CX_CUR(); 10596 /* These two fields are not set until an inner lexing scope is 10597 entered. But we need them set here. */ 10598 shared->ls_bufptr = s; 10599 shared->ls_linestr = PL_linestr; 10600 10601 if (PL_lex_inwhat) { 10602 /* Look for a newline. If the current buffer does not have one, 10603 peek into the line buffer of the parent lexing scope, going 10604 up as many levels as necessary to find one with a newline 10605 after bufptr. 10606 */ 10607 while (!(s = (char *)memchr( 10608 (void *)shared->ls_bufptr, '\n', 10609 SvEND(shared->ls_linestr)-shared->ls_bufptr 10610 ))) 10611 { 10612 shared = shared->ls_prev; 10613 /* shared is only null if we have gone beyond the outermost 10614 lexing scope. In a file, we will have broken out of the 10615 loop in the previous iteration. In an eval, the string buf- 10616 fer ends with "\n;", so the while condition above will have 10617 evaluated to false. So shared can never be null. Or so you 10618 might think. Odd syntax errors like s;@{<<; can gobble up 10619 the implicit semicolon at the end of a flie, causing the 10620 file handle to be closed even when we are not in a string 10621 eval. So shared may be null in that case. 10622 (Closing '>>}' here to balance the earlier open brace for 10623 editors that look for matched pairs.) */ 10624 if (UNLIKELY(!shared)) 10625 goto interminable; 10626 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 10627 most lexing scope. In a file, shared->ls_linestr at that 10628 level is just one line, so there is no body to steal. */ 10629 if (infile && !shared->ls_prev) { 10630 s = olds; 10631 goto streaming; 10632 } 10633 } 10634 } 10635 else { /* eval or we've already hit EOF */ 10636 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 10637 if (!s) 10638 goto interminable; 10639 } 10640 10641 linestr = shared->ls_linestr; 10642 bufend = SvEND(linestr); 10643 d = s; 10644 if (indented) { 10645 char *myolds = s; 10646 10647 while (s < bufend - len + 1) { 10648 if (*s++ == '\n') 10649 ++PL_parser->herelines; 10650 10651 if (memEQ(s, PL_tokenbuf + 1, len - 1)) { 10652 char *backup = s; 10653 indent_len = 0; 10654 10655 /* Only valid if it's preceded by whitespace only */ 10656 while (backup != myolds && --backup >= myolds) { 10657 if (! SPACE_OR_TAB(*backup)) { 10658 break; 10659 } 10660 indent_len++; 10661 } 10662 10663 /* No whitespace or all! */ 10664 if (backup == s || *backup == '\n') { 10665 Newx(indent, indent_len + 1, char); 10666 memcpy(indent, backup + 1, indent_len); 10667 indent[indent_len] = 0; 10668 s--; /* before our delimiter */ 10669 PL_parser->herelines--; /* this line doesn't count */ 10670 break; 10671 } 10672 } 10673 } 10674 } 10675 else { 10676 while (s < bufend - len + 1 10677 && memNE(s,PL_tokenbuf,len) ) 10678 { 10679 if (*s++ == '\n') 10680 ++PL_parser->herelines; 10681 } 10682 } 10683 10684 if (s >= bufend - len + 1) { 10685 goto interminable; 10686 } 10687 10688 sv_setpvn(tmpstr,d+1,s-d); 10689 s += len - 1; 10690 /* the preceding stmt passes a newline */ 10691 PL_parser->herelines++; 10692 10693 /* s now points to the newline after the heredoc terminator. 10694 d points to the newline before the body of the heredoc. 10695 */ 10696 10697 /* We are going to modify linestr in place here, so set 10698 aside copies of the string if necessary for re-evals or 10699 (caller $n)[6]. */ 10700 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 10701 check shared->re_eval_str. */ 10702 if (shared->re_eval_start || shared->re_eval_str) { 10703 /* Set aside the rest of the regexp */ 10704 if (!shared->re_eval_str) 10705 shared->re_eval_str = 10706 newSVpvn(shared->re_eval_start, 10707 bufend - shared->re_eval_start); 10708 shared->re_eval_start -= s-d; 10709 } 10710 10711 if (cxstack_ix >= 0 10712 && CxTYPE(cx) == CXt_EVAL 10713 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL 10714 && cx->blk_eval.cur_text == linestr) 10715 { 10716 cx->blk_eval.cur_text = newSVsv(linestr); 10717 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */ 10718 } 10719 10720 /* Copy everything from s onwards back to d. */ 10721 Move(s,d,bufend-s + 1,char); 10722 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 10723 /* Setting PL_bufend only applies when we have not dug deeper 10724 into other scopes, because sublex_done sets PL_bufend to 10725 SvEND(PL_linestr). */ 10726 if (shared == PL_parser->lex_shared) 10727 PL_bufend = SvEND(linestr); 10728 s = olds; 10729 } 10730 else { 10731 SV *linestr_save; 10732 char *oldbufptr_save; 10733 char *oldoldbufptr_save; 10734 streaming: 10735 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */ 10736 term = PL_tokenbuf[1]; 10737 len--; 10738 linestr_save = PL_linestr; /* must restore this afterwards */ 10739 d = s; /* and this */ 10740 oldbufptr_save = PL_oldbufptr; 10741 oldoldbufptr_save = PL_oldoldbufptr; 10742 PL_linestr = newSVpvs(""); 10743 PL_bufend = SvPVX(PL_linestr); 10744 10745 while (1) { 10746 PL_bufptr = PL_bufend; 10747 CopLINE_set(PL_curcop, 10748 origline + 1 + PL_parser->herelines); 10749 10750 if ( !lex_next_chunk(LEX_NO_TERM) 10751 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) 10752 { 10753 /* Simply freeing linestr_save might seem simpler here, as it 10754 does not matter what PL_linestr points to, since we are 10755 about to croak; but in a quote-like op, linestr_save 10756 will have been prospectively freed already, via 10757 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to 10758 restore PL_linestr. */ 10759 SvREFCNT_dec_NN(PL_linestr); 10760 PL_linestr = linestr_save; 10761 PL_oldbufptr = oldbufptr_save; 10762 PL_oldoldbufptr = oldoldbufptr_save; 10763 goto interminable; 10764 } 10765 10766 CopLINE_set(PL_curcop, origline); 10767 10768 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 10769 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 10770 /* ^That should be enough to avoid this needing to grow: */ 10771 sv_catpvs(PL_linestr, "\n\0"); 10772 assert(s == SvPVX(PL_linestr)); 10773 PL_bufend = SvEND(PL_linestr); 10774 } 10775 10776 s = PL_bufptr; 10777 PL_parser->herelines++; 10778 PL_last_lop = PL_last_uni = NULL; 10779 10780 #ifndef PERL_STRICT_CR 10781 if (PL_bufend - PL_linestart >= 2) { 10782 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') 10783 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 10784 { 10785 PL_bufend[-2] = '\n'; 10786 PL_bufend--; 10787 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10788 } 10789 else if (PL_bufend[-1] == '\r') 10790 PL_bufend[-1] = '\n'; 10791 } 10792 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 10793 PL_bufend[-1] = '\n'; 10794 #endif 10795 10796 if (indented && (PL_bufend-s) >= len) { 10797 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len)); 10798 10799 if (found) { 10800 char *backup = found; 10801 indent_len = 0; 10802 10803 /* Only valid if it's preceded by whitespace only */ 10804 while (backup != s && --backup >= s) { 10805 if (! SPACE_OR_TAB(*backup)) { 10806 break; 10807 } 10808 indent_len++; 10809 } 10810 10811 /* All whitespace or none! */ 10812 if (backup == found || SPACE_OR_TAB(*backup)) { 10813 Newx(indent, indent_len + 1, char); 10814 memcpy(indent, backup, indent_len); 10815 indent[indent_len] = 0; 10816 SvREFCNT_dec(PL_linestr); 10817 PL_linestr = linestr_save; 10818 PL_linestart = SvPVX(linestr_save); 10819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 10820 PL_oldbufptr = oldbufptr_save; 10821 PL_oldoldbufptr = oldoldbufptr_save; 10822 s = d; 10823 break; 10824 } 10825 } 10826 10827 /* Didn't find it */ 10828 sv_catsv(tmpstr,PL_linestr); 10829 } 10830 else { 10831 if (*s == term && PL_bufend-s >= len 10832 && memEQ(s,PL_tokenbuf + 1,len)) 10833 { 10834 SvREFCNT_dec(PL_linestr); 10835 PL_linestr = linestr_save; 10836 PL_linestart = SvPVX(linestr_save); 10837 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 10838 PL_oldbufptr = oldbufptr_save; 10839 PL_oldoldbufptr = oldoldbufptr_save; 10840 s = d; 10841 break; 10842 } 10843 else { 10844 sv_catsv(tmpstr,PL_linestr); 10845 } 10846 } 10847 } /* while (1) */ 10848 } 10849 10850 PL_multi_end = origline + PL_parser->herelines; 10851 10852 if (indented && indent) { 10853 STRLEN linecount = 1; 10854 STRLEN herelen = SvCUR(tmpstr); 10855 char *ss = SvPVX(tmpstr); 10856 char *se = ss + herelen; 10857 SV *newstr = newSV(herelen+1); 10858 SvPOK_on(newstr); 10859 10860 /* Trim leading whitespace */ 10861 while (ss < se) { 10862 /* newline only? Copy and move on */ 10863 if (*ss == '\n') { 10864 sv_catpvs(newstr,"\n"); 10865 ss++; 10866 linecount++; 10867 10868 /* Found our indentation? Strip it */ 10869 } 10870 else if (se - ss >= indent_len 10871 && memEQ(ss, indent, indent_len)) 10872 { 10873 STRLEN le = 0; 10874 ss += indent_len; 10875 10876 while ((ss + le) < se && *(ss + le) != '\n') 10877 le++; 10878 10879 sv_catpvn(newstr, ss, le); 10880 ss += le; 10881 10882 /* Line doesn't begin with our indentation? Croak */ 10883 } 10884 else { 10885 Safefree(indent); 10886 Perl_croak(aTHX_ 10887 "Indentation on line %d of here-doc doesn't match delimiter", 10888 (int)linecount 10889 ); 10890 } 10891 } /* while */ 10892 10893 /* avoid sv_setsv() as we dont wan't to COW here */ 10894 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr)); 10895 Safefree(indent); 10896 SvREFCNT_dec_NN(newstr); 10897 } 10898 10899 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 10900 SvPV_shrink_to_cur(tmpstr); 10901 } 10902 10903 if (!IN_BYTES) { 10904 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 10905 SvUTF8_on(tmpstr); 10906 } 10907 10908 PL_lex_stuff = tmpstr; 10909 pl_yylval.ival = op_type; 10910 return s; 10911 10912 interminable: 10913 if (indent) 10914 Safefree(indent); 10915 SvREFCNT_dec(tmpstr); 10916 CopLINE_set(PL_curcop, origline); 10917 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); 10918 } 10919 10920 10921 /* scan_inputsymbol 10922 takes: position of first '<' in input buffer 10923 returns: position of first char following the matching '>' in 10924 input buffer 10925 side-effects: pl_yylval and lex_op are set. 10926 10927 This code handles: 10928 10929 <> read from ARGV 10930 <<>> read from ARGV without magic open 10931 <FH> read from filehandle 10932 <pkg::FH> read from package qualified filehandle 10933 <pkg'FH> read from package qualified filehandle 10934 <$fh> read from filehandle in $fh 10935 <*.h> filename glob 10936 10937 */ 10938 10939 STATIC char * 10940 S_scan_inputsymbol(pTHX_ char *start) 10941 { 10942 char *s = start; /* current position in buffer */ 10943 char *end; 10944 I32 len; 10945 bool nomagicopen = FALSE; 10946 char *d = PL_tokenbuf; /* start of temp holding space */ 10947 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 10948 10949 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 10950 10951 end = (char *) memchr(s, '\n', PL_bufend - s); 10952 if (!end) 10953 end = PL_bufend; 10954 if (s[1] == '<' && s[2] == '>' && s[3] == '>') { 10955 nomagicopen = TRUE; 10956 *d = '\0'; 10957 len = 0; 10958 s += 3; 10959 } 10960 else 10961 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 10962 10963 /* die if we didn't have space for the contents of the <>, 10964 or if it didn't end, or if we see a newline 10965 */ 10966 10967 if (len >= (I32)sizeof PL_tokenbuf) 10968 Perl_croak(aTHX_ "Excessively long <> operator"); 10969 if (s >= end) 10970 Perl_croak(aTHX_ "Unterminated <> operator"); 10971 10972 s++; 10973 10974 /* check for <$fh> 10975 Remember, only scalar variables are interpreted as filehandles by 10976 this code. Anything more complex (e.g., <$fh{$num}>) will be 10977 treated as a glob() call. 10978 This code makes use of the fact that except for the $ at the front, 10979 a scalar variable and a filehandle look the same. 10980 */ 10981 if (*d == '$' && d[1]) d++; 10982 10983 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 10984 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') { 10985 d += UTF ? UTF8SKIP(d) : 1; 10986 } 10987 10988 /* If we've tried to read what we allow filehandles to look like, and 10989 there's still text left, then it must be a glob() and not a getline. 10990 Use scan_str to pull out the stuff between the <> and treat it 10991 as nothing more than a string. 10992 */ 10993 10994 if (d - PL_tokenbuf != len) { 10995 pl_yylval.ival = OP_GLOB; 10996 s = scan_str(start,FALSE,FALSE,FALSE,NULL); 10997 if (!s) 10998 Perl_croak(aTHX_ "Glob not terminated"); 10999 return s; 11000 } 11001 else { 11002 bool readline_overriden = FALSE; 11003 GV *gv_readline; 11004 /* we're in a filehandle read situation */ 11005 d = PL_tokenbuf; 11006 11007 /* turn <> into <ARGV> */ 11008 if (!len) 11009 Copy("ARGV",d,5,char); 11010 11011 /* Check whether readline() is overriden */ 11012 if ((gv_readline = gv_override("readline",8))) 11013 readline_overriden = TRUE; 11014 11015 /* if <$fh>, create the ops to turn the variable into a 11016 filehandle 11017 */ 11018 if (*d == '$') { 11019 /* try to find it in the pad for this block, otherwise find 11020 add symbol table ops 11021 */ 11022 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); 11023 if (tmp != NOT_IN_PAD) { 11024 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 11025 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 11026 HEK * const stashname = HvNAME_HEK(stash); 11027 SV * const sym = sv_2mortal(newSVhek(stashname)); 11028 sv_catpvs(sym, "::"); 11029 sv_catpv(sym, d+1); 11030 d = SvPVX(sym); 11031 goto intro_sym; 11032 } 11033 else { 11034 OP * const o = newOP(OP_PADSV, 0); 11035 o->op_targ = tmp; 11036 PL_lex_op = readline_overriden 11037 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11038 op_append_elem(OP_LIST, o, 11039 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 11040 : newUNOP(OP_READLINE, 0, o); 11041 } 11042 } 11043 else { 11044 GV *gv; 11045 ++d; 11046 intro_sym: 11047 gv = gv_fetchpv(d, 11048 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), 11049 SVt_PV); 11050 PL_lex_op = readline_overriden 11051 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11052 op_append_elem(OP_LIST, 11053 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 11054 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11055 : newUNOP(OP_READLINE, 0, 11056 newUNOP(OP_RV2SV, 0, 11057 newGVOP(OP_GV, 0, gv))); 11058 } 11059 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 11060 pl_yylval.ival = OP_NULL; 11061 } 11062 11063 /* If it's none of the above, it must be a literal filehandle 11064 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 11065 else { 11066 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 11067 PL_lex_op = readline_overriden 11068 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11069 op_append_elem(OP_LIST, 11070 newGVOP(OP_GV, 0, gv), 11071 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11072 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); 11073 pl_yylval.ival = OP_NULL; 11074 } 11075 } 11076 11077 return s; 11078 } 11079 11080 11081 /* scan_str 11082 takes: 11083 start position in buffer 11084 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but 11085 only if they are of the open/close form 11086 keep_delims preserve the delimiters around the string 11087 re_reparse compiling a run-time /(?{})/: 11088 collapse // to /, and skip encoding src 11089 delimp if non-null, this is set to the position of 11090 the closing delimiter, or just after it if 11091 the closing and opening delimiters differ 11092 (i.e., the opening delimiter of a substitu- 11093 tion replacement) 11094 returns: position to continue reading from buffer 11095 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 11096 updates the read buffer. 11097 11098 This subroutine pulls a string out of the input. It is called for: 11099 q single quotes q(literal text) 11100 ' single quotes 'literal text' 11101 qq double quotes qq(interpolate $here please) 11102 " double quotes "interpolate $here please" 11103 qx backticks qx(/bin/ls -l) 11104 ` backticks `/bin/ls -l` 11105 qw quote words @EXPORT_OK = qw( func() $spam ) 11106 m// regexp match m/this/ 11107 s/// regexp substitute s/this/that/ 11108 tr/// string transliterate tr/this/that/ 11109 y/// string transliterate y/this/that/ 11110 ($*@) sub prototypes sub foo ($) 11111 (stuff) sub attr parameters sub foo : attr(stuff) 11112 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 11113 11114 In most of these cases (all but <>, patterns and transliterate) 11115 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 11116 calls scan_str(). s/// makes yylex() call scan_subst() which calls 11117 scan_str(). tr/// and y/// make yylex() call scan_trans() which 11118 calls scan_str(). 11119 11120 It skips whitespace before the string starts, and treats the first 11121 character as the delimiter. If the delimiter is one of ([{< then 11122 the corresponding "close" character )]}> is used as the closing 11123 delimiter. It allows quoting of delimiters, and if the string has 11124 balanced delimiters ([{<>}]) it allows nesting. 11125 11126 On success, the SV with the resulting string is put into lex_stuff or, 11127 if that is already non-NULL, into lex_repl. The second case occurs only 11128 when parsing the RHS of the special constructs s/// and tr/// (y///). 11129 For convenience, the terminating delimiter character is stuffed into 11130 SvIVX of the SV. 11131 */ 11132 11133 char * 11134 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, 11135 char **delimp 11136 ) 11137 { 11138 SV *sv; /* scalar value: string */ 11139 const char *tmps; /* temp string, used for delimiter matching */ 11140 char *s = start; /* current position in the buffer */ 11141 char term; /* terminating character */ 11142 char *to; /* current position in the sv's data */ 11143 I32 brackets = 1; /* bracket nesting level */ 11144 bool d_is_utf8 = FALSE; /* is there any utf8 content? */ 11145 IV termcode; /* terminating char. code */ 11146 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */ 11147 STRLEN termlen; /* length of terminating string */ 11148 line_t herelines; 11149 11150 /* The delimiters that have a mirror-image closing one */ 11151 const char * opening_delims = "([{<"; 11152 const char * closing_delims = ")]}>"; 11153 11154 /* The only non-UTF character that isn't a stand alone grapheme is 11155 * white-space, hence can't be a delimiter. */ 11156 const char * non_grapheme_msg = "Use of unassigned code point or" 11157 " non-standalone grapheme for a delimiter" 11158 " is not allowed"; 11159 PERL_ARGS_ASSERT_SCAN_STR; 11160 11161 /* skip space before the delimiter */ 11162 if (isSPACE(*s)) { 11163 s = skipspace(s); 11164 } 11165 11166 /* mark where we are, in case we need to report errors */ 11167 CLINE; 11168 11169 /* after skipping whitespace, the next character is the terminator */ 11170 term = *s; 11171 if (!UTF || UTF8_IS_INVARIANT(term)) { 11172 termcode = termstr[0] = term; 11173 termlen = 1; 11174 } 11175 else { 11176 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); 11177 if (UTF && UNLIKELY(! is_grapheme((U8 *) start, 11178 (U8 *) s, 11179 (U8 *) PL_bufend, 11180 termcode))) 11181 { 11182 yyerror(non_grapheme_msg); 11183 } 11184 11185 Copy(s, termstr, termlen, U8); 11186 } 11187 11188 /* mark where we are */ 11189 PL_multi_start = CopLINE(PL_curcop); 11190 PL_multi_open = termcode; 11191 herelines = PL_parser->herelines; 11192 11193 /* If the delimiter has a mirror-image closing one, get it */ 11194 if (term && (tmps = strchr(opening_delims, term))) { 11195 termcode = termstr[0] = term = closing_delims[tmps - opening_delims]; 11196 } 11197 11198 PL_multi_close = termcode; 11199 11200 if (PL_multi_open == PL_multi_close) { 11201 keep_bracketed_quoted = FALSE; 11202 } 11203 11204 /* create a new SV to hold the contents. 79 is the SV's initial length. 11205 What a random number. */ 11206 sv = newSV_type(SVt_PVIV); 11207 SvGROW(sv, 80); 11208 SvIV_set(sv, termcode); 11209 (void)SvPOK_only(sv); /* validate pointer */ 11210 11211 /* move past delimiter and try to read a complete string */ 11212 if (keep_delims) 11213 sv_catpvn(sv, s, termlen); 11214 s += termlen; 11215 for (;;) { 11216 /* extend sv if need be */ 11217 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 11218 /* set 'to' to the next character in the sv's string */ 11219 to = SvPVX(sv)+SvCUR(sv); 11220 11221 /* if open delimiter is the close delimiter read unbridle */ 11222 if (PL_multi_open == PL_multi_close) { 11223 for (; s < PL_bufend; s++,to++) { 11224 /* embedded newlines increment the current line number */ 11225 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 11226 COPLINE_INC_WITH_HERELINES; 11227 /* handle quoted delimiters */ 11228 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 11229 if (!keep_bracketed_quoted 11230 && (s[1] == term 11231 || (re_reparse && s[1] == '\\')) 11232 ) 11233 s++; 11234 else /* any other quotes are simply copied straight through */ 11235 *to++ = *s++; 11236 } 11237 /* terminate when run out of buffer (the for() condition), or 11238 have found the terminator */ 11239 else if (*s == term) { /* First byte of terminator matches */ 11240 if (termlen == 1) /* If is the only byte, are done */ 11241 break; 11242 11243 /* If the remainder of the terminator matches, also are 11244 * done, after checking that is a separate grapheme */ 11245 if ( s + termlen <= PL_bufend 11246 && memEQ(s + 1, (char*)termstr + 1, termlen - 1)) 11247 { 11248 if ( UTF 11249 && UNLIKELY(! is_grapheme((U8 *) start, 11250 (U8 *) s, 11251 (U8 *) PL_bufend, 11252 termcode))) 11253 { 11254 yyerror(non_grapheme_msg); 11255 } 11256 break; 11257 } 11258 } 11259 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) { 11260 d_is_utf8 = TRUE; 11261 } 11262 11263 *to = *s; 11264 } 11265 } 11266 11267 /* if the terminator isn't the same as the start character (e.g., 11268 matched brackets), we have to allow more in the quoting, and 11269 be prepared for nested brackets. 11270 */ 11271 else { 11272 /* read until we run out of string, or we find the terminator */ 11273 for (; s < PL_bufend; s++,to++) { 11274 /* embedded newlines increment the line count */ 11275 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 11276 COPLINE_INC_WITH_HERELINES; 11277 /* backslashes can escape the open or closing characters */ 11278 if (*s == '\\' && s+1 < PL_bufend) { 11279 if (!keep_bracketed_quoted 11280 && ( ((UV)s[1] == PL_multi_open) 11281 || ((UV)s[1] == PL_multi_close) )) 11282 { 11283 s++; 11284 } 11285 else 11286 *to++ = *s++; 11287 } 11288 /* allow nested opens and closes */ 11289 else if ((UV)*s == PL_multi_close && --brackets <= 0) 11290 break; 11291 else if ((UV)*s == PL_multi_open) 11292 brackets++; 11293 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 11294 d_is_utf8 = TRUE; 11295 *to = *s; 11296 } 11297 } 11298 /* terminate the copied string and update the sv's end-of-string */ 11299 *to = '\0'; 11300 SvCUR_set(sv, to - SvPVX_const(sv)); 11301 11302 /* 11303 * this next chunk reads more into the buffer if we're not done yet 11304 */ 11305 11306 if (s < PL_bufend) 11307 break; /* handle case where we are done yet :-) */ 11308 11309 #ifndef PERL_STRICT_CR 11310 if (to - SvPVX_const(sv) >= 2) { 11311 if ( (to[-2] == '\r' && to[-1] == '\n') 11312 || (to[-2] == '\n' && to[-1] == '\r')) 11313 { 11314 to[-2] = '\n'; 11315 to--; 11316 SvCUR_set(sv, to - SvPVX_const(sv)); 11317 } 11318 else if (to[-1] == '\r') 11319 to[-1] = '\n'; 11320 } 11321 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 11322 to[-1] = '\n'; 11323 #endif 11324 11325 /* if we're out of file, or a read fails, bail and reset the current 11326 line marker so we can report where the unterminated string began 11327 */ 11328 COPLINE_INC_WITH_HERELINES; 11329 PL_bufptr = PL_bufend; 11330 if (!lex_next_chunk(0)) { 11331 sv_free(sv); 11332 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 11333 return NULL; 11334 } 11335 s = start = PL_bufptr; 11336 } 11337 11338 /* at this point, we have successfully read the delimited string */ 11339 11340 if (keep_delims) 11341 sv_catpvn(sv, s, termlen); 11342 s += termlen; 11343 11344 if (d_is_utf8) 11345 SvUTF8_on(sv); 11346 11347 PL_multi_end = CopLINE(PL_curcop); 11348 CopLINE_set(PL_curcop, PL_multi_start); 11349 PL_parser->herelines = herelines; 11350 11351 /* if we allocated too much space, give some back */ 11352 if (SvCUR(sv) + 5 < SvLEN(sv)) { 11353 SvLEN_set(sv, SvCUR(sv) + 1); 11354 SvPV_renew(sv, SvLEN(sv)); 11355 } 11356 11357 /* decide whether this is the first or second quoted string we've read 11358 for this op 11359 */ 11360 11361 if (PL_lex_stuff) 11362 PL_parser->lex_sub_repl = sv; 11363 else 11364 PL_lex_stuff = sv; 11365 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s; 11366 return s; 11367 } 11368 11369 /* 11370 scan_num 11371 takes: pointer to position in buffer 11372 returns: pointer to new position in buffer 11373 side-effects: builds ops for the constant in pl_yylval.op 11374 11375 Read a number in any of the formats that Perl accepts: 11376 11377 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 11378 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 11379 0b[01](_?[01])* binary integers 11380 0[0-7](_?[0-7])* octal integers 11381 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 11382 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats 11383 11384 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 11385 thing it reads. 11386 11387 If it reads a number without a decimal point or an exponent, it will 11388 try converting the number to an integer and see if it can do so 11389 without loss of precision. 11390 */ 11391 11392 char * 11393 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 11394 { 11395 const char *s = start; /* current position in buffer */ 11396 char *d; /* destination in temp buffer */ 11397 char *e; /* end of temp buffer */ 11398 NV nv; /* number read, as a double */ 11399 SV *sv = NULL; /* place to put the converted number */ 11400 bool floatit; /* boolean: int or float? */ 11401 const char *lastub = NULL; /* position of last underbar */ 11402 static const char* const number_too_long = "Number too long"; 11403 bool warned_about_underscore = 0; 11404 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */ 11405 #define WARN_ABOUT_UNDERSCORE() \ 11406 do { \ 11407 if (!warned_about_underscore) { \ 11408 warned_about_underscore = 1; \ 11409 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \ 11410 "Misplaced _ in number"); \ 11411 } \ 11412 } while(0) 11413 /* Hexadecimal floating point. 11414 * 11415 * In many places (where we have quads and NV is IEEE 754 double) 11416 * we can fit the mantissa bits of a NV into an unsigned quad. 11417 * (Note that UVs might not be quads even when we have quads.) 11418 * This will not work everywhere, though (either no quads, or 11419 * using long doubles), in which case we have to resort to NV, 11420 * which will probably mean horrible loss of precision due to 11421 * multiple fp operations. */ 11422 bool hexfp = FALSE; 11423 int total_bits = 0; 11424 int significant_bits = 0; 11425 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) 11426 # define HEXFP_UQUAD 11427 Uquad_t hexfp_uquad = 0; 11428 int hexfp_frac_bits = 0; 11429 #else 11430 # define HEXFP_NV 11431 NV hexfp_nv = 0.0; 11432 #endif 11433 NV hexfp_mult = 1.0; 11434 UV high_non_zero = 0; /* highest digit */ 11435 int non_zero_integer_digits = 0; 11436 11437 PERL_ARGS_ASSERT_SCAN_NUM; 11438 11439 /* We use the first character to decide what type of number this is */ 11440 11441 switch (*s) { 11442 default: 11443 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 11444 11445 /* if it starts with a 0, it could be an octal number, a decimal in 11446 0.13 disguise, or a hexadecimal number, or a binary number. */ 11447 case '0': 11448 { 11449 /* variables: 11450 u holds the "number so far" 11451 overflowed was the number more than we can hold? 11452 11453 Shift is used when we add a digit. It also serves as an "are 11454 we in octal/hex/binary?" indicator to disallow hex characters 11455 when in octal mode. 11456 */ 11457 NV n = 0.0; 11458 UV u = 0; 11459 bool overflowed = FALSE; 11460 bool just_zero = TRUE; /* just plain 0 or binary number? */ 11461 bool has_digs = FALSE; 11462 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 11463 static const char* const bases[5] = 11464 { "", "binary", "", "octal", "hexadecimal" }; 11465 static const char* const Bases[5] = 11466 { "", "Binary", "", "Octal", "Hexadecimal" }; 11467 static const char* const maxima[5] = 11468 { "", 11469 "0b11111111111111111111111111111111", 11470 "", 11471 "037777777777", 11472 "0xffffffff" }; 11473 const char *base, *Base, *max; 11474 11475 /* check for hex */ 11476 if (isALPHA_FOLD_EQ(s[1], 'x')) { 11477 shift = 4; 11478 s += 2; 11479 just_zero = FALSE; 11480 } else if (isALPHA_FOLD_EQ(s[1], 'b')) { 11481 shift = 1; 11482 s += 2; 11483 just_zero = FALSE; 11484 } 11485 /* check for a decimal in disguise */ 11486 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) 11487 goto decimal; 11488 /* so it must be octal */ 11489 else { 11490 shift = 3; 11491 s++; 11492 } 11493 11494 if (*s == '_') { 11495 WARN_ABOUT_UNDERSCORE(); 11496 lastub = s++; 11497 } 11498 11499 base = bases[shift]; 11500 Base = Bases[shift]; 11501 max = maxima[shift]; 11502 11503 /* read the rest of the number */ 11504 for (;;) { 11505 /* x is used in the overflow test, 11506 b is the digit we're adding on. */ 11507 UV x, b; 11508 11509 switch (*s) { 11510 11511 /* if we don't mention it, we're done */ 11512 default: 11513 goto out; 11514 11515 /* _ are ignored -- but warned about if consecutive */ 11516 case '_': 11517 if (lastub && s == lastub + 1) 11518 WARN_ABOUT_UNDERSCORE(); 11519 lastub = s++; 11520 break; 11521 11522 /* 8 and 9 are not octal */ 11523 case '8': case '9': 11524 if (shift == 3) 11525 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 11526 /* FALLTHROUGH */ 11527 11528 /* octal digits */ 11529 case '2': case '3': case '4': 11530 case '5': case '6': case '7': 11531 if (shift == 1) 11532 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 11533 /* FALLTHROUGH */ 11534 11535 case '0': case '1': 11536 b = *s++ & 15; /* ASCII digit -> value of digit */ 11537 goto digit; 11538 11539 /* hex digits */ 11540 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 11541 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 11542 /* make sure they said 0x */ 11543 if (shift != 4) 11544 goto out; 11545 b = (*s++ & 7) + 9; 11546 11547 /* Prepare to put the digit we have onto the end 11548 of the number so far. We check for overflows. 11549 */ 11550 11551 digit: 11552 just_zero = FALSE; 11553 has_digs = TRUE; 11554 if (!overflowed) { 11555 assert(shift >= 0); 11556 x = u << shift; /* make room for the digit */ 11557 11558 total_bits += shift; 11559 11560 if ((x >> shift) != u 11561 && !(PL_hints & HINT_NEW_BINARY)) { 11562 overflowed = TRUE; 11563 n = (NV) u; 11564 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11565 "Integer overflow in %s number", 11566 base); 11567 } else 11568 u = x | b; /* add the digit to the end */ 11569 } 11570 if (overflowed) { 11571 n *= nvshift[shift]; 11572 /* If an NV has not enough bits in its 11573 * mantissa to represent an UV this summing of 11574 * small low-order numbers is a waste of time 11575 * (because the NV cannot preserve the 11576 * low-order bits anyway): we could just 11577 * remember when did we overflow and in the 11578 * end just multiply n by the right 11579 * amount. */ 11580 n += (NV) b; 11581 } 11582 11583 if (high_non_zero == 0 && b > 0) 11584 high_non_zero = b; 11585 11586 if (high_non_zero) 11587 non_zero_integer_digits++; 11588 11589 /* this could be hexfp, but peek ahead 11590 * to avoid matching ".." */ 11591 if (UNLIKELY(HEXFP_PEEK(s))) { 11592 goto out; 11593 } 11594 11595 break; 11596 } 11597 } 11598 11599 /* if we get here, we had success: make a scalar value from 11600 the number. 11601 */ 11602 out: 11603 11604 /* final misplaced underbar check */ 11605 if (s[-1] == '_') 11606 WARN_ABOUT_UNDERSCORE(); 11607 11608 if (UNLIKELY(HEXFP_PEEK(s))) { 11609 /* Do sloppy (on the underbars) but quick detection 11610 * (and value construction) for hexfp, the decimal 11611 * detection will shortly be more thorough with the 11612 * underbar checks. */ 11613 const char* h = s; 11614 significant_bits = non_zero_integer_digits * shift; 11615 #ifdef HEXFP_UQUAD 11616 hexfp_uquad = u; 11617 #else /* HEXFP_NV */ 11618 hexfp_nv = u; 11619 #endif 11620 /* Ignore the leading zero bits of 11621 * the high (first) non-zero digit. */ 11622 if (high_non_zero) { 11623 if (high_non_zero < 0x8) 11624 significant_bits--; 11625 if (high_non_zero < 0x4) 11626 significant_bits--; 11627 if (high_non_zero < 0x2) 11628 significant_bits--; 11629 } 11630 11631 if (*h == '.') { 11632 #ifdef HEXFP_NV 11633 NV nv_mult = 1.0; 11634 #endif 11635 bool accumulate = TRUE; 11636 U8 b; 11637 int lim = 1 << shift; 11638 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) || 11639 *h == '_'); h++) { 11640 if (isXDIGIT(*h)) { 11641 significant_bits += shift; 11642 #ifdef HEXFP_UQUAD 11643 if (accumulate) { 11644 if (significant_bits < NV_MANT_DIG) { 11645 /* We are in the long "run" of xdigits, 11646 * accumulate the full four bits. */ 11647 assert(shift >= 0); 11648 hexfp_uquad <<= shift; 11649 hexfp_uquad |= b; 11650 hexfp_frac_bits += shift; 11651 } else if (significant_bits - shift < NV_MANT_DIG) { 11652 /* We are at a hexdigit either at, 11653 * or straddling, the edge of mantissa. 11654 * We will try grabbing as many as 11655 * possible bits. */ 11656 int tail = 11657 significant_bits - NV_MANT_DIG; 11658 if (tail <= 0) 11659 tail += shift; 11660 assert(tail >= 0); 11661 hexfp_uquad <<= tail; 11662 assert((shift - tail) >= 0); 11663 hexfp_uquad |= b >> (shift - tail); 11664 hexfp_frac_bits += tail; 11665 11666 /* Ignore the trailing zero bits 11667 * of the last non-zero xdigit. 11668 * 11669 * The assumption here is that if 11670 * one has input of e.g. the xdigit 11671 * eight (0x8), there is only one 11672 * bit being input, not the full 11673 * four bits. Conversely, if one 11674 * specifies a zero xdigit, the 11675 * assumption is that one really 11676 * wants all those bits to be zero. */ 11677 if (b) { 11678 if ((b & 0x1) == 0x0) { 11679 significant_bits--; 11680 if ((b & 0x2) == 0x0) { 11681 significant_bits--; 11682 if ((b & 0x4) == 0x0) { 11683 significant_bits--; 11684 } 11685 } 11686 } 11687 } 11688 11689 accumulate = FALSE; 11690 } 11691 } else { 11692 /* Keep skipping the xdigits, and 11693 * accumulating the significant bits, 11694 * but do not shift the uquad 11695 * (which would catastrophically drop 11696 * high-order bits) or accumulate the 11697 * xdigits anymore. */ 11698 } 11699 #else /* HEXFP_NV */ 11700 if (accumulate) { 11701 nv_mult /= nvshift[shift]; 11702 if (nv_mult > 0.0) 11703 hexfp_nv += b * nv_mult; 11704 else 11705 accumulate = FALSE; 11706 } 11707 #endif 11708 } 11709 if (significant_bits >= NV_MANT_DIG) 11710 accumulate = FALSE; 11711 } 11712 } 11713 11714 if ((total_bits > 0 || significant_bits > 0) && 11715 isALPHA_FOLD_EQ(*h, 'p')) { 11716 bool negexp = FALSE; 11717 h++; 11718 if (*h == '+') 11719 h++; 11720 else if (*h == '-') { 11721 negexp = TRUE; 11722 h++; 11723 } 11724 if (isDIGIT(*h)) { 11725 I32 hexfp_exp = 0; 11726 while (isDIGIT(*h) || *h == '_') { 11727 if (isDIGIT(*h)) { 11728 hexfp_exp *= 10; 11729 hexfp_exp += *h - '0'; 11730 #ifdef NV_MIN_EXP 11731 if (negexp 11732 && -hexfp_exp < NV_MIN_EXP - 1) { 11733 /* NOTE: this means that the exponent 11734 * underflow warning happens for 11735 * the IEEE 754 subnormals (denormals), 11736 * because DBL_MIN_EXP etc are the lowest 11737 * possible binary (or, rather, DBL_RADIX-base) 11738 * exponent for normals, not subnormals. 11739 * 11740 * This may or may not be a good thing. */ 11741 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 11742 "Hexadecimal float: exponent underflow"); 11743 break; 11744 } 11745 #endif 11746 #ifdef NV_MAX_EXP 11747 if (!negexp 11748 && hexfp_exp > NV_MAX_EXP - 1) { 11749 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 11750 "Hexadecimal float: exponent overflow"); 11751 break; 11752 } 11753 #endif 11754 } 11755 h++; 11756 } 11757 if (negexp) 11758 hexfp_exp = -hexfp_exp; 11759 #ifdef HEXFP_UQUAD 11760 hexfp_exp -= hexfp_frac_bits; 11761 #endif 11762 hexfp_mult = Perl_pow(2.0, hexfp_exp); 11763 hexfp = TRUE; 11764 goto decimal; 11765 } 11766 } 11767 } 11768 11769 if (shift != 3 && !has_digs) { 11770 /* 0x or 0b with no digits, treat it as an error. 11771 Originally this backed up the parse before the b or 11772 x, but that has the potential for silent changes in 11773 behaviour, like for: "0x.3" and "0x+$foo". 11774 */ 11775 const char *d = s; 11776 char *oldbp = PL_bufptr; 11777 if (*d) ++d; /* so the user sees the bad non-digit */ 11778 PL_bufptr = (char *)d; /* so yyerror reports the context */ 11779 yyerror(Perl_form(aTHX_ "No digits found for %s literal", 11780 shift == 4 ? "hexadecimal" : "binary")); 11781 PL_bufptr = oldbp; 11782 } 11783 11784 if (overflowed) { 11785 if (n > 4294967295.0) 11786 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 11787 "%s number > %s non-portable", 11788 Base, max); 11789 sv = newSVnv(n); 11790 } 11791 else { 11792 #if UVSIZE > 4 11793 if (u > 0xffffffff) 11794 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 11795 "%s number > %s non-portable", 11796 Base, max); 11797 #endif 11798 sv = newSVuv(u); 11799 } 11800 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 11801 sv = new_constant(start, s - start, "integer", 11802 sv, NULL, NULL, 0, NULL); 11803 else if (PL_hints & HINT_NEW_BINARY) 11804 sv = new_constant(start, s - start, "binary", 11805 sv, NULL, NULL, 0, NULL); 11806 } 11807 break; 11808 11809 /* 11810 handle decimal numbers. 11811 we're also sent here when we read a 0 as the first digit 11812 */ 11813 case '1': case '2': case '3': case '4': case '5': 11814 case '6': case '7': case '8': case '9': case '.': 11815 decimal: 11816 d = PL_tokenbuf; 11817 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 11818 floatit = FALSE; 11819 if (hexfp) { 11820 floatit = TRUE; 11821 *d++ = '0'; 11822 switch (shift) { 11823 case 4: 11824 *d++ = 'x'; 11825 s = start + 2; 11826 break; 11827 case 3: 11828 s = start + 1; 11829 break; 11830 case 1: 11831 *d++ = 'b'; 11832 s = start + 2; 11833 break; 11834 default: 11835 NOT_REACHED; /* NOTREACHED */ 11836 } 11837 } 11838 11839 /* read next group of digits and _ and copy into d */ 11840 while (isDIGIT(*s) 11841 || *s == '_' 11842 || UNLIKELY(hexfp && isXDIGIT(*s))) 11843 { 11844 /* skip underscores, checking for misplaced ones 11845 if -w is on 11846 */ 11847 if (*s == '_') { 11848 if (lastub && s == lastub + 1) 11849 WARN_ABOUT_UNDERSCORE(); 11850 lastub = s++; 11851 } 11852 else { 11853 /* check for end of fixed-length buffer */ 11854 if (d >= e) 11855 Perl_croak(aTHX_ "%s", number_too_long); 11856 /* if we're ok, copy the character */ 11857 *d++ = *s++; 11858 } 11859 } 11860 11861 /* final misplaced underbar check */ 11862 if (lastub && s == lastub + 1) 11863 WARN_ABOUT_UNDERSCORE(); 11864 11865 /* read a decimal portion if there is one. avoid 11866 3..5 being interpreted as the number 3. followed 11867 by .5 11868 */ 11869 if (*s == '.' && s[1] != '.') { 11870 floatit = TRUE; 11871 *d++ = *s++; 11872 11873 if (*s == '_') { 11874 WARN_ABOUT_UNDERSCORE(); 11875 lastub = s; 11876 } 11877 11878 /* copy, ignoring underbars, until we run out of digits. 11879 */ 11880 for (; isDIGIT(*s) 11881 || *s == '_' 11882 || UNLIKELY(hexfp && isXDIGIT(*s)); 11883 s++) 11884 { 11885 /* fixed length buffer check */ 11886 if (d >= e) 11887 Perl_croak(aTHX_ "%s", number_too_long); 11888 if (*s == '_') { 11889 if (lastub && s == lastub + 1) 11890 WARN_ABOUT_UNDERSCORE(); 11891 lastub = s; 11892 } 11893 else 11894 *d++ = *s; 11895 } 11896 /* fractional part ending in underbar? */ 11897 if (s[-1] == '_') 11898 WARN_ABOUT_UNDERSCORE(); 11899 if (*s == '.' && isDIGIT(s[1])) { 11900 /* oops, it's really a v-string, but without the "v" */ 11901 s = start; 11902 goto vstring; 11903 } 11904 } 11905 11906 /* read exponent part, if present */ 11907 if ((isALPHA_FOLD_EQ(*s, 'e') 11908 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) 11909 && memCHRs("+-0123456789_", s[1])) 11910 { 11911 int exp_digits = 0; 11912 const char *save_s = s; 11913 char * save_d = d; 11914 11915 /* regardless of whether user said 3E5 or 3e5, use lower 'e', 11916 ditto for p (hexfloats) */ 11917 if ((isALPHA_FOLD_EQ(*s, 'e'))) { 11918 /* At least some Mach atof()s don't grok 'E' */ 11919 *d++ = 'e'; 11920 } 11921 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { 11922 *d++ = 'p'; 11923 } 11924 11925 s++; 11926 11927 11928 /* stray preinitial _ */ 11929 if (*s == '_') { 11930 WARN_ABOUT_UNDERSCORE(); 11931 lastub = s++; 11932 } 11933 11934 /* allow positive or negative exponent */ 11935 if (*s == '+' || *s == '-') 11936 *d++ = *s++; 11937 11938 /* stray initial _ */ 11939 if (*s == '_') { 11940 WARN_ABOUT_UNDERSCORE(); 11941 lastub = s++; 11942 } 11943 11944 /* read digits of exponent */ 11945 while (isDIGIT(*s) || *s == '_') { 11946 if (isDIGIT(*s)) { 11947 ++exp_digits; 11948 if (d >= e) 11949 Perl_croak(aTHX_ "%s", number_too_long); 11950 *d++ = *s++; 11951 } 11952 else { 11953 if (((lastub && s == lastub + 1) 11954 || (!isDIGIT(s[1]) && s[1] != '_'))) 11955 WARN_ABOUT_UNDERSCORE(); 11956 lastub = s++; 11957 } 11958 } 11959 11960 if (!exp_digits) { 11961 /* no exponent digits, the [eEpP] could be for something else, 11962 * though in practice we don't get here for p since that's preparsed 11963 * earlier, and results in only the 0xX being consumed, so behave similarly 11964 * for decimal floats and consume only the D.DD, leaving the [eE] to the 11965 * next token. 11966 */ 11967 s = save_s; 11968 d = save_d; 11969 } 11970 else { 11971 floatit = TRUE; 11972 } 11973 } 11974 11975 11976 /* 11977 We try to do an integer conversion first if no characters 11978 indicating "float" have been found. 11979 */ 11980 11981 if (!floatit) { 11982 UV uv; 11983 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 11984 11985 if (flags == IS_NUMBER_IN_UV) { 11986 if (uv <= IV_MAX) 11987 sv = newSViv(uv); /* Prefer IVs over UVs. */ 11988 else 11989 sv = newSVuv(uv); 11990 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 11991 if (uv <= (UV) IV_MIN) 11992 sv = newSViv(-(IV)uv); 11993 else 11994 floatit = TRUE; 11995 } else 11996 floatit = TRUE; 11997 } 11998 if (floatit) { 11999 /* terminate the string */ 12000 *d = '\0'; 12001 if (UNLIKELY(hexfp)) { 12002 # ifdef NV_MANT_DIG 12003 if (significant_bits > NV_MANT_DIG) 12004 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12005 "Hexadecimal float: mantissa overflow"); 12006 # endif 12007 #ifdef HEXFP_UQUAD 12008 nv = hexfp_uquad * hexfp_mult; 12009 #else /* HEXFP_NV */ 12010 nv = hexfp_nv * hexfp_mult; 12011 #endif 12012 } else { 12013 nv = Atof(PL_tokenbuf); 12014 } 12015 sv = newSVnv(nv); 12016 } 12017 12018 if ( floatit 12019 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 12020 const char *const key = floatit ? "float" : "integer"; 12021 const STRLEN keylen = floatit ? 5 : 7; 12022 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 12023 key, keylen, sv, NULL, NULL, 0, NULL); 12024 } 12025 break; 12026 12027 /* if it starts with a v, it could be a v-string */ 12028 case 'v': 12029 vstring: 12030 sv = newSV(5); /* preallocate storage space */ 12031 ENTER_with_name("scan_vstring"); 12032 SAVEFREESV(sv); 12033 s = scan_vstring(s, PL_bufend, sv); 12034 SvREFCNT_inc_simple_void_NN(sv); 12035 LEAVE_with_name("scan_vstring"); 12036 break; 12037 } 12038 12039 /* make the op for the constant and return */ 12040 12041 if (sv) 12042 lvalp->opval = newSVOP(OP_CONST, 0, sv); 12043 else 12044 lvalp->opval = NULL; 12045 12046 return (char *)s; 12047 } 12048 12049 STATIC char * 12050 S_scan_formline(pTHX_ char *s) 12051 { 12052 SV * const stuff = newSVpvs(""); 12053 bool needargs = FALSE; 12054 bool eofmt = FALSE; 12055 12056 PERL_ARGS_ASSERT_SCAN_FORMLINE; 12057 12058 while (!needargs) { 12059 char *eol; 12060 if (*s == '.') { 12061 char *t = s+1; 12062 #ifdef PERL_STRICT_CR 12063 while (SPACE_OR_TAB(*t)) 12064 t++; 12065 #else 12066 while (SPACE_OR_TAB(*t) || *t == '\r') 12067 t++; 12068 #endif 12069 if (*t == '\n' || t == PL_bufend) { 12070 eofmt = TRUE; 12071 break; 12072 } 12073 } 12074 eol = (char *) memchr(s,'\n',PL_bufend-s); 12075 if (!eol++) 12076 eol = PL_bufend; 12077 if (*s != '#') { 12078 char *t; 12079 for (t = s; t < eol; t++) { 12080 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 12081 needargs = FALSE; 12082 goto enough; /* ~~ must be first line in formline */ 12083 } 12084 if (*t == '@' || *t == '^') 12085 needargs = TRUE; 12086 } 12087 if (eol > s) { 12088 sv_catpvn(stuff, s, eol-s); 12089 #ifndef PERL_STRICT_CR 12090 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 12091 char *end = SvPVX(stuff) + SvCUR(stuff); 12092 end[-2] = '\n'; 12093 end[-1] = '\0'; 12094 SvCUR_set(stuff, SvCUR(stuff) - 1); 12095 } 12096 #endif 12097 } 12098 else 12099 break; 12100 } 12101 s = (char*)eol; 12102 if ((PL_rsfp || PL_parser->filtered) 12103 && PL_parser->form_lex_state == LEX_NORMAL) { 12104 bool got_some; 12105 PL_bufptr = PL_bufend; 12106 COPLINE_INC_WITH_HERELINES; 12107 got_some = lex_next_chunk(0); 12108 CopLINE_dec(PL_curcop); 12109 s = PL_bufptr; 12110 if (!got_some) 12111 break; 12112 } 12113 incline(s, PL_bufend); 12114 } 12115 enough: 12116 if (!SvCUR(stuff) || needargs) 12117 PL_lex_state = PL_parser->form_lex_state; 12118 if (SvCUR(stuff)) { 12119 PL_expect = XSTATE; 12120 if (needargs) { 12121 const char *s2 = s; 12122 while (isSPACE(*s2) && *s2 != '\n') 12123 s2++; 12124 if (*s2 == '{') { 12125 PL_expect = XTERMBLOCK; 12126 NEXTVAL_NEXTTOKE.ival = 0; 12127 force_next(DO); 12128 } 12129 NEXTVAL_NEXTTOKE.ival = 0; 12130 force_next(FORMLBRACK); 12131 } 12132 if (!IN_BYTES) { 12133 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 12134 SvUTF8_on(stuff); 12135 } 12136 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff); 12137 force_next(THING); 12138 } 12139 else { 12140 SvREFCNT_dec(stuff); 12141 if (eofmt) 12142 PL_lex_formbrack = 0; 12143 } 12144 return s; 12145 } 12146 12147 I32 12148 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 12149 { 12150 const I32 oldsavestack_ix = PL_savestack_ix; 12151 CV* const outsidecv = PL_compcv; 12152 12153 SAVEI32(PL_subline); 12154 save_item(PL_subname); 12155 SAVESPTR(PL_compcv); 12156 12157 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 12158 CvFLAGS(PL_compcv) |= flags; 12159 12160 PL_subline = CopLINE(PL_curcop); 12161 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 12162 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 12163 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 12164 if (outsidecv && CvPADLIST(outsidecv)) 12165 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; 12166 12167 return oldsavestack_ix; 12168 } 12169 12170 12171 /* Do extra initialisation of a CV (typically one just created by 12172 * start_subparse()) if that CV is for a named sub 12173 */ 12174 12175 void 12176 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop) 12177 { 12178 PERL_ARGS_ASSERT_INIT_NAMED_CV; 12179 12180 if (nameop->op_type == OP_CONST) { 12181 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv); 12182 if ( strEQ(name, "BEGIN") 12183 || strEQ(name, "END") 12184 || strEQ(name, "INIT") 12185 || strEQ(name, "CHECK") 12186 || strEQ(name, "UNITCHECK") 12187 ) 12188 CvSPECIAL_on(cv); 12189 } 12190 else 12191 /* State subs inside anonymous subs need to be 12192 clonable themselves. */ 12193 if ( CvANON(CvOUTSIDE(cv)) 12194 || CvCLONE(CvOUTSIDE(cv)) 12195 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST( 12196 CvOUTSIDE(cv) 12197 ))[nameop->op_targ]) 12198 ) 12199 CvCLONE_on(cv); 12200 } 12201 12202 12203 static int 12204 S_yywarn(pTHX_ const char *const s, U32 flags) 12205 { 12206 PERL_ARGS_ASSERT_YYWARN; 12207 12208 PL_in_eval |= EVAL_WARNONLY; 12209 yyerror_pv(s, flags); 12210 return 0; 12211 } 12212 12213 void 12214 Perl_abort_execution(pTHX_ const char * const msg, const char * const name) 12215 { 12216 PERL_ARGS_ASSERT_ABORT_EXECUTION; 12217 12218 if (PL_minus_c) 12219 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name); 12220 else { 12221 Perl_croak(aTHX_ 12222 "%sExecution of %s aborted due to compilation errors.\n", msg, name); 12223 } 12224 NOT_REACHED; /* NOTREACHED */ 12225 } 12226 12227 void 12228 Perl_yyquit(pTHX) 12229 { 12230 /* Called, after at least one error has been found, to abort the parse now, 12231 * instead of trying to forge ahead */ 12232 12233 yyerror_pvn(NULL, 0, 0); 12234 } 12235 12236 int 12237 Perl_yyerror(pTHX_ const char *const s) 12238 { 12239 PERL_ARGS_ASSERT_YYERROR; 12240 return yyerror_pvn(s, strlen(s), 0); 12241 } 12242 12243 int 12244 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 12245 { 12246 PERL_ARGS_ASSERT_YYERROR_PV; 12247 return yyerror_pvn(s, strlen(s), flags); 12248 } 12249 12250 int 12251 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 12252 { 12253 const char *context = NULL; 12254 int contlen = -1; 12255 SV *msg; 12256 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 12257 int yychar = PL_parser->yychar; 12258 12259 /* Output error message 's' with length 'len'. 'flags' are SV flags that 12260 * apply. If the number of errors found is large enough, it abandons 12261 * parsing. If 's' is NULL, there is no message, and it abandons 12262 * processing unconditionally */ 12263 12264 if (s != NULL) { 12265 if (!yychar || (yychar == ';' && !PL_rsfp)) 12266 sv_catpvs(where_sv, "at EOF"); 12267 else if ( PL_oldoldbufptr 12268 && PL_bufptr > PL_oldoldbufptr 12269 && PL_bufptr - PL_oldoldbufptr < 200 12270 && PL_oldoldbufptr != PL_oldbufptr 12271 && PL_oldbufptr != PL_bufptr) 12272 { 12273 /* 12274 Only for NetWare: 12275 The code below is removed for NetWare because it 12276 abends/crashes on NetWare when the script has error such as 12277 not having the closing quotes like: 12278 if ($var eq "value) 12279 Checking of white spaces is anyway done in NetWare code. 12280 */ 12281 #ifndef NETWARE 12282 while (isSPACE(*PL_oldoldbufptr)) 12283 PL_oldoldbufptr++; 12284 #endif 12285 context = PL_oldoldbufptr; 12286 contlen = PL_bufptr - PL_oldoldbufptr; 12287 } 12288 else if ( PL_oldbufptr 12289 && PL_bufptr > PL_oldbufptr 12290 && PL_bufptr - PL_oldbufptr < 200 12291 && PL_oldbufptr != PL_bufptr) { 12292 /* 12293 Only for NetWare: 12294 The code below is removed for NetWare because it 12295 abends/crashes on NetWare when the script has error such as 12296 not having the closing quotes like: 12297 if ($var eq "value) 12298 Checking of white spaces is anyway done in NetWare code. 12299 */ 12300 #ifndef NETWARE 12301 while (isSPACE(*PL_oldbufptr)) 12302 PL_oldbufptr++; 12303 #endif 12304 context = PL_oldbufptr; 12305 contlen = PL_bufptr - PL_oldbufptr; 12306 } 12307 else if (yychar > 255) 12308 sv_catpvs(where_sv, "next token ???"); 12309 else if (yychar == YYEMPTY) { 12310 if (PL_lex_state == LEX_NORMAL) 12311 sv_catpvs(where_sv, "at end of line"); 12312 else if (PL_lex_inpat) 12313 sv_catpvs(where_sv, "within pattern"); 12314 else 12315 sv_catpvs(where_sv, "within string"); 12316 } 12317 else { 12318 sv_catpvs(where_sv, "next char "); 12319 if (yychar < 32) 12320 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 12321 else if (isPRINT_LC(yychar)) { 12322 const char string = yychar; 12323 sv_catpvn(where_sv, &string, 1); 12324 } 12325 else 12326 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 12327 } 12328 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 12329 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", 12330 OutCopFILE(PL_curcop), 12331 (IV)(PL_parser->preambling == NOLINE 12332 ? CopLINE(PL_curcop) 12333 : PL_parser->preambling)); 12334 if (context) 12335 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", 12336 UTF8fARG(UTF, contlen, context)); 12337 else 12338 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); 12339 if ( PL_multi_start < PL_multi_end 12340 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) 12341 { 12342 Perl_sv_catpvf(aTHX_ msg, 12343 " (Might be a runaway multi-line %c%c string starting on" 12344 " line %" IVdf ")\n", 12345 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 12346 PL_multi_end = 0; 12347 } 12348 if (PL_in_eval & EVAL_WARNONLY) { 12349 PL_in_eval &= ~EVAL_WARNONLY; 12350 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); 12351 } 12352 else { 12353 qerror(msg); 12354 } 12355 } 12356 if (s == NULL || PL_error_count >= 10) { 12357 const char * msg = ""; 12358 const char * const name = OutCopFILE(PL_curcop); 12359 12360 if (PL_in_eval) { 12361 SV * errsv = ERRSV; 12362 if (SvCUR(errsv)) { 12363 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); 12364 } 12365 } 12366 12367 if (s == NULL) { 12368 abort_execution(msg, name); 12369 } 12370 else { 12371 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name); 12372 } 12373 } 12374 PL_in_my = 0; 12375 PL_in_my_stash = NULL; 12376 return 0; 12377 } 12378 12379 STATIC char* 12380 S_swallow_bom(pTHX_ U8 *s) 12381 { 12382 const STRLEN slen = SvCUR(PL_linestr); 12383 12384 PERL_ARGS_ASSERT_SWALLOW_BOM; 12385 12386 switch (s[0]) { 12387 case 0xFF: 12388 if (s[1] == 0xFE) { 12389 /* UTF-16 little-endian? (or UTF-32LE?) */ 12390 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 12391 /* diag_listed_as: Unsupported script encoding %s */ 12392 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 12393 #ifndef PERL_NO_UTF16_FILTER 12394 #ifdef DEBUGGING 12395 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 12396 #endif 12397 s += 2; 12398 if (PL_bufend > (char*)s) { 12399 s = add_utf16_textfilter(s, TRUE); 12400 } 12401 #else 12402 /* diag_listed_as: Unsupported script encoding %s */ 12403 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 12404 #endif 12405 } 12406 break; 12407 case 0xFE: 12408 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 12409 #ifndef PERL_NO_UTF16_FILTER 12410 #ifdef DEBUGGING 12411 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 12412 #endif 12413 s += 2; 12414 if (PL_bufend > (char *)s) { 12415 s = add_utf16_textfilter(s, FALSE); 12416 } 12417 #else 12418 /* diag_listed_as: Unsupported script encoding %s */ 12419 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 12420 #endif 12421 } 12422 break; 12423 case BOM_UTF8_FIRST_BYTE: { 12424 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { 12425 #ifdef DEBUGGING 12426 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 12427 #endif 12428 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ 12429 } 12430 break; 12431 } 12432 case 0: 12433 if (slen > 3) { 12434 if (s[1] == 0) { 12435 if (s[2] == 0xFE && s[3] == 0xFF) { 12436 /* UTF-32 big-endian */ 12437 /* diag_listed_as: Unsupported script encoding %s */ 12438 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 12439 } 12440 } 12441 else if (s[2] == 0 && s[3] != 0) { 12442 /* Leading bytes 12443 * 00 xx 00 xx 12444 * are a good indicator of UTF-16BE. */ 12445 #ifndef PERL_NO_UTF16_FILTER 12446 #ifdef DEBUGGING 12447 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 12448 #endif 12449 s = add_utf16_textfilter(s, FALSE); 12450 #else 12451 /* diag_listed_as: Unsupported script encoding %s */ 12452 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 12453 #endif 12454 } 12455 } 12456 break; 12457 12458 default: 12459 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 12460 /* Leading bytes 12461 * xx 00 xx 00 12462 * are a good indicator of UTF-16LE. */ 12463 #ifndef PERL_NO_UTF16_FILTER 12464 #ifdef DEBUGGING 12465 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 12466 #endif 12467 s = add_utf16_textfilter(s, TRUE); 12468 #else 12469 /* diag_listed_as: Unsupported script encoding %s */ 12470 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 12471 #endif 12472 } 12473 } 12474 return (char*)s; 12475 } 12476 12477 12478 #ifndef PERL_NO_UTF16_FILTER 12479 static I32 12480 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 12481 { 12482 SV *const filter = FILTER_DATA(idx); 12483 /* We re-use this each time round, throwing the contents away before we 12484 return. */ 12485 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 12486 SV *const utf8_buffer = filter; 12487 IV status = IoPAGE(filter); 12488 const bool reverse = cBOOL(IoLINES(filter)); 12489 I32 retval; 12490 12491 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 12492 12493 /* As we're automatically added, at the lowest level, and hence only called 12494 from this file, we can be sure that we're not called in block mode. Hence 12495 don't bother writing code to deal with block mode. */ 12496 if (maxlen) { 12497 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 12498 } 12499 if (status < 0) { 12500 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status); 12501 } 12502 DEBUG_P(PerlIO_printf(Perl_debug_log, 12503 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 12504 FPTR2DPTR(void *, S_utf16_textfilter), 12505 reverse ? 'l' : 'b', idx, maxlen, status, 12506 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 12507 12508 while (1) { 12509 STRLEN chars; 12510 STRLEN have; 12511 Size_t newlen; 12512 U8 *end; 12513 /* First, look in our buffer of existing UTF-8 data: */ 12514 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 12515 12516 if (nl) { 12517 ++nl; 12518 } else if (status == 0) { 12519 /* EOF */ 12520 IoPAGE(filter) = 0; 12521 nl = SvEND(utf8_buffer); 12522 } 12523 if (nl) { 12524 STRLEN got = nl - SvPVX(utf8_buffer); 12525 /* Did we have anything to append? */ 12526 retval = got != 0; 12527 sv_catpvn(sv, SvPVX(utf8_buffer), got); 12528 /* Everything else in this code works just fine if SVp_POK isn't 12529 set. This, however, needs it, and we need it to work, else 12530 we loop infinitely because the buffer is never consumed. */ 12531 sv_chop(utf8_buffer, nl); 12532 break; 12533 } 12534 12535 /* OK, not a complete line there, so need to read some more UTF-16. 12536 Read an extra octect if the buffer currently has an odd number. */ 12537 while (1) { 12538 if (status <= 0) 12539 break; 12540 if (SvCUR(utf16_buffer) >= 2) { 12541 /* Location of the high octet of the last complete code point. 12542 Gosh, UTF-16 is a pain. All the benefits of variable length, 12543 *coupled* with all the benefits of partial reads and 12544 endianness. */ 12545 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 12546 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 12547 12548 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 12549 break; 12550 } 12551 12552 /* We have the first half of a surrogate. Read more. */ 12553 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 12554 } 12555 12556 status = FILTER_READ(idx + 1, utf16_buffer, 12557 160 + (SvCUR(utf16_buffer) & 1)); 12558 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer))); 12559 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 12560 if (status < 0) { 12561 /* Error */ 12562 IoPAGE(filter) = status; 12563 return status; 12564 } 12565 } 12566 12567 /* 'chars' isn't quite the right name, as code points above 0xFFFF 12568 * require 4 bytes per char */ 12569 chars = SvCUR(utf16_buffer) >> 1; 12570 have = SvCUR(utf8_buffer); 12571 12572 /* Assume the worst case size as noted by the functions: twice the 12573 * number of input bytes */ 12574 SvGROW(utf8_buffer, have + chars * 4 + 1); 12575 12576 if (reverse) { 12577 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 12578 (U8*)SvPVX_const(utf8_buffer) + have, 12579 chars * 2, &newlen); 12580 } else { 12581 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 12582 (U8*)SvPVX_const(utf8_buffer) + have, 12583 chars * 2, &newlen); 12584 } 12585 SvCUR_set(utf8_buffer, have + newlen); 12586 *end = '\0'; 12587 12588 /* No need to keep this SV "well-formed" with a '\0' after the end, as 12589 it's private to us, and utf16_to_utf8{,reversed} take a 12590 (pointer,length) pair, rather than a NUL-terminated string. */ 12591 if(SvCUR(utf16_buffer) & 1) { 12592 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 12593 SvCUR_set(utf16_buffer, 1); 12594 } else { 12595 SvCUR_set(utf16_buffer, 0); 12596 } 12597 } 12598 DEBUG_P(PerlIO_printf(Perl_debug_log, 12599 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 12600 status, 12601 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 12602 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 12603 return retval; 12604 } 12605 12606 static U8 * 12607 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 12608 { 12609 SV *filter = filter_add(S_utf16_textfilter, NULL); 12610 12611 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 12612 12613 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 12614 SvPVCLEAR(filter); 12615 IoLINES(filter) = reversed; 12616 IoPAGE(filter) = 1; /* Not EOF */ 12617 12618 /* Sadly, we have to return a valid pointer, come what may, so we have to 12619 ignore any error return from this. */ 12620 SvCUR_set(PL_linestr, 0); 12621 if (FILTER_READ(0, PL_linestr, 0)) { 12622 SvUTF8_on(PL_linestr); 12623 } else { 12624 SvUTF8_on(PL_linestr); 12625 } 12626 PL_bufend = SvEND(PL_linestr); 12627 return (U8*)SvPVX(PL_linestr); 12628 } 12629 #endif 12630 12631 /* 12632 Returns a pointer to the next character after the parsed 12633 vstring, as well as updating the passed in sv. 12634 12635 Function must be called like 12636 12637 sv = sv_2mortal(newSV(5)); 12638 s = scan_vstring(s,e,sv); 12639 12640 where s and e are the start and end of the string. 12641 The sv should already be large enough to store the vstring 12642 passed in, for performance reasons. 12643 12644 This function may croak if fatal warnings are enabled in the 12645 calling scope, hence the sv_2mortal in the example (to prevent 12646 a leak). Make sure to do SvREFCNT_inc afterwards if you use 12647 sv_2mortal. 12648 12649 */ 12650 12651 char * 12652 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 12653 { 12654 const char *pos = s; 12655 const char *start = s; 12656 12657 PERL_ARGS_ASSERT_SCAN_VSTRING; 12658 12659 if (*pos == 'v') pos++; /* get past 'v' */ 12660 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 12661 pos++; 12662 if ( *pos != '.') { 12663 /* this may not be a v-string if followed by => */ 12664 const char *next = pos; 12665 while (next < e && isSPACE(*next)) 12666 ++next; 12667 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 12668 /* return string not v-string */ 12669 sv_setpvn(sv,(char *)s,pos-s); 12670 return (char *)pos; 12671 } 12672 } 12673 12674 if (!isALPHA(*pos)) { 12675 U8 tmpbuf[UTF8_MAXBYTES+1]; 12676 12677 if (*s == 'v') 12678 s++; /* get past 'v' */ 12679 12680 SvPVCLEAR(sv); 12681 12682 for (;;) { 12683 /* this is atoi() that tolerates underscores */ 12684 U8 *tmpend; 12685 UV rev = 0; 12686 const char *end = pos; 12687 UV mult = 1; 12688 while (--end >= s) { 12689 if (*end != '_') { 12690 const UV orev = rev; 12691 rev += (*end - '0') * mult; 12692 mult *= 10; 12693 if (orev > rev) 12694 /* diag_listed_as: Integer overflow in %s number */ 12695 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 12696 "Integer overflow in decimal number"); 12697 } 12698 } 12699 12700 /* Append native character for the rev point */ 12701 tmpend = uvchr_to_utf8(tmpbuf, rev); 12702 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 12703 if (!UVCHR_IS_INVARIANT(rev)) 12704 SvUTF8_on(sv); 12705 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 12706 s = ++pos; 12707 else { 12708 s = pos; 12709 break; 12710 } 12711 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 12712 pos++; 12713 } 12714 SvPOK_on(sv); 12715 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 12716 SvRMAGICAL_on(sv); 12717 } 12718 return (char *)s; 12719 } 12720 12721 int 12722 Perl_keyword_plugin_standard(pTHX_ 12723 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 12724 { 12725 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 12726 PERL_UNUSED_CONTEXT; 12727 PERL_UNUSED_ARG(keyword_ptr); 12728 PERL_UNUSED_ARG(keyword_len); 12729 PERL_UNUSED_ARG(op_ptr); 12730 return KEYWORD_PLUGIN_DECLINE; 12731 } 12732 12733 /* 12734 =for apidoc wrap_keyword_plugin 12735 12736 Puts a C function into the chain of keyword plugins. This is the 12737 preferred way to manipulate the L</PL_keyword_plugin> variable. 12738 C<new_plugin> is a pointer to the C function that is to be added to the 12739 keyword plugin chain, and C<old_plugin_p> points to the storage location 12740 where a pointer to the next function in the chain will be stored. The 12741 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable, 12742 while the value previously stored there is written to C<*old_plugin_p>. 12743 12744 L</PL_keyword_plugin> is global to an entire process, and a module wishing 12745 to hook keyword parsing may find itself invoked more than once per 12746 process, typically in different threads. To handle that situation, this 12747 function is idempotent. The location C<*old_plugin_p> must initially 12748 (once per process) contain a null pointer. A C variable of static 12749 duration (declared at file scope, typically also marked C<static> to give 12750 it internal linkage) will be implicitly initialised appropriately, if it 12751 does not have an explicit initialiser. This function will only actually 12752 modify the plugin chain if it finds C<*old_plugin_p> to be null. This 12753 function is also thread safe on the small scale. It uses appropriate 12754 locking to avoid race conditions in accessing L</PL_keyword_plugin>. 12755 12756 When this function is called, the function referenced by C<new_plugin> 12757 must be ready to be called, except for C<*old_plugin_p> being unfilled. 12758 In a threading situation, C<new_plugin> may be called immediately, even 12759 before this function has returned. C<*old_plugin_p> will always be 12760 appropriately set before C<new_plugin> is called. If C<new_plugin> 12761 decides not to do anything special with the identifier that it is given 12762 (which is the usual case for most calls to a keyword plugin), it must 12763 chain the plugin function referenced by C<*old_plugin_p>. 12764 12765 Taken all together, XS code to install a keyword plugin should typically 12766 look something like this: 12767 12768 static Perl_keyword_plugin_t next_keyword_plugin; 12769 static OP *my_keyword_plugin(pTHX_ 12770 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 12771 { 12772 if (memEQs(keyword_ptr, keyword_len, 12773 "my_new_keyword")) { 12774 ... 12775 } else { 12776 return next_keyword_plugin(aTHX_ 12777 keyword_ptr, keyword_len, op_ptr); 12778 } 12779 } 12780 BOOT: 12781 wrap_keyword_plugin(my_keyword_plugin, 12782 &next_keyword_plugin); 12783 12784 Direct access to L</PL_keyword_plugin> should be avoided. 12785 12786 =cut 12787 */ 12788 12789 void 12790 Perl_wrap_keyword_plugin(pTHX_ 12791 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) 12792 { 12793 dVAR; 12794 12795 PERL_UNUSED_CONTEXT; 12796 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; 12797 if (*old_plugin_p) return; 12798 KEYWORD_PLUGIN_MUTEX_LOCK; 12799 if (!*old_plugin_p) { 12800 *old_plugin_p = PL_keyword_plugin; 12801 PL_keyword_plugin = new_plugin; 12802 } 12803 KEYWORD_PLUGIN_MUTEX_UNLOCK; 12804 } 12805 12806 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 12807 static void 12808 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 12809 { 12810 SAVEI32(PL_lex_brackets); 12811 if (PL_lex_brackets > 100) 12812 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 12813 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 12814 SAVEI32(PL_lex_allbrackets); 12815 PL_lex_allbrackets = 0; 12816 SAVEI8(PL_lex_fakeeof); 12817 PL_lex_fakeeof = (U8)fakeeof; 12818 if(yyparse(gramtype) && !PL_parser->error_count) 12819 qerror(Perl_mess(aTHX_ "Parse error")); 12820 } 12821 12822 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 12823 static OP * 12824 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 12825 { 12826 OP *o; 12827 ENTER; 12828 SAVEVPTR(PL_eval_root); 12829 PL_eval_root = NULL; 12830 parse_recdescent(gramtype, fakeeof); 12831 o = PL_eval_root; 12832 LEAVE; 12833 return o; 12834 } 12835 12836 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 12837 static OP * 12838 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 12839 { 12840 OP *exprop; 12841 if (flags & ~PARSE_OPTIONAL) 12842 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 12843 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 12844 if (!exprop && !(flags & PARSE_OPTIONAL)) { 12845 if (!PL_parser->error_count) 12846 qerror(Perl_mess(aTHX_ "Parse error")); 12847 exprop = newOP(OP_NULL, 0); 12848 } 12849 return exprop; 12850 } 12851 12852 /* 12853 =for apidoc parse_arithexpr 12854 12855 Parse a Perl arithmetic expression. This may contain operators of precedence 12856 down to the bit shift operators. The expression must be followed (and thus 12857 terminated) either by a comparison or lower-precedence operator or by 12858 something that would normally terminate an expression such as semicolon. 12859 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 12860 otherwise it is mandatory. It is up to the caller to ensure that the 12861 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12862 the source of the code to be parsed and the lexical context for the 12863 expression. 12864 12865 The op tree representing the expression is returned. If an optional 12866 expression is absent, a null pointer is returned, otherwise the pointer 12867 will be non-null. 12868 12869 If an error occurs in parsing or compilation, in most cases a valid op 12870 tree is returned anyway. The error is reflected in the parser state, 12871 normally resulting in a single exception at the top level of parsing 12872 which covers all the compilation errors that occurred. Some compilation 12873 errors, however, will throw an exception immediately. 12874 12875 =for apidoc Amnh||PARSE_OPTIONAL 12876 12877 =cut 12878 12879 */ 12880 12881 OP * 12882 Perl_parse_arithexpr(pTHX_ U32 flags) 12883 { 12884 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 12885 } 12886 12887 /* 12888 =for apidoc parse_termexpr 12889 12890 Parse a Perl term expression. This may contain operators of precedence 12891 down to the assignment operators. The expression must be followed (and thus 12892 terminated) either by a comma or lower-precedence operator or by 12893 something that would normally terminate an expression such as semicolon. 12894 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 12895 otherwise it is mandatory. It is up to the caller to ensure that the 12896 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12897 the source of the code to be parsed and the lexical context for the 12898 expression. 12899 12900 The op tree representing the expression is returned. If an optional 12901 expression is absent, a null pointer is returned, otherwise the pointer 12902 will be non-null. 12903 12904 If an error occurs in parsing or compilation, in most cases a valid op 12905 tree is returned anyway. The error is reflected in the parser state, 12906 normally resulting in a single exception at the top level of parsing 12907 which covers all the compilation errors that occurred. Some compilation 12908 errors, however, will throw an exception immediately. 12909 12910 =cut 12911 */ 12912 12913 OP * 12914 Perl_parse_termexpr(pTHX_ U32 flags) 12915 { 12916 return parse_expr(LEX_FAKEEOF_COMMA, flags); 12917 } 12918 12919 /* 12920 =for apidoc parse_listexpr 12921 12922 Parse a Perl list expression. This may contain operators of precedence 12923 down to the comma operator. The expression must be followed (and thus 12924 terminated) either by a low-precedence logic operator such as C<or> or by 12925 something that would normally terminate an expression such as semicolon. 12926 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 12927 otherwise it is mandatory. It is up to the caller to ensure that the 12928 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12929 the source of the code to be parsed and the lexical context for the 12930 expression. 12931 12932 The op tree representing the expression is returned. If an optional 12933 expression is absent, a null pointer is returned, otherwise the pointer 12934 will be non-null. 12935 12936 If an error occurs in parsing or compilation, in most cases a valid op 12937 tree is returned anyway. The error is reflected in the parser state, 12938 normally resulting in a single exception at the top level of parsing 12939 which covers all the compilation errors that occurred. Some compilation 12940 errors, however, will throw an exception immediately. 12941 12942 =cut 12943 */ 12944 12945 OP * 12946 Perl_parse_listexpr(pTHX_ U32 flags) 12947 { 12948 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 12949 } 12950 12951 /* 12952 =for apidoc parse_fullexpr 12953 12954 Parse a single complete Perl expression. This allows the full 12955 expression grammar, including the lowest-precedence operators such 12956 as C<or>. The expression must be followed (and thus terminated) by a 12957 token that an expression would normally be terminated by: end-of-file, 12958 closing bracketing punctuation, semicolon, or one of the keywords that 12959 signals a postfix expression-statement modifier. If C<flags> has the 12960 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is 12961 mandatory. It is up to the caller to ensure that the dynamic parser 12962 state (L</PL_parser> et al) is correctly set to reflect the source of 12963 the code to be parsed and the lexical context for the expression. 12964 12965 The op tree representing the expression is returned. If an optional 12966 expression is absent, a null pointer is returned, otherwise the pointer 12967 will be non-null. 12968 12969 If an error occurs in parsing or compilation, in most cases a valid op 12970 tree is returned anyway. The error is reflected in the parser state, 12971 normally resulting in a single exception at the top level of parsing 12972 which covers all the compilation errors that occurred. Some compilation 12973 errors, however, will throw an exception immediately. 12974 12975 =cut 12976 */ 12977 12978 OP * 12979 Perl_parse_fullexpr(pTHX_ U32 flags) 12980 { 12981 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 12982 } 12983 12984 /* 12985 =for apidoc parse_block 12986 12987 Parse a single complete Perl code block. This consists of an opening 12988 brace, a sequence of statements, and a closing brace. The block 12989 constitutes a lexical scope, so C<my> variables and various compile-time 12990 effects can be contained within it. It is up to the caller to ensure 12991 that the dynamic parser state (L</PL_parser> et al) is correctly set to 12992 reflect the source of the code to be parsed and the lexical context for 12993 the statement. 12994 12995 The op tree representing the code block is returned. This is always a 12996 real op, never a null pointer. It will normally be a C<lineseq> list, 12997 including C<nextstate> or equivalent ops. No ops to construct any kind 12998 of runtime scope are included by virtue of it being a block. 12999 13000 If an error occurs in parsing or compilation, in most cases a valid op 13001 tree (most likely null) is returned anyway. The error is reflected in 13002 the parser state, normally resulting in a single exception at the top 13003 level of parsing which covers all the compilation errors that occurred. 13004 Some compilation errors, however, will throw an exception immediately. 13005 13006 The C<flags> parameter is reserved for future use, and must always 13007 be zero. 13008 13009 =cut 13010 */ 13011 13012 OP * 13013 Perl_parse_block(pTHX_ U32 flags) 13014 { 13015 if (flags) 13016 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 13017 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 13018 } 13019 13020 /* 13021 =for apidoc parse_barestmt 13022 13023 Parse a single unadorned Perl statement. This may be a normal imperative 13024 statement or a declaration that has compile-time effect. It does not 13025 include any label or other affixture. It is up to the caller to ensure 13026 that the dynamic parser state (L</PL_parser> et al) is correctly set to 13027 reflect the source of the code to be parsed and the lexical context for 13028 the statement. 13029 13030 The op tree representing the statement is returned. This may be a 13031 null pointer if the statement is null, for example if it was actually 13032 a subroutine definition (which has compile-time side effects). If not 13033 null, it will be ops directly implementing the statement, suitable to 13034 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 13035 equivalent op (except for those embedded in a scope contained entirely 13036 within the statement). 13037 13038 If an error occurs in parsing or compilation, in most cases a valid op 13039 tree (most likely null) is returned anyway. The error is reflected in 13040 the parser state, normally resulting in a single exception at the top 13041 level of parsing which covers all the compilation errors that occurred. 13042 Some compilation errors, however, will throw an exception immediately. 13043 13044 The C<flags> parameter is reserved for future use, and must always 13045 be zero. 13046 13047 =cut 13048 */ 13049 13050 OP * 13051 Perl_parse_barestmt(pTHX_ U32 flags) 13052 { 13053 if (flags) 13054 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 13055 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 13056 } 13057 13058 /* 13059 =for apidoc parse_label 13060 13061 Parse a single label, possibly optional, of the type that may prefix a 13062 Perl statement. It is up to the caller to ensure that the dynamic parser 13063 state (L</PL_parser> et al) is correctly set to reflect the source of 13064 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the 13065 label is optional, otherwise it is mandatory. 13066 13067 The name of the label is returned in the form of a fresh scalar. If an 13068 optional label is absent, a null pointer is returned. 13069 13070 If an error occurs in parsing, which can only occur if the label is 13071 mandatory, a valid label is returned anyway. The error is reflected in 13072 the parser state, normally resulting in a single exception at the top 13073 level of parsing which covers all the compilation errors that occurred. 13074 13075 =cut 13076 */ 13077 13078 SV * 13079 Perl_parse_label(pTHX_ U32 flags) 13080 { 13081 if (flags & ~PARSE_OPTIONAL) 13082 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 13083 if (PL_nexttoke) { 13084 PL_parser->yychar = yylex(); 13085 if (PL_parser->yychar == LABEL) { 13086 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv; 13087 PL_parser->yychar = YYEMPTY; 13088 cSVOPx(pl_yylval.opval)->op_sv = NULL; 13089 op_free(pl_yylval.opval); 13090 return labelsv; 13091 } else { 13092 yyunlex(); 13093 goto no_label; 13094 } 13095 } else { 13096 char *s, *t; 13097 STRLEN wlen, bufptr_pos; 13098 lex_read_space(0); 13099 t = s = PL_bufptr; 13100 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 13101 goto no_label; 13102 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 13103 if (word_takes_any_delimiter(s, wlen)) 13104 goto no_label; 13105 bufptr_pos = s - SvPVX(PL_linestr); 13106 PL_bufptr = t; 13107 lex_read_space(LEX_KEEP_PREVIOUS); 13108 t = PL_bufptr; 13109 s = SvPVX(PL_linestr) + bufptr_pos; 13110 if (t[0] == ':' && t[1] != ':') { 13111 PL_oldoldbufptr = PL_oldbufptr; 13112 PL_oldbufptr = s; 13113 PL_bufptr = t+1; 13114 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 13115 } else { 13116 PL_bufptr = s; 13117 no_label: 13118 if (flags & PARSE_OPTIONAL) { 13119 return NULL; 13120 } else { 13121 qerror(Perl_mess(aTHX_ "Parse error")); 13122 return newSVpvs("x"); 13123 } 13124 } 13125 } 13126 } 13127 13128 /* 13129 =for apidoc parse_fullstmt 13130 13131 Parse a single complete Perl statement. This may be a normal imperative 13132 statement or a declaration that has compile-time effect, and may include 13133 optional labels. It is up to the caller to ensure that the dynamic 13134 parser state (L</PL_parser> et al) is correctly set to reflect the source 13135 of the code to be parsed and the lexical context for the statement. 13136 13137 The op tree representing the statement is returned. This may be a 13138 null pointer if the statement is null, for example if it was actually 13139 a subroutine definition (which has compile-time side effects). If not 13140 null, it will be the result of a L</newSTATEOP> call, normally including 13141 a C<nextstate> or equivalent op. 13142 13143 If an error occurs in parsing or compilation, in most cases a valid op 13144 tree (most likely null) is returned anyway. The error is reflected in 13145 the parser state, normally resulting in a single exception at the top 13146 level of parsing which covers all the compilation errors that occurred. 13147 Some compilation errors, however, will throw an exception immediately. 13148 13149 The C<flags> parameter is reserved for future use, and must always 13150 be zero. 13151 13152 =cut 13153 */ 13154 13155 OP * 13156 Perl_parse_fullstmt(pTHX_ U32 flags) 13157 { 13158 if (flags) 13159 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 13160 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 13161 } 13162 13163 /* 13164 =for apidoc parse_stmtseq 13165 13166 Parse a sequence of zero or more Perl statements. These may be normal 13167 imperative statements, including optional labels, or declarations 13168 that have compile-time effect, or any mixture thereof. The statement 13169 sequence ends when a closing brace or end-of-file is encountered in a 13170 place where a new statement could have validly started. It is up to 13171 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 13172 is correctly set to reflect the source of the code to be parsed and the 13173 lexical context for the statements. 13174 13175 The op tree representing the statement sequence is returned. This may 13176 be a null pointer if the statements were all null, for example if there 13177 were no statements or if there were only subroutine definitions (which 13178 have compile-time side effects). If not null, it will be a C<lineseq> 13179 list, normally including C<nextstate> or equivalent ops. 13180 13181 If an error occurs in parsing or compilation, in most cases a valid op 13182 tree is returned anyway. The error is reflected in the parser state, 13183 normally resulting in a single exception at the top level of parsing 13184 which covers all the compilation errors that occurred. Some compilation 13185 errors, however, will throw an exception immediately. 13186 13187 The C<flags> parameter is reserved for future use, and must always 13188 be zero. 13189 13190 =cut 13191 */ 13192 13193 OP * 13194 Perl_parse_stmtseq(pTHX_ U32 flags) 13195 { 13196 OP *stmtseqop; 13197 I32 c; 13198 if (flags) 13199 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 13200 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 13201 c = lex_peek_unichar(0); 13202 if (c != -1 && c != /*{*/'}') 13203 qerror(Perl_mess(aTHX_ "Parse error")); 13204 return stmtseqop; 13205 } 13206 13207 /* 13208 =for apidoc parse_subsignature 13209 13210 Parse a subroutine signature declaration. This is the contents of the 13211 parentheses following a named or anonymous subroutine declaration when the 13212 C<signatures> feature is enabled. Note that this function neither expects 13213 nor consumes the opening and closing parentheses around the signature; it 13214 is the caller's job to handle these. 13215 13216 This function must only be called during parsing of a subroutine; after 13217 L</start_subparse> has been called. It might allocate lexical variables on 13218 the pad for the current subroutine. 13219 13220 The op tree to unpack the arguments from the stack at runtime is returned. 13221 This op tree should appear at the beginning of the compiled function. The 13222 caller may wish to use L</op_append_list> to build their function body 13223 after it, or splice it together with the body before calling L</newATTRSUB>. 13224 13225 The C<flags> parameter is reserved for future use, and must always 13226 be zero. 13227 13228 =cut 13229 */ 13230 13231 OP * 13232 Perl_parse_subsignature(pTHX_ U32 flags) 13233 { 13234 if (flags) 13235 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature"); 13236 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); 13237 } 13238 13239 /* 13240 * ex: set ts=8 sts=4 sw=4 et: 13241 */ 13242