1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 This is the lower layer of the Perl parser, managing characters and tokens. 27 28 =for apidoc AmU|yy_parser *|PL_parser 29 30 Pointer to a structure encapsulating the state of the parsing operation 31 currently in progress. The pointer can be locally changed to perform 32 a nested parse without interfering with the state of an outer parse. 33 Individual members of C<PL_parser> have their own documentation. 34 35 =cut 36 */ 37 38 #include "EXTERN.h" 39 #define PERL_IN_TOKE_C 40 #include "perl.h" 41 #include "dquote_inline.h" 42 43 #define new_constant(a,b,c,d,e,f,g) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) 45 46 #define pl_yylval (PL_parser->yylval) 47 48 /* XXX temporary backwards compatibility */ 49 #define PL_lex_brackets (PL_parser->lex_brackets) 50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 52 #define PL_lex_brackstack (PL_parser->lex_brackstack) 53 #define PL_lex_casemods (PL_parser->lex_casemods) 54 #define PL_lex_casestack (PL_parser->lex_casestack) 55 #define PL_lex_defer (PL_parser->lex_defer) 56 #define PL_lex_dojoin (PL_parser->lex_dojoin) 57 #define PL_lex_formbrack (PL_parser->lex_formbrack) 58 #define PL_lex_inpat (PL_parser->lex_inpat) 59 #define PL_lex_inwhat (PL_parser->lex_inwhat) 60 #define PL_lex_op (PL_parser->lex_op) 61 #define PL_lex_repl (PL_parser->lex_repl) 62 #define PL_lex_starts (PL_parser->lex_starts) 63 #define PL_lex_stuff (PL_parser->lex_stuff) 64 #define PL_multi_start (PL_parser->multi_start) 65 #define PL_multi_open (PL_parser->multi_open) 66 #define PL_multi_close (PL_parser->multi_close) 67 #define PL_preambled (PL_parser->preambled) 68 #define PL_sublex_info (PL_parser->sublex_info) 69 #define PL_linestr (PL_parser->linestr) 70 #define PL_expect (PL_parser->expect) 71 #define PL_copline (PL_parser->copline) 72 #define PL_bufptr (PL_parser->bufptr) 73 #define PL_oldbufptr (PL_parser->oldbufptr) 74 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 75 #define PL_linestart (PL_parser->linestart) 76 #define PL_bufend (PL_parser->bufend) 77 #define PL_last_uni (PL_parser->last_uni) 78 #define PL_last_lop (PL_parser->last_lop) 79 #define PL_last_lop_op (PL_parser->last_lop_op) 80 #define PL_lex_state (PL_parser->lex_state) 81 #define PL_rsfp (PL_parser->rsfp) 82 #define PL_rsfp_filters (PL_parser->rsfp_filters) 83 #define PL_in_my (PL_parser->in_my) 84 #define PL_in_my_stash (PL_parser->in_my_stash) 85 #define PL_tokenbuf (PL_parser->tokenbuf) 86 #define PL_multi_end (PL_parser->multi_end) 87 #define PL_error_count (PL_parser->error_count) 88 89 # define PL_nexttoke (PL_parser->nexttoke) 90 # define PL_nexttype (PL_parser->nexttype) 91 # define PL_nextval (PL_parser->nextval) 92 93 static const char* const ident_too_long = "Identifier too long"; 94 95 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 96 97 #define XENUMMASK 0x3f 98 #define XFAKEEOF 0x40 99 #define XFAKEBRACK 0x80 100 101 #ifdef USE_UTF8_SCRIPTS 102 # define UTF cBOOL(!IN_BYTES) 103 #else 104 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 105 #endif 106 107 /* The maximum number of characters preceding the unrecognized one to display */ 108 #define UNRECOGNIZED_PRECEDE_COUNT 10 109 110 /* In variables named $^X, these are the legal values for X. 111 * 1999-02-27 mjd-perl-patch@plover.com */ 112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 113 114 #define SPACE_OR_TAB(c) isBLANK_A(c) 115 116 #define HEXFP_PEEK(s) \ 117 (((s[0] == '.') && \ 118 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ 119 isALPHA_FOLD_EQ(s[0], 'p')) 120 121 /* LEX_* are values for PL_lex_state, the state of the lexer. 122 * They are arranged oddly so that the guard on the switch statement 123 * can get by with a single comparison (if the compiler is smart enough). 124 * 125 * These values refer to the various states within a sublex parse, 126 * i.e. within a double quotish string 127 */ 128 129 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 130 131 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 132 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 133 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 134 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 135 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 136 137 /* at end of code, eg "$x" followed by: */ 138 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 139 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 140 141 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 142 string or after \E, $foo, etc */ 143 #define LEX_INTERPCONST 2 /* NOT USED */ 144 #define LEX_FORMLINE 1 /* expecting a format line */ 145 #define LEX_KNOWNEXT 0 /* next token known; just return it */ 146 147 148 #ifdef DEBUGGING 149 static const char* const lex_state_names[] = { 150 "KNOWNEXT", 151 "FORMLINE", 152 "INTERPCONST", 153 "INTERPCONCAT", 154 "INTERPENDMAYBE", 155 "INTERPEND", 156 "INTERPSTART", 157 "INTERPPUSH", 158 "INTERPCASEMOD", 159 "INTERPNORMAL", 160 "NORMAL" 161 }; 162 #endif 163 164 #include "keywords.h" 165 166 /* CLINE is a macro that ensures PL_copline has a sane value */ 167 168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 169 170 /* 171 * Convenience functions to return different tokens and prime the 172 * lexer for the next token. They all take an argument. 173 * 174 * TOKEN : generic token (used for '(', DOLSHARP, etc) 175 * OPERATOR : generic operator 176 * AOPERATOR : assignment operator 177 * PREBLOCK : beginning the block after an if, while, foreach, ... 178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 179 * PREREF : *EXPR where EXPR is not a simple identifier 180 * TERM : expression term 181 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 182 * LOOPX : loop exiting command (goto, last, dump, etc) 183 * FTST : file test operator 184 * FUN0 : zero-argument function 185 * FUN0OP : zero-argument function, with its op created in this file 186 * FUN1 : not used, except for not, which isn't a UNIOP 187 * BOop : bitwise or or xor 188 * BAop : bitwise and 189 * BCop : bitwise complement 190 * SHop : shift operator 191 * PWop : power operator 192 * PMop : pattern-matching operator 193 * Aop : addition-level operator 194 * AopNOASSIGN : addition-level operator that is never part of .= 195 * Mop : multiplication-level operator 196 * Eop : equality-testing operator 197 * Rop : relational operator <= != gt 198 * 199 * Also see LOP and lop() below. 200 */ 201 202 #ifdef DEBUGGING /* Serve -DT. */ 203 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 204 #else 205 # define REPORT(retval) (retval) 206 #endif 207 208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) 211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \ 217 pl_yylval.ival=f, \ 218 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ 219 REPORT((int)LOOPEX)) 220 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 221 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 222 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 223 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 224 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) 225 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) 226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ 227 REPORT('~') 228 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) 229 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) 230 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 231 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) 232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) 233 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) 234 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) 235 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) 236 237 /* This bit of chicanery makes a unary function followed by 238 * a parenthesis into a function with one argument, highest precedence. 239 * The UNIDOR macro is for unary functions that can be followed by the // 240 * operator (such as C<shift // 0>). 241 */ 242 #define UNI3(f,x,have_x) { \ 243 pl_yylval.ival = f; \ 244 if (have_x) PL_expect = x; \ 245 PL_bufptr = s; \ 246 PL_last_uni = PL_oldbufptr; \ 247 PL_last_lop_op = f; \ 248 if (*s == '(') \ 249 return REPORT( (int)FUNC1 ); \ 250 s = skipspace(s); \ 251 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 252 } 253 #define UNI(f) UNI3(f,XTERM,1) 254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 255 #define UNIPROTO(f,optional) { \ 256 if (optional) PL_last_uni = PL_oldbufptr; \ 257 OPERATOR(f); \ 258 } 259 260 #define UNIBRACK(f) UNI3(f,0,0) 261 262 /* grandfather return to old style */ 263 #define OLDLOP(f) \ 264 do { \ 265 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 266 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 267 pl_yylval.ival = (f); \ 268 PL_expect = XTERM; \ 269 PL_bufptr = s; \ 270 return (int)LSTOP; \ 271 } while(0) 272 273 #define COPLINE_INC_WITH_HERELINES \ 274 STMT_START { \ 275 CopLINE_inc(PL_curcop); \ 276 if (PL_parser->herelines) \ 277 CopLINE(PL_curcop) += PL_parser->herelines, \ 278 PL_parser->herelines = 0; \ 279 } STMT_END 280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there 281 * is no sublex_push to follow. */ 282 #define COPLINE_SET_FROM_MULTI_END \ 283 STMT_START { \ 284 CopLINE_set(PL_curcop, PL_multi_end); \ 285 if (PL_multi_end != PL_multi_start) \ 286 PL_parser->herelines = 0; \ 287 } STMT_END 288 289 290 #ifdef DEBUGGING 291 292 /* how to interpret the pl_yylval associated with the token */ 293 enum token_type { 294 TOKENTYPE_NONE, 295 TOKENTYPE_IVAL, 296 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 297 TOKENTYPE_PVAL, 298 TOKENTYPE_OPVAL 299 }; 300 301 static struct debug_tokens { 302 const int token; 303 enum token_type type; 304 const char *name; 305 } const debug_tokens[] = 306 { 307 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 308 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 309 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 310 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 311 { ARROW, TOKENTYPE_NONE, "ARROW" }, 312 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 313 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 314 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 315 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 316 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 317 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 318 { DO, TOKENTYPE_NONE, "DO" }, 319 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 320 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 321 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 322 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 323 { ELSE, TOKENTYPE_NONE, "ELSE" }, 324 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 325 { EQOP, TOKENTYPE_OPNUM, "EQOP" }, 326 { FOR, TOKENTYPE_IVAL, "FOR" }, 327 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 328 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 329 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 330 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 331 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 332 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 333 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 334 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 335 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 336 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 337 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 338 { IF, TOKENTYPE_IVAL, "IF" }, 339 { LABEL, TOKENTYPE_PVAL, "LABEL" }, 340 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 341 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 342 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 343 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 344 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 345 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 346 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 347 { MY, TOKENTYPE_IVAL, "MY" }, 348 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 349 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 350 { OROP, TOKENTYPE_IVAL, "OROP" }, 351 { OROR, TOKENTYPE_NONE, "OROR" }, 352 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 353 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 354 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 355 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 356 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" }, 357 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 358 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 359 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 360 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 361 { PREINC, TOKENTYPE_NONE, "PREINC" }, 362 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 363 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 364 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 365 { RELOP, TOKENTYPE_OPNUM, "RELOP" }, 366 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 367 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 368 { SUB, TOKENTYPE_NONE, "SUB" }, 369 { THING, TOKENTYPE_OPVAL, "THING" }, 370 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 371 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 372 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 373 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 374 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 375 { USE, TOKENTYPE_IVAL, "USE" }, 376 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 377 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 378 { WORD, TOKENTYPE_OPVAL, "WORD" }, 379 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 380 { 0, TOKENTYPE_NONE, NULL } 381 }; 382 383 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 384 385 STATIC int 386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 387 { 388 PERL_ARGS_ASSERT_TOKEREPORT; 389 390 if (DEBUG_T_TEST) { 391 const char *name = NULL; 392 enum token_type type = TOKENTYPE_NONE; 393 const struct debug_tokens *p; 394 SV* const report = newSVpvs("<== "); 395 396 for (p = debug_tokens; p->token; p++) { 397 if (p->token == (int)rv) { 398 name = p->name; 399 type = p->type; 400 break; 401 } 402 } 403 if (name) 404 Perl_sv_catpv(aTHX_ report, name); 405 else if (isGRAPH(rv)) 406 { 407 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 408 if ((char)rv == 'p') 409 sv_catpvs(report, " (pending identifier)"); 410 } 411 else if (!rv) 412 sv_catpvs(report, "EOF"); 413 else 414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); 415 switch (type) { 416 case TOKENTYPE_NONE: 417 break; 418 case TOKENTYPE_IVAL: 419 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); 420 break; 421 case TOKENTYPE_OPNUM: 422 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 423 PL_op_name[lvalp->ival]); 424 break; 425 case TOKENTYPE_PVAL: 426 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 427 break; 428 case TOKENTYPE_OPVAL: 429 if (lvalp->opval) { 430 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 431 PL_op_name[lvalp->opval->op_type]); 432 if (lvalp->opval->op_type == OP_CONST) { 433 Perl_sv_catpvf(aTHX_ report, " %s", 434 SvPEEK(cSVOPx_sv(lvalp->opval))); 435 } 436 437 } 438 else 439 sv_catpvs(report, "(opval=null)"); 440 break; 441 } 442 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 443 }; 444 return (int)rv; 445 } 446 447 448 /* print the buffer with suitable escapes */ 449 450 STATIC void 451 S_printbuf(pTHX_ const char *const fmt, const char *const s) 452 { 453 SV* const tmp = newSVpvs(""); 454 455 PERL_ARGS_ASSERT_PRINTBUF; 456 457 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */ 458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 459 GCC_DIAG_RESTORE; 460 SvREFCNT_dec(tmp); 461 } 462 463 #endif 464 465 static int 466 S_deprecate_commaless_var_list(pTHX) { 467 PL_expect = XTERM; 468 deprecate("comma-less variable list"); 469 return REPORT(','); /* grandfather non-comma-format format */ 470 } 471 472 /* 473 * S_ao 474 * 475 * This subroutine looks for an '=' next to the operator that has just been 476 * parsed and turns it into an ASSIGNOP if it finds one. 477 */ 478 479 STATIC int 480 S_ao(pTHX_ int toketype) 481 { 482 if (*PL_bufptr == '=') { 483 PL_bufptr++; 484 if (toketype == ANDAND) 485 pl_yylval.ival = OP_ANDASSIGN; 486 else if (toketype == OROR) 487 pl_yylval.ival = OP_ORASSIGN; 488 else if (toketype == DORDOR) 489 pl_yylval.ival = OP_DORASSIGN; 490 toketype = ASSIGNOP; 491 } 492 return REPORT(toketype); 493 } 494 495 /* 496 * S_no_op 497 * When Perl expects an operator and finds something else, no_op 498 * prints the warning. It always prints "<something> found where 499 * operator expected. It prints "Missing semicolon on previous line?" 500 * if the surprise occurs at the start of the line. "do you need to 501 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 502 * where the compiler doesn't know if foo is a method call or a function. 503 * It prints "Missing operator before end of line" if there's nothing 504 * after the missing operator, or "... before <...>" if there is something 505 * after the missing operator. 506 * 507 * PL_bufptr is expected to point to the start of the thing that was found, 508 * and s after the next token or partial token. 509 */ 510 511 STATIC void 512 S_no_op(pTHX_ const char *const what, char *s) 513 { 514 char * const oldbp = PL_bufptr; 515 const bool is_first = (PL_oldbufptr == PL_linestart); 516 517 PERL_ARGS_ASSERT_NO_OP; 518 519 if (!s) 520 s = oldbp; 521 else 522 PL_bufptr = s; 523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 524 if (ckWARN_d(WARN_SYNTAX)) { 525 if (is_first) 526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 527 "\t(Missing semicolon on previous line?)\n"); 528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 529 const char *t; 530 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':'); 531 t += UTF ? UTF8SKIP(t) : 1) 532 NOOP; 533 if (t < PL_bufptr && isSPACE(*t)) 534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 535 "\t(Do you need to predeclare %"UTF8f"?)\n", 536 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); 537 } 538 else { 539 assert(s >= oldbp); 540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 541 "\t(Missing operator before %"UTF8f"?)\n", 542 UTF8fARG(UTF, s - oldbp, oldbp)); 543 } 544 } 545 PL_bufptr = oldbp; 546 } 547 548 /* 549 * S_missingterm 550 * Complain about missing quote/regexp/heredoc terminator. 551 * If it's called with NULL then it cauterizes the line buffer. 552 * If we're in a delimited string and the delimiter is a control 553 * character, it's reformatted into a two-char sequence like ^C. 554 * This is fatal. 555 */ 556 557 STATIC void 558 S_missingterm(pTHX_ char *s) 559 { 560 char tmpbuf[3]; 561 char q; 562 if (s) { 563 char * const nl = strrchr(s,'\n'); 564 if (nl) 565 *nl = '\0'; 566 } 567 else if ((U8) PL_multi_close < 32) { 568 *tmpbuf = '^'; 569 tmpbuf[1] = (char)toCTRL(PL_multi_close); 570 tmpbuf[2] = '\0'; 571 s = tmpbuf; 572 } 573 else { 574 *tmpbuf = (char)PL_multi_close; 575 tmpbuf[1] = '\0'; 576 s = tmpbuf; 577 } 578 q = strchr(s,'"') ? '\'' : '"'; 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 580 } 581 582 #include "feature.h" 583 584 /* 585 * Check whether the named feature is enabled. 586 */ 587 bool 588 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) 589 { 590 char he_name[8 + MAX_FEATURE_LEN] = "feature_"; 591 592 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; 593 594 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM); 595 596 if (namelen > MAX_FEATURE_LEN) 597 return FALSE; 598 memcpy(&he_name[8], name, namelen); 599 600 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0, 601 REFCOUNTED_HE_EXISTS)); 602 } 603 604 /* 605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 606 * utf16-to-utf8-reversed. 607 */ 608 609 #ifdef PERL_CR_FILTER 610 static void 611 strip_return(SV *sv) 612 { 613 const char *s = SvPVX_const(sv); 614 const char * const e = s + SvCUR(sv); 615 616 PERL_ARGS_ASSERT_STRIP_RETURN; 617 618 /* outer loop optimized to do nothing if there are no CR-LFs */ 619 while (s < e) { 620 if (*s++ == '\r' && *s == '\n') { 621 /* hit a CR-LF, need to copy the rest */ 622 char *d = s - 1; 623 *d++ = *s++; 624 while (s < e) { 625 if (*s == '\r' && s[1] == '\n') 626 s++; 627 *d++ = *s++; 628 } 629 SvCUR(sv) -= s - d; 630 return; 631 } 632 } 633 } 634 635 STATIC I32 636 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 637 { 638 const I32 count = FILTER_READ(idx+1, sv, maxlen); 639 if (count > 0 && !maxlen) 640 strip_return(sv); 641 return count; 642 } 643 #endif 644 645 /* 646 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags 647 648 Creates and initialises a new lexer/parser state object, supplying 649 a context in which to lex and parse from a new source of Perl code. 650 A pointer to the new state object is placed in L</PL_parser>. An entry 651 is made on the save stack so that upon unwinding the new state object 652 will be destroyed and the former value of L</PL_parser> will be restored. 653 Nothing else need be done to clean up the parsing context. 654 655 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if 656 non-null, provides a string (in SV form) containing code to be parsed. 657 A copy of the string is made, so subsequent modification of C<line> 658 does not affect parsing. C<rsfp>, if non-null, provides an input stream 659 from which code will be read to be parsed. If both are non-null, the 660 code in C<line> comes first and must consist of complete lines of input, 661 and C<rsfp> supplies the remainder of the source. 662 663 The C<flags> parameter is reserved for future use. Currently it is only 664 used by perl internally, so extensions should always pass zero. 665 666 =cut 667 */ 668 669 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 670 can share filters with the current parser. 671 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 672 caller, hence isn't owned by the parser, so shouldn't be closed on parser 673 destruction. This is used to handle the case of defaulting to reading the 674 script from the standard input because no filename was given on the command 675 line (without getting confused by situation where STDIN has been closed, so 676 the script handle is opened on fd 0) */ 677 678 void 679 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 680 { 681 const char *s = NULL; 682 yy_parser *parser, *oparser; 683 if (flags && flags & ~LEX_START_FLAGS) 684 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 685 686 /* create and initialise a parser */ 687 688 Newxz(parser, 1, yy_parser); 689 parser->old_parser = oparser = PL_parser; 690 PL_parser = parser; 691 692 parser->stack = NULL; 693 parser->ps = NULL; 694 parser->stack_size = 0; 695 696 /* on scope exit, free this parser and restore any outer one */ 697 SAVEPARSER(parser); 698 parser->saved_curcop = PL_curcop; 699 700 /* initialise lexer state */ 701 702 parser->nexttoke = 0; 703 parser->error_count = oparser ? oparser->error_count : 0; 704 parser->copline = parser->preambling = NOLINE; 705 parser->lex_state = LEX_NORMAL; 706 parser->expect = XSTATE; 707 parser->rsfp = rsfp; 708 parser->rsfp_filters = 709 !(flags & LEX_START_SAME_FILTER) || !oparser 710 ? NULL 711 : MUTABLE_AV(SvREFCNT_inc( 712 oparser->rsfp_filters 713 ? oparser->rsfp_filters 714 : (oparser->rsfp_filters = newAV()) 715 )); 716 717 Newx(parser->lex_brackstack, 120, char); 718 Newx(parser->lex_casestack, 12, char); 719 *parser->lex_casestack = '\0'; 720 Newxz(parser->lex_shared, 1, LEXSHARED); 721 722 if (line) { 723 STRLEN len; 724 s = SvPV_const(line, len); 725 parser->linestr = flags & LEX_START_COPIED 726 ? SvREFCNT_inc_simple_NN(line) 727 : newSVpvn_flags(s, len, SvUTF8(line)); 728 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2); 729 } else { 730 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 731 } 732 parser->oldoldbufptr = 733 parser->oldbufptr = 734 parser->bufptr = 735 parser->linestart = SvPVX(parser->linestr); 736 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 737 parser->last_lop = parser->last_uni = NULL; 738 739 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 740 |LEX_DONT_CLOSE_RSFP)); 741 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 742 |LEX_DONT_CLOSE_RSFP)); 743 744 parser->in_pod = parser->filtered = 0; 745 } 746 747 748 /* delete a parser object */ 749 750 void 751 Perl_parser_free(pTHX_ const yy_parser *parser) 752 { 753 PERL_ARGS_ASSERT_PARSER_FREE; 754 755 PL_curcop = parser->saved_curcop; 756 SvREFCNT_dec(parser->linestr); 757 758 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 759 PerlIO_clearerr(parser->rsfp); 760 else if (parser->rsfp && (!parser->old_parser 761 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 762 PerlIO_close(parser->rsfp); 763 SvREFCNT_dec(parser->rsfp_filters); 764 SvREFCNT_dec(parser->lex_stuff); 765 SvREFCNT_dec(parser->sublex_info.repl); 766 767 Safefree(parser->lex_brackstack); 768 Safefree(parser->lex_casestack); 769 Safefree(parser->lex_shared); 770 PL_parser = parser->old_parser; 771 Safefree(parser); 772 } 773 774 void 775 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 776 { 777 I32 nexttoke = parser->nexttoke; 778 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 779 while (nexttoke--) { 780 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 781 && parser->nextval[nexttoke].opval 782 && parser->nextval[nexttoke].opval->op_slabbed 783 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 784 op_free(parser->nextval[nexttoke].opval); 785 parser->nextval[nexttoke].opval = NULL; 786 } 787 } 788 } 789 790 791 /* 792 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr 793 794 Buffer scalar containing the chunk currently under consideration of the 795 text currently being lexed. This is always a plain string scalar (for 796 which C<SvPOK> is true). It is not intended to be used as a scalar by 797 normal scalar means; instead refer to the buffer directly by the pointer 798 variables described below. 799 800 The lexer maintains various C<char*> pointers to things in the 801 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 802 reallocated, all of these pointers must be updated. Don't attempt to 803 do this manually, but rather use L</lex_grow_linestr> if you need to 804 reallocate the buffer. 805 806 The content of the text chunk in the buffer is commonly exactly one 807 complete line of input, up to and including a newline terminator, 808 but there are situations where it is otherwise. The octets of the 809 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 810 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 811 flag on this scalar, which may disagree with it. 812 813 For direct examination of the buffer, the variable 814 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 815 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 816 of these pointers is usually preferable to examination of the scalar 817 through normal scalar means. 818 819 =for apidoc AmxU|char *|PL_parser-E<gt>bufend 820 821 Direct pointer to the end of the chunk of text currently being lexed, the 822 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 823 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 824 always located at the end of the buffer, and does not count as part of 825 the buffer's contents. 826 827 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr 828 829 Points to the current position of lexing inside the lexer buffer. 830 Characters around this point may be freely examined, within 831 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 832 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 833 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 834 835 Lexing code (whether in the Perl core or not) moves this pointer past 836 the characters that it consumes. It is also expected to perform some 837 bookkeeping whenever a newline character is consumed. This movement 838 can be more conveniently performed by the function L</lex_read_to>, 839 which handles newlines appropriately. 840 841 Interpretation of the buffer's octets can be abstracted out by 842 using the slightly higher-level functions L</lex_peek_unichar> and 843 L</lex_read_unichar>. 844 845 =for apidoc AmxU|char *|PL_parser-E<gt>linestart 846 847 Points to the start of the current line inside the lexer buffer. 848 This is useful for indicating at which column an error occurred, and 849 not much else. This must be updated by any lexing code that consumes 850 a newline; the function L</lex_read_to> handles this detail. 851 852 =cut 853 */ 854 855 /* 856 =for apidoc Amx|bool|lex_bufutf8 857 858 Indicates whether the octets in the lexer buffer 859 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 860 of Unicode characters. If not, they should be interpreted as Latin-1 861 characters. This is analogous to the C<SvUTF8> flag for scalars. 862 863 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 864 contains valid UTF-8. Lexing code must be robust in the face of invalid 865 encoding. 866 867 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 868 is significant, but not the whole story regarding the input character 869 encoding. Normally, when a file is being read, the scalar contains octets 870 and its C<SvUTF8> flag is off, but the octets should be interpreted as 871 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 872 however, the scalar may have the C<SvUTF8> flag on, and in this case its 873 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 874 is in effect. This logic may change in the future; use this function 875 instead of implementing the logic yourself. 876 877 =cut 878 */ 879 880 bool 881 Perl_lex_bufutf8(pTHX) 882 { 883 return UTF; 884 } 885 886 /* 887 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len 888 889 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 890 at least C<len> octets (including terminating C<NUL>). Returns a 891 pointer to the reallocated buffer. This is necessary before making 892 any direct modification of the buffer that would increase its length. 893 L</lex_stuff_pvn> provides a more convenient way to insert text into 894 the buffer. 895 896 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 897 this function updates all of the lexer's variables that point directly 898 into the buffer. 899 900 =cut 901 */ 902 903 char * 904 Perl_lex_grow_linestr(pTHX_ STRLEN len) 905 { 906 SV *linestr; 907 char *buf; 908 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 909 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 910 linestr = PL_parser->linestr; 911 buf = SvPVX(linestr); 912 if (len <= SvLEN(linestr)) 913 return buf; 914 bufend_pos = PL_parser->bufend - buf; 915 bufptr_pos = PL_parser->bufptr - buf; 916 oldbufptr_pos = PL_parser->oldbufptr - buf; 917 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 918 linestart_pos = PL_parser->linestart - buf; 919 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 920 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 921 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? 922 PL_parser->lex_shared->re_eval_start - buf : 0; 923 924 buf = sv_grow(linestr, len); 925 926 PL_parser->bufend = buf + bufend_pos; 927 PL_parser->bufptr = buf + bufptr_pos; 928 PL_parser->oldbufptr = buf + oldbufptr_pos; 929 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 930 PL_parser->linestart = buf + linestart_pos; 931 if (PL_parser->last_uni) 932 PL_parser->last_uni = buf + last_uni_pos; 933 if (PL_parser->last_lop) 934 PL_parser->last_lop = buf + last_lop_pos; 935 if (PL_parser->lex_shared->re_eval_start) 936 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 937 return buf; 938 } 939 940 /* 941 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags 942 943 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 944 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 945 reallocating the buffer if necessary. This means that lexing code that 946 runs later will see the characters as if they had appeared in the input. 947 It is not recommended to do this as part of normal parsing, and most 948 uses of this facility run the risk of the inserted characters being 949 interpreted in an unintended manner. 950 951 The string to be inserted is represented by C<len> octets starting 952 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1, 953 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>. 954 The characters are recoded for the lexer buffer, according to how the 955 buffer is currently being interpreted (L</lex_bufutf8>). If a string 956 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 957 function is more convenient. 958 959 =cut 960 */ 961 962 void 963 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 964 { 965 dVAR; 966 char *bufptr; 967 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 968 if (flags & ~(LEX_STUFF_UTF8)) 969 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 970 if (UTF) { 971 if (flags & LEX_STUFF_UTF8) { 972 goto plain_copy; 973 } else { 974 STRLEN highhalf = 0; /* Count of variants */ 975 const char *p, *e = pv+len; 976 for (p = pv; p != e; p++) { 977 if (! UTF8_IS_INVARIANT(*p)) { 978 highhalf++; 979 } 980 } 981 if (!highhalf) 982 goto plain_copy; 983 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 984 bufptr = PL_parser->bufptr; 985 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 986 SvCUR_set(PL_parser->linestr, 987 SvCUR(PL_parser->linestr) + len+highhalf); 988 PL_parser->bufend += len+highhalf; 989 for (p = pv; p != e; p++) { 990 U8 c = (U8)*p; 991 if (! UTF8_IS_INVARIANT(c)) { 992 *bufptr++ = UTF8_TWO_BYTE_HI(c); 993 *bufptr++ = UTF8_TWO_BYTE_LO(c); 994 } else { 995 *bufptr++ = (char)c; 996 } 997 } 998 } 999 } else { 1000 if (flags & LEX_STUFF_UTF8) { 1001 STRLEN highhalf = 0; 1002 const char *p, *e = pv+len; 1003 for (p = pv; p != e; p++) { 1004 U8 c = (U8)*p; 1005 if (UTF8_IS_ABOVE_LATIN1(c)) { 1006 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1007 "non-Latin-1 character into Latin-1 input"); 1008 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1009 p++; 1010 highhalf++; 1011 } else if (! UTF8_IS_INVARIANT(c)) { 1012 /* malformed UTF-8 */ 1013 ENTER; 1014 SAVESPTR(PL_warnhook); 1015 PL_warnhook = PERL_WARNHOOK_FATAL; 1016 utf8n_to_uvchr((U8*)p, e-p, NULL, 0); 1017 LEAVE; 1018 } 1019 } 1020 if (!highhalf) 1021 goto plain_copy; 1022 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1023 bufptr = PL_parser->bufptr; 1024 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1025 SvCUR_set(PL_parser->linestr, 1026 SvCUR(PL_parser->linestr) + len-highhalf); 1027 PL_parser->bufend += len-highhalf; 1028 p = pv; 1029 while (p < e) { 1030 if (UTF8_IS_INVARIANT(*p)) { 1031 *bufptr++ = *p; 1032 p++; 1033 } 1034 else { 1035 assert(p < e -1 ); 1036 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); 1037 p += 2; 1038 } 1039 } 1040 } else { 1041 plain_copy: 1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1043 bufptr = PL_parser->bufptr; 1044 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1045 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1046 PL_parser->bufend += len; 1047 Copy(pv, bufptr, len, char); 1048 } 1049 } 1050 } 1051 1052 /* 1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags 1054 1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1057 reallocating the buffer if necessary. This means that lexing code that 1058 runs later will see the characters as if they had appeared in the input. 1059 It is not recommended to do this as part of normal parsing, and most 1060 uses of this facility run the risk of the inserted characters being 1061 interpreted in an unintended manner. 1062 1063 The string to be inserted is represented by octets starting at C<pv> 1064 and continuing to the first nul. These octets are interpreted as either 1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1066 in C<flags>. The characters are recoded for the lexer buffer, according 1067 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1068 If it is not convenient to nul-terminate a string to be inserted, the 1069 L</lex_stuff_pvn> function is more appropriate. 1070 1071 =cut 1072 */ 1073 1074 void 1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1076 { 1077 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1078 lex_stuff_pvn(pv, strlen(pv), flags); 1079 } 1080 1081 /* 1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags 1083 1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1086 reallocating the buffer if necessary. This means that lexing code that 1087 runs later will see the characters as if they had appeared in the input. 1088 It is not recommended to do this as part of normal parsing, and most 1089 uses of this facility run the risk of the inserted characters being 1090 interpreted in an unintended manner. 1091 1092 The string to be inserted is the string value of C<sv>. The characters 1093 are recoded for the lexer buffer, according to how the buffer is currently 1094 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1096 need to construct a scalar. 1097 1098 =cut 1099 */ 1100 1101 void 1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1103 { 1104 char *pv; 1105 STRLEN len; 1106 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1107 if (flags) 1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1109 pv = SvPV(sv, len); 1110 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1111 } 1112 1113 /* 1114 =for apidoc Amx|void|lex_unstuff|char *ptr 1115 1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1117 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened. 1118 This hides the discarded text from any lexing code that runs later, 1119 as if the text had never appeared. 1120 1121 This is not the normal way to consume lexed text. For that, use 1122 L</lex_read_to>. 1123 1124 =cut 1125 */ 1126 1127 void 1128 Perl_lex_unstuff(pTHX_ char *ptr) 1129 { 1130 char *buf, *bufend; 1131 STRLEN unstuff_len; 1132 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1133 buf = PL_parser->bufptr; 1134 if (ptr < buf) 1135 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1136 if (ptr == buf) 1137 return; 1138 bufend = PL_parser->bufend; 1139 if (ptr > bufend) 1140 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1141 unstuff_len = ptr - buf; 1142 Move(ptr, buf, bufend+1-ptr, char); 1143 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1144 PL_parser->bufend = bufend - unstuff_len; 1145 } 1146 1147 /* 1148 =for apidoc Amx|void|lex_read_to|char *ptr 1149 1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1151 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>, 1152 performing the correct bookkeeping whenever a newline character is passed. 1153 This is the normal way to consume lexed text. 1154 1155 Interpretation of the buffer's octets can be abstracted out by 1156 using the slightly higher-level functions L</lex_peek_unichar> and 1157 L</lex_read_unichar>. 1158 1159 =cut 1160 */ 1161 1162 void 1163 Perl_lex_read_to(pTHX_ char *ptr) 1164 { 1165 char *s; 1166 PERL_ARGS_ASSERT_LEX_READ_TO; 1167 s = PL_parser->bufptr; 1168 if (ptr < s || ptr > PL_parser->bufend) 1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1170 for (; s != ptr; s++) 1171 if (*s == '\n') { 1172 COPLINE_INC_WITH_HERELINES; 1173 PL_parser->linestart = s+1; 1174 } 1175 PL_parser->bufptr = ptr; 1176 } 1177 1178 /* 1179 =for apidoc Amx|void|lex_discard_to|char *ptr 1180 1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1182 up to C<ptr>. The remaining content of the buffer will be moved, and 1183 all pointers into the buffer updated appropriately. C<ptr> must not 1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1185 it is not permitted to discard text that has yet to be lexed. 1186 1187 Normally it is not necessarily to do this directly, because it suffices to 1188 use the implicit discarding behaviour of L</lex_next_chunk> and things 1189 based on it. However, if a token stretches across multiple lines, 1190 and the lexing code has kept multiple lines of text in the buffer for 1191 that purpose, then after completion of the token it would be wise to 1192 explicitly discard the now-unneeded earlier lines, to avoid future 1193 multi-line tokens growing the buffer without bound. 1194 1195 =cut 1196 */ 1197 1198 void 1199 Perl_lex_discard_to(pTHX_ char *ptr) 1200 { 1201 char *buf; 1202 STRLEN discard_len; 1203 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1204 buf = SvPVX(PL_parser->linestr); 1205 if (ptr < buf) 1206 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1207 if (ptr == buf) 1208 return; 1209 if (ptr > PL_parser->bufptr) 1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1211 discard_len = ptr - buf; 1212 if (PL_parser->oldbufptr < ptr) 1213 PL_parser->oldbufptr = ptr; 1214 if (PL_parser->oldoldbufptr < ptr) 1215 PL_parser->oldoldbufptr = ptr; 1216 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1217 PL_parser->last_uni = NULL; 1218 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1219 PL_parser->last_lop = NULL; 1220 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1221 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1222 PL_parser->bufend -= discard_len; 1223 PL_parser->bufptr -= discard_len; 1224 PL_parser->oldbufptr -= discard_len; 1225 PL_parser->oldoldbufptr -= discard_len; 1226 if (PL_parser->last_uni) 1227 PL_parser->last_uni -= discard_len; 1228 if (PL_parser->last_lop) 1229 PL_parser->last_lop -= discard_len; 1230 } 1231 1232 /* 1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags 1234 1235 Reads in the next chunk of text to be lexed, appending it to 1236 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1237 looked to the end of the current chunk and wants to know more. It is 1238 usual, but not necessary, for lexing to have consumed the entirety of 1239 the current chunk at this time. 1240 1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1242 chunk (i.e., the current chunk has been entirely consumed), normally the 1243 current chunk will be discarded at the same time that the new chunk is 1244 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk 1245 will not be discarded. If the current chunk has not been entirely 1246 consumed, then it will not be discarded regardless of the flag. 1247 1248 Returns true if some new text was added to the buffer, or false if the 1249 buffer has reached the end of the input text. 1250 1251 =cut 1252 */ 1253 1254 #define LEX_FAKE_EOF 0x80000000 1255 #define LEX_NO_TERM 0x40000000 /* here-doc */ 1256 1257 bool 1258 Perl_lex_next_chunk(pTHX_ U32 flags) 1259 { 1260 SV *linestr; 1261 char *buf; 1262 STRLEN old_bufend_pos, new_bufend_pos; 1263 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1264 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1265 bool got_some_for_debugger = 0; 1266 bool got_some; 1267 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1268 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1269 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1270 return FALSE; 1271 linestr = PL_parser->linestr; 1272 buf = SvPVX(linestr); 1273 if (!(flags & LEX_KEEP_PREVIOUS) 1274 && PL_parser->bufptr == PL_parser->bufend) 1275 { 1276 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1277 linestart_pos = 0; 1278 if (PL_parser->last_uni != PL_parser->bufend) 1279 PL_parser->last_uni = NULL; 1280 if (PL_parser->last_lop != PL_parser->bufend) 1281 PL_parser->last_lop = NULL; 1282 last_uni_pos = last_lop_pos = 0; 1283 *buf = 0; 1284 SvCUR(linestr) = 0; 1285 } else { 1286 old_bufend_pos = PL_parser->bufend - buf; 1287 bufptr_pos = PL_parser->bufptr - buf; 1288 oldbufptr_pos = PL_parser->oldbufptr - buf; 1289 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1290 linestart_pos = PL_parser->linestart - buf; 1291 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1292 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1293 } 1294 if (flags & LEX_FAKE_EOF) { 1295 goto eof; 1296 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1297 got_some = 0; 1298 } else if (filter_gets(linestr, old_bufend_pos)) { 1299 got_some = 1; 1300 got_some_for_debugger = 1; 1301 } else if (flags & LEX_NO_TERM) { 1302 got_some = 0; 1303 } else { 1304 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1305 sv_setpvs(linestr, ""); 1306 eof: 1307 /* End of real input. Close filehandle (unless it was STDIN), 1308 * then add implicit termination. 1309 */ 1310 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1311 PerlIO_clearerr(PL_parser->rsfp); 1312 else if (PL_parser->rsfp) 1313 (void)PerlIO_close(PL_parser->rsfp); 1314 PL_parser->rsfp = NULL; 1315 PL_parser->in_pod = PL_parser->filtered = 0; 1316 if (!PL_in_eval && PL_minus_p) { 1317 sv_catpvs(linestr, 1318 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1319 PL_minus_n = PL_minus_p = 0; 1320 } else if (!PL_in_eval && PL_minus_n) { 1321 sv_catpvs(linestr, /*{*/";}"); 1322 PL_minus_n = 0; 1323 } else 1324 sv_catpvs(linestr, ";"); 1325 got_some = 1; 1326 } 1327 buf = SvPVX(linestr); 1328 new_bufend_pos = SvCUR(linestr); 1329 PL_parser->bufend = buf + new_bufend_pos; 1330 PL_parser->bufptr = buf + bufptr_pos; 1331 PL_parser->oldbufptr = buf + oldbufptr_pos; 1332 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1333 PL_parser->linestart = buf + linestart_pos; 1334 if (PL_parser->last_uni) 1335 PL_parser->last_uni = buf + last_uni_pos; 1336 if (PL_parser->last_lop) 1337 PL_parser->last_lop = buf + last_lop_pos; 1338 if (PL_parser->preambling != NOLINE) { 1339 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1340 PL_parser->preambling = NOLINE; 1341 } 1342 if ( got_some_for_debugger 1343 && PERLDB_LINE_OR_SAVESRC 1344 && PL_curstash != PL_debstash) 1345 { 1346 /* debugger active and we're not compiling the debugger code, 1347 * so store the line into the debugger's array of lines 1348 */ 1349 update_debugger_info(NULL, buf+old_bufend_pos, 1350 new_bufend_pos-old_bufend_pos); 1351 } 1352 return got_some; 1353 } 1354 1355 /* 1356 =for apidoc Amx|I32|lex_peek_unichar|U32 flags 1357 1358 Looks ahead one (Unicode) character in the text currently being lexed. 1359 Returns the codepoint (unsigned integer value) of the next character, 1360 or -1 if lexing has reached the end of the input text. To consume the 1361 peeked character, use L</lex_read_unichar>. 1362 1363 If the next character is in (or extends into) the next chunk of input 1364 text, the next chunk will be read in. Normally the current chunk will be 1365 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1366 bit set, then the current chunk will not be discarded. 1367 1368 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1369 is encountered, an exception is generated. 1370 1371 =cut 1372 */ 1373 1374 I32 1375 Perl_lex_peek_unichar(pTHX_ U32 flags) 1376 { 1377 dVAR; 1378 char *s, *bufend; 1379 if (flags & ~(LEX_KEEP_PREVIOUS)) 1380 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1381 s = PL_parser->bufptr; 1382 bufend = PL_parser->bufend; 1383 if (UTF) { 1384 U8 head; 1385 I32 unichar; 1386 STRLEN len, retlen; 1387 if (s == bufend) { 1388 if (!lex_next_chunk(flags)) 1389 return -1; 1390 s = PL_parser->bufptr; 1391 bufend = PL_parser->bufend; 1392 } 1393 head = (U8)*s; 1394 if (UTF8_IS_INVARIANT(head)) 1395 return head; 1396 if (UTF8_IS_START(head)) { 1397 len = UTF8SKIP(&head); 1398 while ((STRLEN)(bufend-s) < len) { 1399 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1400 break; 1401 s = PL_parser->bufptr; 1402 bufend = PL_parser->bufend; 1403 } 1404 } 1405 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1406 if (retlen == (STRLEN)-1) { 1407 /* malformed UTF-8 */ 1408 ENTER; 1409 SAVESPTR(PL_warnhook); 1410 PL_warnhook = PERL_WARNHOOK_FATAL; 1411 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0); 1412 LEAVE; 1413 } 1414 return unichar; 1415 } else { 1416 if (s == bufend) { 1417 if (!lex_next_chunk(flags)) 1418 return -1; 1419 s = PL_parser->bufptr; 1420 } 1421 return (U8)*s; 1422 } 1423 } 1424 1425 /* 1426 =for apidoc Amx|I32|lex_read_unichar|U32 flags 1427 1428 Reads the next (Unicode) character in the text currently being lexed. 1429 Returns the codepoint (unsigned integer value) of the character read, 1430 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1431 if lexing has reached the end of the input text. To non-destructively 1432 examine the next character, use L</lex_peek_unichar> instead. 1433 1434 If the next character is in (or extends into) the next chunk of input 1435 text, the next chunk will be read in. Normally the current chunk will be 1436 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1437 bit set, then the current chunk will not be discarded. 1438 1439 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1440 is encountered, an exception is generated. 1441 1442 =cut 1443 */ 1444 1445 I32 1446 Perl_lex_read_unichar(pTHX_ U32 flags) 1447 { 1448 I32 c; 1449 if (flags & ~(LEX_KEEP_PREVIOUS)) 1450 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1451 c = lex_peek_unichar(flags); 1452 if (c != -1) { 1453 if (c == '\n') 1454 COPLINE_INC_WITH_HERELINES; 1455 if (UTF) 1456 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1457 else 1458 ++(PL_parser->bufptr); 1459 } 1460 return c; 1461 } 1462 1463 /* 1464 =for apidoc Amx|void|lex_read_space|U32 flags 1465 1466 Reads optional spaces, in Perl style, in the text currently being 1467 lexed. The spaces may include ordinary whitespace characters and 1468 Perl-style comments. C<#line> directives are processed if encountered. 1469 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1470 at a non-space character (or the end of the input text). 1471 1472 If spaces extend into the next chunk of input text, the next chunk will 1473 be read in. Normally the current chunk will be discarded at the same 1474 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current 1475 chunk will not be discarded. 1476 1477 =cut 1478 */ 1479 1480 #define LEX_NO_INCLINE 0x40000000 1481 #define LEX_NO_NEXT_CHUNK 0x80000000 1482 1483 void 1484 Perl_lex_read_space(pTHX_ U32 flags) 1485 { 1486 char *s, *bufend; 1487 const bool can_incline = !(flags & LEX_NO_INCLINE); 1488 bool need_incline = 0; 1489 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1490 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1491 s = PL_parser->bufptr; 1492 bufend = PL_parser->bufend; 1493 while (1) { 1494 char c = *s; 1495 if (c == '#') { 1496 do { 1497 c = *++s; 1498 } while (!(c == '\n' || (c == 0 && s == bufend))); 1499 } else if (c == '\n') { 1500 s++; 1501 if (can_incline) { 1502 PL_parser->linestart = s; 1503 if (s == bufend) 1504 need_incline = 1; 1505 else 1506 incline(s); 1507 } 1508 } else if (isSPACE(c)) { 1509 s++; 1510 } else if (c == 0 && s == bufend) { 1511 bool got_more; 1512 line_t l; 1513 if (flags & LEX_NO_NEXT_CHUNK) 1514 break; 1515 PL_parser->bufptr = s; 1516 l = CopLINE(PL_curcop); 1517 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1518 got_more = lex_next_chunk(flags); 1519 CopLINE_set(PL_curcop, l); 1520 s = PL_parser->bufptr; 1521 bufend = PL_parser->bufend; 1522 if (!got_more) 1523 break; 1524 if (can_incline && need_incline && PL_parser->rsfp) { 1525 incline(s); 1526 need_incline = 0; 1527 } 1528 } else if (!c) { 1529 s++; 1530 } else { 1531 break; 1532 } 1533 } 1534 PL_parser->bufptr = s; 1535 } 1536 1537 /* 1538 1539 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn 1540 1541 This function performs syntax checking on a prototype, C<proto>. 1542 If C<warn> is true, any illegal characters or mismatched brackets 1543 will trigger illegalproto warnings, declaring that they were 1544 detected in the prototype for C<name>. 1545 1546 The return value is C<true> if this is a valid prototype, and 1547 C<false> if it is not, regardless of whether C<warn> was C<true> or 1548 C<false>. 1549 1550 Note that C<NULL> is a valid C<proto> and will always return C<true>. 1551 1552 =cut 1553 1554 */ 1555 1556 bool 1557 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) 1558 { 1559 STRLEN len, origlen; 1560 char *p = proto ? SvPV(proto, len) : NULL; 1561 bool bad_proto = FALSE; 1562 bool in_brackets = FALSE; 1563 bool after_slash = FALSE; 1564 char greedy_proto = ' '; 1565 bool proto_after_greedy_proto = FALSE; 1566 bool must_be_last = FALSE; 1567 bool underscore = FALSE; 1568 bool bad_proto_after_underscore = FALSE; 1569 1570 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1571 1572 if (!proto) 1573 return TRUE; 1574 1575 origlen = len; 1576 for (; len--; p++) { 1577 if (!isSPACE(*p)) { 1578 if (must_be_last) 1579 proto_after_greedy_proto = TRUE; 1580 if (underscore) { 1581 if (!strchr(";@%", *p)) 1582 bad_proto_after_underscore = TRUE; 1583 underscore = FALSE; 1584 } 1585 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { 1586 bad_proto = TRUE; 1587 } 1588 else { 1589 if (*p == '[') 1590 in_brackets = TRUE; 1591 else if (*p == ']') 1592 in_brackets = FALSE; 1593 else if ((*p == '@' || *p == '%') 1594 && !after_slash 1595 && !in_brackets ) 1596 { 1597 must_be_last = TRUE; 1598 greedy_proto = *p; 1599 } 1600 else if (*p == '_') 1601 underscore = TRUE; 1602 } 1603 if (*p == '\\') 1604 after_slash = TRUE; 1605 else 1606 after_slash = FALSE; 1607 } 1608 } 1609 1610 if (warn) { 1611 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1612 p -= origlen; 1613 p = SvUTF8(proto) 1614 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1615 origlen, UNI_DISPLAY_ISPRINT) 1616 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1617 1618 if (proto_after_greedy_proto) 1619 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1620 "Prototype after '%c' for %"SVf" : %s", 1621 greedy_proto, SVfARG(name), p); 1622 if (in_brackets) 1623 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1624 "Missing ']' in prototype for %"SVf" : %s", 1625 SVfARG(name), p); 1626 if (bad_proto) 1627 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1628 "Illegal character in prototype for %"SVf" : %s", 1629 SVfARG(name), p); 1630 if (bad_proto_after_underscore) 1631 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1632 "Illegal character after '_' in prototype for %"SVf" : %s", 1633 SVfARG(name), p); 1634 } 1635 1636 return (! (proto_after_greedy_proto || bad_proto) ); 1637 } 1638 1639 /* 1640 * S_incline 1641 * This subroutine has nothing to do with tilting, whether at windmills 1642 * or pinball tables. Its name is short for "increment line". It 1643 * increments the current line number in CopLINE(PL_curcop) and checks 1644 * to see whether the line starts with a comment of the form 1645 * # line 500 "foo.pm" 1646 * If so, it sets the current line number and file to the values in the comment. 1647 */ 1648 1649 STATIC void 1650 S_incline(pTHX_ const char *s) 1651 { 1652 const char *t; 1653 const char *n; 1654 const char *e; 1655 line_t line_num; 1656 UV uv; 1657 1658 PERL_ARGS_ASSERT_INCLINE; 1659 1660 COPLINE_INC_WITH_HERELINES; 1661 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1662 && s+1 == PL_bufend && *s == ';') { 1663 /* fake newline in string eval */ 1664 CopLINE_dec(PL_curcop); 1665 return; 1666 } 1667 if (*s++ != '#') 1668 return; 1669 while (SPACE_OR_TAB(*s)) 1670 s++; 1671 if (strnEQ(s, "line", 4)) 1672 s += 4; 1673 else 1674 return; 1675 if (SPACE_OR_TAB(*s)) 1676 s++; 1677 else 1678 return; 1679 while (SPACE_OR_TAB(*s)) 1680 s++; 1681 if (!isDIGIT(*s)) 1682 return; 1683 1684 n = s; 1685 while (isDIGIT(*s)) 1686 s++; 1687 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1688 return; 1689 while (SPACE_OR_TAB(*s)) 1690 s++; 1691 if (*s == '"' && (t = strchr(s+1, '"'))) { 1692 s++; 1693 e = t + 1; 1694 } 1695 else { 1696 t = s; 1697 while (*t && !isSPACE(*t)) 1698 t++; 1699 e = t; 1700 } 1701 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1702 e++; 1703 if (*e != '\n' && *e != '\0') 1704 return; /* false alarm */ 1705 1706 if (!grok_atoUV(n, &uv, &e)) 1707 return; 1708 line_num = ((line_t)uv) - 1; 1709 1710 if (t - s > 0) { 1711 const STRLEN len = t - s; 1712 1713 if (!PL_rsfp && !PL_parser->filtered) { 1714 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1715 * to *{"::_<newfilename"} */ 1716 /* However, the long form of evals is only turned on by the 1717 debugger - usually they're "(eval %lu)" */ 1718 GV * const cfgv = CopFILEGV(PL_curcop); 1719 if (cfgv) { 1720 char smallbuf[128]; 1721 STRLEN tmplen2 = len; 1722 char *tmpbuf2; 1723 GV *gv2; 1724 1725 if (tmplen2 + 2 <= sizeof smallbuf) 1726 tmpbuf2 = smallbuf; 1727 else 1728 Newx(tmpbuf2, tmplen2 + 2, char); 1729 1730 tmpbuf2[0] = '_'; 1731 tmpbuf2[1] = '<'; 1732 1733 memcpy(tmpbuf2 + 2, s, tmplen2); 1734 tmplen2 += 2; 1735 1736 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1737 if (!isGV(gv2)) { 1738 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1739 /* adjust ${"::_<newfilename"} to store the new file name */ 1740 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1741 /* The line number may differ. If that is the case, 1742 alias the saved lines that are in the array. 1743 Otherwise alias the whole array. */ 1744 if (CopLINE(PL_curcop) == line_num) { 1745 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1746 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1747 } 1748 else if (GvAV(cfgv)) { 1749 AV * const av = GvAV(cfgv); 1750 const I32 start = CopLINE(PL_curcop)+1; 1751 I32 items = AvFILLp(av) - start; 1752 if (items > 0) { 1753 AV * const av2 = GvAVn(gv2); 1754 SV **svp = AvARRAY(av) + start; 1755 I32 l = (I32)line_num+1; 1756 while (items--) 1757 av_store(av2, l++, SvREFCNT_inc(*svp++)); 1758 } 1759 } 1760 } 1761 1762 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1763 } 1764 } 1765 CopFILE_free(PL_curcop); 1766 CopFILE_setn(PL_curcop, s, len); 1767 } 1768 CopLINE_set(PL_curcop, line_num); 1769 } 1770 1771 #define skipspace(s) skipspace_flags(s, 0) 1772 1773 1774 STATIC void 1775 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1776 { 1777 AV *av = CopFILEAVx(PL_curcop); 1778 if (av) { 1779 SV * sv; 1780 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 1781 else { 1782 sv = *av_fetch(av, 0, 1); 1783 SvUPGRADE(sv, SVt_PVMG); 1784 } 1785 if (!SvPOK(sv)) sv_setpvs(sv,""); 1786 if (orig_sv) 1787 sv_catsv(sv, orig_sv); 1788 else 1789 sv_catpvn(sv, buf, len); 1790 if (!SvIOK(sv)) { 1791 (void)SvIOK_on(sv); 1792 SvIV_set(sv, 0); 1793 } 1794 if (PL_parser->preambling == NOLINE) 1795 av_store(av, CopLINE(PL_curcop), sv); 1796 } 1797 } 1798 1799 /* 1800 * S_skipspace 1801 * Called to gobble the appropriate amount and type of whitespace. 1802 * Skips comments as well. 1803 */ 1804 1805 STATIC char * 1806 S_skipspace_flags(pTHX_ char *s, U32 flags) 1807 { 1808 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 1809 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1810 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) 1811 s++; 1812 } else { 1813 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1814 PL_bufptr = s; 1815 lex_read_space(flags | LEX_KEEP_PREVIOUS | 1816 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 1817 LEX_NO_NEXT_CHUNK : 0)); 1818 s = PL_bufptr; 1819 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1820 if (PL_linestart > PL_bufptr) 1821 PL_bufptr = PL_linestart; 1822 return s; 1823 } 1824 return s; 1825 } 1826 1827 /* 1828 * S_check_uni 1829 * Check the unary operators to ensure there's no ambiguity in how they're 1830 * used. An ambiguous piece of code would be: 1831 * rand + 5 1832 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1833 * the +5 is its argument. 1834 */ 1835 1836 STATIC void 1837 S_check_uni(pTHX) 1838 { 1839 const char *s; 1840 const char *t; 1841 1842 if (PL_oldoldbufptr != PL_last_uni) 1843 return; 1844 while (isSPACE(*PL_last_uni)) 1845 PL_last_uni++; 1846 s = PL_last_uni; 1847 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-') 1848 s += UTF ? UTF8SKIP(s) : 1; 1849 if ((t = strchr(s, '(')) && t < PL_bufptr) 1850 return; 1851 1852 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 1853 "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous", 1854 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); 1855 } 1856 1857 /* 1858 * LOP : macro to build a list operator. Its behaviour has been replaced 1859 * with a subroutine, S_lop() for which LOP is just another name. 1860 */ 1861 1862 #define LOP(f,x) return lop(f,x,s) 1863 1864 /* 1865 * S_lop 1866 * Build a list operator (or something that might be one). The rules: 1867 * - if we have a next token, then it's a list operator (no parens) for 1868 * which the next token has already been parsed; e.g., 1869 * sort foo @args 1870 * sort foo (@args) 1871 * - if the next thing is an opening paren, then it's a function 1872 * - else it's a list operator 1873 */ 1874 1875 STATIC I32 1876 S_lop(pTHX_ I32 f, int x, char *s) 1877 { 1878 PERL_ARGS_ASSERT_LOP; 1879 1880 pl_yylval.ival = f; 1881 CLINE; 1882 PL_bufptr = s; 1883 PL_last_lop = PL_oldbufptr; 1884 PL_last_lop_op = (OPCODE)f; 1885 if (PL_nexttoke) 1886 goto lstop; 1887 PL_expect = x; 1888 if (*s == '(') 1889 return REPORT(FUNC); 1890 s = skipspace(s); 1891 if (*s == '(') 1892 return REPORT(FUNC); 1893 else { 1894 lstop: 1895 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 1896 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 1897 return REPORT(LSTOP); 1898 } 1899 } 1900 1901 /* 1902 * S_force_next 1903 * When the lexer realizes it knows the next token (for instance, 1904 * it is reordering tokens for the parser) then it can call S_force_next 1905 * to know what token to return the next time the lexer is called. Caller 1906 * will need to set PL_nextval[] and possibly PL_expect to ensure 1907 * the lexer handles the token correctly. 1908 */ 1909 1910 STATIC void 1911 S_force_next(pTHX_ I32 type) 1912 { 1913 #ifdef DEBUGGING 1914 if (DEBUG_T_TEST) { 1915 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 1916 tokereport(type, &NEXTVAL_NEXTTOKE); 1917 } 1918 #endif 1919 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); 1920 PL_nexttype[PL_nexttoke] = type; 1921 PL_nexttoke++; 1922 if (PL_lex_state != LEX_KNOWNEXT) { 1923 PL_lex_defer = PL_lex_state; 1924 PL_lex_state = LEX_KNOWNEXT; 1925 } 1926 } 1927 1928 /* 1929 * S_postderef 1930 * 1931 * This subroutine handles postfix deref syntax after the arrow has already 1932 * been emitted. @* $* etc. are emitted as two separate token right here. 1933 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 1934 * only the first, leaving yylex to find the next. 1935 */ 1936 1937 static int 1938 S_postderef(pTHX_ int const funny, char const next) 1939 { 1940 assert(funny == DOLSHARP || strchr("$@%&*", funny)); 1941 assert(strchr("*[{", next)); 1942 if (next == '*') { 1943 PL_expect = XOPERATOR; 1944 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 1945 assert('@' == funny || '$' == funny || DOLSHARP == funny); 1946 PL_lex_state = LEX_INTERPEND; 1947 force_next(POSTJOIN); 1948 } 1949 force_next(next); 1950 PL_bufptr+=2; 1951 } 1952 else { 1953 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL 1954 && !PL_lex_brackets) 1955 PL_lex_dojoin = 2; 1956 PL_expect = XOPERATOR; 1957 PL_bufptr++; 1958 } 1959 return funny; 1960 } 1961 1962 void 1963 Perl_yyunlex(pTHX) 1964 { 1965 int yyc = PL_parser->yychar; 1966 if (yyc != YYEMPTY) { 1967 if (yyc) { 1968 NEXTVAL_NEXTTOKE = PL_parser->yylval; 1969 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { 1970 PL_lex_allbrackets--; 1971 PL_lex_brackets--; 1972 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 1973 } else if (yyc == '('/*)*/) { 1974 PL_lex_allbrackets--; 1975 yyc |= (2<<24); 1976 } 1977 force_next(yyc); 1978 } 1979 PL_parser->yychar = YYEMPTY; 1980 } 1981 } 1982 1983 STATIC SV * 1984 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 1985 { 1986 SV * const sv = newSVpvn_utf8(start, len, 1987 !IN_BYTES 1988 && UTF 1989 && !is_invariant_string((const U8*)start, len) 1990 && is_utf8_string((const U8*)start, len)); 1991 return sv; 1992 } 1993 1994 /* 1995 * S_force_word 1996 * When the lexer knows the next thing is a word (for instance, it has 1997 * just seen -> and it knows that the next char is a word char, then 1998 * it calls S_force_word to stick the next word into the PL_nexttoke/val 1999 * lookahead. 2000 * 2001 * Arguments: 2002 * char *start : buffer position (must be within PL_linestr) 2003 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD) 2004 * int check_keyword : if true, Perl checks to make sure the word isn't 2005 * a keyword (do this if the word is a label, e.g. goto FOO) 2006 * int allow_pack : if true, : characters will also be allowed (require, 2007 * use, etc. do this) 2008 */ 2009 2010 STATIC char * 2011 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2012 { 2013 char *s; 2014 STRLEN len; 2015 2016 PERL_ARGS_ASSERT_FORCE_WORD; 2017 2018 start = skipspace(start); 2019 s = start; 2020 if (isIDFIRST_lazy_if(s,UTF) 2021 || (allow_pack && *s == ':') ) 2022 { 2023 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2024 if (check_keyword) { 2025 char *s2 = PL_tokenbuf; 2026 STRLEN len2 = len; 2027 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) 2028 s2 += 6, len2 -= 6; 2029 if (keyword(s2, len2, 0)) 2030 return start; 2031 } 2032 if (token == METHOD) { 2033 s = skipspace(s); 2034 if (*s == '(') 2035 PL_expect = XTERM; 2036 else { 2037 PL_expect = XOPERATOR; 2038 } 2039 } 2040 NEXTVAL_NEXTTOKE.opval 2041 = (OP*)newSVOP(OP_CONST,0, 2042 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2043 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2044 force_next(token); 2045 } 2046 return s; 2047 } 2048 2049 /* 2050 * S_force_ident 2051 * Called when the lexer wants $foo *foo &foo etc, but the program 2052 * text only contains the "foo" portion. The first argument is a pointer 2053 * to the "foo", and the second argument is the type symbol to prefix. 2054 * Forces the next token to be a "WORD". 2055 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2056 */ 2057 2058 STATIC void 2059 S_force_ident(pTHX_ const char *s, int kind) 2060 { 2061 PERL_ARGS_ASSERT_FORCE_IDENT; 2062 2063 if (s[0]) { 2064 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2065 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2066 UTF ? SVf_UTF8 : 0)); 2067 NEXTVAL_NEXTTOKE.opval = o; 2068 force_next(WORD); 2069 if (kind) { 2070 o->op_private = OPpCONST_ENTERED; 2071 /* XXX see note in pp_entereval() for why we forgo typo 2072 warnings if the symbol must be introduced in an eval. 2073 GSAR 96-10-12 */ 2074 gv_fetchpvn_flags(s, len, 2075 (PL_in_eval ? GV_ADDMULTI 2076 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2077 kind == '$' ? SVt_PV : 2078 kind == '@' ? SVt_PVAV : 2079 kind == '%' ? SVt_PVHV : 2080 SVt_PVGV 2081 ); 2082 } 2083 } 2084 } 2085 2086 static void 2087 S_force_ident_maybe_lex(pTHX_ char pit) 2088 { 2089 NEXTVAL_NEXTTOKE.ival = pit; 2090 force_next('p'); 2091 } 2092 2093 NV 2094 Perl_str_to_version(pTHX_ SV *sv) 2095 { 2096 NV retval = 0.0; 2097 NV nshift = 1.0; 2098 STRLEN len; 2099 const char *start = SvPV_const(sv,len); 2100 const char * const end = start + len; 2101 const bool utf = SvUTF8(sv) ? TRUE : FALSE; 2102 2103 PERL_ARGS_ASSERT_STR_TO_VERSION; 2104 2105 while (start < end) { 2106 STRLEN skip; 2107 UV n; 2108 if (utf) 2109 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2110 else { 2111 n = *(U8*)start; 2112 skip = 1; 2113 } 2114 retval += ((NV)n)/nshift; 2115 start += skip; 2116 nshift *= 1000; 2117 } 2118 return retval; 2119 } 2120 2121 /* 2122 * S_force_version 2123 * Forces the next token to be a version number. 2124 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2125 * and if "guessing" is TRUE, then no new token is created (and the caller 2126 * must use an alternative parsing method). 2127 */ 2128 2129 STATIC char * 2130 S_force_version(pTHX_ char *s, int guessing) 2131 { 2132 OP *version = NULL; 2133 char *d; 2134 2135 PERL_ARGS_ASSERT_FORCE_VERSION; 2136 2137 s = skipspace(s); 2138 2139 d = s; 2140 if (*d == 'v') 2141 d++; 2142 if (isDIGIT(*d)) { 2143 while (isDIGIT(*d) || *d == '_' || *d == '.') 2144 d++; 2145 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2146 SV *ver; 2147 s = scan_num(s, &pl_yylval); 2148 version = pl_yylval.opval; 2149 ver = cSVOPx(version)->op_sv; 2150 if (SvPOK(ver) && !SvNIOK(ver)) { 2151 SvUPGRADE(ver, SVt_PVNV); 2152 SvNV_set(ver, str_to_version(ver)); 2153 SvNOK_on(ver); /* hint that it is a version */ 2154 } 2155 } 2156 else if (guessing) { 2157 return s; 2158 } 2159 } 2160 2161 /* NOTE: The parser sees the package name and the VERSION swapped */ 2162 NEXTVAL_NEXTTOKE.opval = version; 2163 force_next(WORD); 2164 2165 return s; 2166 } 2167 2168 /* 2169 * S_force_strict_version 2170 * Forces the next token to be a version number using strict syntax rules. 2171 */ 2172 2173 STATIC char * 2174 S_force_strict_version(pTHX_ char *s) 2175 { 2176 OP *version = NULL; 2177 const char *errstr = NULL; 2178 2179 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2180 2181 while (isSPACE(*s)) /* leading whitespace */ 2182 s++; 2183 2184 if (is_STRICT_VERSION(s,&errstr)) { 2185 SV *ver = newSV(0); 2186 s = (char *)scan_version(s, ver, 0); 2187 version = newSVOP(OP_CONST, 0, ver); 2188 } 2189 else if ((*s != ';' && *s != '{' && *s != '}' ) 2190 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) 2191 { 2192 PL_bufptr = s; 2193 if (errstr) 2194 yyerror(errstr); /* version required */ 2195 return s; 2196 } 2197 2198 /* NOTE: The parser sees the package name and the VERSION swapped */ 2199 NEXTVAL_NEXTTOKE.opval = version; 2200 force_next(WORD); 2201 2202 return s; 2203 } 2204 2205 /* 2206 * S_tokeq 2207 * Tokenize a quoted string passed in as an SV. It finds the next 2208 * chunk, up to end of string or a backslash. It may make a new 2209 * SV containing that chunk (if HINT_NEW_STRING is on). It also 2210 * turns \\ into \. 2211 */ 2212 2213 STATIC SV * 2214 S_tokeq(pTHX_ SV *sv) 2215 { 2216 char *s; 2217 char *send; 2218 char *d; 2219 SV *pv = sv; 2220 2221 PERL_ARGS_ASSERT_TOKEQ; 2222 2223 assert (SvPOK(sv)); 2224 assert (SvLEN(sv)); 2225 assert (!SvIsCOW(sv)); 2226 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2227 goto finish; 2228 s = SvPVX(sv); 2229 send = SvEND(sv); 2230 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2231 while (s < send && !(*s == '\\' && s[1] == '\\')) 2232 s++; 2233 if (s == send) 2234 goto finish; 2235 d = s; 2236 if ( PL_hints & HINT_NEW_STRING ) { 2237 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2238 SVs_TEMP | SvUTF8(sv)); 2239 } 2240 while (s < send) { 2241 if (*s == '\\') { 2242 if (s + 1 < send && (s[1] == '\\')) 2243 s++; /* all that, just for this */ 2244 } 2245 *d++ = *s++; 2246 } 2247 *d = '\0'; 2248 SvCUR_set(sv, d - SvPVX_const(sv)); 2249 finish: 2250 if ( PL_hints & HINT_NEW_STRING ) 2251 return new_constant(NULL, 0, "q", sv, pv, "q", 1); 2252 return sv; 2253 } 2254 2255 /* 2256 * Now come three functions related to double-quote context, 2257 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2258 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2259 * interact with PL_lex_state, and create fake ( ... ) argument lists 2260 * to handle functions and concatenation. 2261 * For example, 2262 * "foo\lbar" 2263 * is tokenised as 2264 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2265 */ 2266 2267 /* 2268 * S_sublex_start 2269 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2270 * 2271 * Pattern matching will set PL_lex_op to the pattern-matching op to 2272 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2273 * 2274 * OP_CONST and OP_READLINE are easy--just make the new op and return. 2275 * 2276 * Everything else becomes a FUNC. 2277 * 2278 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 2279 * had an OP_CONST or OP_READLINE). This just sets us up for a 2280 * call to S_sublex_push(). 2281 */ 2282 2283 STATIC I32 2284 S_sublex_start(pTHX) 2285 { 2286 const I32 op_type = pl_yylval.ival; 2287 2288 if (op_type == OP_NULL) { 2289 pl_yylval.opval = PL_lex_op; 2290 PL_lex_op = NULL; 2291 return THING; 2292 } 2293 if (op_type == OP_CONST) { 2294 SV *sv = PL_lex_stuff; 2295 PL_lex_stuff = NULL; 2296 sv = tokeq(sv); 2297 2298 if (SvTYPE(sv) == SVt_PVIV) { 2299 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2300 STRLEN len; 2301 const char * const p = SvPV_const(sv, len); 2302 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2303 SvREFCNT_dec(sv); 2304 sv = nsv; 2305 } 2306 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); 2307 return THING; 2308 } 2309 2310 PL_sublex_info.super_state = PL_lex_state; 2311 PL_sublex_info.sub_inwhat = (U16)op_type; 2312 PL_sublex_info.sub_op = PL_lex_op; 2313 PL_lex_state = LEX_INTERPPUSH; 2314 2315 PL_expect = XTERM; 2316 if (PL_lex_op) { 2317 pl_yylval.opval = PL_lex_op; 2318 PL_lex_op = NULL; 2319 return PMFUNC; 2320 } 2321 else 2322 return FUNC; 2323 } 2324 2325 /* 2326 * S_sublex_push 2327 * Create a new scope to save the lexing state. The scope will be 2328 * ended in S_sublex_done. Returns a '(', starting the function arguments 2329 * to the uc, lc, etc. found before. 2330 * Sets PL_lex_state to LEX_INTERPCONCAT. 2331 */ 2332 2333 STATIC I32 2334 S_sublex_push(pTHX) 2335 { 2336 LEXSHARED *shared; 2337 const bool is_heredoc = PL_multi_close == '<'; 2338 ENTER; 2339 2340 PL_lex_state = PL_sublex_info.super_state; 2341 SAVEI8(PL_lex_dojoin); 2342 SAVEI32(PL_lex_brackets); 2343 SAVEI32(PL_lex_allbrackets); 2344 SAVEI32(PL_lex_formbrack); 2345 SAVEI8(PL_lex_fakeeof); 2346 SAVEI32(PL_lex_casemods); 2347 SAVEI32(PL_lex_starts); 2348 SAVEI8(PL_lex_state); 2349 SAVEI8(PL_lex_defer); 2350 SAVESPTR(PL_lex_repl); 2351 SAVEVPTR(PL_lex_inpat); 2352 SAVEI16(PL_lex_inwhat); 2353 if (is_heredoc) 2354 { 2355 SAVECOPLINE(PL_curcop); 2356 SAVEI32(PL_multi_end); 2357 SAVEI32(PL_parser->herelines); 2358 PL_parser->herelines = 0; 2359 } 2360 SAVEI8(PL_multi_close); 2361 SAVEPPTR(PL_bufptr); 2362 SAVEPPTR(PL_bufend); 2363 SAVEPPTR(PL_oldbufptr); 2364 SAVEPPTR(PL_oldoldbufptr); 2365 SAVEPPTR(PL_last_lop); 2366 SAVEPPTR(PL_last_uni); 2367 SAVEPPTR(PL_linestart); 2368 SAVESPTR(PL_linestr); 2369 SAVEGENERICPV(PL_lex_brackstack); 2370 SAVEGENERICPV(PL_lex_casestack); 2371 SAVEGENERICPV(PL_parser->lex_shared); 2372 SAVEBOOL(PL_parser->lex_re_reparsing); 2373 SAVEI32(PL_copline); 2374 2375 /* The here-doc parser needs to be able to peek into outer lexing 2376 scopes to find the body of the here-doc. So we put PL_linestr and 2377 PL_bufptr into lex_shared, to ‘share’ those values. 2378 */ 2379 PL_parser->lex_shared->ls_linestr = PL_linestr; 2380 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2381 2382 PL_linestr = PL_lex_stuff; 2383 PL_lex_repl = PL_sublex_info.repl; 2384 PL_lex_stuff = NULL; 2385 PL_sublex_info.repl = NULL; 2386 2387 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets 2388 set for an inner quote-like operator and then an error causes scope- 2389 popping. We must not have a PL_lex_stuff value left dangling, as 2390 that breaks assumptions elsewhere. See bug #123617. */ 2391 SAVEGENERICSV(PL_lex_stuff); 2392 SAVEGENERICSV(PL_sublex_info.repl); 2393 2394 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2395 = SvPVX(PL_linestr); 2396 PL_bufend += SvCUR(PL_linestr); 2397 PL_last_lop = PL_last_uni = NULL; 2398 SAVEFREESV(PL_linestr); 2399 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2400 2401 PL_lex_dojoin = FALSE; 2402 PL_lex_brackets = PL_lex_formbrack = 0; 2403 PL_lex_allbrackets = 0; 2404 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2405 Newx(PL_lex_brackstack, 120, char); 2406 Newx(PL_lex_casestack, 12, char); 2407 PL_lex_casemods = 0; 2408 *PL_lex_casestack = '\0'; 2409 PL_lex_starts = 0; 2410 PL_lex_state = LEX_INTERPCONCAT; 2411 if (is_heredoc) 2412 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2413 PL_copline = NOLINE; 2414 2415 Newxz(shared, 1, LEXSHARED); 2416 shared->ls_prev = PL_parser->lex_shared; 2417 PL_parser->lex_shared = shared; 2418 2419 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 2420 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2421 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2422 PL_lex_inpat = PL_sublex_info.sub_op; 2423 else 2424 PL_lex_inpat = NULL; 2425 2426 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2427 PL_in_eval &= ~EVAL_RE_REPARSING; 2428 2429 return '('; 2430 } 2431 2432 /* 2433 * S_sublex_done 2434 * Restores lexer state after a S_sublex_push. 2435 */ 2436 2437 STATIC I32 2438 S_sublex_done(pTHX) 2439 { 2440 if (!PL_lex_starts++) { 2441 SV * const sv = newSVpvs(""); 2442 if (SvUTF8(PL_linestr)) 2443 SvUTF8_on(sv); 2444 PL_expect = XOPERATOR; 2445 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2446 return THING; 2447 } 2448 2449 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2450 PL_lex_state = LEX_INTERPCASEMOD; 2451 return yylex(); 2452 } 2453 2454 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2455 assert(PL_lex_inwhat != OP_TRANSR); 2456 if (PL_lex_repl) { 2457 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2458 PL_linestr = PL_lex_repl; 2459 PL_lex_inpat = 0; 2460 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2461 PL_bufend += SvCUR(PL_linestr); 2462 PL_last_lop = PL_last_uni = NULL; 2463 PL_lex_dojoin = FALSE; 2464 PL_lex_brackets = 0; 2465 PL_lex_allbrackets = 0; 2466 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2467 PL_lex_casemods = 0; 2468 *PL_lex_casestack = '\0'; 2469 PL_lex_starts = 0; 2470 if (SvEVALED(PL_lex_repl)) { 2471 PL_lex_state = LEX_INTERPNORMAL; 2472 PL_lex_starts++; 2473 /* we don't clear PL_lex_repl here, so that we can check later 2474 whether this is an evalled subst; that means we rely on the 2475 logic to ensure sublex_done() is called again only via the 2476 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2477 } 2478 else { 2479 PL_lex_state = LEX_INTERPCONCAT; 2480 PL_lex_repl = NULL; 2481 } 2482 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2483 CopLINE(PL_curcop) += 2484 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow 2485 + PL_parser->herelines; 2486 PL_parser->herelines = 0; 2487 } 2488 return '/'; 2489 } 2490 else { 2491 const line_t l = CopLINE(PL_curcop); 2492 LEAVE; 2493 if (PL_multi_close == '<') 2494 PL_parser->herelines += l - PL_multi_end; 2495 PL_bufend = SvPVX(PL_linestr); 2496 PL_bufend += SvCUR(PL_linestr); 2497 PL_expect = XOPERATOR; 2498 return ')'; 2499 } 2500 } 2501 2502 PERL_STATIC_INLINE SV* 2503 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) 2504 { 2505 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2506 * interior, hence to the "}". Finds what the name resolves to, returning 2507 * an SV* containing it; NULL if no valid one found */ 2508 2509 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0); 2510 2511 HV * table; 2512 SV **cvp; 2513 SV *cv; 2514 SV *rv; 2515 HV *stash; 2516 const U8* first_bad_char_loc; 2517 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ 2518 2519 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2520 2521 if (!SvCUR(res)) { 2522 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), 2523 "Unknown charname '' is deprecated"); 2524 return res; 2525 } 2526 2527 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, 2528 e - backslash_ptr, 2529 &first_bad_char_loc)) 2530 { 2531 /* If warnings are on, this will print a more detailed analysis of what 2532 * is wrong than the error message below */ 2533 utf8n_to_uvchr(first_bad_char_loc, 2534 e - ((char *) first_bad_char_loc), 2535 NULL, 0); 2536 2537 /* We deliberately don't try to print the malformed character, which 2538 * might not print very well; it also may be just the first of many 2539 * malformations, so don't print what comes after it */ 2540 yyerror_pv(Perl_form(aTHX_ 2541 "Malformed UTF-8 character immediately after '%.*s'", 2542 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr), 2543 SVf_UTF8); 2544 return NULL; 2545 } 2546 2547 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, 2548 /* include the <}> */ 2549 e - backslash_ptr + 1); 2550 if (! SvPOK(res)) { 2551 SvREFCNT_dec_NN(res); 2552 return NULL; 2553 } 2554 2555 /* See if the charnames handler is the Perl core's, and if so, we can skip 2556 * the validation needed for a user-supplied one, as Perl's does its own 2557 * validation. */ 2558 table = GvHV(PL_hintgv); /* ^H */ 2559 cvp = hv_fetchs(table, "charnames", FALSE); 2560 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2561 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2562 { 2563 const char * const name = HvNAME(stash); 2564 if (HvNAMELEN(stash) == sizeof("_charnames")-1 2565 && strEQ(name, "_charnames")) { 2566 return res; 2567 } 2568 } 2569 2570 /* Here, it isn't Perl's charname handler. We can't rely on a 2571 * user-supplied handler to validate the input name. For non-ut8 input, 2572 * look to see that the first character is legal. Then loop through the 2573 * rest checking that each is a continuation */ 2574 2575 /* This code makes the reasonable assumption that the only Latin1-range 2576 * characters that begin a character name alias are alphabetic, otherwise 2577 * would have to create a isCHARNAME_BEGIN macro */ 2578 2579 if (! UTF) { 2580 if (! isALPHAU(*s)) { 2581 goto bad_charname; 2582 } 2583 s++; 2584 while (s < e) { 2585 if (! isCHARNAME_CONT(*s)) { 2586 goto bad_charname; 2587 } 2588 if (*s == ' ' && *(s-1) == ' ') { 2589 goto multi_spaces; 2590 } 2591 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) { 2592 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2593 "NO-BREAK SPACE in a charnames " 2594 "alias definition is deprecated"); 2595 } 2596 s++; 2597 } 2598 } 2599 else { 2600 /* Similarly for utf8. For invariants can check directly; for other 2601 * Latin1, can calculate their code point and check; otherwise use a 2602 * swash */ 2603 if (UTF8_IS_INVARIANT(*s)) { 2604 if (! isALPHAU(*s)) { 2605 goto bad_charname; 2606 } 2607 s++; 2608 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2609 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) { 2610 goto bad_charname; 2611 } 2612 s += 2; 2613 } 2614 else { 2615 if (! PL_utf8_charname_begin) { 2616 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2617 PL_utf8_charname_begin = _core_swash_init("utf8", 2618 "_Perl_Charname_Begin", 2619 &PL_sv_undef, 2620 1, 0, NULL, &flags); 2621 } 2622 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { 2623 goto bad_charname; 2624 } 2625 s += UTF8SKIP(s); 2626 } 2627 2628 while (s < e) { 2629 if (UTF8_IS_INVARIANT(*s)) { 2630 if (! isCHARNAME_CONT(*s)) { 2631 goto bad_charname; 2632 } 2633 if (*s == ' ' && *(s-1) == ' ') { 2634 goto multi_spaces; 2635 } 2636 s++; 2637 } 2638 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2639 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) 2640 { 2641 goto bad_charname; 2642 } 2643 if (*s == *NBSP_UTF8 2644 && *(s+1) == *(NBSP_UTF8+1) 2645 && ckWARN_d(WARN_DEPRECATED)) 2646 { 2647 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2648 "NO-BREAK SPACE in a charnames " 2649 "alias definition is deprecated"); 2650 } 2651 s += 2; 2652 } 2653 else { 2654 if (! PL_utf8_charname_continue) { 2655 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2656 PL_utf8_charname_continue = _core_swash_init("utf8", 2657 "_Perl_Charname_Continue", 2658 &PL_sv_undef, 2659 1, 0, NULL, &flags); 2660 } 2661 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { 2662 goto bad_charname; 2663 } 2664 s += UTF8SKIP(s); 2665 } 2666 } 2667 } 2668 if (*(s-1) == ' ') { 2669 yyerror_pv( 2670 Perl_form(aTHX_ 2671 "charnames alias definitions may not contain trailing " 2672 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", 2673 (int)(s - backslash_ptr + 1), backslash_ptr, 2674 (int)(e - s + 1), s + 1 2675 ), 2676 UTF ? SVf_UTF8 : 0); 2677 return NULL; 2678 } 2679 2680 if (SvUTF8(res)) { /* Don't accept malformed input */ 2681 const U8* first_bad_char_loc; 2682 STRLEN len; 2683 const char* const str = SvPV_const(res, len); 2684 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { 2685 /* If warnings are on, this will print a more detailed analysis of 2686 * what is wrong than the error message below */ 2687 utf8n_to_uvchr(first_bad_char_loc, 2688 (char *) first_bad_char_loc - str, 2689 NULL, 0); 2690 2691 /* We deliberately don't try to print the malformed character, 2692 * which might not print very well; it also may be just the first 2693 * of many malformations, so don't print what comes after it */ 2694 yyerror_pv( 2695 Perl_form(aTHX_ 2696 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2697 (int) (e - backslash_ptr + 1), backslash_ptr, 2698 (int) ((char *) first_bad_char_loc - str), str 2699 ), 2700 SVf_UTF8); 2701 return NULL; 2702 } 2703 } 2704 2705 return res; 2706 2707 bad_charname: { 2708 2709 /* The final %.*s makes sure that should the trailing NUL be missing 2710 * that this print won't run off the end of the string */ 2711 yyerror_pv( 2712 Perl_form(aTHX_ 2713 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 2714 (int)(s - backslash_ptr + 1), backslash_ptr, 2715 (int)(e - s + 1), s + 1 2716 ), 2717 UTF ? SVf_UTF8 : 0); 2718 return NULL; 2719 } 2720 2721 multi_spaces: 2722 yyerror_pv( 2723 Perl_form(aTHX_ 2724 "charnames alias definitions may not contain a sequence of " 2725 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", 2726 (int)(s - backslash_ptr + 1), backslash_ptr, 2727 (int)(e - s + 1), s + 1 2728 ), 2729 UTF ? SVf_UTF8 : 0); 2730 return NULL; 2731 } 2732 2733 /* 2734 scan_const 2735 2736 Extracts the next constant part of a pattern, double-quoted string, 2737 or transliteration. This is terrifying code. 2738 2739 For example, in parsing the double-quoted string "ab\x63$d", it would 2740 stop at the '$' and return an OP_CONST containing 'abc'. 2741 2742 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 2743 processing a pattern (PL_lex_inpat is true), a transliteration 2744 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 2745 2746 Returns a pointer to the character scanned up to. If this is 2747 advanced from the start pointer supplied (i.e. if anything was 2748 successfully parsed), will leave an OP_CONST for the substring scanned 2749 in pl_yylval. Caller must intuit reason for not parsing further 2750 by looking at the next characters herself. 2751 2752 In patterns: 2753 expand: 2754 \N{FOO} => \N{U+hex_for_character_FOO} 2755 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 2756 2757 pass through: 2758 all other \-char, including \N and \N{ apart from \N{ABC} 2759 2760 stops on: 2761 @ and $ where it appears to be a var, but not for $ as tail anchor 2762 \l \L \u \U \Q \E 2763 (?{ or (??{ 2764 2765 In transliterations: 2766 characters are VERY literal, except for - not at the start or end 2767 of the string, which indicates a range. If the range is in bytes, 2768 scan_const expands the range to the full set of intermediate 2769 characters. If the range is in utf8, the hyphen is replaced with 2770 a certain range mark which will be handled by pmtrans() in op.c. 2771 2772 In double-quoted strings: 2773 backslashes: 2774 double-quoted style: \r and \n 2775 constants: \x31, etc. 2776 deprecated backrefs: \1 (in substitution replacements) 2777 case and quoting: \U \Q \E 2778 stops on @ and $ 2779 2780 scan_const does *not* construct ops to handle interpolated strings. 2781 It stops processing as soon as it finds an embedded $ or @ variable 2782 and leaves it to the caller to work out what's going on. 2783 2784 embedded arrays (whether in pattern or not) could be: 2785 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 2786 2787 $ in double-quoted strings must be the symbol of an embedded scalar. 2788 2789 $ in pattern could be $foo or could be tail anchor. Assumption: 2790 it's a tail anchor if $ is the last thing in the string, or if it's 2791 followed by one of "()| \r\n\t" 2792 2793 \1 (backreferences) are turned into $1 in substitutions 2794 2795 The structure of the code is 2796 while (there's a character to process) { 2797 handle transliteration ranges 2798 skip regexp comments /(?#comment)/ and codes /(?{code})/ 2799 skip #-initiated comments in //x patterns 2800 check for embedded arrays 2801 check for embedded scalars 2802 if (backslash) { 2803 deprecate \1 in substitution replacements 2804 handle string-changing backslashes \l \U \Q \E, etc. 2805 switch (what was escaped) { 2806 handle \- in a transliteration (becomes a literal -) 2807 if a pattern and not \N{, go treat as regular character 2808 handle \132 (octal characters) 2809 handle \x15 and \x{1234} (hex characters) 2810 handle \N{name} (named characters, also \N{3,5} in a pattern) 2811 handle \cV (control characters) 2812 handle printf-style backslashes (\f, \r, \n, etc) 2813 } (end switch) 2814 continue 2815 } (end if backslash) 2816 handle regular character 2817 } (end while character to read) 2818 2819 */ 2820 2821 STATIC char * 2822 S_scan_const(pTHX_ char *start) 2823 { 2824 char *send = PL_bufend; /* end of the constant */ 2825 SV *sv = newSV(send - start); /* sv for the constant. See note below 2826 on sizing. */ 2827 char *s = start; /* start of the constant */ 2828 char *d = SvPVX(sv); /* destination for copies */ 2829 bool dorange = FALSE; /* are we in a translit range? */ 2830 bool didrange = FALSE; /* did we just finish a range? */ 2831 bool in_charclass = FALSE; /* within /[...]/ */ 2832 bool has_utf8 = FALSE; /* Output constant is UTF8 */ 2833 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be 2834 UTF8? But, this can show as true 2835 when the source isn't utf8, as for 2836 example when it is entirely composed 2837 of hex constants */ 2838 SV *res; /* result from charnames */ 2839 STRLEN offset_to_max; /* The offset in the output to where the range 2840 high-end character is temporarily placed */ 2841 2842 /* Note on sizing: The scanned constant is placed into sv, which is 2843 * initialized by newSV() assuming one byte of output for every byte of 2844 * input. This routine expects newSV() to allocate an extra byte for a 2845 * trailing NUL, which this routine will append if it gets to the end of 2846 * the input. There may be more bytes of input than output (eg., \N{LATIN 2847 * CAPITAL LETTER A}), or more output than input if the constant ends up 2848 * recoded to utf8, but each time a construct is found that might increase 2849 * the needed size, SvGROW() is called. Its size parameter each time is 2850 * based on the best guess estimate at the time, namely the length used so 2851 * far, plus the length the current construct will occupy, plus room for 2852 * the trailing NUL, plus one byte for every input byte still unscanned */ 2853 2854 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 2855 before set */ 2856 #ifdef EBCDIC 2857 int backslash_N = 0; /* ? was the character from \N{} */ 2858 int non_portable_endpoint = 0; /* ? In a range is an endpoint 2859 platform-specific like \x65 */ 2860 #endif 2861 2862 PERL_ARGS_ASSERT_SCAN_CONST; 2863 2864 assert(PL_lex_inwhat != OP_TRANSR); 2865 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 2866 /* If we are doing a trans and we know we want UTF8 set expectation */ 2867 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 2868 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 2869 } 2870 2871 /* Protect sv from errors and fatal warnings. */ 2872 ENTER_with_name("scan_const"); 2873 SAVEFREESV(sv); 2874 2875 while (s < send 2876 || dorange /* Handle tr/// range at right edge of input */ 2877 ) { 2878 2879 /* get transliterations out of the way (they're most literal) */ 2880 if (PL_lex_inwhat == OP_TRANS) { 2881 2882 /* But there isn't any special handling necessary unless there is a 2883 * range, so for most cases we just drop down and handle the value 2884 * as any other. There are two exceptions. 2885 * 2886 * 1. A minus sign indicates that we are actually going to have 2887 * a range. In this case, skip the '-', set a flag, then drop 2888 * down to handle what should be the end range value. 2889 * 2. After we've handled that value, the next time through, that 2890 * flag is set and we fix up the range. 2891 * 2892 * Ranges entirely within Latin1 are expanded out entirely, in 2893 * order to avoid the significant overhead of making a swash. 2894 * Ranges that extend above Latin1 have to have a swash, so there 2895 * is no advantage to abbreviate them here, so they are stored here 2896 * as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies a 2897 * hyphen without any possible ambiguity. On EBCDIC machines, if 2898 * the range is expressed as Unicode, the Latin1 portion is 2899 * expanded out even if the entire range extends above Latin1. 2900 * This is because each code point in it has to be processed here 2901 * individually to get its native translation */ 2902 2903 if (! dorange) { 2904 2905 /* Here, we don't think we're in a range. If we've processed 2906 * at least one character, then see if this next one is a '-', 2907 * indicating the previous one was the start of a range. But 2908 * don't bother if we're too close to the end for the minus to 2909 * mean that. */ 2910 if (*s != '-' || s >= send - 1 || s == start) { 2911 2912 /* A regular character. Process like any other, but first 2913 * clear any flags */ 2914 didrange = FALSE; 2915 dorange = FALSE; 2916 #ifdef EBCDIC 2917 non_portable_endpoint = 0; 2918 backslash_N = 0; 2919 #endif 2920 /* Drops down to generic code to process current byte */ 2921 } 2922 else { 2923 if (didrange) { /* Something like y/A-C-Z// */ 2924 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 2925 } 2926 2927 dorange = TRUE; 2928 2929 s++; /* Skip past the minus */ 2930 2931 /* d now points to where the end-range character will be 2932 * placed. Save it so won't have to go finding it later, 2933 * and drop down to get that character. (Actually we 2934 * instead save the offset, to handle the case where a 2935 * realloc in the meantime could change the actual 2936 * pointer). We'll finish processing the range the next 2937 * time through the loop */ 2938 offset_to_max = d - SvPVX_const(sv); 2939 } 2940 } /* End of not a range */ 2941 else { 2942 /* Here we have parsed a range. Now must handle it. At this 2943 * point: 2944 * 'sv' is a SV* that contains the output string we are 2945 * constructing. The final two characters in that string 2946 * are the range start and range end, in order. 2947 * 'd' points to just beyond the range end in the 'sv' string, 2948 * where we would next place something 2949 * 'offset_to_max' is the offset in 'sv' at which the character 2950 * before 'd' begins. 2951 */ 2952 const char * max_ptr = SvPVX_const(sv) + offset_to_max; 2953 const char * min_ptr; 2954 IV range_min; 2955 IV range_max; /* last character in range */ 2956 STRLEN save_offset; 2957 STRLEN grow; 2958 #ifndef EBCDIC /* Not meaningful except in EBCDIC, so initialize to false */ 2959 const bool convert_unicode = FALSE; 2960 const IV real_range_max = 0; 2961 #else 2962 bool convert_unicode; 2963 IV real_range_max = 0; 2964 #endif 2965 2966 /* Get the range-ends code point values. */ 2967 if (has_utf8) { 2968 /* We know the utf8 is valid, because we just constructed 2969 * it ourselves in previous loop iterations */ 2970 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); 2971 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); 2972 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); 2973 } 2974 else { 2975 min_ptr = max_ptr - 1; 2976 range_min = * (U8*) min_ptr; 2977 range_max = * (U8*) max_ptr; 2978 } 2979 2980 #ifdef EBCDIC 2981 /* On EBCDIC platforms, we may have to deal with portable 2982 * ranges. These happen if at least one range endpoint is a 2983 * Unicode value (\N{...}), or if the range is a subset of 2984 * [A-Z] or [a-z], and both ends are literal characters, 2985 * like 'A', and not like \x{C1} */ 2986 if ((convert_unicode 2987 = cBOOL(backslash_N) /* \N{} forces Unicode, hence 2988 portable range */ 2989 || ( ! non_portable_endpoint 2990 && (( isLOWER_A(range_min) && isLOWER_A(range_max)) 2991 || (isUPPER_A(range_min) && isUPPER_A(range_max)))) 2992 )) { 2993 2994 /* Special handling is needed for these portable ranges. 2995 * They are defined to all be in Unicode terms, which 2996 * include all Unicode code points between the end points. 2997 * Convert to Unicode to get the Unicode range. Later we 2998 * will convert each code point in the range back to 2999 * native. */ 3000 range_min = NATIVE_TO_UNI(range_min); 3001 range_max = NATIVE_TO_UNI(range_max); 3002 } 3003 #endif 3004 3005 if (range_min > range_max) { 3006 if (convert_unicode) { 3007 /* Need to convert back to native for meaningful 3008 * messages for this platform */ 3009 range_min = UNI_TO_NATIVE(range_min); 3010 range_max = UNI_TO_NATIVE(range_max); 3011 } 3012 3013 /* Use the characters themselves for the error message if 3014 * ASCII printables; otherwise some visible representation 3015 * of them */ 3016 if (isPRINT_A(range_min) && isPRINT_A(range_max)) { 3017 Perl_croak(aTHX_ 3018 "Invalid range \"%c-%c\" in transliteration operator", 3019 (char)range_min, (char)range_max); 3020 } 3021 else if (convert_unicode) { 3022 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3023 Perl_croak(aTHX_ 3024 "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\"" 3025 " in transliteration operator", 3026 range_min, range_max); 3027 } 3028 else { 3029 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3030 Perl_croak(aTHX_ 3031 "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\"" 3032 " in transliteration operator", 3033 range_min, range_max); 3034 } 3035 } 3036 3037 if (has_utf8) { 3038 3039 /* We try to avoid creating a swash. If the upper end of 3040 * this range is below 256, this range won't force a swash; 3041 * otherwise it does force a swash, and as long as we have 3042 * to have one, we might as well not expand things out. 3043 * But if it's EBCDIC, we may have to look at each 3044 * character below 256 if we have to convert to/from 3045 * Unicode values */ 3046 if (range_max > 255 3047 #ifdef EBCDIC 3048 && (range_min > 255 || ! convert_unicode) 3049 #endif 3050 ) { 3051 /* Move the high character one byte to the right; then 3052 * insert between it and the range begin, an illegal 3053 * byte which serves to indicate this is a range (using 3054 * a '-' could be ambiguous). */ 3055 char *e = d++; 3056 while (e-- > max_ptr) { 3057 *(e + 1) = *e; 3058 } 3059 *(e + 1) = (char) ILLEGAL_UTF8_BYTE; 3060 goto range_done; 3061 } 3062 3063 /* Here, we're going to expand out the range. For EBCDIC 3064 * the range can extend above 255 (not so in ASCII), so 3065 * for EBCDIC, split it into the parts above and below 3066 * 255/256 */ 3067 #ifdef EBCDIC 3068 if (range_max > 255) { 3069 real_range_max = range_max; 3070 range_max = 255; 3071 } 3072 #endif 3073 } 3074 3075 /* Here we need to expand out the string to contain each 3076 * character in the range. Grow the output to handle this */ 3077 3078 save_offset = min_ptr - SvPVX_const(sv); 3079 3080 /* The base growth is the number of code points in the range */ 3081 grow = range_max - range_min + 1; 3082 if (has_utf8) { 3083 3084 /* But if the output is UTF-8, some of those characters may 3085 * need two bytes (since the maximum range value here is 3086 * 255, the max bytes per character is two). On ASCII 3087 * platforms, it's not much trouble to get an accurate 3088 * count of what's needed. But on EBCDIC, the ones that 3089 * need 2 bytes are scattered around, so just use a worst 3090 * case value instead of calculating for that platform. */ 3091 #ifdef EBCDIC 3092 grow *= 2; 3093 #else 3094 /* Only those above 127 require 2 bytes. This may be 3095 * everything in the range, or not */ 3096 if (range_min > 127) { 3097 grow *= 2; 3098 } 3099 else if (range_max > 127) { 3100 grow += range_max - 127; 3101 } 3102 #endif 3103 } 3104 3105 /* Subtract 3 for the bytes that were already accounted for 3106 * (min, max, and the hyphen) */ 3107 SvGROW(sv, SvLEN(sv) + grow - 3); 3108 d = SvPVX(sv) + save_offset; /* refresh d after realloc */ 3109 3110 /* Here, we expand out the range. On ASCII platforms, the 3111 * compiler should optimize out the 'convert_unicode==TRUE' 3112 * portion of this */ 3113 if (convert_unicode) { 3114 IV i; 3115 3116 /* Recall that the min and max are now in Unicode terms, so 3117 * we have to convert each character to its native 3118 * equivalent */ 3119 if (has_utf8) { 3120 for (i = range_min; i <= range_max; i++) { 3121 append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i), 3122 (U8 **) &d); 3123 } 3124 } 3125 else { 3126 for (i = range_min; i <= range_max; i++) { 3127 *d++ = (char)LATIN1_TO_NATIVE((U8) i); 3128 } 3129 } 3130 } 3131 else { 3132 IV i; 3133 3134 /* Here, no conversions are necessary, which means that the 3135 * first character in the range is already in 'd' and 3136 * valid, so we can skip overwriting it */ 3137 if (has_utf8) { 3138 d += UTF8SKIP(d); 3139 for (i = range_min + 1; i <= range_max; i++) { 3140 append_utf8_from_native_byte((U8) i, (U8 **) &d); 3141 } 3142 } 3143 else { 3144 d++; 3145 for (i = range_min + 1; i <= range_max; i++) { 3146 *d++ = (char)i; 3147 } 3148 } 3149 } 3150 3151 /* (Compilers should optimize this out for non-EBCDIC). If the 3152 * original range extended above 255, add in that portion */ 3153 if (real_range_max) { 3154 *d++ = (char) UTF8_TWO_BYTE_HI(0x100); 3155 *d++ = (char) UTF8_TWO_BYTE_LO(0x100); 3156 if (real_range_max > 0x101) 3157 *d++ = (char) ILLEGAL_UTF8_BYTE; 3158 if (real_range_max > 0x100) 3159 d = (char*)uvchr_to_utf8((U8*)d, real_range_max); 3160 } 3161 3162 range_done: 3163 /* mark the range as done, and continue */ 3164 didrange = TRUE; 3165 dorange = FALSE; 3166 #ifdef EBCDIC 3167 non_portable_endpoint = 0; 3168 backslash_N = 0; 3169 #endif 3170 continue; 3171 } /* End of is a range */ 3172 } /* End of transliteration. Joins main code after these else's */ 3173 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3174 char *s1 = s-1; 3175 int esc = 0; 3176 while (s1 >= start && *s1-- == '\\') 3177 esc = !esc; 3178 if (!esc) 3179 in_charclass = TRUE; 3180 } 3181 3182 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3183 char *s1 = s-1; 3184 int esc = 0; 3185 while (s1 >= start && *s1-- == '\\') 3186 esc = !esc; 3187 if (!esc) 3188 in_charclass = FALSE; 3189 } 3190 3191 /* skip for regexp comments /(?#comment)/, except for the last 3192 * char, which will be done separately. 3193 * Stop on (?{..}) and friends */ 3194 3195 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3196 if (s[2] == '#') { 3197 while (s+1 < send && *s != ')') 3198 *d++ = *s++; 3199 } 3200 else if (!PL_lex_casemods 3201 && ( s[2] == '{' /* This should match regcomp.c */ 3202 || (s[2] == '?' && s[3] == '{'))) 3203 { 3204 break; 3205 } 3206 } 3207 3208 /* likewise skip #-initiated comments in //x patterns */ 3209 else if (*s == '#' 3210 && PL_lex_inpat 3211 && !in_charclass 3212 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) 3213 { 3214 while (s+1 < send && *s != '\n') 3215 *d++ = *s++; 3216 } 3217 3218 /* no further processing of single-quoted regex */ 3219 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3220 goto default_action; 3221 3222 /* check for embedded arrays 3223 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3224 */ 3225 else if (*s == '@' && s[1]) { 3226 if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1])) 3227 break; 3228 if (strchr(":'{$", s[1])) 3229 break; 3230 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3231 break; /* in regexp, neither @+ nor @- are interpolated */ 3232 } 3233 3234 /* check for embedded scalars. only stop if we're sure it's a 3235 variable. 3236 */ 3237 else if (*s == '$') { 3238 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3239 break; 3240 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { 3241 if (s[1] == '\\') { 3242 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3243 "Possible unintended interpolation of $\\ in regex"); 3244 } 3245 break; /* in regexp, $ might be tail anchor */ 3246 } 3247 } 3248 3249 /* End of else if chain - OP_TRANS rejoin rest */ 3250 3251 /* backslashes */ 3252 if (*s == '\\' && s+1 < send) { 3253 char* e; /* Can be used for ending '}', etc. */ 3254 3255 s++; 3256 3257 /* warn on \1 - \9 in substitution replacements, but note that \11 3258 * is an octal; and \19 is \1 followed by '9' */ 3259 if (PL_lex_inwhat == OP_SUBST 3260 && !PL_lex_inpat 3261 && isDIGIT(*s) 3262 && *s != '0' 3263 && !isDIGIT(s[1])) 3264 { 3265 /* diag_listed_as: \%d better written as $%d */ 3266 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3267 *--s = '$'; 3268 break; 3269 } 3270 3271 /* string-change backslash escapes */ 3272 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { 3273 --s; 3274 break; 3275 } 3276 /* In a pattern, process \N, but skip any other backslash escapes. 3277 * This is because we don't want to translate an escape sequence 3278 * into a meta symbol and have the regex compiler use the meta 3279 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3280 * in spite of this, we do have to process \N here while the proper 3281 * charnames handler is in scope. See bugs #56444 and #62056. 3282 * 3283 * There is a complication because \N in a pattern may also stand 3284 * for 'match a non-nl', and not mean a charname, in which case its 3285 * processing should be deferred to the regex compiler. To be a 3286 * charname it must be followed immediately by a '{', and not look 3287 * like \N followed by a curly quantifier, i.e., not something like 3288 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3289 * quantifier */ 3290 else if (PL_lex_inpat 3291 && (*s != 'N' 3292 || s[1] != '{' 3293 || regcurly(s + 1))) 3294 { 3295 *d++ = '\\'; 3296 goto default_action; 3297 } 3298 3299 switch (*s) { 3300 default: 3301 { 3302 if ((isALPHANUMERIC(*s))) 3303 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3304 "Unrecognized escape \\%c passed through", 3305 *s); 3306 /* default action is to copy the quoted character */ 3307 goto default_action; 3308 } 3309 3310 /* eg. \132 indicates the octal constant 0132 */ 3311 case '0': case '1': case '2': case '3': 3312 case '4': case '5': case '6': case '7': 3313 { 3314 I32 flags = PERL_SCAN_SILENT_ILLDIGIT; 3315 STRLEN len = 3; 3316 uv = grok_oct(s, &len, &flags, NULL); 3317 s += len; 3318 if (len < 3 && s < send && isDIGIT(*s) 3319 && ckWARN(WARN_MISC)) 3320 { 3321 Perl_warner(aTHX_ packWARN(WARN_MISC), 3322 "%s", form_short_octal_warning(s, len)); 3323 } 3324 } 3325 goto NUM_ESCAPE_INSERT; 3326 3327 /* eg. \o{24} indicates the octal constant \024 */ 3328 case 'o': 3329 { 3330 const char* error; 3331 3332 bool valid = grok_bslash_o(&s, &uv, &error, 3333 TRUE, /* Output warning */ 3334 FALSE, /* Not strict */ 3335 TRUE, /* Output warnings for 3336 non-portables */ 3337 UTF); 3338 if (! valid) { 3339 yyerror(error); 3340 continue; 3341 } 3342 goto NUM_ESCAPE_INSERT; 3343 } 3344 3345 /* eg. \x24 indicates the hex constant 0x24 */ 3346 case 'x': 3347 { 3348 const char* error; 3349 3350 bool valid = grok_bslash_x(&s, &uv, &error, 3351 TRUE, /* Output warning */ 3352 FALSE, /* Not strict */ 3353 TRUE, /* Output warnings for 3354 non-portables */ 3355 UTF); 3356 if (! valid) { 3357 yyerror(error); 3358 continue; 3359 } 3360 } 3361 3362 NUM_ESCAPE_INSERT: 3363 /* Insert oct or hex escaped character. */ 3364 3365 /* Here uv is the ordinal of the next character being added */ 3366 if (UVCHR_IS_INVARIANT(uv)) { 3367 *d++ = (char) uv; 3368 } 3369 else { 3370 if (!has_utf8 && uv > 255) { 3371 /* Might need to recode whatever we have accumulated so 3372 * far if it contains any chars variant in utf8 or 3373 * utf-ebcdic. */ 3374 3375 SvCUR_set(sv, d - SvPVX_const(sv)); 3376 SvPOK_on(sv); 3377 *d = '\0'; 3378 /* See Note on sizing above. */ 3379 sv_utf8_upgrade_flags_grow( 3380 sv, 3381 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE 3382 /* Above-latin1 in string 3383 * implies no encoding */ 3384 |SV_UTF8_NO_ENCODING, 3385 UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1); 3386 d = SvPVX(sv) + SvCUR(sv); 3387 has_utf8 = TRUE; 3388 } 3389 3390 if (has_utf8) { 3391 /* Usually, there will already be enough room in 'sv' 3392 * since such escapes are likely longer than any UTF-8 3393 * sequence they can end up as. This isn't the case on 3394 * EBCDIC where \x{40000000} contains 12 bytes, and the 3395 * UTF-8 for it contains 14. And, we have to allow for 3396 * a trailing NUL. It probably can't happen on ASCII 3397 * platforms, but be safe */ 3398 const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv) 3399 + 1; 3400 if (UNLIKELY(needed > SvLEN(sv))) { 3401 SvCUR_set(sv, d - SvPVX_const(sv)); 3402 d = sv_grow(sv, needed) + SvCUR(sv); 3403 } 3404 3405 d = (char*)uvchr_to_utf8((U8*)d, uv); 3406 if (PL_lex_inwhat == OP_TRANS 3407 && PL_sublex_info.sub_op) 3408 { 3409 PL_sublex_info.sub_op->op_private |= 3410 (PL_lex_repl ? OPpTRANS_FROM_UTF 3411 : OPpTRANS_TO_UTF); 3412 } 3413 } 3414 else { 3415 *d++ = (char)uv; 3416 } 3417 } 3418 #ifdef EBCDIC 3419 non_portable_endpoint++; 3420 #endif 3421 continue; 3422 3423 case 'N': 3424 /* In a non-pattern \N must be like \N{U+0041}, or it can be a 3425 * named character, like \N{LATIN SMALL LETTER A}, or a named 3426 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND 3427 * GRAVE} (except y/// can't handle the latter, croaking). For 3428 * convenience all three forms are referred to as "named 3429 * characters" below. 3430 * 3431 * For patterns, \N also can mean to match a non-newline. Code 3432 * before this 'switch' statement should already have handled 3433 * this situation, and hence this code only has to deal with 3434 * the named character cases. 3435 * 3436 * For non-patterns, the named characters are converted to 3437 * their string equivalents. In patterns, named characters are 3438 * not converted to their ultimate forms for the same reasons 3439 * that other escapes aren't. Instead, they are converted to 3440 * the \N{U+...} form to get the value from the charnames that 3441 * is in effect right now, while preserving the fact that it 3442 * was a named character, so that the regex compiler knows 3443 * this. 3444 * 3445 * The structure of this section of code (besides checking for 3446 * errors and upgrading to utf8) is: 3447 * If the named character is of the form \N{U+...}, pass it 3448 * through if a pattern; otherwise convert the code point 3449 * to utf8 3450 * Otherwise must be some \N{NAME}: convert to 3451 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8 3452 * 3453 * Transliteration is an exception. The conversion to utf8 is 3454 * only done if the code point requires it to be representable. 3455 * 3456 * Here, 's' points to the 'N'; the test below is guaranteed to 3457 * succeed if we are being called on a pattern, as we already 3458 * know from a test above that the next character is a '{'. A 3459 * non-pattern \N must mean 'named character', which requires 3460 * braces */ 3461 s++; 3462 if (*s != '{') { 3463 yyerror("Missing braces on \\N{}"); 3464 continue; 3465 } 3466 s++; 3467 3468 /* If there is no matching '}', it is an error. */ 3469 if (! (e = strchr(s, '}'))) { 3470 if (! PL_lex_inpat) { 3471 yyerror("Missing right brace on \\N{}"); 3472 } else { 3473 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3474 } 3475 continue; 3476 } 3477 3478 /* Here it looks like a named character */ 3479 3480 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3481 s += 2; /* Skip to next char after the 'U+' */ 3482 if (PL_lex_inpat) { 3483 3484 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ 3485 /* Check the syntax. */ 3486 const char *orig_s; 3487 orig_s = s - 5; 3488 if (!isXDIGIT(*s)) { 3489 bad_NU: 3490 yyerror( 3491 "Invalid hexadecimal number in \\N{U+...}" 3492 ); 3493 s = e + 1; 3494 continue; 3495 } 3496 while (++s < e) { 3497 if (isXDIGIT(*s)) 3498 continue; 3499 else if ((*s == '.' || *s == '_') 3500 && isXDIGIT(s[1])) 3501 continue; 3502 goto bad_NU; 3503 } 3504 3505 /* Pass everything through unchanged. 3506 * +1 is for the '}' */ 3507 Copy(orig_s, d, e - orig_s + 1, char); 3508 d += e - orig_s + 1; 3509 } 3510 else { /* Not a pattern: convert the hex to string */ 3511 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3512 | PERL_SCAN_SILENT_ILLDIGIT 3513 | PERL_SCAN_DISALLOW_PREFIX; 3514 STRLEN len = e - s; 3515 uv = grok_hex(s, &len, &flags, NULL); 3516 if (len == 0 || (len != (STRLEN)(e - s))) 3517 goto bad_NU; 3518 3519 /* For non-tr///, if the destination is not in utf8, 3520 * unconditionally recode it to be so. This is 3521 * because \N{} implies Unicode semantics, and scalars 3522 * have to be in utf8 to guarantee those semantics. 3523 * tr/// doesn't care about Unicode rules, so no need 3524 * there to upgrade to UTF-8 for small enough code 3525 * points */ 3526 if (! has_utf8 && ( uv > 0xFF 3527 || PL_lex_inwhat != OP_TRANS)) 3528 { 3529 SvCUR_set(sv, d - SvPVX_const(sv)); 3530 SvPOK_on(sv); 3531 *d = '\0'; 3532 /* See Note on sizing above. */ 3533 sv_utf8_upgrade_flags_grow( 3534 sv, 3535 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3536 UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1); 3537 d = SvPVX(sv) + SvCUR(sv); 3538 has_utf8 = TRUE; 3539 } 3540 3541 /* Add the (Unicode) code point to the output. */ 3542 if (OFFUNI_IS_INVARIANT(uv)) { 3543 *d++ = (char) LATIN1_TO_NATIVE(uv); 3544 } 3545 else { 3546 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0); 3547 } 3548 } 3549 } 3550 else /* Here is \N{NAME} but not \N{U+...}. */ 3551 if ((res = get_and_check_backslash_N_name(s, e))) 3552 { 3553 STRLEN len; 3554 const char *str = SvPV_const(res, len); 3555 if (PL_lex_inpat) { 3556 3557 if (! len) { /* The name resolved to an empty string */ 3558 Copy("\\N{}", d, 4, char); 3559 d += 4; 3560 } 3561 else { 3562 /* In order to not lose information for the regex 3563 * compiler, pass the result in the specially made 3564 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3565 * the code points in hex of each character 3566 * returned by charnames */ 3567 3568 const char *str_end = str + len; 3569 const STRLEN off = d - SvPVX_const(sv); 3570 3571 if (! SvUTF8(res)) { 3572 /* For the non-UTF-8 case, we can determine the 3573 * exact length needed without having to parse 3574 * through the string. Each character takes up 3575 * 2 hex digits plus either a trailing dot or 3576 * the "}" */ 3577 const char initial_text[] = "\\N{U+"; 3578 const STRLEN initial_len = sizeof(initial_text) 3579 - 1; 3580 d = off + SvGROW(sv, off 3581 + 3 * len 3582 3583 /* +1 for trailing NUL */ 3584 + initial_len + 1 3585 3586 + (STRLEN)(send - e)); 3587 Copy(initial_text, d, initial_len, char); 3588 d += initial_len; 3589 while (str < str_end) { 3590 char hex_string[4]; 3591 int len = 3592 my_snprintf(hex_string, 3593 sizeof(hex_string), 3594 "%02X.", 3595 3596 /* The regex compiler is 3597 * expecting Unicode, not 3598 * native */ 3599 NATIVE_TO_LATIN1(*str)); 3600 PERL_MY_SNPRINTF_POST_GUARD(len, 3601 sizeof(hex_string)); 3602 Copy(hex_string, d, 3, char); 3603 d += 3; 3604 str++; 3605 } 3606 d--; /* Below, we will overwrite the final 3607 dot with a right brace */ 3608 } 3609 else { 3610 STRLEN char_length; /* cur char's byte length */ 3611 3612 /* and the number of bytes after this is 3613 * translated into hex digits */ 3614 STRLEN output_length; 3615 3616 /* 2 hex per byte; 2 chars for '\N'; 2 chars 3617 * for max('U+', '.'); and 1 for NUL */ 3618 char hex_string[2 * UTF8_MAXBYTES + 5]; 3619 3620 /* Get the first character of the result. */ 3621 U32 uv = utf8n_to_uvchr((U8 *) str, 3622 len, 3623 &char_length, 3624 UTF8_ALLOW_ANYUV); 3625 /* Convert first code point to Unicode hex, 3626 * including the boiler plate before it. */ 3627 output_length = 3628 my_snprintf(hex_string, sizeof(hex_string), 3629 "\\N{U+%X", 3630 (unsigned int) NATIVE_TO_UNI(uv)); 3631 3632 /* Make sure there is enough space to hold it */ 3633 d = off + SvGROW(sv, off 3634 + output_length 3635 + (STRLEN)(send - e) 3636 + 2); /* '}' + NUL */ 3637 /* And output it */ 3638 Copy(hex_string, d, output_length, char); 3639 d += output_length; 3640 3641 /* For each subsequent character, append dot and 3642 * its Unicode code point in hex */ 3643 while ((str += char_length) < str_end) { 3644 const STRLEN off = d - SvPVX_const(sv); 3645 U32 uv = utf8n_to_uvchr((U8 *) str, 3646 str_end - str, 3647 &char_length, 3648 UTF8_ALLOW_ANYUV); 3649 output_length = 3650 my_snprintf(hex_string, 3651 sizeof(hex_string), 3652 ".%X", 3653 (unsigned int) NATIVE_TO_UNI(uv)); 3654 3655 d = off + SvGROW(sv, off 3656 + output_length 3657 + (STRLEN)(send - e) 3658 + 2); /* '}' + NUL */ 3659 Copy(hex_string, d, output_length, char); 3660 d += output_length; 3661 } 3662 } 3663 3664 *d++ = '}'; /* Done. Add the trailing brace */ 3665 } 3666 } 3667 else { /* Here, not in a pattern. Convert the name to a 3668 * string. */ 3669 3670 if (PL_lex_inwhat == OP_TRANS) { 3671 str = SvPV_const(res, len); 3672 if (len > ((SvUTF8(res)) 3673 ? UTF8SKIP(str) 3674 : 1U)) 3675 { 3676 yyerror(Perl_form(aTHX_ 3677 "%.*s must not be a named sequence" 3678 " in transliteration operator", 3679 /* +1 to include the "}" */ 3680 (int) (e + 1 - start), start)); 3681 goto end_backslash_N; 3682 } 3683 } 3684 else if (! SvUTF8(res)) { 3685 /* Make sure \N{} return is UTF-8. This is because 3686 * \N{} implies Unicode semantics, and scalars have to 3687 * be in utf8 to guarantee those semantics; but not 3688 * needed in tr/// */ 3689 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING); 3690 str = SvPV_const(res, len); 3691 } 3692 3693 /* Upgrade destination to be utf8 if this new 3694 * component is */ 3695 if (! has_utf8 && SvUTF8(res)) { 3696 SvCUR_set(sv, d - SvPVX_const(sv)); 3697 SvPOK_on(sv); 3698 *d = '\0'; 3699 /* See Note on sizing above. */ 3700 sv_utf8_upgrade_flags_grow(sv, 3701 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3702 len + (STRLEN)(send - s) + 1); 3703 d = SvPVX(sv) + SvCUR(sv); 3704 has_utf8 = TRUE; 3705 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 3706 3707 /* See Note on sizing above. (NOTE: SvCUR() is not 3708 * set correctly here). */ 3709 const STRLEN off = d - SvPVX_const(sv); 3710 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); 3711 } 3712 Copy(str, d, len, char); 3713 d += len; 3714 } 3715 3716 SvREFCNT_dec(res); 3717 3718 } /* End \N{NAME} */ 3719 3720 end_backslash_N: 3721 #ifdef EBCDIC 3722 backslash_N++; /* \N{} is defined to be Unicode */ 3723 #endif 3724 s = e + 1; /* Point to just after the '}' */ 3725 continue; 3726 3727 /* \c is a control character */ 3728 case 'c': 3729 s++; 3730 if (s < send) { 3731 *d++ = grok_bslash_c(*s++, 1); 3732 } 3733 else { 3734 yyerror("Missing control char name in \\c"); 3735 } 3736 #ifdef EBCDIC 3737 non_portable_endpoint++; 3738 #endif 3739 continue; 3740 3741 /* printf-style backslashes, formfeeds, newlines, etc */ 3742 case 'b': 3743 *d++ = '\b'; 3744 break; 3745 case 'n': 3746 *d++ = '\n'; 3747 break; 3748 case 'r': 3749 *d++ = '\r'; 3750 break; 3751 case 'f': 3752 *d++ = '\f'; 3753 break; 3754 case 't': 3755 *d++ = '\t'; 3756 break; 3757 case 'e': 3758 *d++ = ESC_NATIVE; 3759 break; 3760 case 'a': 3761 *d++ = '\a'; 3762 break; 3763 } /* end switch */ 3764 3765 s++; 3766 continue; 3767 } /* end if (backslash) */ 3768 3769 default_action: 3770 /* If we started with encoded form, or already know we want it, 3771 then encode the next character */ 3772 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { 3773 STRLEN len = 1; 3774 3775 /* One might think that it is wasted effort in the case of the 3776 * source being utf8 (this_utf8 == TRUE) to take the next character 3777 * in the source, convert it to an unsigned value, and then convert 3778 * it back again. But the source has not been validated here. The 3779 * routine that does the conversion checks for errors like 3780 * malformed utf8 */ 3781 3782 const UV nextuv = (this_utf8) 3783 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) 3784 : (UV) ((U8) *s); 3785 const STRLEN need = UVCHR_SKIP(nextuv); 3786 if (!has_utf8) { 3787 SvCUR_set(sv, d - SvPVX_const(sv)); 3788 SvPOK_on(sv); 3789 *d = '\0'; 3790 /* See Note on sizing above. */ 3791 sv_utf8_upgrade_flags_grow(sv, 3792 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3793 need + (STRLEN)(send - s) + 1); 3794 d = SvPVX(sv) + SvCUR(sv); 3795 has_utf8 = TRUE; 3796 } else if (need > len) { 3797 /* encoded value larger than old, may need extra space (NOTE: 3798 * SvCUR() is not set correctly here). See Note on sizing 3799 * above. */ 3800 const STRLEN off = d - SvPVX_const(sv); 3801 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; 3802 } 3803 s += len; 3804 3805 d = (char*)uvchr_to_utf8((U8*)d, nextuv); 3806 } 3807 else { 3808 *d++ = *s++; 3809 } 3810 } /* while loop to process each character */ 3811 3812 /* terminate the string and set up the sv */ 3813 *d = '\0'; 3814 SvCUR_set(sv, d - SvPVX_const(sv)); 3815 if (SvCUR(sv) >= SvLEN(sv)) 3816 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf 3817 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); 3818 3819 SvPOK_on(sv); 3820 if (IN_ENCODING && !has_utf8) { 3821 sv_recode_to_utf8(sv, _get_encoding()); 3822 if (SvUTF8(sv)) 3823 has_utf8 = TRUE; 3824 } 3825 if (has_utf8) { 3826 SvUTF8_on(sv); 3827 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3828 PL_sublex_info.sub_op->op_private |= 3829 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3830 } 3831 } 3832 3833 /* shrink the sv if we allocated more than we used */ 3834 if (SvCUR(sv) + 5 < SvLEN(sv)) { 3835 SvPV_shrink_to_cur(sv); 3836 } 3837 3838 /* return the substring (via pl_yylval) only if we parsed anything */ 3839 if (s > start) { 3840 char *s2 = start; 3841 for (; s2 < s; s2++) { 3842 if (*s2 == '\n') 3843 COPLINE_INC_WITH_HERELINES; 3844 } 3845 SvREFCNT_inc_simple_void_NN(sv); 3846 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 3847 && ! PL_parser->lex_re_reparsing) 3848 { 3849 const char *const key = PL_lex_inpat ? "qr" : "q"; 3850 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 3851 const char *type; 3852 STRLEN typelen; 3853 3854 if (PL_lex_inwhat == OP_TRANS) { 3855 type = "tr"; 3856 typelen = 2; 3857 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 3858 type = "s"; 3859 typelen = 1; 3860 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 3861 type = "q"; 3862 typelen = 1; 3863 } else { 3864 type = "qq"; 3865 typelen = 2; 3866 } 3867 3868 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 3869 type, typelen); 3870 } 3871 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3872 } 3873 LEAVE_with_name("scan_const"); 3874 return s; 3875 } 3876 3877 /* S_intuit_more 3878 * Returns TRUE if there's more to the expression (e.g., a subscript), 3879 * FALSE otherwise. 3880 * 3881 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 3882 * 3883 * ->[ and ->{ return TRUE 3884 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 3885 * { and [ outside a pattern are always subscripts, so return TRUE 3886 * if we're outside a pattern and it's not { or [, then return FALSE 3887 * if we're in a pattern and the first char is a { 3888 * {4,5} (any digits around the comma) returns FALSE 3889 * if we're in a pattern and the first char is a [ 3890 * [] returns FALSE 3891 * [SOMETHING] has a funky algorithm to decide whether it's a 3892 * character class or not. It has to deal with things like 3893 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 3894 * anything else returns TRUE 3895 */ 3896 3897 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 3898 3899 STATIC int 3900 S_intuit_more(pTHX_ char *s) 3901 { 3902 PERL_ARGS_ASSERT_INTUIT_MORE; 3903 3904 if (PL_lex_brackets) 3905 return TRUE; 3906 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 3907 return TRUE; 3908 if (*s == '-' && s[1] == '>' 3909 && FEATURE_POSTDEREF_QQ_IS_ENABLED 3910 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 3911 ||(s[2] == '@' && strchr("*[{",s[3])) )) 3912 return TRUE; 3913 if (*s != '{' && *s != '[') 3914 return FALSE; 3915 if (!PL_lex_inpat) 3916 return TRUE; 3917 3918 /* In a pattern, so maybe we have {n,m}. */ 3919 if (*s == '{') { 3920 if (regcurly(s)) { 3921 return FALSE; 3922 } 3923 return TRUE; 3924 } 3925 3926 /* On the other hand, maybe we have a character class */ 3927 3928 s++; 3929 if (*s == ']' || *s == '^') 3930 return FALSE; 3931 else { 3932 /* this is terrifying, and it works */ 3933 int weight; 3934 char seen[256]; 3935 const char * const send = strchr(s,']'); 3936 unsigned char un_char, last_un_char; 3937 char tmpbuf[sizeof PL_tokenbuf * 4]; 3938 3939 if (!send) /* has to be an expression */ 3940 return TRUE; 3941 weight = 2; /* let's weigh the evidence */ 3942 3943 if (*s == '$') 3944 weight -= 3; 3945 else if (isDIGIT(*s)) { 3946 if (s[1] != ']') { 3947 if (isDIGIT(s[1]) && s[2] == ']') 3948 weight -= 10; 3949 } 3950 else 3951 weight -= 100; 3952 } 3953 Zero(seen,256,char); 3954 un_char = 255; 3955 for (; s < send; s++) { 3956 last_un_char = un_char; 3957 un_char = (unsigned char)*s; 3958 switch (*s) { 3959 case '@': 3960 case '&': 3961 case '$': 3962 weight -= seen[un_char] * 10; 3963 if (isWORDCHAR_lazy_if(s+1,UTF)) { 3964 int len; 3965 char *tmp = PL_bufend; 3966 PL_bufend = (char*)send; 3967 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 3968 PL_bufend = tmp; 3969 len = (int)strlen(tmpbuf); 3970 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 3971 UTF ? SVf_UTF8 : 0, SVt_PV)) 3972 weight -= 100; 3973 else 3974 weight -= 10; 3975 } 3976 else if (*s == '$' 3977 && s[1] 3978 && strchr("[#!%*<>()-=",s[1])) 3979 { 3980 if (/*{*/ strchr("])} =",s[2])) 3981 weight -= 10; 3982 else 3983 weight -= 1; 3984 } 3985 break; 3986 case '\\': 3987 un_char = 254; 3988 if (s[1]) { 3989 if (strchr("wds]",s[1])) 3990 weight += 100; 3991 else if (seen[(U8)'\''] || seen[(U8)'"']) 3992 weight += 1; 3993 else if (strchr("rnftbxcav",s[1])) 3994 weight += 40; 3995 else if (isDIGIT(s[1])) { 3996 weight += 40; 3997 while (s[1] && isDIGIT(s[1])) 3998 s++; 3999 } 4000 } 4001 else 4002 weight += 100; 4003 break; 4004 case '-': 4005 if (s[1] == '\\') 4006 weight += 50; 4007 if (strchr("aA01! ",last_un_char)) 4008 weight += 30; 4009 if (strchr("zZ79~",s[1])) 4010 weight += 30; 4011 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4012 weight -= 5; /* cope with negative subscript */ 4013 break; 4014 default: 4015 if (!isWORDCHAR(last_un_char) 4016 && !(last_un_char == '$' || last_un_char == '@' 4017 || last_un_char == '&') 4018 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4019 char *d = s; 4020 while (isALPHA(*s)) 4021 s++; 4022 if (keyword(d, s - d, 0)) 4023 weight -= 150; 4024 } 4025 if (un_char == last_un_char + 1) 4026 weight += 5; 4027 weight -= seen[un_char]; 4028 break; 4029 } 4030 seen[un_char]++; 4031 } 4032 if (weight >= 0) /* probably a character class */ 4033 return FALSE; 4034 } 4035 4036 return TRUE; 4037 } 4038 4039 /* 4040 * S_intuit_method 4041 * 4042 * Does all the checking to disambiguate 4043 * foo bar 4044 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4045 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 4046 * 4047 * First argument is the stuff after the first token, e.g. "bar". 4048 * 4049 * Not a method if foo is a filehandle. 4050 * Not a method if foo is a subroutine prototyped to take a filehandle. 4051 * Not a method if it's really "Foo $bar" 4052 * Method if it's "foo $bar" 4053 * Not a method if it's really "print foo $bar" 4054 * Method if it's really "foo package::" (interpreted as package->foo) 4055 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4056 * Not a method if bar is a filehandle or package, but is quoted with 4057 * => 4058 */ 4059 4060 STATIC int 4061 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) 4062 { 4063 char *s = start + (*start == '$'); 4064 char tmpbuf[sizeof PL_tokenbuf]; 4065 STRLEN len; 4066 GV* indirgv; 4067 /* Mustn't actually add anything to a symbol table. 4068 But also don't want to "initialise" any placeholder 4069 constants that might already be there into full 4070 blown PVGVs with attached PVCV. */ 4071 GV * const gv = 4072 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; 4073 4074 PERL_ARGS_ASSERT_INTUIT_METHOD; 4075 4076 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4077 return 0; 4078 if (cv && SvPOK(cv)) { 4079 const char *proto = CvPROTO(cv); 4080 if (proto) { 4081 while (*proto && (isSPACE(*proto) || *proto == ';')) 4082 proto++; 4083 if (*proto == '*') 4084 return 0; 4085 } 4086 } 4087 4088 if (*start == '$') { 4089 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY 4090 || isUPPER(*PL_tokenbuf)) 4091 return 0; 4092 s = skipspace(s); 4093 PL_bufptr = start; 4094 PL_expect = XREF; 4095 return *s == '(' ? FUNCMETH : METHOD; 4096 } 4097 4098 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4099 /* start is the beginning of the possible filehandle/object, 4100 * and s is the end of it 4101 * tmpbuf is a copy of it (but with single quotes as double colons) 4102 */ 4103 4104 if (!keyword(tmpbuf, len, 0)) { 4105 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4106 len -= 2; 4107 tmpbuf[len] = '\0'; 4108 goto bare_package; 4109 } 4110 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 4111 if (indirgv && GvCVu(indirgv)) 4112 return 0; 4113 /* filehandle or package name makes it a method */ 4114 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4115 s = skipspace(s); 4116 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4117 return 0; /* no assumptions -- "=>" quotes bareword */ 4118 bare_package: 4119 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, 4120 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4121 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4122 PL_expect = XTERM; 4123 force_next(WORD); 4124 PL_bufptr = s; 4125 return *s == '(' ? FUNCMETH : METHOD; 4126 } 4127 } 4128 return 0; 4129 } 4130 4131 /* Encoded script support. filter_add() effectively inserts a 4132 * 'pre-processing' function into the current source input stream. 4133 * Note that the filter function only applies to the current source file 4134 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4135 * 4136 * The datasv parameter (which may be NULL) can be used to pass 4137 * private data to this instance of the filter. The filter function 4138 * can recover the SV using the FILTER_DATA macro and use it to 4139 * store private buffers and state information. 4140 * 4141 * The supplied datasv parameter is upgraded to a PVIO type 4142 * and the IoDIRP/IoANY field is used to store the function pointer, 4143 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4144 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4145 * private use must be set using malloc'd pointers. 4146 */ 4147 4148 SV * 4149 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4150 { 4151 if (!funcp) 4152 return NULL; 4153 4154 if (!PL_parser) 4155 return NULL; 4156 4157 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4158 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4159 4160 if (!PL_rsfp_filters) 4161 PL_rsfp_filters = newAV(); 4162 if (!datasv) 4163 datasv = newSV(0); 4164 SvUPGRADE(datasv, SVt_PVIO); 4165 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4166 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4167 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4168 FPTR2DPTR(void *, IoANY(datasv)), 4169 SvPV_nolen(datasv))); 4170 av_unshift(PL_rsfp_filters, 1); 4171 av_store(PL_rsfp_filters, 0, datasv) ; 4172 if ( 4173 !PL_parser->filtered 4174 && PL_parser->lex_flags & LEX_EVALBYTES 4175 && PL_bufptr < PL_bufend 4176 ) { 4177 const char *s = PL_bufptr; 4178 while (s < PL_bufend) { 4179 if (*s == '\n') { 4180 SV *linestr = PL_parser->linestr; 4181 char *buf = SvPVX(linestr); 4182 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4183 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4184 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4185 STRLEN const linestart_pos = PL_parser->linestart - buf; 4186 STRLEN const last_uni_pos = 4187 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4188 STRLEN const last_lop_pos = 4189 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4190 av_push(PL_rsfp_filters, linestr); 4191 PL_parser->linestr = 4192 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4193 buf = SvPVX(PL_parser->linestr); 4194 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4195 PL_parser->bufptr = buf + bufptr_pos; 4196 PL_parser->oldbufptr = buf + oldbufptr_pos; 4197 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4198 PL_parser->linestart = buf + linestart_pos; 4199 if (PL_parser->last_uni) 4200 PL_parser->last_uni = buf + last_uni_pos; 4201 if (PL_parser->last_lop) 4202 PL_parser->last_lop = buf + last_lop_pos; 4203 SvLEN(linestr) = SvCUR(linestr); 4204 SvCUR(linestr) = s-SvPVX(linestr); 4205 PL_parser->filtered = 1; 4206 break; 4207 } 4208 s++; 4209 } 4210 } 4211 return(datasv); 4212 } 4213 4214 4215 /* Delete most recently added instance of this filter function. */ 4216 void 4217 Perl_filter_del(pTHX_ filter_t funcp) 4218 { 4219 SV *datasv; 4220 4221 PERL_ARGS_ASSERT_FILTER_DEL; 4222 4223 #ifdef DEBUGGING 4224 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4225 FPTR2DPTR(void*, funcp))); 4226 #endif 4227 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4228 return; 4229 /* if filter is on top of stack (usual case) just pop it off */ 4230 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4231 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4232 sv_free(av_pop(PL_rsfp_filters)); 4233 4234 return; 4235 } 4236 /* we need to search for the correct entry and clear it */ 4237 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4238 } 4239 4240 4241 /* Invoke the idxth filter function for the current rsfp. */ 4242 /* maxlen 0 = read one text line */ 4243 I32 4244 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4245 { 4246 filter_t funcp; 4247 SV *datasv = NULL; 4248 /* This API is bad. It should have been using unsigned int for maxlen. 4249 Not sure if we want to change the API, but if not we should sanity 4250 check the value here. */ 4251 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4252 4253 PERL_ARGS_ASSERT_FILTER_READ; 4254 4255 if (!PL_parser || !PL_rsfp_filters) 4256 return -1; 4257 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4258 /* Provide a default input filter to make life easy. */ 4259 /* Note that we append to the line. This is handy. */ 4260 DEBUG_P(PerlIO_printf(Perl_debug_log, 4261 "filter_read %d: from rsfp\n", idx)); 4262 if (correct_length) { 4263 /* Want a block */ 4264 int len ; 4265 const int old_len = SvCUR(buf_sv); 4266 4267 /* ensure buf_sv is large enough */ 4268 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4269 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4270 correct_length)) <= 0) { 4271 if (PerlIO_error(PL_rsfp)) 4272 return -1; /* error */ 4273 else 4274 return 0 ; /* end of file */ 4275 } 4276 SvCUR_set(buf_sv, old_len + len) ; 4277 SvPVX(buf_sv)[old_len + len] = '\0'; 4278 } else { 4279 /* Want a line */ 4280 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4281 if (PerlIO_error(PL_rsfp)) 4282 return -1; /* error */ 4283 else 4284 return 0 ; /* end of file */ 4285 } 4286 } 4287 return SvCUR(buf_sv); 4288 } 4289 /* Skip this filter slot if filter has been deleted */ 4290 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4291 DEBUG_P(PerlIO_printf(Perl_debug_log, 4292 "filter_read %d: skipped (filter deleted)\n", 4293 idx)); 4294 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4295 } 4296 if (SvTYPE(datasv) != SVt_PVIO) { 4297 if (correct_length) { 4298 /* Want a block */ 4299 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4300 if (!remainder) return 0; /* eof */ 4301 if (correct_length > remainder) correct_length = remainder; 4302 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4303 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4304 } else { 4305 /* Want a line */ 4306 const char *s = SvEND(datasv); 4307 const char *send = SvPVX(datasv) + SvLEN(datasv); 4308 while (s < send) { 4309 if (*s == '\n') { 4310 s++; 4311 break; 4312 } 4313 s++; 4314 } 4315 if (s == send) return 0; /* eof */ 4316 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4317 SvCUR_set(datasv, s-SvPVX(datasv)); 4318 } 4319 return SvCUR(buf_sv); 4320 } 4321 /* Get function pointer hidden within datasv */ 4322 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4323 DEBUG_P(PerlIO_printf(Perl_debug_log, 4324 "filter_read %d: via function %p (%s)\n", 4325 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4326 /* Call function. The function is expected to */ 4327 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4328 /* Return: <0:error, =0:eof, >0:not eof */ 4329 return (*funcp)(aTHX_ idx, buf_sv, correct_length); 4330 } 4331 4332 STATIC char * 4333 S_filter_gets(pTHX_ SV *sv, STRLEN append) 4334 { 4335 PERL_ARGS_ASSERT_FILTER_GETS; 4336 4337 #ifdef PERL_CR_FILTER 4338 if (!PL_rsfp_filters) { 4339 filter_add(S_cr_textfilter,NULL); 4340 } 4341 #endif 4342 if (PL_rsfp_filters) { 4343 if (!append) 4344 SvCUR_set(sv, 0); /* start with empty line */ 4345 if (FILTER_READ(0, sv, 0) > 0) 4346 return ( SvPVX(sv) ) ; 4347 else 4348 return NULL ; 4349 } 4350 else 4351 return (sv_gets(sv, PL_rsfp, append)); 4352 } 4353 4354 STATIC HV * 4355 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4356 { 4357 GV *gv; 4358 4359 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4360 4361 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 4362 return PL_curstash; 4363 4364 if (len > 2 4365 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') 4366 && (gv = gv_fetchpvn_flags(pkgname, 4367 len, 4368 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4369 { 4370 return GvHV(gv); /* Foo:: */ 4371 } 4372 4373 /* use constant CLASS => 'MyClass' */ 4374 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4375 if (gv && GvCV(gv)) { 4376 SV * const sv = cv_const_sv(GvCV(gv)); 4377 if (sv) 4378 return gv_stashsv(sv, 0); 4379 } 4380 4381 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4382 } 4383 4384 4385 STATIC char * 4386 S_tokenize_use(pTHX_ int is_use, char *s) { 4387 PERL_ARGS_ASSERT_TOKENIZE_USE; 4388 4389 if (PL_expect != XSTATE) 4390 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4391 is_use ? "use" : "no")); 4392 PL_expect = XTERM; 4393 s = skipspace(s); 4394 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4395 s = force_version(s, TRUE); 4396 if (*s == ';' || *s == '}' 4397 || (s = skipspace(s), (*s == ';' || *s == '}'))) { 4398 NEXTVAL_NEXTTOKE.opval = NULL; 4399 force_next(WORD); 4400 } 4401 else if (*s == 'v') { 4402 s = force_word(s,WORD,FALSE,TRUE); 4403 s = force_version(s, FALSE); 4404 } 4405 } 4406 else { 4407 s = force_word(s,WORD,FALSE,TRUE); 4408 s = force_version(s, FALSE); 4409 } 4410 pl_yylval.ival = is_use; 4411 return s; 4412 } 4413 #ifdef DEBUGGING 4414 static const char* const exp_name[] = 4415 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4416 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", 4417 "TERMORDORDOR" 4418 }; 4419 #endif 4420 4421 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l) 4422 STATIC bool 4423 S_word_takes_any_delimeter(char *p, STRLEN len) 4424 { 4425 return (len == 1 && strchr("msyq", p[0])) 4426 || (len == 2 4427 && ((p[0] == 't' && p[1] == 'r') 4428 || (p[0] == 'q' && strchr("qwxr", p[1])))); 4429 } 4430 4431 static void 4432 S_check_scalar_slice(pTHX_ char *s) 4433 { 4434 s++; 4435 while (*s == ' ' || *s == '\t') s++; 4436 if (*s == 'q' && s[1] == 'w' 4437 && !isWORDCHAR_lazy_if(s+2,UTF)) 4438 return; 4439 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s))) 4440 s += UTF ? UTF8SKIP(s) : 1; 4441 if (*s == '}' || *s == ']') 4442 pl_yylval.ival = OPpSLICEWARNING; 4443 } 4444 4445 /* 4446 yylex 4447 4448 Works out what to call the token just pulled out of the input 4449 stream. The yacc parser takes care of taking the ops we return and 4450 stitching them into a tree. 4451 4452 Returns: 4453 The type of the next token 4454 4455 Structure: 4456 Switch based on the current state: 4457 - if we already built the token before, use it 4458 - if we have a case modifier in a string, deal with that 4459 - handle other cases of interpolation inside a string 4460 - scan the next line if we are inside a format 4461 In the normal state switch on the next character: 4462 - default: 4463 if alphabetic, go to key lookup 4464 unrecoginized character - croak 4465 - 0/4/26: handle end-of-line or EOF 4466 - cases for whitespace 4467 - \n and #: handle comments and line numbers 4468 - various operators, brackets and sigils 4469 - numbers 4470 - quotes 4471 - 'v': vstrings (or go to key lookup) 4472 - 'x' repetition operator (or go to key lookup) 4473 - other ASCII alphanumerics (key lookup begins here): 4474 word before => ? 4475 keyword plugin 4476 scan built-in keyword (but do nothing with it yet) 4477 check for statement label 4478 check for lexical subs 4479 goto just_a_word if there is one 4480 see whether built-in keyword is overridden 4481 switch on keyword number: 4482 - default: just_a_word: 4483 not a built-in keyword; handle bareword lookup 4484 disambiguate between method and sub call 4485 fall back to bareword 4486 - cases for built-in keywords 4487 */ 4488 4489 4490 int 4491 Perl_yylex(pTHX) 4492 { 4493 dVAR; 4494 char *s = PL_bufptr; 4495 char *d; 4496 STRLEN len; 4497 bool bof = FALSE; 4498 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil); 4499 U8 formbrack = 0; 4500 U32 fake_eof = 0; 4501 4502 /* orig_keyword, gvp, and gv are initialized here because 4503 * jump to the label just_a_word_zero can bypass their 4504 * initialization later. */ 4505 I32 orig_keyword = 0; 4506 GV *gv = NULL; 4507 GV **gvp = NULL; 4508 4509 DEBUG_T( { 4510 SV* tmp = newSVpvs(""); 4511 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n", 4512 (IV)CopLINE(PL_curcop), 4513 lex_state_names[PL_lex_state], 4514 exp_name[PL_expect], 4515 pv_display(tmp, s, strlen(s), 0, 60)); 4516 SvREFCNT_dec(tmp); 4517 } ); 4518 4519 /* when we've already built the next token, just pull it out of the queue */ 4520 if (PL_nexttoke) { 4521 PL_nexttoke--; 4522 pl_yylval = PL_nextval[PL_nexttoke]; 4523 if (!PL_nexttoke) { 4524 PL_lex_state = PL_lex_defer; 4525 PL_lex_defer = LEX_NORMAL; 4526 } 4527 { 4528 I32 next_type; 4529 next_type = PL_nexttype[PL_nexttoke]; 4530 if (next_type & (7<<24)) { 4531 if (next_type & (1<<24)) { 4532 if (PL_lex_brackets > 100) 4533 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 4534 PL_lex_brackstack[PL_lex_brackets++] = 4535 (char) ((next_type >> 16) & 0xff); 4536 } 4537 if (next_type & (2<<24)) 4538 PL_lex_allbrackets++; 4539 if (next_type & (4<<24)) 4540 PL_lex_allbrackets--; 4541 next_type &= 0xffff; 4542 } 4543 return REPORT(next_type == 'p' ? pending_ident() : next_type); 4544 } 4545 } 4546 4547 switch (PL_lex_state) { 4548 case LEX_NORMAL: 4549 case LEX_INTERPNORMAL: 4550 break; 4551 4552 /* interpolated case modifiers like \L \U, including \Q and \E. 4553 when we get here, PL_bufptr is at the \ 4554 */ 4555 case LEX_INTERPCASEMOD: 4556 #ifdef DEBUGGING 4557 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 4558 Perl_croak(aTHX_ 4559 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 4560 PL_bufptr, PL_bufend, *PL_bufptr); 4561 #endif 4562 /* handle \E or end of string */ 4563 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 4564 /* if at a \E */ 4565 if (PL_lex_casemods) { 4566 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 4567 PL_lex_casestack[PL_lex_casemods] = '\0'; 4568 4569 if (PL_bufptr != PL_bufend 4570 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 4571 || oldmod == 'F')) { 4572 PL_bufptr += 2; 4573 PL_lex_state = LEX_INTERPCONCAT; 4574 } 4575 PL_lex_allbrackets--; 4576 return REPORT(')'); 4577 } 4578 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 4579 /* Got an unpaired \E */ 4580 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4581 "Useless use of \\E"); 4582 } 4583 if (PL_bufptr != PL_bufend) 4584 PL_bufptr += 2; 4585 PL_lex_state = LEX_INTERPCONCAT; 4586 return yylex(); 4587 } 4588 else { 4589 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4590 "### Saw case modifier\n"); }); 4591 s = PL_bufptr + 1; 4592 if (s[1] == '\\' && s[2] == 'E') { 4593 PL_bufptr = s + 3; 4594 PL_lex_state = LEX_INTERPCONCAT; 4595 return yylex(); 4596 } 4597 else { 4598 I32 tmp; 4599 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 4600 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 4601 if ((*s == 'L' || *s == 'U' || *s == 'F') 4602 && (strchr(PL_lex_casestack, 'L') 4603 || strchr(PL_lex_casestack, 'U') 4604 || strchr(PL_lex_casestack, 'F'))) 4605 { 4606 PL_lex_casestack[--PL_lex_casemods] = '\0'; 4607 PL_lex_allbrackets--; 4608 return REPORT(')'); 4609 } 4610 if (PL_lex_casemods > 10) 4611 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 4612 PL_lex_casestack[PL_lex_casemods++] = *s; 4613 PL_lex_casestack[PL_lex_casemods] = '\0'; 4614 PL_lex_state = LEX_INTERPCONCAT; 4615 NEXTVAL_NEXTTOKE.ival = 0; 4616 force_next((2<<24)|'('); 4617 if (*s == 'l') 4618 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 4619 else if (*s == 'u') 4620 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 4621 else if (*s == 'L') 4622 NEXTVAL_NEXTTOKE.ival = OP_LC; 4623 else if (*s == 'U') 4624 NEXTVAL_NEXTTOKE.ival = OP_UC; 4625 else if (*s == 'Q') 4626 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 4627 else if (*s == 'F') 4628 NEXTVAL_NEXTTOKE.ival = OP_FC; 4629 else 4630 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 4631 PL_bufptr = s + 1; 4632 } 4633 force_next(FUNC); 4634 if (PL_lex_starts) { 4635 s = PL_bufptr; 4636 PL_lex_starts = 0; 4637 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4638 if (PL_lex_casemods == 1 && PL_lex_inpat) 4639 TOKEN(','); 4640 else 4641 AopNOASSIGN(OP_CONCAT); 4642 } 4643 else 4644 return yylex(); 4645 } 4646 4647 case LEX_INTERPPUSH: 4648 return REPORT(sublex_push()); 4649 4650 case LEX_INTERPSTART: 4651 if (PL_bufptr == PL_bufend) 4652 return REPORT(sublex_done()); 4653 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, 4654 "### Interpolated variable\n"); }); 4655 PL_expect = XTERM; 4656 /* for /@a/, we leave the joining for the regex engine to do 4657 * (unless we're within \Q etc) */ 4658 PL_lex_dojoin = (*PL_bufptr == '@' 4659 && (!PL_lex_inpat || PL_lex_casemods)); 4660 PL_lex_state = LEX_INTERPNORMAL; 4661 if (PL_lex_dojoin) { 4662 NEXTVAL_NEXTTOKE.ival = 0; 4663 force_next(','); 4664 force_ident("\"", '$'); 4665 NEXTVAL_NEXTTOKE.ival = 0; 4666 force_next('$'); 4667 NEXTVAL_NEXTTOKE.ival = 0; 4668 force_next((2<<24)|'('); 4669 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 4670 force_next(FUNC); 4671 } 4672 /* Convert (?{...}) and friends to 'do {...}' */ 4673 if (PL_lex_inpat && *PL_bufptr == '(') { 4674 PL_parser->lex_shared->re_eval_start = PL_bufptr; 4675 PL_bufptr += 2; 4676 if (*PL_bufptr != '{') 4677 PL_bufptr++; 4678 PL_expect = XTERMBLOCK; 4679 force_next(DO); 4680 } 4681 4682 if (PL_lex_starts++) { 4683 s = PL_bufptr; 4684 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4685 if (!PL_lex_casemods && PL_lex_inpat) 4686 TOKEN(','); 4687 else 4688 AopNOASSIGN(OP_CONCAT); 4689 } 4690 return yylex(); 4691 4692 case LEX_INTERPENDMAYBE: 4693 if (intuit_more(PL_bufptr)) { 4694 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 4695 break; 4696 } 4697 /* FALLTHROUGH */ 4698 4699 case LEX_INTERPEND: 4700 /* Treat state as LEX_NORMAL if we have no inner lexing scope. 4701 XXX This hack can be removed if we stop setting PL_lex_state to 4702 LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */ 4703 if (UNLIKELY(!PL_lex_inwhat)) { 4704 PL_lex_state = LEX_NORMAL; 4705 break; 4706 } 4707 4708 if (PL_lex_dojoin) { 4709 const U8 dojoin_was = PL_lex_dojoin; 4710 PL_lex_dojoin = FALSE; 4711 PL_lex_state = LEX_INTERPCONCAT; 4712 PL_lex_allbrackets--; 4713 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN); 4714 } 4715 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 4716 && SvEVALED(PL_lex_repl)) 4717 { 4718 if (PL_bufptr != PL_bufend) 4719 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 4720 PL_lex_repl = NULL; 4721 } 4722 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 4723 re_eval_str. If the here-doc body’s length equals the previous 4724 value of re_eval_start, re_eval_start will now be null. So 4725 check re_eval_str as well. */ 4726 if (PL_parser->lex_shared->re_eval_start 4727 || PL_parser->lex_shared->re_eval_str) { 4728 SV *sv; 4729 if (*PL_bufptr != ')') 4730 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 4731 PL_bufptr++; 4732 /* having compiled a (?{..}) expression, return the original 4733 * text too, as a const */ 4734 if (PL_parser->lex_shared->re_eval_str) { 4735 sv = PL_parser->lex_shared->re_eval_str; 4736 PL_parser->lex_shared->re_eval_str = NULL; 4737 SvCUR_set(sv, 4738 PL_bufptr - PL_parser->lex_shared->re_eval_start); 4739 SvPV_shrink_to_cur(sv); 4740 } 4741 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 4742 PL_bufptr - PL_parser->lex_shared->re_eval_start); 4743 NEXTVAL_NEXTTOKE.opval = 4744 (OP*)newSVOP(OP_CONST, 0, 4745 sv); 4746 force_next(THING); 4747 PL_parser->lex_shared->re_eval_start = NULL; 4748 PL_expect = XTERM; 4749 return REPORT(','); 4750 } 4751 4752 /* FALLTHROUGH */ 4753 case LEX_INTERPCONCAT: 4754 #ifdef DEBUGGING 4755 if (PL_lex_brackets) 4756 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 4757 (long) PL_lex_brackets); 4758 #endif 4759 /* Treat state as LEX_NORMAL when not in an inner lexing scope. 4760 XXX This hack can be removed if we stop setting PL_lex_state to 4761 LEX_KNOWNEXT. */ 4762 if (UNLIKELY(!PL_lex_inwhat)) { 4763 PL_lex_state = LEX_NORMAL; 4764 break; 4765 } 4766 4767 if (PL_bufptr == PL_bufend) 4768 return REPORT(sublex_done()); 4769 4770 /* m'foo' still needs to be parsed for possible (?{...}) */ 4771 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 4772 SV *sv = newSVsv(PL_linestr); 4773 sv = tokeq(sv); 4774 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 4775 s = PL_bufend; 4776 } 4777 else { 4778 s = scan_const(PL_bufptr); 4779 if (*s == '\\') 4780 PL_lex_state = LEX_INTERPCASEMOD; 4781 else 4782 PL_lex_state = LEX_INTERPSTART; 4783 } 4784 4785 if (s != PL_bufptr) { 4786 NEXTVAL_NEXTTOKE = pl_yylval; 4787 PL_expect = XTERM; 4788 force_next(THING); 4789 if (PL_lex_starts++) { 4790 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4791 if (!PL_lex_casemods && PL_lex_inpat) 4792 TOKEN(','); 4793 else 4794 AopNOASSIGN(OP_CONCAT); 4795 } 4796 else { 4797 PL_bufptr = s; 4798 return yylex(); 4799 } 4800 } 4801 4802 return yylex(); 4803 case LEX_FORMLINE: 4804 s = scan_formline(PL_bufptr); 4805 if (!PL_lex_formbrack) 4806 { 4807 formbrack = 1; 4808 goto rightbracket; 4809 } 4810 PL_bufptr = s; 4811 return yylex(); 4812 } 4813 4814 /* We really do *not* want PL_linestr ever becoming a COW. */ 4815 assert (!SvIsCOW(PL_linestr)); 4816 s = PL_bufptr; 4817 PL_oldoldbufptr = PL_oldbufptr; 4818 PL_oldbufptr = s; 4819 PL_parser->saw_infix_sigil = 0; 4820 4821 retry: 4822 switch (*s) { 4823 default: 4824 if (UTF) { 4825 if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) { 4826 ENTER; 4827 SAVESPTR(PL_warnhook); 4828 PL_warnhook = PERL_WARNHOOK_FATAL; 4829 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0); 4830 LEAVE; 4831 } 4832 if (isIDFIRST_utf8((U8*)s)) { 4833 goto keylookup; 4834 } 4835 } 4836 else if (isALNUMC(*s)) { 4837 goto keylookup; 4838 } 4839 { 4840 SV *dsv = newSVpvs_flags("", SVs_TEMP); 4841 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s, 4842 UTF8SKIP(s), 4843 SVs_TEMP | SVf_UTF8), 4844 10, UNI_DISPLAY_ISPRINT) 4845 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 4846 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); 4847 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 4848 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; 4849 } else { 4850 d = PL_linestart; 4851 } 4852 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c, 4853 UTF8fARG(UTF, (s - d), d), 4854 (int) len + 1); 4855 } 4856 case 4: 4857 case 26: 4858 goto fake_eof; /* emulate EOF on ^D or ^Z */ 4859 case 0: 4860 if ((!PL_rsfp || PL_lex_inwhat) 4861 && (!PL_parser->filtered || s+1 < PL_bufend)) { 4862 PL_last_uni = 0; 4863 PL_last_lop = 0; 4864 if (PL_lex_brackets 4865 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) 4866 { 4867 yyerror((const char *) 4868 (PL_lex_formbrack 4869 ? "Format not terminated" 4870 : "Missing right curly or square bracket")); 4871 } 4872 DEBUG_T( { PerlIO_printf(Perl_debug_log, 4873 "### Tokener got EOF\n"); 4874 } ); 4875 TOKEN(0); 4876 } 4877 if (s++ < PL_bufend) 4878 goto retry; /* ignore stray nulls */ 4879 PL_last_uni = 0; 4880 PL_last_lop = 0; 4881 if (!PL_in_eval && !PL_preambled) { 4882 PL_preambled = TRUE; 4883 if (PL_perldb) { 4884 /* Generate a string of Perl code to load the debugger. 4885 * If PERL5DB is set, it will return the contents of that, 4886 * otherwise a compile-time require of perl5db.pl. */ 4887 4888 const char * const pdb = PerlEnv_getenv("PERL5DB"); 4889 4890 if (pdb) { 4891 sv_setpv(PL_linestr, pdb); 4892 sv_catpvs(PL_linestr,";"); 4893 } else { 4894 SETERRNO(0,SS_NORMAL); 4895 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 4896 } 4897 PL_parser->preambling = CopLINE(PL_curcop); 4898 } else 4899 sv_setpvs(PL_linestr,""); 4900 if (PL_preambleav) { 4901 SV **svp = AvARRAY(PL_preambleav); 4902 SV **const end = svp + AvFILLp(PL_preambleav); 4903 while(svp <= end) { 4904 sv_catsv(PL_linestr, *svp); 4905 ++svp; 4906 sv_catpvs(PL_linestr, ";"); 4907 } 4908 sv_free(MUTABLE_SV(PL_preambleav)); 4909 PL_preambleav = NULL; 4910 } 4911 if (PL_minus_E) 4912 sv_catpvs(PL_linestr, 4913 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 4914 if (PL_minus_n || PL_minus_p) { 4915 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 4916 if (PL_minus_l) 4917 sv_catpvs(PL_linestr,"chomp;"); 4918 if (PL_minus_a) { 4919 if (PL_minus_F) { 4920 if ((*PL_splitstr == '/' || *PL_splitstr == '\'' 4921 || *PL_splitstr == '"') 4922 && strchr(PL_splitstr + 1, *PL_splitstr)) 4923 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 4924 else { 4925 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 4926 bytes can be used as quoting characters. :-) */ 4927 const char *splits = PL_splitstr; 4928 sv_catpvs(PL_linestr, "our @F=split(q\0"); 4929 do { 4930 /* Need to \ \s */ 4931 if (*splits == '\\') 4932 sv_catpvn(PL_linestr, splits, 1); 4933 sv_catpvn(PL_linestr, splits, 1); 4934 } while (*splits++); 4935 /* This loop will embed the trailing NUL of 4936 PL_linestr as the last thing it does before 4937 terminating. */ 4938 sv_catpvs(PL_linestr, ");"); 4939 } 4940 } 4941 else 4942 sv_catpvs(PL_linestr,"our @F=split(' ');"); 4943 } 4944 } 4945 sv_catpvs(PL_linestr, "\n"); 4946 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4947 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4948 PL_last_lop = PL_last_uni = NULL; 4949 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) 4950 update_debugger_info(PL_linestr, NULL, 0); 4951 goto retry; 4952 } 4953 do { 4954 fake_eof = 0; 4955 bof = PL_rsfp ? TRUE : FALSE; 4956 if (0) { 4957 fake_eof: 4958 fake_eof = LEX_FAKE_EOF; 4959 } 4960 PL_bufptr = PL_bufend; 4961 COPLINE_INC_WITH_HERELINES; 4962 if (!lex_next_chunk(fake_eof)) { 4963 CopLINE_dec(PL_curcop); 4964 s = PL_bufptr; 4965 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 4966 } 4967 CopLINE_dec(PL_curcop); 4968 s = PL_bufptr; 4969 /* If it looks like the start of a BOM or raw UTF-16, 4970 * check if it in fact is. */ 4971 if (bof && PL_rsfp 4972 && (*s == 0 4973 || *(U8*)s == BOM_UTF8_FIRST_BYTE 4974 || *(U8*)s >= 0xFE 4975 || s[1] == 0)) 4976 { 4977 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 4978 bof = (offset == (Off_t)SvCUR(PL_linestr)); 4979 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 4980 /* offset may include swallowed CR */ 4981 if (!bof) 4982 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 4983 #endif 4984 if (bof) { 4985 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4986 s = swallow_bom((U8*)s); 4987 } 4988 } 4989 if (PL_parser->in_pod) { 4990 /* Incest with pod. */ 4991 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { 4992 sv_setpvs(PL_linestr, ""); 4993 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4995 PL_last_lop = PL_last_uni = NULL; 4996 PL_parser->in_pod = 0; 4997 } 4998 } 4999 if (PL_rsfp || PL_parser->filtered) 5000 incline(s); 5001 } while (PL_parser->in_pod); 5002 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 5003 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5004 PL_last_lop = PL_last_uni = NULL; 5005 if (CopLINE(PL_curcop) == 1) { 5006 while (s < PL_bufend && isSPACE(*s)) 5007 s++; 5008 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 5009 s++; 5010 d = NULL; 5011 if (!PL_in_eval) { 5012 if (*s == '#' && *(s+1) == '!') 5013 d = s + 2; 5014 #ifdef ALTERNATE_SHEBANG 5015 else { 5016 static char const as[] = ALTERNATE_SHEBANG; 5017 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 5018 d = s + (sizeof(as) - 1); 5019 } 5020 #endif /* ALTERNATE_SHEBANG */ 5021 } 5022 if (d) { 5023 char *ipath; 5024 char *ipathend; 5025 5026 while (isSPACE(*d)) 5027 d++; 5028 ipath = d; 5029 while (*d && !isSPACE(*d)) 5030 d++; 5031 ipathend = d; 5032 5033 #ifdef ARG_ZERO_IS_SCRIPT 5034 if (ipathend > ipath) { 5035 /* 5036 * HP-UX (at least) sets argv[0] to the script name, 5037 * which makes $^X incorrect. And Digital UNIX and Linux, 5038 * at least, set argv[0] to the basename of the Perl 5039 * interpreter. So, having found "#!", we'll set it right. 5040 */ 5041 SV* copfilesv = CopFILESV(PL_curcop); 5042 if (copfilesv) { 5043 SV * const x = 5044 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 5045 SVt_PV)); /* $^X */ 5046 assert(SvPOK(x) || SvGMAGICAL(x)); 5047 if (sv_eq(x, copfilesv)) { 5048 sv_setpvn(x, ipath, ipathend - ipath); 5049 SvSETMAGIC(x); 5050 } 5051 else { 5052 STRLEN blen; 5053 STRLEN llen; 5054 const char *bstart = SvPV_const(copfilesv, blen); 5055 const char * const lstart = SvPV_const(x, llen); 5056 if (llen < blen) { 5057 bstart += blen - llen; 5058 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 5059 sv_setpvn(x, ipath, ipathend - ipath); 5060 SvSETMAGIC(x); 5061 } 5062 } 5063 } 5064 } 5065 else { 5066 /* Anything to do if no copfilesv? */ 5067 } 5068 TAINT_NOT; /* $^X is always tainted, but that's OK */ 5069 } 5070 #endif /* ARG_ZERO_IS_SCRIPT */ 5071 5072 /* 5073 * Look for options. 5074 */ 5075 d = instr(s,"perl -"); 5076 if (!d) { 5077 d = instr(s,"perl"); 5078 #if defined(DOSISH) 5079 /* avoid getting into infinite loops when shebang 5080 * line contains "Perl" rather than "perl" */ 5081 if (!d) { 5082 for (d = ipathend-4; d >= ipath; --d) { 5083 if (isALPHA_FOLD_EQ(*d, 'p') 5084 && !ibcmp(d, "perl", 4)) 5085 { 5086 break; 5087 } 5088 } 5089 if (d < ipath) 5090 d = NULL; 5091 } 5092 #endif 5093 } 5094 #ifdef ALTERNATE_SHEBANG 5095 /* 5096 * If the ALTERNATE_SHEBANG on this system starts with a 5097 * character that can be part of a Perl expression, then if 5098 * we see it but not "perl", we're probably looking at the 5099 * start of Perl code, not a request to hand off to some 5100 * other interpreter. Similarly, if "perl" is there, but 5101 * not in the first 'word' of the line, we assume the line 5102 * contains the start of the Perl program. 5103 */ 5104 if (d && *s != '#') { 5105 const char *c = ipath; 5106 while (*c && !strchr("; \t\r\n\f\v#", *c)) 5107 c++; 5108 if (c < d) 5109 d = NULL; /* "perl" not in first word; ignore */ 5110 else 5111 *s = '#'; /* Don't try to parse shebang line */ 5112 } 5113 #endif /* ALTERNATE_SHEBANG */ 5114 if (!d 5115 && *s == '#' 5116 && ipathend > ipath 5117 && !PL_minus_c 5118 && !instr(s,"indir") 5119 && instr(PL_origargv[0],"perl")) 5120 { 5121 dVAR; 5122 char **newargv; 5123 5124 *ipathend = '\0'; 5125 s = ipathend + 1; 5126 while (s < PL_bufend && isSPACE(*s)) 5127 s++; 5128 if (s < PL_bufend) { 5129 Newx(newargv,PL_origargc+3,char*); 5130 newargv[1] = s; 5131 while (s < PL_bufend && !isSPACE(*s)) 5132 s++; 5133 *s = '\0'; 5134 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 5135 } 5136 else 5137 newargv = PL_origargv; 5138 newargv[0] = ipath; 5139 PERL_FPU_PRE_EXEC 5140 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 5141 PERL_FPU_POST_EXEC 5142 Perl_croak(aTHX_ "Can't exec %s", ipath); 5143 } 5144 if (d) { 5145 while (*d && !isSPACE(*d)) 5146 d++; 5147 while (SPACE_OR_TAB(*d)) 5148 d++; 5149 5150 if (*d++ == '-') { 5151 const bool switches_done = PL_doswitches; 5152 const U32 oldpdb = PL_perldb; 5153 const bool oldn = PL_minus_n; 5154 const bool oldp = PL_minus_p; 5155 const char *d1 = d; 5156 5157 do { 5158 bool baduni = FALSE; 5159 if (*d1 == 'C') { 5160 const char *d2 = d1 + 1; 5161 if (parse_unicode_opts((const char **)&d2) 5162 != PL_unicode) 5163 baduni = TRUE; 5164 } 5165 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { 5166 const char * const m = d1; 5167 while (*d1 && !isSPACE(*d1)) 5168 d1++; 5169 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 5170 (int)(d1 - m), m); 5171 } 5172 d1 = moreswitches(d1); 5173 } while (d1); 5174 if (PL_doswitches && !switches_done) { 5175 int argc = PL_origargc; 5176 char **argv = PL_origargv; 5177 do { 5178 argc--,argv++; 5179 } while (argc && argv[0][0] == '-' && argv[0][1]); 5180 init_argv_symbols(argc,argv); 5181 } 5182 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) 5183 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 5184 /* if we have already added "LINE: while (<>) {", 5185 we must not do it again */ 5186 { 5187 sv_setpvs(PL_linestr, ""); 5188 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5189 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5190 PL_last_lop = PL_last_uni = NULL; 5191 PL_preambled = FALSE; 5192 if (PERLDB_LINE_OR_SAVESRC) 5193 (void)gv_fetchfile(PL_origfilename); 5194 goto retry; 5195 } 5196 } 5197 } 5198 } 5199 } 5200 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5201 PL_lex_state = LEX_FORMLINE; 5202 force_next(FORMRBRACK); 5203 TOKEN(';'); 5204 } 5205 goto retry; 5206 case '\r': 5207 #ifdef PERL_STRICT_CR 5208 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 5209 Perl_croak(aTHX_ 5210 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 5211 #endif 5212 case ' ': case '\t': case '\f': case '\v': 5213 s++; 5214 goto retry; 5215 case '#': 5216 case '\n': 5217 if (PL_lex_state != LEX_NORMAL 5218 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) 5219 { 5220 const bool in_comment = *s == '#'; 5221 if (*s == '#' && s == PL_linestart && PL_in_eval 5222 && !PL_rsfp && !PL_parser->filtered) { 5223 /* handle eval qq[#line 1 "foo"\n ...] */ 5224 CopLINE_dec(PL_curcop); 5225 incline(s); 5226 } 5227 d = s; 5228 while (d < PL_bufend && *d != '\n') 5229 d++; 5230 if (d < PL_bufend) 5231 d++; 5232 else if (d > PL_bufend) 5233 /* Found by Ilya: feed random input to Perl. */ 5234 Perl_croak(aTHX_ "panic: input overflow, %p > %p", 5235 d, PL_bufend); 5236 s = d; 5237 if (in_comment && d == PL_bufend 5238 && PL_lex_state == LEX_INTERPNORMAL 5239 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5240 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 5241 else 5242 incline(s); 5243 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5244 PL_lex_state = LEX_FORMLINE; 5245 force_next(FORMRBRACK); 5246 TOKEN(';'); 5247 } 5248 } 5249 else { 5250 while (s < PL_bufend && *s != '\n') 5251 s++; 5252 if (s < PL_bufend) 5253 { 5254 s++; 5255 if (s < PL_bufend) 5256 incline(s); 5257 } 5258 else if (s > PL_bufend) 5259 /* Found by Ilya: feed random input to Perl. */ 5260 Perl_croak(aTHX_ "panic: input overflow"); 5261 } 5262 goto retry; 5263 case '-': 5264 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5265 I32 ftst = 0; 5266 char tmp; 5267 5268 s++; 5269 PL_bufptr = s; 5270 tmp = *s++; 5271 5272 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5273 s++; 5274 5275 if (strnEQ(s,"=>",2)) { 5276 s = force_word(PL_bufptr,WORD,FALSE,FALSE); 5277 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5278 OPERATOR('-'); /* unary minus */ 5279 } 5280 switch (tmp) { 5281 case 'r': ftst = OP_FTEREAD; break; 5282 case 'w': ftst = OP_FTEWRITE; break; 5283 case 'x': ftst = OP_FTEEXEC; break; 5284 case 'o': ftst = OP_FTEOWNED; break; 5285 case 'R': ftst = OP_FTRREAD; break; 5286 case 'W': ftst = OP_FTRWRITE; break; 5287 case 'X': ftst = OP_FTREXEC; break; 5288 case 'O': ftst = OP_FTROWNED; break; 5289 case 'e': ftst = OP_FTIS; break; 5290 case 'z': ftst = OP_FTZERO; break; 5291 case 's': ftst = OP_FTSIZE; break; 5292 case 'f': ftst = OP_FTFILE; break; 5293 case 'd': ftst = OP_FTDIR; break; 5294 case 'l': ftst = OP_FTLINK; break; 5295 case 'p': ftst = OP_FTPIPE; break; 5296 case 'S': ftst = OP_FTSOCK; break; 5297 case 'u': ftst = OP_FTSUID; break; 5298 case 'g': ftst = OP_FTSGID; break; 5299 case 'k': ftst = OP_FTSVTX; break; 5300 case 'b': ftst = OP_FTBLK; break; 5301 case 'c': ftst = OP_FTCHR; break; 5302 case 't': ftst = OP_FTTTY; break; 5303 case 'T': ftst = OP_FTTEXT; break; 5304 case 'B': ftst = OP_FTBINARY; break; 5305 case 'M': case 'A': case 'C': 5306 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5307 switch (tmp) { 5308 case 'M': ftst = OP_FTMTIME; break; 5309 case 'A': ftst = OP_FTATIME; break; 5310 case 'C': ftst = OP_FTCTIME; break; 5311 default: break; 5312 } 5313 break; 5314 default: 5315 break; 5316 } 5317 if (ftst) { 5318 PL_last_uni = PL_oldbufptr; 5319 PL_last_lop_op = (OPCODE)ftst; 5320 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5321 "### Saw file test %c\n", (int)tmp); 5322 } ); 5323 FTST(ftst); 5324 } 5325 else { 5326 /* Assume it was a minus followed by a one-letter named 5327 * subroutine call (or a -bareword), then. */ 5328 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5329 "### '-%c' looked like a file test but was not\n", 5330 (int) tmp); 5331 } ); 5332 s = --PL_bufptr; 5333 } 5334 } 5335 { 5336 const char tmp = *s++; 5337 if (*s == tmp) { 5338 s++; 5339 if (PL_expect == XOPERATOR) 5340 TERM(POSTDEC); 5341 else 5342 OPERATOR(PREDEC); 5343 } 5344 else if (*s == '>') { 5345 s++; 5346 s = skipspace(s); 5347 if (((*s == '$' || *s == '&') && s[1] == '*') 5348 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5349 ||((*s == '@' || *s == '%') && strchr("*[{", s[1])) 5350 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5351 ) 5352 { 5353 PL_expect = XPOSTDEREF; 5354 TOKEN(ARROW); 5355 } 5356 if (isIDFIRST_lazy_if(s,UTF)) { 5357 s = force_word(s,METHOD,FALSE,TRUE); 5358 TOKEN(ARROW); 5359 } 5360 else if (*s == '$') 5361 OPERATOR(ARROW); 5362 else 5363 TERM(ARROW); 5364 } 5365 if (PL_expect == XOPERATOR) { 5366 if (*s == '=' 5367 && !PL_lex_allbrackets 5368 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5369 { 5370 s--; 5371 TOKEN(0); 5372 } 5373 Aop(OP_SUBTRACT); 5374 } 5375 else { 5376 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5377 check_uni(); 5378 OPERATOR('-'); /* unary minus */ 5379 } 5380 } 5381 5382 case '+': 5383 { 5384 const char tmp = *s++; 5385 if (*s == tmp) { 5386 s++; 5387 if (PL_expect == XOPERATOR) 5388 TERM(POSTINC); 5389 else 5390 OPERATOR(PREINC); 5391 } 5392 if (PL_expect == XOPERATOR) { 5393 if (*s == '=' 5394 && !PL_lex_allbrackets 5395 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5396 { 5397 s--; 5398 TOKEN(0); 5399 } 5400 Aop(OP_ADD); 5401 } 5402 else { 5403 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5404 check_uni(); 5405 OPERATOR('+'); 5406 } 5407 } 5408 5409 case '*': 5410 if (PL_expect == XPOSTDEREF) POSTDEREF('*'); 5411 if (PL_expect != XOPERATOR) { 5412 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5413 PL_expect = XOPERATOR; 5414 force_ident(PL_tokenbuf, '*'); 5415 if (!*PL_tokenbuf) 5416 PREREF('*'); 5417 TERM('*'); 5418 } 5419 s++; 5420 if (*s == '*') { 5421 s++; 5422 if (*s == '=' && !PL_lex_allbrackets 5423 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5424 { 5425 s -= 2; 5426 TOKEN(0); 5427 } 5428 PWop(OP_POW); 5429 } 5430 if (*s == '=' 5431 && !PL_lex_allbrackets 5432 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5433 { 5434 s--; 5435 TOKEN(0); 5436 } 5437 PL_parser->saw_infix_sigil = 1; 5438 Mop(OP_MULTIPLY); 5439 5440 case '%': 5441 { 5442 if (PL_expect == XOPERATOR) { 5443 if (s[1] == '=' 5444 && !PL_lex_allbrackets 5445 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5446 { 5447 TOKEN(0); 5448 } 5449 ++s; 5450 PL_parser->saw_infix_sigil = 1; 5451 Mop(OP_MODULO); 5452 } 5453 else if (PL_expect == XPOSTDEREF) POSTDEREF('%'); 5454 PL_tokenbuf[0] = '%'; 5455 s = scan_ident(s, PL_tokenbuf + 1, 5456 sizeof PL_tokenbuf - 1, FALSE); 5457 pl_yylval.ival = 0; 5458 if (!PL_tokenbuf[1]) { 5459 PREREF('%'); 5460 } 5461 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 5462 if (*s == '[') 5463 PL_tokenbuf[0] = '@'; 5464 } 5465 PL_expect = XOPERATOR; 5466 force_ident_maybe_lex('%'); 5467 TERM('%'); 5468 } 5469 case '^': 5470 d = s; 5471 bof = FEATURE_BITWISE_IS_ENABLED; 5472 if (bof && s[1] == '.') 5473 s++; 5474 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5475 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5476 { 5477 s = d; 5478 TOKEN(0); 5479 } 5480 s++; 5481 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); 5482 case '[': 5483 if (PL_lex_brackets > 100) 5484 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5485 PL_lex_brackstack[PL_lex_brackets++] = 0; 5486 PL_lex_allbrackets++; 5487 { 5488 const char tmp = *s++; 5489 OPERATOR(tmp); 5490 } 5491 case '~': 5492 if (s[1] == '~' 5493 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) 5494 { 5495 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 5496 TOKEN(0); 5497 s += 2; 5498 Perl_ck_warner_d(aTHX_ 5499 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 5500 "Smartmatch is experimental"); 5501 Eop(OP_SMARTMATCH); 5502 } 5503 s++; 5504 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { 5505 s++; 5506 BCop(OP_SCOMPLEMENT); 5507 } 5508 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); 5509 case ',': 5510 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 5511 TOKEN(0); 5512 s++; 5513 OPERATOR(','); 5514 case ':': 5515 if (s[1] == ':') { 5516 len = 0; 5517 goto just_a_word_zero_gv; 5518 } 5519 s++; 5520 { 5521 OP *attrs; 5522 5523 switch (PL_expect) { 5524 case XOPERATOR: 5525 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 5526 break; 5527 PL_bufptr = s; /* update in case we back off */ 5528 if (*s == '=') { 5529 Perl_croak(aTHX_ 5530 "Use of := for an empty attribute list is not allowed"); 5531 } 5532 goto grabattrs; 5533 case XATTRBLOCK: 5534 PL_expect = XBLOCK; 5535 goto grabattrs; 5536 case XATTRTERM: 5537 PL_expect = XTERMBLOCK; 5538 grabattrs: 5539 s = skipspace(s); 5540 attrs = NULL; 5541 while (isIDFIRST_lazy_if(s,UTF)) { 5542 I32 tmp; 5543 SV *sv; 5544 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5545 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5546 if (tmp < 0) tmp = -tmp; 5547 switch (tmp) { 5548 case KEY_or: 5549 case KEY_and: 5550 case KEY_for: 5551 case KEY_foreach: 5552 case KEY_unless: 5553 case KEY_if: 5554 case KEY_while: 5555 case KEY_until: 5556 goto got_attrs; 5557 default: 5558 break; 5559 } 5560 } 5561 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5562 if (*d == '(') { 5563 d = scan_str(d,TRUE,TRUE,FALSE,NULL); 5564 COPLINE_SET_FROM_MULTI_END; 5565 if (!d) { 5566 /* MUST advance bufptr here to avoid bogus 5567 "at end of line" context messages from yyerror(). 5568 */ 5569 PL_bufptr = s + len; 5570 yyerror("Unterminated attribute parameter in attribute list"); 5571 if (attrs) 5572 op_free(attrs); 5573 sv_free(sv); 5574 return REPORT(0); /* EOF indicator */ 5575 } 5576 } 5577 if (PL_lex_stuff) { 5578 sv_catsv(sv, PL_lex_stuff); 5579 attrs = op_append_elem(OP_LIST, attrs, 5580 newSVOP(OP_CONST, 0, sv)); 5581 SvREFCNT_dec_NN(PL_lex_stuff); 5582 PL_lex_stuff = NULL; 5583 } 5584 else { 5585 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { 5586 sv_free(sv); 5587 if (PL_in_my == KEY_our) { 5588 deprecate(":unique"); 5589 } 5590 else 5591 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); 5592 } 5593 5594 /* NOTE: any CV attrs applied here need to be part of 5595 the CVf_BUILTIN_ATTRS define in cv.h! */ 5596 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { 5597 sv_free(sv); 5598 CvLVALUE_on(PL_compcv); 5599 } 5600 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { 5601 sv_free(sv); 5602 deprecate(":locked"); 5603 } 5604 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { 5605 sv_free(sv); 5606 CvMETHOD_on(PL_compcv); 5607 } 5608 else if (!PL_in_my && len == 5 5609 && strnEQ(SvPVX(sv), "const", len)) 5610 { 5611 sv_free(sv); 5612 Perl_ck_warner_d(aTHX_ 5613 packWARN(WARN_EXPERIMENTAL__CONST_ATTR), 5614 ":const is experimental" 5615 ); 5616 CvANONCONST_on(PL_compcv); 5617 if (!CvANON(PL_compcv)) 5618 yyerror(":const is not permitted on named " 5619 "subroutines"); 5620 } 5621 /* After we've set the flags, it could be argued that 5622 we don't need to do the attributes.pm-based setting 5623 process, and shouldn't bother appending recognized 5624 flags. To experiment with that, uncomment the 5625 following "else". (Note that's already been 5626 uncommented. That keeps the above-applied built-in 5627 attributes from being intercepted (and possibly 5628 rejected) by a package's attribute routines, but is 5629 justified by the performance win for the common case 5630 of applying only built-in attributes.) */ 5631 else 5632 attrs = op_append_elem(OP_LIST, attrs, 5633 newSVOP(OP_CONST, 0, 5634 sv)); 5635 } 5636 s = skipspace(d); 5637 if (*s == ':' && s[1] != ':') 5638 s = skipspace(s+1); 5639 else if (s == d) 5640 break; /* require real whitespace or :'s */ 5641 /* XXX losing whitespace on sequential attributes here */ 5642 } 5643 { 5644 if (*s != ';' 5645 && *s != '}' 5646 && !(PL_expect == XOPERATOR 5647 ? (*s == '=' || *s == ')') 5648 : (*s == '{' || *s == '('))) 5649 { 5650 const char q = ((*s == '\'') ? '"' : '\''); 5651 /* If here for an expression, and parsed no attrs, back 5652 off. */ 5653 if (PL_expect == XOPERATOR && !attrs) { 5654 s = PL_bufptr; 5655 break; 5656 } 5657 /* MUST advance bufptr here to avoid bogus "at end of line" 5658 context messages from yyerror(). 5659 */ 5660 PL_bufptr = s; 5661 yyerror( (const char *) 5662 (*s 5663 ? Perl_form(aTHX_ "Invalid separator character " 5664 "%c%c%c in attribute list", q, *s, q) 5665 : "Unterminated attribute list" ) ); 5666 if (attrs) 5667 op_free(attrs); 5668 OPERATOR(':'); 5669 } 5670 } 5671 got_attrs: 5672 if (attrs) { 5673 NEXTVAL_NEXTTOKE.opval = attrs; 5674 force_next(THING); 5675 } 5676 TOKEN(COLONATTR); 5677 } 5678 } 5679 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 5680 s--; 5681 TOKEN(0); 5682 } 5683 PL_lex_allbrackets--; 5684 OPERATOR(':'); 5685 case '(': 5686 s++; 5687 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 5688 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 5689 else 5690 PL_expect = XTERM; 5691 s = skipspace(s); 5692 PL_lex_allbrackets++; 5693 TOKEN('('); 5694 case ';': 5695 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 5696 TOKEN(0); 5697 CLINE; 5698 s++; 5699 PL_expect = XSTATE; 5700 TOKEN(';'); 5701 case ')': 5702 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 5703 TOKEN(0); 5704 s++; 5705 PL_lex_allbrackets--; 5706 s = skipspace(s); 5707 if (*s == '{') 5708 PREBLOCK(')'); 5709 TERM(')'); 5710 case ']': 5711 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 5712 TOKEN(0); 5713 s++; 5714 if (PL_lex_brackets <= 0) 5715 /* diag_listed_as: Unmatched right %s bracket */ 5716 yyerror("Unmatched right square bracket"); 5717 else 5718 --PL_lex_brackets; 5719 PL_lex_allbrackets--; 5720 if (PL_lex_state == LEX_INTERPNORMAL) { 5721 if (PL_lex_brackets == 0) { 5722 if (*s == '-' && s[1] == '>') 5723 PL_lex_state = LEX_INTERPENDMAYBE; 5724 else if (*s != '[' && *s != '{') 5725 PL_lex_state = LEX_INTERPEND; 5726 } 5727 } 5728 TERM(']'); 5729 case '{': 5730 s++; 5731 leftbracket: 5732 if (PL_lex_brackets > 100) { 5733 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5734 } 5735 switch (PL_expect) { 5736 case XTERM: 5737 case XTERMORDORDOR: 5738 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5739 PL_lex_allbrackets++; 5740 OPERATOR(HASHBRACK); 5741 case XOPERATOR: 5742 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5743 s++; 5744 d = s; 5745 PL_tokenbuf[0] = '\0'; 5746 if (d < PL_bufend && *d == '-') { 5747 PL_tokenbuf[0] = '-'; 5748 d++; 5749 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5750 d++; 5751 } 5752 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 5753 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 5754 FALSE, &len); 5755 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5756 d++; 5757 if (*d == '}') { 5758 const char minus = (PL_tokenbuf[0] == '-'); 5759 s = force_word(s + minus, WORD, FALSE, TRUE); 5760 if (minus) 5761 force_next('-'); 5762 } 5763 } 5764 /* FALLTHROUGH */ 5765 case XATTRTERM: 5766 case XTERMBLOCK: 5767 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5768 PL_lex_allbrackets++; 5769 PL_expect = XSTATE; 5770 break; 5771 case XATTRBLOCK: 5772 case XBLOCK: 5773 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 5774 PL_lex_allbrackets++; 5775 PL_expect = XSTATE; 5776 break; 5777 case XBLOCKTERM: 5778 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 5779 PL_lex_allbrackets++; 5780 PL_expect = XSTATE; 5781 break; 5782 default: { 5783 const char *t; 5784 if (PL_oldoldbufptr == PL_last_lop) 5785 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 5786 else 5787 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5788 PL_lex_allbrackets++; 5789 s = skipspace(s); 5790 if (*s == '}') { 5791 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 5792 PL_expect = XTERM; 5793 /* This hack is to get the ${} in the message. */ 5794 PL_bufptr = s+1; 5795 yyerror("syntax error"); 5796 break; 5797 } 5798 OPERATOR(HASHBRACK); 5799 } 5800 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { 5801 /* ${...} or @{...} etc., but not print {...} 5802 * Skip the disambiguation and treat this as a block. 5803 */ 5804 goto block_expectation; 5805 } 5806 /* This hack serves to disambiguate a pair of curlies 5807 * as being a block or an anon hash. Normally, expectation 5808 * determines that, but in cases where we're not in a 5809 * position to expect anything in particular (like inside 5810 * eval"") we have to resolve the ambiguity. This code 5811 * covers the case where the first term in the curlies is a 5812 * quoted string. Most other cases need to be explicitly 5813 * disambiguated by prepending a "+" before the opening 5814 * curly in order to force resolution as an anon hash. 5815 * 5816 * XXX should probably propagate the outer expectation 5817 * into eval"" to rely less on this hack, but that could 5818 * potentially break current behavior of eval"". 5819 * GSAR 97-07-21 5820 */ 5821 t = s; 5822 if (*s == '\'' || *s == '"' || *s == '`') { 5823 /* common case: get past first string, handling escapes */ 5824 for (t++; t < PL_bufend && *t != *s;) 5825 if (*t++ == '\\') 5826 t++; 5827 t++; 5828 } 5829 else if (*s == 'q') { 5830 if (++t < PL_bufend 5831 && (!isWORDCHAR(*t) 5832 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 5833 && !isWORDCHAR(*t)))) 5834 { 5835 /* skip q//-like construct */ 5836 const char *tmps; 5837 char open, close, term; 5838 I32 brackets = 1; 5839 5840 while (t < PL_bufend && isSPACE(*t)) 5841 t++; 5842 /* check for q => */ 5843 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 5844 OPERATOR(HASHBRACK); 5845 } 5846 term = *t; 5847 open = term; 5848 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 5849 term = tmps[5]; 5850 close = term; 5851 if (open == close) 5852 for (t++; t < PL_bufend; t++) { 5853 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 5854 t++; 5855 else if (*t == open) 5856 break; 5857 } 5858 else { 5859 for (t++; t < PL_bufend; t++) { 5860 if (*t == '\\' && t+1 < PL_bufend) 5861 t++; 5862 else if (*t == close && --brackets <= 0) 5863 break; 5864 else if (*t == open) 5865 brackets++; 5866 } 5867 } 5868 t++; 5869 } 5870 else 5871 /* skip plain q word */ 5872 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 5873 t += UTF ? UTF8SKIP(t) : 1; 5874 } 5875 else if (isWORDCHAR_lazy_if(t,UTF)) { 5876 t += UTF ? UTF8SKIP(t) : 1; 5877 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 5878 t += UTF ? UTF8SKIP(t) : 1; 5879 } 5880 while (t < PL_bufend && isSPACE(*t)) 5881 t++; 5882 /* if comma follows first term, call it an anon hash */ 5883 /* XXX it could be a comma expression with loop modifiers */ 5884 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 5885 || (*t == '=' && t[1] == '>'))) 5886 OPERATOR(HASHBRACK); 5887 if (PL_expect == XREF) 5888 { 5889 block_expectation: 5890 /* If there is an opening brace or 'sub:', treat it 5891 as a term to make ${{...}}{k} and &{sub:attr...} 5892 dwim. Otherwise, treat it as a statement, so 5893 map {no strict; ...} works. 5894 */ 5895 s = skipspace(s); 5896 if (*s == '{') { 5897 PL_expect = XTERM; 5898 break; 5899 } 5900 if (strnEQ(s, "sub", 3)) { 5901 d = s + 3; 5902 d = skipspace(d); 5903 if (*d == ':') { 5904 PL_expect = XTERM; 5905 break; 5906 } 5907 } 5908 PL_expect = XSTATE; 5909 } 5910 else { 5911 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 5912 PL_expect = XSTATE; 5913 } 5914 } 5915 break; 5916 } 5917 pl_yylval.ival = CopLINE(PL_curcop); 5918 PL_copline = NOLINE; /* invalidate current command line number */ 5919 TOKEN(formbrack ? '=' : '{'); 5920 case '}': 5921 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 5922 TOKEN(0); 5923 rightbracket: 5924 s++; 5925 if (PL_lex_brackets <= 0) 5926 /* diag_listed_as: Unmatched right %s bracket */ 5927 yyerror("Unmatched right curly bracket"); 5928 else 5929 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 5930 PL_lex_allbrackets--; 5931 if (PL_lex_state == LEX_INTERPNORMAL) { 5932 if (PL_lex_brackets == 0) { 5933 if (PL_expect & XFAKEBRACK) { 5934 PL_expect &= XENUMMASK; 5935 PL_lex_state = LEX_INTERPEND; 5936 PL_bufptr = s; 5937 return yylex(); /* ignore fake brackets */ 5938 } 5939 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5940 && SvEVALED(PL_lex_repl)) 5941 PL_lex_state = LEX_INTERPEND; 5942 else if (*s == '-' && s[1] == '>') 5943 PL_lex_state = LEX_INTERPENDMAYBE; 5944 else if (*s != '[' && *s != '{') 5945 PL_lex_state = LEX_INTERPEND; 5946 } 5947 } 5948 if (PL_expect & XFAKEBRACK) { 5949 PL_expect &= XENUMMASK; 5950 PL_bufptr = s; 5951 return yylex(); /* ignore fake brackets */ 5952 } 5953 force_next(formbrack ? '.' : '}'); 5954 if (formbrack) LEAVE; 5955 if (formbrack == 2) { /* means . where arguments were expected */ 5956 force_next(';'); 5957 TOKEN(FORMRBRACK); 5958 } 5959 TOKEN(';'); 5960 case '&': 5961 if (PL_expect == XPOSTDEREF) POSTDEREF('&'); 5962 s++; 5963 if (*s++ == '&') { 5964 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5965 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 5966 s -= 2; 5967 TOKEN(0); 5968 } 5969 AOPERATOR(ANDAND); 5970 } 5971 s--; 5972 if (PL_expect == XOPERATOR) { 5973 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON) 5974 && isIDFIRST_lazy_if(s,UTF)) 5975 { 5976 CopLINE_dec(PL_curcop); 5977 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 5978 CopLINE_inc(PL_curcop); 5979 } 5980 d = s; 5981 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 5982 s++; 5983 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5984 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 5985 s = d; 5986 s--; 5987 TOKEN(0); 5988 } 5989 if (d == s) { 5990 PL_parser->saw_infix_sigil = 1; 5991 BAop(bof ? OP_NBIT_AND : OP_BIT_AND); 5992 } 5993 else 5994 BAop(OP_SBIT_AND); 5995 } 5996 5997 PL_tokenbuf[0] = '&'; 5998 s = scan_ident(s - 1, PL_tokenbuf + 1, 5999 sizeof PL_tokenbuf - 1, TRUE); 6000 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6001 if (PL_tokenbuf[1]) { 6002 force_ident_maybe_lex('&'); 6003 } 6004 else 6005 PREREF('&'); 6006 TERM('&'); 6007 6008 case '|': 6009 s++; 6010 if (*s++ == '|') { 6011 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6012 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6013 s -= 2; 6014 TOKEN(0); 6015 } 6016 AOPERATOR(OROR); 6017 } 6018 s--; 6019 d = s; 6020 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6021 s++; 6022 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6023 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6024 s = d - 1; 6025 TOKEN(0); 6026 } 6027 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); 6028 case '=': 6029 s++; 6030 { 6031 const char tmp = *s++; 6032 if (tmp == '=') { 6033 if (!PL_lex_allbrackets 6034 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6035 { 6036 s -= 2; 6037 TOKEN(0); 6038 } 6039 Eop(OP_EQ); 6040 } 6041 if (tmp == '>') { 6042 if (!PL_lex_allbrackets 6043 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 6044 { 6045 s -= 2; 6046 TOKEN(0); 6047 } 6048 OPERATOR(','); 6049 } 6050 if (tmp == '~') 6051 PMop(OP_MATCH); 6052 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 6053 && strchr("+-*/%.^&|<",tmp)) 6054 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6055 "Reversed %c= operator",(int)tmp); 6056 s--; 6057 if (PL_expect == XSTATE 6058 && isALPHA(tmp) 6059 && (s == PL_linestart+1 || s[-2] == '\n') ) 6060 { 6061 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) 6062 || PL_lex_state != LEX_NORMAL) { 6063 d = PL_bufend; 6064 while (s < d) { 6065 if (*s++ == '\n') { 6066 incline(s); 6067 if (strnEQ(s,"=cut",4)) { 6068 s = strchr(s,'\n'); 6069 if (s) 6070 s++; 6071 else 6072 s = d; 6073 incline(s); 6074 goto retry; 6075 } 6076 } 6077 } 6078 goto retry; 6079 } 6080 s = PL_bufend; 6081 PL_parser->in_pod = 1; 6082 goto retry; 6083 } 6084 } 6085 if (PL_expect == XBLOCK) { 6086 const char *t = s; 6087 #ifdef PERL_STRICT_CR 6088 while (SPACE_OR_TAB(*t)) 6089 #else 6090 while (SPACE_OR_TAB(*t) || *t == '\r') 6091 #endif 6092 t++; 6093 if (*t == '\n' || *t == '#') { 6094 formbrack = 1; 6095 ENTER; 6096 SAVEI8(PL_parser->form_lex_state); 6097 SAVEI32(PL_lex_formbrack); 6098 PL_parser->form_lex_state = PL_lex_state; 6099 PL_lex_formbrack = PL_lex_brackets + 1; 6100 goto leftbracket; 6101 } 6102 } 6103 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6104 s--; 6105 TOKEN(0); 6106 } 6107 pl_yylval.ival = 0; 6108 OPERATOR(ASSIGNOP); 6109 case '!': 6110 s++; 6111 { 6112 const char tmp = *s++; 6113 if (tmp == '=') { 6114 /* was this !=~ where !~ was meant? 6115 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6116 6117 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6118 const char *t = s+1; 6119 6120 while (t < PL_bufend && isSPACE(*t)) 6121 ++t; 6122 6123 if (*t == '/' || *t == '?' 6124 || ((*t == 'm' || *t == 's' || *t == 'y') 6125 && !isWORDCHAR(t[1])) 6126 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6127 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6128 "!=~ should be !~"); 6129 } 6130 if (!PL_lex_allbrackets 6131 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6132 { 6133 s -= 2; 6134 TOKEN(0); 6135 } 6136 Eop(OP_NE); 6137 } 6138 if (tmp == '~') 6139 PMop(OP_NOT); 6140 } 6141 s--; 6142 OPERATOR('!'); 6143 case '<': 6144 if (PL_expect != XOPERATOR) { 6145 if (s[1] != '<' && !strchr(s,'>')) 6146 check_uni(); 6147 if (s[1] == '<' && s[2] != '>') 6148 s = scan_heredoc(s); 6149 else 6150 s = scan_inputsymbol(s); 6151 PL_expect = XOPERATOR; 6152 TOKEN(sublex_start()); 6153 } 6154 s++; 6155 { 6156 char tmp = *s++; 6157 if (tmp == '<') { 6158 if (*s == '=' && !PL_lex_allbrackets 6159 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6160 { 6161 s -= 2; 6162 TOKEN(0); 6163 } 6164 SHop(OP_LEFT_SHIFT); 6165 } 6166 if (tmp == '=') { 6167 tmp = *s++; 6168 if (tmp == '>') { 6169 if (!PL_lex_allbrackets 6170 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6171 { 6172 s -= 3; 6173 TOKEN(0); 6174 } 6175 Eop(OP_NCMP); 6176 } 6177 s--; 6178 if (!PL_lex_allbrackets 6179 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6180 { 6181 s -= 2; 6182 TOKEN(0); 6183 } 6184 Rop(OP_LE); 6185 } 6186 } 6187 s--; 6188 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6189 s--; 6190 TOKEN(0); 6191 } 6192 Rop(OP_LT); 6193 case '>': 6194 s++; 6195 { 6196 const char tmp = *s++; 6197 if (tmp == '>') { 6198 if (*s == '=' && !PL_lex_allbrackets 6199 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6200 { 6201 s -= 2; 6202 TOKEN(0); 6203 } 6204 SHop(OP_RIGHT_SHIFT); 6205 } 6206 else if (tmp == '=') { 6207 if (!PL_lex_allbrackets 6208 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6209 { 6210 s -= 2; 6211 TOKEN(0); 6212 } 6213 Rop(OP_GE); 6214 } 6215 } 6216 s--; 6217 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6218 s--; 6219 TOKEN(0); 6220 } 6221 Rop(OP_GT); 6222 6223 case '$': 6224 CLINE; 6225 6226 if (PL_expect == XOPERATOR) { 6227 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6228 return deprecate_commaless_var_list(); 6229 } 6230 } 6231 else if (PL_expect == XPOSTDEREF) { 6232 if (s[1] == '#') { 6233 s++; 6234 POSTDEREF(DOLSHARP); 6235 } 6236 POSTDEREF('$'); 6237 } 6238 6239 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) { 6240 PL_tokenbuf[0] = '@'; 6241 s = scan_ident(s + 1, PL_tokenbuf + 1, 6242 sizeof PL_tokenbuf - 1, FALSE); 6243 if (PL_expect == XOPERATOR) { 6244 d = s; 6245 if (PL_bufptr > s) { 6246 d = PL_bufptr-1; 6247 PL_bufptr = PL_oldbufptr; 6248 } 6249 no_op("Array length", d); 6250 } 6251 if (!PL_tokenbuf[1]) 6252 PREREF(DOLSHARP); 6253 PL_expect = XOPERATOR; 6254 force_ident_maybe_lex('#'); 6255 TOKEN(DOLSHARP); 6256 } 6257 6258 PL_tokenbuf[0] = '$'; 6259 s = scan_ident(s, PL_tokenbuf + 1, 6260 sizeof PL_tokenbuf - 1, FALSE); 6261 if (PL_expect == XOPERATOR) { 6262 d = s; 6263 if (PL_bufptr > s) { 6264 d = PL_bufptr-1; 6265 PL_bufptr = PL_oldbufptr; 6266 } 6267 no_op("Scalar", d); 6268 } 6269 if (!PL_tokenbuf[1]) { 6270 if (s == PL_bufend) 6271 yyerror("Final $ should be \\$ or $name"); 6272 PREREF('$'); 6273 } 6274 6275 d = s; 6276 { 6277 const char tmp = *s; 6278 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6279 s = skipspace(s); 6280 6281 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6282 && intuit_more(s)) { 6283 if (*s == '[') { 6284 PL_tokenbuf[0] = '@'; 6285 if (ckWARN(WARN_SYNTAX)) { 6286 char *t = s+1; 6287 6288 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') 6289 t += UTF ? UTF8SKIP(t) : 1; 6290 if (*t++ == ',') { 6291 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ 6292 while (t < PL_bufend && *t != ']') 6293 t++; 6294 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6295 "Multidimensional syntax %"UTF8f" not supported", 6296 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr)); 6297 } 6298 } 6299 } 6300 else if (*s == '{') { 6301 char *t; 6302 PL_tokenbuf[0] = '%'; 6303 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) 6304 && (t = strchr(s, '}')) && (t = strchr(t, '='))) 6305 { 6306 char tmpbuf[sizeof PL_tokenbuf]; 6307 do { 6308 t++; 6309 } while (isSPACE(*t)); 6310 if (isIDFIRST_lazy_if(t,UTF)) { 6311 STRLEN len; 6312 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 6313 &len); 6314 while (isSPACE(*t)) 6315 t++; 6316 if (*t == ';' 6317 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) 6318 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6319 "You need to quote \"%"UTF8f"\"", 6320 UTF8fARG(UTF, len, tmpbuf)); 6321 } 6322 } 6323 } 6324 } 6325 6326 PL_expect = XOPERATOR; 6327 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 6328 const bool islop = (PL_last_lop == PL_oldoldbufptr); 6329 if (!islop || PL_last_lop_op == OP_GREPSTART) 6330 PL_expect = XOPERATOR; 6331 else if (strchr("$@\"'`q", *s)) 6332 PL_expect = XTERM; /* e.g. print $fh "foo" */ 6333 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 6334 PL_expect = XTERM; /* e.g. print $fh &sub */ 6335 else if (isIDFIRST_lazy_if(s,UTF)) { 6336 char tmpbuf[sizeof PL_tokenbuf]; 6337 int t2; 6338 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 6339 if ((t2 = keyword(tmpbuf, len, 0))) { 6340 /* binary operators exclude handle interpretations */ 6341 switch (t2) { 6342 case -KEY_x: 6343 case -KEY_eq: 6344 case -KEY_ne: 6345 case -KEY_gt: 6346 case -KEY_lt: 6347 case -KEY_ge: 6348 case -KEY_le: 6349 case -KEY_cmp: 6350 break; 6351 default: 6352 PL_expect = XTERM; /* e.g. print $fh length() */ 6353 break; 6354 } 6355 } 6356 else { 6357 PL_expect = XTERM; /* e.g. print $fh subr() */ 6358 } 6359 } 6360 else if (isDIGIT(*s)) 6361 PL_expect = XTERM; /* e.g. print $fh 3 */ 6362 else if (*s == '.' && isDIGIT(s[1])) 6363 PL_expect = XTERM; /* e.g. print $fh .3 */ 6364 else if ((*s == '?' || *s == '-' || *s == '+') 6365 && !isSPACE(s[1]) && s[1] != '=') 6366 PL_expect = XTERM; /* e.g. print $fh -1 */ 6367 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 6368 && s[1] != '/') 6369 PL_expect = XTERM; /* e.g. print $fh /.../ 6370 XXX except DORDOR operator 6371 */ 6372 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 6373 && s[2] != '=') 6374 PL_expect = XTERM; /* print $fh <<"EOF" */ 6375 } 6376 } 6377 force_ident_maybe_lex('$'); 6378 TOKEN('$'); 6379 6380 case '@': 6381 if (PL_expect == XPOSTDEREF) 6382 POSTDEREF('@'); 6383 PL_tokenbuf[0] = '@'; 6384 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6385 if (PL_expect == XOPERATOR) { 6386 d = s; 6387 if (PL_bufptr > s) { 6388 d = PL_bufptr-1; 6389 PL_bufptr = PL_oldbufptr; 6390 } 6391 no_op("Array", d); 6392 } 6393 pl_yylval.ival = 0; 6394 if (!PL_tokenbuf[1]) { 6395 PREREF('@'); 6396 } 6397 if (PL_lex_state == LEX_NORMAL) 6398 s = skipspace(s); 6399 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 6400 if (*s == '{') 6401 PL_tokenbuf[0] = '%'; 6402 6403 /* Warn about @ where they meant $. */ 6404 if (*s == '[' || *s == '{') { 6405 if (ckWARN(WARN_SYNTAX)) { 6406 S_check_scalar_slice(aTHX_ s); 6407 } 6408 } 6409 } 6410 PL_expect = XOPERATOR; 6411 force_ident_maybe_lex('@'); 6412 TERM('@'); 6413 6414 case '/': /* may be division, defined-or, or pattern */ 6415 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { 6416 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6417 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6418 TOKEN(0); 6419 s += 2; 6420 AOPERATOR(DORDOR); 6421 } 6422 else if (PL_expect == XOPERATOR) { 6423 s++; 6424 if (*s == '=' && !PL_lex_allbrackets 6425 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6426 { 6427 s--; 6428 TOKEN(0); 6429 } 6430 Mop(OP_DIVIDE); 6431 } 6432 else { 6433 /* Disable warning on "study /blah/" */ 6434 if (PL_oldoldbufptr == PL_last_uni 6435 && (*PL_last_uni != 's' || s - PL_last_uni < 5 6436 || memNE(PL_last_uni, "study", 5) 6437 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) 6438 )) 6439 check_uni(); 6440 s = scan_pat(s,OP_MATCH); 6441 TERM(sublex_start()); 6442 } 6443 6444 case '?': /* conditional */ 6445 s++; 6446 if (!PL_lex_allbrackets 6447 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) 6448 { 6449 s--; 6450 TOKEN(0); 6451 } 6452 PL_lex_allbrackets++; 6453 OPERATOR('?'); 6454 6455 case '.': 6456 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 6457 #ifdef PERL_STRICT_CR 6458 && s[1] == '\n' 6459 #else 6460 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 6461 #endif 6462 && (s == PL_linestart || s[-1] == '\n') ) 6463 { 6464 PL_expect = XSTATE; 6465 formbrack = 2; /* dot seen where arguments expected */ 6466 goto rightbracket; 6467 } 6468 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 6469 s += 3; 6470 OPERATOR(YADAYADA); 6471 } 6472 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 6473 char tmp = *s++; 6474 if (*s == tmp) { 6475 if (!PL_lex_allbrackets 6476 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) 6477 { 6478 s--; 6479 TOKEN(0); 6480 } 6481 s++; 6482 if (*s == tmp) { 6483 s++; 6484 pl_yylval.ival = OPf_SPECIAL; 6485 } 6486 else 6487 pl_yylval.ival = 0; 6488 OPERATOR(DOTDOT); 6489 } 6490 if (*s == '=' && !PL_lex_allbrackets 6491 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6492 { 6493 s--; 6494 TOKEN(0); 6495 } 6496 Aop(OP_CONCAT); 6497 } 6498 /* FALLTHROUGH */ 6499 case '0': case '1': case '2': case '3': case '4': 6500 case '5': case '6': case '7': case '8': case '9': 6501 s = scan_num(s, &pl_yylval); 6502 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 6503 if (PL_expect == XOPERATOR) 6504 no_op("Number",s); 6505 TERM(THING); 6506 6507 case '\'': 6508 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6509 if (!s) 6510 missingterm(NULL); 6511 COPLINE_SET_FROM_MULTI_END; 6512 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6513 if (PL_expect == XOPERATOR) { 6514 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6515 return deprecate_commaless_var_list(); 6516 } 6517 else 6518 no_op("String",s); 6519 } 6520 pl_yylval.ival = OP_CONST; 6521 TERM(sublex_start()); 6522 6523 case '"': 6524 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6525 DEBUG_T( { 6526 if (s) 6527 printbuf("### Saw string before %s\n", s); 6528 else 6529 PerlIO_printf(Perl_debug_log, 6530 "### Saw unterminated string\n"); 6531 } ); 6532 if (PL_expect == XOPERATOR) { 6533 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6534 return deprecate_commaless_var_list(); 6535 } 6536 else 6537 no_op("String",s); 6538 } 6539 if (!s) 6540 missingterm(NULL); 6541 pl_yylval.ival = OP_CONST; 6542 /* FIXME. I think that this can be const if char *d is replaced by 6543 more localised variables. */ 6544 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6545 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6546 pl_yylval.ival = OP_STRINGIFY; 6547 break; 6548 } 6549 } 6550 if (pl_yylval.ival == OP_CONST) 6551 COPLINE_SET_FROM_MULTI_END; 6552 TERM(sublex_start()); 6553 6554 case '`': 6555 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6556 DEBUG_T( { 6557 if (s) 6558 printbuf("### Saw backtick string before %s\n", s); 6559 else 6560 PerlIO_printf(Perl_debug_log, 6561 "### Saw unterminated backtick string\n"); 6562 } ); 6563 if (PL_expect == XOPERATOR) 6564 no_op("Backticks",s); 6565 if (!s) 6566 missingterm(NULL); 6567 pl_yylval.ival = OP_BACKTICK; 6568 TERM(sublex_start()); 6569 6570 case '\\': 6571 s++; 6572 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6573 && isDIGIT(*s)) 6574 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6575 *s, *s); 6576 if (PL_expect == XOPERATOR) 6577 no_op("Backslash",s); 6578 OPERATOR(REFGEN); 6579 6580 case 'v': 6581 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 6582 char *start = s + 2; 6583 while (isDIGIT(*start) || *start == '_') 6584 start++; 6585 if (*start == '.' && isDIGIT(start[1])) { 6586 s = scan_num(s, &pl_yylval); 6587 TERM(THING); 6588 } 6589 else if ((*start == ':' && start[1] == ':') 6590 || (PL_expect == XSTATE && *start == ':')) 6591 goto keylookup; 6592 else if (PL_expect == XSTATE) { 6593 d = start; 6594 while (d < PL_bufend && isSPACE(*d)) d++; 6595 if (*d == ':') goto keylookup; 6596 } 6597 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 6598 if (!isALPHA(*start) && (PL_expect == XTERM 6599 || PL_expect == XREF || PL_expect == XSTATE 6600 || PL_expect == XTERMORDORDOR)) { 6601 GV *const gv = gv_fetchpvn_flags(s, start - s, 6602 UTF ? SVf_UTF8 : 0, SVt_PVCV); 6603 if (!gv) { 6604 s = scan_num(s, &pl_yylval); 6605 TERM(THING); 6606 } 6607 } 6608 } 6609 goto keylookup; 6610 case 'x': 6611 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 6612 s++; 6613 Mop(OP_REPEAT); 6614 } 6615 goto keylookup; 6616 6617 case '_': 6618 case 'a': case 'A': 6619 case 'b': case 'B': 6620 case 'c': case 'C': 6621 case 'd': case 'D': 6622 case 'e': case 'E': 6623 case 'f': case 'F': 6624 case 'g': case 'G': 6625 case 'h': case 'H': 6626 case 'i': case 'I': 6627 case 'j': case 'J': 6628 case 'k': case 'K': 6629 case 'l': case 'L': 6630 case 'm': case 'M': 6631 case 'n': case 'N': 6632 case 'o': case 'O': 6633 case 'p': case 'P': 6634 case 'q': case 'Q': 6635 case 'r': case 'R': 6636 case 's': case 'S': 6637 case 't': case 'T': 6638 case 'u': case 'U': 6639 case 'V': 6640 case 'w': case 'W': 6641 case 'X': 6642 case 'y': case 'Y': 6643 case 'z': case 'Z': 6644 6645 keylookup: { 6646 bool anydelim; 6647 bool lex; 6648 I32 tmp; 6649 SV *sv; 6650 CV *cv; 6651 PADOFFSET off; 6652 OP *rv2cv_op; 6653 6654 lex = FALSE; 6655 orig_keyword = 0; 6656 off = 0; 6657 sv = NULL; 6658 cv = NULL; 6659 gv = NULL; 6660 gvp = NULL; 6661 rv2cv_op = NULL; 6662 6663 PL_bufptr = s; 6664 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 6665 6666 /* Some keywords can be followed by any delimiter, including ':' */ 6667 anydelim = word_takes_any_delimeter(PL_tokenbuf, len); 6668 6669 /* x::* is just a word, unless x is "CORE" */ 6670 if (!anydelim && *s == ':' && s[1] == ':') { 6671 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE; 6672 goto just_a_word; 6673 } 6674 6675 d = s; 6676 while (d < PL_bufend && isSPACE(*d)) 6677 d++; /* no comments skipped here, or s### is misparsed */ 6678 6679 /* Is this a word before a => operator? */ 6680 if (*d == '=' && d[1] == '>') { 6681 fat_arrow: 6682 CLINE; 6683 pl_yylval.opval 6684 = (OP*)newSVOP(OP_CONST, 0, 6685 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 6686 pl_yylval.opval->op_private = OPpCONST_BARE; 6687 TERM(WORD); 6688 } 6689 6690 /* Check for plugged-in keyword */ 6691 { 6692 OP *o; 6693 int result; 6694 char *saved_bufptr = PL_bufptr; 6695 PL_bufptr = s; 6696 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 6697 s = PL_bufptr; 6698 if (result == KEYWORD_PLUGIN_DECLINE) { 6699 /* not a plugged-in keyword */ 6700 PL_bufptr = saved_bufptr; 6701 } else if (result == KEYWORD_PLUGIN_STMT) { 6702 pl_yylval.opval = o; 6703 CLINE; 6704 if (!PL_nexttoke) PL_expect = XSTATE; 6705 return REPORT(PLUGSTMT); 6706 } else if (result == KEYWORD_PLUGIN_EXPR) { 6707 pl_yylval.opval = o; 6708 CLINE; 6709 if (!PL_nexttoke) PL_expect = XOPERATOR; 6710 return REPORT(PLUGEXPR); 6711 } else { 6712 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", 6713 PL_tokenbuf); 6714 } 6715 } 6716 6717 /* Check for built-in keyword */ 6718 tmp = keyword(PL_tokenbuf, len, 0); 6719 6720 /* Is this a label? */ 6721 if (!anydelim && PL_expect == XSTATE 6722 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 6723 s = d + 1; 6724 pl_yylval.pval = savepvn(PL_tokenbuf, len+1); 6725 pl_yylval.pval[len] = '\0'; 6726 pl_yylval.pval[len+1] = UTF ? 1 : 0; 6727 CLINE; 6728 TOKEN(LABEL); 6729 } 6730 6731 /* Check for lexical sub */ 6732 if (PL_expect != XOPERATOR) { 6733 char tmpbuf[sizeof PL_tokenbuf + 1]; 6734 *tmpbuf = '&'; 6735 Copy(PL_tokenbuf, tmpbuf+1, len, char); 6736 off = pad_findmy_pvn(tmpbuf, len+1, 0); 6737 if (off != NOT_IN_PAD) { 6738 assert(off); /* we assume this is boolean-true below */ 6739 if (PAD_COMPNAME_FLAGS_isOUR(off)) { 6740 HV * const stash = PAD_COMPNAME_OURSTASH(off); 6741 HEK * const stashname = HvNAME_HEK(stash); 6742 sv = newSVhek(stashname); 6743 sv_catpvs(sv, "::"); 6744 sv_catpvn_flags(sv, PL_tokenbuf, len, 6745 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 6746 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), 6747 SVt_PVCV); 6748 off = 0; 6749 if (!gv) { 6750 sv_free(sv); 6751 sv = NULL; 6752 goto just_a_word; 6753 } 6754 } 6755 else { 6756 rv2cv_op = newOP(OP_PADANY, 0); 6757 rv2cv_op->op_targ = off; 6758 cv = find_lexical_cv(off); 6759 } 6760 lex = TRUE; 6761 goto just_a_word; 6762 } 6763 off = 0; 6764 } 6765 6766 if (tmp < 0) { /* second-class keyword? */ 6767 GV *ogv = NULL; /* override (winner) */ 6768 GV *hgv = NULL; /* hidden (loser) */ 6769 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 6770 CV *cv; 6771 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 6772 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 6773 SVt_PVCV)) 6774 && (cv = GvCVu(gv))) 6775 { 6776 if (GvIMPORTED_CV(gv)) 6777 ogv = gv; 6778 else if (! CvMETHOD(cv)) 6779 hgv = gv; 6780 } 6781 if (!ogv 6782 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, 6783 len, FALSE)) 6784 && (gv = *gvp) 6785 && (isGV_with_GP(gv) 6786 ? GvCVu(gv) && GvIMPORTED_CV(gv) 6787 : SvPCS_IMPORTED(gv) 6788 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 6789 len, 0), 1))) 6790 { 6791 ogv = gv; 6792 } 6793 } 6794 if (ogv) { 6795 orig_keyword = tmp; 6796 tmp = 0; /* overridden by import or by GLOBAL */ 6797 } 6798 else if (gv && !gvp 6799 && -tmp==KEY_lock /* XXX generalizable kludge */ 6800 && GvCVu(gv)) 6801 { 6802 tmp = 0; /* any sub overrides "weak" keyword */ 6803 } 6804 else { /* no override */ 6805 tmp = -tmp; 6806 if (tmp == KEY_dump) { 6807 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 6808 "dump() better written as CORE::dump()"); 6809 } 6810 gv = NULL; 6811 gvp = 0; 6812 if (hgv && tmp != KEY_x) /* never ambiguous */ 6813 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 6814 "Ambiguous call resolved as CORE::%s(), " 6815 "qualify as such or use &", 6816 GvENAME(hgv)); 6817 } 6818 } 6819 6820 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ 6821 && (!anydelim || *s != '#')) { 6822 /* no override, and not s### either; skipspace is safe here 6823 * check for => on following line */ 6824 bool arrow; 6825 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 6826 STRLEN soff = s - SvPVX(PL_linestr); 6827 s = skipspace_flags(s, LEX_NO_INCLINE); 6828 arrow = *s == '=' && s[1] == '>'; 6829 PL_bufptr = SvPVX(PL_linestr) + bufoff; 6830 s = SvPVX(PL_linestr) + soff; 6831 if (arrow) 6832 goto fat_arrow; 6833 } 6834 6835 reserved_word: 6836 switch (tmp) { 6837 6838 default: /* not a keyword */ 6839 /* Trade off - by using this evil construction we can pull the 6840 variable gv into the block labelled keylookup. If not, then 6841 we have to give it function scope so that the goto from the 6842 earlier ':' case doesn't bypass the initialisation. */ 6843 if (0) { 6844 just_a_word_zero_gv: 6845 sv = NULL; 6846 cv = NULL; 6847 gv = NULL; 6848 gvp = NULL; 6849 rv2cv_op = NULL; 6850 orig_keyword = 0; 6851 lex = 0; 6852 off = 0; 6853 } 6854 just_a_word: { 6855 int pkgname = 0; 6856 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 6857 bool safebw; 6858 6859 6860 /* Get the rest if it looks like a package qualifier */ 6861 6862 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 6863 STRLEN morelen; 6864 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 6865 TRUE, &morelen); 6866 if (!morelen) 6867 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", 6868 UTF8fARG(UTF, len, PL_tokenbuf), 6869 *s == '\'' ? "'" : "::"); 6870 len += morelen; 6871 pkgname = 1; 6872 } 6873 6874 if (PL_expect == XOPERATOR) { 6875 if (PL_bufptr == PL_linestart) { 6876 CopLINE_dec(PL_curcop); 6877 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6878 CopLINE_inc(PL_curcop); 6879 } 6880 else 6881 no_op("Bareword",s); 6882 } 6883 6884 /* See if the name is "Foo::", 6885 in which case Foo is a bareword 6886 (and a package name). */ 6887 6888 if (len > 2 6889 && PL_tokenbuf[len - 2] == ':' 6890 && PL_tokenbuf[len - 1] == ':') 6891 { 6892 if (ckWARN(WARN_BAREWORD) 6893 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 6894 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 6895 "Bareword \"%"UTF8f"\" refers to nonexistent package", 6896 UTF8fARG(UTF, len, PL_tokenbuf)); 6897 len -= 2; 6898 PL_tokenbuf[len] = '\0'; 6899 gv = NULL; 6900 gvp = 0; 6901 safebw = TRUE; 6902 } 6903 else { 6904 safebw = FALSE; 6905 } 6906 6907 /* if we saw a global override before, get the right name */ 6908 6909 if (!sv) 6910 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, 6911 len); 6912 if (gvp) { 6913 SV * const tmp_sv = sv; 6914 sv = newSVpvs("CORE::GLOBAL::"); 6915 sv_catsv(sv, tmp_sv); 6916 SvREFCNT_dec(tmp_sv); 6917 } 6918 6919 6920 /* Presume this is going to be a bareword of some sort. */ 6921 CLINE; 6922 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 6923 pl_yylval.opval->op_private = OPpCONST_BARE; 6924 6925 /* And if "Foo::", then that's what it certainly is. */ 6926 if (safebw) 6927 goto safe_bareword; 6928 6929 if (!off) 6930 { 6931 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); 6932 const_op->op_private = OPpCONST_BARE; 6933 rv2cv_op = 6934 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); 6935 cv = lex 6936 ? isGV(gv) 6937 ? GvCV(gv) 6938 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV 6939 ? (CV *)SvRV(gv) 6940 : ((CV *)gv) 6941 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); 6942 } 6943 6944 /* Use this var to track whether intuit_method has been 6945 called. intuit_method returns 0 or > 255. */ 6946 tmp = 1; 6947 6948 /* See if it's the indirect object for a list operator. */ 6949 6950 if (PL_oldoldbufptr 6951 && PL_oldoldbufptr < PL_bufptr 6952 && (PL_oldoldbufptr == PL_last_lop 6953 || PL_oldoldbufptr == PL_last_uni) 6954 && /* NO SKIPSPACE BEFORE HERE! */ 6955 (PL_expect == XREF 6956 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) 6957 == OA_FILEREF)) 6958 { 6959 bool immediate_paren = *s == '('; 6960 6961 /* (Now we can afford to cross potential line boundary.) */ 6962 s = skipspace(s); 6963 6964 /* Two barewords in a row may indicate method call. */ 6965 6966 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') 6967 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) 6968 { 6969 goto method; 6970 } 6971 6972 /* If not a declared subroutine, it's an indirect object. */ 6973 /* (But it's an indir obj regardless for sort.) */ 6974 /* Also, if "_" follows a filetest operator, it's a bareword */ 6975 6976 if ( 6977 ( !immediate_paren && (PL_last_lop_op == OP_SORT 6978 || (!cv 6979 && (PL_last_lop_op != OP_MAPSTART 6980 && PL_last_lop_op != OP_GREPSTART)))) 6981 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 6982 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) 6983 == OA_FILESTATOP)) 6984 ) 6985 { 6986 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 6987 goto bareword; 6988 } 6989 } 6990 6991 PL_expect = XOPERATOR; 6992 s = skipspace(s); 6993 6994 /* Is this a word before a => operator? */ 6995 if (*s == '=' && s[1] == '>' && !pkgname) { 6996 op_free(rv2cv_op); 6997 CLINE; 6998 if (gvp || (lex && !off)) { 6999 assert (cSVOPx(pl_yylval.opval)->op_sv == sv); 7000 /* This is our own scalar, created a few lines 7001 above, so this is safe. */ 7002 SvREADONLY_off(sv); 7003 sv_setpv(sv, PL_tokenbuf); 7004 if (UTF && !IN_BYTES 7005 && is_utf8_string((U8*)PL_tokenbuf, len)) 7006 SvUTF8_on(sv); 7007 SvREADONLY_on(sv); 7008 } 7009 TERM(WORD); 7010 } 7011 7012 /* If followed by a paren, it's certainly a subroutine. */ 7013 if (*s == '(') { 7014 CLINE; 7015 if (cv) { 7016 d = s + 1; 7017 while (SPACE_OR_TAB(*d)) 7018 d++; 7019 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { 7020 s = d + 1; 7021 goto its_constant; 7022 } 7023 } 7024 NEXTVAL_NEXTTOKE.opval = 7025 off ? rv2cv_op : pl_yylval.opval; 7026 if (off) 7027 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7028 else op_free(rv2cv_op), force_next(WORD); 7029 pl_yylval.ival = 0; 7030 TOKEN('&'); 7031 } 7032 7033 /* If followed by var or block, call it a method (unless sub) */ 7034 7035 if ((*s == '$' || *s == '{') && !cv) { 7036 op_free(rv2cv_op); 7037 PL_last_lop = PL_oldbufptr; 7038 PL_last_lop_op = OP_METHOD; 7039 if (!PL_lex_allbrackets 7040 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7041 { 7042 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7043 } 7044 PL_expect = XBLOCKTERM; 7045 PL_bufptr = s; 7046 return REPORT(METHOD); 7047 } 7048 7049 /* If followed by a bareword, see if it looks like indir obj. */ 7050 7051 if (tmp == 1 && !orig_keyword 7052 && (isIDFIRST_lazy_if(s,UTF) || *s == '$') 7053 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) { 7054 method: 7055 if (lex && !off) { 7056 assert(cSVOPx(pl_yylval.opval)->op_sv == sv); 7057 SvREADONLY_off(sv); 7058 sv_setpvn(sv, PL_tokenbuf, len); 7059 if (UTF && !IN_BYTES 7060 && is_utf8_string((U8*)PL_tokenbuf, len)) 7061 SvUTF8_on (sv); 7062 else SvUTF8_off(sv); 7063 } 7064 op_free(rv2cv_op); 7065 if (tmp == METHOD && !PL_lex_allbrackets 7066 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7067 { 7068 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7069 } 7070 return REPORT(tmp); 7071 } 7072 7073 /* Not a method, so call it a subroutine (if defined) */ 7074 7075 if (cv) { 7076 /* Check for a constant sub */ 7077 if ((sv = cv_const_sv_or_av(cv))) { 7078 its_constant: 7079 op_free(rv2cv_op); 7080 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7081 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7082 if (SvTYPE(sv) == SVt_PVAV) 7083 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7084 pl_yylval.opval); 7085 else { 7086 pl_yylval.opval->op_private = 0; 7087 pl_yylval.opval->op_folded = 1; 7088 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7089 } 7090 TOKEN(WORD); 7091 } 7092 7093 op_free(pl_yylval.opval); 7094 pl_yylval.opval = 7095 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7096 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7097 PL_last_lop = PL_oldbufptr; 7098 PL_last_lop_op = OP_ENTERSUB; 7099 /* Is there a prototype? */ 7100 if ( 7101 SvPOK(cv)) 7102 { 7103 STRLEN protolen = CvPROTOLEN(cv); 7104 const char *proto = CvPROTO(cv); 7105 bool optional; 7106 proto = S_strip_spaces(aTHX_ proto, &protolen); 7107 if (!protolen) 7108 TERM(FUNC0SUB); 7109 if ((optional = *proto == ';')) 7110 do 7111 proto++; 7112 while (*proto == ';'); 7113 if ( 7114 ( 7115 ( 7116 *proto == '$' || *proto == '_' 7117 || *proto == '*' || *proto == '+' 7118 ) 7119 && proto[1] == '\0' 7120 ) 7121 || ( 7122 *proto == '\\' && proto[1] && proto[2] == '\0' 7123 ) 7124 ) 7125 UNIPROTO(UNIOPSUB,optional); 7126 if (*proto == '\\' && proto[1] == '[') { 7127 const char *p = proto + 2; 7128 while(*p && *p != ']') 7129 ++p; 7130 if(*p == ']' && !p[1]) 7131 UNIPROTO(UNIOPSUB,optional); 7132 } 7133 if (*proto == '&' && *s == '{') { 7134 if (PL_curstash) 7135 sv_setpvs(PL_subname, "__ANON__"); 7136 else 7137 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 7138 if (!PL_lex_allbrackets 7139 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7140 { 7141 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7142 } 7143 PREBLOCK(LSTOPSUB); 7144 } 7145 } 7146 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7147 PL_expect = XTERM; 7148 force_next(off ? PRIVATEREF : WORD); 7149 if (!PL_lex_allbrackets 7150 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7151 { 7152 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7153 } 7154 TOKEN(NOAMP); 7155 } 7156 7157 /* Call it a bare word */ 7158 7159 if (PL_hints & HINT_STRICT_SUBS) 7160 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7161 else { 7162 bareword: 7163 /* after "print" and similar functions (corresponding to 7164 * "F? L" in opcode.pl), whatever wasn't already parsed as 7165 * a filehandle should be subject to "strict subs". 7166 * Likewise for the optional indirect-object argument to system 7167 * or exec, which can't be a bareword */ 7168 if ((PL_last_lop_op == OP_PRINT 7169 || PL_last_lop_op == OP_PRTF 7170 || PL_last_lop_op == OP_SAY 7171 || PL_last_lop_op == OP_SYSTEM 7172 || PL_last_lop_op == OP_EXEC) 7173 && (PL_hints & HINT_STRICT_SUBS)) 7174 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7175 if (lastchar != '-') { 7176 if (ckWARN(WARN_RESERVED)) { 7177 d = PL_tokenbuf; 7178 while (isLOWER(*d)) 7179 d++; 7180 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) 7181 { 7182 /* PL_warn_reserved is constant */ 7183 GCC_DIAG_IGNORE(-Wformat-nonliteral); 7184 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7185 PL_tokenbuf); 7186 GCC_DIAG_RESTORE; 7187 } 7188 } 7189 } 7190 } 7191 op_free(rv2cv_op); 7192 7193 safe_bareword: 7194 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7195 && saw_infix_sigil) { 7196 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7197 "Operator or semicolon missing before %c%"UTF8f, 7198 lastchar, 7199 UTF8fARG(UTF, strlen(PL_tokenbuf), 7200 PL_tokenbuf)); 7201 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7202 "Ambiguous use of %c resolved as operator %c", 7203 lastchar, lastchar); 7204 } 7205 TOKEN(WORD); 7206 } 7207 7208 case KEY___FILE__: 7209 FUN0OP( 7210 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) 7211 ); 7212 7213 case KEY___LINE__: 7214 FUN0OP( 7215 (OP*)newSVOP(OP_CONST, 0, 7216 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))) 7217 ); 7218 7219 case KEY___PACKAGE__: 7220 FUN0OP( 7221 (OP*)newSVOP(OP_CONST, 0, 7222 (PL_curstash 7223 ? newSVhek(HvNAME_HEK(PL_curstash)) 7224 : &PL_sv_undef)) 7225 ); 7226 7227 case KEY___DATA__: 7228 case KEY___END__: { 7229 GV *gv; 7230 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 7231 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 7232 ? PL_curstash 7233 : PL_defstash; 7234 gv = (GV *)*hv_fetchs(stash, "DATA", 1); 7235 if (!isGV(gv)) 7236 gv_init(gv,stash,"DATA",4,0); 7237 GvMULTI_on(gv); 7238 if (!GvIO(gv)) 7239 GvIOp(gv) = newIO(); 7240 IoIFP(GvIOp(gv)) = PL_rsfp; 7241 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC) 7242 { 7243 const int fd = PerlIO_fileno(PL_rsfp); 7244 if (fd >= 3) { 7245 fcntl(fd,F_SETFD, FD_CLOEXEC); 7246 } 7247 } 7248 #endif 7249 /* Mark this internal pseudo-handle as clean */ 7250 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 7251 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 7252 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 7253 else 7254 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 7255 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 7256 /* if the script was opened in binmode, we need to revert 7257 * it to text mode for compatibility; but only iff it has CRs 7258 * XXX this is a questionable hack at best. */ 7259 if (PL_bufend-PL_bufptr > 2 7260 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 7261 { 7262 Off_t loc = 0; 7263 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 7264 loc = PerlIO_tell(PL_rsfp); 7265 (void)PerlIO_seek(PL_rsfp, 0L, 0); 7266 } 7267 #ifdef NETWARE 7268 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 7269 #else 7270 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 7271 #endif /* NETWARE */ 7272 if (loc > 0) 7273 PerlIO_seek(PL_rsfp, loc, 0); 7274 } 7275 } 7276 #endif 7277 #ifdef PERLIO_LAYERS 7278 if (!IN_BYTES) { 7279 if (UTF) 7280 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 7281 else if (IN_ENCODING) { 7282 SV *name; 7283 dSP; 7284 ENTER; 7285 SAVETMPS; 7286 PUSHMARK(sp); 7287 XPUSHs(_get_encoding()); 7288 PUTBACK; 7289 call_method("name", G_SCALAR); 7290 SPAGAIN; 7291 name = POPs; 7292 PUTBACK; 7293 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 7294 Perl_form(aTHX_ ":encoding(%"SVf")", 7295 SVfARG(name))); 7296 FREETMPS; 7297 LEAVE; 7298 } 7299 } 7300 #endif 7301 PL_rsfp = NULL; 7302 } 7303 goto fake_eof; 7304 } 7305 7306 case KEY___SUB__: 7307 FUN0OP(CvCLONE(PL_compcv) 7308 ? newOP(OP_RUNCV, 0) 7309 : newPVOP(OP_RUNCV,0,NULL)); 7310 7311 case KEY_AUTOLOAD: 7312 case KEY_DESTROY: 7313 case KEY_BEGIN: 7314 case KEY_UNITCHECK: 7315 case KEY_CHECK: 7316 case KEY_INIT: 7317 case KEY_END: 7318 if (PL_expect == XSTATE) { 7319 s = PL_bufptr; 7320 goto really_sub; 7321 } 7322 goto just_a_word; 7323 7324 case_KEY_CORE: 7325 { 7326 STRLEN olen = len; 7327 d = s; 7328 s += 2; 7329 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7330 if ((*s == ':' && s[1] == ':') 7331 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 7332 { 7333 s = d; 7334 len = olen; 7335 Copy(PL_bufptr, PL_tokenbuf, olen, char); 7336 goto just_a_word; 7337 } 7338 if (!tmp) 7339 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", 7340 UTF8fARG(UTF, len, PL_tokenbuf)); 7341 if (tmp < 0) 7342 tmp = -tmp; 7343 else if (tmp == KEY_require || tmp == KEY_do 7344 || tmp == KEY_glob) 7345 /* that's a way to remember we saw "CORE::" */ 7346 orig_keyword = tmp; 7347 goto reserved_word; 7348 } 7349 7350 case KEY_abs: 7351 UNI(OP_ABS); 7352 7353 case KEY_alarm: 7354 UNI(OP_ALARM); 7355 7356 case KEY_accept: 7357 LOP(OP_ACCEPT,XTERM); 7358 7359 case KEY_and: 7360 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7361 return REPORT(0); 7362 OPERATOR(ANDOP); 7363 7364 case KEY_atan2: 7365 LOP(OP_ATAN2,XTERM); 7366 7367 case KEY_bind: 7368 LOP(OP_BIND,XTERM); 7369 7370 case KEY_binmode: 7371 LOP(OP_BINMODE,XTERM); 7372 7373 case KEY_bless: 7374 LOP(OP_BLESS,XTERM); 7375 7376 case KEY_break: 7377 FUN0(OP_BREAK); 7378 7379 case KEY_chop: 7380 UNI(OP_CHOP); 7381 7382 case KEY_continue: 7383 /* We have to disambiguate the two senses of 7384 "continue". If the next token is a '{' then 7385 treat it as the start of a continue block; 7386 otherwise treat it as a control operator. 7387 */ 7388 s = skipspace(s); 7389 if (*s == '{') 7390 PREBLOCK(CONTINUE); 7391 else 7392 FUN0(OP_CONTINUE); 7393 7394 case KEY_chdir: 7395 /* may use HOME */ 7396 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7397 UNI(OP_CHDIR); 7398 7399 case KEY_close: 7400 UNI(OP_CLOSE); 7401 7402 case KEY_closedir: 7403 UNI(OP_CLOSEDIR); 7404 7405 case KEY_cmp: 7406 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7407 return REPORT(0); 7408 Eop(OP_SCMP); 7409 7410 case KEY_caller: 7411 UNI(OP_CALLER); 7412 7413 case KEY_crypt: 7414 #ifdef FCRYPT 7415 if (!PL_cryptseen) { 7416 PL_cryptseen = TRUE; 7417 init_des(); 7418 } 7419 #endif 7420 LOP(OP_CRYPT,XTERM); 7421 7422 case KEY_chmod: 7423 LOP(OP_CHMOD,XTERM); 7424 7425 case KEY_chown: 7426 LOP(OP_CHOWN,XTERM); 7427 7428 case KEY_connect: 7429 LOP(OP_CONNECT,XTERM); 7430 7431 case KEY_chr: 7432 UNI(OP_CHR); 7433 7434 case KEY_cos: 7435 UNI(OP_COS); 7436 7437 case KEY_chroot: 7438 UNI(OP_CHROOT); 7439 7440 case KEY_default: 7441 PREBLOCK(DEFAULT); 7442 7443 case KEY_do: 7444 s = skipspace(s); 7445 if (*s == '{') 7446 PRETERMBLOCK(DO); 7447 if (*s != '\'') { 7448 *PL_tokenbuf = '&'; 7449 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7450 1, &len); 7451 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) 7452 && !keyword(PL_tokenbuf + 1, len, 0)) { 7453 d = skipspace(d); 7454 if (*d == '(') { 7455 force_ident_maybe_lex('&'); 7456 s = d; 7457 } 7458 } 7459 } 7460 if (orig_keyword == KEY_do) { 7461 orig_keyword = 0; 7462 pl_yylval.ival = 1; 7463 } 7464 else 7465 pl_yylval.ival = 0; 7466 OPERATOR(DO); 7467 7468 case KEY_die: 7469 PL_hints |= HINT_BLOCK_SCOPE; 7470 LOP(OP_DIE,XTERM); 7471 7472 case KEY_defined: 7473 UNI(OP_DEFINED); 7474 7475 case KEY_delete: 7476 UNI(OP_DELETE); 7477 7478 case KEY_dbmopen: 7479 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7480 STR_WITH_LEN("NDBM_File::"), 7481 STR_WITH_LEN("DB_File::"), 7482 STR_WITH_LEN("GDBM_File::"), 7483 STR_WITH_LEN("SDBM_File::"), 7484 STR_WITH_LEN("ODBM_File::"), 7485 NULL); 7486 LOP(OP_DBMOPEN,XTERM); 7487 7488 case KEY_dbmclose: 7489 UNI(OP_DBMCLOSE); 7490 7491 case KEY_dump: 7492 LOOPX(OP_DUMP); 7493 7494 case KEY_else: 7495 PREBLOCK(ELSE); 7496 7497 case KEY_elsif: 7498 pl_yylval.ival = CopLINE(PL_curcop); 7499 OPERATOR(ELSIF); 7500 7501 case KEY_eq: 7502 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7503 return REPORT(0); 7504 Eop(OP_SEQ); 7505 7506 case KEY_exists: 7507 UNI(OP_EXISTS); 7508 7509 case KEY_exit: 7510 UNI(OP_EXIT); 7511 7512 case KEY_eval: 7513 s = skipspace(s); 7514 if (*s == '{') { /* block eval */ 7515 PL_expect = XTERMBLOCK; 7516 UNIBRACK(OP_ENTERTRY); 7517 } 7518 else { /* string eval */ 7519 PL_expect = XTERM; 7520 UNIBRACK(OP_ENTEREVAL); 7521 } 7522 7523 case KEY_evalbytes: 7524 PL_expect = XTERM; 7525 UNIBRACK(-OP_ENTEREVAL); 7526 7527 case KEY_eof: 7528 UNI(OP_EOF); 7529 7530 case KEY_exp: 7531 UNI(OP_EXP); 7532 7533 case KEY_each: 7534 UNI(OP_EACH); 7535 7536 case KEY_exec: 7537 LOP(OP_EXEC,XREF); 7538 7539 case KEY_endhostent: 7540 FUN0(OP_EHOSTENT); 7541 7542 case KEY_endnetent: 7543 FUN0(OP_ENETENT); 7544 7545 case KEY_endservent: 7546 FUN0(OP_ESERVENT); 7547 7548 case KEY_endprotoent: 7549 FUN0(OP_EPROTOENT); 7550 7551 case KEY_endpwent: 7552 FUN0(OP_EPWENT); 7553 7554 case KEY_endgrent: 7555 FUN0(OP_EGRENT); 7556 7557 case KEY_for: 7558 case KEY_foreach: 7559 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7560 return REPORT(0); 7561 pl_yylval.ival = CopLINE(PL_curcop); 7562 s = skipspace(s); 7563 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 7564 char *p = s; 7565 7566 if ((PL_bufend - p) >= 3 7567 && strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 7568 { 7569 p += 2; 7570 } 7571 else if ((PL_bufend - p) >= 4 7572 && strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 7573 p += 3; 7574 p = skipspace(p); 7575 /* skip optional package name, as in "for my abc $x (..)" */ 7576 if (isIDFIRST_lazy_if(p,UTF)) { 7577 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 7578 p = skipspace(p); 7579 } 7580 if (*p != '$') 7581 Perl_croak(aTHX_ "Missing $ on loop variable"); 7582 } 7583 OPERATOR(FOR); 7584 7585 case KEY_formline: 7586 LOP(OP_FORMLINE,XTERM); 7587 7588 case KEY_fork: 7589 FUN0(OP_FORK); 7590 7591 case KEY_fc: 7592 UNI(OP_FC); 7593 7594 case KEY_fcntl: 7595 LOP(OP_FCNTL,XTERM); 7596 7597 case KEY_fileno: 7598 UNI(OP_FILENO); 7599 7600 case KEY_flock: 7601 LOP(OP_FLOCK,XTERM); 7602 7603 case KEY_gt: 7604 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7605 return REPORT(0); 7606 Rop(OP_SGT); 7607 7608 case KEY_ge: 7609 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7610 return REPORT(0); 7611 Rop(OP_SGE); 7612 7613 case KEY_grep: 7614 LOP(OP_GREPSTART, XREF); 7615 7616 case KEY_goto: 7617 LOOPX(OP_GOTO); 7618 7619 case KEY_gmtime: 7620 UNI(OP_GMTIME); 7621 7622 case KEY_getc: 7623 UNIDOR(OP_GETC); 7624 7625 case KEY_getppid: 7626 FUN0(OP_GETPPID); 7627 7628 case KEY_getpgrp: 7629 UNI(OP_GETPGRP); 7630 7631 case KEY_getpriority: 7632 LOP(OP_GETPRIORITY,XTERM); 7633 7634 case KEY_getprotobyname: 7635 UNI(OP_GPBYNAME); 7636 7637 case KEY_getprotobynumber: 7638 LOP(OP_GPBYNUMBER,XTERM); 7639 7640 case KEY_getprotoent: 7641 FUN0(OP_GPROTOENT); 7642 7643 case KEY_getpwent: 7644 FUN0(OP_GPWENT); 7645 7646 case KEY_getpwnam: 7647 UNI(OP_GPWNAM); 7648 7649 case KEY_getpwuid: 7650 UNI(OP_GPWUID); 7651 7652 case KEY_getpeername: 7653 UNI(OP_GETPEERNAME); 7654 7655 case KEY_gethostbyname: 7656 UNI(OP_GHBYNAME); 7657 7658 case KEY_gethostbyaddr: 7659 LOP(OP_GHBYADDR,XTERM); 7660 7661 case KEY_gethostent: 7662 FUN0(OP_GHOSTENT); 7663 7664 case KEY_getnetbyname: 7665 UNI(OP_GNBYNAME); 7666 7667 case KEY_getnetbyaddr: 7668 LOP(OP_GNBYADDR,XTERM); 7669 7670 case KEY_getnetent: 7671 FUN0(OP_GNETENT); 7672 7673 case KEY_getservbyname: 7674 LOP(OP_GSBYNAME,XTERM); 7675 7676 case KEY_getservbyport: 7677 LOP(OP_GSBYPORT,XTERM); 7678 7679 case KEY_getservent: 7680 FUN0(OP_GSERVENT); 7681 7682 case KEY_getsockname: 7683 UNI(OP_GETSOCKNAME); 7684 7685 case KEY_getsockopt: 7686 LOP(OP_GSOCKOPT,XTERM); 7687 7688 case KEY_getgrent: 7689 FUN0(OP_GGRENT); 7690 7691 case KEY_getgrnam: 7692 UNI(OP_GGRNAM); 7693 7694 case KEY_getgrgid: 7695 UNI(OP_GGRGID); 7696 7697 case KEY_getlogin: 7698 FUN0(OP_GETLOGIN); 7699 7700 case KEY_given: 7701 pl_yylval.ival = CopLINE(PL_curcop); 7702 Perl_ck_warner_d(aTHX_ 7703 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 7704 "given is experimental"); 7705 OPERATOR(GIVEN); 7706 7707 case KEY_glob: 7708 LOP( 7709 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, 7710 XTERM 7711 ); 7712 7713 case KEY_hex: 7714 UNI(OP_HEX); 7715 7716 case KEY_if: 7717 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7718 return REPORT(0); 7719 pl_yylval.ival = CopLINE(PL_curcop); 7720 OPERATOR(IF); 7721 7722 case KEY_index: 7723 LOP(OP_INDEX,XTERM); 7724 7725 case KEY_int: 7726 UNI(OP_INT); 7727 7728 case KEY_ioctl: 7729 LOP(OP_IOCTL,XTERM); 7730 7731 case KEY_join: 7732 LOP(OP_JOIN,XTERM); 7733 7734 case KEY_keys: 7735 UNI(OP_KEYS); 7736 7737 case KEY_kill: 7738 LOP(OP_KILL,XTERM); 7739 7740 case KEY_last: 7741 LOOPX(OP_LAST); 7742 7743 case KEY_lc: 7744 UNI(OP_LC); 7745 7746 case KEY_lcfirst: 7747 UNI(OP_LCFIRST); 7748 7749 case KEY_local: 7750 pl_yylval.ival = 0; 7751 OPERATOR(LOCAL); 7752 7753 case KEY_length: 7754 UNI(OP_LENGTH); 7755 7756 case KEY_lt: 7757 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7758 return REPORT(0); 7759 Rop(OP_SLT); 7760 7761 case KEY_le: 7762 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7763 return REPORT(0); 7764 Rop(OP_SLE); 7765 7766 case KEY_localtime: 7767 UNI(OP_LOCALTIME); 7768 7769 case KEY_log: 7770 UNI(OP_LOG); 7771 7772 case KEY_link: 7773 LOP(OP_LINK,XTERM); 7774 7775 case KEY_listen: 7776 LOP(OP_LISTEN,XTERM); 7777 7778 case KEY_lock: 7779 UNI(OP_LOCK); 7780 7781 case KEY_lstat: 7782 UNI(OP_LSTAT); 7783 7784 case KEY_m: 7785 s = scan_pat(s,OP_MATCH); 7786 TERM(sublex_start()); 7787 7788 case KEY_map: 7789 LOP(OP_MAPSTART, XREF); 7790 7791 case KEY_mkdir: 7792 LOP(OP_MKDIR,XTERM); 7793 7794 case KEY_msgctl: 7795 LOP(OP_MSGCTL,XTERM); 7796 7797 case KEY_msgget: 7798 LOP(OP_MSGGET,XTERM); 7799 7800 case KEY_msgrcv: 7801 LOP(OP_MSGRCV,XTERM); 7802 7803 case KEY_msgsnd: 7804 LOP(OP_MSGSND,XTERM); 7805 7806 case KEY_our: 7807 case KEY_my: 7808 case KEY_state: 7809 if (PL_in_my) { 7810 yyerror(Perl_form(aTHX_ 7811 "Can't redeclare \"%s\" in \"%s\"", 7812 tmp == KEY_my ? "my" : 7813 tmp == KEY_state ? "state" : "our", 7814 PL_in_my == KEY_my ? "my" : 7815 PL_in_my == KEY_state ? "state" : "our")); 7816 } 7817 PL_in_my = (U16)tmp; 7818 s = skipspace(s); 7819 if (isIDFIRST_lazy_if(s,UTF)) { 7820 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 7821 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 7822 { 7823 if (!FEATURE_LEXSUBS_IS_ENABLED) 7824 Perl_croak(aTHX_ 7825 "Experimental \"%s\" subs not enabled", 7826 tmp == KEY_my ? "my" : 7827 tmp == KEY_state ? "state" : "our"); 7828 Perl_ck_warner_d(aTHX_ 7829 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS), 7830 "The lexical_subs feature is experimental"); 7831 goto really_sub; 7832 } 7833 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 7834 if (!PL_in_my_stash) { 7835 char tmpbuf[1024]; 7836 int len; 7837 PL_bufptr = s; 7838 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 7839 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf)); 7840 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 7841 } 7842 } 7843 pl_yylval.ival = 1; 7844 OPERATOR(MY); 7845 7846 case KEY_next: 7847 LOOPX(OP_NEXT); 7848 7849 case KEY_ne: 7850 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7851 return REPORT(0); 7852 Eop(OP_SNE); 7853 7854 case KEY_no: 7855 s = tokenize_use(0, s); 7856 TOKEN(USE); 7857 7858 case KEY_not: 7859 if (*s == '(' || (s = skipspace(s), *s == '(')) 7860 FUN1(OP_NOT); 7861 else { 7862 if (!PL_lex_allbrackets 7863 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7864 { 7865 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7866 } 7867 OPERATOR(NOTOP); 7868 } 7869 7870 case KEY_open: 7871 s = skipspace(s); 7872 if (isIDFIRST_lazy_if(s,UTF)) { 7873 const char *t; 7874 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, 7875 &len); 7876 for (t=d; isSPACE(*t);) 7877 t++; 7878 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 7879 /* [perl #16184] */ 7880 && !(t[0] == '=' && t[1] == '>') 7881 && !(t[0] == ':' && t[1] == ':') 7882 && !keyword(s, d-s, 0) 7883 ) { 7884 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 7885 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", 7886 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 7887 } 7888 } 7889 LOP(OP_OPEN,XTERM); 7890 7891 case KEY_or: 7892 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7893 return REPORT(0); 7894 pl_yylval.ival = OP_OR; 7895 OPERATOR(OROP); 7896 7897 case KEY_ord: 7898 UNI(OP_ORD); 7899 7900 case KEY_oct: 7901 UNI(OP_OCT); 7902 7903 case KEY_opendir: 7904 LOP(OP_OPEN_DIR,XTERM); 7905 7906 case KEY_print: 7907 checkcomma(s,PL_tokenbuf,"filehandle"); 7908 LOP(OP_PRINT,XREF); 7909 7910 case KEY_printf: 7911 checkcomma(s,PL_tokenbuf,"filehandle"); 7912 LOP(OP_PRTF,XREF); 7913 7914 case KEY_prototype: 7915 UNI(OP_PROTOTYPE); 7916 7917 case KEY_push: 7918 LOP(OP_PUSH,XTERM); 7919 7920 case KEY_pop: 7921 UNIDOR(OP_POP); 7922 7923 case KEY_pos: 7924 UNIDOR(OP_POS); 7925 7926 case KEY_pack: 7927 LOP(OP_PACK,XTERM); 7928 7929 case KEY_package: 7930 s = force_word(s,WORD,FALSE,TRUE); 7931 s = skipspace(s); 7932 s = force_strict_version(s); 7933 PREBLOCK(PACKAGE); 7934 7935 case KEY_pipe: 7936 LOP(OP_PIPE_OP,XTERM); 7937 7938 case KEY_q: 7939 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 7940 if (!s) 7941 missingterm(NULL); 7942 COPLINE_SET_FROM_MULTI_END; 7943 pl_yylval.ival = OP_CONST; 7944 TERM(sublex_start()); 7945 7946 case KEY_quotemeta: 7947 UNI(OP_QUOTEMETA); 7948 7949 case KEY_qw: { 7950 OP *words = NULL; 7951 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 7952 if (!s) 7953 missingterm(NULL); 7954 COPLINE_SET_FROM_MULTI_END; 7955 PL_expect = XOPERATOR; 7956 if (SvCUR(PL_lex_stuff)) { 7957 int warned_comma = !ckWARN(WARN_QW); 7958 int warned_comment = warned_comma; 7959 d = SvPV_force(PL_lex_stuff, len); 7960 while (len) { 7961 for (; isSPACE(*d) && len; --len, ++d) 7962 /**/; 7963 if (len) { 7964 SV *sv; 7965 const char *b = d; 7966 if (!warned_comma || !warned_comment) { 7967 for (; !isSPACE(*d) && len; --len, ++d) { 7968 if (!warned_comma && *d == ',') { 7969 Perl_warner(aTHX_ packWARN(WARN_QW), 7970 "Possible attempt to separate words with commas"); 7971 ++warned_comma; 7972 } 7973 else if (!warned_comment && *d == '#') { 7974 Perl_warner(aTHX_ packWARN(WARN_QW), 7975 "Possible attempt to put comments in qw() list"); 7976 ++warned_comment; 7977 } 7978 } 7979 } 7980 else { 7981 for (; !isSPACE(*d) && len; --len, ++d) 7982 /**/; 7983 } 7984 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 7985 words = op_append_elem(OP_LIST, words, 7986 newSVOP(OP_CONST, 0, tokeq(sv))); 7987 } 7988 } 7989 } 7990 if (!words) 7991 words = newNULLLIST(); 7992 SvREFCNT_dec_NN(PL_lex_stuff); 7993 PL_lex_stuff = NULL; 7994 PL_expect = XOPERATOR; 7995 pl_yylval.opval = sawparens(words); 7996 TOKEN(QWLIST); 7997 } 7998 7999 case KEY_qq: 8000 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8001 if (!s) 8002 missingterm(NULL); 8003 pl_yylval.ival = OP_STRINGIFY; 8004 if (SvIVX(PL_lex_stuff) == '\'') 8005 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8006 TERM(sublex_start()); 8007 8008 case KEY_qr: 8009 s = scan_pat(s,OP_QR); 8010 TERM(sublex_start()); 8011 8012 case KEY_qx: 8013 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8014 if (!s) 8015 missingterm(NULL); 8016 pl_yylval.ival = OP_BACKTICK; 8017 TERM(sublex_start()); 8018 8019 case KEY_return: 8020 OLDLOP(OP_RETURN); 8021 8022 case KEY_require: 8023 s = skipspace(s); 8024 if (isDIGIT(*s)) { 8025 s = force_version(s, FALSE); 8026 } 8027 else if (*s != 'v' || !isDIGIT(s[1]) 8028 || (s = force_version(s, TRUE), *s == 'v')) 8029 { 8030 *PL_tokenbuf = '\0'; 8031 s = force_word(s,WORD,TRUE,TRUE); 8032 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 8033 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 8034 GV_ADD | (UTF ? SVf_UTF8 : 0)); 8035 else if (*s == '<') 8036 yyerror("<> at require-statement should be quotes"); 8037 } 8038 if (orig_keyword == KEY_require) { 8039 orig_keyword = 0; 8040 pl_yylval.ival = 1; 8041 } 8042 else 8043 pl_yylval.ival = 0; 8044 PL_expect = PL_nexttoke ? XOPERATOR : XTERM; 8045 PL_bufptr = s; 8046 PL_last_uni = PL_oldbufptr; 8047 PL_last_lop_op = OP_REQUIRE; 8048 s = skipspace(s); 8049 return REPORT( (int)REQUIRE ); 8050 8051 case KEY_reset: 8052 UNI(OP_RESET); 8053 8054 case KEY_redo: 8055 LOOPX(OP_REDO); 8056 8057 case KEY_rename: 8058 LOP(OP_RENAME,XTERM); 8059 8060 case KEY_rand: 8061 UNI(OP_RAND); 8062 8063 case KEY_rmdir: 8064 UNI(OP_RMDIR); 8065 8066 case KEY_rindex: 8067 LOP(OP_RINDEX,XTERM); 8068 8069 case KEY_read: 8070 LOP(OP_READ,XTERM); 8071 8072 case KEY_readdir: 8073 UNI(OP_READDIR); 8074 8075 case KEY_readline: 8076 UNIDOR(OP_READLINE); 8077 8078 case KEY_readpipe: 8079 UNIDOR(OP_BACKTICK); 8080 8081 case KEY_rewinddir: 8082 UNI(OP_REWINDDIR); 8083 8084 case KEY_recv: 8085 LOP(OP_RECV,XTERM); 8086 8087 case KEY_reverse: 8088 LOP(OP_REVERSE,XTERM); 8089 8090 case KEY_readlink: 8091 UNIDOR(OP_READLINK); 8092 8093 case KEY_ref: 8094 UNI(OP_REF); 8095 8096 case KEY_s: 8097 s = scan_subst(s); 8098 if (pl_yylval.opval) 8099 TERM(sublex_start()); 8100 else 8101 TOKEN(1); /* force error */ 8102 8103 case KEY_say: 8104 checkcomma(s,PL_tokenbuf,"filehandle"); 8105 LOP(OP_SAY,XREF); 8106 8107 case KEY_chomp: 8108 UNI(OP_CHOMP); 8109 8110 case KEY_scalar: 8111 UNI(OP_SCALAR); 8112 8113 case KEY_select: 8114 LOP(OP_SELECT,XTERM); 8115 8116 case KEY_seek: 8117 LOP(OP_SEEK,XTERM); 8118 8119 case KEY_semctl: 8120 LOP(OP_SEMCTL,XTERM); 8121 8122 case KEY_semget: 8123 LOP(OP_SEMGET,XTERM); 8124 8125 case KEY_semop: 8126 LOP(OP_SEMOP,XTERM); 8127 8128 case KEY_send: 8129 LOP(OP_SEND,XTERM); 8130 8131 case KEY_setpgrp: 8132 LOP(OP_SETPGRP,XTERM); 8133 8134 case KEY_setpriority: 8135 LOP(OP_SETPRIORITY,XTERM); 8136 8137 case KEY_sethostent: 8138 UNI(OP_SHOSTENT); 8139 8140 case KEY_setnetent: 8141 UNI(OP_SNETENT); 8142 8143 case KEY_setservent: 8144 UNI(OP_SSERVENT); 8145 8146 case KEY_setprotoent: 8147 UNI(OP_SPROTOENT); 8148 8149 case KEY_setpwent: 8150 FUN0(OP_SPWENT); 8151 8152 case KEY_setgrent: 8153 FUN0(OP_SGRENT); 8154 8155 case KEY_seekdir: 8156 LOP(OP_SEEKDIR,XTERM); 8157 8158 case KEY_setsockopt: 8159 LOP(OP_SSOCKOPT,XTERM); 8160 8161 case KEY_shift: 8162 UNIDOR(OP_SHIFT); 8163 8164 case KEY_shmctl: 8165 LOP(OP_SHMCTL,XTERM); 8166 8167 case KEY_shmget: 8168 LOP(OP_SHMGET,XTERM); 8169 8170 case KEY_shmread: 8171 LOP(OP_SHMREAD,XTERM); 8172 8173 case KEY_shmwrite: 8174 LOP(OP_SHMWRITE,XTERM); 8175 8176 case KEY_shutdown: 8177 LOP(OP_SHUTDOWN,XTERM); 8178 8179 case KEY_sin: 8180 UNI(OP_SIN); 8181 8182 case KEY_sleep: 8183 UNI(OP_SLEEP); 8184 8185 case KEY_socket: 8186 LOP(OP_SOCKET,XTERM); 8187 8188 case KEY_socketpair: 8189 LOP(OP_SOCKPAIR,XTERM); 8190 8191 case KEY_sort: 8192 checkcomma(s,PL_tokenbuf,"subroutine name"); 8193 s = skipspace(s); 8194 PL_expect = XTERM; 8195 s = force_word(s,WORD,TRUE,TRUE); 8196 LOP(OP_SORT,XREF); 8197 8198 case KEY_split: 8199 LOP(OP_SPLIT,XTERM); 8200 8201 case KEY_sprintf: 8202 LOP(OP_SPRINTF,XTERM); 8203 8204 case KEY_splice: 8205 LOP(OP_SPLICE,XTERM); 8206 8207 case KEY_sqrt: 8208 UNI(OP_SQRT); 8209 8210 case KEY_srand: 8211 UNI(OP_SRAND); 8212 8213 case KEY_stat: 8214 UNI(OP_STAT); 8215 8216 case KEY_study: 8217 UNI(OP_STUDY); 8218 8219 case KEY_substr: 8220 LOP(OP_SUBSTR,XTERM); 8221 8222 case KEY_format: 8223 case KEY_sub: 8224 really_sub: 8225 { 8226 char * const tmpbuf = PL_tokenbuf + 1; 8227 expectation attrful; 8228 bool have_name, have_proto; 8229 const int key = tmp; 8230 SV *format_name = NULL; 8231 8232 d = s; 8233 s = skipspace(s); 8234 8235 if (isIDFIRST_lazy_if(s,UTF) 8236 || *s == '\'' 8237 || (*s == ':' && s[1] == ':')) 8238 { 8239 8240 PL_expect = XBLOCK; 8241 attrful = XATTRBLOCK; 8242 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 8243 &len); 8244 if (key == KEY_format) 8245 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 8246 *PL_tokenbuf = '&'; 8247 if (memchr(tmpbuf, ':', len) || key != KEY_sub 8248 || pad_findmy_pvn( 8249 PL_tokenbuf, len + 1, 0 8250 ) != NOT_IN_PAD) 8251 sv_setpvn(PL_subname, tmpbuf, len); 8252 else { 8253 sv_setsv(PL_subname,PL_curstname); 8254 sv_catpvs(PL_subname,"::"); 8255 sv_catpvn(PL_subname,tmpbuf,len); 8256 } 8257 if (SvUTF8(PL_linestr)) 8258 SvUTF8_on(PL_subname); 8259 have_name = TRUE; 8260 8261 8262 s = skipspace(d); 8263 } 8264 else { 8265 if (key == KEY_my || key == KEY_our || key==KEY_state) 8266 { 8267 *d = '\0'; 8268 /* diag_listed_as: Missing name in "%s sub" */ 8269 Perl_croak(aTHX_ 8270 "Missing name in \"%s\"", PL_bufptr); 8271 } 8272 PL_expect = XTERMBLOCK; 8273 attrful = XATTRTERM; 8274 sv_setpvs(PL_subname,"?"); 8275 have_name = FALSE; 8276 } 8277 8278 if (key == KEY_format) { 8279 if (format_name) { 8280 NEXTVAL_NEXTTOKE.opval 8281 = (OP*)newSVOP(OP_CONST,0, format_name); 8282 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 8283 force_next(WORD); 8284 } 8285 PREBLOCK(FORMAT); 8286 } 8287 8288 /* Look for a prototype */ 8289 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) { 8290 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8291 COPLINE_SET_FROM_MULTI_END; 8292 if (!s) 8293 Perl_croak(aTHX_ "Prototype not terminated"); 8294 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); 8295 have_proto = TRUE; 8296 8297 s = skipspace(s); 8298 } 8299 else 8300 have_proto = FALSE; 8301 8302 if (*s == ':' && s[1] != ':') 8303 PL_expect = attrful; 8304 else if ((*s != '{' && *s != '(') && key != KEY_format) { 8305 assert(key == KEY_sub || key == KEY_AUTOLOAD || 8306 key == KEY_DESTROY || key == KEY_BEGIN || 8307 key == KEY_UNITCHECK || key == KEY_CHECK || 8308 key == KEY_INIT || key == KEY_END || 8309 key == KEY_my || key == KEY_state || 8310 key == KEY_our); 8311 if (!have_name) 8312 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 8313 else if (*s != ';' && *s != '}') 8314 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname)); 8315 } 8316 8317 if (have_proto) { 8318 NEXTVAL_NEXTTOKE.opval = 8319 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8320 PL_lex_stuff = NULL; 8321 force_next(THING); 8322 } 8323 if (!have_name) { 8324 if (PL_curstash) 8325 sv_setpvs(PL_subname, "__ANON__"); 8326 else 8327 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 8328 TOKEN(ANONSUB); 8329 } 8330 force_ident_maybe_lex('&'); 8331 TOKEN(SUB); 8332 } 8333 8334 case KEY_system: 8335 LOP(OP_SYSTEM,XREF); 8336 8337 case KEY_symlink: 8338 LOP(OP_SYMLINK,XTERM); 8339 8340 case KEY_syscall: 8341 LOP(OP_SYSCALL,XTERM); 8342 8343 case KEY_sysopen: 8344 LOP(OP_SYSOPEN,XTERM); 8345 8346 case KEY_sysseek: 8347 LOP(OP_SYSSEEK,XTERM); 8348 8349 case KEY_sysread: 8350 LOP(OP_SYSREAD,XTERM); 8351 8352 case KEY_syswrite: 8353 LOP(OP_SYSWRITE,XTERM); 8354 8355 case KEY_tr: 8356 case KEY_y: 8357 s = scan_trans(s); 8358 TERM(sublex_start()); 8359 8360 case KEY_tell: 8361 UNI(OP_TELL); 8362 8363 case KEY_telldir: 8364 UNI(OP_TELLDIR); 8365 8366 case KEY_tie: 8367 LOP(OP_TIE,XTERM); 8368 8369 case KEY_tied: 8370 UNI(OP_TIED); 8371 8372 case KEY_time: 8373 FUN0(OP_TIME); 8374 8375 case KEY_times: 8376 FUN0(OP_TMS); 8377 8378 case KEY_truncate: 8379 LOP(OP_TRUNCATE,XTERM); 8380 8381 case KEY_uc: 8382 UNI(OP_UC); 8383 8384 case KEY_ucfirst: 8385 UNI(OP_UCFIRST); 8386 8387 case KEY_untie: 8388 UNI(OP_UNTIE); 8389 8390 case KEY_until: 8391 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8392 return REPORT(0); 8393 pl_yylval.ival = CopLINE(PL_curcop); 8394 OPERATOR(UNTIL); 8395 8396 case KEY_unless: 8397 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8398 return REPORT(0); 8399 pl_yylval.ival = CopLINE(PL_curcop); 8400 OPERATOR(UNLESS); 8401 8402 case KEY_unlink: 8403 LOP(OP_UNLINK,XTERM); 8404 8405 case KEY_undef: 8406 UNIDOR(OP_UNDEF); 8407 8408 case KEY_unpack: 8409 LOP(OP_UNPACK,XTERM); 8410 8411 case KEY_utime: 8412 LOP(OP_UTIME,XTERM); 8413 8414 case KEY_umask: 8415 UNIDOR(OP_UMASK); 8416 8417 case KEY_unshift: 8418 LOP(OP_UNSHIFT,XTERM); 8419 8420 case KEY_use: 8421 s = tokenize_use(1, s); 8422 TOKEN(USE); 8423 8424 case KEY_values: 8425 UNI(OP_VALUES); 8426 8427 case KEY_vec: 8428 LOP(OP_VEC,XTERM); 8429 8430 case KEY_when: 8431 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8432 return REPORT(0); 8433 pl_yylval.ival = CopLINE(PL_curcop); 8434 Perl_ck_warner_d(aTHX_ 8435 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8436 "when is experimental"); 8437 OPERATOR(WHEN); 8438 8439 case KEY_while: 8440 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8441 return REPORT(0); 8442 pl_yylval.ival = CopLINE(PL_curcop); 8443 OPERATOR(WHILE); 8444 8445 case KEY_warn: 8446 PL_hints |= HINT_BLOCK_SCOPE; 8447 LOP(OP_WARN,XTERM); 8448 8449 case KEY_wait: 8450 FUN0(OP_WAIT); 8451 8452 case KEY_waitpid: 8453 LOP(OP_WAITPID,XTERM); 8454 8455 case KEY_wantarray: 8456 FUN0(OP_WANTARRAY); 8457 8458 case KEY_write: 8459 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8460 * we use the same number on EBCDIC */ 8461 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8462 UNI(OP_ENTERWRITE); 8463 8464 case KEY_x: 8465 if (PL_expect == XOPERATOR) { 8466 if (*s == '=' && !PL_lex_allbrackets 8467 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8468 { 8469 return REPORT(0); 8470 } 8471 Mop(OP_REPEAT); 8472 } 8473 check_uni(); 8474 goto just_a_word; 8475 8476 case KEY_xor: 8477 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8478 return REPORT(0); 8479 pl_yylval.ival = OP_XOR; 8480 OPERATOR(OROP); 8481 } 8482 }} 8483 } 8484 8485 /* 8486 S_pending_ident 8487 8488 Looks up an identifier in the pad or in a package 8489 8490 Returns: 8491 PRIVATEREF if this is a lexical name. 8492 WORD if this belongs to a package. 8493 8494 Structure: 8495 if we're in a my declaration 8496 croak if they tried to say my($foo::bar) 8497 build the ops for a my() declaration 8498 if it's an access to a my() variable 8499 build ops for access to a my() variable 8500 if in a dq string, and they've said @foo and we can't find @foo 8501 warn 8502 build ops for a bareword 8503 */ 8504 8505 static int 8506 S_pending_ident(pTHX) 8507 { 8508 PADOFFSET tmp = 0; 8509 const char pit = (char)pl_yylval.ival; 8510 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 8511 /* All routes through this function want to know if there is a colon. */ 8512 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 8513 8514 DEBUG_T({ PerlIO_printf(Perl_debug_log, 8515 "### Pending identifier '%s'\n", PL_tokenbuf); }); 8516 8517 /* if we're in a my(), we can't allow dynamics here. 8518 $foo'bar has already been turned into $foo::bar, so 8519 just check for colons. 8520 8521 if it's a legal name, the OP is a PADANY. 8522 */ 8523 if (PL_in_my) { 8524 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 8525 if (has_colon) 8526 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 8527 "variable %s in \"our\"", 8528 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 8529 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 8530 } 8531 else { 8532 if (has_colon) { 8533 /* "my" variable %s can't be in a package */ 8534 /* PL_no_myglob is constant */ 8535 GCC_DIAG_IGNORE(-Wformat-nonliteral); 8536 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 8537 PL_in_my == KEY_my ? "my" : "state", 8538 *PL_tokenbuf == '&' ? "subroutin" : "variabl", 8539 PL_tokenbuf), 8540 UTF ? SVf_UTF8 : 0); 8541 GCC_DIAG_RESTORE; 8542 } 8543 8544 pl_yylval.opval = newOP(OP_PADANY, 0); 8545 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 8546 UTF ? SVf_UTF8 : 0); 8547 return PRIVATEREF; 8548 } 8549 } 8550 8551 /* 8552 build the ops for accesses to a my() variable. 8553 */ 8554 8555 if (!has_colon) { 8556 if (!PL_in_my) 8557 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 8558 0); 8559 if (tmp != NOT_IN_PAD) { 8560 /* might be an "our" variable" */ 8561 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 8562 /* build ops for a bareword */ 8563 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 8564 HEK * const stashname = HvNAME_HEK(stash); 8565 SV * const sym = newSVhek(stashname); 8566 sv_catpvs(sym, "::"); 8567 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 8568 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 8569 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8570 if (pit != '&') 8571 gv_fetchsv(sym, 8572 GV_ADDMULTI, 8573 ((PL_tokenbuf[0] == '$') ? SVt_PV 8574 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8575 : SVt_PVHV)); 8576 return WORD; 8577 } 8578 8579 pl_yylval.opval = newOP(OP_PADANY, 0); 8580 pl_yylval.opval->op_targ = tmp; 8581 return PRIVATEREF; 8582 } 8583 } 8584 8585 /* 8586 Whine if they've said @foo in a doublequoted string, 8587 and @foo isn't a variable we can find in the symbol 8588 table. 8589 */ 8590 if (ckWARN(WARN_AMBIGUOUS) 8591 && pit == '@' 8592 && PL_lex_state != LEX_NORMAL 8593 && !PL_lex_brackets) 8594 { 8595 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 8596 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV); 8597 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 8598 /* DO NOT warn for @- and @+ */ 8599 && !( PL_tokenbuf[2] == '\0' 8600 && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) 8601 ) 8602 { 8603 /* Downgraded from fatal to warning 20000522 mjd */ 8604 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 8605 "Possible unintended interpolation of %"UTF8f 8606 " in string", 8607 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 8608 } 8609 } 8610 8611 /* build ops for a bareword */ 8612 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 8613 newSVpvn_flags(PL_tokenbuf + 1, 8614 tokenbuf_len - 1, 8615 UTF ? SVf_UTF8 : 0 )); 8616 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8617 if (pit != '&') 8618 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, 8619 (PL_in_eval ? GV_ADDMULTI : GV_ADD) 8620 | ( UTF ? SVf_UTF8 : 0 ), 8621 ((PL_tokenbuf[0] == '$') ? SVt_PV 8622 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8623 : SVt_PVHV)); 8624 return WORD; 8625 } 8626 8627 STATIC void 8628 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 8629 { 8630 PERL_ARGS_ASSERT_CHECKCOMMA; 8631 8632 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 8633 if (ckWARN(WARN_SYNTAX)) { 8634 int level = 1; 8635 const char *w; 8636 for (w = s+2; *w && level; w++) { 8637 if (*w == '(') 8638 ++level; 8639 else if (*w == ')') 8640 --level; 8641 } 8642 while (isSPACE(*w)) 8643 ++w; 8644 /* the list of chars below is for end of statements or 8645 * block / parens, boolean operators (&&, ||, //) and branch 8646 * constructs (or, and, if, until, unless, while, err, for). 8647 * Not a very solid hack... */ 8648 if (!*w || !strchr(";&/|})]oaiuwef!=", *w)) 8649 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 8650 "%s (...) interpreted as function",name); 8651 } 8652 } 8653 while (s < PL_bufend && isSPACE(*s)) 8654 s++; 8655 if (*s == '(') 8656 s++; 8657 while (s < PL_bufend && isSPACE(*s)) 8658 s++; 8659 if (isIDFIRST_lazy_if(s,UTF)) { 8660 const char * const w = s; 8661 s += UTF ? UTF8SKIP(s) : 1; 8662 while (isWORDCHAR_lazy_if(s,UTF)) 8663 s += UTF ? UTF8SKIP(s) : 1; 8664 while (s < PL_bufend && isSPACE(*s)) 8665 s++; 8666 if (*s == ',') { 8667 GV* gv; 8668 PADOFFSET off; 8669 if (keyword(w, s - w, 0)) 8670 return; 8671 8672 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 8673 if (gv && GvCVu(gv)) 8674 return; 8675 if (s - w <= 254) { 8676 char tmpbuf[256]; 8677 Copy(w, tmpbuf+1, s - w, char); 8678 *tmpbuf = '&'; 8679 off = pad_findmy_pvn(tmpbuf, s-w+1, 0); 8680 if (off != NOT_IN_PAD) return; 8681 } 8682 Perl_croak(aTHX_ "No comma allowed after %s", what); 8683 } 8684 } 8685 } 8686 8687 /* S_new_constant(): do any overload::constant lookup. 8688 8689 Either returns sv, or mortalizes/frees sv and returns a new SV*. 8690 Best used as sv=new_constant(..., sv, ...). 8691 If s, pv are NULL, calls subroutine with one argument, 8692 and <type> is used with error messages only. 8693 <type> is assumed to be well formed UTF-8 */ 8694 8695 STATIC SV * 8696 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 8697 SV *sv, SV *pv, const char *type, STRLEN typelen) 8698 { 8699 dSP; 8700 HV * table = GvHV(PL_hintgv); /* ^H */ 8701 SV *res; 8702 SV *errsv = NULL; 8703 SV **cvp; 8704 SV *cv, *typesv; 8705 const char *why1 = "", *why2 = "", *why3 = ""; 8706 8707 PERL_ARGS_ASSERT_NEW_CONSTANT; 8708 /* We assume that this is true: */ 8709 if (*key == 'c') { assert (strEQ(key, "charnames")); } 8710 assert(type || s); 8711 8712 /* charnames doesn't work well if there have been errors found */ 8713 if (PL_error_count > 0 && *key == 'c') 8714 { 8715 SvREFCNT_dec_NN(sv); 8716 return &PL_sv_undef; 8717 } 8718 8719 sv_2mortal(sv); /* Parent created it permanently */ 8720 if (!table 8721 || ! (PL_hints & HINT_LOCALIZE_HH) 8722 || ! (cvp = hv_fetch(table, key, keylen, FALSE)) 8723 || ! SvOK(*cvp)) 8724 { 8725 char *msg; 8726 8727 /* Here haven't found what we're looking for. If it is charnames, 8728 * perhaps it needs to be loaded. Try doing that before giving up */ 8729 if (*key == 'c') { 8730 Perl_load_module(aTHX_ 8731 0, 8732 newSVpvs("_charnames"), 8733 /* version parameter; no need to specify it, as if 8734 * we get too early a version, will fail anyway, 8735 * not being able to find '_charnames' */ 8736 NULL, 8737 newSVpvs(":full"), 8738 newSVpvs(":short"), 8739 NULL); 8740 assert(sp == PL_stack_sp); 8741 table = GvHV(PL_hintgv); 8742 if (table 8743 && (PL_hints & HINT_LOCALIZE_HH) 8744 && (cvp = hv_fetch(table, key, keylen, FALSE)) 8745 && SvOK(*cvp)) 8746 { 8747 goto now_ok; 8748 } 8749 } 8750 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 8751 msg = Perl_form(aTHX_ 8752 "Constant(%.*s) unknown", 8753 (int)(type ? typelen : len), 8754 (type ? type: s)); 8755 } 8756 else { 8757 why1 = "$^H{"; 8758 why2 = key; 8759 why3 = "} is not defined"; 8760 report: 8761 if (*key == 'c') { 8762 msg = Perl_form(aTHX_ 8763 /* The +3 is for '\N{'; -4 for that, plus '}' */ 8764 "Unknown charname '%.*s'", (int)typelen - 4, type + 3 8765 ); 8766 } 8767 else { 8768 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", 8769 (int)(type ? typelen : len), 8770 (type ? type: s), why1, why2, why3); 8771 } 8772 } 8773 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 8774 return SvREFCNT_inc_simple_NN(sv); 8775 } 8776 now_ok: 8777 cv = *cvp; 8778 if (!pv && s) 8779 pv = newSVpvn_flags(s, len, SVs_TEMP); 8780 if (type && pv) 8781 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 8782 else 8783 typesv = &PL_sv_undef; 8784 8785 PUSHSTACKi(PERLSI_OVERLOAD); 8786 ENTER ; 8787 SAVETMPS; 8788 8789 PUSHMARK(SP) ; 8790 EXTEND(sp, 3); 8791 if (pv) 8792 PUSHs(pv); 8793 PUSHs(sv); 8794 if (pv) 8795 PUSHs(typesv); 8796 PUTBACK; 8797 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 8798 8799 SPAGAIN ; 8800 8801 /* Check the eval first */ 8802 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 8803 STRLEN errlen; 8804 const char * errstr; 8805 sv_catpvs(errsv, "Propagated"); 8806 errstr = SvPV_const(errsv, errlen); 8807 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 8808 (void)POPs; 8809 res = SvREFCNT_inc_simple_NN(sv); 8810 } 8811 else { 8812 res = POPs; 8813 SvREFCNT_inc_simple_void_NN(res); 8814 } 8815 8816 PUTBACK ; 8817 FREETMPS ; 8818 LEAVE ; 8819 POPSTACK; 8820 8821 if (!SvOK(res)) { 8822 why1 = "Call to &{$^H{"; 8823 why2 = key; 8824 why3 = "}} did not return a defined value"; 8825 sv = res; 8826 (void)sv_2mortal(sv); 8827 goto report; 8828 } 8829 8830 return res; 8831 } 8832 8833 PERL_STATIC_INLINE void 8834 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { 8835 PERL_ARGS_ASSERT_PARSE_IDENT; 8836 8837 for (;;) { 8838 if (*d >= e) 8839 Perl_croak(aTHX_ "%s", ident_too_long); 8840 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { 8841 /* The UTF-8 case must come first, otherwise things 8842 * like c\N{COMBINING TILDE} would start failing, as the 8843 * isWORDCHAR_A case below would gobble the 'c' up. 8844 */ 8845 8846 char *t = *s + UTF8SKIP(*s); 8847 while (isIDCONT_utf8((U8*)t)) 8848 t += UTF8SKIP(t); 8849 if (*d + (t - *s) > e) 8850 Perl_croak(aTHX_ "%s", ident_too_long); 8851 Copy(*s, *d, t - *s, char); 8852 *d += t - *s; 8853 *s = t; 8854 } 8855 else if ( isWORDCHAR_A(**s) ) { 8856 do { 8857 *(*d)++ = *(*s)++; 8858 } while (isWORDCHAR_A(**s) && *d < e); 8859 } 8860 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { 8861 *(*d)++ = ':'; 8862 *(*d)++ = ':'; 8863 (*s)++; 8864 } 8865 else if (allow_package && **s == ':' && (*s)[1] == ':' 8866 /* Disallow things like Foo::$bar. For the curious, this is 8867 * the code path that triggers the "Bad name after" warning 8868 * when looking for barewords. 8869 */ 8870 && (*s)[2] != '$') { 8871 *(*d)++ = *(*s)++; 8872 *(*d)++ = *(*s)++; 8873 } 8874 else 8875 break; 8876 } 8877 return; 8878 } 8879 8880 /* Returns a NUL terminated string, with the length of the string written to 8881 *slp 8882 */ 8883 STATIC char * 8884 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 8885 { 8886 char *d = dest; 8887 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 8888 bool is_utf8 = cBOOL(UTF); 8889 8890 PERL_ARGS_ASSERT_SCAN_WORD; 8891 8892 parse_ident(&s, &d, e, allow_package, is_utf8); 8893 *d = '\0'; 8894 *slp = d - dest; 8895 return s; 8896 } 8897 8898 /* Is the byte 'd' a legal single character identifier name? 'u' is true 8899 * iff Unicode semantics are to be used. The legal ones are any of: 8900 * a) all ASCII characters except: 8901 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; 8902 * 2) '{' 8903 * The final case currently doesn't get this far in the program, so we 8904 * don't test for it. If that were to change, it would be ok to allow it. 8905 * c) When not under Unicode rules, any upper Latin1 character 8906 * d) Otherwise, when unicode rules are used, all XIDS characters. 8907 * 8908 * Because all ASCII characters have the same representation whether 8909 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and 8910 * '{' without knowing if is UTF-8 or not. 8911 * EBCDIC already uses the rules that ASCII platforms will use after the 8912 * deprecation cycle; see comment below about the deprecation. */ 8913 #ifdef EBCDIC 8914 # define VALID_LEN_ONE_IDENT(s, is_utf8) \ 8915 (isGRAPH_A(*(s)) || ((is_utf8) \ 8916 ? isIDFIRST_utf8((U8*) (s)) \ 8917 : (isGRAPH_L1(*s) \ 8918 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) 8919 #else 8920 # define VALID_LEN_ONE_IDENT(s, is_utf8) \ 8921 (isGRAPH_A(*(s)) || ((is_utf8) \ 8922 ? isIDFIRST_utf8((U8*) (s)) \ 8923 : ! isASCII_utf8((U8*) (s)))) 8924 #endif 8925 8926 STATIC char * 8927 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 8928 { 8929 I32 herelines = PL_parser->herelines; 8930 SSize_t bracket = -1; 8931 char funny = *s++; 8932 char *d = dest; 8933 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 8934 bool is_utf8 = cBOOL(UTF); 8935 I32 orig_copline = 0, tmp_copline = 0; 8936 8937 PERL_ARGS_ASSERT_SCAN_IDENT; 8938 8939 if (isSPACE(*s) || !*s) 8940 s = skipspace(s); 8941 if (isDIGIT(*s)) { 8942 while (isDIGIT(*s)) { 8943 if (d >= e) 8944 Perl_croak(aTHX_ "%s", ident_too_long); 8945 *d++ = *s++; 8946 } 8947 } 8948 else { /* See if it is a "normal" identifier */ 8949 parse_ident(&s, &d, e, 1, is_utf8); 8950 } 8951 *d = '\0'; 8952 d = dest; 8953 if (*d) { 8954 /* Either a digit variable, or parse_ident() found an identifier 8955 (anything valid as a bareword), so job done and return. */ 8956 if (PL_lex_state != LEX_NORMAL) 8957 PL_lex_state = LEX_INTERPENDMAYBE; 8958 return s; 8959 } 8960 8961 /* Here, it is not a run-of-the-mill identifier name */ 8962 8963 if (*s == '$' && s[1] 8964 && (isIDFIRST_lazy_if(s+1,is_utf8) 8965 || isDIGIT_A((U8)s[1]) 8966 || s[1] == '$' 8967 || s[1] == '{' 8968 || strnEQ(s+1,"::",2)) ) 8969 { 8970 /* Dereferencing a value in a scalar variable. 8971 The alternatives are different syntaxes for a scalar variable. 8972 Using ' as a leading package separator isn't allowed. :: is. */ 8973 return s; 8974 } 8975 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 8976 if (*s == '{') { 8977 bracket = s - SvPVX(PL_linestr); 8978 s++; 8979 orig_copline = CopLINE(PL_curcop); 8980 if (s < PL_bufend && isSPACE(*s)) { 8981 s = skipspace(s); 8982 } 8983 } 8984 if ((s <= PL_bufend - (is_utf8) 8985 ? UTF8SKIP(s) 8986 : 1) 8987 && VALID_LEN_ONE_IDENT(s, is_utf8)) 8988 { 8989 /* Deprecate all non-graphic characters. Include SHY as a non-graphic, 8990 * because often it has no graphic representation. (We can't get to 8991 * here with SHY when 'is_utf8' is true, so no need to include a UTF-8 8992 * test for it.) */ 8993 if ((is_utf8) 8994 ? ! isGRAPH_utf8( (U8*) s) 8995 : (! isGRAPH_L1( (U8) *s) 8996 || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD)))) 8997 { 8998 deprecate("literal non-graphic characters in variable names"); 8999 } 9000 9001 if (is_utf8) { 9002 const STRLEN skip = UTF8SKIP(s); 9003 STRLEN i; 9004 d[skip] = '\0'; 9005 for ( i = 0; i < skip; i++ ) 9006 d[i] = *s++; 9007 } 9008 else { 9009 *d = *s++; 9010 d[1] = '\0'; 9011 } 9012 } 9013 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 9014 if (*d == '^' && *s && isCONTROLVAR(*s)) { 9015 *d = toCTRL(*s); 9016 s++; 9017 } 9018 /* Warn about ambiguous code after unary operators if {...} notation isn't 9019 used. There's no difference in ambiguity; it's merely a heuristic 9020 about when not to warn. */ 9021 else if (ck_uni && bracket == -1) 9022 check_uni(); 9023 if (bracket != -1) { 9024 /* If we were processing {...} notation then... */ 9025 if (isIDFIRST_lazy_if(d,is_utf8)) { 9026 /* if it starts as a valid identifier, assume that it is one. 9027 (the later check for } being at the expected point will trap 9028 cases where this doesn't pan out.) */ 9029 d += is_utf8 ? UTF8SKIP(d) : 1; 9030 parse_ident(&s, &d, e, 1, is_utf8); 9031 *d = '\0'; 9032 tmp_copline = CopLINE(PL_curcop); 9033 if (s < PL_bufend && isSPACE(*s)) { 9034 s = skipspace(s); 9035 } 9036 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 9037 /* ${foo[0]} and ${foo{bar}} notation. */ 9038 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 9039 const char * const brack = 9040 (const char *) 9041 ((*s == '[') ? "[...]" : "{...}"); 9042 orig_copline = CopLINE(PL_curcop); 9043 CopLINE_set(PL_curcop, tmp_copline); 9044 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 9045 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9046 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 9047 funny, dest, brack, funny, dest, brack); 9048 CopLINE_set(PL_curcop, orig_copline); 9049 } 9050 bracket++; 9051 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 9052 PL_lex_allbrackets++; 9053 return s; 9054 } 9055 } 9056 /* Handle extended ${^Foo} variables 9057 * 1999-02-27 mjd-perl-patch@plover.com */ 9058 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 9059 && isWORDCHAR(*s)) 9060 { 9061 d++; 9062 while (isWORDCHAR(*s) && d < e) { 9063 *d++ = *s++; 9064 } 9065 if (d >= e) 9066 Perl_croak(aTHX_ "%s", ident_too_long); 9067 *d = '\0'; 9068 } 9069 9070 if ( !tmp_copline ) 9071 tmp_copline = CopLINE(PL_curcop); 9072 if (s < PL_bufend && isSPACE(*s)) { 9073 s = skipspace(s); 9074 } 9075 9076 /* Expect to find a closing } after consuming any trailing whitespace. 9077 */ 9078 if (*s == '}') { 9079 s++; 9080 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 9081 PL_lex_state = LEX_INTERPEND; 9082 PL_expect = XREF; 9083 } 9084 if (PL_lex_state == LEX_NORMAL) { 9085 if (ckWARN(WARN_AMBIGUOUS) 9086 && (keyword(dest, d - dest, 0) 9087 || get_cvn_flags(dest, d - dest, is_utf8 9088 ? SVf_UTF8 9089 : 0))) 9090 { 9091 SV *tmp = newSVpvn_flags( dest, d - dest, 9092 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 9093 if (funny == '#') 9094 funny = '@'; 9095 orig_copline = CopLINE(PL_curcop); 9096 CopLINE_set(PL_curcop, tmp_copline); 9097 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9098 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf, 9099 funny, SVfARG(tmp), funny, SVfARG(tmp)); 9100 CopLINE_set(PL_curcop, orig_copline); 9101 } 9102 } 9103 } 9104 else { 9105 /* Didn't find the closing } at the point we expected, so restore 9106 state such that the next thing to process is the opening { and */ 9107 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 9108 CopLINE_set(PL_curcop, orig_copline); 9109 PL_parser->herelines = herelines; 9110 *dest = '\0'; 9111 } 9112 } 9113 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 9114 PL_lex_state = LEX_INTERPEND; 9115 return s; 9116 } 9117 9118 static bool 9119 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { 9120 9121 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag 9122 * found in the parse starting at 's', based on the subset that are valid 9123 * in this context input to this routine in 'valid_flags'. Advances s. 9124 * Returns TRUE if the input should be treated as a valid flag, so the next 9125 * char may be as well; otherwise FALSE. 'charset' should point to a NUL 9126 * upon first call on the current regex. This routine will set it to any 9127 * charset modifier found. The caller shouldn't change it. This way, 9128 * another charset modifier encountered in the parse can be detected as an 9129 * error, as we have decided to allow only one */ 9130 9131 const char c = **s; 9132 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 9133 9134 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 9135 if (isWORDCHAR_lazy_if(*s, UTF)) { 9136 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 9137 UTF ? SVf_UTF8 : 0); 9138 (*s) += charlen; 9139 /* Pretend that it worked, so will continue processing before 9140 * dieing */ 9141 return TRUE; 9142 } 9143 return FALSE; 9144 } 9145 9146 switch (c) { 9147 9148 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); 9149 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 9150 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 9151 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 9152 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 9153 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 9154 case LOCALE_PAT_MOD: 9155 if (*charset) { 9156 goto multiple_charsets; 9157 } 9158 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 9159 *charset = c; 9160 break; 9161 case UNICODE_PAT_MOD: 9162 if (*charset) { 9163 goto multiple_charsets; 9164 } 9165 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 9166 *charset = c; 9167 break; 9168 case ASCII_RESTRICT_PAT_MOD: 9169 if (! *charset) { 9170 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 9171 } 9172 else { 9173 9174 /* Error if previous modifier wasn't an 'a', but if it was, see 9175 * if, and accept, a second occurrence (only) */ 9176 if (*charset != 'a' 9177 || get_regex_charset(*pmfl) 9178 != REGEX_ASCII_RESTRICTED_CHARSET) 9179 { 9180 goto multiple_charsets; 9181 } 9182 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 9183 } 9184 *charset = c; 9185 break; 9186 case DEPENDS_PAT_MOD: 9187 if (*charset) { 9188 goto multiple_charsets; 9189 } 9190 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 9191 *charset = c; 9192 break; 9193 } 9194 9195 (*s)++; 9196 return TRUE; 9197 9198 multiple_charsets: 9199 if (*charset != c) { 9200 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 9201 } 9202 else if (c == 'a') { 9203 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 9204 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 9205 } 9206 else { 9207 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 9208 } 9209 9210 /* Pretend that it worked, so will continue processing before dieing */ 9211 (*s)++; 9212 return TRUE; 9213 } 9214 9215 STATIC char * 9216 S_scan_pat(pTHX_ char *start, I32 type) 9217 { 9218 PMOP *pm; 9219 char *s; 9220 const char * const valid_flags = 9221 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 9222 char charset = '\0'; /* character set modifier */ 9223 unsigned int x_mod_count = 0; 9224 9225 PERL_ARGS_ASSERT_SCAN_PAT; 9226 9227 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); 9228 if (!s) 9229 Perl_croak(aTHX_ "Search pattern not terminated"); 9230 9231 pm = (PMOP*)newPMOP(type, 0); 9232 if (PL_multi_open == '?') { 9233 /* This is the only point in the code that sets PMf_ONCE: */ 9234 pm->op_pmflags |= PMf_ONCE; 9235 9236 /* Hence it's safe to do this bit of PMOP book-keeping here, which 9237 allows us to restrict the list needed by reset to just the ?? 9238 matches. */ 9239 assert(type != OP_TRANS); 9240 if (PL_curstash) { 9241 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 9242 U32 elements; 9243 if (!mg) { 9244 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 9245 0); 9246 } 9247 elements = mg->mg_len / sizeof(PMOP**); 9248 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 9249 ((PMOP**)mg->mg_ptr) [elements++] = pm; 9250 mg->mg_len = elements * sizeof(PMOP**); 9251 PmopSTASH_set(pm,PL_curstash); 9252 } 9253 } 9254 9255 /* if qr/...(?{..}).../, then need to parse the pattern within a new 9256 * anon CV. False positives like qr/[(?{]/ are harmless */ 9257 9258 if (type == OP_QR) { 9259 STRLEN len; 9260 char *e, *p = SvPV(PL_lex_stuff, len); 9261 e = p + len; 9262 for (; p < e; p++) { 9263 if (p[0] == '(' && p[1] == '?' 9264 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 9265 { 9266 pm->op_pmflags |= PMf_HAS_CV; 9267 break; 9268 } 9269 } 9270 pm->op_pmflags |= PMf_IS_QR; 9271 } 9272 9273 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), 9274 &s, &charset, &x_mod_count)) 9275 {}; 9276 /* issue a warning if /c is specified,but /g is not */ 9277 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 9278 { 9279 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 9280 "Use of /c modifier is meaningless without /g" ); 9281 } 9282 9283 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); 9284 9285 PL_lex_op = (OP*)pm; 9286 pl_yylval.ival = OP_MATCH; 9287 return s; 9288 } 9289 9290 STATIC char * 9291 S_scan_subst(pTHX_ char *start) 9292 { 9293 char *s; 9294 PMOP *pm; 9295 I32 first_start; 9296 line_t first_line; 9297 I32 es = 0; 9298 char charset = '\0'; /* character set modifier */ 9299 unsigned int x_mod_count = 0; 9300 char *t; 9301 9302 PERL_ARGS_ASSERT_SCAN_SUBST; 9303 9304 pl_yylval.ival = OP_NULL; 9305 9306 s = scan_str(start, TRUE, FALSE, FALSE, &t); 9307 9308 if (!s) 9309 Perl_croak(aTHX_ "Substitution pattern not terminated"); 9310 9311 s = t; 9312 9313 first_start = PL_multi_start; 9314 first_line = CopLINE(PL_curcop); 9315 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 9316 if (!s) { 9317 SvREFCNT_dec_NN(PL_lex_stuff); 9318 PL_lex_stuff = NULL; 9319 Perl_croak(aTHX_ "Substitution replacement not terminated"); 9320 } 9321 PL_multi_start = first_start; /* so whole substitution is taken together */ 9322 9323 pm = (PMOP*)newPMOP(OP_SUBST, 0); 9324 9325 9326 while (*s) { 9327 if (*s == EXEC_PAT_MOD) { 9328 s++; 9329 es++; 9330 } 9331 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), 9332 &s, &charset, &x_mod_count)) 9333 { 9334 break; 9335 } 9336 } 9337 9338 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); 9339 9340 if ((pm->op_pmflags & PMf_CONTINUE)) { 9341 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 9342 } 9343 9344 if (es) { 9345 SV * const repl = newSVpvs(""); 9346 9347 PL_multi_end = 0; 9348 pm->op_pmflags |= PMf_EVAL; 9349 while (es-- > 0) { 9350 if (es) 9351 sv_catpvs(repl, "eval "); 9352 else 9353 sv_catpvs(repl, "do "); 9354 } 9355 sv_catpvs(repl, "{"); 9356 sv_catsv(repl, PL_sublex_info.repl); 9357 sv_catpvs(repl, "}"); 9358 SvEVALED_on(repl); 9359 SvREFCNT_dec(PL_sublex_info.repl); 9360 PL_sublex_info.repl = repl; 9361 } 9362 if (CopLINE(PL_curcop) != first_line) { 9363 sv_upgrade(PL_sublex_info.repl, SVt_PVNV); 9364 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow = 9365 CopLINE(PL_curcop) - first_line; 9366 CopLINE_set(PL_curcop, first_line); 9367 } 9368 9369 PL_lex_op = (OP*)pm; 9370 pl_yylval.ival = OP_SUBST; 9371 return s; 9372 } 9373 9374 STATIC char * 9375 S_scan_trans(pTHX_ char *start) 9376 { 9377 char* s; 9378 OP *o; 9379 U8 squash; 9380 U8 del; 9381 U8 complement; 9382 bool nondestruct = 0; 9383 char *t; 9384 9385 PERL_ARGS_ASSERT_SCAN_TRANS; 9386 9387 pl_yylval.ival = OP_NULL; 9388 9389 s = scan_str(start,FALSE,FALSE,FALSE,&t); 9390 if (!s) 9391 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 9392 9393 s = t; 9394 9395 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 9396 if (!s) { 9397 SvREFCNT_dec_NN(PL_lex_stuff); 9398 PL_lex_stuff = NULL; 9399 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 9400 } 9401 9402 complement = del = squash = 0; 9403 while (1) { 9404 switch (*s) { 9405 case 'c': 9406 complement = OPpTRANS_COMPLEMENT; 9407 break; 9408 case 'd': 9409 del = OPpTRANS_DELETE; 9410 break; 9411 case 's': 9412 squash = OPpTRANS_SQUASH; 9413 break; 9414 case 'r': 9415 nondestruct = 1; 9416 break; 9417 default: 9418 goto no_more; 9419 } 9420 s++; 9421 } 9422 no_more: 9423 9424 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 9425 o->op_private &= ~OPpTRANS_ALL; 9426 o->op_private |= del|squash|complement| 9427 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 9428 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0); 9429 9430 PL_lex_op = o; 9431 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 9432 9433 9434 return s; 9435 } 9436 9437 /* scan_heredoc 9438 Takes a pointer to the first < in <<FOO. 9439 Returns a pointer to the byte following <<FOO. 9440 9441 This function scans a heredoc, which involves different methods 9442 depending on whether we are in a string eval, quoted construct, etc. 9443 This is because PL_linestr could containing a single line of input, or 9444 a whole string being evalled, or the contents of the current quote- 9445 like operator. 9446 9447 The two basic methods are: 9448 - Steal lines from the input stream 9449 - Scan the heredoc in PL_linestr and remove it therefrom 9450 9451 In a file scope or filtered eval, the first method is used; in a 9452 string eval, the second. 9453 9454 In a quote-like operator, we have to choose between the two, 9455 depending on where we can find a newline. We peek into outer lex- 9456 ing scopes until we find one with a newline in it. If we reach the 9457 outermost lexing scope and it is a file, we use the stream method. 9458 Otherwise it is treated as an eval. 9459 */ 9460 9461 STATIC char * 9462 S_scan_heredoc(pTHX_ char *s) 9463 { 9464 I32 op_type = OP_SCALAR; 9465 I32 len; 9466 SV *tmpstr; 9467 char term; 9468 char *d; 9469 char *e; 9470 char *peek; 9471 const bool infile = PL_rsfp || PL_parser->filtered; 9472 const line_t origline = CopLINE(PL_curcop); 9473 LEXSHARED *shared = PL_parser->lex_shared; 9474 9475 PERL_ARGS_ASSERT_SCAN_HEREDOC; 9476 9477 s += 2; 9478 d = PL_tokenbuf + 1; 9479 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 9480 *PL_tokenbuf = '\n'; 9481 peek = s; 9482 while (SPACE_OR_TAB(*peek)) 9483 peek++; 9484 if (*peek == '`' || *peek == '\'' || *peek =='"') { 9485 s = peek; 9486 term = *s++; 9487 s = delimcpy(d, e, s, PL_bufend, term, &len); 9488 if (s == PL_bufend) 9489 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 9490 d += len; 9491 s++; 9492 } 9493 else { 9494 if (*s == '\\') 9495 /* <<\FOO is equivalent to <<'FOO' */ 9496 s++, term = '\''; 9497 else 9498 term = '"'; 9499 if (!isWORDCHAR_lazy_if(s,UTF)) 9500 deprecate("bare << to mean <<\"\""); 9501 peek = s; 9502 while (isWORDCHAR_lazy_if(peek,UTF)) { 9503 peek += UTF ? UTF8SKIP(peek) : 1; 9504 } 9505 len = (peek - s >= e - d) ? (e - d) : (peek - s); 9506 Copy(s, d, len, char); 9507 s += len; 9508 d += len; 9509 } 9510 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 9511 Perl_croak(aTHX_ "Delimiter for here document is too long"); 9512 *d++ = '\n'; 9513 *d = '\0'; 9514 len = d - PL_tokenbuf; 9515 9516 #ifndef PERL_STRICT_CR 9517 d = strchr(s, '\r'); 9518 if (d) { 9519 char * const olds = s; 9520 s = d; 9521 while (s < PL_bufend) { 9522 if (*s == '\r') { 9523 *d++ = '\n'; 9524 if (*++s == '\n') 9525 s++; 9526 } 9527 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 9528 *d++ = *s++; 9529 s++; 9530 } 9531 else 9532 *d++ = *s++; 9533 } 9534 *d = '\0'; 9535 PL_bufend = d; 9536 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 9537 s = olds; 9538 } 9539 #endif 9540 9541 tmpstr = newSV_type(SVt_PVIV); 9542 SvGROW(tmpstr, 80); 9543 if (term == '\'') { 9544 op_type = OP_CONST; 9545 SvIV_set(tmpstr, -1); 9546 } 9547 else if (term == '`') { 9548 op_type = OP_BACKTICK; 9549 SvIV_set(tmpstr, '\\'); 9550 } 9551 9552 PL_multi_start = origline + 1 + PL_parser->herelines; 9553 PL_multi_open = PL_multi_close = '<'; 9554 /* inside a string eval or quote-like operator */ 9555 if (!infile || PL_lex_inwhat) { 9556 SV *linestr; 9557 char *bufend; 9558 char * const olds = s; 9559 PERL_CONTEXT * const cx = CX_CUR(); 9560 /* These two fields are not set until an inner lexing scope is 9561 entered. But we need them set here. */ 9562 shared->ls_bufptr = s; 9563 shared->ls_linestr = PL_linestr; 9564 if (PL_lex_inwhat) 9565 /* Look for a newline. If the current buffer does not have one, 9566 peek into the line buffer of the parent lexing scope, going 9567 up as many levels as necessary to find one with a newline 9568 after bufptr. 9569 */ 9570 while (!(s = (char *)memchr( 9571 (void *)shared->ls_bufptr, '\n', 9572 SvEND(shared->ls_linestr)-shared->ls_bufptr 9573 ))) { 9574 shared = shared->ls_prev; 9575 /* shared is only null if we have gone beyond the outermost 9576 lexing scope. In a file, we will have broken out of the 9577 loop in the previous iteration. In an eval, the string buf- 9578 fer ends with "\n;", so the while condition above will have 9579 evaluated to false. So shared can never be null. Or so you 9580 might think. Odd syntax errors like s;@{<<; can gobble up 9581 the implicit semicolon at the end of a flie, causing the 9582 file handle to be closed even when we are not in a string 9583 eval. So shared may be null in that case. */ 9584 if (UNLIKELY(!shared)) 9585 goto interminable; 9586 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 9587 most lexing scope. In a file, shared->ls_linestr at that 9588 level is just one line, so there is no body to steal. */ 9589 if (infile && !shared->ls_prev) { 9590 s = olds; 9591 goto streaming; 9592 } 9593 } 9594 else { /* eval or we've already hit EOF */ 9595 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 9596 if (!s) 9597 goto interminable; 9598 } 9599 linestr = shared->ls_linestr; 9600 bufend = SvEND(linestr); 9601 d = s; 9602 while (s < bufend - len + 1 9603 && memNE(s,PL_tokenbuf,len) ) 9604 { 9605 if (*s++ == '\n') 9606 ++PL_parser->herelines; 9607 } 9608 if (s >= bufend - len + 1) { 9609 goto interminable; 9610 } 9611 sv_setpvn(tmpstr,d+1,s-d); 9612 s += len - 1; 9613 /* the preceding stmt passes a newline */ 9614 PL_parser->herelines++; 9615 9616 /* s now points to the newline after the heredoc terminator. 9617 d points to the newline before the body of the heredoc. 9618 */ 9619 9620 /* We are going to modify linestr in place here, so set 9621 aside copies of the string if necessary for re-evals or 9622 (caller $n)[6]. */ 9623 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 9624 check shared->re_eval_str. */ 9625 if (shared->re_eval_start || shared->re_eval_str) { 9626 /* Set aside the rest of the regexp */ 9627 if (!shared->re_eval_str) 9628 shared->re_eval_str = 9629 newSVpvn(shared->re_eval_start, 9630 bufend - shared->re_eval_start); 9631 shared->re_eval_start -= s-d; 9632 } 9633 if (cxstack_ix >= 0 9634 && CxTYPE(cx) == CXt_EVAL 9635 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL 9636 && cx->blk_eval.cur_text == linestr) 9637 { 9638 cx->blk_eval.cur_text = newSVsv(linestr); 9639 SvSCREAM_on(cx->blk_eval.cur_text); 9640 } 9641 /* Copy everything from s onwards back to d. */ 9642 Move(s,d,bufend-s + 1,char); 9643 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 9644 /* Setting PL_bufend only applies when we have not dug deeper 9645 into other scopes, because sublex_done sets PL_bufend to 9646 SvEND(PL_linestr). */ 9647 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); 9648 s = olds; 9649 } 9650 else 9651 { 9652 SV *linestr_save; 9653 char *oldbufptr_save; 9654 streaming: 9655 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ 9656 term = PL_tokenbuf[1]; 9657 len--; 9658 linestr_save = PL_linestr; /* must restore this afterwards */ 9659 d = s; /* and this */ 9660 oldbufptr_save = PL_oldbufptr; 9661 PL_linestr = newSVpvs(""); 9662 PL_bufend = SvPVX(PL_linestr); 9663 while (1) { 9664 PL_bufptr = PL_bufend; 9665 CopLINE_set(PL_curcop, 9666 origline + 1 + PL_parser->herelines); 9667 if (!lex_next_chunk(LEX_NO_TERM) 9668 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { 9669 /* Simply freeing linestr_save might seem simpler here, as it 9670 does not matter what PL_linestr points to, since we are 9671 about to croak; but in a quote-like op, linestr_save 9672 will have been prospectively freed already, via 9673 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to 9674 restore PL_linestr. */ 9675 SvREFCNT_dec_NN(PL_linestr); 9676 PL_linestr = linestr_save; 9677 PL_oldbufptr = oldbufptr_save; 9678 goto interminable; 9679 } 9680 CopLINE_set(PL_curcop, origline); 9681 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 9682 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 9683 /* ^That should be enough to avoid this needing to grow: */ 9684 sv_catpvs(PL_linestr, "\n\0"); 9685 assert(s == SvPVX(PL_linestr)); 9686 PL_bufend = SvEND(PL_linestr); 9687 } 9688 s = PL_bufptr; 9689 PL_parser->herelines++; 9690 PL_last_lop = PL_last_uni = NULL; 9691 #ifndef PERL_STRICT_CR 9692 if (PL_bufend - PL_linestart >= 2) { 9693 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') 9694 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 9695 { 9696 PL_bufend[-2] = '\n'; 9697 PL_bufend--; 9698 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 9699 } 9700 else if (PL_bufend[-1] == '\r') 9701 PL_bufend[-1] = '\n'; 9702 } 9703 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 9704 PL_bufend[-1] = '\n'; 9705 #endif 9706 if (*s == term && PL_bufend-s >= len 9707 && memEQ(s,PL_tokenbuf + 1,len)) { 9708 SvREFCNT_dec(PL_linestr); 9709 PL_linestr = linestr_save; 9710 PL_linestart = SvPVX(linestr_save); 9711 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 9712 PL_oldbufptr = oldbufptr_save; 9713 s = d; 9714 break; 9715 } 9716 else { 9717 sv_catsv(tmpstr,PL_linestr); 9718 } 9719 } 9720 } 9721 PL_multi_end = origline + PL_parser->herelines; 9722 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 9723 SvPV_shrink_to_cur(tmpstr); 9724 } 9725 if (!IN_BYTES) { 9726 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 9727 SvUTF8_on(tmpstr); 9728 else if (IN_ENCODING) 9729 sv_recode_to_utf8(tmpstr, _get_encoding()); 9730 } 9731 PL_lex_stuff = tmpstr; 9732 pl_yylval.ival = op_type; 9733 return s; 9734 9735 interminable: 9736 SvREFCNT_dec(tmpstr); 9737 CopLINE_set(PL_curcop, origline); 9738 missingterm(PL_tokenbuf + 1); 9739 } 9740 9741 /* scan_inputsymbol 9742 takes: current position in input buffer 9743 returns: new position in input buffer 9744 side-effects: pl_yylval and lex_op are set. 9745 9746 This code handles: 9747 9748 <> read from ARGV 9749 <<>> read from ARGV without magic open 9750 <FH> read from filehandle 9751 <pkg::FH> read from package qualified filehandle 9752 <pkg'FH> read from package qualified filehandle 9753 <$fh> read from filehandle in $fh 9754 <*.h> filename glob 9755 9756 */ 9757 9758 STATIC char * 9759 S_scan_inputsymbol(pTHX_ char *start) 9760 { 9761 char *s = start; /* current position in buffer */ 9762 char *end; 9763 I32 len; 9764 bool nomagicopen = FALSE; 9765 char *d = PL_tokenbuf; /* start of temp holding space */ 9766 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 9767 9768 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 9769 9770 end = strchr(s, '\n'); 9771 if (!end) 9772 end = PL_bufend; 9773 if (s[1] == '<' && s[2] == '>' && s[3] == '>') { 9774 nomagicopen = TRUE; 9775 *d = '\0'; 9776 len = 0; 9777 s += 3; 9778 } 9779 else 9780 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 9781 9782 /* die if we didn't have space for the contents of the <>, 9783 or if it didn't end, or if we see a newline 9784 */ 9785 9786 if (len >= (I32)sizeof PL_tokenbuf) 9787 Perl_croak(aTHX_ "Excessively long <> operator"); 9788 if (s >= end) 9789 Perl_croak(aTHX_ "Unterminated <> operator"); 9790 9791 s++; 9792 9793 /* check for <$fh> 9794 Remember, only scalar variables are interpreted as filehandles by 9795 this code. Anything more complex (e.g., <$fh{$num}>) will be 9796 treated as a glob() call. 9797 This code makes use of the fact that except for the $ at the front, 9798 a scalar variable and a filehandle look the same. 9799 */ 9800 if (*d == '$' && d[1]) d++; 9801 9802 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 9803 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 9804 d += UTF ? UTF8SKIP(d) : 1; 9805 9806 /* If we've tried to read what we allow filehandles to look like, and 9807 there's still text left, then it must be a glob() and not a getline. 9808 Use scan_str to pull out the stuff between the <> and treat it 9809 as nothing more than a string. 9810 */ 9811 9812 if (d - PL_tokenbuf != len) { 9813 pl_yylval.ival = OP_GLOB; 9814 s = scan_str(start,FALSE,FALSE,FALSE,NULL); 9815 if (!s) 9816 Perl_croak(aTHX_ "Glob not terminated"); 9817 return s; 9818 } 9819 else { 9820 bool readline_overriden = FALSE; 9821 GV *gv_readline; 9822 /* we're in a filehandle read situation */ 9823 d = PL_tokenbuf; 9824 9825 /* turn <> into <ARGV> */ 9826 if (!len) 9827 Copy("ARGV",d,5,char); 9828 9829 /* Check whether readline() is overriden */ 9830 if ((gv_readline = gv_override("readline",8))) 9831 readline_overriden = TRUE; 9832 9833 /* if <$fh>, create the ops to turn the variable into a 9834 filehandle 9835 */ 9836 if (*d == '$') { 9837 /* try to find it in the pad for this block, otherwise find 9838 add symbol table ops 9839 */ 9840 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); 9841 if (tmp != NOT_IN_PAD) { 9842 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9843 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9844 HEK * const stashname = HvNAME_HEK(stash); 9845 SV * const sym = sv_2mortal(newSVhek(stashname)); 9846 sv_catpvs(sym, "::"); 9847 sv_catpv(sym, d+1); 9848 d = SvPVX(sym); 9849 goto intro_sym; 9850 } 9851 else { 9852 OP * const o = newOP(OP_PADSV, 0); 9853 o->op_targ = tmp; 9854 PL_lex_op = readline_overriden 9855 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 9856 op_append_elem(OP_LIST, o, 9857 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 9858 : (OP*)newUNOP(OP_READLINE, 0, o); 9859 } 9860 } 9861 else { 9862 GV *gv; 9863 ++d; 9864 intro_sym: 9865 gv = gv_fetchpv(d, 9866 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), 9867 SVt_PV); 9868 PL_lex_op = readline_overriden 9869 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 9870 op_append_elem(OP_LIST, 9871 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 9872 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 9873 : (OP*)newUNOP(OP_READLINE, 0, 9874 newUNOP(OP_RV2SV, 0, 9875 newGVOP(OP_GV, 0, gv))); 9876 } 9877 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 9878 pl_yylval.ival = OP_NULL; 9879 } 9880 9881 /* If it's none of the above, it must be a literal filehandle 9882 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 9883 else { 9884 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 9885 PL_lex_op = readline_overriden 9886 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 9887 op_append_elem(OP_LIST, 9888 newGVOP(OP_GV, 0, gv), 9889 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 9890 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); 9891 pl_yylval.ival = OP_NULL; 9892 } 9893 } 9894 9895 return s; 9896 } 9897 9898 9899 /* scan_str 9900 takes: 9901 start position in buffer 9902 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but 9903 only if they are of the open/close form 9904 keep_delims preserve the delimiters around the string 9905 re_reparse compiling a run-time /(?{})/: 9906 collapse // to /, and skip encoding src 9907 delimp if non-null, this is set to the position of 9908 the closing delimiter, or just after it if 9909 the closing and opening delimiters differ 9910 (i.e., the opening delimiter of a substitu- 9911 tion replacement) 9912 returns: position to continue reading from buffer 9913 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 9914 updates the read buffer. 9915 9916 This subroutine pulls a string out of the input. It is called for: 9917 q single quotes q(literal text) 9918 ' single quotes 'literal text' 9919 qq double quotes qq(interpolate $here please) 9920 " double quotes "interpolate $here please" 9921 qx backticks qx(/bin/ls -l) 9922 ` backticks `/bin/ls -l` 9923 qw quote words @EXPORT_OK = qw( func() $spam ) 9924 m// regexp match m/this/ 9925 s/// regexp substitute s/this/that/ 9926 tr/// string transliterate tr/this/that/ 9927 y/// string transliterate y/this/that/ 9928 ($*@) sub prototypes sub foo ($) 9929 (stuff) sub attr parameters sub foo : attr(stuff) 9930 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 9931 9932 In most of these cases (all but <>, patterns and transliterate) 9933 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 9934 calls scan_str(). s/// makes yylex() call scan_subst() which calls 9935 scan_str(). tr/// and y/// make yylex() call scan_trans() which 9936 calls scan_str(). 9937 9938 It skips whitespace before the string starts, and treats the first 9939 character as the delimiter. If the delimiter is one of ([{< then 9940 the corresponding "close" character )]}> is used as the closing 9941 delimiter. It allows quoting of delimiters, and if the string has 9942 balanced delimiters ([{<>}]) it allows nesting. 9943 9944 On success, the SV with the resulting string is put into lex_stuff or, 9945 if that is already non-NULL, into lex_repl. The second case occurs only 9946 when parsing the RHS of the special constructs s/// and tr/// (y///). 9947 For convenience, the terminating delimiter character is stuffed into 9948 SvIVX of the SV. 9949 */ 9950 9951 STATIC char * 9952 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, 9953 char **delimp 9954 ) 9955 { 9956 SV *sv; /* scalar value: string */ 9957 const char *tmps; /* temp string, used for delimiter matching */ 9958 char *s = start; /* current position in the buffer */ 9959 char term; /* terminating character */ 9960 char *to; /* current position in the sv's data */ 9961 I32 brackets = 1; /* bracket nesting level */ 9962 bool has_utf8 = FALSE; /* is there any utf8 content? */ 9963 I32 termcode; /* terminating char. code */ 9964 U8 termstr[UTF8_MAXBYTES]; /* terminating string */ 9965 STRLEN termlen; /* length of terminating string */ 9966 int last_off = 0; /* last position for nesting bracket */ 9967 line_t herelines; 9968 9969 PERL_ARGS_ASSERT_SCAN_STR; 9970 9971 /* skip space before the delimiter */ 9972 if (isSPACE(*s)) { 9973 s = skipspace(s); 9974 } 9975 9976 /* mark where we are, in case we need to report errors */ 9977 CLINE; 9978 9979 /* after skipping whitespace, the next character is the terminator */ 9980 term = *s; 9981 if (!UTF) { 9982 termcode = termstr[0] = term; 9983 termlen = 1; 9984 } 9985 else { 9986 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); 9987 Copy(s, termstr, termlen, U8); 9988 if (!UTF8_IS_INVARIANT(term)) 9989 has_utf8 = TRUE; 9990 } 9991 9992 /* mark where we are */ 9993 PL_multi_start = CopLINE(PL_curcop); 9994 PL_multi_open = term; 9995 herelines = PL_parser->herelines; 9996 9997 /* find corresponding closing delimiter */ 9998 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 9999 termcode = termstr[0] = term = tmps[5]; 10000 10001 PL_multi_close = term; 10002 10003 if (PL_multi_open == PL_multi_close) { 10004 keep_bracketed_quoted = FALSE; 10005 } 10006 10007 /* create a new SV to hold the contents. 79 is the SV's initial length. 10008 What a random number. */ 10009 sv = newSV_type(SVt_PVIV); 10010 SvGROW(sv, 80); 10011 SvIV_set(sv, termcode); 10012 (void)SvPOK_only(sv); /* validate pointer */ 10013 10014 /* move past delimiter and try to read a complete string */ 10015 if (keep_delims) 10016 sv_catpvn(sv, s, termlen); 10017 s += termlen; 10018 for (;;) { 10019 if (IN_ENCODING && !UTF && !re_reparse) { 10020 bool cont = TRUE; 10021 10022 while (cont) { 10023 int offset = s - SvPVX_const(PL_linestr); 10024 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr, 10025 &offset, (char*)termstr, termlen); 10026 const char *ns; 10027 char *svlast; 10028 10029 if (SvIsCOW(PL_linestr)) { 10030 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; 10031 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; 10032 STRLEN last_lop_pos, re_eval_start_pos, s_pos; 10033 char *buf = SvPVX(PL_linestr); 10034 bufend_pos = PL_parser->bufend - buf; 10035 bufptr_pos = PL_parser->bufptr - buf; 10036 oldbufptr_pos = PL_parser->oldbufptr - buf; 10037 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 10038 linestart_pos = PL_parser->linestart - buf; 10039 last_uni_pos = PL_parser->last_uni 10040 ? PL_parser->last_uni - buf 10041 : 0; 10042 last_lop_pos = PL_parser->last_lop 10043 ? PL_parser->last_lop - buf 10044 : 0; 10045 re_eval_start_pos = 10046 PL_parser->lex_shared->re_eval_start ? 10047 PL_parser->lex_shared->re_eval_start - buf : 0; 10048 s_pos = s - buf; 10049 10050 sv_force_normal(PL_linestr); 10051 10052 buf = SvPVX(PL_linestr); 10053 PL_parser->bufend = buf + bufend_pos; 10054 PL_parser->bufptr = buf + bufptr_pos; 10055 PL_parser->oldbufptr = buf + oldbufptr_pos; 10056 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 10057 PL_parser->linestart = buf + linestart_pos; 10058 if (PL_parser->last_uni) 10059 PL_parser->last_uni = buf + last_uni_pos; 10060 if (PL_parser->last_lop) 10061 PL_parser->last_lop = buf + last_lop_pos; 10062 if (PL_parser->lex_shared->re_eval_start) 10063 PL_parser->lex_shared->re_eval_start = 10064 buf + re_eval_start_pos; 10065 s = buf + s_pos; 10066 } 10067 ns = SvPVX_const(PL_linestr) + offset; 10068 svlast = SvEND(sv) - 1; 10069 10070 for (; s < ns; s++) { 10071 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10072 COPLINE_INC_WITH_HERELINES; 10073 } 10074 if (!found) 10075 goto read_more_line; 10076 else { 10077 /* handle quoted delimiters */ 10078 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { 10079 const char *t; 10080 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) 10081 t--; 10082 if ((svlast-1 - t) % 2) { 10083 if (!keep_bracketed_quoted) { 10084 *(svlast-1) = term; 10085 *svlast = '\0'; 10086 SvCUR_set(sv, SvCUR(sv) - 1); 10087 } 10088 continue; 10089 } 10090 } 10091 if (PL_multi_open == PL_multi_close) { 10092 cont = FALSE; 10093 } 10094 else { 10095 const char *t; 10096 char *w; 10097 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { 10098 /* At here, all closes are "was quoted" one, 10099 so we don't check PL_multi_close. */ 10100 if (*t == '\\') { 10101 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open) 10102 t++; 10103 else 10104 *w++ = *t++; 10105 } 10106 else if (*t == PL_multi_open) 10107 brackets++; 10108 10109 *w = *t; 10110 } 10111 if (w < t) { 10112 *w++ = term; 10113 *w = '\0'; 10114 SvCUR_set(sv, w - SvPVX_const(sv)); 10115 } 10116 last_off = w - SvPVX(sv); 10117 if (--brackets <= 0) 10118 cont = FALSE; 10119 } 10120 } 10121 } 10122 if (!keep_delims) { 10123 SvCUR_set(sv, SvCUR(sv) - 1); 10124 *SvEND(sv) = '\0'; 10125 } 10126 break; 10127 } 10128 10129 /* extend sv if need be */ 10130 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 10131 /* set 'to' to the next character in the sv's string */ 10132 to = SvPVX(sv)+SvCUR(sv); 10133 10134 /* if open delimiter is the close delimiter read unbridle */ 10135 if (PL_multi_open == PL_multi_close) { 10136 for (; s < PL_bufend; s++,to++) { 10137 /* embedded newlines increment the current line number */ 10138 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10139 COPLINE_INC_WITH_HERELINES; 10140 /* handle quoted delimiters */ 10141 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 10142 if (!keep_bracketed_quoted 10143 && (s[1] == term 10144 || (re_reparse && s[1] == '\\')) 10145 ) 10146 s++; 10147 else /* any other quotes are simply copied straight through */ 10148 *to++ = *s++; 10149 } 10150 /* terminate when run out of buffer (the for() condition), or 10151 have found the terminator */ 10152 else if (*s == term) { 10153 if (termlen == 1) 10154 break; 10155 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) 10156 break; 10157 } 10158 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10159 has_utf8 = TRUE; 10160 *to = *s; 10161 } 10162 } 10163 10164 /* if the terminator isn't the same as the start character (e.g., 10165 matched brackets), we have to allow more in the quoting, and 10166 be prepared for nested brackets. 10167 */ 10168 else { 10169 /* read until we run out of string, or we find the terminator */ 10170 for (; s < PL_bufend; s++,to++) { 10171 /* embedded newlines increment the line count */ 10172 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10173 COPLINE_INC_WITH_HERELINES; 10174 /* backslashes can escape the open or closing characters */ 10175 if (*s == '\\' && s+1 < PL_bufend) { 10176 if (!keep_bracketed_quoted 10177 && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 10178 { 10179 s++; 10180 } 10181 else 10182 *to++ = *s++; 10183 } 10184 /* allow nested opens and closes */ 10185 else if (*s == PL_multi_close && --brackets <= 0) 10186 break; 10187 else if (*s == PL_multi_open) 10188 brackets++; 10189 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10190 has_utf8 = TRUE; 10191 *to = *s; 10192 } 10193 } 10194 /* terminate the copied string and update the sv's end-of-string */ 10195 *to = '\0'; 10196 SvCUR_set(sv, to - SvPVX_const(sv)); 10197 10198 /* 10199 * this next chunk reads more into the buffer if we're not done yet 10200 */ 10201 10202 if (s < PL_bufend) 10203 break; /* handle case where we are done yet :-) */ 10204 10205 #ifndef PERL_STRICT_CR 10206 if (to - SvPVX_const(sv) >= 2) { 10207 if ( (to[-2] == '\r' && to[-1] == '\n') 10208 || (to[-2] == '\n' && to[-1] == '\r')) 10209 { 10210 to[-2] = '\n'; 10211 to--; 10212 SvCUR_set(sv, to - SvPVX_const(sv)); 10213 } 10214 else if (to[-1] == '\r') 10215 to[-1] = '\n'; 10216 } 10217 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 10218 to[-1] = '\n'; 10219 #endif 10220 10221 read_more_line: 10222 /* if we're out of file, or a read fails, bail and reset the current 10223 line marker so we can report where the unterminated string began 10224 */ 10225 COPLINE_INC_WITH_HERELINES; 10226 PL_bufptr = PL_bufend; 10227 if (!lex_next_chunk(0)) { 10228 sv_free(sv); 10229 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 10230 return NULL; 10231 } 10232 s = PL_bufptr; 10233 } 10234 10235 /* at this point, we have successfully read the delimited string */ 10236 10237 if (!IN_ENCODING || UTF || re_reparse) { 10238 10239 if (keep_delims) 10240 sv_catpvn(sv, s, termlen); 10241 s += termlen; 10242 } 10243 if (has_utf8 || (IN_ENCODING && !re_reparse)) 10244 SvUTF8_on(sv); 10245 10246 PL_multi_end = CopLINE(PL_curcop); 10247 CopLINE_set(PL_curcop, PL_multi_start); 10248 PL_parser->herelines = herelines; 10249 10250 /* if we allocated too much space, give some back */ 10251 if (SvCUR(sv) + 5 < SvLEN(sv)) { 10252 SvLEN_set(sv, SvCUR(sv) + 1); 10253 SvPV_renew(sv, SvLEN(sv)); 10254 } 10255 10256 /* decide whether this is the first or second quoted string we've read 10257 for this op 10258 */ 10259 10260 if (PL_lex_stuff) 10261 PL_sublex_info.repl = sv; 10262 else 10263 PL_lex_stuff = sv; 10264 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s; 10265 return s; 10266 } 10267 10268 /* 10269 scan_num 10270 takes: pointer to position in buffer 10271 returns: pointer to new position in buffer 10272 side-effects: builds ops for the constant in pl_yylval.op 10273 10274 Read a number in any of the formats that Perl accepts: 10275 10276 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 10277 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 10278 0b[01](_?[01])* binary integers 10279 0[0-7](_?[0-7])* octal integers 10280 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 10281 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats 10282 10283 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 10284 thing it reads. 10285 10286 If it reads a number without a decimal point or an exponent, it will 10287 try converting the number to an integer and see if it can do so 10288 without loss of precision. 10289 */ 10290 10291 char * 10292 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 10293 { 10294 const char *s = start; /* current position in buffer */ 10295 char *d; /* destination in temp buffer */ 10296 char *e; /* end of temp buffer */ 10297 NV nv; /* number read, as a double */ 10298 SV *sv = NULL; /* place to put the converted number */ 10299 bool floatit; /* boolean: int or float? */ 10300 const char *lastub = NULL; /* position of last underbar */ 10301 static const char* const number_too_long = "Number too long"; 10302 /* Hexadecimal floating point. 10303 * 10304 * In many places (where we have quads and NV is IEEE 754 double) 10305 * we can fit the mantissa bits of a NV into an unsigned quad. 10306 * (Note that UVs might not be quads even when we have quads.) 10307 * This will not work everywhere, though (either no quads, or 10308 * using long doubles), in which case we have to resort to NV, 10309 * which will probably mean horrible loss of precision due to 10310 * multiple fp operations. */ 10311 bool hexfp = FALSE; 10312 int total_bits = 0; 10313 int significant_bits = 0; 10314 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) 10315 # define HEXFP_UQUAD 10316 Uquad_t hexfp_uquad = 0; 10317 int hexfp_frac_bits = 0; 10318 #else 10319 # define HEXFP_NV 10320 NV hexfp_nv = 0.0; 10321 #endif 10322 NV hexfp_mult = 1.0; 10323 UV high_non_zero = 0; /* highest digit */ 10324 int non_zero_integer_digits = 0; 10325 10326 PERL_ARGS_ASSERT_SCAN_NUM; 10327 10328 /* We use the first character to decide what type of number this is */ 10329 10330 switch (*s) { 10331 default: 10332 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 10333 10334 /* if it starts with a 0, it could be an octal number, a decimal in 10335 0.13 disguise, or a hexadecimal number, or a binary number. */ 10336 case '0': 10337 { 10338 /* variables: 10339 u holds the "number so far" 10340 shift the power of 2 of the base 10341 (hex == 4, octal == 3, binary == 1) 10342 overflowed was the number more than we can hold? 10343 10344 Shift is used when we add a digit. It also serves as an "are 10345 we in octal/hex/binary?" indicator to disallow hex characters 10346 when in octal mode. 10347 */ 10348 NV n = 0.0; 10349 UV u = 0; 10350 I32 shift; 10351 bool overflowed = FALSE; 10352 bool just_zero = TRUE; /* just plain 0 or binary number? */ 10353 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 10354 static const char* const bases[5] = 10355 { "", "binary", "", "octal", "hexadecimal" }; 10356 static const char* const Bases[5] = 10357 { "", "Binary", "", "Octal", "Hexadecimal" }; 10358 static const char* const maxima[5] = 10359 { "", 10360 "0b11111111111111111111111111111111", 10361 "", 10362 "037777777777", 10363 "0xffffffff" }; 10364 const char *base, *Base, *max; 10365 10366 /* check for hex */ 10367 if (isALPHA_FOLD_EQ(s[1], 'x')) { 10368 shift = 4; 10369 s += 2; 10370 just_zero = FALSE; 10371 } else if (isALPHA_FOLD_EQ(s[1], 'b')) { 10372 shift = 1; 10373 s += 2; 10374 just_zero = FALSE; 10375 } 10376 /* check for a decimal in disguise */ 10377 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) 10378 goto decimal; 10379 /* so it must be octal */ 10380 else { 10381 shift = 3; 10382 s++; 10383 } 10384 10385 if (*s == '_') { 10386 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10387 "Misplaced _ in number"); 10388 lastub = s++; 10389 } 10390 10391 base = bases[shift]; 10392 Base = Bases[shift]; 10393 max = maxima[shift]; 10394 10395 /* read the rest of the number */ 10396 for (;;) { 10397 /* x is used in the overflow test, 10398 b is the digit we're adding on. */ 10399 UV x, b; 10400 10401 switch (*s) { 10402 10403 /* if we don't mention it, we're done */ 10404 default: 10405 goto out; 10406 10407 /* _ are ignored -- but warned about if consecutive */ 10408 case '_': 10409 if (lastub && s == lastub + 1) 10410 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10411 "Misplaced _ in number"); 10412 lastub = s++; 10413 break; 10414 10415 /* 8 and 9 are not octal */ 10416 case '8': case '9': 10417 if (shift == 3) 10418 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 10419 /* FALLTHROUGH */ 10420 10421 /* octal digits */ 10422 case '2': case '3': case '4': 10423 case '5': case '6': case '7': 10424 if (shift == 1) 10425 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 10426 /* FALLTHROUGH */ 10427 10428 case '0': case '1': 10429 b = *s++ & 15; /* ASCII digit -> value of digit */ 10430 goto digit; 10431 10432 /* hex digits */ 10433 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 10434 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 10435 /* make sure they said 0x */ 10436 if (shift != 4) 10437 goto out; 10438 b = (*s++ & 7) + 9; 10439 10440 /* Prepare to put the digit we have onto the end 10441 of the number so far. We check for overflows. 10442 */ 10443 10444 digit: 10445 just_zero = FALSE; 10446 if (!overflowed) { 10447 x = u << shift; /* make room for the digit */ 10448 10449 total_bits += shift; 10450 10451 if ((x >> shift) != u 10452 && !(PL_hints & HINT_NEW_BINARY)) { 10453 overflowed = TRUE; 10454 n = (NV) u; 10455 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 10456 "Integer overflow in %s number", 10457 base); 10458 } else 10459 u = x | b; /* add the digit to the end */ 10460 } 10461 if (overflowed) { 10462 n *= nvshift[shift]; 10463 /* If an NV has not enough bits in its 10464 * mantissa to represent an UV this summing of 10465 * small low-order numbers is a waste of time 10466 * (because the NV cannot preserve the 10467 * low-order bits anyway): we could just 10468 * remember when did we overflow and in the 10469 * end just multiply n by the right 10470 * amount. */ 10471 n += (NV) b; 10472 } 10473 10474 if (high_non_zero == 0 && b > 0) 10475 high_non_zero = b; 10476 10477 if (high_non_zero) 10478 non_zero_integer_digits++; 10479 10480 /* this could be hexfp, but peek ahead 10481 * to avoid matching ".." */ 10482 if (UNLIKELY(HEXFP_PEEK(s))) { 10483 goto out; 10484 } 10485 10486 break; 10487 } 10488 } 10489 10490 /* if we get here, we had success: make a scalar value from 10491 the number. 10492 */ 10493 out: 10494 10495 /* final misplaced underbar check */ 10496 if (s[-1] == '_') { 10497 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 10498 } 10499 10500 if (UNLIKELY(HEXFP_PEEK(s))) { 10501 /* Do sloppy (on the underbars) but quick detection 10502 * (and value construction) for hexfp, the decimal 10503 * detection will shortly be more thorough with the 10504 * underbar checks. */ 10505 const char* h = s; 10506 significant_bits = non_zero_integer_digits * shift; 10507 #ifdef HEXFP_UQUAD 10508 hexfp_uquad = u; 10509 #else /* HEXFP_NV */ 10510 hexfp_nv = u; 10511 #endif 10512 /* Ignore the leading zero bits of 10513 * the high (first) non-zero digit. */ 10514 if (high_non_zero) { 10515 if (high_non_zero < 0x8) 10516 significant_bits--; 10517 if (high_non_zero < 0x4) 10518 significant_bits--; 10519 if (high_non_zero < 0x2) 10520 significant_bits--; 10521 } 10522 10523 if (*h == '.') { 10524 #ifdef HEXFP_NV 10525 NV nv_mult = 1.0; 10526 #endif 10527 bool accumulate = TRUE; 10528 for (h++; (isXDIGIT(*h) || *h == '_'); h++) { 10529 if (isXDIGIT(*h)) { 10530 U8 b = XDIGIT_VALUE(*h); 10531 significant_bits += shift; 10532 #ifdef HEXFP_UQUAD 10533 if (accumulate) { 10534 if (significant_bits < NV_MANT_DIG) { 10535 /* We are in the long "run" of xdigits, 10536 * accumulate the full four bits. */ 10537 hexfp_uquad <<= shift; 10538 hexfp_uquad |= b; 10539 hexfp_frac_bits += shift; 10540 } else { 10541 /* We are at a hexdigit either at, 10542 * or straddling, the edge of mantissa. 10543 * We will try grabbing as many as 10544 * possible bits. */ 10545 int tail = 10546 significant_bits - NV_MANT_DIG; 10547 if (tail <= 0) 10548 tail += shift; 10549 hexfp_uquad <<= tail; 10550 hexfp_uquad |= b >> (shift - tail); 10551 hexfp_frac_bits += tail; 10552 10553 /* Ignore the trailing zero bits 10554 * of the last non-zero xdigit. 10555 * 10556 * The assumption here is that if 10557 * one has input of e.g. the xdigit 10558 * eight (0x8), there is only one 10559 * bit being input, not the full 10560 * four bits. Conversely, if one 10561 * specifies a zero xdigit, the 10562 * assumption is that one really 10563 * wants all those bits to be zero. */ 10564 if (b) { 10565 if ((b & 0x1) == 0x0) { 10566 significant_bits--; 10567 if ((b & 0x2) == 0x0) { 10568 significant_bits--; 10569 if ((b & 0x4) == 0x0) { 10570 significant_bits--; 10571 } 10572 } 10573 } 10574 } 10575 10576 accumulate = FALSE; 10577 } 10578 } else { 10579 /* Keep skipping the xdigits, and 10580 * accumulating the significant bits, 10581 * but do not shift the uquad 10582 * (which would catastrophically drop 10583 * high-order bits) or accumulate the 10584 * xdigits anymore. */ 10585 } 10586 #else /* HEXFP_NV */ 10587 if (accumulate) { 10588 nv_mult /= 16.0; 10589 if (nv_mult > 0.0) 10590 hexfp_nv += b * nv_mult; 10591 else 10592 accumulate = FALSE; 10593 } 10594 #endif 10595 } 10596 if (significant_bits >= NV_MANT_DIG) 10597 accumulate = FALSE; 10598 } 10599 } 10600 10601 if ((total_bits > 0 || significant_bits > 0) && 10602 isALPHA_FOLD_EQ(*h, 'p')) { 10603 bool negexp = FALSE; 10604 h++; 10605 if (*h == '+') 10606 h++; 10607 else if (*h == '-') { 10608 negexp = TRUE; 10609 h++; 10610 } 10611 if (isDIGIT(*h)) { 10612 I32 hexfp_exp = 0; 10613 while (isDIGIT(*h) || *h == '_') { 10614 if (isDIGIT(*h)) { 10615 hexfp_exp *= 10; 10616 hexfp_exp += *h - '0'; 10617 #ifdef NV_MIN_EXP 10618 if (negexp 10619 && -hexfp_exp < NV_MIN_EXP - 1) { 10620 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 10621 "Hexadecimal float: exponent underflow"); 10622 break; 10623 } 10624 #endif 10625 #ifdef NV_MAX_EXP 10626 if (!negexp 10627 && hexfp_exp > NV_MAX_EXP - 1) { 10628 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 10629 "Hexadecimal float: exponent overflow"); 10630 break; 10631 } 10632 #endif 10633 } 10634 h++; 10635 } 10636 if (negexp) 10637 hexfp_exp = -hexfp_exp; 10638 #ifdef HEXFP_UQUAD 10639 hexfp_exp -= hexfp_frac_bits; 10640 #endif 10641 hexfp_mult = pow(2.0, hexfp_exp); 10642 hexfp = TRUE; 10643 goto decimal; 10644 } 10645 } 10646 } 10647 10648 if (overflowed) { 10649 if (n > 4294967295.0) 10650 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 10651 "%s number > %s non-portable", 10652 Base, max); 10653 sv = newSVnv(n); 10654 } 10655 else { 10656 #if UVSIZE > 4 10657 if (u > 0xffffffff) 10658 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 10659 "%s number > %s non-portable", 10660 Base, max); 10661 #endif 10662 sv = newSVuv(u); 10663 } 10664 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 10665 sv = new_constant(start, s - start, "integer", 10666 sv, NULL, NULL, 0); 10667 else if (PL_hints & HINT_NEW_BINARY) 10668 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0); 10669 } 10670 break; 10671 10672 /* 10673 handle decimal numbers. 10674 we're also sent here when we read a 0 as the first digit 10675 */ 10676 case '1': case '2': case '3': case '4': case '5': 10677 case '6': case '7': case '8': case '9': case '.': 10678 decimal: 10679 d = PL_tokenbuf; 10680 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 10681 floatit = FALSE; 10682 if (hexfp) { 10683 floatit = TRUE; 10684 *d++ = '0'; 10685 *d++ = 'x'; 10686 s = start + 2; 10687 } 10688 10689 /* read next group of digits and _ and copy into d */ 10690 while (isDIGIT(*s) 10691 || *s == '_' 10692 || UNLIKELY(hexfp && isXDIGIT(*s))) 10693 { 10694 /* skip underscores, checking for misplaced ones 10695 if -w is on 10696 */ 10697 if (*s == '_') { 10698 if (lastub && s == lastub + 1) 10699 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10700 "Misplaced _ in number"); 10701 lastub = s++; 10702 } 10703 else { 10704 /* check for end of fixed-length buffer */ 10705 if (d >= e) 10706 Perl_croak(aTHX_ "%s", number_too_long); 10707 /* if we're ok, copy the character */ 10708 *d++ = *s++; 10709 } 10710 } 10711 10712 /* final misplaced underbar check */ 10713 if (lastub && s == lastub + 1) { 10714 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 10715 } 10716 10717 /* read a decimal portion if there is one. avoid 10718 3..5 being interpreted as the number 3. followed 10719 by .5 10720 */ 10721 if (*s == '.' && s[1] != '.') { 10722 floatit = TRUE; 10723 *d++ = *s++; 10724 10725 if (*s == '_') { 10726 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10727 "Misplaced _ in number"); 10728 lastub = s; 10729 } 10730 10731 /* copy, ignoring underbars, until we run out of digits. 10732 */ 10733 for (; isDIGIT(*s) 10734 || *s == '_' 10735 || UNLIKELY(hexfp && isXDIGIT(*s)); 10736 s++) 10737 { 10738 /* fixed length buffer check */ 10739 if (d >= e) 10740 Perl_croak(aTHX_ "%s", number_too_long); 10741 if (*s == '_') { 10742 if (lastub && s == lastub + 1) 10743 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10744 "Misplaced _ in number"); 10745 lastub = s; 10746 } 10747 else 10748 *d++ = *s; 10749 } 10750 /* fractional part ending in underbar? */ 10751 if (s[-1] == '_') { 10752 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10753 "Misplaced _ in number"); 10754 } 10755 if (*s == '.' && isDIGIT(s[1])) { 10756 /* oops, it's really a v-string, but without the "v" */ 10757 s = start; 10758 goto vstring; 10759 } 10760 } 10761 10762 /* read exponent part, if present */ 10763 if ((isALPHA_FOLD_EQ(*s, 'e') 10764 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) 10765 && strchr("+-0123456789_", s[1])) 10766 { 10767 floatit = TRUE; 10768 10769 /* regardless of whether user said 3E5 or 3e5, use lower 'e', 10770 ditto for p (hexfloats) */ 10771 if ((isALPHA_FOLD_EQ(*s, 'e'))) { 10772 /* At least some Mach atof()s don't grok 'E' */ 10773 *d++ = 'e'; 10774 } 10775 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { 10776 *d++ = 'p'; 10777 } 10778 10779 s++; 10780 10781 10782 /* stray preinitial _ */ 10783 if (*s == '_') { 10784 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10785 "Misplaced _ in number"); 10786 lastub = s++; 10787 } 10788 10789 /* allow positive or negative exponent */ 10790 if (*s == '+' || *s == '-') 10791 *d++ = *s++; 10792 10793 /* stray initial _ */ 10794 if (*s == '_') { 10795 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10796 "Misplaced _ in number"); 10797 lastub = s++; 10798 } 10799 10800 /* read digits of exponent */ 10801 while (isDIGIT(*s) || *s == '_') { 10802 if (isDIGIT(*s)) { 10803 if (d >= e) 10804 Perl_croak(aTHX_ "%s", number_too_long); 10805 *d++ = *s++; 10806 } 10807 else { 10808 if (((lastub && s == lastub + 1) 10809 || (!isDIGIT(s[1]) && s[1] != '_'))) 10810 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10811 "Misplaced _ in number"); 10812 lastub = s++; 10813 } 10814 } 10815 } 10816 10817 10818 /* 10819 We try to do an integer conversion first if no characters 10820 indicating "float" have been found. 10821 */ 10822 10823 if (!floatit) { 10824 UV uv; 10825 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 10826 10827 if (flags == IS_NUMBER_IN_UV) { 10828 if (uv <= IV_MAX) 10829 sv = newSViv(uv); /* Prefer IVs over UVs. */ 10830 else 10831 sv = newSVuv(uv); 10832 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 10833 if (uv <= (UV) IV_MIN) 10834 sv = newSViv(-(IV)uv); 10835 else 10836 floatit = TRUE; 10837 } else 10838 floatit = TRUE; 10839 } 10840 if (floatit) { 10841 STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); 10842 /* terminate the string */ 10843 *d = '\0'; 10844 if (UNLIKELY(hexfp)) { 10845 # ifdef NV_MANT_DIG 10846 if (significant_bits > NV_MANT_DIG) 10847 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 10848 "Hexadecimal float: mantissa overflow"); 10849 # endif 10850 #ifdef HEXFP_UQUAD 10851 nv = hexfp_uquad * hexfp_mult; 10852 #else /* HEXFP_NV */ 10853 nv = hexfp_nv * hexfp_mult; 10854 #endif 10855 } else { 10856 nv = Atof(PL_tokenbuf); 10857 } 10858 RESTORE_LC_NUMERIC_UNDERLYING(); 10859 sv = newSVnv(nv); 10860 } 10861 10862 if ( floatit 10863 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 10864 const char *const key = floatit ? "float" : "integer"; 10865 const STRLEN keylen = floatit ? 5 : 7; 10866 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 10867 key, keylen, sv, NULL, NULL, 0); 10868 } 10869 break; 10870 10871 /* if it starts with a v, it could be a v-string */ 10872 case 'v': 10873 vstring: 10874 sv = newSV(5); /* preallocate storage space */ 10875 ENTER_with_name("scan_vstring"); 10876 SAVEFREESV(sv); 10877 s = scan_vstring(s, PL_bufend, sv); 10878 SvREFCNT_inc_simple_void_NN(sv); 10879 LEAVE_with_name("scan_vstring"); 10880 break; 10881 } 10882 10883 /* make the op for the constant and return */ 10884 10885 if (sv) 10886 lvalp->opval = newSVOP(OP_CONST, 0, sv); 10887 else 10888 lvalp->opval = NULL; 10889 10890 return (char *)s; 10891 } 10892 10893 STATIC char * 10894 S_scan_formline(pTHX_ char *s) 10895 { 10896 char *eol; 10897 char *t; 10898 SV * const stuff = newSVpvs(""); 10899 bool needargs = FALSE; 10900 bool eofmt = FALSE; 10901 10902 PERL_ARGS_ASSERT_SCAN_FORMLINE; 10903 10904 while (!needargs) { 10905 if (*s == '.') { 10906 t = s+1; 10907 #ifdef PERL_STRICT_CR 10908 while (SPACE_OR_TAB(*t)) 10909 t++; 10910 #else 10911 while (SPACE_OR_TAB(*t) || *t == '\r') 10912 t++; 10913 #endif 10914 if (*t == '\n' || t == PL_bufend) { 10915 eofmt = TRUE; 10916 break; 10917 } 10918 } 10919 eol = (char *) memchr(s,'\n',PL_bufend-s); 10920 if (!eol++) 10921 eol = PL_bufend; 10922 if (*s != '#') { 10923 for (t = s; t < eol; t++) { 10924 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 10925 needargs = FALSE; 10926 goto enough; /* ~~ must be first line in formline */ 10927 } 10928 if (*t == '@' || *t == '^') 10929 needargs = TRUE; 10930 } 10931 if (eol > s) { 10932 sv_catpvn(stuff, s, eol-s); 10933 #ifndef PERL_STRICT_CR 10934 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 10935 char *end = SvPVX(stuff) + SvCUR(stuff); 10936 end[-2] = '\n'; 10937 end[-1] = '\0'; 10938 SvCUR_set(stuff, SvCUR(stuff) - 1); 10939 } 10940 #endif 10941 } 10942 else 10943 break; 10944 } 10945 s = (char*)eol; 10946 if ((PL_rsfp || PL_parser->filtered) 10947 && PL_parser->form_lex_state == LEX_NORMAL) { 10948 bool got_some; 10949 PL_bufptr = PL_bufend; 10950 COPLINE_INC_WITH_HERELINES; 10951 got_some = lex_next_chunk(0); 10952 CopLINE_dec(PL_curcop); 10953 s = PL_bufptr; 10954 if (!got_some) 10955 break; 10956 } 10957 incline(s); 10958 } 10959 enough: 10960 if (!SvCUR(stuff) || needargs) 10961 PL_lex_state = PL_parser->form_lex_state; 10962 if (SvCUR(stuff)) { 10963 PL_expect = XSTATE; 10964 if (needargs) { 10965 const char *s2 = s; 10966 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f' 10967 || *s2 == '\v') 10968 s2++; 10969 if (*s2 == '{') { 10970 PL_expect = XTERMBLOCK; 10971 NEXTVAL_NEXTTOKE.ival = 0; 10972 force_next(DO); 10973 } 10974 NEXTVAL_NEXTTOKE.ival = 0; 10975 force_next(FORMLBRACK); 10976 } 10977 if (!IN_BYTES) { 10978 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 10979 SvUTF8_on(stuff); 10980 else if (IN_ENCODING) 10981 sv_recode_to_utf8(stuff, _get_encoding()); 10982 } 10983 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); 10984 force_next(THING); 10985 } 10986 else { 10987 SvREFCNT_dec(stuff); 10988 if (eofmt) 10989 PL_lex_formbrack = 0; 10990 } 10991 return s; 10992 } 10993 10994 I32 10995 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 10996 { 10997 const I32 oldsavestack_ix = PL_savestack_ix; 10998 CV* const outsidecv = PL_compcv; 10999 11000 SAVEI32(PL_subline); 11001 save_item(PL_subname); 11002 SAVESPTR(PL_compcv); 11003 11004 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 11005 CvFLAGS(PL_compcv) |= flags; 11006 11007 PL_subline = CopLINE(PL_curcop); 11008 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 11009 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 11010 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 11011 if (outsidecv && CvPADLIST(outsidecv)) 11012 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; 11013 11014 return oldsavestack_ix; 11015 } 11016 11017 static int 11018 S_yywarn(pTHX_ const char *const s, U32 flags) 11019 { 11020 PERL_ARGS_ASSERT_YYWARN; 11021 11022 PL_in_eval |= EVAL_WARNONLY; 11023 yyerror_pv(s, flags); 11024 return 0; 11025 } 11026 11027 int 11028 Perl_yyerror(pTHX_ const char *const s) 11029 { 11030 PERL_ARGS_ASSERT_YYERROR; 11031 return yyerror_pvn(s, strlen(s), 0); 11032 } 11033 11034 int 11035 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 11036 { 11037 PERL_ARGS_ASSERT_YYERROR_PV; 11038 return yyerror_pvn(s, strlen(s), flags); 11039 } 11040 11041 int 11042 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 11043 { 11044 const char *context = NULL; 11045 int contlen = -1; 11046 SV *msg; 11047 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 11048 int yychar = PL_parser->yychar; 11049 11050 PERL_ARGS_ASSERT_YYERROR_PVN; 11051 11052 if (!yychar || (yychar == ';' && !PL_rsfp)) 11053 sv_catpvs(where_sv, "at EOF"); 11054 else if ( PL_oldoldbufptr 11055 && PL_bufptr > PL_oldoldbufptr 11056 && PL_bufptr - PL_oldoldbufptr < 200 11057 && PL_oldoldbufptr != PL_oldbufptr 11058 && PL_oldbufptr != PL_bufptr) 11059 { 11060 /* 11061 Only for NetWare: 11062 The code below is removed for NetWare because it abends/crashes on NetWare 11063 when the script has error such as not having the closing quotes like: 11064 if ($var eq "value) 11065 Checking of white spaces is anyway done in NetWare code. 11066 */ 11067 #ifndef NETWARE 11068 while (isSPACE(*PL_oldoldbufptr)) 11069 PL_oldoldbufptr++; 11070 #endif 11071 context = PL_oldoldbufptr; 11072 contlen = PL_bufptr - PL_oldoldbufptr; 11073 } 11074 else if ( PL_oldbufptr 11075 && PL_bufptr > PL_oldbufptr 11076 && PL_bufptr - PL_oldbufptr < 200 11077 && PL_oldbufptr != PL_bufptr) { 11078 /* 11079 Only for NetWare: 11080 The code below is removed for NetWare because it abends/crashes on NetWare 11081 when the script has error such as not having the closing quotes like: 11082 if ($var eq "value) 11083 Checking of white spaces is anyway done in NetWare code. 11084 */ 11085 #ifndef NETWARE 11086 while (isSPACE(*PL_oldbufptr)) 11087 PL_oldbufptr++; 11088 #endif 11089 context = PL_oldbufptr; 11090 contlen = PL_bufptr - PL_oldbufptr; 11091 } 11092 else if (yychar > 255) 11093 sv_catpvs(where_sv, "next token ???"); 11094 else if (yychar == YYEMPTY) { 11095 if ( PL_lex_state == LEX_NORMAL 11096 || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 11097 sv_catpvs(where_sv, "at end of line"); 11098 else if (PL_lex_inpat) 11099 sv_catpvs(where_sv, "within pattern"); 11100 else 11101 sv_catpvs(where_sv, "within string"); 11102 } 11103 else { 11104 sv_catpvs(where_sv, "next char "); 11105 if (yychar < 32) 11106 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 11107 else if (isPRINT_LC(yychar)) { 11108 const char string = yychar; 11109 sv_catpvn(where_sv, &string, 1); 11110 } 11111 else 11112 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 11113 } 11114 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 11115 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 11116 OutCopFILE(PL_curcop), 11117 (IV)(PL_parser->preambling == NOLINE 11118 ? CopLINE(PL_curcop) 11119 : PL_parser->preambling)); 11120 if (context) 11121 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", 11122 UTF8fARG(UTF, contlen, context)); 11123 else 11124 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); 11125 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 11126 Perl_sv_catpvf(aTHX_ msg, 11127 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 11128 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 11129 PL_multi_end = 0; 11130 } 11131 if (PL_in_eval & EVAL_WARNONLY) { 11132 PL_in_eval &= ~EVAL_WARNONLY; 11133 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); 11134 } 11135 else 11136 qerror(msg); 11137 if (PL_error_count >= 10) { 11138 SV * errsv; 11139 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) 11140 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 11141 SVfARG(errsv), OutCopFILE(PL_curcop)); 11142 else 11143 Perl_croak(aTHX_ "%s has too many errors.\n", 11144 OutCopFILE(PL_curcop)); 11145 } 11146 PL_in_my = 0; 11147 PL_in_my_stash = NULL; 11148 return 0; 11149 } 11150 11151 STATIC char* 11152 S_swallow_bom(pTHX_ U8 *s) 11153 { 11154 const STRLEN slen = SvCUR(PL_linestr); 11155 11156 PERL_ARGS_ASSERT_SWALLOW_BOM; 11157 11158 switch (s[0]) { 11159 case 0xFF: 11160 if (s[1] == 0xFE) { 11161 /* UTF-16 little-endian? (or UTF-32LE?) */ 11162 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 11163 /* diag_listed_as: Unsupported script encoding %s */ 11164 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 11165 #ifndef PERL_NO_UTF16_FILTER 11166 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 11167 s += 2; 11168 if (PL_bufend > (char*)s) { 11169 s = add_utf16_textfilter(s, TRUE); 11170 } 11171 #else 11172 /* diag_listed_as: Unsupported script encoding %s */ 11173 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11174 #endif 11175 } 11176 break; 11177 case 0xFE: 11178 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 11179 #ifndef PERL_NO_UTF16_FILTER 11180 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 11181 s += 2; 11182 if (PL_bufend > (char *)s) { 11183 s = add_utf16_textfilter(s, FALSE); 11184 } 11185 #else 11186 /* diag_listed_as: Unsupported script encoding %s */ 11187 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11188 #endif 11189 } 11190 break; 11191 case BOM_UTF8_FIRST_BYTE: { 11192 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ 11193 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { 11194 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 11195 s += len + 1; /* UTF-8 */ 11196 } 11197 break; 11198 } 11199 case 0: 11200 if (slen > 3) { 11201 if (s[1] == 0) { 11202 if (s[2] == 0xFE && s[3] == 0xFF) { 11203 /* UTF-32 big-endian */ 11204 /* diag_listed_as: Unsupported script encoding %s */ 11205 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 11206 } 11207 } 11208 else if (s[2] == 0 && s[3] != 0) { 11209 /* Leading bytes 11210 * 00 xx 00 xx 11211 * are a good indicator of UTF-16BE. */ 11212 #ifndef PERL_NO_UTF16_FILTER 11213 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 11214 s = add_utf16_textfilter(s, FALSE); 11215 #else 11216 /* diag_listed_as: Unsupported script encoding %s */ 11217 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11218 #endif 11219 } 11220 } 11221 break; 11222 11223 default: 11224 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 11225 /* Leading bytes 11226 * xx 00 xx 00 11227 * are a good indicator of UTF-16LE. */ 11228 #ifndef PERL_NO_UTF16_FILTER 11229 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 11230 s = add_utf16_textfilter(s, TRUE); 11231 #else 11232 /* diag_listed_as: Unsupported script encoding %s */ 11233 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11234 #endif 11235 } 11236 } 11237 return (char*)s; 11238 } 11239 11240 11241 #ifndef PERL_NO_UTF16_FILTER 11242 static I32 11243 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 11244 { 11245 SV *const filter = FILTER_DATA(idx); 11246 /* We re-use this each time round, throwing the contents away before we 11247 return. */ 11248 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 11249 SV *const utf8_buffer = filter; 11250 IV status = IoPAGE(filter); 11251 const bool reverse = cBOOL(IoLINES(filter)); 11252 I32 retval; 11253 11254 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 11255 11256 /* As we're automatically added, at the lowest level, and hence only called 11257 from this file, we can be sure that we're not called in block mode. Hence 11258 don't bother writing code to deal with block mode. */ 11259 if (maxlen) { 11260 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 11261 } 11262 if (status < 0) { 11263 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); 11264 } 11265 DEBUG_P(PerlIO_printf(Perl_debug_log, 11266 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11267 FPTR2DPTR(void *, S_utf16_textfilter), 11268 reverse ? 'l' : 'b', idx, maxlen, status, 11269 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11270 11271 while (1) { 11272 STRLEN chars; 11273 STRLEN have; 11274 I32 newlen; 11275 U8 *end; 11276 /* First, look in our buffer of existing UTF-8 data: */ 11277 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 11278 11279 if (nl) { 11280 ++nl; 11281 } else if (status == 0) { 11282 /* EOF */ 11283 IoPAGE(filter) = 0; 11284 nl = SvEND(utf8_buffer); 11285 } 11286 if (nl) { 11287 STRLEN got = nl - SvPVX(utf8_buffer); 11288 /* Did we have anything to append? */ 11289 retval = got != 0; 11290 sv_catpvn(sv, SvPVX(utf8_buffer), got); 11291 /* Everything else in this code works just fine if SVp_POK isn't 11292 set. This, however, needs it, and we need it to work, else 11293 we loop infinitely because the buffer is never consumed. */ 11294 sv_chop(utf8_buffer, nl); 11295 break; 11296 } 11297 11298 /* OK, not a complete line there, so need to read some more UTF-16. 11299 Read an extra octect if the buffer currently has an odd number. */ 11300 while (1) { 11301 if (status <= 0) 11302 break; 11303 if (SvCUR(utf16_buffer) >= 2) { 11304 /* Location of the high octet of the last complete code point. 11305 Gosh, UTF-16 is a pain. All the benefits of variable length, 11306 *coupled* with all the benefits of partial reads and 11307 endianness. */ 11308 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 11309 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 11310 11311 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 11312 break; 11313 } 11314 11315 /* We have the first half of a surrogate. Read more. */ 11316 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 11317 } 11318 11319 status = FILTER_READ(idx + 1, utf16_buffer, 11320 160 + (SvCUR(utf16_buffer) & 1)); 11321 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); 11322 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 11323 if (status < 0) { 11324 /* Error */ 11325 IoPAGE(filter) = status; 11326 return status; 11327 } 11328 } 11329 11330 chars = SvCUR(utf16_buffer) >> 1; 11331 have = SvCUR(utf8_buffer); 11332 SvGROW(utf8_buffer, have + chars * 3 + 1); 11333 11334 if (reverse) { 11335 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 11336 (U8*)SvPVX_const(utf8_buffer) + have, 11337 chars * 2, &newlen); 11338 } else { 11339 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 11340 (U8*)SvPVX_const(utf8_buffer) + have, 11341 chars * 2, &newlen); 11342 } 11343 SvCUR_set(utf8_buffer, have + newlen); 11344 *end = '\0'; 11345 11346 /* No need to keep this SV "well-formed" with a '\0' after the end, as 11347 it's private to us, and utf16_to_utf8{,reversed} take a 11348 (pointer,length) pair, rather than a NUL-terminated string. */ 11349 if(SvCUR(utf16_buffer) & 1) { 11350 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 11351 SvCUR_set(utf16_buffer, 1); 11352 } else { 11353 SvCUR_set(utf16_buffer, 0); 11354 } 11355 } 11356 DEBUG_P(PerlIO_printf(Perl_debug_log, 11357 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11358 status, 11359 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11360 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 11361 return retval; 11362 } 11363 11364 static U8 * 11365 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 11366 { 11367 SV *filter = filter_add(S_utf16_textfilter, NULL); 11368 11369 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 11370 11371 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 11372 sv_setpvs(filter, ""); 11373 IoLINES(filter) = reversed; 11374 IoPAGE(filter) = 1; /* Not EOF */ 11375 11376 /* Sadly, we have to return a valid pointer, come what may, so we have to 11377 ignore any error return from this. */ 11378 SvCUR_set(PL_linestr, 0); 11379 if (FILTER_READ(0, PL_linestr, 0)) { 11380 SvUTF8_on(PL_linestr); 11381 } else { 11382 SvUTF8_on(PL_linestr); 11383 } 11384 PL_bufend = SvEND(PL_linestr); 11385 return (U8*)SvPVX(PL_linestr); 11386 } 11387 #endif 11388 11389 /* 11390 Returns a pointer to the next character after the parsed 11391 vstring, as well as updating the passed in sv. 11392 11393 Function must be called like 11394 11395 sv = sv_2mortal(newSV(5)); 11396 s = scan_vstring(s,e,sv); 11397 11398 where s and e are the start and end of the string. 11399 The sv should already be large enough to store the vstring 11400 passed in, for performance reasons. 11401 11402 This function may croak if fatal warnings are enabled in the 11403 calling scope, hence the sv_2mortal in the example (to prevent 11404 a leak). Make sure to do SvREFCNT_inc afterwards if you use 11405 sv_2mortal. 11406 11407 */ 11408 11409 char * 11410 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 11411 { 11412 const char *pos = s; 11413 const char *start = s; 11414 11415 PERL_ARGS_ASSERT_SCAN_VSTRING; 11416 11417 if (*pos == 'v') pos++; /* get past 'v' */ 11418 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11419 pos++; 11420 if ( *pos != '.') { 11421 /* this may not be a v-string if followed by => */ 11422 const char *next = pos; 11423 while (next < e && isSPACE(*next)) 11424 ++next; 11425 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 11426 /* return string not v-string */ 11427 sv_setpvn(sv,(char *)s,pos-s); 11428 return (char *)pos; 11429 } 11430 } 11431 11432 if (!isALPHA(*pos)) { 11433 U8 tmpbuf[UTF8_MAXBYTES+1]; 11434 11435 if (*s == 'v') 11436 s++; /* get past 'v' */ 11437 11438 sv_setpvs(sv, ""); 11439 11440 for (;;) { 11441 /* this is atoi() that tolerates underscores */ 11442 U8 *tmpend; 11443 UV rev = 0; 11444 const char *end = pos; 11445 UV mult = 1; 11446 while (--end >= s) { 11447 if (*end != '_') { 11448 const UV orev = rev; 11449 rev += (*end - '0') * mult; 11450 mult *= 10; 11451 if (orev > rev) 11452 /* diag_listed_as: Integer overflow in %s number */ 11453 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11454 "Integer overflow in decimal number"); 11455 } 11456 } 11457 11458 /* Append native character for the rev point */ 11459 tmpend = uvchr_to_utf8(tmpbuf, rev); 11460 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 11461 if (!UVCHR_IS_INVARIANT(rev)) 11462 SvUTF8_on(sv); 11463 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 11464 s = ++pos; 11465 else { 11466 s = pos; 11467 break; 11468 } 11469 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11470 pos++; 11471 } 11472 SvPOK_on(sv); 11473 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 11474 SvRMAGICAL_on(sv); 11475 } 11476 return (char *)s; 11477 } 11478 11479 int 11480 Perl_keyword_plugin_standard(pTHX_ 11481 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 11482 { 11483 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 11484 PERL_UNUSED_CONTEXT; 11485 PERL_UNUSED_ARG(keyword_ptr); 11486 PERL_UNUSED_ARG(keyword_len); 11487 PERL_UNUSED_ARG(op_ptr); 11488 return KEYWORD_PLUGIN_DECLINE; 11489 } 11490 11491 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 11492 static void 11493 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 11494 { 11495 SAVEI32(PL_lex_brackets); 11496 if (PL_lex_brackets > 100) 11497 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 11498 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 11499 SAVEI32(PL_lex_allbrackets); 11500 PL_lex_allbrackets = 0; 11501 SAVEI8(PL_lex_fakeeof); 11502 PL_lex_fakeeof = (U8)fakeeof; 11503 if(yyparse(gramtype) && !PL_parser->error_count) 11504 qerror(Perl_mess(aTHX_ "Parse error")); 11505 } 11506 11507 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 11508 static OP * 11509 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 11510 { 11511 OP *o; 11512 ENTER; 11513 SAVEVPTR(PL_eval_root); 11514 PL_eval_root = NULL; 11515 parse_recdescent(gramtype, fakeeof); 11516 o = PL_eval_root; 11517 LEAVE; 11518 return o; 11519 } 11520 11521 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 11522 static OP * 11523 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 11524 { 11525 OP *exprop; 11526 if (flags & ~PARSE_OPTIONAL) 11527 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 11528 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 11529 if (!exprop && !(flags & PARSE_OPTIONAL)) { 11530 if (!PL_parser->error_count) 11531 qerror(Perl_mess(aTHX_ "Parse error")); 11532 exprop = newOP(OP_NULL, 0); 11533 } 11534 return exprop; 11535 } 11536 11537 /* 11538 =for apidoc Amx|OP *|parse_arithexpr|U32 flags 11539 11540 Parse a Perl arithmetic expression. This may contain operators of precedence 11541 down to the bit shift operators. The expression must be followed (and thus 11542 terminated) either by a comparison or lower-precedence operator or by 11543 something that would normally terminate an expression such as semicolon. 11544 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 11545 otherwise it is mandatory. It is up to the caller to ensure that the 11546 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11547 the source of the code to be parsed and the lexical context for the 11548 expression. 11549 11550 The op tree representing the expression is returned. If an optional 11551 expression is absent, a null pointer is returned, otherwise the pointer 11552 will be non-null. 11553 11554 If an error occurs in parsing or compilation, in most cases a valid op 11555 tree is returned anyway. The error is reflected in the parser state, 11556 normally resulting in a single exception at the top level of parsing 11557 which covers all the compilation errors that occurred. Some compilation 11558 errors, however, will throw an exception immediately. 11559 11560 =cut 11561 */ 11562 11563 OP * 11564 Perl_parse_arithexpr(pTHX_ U32 flags) 11565 { 11566 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 11567 } 11568 11569 /* 11570 =for apidoc Amx|OP *|parse_termexpr|U32 flags 11571 11572 Parse a Perl term expression. This may contain operators of precedence 11573 down to the assignment operators. The expression must be followed (and thus 11574 terminated) either by a comma or lower-precedence operator or by 11575 something that would normally terminate an expression such as semicolon. 11576 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 11577 otherwise it is mandatory. It is up to the caller to ensure that the 11578 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11579 the source of the code to be parsed and the lexical context for the 11580 expression. 11581 11582 The op tree representing the expression is returned. If an optional 11583 expression is absent, a null pointer is returned, otherwise the pointer 11584 will be non-null. 11585 11586 If an error occurs in parsing or compilation, in most cases a valid op 11587 tree is returned anyway. The error is reflected in the parser state, 11588 normally resulting in a single exception at the top level of parsing 11589 which covers all the compilation errors that occurred. Some compilation 11590 errors, however, will throw an exception immediately. 11591 11592 =cut 11593 */ 11594 11595 OP * 11596 Perl_parse_termexpr(pTHX_ U32 flags) 11597 { 11598 return parse_expr(LEX_FAKEEOF_COMMA, flags); 11599 } 11600 11601 /* 11602 =for apidoc Amx|OP *|parse_listexpr|U32 flags 11603 11604 Parse a Perl list expression. This may contain operators of precedence 11605 down to the comma operator. The expression must be followed (and thus 11606 terminated) either by a low-precedence logic operator such as C<or> or by 11607 something that would normally terminate an expression such as semicolon. 11608 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 11609 otherwise it is mandatory. It is up to the caller to ensure that the 11610 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11611 the source of the code to be parsed and the lexical context for the 11612 expression. 11613 11614 The op tree representing the expression is returned. If an optional 11615 expression is absent, a null pointer is returned, otherwise the pointer 11616 will be non-null. 11617 11618 If an error occurs in parsing or compilation, in most cases a valid op 11619 tree is returned anyway. The error is reflected in the parser state, 11620 normally resulting in a single exception at the top level of parsing 11621 which covers all the compilation errors that occurred. Some compilation 11622 errors, however, will throw an exception immediately. 11623 11624 =cut 11625 */ 11626 11627 OP * 11628 Perl_parse_listexpr(pTHX_ U32 flags) 11629 { 11630 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 11631 } 11632 11633 /* 11634 =for apidoc Amx|OP *|parse_fullexpr|U32 flags 11635 11636 Parse a single complete Perl expression. This allows the full 11637 expression grammar, including the lowest-precedence operators such 11638 as C<or>. The expression must be followed (and thus terminated) by a 11639 token that an expression would normally be terminated by: end-of-file, 11640 closing bracketing punctuation, semicolon, or one of the keywords that 11641 signals a postfix expression-statement modifier. If C<flags> has the 11642 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is 11643 mandatory. It is up to the caller to ensure that the dynamic parser 11644 state (L</PL_parser> et al) is correctly set to reflect the source of 11645 the code to be parsed and the lexical context for the expression. 11646 11647 The op tree representing the expression is returned. If an optional 11648 expression is absent, a null pointer is returned, otherwise the pointer 11649 will be non-null. 11650 11651 If an error occurs in parsing or compilation, in most cases a valid op 11652 tree is returned anyway. The error is reflected in the parser state, 11653 normally resulting in a single exception at the top level of parsing 11654 which covers all the compilation errors that occurred. Some compilation 11655 errors, however, will throw an exception immediately. 11656 11657 =cut 11658 */ 11659 11660 OP * 11661 Perl_parse_fullexpr(pTHX_ U32 flags) 11662 { 11663 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 11664 } 11665 11666 /* 11667 =for apidoc Amx|OP *|parse_block|U32 flags 11668 11669 Parse a single complete Perl code block. This consists of an opening 11670 brace, a sequence of statements, and a closing brace. The block 11671 constitutes a lexical scope, so C<my> variables and various compile-time 11672 effects can be contained within it. It is up to the caller to ensure 11673 that the dynamic parser state (L</PL_parser> et al) is correctly set to 11674 reflect the source of the code to be parsed and the lexical context for 11675 the statement. 11676 11677 The op tree representing the code block is returned. This is always a 11678 real op, never a null pointer. It will normally be a C<lineseq> list, 11679 including C<nextstate> or equivalent ops. No ops to construct any kind 11680 of runtime scope are included by virtue of it being a block. 11681 11682 If an error occurs in parsing or compilation, in most cases a valid op 11683 tree (most likely null) is returned anyway. The error is reflected in 11684 the parser state, normally resulting in a single exception at the top 11685 level of parsing which covers all the compilation errors that occurred. 11686 Some compilation errors, however, will throw an exception immediately. 11687 11688 The C<flags> parameter is reserved for future use, and must always 11689 be zero. 11690 11691 =cut 11692 */ 11693 11694 OP * 11695 Perl_parse_block(pTHX_ U32 flags) 11696 { 11697 if (flags) 11698 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 11699 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 11700 } 11701 11702 /* 11703 =for apidoc Amx|OP *|parse_barestmt|U32 flags 11704 11705 Parse a single unadorned Perl statement. This may be a normal imperative 11706 statement or a declaration that has compile-time effect. It does not 11707 include any label or other affixture. It is up to the caller to ensure 11708 that the dynamic parser state (L</PL_parser> et al) is correctly set to 11709 reflect the source of the code to be parsed and the lexical context for 11710 the statement. 11711 11712 The op tree representing the statement is returned. This may be a 11713 null pointer if the statement is null, for example if it was actually 11714 a subroutine definition (which has compile-time side effects). If not 11715 null, it will be ops directly implementing the statement, suitable to 11716 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 11717 equivalent op (except for those embedded in a scope contained entirely 11718 within the statement). 11719 11720 If an error occurs in parsing or compilation, in most cases a valid op 11721 tree (most likely null) is returned anyway. The error is reflected in 11722 the parser state, normally resulting in a single exception at the top 11723 level of parsing which covers all the compilation errors that occurred. 11724 Some compilation errors, however, will throw an exception immediately. 11725 11726 The C<flags> parameter is reserved for future use, and must always 11727 be zero. 11728 11729 =cut 11730 */ 11731 11732 OP * 11733 Perl_parse_barestmt(pTHX_ U32 flags) 11734 { 11735 if (flags) 11736 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 11737 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 11738 } 11739 11740 /* 11741 =for apidoc Amx|SV *|parse_label|U32 flags 11742 11743 Parse a single label, possibly optional, of the type that may prefix a 11744 Perl statement. It is up to the caller to ensure that the dynamic parser 11745 state (L</PL_parser> et al) is correctly set to reflect the source of 11746 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the 11747 label is optional, otherwise it is mandatory. 11748 11749 The name of the label is returned in the form of a fresh scalar. If an 11750 optional label is absent, a null pointer is returned. 11751 11752 If an error occurs in parsing, which can only occur if the label is 11753 mandatory, a valid label is returned anyway. The error is reflected in 11754 the parser state, normally resulting in a single exception at the top 11755 level of parsing which covers all the compilation errors that occurred. 11756 11757 =cut 11758 */ 11759 11760 SV * 11761 Perl_parse_label(pTHX_ U32 flags) 11762 { 11763 if (flags & ~PARSE_OPTIONAL) 11764 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 11765 if (PL_lex_state == LEX_KNOWNEXT) { 11766 PL_parser->yychar = yylex(); 11767 if (PL_parser->yychar == LABEL) { 11768 char * const lpv = pl_yylval.pval; 11769 STRLEN llen = strlen(lpv); 11770 PL_parser->yychar = YYEMPTY; 11771 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0); 11772 } else { 11773 yyunlex(); 11774 goto no_label; 11775 } 11776 } else { 11777 char *s, *t; 11778 STRLEN wlen, bufptr_pos; 11779 lex_read_space(0); 11780 t = s = PL_bufptr; 11781 if (!isIDFIRST_lazy_if(s, UTF)) 11782 goto no_label; 11783 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 11784 if (word_takes_any_delimeter(s, wlen)) 11785 goto no_label; 11786 bufptr_pos = s - SvPVX(PL_linestr); 11787 PL_bufptr = t; 11788 lex_read_space(LEX_KEEP_PREVIOUS); 11789 t = PL_bufptr; 11790 s = SvPVX(PL_linestr) + bufptr_pos; 11791 if (t[0] == ':' && t[1] != ':') { 11792 PL_oldoldbufptr = PL_oldbufptr; 11793 PL_oldbufptr = s; 11794 PL_bufptr = t+1; 11795 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 11796 } else { 11797 PL_bufptr = s; 11798 no_label: 11799 if (flags & PARSE_OPTIONAL) { 11800 return NULL; 11801 } else { 11802 qerror(Perl_mess(aTHX_ "Parse error")); 11803 return newSVpvs("x"); 11804 } 11805 } 11806 } 11807 } 11808 11809 /* 11810 =for apidoc Amx|OP *|parse_fullstmt|U32 flags 11811 11812 Parse a single complete Perl statement. This may be a normal imperative 11813 statement or a declaration that has compile-time effect, and may include 11814 optional labels. It is up to the caller to ensure that the dynamic 11815 parser state (L</PL_parser> et al) is correctly set to reflect the source 11816 of the code to be parsed and the lexical context for the statement. 11817 11818 The op tree representing the statement is returned. This may be a 11819 null pointer if the statement is null, for example if it was actually 11820 a subroutine definition (which has compile-time side effects). If not 11821 null, it will be the result of a L</newSTATEOP> call, normally including 11822 a C<nextstate> or equivalent op. 11823 11824 If an error occurs in parsing or compilation, in most cases a valid op 11825 tree (most likely null) is returned anyway. The error is reflected in 11826 the parser state, normally resulting in a single exception at the top 11827 level of parsing which covers all the compilation errors that occurred. 11828 Some compilation errors, however, will throw an exception immediately. 11829 11830 The C<flags> parameter is reserved for future use, and must always 11831 be zero. 11832 11833 =cut 11834 */ 11835 11836 OP * 11837 Perl_parse_fullstmt(pTHX_ U32 flags) 11838 { 11839 if (flags) 11840 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 11841 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 11842 } 11843 11844 /* 11845 =for apidoc Amx|OP *|parse_stmtseq|U32 flags 11846 11847 Parse a sequence of zero or more Perl statements. These may be normal 11848 imperative statements, including optional labels, or declarations 11849 that have compile-time effect, or any mixture thereof. The statement 11850 sequence ends when a closing brace or end-of-file is encountered in a 11851 place where a new statement could have validly started. It is up to 11852 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 11853 is correctly set to reflect the source of the code to be parsed and the 11854 lexical context for the statements. 11855 11856 The op tree representing the statement sequence is returned. This may 11857 be a null pointer if the statements were all null, for example if there 11858 were no statements or if there were only subroutine definitions (which 11859 have compile-time side effects). If not null, it will be a C<lineseq> 11860 list, normally including C<nextstate> or equivalent ops. 11861 11862 If an error occurs in parsing or compilation, in most cases a valid op 11863 tree is returned anyway. The error is reflected in the parser state, 11864 normally resulting in a single exception at the top level of parsing 11865 which covers all the compilation errors that occurred. Some compilation 11866 errors, however, will throw an exception immediately. 11867 11868 The C<flags> parameter is reserved for future use, and must always 11869 be zero. 11870 11871 =cut 11872 */ 11873 11874 OP * 11875 Perl_parse_stmtseq(pTHX_ U32 flags) 11876 { 11877 OP *stmtseqop; 11878 I32 c; 11879 if (flags) 11880 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 11881 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 11882 c = lex_peek_unichar(0); 11883 if (c != -1 && c != /*{*/'}') 11884 qerror(Perl_mess(aTHX_ "Parse error")); 11885 return stmtseqop; 11886 } 11887 11888 #define lex_token_boundary() S_lex_token_boundary(aTHX) 11889 static void 11890 S_lex_token_boundary(pTHX) 11891 { 11892 PL_oldoldbufptr = PL_oldbufptr; 11893 PL_oldbufptr = PL_bufptr; 11894 } 11895 11896 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX) 11897 static OP * 11898 S_parse_opt_lexvar(pTHX) 11899 { 11900 I32 sigil, c; 11901 char *s, *d; 11902 OP *var; 11903 lex_token_boundary(); 11904 sigil = lex_read_unichar(0); 11905 if (lex_peek_unichar(0) == '#') { 11906 qerror(Perl_mess(aTHX_ "Parse error")); 11907 return NULL; 11908 } 11909 lex_read_space(0); 11910 c = lex_peek_unichar(0); 11911 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c))) 11912 return NULL; 11913 s = PL_bufptr; 11914 d = PL_tokenbuf + 1; 11915 PL_tokenbuf[0] = (char)sigil; 11916 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF)); 11917 PL_bufptr = s; 11918 if (d == PL_tokenbuf+1) 11919 return NULL; 11920 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, 11921 OPf_MOD | (OPpLVAL_INTRO<<8)); 11922 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); 11923 return var; 11924 } 11925 11926 OP * 11927 Perl_parse_subsignature(pTHX) 11928 { 11929 I32 c; 11930 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0; 11931 OP *initops = NULL; 11932 lex_read_space(0); 11933 c = lex_peek_unichar(0); 11934 while (c != /*(*/')') { 11935 switch (c) { 11936 case '$': { 11937 OP *var, *expr; 11938 if (prev_type == 2) 11939 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 11940 var = parse_opt_lexvar(); 11941 expr = var ? 11942 newBINOP(OP_AELEM, 0, 11943 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)), 11944 OP_RV2AV), 11945 newSVOP(OP_CONST, 0, newSViv(pos))) : 11946 NULL; 11947 lex_read_space(0); 11948 c = lex_peek_unichar(0); 11949 if (c == '=') { 11950 lex_token_boundary(); 11951 lex_read_unichar(0); 11952 lex_read_space(0); 11953 c = lex_peek_unichar(0); 11954 if (c == ',' || c == /*(*/')') { 11955 if (var) 11956 qerror(Perl_mess(aTHX_ "Optional parameter " 11957 "lacks default expression")); 11958 } else { 11959 OP *defexpr = parse_termexpr(0); 11960 if (defexpr->op_type == OP_UNDEF 11961 && !(defexpr->op_flags & OPf_KIDS)) 11962 { 11963 op_free(defexpr); 11964 } else { 11965 OP *ifop = 11966 newBINOP(OP_GE, 0, 11967 scalar(newUNOP(OP_RV2AV, 0, 11968 newGVOP(OP_GV, 0, PL_defgv))), 11969 newSVOP(OP_CONST, 0, newSViv(pos+1))); 11970 expr = var ? 11971 newCONDOP(0, ifop, expr, defexpr) : 11972 newLOGOP(OP_OR, 0, ifop, defexpr); 11973 } 11974 } 11975 prev_type = 1; 11976 } else { 11977 if (prev_type == 1) 11978 qerror(Perl_mess(aTHX_ "Mandatory parameter " 11979 "follows optional parameter")); 11980 prev_type = 0; 11981 min_arity = pos + 1; 11982 } 11983 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr); 11984 if (expr) 11985 initops = op_append_list(OP_LINESEQ, initops, 11986 newSTATEOP(0, NULL, expr)); 11987 max_arity = ++pos; 11988 } break; 11989 case '@': 11990 case '%': { 11991 OP *var; 11992 if (prev_type == 2) 11993 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 11994 var = parse_opt_lexvar(); 11995 if (c == '%') { 11996 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0, 11997 newBINOP(OP_BIT_AND, 0, 11998 scalar(newUNOP(OP_RV2AV, 0, 11999 newGVOP(OP_GV, 0, PL_defgv))), 12000 newSVOP(OP_CONST, 0, newSViv(1))), 12001 op_convert_list(OP_DIE, 0, 12002 op_convert_list(OP_SPRINTF, 0, 12003 op_append_list(OP_LIST, 12004 newSVOP(OP_CONST, 0, 12005 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")), 12006 newSLICEOP(0, 12007 op_append_list(OP_LIST, 12008 newSVOP(OP_CONST, 0, newSViv(1)), 12009 newSVOP(OP_CONST, 0, newSViv(2))), 12010 newOP(OP_CALLER, 0)))))); 12011 if (pos != min_arity) 12012 chkop = newLOGOP(OP_AND, 0, 12013 newBINOP(OP_GT, 0, 12014 scalar(newUNOP(OP_RV2AV, 0, 12015 newGVOP(OP_GV, 0, PL_defgv))), 12016 newSVOP(OP_CONST, 0, newSViv(pos))), 12017 chkop); 12018 initops = op_append_list(OP_LINESEQ, 12019 newSTATEOP(0, NULL, chkop), 12020 initops); 12021 } 12022 if (var) { 12023 OP *slice = pos ? 12024 op_prepend_elem(OP_ASLICE, 12025 newOP(OP_PUSHMARK, 0), 12026 newLISTOP(OP_ASLICE, 0, 12027 list(newRANGE(0, 12028 newSVOP(OP_CONST, 0, newSViv(pos)), 12029 newUNOP(OP_AV2ARYLEN, 0, 12030 ref(newUNOP(OP_RV2AV, 0, 12031 newGVOP(OP_GV, 0, PL_defgv)), 12032 OP_AV2ARYLEN)))), 12033 ref(newUNOP(OP_RV2AV, 0, 12034 newGVOP(OP_GV, 0, PL_defgv)), 12035 OP_ASLICE))) : 12036 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)); 12037 initops = op_append_list(OP_LINESEQ, initops, 12038 newSTATEOP(0, NULL, 12039 newASSIGNOP(OPf_STACKED, var, 0, slice))); 12040 } 12041 prev_type = 2; 12042 max_arity = -1; 12043 } break; 12044 default: 12045 parse_error: 12046 qerror(Perl_mess(aTHX_ "Parse error")); 12047 return NULL; 12048 } 12049 lex_read_space(0); 12050 c = lex_peek_unichar(0); 12051 switch (c) { 12052 case /*(*/')': break; 12053 case ',': 12054 do { 12055 lex_token_boundary(); 12056 lex_read_unichar(0); 12057 lex_read_space(0); 12058 c = lex_peek_unichar(0); 12059 } while (c == ','); 12060 break; 12061 default: 12062 goto parse_error; 12063 } 12064 } 12065 if (min_arity != 0) { 12066 initops = op_append_list(OP_LINESEQ, 12067 newSTATEOP(0, NULL, 12068 newLOGOP(OP_OR, 0, 12069 newBINOP(OP_GE, 0, 12070 scalar(newUNOP(OP_RV2AV, 0, 12071 newGVOP(OP_GV, 0, PL_defgv))), 12072 newSVOP(OP_CONST, 0, newSViv(min_arity))), 12073 op_convert_list(OP_DIE, 0, 12074 op_convert_list(OP_SPRINTF, 0, 12075 op_append_list(OP_LIST, 12076 newSVOP(OP_CONST, 0, 12077 newSVpvs("Too few arguments for subroutine at %s line %d.\n")), 12078 newSLICEOP(0, 12079 op_append_list(OP_LIST, 12080 newSVOP(OP_CONST, 0, newSViv(1)), 12081 newSVOP(OP_CONST, 0, newSViv(2))), 12082 newOP(OP_CALLER, 0))))))), 12083 initops); 12084 } 12085 if (max_arity != -1) { 12086 initops = op_append_list(OP_LINESEQ, 12087 newSTATEOP(0, NULL, 12088 newLOGOP(OP_OR, 0, 12089 newBINOP(OP_LE, 0, 12090 scalar(newUNOP(OP_RV2AV, 0, 12091 newGVOP(OP_GV, 0, PL_defgv))), 12092 newSVOP(OP_CONST, 0, newSViv(max_arity))), 12093 op_convert_list(OP_DIE, 0, 12094 op_convert_list(OP_SPRINTF, 0, 12095 op_append_list(OP_LIST, 12096 newSVOP(OP_CONST, 0, 12097 newSVpvs("Too many arguments for subroutine at %s line %d.\n")), 12098 newSLICEOP(0, 12099 op_append_list(OP_LIST, 12100 newSVOP(OP_CONST, 0, newSViv(1)), 12101 newSVOP(OP_CONST, 0, newSViv(2))), 12102 newOP(OP_CALLER, 0))))))), 12103 initops); 12104 } 12105 return initops; 12106 } 12107 12108 /* 12109 * ex: set ts=8 sts=4 sw=4 et: 12110 */ 12111