1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 27 This is the lower layer of the Perl parser, managing characters and tokens. 28 29 =for apidoc AmU|yy_parser *|PL_parser 30 31 Pointer to a structure encapsulating the state of the parsing operation 32 currently in progress. The pointer can be locally changed to perform 33 a nested parse without interfering with the state of an outer parse. 34 Individual members of C<PL_parser> have their own documentation. 35 36 =cut 37 */ 38 39 #include "EXTERN.h" 40 #define PERL_IN_TOKE_C 41 #include "perl.h" 42 #include "dquote_static.c" 43 44 #define new_constant(a,b,c,d,e,f,g) \ 45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) 46 47 #define pl_yylval (PL_parser->yylval) 48 49 /* XXX temporary backwards compatibility */ 50 #define PL_lex_brackets (PL_parser->lex_brackets) 51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 53 #define PL_lex_brackstack (PL_parser->lex_brackstack) 54 #define PL_lex_casemods (PL_parser->lex_casemods) 55 #define PL_lex_casestack (PL_parser->lex_casestack) 56 #define PL_lex_defer (PL_parser->lex_defer) 57 #define PL_lex_dojoin (PL_parser->lex_dojoin) 58 #define PL_lex_expect (PL_parser->lex_expect) 59 #define PL_lex_formbrack (PL_parser->lex_formbrack) 60 #define PL_lex_inpat (PL_parser->lex_inpat) 61 #define PL_lex_inwhat (PL_parser->lex_inwhat) 62 #define PL_lex_op (PL_parser->lex_op) 63 #define PL_lex_repl (PL_parser->lex_repl) 64 #define PL_lex_starts (PL_parser->lex_starts) 65 #define PL_lex_stuff (PL_parser->lex_stuff) 66 #define PL_multi_start (PL_parser->multi_start) 67 #define PL_multi_open (PL_parser->multi_open) 68 #define PL_multi_close (PL_parser->multi_close) 69 #define PL_preambled (PL_parser->preambled) 70 #define PL_sublex_info (PL_parser->sublex_info) 71 #define PL_linestr (PL_parser->linestr) 72 #define PL_expect (PL_parser->expect) 73 #define PL_copline (PL_parser->copline) 74 #define PL_bufptr (PL_parser->bufptr) 75 #define PL_oldbufptr (PL_parser->oldbufptr) 76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 77 #define PL_linestart (PL_parser->linestart) 78 #define PL_bufend (PL_parser->bufend) 79 #define PL_last_uni (PL_parser->last_uni) 80 #define PL_last_lop (PL_parser->last_lop) 81 #define PL_last_lop_op (PL_parser->last_lop_op) 82 #define PL_lex_state (PL_parser->lex_state) 83 #define PL_rsfp (PL_parser->rsfp) 84 #define PL_rsfp_filters (PL_parser->rsfp_filters) 85 #define PL_in_my (PL_parser->in_my) 86 #define PL_in_my_stash (PL_parser->in_my_stash) 87 #define PL_tokenbuf (PL_parser->tokenbuf) 88 #define PL_multi_end (PL_parser->multi_end) 89 #define PL_error_count (PL_parser->error_count) 90 91 #ifdef PERL_MAD 92 # define PL_endwhite (PL_parser->endwhite) 93 # define PL_faketokens (PL_parser->faketokens) 94 # define PL_lasttoke (PL_parser->lasttoke) 95 # define PL_nextwhite (PL_parser->nextwhite) 96 # define PL_realtokenstart (PL_parser->realtokenstart) 97 # define PL_skipwhite (PL_parser->skipwhite) 98 # define PL_thisclose (PL_parser->thisclose) 99 # define PL_thismad (PL_parser->thismad) 100 # define PL_thisopen (PL_parser->thisopen) 101 # define PL_thisstuff (PL_parser->thisstuff) 102 # define PL_thistoken (PL_parser->thistoken) 103 # define PL_thiswhite (PL_parser->thiswhite) 104 # define PL_thiswhite (PL_parser->thiswhite) 105 # define PL_nexttoke (PL_parser->nexttoke) 106 # define PL_curforce (PL_parser->curforce) 107 #else 108 # define PL_nexttoke (PL_parser->nexttoke) 109 # define PL_nexttype (PL_parser->nexttype) 110 # define PL_nextval (PL_parser->nextval) 111 #endif 112 113 static const char* const ident_too_long = "Identifier too long"; 114 115 #ifdef PERL_MAD 116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } 117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val 118 #else 119 # define CURMAD(slot,sv) 120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 121 #endif 122 123 #define XENUMMASK 0x3f 124 #define XFAKEEOF 0x40 125 #define XFAKEBRACK 0x80 126 127 #ifdef USE_UTF8_SCRIPTS 128 # define UTF (!IN_BYTES) 129 #else 130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 131 #endif 132 133 /* The maximum number of characters preceding the unrecognized one to display */ 134 #define UNRECOGNIZED_PRECEDE_COUNT 10 135 136 /* In variables named $^X, these are the legal values for X. 137 * 1999-02-27 mjd-perl-patch@plover.com */ 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 139 140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') 141 142 /* LEX_* are values for PL_lex_state, the state of the lexer. 143 * They are arranged oddly so that the guard on the switch statement 144 * can get by with a single comparison (if the compiler is smart enough). 145 * 146 * These values refer to the various states within a sublex parse, 147 * i.e. within a double quotish string 148 */ 149 150 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 151 152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 157 158 /* at end of code, eg "$x" followed by: */ 159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 161 162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 163 string or after \E, $foo, etc */ 164 #define LEX_INTERPCONST 2 /* NOT USED */ 165 #define LEX_FORMLINE 1 /* expecting a format line */ 166 #define LEX_KNOWNEXT 0 /* next token known; just return it */ 167 168 169 #ifdef DEBUGGING 170 static const char* const lex_state_names[] = { 171 "KNOWNEXT", 172 "FORMLINE", 173 "INTERPCONST", 174 "INTERPCONCAT", 175 "INTERPENDMAYBE", 176 "INTERPEND", 177 "INTERPSTART", 178 "INTERPPUSH", 179 "INTERPCASEMOD", 180 "INTERPNORMAL", 181 "NORMAL" 182 }; 183 #endif 184 185 #ifdef ff_next 186 #undef ff_next 187 #endif 188 189 #include "keywords.h" 190 191 /* CLINE is a macro that ensures PL_copline has a sane value */ 192 193 #ifdef CLINE 194 #undef CLINE 195 #endif 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 197 198 #ifdef PERL_MAD 199 # define SKIPSPACE0(s) skipspace0(s) 200 # define SKIPSPACE1(s) skipspace1(s) 201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv) 202 # define PEEKSPACE(s) skipspace2(s,0) 203 #else 204 # define SKIPSPACE0(s) skipspace(s) 205 # define SKIPSPACE1(s) skipspace(s) 206 # define SKIPSPACE2(s,tsv) skipspace(s) 207 # define PEEKSPACE(s) skipspace(s) 208 #endif 209 210 /* 211 * Convenience functions to return different tokens and prime the 212 * lexer for the next token. They all take an argument. 213 * 214 * TOKEN : generic token (used for '(', DOLSHARP, etc) 215 * OPERATOR : generic operator 216 * AOPERATOR : assignment operator 217 * PREBLOCK : beginning the block after an if, while, foreach, ... 218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 219 * PREREF : *EXPR where EXPR is not a simple identifier 220 * TERM : expression term 221 * LOOPX : loop exiting command (goto, last, dump, etc) 222 * FTST : file test operator 223 * FUN0 : zero-argument function 224 * FUN0OP : zero-argument function, with its op created in this file 225 * FUN1 : not used, except for not, which isn't a UNIOP 226 * BOop : bitwise or or xor 227 * BAop : bitwise and 228 * SHop : shift operator 229 * PWop : power operator 230 * PMop : pattern-matching operator 231 * Aop : addition-level operator 232 * Mop : multiplication-level operator 233 * Eop : equality-testing operator 234 * Rop : relational operator <= != gt 235 * 236 * Also see LOP and lop() below. 237 */ 238 239 #ifdef DEBUGGING /* Serve -DT. */ 240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 241 #else 242 # define REPORT(retval) (retval) 243 #endif 244 245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) 248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) 253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) 258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) 259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) 260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) 261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) 263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) 264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) 265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) 266 267 /* This bit of chicanery makes a unary function followed by 268 * a parenthesis into a function with one argument, highest precedence. 269 * The UNIDOR macro is for unary functions that can be followed by the // 270 * operator (such as C<shift // 0>). 271 */ 272 #define UNI3(f,x,have_x) { \ 273 pl_yylval.ival = f; \ 274 if (have_x) PL_expect = x; \ 275 PL_bufptr = s; \ 276 PL_last_uni = PL_oldbufptr; \ 277 PL_last_lop_op = f; \ 278 if (*s == '(') \ 279 return REPORT( (int)FUNC1 ); \ 280 s = PEEKSPACE(s); \ 281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 282 } 283 #define UNI(f) UNI3(f,XTERM,1) 284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 285 #define UNIPROTO(f,optional) { \ 286 if (optional) PL_last_uni = PL_oldbufptr; \ 287 OPERATOR(f); \ 288 } 289 290 #define UNIBRACK(f) UNI3(f,0,0) 291 292 /* grandfather return to old style */ 293 #define OLDLOP(f) \ 294 do { \ 295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 297 pl_yylval.ival = (f); \ 298 PL_expect = XTERM; \ 299 PL_bufptr = s; \ 300 return (int)LSTOP; \ 301 } while(0) 302 303 #define COPLINE_INC_WITH_HERELINES \ 304 STMT_START { \ 305 CopLINE_inc(PL_curcop); \ 306 if (PL_parser->lex_shared->herelines) \ 307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \ 308 PL_parser->lex_shared->herelines = 0; \ 309 } STMT_END 310 311 312 #ifdef DEBUGGING 313 314 /* how to interpret the pl_yylval associated with the token */ 315 enum token_type { 316 TOKENTYPE_NONE, 317 TOKENTYPE_IVAL, 318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 319 TOKENTYPE_PVAL, 320 TOKENTYPE_OPVAL 321 }; 322 323 static struct debug_tokens { 324 const int token; 325 enum token_type type; 326 const char *name; 327 } const debug_tokens[] = 328 { 329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 330 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 331 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 333 { ARROW, TOKENTYPE_NONE, "ARROW" }, 334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 340 { DO, TOKENTYPE_NONE, "DO" }, 341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 342 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 343 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 345 { ELSE, TOKENTYPE_NONE, "ELSE" }, 346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 347 { EQOP, TOKENTYPE_OPNUM, "EQOP" }, 348 { FOR, TOKENTYPE_IVAL, "FOR" }, 349 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 352 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 360 { IF, TOKENTYPE_IVAL, "IF" }, 361 { LABEL, TOKENTYPE_PVAL, "LABEL" }, 362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 367 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 368 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 369 { MY, TOKENTYPE_IVAL, "MY" }, 370 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 371 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 372 { OROP, TOKENTYPE_IVAL, "OROP" }, 373 { OROR, TOKENTYPE_NONE, "OROR" }, 374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 375 { PEG, TOKENTYPE_NONE, "PEG" }, 376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 380 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 381 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 382 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 383 { PREINC, TOKENTYPE_NONE, "PREINC" }, 384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 386 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 387 { RELOP, TOKENTYPE_OPNUM, "RELOP" }, 388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 390 { SUB, TOKENTYPE_NONE, "SUB" }, 391 { THING, TOKENTYPE_OPVAL, "THING" }, 392 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 397 { USE, TOKENTYPE_IVAL, "USE" }, 398 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 399 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 400 { WORD, TOKENTYPE_OPVAL, "WORD" }, 401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 402 { 0, TOKENTYPE_NONE, NULL } 403 }; 404 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 406 407 STATIC int 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 409 { 410 dVAR; 411 412 PERL_ARGS_ASSERT_TOKEREPORT; 413 414 if (DEBUG_T_TEST) { 415 const char *name = NULL; 416 enum token_type type = TOKENTYPE_NONE; 417 const struct debug_tokens *p; 418 SV* const report = newSVpvs("<== "); 419 420 for (p = debug_tokens; p->token; p++) { 421 if (p->token == (int)rv) { 422 name = p->name; 423 type = p->type; 424 break; 425 } 426 } 427 if (name) 428 Perl_sv_catpv(aTHX_ report, name); 429 else if ((char)rv > ' ' && (char)rv <= '~') 430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 431 else if (!rv) 432 sv_catpvs(report, "EOF"); 433 else 434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); 435 switch (type) { 436 case TOKENTYPE_NONE: 437 break; 438 case TOKENTYPE_IVAL: 439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); 440 break; 441 case TOKENTYPE_OPNUM: 442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 443 PL_op_name[lvalp->ival]); 444 break; 445 case TOKENTYPE_PVAL: 446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 447 break; 448 case TOKENTYPE_OPVAL: 449 if (lvalp->opval) { 450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 451 PL_op_name[lvalp->opval->op_type]); 452 if (lvalp->opval->op_type == OP_CONST) { 453 Perl_sv_catpvf(aTHX_ report, " %s", 454 SvPEEK(cSVOPx_sv(lvalp->opval))); 455 } 456 457 } 458 else 459 sv_catpvs(report, "(opval=null)"); 460 break; 461 } 462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 463 }; 464 return (int)rv; 465 } 466 467 468 /* print the buffer with suitable escapes */ 469 470 STATIC void 471 S_printbuf(pTHX_ const char *const fmt, const char *const s) 472 { 473 SV* const tmp = newSVpvs(""); 474 475 PERL_ARGS_ASSERT_PRINTBUF; 476 477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 478 SvREFCNT_dec(tmp); 479 } 480 481 #endif 482 483 static int 484 S_deprecate_commaless_var_list(pTHX) { 485 PL_expect = XTERM; 486 deprecate("comma-less variable list"); 487 return REPORT(','); /* grandfather non-comma-format format */ 488 } 489 490 /* 491 * S_ao 492 * 493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR 494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN 495 */ 496 497 STATIC int 498 S_ao(pTHX_ int toketype) 499 { 500 dVAR; 501 if (*PL_bufptr == '=') { 502 PL_bufptr++; 503 if (toketype == ANDAND) 504 pl_yylval.ival = OP_ANDASSIGN; 505 else if (toketype == OROR) 506 pl_yylval.ival = OP_ORASSIGN; 507 else if (toketype == DORDOR) 508 pl_yylval.ival = OP_DORASSIGN; 509 toketype = ASSIGNOP; 510 } 511 return toketype; 512 } 513 514 /* 515 * S_no_op 516 * When Perl expects an operator and finds something else, no_op 517 * prints the warning. It always prints "<something> found where 518 * operator expected. It prints "Missing semicolon on previous line?" 519 * if the surprise occurs at the start of the line. "do you need to 520 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 521 * where the compiler doesn't know if foo is a method call or a function. 522 * It prints "Missing operator before end of line" if there's nothing 523 * after the missing operator, or "... before <...>" if there is something 524 * after the missing operator. 525 */ 526 527 STATIC void 528 S_no_op(pTHX_ const char *const what, char *s) 529 { 530 dVAR; 531 char * const oldbp = PL_bufptr; 532 const bool is_first = (PL_oldbufptr == PL_linestart); 533 534 PERL_ARGS_ASSERT_NO_OP; 535 536 if (!s) 537 s = oldbp; 538 else 539 PL_bufptr = s; 540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 541 if (ckWARN_d(WARN_SYNTAX)) { 542 if (is_first) 543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 544 "\t(Missing semicolon on previous line?)\n"); 545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 546 const char *t; 547 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':'); 548 t += UTF ? UTF8SKIP(t) : 1) 549 NOOP; 550 if (t < PL_bufptr && isSPACE(*t)) 551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 552 "\t(Do you need to predeclare %"SVf"?)\n", 553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr), 554 SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); 555 } 556 else { 557 assert(s >= oldbp); 558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 559 "\t(Missing operator before %"SVf"?)\n", 560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp), 561 SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); 562 } 563 } 564 PL_bufptr = oldbp; 565 } 566 567 /* 568 * S_missingterm 569 * Complain about missing quote/regexp/heredoc terminator. 570 * If it's called with NULL then it cauterizes the line buffer. 571 * If we're in a delimited string and the delimiter is a control 572 * character, it's reformatted into a two-char sequence like ^C. 573 * This is fatal. 574 */ 575 576 STATIC void 577 S_missingterm(pTHX_ char *s) 578 { 579 dVAR; 580 char tmpbuf[3]; 581 char q; 582 if (s) { 583 char * const nl = strrchr(s,'\n'); 584 if (nl) 585 *nl = '\0'; 586 } 587 else if (isCNTRL(PL_multi_close)) { 588 *tmpbuf = '^'; 589 tmpbuf[1] = (char)toCTRL(PL_multi_close); 590 tmpbuf[2] = '\0'; 591 s = tmpbuf; 592 } 593 else { 594 *tmpbuf = (char)PL_multi_close; 595 tmpbuf[1] = '\0'; 596 s = tmpbuf; 597 } 598 q = strchr(s,'"') ? '\'' : '"'; 599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 600 } 601 602 #include "feature.h" 603 604 /* 605 * Check whether the named feature is enabled. 606 */ 607 bool 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) 609 { 610 dVAR; 611 char he_name[8 + MAX_FEATURE_LEN] = "feature_"; 612 613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; 614 615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM); 616 617 if (namelen > MAX_FEATURE_LEN) 618 return FALSE; 619 memcpy(&he_name[8], name, namelen); 620 621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0, 622 REFCOUNTED_HE_EXISTS)); 623 } 624 625 /* 626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 627 * utf16-to-utf8-reversed. 628 */ 629 630 #ifdef PERL_CR_FILTER 631 static void 632 strip_return(SV *sv) 633 { 634 const char *s = SvPVX_const(sv); 635 const char * const e = s + SvCUR(sv); 636 637 PERL_ARGS_ASSERT_STRIP_RETURN; 638 639 /* outer loop optimized to do nothing if there are no CR-LFs */ 640 while (s < e) { 641 if (*s++ == '\r' && *s == '\n') { 642 /* hit a CR-LF, need to copy the rest */ 643 char *d = s - 1; 644 *d++ = *s++; 645 while (s < e) { 646 if (*s == '\r' && s[1] == '\n') 647 s++; 648 *d++ = *s++; 649 } 650 SvCUR(sv) -= s - d; 651 return; 652 } 653 } 654 } 655 656 STATIC I32 657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 658 { 659 const I32 count = FILTER_READ(idx+1, sv, maxlen); 660 if (count > 0 && !maxlen) 661 strip_return(sv); 662 return count; 663 } 664 #endif 665 666 /* 667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags 668 669 Creates and initialises a new lexer/parser state object, supplying 670 a context in which to lex and parse from a new source of Perl code. 671 A pointer to the new state object is placed in L</PL_parser>. An entry 672 is made on the save stack so that upon unwinding the new state object 673 will be destroyed and the former value of L</PL_parser> will be restored. 674 Nothing else need be done to clean up the parsing context. 675 676 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if 677 non-null, provides a string (in SV form) containing code to be parsed. 678 A copy of the string is made, so subsequent modification of I<line> 679 does not affect parsing. I<rsfp>, if non-null, provides an input stream 680 from which code will be read to be parsed. If both are non-null, the 681 code in I<line> comes first and must consist of complete lines of input, 682 and I<rsfp> supplies the remainder of the source. 683 684 The I<flags> parameter is reserved for future use. Currently it is only 685 used by perl internally, so extensions should always pass zero. 686 687 =cut 688 */ 689 690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 691 can share filters with the current parser. 692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 693 caller, hence isn't owned by the parser, so shouldn't be closed on parser 694 destruction. This is used to handle the case of defaulting to reading the 695 script from the standard input because no filename was given on the command 696 line (without getting confused by situation where STDIN has been closed, so 697 the script handle is opened on fd 0) */ 698 699 void 700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 701 { 702 dVAR; 703 const char *s = NULL; 704 yy_parser *parser, *oparser; 705 if (flags && flags & ~LEX_START_FLAGS) 706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 707 708 /* create and initialise a parser */ 709 710 Newxz(parser, 1, yy_parser); 711 parser->old_parser = oparser = PL_parser; 712 PL_parser = parser; 713 714 parser->stack = NULL; 715 parser->ps = NULL; 716 parser->stack_size = 0; 717 718 /* on scope exit, free this parser and restore any outer one */ 719 SAVEPARSER(parser); 720 parser->saved_curcop = PL_curcop; 721 722 /* initialise lexer state */ 723 724 #ifdef PERL_MAD 725 parser->curforce = -1; 726 #else 727 parser->nexttoke = 0; 728 #endif 729 parser->error_count = oparser ? oparser->error_count : 0; 730 parser->copline = NOLINE; 731 parser->lex_state = LEX_NORMAL; 732 parser->expect = XSTATE; 733 parser->rsfp = rsfp; 734 parser->rsfp_filters = 735 !(flags & LEX_START_SAME_FILTER) || !oparser 736 ? NULL 737 : MUTABLE_AV(SvREFCNT_inc( 738 oparser->rsfp_filters 739 ? oparser->rsfp_filters 740 : (oparser->rsfp_filters = newAV()) 741 )); 742 743 Newx(parser->lex_brackstack, 120, char); 744 Newx(parser->lex_casestack, 12, char); 745 *parser->lex_casestack = '\0'; 746 Newxz(parser->lex_shared, 1, LEXSHARED); 747 748 if (line) { 749 STRLEN len; 750 s = SvPV_const(line, len); 751 parser->linestr = flags & LEX_START_COPIED 752 ? SvREFCNT_inc_simple_NN(line) 753 : newSVpvn_flags(s, len, SvUTF8(line)); 754 sv_catpvs(parser->linestr, "\n;"); 755 } else { 756 parser->linestr = newSVpvs("\n;"); 757 } 758 parser->oldoldbufptr = 759 parser->oldbufptr = 760 parser->bufptr = 761 parser->linestart = SvPVX(parser->linestr); 762 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 763 parser->last_lop = parser->last_uni = NULL; 764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 765 |LEX_DONT_CLOSE_RSFP); 766 767 parser->in_pod = parser->filtered = 0; 768 } 769 770 771 /* delete a parser object */ 772 773 void 774 Perl_parser_free(pTHX_ const yy_parser *parser) 775 { 776 PERL_ARGS_ASSERT_PARSER_FREE; 777 778 PL_curcop = parser->saved_curcop; 779 SvREFCNT_dec(parser->linestr); 780 781 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 782 PerlIO_clearerr(parser->rsfp); 783 else if (parser->rsfp && (!parser->old_parser || 784 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 785 PerlIO_close(parser->rsfp); 786 SvREFCNT_dec(parser->rsfp_filters); 787 SvREFCNT_dec(parser->lex_stuff); 788 SvREFCNT_dec(parser->sublex_info.repl); 789 790 Safefree(parser->lex_brackstack); 791 Safefree(parser->lex_casestack); 792 Safefree(parser->lex_shared); 793 PL_parser = parser->old_parser; 794 Safefree(parser); 795 } 796 797 void 798 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 799 { 800 #ifdef PERL_MAD 801 I32 nexttoke = parser->lasttoke; 802 #else 803 I32 nexttoke = parser->nexttoke; 804 #endif 805 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 806 while (nexttoke--) { 807 #ifdef PERL_MAD 808 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type 809 & 0xffff) 810 && parser->nexttoke[nexttoke].next_val.opval 811 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed 812 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) { 813 op_free(parser->nexttoke[nexttoke].next_val.opval); 814 parser->nexttoke[nexttoke].next_val.opval = NULL; 815 } 816 #else 817 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 818 && parser->nextval[nexttoke].opval 819 && parser->nextval[nexttoke].opval->op_slabbed 820 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 821 op_free(parser->nextval[nexttoke].opval); 822 parser->nextval[nexttoke].opval = NULL; 823 } 824 #endif 825 } 826 } 827 828 829 /* 830 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr 831 832 Buffer scalar containing the chunk currently under consideration of the 833 text currently being lexed. This is always a plain string scalar (for 834 which C<SvPOK> is true). It is not intended to be used as a scalar by 835 normal scalar means; instead refer to the buffer directly by the pointer 836 variables described below. 837 838 The lexer maintains various C<char*> pointers to things in the 839 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 840 reallocated, all of these pointers must be updated. Don't attempt to 841 do this manually, but rather use L</lex_grow_linestr> if you need to 842 reallocate the buffer. 843 844 The content of the text chunk in the buffer is commonly exactly one 845 complete line of input, up to and including a newline terminator, 846 but there are situations where it is otherwise. The octets of the 847 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 848 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 849 flag on this scalar, which may disagree with it. 850 851 For direct examination of the buffer, the variable 852 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 853 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 854 of these pointers is usually preferable to examination of the scalar 855 through normal scalar means. 856 857 =for apidoc AmxU|char *|PL_parser-E<gt>bufend 858 859 Direct pointer to the end of the chunk of text currently being lexed, the 860 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 861 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is 862 always located at the end of the buffer, and does not count as part of 863 the buffer's contents. 864 865 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr 866 867 Points to the current position of lexing inside the lexer buffer. 868 Characters around this point may be freely examined, within 869 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 870 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 871 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 872 873 Lexing code (whether in the Perl core or not) moves this pointer past 874 the characters that it consumes. It is also expected to perform some 875 bookkeeping whenever a newline character is consumed. This movement 876 can be more conveniently performed by the function L</lex_read_to>, 877 which handles newlines appropriately. 878 879 Interpretation of the buffer's octets can be abstracted out by 880 using the slightly higher-level functions L</lex_peek_unichar> and 881 L</lex_read_unichar>. 882 883 =for apidoc AmxU|char *|PL_parser-E<gt>linestart 884 885 Points to the start of the current line inside the lexer buffer. 886 This is useful for indicating at which column an error occurred, and 887 not much else. This must be updated by any lexing code that consumes 888 a newline; the function L</lex_read_to> handles this detail. 889 890 =cut 891 */ 892 893 /* 894 =for apidoc Amx|bool|lex_bufutf8 895 896 Indicates whether the octets in the lexer buffer 897 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 898 of Unicode characters. If not, they should be interpreted as Latin-1 899 characters. This is analogous to the C<SvUTF8> flag for scalars. 900 901 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 902 contains valid UTF-8. Lexing code must be robust in the face of invalid 903 encoding. 904 905 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 906 is significant, but not the whole story regarding the input character 907 encoding. Normally, when a file is being read, the scalar contains octets 908 and its C<SvUTF8> flag is off, but the octets should be interpreted as 909 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 910 however, the scalar may have the C<SvUTF8> flag on, and in this case its 911 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 912 is in effect. This logic may change in the future; use this function 913 instead of implementing the logic yourself. 914 915 =cut 916 */ 917 918 bool 919 Perl_lex_bufutf8(pTHX) 920 { 921 return UTF; 922 } 923 924 /* 925 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len 926 927 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 928 at least I<len> octets (including terminating NUL). Returns a 929 pointer to the reallocated buffer. This is necessary before making 930 any direct modification of the buffer that would increase its length. 931 L</lex_stuff_pvn> provides a more convenient way to insert text into 932 the buffer. 933 934 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 935 this function updates all of the lexer's variables that point directly 936 into the buffer. 937 938 =cut 939 */ 940 941 char * 942 Perl_lex_grow_linestr(pTHX_ STRLEN len) 943 { 944 SV *linestr; 945 char *buf; 946 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 947 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 948 linestr = PL_parser->linestr; 949 buf = SvPVX(linestr); 950 if (len <= SvLEN(linestr)) 951 return buf; 952 bufend_pos = PL_parser->bufend - buf; 953 bufptr_pos = PL_parser->bufptr - buf; 954 oldbufptr_pos = PL_parser->oldbufptr - buf; 955 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 956 linestart_pos = PL_parser->linestart - buf; 957 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 958 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 959 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? 960 PL_parser->lex_shared->re_eval_start - buf : 0; 961 962 buf = sv_grow(linestr, len); 963 964 PL_parser->bufend = buf + bufend_pos; 965 PL_parser->bufptr = buf + bufptr_pos; 966 PL_parser->oldbufptr = buf + oldbufptr_pos; 967 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 968 PL_parser->linestart = buf + linestart_pos; 969 if (PL_parser->last_uni) 970 PL_parser->last_uni = buf + last_uni_pos; 971 if (PL_parser->last_lop) 972 PL_parser->last_lop = buf + last_lop_pos; 973 if (PL_parser->lex_shared->re_eval_start) 974 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 975 return buf; 976 } 977 978 /* 979 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags 980 981 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 982 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 983 reallocating the buffer if necessary. This means that lexing code that 984 runs later will see the characters as if they had appeared in the input. 985 It is not recommended to do this as part of normal parsing, and most 986 uses of this facility run the risk of the inserted characters being 987 interpreted in an unintended manner. 988 989 The string to be inserted is represented by I<len> octets starting 990 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1, 991 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>. 992 The characters are recoded for the lexer buffer, according to how the 993 buffer is currently being interpreted (L</lex_bufutf8>). If a string 994 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 995 function is more convenient. 996 997 =cut 998 */ 999 1000 void 1001 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1002 { 1003 dVAR; 1004 char *bufptr; 1005 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1006 if (flags & ~(LEX_STUFF_UTF8)) 1007 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1008 if (UTF) { 1009 if (flags & LEX_STUFF_UTF8) { 1010 goto plain_copy; 1011 } else { 1012 STRLEN highhalf = 0; /* Count of variants */ 1013 const char *p, *e = pv+len; 1014 for (p = pv; p != e; p++) { 1015 if (! UTF8_IS_INVARIANT(*p)) { 1016 highhalf++; 1017 } 1018 } 1019 if (!highhalf) 1020 goto plain_copy; 1021 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1022 bufptr = PL_parser->bufptr; 1023 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1024 SvCUR_set(PL_parser->linestr, 1025 SvCUR(PL_parser->linestr) + len+highhalf); 1026 PL_parser->bufend += len+highhalf; 1027 for (p = pv; p != e; p++) { 1028 U8 c = (U8)*p; 1029 if (! UTF8_IS_INVARIANT(c)) { 1030 *bufptr++ = UTF8_TWO_BYTE_HI(c); 1031 *bufptr++ = UTF8_TWO_BYTE_LO(c); 1032 } else { 1033 *bufptr++ = (char)c; 1034 } 1035 } 1036 } 1037 } else { 1038 if (flags & LEX_STUFF_UTF8) { 1039 STRLEN highhalf = 0; 1040 const char *p, *e = pv+len; 1041 for (p = pv; p != e; p++) { 1042 U8 c = (U8)*p; 1043 if (UTF8_IS_ABOVE_LATIN1(c)) { 1044 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1045 "non-Latin-1 character into Latin-1 input"); 1046 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1047 p++; 1048 highhalf++; 1049 } else if (! UTF8_IS_INVARIANT(c)) { 1050 /* malformed UTF-8 */ 1051 ENTER; 1052 SAVESPTR(PL_warnhook); 1053 PL_warnhook = PERL_WARNHOOK_FATAL; 1054 utf8n_to_uvuni((U8*)p, e-p, NULL, 0); 1055 LEAVE; 1056 } 1057 } 1058 if (!highhalf) 1059 goto plain_copy; 1060 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1061 bufptr = PL_parser->bufptr; 1062 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1063 SvCUR_set(PL_parser->linestr, 1064 SvCUR(PL_parser->linestr) + len-highhalf); 1065 PL_parser->bufend += len-highhalf; 1066 p = pv; 1067 while (p < e) { 1068 if (UTF8_IS_INVARIANT(*p)) { 1069 *bufptr++ = *p; 1070 p++; 1071 } 1072 else { 1073 assert(p < e -1 ); 1074 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)); 1075 p += 2; 1076 } 1077 } 1078 } else { 1079 plain_copy: 1080 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1081 bufptr = PL_parser->bufptr; 1082 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1083 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1084 PL_parser->bufend += len; 1085 Copy(pv, bufptr, len, char); 1086 } 1087 } 1088 } 1089 1090 /* 1091 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags 1092 1093 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1094 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1095 reallocating the buffer if necessary. This means that lexing code that 1096 runs later will see the characters as if they had appeared in the input. 1097 It is not recommended to do this as part of normal parsing, and most 1098 uses of this facility run the risk of the inserted characters being 1099 interpreted in an unintended manner. 1100 1101 The string to be inserted is represented by octets starting at I<pv> 1102 and continuing to the first nul. These octets are interpreted as either 1103 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1104 in I<flags>. The characters are recoded for the lexer buffer, according 1105 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1106 If it is not convenient to nul-terminate a string to be inserted, the 1107 L</lex_stuff_pvn> function is more appropriate. 1108 1109 =cut 1110 */ 1111 1112 void 1113 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1114 { 1115 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1116 lex_stuff_pvn(pv, strlen(pv), flags); 1117 } 1118 1119 /* 1120 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags 1121 1122 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1123 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1124 reallocating the buffer if necessary. This means that lexing code that 1125 runs later will see the characters as if they had appeared in the input. 1126 It is not recommended to do this as part of normal parsing, and most 1127 uses of this facility run the risk of the inserted characters being 1128 interpreted in an unintended manner. 1129 1130 The string to be inserted is the string value of I<sv>. The characters 1131 are recoded for the lexer buffer, according to how the buffer is currently 1132 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1133 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1134 need to construct a scalar. 1135 1136 =cut 1137 */ 1138 1139 void 1140 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1141 { 1142 char *pv; 1143 STRLEN len; 1144 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1145 if (flags) 1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1147 pv = SvPV(sv, len); 1148 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1149 } 1150 1151 /* 1152 =for apidoc Amx|void|lex_unstuff|char *ptr 1153 1154 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1155 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened. 1156 This hides the discarded text from any lexing code that runs later, 1157 as if the text had never appeared. 1158 1159 This is not the normal way to consume lexed text. For that, use 1160 L</lex_read_to>. 1161 1162 =cut 1163 */ 1164 1165 void 1166 Perl_lex_unstuff(pTHX_ char *ptr) 1167 { 1168 char *buf, *bufend; 1169 STRLEN unstuff_len; 1170 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1171 buf = PL_parser->bufptr; 1172 if (ptr < buf) 1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1174 if (ptr == buf) 1175 return; 1176 bufend = PL_parser->bufend; 1177 if (ptr > bufend) 1178 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1179 unstuff_len = ptr - buf; 1180 Move(ptr, buf, bufend+1-ptr, char); 1181 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1182 PL_parser->bufend = bufend - unstuff_len; 1183 } 1184 1185 /* 1186 =for apidoc Amx|void|lex_read_to|char *ptr 1187 1188 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1189 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>, 1190 performing the correct bookkeeping whenever a newline character is passed. 1191 This is the normal way to consume lexed text. 1192 1193 Interpretation of the buffer's octets can be abstracted out by 1194 using the slightly higher-level functions L</lex_peek_unichar> and 1195 L</lex_read_unichar>. 1196 1197 =cut 1198 */ 1199 1200 void 1201 Perl_lex_read_to(pTHX_ char *ptr) 1202 { 1203 char *s; 1204 PERL_ARGS_ASSERT_LEX_READ_TO; 1205 s = PL_parser->bufptr; 1206 if (ptr < s || ptr > PL_parser->bufend) 1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1208 for (; s != ptr; s++) 1209 if (*s == '\n') { 1210 COPLINE_INC_WITH_HERELINES; 1211 PL_parser->linestart = s+1; 1212 } 1213 PL_parser->bufptr = ptr; 1214 } 1215 1216 /* 1217 =for apidoc Amx|void|lex_discard_to|char *ptr 1218 1219 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1220 up to I<ptr>. The remaining content of the buffer will be moved, and 1221 all pointers into the buffer updated appropriately. I<ptr> must not 1222 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1223 it is not permitted to discard text that has yet to be lexed. 1224 1225 Normally it is not necessarily to do this directly, because it suffices to 1226 use the implicit discarding behaviour of L</lex_next_chunk> and things 1227 based on it. However, if a token stretches across multiple lines, 1228 and the lexing code has kept multiple lines of text in the buffer for 1229 that purpose, then after completion of the token it would be wise to 1230 explicitly discard the now-unneeded earlier lines, to avoid future 1231 multi-line tokens growing the buffer without bound. 1232 1233 =cut 1234 */ 1235 1236 void 1237 Perl_lex_discard_to(pTHX_ char *ptr) 1238 { 1239 char *buf; 1240 STRLEN discard_len; 1241 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1242 buf = SvPVX(PL_parser->linestr); 1243 if (ptr < buf) 1244 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1245 if (ptr == buf) 1246 return; 1247 if (ptr > PL_parser->bufptr) 1248 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1249 discard_len = ptr - buf; 1250 if (PL_parser->oldbufptr < ptr) 1251 PL_parser->oldbufptr = ptr; 1252 if (PL_parser->oldoldbufptr < ptr) 1253 PL_parser->oldoldbufptr = ptr; 1254 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1255 PL_parser->last_uni = NULL; 1256 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1257 PL_parser->last_lop = NULL; 1258 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1259 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1260 PL_parser->bufend -= discard_len; 1261 PL_parser->bufptr -= discard_len; 1262 PL_parser->oldbufptr -= discard_len; 1263 PL_parser->oldoldbufptr -= discard_len; 1264 if (PL_parser->last_uni) 1265 PL_parser->last_uni -= discard_len; 1266 if (PL_parser->last_lop) 1267 PL_parser->last_lop -= discard_len; 1268 } 1269 1270 /* 1271 =for apidoc Amx|bool|lex_next_chunk|U32 flags 1272 1273 Reads in the next chunk of text to be lexed, appending it to 1274 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1275 looked to the end of the current chunk and wants to know more. It is 1276 usual, but not necessary, for lexing to have consumed the entirety of 1277 the current chunk at this time. 1278 1279 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1280 chunk (i.e., the current chunk has been entirely consumed), normally the 1281 current chunk will be discarded at the same time that the new chunk is 1282 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk 1283 will not be discarded. If the current chunk has not been entirely 1284 consumed, then it will not be discarded regardless of the flag. 1285 1286 Returns true if some new text was added to the buffer, or false if the 1287 buffer has reached the end of the input text. 1288 1289 =cut 1290 */ 1291 1292 #define LEX_FAKE_EOF 0x80000000 1293 #define LEX_NO_TERM 0x40000000 1294 1295 bool 1296 Perl_lex_next_chunk(pTHX_ U32 flags) 1297 { 1298 SV *linestr; 1299 char *buf; 1300 STRLEN old_bufend_pos, new_bufend_pos; 1301 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1302 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1303 bool got_some_for_debugger = 0; 1304 bool got_some; 1305 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1306 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1307 linestr = PL_parser->linestr; 1308 buf = SvPVX(linestr); 1309 if (!(flags & LEX_KEEP_PREVIOUS) && 1310 PL_parser->bufptr == PL_parser->bufend) { 1311 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1312 linestart_pos = 0; 1313 if (PL_parser->last_uni != PL_parser->bufend) 1314 PL_parser->last_uni = NULL; 1315 if (PL_parser->last_lop != PL_parser->bufend) 1316 PL_parser->last_lop = NULL; 1317 last_uni_pos = last_lop_pos = 0; 1318 *buf = 0; 1319 SvCUR(linestr) = 0; 1320 } else { 1321 old_bufend_pos = PL_parser->bufend - buf; 1322 bufptr_pos = PL_parser->bufptr - buf; 1323 oldbufptr_pos = PL_parser->oldbufptr - buf; 1324 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1325 linestart_pos = PL_parser->linestart - buf; 1326 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1327 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1328 } 1329 if (flags & LEX_FAKE_EOF) { 1330 goto eof; 1331 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1332 got_some = 0; 1333 } else if (filter_gets(linestr, old_bufend_pos)) { 1334 got_some = 1; 1335 got_some_for_debugger = 1; 1336 } else if (flags & LEX_NO_TERM) { 1337 got_some = 0; 1338 } else { 1339 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1340 sv_setpvs(linestr, ""); 1341 eof: 1342 /* End of real input. Close filehandle (unless it was STDIN), 1343 * then add implicit termination. 1344 */ 1345 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1346 PerlIO_clearerr(PL_parser->rsfp); 1347 else if (PL_parser->rsfp) 1348 (void)PerlIO_close(PL_parser->rsfp); 1349 PL_parser->rsfp = NULL; 1350 PL_parser->in_pod = PL_parser->filtered = 0; 1351 #ifdef PERL_MAD 1352 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n)) 1353 PL_faketokens = 1; 1354 #endif 1355 if (!PL_in_eval && PL_minus_p) { 1356 sv_catpvs(linestr, 1357 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1358 PL_minus_n = PL_minus_p = 0; 1359 } else if (!PL_in_eval && PL_minus_n) { 1360 sv_catpvs(linestr, /*{*/";}"); 1361 PL_minus_n = 0; 1362 } else 1363 sv_catpvs(linestr, ";"); 1364 got_some = 1; 1365 } 1366 buf = SvPVX(linestr); 1367 new_bufend_pos = SvCUR(linestr); 1368 PL_parser->bufend = buf + new_bufend_pos; 1369 PL_parser->bufptr = buf + bufptr_pos; 1370 PL_parser->oldbufptr = buf + oldbufptr_pos; 1371 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1372 PL_parser->linestart = buf + linestart_pos; 1373 if (PL_parser->last_uni) 1374 PL_parser->last_uni = buf + last_uni_pos; 1375 if (PL_parser->last_lop) 1376 PL_parser->last_lop = buf + last_lop_pos; 1377 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) && 1378 PL_curstash != PL_debstash) { 1379 /* debugger active and we're not compiling the debugger code, 1380 * so store the line into the debugger's array of lines 1381 */ 1382 update_debugger_info(NULL, buf+old_bufend_pos, 1383 new_bufend_pos-old_bufend_pos); 1384 } 1385 return got_some; 1386 } 1387 1388 /* 1389 =for apidoc Amx|I32|lex_peek_unichar|U32 flags 1390 1391 Looks ahead one (Unicode) character in the text currently being lexed. 1392 Returns the codepoint (unsigned integer value) of the next character, 1393 or -1 if lexing has reached the end of the input text. To consume the 1394 peeked character, use L</lex_read_unichar>. 1395 1396 If the next character is in (or extends into) the next chunk of input 1397 text, the next chunk will be read in. Normally the current chunk will be 1398 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1399 then the current chunk will not be discarded. 1400 1401 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1402 is encountered, an exception is generated. 1403 1404 =cut 1405 */ 1406 1407 I32 1408 Perl_lex_peek_unichar(pTHX_ U32 flags) 1409 { 1410 dVAR; 1411 char *s, *bufend; 1412 if (flags & ~(LEX_KEEP_PREVIOUS)) 1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1414 s = PL_parser->bufptr; 1415 bufend = PL_parser->bufend; 1416 if (UTF) { 1417 U8 head; 1418 I32 unichar; 1419 STRLEN len, retlen; 1420 if (s == bufend) { 1421 if (!lex_next_chunk(flags)) 1422 return -1; 1423 s = PL_parser->bufptr; 1424 bufend = PL_parser->bufend; 1425 } 1426 head = (U8)*s; 1427 if (UTF8_IS_INVARIANT(head)) 1428 return head; 1429 if (UTF8_IS_START(head)) { 1430 len = UTF8SKIP(&head); 1431 while ((STRLEN)(bufend-s) < len) { 1432 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1433 break; 1434 s = PL_parser->bufptr; 1435 bufend = PL_parser->bufend; 1436 } 1437 } 1438 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1439 if (retlen == (STRLEN)-1) { 1440 /* malformed UTF-8 */ 1441 ENTER; 1442 SAVESPTR(PL_warnhook); 1443 PL_warnhook = PERL_WARNHOOK_FATAL; 1444 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0); 1445 LEAVE; 1446 } 1447 return unichar; 1448 } else { 1449 if (s == bufend) { 1450 if (!lex_next_chunk(flags)) 1451 return -1; 1452 s = PL_parser->bufptr; 1453 } 1454 return (U8)*s; 1455 } 1456 } 1457 1458 /* 1459 =for apidoc Amx|I32|lex_read_unichar|U32 flags 1460 1461 Reads the next (Unicode) character in the text currently being lexed. 1462 Returns the codepoint (unsigned integer value) of the character read, 1463 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1464 if lexing has reached the end of the input text. To non-destructively 1465 examine the next character, use L</lex_peek_unichar> instead. 1466 1467 If the next character is in (or extends into) the next chunk of input 1468 text, the next chunk will be read in. Normally the current chunk will be 1469 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1470 then the current chunk will not be discarded. 1471 1472 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1473 is encountered, an exception is generated. 1474 1475 =cut 1476 */ 1477 1478 I32 1479 Perl_lex_read_unichar(pTHX_ U32 flags) 1480 { 1481 I32 c; 1482 if (flags & ~(LEX_KEEP_PREVIOUS)) 1483 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1484 c = lex_peek_unichar(flags); 1485 if (c != -1) { 1486 if (c == '\n') 1487 COPLINE_INC_WITH_HERELINES; 1488 if (UTF) 1489 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1490 else 1491 ++(PL_parser->bufptr); 1492 } 1493 return c; 1494 } 1495 1496 /* 1497 =for apidoc Amx|void|lex_read_space|U32 flags 1498 1499 Reads optional spaces, in Perl style, in the text currently being 1500 lexed. The spaces may include ordinary whitespace characters and 1501 Perl-style comments. C<#line> directives are processed if encountered. 1502 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1503 at a non-space character (or the end of the input text). 1504 1505 If spaces extend into the next chunk of input text, the next chunk will 1506 be read in. Normally the current chunk will be discarded at the same 1507 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current 1508 chunk will not be discarded. 1509 1510 =cut 1511 */ 1512 1513 #define LEX_NO_NEXT_CHUNK 0x80000000 1514 1515 void 1516 Perl_lex_read_space(pTHX_ U32 flags) 1517 { 1518 char *s, *bufend; 1519 bool need_incline = 0; 1520 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK)) 1521 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1522 #ifdef PERL_MAD 1523 if (PL_skipwhite) { 1524 sv_free(PL_skipwhite); 1525 PL_skipwhite = NULL; 1526 } 1527 if (PL_madskills) 1528 PL_skipwhite = newSVpvs(""); 1529 #endif /* PERL_MAD */ 1530 s = PL_parser->bufptr; 1531 bufend = PL_parser->bufend; 1532 while (1) { 1533 char c = *s; 1534 if (c == '#') { 1535 do { 1536 c = *++s; 1537 } while (!(c == '\n' || (c == 0 && s == bufend))); 1538 } else if (c == '\n') { 1539 s++; 1540 PL_parser->linestart = s; 1541 if (s == bufend) 1542 need_incline = 1; 1543 else 1544 incline(s); 1545 } else if (isSPACE(c)) { 1546 s++; 1547 } else if (c == 0 && s == bufend) { 1548 bool got_more; 1549 #ifdef PERL_MAD 1550 if (PL_madskills) 1551 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1552 #endif /* PERL_MAD */ 1553 if (flags & LEX_NO_NEXT_CHUNK) 1554 break; 1555 PL_parser->bufptr = s; 1556 COPLINE_INC_WITH_HERELINES; 1557 got_more = lex_next_chunk(flags); 1558 CopLINE_dec(PL_curcop); 1559 s = PL_parser->bufptr; 1560 bufend = PL_parser->bufend; 1561 if (!got_more) 1562 break; 1563 if (need_incline && PL_parser->rsfp) { 1564 incline(s); 1565 need_incline = 0; 1566 } 1567 } else { 1568 break; 1569 } 1570 } 1571 #ifdef PERL_MAD 1572 if (PL_madskills) 1573 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1574 #endif /* PERL_MAD */ 1575 PL_parser->bufptr = s; 1576 } 1577 1578 /* 1579 * S_incline 1580 * This subroutine has nothing to do with tilting, whether at windmills 1581 * or pinball tables. Its name is short for "increment line". It 1582 * increments the current line number in CopLINE(PL_curcop) and checks 1583 * to see whether the line starts with a comment of the form 1584 * # line 500 "foo.pm" 1585 * If so, it sets the current line number and file to the values in the comment. 1586 */ 1587 1588 STATIC void 1589 S_incline(pTHX_ const char *s) 1590 { 1591 dVAR; 1592 const char *t; 1593 const char *n; 1594 const char *e; 1595 line_t line_num; 1596 1597 PERL_ARGS_ASSERT_INCLINE; 1598 1599 COPLINE_INC_WITH_HERELINES; 1600 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1601 && s+1 == PL_bufend && *s == ';') { 1602 /* fake newline in string eval */ 1603 CopLINE_dec(PL_curcop); 1604 return; 1605 } 1606 if (*s++ != '#') 1607 return; 1608 while (SPACE_OR_TAB(*s)) 1609 s++; 1610 if (strnEQ(s, "line", 4)) 1611 s += 4; 1612 else 1613 return; 1614 if (SPACE_OR_TAB(*s)) 1615 s++; 1616 else 1617 return; 1618 while (SPACE_OR_TAB(*s)) 1619 s++; 1620 if (!isDIGIT(*s)) 1621 return; 1622 1623 n = s; 1624 while (isDIGIT(*s)) 1625 s++; 1626 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1627 return; 1628 while (SPACE_OR_TAB(*s)) 1629 s++; 1630 if (*s == '"' && (t = strchr(s+1, '"'))) { 1631 s++; 1632 e = t + 1; 1633 } 1634 else { 1635 t = s; 1636 while (!isSPACE(*t)) 1637 t++; 1638 e = t; 1639 } 1640 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1641 e++; 1642 if (*e != '\n' && *e != '\0') 1643 return; /* false alarm */ 1644 1645 line_num = atoi(n)-1; 1646 1647 if (t - s > 0) { 1648 const STRLEN len = t - s; 1649 SV *const temp_sv = CopFILESV(PL_curcop); 1650 const char *cf; 1651 STRLEN tmplen; 1652 1653 if (temp_sv) { 1654 cf = SvPVX(temp_sv); 1655 tmplen = SvCUR(temp_sv); 1656 } else { 1657 cf = NULL; 1658 tmplen = 0; 1659 } 1660 1661 if (!PL_rsfp && !PL_parser->filtered) { 1662 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1663 * to *{"::_<newfilename"} */ 1664 /* However, the long form of evals is only turned on by the 1665 debugger - usually they're "(eval %lu)" */ 1666 char smallbuf[128]; 1667 char *tmpbuf; 1668 GV **gvp; 1669 STRLEN tmplen2 = len; 1670 if (tmplen + 2 <= sizeof smallbuf) 1671 tmpbuf = smallbuf; 1672 else 1673 Newx(tmpbuf, tmplen + 2, char); 1674 tmpbuf[0] = '_'; 1675 tmpbuf[1] = '<'; 1676 memcpy(tmpbuf + 2, cf, tmplen); 1677 tmplen += 2; 1678 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE); 1679 if (gvp) { 1680 char *tmpbuf2; 1681 GV *gv2; 1682 1683 if (tmplen2 + 2 <= sizeof smallbuf) 1684 tmpbuf2 = smallbuf; 1685 else 1686 Newx(tmpbuf2, tmplen2 + 2, char); 1687 1688 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) { 1689 /* Either they malloc'd it, or we malloc'd it, 1690 so no prefix is present in ours. */ 1691 tmpbuf2[0] = '_'; 1692 tmpbuf2[1] = '<'; 1693 } 1694 1695 memcpy(tmpbuf2 + 2, s, tmplen2); 1696 tmplen2 += 2; 1697 1698 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1699 if (!isGV(gv2)) { 1700 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1701 /* adjust ${"::_<newfilename"} to store the new file name */ 1702 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1703 /* The line number may differ. If that is the case, 1704 alias the saved lines that are in the array. 1705 Otherwise alias the whole array. */ 1706 if (CopLINE(PL_curcop) == line_num) { 1707 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp))); 1708 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp))); 1709 } 1710 else if (GvAV(*gvp)) { 1711 AV * const av = GvAV(*gvp); 1712 const I32 start = CopLINE(PL_curcop)+1; 1713 I32 items = AvFILLp(av) - start; 1714 if (items > 0) { 1715 AV * const av2 = GvAVn(gv2); 1716 SV **svp = AvARRAY(av) + start; 1717 I32 l = (I32)line_num+1; 1718 while (items--) 1719 av_store(av2, l++, SvREFCNT_inc(*svp++)); 1720 } 1721 } 1722 } 1723 1724 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1725 } 1726 if (tmpbuf != smallbuf) Safefree(tmpbuf); 1727 } 1728 CopFILE_free(PL_curcop); 1729 CopFILE_setn(PL_curcop, s, len); 1730 } 1731 CopLINE_set(PL_curcop, line_num); 1732 } 1733 1734 #ifdef PERL_MAD 1735 /* skip space before PL_thistoken */ 1736 1737 STATIC char * 1738 S_skipspace0(pTHX_ char *s) 1739 { 1740 PERL_ARGS_ASSERT_SKIPSPACE0; 1741 1742 s = skipspace(s); 1743 if (!PL_madskills) 1744 return s; 1745 if (PL_skipwhite) { 1746 if (!PL_thiswhite) 1747 PL_thiswhite = newSVpvs(""); 1748 sv_catsv(PL_thiswhite, PL_skipwhite); 1749 sv_free(PL_skipwhite); 1750 PL_skipwhite = 0; 1751 } 1752 PL_realtokenstart = s - SvPVX(PL_linestr); 1753 return s; 1754 } 1755 1756 /* skip space after PL_thistoken */ 1757 1758 STATIC char * 1759 S_skipspace1(pTHX_ char *s) 1760 { 1761 const char *start = s; 1762 I32 startoff = start - SvPVX(PL_linestr); 1763 1764 PERL_ARGS_ASSERT_SKIPSPACE1; 1765 1766 s = skipspace(s); 1767 if (!PL_madskills) 1768 return s; 1769 start = SvPVX(PL_linestr) + startoff; 1770 if (!PL_thistoken && PL_realtokenstart >= 0) { 1771 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1772 PL_thistoken = newSVpvn(tstart, start - tstart); 1773 } 1774 PL_realtokenstart = -1; 1775 if (PL_skipwhite) { 1776 if (!PL_nextwhite) 1777 PL_nextwhite = newSVpvs(""); 1778 sv_catsv(PL_nextwhite, PL_skipwhite); 1779 sv_free(PL_skipwhite); 1780 PL_skipwhite = 0; 1781 } 1782 return s; 1783 } 1784 1785 STATIC char * 1786 S_skipspace2(pTHX_ char *s, SV **svp) 1787 { 1788 char *start; 1789 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr); 1790 const I32 startoff = s - SvPVX(PL_linestr); 1791 1792 PERL_ARGS_ASSERT_SKIPSPACE2; 1793 1794 s = skipspace(s); 1795 PL_bufptr = SvPVX(PL_linestr) + bufptroff; 1796 if (!PL_madskills || !svp) 1797 return s; 1798 start = SvPVX(PL_linestr) + startoff; 1799 if (!PL_thistoken && PL_realtokenstart >= 0) { 1800 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1801 PL_thistoken = newSVpvn(tstart, start - tstart); 1802 PL_realtokenstart = -1; 1803 } 1804 if (PL_skipwhite) { 1805 if (!*svp) 1806 *svp = newSVpvs(""); 1807 sv_setsv(*svp, PL_skipwhite); 1808 sv_free(PL_skipwhite); 1809 PL_skipwhite = 0; 1810 } 1811 1812 return s; 1813 } 1814 #endif 1815 1816 STATIC void 1817 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1818 { 1819 AV *av = CopFILEAVx(PL_curcop); 1820 if (av) { 1821 SV * const sv = newSV_type(SVt_PVMG); 1822 if (orig_sv) 1823 sv_setsv(sv, orig_sv); 1824 else 1825 sv_setpvn(sv, buf, len); 1826 (void)SvIOK_on(sv); 1827 SvIV_set(sv, 0); 1828 av_store(av, (I32)CopLINE(PL_curcop), sv); 1829 } 1830 } 1831 1832 /* 1833 * S_skipspace 1834 * Called to gobble the appropriate amount and type of whitespace. 1835 * Skips comments as well. 1836 */ 1837 1838 STATIC char * 1839 S_skipspace(pTHX_ char *s) 1840 { 1841 #ifdef PERL_MAD 1842 char *start = s; 1843 #endif /* PERL_MAD */ 1844 PERL_ARGS_ASSERT_SKIPSPACE; 1845 #ifdef PERL_MAD 1846 if (PL_skipwhite) { 1847 sv_free(PL_skipwhite); 1848 PL_skipwhite = NULL; 1849 } 1850 #endif /* PERL_MAD */ 1851 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1852 while (s < PL_bufend && SPACE_OR_TAB(*s)) 1853 s++; 1854 } else { 1855 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1856 PL_bufptr = s; 1857 lex_read_space(LEX_KEEP_PREVIOUS | 1858 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ? 1859 LEX_NO_NEXT_CHUNK : 0)); 1860 s = PL_bufptr; 1861 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1862 if (PL_linestart > PL_bufptr) 1863 PL_bufptr = PL_linestart; 1864 return s; 1865 } 1866 #ifdef PERL_MAD 1867 if (PL_madskills) 1868 PL_skipwhite = newSVpvn(start, s-start); 1869 #endif /* PERL_MAD */ 1870 return s; 1871 } 1872 1873 /* 1874 * S_check_uni 1875 * Check the unary operators to ensure there's no ambiguity in how they're 1876 * used. An ambiguous piece of code would be: 1877 * rand + 5 1878 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1879 * the +5 is its argument. 1880 */ 1881 1882 STATIC void 1883 S_check_uni(pTHX) 1884 { 1885 dVAR; 1886 const char *s; 1887 const char *t; 1888 1889 if (PL_oldoldbufptr != PL_last_uni) 1890 return; 1891 while (isSPACE(*PL_last_uni)) 1892 PL_last_uni++; 1893 s = PL_last_uni; 1894 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-') 1895 s++; 1896 if ((t = strchr(s, '(')) && t < PL_bufptr) 1897 return; 1898 1899 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 1900 "Warning: Use of \"%.*s\" without parentheses is ambiguous", 1901 (int)(s - PL_last_uni), PL_last_uni); 1902 } 1903 1904 /* 1905 * LOP : macro to build a list operator. Its behaviour has been replaced 1906 * with a subroutine, S_lop() for which LOP is just another name. 1907 */ 1908 1909 #define LOP(f,x) return lop(f,x,s) 1910 1911 /* 1912 * S_lop 1913 * Build a list operator (or something that might be one). The rules: 1914 * - if we have a next token, then it's a list operator [why?] 1915 * - if the next thing is an opening paren, then it's a function 1916 * - else it's a list operator 1917 */ 1918 1919 STATIC I32 1920 S_lop(pTHX_ I32 f, int x, char *s) 1921 { 1922 dVAR; 1923 1924 PERL_ARGS_ASSERT_LOP; 1925 1926 pl_yylval.ival = f; 1927 CLINE; 1928 PL_expect = x; 1929 PL_bufptr = s; 1930 PL_last_lop = PL_oldbufptr; 1931 PL_last_lop_op = (OPCODE)f; 1932 #ifdef PERL_MAD 1933 if (PL_lasttoke) 1934 goto lstop; 1935 #else 1936 if (PL_nexttoke) 1937 goto lstop; 1938 #endif 1939 if (*s == '(') 1940 return REPORT(FUNC); 1941 s = PEEKSPACE(s); 1942 if (*s == '(') 1943 return REPORT(FUNC); 1944 else { 1945 lstop: 1946 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 1947 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 1948 return REPORT(LSTOP); 1949 } 1950 } 1951 1952 #ifdef PERL_MAD 1953 /* 1954 * S_start_force 1955 * Sets up for an eventual force_next(). start_force(0) basically does 1956 * an unshift, while start_force(-1) does a push. yylex removes items 1957 * on the "pop" end. 1958 */ 1959 1960 STATIC void 1961 S_start_force(pTHX_ int where) 1962 { 1963 int i; 1964 1965 if (where < 0) /* so people can duplicate start_force(PL_curforce) */ 1966 where = PL_lasttoke; 1967 assert(PL_curforce < 0 || PL_curforce == where); 1968 if (PL_curforce != where) { 1969 for (i = PL_lasttoke; i > where; --i) { 1970 PL_nexttoke[i] = PL_nexttoke[i-1]; 1971 } 1972 PL_lasttoke++; 1973 } 1974 if (PL_curforce < 0) /* in case of duplicate start_force() */ 1975 Zero(&PL_nexttoke[where], 1, NEXTTOKE); 1976 PL_curforce = where; 1977 if (PL_nextwhite) { 1978 if (PL_madskills) 1979 curmad('^', newSVpvs("")); 1980 CURMAD('_', PL_nextwhite); 1981 } 1982 } 1983 1984 STATIC void 1985 S_curmad(pTHX_ char slot, SV *sv) 1986 { 1987 MADPROP **where; 1988 1989 if (!sv) 1990 return; 1991 if (PL_curforce < 0) 1992 where = &PL_thismad; 1993 else 1994 where = &PL_nexttoke[PL_curforce].next_mad; 1995 1996 if (PL_faketokens) 1997 sv_setpvs(sv, ""); 1998 else { 1999 if (!IN_BYTES) { 2000 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 2001 SvUTF8_on(sv); 2002 else if (PL_encoding) { 2003 sv_recode_to_utf8(sv, PL_encoding); 2004 } 2005 } 2006 } 2007 2008 /* keep a slot open for the head of the list? */ 2009 if (slot != '_' && *where && (*where)->mad_key == '^') { 2010 (*where)->mad_key = slot; 2011 sv_free(MUTABLE_SV(((*where)->mad_val))); 2012 (*where)->mad_val = (void*)sv; 2013 } 2014 else 2015 addmad(newMADsv(slot, sv), where, 0); 2016 } 2017 #else 2018 # define start_force(where) NOOP 2019 # define curmad(slot, sv) NOOP 2020 #endif 2021 2022 /* 2023 * S_force_next 2024 * When the lexer realizes it knows the next token (for instance, 2025 * it is reordering tokens for the parser) then it can call S_force_next 2026 * to know what token to return the next time the lexer is called. Caller 2027 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD), 2028 * and possibly PL_expect to ensure the lexer handles the token correctly. 2029 */ 2030 2031 STATIC void 2032 S_force_next(pTHX_ I32 type) 2033 { 2034 dVAR; 2035 #ifdef DEBUGGING 2036 if (DEBUG_T_TEST) { 2037 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2038 tokereport(type, &NEXTVAL_NEXTTOKE); 2039 } 2040 #endif 2041 #ifdef PERL_MAD 2042 if (PL_curforce < 0) 2043 start_force(PL_lasttoke); 2044 PL_nexttoke[PL_curforce].next_type = type; 2045 if (PL_lex_state != LEX_KNOWNEXT) 2046 PL_lex_defer = PL_lex_state; 2047 PL_lex_state = LEX_KNOWNEXT; 2048 PL_lex_expect = PL_expect; 2049 PL_curforce = -1; 2050 #else 2051 PL_nexttype[PL_nexttoke] = type; 2052 PL_nexttoke++; 2053 if (PL_lex_state != LEX_KNOWNEXT) { 2054 PL_lex_defer = PL_lex_state; 2055 PL_lex_expect = PL_expect; 2056 PL_lex_state = LEX_KNOWNEXT; 2057 } 2058 #endif 2059 } 2060 2061 void 2062 Perl_yyunlex(pTHX) 2063 { 2064 int yyc = PL_parser->yychar; 2065 if (yyc != YYEMPTY) { 2066 if (yyc) { 2067 start_force(-1); 2068 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2069 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { 2070 PL_lex_allbrackets--; 2071 PL_lex_brackets--; 2072 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2073 } else if (yyc == '('/*)*/) { 2074 PL_lex_allbrackets--; 2075 yyc |= (2<<24); 2076 } 2077 force_next(yyc); 2078 } 2079 PL_parser->yychar = YYEMPTY; 2080 } 2081 } 2082 2083 STATIC SV * 2084 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2085 { 2086 dVAR; 2087 SV * const sv = newSVpvn_utf8(start, len, 2088 !IN_BYTES 2089 && UTF 2090 && !is_ascii_string((const U8*)start, len) 2091 && is_utf8_string((const U8*)start, len)); 2092 return sv; 2093 } 2094 2095 /* 2096 * S_force_word 2097 * When the lexer knows the next thing is a word (for instance, it has 2098 * just seen -> and it knows that the next char is a word char, then 2099 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2100 * lookahead. 2101 * 2102 * Arguments: 2103 * char *start : buffer position (must be within PL_linestr) 2104 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD) 2105 * int check_keyword : if true, Perl checks to make sure the word isn't 2106 * a keyword (do this if the word is a label, e.g. goto FOO) 2107 * int allow_pack : if true, : characters will also be allowed (require, 2108 * use, etc. do this) 2109 * int allow_initial_tick : used by the "sub" lexer only. 2110 */ 2111 2112 STATIC char * 2113 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) 2114 { 2115 dVAR; 2116 char *s; 2117 STRLEN len; 2118 2119 PERL_ARGS_ASSERT_FORCE_WORD; 2120 2121 start = SKIPSPACE1(start); 2122 s = start; 2123 if (isIDFIRST_lazy_if(s,UTF) || 2124 (allow_pack && *s == ':') || 2125 (allow_initial_tick && *s == '\'') ) 2126 { 2127 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2128 if (check_keyword && keyword(PL_tokenbuf, len, 0)) 2129 return start; 2130 start_force(PL_curforce); 2131 if (PL_madskills) 2132 curmad('X', newSVpvn(start,s-start)); 2133 if (token == METHOD) { 2134 s = SKIPSPACE1(s); 2135 if (*s == '(') 2136 PL_expect = XTERM; 2137 else { 2138 PL_expect = XOPERATOR; 2139 } 2140 } 2141 if (PL_madskills) 2142 curmad('g', newSVpvs( "forced" )); 2143 NEXTVAL_NEXTTOKE.opval 2144 = (OP*)newSVOP(OP_CONST,0, 2145 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2146 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2147 force_next(token); 2148 } 2149 return s; 2150 } 2151 2152 /* 2153 * S_force_ident 2154 * Called when the lexer wants $foo *foo &foo etc, but the program 2155 * text only contains the "foo" portion. The first argument is a pointer 2156 * to the "foo", and the second argument is the type symbol to prefix. 2157 * Forces the next token to be a "WORD". 2158 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2159 */ 2160 2161 STATIC void 2162 S_force_ident(pTHX_ const char *s, int kind) 2163 { 2164 dVAR; 2165 2166 PERL_ARGS_ASSERT_FORCE_IDENT; 2167 2168 if (s[0]) { 2169 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2170 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2171 UTF ? SVf_UTF8 : 0)); 2172 start_force(PL_curforce); 2173 NEXTVAL_NEXTTOKE.opval = o; 2174 force_next(WORD); 2175 if (kind) { 2176 o->op_private = OPpCONST_ENTERED; 2177 /* XXX see note in pp_entereval() for why we forgo typo 2178 warnings if the symbol must be introduced in an eval. 2179 GSAR 96-10-12 */ 2180 gv_fetchpvn_flags(s, len, 2181 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) 2182 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2183 kind == '$' ? SVt_PV : 2184 kind == '@' ? SVt_PVAV : 2185 kind == '%' ? SVt_PVHV : 2186 SVt_PVGV 2187 ); 2188 } 2189 } 2190 } 2191 2192 static void 2193 S_force_ident_maybe_lex(pTHX_ char pit) 2194 { 2195 start_force(PL_curforce); 2196 NEXTVAL_NEXTTOKE.ival = pit; 2197 force_next('p'); 2198 } 2199 2200 NV 2201 Perl_str_to_version(pTHX_ SV *sv) 2202 { 2203 NV retval = 0.0; 2204 NV nshift = 1.0; 2205 STRLEN len; 2206 const char *start = SvPV_const(sv,len); 2207 const char * const end = start + len; 2208 const bool utf = SvUTF8(sv) ? TRUE : FALSE; 2209 2210 PERL_ARGS_ASSERT_STR_TO_VERSION; 2211 2212 while (start < end) { 2213 STRLEN skip; 2214 UV n; 2215 if (utf) 2216 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2217 else { 2218 n = *(U8*)start; 2219 skip = 1; 2220 } 2221 retval += ((NV)n)/nshift; 2222 start += skip; 2223 nshift *= 1000; 2224 } 2225 return retval; 2226 } 2227 2228 /* 2229 * S_force_version 2230 * Forces the next token to be a version number. 2231 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2232 * and if "guessing" is TRUE, then no new token is created (and the caller 2233 * must use an alternative parsing method). 2234 */ 2235 2236 STATIC char * 2237 S_force_version(pTHX_ char *s, int guessing) 2238 { 2239 dVAR; 2240 OP *version = NULL; 2241 char *d; 2242 #ifdef PERL_MAD 2243 I32 startoff = s - SvPVX(PL_linestr); 2244 #endif 2245 2246 PERL_ARGS_ASSERT_FORCE_VERSION; 2247 2248 s = SKIPSPACE1(s); 2249 2250 d = s; 2251 if (*d == 'v') 2252 d++; 2253 if (isDIGIT(*d)) { 2254 while (isDIGIT(*d) || *d == '_' || *d == '.') 2255 d++; 2256 #ifdef PERL_MAD 2257 if (PL_madskills) { 2258 start_force(PL_curforce); 2259 curmad('X', newSVpvn(s,d-s)); 2260 } 2261 #endif 2262 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2263 SV *ver; 2264 #ifdef USE_LOCALE_NUMERIC 2265 char *loc = savepv(setlocale(LC_NUMERIC, NULL)); 2266 setlocale(LC_NUMERIC, "C"); 2267 #endif 2268 s = scan_num(s, &pl_yylval); 2269 #ifdef USE_LOCALE_NUMERIC 2270 setlocale(LC_NUMERIC, loc); 2271 Safefree(loc); 2272 #endif 2273 version = pl_yylval.opval; 2274 ver = cSVOPx(version)->op_sv; 2275 if (SvPOK(ver) && !SvNIOK(ver)) { 2276 SvUPGRADE(ver, SVt_PVNV); 2277 SvNV_set(ver, str_to_version(ver)); 2278 SvNOK_on(ver); /* hint that it is a version */ 2279 } 2280 } 2281 else if (guessing) { 2282 #ifdef PERL_MAD 2283 if (PL_madskills) { 2284 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2285 PL_nextwhite = 0; 2286 s = SvPVX(PL_linestr) + startoff; 2287 } 2288 #endif 2289 return s; 2290 } 2291 } 2292 2293 #ifdef PERL_MAD 2294 if (PL_madskills && !version) { 2295 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2296 PL_nextwhite = 0; 2297 s = SvPVX(PL_linestr) + startoff; 2298 } 2299 #endif 2300 /* NOTE: The parser sees the package name and the VERSION swapped */ 2301 start_force(PL_curforce); 2302 NEXTVAL_NEXTTOKE.opval = version; 2303 force_next(WORD); 2304 2305 return s; 2306 } 2307 2308 /* 2309 * S_force_strict_version 2310 * Forces the next token to be a version number using strict syntax rules. 2311 */ 2312 2313 STATIC char * 2314 S_force_strict_version(pTHX_ char *s) 2315 { 2316 dVAR; 2317 OP *version = NULL; 2318 #ifdef PERL_MAD 2319 I32 startoff = s - SvPVX(PL_linestr); 2320 #endif 2321 const char *errstr = NULL; 2322 2323 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2324 2325 while (isSPACE(*s)) /* leading whitespace */ 2326 s++; 2327 2328 if (is_STRICT_VERSION(s,&errstr)) { 2329 SV *ver = newSV(0); 2330 s = (char *)scan_version(s, ver, 0); 2331 version = newSVOP(OP_CONST, 0, ver); 2332 } 2333 else if ( (*s != ';' && *s != '{' && *s != '}' ) && 2334 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' ))) 2335 { 2336 PL_bufptr = s; 2337 if (errstr) 2338 yyerror(errstr); /* version required */ 2339 return s; 2340 } 2341 2342 #ifdef PERL_MAD 2343 if (PL_madskills && !version) { 2344 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2345 PL_nextwhite = 0; 2346 s = SvPVX(PL_linestr) + startoff; 2347 } 2348 #endif 2349 /* NOTE: The parser sees the package name and the VERSION swapped */ 2350 start_force(PL_curforce); 2351 NEXTVAL_NEXTTOKE.opval = version; 2352 force_next(WORD); 2353 2354 return s; 2355 } 2356 2357 /* 2358 * S_tokeq 2359 * Tokenize a quoted string passed in as an SV. It finds the next 2360 * chunk, up to end of string or a backslash. It may make a new 2361 * SV containing that chunk (if HINT_NEW_STRING is on). It also 2362 * turns \\ into \. 2363 */ 2364 2365 STATIC SV * 2366 S_tokeq(pTHX_ SV *sv) 2367 { 2368 dVAR; 2369 char *s; 2370 char *send; 2371 char *d; 2372 STRLEN len = 0; 2373 SV *pv = sv; 2374 2375 PERL_ARGS_ASSERT_TOKEQ; 2376 2377 if (!SvLEN(sv)) 2378 goto finish; 2379 2380 s = SvPV_force(sv, len); 2381 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) 2382 goto finish; 2383 send = s + len; 2384 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2385 while (s < send && !(*s == '\\' && s[1] == '\\')) 2386 s++; 2387 if (s == send) 2388 goto finish; 2389 d = s; 2390 if ( PL_hints & HINT_NEW_STRING ) { 2391 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv)); 2392 } 2393 while (s < send) { 2394 if (*s == '\\') { 2395 if (s + 1 < send && (s[1] == '\\')) 2396 s++; /* all that, just for this */ 2397 } 2398 *d++ = *s++; 2399 } 2400 *d = '\0'; 2401 SvCUR_set(sv, d - SvPVX_const(sv)); 2402 finish: 2403 if ( PL_hints & HINT_NEW_STRING ) 2404 return new_constant(NULL, 0, "q", sv, pv, "q", 1); 2405 return sv; 2406 } 2407 2408 /* 2409 * Now come three functions related to double-quote context, 2410 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2411 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2412 * interact with PL_lex_state, and create fake ( ... ) argument lists 2413 * to handle functions and concatenation. 2414 * For example, 2415 * "foo\lbar" 2416 * is tokenised as 2417 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2418 */ 2419 2420 /* 2421 * S_sublex_start 2422 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2423 * 2424 * Pattern matching will set PL_lex_op to the pattern-matching op to 2425 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2426 * 2427 * OP_CONST and OP_READLINE are easy--just make the new op and return. 2428 * 2429 * Everything else becomes a FUNC. 2430 * 2431 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 2432 * had an OP_CONST or OP_READLINE). This just sets us up for a 2433 * call to S_sublex_push(). 2434 */ 2435 2436 STATIC I32 2437 S_sublex_start(pTHX) 2438 { 2439 dVAR; 2440 const I32 op_type = pl_yylval.ival; 2441 2442 if (op_type == OP_NULL) { 2443 pl_yylval.opval = PL_lex_op; 2444 PL_lex_op = NULL; 2445 return THING; 2446 } 2447 if (op_type == OP_CONST || op_type == OP_READLINE) { 2448 SV *sv = tokeq(PL_lex_stuff); 2449 2450 if (SvTYPE(sv) == SVt_PVIV) { 2451 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2452 STRLEN len; 2453 const char * const p = SvPV_const(sv, len); 2454 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2455 SvREFCNT_dec(sv); 2456 sv = nsv; 2457 } 2458 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); 2459 PL_lex_stuff = NULL; 2460 /* Allow <FH> // "foo" */ 2461 if (op_type == OP_READLINE) 2462 PL_expect = XTERMORDORDOR; 2463 return THING; 2464 } 2465 else if (op_type == OP_BACKTICK && PL_lex_op) { 2466 /* readpipe() vas overriden */ 2467 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff); 2468 pl_yylval.opval = PL_lex_op; 2469 PL_lex_op = NULL; 2470 PL_lex_stuff = NULL; 2471 return THING; 2472 } 2473 2474 PL_sublex_info.super_state = PL_lex_state; 2475 PL_sublex_info.sub_inwhat = (U16)op_type; 2476 PL_sublex_info.sub_op = PL_lex_op; 2477 PL_lex_state = LEX_INTERPPUSH; 2478 2479 PL_expect = XTERM; 2480 if (PL_lex_op) { 2481 pl_yylval.opval = PL_lex_op; 2482 PL_lex_op = NULL; 2483 return PMFUNC; 2484 } 2485 else 2486 return FUNC; 2487 } 2488 2489 /* 2490 * S_sublex_push 2491 * Create a new scope to save the lexing state. The scope will be 2492 * ended in S_sublex_done. Returns a '(', starting the function arguments 2493 * to the uc, lc, etc. found before. 2494 * Sets PL_lex_state to LEX_INTERPCONCAT. 2495 */ 2496 2497 STATIC I32 2498 S_sublex_push(pTHX) 2499 { 2500 dVAR; 2501 LEXSHARED *shared; 2502 ENTER; 2503 2504 PL_lex_state = PL_sublex_info.super_state; 2505 SAVEBOOL(PL_lex_dojoin); 2506 SAVEI32(PL_lex_brackets); 2507 SAVEI32(PL_lex_allbrackets); 2508 SAVEI32(PL_lex_formbrack); 2509 SAVEI8(PL_lex_fakeeof); 2510 SAVEI32(PL_lex_casemods); 2511 SAVEI32(PL_lex_starts); 2512 SAVEI8(PL_lex_state); 2513 SAVESPTR(PL_lex_repl); 2514 SAVEVPTR(PL_lex_inpat); 2515 SAVEI16(PL_lex_inwhat); 2516 SAVECOPLINE(PL_curcop); 2517 SAVEPPTR(PL_bufptr); 2518 SAVEPPTR(PL_bufend); 2519 SAVEPPTR(PL_oldbufptr); 2520 SAVEPPTR(PL_oldoldbufptr); 2521 SAVEPPTR(PL_last_lop); 2522 SAVEPPTR(PL_last_uni); 2523 SAVEPPTR(PL_linestart); 2524 SAVESPTR(PL_linestr); 2525 SAVEGENERICPV(PL_lex_brackstack); 2526 SAVEGENERICPV(PL_lex_casestack); 2527 SAVEGENERICPV(PL_parser->lex_shared); 2528 SAVEBOOL(PL_parser->lex_re_reparsing); 2529 2530 /* The here-doc parser needs to be able to peek into outer lexing 2531 scopes to find the body of the here-doc. So we put PL_linestr and 2532 PL_bufptr into lex_shared, to ‘share’ those values. 2533 */ 2534 PL_parser->lex_shared->ls_linestr = PL_linestr; 2535 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2536 2537 PL_linestr = PL_lex_stuff; 2538 PL_lex_repl = PL_sublex_info.repl; 2539 PL_lex_stuff = NULL; 2540 PL_sublex_info.repl = NULL; 2541 2542 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2543 = SvPVX(PL_linestr); 2544 PL_bufend += SvCUR(PL_linestr); 2545 PL_last_lop = PL_last_uni = NULL; 2546 SAVEFREESV(PL_linestr); 2547 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2548 2549 PL_lex_dojoin = FALSE; 2550 PL_lex_brackets = PL_lex_formbrack = 0; 2551 PL_lex_allbrackets = 0; 2552 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2553 Newx(PL_lex_brackstack, 120, char); 2554 Newx(PL_lex_casestack, 12, char); 2555 PL_lex_casemods = 0; 2556 *PL_lex_casestack = '\0'; 2557 PL_lex_starts = 0; 2558 PL_lex_state = LEX_INTERPCONCAT; 2559 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2560 2561 Newxz(shared, 1, LEXSHARED); 2562 shared->ls_prev = PL_parser->lex_shared; 2563 PL_parser->lex_shared = shared; 2564 2565 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 2566 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2567 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2568 PL_lex_inpat = PL_sublex_info.sub_op; 2569 else 2570 PL_lex_inpat = NULL; 2571 2572 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2573 PL_in_eval &= ~EVAL_RE_REPARSING; 2574 2575 return '('; 2576 } 2577 2578 /* 2579 * S_sublex_done 2580 * Restores lexer state after a S_sublex_push. 2581 */ 2582 2583 STATIC I32 2584 S_sublex_done(pTHX) 2585 { 2586 dVAR; 2587 if (!PL_lex_starts++) { 2588 SV * const sv = newSVpvs(""); 2589 if (SvUTF8(PL_linestr)) 2590 SvUTF8_on(sv); 2591 PL_expect = XOPERATOR; 2592 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2593 return THING; 2594 } 2595 2596 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2597 PL_lex_state = LEX_INTERPCASEMOD; 2598 return yylex(); 2599 } 2600 2601 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2602 assert(PL_lex_inwhat != OP_TRANSR); 2603 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) { 2604 PL_linestr = PL_lex_repl; 2605 PL_lex_inpat = 0; 2606 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2607 PL_bufend += SvCUR(PL_linestr); 2608 PL_last_lop = PL_last_uni = NULL; 2609 PL_lex_dojoin = FALSE; 2610 PL_lex_brackets = 0; 2611 PL_lex_allbrackets = 0; 2612 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2613 PL_lex_casemods = 0; 2614 *PL_lex_casestack = '\0'; 2615 PL_lex_starts = 0; 2616 if (SvEVALED(PL_lex_repl)) { 2617 PL_lex_state = LEX_INTERPNORMAL; 2618 PL_lex_starts++; 2619 /* we don't clear PL_lex_repl here, so that we can check later 2620 whether this is an evalled subst; that means we rely on the 2621 logic to ensure sublex_done() is called again only via the 2622 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2623 } 2624 else { 2625 PL_lex_state = LEX_INTERPCONCAT; 2626 PL_lex_repl = NULL; 2627 } 2628 return ','; 2629 } 2630 else { 2631 #ifdef PERL_MAD 2632 if (PL_madskills) { 2633 if (PL_thiswhite) { 2634 if (!PL_endwhite) 2635 PL_endwhite = newSVpvs(""); 2636 sv_catsv(PL_endwhite, PL_thiswhite); 2637 PL_thiswhite = 0; 2638 } 2639 if (PL_thistoken) 2640 sv_setpvs(PL_thistoken,""); 2641 else 2642 PL_realtokenstart = -1; 2643 } 2644 #endif 2645 LEAVE; 2646 PL_bufend = SvPVX(PL_linestr); 2647 PL_bufend += SvCUR(PL_linestr); 2648 PL_expect = XOPERATOR; 2649 PL_sublex_info.sub_inwhat = 0; 2650 return ')'; 2651 } 2652 } 2653 2654 PERL_STATIC_INLINE SV* 2655 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) 2656 { 2657 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2658 * interior, hence to the "}". Finds what the name resolves to, returning 2659 * an SV* containing it; NULL if no valid one found */ 2660 2661 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0); 2662 2663 HV * table; 2664 SV **cvp; 2665 SV *cv; 2666 SV *rv; 2667 HV *stash; 2668 const U8* first_bad_char_loc; 2669 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ 2670 2671 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2672 2673 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, 2674 e - backslash_ptr, 2675 &first_bad_char_loc)) 2676 { 2677 /* If warnings are on, this will print a more detailed analysis of what 2678 * is wrong than the error message below */ 2679 utf8n_to_uvuni(first_bad_char_loc, 2680 e - ((char *) first_bad_char_loc), 2681 NULL, 0); 2682 2683 /* We deliberately don't try to print the malformed character, which 2684 * might not print very well; it also may be just the first of many 2685 * malformations, so don't print what comes after it */ 2686 yyerror(Perl_form(aTHX_ 2687 "Malformed UTF-8 character immediately after '%.*s'", 2688 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr)); 2689 return NULL; 2690 } 2691 2692 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, 2693 /* include the <}> */ 2694 e - backslash_ptr + 1); 2695 if (! SvPOK(res)) { 2696 SvREFCNT_dec_NN(res); 2697 return NULL; 2698 } 2699 2700 /* See if the charnames handler is the Perl core's, and if so, we can skip 2701 * the validation needed for a user-supplied one, as Perl's does its own 2702 * validation. */ 2703 table = GvHV(PL_hintgv); /* ^H */ 2704 cvp = hv_fetchs(table, "charnames", FALSE); 2705 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL) 2706 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL)) 2707 { 2708 const char * const name = HvNAME(stash); 2709 if strEQ(name, "_charnames") { 2710 return res; 2711 } 2712 } 2713 2714 /* Here, it isn't Perl's charname handler. We can't rely on a 2715 * user-supplied handler to validate the input name. For non-ut8 input, 2716 * look to see that the first character is legal. Then loop through the 2717 * rest checking that each is a continuation */ 2718 2719 /* This code needs to be sync'ed with a regex in _charnames.pm which does 2720 * the same thing */ 2721 2722 if (! UTF) { 2723 if (! isALPHAU(*s)) { 2724 goto bad_charname; 2725 } 2726 s++; 2727 while (s < e) { 2728 if (! isCHARNAME_CONT(*s)) { 2729 goto bad_charname; 2730 } 2731 if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { 2732 Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); 2733 } 2734 s++; 2735 } 2736 if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { 2737 Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); 2738 } 2739 } 2740 else { 2741 /* Similarly for utf8. For invariants can check directly; for other 2742 * Latin1, can calculate their code point and check; otherwise use a 2743 * swash */ 2744 if (UTF8_IS_INVARIANT(*s)) { 2745 if (! isALPHAU(*s)) { 2746 goto bad_charname; 2747 } 2748 s++; 2749 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2750 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) { 2751 goto bad_charname; 2752 } 2753 s += 2; 2754 } 2755 else { 2756 if (! PL_utf8_charname_begin) { 2757 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2758 PL_utf8_charname_begin = _core_swash_init("utf8", 2759 "_Perl_Charname_Begin", 2760 &PL_sv_undef, 2761 1, 0, NULL, &flags); 2762 } 2763 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { 2764 goto bad_charname; 2765 } 2766 s += UTF8SKIP(s); 2767 } 2768 2769 while (s < e) { 2770 if (UTF8_IS_INVARIANT(*s)) { 2771 if (! isCHARNAME_CONT(*s)) { 2772 goto bad_charname; 2773 } 2774 if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { 2775 Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); 2776 } 2777 s++; 2778 } 2779 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2780 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, 2781 *(s+1))))) 2782 { 2783 goto bad_charname; 2784 } 2785 s += 2; 2786 } 2787 else { 2788 if (! PL_utf8_charname_continue) { 2789 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2790 PL_utf8_charname_continue = _core_swash_init("utf8", 2791 "_Perl_Charname_Continue", 2792 &PL_sv_undef, 2793 1, 0, NULL, &flags); 2794 } 2795 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { 2796 goto bad_charname; 2797 } 2798 s += UTF8SKIP(s); 2799 } 2800 } 2801 if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { 2802 Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); 2803 } 2804 } 2805 2806 if (SvUTF8(res)) { /* Don't accept malformed input */ 2807 const U8* first_bad_char_loc; 2808 STRLEN len; 2809 const char* const str = SvPV_const(res, len); 2810 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { 2811 /* If warnings are on, this will print a more detailed analysis of 2812 * what is wrong than the error message below */ 2813 utf8n_to_uvuni(first_bad_char_loc, 2814 (char *) first_bad_char_loc - str, 2815 NULL, 0); 2816 2817 /* We deliberately don't try to print the malformed character, 2818 * which might not print very well; it also may be just the first 2819 * of many malformations, so don't print what comes after it */ 2820 yyerror_pv( 2821 Perl_form(aTHX_ 2822 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2823 (int) (e - backslash_ptr + 1), backslash_ptr, 2824 (int) ((char *) first_bad_char_loc - str), str 2825 ), 2826 SVf_UTF8); 2827 return NULL; 2828 } 2829 } 2830 2831 return res; 2832 2833 bad_charname: { 2834 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1); 2835 2836 /* The final %.*s makes sure that should the trailing NUL be missing 2837 * that this print won't run off the end of the string */ 2838 yyerror_pv( 2839 Perl_form(aTHX_ 2840 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 2841 (int)(s - backslash_ptr + bad_char_size), backslash_ptr, 2842 (int)(e - s + bad_char_size), s + bad_char_size 2843 ), 2844 UTF ? SVf_UTF8 : 0); 2845 return NULL; 2846 } 2847 } 2848 2849 /* 2850 scan_const 2851 2852 Extracts the next constant part of a pattern, double-quoted string, 2853 or transliteration. This is terrifying code. 2854 2855 For example, in parsing the double-quoted string "ab\x63$d", it would 2856 stop at the '$' and return an OP_CONST containing 'abc'. 2857 2858 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 2859 processing a pattern (PL_lex_inpat is true), a transliteration 2860 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 2861 2862 Returns a pointer to the character scanned up to. If this is 2863 advanced from the start pointer supplied (i.e. if anything was 2864 successfully parsed), will leave an OP_CONST for the substring scanned 2865 in pl_yylval. Caller must intuit reason for not parsing further 2866 by looking at the next characters herself. 2867 2868 In patterns: 2869 expand: 2870 \N{FOO} => \N{U+hex_for_character_FOO} 2871 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 2872 2873 pass through: 2874 all other \-char, including \N and \N{ apart from \N{ABC} 2875 2876 stops on: 2877 @ and $ where it appears to be a var, but not for $ as tail anchor 2878 \l \L \u \U \Q \E 2879 (?{ or (??{ 2880 2881 2882 In transliterations: 2883 characters are VERY literal, except for - not at the start or end 2884 of the string, which indicates a range. If the range is in bytes, 2885 scan_const expands the range to the full set of intermediate 2886 characters. If the range is in utf8, the hyphen is replaced with 2887 a certain range mark which will be handled by pmtrans() in op.c. 2888 2889 In double-quoted strings: 2890 backslashes: 2891 double-quoted style: \r and \n 2892 constants: \x31, etc. 2893 deprecated backrefs: \1 (in substitution replacements) 2894 case and quoting: \U \Q \E 2895 stops on @ and $ 2896 2897 scan_const does *not* construct ops to handle interpolated strings. 2898 It stops processing as soon as it finds an embedded $ or @ variable 2899 and leaves it to the caller to work out what's going on. 2900 2901 embedded arrays (whether in pattern or not) could be: 2902 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 2903 2904 $ in double-quoted strings must be the symbol of an embedded scalar. 2905 2906 $ in pattern could be $foo or could be tail anchor. Assumption: 2907 it's a tail anchor if $ is the last thing in the string, or if it's 2908 followed by one of "()| \r\n\t" 2909 2910 \1 (backreferences) are turned into $1 in substitutions 2911 2912 The structure of the code is 2913 while (there's a character to process) { 2914 handle transliteration ranges 2915 skip regexp comments /(?#comment)/ and codes /(?{code})/ 2916 skip #-initiated comments in //x patterns 2917 check for embedded arrays 2918 check for embedded scalars 2919 if (backslash) { 2920 deprecate \1 in substitution replacements 2921 handle string-changing backslashes \l \U \Q \E, etc. 2922 switch (what was escaped) { 2923 handle \- in a transliteration (becomes a literal -) 2924 if a pattern and not \N{, go treat as regular character 2925 handle \132 (octal characters) 2926 handle \x15 and \x{1234} (hex characters) 2927 handle \N{name} (named characters, also \N{3,5} in a pattern) 2928 handle \cV (control characters) 2929 handle printf-style backslashes (\f, \r, \n, etc) 2930 } (end switch) 2931 continue 2932 } (end if backslash) 2933 handle regular character 2934 } (end while character to read) 2935 2936 */ 2937 2938 STATIC char * 2939 S_scan_const(pTHX_ char *start) 2940 { 2941 dVAR; 2942 char *send = PL_bufend; /* end of the constant */ 2943 SV *sv = newSV(send - start); /* sv for the constant. See 2944 note below on sizing. */ 2945 char *s = start; /* start of the constant */ 2946 char *d = SvPVX(sv); /* destination for copies */ 2947 bool dorange = FALSE; /* are we in a translit range? */ 2948 bool didrange = FALSE; /* did we just finish a range? */ 2949 bool in_charclass = FALSE; /* within /[...]/ */ 2950 bool has_utf8 = FALSE; /* Output constant is UTF8 */ 2951 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed 2952 to be UTF8? But, this can 2953 show as true when the source 2954 isn't utf8, as for example 2955 when it is entirely composed 2956 of hex constants */ 2957 SV *res; /* result from charnames */ 2958 2959 /* Note on sizing: The scanned constant is placed into sv, which is 2960 * initialized by newSV() assuming one byte of output for every byte of 2961 * input. This routine expects newSV() to allocate an extra byte for a 2962 * trailing NUL, which this routine will append if it gets to the end of 2963 * the input. There may be more bytes of input than output (eg., \N{LATIN 2964 * CAPITAL LETTER A}), or more output than input if the constant ends up 2965 * recoded to utf8, but each time a construct is found that might increase 2966 * the needed size, SvGROW() is called. Its size parameter each time is 2967 * based on the best guess estimate at the time, namely the length used so 2968 * far, plus the length the current construct will occupy, plus room for 2969 * the trailing NUL, plus one byte for every input byte still unscanned */ 2970 2971 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 2972 before set */ 2973 #ifdef EBCDIC 2974 UV literal_endpoint = 0; 2975 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */ 2976 #endif 2977 2978 PERL_ARGS_ASSERT_SCAN_CONST; 2979 2980 assert(PL_lex_inwhat != OP_TRANSR); 2981 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 2982 /* If we are doing a trans and we know we want UTF8 set expectation */ 2983 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 2984 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 2985 } 2986 2987 /* Protect sv from errors and fatal warnings. */ 2988 ENTER_with_name("scan_const"); 2989 SAVEFREESV(sv); 2990 2991 while (s < send || dorange) { 2992 2993 /* get transliterations out of the way (they're most literal) */ 2994 if (PL_lex_inwhat == OP_TRANS) { 2995 /* expand a range A-Z to the full set of characters. AIE! */ 2996 if (dorange) { 2997 I32 i; /* current expanded character */ 2998 I32 min; /* first character in range */ 2999 I32 max; /* last character in range */ 3000 3001 #ifdef EBCDIC 3002 UV uvmax = 0; 3003 #endif 3004 3005 if (has_utf8 3006 #ifdef EBCDIC 3007 && !native_range 3008 #endif 3009 ) { 3010 char * const c = (char*)utf8_hop((U8*)d, -1); 3011 char *e = d++; 3012 while (e-- > c) 3013 *(e + 1) = *e; 3014 *c = (char)UTF_TO_NATIVE(0xff); 3015 /* mark the range as done, and continue */ 3016 dorange = FALSE; 3017 didrange = TRUE; 3018 continue; 3019 } 3020 3021 i = d - SvPVX_const(sv); /* remember current offset */ 3022 #ifdef EBCDIC 3023 SvGROW(sv, 3024 SvLEN(sv) + (has_utf8 ? 3025 (512 - UTF_CONTINUATION_MARK + 3026 UNISKIP(0x100)) 3027 : 256)); 3028 /* How many two-byte within 0..255: 128 in UTF-8, 3029 * 96 in UTF-8-mod. */ 3030 #else 3031 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ 3032 #endif 3033 d = SvPVX(sv) + i; /* refresh d after realloc */ 3034 #ifdef EBCDIC 3035 if (has_utf8) { 3036 int j; 3037 for (j = 0; j <= 1; j++) { 3038 char * const c = (char*)utf8_hop((U8*)d, -1); 3039 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0); 3040 if (j) 3041 min = (U8)uv; 3042 else if (uv < 256) 3043 max = (U8)uv; 3044 else { 3045 max = (U8)0xff; /* only to \xff */ 3046 uvmax = uv; /* \x{100} to uvmax */ 3047 } 3048 d = c; /* eat endpoint chars */ 3049 } 3050 } 3051 else { 3052 #endif 3053 d -= 2; /* eat the first char and the - */ 3054 min = (U8)*d; /* first char in range */ 3055 max = (U8)d[1]; /* last char in range */ 3056 #ifdef EBCDIC 3057 } 3058 #endif 3059 3060 if (min > max) { 3061 Perl_croak(aTHX_ 3062 "Invalid range \"%c-%c\" in transliteration operator", 3063 (char)min, (char)max); 3064 } 3065 3066 #ifdef EBCDIC 3067 if (literal_endpoint == 2 && 3068 ((isLOWER(min) && isLOWER(max)) || 3069 (isUPPER(min) && isUPPER(max)))) { 3070 if (isLOWER(min)) { 3071 for (i = min; i <= max; i++) 3072 if (isLOWER(i)) 3073 *d++ = NATIVE_TO_NEED(has_utf8,i); 3074 } else { 3075 for (i = min; i <= max; i++) 3076 if (isUPPER(i)) 3077 *d++ = NATIVE_TO_NEED(has_utf8,i); 3078 } 3079 } 3080 else 3081 #endif 3082 for (i = min; i <= max; i++) 3083 #ifdef EBCDIC 3084 if (has_utf8) { 3085 const U8 ch = (U8)NATIVE_TO_UTF(i); 3086 if (UNI_IS_INVARIANT(ch)) 3087 *d++ = (U8)i; 3088 else { 3089 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch); 3090 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch); 3091 } 3092 } 3093 else 3094 #endif 3095 *d++ = (char)i; 3096 3097 #ifdef EBCDIC 3098 if (uvmax) { 3099 d = (char*)uvchr_to_utf8((U8*)d, 0x100); 3100 if (uvmax > 0x101) 3101 *d++ = (char)UTF_TO_NATIVE(0xff); 3102 if (uvmax > 0x100) 3103 d = (char*)uvchr_to_utf8((U8*)d, uvmax); 3104 } 3105 #endif 3106 3107 /* mark the range as done, and continue */ 3108 dorange = FALSE; 3109 didrange = TRUE; 3110 #ifdef EBCDIC 3111 literal_endpoint = 0; 3112 #endif 3113 continue; 3114 } 3115 3116 /* range begins (ignore - as first or last char) */ 3117 else if (*s == '-' && s+1 < send && s != start) { 3118 if (didrange) { 3119 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 3120 } 3121 if (has_utf8 3122 #ifdef EBCDIC 3123 && !native_range 3124 #endif 3125 ) { 3126 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ 3127 s++; 3128 continue; 3129 } 3130 dorange = TRUE; 3131 s++; 3132 } 3133 else { 3134 didrange = FALSE; 3135 #ifdef EBCDIC 3136 literal_endpoint = 0; 3137 native_range = TRUE; 3138 #endif 3139 } 3140 } 3141 3142 /* if we get here, we're not doing a transliteration */ 3143 3144 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3145 char *s1 = s-1; 3146 int esc = 0; 3147 while (s1 >= start && *s1-- == '\\') 3148 esc = !esc; 3149 if (!esc) 3150 in_charclass = TRUE; 3151 } 3152 3153 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3154 char *s1 = s-1; 3155 int esc = 0; 3156 while (s1 >= start && *s1-- == '\\') 3157 esc = !esc; 3158 if (!esc) 3159 in_charclass = FALSE; 3160 } 3161 3162 /* skip for regexp comments /(?#comment)/, except for the last 3163 * char, which will be done separately. 3164 * Stop on (?{..}) and friends */ 3165 3166 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3167 if (s[2] == '#') { 3168 while (s+1 < send && *s != ')') 3169 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 3170 } 3171 else if (!PL_lex_casemods && 3172 ( s[2] == '{' /* This should match regcomp.c */ 3173 || (s[2] == '?' && s[3] == '{'))) 3174 { 3175 break; 3176 } 3177 } 3178 3179 /* likewise skip #-initiated comments in //x patterns */ 3180 else if (*s == '#' && PL_lex_inpat && 3181 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { 3182 while (s+1 < send && *s != '\n') { 3183 /* for maint-5.18, half-fix #-in-charclass bug: 3184 * *do* recognise codeblocks: /[#](?{})/ 3185 * *don't* recognise interpolated vars: /[#$x]/ 3186 */ 3187 if (in_charclass && !PL_lex_casemods && s+3 < send && 3188 s[0] == '(' && 3189 s[1] == '?' && 3190 ( s[2] == '{' 3191 || (s[2] == '?' && s[3] == '{'))) 3192 break; 3193 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 3194 } 3195 if (s+ 1 < send && *s != '\n') 3196 break; /* we stopped on (?{}), not EOL */ 3197 } 3198 3199 /* no further processing of single-quoted regex */ 3200 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3201 goto default_action; 3202 3203 /* check for embedded arrays 3204 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3205 */ 3206 else if (*s == '@' && s[1]) { 3207 if (isWORDCHAR_lazy_if(s+1,UTF)) 3208 break; 3209 if (strchr(":'{$", s[1])) 3210 break; 3211 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3212 break; /* in regexp, neither @+ nor @- are interpolated */ 3213 } 3214 3215 /* check for embedded scalars. only stop if we're sure it's a 3216 variable. 3217 */ 3218 else if (*s == '$') { 3219 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3220 break; 3221 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { 3222 if (s[1] == '\\') { 3223 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3224 "Possible unintended interpolation of $\\ in regex"); 3225 } 3226 break; /* in regexp, $ might be tail anchor */ 3227 } 3228 } 3229 3230 /* End of else if chain - OP_TRANS rejoin rest */ 3231 3232 /* backslashes */ 3233 if (*s == '\\' && s+1 < send) { 3234 char* e; /* Can be used for ending '}', etc. */ 3235 3236 s++; 3237 3238 /* warn on \1 - \9 in substitution replacements, but note that \11 3239 * is an octal; and \19 is \1 followed by '9' */ 3240 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && 3241 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) 3242 { 3243 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3244 *--s = '$'; 3245 break; 3246 } 3247 3248 /* string-change backslash escapes */ 3249 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { 3250 --s; 3251 break; 3252 } 3253 /* In a pattern, process \N, but skip any other backslash escapes. 3254 * This is because we don't want to translate an escape sequence 3255 * into a meta symbol and have the regex compiler use the meta 3256 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3257 * in spite of this, we do have to process \N here while the proper 3258 * charnames handler is in scope. See bugs #56444 and #62056. 3259 * There is a complication because \N in a pattern may also stand 3260 * for 'match a non-nl', and not mean a charname, in which case its 3261 * processing should be deferred to the regex compiler. To be a 3262 * charname it must be followed immediately by a '{', and not look 3263 * like \N followed by a curly quantifier, i.e., not something like 3264 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3265 * quantifier */ 3266 else if (PL_lex_inpat 3267 && (*s != 'N' 3268 || s[1] != '{' 3269 || regcurly(s + 1, FALSE))) 3270 { 3271 *d++ = NATIVE_TO_NEED(has_utf8,'\\'); 3272 goto default_action; 3273 } 3274 3275 switch (*s) { 3276 3277 /* quoted - in transliterations */ 3278 case '-': 3279 if (PL_lex_inwhat == OP_TRANS) { 3280 *d++ = *s++; 3281 continue; 3282 } 3283 /* FALL THROUGH */ 3284 default: 3285 { 3286 if ((isALPHANUMERIC(*s))) 3287 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3288 "Unrecognized escape \\%c passed through", 3289 *s); 3290 /* default action is to copy the quoted character */ 3291 goto default_action; 3292 } 3293 3294 /* eg. \132 indicates the octal constant 0132 */ 3295 case '0': case '1': case '2': case '3': 3296 case '4': case '5': case '6': case '7': 3297 { 3298 I32 flags = PERL_SCAN_SILENT_ILLDIGIT; 3299 STRLEN len = 3; 3300 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL)); 3301 s += len; 3302 if (len < 3 && s < send && isDIGIT(*s) 3303 && ckWARN(WARN_MISC)) 3304 { 3305 Perl_warner(aTHX_ packWARN(WARN_MISC), 3306 "%s", form_short_octal_warning(s, len)); 3307 } 3308 } 3309 goto NUM_ESCAPE_INSERT; 3310 3311 /* eg. \o{24} indicates the octal constant \024 */ 3312 case 'o': 3313 { 3314 const char* error; 3315 3316 bool valid = grok_bslash_o(&s, &uv, &error, 3317 TRUE, /* Output warning */ 3318 FALSE, /* Not strict */ 3319 TRUE, /* Output warnings for 3320 non-portables */ 3321 UTF); 3322 if (! valid) { 3323 yyerror(error); 3324 continue; 3325 } 3326 goto NUM_ESCAPE_INSERT; 3327 } 3328 3329 /* eg. \x24 indicates the hex constant 0x24 */ 3330 case 'x': 3331 { 3332 const char* error; 3333 3334 bool valid = grok_bslash_x(&s, &uv, &error, 3335 TRUE, /* Output warning */ 3336 FALSE, /* Not strict */ 3337 TRUE, /* Output warnings for 3338 non-portables */ 3339 UTF); 3340 if (! valid) { 3341 yyerror(error); 3342 continue; 3343 } 3344 } 3345 3346 NUM_ESCAPE_INSERT: 3347 /* Insert oct or hex escaped character. There will always be 3348 * enough room in sv since such escapes will be longer than any 3349 * UTF-8 sequence they can end up as, except if they force us 3350 * to recode the rest of the string into utf8 */ 3351 3352 /* Here uv is the ordinal of the next character being added in 3353 * unicode (converted from native). */ 3354 if (!UNI_IS_INVARIANT(uv)) { 3355 if (!has_utf8 && uv > 255) { 3356 /* Might need to recode whatever we have accumulated so 3357 * far if it contains any chars variant in utf8 or 3358 * utf-ebcdic. */ 3359 3360 SvCUR_set(sv, d - SvPVX_const(sv)); 3361 SvPOK_on(sv); 3362 *d = '\0'; 3363 /* See Note on sizing above. */ 3364 sv_utf8_upgrade_flags_grow(sv, 3365 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3366 UNISKIP(uv) + (STRLEN)(send - s) + 1); 3367 d = SvPVX(sv) + SvCUR(sv); 3368 has_utf8 = TRUE; 3369 } 3370 3371 if (has_utf8) { 3372 d = (char*)uvuni_to_utf8((U8*)d, uv); 3373 if (PL_lex_inwhat == OP_TRANS && 3374 PL_sublex_info.sub_op) { 3375 PL_sublex_info.sub_op->op_private |= 3376 (PL_lex_repl ? OPpTRANS_FROM_UTF 3377 : OPpTRANS_TO_UTF); 3378 } 3379 #ifdef EBCDIC 3380 if (uv > 255 && !dorange) 3381 native_range = FALSE; 3382 #endif 3383 } 3384 else { 3385 *d++ = (char)uv; 3386 } 3387 } 3388 else { 3389 *d++ = (char) uv; 3390 } 3391 continue; 3392 3393 case 'N': 3394 /* In a non-pattern \N must be a named character, like \N{LATIN 3395 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can 3396 * mean to match a non-newline. For non-patterns, named 3397 * characters are converted to their string equivalents. In 3398 * patterns, named characters are not converted to their 3399 * ultimate forms for the same reasons that other escapes 3400 * aren't. Instead, they are converted to the \N{U+...} form 3401 * to get the value from the charnames that is in effect right 3402 * now, while preserving the fact that it was a named character 3403 * so that the regex compiler knows this */ 3404 3405 /* This section of code doesn't generally use the 3406 * NATIVE_TO_NEED() macro to transform the input. I (khw) did 3407 * a close examination of this macro and determined it is a 3408 * no-op except on utfebcdic variant characters. Every 3409 * character generated by this that would normally need to be 3410 * enclosed by this macro is invariant, so the macro is not 3411 * needed, and would complicate use of copy(). XXX There are 3412 * other parts of this file where the macro is used 3413 * inconsistently, but are saved by it being a no-op */ 3414 3415 /* The structure of this section of code (besides checking for 3416 * errors and upgrading to utf8) is: 3417 * Further disambiguate between the two meanings of \N, and if 3418 * not a charname, go process it elsewhere 3419 * If of form \N{U+...}, pass it through if a pattern; 3420 * otherwise convert to utf8 3421 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a 3422 * pattern; otherwise convert to utf8 */ 3423 3424 /* Here, s points to the 'N'; the test below is guaranteed to 3425 * succeed if we are being called on a pattern as we already 3426 * know from a test above that the next character is a '{'. 3427 * On a non-pattern \N must mean 'named sequence, which 3428 * requires braces */ 3429 s++; 3430 if (*s != '{') { 3431 yyerror("Missing braces on \\N{}"); 3432 continue; 3433 } 3434 s++; 3435 3436 /* If there is no matching '}', it is an error. */ 3437 if (! (e = strchr(s, '}'))) { 3438 if (! PL_lex_inpat) { 3439 yyerror("Missing right brace on \\N{}"); 3440 } else { 3441 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N."); 3442 } 3443 continue; 3444 } 3445 3446 /* Here it looks like a named character */ 3447 3448 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3449 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3450 | PERL_SCAN_DISALLOW_PREFIX; 3451 STRLEN len; 3452 3453 /* For \N{U+...}, the '...' is a unicode value even on 3454 * EBCDIC machines */ 3455 s += 2; /* Skip to next char after the 'U+' */ 3456 len = e - s; 3457 uv = grok_hex(s, &len, &flags, NULL); 3458 if (len == 0 || len != (STRLEN)(e - s)) { 3459 yyerror("Invalid hexadecimal number in \\N{U+...}"); 3460 s = e + 1; 3461 continue; 3462 } 3463 3464 if (PL_lex_inpat) { 3465 3466 /* On non-EBCDIC platforms, pass through to the regex 3467 * compiler unchanged. The reason we evaluated the 3468 * number above is to make sure there wasn't a syntax 3469 * error. But on EBCDIC we convert to native so 3470 * downstream code can continue to assume it's native 3471 */ 3472 s -= 5; /* Include the '\N{U+' */ 3473 #ifdef EBCDIC 3474 d += my_snprintf(d, e - s + 1 + 1, /* includes the } 3475 and the \0 */ 3476 "\\N{U+%X}", 3477 (unsigned int) UNI_TO_NATIVE(uv)); 3478 #else 3479 Copy(s, d, e - s + 1, char); /* 1 = include the } */ 3480 d += e - s + 1; 3481 #endif 3482 } 3483 else { /* Not a pattern: convert the hex to string */ 3484 3485 /* If destination is not in utf8, unconditionally 3486 * recode it to be so. This is because \N{} implies 3487 * Unicode semantics, and scalars have to be in utf8 3488 * to guarantee those semantics */ 3489 if (! has_utf8) { 3490 SvCUR_set(sv, d - SvPVX_const(sv)); 3491 SvPOK_on(sv); 3492 *d = '\0'; 3493 /* See Note on sizing above. */ 3494 sv_utf8_upgrade_flags_grow( 3495 sv, 3496 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3497 UNISKIP(uv) + (STRLEN)(send - e) + 1); 3498 d = SvPVX(sv) + SvCUR(sv); 3499 has_utf8 = TRUE; 3500 } 3501 3502 /* Add the string to the output */ 3503 if (UNI_IS_INVARIANT(uv)) { 3504 *d++ = (char) uv; 3505 } 3506 else d = (char*)uvuni_to_utf8((U8*)d, uv); 3507 } 3508 } 3509 else /* Here is \N{NAME} but not \N{U+...}. */ 3510 if ((res = get_and_check_backslash_N_name(s, e))) 3511 { 3512 STRLEN len; 3513 const char *str = SvPV_const(res, len); 3514 if (PL_lex_inpat) { 3515 3516 if (! len) { /* The name resolved to an empty string */ 3517 Copy("\\N{}", d, 4, char); 3518 d += 4; 3519 } 3520 else { 3521 /* In order to not lose information for the regex 3522 * compiler, pass the result in the specially made 3523 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3524 * the code points in hex of each character 3525 * returned by charnames */ 3526 3527 const char *str_end = str + len; 3528 const STRLEN off = d - SvPVX_const(sv); 3529 3530 if (! SvUTF8(res)) { 3531 /* For the non-UTF-8 case, we can determine the 3532 * exact length needed without having to parse 3533 * through the string. Each character takes up 3534 * 2 hex digits plus either a trailing dot or 3535 * the "}" */ 3536 d = off + SvGROW(sv, off 3537 + 3 * len 3538 + 6 /* For the "\N{U+", and 3539 trailing NUL */ 3540 + (STRLEN)(send - e)); 3541 Copy("\\N{U+", d, 5, char); 3542 d += 5; 3543 while (str < str_end) { 3544 char hex_string[4]; 3545 my_snprintf(hex_string, sizeof(hex_string), 3546 "%02X.", (U8) *str); 3547 Copy(hex_string, d, 3, char); 3548 d += 3; 3549 str++; 3550 } 3551 d--; /* We will overwrite below the final 3552 dot with a right brace */ 3553 } 3554 else { 3555 STRLEN char_length; /* cur char's byte length */ 3556 3557 /* and the number of bytes after this is 3558 * translated into hex digits */ 3559 STRLEN output_length; 3560 3561 /* 2 hex per byte; 2 chars for '\N'; 2 chars 3562 * for max('U+', '.'); and 1 for NUL */ 3563 char hex_string[2 * UTF8_MAXBYTES + 5]; 3564 3565 /* Get the first character of the result. */ 3566 U32 uv = utf8n_to_uvuni((U8 *) str, 3567 len, 3568 &char_length, 3569 UTF8_ALLOW_ANYUV); 3570 /* Convert first code point to hex, including 3571 * the boiler plate before it. For all these, 3572 * we convert to native format so that 3573 * downstream code can continue to assume the 3574 * input is native */ 3575 output_length = 3576 my_snprintf(hex_string, sizeof(hex_string), 3577 "\\N{U+%X", 3578 (unsigned int) UNI_TO_NATIVE(uv)); 3579 3580 /* Make sure there is enough space to hold it */ 3581 d = off + SvGROW(sv, off 3582 + output_length 3583 + (STRLEN)(send - e) 3584 + 2); /* '}' + NUL */ 3585 /* And output it */ 3586 Copy(hex_string, d, output_length, char); 3587 d += output_length; 3588 3589 /* For each subsequent character, append dot and 3590 * its ordinal in hex */ 3591 while ((str += char_length) < str_end) { 3592 const STRLEN off = d - SvPVX_const(sv); 3593 U32 uv = utf8n_to_uvuni((U8 *) str, 3594 str_end - str, 3595 &char_length, 3596 UTF8_ALLOW_ANYUV); 3597 output_length = 3598 my_snprintf(hex_string, 3599 sizeof(hex_string), 3600 ".%X", 3601 (unsigned int) UNI_TO_NATIVE(uv)); 3602 3603 d = off + SvGROW(sv, off 3604 + output_length 3605 + (STRLEN)(send - e) 3606 + 2); /* '}' + NUL */ 3607 Copy(hex_string, d, output_length, char); 3608 d += output_length; 3609 } 3610 } 3611 3612 *d++ = '}'; /* Done. Add the trailing brace */ 3613 } 3614 } 3615 else { /* Here, not in a pattern. Convert the name to a 3616 * string. */ 3617 3618 /* If destination is not in utf8, unconditionally 3619 * recode it to be so. This is because \N{} implies 3620 * Unicode semantics, and scalars have to be in utf8 3621 * to guarantee those semantics */ 3622 if (! has_utf8) { 3623 SvCUR_set(sv, d - SvPVX_const(sv)); 3624 SvPOK_on(sv); 3625 *d = '\0'; 3626 /* See Note on sizing above. */ 3627 sv_utf8_upgrade_flags_grow(sv, 3628 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3629 len + (STRLEN)(send - s) + 1); 3630 d = SvPVX(sv) + SvCUR(sv); 3631 has_utf8 = TRUE; 3632 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 3633 3634 /* See Note on sizing above. (NOTE: SvCUR() is not 3635 * set correctly here). */ 3636 const STRLEN off = d - SvPVX_const(sv); 3637 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); 3638 } 3639 Copy(str, d, len, char); 3640 d += len; 3641 } 3642 3643 SvREFCNT_dec(res); 3644 3645 } /* End \N{NAME} */ 3646 #ifdef EBCDIC 3647 if (!dorange) 3648 native_range = FALSE; /* \N{} is defined to be Unicode */ 3649 #endif 3650 s = e + 1; /* Point to just after the '}' */ 3651 continue; 3652 3653 /* \c is a control character */ 3654 case 'c': 3655 s++; 3656 if (s < send) { 3657 *d++ = grok_bslash_c(*s++, has_utf8, 1); 3658 } 3659 else { 3660 yyerror("Missing control char name in \\c"); 3661 } 3662 continue; 3663 3664 /* printf-style backslashes, formfeeds, newlines, etc */ 3665 case 'b': 3666 *d++ = NATIVE_TO_NEED(has_utf8,'\b'); 3667 break; 3668 case 'n': 3669 *d++ = NATIVE_TO_NEED(has_utf8,'\n'); 3670 break; 3671 case 'r': 3672 *d++ = NATIVE_TO_NEED(has_utf8,'\r'); 3673 break; 3674 case 'f': 3675 *d++ = NATIVE_TO_NEED(has_utf8,'\f'); 3676 break; 3677 case 't': 3678 *d++ = NATIVE_TO_NEED(has_utf8,'\t'); 3679 break; 3680 case 'e': 3681 *d++ = ASCII_TO_NEED(has_utf8,'\033'); 3682 break; 3683 case 'a': 3684 *d++ = ASCII_TO_NEED(has_utf8,'\007'); 3685 break; 3686 } /* end switch */ 3687 3688 s++; 3689 continue; 3690 } /* end if (backslash) */ 3691 #ifdef EBCDIC 3692 else 3693 literal_endpoint++; 3694 #endif 3695 3696 default_action: 3697 /* If we started with encoded form, or already know we want it, 3698 then encode the next character */ 3699 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { 3700 STRLEN len = 1; 3701 3702 3703 /* One might think that it is wasted effort in the case of the 3704 * source being utf8 (this_utf8 == TRUE) to take the next character 3705 * in the source, convert it to an unsigned value, and then convert 3706 * it back again. But the source has not been validated here. The 3707 * routine that does the conversion checks for errors like 3708 * malformed utf8 */ 3709 3710 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); 3711 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv)); 3712 if (!has_utf8) { 3713 SvCUR_set(sv, d - SvPVX_const(sv)); 3714 SvPOK_on(sv); 3715 *d = '\0'; 3716 /* See Note on sizing above. */ 3717 sv_utf8_upgrade_flags_grow(sv, 3718 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3719 need + (STRLEN)(send - s) + 1); 3720 d = SvPVX(sv) + SvCUR(sv); 3721 has_utf8 = TRUE; 3722 } else if (need > len) { 3723 /* encoded value larger than old, may need extra space (NOTE: 3724 * SvCUR() is not set correctly here). See Note on sizing 3725 * above. */ 3726 const STRLEN off = d - SvPVX_const(sv); 3727 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; 3728 } 3729 s += len; 3730 3731 d = (char*)uvchr_to_utf8((U8*)d, nextuv); 3732 #ifdef EBCDIC 3733 if (uv > 255 && !dorange) 3734 native_range = FALSE; 3735 #endif 3736 } 3737 else { 3738 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 3739 } 3740 } /* while loop to process each character */ 3741 3742 /* terminate the string and set up the sv */ 3743 *d = '\0'; 3744 SvCUR_set(sv, d - SvPVX_const(sv)); 3745 if (SvCUR(sv) >= SvLEN(sv)) 3746 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf 3747 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); 3748 3749 SvPOK_on(sv); 3750 if (PL_encoding && !has_utf8) { 3751 sv_recode_to_utf8(sv, PL_encoding); 3752 if (SvUTF8(sv)) 3753 has_utf8 = TRUE; 3754 } 3755 if (has_utf8) { 3756 SvUTF8_on(sv); 3757 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3758 PL_sublex_info.sub_op->op_private |= 3759 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3760 } 3761 } 3762 3763 /* shrink the sv if we allocated more than we used */ 3764 if (SvCUR(sv) + 5 < SvLEN(sv)) { 3765 SvPV_shrink_to_cur(sv); 3766 } 3767 3768 /* return the substring (via pl_yylval) only if we parsed anything */ 3769 if (s > PL_bufptr) { 3770 SvREFCNT_inc_simple_void_NN(sv); 3771 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 3772 && ! PL_parser->lex_re_reparsing) 3773 { 3774 const char *const key = PL_lex_inpat ? "qr" : "q"; 3775 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 3776 const char *type; 3777 STRLEN typelen; 3778 3779 if (PL_lex_inwhat == OP_TRANS) { 3780 type = "tr"; 3781 typelen = 2; 3782 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 3783 type = "s"; 3784 typelen = 1; 3785 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 3786 type = "q"; 3787 typelen = 1; 3788 } else { 3789 type = "qq"; 3790 typelen = 2; 3791 } 3792 3793 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 3794 type, typelen); 3795 } 3796 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3797 } 3798 LEAVE_with_name("scan_const"); 3799 return s; 3800 } 3801 3802 /* S_intuit_more 3803 * Returns TRUE if there's more to the expression (e.g., a subscript), 3804 * FALSE otherwise. 3805 * 3806 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 3807 * 3808 * ->[ and ->{ return TRUE 3809 * { and [ outside a pattern are always subscripts, so return TRUE 3810 * if we're outside a pattern and it's not { or [, then return FALSE 3811 * if we're in a pattern and the first char is a { 3812 * {4,5} (any digits around the comma) returns FALSE 3813 * if we're in a pattern and the first char is a [ 3814 * [] returns FALSE 3815 * [SOMETHING] has a funky algorithm to decide whether it's a 3816 * character class or not. It has to deal with things like 3817 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 3818 * anything else returns TRUE 3819 */ 3820 3821 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 3822 3823 STATIC int 3824 S_intuit_more(pTHX_ char *s) 3825 { 3826 dVAR; 3827 3828 PERL_ARGS_ASSERT_INTUIT_MORE; 3829 3830 if (PL_lex_brackets) 3831 return TRUE; 3832 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 3833 return TRUE; 3834 if (*s != '{' && *s != '[') 3835 return FALSE; 3836 if (!PL_lex_inpat) 3837 return TRUE; 3838 3839 /* In a pattern, so maybe we have {n,m}. */ 3840 if (*s == '{') { 3841 if (regcurly(s, FALSE)) { 3842 return FALSE; 3843 } 3844 return TRUE; 3845 } 3846 3847 /* On the other hand, maybe we have a character class */ 3848 3849 s++; 3850 if (*s == ']' || *s == '^') 3851 return FALSE; 3852 else { 3853 /* this is terrifying, and it works */ 3854 int weight; 3855 char seen[256]; 3856 const char * const send = strchr(s,']'); 3857 unsigned char un_char, last_un_char; 3858 char tmpbuf[sizeof PL_tokenbuf * 4]; 3859 3860 if (!send) /* has to be an expression */ 3861 return TRUE; 3862 weight = 2; /* let's weigh the evidence */ 3863 3864 if (*s == '$') 3865 weight -= 3; 3866 else if (isDIGIT(*s)) { 3867 if (s[1] != ']') { 3868 if (isDIGIT(s[1]) && s[2] == ']') 3869 weight -= 10; 3870 } 3871 else 3872 weight -= 100; 3873 } 3874 Zero(seen,256,char); 3875 un_char = 255; 3876 for (; s < send; s++) { 3877 last_un_char = un_char; 3878 un_char = (unsigned char)*s; 3879 switch (*s) { 3880 case '@': 3881 case '&': 3882 case '$': 3883 weight -= seen[un_char] * 10; 3884 if (isWORDCHAR_lazy_if(s+1,UTF)) { 3885 int len; 3886 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE); 3887 len = (int)strlen(tmpbuf); 3888 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 3889 UTF ? SVf_UTF8 : 0, SVt_PV)) 3890 weight -= 100; 3891 else 3892 weight -= 10; 3893 } 3894 else if (*s == '$' && s[1] && 3895 strchr("[#!%*<>()-=",s[1])) { 3896 if (/*{*/ strchr("])} =",s[2])) 3897 weight -= 10; 3898 else 3899 weight -= 1; 3900 } 3901 break; 3902 case '\\': 3903 un_char = 254; 3904 if (s[1]) { 3905 if (strchr("wds]",s[1])) 3906 weight += 100; 3907 else if (seen[(U8)'\''] || seen[(U8)'"']) 3908 weight += 1; 3909 else if (strchr("rnftbxcav",s[1])) 3910 weight += 40; 3911 else if (isDIGIT(s[1])) { 3912 weight += 40; 3913 while (s[1] && isDIGIT(s[1])) 3914 s++; 3915 } 3916 } 3917 else 3918 weight += 100; 3919 break; 3920 case '-': 3921 if (s[1] == '\\') 3922 weight += 50; 3923 if (strchr("aA01! ",last_un_char)) 3924 weight += 30; 3925 if (strchr("zZ79~",s[1])) 3926 weight += 30; 3927 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 3928 weight -= 5; /* cope with negative subscript */ 3929 break; 3930 default: 3931 if (!isWORDCHAR(last_un_char) 3932 && !(last_un_char == '$' || last_un_char == '@' 3933 || last_un_char == '&') 3934 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 3935 char *d = tmpbuf; 3936 while (isALPHA(*s)) 3937 *d++ = *s++; 3938 *d = '\0'; 3939 if (keyword(tmpbuf, d - tmpbuf, 0)) 3940 weight -= 150; 3941 } 3942 if (un_char == last_un_char + 1) 3943 weight += 5; 3944 weight -= seen[un_char]; 3945 break; 3946 } 3947 seen[un_char]++; 3948 } 3949 if (weight >= 0) /* probably a character class */ 3950 return FALSE; 3951 } 3952 3953 return TRUE; 3954 } 3955 3956 /* 3957 * S_intuit_method 3958 * 3959 * Does all the checking to disambiguate 3960 * foo bar 3961 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 3962 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 3963 * 3964 * First argument is the stuff after the first token, e.g. "bar". 3965 * 3966 * Not a method if foo is a filehandle. 3967 * Not a method if foo is a subroutine prototyped to take a filehandle. 3968 * Not a method if it's really "Foo $bar" 3969 * Method if it's "foo $bar" 3970 * Not a method if it's really "print foo $bar" 3971 * Method if it's really "foo package::" (interpreted as package->foo) 3972 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 3973 * Not a method if bar is a filehandle or package, but is quoted with 3974 * => 3975 */ 3976 3977 STATIC int 3978 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) 3979 { 3980 dVAR; 3981 char *s = start + (*start == '$'); 3982 char tmpbuf[sizeof PL_tokenbuf]; 3983 STRLEN len; 3984 GV* indirgv; 3985 #ifdef PERL_MAD 3986 int soff; 3987 #endif 3988 3989 PERL_ARGS_ASSERT_INTUIT_METHOD; 3990 3991 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 3992 return 0; 3993 if (cv && SvPOK(cv)) { 3994 const char *proto = CvPROTO(cv); 3995 if (proto) { 3996 if (*proto == ';') 3997 proto++; 3998 if (*proto == '*') 3999 return 0; 4000 } 4001 } 4002 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4003 /* start is the beginning of the possible filehandle/object, 4004 * and s is the end of it 4005 * tmpbuf is a copy of it 4006 */ 4007 4008 if (*start == '$') { 4009 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || 4010 isUPPER(*PL_tokenbuf)) 4011 return 0; 4012 #ifdef PERL_MAD 4013 len = start - SvPVX(PL_linestr); 4014 #endif 4015 s = PEEKSPACE(s); 4016 #ifdef PERL_MAD 4017 start = SvPVX(PL_linestr) + len; 4018 #endif 4019 PL_bufptr = start; 4020 PL_expect = XREF; 4021 return *s == '(' ? FUNCMETH : METHOD; 4022 } 4023 if (!keyword(tmpbuf, len, 0)) { 4024 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4025 len -= 2; 4026 tmpbuf[len] = '\0'; 4027 #ifdef PERL_MAD 4028 soff = s - SvPVX(PL_linestr); 4029 #endif 4030 goto bare_package; 4031 } 4032 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 4033 if (indirgv && GvCVu(indirgv)) 4034 return 0; 4035 /* filehandle or package name makes it a method */ 4036 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4037 #ifdef PERL_MAD 4038 soff = s - SvPVX(PL_linestr); 4039 #endif 4040 s = PEEKSPACE(s); 4041 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4042 return 0; /* no assumptions -- "=>" quotes bareword */ 4043 bare_package: 4044 start_force(PL_curforce); 4045 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, 4046 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4047 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4048 if (PL_madskills) 4049 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start, 4050 ( UTF ? SVf_UTF8 : 0 ))); 4051 PL_expect = XTERM; 4052 force_next(WORD); 4053 PL_bufptr = s; 4054 #ifdef PERL_MAD 4055 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */ 4056 #endif 4057 return *s == '(' ? FUNCMETH : METHOD; 4058 } 4059 } 4060 return 0; 4061 } 4062 4063 /* Encoded script support. filter_add() effectively inserts a 4064 * 'pre-processing' function into the current source input stream. 4065 * Note that the filter function only applies to the current source file 4066 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4067 * 4068 * The datasv parameter (which may be NULL) can be used to pass 4069 * private data to this instance of the filter. The filter function 4070 * can recover the SV using the FILTER_DATA macro and use it to 4071 * store private buffers and state information. 4072 * 4073 * The supplied datasv parameter is upgraded to a PVIO type 4074 * and the IoDIRP/IoANY field is used to store the function pointer, 4075 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4076 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4077 * private use must be set using malloc'd pointers. 4078 */ 4079 4080 SV * 4081 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4082 { 4083 dVAR; 4084 if (!funcp) 4085 return NULL; 4086 4087 if (!PL_parser) 4088 return NULL; 4089 4090 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4091 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4092 4093 if (!PL_rsfp_filters) 4094 PL_rsfp_filters = newAV(); 4095 if (!datasv) 4096 datasv = newSV(0); 4097 SvUPGRADE(datasv, SVt_PVIO); 4098 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4099 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4100 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4101 FPTR2DPTR(void *, IoANY(datasv)), 4102 SvPV_nolen(datasv))); 4103 av_unshift(PL_rsfp_filters, 1); 4104 av_store(PL_rsfp_filters, 0, datasv) ; 4105 if ( 4106 !PL_parser->filtered 4107 && PL_parser->lex_flags & LEX_EVALBYTES 4108 && PL_bufptr < PL_bufend 4109 ) { 4110 const char *s = PL_bufptr; 4111 while (s < PL_bufend) { 4112 if (*s == '\n') { 4113 SV *linestr = PL_parser->linestr; 4114 char *buf = SvPVX(linestr); 4115 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4116 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4117 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4118 STRLEN const linestart_pos = PL_parser->linestart - buf; 4119 STRLEN const last_uni_pos = 4120 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4121 STRLEN const last_lop_pos = 4122 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4123 av_push(PL_rsfp_filters, linestr); 4124 PL_parser->linestr = 4125 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4126 buf = SvPVX(PL_parser->linestr); 4127 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4128 PL_parser->bufptr = buf + bufptr_pos; 4129 PL_parser->oldbufptr = buf + oldbufptr_pos; 4130 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4131 PL_parser->linestart = buf + linestart_pos; 4132 if (PL_parser->last_uni) 4133 PL_parser->last_uni = buf + last_uni_pos; 4134 if (PL_parser->last_lop) 4135 PL_parser->last_lop = buf + last_lop_pos; 4136 SvLEN(linestr) = SvCUR(linestr); 4137 SvCUR(linestr) = s-SvPVX(linestr); 4138 PL_parser->filtered = 1; 4139 break; 4140 } 4141 s++; 4142 } 4143 } 4144 return(datasv); 4145 } 4146 4147 4148 /* Delete most recently added instance of this filter function. */ 4149 void 4150 Perl_filter_del(pTHX_ filter_t funcp) 4151 { 4152 dVAR; 4153 SV *datasv; 4154 4155 PERL_ARGS_ASSERT_FILTER_DEL; 4156 4157 #ifdef DEBUGGING 4158 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4159 FPTR2DPTR(void*, funcp))); 4160 #endif 4161 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4162 return; 4163 /* if filter is on top of stack (usual case) just pop it off */ 4164 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4165 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4166 sv_free(av_pop(PL_rsfp_filters)); 4167 4168 return; 4169 } 4170 /* we need to search for the correct entry and clear it */ 4171 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4172 } 4173 4174 4175 /* Invoke the idxth filter function for the current rsfp. */ 4176 /* maxlen 0 = read one text line */ 4177 I32 4178 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4179 { 4180 dVAR; 4181 filter_t funcp; 4182 SV *datasv = NULL; 4183 /* This API is bad. It should have been using unsigned int for maxlen. 4184 Not sure if we want to change the API, but if not we should sanity 4185 check the value here. */ 4186 unsigned int correct_length 4187 = maxlen < 0 ? 4188 #ifdef PERL_MICRO 4189 0x7FFFFFFF 4190 #else 4191 INT_MAX 4192 #endif 4193 : maxlen; 4194 4195 PERL_ARGS_ASSERT_FILTER_READ; 4196 4197 if (!PL_parser || !PL_rsfp_filters) 4198 return -1; 4199 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4200 /* Provide a default input filter to make life easy. */ 4201 /* Note that we append to the line. This is handy. */ 4202 DEBUG_P(PerlIO_printf(Perl_debug_log, 4203 "filter_read %d: from rsfp\n", idx)); 4204 if (correct_length) { 4205 /* Want a block */ 4206 int len ; 4207 const int old_len = SvCUR(buf_sv); 4208 4209 /* ensure buf_sv is large enough */ 4210 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4211 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4212 correct_length)) <= 0) { 4213 if (PerlIO_error(PL_rsfp)) 4214 return -1; /* error */ 4215 else 4216 return 0 ; /* end of file */ 4217 } 4218 SvCUR_set(buf_sv, old_len + len) ; 4219 SvPVX(buf_sv)[old_len + len] = '\0'; 4220 } else { 4221 /* Want a line */ 4222 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4223 if (PerlIO_error(PL_rsfp)) 4224 return -1; /* error */ 4225 else 4226 return 0 ; /* end of file */ 4227 } 4228 } 4229 return SvCUR(buf_sv); 4230 } 4231 /* Skip this filter slot if filter has been deleted */ 4232 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4233 DEBUG_P(PerlIO_printf(Perl_debug_log, 4234 "filter_read %d: skipped (filter deleted)\n", 4235 idx)); 4236 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4237 } 4238 if (SvTYPE(datasv) != SVt_PVIO) { 4239 if (correct_length) { 4240 /* Want a block */ 4241 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4242 if (!remainder) return 0; /* eof */ 4243 if (correct_length > remainder) correct_length = remainder; 4244 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4245 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4246 } else { 4247 /* Want a line */ 4248 const char *s = SvEND(datasv); 4249 const char *send = SvPVX(datasv) + SvLEN(datasv); 4250 while (s < send) { 4251 if (*s == '\n') { 4252 s++; 4253 break; 4254 } 4255 s++; 4256 } 4257 if (s == send) return 0; /* eof */ 4258 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4259 SvCUR_set(datasv, s-SvPVX(datasv)); 4260 } 4261 return SvCUR(buf_sv); 4262 } 4263 /* Get function pointer hidden within datasv */ 4264 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4265 DEBUG_P(PerlIO_printf(Perl_debug_log, 4266 "filter_read %d: via function %p (%s)\n", 4267 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4268 /* Call function. The function is expected to */ 4269 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4270 /* Return: <0:error, =0:eof, >0:not eof */ 4271 return (*funcp)(aTHX_ idx, buf_sv, correct_length); 4272 } 4273 4274 STATIC char * 4275 S_filter_gets(pTHX_ SV *sv, STRLEN append) 4276 { 4277 dVAR; 4278 4279 PERL_ARGS_ASSERT_FILTER_GETS; 4280 4281 #ifdef PERL_CR_FILTER 4282 if (!PL_rsfp_filters) { 4283 filter_add(S_cr_textfilter,NULL); 4284 } 4285 #endif 4286 if (PL_rsfp_filters) { 4287 if (!append) 4288 SvCUR_set(sv, 0); /* start with empty line */ 4289 if (FILTER_READ(0, sv, 0) > 0) 4290 return ( SvPVX(sv) ) ; 4291 else 4292 return NULL ; 4293 } 4294 else 4295 return (sv_gets(sv, PL_rsfp, append)); 4296 } 4297 4298 STATIC HV * 4299 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4300 { 4301 dVAR; 4302 GV *gv; 4303 4304 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4305 4306 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 4307 return PL_curstash; 4308 4309 if (len > 2 && 4310 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') && 4311 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4312 { 4313 return GvHV(gv); /* Foo:: */ 4314 } 4315 4316 /* use constant CLASS => 'MyClass' */ 4317 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4318 if (gv && GvCV(gv)) { 4319 SV * const sv = cv_const_sv(GvCV(gv)); 4320 if (sv) 4321 pkgname = SvPV_const(sv, len); 4322 } 4323 4324 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4325 } 4326 4327 /* 4328 * S_readpipe_override 4329 * Check whether readpipe() is overridden, and generates the appropriate 4330 * optree, provided sublex_start() is called afterwards. 4331 */ 4332 STATIC void 4333 S_readpipe_override(pTHX) 4334 { 4335 GV **gvp; 4336 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV); 4337 pl_yylval.ival = OP_BACKTICK; 4338 if ((gv_readpipe 4339 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)) 4340 || 4341 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE)) 4342 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe) 4343 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))) 4344 { 4345 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 4346 op_append_elem(OP_LIST, 4347 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */ 4348 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe)))); 4349 } 4350 } 4351 4352 #ifdef PERL_MAD 4353 /* 4354 * Perl_madlex 4355 * The intent of this yylex wrapper is to minimize the changes to the 4356 * tokener when we aren't interested in collecting madprops. It remains 4357 * to be seen how successful this strategy will be... 4358 */ 4359 4360 int 4361 Perl_madlex(pTHX) 4362 { 4363 int optype; 4364 char *s = PL_bufptr; 4365 4366 /* make sure PL_thiswhite is initialized */ 4367 PL_thiswhite = 0; 4368 PL_thismad = 0; 4369 4370 /* previous token ate up our whitespace? */ 4371 if (!PL_lasttoke && PL_nextwhite) { 4372 PL_thiswhite = PL_nextwhite; 4373 PL_nextwhite = 0; 4374 } 4375 4376 /* isolate the token, and figure out where it is without whitespace */ 4377 PL_realtokenstart = -1; 4378 PL_thistoken = 0; 4379 optype = yylex(); 4380 s = PL_bufptr; 4381 assert(PL_curforce < 0); 4382 4383 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */ 4384 if (!PL_thistoken) { 4385 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop)) 4386 PL_thistoken = newSVpvs(""); 4387 else { 4388 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 4389 PL_thistoken = newSVpvn(tstart, s - tstart); 4390 } 4391 } 4392 if (PL_thismad) /* install head */ 4393 CURMAD('X', PL_thistoken); 4394 } 4395 4396 /* last whitespace of a sublex? */ 4397 if (optype == ')' && PL_endwhite) { 4398 CURMAD('X', PL_endwhite); 4399 } 4400 4401 if (!PL_thismad) { 4402 4403 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */ 4404 if (!PL_thiswhite && !PL_endwhite && !optype) { 4405 sv_free(PL_thistoken); 4406 PL_thistoken = 0; 4407 return 0; 4408 } 4409 4410 /* put off final whitespace till peg */ 4411 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) { 4412 PL_nextwhite = PL_thiswhite; 4413 PL_thiswhite = 0; 4414 } 4415 else if (PL_thisopen) { 4416 CURMAD('q', PL_thisopen); 4417 if (PL_thistoken) 4418 sv_free(PL_thistoken); 4419 PL_thistoken = 0; 4420 } 4421 else { 4422 /* Store actual token text as madprop X */ 4423 CURMAD('X', PL_thistoken); 4424 } 4425 4426 if (PL_thiswhite) { 4427 /* add preceding whitespace as madprop _ */ 4428 CURMAD('_', PL_thiswhite); 4429 } 4430 4431 if (PL_thisstuff) { 4432 /* add quoted material as madprop = */ 4433 CURMAD('=', PL_thisstuff); 4434 } 4435 4436 if (PL_thisclose) { 4437 /* add terminating quote as madprop Q */ 4438 CURMAD('Q', PL_thisclose); 4439 } 4440 } 4441 4442 /* special processing based on optype */ 4443 4444 switch (optype) { 4445 4446 /* opval doesn't need a TOKEN since it can already store mp */ 4447 case WORD: 4448 case METHOD: 4449 case FUNCMETH: 4450 case THING: 4451 case PMFUNC: 4452 case PRIVATEREF: 4453 case FUNC0SUB: 4454 case UNIOPSUB: 4455 case LSTOPSUB: 4456 if (pl_yylval.opval) 4457 append_madprops(PL_thismad, pl_yylval.opval, 0); 4458 PL_thismad = 0; 4459 return optype; 4460 4461 /* fake EOF */ 4462 case 0: 4463 optype = PEG; 4464 if (PL_endwhite) { 4465 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0); 4466 PL_endwhite = 0; 4467 } 4468 break; 4469 4470 /* pval */ 4471 case LABEL: 4472 break; 4473 4474 case ']': 4475 case '}': 4476 if (PL_faketokens) 4477 break; 4478 /* remember any fake bracket that lexer is about to discard */ 4479 if (PL_lex_brackets == 1 && 4480 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK)) 4481 { 4482 s = PL_bufptr; 4483 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4484 s++; 4485 if (*s == '}') { 4486 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr); 4487 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4488 PL_thiswhite = 0; 4489 PL_bufptr = s - 1; 4490 break; /* don't bother looking for trailing comment */ 4491 } 4492 else 4493 s = PL_bufptr; 4494 } 4495 if (optype == ']') 4496 break; 4497 /* FALLTHROUGH */ 4498 4499 /* attach a trailing comment to its statement instead of next token */ 4500 case ';': 4501 if (PL_faketokens) 4502 break; 4503 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) { 4504 s = PL_bufptr; 4505 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4506 s++; 4507 if (*s == '\n' || *s == '#') { 4508 while (s < PL_bufend && *s != '\n') 4509 s++; 4510 if (s < PL_bufend) 4511 s++; 4512 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr); 4513 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4514 PL_thiswhite = 0; 4515 PL_bufptr = s; 4516 } 4517 } 4518 break; 4519 4520 /* ival */ 4521 default: 4522 break; 4523 4524 } 4525 4526 /* Create new token struct. Note: opvals return early above. */ 4527 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad); 4528 PL_thismad = 0; 4529 return optype; 4530 } 4531 #endif 4532 4533 STATIC char * 4534 S_tokenize_use(pTHX_ int is_use, char *s) { 4535 dVAR; 4536 4537 PERL_ARGS_ASSERT_TOKENIZE_USE; 4538 4539 if (PL_expect != XSTATE) 4540 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4541 is_use ? "use" : "no")); 4542 PL_expect = XTERM; 4543 s = SKIPSPACE1(s); 4544 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4545 s = force_version(s, TRUE); 4546 if (*s == ';' || *s == '}' 4547 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) { 4548 start_force(PL_curforce); 4549 NEXTVAL_NEXTTOKE.opval = NULL; 4550 force_next(WORD); 4551 } 4552 else if (*s == 'v') { 4553 s = force_word(s,WORD,FALSE,TRUE,FALSE); 4554 s = force_version(s, FALSE); 4555 } 4556 } 4557 else { 4558 s = force_word(s,WORD,FALSE,TRUE,FALSE); 4559 s = force_version(s, FALSE); 4560 } 4561 pl_yylval.ival = is_use; 4562 return s; 4563 } 4564 #ifdef DEBUGGING 4565 static const char* const exp_name[] = 4566 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4567 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" 4568 }; 4569 #endif 4570 4571 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l) 4572 STATIC bool 4573 S_word_takes_any_delimeter(char *p, STRLEN len) 4574 { 4575 return (len == 1 && strchr("msyq", p[0])) || 4576 (len == 2 && ( 4577 (p[0] == 't' && p[1] == 'r') || 4578 (p[0] == 'q' && strchr("qwxr", p[1])))); 4579 } 4580 4581 /* 4582 yylex 4583 4584 Works out what to call the token just pulled out of the input 4585 stream. The yacc parser takes care of taking the ops we return and 4586 stitching them into a tree. 4587 4588 Returns: 4589 The type of the next token 4590 4591 Structure: 4592 Switch based on the current state: 4593 - if we already built the token before, use it 4594 - if we have a case modifier in a string, deal with that 4595 - handle other cases of interpolation inside a string 4596 - scan the next line if we are inside a format 4597 In the normal state switch on the next character: 4598 - default: 4599 if alphabetic, go to key lookup 4600 unrecoginized character - croak 4601 - 0/4/26: handle end-of-line or EOF 4602 - cases for whitespace 4603 - \n and #: handle comments and line numbers 4604 - various operators, brackets and sigils 4605 - numbers 4606 - quotes 4607 - 'v': vstrings (or go to key lookup) 4608 - 'x' repetition operator (or go to key lookup) 4609 - other ASCII alphanumerics (key lookup begins here): 4610 word before => ? 4611 keyword plugin 4612 scan built-in keyword (but do nothing with it yet) 4613 check for statement label 4614 check for lexical subs 4615 goto just_a_word if there is one 4616 see whether built-in keyword is overridden 4617 switch on keyword number: 4618 - default: just_a_word: 4619 not a built-in keyword; handle bareword lookup 4620 disambiguate between method and sub call 4621 fall back to bareword 4622 - cases for built-in keywords 4623 */ 4624 4625 4626 #ifdef __SC__ 4627 #pragma segment Perl_yylex 4628 #endif 4629 int 4630 Perl_yylex(pTHX) 4631 { 4632 dVAR; 4633 char *s = PL_bufptr; 4634 char *d; 4635 STRLEN len; 4636 bool bof = FALSE; 4637 U8 formbrack = 0; 4638 U32 fake_eof = 0; 4639 4640 /* orig_keyword, gvp, and gv are initialized here because 4641 * jump to the label just_a_word_zero can bypass their 4642 * initialization later. */ 4643 I32 orig_keyword = 0; 4644 GV *gv = NULL; 4645 GV **gvp = NULL; 4646 4647 DEBUG_T( { 4648 SV* tmp = newSVpvs(""); 4649 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n", 4650 (IV)CopLINE(PL_curcop), 4651 lex_state_names[PL_lex_state], 4652 exp_name[PL_expect], 4653 pv_display(tmp, s, strlen(s), 0, 60)); 4654 SvREFCNT_dec(tmp); 4655 } ); 4656 4657 switch (PL_lex_state) { 4658 #ifdef COMMENTARY 4659 case LEX_NORMAL: /* Some compilers will produce faster */ 4660 case LEX_INTERPNORMAL: /* code if we comment these out. */ 4661 break; 4662 #endif 4663 4664 /* when we've already built the next token, just pull it out of the queue */ 4665 case LEX_KNOWNEXT: 4666 #ifdef PERL_MAD 4667 PL_lasttoke--; 4668 pl_yylval = PL_nexttoke[PL_lasttoke].next_val; 4669 if (PL_madskills) { 4670 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad; 4671 PL_nexttoke[PL_lasttoke].next_mad = 0; 4672 if (PL_thismad && PL_thismad->mad_key == '_') { 4673 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val); 4674 PL_thismad->mad_val = 0; 4675 mad_free(PL_thismad); 4676 PL_thismad = 0; 4677 } 4678 } 4679 if (!PL_lasttoke) { 4680 PL_lex_state = PL_lex_defer; 4681 PL_expect = PL_lex_expect; 4682 PL_lex_defer = LEX_NORMAL; 4683 if (!PL_nexttoke[PL_lasttoke].next_type) 4684 return yylex(); 4685 } 4686 #else 4687 PL_nexttoke--; 4688 pl_yylval = PL_nextval[PL_nexttoke]; 4689 if (!PL_nexttoke) { 4690 PL_lex_state = PL_lex_defer; 4691 PL_expect = PL_lex_expect; 4692 PL_lex_defer = LEX_NORMAL; 4693 } 4694 #endif 4695 { 4696 I32 next_type; 4697 #ifdef PERL_MAD 4698 next_type = PL_nexttoke[PL_lasttoke].next_type; 4699 #else 4700 next_type = PL_nexttype[PL_nexttoke]; 4701 #endif 4702 if (next_type & (7<<24)) { 4703 if (next_type & (1<<24)) { 4704 if (PL_lex_brackets > 100) 4705 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 4706 PL_lex_brackstack[PL_lex_brackets++] = 4707 (char) ((next_type >> 16) & 0xff); 4708 } 4709 if (next_type & (2<<24)) 4710 PL_lex_allbrackets++; 4711 if (next_type & (4<<24)) 4712 PL_lex_allbrackets--; 4713 next_type &= 0xffff; 4714 } 4715 return REPORT(next_type == 'p' ? pending_ident() : next_type); 4716 } 4717 4718 /* interpolated case modifiers like \L \U, including \Q and \E. 4719 when we get here, PL_bufptr is at the \ 4720 */ 4721 case LEX_INTERPCASEMOD: 4722 #ifdef DEBUGGING 4723 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 4724 Perl_croak(aTHX_ 4725 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 4726 PL_bufptr, PL_bufend, *PL_bufptr); 4727 #endif 4728 /* handle \E or end of string */ 4729 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 4730 /* if at a \E */ 4731 if (PL_lex_casemods) { 4732 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 4733 PL_lex_casestack[PL_lex_casemods] = '\0'; 4734 4735 if (PL_bufptr != PL_bufend 4736 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 4737 || oldmod == 'F')) { 4738 PL_bufptr += 2; 4739 PL_lex_state = LEX_INTERPCONCAT; 4740 #ifdef PERL_MAD 4741 if (PL_madskills) 4742 PL_thistoken = newSVpvs("\\E"); 4743 #endif 4744 } 4745 PL_lex_allbrackets--; 4746 return REPORT(')'); 4747 } 4748 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 4749 /* Got an unpaired \E */ 4750 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4751 "Useless use of \\E"); 4752 } 4753 #ifdef PERL_MAD 4754 while (PL_bufptr != PL_bufend && 4755 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') { 4756 if (PL_madskills) { 4757 if (!PL_thiswhite) 4758 PL_thiswhite = newSVpvs(""); 4759 sv_catpvn(PL_thiswhite, PL_bufptr, 2); 4760 } 4761 PL_bufptr += 2; 4762 } 4763 #else 4764 if (PL_bufptr != PL_bufend) 4765 PL_bufptr += 2; 4766 #endif 4767 PL_lex_state = LEX_INTERPCONCAT; 4768 return yylex(); 4769 } 4770 else { 4771 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4772 "### Saw case modifier\n"); }); 4773 s = PL_bufptr + 1; 4774 if (s[1] == '\\' && s[2] == 'E') { 4775 #ifdef PERL_MAD 4776 if (PL_madskills) { 4777 if (!PL_thiswhite) 4778 PL_thiswhite = newSVpvs(""); 4779 sv_catpvn(PL_thiswhite, PL_bufptr, 4); 4780 } 4781 #endif 4782 PL_bufptr = s + 3; 4783 PL_lex_state = LEX_INTERPCONCAT; 4784 return yylex(); 4785 } 4786 else { 4787 I32 tmp; 4788 if (!PL_madskills) /* when just compiling don't need correct */ 4789 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 4790 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 4791 if ((*s == 'L' || *s == 'U' || *s == 'F') && 4792 (strchr(PL_lex_casestack, 'L') 4793 || strchr(PL_lex_casestack, 'U') 4794 || strchr(PL_lex_casestack, 'F'))) { 4795 PL_lex_casestack[--PL_lex_casemods] = '\0'; 4796 PL_lex_allbrackets--; 4797 return REPORT(')'); 4798 } 4799 if (PL_lex_casemods > 10) 4800 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 4801 PL_lex_casestack[PL_lex_casemods++] = *s; 4802 PL_lex_casestack[PL_lex_casemods] = '\0'; 4803 PL_lex_state = LEX_INTERPCONCAT; 4804 start_force(PL_curforce); 4805 NEXTVAL_NEXTTOKE.ival = 0; 4806 force_next((2<<24)|'('); 4807 start_force(PL_curforce); 4808 if (*s == 'l') 4809 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 4810 else if (*s == 'u') 4811 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 4812 else if (*s == 'L') 4813 NEXTVAL_NEXTTOKE.ival = OP_LC; 4814 else if (*s == 'U') 4815 NEXTVAL_NEXTTOKE.ival = OP_UC; 4816 else if (*s == 'Q') 4817 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 4818 else if (*s == 'F') 4819 NEXTVAL_NEXTTOKE.ival = OP_FC; 4820 else 4821 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 4822 if (PL_madskills) { 4823 SV* const tmpsv = newSVpvs("\\ "); 4824 /* replace the space with the character we want to escape 4825 */ 4826 SvPVX(tmpsv)[1] = *s; 4827 curmad('_', tmpsv); 4828 } 4829 PL_bufptr = s + 1; 4830 } 4831 force_next(FUNC); 4832 if (PL_lex_starts) { 4833 s = PL_bufptr; 4834 PL_lex_starts = 0; 4835 #ifdef PERL_MAD 4836 if (PL_madskills) { 4837 if (PL_thistoken) 4838 sv_free(PL_thistoken); 4839 PL_thistoken = newSVpvs(""); 4840 } 4841 #endif 4842 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4843 if (PL_lex_casemods == 1 && PL_lex_inpat) 4844 OPERATOR(','); 4845 else 4846 Aop(OP_CONCAT); 4847 } 4848 else 4849 return yylex(); 4850 } 4851 4852 case LEX_INTERPPUSH: 4853 return REPORT(sublex_push()); 4854 4855 case LEX_INTERPSTART: 4856 if (PL_bufptr == PL_bufend) 4857 return REPORT(sublex_done()); 4858 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, 4859 "### Interpolated variable\n"); }); 4860 PL_expect = XTERM; 4861 /* for /@a/, we leave the joining for the regex engine to do 4862 * (unless we're within \Q etc) */ 4863 PL_lex_dojoin = (*PL_bufptr == '@' 4864 && (!PL_lex_inpat || PL_lex_casemods)); 4865 PL_lex_state = LEX_INTERPNORMAL; 4866 if (PL_lex_dojoin) { 4867 start_force(PL_curforce); 4868 NEXTVAL_NEXTTOKE.ival = 0; 4869 force_next(','); 4870 start_force(PL_curforce); 4871 force_ident("\"", '$'); 4872 start_force(PL_curforce); 4873 NEXTVAL_NEXTTOKE.ival = 0; 4874 force_next('$'); 4875 start_force(PL_curforce); 4876 NEXTVAL_NEXTTOKE.ival = 0; 4877 force_next((2<<24)|'('); 4878 start_force(PL_curforce); 4879 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 4880 force_next(FUNC); 4881 } 4882 /* Convert (?{...}) and friends to 'do {...}' */ 4883 if (PL_lex_inpat && *PL_bufptr == '(') { 4884 PL_parser->lex_shared->re_eval_start = PL_bufptr; 4885 PL_bufptr += 2; 4886 if (*PL_bufptr != '{') 4887 PL_bufptr++; 4888 start_force(PL_curforce); 4889 /* XXX probably need a CURMAD(something) here */ 4890 PL_expect = XTERMBLOCK; 4891 force_next(DO); 4892 } 4893 4894 if (PL_lex_starts++) { 4895 s = PL_bufptr; 4896 #ifdef PERL_MAD 4897 if (PL_madskills) { 4898 if (PL_thistoken) 4899 sv_free(PL_thistoken); 4900 PL_thistoken = newSVpvs(""); 4901 } 4902 #endif 4903 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4904 if (!PL_lex_casemods && PL_lex_inpat) 4905 OPERATOR(','); 4906 else 4907 Aop(OP_CONCAT); 4908 } 4909 return yylex(); 4910 4911 case LEX_INTERPENDMAYBE: 4912 if (intuit_more(PL_bufptr)) { 4913 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 4914 break; 4915 } 4916 /* FALL THROUGH */ 4917 4918 case LEX_INTERPEND: 4919 if (PL_lex_dojoin) { 4920 PL_lex_dojoin = FALSE; 4921 PL_lex_state = LEX_INTERPCONCAT; 4922 #ifdef PERL_MAD 4923 if (PL_madskills) { 4924 if (PL_thistoken) 4925 sv_free(PL_thistoken); 4926 PL_thistoken = newSVpvs(""); 4927 } 4928 #endif 4929 PL_lex_allbrackets--; 4930 return REPORT(')'); 4931 } 4932 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 4933 && SvEVALED(PL_lex_repl)) 4934 { 4935 if (PL_bufptr != PL_bufend) 4936 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 4937 PL_lex_repl = NULL; 4938 } 4939 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 4940 re_eval_str. If the here-doc body’s length equals the previous 4941 value of re_eval_start, re_eval_start will now be null. So 4942 check re_eval_str as well. */ 4943 if (PL_parser->lex_shared->re_eval_start 4944 || PL_parser->lex_shared->re_eval_str) { 4945 SV *sv; 4946 if (*PL_bufptr != ')') 4947 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 4948 PL_bufptr++; 4949 /* having compiled a (?{..}) expression, return the original 4950 * text too, as a const */ 4951 if (PL_parser->lex_shared->re_eval_str) { 4952 sv = PL_parser->lex_shared->re_eval_str; 4953 PL_parser->lex_shared->re_eval_str = NULL; 4954 SvCUR_set(sv, 4955 PL_bufptr - PL_parser->lex_shared->re_eval_start); 4956 SvPV_shrink_to_cur(sv); 4957 } 4958 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 4959 PL_bufptr - PL_parser->lex_shared->re_eval_start); 4960 start_force(PL_curforce); 4961 /* XXX probably need a CURMAD(something) here */ 4962 NEXTVAL_NEXTTOKE.opval = 4963 (OP*)newSVOP(OP_CONST, 0, 4964 sv); 4965 force_next(THING); 4966 PL_parser->lex_shared->re_eval_start = NULL; 4967 PL_expect = XTERM; 4968 return REPORT(','); 4969 } 4970 4971 /* FALLTHROUGH */ 4972 case LEX_INTERPCONCAT: 4973 #ifdef DEBUGGING 4974 if (PL_lex_brackets) 4975 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 4976 (long) PL_lex_brackets); 4977 #endif 4978 if (PL_bufptr == PL_bufend) 4979 return REPORT(sublex_done()); 4980 4981 /* m'foo' still needs to be parsed for possible (?{...}) */ 4982 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 4983 SV *sv = newSVsv(PL_linestr); 4984 sv = tokeq(sv); 4985 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 4986 s = PL_bufend; 4987 } 4988 else { 4989 s = scan_const(PL_bufptr); 4990 if (*s == '\\') 4991 PL_lex_state = LEX_INTERPCASEMOD; 4992 else 4993 PL_lex_state = LEX_INTERPSTART; 4994 } 4995 4996 if (s != PL_bufptr) { 4997 start_force(PL_curforce); 4998 if (PL_madskills) { 4999 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr)); 5000 } 5001 NEXTVAL_NEXTTOKE = pl_yylval; 5002 PL_expect = XTERM; 5003 force_next(THING); 5004 if (PL_lex_starts++) { 5005 #ifdef PERL_MAD 5006 if (PL_madskills) { 5007 if (PL_thistoken) 5008 sv_free(PL_thistoken); 5009 PL_thistoken = newSVpvs(""); 5010 } 5011 #endif 5012 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5013 if (!PL_lex_casemods && PL_lex_inpat) 5014 OPERATOR(','); 5015 else 5016 Aop(OP_CONCAT); 5017 } 5018 else { 5019 PL_bufptr = s; 5020 return yylex(); 5021 } 5022 } 5023 5024 return yylex(); 5025 case LEX_FORMLINE: 5026 s = scan_formline(PL_bufptr); 5027 if (!PL_lex_formbrack) 5028 { 5029 formbrack = 1; 5030 goto rightbracket; 5031 } 5032 PL_bufptr = s; 5033 return yylex(); 5034 } 5035 5036 s = PL_bufptr; 5037 PL_oldoldbufptr = PL_oldbufptr; 5038 PL_oldbufptr = s; 5039 5040 retry: 5041 #ifdef PERL_MAD 5042 if (PL_thistoken) { 5043 sv_free(PL_thistoken); 5044 PL_thistoken = 0; 5045 } 5046 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */ 5047 #endif 5048 switch (*s) { 5049 default: 5050 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s)) 5051 goto keylookup; 5052 { 5053 SV *dsv = newSVpvs_flags("", SVs_TEMP); 5054 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s, 5055 UTF8SKIP(s), 5056 SVs_TEMP | SVf_UTF8), 5057 10, UNI_DISPLAY_ISPRINT)) 5058 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 5059 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); 5060 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 5061 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; 5062 } else { 5063 d = PL_linestart; 5064 } 5065 *s = '\0'; 5066 sv_setpv(dsv, d); 5067 if (UTF) 5068 SvUTF8_on(dsv); 5069 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1); 5070 } 5071 case 4: 5072 case 26: 5073 goto fake_eof; /* emulate EOF on ^D or ^Z */ 5074 case 0: 5075 #ifdef PERL_MAD 5076 if (PL_madskills) 5077 PL_faketokens = 0; 5078 #endif 5079 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) { 5080 PL_last_uni = 0; 5081 PL_last_lop = 0; 5082 if (PL_lex_brackets && 5083 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) { 5084 yyerror((const char *) 5085 (PL_lex_formbrack 5086 ? "Format not terminated" 5087 : "Missing right curly or square bracket")); 5088 } 5089 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5090 "### Tokener got EOF\n"); 5091 } ); 5092 TOKEN(0); 5093 } 5094 if (s++ < PL_bufend) 5095 goto retry; /* ignore stray nulls */ 5096 PL_last_uni = 0; 5097 PL_last_lop = 0; 5098 if (!PL_in_eval && !PL_preambled) { 5099 PL_preambled = TRUE; 5100 #ifdef PERL_MAD 5101 if (PL_madskills) 5102 PL_faketokens = 1; 5103 #endif 5104 if (PL_perldb) { 5105 /* Generate a string of Perl code to load the debugger. 5106 * If PERL5DB is set, it will return the contents of that, 5107 * otherwise a compile-time require of perl5db.pl. */ 5108 5109 const char * const pdb = PerlEnv_getenv("PERL5DB"); 5110 5111 if (pdb) { 5112 sv_setpv(PL_linestr, pdb); 5113 sv_catpvs(PL_linestr,";"); 5114 } else { 5115 SETERRNO(0,SS_NORMAL); 5116 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 5117 } 5118 } else 5119 sv_setpvs(PL_linestr,""); 5120 if (PL_preambleav) { 5121 SV **svp = AvARRAY(PL_preambleav); 5122 SV **const end = svp + AvFILLp(PL_preambleav); 5123 while(svp <= end) { 5124 sv_catsv(PL_linestr, *svp); 5125 ++svp; 5126 sv_catpvs(PL_linestr, ";"); 5127 } 5128 sv_free(MUTABLE_SV(PL_preambleav)); 5129 PL_preambleav = NULL; 5130 } 5131 if (PL_minus_E) 5132 sv_catpvs(PL_linestr, 5133 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 5134 if (PL_minus_n || PL_minus_p) { 5135 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 5136 if (PL_minus_l) 5137 sv_catpvs(PL_linestr,"chomp;"); 5138 if (PL_minus_a) { 5139 if (PL_minus_F) { 5140 if ((*PL_splitstr == '/' || *PL_splitstr == '\'' 5141 || *PL_splitstr == '"') 5142 && strchr(PL_splitstr + 1, *PL_splitstr)) 5143 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 5144 else { 5145 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 5146 bytes can be used as quoting characters. :-) */ 5147 const char *splits = PL_splitstr; 5148 sv_catpvs(PL_linestr, "our @F=split(q\0"); 5149 do { 5150 /* Need to \ \s */ 5151 if (*splits == '\\') 5152 sv_catpvn(PL_linestr, splits, 1); 5153 sv_catpvn(PL_linestr, splits, 1); 5154 } while (*splits++); 5155 /* This loop will embed the trailing NUL of 5156 PL_linestr as the last thing it does before 5157 terminating. */ 5158 sv_catpvs(PL_linestr, ");"); 5159 } 5160 } 5161 else 5162 sv_catpvs(PL_linestr,"our @F=split(' ');"); 5163 } 5164 } 5165 sv_catpvs(PL_linestr, "\n"); 5166 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5167 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5168 PL_last_lop = PL_last_uni = NULL; 5169 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) 5170 update_debugger_info(PL_linestr, NULL, 0); 5171 goto retry; 5172 } 5173 do { 5174 fake_eof = 0; 5175 bof = PL_rsfp ? TRUE : FALSE; 5176 if (0) { 5177 fake_eof: 5178 fake_eof = LEX_FAKE_EOF; 5179 } 5180 PL_bufptr = PL_bufend; 5181 COPLINE_INC_WITH_HERELINES; 5182 if (!lex_next_chunk(fake_eof)) { 5183 CopLINE_dec(PL_curcop); 5184 s = PL_bufptr; 5185 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 5186 } 5187 CopLINE_dec(PL_curcop); 5188 #ifdef PERL_MAD 5189 if (!PL_rsfp) 5190 PL_realtokenstart = -1; 5191 #endif 5192 s = PL_bufptr; 5193 /* If it looks like the start of a BOM or raw UTF-16, 5194 * check if it in fact is. */ 5195 if (bof && PL_rsfp && 5196 (*s == 0 || 5197 *(U8*)s == 0xEF || 5198 *(U8*)s >= 0xFE || 5199 s[1] == 0)) { 5200 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 5201 bof = (offset == (Off_t)SvCUR(PL_linestr)); 5202 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 5203 /* offset may include swallowed CR */ 5204 if (!bof) 5205 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 5206 #endif 5207 if (bof) { 5208 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5209 s = swallow_bom((U8*)s); 5210 } 5211 } 5212 if (PL_parser->in_pod) { 5213 /* Incest with pod. */ 5214 #ifdef PERL_MAD 5215 if (PL_madskills) 5216 sv_catsv(PL_thiswhite, PL_linestr); 5217 #endif 5218 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { 5219 sv_setpvs(PL_linestr, ""); 5220 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5221 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5222 PL_last_lop = PL_last_uni = NULL; 5223 PL_parser->in_pod = 0; 5224 } 5225 } 5226 if (PL_rsfp || PL_parser->filtered) 5227 incline(s); 5228 } while (PL_parser->in_pod); 5229 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 5230 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5231 PL_last_lop = PL_last_uni = NULL; 5232 if (CopLINE(PL_curcop) == 1) { 5233 while (s < PL_bufend && isSPACE(*s)) 5234 s++; 5235 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 5236 s++; 5237 #ifdef PERL_MAD 5238 if (PL_madskills) 5239 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart); 5240 #endif 5241 d = NULL; 5242 if (!PL_in_eval) { 5243 if (*s == '#' && *(s+1) == '!') 5244 d = s + 2; 5245 #ifdef ALTERNATE_SHEBANG 5246 else { 5247 static char const as[] = ALTERNATE_SHEBANG; 5248 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 5249 d = s + (sizeof(as) - 1); 5250 } 5251 #endif /* ALTERNATE_SHEBANG */ 5252 } 5253 if (d) { 5254 char *ipath; 5255 char *ipathend; 5256 5257 while (isSPACE(*d)) 5258 d++; 5259 ipath = d; 5260 while (*d && !isSPACE(*d)) 5261 d++; 5262 ipathend = d; 5263 5264 #ifdef ARG_ZERO_IS_SCRIPT 5265 if (ipathend > ipath) { 5266 /* 5267 * HP-UX (at least) sets argv[0] to the script name, 5268 * which makes $^X incorrect. And Digital UNIX and Linux, 5269 * at least, set argv[0] to the basename of the Perl 5270 * interpreter. So, having found "#!", we'll set it right. 5271 */ 5272 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 5273 SVt_PV)); /* $^X */ 5274 assert(SvPOK(x) || SvGMAGICAL(x)); 5275 if (sv_eq(x, CopFILESV(PL_curcop))) { 5276 sv_setpvn(x, ipath, ipathend - ipath); 5277 SvSETMAGIC(x); 5278 } 5279 else { 5280 STRLEN blen; 5281 STRLEN llen; 5282 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen); 5283 const char * const lstart = SvPV_const(x,llen); 5284 if (llen < blen) { 5285 bstart += blen - llen; 5286 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 5287 sv_setpvn(x, ipath, ipathend - ipath); 5288 SvSETMAGIC(x); 5289 } 5290 } 5291 } 5292 TAINT_NOT; /* $^X is always tainted, but that's OK */ 5293 } 5294 #endif /* ARG_ZERO_IS_SCRIPT */ 5295 5296 /* 5297 * Look for options. 5298 */ 5299 d = instr(s,"perl -"); 5300 if (!d) { 5301 d = instr(s,"perl"); 5302 #if defined(DOSISH) 5303 /* avoid getting into infinite loops when shebang 5304 * line contains "Perl" rather than "perl" */ 5305 if (!d) { 5306 for (d = ipathend-4; d >= ipath; --d) { 5307 if ((*d == 'p' || *d == 'P') 5308 && !ibcmp(d, "perl", 4)) 5309 { 5310 break; 5311 } 5312 } 5313 if (d < ipath) 5314 d = NULL; 5315 } 5316 #endif 5317 } 5318 #ifdef ALTERNATE_SHEBANG 5319 /* 5320 * If the ALTERNATE_SHEBANG on this system starts with a 5321 * character that can be part of a Perl expression, then if 5322 * we see it but not "perl", we're probably looking at the 5323 * start of Perl code, not a request to hand off to some 5324 * other interpreter. Similarly, if "perl" is there, but 5325 * not in the first 'word' of the line, we assume the line 5326 * contains the start of the Perl program. 5327 */ 5328 if (d && *s != '#') { 5329 const char *c = ipath; 5330 while (*c && !strchr("; \t\r\n\f\v#", *c)) 5331 c++; 5332 if (c < d) 5333 d = NULL; /* "perl" not in first word; ignore */ 5334 else 5335 *s = '#'; /* Don't try to parse shebang line */ 5336 } 5337 #endif /* ALTERNATE_SHEBANG */ 5338 if (!d && 5339 *s == '#' && 5340 ipathend > ipath && 5341 !PL_minus_c && 5342 !instr(s,"indir") && 5343 instr(PL_origargv[0],"perl")) 5344 { 5345 dVAR; 5346 char **newargv; 5347 5348 *ipathend = '\0'; 5349 s = ipathend + 1; 5350 while (s < PL_bufend && isSPACE(*s)) 5351 s++; 5352 if (s < PL_bufend) { 5353 Newx(newargv,PL_origargc+3,char*); 5354 newargv[1] = s; 5355 while (s < PL_bufend && !isSPACE(*s)) 5356 s++; 5357 *s = '\0'; 5358 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 5359 } 5360 else 5361 newargv = PL_origargv; 5362 newargv[0] = ipath; 5363 PERL_FPU_PRE_EXEC 5364 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 5365 PERL_FPU_POST_EXEC 5366 Perl_croak(aTHX_ "Can't exec %s", ipath); 5367 } 5368 if (d) { 5369 while (*d && !isSPACE(*d)) 5370 d++; 5371 while (SPACE_OR_TAB(*d)) 5372 d++; 5373 5374 if (*d++ == '-') { 5375 const bool switches_done = PL_doswitches; 5376 const U32 oldpdb = PL_perldb; 5377 const bool oldn = PL_minus_n; 5378 const bool oldp = PL_minus_p; 5379 const char *d1 = d; 5380 5381 do { 5382 bool baduni = FALSE; 5383 if (*d1 == 'C') { 5384 const char *d2 = d1 + 1; 5385 if (parse_unicode_opts((const char **)&d2) 5386 != PL_unicode) 5387 baduni = TRUE; 5388 } 5389 if (baduni || *d1 == 'M' || *d1 == 'm') { 5390 const char * const m = d1; 5391 while (*d1 && !isSPACE(*d1)) 5392 d1++; 5393 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 5394 (int)(d1 - m), m); 5395 } 5396 d1 = moreswitches(d1); 5397 } while (d1); 5398 if (PL_doswitches && !switches_done) { 5399 int argc = PL_origargc; 5400 char **argv = PL_origargv; 5401 do { 5402 argc--,argv++; 5403 } while (argc && argv[0][0] == '-' && argv[0][1]); 5404 init_argv_symbols(argc,argv); 5405 } 5406 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) || 5407 ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 5408 /* if we have already added "LINE: while (<>) {", 5409 we must not do it again */ 5410 { 5411 sv_setpvs(PL_linestr, ""); 5412 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5413 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5414 PL_last_lop = PL_last_uni = NULL; 5415 PL_preambled = FALSE; 5416 if (PERLDB_LINE || PERLDB_SAVESRC) 5417 (void)gv_fetchfile(PL_origfilename); 5418 goto retry; 5419 } 5420 } 5421 } 5422 } 5423 } 5424 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5425 PL_lex_state = LEX_FORMLINE; 5426 start_force(PL_curforce); 5427 NEXTVAL_NEXTTOKE.ival = 0; 5428 force_next(FORMRBRACK); 5429 TOKEN(';'); 5430 } 5431 goto retry; 5432 case '\r': 5433 #ifdef PERL_STRICT_CR 5434 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 5435 Perl_croak(aTHX_ 5436 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 5437 #endif 5438 case ' ': case '\t': case '\f': case 013: 5439 #ifdef PERL_MAD 5440 PL_realtokenstart = -1; 5441 if (PL_madskills) { 5442 if (!PL_thiswhite) 5443 PL_thiswhite = newSVpvs(""); 5444 sv_catpvn(PL_thiswhite, s, 1); 5445 } 5446 #endif 5447 s++; 5448 goto retry; 5449 case '#': 5450 case '\n': 5451 #ifdef PERL_MAD 5452 PL_realtokenstart = -1; 5453 if (PL_madskills) 5454 PL_faketokens = 0; 5455 #endif 5456 if (PL_lex_state != LEX_NORMAL || 5457 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) { 5458 if (*s == '#' && s == PL_linestart && PL_in_eval 5459 && !PL_rsfp && !PL_parser->filtered) { 5460 /* handle eval qq[#line 1 "foo"\n ...] */ 5461 CopLINE_dec(PL_curcop); 5462 incline(s); 5463 } 5464 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) { 5465 s = SKIPSPACE0(s); 5466 if (!PL_in_eval || PL_rsfp || PL_parser->filtered) 5467 incline(s); 5468 } 5469 else { 5470 const bool in_comment = *s == '#'; 5471 d = s; 5472 while (d < PL_bufend && *d != '\n') 5473 d++; 5474 if (d < PL_bufend) 5475 d++; 5476 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 5477 Perl_croak(aTHX_ "panic: input overflow, %p > %p", 5478 d, PL_bufend); 5479 #ifdef PERL_MAD 5480 if (PL_madskills) 5481 PL_thiswhite = newSVpvn(s, d - s); 5482 #endif 5483 s = d; 5484 if (in_comment && d == PL_bufend 5485 && PL_lex_state == LEX_INTERPNORMAL 5486 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5487 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 5488 else incline(s); 5489 } 5490 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5491 PL_lex_state = LEX_FORMLINE; 5492 start_force(PL_curforce); 5493 NEXTVAL_NEXTTOKE.ival = 0; 5494 force_next(FORMRBRACK); 5495 TOKEN(';'); 5496 } 5497 } 5498 else { 5499 #ifdef PERL_MAD 5500 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) { 5501 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') { 5502 PL_faketokens = 0; 5503 s = SKIPSPACE0(s); 5504 TOKEN(PEG); /* make sure any #! line is accessible */ 5505 } 5506 s = SKIPSPACE0(s); 5507 } 5508 else { 5509 /* if (PL_madskills && PL_lex_formbrack) { */ 5510 d = s; 5511 while (d < PL_bufend && *d != '\n') 5512 d++; 5513 if (d < PL_bufend) 5514 d++; 5515 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 5516 Perl_croak(aTHX_ "panic: input overflow"); 5517 if (PL_madskills && CopLINE(PL_curcop) >= 1) { 5518 if (!PL_thiswhite) 5519 PL_thiswhite = newSVpvs(""); 5520 if (CopLINE(PL_curcop) == 1) { 5521 sv_setpvs(PL_thiswhite, ""); 5522 PL_faketokens = 0; 5523 } 5524 sv_catpvn(PL_thiswhite, s, d - s); 5525 } 5526 s = d; 5527 /* } 5528 *s = '\0'; 5529 PL_bufend = s; */ 5530 } 5531 #else 5532 *s = '\0'; 5533 PL_bufend = s; 5534 #endif 5535 } 5536 goto retry; 5537 case '-': 5538 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5539 I32 ftst = 0; 5540 char tmp; 5541 5542 s++; 5543 PL_bufptr = s; 5544 tmp = *s++; 5545 5546 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5547 s++; 5548 5549 if (strnEQ(s,"=>",2)) { 5550 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); 5551 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5552 OPERATOR('-'); /* unary minus */ 5553 } 5554 PL_last_uni = PL_oldbufptr; 5555 switch (tmp) { 5556 case 'r': ftst = OP_FTEREAD; break; 5557 case 'w': ftst = OP_FTEWRITE; break; 5558 case 'x': ftst = OP_FTEEXEC; break; 5559 case 'o': ftst = OP_FTEOWNED; break; 5560 case 'R': ftst = OP_FTRREAD; break; 5561 case 'W': ftst = OP_FTRWRITE; break; 5562 case 'X': ftst = OP_FTREXEC; break; 5563 case 'O': ftst = OP_FTROWNED; break; 5564 case 'e': ftst = OP_FTIS; break; 5565 case 'z': ftst = OP_FTZERO; break; 5566 case 's': ftst = OP_FTSIZE; break; 5567 case 'f': ftst = OP_FTFILE; break; 5568 case 'd': ftst = OP_FTDIR; break; 5569 case 'l': ftst = OP_FTLINK; break; 5570 case 'p': ftst = OP_FTPIPE; break; 5571 case 'S': ftst = OP_FTSOCK; break; 5572 case 'u': ftst = OP_FTSUID; break; 5573 case 'g': ftst = OP_FTSGID; break; 5574 case 'k': ftst = OP_FTSVTX; break; 5575 case 'b': ftst = OP_FTBLK; break; 5576 case 'c': ftst = OP_FTCHR; break; 5577 case 't': ftst = OP_FTTTY; break; 5578 case 'T': ftst = OP_FTTEXT; break; 5579 case 'B': ftst = OP_FTBINARY; break; 5580 case 'M': case 'A': case 'C': 5581 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5582 switch (tmp) { 5583 case 'M': ftst = OP_FTMTIME; break; 5584 case 'A': ftst = OP_FTATIME; break; 5585 case 'C': ftst = OP_FTCTIME; break; 5586 default: break; 5587 } 5588 break; 5589 default: 5590 break; 5591 } 5592 if (ftst) { 5593 PL_last_lop_op = (OPCODE)ftst; 5594 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5595 "### Saw file test %c\n", (int)tmp); 5596 } ); 5597 FTST(ftst); 5598 } 5599 else { 5600 /* Assume it was a minus followed by a one-letter named 5601 * subroutine call (or a -bareword), then. */ 5602 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5603 "### '-%c' looked like a file test but was not\n", 5604 (int) tmp); 5605 } ); 5606 s = --PL_bufptr; 5607 } 5608 } 5609 { 5610 const char tmp = *s++; 5611 if (*s == tmp) { 5612 s++; 5613 if (PL_expect == XOPERATOR) 5614 TERM(POSTDEC); 5615 else 5616 OPERATOR(PREDEC); 5617 } 5618 else if (*s == '>') { 5619 s++; 5620 s = SKIPSPACE1(s); 5621 if (isIDFIRST_lazy_if(s,UTF)) { 5622 s = force_word(s,METHOD,FALSE,TRUE,FALSE); 5623 TOKEN(ARROW); 5624 } 5625 else if (*s == '$') 5626 OPERATOR(ARROW); 5627 else 5628 TERM(ARROW); 5629 } 5630 if (PL_expect == XOPERATOR) { 5631 if (*s == '=' && !PL_lex_allbrackets && 5632 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5633 s--; 5634 TOKEN(0); 5635 } 5636 Aop(OP_SUBTRACT); 5637 } 5638 else { 5639 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5640 check_uni(); 5641 OPERATOR('-'); /* unary minus */ 5642 } 5643 } 5644 5645 case '+': 5646 { 5647 const char tmp = *s++; 5648 if (*s == tmp) { 5649 s++; 5650 if (PL_expect == XOPERATOR) 5651 TERM(POSTINC); 5652 else 5653 OPERATOR(PREINC); 5654 } 5655 if (PL_expect == XOPERATOR) { 5656 if (*s == '=' && !PL_lex_allbrackets && 5657 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5658 s--; 5659 TOKEN(0); 5660 } 5661 Aop(OP_ADD); 5662 } 5663 else { 5664 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5665 check_uni(); 5666 OPERATOR('+'); 5667 } 5668 } 5669 5670 case '*': 5671 if (PL_expect != XOPERATOR) { 5672 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5673 PL_expect = XOPERATOR; 5674 force_ident(PL_tokenbuf, '*'); 5675 if (!*PL_tokenbuf) 5676 PREREF('*'); 5677 TERM('*'); 5678 } 5679 s++; 5680 if (*s == '*') { 5681 s++; 5682 if (*s == '=' && !PL_lex_allbrackets && 5683 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5684 s -= 2; 5685 TOKEN(0); 5686 } 5687 PWop(OP_POW); 5688 } 5689 if (*s == '=' && !PL_lex_allbrackets && 5690 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5691 s--; 5692 TOKEN(0); 5693 } 5694 Mop(OP_MULTIPLY); 5695 5696 case '%': 5697 if (PL_expect == XOPERATOR) { 5698 if (s[1] == '=' && !PL_lex_allbrackets && 5699 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5700 TOKEN(0); 5701 ++s; 5702 Mop(OP_MODULO); 5703 } 5704 PL_tokenbuf[0] = '%'; 5705 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, 5706 sizeof PL_tokenbuf - 1, FALSE); 5707 if (!PL_tokenbuf[1]) { 5708 PREREF('%'); 5709 } 5710 PL_expect = XOPERATOR; 5711 force_ident_maybe_lex('%'); 5712 TERM('%'); 5713 5714 case '^': 5715 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5716 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5717 TOKEN(0); 5718 s++; 5719 BOop(OP_BIT_XOR); 5720 case '[': 5721 if (PL_lex_brackets > 100) 5722 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5723 PL_lex_brackstack[PL_lex_brackets++] = 0; 5724 PL_lex_allbrackets++; 5725 { 5726 const char tmp = *s++; 5727 OPERATOR(tmp); 5728 } 5729 case '~': 5730 if (s[1] == '~' 5731 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) 5732 { 5733 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 5734 TOKEN(0); 5735 s += 2; 5736 Perl_ck_warner_d(aTHX_ 5737 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 5738 "Smartmatch is experimental"); 5739 Eop(OP_SMARTMATCH); 5740 } 5741 s++; 5742 OPERATOR('~'); 5743 case ',': 5744 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 5745 TOKEN(0); 5746 s++; 5747 OPERATOR(','); 5748 case ':': 5749 if (s[1] == ':') { 5750 len = 0; 5751 goto just_a_word_zero_gv; 5752 } 5753 s++; 5754 switch (PL_expect) { 5755 OP *attrs; 5756 #ifdef PERL_MAD 5757 I32 stuffstart; 5758 #endif 5759 case XOPERATOR: 5760 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 5761 break; 5762 PL_bufptr = s; /* update in case we back off */ 5763 if (*s == '=') { 5764 Perl_croak(aTHX_ 5765 "Use of := for an empty attribute list is not allowed"); 5766 } 5767 goto grabattrs; 5768 case XATTRBLOCK: 5769 PL_expect = XBLOCK; 5770 goto grabattrs; 5771 case XATTRTERM: 5772 PL_expect = XTERMBLOCK; 5773 grabattrs: 5774 #ifdef PERL_MAD 5775 stuffstart = s - SvPVX(PL_linestr) - 1; 5776 #endif 5777 s = PEEKSPACE(s); 5778 attrs = NULL; 5779 while (isIDFIRST_lazy_if(s,UTF)) { 5780 I32 tmp; 5781 SV *sv; 5782 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5783 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5784 if (tmp < 0) tmp = -tmp; 5785 switch (tmp) { 5786 case KEY_or: 5787 case KEY_and: 5788 case KEY_for: 5789 case KEY_foreach: 5790 case KEY_unless: 5791 case KEY_if: 5792 case KEY_while: 5793 case KEY_until: 5794 goto got_attrs; 5795 default: 5796 break; 5797 } 5798 } 5799 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5800 if (*d == '(') { 5801 d = scan_str(d,TRUE,TRUE,FALSE, FALSE); 5802 if (!d) { 5803 /* MUST advance bufptr here to avoid bogus 5804 "at end of line" context messages from yyerror(). 5805 */ 5806 PL_bufptr = s + len; 5807 yyerror("Unterminated attribute parameter in attribute list"); 5808 if (attrs) 5809 op_free(attrs); 5810 sv_free(sv); 5811 return REPORT(0); /* EOF indicator */ 5812 } 5813 } 5814 if (PL_lex_stuff) { 5815 sv_catsv(sv, PL_lex_stuff); 5816 attrs = op_append_elem(OP_LIST, attrs, 5817 newSVOP(OP_CONST, 0, sv)); 5818 SvREFCNT_dec(PL_lex_stuff); 5819 PL_lex_stuff = NULL; 5820 } 5821 else { 5822 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { 5823 sv_free(sv); 5824 if (PL_in_my == KEY_our) { 5825 deprecate(":unique"); 5826 } 5827 else 5828 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); 5829 } 5830 5831 /* NOTE: any CV attrs applied here need to be part of 5832 the CVf_BUILTIN_ATTRS define in cv.h! */ 5833 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { 5834 sv_free(sv); 5835 CvLVALUE_on(PL_compcv); 5836 } 5837 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { 5838 sv_free(sv); 5839 deprecate(":locked"); 5840 } 5841 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { 5842 sv_free(sv); 5843 CvMETHOD_on(PL_compcv); 5844 } 5845 /* After we've set the flags, it could be argued that 5846 we don't need to do the attributes.pm-based setting 5847 process, and shouldn't bother appending recognized 5848 flags. To experiment with that, uncomment the 5849 following "else". (Note that's already been 5850 uncommented. That keeps the above-applied built-in 5851 attributes from being intercepted (and possibly 5852 rejected) by a package's attribute routines, but is 5853 justified by the performance win for the common case 5854 of applying only built-in attributes.) */ 5855 else 5856 attrs = op_append_elem(OP_LIST, attrs, 5857 newSVOP(OP_CONST, 0, 5858 sv)); 5859 } 5860 s = PEEKSPACE(d); 5861 if (*s == ':' && s[1] != ':') 5862 s = PEEKSPACE(s+1); 5863 else if (s == d) 5864 break; /* require real whitespace or :'s */ 5865 /* XXX losing whitespace on sequential attributes here */ 5866 } 5867 { 5868 const char tmp 5869 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */ 5870 if (*s != ';' && *s != '}' && *s != tmp 5871 && (tmp != '=' || *s != ')')) { 5872 const char q = ((*s == '\'') ? '"' : '\''); 5873 /* If here for an expression, and parsed no attrs, back 5874 off. */ 5875 if (tmp == '=' && !attrs) { 5876 s = PL_bufptr; 5877 break; 5878 } 5879 /* MUST advance bufptr here to avoid bogus "at end of line" 5880 context messages from yyerror(). 5881 */ 5882 PL_bufptr = s; 5883 yyerror( (const char *) 5884 (*s 5885 ? Perl_form(aTHX_ "Invalid separator character " 5886 "%c%c%c in attribute list", q, *s, q) 5887 : "Unterminated attribute list" ) ); 5888 if (attrs) 5889 op_free(attrs); 5890 OPERATOR(':'); 5891 } 5892 } 5893 got_attrs: 5894 if (attrs) { 5895 start_force(PL_curforce); 5896 NEXTVAL_NEXTTOKE.opval = attrs; 5897 CURMAD('_', PL_nextwhite); 5898 force_next(THING); 5899 } 5900 #ifdef PERL_MAD 5901 if (PL_madskills) { 5902 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart, 5903 (s - SvPVX(PL_linestr)) - stuffstart); 5904 } 5905 #endif 5906 TOKEN(COLONATTR); 5907 } 5908 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 5909 s--; 5910 TOKEN(0); 5911 } 5912 PL_lex_allbrackets--; 5913 OPERATOR(':'); 5914 case '(': 5915 s++; 5916 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 5917 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 5918 else 5919 PL_expect = XTERM; 5920 s = SKIPSPACE1(s); 5921 PL_lex_allbrackets++; 5922 TOKEN('('); 5923 case ';': 5924 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 5925 TOKEN(0); 5926 CLINE; 5927 s++; 5928 OPERATOR(';'); 5929 case ')': 5930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 5931 TOKEN(0); 5932 s++; 5933 PL_lex_allbrackets--; 5934 s = SKIPSPACE1(s); 5935 if (*s == '{') 5936 PREBLOCK(')'); 5937 TERM(')'); 5938 case ']': 5939 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 5940 TOKEN(0); 5941 s++; 5942 if (PL_lex_brackets <= 0) 5943 yyerror("Unmatched right square bracket"); 5944 else 5945 --PL_lex_brackets; 5946 PL_lex_allbrackets--; 5947 if (PL_lex_state == LEX_INTERPNORMAL) { 5948 if (PL_lex_brackets == 0) { 5949 if (*s == '-' && s[1] == '>') 5950 PL_lex_state = LEX_INTERPENDMAYBE; 5951 else if (*s != '[' && *s != '{') 5952 PL_lex_state = LEX_INTERPEND; 5953 } 5954 } 5955 TERM(']'); 5956 case '{': 5957 s++; 5958 leftbracket: 5959 if (PL_lex_brackets > 100) { 5960 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5961 } 5962 switch (PL_expect) { 5963 case XTERM: 5964 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5965 PL_lex_allbrackets++; 5966 OPERATOR(HASHBRACK); 5967 case XOPERATOR: 5968 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5969 s++; 5970 d = s; 5971 PL_tokenbuf[0] = '\0'; 5972 if (d < PL_bufend && *d == '-') { 5973 PL_tokenbuf[0] = '-'; 5974 d++; 5975 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5976 d++; 5977 } 5978 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 5979 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 5980 FALSE, &len); 5981 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5982 d++; 5983 if (*d == '}') { 5984 const char minus = (PL_tokenbuf[0] == '-'); 5985 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); 5986 if (minus) 5987 force_next('-'); 5988 } 5989 } 5990 /* FALL THROUGH */ 5991 case XATTRBLOCK: 5992 case XBLOCK: 5993 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 5994 PL_lex_allbrackets++; 5995 PL_expect = XSTATE; 5996 break; 5997 case XATTRTERM: 5998 case XTERMBLOCK: 5999 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6000 PL_lex_allbrackets++; 6001 PL_expect = XSTATE; 6002 break; 6003 default: { 6004 const char *t; 6005 if (PL_oldoldbufptr == PL_last_lop) 6006 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6007 else 6008 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6009 PL_lex_allbrackets++; 6010 s = SKIPSPACE1(s); 6011 if (*s == '}') { 6012 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6013 PL_expect = XTERM; 6014 /* This hack is to get the ${} in the message. */ 6015 PL_bufptr = s+1; 6016 yyerror("syntax error"); 6017 break; 6018 } 6019 OPERATOR(HASHBRACK); 6020 } 6021 /* This hack serves to disambiguate a pair of curlies 6022 * as being a block or an anon hash. Normally, expectation 6023 * determines that, but in cases where we're not in a 6024 * position to expect anything in particular (like inside 6025 * eval"") we have to resolve the ambiguity. This code 6026 * covers the case where the first term in the curlies is a 6027 * quoted string. Most other cases need to be explicitly 6028 * disambiguated by prepending a "+" before the opening 6029 * curly in order to force resolution as an anon hash. 6030 * 6031 * XXX should probably propagate the outer expectation 6032 * into eval"" to rely less on this hack, but that could 6033 * potentially break current behavior of eval"". 6034 * GSAR 97-07-21 6035 */ 6036 t = s; 6037 if (*s == '\'' || *s == '"' || *s == '`') { 6038 /* common case: get past first string, handling escapes */ 6039 for (t++; t < PL_bufend && *t != *s;) 6040 if (*t++ == '\\' && (*t == '\\' || *t == *s)) 6041 t++; 6042 t++; 6043 } 6044 else if (*s == 'q') { 6045 if (++t < PL_bufend 6046 && (!isWORDCHAR(*t) 6047 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6048 && !isWORDCHAR(*t)))) 6049 { 6050 /* skip q//-like construct */ 6051 const char *tmps; 6052 char open, close, term; 6053 I32 brackets = 1; 6054 6055 while (t < PL_bufend && isSPACE(*t)) 6056 t++; 6057 /* check for q => */ 6058 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6059 OPERATOR(HASHBRACK); 6060 } 6061 term = *t; 6062 open = term; 6063 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 6064 term = tmps[5]; 6065 close = term; 6066 if (open == close) 6067 for (t++; t < PL_bufend; t++) { 6068 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6069 t++; 6070 else if (*t == open) 6071 break; 6072 } 6073 else { 6074 for (t++; t < PL_bufend; t++) { 6075 if (*t == '\\' && t+1 < PL_bufend) 6076 t++; 6077 else if (*t == close && --brackets <= 0) 6078 break; 6079 else if (*t == open) 6080 brackets++; 6081 } 6082 } 6083 t++; 6084 } 6085 else 6086 /* skip plain q word */ 6087 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 6088 t += UTF8SKIP(t); 6089 } 6090 else if (isWORDCHAR_lazy_if(t,UTF)) { 6091 t += UTF8SKIP(t); 6092 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 6093 t += UTF8SKIP(t); 6094 } 6095 while (t < PL_bufend && isSPACE(*t)) 6096 t++; 6097 /* if comma follows first term, call it an anon hash */ 6098 /* XXX it could be a comma expression with loop modifiers */ 6099 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6100 || (*t == '=' && t[1] == '>'))) 6101 OPERATOR(HASHBRACK); 6102 if (PL_expect == XREF) 6103 PL_expect = XTERM; 6104 else { 6105 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6106 PL_expect = XSTATE; 6107 } 6108 } 6109 break; 6110 } 6111 pl_yylval.ival = CopLINE(PL_curcop); 6112 if (isSPACE(*s) || *s == '#') 6113 PL_copline = NOLINE; /* invalidate current command line number */ 6114 TOKEN(formbrack ? '=' : '{'); 6115 case '}': 6116 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6117 TOKEN(0); 6118 rightbracket: 6119 s++; 6120 if (PL_lex_brackets <= 0) 6121 yyerror("Unmatched right curly bracket"); 6122 else 6123 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6124 PL_lex_allbrackets--; 6125 if (PL_lex_state == LEX_INTERPNORMAL) { 6126 if (PL_lex_brackets == 0) { 6127 if (PL_expect & XFAKEBRACK) { 6128 PL_expect &= XENUMMASK; 6129 PL_lex_state = LEX_INTERPEND; 6130 PL_bufptr = s; 6131 #if 0 6132 if (PL_madskills) { 6133 if (!PL_thiswhite) 6134 PL_thiswhite = newSVpvs(""); 6135 sv_catpvs(PL_thiswhite,"}"); 6136 } 6137 #endif 6138 return yylex(); /* ignore fake brackets */ 6139 } 6140 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6141 && SvEVALED(PL_lex_repl)) 6142 PL_lex_state = LEX_INTERPEND; 6143 else if (*s == '-' && s[1] == '>') 6144 PL_lex_state = LEX_INTERPENDMAYBE; 6145 else if (*s != '[' && *s != '{') 6146 PL_lex_state = LEX_INTERPEND; 6147 } 6148 } 6149 if (PL_expect & XFAKEBRACK) { 6150 PL_expect &= XENUMMASK; 6151 PL_bufptr = s; 6152 return yylex(); /* ignore fake brackets */ 6153 } 6154 start_force(PL_curforce); 6155 if (PL_madskills) { 6156 curmad('X', newSVpvn(s-1,1)); 6157 CURMAD('_', PL_thiswhite); 6158 } 6159 force_next(formbrack ? '.' : '}'); 6160 if (formbrack) LEAVE; 6161 #ifdef PERL_MAD 6162 if (PL_madskills && !PL_thistoken) 6163 PL_thistoken = newSVpvs(""); 6164 #endif 6165 if (formbrack == 2) { /* means . where arguments were expected */ 6166 start_force(PL_curforce); 6167 force_next(';'); 6168 TOKEN(FORMRBRACK); 6169 } 6170 TOKEN(';'); 6171 case '&': 6172 s++; 6173 if (*s++ == '&') { 6174 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6175 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6176 s -= 2; 6177 TOKEN(0); 6178 } 6179 AOPERATOR(ANDAND); 6180 } 6181 s--; 6182 if (PL_expect == XOPERATOR) { 6183 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON) 6184 && isIDFIRST_lazy_if(s,UTF)) 6185 { 6186 CopLINE_dec(PL_curcop); 6187 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6188 CopLINE_inc(PL_curcop); 6189 } 6190 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6191 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6192 s--; 6193 TOKEN(0); 6194 } 6195 BAop(OP_BIT_AND); 6196 } 6197 6198 PL_tokenbuf[0] = '&'; 6199 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1, 6200 sizeof PL_tokenbuf - 1, TRUE); 6201 if (PL_tokenbuf[1]) { 6202 PL_expect = XOPERATOR; 6203 force_ident_maybe_lex('&'); 6204 } 6205 else 6206 PREREF('&'); 6207 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6208 TERM('&'); 6209 6210 case '|': 6211 s++; 6212 if (*s++ == '|') { 6213 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6214 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6215 s -= 2; 6216 TOKEN(0); 6217 } 6218 AOPERATOR(OROR); 6219 } 6220 s--; 6221 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6222 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6223 s--; 6224 TOKEN(0); 6225 } 6226 BOop(OP_BIT_OR); 6227 case '=': 6228 s++; 6229 { 6230 const char tmp = *s++; 6231 if (tmp == '=') { 6232 if (!PL_lex_allbrackets && 6233 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6234 s -= 2; 6235 TOKEN(0); 6236 } 6237 Eop(OP_EQ); 6238 } 6239 if (tmp == '>') { 6240 if (!PL_lex_allbrackets && 6241 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) { 6242 s -= 2; 6243 TOKEN(0); 6244 } 6245 OPERATOR(','); 6246 } 6247 if (tmp == '~') 6248 PMop(OP_MATCH); 6249 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 6250 && strchr("+-*/%.^&|<",tmp)) 6251 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6252 "Reversed %c= operator",(int)tmp); 6253 s--; 6254 if (PL_expect == XSTATE && isALPHA(tmp) && 6255 (s == PL_linestart+1 || s[-2] == '\n') ) 6256 { 6257 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) 6258 || PL_lex_state != LEX_NORMAL) { 6259 d = PL_bufend; 6260 while (s < d) { 6261 if (*s++ == '\n') { 6262 incline(s); 6263 if (strnEQ(s,"=cut",4)) { 6264 s = strchr(s,'\n'); 6265 if (s) 6266 s++; 6267 else 6268 s = d; 6269 incline(s); 6270 goto retry; 6271 } 6272 } 6273 } 6274 goto retry; 6275 } 6276 #ifdef PERL_MAD 6277 if (PL_madskills) { 6278 if (!PL_thiswhite) 6279 PL_thiswhite = newSVpvs(""); 6280 sv_catpvn(PL_thiswhite, PL_linestart, 6281 PL_bufend - PL_linestart); 6282 } 6283 #endif 6284 s = PL_bufend; 6285 PL_parser->in_pod = 1; 6286 goto retry; 6287 } 6288 } 6289 if (PL_expect == XBLOCK) { 6290 const char *t = s; 6291 #ifdef PERL_STRICT_CR 6292 while (SPACE_OR_TAB(*t)) 6293 #else 6294 while (SPACE_OR_TAB(*t) || *t == '\r') 6295 #endif 6296 t++; 6297 if (*t == '\n' || *t == '#') { 6298 formbrack = 1; 6299 ENTER; 6300 SAVEI8(PL_parser->form_lex_state); 6301 SAVEI32(PL_lex_formbrack); 6302 PL_parser->form_lex_state = PL_lex_state; 6303 PL_lex_formbrack = PL_lex_brackets + 1; 6304 goto leftbracket; 6305 } 6306 } 6307 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6308 s--; 6309 TOKEN(0); 6310 } 6311 pl_yylval.ival = 0; 6312 OPERATOR(ASSIGNOP); 6313 case '!': 6314 s++; 6315 { 6316 const char tmp = *s++; 6317 if (tmp == '=') { 6318 /* was this !=~ where !~ was meant? 6319 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6320 6321 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6322 const char *t = s+1; 6323 6324 while (t < PL_bufend && isSPACE(*t)) 6325 ++t; 6326 6327 if (*t == '/' || *t == '?' || 6328 ((*t == 'm' || *t == 's' || *t == 'y') 6329 && !isWORDCHAR(t[1])) || 6330 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6331 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6332 "!=~ should be !~"); 6333 } 6334 if (!PL_lex_allbrackets && 6335 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6336 s -= 2; 6337 TOKEN(0); 6338 } 6339 Eop(OP_NE); 6340 } 6341 if (tmp == '~') 6342 PMop(OP_NOT); 6343 } 6344 s--; 6345 OPERATOR('!'); 6346 case '<': 6347 if (PL_expect != XOPERATOR) { 6348 if (s[1] != '<' && !strchr(s,'>')) 6349 check_uni(); 6350 if (s[1] == '<') 6351 s = scan_heredoc(s); 6352 else 6353 s = scan_inputsymbol(s); 6354 PL_expect = XOPERATOR; 6355 TOKEN(sublex_start()); 6356 } 6357 s++; 6358 { 6359 char tmp = *s++; 6360 if (tmp == '<') { 6361 if (*s == '=' && !PL_lex_allbrackets && 6362 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6363 s -= 2; 6364 TOKEN(0); 6365 } 6366 SHop(OP_LEFT_SHIFT); 6367 } 6368 if (tmp == '=') { 6369 tmp = *s++; 6370 if (tmp == '>') { 6371 if (!PL_lex_allbrackets && 6372 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6373 s -= 3; 6374 TOKEN(0); 6375 } 6376 Eop(OP_NCMP); 6377 } 6378 s--; 6379 if (!PL_lex_allbrackets && 6380 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6381 s -= 2; 6382 TOKEN(0); 6383 } 6384 Rop(OP_LE); 6385 } 6386 } 6387 s--; 6388 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6389 s--; 6390 TOKEN(0); 6391 } 6392 Rop(OP_LT); 6393 case '>': 6394 s++; 6395 { 6396 const char tmp = *s++; 6397 if (tmp == '>') { 6398 if (*s == '=' && !PL_lex_allbrackets && 6399 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6400 s -= 2; 6401 TOKEN(0); 6402 } 6403 SHop(OP_RIGHT_SHIFT); 6404 } 6405 else if (tmp == '=') { 6406 if (!PL_lex_allbrackets && 6407 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6408 s -= 2; 6409 TOKEN(0); 6410 } 6411 Rop(OP_GE); 6412 } 6413 } 6414 s--; 6415 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6416 s--; 6417 TOKEN(0); 6418 } 6419 Rop(OP_GT); 6420 6421 case '$': 6422 CLINE; 6423 6424 if (PL_expect == XOPERATOR) { 6425 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6426 return deprecate_commaless_var_list(); 6427 } 6428 } 6429 6430 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) { 6431 PL_tokenbuf[0] = '@'; 6432 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1, 6433 sizeof PL_tokenbuf - 1, FALSE); 6434 if (PL_expect == XOPERATOR) 6435 no_op("Array length", s); 6436 if (!PL_tokenbuf[1]) 6437 PREREF(DOLSHARP); 6438 PL_expect = XOPERATOR; 6439 force_ident_maybe_lex('#'); 6440 TOKEN(DOLSHARP); 6441 } 6442 6443 PL_tokenbuf[0] = '$'; 6444 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, 6445 sizeof PL_tokenbuf - 1, FALSE); 6446 if (PL_expect == XOPERATOR) 6447 no_op("Scalar", s); 6448 if (!PL_tokenbuf[1]) { 6449 if (s == PL_bufend) 6450 yyerror("Final $ should be \\$ or $name"); 6451 PREREF('$'); 6452 } 6453 6454 d = s; 6455 { 6456 const char tmp = *s; 6457 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6458 s = SKIPSPACE1(s); 6459 6460 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6461 && intuit_more(s)) { 6462 if (*s == '[') { 6463 PL_tokenbuf[0] = '@'; 6464 if (ckWARN(WARN_SYNTAX)) { 6465 char *t = s+1; 6466 6467 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') 6468 t++; 6469 if (*t++ == ',') { 6470 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ 6471 while (t < PL_bufend && *t != ']') 6472 t++; 6473 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6474 "Multidimensional syntax %.*s not supported", 6475 (int)((t - PL_bufptr) + 1), PL_bufptr); 6476 } 6477 } 6478 } 6479 else if (*s == '{') { 6480 char *t; 6481 PL_tokenbuf[0] = '%'; 6482 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) 6483 && (t = strchr(s, '}')) && (t = strchr(t, '='))) 6484 { 6485 char tmpbuf[sizeof PL_tokenbuf]; 6486 do { 6487 t++; 6488 } while (isSPACE(*t)); 6489 if (isIDFIRST_lazy_if(t,UTF)) { 6490 STRLEN len; 6491 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 6492 &len); 6493 while (isSPACE(*t)) 6494 t++; 6495 if (*t == ';' 6496 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) 6497 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6498 "You need to quote \"%"SVf"\"", 6499 SVfARG(newSVpvn_flags(tmpbuf, len, 6500 SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); 6501 } 6502 } 6503 } 6504 } 6505 6506 PL_expect = XOPERATOR; 6507 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 6508 const bool islop = (PL_last_lop == PL_oldoldbufptr); 6509 if (!islop || PL_last_lop_op == OP_GREPSTART) 6510 PL_expect = XOPERATOR; 6511 else if (strchr("$@\"'`q", *s)) 6512 PL_expect = XTERM; /* e.g. print $fh "foo" */ 6513 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 6514 PL_expect = XTERM; /* e.g. print $fh &sub */ 6515 else if (isIDFIRST_lazy_if(s,UTF)) { 6516 char tmpbuf[sizeof PL_tokenbuf]; 6517 int t2; 6518 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 6519 if ((t2 = keyword(tmpbuf, len, 0))) { 6520 /* binary operators exclude handle interpretations */ 6521 switch (t2) { 6522 case -KEY_x: 6523 case -KEY_eq: 6524 case -KEY_ne: 6525 case -KEY_gt: 6526 case -KEY_lt: 6527 case -KEY_ge: 6528 case -KEY_le: 6529 case -KEY_cmp: 6530 break; 6531 default: 6532 PL_expect = XTERM; /* e.g. print $fh length() */ 6533 break; 6534 } 6535 } 6536 else { 6537 PL_expect = XTERM; /* e.g. print $fh subr() */ 6538 } 6539 } 6540 else if (isDIGIT(*s)) 6541 PL_expect = XTERM; /* e.g. print $fh 3 */ 6542 else if (*s == '.' && isDIGIT(s[1])) 6543 PL_expect = XTERM; /* e.g. print $fh .3 */ 6544 else if ((*s == '?' || *s == '-' || *s == '+') 6545 && !isSPACE(s[1]) && s[1] != '=') 6546 PL_expect = XTERM; /* e.g. print $fh -1 */ 6547 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 6548 && s[1] != '/') 6549 PL_expect = XTERM; /* e.g. print $fh /.../ 6550 XXX except DORDOR operator 6551 */ 6552 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 6553 && s[2] != '=') 6554 PL_expect = XTERM; /* print $fh <<"EOF" */ 6555 } 6556 } 6557 force_ident_maybe_lex('$'); 6558 TOKEN('$'); 6559 6560 case '@': 6561 if (PL_expect == XOPERATOR) 6562 no_op("Array", s); 6563 PL_tokenbuf[0] = '@'; 6564 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6565 if (!PL_tokenbuf[1]) { 6566 PREREF('@'); 6567 } 6568 if (PL_lex_state == LEX_NORMAL) 6569 s = SKIPSPACE1(s); 6570 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 6571 if (*s == '{') 6572 PL_tokenbuf[0] = '%'; 6573 6574 /* Warn about @ where they meant $. */ 6575 if (*s == '[' || *s == '{') { 6576 if (ckWARN(WARN_SYNTAX)) { 6577 const char *t = s + 1; 6578 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t))) 6579 t += UTF ? UTF8SKIP(t) : 1; 6580 if (*t == '}' || *t == ']') { 6581 t++; 6582 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ 6583 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ 6584 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6585 "Scalar value %"SVf" better written as $%"SVf, 6586 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr), 6587 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))), 6588 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1), 6589 SVs_TEMP | (UTF ? SVf_UTF8 : 0 )))); 6590 } 6591 } 6592 } 6593 } 6594 PL_expect = XOPERATOR; 6595 force_ident_maybe_lex('@'); 6596 TERM('@'); 6597 6598 case '/': /* may be division, defined-or, or pattern */ 6599 if (PL_expect == XTERMORDORDOR && s[1] == '/') { 6600 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6601 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6602 TOKEN(0); 6603 s += 2; 6604 AOPERATOR(DORDOR); 6605 } 6606 case '?': /* may either be conditional or pattern */ 6607 if (PL_expect == XOPERATOR) { 6608 char tmp = *s++; 6609 if(tmp == '?') { 6610 if (!PL_lex_allbrackets && 6611 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) { 6612 s--; 6613 TOKEN(0); 6614 } 6615 PL_lex_allbrackets++; 6616 OPERATOR('?'); 6617 } 6618 else { 6619 tmp = *s++; 6620 if(tmp == '/') { 6621 /* A // operator. */ 6622 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6623 (*s == '=' ? LEX_FAKEEOF_ASSIGN : 6624 LEX_FAKEEOF_LOGIC)) { 6625 s -= 2; 6626 TOKEN(0); 6627 } 6628 AOPERATOR(DORDOR); 6629 } 6630 else { 6631 s--; 6632 if (*s == '=' && !PL_lex_allbrackets && 6633 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6634 s--; 6635 TOKEN(0); 6636 } 6637 Mop(OP_DIVIDE); 6638 } 6639 } 6640 } 6641 else { 6642 /* Disable warning on "study /blah/" */ 6643 if (PL_oldoldbufptr == PL_last_uni 6644 && (*PL_last_uni != 's' || s - PL_last_uni < 5 6645 || memNE(PL_last_uni, "study", 5) 6646 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) 6647 )) 6648 check_uni(); 6649 if (*s == '?') 6650 deprecate("?PATTERN? without explicit operator"); 6651 s = scan_pat(s,OP_MATCH); 6652 TERM(sublex_start()); 6653 } 6654 6655 case '.': 6656 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 6657 #ifdef PERL_STRICT_CR 6658 && s[1] == '\n' 6659 #else 6660 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 6661 #endif 6662 && (s == PL_linestart || s[-1] == '\n') ) 6663 { 6664 PL_expect = XSTATE; 6665 formbrack = 2; /* dot seen where arguments expected */ 6666 goto rightbracket; 6667 } 6668 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 6669 s += 3; 6670 OPERATOR(YADAYADA); 6671 } 6672 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 6673 char tmp = *s++; 6674 if (*s == tmp) { 6675 if (!PL_lex_allbrackets && 6676 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) { 6677 s--; 6678 TOKEN(0); 6679 } 6680 s++; 6681 if (*s == tmp) { 6682 s++; 6683 pl_yylval.ival = OPf_SPECIAL; 6684 } 6685 else 6686 pl_yylval.ival = 0; 6687 OPERATOR(DOTDOT); 6688 } 6689 if (*s == '=' && !PL_lex_allbrackets && 6690 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6691 s--; 6692 TOKEN(0); 6693 } 6694 Aop(OP_CONCAT); 6695 } 6696 /* FALL THROUGH */ 6697 case '0': case '1': case '2': case '3': case '4': 6698 case '5': case '6': case '7': case '8': case '9': 6699 s = scan_num(s, &pl_yylval); 6700 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 6701 if (PL_expect == XOPERATOR) 6702 no_op("Number",s); 6703 TERM(THING); 6704 6705 case '\'': 6706 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 6707 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6708 if (PL_expect == XOPERATOR) { 6709 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6710 return deprecate_commaless_var_list(); 6711 } 6712 else 6713 no_op("String",s); 6714 } 6715 if (!s) 6716 missingterm(NULL); 6717 pl_yylval.ival = OP_CONST; 6718 TERM(sublex_start()); 6719 6720 case '"': 6721 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 6722 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6723 if (PL_expect == XOPERATOR) { 6724 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6725 return deprecate_commaless_var_list(); 6726 } 6727 else 6728 no_op("String",s); 6729 } 6730 if (!s) 6731 missingterm(NULL); 6732 pl_yylval.ival = OP_CONST; 6733 /* FIXME. I think that this can be const if char *d is replaced by 6734 more localised variables. */ 6735 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6736 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6737 pl_yylval.ival = OP_STRINGIFY; 6738 break; 6739 } 6740 } 6741 TERM(sublex_start()); 6742 6743 case '`': 6744 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 6745 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); 6746 if (PL_expect == XOPERATOR) 6747 no_op("Backticks",s); 6748 if (!s) 6749 missingterm(NULL); 6750 readpipe_override(); 6751 TERM(sublex_start()); 6752 6753 case '\\': 6754 s++; 6755 if (PL_lex_inwhat && isDIGIT(*s)) 6756 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6757 *s, *s); 6758 if (PL_expect == XOPERATOR) 6759 no_op("Backslash",s); 6760 OPERATOR(REFGEN); 6761 6762 case 'v': 6763 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 6764 char *start = s + 2; 6765 while (isDIGIT(*start) || *start == '_') 6766 start++; 6767 if (*start == '.' && isDIGIT(start[1])) { 6768 s = scan_num(s, &pl_yylval); 6769 TERM(THING); 6770 } 6771 else if ((*start == ':' && start[1] == ':') 6772 || (PL_expect == XSTATE && *start == ':')) 6773 goto keylookup; 6774 else if (PL_expect == XSTATE) { 6775 d = start; 6776 while (d < PL_bufend && isSPACE(*d)) d++; 6777 if (*d == ':') goto keylookup; 6778 } 6779 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 6780 if (!isALPHA(*start) && (PL_expect == XTERM 6781 || PL_expect == XREF || PL_expect == XSTATE 6782 || PL_expect == XTERMORDORDOR)) { 6783 GV *const gv = gv_fetchpvn_flags(s, start - s, 6784 UTF ? SVf_UTF8 : 0, SVt_PVCV); 6785 if (!gv) { 6786 s = scan_num(s, &pl_yylval); 6787 TERM(THING); 6788 } 6789 } 6790 } 6791 goto keylookup; 6792 case 'x': 6793 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 6794 s++; 6795 Mop(OP_REPEAT); 6796 } 6797 goto keylookup; 6798 6799 case '_': 6800 case 'a': case 'A': 6801 case 'b': case 'B': 6802 case 'c': case 'C': 6803 case 'd': case 'D': 6804 case 'e': case 'E': 6805 case 'f': case 'F': 6806 case 'g': case 'G': 6807 case 'h': case 'H': 6808 case 'i': case 'I': 6809 case 'j': case 'J': 6810 case 'k': case 'K': 6811 case 'l': case 'L': 6812 case 'm': case 'M': 6813 case 'n': case 'N': 6814 case 'o': case 'O': 6815 case 'p': case 'P': 6816 case 'q': case 'Q': 6817 case 'r': case 'R': 6818 case 's': case 'S': 6819 case 't': case 'T': 6820 case 'u': case 'U': 6821 case 'V': 6822 case 'w': case 'W': 6823 case 'X': 6824 case 'y': case 'Y': 6825 case 'z': case 'Z': 6826 6827 keylookup: { 6828 bool anydelim; 6829 bool lex; 6830 I32 tmp; 6831 SV *sv; 6832 CV *cv; 6833 PADOFFSET off; 6834 OP *rv2cv_op; 6835 6836 lex = FALSE; 6837 orig_keyword = 0; 6838 off = 0; 6839 sv = NULL; 6840 cv = NULL; 6841 gv = NULL; 6842 gvp = NULL; 6843 rv2cv_op = NULL; 6844 6845 PL_bufptr = s; 6846 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 6847 6848 /* Some keywords can be followed by any delimiter, including ':' */ 6849 anydelim = word_takes_any_delimeter(PL_tokenbuf, len); 6850 6851 /* x::* is just a word, unless x is "CORE" */ 6852 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE")) 6853 goto just_a_word; 6854 6855 d = s; 6856 while (d < PL_bufend && isSPACE(*d)) 6857 d++; /* no comments skipped here, or s### is misparsed */ 6858 6859 /* Is this a word before a => operator? */ 6860 if (*d == '=' && d[1] == '>') { 6861 CLINE; 6862 pl_yylval.opval 6863 = (OP*)newSVOP(OP_CONST, 0, 6864 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 6865 pl_yylval.opval->op_private = OPpCONST_BARE; 6866 TERM(WORD); 6867 } 6868 6869 /* Check for plugged-in keyword */ 6870 { 6871 OP *o; 6872 int result; 6873 char *saved_bufptr = PL_bufptr; 6874 PL_bufptr = s; 6875 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 6876 s = PL_bufptr; 6877 if (result == KEYWORD_PLUGIN_DECLINE) { 6878 /* not a plugged-in keyword */ 6879 PL_bufptr = saved_bufptr; 6880 } else if (result == KEYWORD_PLUGIN_STMT) { 6881 pl_yylval.opval = o; 6882 CLINE; 6883 PL_expect = XSTATE; 6884 return REPORT(PLUGSTMT); 6885 } else if (result == KEYWORD_PLUGIN_EXPR) { 6886 pl_yylval.opval = o; 6887 CLINE; 6888 PL_expect = XOPERATOR; 6889 return REPORT(PLUGEXPR); 6890 } else { 6891 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", 6892 PL_tokenbuf); 6893 } 6894 } 6895 6896 /* Check for built-in keyword */ 6897 tmp = keyword(PL_tokenbuf, len, 0); 6898 6899 /* Is this a label? */ 6900 if (!anydelim && PL_expect == XSTATE 6901 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 6902 s = d + 1; 6903 pl_yylval.pval = savepvn(PL_tokenbuf, len+1); 6904 pl_yylval.pval[len] = '\0'; 6905 pl_yylval.pval[len+1] = UTF ? 1 : 0; 6906 CLINE; 6907 TOKEN(LABEL); 6908 } 6909 6910 /* Check for lexical sub */ 6911 if (PL_expect != XOPERATOR) { 6912 char tmpbuf[sizeof PL_tokenbuf + 1]; 6913 *tmpbuf = '&'; 6914 Copy(PL_tokenbuf, tmpbuf+1, len, char); 6915 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0); 6916 if (off != NOT_IN_PAD) { 6917 assert(off); /* we assume this is boolean-true below */ 6918 if (PAD_COMPNAME_FLAGS_isOUR(off)) { 6919 HV * const stash = PAD_COMPNAME_OURSTASH(off); 6920 HEK * const stashname = HvNAME_HEK(stash); 6921 sv = newSVhek(stashname); 6922 sv_catpvs(sv, "::"); 6923 sv_catpvn_flags(sv, PL_tokenbuf, len, 6924 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 6925 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), 6926 SVt_PVCV); 6927 off = 0; 6928 if (!gv) { 6929 sv_free(sv); 6930 sv = NULL; 6931 goto just_a_word; 6932 } 6933 } 6934 else { 6935 rv2cv_op = newOP(OP_PADANY, 0); 6936 rv2cv_op->op_targ = off; 6937 cv = find_lexical_cv(off); 6938 } 6939 lex = TRUE; 6940 goto just_a_word; 6941 } 6942 off = 0; 6943 } 6944 6945 if (tmp < 0) { /* second-class keyword? */ 6946 GV *ogv = NULL; /* override (winner) */ 6947 GV *hgv = NULL; /* hidden (loser) */ 6948 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 6949 CV *cv; 6950 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 6951 UTF ? SVf_UTF8 : 0, SVt_PVCV)) && 6952 (cv = GvCVu(gv))) 6953 { 6954 if (GvIMPORTED_CV(gv)) 6955 ogv = gv; 6956 else if (! CvMETHOD(cv)) 6957 hgv = gv; 6958 } 6959 if (!ogv && 6960 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, 6961 UTF ? -(I32)len : (I32)len, FALSE)) && 6962 (gv = *gvp) && isGV_with_GP(gv) && 6963 GvCVu(gv) && GvIMPORTED_CV(gv)) 6964 { 6965 ogv = gv; 6966 } 6967 } 6968 if (ogv) { 6969 orig_keyword = tmp; 6970 tmp = 0; /* overridden by import or by GLOBAL */ 6971 } 6972 else if (gv && !gvp 6973 && -tmp==KEY_lock /* XXX generalizable kludge */ 6974 && GvCVu(gv)) 6975 { 6976 tmp = 0; /* any sub overrides "weak" keyword */ 6977 } 6978 else { /* no override */ 6979 tmp = -tmp; 6980 if (tmp == KEY_dump) { 6981 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 6982 "dump() better written as CORE::dump()"); 6983 } 6984 gv = NULL; 6985 gvp = 0; 6986 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ 6987 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 6988 "Ambiguous call resolved as CORE::%s(), " 6989 "qualify as such or use &", 6990 GvENAME(hgv)); 6991 } 6992 } 6993 6994 reserved_word: 6995 switch (tmp) { 6996 6997 default: /* not a keyword */ 6998 /* Trade off - by using this evil construction we can pull the 6999 variable gv into the block labelled keylookup. If not, then 7000 we have to give it function scope so that the goto from the 7001 earlier ':' case doesn't bypass the initialisation. */ 7002 if (0) { 7003 just_a_word_zero_gv: 7004 sv = NULL; 7005 cv = NULL; 7006 gv = NULL; 7007 gvp = NULL; 7008 rv2cv_op = NULL; 7009 orig_keyword = 0; 7010 lex = 0; 7011 off = 0; 7012 } 7013 just_a_word: { 7014 int pkgname = 0; 7015 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7016 const char penultchar = 7017 lastchar && PL_bufptr - 2 >= PL_linestart 7018 ? PL_bufptr[-2] 7019 : 0; 7020 #ifdef PERL_MAD 7021 SV *nextPL_nextwhite = 0; 7022 #endif 7023 7024 7025 /* Get the rest if it looks like a package qualifier */ 7026 7027 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7028 STRLEN morelen; 7029 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7030 TRUE, &morelen); 7031 if (!morelen) 7032 Perl_croak(aTHX_ "Bad name after %"SVf"%s", 7033 SVfARG(newSVpvn_flags(PL_tokenbuf, len, 7034 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )), 7035 *s == '\'' ? "'" : "::"); 7036 len += morelen; 7037 pkgname = 1; 7038 } 7039 7040 if (PL_expect == XOPERATOR) { 7041 if (PL_bufptr == PL_linestart) { 7042 CopLINE_dec(PL_curcop); 7043 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7044 CopLINE_inc(PL_curcop); 7045 } 7046 else 7047 no_op("Bareword",s); 7048 } 7049 7050 /* Look for a subroutine with this name in current package, 7051 unless this is a lexical sub, or name is "Foo::", 7052 in which case Foo is a bareword 7053 (and a package name). */ 7054 7055 if (len > 2 && !PL_madskills && 7056 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') 7057 { 7058 if (ckWARN(WARN_BAREWORD) 7059 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7060 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7061 "Bareword \"%"SVf"\" refers to nonexistent package", 7062 SVfARG(newSVpvn_flags(PL_tokenbuf, len, 7063 (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); 7064 len -= 2; 7065 PL_tokenbuf[len] = '\0'; 7066 gv = NULL; 7067 gvp = 0; 7068 } 7069 else { 7070 if (!lex && !gv) { 7071 /* Mustn't actually add anything to a symbol table. 7072 But also don't want to "initialise" any placeholder 7073 constants that might already be there into full 7074 blown PVGVs with attached PVCV. */ 7075 gv = gv_fetchpvn_flags(PL_tokenbuf, len, 7076 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ), 7077 SVt_PVCV); 7078 } 7079 len = 0; 7080 } 7081 7082 /* if we saw a global override before, get the right name */ 7083 7084 if (!sv) 7085 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, 7086 len ? len : strlen(PL_tokenbuf)); 7087 if (gvp) { 7088 SV * const tmp_sv = sv; 7089 sv = newSVpvs("CORE::GLOBAL::"); 7090 sv_catsv(sv, tmp_sv); 7091 SvREFCNT_dec(tmp_sv); 7092 } 7093 7094 #ifdef PERL_MAD 7095 if (PL_madskills && !PL_thistoken) { 7096 char *start = SvPVX(PL_linestr) + PL_realtokenstart; 7097 PL_thistoken = newSVpvn(start,s - start); 7098 PL_realtokenstart = s - SvPVX(PL_linestr); 7099 } 7100 #endif 7101 7102 /* Presume this is going to be a bareword of some sort. */ 7103 CLINE; 7104 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 7105 pl_yylval.opval->op_private = OPpCONST_BARE; 7106 7107 /* And if "Foo::", then that's what it certainly is. */ 7108 if (len) 7109 goto safe_bareword; 7110 7111 if (!off) 7112 { 7113 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); 7114 const_op->op_private = OPpCONST_BARE; 7115 rv2cv_op = newCVREF(0, const_op); 7116 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0); 7117 } 7118 7119 /* See if it's the indirect object for a list operator. */ 7120 7121 if (PL_oldoldbufptr && 7122 PL_oldoldbufptr < PL_bufptr && 7123 (PL_oldoldbufptr == PL_last_lop 7124 || PL_oldoldbufptr == PL_last_uni) && 7125 /* NO SKIPSPACE BEFORE HERE! */ 7126 (PL_expect == XREF || 7127 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF)) 7128 { 7129 bool immediate_paren = *s == '('; 7130 7131 /* (Now we can afford to cross potential line boundary.) */ 7132 s = SKIPSPACE2(s,nextPL_nextwhite); 7133 #ifdef PERL_MAD 7134 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */ 7135 #endif 7136 7137 /* Two barewords in a row may indicate method call. */ 7138 7139 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && 7140 (tmp = intuit_method(s, gv, cv))) { 7141 op_free(rv2cv_op); 7142 if (tmp == METHOD && !PL_lex_allbrackets && 7143 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7144 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7145 return REPORT(tmp); 7146 } 7147 7148 /* If not a declared subroutine, it's an indirect object. */ 7149 /* (But it's an indir obj regardless for sort.) */ 7150 /* Also, if "_" follows a filetest operator, it's a bareword */ 7151 7152 if ( 7153 ( !immediate_paren && (PL_last_lop_op == OP_SORT || 7154 (!cv && 7155 (PL_last_lop_op != OP_MAPSTART && 7156 PL_last_lop_op != OP_GREPSTART)))) 7157 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7158 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP)) 7159 ) 7160 { 7161 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7162 goto bareword; 7163 } 7164 } 7165 7166 PL_expect = XOPERATOR; 7167 #ifdef PERL_MAD 7168 if (isSPACE(*s)) 7169 s = SKIPSPACE2(s,nextPL_nextwhite); 7170 PL_nextwhite = nextPL_nextwhite; 7171 #else 7172 s = skipspace(s); 7173 #endif 7174 7175 /* Is this a word before a => operator? */ 7176 if (*s == '=' && s[1] == '>' && !pkgname) { 7177 op_free(rv2cv_op); 7178 CLINE; 7179 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); 7180 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) 7181 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); 7182 TERM(WORD); 7183 } 7184 7185 /* If followed by a paren, it's certainly a subroutine. */ 7186 if (*s == '(') { 7187 CLINE; 7188 if (cv) { 7189 d = s + 1; 7190 while (SPACE_OR_TAB(*d)) 7191 d++; 7192 if (*d == ')' && (sv = cv_const_sv(cv))) { 7193 s = d + 1; 7194 goto its_constant; 7195 } 7196 } 7197 #ifdef PERL_MAD 7198 if (PL_madskills) { 7199 PL_nextwhite = PL_thiswhite; 7200 PL_thiswhite = 0; 7201 } 7202 start_force(PL_curforce); 7203 #endif 7204 NEXTVAL_NEXTTOKE.opval = 7205 off ? rv2cv_op : pl_yylval.opval; 7206 PL_expect = XOPERATOR; 7207 #ifdef PERL_MAD 7208 if (PL_madskills) { 7209 PL_nextwhite = nextPL_nextwhite; 7210 curmad('X', PL_thistoken); 7211 PL_thistoken = newSVpvs(""); 7212 } 7213 #endif 7214 if (off) 7215 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7216 else op_free(rv2cv_op), force_next(WORD); 7217 pl_yylval.ival = 0; 7218 TOKEN('&'); 7219 } 7220 7221 /* If followed by var or block, call it a method (unless sub) */ 7222 7223 if ((*s == '$' || *s == '{') && !cv) { 7224 op_free(rv2cv_op); 7225 PL_last_lop = PL_oldbufptr; 7226 PL_last_lop_op = OP_METHOD; 7227 if (!PL_lex_allbrackets && 7228 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7229 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7230 PREBLOCK(METHOD); 7231 } 7232 7233 /* If followed by a bareword, see if it looks like indir obj. */ 7234 7235 if (!orig_keyword 7236 && (isIDFIRST_lazy_if(s,UTF) || *s == '$') 7237 && (tmp = intuit_method(s, gv, cv))) { 7238 op_free(rv2cv_op); 7239 if (tmp == METHOD && !PL_lex_allbrackets && 7240 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7241 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7242 return REPORT(tmp); 7243 } 7244 7245 /* Not a method, so call it a subroutine (if defined) */ 7246 7247 if (cv) { 7248 if (lastchar == '-' && penultchar != '-') { 7249 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP ); 7250 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7251 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()", 7252 SVfARG(tmpsv), SVfARG(tmpsv)); 7253 } 7254 /* Check for a constant sub */ 7255 if ((sv = cv_const_sv(cv))) { 7256 its_constant: 7257 op_free(rv2cv_op); 7258 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7259 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7260 pl_yylval.opval->op_private = OPpCONST_FOLDED; 7261 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7262 TOKEN(WORD); 7263 } 7264 7265 op_free(pl_yylval.opval); 7266 pl_yylval.opval = 7267 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7268 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7269 PL_last_lop = PL_oldbufptr; 7270 PL_last_lop_op = OP_ENTERSUB; 7271 /* Is there a prototype? */ 7272 if ( 7273 #ifdef PERL_MAD 7274 cv && 7275 #endif 7276 SvPOK(cv)) 7277 { 7278 STRLEN protolen = CvPROTOLEN(cv); 7279 const char *proto = CvPROTO(cv); 7280 bool optional; 7281 if (!protolen) 7282 TERM(FUNC0SUB); 7283 if ((optional = *proto == ';')) 7284 do 7285 proto++; 7286 while (*proto == ';'); 7287 if ( 7288 ( 7289 ( 7290 *proto == '$' || *proto == '_' 7291 || *proto == '*' || *proto == '+' 7292 ) 7293 && proto[1] == '\0' 7294 ) 7295 || ( 7296 *proto == '\\' && proto[1] && proto[2] == '\0' 7297 ) 7298 ) 7299 UNIPROTO(UNIOPSUB,optional); 7300 if (*proto == '\\' && proto[1] == '[') { 7301 const char *p = proto + 2; 7302 while(*p && *p != ']') 7303 ++p; 7304 if(*p == ']' && !p[1]) 7305 UNIPROTO(UNIOPSUB,optional); 7306 } 7307 if (*proto == '&' && *s == '{') { 7308 if (PL_curstash) 7309 sv_setpvs(PL_subname, "__ANON__"); 7310 else 7311 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 7312 if (!PL_lex_allbrackets && 7313 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7314 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7315 PREBLOCK(LSTOPSUB); 7316 } 7317 } 7318 #ifdef PERL_MAD 7319 { 7320 if (PL_madskills) { 7321 PL_nextwhite = PL_thiswhite; 7322 PL_thiswhite = 0; 7323 } 7324 start_force(PL_curforce); 7325 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7326 PL_expect = XTERM; 7327 if (PL_madskills) { 7328 PL_nextwhite = nextPL_nextwhite; 7329 curmad('X', PL_thistoken); 7330 PL_thistoken = newSVpvs(""); 7331 } 7332 force_next(off ? PRIVATEREF : WORD); 7333 if (!PL_lex_allbrackets && 7334 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7335 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7336 TOKEN(NOAMP); 7337 } 7338 } 7339 7340 /* Guess harder when madskills require "best effort". */ 7341 if (PL_madskills && (!gv || !GvCVu(gv))) { 7342 int probable_sub = 0; 7343 if (strchr("\"'`$@%0123456789!*+{[<", *s)) 7344 probable_sub = 1; 7345 else if (isALPHA(*s)) { 7346 char tmpbuf[1024]; 7347 STRLEN tmplen; 7348 d = s; 7349 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen); 7350 if (!keyword(tmpbuf, tmplen, 0)) 7351 probable_sub = 1; 7352 else { 7353 while (d < PL_bufend && isSPACE(*d)) 7354 d++; 7355 if (*d == '=' && d[1] == '>') 7356 probable_sub = 1; 7357 } 7358 } 7359 if (probable_sub) { 7360 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), 7361 SVt_PVCV); 7362 op_free(pl_yylval.opval); 7363 pl_yylval.opval = 7364 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7365 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7366 PL_last_lop = PL_oldbufptr; 7367 PL_last_lop_op = OP_ENTERSUB; 7368 PL_nextwhite = PL_thiswhite; 7369 PL_thiswhite = 0; 7370 start_force(PL_curforce); 7371 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7372 PL_expect = XTERM; 7373 PL_nextwhite = nextPL_nextwhite; 7374 curmad('X', PL_thistoken); 7375 PL_thistoken = newSVpvs(""); 7376 force_next(off ? PRIVATEREF : WORD); 7377 if (!PL_lex_allbrackets && 7378 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7379 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7380 TOKEN(NOAMP); 7381 } 7382 #else 7383 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7384 PL_expect = XTERM; 7385 force_next(off ? PRIVATEREF : WORD); 7386 if (!PL_lex_allbrackets && 7387 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7388 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7389 TOKEN(NOAMP); 7390 #endif 7391 } 7392 7393 /* Call it a bare word */ 7394 7395 if (PL_hints & HINT_STRICT_SUBS) 7396 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7397 else { 7398 bareword: 7399 /* after "print" and similar functions (corresponding to 7400 * "F? L" in opcode.pl), whatever wasn't already parsed as 7401 * a filehandle should be subject to "strict subs". 7402 * Likewise for the optional indirect-object argument to system 7403 * or exec, which can't be a bareword */ 7404 if ((PL_last_lop_op == OP_PRINT 7405 || PL_last_lop_op == OP_PRTF 7406 || PL_last_lop_op == OP_SAY 7407 || PL_last_lop_op == OP_SYSTEM 7408 || PL_last_lop_op == OP_EXEC) 7409 && (PL_hints & HINT_STRICT_SUBS)) 7410 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7411 if (lastchar != '-') { 7412 if (ckWARN(WARN_RESERVED)) { 7413 d = PL_tokenbuf; 7414 while (isLOWER(*d)) 7415 d++; 7416 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) 7417 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7418 PL_tokenbuf); 7419 } 7420 } 7421 } 7422 op_free(rv2cv_op); 7423 7424 safe_bareword: 7425 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { 7426 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7427 "Operator or semicolon missing before %c%"SVf, 7428 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf, 7429 strlen(PL_tokenbuf), 7430 SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); 7431 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7432 "Ambiguous use of %c resolved as operator %c", 7433 lastchar, lastchar); 7434 } 7435 TOKEN(WORD); 7436 } 7437 7438 case KEY___FILE__: 7439 FUN0OP( 7440 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) 7441 ); 7442 7443 case KEY___LINE__: 7444 FUN0OP( 7445 (OP*)newSVOP(OP_CONST, 0, 7446 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))) 7447 ); 7448 7449 case KEY___PACKAGE__: 7450 FUN0OP( 7451 (OP*)newSVOP(OP_CONST, 0, 7452 (PL_curstash 7453 ? newSVhek(HvNAME_HEK(PL_curstash)) 7454 : &PL_sv_undef)) 7455 ); 7456 7457 case KEY___DATA__: 7458 case KEY___END__: { 7459 GV *gv; 7460 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 7461 const char *pname = "main"; 7462 STRLEN plen = 4; 7463 U32 putf8 = 0; 7464 if (PL_tokenbuf[2] == 'D') 7465 { 7466 HV * const stash = 7467 PL_curstash ? PL_curstash : PL_defstash; 7468 pname = HvNAME_get(stash); 7469 plen = HvNAMELEN (stash); 7470 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8; 7471 } 7472 gv = gv_fetchpvn_flags( 7473 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname), 7474 plen+6, GV_ADD|putf8, SVt_PVIO 7475 ); 7476 GvMULTI_on(gv); 7477 if (!GvIO(gv)) 7478 GvIOp(gv) = newIO(); 7479 IoIFP(GvIOp(gv)) = PL_rsfp; 7480 #if defined(HAS_FCNTL) && defined(F_SETFD) 7481 { 7482 const int fd = PerlIO_fileno(PL_rsfp); 7483 fcntl(fd,F_SETFD,fd >= 3); 7484 } 7485 #endif 7486 /* Mark this internal pseudo-handle as clean */ 7487 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 7488 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 7489 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 7490 else 7491 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 7492 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 7493 /* if the script was opened in binmode, we need to revert 7494 * it to text mode for compatibility; but only iff it has CRs 7495 * XXX this is a questionable hack at best. */ 7496 if (PL_bufend-PL_bufptr > 2 7497 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 7498 { 7499 Off_t loc = 0; 7500 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 7501 loc = PerlIO_tell(PL_rsfp); 7502 (void)PerlIO_seek(PL_rsfp, 0L, 0); 7503 } 7504 #ifdef NETWARE 7505 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 7506 #else 7507 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 7508 #endif /* NETWARE */ 7509 if (loc > 0) 7510 PerlIO_seek(PL_rsfp, loc, 0); 7511 } 7512 } 7513 #endif 7514 #ifdef PERLIO_LAYERS 7515 if (!IN_BYTES) { 7516 if (UTF) 7517 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 7518 else if (PL_encoding) { 7519 SV *name; 7520 dSP; 7521 ENTER; 7522 SAVETMPS; 7523 PUSHMARK(sp); 7524 EXTEND(SP, 1); 7525 XPUSHs(PL_encoding); 7526 PUTBACK; 7527 call_method("name", G_SCALAR); 7528 SPAGAIN; 7529 name = POPs; 7530 PUTBACK; 7531 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 7532 Perl_form(aTHX_ ":encoding(%"SVf")", 7533 SVfARG(name))); 7534 FREETMPS; 7535 LEAVE; 7536 } 7537 } 7538 #endif 7539 #ifdef PERL_MAD 7540 if (PL_madskills) { 7541 if (PL_realtokenstart >= 0) { 7542 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 7543 if (!PL_endwhite) 7544 PL_endwhite = newSVpvs(""); 7545 sv_catsv(PL_endwhite, PL_thiswhite); 7546 PL_thiswhite = 0; 7547 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); 7548 PL_realtokenstart = -1; 7549 } 7550 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) 7551 != NULL) ; 7552 } 7553 #endif 7554 PL_rsfp = NULL; 7555 } 7556 goto fake_eof; 7557 } 7558 7559 case KEY___SUB__: 7560 FUN0OP(newPVOP(OP_RUNCV,0,NULL)); 7561 7562 case KEY_AUTOLOAD: 7563 case KEY_DESTROY: 7564 case KEY_BEGIN: 7565 case KEY_UNITCHECK: 7566 case KEY_CHECK: 7567 case KEY_INIT: 7568 case KEY_END: 7569 if (PL_expect == XSTATE) { 7570 s = PL_bufptr; 7571 goto really_sub; 7572 } 7573 goto just_a_word; 7574 7575 case KEY_CORE: 7576 if (*s == ':' && s[1] == ':') { 7577 STRLEN olen = len; 7578 d = s; 7579 s += 2; 7580 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7581 if ((*s == ':' && s[1] == ':') 7582 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 7583 { 7584 s = d; 7585 len = olen; 7586 Copy(PL_bufptr, PL_tokenbuf, olen, char); 7587 goto just_a_word; 7588 } 7589 if (!tmp) 7590 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword", 7591 SVfARG(newSVpvn_flags(PL_tokenbuf, len, 7592 (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); 7593 if (tmp < 0) 7594 tmp = -tmp; 7595 else if (tmp == KEY_require || tmp == KEY_do 7596 || tmp == KEY_glob) 7597 /* that's a way to remember we saw "CORE::" */ 7598 orig_keyword = tmp; 7599 goto reserved_word; 7600 } 7601 goto just_a_word; 7602 7603 case KEY_abs: 7604 UNI(OP_ABS); 7605 7606 case KEY_alarm: 7607 UNI(OP_ALARM); 7608 7609 case KEY_accept: 7610 LOP(OP_ACCEPT,XTERM); 7611 7612 case KEY_and: 7613 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7614 return REPORT(0); 7615 OPERATOR(ANDOP); 7616 7617 case KEY_atan2: 7618 LOP(OP_ATAN2,XTERM); 7619 7620 case KEY_bind: 7621 LOP(OP_BIND,XTERM); 7622 7623 case KEY_binmode: 7624 LOP(OP_BINMODE,XTERM); 7625 7626 case KEY_bless: 7627 LOP(OP_BLESS,XTERM); 7628 7629 case KEY_break: 7630 FUN0(OP_BREAK); 7631 7632 case KEY_chop: 7633 UNI(OP_CHOP); 7634 7635 case KEY_continue: 7636 /* We have to disambiguate the two senses of 7637 "continue". If the next token is a '{' then 7638 treat it as the start of a continue block; 7639 otherwise treat it as a control operator. 7640 */ 7641 s = skipspace(s); 7642 if (*s == '{') 7643 PREBLOCK(CONTINUE); 7644 else 7645 FUN0(OP_CONTINUE); 7646 7647 case KEY_chdir: 7648 /* may use HOME */ 7649 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7650 UNI(OP_CHDIR); 7651 7652 case KEY_close: 7653 UNI(OP_CLOSE); 7654 7655 case KEY_closedir: 7656 UNI(OP_CLOSEDIR); 7657 7658 case KEY_cmp: 7659 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7660 return REPORT(0); 7661 Eop(OP_SCMP); 7662 7663 case KEY_caller: 7664 UNI(OP_CALLER); 7665 7666 case KEY_crypt: 7667 #ifdef FCRYPT 7668 if (!PL_cryptseen) { 7669 PL_cryptseen = TRUE; 7670 init_des(); 7671 } 7672 #endif 7673 LOP(OP_CRYPT,XTERM); 7674 7675 case KEY_chmod: 7676 LOP(OP_CHMOD,XTERM); 7677 7678 case KEY_chown: 7679 LOP(OP_CHOWN,XTERM); 7680 7681 case KEY_connect: 7682 LOP(OP_CONNECT,XTERM); 7683 7684 case KEY_chr: 7685 UNI(OP_CHR); 7686 7687 case KEY_cos: 7688 UNI(OP_COS); 7689 7690 case KEY_chroot: 7691 UNI(OP_CHROOT); 7692 7693 case KEY_default: 7694 PREBLOCK(DEFAULT); 7695 7696 case KEY_do: 7697 s = SKIPSPACE1(s); 7698 if (*s == '{') 7699 PRETERMBLOCK(DO); 7700 if (*s != '\'') { 7701 *PL_tokenbuf = '&'; 7702 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7703 1, &len); 7704 if (len && !keyword(PL_tokenbuf + 1, len, 0)) { 7705 d = SKIPSPACE1(d); 7706 if (*d == '(') { 7707 force_ident_maybe_lex('&'); 7708 s = d; 7709 } 7710 } 7711 } 7712 if (orig_keyword == KEY_do) { 7713 orig_keyword = 0; 7714 pl_yylval.ival = 1; 7715 } 7716 else 7717 pl_yylval.ival = 0; 7718 OPERATOR(DO); 7719 7720 case KEY_die: 7721 PL_hints |= HINT_BLOCK_SCOPE; 7722 LOP(OP_DIE,XTERM); 7723 7724 case KEY_defined: 7725 UNI(OP_DEFINED); 7726 7727 case KEY_delete: 7728 UNI(OP_DELETE); 7729 7730 case KEY_dbmopen: 7731 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7732 STR_WITH_LEN("NDBM_File::"), 7733 STR_WITH_LEN("DB_File::"), 7734 STR_WITH_LEN("GDBM_File::"), 7735 STR_WITH_LEN("SDBM_File::"), 7736 STR_WITH_LEN("ODBM_File::"), 7737 NULL); 7738 LOP(OP_DBMOPEN,XTERM); 7739 7740 case KEY_dbmclose: 7741 UNI(OP_DBMCLOSE); 7742 7743 case KEY_dump: 7744 PL_expect = XOPERATOR; 7745 s = force_word(s,WORD,TRUE,FALSE,FALSE); 7746 LOOPX(OP_DUMP); 7747 7748 case KEY_else: 7749 PREBLOCK(ELSE); 7750 7751 case KEY_elsif: 7752 pl_yylval.ival = CopLINE(PL_curcop); 7753 OPERATOR(ELSIF); 7754 7755 case KEY_eq: 7756 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7757 return REPORT(0); 7758 Eop(OP_SEQ); 7759 7760 case KEY_exists: 7761 UNI(OP_EXISTS); 7762 7763 case KEY_exit: 7764 if (PL_madskills) 7765 UNI(OP_INT); 7766 UNI(OP_EXIT); 7767 7768 case KEY_eval: 7769 s = SKIPSPACE1(s); 7770 if (*s == '{') { /* block eval */ 7771 PL_expect = XTERMBLOCK; 7772 UNIBRACK(OP_ENTERTRY); 7773 } 7774 else { /* string eval */ 7775 PL_expect = XTERM; 7776 UNIBRACK(OP_ENTEREVAL); 7777 } 7778 7779 case KEY_evalbytes: 7780 PL_expect = XTERM; 7781 UNIBRACK(-OP_ENTEREVAL); 7782 7783 case KEY_eof: 7784 UNI(OP_EOF); 7785 7786 case KEY_exp: 7787 UNI(OP_EXP); 7788 7789 case KEY_each: 7790 UNI(OP_EACH); 7791 7792 case KEY_exec: 7793 LOP(OP_EXEC,XREF); 7794 7795 case KEY_endhostent: 7796 FUN0(OP_EHOSTENT); 7797 7798 case KEY_endnetent: 7799 FUN0(OP_ENETENT); 7800 7801 case KEY_endservent: 7802 FUN0(OP_ESERVENT); 7803 7804 case KEY_endprotoent: 7805 FUN0(OP_EPROTOENT); 7806 7807 case KEY_endpwent: 7808 FUN0(OP_EPWENT); 7809 7810 case KEY_endgrent: 7811 FUN0(OP_EGRENT); 7812 7813 case KEY_for: 7814 case KEY_foreach: 7815 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7816 return REPORT(0); 7817 pl_yylval.ival = CopLINE(PL_curcop); 7818 s = SKIPSPACE1(s); 7819 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 7820 char *p = s; 7821 #ifdef PERL_MAD 7822 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */ 7823 #endif 7824 7825 if ((PL_bufend - p) >= 3 && 7826 strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 7827 p += 2; 7828 else if ((PL_bufend - p) >= 4 && 7829 strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 7830 p += 3; 7831 p = PEEKSPACE(p); 7832 if (isIDFIRST_lazy_if(p,UTF)) { 7833 p = scan_ident(p, PL_bufend, 7834 PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 7835 p = PEEKSPACE(p); 7836 } 7837 if (*p != '$') 7838 Perl_croak(aTHX_ "Missing $ on loop variable"); 7839 #ifdef PERL_MAD 7840 s = SvPVX(PL_linestr) + soff; 7841 #endif 7842 } 7843 OPERATOR(FOR); 7844 7845 case KEY_formline: 7846 LOP(OP_FORMLINE,XTERM); 7847 7848 case KEY_fork: 7849 FUN0(OP_FORK); 7850 7851 case KEY_fc: 7852 UNI(OP_FC); 7853 7854 case KEY_fcntl: 7855 LOP(OP_FCNTL,XTERM); 7856 7857 case KEY_fileno: 7858 UNI(OP_FILENO); 7859 7860 case KEY_flock: 7861 LOP(OP_FLOCK,XTERM); 7862 7863 case KEY_gt: 7864 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7865 return REPORT(0); 7866 Rop(OP_SGT); 7867 7868 case KEY_ge: 7869 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7870 return REPORT(0); 7871 Rop(OP_SGE); 7872 7873 case KEY_grep: 7874 LOP(OP_GREPSTART, XREF); 7875 7876 case KEY_goto: 7877 PL_expect = XOPERATOR; 7878 s = force_word(s,WORD,TRUE,FALSE,FALSE); 7879 LOOPX(OP_GOTO); 7880 7881 case KEY_gmtime: 7882 UNI(OP_GMTIME); 7883 7884 case KEY_getc: 7885 UNIDOR(OP_GETC); 7886 7887 case KEY_getppid: 7888 FUN0(OP_GETPPID); 7889 7890 case KEY_getpgrp: 7891 UNI(OP_GETPGRP); 7892 7893 case KEY_getpriority: 7894 LOP(OP_GETPRIORITY,XTERM); 7895 7896 case KEY_getprotobyname: 7897 UNI(OP_GPBYNAME); 7898 7899 case KEY_getprotobynumber: 7900 LOP(OP_GPBYNUMBER,XTERM); 7901 7902 case KEY_getprotoent: 7903 FUN0(OP_GPROTOENT); 7904 7905 case KEY_getpwent: 7906 FUN0(OP_GPWENT); 7907 7908 case KEY_getpwnam: 7909 UNI(OP_GPWNAM); 7910 7911 case KEY_getpwuid: 7912 UNI(OP_GPWUID); 7913 7914 case KEY_getpeername: 7915 UNI(OP_GETPEERNAME); 7916 7917 case KEY_gethostbyname: 7918 UNI(OP_GHBYNAME); 7919 7920 case KEY_gethostbyaddr: 7921 LOP(OP_GHBYADDR,XTERM); 7922 7923 case KEY_gethostent: 7924 FUN0(OP_GHOSTENT); 7925 7926 case KEY_getnetbyname: 7927 UNI(OP_GNBYNAME); 7928 7929 case KEY_getnetbyaddr: 7930 LOP(OP_GNBYADDR,XTERM); 7931 7932 case KEY_getnetent: 7933 FUN0(OP_GNETENT); 7934 7935 case KEY_getservbyname: 7936 LOP(OP_GSBYNAME,XTERM); 7937 7938 case KEY_getservbyport: 7939 LOP(OP_GSBYPORT,XTERM); 7940 7941 case KEY_getservent: 7942 FUN0(OP_GSERVENT); 7943 7944 case KEY_getsockname: 7945 UNI(OP_GETSOCKNAME); 7946 7947 case KEY_getsockopt: 7948 LOP(OP_GSOCKOPT,XTERM); 7949 7950 case KEY_getgrent: 7951 FUN0(OP_GGRENT); 7952 7953 case KEY_getgrnam: 7954 UNI(OP_GGRNAM); 7955 7956 case KEY_getgrgid: 7957 UNI(OP_GGRGID); 7958 7959 case KEY_getlogin: 7960 FUN0(OP_GETLOGIN); 7961 7962 case KEY_given: 7963 pl_yylval.ival = CopLINE(PL_curcop); 7964 Perl_ck_warner_d(aTHX_ 7965 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 7966 "given is experimental"); 7967 OPERATOR(GIVEN); 7968 7969 case KEY_glob: 7970 LOP( 7971 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB, 7972 XTERM 7973 ); 7974 7975 case KEY_hex: 7976 UNI(OP_HEX); 7977 7978 case KEY_if: 7979 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7980 return REPORT(0); 7981 pl_yylval.ival = CopLINE(PL_curcop); 7982 OPERATOR(IF); 7983 7984 case KEY_index: 7985 LOP(OP_INDEX,XTERM); 7986 7987 case KEY_int: 7988 UNI(OP_INT); 7989 7990 case KEY_ioctl: 7991 LOP(OP_IOCTL,XTERM); 7992 7993 case KEY_join: 7994 LOP(OP_JOIN,XTERM); 7995 7996 case KEY_keys: 7997 UNI(OP_KEYS); 7998 7999 case KEY_kill: 8000 LOP(OP_KILL,XTERM); 8001 8002 case KEY_last: 8003 PL_expect = XOPERATOR; 8004 s = force_word(s,WORD,TRUE,FALSE,FALSE); 8005 LOOPX(OP_LAST); 8006 8007 case KEY_lc: 8008 UNI(OP_LC); 8009 8010 case KEY_lcfirst: 8011 UNI(OP_LCFIRST); 8012 8013 case KEY_local: 8014 pl_yylval.ival = 0; 8015 OPERATOR(LOCAL); 8016 8017 case KEY_length: 8018 UNI(OP_LENGTH); 8019 8020 case KEY_lt: 8021 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8022 return REPORT(0); 8023 Rop(OP_SLT); 8024 8025 case KEY_le: 8026 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8027 return REPORT(0); 8028 Rop(OP_SLE); 8029 8030 case KEY_localtime: 8031 UNI(OP_LOCALTIME); 8032 8033 case KEY_log: 8034 UNI(OP_LOG); 8035 8036 case KEY_link: 8037 LOP(OP_LINK,XTERM); 8038 8039 case KEY_listen: 8040 LOP(OP_LISTEN,XTERM); 8041 8042 case KEY_lock: 8043 UNI(OP_LOCK); 8044 8045 case KEY_lstat: 8046 UNI(OP_LSTAT); 8047 8048 case KEY_m: 8049 s = scan_pat(s,OP_MATCH); 8050 TERM(sublex_start()); 8051 8052 case KEY_map: 8053 LOP(OP_MAPSTART, XREF); 8054 8055 case KEY_mkdir: 8056 LOP(OP_MKDIR,XTERM); 8057 8058 case KEY_msgctl: 8059 LOP(OP_MSGCTL,XTERM); 8060 8061 case KEY_msgget: 8062 LOP(OP_MSGGET,XTERM); 8063 8064 case KEY_msgrcv: 8065 LOP(OP_MSGRCV,XTERM); 8066 8067 case KEY_msgsnd: 8068 LOP(OP_MSGSND,XTERM); 8069 8070 case KEY_our: 8071 case KEY_my: 8072 case KEY_state: 8073 PL_in_my = (U16)tmp; 8074 s = SKIPSPACE1(s); 8075 if (isIDFIRST_lazy_if(s,UTF)) { 8076 #ifdef PERL_MAD 8077 char* start = s; 8078 #endif 8079 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 8080 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 8081 { 8082 if (!FEATURE_LEXSUBS_IS_ENABLED) 8083 Perl_croak(aTHX_ 8084 "Experimental \"%s\" subs not enabled", 8085 tmp == KEY_my ? "my" : 8086 tmp == KEY_state ? "state" : "our"); 8087 Perl_ck_warner_d(aTHX_ 8088 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS), 8089 "The lexical_subs feature is experimental"); 8090 goto really_sub; 8091 } 8092 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 8093 if (!PL_in_my_stash) { 8094 char tmpbuf[1024]; 8095 PL_bufptr = s; 8096 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 8097 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 8098 } 8099 #ifdef PERL_MAD 8100 if (PL_madskills) { /* just add type to declarator token */ 8101 sv_catsv(PL_thistoken, PL_nextwhite); 8102 PL_nextwhite = 0; 8103 sv_catpvn(PL_thistoken, start, s - start); 8104 } 8105 #endif 8106 } 8107 pl_yylval.ival = 1; 8108 OPERATOR(MY); 8109 8110 case KEY_next: 8111 PL_expect = XOPERATOR; 8112 s = force_word(s,WORD,TRUE,FALSE,FALSE); 8113 LOOPX(OP_NEXT); 8114 8115 case KEY_ne: 8116 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8117 return REPORT(0); 8118 Eop(OP_SNE); 8119 8120 case KEY_no: 8121 s = tokenize_use(0, s); 8122 TERM(USE); 8123 8124 case KEY_not: 8125 if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) 8126 FUN1(OP_NOT); 8127 else { 8128 if (!PL_lex_allbrackets && 8129 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8130 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8131 OPERATOR(NOTOP); 8132 } 8133 8134 case KEY_open: 8135 s = SKIPSPACE1(s); 8136 if (isIDFIRST_lazy_if(s,UTF)) { 8137 const char *t; 8138 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, 8139 &len); 8140 for (t=d; isSPACE(*t);) 8141 t++; 8142 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8143 /* [perl #16184] */ 8144 && !(t[0] == '=' && t[1] == '>') 8145 && !(t[0] == ':' && t[1] == ':') 8146 && !keyword(s, d-s, 0) 8147 ) { 8148 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s), 8149 SVs_TEMP | (UTF ? SVf_UTF8 : 0)); 8150 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8151 "Precedence problem: open %"SVf" should be open(%"SVf")", 8152 SVfARG(tmpsv), SVfARG(tmpsv)); 8153 } 8154 } 8155 LOP(OP_OPEN,XTERM); 8156 8157 case KEY_or: 8158 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8159 return REPORT(0); 8160 pl_yylval.ival = OP_OR; 8161 OPERATOR(OROP); 8162 8163 case KEY_ord: 8164 UNI(OP_ORD); 8165 8166 case KEY_oct: 8167 UNI(OP_OCT); 8168 8169 case KEY_opendir: 8170 LOP(OP_OPEN_DIR,XTERM); 8171 8172 case KEY_print: 8173 checkcomma(s,PL_tokenbuf,"filehandle"); 8174 LOP(OP_PRINT,XREF); 8175 8176 case KEY_printf: 8177 checkcomma(s,PL_tokenbuf,"filehandle"); 8178 LOP(OP_PRTF,XREF); 8179 8180 case KEY_prototype: 8181 UNI(OP_PROTOTYPE); 8182 8183 case KEY_push: 8184 LOP(OP_PUSH,XTERM); 8185 8186 case KEY_pop: 8187 UNIDOR(OP_POP); 8188 8189 case KEY_pos: 8190 UNIDOR(OP_POS); 8191 8192 case KEY_pack: 8193 LOP(OP_PACK,XTERM); 8194 8195 case KEY_package: 8196 s = force_word(s,WORD,FALSE,TRUE,FALSE); 8197 s = SKIPSPACE1(s); 8198 s = force_strict_version(s); 8199 PL_lex_expect = XBLOCK; 8200 OPERATOR(PACKAGE); 8201 8202 case KEY_pipe: 8203 LOP(OP_PIPE_OP,XTERM); 8204 8205 case KEY_q: 8206 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 8207 if (!s) 8208 missingterm(NULL); 8209 pl_yylval.ival = OP_CONST; 8210 TERM(sublex_start()); 8211 8212 case KEY_quotemeta: 8213 UNI(OP_QUOTEMETA); 8214 8215 case KEY_qw: { 8216 OP *words = NULL; 8217 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 8218 if (!s) 8219 missingterm(NULL); 8220 PL_expect = XOPERATOR; 8221 if (SvCUR(PL_lex_stuff)) { 8222 int warned_comma = !ckWARN(WARN_QW); 8223 int warned_comment = warned_comma; 8224 d = SvPV_force(PL_lex_stuff, len); 8225 while (len) { 8226 for (; isSPACE(*d) && len; --len, ++d) 8227 /**/; 8228 if (len) { 8229 SV *sv; 8230 const char *b = d; 8231 if (!warned_comma || !warned_comment) { 8232 for (; !isSPACE(*d) && len; --len, ++d) { 8233 if (!warned_comma && *d == ',') { 8234 Perl_warner(aTHX_ packWARN(WARN_QW), 8235 "Possible attempt to separate words with commas"); 8236 ++warned_comma; 8237 } 8238 else if (!warned_comment && *d == '#') { 8239 Perl_warner(aTHX_ packWARN(WARN_QW), 8240 "Possible attempt to put comments in qw() list"); 8241 ++warned_comment; 8242 } 8243 } 8244 } 8245 else { 8246 for (; !isSPACE(*d) && len; --len, ++d) 8247 /**/; 8248 } 8249 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 8250 words = op_append_elem(OP_LIST, words, 8251 newSVOP(OP_CONST, 0, tokeq(sv))); 8252 } 8253 } 8254 } 8255 if (!words) 8256 words = newNULLLIST(); 8257 if (PL_lex_stuff) { 8258 SvREFCNT_dec(PL_lex_stuff); 8259 PL_lex_stuff = NULL; 8260 } 8261 PL_expect = XOPERATOR; 8262 pl_yylval.opval = sawparens(words); 8263 TOKEN(QWLIST); 8264 } 8265 8266 case KEY_qq: 8267 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 8268 if (!s) 8269 missingterm(NULL); 8270 pl_yylval.ival = OP_STRINGIFY; 8271 if (SvIVX(PL_lex_stuff) == '\'') 8272 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8273 TERM(sublex_start()); 8274 8275 case KEY_qr: 8276 s = scan_pat(s,OP_QR); 8277 TERM(sublex_start()); 8278 8279 case KEY_qx: 8280 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 8281 if (!s) 8282 missingterm(NULL); 8283 readpipe_override(); 8284 TERM(sublex_start()); 8285 8286 case KEY_return: 8287 OLDLOP(OP_RETURN); 8288 8289 case KEY_require: 8290 s = SKIPSPACE1(s); 8291 PL_expect = XOPERATOR; 8292 if (isDIGIT(*s)) { 8293 s = force_version(s, FALSE); 8294 } 8295 else if (*s != 'v' || !isDIGIT(s[1]) 8296 || (s = force_version(s, TRUE), *s == 'v')) 8297 { 8298 *PL_tokenbuf = '\0'; 8299 s = force_word(s,WORD,TRUE,TRUE,FALSE); 8300 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 8301 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 8302 GV_ADD | (UTF ? SVf_UTF8 : 0)); 8303 else if (*s == '<') 8304 yyerror("<> should be quotes"); 8305 } 8306 if (orig_keyword == KEY_require) { 8307 orig_keyword = 0; 8308 pl_yylval.ival = 1; 8309 } 8310 else 8311 pl_yylval.ival = 0; 8312 PL_expect = XTERM; 8313 PL_bufptr = s; 8314 PL_last_uni = PL_oldbufptr; 8315 PL_last_lop_op = OP_REQUIRE; 8316 s = skipspace(s); 8317 return REPORT( (int)REQUIRE ); 8318 8319 case KEY_reset: 8320 UNI(OP_RESET); 8321 8322 case KEY_redo: 8323 PL_expect = XOPERATOR; 8324 s = force_word(s,WORD,TRUE,FALSE,FALSE); 8325 LOOPX(OP_REDO); 8326 8327 case KEY_rename: 8328 LOP(OP_RENAME,XTERM); 8329 8330 case KEY_rand: 8331 UNI(OP_RAND); 8332 8333 case KEY_rmdir: 8334 UNI(OP_RMDIR); 8335 8336 case KEY_rindex: 8337 LOP(OP_RINDEX,XTERM); 8338 8339 case KEY_read: 8340 LOP(OP_READ,XTERM); 8341 8342 case KEY_readdir: 8343 UNI(OP_READDIR); 8344 8345 case KEY_readline: 8346 UNIDOR(OP_READLINE); 8347 8348 case KEY_readpipe: 8349 UNIDOR(OP_BACKTICK); 8350 8351 case KEY_rewinddir: 8352 UNI(OP_REWINDDIR); 8353 8354 case KEY_recv: 8355 LOP(OP_RECV,XTERM); 8356 8357 case KEY_reverse: 8358 LOP(OP_REVERSE,XTERM); 8359 8360 case KEY_readlink: 8361 UNIDOR(OP_READLINK); 8362 8363 case KEY_ref: 8364 UNI(OP_REF); 8365 8366 case KEY_s: 8367 s = scan_subst(s); 8368 if (pl_yylval.opval) 8369 TERM(sublex_start()); 8370 else 8371 TOKEN(1); /* force error */ 8372 8373 case KEY_say: 8374 checkcomma(s,PL_tokenbuf,"filehandle"); 8375 LOP(OP_SAY,XREF); 8376 8377 case KEY_chomp: 8378 UNI(OP_CHOMP); 8379 8380 case KEY_scalar: 8381 UNI(OP_SCALAR); 8382 8383 case KEY_select: 8384 LOP(OP_SELECT,XTERM); 8385 8386 case KEY_seek: 8387 LOP(OP_SEEK,XTERM); 8388 8389 case KEY_semctl: 8390 LOP(OP_SEMCTL,XTERM); 8391 8392 case KEY_semget: 8393 LOP(OP_SEMGET,XTERM); 8394 8395 case KEY_semop: 8396 LOP(OP_SEMOP,XTERM); 8397 8398 case KEY_send: 8399 LOP(OP_SEND,XTERM); 8400 8401 case KEY_setpgrp: 8402 LOP(OP_SETPGRP,XTERM); 8403 8404 case KEY_setpriority: 8405 LOP(OP_SETPRIORITY,XTERM); 8406 8407 case KEY_sethostent: 8408 UNI(OP_SHOSTENT); 8409 8410 case KEY_setnetent: 8411 UNI(OP_SNETENT); 8412 8413 case KEY_setservent: 8414 UNI(OP_SSERVENT); 8415 8416 case KEY_setprotoent: 8417 UNI(OP_SPROTOENT); 8418 8419 case KEY_setpwent: 8420 FUN0(OP_SPWENT); 8421 8422 case KEY_setgrent: 8423 FUN0(OP_SGRENT); 8424 8425 case KEY_seekdir: 8426 LOP(OP_SEEKDIR,XTERM); 8427 8428 case KEY_setsockopt: 8429 LOP(OP_SSOCKOPT,XTERM); 8430 8431 case KEY_shift: 8432 UNIDOR(OP_SHIFT); 8433 8434 case KEY_shmctl: 8435 LOP(OP_SHMCTL,XTERM); 8436 8437 case KEY_shmget: 8438 LOP(OP_SHMGET,XTERM); 8439 8440 case KEY_shmread: 8441 LOP(OP_SHMREAD,XTERM); 8442 8443 case KEY_shmwrite: 8444 LOP(OP_SHMWRITE,XTERM); 8445 8446 case KEY_shutdown: 8447 LOP(OP_SHUTDOWN,XTERM); 8448 8449 case KEY_sin: 8450 UNI(OP_SIN); 8451 8452 case KEY_sleep: 8453 UNI(OP_SLEEP); 8454 8455 case KEY_socket: 8456 LOP(OP_SOCKET,XTERM); 8457 8458 case KEY_socketpair: 8459 LOP(OP_SOCKPAIR,XTERM); 8460 8461 case KEY_sort: 8462 checkcomma(s,PL_tokenbuf,"subroutine name"); 8463 s = SKIPSPACE1(s); 8464 PL_expect = XTERM; 8465 s = force_word(s,WORD,TRUE,TRUE,FALSE); 8466 LOP(OP_SORT,XREF); 8467 8468 case KEY_split: 8469 LOP(OP_SPLIT,XTERM); 8470 8471 case KEY_sprintf: 8472 LOP(OP_SPRINTF,XTERM); 8473 8474 case KEY_splice: 8475 LOP(OP_SPLICE,XTERM); 8476 8477 case KEY_sqrt: 8478 UNI(OP_SQRT); 8479 8480 case KEY_srand: 8481 UNI(OP_SRAND); 8482 8483 case KEY_stat: 8484 UNI(OP_STAT); 8485 8486 case KEY_study: 8487 UNI(OP_STUDY); 8488 8489 case KEY_substr: 8490 LOP(OP_SUBSTR,XTERM); 8491 8492 case KEY_format: 8493 case KEY_sub: 8494 really_sub: 8495 { 8496 char * const tmpbuf = PL_tokenbuf + 1; 8497 SSize_t tboffset = 0; 8498 expectation attrful; 8499 bool have_name, have_proto; 8500 const int key = tmp; 8501 8502 #ifdef PERL_MAD 8503 SV *tmpwhite = 0; 8504 8505 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 8506 SV *subtoken = PL_madskills 8507 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr)) 8508 : NULL; 8509 PL_thistoken = 0; 8510 8511 d = s; 8512 s = SKIPSPACE2(s,tmpwhite); 8513 #else 8514 d = s; 8515 s = skipspace(s); 8516 #endif 8517 8518 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' || 8519 (*s == ':' && s[1] == ':')) 8520 { 8521 #ifdef PERL_MAD 8522 SV *nametoke = NULL; 8523 #endif 8524 8525 PL_expect = XBLOCK; 8526 attrful = XATTRBLOCK; 8527 /* remember buffer pos'n for later force_word */ 8528 tboffset = s - PL_oldbufptr; 8529 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 8530 &len); 8531 #ifdef PERL_MAD 8532 if (PL_madskills) 8533 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); 8534 #endif 8535 *PL_tokenbuf = '&'; 8536 if (memchr(tmpbuf, ':', len) || key != KEY_sub 8537 || pad_findmy_pvn( 8538 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0 8539 ) != NOT_IN_PAD) 8540 sv_setpvn(PL_subname, tmpbuf, len); 8541 else { 8542 sv_setsv(PL_subname,PL_curstname); 8543 sv_catpvs(PL_subname,"::"); 8544 sv_catpvn(PL_subname,tmpbuf,len); 8545 } 8546 if (SvUTF8(PL_linestr)) 8547 SvUTF8_on(PL_subname); 8548 have_name = TRUE; 8549 8550 8551 #ifdef PERL_MAD 8552 start_force(0); 8553 CURMAD('X', nametoke); 8554 CURMAD('_', tmpwhite); 8555 force_ident_maybe_lex('&'); 8556 8557 s = SKIPSPACE2(d,tmpwhite); 8558 #else 8559 s = skipspace(d); 8560 #endif 8561 } 8562 else { 8563 if (key == KEY_my || key == KEY_our || key==KEY_state) 8564 { 8565 *d = '\0'; 8566 /* diag_listed_as: Missing name in "%s sub" */ 8567 Perl_croak(aTHX_ 8568 "Missing name in \"%s\"", PL_bufptr); 8569 } 8570 PL_expect = XTERMBLOCK; 8571 attrful = XATTRTERM; 8572 sv_setpvs(PL_subname,"?"); 8573 have_name = FALSE; 8574 } 8575 8576 if (key == KEY_format) { 8577 #ifdef PERL_MAD 8578 PL_thistoken = subtoken; 8579 s = d; 8580 PERL_UNUSED_VAR(tboffset); 8581 #else 8582 if (have_name) 8583 (void) force_word(PL_oldbufptr + tboffset, WORD, 8584 FALSE, TRUE, TRUE); 8585 #endif 8586 PREBLOCK(FORMAT); 8587 } 8588 8589 /* Look for a prototype */ 8590 if (*s == '(') { 8591 char *p; 8592 bool bad_proto = FALSE; 8593 bool in_brackets = FALSE; 8594 char greedy_proto = ' '; 8595 bool proto_after_greedy_proto = FALSE; 8596 bool must_be_last = FALSE; 8597 bool underscore = FALSE; 8598 bool seen_underscore = FALSE; 8599 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); 8600 STRLEN tmplen; 8601 8602 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 8603 if (!s) 8604 Perl_croak(aTHX_ "Prototype not terminated"); 8605 /* strip spaces and check for bad characters */ 8606 d = SvPV(PL_lex_stuff, tmplen); 8607 tmp = 0; 8608 for (p = d; tmplen; tmplen--, ++p) { 8609 if (!isSPACE(*p)) { 8610 d[tmp++] = *p; 8611 8612 if (warnillegalproto) { 8613 if (must_be_last) 8614 proto_after_greedy_proto = TRUE; 8615 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { 8616 bad_proto = TRUE; 8617 } 8618 else { 8619 if ( underscore ) { 8620 if ( !strchr(";@%", *p) ) 8621 bad_proto = TRUE; 8622 underscore = FALSE; 8623 } 8624 if ( *p == '[' ) { 8625 in_brackets = TRUE; 8626 } 8627 else if ( *p == ']' ) { 8628 in_brackets = FALSE; 8629 } 8630 else if ( (*p == '@' || *p == '%') && 8631 ( tmp < 2 || d[tmp-2] != '\\' ) && 8632 !in_brackets ) { 8633 must_be_last = TRUE; 8634 greedy_proto = *p; 8635 } 8636 else if ( *p == '_' ) { 8637 underscore = seen_underscore = TRUE; 8638 } 8639 } 8640 } 8641 } 8642 } 8643 d[tmp] = '\0'; 8644 if (proto_after_greedy_proto) 8645 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 8646 "Prototype after '%c' for %"SVf" : %s", 8647 greedy_proto, SVfARG(PL_subname), d); 8648 if (bad_proto) { 8649 SV *dsv = newSVpvs_flags("", SVs_TEMP); 8650 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 8651 "Illegal character %sin prototype for %"SVf" : %s", 8652 seen_underscore ? "after '_' " : "", 8653 SVfARG(PL_subname), 8654 SvUTF8(PL_lex_stuff) 8655 ? sv_uni_display(dsv, 8656 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), 8657 tmp, 8658 UNI_DISPLAY_ISPRINT) 8659 : pv_pretty(dsv, d, tmp, 60, NULL, NULL, 8660 PERL_PV_ESCAPE_NONASCII)); 8661 } 8662 SvCUR_set(PL_lex_stuff, tmp); 8663 have_proto = TRUE; 8664 8665 #ifdef PERL_MAD 8666 start_force(0); 8667 CURMAD('q', PL_thisopen); 8668 CURMAD('_', tmpwhite); 8669 CURMAD('=', PL_thisstuff); 8670 CURMAD('Q', PL_thisclose); 8671 NEXTVAL_NEXTTOKE.opval = 8672 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8673 PL_lex_stuff = NULL; 8674 force_next(THING); 8675 8676 s = SKIPSPACE2(s,tmpwhite); 8677 #else 8678 s = skipspace(s); 8679 #endif 8680 } 8681 else 8682 have_proto = FALSE; 8683 8684 if (*s == ':' && s[1] != ':') 8685 PL_expect = attrful; 8686 else if (*s != '{' && key == KEY_sub) { 8687 if (!have_name) 8688 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 8689 else if (*s != ';' && *s != '}') 8690 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname)); 8691 } 8692 8693 #ifdef PERL_MAD 8694 start_force(0); 8695 if (tmpwhite) { 8696 if (PL_madskills) 8697 curmad('^', newSVpvs("")); 8698 CURMAD('_', tmpwhite); 8699 } 8700 force_next(0); 8701 8702 PL_thistoken = subtoken; 8703 PERL_UNUSED_VAR(have_proto); 8704 #else 8705 if (have_proto) { 8706 NEXTVAL_NEXTTOKE.opval = 8707 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8708 PL_lex_stuff = NULL; 8709 force_next(THING); 8710 } 8711 #endif 8712 if (!have_name) { 8713 if (PL_curstash) 8714 sv_setpvs(PL_subname, "__ANON__"); 8715 else 8716 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 8717 TOKEN(ANONSUB); 8718 } 8719 #ifndef PERL_MAD 8720 force_ident_maybe_lex('&'); 8721 #endif 8722 TOKEN(SUB); 8723 } 8724 8725 case KEY_system: 8726 LOP(OP_SYSTEM,XREF); 8727 8728 case KEY_symlink: 8729 LOP(OP_SYMLINK,XTERM); 8730 8731 case KEY_syscall: 8732 LOP(OP_SYSCALL,XTERM); 8733 8734 case KEY_sysopen: 8735 LOP(OP_SYSOPEN,XTERM); 8736 8737 case KEY_sysseek: 8738 LOP(OP_SYSSEEK,XTERM); 8739 8740 case KEY_sysread: 8741 LOP(OP_SYSREAD,XTERM); 8742 8743 case KEY_syswrite: 8744 LOP(OP_SYSWRITE,XTERM); 8745 8746 case KEY_tr: 8747 case KEY_y: 8748 s = scan_trans(s); 8749 TERM(sublex_start()); 8750 8751 case KEY_tell: 8752 UNI(OP_TELL); 8753 8754 case KEY_telldir: 8755 UNI(OP_TELLDIR); 8756 8757 case KEY_tie: 8758 LOP(OP_TIE,XTERM); 8759 8760 case KEY_tied: 8761 UNI(OP_TIED); 8762 8763 case KEY_time: 8764 FUN0(OP_TIME); 8765 8766 case KEY_times: 8767 FUN0(OP_TMS); 8768 8769 case KEY_truncate: 8770 LOP(OP_TRUNCATE,XTERM); 8771 8772 case KEY_uc: 8773 UNI(OP_UC); 8774 8775 case KEY_ucfirst: 8776 UNI(OP_UCFIRST); 8777 8778 case KEY_untie: 8779 UNI(OP_UNTIE); 8780 8781 case KEY_until: 8782 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8783 return REPORT(0); 8784 pl_yylval.ival = CopLINE(PL_curcop); 8785 OPERATOR(UNTIL); 8786 8787 case KEY_unless: 8788 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8789 return REPORT(0); 8790 pl_yylval.ival = CopLINE(PL_curcop); 8791 OPERATOR(UNLESS); 8792 8793 case KEY_unlink: 8794 LOP(OP_UNLINK,XTERM); 8795 8796 case KEY_undef: 8797 UNIDOR(OP_UNDEF); 8798 8799 case KEY_unpack: 8800 LOP(OP_UNPACK,XTERM); 8801 8802 case KEY_utime: 8803 LOP(OP_UTIME,XTERM); 8804 8805 case KEY_umask: 8806 UNIDOR(OP_UMASK); 8807 8808 case KEY_unshift: 8809 LOP(OP_UNSHIFT,XTERM); 8810 8811 case KEY_use: 8812 s = tokenize_use(1, s); 8813 OPERATOR(USE); 8814 8815 case KEY_values: 8816 UNI(OP_VALUES); 8817 8818 case KEY_vec: 8819 LOP(OP_VEC,XTERM); 8820 8821 case KEY_when: 8822 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8823 return REPORT(0); 8824 pl_yylval.ival = CopLINE(PL_curcop); 8825 Perl_ck_warner_d(aTHX_ 8826 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8827 "when is experimental"); 8828 OPERATOR(WHEN); 8829 8830 case KEY_while: 8831 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8832 return REPORT(0); 8833 pl_yylval.ival = CopLINE(PL_curcop); 8834 OPERATOR(WHILE); 8835 8836 case KEY_warn: 8837 PL_hints |= HINT_BLOCK_SCOPE; 8838 LOP(OP_WARN,XTERM); 8839 8840 case KEY_wait: 8841 FUN0(OP_WAIT); 8842 8843 case KEY_waitpid: 8844 LOP(OP_WAITPID,XTERM); 8845 8846 case KEY_wantarray: 8847 FUN0(OP_WANTARRAY); 8848 8849 case KEY_write: 8850 #ifdef EBCDIC 8851 { 8852 char ctl_l[2]; 8853 ctl_l[0] = toCTRL('L'); 8854 ctl_l[1] = '\0'; 8855 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV); 8856 } 8857 #else 8858 /* Make sure $^L is defined */ 8859 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV); 8860 #endif 8861 UNI(OP_ENTERWRITE); 8862 8863 case KEY_x: 8864 if (PL_expect == XOPERATOR) { 8865 if (*s == '=' && !PL_lex_allbrackets && 8866 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8867 return REPORT(0); 8868 Mop(OP_REPEAT); 8869 } 8870 check_uni(); 8871 goto just_a_word; 8872 8873 case KEY_xor: 8874 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8875 return REPORT(0); 8876 pl_yylval.ival = OP_XOR; 8877 OPERATOR(OROP); 8878 } 8879 }} 8880 } 8881 #ifdef __SC__ 8882 #pragma segment Main 8883 #endif 8884 8885 /* 8886 S_pending_ident 8887 8888 Looks up an identifier in the pad or in a package 8889 8890 Returns: 8891 PRIVATEREF if this is a lexical name. 8892 WORD if this belongs to a package. 8893 8894 Structure: 8895 if we're in a my declaration 8896 croak if they tried to say my($foo::bar) 8897 build the ops for a my() declaration 8898 if it's an access to a my() variable 8899 build ops for access to a my() variable 8900 if in a dq string, and they've said @foo and we can't find @foo 8901 warn 8902 build ops for a bareword 8903 */ 8904 8905 static int 8906 S_pending_ident(pTHX) 8907 { 8908 dVAR; 8909 PADOFFSET tmp = 0; 8910 const char pit = (char)pl_yylval.ival; 8911 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 8912 /* All routes through this function want to know if there is a colon. */ 8913 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 8914 8915 DEBUG_T({ PerlIO_printf(Perl_debug_log, 8916 "### Pending identifier '%s'\n", PL_tokenbuf); }); 8917 8918 /* if we're in a my(), we can't allow dynamics here. 8919 $foo'bar has already been turned into $foo::bar, so 8920 just check for colons. 8921 8922 if it's a legal name, the OP is a PADANY. 8923 */ 8924 if (PL_in_my) { 8925 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 8926 if (has_colon) 8927 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 8928 "variable %s in \"our\"", 8929 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 8930 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 8931 } 8932 else { 8933 if (has_colon) 8934 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 8935 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), 8936 UTF ? SVf_UTF8 : 0); 8937 8938 pl_yylval.opval = newOP(OP_PADANY, 0); 8939 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 8940 UTF ? SVf_UTF8 : 0); 8941 return PRIVATEREF; 8942 } 8943 } 8944 8945 /* 8946 build the ops for accesses to a my() variable. 8947 */ 8948 8949 if (!has_colon) { 8950 if (!PL_in_my) 8951 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 8952 UTF ? SVf_UTF8 : 0); 8953 if (tmp != NOT_IN_PAD) { 8954 /* might be an "our" variable" */ 8955 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 8956 /* build ops for a bareword */ 8957 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 8958 HEK * const stashname = HvNAME_HEK(stash); 8959 SV * const sym = newSVhek(stashname); 8960 sv_catpvs(sym, "::"); 8961 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 8962 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 8963 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8964 if (pit != '&') 8965 gv_fetchsv(sym, 8966 (PL_in_eval 8967 ? (GV_ADDMULTI | GV_ADDINEVAL) 8968 : GV_ADDMULTI 8969 ), 8970 ((PL_tokenbuf[0] == '$') ? SVt_PV 8971 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8972 : SVt_PVHV)); 8973 return WORD; 8974 } 8975 8976 pl_yylval.opval = newOP(OP_PADANY, 0); 8977 pl_yylval.opval->op_targ = tmp; 8978 return PRIVATEREF; 8979 } 8980 } 8981 8982 /* 8983 Whine if they've said @foo in a doublequoted string, 8984 and @foo isn't a variable we can find in the symbol 8985 table. 8986 */ 8987 if (ckWARN(WARN_AMBIGUOUS) && 8988 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { 8989 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 8990 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV); 8991 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 8992 /* DO NOT warn for @- and @+ */ 8993 && !( PL_tokenbuf[2] == '\0' && 8994 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) 8995 ) 8996 { 8997 /* Downgraded from fatal to warning 20000522 mjd */ 8998 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 8999 "Possible unintended interpolation of %"SVf" in string", 9000 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len, 9001 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 )))); 9002 } 9003 } 9004 9005 /* build ops for a bareword */ 9006 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 9007 newSVpvn_flags(PL_tokenbuf + 1, 9008 tokenbuf_len - 1, 9009 UTF ? SVf_UTF8 : 0 )); 9010 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9011 if (pit != '&') 9012 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, 9013 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD) 9014 | ( UTF ? SVf_UTF8 : 0 ), 9015 ((PL_tokenbuf[0] == '$') ? SVt_PV 9016 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9017 : SVt_PVHV)); 9018 return WORD; 9019 } 9020 9021 STATIC void 9022 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 9023 { 9024 dVAR; 9025 9026 PERL_ARGS_ASSERT_CHECKCOMMA; 9027 9028 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 9029 if (ckWARN(WARN_SYNTAX)) { 9030 int level = 1; 9031 const char *w; 9032 for (w = s+2; *w && level; w++) { 9033 if (*w == '(') 9034 ++level; 9035 else if (*w == ')') 9036 --level; 9037 } 9038 while (isSPACE(*w)) 9039 ++w; 9040 /* the list of chars below is for end of statements or 9041 * block / parens, boolean operators (&&, ||, //) and branch 9042 * constructs (or, and, if, until, unless, while, err, for). 9043 * Not a very solid hack... */ 9044 if (!*w || !strchr(";&/|})]oaiuwef!=", *w)) 9045 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9046 "%s (...) interpreted as function",name); 9047 } 9048 } 9049 while (s < PL_bufend && isSPACE(*s)) 9050 s++; 9051 if (*s == '(') 9052 s++; 9053 while (s < PL_bufend && isSPACE(*s)) 9054 s++; 9055 if (isIDFIRST_lazy_if(s,UTF)) { 9056 const char * const w = s; 9057 s += UTF ? UTF8SKIP(s) : 1; 9058 while (isWORDCHAR_lazy_if(s,UTF)) 9059 s += UTF ? UTF8SKIP(s) : 1; 9060 while (s < PL_bufend && isSPACE(*s)) 9061 s++; 9062 if (*s == ',') { 9063 GV* gv; 9064 if (keyword(w, s - w, 0)) 9065 return; 9066 9067 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 9068 if (gv && GvCVu(gv)) 9069 return; 9070 Perl_croak(aTHX_ "No comma allowed after %s", what); 9071 } 9072 } 9073 } 9074 9075 /* S_new_constant(): do any overload::constant lookup. 9076 9077 Either returns sv, or mortalizes/frees sv and returns a new SV*. 9078 Best used as sv=new_constant(..., sv, ...). 9079 If s, pv are NULL, calls subroutine with one argument, 9080 and <type> is used with error messages only. 9081 <type> is assumed to be well formed UTF-8 */ 9082 9083 STATIC SV * 9084 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 9085 SV *sv, SV *pv, const char *type, STRLEN typelen) 9086 { 9087 dVAR; dSP; 9088 HV * table = GvHV(PL_hintgv); /* ^H */ 9089 SV *res; 9090 SV *errsv = NULL; 9091 SV **cvp; 9092 SV *cv, *typesv; 9093 const char *why1 = "", *why2 = "", *why3 = ""; 9094 9095 PERL_ARGS_ASSERT_NEW_CONSTANT; 9096 /* We assume that this is true: */ 9097 if (*key == 'c') { assert (strEQ(key, "charnames")); } 9098 assert(type || s); 9099 9100 /* charnames doesn't work well if there have been errors found */ 9101 if (PL_error_count > 0 && *key == 'c') 9102 { 9103 SvREFCNT_dec_NN(sv); 9104 return &PL_sv_undef; 9105 } 9106 9107 sv_2mortal(sv); /* Parent created it permanently */ 9108 if (!table 9109 || ! (PL_hints & HINT_LOCALIZE_HH) 9110 || ! (cvp = hv_fetch(table, key, keylen, FALSE)) 9111 || ! SvOK(*cvp)) 9112 { 9113 char *msg; 9114 9115 /* Here haven't found what we're looking for. If it is charnames, 9116 * perhaps it needs to be loaded. Try doing that before giving up */ 9117 if (*key == 'c') { 9118 Perl_load_module(aTHX_ 9119 0, 9120 newSVpvs("_charnames"), 9121 /* version parameter; no need to specify it, as if 9122 * we get too early a version, will fail anyway, 9123 * not being able to find '_charnames' */ 9124 NULL, 9125 newSVpvs(":full"), 9126 newSVpvs(":short"), 9127 NULL); 9128 SPAGAIN; 9129 table = GvHV(PL_hintgv); 9130 if (table 9131 && (PL_hints & HINT_LOCALIZE_HH) 9132 && (cvp = hv_fetch(table, key, keylen, FALSE)) 9133 && SvOK(*cvp)) 9134 { 9135 goto now_ok; 9136 } 9137 } 9138 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 9139 msg = Perl_form(aTHX_ 9140 "Constant(%.*s) unknown", 9141 (int)(type ? typelen : len), 9142 (type ? type: s)); 9143 } 9144 else { 9145 why1 = "$^H{"; 9146 why2 = key; 9147 why3 = "} is not defined"; 9148 report: 9149 if (*key == 'c') { 9150 msg = Perl_form(aTHX_ 9151 /* The +3 is for '\N{'; -4 for that, plus '}' */ 9152 "Unknown charname '%.*s'", (int)typelen - 4, type + 3 9153 ); 9154 } 9155 else { 9156 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", 9157 (int)(type ? typelen : len), 9158 (type ? type: s), why1, why2, why3); 9159 } 9160 } 9161 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 9162 return SvREFCNT_inc_simple_NN(sv); 9163 } 9164 now_ok: 9165 cv = *cvp; 9166 if (!pv && s) 9167 pv = newSVpvn_flags(s, len, SVs_TEMP); 9168 if (type && pv) 9169 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 9170 else 9171 typesv = &PL_sv_undef; 9172 9173 PUSHSTACKi(PERLSI_OVERLOAD); 9174 ENTER ; 9175 SAVETMPS; 9176 9177 PUSHMARK(SP) ; 9178 EXTEND(sp, 3); 9179 if (pv) 9180 PUSHs(pv); 9181 PUSHs(sv); 9182 if (pv) 9183 PUSHs(typesv); 9184 PUTBACK; 9185 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 9186 9187 SPAGAIN ; 9188 9189 /* Check the eval first */ 9190 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 9191 STRLEN errlen; 9192 const char * errstr; 9193 sv_catpvs(errsv, "Propagated"); 9194 errstr = SvPV_const(errsv, errlen); 9195 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 9196 (void)POPs; 9197 res = SvREFCNT_inc_simple_NN(sv); 9198 } 9199 else { 9200 res = POPs; 9201 SvREFCNT_inc_simple_void_NN(res); 9202 } 9203 9204 PUTBACK ; 9205 FREETMPS ; 9206 LEAVE ; 9207 POPSTACK; 9208 9209 if (!SvOK(res)) { 9210 why1 = "Call to &{$^H{"; 9211 why2 = key; 9212 why3 = "}} did not return a defined value"; 9213 sv = res; 9214 (void)sv_2mortal(sv); 9215 goto report; 9216 } 9217 9218 return res; 9219 } 9220 9221 PERL_STATIC_INLINE void 9222 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { 9223 dVAR; 9224 PERL_ARGS_ASSERT_PARSE_IDENT; 9225 9226 for (;;) { 9227 if (*d >= e) 9228 Perl_croak(aTHX_ "%s", ident_too_long); 9229 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { 9230 /* The UTF-8 case must come first, otherwise things 9231 * like c\N{COMBINING TILDE} would start failing, as the 9232 * isWORDCHAR_A case below would gobble the 'c' up. 9233 */ 9234 9235 char *t = *s + UTF8SKIP(*s); 9236 while (isIDCONT_utf8((U8*)t)) 9237 t += UTF8SKIP(t); 9238 if (*d + (t - *s) > e) 9239 Perl_croak(aTHX_ "%s", ident_too_long); 9240 Copy(*s, *d, t - *s, char); 9241 *d += t - *s; 9242 *s = t; 9243 } 9244 else if ( isWORDCHAR_A(**s) ) { 9245 do { 9246 *(*d)++ = *(*s)++; 9247 } while (isWORDCHAR_A(**s) && *d < e); 9248 } 9249 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { 9250 *(*d)++ = ':'; 9251 *(*d)++ = ':'; 9252 (*s)++; 9253 } 9254 else if (allow_package && **s == ':' && (*s)[1] == ':' 9255 /* Disallow things like Foo::$bar. For the curious, this is 9256 * the code path that triggers the "Bad name after" warning 9257 * when looking for barewords. 9258 */ 9259 && (*s)[2] != '$') { 9260 *(*d)++ = *(*s)++; 9261 *(*d)++ = *(*s)++; 9262 } 9263 else 9264 break; 9265 } 9266 return; 9267 } 9268 9269 /* Returns a NUL terminated string, with the length of the string written to 9270 *slp 9271 */ 9272 STATIC char * 9273 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 9274 { 9275 dVAR; 9276 char *d = dest; 9277 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9278 bool is_utf8 = cBOOL(UTF); 9279 9280 PERL_ARGS_ASSERT_SCAN_WORD; 9281 9282 parse_ident(&s, &d, e, allow_package, is_utf8); 9283 *d = '\0'; 9284 *slp = d - dest; 9285 return s; 9286 } 9287 9288 STATIC char * 9289 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni) 9290 { 9291 dVAR; 9292 char *bracket = NULL; 9293 char funny = *s++; 9294 char *d = dest; 9295 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9296 bool is_utf8 = cBOOL(UTF); 9297 9298 PERL_ARGS_ASSERT_SCAN_IDENT; 9299 9300 if (isSPACE(*s)) 9301 s = PEEKSPACE(s); 9302 if (isDIGIT(*s)) { 9303 while (isDIGIT(*s)) { 9304 if (d >= e) 9305 Perl_croak(aTHX_ "%s", ident_too_long); 9306 *d++ = *s++; 9307 } 9308 } 9309 else { 9310 parse_ident(&s, &d, e, 1, is_utf8); 9311 } 9312 *d = '\0'; 9313 d = dest; 9314 if (*d) { 9315 if (PL_lex_state != LEX_NORMAL) 9316 PL_lex_state = LEX_INTERPENDMAYBE; 9317 return s; 9318 } 9319 if (*s == '$' && s[1] && 9320 (isIDFIRST_lazy_if(s+1,is_utf8) 9321 || isDIGIT_A((U8)s[1]) 9322 || s[1] == '$' 9323 || s[1] == '{' 9324 || strnEQ(s+1,"::",2)) ) 9325 { 9326 return s; 9327 } 9328 if (*s == '{') { 9329 bracket = s; 9330 s++; 9331 while (s < send && SPACE_OR_TAB(*s)) 9332 s++; 9333 } 9334 9335 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)*(d)) \ 9336 || isCNTRL_A((U8)*(d)) \ 9337 || isDIGIT_A((U8)*(d)) \ 9338 || (!(u) && !UTF8_IS_INVARIANT((U8)*(d)))) 9339 if (s < send 9340 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(s, is_utf8))) 9341 { 9342 if (is_utf8) { 9343 const STRLEN skip = UTF8SKIP(s); 9344 STRLEN i; 9345 d[skip] = '\0'; 9346 for ( i = 0; i < skip; i++ ) 9347 d[i] = *s++; 9348 } 9349 else { 9350 *d = *s++; 9351 d[1] = '\0'; 9352 } 9353 } 9354 if (*d == '^' && *s && isCONTROLVAR(*s)) { 9355 *d = toCTRL(*s); 9356 s++; 9357 } 9358 else if (ck_uni && !bracket) 9359 check_uni(); 9360 if (bracket) { 9361 if (isIDFIRST_lazy_if(d,is_utf8)) { 9362 d += is_utf8 ? UTF8SKIP(d) : 1; 9363 parse_ident(&s, &d, e, 1, is_utf8); 9364 *d = '\0'; 9365 while (s < send && SPACE_OR_TAB(*s)) 9366 s++; 9367 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 9368 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 9369 const char * const brack = 9370 (const char *) 9371 ((*s == '[') ? "[...]" : "{...}"); 9372 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 9373 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9374 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 9375 funny, dest, brack, funny, dest, brack); 9376 } 9377 bracket++; 9378 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 9379 PL_lex_allbrackets++; 9380 return s; 9381 } 9382 } 9383 /* Handle extended ${^Foo} variables 9384 * 1999-02-27 mjd-perl-patch@plover.com */ 9385 else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */ 9386 && isWORDCHAR(*s)) 9387 { 9388 d++; 9389 while (isWORDCHAR(*s) && d < e) { 9390 *d++ = *s++; 9391 } 9392 if (d >= e) 9393 Perl_croak(aTHX_ "%s", ident_too_long); 9394 *d = '\0'; 9395 } 9396 9397 while (s < send && SPACE_OR_TAB(*s)) 9398 s++; 9399 9400 if (*s == '}') { 9401 s++; 9402 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 9403 PL_lex_state = LEX_INTERPEND; 9404 PL_expect = XREF; 9405 } 9406 if (PL_lex_state == LEX_NORMAL) { 9407 if (ckWARN(WARN_AMBIGUOUS) && 9408 (keyword(dest, d - dest, 0) 9409 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0))) 9410 { 9411 SV *tmp = newSVpvn_flags( dest, d - dest, 9412 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 9413 if (funny == '#') 9414 funny = '@'; 9415 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9416 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf, 9417 funny, tmp, funny, tmp); 9418 } 9419 } 9420 } 9421 else { 9422 s = bracket; /* let the parser handle it */ 9423 *dest = '\0'; 9424 } 9425 } 9426 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 9427 PL_lex_state = LEX_INTERPEND; 9428 return s; 9429 } 9430 9431 static bool 9432 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) { 9433 9434 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in 9435 * the parse starting at 's', based on the subset that are valid in this 9436 * context input to this routine in 'valid_flags'. Advances s. Returns 9437 * TRUE if the input should be treated as a valid flag, so the next char 9438 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon 9439 * first call on the current regex. This routine will set it to any 9440 * charset modifier found. The caller shouldn't change it. This way, 9441 * another charset modifier encountered in the parse can be detected as an 9442 * error, as we have decided to allow only one */ 9443 9444 const char c = **s; 9445 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 9446 9447 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 9448 if (isWORDCHAR_lazy_if(*s, UTF)) { 9449 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 9450 UTF ? SVf_UTF8 : 0); 9451 (*s) += charlen; 9452 /* Pretend that it worked, so will continue processing before 9453 * dieing */ 9454 return TRUE; 9455 } 9456 return FALSE; 9457 } 9458 9459 switch (c) { 9460 9461 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); 9462 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 9463 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 9464 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 9465 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 9466 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 9467 case LOCALE_PAT_MOD: 9468 if (*charset) { 9469 goto multiple_charsets; 9470 } 9471 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 9472 *charset = c; 9473 break; 9474 case UNICODE_PAT_MOD: 9475 if (*charset) { 9476 goto multiple_charsets; 9477 } 9478 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 9479 *charset = c; 9480 break; 9481 case ASCII_RESTRICT_PAT_MOD: 9482 if (! *charset) { 9483 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 9484 } 9485 else { 9486 9487 /* Error if previous modifier wasn't an 'a', but if it was, see 9488 * if, and accept, a second occurrence (only) */ 9489 if (*charset != 'a' 9490 || get_regex_charset(*pmfl) 9491 != REGEX_ASCII_RESTRICTED_CHARSET) 9492 { 9493 goto multiple_charsets; 9494 } 9495 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 9496 } 9497 *charset = c; 9498 break; 9499 case DEPENDS_PAT_MOD: 9500 if (*charset) { 9501 goto multiple_charsets; 9502 } 9503 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 9504 *charset = c; 9505 break; 9506 } 9507 9508 (*s)++; 9509 return TRUE; 9510 9511 multiple_charsets: 9512 if (*charset != c) { 9513 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 9514 } 9515 else if (c == 'a') { 9516 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 9517 } 9518 else { 9519 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 9520 } 9521 9522 /* Pretend that it worked, so will continue processing before dieing */ 9523 (*s)++; 9524 return TRUE; 9525 } 9526 9527 STATIC char * 9528 S_scan_pat(pTHX_ char *start, I32 type) 9529 { 9530 dVAR; 9531 PMOP *pm; 9532 char *s; 9533 const char * const valid_flags = 9534 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 9535 char charset = '\0'; /* character set modifier */ 9536 #ifdef PERL_MAD 9537 char *modstart; 9538 #endif 9539 9540 PERL_ARGS_ASSERT_SCAN_PAT; 9541 9542 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), 9543 TRUE /* look for escaped bracketed metas */ ); 9544 9545 if (!s) { 9546 const char * const delimiter = skipspace(start); 9547 Perl_croak(aTHX_ 9548 (const char *) 9549 (*delimiter == '?' 9550 ? "Search pattern not terminated or ternary operator parsed as search pattern" 9551 : "Search pattern not terminated" )); 9552 } 9553 9554 pm = (PMOP*)newPMOP(type, 0); 9555 if (PL_multi_open == '?') { 9556 /* This is the only point in the code that sets PMf_ONCE: */ 9557 pm->op_pmflags |= PMf_ONCE; 9558 9559 /* Hence it's safe to do this bit of PMOP book-keeping here, which 9560 allows us to restrict the list needed by reset to just the ?? 9561 matches. */ 9562 assert(type != OP_TRANS); 9563 if (PL_curstash) { 9564 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 9565 U32 elements; 9566 if (!mg) { 9567 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 9568 0); 9569 } 9570 elements = mg->mg_len / sizeof(PMOP**); 9571 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 9572 ((PMOP**)mg->mg_ptr) [elements++] = pm; 9573 mg->mg_len = elements * sizeof(PMOP**); 9574 PmopSTASH_set(pm,PL_curstash); 9575 } 9576 } 9577 #ifdef PERL_MAD 9578 modstart = s; 9579 #endif 9580 9581 /* if qr/...(?{..}).../, then need to parse the pattern within a new 9582 * anon CV. False positives like qr/[(?{]/ are harmless */ 9583 9584 if (type == OP_QR) { 9585 STRLEN len; 9586 char *e, *p = SvPV(PL_lex_stuff, len); 9587 e = p + len; 9588 for (; p < e; p++) { 9589 if (p[0] == '(' && p[1] == '?' 9590 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 9591 { 9592 pm->op_pmflags |= PMf_HAS_CV; 9593 break; 9594 } 9595 } 9596 pm->op_pmflags |= PMf_IS_QR; 9597 } 9598 9599 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; 9600 #ifdef PERL_MAD 9601 if (PL_madskills && modstart != s) { 9602 SV* tmptoken = newSVpvn(modstart, s - modstart); 9603 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0); 9604 } 9605 #endif 9606 /* issue a warning if /c is specified,but /g is not */ 9607 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 9608 { 9609 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 9610 "Use of /c modifier is meaningless without /g" ); 9611 } 9612 9613 PL_lex_op = (OP*)pm; 9614 pl_yylval.ival = OP_MATCH; 9615 return s; 9616 } 9617 9618 STATIC char * 9619 S_scan_subst(pTHX_ char *start) 9620 { 9621 dVAR; 9622 char *s; 9623 PMOP *pm; 9624 I32 first_start; 9625 I32 es = 0; 9626 char charset = '\0'; /* character set modifier */ 9627 #ifdef PERL_MAD 9628 char *modstart; 9629 #endif 9630 9631 PERL_ARGS_ASSERT_SCAN_SUBST; 9632 9633 pl_yylval.ival = OP_NULL; 9634 9635 s = scan_str(start,!!PL_madskills,FALSE,FALSE, 9636 TRUE /* look for escaped bracketed metas */ ); 9637 9638 if (!s) 9639 Perl_croak(aTHX_ "Substitution pattern not terminated"); 9640 9641 if (s[-1] == PL_multi_open) 9642 s--; 9643 #ifdef PERL_MAD 9644 if (PL_madskills) { 9645 CURMAD('q', PL_thisopen); 9646 CURMAD('_', PL_thiswhite); 9647 CURMAD('E', PL_thisstuff); 9648 CURMAD('Q', PL_thisclose); 9649 PL_realtokenstart = s - SvPVX(PL_linestr); 9650 } 9651 #endif 9652 9653 first_start = PL_multi_start; 9654 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 9655 if (!s) { 9656 if (PL_lex_stuff) { 9657 SvREFCNT_dec(PL_lex_stuff); 9658 PL_lex_stuff = NULL; 9659 } 9660 Perl_croak(aTHX_ "Substitution replacement not terminated"); 9661 } 9662 PL_multi_start = first_start; /* so whole substitution is taken together */ 9663 9664 pm = (PMOP*)newPMOP(OP_SUBST, 0); 9665 9666 #ifdef PERL_MAD 9667 if (PL_madskills) { 9668 CURMAD('z', PL_thisopen); 9669 CURMAD('R', PL_thisstuff); 9670 CURMAD('Z', PL_thisclose); 9671 } 9672 modstart = s; 9673 #endif 9674 9675 while (*s) { 9676 if (*s == EXEC_PAT_MOD) { 9677 s++; 9678 es++; 9679 } 9680 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset)) 9681 { 9682 break; 9683 } 9684 } 9685 9686 #ifdef PERL_MAD 9687 if (PL_madskills) { 9688 if (modstart != s) 9689 curmad('m', newSVpvn(modstart, s - modstart)); 9690 append_madprops(PL_thismad, (OP*)pm, 0); 9691 PL_thismad = 0; 9692 } 9693 #endif 9694 if ((pm->op_pmflags & PMf_CONTINUE)) { 9695 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 9696 } 9697 9698 if (es) { 9699 SV * const repl = newSVpvs(""); 9700 9701 PL_multi_end = 0; 9702 pm->op_pmflags |= PMf_EVAL; 9703 while (es-- > 0) { 9704 if (es) 9705 sv_catpvs(repl, "eval "); 9706 else 9707 sv_catpvs(repl, "do "); 9708 } 9709 sv_catpvs(repl, "{"); 9710 sv_catsv(repl, PL_sublex_info.repl); 9711 sv_catpvs(repl, "}"); 9712 SvEVALED_on(repl); 9713 SvREFCNT_dec(PL_sublex_info.repl); 9714 PL_sublex_info.repl = repl; 9715 } 9716 9717 PL_lex_op = (OP*)pm; 9718 pl_yylval.ival = OP_SUBST; 9719 return s; 9720 } 9721 9722 STATIC char * 9723 S_scan_trans(pTHX_ char *start) 9724 { 9725 dVAR; 9726 char* s; 9727 OP *o; 9728 U8 squash; 9729 U8 del; 9730 U8 complement; 9731 bool nondestruct = 0; 9732 #ifdef PERL_MAD 9733 char *modstart; 9734 #endif 9735 9736 PERL_ARGS_ASSERT_SCAN_TRANS; 9737 9738 pl_yylval.ival = OP_NULL; 9739 9740 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE); 9741 if (!s) 9742 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 9743 9744 if (s[-1] == PL_multi_open) 9745 s--; 9746 #ifdef PERL_MAD 9747 if (PL_madskills) { 9748 CURMAD('q', PL_thisopen); 9749 CURMAD('_', PL_thiswhite); 9750 CURMAD('E', PL_thisstuff); 9751 CURMAD('Q', PL_thisclose); 9752 PL_realtokenstart = s - SvPVX(PL_linestr); 9753 } 9754 #endif 9755 9756 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); 9757 if (!s) { 9758 if (PL_lex_stuff) { 9759 SvREFCNT_dec(PL_lex_stuff); 9760 PL_lex_stuff = NULL; 9761 } 9762 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 9763 } 9764 if (PL_madskills) { 9765 CURMAD('z', PL_thisopen); 9766 CURMAD('R', PL_thisstuff); 9767 CURMAD('Z', PL_thisclose); 9768 } 9769 9770 complement = del = squash = 0; 9771 #ifdef PERL_MAD 9772 modstart = s; 9773 #endif 9774 while (1) { 9775 switch (*s) { 9776 case 'c': 9777 complement = OPpTRANS_COMPLEMENT; 9778 break; 9779 case 'd': 9780 del = OPpTRANS_DELETE; 9781 break; 9782 case 's': 9783 squash = OPpTRANS_SQUASH; 9784 break; 9785 case 'r': 9786 nondestruct = 1; 9787 break; 9788 default: 9789 goto no_more; 9790 } 9791 s++; 9792 } 9793 no_more: 9794 9795 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 9796 o->op_private &= ~OPpTRANS_ALL; 9797 o->op_private |= del|squash|complement| 9798 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 9799 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0); 9800 9801 PL_lex_op = o; 9802 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 9803 9804 #ifdef PERL_MAD 9805 if (PL_madskills) { 9806 if (modstart != s) 9807 curmad('m', newSVpvn(modstart, s - modstart)); 9808 append_madprops(PL_thismad, o, 0); 9809 PL_thismad = 0; 9810 } 9811 #endif 9812 9813 return s; 9814 } 9815 9816 /* scan_heredoc 9817 Takes a pointer to the first < in <<FOO. 9818 Returns a pointer to the byte following <<FOO. 9819 9820 This function scans a heredoc, which involves different methods 9821 depending on whether we are in a string eval, quoted construct, etc. 9822 This is because PL_linestr could containing a single line of input, or 9823 a whole string being evalled, or the contents of the current quote- 9824 like operator. 9825 9826 The two basic methods are: 9827 - Steal lines from the input stream 9828 - Scan the heredoc in PL_linestr and remove it therefrom 9829 9830 In a file scope or filtered eval, the first method is used; in a 9831 string eval, the second. 9832 9833 In a quote-like operator, we have to choose between the two, 9834 depending on where we can find a newline. We peek into outer lex- 9835 ing scopes until we find one with a newline in it. If we reach the 9836 outermost lexing scope and it is a file, we use the stream method. 9837 Otherwise it is treated as an eval. 9838 */ 9839 9840 STATIC char * 9841 S_scan_heredoc(pTHX_ char *s) 9842 { 9843 dVAR; 9844 I32 op_type = OP_SCALAR; 9845 I32 len; 9846 SV *tmpstr; 9847 char term; 9848 char *d; 9849 char *e; 9850 char *peek; 9851 const bool infile = PL_rsfp || PL_parser->filtered; 9852 LEXSHARED *shared = PL_parser->lex_shared; 9853 #ifdef PERL_MAD 9854 I32 stuffstart = s - SvPVX(PL_linestr); 9855 char *tstart; 9856 9857 PL_realtokenstart = -1; 9858 #endif 9859 9860 PERL_ARGS_ASSERT_SCAN_HEREDOC; 9861 9862 s += 2; 9863 d = PL_tokenbuf + 1; 9864 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 9865 *PL_tokenbuf = '\n'; 9866 peek = s; 9867 while (SPACE_OR_TAB(*peek)) 9868 peek++; 9869 if (*peek == '`' || *peek == '\'' || *peek =='"') { 9870 s = peek; 9871 term = *s++; 9872 s = delimcpy(d, e, s, PL_bufend, term, &len); 9873 if (s == PL_bufend) 9874 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 9875 d += len; 9876 s++; 9877 } 9878 else { 9879 if (*s == '\\') 9880 /* <<\FOO is equivalent to <<'FOO' */ 9881 s++, term = '\''; 9882 else 9883 term = '"'; 9884 if (!isWORDCHAR_lazy_if(s,UTF)) 9885 deprecate("bare << to mean <<\"\""); 9886 for (; isWORDCHAR_lazy_if(s,UTF); s++) { 9887 if (d < e) 9888 *d++ = *s; 9889 } 9890 } 9891 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 9892 Perl_croak(aTHX_ "Delimiter for here document is too long"); 9893 *d++ = '\n'; 9894 *d = '\0'; 9895 len = d - PL_tokenbuf; 9896 9897 #ifdef PERL_MAD 9898 if (PL_madskills) { 9899 tstart = PL_tokenbuf + 1; 9900 PL_thisclose = newSVpvn(tstart, len - 1); 9901 tstart = SvPVX(PL_linestr) + stuffstart; 9902 PL_thisopen = newSVpvn(tstart, s - tstart); 9903 stuffstart = s - SvPVX(PL_linestr); 9904 } 9905 #endif 9906 #ifndef PERL_STRICT_CR 9907 d = strchr(s, '\r'); 9908 if (d) { 9909 char * const olds = s; 9910 s = d; 9911 while (s < PL_bufend) { 9912 if (*s == '\r') { 9913 *d++ = '\n'; 9914 if (*++s == '\n') 9915 s++; 9916 } 9917 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 9918 *d++ = *s++; 9919 s++; 9920 } 9921 else 9922 *d++ = *s++; 9923 } 9924 *d = '\0'; 9925 PL_bufend = d; 9926 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 9927 s = olds; 9928 } 9929 #endif 9930 #ifdef PERL_MAD 9931 if (PL_madskills) { 9932 tstart = SvPVX(PL_linestr) + stuffstart; 9933 if (PL_thisstuff) 9934 sv_catpvn(PL_thisstuff, tstart, s - tstart); 9935 else 9936 PL_thisstuff = newSVpvn(tstart, s - tstart); 9937 } 9938 9939 stuffstart = s - SvPVX(PL_linestr); 9940 #endif 9941 9942 tmpstr = newSV_type(SVt_PVIV); 9943 SvGROW(tmpstr, 80); 9944 if (term == '\'') { 9945 op_type = OP_CONST; 9946 SvIV_set(tmpstr, -1); 9947 } 9948 else if (term == '`') { 9949 op_type = OP_BACKTICK; 9950 SvIV_set(tmpstr, '\\'); 9951 } 9952 9953 PL_multi_start = CopLINE(PL_curcop) + 1; 9954 PL_multi_open = PL_multi_close = '<'; 9955 /* inside a string eval or quote-like operator */ 9956 if (!infile || PL_lex_inwhat) { 9957 SV *linestr; 9958 char *bufend; 9959 char * const olds = s; 9960 PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; 9961 /* These two fields are not set until an inner lexing scope is 9962 entered. But we need them set here. */ 9963 shared->ls_bufptr = s; 9964 shared->ls_linestr = PL_linestr; 9965 if (PL_lex_inwhat) 9966 /* Look for a newline. If the current buffer does not have one, 9967 peek into the line buffer of the parent lexing scope, going 9968 up as many levels as necessary to find one with a newline 9969 after bufptr. 9970 */ 9971 while (!(s = (char *)memchr( 9972 (void *)shared->ls_bufptr, '\n', 9973 SvEND(shared->ls_linestr)-shared->ls_bufptr 9974 ))) { 9975 shared = shared->ls_prev; 9976 /* shared is only null if we have gone beyond the outermost 9977 lexing scope. In a file, we will have broken out of the 9978 loop in the previous iteration. In an eval, the string buf- 9979 fer ends with "\n;", so the while condition below will have 9980 evaluated to false. So shared can never be null. */ 9981 assert(shared); 9982 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 9983 most lexing scope. In a file, shared->ls_linestr at that 9984 level is just one line, so there is no body to steal. */ 9985 if (infile && !shared->ls_prev) { 9986 s = olds; 9987 goto streaming; 9988 } 9989 } 9990 else { /* eval */ 9991 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 9992 assert(s); 9993 } 9994 linestr = shared->ls_linestr; 9995 bufend = SvEND(linestr); 9996 d = s; 9997 while (s < bufend - len + 1 && 9998 memNE(s,PL_tokenbuf,len) ) { 9999 if (*s++ == '\n') 10000 ++shared->herelines; 10001 } 10002 if (s >= bufend - len + 1) { 10003 goto interminable; 10004 } 10005 sv_setpvn(tmpstr,d+1,s-d); 10006 #ifdef PERL_MAD 10007 if (PL_madskills) { 10008 if (PL_thisstuff) 10009 sv_catpvn(PL_thisstuff, d + 1, s - d); 10010 else 10011 PL_thisstuff = newSVpvn(d + 1, s - d); 10012 stuffstart = s - SvPVX(PL_linestr); 10013 } 10014 #endif 10015 s += len - 1; 10016 /* the preceding stmt passes a newline */ 10017 shared->herelines++; 10018 10019 /* s now points to the newline after the heredoc terminator. 10020 d points to the newline before the body of the heredoc. 10021 */ 10022 10023 /* We are going to modify linestr in place here, so set 10024 aside copies of the string if necessary for re-evals or 10025 (caller $n)[6]. */ 10026 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 10027 check shared->re_eval_str. */ 10028 if (shared->re_eval_start || shared->re_eval_str) { 10029 /* Set aside the rest of the regexp */ 10030 if (!shared->re_eval_str) 10031 shared->re_eval_str = 10032 newSVpvn(shared->re_eval_start, 10033 bufend - shared->re_eval_start); 10034 shared->re_eval_start -= s-d; 10035 } 10036 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL && 10037 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL && 10038 cx->blk_eval.cur_text == linestr) 10039 { 10040 cx->blk_eval.cur_text = newSVsv(linestr); 10041 SvSCREAM_on(cx->blk_eval.cur_text); 10042 } 10043 /* Copy everything from s onwards back to d. */ 10044 Move(s,d,bufend-s + 1,char); 10045 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 10046 /* Setting PL_bufend only applies when we have not dug deeper 10047 into other scopes, because sublex_done sets PL_bufend to 10048 SvEND(PL_linestr). */ 10049 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); 10050 s = olds; 10051 } 10052 else 10053 { 10054 SV *linestr_save; 10055 streaming: 10056 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ 10057 term = PL_tokenbuf[1]; 10058 len--; 10059 linestr_save = PL_linestr; /* must restore this afterwards */ 10060 d = s; /* and this */ 10061 PL_linestr = newSVpvs(""); 10062 PL_bufend = SvPVX(PL_linestr); 10063 while (1) { 10064 #ifdef PERL_MAD 10065 if (PL_madskills) { 10066 tstart = SvPVX(PL_linestr) + stuffstart; 10067 if (PL_thisstuff) 10068 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 10069 else 10070 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 10071 } 10072 #endif 10073 PL_bufptr = PL_bufend; 10074 CopLINE_set(PL_curcop, 10075 PL_multi_start + shared->herelines); 10076 if (!lex_next_chunk(LEX_NO_TERM) 10077 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { 10078 SvREFCNT_dec(linestr_save); 10079 goto interminable; 10080 } 10081 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); 10082 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 10083 lex_grow_linestr(SvCUR(PL_linestr) + 2); 10084 sv_catpvs(PL_linestr, "\n\0"); 10085 } 10086 s = PL_bufptr; 10087 #ifdef PERL_MAD 10088 stuffstart = s - SvPVX(PL_linestr); 10089 #endif 10090 shared->herelines++; 10091 PL_last_lop = PL_last_uni = NULL; 10092 #ifndef PERL_STRICT_CR 10093 if (PL_bufend - PL_linestart >= 2) { 10094 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || 10095 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 10096 { 10097 PL_bufend[-2] = '\n'; 10098 PL_bufend--; 10099 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10100 } 10101 else if (PL_bufend[-1] == '\r') 10102 PL_bufend[-1] = '\n'; 10103 } 10104 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 10105 PL_bufend[-1] = '\n'; 10106 #endif 10107 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { 10108 SvREFCNT_dec(PL_linestr); 10109 PL_linestr = linestr_save; 10110 PL_linestart = SvPVX(linestr_save); 10111 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 10112 s = d; 10113 break; 10114 } 10115 else { 10116 sv_catsv(tmpstr,PL_linestr); 10117 } 10118 } 10119 } 10120 PL_multi_end = CopLINE(PL_curcop); 10121 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 10122 SvPV_shrink_to_cur(tmpstr); 10123 } 10124 if (!IN_BYTES) { 10125 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 10126 SvUTF8_on(tmpstr); 10127 else if (PL_encoding) 10128 sv_recode_to_utf8(tmpstr, PL_encoding); 10129 } 10130 PL_lex_stuff = tmpstr; 10131 pl_yylval.ival = op_type; 10132 return s; 10133 10134 interminable: 10135 SvREFCNT_dec(tmpstr); 10136 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); 10137 missingterm(PL_tokenbuf + 1); 10138 } 10139 10140 /* scan_inputsymbol 10141 takes: current position in input buffer 10142 returns: new position in input buffer 10143 side-effects: pl_yylval and lex_op are set. 10144 10145 This code handles: 10146 10147 <> read from ARGV 10148 <FH> read from filehandle 10149 <pkg::FH> read from package qualified filehandle 10150 <pkg'FH> read from package qualified filehandle 10151 <$fh> read from filehandle in $fh 10152 <*.h> filename glob 10153 10154 */ 10155 10156 STATIC char * 10157 S_scan_inputsymbol(pTHX_ char *start) 10158 { 10159 dVAR; 10160 char *s = start; /* current position in buffer */ 10161 char *end; 10162 I32 len; 10163 char *d = PL_tokenbuf; /* start of temp holding space */ 10164 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 10165 10166 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 10167 10168 end = strchr(s, '\n'); 10169 if (!end) 10170 end = PL_bufend; 10171 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 10172 10173 /* die if we didn't have space for the contents of the <>, 10174 or if it didn't end, or if we see a newline 10175 */ 10176 10177 if (len >= (I32)sizeof PL_tokenbuf) 10178 Perl_croak(aTHX_ "Excessively long <> operator"); 10179 if (s >= end) 10180 Perl_croak(aTHX_ "Unterminated <> operator"); 10181 10182 s++; 10183 10184 /* check for <$fh> 10185 Remember, only scalar variables are interpreted as filehandles by 10186 this code. Anything more complex (e.g., <$fh{$num}>) will be 10187 treated as a glob() call. 10188 This code makes use of the fact that except for the $ at the front, 10189 a scalar variable and a filehandle look the same. 10190 */ 10191 if (*d == '$' && d[1]) d++; 10192 10193 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 10194 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 10195 d += UTF ? UTF8SKIP(d) : 1; 10196 10197 /* If we've tried to read what we allow filehandles to look like, and 10198 there's still text left, then it must be a glob() and not a getline. 10199 Use scan_str to pull out the stuff between the <> and treat it 10200 as nothing more than a string. 10201 */ 10202 10203 if (d - PL_tokenbuf != len) { 10204 pl_yylval.ival = OP_GLOB; 10205 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE); 10206 if (!s) 10207 Perl_croak(aTHX_ "Glob not terminated"); 10208 return s; 10209 } 10210 else { 10211 bool readline_overriden = FALSE; 10212 GV *gv_readline; 10213 GV **gvp; 10214 /* we're in a filehandle read situation */ 10215 d = PL_tokenbuf; 10216 10217 /* turn <> into <ARGV> */ 10218 if (!len) 10219 Copy("ARGV",d,5,char); 10220 10221 /* Check whether readline() is overriden */ 10222 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV); 10223 if ((gv_readline 10224 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)) 10225 || 10226 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE)) 10227 && (gv_readline = *gvp) && isGV_with_GP(gv_readline) 10228 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))) 10229 readline_overriden = TRUE; 10230 10231 /* if <$fh>, create the ops to turn the variable into a 10232 filehandle 10233 */ 10234 if (*d == '$') { 10235 /* try to find it in the pad for this block, otherwise find 10236 add symbol table ops 10237 */ 10238 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0); 10239 if (tmp != NOT_IN_PAD) { 10240 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 10241 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 10242 HEK * const stashname = HvNAME_HEK(stash); 10243 SV * const sym = sv_2mortal(newSVhek(stashname)); 10244 sv_catpvs(sym, "::"); 10245 sv_catpv(sym, d+1); 10246 d = SvPVX(sym); 10247 goto intro_sym; 10248 } 10249 else { 10250 OP * const o = newOP(OP_PADSV, 0); 10251 o->op_targ = tmp; 10252 PL_lex_op = readline_overriden 10253 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10254 op_append_elem(OP_LIST, o, 10255 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 10256 : (OP*)newUNOP(OP_READLINE, 0, o); 10257 } 10258 } 10259 else { 10260 GV *gv; 10261 ++d; 10262 intro_sym: 10263 gv = gv_fetchpv(d, 10264 (PL_in_eval 10265 ? (GV_ADDMULTI | GV_ADDINEVAL) 10266 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ), 10267 SVt_PV); 10268 PL_lex_op = readline_overriden 10269 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10270 op_append_elem(OP_LIST, 10271 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 10272 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 10273 : (OP*)newUNOP(OP_READLINE, 0, 10274 newUNOP(OP_RV2SV, 0, 10275 newGVOP(OP_GV, 0, gv))); 10276 } 10277 if (!readline_overriden) 10278 PL_lex_op->op_flags |= OPf_SPECIAL; 10279 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 10280 pl_yylval.ival = OP_NULL; 10281 } 10282 10283 /* If it's none of the above, it must be a literal filehandle 10284 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 10285 else { 10286 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 10287 PL_lex_op = readline_overriden 10288 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10289 op_append_elem(OP_LIST, 10290 newGVOP(OP_GV, 0, gv), 10291 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 10292 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); 10293 pl_yylval.ival = OP_NULL; 10294 } 10295 } 10296 10297 return s; 10298 } 10299 10300 10301 /* scan_str 10302 takes: start position in buffer 10303 keep_quoted preserve \ on the embedded delimiter(s) 10304 keep_delims preserve the delimiters around the string 10305 re_reparse compiling a run-time /(?{})/: 10306 collapse // to /, and skip encoding src 10307 returns: position to continue reading from buffer 10308 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 10309 updates the read buffer. 10310 10311 This subroutine pulls a string out of the input. It is called for: 10312 q single quotes q(literal text) 10313 ' single quotes 'literal text' 10314 qq double quotes qq(interpolate $here please) 10315 " double quotes "interpolate $here please" 10316 qx backticks qx(/bin/ls -l) 10317 ` backticks `/bin/ls -l` 10318 qw quote words @EXPORT_OK = qw( func() $spam ) 10319 m// regexp match m/this/ 10320 s/// regexp substitute s/this/that/ 10321 tr/// string transliterate tr/this/that/ 10322 y/// string transliterate y/this/that/ 10323 ($*@) sub prototypes sub foo ($) 10324 (stuff) sub attr parameters sub foo : attr(stuff) 10325 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 10326 10327 In most of these cases (all but <>, patterns and transliterate) 10328 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 10329 calls scan_str(). s/// makes yylex() call scan_subst() which calls 10330 scan_str(). tr/// and y/// make yylex() call scan_trans() which 10331 calls scan_str(). 10332 10333 It skips whitespace before the string starts, and treats the first 10334 character as the delimiter. If the delimiter is one of ([{< then 10335 the corresponding "close" character )]}> is used as the closing 10336 delimiter. It allows quoting of delimiters, and if the string has 10337 balanced delimiters ([{<>}]) it allows nesting. 10338 10339 On success, the SV with the resulting string is put into lex_stuff or, 10340 if that is already non-NULL, into lex_repl. The second case occurs only 10341 when parsing the RHS of the special constructs s/// and tr/// (y///). 10342 For convenience, the terminating delimiter character is stuffed into 10343 SvIVX of the SV. 10344 */ 10345 10346 STATIC char * 10347 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, 10348 bool deprecate_escaped_meta /* Should we issue a deprecation warning 10349 for certain paired metacharacters that 10350 appear escaped within it */ 10351 ) 10352 { 10353 dVAR; 10354 SV *sv; /* scalar value: string */ 10355 const char *tmps; /* temp string, used for delimiter matching */ 10356 char *s = start; /* current position in the buffer */ 10357 char term; /* terminating character */ 10358 char *to; /* current position in the sv's data */ 10359 I32 brackets = 1; /* bracket nesting level */ 10360 bool has_utf8 = FALSE; /* is there any utf8 content? */ 10361 I32 termcode; /* terminating char. code */ 10362 U8 termstr[UTF8_MAXBYTES]; /* terminating string */ 10363 STRLEN termlen; /* length of terminating string */ 10364 int last_off = 0; /* last position for nesting bracket */ 10365 char *escaped_open = NULL; 10366 #ifdef PERL_MAD 10367 int stuffstart; 10368 char *tstart; 10369 #endif 10370 10371 PERL_ARGS_ASSERT_SCAN_STR; 10372 10373 /* skip space before the delimiter */ 10374 if (isSPACE(*s)) { 10375 s = PEEKSPACE(s); 10376 } 10377 10378 #ifdef PERL_MAD 10379 if (PL_realtokenstart >= 0) { 10380 stuffstart = PL_realtokenstart; 10381 PL_realtokenstart = -1; 10382 } 10383 else 10384 stuffstart = start - SvPVX(PL_linestr); 10385 #endif 10386 /* mark where we are, in case we need to report errors */ 10387 CLINE; 10388 10389 /* after skipping whitespace, the next character is the terminator */ 10390 term = *s; 10391 if (!UTF) { 10392 termcode = termstr[0] = term; 10393 termlen = 1; 10394 } 10395 else { 10396 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); 10397 Copy(s, termstr, termlen, U8); 10398 if (!UTF8_IS_INVARIANT(term)) 10399 has_utf8 = TRUE; 10400 } 10401 10402 /* mark where we are */ 10403 PL_multi_start = CopLINE(PL_curcop); 10404 PL_multi_open = term; 10405 10406 /* find corresponding closing delimiter */ 10407 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 10408 termcode = termstr[0] = term = tmps[5]; 10409 10410 PL_multi_close = term; 10411 10412 /* A warning is raised if the input parameter requires it for escaped (by a 10413 * backslash) paired metacharacters {} [] and () when the delimiters are 10414 * those same characters, and the backslash is ineffective. This doesn't 10415 * happen for <>, as they aren't metas. */ 10416 if (deprecate_escaped_meta 10417 && (PL_multi_open == PL_multi_close 10418 || ! ckWARN_d(WARN_DEPRECATED) 10419 || PL_multi_open == '<')) 10420 { 10421 deprecate_escaped_meta = FALSE; 10422 } 10423 10424 /* create a new SV to hold the contents. 79 is the SV's initial length. 10425 What a random number. */ 10426 sv = newSV_type(SVt_PVIV); 10427 SvGROW(sv, 80); 10428 SvIV_set(sv, termcode); 10429 (void)SvPOK_only(sv); /* validate pointer */ 10430 10431 /* move past delimiter and try to read a complete string */ 10432 if (keep_delims) 10433 sv_catpvn(sv, s, termlen); 10434 s += termlen; 10435 #ifdef PERL_MAD 10436 tstart = SvPVX(PL_linestr) + stuffstart; 10437 if (PL_madskills && !PL_thisopen && !keep_delims) { 10438 PL_thisopen = newSVpvn(tstart, s - tstart); 10439 stuffstart = s - SvPVX(PL_linestr); 10440 } 10441 #endif 10442 for (;;) { 10443 if (PL_encoding && !UTF && !re_reparse) { 10444 bool cont = TRUE; 10445 10446 while (cont) { 10447 int offset = s - SvPVX_const(PL_linestr); 10448 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, 10449 &offset, (char*)termstr, termlen); 10450 const char * const ns = SvPVX_const(PL_linestr) + offset; 10451 char * const svlast = SvEND(sv) - 1; 10452 10453 for (; s < ns; s++) { 10454 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10455 COPLINE_INC_WITH_HERELINES; 10456 } 10457 if (!found) 10458 goto read_more_line; 10459 else { 10460 /* handle quoted delimiters */ 10461 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { 10462 const char *t; 10463 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) 10464 t--; 10465 if ((svlast-1 - t) % 2) { 10466 if (!keep_quoted) { 10467 *(svlast-1) = term; 10468 *svlast = '\0'; 10469 SvCUR_set(sv, SvCUR(sv) - 1); 10470 } 10471 continue; 10472 } 10473 } 10474 if (PL_multi_open == PL_multi_close) { 10475 cont = FALSE; 10476 } 10477 else { 10478 const char *t; 10479 char *w; 10480 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { 10481 /* At here, all closes are "was quoted" one, 10482 so we don't check PL_multi_close. */ 10483 if (*t == '\\') { 10484 if (!keep_quoted && *(t+1) == PL_multi_open) 10485 t++; 10486 else 10487 *w++ = *t++; 10488 } 10489 else if (*t == PL_multi_open) 10490 brackets++; 10491 10492 *w = *t; 10493 } 10494 if (w < t) { 10495 *w++ = term; 10496 *w = '\0'; 10497 SvCUR_set(sv, w - SvPVX_const(sv)); 10498 } 10499 last_off = w - SvPVX(sv); 10500 if (--brackets <= 0) 10501 cont = FALSE; 10502 } 10503 } 10504 } 10505 if (!keep_delims) { 10506 SvCUR_set(sv, SvCUR(sv) - 1); 10507 *SvEND(sv) = '\0'; 10508 } 10509 break; 10510 } 10511 10512 /* extend sv if need be */ 10513 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 10514 /* set 'to' to the next character in the sv's string */ 10515 to = SvPVX(sv)+SvCUR(sv); 10516 10517 /* if open delimiter is the close delimiter read unbridle */ 10518 if (PL_multi_open == PL_multi_close) { 10519 for (; s < PL_bufend; s++,to++) { 10520 /* embedded newlines increment the current line number */ 10521 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10522 COPLINE_INC_WITH_HERELINES; 10523 /* handle quoted delimiters */ 10524 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 10525 if (!keep_quoted 10526 && (s[1] == term 10527 || (re_reparse && s[1] == '\\')) 10528 ) 10529 s++; 10530 /* any other quotes are simply copied straight through */ 10531 else 10532 *to++ = *s++; 10533 } 10534 /* terminate when run out of buffer (the for() condition), or 10535 have found the terminator */ 10536 else if (*s == term) { 10537 if (termlen == 1) 10538 break; 10539 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) 10540 break; 10541 } 10542 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10543 has_utf8 = TRUE; 10544 *to = *s; 10545 } 10546 } 10547 10548 /* if the terminator isn't the same as the start character (e.g., 10549 matched brackets), we have to allow more in the quoting, and 10550 be prepared for nested brackets. 10551 */ 10552 else { 10553 /* read until we run out of string, or we find the terminator */ 10554 for (; s < PL_bufend; s++,to++) { 10555 /* embedded newlines increment the line count */ 10556 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10557 COPLINE_INC_WITH_HERELINES; 10558 /* backslashes can escape the open or closing characters */ 10559 if (*s == '\\' && s+1 < PL_bufend) { 10560 if (!keep_quoted && 10561 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 10562 { 10563 s++; 10564 10565 /* Here, 'deprecate_escaped_meta' is true iff the 10566 * delimiters are paired metacharacters, and 's' points 10567 * to an occurrence of one of them within the string, 10568 * which was preceded by a backslash. If this is a 10569 * context where the delimiter is also a metacharacter, 10570 * the backslash is useless, and deprecated. () and [] 10571 * are meta in any context. {} are meta only when 10572 * appearing in a quantifier or in things like '\p{'. 10573 * They also aren't meta unless there is a matching 10574 * closed, escaped char later on within the string. 10575 * If 's' points to an open, set a flag; if to a close, 10576 * test that flag, and raise a warning if it was set */ 10577 10578 if (deprecate_escaped_meta) { 10579 if (*s == PL_multi_open) { 10580 if (*s != '{') { 10581 escaped_open = s; 10582 } 10583 else if (regcurly(s, 10584 TRUE /* Look for a closing 10585 '\}' */) 10586 || (s - start > 2 /* Look for e.g. 10587 '\x{' */ 10588 && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META))) 10589 { 10590 escaped_open = s; 10591 } 10592 } 10593 else if (escaped_open) { 10594 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 10595 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open); 10596 escaped_open = NULL; 10597 } 10598 } 10599 } 10600 else 10601 *to++ = *s++; 10602 } 10603 /* allow nested opens and closes */ 10604 else if (*s == PL_multi_close && --brackets <= 0) 10605 break; 10606 else if (*s == PL_multi_open) 10607 brackets++; 10608 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10609 has_utf8 = TRUE; 10610 *to = *s; 10611 } 10612 } 10613 /* terminate the copied string and update the sv's end-of-string */ 10614 *to = '\0'; 10615 SvCUR_set(sv, to - SvPVX_const(sv)); 10616 10617 /* 10618 * this next chunk reads more into the buffer if we're not done yet 10619 */ 10620 10621 if (s < PL_bufend) 10622 break; /* handle case where we are done yet :-) */ 10623 10624 #ifndef PERL_STRICT_CR 10625 if (to - SvPVX_const(sv) >= 2) { 10626 if ((to[-2] == '\r' && to[-1] == '\n') || 10627 (to[-2] == '\n' && to[-1] == '\r')) 10628 { 10629 to[-2] = '\n'; 10630 to--; 10631 SvCUR_set(sv, to - SvPVX_const(sv)); 10632 } 10633 else if (to[-1] == '\r') 10634 to[-1] = '\n'; 10635 } 10636 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 10637 to[-1] = '\n'; 10638 #endif 10639 10640 read_more_line: 10641 /* if we're out of file, or a read fails, bail and reset the current 10642 line marker so we can report where the unterminated string began 10643 */ 10644 #ifdef PERL_MAD 10645 if (PL_madskills) { 10646 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10647 if (PL_thisstuff) 10648 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 10649 else 10650 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 10651 } 10652 #endif 10653 COPLINE_INC_WITH_HERELINES; 10654 PL_bufptr = PL_bufend; 10655 if (!lex_next_chunk(0)) { 10656 sv_free(sv); 10657 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 10658 return NULL; 10659 } 10660 s = PL_bufptr; 10661 #ifdef PERL_MAD 10662 stuffstart = 0; 10663 #endif 10664 } 10665 10666 /* at this point, we have successfully read the delimited string */ 10667 10668 if (!PL_encoding || UTF || re_reparse) { 10669 #ifdef PERL_MAD 10670 if (PL_madskills) { 10671 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10672 const int len = s - tstart; 10673 if (PL_thisstuff) 10674 sv_catpvn(PL_thisstuff, tstart, len); 10675 else 10676 PL_thisstuff = newSVpvn(tstart, len); 10677 if (!PL_thisclose && !keep_delims) 10678 PL_thisclose = newSVpvn(s,termlen); 10679 } 10680 #endif 10681 10682 if (keep_delims) 10683 sv_catpvn(sv, s, termlen); 10684 s += termlen; 10685 } 10686 #ifdef PERL_MAD 10687 else { 10688 if (PL_madskills) { 10689 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10690 const int len = s - tstart - termlen; 10691 if (PL_thisstuff) 10692 sv_catpvn(PL_thisstuff, tstart, len); 10693 else 10694 PL_thisstuff = newSVpvn(tstart, len); 10695 if (!PL_thisclose && !keep_delims) 10696 PL_thisclose = newSVpvn(s - termlen,termlen); 10697 } 10698 } 10699 #endif 10700 if (has_utf8 || (PL_encoding && !re_reparse)) 10701 SvUTF8_on(sv); 10702 10703 PL_multi_end = CopLINE(PL_curcop); 10704 10705 /* if we allocated too much space, give some back */ 10706 if (SvCUR(sv) + 5 < SvLEN(sv)) { 10707 SvLEN_set(sv, SvCUR(sv) + 1); 10708 SvPV_renew(sv, SvLEN(sv)); 10709 } 10710 10711 /* decide whether this is the first or second quoted string we've read 10712 for this op 10713 */ 10714 10715 if (PL_lex_stuff) 10716 PL_sublex_info.repl = sv; 10717 else 10718 PL_lex_stuff = sv; 10719 return s; 10720 } 10721 10722 /* 10723 scan_num 10724 takes: pointer to position in buffer 10725 returns: pointer to new position in buffer 10726 side-effects: builds ops for the constant in pl_yylval.op 10727 10728 Read a number in any of the formats that Perl accepts: 10729 10730 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 10731 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 10732 0b[01](_?[01])* 10733 0[0-7](_?[0-7])* 10734 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* 10735 10736 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 10737 thing it reads. 10738 10739 If it reads a number without a decimal point or an exponent, it will 10740 try converting the number to an integer and see if it can do so 10741 without loss of precision. 10742 */ 10743 10744 char * 10745 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 10746 { 10747 dVAR; 10748 const char *s = start; /* current position in buffer */ 10749 char *d; /* destination in temp buffer */ 10750 char *e; /* end of temp buffer */ 10751 NV nv; /* number read, as a double */ 10752 SV *sv = NULL; /* place to put the converted number */ 10753 bool floatit; /* boolean: int or float? */ 10754 const char *lastub = NULL; /* position of last underbar */ 10755 static const char* const number_too_long = "Number too long"; 10756 10757 PERL_ARGS_ASSERT_SCAN_NUM; 10758 10759 /* We use the first character to decide what type of number this is */ 10760 10761 switch (*s) { 10762 default: 10763 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 10764 10765 /* if it starts with a 0, it could be an octal number, a decimal in 10766 0.13 disguise, or a hexadecimal number, or a binary number. */ 10767 case '0': 10768 { 10769 /* variables: 10770 u holds the "number so far" 10771 shift the power of 2 of the base 10772 (hex == 4, octal == 3, binary == 1) 10773 overflowed was the number more than we can hold? 10774 10775 Shift is used when we add a digit. It also serves as an "are 10776 we in octal/hex/binary?" indicator to disallow hex characters 10777 when in octal mode. 10778 */ 10779 NV n = 0.0; 10780 UV u = 0; 10781 I32 shift; 10782 bool overflowed = FALSE; 10783 bool just_zero = TRUE; /* just plain 0 or binary number? */ 10784 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 10785 static const char* const bases[5] = 10786 { "", "binary", "", "octal", "hexadecimal" }; 10787 static const char* const Bases[5] = 10788 { "", "Binary", "", "Octal", "Hexadecimal" }; 10789 static const char* const maxima[5] = 10790 { "", 10791 "0b11111111111111111111111111111111", 10792 "", 10793 "037777777777", 10794 "0xffffffff" }; 10795 const char *base, *Base, *max; 10796 10797 /* check for hex */ 10798 if (s[1] == 'x' || s[1] == 'X') { 10799 shift = 4; 10800 s += 2; 10801 just_zero = FALSE; 10802 } else if (s[1] == 'b' || s[1] == 'B') { 10803 shift = 1; 10804 s += 2; 10805 just_zero = FALSE; 10806 } 10807 /* check for a decimal in disguise */ 10808 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') 10809 goto decimal; 10810 /* so it must be octal */ 10811 else { 10812 shift = 3; 10813 s++; 10814 } 10815 10816 if (*s == '_') { 10817 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10818 "Misplaced _ in number"); 10819 lastub = s++; 10820 } 10821 10822 base = bases[shift]; 10823 Base = Bases[shift]; 10824 max = maxima[shift]; 10825 10826 /* read the rest of the number */ 10827 for (;;) { 10828 /* x is used in the overflow test, 10829 b is the digit we're adding on. */ 10830 UV x, b; 10831 10832 switch (*s) { 10833 10834 /* if we don't mention it, we're done */ 10835 default: 10836 goto out; 10837 10838 /* _ are ignored -- but warned about if consecutive */ 10839 case '_': 10840 if (lastub && s == lastub + 1) 10841 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10842 "Misplaced _ in number"); 10843 lastub = s++; 10844 break; 10845 10846 /* 8 and 9 are not octal */ 10847 case '8': case '9': 10848 if (shift == 3) 10849 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 10850 /* FALL THROUGH */ 10851 10852 /* octal digits */ 10853 case '2': case '3': case '4': 10854 case '5': case '6': case '7': 10855 if (shift == 1) 10856 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 10857 /* FALL THROUGH */ 10858 10859 case '0': case '1': 10860 b = *s++ & 15; /* ASCII digit -> value of digit */ 10861 goto digit; 10862 10863 /* hex digits */ 10864 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 10865 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 10866 /* make sure they said 0x */ 10867 if (shift != 4) 10868 goto out; 10869 b = (*s++ & 7) + 9; 10870 10871 /* Prepare to put the digit we have onto the end 10872 of the number so far. We check for overflows. 10873 */ 10874 10875 digit: 10876 just_zero = FALSE; 10877 if (!overflowed) { 10878 x = u << shift; /* make room for the digit */ 10879 10880 if ((x >> shift) != u 10881 && !(PL_hints & HINT_NEW_BINARY)) { 10882 overflowed = TRUE; 10883 n = (NV) u; 10884 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 10885 "Integer overflow in %s number", 10886 base); 10887 } else 10888 u = x | b; /* add the digit to the end */ 10889 } 10890 if (overflowed) { 10891 n *= nvshift[shift]; 10892 /* If an NV has not enough bits in its 10893 * mantissa to represent an UV this summing of 10894 * small low-order numbers is a waste of time 10895 * (because the NV cannot preserve the 10896 * low-order bits anyway): we could just 10897 * remember when did we overflow and in the 10898 * end just multiply n by the right 10899 * amount. */ 10900 n += (NV) b; 10901 } 10902 break; 10903 } 10904 } 10905 10906 /* if we get here, we had success: make a scalar value from 10907 the number. 10908 */ 10909 out: 10910 10911 /* final misplaced underbar check */ 10912 if (s[-1] == '_') { 10913 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 10914 } 10915 10916 if (overflowed) { 10917 if (n > 4294967295.0) 10918 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 10919 "%s number > %s non-portable", 10920 Base, max); 10921 sv = newSVnv(n); 10922 } 10923 else { 10924 #if UVSIZE > 4 10925 if (u > 0xffffffff) 10926 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 10927 "%s number > %s non-portable", 10928 Base, max); 10929 #endif 10930 sv = newSVuv(u); 10931 } 10932 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 10933 sv = new_constant(start, s - start, "integer", 10934 sv, NULL, NULL, 0); 10935 else if (PL_hints & HINT_NEW_BINARY) 10936 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0); 10937 } 10938 break; 10939 10940 /* 10941 handle decimal numbers. 10942 we're also sent here when we read a 0 as the first digit 10943 */ 10944 case '1': case '2': case '3': case '4': case '5': 10945 case '6': case '7': case '8': case '9': case '.': 10946 decimal: 10947 d = PL_tokenbuf; 10948 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 10949 floatit = FALSE; 10950 10951 /* read next group of digits and _ and copy into d */ 10952 while (isDIGIT(*s) || *s == '_') { 10953 /* skip underscores, checking for misplaced ones 10954 if -w is on 10955 */ 10956 if (*s == '_') { 10957 if (lastub && s == lastub + 1) 10958 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10959 "Misplaced _ in number"); 10960 lastub = s++; 10961 } 10962 else { 10963 /* check for end of fixed-length buffer */ 10964 if (d >= e) 10965 Perl_croak(aTHX_ "%s", number_too_long); 10966 /* if we're ok, copy the character */ 10967 *d++ = *s++; 10968 } 10969 } 10970 10971 /* final misplaced underbar check */ 10972 if (lastub && s == lastub + 1) { 10973 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 10974 } 10975 10976 /* read a decimal portion if there is one. avoid 10977 3..5 being interpreted as the number 3. followed 10978 by .5 10979 */ 10980 if (*s == '.' && s[1] != '.') { 10981 floatit = TRUE; 10982 *d++ = *s++; 10983 10984 if (*s == '_') { 10985 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10986 "Misplaced _ in number"); 10987 lastub = s; 10988 } 10989 10990 /* copy, ignoring underbars, until we run out of digits. 10991 */ 10992 for (; isDIGIT(*s) || *s == '_'; s++) { 10993 /* fixed length buffer check */ 10994 if (d >= e) 10995 Perl_croak(aTHX_ "%s", number_too_long); 10996 if (*s == '_') { 10997 if (lastub && s == lastub + 1) 10998 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10999 "Misplaced _ in number"); 11000 lastub = s; 11001 } 11002 else 11003 *d++ = *s; 11004 } 11005 /* fractional part ending in underbar? */ 11006 if (s[-1] == '_') { 11007 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11008 "Misplaced _ in number"); 11009 } 11010 if (*s == '.' && isDIGIT(s[1])) { 11011 /* oops, it's really a v-string, but without the "v" */ 11012 s = start; 11013 goto vstring; 11014 } 11015 } 11016 11017 /* read exponent part, if present */ 11018 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { 11019 floatit = TRUE; 11020 s++; 11021 11022 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ 11023 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ 11024 11025 /* stray preinitial _ */ 11026 if (*s == '_') { 11027 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11028 "Misplaced _ in number"); 11029 lastub = s++; 11030 } 11031 11032 /* allow positive or negative exponent */ 11033 if (*s == '+' || *s == '-') 11034 *d++ = *s++; 11035 11036 /* stray initial _ */ 11037 if (*s == '_') { 11038 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11039 "Misplaced _ in number"); 11040 lastub = s++; 11041 } 11042 11043 /* read digits of exponent */ 11044 while (isDIGIT(*s) || *s == '_') { 11045 if (isDIGIT(*s)) { 11046 if (d >= e) 11047 Perl_croak(aTHX_ "%s", number_too_long); 11048 *d++ = *s++; 11049 } 11050 else { 11051 if (((lastub && s == lastub + 1) || 11052 (!isDIGIT(s[1]) && s[1] != '_'))) 11053 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11054 "Misplaced _ in number"); 11055 lastub = s++; 11056 } 11057 } 11058 } 11059 11060 11061 /* 11062 We try to do an integer conversion first if no characters 11063 indicating "float" have been found. 11064 */ 11065 11066 if (!floatit) { 11067 UV uv; 11068 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 11069 11070 if (flags == IS_NUMBER_IN_UV) { 11071 if (uv <= IV_MAX) 11072 sv = newSViv(uv); /* Prefer IVs over UVs. */ 11073 else 11074 sv = newSVuv(uv); 11075 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 11076 if (uv <= (UV) IV_MIN) 11077 sv = newSViv(-(IV)uv); 11078 else 11079 floatit = TRUE; 11080 } else 11081 floatit = TRUE; 11082 } 11083 if (floatit) { 11084 /* terminate the string */ 11085 *d = '\0'; 11086 nv = Atof(PL_tokenbuf); 11087 sv = newSVnv(nv); 11088 } 11089 11090 if ( floatit 11091 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 11092 const char *const key = floatit ? "float" : "integer"; 11093 const STRLEN keylen = floatit ? 5 : 7; 11094 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 11095 key, keylen, sv, NULL, NULL, 0); 11096 } 11097 break; 11098 11099 /* if it starts with a v, it could be a v-string */ 11100 case 'v': 11101 vstring: 11102 sv = newSV(5); /* preallocate storage space */ 11103 ENTER_with_name("scan_vstring"); 11104 SAVEFREESV(sv); 11105 s = scan_vstring(s, PL_bufend, sv); 11106 SvREFCNT_inc_simple_void_NN(sv); 11107 LEAVE_with_name("scan_vstring"); 11108 break; 11109 } 11110 11111 /* make the op for the constant and return */ 11112 11113 if (sv) 11114 lvalp->opval = newSVOP(OP_CONST, 0, sv); 11115 else 11116 lvalp->opval = NULL; 11117 11118 return (char *)s; 11119 } 11120 11121 STATIC char * 11122 S_scan_formline(pTHX_ char *s) 11123 { 11124 dVAR; 11125 char *eol; 11126 char *t; 11127 SV * const stuff = newSVpvs(""); 11128 bool needargs = FALSE; 11129 bool eofmt = FALSE; 11130 #ifdef PERL_MAD 11131 char *tokenstart = s; 11132 SV* savewhite = NULL; 11133 11134 if (PL_madskills) { 11135 savewhite = PL_thiswhite; 11136 PL_thiswhite = 0; 11137 } 11138 #endif 11139 11140 PERL_ARGS_ASSERT_SCAN_FORMLINE; 11141 11142 while (!needargs) { 11143 if (*s == '.') { 11144 t = s+1; 11145 #ifdef PERL_STRICT_CR 11146 while (SPACE_OR_TAB(*t)) 11147 t++; 11148 #else 11149 while (SPACE_OR_TAB(*t) || *t == '\r') 11150 t++; 11151 #endif 11152 if (*t == '\n' || t == PL_bufend) { 11153 eofmt = TRUE; 11154 break; 11155 } 11156 } 11157 eol = (char *) memchr(s,'\n',PL_bufend-s); 11158 if (!eol++) 11159 eol = PL_bufend; 11160 if (*s != '#') { 11161 for (t = s; t < eol; t++) { 11162 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 11163 needargs = FALSE; 11164 goto enough; /* ~~ must be first line in formline */ 11165 } 11166 if (*t == '@' || *t == '^') 11167 needargs = TRUE; 11168 } 11169 if (eol > s) { 11170 sv_catpvn(stuff, s, eol-s); 11171 #ifndef PERL_STRICT_CR 11172 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 11173 char *end = SvPVX(stuff) + SvCUR(stuff); 11174 end[-2] = '\n'; 11175 end[-1] = '\0'; 11176 SvCUR_set(stuff, SvCUR(stuff) - 1); 11177 } 11178 #endif 11179 } 11180 else 11181 break; 11182 } 11183 s = (char*)eol; 11184 if ((PL_rsfp || PL_parser->filtered) 11185 && PL_parser->form_lex_state == LEX_NORMAL) { 11186 bool got_some; 11187 #ifdef PERL_MAD 11188 if (PL_madskills) { 11189 if (PL_thistoken) 11190 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart); 11191 else 11192 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); 11193 } 11194 #endif 11195 PL_bufptr = PL_bufend; 11196 COPLINE_INC_WITH_HERELINES; 11197 got_some = lex_next_chunk(0); 11198 CopLINE_dec(PL_curcop); 11199 s = PL_bufptr; 11200 #ifdef PERL_MAD 11201 tokenstart = PL_bufptr; 11202 #endif 11203 if (!got_some) 11204 break; 11205 } 11206 incline(s); 11207 } 11208 enough: 11209 if (!SvCUR(stuff) || needargs) 11210 PL_lex_state = PL_parser->form_lex_state; 11211 if (SvCUR(stuff)) { 11212 PL_expect = XSTATE; 11213 if (needargs) { 11214 start_force(PL_curforce); 11215 NEXTVAL_NEXTTOKE.ival = 0; 11216 force_next(FORMLBRACK); 11217 } 11218 if (!IN_BYTES) { 11219 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 11220 SvUTF8_on(stuff); 11221 else if (PL_encoding) 11222 sv_recode_to_utf8(stuff, PL_encoding); 11223 } 11224 start_force(PL_curforce); 11225 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); 11226 force_next(THING); 11227 } 11228 else { 11229 SvREFCNT_dec(stuff); 11230 if (eofmt) 11231 PL_lex_formbrack = 0; 11232 } 11233 #ifdef PERL_MAD 11234 if (PL_madskills) { 11235 if (PL_thistoken) 11236 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart); 11237 else 11238 PL_thistoken = newSVpvn(tokenstart, s - tokenstart); 11239 PL_thiswhite = savewhite; 11240 } 11241 #endif 11242 return s; 11243 } 11244 11245 I32 11246 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 11247 { 11248 dVAR; 11249 const I32 oldsavestack_ix = PL_savestack_ix; 11250 CV* const outsidecv = PL_compcv; 11251 11252 SAVEI32(PL_subline); 11253 save_item(PL_subname); 11254 SAVESPTR(PL_compcv); 11255 11256 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 11257 CvFLAGS(PL_compcv) |= flags; 11258 11259 PL_subline = CopLINE(PL_curcop); 11260 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 11261 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 11262 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 11263 if (outsidecv && CvPADLIST(outsidecv)) 11264 CvPADLIST(PL_compcv)->xpadl_outid = 11265 PadlistNAMES(CvPADLIST(outsidecv)); 11266 11267 return oldsavestack_ix; 11268 } 11269 11270 #ifdef __SC__ 11271 #pragma segment Perl_yylex 11272 #endif 11273 static int 11274 S_yywarn(pTHX_ const char *const s, U32 flags) 11275 { 11276 dVAR; 11277 11278 PERL_ARGS_ASSERT_YYWARN; 11279 11280 PL_in_eval |= EVAL_WARNONLY; 11281 yyerror_pv(s, flags); 11282 PL_in_eval &= ~EVAL_WARNONLY; 11283 return 0; 11284 } 11285 11286 int 11287 Perl_yyerror(pTHX_ const char *const s) 11288 { 11289 PERL_ARGS_ASSERT_YYERROR; 11290 return yyerror_pvn(s, strlen(s), 0); 11291 } 11292 11293 int 11294 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 11295 { 11296 PERL_ARGS_ASSERT_YYERROR_PV; 11297 return yyerror_pvn(s, strlen(s), flags); 11298 } 11299 11300 int 11301 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 11302 { 11303 dVAR; 11304 const char *context = NULL; 11305 int contlen = -1; 11306 SV *msg; 11307 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 11308 int yychar = PL_parser->yychar; 11309 11310 PERL_ARGS_ASSERT_YYERROR_PVN; 11311 11312 if (!yychar || (yychar == ';' && !PL_rsfp)) 11313 sv_catpvs(where_sv, "at EOF"); 11314 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr && 11315 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && 11316 PL_oldbufptr != PL_bufptr) { 11317 /* 11318 Only for NetWare: 11319 The code below is removed for NetWare because it abends/crashes on NetWare 11320 when the script has error such as not having the closing quotes like: 11321 if ($var eq "value) 11322 Checking of white spaces is anyway done in NetWare code. 11323 */ 11324 #ifndef NETWARE 11325 while (isSPACE(*PL_oldoldbufptr)) 11326 PL_oldoldbufptr++; 11327 #endif 11328 context = PL_oldoldbufptr; 11329 contlen = PL_bufptr - PL_oldoldbufptr; 11330 } 11331 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr && 11332 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) { 11333 /* 11334 Only for NetWare: 11335 The code below is removed for NetWare because it abends/crashes on NetWare 11336 when the script has error such as not having the closing quotes like: 11337 if ($var eq "value) 11338 Checking of white spaces is anyway done in NetWare code. 11339 */ 11340 #ifndef NETWARE 11341 while (isSPACE(*PL_oldbufptr)) 11342 PL_oldbufptr++; 11343 #endif 11344 context = PL_oldbufptr; 11345 contlen = PL_bufptr - PL_oldbufptr; 11346 } 11347 else if (yychar > 255) 11348 sv_catpvs(where_sv, "next token ???"); 11349 else if (yychar == -2) { /* YYEMPTY */ 11350 if (PL_lex_state == LEX_NORMAL || 11351 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 11352 sv_catpvs(where_sv, "at end of line"); 11353 else if (PL_lex_inpat) 11354 sv_catpvs(where_sv, "within pattern"); 11355 else 11356 sv_catpvs(where_sv, "within string"); 11357 } 11358 else { 11359 sv_catpvs(where_sv, "next char "); 11360 if (yychar < 32) 11361 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 11362 else if (isPRINT_LC(yychar)) { 11363 const char string = yychar; 11364 sv_catpvn(where_sv, &string, 1); 11365 } 11366 else 11367 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 11368 } 11369 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 11370 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 11371 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); 11372 if (context) 11373 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n", 11374 SVfARG(newSVpvn_flags(context, contlen, 11375 SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); 11376 else 11377 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); 11378 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 11379 Perl_sv_catpvf(aTHX_ msg, 11380 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 11381 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 11382 PL_multi_end = 0; 11383 } 11384 if (PL_in_eval & EVAL_WARNONLY) { 11385 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); 11386 } 11387 else 11388 qerror(msg); 11389 if (PL_error_count >= 10) { 11390 SV * errsv; 11391 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) 11392 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 11393 SVfARG(errsv), OutCopFILE(PL_curcop)); 11394 else 11395 Perl_croak(aTHX_ "%s has too many errors.\n", 11396 OutCopFILE(PL_curcop)); 11397 } 11398 PL_in_my = 0; 11399 PL_in_my_stash = NULL; 11400 return 0; 11401 } 11402 #ifdef __SC__ 11403 #pragma segment Main 11404 #endif 11405 11406 STATIC char* 11407 S_swallow_bom(pTHX_ U8 *s) 11408 { 11409 dVAR; 11410 const STRLEN slen = SvCUR(PL_linestr); 11411 11412 PERL_ARGS_ASSERT_SWALLOW_BOM; 11413 11414 switch (s[0]) { 11415 case 0xFF: 11416 if (s[1] == 0xFE) { 11417 /* UTF-16 little-endian? (or UTF-32LE?) */ 11418 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 11419 /* diag_listed_as: Unsupported script encoding %s */ 11420 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 11421 #ifndef PERL_NO_UTF16_FILTER 11422 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 11423 s += 2; 11424 if (PL_bufend > (char*)s) { 11425 s = add_utf16_textfilter(s, TRUE); 11426 } 11427 #else 11428 /* diag_listed_as: Unsupported script encoding %s */ 11429 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11430 #endif 11431 } 11432 break; 11433 case 0xFE: 11434 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 11435 #ifndef PERL_NO_UTF16_FILTER 11436 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 11437 s += 2; 11438 if (PL_bufend > (char *)s) { 11439 s = add_utf16_textfilter(s, FALSE); 11440 } 11441 #else 11442 /* diag_listed_as: Unsupported script encoding %s */ 11443 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11444 #endif 11445 } 11446 break; 11447 case 0xEF: 11448 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { 11449 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 11450 s += 3; /* UTF-8 */ 11451 } 11452 break; 11453 case 0: 11454 if (slen > 3) { 11455 if (s[1] == 0) { 11456 if (s[2] == 0xFE && s[3] == 0xFF) { 11457 /* UTF-32 big-endian */ 11458 /* diag_listed_as: Unsupported script encoding %s */ 11459 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 11460 } 11461 } 11462 else if (s[2] == 0 && s[3] != 0) { 11463 /* Leading bytes 11464 * 00 xx 00 xx 11465 * are a good indicator of UTF-16BE. */ 11466 #ifndef PERL_NO_UTF16_FILTER 11467 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 11468 s = add_utf16_textfilter(s, FALSE); 11469 #else 11470 /* diag_listed_as: Unsupported script encoding %s */ 11471 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11472 #endif 11473 } 11474 } 11475 #ifdef EBCDIC 11476 case 0xDD: 11477 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) { 11478 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 11479 s += 4; /* UTF-8 */ 11480 } 11481 break; 11482 #endif 11483 11484 default: 11485 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 11486 /* Leading bytes 11487 * xx 00 xx 00 11488 * are a good indicator of UTF-16LE. */ 11489 #ifndef PERL_NO_UTF16_FILTER 11490 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 11491 s = add_utf16_textfilter(s, TRUE); 11492 #else 11493 /* diag_listed_as: Unsupported script encoding %s */ 11494 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11495 #endif 11496 } 11497 } 11498 return (char*)s; 11499 } 11500 11501 11502 #ifndef PERL_NO_UTF16_FILTER 11503 static I32 11504 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 11505 { 11506 dVAR; 11507 SV *const filter = FILTER_DATA(idx); 11508 /* We re-use this each time round, throwing the contents away before we 11509 return. */ 11510 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 11511 SV *const utf8_buffer = filter; 11512 IV status = IoPAGE(filter); 11513 const bool reverse = cBOOL(IoLINES(filter)); 11514 I32 retval; 11515 11516 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 11517 11518 /* As we're automatically added, at the lowest level, and hence only called 11519 from this file, we can be sure that we're not called in block mode. Hence 11520 don't bother writing code to deal with block mode. */ 11521 if (maxlen) { 11522 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 11523 } 11524 if (status < 0) { 11525 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); 11526 } 11527 DEBUG_P(PerlIO_printf(Perl_debug_log, 11528 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11529 FPTR2DPTR(void *, S_utf16_textfilter), 11530 reverse ? 'l' : 'b', idx, maxlen, status, 11531 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11532 11533 while (1) { 11534 STRLEN chars; 11535 STRLEN have; 11536 I32 newlen; 11537 U8 *end; 11538 /* First, look in our buffer of existing UTF-8 data: */ 11539 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 11540 11541 if (nl) { 11542 ++nl; 11543 } else if (status == 0) { 11544 /* EOF */ 11545 IoPAGE(filter) = 0; 11546 nl = SvEND(utf8_buffer); 11547 } 11548 if (nl) { 11549 STRLEN got = nl - SvPVX(utf8_buffer); 11550 /* Did we have anything to append? */ 11551 retval = got != 0; 11552 sv_catpvn(sv, SvPVX(utf8_buffer), got); 11553 /* Everything else in this code works just fine if SVp_POK isn't 11554 set. This, however, needs it, and we need it to work, else 11555 we loop infinitely because the buffer is never consumed. */ 11556 sv_chop(utf8_buffer, nl); 11557 break; 11558 } 11559 11560 /* OK, not a complete line there, so need to read some more UTF-16. 11561 Read an extra octect if the buffer currently has an odd number. */ 11562 while (1) { 11563 if (status <= 0) 11564 break; 11565 if (SvCUR(utf16_buffer) >= 2) { 11566 /* Location of the high octet of the last complete code point. 11567 Gosh, UTF-16 is a pain. All the benefits of variable length, 11568 *coupled* with all the benefits of partial reads and 11569 endianness. */ 11570 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 11571 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 11572 11573 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 11574 break; 11575 } 11576 11577 /* We have the first half of a surrogate. Read more. */ 11578 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 11579 } 11580 11581 status = FILTER_READ(idx + 1, utf16_buffer, 11582 160 + (SvCUR(utf16_buffer) & 1)); 11583 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); 11584 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 11585 if (status < 0) { 11586 /* Error */ 11587 IoPAGE(filter) = status; 11588 return status; 11589 } 11590 } 11591 11592 chars = SvCUR(utf16_buffer) >> 1; 11593 have = SvCUR(utf8_buffer); 11594 SvGROW(utf8_buffer, have + chars * 3 + 1); 11595 11596 if (reverse) { 11597 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 11598 (U8*)SvPVX_const(utf8_buffer) + have, 11599 chars * 2, &newlen); 11600 } else { 11601 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 11602 (U8*)SvPVX_const(utf8_buffer) + have, 11603 chars * 2, &newlen); 11604 } 11605 SvCUR_set(utf8_buffer, have + newlen); 11606 *end = '\0'; 11607 11608 /* No need to keep this SV "well-formed" with a '\0' after the end, as 11609 it's private to us, and utf16_to_utf8{,reversed} take a 11610 (pointer,length) pair, rather than a NUL-terminated string. */ 11611 if(SvCUR(utf16_buffer) & 1) { 11612 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 11613 SvCUR_set(utf16_buffer, 1); 11614 } else { 11615 SvCUR_set(utf16_buffer, 0); 11616 } 11617 } 11618 DEBUG_P(PerlIO_printf(Perl_debug_log, 11619 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11620 status, 11621 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11622 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 11623 return retval; 11624 } 11625 11626 static U8 * 11627 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 11628 { 11629 SV *filter = filter_add(S_utf16_textfilter, NULL); 11630 11631 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 11632 11633 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 11634 sv_setpvs(filter, ""); 11635 IoLINES(filter) = reversed; 11636 IoPAGE(filter) = 1; /* Not EOF */ 11637 11638 /* Sadly, we have to return a valid pointer, come what may, so we have to 11639 ignore any error return from this. */ 11640 SvCUR_set(PL_linestr, 0); 11641 if (FILTER_READ(0, PL_linestr, 0)) { 11642 SvUTF8_on(PL_linestr); 11643 } else { 11644 SvUTF8_on(PL_linestr); 11645 } 11646 PL_bufend = SvEND(PL_linestr); 11647 return (U8*)SvPVX(PL_linestr); 11648 } 11649 #endif 11650 11651 /* 11652 Returns a pointer to the next character after the parsed 11653 vstring, as well as updating the passed in sv. 11654 11655 Function must be called like 11656 11657 sv = sv_2mortal(newSV(5)); 11658 s = scan_vstring(s,e,sv); 11659 11660 where s and e are the start and end of the string. 11661 The sv should already be large enough to store the vstring 11662 passed in, for performance reasons. 11663 11664 This function may croak if fatal warnings are enabled in the 11665 calling scope, hence the sv_2mortal in the example (to prevent 11666 a leak). Make sure to do SvREFCNT_inc afterwards if you use 11667 sv_2mortal. 11668 11669 */ 11670 11671 char * 11672 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 11673 { 11674 dVAR; 11675 const char *pos = s; 11676 const char *start = s; 11677 11678 PERL_ARGS_ASSERT_SCAN_VSTRING; 11679 11680 if (*pos == 'v') pos++; /* get past 'v' */ 11681 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11682 pos++; 11683 if ( *pos != '.') { 11684 /* this may not be a v-string if followed by => */ 11685 const char *next = pos; 11686 while (next < e && isSPACE(*next)) 11687 ++next; 11688 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 11689 /* return string not v-string */ 11690 sv_setpvn(sv,(char *)s,pos-s); 11691 return (char *)pos; 11692 } 11693 } 11694 11695 if (!isALPHA(*pos)) { 11696 U8 tmpbuf[UTF8_MAXBYTES+1]; 11697 11698 if (*s == 'v') 11699 s++; /* get past 'v' */ 11700 11701 sv_setpvs(sv, ""); 11702 11703 for (;;) { 11704 /* this is atoi() that tolerates underscores */ 11705 U8 *tmpend; 11706 UV rev = 0; 11707 const char *end = pos; 11708 UV mult = 1; 11709 while (--end >= s) { 11710 if (*end != '_') { 11711 const UV orev = rev; 11712 rev += (*end - '0') * mult; 11713 mult *= 10; 11714 if (orev > rev) 11715 /* diag_listed_as: Integer overflow in %s number */ 11716 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11717 "Integer overflow in decimal number"); 11718 } 11719 } 11720 #ifdef EBCDIC 11721 if (rev > 0x7FFFFFFF) 11722 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); 11723 #endif 11724 /* Append native character for the rev point */ 11725 tmpend = uvchr_to_utf8(tmpbuf, rev); 11726 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 11727 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev))) 11728 SvUTF8_on(sv); 11729 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 11730 s = ++pos; 11731 else { 11732 s = pos; 11733 break; 11734 } 11735 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11736 pos++; 11737 } 11738 SvPOK_on(sv); 11739 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 11740 SvRMAGICAL_on(sv); 11741 } 11742 return (char *)s; 11743 } 11744 11745 int 11746 Perl_keyword_plugin_standard(pTHX_ 11747 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 11748 { 11749 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 11750 PERL_UNUSED_CONTEXT; 11751 PERL_UNUSED_ARG(keyword_ptr); 11752 PERL_UNUSED_ARG(keyword_len); 11753 PERL_UNUSED_ARG(op_ptr); 11754 return KEYWORD_PLUGIN_DECLINE; 11755 } 11756 11757 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 11758 static void 11759 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 11760 { 11761 SAVEI32(PL_lex_brackets); 11762 if (PL_lex_brackets > 100) 11763 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 11764 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 11765 SAVEI32(PL_lex_allbrackets); 11766 PL_lex_allbrackets = 0; 11767 SAVEI8(PL_lex_fakeeof); 11768 PL_lex_fakeeof = (U8)fakeeof; 11769 if(yyparse(gramtype) && !PL_parser->error_count) 11770 qerror(Perl_mess(aTHX_ "Parse error")); 11771 } 11772 11773 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 11774 static OP * 11775 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 11776 { 11777 OP *o; 11778 ENTER; 11779 SAVEVPTR(PL_eval_root); 11780 PL_eval_root = NULL; 11781 parse_recdescent(gramtype, fakeeof); 11782 o = PL_eval_root; 11783 LEAVE; 11784 return o; 11785 } 11786 11787 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 11788 static OP * 11789 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 11790 { 11791 OP *exprop; 11792 if (flags & ~PARSE_OPTIONAL) 11793 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 11794 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 11795 if (!exprop && !(flags & PARSE_OPTIONAL)) { 11796 if (!PL_parser->error_count) 11797 qerror(Perl_mess(aTHX_ "Parse error")); 11798 exprop = newOP(OP_NULL, 0); 11799 } 11800 return exprop; 11801 } 11802 11803 /* 11804 =for apidoc Amx|OP *|parse_arithexpr|U32 flags 11805 11806 Parse a Perl arithmetic expression. This may contain operators of precedence 11807 down to the bit shift operators. The expression must be followed (and thus 11808 terminated) either by a comparison or lower-precedence operator or by 11809 something that would normally terminate an expression such as semicolon. 11810 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 11811 otherwise it is mandatory. It is up to the caller to ensure that the 11812 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11813 the source of the code to be parsed and the lexical context for the 11814 expression. 11815 11816 The op tree representing the expression is returned. If an optional 11817 expression is absent, a null pointer is returned, otherwise the pointer 11818 will be non-null. 11819 11820 If an error occurs in parsing or compilation, in most cases a valid op 11821 tree is returned anyway. The error is reflected in the parser state, 11822 normally resulting in a single exception at the top level of parsing 11823 which covers all the compilation errors that occurred. Some compilation 11824 errors, however, will throw an exception immediately. 11825 11826 =cut 11827 */ 11828 11829 OP * 11830 Perl_parse_arithexpr(pTHX_ U32 flags) 11831 { 11832 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 11833 } 11834 11835 /* 11836 =for apidoc Amx|OP *|parse_termexpr|U32 flags 11837 11838 Parse a Perl term expression. This may contain operators of precedence 11839 down to the assignment operators. The expression must be followed (and thus 11840 terminated) either by a comma or lower-precedence operator or by 11841 something that would normally terminate an expression such as semicolon. 11842 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 11843 otherwise it is mandatory. It is up to the caller to ensure that the 11844 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11845 the source of the code to be parsed and the lexical context for the 11846 expression. 11847 11848 The op tree representing the expression is returned. If an optional 11849 expression is absent, a null pointer is returned, otherwise the pointer 11850 will be non-null. 11851 11852 If an error occurs in parsing or compilation, in most cases a valid op 11853 tree is returned anyway. The error is reflected in the parser state, 11854 normally resulting in a single exception at the top level of parsing 11855 which covers all the compilation errors that occurred. Some compilation 11856 errors, however, will throw an exception immediately. 11857 11858 =cut 11859 */ 11860 11861 OP * 11862 Perl_parse_termexpr(pTHX_ U32 flags) 11863 { 11864 return parse_expr(LEX_FAKEEOF_COMMA, flags); 11865 } 11866 11867 /* 11868 =for apidoc Amx|OP *|parse_listexpr|U32 flags 11869 11870 Parse a Perl list expression. This may contain operators of precedence 11871 down to the comma operator. The expression must be followed (and thus 11872 terminated) either by a low-precedence logic operator such as C<or> or by 11873 something that would normally terminate an expression such as semicolon. 11874 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 11875 otherwise it is mandatory. It is up to the caller to ensure that the 11876 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11877 the source of the code to be parsed and the lexical context for the 11878 expression. 11879 11880 The op tree representing the expression is returned. If an optional 11881 expression is absent, a null pointer is returned, otherwise the pointer 11882 will be non-null. 11883 11884 If an error occurs in parsing or compilation, in most cases a valid op 11885 tree is returned anyway. The error is reflected in the parser state, 11886 normally resulting in a single exception at the top level of parsing 11887 which covers all the compilation errors that occurred. Some compilation 11888 errors, however, will throw an exception immediately. 11889 11890 =cut 11891 */ 11892 11893 OP * 11894 Perl_parse_listexpr(pTHX_ U32 flags) 11895 { 11896 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 11897 } 11898 11899 /* 11900 =for apidoc Amx|OP *|parse_fullexpr|U32 flags 11901 11902 Parse a single complete Perl expression. This allows the full 11903 expression grammar, including the lowest-precedence operators such 11904 as C<or>. The expression must be followed (and thus terminated) by a 11905 token that an expression would normally be terminated by: end-of-file, 11906 closing bracketing punctuation, semicolon, or one of the keywords that 11907 signals a postfix expression-statement modifier. If I<flags> includes 11908 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is 11909 mandatory. It is up to the caller to ensure that the dynamic parser 11910 state (L</PL_parser> et al) is correctly set to reflect the source of 11911 the code to be parsed and the lexical context for the expression. 11912 11913 The op tree representing the expression is returned. If an optional 11914 expression is absent, a null pointer is returned, otherwise the pointer 11915 will be non-null. 11916 11917 If an error occurs in parsing or compilation, in most cases a valid op 11918 tree is returned anyway. The error is reflected in the parser state, 11919 normally resulting in a single exception at the top level of parsing 11920 which covers all the compilation errors that occurred. Some compilation 11921 errors, however, will throw an exception immediately. 11922 11923 =cut 11924 */ 11925 11926 OP * 11927 Perl_parse_fullexpr(pTHX_ U32 flags) 11928 { 11929 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 11930 } 11931 11932 /* 11933 =for apidoc Amx|OP *|parse_block|U32 flags 11934 11935 Parse a single complete Perl code block. This consists of an opening 11936 brace, a sequence of statements, and a closing brace. The block 11937 constitutes a lexical scope, so C<my> variables and various compile-time 11938 effects can be contained within it. It is up to the caller to ensure 11939 that the dynamic parser state (L</PL_parser> et al) is correctly set to 11940 reflect the source of the code to be parsed and the lexical context for 11941 the statement. 11942 11943 The op tree representing the code block is returned. This is always a 11944 real op, never a null pointer. It will normally be a C<lineseq> list, 11945 including C<nextstate> or equivalent ops. No ops to construct any kind 11946 of runtime scope are included by virtue of it being a block. 11947 11948 If an error occurs in parsing or compilation, in most cases a valid op 11949 tree (most likely null) is returned anyway. The error is reflected in 11950 the parser state, normally resulting in a single exception at the top 11951 level of parsing which covers all the compilation errors that occurred. 11952 Some compilation errors, however, will throw an exception immediately. 11953 11954 The I<flags> parameter is reserved for future use, and must always 11955 be zero. 11956 11957 =cut 11958 */ 11959 11960 OP * 11961 Perl_parse_block(pTHX_ U32 flags) 11962 { 11963 if (flags) 11964 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 11965 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 11966 } 11967 11968 /* 11969 =for apidoc Amx|OP *|parse_barestmt|U32 flags 11970 11971 Parse a single unadorned Perl statement. This may be a normal imperative 11972 statement or a declaration that has compile-time effect. It does not 11973 include any label or other affixture. It is up to the caller to ensure 11974 that the dynamic parser state (L</PL_parser> et al) is correctly set to 11975 reflect the source of the code to be parsed and the lexical context for 11976 the statement. 11977 11978 The op tree representing the statement is returned. This may be a 11979 null pointer if the statement is null, for example if it was actually 11980 a subroutine definition (which has compile-time side effects). If not 11981 null, it will be ops directly implementing the statement, suitable to 11982 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 11983 equivalent op (except for those embedded in a scope contained entirely 11984 within the statement). 11985 11986 If an error occurs in parsing or compilation, in most cases a valid op 11987 tree (most likely null) is returned anyway. The error is reflected in 11988 the parser state, normally resulting in a single exception at the top 11989 level of parsing which covers all the compilation errors that occurred. 11990 Some compilation errors, however, will throw an exception immediately. 11991 11992 The I<flags> parameter is reserved for future use, and must always 11993 be zero. 11994 11995 =cut 11996 */ 11997 11998 OP * 11999 Perl_parse_barestmt(pTHX_ U32 flags) 12000 { 12001 if (flags) 12002 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 12003 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 12004 } 12005 12006 /* 12007 =for apidoc Amx|SV *|parse_label|U32 flags 12008 12009 Parse a single label, possibly optional, of the type that may prefix a 12010 Perl statement. It is up to the caller to ensure that the dynamic parser 12011 state (L</PL_parser> et al) is correctly set to reflect the source of 12012 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the 12013 label is optional, otherwise it is mandatory. 12014 12015 The name of the label is returned in the form of a fresh scalar. If an 12016 optional label is absent, a null pointer is returned. 12017 12018 If an error occurs in parsing, which can only occur if the label is 12019 mandatory, a valid label is returned anyway. The error is reflected in 12020 the parser state, normally resulting in a single exception at the top 12021 level of parsing which covers all the compilation errors that occurred. 12022 12023 =cut 12024 */ 12025 12026 SV * 12027 Perl_parse_label(pTHX_ U32 flags) 12028 { 12029 if (flags & ~PARSE_OPTIONAL) 12030 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 12031 if (PL_lex_state == LEX_KNOWNEXT) { 12032 PL_parser->yychar = yylex(); 12033 if (PL_parser->yychar == LABEL) { 12034 char * const lpv = pl_yylval.pval; 12035 STRLEN llen = strlen(lpv); 12036 PL_parser->yychar = YYEMPTY; 12037 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0); 12038 } else { 12039 yyunlex(); 12040 goto no_label; 12041 } 12042 } else { 12043 char *s, *t; 12044 STRLEN wlen, bufptr_pos; 12045 lex_read_space(0); 12046 t = s = PL_bufptr; 12047 if (!isIDFIRST_lazy_if(s, UTF)) 12048 goto no_label; 12049 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 12050 if (word_takes_any_delimeter(s, wlen)) 12051 goto no_label; 12052 bufptr_pos = s - SvPVX(PL_linestr); 12053 PL_bufptr = t; 12054 lex_read_space(LEX_KEEP_PREVIOUS); 12055 t = PL_bufptr; 12056 s = SvPVX(PL_linestr) + bufptr_pos; 12057 if (t[0] == ':' && t[1] != ':') { 12058 PL_oldoldbufptr = PL_oldbufptr; 12059 PL_oldbufptr = s; 12060 PL_bufptr = t+1; 12061 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 12062 } else { 12063 PL_bufptr = s; 12064 no_label: 12065 if (flags & PARSE_OPTIONAL) { 12066 return NULL; 12067 } else { 12068 qerror(Perl_mess(aTHX_ "Parse error")); 12069 return newSVpvs("x"); 12070 } 12071 } 12072 } 12073 } 12074 12075 /* 12076 =for apidoc Amx|OP *|parse_fullstmt|U32 flags 12077 12078 Parse a single complete Perl statement. This may be a normal imperative 12079 statement or a declaration that has compile-time effect, and may include 12080 optional labels. It is up to the caller to ensure that the dynamic 12081 parser state (L</PL_parser> et al) is correctly set to reflect the source 12082 of the code to be parsed and the lexical context for the statement. 12083 12084 The op tree representing the statement is returned. This may be a 12085 null pointer if the statement is null, for example if it was actually 12086 a subroutine definition (which has compile-time side effects). If not 12087 null, it will be the result of a L</newSTATEOP> call, normally including 12088 a C<nextstate> or equivalent op. 12089 12090 If an error occurs in parsing or compilation, in most cases a valid op 12091 tree (most likely null) is returned anyway. The error is reflected in 12092 the parser state, normally resulting in a single exception at the top 12093 level of parsing which covers all the compilation errors that occurred. 12094 Some compilation errors, however, will throw an exception immediately. 12095 12096 The I<flags> parameter is reserved for future use, and must always 12097 be zero. 12098 12099 =cut 12100 */ 12101 12102 OP * 12103 Perl_parse_fullstmt(pTHX_ U32 flags) 12104 { 12105 if (flags) 12106 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 12107 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 12108 } 12109 12110 /* 12111 =for apidoc Amx|OP *|parse_stmtseq|U32 flags 12112 12113 Parse a sequence of zero or more Perl statements. These may be normal 12114 imperative statements, including optional labels, or declarations 12115 that have compile-time effect, or any mixture thereof. The statement 12116 sequence ends when a closing brace or end-of-file is encountered in a 12117 place where a new statement could have validly started. It is up to 12118 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 12119 is correctly set to reflect the source of the code to be parsed and the 12120 lexical context for the statements. 12121 12122 The op tree representing the statement sequence is returned. This may 12123 be a null pointer if the statements were all null, for example if there 12124 were no statements or if there were only subroutine definitions (which 12125 have compile-time side effects). If not null, it will be a C<lineseq> 12126 list, normally including C<nextstate> or equivalent ops. 12127 12128 If an error occurs in parsing or compilation, in most cases a valid op 12129 tree is returned anyway. The error is reflected in the parser state, 12130 normally resulting in a single exception at the top level of parsing 12131 which covers all the compilation errors that occurred. Some compilation 12132 errors, however, will throw an exception immediately. 12133 12134 The I<flags> parameter is reserved for future use, and must always 12135 be zero. 12136 12137 =cut 12138 */ 12139 12140 OP * 12141 Perl_parse_stmtseq(pTHX_ U32 flags) 12142 { 12143 OP *stmtseqop; 12144 I32 c; 12145 if (flags) 12146 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 12147 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 12148 c = lex_peek_unichar(0); 12149 if (c != -1 && c != /*{*/'}') 12150 qerror(Perl_mess(aTHX_ "Parse error")); 12151 return stmtseqop; 12152 } 12153 12154 /* 12155 * Local variables: 12156 * c-indentation-style: bsd 12157 * c-basic-offset: 4 12158 * indent-tabs-mode: nil 12159 * End: 12160 * 12161 * ex: set ts=8 sts=4 sw=4 et: 12162 */ 12163