1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 27 This is the lower layer of the Perl parser, managing characters and tokens. 28 29 =for apidoc AmU|yy_parser *|PL_parser 30 31 Pointer to a structure encapsulating the state of the parsing operation 32 currently in progress. The pointer can be locally changed to perform 33 a nested parse without interfering with the state of an outer parse. 34 Individual members of C<PL_parser> have their own documentation. 35 36 =cut 37 */ 38 39 #include "EXTERN.h" 40 #define PERL_IN_TOKE_C 41 #include "perl.h" 42 #include "dquote_static.c" 43 44 #define new_constant(a,b,c,d,e,f,g) \ 45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) 46 47 #define pl_yylval (PL_parser->yylval) 48 49 /* XXX temporary backwards compatibility */ 50 #define PL_lex_brackets (PL_parser->lex_brackets) 51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 53 #define PL_lex_brackstack (PL_parser->lex_brackstack) 54 #define PL_lex_casemods (PL_parser->lex_casemods) 55 #define PL_lex_casestack (PL_parser->lex_casestack) 56 #define PL_lex_defer (PL_parser->lex_defer) 57 #define PL_lex_dojoin (PL_parser->lex_dojoin) 58 #define PL_lex_expect (PL_parser->lex_expect) 59 #define PL_lex_formbrack (PL_parser->lex_formbrack) 60 #define PL_lex_inpat (PL_parser->lex_inpat) 61 #define PL_lex_inwhat (PL_parser->lex_inwhat) 62 #define PL_lex_op (PL_parser->lex_op) 63 #define PL_lex_repl (PL_parser->lex_repl) 64 #define PL_lex_starts (PL_parser->lex_starts) 65 #define PL_lex_stuff (PL_parser->lex_stuff) 66 #define PL_multi_start (PL_parser->multi_start) 67 #define PL_multi_open (PL_parser->multi_open) 68 #define PL_multi_close (PL_parser->multi_close) 69 #define PL_preambled (PL_parser->preambled) 70 #define PL_sublex_info (PL_parser->sublex_info) 71 #define PL_linestr (PL_parser->linestr) 72 #define PL_expect (PL_parser->expect) 73 #define PL_copline (PL_parser->copline) 74 #define PL_bufptr (PL_parser->bufptr) 75 #define PL_oldbufptr (PL_parser->oldbufptr) 76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 77 #define PL_linestart (PL_parser->linestart) 78 #define PL_bufend (PL_parser->bufend) 79 #define PL_last_uni (PL_parser->last_uni) 80 #define PL_last_lop (PL_parser->last_lop) 81 #define PL_last_lop_op (PL_parser->last_lop_op) 82 #define PL_lex_state (PL_parser->lex_state) 83 #define PL_rsfp (PL_parser->rsfp) 84 #define PL_rsfp_filters (PL_parser->rsfp_filters) 85 #define PL_in_my (PL_parser->in_my) 86 #define PL_in_my_stash (PL_parser->in_my_stash) 87 #define PL_tokenbuf (PL_parser->tokenbuf) 88 #define PL_multi_end (PL_parser->multi_end) 89 #define PL_error_count (PL_parser->error_count) 90 91 #ifdef PERL_MAD 92 # define PL_endwhite (PL_parser->endwhite) 93 # define PL_faketokens (PL_parser->faketokens) 94 # define PL_lasttoke (PL_parser->lasttoke) 95 # define PL_nextwhite (PL_parser->nextwhite) 96 # define PL_realtokenstart (PL_parser->realtokenstart) 97 # define PL_skipwhite (PL_parser->skipwhite) 98 # define PL_thisclose (PL_parser->thisclose) 99 # define PL_thismad (PL_parser->thismad) 100 # define PL_thisopen (PL_parser->thisopen) 101 # define PL_thisstuff (PL_parser->thisstuff) 102 # define PL_thistoken (PL_parser->thistoken) 103 # define PL_thiswhite (PL_parser->thiswhite) 104 # define PL_thiswhite (PL_parser->thiswhite) 105 # define PL_nexttoke (PL_parser->nexttoke) 106 # define PL_curforce (PL_parser->curforce) 107 #else 108 # define PL_nexttoke (PL_parser->nexttoke) 109 # define PL_nexttype (PL_parser->nexttype) 110 # define PL_nextval (PL_parser->nextval) 111 #endif 112 113 static const char* const ident_too_long = "Identifier too long"; 114 115 #ifdef PERL_MAD 116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } 117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val 118 #else 119 # define CURMAD(slot,sv) 120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 121 #endif 122 123 #define XENUMMASK 0x3f 124 #define XFAKEEOF 0x40 125 #define XFAKEBRACK 0x80 126 127 #ifdef USE_UTF8_SCRIPTS 128 # define UTF (!IN_BYTES) 129 #else 130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 131 #endif 132 133 /* The maximum number of characters preceding the unrecognized one to display */ 134 #define UNRECOGNIZED_PRECEDE_COUNT 10 135 136 /* In variables named $^X, these are the legal values for X. 137 * 1999-02-27 mjd-perl-patch@plover.com */ 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 139 140 #define SPACE_OR_TAB(c) isBLANK_A(c) 141 142 /* LEX_* are values for PL_lex_state, the state of the lexer. 143 * They are arranged oddly so that the guard on the switch statement 144 * can get by with a single comparison (if the compiler is smart enough). 145 * 146 * These values refer to the various states within a sublex parse, 147 * i.e. within a double quotish string 148 */ 149 150 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 151 152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 157 158 /* at end of code, eg "$x" followed by: */ 159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 161 162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 163 string or after \E, $foo, etc */ 164 #define LEX_INTERPCONST 2 /* NOT USED */ 165 #define LEX_FORMLINE 1 /* expecting a format line */ 166 #define LEX_KNOWNEXT 0 /* next token known; just return it */ 167 168 169 #ifdef DEBUGGING 170 static const char* const lex_state_names[] = { 171 "KNOWNEXT", 172 "FORMLINE", 173 "INTERPCONST", 174 "INTERPCONCAT", 175 "INTERPENDMAYBE", 176 "INTERPEND", 177 "INTERPSTART", 178 "INTERPPUSH", 179 "INTERPCASEMOD", 180 "INTERPNORMAL", 181 "NORMAL" 182 }; 183 #endif 184 185 #include "keywords.h" 186 187 /* CLINE is a macro that ensures PL_copline has a sane value */ 188 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 190 191 #ifdef PERL_MAD 192 # define SKIPSPACE0(s) skipspace0(s) 193 # define SKIPSPACE1(s) skipspace1(s) 194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv) 195 # define PEEKSPACE(s) skipspace2(s,0) 196 #else 197 # define SKIPSPACE0(s) skipspace(s) 198 # define SKIPSPACE1(s) skipspace(s) 199 # define SKIPSPACE2(s,tsv) skipspace(s) 200 # define PEEKSPACE(s) skipspace(s) 201 #endif 202 203 /* 204 * Convenience functions to return different tokens and prime the 205 * lexer for the next token. They all take an argument. 206 * 207 * TOKEN : generic token (used for '(', DOLSHARP, etc) 208 * OPERATOR : generic operator 209 * AOPERATOR : assignment operator 210 * PREBLOCK : beginning the block after an if, while, foreach, ... 211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 212 * PREREF : *EXPR where EXPR is not a simple identifier 213 * TERM : expression term 214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 215 * LOOPX : loop exiting command (goto, last, dump, etc) 216 * FTST : file test operator 217 * FUN0 : zero-argument function 218 * FUN0OP : zero-argument function, with its op created in this file 219 * FUN1 : not used, except for not, which isn't a UNIOP 220 * BOop : bitwise or or xor 221 * BAop : bitwise and 222 * SHop : shift operator 223 * PWop : power operator 224 * PMop : pattern-matching operator 225 * Aop : addition-level operator 226 * Mop : multiplication-level operator 227 * Eop : equality-testing operator 228 * Rop : relational operator <= != gt 229 * 230 * Also see LOP and lop() below. 231 */ 232 233 #ifdef DEBUGGING /* Serve -DT. */ 234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 235 #else 236 # define REPORT(retval) (retval) 237 #endif 238 239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) 242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) 248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) 253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) 254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) 255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) 256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) 258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) 259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) 260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) 261 262 /* This bit of chicanery makes a unary function followed by 263 * a parenthesis into a function with one argument, highest precedence. 264 * The UNIDOR macro is for unary functions that can be followed by the // 265 * operator (such as C<shift // 0>). 266 */ 267 #define UNI3(f,x,have_x) { \ 268 pl_yylval.ival = f; \ 269 if (have_x) PL_expect = x; \ 270 PL_bufptr = s; \ 271 PL_last_uni = PL_oldbufptr; \ 272 PL_last_lop_op = f; \ 273 if (*s == '(') \ 274 return REPORT( (int)FUNC1 ); \ 275 s = PEEKSPACE(s); \ 276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 277 } 278 #define UNI(f) UNI3(f,XTERM,1) 279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 280 #define UNIPROTO(f,optional) { \ 281 if (optional) PL_last_uni = PL_oldbufptr; \ 282 OPERATOR(f); \ 283 } 284 285 #define UNIBRACK(f) UNI3(f,0,0) 286 287 /* grandfather return to old style */ 288 #define OLDLOP(f) \ 289 do { \ 290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 292 pl_yylval.ival = (f); \ 293 PL_expect = XTERM; \ 294 PL_bufptr = s; \ 295 return (int)LSTOP; \ 296 } while(0) 297 298 #define COPLINE_INC_WITH_HERELINES \ 299 STMT_START { \ 300 CopLINE_inc(PL_curcop); \ 301 if (PL_parser->herelines) \ 302 CopLINE(PL_curcop) += PL_parser->herelines, \ 303 PL_parser->herelines = 0; \ 304 } STMT_END 305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there 306 * is no sublex_push to follow. */ 307 #define COPLINE_SET_FROM_MULTI_END \ 308 STMT_START { \ 309 CopLINE_set(PL_curcop, PL_multi_end); \ 310 if (PL_multi_end != PL_multi_start) \ 311 PL_parser->herelines = 0; \ 312 } STMT_END 313 314 315 #ifdef DEBUGGING 316 317 /* how to interpret the pl_yylval associated with the token */ 318 enum token_type { 319 TOKENTYPE_NONE, 320 TOKENTYPE_IVAL, 321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 322 TOKENTYPE_PVAL, 323 TOKENTYPE_OPVAL 324 }; 325 326 static struct debug_tokens { 327 const int token; 328 enum token_type type; 329 const char *name; 330 } const debug_tokens[] = 331 { 332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 333 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 334 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 336 { ARROW, TOKENTYPE_NONE, "ARROW" }, 337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 343 { DO, TOKENTYPE_NONE, "DO" }, 344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 345 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 346 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 348 { ELSE, TOKENTYPE_NONE, "ELSE" }, 349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 350 { EQOP, TOKENTYPE_OPNUM, "EQOP" }, 351 { FOR, TOKENTYPE_IVAL, "FOR" }, 352 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 355 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 363 { IF, TOKENTYPE_IVAL, "IF" }, 364 { LABEL, TOKENTYPE_PVAL, "LABEL" }, 365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 370 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 371 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 372 { MY, TOKENTYPE_IVAL, "MY" }, 373 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 374 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 375 { OROP, TOKENTYPE_IVAL, "OROP" }, 376 { OROR, TOKENTYPE_NONE, "OROR" }, 377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 378 { PEG, TOKENTYPE_NONE, "PEG" }, 379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" }, 383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 384 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 385 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 386 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 387 { PREINC, TOKENTYPE_NONE, "PREINC" }, 388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 390 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 391 { RELOP, TOKENTYPE_OPNUM, "RELOP" }, 392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 394 { SUB, TOKENTYPE_NONE, "SUB" }, 395 { THING, TOKENTYPE_OPVAL, "THING" }, 396 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 401 { USE, TOKENTYPE_IVAL, "USE" }, 402 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 403 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 404 { WORD, TOKENTYPE_OPVAL, "WORD" }, 405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 406 { 0, TOKENTYPE_NONE, NULL } 407 }; 408 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 410 411 STATIC int 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 413 { 414 dVAR; 415 416 PERL_ARGS_ASSERT_TOKEREPORT; 417 418 if (DEBUG_T_TEST) { 419 const char *name = NULL; 420 enum token_type type = TOKENTYPE_NONE; 421 const struct debug_tokens *p; 422 SV* const report = newSVpvs("<== "); 423 424 for (p = debug_tokens; p->token; p++) { 425 if (p->token == (int)rv) { 426 name = p->name; 427 type = p->type; 428 break; 429 } 430 } 431 if (name) 432 Perl_sv_catpv(aTHX_ report, name); 433 else if ((char)rv > ' ' && (char)rv <= '~') 434 { 435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 436 if ((char)rv == 'p') 437 sv_catpvs(report, " (pending identifier)"); 438 } 439 else if (!rv) 440 sv_catpvs(report, "EOF"); 441 else 442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); 443 switch (type) { 444 case TOKENTYPE_NONE: 445 break; 446 case TOKENTYPE_IVAL: 447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); 448 break; 449 case TOKENTYPE_OPNUM: 450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 451 PL_op_name[lvalp->ival]); 452 break; 453 case TOKENTYPE_PVAL: 454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 455 break; 456 case TOKENTYPE_OPVAL: 457 if (lvalp->opval) { 458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 459 PL_op_name[lvalp->opval->op_type]); 460 if (lvalp->opval->op_type == OP_CONST) { 461 Perl_sv_catpvf(aTHX_ report, " %s", 462 SvPEEK(cSVOPx_sv(lvalp->opval))); 463 } 464 465 } 466 else 467 sv_catpvs(report, "(opval=null)"); 468 break; 469 } 470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 471 }; 472 return (int)rv; 473 } 474 475 476 /* print the buffer with suitable escapes */ 477 478 STATIC void 479 S_printbuf(pTHX_ const char *const fmt, const char *const s) 480 { 481 SV* const tmp = newSVpvs(""); 482 483 PERL_ARGS_ASSERT_PRINTBUF; 484 485 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */ 486 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 487 GCC_DIAG_RESTORE; 488 SvREFCNT_dec(tmp); 489 } 490 491 #endif 492 493 static int 494 S_deprecate_commaless_var_list(pTHX) { 495 PL_expect = XTERM; 496 deprecate("comma-less variable list"); 497 return REPORT(','); /* grandfather non-comma-format format */ 498 } 499 500 /* 501 * S_ao 502 * 503 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR 504 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN 505 */ 506 507 STATIC int 508 S_ao(pTHX_ int toketype) 509 { 510 dVAR; 511 if (*PL_bufptr == '=') { 512 PL_bufptr++; 513 if (toketype == ANDAND) 514 pl_yylval.ival = OP_ANDASSIGN; 515 else if (toketype == OROR) 516 pl_yylval.ival = OP_ORASSIGN; 517 else if (toketype == DORDOR) 518 pl_yylval.ival = OP_DORASSIGN; 519 toketype = ASSIGNOP; 520 } 521 return toketype; 522 } 523 524 /* 525 * S_no_op 526 * When Perl expects an operator and finds something else, no_op 527 * prints the warning. It always prints "<something> found where 528 * operator expected. It prints "Missing semicolon on previous line?" 529 * if the surprise occurs at the start of the line. "do you need to 530 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 531 * where the compiler doesn't know if foo is a method call or a function. 532 * It prints "Missing operator before end of line" if there's nothing 533 * after the missing operator, or "... before <...>" if there is something 534 * after the missing operator. 535 */ 536 537 STATIC void 538 S_no_op(pTHX_ const char *const what, char *s) 539 { 540 dVAR; 541 char * const oldbp = PL_bufptr; 542 const bool is_first = (PL_oldbufptr == PL_linestart); 543 544 PERL_ARGS_ASSERT_NO_OP; 545 546 if (!s) 547 s = oldbp; 548 else 549 PL_bufptr = s; 550 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 551 if (ckWARN_d(WARN_SYNTAX)) { 552 if (is_first) 553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 554 "\t(Missing semicolon on previous line?)\n"); 555 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 556 const char *t; 557 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':'); 558 t += UTF ? UTF8SKIP(t) : 1) 559 NOOP; 560 if (t < PL_bufptr && isSPACE(*t)) 561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 562 "\t(Do you need to predeclare %"UTF8f"?)\n", 563 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); 564 } 565 else { 566 assert(s >= oldbp); 567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 568 "\t(Missing operator before %"UTF8f"?)\n", 569 UTF8fARG(UTF, s - oldbp, oldbp)); 570 } 571 } 572 PL_bufptr = oldbp; 573 } 574 575 /* 576 * S_missingterm 577 * Complain about missing quote/regexp/heredoc terminator. 578 * If it's called with NULL then it cauterizes the line buffer. 579 * If we're in a delimited string and the delimiter is a control 580 * character, it's reformatted into a two-char sequence like ^C. 581 * This is fatal. 582 */ 583 584 STATIC void 585 S_missingterm(pTHX_ char *s) 586 { 587 dVAR; 588 char tmpbuf[3]; 589 char q; 590 if (s) { 591 char * const nl = strrchr(s,'\n'); 592 if (nl) 593 *nl = '\0'; 594 } 595 else if ((U8) PL_multi_close < 32) { 596 *tmpbuf = '^'; 597 tmpbuf[1] = (char)toCTRL(PL_multi_close); 598 tmpbuf[2] = '\0'; 599 s = tmpbuf; 600 } 601 else { 602 *tmpbuf = (char)PL_multi_close; 603 tmpbuf[1] = '\0'; 604 s = tmpbuf; 605 } 606 q = strchr(s,'"') ? '\'' : '"'; 607 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 608 } 609 610 #include "feature.h" 611 612 /* 613 * Check whether the named feature is enabled. 614 */ 615 bool 616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) 617 { 618 dVAR; 619 char he_name[8 + MAX_FEATURE_LEN] = "feature_"; 620 621 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; 622 623 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM); 624 625 if (namelen > MAX_FEATURE_LEN) 626 return FALSE; 627 memcpy(&he_name[8], name, namelen); 628 629 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0, 630 REFCOUNTED_HE_EXISTS)); 631 } 632 633 /* 634 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 635 * utf16-to-utf8-reversed. 636 */ 637 638 #ifdef PERL_CR_FILTER 639 static void 640 strip_return(SV *sv) 641 { 642 const char *s = SvPVX_const(sv); 643 const char * const e = s + SvCUR(sv); 644 645 PERL_ARGS_ASSERT_STRIP_RETURN; 646 647 /* outer loop optimized to do nothing if there are no CR-LFs */ 648 while (s < e) { 649 if (*s++ == '\r' && *s == '\n') { 650 /* hit a CR-LF, need to copy the rest */ 651 char *d = s - 1; 652 *d++ = *s++; 653 while (s < e) { 654 if (*s == '\r' && s[1] == '\n') 655 s++; 656 *d++ = *s++; 657 } 658 SvCUR(sv) -= s - d; 659 return; 660 } 661 } 662 } 663 664 STATIC I32 665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 666 { 667 const I32 count = FILTER_READ(idx+1, sv, maxlen); 668 if (count > 0 && !maxlen) 669 strip_return(sv); 670 return count; 671 } 672 #endif 673 674 /* 675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags 676 677 Creates and initialises a new lexer/parser state object, supplying 678 a context in which to lex and parse from a new source of Perl code. 679 A pointer to the new state object is placed in L</PL_parser>. An entry 680 is made on the save stack so that upon unwinding the new state object 681 will be destroyed and the former value of L</PL_parser> will be restored. 682 Nothing else need be done to clean up the parsing context. 683 684 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if 685 non-null, provides a string (in SV form) containing code to be parsed. 686 A copy of the string is made, so subsequent modification of I<line> 687 does not affect parsing. I<rsfp>, if non-null, provides an input stream 688 from which code will be read to be parsed. If both are non-null, the 689 code in I<line> comes first and must consist of complete lines of input, 690 and I<rsfp> supplies the remainder of the source. 691 692 The I<flags> parameter is reserved for future use. Currently it is only 693 used by perl internally, so extensions should always pass zero. 694 695 =cut 696 */ 697 698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 699 can share filters with the current parser. 700 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 701 caller, hence isn't owned by the parser, so shouldn't be closed on parser 702 destruction. This is used to handle the case of defaulting to reading the 703 script from the standard input because no filename was given on the command 704 line (without getting confused by situation where STDIN has been closed, so 705 the script handle is opened on fd 0) */ 706 707 void 708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 709 { 710 dVAR; 711 const char *s = NULL; 712 yy_parser *parser, *oparser; 713 if (flags && flags & ~LEX_START_FLAGS) 714 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 715 716 /* create and initialise a parser */ 717 718 Newxz(parser, 1, yy_parser); 719 parser->old_parser = oparser = PL_parser; 720 PL_parser = parser; 721 722 parser->stack = NULL; 723 parser->ps = NULL; 724 parser->stack_size = 0; 725 726 /* on scope exit, free this parser and restore any outer one */ 727 SAVEPARSER(parser); 728 parser->saved_curcop = PL_curcop; 729 730 /* initialise lexer state */ 731 732 #ifdef PERL_MAD 733 parser->curforce = -1; 734 #else 735 parser->nexttoke = 0; 736 #endif 737 parser->error_count = oparser ? oparser->error_count : 0; 738 parser->copline = parser->preambling = NOLINE; 739 parser->lex_state = LEX_NORMAL; 740 parser->expect = XSTATE; 741 parser->rsfp = rsfp; 742 parser->rsfp_filters = 743 !(flags & LEX_START_SAME_FILTER) || !oparser 744 ? NULL 745 : MUTABLE_AV(SvREFCNT_inc( 746 oparser->rsfp_filters 747 ? oparser->rsfp_filters 748 : (oparser->rsfp_filters = newAV()) 749 )); 750 751 Newx(parser->lex_brackstack, 120, char); 752 Newx(parser->lex_casestack, 12, char); 753 *parser->lex_casestack = '\0'; 754 Newxz(parser->lex_shared, 1, LEXSHARED); 755 756 if (line) { 757 STRLEN len; 758 s = SvPV_const(line, len); 759 parser->linestr = flags & LEX_START_COPIED 760 ? SvREFCNT_inc_simple_NN(line) 761 : newSVpvn_flags(s, len, SvUTF8(line)); 762 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2); 763 } else { 764 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 765 } 766 parser->oldoldbufptr = 767 parser->oldbufptr = 768 parser->bufptr = 769 parser->linestart = SvPVX(parser->linestr); 770 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 771 parser->last_lop = parser->last_uni = NULL; 772 773 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 774 |LEX_DONT_CLOSE_RSFP)); 775 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 776 |LEX_DONT_CLOSE_RSFP)); 777 778 parser->in_pod = parser->filtered = 0; 779 } 780 781 782 /* delete a parser object */ 783 784 void 785 Perl_parser_free(pTHX_ const yy_parser *parser) 786 { 787 PERL_ARGS_ASSERT_PARSER_FREE; 788 789 PL_curcop = parser->saved_curcop; 790 SvREFCNT_dec(parser->linestr); 791 792 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 793 PerlIO_clearerr(parser->rsfp); 794 else if (parser->rsfp && (!parser->old_parser || 795 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 796 PerlIO_close(parser->rsfp); 797 SvREFCNT_dec(parser->rsfp_filters); 798 SvREFCNT_dec(parser->lex_stuff); 799 SvREFCNT_dec(parser->sublex_info.repl); 800 801 Safefree(parser->lex_brackstack); 802 Safefree(parser->lex_casestack); 803 Safefree(parser->lex_shared); 804 PL_parser = parser->old_parser; 805 Safefree(parser); 806 } 807 808 void 809 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 810 { 811 #ifdef PERL_MAD 812 I32 nexttoke = parser->lasttoke; 813 #else 814 I32 nexttoke = parser->nexttoke; 815 #endif 816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 817 while (nexttoke--) { 818 #ifdef PERL_MAD 819 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type 820 & 0xffff) 821 && parser->nexttoke[nexttoke].next_val.opval 822 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed 823 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) { 824 op_free(parser->nexttoke[nexttoke].next_val.opval); 825 parser->nexttoke[nexttoke].next_val.opval = NULL; 826 } 827 #else 828 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 829 && parser->nextval[nexttoke].opval 830 && parser->nextval[nexttoke].opval->op_slabbed 831 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 832 op_free(parser->nextval[nexttoke].opval); 833 parser->nextval[nexttoke].opval = NULL; 834 } 835 #endif 836 } 837 } 838 839 840 /* 841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr 842 843 Buffer scalar containing the chunk currently under consideration of the 844 text currently being lexed. This is always a plain string scalar (for 845 which C<SvPOK> is true). It is not intended to be used as a scalar by 846 normal scalar means; instead refer to the buffer directly by the pointer 847 variables described below. 848 849 The lexer maintains various C<char*> pointers to things in the 850 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 851 reallocated, all of these pointers must be updated. Don't attempt to 852 do this manually, but rather use L</lex_grow_linestr> if you need to 853 reallocate the buffer. 854 855 The content of the text chunk in the buffer is commonly exactly one 856 complete line of input, up to and including a newline terminator, 857 but there are situations where it is otherwise. The octets of the 858 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 859 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 860 flag on this scalar, which may disagree with it. 861 862 For direct examination of the buffer, the variable 863 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 865 of these pointers is usually preferable to examination of the scalar 866 through normal scalar means. 867 868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend 869 870 Direct pointer to the end of the chunk of text currently being lexed, the 871 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 872 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 873 always located at the end of the buffer, and does not count as part of 874 the buffer's contents. 875 876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr 877 878 Points to the current position of lexing inside the lexer buffer. 879 Characters around this point may be freely examined, within 880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 881 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 883 884 Lexing code (whether in the Perl core or not) moves this pointer past 885 the characters that it consumes. It is also expected to perform some 886 bookkeeping whenever a newline character is consumed. This movement 887 can be more conveniently performed by the function L</lex_read_to>, 888 which handles newlines appropriately. 889 890 Interpretation of the buffer's octets can be abstracted out by 891 using the slightly higher-level functions L</lex_peek_unichar> and 892 L</lex_read_unichar>. 893 894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart 895 896 Points to the start of the current line inside the lexer buffer. 897 This is useful for indicating at which column an error occurred, and 898 not much else. This must be updated by any lexing code that consumes 899 a newline; the function L</lex_read_to> handles this detail. 900 901 =cut 902 */ 903 904 /* 905 =for apidoc Amx|bool|lex_bufutf8 906 907 Indicates whether the octets in the lexer buffer 908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 909 of Unicode characters. If not, they should be interpreted as Latin-1 910 characters. This is analogous to the C<SvUTF8> flag for scalars. 911 912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 913 contains valid UTF-8. Lexing code must be robust in the face of invalid 914 encoding. 915 916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 917 is significant, but not the whole story regarding the input character 918 encoding. Normally, when a file is being read, the scalar contains octets 919 and its C<SvUTF8> flag is off, but the octets should be interpreted as 920 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 921 however, the scalar may have the C<SvUTF8> flag on, and in this case its 922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 923 is in effect. This logic may change in the future; use this function 924 instead of implementing the logic yourself. 925 926 =cut 927 */ 928 929 bool 930 Perl_lex_bufutf8(pTHX) 931 { 932 return UTF; 933 } 934 935 /* 936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len 937 938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 939 at least I<len> octets (including terminating C<NUL>). Returns a 940 pointer to the reallocated buffer. This is necessary before making 941 any direct modification of the buffer that would increase its length. 942 L</lex_stuff_pvn> provides a more convenient way to insert text into 943 the buffer. 944 945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 946 this function updates all of the lexer's variables that point directly 947 into the buffer. 948 949 =cut 950 */ 951 952 char * 953 Perl_lex_grow_linestr(pTHX_ STRLEN len) 954 { 955 SV *linestr; 956 char *buf; 957 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 958 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 959 linestr = PL_parser->linestr; 960 buf = SvPVX(linestr); 961 if (len <= SvLEN(linestr)) 962 return buf; 963 bufend_pos = PL_parser->bufend - buf; 964 bufptr_pos = PL_parser->bufptr - buf; 965 oldbufptr_pos = PL_parser->oldbufptr - buf; 966 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 967 linestart_pos = PL_parser->linestart - buf; 968 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 969 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 970 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? 971 PL_parser->lex_shared->re_eval_start - buf : 0; 972 973 buf = sv_grow(linestr, len); 974 975 PL_parser->bufend = buf + bufend_pos; 976 PL_parser->bufptr = buf + bufptr_pos; 977 PL_parser->oldbufptr = buf + oldbufptr_pos; 978 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 979 PL_parser->linestart = buf + linestart_pos; 980 if (PL_parser->last_uni) 981 PL_parser->last_uni = buf + last_uni_pos; 982 if (PL_parser->last_lop) 983 PL_parser->last_lop = buf + last_lop_pos; 984 if (PL_parser->lex_shared->re_eval_start) 985 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 986 return buf; 987 } 988 989 /* 990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags 991 992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 994 reallocating the buffer if necessary. This means that lexing code that 995 runs later will see the characters as if they had appeared in the input. 996 It is not recommended to do this as part of normal parsing, and most 997 uses of this facility run the risk of the inserted characters being 998 interpreted in an unintended manner. 999 1000 The string to be inserted is represented by I<len> octets starting 1001 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1, 1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>. 1003 The characters are recoded for the lexer buffer, according to how the 1004 buffer is currently being interpreted (L</lex_bufutf8>). If a string 1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 1006 function is more convenient. 1007 1008 =cut 1009 */ 1010 1011 void 1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1013 { 1014 dVAR; 1015 char *bufptr; 1016 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1017 if (flags & ~(LEX_STUFF_UTF8)) 1018 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1019 if (UTF) { 1020 if (flags & LEX_STUFF_UTF8) { 1021 goto plain_copy; 1022 } else { 1023 STRLEN highhalf = 0; /* Count of variants */ 1024 const char *p, *e = pv+len; 1025 for (p = pv; p != e; p++) { 1026 if (! UTF8_IS_INVARIANT(*p)) { 1027 highhalf++; 1028 } 1029 } 1030 if (!highhalf) 1031 goto plain_copy; 1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1033 bufptr = PL_parser->bufptr; 1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1035 SvCUR_set(PL_parser->linestr, 1036 SvCUR(PL_parser->linestr) + len+highhalf); 1037 PL_parser->bufend += len+highhalf; 1038 for (p = pv; p != e; p++) { 1039 U8 c = (U8)*p; 1040 if (! UTF8_IS_INVARIANT(c)) { 1041 *bufptr++ = UTF8_TWO_BYTE_HI(c); 1042 *bufptr++ = UTF8_TWO_BYTE_LO(c); 1043 } else { 1044 *bufptr++ = (char)c; 1045 } 1046 } 1047 } 1048 } else { 1049 if (flags & LEX_STUFF_UTF8) { 1050 STRLEN highhalf = 0; 1051 const char *p, *e = pv+len; 1052 for (p = pv; p != e; p++) { 1053 U8 c = (U8)*p; 1054 if (UTF8_IS_ABOVE_LATIN1(c)) { 1055 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1056 "non-Latin-1 character into Latin-1 input"); 1057 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1058 p++; 1059 highhalf++; 1060 } else if (! UTF8_IS_INVARIANT(c)) { 1061 /* malformed UTF-8 */ 1062 ENTER; 1063 SAVESPTR(PL_warnhook); 1064 PL_warnhook = PERL_WARNHOOK_FATAL; 1065 utf8n_to_uvchr((U8*)p, e-p, NULL, 0); 1066 LEAVE; 1067 } 1068 } 1069 if (!highhalf) 1070 goto plain_copy; 1071 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1072 bufptr = PL_parser->bufptr; 1073 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1074 SvCUR_set(PL_parser->linestr, 1075 SvCUR(PL_parser->linestr) + len-highhalf); 1076 PL_parser->bufend += len-highhalf; 1077 p = pv; 1078 while (p < e) { 1079 if (UTF8_IS_INVARIANT(*p)) { 1080 *bufptr++ = *p; 1081 p++; 1082 } 1083 else { 1084 assert(p < e -1 ); 1085 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)); 1086 p += 2; 1087 } 1088 } 1089 } else { 1090 plain_copy: 1091 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1092 bufptr = PL_parser->bufptr; 1093 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1094 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1095 PL_parser->bufend += len; 1096 Copy(pv, bufptr, len, char); 1097 } 1098 } 1099 } 1100 1101 /* 1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags 1103 1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1106 reallocating the buffer if necessary. This means that lexing code that 1107 runs later will see the characters as if they had appeared in the input. 1108 It is not recommended to do this as part of normal parsing, and most 1109 uses of this facility run the risk of the inserted characters being 1110 interpreted in an unintended manner. 1111 1112 The string to be inserted is represented by octets starting at I<pv> 1113 and continuing to the first nul. These octets are interpreted as either 1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1115 in I<flags>. The characters are recoded for the lexer buffer, according 1116 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1117 If it is not convenient to nul-terminate a string to be inserted, the 1118 L</lex_stuff_pvn> function is more appropriate. 1119 1120 =cut 1121 */ 1122 1123 void 1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1125 { 1126 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1127 lex_stuff_pvn(pv, strlen(pv), flags); 1128 } 1129 1130 /* 1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags 1132 1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1135 reallocating the buffer if necessary. This means that lexing code that 1136 runs later will see the characters as if they had appeared in the input. 1137 It is not recommended to do this as part of normal parsing, and most 1138 uses of this facility run the risk of the inserted characters being 1139 interpreted in an unintended manner. 1140 1141 The string to be inserted is the string value of I<sv>. The characters 1142 are recoded for the lexer buffer, according to how the buffer is currently 1143 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1145 need to construct a scalar. 1146 1147 =cut 1148 */ 1149 1150 void 1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1152 { 1153 char *pv; 1154 STRLEN len; 1155 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1156 if (flags) 1157 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1158 pv = SvPV(sv, len); 1159 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1160 } 1161 1162 /* 1163 =for apidoc Amx|void|lex_unstuff|char *ptr 1164 1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1166 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened. 1167 This hides the discarded text from any lexing code that runs later, 1168 as if the text had never appeared. 1169 1170 This is not the normal way to consume lexed text. For that, use 1171 L</lex_read_to>. 1172 1173 =cut 1174 */ 1175 1176 void 1177 Perl_lex_unstuff(pTHX_ char *ptr) 1178 { 1179 char *buf, *bufend; 1180 STRLEN unstuff_len; 1181 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1182 buf = PL_parser->bufptr; 1183 if (ptr < buf) 1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1185 if (ptr == buf) 1186 return; 1187 bufend = PL_parser->bufend; 1188 if (ptr > bufend) 1189 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1190 unstuff_len = ptr - buf; 1191 Move(ptr, buf, bufend+1-ptr, char); 1192 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1193 PL_parser->bufend = bufend - unstuff_len; 1194 } 1195 1196 /* 1197 =for apidoc Amx|void|lex_read_to|char *ptr 1198 1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1200 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>, 1201 performing the correct bookkeeping whenever a newline character is passed. 1202 This is the normal way to consume lexed text. 1203 1204 Interpretation of the buffer's octets can be abstracted out by 1205 using the slightly higher-level functions L</lex_peek_unichar> and 1206 L</lex_read_unichar>. 1207 1208 =cut 1209 */ 1210 1211 void 1212 Perl_lex_read_to(pTHX_ char *ptr) 1213 { 1214 char *s; 1215 PERL_ARGS_ASSERT_LEX_READ_TO; 1216 s = PL_parser->bufptr; 1217 if (ptr < s || ptr > PL_parser->bufend) 1218 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1219 for (; s != ptr; s++) 1220 if (*s == '\n') { 1221 COPLINE_INC_WITH_HERELINES; 1222 PL_parser->linestart = s+1; 1223 } 1224 PL_parser->bufptr = ptr; 1225 } 1226 1227 /* 1228 =for apidoc Amx|void|lex_discard_to|char *ptr 1229 1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1231 up to I<ptr>. The remaining content of the buffer will be moved, and 1232 all pointers into the buffer updated appropriately. I<ptr> must not 1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1234 it is not permitted to discard text that has yet to be lexed. 1235 1236 Normally it is not necessarily to do this directly, because it suffices to 1237 use the implicit discarding behaviour of L</lex_next_chunk> and things 1238 based on it. However, if a token stretches across multiple lines, 1239 and the lexing code has kept multiple lines of text in the buffer for 1240 that purpose, then after completion of the token it would be wise to 1241 explicitly discard the now-unneeded earlier lines, to avoid future 1242 multi-line tokens growing the buffer without bound. 1243 1244 =cut 1245 */ 1246 1247 void 1248 Perl_lex_discard_to(pTHX_ char *ptr) 1249 { 1250 char *buf; 1251 STRLEN discard_len; 1252 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1253 buf = SvPVX(PL_parser->linestr); 1254 if (ptr < buf) 1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1256 if (ptr == buf) 1257 return; 1258 if (ptr > PL_parser->bufptr) 1259 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1260 discard_len = ptr - buf; 1261 if (PL_parser->oldbufptr < ptr) 1262 PL_parser->oldbufptr = ptr; 1263 if (PL_parser->oldoldbufptr < ptr) 1264 PL_parser->oldoldbufptr = ptr; 1265 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1266 PL_parser->last_uni = NULL; 1267 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1268 PL_parser->last_lop = NULL; 1269 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1270 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1271 PL_parser->bufend -= discard_len; 1272 PL_parser->bufptr -= discard_len; 1273 PL_parser->oldbufptr -= discard_len; 1274 PL_parser->oldoldbufptr -= discard_len; 1275 if (PL_parser->last_uni) 1276 PL_parser->last_uni -= discard_len; 1277 if (PL_parser->last_lop) 1278 PL_parser->last_lop -= discard_len; 1279 } 1280 1281 /* 1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags 1283 1284 Reads in the next chunk of text to be lexed, appending it to 1285 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1286 looked to the end of the current chunk and wants to know more. It is 1287 usual, but not necessary, for lexing to have consumed the entirety of 1288 the current chunk at this time. 1289 1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1291 chunk (i.e., the current chunk has been entirely consumed), normally the 1292 current chunk will be discarded at the same time that the new chunk is 1293 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk 1294 will not be discarded. If the current chunk has not been entirely 1295 consumed, then it will not be discarded regardless of the flag. 1296 1297 Returns true if some new text was added to the buffer, or false if the 1298 buffer has reached the end of the input text. 1299 1300 =cut 1301 */ 1302 1303 #define LEX_FAKE_EOF 0x80000000 1304 #define LEX_NO_TERM 0x40000000 /* here-doc */ 1305 1306 bool 1307 Perl_lex_next_chunk(pTHX_ U32 flags) 1308 { 1309 SV *linestr; 1310 char *buf; 1311 STRLEN old_bufend_pos, new_bufend_pos; 1312 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1313 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1314 bool got_some_for_debugger = 0; 1315 bool got_some; 1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1318 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1319 return FALSE; 1320 linestr = PL_parser->linestr; 1321 buf = SvPVX(linestr); 1322 if (!(flags & LEX_KEEP_PREVIOUS) && 1323 PL_parser->bufptr == PL_parser->bufend) { 1324 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1325 linestart_pos = 0; 1326 if (PL_parser->last_uni != PL_parser->bufend) 1327 PL_parser->last_uni = NULL; 1328 if (PL_parser->last_lop != PL_parser->bufend) 1329 PL_parser->last_lop = NULL; 1330 last_uni_pos = last_lop_pos = 0; 1331 *buf = 0; 1332 SvCUR(linestr) = 0; 1333 } else { 1334 old_bufend_pos = PL_parser->bufend - buf; 1335 bufptr_pos = PL_parser->bufptr - buf; 1336 oldbufptr_pos = PL_parser->oldbufptr - buf; 1337 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1338 linestart_pos = PL_parser->linestart - buf; 1339 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1340 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1341 } 1342 if (flags & LEX_FAKE_EOF) { 1343 goto eof; 1344 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1345 got_some = 0; 1346 } else if (filter_gets(linestr, old_bufend_pos)) { 1347 got_some = 1; 1348 got_some_for_debugger = 1; 1349 } else if (flags & LEX_NO_TERM) { 1350 got_some = 0; 1351 } else { 1352 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1353 sv_setpvs(linestr, ""); 1354 eof: 1355 /* End of real input. Close filehandle (unless it was STDIN), 1356 * then add implicit termination. 1357 */ 1358 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1359 PerlIO_clearerr(PL_parser->rsfp); 1360 else if (PL_parser->rsfp) 1361 (void)PerlIO_close(PL_parser->rsfp); 1362 PL_parser->rsfp = NULL; 1363 PL_parser->in_pod = PL_parser->filtered = 0; 1364 #ifdef PERL_MAD 1365 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n)) 1366 PL_faketokens = 1; 1367 #endif 1368 if (!PL_in_eval && PL_minus_p) { 1369 sv_catpvs(linestr, 1370 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1371 PL_minus_n = PL_minus_p = 0; 1372 } else if (!PL_in_eval && PL_minus_n) { 1373 sv_catpvs(linestr, /*{*/";}"); 1374 PL_minus_n = 0; 1375 } else 1376 sv_catpvs(linestr, ";"); 1377 got_some = 1; 1378 } 1379 buf = SvPVX(linestr); 1380 new_bufend_pos = SvCUR(linestr); 1381 PL_parser->bufend = buf + new_bufend_pos; 1382 PL_parser->bufptr = buf + bufptr_pos; 1383 PL_parser->oldbufptr = buf + oldbufptr_pos; 1384 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1385 PL_parser->linestart = buf + linestart_pos; 1386 if (PL_parser->last_uni) 1387 PL_parser->last_uni = buf + last_uni_pos; 1388 if (PL_parser->last_lop) 1389 PL_parser->last_lop = buf + last_lop_pos; 1390 if (PL_parser->preambling != NOLINE) { 1391 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1392 PL_parser->preambling = NOLINE; 1393 } 1394 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) && 1395 PL_curstash != PL_debstash) { 1396 /* debugger active and we're not compiling the debugger code, 1397 * so store the line into the debugger's array of lines 1398 */ 1399 update_debugger_info(NULL, buf+old_bufend_pos, 1400 new_bufend_pos-old_bufend_pos); 1401 } 1402 return got_some; 1403 } 1404 1405 /* 1406 =for apidoc Amx|I32|lex_peek_unichar|U32 flags 1407 1408 Looks ahead one (Unicode) character in the text currently being lexed. 1409 Returns the codepoint (unsigned integer value) of the next character, 1410 or -1 if lexing has reached the end of the input text. To consume the 1411 peeked character, use L</lex_read_unichar>. 1412 1413 If the next character is in (or extends into) the next chunk of input 1414 text, the next chunk will be read in. Normally the current chunk will be 1415 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1416 then the current chunk will not be discarded. 1417 1418 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1419 is encountered, an exception is generated. 1420 1421 =cut 1422 */ 1423 1424 I32 1425 Perl_lex_peek_unichar(pTHX_ U32 flags) 1426 { 1427 dVAR; 1428 char *s, *bufend; 1429 if (flags & ~(LEX_KEEP_PREVIOUS)) 1430 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1431 s = PL_parser->bufptr; 1432 bufend = PL_parser->bufend; 1433 if (UTF) { 1434 U8 head; 1435 I32 unichar; 1436 STRLEN len, retlen; 1437 if (s == bufend) { 1438 if (!lex_next_chunk(flags)) 1439 return -1; 1440 s = PL_parser->bufptr; 1441 bufend = PL_parser->bufend; 1442 } 1443 head = (U8)*s; 1444 if (UTF8_IS_INVARIANT(head)) 1445 return head; 1446 if (UTF8_IS_START(head)) { 1447 len = UTF8SKIP(&head); 1448 while ((STRLEN)(bufend-s) < len) { 1449 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1450 break; 1451 s = PL_parser->bufptr; 1452 bufend = PL_parser->bufend; 1453 } 1454 } 1455 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1456 if (retlen == (STRLEN)-1) { 1457 /* malformed UTF-8 */ 1458 ENTER; 1459 SAVESPTR(PL_warnhook); 1460 PL_warnhook = PERL_WARNHOOK_FATAL; 1461 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0); 1462 LEAVE; 1463 } 1464 return unichar; 1465 } else { 1466 if (s == bufend) { 1467 if (!lex_next_chunk(flags)) 1468 return -1; 1469 s = PL_parser->bufptr; 1470 } 1471 return (U8)*s; 1472 } 1473 } 1474 1475 /* 1476 =for apidoc Amx|I32|lex_read_unichar|U32 flags 1477 1478 Reads the next (Unicode) character in the text currently being lexed. 1479 Returns the codepoint (unsigned integer value) of the character read, 1480 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1481 if lexing has reached the end of the input text. To non-destructively 1482 examine the next character, use L</lex_peek_unichar> instead. 1483 1484 If the next character is in (or extends into) the next chunk of input 1485 text, the next chunk will be read in. Normally the current chunk will be 1486 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1487 then the current chunk will not be discarded. 1488 1489 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1490 is encountered, an exception is generated. 1491 1492 =cut 1493 */ 1494 1495 I32 1496 Perl_lex_read_unichar(pTHX_ U32 flags) 1497 { 1498 I32 c; 1499 if (flags & ~(LEX_KEEP_PREVIOUS)) 1500 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1501 c = lex_peek_unichar(flags); 1502 if (c != -1) { 1503 if (c == '\n') 1504 COPLINE_INC_WITH_HERELINES; 1505 if (UTF) 1506 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1507 else 1508 ++(PL_parser->bufptr); 1509 } 1510 return c; 1511 } 1512 1513 /* 1514 =for apidoc Amx|void|lex_read_space|U32 flags 1515 1516 Reads optional spaces, in Perl style, in the text currently being 1517 lexed. The spaces may include ordinary whitespace characters and 1518 Perl-style comments. C<#line> directives are processed if encountered. 1519 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1520 at a non-space character (or the end of the input text). 1521 1522 If spaces extend into the next chunk of input text, the next chunk will 1523 be read in. Normally the current chunk will be discarded at the same 1524 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current 1525 chunk will not be discarded. 1526 1527 =cut 1528 */ 1529 1530 #define LEX_NO_INCLINE 0x40000000 1531 #define LEX_NO_NEXT_CHUNK 0x80000000 1532 1533 void 1534 Perl_lex_read_space(pTHX_ U32 flags) 1535 { 1536 char *s, *bufend; 1537 const bool can_incline = !(flags & LEX_NO_INCLINE); 1538 bool need_incline = 0; 1539 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1540 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1541 #ifdef PERL_MAD 1542 if (PL_skipwhite) { 1543 sv_free(PL_skipwhite); 1544 PL_skipwhite = NULL; 1545 } 1546 if (PL_madskills) 1547 PL_skipwhite = newSVpvs(""); 1548 #endif /* PERL_MAD */ 1549 s = PL_parser->bufptr; 1550 bufend = PL_parser->bufend; 1551 while (1) { 1552 char c = *s; 1553 if (c == '#') { 1554 do { 1555 c = *++s; 1556 } while (!(c == '\n' || (c == 0 && s == bufend))); 1557 } else if (c == '\n') { 1558 s++; 1559 if (can_incline) { 1560 PL_parser->linestart = s; 1561 if (s == bufend) 1562 need_incline = 1; 1563 else 1564 incline(s); 1565 } 1566 } else if (isSPACE(c)) { 1567 s++; 1568 } else if (c == 0 && s == bufend) { 1569 bool got_more; 1570 line_t l; 1571 #ifdef PERL_MAD 1572 if (PL_madskills) 1573 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1574 #endif /* PERL_MAD */ 1575 if (flags & LEX_NO_NEXT_CHUNK) 1576 break; 1577 PL_parser->bufptr = s; 1578 l = CopLINE(PL_curcop); 1579 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1580 got_more = lex_next_chunk(flags); 1581 CopLINE_set(PL_curcop, l); 1582 s = PL_parser->bufptr; 1583 bufend = PL_parser->bufend; 1584 if (!got_more) 1585 break; 1586 if (can_incline && need_incline && PL_parser->rsfp) { 1587 incline(s); 1588 need_incline = 0; 1589 } 1590 } else { 1591 break; 1592 } 1593 } 1594 #ifdef PERL_MAD 1595 if (PL_madskills) 1596 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1597 #endif /* PERL_MAD */ 1598 PL_parser->bufptr = s; 1599 } 1600 1601 /* 1602 1603 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn 1604 1605 This function performs syntax checking on a prototype, C<proto>. 1606 If C<warn> is true, any illegal characters or mismatched brackets 1607 will trigger illegalproto warnings, declaring that they were 1608 detected in the prototype for C<name>. 1609 1610 The return value is C<true> if this is a valid prototype, and 1611 C<false> if it is not, regardless of whether C<warn> was C<true> or 1612 C<false>. 1613 1614 Note that C<NULL> is a valid C<proto> and will always return C<true>. 1615 1616 =cut 1617 1618 */ 1619 1620 bool 1621 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) 1622 { 1623 STRLEN len, origlen; 1624 char *p = proto ? SvPV(proto, len) : NULL; 1625 bool bad_proto = FALSE; 1626 bool in_brackets = FALSE; 1627 bool after_slash = FALSE; 1628 char greedy_proto = ' '; 1629 bool proto_after_greedy_proto = FALSE; 1630 bool must_be_last = FALSE; 1631 bool underscore = FALSE; 1632 bool bad_proto_after_underscore = FALSE; 1633 1634 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1635 1636 if (!proto) 1637 return TRUE; 1638 1639 origlen = len; 1640 for (; len--; p++) { 1641 if (!isSPACE(*p)) { 1642 if (must_be_last) 1643 proto_after_greedy_proto = TRUE; 1644 if (underscore) { 1645 if (!strchr(";@%", *p)) 1646 bad_proto_after_underscore = TRUE; 1647 underscore = FALSE; 1648 } 1649 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { 1650 bad_proto = TRUE; 1651 } 1652 else { 1653 if (*p == '[') 1654 in_brackets = TRUE; 1655 else if (*p == ']') 1656 in_brackets = FALSE; 1657 else if ((*p == '@' || *p == '%') && 1658 !after_slash && 1659 !in_brackets ) { 1660 must_be_last = TRUE; 1661 greedy_proto = *p; 1662 } 1663 else if (*p == '_') 1664 underscore = TRUE; 1665 } 1666 if (*p == '\\') 1667 after_slash = TRUE; 1668 else 1669 after_slash = FALSE; 1670 } 1671 } 1672 1673 if (warn) { 1674 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1675 p -= origlen; 1676 p = SvUTF8(proto) 1677 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1678 origlen, UNI_DISPLAY_ISPRINT) 1679 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1680 1681 if (proto_after_greedy_proto) 1682 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1683 "Prototype after '%c' for %"SVf" : %s", 1684 greedy_proto, SVfARG(name), p); 1685 if (in_brackets) 1686 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1687 "Missing ']' in prototype for %"SVf" : %s", 1688 SVfARG(name), p); 1689 if (bad_proto) 1690 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1691 "Illegal character in prototype for %"SVf" : %s", 1692 SVfARG(name), p); 1693 if (bad_proto_after_underscore) 1694 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1695 "Illegal character after '_' in prototype for %"SVf" : %s", 1696 SVfARG(name), p); 1697 } 1698 1699 return (! (proto_after_greedy_proto || bad_proto) ); 1700 } 1701 1702 /* 1703 * S_incline 1704 * This subroutine has nothing to do with tilting, whether at windmills 1705 * or pinball tables. Its name is short for "increment line". It 1706 * increments the current line number in CopLINE(PL_curcop) and checks 1707 * to see whether the line starts with a comment of the form 1708 * # line 500 "foo.pm" 1709 * If so, it sets the current line number and file to the values in the comment. 1710 */ 1711 1712 STATIC void 1713 S_incline(pTHX_ const char *s) 1714 { 1715 dVAR; 1716 const char *t; 1717 const char *n; 1718 const char *e; 1719 line_t line_num; 1720 1721 PERL_ARGS_ASSERT_INCLINE; 1722 1723 COPLINE_INC_WITH_HERELINES; 1724 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1725 && s+1 == PL_bufend && *s == ';') { 1726 /* fake newline in string eval */ 1727 CopLINE_dec(PL_curcop); 1728 return; 1729 } 1730 if (*s++ != '#') 1731 return; 1732 while (SPACE_OR_TAB(*s)) 1733 s++; 1734 if (strnEQ(s, "line", 4)) 1735 s += 4; 1736 else 1737 return; 1738 if (SPACE_OR_TAB(*s)) 1739 s++; 1740 else 1741 return; 1742 while (SPACE_OR_TAB(*s)) 1743 s++; 1744 if (!isDIGIT(*s)) 1745 return; 1746 1747 n = s; 1748 while (isDIGIT(*s)) 1749 s++; 1750 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1751 return; 1752 while (SPACE_OR_TAB(*s)) 1753 s++; 1754 if (*s == '"' && (t = strchr(s+1, '"'))) { 1755 s++; 1756 e = t + 1; 1757 } 1758 else { 1759 t = s; 1760 while (!isSPACE(*t)) 1761 t++; 1762 e = t; 1763 } 1764 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1765 e++; 1766 if (*e != '\n' && *e != '\0') 1767 return; /* false alarm */ 1768 1769 line_num = atoi(n)-1; 1770 1771 if (t - s > 0) { 1772 const STRLEN len = t - s; 1773 1774 if (!PL_rsfp && !PL_parser->filtered) { 1775 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1776 * to *{"::_<newfilename"} */ 1777 /* However, the long form of evals is only turned on by the 1778 debugger - usually they're "(eval %lu)" */ 1779 GV * const cfgv = CopFILEGV(PL_curcop); 1780 if (cfgv) { 1781 char smallbuf[128]; 1782 STRLEN tmplen2 = len; 1783 char *tmpbuf2; 1784 GV *gv2; 1785 1786 if (tmplen2 + 2 <= sizeof smallbuf) 1787 tmpbuf2 = smallbuf; 1788 else 1789 Newx(tmpbuf2, tmplen2 + 2, char); 1790 1791 tmpbuf2[0] = '_'; 1792 tmpbuf2[1] = '<'; 1793 1794 memcpy(tmpbuf2 + 2, s, tmplen2); 1795 tmplen2 += 2; 1796 1797 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1798 if (!isGV(gv2)) { 1799 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1800 /* adjust ${"::_<newfilename"} to store the new file name */ 1801 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1802 /* The line number may differ. If that is the case, 1803 alias the saved lines that are in the array. 1804 Otherwise alias the whole array. */ 1805 if (CopLINE(PL_curcop) == line_num) { 1806 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1807 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1808 } 1809 else if (GvAV(cfgv)) { 1810 AV * const av = GvAV(cfgv); 1811 const I32 start = CopLINE(PL_curcop)+1; 1812 I32 items = AvFILLp(av) - start; 1813 if (items > 0) { 1814 AV * const av2 = GvAVn(gv2); 1815 SV **svp = AvARRAY(av) + start; 1816 I32 l = (I32)line_num+1; 1817 while (items--) 1818 av_store(av2, l++, SvREFCNT_inc(*svp++)); 1819 } 1820 } 1821 } 1822 1823 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1824 } 1825 } 1826 CopFILE_free(PL_curcop); 1827 CopFILE_setn(PL_curcop, s, len); 1828 } 1829 CopLINE_set(PL_curcop, line_num); 1830 } 1831 1832 #define skipspace(s) skipspace_flags(s, 0) 1833 1834 #ifdef PERL_MAD 1835 /* skip space before PL_thistoken */ 1836 1837 STATIC char * 1838 S_skipspace0(pTHX_ char *s) 1839 { 1840 PERL_ARGS_ASSERT_SKIPSPACE0; 1841 1842 s = skipspace(s); 1843 if (!PL_madskills) 1844 return s; 1845 if (PL_skipwhite) { 1846 if (!PL_thiswhite) 1847 PL_thiswhite = newSVpvs(""); 1848 sv_catsv(PL_thiswhite, PL_skipwhite); 1849 sv_free(PL_skipwhite); 1850 PL_skipwhite = 0; 1851 } 1852 PL_realtokenstart = s - SvPVX(PL_linestr); 1853 return s; 1854 } 1855 1856 /* skip space after PL_thistoken */ 1857 1858 STATIC char * 1859 S_skipspace1(pTHX_ char *s) 1860 { 1861 const char *start = s; 1862 I32 startoff = start - SvPVX(PL_linestr); 1863 1864 PERL_ARGS_ASSERT_SKIPSPACE1; 1865 1866 s = skipspace(s); 1867 if (!PL_madskills) 1868 return s; 1869 start = SvPVX(PL_linestr) + startoff; 1870 if (!PL_thistoken && PL_realtokenstart >= 0) { 1871 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1872 PL_thistoken = newSVpvn(tstart, start - tstart); 1873 } 1874 PL_realtokenstart = -1; 1875 if (PL_skipwhite) { 1876 if (!PL_nextwhite) 1877 PL_nextwhite = newSVpvs(""); 1878 sv_catsv(PL_nextwhite, PL_skipwhite); 1879 sv_free(PL_skipwhite); 1880 PL_skipwhite = 0; 1881 } 1882 return s; 1883 } 1884 1885 STATIC char * 1886 S_skipspace2(pTHX_ char *s, SV **svp) 1887 { 1888 char *start; 1889 const I32 startoff = s - SvPVX(PL_linestr); 1890 1891 PERL_ARGS_ASSERT_SKIPSPACE2; 1892 1893 s = skipspace(s); 1894 if (!PL_madskills || !svp) 1895 return s; 1896 start = SvPVX(PL_linestr) + startoff; 1897 if (!PL_thistoken && PL_realtokenstart >= 0) { 1898 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1899 PL_thistoken = newSVpvn(tstart, start - tstart); 1900 PL_realtokenstart = -1; 1901 } 1902 if (PL_skipwhite) { 1903 if (!*svp) 1904 *svp = newSVpvs(""); 1905 sv_setsv(*svp, PL_skipwhite); 1906 sv_free(PL_skipwhite); 1907 PL_skipwhite = 0; 1908 } 1909 1910 return s; 1911 } 1912 #endif 1913 1914 STATIC void 1915 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1916 { 1917 AV *av = CopFILEAVx(PL_curcop); 1918 if (av) { 1919 SV * sv; 1920 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 1921 else { 1922 sv = *av_fetch(av, 0, 1); 1923 SvUPGRADE(sv, SVt_PVMG); 1924 } 1925 if (!SvPOK(sv)) sv_setpvs(sv,""); 1926 if (orig_sv) 1927 sv_catsv(sv, orig_sv); 1928 else 1929 sv_catpvn(sv, buf, len); 1930 if (!SvIOK(sv)) { 1931 (void)SvIOK_on(sv); 1932 SvIV_set(sv, 0); 1933 } 1934 if (PL_parser->preambling == NOLINE) 1935 av_store(av, CopLINE(PL_curcop), sv); 1936 } 1937 } 1938 1939 /* 1940 * S_skipspace 1941 * Called to gobble the appropriate amount and type of whitespace. 1942 * Skips comments as well. 1943 */ 1944 1945 STATIC char * 1946 S_skipspace_flags(pTHX_ char *s, U32 flags) 1947 { 1948 #ifdef PERL_MAD 1949 char *start = s; 1950 #endif /* PERL_MAD */ 1951 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 1952 #ifdef PERL_MAD 1953 if (PL_skipwhite) { 1954 sv_free(PL_skipwhite); 1955 PL_skipwhite = NULL; 1956 } 1957 #endif /* PERL_MAD */ 1958 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1959 while (s < PL_bufend && SPACE_OR_TAB(*s)) 1960 s++; 1961 } else { 1962 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1963 PL_bufptr = s; 1964 lex_read_space(flags | LEX_KEEP_PREVIOUS | 1965 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 1966 LEX_NO_NEXT_CHUNK : 0)); 1967 s = PL_bufptr; 1968 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1969 if (PL_linestart > PL_bufptr) 1970 PL_bufptr = PL_linestart; 1971 return s; 1972 } 1973 #ifdef PERL_MAD 1974 if (PL_madskills) 1975 PL_skipwhite = newSVpvn(start, s-start); 1976 #endif /* PERL_MAD */ 1977 return s; 1978 } 1979 1980 /* 1981 * S_check_uni 1982 * Check the unary operators to ensure there's no ambiguity in how they're 1983 * used. An ambiguous piece of code would be: 1984 * rand + 5 1985 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1986 * the +5 is its argument. 1987 */ 1988 1989 STATIC void 1990 S_check_uni(pTHX) 1991 { 1992 dVAR; 1993 const char *s; 1994 const char *t; 1995 1996 if (PL_oldoldbufptr != PL_last_uni) 1997 return; 1998 while (isSPACE(*PL_last_uni)) 1999 PL_last_uni++; 2000 s = PL_last_uni; 2001 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-') 2002 s += UTF ? UTF8SKIP(s) : 1; 2003 if ((t = strchr(s, '(')) && t < PL_bufptr) 2004 return; 2005 2006 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 2007 "Warning: Use of \"%.*s\" without parentheses is ambiguous", 2008 (int)(s - PL_last_uni), PL_last_uni); 2009 } 2010 2011 /* 2012 * LOP : macro to build a list operator. Its behaviour has been replaced 2013 * with a subroutine, S_lop() for which LOP is just another name. 2014 */ 2015 2016 #define LOP(f,x) return lop(f,x,s) 2017 2018 /* 2019 * S_lop 2020 * Build a list operator (or something that might be one). The rules: 2021 * - if we have a next token, then it's a list operator [why?] 2022 * - if the next thing is an opening paren, then it's a function 2023 * - else it's a list operator 2024 */ 2025 2026 STATIC I32 2027 S_lop(pTHX_ I32 f, int x, char *s) 2028 { 2029 dVAR; 2030 2031 PERL_ARGS_ASSERT_LOP; 2032 2033 pl_yylval.ival = f; 2034 CLINE; 2035 PL_expect = x; 2036 PL_bufptr = s; 2037 PL_last_lop = PL_oldbufptr; 2038 PL_last_lop_op = (OPCODE)f; 2039 #ifdef PERL_MAD 2040 if (PL_lasttoke) 2041 goto lstop; 2042 #else 2043 if (PL_nexttoke) 2044 goto lstop; 2045 #endif 2046 if (*s == '(') 2047 return REPORT(FUNC); 2048 s = PEEKSPACE(s); 2049 if (*s == '(') 2050 return REPORT(FUNC); 2051 else { 2052 lstop: 2053 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 2054 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 2055 return REPORT(LSTOP); 2056 } 2057 } 2058 2059 #ifdef PERL_MAD 2060 /* 2061 * S_start_force 2062 * Sets up for an eventual force_next(). start_force(0) basically does 2063 * an unshift, while start_force(-1) does a push. yylex removes items 2064 * on the "pop" end. 2065 */ 2066 2067 STATIC void 2068 S_start_force(pTHX_ int where) 2069 { 2070 int i; 2071 2072 if (where < 0) /* so people can duplicate start_force(PL_curforce) */ 2073 where = PL_lasttoke; 2074 assert(PL_curforce < 0 || PL_curforce == where); 2075 if (PL_curforce != where) { 2076 for (i = PL_lasttoke; i > where; --i) { 2077 PL_nexttoke[i] = PL_nexttoke[i-1]; 2078 } 2079 PL_lasttoke++; 2080 } 2081 if (PL_curforce < 0) /* in case of duplicate start_force() */ 2082 Zero(&PL_nexttoke[where], 1, NEXTTOKE); 2083 PL_curforce = where; 2084 if (PL_nextwhite) { 2085 if (PL_madskills) 2086 curmad('^', newSVpvs("")); 2087 CURMAD('_', PL_nextwhite); 2088 } 2089 } 2090 2091 STATIC void 2092 S_curmad(pTHX_ char slot, SV *sv) 2093 { 2094 MADPROP **where; 2095 2096 if (!sv) 2097 return; 2098 if (PL_curforce < 0) 2099 where = &PL_thismad; 2100 else 2101 where = &PL_nexttoke[PL_curforce].next_mad; 2102 2103 if (PL_faketokens) 2104 sv_setpvs(sv, ""); 2105 else { 2106 if (!IN_BYTES) { 2107 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 2108 SvUTF8_on(sv); 2109 else if (PL_encoding) { 2110 sv_recode_to_utf8(sv, PL_encoding); 2111 } 2112 } 2113 } 2114 2115 /* keep a slot open for the head of the list? */ 2116 if (slot != '_' && *where && (*where)->mad_key == '^') { 2117 (*where)->mad_key = slot; 2118 sv_free(MUTABLE_SV(((*where)->mad_val))); 2119 (*where)->mad_val = (void*)sv; 2120 } 2121 else 2122 addmad(newMADsv(slot, sv), where, 0); 2123 } 2124 #else 2125 # define start_force(where) NOOP 2126 # define curmad(slot, sv) NOOP 2127 #endif 2128 2129 /* 2130 * S_force_next 2131 * When the lexer realizes it knows the next token (for instance, 2132 * it is reordering tokens for the parser) then it can call S_force_next 2133 * to know what token to return the next time the lexer is called. Caller 2134 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD), 2135 * and possibly PL_expect to ensure the lexer handles the token correctly. 2136 */ 2137 2138 STATIC void 2139 S_force_next(pTHX_ I32 type) 2140 { 2141 dVAR; 2142 #ifdef DEBUGGING 2143 if (DEBUG_T_TEST) { 2144 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2145 tokereport(type, &NEXTVAL_NEXTTOKE); 2146 } 2147 #endif 2148 #ifdef PERL_MAD 2149 if (PL_curforce < 0) 2150 start_force(PL_lasttoke); 2151 PL_nexttoke[PL_curforce].next_type = type; 2152 if (PL_lex_state != LEX_KNOWNEXT) 2153 PL_lex_defer = PL_lex_state; 2154 PL_lex_state = LEX_KNOWNEXT; 2155 PL_lex_expect = PL_expect; 2156 PL_curforce = -1; 2157 #else 2158 PL_nexttype[PL_nexttoke] = type; 2159 PL_nexttoke++; 2160 if (PL_lex_state != LEX_KNOWNEXT) { 2161 PL_lex_defer = PL_lex_state; 2162 PL_lex_expect = PL_expect; 2163 PL_lex_state = LEX_KNOWNEXT; 2164 } 2165 #endif 2166 } 2167 2168 /* 2169 * S_postderef 2170 * 2171 * This subroutine handles postfix deref syntax after the arrow has already 2172 * been emitted. @* $* etc. are emitted as two separate token right here. 2173 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 2174 * only the first, leaving yylex to find the next. 2175 */ 2176 2177 static int 2178 S_postderef(pTHX_ int const funny, char const next) 2179 { 2180 dVAR; 2181 assert(funny == DOLSHARP || strchr("$@%&*", funny)); 2182 assert(strchr("*[{", next)); 2183 if (next == '*') { 2184 PL_expect = XOPERATOR; 2185 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 2186 assert('@' == funny || '$' == funny || DOLSHARP == funny); 2187 PL_lex_state = LEX_INTERPEND; 2188 start_force(PL_curforce); 2189 force_next(POSTJOIN); 2190 } 2191 start_force(PL_curforce); 2192 force_next(next); 2193 PL_bufptr+=2; 2194 } 2195 else { 2196 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL 2197 && !PL_lex_brackets) 2198 PL_lex_dojoin = 2; 2199 PL_expect = XOPERATOR; 2200 PL_bufptr++; 2201 } 2202 return funny; 2203 } 2204 2205 void 2206 Perl_yyunlex(pTHX) 2207 { 2208 int yyc = PL_parser->yychar; 2209 if (yyc != YYEMPTY) { 2210 if (yyc) { 2211 start_force(-1); 2212 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2213 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { 2214 PL_lex_allbrackets--; 2215 PL_lex_brackets--; 2216 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2217 } else if (yyc == '('/*)*/) { 2218 PL_lex_allbrackets--; 2219 yyc |= (2<<24); 2220 } 2221 force_next(yyc); 2222 } 2223 PL_parser->yychar = YYEMPTY; 2224 } 2225 } 2226 2227 STATIC SV * 2228 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2229 { 2230 dVAR; 2231 SV * const sv = newSVpvn_utf8(start, len, 2232 !IN_BYTES 2233 && UTF 2234 && !is_ascii_string((const U8*)start, len) 2235 && is_utf8_string((const U8*)start, len)); 2236 return sv; 2237 } 2238 2239 /* 2240 * S_force_word 2241 * When the lexer knows the next thing is a word (for instance, it has 2242 * just seen -> and it knows that the next char is a word char, then 2243 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2244 * lookahead. 2245 * 2246 * Arguments: 2247 * char *start : buffer position (must be within PL_linestr) 2248 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD) 2249 * int check_keyword : if true, Perl checks to make sure the word isn't 2250 * a keyword (do this if the word is a label, e.g. goto FOO) 2251 * int allow_pack : if true, : characters will also be allowed (require, 2252 * use, etc. do this) 2253 * int allow_initial_tick : used by the "sub" lexer only. 2254 */ 2255 2256 STATIC char * 2257 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2258 { 2259 dVAR; 2260 char *s; 2261 STRLEN len; 2262 2263 PERL_ARGS_ASSERT_FORCE_WORD; 2264 2265 start = SKIPSPACE1(start); 2266 s = start; 2267 if (isIDFIRST_lazy_if(s,UTF) || 2268 (allow_pack && *s == ':') ) 2269 { 2270 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2271 if (check_keyword) { 2272 char *s2 = PL_tokenbuf; 2273 STRLEN len2 = len; 2274 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) 2275 s2 += 6, len2 -= 6; 2276 if (keyword(s2, len2, 0)) 2277 return start; 2278 } 2279 start_force(PL_curforce); 2280 if (PL_madskills) 2281 curmad('X', newSVpvn(start,s-start)); 2282 if (token == METHOD) { 2283 s = SKIPSPACE1(s); 2284 if (*s == '(') 2285 PL_expect = XTERM; 2286 else { 2287 PL_expect = XOPERATOR; 2288 } 2289 } 2290 if (PL_madskills) 2291 curmad('g', newSVpvs( "forced" )); 2292 NEXTVAL_NEXTTOKE.opval 2293 = (OP*)newSVOP(OP_CONST,0, 2294 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2295 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2296 force_next(token); 2297 } 2298 return s; 2299 } 2300 2301 /* 2302 * S_force_ident 2303 * Called when the lexer wants $foo *foo &foo etc, but the program 2304 * text only contains the "foo" portion. The first argument is a pointer 2305 * to the "foo", and the second argument is the type symbol to prefix. 2306 * Forces the next token to be a "WORD". 2307 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2308 */ 2309 2310 STATIC void 2311 S_force_ident(pTHX_ const char *s, int kind) 2312 { 2313 dVAR; 2314 2315 PERL_ARGS_ASSERT_FORCE_IDENT; 2316 2317 if (s[0]) { 2318 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2319 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2320 UTF ? SVf_UTF8 : 0)); 2321 start_force(PL_curforce); 2322 NEXTVAL_NEXTTOKE.opval = o; 2323 force_next(WORD); 2324 if (kind) { 2325 o->op_private = OPpCONST_ENTERED; 2326 /* XXX see note in pp_entereval() for why we forgo typo 2327 warnings if the symbol must be introduced in an eval. 2328 GSAR 96-10-12 */ 2329 gv_fetchpvn_flags(s, len, 2330 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) 2331 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2332 kind == '$' ? SVt_PV : 2333 kind == '@' ? SVt_PVAV : 2334 kind == '%' ? SVt_PVHV : 2335 SVt_PVGV 2336 ); 2337 } 2338 } 2339 } 2340 2341 static void 2342 S_force_ident_maybe_lex(pTHX_ char pit) 2343 { 2344 start_force(PL_curforce); 2345 NEXTVAL_NEXTTOKE.ival = pit; 2346 force_next('p'); 2347 } 2348 2349 NV 2350 Perl_str_to_version(pTHX_ SV *sv) 2351 { 2352 NV retval = 0.0; 2353 NV nshift = 1.0; 2354 STRLEN len; 2355 const char *start = SvPV_const(sv,len); 2356 const char * const end = start + len; 2357 const bool utf = SvUTF8(sv) ? TRUE : FALSE; 2358 2359 PERL_ARGS_ASSERT_STR_TO_VERSION; 2360 2361 while (start < end) { 2362 STRLEN skip; 2363 UV n; 2364 if (utf) 2365 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2366 else { 2367 n = *(U8*)start; 2368 skip = 1; 2369 } 2370 retval += ((NV)n)/nshift; 2371 start += skip; 2372 nshift *= 1000; 2373 } 2374 return retval; 2375 } 2376 2377 /* 2378 * S_force_version 2379 * Forces the next token to be a version number. 2380 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2381 * and if "guessing" is TRUE, then no new token is created (and the caller 2382 * must use an alternative parsing method). 2383 */ 2384 2385 STATIC char * 2386 S_force_version(pTHX_ char *s, int guessing) 2387 { 2388 dVAR; 2389 OP *version = NULL; 2390 char *d; 2391 #ifdef PERL_MAD 2392 I32 startoff = s - SvPVX(PL_linestr); 2393 #endif 2394 2395 PERL_ARGS_ASSERT_FORCE_VERSION; 2396 2397 s = SKIPSPACE1(s); 2398 2399 d = s; 2400 if (*d == 'v') 2401 d++; 2402 if (isDIGIT(*d)) { 2403 while (isDIGIT(*d) || *d == '_' || *d == '.') 2404 d++; 2405 #ifdef PERL_MAD 2406 if (PL_madskills) { 2407 start_force(PL_curforce); 2408 curmad('X', newSVpvn(s,d-s)); 2409 } 2410 #endif 2411 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2412 SV *ver; 2413 s = scan_num(s, &pl_yylval); 2414 version = pl_yylval.opval; 2415 ver = cSVOPx(version)->op_sv; 2416 if (SvPOK(ver) && !SvNIOK(ver)) { 2417 SvUPGRADE(ver, SVt_PVNV); 2418 SvNV_set(ver, str_to_version(ver)); 2419 SvNOK_on(ver); /* hint that it is a version */ 2420 } 2421 } 2422 else if (guessing) { 2423 #ifdef PERL_MAD 2424 if (PL_madskills) { 2425 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2426 PL_nextwhite = 0; 2427 s = SvPVX(PL_linestr) + startoff; 2428 } 2429 #endif 2430 return s; 2431 } 2432 } 2433 2434 #ifdef PERL_MAD 2435 if (PL_madskills && !version) { 2436 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2437 PL_nextwhite = 0; 2438 s = SvPVX(PL_linestr) + startoff; 2439 } 2440 #endif 2441 /* NOTE: The parser sees the package name and the VERSION swapped */ 2442 start_force(PL_curforce); 2443 NEXTVAL_NEXTTOKE.opval = version; 2444 force_next(WORD); 2445 2446 return s; 2447 } 2448 2449 /* 2450 * S_force_strict_version 2451 * Forces the next token to be a version number using strict syntax rules. 2452 */ 2453 2454 STATIC char * 2455 S_force_strict_version(pTHX_ char *s) 2456 { 2457 dVAR; 2458 OP *version = NULL; 2459 #ifdef PERL_MAD 2460 I32 startoff = s - SvPVX(PL_linestr); 2461 #endif 2462 const char *errstr = NULL; 2463 2464 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2465 2466 while (isSPACE(*s)) /* leading whitespace */ 2467 s++; 2468 2469 if (is_STRICT_VERSION(s,&errstr)) { 2470 SV *ver = newSV(0); 2471 s = (char *)scan_version(s, ver, 0); 2472 version = newSVOP(OP_CONST, 0, ver); 2473 } 2474 else if ( (*s != ';' && *s != '{' && *s != '}' ) && 2475 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' ))) 2476 { 2477 PL_bufptr = s; 2478 if (errstr) 2479 yyerror(errstr); /* version required */ 2480 return s; 2481 } 2482 2483 #ifdef PERL_MAD 2484 if (PL_madskills && !version) { 2485 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2486 PL_nextwhite = 0; 2487 s = SvPVX(PL_linestr) + startoff; 2488 } 2489 #endif 2490 /* NOTE: The parser sees the package name and the VERSION swapped */ 2491 start_force(PL_curforce); 2492 NEXTVAL_NEXTTOKE.opval = version; 2493 force_next(WORD); 2494 2495 return s; 2496 } 2497 2498 /* 2499 * S_tokeq 2500 * Tokenize a quoted string passed in as an SV. It finds the next 2501 * chunk, up to end of string or a backslash. It may make a new 2502 * SV containing that chunk (if HINT_NEW_STRING is on). It also 2503 * turns \\ into \. 2504 */ 2505 2506 STATIC SV * 2507 S_tokeq(pTHX_ SV *sv) 2508 { 2509 dVAR; 2510 char *s; 2511 char *send; 2512 char *d; 2513 SV *pv = sv; 2514 2515 PERL_ARGS_ASSERT_TOKEQ; 2516 2517 assert (SvPOK(sv)); 2518 assert (SvLEN(sv)); 2519 assert (!SvIsCOW(sv)); 2520 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2521 goto finish; 2522 s = SvPVX(sv); 2523 send = SvEND(sv); 2524 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2525 while (s < send && !(*s == '\\' && s[1] == '\\')) 2526 s++; 2527 if (s == send) 2528 goto finish; 2529 d = s; 2530 if ( PL_hints & HINT_NEW_STRING ) { 2531 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2532 SVs_TEMP | SvUTF8(sv)); 2533 } 2534 while (s < send) { 2535 if (*s == '\\') { 2536 if (s + 1 < send && (s[1] == '\\')) 2537 s++; /* all that, just for this */ 2538 } 2539 *d++ = *s++; 2540 } 2541 *d = '\0'; 2542 SvCUR_set(sv, d - SvPVX_const(sv)); 2543 finish: 2544 if ( PL_hints & HINT_NEW_STRING ) 2545 return new_constant(NULL, 0, "q", sv, pv, "q", 1); 2546 return sv; 2547 } 2548 2549 /* 2550 * Now come three functions related to double-quote context, 2551 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2552 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2553 * interact with PL_lex_state, and create fake ( ... ) argument lists 2554 * to handle functions and concatenation. 2555 * For example, 2556 * "foo\lbar" 2557 * is tokenised as 2558 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2559 */ 2560 2561 /* 2562 * S_sublex_start 2563 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2564 * 2565 * Pattern matching will set PL_lex_op to the pattern-matching op to 2566 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2567 * 2568 * OP_CONST and OP_READLINE are easy--just make the new op and return. 2569 * 2570 * Everything else becomes a FUNC. 2571 * 2572 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 2573 * had an OP_CONST or OP_READLINE). This just sets us up for a 2574 * call to S_sublex_push(). 2575 */ 2576 2577 STATIC I32 2578 S_sublex_start(pTHX) 2579 { 2580 dVAR; 2581 const I32 op_type = pl_yylval.ival; 2582 2583 if (op_type == OP_NULL) { 2584 pl_yylval.opval = PL_lex_op; 2585 PL_lex_op = NULL; 2586 return THING; 2587 } 2588 if (op_type == OP_CONST) { 2589 SV *sv = tokeq(PL_lex_stuff); 2590 2591 if (SvTYPE(sv) == SVt_PVIV) { 2592 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2593 STRLEN len; 2594 const char * const p = SvPV_const(sv, len); 2595 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2596 SvREFCNT_dec(sv); 2597 sv = nsv; 2598 } 2599 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); 2600 PL_lex_stuff = NULL; 2601 return THING; 2602 } 2603 2604 PL_sublex_info.super_state = PL_lex_state; 2605 PL_sublex_info.sub_inwhat = (U16)op_type; 2606 PL_sublex_info.sub_op = PL_lex_op; 2607 PL_lex_state = LEX_INTERPPUSH; 2608 2609 PL_expect = XTERM; 2610 if (PL_lex_op) { 2611 pl_yylval.opval = PL_lex_op; 2612 PL_lex_op = NULL; 2613 return PMFUNC; 2614 } 2615 else 2616 return FUNC; 2617 } 2618 2619 /* 2620 * S_sublex_push 2621 * Create a new scope to save the lexing state. The scope will be 2622 * ended in S_sublex_done. Returns a '(', starting the function arguments 2623 * to the uc, lc, etc. found before. 2624 * Sets PL_lex_state to LEX_INTERPCONCAT. 2625 */ 2626 2627 STATIC I32 2628 S_sublex_push(pTHX) 2629 { 2630 dVAR; 2631 LEXSHARED *shared; 2632 const bool is_heredoc = PL_multi_close == '<'; 2633 ENTER; 2634 2635 PL_lex_state = PL_sublex_info.super_state; 2636 SAVEI8(PL_lex_dojoin); 2637 SAVEI32(PL_lex_brackets); 2638 SAVEI32(PL_lex_allbrackets); 2639 SAVEI32(PL_lex_formbrack); 2640 SAVEI8(PL_lex_fakeeof); 2641 SAVEI32(PL_lex_casemods); 2642 SAVEI32(PL_lex_starts); 2643 SAVEI8(PL_lex_state); 2644 SAVESPTR(PL_lex_repl); 2645 SAVEVPTR(PL_lex_inpat); 2646 SAVEI16(PL_lex_inwhat); 2647 if (is_heredoc) 2648 { 2649 SAVECOPLINE(PL_curcop); 2650 SAVEI32(PL_multi_end); 2651 SAVEI32(PL_parser->herelines); 2652 PL_parser->herelines = 0; 2653 } 2654 SAVEI8(PL_multi_close); 2655 SAVEPPTR(PL_bufptr); 2656 SAVEPPTR(PL_bufend); 2657 SAVEPPTR(PL_oldbufptr); 2658 SAVEPPTR(PL_oldoldbufptr); 2659 SAVEPPTR(PL_last_lop); 2660 SAVEPPTR(PL_last_uni); 2661 SAVEPPTR(PL_linestart); 2662 SAVESPTR(PL_linestr); 2663 SAVEGENERICPV(PL_lex_brackstack); 2664 SAVEGENERICPV(PL_lex_casestack); 2665 SAVEGENERICPV(PL_parser->lex_shared); 2666 SAVEBOOL(PL_parser->lex_re_reparsing); 2667 SAVEI32(PL_copline); 2668 2669 /* The here-doc parser needs to be able to peek into outer lexing 2670 scopes to find the body of the here-doc. So we put PL_linestr and 2671 PL_bufptr into lex_shared, to ‘share’ those values. 2672 */ 2673 PL_parser->lex_shared->ls_linestr = PL_linestr; 2674 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2675 2676 PL_linestr = PL_lex_stuff; 2677 PL_lex_repl = PL_sublex_info.repl; 2678 PL_lex_stuff = NULL; 2679 PL_sublex_info.repl = NULL; 2680 2681 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2682 = SvPVX(PL_linestr); 2683 PL_bufend += SvCUR(PL_linestr); 2684 PL_last_lop = PL_last_uni = NULL; 2685 SAVEFREESV(PL_linestr); 2686 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2687 2688 PL_lex_dojoin = FALSE; 2689 PL_lex_brackets = PL_lex_formbrack = 0; 2690 PL_lex_allbrackets = 0; 2691 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2692 Newx(PL_lex_brackstack, 120, char); 2693 Newx(PL_lex_casestack, 12, char); 2694 PL_lex_casemods = 0; 2695 *PL_lex_casestack = '\0'; 2696 PL_lex_starts = 0; 2697 PL_lex_state = LEX_INTERPCONCAT; 2698 if (is_heredoc) 2699 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2700 PL_copline = NOLINE; 2701 2702 Newxz(shared, 1, LEXSHARED); 2703 shared->ls_prev = PL_parser->lex_shared; 2704 PL_parser->lex_shared = shared; 2705 2706 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 2707 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2708 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2709 PL_lex_inpat = PL_sublex_info.sub_op; 2710 else 2711 PL_lex_inpat = NULL; 2712 2713 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2714 PL_in_eval &= ~EVAL_RE_REPARSING; 2715 2716 return '('; 2717 } 2718 2719 /* 2720 * S_sublex_done 2721 * Restores lexer state after a S_sublex_push. 2722 */ 2723 2724 STATIC I32 2725 S_sublex_done(pTHX) 2726 { 2727 dVAR; 2728 if (!PL_lex_starts++) { 2729 SV * const sv = newSVpvs(""); 2730 if (SvUTF8(PL_linestr)) 2731 SvUTF8_on(sv); 2732 PL_expect = XOPERATOR; 2733 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2734 return THING; 2735 } 2736 2737 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2738 PL_lex_state = LEX_INTERPCASEMOD; 2739 return yylex(); 2740 } 2741 2742 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2743 assert(PL_lex_inwhat != OP_TRANSR); 2744 if (PL_lex_repl) { 2745 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2746 PL_linestr = PL_lex_repl; 2747 PL_lex_inpat = 0; 2748 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2749 PL_bufend += SvCUR(PL_linestr); 2750 PL_last_lop = PL_last_uni = NULL; 2751 PL_lex_dojoin = FALSE; 2752 PL_lex_brackets = 0; 2753 PL_lex_allbrackets = 0; 2754 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2755 PL_lex_casemods = 0; 2756 *PL_lex_casestack = '\0'; 2757 PL_lex_starts = 0; 2758 if (SvEVALED(PL_lex_repl)) { 2759 PL_lex_state = LEX_INTERPNORMAL; 2760 PL_lex_starts++; 2761 /* we don't clear PL_lex_repl here, so that we can check later 2762 whether this is an evalled subst; that means we rely on the 2763 logic to ensure sublex_done() is called again only via the 2764 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2765 } 2766 else { 2767 PL_lex_state = LEX_INTERPCONCAT; 2768 PL_lex_repl = NULL; 2769 } 2770 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2771 CopLINE(PL_curcop) += 2772 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow 2773 + PL_parser->herelines; 2774 PL_parser->herelines = 0; 2775 } 2776 return ','; 2777 } 2778 else { 2779 const line_t l = CopLINE(PL_curcop); 2780 #ifdef PERL_MAD 2781 if (PL_madskills) { 2782 if (PL_thiswhite) { 2783 if (!PL_endwhite) 2784 PL_endwhite = newSVpvs(""); 2785 sv_catsv(PL_endwhite, PL_thiswhite); 2786 PL_thiswhite = 0; 2787 } 2788 if (PL_thistoken) 2789 sv_setpvs(PL_thistoken,""); 2790 else 2791 PL_realtokenstart = -1; 2792 } 2793 #endif 2794 LEAVE; 2795 if (PL_multi_close == '<') 2796 PL_parser->herelines += l - PL_multi_end; 2797 PL_bufend = SvPVX(PL_linestr); 2798 PL_bufend += SvCUR(PL_linestr); 2799 PL_expect = XOPERATOR; 2800 return ')'; 2801 } 2802 } 2803 2804 PERL_STATIC_INLINE SV* 2805 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) 2806 { 2807 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2808 * interior, hence to the "}". Finds what the name resolves to, returning 2809 * an SV* containing it; NULL if no valid one found */ 2810 2811 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0); 2812 2813 HV * table; 2814 SV **cvp; 2815 SV *cv; 2816 SV *rv; 2817 HV *stash; 2818 const U8* first_bad_char_loc; 2819 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ 2820 2821 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2822 2823 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, 2824 e - backslash_ptr, 2825 &first_bad_char_loc)) 2826 { 2827 /* If warnings are on, this will print a more detailed analysis of what 2828 * is wrong than the error message below */ 2829 utf8n_to_uvchr(first_bad_char_loc, 2830 e - ((char *) first_bad_char_loc), 2831 NULL, 0); 2832 2833 /* We deliberately don't try to print the malformed character, which 2834 * might not print very well; it also may be just the first of many 2835 * malformations, so don't print what comes after it */ 2836 yyerror(Perl_form(aTHX_ 2837 "Malformed UTF-8 character immediately after '%.*s'", 2838 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr)); 2839 return NULL; 2840 } 2841 2842 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, 2843 /* include the <}> */ 2844 e - backslash_ptr + 1); 2845 if (! SvPOK(res)) { 2846 SvREFCNT_dec_NN(res); 2847 return NULL; 2848 } 2849 2850 /* See if the charnames handler is the Perl core's, and if so, we can skip 2851 * the validation needed for a user-supplied one, as Perl's does its own 2852 * validation. */ 2853 table = GvHV(PL_hintgv); /* ^H */ 2854 cvp = hv_fetchs(table, "charnames", FALSE); 2855 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2856 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2857 { 2858 const char * const name = HvNAME(stash); 2859 if (HvNAMELEN(stash) == sizeof("_charnames")-1 2860 && strEQ(name, "_charnames")) { 2861 return res; 2862 } 2863 } 2864 2865 /* Here, it isn't Perl's charname handler. We can't rely on a 2866 * user-supplied handler to validate the input name. For non-ut8 input, 2867 * look to see that the first character is legal. Then loop through the 2868 * rest checking that each is a continuation */ 2869 2870 /* This code needs to be sync'ed with a regex in _charnames.pm which does 2871 * the same thing */ 2872 2873 if (! UTF) { 2874 if (! isALPHAU(*s)) { 2875 goto bad_charname; 2876 } 2877 s++; 2878 while (s < e) { 2879 if (! isCHARNAME_CONT(*s)) { 2880 goto bad_charname; 2881 } 2882 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { 2883 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2884 "A sequence of multiple spaces in a charnames " 2885 "alias definition is deprecated"); 2886 } 2887 s++; 2888 } 2889 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { 2890 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2891 "Trailing white-space in a charnames alias " 2892 "definition is deprecated"); 2893 } 2894 } 2895 else { 2896 /* Similarly for utf8. For invariants can check directly; for other 2897 * Latin1, can calculate their code point and check; otherwise use a 2898 * swash */ 2899 if (UTF8_IS_INVARIANT(*s)) { 2900 if (! isALPHAU(*s)) { 2901 goto bad_charname; 2902 } 2903 s++; 2904 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2905 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) { 2906 goto bad_charname; 2907 } 2908 s += 2; 2909 } 2910 else { 2911 if (! PL_utf8_charname_begin) { 2912 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2913 PL_utf8_charname_begin = _core_swash_init("utf8", 2914 "_Perl_Charname_Begin", 2915 &PL_sv_undef, 2916 1, 0, NULL, &flags); 2917 } 2918 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { 2919 goto bad_charname; 2920 } 2921 s += UTF8SKIP(s); 2922 } 2923 2924 while (s < e) { 2925 if (UTF8_IS_INVARIANT(*s)) { 2926 if (! isCHARNAME_CONT(*s)) { 2927 goto bad_charname; 2928 } 2929 if (*s == ' ' && *(s-1) == ' ' 2930 && ckWARN_d(WARN_DEPRECATED)) { 2931 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2932 "A sequence of multiple spaces in a charnam" 2933 "es alias definition is deprecated"); 2934 } 2935 s++; 2936 } 2937 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2938 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) 2939 { 2940 goto bad_charname; 2941 } 2942 s += 2; 2943 } 2944 else { 2945 if (! PL_utf8_charname_continue) { 2946 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2947 PL_utf8_charname_continue = _core_swash_init("utf8", 2948 "_Perl_Charname_Continue", 2949 &PL_sv_undef, 2950 1, 0, NULL, &flags); 2951 } 2952 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { 2953 goto bad_charname; 2954 } 2955 s += UTF8SKIP(s); 2956 } 2957 } 2958 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { 2959 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2960 "Trailing white-space in a charnames alias " 2961 "definition is deprecated"); 2962 } 2963 } 2964 2965 if (SvUTF8(res)) { /* Don't accept malformed input */ 2966 const U8* first_bad_char_loc; 2967 STRLEN len; 2968 const char* const str = SvPV_const(res, len); 2969 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { 2970 /* If warnings are on, this will print a more detailed analysis of 2971 * what is wrong than the error message below */ 2972 utf8n_to_uvchr(first_bad_char_loc, 2973 (char *) first_bad_char_loc - str, 2974 NULL, 0); 2975 2976 /* We deliberately don't try to print the malformed character, 2977 * which might not print very well; it also may be just the first 2978 * of many malformations, so don't print what comes after it */ 2979 yyerror_pv( 2980 Perl_form(aTHX_ 2981 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2982 (int) (e - backslash_ptr + 1), backslash_ptr, 2983 (int) ((char *) first_bad_char_loc - str), str 2984 ), 2985 SVf_UTF8); 2986 return NULL; 2987 } 2988 } 2989 2990 return res; 2991 2992 bad_charname: { 2993 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1); 2994 2995 /* The final %.*s makes sure that should the trailing NUL be missing 2996 * that this print won't run off the end of the string */ 2997 yyerror_pv( 2998 Perl_form(aTHX_ 2999 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 3000 (int)(s - backslash_ptr + bad_char_size), backslash_ptr, 3001 (int)(e - s + bad_char_size), s + bad_char_size 3002 ), 3003 UTF ? SVf_UTF8 : 0); 3004 return NULL; 3005 } 3006 } 3007 3008 /* 3009 scan_const 3010 3011 Extracts the next constant part of a pattern, double-quoted string, 3012 or transliteration. This is terrifying code. 3013 3014 For example, in parsing the double-quoted string "ab\x63$d", it would 3015 stop at the '$' and return an OP_CONST containing 'abc'. 3016 3017 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 3018 processing a pattern (PL_lex_inpat is true), a transliteration 3019 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 3020 3021 Returns a pointer to the character scanned up to. If this is 3022 advanced from the start pointer supplied (i.e. if anything was 3023 successfully parsed), will leave an OP_CONST for the substring scanned 3024 in pl_yylval. Caller must intuit reason for not parsing further 3025 by looking at the next characters herself. 3026 3027 In patterns: 3028 expand: 3029 \N{FOO} => \N{U+hex_for_character_FOO} 3030 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 3031 3032 pass through: 3033 all other \-char, including \N and \N{ apart from \N{ABC} 3034 3035 stops on: 3036 @ and $ where it appears to be a var, but not for $ as tail anchor 3037 \l \L \u \U \Q \E 3038 (?{ or (??{ 3039 3040 3041 In transliterations: 3042 characters are VERY literal, except for - not at the start or end 3043 of the string, which indicates a range. If the range is in bytes, 3044 scan_const expands the range to the full set of intermediate 3045 characters. If the range is in utf8, the hyphen is replaced with 3046 a certain range mark which will be handled by pmtrans() in op.c. 3047 3048 In double-quoted strings: 3049 backslashes: 3050 double-quoted style: \r and \n 3051 constants: \x31, etc. 3052 deprecated backrefs: \1 (in substitution replacements) 3053 case and quoting: \U \Q \E 3054 stops on @ and $ 3055 3056 scan_const does *not* construct ops to handle interpolated strings. 3057 It stops processing as soon as it finds an embedded $ or @ variable 3058 and leaves it to the caller to work out what's going on. 3059 3060 embedded arrays (whether in pattern or not) could be: 3061 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 3062 3063 $ in double-quoted strings must be the symbol of an embedded scalar. 3064 3065 $ in pattern could be $foo or could be tail anchor. Assumption: 3066 it's a tail anchor if $ is the last thing in the string, or if it's 3067 followed by one of "()| \r\n\t" 3068 3069 \1 (backreferences) are turned into $1 in substitutions 3070 3071 The structure of the code is 3072 while (there's a character to process) { 3073 handle transliteration ranges 3074 skip regexp comments /(?#comment)/ and codes /(?{code})/ 3075 skip #-initiated comments in //x patterns 3076 check for embedded arrays 3077 check for embedded scalars 3078 if (backslash) { 3079 deprecate \1 in substitution replacements 3080 handle string-changing backslashes \l \U \Q \E, etc. 3081 switch (what was escaped) { 3082 handle \- in a transliteration (becomes a literal -) 3083 if a pattern and not \N{, go treat as regular character 3084 handle \132 (octal characters) 3085 handle \x15 and \x{1234} (hex characters) 3086 handle \N{name} (named characters, also \N{3,5} in a pattern) 3087 handle \cV (control characters) 3088 handle printf-style backslashes (\f, \r, \n, etc) 3089 } (end switch) 3090 continue 3091 } (end if backslash) 3092 handle regular character 3093 } (end while character to read) 3094 3095 */ 3096 3097 STATIC char * 3098 S_scan_const(pTHX_ char *start) 3099 { 3100 dVAR; 3101 char *send = PL_bufend; /* end of the constant */ 3102 SV *sv = newSV(send - start); /* sv for the constant. See 3103 note below on sizing. */ 3104 char *s = start; /* start of the constant */ 3105 char *d = SvPVX(sv); /* destination for copies */ 3106 bool dorange = FALSE; /* are we in a translit range? */ 3107 bool didrange = FALSE; /* did we just finish a range? */ 3108 bool in_charclass = FALSE; /* within /[...]/ */ 3109 bool has_utf8 = FALSE; /* Output constant is UTF8 */ 3110 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed 3111 to be UTF8? But, this can 3112 show as true when the source 3113 isn't utf8, as for example 3114 when it is entirely composed 3115 of hex constants */ 3116 SV *res; /* result from charnames */ 3117 3118 /* Note on sizing: The scanned constant is placed into sv, which is 3119 * initialized by newSV() assuming one byte of output for every byte of 3120 * input. This routine expects newSV() to allocate an extra byte for a 3121 * trailing NUL, which this routine will append if it gets to the end of 3122 * the input. There may be more bytes of input than output (eg., \N{LATIN 3123 * CAPITAL LETTER A}), or more output than input if the constant ends up 3124 * recoded to utf8, but each time a construct is found that might increase 3125 * the needed size, SvGROW() is called. Its size parameter each time is 3126 * based on the best guess estimate at the time, namely the length used so 3127 * far, plus the length the current construct will occupy, plus room for 3128 * the trailing NUL, plus one byte for every input byte still unscanned */ 3129 3130 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 3131 before set */ 3132 #ifdef EBCDIC 3133 UV literal_endpoint = 0; 3134 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */ 3135 #endif 3136 3137 PERL_ARGS_ASSERT_SCAN_CONST; 3138 3139 assert(PL_lex_inwhat != OP_TRANSR); 3140 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3141 /* If we are doing a trans and we know we want UTF8 set expectation */ 3142 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 3143 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3144 } 3145 3146 /* Protect sv from errors and fatal warnings. */ 3147 ENTER_with_name("scan_const"); 3148 SAVEFREESV(sv); 3149 3150 while (s < send || dorange) { 3151 3152 /* get transliterations out of the way (they're most literal) */ 3153 if (PL_lex_inwhat == OP_TRANS) { 3154 /* expand a range A-Z to the full set of characters. AIE! */ 3155 if (dorange) { 3156 I32 i; /* current expanded character */ 3157 I32 min; /* first character in range */ 3158 I32 max; /* last character in range */ 3159 3160 #ifdef EBCDIC 3161 UV uvmax = 0; 3162 #endif 3163 3164 if (has_utf8 3165 #ifdef EBCDIC 3166 && !native_range 3167 #endif 3168 ) { 3169 char * const c = (char*)utf8_hop((U8*)d, -1); 3170 char *e = d++; 3171 while (e-- > c) 3172 *(e + 1) = *e; 3173 *c = (char) ILLEGAL_UTF8_BYTE; 3174 /* mark the range as done, and continue */ 3175 dorange = FALSE; 3176 didrange = TRUE; 3177 continue; 3178 } 3179 3180 i = d - SvPVX_const(sv); /* remember current offset */ 3181 #ifdef EBCDIC 3182 SvGROW(sv, 3183 SvLEN(sv) + (has_utf8 ? 3184 (512 - UTF_CONTINUATION_MARK + 3185 UNISKIP(0x100)) 3186 : 256)); 3187 /* How many two-byte within 0..255: 128 in UTF-8, 3188 * 96 in UTF-8-mod. */ 3189 #else 3190 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ 3191 #endif 3192 d = SvPVX(sv) + i; /* refresh d after realloc */ 3193 #ifdef EBCDIC 3194 if (has_utf8) { 3195 int j; 3196 for (j = 0; j <= 1; j++) { 3197 char * const c = (char*)utf8_hop((U8*)d, -1); 3198 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0); 3199 if (j) 3200 min = (U8)uv; 3201 else if (uv < 256) 3202 max = (U8)uv; 3203 else { 3204 max = (U8)0xff; /* only to \xff */ 3205 uvmax = uv; /* \x{100} to uvmax */ 3206 } 3207 d = c; /* eat endpoint chars */ 3208 } 3209 } 3210 else { 3211 #endif 3212 d -= 2; /* eat the first char and the - */ 3213 min = (U8)*d; /* first char in range */ 3214 max = (U8)d[1]; /* last char in range */ 3215 #ifdef EBCDIC 3216 } 3217 #endif 3218 3219 if (min > max) { 3220 Perl_croak(aTHX_ 3221 "Invalid range \"%c-%c\" in transliteration operator", 3222 (char)min, (char)max); 3223 } 3224 3225 #ifdef EBCDIC 3226 if (literal_endpoint == 2 && 3227 ((isLOWER_A(min) && isLOWER_A(max)) || 3228 (isUPPER_A(min) && isUPPER_A(max)))) 3229 { 3230 for (i = min; i <= max; i++) { 3231 if (isALPHA_A(i)) 3232 *d++ = i; 3233 } 3234 } 3235 else 3236 #endif 3237 for (i = min; i <= max; i++) 3238 #ifdef EBCDIC 3239 if (has_utf8) { 3240 append_utf8_from_native_byte(i, &d); 3241 } 3242 else 3243 #endif 3244 *d++ = (char)i; 3245 3246 #ifdef EBCDIC 3247 if (uvmax) { 3248 d = (char*)uvchr_to_utf8((U8*)d, 0x100); 3249 if (uvmax > 0x101) 3250 *d++ = (char) ILLEGAL_UTF8_BYTE; 3251 if (uvmax > 0x100) 3252 d = (char*)uvchr_to_utf8((U8*)d, uvmax); 3253 } 3254 #endif 3255 3256 /* mark the range as done, and continue */ 3257 dorange = FALSE; 3258 didrange = TRUE; 3259 #ifdef EBCDIC 3260 literal_endpoint = 0; 3261 #endif 3262 continue; 3263 } 3264 3265 /* range begins (ignore - as first or last char) */ 3266 else if (*s == '-' && s+1 < send && s != start) { 3267 if (didrange) { 3268 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 3269 } 3270 if (has_utf8 3271 #ifdef EBCDIC 3272 && !native_range 3273 #endif 3274 ) { 3275 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */ 3276 s++; 3277 continue; 3278 } 3279 dorange = TRUE; 3280 s++; 3281 } 3282 else { 3283 didrange = FALSE; 3284 #ifdef EBCDIC 3285 literal_endpoint = 0; 3286 native_range = TRUE; 3287 #endif 3288 } 3289 } 3290 3291 /* if we get here, we're not doing a transliteration */ 3292 3293 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3294 char *s1 = s-1; 3295 int esc = 0; 3296 while (s1 >= start && *s1-- == '\\') 3297 esc = !esc; 3298 if (!esc) 3299 in_charclass = TRUE; 3300 } 3301 3302 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3303 char *s1 = s-1; 3304 int esc = 0; 3305 while (s1 >= start && *s1-- == '\\') 3306 esc = !esc; 3307 if (!esc) 3308 in_charclass = FALSE; 3309 } 3310 3311 /* skip for regexp comments /(?#comment)/, except for the last 3312 * char, which will be done separately. 3313 * Stop on (?{..}) and friends */ 3314 3315 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3316 if (s[2] == '#') { 3317 while (s+1 < send && *s != ')') 3318 *d++ = *s++; 3319 } 3320 else if (!PL_lex_casemods && 3321 ( s[2] == '{' /* This should match regcomp.c */ 3322 || (s[2] == '?' && s[3] == '{'))) 3323 { 3324 break; 3325 } 3326 } 3327 3328 /* likewise skip #-initiated comments in //x patterns */ 3329 else if (*s == '#' && PL_lex_inpat && !in_charclass && 3330 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { 3331 while (s+1 < send && *s != '\n') 3332 *d++ = *s++; 3333 } 3334 3335 /* no further processing of single-quoted regex */ 3336 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3337 goto default_action; 3338 3339 /* check for embedded arrays 3340 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3341 */ 3342 else if (*s == '@' && s[1]) { 3343 if (isWORDCHAR_lazy_if(s+1,UTF)) 3344 break; 3345 if (strchr(":'{$", s[1])) 3346 break; 3347 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3348 break; /* in regexp, neither @+ nor @- are interpolated */ 3349 } 3350 3351 /* check for embedded scalars. only stop if we're sure it's a 3352 variable. 3353 */ 3354 else if (*s == '$') { 3355 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3356 break; 3357 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { 3358 if (s[1] == '\\') { 3359 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3360 "Possible unintended interpolation of $\\ in regex"); 3361 } 3362 break; /* in regexp, $ might be tail anchor */ 3363 } 3364 } 3365 3366 /* End of else if chain - OP_TRANS rejoin rest */ 3367 3368 /* backslashes */ 3369 if (*s == '\\' && s+1 < send) { 3370 char* e; /* Can be used for ending '}', etc. */ 3371 3372 s++; 3373 3374 /* warn on \1 - \9 in substitution replacements, but note that \11 3375 * is an octal; and \19 is \1 followed by '9' */ 3376 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && 3377 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) 3378 { 3379 /* diag_listed_as: \%d better written as $%d */ 3380 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3381 *--s = '$'; 3382 break; 3383 } 3384 3385 /* string-change backslash escapes */ 3386 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { 3387 --s; 3388 break; 3389 } 3390 /* In a pattern, process \N, but skip any other backslash escapes. 3391 * This is because we don't want to translate an escape sequence 3392 * into a meta symbol and have the regex compiler use the meta 3393 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3394 * in spite of this, we do have to process \N here while the proper 3395 * charnames handler is in scope. See bugs #56444 and #62056. 3396 * There is a complication because \N in a pattern may also stand 3397 * for 'match a non-nl', and not mean a charname, in which case its 3398 * processing should be deferred to the regex compiler. To be a 3399 * charname it must be followed immediately by a '{', and not look 3400 * like \N followed by a curly quantifier, i.e., not something like 3401 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3402 * quantifier */ 3403 else if (PL_lex_inpat 3404 && (*s != 'N' 3405 || s[1] != '{' 3406 || regcurly(s + 1, FALSE))) 3407 { 3408 *d++ = '\\'; 3409 goto default_action; 3410 } 3411 3412 switch (*s) { 3413 3414 /* quoted - in transliterations */ 3415 case '-': 3416 if (PL_lex_inwhat == OP_TRANS) { 3417 *d++ = *s++; 3418 continue; 3419 } 3420 /* FALL THROUGH */ 3421 default: 3422 { 3423 if ((isALPHANUMERIC(*s))) 3424 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3425 "Unrecognized escape \\%c passed through", 3426 *s); 3427 /* default action is to copy the quoted character */ 3428 goto default_action; 3429 } 3430 3431 /* eg. \132 indicates the octal constant 0132 */ 3432 case '0': case '1': case '2': case '3': 3433 case '4': case '5': case '6': case '7': 3434 { 3435 I32 flags = PERL_SCAN_SILENT_ILLDIGIT; 3436 STRLEN len = 3; 3437 uv = grok_oct(s, &len, &flags, NULL); 3438 s += len; 3439 if (len < 3 && s < send && isDIGIT(*s) 3440 && ckWARN(WARN_MISC)) 3441 { 3442 Perl_warner(aTHX_ packWARN(WARN_MISC), 3443 "%s", form_short_octal_warning(s, len)); 3444 } 3445 } 3446 goto NUM_ESCAPE_INSERT; 3447 3448 /* eg. \o{24} indicates the octal constant \024 */ 3449 case 'o': 3450 { 3451 const char* error; 3452 3453 bool valid = grok_bslash_o(&s, &uv, &error, 3454 TRUE, /* Output warning */ 3455 FALSE, /* Not strict */ 3456 TRUE, /* Output warnings for 3457 non-portables */ 3458 UTF); 3459 if (! valid) { 3460 yyerror(error); 3461 continue; 3462 } 3463 goto NUM_ESCAPE_INSERT; 3464 } 3465 3466 /* eg. \x24 indicates the hex constant 0x24 */ 3467 case 'x': 3468 { 3469 const char* error; 3470 3471 bool valid = grok_bslash_x(&s, &uv, &error, 3472 TRUE, /* Output warning */ 3473 FALSE, /* Not strict */ 3474 TRUE, /* Output warnings for 3475 non-portables */ 3476 UTF); 3477 if (! valid) { 3478 yyerror(error); 3479 continue; 3480 } 3481 } 3482 3483 NUM_ESCAPE_INSERT: 3484 /* Insert oct or hex escaped character. There will always be 3485 * enough room in sv since such escapes will be longer than any 3486 * UTF-8 sequence they can end up as, except if they force us 3487 * to recode the rest of the string into utf8 */ 3488 3489 /* Here uv is the ordinal of the next character being added */ 3490 if (!UVCHR_IS_INVARIANT(uv)) { 3491 if (!has_utf8 && uv > 255) { 3492 /* Might need to recode whatever we have accumulated so 3493 * far if it contains any chars variant in utf8 or 3494 * utf-ebcdic. */ 3495 3496 SvCUR_set(sv, d - SvPVX_const(sv)); 3497 SvPOK_on(sv); 3498 *d = '\0'; 3499 /* See Note on sizing above. */ 3500 sv_utf8_upgrade_flags_grow(sv, 3501 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3502 UNISKIP(uv) + (STRLEN)(send - s) + 1); 3503 d = SvPVX(sv) + SvCUR(sv); 3504 has_utf8 = TRUE; 3505 } 3506 3507 if (has_utf8) { 3508 d = (char*)uvchr_to_utf8((U8*)d, uv); 3509 if (PL_lex_inwhat == OP_TRANS && 3510 PL_sublex_info.sub_op) { 3511 PL_sublex_info.sub_op->op_private |= 3512 (PL_lex_repl ? OPpTRANS_FROM_UTF 3513 : OPpTRANS_TO_UTF); 3514 } 3515 #ifdef EBCDIC 3516 if (uv > 255 && !dorange) 3517 native_range = FALSE; 3518 #endif 3519 } 3520 else { 3521 *d++ = (char)uv; 3522 } 3523 } 3524 else { 3525 *d++ = (char) uv; 3526 } 3527 continue; 3528 3529 case 'N': 3530 /* In a non-pattern \N must be a named character, like \N{LATIN 3531 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can 3532 * mean to match a non-newline. For non-patterns, named 3533 * characters are converted to their string equivalents. In 3534 * patterns, named characters are not converted to their 3535 * ultimate forms for the same reasons that other escapes 3536 * aren't. Instead, they are converted to the \N{U+...} form 3537 * to get the value from the charnames that is in effect right 3538 * now, while preserving the fact that it was a named character 3539 * so that the regex compiler knows this */ 3540 3541 /* The structure of this section of code (besides checking for 3542 * errors and upgrading to utf8) is: 3543 * Further disambiguate between the two meanings of \N, and if 3544 * not a charname, go process it elsewhere 3545 * If of form \N{U+...}, pass it through if a pattern; 3546 * otherwise convert to utf8 3547 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a 3548 * pattern; otherwise convert to utf8 */ 3549 3550 /* Here, s points to the 'N'; the test below is guaranteed to 3551 * succeed if we are being called on a pattern as we already 3552 * know from a test above that the next character is a '{'. 3553 * On a non-pattern \N must mean 'named sequence, which 3554 * requires braces */ 3555 s++; 3556 if (*s != '{') { 3557 yyerror("Missing braces on \\N{}"); 3558 continue; 3559 } 3560 s++; 3561 3562 /* If there is no matching '}', it is an error. */ 3563 if (! (e = strchr(s, '}'))) { 3564 if (! PL_lex_inpat) { 3565 yyerror("Missing right brace on \\N{}"); 3566 } else { 3567 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3568 } 3569 continue; 3570 } 3571 3572 /* Here it looks like a named character */ 3573 3574 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3575 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3576 | PERL_SCAN_DISALLOW_PREFIX; 3577 STRLEN len; 3578 3579 /* For \N{U+...}, the '...' is a unicode value even on 3580 * EBCDIC machines */ 3581 s += 2; /* Skip to next char after the 'U+' */ 3582 len = e - s; 3583 uv = grok_hex(s, &len, &flags, NULL); 3584 if (len == 0 || len != (STRLEN)(e - s)) { 3585 yyerror("Invalid hexadecimal number in \\N{U+...}"); 3586 s = e + 1; 3587 continue; 3588 } 3589 3590 if (PL_lex_inpat) { 3591 3592 /* On non-EBCDIC platforms, pass through to the regex 3593 * compiler unchanged. The reason we evaluated the 3594 * number above is to make sure there wasn't a syntax 3595 * error. But on EBCDIC we convert to native so 3596 * downstream code can continue to assume it's native 3597 */ 3598 s -= 5; /* Include the '\N{U+' */ 3599 #ifdef EBCDIC 3600 d += my_snprintf(d, e - s + 1 + 1, /* includes the } 3601 and the \0 */ 3602 "\\N{U+%X}", 3603 (unsigned int) UNI_TO_NATIVE(uv)); 3604 #else 3605 Copy(s, d, e - s + 1, char); /* 1 = include the } */ 3606 d += e - s + 1; 3607 #endif 3608 } 3609 else { /* Not a pattern: convert the hex to string */ 3610 3611 /* If destination is not in utf8, unconditionally 3612 * recode it to be so. This is because \N{} implies 3613 * Unicode semantics, and scalars have to be in utf8 3614 * to guarantee those semantics */ 3615 if (! has_utf8) { 3616 SvCUR_set(sv, d - SvPVX_const(sv)); 3617 SvPOK_on(sv); 3618 *d = '\0'; 3619 /* See Note on sizing above. */ 3620 sv_utf8_upgrade_flags_grow( 3621 sv, 3622 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3623 UNISKIP(uv) + (STRLEN)(send - e) + 1); 3624 d = SvPVX(sv) + SvCUR(sv); 3625 has_utf8 = TRUE; 3626 } 3627 3628 /* Add the (Unicode) code point to the output. */ 3629 if (UNI_IS_INVARIANT(uv)) { 3630 *d++ = (char) LATIN1_TO_NATIVE(uv); 3631 } 3632 else { 3633 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0); 3634 } 3635 } 3636 } 3637 else /* Here is \N{NAME} but not \N{U+...}. */ 3638 if ((res = get_and_check_backslash_N_name(s, e))) 3639 { 3640 STRLEN len; 3641 const char *str = SvPV_const(res, len); 3642 if (PL_lex_inpat) { 3643 3644 if (! len) { /* The name resolved to an empty string */ 3645 Copy("\\N{}", d, 4, char); 3646 d += 4; 3647 } 3648 else { 3649 /* In order to not lose information for the regex 3650 * compiler, pass the result in the specially made 3651 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3652 * the code points in hex of each character 3653 * returned by charnames */ 3654 3655 const char *str_end = str + len; 3656 const STRLEN off = d - SvPVX_const(sv); 3657 3658 if (! SvUTF8(res)) { 3659 /* For the non-UTF-8 case, we can determine the 3660 * exact length needed without having to parse 3661 * through the string. Each character takes up 3662 * 2 hex digits plus either a trailing dot or 3663 * the "}" */ 3664 d = off + SvGROW(sv, off 3665 + 3 * len 3666 + 6 /* For the "\N{U+", and 3667 trailing NUL */ 3668 + (STRLEN)(send - e)); 3669 Copy("\\N{U+", d, 5, char); 3670 d += 5; 3671 while (str < str_end) { 3672 char hex_string[4]; 3673 my_snprintf(hex_string, sizeof(hex_string), 3674 "%02X.", (U8) *str); 3675 Copy(hex_string, d, 3, char); 3676 d += 3; 3677 str++; 3678 } 3679 d--; /* We will overwrite below the final 3680 dot with a right brace */ 3681 } 3682 else { 3683 STRLEN char_length; /* cur char's byte length */ 3684 3685 /* and the number of bytes after this is 3686 * translated into hex digits */ 3687 STRLEN output_length; 3688 3689 /* 2 hex per byte; 2 chars for '\N'; 2 chars 3690 * for max('U+', '.'); and 1 for NUL */ 3691 char hex_string[2 * UTF8_MAXBYTES + 5]; 3692 3693 /* Get the first character of the result. */ 3694 U32 uv = utf8n_to_uvchr((U8 *) str, 3695 len, 3696 &char_length, 3697 UTF8_ALLOW_ANYUV); 3698 /* Convert first code point to hex, including 3699 * the boiler plate before it. */ 3700 output_length = 3701 my_snprintf(hex_string, sizeof(hex_string), 3702 "\\N{U+%X", 3703 (unsigned int) uv); 3704 3705 /* Make sure there is enough space to hold it */ 3706 d = off + SvGROW(sv, off 3707 + output_length 3708 + (STRLEN)(send - e) 3709 + 2); /* '}' + NUL */ 3710 /* And output it */ 3711 Copy(hex_string, d, output_length, char); 3712 d += output_length; 3713 3714 /* For each subsequent character, append dot and 3715 * its ordinal in hex */ 3716 while ((str += char_length) < str_end) { 3717 const STRLEN off = d - SvPVX_const(sv); 3718 U32 uv = utf8n_to_uvchr((U8 *) str, 3719 str_end - str, 3720 &char_length, 3721 UTF8_ALLOW_ANYUV); 3722 output_length = 3723 my_snprintf(hex_string, 3724 sizeof(hex_string), 3725 ".%X", 3726 (unsigned int) uv); 3727 3728 d = off + SvGROW(sv, off 3729 + output_length 3730 + (STRLEN)(send - e) 3731 + 2); /* '}' + NUL */ 3732 Copy(hex_string, d, output_length, char); 3733 d += output_length; 3734 } 3735 } 3736 3737 *d++ = '}'; /* Done. Add the trailing brace */ 3738 } 3739 } 3740 else { /* Here, not in a pattern. Convert the name to a 3741 * string. */ 3742 3743 /* If destination is not in utf8, unconditionally 3744 * recode it to be so. This is because \N{} implies 3745 * Unicode semantics, and scalars have to be in utf8 3746 * to guarantee those semantics */ 3747 if (! has_utf8) { 3748 SvCUR_set(sv, d - SvPVX_const(sv)); 3749 SvPOK_on(sv); 3750 *d = '\0'; 3751 /* See Note on sizing above. */ 3752 sv_utf8_upgrade_flags_grow(sv, 3753 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3754 len + (STRLEN)(send - s) + 1); 3755 d = SvPVX(sv) + SvCUR(sv); 3756 has_utf8 = TRUE; 3757 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 3758 3759 /* See Note on sizing above. (NOTE: SvCUR() is not 3760 * set correctly here). */ 3761 const STRLEN off = d - SvPVX_const(sv); 3762 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); 3763 } 3764 Copy(str, d, len, char); 3765 d += len; 3766 } 3767 3768 SvREFCNT_dec(res); 3769 3770 } /* End \N{NAME} */ 3771 #ifdef EBCDIC 3772 if (!dorange) 3773 native_range = FALSE; /* \N{} is defined to be Unicode */ 3774 #endif 3775 s = e + 1; /* Point to just after the '}' */ 3776 continue; 3777 3778 /* \c is a control character */ 3779 case 'c': 3780 s++; 3781 if (s < send) { 3782 *d++ = grok_bslash_c(*s++, 1); 3783 } 3784 else { 3785 yyerror("Missing control char name in \\c"); 3786 } 3787 continue; 3788 3789 /* printf-style backslashes, formfeeds, newlines, etc */ 3790 case 'b': 3791 *d++ = '\b'; 3792 break; 3793 case 'n': 3794 *d++ = '\n'; 3795 break; 3796 case 'r': 3797 *d++ = '\r'; 3798 break; 3799 case 'f': 3800 *d++ = '\f'; 3801 break; 3802 case 't': 3803 *d++ = '\t'; 3804 break; 3805 case 'e': 3806 *d++ = ASCII_TO_NATIVE('\033'); 3807 break; 3808 case 'a': 3809 *d++ = '\a'; 3810 break; 3811 } /* end switch */ 3812 3813 s++; 3814 continue; 3815 } /* end if (backslash) */ 3816 #ifdef EBCDIC 3817 else 3818 literal_endpoint++; 3819 #endif 3820 3821 default_action: 3822 /* If we started with encoded form, or already know we want it, 3823 then encode the next character */ 3824 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { 3825 STRLEN len = 1; 3826 3827 3828 /* One might think that it is wasted effort in the case of the 3829 * source being utf8 (this_utf8 == TRUE) to take the next character 3830 * in the source, convert it to an unsigned value, and then convert 3831 * it back again. But the source has not been validated here. The 3832 * routine that does the conversion checks for errors like 3833 * malformed utf8 */ 3834 3835 const UV nextuv = (this_utf8) 3836 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) 3837 : (UV) ((U8) *s); 3838 const STRLEN need = UNISKIP(nextuv); 3839 if (!has_utf8) { 3840 SvCUR_set(sv, d - SvPVX_const(sv)); 3841 SvPOK_on(sv); 3842 *d = '\0'; 3843 /* See Note on sizing above. */ 3844 sv_utf8_upgrade_flags_grow(sv, 3845 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3846 need + (STRLEN)(send - s) + 1); 3847 d = SvPVX(sv) + SvCUR(sv); 3848 has_utf8 = TRUE; 3849 } else if (need > len) { 3850 /* encoded value larger than old, may need extra space (NOTE: 3851 * SvCUR() is not set correctly here). See Note on sizing 3852 * above. */ 3853 const STRLEN off = d - SvPVX_const(sv); 3854 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; 3855 } 3856 s += len; 3857 3858 d = (char*)uvchr_to_utf8((U8*)d, nextuv); 3859 #ifdef EBCDIC 3860 if (uv > 255 && !dorange) 3861 native_range = FALSE; 3862 #endif 3863 } 3864 else { 3865 *d++ = *s++; 3866 } 3867 } /* while loop to process each character */ 3868 3869 /* terminate the string and set up the sv */ 3870 *d = '\0'; 3871 SvCUR_set(sv, d - SvPVX_const(sv)); 3872 if (SvCUR(sv) >= SvLEN(sv)) 3873 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf 3874 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); 3875 3876 SvPOK_on(sv); 3877 if (PL_encoding && !has_utf8) { 3878 sv_recode_to_utf8(sv, PL_encoding); 3879 if (SvUTF8(sv)) 3880 has_utf8 = TRUE; 3881 } 3882 if (has_utf8) { 3883 SvUTF8_on(sv); 3884 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3885 PL_sublex_info.sub_op->op_private |= 3886 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3887 } 3888 } 3889 3890 /* shrink the sv if we allocated more than we used */ 3891 if (SvCUR(sv) + 5 < SvLEN(sv)) { 3892 SvPV_shrink_to_cur(sv); 3893 } 3894 3895 /* return the substring (via pl_yylval) only if we parsed anything */ 3896 if (s > start) { 3897 char *s2 = start; 3898 for (; s2 < s; s2++) { 3899 if (*s2 == '\n') 3900 COPLINE_INC_WITH_HERELINES; 3901 } 3902 SvREFCNT_inc_simple_void_NN(sv); 3903 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 3904 && ! PL_parser->lex_re_reparsing) 3905 { 3906 const char *const key = PL_lex_inpat ? "qr" : "q"; 3907 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 3908 const char *type; 3909 STRLEN typelen; 3910 3911 if (PL_lex_inwhat == OP_TRANS) { 3912 type = "tr"; 3913 typelen = 2; 3914 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 3915 type = "s"; 3916 typelen = 1; 3917 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 3918 type = "q"; 3919 typelen = 1; 3920 } else { 3921 type = "qq"; 3922 typelen = 2; 3923 } 3924 3925 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 3926 type, typelen); 3927 } 3928 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3929 } 3930 LEAVE_with_name("scan_const"); 3931 return s; 3932 } 3933 3934 /* S_intuit_more 3935 * Returns TRUE if there's more to the expression (e.g., a subscript), 3936 * FALSE otherwise. 3937 * 3938 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 3939 * 3940 * ->[ and ->{ return TRUE 3941 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 3942 * { and [ outside a pattern are always subscripts, so return TRUE 3943 * if we're outside a pattern and it's not { or [, then return FALSE 3944 * if we're in a pattern and the first char is a { 3945 * {4,5} (any digits around the comma) returns FALSE 3946 * if we're in a pattern and the first char is a [ 3947 * [] returns FALSE 3948 * [SOMETHING] has a funky algorithm to decide whether it's a 3949 * character class or not. It has to deal with things like 3950 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 3951 * anything else returns TRUE 3952 */ 3953 3954 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 3955 3956 STATIC int 3957 S_intuit_more(pTHX_ char *s) 3958 { 3959 dVAR; 3960 3961 PERL_ARGS_ASSERT_INTUIT_MORE; 3962 3963 if (PL_lex_brackets) 3964 return TRUE; 3965 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 3966 return TRUE; 3967 if (*s == '-' && s[1] == '>' 3968 && FEATURE_POSTDEREF_QQ_IS_ENABLED 3969 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 3970 ||(s[2] == '@' && strchr("*[{",s[3])) )) 3971 return TRUE; 3972 if (*s != '{' && *s != '[') 3973 return FALSE; 3974 if (!PL_lex_inpat) 3975 return TRUE; 3976 3977 /* In a pattern, so maybe we have {n,m}. */ 3978 if (*s == '{') { 3979 if (regcurly(s, FALSE)) { 3980 return FALSE; 3981 } 3982 return TRUE; 3983 } 3984 3985 /* On the other hand, maybe we have a character class */ 3986 3987 s++; 3988 if (*s == ']' || *s == '^') 3989 return FALSE; 3990 else { 3991 /* this is terrifying, and it works */ 3992 int weight; 3993 char seen[256]; 3994 const char * const send = strchr(s,']'); 3995 unsigned char un_char, last_un_char; 3996 char tmpbuf[sizeof PL_tokenbuf * 4]; 3997 3998 if (!send) /* has to be an expression */ 3999 return TRUE; 4000 weight = 2; /* let's weigh the evidence */ 4001 4002 if (*s == '$') 4003 weight -= 3; 4004 else if (isDIGIT(*s)) { 4005 if (s[1] != ']') { 4006 if (isDIGIT(s[1]) && s[2] == ']') 4007 weight -= 10; 4008 } 4009 else 4010 weight -= 100; 4011 } 4012 Zero(seen,256,char); 4013 un_char = 255; 4014 for (; s < send; s++) { 4015 last_un_char = un_char; 4016 un_char = (unsigned char)*s; 4017 switch (*s) { 4018 case '@': 4019 case '&': 4020 case '$': 4021 weight -= seen[un_char] * 10; 4022 if (isWORDCHAR_lazy_if(s+1,UTF)) { 4023 int len; 4024 char *tmp = PL_bufend; 4025 PL_bufend = (char*)send; 4026 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 4027 PL_bufend = tmp; 4028 len = (int)strlen(tmpbuf); 4029 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 4030 UTF ? SVf_UTF8 : 0, SVt_PV)) 4031 weight -= 100; 4032 else 4033 weight -= 10; 4034 } 4035 else if (*s == '$' && s[1] && 4036 strchr("[#!%*<>()-=",s[1])) { 4037 if (/*{*/ strchr("])} =",s[2])) 4038 weight -= 10; 4039 else 4040 weight -= 1; 4041 } 4042 break; 4043 case '\\': 4044 un_char = 254; 4045 if (s[1]) { 4046 if (strchr("wds]",s[1])) 4047 weight += 100; 4048 else if (seen[(U8)'\''] || seen[(U8)'"']) 4049 weight += 1; 4050 else if (strchr("rnftbxcav",s[1])) 4051 weight += 40; 4052 else if (isDIGIT(s[1])) { 4053 weight += 40; 4054 while (s[1] && isDIGIT(s[1])) 4055 s++; 4056 } 4057 } 4058 else 4059 weight += 100; 4060 break; 4061 case '-': 4062 if (s[1] == '\\') 4063 weight += 50; 4064 if (strchr("aA01! ",last_un_char)) 4065 weight += 30; 4066 if (strchr("zZ79~",s[1])) 4067 weight += 30; 4068 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4069 weight -= 5; /* cope with negative subscript */ 4070 break; 4071 default: 4072 if (!isWORDCHAR(last_un_char) 4073 && !(last_un_char == '$' || last_un_char == '@' 4074 || last_un_char == '&') 4075 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4076 char *d = s; 4077 while (isALPHA(*s)) 4078 s++; 4079 if (keyword(d, s - d, 0)) 4080 weight -= 150; 4081 } 4082 if (un_char == last_un_char + 1) 4083 weight += 5; 4084 weight -= seen[un_char]; 4085 break; 4086 } 4087 seen[un_char]++; 4088 } 4089 if (weight >= 0) /* probably a character class */ 4090 return FALSE; 4091 } 4092 4093 return TRUE; 4094 } 4095 4096 /* 4097 * S_intuit_method 4098 * 4099 * Does all the checking to disambiguate 4100 * foo bar 4101 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4102 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 4103 * 4104 * First argument is the stuff after the first token, e.g. "bar". 4105 * 4106 * Not a method if foo is a filehandle. 4107 * Not a method if foo is a subroutine prototyped to take a filehandle. 4108 * Not a method if it's really "Foo $bar" 4109 * Method if it's "foo $bar" 4110 * Not a method if it's really "print foo $bar" 4111 * Method if it's really "foo package::" (interpreted as package->foo) 4112 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4113 * Not a method if bar is a filehandle or package, but is quoted with 4114 * => 4115 */ 4116 4117 STATIC int 4118 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) 4119 { 4120 dVAR; 4121 char *s = start + (*start == '$'); 4122 char tmpbuf[sizeof PL_tokenbuf]; 4123 STRLEN len; 4124 GV* indirgv; 4125 #ifdef PERL_MAD 4126 int soff; 4127 #endif 4128 4129 PERL_ARGS_ASSERT_INTUIT_METHOD; 4130 4131 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4132 return 0; 4133 if (cv && SvPOK(cv)) { 4134 const char *proto = CvPROTO(cv); 4135 if (proto) { 4136 while (*proto && (isSPACE(*proto) || *proto == ';')) 4137 proto++; 4138 if (*proto == '*') 4139 return 0; 4140 } 4141 } 4142 4143 if (*start == '$') { 4144 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || 4145 isUPPER(*PL_tokenbuf)) 4146 return 0; 4147 #ifdef PERL_MAD 4148 len = start - SvPVX(PL_linestr); 4149 #endif 4150 s = PEEKSPACE(s); 4151 #ifdef PERL_MAD 4152 start = SvPVX(PL_linestr) + len; 4153 #endif 4154 PL_bufptr = start; 4155 PL_expect = XREF; 4156 return *s == '(' ? FUNCMETH : METHOD; 4157 } 4158 4159 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4160 /* start is the beginning of the possible filehandle/object, 4161 * and s is the end of it 4162 * tmpbuf is a copy of it (but with single quotes as double colons) 4163 */ 4164 4165 if (!keyword(tmpbuf, len, 0)) { 4166 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4167 len -= 2; 4168 tmpbuf[len] = '\0'; 4169 #ifdef PERL_MAD 4170 soff = s - SvPVX(PL_linestr); 4171 #endif 4172 goto bare_package; 4173 } 4174 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 4175 if (indirgv && GvCVu(indirgv)) 4176 return 0; 4177 /* filehandle or package name makes it a method */ 4178 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4179 #ifdef PERL_MAD 4180 soff = s - SvPVX(PL_linestr); 4181 #endif 4182 s = PEEKSPACE(s); 4183 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4184 return 0; /* no assumptions -- "=>" quotes bareword */ 4185 bare_package: 4186 start_force(PL_curforce); 4187 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, 4188 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4189 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4190 if (PL_madskills) 4191 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start, 4192 ( UTF ? SVf_UTF8 : 0 ))); 4193 PL_expect = XTERM; 4194 force_next(WORD); 4195 PL_bufptr = s; 4196 #ifdef PERL_MAD 4197 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */ 4198 #endif 4199 return *s == '(' ? FUNCMETH : METHOD; 4200 } 4201 } 4202 return 0; 4203 } 4204 4205 /* Encoded script support. filter_add() effectively inserts a 4206 * 'pre-processing' function into the current source input stream. 4207 * Note that the filter function only applies to the current source file 4208 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4209 * 4210 * The datasv parameter (which may be NULL) can be used to pass 4211 * private data to this instance of the filter. The filter function 4212 * can recover the SV using the FILTER_DATA macro and use it to 4213 * store private buffers and state information. 4214 * 4215 * The supplied datasv parameter is upgraded to a PVIO type 4216 * and the IoDIRP/IoANY field is used to store the function pointer, 4217 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4218 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4219 * private use must be set using malloc'd pointers. 4220 */ 4221 4222 SV * 4223 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4224 { 4225 dVAR; 4226 if (!funcp) 4227 return NULL; 4228 4229 if (!PL_parser) 4230 return NULL; 4231 4232 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4233 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4234 4235 if (!PL_rsfp_filters) 4236 PL_rsfp_filters = newAV(); 4237 if (!datasv) 4238 datasv = newSV(0); 4239 SvUPGRADE(datasv, SVt_PVIO); 4240 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4241 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4242 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4243 FPTR2DPTR(void *, IoANY(datasv)), 4244 SvPV_nolen(datasv))); 4245 av_unshift(PL_rsfp_filters, 1); 4246 av_store(PL_rsfp_filters, 0, datasv) ; 4247 if ( 4248 !PL_parser->filtered 4249 && PL_parser->lex_flags & LEX_EVALBYTES 4250 && PL_bufptr < PL_bufend 4251 ) { 4252 const char *s = PL_bufptr; 4253 while (s < PL_bufend) { 4254 if (*s == '\n') { 4255 SV *linestr = PL_parser->linestr; 4256 char *buf = SvPVX(linestr); 4257 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4258 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4259 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4260 STRLEN const linestart_pos = PL_parser->linestart - buf; 4261 STRLEN const last_uni_pos = 4262 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4263 STRLEN const last_lop_pos = 4264 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4265 av_push(PL_rsfp_filters, linestr); 4266 PL_parser->linestr = 4267 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4268 buf = SvPVX(PL_parser->linestr); 4269 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4270 PL_parser->bufptr = buf + bufptr_pos; 4271 PL_parser->oldbufptr = buf + oldbufptr_pos; 4272 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4273 PL_parser->linestart = buf + linestart_pos; 4274 if (PL_parser->last_uni) 4275 PL_parser->last_uni = buf + last_uni_pos; 4276 if (PL_parser->last_lop) 4277 PL_parser->last_lop = buf + last_lop_pos; 4278 SvLEN(linestr) = SvCUR(linestr); 4279 SvCUR(linestr) = s-SvPVX(linestr); 4280 PL_parser->filtered = 1; 4281 break; 4282 } 4283 s++; 4284 } 4285 } 4286 return(datasv); 4287 } 4288 4289 4290 /* Delete most recently added instance of this filter function. */ 4291 void 4292 Perl_filter_del(pTHX_ filter_t funcp) 4293 { 4294 dVAR; 4295 SV *datasv; 4296 4297 PERL_ARGS_ASSERT_FILTER_DEL; 4298 4299 #ifdef DEBUGGING 4300 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4301 FPTR2DPTR(void*, funcp))); 4302 #endif 4303 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4304 return; 4305 /* if filter is on top of stack (usual case) just pop it off */ 4306 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4307 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4308 sv_free(av_pop(PL_rsfp_filters)); 4309 4310 return; 4311 } 4312 /* we need to search for the correct entry and clear it */ 4313 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4314 } 4315 4316 4317 /* Invoke the idxth filter function for the current rsfp. */ 4318 /* maxlen 0 = read one text line */ 4319 I32 4320 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4321 { 4322 dVAR; 4323 filter_t funcp; 4324 SV *datasv = NULL; 4325 /* This API is bad. It should have been using unsigned int for maxlen. 4326 Not sure if we want to change the API, but if not we should sanity 4327 check the value here. */ 4328 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4329 4330 PERL_ARGS_ASSERT_FILTER_READ; 4331 4332 if (!PL_parser || !PL_rsfp_filters) 4333 return -1; 4334 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4335 /* Provide a default input filter to make life easy. */ 4336 /* Note that we append to the line. This is handy. */ 4337 DEBUG_P(PerlIO_printf(Perl_debug_log, 4338 "filter_read %d: from rsfp\n", idx)); 4339 if (correct_length) { 4340 /* Want a block */ 4341 int len ; 4342 const int old_len = SvCUR(buf_sv); 4343 4344 /* ensure buf_sv is large enough */ 4345 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4346 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4347 correct_length)) <= 0) { 4348 if (PerlIO_error(PL_rsfp)) 4349 return -1; /* error */ 4350 else 4351 return 0 ; /* end of file */ 4352 } 4353 SvCUR_set(buf_sv, old_len + len) ; 4354 SvPVX(buf_sv)[old_len + len] = '\0'; 4355 } else { 4356 /* Want a line */ 4357 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4358 if (PerlIO_error(PL_rsfp)) 4359 return -1; /* error */ 4360 else 4361 return 0 ; /* end of file */ 4362 } 4363 } 4364 return SvCUR(buf_sv); 4365 } 4366 /* Skip this filter slot if filter has been deleted */ 4367 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4368 DEBUG_P(PerlIO_printf(Perl_debug_log, 4369 "filter_read %d: skipped (filter deleted)\n", 4370 idx)); 4371 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4372 } 4373 if (SvTYPE(datasv) != SVt_PVIO) { 4374 if (correct_length) { 4375 /* Want a block */ 4376 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4377 if (!remainder) return 0; /* eof */ 4378 if (correct_length > remainder) correct_length = remainder; 4379 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4380 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4381 } else { 4382 /* Want a line */ 4383 const char *s = SvEND(datasv); 4384 const char *send = SvPVX(datasv) + SvLEN(datasv); 4385 while (s < send) { 4386 if (*s == '\n') { 4387 s++; 4388 break; 4389 } 4390 s++; 4391 } 4392 if (s == send) return 0; /* eof */ 4393 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4394 SvCUR_set(datasv, s-SvPVX(datasv)); 4395 } 4396 return SvCUR(buf_sv); 4397 } 4398 /* Get function pointer hidden within datasv */ 4399 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4400 DEBUG_P(PerlIO_printf(Perl_debug_log, 4401 "filter_read %d: via function %p (%s)\n", 4402 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4403 /* Call function. The function is expected to */ 4404 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4405 /* Return: <0:error, =0:eof, >0:not eof */ 4406 return (*funcp)(aTHX_ idx, buf_sv, correct_length); 4407 } 4408 4409 STATIC char * 4410 S_filter_gets(pTHX_ SV *sv, STRLEN append) 4411 { 4412 dVAR; 4413 4414 PERL_ARGS_ASSERT_FILTER_GETS; 4415 4416 #ifdef PERL_CR_FILTER 4417 if (!PL_rsfp_filters) { 4418 filter_add(S_cr_textfilter,NULL); 4419 } 4420 #endif 4421 if (PL_rsfp_filters) { 4422 if (!append) 4423 SvCUR_set(sv, 0); /* start with empty line */ 4424 if (FILTER_READ(0, sv, 0) > 0) 4425 return ( SvPVX(sv) ) ; 4426 else 4427 return NULL ; 4428 } 4429 else 4430 return (sv_gets(sv, PL_rsfp, append)); 4431 } 4432 4433 STATIC HV * 4434 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4435 { 4436 dVAR; 4437 GV *gv; 4438 4439 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4440 4441 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 4442 return PL_curstash; 4443 4444 if (len > 2 && 4445 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') && 4446 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4447 { 4448 return GvHV(gv); /* Foo:: */ 4449 } 4450 4451 /* use constant CLASS => 'MyClass' */ 4452 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4453 if (gv && GvCV(gv)) { 4454 SV * const sv = cv_const_sv(GvCV(gv)); 4455 if (sv) 4456 pkgname = SvPV_const(sv, len); 4457 } 4458 4459 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4460 } 4461 4462 #ifdef PERL_MAD 4463 /* 4464 * Perl_madlex 4465 * The intent of this yylex wrapper is to minimize the changes to the 4466 * tokener when we aren't interested in collecting madprops. It remains 4467 * to be seen how successful this strategy will be... 4468 */ 4469 4470 int 4471 Perl_madlex(pTHX) 4472 { 4473 int optype; 4474 char *s = PL_bufptr; 4475 4476 /* make sure PL_thiswhite is initialized */ 4477 PL_thiswhite = 0; 4478 PL_thismad = 0; 4479 4480 /* previous token ate up our whitespace? */ 4481 if (!PL_lasttoke && PL_nextwhite) { 4482 PL_thiswhite = PL_nextwhite; 4483 PL_nextwhite = 0; 4484 } 4485 4486 /* isolate the token, and figure out where it is without whitespace */ 4487 PL_realtokenstart = -1; 4488 PL_thistoken = 0; 4489 optype = yylex(); 4490 s = PL_bufptr; 4491 assert(PL_curforce < 0); 4492 4493 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */ 4494 if (!PL_thistoken) { 4495 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop)) 4496 PL_thistoken = newSVpvs(""); 4497 else { 4498 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 4499 PL_thistoken = newSVpvn(tstart, s - tstart); 4500 } 4501 } 4502 if (PL_thismad) /* install head */ 4503 CURMAD('X', PL_thistoken); 4504 } 4505 4506 /* last whitespace of a sublex? */ 4507 if (optype == ')' && PL_endwhite) { 4508 CURMAD('X', PL_endwhite); 4509 } 4510 4511 if (!PL_thismad) { 4512 4513 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */ 4514 if (!PL_thiswhite && !PL_endwhite && !optype) { 4515 sv_free(PL_thistoken); 4516 PL_thistoken = 0; 4517 return 0; 4518 } 4519 4520 /* put off final whitespace till peg */ 4521 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) { 4522 PL_nextwhite = PL_thiswhite; 4523 PL_thiswhite = 0; 4524 } 4525 else if (PL_thisopen) { 4526 CURMAD('q', PL_thisopen); 4527 if (PL_thistoken) 4528 sv_free(PL_thistoken); 4529 PL_thistoken = 0; 4530 } 4531 else { 4532 /* Store actual token text as madprop X */ 4533 CURMAD('X', PL_thistoken); 4534 } 4535 4536 if (PL_thiswhite) { 4537 /* add preceding whitespace as madprop _ */ 4538 CURMAD('_', PL_thiswhite); 4539 } 4540 4541 if (PL_thisstuff) { 4542 /* add quoted material as madprop = */ 4543 CURMAD('=', PL_thisstuff); 4544 } 4545 4546 if (PL_thisclose) { 4547 /* add terminating quote as madprop Q */ 4548 CURMAD('Q', PL_thisclose); 4549 } 4550 } 4551 4552 /* special processing based on optype */ 4553 4554 switch (optype) { 4555 4556 /* opval doesn't need a TOKEN since it can already store mp */ 4557 case WORD: 4558 case METHOD: 4559 case FUNCMETH: 4560 case THING: 4561 case PMFUNC: 4562 case PRIVATEREF: 4563 case FUNC0SUB: 4564 case UNIOPSUB: 4565 case LSTOPSUB: 4566 if (pl_yylval.opval) 4567 append_madprops(PL_thismad, pl_yylval.opval, 0); 4568 PL_thismad = 0; 4569 return optype; 4570 4571 /* fake EOF */ 4572 case 0: 4573 optype = PEG; 4574 if (PL_endwhite) { 4575 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0); 4576 PL_endwhite = 0; 4577 } 4578 break; 4579 4580 /* pval */ 4581 case LABEL: 4582 break; 4583 4584 case ']': 4585 case '}': 4586 if (PL_faketokens) 4587 break; 4588 /* remember any fake bracket that lexer is about to discard */ 4589 if (PL_lex_brackets == 1 && 4590 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK)) 4591 { 4592 s = PL_bufptr; 4593 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4594 s++; 4595 if (*s == '}') { 4596 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr); 4597 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4598 PL_thiswhite = 0; 4599 PL_bufptr = s - 1; 4600 break; /* don't bother looking for trailing comment */ 4601 } 4602 else 4603 s = PL_bufptr; 4604 } 4605 if (optype == ']') 4606 break; 4607 /* FALLTHROUGH */ 4608 4609 /* attach a trailing comment to its statement instead of next token */ 4610 case ';': 4611 if (PL_faketokens) 4612 break; 4613 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) { 4614 s = PL_bufptr; 4615 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4616 s++; 4617 if (*s == '\n' || *s == '#') { 4618 while (s < PL_bufend && *s != '\n') 4619 s++; 4620 if (s < PL_bufend) 4621 s++; 4622 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr); 4623 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4624 PL_thiswhite = 0; 4625 PL_bufptr = s; 4626 } 4627 } 4628 break; 4629 4630 /* ival */ 4631 default: 4632 break; 4633 4634 } 4635 4636 /* Create new token struct. Note: opvals return early above. */ 4637 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad); 4638 PL_thismad = 0; 4639 return optype; 4640 } 4641 #endif 4642 4643 STATIC char * 4644 S_tokenize_use(pTHX_ int is_use, char *s) { 4645 dVAR; 4646 4647 PERL_ARGS_ASSERT_TOKENIZE_USE; 4648 4649 if (PL_expect != XSTATE) 4650 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4651 is_use ? "use" : "no")); 4652 PL_expect = XTERM; 4653 s = SKIPSPACE1(s); 4654 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4655 s = force_version(s, TRUE); 4656 if (*s == ';' || *s == '}' 4657 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) { 4658 start_force(PL_curforce); 4659 NEXTVAL_NEXTTOKE.opval = NULL; 4660 force_next(WORD); 4661 } 4662 else if (*s == 'v') { 4663 s = force_word(s,WORD,FALSE,TRUE); 4664 s = force_version(s, FALSE); 4665 } 4666 } 4667 else { 4668 s = force_word(s,WORD,FALSE,TRUE); 4669 s = force_version(s, FALSE); 4670 } 4671 pl_yylval.ival = is_use; 4672 return s; 4673 } 4674 #ifdef DEBUGGING 4675 static const char* const exp_name[] = 4676 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4677 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR" 4678 }; 4679 #endif 4680 4681 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l) 4682 STATIC bool 4683 S_word_takes_any_delimeter(char *p, STRLEN len) 4684 { 4685 return (len == 1 && strchr("msyq", p[0])) || 4686 (len == 2 && ( 4687 (p[0] == 't' && p[1] == 'r') || 4688 (p[0] == 'q' && strchr("qwxr", p[1])))); 4689 } 4690 4691 static void 4692 S_check_scalar_slice(pTHX_ char *s) 4693 { 4694 s++; 4695 while (*s == ' ' || *s == '\t') s++; 4696 if (*s == 'q' && s[1] == 'w' 4697 && !isWORDCHAR_lazy_if(s+2,UTF)) 4698 return; 4699 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s))) 4700 s += UTF ? UTF8SKIP(s) : 1; 4701 if (*s == '}' || *s == ']') 4702 pl_yylval.ival = OPpSLICEWARNING; 4703 } 4704 4705 /* 4706 yylex 4707 4708 Works out what to call the token just pulled out of the input 4709 stream. The yacc parser takes care of taking the ops we return and 4710 stitching them into a tree. 4711 4712 Returns: 4713 The type of the next token 4714 4715 Structure: 4716 Switch based on the current state: 4717 - if we already built the token before, use it 4718 - if we have a case modifier in a string, deal with that 4719 - handle other cases of interpolation inside a string 4720 - scan the next line if we are inside a format 4721 In the normal state switch on the next character: 4722 - default: 4723 if alphabetic, go to key lookup 4724 unrecoginized character - croak 4725 - 0/4/26: handle end-of-line or EOF 4726 - cases for whitespace 4727 - \n and #: handle comments and line numbers 4728 - various operators, brackets and sigils 4729 - numbers 4730 - quotes 4731 - 'v': vstrings (or go to key lookup) 4732 - 'x' repetition operator (or go to key lookup) 4733 - other ASCII alphanumerics (key lookup begins here): 4734 word before => ? 4735 keyword plugin 4736 scan built-in keyword (but do nothing with it yet) 4737 check for statement label 4738 check for lexical subs 4739 goto just_a_word if there is one 4740 see whether built-in keyword is overridden 4741 switch on keyword number: 4742 - default: just_a_word: 4743 not a built-in keyword; handle bareword lookup 4744 disambiguate between method and sub call 4745 fall back to bareword 4746 - cases for built-in keywords 4747 */ 4748 4749 4750 int 4751 Perl_yylex(pTHX) 4752 { 4753 dVAR; 4754 char *s = PL_bufptr; 4755 char *d; 4756 STRLEN len; 4757 bool bof = FALSE; 4758 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil); 4759 U8 formbrack = 0; 4760 U32 fake_eof = 0; 4761 4762 /* orig_keyword, gvp, and gv are initialized here because 4763 * jump to the label just_a_word_zero can bypass their 4764 * initialization later. */ 4765 I32 orig_keyword = 0; 4766 GV *gv = NULL; 4767 GV **gvp = NULL; 4768 4769 DEBUG_T( { 4770 SV* tmp = newSVpvs(""); 4771 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n", 4772 (IV)CopLINE(PL_curcop), 4773 lex_state_names[PL_lex_state], 4774 exp_name[PL_expect], 4775 pv_display(tmp, s, strlen(s), 0, 60)); 4776 SvREFCNT_dec(tmp); 4777 } ); 4778 4779 switch (PL_lex_state) { 4780 case LEX_NORMAL: 4781 case LEX_INTERPNORMAL: 4782 break; 4783 4784 /* when we've already built the next token, just pull it out of the queue */ 4785 case LEX_KNOWNEXT: 4786 #ifdef PERL_MAD 4787 PL_lasttoke--; 4788 pl_yylval = PL_nexttoke[PL_lasttoke].next_val; 4789 if (PL_madskills) { 4790 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad; 4791 PL_nexttoke[PL_lasttoke].next_mad = 0; 4792 if (PL_thismad && PL_thismad->mad_key == '_') { 4793 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val); 4794 PL_thismad->mad_val = 0; 4795 mad_free(PL_thismad); 4796 PL_thismad = 0; 4797 } 4798 } 4799 if (!PL_lasttoke) { 4800 PL_lex_state = PL_lex_defer; 4801 PL_expect = PL_lex_expect; 4802 PL_lex_defer = LEX_NORMAL; 4803 if (!PL_nexttoke[PL_lasttoke].next_type) 4804 return yylex(); 4805 } 4806 #else 4807 PL_nexttoke--; 4808 pl_yylval = PL_nextval[PL_nexttoke]; 4809 if (!PL_nexttoke) { 4810 PL_lex_state = PL_lex_defer; 4811 PL_expect = PL_lex_expect; 4812 PL_lex_defer = LEX_NORMAL; 4813 } 4814 #endif 4815 { 4816 I32 next_type; 4817 #ifdef PERL_MAD 4818 next_type = PL_nexttoke[PL_lasttoke].next_type; 4819 #else 4820 next_type = PL_nexttype[PL_nexttoke]; 4821 #endif 4822 if (next_type & (7<<24)) { 4823 if (next_type & (1<<24)) { 4824 if (PL_lex_brackets > 100) 4825 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 4826 PL_lex_brackstack[PL_lex_brackets++] = 4827 (char) ((next_type >> 16) & 0xff); 4828 } 4829 if (next_type & (2<<24)) 4830 PL_lex_allbrackets++; 4831 if (next_type & (4<<24)) 4832 PL_lex_allbrackets--; 4833 next_type &= 0xffff; 4834 } 4835 return REPORT(next_type == 'p' ? pending_ident() : next_type); 4836 } 4837 4838 /* interpolated case modifiers like \L \U, including \Q and \E. 4839 when we get here, PL_bufptr is at the \ 4840 */ 4841 case LEX_INTERPCASEMOD: 4842 #ifdef DEBUGGING 4843 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 4844 Perl_croak(aTHX_ 4845 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 4846 PL_bufptr, PL_bufend, *PL_bufptr); 4847 #endif 4848 /* handle \E or end of string */ 4849 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 4850 /* if at a \E */ 4851 if (PL_lex_casemods) { 4852 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 4853 PL_lex_casestack[PL_lex_casemods] = '\0'; 4854 4855 if (PL_bufptr != PL_bufend 4856 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 4857 || oldmod == 'F')) { 4858 PL_bufptr += 2; 4859 PL_lex_state = LEX_INTERPCONCAT; 4860 #ifdef PERL_MAD 4861 if (PL_madskills) 4862 PL_thistoken = newSVpvs("\\E"); 4863 #endif 4864 } 4865 PL_lex_allbrackets--; 4866 return REPORT(')'); 4867 } 4868 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 4869 /* Got an unpaired \E */ 4870 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4871 "Useless use of \\E"); 4872 } 4873 #ifdef PERL_MAD 4874 while (PL_bufptr != PL_bufend && 4875 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') { 4876 if (PL_madskills) { 4877 if (!PL_thiswhite) 4878 PL_thiswhite = newSVpvs(""); 4879 sv_catpvn(PL_thiswhite, PL_bufptr, 2); 4880 } 4881 PL_bufptr += 2; 4882 } 4883 #else 4884 if (PL_bufptr != PL_bufend) 4885 PL_bufptr += 2; 4886 #endif 4887 PL_lex_state = LEX_INTERPCONCAT; 4888 return yylex(); 4889 } 4890 else { 4891 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4892 "### Saw case modifier\n"); }); 4893 s = PL_bufptr + 1; 4894 if (s[1] == '\\' && s[2] == 'E') { 4895 #ifdef PERL_MAD 4896 if (PL_madskills) { 4897 if (!PL_thiswhite) 4898 PL_thiswhite = newSVpvs(""); 4899 sv_catpvn(PL_thiswhite, PL_bufptr, 4); 4900 } 4901 #endif 4902 PL_bufptr = s + 3; 4903 PL_lex_state = LEX_INTERPCONCAT; 4904 return yylex(); 4905 } 4906 else { 4907 I32 tmp; 4908 if (!PL_madskills) /* when just compiling don't need correct */ 4909 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 4910 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 4911 if ((*s == 'L' || *s == 'U' || *s == 'F') && 4912 (strchr(PL_lex_casestack, 'L') 4913 || strchr(PL_lex_casestack, 'U') 4914 || strchr(PL_lex_casestack, 'F'))) { 4915 PL_lex_casestack[--PL_lex_casemods] = '\0'; 4916 PL_lex_allbrackets--; 4917 return REPORT(')'); 4918 } 4919 if (PL_lex_casemods > 10) 4920 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 4921 PL_lex_casestack[PL_lex_casemods++] = *s; 4922 PL_lex_casestack[PL_lex_casemods] = '\0'; 4923 PL_lex_state = LEX_INTERPCONCAT; 4924 start_force(PL_curforce); 4925 NEXTVAL_NEXTTOKE.ival = 0; 4926 force_next((2<<24)|'('); 4927 start_force(PL_curforce); 4928 if (*s == 'l') 4929 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 4930 else if (*s == 'u') 4931 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 4932 else if (*s == 'L') 4933 NEXTVAL_NEXTTOKE.ival = OP_LC; 4934 else if (*s == 'U') 4935 NEXTVAL_NEXTTOKE.ival = OP_UC; 4936 else if (*s == 'Q') 4937 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 4938 else if (*s == 'F') 4939 NEXTVAL_NEXTTOKE.ival = OP_FC; 4940 else 4941 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 4942 if (PL_madskills) { 4943 SV* const tmpsv = newSVpvs("\\ "); 4944 /* replace the space with the character we want to escape 4945 */ 4946 SvPVX(tmpsv)[1] = *s; 4947 curmad('_', tmpsv); 4948 } 4949 PL_bufptr = s + 1; 4950 } 4951 force_next(FUNC); 4952 if (PL_lex_starts) { 4953 s = PL_bufptr; 4954 PL_lex_starts = 0; 4955 #ifdef PERL_MAD 4956 if (PL_madskills) { 4957 if (PL_thistoken) 4958 sv_free(PL_thistoken); 4959 PL_thistoken = newSVpvs(""); 4960 } 4961 #endif 4962 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4963 if (PL_lex_casemods == 1 && PL_lex_inpat) 4964 OPERATOR(','); 4965 else 4966 Aop(OP_CONCAT); 4967 } 4968 else 4969 return yylex(); 4970 } 4971 4972 case LEX_INTERPPUSH: 4973 return REPORT(sublex_push()); 4974 4975 case LEX_INTERPSTART: 4976 if (PL_bufptr == PL_bufend) 4977 return REPORT(sublex_done()); 4978 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, 4979 "### Interpolated variable\n"); }); 4980 PL_expect = XTERM; 4981 /* for /@a/, we leave the joining for the regex engine to do 4982 * (unless we're within \Q etc) */ 4983 PL_lex_dojoin = (*PL_bufptr == '@' 4984 && (!PL_lex_inpat || PL_lex_casemods)); 4985 PL_lex_state = LEX_INTERPNORMAL; 4986 if (PL_lex_dojoin) { 4987 start_force(PL_curforce); 4988 NEXTVAL_NEXTTOKE.ival = 0; 4989 force_next(','); 4990 start_force(PL_curforce); 4991 force_ident("\"", '$'); 4992 start_force(PL_curforce); 4993 NEXTVAL_NEXTTOKE.ival = 0; 4994 force_next('$'); 4995 start_force(PL_curforce); 4996 NEXTVAL_NEXTTOKE.ival = 0; 4997 force_next((2<<24)|'('); 4998 start_force(PL_curforce); 4999 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 5000 force_next(FUNC); 5001 } 5002 /* Convert (?{...}) and friends to 'do {...}' */ 5003 if (PL_lex_inpat && *PL_bufptr == '(') { 5004 PL_parser->lex_shared->re_eval_start = PL_bufptr; 5005 PL_bufptr += 2; 5006 if (*PL_bufptr != '{') 5007 PL_bufptr++; 5008 start_force(PL_curforce); 5009 /* XXX probably need a CURMAD(something) here */ 5010 PL_expect = XTERMBLOCK; 5011 force_next(DO); 5012 } 5013 5014 if (PL_lex_starts++) { 5015 s = PL_bufptr; 5016 #ifdef PERL_MAD 5017 if (PL_madskills) { 5018 if (PL_thistoken) 5019 sv_free(PL_thistoken); 5020 PL_thistoken = newSVpvs(""); 5021 } 5022 #endif 5023 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5024 if (!PL_lex_casemods && PL_lex_inpat) 5025 OPERATOR(','); 5026 else 5027 Aop(OP_CONCAT); 5028 } 5029 return yylex(); 5030 5031 case LEX_INTERPENDMAYBE: 5032 if (intuit_more(PL_bufptr)) { 5033 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 5034 break; 5035 } 5036 /* FALL THROUGH */ 5037 5038 case LEX_INTERPEND: 5039 if (PL_lex_dojoin) { 5040 const U8 dojoin_was = PL_lex_dojoin; 5041 PL_lex_dojoin = FALSE; 5042 PL_lex_state = LEX_INTERPCONCAT; 5043 #ifdef PERL_MAD 5044 if (PL_madskills) { 5045 if (PL_thistoken) 5046 sv_free(PL_thistoken); 5047 PL_thistoken = newSVpvs(""); 5048 } 5049 #endif 5050 PL_lex_allbrackets--; 5051 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN); 5052 } 5053 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 5054 && SvEVALED(PL_lex_repl)) 5055 { 5056 if (PL_bufptr != PL_bufend) 5057 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 5058 PL_lex_repl = NULL; 5059 } 5060 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 5061 re_eval_str. If the here-doc body’s length equals the previous 5062 value of re_eval_start, re_eval_start will now be null. So 5063 check re_eval_str as well. */ 5064 if (PL_parser->lex_shared->re_eval_start 5065 || PL_parser->lex_shared->re_eval_str) { 5066 SV *sv; 5067 if (*PL_bufptr != ')') 5068 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 5069 PL_bufptr++; 5070 /* having compiled a (?{..}) expression, return the original 5071 * text too, as a const */ 5072 if (PL_parser->lex_shared->re_eval_str) { 5073 sv = PL_parser->lex_shared->re_eval_str; 5074 PL_parser->lex_shared->re_eval_str = NULL; 5075 SvCUR_set(sv, 5076 PL_bufptr - PL_parser->lex_shared->re_eval_start); 5077 SvPV_shrink_to_cur(sv); 5078 } 5079 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 5080 PL_bufptr - PL_parser->lex_shared->re_eval_start); 5081 start_force(PL_curforce); 5082 /* XXX probably need a CURMAD(something) here */ 5083 NEXTVAL_NEXTTOKE.opval = 5084 (OP*)newSVOP(OP_CONST, 0, 5085 sv); 5086 force_next(THING); 5087 PL_parser->lex_shared->re_eval_start = NULL; 5088 PL_expect = XTERM; 5089 return REPORT(','); 5090 } 5091 5092 /* FALLTHROUGH */ 5093 case LEX_INTERPCONCAT: 5094 #ifdef DEBUGGING 5095 if (PL_lex_brackets) 5096 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 5097 (long) PL_lex_brackets); 5098 #endif 5099 if (PL_bufptr == PL_bufend) 5100 return REPORT(sublex_done()); 5101 5102 /* m'foo' still needs to be parsed for possible (?{...}) */ 5103 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 5104 SV *sv = newSVsv(PL_linestr); 5105 sv = tokeq(sv); 5106 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 5107 s = PL_bufend; 5108 } 5109 else { 5110 s = scan_const(PL_bufptr); 5111 if (*s == '\\') 5112 PL_lex_state = LEX_INTERPCASEMOD; 5113 else 5114 PL_lex_state = LEX_INTERPSTART; 5115 } 5116 5117 if (s != PL_bufptr) { 5118 start_force(PL_curforce); 5119 if (PL_madskills) { 5120 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr)); 5121 } 5122 NEXTVAL_NEXTTOKE = pl_yylval; 5123 PL_expect = XTERM; 5124 force_next(THING); 5125 if (PL_lex_starts++) { 5126 #ifdef PERL_MAD 5127 if (PL_madskills) { 5128 if (PL_thistoken) 5129 sv_free(PL_thistoken); 5130 PL_thistoken = newSVpvs(""); 5131 } 5132 #endif 5133 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5134 if (!PL_lex_casemods && PL_lex_inpat) 5135 OPERATOR(','); 5136 else 5137 Aop(OP_CONCAT); 5138 } 5139 else { 5140 PL_bufptr = s; 5141 return yylex(); 5142 } 5143 } 5144 5145 return yylex(); 5146 case LEX_FORMLINE: 5147 s = scan_formline(PL_bufptr); 5148 if (!PL_lex_formbrack) 5149 { 5150 formbrack = 1; 5151 goto rightbracket; 5152 } 5153 PL_bufptr = s; 5154 return yylex(); 5155 } 5156 5157 /* We really do *not* want PL_linestr ever becoming a COW. */ 5158 assert (!SvIsCOW(PL_linestr)); 5159 s = PL_bufptr; 5160 PL_oldoldbufptr = PL_oldbufptr; 5161 PL_oldbufptr = s; 5162 PL_parser->saw_infix_sigil = 0; 5163 5164 retry: 5165 #ifdef PERL_MAD 5166 if (PL_thistoken) { 5167 sv_free(PL_thistoken); 5168 PL_thistoken = 0; 5169 } 5170 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */ 5171 #endif 5172 switch (*s) { 5173 default: 5174 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s)) 5175 goto keylookup; 5176 { 5177 SV *dsv = newSVpvs_flags("", SVs_TEMP); 5178 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s, 5179 UTF8SKIP(s), 5180 SVs_TEMP | SVf_UTF8), 5181 10, UNI_DISPLAY_ISPRINT) 5182 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 5183 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); 5184 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 5185 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; 5186 } else { 5187 d = PL_linestart; 5188 } 5189 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c, 5190 UTF8fARG(UTF, (s - d), d), 5191 (int) len + 1); 5192 } 5193 case 4: 5194 case 26: 5195 goto fake_eof; /* emulate EOF on ^D or ^Z */ 5196 case 0: 5197 #ifdef PERL_MAD 5198 if (PL_madskills) 5199 PL_faketokens = 0; 5200 #endif 5201 if ((!PL_rsfp || PL_lex_inwhat) 5202 && (!PL_parser->filtered || s+1 < PL_bufend)) { 5203 PL_last_uni = 0; 5204 PL_last_lop = 0; 5205 if (PL_lex_brackets && 5206 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) { 5207 yyerror((const char *) 5208 (PL_lex_formbrack 5209 ? "Format not terminated" 5210 : "Missing right curly or square bracket")); 5211 } 5212 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5213 "### Tokener got EOF\n"); 5214 } ); 5215 TOKEN(0); 5216 } 5217 if (s++ < PL_bufend) 5218 goto retry; /* ignore stray nulls */ 5219 PL_last_uni = 0; 5220 PL_last_lop = 0; 5221 if (!PL_in_eval && !PL_preambled) { 5222 PL_preambled = TRUE; 5223 #ifdef PERL_MAD 5224 if (PL_madskills) 5225 PL_faketokens = 1; 5226 #endif 5227 if (PL_perldb) { 5228 /* Generate a string of Perl code to load the debugger. 5229 * If PERL5DB is set, it will return the contents of that, 5230 * otherwise a compile-time require of perl5db.pl. */ 5231 5232 const char * const pdb = PerlEnv_getenv("PERL5DB"); 5233 5234 if (pdb) { 5235 sv_setpv(PL_linestr, pdb); 5236 sv_catpvs(PL_linestr,";"); 5237 } else { 5238 SETERRNO(0,SS_NORMAL); 5239 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 5240 } 5241 PL_parser->preambling = CopLINE(PL_curcop); 5242 } else 5243 sv_setpvs(PL_linestr,""); 5244 if (PL_preambleav) { 5245 SV **svp = AvARRAY(PL_preambleav); 5246 SV **const end = svp + AvFILLp(PL_preambleav); 5247 while(svp <= end) { 5248 sv_catsv(PL_linestr, *svp); 5249 ++svp; 5250 sv_catpvs(PL_linestr, ";"); 5251 } 5252 sv_free(MUTABLE_SV(PL_preambleav)); 5253 PL_preambleav = NULL; 5254 } 5255 if (PL_minus_E) 5256 sv_catpvs(PL_linestr, 5257 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 5258 if (PL_minus_n || PL_minus_p) { 5259 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 5260 if (PL_minus_l) 5261 sv_catpvs(PL_linestr,"chomp;"); 5262 if (PL_minus_a) { 5263 if (PL_minus_F) { 5264 if ((*PL_splitstr == '/' || *PL_splitstr == '\'' 5265 || *PL_splitstr == '"') 5266 && strchr(PL_splitstr + 1, *PL_splitstr)) 5267 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 5268 else { 5269 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 5270 bytes can be used as quoting characters. :-) */ 5271 const char *splits = PL_splitstr; 5272 sv_catpvs(PL_linestr, "our @F=split(q\0"); 5273 do { 5274 /* Need to \ \s */ 5275 if (*splits == '\\') 5276 sv_catpvn(PL_linestr, splits, 1); 5277 sv_catpvn(PL_linestr, splits, 1); 5278 } while (*splits++); 5279 /* This loop will embed the trailing NUL of 5280 PL_linestr as the last thing it does before 5281 terminating. */ 5282 sv_catpvs(PL_linestr, ");"); 5283 } 5284 } 5285 else 5286 sv_catpvs(PL_linestr,"our @F=split(' ');"); 5287 } 5288 } 5289 sv_catpvs(PL_linestr, "\n"); 5290 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5291 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5292 PL_last_lop = PL_last_uni = NULL; 5293 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) 5294 update_debugger_info(PL_linestr, NULL, 0); 5295 goto retry; 5296 } 5297 do { 5298 fake_eof = 0; 5299 bof = PL_rsfp ? TRUE : FALSE; 5300 if (0) { 5301 fake_eof: 5302 fake_eof = LEX_FAKE_EOF; 5303 } 5304 PL_bufptr = PL_bufend; 5305 COPLINE_INC_WITH_HERELINES; 5306 if (!lex_next_chunk(fake_eof)) { 5307 CopLINE_dec(PL_curcop); 5308 s = PL_bufptr; 5309 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 5310 } 5311 CopLINE_dec(PL_curcop); 5312 #ifdef PERL_MAD 5313 if (!PL_rsfp) 5314 PL_realtokenstart = -1; 5315 #endif 5316 s = PL_bufptr; 5317 /* If it looks like the start of a BOM or raw UTF-16, 5318 * check if it in fact is. */ 5319 if (bof && PL_rsfp && 5320 (*s == 0 || 5321 *(U8*)s == BOM_UTF8_FIRST_BYTE || 5322 *(U8*)s >= 0xFE || 5323 s[1] == 0)) { 5324 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 5325 bof = (offset == (Off_t)SvCUR(PL_linestr)); 5326 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 5327 /* offset may include swallowed CR */ 5328 if (!bof) 5329 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 5330 #endif 5331 if (bof) { 5332 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5333 s = swallow_bom((U8*)s); 5334 } 5335 } 5336 if (PL_parser->in_pod) { 5337 /* Incest with pod. */ 5338 #ifdef PERL_MAD 5339 if (PL_madskills) 5340 sv_catsv(PL_thiswhite, PL_linestr); 5341 #endif 5342 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { 5343 sv_setpvs(PL_linestr, ""); 5344 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5345 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5346 PL_last_lop = PL_last_uni = NULL; 5347 PL_parser->in_pod = 0; 5348 } 5349 } 5350 if (PL_rsfp || PL_parser->filtered) 5351 incline(s); 5352 } while (PL_parser->in_pod); 5353 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 5354 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5355 PL_last_lop = PL_last_uni = NULL; 5356 if (CopLINE(PL_curcop) == 1) { 5357 while (s < PL_bufend && isSPACE(*s)) 5358 s++; 5359 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 5360 s++; 5361 #ifdef PERL_MAD 5362 if (PL_madskills) 5363 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart); 5364 #endif 5365 d = NULL; 5366 if (!PL_in_eval) { 5367 if (*s == '#' && *(s+1) == '!') 5368 d = s + 2; 5369 #ifdef ALTERNATE_SHEBANG 5370 else { 5371 static char const as[] = ALTERNATE_SHEBANG; 5372 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 5373 d = s + (sizeof(as) - 1); 5374 } 5375 #endif /* ALTERNATE_SHEBANG */ 5376 } 5377 if (d) { 5378 char *ipath; 5379 char *ipathend; 5380 5381 while (isSPACE(*d)) 5382 d++; 5383 ipath = d; 5384 while (*d && !isSPACE(*d)) 5385 d++; 5386 ipathend = d; 5387 5388 #ifdef ARG_ZERO_IS_SCRIPT 5389 if (ipathend > ipath) { 5390 /* 5391 * HP-UX (at least) sets argv[0] to the script name, 5392 * which makes $^X incorrect. And Digital UNIX and Linux, 5393 * at least, set argv[0] to the basename of the Perl 5394 * interpreter. So, having found "#!", we'll set it right. 5395 */ 5396 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 5397 SVt_PV)); /* $^X */ 5398 assert(SvPOK(x) || SvGMAGICAL(x)); 5399 if (sv_eq(x, CopFILESV(PL_curcop))) { 5400 sv_setpvn(x, ipath, ipathend - ipath); 5401 SvSETMAGIC(x); 5402 } 5403 else { 5404 STRLEN blen; 5405 STRLEN llen; 5406 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen); 5407 const char * const lstart = SvPV_const(x,llen); 5408 if (llen < blen) { 5409 bstart += blen - llen; 5410 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 5411 sv_setpvn(x, ipath, ipathend - ipath); 5412 SvSETMAGIC(x); 5413 } 5414 } 5415 } 5416 TAINT_NOT; /* $^X is always tainted, but that's OK */ 5417 } 5418 #endif /* ARG_ZERO_IS_SCRIPT */ 5419 5420 /* 5421 * Look for options. 5422 */ 5423 d = instr(s,"perl -"); 5424 if (!d) { 5425 d = instr(s,"perl"); 5426 #if defined(DOSISH) 5427 /* avoid getting into infinite loops when shebang 5428 * line contains "Perl" rather than "perl" */ 5429 if (!d) { 5430 for (d = ipathend-4; d >= ipath; --d) { 5431 if ((*d == 'p' || *d == 'P') 5432 && !ibcmp(d, "perl", 4)) 5433 { 5434 break; 5435 } 5436 } 5437 if (d < ipath) 5438 d = NULL; 5439 } 5440 #endif 5441 } 5442 #ifdef ALTERNATE_SHEBANG 5443 /* 5444 * If the ALTERNATE_SHEBANG on this system starts with a 5445 * character that can be part of a Perl expression, then if 5446 * we see it but not "perl", we're probably looking at the 5447 * start of Perl code, not a request to hand off to some 5448 * other interpreter. Similarly, if "perl" is there, but 5449 * not in the first 'word' of the line, we assume the line 5450 * contains the start of the Perl program. 5451 */ 5452 if (d && *s != '#') { 5453 const char *c = ipath; 5454 while (*c && !strchr("; \t\r\n\f\v#", *c)) 5455 c++; 5456 if (c < d) 5457 d = NULL; /* "perl" not in first word; ignore */ 5458 else 5459 *s = '#'; /* Don't try to parse shebang line */ 5460 } 5461 #endif /* ALTERNATE_SHEBANG */ 5462 if (!d && 5463 *s == '#' && 5464 ipathend > ipath && 5465 !PL_minus_c && 5466 !instr(s,"indir") && 5467 instr(PL_origargv[0],"perl")) 5468 { 5469 dVAR; 5470 char **newargv; 5471 5472 *ipathend = '\0'; 5473 s = ipathend + 1; 5474 while (s < PL_bufend && isSPACE(*s)) 5475 s++; 5476 if (s < PL_bufend) { 5477 Newx(newargv,PL_origargc+3,char*); 5478 newargv[1] = s; 5479 while (s < PL_bufend && !isSPACE(*s)) 5480 s++; 5481 *s = '\0'; 5482 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 5483 } 5484 else 5485 newargv = PL_origargv; 5486 newargv[0] = ipath; 5487 PERL_FPU_PRE_EXEC 5488 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 5489 PERL_FPU_POST_EXEC 5490 Perl_croak(aTHX_ "Can't exec %s", ipath); 5491 } 5492 if (d) { 5493 while (*d && !isSPACE(*d)) 5494 d++; 5495 while (SPACE_OR_TAB(*d)) 5496 d++; 5497 5498 if (*d++ == '-') { 5499 const bool switches_done = PL_doswitches; 5500 const U32 oldpdb = PL_perldb; 5501 const bool oldn = PL_minus_n; 5502 const bool oldp = PL_minus_p; 5503 const char *d1 = d; 5504 5505 do { 5506 bool baduni = FALSE; 5507 if (*d1 == 'C') { 5508 const char *d2 = d1 + 1; 5509 if (parse_unicode_opts((const char **)&d2) 5510 != PL_unicode) 5511 baduni = TRUE; 5512 } 5513 if (baduni || *d1 == 'M' || *d1 == 'm') { 5514 const char * const m = d1; 5515 while (*d1 && !isSPACE(*d1)) 5516 d1++; 5517 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 5518 (int)(d1 - m), m); 5519 } 5520 d1 = moreswitches(d1); 5521 } while (d1); 5522 if (PL_doswitches && !switches_done) { 5523 int argc = PL_origargc; 5524 char **argv = PL_origargv; 5525 do { 5526 argc--,argv++; 5527 } while (argc && argv[0][0] == '-' && argv[0][1]); 5528 init_argv_symbols(argc,argv); 5529 } 5530 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) || 5531 ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 5532 /* if we have already added "LINE: while (<>) {", 5533 we must not do it again */ 5534 { 5535 sv_setpvs(PL_linestr, ""); 5536 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5537 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5538 PL_last_lop = PL_last_uni = NULL; 5539 PL_preambled = FALSE; 5540 if (PERLDB_LINE || PERLDB_SAVESRC) 5541 (void)gv_fetchfile(PL_origfilename); 5542 goto retry; 5543 } 5544 } 5545 } 5546 } 5547 } 5548 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5549 PL_lex_state = LEX_FORMLINE; 5550 start_force(PL_curforce); 5551 NEXTVAL_NEXTTOKE.ival = 0; 5552 force_next(FORMRBRACK); 5553 TOKEN(';'); 5554 } 5555 goto retry; 5556 case '\r': 5557 #ifdef PERL_STRICT_CR 5558 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 5559 Perl_croak(aTHX_ 5560 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 5561 #endif 5562 case ' ': case '\t': case '\f': case 013: 5563 #ifdef PERL_MAD 5564 PL_realtokenstart = -1; 5565 if (PL_madskills) { 5566 if (!PL_thiswhite) 5567 PL_thiswhite = newSVpvs(""); 5568 sv_catpvn(PL_thiswhite, s, 1); 5569 } 5570 #endif 5571 s++; 5572 goto retry; 5573 case '#': 5574 case '\n': 5575 #ifdef PERL_MAD 5576 PL_realtokenstart = -1; 5577 if (PL_madskills) 5578 PL_faketokens = 0; 5579 #endif 5580 if (PL_lex_state != LEX_NORMAL || 5581 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) { 5582 if (*s == '#' && s == PL_linestart && PL_in_eval 5583 && !PL_rsfp && !PL_parser->filtered) { 5584 /* handle eval qq[#line 1 "foo"\n ...] */ 5585 CopLINE_dec(PL_curcop); 5586 incline(s); 5587 } 5588 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) { 5589 s = SKIPSPACE0(s); 5590 if (!PL_in_eval || PL_rsfp || PL_parser->filtered) 5591 incline(s); 5592 } 5593 else { 5594 const bool in_comment = *s == '#'; 5595 d = s; 5596 while (d < PL_bufend && *d != '\n') 5597 d++; 5598 if (d < PL_bufend) 5599 d++; 5600 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 5601 Perl_croak(aTHX_ "panic: input overflow, %p > %p", 5602 d, PL_bufend); 5603 #ifdef PERL_MAD 5604 if (PL_madskills) 5605 PL_thiswhite = newSVpvn(s, d - s); 5606 #endif 5607 s = d; 5608 if (in_comment && d == PL_bufend 5609 && PL_lex_state == LEX_INTERPNORMAL 5610 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5611 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 5612 else incline(s); 5613 } 5614 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5615 PL_lex_state = LEX_FORMLINE; 5616 start_force(PL_curforce); 5617 NEXTVAL_NEXTTOKE.ival = 0; 5618 force_next(FORMRBRACK); 5619 TOKEN(';'); 5620 } 5621 } 5622 else { 5623 #ifdef PERL_MAD 5624 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) { 5625 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') { 5626 PL_faketokens = 0; 5627 s = SKIPSPACE0(s); 5628 TOKEN(PEG); /* make sure any #! line is accessible */ 5629 } 5630 s = SKIPSPACE0(s); 5631 } 5632 else { 5633 #endif 5634 if (PL_madskills) d = s; 5635 while (s < PL_bufend && *s != '\n') 5636 s++; 5637 if (s < PL_bufend) 5638 { 5639 s++; 5640 if (s < PL_bufend) 5641 incline(s); 5642 } 5643 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 5644 Perl_croak(aTHX_ "panic: input overflow"); 5645 #ifdef PERL_MAD 5646 if (PL_madskills && CopLINE(PL_curcop) >= 1) { 5647 if (!PL_thiswhite) 5648 PL_thiswhite = newSVpvs(""); 5649 if (CopLINE(PL_curcop) == 1) { 5650 sv_setpvs(PL_thiswhite, ""); 5651 PL_faketokens = 0; 5652 } 5653 sv_catpvn(PL_thiswhite, d, s - d); 5654 } 5655 } 5656 #endif 5657 } 5658 goto retry; 5659 case '-': 5660 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5661 I32 ftst = 0; 5662 char tmp; 5663 5664 s++; 5665 PL_bufptr = s; 5666 tmp = *s++; 5667 5668 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5669 s++; 5670 5671 if (strnEQ(s,"=>",2)) { 5672 s = force_word(PL_bufptr,WORD,FALSE,FALSE); 5673 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5674 OPERATOR('-'); /* unary minus */ 5675 } 5676 switch (tmp) { 5677 case 'r': ftst = OP_FTEREAD; break; 5678 case 'w': ftst = OP_FTEWRITE; break; 5679 case 'x': ftst = OP_FTEEXEC; break; 5680 case 'o': ftst = OP_FTEOWNED; break; 5681 case 'R': ftst = OP_FTRREAD; break; 5682 case 'W': ftst = OP_FTRWRITE; break; 5683 case 'X': ftst = OP_FTREXEC; break; 5684 case 'O': ftst = OP_FTROWNED; break; 5685 case 'e': ftst = OP_FTIS; break; 5686 case 'z': ftst = OP_FTZERO; break; 5687 case 's': ftst = OP_FTSIZE; break; 5688 case 'f': ftst = OP_FTFILE; break; 5689 case 'd': ftst = OP_FTDIR; break; 5690 case 'l': ftst = OP_FTLINK; break; 5691 case 'p': ftst = OP_FTPIPE; break; 5692 case 'S': ftst = OP_FTSOCK; break; 5693 case 'u': ftst = OP_FTSUID; break; 5694 case 'g': ftst = OP_FTSGID; break; 5695 case 'k': ftst = OP_FTSVTX; break; 5696 case 'b': ftst = OP_FTBLK; break; 5697 case 'c': ftst = OP_FTCHR; break; 5698 case 't': ftst = OP_FTTTY; break; 5699 case 'T': ftst = OP_FTTEXT; break; 5700 case 'B': ftst = OP_FTBINARY; break; 5701 case 'M': case 'A': case 'C': 5702 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5703 switch (tmp) { 5704 case 'M': ftst = OP_FTMTIME; break; 5705 case 'A': ftst = OP_FTATIME; break; 5706 case 'C': ftst = OP_FTCTIME; break; 5707 default: break; 5708 } 5709 break; 5710 default: 5711 break; 5712 } 5713 if (ftst) { 5714 PL_last_uni = PL_oldbufptr; 5715 PL_last_lop_op = (OPCODE)ftst; 5716 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5717 "### Saw file test %c\n", (int)tmp); 5718 } ); 5719 FTST(ftst); 5720 } 5721 else { 5722 /* Assume it was a minus followed by a one-letter named 5723 * subroutine call (or a -bareword), then. */ 5724 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5725 "### '-%c' looked like a file test but was not\n", 5726 (int) tmp); 5727 } ); 5728 s = --PL_bufptr; 5729 } 5730 } 5731 { 5732 const char tmp = *s++; 5733 if (*s == tmp) { 5734 s++; 5735 if (PL_expect == XOPERATOR) 5736 TERM(POSTDEC); 5737 else 5738 OPERATOR(PREDEC); 5739 } 5740 else if (*s == '>') { 5741 s++; 5742 s = SKIPSPACE1(s); 5743 if (FEATURE_POSTDEREF_IS_ENABLED && ( 5744 ((*s == '$' || *s == '&') && s[1] == '*') 5745 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5746 ||((*s == '@' || *s == '%') && strchr("*[{", s[1])) 5747 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5748 )) 5749 { 5750 Perl_ck_warner_d(aTHX_ 5751 packWARN(WARN_EXPERIMENTAL__POSTDEREF), 5752 "Postfix dereference is experimental" 5753 ); 5754 PL_expect = XPOSTDEREF; 5755 TOKEN(ARROW); 5756 } 5757 if (isIDFIRST_lazy_if(s,UTF)) { 5758 s = force_word(s,METHOD,FALSE,TRUE); 5759 TOKEN(ARROW); 5760 } 5761 else if (*s == '$') 5762 OPERATOR(ARROW); 5763 else 5764 TERM(ARROW); 5765 } 5766 if (PL_expect == XOPERATOR) { 5767 if (*s == '=' && !PL_lex_allbrackets && 5768 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5769 s--; 5770 TOKEN(0); 5771 } 5772 Aop(OP_SUBTRACT); 5773 } 5774 else { 5775 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5776 check_uni(); 5777 OPERATOR('-'); /* unary minus */ 5778 } 5779 } 5780 5781 case '+': 5782 { 5783 const char tmp = *s++; 5784 if (*s == tmp) { 5785 s++; 5786 if (PL_expect == XOPERATOR) 5787 TERM(POSTINC); 5788 else 5789 OPERATOR(PREINC); 5790 } 5791 if (PL_expect == XOPERATOR) { 5792 if (*s == '=' && !PL_lex_allbrackets && 5793 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5794 s--; 5795 TOKEN(0); 5796 } 5797 Aop(OP_ADD); 5798 } 5799 else { 5800 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5801 check_uni(); 5802 OPERATOR('+'); 5803 } 5804 } 5805 5806 case '*': 5807 if (PL_expect == XPOSTDEREF) POSTDEREF('*'); 5808 if (PL_expect != XOPERATOR) { 5809 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5810 PL_expect = XOPERATOR; 5811 force_ident(PL_tokenbuf, '*'); 5812 if (!*PL_tokenbuf) 5813 PREREF('*'); 5814 TERM('*'); 5815 } 5816 s++; 5817 if (*s == '*') { 5818 s++; 5819 if (*s == '=' && !PL_lex_allbrackets && 5820 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5821 s -= 2; 5822 TOKEN(0); 5823 } 5824 PWop(OP_POW); 5825 } 5826 if (*s == '=' && !PL_lex_allbrackets && 5827 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5828 s--; 5829 TOKEN(0); 5830 } 5831 PL_parser->saw_infix_sigil = 1; 5832 Mop(OP_MULTIPLY); 5833 5834 case '%': 5835 { 5836 if (PL_expect == XOPERATOR) { 5837 if (s[1] == '=' && !PL_lex_allbrackets && 5838 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5839 TOKEN(0); 5840 ++s; 5841 PL_parser->saw_infix_sigil = 1; 5842 Mop(OP_MODULO); 5843 } 5844 else if (PL_expect == XPOSTDEREF) POSTDEREF('%'); 5845 PL_tokenbuf[0] = '%'; 5846 s = scan_ident(s, PL_tokenbuf + 1, 5847 sizeof PL_tokenbuf - 1, FALSE); 5848 pl_yylval.ival = 0; 5849 if (!PL_tokenbuf[1]) { 5850 PREREF('%'); 5851 } 5852 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 5853 if (*s == '[') 5854 PL_tokenbuf[0] = '@'; 5855 } 5856 PL_expect = XOPERATOR; 5857 force_ident_maybe_lex('%'); 5858 TERM('%'); 5859 } 5860 case '^': 5861 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5862 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5863 TOKEN(0); 5864 s++; 5865 BOop(OP_BIT_XOR); 5866 case '[': 5867 if (PL_lex_brackets > 100) 5868 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5869 PL_lex_brackstack[PL_lex_brackets++] = 0; 5870 PL_lex_allbrackets++; 5871 { 5872 const char tmp = *s++; 5873 OPERATOR(tmp); 5874 } 5875 case '~': 5876 if (s[1] == '~' 5877 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) 5878 { 5879 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 5880 TOKEN(0); 5881 s += 2; 5882 Perl_ck_warner_d(aTHX_ 5883 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 5884 "Smartmatch is experimental"); 5885 Eop(OP_SMARTMATCH); 5886 } 5887 s++; 5888 OPERATOR('~'); 5889 case ',': 5890 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 5891 TOKEN(0); 5892 s++; 5893 OPERATOR(','); 5894 case ':': 5895 if (s[1] == ':') { 5896 len = 0; 5897 goto just_a_word_zero_gv; 5898 } 5899 s++; 5900 switch (PL_expect) { 5901 OP *attrs; 5902 #ifdef PERL_MAD 5903 I32 stuffstart; 5904 #endif 5905 case XOPERATOR: 5906 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 5907 break; 5908 PL_bufptr = s; /* update in case we back off */ 5909 if (*s == '=') { 5910 Perl_croak(aTHX_ 5911 "Use of := for an empty attribute list is not allowed"); 5912 } 5913 goto grabattrs; 5914 case XATTRBLOCK: 5915 PL_expect = XBLOCK; 5916 goto grabattrs; 5917 case XATTRTERM: 5918 PL_expect = XTERMBLOCK; 5919 grabattrs: 5920 #ifdef PERL_MAD 5921 stuffstart = s - SvPVX(PL_linestr) - 1; 5922 #endif 5923 s = PEEKSPACE(s); 5924 attrs = NULL; 5925 while (isIDFIRST_lazy_if(s,UTF)) { 5926 I32 tmp; 5927 SV *sv; 5928 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5929 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5930 if (tmp < 0) tmp = -tmp; 5931 switch (tmp) { 5932 case KEY_or: 5933 case KEY_and: 5934 case KEY_for: 5935 case KEY_foreach: 5936 case KEY_unless: 5937 case KEY_if: 5938 case KEY_while: 5939 case KEY_until: 5940 goto got_attrs; 5941 default: 5942 break; 5943 } 5944 } 5945 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5946 if (*d == '(') { 5947 d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL); 5948 COPLINE_SET_FROM_MULTI_END; 5949 if (!d) { 5950 /* MUST advance bufptr here to avoid bogus 5951 "at end of line" context messages from yyerror(). 5952 */ 5953 PL_bufptr = s + len; 5954 yyerror("Unterminated attribute parameter in attribute list"); 5955 if (attrs) 5956 op_free(attrs); 5957 sv_free(sv); 5958 return REPORT(0); /* EOF indicator */ 5959 } 5960 } 5961 if (PL_lex_stuff) { 5962 sv_catsv(sv, PL_lex_stuff); 5963 attrs = op_append_elem(OP_LIST, attrs, 5964 newSVOP(OP_CONST, 0, sv)); 5965 SvREFCNT_dec(PL_lex_stuff); 5966 PL_lex_stuff = NULL; 5967 } 5968 else { 5969 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { 5970 sv_free(sv); 5971 if (PL_in_my == KEY_our) { 5972 deprecate(":unique"); 5973 } 5974 else 5975 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); 5976 } 5977 5978 /* NOTE: any CV attrs applied here need to be part of 5979 the CVf_BUILTIN_ATTRS define in cv.h! */ 5980 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { 5981 sv_free(sv); 5982 CvLVALUE_on(PL_compcv); 5983 } 5984 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { 5985 sv_free(sv); 5986 deprecate(":locked"); 5987 } 5988 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { 5989 sv_free(sv); 5990 CvMETHOD_on(PL_compcv); 5991 } 5992 /* After we've set the flags, it could be argued that 5993 we don't need to do the attributes.pm-based setting 5994 process, and shouldn't bother appending recognized 5995 flags. To experiment with that, uncomment the 5996 following "else". (Note that's already been 5997 uncommented. That keeps the above-applied built-in 5998 attributes from being intercepted (and possibly 5999 rejected) by a package's attribute routines, but is 6000 justified by the performance win for the common case 6001 of applying only built-in attributes.) */ 6002 else 6003 attrs = op_append_elem(OP_LIST, attrs, 6004 newSVOP(OP_CONST, 0, 6005 sv)); 6006 } 6007 s = PEEKSPACE(d); 6008 if (*s == ':' && s[1] != ':') 6009 s = PEEKSPACE(s+1); 6010 else if (s == d) 6011 break; /* require real whitespace or :'s */ 6012 /* XXX losing whitespace on sequential attributes here */ 6013 } 6014 { 6015 if (*s != ';' && *s != '}' && 6016 !(PL_expect == XOPERATOR 6017 ? (*s == '=' || *s == ')') 6018 : (*s == '{' || *s == '('))) { 6019 const char q = ((*s == '\'') ? '"' : '\''); 6020 /* If here for an expression, and parsed no attrs, back 6021 off. */ 6022 if (PL_expect == XOPERATOR && !attrs) { 6023 s = PL_bufptr; 6024 break; 6025 } 6026 /* MUST advance bufptr here to avoid bogus "at end of line" 6027 context messages from yyerror(). 6028 */ 6029 PL_bufptr = s; 6030 yyerror( (const char *) 6031 (*s 6032 ? Perl_form(aTHX_ "Invalid separator character " 6033 "%c%c%c in attribute list", q, *s, q) 6034 : "Unterminated attribute list" ) ); 6035 if (attrs) 6036 op_free(attrs); 6037 OPERATOR(':'); 6038 } 6039 } 6040 got_attrs: 6041 if (attrs) { 6042 start_force(PL_curforce); 6043 NEXTVAL_NEXTTOKE.opval = attrs; 6044 CURMAD('_', PL_nextwhite); 6045 force_next(THING); 6046 } 6047 #ifdef PERL_MAD 6048 if (PL_madskills) { 6049 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart, 6050 (s - SvPVX(PL_linestr)) - stuffstart); 6051 } 6052 #endif 6053 TOKEN(COLONATTR); 6054 } 6055 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 6056 s--; 6057 TOKEN(0); 6058 } 6059 PL_lex_allbrackets--; 6060 OPERATOR(':'); 6061 case '(': 6062 s++; 6063 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 6064 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 6065 else 6066 PL_expect = XTERM; 6067 s = SKIPSPACE1(s); 6068 PL_lex_allbrackets++; 6069 TOKEN('('); 6070 case ';': 6071 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 6072 TOKEN(0); 6073 CLINE; 6074 s++; 6075 OPERATOR(';'); 6076 case ')': 6077 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 6078 TOKEN(0); 6079 s++; 6080 PL_lex_allbrackets--; 6081 s = SKIPSPACE1(s); 6082 if (*s == '{') 6083 PREBLOCK(')'); 6084 TERM(')'); 6085 case ']': 6086 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6087 TOKEN(0); 6088 s++; 6089 if (PL_lex_brackets <= 0) 6090 /* diag_listed_as: Unmatched right %s bracket */ 6091 yyerror("Unmatched right square bracket"); 6092 else 6093 --PL_lex_brackets; 6094 PL_lex_allbrackets--; 6095 if (PL_lex_state == LEX_INTERPNORMAL) { 6096 if (PL_lex_brackets == 0) { 6097 if (*s == '-' && s[1] == '>') 6098 PL_lex_state = LEX_INTERPENDMAYBE; 6099 else if (*s != '[' && *s != '{') 6100 PL_lex_state = LEX_INTERPEND; 6101 } 6102 } 6103 TERM(']'); 6104 case '{': 6105 s++; 6106 leftbracket: 6107 if (PL_lex_brackets > 100) { 6108 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6109 } 6110 switch (PL_expect) { 6111 case XTERM: 6112 case XTERMORDORDOR: 6113 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6114 PL_lex_allbrackets++; 6115 OPERATOR(HASHBRACK); 6116 case XOPERATOR: 6117 while (s < PL_bufend && SPACE_OR_TAB(*s)) 6118 s++; 6119 d = s; 6120 PL_tokenbuf[0] = '\0'; 6121 if (d < PL_bufend && *d == '-') { 6122 PL_tokenbuf[0] = '-'; 6123 d++; 6124 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6125 d++; 6126 } 6127 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 6128 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6129 FALSE, &len); 6130 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6131 d++; 6132 if (*d == '}') { 6133 const char minus = (PL_tokenbuf[0] == '-'); 6134 s = force_word(s + minus, WORD, FALSE, TRUE); 6135 if (minus) 6136 force_next('-'); 6137 } 6138 } 6139 /* FALL THROUGH */ 6140 case XATTRBLOCK: 6141 case XBLOCK: 6142 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 6143 PL_lex_allbrackets++; 6144 PL_expect = XSTATE; 6145 break; 6146 case XATTRTERM: 6147 case XTERMBLOCK: 6148 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6149 PL_lex_allbrackets++; 6150 PL_expect = XSTATE; 6151 break; 6152 default: { 6153 const char *t; 6154 if (PL_oldoldbufptr == PL_last_lop) 6155 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6156 else 6157 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6158 PL_lex_allbrackets++; 6159 s = SKIPSPACE1(s); 6160 if (*s == '}') { 6161 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6162 PL_expect = XTERM; 6163 /* This hack is to get the ${} in the message. */ 6164 PL_bufptr = s+1; 6165 yyerror("syntax error"); 6166 break; 6167 } 6168 OPERATOR(HASHBRACK); 6169 } 6170 /* This hack serves to disambiguate a pair of curlies 6171 * as being a block or an anon hash. Normally, expectation 6172 * determines that, but in cases where we're not in a 6173 * position to expect anything in particular (like inside 6174 * eval"") we have to resolve the ambiguity. This code 6175 * covers the case where the first term in the curlies is a 6176 * quoted string. Most other cases need to be explicitly 6177 * disambiguated by prepending a "+" before the opening 6178 * curly in order to force resolution as an anon hash. 6179 * 6180 * XXX should probably propagate the outer expectation 6181 * into eval"" to rely less on this hack, but that could 6182 * potentially break current behavior of eval"". 6183 * GSAR 97-07-21 6184 */ 6185 t = s; 6186 if (*s == '\'' || *s == '"' || *s == '`') { 6187 /* common case: get past first string, handling escapes */ 6188 for (t++; t < PL_bufend && *t != *s;) 6189 if (*t++ == '\\' && (*t == '\\' || *t == *s)) 6190 t++; 6191 t++; 6192 } 6193 else if (*s == 'q') { 6194 if (++t < PL_bufend 6195 && (!isWORDCHAR(*t) 6196 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6197 && !isWORDCHAR(*t)))) 6198 { 6199 /* skip q//-like construct */ 6200 const char *tmps; 6201 char open, close, term; 6202 I32 brackets = 1; 6203 6204 while (t < PL_bufend && isSPACE(*t)) 6205 t++; 6206 /* check for q => */ 6207 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6208 OPERATOR(HASHBRACK); 6209 } 6210 term = *t; 6211 open = term; 6212 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 6213 term = tmps[5]; 6214 close = term; 6215 if (open == close) 6216 for (t++; t < PL_bufend; t++) { 6217 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6218 t++; 6219 else if (*t == open) 6220 break; 6221 } 6222 else { 6223 for (t++; t < PL_bufend; t++) { 6224 if (*t == '\\' && t+1 < PL_bufend) 6225 t++; 6226 else if (*t == close && --brackets <= 0) 6227 break; 6228 else if (*t == open) 6229 brackets++; 6230 } 6231 } 6232 t++; 6233 } 6234 else 6235 /* skip plain q word */ 6236 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 6237 t += UTF8SKIP(t); 6238 } 6239 else if (isWORDCHAR_lazy_if(t,UTF)) { 6240 t += UTF8SKIP(t); 6241 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 6242 t += UTF8SKIP(t); 6243 } 6244 while (t < PL_bufend && isSPACE(*t)) 6245 t++; 6246 /* if comma follows first term, call it an anon hash */ 6247 /* XXX it could be a comma expression with loop modifiers */ 6248 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6249 || (*t == '=' && t[1] == '>'))) 6250 OPERATOR(HASHBRACK); 6251 if (PL_expect == XREF) 6252 PL_expect = XTERM; 6253 else { 6254 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6255 PL_expect = XSTATE; 6256 } 6257 } 6258 break; 6259 } 6260 pl_yylval.ival = CopLINE(PL_curcop); 6261 if (isSPACE(*s) || *s == '#') 6262 PL_copline = NOLINE; /* invalidate current command line number */ 6263 TOKEN(formbrack ? '=' : '{'); 6264 case '}': 6265 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6266 TOKEN(0); 6267 rightbracket: 6268 s++; 6269 if (PL_lex_brackets <= 0) 6270 /* diag_listed_as: Unmatched right %s bracket */ 6271 yyerror("Unmatched right curly bracket"); 6272 else 6273 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6274 PL_lex_allbrackets--; 6275 if (PL_lex_state == LEX_INTERPNORMAL) { 6276 if (PL_lex_brackets == 0) { 6277 if (PL_expect & XFAKEBRACK) { 6278 PL_expect &= XENUMMASK; 6279 PL_lex_state = LEX_INTERPEND; 6280 PL_bufptr = s; 6281 #if 0 6282 if (PL_madskills) { 6283 if (!PL_thiswhite) 6284 PL_thiswhite = newSVpvs(""); 6285 sv_catpvs(PL_thiswhite,"}"); 6286 } 6287 #endif 6288 return yylex(); /* ignore fake brackets */ 6289 } 6290 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6291 && SvEVALED(PL_lex_repl)) 6292 PL_lex_state = LEX_INTERPEND; 6293 else if (*s == '-' && s[1] == '>') 6294 PL_lex_state = LEX_INTERPENDMAYBE; 6295 else if (*s != '[' && *s != '{') 6296 PL_lex_state = LEX_INTERPEND; 6297 } 6298 } 6299 if (PL_expect & XFAKEBRACK) { 6300 PL_expect &= XENUMMASK; 6301 PL_bufptr = s; 6302 return yylex(); /* ignore fake brackets */ 6303 } 6304 start_force(PL_curforce); 6305 if (PL_madskills) { 6306 curmad('X', newSVpvn(s-1,1)); 6307 CURMAD('_', PL_thiswhite); 6308 } 6309 force_next(formbrack ? '.' : '}'); 6310 if (formbrack) LEAVE; 6311 #ifdef PERL_MAD 6312 if (PL_madskills && !PL_thistoken) 6313 PL_thistoken = newSVpvs(""); 6314 #endif 6315 if (formbrack == 2) { /* means . where arguments were expected */ 6316 start_force(PL_curforce); 6317 force_next(';'); 6318 TOKEN(FORMRBRACK); 6319 } 6320 TOKEN(';'); 6321 case '&': 6322 if (PL_expect == XPOSTDEREF) POSTDEREF('&'); 6323 s++; 6324 if (*s++ == '&') { 6325 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6326 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6327 s -= 2; 6328 TOKEN(0); 6329 } 6330 AOPERATOR(ANDAND); 6331 } 6332 s--; 6333 if (PL_expect == XOPERATOR) { 6334 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON) 6335 && isIDFIRST_lazy_if(s,UTF)) 6336 { 6337 CopLINE_dec(PL_curcop); 6338 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6339 CopLINE_inc(PL_curcop); 6340 } 6341 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6342 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6343 s--; 6344 TOKEN(0); 6345 } 6346 PL_parser->saw_infix_sigil = 1; 6347 BAop(OP_BIT_AND); 6348 } 6349 6350 PL_tokenbuf[0] = '&'; 6351 s = scan_ident(s - 1, PL_tokenbuf + 1, 6352 sizeof PL_tokenbuf - 1, TRUE); 6353 if (PL_tokenbuf[1]) { 6354 PL_expect = XOPERATOR; 6355 force_ident_maybe_lex('&'); 6356 } 6357 else 6358 PREREF('&'); 6359 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6360 TERM('&'); 6361 6362 case '|': 6363 s++; 6364 if (*s++ == '|') { 6365 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6366 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6367 s -= 2; 6368 TOKEN(0); 6369 } 6370 AOPERATOR(OROR); 6371 } 6372 s--; 6373 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6374 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6375 s--; 6376 TOKEN(0); 6377 } 6378 BOop(OP_BIT_OR); 6379 case '=': 6380 s++; 6381 { 6382 const char tmp = *s++; 6383 if (tmp == '=') { 6384 if (!PL_lex_allbrackets && 6385 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6386 s -= 2; 6387 TOKEN(0); 6388 } 6389 Eop(OP_EQ); 6390 } 6391 if (tmp == '>') { 6392 if (!PL_lex_allbrackets && 6393 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) { 6394 s -= 2; 6395 TOKEN(0); 6396 } 6397 OPERATOR(','); 6398 } 6399 if (tmp == '~') 6400 PMop(OP_MATCH); 6401 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 6402 && strchr("+-*/%.^&|<",tmp)) 6403 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6404 "Reversed %c= operator",(int)tmp); 6405 s--; 6406 if (PL_expect == XSTATE && isALPHA(tmp) && 6407 (s == PL_linestart+1 || s[-2] == '\n') ) 6408 { 6409 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) 6410 || PL_lex_state != LEX_NORMAL) { 6411 d = PL_bufend; 6412 while (s < d) { 6413 if (*s++ == '\n') { 6414 incline(s); 6415 if (strnEQ(s,"=cut",4)) { 6416 s = strchr(s,'\n'); 6417 if (s) 6418 s++; 6419 else 6420 s = d; 6421 incline(s); 6422 goto retry; 6423 } 6424 } 6425 } 6426 goto retry; 6427 } 6428 #ifdef PERL_MAD 6429 if (PL_madskills) { 6430 if (!PL_thiswhite) 6431 PL_thiswhite = newSVpvs(""); 6432 sv_catpvn(PL_thiswhite, PL_linestart, 6433 PL_bufend - PL_linestart); 6434 } 6435 #endif 6436 s = PL_bufend; 6437 PL_parser->in_pod = 1; 6438 goto retry; 6439 } 6440 } 6441 if (PL_expect == XBLOCK) { 6442 const char *t = s; 6443 #ifdef PERL_STRICT_CR 6444 while (SPACE_OR_TAB(*t)) 6445 #else 6446 while (SPACE_OR_TAB(*t) || *t == '\r') 6447 #endif 6448 t++; 6449 if (*t == '\n' || *t == '#') { 6450 formbrack = 1; 6451 ENTER; 6452 SAVEI8(PL_parser->form_lex_state); 6453 SAVEI32(PL_lex_formbrack); 6454 PL_parser->form_lex_state = PL_lex_state; 6455 PL_lex_formbrack = PL_lex_brackets + 1; 6456 goto leftbracket; 6457 } 6458 } 6459 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6460 s--; 6461 TOKEN(0); 6462 } 6463 pl_yylval.ival = 0; 6464 OPERATOR(ASSIGNOP); 6465 case '!': 6466 s++; 6467 { 6468 const char tmp = *s++; 6469 if (tmp == '=') { 6470 /* was this !=~ where !~ was meant? 6471 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6472 6473 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6474 const char *t = s+1; 6475 6476 while (t < PL_bufend && isSPACE(*t)) 6477 ++t; 6478 6479 if (*t == '/' || *t == '?' || 6480 ((*t == 'm' || *t == 's' || *t == 'y') 6481 && !isWORDCHAR(t[1])) || 6482 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6484 "!=~ should be !~"); 6485 } 6486 if (!PL_lex_allbrackets && 6487 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6488 s -= 2; 6489 TOKEN(0); 6490 } 6491 Eop(OP_NE); 6492 } 6493 if (tmp == '~') 6494 PMop(OP_NOT); 6495 } 6496 s--; 6497 OPERATOR('!'); 6498 case '<': 6499 if (PL_expect != XOPERATOR) { 6500 if (s[1] != '<' && !strchr(s,'>')) 6501 check_uni(); 6502 if (s[1] == '<') 6503 s = scan_heredoc(s); 6504 else 6505 s = scan_inputsymbol(s); 6506 PL_expect = XOPERATOR; 6507 TOKEN(sublex_start()); 6508 } 6509 s++; 6510 { 6511 char tmp = *s++; 6512 if (tmp == '<') { 6513 if (*s == '=' && !PL_lex_allbrackets && 6514 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6515 s -= 2; 6516 TOKEN(0); 6517 } 6518 SHop(OP_LEFT_SHIFT); 6519 } 6520 if (tmp == '=') { 6521 tmp = *s++; 6522 if (tmp == '>') { 6523 if (!PL_lex_allbrackets && 6524 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6525 s -= 3; 6526 TOKEN(0); 6527 } 6528 Eop(OP_NCMP); 6529 } 6530 s--; 6531 if (!PL_lex_allbrackets && 6532 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6533 s -= 2; 6534 TOKEN(0); 6535 } 6536 Rop(OP_LE); 6537 } 6538 } 6539 s--; 6540 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6541 s--; 6542 TOKEN(0); 6543 } 6544 Rop(OP_LT); 6545 case '>': 6546 s++; 6547 { 6548 const char tmp = *s++; 6549 if (tmp == '>') { 6550 if (*s == '=' && !PL_lex_allbrackets && 6551 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6552 s -= 2; 6553 TOKEN(0); 6554 } 6555 SHop(OP_RIGHT_SHIFT); 6556 } 6557 else if (tmp == '=') { 6558 if (!PL_lex_allbrackets && 6559 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6560 s -= 2; 6561 TOKEN(0); 6562 } 6563 Rop(OP_GE); 6564 } 6565 } 6566 s--; 6567 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6568 s--; 6569 TOKEN(0); 6570 } 6571 Rop(OP_GT); 6572 6573 case '$': 6574 CLINE; 6575 6576 if (PL_expect == XOPERATOR) { 6577 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6578 return deprecate_commaless_var_list(); 6579 } 6580 } 6581 else if (PL_expect == XPOSTDEREF) { 6582 if (s[1] == '#') { 6583 s++; 6584 POSTDEREF(DOLSHARP); 6585 } 6586 POSTDEREF('$'); 6587 } 6588 6589 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) { 6590 PL_tokenbuf[0] = '@'; 6591 s = scan_ident(s + 1, PL_tokenbuf + 1, 6592 sizeof PL_tokenbuf - 1, FALSE); 6593 if (PL_expect == XOPERATOR) 6594 no_op("Array length", s); 6595 if (!PL_tokenbuf[1]) 6596 PREREF(DOLSHARP); 6597 PL_expect = XOPERATOR; 6598 force_ident_maybe_lex('#'); 6599 TOKEN(DOLSHARP); 6600 } 6601 6602 PL_tokenbuf[0] = '$'; 6603 s = scan_ident(s, PL_tokenbuf + 1, 6604 sizeof PL_tokenbuf - 1, FALSE); 6605 if (PL_expect == XOPERATOR) 6606 no_op("Scalar", s); 6607 if (!PL_tokenbuf[1]) { 6608 if (s == PL_bufend) 6609 yyerror("Final $ should be \\$ or $name"); 6610 PREREF('$'); 6611 } 6612 6613 d = s; 6614 { 6615 const char tmp = *s; 6616 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6617 s = SKIPSPACE1(s); 6618 6619 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6620 && intuit_more(s)) { 6621 if (*s == '[') { 6622 PL_tokenbuf[0] = '@'; 6623 if (ckWARN(WARN_SYNTAX)) { 6624 char *t = s+1; 6625 6626 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') 6627 t += UTF ? UTF8SKIP(t) : 1; 6628 if (*t++ == ',') { 6629 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ 6630 while (t < PL_bufend && *t != ']') 6631 t++; 6632 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6633 "Multidimensional syntax %.*s not supported", 6634 (int)((t - PL_bufptr) + 1), PL_bufptr); 6635 } 6636 } 6637 } 6638 else if (*s == '{') { 6639 char *t; 6640 PL_tokenbuf[0] = '%'; 6641 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) 6642 && (t = strchr(s, '}')) && (t = strchr(t, '='))) 6643 { 6644 char tmpbuf[sizeof PL_tokenbuf]; 6645 do { 6646 t++; 6647 } while (isSPACE(*t)); 6648 if (isIDFIRST_lazy_if(t,UTF)) { 6649 STRLEN len; 6650 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 6651 &len); 6652 while (isSPACE(*t)) 6653 t++; 6654 if (*t == ';' 6655 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) 6656 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6657 "You need to quote \"%"UTF8f"\"", 6658 UTF8fARG(UTF, len, tmpbuf)); 6659 } 6660 } 6661 } 6662 } 6663 6664 PL_expect = XOPERATOR; 6665 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 6666 const bool islop = (PL_last_lop == PL_oldoldbufptr); 6667 if (!islop || PL_last_lop_op == OP_GREPSTART) 6668 PL_expect = XOPERATOR; 6669 else if (strchr("$@\"'`q", *s)) 6670 PL_expect = XTERM; /* e.g. print $fh "foo" */ 6671 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 6672 PL_expect = XTERM; /* e.g. print $fh &sub */ 6673 else if (isIDFIRST_lazy_if(s,UTF)) { 6674 char tmpbuf[sizeof PL_tokenbuf]; 6675 int t2; 6676 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 6677 if ((t2 = keyword(tmpbuf, len, 0))) { 6678 /* binary operators exclude handle interpretations */ 6679 switch (t2) { 6680 case -KEY_x: 6681 case -KEY_eq: 6682 case -KEY_ne: 6683 case -KEY_gt: 6684 case -KEY_lt: 6685 case -KEY_ge: 6686 case -KEY_le: 6687 case -KEY_cmp: 6688 break; 6689 default: 6690 PL_expect = XTERM; /* e.g. print $fh length() */ 6691 break; 6692 } 6693 } 6694 else { 6695 PL_expect = XTERM; /* e.g. print $fh subr() */ 6696 } 6697 } 6698 else if (isDIGIT(*s)) 6699 PL_expect = XTERM; /* e.g. print $fh 3 */ 6700 else if (*s == '.' && isDIGIT(s[1])) 6701 PL_expect = XTERM; /* e.g. print $fh .3 */ 6702 else if ((*s == '?' || *s == '-' || *s == '+') 6703 && !isSPACE(s[1]) && s[1] != '=') 6704 PL_expect = XTERM; /* e.g. print $fh -1 */ 6705 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 6706 && s[1] != '/') 6707 PL_expect = XTERM; /* e.g. print $fh /.../ 6708 XXX except DORDOR operator 6709 */ 6710 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 6711 && s[2] != '=') 6712 PL_expect = XTERM; /* print $fh <<"EOF" */ 6713 } 6714 } 6715 force_ident_maybe_lex('$'); 6716 TOKEN('$'); 6717 6718 case '@': 6719 if (PL_expect == XOPERATOR) 6720 no_op("Array", s); 6721 else if (PL_expect == XPOSTDEREF) POSTDEREF('@'); 6722 PL_tokenbuf[0] = '@'; 6723 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6724 pl_yylval.ival = 0; 6725 if (!PL_tokenbuf[1]) { 6726 PREREF('@'); 6727 } 6728 if (PL_lex_state == LEX_NORMAL) 6729 s = SKIPSPACE1(s); 6730 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 6731 if (*s == '{') 6732 PL_tokenbuf[0] = '%'; 6733 6734 /* Warn about @ where they meant $. */ 6735 if (*s == '[' || *s == '{') { 6736 if (ckWARN(WARN_SYNTAX)) { 6737 S_check_scalar_slice(aTHX_ s); 6738 } 6739 } 6740 } 6741 PL_expect = XOPERATOR; 6742 force_ident_maybe_lex('@'); 6743 TERM('@'); 6744 6745 case '/': /* may be division, defined-or, or pattern */ 6746 if (PL_expect == XTERMORDORDOR && s[1] == '/') { 6747 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6748 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6749 TOKEN(0); 6750 s += 2; 6751 AOPERATOR(DORDOR); 6752 } 6753 case '?': /* may either be conditional or pattern */ 6754 if (PL_expect == XOPERATOR) { 6755 char tmp = *s++; 6756 if(tmp == '?') { 6757 if (!PL_lex_allbrackets && 6758 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) { 6759 s--; 6760 TOKEN(0); 6761 } 6762 PL_lex_allbrackets++; 6763 OPERATOR('?'); 6764 } 6765 else { 6766 tmp = *s++; 6767 if(tmp == '/') { 6768 /* A // operator. */ 6769 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6770 (*s == '=' ? LEX_FAKEEOF_ASSIGN : 6771 LEX_FAKEEOF_LOGIC)) { 6772 s -= 2; 6773 TOKEN(0); 6774 } 6775 AOPERATOR(DORDOR); 6776 } 6777 else { 6778 s--; 6779 if (*s == '=' && !PL_lex_allbrackets && 6780 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6781 s--; 6782 TOKEN(0); 6783 } 6784 Mop(OP_DIVIDE); 6785 } 6786 } 6787 } 6788 else { 6789 /* Disable warning on "study /blah/" */ 6790 if (PL_oldoldbufptr == PL_last_uni 6791 && (*PL_last_uni != 's' || s - PL_last_uni < 5 6792 || memNE(PL_last_uni, "study", 5) 6793 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) 6794 )) 6795 check_uni(); 6796 if (*s == '?') 6797 deprecate("?PATTERN? without explicit operator"); 6798 s = scan_pat(s,OP_MATCH); 6799 TERM(sublex_start()); 6800 } 6801 6802 case '.': 6803 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 6804 #ifdef PERL_STRICT_CR 6805 && s[1] == '\n' 6806 #else 6807 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 6808 #endif 6809 && (s == PL_linestart || s[-1] == '\n') ) 6810 { 6811 PL_expect = XSTATE; 6812 formbrack = 2; /* dot seen where arguments expected */ 6813 goto rightbracket; 6814 } 6815 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 6816 s += 3; 6817 OPERATOR(YADAYADA); 6818 } 6819 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 6820 char tmp = *s++; 6821 if (*s == tmp) { 6822 if (!PL_lex_allbrackets && 6823 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) { 6824 s--; 6825 TOKEN(0); 6826 } 6827 s++; 6828 if (*s == tmp) { 6829 s++; 6830 pl_yylval.ival = OPf_SPECIAL; 6831 } 6832 else 6833 pl_yylval.ival = 0; 6834 OPERATOR(DOTDOT); 6835 } 6836 if (*s == '=' && !PL_lex_allbrackets && 6837 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6838 s--; 6839 TOKEN(0); 6840 } 6841 Aop(OP_CONCAT); 6842 } 6843 /* FALL THROUGH */ 6844 case '0': case '1': case '2': case '3': case '4': 6845 case '5': case '6': case '7': case '8': case '9': 6846 s = scan_num(s, &pl_yylval); 6847 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 6848 if (PL_expect == XOPERATOR) 6849 no_op("Number",s); 6850 TERM(THING); 6851 6852 case '\'': 6853 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 6854 if (!s) 6855 missingterm(NULL); 6856 COPLINE_SET_FROM_MULTI_END; 6857 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6858 if (PL_expect == XOPERATOR) { 6859 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6860 return deprecate_commaless_var_list(); 6861 } 6862 else 6863 no_op("String",s); 6864 } 6865 pl_yylval.ival = OP_CONST; 6866 TERM(sublex_start()); 6867 6868 case '"': 6869 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 6870 DEBUG_T( { 6871 if (s) 6872 printbuf("### Saw string before %s\n", s); 6873 else 6874 PerlIO_printf(Perl_debug_log, 6875 "### Saw unterminated string\n"); 6876 } ); 6877 if (PL_expect == XOPERATOR) { 6878 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6879 return deprecate_commaless_var_list(); 6880 } 6881 else 6882 no_op("String",s); 6883 } 6884 if (!s) 6885 missingterm(NULL); 6886 pl_yylval.ival = OP_CONST; 6887 /* FIXME. I think that this can be const if char *d is replaced by 6888 more localised variables. */ 6889 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6890 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6891 pl_yylval.ival = OP_STRINGIFY; 6892 break; 6893 } 6894 } 6895 if (pl_yylval.ival == OP_CONST) 6896 COPLINE_SET_FROM_MULTI_END; 6897 TERM(sublex_start()); 6898 6899 case '`': 6900 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 6901 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); 6902 if (PL_expect == XOPERATOR) 6903 no_op("Backticks",s); 6904 if (!s) 6905 missingterm(NULL); 6906 pl_yylval.ival = OP_BACKTICK; 6907 TERM(sublex_start()); 6908 6909 case '\\': 6910 s++; 6911 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6912 && isDIGIT(*s)) 6913 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6914 *s, *s); 6915 if (PL_expect == XOPERATOR) 6916 no_op("Backslash",s); 6917 OPERATOR(REFGEN); 6918 6919 case 'v': 6920 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 6921 char *start = s + 2; 6922 while (isDIGIT(*start) || *start == '_') 6923 start++; 6924 if (*start == '.' && isDIGIT(start[1])) { 6925 s = scan_num(s, &pl_yylval); 6926 TERM(THING); 6927 } 6928 else if ((*start == ':' && start[1] == ':') 6929 || (PL_expect == XSTATE && *start == ':')) 6930 goto keylookup; 6931 else if (PL_expect == XSTATE) { 6932 d = start; 6933 while (d < PL_bufend && isSPACE(*d)) d++; 6934 if (*d == ':') goto keylookup; 6935 } 6936 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 6937 if (!isALPHA(*start) && (PL_expect == XTERM 6938 || PL_expect == XREF || PL_expect == XSTATE 6939 || PL_expect == XTERMORDORDOR)) { 6940 GV *const gv = gv_fetchpvn_flags(s, start - s, 6941 UTF ? SVf_UTF8 : 0, SVt_PVCV); 6942 if (!gv) { 6943 s = scan_num(s, &pl_yylval); 6944 TERM(THING); 6945 } 6946 } 6947 } 6948 goto keylookup; 6949 case 'x': 6950 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 6951 s++; 6952 Mop(OP_REPEAT); 6953 } 6954 goto keylookup; 6955 6956 case '_': 6957 case 'a': case 'A': 6958 case 'b': case 'B': 6959 case 'c': case 'C': 6960 case 'd': case 'D': 6961 case 'e': case 'E': 6962 case 'f': case 'F': 6963 case 'g': case 'G': 6964 case 'h': case 'H': 6965 case 'i': case 'I': 6966 case 'j': case 'J': 6967 case 'k': case 'K': 6968 case 'l': case 'L': 6969 case 'm': case 'M': 6970 case 'n': case 'N': 6971 case 'o': case 'O': 6972 case 'p': case 'P': 6973 case 'q': case 'Q': 6974 case 'r': case 'R': 6975 case 's': case 'S': 6976 case 't': case 'T': 6977 case 'u': case 'U': 6978 case 'V': 6979 case 'w': case 'W': 6980 case 'X': 6981 case 'y': case 'Y': 6982 case 'z': case 'Z': 6983 6984 keylookup: { 6985 bool anydelim; 6986 bool lex; 6987 I32 tmp; 6988 SV *sv; 6989 CV *cv; 6990 PADOFFSET off; 6991 OP *rv2cv_op; 6992 6993 lex = FALSE; 6994 orig_keyword = 0; 6995 off = 0; 6996 sv = NULL; 6997 cv = NULL; 6998 gv = NULL; 6999 gvp = NULL; 7000 rv2cv_op = NULL; 7001 7002 PL_bufptr = s; 7003 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7004 7005 /* Some keywords can be followed by any delimiter, including ':' */ 7006 anydelim = word_takes_any_delimeter(PL_tokenbuf, len); 7007 7008 /* x::* is just a word, unless x is "CORE" */ 7009 if (!anydelim && *s == ':' && s[1] == ':') { 7010 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE; 7011 goto just_a_word; 7012 } 7013 7014 d = s; 7015 while (d < PL_bufend && isSPACE(*d)) 7016 d++; /* no comments skipped here, or s### is misparsed */ 7017 7018 /* Is this a word before a => operator? */ 7019 if (*d == '=' && d[1] == '>') { 7020 fat_arrow: 7021 CLINE; 7022 pl_yylval.opval 7023 = (OP*)newSVOP(OP_CONST, 0, 7024 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 7025 pl_yylval.opval->op_private = OPpCONST_BARE; 7026 TERM(WORD); 7027 } 7028 7029 /* Check for plugged-in keyword */ 7030 { 7031 OP *o; 7032 int result; 7033 char *saved_bufptr = PL_bufptr; 7034 PL_bufptr = s; 7035 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 7036 s = PL_bufptr; 7037 if (result == KEYWORD_PLUGIN_DECLINE) { 7038 /* not a plugged-in keyword */ 7039 PL_bufptr = saved_bufptr; 7040 } else if (result == KEYWORD_PLUGIN_STMT) { 7041 pl_yylval.opval = o; 7042 CLINE; 7043 PL_expect = XSTATE; 7044 return REPORT(PLUGSTMT); 7045 } else if (result == KEYWORD_PLUGIN_EXPR) { 7046 pl_yylval.opval = o; 7047 CLINE; 7048 PL_expect = XOPERATOR; 7049 return REPORT(PLUGEXPR); 7050 } else { 7051 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", 7052 PL_tokenbuf); 7053 } 7054 } 7055 7056 /* Check for built-in keyword */ 7057 tmp = keyword(PL_tokenbuf, len, 0); 7058 7059 /* Is this a label? */ 7060 if (!anydelim && PL_expect == XSTATE 7061 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 7062 s = d + 1; 7063 pl_yylval.pval = savepvn(PL_tokenbuf, len+1); 7064 pl_yylval.pval[len] = '\0'; 7065 pl_yylval.pval[len+1] = UTF ? 1 : 0; 7066 CLINE; 7067 TOKEN(LABEL); 7068 } 7069 7070 /* Check for lexical sub */ 7071 if (PL_expect != XOPERATOR) { 7072 char tmpbuf[sizeof PL_tokenbuf + 1]; 7073 *tmpbuf = '&'; 7074 Copy(PL_tokenbuf, tmpbuf+1, len, char); 7075 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0); 7076 if (off != NOT_IN_PAD) { 7077 assert(off); /* we assume this is boolean-true below */ 7078 if (PAD_COMPNAME_FLAGS_isOUR(off)) { 7079 HV * const stash = PAD_COMPNAME_OURSTASH(off); 7080 HEK * const stashname = HvNAME_HEK(stash); 7081 sv = newSVhek(stashname); 7082 sv_catpvs(sv, "::"); 7083 sv_catpvn_flags(sv, PL_tokenbuf, len, 7084 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 7085 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), 7086 SVt_PVCV); 7087 off = 0; 7088 if (!gv) { 7089 sv_free(sv); 7090 sv = NULL; 7091 goto just_a_word; 7092 } 7093 } 7094 else { 7095 rv2cv_op = newOP(OP_PADANY, 0); 7096 rv2cv_op->op_targ = off; 7097 cv = find_lexical_cv(off); 7098 } 7099 lex = TRUE; 7100 goto just_a_word; 7101 } 7102 off = 0; 7103 } 7104 7105 if (tmp < 0) { /* second-class keyword? */ 7106 GV *ogv = NULL; /* override (winner) */ 7107 GV *hgv = NULL; /* hidden (loser) */ 7108 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 7109 CV *cv; 7110 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 7111 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 7112 SVt_PVCV)) && 7113 (cv = GvCVu(gv))) 7114 { 7115 if (GvIMPORTED_CV(gv)) 7116 ogv = gv; 7117 else if (! CvMETHOD(cv)) 7118 hgv = gv; 7119 } 7120 if (!ogv && 7121 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, 7122 len, FALSE)) && 7123 (gv = *gvp) && ( 7124 isGV_with_GP(gv) 7125 ? GvCVu(gv) && GvIMPORTED_CV(gv) 7126 : SvPCS_IMPORTED(gv) 7127 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 7128 len, 0), 1) 7129 )) 7130 { 7131 ogv = gv; 7132 } 7133 } 7134 if (ogv) { 7135 orig_keyword = tmp; 7136 tmp = 0; /* overridden by import or by GLOBAL */ 7137 } 7138 else if (gv && !gvp 7139 && -tmp==KEY_lock /* XXX generalizable kludge */ 7140 && GvCVu(gv)) 7141 { 7142 tmp = 0; /* any sub overrides "weak" keyword */ 7143 } 7144 else { /* no override */ 7145 tmp = -tmp; 7146 if (tmp == KEY_dump) { 7147 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 7148 "dump() better written as CORE::dump()"); 7149 } 7150 gv = NULL; 7151 gvp = 0; 7152 if (hgv && tmp != KEY_x) /* never ambiguous */ 7153 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 7154 "Ambiguous call resolved as CORE::%s(), " 7155 "qualify as such or use &", 7156 GvENAME(hgv)); 7157 } 7158 } 7159 7160 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ 7161 && (!anydelim || *s != '#')) { 7162 /* no override, and not s### either; skipspace is safe here 7163 * check for => on following line */ 7164 bool arrow; 7165 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 7166 STRLEN soff = s - SvPVX(PL_linestr); 7167 s = skipspace_flags(s, LEX_NO_INCLINE); 7168 arrow = *s == '=' && s[1] == '>'; 7169 PL_bufptr = SvPVX(PL_linestr) + bufoff; 7170 s = SvPVX(PL_linestr) + soff; 7171 if (arrow) 7172 goto fat_arrow; 7173 } 7174 7175 reserved_word: 7176 switch (tmp) { 7177 7178 default: /* not a keyword */ 7179 /* Trade off - by using this evil construction we can pull the 7180 variable gv into the block labelled keylookup. If not, then 7181 we have to give it function scope so that the goto from the 7182 earlier ':' case doesn't bypass the initialisation. */ 7183 if (0) { 7184 just_a_word_zero_gv: 7185 sv = NULL; 7186 cv = NULL; 7187 gv = NULL; 7188 gvp = NULL; 7189 rv2cv_op = NULL; 7190 orig_keyword = 0; 7191 lex = 0; 7192 off = 0; 7193 } 7194 just_a_word: { 7195 int pkgname = 0; 7196 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7197 const char penultchar = 7198 lastchar && PL_bufptr - 2 >= PL_linestart 7199 ? PL_bufptr[-2] 7200 : 0; 7201 #ifdef PERL_MAD 7202 SV *nextPL_nextwhite = 0; 7203 #endif 7204 7205 7206 /* Get the rest if it looks like a package qualifier */ 7207 7208 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7209 STRLEN morelen; 7210 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7211 TRUE, &morelen); 7212 if (!morelen) 7213 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", 7214 UTF8fARG(UTF, len, PL_tokenbuf), 7215 *s == '\'' ? "'" : "::"); 7216 len += morelen; 7217 pkgname = 1; 7218 } 7219 7220 if (PL_expect == XOPERATOR) { 7221 if (PL_bufptr == PL_linestart) { 7222 CopLINE_dec(PL_curcop); 7223 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7224 CopLINE_inc(PL_curcop); 7225 } 7226 else 7227 no_op("Bareword",s); 7228 } 7229 7230 /* Look for a subroutine with this name in current package, 7231 unless this is a lexical sub, or name is "Foo::", 7232 in which case Foo is a bareword 7233 (and a package name). */ 7234 7235 if (len > 2 && !PL_madskills && 7236 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') 7237 { 7238 if (ckWARN(WARN_BAREWORD) 7239 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7240 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7241 "Bareword \"%"UTF8f"\" refers to nonexistent package", 7242 UTF8fARG(UTF, len, PL_tokenbuf)); 7243 len -= 2; 7244 PL_tokenbuf[len] = '\0'; 7245 gv = NULL; 7246 gvp = 0; 7247 } 7248 else { 7249 if (!lex && !gv) { 7250 /* Mustn't actually add anything to a symbol table. 7251 But also don't want to "initialise" any placeholder 7252 constants that might already be there into full 7253 blown PVGVs with attached PVCV. */ 7254 gv = gv_fetchpvn_flags(PL_tokenbuf, len, 7255 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ), 7256 SVt_PVCV); 7257 } 7258 len = 0; 7259 } 7260 7261 /* if we saw a global override before, get the right name */ 7262 7263 if (!sv) 7264 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, 7265 len ? len : strlen(PL_tokenbuf)); 7266 if (gvp) { 7267 SV * const tmp_sv = sv; 7268 sv = newSVpvs("CORE::GLOBAL::"); 7269 sv_catsv(sv, tmp_sv); 7270 SvREFCNT_dec(tmp_sv); 7271 } 7272 7273 #ifdef PERL_MAD 7274 if (PL_madskills && !PL_thistoken) { 7275 char *start = SvPVX(PL_linestr) + PL_realtokenstart; 7276 PL_thistoken = newSVpvn(start,s - start); 7277 PL_realtokenstart = s - SvPVX(PL_linestr); 7278 } 7279 #endif 7280 7281 /* Presume this is going to be a bareword of some sort. */ 7282 CLINE; 7283 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 7284 pl_yylval.opval->op_private = OPpCONST_BARE; 7285 7286 /* And if "Foo::", then that's what it certainly is. */ 7287 if (len) 7288 goto safe_bareword; 7289 7290 if (!off) 7291 { 7292 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); 7293 const_op->op_private = OPpCONST_BARE; 7294 rv2cv_op = newCVREF(0, const_op); 7295 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0); 7296 } 7297 7298 /* See if it's the indirect object for a list operator. */ 7299 7300 if (PL_oldoldbufptr && 7301 PL_oldoldbufptr < PL_bufptr && 7302 (PL_oldoldbufptr == PL_last_lop 7303 || PL_oldoldbufptr == PL_last_uni) && 7304 /* NO SKIPSPACE BEFORE HERE! */ 7305 (PL_expect == XREF || 7306 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF)) 7307 { 7308 bool immediate_paren = *s == '('; 7309 7310 /* (Now we can afford to cross potential line boundary.) */ 7311 s = SKIPSPACE2(s,nextPL_nextwhite); 7312 #ifdef PERL_MAD 7313 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */ 7314 #endif 7315 7316 /* Two barewords in a row may indicate method call. */ 7317 7318 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && 7319 (tmp = intuit_method(s, gv, cv))) { 7320 op_free(rv2cv_op); 7321 if (tmp == METHOD && !PL_lex_allbrackets && 7322 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7323 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7324 return REPORT(tmp); 7325 } 7326 7327 /* If not a declared subroutine, it's an indirect object. */ 7328 /* (But it's an indir obj regardless for sort.) */ 7329 /* Also, if "_" follows a filetest operator, it's a bareword */ 7330 7331 if ( 7332 ( !immediate_paren && (PL_last_lop_op == OP_SORT || 7333 (!cv && 7334 (PL_last_lop_op != OP_MAPSTART && 7335 PL_last_lop_op != OP_GREPSTART)))) 7336 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7337 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP)) 7338 ) 7339 { 7340 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7341 goto bareword; 7342 } 7343 } 7344 7345 PL_expect = XOPERATOR; 7346 #ifdef PERL_MAD 7347 if (isSPACE(*s)) 7348 s = SKIPSPACE2(s,nextPL_nextwhite); 7349 PL_nextwhite = nextPL_nextwhite; 7350 #else 7351 s = skipspace(s); 7352 #endif 7353 7354 /* Is this a word before a => operator? */ 7355 if (*s == '=' && s[1] == '>' && !pkgname) { 7356 op_free(rv2cv_op); 7357 CLINE; 7358 /* This is our own scalar, created a few lines above, 7359 so this is safe. */ 7360 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); 7361 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); 7362 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) 7363 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); 7364 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); 7365 TERM(WORD); 7366 } 7367 7368 /* If followed by a paren, it's certainly a subroutine. */ 7369 if (*s == '(') { 7370 CLINE; 7371 if (cv) { 7372 d = s + 1; 7373 while (SPACE_OR_TAB(*d)) 7374 d++; 7375 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { 7376 s = d + 1; 7377 goto its_constant; 7378 } 7379 } 7380 #ifdef PERL_MAD 7381 if (PL_madskills) { 7382 PL_nextwhite = PL_thiswhite; 7383 PL_thiswhite = 0; 7384 } 7385 start_force(PL_curforce); 7386 #endif 7387 NEXTVAL_NEXTTOKE.opval = 7388 off ? rv2cv_op : pl_yylval.opval; 7389 PL_expect = XOPERATOR; 7390 #ifdef PERL_MAD 7391 if (PL_madskills) { 7392 PL_nextwhite = nextPL_nextwhite; 7393 curmad('X', PL_thistoken); 7394 PL_thistoken = newSVpvs(""); 7395 } 7396 #endif 7397 if (off) 7398 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7399 else op_free(rv2cv_op), force_next(WORD); 7400 pl_yylval.ival = 0; 7401 TOKEN('&'); 7402 } 7403 7404 /* If followed by var or block, call it a method (unless sub) */ 7405 7406 if ((*s == '$' || *s == '{') && !cv) { 7407 op_free(rv2cv_op); 7408 PL_last_lop = PL_oldbufptr; 7409 PL_last_lop_op = OP_METHOD; 7410 if (!PL_lex_allbrackets && 7411 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7412 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7413 PREBLOCK(METHOD); 7414 } 7415 7416 /* If followed by a bareword, see if it looks like indir obj. */ 7417 7418 if (!orig_keyword 7419 && (isIDFIRST_lazy_if(s,UTF) || *s == '$') 7420 && (tmp = intuit_method(s, gv, cv))) { 7421 op_free(rv2cv_op); 7422 if (tmp == METHOD && !PL_lex_allbrackets && 7423 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7424 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7425 return REPORT(tmp); 7426 } 7427 7428 /* Not a method, so call it a subroutine (if defined) */ 7429 7430 if (cv) { 7431 if (lastchar == '-' && penultchar != '-') { 7432 const STRLEN l = len ? len : strlen(PL_tokenbuf); 7433 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7434 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", 7435 UTF8fARG(UTF, l, PL_tokenbuf), 7436 UTF8fARG(UTF, l, PL_tokenbuf)); 7437 } 7438 /* Check for a constant sub */ 7439 if ((sv = cv_const_sv_or_av(cv))) { 7440 its_constant: 7441 op_free(rv2cv_op); 7442 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7443 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7444 if (SvTYPE(sv) == SVt_PVAV) 7445 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7446 pl_yylval.opval); 7447 else { 7448 pl_yylval.opval->op_private = 0; 7449 pl_yylval.opval->op_folded = 1; 7450 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7451 } 7452 TOKEN(WORD); 7453 } 7454 7455 op_free(pl_yylval.opval); 7456 pl_yylval.opval = 7457 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7458 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7459 PL_last_lop = PL_oldbufptr; 7460 PL_last_lop_op = OP_ENTERSUB; 7461 /* Is there a prototype? */ 7462 if ( 7463 #ifdef PERL_MAD 7464 cv && 7465 #endif 7466 SvPOK(cv)) 7467 { 7468 STRLEN protolen = CvPROTOLEN(cv); 7469 const char *proto = CvPROTO(cv); 7470 bool optional; 7471 proto = S_strip_spaces(aTHX_ proto, &protolen); 7472 if (!protolen) 7473 TERM(FUNC0SUB); 7474 if ((optional = *proto == ';')) 7475 do 7476 proto++; 7477 while (*proto == ';'); 7478 if ( 7479 ( 7480 ( 7481 *proto == '$' || *proto == '_' 7482 || *proto == '*' || *proto == '+' 7483 ) 7484 && proto[1] == '\0' 7485 ) 7486 || ( 7487 *proto == '\\' && proto[1] && proto[2] == '\0' 7488 ) 7489 ) 7490 UNIPROTO(UNIOPSUB,optional); 7491 if (*proto == '\\' && proto[1] == '[') { 7492 const char *p = proto + 2; 7493 while(*p && *p != ']') 7494 ++p; 7495 if(*p == ']' && !p[1]) 7496 UNIPROTO(UNIOPSUB,optional); 7497 } 7498 if (*proto == '&' && *s == '{') { 7499 if (PL_curstash) 7500 sv_setpvs(PL_subname, "__ANON__"); 7501 else 7502 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 7503 if (!PL_lex_allbrackets && 7504 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7505 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7506 PREBLOCK(LSTOPSUB); 7507 } 7508 } 7509 #ifdef PERL_MAD 7510 { 7511 if (PL_madskills) { 7512 PL_nextwhite = PL_thiswhite; 7513 PL_thiswhite = 0; 7514 } 7515 start_force(PL_curforce); 7516 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7517 PL_expect = XTERM; 7518 if (PL_madskills) { 7519 PL_nextwhite = nextPL_nextwhite; 7520 curmad('X', PL_thistoken); 7521 PL_thistoken = newSVpvs(""); 7522 } 7523 force_next(off ? PRIVATEREF : WORD); 7524 if (!PL_lex_allbrackets && 7525 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7526 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7527 TOKEN(NOAMP); 7528 } 7529 } 7530 7531 /* Guess harder when madskills require "best effort". */ 7532 if (PL_madskills && (!gv || !GvCVu(gv))) { 7533 int probable_sub = 0; 7534 if (strchr("\"'`$@%0123456789!*+{[<", *s)) 7535 probable_sub = 1; 7536 else if (isALPHA(*s)) { 7537 char tmpbuf[1024]; 7538 STRLEN tmplen; 7539 d = s; 7540 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen); 7541 if (!keyword(tmpbuf, tmplen, 0)) 7542 probable_sub = 1; 7543 else { 7544 while (d < PL_bufend && isSPACE(*d)) 7545 d++; 7546 if (*d == '=' && d[1] == '>') 7547 probable_sub = 1; 7548 } 7549 } 7550 if (probable_sub) { 7551 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), 7552 SVt_PVCV); 7553 op_free(pl_yylval.opval); 7554 pl_yylval.opval = 7555 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7556 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7557 PL_last_lop = PL_oldbufptr; 7558 PL_last_lop_op = OP_ENTERSUB; 7559 PL_nextwhite = PL_thiswhite; 7560 PL_thiswhite = 0; 7561 start_force(PL_curforce); 7562 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7563 PL_expect = XTERM; 7564 PL_nextwhite = nextPL_nextwhite; 7565 curmad('X', PL_thistoken); 7566 PL_thistoken = newSVpvs(""); 7567 force_next(off ? PRIVATEREF : WORD); 7568 if (!PL_lex_allbrackets && 7569 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7570 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7571 TOKEN(NOAMP); 7572 } 7573 #else 7574 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7575 PL_expect = XTERM; 7576 force_next(off ? PRIVATEREF : WORD); 7577 if (!PL_lex_allbrackets && 7578 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7579 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7580 TOKEN(NOAMP); 7581 #endif 7582 } 7583 7584 /* Call it a bare word */ 7585 7586 if (PL_hints & HINT_STRICT_SUBS) 7587 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7588 else { 7589 bareword: 7590 /* after "print" and similar functions (corresponding to 7591 * "F? L" in opcode.pl), whatever wasn't already parsed as 7592 * a filehandle should be subject to "strict subs". 7593 * Likewise for the optional indirect-object argument to system 7594 * or exec, which can't be a bareword */ 7595 if ((PL_last_lop_op == OP_PRINT 7596 || PL_last_lop_op == OP_PRTF 7597 || PL_last_lop_op == OP_SAY 7598 || PL_last_lop_op == OP_SYSTEM 7599 || PL_last_lop_op == OP_EXEC) 7600 && (PL_hints & HINT_STRICT_SUBS)) 7601 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7602 if (lastchar != '-') { 7603 if (ckWARN(WARN_RESERVED)) { 7604 d = PL_tokenbuf; 7605 while (isLOWER(*d)) 7606 d++; 7607 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) 7608 { 7609 /* PL_warn_reserved is constant */ 7610 GCC_DIAG_IGNORE(-Wformat-nonliteral); 7611 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7612 PL_tokenbuf); 7613 GCC_DIAG_RESTORE; 7614 } 7615 } 7616 } 7617 } 7618 op_free(rv2cv_op); 7619 7620 safe_bareword: 7621 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7622 && saw_infix_sigil) { 7623 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7624 "Operator or semicolon missing before %c%"UTF8f, 7625 lastchar, 7626 UTF8fARG(UTF, strlen(PL_tokenbuf), 7627 PL_tokenbuf)); 7628 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7629 "Ambiguous use of %c resolved as operator %c", 7630 lastchar, lastchar); 7631 } 7632 TOKEN(WORD); 7633 } 7634 7635 case KEY___FILE__: 7636 FUN0OP( 7637 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) 7638 ); 7639 7640 case KEY___LINE__: 7641 FUN0OP( 7642 (OP*)newSVOP(OP_CONST, 0, 7643 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))) 7644 ); 7645 7646 case KEY___PACKAGE__: 7647 FUN0OP( 7648 (OP*)newSVOP(OP_CONST, 0, 7649 (PL_curstash 7650 ? newSVhek(HvNAME_HEK(PL_curstash)) 7651 : &PL_sv_undef)) 7652 ); 7653 7654 case KEY___DATA__: 7655 case KEY___END__: { 7656 GV *gv; 7657 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 7658 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 7659 ? PL_curstash 7660 : PL_defstash; 7661 gv = (GV *)*hv_fetchs(stash, "DATA", 1); 7662 if (!isGV(gv)) 7663 gv_init(gv,stash,"DATA",4,0); 7664 GvMULTI_on(gv); 7665 if (!GvIO(gv)) 7666 GvIOp(gv) = newIO(); 7667 IoIFP(GvIOp(gv)) = PL_rsfp; 7668 #if defined(HAS_FCNTL) && defined(F_SETFD) 7669 { 7670 const int fd = PerlIO_fileno(PL_rsfp); 7671 fcntl(fd,F_SETFD,fd >= 3); 7672 } 7673 #endif 7674 /* Mark this internal pseudo-handle as clean */ 7675 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 7676 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 7677 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 7678 else 7679 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 7680 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 7681 /* if the script was opened in binmode, we need to revert 7682 * it to text mode for compatibility; but only iff it has CRs 7683 * XXX this is a questionable hack at best. */ 7684 if (PL_bufend-PL_bufptr > 2 7685 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 7686 { 7687 Off_t loc = 0; 7688 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 7689 loc = PerlIO_tell(PL_rsfp); 7690 (void)PerlIO_seek(PL_rsfp, 0L, 0); 7691 } 7692 #ifdef NETWARE 7693 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 7694 #else 7695 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 7696 #endif /* NETWARE */ 7697 if (loc > 0) 7698 PerlIO_seek(PL_rsfp, loc, 0); 7699 } 7700 } 7701 #endif 7702 #ifdef PERLIO_LAYERS 7703 if (!IN_BYTES) { 7704 if (UTF) 7705 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 7706 else if (PL_encoding) { 7707 SV *name; 7708 dSP; 7709 ENTER; 7710 SAVETMPS; 7711 PUSHMARK(sp); 7712 XPUSHs(PL_encoding); 7713 PUTBACK; 7714 call_method("name", G_SCALAR); 7715 SPAGAIN; 7716 name = POPs; 7717 PUTBACK; 7718 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 7719 Perl_form(aTHX_ ":encoding(%"SVf")", 7720 SVfARG(name))); 7721 FREETMPS; 7722 LEAVE; 7723 } 7724 } 7725 #endif 7726 #ifdef PERL_MAD 7727 if (PL_madskills) { 7728 if (PL_realtokenstart >= 0) { 7729 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 7730 if (!PL_endwhite) 7731 PL_endwhite = newSVpvs(""); 7732 sv_catsv(PL_endwhite, PL_thiswhite); 7733 PL_thiswhite = 0; 7734 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); 7735 PL_realtokenstart = -1; 7736 } 7737 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) 7738 != NULL) ; 7739 } 7740 #endif 7741 PL_rsfp = NULL; 7742 } 7743 goto fake_eof; 7744 } 7745 7746 case KEY___SUB__: 7747 FUN0OP(newPVOP(OP_RUNCV,0,NULL)); 7748 7749 case KEY_AUTOLOAD: 7750 case KEY_DESTROY: 7751 case KEY_BEGIN: 7752 case KEY_UNITCHECK: 7753 case KEY_CHECK: 7754 case KEY_INIT: 7755 case KEY_END: 7756 if (PL_expect == XSTATE) { 7757 s = PL_bufptr; 7758 goto really_sub; 7759 } 7760 goto just_a_word; 7761 7762 case_KEY_CORE: 7763 { 7764 STRLEN olen = len; 7765 d = s; 7766 s += 2; 7767 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7768 if ((*s == ':' && s[1] == ':') 7769 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 7770 { 7771 s = d; 7772 len = olen; 7773 Copy(PL_bufptr, PL_tokenbuf, olen, char); 7774 goto just_a_word; 7775 } 7776 if (!tmp) 7777 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", 7778 UTF8fARG(UTF, len, PL_tokenbuf)); 7779 if (tmp < 0) 7780 tmp = -tmp; 7781 else if (tmp == KEY_require || tmp == KEY_do 7782 || tmp == KEY_glob) 7783 /* that's a way to remember we saw "CORE::" */ 7784 orig_keyword = tmp; 7785 goto reserved_word; 7786 } 7787 7788 case KEY_abs: 7789 UNI(OP_ABS); 7790 7791 case KEY_alarm: 7792 UNI(OP_ALARM); 7793 7794 case KEY_accept: 7795 LOP(OP_ACCEPT,XTERM); 7796 7797 case KEY_and: 7798 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7799 return REPORT(0); 7800 OPERATOR(ANDOP); 7801 7802 case KEY_atan2: 7803 LOP(OP_ATAN2,XTERM); 7804 7805 case KEY_bind: 7806 LOP(OP_BIND,XTERM); 7807 7808 case KEY_binmode: 7809 LOP(OP_BINMODE,XTERM); 7810 7811 case KEY_bless: 7812 LOP(OP_BLESS,XTERM); 7813 7814 case KEY_break: 7815 FUN0(OP_BREAK); 7816 7817 case KEY_chop: 7818 UNI(OP_CHOP); 7819 7820 case KEY_continue: 7821 /* We have to disambiguate the two senses of 7822 "continue". If the next token is a '{' then 7823 treat it as the start of a continue block; 7824 otherwise treat it as a control operator. 7825 */ 7826 s = skipspace(s); 7827 if (*s == '{') 7828 PREBLOCK(CONTINUE); 7829 else 7830 FUN0(OP_CONTINUE); 7831 7832 case KEY_chdir: 7833 /* may use HOME */ 7834 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7835 UNI(OP_CHDIR); 7836 7837 case KEY_close: 7838 UNI(OP_CLOSE); 7839 7840 case KEY_closedir: 7841 UNI(OP_CLOSEDIR); 7842 7843 case KEY_cmp: 7844 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7845 return REPORT(0); 7846 Eop(OP_SCMP); 7847 7848 case KEY_caller: 7849 UNI(OP_CALLER); 7850 7851 case KEY_crypt: 7852 #ifdef FCRYPT 7853 if (!PL_cryptseen) { 7854 PL_cryptseen = TRUE; 7855 init_des(); 7856 } 7857 #endif 7858 LOP(OP_CRYPT,XTERM); 7859 7860 case KEY_chmod: 7861 LOP(OP_CHMOD,XTERM); 7862 7863 case KEY_chown: 7864 LOP(OP_CHOWN,XTERM); 7865 7866 case KEY_connect: 7867 LOP(OP_CONNECT,XTERM); 7868 7869 case KEY_chr: 7870 UNI(OP_CHR); 7871 7872 case KEY_cos: 7873 UNI(OP_COS); 7874 7875 case KEY_chroot: 7876 UNI(OP_CHROOT); 7877 7878 case KEY_default: 7879 PREBLOCK(DEFAULT); 7880 7881 case KEY_do: 7882 s = SKIPSPACE1(s); 7883 if (*s == '{') 7884 PRETERMBLOCK(DO); 7885 if (*s != '\'') { 7886 *PL_tokenbuf = '&'; 7887 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7888 1, &len); 7889 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) 7890 && !keyword(PL_tokenbuf + 1, len, 0)) { 7891 d = SKIPSPACE1(d); 7892 if (*d == '(') { 7893 force_ident_maybe_lex('&'); 7894 s = d; 7895 } 7896 } 7897 } 7898 if (orig_keyword == KEY_do) { 7899 orig_keyword = 0; 7900 pl_yylval.ival = 1; 7901 } 7902 else 7903 pl_yylval.ival = 0; 7904 OPERATOR(DO); 7905 7906 case KEY_die: 7907 PL_hints |= HINT_BLOCK_SCOPE; 7908 LOP(OP_DIE,XTERM); 7909 7910 case KEY_defined: 7911 UNI(OP_DEFINED); 7912 7913 case KEY_delete: 7914 UNI(OP_DELETE); 7915 7916 case KEY_dbmopen: 7917 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7918 STR_WITH_LEN("NDBM_File::"), 7919 STR_WITH_LEN("DB_File::"), 7920 STR_WITH_LEN("GDBM_File::"), 7921 STR_WITH_LEN("SDBM_File::"), 7922 STR_WITH_LEN("ODBM_File::"), 7923 NULL); 7924 LOP(OP_DBMOPEN,XTERM); 7925 7926 case KEY_dbmclose: 7927 UNI(OP_DBMCLOSE); 7928 7929 case KEY_dump: 7930 PL_expect = XOPERATOR; 7931 s = force_word(s,WORD,TRUE,FALSE); 7932 LOOPX(OP_DUMP); 7933 7934 case KEY_else: 7935 PREBLOCK(ELSE); 7936 7937 case KEY_elsif: 7938 pl_yylval.ival = CopLINE(PL_curcop); 7939 OPERATOR(ELSIF); 7940 7941 case KEY_eq: 7942 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7943 return REPORT(0); 7944 Eop(OP_SEQ); 7945 7946 case KEY_exists: 7947 UNI(OP_EXISTS); 7948 7949 case KEY_exit: 7950 if (PL_madskills) 7951 UNI(OP_INT); 7952 UNI(OP_EXIT); 7953 7954 case KEY_eval: 7955 s = SKIPSPACE1(s); 7956 if (*s == '{') { /* block eval */ 7957 PL_expect = XTERMBLOCK; 7958 UNIBRACK(OP_ENTERTRY); 7959 } 7960 else { /* string eval */ 7961 PL_expect = XTERM; 7962 UNIBRACK(OP_ENTEREVAL); 7963 } 7964 7965 case KEY_evalbytes: 7966 PL_expect = XTERM; 7967 UNIBRACK(-OP_ENTEREVAL); 7968 7969 case KEY_eof: 7970 UNI(OP_EOF); 7971 7972 case KEY_exp: 7973 UNI(OP_EXP); 7974 7975 case KEY_each: 7976 UNI(OP_EACH); 7977 7978 case KEY_exec: 7979 LOP(OP_EXEC,XREF); 7980 7981 case KEY_endhostent: 7982 FUN0(OP_EHOSTENT); 7983 7984 case KEY_endnetent: 7985 FUN0(OP_ENETENT); 7986 7987 case KEY_endservent: 7988 FUN0(OP_ESERVENT); 7989 7990 case KEY_endprotoent: 7991 FUN0(OP_EPROTOENT); 7992 7993 case KEY_endpwent: 7994 FUN0(OP_EPWENT); 7995 7996 case KEY_endgrent: 7997 FUN0(OP_EGRENT); 7998 7999 case KEY_for: 8000 case KEY_foreach: 8001 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8002 return REPORT(0); 8003 pl_yylval.ival = CopLINE(PL_curcop); 8004 s = SKIPSPACE1(s); 8005 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 8006 char *p = s; 8007 #ifdef PERL_MAD 8008 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */ 8009 #endif 8010 8011 if ((PL_bufend - p) >= 3 && 8012 strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 8013 p += 2; 8014 else if ((PL_bufend - p) >= 4 && 8015 strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 8016 p += 3; 8017 p = PEEKSPACE(p); 8018 /* skip optional package name, as in "for my abc $x (..)" */ 8019 if (isIDFIRST_lazy_if(p,UTF)) { 8020 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 8021 p = PEEKSPACE(p); 8022 } 8023 if (*p != '$') 8024 Perl_croak(aTHX_ "Missing $ on loop variable"); 8025 #ifdef PERL_MAD 8026 s = SvPVX(PL_linestr) + soff; 8027 #endif 8028 } 8029 OPERATOR(FOR); 8030 8031 case KEY_formline: 8032 LOP(OP_FORMLINE,XTERM); 8033 8034 case KEY_fork: 8035 FUN0(OP_FORK); 8036 8037 case KEY_fc: 8038 UNI(OP_FC); 8039 8040 case KEY_fcntl: 8041 LOP(OP_FCNTL,XTERM); 8042 8043 case KEY_fileno: 8044 UNI(OP_FILENO); 8045 8046 case KEY_flock: 8047 LOP(OP_FLOCK,XTERM); 8048 8049 case KEY_gt: 8050 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8051 return REPORT(0); 8052 Rop(OP_SGT); 8053 8054 case KEY_ge: 8055 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8056 return REPORT(0); 8057 Rop(OP_SGE); 8058 8059 case KEY_grep: 8060 LOP(OP_GREPSTART, XREF); 8061 8062 case KEY_goto: 8063 PL_expect = XOPERATOR; 8064 s = force_word(s,WORD,TRUE,FALSE); 8065 LOOPX(OP_GOTO); 8066 8067 case KEY_gmtime: 8068 UNI(OP_GMTIME); 8069 8070 case KEY_getc: 8071 UNIDOR(OP_GETC); 8072 8073 case KEY_getppid: 8074 FUN0(OP_GETPPID); 8075 8076 case KEY_getpgrp: 8077 UNI(OP_GETPGRP); 8078 8079 case KEY_getpriority: 8080 LOP(OP_GETPRIORITY,XTERM); 8081 8082 case KEY_getprotobyname: 8083 UNI(OP_GPBYNAME); 8084 8085 case KEY_getprotobynumber: 8086 LOP(OP_GPBYNUMBER,XTERM); 8087 8088 case KEY_getprotoent: 8089 FUN0(OP_GPROTOENT); 8090 8091 case KEY_getpwent: 8092 FUN0(OP_GPWENT); 8093 8094 case KEY_getpwnam: 8095 UNI(OP_GPWNAM); 8096 8097 case KEY_getpwuid: 8098 UNI(OP_GPWUID); 8099 8100 case KEY_getpeername: 8101 UNI(OP_GETPEERNAME); 8102 8103 case KEY_gethostbyname: 8104 UNI(OP_GHBYNAME); 8105 8106 case KEY_gethostbyaddr: 8107 LOP(OP_GHBYADDR,XTERM); 8108 8109 case KEY_gethostent: 8110 FUN0(OP_GHOSTENT); 8111 8112 case KEY_getnetbyname: 8113 UNI(OP_GNBYNAME); 8114 8115 case KEY_getnetbyaddr: 8116 LOP(OP_GNBYADDR,XTERM); 8117 8118 case KEY_getnetent: 8119 FUN0(OP_GNETENT); 8120 8121 case KEY_getservbyname: 8122 LOP(OP_GSBYNAME,XTERM); 8123 8124 case KEY_getservbyport: 8125 LOP(OP_GSBYPORT,XTERM); 8126 8127 case KEY_getservent: 8128 FUN0(OP_GSERVENT); 8129 8130 case KEY_getsockname: 8131 UNI(OP_GETSOCKNAME); 8132 8133 case KEY_getsockopt: 8134 LOP(OP_GSOCKOPT,XTERM); 8135 8136 case KEY_getgrent: 8137 FUN0(OP_GGRENT); 8138 8139 case KEY_getgrnam: 8140 UNI(OP_GGRNAM); 8141 8142 case KEY_getgrgid: 8143 UNI(OP_GGRGID); 8144 8145 case KEY_getlogin: 8146 FUN0(OP_GETLOGIN); 8147 8148 case KEY_given: 8149 pl_yylval.ival = CopLINE(PL_curcop); 8150 Perl_ck_warner_d(aTHX_ 8151 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8152 "given is experimental"); 8153 OPERATOR(GIVEN); 8154 8155 case KEY_glob: 8156 LOP( 8157 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, 8158 XTERM 8159 ); 8160 8161 case KEY_hex: 8162 UNI(OP_HEX); 8163 8164 case KEY_if: 8165 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8166 return REPORT(0); 8167 pl_yylval.ival = CopLINE(PL_curcop); 8168 OPERATOR(IF); 8169 8170 case KEY_index: 8171 LOP(OP_INDEX,XTERM); 8172 8173 case KEY_int: 8174 UNI(OP_INT); 8175 8176 case KEY_ioctl: 8177 LOP(OP_IOCTL,XTERM); 8178 8179 case KEY_join: 8180 LOP(OP_JOIN,XTERM); 8181 8182 case KEY_keys: 8183 UNI(OP_KEYS); 8184 8185 case KEY_kill: 8186 LOP(OP_KILL,XTERM); 8187 8188 case KEY_last: 8189 PL_expect = XOPERATOR; 8190 s = force_word(s,WORD,TRUE,FALSE); 8191 LOOPX(OP_LAST); 8192 8193 case KEY_lc: 8194 UNI(OP_LC); 8195 8196 case KEY_lcfirst: 8197 UNI(OP_LCFIRST); 8198 8199 case KEY_local: 8200 pl_yylval.ival = 0; 8201 OPERATOR(LOCAL); 8202 8203 case KEY_length: 8204 UNI(OP_LENGTH); 8205 8206 case KEY_lt: 8207 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8208 return REPORT(0); 8209 Rop(OP_SLT); 8210 8211 case KEY_le: 8212 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8213 return REPORT(0); 8214 Rop(OP_SLE); 8215 8216 case KEY_localtime: 8217 UNI(OP_LOCALTIME); 8218 8219 case KEY_log: 8220 UNI(OP_LOG); 8221 8222 case KEY_link: 8223 LOP(OP_LINK,XTERM); 8224 8225 case KEY_listen: 8226 LOP(OP_LISTEN,XTERM); 8227 8228 case KEY_lock: 8229 UNI(OP_LOCK); 8230 8231 case KEY_lstat: 8232 UNI(OP_LSTAT); 8233 8234 case KEY_m: 8235 s = scan_pat(s,OP_MATCH); 8236 TERM(sublex_start()); 8237 8238 case KEY_map: 8239 LOP(OP_MAPSTART, XREF); 8240 8241 case KEY_mkdir: 8242 LOP(OP_MKDIR,XTERM); 8243 8244 case KEY_msgctl: 8245 LOP(OP_MSGCTL,XTERM); 8246 8247 case KEY_msgget: 8248 LOP(OP_MSGGET,XTERM); 8249 8250 case KEY_msgrcv: 8251 LOP(OP_MSGRCV,XTERM); 8252 8253 case KEY_msgsnd: 8254 LOP(OP_MSGSND,XTERM); 8255 8256 case KEY_our: 8257 case KEY_my: 8258 case KEY_state: 8259 PL_in_my = (U16)tmp; 8260 s = SKIPSPACE1(s); 8261 if (isIDFIRST_lazy_if(s,UTF)) { 8262 #ifdef PERL_MAD 8263 char* start = s; 8264 #endif 8265 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 8266 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 8267 { 8268 if (!FEATURE_LEXSUBS_IS_ENABLED) 8269 Perl_croak(aTHX_ 8270 "Experimental \"%s\" subs not enabled", 8271 tmp == KEY_my ? "my" : 8272 tmp == KEY_state ? "state" : "our"); 8273 Perl_ck_warner_d(aTHX_ 8274 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS), 8275 "The lexical_subs feature is experimental"); 8276 goto really_sub; 8277 } 8278 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 8279 if (!PL_in_my_stash) { 8280 char tmpbuf[1024]; 8281 PL_bufptr = s; 8282 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 8283 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 8284 } 8285 #ifdef PERL_MAD 8286 if (PL_madskills) { /* just add type to declarator token */ 8287 sv_catsv(PL_thistoken, PL_nextwhite); 8288 PL_nextwhite = 0; 8289 sv_catpvn(PL_thistoken, start, s - start); 8290 } 8291 #endif 8292 } 8293 pl_yylval.ival = 1; 8294 OPERATOR(MY); 8295 8296 case KEY_next: 8297 PL_expect = XOPERATOR; 8298 s = force_word(s,WORD,TRUE,FALSE); 8299 LOOPX(OP_NEXT); 8300 8301 case KEY_ne: 8302 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8303 return REPORT(0); 8304 Eop(OP_SNE); 8305 8306 case KEY_no: 8307 s = tokenize_use(0, s); 8308 TERM(USE); 8309 8310 case KEY_not: 8311 if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) 8312 FUN1(OP_NOT); 8313 else { 8314 if (!PL_lex_allbrackets && 8315 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8316 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8317 OPERATOR(NOTOP); 8318 } 8319 8320 case KEY_open: 8321 s = SKIPSPACE1(s); 8322 if (isIDFIRST_lazy_if(s,UTF)) { 8323 const char *t; 8324 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, 8325 &len); 8326 for (t=d; isSPACE(*t);) 8327 t++; 8328 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8329 /* [perl #16184] */ 8330 && !(t[0] == '=' && t[1] == '>') 8331 && !(t[0] == ':' && t[1] == ':') 8332 && !keyword(s, d-s, 0) 8333 ) { 8334 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8335 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", 8336 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 8337 } 8338 } 8339 LOP(OP_OPEN,XTERM); 8340 8341 case KEY_or: 8342 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8343 return REPORT(0); 8344 pl_yylval.ival = OP_OR; 8345 OPERATOR(OROP); 8346 8347 case KEY_ord: 8348 UNI(OP_ORD); 8349 8350 case KEY_oct: 8351 UNI(OP_OCT); 8352 8353 case KEY_opendir: 8354 LOP(OP_OPEN_DIR,XTERM); 8355 8356 case KEY_print: 8357 checkcomma(s,PL_tokenbuf,"filehandle"); 8358 LOP(OP_PRINT,XREF); 8359 8360 case KEY_printf: 8361 checkcomma(s,PL_tokenbuf,"filehandle"); 8362 LOP(OP_PRTF,XREF); 8363 8364 case KEY_prototype: 8365 UNI(OP_PROTOTYPE); 8366 8367 case KEY_push: 8368 LOP(OP_PUSH,XTERM); 8369 8370 case KEY_pop: 8371 UNIDOR(OP_POP); 8372 8373 case KEY_pos: 8374 UNIDOR(OP_POS); 8375 8376 case KEY_pack: 8377 LOP(OP_PACK,XTERM); 8378 8379 case KEY_package: 8380 s = force_word(s,WORD,FALSE,TRUE); 8381 s = SKIPSPACE1(s); 8382 s = force_strict_version(s); 8383 PL_lex_expect = XBLOCK; 8384 OPERATOR(PACKAGE); 8385 8386 case KEY_pipe: 8387 LOP(OP_PIPE_OP,XTERM); 8388 8389 case KEY_q: 8390 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8391 if (!s) 8392 missingterm(NULL); 8393 COPLINE_SET_FROM_MULTI_END; 8394 pl_yylval.ival = OP_CONST; 8395 TERM(sublex_start()); 8396 8397 case KEY_quotemeta: 8398 UNI(OP_QUOTEMETA); 8399 8400 case KEY_qw: { 8401 OP *words = NULL; 8402 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8403 if (!s) 8404 missingterm(NULL); 8405 COPLINE_SET_FROM_MULTI_END; 8406 PL_expect = XOPERATOR; 8407 if (SvCUR(PL_lex_stuff)) { 8408 int warned_comma = !ckWARN(WARN_QW); 8409 int warned_comment = warned_comma; 8410 d = SvPV_force(PL_lex_stuff, len); 8411 while (len) { 8412 for (; isSPACE(*d) && len; --len, ++d) 8413 /**/; 8414 if (len) { 8415 SV *sv; 8416 const char *b = d; 8417 if (!warned_comma || !warned_comment) { 8418 for (; !isSPACE(*d) && len; --len, ++d) { 8419 if (!warned_comma && *d == ',') { 8420 Perl_warner(aTHX_ packWARN(WARN_QW), 8421 "Possible attempt to separate words with commas"); 8422 ++warned_comma; 8423 } 8424 else if (!warned_comment && *d == '#') { 8425 Perl_warner(aTHX_ packWARN(WARN_QW), 8426 "Possible attempt to put comments in qw() list"); 8427 ++warned_comment; 8428 } 8429 } 8430 } 8431 else { 8432 for (; !isSPACE(*d) && len; --len, ++d) 8433 /**/; 8434 } 8435 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 8436 words = op_append_elem(OP_LIST, words, 8437 newSVOP(OP_CONST, 0, tokeq(sv))); 8438 } 8439 } 8440 } 8441 if (!words) 8442 words = newNULLLIST(); 8443 if (PL_lex_stuff) { 8444 SvREFCNT_dec(PL_lex_stuff); 8445 PL_lex_stuff = NULL; 8446 } 8447 PL_expect = XOPERATOR; 8448 pl_yylval.opval = sawparens(words); 8449 TOKEN(QWLIST); 8450 } 8451 8452 case KEY_qq: 8453 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8454 if (!s) 8455 missingterm(NULL); 8456 pl_yylval.ival = OP_STRINGIFY; 8457 if (SvIVX(PL_lex_stuff) == '\'') 8458 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8459 TERM(sublex_start()); 8460 8461 case KEY_qr: 8462 s = scan_pat(s,OP_QR); 8463 TERM(sublex_start()); 8464 8465 case KEY_qx: 8466 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8467 if (!s) 8468 missingterm(NULL); 8469 pl_yylval.ival = OP_BACKTICK; 8470 TERM(sublex_start()); 8471 8472 case KEY_return: 8473 OLDLOP(OP_RETURN); 8474 8475 case KEY_require: 8476 s = SKIPSPACE1(s); 8477 PL_expect = XOPERATOR; 8478 if (isDIGIT(*s)) { 8479 s = force_version(s, FALSE); 8480 } 8481 else if (*s != 'v' || !isDIGIT(s[1]) 8482 || (s = force_version(s, TRUE), *s == 'v')) 8483 { 8484 *PL_tokenbuf = '\0'; 8485 s = force_word(s,WORD,TRUE,TRUE); 8486 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 8487 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 8488 GV_ADD | (UTF ? SVf_UTF8 : 0)); 8489 else if (*s == '<') 8490 yyerror("<> should be quotes"); 8491 } 8492 if (orig_keyword == KEY_require) { 8493 orig_keyword = 0; 8494 pl_yylval.ival = 1; 8495 } 8496 else 8497 pl_yylval.ival = 0; 8498 PL_expect = XTERM; 8499 PL_bufptr = s; 8500 PL_last_uni = PL_oldbufptr; 8501 PL_last_lop_op = OP_REQUIRE; 8502 s = skipspace(s); 8503 return REPORT( (int)REQUIRE ); 8504 8505 case KEY_reset: 8506 UNI(OP_RESET); 8507 8508 case KEY_redo: 8509 PL_expect = XOPERATOR; 8510 s = force_word(s,WORD,TRUE,FALSE); 8511 LOOPX(OP_REDO); 8512 8513 case KEY_rename: 8514 LOP(OP_RENAME,XTERM); 8515 8516 case KEY_rand: 8517 UNI(OP_RAND); 8518 8519 case KEY_rmdir: 8520 UNI(OP_RMDIR); 8521 8522 case KEY_rindex: 8523 LOP(OP_RINDEX,XTERM); 8524 8525 case KEY_read: 8526 LOP(OP_READ,XTERM); 8527 8528 case KEY_readdir: 8529 UNI(OP_READDIR); 8530 8531 case KEY_readline: 8532 UNIDOR(OP_READLINE); 8533 8534 case KEY_readpipe: 8535 UNIDOR(OP_BACKTICK); 8536 8537 case KEY_rewinddir: 8538 UNI(OP_REWINDDIR); 8539 8540 case KEY_recv: 8541 LOP(OP_RECV,XTERM); 8542 8543 case KEY_reverse: 8544 LOP(OP_REVERSE,XTERM); 8545 8546 case KEY_readlink: 8547 UNIDOR(OP_READLINK); 8548 8549 case KEY_ref: 8550 UNI(OP_REF); 8551 8552 case KEY_s: 8553 s = scan_subst(s); 8554 if (pl_yylval.opval) 8555 TERM(sublex_start()); 8556 else 8557 TOKEN(1); /* force error */ 8558 8559 case KEY_say: 8560 checkcomma(s,PL_tokenbuf,"filehandle"); 8561 LOP(OP_SAY,XREF); 8562 8563 case KEY_chomp: 8564 UNI(OP_CHOMP); 8565 8566 case KEY_scalar: 8567 UNI(OP_SCALAR); 8568 8569 case KEY_select: 8570 LOP(OP_SELECT,XTERM); 8571 8572 case KEY_seek: 8573 LOP(OP_SEEK,XTERM); 8574 8575 case KEY_semctl: 8576 LOP(OP_SEMCTL,XTERM); 8577 8578 case KEY_semget: 8579 LOP(OP_SEMGET,XTERM); 8580 8581 case KEY_semop: 8582 LOP(OP_SEMOP,XTERM); 8583 8584 case KEY_send: 8585 LOP(OP_SEND,XTERM); 8586 8587 case KEY_setpgrp: 8588 LOP(OP_SETPGRP,XTERM); 8589 8590 case KEY_setpriority: 8591 LOP(OP_SETPRIORITY,XTERM); 8592 8593 case KEY_sethostent: 8594 UNI(OP_SHOSTENT); 8595 8596 case KEY_setnetent: 8597 UNI(OP_SNETENT); 8598 8599 case KEY_setservent: 8600 UNI(OP_SSERVENT); 8601 8602 case KEY_setprotoent: 8603 UNI(OP_SPROTOENT); 8604 8605 case KEY_setpwent: 8606 FUN0(OP_SPWENT); 8607 8608 case KEY_setgrent: 8609 FUN0(OP_SGRENT); 8610 8611 case KEY_seekdir: 8612 LOP(OP_SEEKDIR,XTERM); 8613 8614 case KEY_setsockopt: 8615 LOP(OP_SSOCKOPT,XTERM); 8616 8617 case KEY_shift: 8618 UNIDOR(OP_SHIFT); 8619 8620 case KEY_shmctl: 8621 LOP(OP_SHMCTL,XTERM); 8622 8623 case KEY_shmget: 8624 LOP(OP_SHMGET,XTERM); 8625 8626 case KEY_shmread: 8627 LOP(OP_SHMREAD,XTERM); 8628 8629 case KEY_shmwrite: 8630 LOP(OP_SHMWRITE,XTERM); 8631 8632 case KEY_shutdown: 8633 LOP(OP_SHUTDOWN,XTERM); 8634 8635 case KEY_sin: 8636 UNI(OP_SIN); 8637 8638 case KEY_sleep: 8639 UNI(OP_SLEEP); 8640 8641 case KEY_socket: 8642 LOP(OP_SOCKET,XTERM); 8643 8644 case KEY_socketpair: 8645 LOP(OP_SOCKPAIR,XTERM); 8646 8647 case KEY_sort: 8648 checkcomma(s,PL_tokenbuf,"subroutine name"); 8649 s = SKIPSPACE1(s); 8650 PL_expect = XTERM; 8651 s = force_word(s,WORD,TRUE,TRUE); 8652 LOP(OP_SORT,XREF); 8653 8654 case KEY_split: 8655 LOP(OP_SPLIT,XTERM); 8656 8657 case KEY_sprintf: 8658 LOP(OP_SPRINTF,XTERM); 8659 8660 case KEY_splice: 8661 LOP(OP_SPLICE,XTERM); 8662 8663 case KEY_sqrt: 8664 UNI(OP_SQRT); 8665 8666 case KEY_srand: 8667 UNI(OP_SRAND); 8668 8669 case KEY_stat: 8670 UNI(OP_STAT); 8671 8672 case KEY_study: 8673 UNI(OP_STUDY); 8674 8675 case KEY_substr: 8676 LOP(OP_SUBSTR,XTERM); 8677 8678 case KEY_format: 8679 case KEY_sub: 8680 really_sub: 8681 { 8682 char * const tmpbuf = PL_tokenbuf + 1; 8683 expectation attrful; 8684 bool have_name, have_proto; 8685 const int key = tmp; 8686 #ifndef PERL_MAD 8687 SV *format_name = NULL; 8688 #endif 8689 8690 #ifdef PERL_MAD 8691 SV *tmpwhite = 0; 8692 8693 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 8694 SV *subtoken = PL_madskills 8695 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr)) 8696 : NULL; 8697 PL_thistoken = 0; 8698 8699 d = s; 8700 s = SKIPSPACE2(s,tmpwhite); 8701 #else 8702 d = s; 8703 s = skipspace(s); 8704 #endif 8705 8706 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' || 8707 (*s == ':' && s[1] == ':')) 8708 { 8709 #ifdef PERL_MAD 8710 SV *nametoke = NULL; 8711 #endif 8712 8713 PL_expect = XBLOCK; 8714 attrful = XATTRBLOCK; 8715 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 8716 &len); 8717 #ifdef PERL_MAD 8718 if (PL_madskills) 8719 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); 8720 #else 8721 if (key == KEY_format) 8722 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 8723 #endif 8724 *PL_tokenbuf = '&'; 8725 if (memchr(tmpbuf, ':', len) || key != KEY_sub 8726 || pad_findmy_pvn( 8727 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0 8728 ) != NOT_IN_PAD) 8729 sv_setpvn(PL_subname, tmpbuf, len); 8730 else { 8731 sv_setsv(PL_subname,PL_curstname); 8732 sv_catpvs(PL_subname,"::"); 8733 sv_catpvn(PL_subname,tmpbuf,len); 8734 } 8735 if (SvUTF8(PL_linestr)) 8736 SvUTF8_on(PL_subname); 8737 have_name = TRUE; 8738 8739 8740 #ifdef PERL_MAD 8741 start_force(0); 8742 CURMAD('X', nametoke); 8743 CURMAD('_', tmpwhite); 8744 force_ident_maybe_lex('&'); 8745 8746 s = SKIPSPACE2(d,tmpwhite); 8747 #else 8748 s = skipspace(d); 8749 #endif 8750 } 8751 else { 8752 if (key == KEY_my || key == KEY_our || key==KEY_state) 8753 { 8754 *d = '\0'; 8755 /* diag_listed_as: Missing name in "%s sub" */ 8756 Perl_croak(aTHX_ 8757 "Missing name in \"%s\"", PL_bufptr); 8758 } 8759 PL_expect = XTERMBLOCK; 8760 attrful = XATTRTERM; 8761 sv_setpvs(PL_subname,"?"); 8762 have_name = FALSE; 8763 } 8764 8765 if (key == KEY_format) { 8766 #ifdef PERL_MAD 8767 PL_thistoken = subtoken; 8768 s = d; 8769 #else 8770 if (format_name) { 8771 start_force(PL_curforce); 8772 NEXTVAL_NEXTTOKE.opval 8773 = (OP*)newSVOP(OP_CONST,0, format_name); 8774 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 8775 force_next(WORD); 8776 } 8777 #endif 8778 PREBLOCK(FORMAT); 8779 } 8780 8781 /* Look for a prototype */ 8782 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) { 8783 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8784 COPLINE_SET_FROM_MULTI_END; 8785 if (!s) 8786 Perl_croak(aTHX_ "Prototype not terminated"); 8787 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); 8788 have_proto = TRUE; 8789 8790 #ifdef PERL_MAD 8791 start_force(0); 8792 CURMAD('q', PL_thisopen); 8793 CURMAD('_', tmpwhite); 8794 CURMAD('=', PL_thisstuff); 8795 CURMAD('Q', PL_thisclose); 8796 NEXTVAL_NEXTTOKE.opval = 8797 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8798 PL_lex_stuff = NULL; 8799 force_next(THING); 8800 8801 s = SKIPSPACE2(s,tmpwhite); 8802 #else 8803 s = skipspace(s); 8804 #endif 8805 } 8806 else 8807 have_proto = FALSE; 8808 8809 if (*s == ':' && s[1] != ':') 8810 PL_expect = attrful; 8811 else if ((*s != '{' && *s != '(') && key == KEY_sub) { 8812 if (!have_name) 8813 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 8814 else if (*s != ';' && *s != '}') 8815 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname)); 8816 } 8817 8818 #ifdef PERL_MAD 8819 start_force(0); 8820 if (tmpwhite) { 8821 if (PL_madskills) 8822 curmad('^', newSVpvs("")); 8823 CURMAD('_', tmpwhite); 8824 } 8825 force_next(0); 8826 8827 PL_thistoken = subtoken; 8828 PERL_UNUSED_VAR(have_proto); 8829 #else 8830 if (have_proto) { 8831 NEXTVAL_NEXTTOKE.opval = 8832 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8833 PL_lex_stuff = NULL; 8834 force_next(THING); 8835 } 8836 #endif 8837 if (!have_name) { 8838 if (PL_curstash) 8839 sv_setpvs(PL_subname, "__ANON__"); 8840 else 8841 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 8842 TOKEN(ANONSUB); 8843 } 8844 #ifndef PERL_MAD 8845 force_ident_maybe_lex('&'); 8846 #endif 8847 TOKEN(SUB); 8848 } 8849 8850 case KEY_system: 8851 LOP(OP_SYSTEM,XREF); 8852 8853 case KEY_symlink: 8854 LOP(OP_SYMLINK,XTERM); 8855 8856 case KEY_syscall: 8857 LOP(OP_SYSCALL,XTERM); 8858 8859 case KEY_sysopen: 8860 LOP(OP_SYSOPEN,XTERM); 8861 8862 case KEY_sysseek: 8863 LOP(OP_SYSSEEK,XTERM); 8864 8865 case KEY_sysread: 8866 LOP(OP_SYSREAD,XTERM); 8867 8868 case KEY_syswrite: 8869 LOP(OP_SYSWRITE,XTERM); 8870 8871 case KEY_tr: 8872 case KEY_y: 8873 s = scan_trans(s); 8874 TERM(sublex_start()); 8875 8876 case KEY_tell: 8877 UNI(OP_TELL); 8878 8879 case KEY_telldir: 8880 UNI(OP_TELLDIR); 8881 8882 case KEY_tie: 8883 LOP(OP_TIE,XTERM); 8884 8885 case KEY_tied: 8886 UNI(OP_TIED); 8887 8888 case KEY_time: 8889 FUN0(OP_TIME); 8890 8891 case KEY_times: 8892 FUN0(OP_TMS); 8893 8894 case KEY_truncate: 8895 LOP(OP_TRUNCATE,XTERM); 8896 8897 case KEY_uc: 8898 UNI(OP_UC); 8899 8900 case KEY_ucfirst: 8901 UNI(OP_UCFIRST); 8902 8903 case KEY_untie: 8904 UNI(OP_UNTIE); 8905 8906 case KEY_until: 8907 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8908 return REPORT(0); 8909 pl_yylval.ival = CopLINE(PL_curcop); 8910 OPERATOR(UNTIL); 8911 8912 case KEY_unless: 8913 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8914 return REPORT(0); 8915 pl_yylval.ival = CopLINE(PL_curcop); 8916 OPERATOR(UNLESS); 8917 8918 case KEY_unlink: 8919 LOP(OP_UNLINK,XTERM); 8920 8921 case KEY_undef: 8922 UNIDOR(OP_UNDEF); 8923 8924 case KEY_unpack: 8925 LOP(OP_UNPACK,XTERM); 8926 8927 case KEY_utime: 8928 LOP(OP_UTIME,XTERM); 8929 8930 case KEY_umask: 8931 UNIDOR(OP_UMASK); 8932 8933 case KEY_unshift: 8934 LOP(OP_UNSHIFT,XTERM); 8935 8936 case KEY_use: 8937 s = tokenize_use(1, s); 8938 OPERATOR(USE); 8939 8940 case KEY_values: 8941 UNI(OP_VALUES); 8942 8943 case KEY_vec: 8944 LOP(OP_VEC,XTERM); 8945 8946 case KEY_when: 8947 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8948 return REPORT(0); 8949 pl_yylval.ival = CopLINE(PL_curcop); 8950 Perl_ck_warner_d(aTHX_ 8951 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8952 "when is experimental"); 8953 OPERATOR(WHEN); 8954 8955 case KEY_while: 8956 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8957 return REPORT(0); 8958 pl_yylval.ival = CopLINE(PL_curcop); 8959 OPERATOR(WHILE); 8960 8961 case KEY_warn: 8962 PL_hints |= HINT_BLOCK_SCOPE; 8963 LOP(OP_WARN,XTERM); 8964 8965 case KEY_wait: 8966 FUN0(OP_WAIT); 8967 8968 case KEY_waitpid: 8969 LOP(OP_WAITPID,XTERM); 8970 8971 case KEY_wantarray: 8972 FUN0(OP_WANTARRAY); 8973 8974 case KEY_write: 8975 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8976 * we use the same number on EBCDIC */ 8977 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8978 UNI(OP_ENTERWRITE); 8979 8980 case KEY_x: 8981 if (PL_expect == XOPERATOR) { 8982 if (*s == '=' && !PL_lex_allbrackets && 8983 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8984 return REPORT(0); 8985 Mop(OP_REPEAT); 8986 } 8987 check_uni(); 8988 goto just_a_word; 8989 8990 case KEY_xor: 8991 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8992 return REPORT(0); 8993 pl_yylval.ival = OP_XOR; 8994 OPERATOR(OROP); 8995 } 8996 }} 8997 } 8998 8999 /* 9000 S_pending_ident 9001 9002 Looks up an identifier in the pad or in a package 9003 9004 Returns: 9005 PRIVATEREF if this is a lexical name. 9006 WORD if this belongs to a package. 9007 9008 Structure: 9009 if we're in a my declaration 9010 croak if they tried to say my($foo::bar) 9011 build the ops for a my() declaration 9012 if it's an access to a my() variable 9013 build ops for access to a my() variable 9014 if in a dq string, and they've said @foo and we can't find @foo 9015 warn 9016 build ops for a bareword 9017 */ 9018 9019 static int 9020 S_pending_ident(pTHX) 9021 { 9022 dVAR; 9023 PADOFFSET tmp = 0; 9024 const char pit = (char)pl_yylval.ival; 9025 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 9026 /* All routes through this function want to know if there is a colon. */ 9027 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 9028 9029 DEBUG_T({ PerlIO_printf(Perl_debug_log, 9030 "### Pending identifier '%s'\n", PL_tokenbuf); }); 9031 9032 /* if we're in a my(), we can't allow dynamics here. 9033 $foo'bar has already been turned into $foo::bar, so 9034 just check for colons. 9035 9036 if it's a legal name, the OP is a PADANY. 9037 */ 9038 if (PL_in_my) { 9039 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 9040 if (has_colon) 9041 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 9042 "variable %s in \"our\"", 9043 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 9044 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 9045 } 9046 else { 9047 if (has_colon) { 9048 /* PL_no_myglob is constant */ 9049 GCC_DIAG_IGNORE(-Wformat-nonliteral); 9050 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 9051 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), 9052 UTF ? SVf_UTF8 : 0); 9053 GCC_DIAG_RESTORE; 9054 } 9055 9056 pl_yylval.opval = newOP(OP_PADANY, 0); 9057 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 9058 UTF ? SVf_UTF8 : 0); 9059 return PRIVATEREF; 9060 } 9061 } 9062 9063 /* 9064 build the ops for accesses to a my() variable. 9065 */ 9066 9067 if (!has_colon) { 9068 if (!PL_in_my) 9069 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 9070 UTF ? SVf_UTF8 : 0); 9071 if (tmp != NOT_IN_PAD) { 9072 /* might be an "our" variable" */ 9073 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9074 /* build ops for a bareword */ 9075 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9076 HEK * const stashname = HvNAME_HEK(stash); 9077 SV * const sym = newSVhek(stashname); 9078 sv_catpvs(sym, "::"); 9079 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 9080 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 9081 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9082 if (pit != '&') 9083 gv_fetchsv(sym, 9084 (PL_in_eval 9085 ? (GV_ADDMULTI | GV_ADDINEVAL) 9086 : GV_ADDMULTI 9087 ), 9088 ((PL_tokenbuf[0] == '$') ? SVt_PV 9089 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9090 : SVt_PVHV)); 9091 return WORD; 9092 } 9093 9094 pl_yylval.opval = newOP(OP_PADANY, 0); 9095 pl_yylval.opval->op_targ = tmp; 9096 return PRIVATEREF; 9097 } 9098 } 9099 9100 /* 9101 Whine if they've said @foo in a doublequoted string, 9102 and @foo isn't a variable we can find in the symbol 9103 table. 9104 */ 9105 if (ckWARN(WARN_AMBIGUOUS) && 9106 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { 9107 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 9108 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV); 9109 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 9110 /* DO NOT warn for @- and @+ */ 9111 && !( PL_tokenbuf[2] == '\0' && 9112 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) 9113 ) 9114 { 9115 /* Downgraded from fatal to warning 20000522 mjd */ 9116 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9117 "Possible unintended interpolation of %"UTF8f 9118 " in string", 9119 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 9120 } 9121 } 9122 9123 /* build ops for a bareword */ 9124 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 9125 newSVpvn_flags(PL_tokenbuf + 1, 9126 tokenbuf_len - 1, 9127 UTF ? SVf_UTF8 : 0 )); 9128 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9129 if (pit != '&') 9130 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, 9131 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD) 9132 | ( UTF ? SVf_UTF8 : 0 ), 9133 ((PL_tokenbuf[0] == '$') ? SVt_PV 9134 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9135 : SVt_PVHV)); 9136 return WORD; 9137 } 9138 9139 STATIC void 9140 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 9141 { 9142 dVAR; 9143 9144 PERL_ARGS_ASSERT_CHECKCOMMA; 9145 9146 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 9147 if (ckWARN(WARN_SYNTAX)) { 9148 int level = 1; 9149 const char *w; 9150 for (w = s+2; *w && level; w++) { 9151 if (*w == '(') 9152 ++level; 9153 else if (*w == ')') 9154 --level; 9155 } 9156 while (isSPACE(*w)) 9157 ++w; 9158 /* the list of chars below is for end of statements or 9159 * block / parens, boolean operators (&&, ||, //) and branch 9160 * constructs (or, and, if, until, unless, while, err, for). 9161 * Not a very solid hack... */ 9162 if (!*w || !strchr(";&/|})]oaiuwef!=", *w)) 9163 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9164 "%s (...) interpreted as function",name); 9165 } 9166 } 9167 while (s < PL_bufend && isSPACE(*s)) 9168 s++; 9169 if (*s == '(') 9170 s++; 9171 while (s < PL_bufend && isSPACE(*s)) 9172 s++; 9173 if (isIDFIRST_lazy_if(s,UTF)) { 9174 const char * const w = s; 9175 s += UTF ? UTF8SKIP(s) : 1; 9176 while (isWORDCHAR_lazy_if(s,UTF)) 9177 s += UTF ? UTF8SKIP(s) : 1; 9178 while (s < PL_bufend && isSPACE(*s)) 9179 s++; 9180 if (*s == ',') { 9181 GV* gv; 9182 if (keyword(w, s - w, 0)) 9183 return; 9184 9185 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 9186 if (gv && GvCVu(gv)) 9187 return; 9188 Perl_croak(aTHX_ "No comma allowed after %s", what); 9189 } 9190 } 9191 } 9192 9193 /* S_new_constant(): do any overload::constant lookup. 9194 9195 Either returns sv, or mortalizes/frees sv and returns a new SV*. 9196 Best used as sv=new_constant(..., sv, ...). 9197 If s, pv are NULL, calls subroutine with one argument, 9198 and <type> is used with error messages only. 9199 <type> is assumed to be well formed UTF-8 */ 9200 9201 STATIC SV * 9202 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 9203 SV *sv, SV *pv, const char *type, STRLEN typelen) 9204 { 9205 dVAR; dSP; 9206 HV * table = GvHV(PL_hintgv); /* ^H */ 9207 SV *res; 9208 SV *errsv = NULL; 9209 SV **cvp; 9210 SV *cv, *typesv; 9211 const char *why1 = "", *why2 = "", *why3 = ""; 9212 9213 PERL_ARGS_ASSERT_NEW_CONSTANT; 9214 /* We assume that this is true: */ 9215 if (*key == 'c') { assert (strEQ(key, "charnames")); } 9216 assert(type || s); 9217 9218 /* charnames doesn't work well if there have been errors found */ 9219 if (PL_error_count > 0 && *key == 'c') 9220 { 9221 SvREFCNT_dec_NN(sv); 9222 return &PL_sv_undef; 9223 } 9224 9225 sv_2mortal(sv); /* Parent created it permanently */ 9226 if (!table 9227 || ! (PL_hints & HINT_LOCALIZE_HH) 9228 || ! (cvp = hv_fetch(table, key, keylen, FALSE)) 9229 || ! SvOK(*cvp)) 9230 { 9231 char *msg; 9232 9233 /* Here haven't found what we're looking for. If it is charnames, 9234 * perhaps it needs to be loaded. Try doing that before giving up */ 9235 if (*key == 'c') { 9236 Perl_load_module(aTHX_ 9237 0, 9238 newSVpvs("_charnames"), 9239 /* version parameter; no need to specify it, as if 9240 * we get too early a version, will fail anyway, 9241 * not being able to find '_charnames' */ 9242 NULL, 9243 newSVpvs(":full"), 9244 newSVpvs(":short"), 9245 NULL); 9246 assert(sp == PL_stack_sp); 9247 table = GvHV(PL_hintgv); 9248 if (table 9249 && (PL_hints & HINT_LOCALIZE_HH) 9250 && (cvp = hv_fetch(table, key, keylen, FALSE)) 9251 && SvOK(*cvp)) 9252 { 9253 goto now_ok; 9254 } 9255 } 9256 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 9257 msg = Perl_form(aTHX_ 9258 "Constant(%.*s) unknown", 9259 (int)(type ? typelen : len), 9260 (type ? type: s)); 9261 } 9262 else { 9263 why1 = "$^H{"; 9264 why2 = key; 9265 why3 = "} is not defined"; 9266 report: 9267 if (*key == 'c') { 9268 msg = Perl_form(aTHX_ 9269 /* The +3 is for '\N{'; -4 for that, plus '}' */ 9270 "Unknown charname '%.*s'", (int)typelen - 4, type + 3 9271 ); 9272 } 9273 else { 9274 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", 9275 (int)(type ? typelen : len), 9276 (type ? type: s), why1, why2, why3); 9277 } 9278 } 9279 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 9280 return SvREFCNT_inc_simple_NN(sv); 9281 } 9282 now_ok: 9283 cv = *cvp; 9284 if (!pv && s) 9285 pv = newSVpvn_flags(s, len, SVs_TEMP); 9286 if (type && pv) 9287 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 9288 else 9289 typesv = &PL_sv_undef; 9290 9291 PUSHSTACKi(PERLSI_OVERLOAD); 9292 ENTER ; 9293 SAVETMPS; 9294 9295 PUSHMARK(SP) ; 9296 EXTEND(sp, 3); 9297 if (pv) 9298 PUSHs(pv); 9299 PUSHs(sv); 9300 if (pv) 9301 PUSHs(typesv); 9302 PUTBACK; 9303 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 9304 9305 SPAGAIN ; 9306 9307 /* Check the eval first */ 9308 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 9309 STRLEN errlen; 9310 const char * errstr; 9311 sv_catpvs(errsv, "Propagated"); 9312 errstr = SvPV_const(errsv, errlen); 9313 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 9314 (void)POPs; 9315 res = SvREFCNT_inc_simple_NN(sv); 9316 } 9317 else { 9318 res = POPs; 9319 SvREFCNT_inc_simple_void_NN(res); 9320 } 9321 9322 PUTBACK ; 9323 FREETMPS ; 9324 LEAVE ; 9325 POPSTACK; 9326 9327 if (!SvOK(res)) { 9328 why1 = "Call to &{$^H{"; 9329 why2 = key; 9330 why3 = "}} did not return a defined value"; 9331 sv = res; 9332 (void)sv_2mortal(sv); 9333 goto report; 9334 } 9335 9336 return res; 9337 } 9338 9339 PERL_STATIC_INLINE void 9340 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { 9341 dVAR; 9342 PERL_ARGS_ASSERT_PARSE_IDENT; 9343 9344 for (;;) { 9345 if (*d >= e) 9346 Perl_croak(aTHX_ "%s", ident_too_long); 9347 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { 9348 /* The UTF-8 case must come first, otherwise things 9349 * like c\N{COMBINING TILDE} would start failing, as the 9350 * isWORDCHAR_A case below would gobble the 'c' up. 9351 */ 9352 9353 char *t = *s + UTF8SKIP(*s); 9354 while (isIDCONT_utf8((U8*)t)) 9355 t += UTF8SKIP(t); 9356 if (*d + (t - *s) > e) 9357 Perl_croak(aTHX_ "%s", ident_too_long); 9358 Copy(*s, *d, t - *s, char); 9359 *d += t - *s; 9360 *s = t; 9361 } 9362 else if ( isWORDCHAR_A(**s) ) { 9363 do { 9364 *(*d)++ = *(*s)++; 9365 } while (isWORDCHAR_A(**s) && *d < e); 9366 } 9367 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { 9368 *(*d)++ = ':'; 9369 *(*d)++ = ':'; 9370 (*s)++; 9371 } 9372 else if (allow_package && **s == ':' && (*s)[1] == ':' 9373 /* Disallow things like Foo::$bar. For the curious, this is 9374 * the code path that triggers the "Bad name after" warning 9375 * when looking for barewords. 9376 */ 9377 && (*s)[2] != '$') { 9378 *(*d)++ = *(*s)++; 9379 *(*d)++ = *(*s)++; 9380 } 9381 else 9382 break; 9383 } 9384 return; 9385 } 9386 9387 /* Returns a NUL terminated string, with the length of the string written to 9388 *slp 9389 */ 9390 STATIC char * 9391 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 9392 { 9393 dVAR; 9394 char *d = dest; 9395 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9396 bool is_utf8 = cBOOL(UTF); 9397 9398 PERL_ARGS_ASSERT_SCAN_WORD; 9399 9400 parse_ident(&s, &d, e, allow_package, is_utf8); 9401 *d = '\0'; 9402 *slp = d - dest; 9403 return s; 9404 } 9405 9406 STATIC char * 9407 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 9408 { 9409 dVAR; 9410 I32 herelines = PL_parser->herelines; 9411 SSize_t bracket = -1; 9412 char funny = *s++; 9413 char *d = dest; 9414 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9415 bool is_utf8 = cBOOL(UTF); 9416 I32 orig_copline = 0, tmp_copline = 0; 9417 9418 PERL_ARGS_ASSERT_SCAN_IDENT; 9419 9420 if (isSPACE(*s)) 9421 s = PEEKSPACE(s); 9422 if (isDIGIT(*s)) { 9423 while (isDIGIT(*s)) { 9424 if (d >= e) 9425 Perl_croak(aTHX_ "%s", ident_too_long); 9426 *d++ = *s++; 9427 } 9428 } 9429 else { 9430 parse_ident(&s, &d, e, 1, is_utf8); 9431 } 9432 *d = '\0'; 9433 d = dest; 9434 if (*d) { 9435 /* Either a digit variable, or parse_ident() found an identifier 9436 (anything valid as a bareword), so job done and return. */ 9437 if (PL_lex_state != LEX_NORMAL) 9438 PL_lex_state = LEX_INTERPENDMAYBE; 9439 return s; 9440 } 9441 if (*s == '$' && s[1] && 9442 (isIDFIRST_lazy_if(s+1,is_utf8) 9443 || isDIGIT_A((U8)s[1]) 9444 || s[1] == '$' 9445 || s[1] == '{' 9446 || strnEQ(s+1,"::",2)) ) 9447 { 9448 /* Dereferencing a value in a scalar variable. 9449 The alternatives are different syntaxes for a scalar variable. 9450 Using ' as a leading package separator isn't allowed. :: is. */ 9451 return s; 9452 } 9453 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 9454 if (*s == '{') { 9455 bracket = s - SvPVX(PL_linestr); 9456 s++; 9457 orig_copline = CopLINE(PL_curcop); 9458 if (s < PL_bufend && isSPACE(*s)) { 9459 s = PEEKSPACE(s); 9460 } 9461 } 9462 9463 /* Is the byte 'd' a legal single character identifier name? 'u' is true 9464 * iff Unicode semantics are to be used. The legal ones are any of: 9465 * a) ASCII digits 9466 * b) ASCII punctuation 9467 * c) When not under Unicode rules, any upper Latin1 character 9468 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally 9469 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus 9470 * the \s ones. */ 9471 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ 9472 || isDIGIT_A((U8)(d)) \ 9473 || (!(u) && !isASCII((U8)(d))) \ 9474 || ((((U8)(d)) < 32) \ 9475 && (((((U8)(d)) >= 14) \ 9476 || (((U8)(d)) <= 8 && (d) != 0) \ 9477 || (((U8)(d)) == 13)))) \ 9478 || (((U8)(d)) == toCTRL('?'))) 9479 if (s < PL_bufend 9480 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) 9481 { 9482 if ( isCNTRL_A((U8)*s) ) { 9483 deprecate("literal control characters in variable names"); 9484 } 9485 9486 if (is_utf8) { 9487 const STRLEN skip = UTF8SKIP(s); 9488 STRLEN i; 9489 d[skip] = '\0'; 9490 for ( i = 0; i < skip; i++ ) 9491 d[i] = *s++; 9492 } 9493 else { 9494 *d = *s++; 9495 d[1] = '\0'; 9496 } 9497 } 9498 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 9499 if (*d == '^' && *s && isCONTROLVAR(*s)) { 9500 *d = toCTRL(*s); 9501 s++; 9502 } 9503 /* Warn about ambiguous code after unary operators if {...} notation isn't 9504 used. There's no difference in ambiguity; it's merely a heuristic 9505 about when not to warn. */ 9506 else if (ck_uni && bracket == -1) 9507 check_uni(); 9508 if (bracket != -1) { 9509 /* If we were processing {...} notation then... */ 9510 if (isIDFIRST_lazy_if(d,is_utf8)) { 9511 /* if it starts as a valid identifier, assume that it is one. 9512 (the later check for } being at the expected point will trap 9513 cases where this doesn't pan out.) */ 9514 d += is_utf8 ? UTF8SKIP(d) : 1; 9515 parse_ident(&s, &d, e, 1, is_utf8); 9516 *d = '\0'; 9517 tmp_copline = CopLINE(PL_curcop); 9518 if (s < PL_bufend && isSPACE(*s)) { 9519 s = PEEKSPACE(s); 9520 } 9521 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 9522 /* ${foo[0]} and ${foo{bar}} notation. */ 9523 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 9524 const char * const brack = 9525 (const char *) 9526 ((*s == '[') ? "[...]" : "{...}"); 9527 orig_copline = CopLINE(PL_curcop); 9528 CopLINE_set(PL_curcop, tmp_copline); 9529 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 9530 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9531 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 9532 funny, dest, brack, funny, dest, brack); 9533 CopLINE_set(PL_curcop, orig_copline); 9534 } 9535 bracket++; 9536 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 9537 PL_lex_allbrackets++; 9538 return s; 9539 } 9540 } 9541 /* Handle extended ${^Foo} variables 9542 * 1999-02-27 mjd-perl-patch@plover.com */ 9543 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 9544 && isWORDCHAR(*s)) 9545 { 9546 d++; 9547 while (isWORDCHAR(*s) && d < e) { 9548 *d++ = *s++; 9549 } 9550 if (d >= e) 9551 Perl_croak(aTHX_ "%s", ident_too_long); 9552 *d = '\0'; 9553 } 9554 9555 if ( !tmp_copline ) 9556 tmp_copline = CopLINE(PL_curcop); 9557 if (s < PL_bufend && isSPACE(*s)) { 9558 s = PEEKSPACE(s); 9559 } 9560 9561 /* Expect to find a closing } after consuming any trailing whitespace. 9562 */ 9563 if (*s == '}') { 9564 s++; 9565 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 9566 PL_lex_state = LEX_INTERPEND; 9567 PL_expect = XREF; 9568 } 9569 if (PL_lex_state == LEX_NORMAL) { 9570 if (ckWARN(WARN_AMBIGUOUS) && 9571 (keyword(dest, d - dest, 0) 9572 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0))) 9573 { 9574 SV *tmp = newSVpvn_flags( dest, d - dest, 9575 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 9576 if (funny == '#') 9577 funny = '@'; 9578 orig_copline = CopLINE(PL_curcop); 9579 CopLINE_set(PL_curcop, tmp_copline); 9580 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9581 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf, 9582 funny, tmp, funny, tmp); 9583 CopLINE_set(PL_curcop, orig_copline); 9584 } 9585 } 9586 } 9587 else { 9588 /* Didn't find the closing } at the point we expected, so restore 9589 state such that the next thing to process is the opening { and */ 9590 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 9591 CopLINE_set(PL_curcop, orig_copline); 9592 PL_parser->herelines = herelines; 9593 *dest = '\0'; 9594 } 9595 } 9596 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 9597 PL_lex_state = LEX_INTERPEND; 9598 return s; 9599 } 9600 9601 static bool 9602 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) { 9603 9604 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in 9605 * the parse starting at 's', based on the subset that are valid in this 9606 * context input to this routine in 'valid_flags'. Advances s. Returns 9607 * TRUE if the input should be treated as a valid flag, so the next char 9608 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon 9609 * first call on the current regex. This routine will set it to any 9610 * charset modifier found. The caller shouldn't change it. This way, 9611 * another charset modifier encountered in the parse can be detected as an 9612 * error, as we have decided to allow only one */ 9613 9614 const char c = **s; 9615 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 9616 9617 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 9618 if (isWORDCHAR_lazy_if(*s, UTF)) { 9619 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 9620 UTF ? SVf_UTF8 : 0); 9621 (*s) += charlen; 9622 /* Pretend that it worked, so will continue processing before 9623 * dieing */ 9624 return TRUE; 9625 } 9626 return FALSE; 9627 } 9628 9629 switch (c) { 9630 9631 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); 9632 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 9633 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 9634 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 9635 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 9636 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 9637 case LOCALE_PAT_MOD: 9638 if (*charset) { 9639 goto multiple_charsets; 9640 } 9641 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 9642 *charset = c; 9643 break; 9644 case UNICODE_PAT_MOD: 9645 if (*charset) { 9646 goto multiple_charsets; 9647 } 9648 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 9649 *charset = c; 9650 break; 9651 case ASCII_RESTRICT_PAT_MOD: 9652 if (! *charset) { 9653 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 9654 } 9655 else { 9656 9657 /* Error if previous modifier wasn't an 'a', but if it was, see 9658 * if, and accept, a second occurrence (only) */ 9659 if (*charset != 'a' 9660 || get_regex_charset(*pmfl) 9661 != REGEX_ASCII_RESTRICTED_CHARSET) 9662 { 9663 goto multiple_charsets; 9664 } 9665 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 9666 } 9667 *charset = c; 9668 break; 9669 case DEPENDS_PAT_MOD: 9670 if (*charset) { 9671 goto multiple_charsets; 9672 } 9673 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 9674 *charset = c; 9675 break; 9676 } 9677 9678 (*s)++; 9679 return TRUE; 9680 9681 multiple_charsets: 9682 if (*charset != c) { 9683 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 9684 } 9685 else if (c == 'a') { 9686 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 9687 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 9688 } 9689 else { 9690 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 9691 } 9692 9693 /* Pretend that it worked, so will continue processing before dieing */ 9694 (*s)++; 9695 return TRUE; 9696 } 9697 9698 STATIC char * 9699 S_scan_pat(pTHX_ char *start, I32 type) 9700 { 9701 dVAR; 9702 PMOP *pm; 9703 char *s; 9704 const char * const valid_flags = 9705 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 9706 char charset = '\0'; /* character set modifier */ 9707 #ifdef PERL_MAD 9708 char *modstart; 9709 #endif 9710 9711 PERL_ARGS_ASSERT_SCAN_PAT; 9712 9713 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), 9714 TRUE /* look for escaped bracketed metas */, NULL); 9715 9716 if (!s) { 9717 const char * const delimiter = skipspace(start); 9718 Perl_croak(aTHX_ 9719 (const char *) 9720 (*delimiter == '?' 9721 ? "Search pattern not terminated or ternary operator parsed as search pattern" 9722 : "Search pattern not terminated" )); 9723 } 9724 9725 pm = (PMOP*)newPMOP(type, 0); 9726 if (PL_multi_open == '?') { 9727 /* This is the only point in the code that sets PMf_ONCE: */ 9728 pm->op_pmflags |= PMf_ONCE; 9729 9730 /* Hence it's safe to do this bit of PMOP book-keeping here, which 9731 allows us to restrict the list needed by reset to just the ?? 9732 matches. */ 9733 assert(type != OP_TRANS); 9734 if (PL_curstash) { 9735 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 9736 U32 elements; 9737 if (!mg) { 9738 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 9739 0); 9740 } 9741 elements = mg->mg_len / sizeof(PMOP**); 9742 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 9743 ((PMOP**)mg->mg_ptr) [elements++] = pm; 9744 mg->mg_len = elements * sizeof(PMOP**); 9745 PmopSTASH_set(pm,PL_curstash); 9746 } 9747 } 9748 #ifdef PERL_MAD 9749 modstart = s; 9750 #endif 9751 9752 /* if qr/...(?{..}).../, then need to parse the pattern within a new 9753 * anon CV. False positives like qr/[(?{]/ are harmless */ 9754 9755 if (type == OP_QR) { 9756 STRLEN len; 9757 char *e, *p = SvPV(PL_lex_stuff, len); 9758 e = p + len; 9759 for (; p < e; p++) { 9760 if (p[0] == '(' && p[1] == '?' 9761 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 9762 { 9763 pm->op_pmflags |= PMf_HAS_CV; 9764 break; 9765 } 9766 } 9767 pm->op_pmflags |= PMf_IS_QR; 9768 } 9769 9770 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; 9771 #ifdef PERL_MAD 9772 if (PL_madskills && modstart != s) { 9773 SV* tmptoken = newSVpvn(modstart, s - modstart); 9774 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0); 9775 } 9776 #endif 9777 /* issue a warning if /c is specified,but /g is not */ 9778 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 9779 { 9780 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 9781 "Use of /c modifier is meaningless without /g" ); 9782 } 9783 9784 PL_lex_op = (OP*)pm; 9785 pl_yylval.ival = OP_MATCH; 9786 return s; 9787 } 9788 9789 STATIC char * 9790 S_scan_subst(pTHX_ char *start) 9791 { 9792 dVAR; 9793 char *s; 9794 PMOP *pm; 9795 I32 first_start; 9796 line_t first_line; 9797 I32 es = 0; 9798 char charset = '\0'; /* character set modifier */ 9799 #ifdef PERL_MAD 9800 char *modstart; 9801 #endif 9802 char *t; 9803 9804 PERL_ARGS_ASSERT_SCAN_SUBST; 9805 9806 pl_yylval.ival = OP_NULL; 9807 9808 s = scan_str(start,!!PL_madskills,FALSE,FALSE, 9809 TRUE /* look for escaped bracketed metas */, &t); 9810 9811 if (!s) 9812 Perl_croak(aTHX_ "Substitution pattern not terminated"); 9813 9814 s = t; 9815 #ifdef PERL_MAD 9816 if (PL_madskills) { 9817 CURMAD('q', PL_thisopen); 9818 CURMAD('_', PL_thiswhite); 9819 CURMAD('E', PL_thisstuff); 9820 CURMAD('Q', PL_thisclose); 9821 PL_realtokenstart = s - SvPVX(PL_linestr); 9822 } 9823 #endif 9824 9825 first_start = PL_multi_start; 9826 first_line = CopLINE(PL_curcop); 9827 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 9828 if (!s) { 9829 if (PL_lex_stuff) { 9830 SvREFCNT_dec(PL_lex_stuff); 9831 PL_lex_stuff = NULL; 9832 } 9833 Perl_croak(aTHX_ "Substitution replacement not terminated"); 9834 } 9835 PL_multi_start = first_start; /* so whole substitution is taken together */ 9836 9837 pm = (PMOP*)newPMOP(OP_SUBST, 0); 9838 9839 #ifdef PERL_MAD 9840 if (PL_madskills) { 9841 CURMAD('z', PL_thisopen); 9842 CURMAD('R', PL_thisstuff); 9843 CURMAD('Z', PL_thisclose); 9844 } 9845 modstart = s; 9846 #endif 9847 9848 while (*s) { 9849 if (*s == EXEC_PAT_MOD) { 9850 s++; 9851 es++; 9852 } 9853 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset)) 9854 { 9855 break; 9856 } 9857 } 9858 9859 #ifdef PERL_MAD 9860 if (PL_madskills) { 9861 if (modstart != s) 9862 curmad('m', newSVpvn(modstart, s - modstart)); 9863 append_madprops(PL_thismad, (OP*)pm, 0); 9864 PL_thismad = 0; 9865 } 9866 #endif 9867 if ((pm->op_pmflags & PMf_CONTINUE)) { 9868 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 9869 } 9870 9871 if (es) { 9872 SV * const repl = newSVpvs(""); 9873 9874 PL_multi_end = 0; 9875 pm->op_pmflags |= PMf_EVAL; 9876 while (es-- > 0) { 9877 if (es) 9878 sv_catpvs(repl, "eval "); 9879 else 9880 sv_catpvs(repl, "do "); 9881 } 9882 sv_catpvs(repl, "{"); 9883 sv_catsv(repl, PL_sublex_info.repl); 9884 sv_catpvs(repl, "}"); 9885 SvEVALED_on(repl); 9886 SvREFCNT_dec(PL_sublex_info.repl); 9887 PL_sublex_info.repl = repl; 9888 } 9889 if (CopLINE(PL_curcop) != first_line) { 9890 sv_upgrade(PL_sublex_info.repl, SVt_PVNV); 9891 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow = 9892 CopLINE(PL_curcop) - first_line; 9893 CopLINE_set(PL_curcop, first_line); 9894 } 9895 9896 PL_lex_op = (OP*)pm; 9897 pl_yylval.ival = OP_SUBST; 9898 return s; 9899 } 9900 9901 STATIC char * 9902 S_scan_trans(pTHX_ char *start) 9903 { 9904 dVAR; 9905 char* s; 9906 OP *o; 9907 U8 squash; 9908 U8 del; 9909 U8 complement; 9910 bool nondestruct = 0; 9911 #ifdef PERL_MAD 9912 char *modstart; 9913 #endif 9914 char *t; 9915 9916 PERL_ARGS_ASSERT_SCAN_TRANS; 9917 9918 pl_yylval.ival = OP_NULL; 9919 9920 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t); 9921 if (!s) 9922 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 9923 9924 s = t; 9925 #ifdef PERL_MAD 9926 if (PL_madskills) { 9927 CURMAD('q', PL_thisopen); 9928 CURMAD('_', PL_thiswhite); 9929 CURMAD('E', PL_thisstuff); 9930 CURMAD('Q', PL_thisclose); 9931 PL_realtokenstart = s - SvPVX(PL_linestr); 9932 } 9933 #endif 9934 9935 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 9936 if (!s) { 9937 if (PL_lex_stuff) { 9938 SvREFCNT_dec(PL_lex_stuff); 9939 PL_lex_stuff = NULL; 9940 } 9941 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 9942 } 9943 if (PL_madskills) { 9944 CURMAD('z', PL_thisopen); 9945 CURMAD('R', PL_thisstuff); 9946 CURMAD('Z', PL_thisclose); 9947 } 9948 9949 complement = del = squash = 0; 9950 #ifdef PERL_MAD 9951 modstart = s; 9952 #endif 9953 while (1) { 9954 switch (*s) { 9955 case 'c': 9956 complement = OPpTRANS_COMPLEMENT; 9957 break; 9958 case 'd': 9959 del = OPpTRANS_DELETE; 9960 break; 9961 case 's': 9962 squash = OPpTRANS_SQUASH; 9963 break; 9964 case 'r': 9965 nondestruct = 1; 9966 break; 9967 default: 9968 goto no_more; 9969 } 9970 s++; 9971 } 9972 no_more: 9973 9974 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 9975 o->op_private &= ~OPpTRANS_ALL; 9976 o->op_private |= del|squash|complement| 9977 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 9978 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0); 9979 9980 PL_lex_op = o; 9981 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 9982 9983 #ifdef PERL_MAD 9984 if (PL_madskills) { 9985 if (modstart != s) 9986 curmad('m', newSVpvn(modstart, s - modstart)); 9987 append_madprops(PL_thismad, o, 0); 9988 PL_thismad = 0; 9989 } 9990 #endif 9991 9992 return s; 9993 } 9994 9995 /* scan_heredoc 9996 Takes a pointer to the first < in <<FOO. 9997 Returns a pointer to the byte following <<FOO. 9998 9999 This function scans a heredoc, which involves different methods 10000 depending on whether we are in a string eval, quoted construct, etc. 10001 This is because PL_linestr could containing a single line of input, or 10002 a whole string being evalled, or the contents of the current quote- 10003 like operator. 10004 10005 The two basic methods are: 10006 - Steal lines from the input stream 10007 - Scan the heredoc in PL_linestr and remove it therefrom 10008 10009 In a file scope or filtered eval, the first method is used; in a 10010 string eval, the second. 10011 10012 In a quote-like operator, we have to choose between the two, 10013 depending on where we can find a newline. We peek into outer lex- 10014 ing scopes until we find one with a newline in it. If we reach the 10015 outermost lexing scope and it is a file, we use the stream method. 10016 Otherwise it is treated as an eval. 10017 */ 10018 10019 STATIC char * 10020 S_scan_heredoc(pTHX_ char *s) 10021 { 10022 dVAR; 10023 I32 op_type = OP_SCALAR; 10024 I32 len; 10025 SV *tmpstr; 10026 char term; 10027 char *d; 10028 char *e; 10029 char *peek; 10030 const bool infile = PL_rsfp || PL_parser->filtered; 10031 const line_t origline = CopLINE(PL_curcop); 10032 LEXSHARED *shared = PL_parser->lex_shared; 10033 #ifdef PERL_MAD 10034 I32 stuffstart = s - SvPVX(PL_linestr); 10035 char *tstart; 10036 10037 PL_realtokenstart = -1; 10038 #endif 10039 10040 PERL_ARGS_ASSERT_SCAN_HEREDOC; 10041 10042 s += 2; 10043 d = PL_tokenbuf + 1; 10044 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 10045 *PL_tokenbuf = '\n'; 10046 peek = s; 10047 while (SPACE_OR_TAB(*peek)) 10048 peek++; 10049 if (*peek == '`' || *peek == '\'' || *peek =='"') { 10050 s = peek; 10051 term = *s++; 10052 s = delimcpy(d, e, s, PL_bufend, term, &len); 10053 if (s == PL_bufend) 10054 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 10055 d += len; 10056 s++; 10057 } 10058 else { 10059 if (*s == '\\') 10060 /* <<\FOO is equivalent to <<'FOO' */ 10061 s++, term = '\''; 10062 else 10063 term = '"'; 10064 if (!isWORDCHAR_lazy_if(s,UTF)) 10065 deprecate("bare << to mean <<\"\""); 10066 peek = s; 10067 while (isWORDCHAR_lazy_if(peek,UTF)) { 10068 peek += UTF ? UTF8SKIP(peek) : 1; 10069 } 10070 len = (peek - s >= e - d) ? (e - d) : (peek - s); 10071 Copy(s, d, len, char); 10072 s += len; 10073 d += len; 10074 } 10075 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 10076 Perl_croak(aTHX_ "Delimiter for here document is too long"); 10077 *d++ = '\n'; 10078 *d = '\0'; 10079 len = d - PL_tokenbuf; 10080 10081 #ifdef PERL_MAD 10082 if (PL_madskills) { 10083 tstart = PL_tokenbuf + 1; 10084 PL_thisclose = newSVpvn(tstart, len - 1); 10085 tstart = SvPVX(PL_linestr) + stuffstart; 10086 PL_thisopen = newSVpvn(tstart, s - tstart); 10087 stuffstart = s - SvPVX(PL_linestr); 10088 } 10089 #endif 10090 #ifndef PERL_STRICT_CR 10091 d = strchr(s, '\r'); 10092 if (d) { 10093 char * const olds = s; 10094 s = d; 10095 while (s < PL_bufend) { 10096 if (*s == '\r') { 10097 *d++ = '\n'; 10098 if (*++s == '\n') 10099 s++; 10100 } 10101 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 10102 *d++ = *s++; 10103 s++; 10104 } 10105 else 10106 *d++ = *s++; 10107 } 10108 *d = '\0'; 10109 PL_bufend = d; 10110 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10111 s = olds; 10112 } 10113 #endif 10114 #ifdef PERL_MAD 10115 if (PL_madskills) { 10116 tstart = SvPVX(PL_linestr) + stuffstart; 10117 if (PL_thisstuff) 10118 sv_catpvn(PL_thisstuff, tstart, s - tstart); 10119 else 10120 PL_thisstuff = newSVpvn(tstart, s - tstart); 10121 } 10122 10123 stuffstart = s - SvPVX(PL_linestr); 10124 #endif 10125 10126 tmpstr = newSV_type(SVt_PVIV); 10127 SvGROW(tmpstr, 80); 10128 if (term == '\'') { 10129 op_type = OP_CONST; 10130 SvIV_set(tmpstr, -1); 10131 } 10132 else if (term == '`') { 10133 op_type = OP_BACKTICK; 10134 SvIV_set(tmpstr, '\\'); 10135 } 10136 10137 PL_multi_start = origline + 1 + PL_parser->herelines; 10138 PL_multi_open = PL_multi_close = '<'; 10139 /* inside a string eval or quote-like operator */ 10140 if (!infile || PL_lex_inwhat) { 10141 SV *linestr; 10142 char *bufend; 10143 char * const olds = s; 10144 PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; 10145 /* These two fields are not set until an inner lexing scope is 10146 entered. But we need them set here. */ 10147 shared->ls_bufptr = s; 10148 shared->ls_linestr = PL_linestr; 10149 if (PL_lex_inwhat) 10150 /* Look for a newline. If the current buffer does not have one, 10151 peek into the line buffer of the parent lexing scope, going 10152 up as many levels as necessary to find one with a newline 10153 after bufptr. 10154 */ 10155 while (!(s = (char *)memchr( 10156 (void *)shared->ls_bufptr, '\n', 10157 SvEND(shared->ls_linestr)-shared->ls_bufptr 10158 ))) { 10159 shared = shared->ls_prev; 10160 /* shared is only null if we have gone beyond the outermost 10161 lexing scope. In a file, we will have broken out of the 10162 loop in the previous iteration. In an eval, the string buf- 10163 fer ends with "\n;", so the while condition above will have 10164 evaluated to false. So shared can never be null. */ 10165 assert(shared); 10166 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 10167 most lexing scope. In a file, shared->ls_linestr at that 10168 level is just one line, so there is no body to steal. */ 10169 if (infile && !shared->ls_prev) { 10170 s = olds; 10171 goto streaming; 10172 } 10173 } 10174 else { /* eval */ 10175 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 10176 assert(s); 10177 } 10178 linestr = shared->ls_linestr; 10179 bufend = SvEND(linestr); 10180 d = s; 10181 while (s < bufend - len + 1 && 10182 memNE(s,PL_tokenbuf,len) ) { 10183 if (*s++ == '\n') 10184 ++PL_parser->herelines; 10185 } 10186 if (s >= bufend - len + 1) { 10187 goto interminable; 10188 } 10189 sv_setpvn(tmpstr,d+1,s-d); 10190 #ifdef PERL_MAD 10191 if (PL_madskills) { 10192 if (PL_thisstuff) 10193 sv_catpvn(PL_thisstuff, d + 1, s - d); 10194 else 10195 PL_thisstuff = newSVpvn(d + 1, s - d); 10196 stuffstart = s - SvPVX(PL_linestr); 10197 } 10198 #endif 10199 s += len - 1; 10200 /* the preceding stmt passes a newline */ 10201 PL_parser->herelines++; 10202 10203 /* s now points to the newline after the heredoc terminator. 10204 d points to the newline before the body of the heredoc. 10205 */ 10206 10207 /* We are going to modify linestr in place here, so set 10208 aside copies of the string if necessary for re-evals or 10209 (caller $n)[6]. */ 10210 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 10211 check shared->re_eval_str. */ 10212 if (shared->re_eval_start || shared->re_eval_str) { 10213 /* Set aside the rest of the regexp */ 10214 if (!shared->re_eval_str) 10215 shared->re_eval_str = 10216 newSVpvn(shared->re_eval_start, 10217 bufend - shared->re_eval_start); 10218 shared->re_eval_start -= s-d; 10219 } 10220 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL && 10221 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL && 10222 cx->blk_eval.cur_text == linestr) 10223 { 10224 cx->blk_eval.cur_text = newSVsv(linestr); 10225 SvSCREAM_on(cx->blk_eval.cur_text); 10226 } 10227 /* Copy everything from s onwards back to d. */ 10228 Move(s,d,bufend-s + 1,char); 10229 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 10230 /* Setting PL_bufend only applies when we have not dug deeper 10231 into other scopes, because sublex_done sets PL_bufend to 10232 SvEND(PL_linestr). */ 10233 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); 10234 s = olds; 10235 } 10236 else 10237 { 10238 SV *linestr_save; 10239 streaming: 10240 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ 10241 term = PL_tokenbuf[1]; 10242 len--; 10243 linestr_save = PL_linestr; /* must restore this afterwards */ 10244 d = s; /* and this */ 10245 PL_linestr = newSVpvs(""); 10246 PL_bufend = SvPVX(PL_linestr); 10247 while (1) { 10248 #ifdef PERL_MAD 10249 if (PL_madskills) { 10250 tstart = SvPVX(PL_linestr) + stuffstart; 10251 if (PL_thisstuff) 10252 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 10253 else 10254 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 10255 } 10256 #endif 10257 PL_bufptr = PL_bufend; 10258 CopLINE_set(PL_curcop, 10259 origline + 1 + PL_parser->herelines); 10260 if (!lex_next_chunk(LEX_NO_TERM) 10261 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { 10262 SvREFCNT_dec(linestr_save); 10263 goto interminable; 10264 } 10265 CopLINE_set(PL_curcop, origline); 10266 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 10267 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 10268 /* ^That should be enough to avoid this needing to grow: */ 10269 sv_catpvs(PL_linestr, "\n\0"); 10270 assert(s == SvPVX(PL_linestr)); 10271 PL_bufend = SvEND(PL_linestr); 10272 } 10273 s = PL_bufptr; 10274 #ifdef PERL_MAD 10275 stuffstart = s - SvPVX(PL_linestr); 10276 #endif 10277 PL_parser->herelines++; 10278 PL_last_lop = PL_last_uni = NULL; 10279 #ifndef PERL_STRICT_CR 10280 if (PL_bufend - PL_linestart >= 2) { 10281 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || 10282 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 10283 { 10284 PL_bufend[-2] = '\n'; 10285 PL_bufend--; 10286 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10287 } 10288 else if (PL_bufend[-1] == '\r') 10289 PL_bufend[-1] = '\n'; 10290 } 10291 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 10292 PL_bufend[-1] = '\n'; 10293 #endif 10294 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { 10295 SvREFCNT_dec(PL_linestr); 10296 PL_linestr = linestr_save; 10297 PL_linestart = SvPVX(linestr_save); 10298 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 10299 s = d; 10300 break; 10301 } 10302 else { 10303 sv_catsv(tmpstr,PL_linestr); 10304 } 10305 } 10306 } 10307 PL_multi_end = origline + PL_parser->herelines; 10308 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 10309 SvPV_shrink_to_cur(tmpstr); 10310 } 10311 if (!IN_BYTES) { 10312 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 10313 SvUTF8_on(tmpstr); 10314 else if (PL_encoding) 10315 sv_recode_to_utf8(tmpstr, PL_encoding); 10316 } 10317 PL_lex_stuff = tmpstr; 10318 pl_yylval.ival = op_type; 10319 return s; 10320 10321 interminable: 10322 SvREFCNT_dec(tmpstr); 10323 CopLINE_set(PL_curcop, origline); 10324 missingterm(PL_tokenbuf + 1); 10325 } 10326 10327 /* scan_inputsymbol 10328 takes: current position in input buffer 10329 returns: new position in input buffer 10330 side-effects: pl_yylval and lex_op are set. 10331 10332 This code handles: 10333 10334 <> read from ARGV 10335 <FH> read from filehandle 10336 <pkg::FH> read from package qualified filehandle 10337 <pkg'FH> read from package qualified filehandle 10338 <$fh> read from filehandle in $fh 10339 <*.h> filename glob 10340 10341 */ 10342 10343 STATIC char * 10344 S_scan_inputsymbol(pTHX_ char *start) 10345 { 10346 dVAR; 10347 char *s = start; /* current position in buffer */ 10348 char *end; 10349 I32 len; 10350 char *d = PL_tokenbuf; /* start of temp holding space */ 10351 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 10352 10353 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 10354 10355 end = strchr(s, '\n'); 10356 if (!end) 10357 end = PL_bufend; 10358 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 10359 10360 /* die if we didn't have space for the contents of the <>, 10361 or if it didn't end, or if we see a newline 10362 */ 10363 10364 if (len >= (I32)sizeof PL_tokenbuf) 10365 Perl_croak(aTHX_ "Excessively long <> operator"); 10366 if (s >= end) 10367 Perl_croak(aTHX_ "Unterminated <> operator"); 10368 10369 s++; 10370 10371 /* check for <$fh> 10372 Remember, only scalar variables are interpreted as filehandles by 10373 this code. Anything more complex (e.g., <$fh{$num}>) will be 10374 treated as a glob() call. 10375 This code makes use of the fact that except for the $ at the front, 10376 a scalar variable and a filehandle look the same. 10377 */ 10378 if (*d == '$' && d[1]) d++; 10379 10380 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 10381 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 10382 d += UTF ? UTF8SKIP(d) : 1; 10383 10384 /* If we've tried to read what we allow filehandles to look like, and 10385 there's still text left, then it must be a glob() and not a getline. 10386 Use scan_str to pull out the stuff between the <> and treat it 10387 as nothing more than a string. 10388 */ 10389 10390 if (d - PL_tokenbuf != len) { 10391 pl_yylval.ival = OP_GLOB; 10392 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 10393 if (!s) 10394 Perl_croak(aTHX_ "Glob not terminated"); 10395 return s; 10396 } 10397 else { 10398 bool readline_overriden = FALSE; 10399 GV *gv_readline; 10400 /* we're in a filehandle read situation */ 10401 d = PL_tokenbuf; 10402 10403 /* turn <> into <ARGV> */ 10404 if (!len) 10405 Copy("ARGV",d,5,char); 10406 10407 /* Check whether readline() is overriden */ 10408 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV); 10409 if ((gv_readline = gv_override("readline",8))) 10410 readline_overriden = TRUE; 10411 10412 /* if <$fh>, create the ops to turn the variable into a 10413 filehandle 10414 */ 10415 if (*d == '$') { 10416 /* try to find it in the pad for this block, otherwise find 10417 add symbol table ops 10418 */ 10419 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0); 10420 if (tmp != NOT_IN_PAD) { 10421 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 10422 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 10423 HEK * const stashname = HvNAME_HEK(stash); 10424 SV * const sym = sv_2mortal(newSVhek(stashname)); 10425 sv_catpvs(sym, "::"); 10426 sv_catpv(sym, d+1); 10427 d = SvPVX(sym); 10428 goto intro_sym; 10429 } 10430 else { 10431 OP * const o = newOP(OP_PADSV, 0); 10432 o->op_targ = tmp; 10433 PL_lex_op = readline_overriden 10434 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10435 op_append_elem(OP_LIST, o, 10436 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 10437 : (OP*)newUNOP(OP_READLINE, 0, o); 10438 } 10439 } 10440 else { 10441 GV *gv; 10442 ++d; 10443 intro_sym: 10444 gv = gv_fetchpv(d, 10445 (PL_in_eval 10446 ? (GV_ADDMULTI | GV_ADDINEVAL) 10447 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ), 10448 SVt_PV); 10449 PL_lex_op = readline_overriden 10450 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10451 op_append_elem(OP_LIST, 10452 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 10453 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 10454 : (OP*)newUNOP(OP_READLINE, 0, 10455 newUNOP(OP_RV2SV, 0, 10456 newGVOP(OP_GV, 0, gv))); 10457 } 10458 if (!readline_overriden) 10459 PL_lex_op->op_flags |= OPf_SPECIAL; 10460 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 10461 pl_yylval.ival = OP_NULL; 10462 } 10463 10464 /* If it's none of the above, it must be a literal filehandle 10465 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 10466 else { 10467 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 10468 PL_lex_op = readline_overriden 10469 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10470 op_append_elem(OP_LIST, 10471 newGVOP(OP_GV, 0, gv), 10472 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 10473 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); 10474 pl_yylval.ival = OP_NULL; 10475 } 10476 } 10477 10478 return s; 10479 } 10480 10481 10482 /* scan_str 10483 takes: 10484 start position in buffer 10485 keep_quoted preserve \ on the embedded delimiter(s) 10486 keep_delims preserve the delimiters around the string 10487 re_reparse compiling a run-time /(?{})/: 10488 collapse // to /, and skip encoding src 10489 deprecate_escaped_meta issue a deprecation warning for cer- 10490 tain paired metacharacters that appear 10491 escaped within it 10492 delimp if non-null, this is set to the position of 10493 the closing delimiter, or just after it if 10494 the closing and opening delimiters differ 10495 (i.e., the opening delimiter of a substitu- 10496 tion replacement) 10497 returns: position to continue reading from buffer 10498 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 10499 updates the read buffer. 10500 10501 This subroutine pulls a string out of the input. It is called for: 10502 q single quotes q(literal text) 10503 ' single quotes 'literal text' 10504 qq double quotes qq(interpolate $here please) 10505 " double quotes "interpolate $here please" 10506 qx backticks qx(/bin/ls -l) 10507 ` backticks `/bin/ls -l` 10508 qw quote words @EXPORT_OK = qw( func() $spam ) 10509 m// regexp match m/this/ 10510 s/// regexp substitute s/this/that/ 10511 tr/// string transliterate tr/this/that/ 10512 y/// string transliterate y/this/that/ 10513 ($*@) sub prototypes sub foo ($) 10514 (stuff) sub attr parameters sub foo : attr(stuff) 10515 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 10516 10517 In most of these cases (all but <>, patterns and transliterate) 10518 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 10519 calls scan_str(). s/// makes yylex() call scan_subst() which calls 10520 scan_str(). tr/// and y/// make yylex() call scan_trans() which 10521 calls scan_str(). 10522 10523 It skips whitespace before the string starts, and treats the first 10524 character as the delimiter. If the delimiter is one of ([{< then 10525 the corresponding "close" character )]}> is used as the closing 10526 delimiter. It allows quoting of delimiters, and if the string has 10527 balanced delimiters ([{<>}]) it allows nesting. 10528 10529 On success, the SV with the resulting string is put into lex_stuff or, 10530 if that is already non-NULL, into lex_repl. The second case occurs only 10531 when parsing the RHS of the special constructs s/// and tr/// (y///). 10532 For convenience, the terminating delimiter character is stuffed into 10533 SvIVX of the SV. 10534 */ 10535 10536 STATIC char * 10537 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, 10538 bool deprecate_escaped_meta, char **delimp 10539 ) 10540 { 10541 dVAR; 10542 SV *sv; /* scalar value: string */ 10543 const char *tmps; /* temp string, used for delimiter matching */ 10544 char *s = start; /* current position in the buffer */ 10545 char term; /* terminating character */ 10546 char *to; /* current position in the sv's data */ 10547 I32 brackets = 1; /* bracket nesting level */ 10548 bool has_utf8 = FALSE; /* is there any utf8 content? */ 10549 I32 termcode; /* terminating char. code */ 10550 U8 termstr[UTF8_MAXBYTES]; /* terminating string */ 10551 STRLEN termlen; /* length of terminating string */ 10552 int last_off = 0; /* last position for nesting bracket */ 10553 char *escaped_open = NULL; 10554 line_t herelines; 10555 #ifdef PERL_MAD 10556 int stuffstart; 10557 char *tstart; 10558 #endif 10559 10560 PERL_ARGS_ASSERT_SCAN_STR; 10561 10562 /* skip space before the delimiter */ 10563 if (isSPACE(*s)) { 10564 s = PEEKSPACE(s); 10565 } 10566 10567 #ifdef PERL_MAD 10568 if (PL_realtokenstart >= 0) { 10569 stuffstart = PL_realtokenstart; 10570 PL_realtokenstart = -1; 10571 } 10572 else 10573 stuffstart = start - SvPVX(PL_linestr); 10574 #endif 10575 /* mark where we are, in case we need to report errors */ 10576 CLINE; 10577 10578 /* after skipping whitespace, the next character is the terminator */ 10579 term = *s; 10580 if (!UTF) { 10581 termcode = termstr[0] = term; 10582 termlen = 1; 10583 } 10584 else { 10585 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); 10586 Copy(s, termstr, termlen, U8); 10587 if (!UTF8_IS_INVARIANT(term)) 10588 has_utf8 = TRUE; 10589 } 10590 10591 /* mark where we are */ 10592 PL_multi_start = CopLINE(PL_curcop); 10593 PL_multi_open = term; 10594 herelines = PL_parser->herelines; 10595 10596 /* find corresponding closing delimiter */ 10597 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 10598 termcode = termstr[0] = term = tmps[5]; 10599 10600 PL_multi_close = term; 10601 10602 /* A warning is raised if the input parameter requires it for escaped (by a 10603 * backslash) paired metacharacters {} [] and () when the delimiters are 10604 * those same characters, and the backslash is ineffective. This doesn't 10605 * happen for <>, as they aren't metas. */ 10606 if (deprecate_escaped_meta 10607 && (PL_multi_open == PL_multi_close 10608 || PL_multi_open == '<' 10609 || ! ckWARN_d(WARN_DEPRECATED))) 10610 { 10611 deprecate_escaped_meta = FALSE; 10612 } 10613 10614 /* create a new SV to hold the contents. 79 is the SV's initial length. 10615 What a random number. */ 10616 sv = newSV_type(SVt_PVIV); 10617 SvGROW(sv, 80); 10618 SvIV_set(sv, termcode); 10619 (void)SvPOK_only(sv); /* validate pointer */ 10620 10621 /* move past delimiter and try to read a complete string */ 10622 if (keep_delims) 10623 sv_catpvn(sv, s, termlen); 10624 s += termlen; 10625 #ifdef PERL_MAD 10626 tstart = SvPVX(PL_linestr) + stuffstart; 10627 if (PL_madskills && !PL_thisopen && !keep_delims) { 10628 PL_thisopen = newSVpvn(tstart, s - tstart); 10629 stuffstart = s - SvPVX(PL_linestr); 10630 } 10631 #endif 10632 for (;;) { 10633 if (PL_encoding && !UTF && !re_reparse) { 10634 bool cont = TRUE; 10635 10636 while (cont) { 10637 int offset = s - SvPVX_const(PL_linestr); 10638 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, 10639 &offset, (char*)termstr, termlen); 10640 const char *ns; 10641 char *svlast; 10642 10643 if (SvIsCOW(PL_linestr)) { 10644 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; 10645 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; 10646 STRLEN last_lop_pos, re_eval_start_pos, s_pos; 10647 char *buf = SvPVX(PL_linestr); 10648 bufend_pos = PL_parser->bufend - buf; 10649 bufptr_pos = PL_parser->bufptr - buf; 10650 oldbufptr_pos = PL_parser->oldbufptr - buf; 10651 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 10652 linestart_pos = PL_parser->linestart - buf; 10653 last_uni_pos = PL_parser->last_uni 10654 ? PL_parser->last_uni - buf 10655 : 0; 10656 last_lop_pos = PL_parser->last_lop 10657 ? PL_parser->last_lop - buf 10658 : 0; 10659 re_eval_start_pos = 10660 PL_parser->lex_shared->re_eval_start ? 10661 PL_parser->lex_shared->re_eval_start - buf : 0; 10662 s_pos = s - buf; 10663 10664 sv_force_normal(PL_linestr); 10665 10666 buf = SvPVX(PL_linestr); 10667 PL_parser->bufend = buf + bufend_pos; 10668 PL_parser->bufptr = buf + bufptr_pos; 10669 PL_parser->oldbufptr = buf + oldbufptr_pos; 10670 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 10671 PL_parser->linestart = buf + linestart_pos; 10672 if (PL_parser->last_uni) 10673 PL_parser->last_uni = buf + last_uni_pos; 10674 if (PL_parser->last_lop) 10675 PL_parser->last_lop = buf + last_lop_pos; 10676 if (PL_parser->lex_shared->re_eval_start) 10677 PL_parser->lex_shared->re_eval_start = 10678 buf + re_eval_start_pos; 10679 s = buf + s_pos; 10680 } 10681 ns = SvPVX_const(PL_linestr) + offset; 10682 svlast = SvEND(sv) - 1; 10683 10684 for (; s < ns; s++) { 10685 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10686 COPLINE_INC_WITH_HERELINES; 10687 } 10688 if (!found) 10689 goto read_more_line; 10690 else { 10691 /* handle quoted delimiters */ 10692 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { 10693 const char *t; 10694 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) 10695 t--; 10696 if ((svlast-1 - t) % 2) { 10697 if (!keep_quoted) { 10698 *(svlast-1) = term; 10699 *svlast = '\0'; 10700 SvCUR_set(sv, SvCUR(sv) - 1); 10701 } 10702 continue; 10703 } 10704 } 10705 if (PL_multi_open == PL_multi_close) { 10706 cont = FALSE; 10707 } 10708 else { 10709 const char *t; 10710 char *w; 10711 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { 10712 /* At here, all closes are "was quoted" one, 10713 so we don't check PL_multi_close. */ 10714 if (*t == '\\') { 10715 if (!keep_quoted && *(t+1) == PL_multi_open) 10716 t++; 10717 else 10718 *w++ = *t++; 10719 } 10720 else if (*t == PL_multi_open) 10721 brackets++; 10722 10723 *w = *t; 10724 } 10725 if (w < t) { 10726 *w++ = term; 10727 *w = '\0'; 10728 SvCUR_set(sv, w - SvPVX_const(sv)); 10729 } 10730 last_off = w - SvPVX(sv); 10731 if (--brackets <= 0) 10732 cont = FALSE; 10733 } 10734 } 10735 } 10736 if (!keep_delims) { 10737 SvCUR_set(sv, SvCUR(sv) - 1); 10738 *SvEND(sv) = '\0'; 10739 } 10740 break; 10741 } 10742 10743 /* extend sv if need be */ 10744 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 10745 /* set 'to' to the next character in the sv's string */ 10746 to = SvPVX(sv)+SvCUR(sv); 10747 10748 /* if open delimiter is the close delimiter read unbridle */ 10749 if (PL_multi_open == PL_multi_close) { 10750 for (; s < PL_bufend; s++,to++) { 10751 /* embedded newlines increment the current line number */ 10752 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10753 COPLINE_INC_WITH_HERELINES; 10754 /* handle quoted delimiters */ 10755 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 10756 if (!keep_quoted 10757 && (s[1] == term 10758 || (re_reparse && s[1] == '\\')) 10759 ) 10760 s++; 10761 /* any other quotes are simply copied straight through */ 10762 else 10763 *to++ = *s++; 10764 } 10765 /* terminate when run out of buffer (the for() condition), or 10766 have found the terminator */ 10767 else if (*s == term) { 10768 if (termlen == 1) 10769 break; 10770 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) 10771 break; 10772 } 10773 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10774 has_utf8 = TRUE; 10775 *to = *s; 10776 } 10777 } 10778 10779 /* if the terminator isn't the same as the start character (e.g., 10780 matched brackets), we have to allow more in the quoting, and 10781 be prepared for nested brackets. 10782 */ 10783 else { 10784 /* read until we run out of string, or we find the terminator */ 10785 for (; s < PL_bufend; s++,to++) { 10786 /* embedded newlines increment the line count */ 10787 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10788 COPLINE_INC_WITH_HERELINES; 10789 /* backslashes can escape the open or closing characters */ 10790 if (*s == '\\' && s+1 < PL_bufend) { 10791 if (!keep_quoted && 10792 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 10793 { 10794 s++; 10795 10796 /* Here, 'deprecate_escaped_meta' is true iff the 10797 * delimiters are paired metacharacters, and 's' points 10798 * to an occurrence of one of them within the string, 10799 * which was preceded by a backslash. If this is a 10800 * context where the delimiter is also a metacharacter, 10801 * the backslash is useless, and deprecated. () and [] 10802 * are meta in any context. {} are meta only when 10803 * appearing in a quantifier or in things like '\p{' 10804 * (but '\\p{' isn't meta). They also aren't meta 10805 * unless there is a matching closed, escaped char 10806 * later on within the string. If 's' points to an 10807 * open, set a flag; if to a close, test that flag, and 10808 * raise a warning if it was set */ 10809 10810 if (deprecate_escaped_meta) { 10811 if (*s == PL_multi_open) { 10812 if (*s != '{') { 10813 escaped_open = s; 10814 } 10815 /* Look for a closing '\}' */ 10816 else if (regcurly(s, TRUE)) { 10817 escaped_open = s; 10818 } 10819 /* Look for e.g. '\x{' */ 10820 else if (s - start > 2 10821 && _generic_isCC(*(s-2), 10822 _CC_BACKSLASH_FOO_LBRACE_IS_META)) 10823 { /* Exclude '\\x', '\\\\x', etc. */ 10824 char *lookbehind = s - 4; 10825 bool is_meta = TRUE; 10826 while (lookbehind >= start 10827 && *lookbehind == '\\') 10828 { 10829 is_meta = ! is_meta; 10830 lookbehind--; 10831 } 10832 if (is_meta) { 10833 escaped_open = s; 10834 } 10835 } 10836 } 10837 else if (escaped_open) { 10838 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 10839 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open); 10840 escaped_open = NULL; 10841 } 10842 } 10843 } 10844 else 10845 *to++ = *s++; 10846 } 10847 /* allow nested opens and closes */ 10848 else if (*s == PL_multi_close && --brackets <= 0) 10849 break; 10850 else if (*s == PL_multi_open) 10851 brackets++; 10852 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10853 has_utf8 = TRUE; 10854 *to = *s; 10855 } 10856 } 10857 /* terminate the copied string and update the sv's end-of-string */ 10858 *to = '\0'; 10859 SvCUR_set(sv, to - SvPVX_const(sv)); 10860 10861 /* 10862 * this next chunk reads more into the buffer if we're not done yet 10863 */ 10864 10865 if (s < PL_bufend) 10866 break; /* handle case where we are done yet :-) */ 10867 10868 #ifndef PERL_STRICT_CR 10869 if (to - SvPVX_const(sv) >= 2) { 10870 if ((to[-2] == '\r' && to[-1] == '\n') || 10871 (to[-2] == '\n' && to[-1] == '\r')) 10872 { 10873 to[-2] = '\n'; 10874 to--; 10875 SvCUR_set(sv, to - SvPVX_const(sv)); 10876 } 10877 else if (to[-1] == '\r') 10878 to[-1] = '\n'; 10879 } 10880 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 10881 to[-1] = '\n'; 10882 #endif 10883 10884 read_more_line: 10885 /* if we're out of file, or a read fails, bail and reset the current 10886 line marker so we can report where the unterminated string began 10887 */ 10888 #ifdef PERL_MAD 10889 if (PL_madskills) { 10890 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10891 if (PL_thisstuff) 10892 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 10893 else 10894 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 10895 } 10896 #endif 10897 COPLINE_INC_WITH_HERELINES; 10898 PL_bufptr = PL_bufend; 10899 if (!lex_next_chunk(0)) { 10900 sv_free(sv); 10901 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 10902 return NULL; 10903 } 10904 s = PL_bufptr; 10905 #ifdef PERL_MAD 10906 stuffstart = 0; 10907 #endif 10908 } 10909 10910 /* at this point, we have successfully read the delimited string */ 10911 10912 if (!PL_encoding || UTF || re_reparse) { 10913 #ifdef PERL_MAD 10914 if (PL_madskills) { 10915 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10916 const int len = s - tstart; 10917 if (PL_thisstuff) 10918 sv_catpvn(PL_thisstuff, tstart, len); 10919 else 10920 PL_thisstuff = newSVpvn(tstart, len); 10921 if (!PL_thisclose && !keep_delims) 10922 PL_thisclose = newSVpvn(s,termlen); 10923 } 10924 #endif 10925 10926 if (keep_delims) 10927 sv_catpvn(sv, s, termlen); 10928 s += termlen; 10929 } 10930 #ifdef PERL_MAD 10931 else { 10932 if (PL_madskills) { 10933 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10934 const int len = s - tstart - termlen; 10935 if (PL_thisstuff) 10936 sv_catpvn(PL_thisstuff, tstart, len); 10937 else 10938 PL_thisstuff = newSVpvn(tstart, len); 10939 if (!PL_thisclose && !keep_delims) 10940 PL_thisclose = newSVpvn(s - termlen,termlen); 10941 } 10942 } 10943 #endif 10944 if (has_utf8 || (PL_encoding && !re_reparse)) 10945 SvUTF8_on(sv); 10946 10947 PL_multi_end = CopLINE(PL_curcop); 10948 CopLINE_set(PL_curcop, PL_multi_start); 10949 PL_parser->herelines = herelines; 10950 10951 /* if we allocated too much space, give some back */ 10952 if (SvCUR(sv) + 5 < SvLEN(sv)) { 10953 SvLEN_set(sv, SvCUR(sv) + 1); 10954 SvPV_renew(sv, SvLEN(sv)); 10955 } 10956 10957 /* decide whether this is the first or second quoted string we've read 10958 for this op 10959 */ 10960 10961 if (PL_lex_stuff) 10962 PL_sublex_info.repl = sv; 10963 else 10964 PL_lex_stuff = sv; 10965 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s; 10966 return s; 10967 } 10968 10969 /* 10970 scan_num 10971 takes: pointer to position in buffer 10972 returns: pointer to new position in buffer 10973 side-effects: builds ops for the constant in pl_yylval.op 10974 10975 Read a number in any of the formats that Perl accepts: 10976 10977 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 10978 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 10979 0b[01](_?[01])* 10980 0[0-7](_?[0-7])* 10981 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* 10982 10983 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 10984 thing it reads. 10985 10986 If it reads a number without a decimal point or an exponent, it will 10987 try converting the number to an integer and see if it can do so 10988 without loss of precision. 10989 */ 10990 10991 char * 10992 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 10993 { 10994 dVAR; 10995 const char *s = start; /* current position in buffer */ 10996 char *d; /* destination in temp buffer */ 10997 char *e; /* end of temp buffer */ 10998 NV nv; /* number read, as a double */ 10999 SV *sv = NULL; /* place to put the converted number */ 11000 bool floatit; /* boolean: int or float? */ 11001 const char *lastub = NULL; /* position of last underbar */ 11002 static const char* const number_too_long = "Number too long"; 11003 11004 PERL_ARGS_ASSERT_SCAN_NUM; 11005 11006 /* We use the first character to decide what type of number this is */ 11007 11008 switch (*s) { 11009 default: 11010 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 11011 11012 /* if it starts with a 0, it could be an octal number, a decimal in 11013 0.13 disguise, or a hexadecimal number, or a binary number. */ 11014 case '0': 11015 { 11016 /* variables: 11017 u holds the "number so far" 11018 shift the power of 2 of the base 11019 (hex == 4, octal == 3, binary == 1) 11020 overflowed was the number more than we can hold? 11021 11022 Shift is used when we add a digit. It also serves as an "are 11023 we in octal/hex/binary?" indicator to disallow hex characters 11024 when in octal mode. 11025 */ 11026 NV n = 0.0; 11027 UV u = 0; 11028 I32 shift; 11029 bool overflowed = FALSE; 11030 bool just_zero = TRUE; /* just plain 0 or binary number? */ 11031 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 11032 static const char* const bases[5] = 11033 { "", "binary", "", "octal", "hexadecimal" }; 11034 static const char* const Bases[5] = 11035 { "", "Binary", "", "Octal", "Hexadecimal" }; 11036 static const char* const maxima[5] = 11037 { "", 11038 "0b11111111111111111111111111111111", 11039 "", 11040 "037777777777", 11041 "0xffffffff" }; 11042 const char *base, *Base, *max; 11043 11044 /* check for hex */ 11045 if (s[1] == 'x' || s[1] == 'X') { 11046 shift = 4; 11047 s += 2; 11048 just_zero = FALSE; 11049 } else if (s[1] == 'b' || s[1] == 'B') { 11050 shift = 1; 11051 s += 2; 11052 just_zero = FALSE; 11053 } 11054 /* check for a decimal in disguise */ 11055 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') 11056 goto decimal; 11057 /* so it must be octal */ 11058 else { 11059 shift = 3; 11060 s++; 11061 } 11062 11063 if (*s == '_') { 11064 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11065 "Misplaced _ in number"); 11066 lastub = s++; 11067 } 11068 11069 base = bases[shift]; 11070 Base = Bases[shift]; 11071 max = maxima[shift]; 11072 11073 /* read the rest of the number */ 11074 for (;;) { 11075 /* x is used in the overflow test, 11076 b is the digit we're adding on. */ 11077 UV x, b; 11078 11079 switch (*s) { 11080 11081 /* if we don't mention it, we're done */ 11082 default: 11083 goto out; 11084 11085 /* _ are ignored -- but warned about if consecutive */ 11086 case '_': 11087 if (lastub && s == lastub + 1) 11088 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11089 "Misplaced _ in number"); 11090 lastub = s++; 11091 break; 11092 11093 /* 8 and 9 are not octal */ 11094 case '8': case '9': 11095 if (shift == 3) 11096 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 11097 /* FALL THROUGH */ 11098 11099 /* octal digits */ 11100 case '2': case '3': case '4': 11101 case '5': case '6': case '7': 11102 if (shift == 1) 11103 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 11104 /* FALL THROUGH */ 11105 11106 case '0': case '1': 11107 b = *s++ & 15; /* ASCII digit -> value of digit */ 11108 goto digit; 11109 11110 /* hex digits */ 11111 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 11112 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 11113 /* make sure they said 0x */ 11114 if (shift != 4) 11115 goto out; 11116 b = (*s++ & 7) + 9; 11117 11118 /* Prepare to put the digit we have onto the end 11119 of the number so far. We check for overflows. 11120 */ 11121 11122 digit: 11123 just_zero = FALSE; 11124 if (!overflowed) { 11125 x = u << shift; /* make room for the digit */ 11126 11127 if ((x >> shift) != u 11128 && !(PL_hints & HINT_NEW_BINARY)) { 11129 overflowed = TRUE; 11130 n = (NV) u; 11131 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11132 "Integer overflow in %s number", 11133 base); 11134 } else 11135 u = x | b; /* add the digit to the end */ 11136 } 11137 if (overflowed) { 11138 n *= nvshift[shift]; 11139 /* If an NV has not enough bits in its 11140 * mantissa to represent an UV this summing of 11141 * small low-order numbers is a waste of time 11142 * (because the NV cannot preserve the 11143 * low-order bits anyway): we could just 11144 * remember when did we overflow and in the 11145 * end just multiply n by the right 11146 * amount. */ 11147 n += (NV) b; 11148 } 11149 break; 11150 } 11151 } 11152 11153 /* if we get here, we had success: make a scalar value from 11154 the number. 11155 */ 11156 out: 11157 11158 /* final misplaced underbar check */ 11159 if (s[-1] == '_') { 11160 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 11161 } 11162 11163 if (overflowed) { 11164 if (n > 4294967295.0) 11165 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 11166 "%s number > %s non-portable", 11167 Base, max); 11168 sv = newSVnv(n); 11169 } 11170 else { 11171 #if UVSIZE > 4 11172 if (u > 0xffffffff) 11173 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 11174 "%s number > %s non-portable", 11175 Base, max); 11176 #endif 11177 sv = newSVuv(u); 11178 } 11179 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 11180 sv = new_constant(start, s - start, "integer", 11181 sv, NULL, NULL, 0); 11182 else if (PL_hints & HINT_NEW_BINARY) 11183 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0); 11184 } 11185 break; 11186 11187 /* 11188 handle decimal numbers. 11189 we're also sent here when we read a 0 as the first digit 11190 */ 11191 case '1': case '2': case '3': case '4': case '5': 11192 case '6': case '7': case '8': case '9': case '.': 11193 decimal: 11194 d = PL_tokenbuf; 11195 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 11196 floatit = FALSE; 11197 11198 /* read next group of digits and _ and copy into d */ 11199 while (isDIGIT(*s) || *s == '_') { 11200 /* skip underscores, checking for misplaced ones 11201 if -w is on 11202 */ 11203 if (*s == '_') { 11204 if (lastub && s == lastub + 1) 11205 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11206 "Misplaced _ in number"); 11207 lastub = s++; 11208 } 11209 else { 11210 /* check for end of fixed-length buffer */ 11211 if (d >= e) 11212 Perl_croak(aTHX_ "%s", number_too_long); 11213 /* if we're ok, copy the character */ 11214 *d++ = *s++; 11215 } 11216 } 11217 11218 /* final misplaced underbar check */ 11219 if (lastub && s == lastub + 1) { 11220 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 11221 } 11222 11223 /* read a decimal portion if there is one. avoid 11224 3..5 being interpreted as the number 3. followed 11225 by .5 11226 */ 11227 if (*s == '.' && s[1] != '.') { 11228 floatit = TRUE; 11229 *d++ = *s++; 11230 11231 if (*s == '_') { 11232 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11233 "Misplaced _ in number"); 11234 lastub = s; 11235 } 11236 11237 /* copy, ignoring underbars, until we run out of digits. 11238 */ 11239 for (; isDIGIT(*s) || *s == '_'; s++) { 11240 /* fixed length buffer check */ 11241 if (d >= e) 11242 Perl_croak(aTHX_ "%s", number_too_long); 11243 if (*s == '_') { 11244 if (lastub && s == lastub + 1) 11245 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11246 "Misplaced _ in number"); 11247 lastub = s; 11248 } 11249 else 11250 *d++ = *s; 11251 } 11252 /* fractional part ending in underbar? */ 11253 if (s[-1] == '_') { 11254 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11255 "Misplaced _ in number"); 11256 } 11257 if (*s == '.' && isDIGIT(s[1])) { 11258 /* oops, it's really a v-string, but without the "v" */ 11259 s = start; 11260 goto vstring; 11261 } 11262 } 11263 11264 /* read exponent part, if present */ 11265 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { 11266 floatit = TRUE; 11267 s++; 11268 11269 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ 11270 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ 11271 11272 /* stray preinitial _ */ 11273 if (*s == '_') { 11274 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11275 "Misplaced _ in number"); 11276 lastub = s++; 11277 } 11278 11279 /* allow positive or negative exponent */ 11280 if (*s == '+' || *s == '-') 11281 *d++ = *s++; 11282 11283 /* stray initial _ */ 11284 if (*s == '_') { 11285 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11286 "Misplaced _ in number"); 11287 lastub = s++; 11288 } 11289 11290 /* read digits of exponent */ 11291 while (isDIGIT(*s) || *s == '_') { 11292 if (isDIGIT(*s)) { 11293 if (d >= e) 11294 Perl_croak(aTHX_ "%s", number_too_long); 11295 *d++ = *s++; 11296 } 11297 else { 11298 if (((lastub && s == lastub + 1) || 11299 (!isDIGIT(s[1]) && s[1] != '_'))) 11300 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11301 "Misplaced _ in number"); 11302 lastub = s++; 11303 } 11304 } 11305 } 11306 11307 11308 /* 11309 We try to do an integer conversion first if no characters 11310 indicating "float" have been found. 11311 */ 11312 11313 if (!floatit) { 11314 UV uv; 11315 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 11316 11317 if (flags == IS_NUMBER_IN_UV) { 11318 if (uv <= IV_MAX) 11319 sv = newSViv(uv); /* Prefer IVs over UVs. */ 11320 else 11321 sv = newSVuv(uv); 11322 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 11323 if (uv <= (UV) IV_MIN) 11324 sv = newSViv(-(IV)uv); 11325 else 11326 floatit = TRUE; 11327 } else 11328 floatit = TRUE; 11329 } 11330 if (floatit) { 11331 STORE_NUMERIC_LOCAL_SET_STANDARD(); 11332 /* terminate the string */ 11333 *d = '\0'; 11334 nv = Atof(PL_tokenbuf); 11335 RESTORE_NUMERIC_LOCAL(); 11336 sv = newSVnv(nv); 11337 } 11338 11339 if ( floatit 11340 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 11341 const char *const key = floatit ? "float" : "integer"; 11342 const STRLEN keylen = floatit ? 5 : 7; 11343 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 11344 key, keylen, sv, NULL, NULL, 0); 11345 } 11346 break; 11347 11348 /* if it starts with a v, it could be a v-string */ 11349 case 'v': 11350 vstring: 11351 sv = newSV(5); /* preallocate storage space */ 11352 ENTER_with_name("scan_vstring"); 11353 SAVEFREESV(sv); 11354 s = scan_vstring(s, PL_bufend, sv); 11355 SvREFCNT_inc_simple_void_NN(sv); 11356 LEAVE_with_name("scan_vstring"); 11357 break; 11358 } 11359 11360 /* make the op for the constant and return */ 11361 11362 if (sv) 11363 lvalp->opval = newSVOP(OP_CONST, 0, sv); 11364 else 11365 lvalp->opval = NULL; 11366 11367 return (char *)s; 11368 } 11369 11370 STATIC char * 11371 S_scan_formline(pTHX_ char *s) 11372 { 11373 dVAR; 11374 char *eol; 11375 char *t; 11376 SV * const stuff = newSVpvs(""); 11377 bool needargs = FALSE; 11378 bool eofmt = FALSE; 11379 #ifdef PERL_MAD 11380 char *tokenstart = s; 11381 SV* savewhite = NULL; 11382 11383 if (PL_madskills) { 11384 savewhite = PL_thiswhite; 11385 PL_thiswhite = 0; 11386 } 11387 #endif 11388 11389 PERL_ARGS_ASSERT_SCAN_FORMLINE; 11390 11391 while (!needargs) { 11392 if (*s == '.') { 11393 t = s+1; 11394 #ifdef PERL_STRICT_CR 11395 while (SPACE_OR_TAB(*t)) 11396 t++; 11397 #else 11398 while (SPACE_OR_TAB(*t) || *t == '\r') 11399 t++; 11400 #endif 11401 if (*t == '\n' || t == PL_bufend) { 11402 eofmt = TRUE; 11403 break; 11404 } 11405 } 11406 eol = (char *) memchr(s,'\n',PL_bufend-s); 11407 if (!eol++) 11408 eol = PL_bufend; 11409 if (*s != '#') { 11410 for (t = s; t < eol; t++) { 11411 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 11412 needargs = FALSE; 11413 goto enough; /* ~~ must be first line in formline */ 11414 } 11415 if (*t == '@' || *t == '^') 11416 needargs = TRUE; 11417 } 11418 if (eol > s) { 11419 sv_catpvn(stuff, s, eol-s); 11420 #ifndef PERL_STRICT_CR 11421 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 11422 char *end = SvPVX(stuff) + SvCUR(stuff); 11423 end[-2] = '\n'; 11424 end[-1] = '\0'; 11425 SvCUR_set(stuff, SvCUR(stuff) - 1); 11426 } 11427 #endif 11428 } 11429 else 11430 break; 11431 } 11432 s = (char*)eol; 11433 if ((PL_rsfp || PL_parser->filtered) 11434 && PL_parser->form_lex_state == LEX_NORMAL) { 11435 bool got_some; 11436 #ifdef PERL_MAD 11437 if (PL_madskills) { 11438 if (PL_thistoken) 11439 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart); 11440 else 11441 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); 11442 } 11443 #endif 11444 PL_bufptr = PL_bufend; 11445 COPLINE_INC_WITH_HERELINES; 11446 got_some = lex_next_chunk(0); 11447 CopLINE_dec(PL_curcop); 11448 s = PL_bufptr; 11449 #ifdef PERL_MAD 11450 tokenstart = PL_bufptr; 11451 #endif 11452 if (!got_some) 11453 break; 11454 } 11455 incline(s); 11456 } 11457 enough: 11458 if (!SvCUR(stuff) || needargs) 11459 PL_lex_state = PL_parser->form_lex_state; 11460 if (SvCUR(stuff)) { 11461 PL_expect = XSTATE; 11462 if (needargs) { 11463 const char *s2 = s; 11464 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f' 11465 || *s2 == 013) 11466 s2++; 11467 if (*s2 == '{') { 11468 start_force(PL_curforce); 11469 PL_expect = XTERMBLOCK; 11470 NEXTVAL_NEXTTOKE.ival = 0; 11471 force_next(DO); 11472 } 11473 start_force(PL_curforce); 11474 NEXTVAL_NEXTTOKE.ival = 0; 11475 force_next(FORMLBRACK); 11476 } 11477 if (!IN_BYTES) { 11478 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 11479 SvUTF8_on(stuff); 11480 else if (PL_encoding) 11481 sv_recode_to_utf8(stuff, PL_encoding); 11482 } 11483 start_force(PL_curforce); 11484 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); 11485 force_next(THING); 11486 } 11487 else { 11488 SvREFCNT_dec(stuff); 11489 if (eofmt) 11490 PL_lex_formbrack = 0; 11491 } 11492 #ifdef PERL_MAD 11493 if (PL_madskills) { 11494 if (PL_thistoken) 11495 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart); 11496 else 11497 PL_thistoken = newSVpvn(tokenstart, s - tokenstart); 11498 PL_thiswhite = savewhite; 11499 } 11500 #endif 11501 return s; 11502 } 11503 11504 I32 11505 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 11506 { 11507 dVAR; 11508 const I32 oldsavestack_ix = PL_savestack_ix; 11509 CV* const outsidecv = PL_compcv; 11510 11511 SAVEI32(PL_subline); 11512 save_item(PL_subname); 11513 SAVESPTR(PL_compcv); 11514 11515 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 11516 CvFLAGS(PL_compcv) |= flags; 11517 11518 PL_subline = CopLINE(PL_curcop); 11519 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 11520 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 11521 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 11522 if (outsidecv && CvPADLIST(outsidecv)) 11523 CvPADLIST(PL_compcv)->xpadl_outid = 11524 PadlistNAMES(CvPADLIST(outsidecv)); 11525 11526 return oldsavestack_ix; 11527 } 11528 11529 static int 11530 S_yywarn(pTHX_ const char *const s, U32 flags) 11531 { 11532 dVAR; 11533 11534 PERL_ARGS_ASSERT_YYWARN; 11535 11536 PL_in_eval |= EVAL_WARNONLY; 11537 yyerror_pv(s, flags); 11538 PL_in_eval &= ~EVAL_WARNONLY; 11539 return 0; 11540 } 11541 11542 int 11543 Perl_yyerror(pTHX_ const char *const s) 11544 { 11545 PERL_ARGS_ASSERT_YYERROR; 11546 return yyerror_pvn(s, strlen(s), 0); 11547 } 11548 11549 int 11550 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 11551 { 11552 PERL_ARGS_ASSERT_YYERROR_PV; 11553 return yyerror_pvn(s, strlen(s), flags); 11554 } 11555 11556 int 11557 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 11558 { 11559 dVAR; 11560 const char *context = NULL; 11561 int contlen = -1; 11562 SV *msg; 11563 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 11564 int yychar = PL_parser->yychar; 11565 11566 PERL_ARGS_ASSERT_YYERROR_PVN; 11567 11568 if (!yychar || (yychar == ';' && !PL_rsfp)) 11569 sv_catpvs(where_sv, "at EOF"); 11570 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr && 11571 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && 11572 PL_oldbufptr != PL_bufptr) { 11573 /* 11574 Only for NetWare: 11575 The code below is removed for NetWare because it abends/crashes on NetWare 11576 when the script has error such as not having the closing quotes like: 11577 if ($var eq "value) 11578 Checking of white spaces is anyway done in NetWare code. 11579 */ 11580 #ifndef NETWARE 11581 while (isSPACE(*PL_oldoldbufptr)) 11582 PL_oldoldbufptr++; 11583 #endif 11584 context = PL_oldoldbufptr; 11585 contlen = PL_bufptr - PL_oldoldbufptr; 11586 } 11587 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr && 11588 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) { 11589 /* 11590 Only for NetWare: 11591 The code below is removed for NetWare because it abends/crashes on NetWare 11592 when the script has error such as not having the closing quotes like: 11593 if ($var eq "value) 11594 Checking of white spaces is anyway done in NetWare code. 11595 */ 11596 #ifndef NETWARE 11597 while (isSPACE(*PL_oldbufptr)) 11598 PL_oldbufptr++; 11599 #endif 11600 context = PL_oldbufptr; 11601 contlen = PL_bufptr - PL_oldbufptr; 11602 } 11603 else if (yychar > 255) 11604 sv_catpvs(where_sv, "next token ???"); 11605 else if (yychar == -2) { /* YYEMPTY */ 11606 if (PL_lex_state == LEX_NORMAL || 11607 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 11608 sv_catpvs(where_sv, "at end of line"); 11609 else if (PL_lex_inpat) 11610 sv_catpvs(where_sv, "within pattern"); 11611 else 11612 sv_catpvs(where_sv, "within string"); 11613 } 11614 else { 11615 sv_catpvs(where_sv, "next char "); 11616 if (yychar < 32) 11617 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 11618 else if (isPRINT_LC(yychar)) { 11619 const char string = yychar; 11620 sv_catpvn(where_sv, &string, 1); 11621 } 11622 else 11623 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 11624 } 11625 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 11626 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 11627 OutCopFILE(PL_curcop), 11628 (IV)(PL_parser->preambling == NOLINE 11629 ? CopLINE(PL_curcop) 11630 : PL_parser->preambling)); 11631 if (context) 11632 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", 11633 UTF8fARG(UTF, contlen, context)); 11634 else 11635 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); 11636 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 11637 Perl_sv_catpvf(aTHX_ msg, 11638 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 11639 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 11640 PL_multi_end = 0; 11641 } 11642 if (PL_in_eval & EVAL_WARNONLY) { 11643 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); 11644 } 11645 else 11646 qerror(msg); 11647 if (PL_error_count >= 10) { 11648 SV * errsv; 11649 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) 11650 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 11651 SVfARG(errsv), OutCopFILE(PL_curcop)); 11652 else 11653 Perl_croak(aTHX_ "%s has too many errors.\n", 11654 OutCopFILE(PL_curcop)); 11655 } 11656 PL_in_my = 0; 11657 PL_in_my_stash = NULL; 11658 return 0; 11659 } 11660 11661 STATIC char* 11662 S_swallow_bom(pTHX_ U8 *s) 11663 { 11664 dVAR; 11665 const STRLEN slen = SvCUR(PL_linestr); 11666 11667 PERL_ARGS_ASSERT_SWALLOW_BOM; 11668 11669 switch (s[0]) { 11670 case 0xFF: 11671 if (s[1] == 0xFE) { 11672 /* UTF-16 little-endian? (or UTF-32LE?) */ 11673 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 11674 /* diag_listed_as: Unsupported script encoding %s */ 11675 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 11676 #ifndef PERL_NO_UTF16_FILTER 11677 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 11678 s += 2; 11679 if (PL_bufend > (char*)s) { 11680 s = add_utf16_textfilter(s, TRUE); 11681 } 11682 #else 11683 /* diag_listed_as: Unsupported script encoding %s */ 11684 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11685 #endif 11686 } 11687 break; 11688 case 0xFE: 11689 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 11690 #ifndef PERL_NO_UTF16_FILTER 11691 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 11692 s += 2; 11693 if (PL_bufend > (char *)s) { 11694 s = add_utf16_textfilter(s, FALSE); 11695 } 11696 #else 11697 /* diag_listed_as: Unsupported script encoding %s */ 11698 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11699 #endif 11700 } 11701 break; 11702 case BOM_UTF8_FIRST_BYTE: { 11703 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ 11704 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { 11705 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 11706 s += len + 1; /* UTF-8 */ 11707 } 11708 break; 11709 } 11710 case 0: 11711 if (slen > 3) { 11712 if (s[1] == 0) { 11713 if (s[2] == 0xFE && s[3] == 0xFF) { 11714 /* UTF-32 big-endian */ 11715 /* diag_listed_as: Unsupported script encoding %s */ 11716 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 11717 } 11718 } 11719 else if (s[2] == 0 && s[3] != 0) { 11720 /* Leading bytes 11721 * 00 xx 00 xx 11722 * are a good indicator of UTF-16BE. */ 11723 #ifndef PERL_NO_UTF16_FILTER 11724 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 11725 s = add_utf16_textfilter(s, FALSE); 11726 #else 11727 /* diag_listed_as: Unsupported script encoding %s */ 11728 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11729 #endif 11730 } 11731 } 11732 11733 default: 11734 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 11735 /* Leading bytes 11736 * xx 00 xx 00 11737 * are a good indicator of UTF-16LE. */ 11738 #ifndef PERL_NO_UTF16_FILTER 11739 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 11740 s = add_utf16_textfilter(s, TRUE); 11741 #else 11742 /* diag_listed_as: Unsupported script encoding %s */ 11743 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11744 #endif 11745 } 11746 } 11747 return (char*)s; 11748 } 11749 11750 11751 #ifndef PERL_NO_UTF16_FILTER 11752 static I32 11753 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 11754 { 11755 dVAR; 11756 SV *const filter = FILTER_DATA(idx); 11757 /* We re-use this each time round, throwing the contents away before we 11758 return. */ 11759 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 11760 SV *const utf8_buffer = filter; 11761 IV status = IoPAGE(filter); 11762 const bool reverse = cBOOL(IoLINES(filter)); 11763 I32 retval; 11764 11765 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 11766 11767 /* As we're automatically added, at the lowest level, and hence only called 11768 from this file, we can be sure that we're not called in block mode. Hence 11769 don't bother writing code to deal with block mode. */ 11770 if (maxlen) { 11771 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 11772 } 11773 if (status < 0) { 11774 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); 11775 } 11776 DEBUG_P(PerlIO_printf(Perl_debug_log, 11777 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11778 FPTR2DPTR(void *, S_utf16_textfilter), 11779 reverse ? 'l' : 'b', idx, maxlen, status, 11780 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11781 11782 while (1) { 11783 STRLEN chars; 11784 STRLEN have; 11785 I32 newlen; 11786 U8 *end; 11787 /* First, look in our buffer of existing UTF-8 data: */ 11788 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 11789 11790 if (nl) { 11791 ++nl; 11792 } else if (status == 0) { 11793 /* EOF */ 11794 IoPAGE(filter) = 0; 11795 nl = SvEND(utf8_buffer); 11796 } 11797 if (nl) { 11798 STRLEN got = nl - SvPVX(utf8_buffer); 11799 /* Did we have anything to append? */ 11800 retval = got != 0; 11801 sv_catpvn(sv, SvPVX(utf8_buffer), got); 11802 /* Everything else in this code works just fine if SVp_POK isn't 11803 set. This, however, needs it, and we need it to work, else 11804 we loop infinitely because the buffer is never consumed. */ 11805 sv_chop(utf8_buffer, nl); 11806 break; 11807 } 11808 11809 /* OK, not a complete line there, so need to read some more UTF-16. 11810 Read an extra octect if the buffer currently has an odd number. */ 11811 while (1) { 11812 if (status <= 0) 11813 break; 11814 if (SvCUR(utf16_buffer) >= 2) { 11815 /* Location of the high octet of the last complete code point. 11816 Gosh, UTF-16 is a pain. All the benefits of variable length, 11817 *coupled* with all the benefits of partial reads and 11818 endianness. */ 11819 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 11820 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 11821 11822 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 11823 break; 11824 } 11825 11826 /* We have the first half of a surrogate. Read more. */ 11827 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 11828 } 11829 11830 status = FILTER_READ(idx + 1, utf16_buffer, 11831 160 + (SvCUR(utf16_buffer) & 1)); 11832 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); 11833 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 11834 if (status < 0) { 11835 /* Error */ 11836 IoPAGE(filter) = status; 11837 return status; 11838 } 11839 } 11840 11841 chars = SvCUR(utf16_buffer) >> 1; 11842 have = SvCUR(utf8_buffer); 11843 SvGROW(utf8_buffer, have + chars * 3 + 1); 11844 11845 if (reverse) { 11846 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 11847 (U8*)SvPVX_const(utf8_buffer) + have, 11848 chars * 2, &newlen); 11849 } else { 11850 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 11851 (U8*)SvPVX_const(utf8_buffer) + have, 11852 chars * 2, &newlen); 11853 } 11854 SvCUR_set(utf8_buffer, have + newlen); 11855 *end = '\0'; 11856 11857 /* No need to keep this SV "well-formed" with a '\0' after the end, as 11858 it's private to us, and utf16_to_utf8{,reversed} take a 11859 (pointer,length) pair, rather than a NUL-terminated string. */ 11860 if(SvCUR(utf16_buffer) & 1) { 11861 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 11862 SvCUR_set(utf16_buffer, 1); 11863 } else { 11864 SvCUR_set(utf16_buffer, 0); 11865 } 11866 } 11867 DEBUG_P(PerlIO_printf(Perl_debug_log, 11868 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11869 status, 11870 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11871 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 11872 return retval; 11873 } 11874 11875 static U8 * 11876 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 11877 { 11878 SV *filter = filter_add(S_utf16_textfilter, NULL); 11879 11880 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 11881 11882 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 11883 sv_setpvs(filter, ""); 11884 IoLINES(filter) = reversed; 11885 IoPAGE(filter) = 1; /* Not EOF */ 11886 11887 /* Sadly, we have to return a valid pointer, come what may, so we have to 11888 ignore any error return from this. */ 11889 SvCUR_set(PL_linestr, 0); 11890 if (FILTER_READ(0, PL_linestr, 0)) { 11891 SvUTF8_on(PL_linestr); 11892 } else { 11893 SvUTF8_on(PL_linestr); 11894 } 11895 PL_bufend = SvEND(PL_linestr); 11896 return (U8*)SvPVX(PL_linestr); 11897 } 11898 #endif 11899 11900 /* 11901 Returns a pointer to the next character after the parsed 11902 vstring, as well as updating the passed in sv. 11903 11904 Function must be called like 11905 11906 sv = sv_2mortal(newSV(5)); 11907 s = scan_vstring(s,e,sv); 11908 11909 where s and e are the start and end of the string. 11910 The sv should already be large enough to store the vstring 11911 passed in, for performance reasons. 11912 11913 This function may croak if fatal warnings are enabled in the 11914 calling scope, hence the sv_2mortal in the example (to prevent 11915 a leak). Make sure to do SvREFCNT_inc afterwards if you use 11916 sv_2mortal. 11917 11918 */ 11919 11920 char * 11921 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 11922 { 11923 dVAR; 11924 const char *pos = s; 11925 const char *start = s; 11926 11927 PERL_ARGS_ASSERT_SCAN_VSTRING; 11928 11929 if (*pos == 'v') pos++; /* get past 'v' */ 11930 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11931 pos++; 11932 if ( *pos != '.') { 11933 /* this may not be a v-string if followed by => */ 11934 const char *next = pos; 11935 while (next < e && isSPACE(*next)) 11936 ++next; 11937 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 11938 /* return string not v-string */ 11939 sv_setpvn(sv,(char *)s,pos-s); 11940 return (char *)pos; 11941 } 11942 } 11943 11944 if (!isALPHA(*pos)) { 11945 U8 tmpbuf[UTF8_MAXBYTES+1]; 11946 11947 if (*s == 'v') 11948 s++; /* get past 'v' */ 11949 11950 sv_setpvs(sv, ""); 11951 11952 for (;;) { 11953 /* this is atoi() that tolerates underscores */ 11954 U8 *tmpend; 11955 UV rev = 0; 11956 const char *end = pos; 11957 UV mult = 1; 11958 while (--end >= s) { 11959 if (*end != '_') { 11960 const UV orev = rev; 11961 rev += (*end - '0') * mult; 11962 mult *= 10; 11963 if (orev > rev) 11964 /* diag_listed_as: Integer overflow in %s number */ 11965 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11966 "Integer overflow in decimal number"); 11967 } 11968 } 11969 #ifdef EBCDIC 11970 if (rev > 0x7FFFFFFF) 11971 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); 11972 #endif 11973 /* Append native character for the rev point */ 11974 tmpend = uvchr_to_utf8(tmpbuf, rev); 11975 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 11976 if (!UVCHR_IS_INVARIANT(rev)) 11977 SvUTF8_on(sv); 11978 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 11979 s = ++pos; 11980 else { 11981 s = pos; 11982 break; 11983 } 11984 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11985 pos++; 11986 } 11987 SvPOK_on(sv); 11988 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 11989 SvRMAGICAL_on(sv); 11990 } 11991 return (char *)s; 11992 } 11993 11994 int 11995 Perl_keyword_plugin_standard(pTHX_ 11996 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 11997 { 11998 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 11999 PERL_UNUSED_CONTEXT; 12000 PERL_UNUSED_ARG(keyword_ptr); 12001 PERL_UNUSED_ARG(keyword_len); 12002 PERL_UNUSED_ARG(op_ptr); 12003 return KEYWORD_PLUGIN_DECLINE; 12004 } 12005 12006 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 12007 static void 12008 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 12009 { 12010 SAVEI32(PL_lex_brackets); 12011 if (PL_lex_brackets > 100) 12012 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 12013 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 12014 SAVEI32(PL_lex_allbrackets); 12015 PL_lex_allbrackets = 0; 12016 SAVEI8(PL_lex_fakeeof); 12017 PL_lex_fakeeof = (U8)fakeeof; 12018 if(yyparse(gramtype) && !PL_parser->error_count) 12019 qerror(Perl_mess(aTHX_ "Parse error")); 12020 } 12021 12022 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 12023 static OP * 12024 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 12025 { 12026 OP *o; 12027 ENTER; 12028 SAVEVPTR(PL_eval_root); 12029 PL_eval_root = NULL; 12030 parse_recdescent(gramtype, fakeeof); 12031 o = PL_eval_root; 12032 LEAVE; 12033 return o; 12034 } 12035 12036 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 12037 static OP * 12038 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 12039 { 12040 OP *exprop; 12041 if (flags & ~PARSE_OPTIONAL) 12042 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 12043 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 12044 if (!exprop && !(flags & PARSE_OPTIONAL)) { 12045 if (!PL_parser->error_count) 12046 qerror(Perl_mess(aTHX_ "Parse error")); 12047 exprop = newOP(OP_NULL, 0); 12048 } 12049 return exprop; 12050 } 12051 12052 /* 12053 =for apidoc Amx|OP *|parse_arithexpr|U32 flags 12054 12055 Parse a Perl arithmetic expression. This may contain operators of precedence 12056 down to the bit shift operators. The expression must be followed (and thus 12057 terminated) either by a comparison or lower-precedence operator or by 12058 something that would normally terminate an expression such as semicolon. 12059 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 12060 otherwise it is mandatory. It is up to the caller to ensure that the 12061 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12062 the source of the code to be parsed and the lexical context for the 12063 expression. 12064 12065 The op tree representing the expression is returned. If an optional 12066 expression is absent, a null pointer is returned, otherwise the pointer 12067 will be non-null. 12068 12069 If an error occurs in parsing or compilation, in most cases a valid op 12070 tree is returned anyway. The error is reflected in the parser state, 12071 normally resulting in a single exception at the top level of parsing 12072 which covers all the compilation errors that occurred. Some compilation 12073 errors, however, will throw an exception immediately. 12074 12075 =cut 12076 */ 12077 12078 OP * 12079 Perl_parse_arithexpr(pTHX_ U32 flags) 12080 { 12081 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 12082 } 12083 12084 /* 12085 =for apidoc Amx|OP *|parse_termexpr|U32 flags 12086 12087 Parse a Perl term expression. This may contain operators of precedence 12088 down to the assignment operators. The expression must be followed (and thus 12089 terminated) either by a comma or lower-precedence operator or by 12090 something that would normally terminate an expression such as semicolon. 12091 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 12092 otherwise it is mandatory. It is up to the caller to ensure that the 12093 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12094 the source of the code to be parsed and the lexical context for the 12095 expression. 12096 12097 The op tree representing the expression is returned. If an optional 12098 expression is absent, a null pointer is returned, otherwise the pointer 12099 will be non-null. 12100 12101 If an error occurs in parsing or compilation, in most cases a valid op 12102 tree is returned anyway. The error is reflected in the parser state, 12103 normally resulting in a single exception at the top level of parsing 12104 which covers all the compilation errors that occurred. Some compilation 12105 errors, however, will throw an exception immediately. 12106 12107 =cut 12108 */ 12109 12110 OP * 12111 Perl_parse_termexpr(pTHX_ U32 flags) 12112 { 12113 return parse_expr(LEX_FAKEEOF_COMMA, flags); 12114 } 12115 12116 /* 12117 =for apidoc Amx|OP *|parse_listexpr|U32 flags 12118 12119 Parse a Perl list expression. This may contain operators of precedence 12120 down to the comma operator. The expression must be followed (and thus 12121 terminated) either by a low-precedence logic operator such as C<or> or by 12122 something that would normally terminate an expression such as semicolon. 12123 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 12124 otherwise it is mandatory. It is up to the caller to ensure that the 12125 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12126 the source of the code to be parsed and the lexical context for the 12127 expression. 12128 12129 The op tree representing the expression is returned. If an optional 12130 expression is absent, a null pointer is returned, otherwise the pointer 12131 will be non-null. 12132 12133 If an error occurs in parsing or compilation, in most cases a valid op 12134 tree is returned anyway. The error is reflected in the parser state, 12135 normally resulting in a single exception at the top level of parsing 12136 which covers all the compilation errors that occurred. Some compilation 12137 errors, however, will throw an exception immediately. 12138 12139 =cut 12140 */ 12141 12142 OP * 12143 Perl_parse_listexpr(pTHX_ U32 flags) 12144 { 12145 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 12146 } 12147 12148 /* 12149 =for apidoc Amx|OP *|parse_fullexpr|U32 flags 12150 12151 Parse a single complete Perl expression. This allows the full 12152 expression grammar, including the lowest-precedence operators such 12153 as C<or>. The expression must be followed (and thus terminated) by a 12154 token that an expression would normally be terminated by: end-of-file, 12155 closing bracketing punctuation, semicolon, or one of the keywords that 12156 signals a postfix expression-statement modifier. If I<flags> includes 12157 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is 12158 mandatory. It is up to the caller to ensure that the dynamic parser 12159 state (L</PL_parser> et al) is correctly set to reflect the source of 12160 the code to be parsed and the lexical context for the expression. 12161 12162 The op tree representing the expression is returned. If an optional 12163 expression is absent, a null pointer is returned, otherwise the pointer 12164 will be non-null. 12165 12166 If an error occurs in parsing or compilation, in most cases a valid op 12167 tree is returned anyway. The error is reflected in the parser state, 12168 normally resulting in a single exception at the top level of parsing 12169 which covers all the compilation errors that occurred. Some compilation 12170 errors, however, will throw an exception immediately. 12171 12172 =cut 12173 */ 12174 12175 OP * 12176 Perl_parse_fullexpr(pTHX_ U32 flags) 12177 { 12178 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 12179 } 12180 12181 /* 12182 =for apidoc Amx|OP *|parse_block|U32 flags 12183 12184 Parse a single complete Perl code block. This consists of an opening 12185 brace, a sequence of statements, and a closing brace. The block 12186 constitutes a lexical scope, so C<my> variables and various compile-time 12187 effects can be contained within it. It is up to the caller to ensure 12188 that the dynamic parser state (L</PL_parser> et al) is correctly set to 12189 reflect the source of the code to be parsed and the lexical context for 12190 the statement. 12191 12192 The op tree representing the code block is returned. This is always a 12193 real op, never a null pointer. It will normally be a C<lineseq> list, 12194 including C<nextstate> or equivalent ops. No ops to construct any kind 12195 of runtime scope are included by virtue of it being a block. 12196 12197 If an error occurs in parsing or compilation, in most cases a valid op 12198 tree (most likely null) is returned anyway. The error is reflected in 12199 the parser state, normally resulting in a single exception at the top 12200 level of parsing which covers all the compilation errors that occurred. 12201 Some compilation errors, however, will throw an exception immediately. 12202 12203 The I<flags> parameter is reserved for future use, and must always 12204 be zero. 12205 12206 =cut 12207 */ 12208 12209 OP * 12210 Perl_parse_block(pTHX_ U32 flags) 12211 { 12212 if (flags) 12213 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 12214 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 12215 } 12216 12217 /* 12218 =for apidoc Amx|OP *|parse_barestmt|U32 flags 12219 12220 Parse a single unadorned Perl statement. This may be a normal imperative 12221 statement or a declaration that has compile-time effect. It does not 12222 include any label or other affixture. It is up to the caller to ensure 12223 that the dynamic parser state (L</PL_parser> et al) is correctly set to 12224 reflect the source of the code to be parsed and the lexical context for 12225 the statement. 12226 12227 The op tree representing the statement is returned. This may be a 12228 null pointer if the statement is null, for example if it was actually 12229 a subroutine definition (which has compile-time side effects). If not 12230 null, it will be ops directly implementing the statement, suitable to 12231 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 12232 equivalent op (except for those embedded in a scope contained entirely 12233 within the statement). 12234 12235 If an error occurs in parsing or compilation, in most cases a valid op 12236 tree (most likely null) is returned anyway. The error is reflected in 12237 the parser state, normally resulting in a single exception at the top 12238 level of parsing which covers all the compilation errors that occurred. 12239 Some compilation errors, however, will throw an exception immediately. 12240 12241 The I<flags> parameter is reserved for future use, and must always 12242 be zero. 12243 12244 =cut 12245 */ 12246 12247 OP * 12248 Perl_parse_barestmt(pTHX_ U32 flags) 12249 { 12250 if (flags) 12251 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 12252 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 12253 } 12254 12255 /* 12256 =for apidoc Amx|SV *|parse_label|U32 flags 12257 12258 Parse a single label, possibly optional, of the type that may prefix a 12259 Perl statement. It is up to the caller to ensure that the dynamic parser 12260 state (L</PL_parser> et al) is correctly set to reflect the source of 12261 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the 12262 label is optional, otherwise it is mandatory. 12263 12264 The name of the label is returned in the form of a fresh scalar. If an 12265 optional label is absent, a null pointer is returned. 12266 12267 If an error occurs in parsing, which can only occur if the label is 12268 mandatory, a valid label is returned anyway. The error is reflected in 12269 the parser state, normally resulting in a single exception at the top 12270 level of parsing which covers all the compilation errors that occurred. 12271 12272 =cut 12273 */ 12274 12275 SV * 12276 Perl_parse_label(pTHX_ U32 flags) 12277 { 12278 if (flags & ~PARSE_OPTIONAL) 12279 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 12280 if (PL_lex_state == LEX_KNOWNEXT) { 12281 PL_parser->yychar = yylex(); 12282 if (PL_parser->yychar == LABEL) { 12283 char * const lpv = pl_yylval.pval; 12284 STRLEN llen = strlen(lpv); 12285 PL_parser->yychar = YYEMPTY; 12286 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0); 12287 } else { 12288 yyunlex(); 12289 goto no_label; 12290 } 12291 } else { 12292 char *s, *t; 12293 STRLEN wlen, bufptr_pos; 12294 lex_read_space(0); 12295 t = s = PL_bufptr; 12296 if (!isIDFIRST_lazy_if(s, UTF)) 12297 goto no_label; 12298 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 12299 if (word_takes_any_delimeter(s, wlen)) 12300 goto no_label; 12301 bufptr_pos = s - SvPVX(PL_linestr); 12302 PL_bufptr = t; 12303 lex_read_space(LEX_KEEP_PREVIOUS); 12304 t = PL_bufptr; 12305 s = SvPVX(PL_linestr) + bufptr_pos; 12306 if (t[0] == ':' && t[1] != ':') { 12307 PL_oldoldbufptr = PL_oldbufptr; 12308 PL_oldbufptr = s; 12309 PL_bufptr = t+1; 12310 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 12311 } else { 12312 PL_bufptr = s; 12313 no_label: 12314 if (flags & PARSE_OPTIONAL) { 12315 return NULL; 12316 } else { 12317 qerror(Perl_mess(aTHX_ "Parse error")); 12318 return newSVpvs("x"); 12319 } 12320 } 12321 } 12322 } 12323 12324 /* 12325 =for apidoc Amx|OP *|parse_fullstmt|U32 flags 12326 12327 Parse a single complete Perl statement. This may be a normal imperative 12328 statement or a declaration that has compile-time effect, and may include 12329 optional labels. It is up to the caller to ensure that the dynamic 12330 parser state (L</PL_parser> et al) is correctly set to reflect the source 12331 of the code to be parsed and the lexical context for the statement. 12332 12333 The op tree representing the statement is returned. This may be a 12334 null pointer if the statement is null, for example if it was actually 12335 a subroutine definition (which has compile-time side effects). If not 12336 null, it will be the result of a L</newSTATEOP> call, normally including 12337 a C<nextstate> or equivalent op. 12338 12339 If an error occurs in parsing or compilation, in most cases a valid op 12340 tree (most likely null) is returned anyway. The error is reflected in 12341 the parser state, normally resulting in a single exception at the top 12342 level of parsing which covers all the compilation errors that occurred. 12343 Some compilation errors, however, will throw an exception immediately. 12344 12345 The I<flags> parameter is reserved for future use, and must always 12346 be zero. 12347 12348 =cut 12349 */ 12350 12351 OP * 12352 Perl_parse_fullstmt(pTHX_ U32 flags) 12353 { 12354 if (flags) 12355 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 12356 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 12357 } 12358 12359 /* 12360 =for apidoc Amx|OP *|parse_stmtseq|U32 flags 12361 12362 Parse a sequence of zero or more Perl statements. These may be normal 12363 imperative statements, including optional labels, or declarations 12364 that have compile-time effect, or any mixture thereof. The statement 12365 sequence ends when a closing brace or end-of-file is encountered in a 12366 place where a new statement could have validly started. It is up to 12367 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 12368 is correctly set to reflect the source of the code to be parsed and the 12369 lexical context for the statements. 12370 12371 The op tree representing the statement sequence is returned. This may 12372 be a null pointer if the statements were all null, for example if there 12373 were no statements or if there were only subroutine definitions (which 12374 have compile-time side effects). If not null, it will be a C<lineseq> 12375 list, normally including C<nextstate> or equivalent ops. 12376 12377 If an error occurs in parsing or compilation, in most cases a valid op 12378 tree is returned anyway. The error is reflected in the parser state, 12379 normally resulting in a single exception at the top level of parsing 12380 which covers all the compilation errors that occurred. Some compilation 12381 errors, however, will throw an exception immediately. 12382 12383 The I<flags> parameter is reserved for future use, and must always 12384 be zero. 12385 12386 =cut 12387 */ 12388 12389 OP * 12390 Perl_parse_stmtseq(pTHX_ U32 flags) 12391 { 12392 OP *stmtseqop; 12393 I32 c; 12394 if (flags) 12395 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 12396 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 12397 c = lex_peek_unichar(0); 12398 if (c != -1 && c != /*{*/'}') 12399 qerror(Perl_mess(aTHX_ "Parse error")); 12400 return stmtseqop; 12401 } 12402 12403 #define lex_token_boundary() S_lex_token_boundary(aTHX) 12404 static void 12405 S_lex_token_boundary(pTHX) 12406 { 12407 PL_oldoldbufptr = PL_oldbufptr; 12408 PL_oldbufptr = PL_bufptr; 12409 } 12410 12411 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX) 12412 static OP * 12413 S_parse_opt_lexvar(pTHX) 12414 { 12415 I32 sigil, c; 12416 char *s, *d; 12417 OP *var; 12418 lex_token_boundary(); 12419 sigil = lex_read_unichar(0); 12420 if (lex_peek_unichar(0) == '#') { 12421 qerror(Perl_mess(aTHX_ "Parse error")); 12422 return NULL; 12423 } 12424 lex_read_space(0); 12425 c = lex_peek_unichar(0); 12426 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c))) 12427 return NULL; 12428 s = PL_bufptr; 12429 d = PL_tokenbuf + 1; 12430 PL_tokenbuf[0] = (char)sigil; 12431 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF)); 12432 PL_bufptr = s; 12433 if (d == PL_tokenbuf+1) 12434 return NULL; 12435 *d = 0; 12436 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, 12437 OPf_MOD | (OPpLVAL_INTRO<<8)); 12438 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); 12439 return var; 12440 } 12441 12442 OP * 12443 Perl_parse_subsignature(pTHX) 12444 { 12445 I32 c; 12446 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0; 12447 OP *initops = NULL; 12448 lex_read_space(0); 12449 c = lex_peek_unichar(0); 12450 while (c != /*(*/')') { 12451 switch (c) { 12452 case '$': { 12453 OP *var, *expr; 12454 if (prev_type == 2) 12455 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 12456 var = parse_opt_lexvar(); 12457 expr = var ? 12458 newBINOP(OP_AELEM, 0, 12459 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)), 12460 OP_RV2AV), 12461 newSVOP(OP_CONST, 0, newSViv(pos))) : 12462 NULL; 12463 lex_read_space(0); 12464 c = lex_peek_unichar(0); 12465 if (c == '=') { 12466 lex_token_boundary(); 12467 lex_read_unichar(0); 12468 lex_read_space(0); 12469 c = lex_peek_unichar(0); 12470 if (c == ',' || c == /*(*/')') { 12471 if (var) 12472 qerror(Perl_mess(aTHX_ "Optional parameter " 12473 "lacks default expression")); 12474 } else { 12475 OP *defexpr = parse_termexpr(0); 12476 if (defexpr->op_type == OP_UNDEF && 12477 !(defexpr->op_flags & OPf_KIDS)) { 12478 op_free(defexpr); 12479 } else { 12480 OP *ifop = 12481 newBINOP(OP_GE, 0, 12482 scalar(newUNOP(OP_RV2AV, 0, 12483 newGVOP(OP_GV, 0, PL_defgv))), 12484 newSVOP(OP_CONST, 0, newSViv(pos+1))); 12485 expr = var ? 12486 newCONDOP(0, ifop, expr, defexpr) : 12487 newLOGOP(OP_OR, 0, ifop, defexpr); 12488 } 12489 } 12490 prev_type = 1; 12491 } else { 12492 if (prev_type == 1) 12493 qerror(Perl_mess(aTHX_ "Mandatory parameter " 12494 "follows optional parameter")); 12495 prev_type = 0; 12496 min_arity = pos + 1; 12497 } 12498 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr); 12499 if (expr) 12500 initops = op_append_list(OP_LINESEQ, initops, 12501 newSTATEOP(0, NULL, expr)); 12502 max_arity = ++pos; 12503 } break; 12504 case '@': 12505 case '%': { 12506 OP *var; 12507 if (prev_type == 2) 12508 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 12509 var = parse_opt_lexvar(); 12510 if (c == '%') { 12511 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0, 12512 newBINOP(OP_BIT_AND, 0, 12513 scalar(newUNOP(OP_RV2AV, 0, 12514 newGVOP(OP_GV, 0, PL_defgv))), 12515 newSVOP(OP_CONST, 0, newSViv(1))), 12516 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), 12517 newSVOP(OP_CONST, 0, 12518 newSVpvs("Odd name/value argument " 12519 "for subroutine")))); 12520 if (pos != min_arity) 12521 chkop = newLOGOP(OP_AND, 0, 12522 newBINOP(OP_GT, 0, 12523 scalar(newUNOP(OP_RV2AV, 0, 12524 newGVOP(OP_GV, 0, PL_defgv))), 12525 newSVOP(OP_CONST, 0, newSViv(pos))), 12526 chkop); 12527 initops = op_append_list(OP_LINESEQ, 12528 newSTATEOP(0, NULL, chkop), 12529 initops); 12530 } 12531 if (var) { 12532 OP *slice = pos ? 12533 op_prepend_elem(OP_ASLICE, 12534 newOP(OP_PUSHMARK, 0), 12535 newLISTOP(OP_ASLICE, 0, 12536 list(newRANGE(0, 12537 newSVOP(OP_CONST, 0, newSViv(pos)), 12538 newUNOP(OP_AV2ARYLEN, 0, 12539 ref(newUNOP(OP_RV2AV, 0, 12540 newGVOP(OP_GV, 0, PL_defgv)), 12541 OP_AV2ARYLEN)))), 12542 ref(newUNOP(OP_RV2AV, 0, 12543 newGVOP(OP_GV, 0, PL_defgv)), 12544 OP_ASLICE))) : 12545 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)); 12546 initops = op_append_list(OP_LINESEQ, initops, 12547 newSTATEOP(0, NULL, 12548 newASSIGNOP(OPf_STACKED, var, 0, slice))); 12549 } 12550 prev_type = 2; 12551 max_arity = -1; 12552 } break; 12553 default: 12554 parse_error: 12555 qerror(Perl_mess(aTHX_ "Parse error")); 12556 return NULL; 12557 } 12558 lex_read_space(0); 12559 c = lex_peek_unichar(0); 12560 switch (c) { 12561 case /*(*/')': break; 12562 case ',': 12563 do { 12564 lex_token_boundary(); 12565 lex_read_unichar(0); 12566 lex_read_space(0); 12567 c = lex_peek_unichar(0); 12568 } while (c == ','); 12569 break; 12570 default: 12571 goto parse_error; 12572 } 12573 } 12574 if (min_arity != 0) { 12575 initops = op_append_list(OP_LINESEQ, 12576 newSTATEOP(0, NULL, 12577 newLOGOP(OP_OR, 0, 12578 newBINOP(OP_GE, 0, 12579 scalar(newUNOP(OP_RV2AV, 0, 12580 newGVOP(OP_GV, 0, PL_defgv))), 12581 newSVOP(OP_CONST, 0, newSViv(min_arity))), 12582 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), 12583 newSVOP(OP_CONST, 0, 12584 newSVpvs("Too few arguments for subroutine"))))), 12585 initops); 12586 } 12587 if (max_arity != -1) { 12588 initops = op_append_list(OP_LINESEQ, 12589 newSTATEOP(0, NULL, 12590 newLOGOP(OP_OR, 0, 12591 newBINOP(OP_LE, 0, 12592 scalar(newUNOP(OP_RV2AV, 0, 12593 newGVOP(OP_GV, 0, PL_defgv))), 12594 newSVOP(OP_CONST, 0, newSViv(max_arity))), 12595 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), 12596 newSVOP(OP_CONST, 0, 12597 newSVpvs("Too many arguments for subroutine"))))), 12598 initops); 12599 } 12600 return initops; 12601 } 12602 12603 /* 12604 * Local variables: 12605 * c-indentation-style: bsd 12606 * c-basic-offset: 4 12607 * indent-tabs-mode: nil 12608 * End: 12609 * 12610 * ex: set ts=8 sts=4 sw=4 et: 12611 */ 12612