1 /* toke.c 2 * 3 * Copyright (c) 1991-2002, Larry Wall 4 * 5 * You may distribute under the terms of either the GNU General Public 6 * License or the Artistic License, as specified in the README file. 7 * 8 */ 9 10 /* 11 * "It all comes from here, the stench and the peril." --Frodo 12 */ 13 14 /* 15 * This file is the lexer for Perl. It's closely linked to the 16 * parser, perly.y. 17 * 18 * The main routine is yylex(), which returns the next token. 19 */ 20 21 #include "EXTERN.h" 22 #define PERL_IN_TOKE_C 23 #include "perl.h" 24 25 #define yychar PL_yychar 26 #define yylval PL_yylval 27 28 static char ident_too_long[] = "Identifier too long"; 29 static char c_without_g[] = "Use of /c modifier is meaningless without /g"; 30 static char c_in_subst[] = "Use of /c modifier is meaningless in s///"; 31 32 static void restore_rsfp(pTHX_ void *f); 33 #ifndef PERL_NO_UTF16_FILTER 34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen); 35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); 36 #endif 37 38 #define XFAKEBRACK 128 39 #define XENUMMASK 127 40 41 #ifdef USE_UTF8_SCRIPTS 42 # define UTF (!IN_BYTES) 43 #else 44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) 45 #endif 46 47 /* In variables named $^X, these are the legal values for X. 48 * 1999-02-27 mjd-perl-patch@plover.com */ 49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 50 51 /* On MacOS, respect nonbreaking spaces */ 52 #ifdef MACOS_TRADITIONAL 53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t') 54 #else 55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') 56 #endif 57 58 /* LEX_* are values for PL_lex_state, the state of the lexer. 59 * They are arranged oddly so that the guard on the switch statement 60 * can get by with a single comparison (if the compiler is smart enough). 61 */ 62 63 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 64 65 #define LEX_NORMAL 10 66 #define LEX_INTERPNORMAL 9 67 #define LEX_INTERPCASEMOD 8 68 #define LEX_INTERPPUSH 7 69 #define LEX_INTERPSTART 6 70 #define LEX_INTERPEND 5 71 #define LEX_INTERPENDMAYBE 4 72 #define LEX_INTERPCONCAT 3 73 #define LEX_INTERPCONST 2 74 #define LEX_FORMLINE 1 75 #define LEX_KNOWNEXT 0 76 77 #ifdef ff_next 78 #undef ff_next 79 #endif 80 81 #ifdef USE_PURE_BISON 82 # ifndef YYMAXLEVEL 83 # define YYMAXLEVEL 100 84 # endif 85 YYSTYPE* yylval_pointer[YYMAXLEVEL]; 86 int* yychar_pointer[YYMAXLEVEL]; 87 int yyactlevel = -1; 88 # undef yylval 89 # undef yychar 90 # define yylval (*yylval_pointer[yyactlevel]) 91 # define yychar (*yychar_pointer[yyactlevel]) 92 # define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel] 93 # undef yylex 94 # define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]) 95 #endif 96 97 #include "keywords.h" 98 99 /* CLINE is a macro that ensures PL_copline has a sane value */ 100 101 #ifdef CLINE 102 #undef CLINE 103 #endif 104 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 105 106 /* 107 * Convenience functions to return different tokens and prime the 108 * lexer for the next token. They all take an argument. 109 * 110 * TOKEN : generic token (used for '(', DOLSHARP, etc) 111 * OPERATOR : generic operator 112 * AOPERATOR : assignment operator 113 * PREBLOCK : beginning the block after an if, while, foreach, ... 114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 115 * PREREF : *EXPR where EXPR is not a simple identifier 116 * TERM : expression term 117 * LOOPX : loop exiting command (goto, last, dump, etc) 118 * FTST : file test operator 119 * FUN0 : zero-argument function 120 * FUN1 : not used, except for not, which isn't a UNIOP 121 * BOop : bitwise or or xor 122 * BAop : bitwise and 123 * SHop : shift operator 124 * PWop : power operator 125 * PMop : pattern-matching operator 126 * Aop : addition-level operator 127 * Mop : multiplication-level operator 128 * Eop : equality-testing operator 129 * Rop : relational operator <= != gt 130 * 131 * Also see LOP and lop() below. 132 */ 133 134 /* Note that REPORT() and REPORT2() will be expressions that supply 135 * their own trailing comma, not suitable for statements as such. */ 136 #ifdef DEBUGGING /* Serve -DT. */ 137 # define REPORT(x,retval) tokereport(x,s,(int)retval), 138 # define REPORT2(x,retval) tokereport(x,s, yylval.ival), 139 #else 140 # define REPORT(x,retval) 141 # define REPORT2(x,retval) 142 #endif 143 144 #define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval) 145 #define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval) 146 #define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)) 147 #define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval) 148 #define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval) 149 #define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval) 150 #define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval) 151 #define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) 152 #define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) 153 #define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) 154 #define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) 155 #define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) 156 #define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP)) 157 #define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP)) 158 #define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP)) 159 #define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP) 160 #define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP)) 161 #define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP)) 162 #define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP) 163 #define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP) 164 165 /* This bit of chicanery makes a unary function followed by 166 * a parenthesis into a function with one argument, highest precedence. 167 */ 168 #define UNI(f) return(yylval.ival = f, \ 169 REPORT("uni",f) \ 170 PL_expect = XTERM, \ 171 PL_bufptr = s, \ 172 PL_last_uni = PL_oldbufptr, \ 173 PL_last_lop_op = f, \ 174 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) 175 176 #define UNIBRACK(f) return(yylval.ival = f, \ 177 REPORT("uni",f) \ 178 PL_bufptr = s, \ 179 PL_last_uni = PL_oldbufptr, \ 180 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) 181 182 /* grandfather return to old style */ 183 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP) 184 185 #ifdef DEBUGGING 186 187 STATIC void 188 S_tokereport(pTHX_ char *thing, char* s, I32 rv) 189 { 190 DEBUG_T({ 191 SV* report = newSVpv(thing, 0); 192 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop), 193 (IV)rv); 194 195 if (s - PL_bufptr > 0) 196 sv_catpvn(report, PL_bufptr, s - PL_bufptr); 197 else { 198 if (PL_oldbufptr && *PL_oldbufptr) 199 sv_catpv(report, PL_tokenbuf); 200 } 201 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report)); 202 }); 203 } 204 205 #endif 206 207 /* 208 * S_ao 209 * 210 * This subroutine detects &&= and ||= and turns an ANDAND or OROR 211 * into an OP_ANDASSIGN or OP_ORASSIGN 212 */ 213 214 STATIC int 215 S_ao(pTHX_ int toketype) 216 { 217 if (*PL_bufptr == '=') { 218 PL_bufptr++; 219 if (toketype == ANDAND) 220 yylval.ival = OP_ANDASSIGN; 221 else if (toketype == OROR) 222 yylval.ival = OP_ORASSIGN; 223 toketype = ASSIGNOP; 224 } 225 return toketype; 226 } 227 228 /* 229 * S_no_op 230 * When Perl expects an operator and finds something else, no_op 231 * prints the warning. It always prints "<something> found where 232 * operator expected. It prints "Missing semicolon on previous line?" 233 * if the surprise occurs at the start of the line. "do you need to 234 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 235 * where the compiler doesn't know if foo is a method call or a function. 236 * It prints "Missing operator before end of line" if there's nothing 237 * after the missing operator, or "... before <...>" if there is something 238 * after the missing operator. 239 */ 240 241 STATIC void 242 S_no_op(pTHX_ char *what, char *s) 243 { 244 char *oldbp = PL_bufptr; 245 bool is_first = (PL_oldbufptr == PL_linestart); 246 247 if (!s) 248 s = oldbp; 249 else 250 PL_bufptr = s; 251 yywarn(Perl_form(aTHX_ "%s found where operator expected", what)); 252 if (is_first) 253 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n"); 254 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 255 char *t; 256 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ; 257 if (t < PL_bufptr && isSPACE(*t)) 258 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n", 259 t - PL_oldoldbufptr, PL_oldoldbufptr); 260 } 261 else { 262 assert(s >= oldbp); 263 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp); 264 } 265 PL_bufptr = oldbp; 266 } 267 268 /* 269 * S_missingterm 270 * Complain about missing quote/regexp/heredoc terminator. 271 * If it's called with (char *)NULL then it cauterizes the line buffer. 272 * If we're in a delimited string and the delimiter is a control 273 * character, it's reformatted into a two-char sequence like ^C. 274 * This is fatal. 275 */ 276 277 STATIC void 278 S_missingterm(pTHX_ char *s) 279 { 280 char tmpbuf[3]; 281 char q; 282 if (s) { 283 char *nl = strrchr(s,'\n'); 284 if (nl) 285 *nl = '\0'; 286 } 287 else if ( 288 #ifdef EBCDIC 289 iscntrl(PL_multi_close) 290 #else 291 PL_multi_close < 32 || PL_multi_close == 127 292 #endif 293 ) { 294 *tmpbuf = '^'; 295 tmpbuf[1] = toCTRL(PL_multi_close); 296 s = "\\n"; 297 tmpbuf[2] = '\0'; 298 s = tmpbuf; 299 } 300 else { 301 *tmpbuf = (char)PL_multi_close; 302 tmpbuf[1] = '\0'; 303 s = tmpbuf; 304 } 305 q = strchr(s,'"') ? '\'' : '"'; 306 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 307 } 308 309 /* 310 * Perl_deprecate 311 */ 312 313 void 314 Perl_deprecate(pTHX_ char *s) 315 { 316 if (ckWARN(WARN_DEPRECATED)) 317 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s); 318 } 319 320 void 321 Perl_deprecate_old(pTHX_ char *s) 322 { 323 /* This function should NOT be called for any new deprecated warnings */ 324 /* Use Perl_deprecate instead */ 325 /* */ 326 /* It is here to maintain backward compatibility with the pre-5.8 */ 327 /* warnings category hierarchy. The "deprecated" category used to */ 328 /* live under the "syntax" category. It is now a top-level category */ 329 /* in its own right. */ 330 331 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX)) 332 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), 333 "Use of %s is deprecated", s); 334 } 335 336 /* 337 * depcom 338 * Deprecate a comma-less variable list. 339 */ 340 341 STATIC void 342 S_depcom(pTHX) 343 { 344 deprecate_old("comma-less variable list"); 345 } 346 347 /* 348 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 349 * utf16-to-utf8-reversed. 350 */ 351 352 #ifdef PERL_CR_FILTER 353 static void 354 strip_return(SV *sv) 355 { 356 register char *s = SvPVX(sv); 357 register char *e = s + SvCUR(sv); 358 /* outer loop optimized to do nothing if there are no CR-LFs */ 359 while (s < e) { 360 if (*s++ == '\r' && *s == '\n') { 361 /* hit a CR-LF, need to copy the rest */ 362 register char *d = s - 1; 363 *d++ = *s++; 364 while (s < e) { 365 if (*s == '\r' && s[1] == '\n') 366 s++; 367 *d++ = *s++; 368 } 369 SvCUR(sv) -= s - d; 370 return; 371 } 372 } 373 } 374 375 STATIC I32 376 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 377 { 378 I32 count = FILTER_READ(idx+1, sv, maxlen); 379 if (count > 0 && !maxlen) 380 strip_return(sv); 381 return count; 382 } 383 #endif 384 385 /* 386 * Perl_lex_start 387 * Initialize variables. Uses the Perl save_stack to save its state (for 388 * recursive calls to the parser). 389 */ 390 391 void 392 Perl_lex_start(pTHX_ SV *line) 393 { 394 char *s; 395 STRLEN len; 396 397 SAVEI32(PL_lex_dojoin); 398 SAVEI32(PL_lex_brackets); 399 SAVEI32(PL_lex_casemods); 400 SAVEI32(PL_lex_starts); 401 SAVEI32(PL_lex_state); 402 SAVEVPTR(PL_lex_inpat); 403 SAVEI32(PL_lex_inwhat); 404 if (PL_lex_state == LEX_KNOWNEXT) { 405 I32 toke = PL_nexttoke; 406 while (--toke >= 0) { 407 SAVEI32(PL_nexttype[toke]); 408 SAVEVPTR(PL_nextval[toke]); 409 } 410 SAVEI32(PL_nexttoke); 411 } 412 SAVECOPLINE(PL_curcop); 413 SAVEPPTR(PL_bufptr); 414 SAVEPPTR(PL_bufend); 415 SAVEPPTR(PL_oldbufptr); 416 SAVEPPTR(PL_oldoldbufptr); 417 SAVEPPTR(PL_last_lop); 418 SAVEPPTR(PL_last_uni); 419 SAVEPPTR(PL_linestart); 420 SAVESPTR(PL_linestr); 421 SAVEPPTR(PL_lex_brackstack); 422 SAVEPPTR(PL_lex_casestack); 423 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp); 424 SAVESPTR(PL_lex_stuff); 425 SAVEI32(PL_lex_defer); 426 SAVEI32(PL_sublex_info.sub_inwhat); 427 SAVESPTR(PL_lex_repl); 428 SAVEINT(PL_expect); 429 SAVEINT(PL_lex_expect); 430 431 PL_lex_state = LEX_NORMAL; 432 PL_lex_defer = 0; 433 PL_expect = XSTATE; 434 PL_lex_brackets = 0; 435 New(899, PL_lex_brackstack, 120, char); 436 New(899, PL_lex_casestack, 12, char); 437 SAVEFREEPV(PL_lex_brackstack); 438 SAVEFREEPV(PL_lex_casestack); 439 PL_lex_casemods = 0; 440 *PL_lex_casestack = '\0'; 441 PL_lex_dojoin = 0; 442 PL_lex_starts = 0; 443 PL_lex_stuff = Nullsv; 444 PL_lex_repl = Nullsv; 445 PL_lex_inpat = 0; 446 PL_nexttoke = 0; 447 PL_lex_inwhat = 0; 448 PL_sublex_info.sub_inwhat = 0; 449 PL_linestr = line; 450 if (SvREADONLY(PL_linestr)) 451 PL_linestr = sv_2mortal(newSVsv(PL_linestr)); 452 s = SvPV(PL_linestr, len); 453 if (!len || s[len-1] != ';') { 454 if (!(SvFLAGS(PL_linestr) & SVs_TEMP)) 455 PL_linestr = sv_2mortal(newSVsv(PL_linestr)); 456 sv_catpvn(PL_linestr, "\n;", 2); 457 } 458 SvTEMP_off(PL_linestr); 459 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); 460 PL_bufend = PL_bufptr + SvCUR(PL_linestr); 461 PL_last_lop = PL_last_uni = Nullch; 462 PL_rsfp = 0; 463 } 464 465 /* 466 * Perl_lex_end 467 * Finalizer for lexing operations. Must be called when the parser is 468 * done with the lexer. 469 */ 470 471 void 472 Perl_lex_end(pTHX) 473 { 474 PL_doextract = FALSE; 475 } 476 477 /* 478 * S_incline 479 * This subroutine has nothing to do with tilting, whether at windmills 480 * or pinball tables. Its name is short for "increment line". It 481 * increments the current line number in CopLINE(PL_curcop) and checks 482 * to see whether the line starts with a comment of the form 483 * # line 500 "foo.pm" 484 * If so, it sets the current line number and file to the values in the comment. 485 */ 486 487 STATIC void 488 S_incline(pTHX_ char *s) 489 { 490 char *t; 491 char *n; 492 char *e; 493 char ch; 494 495 CopLINE_inc(PL_curcop); 496 if (*s++ != '#') 497 return; 498 while (SPACE_OR_TAB(*s)) s++; 499 if (strnEQ(s, "line", 4)) 500 s += 4; 501 else 502 return; 503 if (SPACE_OR_TAB(*s)) 504 s++; 505 else 506 return; 507 while (SPACE_OR_TAB(*s)) s++; 508 if (!isDIGIT(*s)) 509 return; 510 n = s; 511 while (isDIGIT(*s)) 512 s++; 513 while (SPACE_OR_TAB(*s)) 514 s++; 515 if (*s == '"' && (t = strchr(s+1, '"'))) { 516 s++; 517 e = t + 1; 518 } 519 else { 520 for (t = s; !isSPACE(*t); t++) ; 521 e = t; 522 } 523 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 524 e++; 525 if (*e != '\n' && *e != '\0') 526 return; /* false alarm */ 527 528 ch = *t; 529 *t = '\0'; 530 if (t - s > 0) { 531 CopFILE_free(PL_curcop); 532 CopFILE_set(PL_curcop, s); 533 } 534 *t = ch; 535 CopLINE_set(PL_curcop, atoi(n)-1); 536 } 537 538 /* 539 * S_skipspace 540 * Called to gobble the appropriate amount and type of whitespace. 541 * Skips comments as well. 542 */ 543 544 STATIC char * 545 S_skipspace(pTHX_ register char *s) 546 { 547 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 548 while (s < PL_bufend && SPACE_OR_TAB(*s)) 549 s++; 550 return s; 551 } 552 for (;;) { 553 STRLEN prevlen; 554 SSize_t oldprevlen, oldoldprevlen; 555 SSize_t oldloplen = 0, oldunilen = 0; 556 while (s < PL_bufend && isSPACE(*s)) { 557 if (*s++ == '\n' && PL_in_eval && !PL_rsfp) 558 incline(s); 559 } 560 561 /* comment */ 562 if (s < PL_bufend && *s == '#') { 563 while (s < PL_bufend && *s != '\n') 564 s++; 565 if (s < PL_bufend) { 566 s++; 567 if (PL_in_eval && !PL_rsfp) { 568 incline(s); 569 continue; 570 } 571 } 572 } 573 574 /* only continue to recharge the buffer if we're at the end 575 * of the buffer, we're not reading from a source filter, and 576 * we're in normal lexing mode 577 */ 578 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat || 579 PL_lex_state == LEX_FORMLINE) 580 return s; 581 582 /* try to recharge the buffer */ 583 if ((s = filter_gets(PL_linestr, PL_rsfp, 584 (prevlen = SvCUR(PL_linestr)))) == Nullch) 585 { 586 /* end of file. Add on the -p or -n magic */ 587 if (PL_minus_n || PL_minus_p) { 588 sv_setpv(PL_linestr,PL_minus_p ? 589 ";}continue{print or die qq(-p destination: $!\\n)" : 590 ""); 591 sv_catpv(PL_linestr,";}"); 592 PL_minus_n = PL_minus_p = 0; 593 } 594 else 595 sv_setpv(PL_linestr,";"); 596 597 /* reset variables for next time we lex */ 598 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart 599 = SvPVX(PL_linestr); 600 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 601 PL_last_lop = PL_last_uni = Nullch; 602 603 /* Close the filehandle. Could be from -P preprocessor, 604 * STDIN, or a regular file. If we were reading code from 605 * STDIN (because the commandline held no -e or filename) 606 * then we don't close it, we reset it so the code can 607 * read from STDIN too. 608 */ 609 610 if (PL_preprocess && !PL_in_eval) 611 (void)PerlProc_pclose(PL_rsfp); 612 else if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 613 PerlIO_clearerr(PL_rsfp); 614 else 615 (void)PerlIO_close(PL_rsfp); 616 PL_rsfp = Nullfp; 617 return s; 618 } 619 620 /* not at end of file, so we only read another line */ 621 /* make corresponding updates to old pointers, for yyerror() */ 622 oldprevlen = PL_oldbufptr - PL_bufend; 623 oldoldprevlen = PL_oldoldbufptr - PL_bufend; 624 if (PL_last_uni) 625 oldunilen = PL_last_uni - PL_bufend; 626 if (PL_last_lop) 627 oldloplen = PL_last_lop - PL_bufend; 628 PL_linestart = PL_bufptr = s + prevlen; 629 PL_bufend = s + SvCUR(PL_linestr); 630 s = PL_bufptr; 631 PL_oldbufptr = s + oldprevlen; 632 PL_oldoldbufptr = s + oldoldprevlen; 633 if (PL_last_uni) 634 PL_last_uni = s + oldunilen; 635 if (PL_last_lop) 636 PL_last_lop = s + oldloplen; 637 incline(s); 638 639 /* debugger active and we're not compiling the debugger code, 640 * so store the line into the debugger's array of lines 641 */ 642 if (PERLDB_LINE && PL_curstash != PL_debstash) { 643 SV *sv = NEWSV(85,0); 644 645 sv_upgrade(sv, SVt_PVMG); 646 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr); 647 (void)SvIOK_on(sv); 648 SvIVX(sv) = 0; 649 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); 650 } 651 } 652 } 653 654 /* 655 * S_check_uni 656 * Check the unary operators to ensure there's no ambiguity in how they're 657 * used. An ambiguous piece of code would be: 658 * rand + 5 659 * This doesn't mean rand() + 5. Because rand() is a unary operator, 660 * the +5 is its argument. 661 */ 662 663 STATIC void 664 S_check_uni(pTHX) 665 { 666 char *s; 667 char *t; 668 669 if (PL_oldoldbufptr != PL_last_uni) 670 return; 671 while (isSPACE(*PL_last_uni)) 672 PL_last_uni++; 673 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ; 674 if ((t = strchr(s, '(')) && t < PL_bufptr) 675 return; 676 if (ckWARN_d(WARN_AMBIGUOUS)){ 677 char ch = *s; 678 *s = '\0'; 679 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 680 "Warning: Use of \"%s\" without parens is ambiguous", 681 PL_last_uni); 682 *s = ch; 683 } 684 } 685 686 /* 687 * LOP : macro to build a list operator. Its behaviour has been replaced 688 * with a subroutine, S_lop() for which LOP is just another name. 689 */ 690 691 #define LOP(f,x) return lop(f,x,s) 692 693 /* 694 * S_lop 695 * Build a list operator (or something that might be one). The rules: 696 * - if we have a next token, then it's a list operator [why?] 697 * - if the next thing is an opening paren, then it's a function 698 * - else it's a list operator 699 */ 700 701 STATIC I32 702 S_lop(pTHX_ I32 f, int x, char *s) 703 { 704 yylval.ival = f; 705 CLINE; 706 REPORT("lop", f) 707 PL_expect = x; 708 PL_bufptr = s; 709 PL_last_lop = PL_oldbufptr; 710 PL_last_lop_op = (OPCODE)f; 711 if (PL_nexttoke) 712 return LSTOP; 713 if (*s == '(') 714 return FUNC; 715 s = skipspace(s); 716 if (*s == '(') 717 return FUNC; 718 else 719 return LSTOP; 720 } 721 722 /* 723 * S_force_next 724 * When the lexer realizes it knows the next token (for instance, 725 * it is reordering tokens for the parser) then it can call S_force_next 726 * to know what token to return the next time the lexer is called. Caller 727 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer 728 * handles the token correctly. 729 */ 730 731 STATIC void 732 S_force_next(pTHX_ I32 type) 733 { 734 PL_nexttype[PL_nexttoke] = type; 735 PL_nexttoke++; 736 if (PL_lex_state != LEX_KNOWNEXT) { 737 PL_lex_defer = PL_lex_state; 738 PL_lex_expect = PL_expect; 739 PL_lex_state = LEX_KNOWNEXT; 740 } 741 } 742 743 /* 744 * S_force_word 745 * When the lexer knows the next thing is a word (for instance, it has 746 * just seen -> and it knows that the next char is a word char, then 747 * it calls S_force_word to stick the next word into the PL_next lookahead. 748 * 749 * Arguments: 750 * char *start : buffer position (must be within PL_linestr) 751 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD) 752 * int check_keyword : if true, Perl checks to make sure the word isn't 753 * a keyword (do this if the word is a label, e.g. goto FOO) 754 * int allow_pack : if true, : characters will also be allowed (require, 755 * use, etc. do this) 756 * int allow_initial_tick : used by the "sub" lexer only. 757 */ 758 759 STATIC char * 760 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) 761 { 762 register char *s; 763 STRLEN len; 764 765 start = skipspace(start); 766 s = start; 767 if (isIDFIRST_lazy_if(s,UTF) || 768 (allow_pack && *s == ':') || 769 (allow_initial_tick && *s == '\'') ) 770 { 771 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 772 if (check_keyword && keyword(PL_tokenbuf, len)) 773 return start; 774 if (token == METHOD) { 775 s = skipspace(s); 776 if (*s == '(') 777 PL_expect = XTERM; 778 else { 779 PL_expect = XOPERATOR; 780 } 781 } 782 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0)); 783 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE; 784 force_next(token); 785 } 786 return s; 787 } 788 789 /* 790 * S_force_ident 791 * Called when the lexer wants $foo *foo &foo etc, but the program 792 * text only contains the "foo" portion. The first argument is a pointer 793 * to the "foo", and the second argument is the type symbol to prefix. 794 * Forces the next token to be a "WORD". 795 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 796 */ 797 798 STATIC void 799 S_force_ident(pTHX_ register char *s, int kind) 800 { 801 if (s && *s) { 802 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0)); 803 PL_nextval[PL_nexttoke].opval = o; 804 force_next(WORD); 805 if (kind) { 806 o->op_private = OPpCONST_ENTERED; 807 /* XXX see note in pp_entereval() for why we forgo typo 808 warnings if the symbol must be introduced in an eval. 809 GSAR 96-10-12 */ 810 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE, 811 kind == '$' ? SVt_PV : 812 kind == '@' ? SVt_PVAV : 813 kind == '%' ? SVt_PVHV : 814 SVt_PVGV 815 ); 816 } 817 } 818 } 819 820 NV 821 Perl_str_to_version(pTHX_ SV *sv) 822 { 823 NV retval = 0.0; 824 NV nshift = 1.0; 825 STRLEN len; 826 char *start = SvPVx(sv,len); 827 bool utf = SvUTF8(sv) ? TRUE : FALSE; 828 char *end = start + len; 829 while (start < end) { 830 STRLEN skip; 831 UV n; 832 if (utf) 833 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 834 else { 835 n = *(U8*)start; 836 skip = 1; 837 } 838 retval += ((NV)n)/nshift; 839 start += skip; 840 nshift *= 1000; 841 } 842 return retval; 843 } 844 845 /* 846 * S_force_version 847 * Forces the next token to be a version number. 848 * If the next token appears to be an invalid version number, (e.g. "v2b"), 849 * and if "guessing" is TRUE, then no new token is created (and the caller 850 * must use an alternative parsing method). 851 */ 852 853 STATIC char * 854 S_force_version(pTHX_ char *s, int guessing) 855 { 856 OP *version = Nullop; 857 char *d; 858 859 s = skipspace(s); 860 861 d = s; 862 if (*d == 'v') 863 d++; 864 if (isDIGIT(*d)) { 865 while (isDIGIT(*d) || *d == '_' || *d == '.') 866 d++; 867 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) { 868 SV *ver; 869 s = scan_num(s, &yylval); 870 version = yylval.opval; 871 ver = cSVOPx(version)->op_sv; 872 if (SvPOK(ver) && !SvNIOK(ver)) { 873 (void)SvUPGRADE(ver, SVt_PVNV); 874 SvNVX(ver) = str_to_version(ver); 875 SvNOK_on(ver); /* hint that it is a version */ 876 } 877 } 878 else if (guessing) 879 return s; 880 } 881 882 /* NOTE: The parser sees the package name and the VERSION swapped */ 883 PL_nextval[PL_nexttoke].opval = version; 884 force_next(WORD); 885 886 return s; 887 } 888 889 /* 890 * S_tokeq 891 * Tokenize a quoted string passed in as an SV. It finds the next 892 * chunk, up to end of string or a backslash. It may make a new 893 * SV containing that chunk (if HINT_NEW_STRING is on). It also 894 * turns \\ into \. 895 */ 896 897 STATIC SV * 898 S_tokeq(pTHX_ SV *sv) 899 { 900 register char *s; 901 register char *send; 902 register char *d; 903 STRLEN len = 0; 904 SV *pv = sv; 905 906 if (!SvLEN(sv)) 907 goto finish; 908 909 s = SvPV_force(sv, len); 910 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) 911 goto finish; 912 send = s + len; 913 while (s < send && *s != '\\') 914 s++; 915 if (s == send) 916 goto finish; 917 d = s; 918 if ( PL_hints & HINT_NEW_STRING ) { 919 pv = sv_2mortal(newSVpvn(SvPVX(pv), len)); 920 if (SvUTF8(sv)) 921 SvUTF8_on(pv); 922 } 923 while (s < send) { 924 if (*s == '\\') { 925 if (s + 1 < send && (s[1] == '\\')) 926 s++; /* all that, just for this */ 927 } 928 *d++ = *s++; 929 } 930 *d = '\0'; 931 SvCUR_set(sv, d - SvPVX(sv)); 932 finish: 933 if ( PL_hints & HINT_NEW_STRING ) 934 return new_constant(NULL, 0, "q", sv, pv, "q"); 935 return sv; 936 } 937 938 /* 939 * Now come three functions related to double-quote context, 940 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 941 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 942 * interact with PL_lex_state, and create fake ( ... ) argument lists 943 * to handle functions and concatenation. 944 * They assume that whoever calls them will be setting up a fake 945 * join call, because each subthing puts a ',' after it. This lets 946 * "lower \luPpEr" 947 * become 948 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,) 949 * 950 * (I'm not sure whether the spurious commas at the end of lcfirst's 951 * arguments and join's arguments are created or not). 952 */ 953 954 /* 955 * S_sublex_start 956 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST). 957 * 958 * Pattern matching will set PL_lex_op to the pattern-matching op to 959 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise). 960 * 961 * OP_CONST and OP_READLINE are easy--just make the new op and return. 962 * 963 * Everything else becomes a FUNC. 964 * 965 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 966 * had an OP_CONST or OP_READLINE). This just sets us up for a 967 * call to S_sublex_push(). 968 */ 969 970 STATIC I32 971 S_sublex_start(pTHX) 972 { 973 register I32 op_type = yylval.ival; 974 975 if (op_type == OP_NULL) { 976 yylval.opval = PL_lex_op; 977 PL_lex_op = Nullop; 978 return THING; 979 } 980 if (op_type == OP_CONST || op_type == OP_READLINE) { 981 SV *sv = tokeq(PL_lex_stuff); 982 983 if (SvTYPE(sv) == SVt_PVIV) { 984 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 985 STRLEN len; 986 char *p; 987 SV *nsv; 988 989 p = SvPV(sv, len); 990 nsv = newSVpvn(p, len); 991 if (SvUTF8(sv)) 992 SvUTF8_on(nsv); 993 SvREFCNT_dec(sv); 994 sv = nsv; 995 } 996 yylval.opval = (OP*)newSVOP(op_type, 0, sv); 997 PL_lex_stuff = Nullsv; 998 return THING; 999 } 1000 1001 PL_sublex_info.super_state = PL_lex_state; 1002 PL_sublex_info.sub_inwhat = op_type; 1003 PL_sublex_info.sub_op = PL_lex_op; 1004 PL_lex_state = LEX_INTERPPUSH; 1005 1006 PL_expect = XTERM; 1007 if (PL_lex_op) { 1008 yylval.opval = PL_lex_op; 1009 PL_lex_op = Nullop; 1010 return PMFUNC; 1011 } 1012 else 1013 return FUNC; 1014 } 1015 1016 /* 1017 * S_sublex_push 1018 * Create a new scope to save the lexing state. The scope will be 1019 * ended in S_sublex_done. Returns a '(', starting the function arguments 1020 * to the uc, lc, etc. found before. 1021 * Sets PL_lex_state to LEX_INTERPCONCAT. 1022 */ 1023 1024 STATIC I32 1025 S_sublex_push(pTHX) 1026 { 1027 ENTER; 1028 1029 PL_lex_state = PL_sublex_info.super_state; 1030 SAVEI32(PL_lex_dojoin); 1031 SAVEI32(PL_lex_brackets); 1032 SAVEI32(PL_lex_casemods); 1033 SAVEI32(PL_lex_starts); 1034 SAVEI32(PL_lex_state); 1035 SAVEVPTR(PL_lex_inpat); 1036 SAVEI32(PL_lex_inwhat); 1037 SAVECOPLINE(PL_curcop); 1038 SAVEPPTR(PL_bufptr); 1039 SAVEPPTR(PL_bufend); 1040 SAVEPPTR(PL_oldbufptr); 1041 SAVEPPTR(PL_oldoldbufptr); 1042 SAVEPPTR(PL_last_lop); 1043 SAVEPPTR(PL_last_uni); 1044 SAVEPPTR(PL_linestart); 1045 SAVESPTR(PL_linestr); 1046 SAVEPPTR(PL_lex_brackstack); 1047 SAVEPPTR(PL_lex_casestack); 1048 1049 PL_linestr = PL_lex_stuff; 1050 PL_lex_stuff = Nullsv; 1051 1052 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 1053 = SvPVX(PL_linestr); 1054 PL_bufend += SvCUR(PL_linestr); 1055 PL_last_lop = PL_last_uni = Nullch; 1056 SAVEFREESV(PL_linestr); 1057 1058 PL_lex_dojoin = FALSE; 1059 PL_lex_brackets = 0; 1060 New(899, PL_lex_brackstack, 120, char); 1061 New(899, PL_lex_casestack, 12, char); 1062 SAVEFREEPV(PL_lex_brackstack); 1063 SAVEFREEPV(PL_lex_casestack); 1064 PL_lex_casemods = 0; 1065 *PL_lex_casestack = '\0'; 1066 PL_lex_starts = 0; 1067 PL_lex_state = LEX_INTERPCONCAT; 1068 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 1069 1070 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 1071 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 1072 PL_lex_inpat = PL_sublex_info.sub_op; 1073 else 1074 PL_lex_inpat = Nullop; 1075 1076 return '('; 1077 } 1078 1079 /* 1080 * S_sublex_done 1081 * Restores lexer state after a S_sublex_push. 1082 */ 1083 1084 STATIC I32 1085 S_sublex_done(pTHX) 1086 { 1087 if (!PL_lex_starts++) { 1088 SV *sv = newSVpvn("",0); 1089 if (SvUTF8(PL_linestr)) 1090 SvUTF8_on(sv); 1091 PL_expect = XOPERATOR; 1092 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 1093 return THING; 1094 } 1095 1096 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 1097 PL_lex_state = LEX_INTERPCASEMOD; 1098 return yylex(); 1099 } 1100 1101 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 1102 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) { 1103 PL_linestr = PL_lex_repl; 1104 PL_lex_inpat = 0; 1105 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 1106 PL_bufend += SvCUR(PL_linestr); 1107 PL_last_lop = PL_last_uni = Nullch; 1108 SAVEFREESV(PL_linestr); 1109 PL_lex_dojoin = FALSE; 1110 PL_lex_brackets = 0; 1111 PL_lex_casemods = 0; 1112 *PL_lex_casestack = '\0'; 1113 PL_lex_starts = 0; 1114 if (SvEVALED(PL_lex_repl)) { 1115 PL_lex_state = LEX_INTERPNORMAL; 1116 PL_lex_starts++; 1117 /* we don't clear PL_lex_repl here, so that we can check later 1118 whether this is an evalled subst; that means we rely on the 1119 logic to ensure sublex_done() is called again only via the 1120 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 1121 } 1122 else { 1123 PL_lex_state = LEX_INTERPCONCAT; 1124 PL_lex_repl = Nullsv; 1125 } 1126 return ','; 1127 } 1128 else { 1129 LEAVE; 1130 PL_bufend = SvPVX(PL_linestr); 1131 PL_bufend += SvCUR(PL_linestr); 1132 PL_expect = XOPERATOR; 1133 PL_sublex_info.sub_inwhat = 0; 1134 return ')'; 1135 } 1136 } 1137 1138 /* 1139 scan_const 1140 1141 Extracts a pattern, double-quoted string, or transliteration. This 1142 is terrifying code. 1143 1144 It looks at lex_inwhat and PL_lex_inpat to find out whether it's 1145 processing a pattern (PL_lex_inpat is true), a transliteration 1146 (lex_inwhat & OP_TRANS is true), or a double-quoted string. 1147 1148 Returns a pointer to the character scanned up to. Iff this is 1149 advanced from the start pointer supplied (ie if anything was 1150 successfully parsed), will leave an OP for the substring scanned 1151 in yylval. Caller must intuit reason for not parsing further 1152 by looking at the next characters herself. 1153 1154 In patterns: 1155 backslashes: 1156 double-quoted style: \r and \n 1157 regexp special ones: \D \s 1158 constants: \x3 1159 backrefs: \1 (deprecated in substitution replacements) 1160 case and quoting: \U \Q \E 1161 stops on @ and $, but not for $ as tail anchor 1162 1163 In transliterations: 1164 characters are VERY literal, except for - not at the start or end 1165 of the string, which indicates a range. scan_const expands the 1166 range to the full set of intermediate characters. 1167 1168 In double-quoted strings: 1169 backslashes: 1170 double-quoted style: \r and \n 1171 constants: \x3 1172 backrefs: \1 (deprecated) 1173 case and quoting: \U \Q \E 1174 stops on @ and $ 1175 1176 scan_const does *not* construct ops to handle interpolated strings. 1177 It stops processing as soon as it finds an embedded $ or @ variable 1178 and leaves it to the caller to work out what's going on. 1179 1180 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo. 1181 1182 $ in pattern could be $foo or could be tail anchor. Assumption: 1183 it's a tail anchor if $ is the last thing in the string, or if it's 1184 followed by one of ")| \n\t" 1185 1186 \1 (backreferences) are turned into $1 1187 1188 The structure of the code is 1189 while (there's a character to process) { 1190 handle transliteration ranges 1191 skip regexp comments 1192 skip # initiated comments in //x patterns 1193 check for embedded @foo 1194 check for embedded scalars 1195 if (backslash) { 1196 leave intact backslashes from leave (below) 1197 deprecate \1 in strings and sub replacements 1198 handle string-changing backslashes \l \U \Q \E, etc. 1199 switch (what was escaped) { 1200 handle - in a transliteration (becomes a literal -) 1201 handle \132 octal characters 1202 handle 0x15 hex characters 1203 handle \cV (control V) 1204 handle printf backslashes (\f, \r, \n, etc) 1205 } (end switch) 1206 } (end if backslash) 1207 } (end while character to read) 1208 1209 */ 1210 1211 STATIC char * 1212 S_scan_const(pTHX_ char *start) 1213 { 1214 register char *send = PL_bufend; /* end of the constant */ 1215 SV *sv = NEWSV(93, send - start); /* sv for the constant */ 1216 register char *s = start; /* start of the constant */ 1217 register char *d = SvPVX(sv); /* destination for copies */ 1218 bool dorange = FALSE; /* are we in a translit range? */ 1219 bool didrange = FALSE; /* did we just finish a range? */ 1220 I32 has_utf8 = FALSE; /* Output constant is UTF8 */ 1221 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */ 1222 UV uv; 1223 1224 const char *leaveit = /* set of acceptably-backslashed characters */ 1225 PL_lex_inpat 1226 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" 1227 : ""; 1228 1229 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 1230 /* If we are doing a trans and we know we want UTF8 set expectation */ 1231 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 1232 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 1233 } 1234 1235 1236 while (s < send || dorange) { 1237 /* get transliterations out of the way (they're most literal) */ 1238 if (PL_lex_inwhat == OP_TRANS) { 1239 /* expand a range A-Z to the full set of characters. AIE! */ 1240 if (dorange) { 1241 I32 i; /* current expanded character */ 1242 I32 min; /* first character in range */ 1243 I32 max; /* last character in range */ 1244 1245 if (has_utf8) { 1246 char *c = (char*)utf8_hop((U8*)d, -1); 1247 char *e = d++; 1248 while (e-- > c) 1249 *(e + 1) = *e; 1250 *c = (char)UTF_TO_NATIVE(0xff); 1251 /* mark the range as done, and continue */ 1252 dorange = FALSE; 1253 didrange = TRUE; 1254 continue; 1255 } 1256 1257 i = d - SvPVX(sv); /* remember current offset */ 1258 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ 1259 d = SvPVX(sv) + i; /* refresh d after realloc */ 1260 d -= 2; /* eat the first char and the - */ 1261 1262 min = (U8)*d; /* first char in range */ 1263 max = (U8)d[1]; /* last char in range */ 1264 1265 if (min > max) { 1266 Perl_croak(aTHX_ 1267 "Invalid [] range \"%c-%c\" in transliteration operator", 1268 (char)min, (char)max); 1269 } 1270 1271 #ifdef EBCDIC 1272 if ((isLOWER(min) && isLOWER(max)) || 1273 (isUPPER(min) && isUPPER(max))) { 1274 if (isLOWER(min)) { 1275 for (i = min; i <= max; i++) 1276 if (isLOWER(i)) 1277 *d++ = NATIVE_TO_NEED(has_utf8,i); 1278 } else { 1279 for (i = min; i <= max; i++) 1280 if (isUPPER(i)) 1281 *d++ = NATIVE_TO_NEED(has_utf8,i); 1282 } 1283 } 1284 else 1285 #endif 1286 for (i = min; i <= max; i++) 1287 *d++ = (char)i; 1288 1289 /* mark the range as done, and continue */ 1290 dorange = FALSE; 1291 didrange = TRUE; 1292 continue; 1293 } 1294 1295 /* range begins (ignore - as first or last char) */ 1296 else if (*s == '-' && s+1 < send && s != start) { 1297 if (didrange) { 1298 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 1299 } 1300 if (has_utf8) { 1301 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ 1302 s++; 1303 continue; 1304 } 1305 dorange = TRUE; 1306 s++; 1307 } 1308 else { 1309 didrange = FALSE; 1310 } 1311 } 1312 1313 /* if we get here, we're not doing a transliteration */ 1314 1315 /* skip for regexp comments /(?#comment)/ and code /(?{code})/, 1316 except for the last char, which will be done separately. */ 1317 else if (*s == '(' && PL_lex_inpat && s[1] == '?') { 1318 if (s[2] == '#') { 1319 while (s < send && *s != ')') 1320 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 1321 } 1322 else if (s[2] == '{' /* This should match regcomp.c */ 1323 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{')) 1324 { 1325 I32 count = 1; 1326 char *regparse = s + (s[2] == '{' ? 3 : 4); 1327 char c; 1328 1329 while (count && (c = *regparse)) { 1330 if (c == '\\' && regparse[1]) 1331 regparse++; 1332 else if (c == '{') 1333 count++; 1334 else if (c == '}') 1335 count--; 1336 regparse++; 1337 } 1338 if (*regparse != ')') { 1339 regparse--; /* Leave one char for continuation. */ 1340 yyerror("Sequence (?{...}) not terminated or not {}-balanced"); 1341 } 1342 while (s < regparse) 1343 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 1344 } 1345 } 1346 1347 /* likewise skip #-initiated comments in //x patterns */ 1348 else if (*s == '#' && PL_lex_inpat && 1349 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) { 1350 while (s+1 < send && *s != '\n') 1351 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 1352 } 1353 1354 /* check for embedded arrays 1355 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 1356 */ 1357 else if (*s == '@' && s[1] 1358 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1]))) 1359 break; 1360 1361 /* check for embedded scalars. only stop if we're sure it's a 1362 variable. 1363 */ 1364 else if (*s == '$') { 1365 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 1366 break; 1367 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) 1368 break; /* in regexp, $ might be tail anchor */ 1369 } 1370 1371 /* End of else if chain - OP_TRANS rejoin rest */ 1372 1373 /* backslashes */ 1374 if (*s == '\\' && s+1 < send) { 1375 s++; 1376 1377 /* some backslashes we leave behind */ 1378 if (*leaveit && *s && strchr(leaveit, *s)) { 1379 *d++ = NATIVE_TO_NEED(has_utf8,'\\'); 1380 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 1381 continue; 1382 } 1383 1384 /* deprecate \1 in strings and substitution replacements */ 1385 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && 1386 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) 1387 { 1388 if (ckWARN(WARN_SYNTAX)) 1389 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 1390 *--s = '$'; 1391 break; 1392 } 1393 1394 /* string-change backslash escapes */ 1395 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) { 1396 --s; 1397 break; 1398 } 1399 1400 /* if we get here, it's either a quoted -, or a digit */ 1401 switch (*s) { 1402 1403 /* quoted - in transliterations */ 1404 case '-': 1405 if (PL_lex_inwhat == OP_TRANS) { 1406 *d++ = *s++; 1407 continue; 1408 } 1409 /* FALL THROUGH */ 1410 default: 1411 { 1412 if (ckWARN(WARN_MISC) && 1413 isALNUM(*s) && 1414 *s != '_') 1415 Perl_warner(aTHX_ packWARN(WARN_MISC), 1416 "Unrecognized escape \\%c passed through", 1417 *s); 1418 /* default action is to copy the quoted character */ 1419 goto default_action; 1420 } 1421 1422 /* \132 indicates an octal constant */ 1423 case '0': case '1': case '2': case '3': 1424 case '4': case '5': case '6': case '7': 1425 { 1426 I32 flags = 0; 1427 STRLEN len = 3; 1428 uv = grok_oct(s, &len, &flags, NULL); 1429 s += len; 1430 } 1431 goto NUM_ESCAPE_INSERT; 1432 1433 /* \x24 indicates a hex constant */ 1434 case 'x': 1435 ++s; 1436 if (*s == '{') { 1437 char* e = strchr(s, '}'); 1438 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | 1439 PERL_SCAN_DISALLOW_PREFIX; 1440 STRLEN len; 1441 1442 ++s; 1443 if (!e) { 1444 yyerror("Missing right brace on \\x{}"); 1445 continue; 1446 } 1447 len = e - s; 1448 uv = grok_hex(s, &len, &flags, NULL); 1449 s = e + 1; 1450 } 1451 else { 1452 { 1453 STRLEN len = 2; 1454 I32 flags = PERL_SCAN_DISALLOW_PREFIX; 1455 uv = grok_hex(s, &len, &flags, NULL); 1456 s += len; 1457 } 1458 } 1459 1460 NUM_ESCAPE_INSERT: 1461 /* Insert oct or hex escaped character. 1462 * There will always enough room in sv since such 1463 * escapes will be longer than any UTF-8 sequence 1464 * they can end up as. */ 1465 1466 /* We need to map to chars to ASCII before doing the tests 1467 to cover EBCDIC 1468 */ 1469 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) { 1470 if (!has_utf8 && uv > 255) { 1471 /* Might need to recode whatever we have 1472 * accumulated so far if it contains any 1473 * hibit chars. 1474 * 1475 * (Can't we keep track of that and avoid 1476 * this rescan? --jhi) 1477 */ 1478 int hicount = 0; 1479 U8 *c; 1480 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) { 1481 if (!NATIVE_IS_INVARIANT(*c)) { 1482 hicount++; 1483 } 1484 } 1485 if (hicount) { 1486 STRLEN offset = d - SvPVX(sv); 1487 U8 *src, *dst; 1488 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset; 1489 src = (U8 *)d - 1; 1490 dst = src+hicount; 1491 d += hicount; 1492 while (src >= (U8 *)SvPVX(sv)) { 1493 if (!NATIVE_IS_INVARIANT(*src)) { 1494 U8 ch = NATIVE_TO_ASCII(*src); 1495 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch); 1496 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch); 1497 } 1498 else { 1499 *dst-- = *src; 1500 } 1501 src--; 1502 } 1503 } 1504 } 1505 1506 if (has_utf8 || uv > 255) { 1507 d = (char*)uvchr_to_utf8((U8*)d, uv); 1508 has_utf8 = TRUE; 1509 if (PL_lex_inwhat == OP_TRANS && 1510 PL_sublex_info.sub_op) { 1511 PL_sublex_info.sub_op->op_private |= 1512 (PL_lex_repl ? OPpTRANS_FROM_UTF 1513 : OPpTRANS_TO_UTF); 1514 } 1515 } 1516 else { 1517 *d++ = (char)uv; 1518 } 1519 } 1520 else { 1521 *d++ = (char) uv; 1522 } 1523 continue; 1524 1525 /* \N{LATIN SMALL LETTER A} is a named character */ 1526 case 'N': 1527 ++s; 1528 if (*s == '{') { 1529 char* e = strchr(s, '}'); 1530 SV *res; 1531 STRLEN len; 1532 char *str; 1533 1534 if (!e) { 1535 yyerror("Missing right brace on \\N{}"); 1536 e = s - 1; 1537 goto cont_scan; 1538 } 1539 if (e > s + 2 && s[1] == 'U' && s[2] == '+') { 1540 /* \N{U+...} */ 1541 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | 1542 PERL_SCAN_DISALLOW_PREFIX; 1543 s += 3; 1544 len = e - s; 1545 uv = grok_hex(s, &len, &flags, NULL); 1546 s = e + 1; 1547 goto NUM_ESCAPE_INSERT; 1548 } 1549 res = newSVpvn(s + 1, e - s - 1); 1550 res = new_constant( Nullch, 0, "charnames", 1551 res, Nullsv, "\\N{...}" ); 1552 if (has_utf8) 1553 sv_utf8_upgrade(res); 1554 str = SvPV(res,len); 1555 #ifdef EBCDIC_NEVER_MIND 1556 /* charnames uses pack U and that has been 1557 * recently changed to do the below uni->native 1558 * mapping, so this would be redundant (and wrong, 1559 * the code point would be doubly converted). 1560 * But leave this in just in case the pack U change 1561 * gets revoked, but the semantics is still 1562 * desireable for charnames. --jhi */ 1563 { 1564 UV uv = utf8_to_uvchr((U8*)str, 0); 1565 1566 if (uv < 0x100) { 1567 U8 tmpbuf[UTF8_MAXLEN+1], *d; 1568 1569 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv)); 1570 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf); 1571 str = SvPV(res, len); 1572 } 1573 } 1574 #endif 1575 if (!has_utf8 && SvUTF8(res)) { 1576 char *ostart = SvPVX(sv); 1577 SvCUR_set(sv, d - ostart); 1578 SvPOK_on(sv); 1579 *d = '\0'; 1580 sv_utf8_upgrade(sv); 1581 /* this just broke our allocation above... */ 1582 SvGROW(sv, (STRLEN)(send - start)); 1583 d = SvPVX(sv) + SvCUR(sv); 1584 has_utf8 = TRUE; 1585 } 1586 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 1587 char *odest = SvPVX(sv); 1588 1589 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); 1590 d = SvPVX(sv) + (d - odest); 1591 } 1592 Copy(str, d, len, char); 1593 d += len; 1594 SvREFCNT_dec(res); 1595 cont_scan: 1596 s = e + 1; 1597 } 1598 else 1599 yyerror("Missing braces on \\N{}"); 1600 continue; 1601 1602 /* \c is a control character */ 1603 case 'c': 1604 s++; 1605 { 1606 U8 c = *s++; 1607 #ifdef EBCDIC 1608 if (isLOWER(c)) 1609 c = toUPPER(c); 1610 #endif 1611 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c)); 1612 } 1613 continue; 1614 1615 /* printf-style backslashes, formfeeds, newlines, etc */ 1616 case 'b': 1617 *d++ = NATIVE_TO_NEED(has_utf8,'\b'); 1618 break; 1619 case 'n': 1620 *d++ = NATIVE_TO_NEED(has_utf8,'\n'); 1621 break; 1622 case 'r': 1623 *d++ = NATIVE_TO_NEED(has_utf8,'\r'); 1624 break; 1625 case 'f': 1626 *d++ = NATIVE_TO_NEED(has_utf8,'\f'); 1627 break; 1628 case 't': 1629 *d++ = NATIVE_TO_NEED(has_utf8,'\t'); 1630 break; 1631 case 'e': 1632 *d++ = ASCII_TO_NEED(has_utf8,'\033'); 1633 break; 1634 case 'a': 1635 *d++ = ASCII_TO_NEED(has_utf8,'\007'); 1636 break; 1637 } /* end switch */ 1638 1639 s++; 1640 continue; 1641 } /* end if (backslash) */ 1642 1643 default_action: 1644 /* If we started with encoded form, or already know we want it 1645 and then encode the next character */ 1646 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) { 1647 STRLEN len = 1; 1648 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); 1649 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv)); 1650 s += len; 1651 if (need > len) { 1652 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */ 1653 STRLEN off = d - SvPVX(sv); 1654 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off; 1655 } 1656 d = (char*)uvchr_to_utf8((U8*)d, uv); 1657 has_utf8 = TRUE; 1658 } 1659 else { 1660 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 1661 } 1662 } /* while loop to process each character */ 1663 1664 /* terminate the string and set up the sv */ 1665 *d = '\0'; 1666 SvCUR_set(sv, d - SvPVX(sv)); 1667 if (SvCUR(sv) >= SvLEN(sv)) 1668 Perl_croak(aTHX_ "panic: constant overflowed allocated space"); 1669 1670 SvPOK_on(sv); 1671 if (PL_encoding && !has_utf8) { 1672 sv_recode_to_utf8(sv, PL_encoding); 1673 has_utf8 = TRUE; 1674 } 1675 if (has_utf8) { 1676 SvUTF8_on(sv); 1677 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 1678 PL_sublex_info.sub_op->op_private |= 1679 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 1680 } 1681 } 1682 1683 /* shrink the sv if we allocated more than we used */ 1684 if (SvCUR(sv) + 5 < SvLEN(sv)) { 1685 SvLEN_set(sv, SvCUR(sv) + 1); 1686 Renew(SvPVX(sv), SvLEN(sv), char); 1687 } 1688 1689 /* return the substring (via yylval) only if we parsed anything */ 1690 if (s > PL_bufptr) { 1691 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) 1692 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"), 1693 sv, Nullsv, 1694 ( PL_lex_inwhat == OP_TRANS 1695 ? "tr" 1696 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) 1697 ? "s" 1698 : "qq"))); 1699 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 1700 } else 1701 SvREFCNT_dec(sv); 1702 return s; 1703 } 1704 1705 /* S_intuit_more 1706 * Returns TRUE if there's more to the expression (e.g., a subscript), 1707 * FALSE otherwise. 1708 * 1709 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 1710 * 1711 * ->[ and ->{ return TRUE 1712 * { and [ outside a pattern are always subscripts, so return TRUE 1713 * if we're outside a pattern and it's not { or [, then return FALSE 1714 * if we're in a pattern and the first char is a { 1715 * {4,5} (any digits around the comma) returns FALSE 1716 * if we're in a pattern and the first char is a [ 1717 * [] returns FALSE 1718 * [SOMETHING] has a funky algorithm to decide whether it's a 1719 * character class or not. It has to deal with things like 1720 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 1721 * anything else returns TRUE 1722 */ 1723 1724 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 1725 1726 STATIC int 1727 S_intuit_more(pTHX_ register char *s) 1728 { 1729 if (PL_lex_brackets) 1730 return TRUE; 1731 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 1732 return TRUE; 1733 if (*s != '{' && *s != '[') 1734 return FALSE; 1735 if (!PL_lex_inpat) 1736 return TRUE; 1737 1738 /* In a pattern, so maybe we have {n,m}. */ 1739 if (*s == '{') { 1740 s++; 1741 if (!isDIGIT(*s)) 1742 return TRUE; 1743 while (isDIGIT(*s)) 1744 s++; 1745 if (*s == ',') 1746 s++; 1747 while (isDIGIT(*s)) 1748 s++; 1749 if (*s == '}') 1750 return FALSE; 1751 return TRUE; 1752 1753 } 1754 1755 /* On the other hand, maybe we have a character class */ 1756 1757 s++; 1758 if (*s == ']' || *s == '^') 1759 return FALSE; 1760 else { 1761 /* this is terrifying, and it works */ 1762 int weight = 2; /* let's weigh the evidence */ 1763 char seen[256]; 1764 unsigned char un_char = 255, last_un_char; 1765 char *send = strchr(s,']'); 1766 char tmpbuf[sizeof PL_tokenbuf * 4]; 1767 1768 if (!send) /* has to be an expression */ 1769 return TRUE; 1770 1771 Zero(seen,256,char); 1772 if (*s == '$') 1773 weight -= 3; 1774 else if (isDIGIT(*s)) { 1775 if (s[1] != ']') { 1776 if (isDIGIT(s[1]) && s[2] == ']') 1777 weight -= 10; 1778 } 1779 else 1780 weight -= 100; 1781 } 1782 for (; s < send; s++) { 1783 last_un_char = un_char; 1784 un_char = (unsigned char)*s; 1785 switch (*s) { 1786 case '@': 1787 case '&': 1788 case '$': 1789 weight -= seen[un_char] * 10; 1790 if (isALNUM_lazy_if(s+1,UTF)) { 1791 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE); 1792 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV)) 1793 weight -= 100; 1794 else 1795 weight -= 10; 1796 } 1797 else if (*s == '$' && s[1] && 1798 strchr("[#!%*<>()-=",s[1])) { 1799 if (/*{*/ strchr("])} =",s[2])) 1800 weight -= 10; 1801 else 1802 weight -= 1; 1803 } 1804 break; 1805 case '\\': 1806 un_char = 254; 1807 if (s[1]) { 1808 if (strchr("wds]",s[1])) 1809 weight += 100; 1810 else if (seen['\''] || seen['"']) 1811 weight += 1; 1812 else if (strchr("rnftbxcav",s[1])) 1813 weight += 40; 1814 else if (isDIGIT(s[1])) { 1815 weight += 40; 1816 while (s[1] && isDIGIT(s[1])) 1817 s++; 1818 } 1819 } 1820 else 1821 weight += 100; 1822 break; 1823 case '-': 1824 if (s[1] == '\\') 1825 weight += 50; 1826 if (strchr("aA01! ",last_un_char)) 1827 weight += 30; 1828 if (strchr("zZ79~",s[1])) 1829 weight += 30; 1830 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 1831 weight -= 5; /* cope with negative subscript */ 1832 break; 1833 default: 1834 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) && 1835 isALPHA(*s) && s[1] && isALPHA(s[1])) { 1836 char *d = tmpbuf; 1837 while (isALPHA(*s)) 1838 *d++ = *s++; 1839 *d = '\0'; 1840 if (keyword(tmpbuf, d - tmpbuf)) 1841 weight -= 150; 1842 } 1843 if (un_char == last_un_char + 1) 1844 weight += 5; 1845 weight -= seen[un_char]; 1846 break; 1847 } 1848 seen[un_char]++; 1849 } 1850 if (weight >= 0) /* probably a character class */ 1851 return FALSE; 1852 } 1853 1854 return TRUE; 1855 } 1856 1857 /* 1858 * S_intuit_method 1859 * 1860 * Does all the checking to disambiguate 1861 * foo bar 1862 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 1863 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 1864 * 1865 * First argument is the stuff after the first token, e.g. "bar". 1866 * 1867 * Not a method if bar is a filehandle. 1868 * Not a method if foo is a subroutine prototyped to take a filehandle. 1869 * Not a method if it's really "Foo $bar" 1870 * Method if it's "foo $bar" 1871 * Not a method if it's really "print foo $bar" 1872 * Method if it's really "foo package::" (interpreted as package->foo) 1873 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 1874 * Not a method if bar is a filehandle or package, but is quoted with 1875 * => 1876 */ 1877 1878 STATIC int 1879 S_intuit_method(pTHX_ char *start, GV *gv) 1880 { 1881 char *s = start + (*start == '$'); 1882 char tmpbuf[sizeof PL_tokenbuf]; 1883 STRLEN len; 1884 GV* indirgv; 1885 1886 if (gv) { 1887 CV *cv; 1888 if (GvIO(gv)) 1889 return 0; 1890 if ((cv = GvCVu(gv))) { 1891 char *proto = SvPVX(cv); 1892 if (proto) { 1893 if (*proto == ';') 1894 proto++; 1895 if (*proto == '*') 1896 return 0; 1897 } 1898 } else 1899 gv = 0; 1900 } 1901 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 1902 /* start is the beginning of the possible filehandle/object, 1903 * and s is the end of it 1904 * tmpbuf is a copy of it 1905 */ 1906 1907 if (*start == '$') { 1908 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf)) 1909 return 0; 1910 s = skipspace(s); 1911 PL_bufptr = start; 1912 PL_expect = XREF; 1913 return *s == '(' ? FUNCMETH : METHOD; 1914 } 1915 if (!keyword(tmpbuf, len)) { 1916 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 1917 len -= 2; 1918 tmpbuf[len] = '\0'; 1919 goto bare_package; 1920 } 1921 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV); 1922 if (indirgv && GvCVu(indirgv)) 1923 return 0; 1924 /* filehandle or package name makes it a method */ 1925 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) { 1926 s = skipspace(s); 1927 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 1928 return 0; /* no assumptions -- "=>" quotes bearword */ 1929 bare_package: 1930 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, 1931 newSVpvn(tmpbuf,len)); 1932 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE; 1933 PL_expect = XTERM; 1934 force_next(WORD); 1935 PL_bufptr = s; 1936 return *s == '(' ? FUNCMETH : METHOD; 1937 } 1938 } 1939 return 0; 1940 } 1941 1942 /* 1943 * S_incl_perldb 1944 * Return a string of Perl code to load the debugger. If PERL5DB 1945 * is set, it will return the contents of that, otherwise a 1946 * compile-time require of perl5db.pl. 1947 */ 1948 1949 STATIC char* 1950 S_incl_perldb(pTHX) 1951 { 1952 if (PL_perldb) { 1953 char *pdb = PerlEnv_getenv("PERL5DB"); 1954 1955 if (pdb) 1956 return pdb; 1957 SETERRNO(0,SS$_NORMAL); 1958 return "BEGIN { require 'perl5db.pl' }"; 1959 } 1960 return ""; 1961 } 1962 1963 1964 /* Encoded script support. filter_add() effectively inserts a 1965 * 'pre-processing' function into the current source input stream. 1966 * Note that the filter function only applies to the current source file 1967 * (e.g., it will not affect files 'require'd or 'use'd by this one). 1968 * 1969 * The datasv parameter (which may be NULL) can be used to pass 1970 * private data to this instance of the filter. The filter function 1971 * can recover the SV using the FILTER_DATA macro and use it to 1972 * store private buffers and state information. 1973 * 1974 * The supplied datasv parameter is upgraded to a PVIO type 1975 * and the IoDIRP/IoANY field is used to store the function pointer, 1976 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 1977 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 1978 * private use must be set using malloc'd pointers. 1979 */ 1980 1981 SV * 1982 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 1983 { 1984 if (!funcp) 1985 return Nullsv; 1986 1987 if (!PL_rsfp_filters) 1988 PL_rsfp_filters = newAV(); 1989 if (!datasv) 1990 datasv = NEWSV(255,0); 1991 if (!SvUPGRADE(datasv, SVt_PVIO)) 1992 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO"); 1993 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */ 1994 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 1995 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 1996 (void*)funcp, SvPV_nolen(datasv))); 1997 av_unshift(PL_rsfp_filters, 1); 1998 av_store(PL_rsfp_filters, 0, datasv) ; 1999 return(datasv); 2000 } 2001 2002 2003 /* Delete most recently added instance of this filter function. */ 2004 void 2005 Perl_filter_del(pTHX_ filter_t funcp) 2006 { 2007 SV *datasv; 2008 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp)); 2009 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 2010 return; 2011 /* if filter is on top of stack (usual case) just pop it off */ 2012 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 2013 if (IoANY(datasv) == (void *)funcp) { 2014 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP; 2015 IoANY(datasv) = (void *)NULL; 2016 sv_free(av_pop(PL_rsfp_filters)); 2017 2018 return; 2019 } 2020 /* we need to search for the correct entry and clear it */ 2021 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 2022 } 2023 2024 2025 /* Invoke the n'th filter function for the current rsfp. */ 2026 I32 2027 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 2028 2029 2030 /* 0 = read one text line */ 2031 { 2032 filter_t funcp; 2033 SV *datasv = NULL; 2034 2035 if (!PL_rsfp_filters) 2036 return -1; 2037 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */ 2038 /* Provide a default input filter to make life easy. */ 2039 /* Note that we append to the line. This is handy. */ 2040 DEBUG_P(PerlIO_printf(Perl_debug_log, 2041 "filter_read %d: from rsfp\n", idx)); 2042 if (maxlen) { 2043 /* Want a block */ 2044 int len ; 2045 int old_len = SvCUR(buf_sv) ; 2046 2047 /* ensure buf_sv is large enough */ 2048 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ; 2049 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){ 2050 if (PerlIO_error(PL_rsfp)) 2051 return -1; /* error */ 2052 else 2053 return 0 ; /* end of file */ 2054 } 2055 SvCUR_set(buf_sv, old_len + len) ; 2056 } else { 2057 /* Want a line */ 2058 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 2059 if (PerlIO_error(PL_rsfp)) 2060 return -1; /* error */ 2061 else 2062 return 0 ; /* end of file */ 2063 } 2064 } 2065 return SvCUR(buf_sv); 2066 } 2067 /* Skip this filter slot if filter has been deleted */ 2068 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){ 2069 DEBUG_P(PerlIO_printf(Perl_debug_log, 2070 "filter_read %d: skipped (filter deleted)\n", 2071 idx)); 2072 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */ 2073 } 2074 /* Get function pointer hidden within datasv */ 2075 funcp = (filter_t)IoANY(datasv); 2076 DEBUG_P(PerlIO_printf(Perl_debug_log, 2077 "filter_read %d: via function %p (%s)\n", 2078 idx, (void*)funcp, SvPV_nolen(datasv))); 2079 /* Call function. The function is expected to */ 2080 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 2081 /* Return: <0:error, =0:eof, >0:not eof */ 2082 return (*funcp)(aTHX_ idx, buf_sv, maxlen); 2083 } 2084 2085 STATIC char * 2086 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) 2087 { 2088 #ifdef PERL_CR_FILTER 2089 if (!PL_rsfp_filters) { 2090 filter_add(S_cr_textfilter,NULL); 2091 } 2092 #endif 2093 if (PL_rsfp_filters) { 2094 2095 if (!append) 2096 SvCUR_set(sv, 0); /* start with empty line */ 2097 if (FILTER_READ(0, sv, 0) > 0) 2098 return ( SvPVX(sv) ) ; 2099 else 2100 return Nullch ; 2101 } 2102 else 2103 return (sv_gets(sv, fp, append)); 2104 } 2105 2106 STATIC HV * 2107 S_find_in_my_stash(pTHX_ char *pkgname, I32 len) 2108 { 2109 GV *gv; 2110 2111 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 2112 return PL_curstash; 2113 2114 if (len > 2 && 2115 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') && 2116 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV))) 2117 { 2118 return GvHV(gv); /* Foo:: */ 2119 } 2120 2121 /* use constant CLASS => 'MyClass' */ 2122 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) { 2123 SV *sv; 2124 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) { 2125 pkgname = SvPV_nolen(sv); 2126 } 2127 } 2128 2129 return gv_stashpv(pkgname, FALSE); 2130 } 2131 2132 #ifdef DEBUGGING 2133 static char* exp_name[] = 2134 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 2135 "ATTRTERM", "TERMBLOCK" 2136 }; 2137 #endif 2138 2139 /* 2140 yylex 2141 2142 Works out what to call the token just pulled out of the input 2143 stream. The yacc parser takes care of taking the ops we return and 2144 stitching them into a tree. 2145 2146 Returns: 2147 PRIVATEREF 2148 2149 Structure: 2150 if read an identifier 2151 if we're in a my declaration 2152 croak if they tried to say my($foo::bar) 2153 build the ops for a my() declaration 2154 if it's an access to a my() variable 2155 are we in a sort block? 2156 croak if my($a); $a <=> $b 2157 build ops for access to a my() variable 2158 if in a dq string, and they've said @foo and we can't find @foo 2159 croak 2160 build ops for a bareword 2161 if we already built the token before, use it. 2162 */ 2163 2164 #ifdef USE_PURE_BISON 2165 int 2166 Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) 2167 { 2168 int r; 2169 2170 yyactlevel++; 2171 yylval_pointer[yyactlevel] = lvalp; 2172 yychar_pointer[yyactlevel] = lcharp; 2173 if (yyactlevel >= YYMAXLEVEL) 2174 Perl_croak(aTHX_ "panic: YYMAXLEVEL"); 2175 2176 r = Perl_yylex(aTHX); 2177 2178 if (yyactlevel > 0) 2179 yyactlevel--; 2180 2181 return r; 2182 } 2183 #endif 2184 2185 #ifdef __SC__ 2186 #pragma segment Perl_yylex 2187 #endif 2188 int 2189 Perl_yylex(pTHX) 2190 { 2191 register char *s; 2192 register char *d; 2193 register I32 tmp; 2194 STRLEN len; 2195 GV *gv = Nullgv; 2196 GV **gvp = 0; 2197 bool bof = FALSE; 2198 2199 /* check if there's an identifier for us to look at */ 2200 if (PL_pending_ident) 2201 return S_pending_ident(aTHX); 2202 2203 /* no identifier pending identification */ 2204 2205 switch (PL_lex_state) { 2206 #ifdef COMMENTARY 2207 case LEX_NORMAL: /* Some compilers will produce faster */ 2208 case LEX_INTERPNORMAL: /* code if we comment these out. */ 2209 break; 2210 #endif 2211 2212 /* when we've already built the next token, just pull it out of the queue */ 2213 case LEX_KNOWNEXT: 2214 PL_nexttoke--; 2215 yylval = PL_nextval[PL_nexttoke]; 2216 if (!PL_nexttoke) { 2217 PL_lex_state = PL_lex_defer; 2218 PL_expect = PL_lex_expect; 2219 PL_lex_defer = LEX_NORMAL; 2220 } 2221 DEBUG_T({ PerlIO_printf(Perl_debug_log, 2222 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr, 2223 (IV)PL_nexttype[PL_nexttoke]); }); 2224 2225 return(PL_nexttype[PL_nexttoke]); 2226 2227 /* interpolated case modifiers like \L \U, including \Q and \E. 2228 when we get here, PL_bufptr is at the \ 2229 */ 2230 case LEX_INTERPCASEMOD: 2231 #ifdef DEBUGGING 2232 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 2233 Perl_croak(aTHX_ "panic: INTERPCASEMOD"); 2234 #endif 2235 /* handle \E or end of string */ 2236 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 2237 char oldmod; 2238 2239 /* if at a \E */ 2240 if (PL_lex_casemods) { 2241 oldmod = PL_lex_casestack[--PL_lex_casemods]; 2242 PL_lex_casestack[PL_lex_casemods] = '\0'; 2243 2244 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) { 2245 PL_bufptr += 2; 2246 PL_lex_state = LEX_INTERPCONCAT; 2247 } 2248 return ')'; 2249 } 2250 if (PL_bufptr != PL_bufend) 2251 PL_bufptr += 2; 2252 PL_lex_state = LEX_INTERPCONCAT; 2253 return yylex(); 2254 } 2255 else { 2256 DEBUG_T({ PerlIO_printf(Perl_debug_log, 2257 "### Saw case modifier at '%s'\n", PL_bufptr); }); 2258 s = PL_bufptr + 1; 2259 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 2260 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 2261 if (strchr("LU", *s) && 2262 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) 2263 { 2264 PL_lex_casestack[--PL_lex_casemods] = '\0'; 2265 return ')'; 2266 } 2267 if (PL_lex_casemods > 10) { 2268 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 2269 if (newlb != PL_lex_casestack) { 2270 SAVEFREEPV(newlb); 2271 PL_lex_casestack = newlb; 2272 } 2273 } 2274 PL_lex_casestack[PL_lex_casemods++] = *s; 2275 PL_lex_casestack[PL_lex_casemods] = '\0'; 2276 PL_lex_state = LEX_INTERPCONCAT; 2277 PL_nextval[PL_nexttoke].ival = 0; 2278 force_next('('); 2279 if (*s == 'l') 2280 PL_nextval[PL_nexttoke].ival = OP_LCFIRST; 2281 else if (*s == 'u') 2282 PL_nextval[PL_nexttoke].ival = OP_UCFIRST; 2283 else if (*s == 'L') 2284 PL_nextval[PL_nexttoke].ival = OP_LC; 2285 else if (*s == 'U') 2286 PL_nextval[PL_nexttoke].ival = OP_UC; 2287 else if (*s == 'Q') 2288 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; 2289 else 2290 Perl_croak(aTHX_ "panic: yylex"); 2291 PL_bufptr = s + 1; 2292 force_next(FUNC); 2293 if (PL_lex_starts) { 2294 s = PL_bufptr; 2295 PL_lex_starts = 0; 2296 Aop(OP_CONCAT); 2297 } 2298 else 2299 return yylex(); 2300 } 2301 2302 case LEX_INTERPPUSH: 2303 return sublex_push(); 2304 2305 case LEX_INTERPSTART: 2306 if (PL_bufptr == PL_bufend) 2307 return sublex_done(); 2308 DEBUG_T({ PerlIO_printf(Perl_debug_log, 2309 "### Interpolated variable at '%s'\n", PL_bufptr); }); 2310 PL_expect = XTERM; 2311 PL_lex_dojoin = (*PL_bufptr == '@'); 2312 PL_lex_state = LEX_INTERPNORMAL; 2313 if (PL_lex_dojoin) { 2314 PL_nextval[PL_nexttoke].ival = 0; 2315 force_next(','); 2316 #ifdef USE_5005THREADS 2317 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0); 2318 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\""); 2319 force_next(PRIVATEREF); 2320 #else 2321 force_ident("\"", '$'); 2322 #endif /* USE_5005THREADS */ 2323 PL_nextval[PL_nexttoke].ival = 0; 2324 force_next('$'); 2325 PL_nextval[PL_nexttoke].ival = 0; 2326 force_next('('); 2327 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */ 2328 force_next(FUNC); 2329 } 2330 if (PL_lex_starts++) { 2331 s = PL_bufptr; 2332 Aop(OP_CONCAT); 2333 } 2334 return yylex(); 2335 2336 case LEX_INTERPENDMAYBE: 2337 if (intuit_more(PL_bufptr)) { 2338 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 2339 break; 2340 } 2341 /* FALL THROUGH */ 2342 2343 case LEX_INTERPEND: 2344 if (PL_lex_dojoin) { 2345 PL_lex_dojoin = FALSE; 2346 PL_lex_state = LEX_INTERPCONCAT; 2347 return ')'; 2348 } 2349 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 2350 && SvEVALED(PL_lex_repl)) 2351 { 2352 if (PL_bufptr != PL_bufend) 2353 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 2354 PL_lex_repl = Nullsv; 2355 } 2356 /* FALLTHROUGH */ 2357 case LEX_INTERPCONCAT: 2358 #ifdef DEBUGGING 2359 if (PL_lex_brackets) 2360 Perl_croak(aTHX_ "panic: INTERPCONCAT"); 2361 #endif 2362 if (PL_bufptr == PL_bufend) 2363 return sublex_done(); 2364 2365 if (SvIVX(PL_linestr) == '\'') { 2366 SV *sv = newSVsv(PL_linestr); 2367 if (!PL_lex_inpat) 2368 sv = tokeq(sv); 2369 else if ( PL_hints & HINT_NEW_RE ) 2370 sv = new_constant(NULL, 0, "qr", sv, sv, "q"); 2371 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2372 s = PL_bufend; 2373 } 2374 else { 2375 s = scan_const(PL_bufptr); 2376 if (*s == '\\') 2377 PL_lex_state = LEX_INTERPCASEMOD; 2378 else 2379 PL_lex_state = LEX_INTERPSTART; 2380 } 2381 2382 if (s != PL_bufptr) { 2383 PL_nextval[PL_nexttoke] = yylval; 2384 PL_expect = XTERM; 2385 force_next(THING); 2386 if (PL_lex_starts++) 2387 Aop(OP_CONCAT); 2388 else { 2389 PL_bufptr = s; 2390 return yylex(); 2391 } 2392 } 2393 2394 return yylex(); 2395 case LEX_FORMLINE: 2396 PL_lex_state = LEX_NORMAL; 2397 s = scan_formline(PL_bufptr); 2398 if (!PL_lex_formbrack) 2399 goto rightbracket; 2400 OPERATOR(';'); 2401 } 2402 2403 s = PL_bufptr; 2404 PL_oldoldbufptr = PL_oldbufptr; 2405 PL_oldbufptr = s; 2406 DEBUG_T( { 2407 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n", 2408 exp_name[PL_expect], s); 2409 } ); 2410 2411 retry: 2412 switch (*s) { 2413 default: 2414 if (isIDFIRST_lazy_if(s,UTF)) 2415 goto keylookup; 2416 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255); 2417 case 4: 2418 case 26: 2419 goto fake_eof; /* emulate EOF on ^D or ^Z */ 2420 case 0: 2421 if (!PL_rsfp) { 2422 PL_last_uni = 0; 2423 PL_last_lop = 0; 2424 if (PL_lex_brackets) 2425 yyerror("Missing right curly or square bracket"); 2426 DEBUG_T( { PerlIO_printf(Perl_debug_log, 2427 "### Tokener got EOF\n"); 2428 } ); 2429 TOKEN(0); 2430 } 2431 if (s++ < PL_bufend) 2432 goto retry; /* ignore stray nulls */ 2433 PL_last_uni = 0; 2434 PL_last_lop = 0; 2435 if (!PL_in_eval && !PL_preambled) { 2436 PL_preambled = TRUE; 2437 sv_setpv(PL_linestr,incl_perldb()); 2438 if (SvCUR(PL_linestr)) 2439 sv_catpv(PL_linestr,";"); 2440 if (PL_preambleav){ 2441 while(AvFILLp(PL_preambleav) >= 0) { 2442 SV *tmpsv = av_shift(PL_preambleav); 2443 sv_catsv(PL_linestr, tmpsv); 2444 sv_catpv(PL_linestr, ";"); 2445 sv_free(tmpsv); 2446 } 2447 sv_free((SV*)PL_preambleav); 2448 PL_preambleav = NULL; 2449 } 2450 if (PL_minus_n || PL_minus_p) { 2451 sv_catpv(PL_linestr, "LINE: while (<>) {"); 2452 if (PL_minus_l) 2453 sv_catpv(PL_linestr,"chomp;"); 2454 if (PL_minus_a) { 2455 if (PL_minus_F) { 2456 if (strchr("/'\"", *PL_splitstr) 2457 && strchr(PL_splitstr + 1, *PL_splitstr)) 2458 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 2459 else { 2460 char delim; 2461 s = "'~#\200\1'"; /* surely one char is unused...*/ 2462 while (s[1] && strchr(PL_splitstr, *s)) s++; 2463 delim = *s; 2464 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c", 2465 "q" + (delim == '\''), delim); 2466 for (s = PL_splitstr; *s; s++) { 2467 if (*s == '\\') 2468 sv_catpvn(PL_linestr, "\\", 1); 2469 sv_catpvn(PL_linestr, s, 1); 2470 } 2471 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim); 2472 } 2473 } 2474 else 2475 sv_catpv(PL_linestr,"our @F=split(' ');"); 2476 } 2477 } 2478 sv_catpv(PL_linestr, "\n"); 2479 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 2480 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 2481 PL_last_lop = PL_last_uni = Nullch; 2482 if (PERLDB_LINE && PL_curstash != PL_debstash) { 2483 SV *sv = NEWSV(85,0); 2484 2485 sv_upgrade(sv, SVt_PVMG); 2486 sv_setsv(sv,PL_linestr); 2487 (void)SvIOK_on(sv); 2488 SvIVX(sv) = 0; 2489 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); 2490 } 2491 goto retry; 2492 } 2493 do { 2494 bof = PL_rsfp ? TRUE : FALSE; 2495 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { 2496 fake_eof: 2497 if (PL_rsfp) { 2498 if (PL_preprocess && !PL_in_eval) 2499 (void)PerlProc_pclose(PL_rsfp); 2500 else if ((PerlIO *)PL_rsfp == PerlIO_stdin()) 2501 PerlIO_clearerr(PL_rsfp); 2502 else 2503 (void)PerlIO_close(PL_rsfp); 2504 PL_rsfp = Nullfp; 2505 PL_doextract = FALSE; 2506 } 2507 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) { 2508 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : ""); 2509 sv_catpv(PL_linestr,";}"); 2510 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 2511 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 2512 PL_last_lop = PL_last_uni = Nullch; 2513 PL_minus_n = PL_minus_p = 0; 2514 goto retry; 2515 } 2516 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 2517 PL_last_lop = PL_last_uni = Nullch; 2518 sv_setpv(PL_linestr,""); 2519 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 2520 } 2521 /* if it looks like the start of a BOM, check if it in fact is */ 2522 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) { 2523 #ifdef PERLIO_IS_STDIO 2524 # ifdef __GNU_LIBRARY__ 2525 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */ 2526 # define FTELL_FOR_PIPE_IS_BROKEN 2527 # endif 2528 # else 2529 # ifdef __GLIBC__ 2530 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */ 2531 # define FTELL_FOR_PIPE_IS_BROKEN 2532 # endif 2533 # endif 2534 # endif 2535 #endif 2536 #ifdef FTELL_FOR_PIPE_IS_BROKEN 2537 /* This loses the possibility to detect the bof 2538 * situation on perl -P when the libc5 is being used. 2539 * Workaround? Maybe attach some extra state to PL_rsfp? 2540 */ 2541 if (!PL_preprocess) 2542 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); 2543 #else 2544 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr); 2545 #endif 2546 if (bof) { 2547 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 2548 s = swallow_bom((U8*)s); 2549 } 2550 } 2551 if (PL_doextract) { 2552 /* Incest with pod. */ 2553 if (*s == '=' && strnEQ(s, "=cut", 4)) { 2554 sv_setpv(PL_linestr, ""); 2555 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 2556 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 2557 PL_last_lop = PL_last_uni = Nullch; 2558 PL_doextract = FALSE; 2559 } 2560 } 2561 incline(s); 2562 } while (PL_doextract); 2563 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 2564 if (PERLDB_LINE && PL_curstash != PL_debstash) { 2565 SV *sv = NEWSV(85,0); 2566 2567 sv_upgrade(sv, SVt_PVMG); 2568 sv_setsv(sv,PL_linestr); 2569 (void)SvIOK_on(sv); 2570 SvIVX(sv) = 0; 2571 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); 2572 } 2573 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 2574 PL_last_lop = PL_last_uni = Nullch; 2575 if (CopLINE(PL_curcop) == 1) { 2576 while (s < PL_bufend && isSPACE(*s)) 2577 s++; 2578 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 2579 s++; 2580 d = Nullch; 2581 if (!PL_in_eval) { 2582 if (*s == '#' && *(s+1) == '!') 2583 d = s + 2; 2584 #ifdef ALTERNATE_SHEBANG 2585 else { 2586 static char as[] = ALTERNATE_SHEBANG; 2587 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 2588 d = s + (sizeof(as) - 1); 2589 } 2590 #endif /* ALTERNATE_SHEBANG */ 2591 } 2592 if (d) { 2593 char *ipath; 2594 char *ipathend; 2595 2596 while (isSPACE(*d)) 2597 d++; 2598 ipath = d; 2599 while (*d && !isSPACE(*d)) 2600 d++; 2601 ipathend = d; 2602 2603 #ifdef ARG_ZERO_IS_SCRIPT 2604 if (ipathend > ipath) { 2605 /* 2606 * HP-UX (at least) sets argv[0] to the script name, 2607 * which makes $^X incorrect. And Digital UNIX and Linux, 2608 * at least, set argv[0] to the basename of the Perl 2609 * interpreter. So, having found "#!", we'll set it right. 2610 */ 2611 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */ 2612 assert(SvPOK(x) || SvGMAGICAL(x)); 2613 if (sv_eq(x, CopFILESV(PL_curcop))) { 2614 sv_setpvn(x, ipath, ipathend - ipath); 2615 SvSETMAGIC(x); 2616 } 2617 else { 2618 STRLEN blen; 2619 STRLEN llen; 2620 char *bstart = SvPV(CopFILESV(PL_curcop),blen); 2621 char *lstart = SvPV(x,llen); 2622 if (llen < blen) { 2623 bstart += blen - llen; 2624 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 2625 sv_setpvn(x, ipath, ipathend - ipath); 2626 SvSETMAGIC(x); 2627 } 2628 } 2629 } 2630 TAINT_NOT; /* $^X is always tainted, but that's OK */ 2631 } 2632 #endif /* ARG_ZERO_IS_SCRIPT */ 2633 2634 /* 2635 * Look for options. 2636 */ 2637 d = instr(s,"perl -"); 2638 if (!d) { 2639 d = instr(s,"perl"); 2640 #if defined(DOSISH) 2641 /* avoid getting into infinite loops when shebang 2642 * line contains "Perl" rather than "perl" */ 2643 if (!d) { 2644 for (d = ipathend-4; d >= ipath; --d) { 2645 if ((*d == 'p' || *d == 'P') 2646 && !ibcmp(d, "perl", 4)) 2647 { 2648 break; 2649 } 2650 } 2651 if (d < ipath) 2652 d = Nullch; 2653 } 2654 #endif 2655 } 2656 #ifdef ALTERNATE_SHEBANG 2657 /* 2658 * If the ALTERNATE_SHEBANG on this system starts with a 2659 * character that can be part of a Perl expression, then if 2660 * we see it but not "perl", we're probably looking at the 2661 * start of Perl code, not a request to hand off to some 2662 * other interpreter. Similarly, if "perl" is there, but 2663 * not in the first 'word' of the line, we assume the line 2664 * contains the start of the Perl program. 2665 */ 2666 if (d && *s != '#') { 2667 char *c = ipath; 2668 while (*c && !strchr("; \t\r\n\f\v#", *c)) 2669 c++; 2670 if (c < d) 2671 d = Nullch; /* "perl" not in first word; ignore */ 2672 else 2673 *s = '#'; /* Don't try to parse shebang line */ 2674 } 2675 #endif /* ALTERNATE_SHEBANG */ 2676 #ifndef MACOS_TRADITIONAL 2677 if (!d && 2678 *s == '#' && 2679 ipathend > ipath && 2680 !PL_minus_c && 2681 !instr(s,"indir") && 2682 instr(PL_origargv[0],"perl")) 2683 { 2684 char **newargv; 2685 2686 *ipathend = '\0'; 2687 s = ipathend + 1; 2688 while (s < PL_bufend && isSPACE(*s)) 2689 s++; 2690 if (s < PL_bufend) { 2691 Newz(899,newargv,PL_origargc+3,char*); 2692 newargv[1] = s; 2693 while (s < PL_bufend && !isSPACE(*s)) 2694 s++; 2695 *s = '\0'; 2696 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 2697 } 2698 else 2699 newargv = PL_origargv; 2700 newargv[0] = ipath; 2701 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 2702 Perl_croak(aTHX_ "Can't exec %s", ipath); 2703 } 2704 #endif 2705 if (d) { 2706 U32 oldpdb = PL_perldb; 2707 bool oldn = PL_minus_n; 2708 bool oldp = PL_minus_p; 2709 2710 while (*d && !isSPACE(*d)) d++; 2711 while (SPACE_OR_TAB(*d)) d++; 2712 2713 if (*d++ == '-') { 2714 bool switches_done = PL_doswitches; 2715 do { 2716 if (*d == 'M' || *d == 'm') { 2717 char *m = d; 2718 while (*d && !isSPACE(*d)) d++; 2719 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 2720 (int)(d - m), m); 2721 } 2722 d = moreswitches(d); 2723 } while (d); 2724 if ((PERLDB_LINE && !oldpdb) || 2725 ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 2726 /* if we have already added "LINE: while (<>) {", 2727 we must not do it again */ 2728 { 2729 sv_setpv(PL_linestr, ""); 2730 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 2731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 2732 PL_last_lop = PL_last_uni = Nullch; 2733 PL_preambled = FALSE; 2734 if (PERLDB_LINE) 2735 (void)gv_fetchfile(PL_origfilename); 2736 goto retry; 2737 } 2738 if (PL_doswitches && !switches_done) { 2739 int argc = PL_origargc; 2740 char **argv = PL_origargv; 2741 do { 2742 argc--,argv++; 2743 } while (argc && argv[0][0] == '-' && argv[0][1]); 2744 init_argv_symbols(argc,argv); 2745 } 2746 } 2747 } 2748 } 2749 } 2750 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 2751 PL_bufptr = s; 2752 PL_lex_state = LEX_FORMLINE; 2753 return yylex(); 2754 } 2755 goto retry; 2756 case '\r': 2757 #ifdef PERL_STRICT_CR 2758 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 2759 Perl_croak(aTHX_ 2760 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 2761 #endif 2762 case ' ': case '\t': case '\f': case 013: 2763 #ifdef MACOS_TRADITIONAL 2764 case '\312': 2765 #endif 2766 s++; 2767 goto retry; 2768 case '#': 2769 case '\n': 2770 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) { 2771 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) { 2772 /* handle eval qq[#line 1 "foo"\n ...] */ 2773 CopLINE_dec(PL_curcop); 2774 incline(s); 2775 } 2776 d = PL_bufend; 2777 while (s < d && *s != '\n') 2778 s++; 2779 if (s < d) 2780 s++; 2781 else if (s > d) /* Found by Ilya: feed random input to Perl. */ 2782 Perl_croak(aTHX_ "panic: input overflow"); 2783 incline(s); 2784 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 2785 PL_bufptr = s; 2786 PL_lex_state = LEX_FORMLINE; 2787 return yylex(); 2788 } 2789 } 2790 else { 2791 *s = '\0'; 2792 PL_bufend = s; 2793 } 2794 goto retry; 2795 case '-': 2796 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) { 2797 I32 ftst = 0; 2798 2799 s++; 2800 PL_bufptr = s; 2801 tmp = *s++; 2802 2803 while (s < PL_bufend && SPACE_OR_TAB(*s)) 2804 s++; 2805 2806 if (strnEQ(s,"=>",2)) { 2807 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); 2808 DEBUG_T( { PerlIO_printf(Perl_debug_log, 2809 "### Saw unary minus before =>, forcing word '%s'\n", s); 2810 } ); 2811 OPERATOR('-'); /* unary minus */ 2812 } 2813 PL_last_uni = PL_oldbufptr; 2814 switch (tmp) { 2815 case 'r': ftst = OP_FTEREAD; break; 2816 case 'w': ftst = OP_FTEWRITE; break; 2817 case 'x': ftst = OP_FTEEXEC; break; 2818 case 'o': ftst = OP_FTEOWNED; break; 2819 case 'R': ftst = OP_FTRREAD; break; 2820 case 'W': ftst = OP_FTRWRITE; break; 2821 case 'X': ftst = OP_FTREXEC; break; 2822 case 'O': ftst = OP_FTROWNED; break; 2823 case 'e': ftst = OP_FTIS; break; 2824 case 'z': ftst = OP_FTZERO; break; 2825 case 's': ftst = OP_FTSIZE; break; 2826 case 'f': ftst = OP_FTFILE; break; 2827 case 'd': ftst = OP_FTDIR; break; 2828 case 'l': ftst = OP_FTLINK; break; 2829 case 'p': ftst = OP_FTPIPE; break; 2830 case 'S': ftst = OP_FTSOCK; break; 2831 case 'u': ftst = OP_FTSUID; break; 2832 case 'g': ftst = OP_FTSGID; break; 2833 case 'k': ftst = OP_FTSVTX; break; 2834 case 'b': ftst = OP_FTBLK; break; 2835 case 'c': ftst = OP_FTCHR; break; 2836 case 't': ftst = OP_FTTTY; break; 2837 case 'T': ftst = OP_FTTEXT; break; 2838 case 'B': ftst = OP_FTBINARY; break; 2839 case 'M': case 'A': case 'C': 2840 gv_fetchpv("\024",TRUE, SVt_PV); 2841 switch (tmp) { 2842 case 'M': ftst = OP_FTMTIME; break; 2843 case 'A': ftst = OP_FTATIME; break; 2844 case 'C': ftst = OP_FTCTIME; break; 2845 default: break; 2846 } 2847 break; 2848 default: 2849 break; 2850 } 2851 if (ftst) { 2852 PL_last_lop_op = (OPCODE)ftst; 2853 DEBUG_T( { PerlIO_printf(Perl_debug_log, 2854 "### Saw file test %c\n", (int)ftst); 2855 } ); 2856 FTST(ftst); 2857 } 2858 else { 2859 /* Assume it was a minus followed by a one-letter named 2860 * subroutine call (or a -bareword), then. */ 2861 DEBUG_T( { PerlIO_printf(Perl_debug_log, 2862 "### %c looked like a file test but was not\n", 2863 (int)ftst); 2864 } ); 2865 s -= 2; 2866 } 2867 } 2868 tmp = *s++; 2869 if (*s == tmp) { 2870 s++; 2871 if (PL_expect == XOPERATOR) 2872 TERM(POSTDEC); 2873 else 2874 OPERATOR(PREDEC); 2875 } 2876 else if (*s == '>') { 2877 s++; 2878 s = skipspace(s); 2879 if (isIDFIRST_lazy_if(s,UTF)) { 2880 s = force_word(s,METHOD,FALSE,TRUE,FALSE); 2881 TOKEN(ARROW); 2882 } 2883 else if (*s == '$') 2884 OPERATOR(ARROW); 2885 else 2886 TERM(ARROW); 2887 } 2888 if (PL_expect == XOPERATOR) 2889 Aop(OP_SUBTRACT); 2890 else { 2891 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 2892 check_uni(); 2893 OPERATOR('-'); /* unary minus */ 2894 } 2895 2896 case '+': 2897 tmp = *s++; 2898 if (*s == tmp) { 2899 s++; 2900 if (PL_expect == XOPERATOR) 2901 TERM(POSTINC); 2902 else 2903 OPERATOR(PREINC); 2904 } 2905 if (PL_expect == XOPERATOR) 2906 Aop(OP_ADD); 2907 else { 2908 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 2909 check_uni(); 2910 OPERATOR('+'); 2911 } 2912 2913 case '*': 2914 if (PL_expect != XOPERATOR) { 2915 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 2916 PL_expect = XOPERATOR; 2917 force_ident(PL_tokenbuf, '*'); 2918 if (!*PL_tokenbuf) 2919 PREREF('*'); 2920 TERM('*'); 2921 } 2922 s++; 2923 if (*s == '*') { 2924 s++; 2925 PWop(OP_POW); 2926 } 2927 Mop(OP_MULTIPLY); 2928 2929 case '%': 2930 if (PL_expect == XOPERATOR) { 2931 ++s; 2932 Mop(OP_MODULO); 2933 } 2934 PL_tokenbuf[0] = '%'; 2935 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); 2936 if (!PL_tokenbuf[1]) { 2937 if (s == PL_bufend) 2938 yyerror("Final % should be \\% or %name"); 2939 PREREF('%'); 2940 } 2941 PL_pending_ident = '%'; 2942 TERM('%'); 2943 2944 case '^': 2945 s++; 2946 BOop(OP_BIT_XOR); 2947 case '[': 2948 PL_lex_brackets++; 2949 /* FALL THROUGH */ 2950 case '~': 2951 case ',': 2952 tmp = *s++; 2953 OPERATOR(tmp); 2954 case ':': 2955 if (s[1] == ':') { 2956 len = 0; 2957 goto just_a_word; 2958 } 2959 s++; 2960 switch (PL_expect) { 2961 OP *attrs; 2962 case XOPERATOR: 2963 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 2964 break; 2965 PL_bufptr = s; /* update in case we back off */ 2966 goto grabattrs; 2967 case XATTRBLOCK: 2968 PL_expect = XBLOCK; 2969 goto grabattrs; 2970 case XATTRTERM: 2971 PL_expect = XTERMBLOCK; 2972 grabattrs: 2973 s = skipspace(s); 2974 attrs = Nullop; 2975 while (isIDFIRST_lazy_if(s,UTF)) { 2976 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 2977 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) { 2978 if (tmp < 0) tmp = -tmp; 2979 switch (tmp) { 2980 case KEY_or: 2981 case KEY_and: 2982 case KEY_for: 2983 case KEY_unless: 2984 case KEY_if: 2985 case KEY_while: 2986 case KEY_until: 2987 goto got_attrs; 2988 default: 2989 break; 2990 } 2991 } 2992 if (*d == '(') { 2993 d = scan_str(d,TRUE,TRUE); 2994 if (!d) { 2995 /* MUST advance bufptr here to avoid bogus 2996 "at end of line" context messages from yyerror(). 2997 */ 2998 PL_bufptr = s + len; 2999 yyerror("Unterminated attribute parameter in attribute list"); 3000 if (attrs) 3001 op_free(attrs); 3002 return 0; /* EOF indicator */ 3003 } 3004 } 3005 if (PL_lex_stuff) { 3006 SV *sv = newSVpvn(s, len); 3007 sv_catsv(sv, PL_lex_stuff); 3008 attrs = append_elem(OP_LIST, attrs, 3009 newSVOP(OP_CONST, 0, sv)); 3010 SvREFCNT_dec(PL_lex_stuff); 3011 PL_lex_stuff = Nullsv; 3012 } 3013 else { 3014 /* NOTE: any CV attrs applied here need to be part of 3015 the CVf_BUILTIN_ATTRS define in cv.h! */ 3016 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len)) 3017 CvLVALUE_on(PL_compcv); 3018 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len)) 3019 CvLOCKED_on(PL_compcv); 3020 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) 3021 CvMETHOD_on(PL_compcv); 3022 #ifdef USE_ITHREADS 3023 else if (PL_in_my == KEY_our && len == 6 && 3024 strnEQ(s, "unique", len)) 3025 GvUNIQUE_on(cGVOPx_gv(yylval.opval)); 3026 #endif 3027 /* After we've set the flags, it could be argued that 3028 we don't need to do the attributes.pm-based setting 3029 process, and shouldn't bother appending recognized 3030 flags. To experiment with that, uncomment the 3031 following "else". (Note that's already been 3032 uncommented. That keeps the above-applied built-in 3033 attributes from being intercepted (and possibly 3034 rejected) by a package's attribute routines, but is 3035 justified by the performance win for the common case 3036 of applying only built-in attributes.) */ 3037 else 3038 attrs = append_elem(OP_LIST, attrs, 3039 newSVOP(OP_CONST, 0, 3040 newSVpvn(s, len))); 3041 } 3042 s = skipspace(d); 3043 if (*s == ':' && s[1] != ':') 3044 s = skipspace(s+1); 3045 else if (s == d) 3046 break; /* require real whitespace or :'s */ 3047 } 3048 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */ 3049 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) { 3050 char q = ((*s == '\'') ? '"' : '\''); 3051 /* If here for an expression, and parsed no attrs, back off. */ 3052 if (tmp == '=' && !attrs) { 3053 s = PL_bufptr; 3054 break; 3055 } 3056 /* MUST advance bufptr here to avoid bogus "at end of line" 3057 context messages from yyerror(). 3058 */ 3059 PL_bufptr = s; 3060 if (!*s) 3061 yyerror("Unterminated attribute list"); 3062 else 3063 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list", 3064 q, *s, q)); 3065 if (attrs) 3066 op_free(attrs); 3067 OPERATOR(':'); 3068 } 3069 got_attrs: 3070 if (attrs) { 3071 PL_nextval[PL_nexttoke].opval = attrs; 3072 force_next(THING); 3073 } 3074 TOKEN(COLONATTR); 3075 } 3076 OPERATOR(':'); 3077 case '(': 3078 s++; 3079 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 3080 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 3081 else 3082 PL_expect = XTERM; 3083 TOKEN('('); 3084 case ';': 3085 CLINE; 3086 tmp = *s++; 3087 OPERATOR(tmp); 3088 case ')': 3089 tmp = *s++; 3090 s = skipspace(s); 3091 if (*s == '{') 3092 PREBLOCK(tmp); 3093 TERM(tmp); 3094 case ']': 3095 s++; 3096 if (PL_lex_brackets <= 0) 3097 yyerror("Unmatched right square bracket"); 3098 else 3099 --PL_lex_brackets; 3100 if (PL_lex_state == LEX_INTERPNORMAL) { 3101 if (PL_lex_brackets == 0) { 3102 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>')) 3103 PL_lex_state = LEX_INTERPEND; 3104 } 3105 } 3106 TERM(']'); 3107 case '{': 3108 leftbracket: 3109 s++; 3110 if (PL_lex_brackets > 100) { 3111 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char); 3112 if (newlb != PL_lex_brackstack) { 3113 SAVEFREEPV(newlb); 3114 PL_lex_brackstack = newlb; 3115 } 3116 } 3117 switch (PL_expect) { 3118 case XTERM: 3119 if (PL_lex_formbrack) { 3120 s--; 3121 PRETERMBLOCK(DO); 3122 } 3123 if (PL_oldoldbufptr == PL_last_lop) 3124 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 3125 else 3126 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 3127 OPERATOR(HASHBRACK); 3128 case XOPERATOR: 3129 while (s < PL_bufend && SPACE_OR_TAB(*s)) 3130 s++; 3131 d = s; 3132 PL_tokenbuf[0] = '\0'; 3133 if (d < PL_bufend && *d == '-') { 3134 PL_tokenbuf[0] = '-'; 3135 d++; 3136 while (d < PL_bufend && SPACE_OR_TAB(*d)) 3137 d++; 3138 } 3139 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 3140 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 3141 FALSE, &len); 3142 while (d < PL_bufend && SPACE_OR_TAB(*d)) 3143 d++; 3144 if (*d == '}') { 3145 char minus = (PL_tokenbuf[0] == '-'); 3146 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); 3147 if (minus) 3148 force_next('-'); 3149 } 3150 } 3151 /* FALL THROUGH */ 3152 case XATTRBLOCK: 3153 case XBLOCK: 3154 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 3155 PL_expect = XSTATE; 3156 break; 3157 case XATTRTERM: 3158 case XTERMBLOCK: 3159 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 3160 PL_expect = XSTATE; 3161 break; 3162 default: { 3163 char *t; 3164 if (PL_oldoldbufptr == PL_last_lop) 3165 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 3166 else 3167 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 3168 s = skipspace(s); 3169 if (*s == '}') { 3170 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 3171 PL_expect = XTERM; 3172 /* This hack is to get the ${} in the message. */ 3173 PL_bufptr = s+1; 3174 yyerror("syntax error"); 3175 break; 3176 } 3177 OPERATOR(HASHBRACK); 3178 } 3179 /* This hack serves to disambiguate a pair of curlies 3180 * as being a block or an anon hash. Normally, expectation 3181 * determines that, but in cases where we're not in a 3182 * position to expect anything in particular (like inside 3183 * eval"") we have to resolve the ambiguity. This code 3184 * covers the case where the first term in the curlies is a 3185 * quoted string. Most other cases need to be explicitly 3186 * disambiguated by prepending a `+' before the opening 3187 * curly in order to force resolution as an anon hash. 3188 * 3189 * XXX should probably propagate the outer expectation 3190 * into eval"" to rely less on this hack, but that could 3191 * potentially break current behavior of eval"". 3192 * GSAR 97-07-21 3193 */ 3194 t = s; 3195 if (*s == '\'' || *s == '"' || *s == '`') { 3196 /* common case: get past first string, handling escapes */ 3197 for (t++; t < PL_bufend && *t != *s;) 3198 if (*t++ == '\\' && (*t == '\\' || *t == *s)) 3199 t++; 3200 t++; 3201 } 3202 else if (*s == 'q') { 3203 if (++t < PL_bufend 3204 && (!isALNUM(*t) 3205 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 3206 && !isALNUM(*t)))) 3207 { 3208 char *tmps; 3209 char open, close, term; 3210 I32 brackets = 1; 3211 3212 while (t < PL_bufend && isSPACE(*t)) 3213 t++; 3214 term = *t; 3215 open = term; 3216 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 3217 term = tmps[5]; 3218 close = term; 3219 if (open == close) 3220 for (t++; t < PL_bufend; t++) { 3221 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 3222 t++; 3223 else if (*t == open) 3224 break; 3225 } 3226 else 3227 for (t++; t < PL_bufend; t++) { 3228 if (*t == '\\' && t+1 < PL_bufend) 3229 t++; 3230 else if (*t == close && --brackets <= 0) 3231 break; 3232 else if (*t == open) 3233 brackets++; 3234 } 3235 } 3236 t++; 3237 } 3238 else if (isALNUM_lazy_if(t,UTF)) { 3239 t += UTF8SKIP(t); 3240 while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) 3241 t += UTF8SKIP(t); 3242 } 3243 while (t < PL_bufend && isSPACE(*t)) 3244 t++; 3245 /* if comma follows first term, call it an anon hash */ 3246 /* XXX it could be a comma expression with loop modifiers */ 3247 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 3248 || (*t == '=' && t[1] == '>'))) 3249 OPERATOR(HASHBRACK); 3250 if (PL_expect == XREF) 3251 PL_expect = XTERM; 3252 else { 3253 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 3254 PL_expect = XSTATE; 3255 } 3256 } 3257 break; 3258 } 3259 yylval.ival = CopLINE(PL_curcop); 3260 if (isSPACE(*s) || *s == '#') 3261 PL_copline = NOLINE; /* invalidate current command line number */ 3262 TOKEN('{'); 3263 case '}': 3264 rightbracket: 3265 s++; 3266 if (PL_lex_brackets <= 0) 3267 yyerror("Unmatched right curly bracket"); 3268 else 3269 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 3270 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL) 3271 PL_lex_formbrack = 0; 3272 if (PL_lex_state == LEX_INTERPNORMAL) { 3273 if (PL_lex_brackets == 0) { 3274 if (PL_expect & XFAKEBRACK) { 3275 PL_expect &= XENUMMASK; 3276 PL_lex_state = LEX_INTERPEND; 3277 PL_bufptr = s; 3278 return yylex(); /* ignore fake brackets */ 3279 } 3280 if (*s == '-' && s[1] == '>') 3281 PL_lex_state = LEX_INTERPENDMAYBE; 3282 else if (*s != '[' && *s != '{') 3283 PL_lex_state = LEX_INTERPEND; 3284 } 3285 } 3286 if (PL_expect & XFAKEBRACK) { 3287 PL_expect &= XENUMMASK; 3288 PL_bufptr = s; 3289 return yylex(); /* ignore fake brackets */ 3290 } 3291 force_next('}'); 3292 TOKEN(';'); 3293 case '&': 3294 s++; 3295 tmp = *s++; 3296 if (tmp == '&') 3297 AOPERATOR(ANDAND); 3298 s--; 3299 if (PL_expect == XOPERATOR) { 3300 if (ckWARN(WARN_SEMICOLON) 3301 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart) 3302 { 3303 CopLINE_dec(PL_curcop); 3304 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi); 3305 CopLINE_inc(PL_curcop); 3306 } 3307 BAop(OP_BIT_AND); 3308 } 3309 3310 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 3311 if (*PL_tokenbuf) { 3312 PL_expect = XOPERATOR; 3313 force_ident(PL_tokenbuf, '&'); 3314 } 3315 else 3316 PREREF('&'); 3317 yylval.ival = (OPpENTERSUB_AMPER<<8); 3318 TERM('&'); 3319 3320 case '|': 3321 s++; 3322 tmp = *s++; 3323 if (tmp == '|') 3324 AOPERATOR(OROR); 3325 s--; 3326 BOop(OP_BIT_OR); 3327 case '=': 3328 s++; 3329 tmp = *s++; 3330 if (tmp == '=') 3331 Eop(OP_EQ); 3332 if (tmp == '>') 3333 OPERATOR(','); 3334 if (tmp == '~') 3335 PMop(OP_MATCH); 3336 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp)) 3337 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp); 3338 s--; 3339 if (PL_expect == XSTATE && isALPHA(tmp) && 3340 (s == PL_linestart+1 || s[-2] == '\n') ) 3341 { 3342 if (PL_in_eval && !PL_rsfp) { 3343 d = PL_bufend; 3344 while (s < d) { 3345 if (*s++ == '\n') { 3346 incline(s); 3347 if (strnEQ(s,"=cut",4)) { 3348 s = strchr(s,'\n'); 3349 if (s) 3350 s++; 3351 else 3352 s = d; 3353 incline(s); 3354 goto retry; 3355 } 3356 } 3357 } 3358 goto retry; 3359 } 3360 s = PL_bufend; 3361 PL_doextract = TRUE; 3362 goto retry; 3363 } 3364 if (PL_lex_brackets < PL_lex_formbrack) { 3365 char *t; 3366 #ifdef PERL_STRICT_CR 3367 for (t = s; SPACE_OR_TAB(*t); t++) ; 3368 #else 3369 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ; 3370 #endif 3371 if (*t == '\n' || *t == '#') { 3372 s--; 3373 PL_expect = XBLOCK; 3374 goto leftbracket; 3375 } 3376 } 3377 yylval.ival = 0; 3378 OPERATOR(ASSIGNOP); 3379 case '!': 3380 s++; 3381 tmp = *s++; 3382 if (tmp == '=') 3383 Eop(OP_NE); 3384 if (tmp == '~') 3385 PMop(OP_NOT); 3386 s--; 3387 OPERATOR('!'); 3388 case '<': 3389 if (PL_expect != XOPERATOR) { 3390 if (s[1] != '<' && !strchr(s,'>')) 3391 check_uni(); 3392 if (s[1] == '<') 3393 s = scan_heredoc(s); 3394 else 3395 s = scan_inputsymbol(s); 3396 TERM(sublex_start()); 3397 } 3398 s++; 3399 tmp = *s++; 3400 if (tmp == '<') 3401 SHop(OP_LEFT_SHIFT); 3402 if (tmp == '=') { 3403 tmp = *s++; 3404 if (tmp == '>') 3405 Eop(OP_NCMP); 3406 s--; 3407 Rop(OP_LE); 3408 } 3409 s--; 3410 Rop(OP_LT); 3411 case '>': 3412 s++; 3413 tmp = *s++; 3414 if (tmp == '>') 3415 SHop(OP_RIGHT_SHIFT); 3416 if (tmp == '=') 3417 Rop(OP_GE); 3418 s--; 3419 Rop(OP_GT); 3420 3421 case '$': 3422 CLINE; 3423 3424 if (PL_expect == XOPERATOR) { 3425 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 3426 PL_expect = XTERM; 3427 depcom(); 3428 return ','; /* grandfather non-comma-format format */ 3429 } 3430 } 3431 3432 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) { 3433 PL_tokenbuf[0] = '@'; 3434 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1, 3435 sizeof PL_tokenbuf - 1, FALSE); 3436 if (PL_expect == XOPERATOR) 3437 no_op("Array length", s); 3438 if (!PL_tokenbuf[1]) 3439 PREREF(DOLSHARP); 3440 PL_expect = XOPERATOR; 3441 PL_pending_ident = '#'; 3442 TOKEN(DOLSHARP); 3443 } 3444 3445 PL_tokenbuf[0] = '$'; 3446 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, 3447 sizeof PL_tokenbuf - 1, FALSE); 3448 if (PL_expect == XOPERATOR) 3449 no_op("Scalar", s); 3450 if (!PL_tokenbuf[1]) { 3451 if (s == PL_bufend) 3452 yyerror("Final $ should be \\$ or $name"); 3453 PREREF('$'); 3454 } 3455 3456 /* This kludge not intended to be bulletproof. */ 3457 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) { 3458 yylval.opval = newSVOP(OP_CONST, 0, 3459 newSViv(PL_compiling.cop_arybase)); 3460 yylval.opval->op_private = OPpCONST_ARYBASE; 3461 TERM(THING); 3462 } 3463 3464 d = s; 3465 tmp = (I32)*s; 3466 if (PL_lex_state == LEX_NORMAL) 3467 s = skipspace(s); 3468 3469 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 3470 char *t; 3471 if (*s == '[') { 3472 PL_tokenbuf[0] = '@'; 3473 if (ckWARN(WARN_SYNTAX)) { 3474 for(t = s + 1; 3475 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$'; 3476 t++) ; 3477 if (*t++ == ',') { 3478 PL_bufptr = skipspace(PL_bufptr); 3479 while (t < PL_bufend && *t != ']') 3480 t++; 3481 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 3482 "Multidimensional syntax %.*s not supported", 3483 (t - PL_bufptr) + 1, PL_bufptr); 3484 } 3485 } 3486 } 3487 else if (*s == '{') { 3488 PL_tokenbuf[0] = '%'; 3489 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") && 3490 (t = strchr(s, '}')) && (t = strchr(t, '='))) 3491 { 3492 char tmpbuf[sizeof PL_tokenbuf]; 3493 STRLEN len; 3494 for (t++; isSPACE(*t); t++) ; 3495 if (isIDFIRST_lazy_if(t,UTF)) { 3496 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); 3497 for (; isSPACE(*t); t++) ; 3498 if (*t == ';' && get_cv(tmpbuf, FALSE)) 3499 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 3500 "You need to quote \"%s\"", tmpbuf); 3501 } 3502 } 3503 } 3504 } 3505 3506 PL_expect = XOPERATOR; 3507 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 3508 bool islop = (PL_last_lop == PL_oldoldbufptr); 3509 if (!islop || PL_last_lop_op == OP_GREPSTART) 3510 PL_expect = XOPERATOR; 3511 else if (strchr("$@\"'`q", *s)) 3512 PL_expect = XTERM; /* e.g. print $fh "foo" */ 3513 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 3514 PL_expect = XTERM; /* e.g. print $fh &sub */ 3515 else if (isIDFIRST_lazy_if(s,UTF)) { 3516 char tmpbuf[sizeof PL_tokenbuf]; 3517 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 3518 if ((tmp = keyword(tmpbuf, len))) { 3519 /* binary operators exclude handle interpretations */ 3520 switch (tmp) { 3521 case -KEY_x: 3522 case -KEY_eq: 3523 case -KEY_ne: 3524 case -KEY_gt: 3525 case -KEY_lt: 3526 case -KEY_ge: 3527 case -KEY_le: 3528 case -KEY_cmp: 3529 break; 3530 default: 3531 PL_expect = XTERM; /* e.g. print $fh length() */ 3532 break; 3533 } 3534 } 3535 else { 3536 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV); 3537 if (gv && GvCVu(gv)) 3538 PL_expect = XTERM; /* e.g. print $fh subr() */ 3539 } 3540 } 3541 else if (isDIGIT(*s)) 3542 PL_expect = XTERM; /* e.g. print $fh 3 */ 3543 else if (*s == '.' && isDIGIT(s[1])) 3544 PL_expect = XTERM; /* e.g. print $fh .3 */ 3545 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=') 3546 PL_expect = XTERM; /* e.g. print $fh -1 */ 3547 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=') 3548 PL_expect = XTERM; /* print $fh <<"EOF" */ 3549 } 3550 PL_pending_ident = '$'; 3551 TOKEN('$'); 3552 3553 case '@': 3554 if (PL_expect == XOPERATOR) 3555 no_op("Array", s); 3556 PL_tokenbuf[0] = '@'; 3557 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 3558 if (!PL_tokenbuf[1]) { 3559 if (s == PL_bufend) 3560 yyerror("Final @ should be \\@ or @name"); 3561 PREREF('@'); 3562 } 3563 if (PL_lex_state == LEX_NORMAL) 3564 s = skipspace(s); 3565 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 3566 if (*s == '{') 3567 PL_tokenbuf[0] = '%'; 3568 3569 /* Warn about @ where they meant $. */ 3570 if (ckWARN(WARN_SYNTAX)) { 3571 if (*s == '[' || *s == '{') { 3572 char *t = s + 1; 3573 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t))) 3574 t++; 3575 if (*t == '}' || *t == ']') { 3576 t++; 3577 PL_bufptr = skipspace(PL_bufptr); 3578 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 3579 "Scalar value %.*s better written as $%.*s", 3580 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1); 3581 } 3582 } 3583 } 3584 } 3585 PL_pending_ident = '@'; 3586 TERM('@'); 3587 3588 case '/': /* may either be division or pattern */ 3589 case '?': /* may either be conditional or pattern */ 3590 if (PL_expect != XOPERATOR) { 3591 /* Disable warning on "study /blah/" */ 3592 if (PL_oldoldbufptr == PL_last_uni 3593 && (*PL_last_uni != 's' || s - PL_last_uni < 5 3594 || memNE(PL_last_uni, "study", 5) 3595 || isALNUM_lazy_if(PL_last_uni+5,UTF))) 3596 check_uni(); 3597 s = scan_pat(s,OP_MATCH); 3598 TERM(sublex_start()); 3599 } 3600 tmp = *s++; 3601 if (tmp == '/') 3602 Mop(OP_DIVIDE); 3603 OPERATOR(tmp); 3604 3605 case '.': 3606 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 3607 #ifdef PERL_STRICT_CR 3608 && s[1] == '\n' 3609 #else 3610 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 3611 #endif 3612 && (s == PL_linestart || s[-1] == '\n') ) 3613 { 3614 PL_lex_formbrack = 0; 3615 PL_expect = XSTATE; 3616 goto rightbracket; 3617 } 3618 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 3619 tmp = *s++; 3620 if (*s == tmp) { 3621 s++; 3622 if (*s == tmp) { 3623 s++; 3624 yylval.ival = OPf_SPECIAL; 3625 } 3626 else 3627 yylval.ival = 0; 3628 OPERATOR(DOTDOT); 3629 } 3630 if (PL_expect != XOPERATOR) 3631 check_uni(); 3632 Aop(OP_CONCAT); 3633 } 3634 /* FALL THROUGH */ 3635 case '0': case '1': case '2': case '3': case '4': 3636 case '5': case '6': case '7': case '8': case '9': 3637 s = scan_num(s, &yylval); 3638 DEBUG_T( { PerlIO_printf(Perl_debug_log, 3639 "### Saw number in '%s'\n", s); 3640 } ); 3641 if (PL_expect == XOPERATOR) 3642 no_op("Number",s); 3643 TERM(THING); 3644 3645 case '\'': 3646 s = scan_str(s,FALSE,FALSE); 3647 DEBUG_T( { PerlIO_printf(Perl_debug_log, 3648 "### Saw string before '%s'\n", s); 3649 } ); 3650 if (PL_expect == XOPERATOR) { 3651 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 3652 PL_expect = XTERM; 3653 depcom(); 3654 return ','; /* grandfather non-comma-format format */ 3655 } 3656 else 3657 no_op("String",s); 3658 } 3659 if (!s) 3660 missingterm((char*)0); 3661 yylval.ival = OP_CONST; 3662 TERM(sublex_start()); 3663 3664 case '"': 3665 s = scan_str(s,FALSE,FALSE); 3666 DEBUG_T( { PerlIO_printf(Perl_debug_log, 3667 "### Saw string before '%s'\n", s); 3668 } ); 3669 if (PL_expect == XOPERATOR) { 3670 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 3671 PL_expect = XTERM; 3672 depcom(); 3673 return ','; /* grandfather non-comma-format format */ 3674 } 3675 else 3676 no_op("String",s); 3677 } 3678 if (!s) 3679 missingterm((char*)0); 3680 yylval.ival = OP_CONST; 3681 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 3682 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 3683 yylval.ival = OP_STRINGIFY; 3684 break; 3685 } 3686 } 3687 TERM(sublex_start()); 3688 3689 case '`': 3690 s = scan_str(s,FALSE,FALSE); 3691 DEBUG_T( { PerlIO_printf(Perl_debug_log, 3692 "### Saw backtick string before '%s'\n", s); 3693 } ); 3694 if (PL_expect == XOPERATOR) 3695 no_op("Backticks",s); 3696 if (!s) 3697 missingterm((char*)0); 3698 yylval.ival = OP_BACKTICK; 3699 set_csh(); 3700 TERM(sublex_start()); 3701 3702 case '\\': 3703 s++; 3704 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s)) 3705 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 3706 *s, *s); 3707 if (PL_expect == XOPERATOR) 3708 no_op("Backslash",s); 3709 OPERATOR(REFGEN); 3710 3711 case 'v': 3712 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 3713 char *start = s; 3714 start++; 3715 start++; 3716 while (isDIGIT(*start) || *start == '_') 3717 start++; 3718 if (*start == '.' && isDIGIT(start[1])) { 3719 s = scan_num(s, &yylval); 3720 TERM(THING); 3721 } 3722 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 3723 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) { 3724 char c = *start; 3725 GV *gv; 3726 *start = '\0'; 3727 gv = gv_fetchpv(s, FALSE, SVt_PVCV); 3728 *start = c; 3729 if (!gv) { 3730 s = scan_num(s, &yylval); 3731 TERM(THING); 3732 } 3733 } 3734 } 3735 goto keylookup; 3736 case 'x': 3737 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 3738 s++; 3739 Mop(OP_REPEAT); 3740 } 3741 goto keylookup; 3742 3743 case '_': 3744 case 'a': case 'A': 3745 case 'b': case 'B': 3746 case 'c': case 'C': 3747 case 'd': case 'D': 3748 case 'e': case 'E': 3749 case 'f': case 'F': 3750 case 'g': case 'G': 3751 case 'h': case 'H': 3752 case 'i': case 'I': 3753 case 'j': case 'J': 3754 case 'k': case 'K': 3755 case 'l': case 'L': 3756 case 'm': case 'M': 3757 case 'n': case 'N': 3758 case 'o': case 'O': 3759 case 'p': case 'P': 3760 case 'q': case 'Q': 3761 case 'r': case 'R': 3762 case 's': case 'S': 3763 case 't': case 'T': 3764 case 'u': case 'U': 3765 case 'V': 3766 case 'w': case 'W': 3767 case 'X': 3768 case 'y': case 'Y': 3769 case 'z': case 'Z': 3770 3771 keylookup: { 3772 gv = Nullgv; 3773 gvp = 0; 3774 3775 PL_bufptr = s; 3776 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 3777 3778 /* Some keywords can be followed by any delimiter, including ':' */ 3779 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) || 3780 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') || 3781 (PL_tokenbuf[0] == 'q' && 3782 strchr("qwxr", PL_tokenbuf[1]))))); 3783 3784 /* x::* is just a word, unless x is "CORE" */ 3785 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE")) 3786 goto just_a_word; 3787 3788 d = s; 3789 while (d < PL_bufend && isSPACE(*d)) 3790 d++; /* no comments skipped here, or s### is misparsed */ 3791 3792 /* Is this a label? */ 3793 if (!tmp && PL_expect == XSTATE 3794 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 3795 s = d + 1; 3796 yylval.pval = savepv(PL_tokenbuf); 3797 CLINE; 3798 TOKEN(LABEL); 3799 } 3800 3801 /* Check for keywords */ 3802 tmp = keyword(PL_tokenbuf, len); 3803 3804 /* Is this a word before a => operator? */ 3805 if (*d == '=' && d[1] == '>') { 3806 CLINE; 3807 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0)); 3808 yylval.opval->op_private = OPpCONST_BARE; 3809 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) 3810 SvUTF8_on(((SVOP*)yylval.opval)->op_sv); 3811 TERM(WORD); 3812 } 3813 3814 if (tmp < 0) { /* second-class keyword? */ 3815 GV *ogv = Nullgv; /* override (winner) */ 3816 GV *hgv = Nullgv; /* hidden (loser) */ 3817 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 3818 CV *cv; 3819 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) && 3820 (cv = GvCVu(gv))) 3821 { 3822 if (GvIMPORTED_CV(gv)) 3823 ogv = gv; 3824 else if (! CvMETHOD(cv)) 3825 hgv = gv; 3826 } 3827 if (!ogv && 3828 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) && 3829 (gv = *gvp) != (GV*)&PL_sv_undef && 3830 GvCVu(gv) && GvIMPORTED_CV(gv)) 3831 { 3832 ogv = gv; 3833 } 3834 } 3835 if (ogv) { 3836 tmp = 0; /* overridden by import or by GLOBAL */ 3837 } 3838 else if (gv && !gvp 3839 && -tmp==KEY_lock /* XXX generalizable kludge */ 3840 && GvCVu(gv) 3841 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE)) 3842 { 3843 tmp = 0; /* any sub overrides "weak" keyword */ 3844 } 3845 else { /* no override */ 3846 tmp = -tmp; 3847 if (tmp == KEY_dump && ckWARN(WARN_MISC)) { 3848 Perl_warner(aTHX_ packWARN(WARN_MISC), 3849 "dump() better written as CORE::dump()"); 3850 } 3851 gv = Nullgv; 3852 gvp = 0; 3853 if (ckWARN(WARN_AMBIGUOUS) && hgv 3854 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ 3855 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3856 "Ambiguous call resolved as CORE::%s(), %s", 3857 GvENAME(hgv), "qualify as such or use &"); 3858 } 3859 } 3860 3861 reserved_word: 3862 switch (tmp) { 3863 3864 default: /* not a keyword */ 3865 just_a_word: { 3866 SV *sv; 3867 int pkgname = 0; 3868 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 3869 3870 /* Get the rest if it looks like a package qualifier */ 3871 3872 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 3873 STRLEN morelen; 3874 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 3875 TRUE, &morelen); 3876 if (!morelen) 3877 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf, 3878 *s == '\'' ? "'" : "::"); 3879 len += morelen; 3880 pkgname = 1; 3881 } 3882 3883 if (PL_expect == XOPERATOR) { 3884 if (PL_bufptr == PL_linestart) { 3885 CopLINE_dec(PL_curcop); 3886 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi); 3887 CopLINE_inc(PL_curcop); 3888 } 3889 else 3890 no_op("Bareword",s); 3891 } 3892 3893 /* Look for a subroutine with this name in current package, 3894 unless name is "Foo::", in which case Foo is a bearword 3895 (and a package name). */ 3896 3897 if (len > 2 && 3898 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') 3899 { 3900 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV)) 3901 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 3902 "Bareword \"%s\" refers to nonexistent package", 3903 PL_tokenbuf); 3904 len -= 2; 3905 PL_tokenbuf[len] = '\0'; 3906 gv = Nullgv; 3907 gvp = 0; 3908 } 3909 else { 3910 len = 0; 3911 if (!gv) 3912 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV); 3913 } 3914 3915 /* if we saw a global override before, get the right name */ 3916 3917 if (gvp) { 3918 sv = newSVpvn("CORE::GLOBAL::",14); 3919 sv_catpv(sv,PL_tokenbuf); 3920 } 3921 else 3922 sv = newSVpv(PL_tokenbuf,0); 3923 3924 /* Presume this is going to be a bareword of some sort. */ 3925 3926 CLINE; 3927 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3928 yylval.opval->op_private = OPpCONST_BARE; 3929 /* UTF-8 package name? */ 3930 if (UTF && !IN_BYTES && 3931 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 3932 SvUTF8_on(sv); 3933 3934 /* And if "Foo::", then that's what it certainly is. */ 3935 3936 if (len) 3937 goto safe_bareword; 3938 3939 /* See if it's the indirect object for a list operator. */ 3940 3941 if (PL_oldoldbufptr && 3942 PL_oldoldbufptr < PL_bufptr && 3943 (PL_oldoldbufptr == PL_last_lop 3944 || PL_oldoldbufptr == PL_last_uni) && 3945 /* NO SKIPSPACE BEFORE HERE! */ 3946 (PL_expect == XREF || 3947 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF)) 3948 { 3949 bool immediate_paren = *s == '('; 3950 3951 /* (Now we can afford to cross potential line boundary.) */ 3952 s = skipspace(s); 3953 3954 /* Two barewords in a row may indicate method call. */ 3955 3956 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv))) 3957 return tmp; 3958 3959 /* If not a declared subroutine, it's an indirect object. */ 3960 /* (But it's an indir obj regardless for sort.) */ 3961 3962 if ( !immediate_paren && (PL_last_lop_op == OP_SORT || 3963 ((!gv || !GvCVu(gv)) && 3964 (PL_last_lop_op != OP_MAPSTART && 3965 PL_last_lop_op != OP_GREPSTART)))) 3966 { 3967 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 3968 goto bareword; 3969 } 3970 } 3971 3972 PL_expect = XOPERATOR; 3973 s = skipspace(s); 3974 3975 /* Is this a word before a => operator? */ 3976 if (*s == '=' && s[1] == '>' && !pkgname) { 3977 CLINE; 3978 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf); 3979 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) 3980 SvUTF8_on(((SVOP*)yylval.opval)->op_sv); 3981 TERM(WORD); 3982 } 3983 3984 /* If followed by a paren, it's certainly a subroutine. */ 3985 if (*s == '(') { 3986 CLINE; 3987 if (gv && GvCVu(gv)) { 3988 for (d = s + 1; SPACE_OR_TAB(*d); d++) ; 3989 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) { 3990 s = d + 1; 3991 goto its_constant; 3992 } 3993 } 3994 PL_nextval[PL_nexttoke].opval = yylval.opval; 3995 PL_expect = XOPERATOR; 3996 force_next(WORD); 3997 yylval.ival = 0; 3998 TOKEN('&'); 3999 } 4000 4001 /* If followed by var or block, call it a method (unless sub) */ 4002 4003 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) { 4004 PL_last_lop = PL_oldbufptr; 4005 PL_last_lop_op = OP_METHOD; 4006 PREBLOCK(METHOD); 4007 } 4008 4009 /* If followed by a bareword, see if it looks like indir obj. */ 4010 4011 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv))) 4012 return tmp; 4013 4014 /* Not a method, so call it a subroutine (if defined) */ 4015 4016 if (gv && GvCVu(gv)) { 4017 CV* cv; 4018 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS)) 4019 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 4020 "Ambiguous use of -%s resolved as -&%s()", 4021 PL_tokenbuf, PL_tokenbuf); 4022 /* Check for a constant sub */ 4023 cv = GvCV(gv); 4024 if ((sv = cv_const_sv(cv))) { 4025 its_constant: 4026 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv); 4027 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv); 4028 yylval.opval->op_private = 0; 4029 TOKEN(WORD); 4030 } 4031 4032 /* Resolve to GV now. */ 4033 op_free(yylval.opval); 4034 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); 4035 yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 4036 PL_last_lop = PL_oldbufptr; 4037 PL_last_lop_op = OP_ENTERSUB; 4038 /* Is there a prototype? */ 4039 if (SvPOK(cv)) { 4040 STRLEN len; 4041 char *proto = SvPV((SV*)cv, len); 4042 if (!len) 4043 TERM(FUNC0SUB); 4044 if (strEQ(proto, "$")) 4045 OPERATOR(UNIOPSUB); 4046 if (*proto == '&' && *s == '{') { 4047 sv_setpv(PL_subname, PL_curstash ? 4048 "__ANON__" : "__ANON__::__ANON__"); 4049 PREBLOCK(LSTOPSUB); 4050 } 4051 } 4052 PL_nextval[PL_nexttoke].opval = yylval.opval; 4053 PL_expect = XTERM; 4054 force_next(WORD); 4055 TOKEN(NOAMP); 4056 } 4057 4058 /* Call it a bare word */ 4059 4060 if (PL_hints & HINT_STRICT_SUBS) 4061 yylval.opval->op_private |= OPpCONST_STRICT; 4062 else { 4063 bareword: 4064 if (ckWARN(WARN_RESERVED)) { 4065 if (lastchar != '-') { 4066 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ; 4067 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE)) 4068 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 4069 PL_tokenbuf); 4070 } 4071 } 4072 } 4073 4074 safe_bareword: 4075 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) { 4076 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 4077 "Operator or semicolon missing before %c%s", 4078 lastchar, PL_tokenbuf); 4079 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 4080 "Ambiguous use of %c resolved as operator %c", 4081 lastchar, lastchar); 4082 } 4083 TOKEN(WORD); 4084 } 4085 4086 case KEY___FILE__: 4087 yylval.opval = (OP*)newSVOP(OP_CONST, 0, 4088 newSVpv(CopFILE(PL_curcop),0)); 4089 TERM(THING); 4090 4091 case KEY___LINE__: 4092 yylval.opval = (OP*)newSVOP(OP_CONST, 0, 4093 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))); 4094 TERM(THING); 4095 4096 case KEY___PACKAGE__: 4097 yylval.opval = (OP*)newSVOP(OP_CONST, 0, 4098 (PL_curstash 4099 ? newSVsv(PL_curstname) 4100 : &PL_sv_undef)); 4101 TERM(THING); 4102 4103 case KEY___DATA__: 4104 case KEY___END__: { 4105 GV *gv; 4106 4107 /*SUPPRESS 560*/ 4108 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 4109 char *pname = "main"; 4110 if (PL_tokenbuf[2] == 'D') 4111 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash); 4112 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO); 4113 GvMULTI_on(gv); 4114 if (!GvIO(gv)) 4115 GvIOp(gv) = newIO(); 4116 IoIFP(GvIOp(gv)) = PL_rsfp; 4117 #if defined(HAS_FCNTL) && defined(F_SETFD) 4118 { 4119 int fd = PerlIO_fileno(PL_rsfp); 4120 fcntl(fd,F_SETFD,fd >= 3); 4121 } 4122 #endif 4123 /* Mark this internal pseudo-handle as clean */ 4124 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 4125 if (PL_preprocess) 4126 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE; 4127 else if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 4128 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 4129 else 4130 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 4131 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 4132 /* if the script was opened in binmode, we need to revert 4133 * it to text mode for compatibility; but only iff it has CRs 4134 * XXX this is a questionable hack at best. */ 4135 if (PL_bufend-PL_bufptr > 2 4136 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 4137 { 4138 Off_t loc = 0; 4139 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 4140 loc = PerlIO_tell(PL_rsfp); 4141 (void)PerlIO_seek(PL_rsfp, 0L, 0); 4142 } 4143 #ifdef NETWARE 4144 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 4145 #else 4146 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 4147 #endif /* NETWARE */ 4148 #ifdef PERLIO_IS_STDIO /* really? */ 4149 # if defined(__BORLANDC__) 4150 /* XXX see note in do_binmode() */ 4151 ((FILE*)PL_rsfp)->flags &= ~_F_BIN; 4152 # endif 4153 #endif 4154 if (loc > 0) 4155 PerlIO_seek(PL_rsfp, loc, 0); 4156 } 4157 } 4158 #endif 4159 #ifdef PERLIO_LAYERS 4160 if (UTF && !IN_BYTES) 4161 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 4162 #endif 4163 PL_rsfp = Nullfp; 4164 } 4165 goto fake_eof; 4166 } 4167 4168 case KEY_AUTOLOAD: 4169 case KEY_DESTROY: 4170 case KEY_BEGIN: 4171 case KEY_CHECK: 4172 case KEY_INIT: 4173 case KEY_END: 4174 if (PL_expect == XSTATE) { 4175 s = PL_bufptr; 4176 goto really_sub; 4177 } 4178 goto just_a_word; 4179 4180 case KEY_CORE: 4181 if (*s == ':' && s[1] == ':') { 4182 s += 2; 4183 d = s; 4184 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 4185 if (!(tmp = keyword(PL_tokenbuf, len))) 4186 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf); 4187 if (tmp < 0) 4188 tmp = -tmp; 4189 goto reserved_word; 4190 } 4191 goto just_a_word; 4192 4193 case KEY_abs: 4194 UNI(OP_ABS); 4195 4196 case KEY_alarm: 4197 UNI(OP_ALARM); 4198 4199 case KEY_accept: 4200 LOP(OP_ACCEPT,XTERM); 4201 4202 case KEY_and: 4203 OPERATOR(ANDOP); 4204 4205 case KEY_atan2: 4206 LOP(OP_ATAN2,XTERM); 4207 4208 case KEY_bind: 4209 LOP(OP_BIND,XTERM); 4210 4211 case KEY_binmode: 4212 LOP(OP_BINMODE,XTERM); 4213 4214 case KEY_bless: 4215 LOP(OP_BLESS,XTERM); 4216 4217 case KEY_chop: 4218 UNI(OP_CHOP); 4219 4220 case KEY_continue: 4221 PREBLOCK(CONTINUE); 4222 4223 case KEY_chdir: 4224 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */ 4225 UNI(OP_CHDIR); 4226 4227 case KEY_close: 4228 UNI(OP_CLOSE); 4229 4230 case KEY_closedir: 4231 UNI(OP_CLOSEDIR); 4232 4233 case KEY_cmp: 4234 Eop(OP_SCMP); 4235 4236 case KEY_caller: 4237 UNI(OP_CALLER); 4238 4239 case KEY_crypt: 4240 #ifdef FCRYPT 4241 if (!PL_cryptseen) { 4242 PL_cryptseen = TRUE; 4243 init_des(); 4244 } 4245 #endif 4246 LOP(OP_CRYPT,XTERM); 4247 4248 case KEY_chmod: 4249 LOP(OP_CHMOD,XTERM); 4250 4251 case KEY_chown: 4252 LOP(OP_CHOWN,XTERM); 4253 4254 case KEY_connect: 4255 LOP(OP_CONNECT,XTERM); 4256 4257 case KEY_chr: 4258 UNI(OP_CHR); 4259 4260 case KEY_cos: 4261 UNI(OP_COS); 4262 4263 case KEY_chroot: 4264 UNI(OP_CHROOT); 4265 4266 case KEY_do: 4267 s = skipspace(s); 4268 if (*s == '{') 4269 PRETERMBLOCK(DO); 4270 if (*s != '\'') 4271 s = force_word(s,WORD,TRUE,TRUE,FALSE); 4272 OPERATOR(DO); 4273 4274 case KEY_die: 4275 PL_hints |= HINT_BLOCK_SCOPE; 4276 LOP(OP_DIE,XTERM); 4277 4278 case KEY_defined: 4279 UNI(OP_DEFINED); 4280 4281 case KEY_delete: 4282 UNI(OP_DELETE); 4283 4284 case KEY_dbmopen: 4285 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV); 4286 LOP(OP_DBMOPEN,XTERM); 4287 4288 case KEY_dbmclose: 4289 UNI(OP_DBMCLOSE); 4290 4291 case KEY_dump: 4292 s = force_word(s,WORD,TRUE,FALSE,FALSE); 4293 LOOPX(OP_DUMP); 4294 4295 case KEY_else: 4296 PREBLOCK(ELSE); 4297 4298 case KEY_elsif: 4299 yylval.ival = CopLINE(PL_curcop); 4300 OPERATOR(ELSIF); 4301 4302 case KEY_eq: 4303 Eop(OP_SEQ); 4304 4305 case KEY_exists: 4306 UNI(OP_EXISTS); 4307 4308 case KEY_exit: 4309 UNI(OP_EXIT); 4310 4311 case KEY_eval: 4312 s = skipspace(s); 4313 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM; 4314 UNIBRACK(OP_ENTEREVAL); 4315 4316 case KEY_eof: 4317 UNI(OP_EOF); 4318 4319 case KEY_exp: 4320 UNI(OP_EXP); 4321 4322 case KEY_each: 4323 UNI(OP_EACH); 4324 4325 case KEY_exec: 4326 set_csh(); 4327 LOP(OP_EXEC,XREF); 4328 4329 case KEY_endhostent: 4330 FUN0(OP_EHOSTENT); 4331 4332 case KEY_endnetent: 4333 FUN0(OP_ENETENT); 4334 4335 case KEY_endservent: 4336 FUN0(OP_ESERVENT); 4337 4338 case KEY_endprotoent: 4339 FUN0(OP_EPROTOENT); 4340 4341 case KEY_endpwent: 4342 FUN0(OP_EPWENT); 4343 4344 case KEY_endgrent: 4345 FUN0(OP_EGRENT); 4346 4347 case KEY_for: 4348 case KEY_foreach: 4349 yylval.ival = CopLINE(PL_curcop); 4350 s = skipspace(s); 4351 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 4352 char *p = s; 4353 if ((PL_bufend - p) >= 3 && 4354 strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 4355 p += 2; 4356 else if ((PL_bufend - p) >= 4 && 4357 strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 4358 p += 3; 4359 p = skipspace(p); 4360 if (isIDFIRST_lazy_if(p,UTF)) { 4361 p = scan_ident(p, PL_bufend, 4362 PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 4363 p = skipspace(p); 4364 } 4365 if (*p != '$') 4366 Perl_croak(aTHX_ "Missing $ on loop variable"); 4367 } 4368 OPERATOR(FOR); 4369 4370 case KEY_formline: 4371 LOP(OP_FORMLINE,XTERM); 4372 4373 case KEY_fork: 4374 FUN0(OP_FORK); 4375 4376 case KEY_fcntl: 4377 LOP(OP_FCNTL,XTERM); 4378 4379 case KEY_fileno: 4380 UNI(OP_FILENO); 4381 4382 case KEY_flock: 4383 LOP(OP_FLOCK,XTERM); 4384 4385 case KEY_gt: 4386 Rop(OP_SGT); 4387 4388 case KEY_ge: 4389 Rop(OP_SGE); 4390 4391 case KEY_grep: 4392 LOP(OP_GREPSTART, XREF); 4393 4394 case KEY_goto: 4395 s = force_word(s,WORD,TRUE,FALSE,FALSE); 4396 LOOPX(OP_GOTO); 4397 4398 case KEY_gmtime: 4399 UNI(OP_GMTIME); 4400 4401 case KEY_getc: 4402 UNI(OP_GETC); 4403 4404 case KEY_getppid: 4405 FUN0(OP_GETPPID); 4406 4407 case KEY_getpgrp: 4408 UNI(OP_GETPGRP); 4409 4410 case KEY_getpriority: 4411 LOP(OP_GETPRIORITY,XTERM); 4412 4413 case KEY_getprotobyname: 4414 UNI(OP_GPBYNAME); 4415 4416 case KEY_getprotobynumber: 4417 LOP(OP_GPBYNUMBER,XTERM); 4418 4419 case KEY_getprotoent: 4420 FUN0(OP_GPROTOENT); 4421 4422 case KEY_getpwent: 4423 FUN0(OP_GPWENT); 4424 4425 case KEY_getpwnam: 4426 UNI(OP_GPWNAM); 4427 4428 case KEY_getpwuid: 4429 UNI(OP_GPWUID); 4430 4431 case KEY_getpeername: 4432 UNI(OP_GETPEERNAME); 4433 4434 case KEY_gethostbyname: 4435 UNI(OP_GHBYNAME); 4436 4437 case KEY_gethostbyaddr: 4438 LOP(OP_GHBYADDR,XTERM); 4439 4440 case KEY_gethostent: 4441 FUN0(OP_GHOSTENT); 4442 4443 case KEY_getnetbyname: 4444 UNI(OP_GNBYNAME); 4445 4446 case KEY_getnetbyaddr: 4447 LOP(OP_GNBYADDR,XTERM); 4448 4449 case KEY_getnetent: 4450 FUN0(OP_GNETENT); 4451 4452 case KEY_getservbyname: 4453 LOP(OP_GSBYNAME,XTERM); 4454 4455 case KEY_getservbyport: 4456 LOP(OP_GSBYPORT,XTERM); 4457 4458 case KEY_getservent: 4459 FUN0(OP_GSERVENT); 4460 4461 case KEY_getsockname: 4462 UNI(OP_GETSOCKNAME); 4463 4464 case KEY_getsockopt: 4465 LOP(OP_GSOCKOPT,XTERM); 4466 4467 case KEY_getgrent: 4468 FUN0(OP_GGRENT); 4469 4470 case KEY_getgrnam: 4471 UNI(OP_GGRNAM); 4472 4473 case KEY_getgrgid: 4474 UNI(OP_GGRGID); 4475 4476 case KEY_getlogin: 4477 FUN0(OP_GETLOGIN); 4478 4479 case KEY_glob: 4480 set_csh(); 4481 LOP(OP_GLOB,XTERM); 4482 4483 case KEY_hex: 4484 UNI(OP_HEX); 4485 4486 case KEY_if: 4487 yylval.ival = CopLINE(PL_curcop); 4488 OPERATOR(IF); 4489 4490 case KEY_index: 4491 LOP(OP_INDEX,XTERM); 4492 4493 case KEY_int: 4494 UNI(OP_INT); 4495 4496 case KEY_ioctl: 4497 LOP(OP_IOCTL,XTERM); 4498 4499 case KEY_join: 4500 LOP(OP_JOIN,XTERM); 4501 4502 case KEY_keys: 4503 UNI(OP_KEYS); 4504 4505 case KEY_kill: 4506 LOP(OP_KILL,XTERM); 4507 4508 case KEY_last: 4509 s = force_word(s,WORD,TRUE,FALSE,FALSE); 4510 LOOPX(OP_LAST); 4511 4512 case KEY_lc: 4513 UNI(OP_LC); 4514 4515 case KEY_lcfirst: 4516 UNI(OP_LCFIRST); 4517 4518 case KEY_local: 4519 yylval.ival = 0; 4520 OPERATOR(LOCAL); 4521 4522 case KEY_length: 4523 UNI(OP_LENGTH); 4524 4525 case KEY_lt: 4526 Rop(OP_SLT); 4527 4528 case KEY_le: 4529 Rop(OP_SLE); 4530 4531 case KEY_localtime: 4532 UNI(OP_LOCALTIME); 4533 4534 case KEY_log: 4535 UNI(OP_LOG); 4536 4537 case KEY_link: 4538 LOP(OP_LINK,XTERM); 4539 4540 case KEY_listen: 4541 LOP(OP_LISTEN,XTERM); 4542 4543 case KEY_lock: 4544 UNI(OP_LOCK); 4545 4546 case KEY_lstat: 4547 UNI(OP_LSTAT); 4548 4549 case KEY_m: 4550 s = scan_pat(s,OP_MATCH); 4551 TERM(sublex_start()); 4552 4553 case KEY_map: 4554 LOP(OP_MAPSTART, XREF); 4555 4556 case KEY_mkdir: 4557 LOP(OP_MKDIR,XTERM); 4558 4559 case KEY_msgctl: 4560 LOP(OP_MSGCTL,XTERM); 4561 4562 case KEY_msgget: 4563 LOP(OP_MSGGET,XTERM); 4564 4565 case KEY_msgrcv: 4566 LOP(OP_MSGRCV,XTERM); 4567 4568 case KEY_msgsnd: 4569 LOP(OP_MSGSND,XTERM); 4570 4571 case KEY_our: 4572 case KEY_my: 4573 PL_in_my = tmp; 4574 s = skipspace(s); 4575 if (isIDFIRST_lazy_if(s,UTF)) { 4576 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 4577 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 4578 goto really_sub; 4579 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 4580 if (!PL_in_my_stash) { 4581 char tmpbuf[1024]; 4582 PL_bufptr = s; 4583 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf); 4584 yyerror(tmpbuf); 4585 } 4586 } 4587 yylval.ival = 1; 4588 OPERATOR(MY); 4589 4590 case KEY_next: 4591 s = force_word(s,WORD,TRUE,FALSE,FALSE); 4592 LOOPX(OP_NEXT); 4593 4594 case KEY_ne: 4595 Eop(OP_SNE); 4596 4597 case KEY_no: 4598 if (PL_expect != XSTATE) 4599 yyerror("\"no\" not allowed in expression"); 4600 s = force_word(s,WORD,FALSE,TRUE,FALSE); 4601 s = force_version(s, FALSE); 4602 yylval.ival = 0; 4603 OPERATOR(USE); 4604 4605 case KEY_not: 4606 if (*s == '(' || (s = skipspace(s), *s == '(')) 4607 FUN1(OP_NOT); 4608 else 4609 OPERATOR(NOTOP); 4610 4611 case KEY_open: 4612 s = skipspace(s); 4613 if (isIDFIRST_lazy_if(s,UTF)) { 4614 char *t; 4615 for (d = s; isALNUM_lazy_if(d,UTF); d++) ; 4616 t = skipspace(d); 4617 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)) 4618 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 4619 "Precedence problem: open %.*s should be open(%.*s)", 4620 d-s,s, d-s,s); 4621 } 4622 LOP(OP_OPEN,XTERM); 4623 4624 case KEY_or: 4625 yylval.ival = OP_OR; 4626 OPERATOR(OROP); 4627 4628 case KEY_ord: 4629 UNI(OP_ORD); 4630 4631 case KEY_oct: 4632 UNI(OP_OCT); 4633 4634 case KEY_opendir: 4635 LOP(OP_OPEN_DIR,XTERM); 4636 4637 case KEY_print: 4638 checkcomma(s,PL_tokenbuf,"filehandle"); 4639 LOP(OP_PRINT,XREF); 4640 4641 case KEY_printf: 4642 checkcomma(s,PL_tokenbuf,"filehandle"); 4643 LOP(OP_PRTF,XREF); 4644 4645 case KEY_prototype: 4646 UNI(OP_PROTOTYPE); 4647 4648 case KEY_push: 4649 LOP(OP_PUSH,XTERM); 4650 4651 case KEY_pop: 4652 UNI(OP_POP); 4653 4654 case KEY_pos: 4655 UNI(OP_POS); 4656 4657 case KEY_pack: 4658 LOP(OP_PACK,XTERM); 4659 4660 case KEY_package: 4661 s = force_word(s,WORD,FALSE,TRUE,FALSE); 4662 OPERATOR(PACKAGE); 4663 4664 case KEY_pipe: 4665 LOP(OP_PIPE_OP,XTERM); 4666 4667 case KEY_q: 4668 s = scan_str(s,FALSE,FALSE); 4669 if (!s) 4670 missingterm((char*)0); 4671 yylval.ival = OP_CONST; 4672 TERM(sublex_start()); 4673 4674 case KEY_quotemeta: 4675 UNI(OP_QUOTEMETA); 4676 4677 case KEY_qw: 4678 s = scan_str(s,FALSE,FALSE); 4679 if (!s) 4680 missingterm((char*)0); 4681 force_next(')'); 4682 if (SvCUR(PL_lex_stuff)) { 4683 OP *words = Nullop; 4684 int warned = 0; 4685 d = SvPV_force(PL_lex_stuff, len); 4686 while (len) { 4687 SV *sv; 4688 for (; isSPACE(*d) && len; --len, ++d) ; 4689 if (len) { 4690 char *b = d; 4691 if (!warned && ckWARN(WARN_QW)) { 4692 for (; !isSPACE(*d) && len; --len, ++d) { 4693 if (*d == ',') { 4694 Perl_warner(aTHX_ packWARN(WARN_QW), 4695 "Possible attempt to separate words with commas"); 4696 ++warned; 4697 } 4698 else if (*d == '#') { 4699 Perl_warner(aTHX_ packWARN(WARN_QW), 4700 "Possible attempt to put comments in qw() list"); 4701 ++warned; 4702 } 4703 } 4704 } 4705 else { 4706 for (; !isSPACE(*d) && len; --len, ++d) ; 4707 } 4708 sv = newSVpvn(b, d-b); 4709 if (DO_UTF8(PL_lex_stuff)) 4710 SvUTF8_on(sv); 4711 words = append_elem(OP_LIST, words, 4712 newSVOP(OP_CONST, 0, tokeq(sv))); 4713 } 4714 } 4715 if (words) { 4716 PL_nextval[PL_nexttoke].opval = words; 4717 force_next(THING); 4718 } 4719 } 4720 if (PL_lex_stuff) { 4721 SvREFCNT_dec(PL_lex_stuff); 4722 PL_lex_stuff = Nullsv; 4723 } 4724 PL_expect = XTERM; 4725 TOKEN('('); 4726 4727 case KEY_qq: 4728 s = scan_str(s,FALSE,FALSE); 4729 if (!s) 4730 missingterm((char*)0); 4731 yylval.ival = OP_STRINGIFY; 4732 if (SvIVX(PL_lex_stuff) == '\'') 4733 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */ 4734 TERM(sublex_start()); 4735 4736 case KEY_qr: 4737 s = scan_pat(s,OP_QR); 4738 TERM(sublex_start()); 4739 4740 case KEY_qx: 4741 s = scan_str(s,FALSE,FALSE); 4742 if (!s) 4743 missingterm((char*)0); 4744 yylval.ival = OP_BACKTICK; 4745 set_csh(); 4746 TERM(sublex_start()); 4747 4748 case KEY_return: 4749 OLDLOP(OP_RETURN); 4750 4751 case KEY_require: 4752 s = skipspace(s); 4753 if (isDIGIT(*s)) { 4754 s = force_version(s, FALSE); 4755 } 4756 else if (*s != 'v' || !isDIGIT(s[1]) 4757 || (s = force_version(s, TRUE), *s == 'v')) 4758 { 4759 *PL_tokenbuf = '\0'; 4760 s = force_word(s,WORD,TRUE,TRUE,FALSE); 4761 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 4762 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE); 4763 else if (*s == '<') 4764 yyerror("<> should be quotes"); 4765 } 4766 UNI(OP_REQUIRE); 4767 4768 case KEY_reset: 4769 UNI(OP_RESET); 4770 4771 case KEY_redo: 4772 s = force_word(s,WORD,TRUE,FALSE,FALSE); 4773 LOOPX(OP_REDO); 4774 4775 case KEY_rename: 4776 LOP(OP_RENAME,XTERM); 4777 4778 case KEY_rand: 4779 UNI(OP_RAND); 4780 4781 case KEY_rmdir: 4782 UNI(OP_RMDIR); 4783 4784 case KEY_rindex: 4785 LOP(OP_RINDEX,XTERM); 4786 4787 case KEY_read: 4788 LOP(OP_READ,XTERM); 4789 4790 case KEY_readdir: 4791 UNI(OP_READDIR); 4792 4793 case KEY_readline: 4794 set_csh(); 4795 UNI(OP_READLINE); 4796 4797 case KEY_readpipe: 4798 set_csh(); 4799 UNI(OP_BACKTICK); 4800 4801 case KEY_rewinddir: 4802 UNI(OP_REWINDDIR); 4803 4804 case KEY_recv: 4805 LOP(OP_RECV,XTERM); 4806 4807 case KEY_reverse: 4808 LOP(OP_REVERSE,XTERM); 4809 4810 case KEY_readlink: 4811 UNI(OP_READLINK); 4812 4813 case KEY_ref: 4814 UNI(OP_REF); 4815 4816 case KEY_s: 4817 s = scan_subst(s); 4818 if (yylval.opval) 4819 TERM(sublex_start()); 4820 else 4821 TOKEN(1); /* force error */ 4822 4823 case KEY_chomp: 4824 UNI(OP_CHOMP); 4825 4826 case KEY_scalar: 4827 UNI(OP_SCALAR); 4828 4829 case KEY_select: 4830 LOP(OP_SELECT,XTERM); 4831 4832 case KEY_seek: 4833 LOP(OP_SEEK,XTERM); 4834 4835 case KEY_semctl: 4836 LOP(OP_SEMCTL,XTERM); 4837 4838 case KEY_semget: 4839 LOP(OP_SEMGET,XTERM); 4840 4841 case KEY_semop: 4842 LOP(OP_SEMOP,XTERM); 4843 4844 case KEY_send: 4845 LOP(OP_SEND,XTERM); 4846 4847 case KEY_setpgrp: 4848 LOP(OP_SETPGRP,XTERM); 4849 4850 case KEY_setpriority: 4851 LOP(OP_SETPRIORITY,XTERM); 4852 4853 case KEY_sethostent: 4854 UNI(OP_SHOSTENT); 4855 4856 case KEY_setnetent: 4857 UNI(OP_SNETENT); 4858 4859 case KEY_setservent: 4860 UNI(OP_SSERVENT); 4861 4862 case KEY_setprotoent: 4863 UNI(OP_SPROTOENT); 4864 4865 case KEY_setpwent: 4866 FUN0(OP_SPWENT); 4867 4868 case KEY_setgrent: 4869 FUN0(OP_SGRENT); 4870 4871 case KEY_seekdir: 4872 LOP(OP_SEEKDIR,XTERM); 4873 4874 case KEY_setsockopt: 4875 LOP(OP_SSOCKOPT,XTERM); 4876 4877 case KEY_shift: 4878 UNI(OP_SHIFT); 4879 4880 case KEY_shmctl: 4881 LOP(OP_SHMCTL,XTERM); 4882 4883 case KEY_shmget: 4884 LOP(OP_SHMGET,XTERM); 4885 4886 case KEY_shmread: 4887 LOP(OP_SHMREAD,XTERM); 4888 4889 case KEY_shmwrite: 4890 LOP(OP_SHMWRITE,XTERM); 4891 4892 case KEY_shutdown: 4893 LOP(OP_SHUTDOWN,XTERM); 4894 4895 case KEY_sin: 4896 UNI(OP_SIN); 4897 4898 case KEY_sleep: 4899 UNI(OP_SLEEP); 4900 4901 case KEY_socket: 4902 LOP(OP_SOCKET,XTERM); 4903 4904 case KEY_socketpair: 4905 LOP(OP_SOCKPAIR,XTERM); 4906 4907 case KEY_sort: 4908 checkcomma(s,PL_tokenbuf,"subroutine name"); 4909 s = skipspace(s); 4910 if (*s == ';' || *s == ')') /* probably a close */ 4911 Perl_croak(aTHX_ "sort is now a reserved word"); 4912 PL_expect = XTERM; 4913 s = force_word(s,WORD,TRUE,TRUE,FALSE); 4914 LOP(OP_SORT,XREF); 4915 4916 case KEY_split: 4917 LOP(OP_SPLIT,XTERM); 4918 4919 case KEY_sprintf: 4920 LOP(OP_SPRINTF,XTERM); 4921 4922 case KEY_splice: 4923 LOP(OP_SPLICE,XTERM); 4924 4925 case KEY_sqrt: 4926 UNI(OP_SQRT); 4927 4928 case KEY_srand: 4929 UNI(OP_SRAND); 4930 4931 case KEY_stat: 4932 UNI(OP_STAT); 4933 4934 case KEY_study: 4935 UNI(OP_STUDY); 4936 4937 case KEY_substr: 4938 LOP(OP_SUBSTR,XTERM); 4939 4940 case KEY_format: 4941 case KEY_sub: 4942 really_sub: 4943 { 4944 char tmpbuf[sizeof PL_tokenbuf]; 4945 SSize_t tboffset = 0; 4946 expectation attrful; 4947 bool have_name, have_proto, bad_proto; 4948 int key = tmp; 4949 4950 s = skipspace(s); 4951 4952 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' || 4953 (*s == ':' && s[1] == ':')) 4954 { 4955 PL_expect = XBLOCK; 4956 attrful = XATTRBLOCK; 4957 /* remember buffer pos'n for later force_word */ 4958 tboffset = s - PL_oldbufptr; 4959 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4960 if (strchr(tmpbuf, ':')) 4961 sv_setpv(PL_subname, tmpbuf); 4962 else { 4963 sv_setsv(PL_subname,PL_curstname); 4964 sv_catpvn(PL_subname,"::",2); 4965 sv_catpvn(PL_subname,tmpbuf,len); 4966 } 4967 s = skipspace(d); 4968 have_name = TRUE; 4969 } 4970 else { 4971 if (key == KEY_my) 4972 Perl_croak(aTHX_ "Missing name in \"my sub\""); 4973 PL_expect = XTERMBLOCK; 4974 attrful = XATTRTERM; 4975 sv_setpv(PL_subname,"?"); 4976 have_name = FALSE; 4977 } 4978 4979 if (key == KEY_format) { 4980 if (*s == '=') 4981 PL_lex_formbrack = PL_lex_brackets + 1; 4982 if (have_name) 4983 (void) force_word(PL_oldbufptr + tboffset, WORD, 4984 FALSE, TRUE, TRUE); 4985 OPERATOR(FORMAT); 4986 } 4987 4988 /* Look for a prototype */ 4989 if (*s == '(') { 4990 char *p; 4991 4992 s = scan_str(s,FALSE,FALSE); 4993 if (!s) 4994 Perl_croak(aTHX_ "Prototype not terminated"); 4995 /* strip spaces and check for bad characters */ 4996 d = SvPVX(PL_lex_stuff); 4997 tmp = 0; 4998 bad_proto = FALSE; 4999 for (p = d; *p; ++p) { 5000 if (!isSPACE(*p)) { 5001 d[tmp++] = *p; 5002 if (!strchr("$@%*;[]&\\", *p)) 5003 bad_proto = TRUE; 5004 } 5005 } 5006 d[tmp] = '\0'; 5007 if (bad_proto && ckWARN(WARN_SYNTAX)) 5008 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5009 "Illegal character in prototype for %s : %s", 5010 SvPVX(PL_subname), d); 5011 SvCUR(PL_lex_stuff) = tmp; 5012 have_proto = TRUE; 5013 5014 s = skipspace(s); 5015 } 5016 else 5017 have_proto = FALSE; 5018 5019 if (*s == ':' && s[1] != ':') 5020 PL_expect = attrful; 5021 5022 if (have_proto) { 5023 PL_nextval[PL_nexttoke].opval = 5024 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 5025 PL_lex_stuff = Nullsv; 5026 force_next(THING); 5027 } 5028 if (!have_name) { 5029 sv_setpv(PL_subname, 5030 PL_curstash ? "__ANON__" : "__ANON__::__ANON__"); 5031 TOKEN(ANONSUB); 5032 } 5033 (void) force_word(PL_oldbufptr + tboffset, WORD, 5034 FALSE, TRUE, TRUE); 5035 if (key == KEY_my) 5036 TOKEN(MYSUB); 5037 TOKEN(SUB); 5038 } 5039 5040 case KEY_system: 5041 set_csh(); 5042 LOP(OP_SYSTEM,XREF); 5043 5044 case KEY_symlink: 5045 LOP(OP_SYMLINK,XTERM); 5046 5047 case KEY_syscall: 5048 LOP(OP_SYSCALL,XTERM); 5049 5050 case KEY_sysopen: 5051 LOP(OP_SYSOPEN,XTERM); 5052 5053 case KEY_sysseek: 5054 LOP(OP_SYSSEEK,XTERM); 5055 5056 case KEY_sysread: 5057 LOP(OP_SYSREAD,XTERM); 5058 5059 case KEY_syswrite: 5060 LOP(OP_SYSWRITE,XTERM); 5061 5062 case KEY_tr: 5063 s = scan_trans(s); 5064 TERM(sublex_start()); 5065 5066 case KEY_tell: 5067 UNI(OP_TELL); 5068 5069 case KEY_telldir: 5070 UNI(OP_TELLDIR); 5071 5072 case KEY_tie: 5073 LOP(OP_TIE,XTERM); 5074 5075 case KEY_tied: 5076 UNI(OP_TIED); 5077 5078 case KEY_time: 5079 FUN0(OP_TIME); 5080 5081 case KEY_times: 5082 FUN0(OP_TMS); 5083 5084 case KEY_truncate: 5085 LOP(OP_TRUNCATE,XTERM); 5086 5087 case KEY_uc: 5088 UNI(OP_UC); 5089 5090 case KEY_ucfirst: 5091 UNI(OP_UCFIRST); 5092 5093 case KEY_untie: 5094 UNI(OP_UNTIE); 5095 5096 case KEY_until: 5097 yylval.ival = CopLINE(PL_curcop); 5098 OPERATOR(UNTIL); 5099 5100 case KEY_unless: 5101 yylval.ival = CopLINE(PL_curcop); 5102 OPERATOR(UNLESS); 5103 5104 case KEY_unlink: 5105 LOP(OP_UNLINK,XTERM); 5106 5107 case KEY_undef: 5108 UNI(OP_UNDEF); 5109 5110 case KEY_unpack: 5111 LOP(OP_UNPACK,XTERM); 5112 5113 case KEY_utime: 5114 LOP(OP_UTIME,XTERM); 5115 5116 case KEY_umask: 5117 UNI(OP_UMASK); 5118 5119 case KEY_unshift: 5120 LOP(OP_UNSHIFT,XTERM); 5121 5122 case KEY_use: 5123 if (PL_expect != XSTATE) 5124 yyerror("\"use\" not allowed in expression"); 5125 s = skipspace(s); 5126 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 5127 s = force_version(s, TRUE); 5128 if (*s == ';' || (s = skipspace(s), *s == ';')) { 5129 PL_nextval[PL_nexttoke].opval = Nullop; 5130 force_next(WORD); 5131 } 5132 else if (*s == 'v') { 5133 s = force_word(s,WORD,FALSE,TRUE,FALSE); 5134 s = force_version(s, FALSE); 5135 } 5136 } 5137 else { 5138 s = force_word(s,WORD,FALSE,TRUE,FALSE); 5139 s = force_version(s, FALSE); 5140 } 5141 yylval.ival = 1; 5142 OPERATOR(USE); 5143 5144 case KEY_values: 5145 UNI(OP_VALUES); 5146 5147 case KEY_vec: 5148 LOP(OP_VEC,XTERM); 5149 5150 case KEY_while: 5151 yylval.ival = CopLINE(PL_curcop); 5152 OPERATOR(WHILE); 5153 5154 case KEY_warn: 5155 PL_hints |= HINT_BLOCK_SCOPE; 5156 LOP(OP_WARN,XTERM); 5157 5158 case KEY_wait: 5159 FUN0(OP_WAIT); 5160 5161 case KEY_waitpid: 5162 LOP(OP_WAITPID,XTERM); 5163 5164 case KEY_wantarray: 5165 FUN0(OP_WANTARRAY); 5166 5167 case KEY_write: 5168 #ifdef EBCDIC 5169 { 5170 char ctl_l[2]; 5171 ctl_l[0] = toCTRL('L'); 5172 ctl_l[1] = '\0'; 5173 gv_fetchpv(ctl_l,TRUE, SVt_PV); 5174 } 5175 #else 5176 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */ 5177 #endif 5178 UNI(OP_ENTERWRITE); 5179 5180 case KEY_x: 5181 if (PL_expect == XOPERATOR) 5182 Mop(OP_REPEAT); 5183 check_uni(); 5184 goto just_a_word; 5185 5186 case KEY_xor: 5187 yylval.ival = OP_XOR; 5188 OPERATOR(OROP); 5189 5190 case KEY_y: 5191 s = scan_trans(s); 5192 TERM(sublex_start()); 5193 } 5194 }} 5195 } 5196 #ifdef __SC__ 5197 #pragma segment Main 5198 #endif 5199 5200 static int 5201 S_pending_ident(pTHX) 5202 { 5203 register char *d; 5204 register I32 tmp; 5205 /* pit holds the identifier we read and pending_ident is reset */ 5206 char pit = PL_pending_ident; 5207 PL_pending_ident = 0; 5208 5209 DEBUG_T({ PerlIO_printf(Perl_debug_log, 5210 "### Tokener saw identifier '%s'\n", PL_tokenbuf); }); 5211 5212 /* if we're in a my(), we can't allow dynamics here. 5213 $foo'bar has already been turned into $foo::bar, so 5214 just check for colons. 5215 5216 if it's a legal name, the OP is a PADANY. 5217 */ 5218 if (PL_in_my) { 5219 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 5220 if (strchr(PL_tokenbuf,':')) 5221 yyerror(Perl_form(aTHX_ "No package name allowed for " 5222 "variable %s in \"our\"", 5223 PL_tokenbuf)); 5224 tmp = pad_allocmy(PL_tokenbuf); 5225 } 5226 else { 5227 if (strchr(PL_tokenbuf,':')) 5228 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf)); 5229 5230 yylval.opval = newOP(OP_PADANY, 0); 5231 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf); 5232 return PRIVATEREF; 5233 } 5234 } 5235 5236 /* 5237 build the ops for accesses to a my() variable. 5238 5239 Deny my($a) or my($b) in a sort block, *if* $a or $b is 5240 then used in a comparison. This catches most, but not 5241 all cases. For instance, it catches 5242 sort { my($a); $a <=> $b } 5243 but not 5244 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; } 5245 (although why you'd do that is anyone's guess). 5246 */ 5247 5248 if (!strchr(PL_tokenbuf,':')) { 5249 #ifdef USE_5005THREADS 5250 /* Check for single character per-thread SVs */ 5251 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0' 5252 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */ 5253 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD) 5254 { 5255 yylval.opval = newOP(OP_THREADSV, 0); 5256 yylval.opval->op_targ = tmp; 5257 return PRIVATEREF; 5258 } 5259 #endif /* USE_5005THREADS */ 5260 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) { 5261 SV *namesv = AvARRAY(PL_comppad_name)[tmp]; 5262 /* might be an "our" variable" */ 5263 if (SvFLAGS(namesv) & SVpad_OUR) { 5264 /* build ops for a bareword */ 5265 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0); 5266 sv_catpvn(sym, "::", 2); 5267 sv_catpv(sym, PL_tokenbuf+1); 5268 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 5269 yylval.opval->op_private = OPpCONST_ENTERED; 5270 gv_fetchpv(SvPVX(sym), 5271 (PL_in_eval 5272 ? (GV_ADDMULTI | GV_ADDINEVAL) 5273 : GV_ADDMULTI 5274 ), 5275 ((PL_tokenbuf[0] == '$') ? SVt_PV 5276 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 5277 : SVt_PVHV)); 5278 return WORD; 5279 } 5280 5281 /* if it's a sort block and they're naming $a or $b */ 5282 if (PL_last_lop_op == OP_SORT && 5283 PL_tokenbuf[0] == '$' && 5284 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b') 5285 && !PL_tokenbuf[2]) 5286 { 5287 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart; 5288 d < PL_bufend && *d != '\n'; 5289 d++) 5290 { 5291 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { 5292 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison", 5293 PL_tokenbuf); 5294 } 5295 } 5296 } 5297 5298 yylval.opval = newOP(OP_PADANY, 0); 5299 yylval.opval->op_targ = tmp; 5300 return PRIVATEREF; 5301 } 5302 } 5303 5304 /* 5305 Whine if they've said @foo in a doublequoted string, 5306 and @foo isn't a variable we can find in the symbol 5307 table. 5308 */ 5309 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { 5310 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV); 5311 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 5312 && ckWARN(WARN_AMBIGUOUS)) 5313 { 5314 /* Downgraded from fatal to warning 20000522 mjd */ 5315 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 5316 "Possible unintended interpolation of %s in string", 5317 PL_tokenbuf); 5318 } 5319 } 5320 5321 /* build ops for a bareword */ 5322 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0)); 5323 yylval.opval->op_private = OPpCONST_ENTERED; 5324 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE, 5325 ((PL_tokenbuf[0] == '$') ? SVt_PV 5326 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 5327 : SVt_PVHV)); 5328 return WORD; 5329 } 5330 5331 I32 5332 Perl_keyword(pTHX_ register char *d, I32 len) 5333 { 5334 switch (*d) { 5335 case '_': 5336 if (d[1] == '_') { 5337 if (strEQ(d,"__FILE__")) return -KEY___FILE__; 5338 if (strEQ(d,"__LINE__")) return -KEY___LINE__; 5339 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__; 5340 if (strEQ(d,"__DATA__")) return KEY___DATA__; 5341 if (strEQ(d,"__END__")) return KEY___END__; 5342 } 5343 break; 5344 case 'A': 5345 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD; 5346 break; 5347 case 'a': 5348 switch (len) { 5349 case 3: 5350 if (strEQ(d,"and")) return -KEY_and; 5351 if (strEQ(d,"abs")) return -KEY_abs; 5352 break; 5353 case 5: 5354 if (strEQ(d,"alarm")) return -KEY_alarm; 5355 if (strEQ(d,"atan2")) return -KEY_atan2; 5356 break; 5357 case 6: 5358 if (strEQ(d,"accept")) return -KEY_accept; 5359 break; 5360 } 5361 break; 5362 case 'B': 5363 if (strEQ(d,"BEGIN")) return KEY_BEGIN; 5364 break; 5365 case 'b': 5366 if (strEQ(d,"bless")) return -KEY_bless; 5367 if (strEQ(d,"bind")) return -KEY_bind; 5368 if (strEQ(d,"binmode")) return -KEY_binmode; 5369 break; 5370 case 'C': 5371 if (strEQ(d,"CORE")) return -KEY_CORE; 5372 if (strEQ(d,"CHECK")) return KEY_CHECK; 5373 break; 5374 case 'c': 5375 switch (len) { 5376 case 3: 5377 if (strEQ(d,"cmp")) return -KEY_cmp; 5378 if (strEQ(d,"chr")) return -KEY_chr; 5379 if (strEQ(d,"cos")) return -KEY_cos; 5380 break; 5381 case 4: 5382 if (strEQ(d,"chop")) return -KEY_chop; 5383 break; 5384 case 5: 5385 if (strEQ(d,"close")) return -KEY_close; 5386 if (strEQ(d,"chdir")) return -KEY_chdir; 5387 if (strEQ(d,"chomp")) return -KEY_chomp; 5388 if (strEQ(d,"chmod")) return -KEY_chmod; 5389 if (strEQ(d,"chown")) return -KEY_chown; 5390 if (strEQ(d,"crypt")) return -KEY_crypt; 5391 break; 5392 case 6: 5393 if (strEQ(d,"chroot")) return -KEY_chroot; 5394 if (strEQ(d,"caller")) return -KEY_caller; 5395 break; 5396 case 7: 5397 if (strEQ(d,"connect")) return -KEY_connect; 5398 break; 5399 case 8: 5400 if (strEQ(d,"closedir")) return -KEY_closedir; 5401 if (strEQ(d,"continue")) return -KEY_continue; 5402 break; 5403 } 5404 break; 5405 case 'D': 5406 if (strEQ(d,"DESTROY")) return KEY_DESTROY; 5407 break; 5408 case 'd': 5409 switch (len) { 5410 case 2: 5411 if (strEQ(d,"do")) return KEY_do; 5412 break; 5413 case 3: 5414 if (strEQ(d,"die")) return -KEY_die; 5415 break; 5416 case 4: 5417 if (strEQ(d,"dump")) return -KEY_dump; 5418 break; 5419 case 6: 5420 if (strEQ(d,"delete")) return KEY_delete; 5421 break; 5422 case 7: 5423 if (strEQ(d,"defined")) return KEY_defined; 5424 if (strEQ(d,"dbmopen")) return -KEY_dbmopen; 5425 break; 5426 case 8: 5427 if (strEQ(d,"dbmclose")) return -KEY_dbmclose; 5428 break; 5429 } 5430 break; 5431 case 'E': 5432 if (strEQ(d,"END")) return KEY_END; 5433 break; 5434 case 'e': 5435 switch (len) { 5436 case 2: 5437 if (strEQ(d,"eq")) return -KEY_eq; 5438 break; 5439 case 3: 5440 if (strEQ(d,"eof")) return -KEY_eof; 5441 if (strEQ(d,"exp")) return -KEY_exp; 5442 break; 5443 case 4: 5444 if (strEQ(d,"else")) return KEY_else; 5445 if (strEQ(d,"exit")) return -KEY_exit; 5446 if (strEQ(d,"eval")) return KEY_eval; 5447 if (strEQ(d,"exec")) return -KEY_exec; 5448 if (strEQ(d,"each")) return -KEY_each; 5449 break; 5450 case 5: 5451 if (strEQ(d,"elsif")) return KEY_elsif; 5452 break; 5453 case 6: 5454 if (strEQ(d,"exists")) return KEY_exists; 5455 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif"); 5456 break; 5457 case 8: 5458 if (strEQ(d,"endgrent")) return -KEY_endgrent; 5459 if (strEQ(d,"endpwent")) return -KEY_endpwent; 5460 break; 5461 case 9: 5462 if (strEQ(d,"endnetent")) return -KEY_endnetent; 5463 break; 5464 case 10: 5465 if (strEQ(d,"endhostent")) return -KEY_endhostent; 5466 if (strEQ(d,"endservent")) return -KEY_endservent; 5467 break; 5468 case 11: 5469 if (strEQ(d,"endprotoent")) return -KEY_endprotoent; 5470 break; 5471 } 5472 break; 5473 case 'f': 5474 switch (len) { 5475 case 3: 5476 if (strEQ(d,"for")) return KEY_for; 5477 break; 5478 case 4: 5479 if (strEQ(d,"fork")) return -KEY_fork; 5480 break; 5481 case 5: 5482 if (strEQ(d,"fcntl")) return -KEY_fcntl; 5483 if (strEQ(d,"flock")) return -KEY_flock; 5484 break; 5485 case 6: 5486 if (strEQ(d,"format")) return KEY_format; 5487 if (strEQ(d,"fileno")) return -KEY_fileno; 5488 break; 5489 case 7: 5490 if (strEQ(d,"foreach")) return KEY_foreach; 5491 break; 5492 case 8: 5493 if (strEQ(d,"formline")) return -KEY_formline; 5494 break; 5495 } 5496 break; 5497 case 'g': 5498 if (strnEQ(d,"get",3)) { 5499 d += 3; 5500 if (*d == 'p') { 5501 switch (len) { 5502 case 7: 5503 if (strEQ(d,"ppid")) return -KEY_getppid; 5504 if (strEQ(d,"pgrp")) return -KEY_getpgrp; 5505 break; 5506 case 8: 5507 if (strEQ(d,"pwent")) return -KEY_getpwent; 5508 if (strEQ(d,"pwnam")) return -KEY_getpwnam; 5509 if (strEQ(d,"pwuid")) return -KEY_getpwuid; 5510 break; 5511 case 11: 5512 if (strEQ(d,"peername")) return -KEY_getpeername; 5513 if (strEQ(d,"protoent")) return -KEY_getprotoent; 5514 if (strEQ(d,"priority")) return -KEY_getpriority; 5515 break; 5516 case 14: 5517 if (strEQ(d,"protobyname")) return -KEY_getprotobyname; 5518 break; 5519 case 16: 5520 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber; 5521 break; 5522 } 5523 } 5524 else if (*d == 'h') { 5525 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname; 5526 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr; 5527 if (strEQ(d,"hostent")) return -KEY_gethostent; 5528 } 5529 else if (*d == 'n') { 5530 if (strEQ(d,"netbyname")) return -KEY_getnetbyname; 5531 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr; 5532 if (strEQ(d,"netent")) return -KEY_getnetent; 5533 } 5534 else if (*d == 's') { 5535 if (strEQ(d,"servbyname")) return -KEY_getservbyname; 5536 if (strEQ(d,"servbyport")) return -KEY_getservbyport; 5537 if (strEQ(d,"servent")) return -KEY_getservent; 5538 if (strEQ(d,"sockname")) return -KEY_getsockname; 5539 if (strEQ(d,"sockopt")) return -KEY_getsockopt; 5540 } 5541 else if (*d == 'g') { 5542 if (strEQ(d,"grent")) return -KEY_getgrent; 5543 if (strEQ(d,"grnam")) return -KEY_getgrnam; 5544 if (strEQ(d,"grgid")) return -KEY_getgrgid; 5545 } 5546 else if (*d == 'l') { 5547 if (strEQ(d,"login")) return -KEY_getlogin; 5548 } 5549 else if (strEQ(d,"c")) return -KEY_getc; 5550 break; 5551 } 5552 switch (len) { 5553 case 2: 5554 if (strEQ(d,"gt")) return -KEY_gt; 5555 if (strEQ(d,"ge")) return -KEY_ge; 5556 break; 5557 case 4: 5558 if (strEQ(d,"grep")) return KEY_grep; 5559 if (strEQ(d,"goto")) return KEY_goto; 5560 if (strEQ(d,"glob")) return KEY_glob; 5561 break; 5562 case 6: 5563 if (strEQ(d,"gmtime")) return -KEY_gmtime; 5564 break; 5565 } 5566 break; 5567 case 'h': 5568 if (strEQ(d,"hex")) return -KEY_hex; 5569 break; 5570 case 'I': 5571 if (strEQ(d,"INIT")) return KEY_INIT; 5572 break; 5573 case 'i': 5574 switch (len) { 5575 case 2: 5576 if (strEQ(d,"if")) return KEY_if; 5577 break; 5578 case 3: 5579 if (strEQ(d,"int")) return -KEY_int; 5580 break; 5581 case 5: 5582 if (strEQ(d,"index")) return -KEY_index; 5583 if (strEQ(d,"ioctl")) return -KEY_ioctl; 5584 break; 5585 } 5586 break; 5587 case 'j': 5588 if (strEQ(d,"join")) return -KEY_join; 5589 break; 5590 case 'k': 5591 if (len == 4) { 5592 if (strEQ(d,"keys")) return -KEY_keys; 5593 if (strEQ(d,"kill")) return -KEY_kill; 5594 } 5595 break; 5596 case 'l': 5597 switch (len) { 5598 case 2: 5599 if (strEQ(d,"lt")) return -KEY_lt; 5600 if (strEQ(d,"le")) return -KEY_le; 5601 if (strEQ(d,"lc")) return -KEY_lc; 5602 break; 5603 case 3: 5604 if (strEQ(d,"log")) return -KEY_log; 5605 break; 5606 case 4: 5607 if (strEQ(d,"last")) return KEY_last; 5608 if (strEQ(d,"link")) return -KEY_link; 5609 if (strEQ(d,"lock")) return -KEY_lock; 5610 break; 5611 case 5: 5612 if (strEQ(d,"local")) return KEY_local; 5613 if (strEQ(d,"lstat")) return -KEY_lstat; 5614 break; 5615 case 6: 5616 if (strEQ(d,"length")) return -KEY_length; 5617 if (strEQ(d,"listen")) return -KEY_listen; 5618 break; 5619 case 7: 5620 if (strEQ(d,"lcfirst")) return -KEY_lcfirst; 5621 break; 5622 case 9: 5623 if (strEQ(d,"localtime")) return -KEY_localtime; 5624 break; 5625 } 5626 break; 5627 case 'm': 5628 switch (len) { 5629 case 1: return KEY_m; 5630 case 2: 5631 if (strEQ(d,"my")) return KEY_my; 5632 break; 5633 case 3: 5634 if (strEQ(d,"map")) return KEY_map; 5635 break; 5636 case 5: 5637 if (strEQ(d,"mkdir")) return -KEY_mkdir; 5638 break; 5639 case 6: 5640 if (strEQ(d,"msgctl")) return -KEY_msgctl; 5641 if (strEQ(d,"msgget")) return -KEY_msgget; 5642 if (strEQ(d,"msgrcv")) return -KEY_msgrcv; 5643 if (strEQ(d,"msgsnd")) return -KEY_msgsnd; 5644 break; 5645 } 5646 break; 5647 case 'n': 5648 if (strEQ(d,"next")) return KEY_next; 5649 if (strEQ(d,"ne")) return -KEY_ne; 5650 if (strEQ(d,"not")) return -KEY_not; 5651 if (strEQ(d,"no")) return KEY_no; 5652 break; 5653 case 'o': 5654 switch (len) { 5655 case 2: 5656 if (strEQ(d,"or")) return -KEY_or; 5657 break; 5658 case 3: 5659 if (strEQ(d,"ord")) return -KEY_ord; 5660 if (strEQ(d,"oct")) return -KEY_oct; 5661 if (strEQ(d,"our")) return KEY_our; 5662 break; 5663 case 4: 5664 if (strEQ(d,"open")) return -KEY_open; 5665 break; 5666 case 7: 5667 if (strEQ(d,"opendir")) return -KEY_opendir; 5668 break; 5669 } 5670 break; 5671 case 'p': 5672 switch (len) { 5673 case 3: 5674 if (strEQ(d,"pop")) return -KEY_pop; 5675 if (strEQ(d,"pos")) return KEY_pos; 5676 break; 5677 case 4: 5678 if (strEQ(d,"push")) return -KEY_push; 5679 if (strEQ(d,"pack")) return -KEY_pack; 5680 if (strEQ(d,"pipe")) return -KEY_pipe; 5681 break; 5682 case 5: 5683 if (strEQ(d,"print")) return KEY_print; 5684 break; 5685 case 6: 5686 if (strEQ(d,"printf")) return KEY_printf; 5687 break; 5688 case 7: 5689 if (strEQ(d,"package")) return KEY_package; 5690 break; 5691 case 9: 5692 if (strEQ(d,"prototype")) return KEY_prototype; 5693 } 5694 break; 5695 case 'q': 5696 if (len <= 2) { 5697 if (strEQ(d,"q")) return KEY_q; 5698 if (strEQ(d,"qr")) return KEY_qr; 5699 if (strEQ(d,"qq")) return KEY_qq; 5700 if (strEQ(d,"qw")) return KEY_qw; 5701 if (strEQ(d,"qx")) return KEY_qx; 5702 } 5703 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta; 5704 break; 5705 case 'r': 5706 switch (len) { 5707 case 3: 5708 if (strEQ(d,"ref")) return -KEY_ref; 5709 break; 5710 case 4: 5711 if (strEQ(d,"read")) return -KEY_read; 5712 if (strEQ(d,"rand")) return -KEY_rand; 5713 if (strEQ(d,"recv")) return -KEY_recv; 5714 if (strEQ(d,"redo")) return KEY_redo; 5715 break; 5716 case 5: 5717 if (strEQ(d,"rmdir")) return -KEY_rmdir; 5718 if (strEQ(d,"reset")) return -KEY_reset; 5719 break; 5720 case 6: 5721 if (strEQ(d,"return")) return KEY_return; 5722 if (strEQ(d,"rename")) return -KEY_rename; 5723 if (strEQ(d,"rindex")) return -KEY_rindex; 5724 break; 5725 case 7: 5726 if (strEQ(d,"require")) return KEY_require; 5727 if (strEQ(d,"reverse")) return -KEY_reverse; 5728 if (strEQ(d,"readdir")) return -KEY_readdir; 5729 break; 5730 case 8: 5731 if (strEQ(d,"readlink")) return -KEY_readlink; 5732 if (strEQ(d,"readline")) return -KEY_readline; 5733 if (strEQ(d,"readpipe")) return -KEY_readpipe; 5734 break; 5735 case 9: 5736 if (strEQ(d,"rewinddir")) return -KEY_rewinddir; 5737 break; 5738 } 5739 break; 5740 case 's': 5741 switch (d[1]) { 5742 case 0: return KEY_s; 5743 case 'c': 5744 if (strEQ(d,"scalar")) return KEY_scalar; 5745 break; 5746 case 'e': 5747 switch (len) { 5748 case 4: 5749 if (strEQ(d,"seek")) return -KEY_seek; 5750 if (strEQ(d,"send")) return -KEY_send; 5751 break; 5752 case 5: 5753 if (strEQ(d,"semop")) return -KEY_semop; 5754 break; 5755 case 6: 5756 if (strEQ(d,"select")) return -KEY_select; 5757 if (strEQ(d,"semctl")) return -KEY_semctl; 5758 if (strEQ(d,"semget")) return -KEY_semget; 5759 break; 5760 case 7: 5761 if (strEQ(d,"setpgrp")) return -KEY_setpgrp; 5762 if (strEQ(d,"seekdir")) return -KEY_seekdir; 5763 break; 5764 case 8: 5765 if (strEQ(d,"setpwent")) return -KEY_setpwent; 5766 if (strEQ(d,"setgrent")) return -KEY_setgrent; 5767 break; 5768 case 9: 5769 if (strEQ(d,"setnetent")) return -KEY_setnetent; 5770 break; 5771 case 10: 5772 if (strEQ(d,"setsockopt")) return -KEY_setsockopt; 5773 if (strEQ(d,"sethostent")) return -KEY_sethostent; 5774 if (strEQ(d,"setservent")) return -KEY_setservent; 5775 break; 5776 case 11: 5777 if (strEQ(d,"setpriority")) return -KEY_setpriority; 5778 if (strEQ(d,"setprotoent")) return -KEY_setprotoent; 5779 break; 5780 } 5781 break; 5782 case 'h': 5783 switch (len) { 5784 case 5: 5785 if (strEQ(d,"shift")) return -KEY_shift; 5786 break; 5787 case 6: 5788 if (strEQ(d,"shmctl")) return -KEY_shmctl; 5789 if (strEQ(d,"shmget")) return -KEY_shmget; 5790 break; 5791 case 7: 5792 if (strEQ(d,"shmread")) return -KEY_shmread; 5793 break; 5794 case 8: 5795 if (strEQ(d,"shmwrite")) return -KEY_shmwrite; 5796 if (strEQ(d,"shutdown")) return -KEY_shutdown; 5797 break; 5798 } 5799 break; 5800 case 'i': 5801 if (strEQ(d,"sin")) return -KEY_sin; 5802 break; 5803 case 'l': 5804 if (strEQ(d,"sleep")) return -KEY_sleep; 5805 break; 5806 case 'o': 5807 if (strEQ(d,"sort")) return KEY_sort; 5808 if (strEQ(d,"socket")) return -KEY_socket; 5809 if (strEQ(d,"socketpair")) return -KEY_socketpair; 5810 break; 5811 case 'p': 5812 if (strEQ(d,"split")) return KEY_split; 5813 if (strEQ(d,"sprintf")) return -KEY_sprintf; 5814 if (strEQ(d,"splice")) return -KEY_splice; 5815 break; 5816 case 'q': 5817 if (strEQ(d,"sqrt")) return -KEY_sqrt; 5818 break; 5819 case 'r': 5820 if (strEQ(d,"srand")) return -KEY_srand; 5821 break; 5822 case 't': 5823 if (strEQ(d,"stat")) return -KEY_stat; 5824 if (strEQ(d,"study")) return KEY_study; 5825 break; 5826 case 'u': 5827 if (strEQ(d,"substr")) return -KEY_substr; 5828 if (strEQ(d,"sub")) return KEY_sub; 5829 break; 5830 case 'y': 5831 switch (len) { 5832 case 6: 5833 if (strEQ(d,"system")) return -KEY_system; 5834 break; 5835 case 7: 5836 if (strEQ(d,"symlink")) return -KEY_symlink; 5837 if (strEQ(d,"syscall")) return -KEY_syscall; 5838 if (strEQ(d,"sysopen")) return -KEY_sysopen; 5839 if (strEQ(d,"sysread")) return -KEY_sysread; 5840 if (strEQ(d,"sysseek")) return -KEY_sysseek; 5841 break; 5842 case 8: 5843 if (strEQ(d,"syswrite")) return -KEY_syswrite; 5844 break; 5845 } 5846 break; 5847 } 5848 break; 5849 case 't': 5850 switch (len) { 5851 case 2: 5852 if (strEQ(d,"tr")) return KEY_tr; 5853 break; 5854 case 3: 5855 if (strEQ(d,"tie")) return KEY_tie; 5856 break; 5857 case 4: 5858 if (strEQ(d,"tell")) return -KEY_tell; 5859 if (strEQ(d,"tied")) return KEY_tied; 5860 if (strEQ(d,"time")) return -KEY_time; 5861 break; 5862 case 5: 5863 if (strEQ(d,"times")) return -KEY_times; 5864 break; 5865 case 7: 5866 if (strEQ(d,"telldir")) return -KEY_telldir; 5867 break; 5868 case 8: 5869 if (strEQ(d,"truncate")) return -KEY_truncate; 5870 break; 5871 } 5872 break; 5873 case 'u': 5874 switch (len) { 5875 case 2: 5876 if (strEQ(d,"uc")) return -KEY_uc; 5877 break; 5878 case 3: 5879 if (strEQ(d,"use")) return KEY_use; 5880 break; 5881 case 5: 5882 if (strEQ(d,"undef")) return KEY_undef; 5883 if (strEQ(d,"until")) return KEY_until; 5884 if (strEQ(d,"untie")) return KEY_untie; 5885 if (strEQ(d,"utime")) return -KEY_utime; 5886 if (strEQ(d,"umask")) return -KEY_umask; 5887 break; 5888 case 6: 5889 if (strEQ(d,"unless")) return KEY_unless; 5890 if (strEQ(d,"unpack")) return -KEY_unpack; 5891 if (strEQ(d,"unlink")) return -KEY_unlink; 5892 break; 5893 case 7: 5894 if (strEQ(d,"unshift")) return -KEY_unshift; 5895 if (strEQ(d,"ucfirst")) return -KEY_ucfirst; 5896 break; 5897 } 5898 break; 5899 case 'v': 5900 if (strEQ(d,"values")) return -KEY_values; 5901 if (strEQ(d,"vec")) return -KEY_vec; 5902 break; 5903 case 'w': 5904 switch (len) { 5905 case 4: 5906 if (strEQ(d,"warn")) return -KEY_warn; 5907 if (strEQ(d,"wait")) return -KEY_wait; 5908 break; 5909 case 5: 5910 if (strEQ(d,"while")) return KEY_while; 5911 if (strEQ(d,"write")) return -KEY_write; 5912 break; 5913 case 7: 5914 if (strEQ(d,"waitpid")) return -KEY_waitpid; 5915 break; 5916 case 9: 5917 if (strEQ(d,"wantarray")) return -KEY_wantarray; 5918 break; 5919 } 5920 break; 5921 case 'x': 5922 if (len == 1) return -KEY_x; 5923 if (strEQ(d,"xor")) return -KEY_xor; 5924 break; 5925 case 'y': 5926 if (len == 1) return KEY_y; 5927 break; 5928 case 'z': 5929 break; 5930 } 5931 return 0; 5932 } 5933 5934 STATIC void 5935 S_checkcomma(pTHX_ register char *s, char *name, char *what) 5936 { 5937 char *w; 5938 5939 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 5940 if (ckWARN(WARN_SYNTAX)) { 5941 int level = 1; 5942 for (w = s+2; *w && level; w++) { 5943 if (*w == '(') 5944 ++level; 5945 else if (*w == ')') 5946 --level; 5947 } 5948 if (*w) 5949 for (; *w && isSPACE(*w); w++) ; 5950 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */ 5951 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5952 "%s (...) interpreted as function",name); 5953 } 5954 } 5955 while (s < PL_bufend && isSPACE(*s)) 5956 s++; 5957 if (*s == '(') 5958 s++; 5959 while (s < PL_bufend && isSPACE(*s)) 5960 s++; 5961 if (isIDFIRST_lazy_if(s,UTF)) { 5962 w = s++; 5963 while (isALNUM_lazy_if(s,UTF)) 5964 s++; 5965 while (s < PL_bufend && isSPACE(*s)) 5966 s++; 5967 if (*s == ',') { 5968 int kw; 5969 *s = '\0'; 5970 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0; 5971 *s = ','; 5972 if (kw) 5973 return; 5974 Perl_croak(aTHX_ "No comma allowed after %s", what); 5975 } 5976 } 5977 } 5978 5979 /* Either returns sv, or mortalizes sv and returns a new SV*. 5980 Best used as sv=new_constant(..., sv, ...). 5981 If s, pv are NULL, calls subroutine with one argument, 5982 and type is used with error messages only. */ 5983 5984 STATIC SV * 5985 S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, 5986 const char *type) 5987 { 5988 dSP; 5989 HV *table = GvHV(PL_hintgv); /* ^H */ 5990 SV *res; 5991 SV **cvp; 5992 SV *cv, *typesv; 5993 const char *why1, *why2, *why3; 5994 5995 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 5996 SV *msg; 5997 5998 why2 = strEQ(key,"charnames") 5999 ? "(possibly a missing \"use charnames ...\")" 6000 : ""; 6001 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", 6002 (type ? type: "undef"), why2); 6003 6004 /* This is convoluted and evil ("goto considered harmful") 6005 * but I do not understand the intricacies of all the different 6006 * failure modes of %^H in here. The goal here is to make 6007 * the most probable error message user-friendly. --jhi */ 6008 6009 goto msgdone; 6010 6011 report: 6012 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", 6013 (type ? type: "undef"), why1, why2, why3); 6014 msgdone: 6015 yyerror(SvPVX(msg)); 6016 SvREFCNT_dec(msg); 6017 return sv; 6018 } 6019 cvp = hv_fetch(table, key, strlen(key), FALSE); 6020 if (!cvp || !SvOK(*cvp)) { 6021 why1 = "$^H{"; 6022 why2 = key; 6023 why3 = "} is not defined"; 6024 goto report; 6025 } 6026 sv_2mortal(sv); /* Parent created it permanently */ 6027 cv = *cvp; 6028 if (!pv && s) 6029 pv = sv_2mortal(newSVpvn(s, len)); 6030 if (type && pv) 6031 typesv = sv_2mortal(newSVpv(type, 0)); 6032 else 6033 typesv = &PL_sv_undef; 6034 6035 PUSHSTACKi(PERLSI_OVERLOAD); 6036 ENTER ; 6037 SAVETMPS; 6038 6039 PUSHMARK(SP) ; 6040 EXTEND(sp, 3); 6041 if (pv) 6042 PUSHs(pv); 6043 PUSHs(sv); 6044 if (pv) 6045 PUSHs(typesv); 6046 PUTBACK; 6047 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 6048 6049 SPAGAIN ; 6050 6051 /* Check the eval first */ 6052 if (!PL_in_eval && SvTRUE(ERRSV)) { 6053 STRLEN n_a; 6054 sv_catpv(ERRSV, "Propagated"); 6055 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */ 6056 (void)POPs; 6057 res = SvREFCNT_inc(sv); 6058 } 6059 else { 6060 res = POPs; 6061 (void)SvREFCNT_inc(res); 6062 } 6063 6064 PUTBACK ; 6065 FREETMPS ; 6066 LEAVE ; 6067 POPSTACK; 6068 6069 if (!SvOK(res)) { 6070 why1 = "Call to &{$^H{"; 6071 why2 = key; 6072 why3 = "}} did not return a defined value"; 6073 sv = res; 6074 goto report; 6075 } 6076 6077 return res; 6078 } 6079 6080 STATIC char * 6081 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 6082 { 6083 register char *d = dest; 6084 register char *e = d + destlen - 3; /* two-character token, ending NUL */ 6085 for (;;) { 6086 if (d >= e) 6087 Perl_croak(aTHX_ ident_too_long); 6088 if (isALNUM(*s)) /* UTF handled below */ 6089 *d++ = *s++; 6090 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) { 6091 *d++ = ':'; 6092 *d++ = ':'; 6093 s++; 6094 } 6095 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') { 6096 *d++ = *s++; 6097 *d++ = *s++; 6098 } 6099 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { 6100 char *t = s + UTF8SKIP(s); 6101 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) 6102 t += UTF8SKIP(t); 6103 if (d + (t - s) > e) 6104 Perl_croak(aTHX_ ident_too_long); 6105 Copy(s, d, t - s, char); 6106 d += t - s; 6107 s = t; 6108 } 6109 else { 6110 *d = '\0'; 6111 *slp = d - dest; 6112 return s; 6113 } 6114 } 6115 } 6116 6117 STATIC char * 6118 S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni) 6119 { 6120 register char *d; 6121 register char *e; 6122 char *bracket = 0; 6123 char funny = *s++; 6124 6125 if (isSPACE(*s)) 6126 s = skipspace(s); 6127 d = dest; 6128 e = d + destlen - 3; /* two-character token, ending NUL */ 6129 if (isDIGIT(*s)) { 6130 while (isDIGIT(*s)) { 6131 if (d >= e) 6132 Perl_croak(aTHX_ ident_too_long); 6133 *d++ = *s++; 6134 } 6135 } 6136 else { 6137 for (;;) { 6138 if (d >= e) 6139 Perl_croak(aTHX_ ident_too_long); 6140 if (isALNUM(*s)) /* UTF handled below */ 6141 *d++ = *s++; 6142 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) { 6143 *d++ = ':'; 6144 *d++ = ':'; 6145 s++; 6146 } 6147 else if (*s == ':' && s[1] == ':') { 6148 *d++ = *s++; 6149 *d++ = *s++; 6150 } 6151 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { 6152 char *t = s + UTF8SKIP(s); 6153 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) 6154 t += UTF8SKIP(t); 6155 if (d + (t - s) > e) 6156 Perl_croak(aTHX_ ident_too_long); 6157 Copy(s, d, t - s, char); 6158 d += t - s; 6159 s = t; 6160 } 6161 else 6162 break; 6163 } 6164 } 6165 *d = '\0'; 6166 d = dest; 6167 if (*d) { 6168 if (PL_lex_state != LEX_NORMAL) 6169 PL_lex_state = LEX_INTERPENDMAYBE; 6170 return s; 6171 } 6172 if (*s == '$' && s[1] && 6173 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) ) 6174 { 6175 return s; 6176 } 6177 if (*s == '{') { 6178 bracket = s; 6179 s++; 6180 } 6181 else if (ck_uni) 6182 check_uni(); 6183 if (s < send) 6184 *d = *s++; 6185 d[1] = '\0'; 6186 if (*d == '^' && *s && isCONTROLVAR(*s)) { 6187 *d = toCTRL(*s); 6188 s++; 6189 } 6190 if (bracket) { 6191 if (isSPACE(s[-1])) { 6192 while (s < send) { 6193 char ch = *s++; 6194 if (!SPACE_OR_TAB(ch)) { 6195 *d = ch; 6196 break; 6197 } 6198 } 6199 } 6200 if (isIDFIRST_lazy_if(d,UTF)) { 6201 d++; 6202 if (UTF) { 6203 e = s; 6204 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') { 6205 e += UTF8SKIP(e); 6206 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e)) 6207 e += UTF8SKIP(e); 6208 } 6209 Copy(s, d, e - s, char); 6210 d += e - s; 6211 s = e; 6212 } 6213 else { 6214 while ((isALNUM(*s) || *s == ':') && d < e) 6215 *d++ = *s++; 6216 if (d >= e) 6217 Perl_croak(aTHX_ ident_too_long); 6218 } 6219 *d = '\0'; 6220 while (s < send && SPACE_OR_TAB(*s)) s++; 6221 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 6222 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) { 6223 const char *brack = *s == '[' ? "[...]" : "{...}"; 6224 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 6225 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 6226 funny, dest, brack, funny, dest, brack); 6227 } 6228 bracket++; 6229 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 6230 return s; 6231 } 6232 } 6233 /* Handle extended ${^Foo} variables 6234 * 1999-02-27 mjd-perl-patch@plover.com */ 6235 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */ 6236 && isALNUM(*s)) 6237 { 6238 d++; 6239 while (isALNUM(*s) && d < e) { 6240 *d++ = *s++; 6241 } 6242 if (d >= e) 6243 Perl_croak(aTHX_ ident_too_long); 6244 *d = '\0'; 6245 } 6246 if (*s == '}') { 6247 s++; 6248 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) 6249 PL_lex_state = LEX_INTERPEND; 6250 if (funny == '#') 6251 funny = '@'; 6252 if (PL_lex_state == LEX_NORMAL) { 6253 if (ckWARN(WARN_AMBIGUOUS) && 6254 (keyword(dest, d - dest) || get_cv(dest, FALSE))) 6255 { 6256 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 6257 "Ambiguous use of %c{%s} resolved to %c%s", 6258 funny, dest, funny, dest); 6259 } 6260 } 6261 } 6262 else { 6263 s = bracket; /* let the parser handle it */ 6264 *dest = '\0'; 6265 } 6266 } 6267 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 6268 PL_lex_state = LEX_INTERPEND; 6269 return s; 6270 } 6271 6272 void 6273 Perl_pmflag(pTHX_ U32* pmfl, int ch) 6274 { 6275 if (ch == 'i') 6276 *pmfl |= PMf_FOLD; 6277 else if (ch == 'g') 6278 *pmfl |= PMf_GLOBAL; 6279 else if (ch == 'c') 6280 *pmfl |= PMf_CONTINUE; 6281 else if (ch == 'o') 6282 *pmfl |= PMf_KEEP; 6283 else if (ch == 'm') 6284 *pmfl |= PMf_MULTILINE; 6285 else if (ch == 's') 6286 *pmfl |= PMf_SINGLELINE; 6287 else if (ch == 'x') 6288 *pmfl |= PMf_EXTENDED; 6289 } 6290 6291 STATIC char * 6292 S_scan_pat(pTHX_ char *start, I32 type) 6293 { 6294 PMOP *pm; 6295 char *s; 6296 6297 s = scan_str(start,FALSE,FALSE); 6298 if (!s) 6299 Perl_croak(aTHX_ "Search pattern not terminated"); 6300 6301 pm = (PMOP*)newPMOP(type, 0); 6302 if (PL_multi_open == '?') 6303 pm->op_pmflags |= PMf_ONCE; 6304 if(type == OP_QR) { 6305 while (*s && strchr("iomsx", *s)) 6306 pmflag(&pm->op_pmflags,*s++); 6307 } 6308 else { 6309 while (*s && strchr("iogcmsx", *s)) 6310 pmflag(&pm->op_pmflags,*s++); 6311 } 6312 /* issue a warning if /c is specified,but /g is not */ 6313 if (ckWARN(WARN_REGEXP) && 6314 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 6315 { 6316 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g); 6317 } 6318 6319 pm->op_pmpermflags = pm->op_pmflags; 6320 6321 PL_lex_op = (OP*)pm; 6322 yylval.ival = OP_MATCH; 6323 return s; 6324 } 6325 6326 STATIC char * 6327 S_scan_subst(pTHX_ char *start) 6328 { 6329 register char *s; 6330 register PMOP *pm; 6331 I32 first_start; 6332 I32 es = 0; 6333 6334 yylval.ival = OP_NULL; 6335 6336 s = scan_str(start,FALSE,FALSE); 6337 6338 if (!s) 6339 Perl_croak(aTHX_ "Substitution pattern not terminated"); 6340 6341 if (s[-1] == PL_multi_open) 6342 s--; 6343 6344 first_start = PL_multi_start; 6345 s = scan_str(s,FALSE,FALSE); 6346 if (!s) { 6347 if (PL_lex_stuff) { 6348 SvREFCNT_dec(PL_lex_stuff); 6349 PL_lex_stuff = Nullsv; 6350 } 6351 Perl_croak(aTHX_ "Substitution replacement not terminated"); 6352 } 6353 PL_multi_start = first_start; /* so whole substitution is taken together */ 6354 6355 pm = (PMOP*)newPMOP(OP_SUBST, 0); 6356 while (*s) { 6357 if (*s == 'e') { 6358 s++; 6359 es++; 6360 } 6361 else if (strchr("iogcmsx", *s)) 6362 pmflag(&pm->op_pmflags,*s++); 6363 else 6364 break; 6365 } 6366 6367 /* /c is not meaningful with s/// */ 6368 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE)) 6369 { 6370 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst); 6371 } 6372 6373 if (es) { 6374 SV *repl; 6375 PL_sublex_info.super_bufptr = s; 6376 PL_sublex_info.super_bufend = PL_bufend; 6377 PL_multi_end = 0; 6378 pm->op_pmflags |= PMf_EVAL; 6379 repl = newSVpvn("",0); 6380 while (es-- > 0) 6381 sv_catpv(repl, es ? "eval " : "do "); 6382 sv_catpvn(repl, "{ ", 2); 6383 sv_catsv(repl, PL_lex_repl); 6384 sv_catpvn(repl, " };", 2); 6385 SvEVALED_on(repl); 6386 SvREFCNT_dec(PL_lex_repl); 6387 PL_lex_repl = repl; 6388 } 6389 6390 pm->op_pmpermflags = pm->op_pmflags; 6391 PL_lex_op = (OP*)pm; 6392 yylval.ival = OP_SUBST; 6393 return s; 6394 } 6395 6396 STATIC char * 6397 S_scan_trans(pTHX_ char *start) 6398 { 6399 register char* s; 6400 OP *o; 6401 short *tbl; 6402 I32 squash; 6403 I32 del; 6404 I32 complement; 6405 6406 yylval.ival = OP_NULL; 6407 6408 s = scan_str(start,FALSE,FALSE); 6409 if (!s) 6410 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 6411 if (s[-1] == PL_multi_open) 6412 s--; 6413 6414 s = scan_str(s,FALSE,FALSE); 6415 if (!s) { 6416 if (PL_lex_stuff) { 6417 SvREFCNT_dec(PL_lex_stuff); 6418 PL_lex_stuff = Nullsv; 6419 } 6420 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 6421 } 6422 6423 complement = del = squash = 0; 6424 while (strchr("cds", *s)) { 6425 if (*s == 'c') 6426 complement = OPpTRANS_COMPLEMENT; 6427 else if (*s == 'd') 6428 del = OPpTRANS_DELETE; 6429 else if (*s == 's') 6430 squash = OPpTRANS_SQUASH; 6431 s++; 6432 } 6433 6434 New(803, tbl, complement&&!del?258:256, short); 6435 o = newPVOP(OP_TRANS, 0, (char*)tbl); 6436 o->op_private = del|squash|complement| 6437 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 6438 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0); 6439 6440 PL_lex_op = o; 6441 yylval.ival = OP_TRANS; 6442 return s; 6443 } 6444 6445 STATIC char * 6446 S_scan_heredoc(pTHX_ register char *s) 6447 { 6448 SV *herewas; 6449 I32 op_type = OP_SCALAR; 6450 I32 len; 6451 SV *tmpstr; 6452 char term; 6453 register char *d; 6454 register char *e; 6455 char *peek; 6456 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR)); 6457 6458 s += 2; 6459 d = PL_tokenbuf; 6460 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 6461 if (!outer) 6462 *d++ = '\n'; 6463 for (peek = s; SPACE_OR_TAB(*peek); peek++) ; 6464 if (*peek && strchr("`'\"",*peek)) { 6465 s = peek; 6466 term = *s++; 6467 s = delimcpy(d, e, s, PL_bufend, term, &len); 6468 d += len; 6469 if (s < PL_bufend) 6470 s++; 6471 } 6472 else { 6473 if (*s == '\\') 6474 s++, term = '\''; 6475 else 6476 term = '"'; 6477 if (!isALNUM_lazy_if(s,UTF)) 6478 deprecate_old("bare << to mean <<\"\""); 6479 for (; isALNUM_lazy_if(s,UTF); s++) { 6480 if (d < e) 6481 *d++ = *s; 6482 } 6483 } 6484 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 6485 Perl_croak(aTHX_ "Delimiter for here document is too long"); 6486 *d++ = '\n'; 6487 *d = '\0'; 6488 len = d - PL_tokenbuf; 6489 #ifndef PERL_STRICT_CR 6490 d = strchr(s, '\r'); 6491 if (d) { 6492 char *olds = s; 6493 s = d; 6494 while (s < PL_bufend) { 6495 if (*s == '\r') { 6496 *d++ = '\n'; 6497 if (*++s == '\n') 6498 s++; 6499 } 6500 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 6501 *d++ = *s++; 6502 s++; 6503 } 6504 else 6505 *d++ = *s++; 6506 } 6507 *d = '\0'; 6508 PL_bufend = d; 6509 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr)); 6510 s = olds; 6511 } 6512 #endif 6513 d = "\n"; 6514 if (outer || !(d=ninstr(s,PL_bufend,d,d+1))) 6515 herewas = newSVpvn(s,PL_bufend-s); 6516 else 6517 s--, herewas = newSVpvn(s,d-s); 6518 s += SvCUR(herewas); 6519 6520 tmpstr = NEWSV(87,79); 6521 sv_upgrade(tmpstr, SVt_PVIV); 6522 if (term == '\'') { 6523 op_type = OP_CONST; 6524 SvIVX(tmpstr) = -1; 6525 } 6526 else if (term == '`') { 6527 op_type = OP_BACKTICK; 6528 SvIVX(tmpstr) = '\\'; 6529 } 6530 6531 CLINE; 6532 PL_multi_start = CopLINE(PL_curcop); 6533 PL_multi_open = PL_multi_close = '<'; 6534 term = *PL_tokenbuf; 6535 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) { 6536 char *bufptr = PL_sublex_info.super_bufptr; 6537 char *bufend = PL_sublex_info.super_bufend; 6538 char *olds = s - SvCUR(herewas); 6539 s = strchr(bufptr, '\n'); 6540 if (!s) 6541 s = bufend; 6542 d = s; 6543 while (s < bufend && 6544 (*s != term || memNE(s,PL_tokenbuf,len)) ) { 6545 if (*s++ == '\n') 6546 CopLINE_inc(PL_curcop); 6547 } 6548 if (s >= bufend) { 6549 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 6550 missingterm(PL_tokenbuf); 6551 } 6552 sv_setpvn(herewas,bufptr,d-bufptr+1); 6553 sv_setpvn(tmpstr,d+1,s-d); 6554 s += len - 1; 6555 sv_catpvn(herewas,s,bufend-s); 6556 (void)strcpy(bufptr,SvPVX(herewas)); 6557 6558 s = olds; 6559 goto retval; 6560 } 6561 else if (!outer) { 6562 d = s; 6563 while (s < PL_bufend && 6564 (*s != term || memNE(s,PL_tokenbuf,len)) ) { 6565 if (*s++ == '\n') 6566 CopLINE_inc(PL_curcop); 6567 } 6568 if (s >= PL_bufend) { 6569 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 6570 missingterm(PL_tokenbuf); 6571 } 6572 sv_setpvn(tmpstr,d+1,s-d); 6573 s += len - 1; 6574 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */ 6575 6576 sv_catpvn(herewas,s,PL_bufend-s); 6577 sv_setsv(PL_linestr,herewas); 6578 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr); 6579 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 6580 PL_last_lop = PL_last_uni = Nullch; 6581 } 6582 else 6583 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */ 6584 while (s >= PL_bufend) { /* multiple line string? */ 6585 if (!outer || 6586 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { 6587 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 6588 missingterm(PL_tokenbuf); 6589 } 6590 CopLINE_inc(PL_curcop); 6591 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 6592 PL_last_lop = PL_last_uni = Nullch; 6593 #ifndef PERL_STRICT_CR 6594 if (PL_bufend - PL_linestart >= 2) { 6595 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || 6596 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 6597 { 6598 PL_bufend[-2] = '\n'; 6599 PL_bufend--; 6600 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr)); 6601 } 6602 else if (PL_bufend[-1] == '\r') 6603 PL_bufend[-1] = '\n'; 6604 } 6605 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 6606 PL_bufend[-1] = '\n'; 6607 #endif 6608 if (PERLDB_LINE && PL_curstash != PL_debstash) { 6609 SV *sv = NEWSV(88,0); 6610 6611 sv_upgrade(sv, SVt_PVMG); 6612 sv_setsv(sv,PL_linestr); 6613 (void)SvIOK_on(sv); 6614 SvIVX(sv) = 0; 6615 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv); 6616 } 6617 if (*s == term && memEQ(s,PL_tokenbuf,len)) { 6618 s = PL_bufend - 1; 6619 *s = ' '; 6620 sv_catsv(PL_linestr,herewas); 6621 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 6622 } 6623 else { 6624 s = PL_bufend; 6625 sv_catsv(tmpstr,PL_linestr); 6626 } 6627 } 6628 s++; 6629 retval: 6630 PL_multi_end = CopLINE(PL_curcop); 6631 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 6632 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1); 6633 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); 6634 } 6635 SvREFCNT_dec(herewas); 6636 if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) 6637 SvUTF8_on(tmpstr); 6638 PL_lex_stuff = tmpstr; 6639 yylval.ival = op_type; 6640 return s; 6641 } 6642 6643 /* scan_inputsymbol 6644 takes: current position in input buffer 6645 returns: new position in input buffer 6646 side-effects: yylval and lex_op are set. 6647 6648 This code handles: 6649 6650 <> read from ARGV 6651 <FH> read from filehandle 6652 <pkg::FH> read from package qualified filehandle 6653 <pkg'FH> read from package qualified filehandle 6654 <$fh> read from filehandle in $fh 6655 <*.h> filename glob 6656 6657 */ 6658 6659 STATIC char * 6660 S_scan_inputsymbol(pTHX_ char *start) 6661 { 6662 register char *s = start; /* current position in buffer */ 6663 register char *d; 6664 register char *e; 6665 char *end; 6666 I32 len; 6667 6668 d = PL_tokenbuf; /* start of temp holding space */ 6669 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 6670 end = strchr(s, '\n'); 6671 if (!end) 6672 end = PL_bufend; 6673 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 6674 6675 /* die if we didn't have space for the contents of the <>, 6676 or if it didn't end, or if we see a newline 6677 */ 6678 6679 if (len >= sizeof PL_tokenbuf) 6680 Perl_croak(aTHX_ "Excessively long <> operator"); 6681 if (s >= end) 6682 Perl_croak(aTHX_ "Unterminated <> operator"); 6683 6684 s++; 6685 6686 /* check for <$fh> 6687 Remember, only scalar variables are interpreted as filehandles by 6688 this code. Anything more complex (e.g., <$fh{$num}>) will be 6689 treated as a glob() call. 6690 This code makes use of the fact that except for the $ at the front, 6691 a scalar variable and a filehandle look the same. 6692 */ 6693 if (*d == '$' && d[1]) d++; 6694 6695 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 6696 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 6697 d++; 6698 6699 /* If we've tried to read what we allow filehandles to look like, and 6700 there's still text left, then it must be a glob() and not a getline. 6701 Use scan_str to pull out the stuff between the <> and treat it 6702 as nothing more than a string. 6703 */ 6704 6705 if (d - PL_tokenbuf != len) { 6706 yylval.ival = OP_GLOB; 6707 set_csh(); 6708 s = scan_str(start,FALSE,FALSE); 6709 if (!s) 6710 Perl_croak(aTHX_ "Glob not terminated"); 6711 return s; 6712 } 6713 else { 6714 bool readline_overriden = FALSE; 6715 GV *gv_readline = Nullgv; 6716 GV **gvp; 6717 /* we're in a filehandle read situation */ 6718 d = PL_tokenbuf; 6719 6720 /* turn <> into <ARGV> */ 6721 if (!len) 6722 (void)strcpy(d,"ARGV"); 6723 6724 /* Check whether readline() is overriden */ 6725 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV)) 6726 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)) 6727 || 6728 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE)) 6729 && (gv_readline = *gvp) != (GV*)&PL_sv_undef 6730 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))) 6731 readline_overriden = TRUE; 6732 6733 /* if <$fh>, create the ops to turn the variable into a 6734 filehandle 6735 */ 6736 if (*d == '$') { 6737 I32 tmp; 6738 6739 /* try to find it in the pad for this block, otherwise find 6740 add symbol table ops 6741 */ 6742 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) { 6743 SV *namesv = AvARRAY(PL_comppad_name)[tmp]; 6744 if (SvFLAGS(namesv) & SVpad_OUR) { 6745 SV *sym = sv_2mortal(newSVpv(HvNAME(GvSTASH(namesv)),0)); 6746 sv_catpvn(sym, "::", 2); 6747 sv_catpv(sym, d+1); 6748 d = SvPVX(sym); 6749 goto intro_sym; 6750 } 6751 else { 6752 OP *o = newOP(OP_PADSV, 0); 6753 o->op_targ = tmp; 6754 PL_lex_op = readline_overriden 6755 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 6756 append_elem(OP_LIST, o, 6757 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 6758 : (OP*)newUNOP(OP_READLINE, 0, o); 6759 } 6760 } 6761 else { 6762 GV *gv; 6763 ++d; 6764 intro_sym: 6765 gv = gv_fetchpv(d, 6766 (PL_in_eval 6767 ? (GV_ADDMULTI | GV_ADDINEVAL) 6768 : GV_ADDMULTI), 6769 SVt_PV); 6770 PL_lex_op = readline_overriden 6771 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 6772 append_elem(OP_LIST, 6773 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 6774 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 6775 : (OP*)newUNOP(OP_READLINE, 0, 6776 newUNOP(OP_RV2SV, 0, 6777 newGVOP(OP_GV, 0, gv))); 6778 } 6779 if (!readline_overriden) 6780 PL_lex_op->op_flags |= OPf_SPECIAL; 6781 /* we created the ops in PL_lex_op, so make yylval.ival a null op */ 6782 yylval.ival = OP_NULL; 6783 } 6784 6785 /* If it's none of the above, it must be a literal filehandle 6786 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 6787 else { 6788 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO); 6789 PL_lex_op = readline_overriden 6790 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 6791 append_elem(OP_LIST, 6792 newGVOP(OP_GV, 0, gv), 6793 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 6794 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); 6795 yylval.ival = OP_NULL; 6796 } 6797 } 6798 6799 return s; 6800 } 6801 6802 6803 /* scan_str 6804 takes: start position in buffer 6805 keep_quoted preserve \ on the embedded delimiter(s) 6806 keep_delims preserve the delimiters around the string 6807 returns: position to continue reading from buffer 6808 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 6809 updates the read buffer. 6810 6811 This subroutine pulls a string out of the input. It is called for: 6812 q single quotes q(literal text) 6813 ' single quotes 'literal text' 6814 qq double quotes qq(interpolate $here please) 6815 " double quotes "interpolate $here please" 6816 qx backticks qx(/bin/ls -l) 6817 ` backticks `/bin/ls -l` 6818 qw quote words @EXPORT_OK = qw( func() $spam ) 6819 m// regexp match m/this/ 6820 s/// regexp substitute s/this/that/ 6821 tr/// string transliterate tr/this/that/ 6822 y/// string transliterate y/this/that/ 6823 ($*@) sub prototypes sub foo ($) 6824 (stuff) sub attr parameters sub foo : attr(stuff) 6825 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 6826 6827 In most of these cases (all but <>, patterns and transliterate) 6828 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 6829 calls scan_str(). s/// makes yylex() call scan_subst() which calls 6830 scan_str(). tr/// and y/// make yylex() call scan_trans() which 6831 calls scan_str(). 6832 6833 It skips whitespace before the string starts, and treats the first 6834 character as the delimiter. If the delimiter is one of ([{< then 6835 the corresponding "close" character )]}> is used as the closing 6836 delimiter. It allows quoting of delimiters, and if the string has 6837 balanced delimiters ([{<>}]) it allows nesting. 6838 6839 On success, the SV with the resulting string is put into lex_stuff or, 6840 if that is already non-NULL, into lex_repl. The second case occurs only 6841 when parsing the RHS of the special constructs s/// and tr/// (y///). 6842 For convenience, the terminating delimiter character is stuffed into 6843 SvIVX of the SV. 6844 */ 6845 6846 STATIC char * 6847 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) 6848 { 6849 SV *sv; /* scalar value: string */ 6850 char *tmps; /* temp string, used for delimiter matching */ 6851 register char *s = start; /* current position in the buffer */ 6852 register char term; /* terminating character */ 6853 register char *to; /* current position in the sv's data */ 6854 I32 brackets = 1; /* bracket nesting level */ 6855 bool has_utf8 = FALSE; /* is there any utf8 content? */ 6856 6857 /* skip space before the delimiter */ 6858 if (isSPACE(*s)) 6859 s = skipspace(s); 6860 6861 /* mark where we are, in case we need to report errors */ 6862 CLINE; 6863 6864 /* after skipping whitespace, the next character is the terminator */ 6865 term = *s; 6866 if (!UTF8_IS_INVARIANT((U8)term) && UTF) 6867 has_utf8 = TRUE; 6868 6869 /* mark where we are */ 6870 PL_multi_start = CopLINE(PL_curcop); 6871 PL_multi_open = term; 6872 6873 /* find corresponding closing delimiter */ 6874 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 6875 term = tmps[5]; 6876 PL_multi_close = term; 6877 6878 /* create a new SV to hold the contents. 87 is leak category, I'm 6879 assuming. 79 is the SV's initial length. What a random number. */ 6880 sv = NEWSV(87,79); 6881 sv_upgrade(sv, SVt_PVIV); 6882 SvIVX(sv) = term; 6883 (void)SvPOK_only(sv); /* validate pointer */ 6884 6885 /* move past delimiter and try to read a complete string */ 6886 if (keep_delims) 6887 sv_catpvn(sv, s, 1); 6888 s++; 6889 for (;;) { 6890 /* extend sv if need be */ 6891 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 6892 /* set 'to' to the next character in the sv's string */ 6893 to = SvPVX(sv)+SvCUR(sv); 6894 6895 /* if open delimiter is the close delimiter read unbridle */ 6896 if (PL_multi_open == PL_multi_close) { 6897 for (; s < PL_bufend; s++,to++) { 6898 /* embedded newlines increment the current line number */ 6899 if (*s == '\n' && !PL_rsfp) 6900 CopLINE_inc(PL_curcop); 6901 /* handle quoted delimiters */ 6902 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 6903 if (!keep_quoted && s[1] == term) 6904 s++; 6905 /* any other quotes are simply copied straight through */ 6906 else 6907 *to++ = *s++; 6908 } 6909 /* terminate when run out of buffer (the for() condition), or 6910 have found the terminator */ 6911 else if (*s == term) 6912 break; 6913 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 6914 has_utf8 = TRUE; 6915 *to = *s; 6916 } 6917 } 6918 6919 /* if the terminator isn't the same as the start character (e.g., 6920 matched brackets), we have to allow more in the quoting, and 6921 be prepared for nested brackets. 6922 */ 6923 else { 6924 /* read until we run out of string, or we find the terminator */ 6925 for (; s < PL_bufend; s++,to++) { 6926 /* embedded newlines increment the line count */ 6927 if (*s == '\n' && !PL_rsfp) 6928 CopLINE_inc(PL_curcop); 6929 /* backslashes can escape the open or closing characters */ 6930 if (*s == '\\' && s+1 < PL_bufend) { 6931 if (!keep_quoted && 6932 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 6933 s++; 6934 else 6935 *to++ = *s++; 6936 } 6937 /* allow nested opens and closes */ 6938 else if (*s == PL_multi_close && --brackets <= 0) 6939 break; 6940 else if (*s == PL_multi_open) 6941 brackets++; 6942 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 6943 has_utf8 = TRUE; 6944 *to = *s; 6945 } 6946 } 6947 /* terminate the copied string and update the sv's end-of-string */ 6948 *to = '\0'; 6949 SvCUR_set(sv, to - SvPVX(sv)); 6950 6951 /* 6952 * this next chunk reads more into the buffer if we're not done yet 6953 */ 6954 6955 if (s < PL_bufend) 6956 break; /* handle case where we are done yet :-) */ 6957 6958 #ifndef PERL_STRICT_CR 6959 if (to - SvPVX(sv) >= 2) { 6960 if ((to[-2] == '\r' && to[-1] == '\n') || 6961 (to[-2] == '\n' && to[-1] == '\r')) 6962 { 6963 to[-2] = '\n'; 6964 to--; 6965 SvCUR_set(sv, to - SvPVX(sv)); 6966 } 6967 else if (to[-1] == '\r') 6968 to[-1] = '\n'; 6969 } 6970 else if (to - SvPVX(sv) == 1 && to[-1] == '\r') 6971 to[-1] = '\n'; 6972 #endif 6973 6974 /* if we're out of file, or a read fails, bail and reset the current 6975 line marker so we can report where the unterminated string began 6976 */ 6977 if (!PL_rsfp || 6978 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { 6979 sv_free(sv); 6980 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 6981 return Nullch; 6982 } 6983 /* we read a line, so increment our line counter */ 6984 CopLINE_inc(PL_curcop); 6985 6986 /* update debugger info */ 6987 if (PERLDB_LINE && PL_curstash != PL_debstash) { 6988 SV *sv = NEWSV(88,0); 6989 6990 sv_upgrade(sv, SVt_PVMG); 6991 sv_setsv(sv,PL_linestr); 6992 (void)SvIOK_on(sv); 6993 SvIVX(sv) = 0; 6994 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv); 6995 } 6996 6997 /* having changed the buffer, we must update PL_bufend */ 6998 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 6999 PL_last_lop = PL_last_uni = Nullch; 7000 } 7001 7002 /* at this point, we have successfully read the delimited string */ 7003 7004 if (keep_delims) 7005 sv_catpvn(sv, s, 1); 7006 if (has_utf8) 7007 SvUTF8_on(sv); 7008 PL_multi_end = CopLINE(PL_curcop); 7009 s++; 7010 7011 /* if we allocated too much space, give some back */ 7012 if (SvCUR(sv) + 5 < SvLEN(sv)) { 7013 SvLEN_set(sv, SvCUR(sv) + 1); 7014 Renew(SvPVX(sv), SvLEN(sv), char); 7015 } 7016 7017 /* decide whether this is the first or second quoted string we've read 7018 for this op 7019 */ 7020 7021 if (PL_lex_stuff) 7022 PL_lex_repl = sv; 7023 else 7024 PL_lex_stuff = sv; 7025 return s; 7026 } 7027 7028 /* 7029 scan_num 7030 takes: pointer to position in buffer 7031 returns: pointer to new position in buffer 7032 side-effects: builds ops for the constant in yylval.op 7033 7034 Read a number in any of the formats that Perl accepts: 7035 7036 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 7037 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 7038 0b[01](_?[01])* 7039 0[0-7](_?[0-7])* 7040 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* 7041 7042 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 7043 thing it reads. 7044 7045 If it reads a number without a decimal point or an exponent, it will 7046 try converting the number to an integer and see if it can do so 7047 without loss of precision. 7048 */ 7049 7050 char * 7051 Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) 7052 { 7053 register char *s = start; /* current position in buffer */ 7054 register char *d; /* destination in temp buffer */ 7055 register char *e; /* end of temp buffer */ 7056 NV nv; /* number read, as a double */ 7057 SV *sv = Nullsv; /* place to put the converted number */ 7058 bool floatit; /* boolean: int or float? */ 7059 char *lastub = 0; /* position of last underbar */ 7060 static char number_too_long[] = "Number too long"; 7061 7062 /* We use the first character to decide what type of number this is */ 7063 7064 switch (*s) { 7065 default: 7066 Perl_croak(aTHX_ "panic: scan_num"); 7067 7068 /* if it starts with a 0, it could be an octal number, a decimal in 7069 0.13 disguise, or a hexadecimal number, or a binary number. */ 7070 case '0': 7071 { 7072 /* variables: 7073 u holds the "number so far" 7074 shift the power of 2 of the base 7075 (hex == 4, octal == 3, binary == 1) 7076 overflowed was the number more than we can hold? 7077 7078 Shift is used when we add a digit. It also serves as an "are 7079 we in octal/hex/binary?" indicator to disallow hex characters 7080 when in octal mode. 7081 */ 7082 NV n = 0.0; 7083 UV u = 0; 7084 I32 shift; 7085 bool overflowed = FALSE; 7086 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 7087 static char* bases[5] = { "", "binary", "", "octal", 7088 "hexadecimal" }; 7089 static char* Bases[5] = { "", "Binary", "", "Octal", 7090 "Hexadecimal" }; 7091 static char *maxima[5] = { "", 7092 "0b11111111111111111111111111111111", 7093 "", 7094 "037777777777", 7095 "0xffffffff" }; 7096 char *base, *Base, *max; 7097 7098 /* check for hex */ 7099 if (s[1] == 'x') { 7100 shift = 4; 7101 s += 2; 7102 } else if (s[1] == 'b') { 7103 shift = 1; 7104 s += 2; 7105 } 7106 /* check for a decimal in disguise */ 7107 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') 7108 goto decimal; 7109 /* so it must be octal */ 7110 else { 7111 shift = 3; 7112 s++; 7113 } 7114 7115 if (*s == '_') { 7116 if (ckWARN(WARN_SYNTAX)) 7117 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7118 "Misplaced _ in number"); 7119 lastub = s++; 7120 } 7121 7122 base = bases[shift]; 7123 Base = Bases[shift]; 7124 max = maxima[shift]; 7125 7126 /* read the rest of the number */ 7127 for (;;) { 7128 /* x is used in the overflow test, 7129 b is the digit we're adding on. */ 7130 UV x, b; 7131 7132 switch (*s) { 7133 7134 /* if we don't mention it, we're done */ 7135 default: 7136 goto out; 7137 7138 /* _ are ignored -- but warned about if consecutive */ 7139 case '_': 7140 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) 7141 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7142 "Misplaced _ in number"); 7143 lastub = s++; 7144 break; 7145 7146 /* 8 and 9 are not octal */ 7147 case '8': case '9': 7148 if (shift == 3) 7149 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 7150 /* FALL THROUGH */ 7151 7152 /* octal digits */ 7153 case '2': case '3': case '4': 7154 case '5': case '6': case '7': 7155 if (shift == 1) 7156 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 7157 /* FALL THROUGH */ 7158 7159 case '0': case '1': 7160 b = *s++ & 15; /* ASCII digit -> value of digit */ 7161 goto digit; 7162 7163 /* hex digits */ 7164 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 7165 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 7166 /* make sure they said 0x */ 7167 if (shift != 4) 7168 goto out; 7169 b = (*s++ & 7) + 9; 7170 7171 /* Prepare to put the digit we have onto the end 7172 of the number so far. We check for overflows. 7173 */ 7174 7175 digit: 7176 if (!overflowed) { 7177 x = u << shift; /* make room for the digit */ 7178 7179 if ((x >> shift) != u 7180 && !(PL_hints & HINT_NEW_BINARY)) { 7181 overflowed = TRUE; 7182 n = (NV) u; 7183 if (ckWARN_d(WARN_OVERFLOW)) 7184 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), 7185 "Integer overflow in %s number", 7186 base); 7187 } else 7188 u = x | b; /* add the digit to the end */ 7189 } 7190 if (overflowed) { 7191 n *= nvshift[shift]; 7192 /* If an NV has not enough bits in its 7193 * mantissa to represent an UV this summing of 7194 * small low-order numbers is a waste of time 7195 * (because the NV cannot preserve the 7196 * low-order bits anyway): we could just 7197 * remember when did we overflow and in the 7198 * end just multiply n by the right 7199 * amount. */ 7200 n += (NV) b; 7201 } 7202 break; 7203 } 7204 } 7205 7206 /* if we get here, we had success: make a scalar value from 7207 the number. 7208 */ 7209 out: 7210 7211 /* final misplaced underbar check */ 7212 if (s[-1] == '_') { 7213 if (ckWARN(WARN_SYNTAX)) 7214 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 7215 } 7216 7217 sv = NEWSV(92,0); 7218 if (overflowed) { 7219 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0) 7220 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 7221 "%s number > %s non-portable", 7222 Base, max); 7223 sv_setnv(sv, n); 7224 } 7225 else { 7226 #if UVSIZE > 4 7227 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff) 7228 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 7229 "%s number > %s non-portable", 7230 Base, max); 7231 #endif 7232 sv_setuv(sv, u); 7233 } 7234 if (PL_hints & HINT_NEW_BINARY) 7235 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL); 7236 } 7237 break; 7238 7239 /* 7240 handle decimal numbers. 7241 we're also sent here when we read a 0 as the first digit 7242 */ 7243 case '1': case '2': case '3': case '4': case '5': 7244 case '6': case '7': case '8': case '9': case '.': 7245 decimal: 7246 d = PL_tokenbuf; 7247 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 7248 floatit = FALSE; 7249 7250 /* read next group of digits and _ and copy into d */ 7251 while (isDIGIT(*s) || *s == '_') { 7252 /* skip underscores, checking for misplaced ones 7253 if -w is on 7254 */ 7255 if (*s == '_') { 7256 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) 7257 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7258 "Misplaced _ in number"); 7259 lastub = s++; 7260 } 7261 else { 7262 /* check for end of fixed-length buffer */ 7263 if (d >= e) 7264 Perl_croak(aTHX_ number_too_long); 7265 /* if we're ok, copy the character */ 7266 *d++ = *s++; 7267 } 7268 } 7269 7270 /* final misplaced underbar check */ 7271 if (lastub && s == lastub + 1) { 7272 if (ckWARN(WARN_SYNTAX)) 7273 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 7274 } 7275 7276 /* read a decimal portion if there is one. avoid 7277 3..5 being interpreted as the number 3. followed 7278 by .5 7279 */ 7280 if (*s == '.' && s[1] != '.') { 7281 floatit = TRUE; 7282 *d++ = *s++; 7283 7284 if (*s == '_') { 7285 if (ckWARN(WARN_SYNTAX)) 7286 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7287 "Misplaced _ in number"); 7288 lastub = s; 7289 } 7290 7291 /* copy, ignoring underbars, until we run out of digits. 7292 */ 7293 for (; isDIGIT(*s) || *s == '_'; s++) { 7294 /* fixed length buffer check */ 7295 if (d >= e) 7296 Perl_croak(aTHX_ number_too_long); 7297 if (*s == '_') { 7298 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) 7299 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7300 "Misplaced _ in number"); 7301 lastub = s; 7302 } 7303 else 7304 *d++ = *s; 7305 } 7306 /* fractional part ending in underbar? */ 7307 if (s[-1] == '_') { 7308 if (ckWARN(WARN_SYNTAX)) 7309 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7310 "Misplaced _ in number"); 7311 } 7312 if (*s == '.' && isDIGIT(s[1])) { 7313 /* oops, it's really a v-string, but without the "v" */ 7314 s = start; 7315 goto vstring; 7316 } 7317 } 7318 7319 /* read exponent part, if present */ 7320 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) { 7321 floatit = TRUE; 7322 s++; 7323 7324 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ 7325 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ 7326 7327 /* stray preinitial _ */ 7328 if (*s == '_') { 7329 if (ckWARN(WARN_SYNTAX)) 7330 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7331 "Misplaced _ in number"); 7332 lastub = s++; 7333 } 7334 7335 /* allow positive or negative exponent */ 7336 if (*s == '+' || *s == '-') 7337 *d++ = *s++; 7338 7339 /* stray initial _ */ 7340 if (*s == '_') { 7341 if (ckWARN(WARN_SYNTAX)) 7342 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7343 "Misplaced _ in number"); 7344 lastub = s++; 7345 } 7346 7347 /* read digits of exponent */ 7348 while (isDIGIT(*s) || *s == '_') { 7349 if (isDIGIT(*s)) { 7350 if (d >= e) 7351 Perl_croak(aTHX_ number_too_long); 7352 *d++ = *s++; 7353 } 7354 else { 7355 if (ckWARN(WARN_SYNTAX) && 7356 ((lastub && s == lastub + 1) || 7357 (!isDIGIT(s[1]) && s[1] != '_'))) 7358 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 7359 "Misplaced _ in number"); 7360 lastub = s++; 7361 } 7362 } 7363 } 7364 7365 7366 /* make an sv from the string */ 7367 sv = NEWSV(92,0); 7368 7369 /* 7370 We try to do an integer conversion first if no characters 7371 indicating "float" have been found. 7372 */ 7373 7374 if (!floatit) { 7375 UV uv; 7376 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 7377 7378 if (flags == IS_NUMBER_IN_UV) { 7379 if (uv <= IV_MAX) 7380 sv_setiv(sv, uv); /* Prefer IVs over UVs. */ 7381 else 7382 sv_setuv(sv, uv); 7383 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 7384 if (uv <= (UV) IV_MIN) 7385 sv_setiv(sv, -(IV)uv); 7386 else 7387 floatit = TRUE; 7388 } else 7389 floatit = TRUE; 7390 } 7391 if (floatit) { 7392 /* terminate the string */ 7393 *d = '\0'; 7394 nv = Atof(PL_tokenbuf); 7395 sv_setnv(sv, nv); 7396 } 7397 7398 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) : 7399 (PL_hints & HINT_NEW_INTEGER) ) 7400 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf, 7401 (floatit ? "float" : "integer"), 7402 sv, Nullsv, NULL); 7403 break; 7404 7405 /* if it starts with a v, it could be a v-string */ 7406 case 'v': 7407 vstring: 7408 sv = NEWSV(92,5); /* preallocate storage space */ 7409 s = new_vstring(s,sv); 7410 break; 7411 } 7412 7413 /* make the op for the constant and return */ 7414 7415 if (sv) 7416 lvalp->opval = newSVOP(OP_CONST, 0, sv); 7417 else 7418 lvalp->opval = Nullop; 7419 7420 return s; 7421 } 7422 7423 STATIC char * 7424 S_scan_formline(pTHX_ register char *s) 7425 { 7426 register char *eol; 7427 register char *t; 7428 SV *stuff = newSVpvn("",0); 7429 bool needargs = FALSE; 7430 7431 while (!needargs) { 7432 if (*s == '.' || *s == /*{*/'}') { 7433 /*SUPPRESS 530*/ 7434 #ifdef PERL_STRICT_CR 7435 for (t = s+1;SPACE_OR_TAB(*t); t++) ; 7436 #else 7437 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ; 7438 #endif 7439 if (*t == '\n' || t == PL_bufend) 7440 break; 7441 } 7442 if (PL_in_eval && !PL_rsfp) { 7443 eol = strchr(s,'\n'); 7444 if (!eol++) 7445 eol = PL_bufend; 7446 } 7447 else 7448 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7449 if (*s != '#') { 7450 for (t = s; t < eol; t++) { 7451 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 7452 needargs = FALSE; 7453 goto enough; /* ~~ must be first line in formline */ 7454 } 7455 if (*t == '@' || *t == '^') 7456 needargs = TRUE; 7457 } 7458 if (eol > s) { 7459 sv_catpvn(stuff, s, eol-s); 7460 #ifndef PERL_STRICT_CR 7461 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 7462 char *end = SvPVX(stuff) + SvCUR(stuff); 7463 end[-2] = '\n'; 7464 end[-1] = '\0'; 7465 SvCUR(stuff)--; 7466 } 7467 #endif 7468 } 7469 else 7470 break; 7471 } 7472 s = eol; 7473 if (PL_rsfp) { 7474 s = filter_gets(PL_linestr, PL_rsfp, 0); 7475 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); 7476 PL_bufend = PL_bufptr + SvCUR(PL_linestr); 7477 PL_last_lop = PL_last_uni = Nullch; 7478 if (!s) { 7479 s = PL_bufptr; 7480 yyerror("Format not terminated"); 7481 break; 7482 } 7483 } 7484 incline(s); 7485 } 7486 enough: 7487 if (SvCUR(stuff)) { 7488 PL_expect = XTERM; 7489 if (needargs) { 7490 PL_lex_state = LEX_NORMAL; 7491 PL_nextval[PL_nexttoke].ival = 0; 7492 force_next(','); 7493 } 7494 else 7495 PL_lex_state = LEX_FORMLINE; 7496 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff); 7497 force_next(THING); 7498 PL_nextval[PL_nexttoke].ival = OP_FORMLINE; 7499 force_next(LSTOP); 7500 } 7501 else { 7502 SvREFCNT_dec(stuff); 7503 PL_lex_formbrack = 0; 7504 PL_bufptr = s; 7505 } 7506 return s; 7507 } 7508 7509 STATIC void 7510 S_set_csh(pTHX) 7511 { 7512 #ifdef CSH 7513 if (!PL_cshlen) 7514 PL_cshlen = strlen(PL_cshname); 7515 #endif 7516 } 7517 7518 I32 7519 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 7520 { 7521 I32 oldsavestack_ix = PL_savestack_ix; 7522 CV* outsidecv = PL_compcv; 7523 AV* comppadlist; 7524 7525 if (PL_compcv) { 7526 assert(SvTYPE(PL_compcv) == SVt_PVCV); 7527 } 7528 SAVEI32(PL_subline); 7529 save_item(PL_subname); 7530 SAVEI32(PL_padix); 7531 SAVECOMPPAD(); 7532 SAVESPTR(PL_comppad_name); 7533 SAVESPTR(PL_compcv); 7534 SAVEI32(PL_comppad_name_fill); 7535 SAVEI32(PL_min_intro_pending); 7536 SAVEI32(PL_max_intro_pending); 7537 SAVEI32(PL_pad_reset_pending); 7538 7539 PL_compcv = (CV*)NEWSV(1104,0); 7540 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV); 7541 CvFLAGS(PL_compcv) |= flags; 7542 7543 PL_comppad = newAV(); 7544 av_push(PL_comppad, Nullsv); 7545 PL_curpad = AvARRAY(PL_comppad); 7546 PL_comppad_name = newAV(); 7547 PL_comppad_name_fill = 0; 7548 PL_min_intro_pending = 0; 7549 PL_padix = 0; 7550 PL_subline = CopLINE(PL_curcop); 7551 #ifdef USE_5005THREADS 7552 av_store(PL_comppad_name, 0, newSVpvn("@_", 2)); 7553 PL_curpad[0] = (SV*)newAV(); 7554 SvPADMY_on(PL_curpad[0]); /* XXX Needed? */ 7555 #endif /* USE_5005THREADS */ 7556 7557 comppadlist = newAV(); 7558 AvREAL_off(comppadlist); 7559 av_store(comppadlist, 0, (SV*)PL_comppad_name); 7560 av_store(comppadlist, 1, (SV*)PL_comppad); 7561 7562 CvPADLIST(PL_compcv) = comppadlist; 7563 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv); 7564 #ifdef USE_5005THREADS 7565 CvOWNER(PL_compcv) = 0; 7566 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex); 7567 MUTEX_INIT(CvMUTEXP(PL_compcv)); 7568 #endif /* USE_5005THREADS */ 7569 7570 return oldsavestack_ix; 7571 } 7572 7573 #ifdef __SC__ 7574 #pragma segment Perl_yylex 7575 #endif 7576 int 7577 Perl_yywarn(pTHX_ char *s) 7578 { 7579 PL_in_eval |= EVAL_WARNONLY; 7580 yyerror(s); 7581 PL_in_eval &= ~EVAL_WARNONLY; 7582 return 0; 7583 } 7584 7585 int 7586 Perl_yyerror(pTHX_ char *s) 7587 { 7588 char *where = NULL; 7589 char *context = NULL; 7590 int contlen = -1; 7591 SV *msg; 7592 7593 if (!yychar || (yychar == ';' && !PL_rsfp)) 7594 where = "at EOF"; 7595 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 && 7596 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) { 7597 /* 7598 Only for NetWare: 7599 The code below is removed for NetWare because it abends/crashes on NetWare 7600 when the script has error such as not having the closing quotes like: 7601 if ($var eq "value) 7602 Checking of white spaces is anyway done in NetWare code. 7603 */ 7604 #ifndef NETWARE 7605 while (isSPACE(*PL_oldoldbufptr)) 7606 PL_oldoldbufptr++; 7607 #endif 7608 context = PL_oldoldbufptr; 7609 contlen = PL_bufptr - PL_oldoldbufptr; 7610 } 7611 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 && 7612 PL_oldbufptr != PL_bufptr) { 7613 /* 7614 Only for NetWare: 7615 The code below is removed for NetWare because it abends/crashes on NetWare 7616 when the script has error such as not having the closing quotes like: 7617 if ($var eq "value) 7618 Checking of white spaces is anyway done in NetWare code. 7619 */ 7620 #ifndef NETWARE 7621 while (isSPACE(*PL_oldbufptr)) 7622 PL_oldbufptr++; 7623 #endif 7624 context = PL_oldbufptr; 7625 contlen = PL_bufptr - PL_oldbufptr; 7626 } 7627 else if (yychar > 255) 7628 where = "next token ???"; 7629 #ifdef USE_PURE_BISON 7630 /* GNU Bison sets the value -2 */ 7631 else if (yychar == -2) { 7632 #else 7633 else if ((yychar & 127) == 127) { 7634 #endif 7635 if (PL_lex_state == LEX_NORMAL || 7636 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 7637 where = "at end of line"; 7638 else if (PL_lex_inpat) 7639 where = "within pattern"; 7640 else 7641 where = "within string"; 7642 } 7643 else { 7644 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10)); 7645 if (yychar < 32) 7646 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 7647 else if (isPRINT_LC(yychar)) 7648 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar); 7649 else 7650 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 7651 where = SvPVX(where_sv); 7652 } 7653 msg = sv_2mortal(newSVpv(s, 0)); 7654 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 7655 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); 7656 if (context) 7657 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context); 7658 else 7659 Perl_sv_catpvf(aTHX_ msg, "%s\n", where); 7660 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 7661 Perl_sv_catpvf(aTHX_ msg, 7662 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 7663 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 7664 PL_multi_end = 0; 7665 } 7666 if (PL_in_eval & EVAL_WARNONLY) 7667 Perl_warn(aTHX_ "%"SVf, msg); 7668 else 7669 qerror(msg); 7670 if (PL_error_count >= 10) { 7671 if (PL_in_eval && SvCUR(ERRSV)) 7672 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 7673 ERRSV, OutCopFILE(PL_curcop)); 7674 else 7675 Perl_croak(aTHX_ "%s has too many errors.\n", 7676 OutCopFILE(PL_curcop)); 7677 } 7678 PL_in_my = 0; 7679 PL_in_my_stash = Nullhv; 7680 return 0; 7681 } 7682 #ifdef __SC__ 7683 #pragma segment Main 7684 #endif 7685 7686 STATIC char* 7687 S_swallow_bom(pTHX_ U8 *s) 7688 { 7689 STRLEN slen; 7690 slen = SvCUR(PL_linestr); 7691 switch (*s) { 7692 case 0xFF: 7693 if (s[1] == 0xFE) { 7694 /* UTF-16 little-endian */ 7695 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 7696 Perl_croak(aTHX_ "Unsupported script encoding"); 7697 #ifndef PERL_NO_UTF16_FILTER 7698 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n")); 7699 s += 2; 7700 if (PL_bufend > (char*)s) { 7701 U8 *news; 7702 I32 newlen; 7703 7704 filter_add(utf16rev_textfilter, NULL); 7705 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); 7706 PL_bufend = (char*)utf16_to_utf8_reversed(s, news, 7707 PL_bufend - (char*)s - 1, 7708 &newlen); 7709 Copy(news, s, newlen, U8); 7710 SvCUR_set(PL_linestr, newlen); 7711 PL_bufend = SvPVX(PL_linestr) + newlen; 7712 news[newlen++] = '\0'; 7713 Safefree(news); 7714 } 7715 #else 7716 Perl_croak(aTHX_ "Unsupported script encoding"); 7717 #endif 7718 } 7719 break; 7720 case 0xFE: 7721 if (s[1] == 0xFF) { /* UTF-16 big-endian */ 7722 #ifndef PERL_NO_UTF16_FILTER 7723 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n")); 7724 s += 2; 7725 if (PL_bufend > (char *)s) { 7726 U8 *news; 7727 I32 newlen; 7728 7729 filter_add(utf16_textfilter, NULL); 7730 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); 7731 PL_bufend = (char*)utf16_to_utf8(s, news, 7732 PL_bufend - (char*)s, 7733 &newlen); 7734 Copy(news, s, newlen, U8); 7735 SvCUR_set(PL_linestr, newlen); 7736 PL_bufend = SvPVX(PL_linestr) + newlen; 7737 news[newlen++] = '\0'; 7738 Safefree(news); 7739 } 7740 #else 7741 Perl_croak(aTHX_ "Unsupported script encoding"); 7742 #endif 7743 } 7744 break; 7745 case 0xEF: 7746 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { 7747 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n")); 7748 s += 3; /* UTF-8 */ 7749 } 7750 break; 7751 case 0: 7752 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */ 7753 s[2] == 0xFE && s[3] == 0xFF) 7754 { 7755 Perl_croak(aTHX_ "Unsupported script encoding"); 7756 } 7757 } 7758 return (char*)s; 7759 } 7760 7761 /* 7762 * restore_rsfp 7763 * Restore a source filter. 7764 */ 7765 7766 static void 7767 restore_rsfp(pTHX_ void *f) 7768 { 7769 PerlIO *fp = (PerlIO*)f; 7770 7771 if (PL_rsfp == PerlIO_stdin()) 7772 PerlIO_clearerr(PL_rsfp); 7773 else if (PL_rsfp && (PL_rsfp != fp)) 7774 PerlIO_close(PL_rsfp); 7775 PL_rsfp = fp; 7776 } 7777 7778 #ifndef PERL_NO_UTF16_FILTER 7779 static I32 7780 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 7781 { 7782 I32 count = FILTER_READ(idx+1, sv, maxlen); 7783 if (count) { 7784 U8* tmps; 7785 U8* tend; 7786 I32 newlen; 7787 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); 7788 if (!*SvPV_nolen(sv)) 7789 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ 7790 return count; 7791 7792 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); 7793 sv_usepvn(sv, (char*)tmps, tend - tmps); 7794 } 7795 return count; 7796 } 7797 7798 static I32 7799 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen) 7800 { 7801 I32 count = FILTER_READ(idx+1, sv, maxlen); 7802 if (count) { 7803 U8* tmps; 7804 U8* tend; 7805 I32 newlen; 7806 if (!*SvPV_nolen(sv)) 7807 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ 7808 return count; 7809 7810 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); 7811 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); 7812 sv_usepvn(sv, (char*)tmps, tend - tmps); 7813 } 7814 return count; 7815 } 7816 #endif 7817 7818