1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 This is the lower layer of the Perl parser, managing characters and tokens. 27 28 =for apidoc AmnU|yy_parser *|PL_parser 29 30 Pointer to a structure encapsulating the state of the parsing operation 31 currently in progress. The pointer can be locally changed to perform 32 a nested parse without interfering with the state of an outer parse. 33 Individual members of C<PL_parser> have their own documentation. 34 35 =cut 36 */ 37 38 #include "EXTERN.h" 39 #define PERL_IN_TOKE_C 40 #include "perl.h" 41 #include "invlist_inline.h" 42 43 #define new_constant(a,b,c,d,e,f,g, h) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h) 45 46 #define pl_yylval (PL_parser->yylval) 47 48 /* XXX temporary backwards compatibility */ 49 #define PL_lex_brackets (PL_parser->lex_brackets) 50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 52 #define PL_lex_brackstack (PL_parser->lex_brackstack) 53 #define PL_lex_casemods (PL_parser->lex_casemods) 54 #define PL_lex_casestack (PL_parser->lex_casestack) 55 #define PL_lex_dojoin (PL_parser->lex_dojoin) 56 #define PL_lex_formbrack (PL_parser->lex_formbrack) 57 #define PL_lex_inpat (PL_parser->lex_inpat) 58 #define PL_lex_inwhat (PL_parser->lex_inwhat) 59 #define PL_lex_op (PL_parser->lex_op) 60 #define PL_lex_repl (PL_parser->lex_repl) 61 #define PL_lex_starts (PL_parser->lex_starts) 62 #define PL_lex_stuff (PL_parser->lex_stuff) 63 #define PL_multi_start (PL_parser->multi_start) 64 #define PL_multi_open (PL_parser->multi_open) 65 #define PL_multi_close (PL_parser->multi_close) 66 #define PL_preambled (PL_parser->preambled) 67 #define PL_linestr (PL_parser->linestr) 68 #define PL_expect (PL_parser->expect) 69 #define PL_copline (PL_parser->copline) 70 #define PL_bufptr (PL_parser->bufptr) 71 #define PL_oldbufptr (PL_parser->oldbufptr) 72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 73 #define PL_linestart (PL_parser->linestart) 74 #define PL_bufend (PL_parser->bufend) 75 #define PL_last_uni (PL_parser->last_uni) 76 #define PL_last_lop (PL_parser->last_lop) 77 #define PL_last_lop_op (PL_parser->last_lop_op) 78 #define PL_lex_state (PL_parser->lex_state) 79 #define PL_rsfp (PL_parser->rsfp) 80 #define PL_rsfp_filters (PL_parser->rsfp_filters) 81 #define PL_in_my (PL_parser->in_my) 82 #define PL_in_my_stash (PL_parser->in_my_stash) 83 #define PL_tokenbuf (PL_parser->tokenbuf) 84 #define PL_multi_end (PL_parser->multi_end) 85 #define PL_error_count (PL_parser->error_count) 86 87 # define PL_nexttoke (PL_parser->nexttoke) 88 # define PL_nexttype (PL_parser->nexttype) 89 # define PL_nextval (PL_parser->nextval) 90 91 92 #define SvEVALED(sv) \ 93 (SvTYPE(sv) >= SVt_PVNV \ 94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen) 95 96 static const char ident_too_long[] = "Identifier too long"; 97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'"; 98 99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 100 101 #define XENUMMASK 0x3f 102 #define XFAKEEOF 0x40 103 #define XFAKEBRACK 0x80 104 105 #ifdef USE_UTF8_SCRIPTS 106 # define UTF cBOOL(!IN_BYTES) 107 #else 108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 109 #endif 110 111 /* The maximum number of characters preceding the unrecognized one to display */ 112 #define UNRECOGNIZED_PRECEDE_COUNT 10 113 114 /* In variables named $^X, these are the legal values for X. 115 * 1999-02-27 mjd-perl-patch@plover.com */ 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x))) 117 118 /* Non-identifier plugin infix operators are allowed any printing character 119 * except spaces, digits, or identifier chars 120 */ 121 #define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c)) 122 /* Plugin infix operators may not begin with a quote symbol */ 123 #define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'') 124 125 #define PLUGINFIX_IS_ENABLED UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard) 126 127 #define SPACE_OR_TAB(c) isBLANK_A(c) 128 129 #define HEXFP_PEEK(s) \ 130 (((s[0] == '.') && \ 131 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ 132 isALPHA_FOLD_EQ(s[0], 'p')) 133 134 /* LEX_* are values for PL_lex_state, the state of the lexer. 135 * They are arranged oddly so that the guard on the switch statement 136 * can get by with a single comparison (if the compiler is smart enough). 137 * 138 * These values refer to the various states within a sublex parse, 139 * i.e. within a double quotish string 140 */ 141 142 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 143 144 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 145 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 146 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 147 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 148 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 149 150 /* at end of code, eg "$x" followed by: */ 151 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 152 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 153 154 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 155 string or after \E, $foo, etc */ 156 #define LEX_INTERPCONST 2 /* NOT USED */ 157 #define LEX_FORMLINE 1 /* expecting a format line */ 158 159 /* returned to yyl_try() to request it to retry the parse loop, expected to only 160 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof() 161 can also return it. 162 163 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1, 164 other token values are 258 or higher (see perly.h), so -1 should be 165 a safe value here. 166 */ 167 #define YYL_RETRY (-1) 168 169 #ifdef DEBUGGING 170 static const char* const lex_state_names[] = { 171 "KNOWNEXT", 172 "FORMLINE", 173 "INTERPCONST", 174 "INTERPCONCAT", 175 "INTERPENDMAYBE", 176 "INTERPEND", 177 "INTERPSTART", 178 "INTERPPUSH", 179 "INTERPCASEMOD", 180 "INTERPNORMAL", 181 "NORMAL" 182 }; 183 #endif 184 185 #include "keywords.h" 186 187 /* CLINE is a macro that ensures PL_copline has a sane value */ 188 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 190 191 /* 192 * Convenience functions to return different tokens and prime the 193 * lexer for the next token. They all take an argument. 194 * 195 * TOKEN : generic token (used for '(', DOLSHARP, etc) 196 * OPERATOR : generic operator 197 * AOPERATOR : assignment operator 198 * PREBLOCK : beginning the block after an if, while, foreach, ... 199 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 200 * PREREF : *EXPR where EXPR is not a simple identifier 201 * TERM : expression term 202 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 203 * LOOPX : loop exiting command (goto, last, dump, etc) 204 * FTST : file test operator 205 * FUN0 : zero-argument function 206 * FUN0OP : zero-argument function, with its op created in this file 207 * FUN1 : not used, except for not, which isn't a UNIOP 208 * BOop : bitwise or or xor 209 * BAop : bitwise and 210 * BCop : bitwise complement 211 * SHop : shift operator 212 * PWop : power operator 213 * PMop : pattern-matching operator 214 * Aop : addition-level operator 215 * AopNOASSIGN : addition-level operator that is never part of .= 216 * Mop : multiplication-level operator 217 * ChEop : chaining equality-testing operator 218 * NCEop : non-chaining comparison operator at equality precedence 219 * ChRop : chaining relational operator <= != gt 220 * NCRop : non-chaining relational operator isa 221 * 222 * Also see LOP and lop() below. 223 */ 224 225 #ifdef DEBUGGING /* Serve -DT. */ 226 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 227 #else 228 # define REPORT(retval) (retval) 229 #endif 230 231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) 234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 238 #define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER)) 239 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 240 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \ 241 pl_yylval.ival=f, \ 242 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ 243 REPORT((int)LOOPEX)) 244 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 245 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 246 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 247 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 248 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) 249 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) 250 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ 251 REPORT(PERLY_TILDE) 252 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) 253 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) 254 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 255 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) 256 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) 257 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) 258 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP)) 259 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP)) 260 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP)) 261 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP)) 262 263 /* This bit of chicanery makes a unary function followed by 264 * a parenthesis into a function with one argument, highest precedence. 265 * The UNIDOR macro is for unary functions that can be followed by the // 266 * operator (such as C<shift // 0>). 267 */ 268 #define UNI3(f,x,have_x) { \ 269 pl_yylval.ival = f; \ 270 if (have_x) PL_expect = x; \ 271 PL_bufptr = s; \ 272 PL_last_uni = PL_oldbufptr; \ 273 PL_last_lop_op = (f) < 0 ? -(f) : (f); \ 274 if (*s == '(') \ 275 return REPORT( (int)FUNC1 ); \ 276 s = skipspace(s); \ 277 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 278 } 279 #define UNI(f) UNI3(f,XTERM,1) 280 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 281 #define UNIPROTO(f,optional) { \ 282 if (optional) PL_last_uni = PL_oldbufptr; \ 283 OPERATOR(f); \ 284 } 285 286 #define UNIBRACK(f) UNI3(f,0,0) 287 288 /* return has special case parsing. 289 * 290 * List operators have low precedence. Functions have high precedence. 291 * Every built in, *except return*, if written with () around its arguments, is 292 * parsed as a function. Hence every other list built in: 293 * 294 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9 295 * 429 296 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5 297 * 639 298 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()' 299 * Useless use of a constant (2) in void context at -e line 1. 300 * Useless use of a constant (4) in void context at -e line 1. 301 * 302 * $ 303 * 304 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a 305 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string. 306 * 307 * Whereas return: 308 * 309 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()' 310 * 2 311 * 4 312 * 9 313 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()' 314 * Useless use of a constant (2) in void context at -e line 1. 315 * Useless use of a constant (4) in void context at -e line 1. 316 * 9 317 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()' 318 * Useless use of a constant (2) in void context at -e line 1. 319 * Useless use of a constant (4) in void context at -e line 1. 320 * 9 321 * $ 322 * 323 * and: 324 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()' 325 * 2 326 * 4 327 * 6 328 * 329 * This last example is what we expect, but it's clearly inconsistent with how 330 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently 331 * followed. 332 * 333 * 334 * Perl 3 attempted to be consistent: 335 * 336 * The rules are more consistent about where parens are needed and 337 * where they are not. In particular, unary operators and list operators now 338 * behave like functions if they're called like functions. 339 * 340 * However, the behaviour for return was reverted to the "old" parsing with 341 * patches 9-12: 342 * 343 * The construct 344 * return (1,2,3); 345 * did not do what was expected, since return was swallowing the 346 * parens in order to consider itself a function. The solution, 347 * since return never wants any trailing expression such as 348 * return (1,2,3) + 2; 349 * is to simply make return an exception to the paren-makes-a-function 350 * rule, and treat it the way it always was, so that it doesn't 351 * strip the parens. 352 * 353 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with 354 * LOP(OP_RETURN, XTERM); 355 * 356 * and constructs such as 357 * 358 * return (Internals::V())[2] 359 * 360 * turn into syntax errors 361 */ 362 363 #define OLDLOP(f) \ 364 do { \ 365 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 366 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 367 pl_yylval.ival = (f); \ 368 PL_expect = XTERM; \ 369 PL_bufptr = s; \ 370 return (int)LSTOP; \ 371 } while(0) 372 373 #define COPLINE_INC_WITH_HERELINES \ 374 STMT_START { \ 375 CopLINE_inc(PL_curcop); \ 376 if (PL_parser->herelines) \ 377 CopLINE(PL_curcop) += PL_parser->herelines, \ 378 PL_parser->herelines = 0; \ 379 } STMT_END 380 /* Called after scan_str to update CopLINE(PL_curcop), but only when there 381 * is no sublex_push to follow. */ 382 #define COPLINE_SET_FROM_MULTI_END \ 383 STMT_START { \ 384 CopLINE_set(PL_curcop, PL_multi_end); \ 385 if (PL_multi_end != PL_multi_start) \ 386 PL_parser->herelines = 0; \ 387 } STMT_END 388 389 390 /* A file-local structure for passing around information about subroutines and 391 * related definable words */ 392 struct code { 393 SV *sv; 394 CV *cv; 395 GV *gv, **gvp; 396 OP *rv2cv_op; 397 PADOFFSET off; 398 bool lex; 399 }; 400 401 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; 402 403 #ifdef DEBUGGING 404 405 /* how to interpret the pl_yylval associated with the token */ 406 enum token_type { 407 TOKENTYPE_NONE, 408 TOKENTYPE_IVAL, 409 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 410 TOKENTYPE_PVAL, 411 TOKENTYPE_OPVAL 412 }; 413 414 #define DEBUG_TOKEN(Type, Name) \ 415 { Name, TOKENTYPE_##Type, #Name } 416 417 static struct debug_tokens { 418 const int token; 419 enum token_type type; 420 const char *name; 421 } const debug_tokens[] = 422 { 423 DEBUG_TOKEN (OPNUM, ADDOP), 424 DEBUG_TOKEN (NONE, ANDAND), 425 DEBUG_TOKEN (NONE, ANDOP), 426 DEBUG_TOKEN (NONE, ARROW), 427 DEBUG_TOKEN (OPNUM, ASSIGNOP), 428 DEBUG_TOKEN (OPNUM, BITANDOP), 429 DEBUG_TOKEN (OPNUM, BITOROP), 430 DEBUG_TOKEN (OPNUM, CHEQOP), 431 DEBUG_TOKEN (OPNUM, CHRELOP), 432 DEBUG_TOKEN (NONE, COLONATTR), 433 DEBUG_TOKEN (NONE, DOLSHARP), 434 DEBUG_TOKEN (NONE, DORDOR), 435 DEBUG_TOKEN (IVAL, DOTDOT), 436 DEBUG_TOKEN (NONE, FORMLBRACK), 437 DEBUG_TOKEN (NONE, FORMRBRACK), 438 DEBUG_TOKEN (OPNUM, FUNC), 439 DEBUG_TOKEN (OPNUM, FUNC0), 440 DEBUG_TOKEN (OPVAL, FUNC0OP), 441 DEBUG_TOKEN (OPVAL, FUNC0SUB), 442 DEBUG_TOKEN (OPNUM, FUNC1), 443 DEBUG_TOKEN (NONE, HASHBRACK), 444 DEBUG_TOKEN (IVAL, KW_CATCH), 445 DEBUG_TOKEN (IVAL, KW_CLASS), 446 DEBUG_TOKEN (IVAL, KW_CONTINUE), 447 DEBUG_TOKEN (IVAL, KW_DEFAULT), 448 DEBUG_TOKEN (IVAL, KW_DO), 449 DEBUG_TOKEN (IVAL, KW_ELSE), 450 DEBUG_TOKEN (IVAL, KW_ELSIF), 451 DEBUG_TOKEN (IVAL, KW_FIELD), 452 DEBUG_TOKEN (IVAL, KW_GIVEN), 453 DEBUG_TOKEN (IVAL, KW_FOR), 454 DEBUG_TOKEN (IVAL, KW_FORMAT), 455 DEBUG_TOKEN (IVAL, KW_IF), 456 DEBUG_TOKEN (IVAL, KW_LOCAL), 457 DEBUG_TOKEN (IVAL, KW_METHOD_anon), 458 DEBUG_TOKEN (IVAL, KW_METHOD_named), 459 DEBUG_TOKEN (IVAL, KW_MY), 460 DEBUG_TOKEN (IVAL, KW_PACKAGE), 461 DEBUG_TOKEN (IVAL, KW_REQUIRE), 462 DEBUG_TOKEN (IVAL, KW_SUB_anon), 463 DEBUG_TOKEN (IVAL, KW_SUB_anon_sig), 464 DEBUG_TOKEN (IVAL, KW_SUB_named), 465 DEBUG_TOKEN (IVAL, KW_SUB_named_sig), 466 DEBUG_TOKEN (IVAL, KW_TRY), 467 DEBUG_TOKEN (IVAL, KW_USE_or_NO), 468 DEBUG_TOKEN (IVAL, KW_UNLESS), 469 DEBUG_TOKEN (IVAL, KW_UNTIL), 470 DEBUG_TOKEN (IVAL, KW_WHEN), 471 DEBUG_TOKEN (IVAL, KW_WHILE), 472 DEBUG_TOKEN (OPVAL, LABEL), 473 DEBUG_TOKEN (OPNUM, LOOPEX), 474 DEBUG_TOKEN (OPNUM, LSTOP), 475 DEBUG_TOKEN (OPVAL, LSTOPSUB), 476 DEBUG_TOKEN (OPNUM, MATCHOP), 477 DEBUG_TOKEN (OPVAL, METHCALL), 478 DEBUG_TOKEN (OPVAL, METHCALL0), 479 DEBUG_TOKEN (OPNUM, MULOP), 480 DEBUG_TOKEN (OPNUM, NCEQOP), 481 DEBUG_TOKEN (OPNUM, NCRELOP), 482 DEBUG_TOKEN (NONE, NOAMP), 483 DEBUG_TOKEN (NONE, NOTOP), 484 DEBUG_TOKEN (IVAL, OROP), 485 DEBUG_TOKEN (IVAL, OROR), 486 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND), 487 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE), 488 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN), 489 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE), 490 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN), 491 DEBUG_TOKEN (IVAL, PERLY_COLON), 492 DEBUG_TOKEN (IVAL, PERLY_COMMA), 493 DEBUG_TOKEN (IVAL, PERLY_DOT), 494 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN), 495 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK), 496 DEBUG_TOKEN (IVAL, PERLY_MINUS), 497 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN), 498 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN), 499 DEBUG_TOKEN (IVAL, PERLY_PLUS), 500 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK), 501 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON), 502 DEBUG_TOKEN (IVAL, PERLY_SLASH), 503 DEBUG_TOKEN (IVAL, PERLY_SNAIL), 504 DEBUG_TOKEN (IVAL, PERLY_STAR), 505 DEBUG_TOKEN (IVAL, PERLY_TILDE), 506 DEBUG_TOKEN (OPVAL, PLUGEXPR), 507 DEBUG_TOKEN (OPVAL, PLUGSTMT), 508 DEBUG_TOKEN (PVAL, PLUGIN_ADD_OP), 509 DEBUG_TOKEN (PVAL, PLUGIN_ASSIGN_OP), 510 DEBUG_TOKEN (PVAL, PLUGIN_HIGH_OP), 511 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_OP), 512 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_OP), 513 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_LOW_OP), 514 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_LOW_OP), 515 DEBUG_TOKEN (PVAL, PLUGIN_LOW_OP), 516 DEBUG_TOKEN (PVAL, PLUGIN_MUL_OP), 517 DEBUG_TOKEN (PVAL, PLUGIN_POW_OP), 518 DEBUG_TOKEN (PVAL, PLUGIN_REL_OP), 519 DEBUG_TOKEN (OPVAL, PMFUNC), 520 DEBUG_TOKEN (NONE, POSTJOIN), 521 DEBUG_TOKEN (NONE, POSTDEC), 522 DEBUG_TOKEN (NONE, POSTINC), 523 DEBUG_TOKEN (OPNUM, POWOP), 524 DEBUG_TOKEN (NONE, PREDEC), 525 DEBUG_TOKEN (NONE, PREINC), 526 DEBUG_TOKEN (OPVAL, PRIVATEREF), 527 DEBUG_TOKEN (OPVAL, QWLIST), 528 DEBUG_TOKEN (NONE, REFGEN), 529 DEBUG_TOKEN (OPNUM, SHIFTOP), 530 DEBUG_TOKEN (NONE, SUBLEXEND), 531 DEBUG_TOKEN (NONE, SUBLEXSTART), 532 DEBUG_TOKEN (OPVAL, THING), 533 DEBUG_TOKEN (NONE, UMINUS), 534 DEBUG_TOKEN (OPNUM, UNIOP), 535 DEBUG_TOKEN (OPVAL, UNIOPSUB), 536 DEBUG_TOKEN (OPVAL, BAREWORD), 537 DEBUG_TOKEN (IVAL, YADAYADA), 538 { 0, TOKENTYPE_NONE, NULL } 539 }; 540 541 #undef DEBUG_TOKEN 542 543 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 544 545 STATIC int 546 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 547 { 548 PERL_ARGS_ASSERT_TOKEREPORT; 549 550 if (DEBUG_T_TEST) { 551 const char *name = NULL; 552 enum token_type type = TOKENTYPE_NONE; 553 const struct debug_tokens *p; 554 SV* const report = newSVpvs("<== "); 555 556 for (p = debug_tokens; p->token; p++) { 557 if (p->token == (int)rv) { 558 name = p->name; 559 type = p->type; 560 break; 561 } 562 } 563 if (name) 564 Perl_sv_catpv(aTHX_ report, name); 565 else if (isGRAPH(rv)) 566 { 567 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 568 if ((char)rv == 'p') 569 sv_catpvs(report, " (pending identifier)"); 570 } 571 else if (!rv) 572 sv_catpvs(report, "EOF"); 573 else 574 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv); 575 switch (type) { 576 case TOKENTYPE_NONE: 577 break; 578 case TOKENTYPE_IVAL: 579 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival); 580 break; 581 case TOKENTYPE_OPNUM: 582 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 583 PL_op_name[lvalp->ival]); 584 break; 585 case TOKENTYPE_PVAL: 586 Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval); 587 break; 588 case TOKENTYPE_OPVAL: 589 if (lvalp->opval) { 590 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 591 PL_op_name[lvalp->opval->op_type]); 592 if (lvalp->opval->op_type == OP_CONST) { 593 Perl_sv_catpvf(aTHX_ report, " %s", 594 SvPEEK(cSVOPx_sv(lvalp->opval))); 595 } 596 597 } 598 else 599 sv_catpvs(report, "(opval=null)"); 600 break; 601 } 602 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 603 }; 604 return (int)rv; 605 } 606 607 608 /* print the buffer with suitable escapes */ 609 610 STATIC void 611 S_printbuf(pTHX_ const char *const fmt, const char *const s) 612 { 613 SV* const tmp = newSVpvs(""); 614 615 PERL_ARGS_ASSERT_PRINTBUF; 616 617 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */ 618 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 619 GCC_DIAG_RESTORE_STMT; 620 SvREFCNT_dec(tmp); 621 } 622 623 #endif 624 625 /* 626 * S_ao 627 * 628 * This subroutine looks for an '=' next to the operator that has just been 629 * parsed and turns it into an ASSIGNOP if it finds one. 630 */ 631 632 STATIC int 633 S_ao(pTHX_ int toketype) 634 { 635 if (*PL_bufptr == '=') { 636 PL_bufptr++; 637 638 switch (toketype) { 639 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break; 640 case OROR: pl_yylval.ival = OP_ORASSIGN; break; 641 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break; 642 } 643 644 toketype = ASSIGNOP; 645 } 646 return REPORT(toketype); 647 } 648 649 /* 650 * S_no_op 651 * When Perl expects an operator and finds something else, no_op 652 * prints the warning. It always prints "<something> found where 653 * operator expected. It prints "Missing semicolon on previous line?" 654 * if the surprise occurs at the start of the line. "do you need to 655 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 656 * where the compiler doesn't know if foo is a method call or a function. 657 * It prints "Missing operator before end of line" if there's nothing 658 * after the missing operator, or "... before <...>" if there is something 659 * after the missing operator. 660 * 661 * PL_bufptr is expected to point to the start of the thing that was found, 662 * and s after the next token or partial token. 663 */ 664 665 STATIC void 666 S_no_op(pTHX_ const char *const what, char *s) 667 { 668 char * const oldbp = PL_bufptr; 669 const bool is_first = (PL_oldbufptr == PL_linestart); 670 SV *message = sv_2mortal( newSVpvf( 671 PERL_DIAG_WARN_SYNTAX("%s found where operator expected"), 672 what 673 ) ); 674 675 PERL_ARGS_ASSERT_NO_OP; 676 677 if (!s) 678 s = oldbp; 679 else 680 PL_bufptr = s; 681 682 if (ckWARN_d(WARN_SYNTAX)) { 683 bool has_more = FALSE; 684 if (is_first) { 685 has_more = TRUE; 686 sv_catpvs(message, 687 " (Missing semicolon on previous line?)"); 688 } 689 else if (PL_oldoldbufptr) { 690 /* yyerror (via yywarn) would do this itself, so we should too */ 691 const char *t; 692 for (t = PL_oldoldbufptr; 693 t < PL_bufptr && isSPACE(*t); 694 t += UTF ? UTF8SKIP(t) : 1) 695 { 696 NOOP; 697 } 698 /* see if we can identify the cause of the warning */ 699 if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF)) 700 { 701 const char *t_start= t; 702 for ( ; 703 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); 704 t += UTF ? UTF8SKIP(t) : 1) 705 { 706 NOOP; 707 } 708 if (t < PL_bufptr && isSPACE(*t)) { 709 has_more = TRUE; 710 sv_catpvf( message, 711 " (Do you need to predeclare \"%" UTF8f "\"?)", 712 UTF8fARG(UTF, t - t_start, t_start)); 713 } 714 } 715 } 716 if (!has_more) { 717 const char *t= oldbp; 718 assert(s >= oldbp); 719 while (t < s && isSPACE(*t)) { 720 t += UTF ? UTF8SKIP(t) : 1; 721 } 722 723 sv_catpvf(message, 724 " (Missing operator before \"%" UTF8f "\"?)", 725 UTF8fARG(UTF, s - t, t)); 726 } 727 } 728 yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0); 729 PL_bufptr = oldbp; 730 } 731 732 /* 733 * S_missingterm 734 * Complain about missing quote/regexp/heredoc terminator. 735 * If it's called with NULL then it cauterizes the line buffer. 736 * If we're in a delimited string and the delimiter is a control 737 * character, it's reformatted into a two-char sequence like ^C. 738 * This is fatal. 739 */ 740 741 STATIC void 742 S_missingterm(pTHX_ char *s, STRLEN len) 743 { 744 char tmpbuf[UTF8_MAXBYTES + 1]; 745 char q; 746 bool uni = FALSE; 747 if (s) { 748 char * const nl = (char *) my_memrchr(s, '\n', len); 749 if (nl) { 750 *nl = '\0'; 751 len = nl - s; 752 } 753 uni = UTF; 754 } 755 else if (PL_multi_close < 32) { 756 *tmpbuf = '^'; 757 tmpbuf[1] = (char)toCTRL(PL_multi_close); 758 tmpbuf[2] = '\0'; 759 s = tmpbuf; 760 len = 2; 761 } 762 else { 763 if (! UTF && LIKELY(PL_multi_close < 256)) { 764 *tmpbuf = (char)PL_multi_close; 765 tmpbuf[1] = '\0'; 766 len = 1; 767 } 768 else { 769 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); 770 *end = '\0'; 771 len = end - tmpbuf; 772 uni = TRUE; 773 } 774 s = tmpbuf; 775 } 776 q = memchr(s, '"', len) ? '\'' : '"'; 777 Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c" 778 " anywhere before EOF", q, UTF8fARG(uni, len, s), q); 779 } 780 781 #include "feature.h" 782 783 /* 784 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 785 * utf16-to-utf8-reversed. 786 */ 787 788 #ifdef PERL_CR_FILTER 789 static void 790 strip_return(SV *sv) 791 { 792 const char *s = SvPVX_const(sv); 793 const char * const e = s + SvCUR(sv); 794 795 PERL_ARGS_ASSERT_STRIP_RETURN; 796 797 /* outer loop optimized to do nothing if there are no CR-LFs */ 798 while (s < e) { 799 if (*s++ == '\r' && *s == '\n') { 800 /* hit a CR-LF, need to copy the rest */ 801 char *d = s - 1; 802 *d++ = *s++; 803 while (s < e) { 804 if (*s == '\r' && s[1] == '\n') 805 s++; 806 *d++ = *s++; 807 } 808 SvCUR(sv) -= s - d; 809 return; 810 } 811 } 812 } 813 814 STATIC I32 815 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 816 { 817 const I32 count = FILTER_READ(idx+1, sv, maxlen); 818 if (count > 0 && !maxlen) 819 strip_return(sv); 820 return count; 821 } 822 #endif 823 824 /* 825 =for apidoc lex_start 826 827 Creates and initialises a new lexer/parser state object, supplying 828 a context in which to lex and parse from a new source of Perl code. 829 A pointer to the new state object is placed in L</PL_parser>. An entry 830 is made on the save stack so that upon unwinding, the new state object 831 will be destroyed and the former value of L</PL_parser> will be restored. 832 Nothing else need be done to clean up the parsing context. 833 834 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if 835 non-null, provides a string (in SV form) containing code to be parsed. 836 A copy of the string is made, so subsequent modification of C<line> 837 does not affect parsing. C<rsfp>, if non-null, provides an input stream 838 from which code will be read to be parsed. If both are non-null, the 839 code in C<line> comes first and must consist of complete lines of input, 840 and C<rsfp> supplies the remainder of the source. 841 842 The C<flags> parameter is reserved for future use. Currently it is only 843 used by perl internally, so extensions should always pass zero. 844 845 =cut 846 */ 847 848 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 849 can share filters with the current parser. 850 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 851 caller, hence isn't owned by the parser, so shouldn't be closed on parser 852 destruction. This is used to handle the case of defaulting to reading the 853 script from the standard input because no filename was given on the command 854 line (without getting confused by situation where STDIN has been closed, so 855 the script handle is opened on fd 0) */ 856 857 void 858 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 859 { 860 const char *s = NULL; 861 yy_parser *parser, *oparser; 862 863 if (flags && flags & ~LEX_START_FLAGS) 864 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 865 866 /* create and initialise a parser */ 867 868 Newxz(parser, 1, yy_parser); 869 parser->old_parser = oparser = PL_parser; 870 PL_parser = parser; 871 872 parser->stack = NULL; 873 parser->stack_max1 = NULL; 874 parser->ps = NULL; 875 876 /* on scope exit, free this parser and restore any outer one */ 877 SAVEPARSER(parser); 878 parser->saved_curcop = PL_curcop; 879 880 /* initialise lexer state */ 881 882 parser->nexttoke = 0; 883 parser->error_count = oparser ? oparser->error_count : 0; 884 parser->copline = parser->preambling = NOLINE; 885 parser->lex_state = LEX_NORMAL; 886 parser->expect = XSTATE; 887 parser->rsfp = rsfp; 888 parser->recheck_utf8_validity = TRUE; 889 parser->rsfp_filters = 890 !(flags & LEX_START_SAME_FILTER) || !oparser 891 ? NULL 892 : MUTABLE_AV(SvREFCNT_inc( 893 oparser->rsfp_filters 894 ? oparser->rsfp_filters 895 : (oparser->rsfp_filters = newAV()) 896 )); 897 898 Newx(parser->lex_brackstack, 120, char); 899 Newx(parser->lex_casestack, 12, char); 900 *parser->lex_casestack = '\0'; 901 Newxz(parser->lex_shared, 1, LEXSHARED); 902 903 if (line) { 904 Size_t len; 905 const U8* first_bad_char_loc; 906 907 s = SvPV_const(line, len); 908 909 if ( SvUTF8(line) 910 && UNLIKELY(! is_utf8_string_loc((U8 *) s, 911 SvCUR(line), 912 &first_bad_char_loc))) 913 { 914 _force_out_malformed_utf8_message(first_bad_char_loc, 915 (U8 *) s + SvCUR(line), 916 0, 917 1 /* 1 means die */ ); 918 NOT_REACHED; /* NOTREACHED */ 919 } 920 921 parser->linestr = flags & LEX_START_COPIED 922 ? SvREFCNT_inc_simple_NN(line) 923 : newSVpvn_flags(s, len, SvUTF8(line)); 924 if (!rsfp) 925 sv_catpvs(parser->linestr, "\n;"); 926 } else { 927 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 928 } 929 930 parser->oldoldbufptr = 931 parser->oldbufptr = 932 parser->bufptr = 933 parser->linestart = SvPVX(parser->linestr); 934 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 935 parser->last_lop = parser->last_uni = NULL; 936 937 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 938 |LEX_DONT_CLOSE_RSFP)); 939 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 940 |LEX_DONT_CLOSE_RSFP)); 941 942 parser->in_pod = parser->filtered = 0; 943 } 944 945 946 /* delete a parser object */ 947 948 void 949 Perl_parser_free(pTHX_ const yy_parser *parser) 950 { 951 PERL_ARGS_ASSERT_PARSER_FREE; 952 953 PL_curcop = parser->saved_curcop; 954 SvREFCNT_dec(parser->linestr); 955 956 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 957 PerlIO_clearerr(parser->rsfp); 958 else if (parser->rsfp && (!parser->old_parser 959 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 960 PerlIO_close(parser->rsfp); 961 SvREFCNT_dec(parser->rsfp_filters); 962 SvREFCNT_dec(parser->lex_stuff); 963 SvREFCNT_dec(parser->lex_sub_repl); 964 965 Safefree(parser->lex_brackstack); 966 Safefree(parser->lex_casestack); 967 Safefree(parser->lex_shared); 968 PL_parser = parser->old_parser; 969 Safefree(parser); 970 } 971 972 void 973 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 974 { 975 I32 nexttoke = parser->nexttoke; 976 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 977 while (nexttoke--) { 978 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 979 && parser->nextval[nexttoke].opval 980 && parser->nextval[nexttoke].opval->op_slabbed 981 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 982 op_free(parser->nextval[nexttoke].opval); 983 parser->nextval[nexttoke].opval = NULL; 984 } 985 } 986 } 987 988 989 /* 990 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr 991 992 Buffer scalar containing the chunk currently under consideration of the 993 text currently being lexed. This is always a plain string scalar (for 994 which C<SvPOK> is true). It is not intended to be used as a scalar by 995 normal scalar means; instead refer to the buffer directly by the pointer 996 variables described below. 997 998 The lexer maintains various C<char*> pointers to things in the 999 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 1000 reallocated, all of these pointers must be updated. Don't attempt to 1001 do this manually, but rather use L</lex_grow_linestr> if you need to 1002 reallocate the buffer. 1003 1004 The content of the text chunk in the buffer is commonly exactly one 1005 complete line of input, up to and including a newline terminator, 1006 but there are situations where it is otherwise. The octets of the 1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 1008 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 1009 flag on this scalar, which may disagree with it. 1010 1011 For direct examination of the buffer, the variable 1012 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 1013 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 1014 of these pointers is usually preferable to examination of the scalar 1015 through normal scalar means. 1016 1017 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend 1018 1019 Direct pointer to the end of the chunk of text currently being lexed, the 1020 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 1021 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 1022 always located at the end of the buffer, and does not count as part of 1023 the buffer's contents. 1024 1025 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr 1026 1027 Points to the current position of lexing inside the lexer buffer. 1028 Characters around this point may be freely examined, within 1029 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 1030 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 1032 1033 Lexing code (whether in the Perl core or not) moves this pointer past 1034 the characters that it consumes. It is also expected to perform some 1035 bookkeeping whenever a newline character is consumed. This movement 1036 can be more conveniently performed by the function L</lex_read_to>, 1037 which handles newlines appropriately. 1038 1039 Interpretation of the buffer's octets can be abstracted out by 1040 using the slightly higher-level functions L</lex_peek_unichar> and 1041 L</lex_read_unichar>. 1042 1043 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart 1044 1045 Points to the start of the current line inside the lexer buffer. 1046 This is useful for indicating at which column an error occurred, and 1047 not much else. This must be updated by any lexing code that consumes 1048 a newline; the function L</lex_read_to> handles this detail. 1049 1050 =cut 1051 */ 1052 1053 /* 1054 =for apidoc lex_bufutf8 1055 1056 Indicates whether the octets in the lexer buffer 1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 1058 of Unicode characters. If not, they should be interpreted as Latin-1 1059 characters. This is analogous to the C<SvUTF8> flag for scalars. 1060 1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 1062 contains valid UTF-8. Lexing code must be robust in the face of invalid 1063 encoding. 1064 1065 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 1066 is significant, but not the whole story regarding the input character 1067 encoding. Normally, when a file is being read, the scalar contains octets 1068 and its C<SvUTF8> flag is off, but the octets should be interpreted as 1069 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 1070 however, the scalar may have the C<SvUTF8> flag on, and in this case its 1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 1072 is in effect. This logic may change in the future; use this function 1073 instead of implementing the logic yourself. 1074 1075 =cut 1076 */ 1077 1078 bool 1079 Perl_lex_bufutf8(pTHX) 1080 { 1081 return UTF; 1082 } 1083 1084 /* 1085 =for apidoc lex_grow_linestr 1086 1087 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 1088 at least C<len> octets (including terminating C<NUL>). Returns a 1089 pointer to the reallocated buffer. This is necessary before making 1090 any direct modification of the buffer that would increase its length. 1091 L</lex_stuff_pvn> provides a more convenient way to insert text into 1092 the buffer. 1093 1094 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 1095 this function updates all of the lexer's variables that point directly 1096 into the buffer. 1097 1098 =cut 1099 */ 1100 1101 char * 1102 Perl_lex_grow_linestr(pTHX_ STRLEN len) 1103 { 1104 SV *linestr; 1105 char *buf; 1106 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1107 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 1108 bool current; 1109 1110 linestr = PL_parser->linestr; 1111 buf = SvPVX(linestr); 1112 if (len <= SvLEN(linestr)) 1113 return buf; 1114 1115 /* Is the lex_shared linestr SV the same as the current linestr SV? 1116 * Only in this case does re_eval_start need adjusting, since it 1117 * points within lex_shared->ls_linestr's buffer */ 1118 current = ( !PL_parser->lex_shared->ls_linestr 1119 || linestr == PL_parser->lex_shared->ls_linestr); 1120 1121 bufend_pos = PL_parser->bufend - buf; 1122 bufptr_pos = PL_parser->bufptr - buf; 1123 oldbufptr_pos = PL_parser->oldbufptr - buf; 1124 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1125 linestart_pos = PL_parser->linestart - buf; 1126 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1127 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1128 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ? 1129 PL_parser->lex_shared->re_eval_start - buf : 0; 1130 1131 buf = sv_grow(linestr, len); 1132 1133 PL_parser->bufend = buf + bufend_pos; 1134 PL_parser->bufptr = buf + bufptr_pos; 1135 PL_parser->oldbufptr = buf + oldbufptr_pos; 1136 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1137 PL_parser->linestart = buf + linestart_pos; 1138 if (PL_parser->last_uni) 1139 PL_parser->last_uni = buf + last_uni_pos; 1140 if (PL_parser->last_lop) 1141 PL_parser->last_lop = buf + last_lop_pos; 1142 if (current && PL_parser->lex_shared->re_eval_start) 1143 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 1144 return buf; 1145 } 1146 1147 /* 1148 =for apidoc lex_stuff_pvn 1149 1150 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1151 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1152 reallocating the buffer if necessary. This means that lexing code that 1153 runs later will see the characters as if they had appeared in the input. 1154 It is not recommended to do this as part of normal parsing, and most 1155 uses of this facility run the risk of the inserted characters being 1156 interpreted in an unintended manner. 1157 1158 The string to be inserted is represented by C<len> octets starting 1159 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1, 1160 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>. 1161 The characters are recoded for the lexer buffer, according to how the 1162 buffer is currently being interpreted (L</lex_bufutf8>). If a string 1163 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 1164 function is more convenient. 1165 1166 =for apidoc Amnh||LEX_STUFF_UTF8 1167 1168 =cut 1169 */ 1170 1171 void 1172 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1173 { 1174 char *bufptr; 1175 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1176 if (flags & ~(LEX_STUFF_UTF8)) 1177 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1178 if (UTF) { 1179 if (flags & LEX_STUFF_UTF8) { 1180 goto plain_copy; 1181 } else { 1182 STRLEN highhalf = variant_under_utf8_count((U8 *) pv, 1183 (U8 *) pv + len); 1184 const char *p, *e = pv+len;; 1185 if (!highhalf) 1186 goto plain_copy; 1187 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1188 bufptr = PL_parser->bufptr; 1189 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1190 SvCUR_set(PL_parser->linestr, 1191 SvCUR(PL_parser->linestr) + len+highhalf); 1192 PL_parser->bufend += len+highhalf; 1193 for (p = pv; p != e; p++) { 1194 append_utf8_from_native_byte(*p, (U8 **) &bufptr); 1195 } 1196 } 1197 } else { 1198 if (flags & LEX_STUFF_UTF8) { 1199 STRLEN highhalf = 0; 1200 const char *p, *e = pv+len; 1201 for (p = pv; p != e; p++) { 1202 U8 c = (U8)*p; 1203 if (UTF8_IS_ABOVE_LATIN1(c)) { 1204 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1205 "non-Latin-1 character into Latin-1 input"); 1206 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1207 p++; 1208 highhalf++; 1209 } else assert(UTF8_IS_INVARIANT(c)); 1210 } 1211 if (!highhalf) 1212 goto plain_copy; 1213 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1214 bufptr = PL_parser->bufptr; 1215 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1216 SvCUR_set(PL_parser->linestr, 1217 SvCUR(PL_parser->linestr) + len-highhalf); 1218 PL_parser->bufend += len-highhalf; 1219 p = pv; 1220 while (p < e) { 1221 if (UTF8_IS_INVARIANT(*p)) { 1222 *bufptr++ = *p; 1223 p++; 1224 } 1225 else { 1226 assert(p < e -1 ); 1227 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); 1228 p += 2; 1229 } 1230 } 1231 } else { 1232 plain_copy: 1233 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1234 bufptr = PL_parser->bufptr; 1235 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1236 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1237 PL_parser->bufend += len; 1238 Copy(pv, bufptr, len, char); 1239 } 1240 } 1241 } 1242 1243 /* 1244 =for apidoc lex_stuff_pv 1245 1246 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1247 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1248 reallocating the buffer if necessary. This means that lexing code that 1249 runs later will see the characters as if they had appeared in the input. 1250 It is not recommended to do this as part of normal parsing, and most 1251 uses of this facility run the risk of the inserted characters being 1252 interpreted in an unintended manner. 1253 1254 The string to be inserted is represented by octets starting at C<pv> 1255 and continuing to the first nul. These octets are interpreted as either 1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1257 in C<flags>. The characters are recoded for the lexer buffer, according 1258 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1259 If it is not convenient to nul-terminate a string to be inserted, the 1260 L</lex_stuff_pvn> function is more appropriate. 1261 1262 =cut 1263 */ 1264 1265 void 1266 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1267 { 1268 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1269 lex_stuff_pvn(pv, strlen(pv), flags); 1270 } 1271 1272 /* 1273 =for apidoc lex_stuff_sv 1274 1275 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1276 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1277 reallocating the buffer if necessary. This means that lexing code that 1278 runs later will see the characters as if they had appeared in the input. 1279 It is not recommended to do this as part of normal parsing, and most 1280 uses of this facility run the risk of the inserted characters being 1281 interpreted in an unintended manner. 1282 1283 The string to be inserted is the string value of C<sv>. The characters 1284 are recoded for the lexer buffer, according to how the buffer is currently 1285 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1286 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1287 need to construct a scalar. 1288 1289 =cut 1290 */ 1291 1292 void 1293 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1294 { 1295 char *pv; 1296 STRLEN len; 1297 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1298 if (flags) 1299 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1300 pv = SvPV(sv, len); 1301 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1302 } 1303 1304 /* 1305 =for apidoc lex_unstuff 1306 1307 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1308 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened. 1309 This hides the discarded text from any lexing code that runs later, 1310 as if the text had never appeared. 1311 1312 This is not the normal way to consume lexed text. For that, use 1313 L</lex_read_to>. 1314 1315 =cut 1316 */ 1317 1318 void 1319 Perl_lex_unstuff(pTHX_ char *ptr) 1320 { 1321 char *buf, *bufend; 1322 STRLEN unstuff_len; 1323 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1324 buf = PL_parser->bufptr; 1325 if (ptr < buf) 1326 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1327 if (ptr == buf) 1328 return; 1329 bufend = PL_parser->bufend; 1330 if (ptr > bufend) 1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1332 unstuff_len = ptr - buf; 1333 Move(ptr, buf, bufend+1-ptr, char); 1334 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1335 PL_parser->bufend = bufend - unstuff_len; 1336 } 1337 1338 /* 1339 =for apidoc lex_read_to 1340 1341 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1342 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>, 1343 performing the correct bookkeeping whenever a newline character is passed. 1344 This is the normal way to consume lexed text. 1345 1346 Interpretation of the buffer's octets can be abstracted out by 1347 using the slightly higher-level functions L</lex_peek_unichar> and 1348 L</lex_read_unichar>. 1349 1350 =cut 1351 */ 1352 1353 void 1354 Perl_lex_read_to(pTHX_ char *ptr) 1355 { 1356 char *s; 1357 PERL_ARGS_ASSERT_LEX_READ_TO; 1358 s = PL_parser->bufptr; 1359 if (ptr < s || ptr > PL_parser->bufend) 1360 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1361 for (; s != ptr; s++) 1362 if (*s == '\n') { 1363 COPLINE_INC_WITH_HERELINES; 1364 PL_parser->linestart = s+1; 1365 } 1366 PL_parser->bufptr = ptr; 1367 } 1368 1369 /* 1370 =for apidoc lex_discard_to 1371 1372 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1373 up to C<ptr>. The remaining content of the buffer will be moved, and 1374 all pointers into the buffer updated appropriately. C<ptr> must not 1375 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1376 it is not permitted to discard text that has yet to be lexed. 1377 1378 Normally it is not necessarily to do this directly, because it suffices to 1379 use the implicit discarding behaviour of L</lex_next_chunk> and things 1380 based on it. However, if a token stretches across multiple lines, 1381 and the lexing code has kept multiple lines of text in the buffer for 1382 that purpose, then after completion of the token it would be wise to 1383 explicitly discard the now-unneeded earlier lines, to avoid future 1384 multi-line tokens growing the buffer without bound. 1385 1386 =cut 1387 */ 1388 1389 void 1390 Perl_lex_discard_to(pTHX_ char *ptr) 1391 { 1392 char *buf; 1393 STRLEN discard_len; 1394 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1395 buf = SvPVX(PL_parser->linestr); 1396 if (ptr < buf) 1397 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1398 if (ptr == buf) 1399 return; 1400 if (ptr > PL_parser->bufptr) 1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1402 discard_len = ptr - buf; 1403 if (PL_parser->oldbufptr < ptr) 1404 PL_parser->oldbufptr = ptr; 1405 if (PL_parser->oldoldbufptr < ptr) 1406 PL_parser->oldoldbufptr = ptr; 1407 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1408 PL_parser->last_uni = NULL; 1409 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1410 PL_parser->last_lop = NULL; 1411 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1412 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1413 PL_parser->bufend -= discard_len; 1414 PL_parser->bufptr -= discard_len; 1415 PL_parser->oldbufptr -= discard_len; 1416 PL_parser->oldoldbufptr -= discard_len; 1417 if (PL_parser->last_uni) 1418 PL_parser->last_uni -= discard_len; 1419 if (PL_parser->last_lop) 1420 PL_parser->last_lop -= discard_len; 1421 } 1422 1423 void 1424 Perl_notify_parser_that_changed_to_utf8(pTHX) 1425 { 1426 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from 1427 * off to on. At compile time, this has the effect of entering a 'use 1428 * utf8' section. This means that any input was not previously checked for 1429 * UTF-8 (because it was off), but now we do need to check it, or our 1430 * assumptions about the input being sane could be wrong, and we could 1431 * segfault. This routine just sets a flag so that the next time we look 1432 * at the input we do the well-formed UTF-8 check. If we aren't in the 1433 * proper phase, there may not be a parser object, but if there is, setting 1434 * the flag is harmless */ 1435 1436 if (PL_parser) { 1437 PL_parser->recheck_utf8_validity = TRUE; 1438 } 1439 } 1440 1441 /* 1442 =for apidoc lex_next_chunk 1443 1444 Reads in the next chunk of text to be lexed, appending it to 1445 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1446 looked to the end of the current chunk and wants to know more. It is 1447 usual, but not necessary, for lexing to have consumed the entirety of 1448 the current chunk at this time. 1449 1450 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1451 chunk (i.e., the current chunk has been entirely consumed), normally the 1452 current chunk will be discarded at the same time that the new chunk is 1453 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk 1454 will not be discarded. If the current chunk has not been entirely 1455 consumed, then it will not be discarded regardless of the flag. 1456 1457 Returns true if some new text was added to the buffer, or false if the 1458 buffer has reached the end of the input text. 1459 1460 =for apidoc Amnh||LEX_KEEP_PREVIOUS 1461 1462 =cut 1463 */ 1464 1465 #define LEX_FAKE_EOF 0x80000000 1466 #define LEX_NO_TERM 0x40000000 /* here-doc */ 1467 1468 bool 1469 Perl_lex_next_chunk(pTHX_ U32 flags) 1470 { 1471 SV *linestr; 1472 char *buf; 1473 STRLEN old_bufend_pos, new_bufend_pos; 1474 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1475 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1476 bool got_some_for_debugger = 0; 1477 bool got_some; 1478 1479 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1480 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1481 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1482 return FALSE; 1483 linestr = PL_parser->linestr; 1484 buf = SvPVX(linestr); 1485 if (!(flags & LEX_KEEP_PREVIOUS) 1486 && PL_parser->bufptr == PL_parser->bufend) 1487 { 1488 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1489 linestart_pos = 0; 1490 if (PL_parser->last_uni != PL_parser->bufend) 1491 PL_parser->last_uni = NULL; 1492 if (PL_parser->last_lop != PL_parser->bufend) 1493 PL_parser->last_lop = NULL; 1494 last_uni_pos = last_lop_pos = 0; 1495 *buf = 0; 1496 SvCUR_set(linestr, 0); 1497 } else { 1498 old_bufend_pos = PL_parser->bufend - buf; 1499 bufptr_pos = PL_parser->bufptr - buf; 1500 oldbufptr_pos = PL_parser->oldbufptr - buf; 1501 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1502 linestart_pos = PL_parser->linestart - buf; 1503 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1504 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1505 } 1506 if (flags & LEX_FAKE_EOF) { 1507 goto eof; 1508 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1509 got_some = 0; 1510 } else if (filter_gets(linestr, old_bufend_pos)) { 1511 got_some = 1; 1512 got_some_for_debugger = 1; 1513 } else if (flags & LEX_NO_TERM) { 1514 got_some = 0; 1515 } else { 1516 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1517 SvPVCLEAR(linestr); 1518 eof: 1519 /* End of real input. Close filehandle (unless it was STDIN), 1520 * then add implicit termination. 1521 */ 1522 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1523 PerlIO_clearerr(PL_parser->rsfp); 1524 else if (PL_parser->rsfp) 1525 (void)PerlIO_close(PL_parser->rsfp); 1526 PL_parser->rsfp = NULL; 1527 PL_parser->in_pod = PL_parser->filtered = 0; 1528 if (!PL_in_eval && PL_minus_p) { 1529 sv_catpvs(linestr, 1530 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1531 PL_minus_n = PL_minus_p = 0; 1532 } else if (!PL_in_eval && PL_minus_n) { 1533 sv_catpvs(linestr, /*{*/";}"); 1534 PL_minus_n = 0; 1535 } else 1536 sv_catpvs(linestr, ";"); 1537 got_some = 1; 1538 } 1539 buf = SvPVX(linestr); 1540 new_bufend_pos = SvCUR(linestr); 1541 PL_parser->bufend = buf + new_bufend_pos; 1542 PL_parser->bufptr = buf + bufptr_pos; 1543 1544 if (UTF) { 1545 const U8* first_bad_char_loc; 1546 if (UNLIKELY(! is_utf8_string_loc( 1547 (U8 *) PL_parser->bufptr, 1548 PL_parser->bufend - PL_parser->bufptr, 1549 &first_bad_char_loc))) 1550 { 1551 _force_out_malformed_utf8_message(first_bad_char_loc, 1552 (U8 *) PL_parser->bufend, 1553 0, 1554 1 /* 1 means die */ ); 1555 NOT_REACHED; /* NOTREACHED */ 1556 } 1557 } 1558 1559 PL_parser->oldbufptr = buf + oldbufptr_pos; 1560 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1561 PL_parser->linestart = buf + linestart_pos; 1562 if (PL_parser->last_uni) 1563 PL_parser->last_uni = buf + last_uni_pos; 1564 if (PL_parser->last_lop) 1565 PL_parser->last_lop = buf + last_lop_pos; 1566 if (PL_parser->preambling != NOLINE) { 1567 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1568 PL_parser->preambling = NOLINE; 1569 } 1570 if ( got_some_for_debugger 1571 && PERLDB_LINE_OR_SAVESRC 1572 && PL_curstash != PL_debstash) 1573 { 1574 /* debugger active and we're not compiling the debugger code, 1575 * so store the line into the debugger's array of lines 1576 */ 1577 update_debugger_info(NULL, buf+old_bufend_pos, 1578 new_bufend_pos-old_bufend_pos); 1579 } 1580 return got_some; 1581 } 1582 1583 /* 1584 =for apidoc lex_peek_unichar 1585 1586 Looks ahead one (Unicode) character in the text currently being lexed. 1587 Returns the codepoint (unsigned integer value) of the next character, 1588 or -1 if lexing has reached the end of the input text. To consume the 1589 peeked character, use L</lex_read_unichar>. 1590 1591 If the next character is in (or extends into) the next chunk of input 1592 text, the next chunk will be read in. Normally the current chunk will be 1593 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1594 bit set, then the current chunk will not be discarded. 1595 1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1597 is encountered, an exception is generated. 1598 1599 =cut 1600 */ 1601 1602 I32 1603 Perl_lex_peek_unichar(pTHX_ U32 flags) 1604 { 1605 char *s, *bufend; 1606 if (flags & ~(LEX_KEEP_PREVIOUS)) 1607 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1608 s = PL_parser->bufptr; 1609 bufend = PL_parser->bufend; 1610 if (UTF) { 1611 U8 head; 1612 I32 unichar; 1613 STRLEN len, retlen; 1614 if (s == bufend) { 1615 if (!lex_next_chunk(flags)) 1616 return -1; 1617 s = PL_parser->bufptr; 1618 bufend = PL_parser->bufend; 1619 } 1620 head = (U8)*s; 1621 if (UTF8_IS_INVARIANT(head)) 1622 return head; 1623 if (UTF8_IS_START(head)) { 1624 len = UTF8SKIP(&head); 1625 while ((STRLEN)(bufend-s) < len) { 1626 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1627 break; 1628 s = PL_parser->bufptr; 1629 bufend = PL_parser->bufend; 1630 } 1631 } 1632 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1633 if (retlen == (STRLEN)-1) { 1634 _force_out_malformed_utf8_message((U8 *) s, 1635 (U8 *) bufend, 1636 0, 1637 1 /* 1 means die */ ); 1638 NOT_REACHED; /* NOTREACHED */ 1639 } 1640 return unichar; 1641 } else { 1642 if (s == bufend) { 1643 if (!lex_next_chunk(flags)) 1644 return -1; 1645 s = PL_parser->bufptr; 1646 } 1647 return (U8)*s; 1648 } 1649 } 1650 1651 /* 1652 =for apidoc lex_read_unichar 1653 1654 Reads the next (Unicode) character in the text currently being lexed. 1655 Returns the codepoint (unsigned integer value) of the character read, 1656 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1657 if lexing has reached the end of the input text. To non-destructively 1658 examine the next character, use L</lex_peek_unichar> instead. 1659 1660 If the next character is in (or extends into) the next chunk of input 1661 text, the next chunk will be read in. Normally the current chunk will be 1662 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1663 bit set, then the current chunk will not be discarded. 1664 1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1666 is encountered, an exception is generated. 1667 1668 =cut 1669 */ 1670 1671 I32 1672 Perl_lex_read_unichar(pTHX_ U32 flags) 1673 { 1674 I32 c; 1675 if (flags & ~(LEX_KEEP_PREVIOUS)) 1676 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1677 c = lex_peek_unichar(flags); 1678 if (c != -1) { 1679 if (c == '\n') 1680 COPLINE_INC_WITH_HERELINES; 1681 if (UTF) 1682 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1683 else 1684 ++(PL_parser->bufptr); 1685 } 1686 return c; 1687 } 1688 1689 /* 1690 =for apidoc lex_read_space 1691 1692 Reads optional spaces, in Perl style, in the text currently being 1693 lexed. The spaces may include ordinary whitespace characters and 1694 Perl-style comments. C<#line> directives are processed if encountered. 1695 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1696 at a non-space character (or the end of the input text). 1697 1698 If spaces extend into the next chunk of input text, the next chunk will 1699 be read in. Normally the current chunk will be discarded at the same 1700 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current 1701 chunk will not be discarded. 1702 1703 =cut 1704 */ 1705 1706 #define LEX_NO_INCLINE 0x40000000 1707 #define LEX_NO_NEXT_CHUNK 0x80000000 1708 1709 void 1710 Perl_lex_read_space(pTHX_ U32 flags) 1711 { 1712 char *s, *bufend; 1713 const bool can_incline = !(flags & LEX_NO_INCLINE); 1714 bool need_incline = 0; 1715 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1716 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1717 s = PL_parser->bufptr; 1718 bufend = PL_parser->bufend; 1719 while (1) { 1720 char c = *s; 1721 if (c == '#') { 1722 do { 1723 c = *++s; 1724 } while (!(c == '\n' || (c == 0 && s == bufend))); 1725 } else if (c == '\n') { 1726 s++; 1727 if (can_incline) { 1728 PL_parser->linestart = s; 1729 if (s == bufend) 1730 need_incline = 1; 1731 else 1732 incline(s, bufend); 1733 } 1734 } else if (isSPACE(c)) { 1735 s++; 1736 } else if (c == 0 && s == bufend) { 1737 bool got_more; 1738 line_t l; 1739 if (flags & LEX_NO_NEXT_CHUNK) 1740 break; 1741 PL_parser->bufptr = s; 1742 l = CopLINE(PL_curcop); 1743 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1744 got_more = lex_next_chunk(flags); 1745 CopLINE_set(PL_curcop, l); 1746 s = PL_parser->bufptr; 1747 bufend = PL_parser->bufend; 1748 if (!got_more) 1749 break; 1750 if (can_incline && need_incline && PL_parser->rsfp) { 1751 incline(s, bufend); 1752 need_incline = 0; 1753 } 1754 } else if (!c) { 1755 s++; 1756 } else { 1757 break; 1758 } 1759 } 1760 PL_parser->bufptr = s; 1761 } 1762 1763 /* 1764 1765 =for apidoc validate_proto 1766 1767 This function performs syntax checking on a prototype, C<proto>. 1768 If C<warn> is true, any illegal characters or mismatched brackets 1769 will trigger illegalproto warnings, declaring that they were 1770 detected in the prototype for C<name>. 1771 1772 The return value is C<true> if this is a valid prototype, and 1773 C<false> if it is not, regardless of whether C<warn> was C<true> or 1774 C<false>. 1775 1776 Note that C<NULL> is a valid C<proto> and will always return C<true>. 1777 1778 =cut 1779 1780 */ 1781 1782 bool 1783 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) 1784 { 1785 STRLEN len, origlen; 1786 char *p; 1787 bool bad_proto = FALSE; 1788 bool in_brackets = FALSE; 1789 bool after_slash = FALSE; 1790 char greedy_proto = ' '; 1791 bool proto_after_greedy_proto = FALSE; 1792 bool must_be_last = FALSE; 1793 bool underscore = FALSE; 1794 bool bad_proto_after_underscore = FALSE; 1795 1796 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1797 1798 if (!proto) 1799 return TRUE; 1800 1801 p = SvPV(proto, len); 1802 origlen = len; 1803 for (; len--; p++) { 1804 if (!isSPACE(*p)) { 1805 if (must_be_last) 1806 proto_after_greedy_proto = TRUE; 1807 if (underscore) { 1808 if (!memCHRs(";@%", *p)) 1809 bad_proto_after_underscore = TRUE; 1810 underscore = FALSE; 1811 } 1812 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') { 1813 bad_proto = TRUE; 1814 } 1815 else { 1816 if (*p == '[') 1817 in_brackets = TRUE; 1818 else if (*p == ']') 1819 in_brackets = FALSE; 1820 else if ((*p == '@' || *p == '%') 1821 && !after_slash 1822 && !in_brackets ) 1823 { 1824 must_be_last = TRUE; 1825 greedy_proto = *p; 1826 } 1827 else if (*p == '_') 1828 underscore = TRUE; 1829 } 1830 if (*p == '\\') 1831 after_slash = TRUE; 1832 else 1833 after_slash = FALSE; 1834 } 1835 } 1836 1837 if (warn) { 1838 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1839 p -= origlen; 1840 p = SvUTF8(proto) 1841 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1842 origlen, UNI_DISPLAY_ISPRINT) 1843 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1844 1845 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { 1846 SV *name2 = sv_2mortal(newSVsv(PL_curstname)); 1847 sv_catpvs(name2, "::"); 1848 sv_catsv(name2, (SV *)name); 1849 name = name2; 1850 } 1851 1852 if (proto_after_greedy_proto) 1853 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1854 "Prototype after '%c' for %" SVf " : %s", 1855 greedy_proto, SVfARG(name), p); 1856 if (in_brackets) 1857 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1858 "Missing ']' in prototype for %" SVf " : %s", 1859 SVfARG(name), p); 1860 if (bad_proto) 1861 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1862 "Illegal character in prototype for %" SVf " : %s", 1863 SVfARG(name), p); 1864 if (bad_proto_after_underscore) 1865 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1866 "Illegal character after '_' in prototype for %" SVf " : %s", 1867 SVfARG(name), p); 1868 } 1869 1870 return (! (proto_after_greedy_proto || bad_proto) ); 1871 } 1872 1873 /* 1874 * S_incline 1875 * This subroutine has nothing to do with tilting, whether at windmills 1876 * or pinball tables. Its name is short for "increment line". It 1877 * increments the current line number in CopLINE(PL_curcop) and checks 1878 * to see whether the line starts with a comment of the form 1879 * # line 500 "foo.pm" 1880 * If so, it sets the current line number and file to the values in the comment. 1881 */ 1882 1883 STATIC void 1884 S_incline(pTHX_ const char *s, const char *end) 1885 { 1886 const char *t; 1887 const char *n; 1888 const char *e; 1889 line_t line_num; 1890 UV uv; 1891 1892 PERL_ARGS_ASSERT_INCLINE; 1893 1894 assert(end >= s); 1895 1896 COPLINE_INC_WITH_HERELINES; 1897 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1898 && s+1 == PL_bufend && *s == ';') { 1899 /* fake newline in string eval */ 1900 CopLINE_dec(PL_curcop); 1901 return; 1902 } 1903 if (*s++ != '#') 1904 return; 1905 while (SPACE_OR_TAB(*s)) 1906 s++; 1907 if (memBEGINs(s, (STRLEN) (end - s), "line")) 1908 s += sizeof("line") - 1; 1909 else 1910 return; 1911 if (SPACE_OR_TAB(*s)) 1912 s++; 1913 else 1914 return; 1915 while (SPACE_OR_TAB(*s)) 1916 s++; 1917 if (!isDIGIT(*s)) 1918 return; 1919 1920 n = s; 1921 while (isDIGIT(*s)) 1922 s++; 1923 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1924 return; 1925 while (SPACE_OR_TAB(*s)) 1926 s++; 1927 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { 1928 s++; 1929 e = t + 1; 1930 } 1931 else { 1932 t = s; 1933 while (*t && !isSPACE(*t)) 1934 t++; 1935 e = t; 1936 } 1937 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1938 e++; 1939 if (*e != '\n' && *e != '\0') 1940 return; /* false alarm */ 1941 1942 if (!grok_atoUV(n, &uv, &e)) 1943 return; 1944 line_num = ((line_t)uv) - 1; 1945 1946 if (t - s > 0) { 1947 const STRLEN len = t - s; 1948 1949 if (!PL_rsfp && !PL_parser->filtered) { 1950 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1951 * to *{"::_<newfilename"} */ 1952 /* However, the long form of evals is only turned on by the 1953 debugger - usually they're "(eval %lu)" */ 1954 GV * const cfgv = CopFILEGV(PL_curcop); 1955 if (cfgv) { 1956 char smallbuf[128]; 1957 STRLEN tmplen2 = len; 1958 char *tmpbuf2; 1959 GV *gv2; 1960 1961 if (tmplen2 + 2 <= sizeof smallbuf) 1962 tmpbuf2 = smallbuf; 1963 else 1964 Newx(tmpbuf2, tmplen2 + 2, char); 1965 1966 tmpbuf2[0] = '_'; 1967 tmpbuf2[1] = '<'; 1968 1969 memcpy(tmpbuf2 + 2, s, tmplen2); 1970 tmplen2 += 2; 1971 1972 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1973 if (!isGV(gv2)) { 1974 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1975 /* adjust ${"::_<newfilename"} to store the new file name */ 1976 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1977 /* The line number may differ. If that is the case, 1978 alias the saved lines that are in the array. 1979 Otherwise alias the whole array. */ 1980 if (CopLINE(PL_curcop) == line_num) { 1981 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1982 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1983 } 1984 else if (GvAV(cfgv)) { 1985 AV * const av = GvAV(cfgv); 1986 const line_t start = CopLINE(PL_curcop)+1; 1987 SSize_t items = AvFILLp(av) - start; 1988 if (items > 0) { 1989 AV * const av2 = GvAVn(gv2); 1990 SV **svp = AvARRAY(av) + start; 1991 Size_t l = line_num+1; 1992 while (items-- && l < SSize_t_MAX && l == (line_t)l) 1993 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); 1994 } 1995 } 1996 } 1997 1998 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1999 } 2000 } 2001 CopFILE_free(PL_curcop); 2002 CopFILE_setn(PL_curcop, s, len); 2003 } 2004 CopLINE_set(PL_curcop, line_num); 2005 } 2006 2007 STATIC void 2008 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 2009 { 2010 AV *av = CopFILEAVx(PL_curcop); 2011 if (av) { 2012 SV * sv; 2013 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 2014 else { 2015 sv = *av_fetch(av, 0, 1); 2016 SvUPGRADE(sv, SVt_PVMG); 2017 } 2018 if (!SvPOK(sv)) SvPVCLEAR(sv); 2019 if (orig_sv) 2020 sv_catsv(sv, orig_sv); 2021 else 2022 sv_catpvn(sv, buf, len); 2023 if (!SvIOK(sv)) { 2024 (void)SvIOK_on(sv); 2025 SvIV_set(sv, 0); 2026 } 2027 if (PL_parser->preambling == NOLINE) 2028 av_store(av, CopLINE(PL_curcop), sv); 2029 } 2030 } 2031 2032 /* 2033 * skipspace 2034 * Called to gobble the appropriate amount and type of whitespace. 2035 * Skips comments as well. 2036 * Returns the next character after the whitespace that is skipped. 2037 * 2038 * peekspace 2039 * Same thing, but look ahead without incrementing line numbers or 2040 * adjusting PL_linestart. 2041 */ 2042 2043 #define skipspace(s) skipspace_flags(s, 0) 2044 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE) 2045 2046 char * 2047 Perl_skipspace_flags(pTHX_ char *s, U32 flags) 2048 { 2049 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 2050 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 2051 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) 2052 s++; 2053 } else { 2054 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 2055 PL_bufptr = s; 2056 lex_read_space(flags | LEX_KEEP_PREVIOUS | 2057 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 2058 LEX_NO_NEXT_CHUNK : 0)); 2059 s = PL_bufptr; 2060 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 2061 if (PL_linestart > PL_bufptr) 2062 PL_bufptr = PL_linestart; 2063 return s; 2064 } 2065 return s; 2066 } 2067 2068 /* 2069 * S_check_uni 2070 * Check the unary operators to ensure there's no ambiguity in how they're 2071 * used. An ambiguous piece of code would be: 2072 * rand + 5 2073 * This doesn't mean rand() + 5. Because rand() is a unary operator, 2074 * the +5 is its argument. 2075 */ 2076 2077 STATIC void 2078 S_check_uni(pTHX) 2079 { 2080 const char *s; 2081 2082 if (PL_oldoldbufptr != PL_last_uni) 2083 return; 2084 while (isSPACE(*PL_last_uni)) 2085 PL_last_uni++; 2086 s = PL_last_uni; 2087 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') 2088 s += UTF ? UTF8SKIP(s) : 1; 2089 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) 2090 return; 2091 2092 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 2093 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous", 2094 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); 2095 } 2096 2097 /* 2098 * LOP : macro to build a list operator. Its behaviour has been replaced 2099 * with a subroutine, S_lop() for which LOP is just another name. 2100 */ 2101 2102 #define LOP(f,x) return lop(f,x,s) 2103 2104 /* 2105 * S_lop 2106 * Build a list operator (or something that might be one). The rules: 2107 * - if we have a next token, then it's a list operator (no parens) for 2108 * which the next token has already been parsed; e.g., 2109 * sort foo @args 2110 * sort foo (@args) 2111 * - if the next thing is an opening paren, then it's a function 2112 * - else it's a list operator 2113 */ 2114 2115 STATIC I32 2116 S_lop(pTHX_ I32 f, U8 x, char *s) 2117 { 2118 PERL_ARGS_ASSERT_LOP; 2119 2120 pl_yylval.ival = f; 2121 CLINE; 2122 PL_bufptr = s; 2123 PL_last_lop = PL_oldbufptr; 2124 PL_last_lop_op = (OPCODE)f; 2125 if (PL_nexttoke) 2126 goto lstop; 2127 PL_expect = x; 2128 if (*s == '(') 2129 return REPORT(FUNC); 2130 s = skipspace(s); 2131 if (*s == '(') 2132 return REPORT(FUNC); 2133 else { 2134 lstop: 2135 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 2136 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 2137 return REPORT(LSTOP); 2138 } 2139 } 2140 2141 /* 2142 * S_force_next 2143 * When the lexer realizes it knows the next token (for instance, 2144 * it is reordering tokens for the parser) then it can call S_force_next 2145 * to know what token to return the next time the lexer is called. Caller 2146 * will need to set PL_nextval[] and possibly PL_expect to ensure 2147 * the lexer handles the token correctly. 2148 */ 2149 2150 STATIC void 2151 S_force_next(pTHX_ I32 type) 2152 { 2153 #ifdef DEBUGGING 2154 if (DEBUG_T_TEST) { 2155 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2156 tokereport(type, &NEXTVAL_NEXTTOKE); 2157 } 2158 #endif 2159 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); 2160 PL_nexttype[PL_nexttoke] = type; 2161 PL_nexttoke++; 2162 } 2163 2164 /* 2165 * S_postderef 2166 * 2167 * This subroutine handles postfix deref syntax after the arrow has already 2168 * been emitted. @* $* etc. are emitted as two separate tokens right here. 2169 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 2170 * only the first, leaving yylex to find the next. 2171 */ 2172 2173 static int 2174 S_postderef(pTHX_ int const funny, char const next) 2175 { 2176 assert(funny == DOLSHARP 2177 || funny == PERLY_DOLLAR 2178 || funny == PERLY_SNAIL 2179 || funny == PERLY_PERCENT_SIGN 2180 || funny == PERLY_AMPERSAND 2181 || funny == PERLY_STAR 2182 ); 2183 if (next == '*') { 2184 PL_expect = XOPERATOR; 2185 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 2186 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny); 2187 PL_lex_state = LEX_INTERPEND; 2188 if (PERLY_SNAIL == funny) 2189 force_next(POSTJOIN); 2190 } 2191 force_next(PERLY_STAR); 2192 PL_bufptr+=2; 2193 } 2194 else { 2195 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL 2196 && !PL_lex_brackets) 2197 PL_lex_dojoin = 2; 2198 PL_expect = XOPERATOR; 2199 PL_bufptr++; 2200 } 2201 return funny; 2202 } 2203 2204 void 2205 Perl_yyunlex(pTHX) 2206 { 2207 int yyc = PL_parser->yychar; 2208 if (yyc != YYEMPTY) { 2209 if (yyc) { 2210 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2211 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) { 2212 PL_lex_allbrackets--; 2213 PL_lex_brackets--; 2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2215 } else if (yyc == PERLY_PAREN_OPEN) { 2216 PL_lex_allbrackets--; 2217 yyc |= (2<<24); 2218 } 2219 force_next(yyc); 2220 } 2221 PL_parser->yychar = YYEMPTY; 2222 } 2223 } 2224 2225 STATIC SV * 2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2227 { 2228 SV * const sv = newSVpvn_utf8(start, len, 2229 ! IN_BYTES 2230 && UTF 2231 && len != 0 2232 && is_utf8_non_invariant_string((const U8*)start, len)); 2233 return sv; 2234 } 2235 2236 /* 2237 * S_force_word 2238 * When the lexer knows the next thing is a word (for instance, it has 2239 * just seen -> and it knows that the next char is a word char, then 2240 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2241 * lookahead. 2242 * 2243 * Arguments: 2244 * char *start : buffer position (must be within PL_linestr) 2245 * int token : PL_next* will be this type of bare word 2246 * (e.g., METHCALL0,BAREWORD) 2247 * int check_keyword : if true, Perl checks to make sure the word isn't 2248 * a keyword (do this if the word is a label, e.g. goto FOO) 2249 * int allow_pack : if true, : characters will also be allowed (require, 2250 * use, etc. do this) 2251 */ 2252 2253 STATIC char * 2254 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2255 { 2256 char *s; 2257 STRLEN len; 2258 2259 PERL_ARGS_ASSERT_FORCE_WORD; 2260 2261 start = skipspace(start); 2262 s = start; 2263 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 2264 || (allow_pack && *s == ':' && s[1] == ':') ) 2265 { 2266 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack); 2267 if (check_keyword) { 2268 char *s2 = PL_tokenbuf; 2269 STRLEN len2 = len; 2270 if (allow_pack && memBEGINPs(s2, len, "CORE::")) { 2271 s2 += sizeof("CORE::") - 1; 2272 len2 -= sizeof("CORE::") - 1; 2273 } 2274 if (keyword(s2, len2, 0)) 2275 return start; 2276 } 2277 if (token == METHCALL0) { 2278 s = skipspace(s); 2279 if (*s == '(') 2280 PL_expect = XTERM; 2281 else { 2282 PL_expect = XOPERATOR; 2283 } 2284 } 2285 NEXTVAL_NEXTTOKE.opval 2286 = newSVOP(OP_CONST,0, 2287 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2288 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2289 force_next(token); 2290 } 2291 return s; 2292 } 2293 2294 /* 2295 * S_force_ident 2296 * Called when the lexer wants $foo *foo &foo etc, but the program 2297 * text only contains the "foo" portion. The first argument is a pointer 2298 * to the "foo", and the second argument is the type symbol to prefix. 2299 * Forces the next token to be a "BAREWORD". 2300 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2301 */ 2302 2303 STATIC void 2304 S_force_ident(pTHX_ const char *s, int kind) 2305 { 2306 PERL_ARGS_ASSERT_FORCE_IDENT; 2307 2308 if (s[0]) { 2309 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2310 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2311 UTF ? SVf_UTF8 : 0)); 2312 NEXTVAL_NEXTTOKE.opval = o; 2313 force_next(BAREWORD); 2314 if (kind) { 2315 o->op_private = OPpCONST_ENTERED; 2316 /* XXX see note in pp_entereval() for why we forgo typo 2317 warnings if the symbol must be introduced in an eval. 2318 GSAR 96-10-12 */ 2319 gv_fetchpvn_flags(s, len, 2320 (PL_in_eval ? GV_ADDMULTI 2321 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2322 kind == PERLY_DOLLAR ? SVt_PV : 2323 kind == PERLY_SNAIL ? SVt_PVAV : 2324 kind == PERLY_PERCENT_SIGN ? SVt_PVHV : 2325 SVt_PVGV 2326 ); 2327 } 2328 } 2329 } 2330 2331 static void 2332 S_force_ident_maybe_lex(pTHX_ char pit) 2333 { 2334 NEXTVAL_NEXTTOKE.ival = pit; 2335 force_next('p'); 2336 } 2337 2338 NV 2339 Perl_str_to_version(pTHX_ SV *sv) 2340 { 2341 NV retval = 0.0; 2342 NV nshift = 1.0; 2343 STRLEN len; 2344 const char *start = SvPV_const(sv,len); 2345 const char * const end = start + len; 2346 const bool utf = cBOOL(SvUTF8(sv)); 2347 2348 PERL_ARGS_ASSERT_STR_TO_VERSION; 2349 2350 while (start < end) { 2351 STRLEN skip; 2352 UV n; 2353 if (utf) 2354 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2355 else { 2356 n = *(U8*)start; 2357 skip = 1; 2358 } 2359 retval += ((NV)n)/nshift; 2360 start += skip; 2361 nshift *= 1000; 2362 } 2363 return retval; 2364 } 2365 2366 /* 2367 * S_force_version 2368 * Forces the next token to be a version number. 2369 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2370 * and if "guessing" is TRUE, then no new token is created (and the caller 2371 * must use an alternative parsing method). 2372 */ 2373 2374 STATIC char * 2375 S_force_version(pTHX_ char *s, int guessing) 2376 { 2377 OP *version = NULL; 2378 char *d; 2379 2380 PERL_ARGS_ASSERT_FORCE_VERSION; 2381 2382 s = skipspace(s); 2383 2384 d = s; 2385 if (*d == 'v') 2386 d++; 2387 if (isDIGIT(*d)) { 2388 while (isDIGIT(*d) || *d == '_' || *d == '.') 2389 d++; 2390 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2391 SV *ver; 2392 s = scan_num(s, &pl_yylval); 2393 version = pl_yylval.opval; 2394 ver = cSVOPx(version)->op_sv; 2395 if (SvPOK(ver) && !SvNIOK(ver)) { 2396 SvUPGRADE(ver, SVt_PVNV); 2397 SvNV_set(ver, str_to_version(ver)); 2398 SvNOK_on(ver); /* hint that it is a version */ 2399 } 2400 } 2401 else if (guessing) { 2402 return s; 2403 } 2404 } 2405 2406 /* NOTE: The parser sees the package name and the VERSION swapped */ 2407 NEXTVAL_NEXTTOKE.opval = version; 2408 force_next(BAREWORD); 2409 2410 return s; 2411 } 2412 2413 /* 2414 * S_force_strict_version 2415 * Forces the next token to be a version number using strict syntax rules. 2416 */ 2417 2418 STATIC char * 2419 S_force_strict_version(pTHX_ char *s) 2420 { 2421 OP *version = NULL; 2422 const char *errstr = NULL; 2423 2424 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2425 2426 while (isSPACE(*s)) /* leading whitespace */ 2427 s++; 2428 2429 if (is_STRICT_VERSION(s,&errstr)) { 2430 SV *ver = newSV_type(SVt_NULL); 2431 s = (char *)scan_version(s, ver, 0); 2432 version = newSVOP(OP_CONST, 0, ver); 2433 } 2434 else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' ) 2435 && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' ))) 2436 { 2437 PL_bufptr = s; 2438 if (errstr) 2439 yyerror(errstr); /* version required */ 2440 return s; 2441 } 2442 2443 /* NOTE: The parser sees the package name and the VERSION swapped */ 2444 NEXTVAL_NEXTTOKE.opval = version; 2445 force_next(BAREWORD); 2446 2447 return s; 2448 } 2449 2450 /* 2451 * S_tokeq 2452 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv', 2453 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is 2454 * unchanged, and a new SV containing the modified input is returned. 2455 */ 2456 2457 STATIC SV * 2458 S_tokeq(pTHX_ SV *sv) 2459 { 2460 char *s; 2461 char *send; 2462 char *d; 2463 SV *pv = sv; 2464 2465 PERL_ARGS_ASSERT_TOKEQ; 2466 2467 assert (SvPOK(sv)); 2468 assert (SvLEN(sv)); 2469 assert (!SvIsCOW(sv)); 2470 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2471 goto finish; 2472 s = SvPVX(sv); 2473 send = SvEND(sv); 2474 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2475 while (s < send && !(*s == '\\' && s[1] == '\\')) 2476 s++; 2477 if (s == send) 2478 goto finish; 2479 d = s; 2480 if ( PL_hints & HINT_NEW_STRING ) { 2481 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2482 SVs_TEMP | SvUTF8(sv)); 2483 } 2484 while (s < send) { 2485 if (*s == '\\') { 2486 if (s + 1 < send && (s[1] == '\\')) 2487 s++; /* all that, just for this */ 2488 } 2489 *d++ = *s++; 2490 } 2491 *d = '\0'; 2492 SvCUR_set(sv, d - SvPVX_const(sv)); 2493 finish: 2494 if ( PL_hints & HINT_NEW_STRING ) 2495 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL); 2496 return sv; 2497 } 2498 2499 /* 2500 * Now come three functions related to double-quote context, 2501 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2502 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2503 * interact with PL_lex_state, and create fake ( ... ) argument lists 2504 * to handle functions and concatenation. 2505 * For example, 2506 * "foo\lbar" 2507 * is tokenised as 2508 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2509 */ 2510 2511 /* 2512 * S_sublex_start 2513 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2514 * 2515 * Pattern matching will set PL_lex_op to the pattern-matching op to 2516 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2517 * 2518 * OP_CONST is easy--just make the new op and return. 2519 * 2520 * Everything else becomes a FUNC. 2521 * 2522 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we 2523 * had an OP_CONST. This just sets us up for a 2524 * call to S_sublex_push(). 2525 */ 2526 2527 STATIC I32 2528 S_sublex_start(pTHX) 2529 { 2530 const I32 op_type = pl_yylval.ival; 2531 2532 if (op_type == OP_NULL) { 2533 pl_yylval.opval = PL_lex_op; 2534 PL_lex_op = NULL; 2535 return THING; 2536 } 2537 if (op_type == OP_CONST) { 2538 SV *sv = PL_lex_stuff; 2539 PL_lex_stuff = NULL; 2540 sv = tokeq(sv); 2541 2542 if (SvTYPE(sv) == SVt_PVIV) { 2543 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2544 STRLEN len; 2545 const char * const p = SvPV_const(sv, len); 2546 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2547 SvREFCNT_dec(sv); 2548 sv = nsv; 2549 } 2550 pl_yylval.opval = newSVOP(op_type, 0, sv); 2551 return THING; 2552 } 2553 2554 PL_parser->lex_super_state = PL_lex_state; 2555 PL_parser->lex_sub_inwhat = (U16)op_type; 2556 PL_parser->lex_sub_op = PL_lex_op; 2557 PL_parser->sub_no_recover = FALSE; 2558 PL_parser->sub_error_count = PL_error_count; 2559 PL_lex_state = LEX_INTERPPUSH; 2560 2561 PL_expect = XTERM; 2562 if (PL_lex_op) { 2563 pl_yylval.opval = PL_lex_op; 2564 PL_lex_op = NULL; 2565 return PMFUNC; 2566 } 2567 else 2568 return FUNC; 2569 } 2570 2571 /* 2572 * S_sublex_push 2573 * Create a new scope to save the lexing state. The scope will be 2574 * ended in S_sublex_done. Returns a '(', starting the function arguments 2575 * to the uc, lc, etc. found before. 2576 * Sets PL_lex_state to LEX_INTERPCONCAT. 2577 */ 2578 2579 STATIC I32 2580 S_sublex_push(pTHX) 2581 { 2582 LEXSHARED *shared; 2583 const bool is_heredoc = PL_multi_close == '<'; 2584 ENTER; 2585 2586 PL_lex_state = PL_parser->lex_super_state; 2587 SAVEI8(PL_lex_dojoin); 2588 SAVEI32(PL_lex_brackets); 2589 SAVEI32(PL_lex_allbrackets); 2590 SAVEI32(PL_lex_formbrack); 2591 SAVEI8(PL_lex_fakeeof); 2592 SAVEI32(PL_lex_casemods); 2593 SAVEI32(PL_lex_starts); 2594 SAVEI8(PL_lex_state); 2595 SAVESPTR(PL_lex_repl); 2596 SAVEVPTR(PL_lex_inpat); 2597 SAVEI16(PL_lex_inwhat); 2598 if (is_heredoc) 2599 { 2600 SAVECOPLINE(PL_curcop); 2601 SAVEI32(PL_multi_end); 2602 SAVEI32(PL_parser->herelines); 2603 PL_parser->herelines = 0; 2604 } 2605 SAVEIV(PL_multi_close); 2606 SAVEPPTR(PL_bufptr); 2607 SAVEPPTR(PL_bufend); 2608 SAVEPPTR(PL_oldbufptr); 2609 SAVEPPTR(PL_oldoldbufptr); 2610 SAVEPPTR(PL_last_lop); 2611 SAVEPPTR(PL_last_uni); 2612 SAVEPPTR(PL_linestart); 2613 SAVESPTR(PL_linestr); 2614 SAVEGENERICPV(PL_lex_brackstack); 2615 SAVEGENERICPV(PL_lex_casestack); 2616 SAVEGENERICPV(PL_parser->lex_shared); 2617 SAVEBOOL(PL_parser->lex_re_reparsing); 2618 SAVEI32(PL_copline); 2619 2620 /* The here-doc parser needs to be able to peek into outer lexing 2621 scopes to find the body of the here-doc. So we put PL_linestr and 2622 PL_bufptr into lex_shared, to 'share' those values. 2623 */ 2624 PL_parser->lex_shared->ls_linestr = PL_linestr; 2625 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2626 2627 PL_linestr = PL_lex_stuff; 2628 PL_lex_repl = PL_parser->lex_sub_repl; 2629 PL_lex_stuff = NULL; 2630 PL_parser->lex_sub_repl = NULL; 2631 2632 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets 2633 set for an inner quote-like operator and then an error causes scope- 2634 popping. We must not have a PL_lex_stuff value left dangling, as 2635 that breaks assumptions elsewhere. See bug #123617. */ 2636 SAVEGENERICSV(PL_lex_stuff); 2637 SAVEGENERICSV(PL_parser->lex_sub_repl); 2638 2639 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2640 = SvPVX(PL_linestr); 2641 PL_bufend += SvCUR(PL_linestr); 2642 PL_last_lop = PL_last_uni = NULL; 2643 SAVEFREESV(PL_linestr); 2644 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2645 2646 PL_lex_dojoin = FALSE; 2647 PL_lex_brackets = PL_lex_formbrack = 0; 2648 PL_lex_allbrackets = 0; 2649 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2650 Newx(PL_lex_brackstack, 120, char); 2651 Newx(PL_lex_casestack, 12, char); 2652 PL_lex_casemods = 0; 2653 *PL_lex_casestack = '\0'; 2654 PL_lex_starts = 0; 2655 PL_lex_state = LEX_INTERPCONCAT; 2656 if (is_heredoc) 2657 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2658 PL_copline = NOLINE; 2659 2660 Newxz(shared, 1, LEXSHARED); 2661 shared->ls_prev = PL_parser->lex_shared; 2662 PL_parser->lex_shared = shared; 2663 2664 PL_lex_inwhat = PL_parser->lex_sub_inwhat; 2665 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2666 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2667 PL_lex_inpat = PL_parser->lex_sub_op; 2668 else 2669 PL_lex_inpat = NULL; 2670 2671 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2672 PL_in_eval &= ~EVAL_RE_REPARSING; 2673 2674 return SUBLEXSTART; 2675 } 2676 2677 /* 2678 * S_sublex_done 2679 * Restores lexer state after a S_sublex_push. 2680 */ 2681 2682 STATIC I32 2683 S_sublex_done(pTHX) 2684 { 2685 if (!PL_lex_starts++) { 2686 SV * const sv = newSVpvs(""); 2687 if (SvUTF8(PL_linestr)) 2688 SvUTF8_on(sv); 2689 PL_expect = XOPERATOR; 2690 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 2691 return THING; 2692 } 2693 2694 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2695 PL_lex_state = LEX_INTERPCASEMOD; 2696 return yylex(); 2697 } 2698 2699 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2700 assert(PL_lex_inwhat != OP_TRANSR); 2701 if (PL_lex_repl) { 2702 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2703 PL_linestr = PL_lex_repl; 2704 PL_lex_inpat = 0; 2705 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2706 PL_bufend += SvCUR(PL_linestr); 2707 PL_last_lop = PL_last_uni = NULL; 2708 PL_lex_dojoin = FALSE; 2709 PL_lex_brackets = 0; 2710 PL_lex_allbrackets = 0; 2711 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2712 PL_lex_casemods = 0; 2713 *PL_lex_casestack = '\0'; 2714 PL_lex_starts = 0; 2715 if (SvEVALED(PL_lex_repl)) { 2716 PL_lex_state = LEX_INTERPNORMAL; 2717 PL_lex_starts++; 2718 /* we don't clear PL_lex_repl here, so that we can check later 2719 whether this is an evalled subst; that means we rely on the 2720 logic to ensure sublex_done() is called again only via the 2721 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2722 } 2723 else { 2724 PL_lex_state = LEX_INTERPCONCAT; 2725 PL_lex_repl = NULL; 2726 } 2727 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2728 CopLINE(PL_curcop) += 2729 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines 2730 + PL_parser->herelines; 2731 PL_parser->herelines = 0; 2732 } 2733 return PERLY_SLASH; 2734 } 2735 else { 2736 const line_t l = CopLINE(PL_curcop); 2737 LEAVE; 2738 if (PL_parser->sub_error_count != PL_error_count) { 2739 if (PL_parser->sub_no_recover) { 2740 yyquit(); 2741 NOT_REACHED; 2742 } 2743 } 2744 if (PL_multi_close == '<') 2745 PL_parser->herelines += l - PL_multi_end; 2746 PL_bufend = SvPVX(PL_linestr); 2747 PL_bufend += SvCUR(PL_linestr); 2748 PL_expect = XOPERATOR; 2749 return SUBLEXEND; 2750 } 2751 } 2752 2753 HV * 2754 Perl_load_charnames(pTHX_ SV * char_name, const char * context, 2755 const STRLEN context_len, const char ** error_msg) 2756 { 2757 /* Load the official _charnames module if not already there. The 2758 * parameters are just to give info for any error messages generated: 2759 * char_name a name to look up which is the reason for loading this 2760 * context 'char_name' in the context in the input in which it appears 2761 * context_len how many bytes 'context' occupies 2762 * error_msg *error_msg will be set to any error 2763 * 2764 * Returns the ^H table if success; otherwise NULL */ 2765 2766 unsigned int i; 2767 HV * table; 2768 SV **cvp; 2769 SV * res; 2770 2771 PERL_ARGS_ASSERT_LOAD_CHARNAMES; 2772 2773 /* This loop is executed 1 1/2 times. On the first time through, if it 2774 * isn't already loaded, try loading it, and iterate just once to see if it 2775 * worked. */ 2776 for (i = 0; i < 2; i++) { 2777 table = GvHV(PL_hintgv); /* ^H */ 2778 2779 if ( table 2780 && (PL_hints & HINT_LOCALIZE_HH) 2781 && (cvp = hv_fetchs(table, "charnames", FALSE)) 2782 && SvOK(*cvp)) 2783 { 2784 return table; /* Quit if already loaded */ 2785 } 2786 2787 if (i == 0) { 2788 Perl_load_module(aTHX_ 2789 0, 2790 newSVpvs("_charnames"), 2791 2792 /* version parameter; no need to specify it, as if we get too early 2793 * a version, will fail anyway, not being able to find 'charnames' 2794 * */ 2795 NULL, 2796 newSVpvs(":full"), 2797 newSVpvs(":short"), 2798 NULL); 2799 } 2800 } 2801 2802 /* Here, it failed; new_constant will give appropriate error messages */ 2803 *error_msg = NULL; 2804 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2805 context, context_len, error_msg); 2806 SvREFCNT_dec(res); 2807 2808 return NULL; 2809 } 2810 2811 STATIC SV* 2812 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e) 2813 { 2814 /* This justs wraps get_and_check_backslash_N_name() to output any error 2815 * message it returns. */ 2816 2817 const char * error_msg = NULL; 2818 SV * result; 2819 2820 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER; 2821 2822 /* charnames doesn't work well if there have been errors found */ 2823 if (PL_error_count > 0) { 2824 return NULL; 2825 } 2826 2827 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg); 2828 2829 if (error_msg) { 2830 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0); 2831 } 2832 2833 return result; 2834 } 2835 2836 SV* 2837 Perl_get_and_check_backslash_N_name(pTHX_ const char* s, 2838 const char* e, 2839 const bool is_utf8, 2840 const char ** error_msg) 2841 { 2842 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2843 * interior, hence to the "}". Finds what the name resolves to, returning 2844 * an SV* containing it; NULL if no valid one found. 2845 * 2846 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it 2847 * doesn't have to be. */ 2848 2849 SV* char_name; 2850 SV* res; 2851 HV * table; 2852 SV **cvp; 2853 SV *cv; 2854 SV *rv; 2855 HV *stash; 2856 2857 /* Points to the beginning of the \N{... so that any messages include the 2858 * context of what's failing*/ 2859 const char* context = s - 3; 2860 STRLEN context_len = e - context + 1; /* include all of \N{...} */ 2861 2862 2863 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2864 2865 assert(e >= s); 2866 assert(s > (char *) 3); 2867 2868 while (s < e && isBLANK(*s)) { 2869 s++; 2870 } 2871 2872 while (s < e && isBLANK(*(e - 1))) { 2873 e--; 2874 } 2875 2876 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); 2877 2878 if (!SvCUR(char_name)) { 2879 SvREFCNT_dec_NN(char_name); 2880 /* diag_listed_as: Unknown charname '%s' */ 2881 *error_msg = Perl_form(aTHX_ "Unknown charname ''"); 2882 return NULL; 2883 } 2884 2885 /* Autoload the charnames module */ 2886 2887 table = load_charnames(char_name, context, context_len, error_msg); 2888 if (table == NULL) { 2889 return NULL; 2890 } 2891 2892 *error_msg = NULL; 2893 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2894 context, context_len, error_msg); 2895 if (*error_msg) { 2896 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name)); 2897 2898 SvREFCNT_dec(res); 2899 return NULL; 2900 } 2901 2902 /* See if the charnames handler is the Perl core's, and if so, we can skip 2903 * the validation needed for a user-supplied one, as Perl's does its own 2904 * validation. */ 2905 cvp = hv_fetchs(table, "charnames", FALSE); 2906 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2907 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2908 { 2909 const char * const name = HvNAME(stash); 2910 if (memEQs(name, HvNAMELEN(stash), "_charnames")) { 2911 return res; 2912 } 2913 } 2914 2915 /* Here, it isn't Perl's charname handler. We can't rely on a 2916 * user-supplied handler to validate the input name. For non-ut8 input, 2917 * look to see that the first character is legal. Then loop through the 2918 * rest checking that each is a continuation */ 2919 2920 /* This code makes the reasonable assumption that the only Latin1-range 2921 * characters that begin a character name alias are alphabetic, otherwise 2922 * would have to create a isCHARNAME_BEGIN macro */ 2923 2924 if (! is_utf8) { 2925 if (! isALPHAU(*s)) { 2926 goto bad_charname; 2927 } 2928 s++; 2929 while (s < e) { 2930 if (! isCHARNAME_CONT(*s)) { 2931 goto bad_charname; 2932 } 2933 if (*s == ' ' && *(s-1) == ' ') { 2934 goto multi_spaces; 2935 } 2936 s++; 2937 } 2938 } 2939 else { 2940 /* Similarly for utf8. For invariants can check directly; for other 2941 * Latin1, can calculate their code point and check; otherwise use an 2942 * inversion list */ 2943 if (UTF8_IS_INVARIANT(*s)) { 2944 if (! isALPHAU(*s)) { 2945 goto bad_charname; 2946 } 2947 s++; 2948 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2949 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) { 2950 goto bad_charname; 2951 } 2952 s += 2; 2953 } 2954 else { 2955 if (! _invlist_contains_cp(PL_utf8_charname_begin, 2956 utf8_to_uvchr_buf((U8 *) s, 2957 (U8 *) e, 2958 NULL))) 2959 { 2960 goto bad_charname; 2961 } 2962 s += UTF8SKIP(s); 2963 } 2964 2965 while (s < e) { 2966 if (UTF8_IS_INVARIANT(*s)) { 2967 if (! isCHARNAME_CONT(*s)) { 2968 goto bad_charname; 2969 } 2970 if (*s == ' ' && *(s-1) == ' ') { 2971 goto multi_spaces; 2972 } 2973 s++; 2974 } 2975 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2976 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) 2977 { 2978 goto bad_charname; 2979 } 2980 s += 2; 2981 } 2982 else { 2983 if (! _invlist_contains_cp(PL_utf8_charname_continue, 2984 utf8_to_uvchr_buf((U8 *) s, 2985 (U8 *) e, 2986 NULL))) 2987 { 2988 goto bad_charname; 2989 } 2990 s += UTF8SKIP(s); 2991 } 2992 } 2993 } 2994 if (*(s-1) == ' ') { 2995 /* diag_listed_as: charnames alias definitions may not contain 2996 trailing white-space; marked by <-- HERE in %s 2997 */ 2998 *error_msg = Perl_form(aTHX_ 2999 "charnames alias definitions may not contain trailing " 3000 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", 3001 (int)(s - context + 1), context, 3002 (int)(e - s + 1), s + 1); 3003 return NULL; 3004 } 3005 3006 if (SvUTF8(res)) { /* Don't accept malformed charname value */ 3007 const U8* first_bad_char_loc; 3008 STRLEN len; 3009 const char* const str = SvPV_const(res, len); 3010 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, 3011 &first_bad_char_loc))) 3012 { 3013 _force_out_malformed_utf8_message(first_bad_char_loc, 3014 (U8 *) PL_parser->bufend, 3015 0, 3016 0 /* 0 means don't die */ ); 3017 /* diag_listed_as: Malformed UTF-8 returned by \N{%s} 3018 immediately after '%s' */ 3019 *error_msg = Perl_form(aTHX_ 3020 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 3021 (int) context_len, context, 3022 (int) ((char *) first_bad_char_loc - str), str); 3023 return NULL; 3024 } 3025 } 3026 3027 return res; 3028 3029 bad_charname: { 3030 3031 /* The final %.*s makes sure that should the trailing NUL be missing 3032 * that this print won't run off the end of the string */ 3033 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE 3034 in \N{%s} */ 3035 *error_msg = Perl_form(aTHX_ 3036 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 3037 (int)(s - context + 1), context, 3038 (int)(e - s + 1), s + 1); 3039 return NULL; 3040 } 3041 3042 multi_spaces: 3043 /* diag_listed_as: charnames alias definitions may not contain a 3044 sequence of multiple spaces; marked by <-- HERE 3045 in %s */ 3046 *error_msg = Perl_form(aTHX_ 3047 "charnames alias definitions may not contain a sequence of " 3048 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", 3049 (int)(s - context + 1), context, 3050 (int)(e - s + 1), s + 1); 3051 return NULL; 3052 } 3053 3054 /* 3055 scan_const 3056 3057 Extracts the next constant part of a pattern, double-quoted string, 3058 or transliteration. This is terrifying code. 3059 3060 For example, in parsing the double-quoted string "ab\x63$d", it would 3061 stop at the '$' and return an OP_CONST containing 'abc'. 3062 3063 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 3064 processing a pattern (PL_lex_inpat is true), a transliteration 3065 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 3066 3067 Returns a pointer to the character scanned up to. If this is 3068 advanced from the start pointer supplied (i.e. if anything was 3069 successfully parsed), will leave an OP_CONST for the substring scanned 3070 in pl_yylval. Caller must intuit reason for not parsing further 3071 by looking at the next characters herself. 3072 3073 In patterns: 3074 expand: 3075 \N{FOO} => \N{U+hex_for_character_FOO} 3076 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 3077 3078 pass through: 3079 all other \-char, including \N and \N{ apart from \N{ABC} 3080 3081 stops on: 3082 @ and $ where it appears to be a var, but not for $ as tail anchor 3083 \l \L \u \U \Q \E 3084 (?{ or (??{ or (*{ 3085 3086 In transliterations: 3087 characters are VERY literal, except for - not at the start or end 3088 of the string, which indicates a range. However some backslash sequences 3089 are recognized: \r, \n, and the like 3090 \007 \o{}, \x{}, \N{} 3091 If all elements in the transliteration are below 256, 3092 scan_const expands the range to the full set of intermediate 3093 characters. If the range is in utf8, the hyphen is replaced with 3094 a certain range mark which will be handled by pmtrans() in op.c. 3095 3096 In double-quoted strings: 3097 backslashes: 3098 all those recognized in transliterations 3099 deprecated backrefs: \1 (in substitution replacements) 3100 case and quoting: \U \Q \E 3101 stops on @ and $ 3102 3103 scan_const does *not* construct ops to handle interpolated strings. 3104 It stops processing as soon as it finds an embedded $ or @ variable 3105 and leaves it to the caller to work out what's going on. 3106 3107 embedded arrays (whether in pattern or not) could be: 3108 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 3109 3110 $ in double-quoted strings must be the symbol of an embedded scalar. 3111 3112 $ in pattern could be $foo or could be tail anchor. Assumption: 3113 it's a tail anchor if $ is the last thing in the string, or if it's 3114 followed by one of "()| \r\n\t" 3115 3116 \1 (backreferences) are turned into $1 in substitutions 3117 3118 The structure of the code is 3119 while (there's a character to process) { 3120 handle transliteration ranges 3121 skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/ 3122 skip #-initiated comments in //x patterns 3123 check for embedded arrays 3124 check for embedded scalars 3125 if (backslash) { 3126 deprecate \1 in substitution replacements 3127 handle string-changing backslashes \l \U \Q \E, etc. 3128 switch (what was escaped) { 3129 handle \- in a transliteration (becomes a literal -) 3130 if a pattern and not \N{, go treat as regular character 3131 handle \132 (octal characters) 3132 handle \x15 and \x{1234} (hex characters) 3133 handle \N{name} (named characters, also \N{3,5} in a pattern) 3134 handle \cV (control characters) 3135 handle printf-style backslashes (\f, \r, \n, etc) 3136 } (end switch) 3137 continue 3138 } (end if backslash) 3139 handle regular character 3140 } (end while character to read) 3141 3142 */ 3143 3144 STATIC char * 3145 S_scan_const(pTHX_ char *start) 3146 { 3147 const char * const send = PL_bufend;/* end of the constant */ 3148 SV *sv = newSV(send - start); /* sv for the constant. See note below 3149 on sizing. */ 3150 char *s = start; /* start of the constant */ 3151 char *d = SvPVX(sv); /* destination for copies */ 3152 bool dorange = FALSE; /* are we in a translit range? */ 3153 bool didrange = FALSE; /* did we just finish a range? */ 3154 bool in_charclass = FALSE; /* within /[...]/ */ 3155 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be 3156 UTF8? But, this can show as true 3157 when the source isn't utf8, as for 3158 example when it is entirely composed 3159 of hex constants */ 3160 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */ 3161 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the 3162 number of characters found so far 3163 that will expand (into 2 bytes) 3164 should we have to convert to 3165 UTF-8) */ 3166 SV *res; /* result from charnames */ 3167 STRLEN offset_to_max = 0; /* The offset in the output to where the range 3168 high-end character is temporarily placed */ 3169 3170 /* Does something require special handling in tr/// ? This avoids extra 3171 * work in a less likely case. As such, khw didn't feel it was worth 3172 * adding any branches to the more mainline code to handle this, which 3173 * means that this doesn't get set in some circumstances when things like 3174 * \x{100} get expanded out. As a result there needs to be extra testing 3175 * done in the tr code */ 3176 bool has_above_latin1 = FALSE; 3177 3178 /* Note on sizing: The scanned constant is placed into sv, which is 3179 * initialized by newSV() assuming one byte of output for every byte of 3180 * input. This routine expects newSV() to allocate an extra byte for a 3181 * trailing NUL, which this routine will append if it gets to the end of 3182 * the input. There may be more bytes of input than output (eg., \N{LATIN 3183 * CAPITAL LETTER A}), or more output than input if the constant ends up 3184 * recoded to utf8, but each time a construct is found that might increase 3185 * the needed size, SvGROW() is called. Its size parameter each time is 3186 * based on the best guess estimate at the time, namely the length used so 3187 * far, plus the length the current construct will occupy, plus room for 3188 * the trailing NUL, plus one byte for every input byte still unscanned */ 3189 3190 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 3191 before set */ 3192 #ifdef EBCDIC 3193 int backslash_N = 0; /* ? was the character from \N{} */ 3194 int non_portable_endpoint = 0; /* ? In a range is an endpoint 3195 platform-specific like \x65 */ 3196 #endif 3197 3198 PERL_ARGS_ASSERT_SCAN_CONST; 3199 3200 assert(PL_lex_inwhat != OP_TRANSR); 3201 3202 /* Protect sv from errors and fatal warnings. */ 3203 ENTER_with_name("scan_const"); 3204 SAVEFREESV(sv); 3205 3206 /* A bunch of code in the loop below assumes that if s[n] exists and is not 3207 * NUL, then s[n+1] exists. This assertion makes sure that assumption is 3208 * valid */ 3209 assert(*send == '\0'); 3210 3211 while (s < send 3212 || dorange /* Handle tr/// range at right edge of input */ 3213 ) { 3214 3215 /* get transliterations out of the way (they're most literal) */ 3216 if (PL_lex_inwhat == OP_TRANS) { 3217 3218 /* But there isn't any special handling necessary unless there is a 3219 * range, so for most cases we just drop down and handle the value 3220 * as any other. There are two exceptions. 3221 * 3222 * 1. A hyphen indicates that we are actually going to have a 3223 * range. In this case, skip the '-', set a flag, then drop 3224 * down to handle what should be the end range value. 3225 * 2. After we've handled that value, the next time through, that 3226 * flag is set and we fix up the range. 3227 * 3228 * Ranges entirely within Latin1 are expanded out entirely, in 3229 * order to make the transliteration a simple table look-up. 3230 * Ranges that extend above Latin1 have to be done differently, so 3231 * there is no advantage to expanding them here, so they are 3232 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is 3233 * a byte that can't occur in legal UTF-8, and hence can signify a 3234 * hyphen without any possible ambiguity. On EBCDIC machines, if 3235 * the range is expressed as Unicode, the Latin1 portion is 3236 * expanded out even if the range extends above Latin1. This is 3237 * because each code point in it has to be processed here 3238 * individually to get its native translation */ 3239 3240 if (! dorange) { 3241 3242 /* Here, we don't think we're in a range. If the new character 3243 * is not a hyphen; or if it is a hyphen, but it's too close to 3244 * either edge to indicate a range, or if we haven't output any 3245 * characters yet then it's a regular character. */ 3246 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) 3247 { 3248 3249 /* A regular character. Process like any other, but first 3250 * clear any flags */ 3251 didrange = FALSE; 3252 dorange = FALSE; 3253 #ifdef EBCDIC 3254 non_portable_endpoint = 0; 3255 backslash_N = 0; 3256 #endif 3257 /* The tests here for being above Latin1 and similar ones 3258 * in the following 'else' suffice to find all such 3259 * occurences in the constant, except those added by a 3260 * backslash escape sequence, like \x{100}. Mostly, those 3261 * set 'has_above_latin1' as appropriate */ 3262 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3263 has_above_latin1 = TRUE; 3264 } 3265 3266 /* Drops down to generic code to process current byte */ 3267 } 3268 else { /* Is a '-' in the context where it means a range */ 3269 if (didrange) { /* Something like y/A-C-Z// */ 3270 Perl_croak(aTHX_ "Ambiguous range in transliteration" 3271 " operator"); 3272 } 3273 3274 dorange = TRUE; 3275 3276 s++; /* Skip past the hyphen */ 3277 3278 /* d now points to where the end-range character will be 3279 * placed. Drop down to get that character. We'll finish 3280 * processing the range the next time through the loop */ 3281 3282 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3283 has_above_latin1 = TRUE; 3284 } 3285 3286 /* Drops down to generic code to process current byte */ 3287 } 3288 } /* End of not a range */ 3289 else { 3290 /* Here we have parsed a range. Now must handle it. At this 3291 * point: 3292 * 'sv' is a SV* that contains the output string we are 3293 * constructing. The final two characters in that string 3294 * are the range start and range end, in order. 3295 * 'd' points to just beyond the range end in the 'sv' string, 3296 * where we would next place something 3297 */ 3298 char * max_ptr; 3299 char * min_ptr; 3300 IV range_min; 3301 IV range_max; /* last character in range */ 3302 STRLEN grow; 3303 Size_t offset_to_min = 0; 3304 Size_t extras = 0; 3305 #ifdef EBCDIC 3306 bool convert_unicode; 3307 IV real_range_max = 0; 3308 #endif 3309 /* Get the code point values of the range ends. */ 3310 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1; 3311 offset_to_max = max_ptr - SvPVX_const(sv); 3312 if (d_is_utf8) { 3313 /* We know the utf8 is valid, because we just constructed 3314 * it ourselves in previous loop iterations */ 3315 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); 3316 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); 3317 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); 3318 3319 /* This compensates for not all code setting 3320 * 'has_above_latin1', so that we don't skip stuff that 3321 * should be executed */ 3322 if (range_max > 255) { 3323 has_above_latin1 = TRUE; 3324 } 3325 } 3326 else { 3327 min_ptr = max_ptr - 1; 3328 range_min = * (U8*) min_ptr; 3329 range_max = * (U8*) max_ptr; 3330 } 3331 3332 /* If the range is just a single code point, like tr/a-a/.../, 3333 * that code point is already in the output, twice. We can 3334 * just back up over the second instance and avoid all the rest 3335 * of the work. But if it is a variant character, it's been 3336 * counted twice, so decrement. (This unlikely scenario is 3337 * special cased, like the one for a range of 2 code points 3338 * below, only because the main-line code below needs a range 3339 * of 3 or more to work without special casing. Might as well 3340 * get it out of the way now.) */ 3341 if (UNLIKELY(range_max == range_min)) { 3342 d = max_ptr; 3343 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) { 3344 utf8_variant_count--; 3345 } 3346 goto range_done; 3347 } 3348 3349 #ifdef EBCDIC 3350 /* On EBCDIC platforms, we may have to deal with portable 3351 * ranges. These happen if at least one range endpoint is a 3352 * Unicode value (\N{...}), or if the range is a subset of 3353 * [A-Z] or [a-z], and both ends are literal characters, 3354 * like 'A', and not like \x{C1} */ 3355 convert_unicode = 3356 cBOOL(backslash_N) /* \N{} forces Unicode, 3357 hence portable range */ 3358 || ( ! non_portable_endpoint 3359 && (( isLOWER_A(range_min) && isLOWER_A(range_max)) 3360 || (isUPPER_A(range_min) && isUPPER_A(range_max)))); 3361 if (convert_unicode) { 3362 3363 /* Special handling is needed for these portable ranges. 3364 * They are defined to be in Unicode terms, which includes 3365 * all the Unicode code points between the end points. 3366 * Convert to Unicode to get the Unicode range. Later we 3367 * will convert each code point in the range back to 3368 * native. */ 3369 range_min = NATIVE_TO_UNI(range_min); 3370 range_max = NATIVE_TO_UNI(range_max); 3371 } 3372 #endif 3373 3374 if (range_min > range_max) { 3375 #ifdef EBCDIC 3376 if (convert_unicode) { 3377 /* Need to convert back to native for meaningful 3378 * messages for this platform */ 3379 range_min = UNI_TO_NATIVE(range_min); 3380 range_max = UNI_TO_NATIVE(range_max); 3381 } 3382 #endif 3383 /* Use the characters themselves for the error message if 3384 * ASCII printables; otherwise some visible representation 3385 * of them */ 3386 if (isPRINT_A(range_min) && isPRINT_A(range_max)) { 3387 Perl_croak(aTHX_ 3388 "Invalid range \"%c-%c\" in transliteration operator", 3389 (char)range_min, (char)range_max); 3390 } 3391 #ifdef EBCDIC 3392 else if (convert_unicode) { 3393 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3394 Perl_croak(aTHX_ 3395 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" 3396 UVXf "}\" in transliteration operator", 3397 range_min, range_max); 3398 } 3399 #endif 3400 else { 3401 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3402 Perl_croak(aTHX_ 3403 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" 3404 " in transliteration operator", 3405 range_min, range_max); 3406 } 3407 } 3408 3409 /* If the range is exactly two code points long, they are 3410 * already both in the output */ 3411 if (UNLIKELY(range_min + 1 == range_max)) { 3412 goto range_done; 3413 } 3414 3415 /* Here the range contains at least 3 code points */ 3416 3417 if (d_is_utf8) { 3418 3419 /* If everything in the transliteration is below 256, we 3420 * can avoid special handling later. A translation table 3421 * for each of those bytes is created by op.c. So we 3422 * expand out all ranges to their constituent code points. 3423 * But if we've encountered something above 255, the 3424 * expanding won't help, so skip doing that. But if it's 3425 * EBCDIC, we may have to look at each character below 256 3426 * if we have to convert to/from Unicode values */ 3427 if ( has_above_latin1 3428 #ifdef EBCDIC 3429 && (range_min > 255 || ! convert_unicode) 3430 #endif 3431 ) { 3432 const STRLEN off = d - SvPVX(sv); 3433 const STRLEN extra = 1 + (send - s) + 1; 3434 char *e; 3435 3436 /* Move the high character one byte to the right; then 3437 * insert between it and the range begin, an illegal 3438 * byte which serves to indicate this is a range (using 3439 * a '-' would be ambiguous). */ 3440 3441 if (off + extra > SvLEN(sv)) { 3442 d = off + SvGROW(sv, off + extra); 3443 max_ptr = d - off + offset_to_max; 3444 } 3445 3446 e = d++; 3447 while (e-- > max_ptr) { 3448 *(e + 1) = *e; 3449 } 3450 *(e + 1) = (char) RANGE_INDICATOR; 3451 goto range_done; 3452 } 3453 3454 /* Here, we're going to expand out the range. For EBCDIC 3455 * the range can extend above 255 (not so in ASCII), so 3456 * for EBCDIC, split it into the parts above and below 3457 * 255/256 */ 3458 #ifdef EBCDIC 3459 if (range_max > 255) { 3460 real_range_max = range_max; 3461 range_max = 255; 3462 } 3463 #endif 3464 } 3465 3466 /* Here we need to expand out the string to contain each 3467 * character in the range. Grow the output to handle this. 3468 * For non-UTF8, we need a byte for each code point in the 3469 * range, minus the three that we've already allocated for: the 3470 * hyphen, the min, and the max. For UTF-8, we need this 3471 * plus an extra byte for each code point that occupies two 3472 * bytes (is variant) when in UTF-8 (except we've already 3473 * allocated for the end points, including if they are 3474 * variants). For ASCII platforms and Unicode ranges on EBCDIC 3475 * platforms, it's easy to calculate a precise number. To 3476 * start, we count the variants in the range, which we need 3477 * elsewhere in this function anyway. (For the case where it 3478 * isn't easy to calculate, 'extras' has been initialized to 0, 3479 * and the calculation is done in a loop further down.) */ 3480 #ifdef EBCDIC 3481 if (convert_unicode) 3482 #endif 3483 { 3484 /* This is executed unconditionally on ASCII, and for 3485 * Unicode ranges on EBCDIC. Under these conditions, all 3486 * code points above a certain value are variant; and none 3487 * under that value are. We just need to find out how much 3488 * of the range is above that value. We don't count the 3489 * end points here, as they will already have been counted 3490 * as they were parsed. */ 3491 if (range_min >= UTF_CONTINUATION_MARK) { 3492 3493 /* The whole range is made up of variants */ 3494 extras = (range_max - 1) - (range_min + 1) + 1; 3495 } 3496 else if (range_max >= UTF_CONTINUATION_MARK) { 3497 3498 /* Only the higher portion of the range is variants */ 3499 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1; 3500 } 3501 3502 utf8_variant_count += extras; 3503 } 3504 3505 /* The base growth is the number of code points in the range, 3506 * not including the endpoints, which have already been sized 3507 * for (and output). We don't subtract for the hyphen, as it 3508 * has been parsed but not output, and the SvGROW below is 3509 * based only on what's been output plus what's left to parse. 3510 * */ 3511 grow = (range_max - 1) - (range_min + 1) + 1; 3512 3513 if (d_is_utf8) { 3514 #ifdef EBCDIC 3515 /* In some cases in EBCDIC, we haven't yet calculated a 3516 * precise amount needed for the UTF-8 variants. Just 3517 * assume the worst case, that everything will expand by a 3518 * byte */ 3519 if (! convert_unicode) { 3520 grow *= 2; 3521 } 3522 else 3523 #endif 3524 { 3525 /* Otherwise we know exactly how many variants there 3526 * are in the range. */ 3527 grow += extras; 3528 } 3529 } 3530 3531 /* Grow, but position the output to overwrite the range min end 3532 * point, because in some cases we overwrite that */ 3533 SvCUR_set(sv, d - SvPVX_const(sv)); 3534 offset_to_min = min_ptr - SvPVX_const(sv); 3535 3536 /* See Note on sizing above. */ 3537 d = offset_to_min + SvGROW(sv, SvCUR(sv) 3538 + (send - s) 3539 + grow 3540 + 1 /* Trailing NUL */ ); 3541 3542 /* Now, we can expand out the range. */ 3543 #ifdef EBCDIC 3544 if (convert_unicode) { 3545 SSize_t i; 3546 3547 /* Recall that the min and max are now in Unicode terms, so 3548 * we have to convert each character to its native 3549 * equivalent */ 3550 if (d_is_utf8) { 3551 for (i = range_min; i <= range_max; i++) { 3552 append_utf8_from_native_byte( 3553 LATIN1_TO_NATIVE((U8) i), 3554 (U8 **) &d); 3555 } 3556 } 3557 else { 3558 for (i = range_min; i <= range_max; i++) { 3559 *d++ = (char)LATIN1_TO_NATIVE((U8) i); 3560 } 3561 } 3562 } 3563 else 3564 #endif 3565 /* Always gets run for ASCII, and sometimes for EBCDIC. */ 3566 { 3567 /* Here, no conversions are necessary, which means that the 3568 * first character in the range is already in 'd' and 3569 * valid, so we can skip overwriting it */ 3570 if (d_is_utf8) { 3571 SSize_t i; 3572 d += UTF8SKIP(d); 3573 for (i = range_min + 1; i <= range_max; i++) { 3574 append_utf8_from_native_byte((U8) i, (U8 **) &d); 3575 } 3576 } 3577 else { 3578 SSize_t i; 3579 d++; 3580 assert(range_min + 1 <= range_max); 3581 for (i = range_min + 1; i < range_max; i++) { 3582 #ifdef EBCDIC 3583 /* In this case on EBCDIC, we haven't calculated 3584 * the variants. Do it here, as we go along */ 3585 if (! UVCHR_IS_INVARIANT(i)) { 3586 utf8_variant_count++; 3587 } 3588 #endif 3589 *d++ = (char)i; 3590 } 3591 3592 /* The range_max is done outside the loop so as to 3593 * avoid having to special case not incrementing 3594 * 'utf8_variant_count' on EBCDIC (it's already been 3595 * counted when originally parsed) */ 3596 *d++ = (char) range_max; 3597 } 3598 } 3599 3600 #ifdef EBCDIC 3601 /* If the original range extended above 255, add in that 3602 * portion. */ 3603 if (real_range_max) { 3604 *d++ = (char) UTF8_TWO_BYTE_HI(0x100); 3605 *d++ = (char) UTF8_TWO_BYTE_LO(0x100); 3606 if (real_range_max > 0x100) { 3607 if (real_range_max > 0x101) { 3608 *d++ = (char) RANGE_INDICATOR; 3609 } 3610 d = (char*)uvchr_to_utf8((U8*)d, real_range_max); 3611 } 3612 } 3613 #endif 3614 3615 range_done: 3616 /* mark the range as done, and continue */ 3617 didrange = TRUE; 3618 dorange = FALSE; 3619 #ifdef EBCDIC 3620 non_portable_endpoint = 0; 3621 backslash_N = 0; 3622 #endif 3623 continue; 3624 } /* End of is a range */ 3625 } /* End of transliteration. Joins main code after these else's */ 3626 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3627 char *s1 = s-1; 3628 int esc = 0; 3629 while (s1 >= start && *s1-- == '\\') 3630 esc = !esc; 3631 if (!esc) 3632 in_charclass = TRUE; 3633 } 3634 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3635 char *s1 = s-1; 3636 int esc = 0; 3637 while (s1 >= start && *s1-- == '\\') 3638 esc = !esc; 3639 if (!esc) 3640 in_charclass = FALSE; 3641 } 3642 /* skip for regexp comments /(?#comment)/, except for the last 3643 * char, which will be done separately. Stop on (?{..}) and 3644 * friends (??{ ... }) or (*{ ... }) */ 3645 else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) { 3646 if (s[1] == '?' && s[2] == '#') { 3647 if (s_is_utf8) { 3648 PERL_UINT_FAST8_T len = UTF8SKIP(s); 3649 3650 while (s + len < send && *s != ')') { 3651 Copy(s, d, len, U8); 3652 d += len; 3653 s += len; 3654 len = UTF8_SAFE_SKIP(s, send); 3655 } 3656 } 3657 else while (s+1 < send && *s != ')') { 3658 *d++ = *s++; 3659 } 3660 } 3661 else 3662 if (!PL_lex_casemods && 3663 /* The following should match regcomp.c */ 3664 ((s[1] == '?' && (s[2] == '{' /* (?{ ... }) */ 3665 || (s[2] == '?' && s[3] == '{'))) || /* (??{ ... }) */ 3666 (s[1] == '*' && (s[2] == '{' ))) /* (*{ ... }) */ 3667 ){ 3668 break; 3669 } 3670 } 3671 /* likewise skip #-initiated comments in //x patterns */ 3672 else if (*s == '#' 3673 && PL_lex_inpat 3674 && !in_charclass 3675 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) 3676 { 3677 while (s < send && *s != '\n') 3678 *d++ = *s++; 3679 } 3680 /* no further processing of single-quoted regex */ 3681 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3682 goto default_action; 3683 3684 /* check for embedded arrays 3685 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3686 */ 3687 else if (*s == '@' && s[1]) { 3688 if (UTF 3689 ? isIDFIRST_utf8_safe(s+1, send) 3690 : isWORDCHAR_A(s[1])) 3691 { 3692 break; 3693 } 3694 if (memCHRs(":'{$", s[1])) 3695 break; 3696 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3697 break; /* in regexp, neither @+ nor @- are interpolated */ 3698 } 3699 /* check for embedded scalars. only stop if we're sure it's a 3700 * variable. */ 3701 else if (*s == '$') { 3702 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3703 break; 3704 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) { 3705 if (s[1] == '\\') { 3706 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3707 "Possible unintended interpolation of $\\ in regex"); 3708 } 3709 break; /* in regexp, $ might be tail anchor */ 3710 } 3711 } 3712 3713 /* End of else if chain - OP_TRANS rejoin rest */ 3714 3715 if (UNLIKELY(s >= send)) { 3716 assert(s == send); 3717 break; 3718 } 3719 3720 /* backslashes */ 3721 if (*s == '\\' && s+1 < send) { 3722 char* bslash = s; /* point to beginning \ */ 3723 char* rbrace; /* point to ending '}' */ 3724 char* e; /* 1 past the meat (non-blanks) before the 3725 brace */ 3726 s++; 3727 3728 /* warn on \1 - \9 in substitution replacements, but note that \11 3729 * is an octal; and \19 is \1 followed by '9' */ 3730 if (PL_lex_inwhat == OP_SUBST 3731 && !PL_lex_inpat 3732 && isDIGIT(*s) 3733 && *s != '0' 3734 && !isDIGIT(s[1])) 3735 { 3736 /* diag_listed_as: \%d better written as $%d */ 3737 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3738 s = bslash; 3739 *s = '$'; 3740 break; 3741 } 3742 3743 /* string-change backslash escapes */ 3744 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) { 3745 s = bslash; 3746 break; 3747 } 3748 /* In a pattern, process \N, but skip any other backslash escapes. 3749 * This is because we don't want to translate an escape sequence 3750 * into a meta symbol and have the regex compiler use the meta 3751 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3752 * in spite of this, we do have to process \N here while the proper 3753 * charnames handler is in scope. See bugs #56444 and #62056. 3754 * 3755 * There is a complication because \N in a pattern may also stand 3756 * for 'match a non-nl', and not mean a charname, in which case its 3757 * processing should be deferred to the regex compiler. To be a 3758 * charname it must be followed immediately by a '{', and not look 3759 * like \N followed by a curly quantifier, i.e., not something like 3760 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3761 * quantifier */ 3762 else if (PL_lex_inpat 3763 && (*s != 'N' 3764 || s[1] != '{' 3765 || regcurly(s + 1, send, NULL))) 3766 { 3767 *d++ = '\\'; 3768 goto default_action; 3769 } 3770 3771 switch (*s) { 3772 default: 3773 { 3774 if ((isALPHANUMERIC(*s))) 3775 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3776 "Unrecognized escape \\%c passed through", 3777 *s); 3778 /* default action is to copy the quoted character */ 3779 goto default_action; 3780 } 3781 3782 /* eg. \132 indicates the octal constant 0132 */ 3783 case '0': case '1': case '2': case '3': 3784 case '4': case '5': case '6': case '7': 3785 { 3786 I32 flags = PERL_SCAN_SILENT_ILLDIGIT 3787 | PERL_SCAN_NOTIFY_ILLDIGIT; 3788 STRLEN len = 3; 3789 uv = grok_oct(s, &len, &flags, NULL); 3790 s += len; 3791 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) 3792 && s < send 3793 && isDIGIT(*s) /* like \08, \178 */ 3794 && ckWARN(WARN_MISC)) 3795 { 3796 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s", 3797 form_alien_digit_msg(8, len, s, send, UTF, FALSE)); 3798 } 3799 } 3800 goto NUM_ESCAPE_INSERT; 3801 3802 /* eg. \o{24} indicates the octal constant \024 */ 3803 case 'o': 3804 { 3805 const char* error; 3806 3807 if (! grok_bslash_o(&s, send, 3808 &uv, &error, 3809 NULL, 3810 FALSE, /* Not strict */ 3811 FALSE, /* No illegal cp's */ 3812 UTF)) 3813 { 3814 yyerror(error); 3815 uv = 0; /* drop through to ensure range ends are set */ 3816 } 3817 goto NUM_ESCAPE_INSERT; 3818 } 3819 3820 /* eg. \x24 indicates the hex constant 0x24 */ 3821 case 'x': 3822 { 3823 const char* error; 3824 3825 if (! grok_bslash_x(&s, send, 3826 &uv, &error, 3827 NULL, 3828 FALSE, /* Not strict */ 3829 FALSE, /* No illegal cp's */ 3830 UTF)) 3831 { 3832 yyerror(error); 3833 uv = 0; /* drop through to ensure range ends are set */ 3834 } 3835 } 3836 3837 NUM_ESCAPE_INSERT: 3838 /* Insert oct or hex escaped character. */ 3839 3840 /* Here uv is the ordinal of the next character being added */ 3841 if (UVCHR_IS_INVARIANT(uv)) { 3842 *d++ = (char) uv; 3843 } 3844 else { 3845 if (!d_is_utf8 && uv > 255) { 3846 3847 /* Here, 'uv' won't fit unless we convert to UTF-8. 3848 * If we've only seen invariants so far, all we have to 3849 * do is turn on the flag */ 3850 if (utf8_variant_count == 0) { 3851 SvUTF8_on(sv); 3852 } 3853 else { 3854 SvCUR_set(sv, d - SvPVX_const(sv)); 3855 SvPOK_on(sv); 3856 *d = '\0'; 3857 3858 sv_utf8_upgrade_flags_grow( 3859 sv, 3860 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3861 3862 /* Since we're having to grow here, 3863 * make sure we have enough room for 3864 * this escape and a NUL, so the 3865 * code immediately below won't have 3866 * to actually grow again */ 3867 UVCHR_SKIP(uv) 3868 + (STRLEN)(send - s) + 1); 3869 d = SvPVX(sv) + SvCUR(sv); 3870 } 3871 3872 has_above_latin1 = TRUE; 3873 d_is_utf8 = TRUE; 3874 } 3875 3876 if (! d_is_utf8) { 3877 *d++ = (char)uv; 3878 utf8_variant_count++; 3879 } 3880 else { 3881 /* Usually, there will already be enough room in 'sv' 3882 * since such escapes are likely longer than any UTF-8 3883 * sequence they can end up as. This isn't the case on 3884 * EBCDIC where \x{40000000} contains 12 bytes, and the 3885 * UTF-8 for it contains 14. And, we have to allow for 3886 * a trailing NUL. It probably can't happen on ASCII 3887 * platforms, but be safe. See Note on sizing above. */ 3888 const STRLEN needed = d - SvPVX(sv) 3889 + UVCHR_SKIP(uv) 3890 + (send - s) 3891 + 1; 3892 if (UNLIKELY(needed > SvLEN(sv))) { 3893 SvCUR_set(sv, d - SvPVX_const(sv)); 3894 d = SvCUR(sv) + SvGROW(sv, needed); 3895 } 3896 3897 d = (char*) uvchr_to_utf8_flags((U8*)d, uv, 3898 (ckWARN(WARN_PORTABLE)) 3899 ? UNICODE_WARN_PERL_EXTENDED 3900 : 0); 3901 } 3902 } 3903 #ifdef EBCDIC 3904 non_portable_endpoint++; 3905 #endif 3906 continue; 3907 3908 case 'N': 3909 /* In a non-pattern \N must be like \N{U+0041}, or it can be a 3910 * named character, like \N{LATIN SMALL LETTER A}, or a named 3911 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND 3912 * GRAVE} (except y/// can't handle the latter, croaking). For 3913 * convenience all three forms are referred to as "named 3914 * characters" below. 3915 * 3916 * For patterns, \N also can mean to match a non-newline. Code 3917 * before this 'switch' statement should already have handled 3918 * this situation, and hence this code only has to deal with 3919 * the named character cases. 3920 * 3921 * For non-patterns, the named characters are converted to 3922 * their string equivalents. In patterns, named characters are 3923 * not converted to their ultimate forms for the same reasons 3924 * that other escapes aren't (mainly that the ultimate 3925 * character could be considered a meta-symbol by the regex 3926 * compiler). Instead, they are converted to the \N{U+...} 3927 * form to get the value from the charnames that is in effect 3928 * right now, while preserving the fact that it was a named 3929 * character, so that the regex compiler knows this. 3930 * 3931 * The structure of this section of code (besides checking for 3932 * errors and upgrading to utf8) is: 3933 * If the named character is of the form \N{U+...}, pass it 3934 * through if a pattern; otherwise convert the code point 3935 * to utf8 3936 * Otherwise must be some \N{NAME}: convert to 3937 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8 3938 * 3939 * Transliteration is an exception. The conversion to utf8 is 3940 * only done if the code point requires it to be representable. 3941 * 3942 * Here, 's' points to the 'N'; the test below is guaranteed to 3943 * succeed if we are being called on a pattern, as we already 3944 * know from a test above that the next character is a '{'. A 3945 * non-pattern \N must mean 'named character', which requires 3946 * braces */ 3947 s++; 3948 if (*s != '{') { 3949 yyerror("Missing braces on \\N{}"); 3950 *d++ = '\0'; 3951 continue; 3952 } 3953 s++; 3954 3955 /* If there is no matching '}', it is an error. */ 3956 if (! (rbrace = (char *) memchr(s, '}', send - s))) { 3957 if (! PL_lex_inpat) { 3958 yyerror("Missing right brace on \\N{}"); 3959 } else { 3960 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3961 } 3962 yyquit(); /* Have exhausted the input. */ 3963 } 3964 3965 /* Here it looks like a named character */ 3966 while (s < rbrace && isBLANK(*s)) { 3967 s++; 3968 } 3969 3970 e = rbrace; 3971 while (s < e && isBLANK(*(e - 1))) { 3972 e--; 3973 } 3974 3975 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3976 s += 2; /* Skip to next char after the 'U+' */ 3977 if (PL_lex_inpat) { 3978 3979 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ 3980 /* Check the syntax. */ 3981 if (!isXDIGIT(*s)) { 3982 bad_NU: 3983 yyerror( 3984 "Invalid hexadecimal number in \\N{U+...}" 3985 ); 3986 s = rbrace + 1; 3987 *d++ = '\0'; 3988 continue; 3989 } 3990 while (++s < e) { 3991 if (isXDIGIT(*s)) 3992 continue; 3993 else if ((*s == '.' || *s == '_') 3994 && isXDIGIT(s[1])) 3995 continue; 3996 goto bad_NU; 3997 } 3998 3999 /* Pass everything through unchanged. 4000 * +1 is to include the '}' */ 4001 Copy(bslash, d, rbrace - bslash + 1, char); 4002 d += rbrace - bslash + 1; 4003 } 4004 else { /* Not a pattern: convert the hex to string */ 4005 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 4006 | PERL_SCAN_SILENT_ILLDIGIT 4007 | PERL_SCAN_SILENT_OVERFLOW 4008 | PERL_SCAN_DISALLOW_PREFIX; 4009 STRLEN len = e - s; 4010 4011 uv = grok_hex(s, &len, &flags, NULL); 4012 if (len == 0 || (len != (STRLEN)(e - s))) 4013 goto bad_NU; 4014 4015 if ( uv > MAX_LEGAL_CP 4016 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX)) 4017 { 4018 yyerror(form_cp_too_large_msg(16, s, len, 0)); 4019 uv = 0; /* drop through to ensure range ends are 4020 set */ 4021 } 4022 4023 /* For non-tr///, if the destination is not in utf8, 4024 * unconditionally recode it to be so. This is 4025 * because \N{} implies Unicode semantics, and scalars 4026 * have to be in utf8 to guarantee those semantics. 4027 * tr/// doesn't care about Unicode rules, so no need 4028 * there to upgrade to UTF-8 for small enough code 4029 * points */ 4030 if (! d_is_utf8 && ( uv > 0xFF 4031 || PL_lex_inwhat != OP_TRANS)) 4032 { 4033 /* See Note on sizing above. */ 4034 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1; 4035 4036 SvCUR_set(sv, d - SvPVX_const(sv)); 4037 SvPOK_on(sv); 4038 *d = '\0'; 4039 4040 if (utf8_variant_count == 0) { 4041 SvUTF8_on(sv); 4042 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 4043 } 4044 else { 4045 sv_utf8_upgrade_flags_grow( 4046 sv, 4047 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4048 extra); 4049 d = SvPVX(sv) + SvCUR(sv); 4050 } 4051 4052 d_is_utf8 = TRUE; 4053 has_above_latin1 = TRUE; 4054 } 4055 4056 /* Add the (Unicode) code point to the output. */ 4057 if (OFFUNI_IS_INVARIANT(uv)) { 4058 *d++ = (char) LATIN1_TO_NATIVE(uv); 4059 } 4060 else if (! d_is_utf8) { 4061 *d++ = (char) LATIN1_TO_NATIVE(uv); 4062 utf8_variant_count++; 4063 } 4064 else { 4065 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 4066 (ckWARN(WARN_PORTABLE)) 4067 ? UNICODE_WARN_PERL_EXTENDED 4068 : 0); 4069 } 4070 } 4071 } 4072 else /* Here is \N{NAME} but not \N{U+...}. */ 4073 if (! (res = get_and_check_backslash_N_name_wrapper(s, e))) 4074 { /* Failed. We should die eventually, but for now use a NUL 4075 to keep parsing */ 4076 *d++ = '\0'; 4077 } 4078 else { /* Successfully evaluated the name */ 4079 STRLEN len; 4080 const char *str = SvPV_const(res, len); 4081 if (PL_lex_inpat) { 4082 4083 if (! len) { /* The name resolved to an empty string */ 4084 const char empty_N[] = "\\N{_}"; 4085 Copy(empty_N, d, sizeof(empty_N) - 1, char); 4086 d += sizeof(empty_N) - 1; 4087 } 4088 else { 4089 /* In order to not lose information for the regex 4090 * compiler, pass the result in the specially made 4091 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 4092 * the code points in hex of each character 4093 * returned by charnames */ 4094 4095 const char *str_end = str + len; 4096 const STRLEN off = d - SvPVX_const(sv); 4097 4098 if (! SvUTF8(res)) { 4099 /* For the non-UTF-8 case, we can determine the 4100 * exact length needed without having to parse 4101 * through the string. Each character takes up 4102 * 2 hex digits plus either a trailing dot or 4103 * the "}" */ 4104 const char initial_text[] = "\\N{U+"; 4105 const STRLEN initial_len = sizeof(initial_text) 4106 - 1; 4107 d = off + SvGROW(sv, off 4108 + 3 * len 4109 4110 /* +1 for trailing NUL */ 4111 + initial_len + 1 4112 4113 + (STRLEN)(send - rbrace)); 4114 Copy(initial_text, d, initial_len, char); 4115 d += initial_len; 4116 while (str < str_end) { 4117 char hex_string[4]; 4118 int len = 4119 my_snprintf(hex_string, 4120 sizeof(hex_string), 4121 "%02X.", 4122 4123 /* The regex compiler is 4124 * expecting Unicode, not 4125 * native */ 4126 NATIVE_TO_LATIN1(*str)); 4127 PERL_MY_SNPRINTF_POST_GUARD(len, 4128 sizeof(hex_string)); 4129 Copy(hex_string, d, 3, char); 4130 d += 3; 4131 str++; 4132 } 4133 d--; /* Below, we will overwrite the final 4134 dot with a right brace */ 4135 } 4136 else { 4137 STRLEN char_length; /* cur char's byte length */ 4138 4139 /* and the number of bytes after this is 4140 * translated into hex digits */ 4141 STRLEN output_length; 4142 4143 /* 2 hex per byte; 2 chars for '\N'; 2 chars 4144 * for max('U+', '.'); and 1 for NUL */ 4145 char hex_string[2 * UTF8_MAXBYTES + 5]; 4146 4147 /* Get the first character of the result. */ 4148 U32 uv = utf8n_to_uvchr((U8 *) str, 4149 len, 4150 &char_length, 4151 UTF8_ALLOW_ANYUV); 4152 /* Convert first code point to Unicode hex, 4153 * including the boiler plate before it. */ 4154 output_length = 4155 my_snprintf(hex_string, sizeof(hex_string), 4156 "\\N{U+%X", 4157 (unsigned int) NATIVE_TO_UNI(uv)); 4158 4159 /* Make sure there is enough space to hold it */ 4160 d = off + SvGROW(sv, off 4161 + output_length 4162 + (STRLEN)(send - rbrace) 4163 + 2); /* '}' + NUL */ 4164 /* And output it */ 4165 Copy(hex_string, d, output_length, char); 4166 d += output_length; 4167 4168 /* For each subsequent character, append dot and 4169 * its Unicode code point in hex */ 4170 while ((str += char_length) < str_end) { 4171 const STRLEN off = d - SvPVX_const(sv); 4172 U32 uv = utf8n_to_uvchr((U8 *) str, 4173 str_end - str, 4174 &char_length, 4175 UTF8_ALLOW_ANYUV); 4176 output_length = 4177 my_snprintf(hex_string, 4178 sizeof(hex_string), 4179 ".%X", 4180 (unsigned int) NATIVE_TO_UNI(uv)); 4181 4182 d = off + SvGROW(sv, off 4183 + output_length 4184 + (STRLEN)(send - rbrace) 4185 + 2); /* '}' + NUL */ 4186 Copy(hex_string, d, output_length, char); 4187 d += output_length; 4188 } 4189 } 4190 4191 *d++ = '}'; /* Done. Add the trailing brace */ 4192 } 4193 } 4194 else { /* Here, not in a pattern. Convert the name to a 4195 * string. */ 4196 4197 if (PL_lex_inwhat == OP_TRANS) { 4198 str = SvPV_const(res, len); 4199 if (len > ((SvUTF8(res)) 4200 ? UTF8SKIP(str) 4201 : 1U)) 4202 { 4203 yyerror(Perl_form(aTHX_ 4204 "%.*s must not be a named sequence" 4205 " in transliteration operator", 4206 /* +1 to include the "}" */ 4207 (int) (rbrace + 1 - start), start)); 4208 *d++ = '\0'; 4209 goto end_backslash_N; 4210 } 4211 4212 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) { 4213 has_above_latin1 = TRUE; 4214 } 4215 4216 } 4217 else if (! SvUTF8(res)) { 4218 /* Make sure \N{} return is UTF-8. This is because 4219 * \N{} implies Unicode semantics, and scalars have 4220 * to be in utf8 to guarantee those semantics; but 4221 * not needed in tr/// */ 4222 sv_utf8_upgrade_flags(res, 0); 4223 str = SvPV_const(res, len); 4224 } 4225 4226 /* Upgrade destination to be utf8 if this new 4227 * component is */ 4228 if (! d_is_utf8 && SvUTF8(res)) { 4229 /* See Note on sizing above. */ 4230 const STRLEN extra = len + (send - s) + 1; 4231 4232 SvCUR_set(sv, d - SvPVX_const(sv)); 4233 SvPOK_on(sv); 4234 *d = '\0'; 4235 4236 if (utf8_variant_count == 0) { 4237 SvUTF8_on(sv); 4238 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 4239 } 4240 else { 4241 sv_utf8_upgrade_flags_grow(sv, 4242 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4243 extra); 4244 d = SvPVX(sv) + SvCUR(sv); 4245 } 4246 d_is_utf8 = TRUE; 4247 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */ 4248 4249 /* See Note on sizing above. (NOTE: SvCUR() is not 4250 * set correctly here). */ 4251 const STRLEN extra = len + (send - rbrace) + 1; 4252 const STRLEN off = d - SvPVX_const(sv); 4253 d = off + SvGROW(sv, off + extra); 4254 } 4255 Copy(str, d, len, char); 4256 d += len; 4257 } 4258 4259 SvREFCNT_dec(res); 4260 4261 } /* End \N{NAME} */ 4262 4263 end_backslash_N: 4264 #ifdef EBCDIC 4265 backslash_N++; /* \N{} is defined to be Unicode */ 4266 #endif 4267 s = rbrace + 1; /* Point to just after the '}' */ 4268 continue; 4269 4270 /* \c is a control character */ 4271 case 'c': 4272 s++; 4273 if (s < send) { 4274 const char * message; 4275 4276 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) { 4277 yyerror(message); 4278 yyquit(); /* Have always immediately croaked on 4279 errors in this */ 4280 } 4281 d++; 4282 } 4283 else { 4284 yyerror("Missing control char name in \\c"); 4285 yyquit(); /* Are at end of input, no sense continuing */ 4286 } 4287 #ifdef EBCDIC 4288 non_portable_endpoint++; 4289 #endif 4290 break; 4291 4292 /* printf-style backslashes, formfeeds, newlines, etc */ 4293 case 'b': 4294 *d++ = '\b'; 4295 break; 4296 case 'n': 4297 *d++ = '\n'; 4298 break; 4299 case 'r': 4300 *d++ = '\r'; 4301 break; 4302 case 'f': 4303 *d++ = '\f'; 4304 break; 4305 case 't': 4306 *d++ = '\t'; 4307 break; 4308 case 'e': 4309 *d++ = ESC_NATIVE; 4310 break; 4311 case 'a': 4312 *d++ = '\a'; 4313 break; 4314 } /* end switch */ 4315 4316 s++; 4317 continue; 4318 } /* end if (backslash) */ 4319 4320 default_action: 4321 /* Just copy the input to the output, though we may have to convert 4322 * to/from UTF-8. 4323 * 4324 * If the input has the same representation in UTF-8 as not, it will be 4325 * a single byte, and we don't care about UTF8ness; just copy the byte */ 4326 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) { 4327 *d++ = *s++; 4328 } 4329 else if (! s_is_utf8 && ! d_is_utf8) { 4330 /* If neither source nor output is UTF-8, is also a single byte, 4331 * just copy it; but this byte counts should we later have to 4332 * convert to UTF-8 */ 4333 *d++ = *s++; 4334 utf8_variant_count++; 4335 } 4336 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */ 4337 const STRLEN len = UTF8SKIP(s); 4338 4339 /* We expect the source to have already been checked for 4340 * malformedness */ 4341 assert(isUTF8_CHAR((U8 *) s, (U8 *) send)); 4342 4343 Copy(s, d, len, U8); 4344 d += len; 4345 s += len; 4346 } 4347 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */ 4348 STRLEN need = send - s + 1; /* See Note on sizing above. */ 4349 4350 SvCUR_set(sv, d - SvPVX_const(sv)); 4351 SvPOK_on(sv); 4352 *d = '\0'; 4353 4354 if (utf8_variant_count == 0) { 4355 SvUTF8_on(sv); 4356 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need); 4357 } 4358 else { 4359 sv_utf8_upgrade_flags_grow(sv, 4360 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4361 need); 4362 d = SvPVX(sv) + SvCUR(sv); 4363 } 4364 d_is_utf8 = TRUE; 4365 goto default_action; /* Redo, having upgraded so both are UTF-8 */ 4366 } 4367 else { /* UTF8ness matters: convert this non-UTF8 source char to 4368 UTF-8 for output. It will occupy 2 bytes, but don't include 4369 the input byte since we haven't incremented 's' yet. See 4370 Note on sizing above. */ 4371 const STRLEN off = d - SvPVX(sv); 4372 const STRLEN extra = 2 + (send - s - 1) + 1; 4373 if (off + extra > SvLEN(sv)) { 4374 d = off + SvGROW(sv, off + extra); 4375 } 4376 *d++ = UTF8_EIGHT_BIT_HI(*s); 4377 *d++ = UTF8_EIGHT_BIT_LO(*s); 4378 s++; 4379 } 4380 } /* while loop to process each character */ 4381 4382 { 4383 const STRLEN off = d - SvPVX(sv); 4384 4385 /* See if room for the terminating NUL */ 4386 if (UNLIKELY(off >= SvLEN(sv))) { 4387 4388 #ifndef DEBUGGING 4389 4390 if (off > SvLEN(sv)) 4391 #endif 4392 Perl_croak(aTHX_ "panic: constant overflowed allocated space," 4393 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv)); 4394 4395 /* Whew! Here we don't have room for the terminating NUL, but 4396 * everything else so far has fit. It's not too late to grow 4397 * to fit the NUL and continue on. But it is a bug, as the code 4398 * above was supposed to have made room for this, so under 4399 * DEBUGGING builds, we panic anyway. */ 4400 d = off + SvGROW(sv, off + 1); 4401 } 4402 } 4403 4404 /* terminate the string and set up the sv */ 4405 *d = '\0'; 4406 SvCUR_set(sv, d - SvPVX_const(sv)); 4407 4408 SvPOK_on(sv); 4409 if (d_is_utf8) { 4410 SvUTF8_on(sv); 4411 } 4412 4413 /* shrink the sv if we allocated more than we used */ 4414 if (SvCUR(sv) + 5 < SvLEN(sv)) { 4415 SvPV_shrink_to_cur(sv); 4416 } 4417 4418 /* return the substring (via pl_yylval) only if we parsed anything */ 4419 if (s > start) { 4420 char *s2 = start; 4421 for (; s2 < s; s2++) { 4422 if (*s2 == '\n') 4423 COPLINE_INC_WITH_HERELINES; 4424 } 4425 SvREFCNT_inc_simple_void_NN(sv); 4426 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 4427 && ! PL_parser->lex_re_reparsing) 4428 { 4429 const char *const key = PL_lex_inpat ? "qr" : "q"; 4430 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 4431 const char *type; 4432 STRLEN typelen; 4433 4434 if (PL_lex_inwhat == OP_TRANS) { 4435 type = "tr"; 4436 typelen = 2; 4437 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 4438 type = "s"; 4439 typelen = 1; 4440 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 4441 type = "q"; 4442 typelen = 1; 4443 } else { 4444 type = "qq"; 4445 typelen = 2; 4446 } 4447 4448 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 4449 type, typelen, NULL); 4450 } 4451 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 4452 } 4453 LEAVE_with_name("scan_const"); 4454 return s; 4455 } 4456 4457 /* S_intuit_more 4458 * Returns TRUE if there's more to the expression (e.g., a subscript), 4459 * FALSE otherwise. 4460 * 4461 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 4462 * 4463 * ->[ and ->{ return TRUE 4464 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 4465 * { and [ outside a pattern are always subscripts, so return TRUE 4466 * if we're outside a pattern and it's not { or [, then return FALSE 4467 * if we're in a pattern and the first char is a { 4468 * {4,5} (any digits around the comma) returns FALSE 4469 * if we're in a pattern and the first char is a [ 4470 * [] returns FALSE 4471 * [SOMETHING] has a funky heuristic to decide whether it's a 4472 * character class or not. It has to deal with things like 4473 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 4474 * anything else returns TRUE 4475 */ 4476 4477 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 4478 4479 STATIC int 4480 S_intuit_more(pTHX_ char *s, char *e) 4481 { 4482 PERL_ARGS_ASSERT_INTUIT_MORE; 4483 4484 /* This function has been mostly untouched for a long time, due to its, 4485 * 'scariness', and lack of comments. khw has gone through and done some 4486 * cleanup, while finding various instances of problematic behavior. 4487 * Rather than change this base-level function immediately, khw has added 4488 * commentary to those areas. */ 4489 4490 /* If recursed within brackets, there is more to the expression */ 4491 if (PL_lex_brackets) 4492 return TRUE; 4493 4494 /* If begins with '->' ... */ 4495 if (s[0] == '-' && s[1] == '>') { 4496 4497 /* '->[' and '->{' imply more to the expression */ 4498 if (s[2] == '[' || s[2] == '{') { 4499 return TRUE; 4500 } 4501 4502 /* Any post deref construct implies more to the expression */ 4503 if ( FEATURE_POSTDEREF_QQ_IS_ENABLED 4504 && ( (s[2] == '$' && ( s[3] == '*' 4505 || (s[3] == '#' && s[4] == '*'))) 4506 || (s[2] == '@' && memCHRs("*[{", s[3])) )) 4507 { 4508 return TRUE; 4509 } 4510 } 4511 4512 if (s[0] != '{' && s[0] != '[') 4513 return FALSE; 4514 4515 /* quit immediately from any errors from now on */ 4516 PL_parser->sub_no_recover = TRUE; 4517 4518 /* Here is '{' or '['. Outside patterns, they're always subscripts */ 4519 if (!PL_lex_inpat) 4520 return TRUE; 4521 4522 /* In a pattern, so maybe we have {n,m}, in which case, there isn't more to 4523 * the expression. 4524 * 4525 * khw: This assumes that anything matching regcurly is a character class. 4526 * The syntax of regcurly has been loosened since this function was 4527 * written, and regcurly never required a comma, as in {0}. Probably it is 4528 * ok as-is */ 4529 if (s[0] == '{') { 4530 if (regcurly(s, e, NULL)) { 4531 return FALSE; 4532 } 4533 return TRUE; 4534 } 4535 4536 /* Here is '[': maybe we have a character class. Examine the guts */ 4537 s++; 4538 4539 /* '^' implies a character class; An empty '[]' isn't legal, but it does 4540 * mean there isn't more to come */ 4541 if (s[0] == ']' || s[0] == '^') 4542 return FALSE; 4543 4544 /* Find matching ']'. khw: This means any s[1] below is guaranteed to 4545 * exist */ 4546 const char * const send = (char *) memchr(s, ']', e - s); 4547 if (! send) /* has to be an expression */ 4548 return TRUE; 4549 4550 /* If the construct consists entirely of one or two digits, call it a 4551 * subscript. */ 4552 if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 || (isDIGIT(s[1])))) { 4553 return TRUE; 4554 } 4555 4556 /* this is terrifying, and it mostly works. See GH #16478. 4557 * 4558 * khw: That ticket shows that the heuristics here get things wrong. That 4559 * most of the weights are divisible by 5 indicates that not a lot of 4560 * tuning was done, and that the values are fairly arbitrary. Especially 4561 * problematic are when all characters in the construct are numeric. We 4562 * have [89] always resolving to a subscript, though that could well be a 4563 * character class that is related to finding non-octals. And [100] is a 4564 * character class when it could well be a subscript. */ 4565 4566 int weight; 4567 4568 if (s[0] == '$') { /* First char is dollar; lean very slightly to it 4569 being a subscript */ 4570 weight = -1; 4571 } 4572 else { /* Otherwise, lean a little more towards it being a 4573 character class. */ 4574 weight = 2; 4575 } 4576 4577 /* Unsigned version of current character */ 4578 unsigned char un_char = 0; 4579 4580 /* Keep track of how many multiple occurrences of the same character there 4581 * are */ 4582 char seen[256]; 4583 Zero(seen, 256, char); 4584 4585 /* Examine each character in the construct */ 4586 bool first_time = true; 4587 for (; s < send; s++, first_time = false) { 4588 unsigned char prev_un_char = un_char; 4589 un_char = (unsigned char) s[0]; 4590 switch (s[0]) { 4591 case '@': 4592 case '&': 4593 case '$': 4594 4595 /* Each additional occurrence of one of these three strongly 4596 * indicates it is a subscript */ 4597 weight -= seen[un_char] * 10; 4598 4599 /* Following one of these characters, we look to see if there is an 4600 * identifier already found in the program by that name. If so, 4601 * strongly suspect this isn't a character class */ 4602 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { 4603 int len; 4604 char tmpbuf[sizeof PL_tokenbuf * 4]; 4605 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 4606 len = (int)strlen(tmpbuf); 4607 if ( len > 1 4608 && gv_fetchpvn_flags(tmpbuf, 4609 len, 4610 UTF ? SVf_UTF8 : 0, 4611 SVt_PV)) 4612 weight -= 100; 4613 else /* Not a multi-char identifier already known in the 4614 program; is somewhat likely to be a subscript */ 4615 weight -= 10; 4616 } 4617 else if ( s[0] == '$' 4618 && s[1] 4619 && memCHRs("[#!%*<>()-=", s[1])) 4620 { 4621 /* Here we have what could be a punctuation variable. If the 4622 * next character after it is a closing bracket, it makes it 4623 * quite likely to be that, and hence a subscript. If it is 4624 * something else, more mildly a subscript */ 4625 if (/*{*/ memCHRs("])} =", s[2])) 4626 weight -= 10; 4627 else 4628 weight -= 1; 4629 } 4630 break; 4631 4632 case '\\': 4633 if (s[1]) { 4634 if (memCHRs("wds]", s[1])) 4635 weight += 100; /* \w \d \s => strongly charclass */ 4636 /* khw: Why not \W \D \S \h \v, etc as well? */ 4637 else if (seen[(U8)'\''] || seen[(U8)'"']) 4638 weight += 1; /* \' => mildly charclass */ 4639 else if (memCHRs("abcfnrtvx", s[1])) 4640 weight += 40; /* \n, etc => charclass */ 4641 /* khw: Why not \e etc as well? */ 4642 else if (isDIGIT(s[1])) { 4643 weight += 40; /* \123 => charclass */ 4644 while (s[1] && isDIGIT(s[1])) 4645 s++; 4646 } 4647 } 4648 else /* \ followed by NUL strongly indicates character class */ 4649 weight += 100; 4650 break; 4651 4652 case '-': 4653 /* If it is something like '-\', it is more likely to be a 4654 * character class. 4655 * 4656 * khw: The rest of the conditionals in this 'case' really should 4657 * be subject to an 'else' of this condition */ 4658 if (s[1] == '\\') 4659 weight += 50; 4660 4661 /* If it is something like 'a-' or '0-', it is more likely to 4662 * be a character class. '!' is the first ASCII graphic, so '!-' 4663 * would be the start of a range of graphics. */ 4664 if (! first_time && memCHRs("aA01! ", prev_un_char)) 4665 weight += 30; 4666 4667 /* If it is something like '-Z' or '-7' (for octal) or '-9' it 4668 * is more likely to be a character class. '~' is the final ASCII 4669 * graphic, so '-~' would be the end of a range of graphics. 4670 * 4671 * khw: Having [-z] really doesn't imply what the comments above 4672 * indicate, so this should only be tested when '! first_time' */ 4673 if (memCHRs("zZ79~", s[1])) 4674 weight += 30; 4675 4676 /* If it is something like -1 or -$foo, it is more likely to be a 4677 * subscript. */ 4678 if (first_time && (isDIGIT(s[1]) || s[1] == '$')) { 4679 weight -= 5; /* cope with negative subscript */ 4680 } 4681 break; 4682 4683 default: 4684 if ( (first_time || ( ! isWORDCHAR(prev_un_char) 4685 && prev_un_char != '$' 4686 && prev_un_char != '@' 4687 && prev_un_char != '&')) 4688 && isALPHA(s[0]) 4689 && isALPHA(s[1])) 4690 { 4691 /* Here it's \W (that isn't [$@&] ) followed immediately by two 4692 * alphas in a row. Accumulate all the consecutive alphas */ 4693 char *d = s; 4694 while (isALPHA(s[0])) 4695 s++; 4696 4697 /* If those alphas spell a keyword, it's almost certainly not a 4698 * character class */ 4699 if (keyword(d, s - d, 0)) 4700 weight -= 150; 4701 4702 /* khw: Should those alphas be marked as seen? */ 4703 } 4704 4705 /* Consecutive chars like [...12...] and [...ab...] are presumed 4706 * more likely to be character classes */ 4707 if ( ! first_time 4708 && ( NATIVE_TO_LATIN1(un_char) 4709 == NATIVE_TO_LATIN1(prev_un_char) + 1)) 4710 { 4711 weight += 5; 4712 } 4713 4714 /* But repeating a character inside a character class does nothing, 4715 * like [aba], so less likely that someone makes such a class, more 4716 * likely that it is a subscript; the more repeats, the less 4717 * likely. */ 4718 weight -= seen[un_char]; 4719 break; 4720 } /* End of switch */ 4721 4722 /* khw: 'seen' is declared as a char. This ++ can cause it to wrap. 4723 * This gives different results with compilers for which a plain 'char' 4724 * is actually unsigned, versus those where it is signed. I believe it 4725 * is undefined behavior to wrap a 'signed'. I think it should be 4726 * instead declared an unsigned int to make the chances of wrapping 4727 * essentially zero. 4728 * 4729 * And I believe that extra backslashes are different from other 4730 * repeated characters. */ 4731 seen[un_char]++; 4732 } /* End of loop through each character of the construct */ 4733 4734 if (weight >= 0) /* probably a character class */ 4735 return FALSE; 4736 4737 return TRUE; 4738 } 4739 4740 /* 4741 * S_intuit_method 4742 * 4743 * Does all the checking to disambiguate 4744 * foo bar 4745 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4746 * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args). 4747 * 4748 * First argument is the stuff after the first token, e.g. "bar". 4749 * 4750 * Not a method if foo is a filehandle. 4751 * Not a method if foo is a subroutine prototyped to take a filehandle. 4752 * Not a method if it's really "Foo $bar" 4753 * Method if it's "foo $bar" 4754 * Not a method if it's really "print foo $bar" 4755 * Method if it's really "foo package::" (interpreted as package->foo) 4756 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4757 * Not a method if bar is a filehandle or package, but is quoted with 4758 * => 4759 */ 4760 4761 STATIC int 4762 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) 4763 { 4764 char *s = start + (*start == '$'); 4765 char tmpbuf[sizeof PL_tokenbuf]; 4766 STRLEN len; 4767 GV* indirgv; 4768 /* Mustn't actually add anything to a symbol table. 4769 But also don't want to "initialise" any placeholder 4770 constants that might already be there into full 4771 blown PVGVs with attached PVCV. */ 4772 GV * const gv = 4773 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; 4774 4775 PERL_ARGS_ASSERT_INTUIT_METHOD; 4776 4777 if (!FEATURE_INDIRECT_IS_ENABLED) 4778 return 0; 4779 4780 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4781 return 0; 4782 if (cv && SvPOK(cv)) { 4783 const char *proto = CvPROTO(cv); 4784 if (proto) { 4785 while (*proto && (isSPACE(*proto) || *proto == ';')) 4786 proto++; 4787 if (*proto == '*') 4788 return 0; 4789 } 4790 } 4791 4792 if (*start == '$') { 4793 SSize_t start_off = start - SvPVX(PL_linestr); 4794 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY 4795 || isUPPER(*PL_tokenbuf)) 4796 return 0; 4797 /* this could be $# */ 4798 if (isSPACE(*s)) 4799 s = skipspace(s); 4800 PL_bufptr = SvPVX(PL_linestr) + start_off; 4801 PL_expect = XREF; 4802 return *s == '(' ? METHCALL : METHCALL0; 4803 } 4804 4805 s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE); 4806 /* start is the beginning of the possible filehandle/object, 4807 * and s is the end of it 4808 * tmpbuf is a copy of it (but with single quotes as double colons) 4809 */ 4810 4811 if (!keyword(tmpbuf, len, 0)) { 4812 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4813 len -= 2; 4814 tmpbuf[len] = '\0'; 4815 goto bare_package; 4816 } 4817 indirgv = gv_fetchpvn_flags(tmpbuf, len, 4818 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), 4819 SVt_PVCV); 4820 if (indirgv && SvTYPE(indirgv) != SVt_NULL 4821 && (!isGV(indirgv) || GvCVu(indirgv))) 4822 return 0; 4823 /* filehandle or package name makes it a method */ 4824 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4825 s = skipspace(s); 4826 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4827 return 0; /* no assumptions -- "=>" quotes bareword */ 4828 bare_package: 4829 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, 4830 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4831 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4832 PL_expect = XTERM; 4833 force_next(BAREWORD); 4834 PL_bufptr = s; 4835 return *s == '(' ? METHCALL : METHCALL0; 4836 } 4837 } 4838 return 0; 4839 } 4840 4841 /* Encoded script support. filter_add() effectively inserts a 4842 * 'pre-processing' function into the current source input stream. 4843 * Note that the filter function only applies to the current source file 4844 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4845 * 4846 * The datasv parameter (which may be NULL) can be used to pass 4847 * private data to this instance of the filter. The filter function 4848 * can recover the SV using the FILTER_DATA macro and use it to 4849 * store private buffers and state information. 4850 * 4851 * The supplied datasv parameter is upgraded to a PVIO type 4852 * and the IoDIRP/IoANY field is used to store the function pointer, 4853 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4854 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4855 * private use must be set using malloc'd pointers. 4856 */ 4857 4858 SV * 4859 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4860 { 4861 if (!funcp) 4862 return NULL; 4863 4864 if (!PL_parser) 4865 return NULL; 4866 4867 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4868 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4869 4870 if (!PL_rsfp_filters) 4871 PL_rsfp_filters = newAV(); 4872 if (!datasv) 4873 datasv = newSV(0); 4874 SvUPGRADE(datasv, SVt_PVIO); 4875 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4876 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4877 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4878 FPTR2DPTR(void *, IoANY(datasv)), 4879 SvPV_nolen(datasv))); 4880 av_unshift(PL_rsfp_filters, 1); 4881 av_store(PL_rsfp_filters, 0, datasv) ; 4882 if ( 4883 !PL_parser->filtered 4884 && PL_parser->lex_flags & LEX_EVALBYTES 4885 && PL_bufptr < PL_bufend 4886 ) { 4887 const char *s = PL_bufptr; 4888 while (s < PL_bufend) { 4889 if (*s == '\n') { 4890 SV *linestr = PL_parser->linestr; 4891 char *buf = SvPVX(linestr); 4892 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4893 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4894 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4895 STRLEN const linestart_pos = PL_parser->linestart - buf; 4896 STRLEN const last_uni_pos = 4897 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4898 STRLEN const last_lop_pos = 4899 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4900 av_push(PL_rsfp_filters, linestr); 4901 PL_parser->linestr = 4902 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4903 buf = SvPVX(PL_parser->linestr); 4904 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4905 PL_parser->bufptr = buf + bufptr_pos; 4906 PL_parser->oldbufptr = buf + oldbufptr_pos; 4907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4908 PL_parser->linestart = buf + linestart_pos; 4909 if (PL_parser->last_uni) 4910 PL_parser->last_uni = buf + last_uni_pos; 4911 if (PL_parser->last_lop) 4912 PL_parser->last_lop = buf + last_lop_pos; 4913 SvLEN_set(linestr, SvCUR(linestr)); 4914 SvCUR_set(linestr, s - SvPVX(linestr)); 4915 PL_parser->filtered = 1; 4916 break; 4917 } 4918 s++; 4919 } 4920 } 4921 return(datasv); 4922 } 4923 4924 /* 4925 =for apidoc_section $filters 4926 =for apidoc filter_del 4927 4928 Delete most recently added instance of the filter function argument 4929 4930 =cut 4931 */ 4932 4933 void 4934 Perl_filter_del(pTHX_ filter_t funcp) 4935 { 4936 SV *datasv; 4937 4938 PERL_ARGS_ASSERT_FILTER_DEL; 4939 4940 #ifdef DEBUGGING 4941 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4942 FPTR2DPTR(void*, funcp))); 4943 #endif 4944 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4945 return; 4946 /* if filter is on top of stack (usual case) just pop it off */ 4947 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4948 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4949 SvREFCNT_dec(av_pop(PL_rsfp_filters)); 4950 4951 return; 4952 } 4953 /* we need to search for the correct entry and clear it */ 4954 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4955 } 4956 4957 4958 /* Invoke the idxth filter function for the current rsfp. */ 4959 /* maxlen 0 = read one text line */ 4960 I32 4961 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4962 { 4963 filter_t funcp; 4964 I32 ret; 4965 SV *datasv = NULL; 4966 /* This API is bad. It should have been using unsigned int for maxlen. 4967 Not sure if we want to change the API, but if not we should sanity 4968 check the value here. */ 4969 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4970 4971 PERL_ARGS_ASSERT_FILTER_READ; 4972 4973 if (!PL_parser || !PL_rsfp_filters) 4974 return -1; 4975 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4976 /* Provide a default input filter to make life easy. */ 4977 /* Note that we append to the line. This is handy. */ 4978 DEBUG_P(PerlIO_printf(Perl_debug_log, 4979 "filter_read %d: from rsfp\n", idx)); 4980 if (correct_length) { 4981 /* Want a block */ 4982 int len ; 4983 const int old_len = SvCUR(buf_sv); 4984 4985 /* ensure buf_sv is large enough */ 4986 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4987 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4988 correct_length)) <= 0) { 4989 if (PerlIO_error(PL_rsfp)) 4990 return -1; /* error */ 4991 else 4992 return 0 ; /* end of file */ 4993 } 4994 SvCUR_set(buf_sv, old_len + len) ; 4995 SvPVX(buf_sv)[old_len + len] = '\0'; 4996 } else { 4997 /* Want a line */ 4998 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4999 if (PerlIO_error(PL_rsfp)) 5000 return -1; /* error */ 5001 else 5002 return 0 ; /* end of file */ 5003 } 5004 } 5005 return SvCUR(buf_sv); 5006 } 5007 /* Skip this filter slot if filter has been deleted */ 5008 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 5009 DEBUG_P(PerlIO_printf(Perl_debug_log, 5010 "filter_read %d: skipped (filter deleted)\n", 5011 idx)); 5012 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 5013 } 5014 if (SvTYPE(datasv) != SVt_PVIO) { 5015 if (correct_length) { 5016 /* Want a block */ 5017 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 5018 if (!remainder) return 0; /* eof */ 5019 if (correct_length > remainder) correct_length = remainder; 5020 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 5021 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 5022 } else { 5023 /* Want a line */ 5024 const char *s = SvEND(datasv); 5025 const char *send = SvPVX(datasv) + SvLEN(datasv); 5026 while (s < send) { 5027 if (*s == '\n') { 5028 s++; 5029 break; 5030 } 5031 s++; 5032 } 5033 if (s == send) return 0; /* eof */ 5034 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 5035 SvCUR_set(datasv, s-SvPVX(datasv)); 5036 } 5037 return SvCUR(buf_sv); 5038 } 5039 /* Get function pointer hidden within datasv */ 5040 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 5041 DEBUG_P(PerlIO_printf(Perl_debug_log, 5042 "filter_read %d: via function %p (%s)\n", 5043 idx, (void*)datasv, SvPV_nolen_const(datasv))); 5044 /* Call function. The function is expected to */ 5045 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 5046 /* Return: <0:error, =0:eof, >0:not eof */ 5047 ENTER; 5048 save_scalar(PL_errgv); 5049 5050 /* although this calls out to a random C function, there's a good 5051 * chance that that function will call back into perl (e.g. using 5052 * Filter::Util::Call). So downgrade the stack to 5053 * non-reference-counted for backwards compatibility - i.e. do the 5054 * equivalent of xs_wrap(), but this time we know there are no 5055 * args to be passed or returned on the stack, simplifying it. 5056 */ 5057 #ifdef PERL_RC_STACK 5058 assert(AvREAL(PL_curstack)); 5059 I32 oldbase = PL_curstackinfo->si_stack_nonrc_base; 5060 I32 oldsp = PL_stack_sp - PL_stack_base; 5061 if (!oldbase) 5062 PL_curstackinfo->si_stack_nonrc_base = oldsp + 1; 5063 #endif 5064 5065 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); 5066 5067 #ifdef PERL_RC_STACK 5068 assert(oldsp == PL_stack_sp - PL_stack_base); 5069 assert(AvREAL(PL_curstack)); 5070 assert(PL_curstackinfo->si_stack_nonrc_base == 5071 oldbase ? oldbase : oldsp + 1); 5072 PL_curstackinfo->si_stack_nonrc_base = oldbase; 5073 #endif 5074 5075 LEAVE; 5076 return ret; 5077 } 5078 5079 STATIC char * 5080 S_filter_gets(pTHX_ SV *sv, STRLEN append) 5081 { 5082 PERL_ARGS_ASSERT_FILTER_GETS; 5083 5084 #ifdef PERL_CR_FILTER 5085 if (!PL_rsfp_filters) { 5086 filter_add(S_cr_textfilter,NULL); 5087 } 5088 #endif 5089 if (PL_rsfp_filters) { 5090 if (!append) 5091 SvCUR_set(sv, 0); /* start with empty line */ 5092 if (FILTER_READ(0, sv, 0) > 0) 5093 return ( SvPVX(sv) ) ; 5094 else 5095 return NULL ; 5096 } 5097 else 5098 return (sv_gets(sv, PL_rsfp, append)); 5099 } 5100 5101 STATIC HV * 5102 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 5103 { 5104 GV *gv; 5105 5106 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 5107 5108 if (memEQs(pkgname, len, "__PACKAGE__")) 5109 return PL_curstash; 5110 5111 if (len > 2 5112 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') 5113 && (gv = gv_fetchpvn_flags(pkgname, 5114 len, 5115 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 5116 { 5117 return GvHV(gv); /* Foo:: */ 5118 } 5119 5120 /* use constant CLASS => 'MyClass' */ 5121 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 5122 if (gv && GvCV(gv)) { 5123 SV * const sv = cv_const_sv(GvCV(gv)); 5124 if (sv) 5125 return gv_stashsv(sv, 0); 5126 } 5127 5128 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 5129 } 5130 5131 5132 STATIC char * 5133 S_tokenize_use(pTHX_ int is_use, char *s) { 5134 PERL_ARGS_ASSERT_TOKENIZE_USE; 5135 5136 if (PL_expect != XSTATE) 5137 /* diag_listed_as: "use" not allowed in expression */ 5138 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 5139 is_use ? "use" : "no")); 5140 PL_expect = XTERM; 5141 s = skipspace(s); 5142 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 5143 s = force_version(s, TRUE); 5144 if (*s == ';' || *s == '}' 5145 || (s = skipspace(s), (*s == ';' || *s == '}'))) { 5146 NEXTVAL_NEXTTOKE.opval = NULL; 5147 force_next(BAREWORD); 5148 } 5149 else if (*s == 'v') { 5150 s = force_word(s,BAREWORD,FALSE,TRUE); 5151 s = force_version(s, FALSE); 5152 } 5153 } 5154 else { 5155 s = force_word(s,BAREWORD,FALSE,TRUE); 5156 s = force_version(s, FALSE); 5157 } 5158 pl_yylval.ival = is_use; 5159 return s; 5160 } 5161 #ifdef DEBUGGING 5162 static const char* const exp_name[] = 5163 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 5164 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", 5165 "SIGVAR", "TERMORDORDOR" 5166 }; 5167 #endif 5168 5169 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l) 5170 STATIC bool 5171 S_word_takes_any_delimiter(char *p, STRLEN len) 5172 { 5173 return (len == 1 && memCHRs("msyq", p[0])) 5174 || (len == 2 5175 && ((p[0] == 't' && p[1] == 'r') 5176 || (p[0] == 'q' && memCHRs("qwxr", p[1])))); 5177 } 5178 5179 static void 5180 S_check_scalar_slice(pTHX_ char *s) 5181 { 5182 s++; 5183 while (SPACE_OR_TAB(*s)) s++; 5184 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2, 5185 PL_bufend, 5186 UTF)) 5187 { 5188 return; 5189 } 5190 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) 5191 || (*s && memCHRs(" \t$#+-'\"", *s))) 5192 { 5193 s += UTF ? UTF8SKIP(s) : 1; 5194 } 5195 if (*s == '}' || *s == ']') 5196 pl_yylval.ival = OPpSLICEWARNING; 5197 } 5198 5199 #define lex_token_boundary() S_lex_token_boundary(aTHX) 5200 static void 5201 S_lex_token_boundary(pTHX) 5202 { 5203 PL_oldoldbufptr = PL_oldbufptr; 5204 PL_oldbufptr = PL_bufptr; 5205 } 5206 5207 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s) 5208 static char * 5209 S_vcs_conflict_marker(pTHX_ char *s) 5210 { 5211 lex_token_boundary(); 5212 PL_bufptr = s; 5213 yyerror("Version control conflict marker"); 5214 while (s < PL_bufend && *s != '\n') 5215 s++; 5216 return s; 5217 } 5218 5219 static int 5220 yyl_sigvar(pTHX_ char *s) 5221 { 5222 /* we expect the sigil and optional var name part of a 5223 * signature element here. Since a '$' is not necessarily 5224 * followed by a var name, handle it specially here; the general 5225 * yylex code would otherwise try to interpret whatever follows 5226 * as a var; e.g. ($, ...) would be seen as the var '$,' 5227 */ 5228 5229 U8 sigil; 5230 5231 s = skipspace(s); 5232 sigil = *s++; 5233 PL_bufptr = s; /* for error reporting */ 5234 switch (sigil) { 5235 case '$': 5236 case '@': 5237 case '%': 5238 /* spot stuff that looks like an prototype */ 5239 if (memCHRs("$:@%&*;\\[]", *s)) { 5240 yyerror("Illegal character following sigil in a subroutine signature"); 5241 break; 5242 } 5243 /* '$#' is banned, while '$ # comment' isn't */ 5244 if (*s == '#') { 5245 yyerror("'#' not allowed immediately following a sigil in a subroutine signature"); 5246 break; 5247 } 5248 s = skipspace(s); 5249 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5250 char *dest = PL_tokenbuf + 1; 5251 /* read var name, including sigil, into PL_tokenbuf */ 5252 PL_tokenbuf[0] = sigil; 5253 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, 5254 0, cBOOL(UTF), FALSE, FALSE); 5255 *dest = '\0'; 5256 assert(PL_tokenbuf[1]); /* we have a variable name */ 5257 } 5258 else { 5259 *PL_tokenbuf = 0; 5260 PL_in_my = 0; 5261 } 5262 5263 s = skipspace(s); 5264 /* parse the = for the default ourselves to avoid '+=' etc being accepted here 5265 * as the ASSIGNOP, and exclude other tokens that start with = 5266 */ 5267 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) { 5268 /* save now to report with the same context as we did when 5269 * all ASSIGNOPS were accepted */ 5270 PL_oldbufptr = s; 5271 5272 ++s; 5273 NEXTVAL_NEXTTOKE.ival = OP_SASSIGN; 5274 force_next(ASSIGNOP); 5275 PL_expect = XTERM; 5276 } 5277 else if(*s == '/' && s[1] == '/' && s[2] == '=') { 5278 PL_oldbufptr = s; 5279 5280 s += 3; 5281 NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN; 5282 force_next(ASSIGNOP); 5283 PL_expect = XTERM; 5284 } 5285 else if(*s == '|' && s[1] == '|' && s[2] == '=') { 5286 PL_oldbufptr = s; 5287 5288 s += 3; 5289 NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN; 5290 force_next(ASSIGNOP); 5291 PL_expect = XTERM; 5292 } 5293 else if (*s == ',' || *s == ')') { 5294 PL_expect = XOPERATOR; 5295 } 5296 else { 5297 /* make sure the context shows the unexpected character and 5298 * hopefully a bit more */ 5299 if (*s) ++s; 5300 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 5301 s++; 5302 PL_bufptr = s; /* for error reporting */ 5303 yyerror("Illegal operator following parameter in a subroutine signature"); 5304 PL_in_my = 0; 5305 } 5306 if (*PL_tokenbuf) { 5307 NEXTVAL_NEXTTOKE.ival = sigil; 5308 force_next('p'); /* force a signature pending identifier */ 5309 } 5310 break; 5311 5312 case ')': 5313 PL_expect = XBLOCK; 5314 break; 5315 case ',': /* handle ($a,,$b) */ 5316 break; 5317 5318 default: 5319 PL_in_my = 0; 5320 yyerror("A signature parameter must start with '$', '@' or '%'"); 5321 /* very crude error recovery: skip to likely next signature 5322 * element */ 5323 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 5324 s++; 5325 break; 5326 } 5327 5328 switch (sigil) { 5329 case ',': TOKEN (PERLY_COMMA); 5330 case '$': TOKEN (PERLY_DOLLAR); 5331 case '@': TOKEN (PERLY_SNAIL); 5332 case '%': TOKEN (PERLY_PERCENT_SIGN); 5333 case ')': TOKEN (PERLY_PAREN_CLOSE); 5334 default: TOKEN (sigil); 5335 } 5336 } 5337 5338 static int 5339 yyl_dollar(pTHX_ char *s) 5340 { 5341 CLINE; 5342 5343 if (PL_expect == XPOSTDEREF) { 5344 if (s[1] == '#') { 5345 s++; 5346 POSTDEREF(DOLSHARP); 5347 } 5348 POSTDEREF(PERLY_DOLLAR); 5349 } 5350 5351 if ( s[1] == '#' 5352 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF) 5353 || memCHRs("{$:+-@", s[2]))) 5354 { 5355 PL_tokenbuf[0] = '@'; 5356 s = scan_ident(s + 1, PL_tokenbuf + 1, 5357 sizeof PL_tokenbuf - 1, FALSE); 5358 if (PL_expect == XOPERATOR) { 5359 char *d = s; 5360 if (PL_bufptr > s) { 5361 d = PL_bufptr-1; 5362 PL_bufptr = PL_oldbufptr; 5363 } 5364 no_op("Array length", d); 5365 } 5366 if (!PL_tokenbuf[1]) 5367 PREREF(DOLSHARP); 5368 PL_expect = XOPERATOR; 5369 force_ident_maybe_lex('#'); 5370 TOKEN(DOLSHARP); 5371 } 5372 5373 PL_tokenbuf[0] = '$'; 5374 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5375 if (PL_expect == XOPERATOR) { 5376 char *d = s; 5377 if (PL_bufptr > s) { 5378 d = PL_bufptr-1; 5379 PL_bufptr = PL_oldbufptr; 5380 } 5381 no_op("Scalar", d); 5382 } 5383 if (!PL_tokenbuf[1]) { 5384 if (s == PL_bufend) 5385 yyerror("Final $ should be \\$ or $name"); 5386 PREREF(PERLY_DOLLAR); 5387 } 5388 5389 { 5390 const char tmp = *s; 5391 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 5392 s = skipspace(s); 5393 5394 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5395 && intuit_more(s, PL_bufend)) { 5396 if (*s == '[') { 5397 PL_tokenbuf[0] = '@'; 5398 if (ckWARN(WARN_SYNTAX)) { 5399 char *t = s+1; 5400 5401 while ( t < PL_bufend ) { 5402 if (isSPACE(*t)) { 5403 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t)); 5404 /* consumed one or more space chars */ 5405 } else if (*t == '$' || *t == '@') { 5406 /* could be more than one '$' like $$ref or @$ref */ 5407 do { t++; } while (t < PL_bufend && *t == '$'); 5408 5409 /* could be an abigail style identifier like $ foo */ 5410 while (t < PL_bufend && *t == ' ') t++; 5411 5412 /* strip off the name of the var */ 5413 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 5414 t += UTF ? UTF8SKIP(t) : 1; 5415 /* consumed a varname */ 5416 } else if (isDIGIT(*t)) { 5417 /* deal with hex constants like 0x11 */ 5418 if (t[0] == '0' && t[1] == 'x') { 5419 t += 2; 5420 while (t < PL_bufend && isXDIGIT(*t)) t++; 5421 } else { 5422 /* deal with decimal/octal constants like 1 and 0123 */ 5423 do { t++; } while (isDIGIT(*t)); 5424 if (t<PL_bufend && *t == '.') { 5425 do { t++; } while (isDIGIT(*t)); 5426 } 5427 } 5428 /* consumed a number */ 5429 } else { 5430 /* not a var nor a space nor a number */ 5431 break; 5432 } 5433 } 5434 if (t < PL_bufend && *t++ == ',') { 5435 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ 5436 while (t < PL_bufend && *t != ']') 5437 t++; 5438 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5439 "Multidimensional syntax %" UTF8f " not supported", 5440 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr)); 5441 } 5442 } 5443 } 5444 else if (*s == '{') { 5445 char *t; 5446 PL_tokenbuf[0] = '%'; 5447 if ( strEQ(PL_tokenbuf+1, "SIG") 5448 && ckWARN(WARN_SYNTAX) 5449 && (t = (char *) memchr(s, '}', PL_bufend - s)) 5450 && (t = (char *) memchr(t, '=', PL_bufend - t))) 5451 { 5452 char tmpbuf[sizeof PL_tokenbuf]; 5453 do { 5454 t++; 5455 } while (isSPACE(*t)); 5456 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { 5457 STRLEN len; 5458 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); 5459 while (isSPACE(*t)) 5460 t++; 5461 if ( *t == ';' 5462 && get_cvn_flags(tmpbuf, len, UTF 5463 ? SVf_UTF8 5464 : 0)) 5465 { 5466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5467 "You need to quote \"%" UTF8f "\"", 5468 UTF8fARG(UTF, len, tmpbuf)); 5469 } 5470 } 5471 } 5472 } 5473 } 5474 5475 PL_expect = XOPERATOR; 5476 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) { 5477 const bool islop = (PL_last_lop == PL_oldoldbufptr); 5478 if (!islop || PL_last_lop_op == OP_GREPSTART) 5479 PL_expect = XOPERATOR; 5480 else if (memCHRs("$@\"'`q", *s)) 5481 PL_expect = XTERM; /* e.g. print $fh "foo" */ 5482 else if ( memCHRs("&*<%", *s) 5483 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF)) 5484 { 5485 PL_expect = XTERM; /* e.g. print $fh &sub */ 5486 } 5487 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5488 char tmpbuf[sizeof PL_tokenbuf]; 5489 int t2; 5490 STRLEN len; 5491 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE); 5492 if ((t2 = keyword(tmpbuf, len, 0))) { 5493 /* binary operators exclude handle interpretations */ 5494 switch (t2) { 5495 case -KEY_x: 5496 case -KEY_eq: 5497 case -KEY_ne: 5498 case -KEY_gt: 5499 case -KEY_lt: 5500 case -KEY_ge: 5501 case -KEY_le: 5502 case -KEY_cmp: 5503 break; 5504 default: 5505 PL_expect = XTERM; /* e.g. print $fh length() */ 5506 break; 5507 } 5508 } 5509 else { 5510 PL_expect = XTERM; /* e.g. print $fh subr() */ 5511 } 5512 } 5513 else if (isDIGIT(*s)) 5514 PL_expect = XTERM; /* e.g. print $fh 3 */ 5515 else if (*s == '.' && isDIGIT(s[1])) 5516 PL_expect = XTERM; /* e.g. print $fh .3 */ 5517 else if ((*s == '?' || *s == '-' || *s == '+') 5518 && !isSPACE(s[1]) && s[1] != '=') 5519 PL_expect = XTERM; /* e.g. print $fh -1 */ 5520 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 5521 && s[1] != '/') 5522 PL_expect = XTERM; /* e.g. print $fh /.../ 5523 XXX except DORDOR operator 5524 */ 5525 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 5526 && s[2] != '=') 5527 PL_expect = XTERM; /* print $fh <<"EOF" */ 5528 } 5529 } 5530 force_ident_maybe_lex('$'); 5531 TOKEN(PERLY_DOLLAR); 5532 } 5533 5534 static int 5535 yyl_sub(pTHX_ char *s, const int key) 5536 { 5537 char * const tmpbuf = PL_tokenbuf + 1; 5538 bool have_name, have_proto; 5539 STRLEN len; 5540 SV *format_name = NULL; 5541 bool is_method = (key == KEY_method); 5542 5543 /* method always implies signatures */ 5544 bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED; 5545 5546 SSize_t off = s-SvPVX(PL_linestr); 5547 char *d; 5548 5549 s = skipspace(s); /* can move PL_linestr */ 5550 5551 d = SvPVX(PL_linestr)+off; 5552 5553 SAVEBOOL(PL_parser->sig_seen); 5554 PL_parser->sig_seen = FALSE; 5555 5556 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 5557 || *s == '\'' 5558 || (*s == ':' && s[1] == ':')) 5559 { 5560 5561 PL_expect = XATTRBLOCK; 5562 d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 5563 &len, TRUE); 5564 if (key == KEY_format) 5565 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 5566 *PL_tokenbuf = '&'; 5567 if (memchr(tmpbuf, ':', len) || key != KEY_sub 5568 || pad_findmy_pvn( 5569 PL_tokenbuf, len + 1, 0 5570 ) != NOT_IN_PAD) 5571 sv_setpvn(PL_subname, tmpbuf, len); 5572 else { 5573 sv_setsv(PL_subname,PL_curstname); 5574 sv_catpvs(PL_subname,"::"); 5575 sv_catpvn(PL_subname,tmpbuf,len); 5576 } 5577 if (SvUTF8(PL_linestr)) 5578 SvUTF8_on(PL_subname); 5579 have_name = TRUE; 5580 5581 s = skipspace(d); 5582 } 5583 else { 5584 if (key == KEY_my || key == KEY_our || key==KEY_state) { 5585 *d = '\0'; 5586 /* diag_listed_as: Missing name in "%s sub" */ 5587 Perl_croak(aTHX_ 5588 "Missing name in \"%s\"", PL_bufptr); 5589 } 5590 PL_expect = XATTRTERM; 5591 sv_setpvs(PL_subname,"?"); 5592 have_name = FALSE; 5593 } 5594 5595 if (key == KEY_format) { 5596 if (format_name) { 5597 NEXTVAL_NEXTTOKE.opval 5598 = newSVOP(OP_CONST,0, format_name); 5599 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 5600 force_next(BAREWORD); 5601 } 5602 PREBLOCK(KW_FORMAT); 5603 } 5604 5605 /* Look for a prototype */ 5606 if (*s == '(' && !is_sigsub) { 5607 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5608 if (!s) 5609 Perl_croak(aTHX_ "Prototype not terminated"); 5610 COPLINE_SET_FROM_MULTI_END; 5611 (void)validate_proto(PL_subname, PL_lex_stuff, 5612 ckWARN(WARN_ILLEGALPROTO), 0); 5613 have_proto = TRUE; 5614 5615 s = skipspace(s); 5616 } 5617 else 5618 have_proto = FALSE; 5619 5620 if ( !(*s == ':' && s[1] != ':') 5621 && (*s != '{' && *s != '(') && key != KEY_format) 5622 { 5623 assert(key == KEY_sub || key == KEY_method || 5624 key == KEY_AUTOLOAD || key == KEY_DESTROY || 5625 key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK || 5626 key == KEY_INIT || key == KEY_END || 5627 key == KEY_my || key == KEY_state || 5628 key == KEY_our); 5629 if (!have_name) 5630 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 5631 else if (*s != ';' && *s != '}') 5632 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname)); 5633 } 5634 5635 if (have_proto) { 5636 NEXTVAL_NEXTTOKE.opval = 5637 newSVOP(OP_CONST, 0, PL_lex_stuff); 5638 PL_lex_stuff = NULL; 5639 force_next(THING); 5640 } 5641 5642 if (!have_name) { 5643 if (PL_curstash) 5644 sv_setpvs(PL_subname, "__ANON__"); 5645 else 5646 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 5647 if (is_method) 5648 TOKEN(KW_METHOD_anon); 5649 else if (is_sigsub) 5650 TOKEN(KW_SUB_anon_sig); 5651 else 5652 TOKEN(KW_SUB_anon); 5653 } 5654 force_ident_maybe_lex('&'); 5655 if (is_method) 5656 TOKEN(KW_METHOD_named); 5657 else if (is_sigsub) 5658 TOKEN(KW_SUB_named_sig); 5659 else 5660 TOKEN(KW_SUB_named); 5661 } 5662 5663 static int 5664 yyl_interpcasemod(pTHX_ char *s) 5665 { 5666 #ifdef DEBUGGING 5667 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 5668 Perl_croak(aTHX_ 5669 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 5670 PL_bufptr, PL_bufend, *PL_bufptr); 5671 #endif 5672 5673 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 5674 /* if at a \E */ 5675 if (PL_lex_casemods) { 5676 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 5677 PL_lex_casestack[PL_lex_casemods] = '\0'; 5678 5679 if (PL_bufptr != PL_bufend 5680 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 5681 || oldmod == 'F')) { 5682 PL_bufptr += 2; 5683 PL_lex_state = LEX_INTERPCONCAT; 5684 } 5685 PL_lex_allbrackets--; 5686 return REPORT(PERLY_PAREN_CLOSE); 5687 } 5688 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 5689 /* Got an unpaired \E */ 5690 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 5691 "Useless use of \\E"); 5692 } 5693 if (PL_bufptr != PL_bufend) 5694 PL_bufptr += 2; 5695 PL_lex_state = LEX_INTERPCONCAT; 5696 return yylex(); 5697 } 5698 else { 5699 DEBUG_T({ 5700 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n"); 5701 }); 5702 s = PL_bufptr + 1; 5703 if (s[1] == '\\' && s[2] == 'E') { 5704 PL_bufptr = s + 3; 5705 PL_lex_state = LEX_INTERPCONCAT; 5706 return yylex(); 5707 } 5708 else { 5709 I32 tmp; 5710 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u") 5711 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l")) 5712 { 5713 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 5714 } 5715 if ((*s == 'L' || *s == 'U' || *s == 'F') 5716 && (strpbrk(PL_lex_casestack, "LUF"))) 5717 { 5718 PL_lex_casestack[--PL_lex_casemods] = '\0'; 5719 PL_lex_allbrackets--; 5720 return REPORT(PERLY_PAREN_CLOSE); 5721 } 5722 if (PL_lex_casemods > 10) 5723 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 5724 PL_lex_casestack[PL_lex_casemods++] = *s; 5725 PL_lex_casestack[PL_lex_casemods] = '\0'; 5726 PL_lex_state = LEX_INTERPCONCAT; 5727 NEXTVAL_NEXTTOKE.ival = 0; 5728 force_next((2<<24)|PERLY_PAREN_OPEN); 5729 if (*s == 'l') 5730 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 5731 else if (*s == 'u') 5732 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 5733 else if (*s == 'L') 5734 NEXTVAL_NEXTTOKE.ival = OP_LC; 5735 else if (*s == 'U') 5736 NEXTVAL_NEXTTOKE.ival = OP_UC; 5737 else if (*s == 'Q') 5738 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 5739 else if (*s == 'F') 5740 NEXTVAL_NEXTTOKE.ival = OP_FC; 5741 else 5742 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 5743 PL_bufptr = s + 1; 5744 } 5745 force_next(FUNC); 5746 if (PL_lex_starts) { 5747 s = PL_bufptr; 5748 PL_lex_starts = 0; 5749 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5750 if (PL_lex_casemods == 1 && PL_lex_inpat) 5751 TOKEN(PERLY_COMMA); 5752 else 5753 AopNOASSIGN(OP_CONCAT); 5754 } 5755 else 5756 return yylex(); 5757 } 5758 } 5759 5760 static int 5761 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword, 5762 GV **pgv, GV ***pgvp) 5763 { 5764 GV *ogv = NULL; /* override (winner) */ 5765 GV *hgv = NULL; /* hidden (loser) */ 5766 GV *gv = *pgv; 5767 5768 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 5769 CV *cv; 5770 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 5771 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 5772 SVt_PVCV)) 5773 && (cv = GvCVu(gv))) 5774 { 5775 if (GvIMPORTED_CV(gv)) 5776 ogv = gv; 5777 else if (! CvNOWARN_AMBIGUOUS(cv)) 5778 hgv = gv; 5779 } 5780 if (!ogv 5781 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE)) 5782 && (gv = **pgvp) 5783 && (isGV_with_GP(gv) 5784 ? GvCVu(gv) && GvIMPORTED_CV(gv) 5785 : SvPCS_IMPORTED(gv) 5786 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 5787 len, 0), 1))) 5788 { 5789 ogv = gv; 5790 } 5791 } 5792 5793 *pgv = gv; 5794 5795 if (ogv) { 5796 *orig_keyword = key; 5797 return 0; /* overridden by import or by GLOBAL */ 5798 } 5799 else if (gv && !*pgvp 5800 && -key==KEY_lock /* XXX generalizable kludge */ 5801 && GvCVu(gv)) 5802 { 5803 return 0; /* any sub overrides "weak" keyword */ 5804 } 5805 else { /* no override */ 5806 key = -key; 5807 if (key == KEY_dump) { 5808 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30"); 5809 } 5810 *pgv = NULL; 5811 *pgvp = 0; 5812 if (hgv && key != KEY_x) /* never ambiguous */ 5813 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 5814 "Ambiguous call resolved as CORE::%s(), " 5815 "qualify as such or use &", 5816 GvENAME(hgv)); 5817 return key; 5818 } 5819 } 5820 5821 static int 5822 yyl_qw(pTHX_ char *s, STRLEN len) 5823 { 5824 OP *words = NULL; 5825 5826 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5827 if (!s) 5828 missingterm(NULL, 0); 5829 5830 COPLINE_SET_FROM_MULTI_END; 5831 PL_expect = XOPERATOR; 5832 if (SvCUR(PL_lex_stuff)) { 5833 int warned_comma = !ckWARN(WARN_QW); 5834 int warned_comment = warned_comma; 5835 char *d = SvPV_force(PL_lex_stuff, len); 5836 while (len) { 5837 for (; isSPACE(*d) && len; --len, ++d) 5838 /**/; 5839 if (len) { 5840 SV *sv; 5841 const char *b = d; 5842 if (!warned_comma || !warned_comment) { 5843 for (; !isSPACE(*d) && len; --len, ++d) { 5844 if (!warned_comma && *d == ',') { 5845 Perl_warner(aTHX_ packWARN(WARN_QW), 5846 "Possible attempt to separate words with commas"); 5847 ++warned_comma; 5848 } 5849 else if (!warned_comment && *d == '#') { 5850 Perl_warner(aTHX_ packWARN(WARN_QW), 5851 "Possible attempt to put comments in qw() list"); 5852 ++warned_comment; 5853 } 5854 } 5855 } 5856 else { 5857 for (; !isSPACE(*d) && len; --len, ++d) 5858 /**/; 5859 } 5860 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 5861 words = op_append_elem(OP_LIST, words, 5862 newSVOP(OP_CONST, 0, tokeq(sv))); 5863 } 5864 } 5865 } 5866 if (!words) 5867 words = newNULLLIST(); 5868 SvREFCNT_dec_NN(PL_lex_stuff); 5869 PL_lex_stuff = NULL; 5870 PL_expect = XOPERATOR; 5871 pl_yylval.opval = sawparens(words); 5872 TOKEN(QWLIST); 5873 } 5874 5875 static int 5876 yyl_hyphen(pTHX_ char *s) 5877 { 5878 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5879 I32 ftst = 0; 5880 char tmp; 5881 5882 s++; 5883 PL_bufptr = s; 5884 tmp = *s++; 5885 5886 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5887 s++; 5888 5889 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { 5890 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); 5891 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5892 OPERATOR(PERLY_MINUS); /* unary minus */ 5893 } 5894 switch (tmp) { 5895 case 'r': ftst = OP_FTEREAD; break; 5896 case 'w': ftst = OP_FTEWRITE; break; 5897 case 'x': ftst = OP_FTEEXEC; break; 5898 case 'o': ftst = OP_FTEOWNED; break; 5899 case 'R': ftst = OP_FTRREAD; break; 5900 case 'W': ftst = OP_FTRWRITE; break; 5901 case 'X': ftst = OP_FTREXEC; break; 5902 case 'O': ftst = OP_FTROWNED; break; 5903 case 'e': ftst = OP_FTIS; break; 5904 case 'z': ftst = OP_FTZERO; break; 5905 case 's': ftst = OP_FTSIZE; break; 5906 case 'f': ftst = OP_FTFILE; break; 5907 case 'd': ftst = OP_FTDIR; break; 5908 case 'l': ftst = OP_FTLINK; break; 5909 case 'p': ftst = OP_FTPIPE; break; 5910 case 'S': ftst = OP_FTSOCK; break; 5911 case 'u': ftst = OP_FTSUID; break; 5912 case 'g': ftst = OP_FTSGID; break; 5913 case 'k': ftst = OP_FTSVTX; break; 5914 case 'b': ftst = OP_FTBLK; break; 5915 case 'c': ftst = OP_FTCHR; break; 5916 case 't': ftst = OP_FTTTY; break; 5917 case 'T': ftst = OP_FTTEXT; break; 5918 case 'B': ftst = OP_FTBINARY; break; 5919 case 'M': case 'A': case 'C': 5920 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5921 switch (tmp) { 5922 case 'M': ftst = OP_FTMTIME; break; 5923 case 'A': ftst = OP_FTATIME; break; 5924 case 'C': ftst = OP_FTCTIME; break; 5925 default: break; 5926 } 5927 break; 5928 default: 5929 break; 5930 } 5931 if (ftst) { 5932 PL_last_uni = PL_oldbufptr; 5933 PL_last_lop_op = (OPCODE)ftst; 5934 DEBUG_T( { 5935 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp); 5936 } ); 5937 FTST(ftst); 5938 } 5939 else { 5940 /* Assume it was a minus followed by a one-letter named 5941 * subroutine call (or a -bareword), then. */ 5942 DEBUG_T( { 5943 PerlIO_printf(Perl_debug_log, 5944 "### '-%c' looked like a file test but was not\n", 5945 (int) tmp); 5946 } ); 5947 s = --PL_bufptr; 5948 } 5949 } 5950 { 5951 const char tmp = *s++; 5952 if (*s == tmp) { 5953 s++; 5954 if (PL_expect == XOPERATOR) 5955 TERM(POSTDEC); 5956 else 5957 OPERATOR(PREDEC); 5958 } 5959 else if (*s == '>') { 5960 s++; 5961 s = skipspace(s); 5962 if (((*s == '$' || *s == '&') && s[1] == '*') 5963 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5964 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1])) 5965 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5966 ) 5967 { 5968 PL_expect = XPOSTDEREF; 5969 TOKEN(ARROW); 5970 } 5971 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5972 s = force_word(s,METHCALL0,FALSE,TRUE); 5973 TOKEN(ARROW); 5974 } 5975 else if (*s == '$') 5976 OPERATOR(ARROW); 5977 else 5978 TERM(ARROW); 5979 } 5980 if (PL_expect == XOPERATOR) { 5981 if (*s == '=' 5982 && !PL_lex_allbrackets 5983 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5984 { 5985 s--; 5986 TOKEN(0); 5987 } 5988 Aop(OP_SUBTRACT); 5989 } 5990 else { 5991 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5992 check_uni(); 5993 OPERATOR(PERLY_MINUS); /* unary minus */ 5994 } 5995 } 5996 } 5997 5998 static int 5999 yyl_plus(pTHX_ char *s) 6000 { 6001 const char tmp = *s++; 6002 if (*s == tmp) { 6003 s++; 6004 if (PL_expect == XOPERATOR) 6005 TERM(POSTINC); 6006 else 6007 OPERATOR(PREINC); 6008 } 6009 if (PL_expect == XOPERATOR) { 6010 if (*s == '=' 6011 && !PL_lex_allbrackets 6012 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6013 { 6014 s--; 6015 TOKEN(0); 6016 } 6017 Aop(OP_ADD); 6018 } 6019 else { 6020 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 6021 check_uni(); 6022 OPERATOR(PERLY_PLUS); 6023 } 6024 } 6025 6026 static int 6027 yyl_star(pTHX_ char *s) 6028 { 6029 if (PL_expect == XPOSTDEREF) 6030 POSTDEREF(PERLY_STAR); 6031 6032 if (PL_expect != XOPERATOR) { 6033 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 6034 PL_expect = XOPERATOR; 6035 force_ident(PL_tokenbuf, PERLY_STAR); 6036 if (!*PL_tokenbuf) 6037 PREREF(PERLY_STAR); 6038 TERM(PERLY_STAR); 6039 } 6040 6041 s++; 6042 if (*s == '*') { 6043 s++; 6044 if (*s == '=' && !PL_lex_allbrackets 6045 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6046 { 6047 s -= 2; 6048 TOKEN(0); 6049 } 6050 PWop(OP_POW); 6051 } 6052 6053 if (*s == '=' 6054 && !PL_lex_allbrackets 6055 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6056 { 6057 s--; 6058 TOKEN(0); 6059 } 6060 6061 Mop(OP_MULTIPLY); 6062 } 6063 6064 static int 6065 yyl_percent(pTHX_ char *s) 6066 { 6067 if (PL_expect == XOPERATOR) { 6068 if (s[1] == '=' 6069 && !PL_lex_allbrackets 6070 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6071 { 6072 TOKEN(0); 6073 } 6074 ++s; 6075 Mop(OP_MODULO); 6076 } 6077 else if (PL_expect == XPOSTDEREF) 6078 POSTDEREF(PERLY_PERCENT_SIGN); 6079 6080 PL_tokenbuf[0] = '%'; 6081 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6082 pl_yylval.ival = 0; 6083 if (!PL_tokenbuf[1]) { 6084 PREREF(PERLY_PERCENT_SIGN); 6085 } 6086 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6087 && intuit_more(s, PL_bufend)) { 6088 if (*s == '[') 6089 PL_tokenbuf[0] = '@'; 6090 } 6091 PL_expect = XOPERATOR; 6092 force_ident_maybe_lex('%'); 6093 TERM(PERLY_PERCENT_SIGN); 6094 } 6095 6096 static int 6097 yyl_caret(pTHX_ char *s) 6098 { 6099 char *d = s; 6100 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED); 6101 if (s[1] == '^') { 6102 s += 2; 6103 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6104 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6105 s -= 2; 6106 TOKEN(0); 6107 } 6108 pl_yylval.ival = OP_XOR; 6109 OPERATOR(OROR); 6110 } 6111 if (bof && s[1] == '.') 6112 s++; 6113 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6114 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 6115 { 6116 s = d; 6117 TOKEN(0); 6118 } 6119 s++; 6120 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); 6121 } 6122 6123 static int 6124 yyl_colon(pTHX_ char *s) 6125 { 6126 OP *attrs; 6127 6128 switch (PL_expect) { 6129 case XOPERATOR: 6130 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets)) 6131 break; 6132 PL_bufptr = s; /* update in case we back off */ 6133 if (*s == '=') { 6134 Perl_croak(aTHX_ 6135 "Use of := for an empty attribute list is not allowed"); 6136 } 6137 goto grabattrs; 6138 case XATTRBLOCK: 6139 PL_expect = XBLOCK; 6140 goto grabattrs; 6141 case XATTRTERM: 6142 PL_expect = XTERMBLOCK; 6143 grabattrs: 6144 /* NB: as well as parsing normal attributes, we also end up 6145 * here if there is something looking like attributes 6146 * following a signature (which is illegal, but used to be 6147 * legal in 5.20..5.26). If the latter, we still parse the 6148 * attributes so that error messages(s) are less confusing, 6149 * but ignore them (parser->sig_seen). 6150 */ 6151 s = skipspace(s); 6152 attrs = NULL; 6153 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 6154 I32 tmp; 6155 SV *sv; 6156 STRLEN len; 6157 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 6158 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 6159 if (tmp < 0) tmp = -tmp; 6160 switch (tmp) { 6161 case KEY_or: 6162 case KEY_and: 6163 case KEY_for: 6164 case KEY_foreach: 6165 case KEY_unless: 6166 case KEY_if: 6167 case KEY_while: 6168 case KEY_until: 6169 goto got_attrs; 6170 default: 6171 break; 6172 } 6173 } 6174 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 6175 if (*d == '(') { 6176 d = scan_str(d,TRUE,TRUE,FALSE,NULL); 6177 if (!d) { 6178 op_free(attrs); 6179 ASSUME(sv && SvREFCNT(sv) == 1); 6180 SvREFCNT_dec(sv); 6181 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list"); 6182 } 6183 COPLINE_SET_FROM_MULTI_END; 6184 } 6185 if (PL_lex_stuff) { 6186 sv_catsv(sv, PL_lex_stuff); 6187 attrs = op_append_elem(OP_LIST, attrs, 6188 newSVOP(OP_CONST, 0, sv)); 6189 SvREFCNT_dec_NN(PL_lex_stuff); 6190 PL_lex_stuff = NULL; 6191 } 6192 else { 6193 attrs = op_append_elem(OP_LIST, attrs, 6194 newSVOP(OP_CONST, 0, sv)); 6195 } 6196 s = skipspace(d); 6197 if (*s == ':' && s[1] != ':') 6198 s = skipspace(s+1); 6199 else if (s == d) 6200 break; /* require real whitespace or :'s */ 6201 /* XXX losing whitespace on sequential attributes here */ 6202 } 6203 6204 if (*s != ';' 6205 && *s != '}' 6206 && !(PL_expect == XOPERATOR 6207 /* if an operator is expected, permit =, //= and ||= or ) to end */ 6208 ? (*s == '=' || *s == ')' || *s == '/' || *s == '|') 6209 : (*s == '{' || *s == '('))) 6210 { 6211 const char q = ((*s == '\'') ? '"' : '\''); 6212 /* If here for an expression, and parsed no attrs, back off. */ 6213 if (PL_expect == XOPERATOR && !attrs) { 6214 s = PL_bufptr; 6215 break; 6216 } 6217 /* MUST advance bufptr here to avoid bogus "at end of line" 6218 context messages from yyerror(). 6219 */ 6220 PL_bufptr = s; 6221 yyerror( (const char *) 6222 (*s 6223 ? Perl_form(aTHX_ "Invalid separator character " 6224 "%c%c%c in attribute list", q, *s, q) 6225 : "Unterminated attribute list" ) ); 6226 op_free(attrs); 6227 OPERATOR(PERLY_COLON); 6228 } 6229 6230 got_attrs: 6231 if (PL_parser->sig_seen) { 6232 /* see comment about about sig_seen and parser error 6233 * handling */ 6234 op_free(attrs); 6235 Perl_croak(aTHX_ "Subroutine attributes must come " 6236 "before the signature"); 6237 } 6238 if (attrs) { 6239 NEXTVAL_NEXTTOKE.opval = attrs; 6240 force_next(THING); 6241 } 6242 TOKEN(COLONATTR); 6243 } 6244 6245 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 6246 s--; 6247 TOKEN(0); 6248 } 6249 6250 PL_lex_allbrackets--; 6251 OPERATOR(PERLY_COLON); 6252 } 6253 6254 static int 6255 yyl_subproto(pTHX_ char *s, CV *cv) 6256 { 6257 STRLEN protolen = CvPROTOLEN(cv); 6258 const char *proto = CvPROTO(cv); 6259 bool optional; 6260 6261 proto = S_strip_spaces(aTHX_ proto, &protolen); 6262 if (!protolen) 6263 TERM(FUNC0SUB); 6264 if ((optional = *proto == ';')) { 6265 do { 6266 proto++; 6267 } while (*proto == ';'); 6268 } 6269 6270 if ( 6271 ( 6272 ( 6273 *proto == '$' || *proto == '_' 6274 || *proto == '*' || *proto == '+' 6275 ) 6276 && proto[1] == '\0' 6277 ) 6278 || ( 6279 *proto == '\\' && proto[1] && proto[2] == '\0' 6280 ) 6281 ) { 6282 UNIPROTO(UNIOPSUB,optional); 6283 } 6284 6285 if (*proto == '\\' && proto[1] == '[') { 6286 const char *p = proto + 2; 6287 while(*p && *p != ']') 6288 ++p; 6289 if(*p == ']' && !p[1]) 6290 UNIPROTO(UNIOPSUB,optional); 6291 } 6292 6293 if (*proto == '&' && *s == '{') { 6294 if (PL_curstash) 6295 sv_setpvs(PL_subname, "__ANON__"); 6296 else 6297 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 6298 if (!PL_lex_allbrackets 6299 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 6300 { 6301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 6302 } 6303 PREBLOCK(LSTOPSUB); 6304 } 6305 6306 return KEY_NULL; 6307 } 6308 6309 static int 6310 yyl_leftcurly(pTHX_ char *s, const U8 formbrack) 6311 { 6312 char *d; 6313 if (PL_lex_brackets > 100) { 6314 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6315 } 6316 6317 switch (PL_expect) { 6318 case XTERM: 6319 case XTERMORDORDOR: 6320 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6321 PL_lex_allbrackets++; 6322 OPERATOR(HASHBRACK); 6323 case XOPERATOR: 6324 while (s < PL_bufend && SPACE_OR_TAB(*s)) 6325 s++; 6326 d = s; 6327 PL_tokenbuf[0] = '\0'; 6328 if (d < PL_bufend && *d == '-') { 6329 PL_tokenbuf[0] = '-'; 6330 d++; 6331 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6332 d++; 6333 } 6334 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) { 6335 STRLEN len; 6336 d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6337 FALSE, &len, FALSE); 6338 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6339 d++; 6340 if (*d == '}') { 6341 const char minus = (PL_tokenbuf[0] == '-'); 6342 s = force_word(s + minus, BAREWORD, FALSE, TRUE); 6343 if (minus) 6344 force_next(PERLY_MINUS); 6345 } 6346 } 6347 /* FALLTHROUGH */ 6348 case XATTRTERM: 6349 case XTERMBLOCK: 6350 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6351 PL_lex_allbrackets++; 6352 PL_expect = XSTATE; 6353 break; 6354 case XATTRBLOCK: 6355 case XBLOCK: 6356 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 6357 PL_lex_allbrackets++; 6358 PL_expect = XSTATE; 6359 break; 6360 case XBLOCKTERM: 6361 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6362 PL_lex_allbrackets++; 6363 PL_expect = XSTATE; 6364 break; 6365 default: { 6366 const char *t; 6367 if (PL_oldoldbufptr == PL_last_lop) 6368 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6369 else 6370 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6371 PL_lex_allbrackets++; 6372 s = skipspace(s); 6373 if (*s == '}') { 6374 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6375 PL_expect = XTERM; 6376 /* This hack is to get the ${} in the message. */ 6377 PL_bufptr = s+1; 6378 yyerror("syntax error"); 6379 yyquit(); 6380 break; 6381 } 6382 OPERATOR(HASHBRACK); 6383 } 6384 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { 6385 /* ${...} or @{...} etc., but not print {...} 6386 * Skip the disambiguation and treat this as a block. 6387 */ 6388 goto block_expectation; 6389 } 6390 /* This hack serves to disambiguate a pair of curlies 6391 * as being a block or an anon hash. Normally, expectation 6392 * determines that, but in cases where we're not in a 6393 * position to expect anything in particular (like inside 6394 * eval"") we have to resolve the ambiguity. This code 6395 * covers the case where the first term in the curlies is a 6396 * quoted string. Most other cases need to be explicitly 6397 * disambiguated by prepending a "+" before the opening 6398 * curly in order to force resolution as an anon hash. 6399 * 6400 * XXX should probably propagate the outer expectation 6401 * into eval"" to rely less on this hack, but that could 6402 * potentially break current behavior of eval"". 6403 * GSAR 97-07-21 6404 */ 6405 t = s; 6406 if (*s == '\'' || *s == '"' || *s == '`') { 6407 /* common case: get past first string, handling escapes */ 6408 for (t++; t < PL_bufend && *t != *s;) 6409 if (*t++ == '\\') 6410 t++; 6411 t++; 6412 } 6413 else if (*s == 'q') { 6414 if (++t < PL_bufend 6415 && (!isWORDCHAR(*t) 6416 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6417 && !isWORDCHAR(*t)))) 6418 { 6419 /* skip q//-like construct */ 6420 const char *tmps; 6421 char open, close, term; 6422 I32 brackets = 1; 6423 6424 while (t < PL_bufend && isSPACE(*t)) 6425 t++; 6426 /* check for q => */ 6427 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6428 OPERATOR(HASHBRACK); 6429 } 6430 term = *t; 6431 open = term; 6432 if (term && (tmps = memCHRs("([{< )]}> )]}>",term))) 6433 term = tmps[5]; 6434 close = term; 6435 if (open == close) 6436 for (t++; t < PL_bufend; t++) { 6437 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6438 t++; 6439 else if (*t == open) 6440 break; 6441 } 6442 else { 6443 for (t++; t < PL_bufend; t++) { 6444 if (*t == '\\' && t+1 < PL_bufend) 6445 t++; 6446 else if (*t == close && --brackets <= 0) 6447 break; 6448 else if (*t == open) 6449 brackets++; 6450 } 6451 } 6452 t++; 6453 } 6454 else 6455 /* skip plain q word */ 6456 while ( t < PL_bufend 6457 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6458 { 6459 t += UTF ? UTF8SKIP(t) : 1; 6460 } 6461 } 6462 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) { 6463 t += UTF ? UTF8SKIP(t) : 1; 6464 while ( t < PL_bufend 6465 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6466 { 6467 t += UTF ? UTF8SKIP(t) : 1; 6468 } 6469 } 6470 while (t < PL_bufend && isSPACE(*t)) 6471 t++; 6472 /* if comma follows first term, call it an anon hash */ 6473 /* XXX it could be a comma expression with loop modifiers */ 6474 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6475 || (*t == '=' && t[1] == '>'))) 6476 OPERATOR(HASHBRACK); 6477 if (PL_expect == XREF) { 6478 block_expectation: 6479 /* If there is an opening brace or 'sub:', treat it 6480 as a term to make ${{...}}{k} and &{sub:attr...} 6481 dwim. Otherwise, treat it as a statement, so 6482 map {no strict; ...} works. 6483 */ 6484 s = skipspace(s); 6485 if (*s == '{') { 6486 PL_expect = XTERM; 6487 break; 6488 } 6489 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) { 6490 PL_bufptr = s; 6491 d = s + 3; 6492 d = skipspace(d); 6493 s = PL_bufptr; 6494 if (*d == ':') { 6495 PL_expect = XTERM; 6496 break; 6497 } 6498 } 6499 PL_expect = XSTATE; 6500 } 6501 else { 6502 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6503 PL_expect = XSTATE; 6504 } 6505 } 6506 break; 6507 } 6508 6509 pl_yylval.ival = CopLINE(PL_curcop); 6510 PL_copline = NOLINE; /* invalidate current command line number */ 6511 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN); 6512 } 6513 6514 static int 6515 yyl_rightcurly(pTHX_ char *s, const U8 formbrack) 6516 { 6517 assert(s != PL_bufend); 6518 s++; 6519 6520 if (PL_lex_brackets <= 0) 6521 /* diag_listed_as: Unmatched right %s bracket */ 6522 yyerror("Unmatched right curly bracket"); 6523 else 6524 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6525 6526 PL_lex_allbrackets--; 6527 6528 if (PL_lex_state == LEX_INTERPNORMAL) { 6529 if (PL_lex_brackets == 0) { 6530 if (PL_expect & XFAKEBRACK) { 6531 PL_expect &= XENUMMASK; 6532 PL_lex_state = LEX_INTERPEND; 6533 PL_bufptr = s; 6534 return yylex(); /* ignore fake brackets */ 6535 } 6536 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6537 && SvEVALED(PL_lex_repl)) 6538 PL_lex_state = LEX_INTERPEND; 6539 else if (*s == '-' && s[1] == '>') 6540 PL_lex_state = LEX_INTERPENDMAYBE; 6541 else if (*s != '[' && *s != '{') 6542 PL_lex_state = LEX_INTERPEND; 6543 } 6544 } 6545 6546 if (PL_expect & XFAKEBRACK) { 6547 PL_expect &= XENUMMASK; 6548 PL_bufptr = s; 6549 return yylex(); /* ignore fake brackets */ 6550 } 6551 6552 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE); 6553 if (formbrack) LEAVE_with_name("lex_format"); 6554 if (formbrack == 2) { /* means . where arguments were expected */ 6555 force_next(PERLY_SEMICOLON); 6556 TOKEN(FORMRBRACK); 6557 } 6558 6559 TOKEN(PERLY_SEMICOLON); 6560 } 6561 6562 static int 6563 yyl_ampersand(pTHX_ char *s) 6564 { 6565 if (PL_expect == XPOSTDEREF) 6566 POSTDEREF(PERLY_AMPERSAND); 6567 6568 s++; 6569 if (*s++ == '&') { 6570 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6571 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6572 s -= 2; 6573 TOKEN(0); 6574 } 6575 AOPERATOR(ANDAND); 6576 } 6577 s--; 6578 6579 if (PL_expect == XOPERATOR) { 6580 char *d; 6581 bool bof; 6582 if ( PL_bufptr == PL_linestart 6583 && ckWARN(WARN_SEMICOLON) 6584 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 6585 { 6586 CopLINE_dec(PL_curcop); 6587 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6588 CopLINE_inc(PL_curcop); 6589 } 6590 d = s; 6591 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6592 s++; 6593 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6594 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6595 s = d; 6596 s--; 6597 TOKEN(0); 6598 } 6599 if (d == s) 6600 BAop(bof ? OP_NBIT_AND : OP_BIT_AND); 6601 else 6602 BAop(OP_SBIT_AND); 6603 } 6604 6605 PL_tokenbuf[0] = '&'; 6606 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); 6607 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6608 6609 if (PL_tokenbuf[1]) 6610 force_ident_maybe_lex('&'); 6611 else 6612 PREREF(PERLY_AMPERSAND); 6613 6614 TERM(PERLY_AMPERSAND); 6615 } 6616 6617 static int 6618 yyl_verticalbar(pTHX_ char *s) 6619 { 6620 char *d; 6621 bool bof; 6622 6623 s++; 6624 if (*s++ == '|') { 6625 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6626 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6627 s -= 2; 6628 TOKEN(0); 6629 } 6630 pl_yylval.ival = OP_OR; 6631 AOPERATOR(OROR); 6632 } 6633 6634 s--; 6635 d = s; 6636 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6637 s++; 6638 6639 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6640 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6641 s = d - 1; 6642 TOKEN(0); 6643 } 6644 6645 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); 6646 } 6647 6648 static int 6649 yyl_bang(pTHX_ char *s) 6650 { 6651 const char tmp = *s++; 6652 if (tmp == '=') { 6653 /* was this !=~ where !~ was meant? 6654 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6655 6656 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6657 const char *t = s+1; 6658 6659 while (t < PL_bufend && isSPACE(*t)) 6660 ++t; 6661 6662 if (*t == '/' || *t == '?' 6663 || ((*t == 'm' || *t == 's' || *t == 'y') 6664 && !isWORDCHAR(t[1])) 6665 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6666 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6667 "!=~ should be !~"); 6668 } 6669 6670 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6671 s -= 2; 6672 TOKEN(0); 6673 } 6674 6675 ChEop(OP_NE); 6676 } 6677 6678 if (tmp == '~') 6679 PMop(OP_NOT); 6680 6681 s--; 6682 OPERATOR(PERLY_EXCLAMATION_MARK); 6683 } 6684 6685 static int 6686 yyl_snail(pTHX_ char *s) 6687 { 6688 if (PL_expect == XPOSTDEREF) 6689 POSTDEREF(PERLY_SNAIL); 6690 PL_tokenbuf[0] = '@'; 6691 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6692 if (PL_expect == XOPERATOR) { 6693 char *d = s; 6694 if (PL_bufptr > s) { 6695 d = PL_bufptr-1; 6696 PL_bufptr = PL_oldbufptr; 6697 } 6698 no_op("Array", d); 6699 } 6700 pl_yylval.ival = 0; 6701 if (!PL_tokenbuf[1]) { 6702 PREREF(PERLY_SNAIL); 6703 } 6704 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6705 s = skipspace(s); 6706 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6707 && intuit_more(s, PL_bufend)) 6708 { 6709 if (*s == '{') 6710 PL_tokenbuf[0] = '%'; 6711 6712 /* Warn about @ where they meant $. */ 6713 if (*s == '[' || *s == '{') { 6714 if (ckWARN(WARN_SYNTAX)) { 6715 S_check_scalar_slice(aTHX_ s); 6716 } 6717 } 6718 } 6719 PL_expect = XOPERATOR; 6720 force_ident_maybe_lex('@'); 6721 TERM(PERLY_SNAIL); 6722 } 6723 6724 static int 6725 yyl_slash(pTHX_ char *s) 6726 { 6727 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { 6728 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6729 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6730 TOKEN(0); 6731 s += 2; 6732 AOPERATOR(DORDOR); 6733 } 6734 else if (PL_expect == XOPERATOR) { 6735 s++; 6736 if (*s == '=' && !PL_lex_allbrackets 6737 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6738 { 6739 s--; 6740 TOKEN(0); 6741 } 6742 Mop(OP_DIVIDE); 6743 } 6744 else { 6745 /* Disable warning on "study /blah/" */ 6746 if ( PL_oldoldbufptr == PL_last_uni 6747 && ( *PL_last_uni != 's' || s - PL_last_uni < 5 6748 || memNE(PL_last_uni, "study", 5) 6749 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF) 6750 )) 6751 check_uni(); 6752 s = scan_pat(s,OP_MATCH); 6753 TERM(sublex_start()); 6754 } 6755 } 6756 6757 static int 6758 yyl_leftsquare(pTHX_ char *s) 6759 { 6760 if (PL_lex_brackets > 100) 6761 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6762 PL_lex_brackstack[PL_lex_brackets++] = 0; 6763 PL_lex_allbrackets++; 6764 s++; 6765 OPERATOR(PERLY_BRACKET_OPEN); 6766 } 6767 6768 static int 6769 yyl_rightsquare(pTHX_ char *s) 6770 { 6771 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6772 TOKEN(0); 6773 s++; 6774 if (PL_lex_brackets <= 0) 6775 /* diag_listed_as: Unmatched right %s bracket */ 6776 yyerror("Unmatched right square bracket"); 6777 else 6778 --PL_lex_brackets; 6779 PL_lex_allbrackets--; 6780 if (PL_lex_state == LEX_INTERPNORMAL) { 6781 if (PL_lex_brackets == 0) { 6782 if (*s == '-' && s[1] == '>') 6783 PL_lex_state = LEX_INTERPENDMAYBE; 6784 else if (*s != '[' && *s != '{') 6785 PL_lex_state = LEX_INTERPEND; 6786 } 6787 } 6788 TERM(PERLY_BRACKET_CLOSE); 6789 } 6790 6791 static int 6792 yyl_tilde(pTHX_ char *s) 6793 { 6794 bool bof; 6795 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) { 6796 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6797 TOKEN(0); 6798 s += 2; 6799 Perl_ck_warner_d(aTHX_ 6800 packWARN(WARN_DEPRECATED__SMARTMATCH), 6801 "Smartmatch is deprecated"); 6802 NCEop(OP_SMARTMATCH); 6803 } 6804 s++; 6805 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { 6806 s++; 6807 BCop(OP_SCOMPLEMENT); 6808 } 6809 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); 6810 } 6811 6812 static int 6813 yyl_leftparen(pTHX_ char *s) 6814 { 6815 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 6816 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 6817 else 6818 PL_expect = XTERM; 6819 s = skipspace(s); 6820 PL_lex_allbrackets++; 6821 TOKEN(PERLY_PAREN_OPEN); 6822 } 6823 6824 static int 6825 yyl_rightparen(pTHX_ char *s) 6826 { 6827 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 6828 TOKEN(0); 6829 s++; 6830 PL_lex_allbrackets--; 6831 s = skipspace(s); 6832 if (*s == '{') 6833 PREBLOCK(PERLY_PAREN_CLOSE); 6834 TERM(PERLY_PAREN_CLOSE); 6835 } 6836 6837 static int 6838 yyl_leftpointy(pTHX_ char *s) 6839 { 6840 char tmp; 6841 6842 if (PL_expect != XOPERATOR) { 6843 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) 6844 check_uni(); 6845 if (s[1] == '<' && s[2] != '>') 6846 s = scan_heredoc(s); 6847 else 6848 s = scan_inputsymbol(s); 6849 PL_expect = XOPERATOR; 6850 TOKEN(sublex_start()); 6851 } 6852 6853 s++; 6854 6855 tmp = *s++; 6856 if (tmp == '<') { 6857 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6858 s -= 2; 6859 TOKEN(0); 6860 } 6861 SHop(OP_LEFT_SHIFT); 6862 } 6863 if (tmp == '=') { 6864 tmp = *s++; 6865 if (tmp == '>') { 6866 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6867 s -= 3; 6868 TOKEN(0); 6869 } 6870 NCEop(OP_NCMP); 6871 } 6872 s--; 6873 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6874 s -= 2; 6875 TOKEN(0); 6876 } 6877 ChRop(OP_LE); 6878 } 6879 6880 s--; 6881 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6882 s--; 6883 TOKEN(0); 6884 } 6885 6886 ChRop(OP_LT); 6887 } 6888 6889 static int 6890 yyl_rightpointy(pTHX_ char *s) 6891 { 6892 const char tmp = *s++; 6893 6894 if (tmp == '>') { 6895 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6896 s -= 2; 6897 TOKEN(0); 6898 } 6899 SHop(OP_RIGHT_SHIFT); 6900 } 6901 else if (tmp == '=') { 6902 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6903 s -= 2; 6904 TOKEN(0); 6905 } 6906 ChRop(OP_GE); 6907 } 6908 6909 s--; 6910 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6911 s--; 6912 TOKEN(0); 6913 } 6914 6915 ChRop(OP_GT); 6916 } 6917 6918 static int 6919 yyl_sglquote(pTHX_ char *s) 6920 { 6921 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6922 if (!s) 6923 missingterm(NULL, 0); 6924 COPLINE_SET_FROM_MULTI_END; 6925 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6926 if (PL_expect == XOPERATOR) { 6927 no_op("String",s); 6928 } 6929 pl_yylval.ival = OP_CONST; 6930 TERM(sublex_start()); 6931 } 6932 6933 static int 6934 yyl_dblquote(pTHX_ char *s) 6935 { 6936 char *d; 6937 STRLEN len; 6938 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6939 DEBUG_T( { 6940 if (s) 6941 printbuf("### Saw string before %s\n", s); 6942 else 6943 PerlIO_printf(Perl_debug_log, 6944 "### Saw unterminated string\n"); 6945 } ); 6946 if (PL_expect == XOPERATOR) { 6947 no_op("String",s); 6948 } 6949 if (!s) 6950 missingterm(NULL, 0); 6951 pl_yylval.ival = OP_CONST; 6952 /* FIXME. I think that this can be const if char *d is replaced by 6953 more localised variables. */ 6954 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6955 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6956 pl_yylval.ival = OP_STRINGIFY; 6957 break; 6958 } 6959 } 6960 if (pl_yylval.ival == OP_CONST) 6961 COPLINE_SET_FROM_MULTI_END; 6962 TERM(sublex_start()); 6963 } 6964 6965 static int 6966 yyl_backtick(pTHX_ char *s) 6967 { 6968 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6969 DEBUG_T( { 6970 if (s) 6971 printbuf("### Saw backtick string before %s\n", s); 6972 else 6973 PerlIO_printf(Perl_debug_log, 6974 "### Saw unterminated backtick string\n"); 6975 } ); 6976 if (PL_expect == XOPERATOR) 6977 no_op("Backticks",s); 6978 if (!s) 6979 missingterm(NULL, 0); 6980 pl_yylval.ival = OP_BACKTICK; 6981 TERM(sublex_start()); 6982 } 6983 6984 static int 6985 yyl_backslash(pTHX_ char *s) 6986 { 6987 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s)) 6988 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6989 *s, *s); 6990 if (PL_expect == XOPERATOR) 6991 no_op("Backslash",s); 6992 OPERATOR(REFGEN); 6993 } 6994 6995 static void 6996 yyl_data_handle(pTHX) 6997 { 6998 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 6999 ? PL_curstash 7000 : PL_defstash; 7001 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1); 7002 7003 if (!isGV(gv)) 7004 gv_init(gv,stash,"DATA",4,0); 7005 7006 GvMULTI_on(gv); 7007 if (!GvIO(gv)) 7008 GvIOp(gv) = newIO(); 7009 IoIFP(GvIOp(gv)) = PL_rsfp; 7010 7011 /* Mark this internal pseudo-handle as clean */ 7012 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 7013 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 7014 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 7015 else 7016 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 7017 7018 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 7019 /* if the script was opened in binmode, we need to revert 7020 * it to text mode for compatibility; but only iff it has CRs 7021 * XXX this is a questionable hack at best. */ 7022 if (PL_bufend-PL_bufptr > 2 7023 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 7024 { 7025 Off_t loc = 0; 7026 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 7027 loc = PerlIO_tell(PL_rsfp); 7028 (void)PerlIO_seek(PL_rsfp, 0L, 0); 7029 } 7030 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 7031 if (loc > 0) 7032 PerlIO_seek(PL_rsfp, loc, 0); 7033 } 7034 } 7035 #endif 7036 7037 #ifdef PERLIO_LAYERS 7038 if (!IN_BYTES) { 7039 if (UTF) 7040 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 7041 } 7042 #endif 7043 7044 PL_rsfp = NULL; 7045 } 7046 7047 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*) 7048 __attribute__noreturn__; 7049 7050 PERL_STATIC_NO_RET void 7051 yyl_croak_unrecognised(pTHX_ char *s) 7052 { 7053 SV *dsv = newSVpvs_flags("", SVs_TEMP); 7054 const char *c; 7055 char *d; 7056 STRLEN len; 7057 7058 if (UTF) { 7059 STRLEN skiplen = UTF8SKIP(s); 7060 STRLEN stravail = PL_bufend - s; 7061 c = sv_uni_display(dsv, newSVpvn_flags(s, 7062 skiplen > stravail ? stravail : skiplen, 7063 SVs_TEMP | SVf_UTF8), 7064 10, UNI_DISPLAY_ISPRINT); 7065 } 7066 else { 7067 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 7068 } 7069 7070 if (s >= PL_linestart) { 7071 d = PL_linestart; 7072 } 7073 else { 7074 /* somehow (probably due to a parse failure), PL_linestart has advanced 7075 * pass PL_bufptr, get a reasonable beginning of line 7076 */ 7077 d = s; 7078 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n') 7079 --d; 7080 } 7081 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d); 7082 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 7083 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT; 7084 } 7085 7086 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c, 7087 UTF8fARG(UTF, (s - d), d), 7088 (int) len + 1); 7089 } 7090 7091 static int 7092 yyl_require(pTHX_ char *s, I32 orig_keyword) 7093 { 7094 s = skipspace(s); 7095 if (isDIGIT(*s)) { 7096 s = force_version(s, FALSE); 7097 } 7098 else if (*s != 'v' || !isDIGIT(s[1]) 7099 || (s = force_version(s, TRUE), *s == 'v')) 7100 { 7101 *PL_tokenbuf = '\0'; 7102 s = force_word(s,BAREWORD,TRUE,TRUE); 7103 if (isIDFIRST_lazy_if_safe(PL_tokenbuf, 7104 PL_tokenbuf + sizeof(PL_tokenbuf), 7105 UTF)) 7106 { 7107 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 7108 GV_ADD | (UTF ? SVf_UTF8 : 0)); 7109 } 7110 else if (*s == '<') 7111 yyerror("<> at require-statement should be quotes"); 7112 } 7113 7114 if (orig_keyword == KEY_require) 7115 pl_yylval.ival = 1; 7116 else 7117 pl_yylval.ival = 0; 7118 7119 PL_expect = PL_nexttoke ? XOPERATOR : XTERM; 7120 PL_bufptr = s; 7121 PL_last_uni = PL_oldbufptr; 7122 PL_last_lop_op = OP_REQUIRE; 7123 s = skipspace(s); 7124 return REPORT( (int)KW_REQUIRE ); 7125 } 7126 7127 static int 7128 yyl_foreach(pTHX_ char *s) 7129 { 7130 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7131 return REPORT(0); 7132 pl_yylval.ival = CopLINE(PL_curcop); 7133 s = skipspace(s); 7134 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 7135 char *p = s; 7136 SSize_t s_off = s - SvPVX(PL_linestr); 7137 bool paren_is_valid = FALSE; 7138 bool maybe_package = FALSE; 7139 bool saw_core = FALSE; 7140 bool core_valid = FALSE; 7141 7142 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) { 7143 saw_core = TRUE; 7144 p += 6; 7145 } 7146 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) { 7147 core_valid = TRUE; 7148 paren_is_valid = TRUE; 7149 if (isSPACE(p[2])) { 7150 p = skipspace(p + 3); 7151 maybe_package = TRUE; 7152 } 7153 else { 7154 p += 2; 7155 } 7156 } 7157 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) { 7158 core_valid = TRUE; 7159 if (isSPACE(p[3])) { 7160 p = skipspace(p + 4); 7161 maybe_package = TRUE; 7162 } 7163 else { 7164 p += 3; 7165 } 7166 } 7167 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) { 7168 core_valid = TRUE; 7169 if (isSPACE(p[5])) { 7170 p = skipspace(p + 6); 7171 } 7172 else { 7173 p += 5; 7174 } 7175 } 7176 if (saw_core && !core_valid) { 7177 Perl_croak(aTHX_ "Missing $ on loop variable"); 7178 } 7179 7180 if (maybe_package && !saw_core) { 7181 /* skip optional package name, as in "for my abc $x (..)" */ 7182 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) { 7183 STRLEN len; 7184 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE); 7185 p = skipspace(p); 7186 paren_is_valid = FALSE; 7187 } 7188 } 7189 7190 if (UNLIKELY(paren_is_valid && *p == '(')) { 7191 ; /* fine - this is foreach my (list) */ 7192 } 7193 else if (UNLIKELY(*p != '$' && *p != '\\')) { 7194 /* "for myfoo (" will end up here, but with p pointing at the 'f' */ 7195 Perl_croak(aTHX_ "Missing $ on loop variable"); 7196 } 7197 /* The buffer may have been reallocated, update s */ 7198 s = SvPVX(PL_linestr) + s_off; 7199 } 7200 OPERATOR(KW_FOR); 7201 } 7202 7203 static int 7204 yyl_do(pTHX_ char *s, I32 orig_keyword) 7205 { 7206 s = skipspace(s); 7207 if (*s == '{') 7208 PRETERMBLOCK(KW_DO); 7209 if (*s != '\'') { 7210 char *d; 7211 STRLEN len; 7212 *PL_tokenbuf = '&'; 7213 d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7214 1, &len, TRUE); 7215 if (len && memNEs(PL_tokenbuf+1, len, "CORE") 7216 && !keyword(PL_tokenbuf + 1, len, 0)) { 7217 SSize_t off = s-SvPVX(PL_linestr); 7218 d = skipspace(d); 7219 s = SvPVX(PL_linestr)+off; 7220 if (*d == '(') { 7221 force_ident_maybe_lex('&'); 7222 s = d; 7223 } 7224 } 7225 } 7226 if (orig_keyword == KEY_do) 7227 pl_yylval.ival = 1; 7228 else 7229 pl_yylval.ival = 0; 7230 OPERATOR(KW_DO); 7231 } 7232 7233 static int 7234 yyl_my(pTHX_ char *s, I32 my) 7235 { 7236 if (PL_in_my) { 7237 PL_bufptr = s; 7238 yyerror(Perl_form(aTHX_ 7239 "Can't redeclare \"%s\" in \"%s\"", 7240 my == KEY_my ? "my" : 7241 my == KEY_state ? "state" : "our", 7242 PL_in_my == KEY_my ? "my" : 7243 PL_in_my == KEY_state ? "state" : "our")); 7244 } 7245 PL_in_my = (U16)my; 7246 s = skipspace(s); 7247 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 7248 STRLEN len; 7249 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE); 7250 if (memEQs(PL_tokenbuf, len, "sub")) 7251 return yyl_sub(aTHX_ s, my); 7252 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 7253 if (!PL_in_my_stash) { 7254 char tmpbuf[1024]; 7255 int i; 7256 PL_bufptr = s; 7257 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 7258 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf)); 7259 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 7260 } 7261 } 7262 else if (*s == '\\') { 7263 if (!FEATURE_MYREF_IS_ENABLED) 7264 Perl_croak(aTHX_ "The experimental declared_refs " 7265 "feature is not enabled"); 7266 Perl_ck_warner_d(aTHX_ 7267 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS), 7268 "Declaring references is experimental"); 7269 } 7270 OPERATOR(KW_MY); 7271 } 7272 7273 static int yyl_try(pTHX_ char*); 7274 7275 static bool 7276 yyl_eol_needs_semicolon(pTHX_ char **ps) 7277 { 7278 char *s = *ps; 7279 if (PL_lex_state != LEX_NORMAL 7280 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) 7281 { 7282 const bool in_comment = *s == '#'; 7283 char *d; 7284 if (*s == '#' && s == PL_linestart && PL_in_eval 7285 && !PL_rsfp && !PL_parser->filtered) { 7286 /* handle eval qq[#line 1 "foo"\n ...] */ 7287 CopLINE_dec(PL_curcop); 7288 incline(s, PL_bufend); 7289 } 7290 d = s; 7291 while (d < PL_bufend && *d != '\n') 7292 d++; 7293 if (d < PL_bufend) 7294 d++; 7295 s = d; 7296 if (in_comment && d == PL_bufend 7297 && PL_lex_state == LEX_INTERPNORMAL 7298 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 7299 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 7300 else 7301 incline(s, PL_bufend); 7302 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7303 PL_lex_state = LEX_FORMLINE; 7304 force_next(FORMRBRACK); 7305 *ps = s; 7306 return TRUE; 7307 } 7308 } 7309 else { 7310 while (s < PL_bufend && *s != '\n') 7311 s++; 7312 if (s < PL_bufend) { 7313 s++; 7314 if (s < PL_bufend) 7315 incline(s, PL_bufend); 7316 } 7317 } 7318 *ps = s; 7319 return FALSE; 7320 } 7321 7322 static int 7323 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s) 7324 { 7325 char *d; 7326 7327 goto start; 7328 7329 do { 7330 fake_eof = 0; 7331 bof = cBOOL(PL_rsfp); 7332 start: 7333 7334 PL_bufptr = PL_bufend; 7335 COPLINE_INC_WITH_HERELINES; 7336 if (!lex_next_chunk(fake_eof)) { 7337 CopLINE_dec(PL_curcop); 7338 s = PL_bufptr; 7339 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */ 7340 } 7341 CopLINE_dec(PL_curcop); 7342 s = PL_bufptr; 7343 /* If it looks like the start of a BOM or raw UTF-16, 7344 * check if it in fact is. */ 7345 if (bof && PL_rsfp 7346 && ( *s == 0 7347 || *(U8*)s == BOM_UTF8_FIRST_BYTE 7348 || *(U8*)s >= 0xFE 7349 || s[1] == 0)) 7350 { 7351 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 7352 bof = (offset == (Off_t)SvCUR(PL_linestr)); 7353 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 7354 /* offset may include swallowed CR */ 7355 if (!bof) 7356 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 7357 #endif 7358 if (bof) { 7359 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7360 s = swallow_bom((U8*)s); 7361 } 7362 } 7363 if (PL_parser->in_pod) { 7364 /* Incest with pod. */ 7365 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") 7366 && !isALPHA(s[4])) 7367 { 7368 SvPVCLEAR(PL_linestr); 7369 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7371 PL_last_lop = PL_last_uni = NULL; 7372 PL_parser->in_pod = 0; 7373 } 7374 } 7375 if (PL_rsfp || PL_parser->filtered) 7376 incline(s, PL_bufend); 7377 } while (PL_parser->in_pod); 7378 7379 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 7380 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7381 PL_last_lop = PL_last_uni = NULL; 7382 if (CopLINE(PL_curcop) == 1) { 7383 while (s < PL_bufend && isSPACE(*s)) 7384 s++; 7385 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 7386 s++; 7387 d = NULL; 7388 if (!PL_in_eval) { 7389 if (*s == '#' && *(s+1) == '!') 7390 d = s + 2; 7391 #ifdef ALTERNATE_SHEBANG 7392 else { 7393 static char const as[] = ALTERNATE_SHEBANG; 7394 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 7395 d = s + (sizeof(as) - 1); 7396 } 7397 #endif /* ALTERNATE_SHEBANG */ 7398 } 7399 if (d) { 7400 char *ipath; 7401 char *ipathend; 7402 7403 while (isSPACE(*d)) 7404 d++; 7405 ipath = d; 7406 while (*d && !isSPACE(*d)) 7407 d++; 7408 ipathend = d; 7409 7410 #ifdef ARG_ZERO_IS_SCRIPT 7411 if (ipathend > ipath) { 7412 /* 7413 * HP-UX (at least) sets argv[0] to the script name, 7414 * which makes $^X incorrect. And Digital UNIX and Linux, 7415 * at least, set argv[0] to the basename of the Perl 7416 * interpreter. So, having found "#!", we'll set it right. 7417 */ 7418 SV* copfilesv = CopFILESV(PL_curcop); 7419 if (copfilesv) { 7420 SV * const x = 7421 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 7422 SVt_PV)); /* $^X */ 7423 assert(SvPOK(x) || SvGMAGICAL(x)); 7424 if (sv_eq(x, copfilesv)) { 7425 sv_setpvn(x, ipath, ipathend - ipath); 7426 SvSETMAGIC(x); 7427 } 7428 else { 7429 STRLEN blen; 7430 STRLEN llen; 7431 const char *bstart = SvPV_const(copfilesv, blen); 7432 const char * const lstart = SvPV_const(x, llen); 7433 if (llen < blen) { 7434 bstart += blen - llen; 7435 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 7436 sv_setpvn(x, ipath, ipathend - ipath); 7437 SvSETMAGIC(x); 7438 } 7439 } 7440 } 7441 } 7442 else { 7443 /* Anything to do if no copfilesv? */ 7444 } 7445 TAINT_NOT; /* $^X is always tainted, but that's OK */ 7446 } 7447 #endif /* ARG_ZERO_IS_SCRIPT */ 7448 7449 /* 7450 * Look for options. 7451 */ 7452 d = instr(s,"perl -"); 7453 if (!d) { 7454 d = instr(s,"perl"); 7455 #if defined(DOSISH) 7456 /* avoid getting into infinite loops when shebang 7457 * line contains "Perl" rather than "perl" */ 7458 if (!d) { 7459 for (d = ipathend-4; d >= ipath; --d) { 7460 if (isALPHA_FOLD_EQ(*d, 'p') 7461 && !ibcmp(d, "perl", 4)) 7462 { 7463 break; 7464 } 7465 } 7466 if (d < ipath) 7467 d = NULL; 7468 } 7469 #endif 7470 } 7471 #ifdef ALTERNATE_SHEBANG 7472 /* 7473 * If the ALTERNATE_SHEBANG on this system starts with a 7474 * character that can be part of a Perl expression, then if 7475 * we see it but not "perl", we're probably looking at the 7476 * start of Perl code, not a request to hand off to some 7477 * other interpreter. Similarly, if "perl" is there, but 7478 * not in the first 'word' of the line, we assume the line 7479 * contains the start of the Perl program. 7480 */ 7481 if (d && *s != '#') { 7482 const char *c = ipath; 7483 while (*c && !memCHRs("; \t\r\n\f\v#", *c)) 7484 c++; 7485 if (c < d) 7486 d = NULL; /* "perl" not in first word; ignore */ 7487 else 7488 *s = '#'; /* Don't try to parse shebang line */ 7489 } 7490 #endif /* ALTERNATE_SHEBANG */ 7491 if (!d 7492 && *s == '#' 7493 && ipathend > ipath 7494 && !PL_minus_c 7495 && !instr(s,"indir") 7496 && instr(PL_origargv[0],"perl")) 7497 { 7498 char **newargv; 7499 7500 *ipathend = '\0'; 7501 s = ipathend + 1; 7502 while (s < PL_bufend && isSPACE(*s)) 7503 s++; 7504 if (s < PL_bufend) { 7505 Newx(newargv,PL_origargc+3,char*); 7506 newargv[1] = s; 7507 while (s < PL_bufend && !isSPACE(*s)) 7508 s++; 7509 *s = '\0'; 7510 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 7511 } 7512 else 7513 newargv = PL_origargv; 7514 newargv[0] = ipath; 7515 PERL_FPU_PRE_EXEC 7516 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 7517 PERL_FPU_POST_EXEC 7518 Perl_croak(aTHX_ "Can't exec %s", ipath); 7519 } 7520 if (d) { 7521 while (*d && !isSPACE(*d)) 7522 d++; 7523 while (SPACE_OR_TAB(*d)) 7524 d++; 7525 7526 if (*d++ == '-') { 7527 const bool switches_done = PL_doswitches; 7528 const U32 oldpdb = PL_perldb; 7529 const bool oldn = PL_minus_n; 7530 const bool oldp = PL_minus_p; 7531 const char *d1 = d; 7532 7533 do { 7534 bool baduni = FALSE; 7535 if (*d1 == 'C') { 7536 const char *d2 = d1 + 1; 7537 if (parse_unicode_opts((const char **)&d2) 7538 != PL_unicode) 7539 baduni = TRUE; 7540 } 7541 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { 7542 const char * const m = d1; 7543 while (*d1 && !isSPACE(*d1)) 7544 d1++; 7545 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 7546 (int)(d1 - m), m); 7547 } 7548 d1 = moreswitches(d1); 7549 } while (d1); 7550 if (PL_doswitches && !switches_done) { 7551 int argc = PL_origargc; 7552 char **argv = PL_origargv; 7553 do { 7554 argc--,argv++; 7555 } while (argc && argv[0][0] == '-' && argv[0][1]); 7556 init_argv_symbols(argc,argv); 7557 } 7558 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) 7559 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 7560 /* if we have already added "LINE: while (<>) {", 7561 we must not do it again */ 7562 { 7563 SvPVCLEAR(PL_linestr); 7564 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7565 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7566 PL_last_lop = PL_last_uni = NULL; 7567 PL_preambled = FALSE; 7568 if (PERLDB_LINE_OR_SAVESRC) 7569 (void)gv_fetchfile(PL_origfilename); 7570 return YYL_RETRY; 7571 } 7572 } 7573 } 7574 } 7575 } 7576 7577 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7578 PL_lex_state = LEX_FORMLINE; 7579 force_next(FORMRBRACK); 7580 TOKEN(PERLY_SEMICOLON); 7581 } 7582 7583 PL_bufptr = s; 7584 return YYL_RETRY; 7585 } 7586 7587 static int 7588 yyl_fatcomma(pTHX_ char *s, STRLEN len) 7589 { 7590 CLINE; 7591 pl_yylval.opval 7592 = newSVOP(OP_CONST, 0, 7593 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 7594 pl_yylval.opval->op_private = OPpCONST_BARE; 7595 TERM(BAREWORD); 7596 } 7597 7598 static int 7599 yyl_safe_bareword(pTHX_ char *s, const char lastchar) 7600 { 7601 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7602 && PL_parser->saw_infix_sigil) 7603 { 7604 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7605 "Operator or semicolon missing before %c%" UTF8f, 7606 lastchar, 7607 UTF8fARG(UTF, strlen(PL_tokenbuf), 7608 PL_tokenbuf)); 7609 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7610 "Ambiguous use of %c resolved as operator %c", 7611 lastchar, lastchar); 7612 } 7613 TOKEN(BAREWORD); 7614 } 7615 7616 static int 7617 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off) 7618 { 7619 if (sv) { 7620 op_free(rv2cv_op); 7621 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7622 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7623 if (SvTYPE(sv) == SVt_PVAV) 7624 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7625 pl_yylval.opval); 7626 else { 7627 pl_yylval.opval->op_private = 0; 7628 pl_yylval.opval->op_folded = 1; 7629 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7630 } 7631 TOKEN(BAREWORD); 7632 } 7633 7634 op_free(pl_yylval.opval); 7635 pl_yylval.opval = 7636 off ? newCVREF(0, rv2cv_op) : rv2cv_op; 7637 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7638 PL_last_lop = PL_oldbufptr; 7639 PL_last_lop_op = OP_ENTERSUB; 7640 7641 /* Is there a prototype? */ 7642 if (SvPOK(cv)) { 7643 int k = yyl_subproto(aTHX_ s, cv); 7644 if (k != KEY_NULL) 7645 return k; 7646 } 7647 7648 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7649 PL_expect = XTERM; 7650 force_next(off ? PRIVATEREF : BAREWORD); 7651 if (!PL_lex_allbrackets 7652 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7653 { 7654 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7655 } 7656 7657 TOKEN(NOAMP); 7658 } 7659 7660 /* Honour "reserved word" warnings, and enforce strict subs */ 7661 static void 7662 yyl_strictwarn_bareword(pTHX_ const char lastchar) 7663 { 7664 /* after "print" and similar functions (corresponding to 7665 * "F? L" in opcode.pl), whatever wasn't already parsed as 7666 * a filehandle should be subject to "strict subs". 7667 * Likewise for the optional indirect-object argument to system 7668 * or exec, which can't be a bareword */ 7669 if ((PL_last_lop_op == OP_PRINT 7670 || PL_last_lop_op == OP_PRTF 7671 || PL_last_lop_op == OP_SAY 7672 || PL_last_lop_op == OP_SYSTEM 7673 || PL_last_lop_op == OP_EXEC) 7674 && (PL_hints & HINT_STRICT_SUBS)) 7675 { 7676 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7677 } 7678 7679 if (lastchar != '-' && ckWARN(WARN_RESERVED)) { 7680 char *d = PL_tokenbuf; 7681 while (isLOWER(*d)) 7682 d++; 7683 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { 7684 /* PL_warn_reserved is constant */ 7685 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 7686 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7687 PL_tokenbuf); 7688 GCC_DIAG_RESTORE_STMT; 7689 } 7690 } 7691 } 7692 7693 static int 7694 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) 7695 { 7696 int pkgname = 0; 7697 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7698 bool safebw; 7699 bool no_op_error = FALSE; 7700 /* Use this var to track whether intuit_method has been 7701 called. intuit_method returns 0 or > 255. */ 7702 int key = 1; 7703 7704 if (PL_expect == XOPERATOR) { 7705 if (PL_bufptr == PL_linestart) { 7706 CopLINE_dec(PL_curcop); 7707 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7708 CopLINE_inc(PL_curcop); 7709 } 7710 else 7711 /* We want to call no_op with s pointing after the 7712 bareword, so defer it. But we want it to come 7713 before the Bad name croak. */ 7714 no_op_error = TRUE; 7715 } 7716 7717 /* Get the rest if it looks like a package qualifier */ 7718 7719 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7720 STRLEN morelen; 7721 s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7722 TRUE, &morelen, TRUE); 7723 if (no_op_error) { 7724 no_op("Bareword",s); 7725 no_op_error = FALSE; 7726 } 7727 if (!morelen) 7728 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", 7729 UTF8fARG(UTF, len, PL_tokenbuf), 7730 *s == '\'' ? "'" : "::"); 7731 len += morelen; 7732 pkgname = 1; 7733 } 7734 7735 if (no_op_error) 7736 no_op("Bareword",s); 7737 7738 /* See if the name is "Foo::", 7739 in which case Foo is a bareword 7740 (and a package name). */ 7741 7742 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { 7743 if (ckWARN(WARN_BAREWORD) 7744 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7745 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7746 "Bareword \"%" UTF8f 7747 "\" refers to nonexistent package", 7748 UTF8fARG(UTF, len, PL_tokenbuf)); 7749 len -= 2; 7750 PL_tokenbuf[len] = '\0'; 7751 c.gv = NULL; 7752 c.gvp = 0; 7753 safebw = TRUE; 7754 } 7755 else { 7756 safebw = FALSE; 7757 } 7758 7759 /* if we saw a global override before, get the right name */ 7760 7761 if (!c.sv) 7762 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); 7763 if (c.gvp) { 7764 SV *sv = newSVpvs("CORE::GLOBAL::"); 7765 sv_catsv(sv, c.sv); 7766 SvREFCNT_dec(c.sv); 7767 c.sv = sv; 7768 } 7769 7770 /* Presume this is going to be a bareword of some sort. */ 7771 CLINE; 7772 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv); 7773 pl_yylval.opval->op_private = OPpCONST_BARE; 7774 7775 /* And if "Foo::", then that's what it certainly is. */ 7776 if (safebw) 7777 return yyl_safe_bareword(aTHX_ s, lastchar); 7778 7779 if (!c.off) { 7780 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv)); 7781 const_op->op_private = OPpCONST_BARE; 7782 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); 7783 c.cv = c.lex 7784 ? isGV(c.gv) 7785 ? GvCV(c.gv) 7786 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV 7787 ? (CV *)SvRV(c.gv) 7788 : ((CV *)c.gv) 7789 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB); 7790 } 7791 7792 /* See if it's the indirect object for a list operator. */ 7793 7794 if (PL_oldoldbufptr 7795 && PL_oldoldbufptr < PL_bufptr 7796 && (PL_oldoldbufptr == PL_last_lop 7797 || PL_oldoldbufptr == PL_last_uni) 7798 && /* NO SKIPSPACE BEFORE HERE! */ 7799 (PL_expect == XREF 7800 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) 7801 == OA_FILEREF)) 7802 { 7803 bool immediate_paren = *s == '('; 7804 SSize_t s_off; 7805 7806 /* (Now we can afford to cross potential line boundary.) */ 7807 s = skipspace(s); 7808 7809 /* intuit_method() can indirectly call lex_next_chunk(), 7810 * invalidating s 7811 */ 7812 s_off = s - SvPVX(PL_linestr); 7813 /* Two barewords in a row may indicate method call. */ 7814 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 7815 || *s == '$') 7816 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7817 { 7818 /* the code at method: doesn't use s */ 7819 goto method; 7820 } 7821 s = SvPVX(PL_linestr) + s_off; 7822 7823 /* If not a declared subroutine, it's an indirect object. */ 7824 /* (But it's an indir obj regardless for sort.) */ 7825 /* Also, if "_" follows a filetest operator, it's a bareword */ 7826 7827 if ( 7828 ( !immediate_paren && (PL_last_lop_op == OP_SORT 7829 || (!c.cv 7830 && (PL_last_lop_op != OP_MAPSTART 7831 && PL_last_lop_op != OP_GREPSTART)))) 7832 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7833 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) 7834 == OA_FILESTATOP)) 7835 ) 7836 { 7837 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7838 yyl_strictwarn_bareword(aTHX_ lastchar); 7839 op_free(c.rv2cv_op); 7840 return yyl_safe_bareword(aTHX_ s, lastchar); 7841 } 7842 } 7843 7844 PL_expect = XOPERATOR; 7845 s = skipspace(s); 7846 7847 /* Is this a word before a => operator? */ 7848 if (*s == '=' && s[1] == '>' && !pkgname) { 7849 op_free(c.rv2cv_op); 7850 CLINE; 7851 if (c.gvp || (c.lex && !c.off)) { 7852 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7853 /* This is our own scalar, created a few lines 7854 above, so this is safe. */ 7855 SvREADONLY_off(c.sv); 7856 sv_setpv(c.sv, PL_tokenbuf); 7857 if (UTF && !IN_BYTES 7858 && is_utf8_string((U8*)PL_tokenbuf, len)) 7859 SvUTF8_on(c.sv); 7860 SvREADONLY_on(c.sv); 7861 } 7862 TERM(BAREWORD); 7863 } 7864 7865 /* If followed by a paren, it's certainly a subroutine. */ 7866 if (*s == '(') { 7867 CLINE; 7868 if (c.cv) { 7869 char *d = s + 1; 7870 while (SPACE_OR_TAB(*d)) 7871 d++; 7872 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv))) 7873 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off); 7874 } 7875 NEXTVAL_NEXTTOKE.opval = 7876 c.off ? c.rv2cv_op : pl_yylval.opval; 7877 if (c.off) 7878 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7879 else op_free(c.rv2cv_op), force_next(BAREWORD); 7880 pl_yylval.ival = 0; 7881 TOKEN(PERLY_AMPERSAND); 7882 } 7883 7884 /* If followed by var or block, call it a method (unless sub) */ 7885 7886 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) { 7887 op_free(c.rv2cv_op); 7888 PL_last_lop = PL_oldbufptr; 7889 PL_last_lop_op = OP_METHOD; 7890 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7891 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7892 PL_expect = XBLOCKTERM; 7893 PL_bufptr = s; 7894 return REPORT(METHCALL0); 7895 } 7896 7897 /* If followed by a bareword, see if it looks like indir obj. */ 7898 7899 if ( key == 1 7900 && !orig_keyword 7901 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') 7902 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7903 { 7904 method: 7905 if (c.lex && !c.off) { 7906 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7907 SvREADONLY_off(c.sv); 7908 sv_setpvn(c.sv, PL_tokenbuf, len); 7909 if (UTF && !IN_BYTES 7910 && is_utf8_string((U8*)PL_tokenbuf, len)) 7911 SvUTF8_on(c.sv); 7912 else SvUTF8_off(c.sv); 7913 } 7914 op_free(c.rv2cv_op); 7915 if (key == METHCALL0 && !PL_lex_allbrackets 7916 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7917 { 7918 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7919 } 7920 return REPORT(key); 7921 } 7922 7923 /* Not a method, so call it a subroutine (if defined) */ 7924 7925 if (c.cv) { 7926 /* Check for a constant sub */ 7927 c.sv = cv_const_sv_or_av(c.cv); 7928 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off); 7929 } 7930 7931 /* Call it a bare word */ 7932 7933 if (PL_hints & HINT_STRICT_SUBS) 7934 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7935 else 7936 yyl_strictwarn_bareword(aTHX_ lastchar); 7937 7938 op_free(c.rv2cv_op); 7939 7940 return yyl_safe_bareword(aTHX_ s, lastchar); 7941 } 7942 7943 static int 7944 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) 7945 { 7946 switch (key) { 7947 default: /* not a keyword */ 7948 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7949 7950 case KEY___FILE__: 7951 FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_FILE<<8, 7952 newSVpv(CopFILE(PL_curcop),0)) ); 7953 7954 case KEY___LINE__: 7955 FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_LINE<<8, 7956 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))); 7957 7958 case KEY___PACKAGE__: 7959 FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_PACKAGE<<8, 7960 (PL_curstash 7961 ? newSVhek(HvNAME_HEK(PL_curstash)) 7962 : &PL_sv_undef)) 7963 ); 7964 7965 case KEY___DATA__: 7966 case KEY___END__: 7967 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) 7968 yyl_data_handle(aTHX); 7969 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s); 7970 7971 case KEY___SUB__: 7972 /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an 7973 * OP_CONST. We need to make it big enough to allow room for that if 7974 * so */ 7975 FUN0OP(CvCLONE(PL_compcv) 7976 ? newOP(OP_RUNCV, 0) 7977 : newSVOP(OP_RUNCV, 0, &PL_sv_undef)); 7978 7979 case KEY___CLASS__: 7980 FUN0(OP_CLASSNAME); 7981 7982 case KEY_AUTOLOAD: 7983 case KEY_DESTROY: 7984 case KEY_BEGIN: 7985 case KEY_UNITCHECK: 7986 case KEY_CHECK: 7987 case KEY_INIT: 7988 case KEY_END: 7989 if (PL_expect == XSTATE) 7990 return yyl_sub(aTHX_ PL_bufptr, key); 7991 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7992 7993 case KEY_ADJUST: 7994 Perl_ck_warner_d(aTHX_ 7995 packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental"); 7996 7997 /* The way that KEY_CHECK et.al. are handled currently are nothing 7998 * short of crazy. We won't copy that model for new phasers, but use 7999 * this as an experiment to test if this will work 8000 */ 8001 PHASERBLOCK(KEY_ADJUST); 8002 8003 case KEY_abs: 8004 UNI(OP_ABS); 8005 8006 case KEY_alarm: 8007 UNI(OP_ALARM); 8008 8009 case KEY_accept: 8010 LOP(OP_ACCEPT,XTERM); 8011 8012 case KEY_and: 8013 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8014 return REPORT(0); 8015 OPERATOR(ANDOP); 8016 8017 case KEY_atan2: 8018 LOP(OP_ATAN2,XTERM); 8019 8020 case KEY_bind: 8021 LOP(OP_BIND,XTERM); 8022 8023 case KEY_binmode: 8024 LOP(OP_BINMODE,XTERM); 8025 8026 case KEY_bless: 8027 LOP(OP_BLESS,XTERM); 8028 8029 case KEY_break: 8030 FUN0(OP_BREAK); 8031 8032 case KEY_catch: 8033 PREBLOCK(KW_CATCH); 8034 8035 case KEY_chop: 8036 UNI(OP_CHOP); 8037 8038 case KEY_class: 8039 Perl_ck_warner_d(aTHX_ 8040 packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental"); 8041 8042 s = force_word(s,BAREWORD,FALSE,TRUE); 8043 s = skipspace(s); 8044 s = force_strict_version(s); 8045 PL_expect = XATTRBLOCK; 8046 TOKEN(KW_CLASS); 8047 8048 case KEY_continue: 8049 /* We have to disambiguate the two senses of 8050 "continue". If the next token is a '{' then 8051 treat it as the start of a continue block; 8052 otherwise treat it as a control operator. 8053 */ 8054 s = skipspace(s); 8055 if (*s == '{') 8056 PREBLOCK(KW_CONTINUE); 8057 else 8058 FUN0(OP_CONTINUE); 8059 8060 case KEY_chdir: 8061 /* may use HOME */ 8062 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 8063 UNI(OP_CHDIR); 8064 8065 case KEY_close: 8066 UNI(OP_CLOSE); 8067 8068 case KEY_closedir: 8069 UNI(OP_CLOSEDIR); 8070 8071 case KEY_cmp: 8072 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8073 return REPORT(0); 8074 NCEop(OP_SCMP); 8075 8076 case KEY_caller: 8077 UNI(OP_CALLER); 8078 8079 case KEY_crypt: 8080 8081 LOP(OP_CRYPT,XTERM); 8082 8083 case KEY_chmod: 8084 LOP(OP_CHMOD,XTERM); 8085 8086 case KEY_chown: 8087 LOP(OP_CHOWN,XTERM); 8088 8089 case KEY_connect: 8090 LOP(OP_CONNECT,XTERM); 8091 8092 case KEY_chr: 8093 UNI(OP_CHR); 8094 8095 case KEY_cos: 8096 UNI(OP_COS); 8097 8098 case KEY_chroot: 8099 UNI(OP_CHROOT); 8100 8101 case KEY_default: 8102 PREBLOCK(KW_DEFAULT); 8103 8104 case KEY_defer: 8105 Perl_ck_warner_d(aTHX_ 8106 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental"); 8107 PREBLOCK(KW_DEFER); 8108 8109 case KEY_do: 8110 return yyl_do(aTHX_ s, orig_keyword); 8111 8112 case KEY_die: 8113 PL_hints |= HINT_BLOCK_SCOPE; 8114 LOP(OP_DIE,XTERM); 8115 8116 case KEY_defined: 8117 UNI(OP_DEFINED); 8118 8119 case KEY_delete: 8120 UNI(OP_DELETE); 8121 8122 case KEY_dbmopen: 8123 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 8124 STR_WITH_LEN("NDBM_File::"), 8125 STR_WITH_LEN("DB_File::"), 8126 STR_WITH_LEN("GDBM_File::"), 8127 STR_WITH_LEN("SDBM_File::"), 8128 STR_WITH_LEN("ODBM_File::"), 8129 NULL); 8130 LOP(OP_DBMOPEN,XTERM); 8131 8132 case KEY_dbmclose: 8133 UNI(OP_DBMCLOSE); 8134 8135 case KEY_dump: 8136 LOOPX(OP_DUMP); 8137 8138 case KEY_else: 8139 PREBLOCK(KW_ELSE); 8140 8141 case KEY_elsif: 8142 pl_yylval.ival = CopLINE(PL_curcop); 8143 OPERATOR(KW_ELSIF); 8144 8145 case KEY_eq: 8146 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8147 return REPORT(0); 8148 ChEop(OP_SEQ); 8149 8150 case KEY_exists: 8151 UNI(OP_EXISTS); 8152 8153 case KEY_exit: 8154 UNI(OP_EXIT); 8155 8156 case KEY_eval: 8157 s = skipspace(s); 8158 if (*s == '{') { /* block eval */ 8159 PL_expect = XTERMBLOCK; 8160 UNIBRACK(OP_ENTERTRY); 8161 } 8162 else { /* string eval */ 8163 PL_expect = XTERM; 8164 UNIBRACK(OP_ENTEREVAL); 8165 } 8166 8167 case KEY_evalbytes: 8168 PL_expect = XTERM; 8169 UNIBRACK(-OP_ENTEREVAL); 8170 8171 case KEY_eof: 8172 UNI(OP_EOF); 8173 8174 case KEY_exp: 8175 UNI(OP_EXP); 8176 8177 case KEY_each: 8178 UNI(OP_EACH); 8179 8180 case KEY_exec: 8181 LOP(OP_EXEC,XREF); 8182 8183 case KEY_endhostent: 8184 FUN0(OP_EHOSTENT); 8185 8186 case KEY_endnetent: 8187 FUN0(OP_ENETENT); 8188 8189 case KEY_endservent: 8190 FUN0(OP_ESERVENT); 8191 8192 case KEY_endprotoent: 8193 FUN0(OP_EPROTOENT); 8194 8195 case KEY_endpwent: 8196 FUN0(OP_EPWENT); 8197 8198 case KEY_endgrent: 8199 FUN0(OP_EGRENT); 8200 8201 case KEY_field: 8202 /* TODO: maybe this should use the same parser/grammar structures as 8203 * `my`, but it's also rather messy because of the `our` conflation 8204 */ 8205 Perl_ck_warner_d(aTHX_ 8206 packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental"); 8207 8208 croak_kw_unless_class("field"); 8209 8210 PL_parser->in_my = KEY_field; 8211 OPERATOR(KW_FIELD); 8212 8213 case KEY_finally: 8214 Perl_ck_warner_d(aTHX_ 8215 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental"); 8216 PREBLOCK(KW_FINALLY); 8217 8218 case KEY_for: 8219 case KEY_foreach: 8220 return yyl_foreach(aTHX_ s); 8221 8222 case KEY_formline: 8223 LOP(OP_FORMLINE,XTERM); 8224 8225 case KEY_fork: 8226 FUN0(OP_FORK); 8227 8228 case KEY_fc: 8229 UNI(OP_FC); 8230 8231 case KEY_fcntl: 8232 LOP(OP_FCNTL,XTERM); 8233 8234 case KEY_fileno: 8235 UNI(OP_FILENO); 8236 8237 case KEY_flock: 8238 LOP(OP_FLOCK,XTERM); 8239 8240 case KEY_gt: 8241 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8242 return REPORT(0); 8243 ChRop(OP_SGT); 8244 8245 case KEY_ge: 8246 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8247 return REPORT(0); 8248 ChRop(OP_SGE); 8249 8250 case KEY_grep: 8251 LOP(OP_GREPSTART, XREF); 8252 8253 case KEY_goto: 8254 LOOPX(OP_GOTO); 8255 8256 case KEY_gmtime: 8257 UNI(OP_GMTIME); 8258 8259 case KEY_getc: 8260 UNIDOR(OP_GETC); 8261 8262 case KEY_getppid: 8263 FUN0(OP_GETPPID); 8264 8265 case KEY_getpgrp: 8266 UNI(OP_GETPGRP); 8267 8268 case KEY_getpriority: 8269 LOP(OP_GETPRIORITY,XTERM); 8270 8271 case KEY_getprotobyname: 8272 UNI(OP_GPBYNAME); 8273 8274 case KEY_getprotobynumber: 8275 LOP(OP_GPBYNUMBER,XTERM); 8276 8277 case KEY_getprotoent: 8278 FUN0(OP_GPROTOENT); 8279 8280 case KEY_getpwent: 8281 FUN0(OP_GPWENT); 8282 8283 case KEY_getpwnam: 8284 UNI(OP_GPWNAM); 8285 8286 case KEY_getpwuid: 8287 UNI(OP_GPWUID); 8288 8289 case KEY_getpeername: 8290 UNI(OP_GETPEERNAME); 8291 8292 case KEY_gethostbyname: 8293 UNI(OP_GHBYNAME); 8294 8295 case KEY_gethostbyaddr: 8296 LOP(OP_GHBYADDR,XTERM); 8297 8298 case KEY_gethostent: 8299 FUN0(OP_GHOSTENT); 8300 8301 case KEY_getnetbyname: 8302 UNI(OP_GNBYNAME); 8303 8304 case KEY_getnetbyaddr: 8305 LOP(OP_GNBYADDR,XTERM); 8306 8307 case KEY_getnetent: 8308 FUN0(OP_GNETENT); 8309 8310 case KEY_getservbyname: 8311 LOP(OP_GSBYNAME,XTERM); 8312 8313 case KEY_getservbyport: 8314 LOP(OP_GSBYPORT,XTERM); 8315 8316 case KEY_getservent: 8317 FUN0(OP_GSERVENT); 8318 8319 case KEY_getsockname: 8320 UNI(OP_GETSOCKNAME); 8321 8322 case KEY_getsockopt: 8323 LOP(OP_GSOCKOPT,XTERM); 8324 8325 case KEY_getgrent: 8326 FUN0(OP_GGRENT); 8327 8328 case KEY_getgrnam: 8329 UNI(OP_GGRNAM); 8330 8331 case KEY_getgrgid: 8332 UNI(OP_GGRGID); 8333 8334 case KEY_getlogin: 8335 FUN0(OP_GETLOGIN); 8336 8337 case KEY_given: 8338 pl_yylval.ival = CopLINE(PL_curcop); 8339 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH), 8340 "given is deprecated"); 8341 OPERATOR(KW_GIVEN); 8342 8343 case KEY_glob: 8344 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM ); 8345 8346 case KEY_hex: 8347 UNI(OP_HEX); 8348 8349 case KEY_if: 8350 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8351 return REPORT(0); 8352 pl_yylval.ival = CopLINE(PL_curcop); 8353 OPERATOR(KW_IF); 8354 8355 case KEY_index: 8356 LOP(OP_INDEX,XTERM); 8357 8358 case KEY_int: 8359 UNI(OP_INT); 8360 8361 case KEY_ioctl: 8362 LOP(OP_IOCTL,XTERM); 8363 8364 case KEY_isa: 8365 NCRop(OP_ISA); 8366 8367 case KEY_join: 8368 LOP(OP_JOIN,XTERM); 8369 8370 case KEY_keys: 8371 UNI(OP_KEYS); 8372 8373 case KEY_kill: 8374 LOP(OP_KILL,XTERM); 8375 8376 case KEY_last: 8377 LOOPX(OP_LAST); 8378 8379 case KEY_lc: 8380 UNI(OP_LC); 8381 8382 case KEY_lcfirst: 8383 UNI(OP_LCFIRST); 8384 8385 case KEY_local: 8386 OPERATOR(KW_LOCAL); 8387 8388 case KEY_length: 8389 UNI(OP_LENGTH); 8390 8391 case KEY_lt: 8392 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8393 return REPORT(0); 8394 ChRop(OP_SLT); 8395 8396 case KEY_le: 8397 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8398 return REPORT(0); 8399 ChRop(OP_SLE); 8400 8401 case KEY_localtime: 8402 UNI(OP_LOCALTIME); 8403 8404 case KEY_log: 8405 UNI(OP_LOG); 8406 8407 case KEY_link: 8408 LOP(OP_LINK,XTERM); 8409 8410 case KEY_listen: 8411 LOP(OP_LISTEN,XTERM); 8412 8413 case KEY_lock: 8414 UNI(OP_LOCK); 8415 8416 case KEY_lstat: 8417 UNI(OP_LSTAT); 8418 8419 case KEY_m: 8420 s = scan_pat(s,OP_MATCH); 8421 TERM(sublex_start()); 8422 8423 case KEY_map: 8424 LOP(OP_MAPSTART, XREF); 8425 8426 case KEY_mkdir: 8427 LOP(OP_MKDIR,XTERM); 8428 8429 case KEY_msgctl: 8430 LOP(OP_MSGCTL,XTERM); 8431 8432 case KEY_msgget: 8433 LOP(OP_MSGGET,XTERM); 8434 8435 case KEY_msgrcv: 8436 LOP(OP_MSGRCV,XTERM); 8437 8438 case KEY_msgsnd: 8439 LOP(OP_MSGSND,XTERM); 8440 8441 case KEY_our: 8442 case KEY_my: 8443 case KEY_state: 8444 return yyl_my(aTHX_ s, key); 8445 8446 case KEY_next: 8447 LOOPX(OP_NEXT); 8448 8449 case KEY_ne: 8450 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8451 return REPORT(0); 8452 ChEop(OP_SNE); 8453 8454 case KEY_no: 8455 s = tokenize_use(0, s); 8456 TOKEN(KW_USE_or_NO); 8457 8458 case KEY_not: 8459 if (*s == '(' || (s = skipspace(s), *s == '(')) 8460 FUN1(OP_NOT); 8461 else { 8462 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8463 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8464 OPERATOR(NOTOP); 8465 } 8466 8467 case KEY_open: 8468 s = skipspace(s); 8469 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 8470 const char *t; 8471 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 8472 for (t=d; isSPACE(*t);) 8473 t++; 8474 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8475 /* [perl #16184] */ 8476 && !(t[0] == '=' && t[1] == '>') 8477 && !(t[0] == ':' && t[1] == ':') 8478 && !keyword(s, d-s, 0) 8479 ) { 8480 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8481 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")", 8482 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 8483 } 8484 } 8485 LOP(OP_OPEN,XTERM); 8486 8487 case KEY_or: 8488 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8489 return REPORT(0); 8490 pl_yylval.ival = OP_OR; 8491 OPERATOR(OROP); 8492 8493 case KEY_ord: 8494 UNI(OP_ORD); 8495 8496 case KEY_oct: 8497 UNI(OP_OCT); 8498 8499 case KEY_opendir: 8500 LOP(OP_OPEN_DIR,XTERM); 8501 8502 case KEY_print: 8503 checkcomma(s,PL_tokenbuf,"filehandle"); 8504 LOP(OP_PRINT,XREF); 8505 8506 case KEY_printf: 8507 checkcomma(s,PL_tokenbuf,"filehandle"); 8508 LOP(OP_PRTF,XREF); 8509 8510 case KEY_prototype: 8511 UNI(OP_PROTOTYPE); 8512 8513 case KEY_push: 8514 LOP(OP_PUSH,XTERM); 8515 8516 case KEY_pop: 8517 UNIDOR(OP_POP); 8518 8519 case KEY_pos: 8520 UNIDOR(OP_POS); 8521 8522 case KEY_pack: 8523 LOP(OP_PACK,XTERM); 8524 8525 case KEY_package: 8526 s = force_word(s,BAREWORD,FALSE,TRUE); 8527 s = skipspace(s); 8528 s = force_strict_version(s); 8529 PREBLOCK(KW_PACKAGE); 8530 8531 case KEY_pipe: 8532 LOP(OP_PIPE_OP,XTERM); 8533 8534 case KEY_q: 8535 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8536 if (!s) 8537 missingterm(NULL, 0); 8538 COPLINE_SET_FROM_MULTI_END; 8539 pl_yylval.ival = OP_CONST; 8540 TERM(sublex_start()); 8541 8542 case KEY_quotemeta: 8543 UNI(OP_QUOTEMETA); 8544 8545 case KEY_qw: 8546 return yyl_qw(aTHX_ s, len); 8547 8548 case KEY_qq: 8549 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8550 if (!s) 8551 missingterm(NULL, 0); 8552 pl_yylval.ival = OP_STRINGIFY; 8553 if (SvIVX(PL_lex_stuff) == '\'') 8554 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8555 TERM(sublex_start()); 8556 8557 case KEY_qr: 8558 s = scan_pat(s,OP_QR); 8559 TERM(sublex_start()); 8560 8561 case KEY_qx: 8562 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8563 if (!s) 8564 missingterm(NULL, 0); 8565 pl_yylval.ival = OP_BACKTICK; 8566 TERM(sublex_start()); 8567 8568 case KEY_return: 8569 OLDLOP(OP_RETURN); 8570 8571 case KEY_require: 8572 return yyl_require(aTHX_ s, orig_keyword); 8573 8574 case KEY_reset: 8575 UNI(OP_RESET); 8576 8577 case KEY_redo: 8578 LOOPX(OP_REDO); 8579 8580 case KEY_rename: 8581 LOP(OP_RENAME,XTERM); 8582 8583 case KEY_rand: 8584 UNI(OP_RAND); 8585 8586 case KEY_rmdir: 8587 UNI(OP_RMDIR); 8588 8589 case KEY_rindex: 8590 LOP(OP_RINDEX,XTERM); 8591 8592 case KEY_read: 8593 LOP(OP_READ,XTERM); 8594 8595 case KEY_readdir: 8596 UNI(OP_READDIR); 8597 8598 case KEY_readline: 8599 UNIDOR(OP_READLINE); 8600 8601 case KEY_readpipe: 8602 UNIDOR(OP_BACKTICK); 8603 8604 case KEY_rewinddir: 8605 UNI(OP_REWINDDIR); 8606 8607 case KEY_recv: 8608 LOP(OP_RECV,XTERM); 8609 8610 case KEY_reverse: 8611 LOP(OP_REVERSE,XTERM); 8612 8613 case KEY_readlink: 8614 UNIDOR(OP_READLINK); 8615 8616 case KEY_ref: 8617 UNI(OP_REF); 8618 8619 case KEY_s: 8620 s = scan_subst(s); 8621 if (pl_yylval.opval) 8622 TERM(sublex_start()); 8623 else 8624 TOKEN(1); /* force error */ 8625 8626 case KEY_say: 8627 checkcomma(s,PL_tokenbuf,"filehandle"); 8628 LOP(OP_SAY,XREF); 8629 8630 case KEY_chomp: 8631 UNI(OP_CHOMP); 8632 8633 case KEY_scalar: 8634 UNI(OP_SCALAR); 8635 8636 case KEY_select: 8637 LOP(OP_SELECT,XTERM); 8638 8639 case KEY_seek: 8640 LOP(OP_SEEK,XTERM); 8641 8642 case KEY_semctl: 8643 LOP(OP_SEMCTL,XTERM); 8644 8645 case KEY_semget: 8646 LOP(OP_SEMGET,XTERM); 8647 8648 case KEY_semop: 8649 LOP(OP_SEMOP,XTERM); 8650 8651 case KEY_send: 8652 LOP(OP_SEND,XTERM); 8653 8654 case KEY_setpgrp: 8655 LOP(OP_SETPGRP,XTERM); 8656 8657 case KEY_setpriority: 8658 LOP(OP_SETPRIORITY,XTERM); 8659 8660 case KEY_sethostent: 8661 UNI(OP_SHOSTENT); 8662 8663 case KEY_setnetent: 8664 UNI(OP_SNETENT); 8665 8666 case KEY_setservent: 8667 UNI(OP_SSERVENT); 8668 8669 case KEY_setprotoent: 8670 UNI(OP_SPROTOENT); 8671 8672 case KEY_setpwent: 8673 FUN0(OP_SPWENT); 8674 8675 case KEY_setgrent: 8676 FUN0(OP_SGRENT); 8677 8678 case KEY_seekdir: 8679 LOP(OP_SEEKDIR,XTERM); 8680 8681 case KEY_setsockopt: 8682 LOP(OP_SSOCKOPT,XTERM); 8683 8684 case KEY_shift: 8685 UNIDOR(OP_SHIFT); 8686 8687 case KEY_shmctl: 8688 LOP(OP_SHMCTL,XTERM); 8689 8690 case KEY_shmget: 8691 LOP(OP_SHMGET,XTERM); 8692 8693 case KEY_shmread: 8694 LOP(OP_SHMREAD,XTERM); 8695 8696 case KEY_shmwrite: 8697 LOP(OP_SHMWRITE,XTERM); 8698 8699 case KEY_shutdown: 8700 LOP(OP_SHUTDOWN,XTERM); 8701 8702 case KEY_sin: 8703 UNI(OP_SIN); 8704 8705 case KEY_sleep: 8706 UNI(OP_SLEEP); 8707 8708 case KEY_socket: 8709 LOP(OP_SOCKET,XTERM); 8710 8711 case KEY_socketpair: 8712 LOP(OP_SOCKPAIR,XTERM); 8713 8714 case KEY_sort: 8715 checkcomma(s,PL_tokenbuf,"subroutine name"); 8716 s = skipspace(s); 8717 PL_expect = XTERM; 8718 s = force_word(s,BAREWORD,TRUE,TRUE); 8719 LOP(OP_SORT,XREF); 8720 8721 case KEY_split: 8722 LOP(OP_SPLIT,XTERM); 8723 8724 case KEY_sprintf: 8725 LOP(OP_SPRINTF,XTERM); 8726 8727 case KEY_splice: 8728 LOP(OP_SPLICE,XTERM); 8729 8730 case KEY_sqrt: 8731 UNI(OP_SQRT); 8732 8733 case KEY_srand: 8734 UNI(OP_SRAND); 8735 8736 case KEY_stat: 8737 UNI(OP_STAT); 8738 8739 case KEY_study: 8740 UNI(OP_STUDY); 8741 8742 case KEY_substr: 8743 LOP(OP_SUBSTR,XTERM); 8744 8745 case KEY_method: 8746 /* For now we just treat 'method' identical to 'sub' plus a warning */ 8747 Perl_ck_warner_d(aTHX_ 8748 packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental"); 8749 return yyl_sub(aTHX_ s, KEY_method); 8750 8751 case KEY_format: 8752 case KEY_sub: 8753 return yyl_sub(aTHX_ s, key); 8754 8755 case KEY_system: 8756 LOP(OP_SYSTEM,XREF); 8757 8758 case KEY_symlink: 8759 LOP(OP_SYMLINK,XTERM); 8760 8761 case KEY_syscall: 8762 LOP(OP_SYSCALL,XTERM); 8763 8764 case KEY_sysopen: 8765 LOP(OP_SYSOPEN,XTERM); 8766 8767 case KEY_sysseek: 8768 LOP(OP_SYSSEEK,XTERM); 8769 8770 case KEY_sysread: 8771 LOP(OP_SYSREAD,XTERM); 8772 8773 case KEY_syswrite: 8774 LOP(OP_SYSWRITE,XTERM); 8775 8776 case KEY_tr: 8777 case KEY_y: 8778 s = scan_trans(s); 8779 TERM(sublex_start()); 8780 8781 case KEY_tell: 8782 UNI(OP_TELL); 8783 8784 case KEY_telldir: 8785 UNI(OP_TELLDIR); 8786 8787 case KEY_tie: 8788 LOP(OP_TIE,XTERM); 8789 8790 case KEY_tied: 8791 UNI(OP_TIED); 8792 8793 case KEY_time: 8794 FUN0(OP_TIME); 8795 8796 case KEY_times: 8797 FUN0(OP_TMS); 8798 8799 case KEY_truncate: 8800 LOP(OP_TRUNCATE,XTERM); 8801 8802 case KEY_try: 8803 pl_yylval.ival = CopLINE(PL_curcop); 8804 PREBLOCK(KW_TRY); 8805 8806 case KEY_uc: 8807 UNI(OP_UC); 8808 8809 case KEY_ucfirst: 8810 UNI(OP_UCFIRST); 8811 8812 case KEY_untie: 8813 UNI(OP_UNTIE); 8814 8815 case KEY_until: 8816 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8817 return REPORT(0); 8818 pl_yylval.ival = CopLINE(PL_curcop); 8819 OPERATOR(KW_UNTIL); 8820 8821 case KEY_unless: 8822 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8823 return REPORT(0); 8824 pl_yylval.ival = CopLINE(PL_curcop); 8825 OPERATOR(KW_UNLESS); 8826 8827 case KEY_unlink: 8828 LOP(OP_UNLINK,XTERM); 8829 8830 case KEY_undef: 8831 UNIDOR(OP_UNDEF); 8832 8833 case KEY_unpack: 8834 LOP(OP_UNPACK,XTERM); 8835 8836 case KEY_utime: 8837 LOP(OP_UTIME,XTERM); 8838 8839 case KEY_umask: 8840 UNIDOR(OP_UMASK); 8841 8842 case KEY_unshift: 8843 LOP(OP_UNSHIFT,XTERM); 8844 8845 case KEY_use: 8846 s = tokenize_use(1, s); 8847 TOKEN(KW_USE_or_NO); 8848 8849 case KEY_values: 8850 UNI(OP_VALUES); 8851 8852 case KEY_vec: 8853 LOP(OP_VEC,XTERM); 8854 8855 case KEY_when: 8856 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8857 return REPORT(0); 8858 pl_yylval.ival = CopLINE(PL_curcop); 8859 Perl_ck_warner_d(aTHX_ 8860 packWARN(WARN_DEPRECATED__SMARTMATCH), 8861 "when is deprecated"); 8862 OPERATOR(KW_WHEN); 8863 8864 case KEY_while: 8865 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8866 return REPORT(0); 8867 pl_yylval.ival = CopLINE(PL_curcop); 8868 OPERATOR(KW_WHILE); 8869 8870 case KEY_warn: 8871 PL_hints |= HINT_BLOCK_SCOPE; 8872 LOP(OP_WARN,XTERM); 8873 8874 case KEY_wait: 8875 FUN0(OP_WAIT); 8876 8877 case KEY_waitpid: 8878 LOP(OP_WAITPID,XTERM); 8879 8880 case KEY_wantarray: 8881 FUN0(OP_WANTARRAY); 8882 8883 case KEY_write: 8884 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8885 * we use the same number on EBCDIC */ 8886 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8887 UNI(OP_ENTERWRITE); 8888 8889 case KEY_x: 8890 if (PL_expect == XOPERATOR) { 8891 if (*s == '=' && !PL_lex_allbrackets 8892 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8893 { 8894 return REPORT(0); 8895 } 8896 Mop(OP_REPEAT); 8897 } 8898 check_uni(); 8899 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 8900 8901 case KEY_xor: 8902 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8903 return REPORT(0); 8904 pl_yylval.ival = OP_XOR; 8905 OPERATOR(OROP); 8906 } 8907 } 8908 8909 static int 8910 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c) 8911 { 8912 I32 key = 0; 8913 I32 orig_keyword = 0; 8914 STRLEN olen = len; 8915 char *d = s; 8916 s += 2; 8917 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 8918 if ((*s == ':' && s[1] == ':') 8919 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 8920 { 8921 Copy(PL_bufptr, PL_tokenbuf, olen, char); 8922 return yyl_just_a_word(aTHX_ d, olen, 0, c); 8923 } 8924 if (!key) 8925 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", 8926 UTF8fARG(UTF, len, PL_tokenbuf)); 8927 if (key < 0) 8928 key = -key; 8929 else if (key == KEY_require || key == KEY_do 8930 || key == KEY_glob) 8931 /* that's a way to remember we saw "CORE::" */ 8932 orig_keyword = key; 8933 8934 /* Known to be a reserved word at this point */ 8935 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8936 } 8937 8938 struct Perl_custom_infix_result { 8939 struct Perl_custom_infix *def; 8940 SV *parsedata; 8941 }; 8942 8943 static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def) 8944 { 8945 enum Perl_custom_infix_precedence prec = def->prec; 8946 if(prec <= INFIX_PREC_LOW) 8947 return PLUGIN_LOW_OP; 8948 if(prec <= INFIX_PREC_LOGICAL_OR_LOW) 8949 return PLUGIN_LOGICAL_OR_LOW_OP; 8950 if(prec <= INFIX_PREC_LOGICAL_AND_LOW) 8951 return PLUGIN_LOGICAL_AND_LOW_OP; 8952 if(prec <= INFIX_PREC_ASSIGN) 8953 return PLUGIN_ASSIGN_OP; 8954 if(prec <= INFIX_PREC_LOGICAL_OR) 8955 return PLUGIN_LOGICAL_OR_OP; 8956 if(prec <= INFIX_PREC_LOGICAL_AND) 8957 return PLUGIN_LOGICAL_AND_OP; 8958 if(prec <= INFIX_PREC_REL) 8959 return PLUGIN_REL_OP; 8960 if(prec <= INFIX_PREC_ADD) 8961 return PLUGIN_ADD_OP; 8962 if(prec <= INFIX_PREC_MUL) 8963 return PLUGIN_MUL_OP; 8964 if(prec <= INFIX_PREC_POW) 8965 return PLUGIN_POW_OP; 8966 return PLUGIN_HIGH_OP; 8967 } 8968 8969 OP * 8970 Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata) 8971 { 8972 PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN; 8973 8974 struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata; 8975 SAVEFREEPV(result); 8976 if(result->parsedata) 8977 SAVEFREESV(result->parsedata); 8978 8979 return (*result->def->build_op)(aTHX_ 8980 &result->parsedata, lhs, rhs, result->def); 8981 } 8982 8983 static int 8984 yyl_keylookup(pTHX_ char *s, GV *gv) 8985 { 8986 STRLEN len; 8987 bool anydelim; 8988 I32 key; 8989 struct code c = no_code; 8990 I32 orig_keyword = 0; 8991 char *d; 8992 8993 c.gv = gv; 8994 8995 PL_bufptr = s; 8996 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 8997 8998 /* Some keywords can be followed by any delimiter, including ':' */ 8999 anydelim = word_takes_any_delimiter(PL_tokenbuf, len); 9000 9001 /* x::* is just a word, unless x is "CORE" */ 9002 if (!anydelim && *s == ':' && s[1] == ':') { 9003 if (memEQs(PL_tokenbuf, len, "CORE")) 9004 return yyl_key_core(aTHX_ s, len, c); 9005 return yyl_just_a_word(aTHX_ s, len, 0, c); 9006 } 9007 9008 d = s; 9009 while (d < PL_bufend && isSPACE(*d)) 9010 d++; /* no comments skipped here, or s### is misparsed */ 9011 9012 /* Is this a word before a => operator? */ 9013 if (*d == '=' && d[1] == '>') { 9014 return yyl_fatcomma(aTHX_ s, len); 9015 } 9016 9017 /* Check for plugged-in keyword */ 9018 { 9019 OP *o; 9020 int result; 9021 char *saved_bufptr = PL_bufptr; 9022 PL_bufptr = s; 9023 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 9024 s = PL_bufptr; 9025 if (result == KEYWORD_PLUGIN_DECLINE) { 9026 /* not a plugged-in keyword */ 9027 PL_bufptr = saved_bufptr; 9028 } else if (result == KEYWORD_PLUGIN_STMT) { 9029 pl_yylval.opval = o; 9030 CLINE; 9031 if (!PL_nexttoke) PL_expect = XSTATE; 9032 return REPORT(PLUGSTMT); 9033 } else if (result == KEYWORD_PLUGIN_EXPR) { 9034 pl_yylval.opval = o; 9035 CLINE; 9036 if (!PL_nexttoke) PL_expect = XOPERATOR; 9037 return REPORT(PLUGEXPR); 9038 } else { 9039 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf); 9040 } 9041 } 9042 9043 /* Check for plugged-in named operator */ 9044 if(PLUGINFIX_IS_ENABLED) { 9045 struct Perl_custom_infix *def; 9046 STRLEN result; 9047 result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def); 9048 if(result) { 9049 if(result != len) 9050 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n", 9051 result, PL_tokenbuf); 9052 PL_bufptr = s = d; 9053 struct Perl_custom_infix_result *result; 9054 Newx(result, 1, struct Perl_custom_infix_result); 9055 result->def = def; 9056 result->parsedata = NULL; 9057 if(def->parse) { 9058 (*def->parse)(aTHX_ &result->parsedata, def); 9059 s = PL_bufptr; /* restore local s variable */ 9060 } 9061 pl_yylval.pval = result; 9062 CLINE; 9063 OPERATOR(tokentype_for_plugop(def)); 9064 } 9065 } 9066 9067 /* Is this a label? */ 9068 if (!anydelim && PL_expect == XSTATE 9069 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 9070 s = d + 1; 9071 pl_yylval.opval = 9072 newSVOP(OP_CONST, 0, 9073 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); 9074 CLINE; 9075 TOKEN(LABEL); 9076 } 9077 9078 /* Check for lexical sub */ 9079 if (PL_expect != XOPERATOR) { 9080 char tmpbuf[sizeof PL_tokenbuf + 1]; 9081 *tmpbuf = '&'; 9082 Copy(PL_tokenbuf, tmpbuf+1, len, char); 9083 c.off = pad_findmy_pvn(tmpbuf, len+1, 0); 9084 if (c.off != NOT_IN_PAD) { 9085 assert(c.off); /* we assume this is boolean-true below */ 9086 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) { 9087 HV * const stash = PAD_COMPNAME_OURSTASH(c.off); 9088 HEK * const stashname = HvNAME_HEK(stash); 9089 c.sv = newSVhek(stashname); 9090 sv_catpvs(c.sv, "::"); 9091 sv_catpvn_flags(c.sv, PL_tokenbuf, len, 9092 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 9093 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv), 9094 SVt_PVCV); 9095 c.off = 0; 9096 if (!c.gv) { 9097 ASSUME(c.sv && SvREFCNT(c.sv) == 1); 9098 SvREFCNT_dec(c.sv); 9099 c.sv = NULL; 9100 return yyl_just_a_word(aTHX_ s, len, 0, c); 9101 } 9102 } 9103 else { 9104 c.rv2cv_op = newOP(OP_PADANY, 0); 9105 c.rv2cv_op->op_targ = c.off; 9106 c.cv = find_lexical_cv(c.off); 9107 } 9108 c.lex = TRUE; 9109 return yyl_just_a_word(aTHX_ s, len, 0, c); 9110 } 9111 c.off = 0; 9112 } 9113 9114 /* Check for built-in keyword */ 9115 key = keyword(PL_tokenbuf, len, 0); 9116 9117 if (key < 0) 9118 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp); 9119 9120 if (key && key != KEY___DATA__ && key != KEY___END__ 9121 && (!anydelim || *s != '#')) { 9122 /* no override, and not s### either; skipspace is safe here 9123 * check for => on following line */ 9124 bool arrow; 9125 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 9126 STRLEN soff = s - SvPVX(PL_linestr); 9127 s = peekspace(s); 9128 arrow = *s == '=' && s[1] == '>'; 9129 PL_bufptr = SvPVX(PL_linestr) + bufoff; 9130 s = SvPVX(PL_linestr) + soff; 9131 if (arrow) 9132 return yyl_fatcomma(aTHX_ s, len); 9133 } 9134 9135 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 9136 } 9137 9138 static int 9139 yyl_try(pTHX_ char *s) 9140 { 9141 char *d; 9142 GV *gv = NULL; 9143 int tok; 9144 9145 retry: 9146 /* Check for plugged-in symbolic operator */ 9147 if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) { 9148 struct Perl_custom_infix *def; 9149 char *s_end = s, *d = PL_tokenbuf; 9150 STRLEN len; 9151 9152 /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */ 9153 while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end)) 9154 *d++ = *s_end++; 9155 *d = '\0'; 9156 9157 if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) { 9158 s += len; 9159 struct Perl_custom_infix_result *result; 9160 Newx(result, 1, struct Perl_custom_infix_result); 9161 result->def = def; 9162 result->parsedata = NULL; 9163 if(def->parse) { 9164 PL_bufptr = s; 9165 (*def->parse)(aTHX_ &result->parsedata, def); 9166 s = PL_bufptr; /* restore local s variable */ 9167 } 9168 pl_yylval.pval = result; 9169 CLINE; 9170 OPERATOR(tokentype_for_plugop(def)); 9171 } 9172 } 9173 9174 switch (*s) { 9175 default: 9176 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) { 9177 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9178 return tok; 9179 goto retry_bufptr; 9180 } 9181 yyl_croak_unrecognised(aTHX_ s); 9182 9183 case 4: 9184 case 26: 9185 /* emulate EOF on ^D or ^Z */ 9186 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY) 9187 return tok; 9188 retry_bufptr: 9189 s = PL_bufptr; 9190 goto retry; 9191 9192 case 0: 9193 if ((!PL_rsfp || PL_lex_inwhat) 9194 && (!PL_parser->filtered || s+1 < PL_bufend)) { 9195 PL_last_uni = 0; 9196 PL_last_lop = 0; 9197 if (PL_lex_brackets 9198 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) 9199 { 9200 yyerror((const char *) 9201 (PL_lex_formbrack 9202 ? "Format not terminated" 9203 : "Missing right curly or square bracket")); 9204 } 9205 DEBUG_T({ 9206 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); 9207 }); 9208 TOKEN(0); 9209 } 9210 if (s++ < PL_bufend) 9211 goto retry; /* ignore stray nulls */ 9212 PL_last_uni = 0; 9213 PL_last_lop = 0; 9214 if (!PL_in_eval && !PL_preambled) { 9215 PL_preambled = TRUE; 9216 if (PL_perldb) { 9217 /* Generate a string of Perl code to load the debugger. 9218 * If PERL5DB is set, it will return the contents of that, 9219 * otherwise a compile-time require of perl5db.pl. */ 9220 9221 const char * const pdb = PerlEnv_getenv("PERL5DB"); 9222 9223 if (pdb) { 9224 sv_setpv(PL_linestr, pdb); 9225 sv_catpvs(PL_linestr,";"); 9226 } else { 9227 SETERRNO(0,SS_NORMAL); 9228 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 9229 } 9230 PL_parser->preambling = CopLINE(PL_curcop); 9231 } else 9232 SvPVCLEAR(PL_linestr); 9233 if (PL_preambleav) { 9234 SV **svp = AvARRAY(PL_preambleav); 9235 SV **const end = svp + AvFILLp(PL_preambleav); 9236 while(svp <= end) { 9237 sv_catsv(PL_linestr, *svp); 9238 ++svp; 9239 sv_catpvs(PL_linestr, ";"); 9240 } 9241 SvREFCNT_dec(MUTABLE_SV(PL_preambleav)); 9242 PL_preambleav = NULL; 9243 } 9244 if (PL_minus_E) 9245 sv_catpvs(PL_linestr, 9246 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "'; " 9247 "use builtin ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';"); 9248 if (PL_minus_n || PL_minus_p) { 9249 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 9250 if (PL_minus_l) 9251 sv_catpvs(PL_linestr,"chomp;"); 9252 if (PL_minus_a) { 9253 if (PL_minus_F) { 9254 if ( ( *PL_splitstr == '/' 9255 || *PL_splitstr == '\'' 9256 || *PL_splitstr == '"') 9257 && strchr(PL_splitstr + 1, *PL_splitstr)) 9258 { 9259 /* strchr is ok, because -F pattern can't contain 9260 * embedded NULs */ 9261 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 9262 } 9263 else { 9264 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 9265 bytes can be used as quoting characters. :-) */ 9266 const char *splits = PL_splitstr; 9267 sv_catpvs(PL_linestr, "our @F=split(q\0"); 9268 do { 9269 /* Need to \ \s */ 9270 if (*splits == '\\') 9271 sv_catpvn(PL_linestr, splits, 1); 9272 sv_catpvn(PL_linestr, splits, 1); 9273 } while (*splits++); 9274 /* This loop will embed the trailing NUL of 9275 PL_linestr as the last thing it does before 9276 terminating. */ 9277 sv_catpvs(PL_linestr, ");"); 9278 } 9279 } 9280 else 9281 sv_catpvs(PL_linestr,"our @F=split(' ');"); 9282 } 9283 } 9284 sv_catpvs(PL_linestr, "\n"); 9285 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 9286 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 9287 PL_last_lop = PL_last_uni = NULL; 9288 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) 9289 update_debugger_info(PL_linestr, NULL, 0); 9290 goto retry; 9291 } 9292 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY) 9293 return tok; 9294 goto retry_bufptr; 9295 9296 case '\r': 9297 #ifdef PERL_STRICT_CR 9298 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 9299 Perl_croak(aTHX_ 9300 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 9301 #endif 9302 case ' ': case '\t': case '\f': case '\v': 9303 s++; 9304 goto retry; 9305 9306 case '#': 9307 case '\n': { 9308 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s); 9309 if (needs_semicolon) 9310 TOKEN(PERLY_SEMICOLON); 9311 else 9312 goto retry; 9313 } 9314 9315 case '-': 9316 return yyl_hyphen(aTHX_ s); 9317 9318 case '+': 9319 return yyl_plus(aTHX_ s); 9320 9321 case '*': 9322 return yyl_star(aTHX_ s); 9323 9324 case '%': 9325 return yyl_percent(aTHX_ s); 9326 9327 case '^': 9328 return yyl_caret(aTHX_ s); 9329 9330 case '[': 9331 return yyl_leftsquare(aTHX_ s); 9332 9333 case '~': 9334 return yyl_tilde(aTHX_ s); 9335 9336 case ',': 9337 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 9338 TOKEN(0); 9339 s++; 9340 OPERATOR(PERLY_COMMA); 9341 case ':': 9342 if (s[1] == ':') 9343 return yyl_just_a_word(aTHX_ s, 0, 0, no_code); 9344 return yyl_colon(aTHX_ s + 1); 9345 9346 case '(': 9347 return yyl_leftparen(aTHX_ s + 1); 9348 9349 case ';': 9350 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 9351 TOKEN(0); 9352 CLINE; 9353 s++; 9354 PL_expect = XSTATE; 9355 TOKEN(PERLY_SEMICOLON); 9356 9357 case ')': 9358 return yyl_rightparen(aTHX_ s); 9359 9360 case ']': 9361 return yyl_rightsquare(aTHX_ s); 9362 9363 case '{': 9364 return yyl_leftcurly(aTHX_ s + 1, 0); 9365 9366 case '}': 9367 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 9368 TOKEN(0); 9369 return yyl_rightcurly(aTHX_ s, 0); 9370 9371 case '&': 9372 return yyl_ampersand(aTHX_ s); 9373 9374 case '|': 9375 return yyl_verticalbar(aTHX_ s); 9376 9377 case '=': 9378 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') 9379 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "=====")) 9380 { 9381 s = vcs_conflict_marker(s + 7); 9382 goto retry; 9383 } 9384 9385 s++; 9386 { 9387 const char tmp = *s++; 9388 if (tmp == '=') { 9389 if (!PL_lex_allbrackets 9390 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 9391 { 9392 s -= 2; 9393 TOKEN(0); 9394 } 9395 ChEop(OP_EQ); 9396 } 9397 if (tmp == '>') { 9398 if (!PL_lex_allbrackets 9399 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 9400 { 9401 s -= 2; 9402 TOKEN(0); 9403 } 9404 OPERATOR(PERLY_COMMA); 9405 } 9406 if (tmp == '~') 9407 PMop(OP_MATCH); 9408 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 9409 && memCHRs("+-*/%.^&|<",tmp)) 9410 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9411 "Reversed %c= operator",(int)tmp); 9412 s--; 9413 if (PL_expect == XSTATE 9414 && isALPHA(tmp) 9415 && (s == PL_linestart+1 || s[-2] == '\n') ) 9416 { 9417 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) 9418 || PL_lex_state != LEX_NORMAL) 9419 { 9420 d = PL_bufend; 9421 while (s < d) { 9422 if (*s++ == '\n') { 9423 incline(s, PL_bufend); 9424 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) 9425 { 9426 s = (char *) memchr(s,'\n', d - s); 9427 if (s) 9428 s++; 9429 else 9430 s = d; 9431 incline(s, PL_bufend); 9432 goto retry; 9433 } 9434 } 9435 } 9436 goto retry; 9437 } 9438 s = PL_bufend; 9439 PL_parser->in_pod = 1; 9440 goto retry; 9441 } 9442 } 9443 if (PL_expect == XBLOCK) { 9444 const char *t = s; 9445 #ifdef PERL_STRICT_CR 9446 while (SPACE_OR_TAB(*t)) 9447 #else 9448 while (SPACE_OR_TAB(*t) || *t == '\r') 9449 #endif 9450 t++; 9451 if (*t == '\n' || *t == '#') { 9452 ENTER_with_name("lex_format"); 9453 SAVEI8(PL_parser->form_lex_state); 9454 SAVEI32(PL_lex_formbrack); 9455 PL_parser->form_lex_state = PL_lex_state; 9456 PL_lex_formbrack = PL_lex_brackets + 1; 9457 PL_parser->sub_error_count = PL_error_count; 9458 return yyl_leftcurly(aTHX_ s, 1); 9459 } 9460 } 9461 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 9462 s--; 9463 TOKEN(0); 9464 } 9465 pl_yylval.ival = 0; 9466 OPERATOR(ASSIGNOP); 9467 9468 case '!': 9469 return yyl_bang(aTHX_ s + 1); 9470 9471 case '<': 9472 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') 9473 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<")) 9474 { 9475 s = vcs_conflict_marker(s + 7); 9476 goto retry; 9477 } 9478 return yyl_leftpointy(aTHX_ s); 9479 9480 case '>': 9481 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') 9482 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>")) 9483 { 9484 s = vcs_conflict_marker(s + 7); 9485 goto retry; 9486 } 9487 return yyl_rightpointy(aTHX_ s + 1); 9488 9489 case '$': 9490 return yyl_dollar(aTHX_ s); 9491 9492 case '@': 9493 return yyl_snail(aTHX_ s); 9494 9495 case '/': /* may be division, defined-or, or pattern */ 9496 return yyl_slash(aTHX_ s); 9497 9498 case '?': /* conditional */ 9499 s++; 9500 if (!PL_lex_allbrackets 9501 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) 9502 { 9503 s--; 9504 TOKEN(0); 9505 } 9506 PL_lex_allbrackets++; 9507 OPERATOR(PERLY_QUESTION_MARK); 9508 9509 case '.': 9510 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 9511 #ifdef PERL_STRICT_CR 9512 && s[1] == '\n' 9513 #else 9514 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 9515 #endif 9516 && (s == PL_linestart || s[-1] == '\n') ) 9517 { 9518 PL_expect = XSTATE; 9519 /* formbrack==2 means dot seen where arguments expected */ 9520 return yyl_rightcurly(aTHX_ s, 2); 9521 } 9522 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 9523 s += 3; 9524 OPERATOR(YADAYADA); 9525 } 9526 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 9527 char tmp = *s++; 9528 if (*s == tmp) { 9529 if (!PL_lex_allbrackets 9530 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) 9531 { 9532 s--; 9533 TOKEN(0); 9534 } 9535 s++; 9536 if (*s == tmp) { 9537 s++; 9538 pl_yylval.ival = OPf_SPECIAL; 9539 } 9540 else 9541 pl_yylval.ival = 0; 9542 OPERATOR(DOTDOT); 9543 } 9544 if (*s == '=' && !PL_lex_allbrackets 9545 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 9546 { 9547 s--; 9548 TOKEN(0); 9549 } 9550 Aop(OP_CONCAT); 9551 } 9552 /* FALLTHROUGH */ 9553 case '0': case '1': case '2': case '3': case '4': 9554 case '5': case '6': case '7': case '8': case '9': 9555 s = scan_num(s, &pl_yylval); 9556 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 9557 if (PL_expect == XOPERATOR) 9558 no_op("Number",s); 9559 TERM(THING); 9560 9561 case '\'': 9562 return yyl_sglquote(aTHX_ s); 9563 9564 case '"': 9565 return yyl_dblquote(aTHX_ s); 9566 9567 case '`': 9568 return yyl_backtick(aTHX_ s); 9569 9570 case '\\': 9571 return yyl_backslash(aTHX_ s + 1); 9572 9573 case 'v': 9574 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 9575 char *start = s + 2; 9576 while (isDIGIT(*start) || *start == '_') 9577 start++; 9578 if (*start == '.' && isDIGIT(start[1])) { 9579 s = scan_num(s, &pl_yylval); 9580 TERM(THING); 9581 } 9582 else if ((*start == ':' && start[1] == ':') 9583 || (PL_expect == XSTATE && *start == ':')) { 9584 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9585 return tok; 9586 goto retry_bufptr; 9587 } 9588 else if (PL_expect == XSTATE) { 9589 d = start; 9590 while (d < PL_bufend && isSPACE(*d)) d++; 9591 if (*d == ':') { 9592 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9593 return tok; 9594 goto retry_bufptr; 9595 } 9596 } 9597 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 9598 if (!isALPHA(*start) && (PL_expect == XTERM 9599 || PL_expect == XREF || PL_expect == XSTATE 9600 || PL_expect == XTERMORDORDOR)) { 9601 GV *const gv = gv_fetchpvn_flags(s, start - s, 9602 UTF ? SVf_UTF8 : 0, SVt_PVCV); 9603 if (!gv) { 9604 s = scan_num(s, &pl_yylval); 9605 TERM(THING); 9606 } 9607 } 9608 } 9609 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9610 return tok; 9611 goto retry_bufptr; 9612 9613 case 'x': 9614 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 9615 s++; 9616 Mop(OP_REPEAT); 9617 } 9618 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9619 return tok; 9620 goto retry_bufptr; 9621 9622 case '_': 9623 case 'a': case 'A': 9624 case 'b': case 'B': 9625 case 'c': case 'C': 9626 case 'd': case 'D': 9627 case 'e': case 'E': 9628 case 'f': case 'F': 9629 case 'g': case 'G': 9630 case 'h': case 'H': 9631 case 'i': case 'I': 9632 case 'j': case 'J': 9633 case 'k': case 'K': 9634 case 'l': case 'L': 9635 case 'm': case 'M': 9636 case 'n': case 'N': 9637 case 'o': case 'O': 9638 case 'p': case 'P': 9639 case 'q': case 'Q': 9640 case 'r': case 'R': 9641 case 's': case 'S': 9642 case 't': case 'T': 9643 case 'u': case 'U': 9644 case 'V': 9645 case 'w': case 'W': 9646 case 'X': 9647 case 'y': case 'Y': 9648 case 'z': case 'Z': 9649 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9650 return tok; 9651 goto retry_bufptr; 9652 } 9653 } 9654 9655 9656 /* 9657 yylex 9658 9659 Works out what to call the token just pulled out of the input 9660 stream. The yacc parser takes care of taking the ops we return and 9661 stitching them into a tree. 9662 9663 Returns: 9664 The type of the next token 9665 9666 Structure: 9667 Check if we have already built the token; if so, use it. 9668 Switch based on the current state: 9669 - if we have a case modifier in a string, deal with that 9670 - handle other cases of interpolation inside a string 9671 - scan the next line if we are inside a format 9672 In the normal state, switch on the next character: 9673 - default: 9674 if alphabetic, go to key lookup 9675 unrecognized character - croak 9676 - 0/4/26: handle end-of-line or EOF 9677 - cases for whitespace 9678 - \n and #: handle comments and line numbers 9679 - various operators, brackets and sigils 9680 - numbers 9681 - quotes 9682 - 'v': vstrings (or go to key lookup) 9683 - 'x' repetition operator (or go to key lookup) 9684 - other ASCII alphanumerics (key lookup begins here): 9685 word before => ? 9686 keyword plugin 9687 scan built-in keyword (but do nothing with it yet) 9688 check for statement label 9689 check for lexical subs 9690 return yyl_just_a_word if there is one 9691 see whether built-in keyword is overridden 9692 switch on keyword number: 9693 - default: return yyl_just_a_word: 9694 not a built-in keyword; handle bareword lookup 9695 disambiguate between method and sub call 9696 fall back to bareword 9697 - cases for built-in keywords 9698 */ 9699 9700 int 9701 Perl_yylex(pTHX) 9702 { 9703 char *s = PL_bufptr; 9704 9705 if (UNLIKELY(PL_parser->recheck_utf8_validity)) { 9706 const U8* first_bad_char_loc; 9707 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, 9708 PL_bufend - PL_bufptr, 9709 &first_bad_char_loc))) 9710 { 9711 _force_out_malformed_utf8_message(first_bad_char_loc, 9712 (U8 *) PL_bufend, 9713 0, 9714 1 /* 1 means die */ ); 9715 NOT_REACHED; /* NOTREACHED */ 9716 } 9717 PL_parser->recheck_utf8_validity = FALSE; 9718 } 9719 DEBUG_T( { 9720 SV* tmp = newSVpvs(""); 9721 PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n", 9722 CopLINE(PL_curcop), 9723 lex_state_names[PL_lex_state], 9724 exp_name[PL_expect], 9725 pv_display(tmp, s, strlen(s), 0, 60)); 9726 SvREFCNT_dec(tmp); 9727 } ); 9728 9729 /* when we've already built the next token, just pull it out of the queue */ 9730 if (PL_nexttoke) { 9731 PL_nexttoke--; 9732 pl_yylval = PL_nextval[PL_nexttoke]; 9733 { 9734 I32 next_type; 9735 next_type = PL_nexttype[PL_nexttoke]; 9736 if (next_type & (7<<24)) { 9737 if (next_type & (1<<24)) { 9738 if (PL_lex_brackets > 100) 9739 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 9740 PL_lex_brackstack[PL_lex_brackets++] = 9741 (char) ((U8) (next_type >> 16)); 9742 } 9743 if (next_type & (2<<24)) 9744 PL_lex_allbrackets++; 9745 if (next_type & (4<<24)) 9746 PL_lex_allbrackets--; 9747 next_type &= 0xffff; 9748 } 9749 return REPORT(next_type == 'p' ? pending_ident() : next_type); 9750 } 9751 } 9752 9753 switch (PL_lex_state) { 9754 case LEX_NORMAL: 9755 case LEX_INTERPNORMAL: 9756 break; 9757 9758 /* interpolated case modifiers like \L \U, including \Q and \E. 9759 when we get here, PL_bufptr is at the \ 9760 */ 9761 case LEX_INTERPCASEMOD: 9762 /* handle \E or end of string */ 9763 return yyl_interpcasemod(aTHX_ s); 9764 9765 case LEX_INTERPPUSH: 9766 return REPORT(sublex_push()); 9767 9768 case LEX_INTERPSTART: 9769 if (PL_bufptr == PL_bufend) 9770 return REPORT(sublex_done()); 9771 DEBUG_T({ 9772 if(*PL_bufptr != '(') 9773 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); 9774 }); 9775 PL_expect = XTERM; 9776 /* for /@a/, we leave the joining for the regex engine to do 9777 * (unless we're within \Q etc) */ 9778 PL_lex_dojoin = (*PL_bufptr == '@' 9779 && (!PL_lex_inpat || PL_lex_casemods)); 9780 PL_lex_state = LEX_INTERPNORMAL; 9781 if (PL_lex_dojoin) { 9782 NEXTVAL_NEXTTOKE.ival = 0; 9783 force_next(PERLY_COMMA); 9784 force_ident("\"", PERLY_DOLLAR); 9785 NEXTVAL_NEXTTOKE.ival = 0; 9786 force_next(PERLY_DOLLAR); 9787 NEXTVAL_NEXTTOKE.ival = 0; 9788 force_next((2<<24)|PERLY_PAREN_OPEN); 9789 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 9790 force_next(FUNC); 9791 } 9792 /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */ 9793 if (PL_lex_inpat && *PL_bufptr == '(') { 9794 PL_parser->lex_shared->re_eval_start = PL_bufptr; 9795 PL_bufptr += 2; 9796 if (*PL_bufptr != '{') 9797 PL_bufptr++; 9798 PL_expect = XTERMBLOCK; 9799 force_next(KW_DO); 9800 } 9801 9802 if (PL_lex_starts++) { 9803 s = PL_bufptr; 9804 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9805 if (!PL_lex_casemods && PL_lex_inpat) 9806 TOKEN(PERLY_COMMA); 9807 else 9808 AopNOASSIGN(OP_CONCAT); 9809 } 9810 return yylex(); 9811 9812 case LEX_INTERPENDMAYBE: 9813 if (intuit_more(PL_bufptr, PL_bufend)) { 9814 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 9815 break; 9816 } 9817 /* FALLTHROUGH */ 9818 9819 case LEX_INTERPEND: 9820 if (PL_lex_dojoin) { 9821 const U8 dojoin_was = PL_lex_dojoin; 9822 PL_lex_dojoin = FALSE; 9823 PL_lex_state = LEX_INTERPCONCAT; 9824 PL_lex_allbrackets--; 9825 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN); 9826 } 9827 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 9828 && SvEVALED(PL_lex_repl)) 9829 { 9830 if (PL_bufptr != PL_bufend) 9831 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 9832 PL_lex_repl = NULL; 9833 } 9834 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 9835 re_eval_str. If the here-doc body's length equals the previous 9836 value of re_eval_start, re_eval_start will now be null. So 9837 check re_eval_str as well. */ 9838 if (PL_parser->lex_shared->re_eval_start 9839 || PL_parser->lex_shared->re_eval_str) { 9840 SV *sv; 9841 if (*PL_bufptr != ')') 9842 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 9843 PL_bufptr++; 9844 /* having compiled a (?{..}) expression, return the original 9845 * text too, as a const */ 9846 if (PL_parser->lex_shared->re_eval_str) { 9847 sv = PL_parser->lex_shared->re_eval_str; 9848 PL_parser->lex_shared->re_eval_str = NULL; 9849 SvCUR_set(sv, 9850 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9851 SvPV_shrink_to_cur(sv); 9852 } 9853 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 9854 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9855 NEXTVAL_NEXTTOKE.opval = 9856 newSVOP(OP_CONST, 0, 9857 sv); 9858 force_next(THING); 9859 PL_parser->lex_shared->re_eval_start = NULL; 9860 PL_expect = XTERM; 9861 return REPORT(PERLY_COMMA); 9862 } 9863 9864 /* FALLTHROUGH */ 9865 case LEX_INTERPCONCAT: 9866 #ifdef DEBUGGING 9867 if (PL_lex_brackets) 9868 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 9869 (long) PL_lex_brackets); 9870 #endif 9871 if (PL_bufptr == PL_bufend) 9872 return REPORT(sublex_done()); 9873 9874 /* m'foo' still needs to be parsed for possible (?{...}) */ 9875 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 9876 SV *sv = newSVsv(PL_linestr); 9877 sv = tokeq(sv); 9878 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 9879 s = PL_bufend; 9880 } 9881 else { 9882 int save_error_count = PL_error_count; 9883 9884 s = scan_const(PL_bufptr); 9885 9886 /* Set flag if this was a pattern and there were errors. op.c will 9887 * refuse to compile a pattern with this flag set. Otherwise, we 9888 * could get segfaults, etc. */ 9889 if (PL_lex_inpat && PL_error_count > save_error_count) { 9890 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; 9891 } 9892 if (*s == '\\') 9893 PL_lex_state = LEX_INTERPCASEMOD; 9894 else 9895 PL_lex_state = LEX_INTERPSTART; 9896 } 9897 9898 if (s != PL_bufptr) { 9899 NEXTVAL_NEXTTOKE = pl_yylval; 9900 PL_expect = XTERM; 9901 force_next(THING); 9902 if (PL_lex_starts++) { 9903 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9904 if (!PL_lex_casemods && PL_lex_inpat) 9905 TOKEN(PERLY_COMMA); 9906 else 9907 AopNOASSIGN(OP_CONCAT); 9908 } 9909 else { 9910 PL_bufptr = s; 9911 return yylex(); 9912 } 9913 } 9914 9915 return yylex(); 9916 case LEX_FORMLINE: 9917 if (PL_parser->sub_error_count != PL_error_count) { 9918 /* There was an error parsing a formline, which tends to 9919 mess up the parser. 9920 Unlike interpolated sub-parsing, we can't treat any of 9921 these as recoverable, so no need to check sub_no_recover. 9922 */ 9923 yyquit(); 9924 } 9925 assert(PL_lex_formbrack); 9926 s = scan_formline(PL_bufptr); 9927 if (!PL_lex_formbrack) 9928 return yyl_rightcurly(aTHX_ s, 1); 9929 PL_bufptr = s; 9930 return yylex(); 9931 } 9932 9933 /* We really do *not* want PL_linestr ever becoming a COW. */ 9934 assert (!SvIsCOW(PL_linestr)); 9935 s = PL_bufptr; 9936 PL_oldoldbufptr = PL_oldbufptr; 9937 PL_oldbufptr = s; 9938 9939 if (PL_in_my == KEY_sigvar) { 9940 PL_parser->saw_infix_sigil = 0; 9941 return yyl_sigvar(aTHX_ s); 9942 } 9943 9944 { 9945 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil. 9946 On its return, we then need to set it to indicate whether the token 9947 we just encountered was an infix operator that (if we hadn't been 9948 expecting an operator) have been a sigil. 9949 */ 9950 bool expected_operator = (PL_expect == XOPERATOR); 9951 int ret = yyl_try(aTHX_ s); 9952 switch (pl_yylval.ival) { 9953 case OP_BIT_AND: 9954 case OP_MODULO: 9955 case OP_MULTIPLY: 9956 case OP_NBIT_AND: 9957 if (expected_operator) { 9958 PL_parser->saw_infix_sigil = 1; 9959 break; 9960 } 9961 /* FALLTHROUGH */ 9962 default: 9963 PL_parser->saw_infix_sigil = 0; 9964 } 9965 return ret; 9966 } 9967 } 9968 9969 9970 /* 9971 S_pending_ident 9972 9973 Looks up an identifier in the pad or in a package 9974 9975 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable 9976 rather than a plain pad var. 9977 9978 Returns: 9979 PRIVATEREF if this is a lexical name. 9980 BAREWORD if this belongs to a package. 9981 9982 Structure: 9983 if we're in a my declaration 9984 croak if they tried to say my($foo::bar) 9985 build the ops for a my() declaration 9986 if it's an access to a my() variable 9987 build ops for access to a my() variable 9988 if in a dq string, and they've said @foo and we can't find @foo 9989 warn 9990 build ops for a bareword 9991 */ 9992 9993 static int 9994 S_pending_ident(pTHX) 9995 { 9996 PADOFFSET tmp = 0; 9997 const char pit = (char)pl_yylval.ival; 9998 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 9999 /* All routes through this function want to know if there is a colon. */ 10000 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 10001 10002 DEBUG_T({ PerlIO_printf(Perl_debug_log, 10003 "### Pending identifier '%s'\n", PL_tokenbuf); }); 10004 assert(tokenbuf_len >= 2); 10005 10006 /* if we're in a my(), we can't allow dynamics here. 10007 $foo'bar has already been turned into $foo::bar, so 10008 just check for colons. 10009 10010 if it's a legal name, the OP is a PADANY. 10011 */ 10012 if (PL_in_my) { 10013 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 10014 if (has_colon) 10015 /* diag_listed_as: No package name allowed for variable %s 10016 in "our" */ 10017 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 10018 "%s %s in \"our\"", 10019 *PL_tokenbuf=='&' ? "subroutine" : "variable", 10020 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 10021 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 10022 } 10023 else { 10024 OP *o; 10025 if (has_colon) { 10026 /* "my" variable %s can't be in a package */ 10027 /* PL_no_myglob is constant */ 10028 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 10029 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 10030 PL_in_my == KEY_my ? "my" : 10031 PL_in_my == KEY_field ? "field" : "state", 10032 *PL_tokenbuf == '&' ? "subroutine" : "variable", 10033 PL_tokenbuf), 10034 UTF ? SVf_UTF8 : 0); 10035 GCC_DIAG_RESTORE_STMT; 10036 } 10037 10038 if (PL_in_my == KEY_sigvar) { 10039 /* A signature 'padop' needs in addition, an op_first to 10040 * point to a child sigdefelem, and an extra field to hold 10041 * the signature index. We can achieve both by using an 10042 * UNOP_AUX and (ab)using the op_aux field to hold the 10043 * index. If we ever need more fields, use a real malloced 10044 * aux strut instead. 10045 */ 10046 o = newUNOP_AUX(OP_ARGELEM, 0, NULL, 10047 INT2PTR(UNOP_AUX_item *, 10048 (PL_parser->sig_elems))); 10049 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV 10050 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV 10051 : OPpARGELEM_HV); 10052 } 10053 else 10054 o = newOP(OP_PADANY, 0); 10055 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 10056 UTF ? SVf_UTF8 : 0); 10057 if (PL_in_my == KEY_sigvar) 10058 PL_in_my = 0; 10059 10060 pl_yylval.opval = o; 10061 return PRIVATEREF; 10062 } 10063 } 10064 10065 /* 10066 build the ops for accesses to a my() variable. 10067 */ 10068 10069 if (!has_colon) { 10070 if (!PL_in_my) 10071 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 10072 0); 10073 if (tmp != NOT_IN_PAD) { 10074 /* might be an "our" variable" */ 10075 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 10076 /* build ops for a bareword */ 10077 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 10078 HEK * const stashname = HvNAME_HEK(stash); 10079 SV * const sym = newSVhek(stashname); 10080 sv_catpvs(sym, "::"); 10081 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 10082 pl_yylval.opval = newSVOP(OP_CONST, 0, sym); 10083 pl_yylval.opval->op_private = OPpCONST_ENTERED; 10084 if (pit != '&') 10085 gv_fetchsv(sym, 10086 GV_ADDMULTI, 10087 ((PL_tokenbuf[0] == '$') ? SVt_PV 10088 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 10089 : SVt_PVHV)); 10090 return BAREWORD; 10091 } 10092 10093 pl_yylval.opval = newOP(OP_PADANY, 0); 10094 pl_yylval.opval->op_targ = tmp; 10095 return PRIVATEREF; 10096 } 10097 } 10098 10099 /* 10100 Whine if they've said @foo or @foo{key} in a doublequoted string, 10101 and @foo (or %foo) isn't a variable we can find in the symbol 10102 table. 10103 */ 10104 if (ckWARN(WARN_AMBIGUOUS) 10105 && pit == '@' 10106 && PL_lex_state != LEX_NORMAL 10107 && !PL_lex_brackets) 10108 { 10109 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 10110 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, 10111 SVt_PVAV); 10112 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 10113 ) 10114 { 10115 /* Downgraded from fatal to warning 20000522 mjd */ 10116 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10117 "Possible unintended interpolation of %" UTF8f 10118 " in string", 10119 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 10120 } 10121 } 10122 10123 /* build ops for a bareword */ 10124 pl_yylval.opval = newSVOP(OP_CONST, 0, 10125 newSVpvn_flags(PL_tokenbuf + 1, 10126 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 10127 UTF ? SVf_UTF8 : 0 )); 10128 pl_yylval.opval->op_private = OPpCONST_ENTERED; 10129 if (pit != '&') 10130 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 10131 (PL_in_eval ? GV_ADDMULTI : GV_ADD) 10132 | ( UTF ? SVf_UTF8 : 0 ), 10133 ((PL_tokenbuf[0] == '$') ? SVt_PV 10134 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 10135 : SVt_PVHV)); 10136 return BAREWORD; 10137 } 10138 10139 STATIC void 10140 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 10141 { 10142 PERL_ARGS_ASSERT_CHECKCOMMA; 10143 10144 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 10145 if (ckWARN(WARN_SYNTAX)) { 10146 int level = 1; 10147 const char *w; 10148 for (w = s+2; *w && level; w++) { 10149 if (*w == '(') 10150 ++level; 10151 else if (*w == ')') 10152 --level; 10153 } 10154 while (isSPACE(*w)) 10155 ++w; 10156 /* the list of chars below is for end of statements or 10157 * block / parens, boolean operators (&&, ||, //) and branch 10158 * constructs (or, and, if, until, unless, while, err, for). 10159 * Not a very solid hack... */ 10160 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w)) 10161 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 10162 "%s (...) interpreted as function",name); 10163 } 10164 } 10165 while (s < PL_bufend && isSPACE(*s)) 10166 s++; 10167 if (*s == '(') 10168 s++; 10169 while (s < PL_bufend && isSPACE(*s)) 10170 s++; 10171 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 10172 const char * const w = s; 10173 s += UTF ? UTF8SKIP(s) : 1; 10174 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 10175 s += UTF ? UTF8SKIP(s) : 1; 10176 while (s < PL_bufend && isSPACE(*s)) 10177 s++; 10178 if (*s == ',') { 10179 GV* gv; 10180 if (keyword(w, s - w, 0)) 10181 return; 10182 10183 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 10184 if (gv && GvCVu(gv)) 10185 return; 10186 if (s - w <= 254) { 10187 PADOFFSET off; 10188 char tmpbuf[256]; 10189 Copy(w, tmpbuf+1, s - w, char); 10190 *tmpbuf = '&'; 10191 off = pad_findmy_pvn(tmpbuf, s-w+1, 0); 10192 if (off != NOT_IN_PAD) return; 10193 } 10194 Perl_croak(aTHX_ "No comma allowed after %s", what); 10195 } 10196 } 10197 } 10198 10199 /* S_new_constant(): do any overload::constant lookup. 10200 10201 Either returns sv, or mortalizes/frees sv and returns a new SV*. 10202 Best used as sv=new_constant(..., sv, ...). 10203 If s, pv are NULL, calls subroutine with one argument, 10204 and <type> is used with error messages only. 10205 <type> is assumed to be well formed UTF-8. 10206 10207 If error_msg is not NULL, *error_msg will be set to any error encountered. 10208 Otherwise yyerror() will be used to output it */ 10209 10210 STATIC SV * 10211 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 10212 SV *sv, SV *pv, const char *type, STRLEN typelen, 10213 const char ** error_msg) 10214 { 10215 dSP; 10216 HV * table = GvHV(PL_hintgv); /* ^H */ 10217 SV *res; 10218 SV *errsv = NULL; 10219 SV **cvp; 10220 SV *cv, *typesv; 10221 const char *why1 = "", *why2 = "", *why3 = ""; 10222 const char * optional_colon = ":"; /* Only some messages have a colon */ 10223 char *msg; 10224 10225 PERL_ARGS_ASSERT_NEW_CONSTANT; 10226 /* We assume that this is true: */ 10227 assert(type || s); 10228 10229 sv_2mortal(sv); /* Parent created it permanently */ 10230 10231 if ( ! table 10232 || ! (PL_hints & HINT_LOCALIZE_HH)) 10233 { 10234 why1 = "unknown"; 10235 optional_colon = ""; 10236 goto report; 10237 } 10238 10239 cvp = hv_fetch(table, key, keylen, FALSE); 10240 if (!cvp || !SvOK(*cvp)) { 10241 why1 = "$^H{"; 10242 why2 = key; 10243 why3 = "} is not defined"; 10244 goto report; 10245 } 10246 10247 cv = *cvp; 10248 if (!pv && s) 10249 pv = newSVpvn_flags(s, len, SVs_TEMP); 10250 if (type && pv) 10251 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 10252 else 10253 typesv = &PL_sv_undef; 10254 10255 PUSHSTACKi(PERLSI_OVERLOAD); 10256 ENTER ; 10257 SAVETMPS; 10258 10259 PUSHMARK(SP) ; 10260 EXTEND(sp, 3); 10261 if (pv) 10262 PUSHs(pv); 10263 PUSHs(sv); 10264 if (pv) 10265 PUSHs(typesv); 10266 PUTBACK; 10267 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 10268 10269 SPAGAIN ; 10270 10271 /* Check the eval first */ 10272 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 10273 STRLEN errlen; 10274 const char * errstr; 10275 sv_catpvs(errsv, "Propagated"); 10276 errstr = SvPV_const(errsv, errlen); 10277 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 10278 (void)POPs; 10279 res = SvREFCNT_inc_simple_NN(sv); 10280 } 10281 else { 10282 res = POPs; 10283 SvREFCNT_inc_simple_void_NN(res); 10284 } 10285 10286 PUTBACK ; 10287 FREETMPS ; 10288 LEAVE ; 10289 POPSTACK; 10290 10291 if (SvOK(res)) { 10292 return res; 10293 } 10294 10295 sv = res; 10296 (void)sv_2mortal(sv); 10297 10298 why1 = "Call to &{$^H{"; 10299 why2 = key; 10300 why3 = "}} did not return a defined value"; 10301 10302 report: 10303 10304 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s", 10305 (int)(type ? typelen : len), 10306 (type ? type: s), 10307 optional_colon, 10308 why1, why2, why3); 10309 if (error_msg) { 10310 *error_msg = msg; 10311 } 10312 else { 10313 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 10314 } 10315 return SvREFCNT_inc_simple_NN(sv); 10316 } 10317 10318 PERL_STATIC_INLINE void 10319 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, 10320 bool is_utf8, bool check_dollar, bool tick_warn) 10321 { 10322 int saw_tick = 0; 10323 const char *olds = *s; 10324 PERL_ARGS_ASSERT_PARSE_IDENT; 10325 10326 while (*s < PL_bufend) { 10327 if (*d >= e) 10328 Perl_croak(aTHX_ "%s", ident_too_long); 10329 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) { 10330 /* The UTF-8 case must come first, otherwise things 10331 * like c\N{COMBINING TILDE} would start failing, as the 10332 * isWORDCHAR_A case below would gobble the 'c' up. 10333 */ 10334 10335 char *t = *s + UTF8SKIP(*s); 10336 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) { 10337 t += UTF8SKIP(t); 10338 } 10339 if (*d + (t - *s) > e) 10340 Perl_croak(aTHX_ "%s", ident_too_long); 10341 Copy(*s, *d, t - *s, char); 10342 *d += t - *s; 10343 *s = t; 10344 } 10345 else if ( isWORDCHAR_A(**s) ) { 10346 do { 10347 *(*d)++ = *(*s)++; 10348 } while (isWORDCHAR_A(**s) && *d < e); 10349 } 10350 else if ( allow_package 10351 && **s == '\'' 10352 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8)) 10353 { 10354 *(*d)++ = ':'; 10355 *(*d)++ = ':'; 10356 (*s)++; 10357 saw_tick++; 10358 } 10359 else if (allow_package && **s == ':' && (*s)[1] == ':' 10360 /* Disallow things like Foo::$bar. For the curious, this is 10361 * the code path that triggers the "Bad name after" warning 10362 * when looking for barewords. 10363 */ 10364 && !(check_dollar && (*s)[2] == '$')) { 10365 *(*d)++ = *(*s)++; 10366 *(*d)++ = *(*s)++; 10367 } 10368 else 10369 break; 10370 } 10371 if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) { 10372 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 10373 char *this_d; 10374 char *d2; 10375 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */ 10376 d2 = this_d; 10377 SAVEFREEPV(this_d); 10378 10379 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR), 10380 "Old package separator used in string"); 10381 if (olds[-1] == '#') 10382 *d2++ = olds[-2]; 10383 *d2++ = olds[-1]; 10384 while (olds < *s) { 10385 if (*olds == '\'') { 10386 *d2++ = '\\'; 10387 *d2++ = *olds++; 10388 } 10389 else 10390 *d2++ = *olds++; 10391 } 10392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 10393 "\t(Did you mean \"%" UTF8f "\" instead?)\n", 10394 UTF8fARG(is_utf8, d2-this_d, this_d)); 10395 } 10396 else { 10397 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR), 10398 "Old package separator \"'\" deprecated"); 10399 } 10400 } 10401 return; 10402 } 10403 10404 /* Returns a NUL terminated string, with the length of the string written to 10405 *slp 10406 10407 scan_word6() may be removed once ' in names is removed. 10408 */ 10409 char * 10410 Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick) 10411 { 10412 char *d = dest; 10413 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 10414 bool is_utf8 = cBOOL(UTF); 10415 10416 PERL_ARGS_ASSERT_SCAN_WORD6; 10417 10418 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick); 10419 *d = '\0'; 10420 *slp = d - dest; 10421 return s; 10422 } 10423 10424 char * 10425 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 10426 { 10427 PERL_ARGS_ASSERT_SCAN_WORD; 10428 return scan_word6(s, dest, destlen, allow_package, slp, FALSE); 10429 } 10430 10431 /* scan s and extract an identifier ($var) from it if possible 10432 * into dest. 10433 * XXX: This function has subtle implications on parsing, and 10434 * changing how it behaves can cause a variable to change from 10435 * being a run time rv2sv call or a compile time binding to a 10436 * specific variable name. 10437 */ 10438 STATIC char * 10439 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 10440 { 10441 I32 herelines = PL_parser->herelines; 10442 SSize_t bracket = -1; 10443 char funny = *s++; 10444 char *d = dest; 10445 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 10446 bool is_utf8 = cBOOL(UTF); 10447 line_t orig_copline = 0, tmp_copline = 0; 10448 10449 PERL_ARGS_ASSERT_SCAN_IDENT; 10450 10451 if (isSPACE(*s) || !*s) 10452 s = skipspace(s); 10453 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */ 10454 bool is_zero= *s == '0' ? TRUE : FALSE; 10455 char *digit_start= d; 10456 *d++ = *s++; 10457 while (s < PL_bufend && isDIGIT(*s)) { 10458 if (d >= e) 10459 Perl_croak(aTHX_ "%s", ident_too_long); 10460 *d++ = *s++; 10461 } 10462 if (is_zero && d - digit_start > 1) 10463 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10464 } 10465 else { /* See if it is a "normal" identifier */ 10466 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); 10467 } 10468 *d = '\0'; 10469 d = dest; 10470 if (*d) { 10471 /* Either a digit variable, or parse_ident() found an identifier 10472 (anything valid as a bareword), so job done and return. */ 10473 if (PL_lex_state != LEX_NORMAL) 10474 PL_lex_state = LEX_INTERPENDMAYBE; 10475 return s; 10476 } 10477 10478 /* Here, it is not a run-of-the-mill identifier name */ 10479 10480 if (*s == '$' && s[1] 10481 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8) 10482 || isDIGIT_A((U8)s[1]) 10483 || s[1] == '$' 10484 || s[1] == '{' 10485 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) ) 10486 { 10487 /* Dereferencing a value in a scalar variable. 10488 The alternatives are different syntaxes for a scalar variable. 10489 Using ' as a leading package separator isn't allowed. :: is. */ 10490 return s; 10491 } 10492 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 10493 if (*s == '{') { 10494 bracket = s - SvPVX(PL_linestr); 10495 s++; 10496 orig_copline = CopLINE(PL_curcop); 10497 if (s < PL_bufend && isSPACE(*s)) { 10498 s = skipspace(s); 10499 } 10500 } 10501 10502 10503 /* Extract the first character of the variable name from 's' and 10504 * copy it, null terminated into 'd'. Note that this does not 10505 * involve checking for just IDFIRST characters, as it allows the 10506 * '^' for ${^FOO} type variable names, and it allows all the 10507 * characters that are legal in a single character variable name. 10508 * 10509 * The legal ones are any of: 10510 * a) all ASCII characters except: 10511 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; 10512 * 2) '{' 10513 * The final case currently doesn't get this far in the program, so we 10514 * don't test for it. If that were to change, it would be ok to allow it. 10515 * b) When not under Unicode rules, any upper Latin1 character 10516 * c) Otherwise, when unicode rules are used, all XIDS characters. 10517 * 10518 * Because all ASCII characters have the same representation whether 10519 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and 10520 * '{' without knowing if is UTF-8 or not. */ 10521 10522 if ((s <= PL_bufend - ((is_utf8) 10523 ? UTF8SKIP(s) 10524 : 1)) 10525 && ( 10526 isGRAPH_A(*s) 10527 || 10528 ( is_utf8 10529 ? isIDFIRST_utf8_safe(s, PL_bufend) 10530 : (isGRAPH_L1(*s) 10531 && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD)) 10532 ) 10533 ) 10534 ) 10535 ){ 10536 if (is_utf8) { 10537 const STRLEN skip = UTF8SKIP(s); 10538 STRLEN i; 10539 d[skip] = '\0'; 10540 for ( i = 0; i < skip; i++ ) 10541 d[i] = *s++; 10542 } 10543 else { 10544 *d = *s++; 10545 d[1] = '\0'; 10546 } 10547 } 10548 10549 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ 10550 if (isDIGIT(*d)) { 10551 bool is_zero= *d == '0' ? TRUE : FALSE; 10552 char *digit_start= d; 10553 while (s < PL_bufend && isDIGIT(*s)) { 10554 d++; 10555 if (d >= e) 10556 Perl_croak(aTHX_ "%s", ident_too_long); 10557 *d= *s++; 10558 } 10559 if (is_zero && d - digit_start >= 1) /* d points at the last digit */ 10560 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10561 d[1] = '\0'; 10562 } 10563 10564 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 10565 else if (*d == '^' && *s && isCONTROLVAR(*s)) { 10566 *d = toCTRL(*s); 10567 s++; 10568 } 10569 /* Warn about ambiguous code after unary operators if {...} notation isn't 10570 used. There's no difference in ambiguity; it's merely a heuristic 10571 about when not to warn. */ 10572 else if (ck_uni && bracket == -1) 10573 check_uni(); 10574 10575 if (bracket != -1) { 10576 bool skip; 10577 char *s2; 10578 /* If we were processing {...} notation then... */ 10579 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) 10580 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 10581 && isWORDCHAR(*s)) 10582 ) { 10583 /* note we have to check for a normal identifier first, 10584 * as it handles utf8 symbols, and only after that has 10585 * been ruled out can we look at the caret words */ 10586 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) { 10587 /* if it starts as a valid identifier, assume that it is one. 10588 (the later check for } being at the expected point will trap 10589 cases where this doesn't pan out.) */ 10590 d += is_utf8 ? UTF8SKIP(d) : 1; 10591 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE); 10592 *d = '\0'; 10593 } 10594 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ 10595 d++; 10596 while (isWORDCHAR(*s) && d < e) { 10597 *d++ = *s++; 10598 } 10599 if (d >= e) 10600 Perl_croak(aTHX_ "%s", ident_too_long); 10601 *d = '\0'; 10602 } 10603 tmp_copline = CopLINE(PL_curcop); 10604 if (s < PL_bufend && isSPACE(*s)) { 10605 s = skipspace(s); 10606 } 10607 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 10608 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ 10609 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 10610 const char * const brack = 10611 (const char *) 10612 ((*s == '[') ? "[...]" : "{...}"); 10613 orig_copline = CopLINE(PL_curcop); 10614 CopLINE_set(PL_curcop, tmp_copline); 10615 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 10616 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10617 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 10618 funny, dest, brack, funny, dest, brack); 10619 CopLINE_set(PL_curcop, orig_copline); 10620 } 10621 bracket++; 10622 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 10623 PL_lex_allbrackets++; 10624 return s; 10625 } 10626 } 10627 10628 if ( !tmp_copline ) 10629 tmp_copline = CopLINE(PL_curcop); 10630 if ((skip = s < PL_bufend && isSPACE(*s))) { 10631 /* Avoid incrementing line numbers or resetting PL_linestart, 10632 in case we have to back up. */ 10633 STRLEN s_off = s - SvPVX(PL_linestr); 10634 s2 = peekspace(s); 10635 s = SvPVX(PL_linestr) + s_off; 10636 } 10637 else 10638 s2 = s; 10639 10640 /* Expect to find a closing } after consuming any trailing whitespace. 10641 */ 10642 if (*s2 == '}') { 10643 /* Now increment line numbers if applicable. */ 10644 if (skip) 10645 s = skipspace(s); 10646 s++; 10647 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 10648 PL_lex_state = LEX_INTERPEND; 10649 PL_expect = XREF; 10650 } 10651 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) { 10652 if (ckWARN(WARN_AMBIGUOUS) 10653 && (keyword(dest, d - dest, 0) 10654 || get_cvn_flags(dest, d - dest, is_utf8 10655 ? SVf_UTF8 10656 : 0))) 10657 { 10658 SV *tmp = newSVpvn_flags( dest, d - dest, 10659 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 10660 if (funny == '#') 10661 funny = '@'; 10662 orig_copline = CopLINE(PL_curcop); 10663 CopLINE_set(PL_curcop, tmp_copline); 10664 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10665 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf, 10666 funny, SVfARG(tmp), funny, SVfARG(tmp)); 10667 CopLINE_set(PL_curcop, orig_copline); 10668 } 10669 } 10670 } 10671 else { 10672 /* Didn't find the closing } at the point we expected, so restore 10673 state such that the next thing to process is the opening { and */ 10674 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 10675 CopLINE_set(PL_curcop, orig_copline); 10676 PL_parser->herelines = herelines; 10677 *dest = '\0'; 10678 PL_parser->sub_no_recover = TRUE; 10679 } 10680 } 10681 else if ( PL_lex_state == LEX_INTERPNORMAL 10682 && !PL_lex_brackets 10683 && !intuit_more(s, PL_bufend)) 10684 PL_lex_state = LEX_INTERPEND; 10685 return s; 10686 } 10687 10688 static bool 10689 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { 10690 10691 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag 10692 * found in the parse starting at 's', based on the subset that are valid 10693 * in this context input to this routine in 'valid_flags'. Advances s. 10694 * Returns TRUE if the input should be treated as a valid flag, so the next 10695 * char may be as well; otherwise FALSE. 'charset' should point to a NUL 10696 * upon first call on the current regex. This routine will set it to any 10697 * charset modifier found. The caller shouldn't change it. This way, 10698 * another charset modifier encountered in the parse can be detected as an 10699 * error, as we have decided to allow only one */ 10700 10701 const char c = **s; 10702 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 10703 10704 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 10705 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) { 10706 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 10707 UTF ? SVf_UTF8 : 0); 10708 (*s) += charlen; 10709 /* Pretend that it worked, so will continue processing before 10710 * dieing */ 10711 return TRUE; 10712 } 10713 return FALSE; 10714 } 10715 10716 switch (c) { 10717 10718 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); 10719 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 10720 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 10721 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 10722 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 10723 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 10724 case LOCALE_PAT_MOD: 10725 if (*charset) { 10726 goto multiple_charsets; 10727 } 10728 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 10729 *charset = c; 10730 break; 10731 case UNICODE_PAT_MOD: 10732 if (*charset) { 10733 goto multiple_charsets; 10734 } 10735 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 10736 *charset = c; 10737 break; 10738 case ASCII_RESTRICT_PAT_MOD: 10739 if (! *charset) { 10740 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 10741 } 10742 else { 10743 10744 /* Error if previous modifier wasn't an 'a', but if it was, see 10745 * if, and accept, a second occurrence (only) */ 10746 if (*charset != 'a' 10747 || get_regex_charset(*pmfl) 10748 != REGEX_ASCII_RESTRICTED_CHARSET) 10749 { 10750 goto multiple_charsets; 10751 } 10752 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 10753 } 10754 *charset = c; 10755 break; 10756 case DEPENDS_PAT_MOD: 10757 if (*charset) { 10758 goto multiple_charsets; 10759 } 10760 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 10761 *charset = c; 10762 break; 10763 } 10764 10765 (*s)++; 10766 return TRUE; 10767 10768 multiple_charsets: 10769 if (*charset != c) { 10770 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 10771 } 10772 else if (c == 'a') { 10773 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 10774 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 10775 } 10776 else { 10777 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 10778 } 10779 10780 /* Pretend that it worked, so will continue processing before dieing */ 10781 (*s)++; 10782 return TRUE; 10783 } 10784 10785 STATIC char * 10786 S_scan_pat(pTHX_ char *start, I32 type) 10787 { 10788 PMOP *pm; 10789 char *s; 10790 const char * const valid_flags = 10791 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 10792 char charset = '\0'; /* character set modifier */ 10793 unsigned int x_mod_count = 0; 10794 10795 PERL_ARGS_ASSERT_SCAN_PAT; 10796 10797 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); 10798 if (!s) 10799 Perl_croak(aTHX_ "Search pattern not terminated"); 10800 10801 pm = (PMOP*)newPMOP(type, 0); 10802 if (PL_multi_open == '?') { 10803 /* This is the only point in the code that sets PMf_ONCE: */ 10804 pm->op_pmflags |= PMf_ONCE; 10805 10806 /* Hence it's safe to do this bit of PMOP book-keeping here, which 10807 allows us to restrict the list needed by reset to just the ?? 10808 matches. */ 10809 assert(type != OP_TRANS); 10810 if (PL_curstash) { 10811 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 10812 U32 elements; 10813 if (!mg) { 10814 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 10815 0); 10816 } 10817 elements = mg->mg_len / sizeof(PMOP**); 10818 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 10819 ((PMOP**)mg->mg_ptr) [elements++] = pm; 10820 mg->mg_len = elements * sizeof(PMOP**); 10821 PmopSTASH_set(pm,PL_curstash); 10822 } 10823 } 10824 10825 /* if qr/...(?{..}).../, then need to parse the pattern within a new 10826 * anon CV. False positives like qr/[(?{]/ are harmless */ 10827 10828 if (type == OP_QR) { 10829 STRLEN len; 10830 char *e, *p = SvPV(PL_lex_stuff, len); 10831 e = p + len; 10832 for (; p < e; p++) { 10833 if (p[0] == '(' && ( 10834 (p[1] == '?' && (p[2] == '{' || 10835 (p[2] == '?' && p[3] == '{'))) || 10836 (p[1] == '*' && (p[2] == '{' || 10837 (p[2] == '*' && p[3] == '{'))) 10838 )){ 10839 pm->op_pmflags |= PMf_HAS_CV; 10840 break; 10841 } 10842 } 10843 pm->op_pmflags |= PMf_IS_QR; 10844 } 10845 10846 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), 10847 &s, &charset, &x_mod_count)) 10848 {}; 10849 /* issue a warning if /c is specified,but /g is not */ 10850 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 10851 { 10852 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 10853 "Use of /c modifier is meaningless without /g" ); 10854 } 10855 10856 PL_lex_op = (OP*)pm; 10857 pl_yylval.ival = OP_MATCH; 10858 return s; 10859 } 10860 10861 STATIC char * 10862 S_scan_subst(pTHX_ char *start) 10863 { 10864 char *s; 10865 PMOP *pm; 10866 I32 first_start; 10867 line_t first_line; 10868 line_t linediff = 0; 10869 I32 es = 0; 10870 char charset = '\0'; /* character set modifier */ 10871 unsigned int x_mod_count = 0; 10872 char *t; 10873 10874 PERL_ARGS_ASSERT_SCAN_SUBST; 10875 10876 pl_yylval.ival = OP_NULL; 10877 10878 s = scan_str(start, TRUE, FALSE, FALSE, &t); 10879 10880 if (!s) 10881 Perl_croak(aTHX_ "Substitution pattern not terminated"); 10882 10883 s = t; 10884 10885 first_start = PL_multi_start; 10886 first_line = CopLINE(PL_curcop); 10887 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10888 if (!s) { 10889 SvREFCNT_dec_NN(PL_lex_stuff); 10890 PL_lex_stuff = NULL; 10891 Perl_croak(aTHX_ "Substitution replacement not terminated"); 10892 } 10893 PL_multi_start = first_start; /* so whole substitution is taken together */ 10894 10895 pm = (PMOP*)newPMOP(OP_SUBST, 0); 10896 10897 10898 while (*s) { 10899 if (*s == EXEC_PAT_MOD) { 10900 s++; 10901 es++; 10902 } 10903 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), 10904 &s, &charset, &x_mod_count)) 10905 { 10906 break; 10907 } 10908 } 10909 10910 if ((pm->op_pmflags & PMf_CONTINUE)) { 10911 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 10912 } 10913 10914 if (es) { 10915 SV * const repl = newSVpvs(""); 10916 10917 PL_multi_end = 0; 10918 pm->op_pmflags |= PMf_EVAL; 10919 for (; es > 1; es--) { 10920 sv_catpvs(repl, "eval "); 10921 } 10922 sv_catpvs(repl, "do {"); 10923 sv_catsv(repl, PL_parser->lex_sub_repl); 10924 sv_catpvs(repl, "}"); 10925 SvREFCNT_dec(PL_parser->lex_sub_repl); 10926 PL_parser->lex_sub_repl = repl; 10927 } 10928 10929 10930 linediff = CopLINE(PL_curcop) - first_line; 10931 if (linediff) 10932 CopLINE_set(PL_curcop, first_line); 10933 10934 if (linediff || es) { 10935 /* the IVX field indicates that the replacement string is a s///e; 10936 * the NVX field indicates how many src code lines the replacement 10937 * spreads over */ 10938 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV); 10939 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff; 10940 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = 10941 cBOOL(es); 10942 } 10943 10944 PL_lex_op = (OP*)pm; 10945 pl_yylval.ival = OP_SUBST; 10946 return s; 10947 } 10948 10949 STATIC char * 10950 S_scan_trans(pTHX_ char *start) 10951 { 10952 char* s; 10953 OP *o; 10954 U8 squash; 10955 U8 del; 10956 U8 complement; 10957 bool nondestruct = 0; 10958 char *t; 10959 10960 PERL_ARGS_ASSERT_SCAN_TRANS; 10961 10962 pl_yylval.ival = OP_NULL; 10963 10964 s = scan_str(start,FALSE,FALSE,FALSE,&t); 10965 if (!s) 10966 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 10967 10968 s = t; 10969 10970 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10971 if (!s) { 10972 SvREFCNT_dec_NN(PL_lex_stuff); 10973 PL_lex_stuff = NULL; 10974 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 10975 } 10976 10977 complement = del = squash = 0; 10978 while (1) { 10979 switch (*s) { 10980 case 'c': 10981 complement = OPpTRANS_COMPLEMENT; 10982 break; 10983 case 'd': 10984 del = OPpTRANS_DELETE; 10985 break; 10986 case 's': 10987 squash = OPpTRANS_SQUASH; 10988 break; 10989 case 'r': 10990 nondestruct = 1; 10991 break; 10992 default: 10993 goto no_more; 10994 } 10995 s++; 10996 } 10997 no_more: 10998 10999 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 11000 o->op_private &= ~OPpTRANS_ALL; 11001 o->op_private |= del|squash|complement; 11002 11003 PL_lex_op = o; 11004 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 11005 11006 11007 return s; 11008 } 11009 11010 /* scan_heredoc 11011 Takes a pointer to the first < in <<FOO. 11012 Returns a pointer to the byte following <<FOO. 11013 11014 This function scans a heredoc, which involves different methods 11015 depending on whether we are in a string eval, quoted construct, etc. 11016 This is because PL_linestr could containing a single line of input, or 11017 a whole string being evalled, or the contents of the current quote- 11018 like operator. 11019 11020 The two basic methods are: 11021 - Steal lines from the input stream 11022 - Scan the heredoc in PL_linestr and remove it therefrom 11023 11024 In a file scope or filtered eval, the first method is used; in a 11025 string eval, the second. 11026 11027 In a quote-like operator, we have to choose between the two, 11028 depending on where we can find a newline. We peek into outer lex- 11029 ing scopes until we find one with a newline in it. If we reach the 11030 outermost lexing scope and it is a file, we use the stream method. 11031 Otherwise it is treated as an eval. 11032 */ 11033 11034 STATIC char * 11035 S_scan_heredoc(pTHX_ char *s) 11036 { 11037 I32 op_type = OP_SCALAR; 11038 I32 len; 11039 SV *tmpstr; 11040 char term; 11041 char *d; 11042 char *e; 11043 char *peek; 11044 char *indent = 0; 11045 I32 indent_len = 0; 11046 bool indented = FALSE; 11047 const bool infile = PL_rsfp || PL_parser->filtered; 11048 const line_t origline = CopLINE(PL_curcop); 11049 LEXSHARED *shared = PL_parser->lex_shared; 11050 11051 PERL_ARGS_ASSERT_SCAN_HEREDOC; 11052 11053 s += 2; 11054 d = PL_tokenbuf + 1; 11055 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 11056 *PL_tokenbuf = '\n'; 11057 peek = s; 11058 11059 if (*peek == '~') { 11060 indented = TRUE; 11061 peek++; s++; 11062 } 11063 11064 while (SPACE_OR_TAB(*peek)) 11065 peek++; 11066 11067 if (*peek == '`' || *peek == '\'' || *peek =='"') { 11068 s = peek; 11069 term = *s++; 11070 s = delimcpy(d, e, s, PL_bufend, term, &len); 11071 if (s == PL_bufend) 11072 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 11073 d += len; 11074 s++; 11075 } 11076 else { 11077 if (*s == '\\') 11078 /* <<\FOO is equivalent to <<'FOO' */ 11079 s++, term = '\''; 11080 else 11081 term = '"'; 11082 11083 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 11084 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); 11085 11086 peek = s; 11087 11088 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) { 11089 peek += UTF ? UTF8SKIP(peek) : 1; 11090 } 11091 11092 len = (peek - s >= e - d) ? (e - d) : (peek - s); 11093 Copy(s, d, len, char); 11094 s += len; 11095 d += len; 11096 } 11097 11098 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 11099 Perl_croak(aTHX_ "Delimiter for here document is too long"); 11100 11101 *d++ = '\n'; 11102 *d = '\0'; 11103 len = d - PL_tokenbuf; 11104 11105 #ifndef PERL_STRICT_CR 11106 d = (char *) memchr(s, '\r', PL_bufend - s); 11107 if (d) { 11108 char * const olds = s; 11109 s = d; 11110 while (s < PL_bufend) { 11111 if (*s == '\r') { 11112 *d++ = '\n'; 11113 if (*++s == '\n') 11114 s++; 11115 } 11116 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 11117 *d++ = *s++; 11118 s++; 11119 } 11120 else 11121 *d++ = *s++; 11122 } 11123 *d = '\0'; 11124 PL_bufend = d; 11125 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 11126 s = olds; 11127 } 11128 #endif 11129 11130 tmpstr = newSV_type(SVt_PVIV); 11131 if (term == '\'') { 11132 op_type = OP_CONST; 11133 SvIV_set(tmpstr, -1); 11134 } 11135 else if (term == '`') { 11136 op_type = OP_BACKTICK; 11137 SvIV_set(tmpstr, '\\'); 11138 } 11139 11140 PL_multi_start = origline + 1 + PL_parser->herelines; 11141 PL_multi_open = PL_multi_close = '<'; 11142 11143 /* inside a string eval or quote-like operator */ 11144 if (!infile || PL_lex_inwhat) { 11145 SV *linestr; 11146 char *bufend; 11147 char * const olds = s; 11148 PERL_CONTEXT * const cx = CX_CUR(); 11149 /* These two fields are not set until an inner lexing scope is 11150 entered. But we need them set here. */ 11151 shared->ls_bufptr = s; 11152 shared->ls_linestr = PL_linestr; 11153 11154 if (PL_lex_inwhat) { 11155 /* Look for a newline. If the current buffer does not have one, 11156 peek into the line buffer of the parent lexing scope, going 11157 up as many levels as necessary to find one with a newline 11158 after bufptr. 11159 */ 11160 while (!(s = (char *)memchr( 11161 (void *)shared->ls_bufptr, '\n', 11162 SvEND(shared->ls_linestr)-shared->ls_bufptr 11163 ))) 11164 { 11165 shared = shared->ls_prev; 11166 /* shared is only null if we have gone beyond the outermost 11167 lexing scope. In a file, we will have broken out of the 11168 loop in the previous iteration. In an eval, the string buf- 11169 fer ends with "\n;", so the while condition above will have 11170 evaluated to false. So shared can never be null. Or so you 11171 might think. Odd syntax errors like s;@{<<; can gobble up 11172 the implicit semicolon at the end of a flie, causing the 11173 file handle to be closed even when we are not in a string 11174 eval. So shared may be null in that case. 11175 (Closing '>>}' here to balance the earlier open brace for 11176 editors that look for matched pairs.) */ 11177 if (UNLIKELY(!shared)) 11178 goto interminable; 11179 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 11180 most lexing scope. In a file, shared->ls_linestr at that 11181 level is just one line, so there is no body to steal. */ 11182 if (infile && !shared->ls_prev) { 11183 s = olds; 11184 goto streaming; 11185 } 11186 } 11187 } 11188 else { /* eval or we've already hit EOF */ 11189 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 11190 if (!s) 11191 goto interminable; 11192 } 11193 11194 linestr = shared->ls_linestr; 11195 bufend = SvEND(linestr); 11196 d = s; 11197 if (indented) { 11198 char *myolds = s; 11199 11200 while (s < bufend - len + 1) { 11201 if (*s++ == '\n') 11202 ++PL_parser->herelines; 11203 11204 if (memEQ(s, PL_tokenbuf + 1, len - 1)) { 11205 char *backup = s; 11206 indent_len = 0; 11207 11208 /* Only valid if it's preceded by whitespace only */ 11209 while (backup != myolds && --backup >= myolds) { 11210 if (! SPACE_OR_TAB(*backup)) { 11211 break; 11212 } 11213 indent_len++; 11214 } 11215 11216 /* No whitespace or all! */ 11217 if (backup == s || *backup == '\n') { 11218 Newx(indent, indent_len + 1, char); 11219 memcpy(indent, backup + 1, indent_len); 11220 indent[indent_len] = 0; 11221 s--; /* before our delimiter */ 11222 PL_parser->herelines--; /* this line doesn't count */ 11223 break; 11224 } 11225 } 11226 } 11227 } 11228 else { 11229 while (s < bufend - len + 1 11230 && memNE(s,PL_tokenbuf,len) ) 11231 { 11232 if (*s++ == '\n') 11233 ++PL_parser->herelines; 11234 } 11235 } 11236 11237 if (s >= bufend - len + 1) { 11238 goto interminable; 11239 } 11240 11241 sv_setpvn_fresh(tmpstr,d+1,s-d); 11242 s += len - 1; 11243 /* the preceding stmt passes a newline */ 11244 PL_parser->herelines++; 11245 11246 /* s now points to the newline after the heredoc terminator. 11247 d points to the newline before the body of the heredoc. 11248 */ 11249 11250 /* We are going to modify linestr in place here, so set 11251 aside copies of the string if necessary for re-evals or 11252 (caller $n)[6]. */ 11253 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 11254 check shared->re_eval_str. */ 11255 if (shared->re_eval_start || shared->re_eval_str) { 11256 /* Set aside the rest of the regexp */ 11257 if (!shared->re_eval_str) 11258 shared->re_eval_str = 11259 newSVpvn(shared->re_eval_start, 11260 bufend - shared->re_eval_start); 11261 shared->re_eval_start -= s-d; 11262 } 11263 11264 if (cxstack_ix >= 0 11265 && CxTYPE(cx) == CXt_EVAL 11266 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL 11267 && cx->blk_eval.cur_text == linestr) 11268 { 11269 cx->blk_eval.cur_text = newSVsv(linestr); 11270 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */ 11271 } 11272 11273 /* Copy everything from s onwards back to d. */ 11274 Move(s,d,bufend-s + 1,char); 11275 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 11276 /* Setting PL_bufend only applies when we have not dug deeper 11277 into other scopes, because sublex_done sets PL_bufend to 11278 SvEND(PL_linestr). */ 11279 if (shared == PL_parser->lex_shared) 11280 PL_bufend = SvEND(linestr); 11281 s = olds; 11282 } 11283 else { 11284 SV *linestr_save; 11285 char *oldbufptr_save; 11286 char *oldoldbufptr_save; 11287 streaming: 11288 sv_grow_fresh(tmpstr, 80); 11289 SvPVCLEAR_FRESH(tmpstr); /* avoid "uninitialized" warning */ 11290 term = PL_tokenbuf[1]; 11291 len--; 11292 linestr_save = PL_linestr; /* must restore this afterwards */ 11293 d = s; /* and this */ 11294 oldbufptr_save = PL_oldbufptr; 11295 oldoldbufptr_save = PL_oldoldbufptr; 11296 PL_linestr = newSVpvs(""); 11297 PL_bufend = SvPVX(PL_linestr); 11298 11299 while (1) { 11300 PL_bufptr = PL_bufend; 11301 CopLINE_set(PL_curcop, 11302 origline + 1 + PL_parser->herelines); 11303 11304 if ( !lex_next_chunk(LEX_NO_TERM) 11305 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) 11306 { 11307 /* Simply freeing linestr_save might seem simpler here, as it 11308 does not matter what PL_linestr points to, since we are 11309 about to croak; but in a quote-like op, linestr_save 11310 will have been prospectively freed already, via 11311 SAVEFREESV(PL_linestr) in sublex_push, so it's easier to 11312 restore PL_linestr. */ 11313 SvREFCNT_dec_NN(PL_linestr); 11314 PL_linestr = linestr_save; 11315 PL_oldbufptr = oldbufptr_save; 11316 PL_oldoldbufptr = oldoldbufptr_save; 11317 goto interminable; 11318 } 11319 11320 CopLINE_set(PL_curcop, origline); 11321 11322 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 11323 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 11324 /* ^That should be enough to avoid this needing to grow: */ 11325 sv_catpvs(PL_linestr, "\n\0"); 11326 assert(s == SvPVX(PL_linestr)); 11327 PL_bufend = SvEND(PL_linestr); 11328 } 11329 11330 s = PL_bufptr; 11331 PL_parser->herelines++; 11332 PL_last_lop = PL_last_uni = NULL; 11333 11334 #ifndef PERL_STRICT_CR 11335 if (PL_bufend - PL_linestart >= 2) { 11336 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') 11337 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 11338 { 11339 PL_bufend[-2] = '\n'; 11340 PL_bufend--; 11341 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 11342 } 11343 else if (PL_bufend[-1] == '\r') 11344 PL_bufend[-1] = '\n'; 11345 } 11346 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 11347 PL_bufend[-1] = '\n'; 11348 #endif 11349 11350 if (indented && (PL_bufend-s) >= len) { 11351 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len)); 11352 11353 if (found) { 11354 char *backup = found; 11355 indent_len = 0; 11356 11357 /* Only valid if it's preceded by whitespace only */ 11358 while (backup != s && --backup >= s) { 11359 if (! SPACE_OR_TAB(*backup)) { 11360 break; 11361 } 11362 indent_len++; 11363 } 11364 11365 /* All whitespace or none! */ 11366 if (backup == found || SPACE_OR_TAB(*backup)) { 11367 Newx(indent, indent_len + 1, char); 11368 memcpy(indent, backup, indent_len); 11369 indent[indent_len] = 0; 11370 SvREFCNT_dec(PL_linestr); 11371 PL_linestr = linestr_save; 11372 PL_linestart = SvPVX(linestr_save); 11373 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 11374 PL_oldbufptr = oldbufptr_save; 11375 PL_oldoldbufptr = oldoldbufptr_save; 11376 s = d; 11377 break; 11378 } 11379 } 11380 11381 /* Didn't find it */ 11382 sv_catsv(tmpstr,PL_linestr); 11383 } 11384 else { 11385 if (*s == term && PL_bufend-s >= len 11386 && memEQ(s,PL_tokenbuf + 1,len)) 11387 { 11388 SvREFCNT_dec(PL_linestr); 11389 PL_linestr = linestr_save; 11390 PL_linestart = SvPVX(linestr_save); 11391 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 11392 PL_oldbufptr = oldbufptr_save; 11393 PL_oldoldbufptr = oldoldbufptr_save; 11394 s = d; 11395 break; 11396 } 11397 else { 11398 sv_catsv(tmpstr,PL_linestr); 11399 } 11400 } 11401 } /* while (1) */ 11402 } 11403 11404 PL_multi_end = origline + PL_parser->herelines; 11405 11406 if (indented && indent) { 11407 STRLEN linecount = 1; 11408 STRLEN herelen = SvCUR(tmpstr); 11409 char *ss = SvPVX(tmpstr); 11410 char *se = ss + herelen; 11411 SV *newstr = newSV(herelen+1); 11412 SvPOK_on(newstr); 11413 11414 /* Trim leading whitespace */ 11415 while (ss < se) { 11416 /* newline only? Copy and move on */ 11417 if (*ss == '\n') { 11418 sv_catpvs(newstr,"\n"); 11419 ss++; 11420 linecount++; 11421 11422 /* Found our indentation? Strip it */ 11423 } 11424 else if (se - ss >= indent_len 11425 && memEQ(ss, indent, indent_len)) 11426 { 11427 STRLEN le = 0; 11428 ss += indent_len; 11429 11430 while ((ss + le) < se && *(ss + le) != '\n') 11431 le++; 11432 11433 sv_catpvn(newstr, ss, le); 11434 ss += le; 11435 11436 /* Line doesn't begin with our indentation? Croak */ 11437 } 11438 else { 11439 Safefree(indent); 11440 Perl_croak(aTHX_ 11441 "Indentation on line %d of here-doc doesn't match delimiter", 11442 (int)linecount 11443 ); 11444 } 11445 } /* while */ 11446 11447 /* avoid sv_setsv() as we don't want to COW here */ 11448 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr)); 11449 Safefree(indent); 11450 SvREFCNT_dec_NN(newstr); 11451 } 11452 11453 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 11454 SvPV_shrink_to_cur(tmpstr); 11455 } 11456 11457 if (!IN_BYTES) { 11458 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 11459 SvUTF8_on(tmpstr); 11460 } 11461 11462 PL_lex_stuff = tmpstr; 11463 pl_yylval.ival = op_type; 11464 return s; 11465 11466 interminable: 11467 if (indent) 11468 Safefree(indent); 11469 SvREFCNT_dec(tmpstr); 11470 CopLINE_set(PL_curcop, origline); 11471 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); 11472 } 11473 11474 11475 /* scan_inputsymbol 11476 takes: position of first '<' in input buffer 11477 returns: position of first char following the matching '>' in 11478 input buffer 11479 side-effects: pl_yylval and lex_op are set. 11480 11481 This code handles: 11482 11483 <> read from ARGV 11484 <<>> read from ARGV without magic open 11485 <FH> read from filehandle 11486 <pkg::FH> read from package qualified filehandle 11487 <pkg'FH> read from package qualified filehandle 11488 <$fh> read from filehandle in $fh 11489 <*.h> filename glob 11490 11491 */ 11492 11493 STATIC char * 11494 S_scan_inputsymbol(pTHX_ char *start) 11495 { 11496 char *s = start; /* current position in buffer */ 11497 char *end; 11498 I32 len; 11499 bool nomagicopen = FALSE; 11500 char *d = PL_tokenbuf; /* start of temp holding space */ 11501 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 11502 11503 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 11504 11505 end = (char *) memchr(s, '\n', PL_bufend - s); 11506 if (!end) 11507 end = PL_bufend; 11508 if (s[1] == '<' && s[2] == '>' && s[3] == '>') { 11509 nomagicopen = TRUE; 11510 *d = '\0'; 11511 len = 0; 11512 s += 3; 11513 } 11514 else 11515 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 11516 11517 /* die if we didn't have space for the contents of the <>, 11518 or if it didn't end, or if we see a newline 11519 */ 11520 11521 if (len >= (I32)sizeof PL_tokenbuf) 11522 Perl_croak(aTHX_ "Excessively long <> operator"); 11523 if (s >= end) 11524 Perl_croak(aTHX_ "Unterminated <> operator"); 11525 11526 s++; 11527 11528 /* check for <$fh> 11529 Remember, only scalar variables are interpreted as filehandles by 11530 this code. Anything more complex (e.g., <$fh{$num}>) will be 11531 treated as a glob() call. 11532 This code makes use of the fact that except for the $ at the front, 11533 a scalar variable and a filehandle look the same. 11534 */ 11535 if (*d == '$' && d[1]) d++; 11536 11537 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 11538 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') { 11539 d += UTF ? UTF8SKIP(d) : 1; 11540 } 11541 11542 /* If we've tried to read what we allow filehandles to look like, and 11543 there's still text left, then it must be a glob() and not a getline. 11544 Use scan_str to pull out the stuff between the <> and treat it 11545 as nothing more than a string. 11546 */ 11547 11548 if (d - PL_tokenbuf != len) { 11549 pl_yylval.ival = OP_GLOB; 11550 s = scan_str(start,FALSE,FALSE,FALSE,NULL); 11551 if (!s) 11552 Perl_croak(aTHX_ "Glob not terminated"); 11553 return s; 11554 } 11555 else { 11556 bool readline_overridden = FALSE; 11557 GV *gv_readline; 11558 /* we're in a filehandle read situation */ 11559 d = PL_tokenbuf; 11560 11561 /* turn <> into <ARGV> */ 11562 if (!len) 11563 Copy("ARGV",d,5,char); 11564 11565 /* Check whether readline() is overridden */ 11566 if ((gv_readline = gv_override("readline",8))) 11567 readline_overridden = TRUE; 11568 11569 /* if <$fh>, create the ops to turn the variable into a 11570 filehandle 11571 */ 11572 if (*d == '$') { 11573 /* try to find it in the pad for this block, otherwise find 11574 add symbol table ops 11575 */ 11576 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); 11577 if (tmp != NOT_IN_PAD) { 11578 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 11579 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 11580 HEK * const stashname = HvNAME_HEK(stash); 11581 SV * const sym = newSVhek_mortal(stashname); 11582 sv_catpvs(sym, "::"); 11583 sv_catpv(sym, d+1); 11584 d = SvPVX(sym); 11585 goto intro_sym; 11586 } 11587 else { 11588 OP * const o = newPADxVOP(OP_PADSV, 0, tmp); 11589 PL_lex_op = readline_overridden 11590 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11591 op_append_elem(OP_LIST, o, 11592 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 11593 : newUNOP(OP_READLINE, 0, o); 11594 } 11595 } 11596 else { 11597 GV *gv; 11598 ++d; 11599 intro_sym: 11600 gv = gv_fetchpv(d, 11601 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), 11602 SVt_PV); 11603 PL_lex_op = readline_overridden 11604 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11605 op_append_elem(OP_LIST, 11606 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 11607 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11608 : newUNOP(OP_READLINE, 0, 11609 newUNOP(OP_RV2SV, 0, 11610 newGVOP(OP_GV, 0, gv))); 11611 } 11612 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 11613 pl_yylval.ival = OP_NULL; 11614 } 11615 11616 /* If it's none of the above, it must be a literal filehandle 11617 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 11618 else { 11619 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 11620 PL_lex_op = readline_overridden 11621 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11622 op_append_elem(OP_LIST, 11623 newGVOP(OP_GV, 0, gv), 11624 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11625 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); 11626 pl_yylval.ival = OP_NULL; 11627 11628 /* leave the token generation above to avoid confusing the parser */ 11629 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) { 11630 no_bareword_filehandle(d); 11631 } 11632 } 11633 } 11634 11635 return s; 11636 } 11637 11638 11639 /* scan_str 11640 takes: 11641 start position in buffer 11642 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but 11643 only if they are of the open/close form 11644 keep_delims preserve the delimiters around the string 11645 re_reparse compiling a run-time /(?{})/: 11646 collapse // to /, and skip encoding src 11647 delimp if non-null, this is set to the position of 11648 the closing delimiter, or just after it if 11649 the closing and opening delimiters differ 11650 (i.e., the opening delimiter of a substitu- 11651 tion replacement) 11652 returns: position to continue reading from buffer 11653 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 11654 updates the read buffer. 11655 11656 This subroutine pulls a string out of the input. It is called for: 11657 q single quotes q(literal text) 11658 ' single quotes 'literal text' 11659 qq double quotes qq(interpolate $here please) 11660 " double quotes "interpolate $here please" 11661 qx backticks qx(/bin/ls -l) 11662 ` backticks `/bin/ls -l` 11663 qw quote words @EXPORT_OK = qw( func() $spam ) 11664 m// regexp match m/this/ 11665 s/// regexp substitute s/this/that/ 11666 tr/// string transliterate tr/this/that/ 11667 y/// string transliterate y/this/that/ 11668 ($*@) sub prototypes sub foo ($) 11669 (stuff) sub attr parameters sub foo : attr(stuff) 11670 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 11671 11672 In most of these cases (all but <>, patterns and transliterate) 11673 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 11674 calls scan_str(). s/// makes yylex() call scan_subst() which calls 11675 scan_str(). tr/// and y/// make yylex() call scan_trans() which 11676 calls scan_str(). 11677 11678 It skips whitespace before the string starts, and treats the first 11679 character as the delimiter. If the delimiter is one of ([{< then 11680 the corresponding "close" character )]}> is used as the closing 11681 delimiter. It allows quoting of delimiters, and if the string has 11682 balanced delimiters ([{<>}]) it allows nesting. 11683 11684 On success, the SV with the resulting string is put into lex_stuff or, 11685 if that is already non-NULL, into lex_repl. The second case occurs only 11686 when parsing the RHS of the special constructs s/// and tr/// (y///). 11687 For convenience, the terminating delimiter character is stuffed into 11688 SvIVX of the SV. 11689 */ 11690 11691 char * 11692 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, 11693 char **delimp 11694 ) 11695 { 11696 SV *sv; /* scalar value: string */ 11697 char *s = start; /* current position in the buffer */ 11698 char *to; /* current position in the sv's data */ 11699 int brackets = 1; /* bracket nesting level */ 11700 bool d_is_utf8 = FALSE; /* is there any utf8 content? */ 11701 UV open_delim_code; /* code point */ 11702 char open_delim_str[UTF8_MAXBYTES+1]; 11703 STRLEN delim_byte_len; /* each delimiter currently is the same number 11704 of bytes */ 11705 line_t herelines; 11706 11707 /* The only non-UTF character that isn't a stand alone grapheme is 11708 * white-space, hence can't be a delimiter. */ 11709 const char * non_grapheme_msg = "Use of unassigned code point or" 11710 " non-standalone grapheme for a delimiter" 11711 " is not allowed"; 11712 PERL_ARGS_ASSERT_SCAN_STR; 11713 11714 /* skip space before the delimiter */ 11715 if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so 11716 'start' also has to change */ 11717 s = start = skipspace(s); 11718 } 11719 11720 /* mark where we are, in case we need to report errors */ 11721 CLINE; 11722 11723 /* after skipping whitespace, the next character is the delimiter */ 11724 if (! UTF || UTF8_IS_INVARIANT(*s)) { 11725 open_delim_code = (U8) *s; 11726 open_delim_str[0] = *s; 11727 delim_byte_len = 1; 11728 } 11729 else { 11730 open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, 11731 &delim_byte_len); 11732 if (UNLIKELY(! is_grapheme((U8 *) start, 11733 (U8 *) s, 11734 (U8 *) PL_bufend, 11735 open_delim_code))) 11736 { 11737 yyerror(non_grapheme_msg); 11738 } 11739 11740 Copy(s, open_delim_str, delim_byte_len, char); 11741 } 11742 open_delim_str[delim_byte_len] = '\0'; /* Only for safety */ 11743 11744 11745 /* mark where we are */ 11746 PL_multi_start = CopLINE(PL_curcop); 11747 PL_multi_open = open_delim_code; 11748 herelines = PL_parser->herelines; 11749 11750 const char * legal_paired_opening_delims; 11751 const char * legal_paired_closing_delims; 11752 const char * deprecated_opening_delims; 11753 if (FEATURE_MORE_DELIMS_IS_ENABLED) { 11754 if (UTF) { 11755 legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS; 11756 legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS; 11757 11758 /* We are deprecating using a closing delimiter as the opening, in 11759 * case we want in the future to accept them reversed. The string 11760 * may include ones that are legal, but the code below won't look 11761 * at this string unless it didn't find a legal opening one */ 11762 deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS; 11763 } 11764 else { 11765 legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS; 11766 legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS; 11767 deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS; 11768 } 11769 } 11770 else { 11771 legal_paired_opening_delims = "([{<"; 11772 legal_paired_closing_delims = ")]}>"; 11773 deprecated_opening_delims = (UTF) 11774 ? DEPRECATED_OPENING_UTF8_BRACKETS 11775 : DEPRECATED_OPENING_NON_UTF8_BRACKETS; 11776 } 11777 11778 const char * legal_paired_opening_delims_end = legal_paired_opening_delims 11779 + strlen(legal_paired_opening_delims); 11780 const char * deprecated_delims_end = deprecated_opening_delims 11781 + strlen(deprecated_opening_delims); 11782 11783 const char * close_delim_str = open_delim_str; 11784 UV close_delim_code = open_delim_code; 11785 11786 /* If the delimiter has a mirror-image closing one, get it */ 11787 const char *tmps = ninstr(legal_paired_opening_delims, 11788 legal_paired_opening_delims_end, 11789 open_delim_str, open_delim_str + delim_byte_len); 11790 if (tmps) { 11791 /* Here, there is a paired delimiter, and tmps points to its position 11792 in the string of the accepted opening paired delimiters. The 11793 corresponding position in the string of closing ones is the 11794 beginning of the paired mate. Both contain the same number of 11795 bytes. */ 11796 close_delim_str = legal_paired_closing_delims 11797 + (tmps - legal_paired_opening_delims); 11798 11799 /* The list of paired delimiters contains all the ASCII ones that have 11800 * always been legal, and no other ASCIIs. Don't raise a message if 11801 * using one of these */ 11802 if (! isASCII(open_delim_code)) { 11803 Perl_ck_warner_d(aTHX_ 11804 packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS), 11805 "Use of '%" UTF8f "' is experimental as a string delimiter", 11806 UTF8fARG(UTF, delim_byte_len, open_delim_str)); 11807 } 11808 11809 close_delim_code = (UTF) 11810 ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL) 11811 : * (U8 *) close_delim_str; 11812 } 11813 else { /* Here, the delimiter isn't paired, hence the close is the same as 11814 the open; and has already been set up. But make sure it isn't 11815 deprecated to use this particular delimiter, as we plan 11816 eventually to make it paired. */ 11817 if (ninstr(deprecated_opening_delims, deprecated_delims_end, 11818 open_delim_str, open_delim_str + delim_byte_len)) 11819 { 11820 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED), 11821 "Use of '%" UTF8f "' is deprecated as a string delimiter", 11822 UTF8fARG(UTF, delim_byte_len, open_delim_str)); 11823 } 11824 11825 /* Note that a NUL may be used as a delimiter, and this happens when 11826 * delimiting an empty string, and no special handling for it is 11827 * needed, as ninstr() calls are used */ 11828 } 11829 11830 PL_multi_close = close_delim_code; 11831 11832 if (PL_multi_open == PL_multi_close) { 11833 keep_bracketed_quoted = FALSE; 11834 } 11835 11836 /* create a new SV to hold the contents. 79 is the SV's initial length. 11837 What a random number. */ 11838 sv = newSV_type(SVt_PVIV); 11839 sv_grow_fresh(sv, 79); 11840 SvIV_set(sv, close_delim_code); 11841 (void)SvPOK_only(sv); /* validate pointer */ 11842 11843 /* move past delimiter and try to read a complete string */ 11844 if (keep_delims) 11845 sv_catpvn(sv, s, delim_byte_len); 11846 s += delim_byte_len; 11847 for (;;) { 11848 /* extend sv if need be */ 11849 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 11850 /* set 'to' to the next character in the sv's string */ 11851 to = SvPVX(sv)+SvCUR(sv); 11852 11853 /* read until we run out of string, or we find the closing delimiter */ 11854 while (s < PL_bufend) { 11855 /* embedded newlines increment the line count */ 11856 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 11857 COPLINE_INC_WITH_HERELINES; 11858 11859 /* backslashes can escape the closing delimiter */ 11860 if ( *s == '\\' && s < PL_bufend - delim_byte_len 11861 11862 /* ... but not if the delimiter itself is a backslash */ 11863 && close_delim_code != '\\') 11864 { 11865 /* Here, we have an escaping backslash. If we're supposed to 11866 * discard those that escape the closing delimiter, just 11867 * discard this one */ 11868 if ( ! keep_bracketed_quoted 11869 && ( memEQ(s + 1, open_delim_str, delim_byte_len) 11870 || ( PL_multi_open == PL_multi_close 11871 && re_reparse && s[1] == '\\') 11872 || memEQ(s + 1, close_delim_str, delim_byte_len))) 11873 { 11874 s++; 11875 } 11876 else /* any other escapes are simply copied straight through */ 11877 *to++ = *s++; 11878 } 11879 else if ( s < PL_bufend - (delim_byte_len - 1) 11880 && memEQ(s, close_delim_str, delim_byte_len) 11881 && --brackets <= 0) 11882 { 11883 /* Found unescaped closing delimiter, unnested if we care about 11884 * that; so are done. 11885 * 11886 * In the case of the opening and closing delimiters being 11887 * different, we have to deal with nesting; the conditional 11888 * above makes sure we don't get here until the nesting level, 11889 * 'brackets', is back down to zero. In the other case, 11890 * nesting isn't an issue, and 'brackets' never can get 11891 * incremented above 0, so will come here at the first closing 11892 * delimiter. 11893 * 11894 * Only grapheme delimiters are legal. */ 11895 if ( UTF /* All Non-UTF-8's are graphemes */ 11896 && UNLIKELY(! is_grapheme((U8 *) start, 11897 (U8 *) s, 11898 (U8 *) PL_bufend, 11899 close_delim_code))) 11900 { 11901 yyerror(non_grapheme_msg); 11902 } 11903 11904 break; 11905 } 11906 /* No nesting if open eq close */ 11907 else if ( PL_multi_open != PL_multi_close 11908 && s < PL_bufend - (delim_byte_len - 1) 11909 && memEQ(s, open_delim_str, delim_byte_len)) 11910 { 11911 brackets++; 11912 } 11913 11914 /* Here, still in the middle of the string; copy this character */ 11915 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) { 11916 *to++ = *s++; 11917 } 11918 else { 11919 size_t this_char_len = UTF8SKIP(s); 11920 Copy(s, to, this_char_len, char); 11921 s += this_char_len; 11922 to += this_char_len; 11923 11924 d_is_utf8 = TRUE; 11925 } 11926 } /* End of loop through buffer */ 11927 11928 /* Here, found end of the string, OR ran out of buffer: terminate the 11929 * copied string and update the sv's end-of-string */ 11930 *to = '\0'; 11931 SvCUR_set(sv, to - SvPVX_const(sv)); 11932 11933 /* 11934 * this next chunk reads more into the buffer if we're not done yet 11935 */ 11936 11937 if (s < PL_bufend) 11938 break; /* handle case where we are done yet :-) */ 11939 11940 #ifndef PERL_STRICT_CR 11941 if (to - SvPVX_const(sv) >= 2) { 11942 if ( (to[-2] == '\r' && to[-1] == '\n') 11943 || (to[-2] == '\n' && to[-1] == '\r')) 11944 { 11945 to[-2] = '\n'; 11946 to--; 11947 SvCUR_set(sv, to - SvPVX_const(sv)); 11948 } 11949 else if (to[-1] == '\r') 11950 to[-1] = '\n'; 11951 } 11952 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 11953 to[-1] = '\n'; 11954 #endif 11955 11956 /* if we're out of file, or a read fails, bail and reset the current 11957 line marker so we can report where the unterminated string began 11958 */ 11959 COPLINE_INC_WITH_HERELINES; 11960 PL_bufptr = PL_bufend; 11961 if (!lex_next_chunk(0)) { 11962 ASSUME(sv); 11963 SvREFCNT_dec(sv); 11964 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 11965 return NULL; 11966 } 11967 s = start = PL_bufptr; 11968 } /* End of infinite loop */ 11969 11970 /* at this point, we have successfully read the delimited string */ 11971 11972 if (keep_delims) 11973 sv_catpvn(sv, s, delim_byte_len); 11974 s += delim_byte_len; 11975 11976 if (d_is_utf8) 11977 SvUTF8_on(sv); 11978 11979 PL_multi_end = CopLINE(PL_curcop); 11980 CopLINE_set(PL_curcop, PL_multi_start); 11981 PL_parser->herelines = herelines; 11982 11983 /* if we allocated too much space, give some back */ 11984 if (SvCUR(sv) + 5 < SvLEN(sv)) { 11985 SvLEN_set(sv, SvCUR(sv) + 1); 11986 SvPV_shrink_to_cur(sv); 11987 } 11988 11989 /* decide whether this is the first or second quoted string we've read 11990 for this op 11991 */ 11992 11993 if (PL_lex_stuff) 11994 PL_parser->lex_sub_repl = sv; 11995 else 11996 PL_lex_stuff = sv; 11997 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s; 11998 return s; 11999 } 12000 12001 /* 12002 scan_num 12003 takes: pointer to position in buffer 12004 returns: pointer to new position in buffer 12005 side-effects: builds ops for the constant in pl_yylval.op 12006 12007 Read a number in any of the formats that Perl accepts: 12008 12009 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 12010 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 12011 0b[01](_?[01])* binary integers 12012 0o?[0-7](_?[0-7])* octal integers 12013 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 12014 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats 12015 12016 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 12017 thing it reads. 12018 12019 If it reads a number without a decimal point or an exponent, it will 12020 try converting the number to an integer and see if it can do so 12021 without loss of precision. 12022 */ 12023 12024 char * 12025 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 12026 { 12027 const char *s = start; /* current position in buffer */ 12028 char *d; /* destination in temp buffer */ 12029 char *e; /* end of temp buffer */ 12030 NV nv; /* number read, as a double */ 12031 SV *sv = NULL; /* place to put the converted number */ 12032 bool floatit; /* boolean: int or float? */ 12033 const char *lastub = NULL; /* position of last underbar */ 12034 static const char* const number_too_long = "Number too long"; 12035 bool warned_about_underscore = 0; 12036 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */ 12037 #define WARN_ABOUT_UNDERSCORE() \ 12038 do { \ 12039 if (!warned_about_underscore) { \ 12040 warned_about_underscore = 1; \ 12041 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \ 12042 "Misplaced _ in number"); \ 12043 } \ 12044 } while(0) 12045 /* Hexadecimal floating point. 12046 * 12047 * In many places (where we have quads and NV is IEEE 754 double) 12048 * we can fit the mantissa bits of a NV into an unsigned quad. 12049 * (Note that UVs might not be quads even when we have quads.) 12050 * This will not work everywhere, though (either no quads, or 12051 * using long doubles), in which case we have to resort to NV, 12052 * which will probably mean horrible loss of precision due to 12053 * multiple fp operations. */ 12054 bool hexfp = FALSE; 12055 int total_bits = 0; 12056 int significant_bits = 0; 12057 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) 12058 # define HEXFP_UQUAD 12059 Uquad_t hexfp_uquad = 0; 12060 int hexfp_frac_bits = 0; 12061 #else 12062 # define HEXFP_NV 12063 NV hexfp_nv = 0.0; 12064 #endif 12065 NV hexfp_mult = 1.0; 12066 UV high_non_zero = 0; /* highest digit */ 12067 int non_zero_integer_digits = 0; 12068 bool new_octal = FALSE; /* octal with "0o" prefix */ 12069 12070 PERL_ARGS_ASSERT_SCAN_NUM; 12071 12072 /* We use the first character to decide what type of number this is */ 12073 12074 switch (*s) { 12075 default: 12076 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 12077 12078 /* if it starts with a 0, it could be an octal number, a decimal in 12079 0.13 disguise, or a hexadecimal number, or a binary number. */ 12080 case '0': 12081 { 12082 /* variables: 12083 u holds the "number so far" 12084 overflowed was the number more than we can hold? 12085 12086 Shift is used when we add a digit. It also serves as an "are 12087 we in octal/hex/binary?" indicator to disallow hex characters 12088 when in octal mode. 12089 */ 12090 NV n = 0.0; 12091 UV u = 0; 12092 bool overflowed = FALSE; 12093 bool just_zero = TRUE; /* just plain 0 or binary number? */ 12094 bool has_digs = FALSE; 12095 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 12096 static const char* const bases[5] = 12097 { "", "binary", "", "octal", "hexadecimal" }; 12098 static const char* const Bases[5] = 12099 { "", "Binary", "", "Octal", "Hexadecimal" }; 12100 static const char* const maxima[5] = 12101 { "", 12102 "0b11111111111111111111111111111111", 12103 "", 12104 "037777777777", 12105 "0xffffffff" }; 12106 12107 /* check for hex */ 12108 if (isALPHA_FOLD_EQ(s[1], 'x')) { 12109 shift = 4; 12110 s += 2; 12111 just_zero = FALSE; 12112 } else if (isALPHA_FOLD_EQ(s[1], 'b')) { 12113 shift = 1; 12114 s += 2; 12115 just_zero = FALSE; 12116 } 12117 /* check for a decimal in disguise */ 12118 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) 12119 goto decimal; 12120 /* so it must be octal */ 12121 else { 12122 shift = 3; 12123 s++; 12124 if (isALPHA_FOLD_EQ(*s, 'o')) { 12125 s++; 12126 just_zero = FALSE; 12127 new_octal = TRUE; 12128 } 12129 } 12130 12131 if (*s == '_') { 12132 WARN_ABOUT_UNDERSCORE(); 12133 lastub = s++; 12134 } 12135 12136 /* read the rest of the number */ 12137 for (;;) { 12138 /* x is used in the overflow test, 12139 b is the digit we're adding on. */ 12140 UV x, b; 12141 12142 switch (*s) { 12143 12144 /* if we don't mention it, we're done */ 12145 default: 12146 goto out; 12147 12148 /* _ are ignored -- but warned about if consecutive */ 12149 case '_': 12150 if (lastub && s == lastub + 1) 12151 WARN_ABOUT_UNDERSCORE(); 12152 lastub = s++; 12153 break; 12154 12155 /* 8 and 9 are not octal */ 12156 case '8': case '9': 12157 if (shift == 3) 12158 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 12159 /* FALLTHROUGH */ 12160 12161 /* octal digits */ 12162 case '2': case '3': case '4': 12163 case '5': case '6': case '7': 12164 if (shift == 1) 12165 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 12166 /* FALLTHROUGH */ 12167 12168 case '0': case '1': 12169 b = *s++ & 15; /* ASCII digit -> value of digit */ 12170 goto digit; 12171 12172 /* hex digits */ 12173 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 12174 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 12175 /* make sure they said 0x */ 12176 if (shift != 4) 12177 goto out; 12178 b = (*s++ & 7) + 9; 12179 12180 /* Prepare to put the digit we have onto the end 12181 of the number so far. We check for overflows. 12182 */ 12183 12184 digit: 12185 just_zero = FALSE; 12186 has_digs = TRUE; 12187 if (!overflowed) { 12188 assert(shift >= 0); 12189 x = u << shift; /* make room for the digit */ 12190 12191 total_bits += shift; 12192 12193 if ((x >> shift) != u 12194 && !(PL_hints & HINT_NEW_BINARY)) { 12195 overflowed = TRUE; 12196 n = (NV) u; 12197 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 12198 "Integer overflow in %s number", 12199 bases[shift]); 12200 } else 12201 u = x | b; /* add the digit to the end */ 12202 } 12203 if (overflowed) { 12204 n *= nvshift[shift]; 12205 /* If an NV has not enough bits in its 12206 * mantissa to represent an UV this summing of 12207 * small low-order numbers is a waste of time 12208 * (because the NV cannot preserve the 12209 * low-order bits anyway): we could just 12210 * remember when did we overflow and in the 12211 * end just multiply n by the right 12212 * amount. */ 12213 n += (NV) b; 12214 } 12215 12216 if (high_non_zero == 0 && b > 0) 12217 high_non_zero = b; 12218 12219 if (high_non_zero) 12220 non_zero_integer_digits++; 12221 12222 /* this could be hexfp, but peek ahead 12223 * to avoid matching ".." */ 12224 if (UNLIKELY(HEXFP_PEEK(s))) { 12225 goto out; 12226 } 12227 12228 break; 12229 } 12230 } 12231 12232 /* if we get here, we had success: make a scalar value from 12233 the number. 12234 */ 12235 out: 12236 12237 /* final misplaced underbar check */ 12238 if (s[-1] == '_') 12239 WARN_ABOUT_UNDERSCORE(); 12240 12241 if (UNLIKELY(HEXFP_PEEK(s))) { 12242 /* Do sloppy (on the underbars) but quick detection 12243 * (and value construction) for hexfp, the decimal 12244 * detection will shortly be more thorough with the 12245 * underbar checks. */ 12246 const char* h = s; 12247 significant_bits = non_zero_integer_digits * shift; 12248 #ifdef HEXFP_UQUAD 12249 hexfp_uquad = u; 12250 #else /* HEXFP_NV */ 12251 hexfp_nv = u; 12252 #endif 12253 /* Ignore the leading zero bits of 12254 * the high (first) non-zero digit. */ 12255 if (high_non_zero) { 12256 if (high_non_zero < 0x8) 12257 significant_bits--; 12258 if (high_non_zero < 0x4) 12259 significant_bits--; 12260 if (high_non_zero < 0x2) 12261 significant_bits--; 12262 } 12263 12264 if (*h == '.') { 12265 #ifdef HEXFP_NV 12266 NV nv_mult = 1.0; 12267 #endif 12268 bool accumulate = TRUE; 12269 U8 b = 0; /* silence compiler warning */ 12270 int lim = 1 << shift; 12271 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) || 12272 *h == '_'); h++) { 12273 if (isXDIGIT(*h)) { 12274 significant_bits += shift; 12275 #ifdef HEXFP_UQUAD 12276 if (accumulate) { 12277 if (significant_bits < NV_MANT_DIG) { 12278 /* We are in the long "run" of xdigits, 12279 * accumulate the full four bits. */ 12280 assert(shift >= 0); 12281 hexfp_uquad <<= shift; 12282 hexfp_uquad |= b; 12283 hexfp_frac_bits += shift; 12284 } else if (significant_bits - shift < NV_MANT_DIG) { 12285 /* We are at a hexdigit either at, 12286 * or straddling, the edge of mantissa. 12287 * We will try grabbing as many as 12288 * possible bits. */ 12289 int tail = 12290 significant_bits - NV_MANT_DIG; 12291 if (tail <= 0) 12292 tail += shift; 12293 assert(tail >= 0); 12294 hexfp_uquad <<= tail; 12295 assert((shift - tail) >= 0); 12296 hexfp_uquad |= b >> (shift - tail); 12297 hexfp_frac_bits += tail; 12298 12299 /* Ignore the trailing zero bits 12300 * of the last non-zero xdigit. 12301 * 12302 * The assumption here is that if 12303 * one has input of e.g. the xdigit 12304 * eight (0x8), there is only one 12305 * bit being input, not the full 12306 * four bits. Conversely, if one 12307 * specifies a zero xdigit, the 12308 * assumption is that one really 12309 * wants all those bits to be zero. */ 12310 if (b) { 12311 if ((b & 0x1) == 0x0) { 12312 significant_bits--; 12313 if ((b & 0x2) == 0x0) { 12314 significant_bits--; 12315 if ((b & 0x4) == 0x0) { 12316 significant_bits--; 12317 } 12318 } 12319 } 12320 } 12321 12322 accumulate = FALSE; 12323 } 12324 } else { 12325 /* Keep skipping the xdigits, and 12326 * accumulating the significant bits, 12327 * but do not shift the uquad 12328 * (which would catastrophically drop 12329 * high-order bits) or accumulate the 12330 * xdigits anymore. */ 12331 } 12332 #else /* HEXFP_NV */ 12333 if (accumulate) { 12334 nv_mult /= nvshift[shift]; 12335 if (nv_mult > 0.0) 12336 hexfp_nv += b * nv_mult; 12337 else 12338 accumulate = FALSE; 12339 } 12340 #endif 12341 } 12342 if (significant_bits >= NV_MANT_DIG) 12343 accumulate = FALSE; 12344 } 12345 } 12346 12347 if ((total_bits > 0 || significant_bits > 0) && 12348 isALPHA_FOLD_EQ(*h, 'p')) { 12349 bool negexp = FALSE; 12350 h++; 12351 if (*h == '+') 12352 h++; 12353 else if (*h == '-') { 12354 negexp = TRUE; 12355 h++; 12356 } 12357 if (isDIGIT(*h)) { 12358 I32 hexfp_exp = 0; 12359 while (isDIGIT(*h) || *h == '_') { 12360 if (isDIGIT(*h)) { 12361 hexfp_exp *= 10; 12362 hexfp_exp += *h - '0'; 12363 #ifdef NV_MIN_EXP 12364 if (negexp 12365 && -hexfp_exp < NV_MIN_EXP - 1) { 12366 /* NOTE: this means that the exponent 12367 * underflow warning happens for 12368 * the IEEE 754 subnormals (denormals), 12369 * because DBL_MIN_EXP etc are the lowest 12370 * possible binary (or, rather, DBL_RADIX-base) 12371 * exponent for normals, not subnormals. 12372 * 12373 * This may or may not be a good thing. */ 12374 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12375 "Hexadecimal float: exponent underflow"); 12376 break; 12377 } 12378 #endif 12379 #ifdef NV_MAX_EXP 12380 if (!negexp 12381 && hexfp_exp > NV_MAX_EXP - 1) { 12382 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12383 "Hexadecimal float: exponent overflow"); 12384 break; 12385 } 12386 #endif 12387 } 12388 h++; 12389 } 12390 if (negexp) 12391 hexfp_exp = -hexfp_exp; 12392 #ifdef HEXFP_UQUAD 12393 hexfp_exp -= hexfp_frac_bits; 12394 #endif 12395 hexfp_mult = Perl_pow(2.0, hexfp_exp); 12396 hexfp = TRUE; 12397 goto decimal; 12398 } 12399 } 12400 } 12401 12402 if (!just_zero && !has_digs) { 12403 /* 0x, 0o or 0b with no digits, treat it as an error. 12404 Originally this backed up the parse before the b or 12405 x, but that has the potential for silent changes in 12406 behaviour, like for: "0x.3" and "0x+$foo". 12407 */ 12408 const char *d = s; 12409 char *oldbp = PL_bufptr; 12410 if (*d) ++d; /* so the user sees the bad non-digit */ 12411 PL_bufptr = (char *)d; /* so yyerror reports the context */ 12412 yyerror(Perl_form(aTHX_ "No digits found for %s literal", 12413 bases[shift])); 12414 PL_bufptr = oldbp; 12415 } 12416 12417 if (overflowed) { 12418 if (n > 4294967295.0) 12419 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 12420 "%s number > %s non-portable", 12421 Bases[shift], 12422 new_octal ? "0o37777777777" : maxima[shift]); 12423 sv = newSVnv(n); 12424 } 12425 else { 12426 #if UVSIZE > 4 12427 if (u > 0xffffffff) 12428 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 12429 "%s number > %s non-portable", 12430 Bases[shift], 12431 new_octal ? "0o37777777777" : maxima[shift]); 12432 #endif 12433 sv = newSVuv(u); 12434 } 12435 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 12436 sv = new_constant(start, s - start, "integer", 12437 sv, NULL, NULL, 0, NULL); 12438 else if (PL_hints & HINT_NEW_BINARY) 12439 sv = new_constant(start, s - start, "binary", 12440 sv, NULL, NULL, 0, NULL); 12441 } 12442 break; 12443 12444 /* 12445 handle decimal numbers. 12446 we're also sent here when we read a 0 as the first digit 12447 */ 12448 case '1': case '2': case '3': case '4': case '5': 12449 case '6': case '7': case '8': case '9': case '.': 12450 decimal: 12451 d = PL_tokenbuf; 12452 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 12453 floatit = FALSE; 12454 if (hexfp) { 12455 floatit = TRUE; 12456 *d++ = '0'; 12457 switch (shift) { 12458 case 4: 12459 *d++ = 'x'; 12460 s = start + 2; 12461 break; 12462 case 3: 12463 if (new_octal) { 12464 *d++ = 'o'; 12465 s = start + 2; 12466 break; 12467 } 12468 s = start + 1; 12469 break; 12470 case 1: 12471 *d++ = 'b'; 12472 s = start + 2; 12473 break; 12474 default: 12475 NOT_REACHED; /* NOTREACHED */ 12476 } 12477 } 12478 12479 /* read next group of digits and _ and copy into d */ 12480 while (isDIGIT(*s) 12481 || *s == '_' 12482 || UNLIKELY(hexfp && isXDIGIT(*s))) 12483 { 12484 /* skip underscores, checking for misplaced ones 12485 if -w is on 12486 */ 12487 if (*s == '_') { 12488 if (lastub && s == lastub + 1) 12489 WARN_ABOUT_UNDERSCORE(); 12490 lastub = s++; 12491 } 12492 else { 12493 /* check for end of fixed-length buffer */ 12494 if (d >= e) 12495 Perl_croak(aTHX_ "%s", number_too_long); 12496 /* if we're ok, copy the character */ 12497 *d++ = *s++; 12498 } 12499 } 12500 12501 /* final misplaced underbar check */ 12502 if (lastub && s == lastub + 1) 12503 WARN_ABOUT_UNDERSCORE(); 12504 12505 /* read a decimal portion if there is one. avoid 12506 3..5 being interpreted as the number 3. followed 12507 by .5 12508 */ 12509 if (*s == '.' && s[1] != '.') { 12510 floatit = TRUE; 12511 *d++ = *s++; 12512 12513 if (*s == '_') { 12514 WARN_ABOUT_UNDERSCORE(); 12515 lastub = s; 12516 } 12517 12518 /* copy, ignoring underbars, until we run out of digits. 12519 */ 12520 for (; isDIGIT(*s) 12521 || *s == '_' 12522 || UNLIKELY(hexfp && isXDIGIT(*s)); 12523 s++) 12524 { 12525 /* fixed length buffer check */ 12526 if (d >= e) 12527 Perl_croak(aTHX_ "%s", number_too_long); 12528 if (*s == '_') { 12529 if (lastub && s == lastub + 1) 12530 WARN_ABOUT_UNDERSCORE(); 12531 lastub = s; 12532 } 12533 else 12534 *d++ = *s; 12535 } 12536 /* fractional part ending in underbar? */ 12537 if (s[-1] == '_') 12538 WARN_ABOUT_UNDERSCORE(); 12539 if (*s == '.' && isDIGIT(s[1])) { 12540 /* oops, it's really a v-string, but without the "v" */ 12541 s = start; 12542 goto vstring; 12543 } 12544 } 12545 12546 /* read exponent part, if present */ 12547 if ((isALPHA_FOLD_EQ(*s, 'e') 12548 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) 12549 && memCHRs("+-0123456789_", s[1])) 12550 { 12551 int exp_digits = 0; 12552 const char *save_s = s; 12553 char * save_d = d; 12554 12555 /* regardless of whether user said 3E5 or 3e5, use lower 'e', 12556 ditto for p (hexfloats) */ 12557 if ((isALPHA_FOLD_EQ(*s, 'e'))) { 12558 /* At least some Mach atof()s don't grok 'E' */ 12559 *d++ = 'e'; 12560 } 12561 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { 12562 *d++ = 'p'; 12563 } 12564 12565 s++; 12566 12567 12568 /* stray preinitial _ */ 12569 if (*s == '_') { 12570 WARN_ABOUT_UNDERSCORE(); 12571 lastub = s++; 12572 } 12573 12574 /* allow positive or negative exponent */ 12575 if (*s == '+' || *s == '-') 12576 *d++ = *s++; 12577 12578 /* stray initial _ */ 12579 if (*s == '_') { 12580 WARN_ABOUT_UNDERSCORE(); 12581 lastub = s++; 12582 } 12583 12584 /* read digits of exponent */ 12585 while (isDIGIT(*s) || *s == '_') { 12586 if (isDIGIT(*s)) { 12587 ++exp_digits; 12588 if (d >= e) 12589 Perl_croak(aTHX_ "%s", number_too_long); 12590 *d++ = *s++; 12591 } 12592 else { 12593 if (((lastub && s == lastub + 1) 12594 || (!isDIGIT(s[1]) && s[1] != '_'))) 12595 WARN_ABOUT_UNDERSCORE(); 12596 lastub = s++; 12597 } 12598 } 12599 12600 if (!exp_digits) { 12601 /* no exponent digits, the [eEpP] could be for something else, 12602 * though in practice we don't get here for p since that's preparsed 12603 * earlier, and results in only the 0xX being consumed, so behave similarly 12604 * for decimal floats and consume only the D.DD, leaving the [eE] to the 12605 * next token. 12606 */ 12607 s = save_s; 12608 d = save_d; 12609 } 12610 else { 12611 floatit = TRUE; 12612 } 12613 } 12614 12615 12616 /* 12617 We try to do an integer conversion first if no characters 12618 indicating "float" have been found. 12619 */ 12620 12621 if (!floatit) { 12622 UV uv; 12623 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 12624 12625 if (flags == IS_NUMBER_IN_UV) { 12626 if (uv <= IV_MAX) 12627 sv = newSViv(uv); /* Prefer IVs over UVs. */ 12628 else 12629 sv = newSVuv(uv); 12630 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 12631 if (uv <= (UV) IV_MIN) 12632 sv = newSViv(-(IV)uv); 12633 else 12634 floatit = TRUE; 12635 } else 12636 floatit = TRUE; 12637 } 12638 if (floatit) { 12639 /* terminate the string */ 12640 *d = '\0'; 12641 if (UNLIKELY(hexfp)) { 12642 # ifdef NV_MANT_DIG 12643 if (significant_bits > NV_MANT_DIG) 12644 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12645 "Hexadecimal float: mantissa overflow"); 12646 # endif 12647 #ifdef HEXFP_UQUAD 12648 nv = hexfp_uquad * hexfp_mult; 12649 #else /* HEXFP_NV */ 12650 nv = hexfp_nv * hexfp_mult; 12651 #endif 12652 } else { 12653 nv = Atof(PL_tokenbuf); 12654 } 12655 sv = newSVnv(nv); 12656 } 12657 12658 if ( floatit 12659 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 12660 const char *const key = floatit ? "float" : "integer"; 12661 const STRLEN keylen = floatit ? 5 : 7; 12662 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 12663 key, keylen, sv, NULL, NULL, 0, NULL); 12664 } 12665 break; 12666 12667 /* if it starts with a v, it could be a v-string */ 12668 case 'v': 12669 vstring: 12670 sv = newSV(5); /* preallocate storage space */ 12671 ENTER_with_name("scan_vstring"); 12672 SAVEFREESV(sv); 12673 s = scan_vstring(s, PL_bufend, sv); 12674 SvREFCNT_inc_simple_void_NN(sv); 12675 LEAVE_with_name("scan_vstring"); 12676 break; 12677 } 12678 12679 /* make the op for the constant and return */ 12680 12681 if (sv) 12682 lvalp->opval = newSVOP(OP_CONST, 0, sv); 12683 else 12684 lvalp->opval = NULL; 12685 12686 return (char *)s; 12687 } 12688 12689 STATIC char * 12690 S_scan_formline(pTHX_ char *s) 12691 { 12692 SV * const stuff = newSVpvs(""); 12693 bool needargs = FALSE; 12694 bool eofmt = FALSE; 12695 12696 PERL_ARGS_ASSERT_SCAN_FORMLINE; 12697 12698 while (!needargs) { 12699 char *eol; 12700 if (*s == '.') { 12701 char *t = s+1; 12702 #ifdef PERL_STRICT_CR 12703 while (SPACE_OR_TAB(*t)) 12704 t++; 12705 #else 12706 while (SPACE_OR_TAB(*t) || *t == '\r') 12707 t++; 12708 #endif 12709 if (*t == '\n' || t == PL_bufend) { 12710 eofmt = TRUE; 12711 break; 12712 } 12713 } 12714 eol = (char *) memchr(s,'\n',PL_bufend-s); 12715 if (! eol) { 12716 eol = PL_bufend; 12717 } 12718 else { 12719 eol++; 12720 } 12721 if (*s != '#') { 12722 char *t; 12723 for (t = s; t < eol; t++) { 12724 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 12725 needargs = FALSE; 12726 goto enough; /* ~~ must be first line in formline */ 12727 } 12728 if (*t == '@' || *t == '^') 12729 needargs = TRUE; 12730 } 12731 if (eol > s) { 12732 sv_catpvn(stuff, s, eol-s); 12733 #ifndef PERL_STRICT_CR 12734 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 12735 char *end = SvPVX(stuff) + SvCUR(stuff); 12736 end[-2] = '\n'; 12737 end[-1] = '\0'; 12738 SvCUR_set(stuff, SvCUR(stuff) - 1); 12739 } 12740 #endif 12741 } 12742 else 12743 break; 12744 } 12745 s = (char*)eol; 12746 if ((PL_rsfp || PL_parser->filtered) 12747 && PL_parser->form_lex_state == LEX_NORMAL) { 12748 bool got_some; 12749 PL_bufptr = PL_bufend; 12750 COPLINE_INC_WITH_HERELINES; 12751 got_some = lex_next_chunk(0); 12752 CopLINE_dec(PL_curcop); 12753 s = PL_bufptr; 12754 if (!got_some) 12755 break; 12756 } 12757 incline(s, PL_bufend); 12758 } 12759 enough: 12760 if (!SvCUR(stuff) || needargs) 12761 PL_lex_state = PL_parser->form_lex_state; 12762 if (SvCUR(stuff)) { 12763 PL_expect = XSTATE; 12764 if (needargs) { 12765 const char *s2 = s; 12766 while (isSPACE(*s2) && *s2 != '\n') 12767 s2++; 12768 if (*s2 == '{') { 12769 PL_expect = XTERMBLOCK; 12770 NEXTVAL_NEXTTOKE.ival = 0; 12771 force_next(KW_DO); 12772 } 12773 NEXTVAL_NEXTTOKE.ival = 0; 12774 force_next(FORMLBRACK); 12775 } 12776 if (!IN_BYTES) { 12777 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 12778 SvUTF8_on(stuff); 12779 } 12780 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff); 12781 force_next(THING); 12782 } 12783 else { 12784 SvREFCNT_dec(stuff); 12785 if (eofmt) 12786 PL_lex_formbrack = 0; 12787 } 12788 return s; 12789 } 12790 12791 /* 12792 =for apidoc start_subparse 12793 12794 Set things up for parsing a subroutine. 12795 12796 If C<is_format> is non-zero, the input is to be considered a format sub 12797 (a specialised sub used to implement perl's C<format> feature); else a 12798 normal C<sub>. 12799 12800 C<flags> are added to the flags for C<PL_compcv>. C<flags> may include the 12801 C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method. 12802 12803 This returns the value of C<PL_savestack_ix> that was in effect upon entry to 12804 the function; 12805 12806 =cut 12807 */ 12808 12809 I32 12810 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 12811 { 12812 const I32 oldsavestack_ix = PL_savestack_ix; 12813 CV* const outsidecv = PL_compcv; 12814 bool is_method = flags & CVf_IsMETHOD; 12815 12816 if (is_method) 12817 croak_kw_unless_class("method"); 12818 12819 SAVEI32(PL_subline); 12820 save_item(PL_subname); 12821 SAVESPTR(PL_compcv); 12822 12823 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 12824 CvFLAGS(PL_compcv) |= flags; 12825 12826 PL_subline = CopLINE(PL_curcop); 12827 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 12828 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 12829 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 12830 if (outsidecv && CvPADLIST(outsidecv)) 12831 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; 12832 if (is_method) 12833 class_prepare_method_parse(PL_compcv); 12834 12835 return oldsavestack_ix; 12836 } 12837 12838 /* If o represents a builtin attribute, apply it to cv and returns true. 12839 * Otherwise does nothing and returns false 12840 */ 12841 12842 STATIC bool 12843 S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o) 12844 { 12845 assert(o->op_type == OP_CONST); 12846 SV *sv = cSVOPo_sv; 12847 STRLEN len = SvCUR(sv); 12848 12849 /* NOTE: any CV attrs applied here need to be part of 12850 the CVf_BUILTIN_ATTRS define in cv.h! */ 12851 12852 if(memEQs(SvPVX(sv), len, "lvalue")) 12853 CvLVALUE_on(cv); 12854 else if(memEQs(SvPVX(sv), len, "method")) 12855 CvNOWARN_AMBIGUOUS_on(cv); 12856 else if(memEQs(SvPVX(sv), len, "const")) { 12857 CvANONCONST_on(cv); 12858 if (!CvANON(cv)) 12859 yyerror(":const is not permitted on named subroutines"); 12860 } 12861 else 12862 return false; 12863 12864 return true; 12865 } 12866 12867 /* 12868 =for apidoc apply_builtin_cv_attributes 12869 12870 Given an OP_LIST containing attribute definitions, filter it for known builtin 12871 attributes to apply to the cv, returning a possibly-smaller list containing 12872 just the remaining ones. 12873 12874 =cut 12875 */ 12876 12877 OP * 12878 Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist) 12879 { 12880 PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES; 12881 12882 if(!attrlist) 12883 return attrlist; 12884 12885 if(attrlist->op_type != OP_LIST) { 12886 /* Not in fact a list but just a single attribute */ 12887 if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) { 12888 op_free(attrlist); 12889 return NULL; 12890 } 12891 12892 return attrlist; 12893 } 12894 12895 OP *prev = cLISTOPx(attrlist)->op_first; 12896 assert(prev->op_type == OP_PUSHMARK); 12897 OP *o = OpSIBLING(prev); 12898 12899 OP *next; 12900 for(; o; o = next) { 12901 next = OpSIBLING(o); 12902 12903 if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) { 12904 op_sibling_splice(attrlist, prev, 1, NULL); 12905 op_free(o); 12906 } 12907 else { 12908 prev = o; 12909 } 12910 } 12911 12912 if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first)) 12913 return attrlist; 12914 12915 /* The list is now entirely empty, we might as well discard it */ 12916 op_free(attrlist); 12917 return NULL; 12918 } 12919 12920 12921 /* Do extra initialisation of a CV (typically one just created by 12922 * start_subparse()) if that CV is for a named sub 12923 */ 12924 12925 void 12926 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop) 12927 { 12928 PERL_ARGS_ASSERT_INIT_NAMED_CV; 12929 12930 if (nameop->op_type == OP_CONST) { 12931 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv); 12932 if ( strEQ(name, "BEGIN") 12933 || strEQ(name, "END") 12934 || strEQ(name, "INIT") 12935 || strEQ(name, "CHECK") 12936 || strEQ(name, "UNITCHECK") 12937 ) 12938 CvSPECIAL_on(cv); 12939 } 12940 else 12941 /* State subs inside anonymous subs need to be 12942 clonable themselves. */ 12943 if ( CvANON(CvOUTSIDE(cv)) 12944 || CvCLONE(CvOUTSIDE(cv)) 12945 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST( 12946 CvOUTSIDE(cv) 12947 ))[nameop->op_targ]) 12948 ) 12949 CvCLONE_on(cv); 12950 } 12951 12952 12953 static int 12954 S_yywarn(pTHX_ const char *const s, U32 flags) 12955 { 12956 PERL_ARGS_ASSERT_YYWARN; 12957 12958 PL_in_eval |= EVAL_WARNONLY; 12959 yyerror_pv(s, flags); 12960 return 0; 12961 } 12962 12963 void 12964 Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name) 12965 { 12966 PERL_ARGS_ASSERT_ABORT_EXECUTION; 12967 12968 if (msg_sv) { 12969 if (PL_minus_c) 12970 Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name); 12971 else { 12972 Perl_croak(aTHX_ 12973 "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name); 12974 } 12975 } else { 12976 if (PL_minus_c) 12977 Perl_croak(aTHX_ "%s had compilation errors.\n", name); 12978 else { 12979 Perl_croak(aTHX_ 12980 "Execution of %s aborted due to compilation errors.\n", name); 12981 } 12982 } 12983 12984 NOT_REACHED; /* NOTREACHED */ 12985 } 12986 12987 void 12988 Perl_yyquit(pTHX) 12989 { 12990 /* Called, after at least one error has been found, to abort the parse now, 12991 * instead of trying to forge ahead */ 12992 12993 yyerror_pvn(NULL, 0, 0); 12994 } 12995 12996 int 12997 Perl_yyerror(pTHX_ const char *const s) 12998 { 12999 PERL_ARGS_ASSERT_YYERROR; 13000 int r = yyerror_pvn(s, strlen(s), 0); 13001 return r; 13002 } 13003 13004 int 13005 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 13006 { 13007 PERL_ARGS_ASSERT_YYERROR_PV; 13008 int r = yyerror_pvn(s, strlen(s), flags); 13009 return r; 13010 } 13011 13012 int 13013 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 13014 { 13015 const char *context = NULL; 13016 int contlen = -1; 13017 SV *msg; 13018 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 13019 int yychar = PL_parser->yychar; 13020 13021 /* Output error message 's' with length 'len'. 'flags' are SV flags that 13022 * apply. If the number of errors found is large enough, it abandons 13023 * parsing. If 's' is NULL, there is no message, and it abandons 13024 * processing unconditionally */ 13025 13026 if (s != NULL) { 13027 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp)) 13028 sv_catpvs(where_sv, "at EOF"); 13029 else if ( PL_oldoldbufptr 13030 && PL_bufptr > PL_oldoldbufptr 13031 && PL_bufptr - PL_oldoldbufptr < 200 13032 && PL_oldoldbufptr != PL_oldbufptr 13033 && PL_oldbufptr != PL_bufptr) 13034 { 13035 while (isSPACE(*PL_oldoldbufptr)) 13036 PL_oldoldbufptr++; 13037 context = PL_oldoldbufptr; 13038 contlen = PL_bufptr - PL_oldoldbufptr; 13039 } 13040 else if ( PL_oldbufptr 13041 && PL_bufptr > PL_oldbufptr 13042 && PL_bufptr - PL_oldbufptr < 200 13043 && PL_oldbufptr != PL_bufptr) 13044 { 13045 while (isSPACE(*PL_oldbufptr)) 13046 PL_oldbufptr++; 13047 context = PL_oldbufptr; 13048 contlen = PL_bufptr - PL_oldbufptr; 13049 } 13050 else if (yychar > 255) 13051 sv_catpvs(where_sv, "next token ???"); 13052 else if (yychar == YYEMPTY) { 13053 if (PL_lex_state == LEX_NORMAL) 13054 sv_catpvs(where_sv, "at end of line"); 13055 else if (PL_lex_inpat) 13056 sv_catpvs(where_sv, "within pattern"); 13057 else 13058 sv_catpvs(where_sv, "within string"); 13059 } 13060 else { 13061 sv_catpvs(where_sv, "next char "); 13062 if (yychar < 32) 13063 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 13064 else if (isPRINT_LC(yychar)) { 13065 const char string = yychar; 13066 sv_catpvn(where_sv, &string, 1); 13067 } 13068 else 13069 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 13070 } 13071 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 13072 Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ", 13073 OutCopFILE(PL_curcop), 13074 (PL_parser->preambling == NOLINE 13075 ? CopLINE(PL_curcop) 13076 : PL_parser->preambling)); 13077 if (context) 13078 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", 13079 UTF8fARG(UTF, contlen, context)); 13080 else 13081 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); 13082 if ( PL_multi_start < PL_multi_end 13083 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) 13084 { 13085 Perl_sv_catpvf(aTHX_ msg, 13086 " (Might be a runaway multi-line %c%c string starting on" 13087 " line %" LINE_Tf ")\n", 13088 (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start); 13089 PL_multi_end = 0; 13090 } 13091 if (PL_in_eval & EVAL_WARNONLY) { 13092 PL_in_eval &= ~EVAL_WARNONLY; 13093 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); 13094 } 13095 else { 13096 qerror(msg); 13097 } 13098 } 13099 /* if there was no message then this is a yyquit(), which is actualy handled 13100 * by qerror() with a NULL argument */ 13101 if (s == NULL) 13102 qerror(NULL); 13103 13104 PL_in_my = 0; 13105 PL_in_my_stash = NULL; 13106 return 0; 13107 } 13108 13109 STATIC char* 13110 S_swallow_bom(pTHX_ U8 *s) 13111 { 13112 const STRLEN slen = SvCUR(PL_linestr); 13113 13114 PERL_ARGS_ASSERT_SWALLOW_BOM; 13115 13116 switch (s[0]) { 13117 case 0xFF: 13118 if (s[1] == 0xFE) { 13119 /* UTF-16 little-endian? (or UTF-32LE?) */ 13120 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 13121 /* diag_listed_as: Unsupported script encoding %s */ 13122 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 13123 #ifndef PERL_NO_UTF16_FILTER 13124 #ifdef DEBUGGING 13125 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 13126 #endif 13127 s += 2; 13128 if (PL_bufend > (char*)s) { 13129 s = add_utf16_textfilter(s, TRUE); 13130 } 13131 #else 13132 /* diag_listed_as: Unsupported script encoding %s */ 13133 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 13134 #endif 13135 } 13136 break; 13137 case 0xFE: 13138 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 13139 #ifndef PERL_NO_UTF16_FILTER 13140 #ifdef DEBUGGING 13141 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 13142 #endif 13143 s += 2; 13144 if (PL_bufend > (char *)s) { 13145 s = add_utf16_textfilter(s, FALSE); 13146 } 13147 #else 13148 /* diag_listed_as: Unsupported script encoding %s */ 13149 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 13150 #endif 13151 } 13152 break; 13153 case BOM_UTF8_FIRST_BYTE: { 13154 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { 13155 #ifdef DEBUGGING 13156 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 13157 #endif 13158 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ 13159 } 13160 break; 13161 } 13162 case 0: 13163 if (slen > 3) { 13164 if (s[1] == 0) { 13165 if (s[2] == 0xFE && s[3] == 0xFF) { 13166 /* UTF-32 big-endian */ 13167 /* diag_listed_as: Unsupported script encoding %s */ 13168 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 13169 } 13170 } 13171 else if (s[2] == 0 && s[3] != 0) { 13172 /* Leading bytes 13173 * 00 xx 00 xx 13174 * are a good indicator of UTF-16BE. */ 13175 #ifndef PERL_NO_UTF16_FILTER 13176 #ifdef DEBUGGING 13177 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 13178 #endif 13179 s = add_utf16_textfilter(s, FALSE); 13180 #else 13181 /* diag_listed_as: Unsupported script encoding %s */ 13182 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 13183 #endif 13184 } 13185 } 13186 break; 13187 13188 default: 13189 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 13190 /* Leading bytes 13191 * xx 00 xx 00 13192 * are a good indicator of UTF-16LE. */ 13193 #ifndef PERL_NO_UTF16_FILTER 13194 #ifdef DEBUGGING 13195 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 13196 #endif 13197 s = add_utf16_textfilter(s, TRUE); 13198 #else 13199 /* diag_listed_as: Unsupported script encoding %s */ 13200 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 13201 #endif 13202 } 13203 } 13204 return (char*)s; 13205 } 13206 13207 13208 #ifndef PERL_NO_UTF16_FILTER 13209 static I32 13210 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 13211 { 13212 SV *const filter = FILTER_DATA(idx); 13213 /* We re-use this each time round, throwing the contents away before we 13214 return. */ 13215 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 13216 SV *const utf8_buffer = filter; 13217 IV status = IoPAGE(filter); 13218 const bool reverse = cBOOL(IoLINES(filter)); 13219 I32 retval; 13220 13221 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 13222 13223 /* As we're automatically added, at the lowest level, and hence only called 13224 from this file, we can be sure that we're not called in block mode. Hence 13225 don't bother writing code to deal with block mode. */ 13226 if (maxlen) { 13227 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 13228 } 13229 if (status < 0) { 13230 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status); 13231 } 13232 DEBUG_P(PerlIO_printf(Perl_debug_log, 13233 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 13234 FPTR2DPTR(void *, S_utf16_textfilter), 13235 reverse ? 'l' : 'b', idx, maxlen, status, 13236 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 13237 13238 while (1) { 13239 STRLEN chars; 13240 STRLEN have; 13241 Size_t newlen; 13242 U8 *end; 13243 /* First, look in our buffer of existing UTF-8 data: */ 13244 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 13245 13246 if (nl) { 13247 ++nl; 13248 } else if (status == 0) { 13249 /* EOF */ 13250 IoPAGE(filter) = 0; 13251 nl = SvEND(utf8_buffer); 13252 } 13253 if (nl) { 13254 STRLEN got = nl - SvPVX(utf8_buffer); 13255 /* Did we have anything to append? */ 13256 retval = got != 0; 13257 sv_catpvn(sv, SvPVX(utf8_buffer), got); 13258 /* Everything else in this code works just fine if SVp_POK isn't 13259 set. This, however, needs it, and we need it to work, else 13260 we loop infinitely because the buffer is never consumed. */ 13261 sv_chop(utf8_buffer, nl); 13262 break; 13263 } 13264 13265 /* OK, not a complete line there, so need to read some more UTF-16. 13266 Read an extra octect if the buffer currently has an odd number. */ 13267 while (1) { 13268 if (status <= 0) 13269 break; 13270 if (SvCUR(utf16_buffer) >= 2) { 13271 /* Location of the high octet of the last complete code point. 13272 Gosh, UTF-16 is a pain. All the benefits of variable length, 13273 *coupled* with all the benefits of partial reads and 13274 endianness. */ 13275 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 13276 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 13277 13278 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 13279 break; 13280 } 13281 13282 /* We have the first half of a surrogate. Read more. */ 13283 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 13284 } 13285 13286 status = FILTER_READ(idx + 1, utf16_buffer, 13287 160 + (SvCUR(utf16_buffer) & 1)); 13288 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer))); 13289 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 13290 if (status < 0) { 13291 /* Error */ 13292 IoPAGE(filter) = status; 13293 return status; 13294 } 13295 } 13296 13297 /* 'chars' isn't quite the right name, as code points above 0xFFFF 13298 * require 4 bytes per char */ 13299 chars = SvCUR(utf16_buffer) >> 1; 13300 have = SvCUR(utf8_buffer); 13301 13302 /* Assume the worst case size as noted by the functions: twice the 13303 * number of input bytes */ 13304 SvGROW(utf8_buffer, have + chars * 4 + 1); 13305 13306 if (reverse) { 13307 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 13308 (U8*)SvPVX_const(utf8_buffer) + have, 13309 chars * 2, &newlen); 13310 } else { 13311 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 13312 (U8*)SvPVX_const(utf8_buffer) + have, 13313 chars * 2, &newlen); 13314 } 13315 SvCUR_set(utf8_buffer, have + newlen); 13316 *end = '\0'; 13317 13318 /* No need to keep this SV "well-formed" with a '\0' after the end, as 13319 it's private to us, and utf16_to_utf8{,reversed} take a 13320 (pointer,length) pair, rather than a NUL-terminated string. */ 13321 if(SvCUR(utf16_buffer) & 1) { 13322 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 13323 SvCUR_set(utf16_buffer, 1); 13324 } else { 13325 SvCUR_set(utf16_buffer, 0); 13326 } 13327 } 13328 DEBUG_P(PerlIO_printf(Perl_debug_log, 13329 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 13330 status, 13331 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 13332 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 13333 return retval; 13334 } 13335 13336 static U8 * 13337 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 13338 { 13339 SV *filter = filter_add(S_utf16_textfilter, NULL); 13340 13341 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 13342 13343 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 13344 SvPVCLEAR(filter); 13345 IoLINES(filter) = reversed; 13346 IoPAGE(filter) = 1; /* Not EOF */ 13347 13348 /* Sadly, we have to return a valid pointer, come what may, so we have to 13349 ignore any error return from this. */ 13350 SvCUR_set(PL_linestr, 0); 13351 if (FILTER_READ(0, PL_linestr, 0)) { 13352 SvUTF8_on(PL_linestr); 13353 } else { 13354 SvUTF8_on(PL_linestr); 13355 } 13356 PL_bufend = SvEND(PL_linestr); 13357 return (U8*)SvPVX(PL_linestr); 13358 } 13359 #endif 13360 13361 /* 13362 =for apidoc scan_vstring 13363 13364 Returns a pointer to the next character after the parsed 13365 vstring, as well as updating the passed in sv. 13366 13367 Function must be called like 13368 13369 sv = sv_2mortal(newSV(5)); 13370 s = scan_vstring(s,e,sv); 13371 13372 where s and e are the start and end of the string. 13373 The sv should already be large enough to store the vstring 13374 passed in, for performance reasons. 13375 13376 This function may croak if fatal warnings are enabled in the 13377 calling scope, hence the sv_2mortal in the example (to prevent 13378 a leak). Make sure to do SvREFCNT_inc afterwards if you use 13379 sv_2mortal. 13380 13381 =cut 13382 */ 13383 13384 char * 13385 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 13386 { 13387 const char *pos = s; 13388 const char *start = s; 13389 13390 PERL_ARGS_ASSERT_SCAN_VSTRING; 13391 13392 if (*pos == 'v') pos++; /* get past 'v' */ 13393 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 13394 pos++; 13395 if ( *pos != '.') { 13396 /* this may not be a v-string if followed by => */ 13397 const char *next = pos; 13398 while (next < e && isSPACE(*next)) 13399 ++next; 13400 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 13401 /* return string not v-string */ 13402 sv_setpvn(sv,(char *)s,pos-s); 13403 return (char *)pos; 13404 } 13405 } 13406 13407 if (!isALPHA(*pos)) { 13408 U8 tmpbuf[UTF8_MAXBYTES+1]; 13409 13410 if (*s == 'v') 13411 s++; /* get past 'v' */ 13412 13413 SvPVCLEAR(sv); 13414 13415 for (;;) { 13416 /* this is atoi() that tolerates underscores */ 13417 U8 *tmpend; 13418 UV rev = 0; 13419 const char *end = pos; 13420 UV mult = 1; 13421 while (--end >= s) { 13422 if (*end != '_') { 13423 const UV orev = rev; 13424 rev += (*end - '0') * mult; 13425 mult *= 10; 13426 if (orev > rev) 13427 /* diag_listed_as: Integer overflow in %s number */ 13428 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 13429 "Integer overflow in decimal number"); 13430 } 13431 } 13432 13433 /* Append native character for the rev point */ 13434 tmpend = uvchr_to_utf8(tmpbuf, rev); 13435 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 13436 if (!UVCHR_IS_INVARIANT(rev)) 13437 SvUTF8_on(sv); 13438 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 13439 s = ++pos; 13440 else { 13441 s = pos; 13442 break; 13443 } 13444 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 13445 pos++; 13446 } 13447 SvPOK_on(sv); 13448 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 13449 SvRMAGICAL_on(sv); 13450 } 13451 return (char *)s; 13452 } 13453 13454 int 13455 Perl_keyword_plugin_standard(pTHX_ 13456 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 13457 { 13458 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 13459 PERL_UNUSED_CONTEXT; 13460 PERL_UNUSED_ARG(keyword_ptr); 13461 PERL_UNUSED_ARG(keyword_len); 13462 PERL_UNUSED_ARG(op_ptr); 13463 return KEYWORD_PLUGIN_DECLINE; 13464 } 13465 13466 STRLEN 13467 Perl_infix_plugin_standard(pTHX_ 13468 char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def) 13469 { 13470 PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD; 13471 PERL_UNUSED_CONTEXT; 13472 PERL_UNUSED_ARG(operator_ptr); 13473 PERL_UNUSED_ARG(operator_len); 13474 PERL_UNUSED_ARG(def); 13475 return 0; 13476 } 13477 13478 /* 13479 =for apidoc_section $lexer 13480 =for apidoc wrap_keyword_plugin 13481 13482 Puts a C function into the chain of keyword plugins. This is the 13483 preferred way to manipulate the L</PL_keyword_plugin> variable. 13484 C<new_plugin> is a pointer to the C function that is to be added to the 13485 keyword plugin chain, and C<old_plugin_p> points to the storage location 13486 where a pointer to the next function in the chain will be stored. The 13487 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable, 13488 while the value previously stored there is written to C<*old_plugin_p>. 13489 13490 L</PL_keyword_plugin> is global to an entire process, and a module wishing 13491 to hook keyword parsing may find itself invoked more than once per 13492 process, typically in different threads. To handle that situation, this 13493 function is idempotent. The location C<*old_plugin_p> must initially 13494 (once per process) contain a null pointer. A C variable of static 13495 duration (declared at file scope, typically also marked C<static> to give 13496 it internal linkage) will be implicitly initialised appropriately, if it 13497 does not have an explicit initialiser. This function will only actually 13498 modify the plugin chain if it finds C<*old_plugin_p> to be null. This 13499 function is also thread safe on the small scale. It uses appropriate 13500 locking to avoid race conditions in accessing L</PL_keyword_plugin>. 13501 13502 When this function is called, the function referenced by C<new_plugin> 13503 must be ready to be called, except for C<*old_plugin_p> being unfilled. 13504 In a threading situation, C<new_plugin> may be called immediately, even 13505 before this function has returned. C<*old_plugin_p> will always be 13506 appropriately set before C<new_plugin> is called. If C<new_plugin> 13507 decides not to do anything special with the identifier that it is given 13508 (which is the usual case for most calls to a keyword plugin), it must 13509 chain the plugin function referenced by C<*old_plugin_p>. 13510 13511 Taken all together, XS code to install a keyword plugin should typically 13512 look something like this: 13513 13514 static Perl_keyword_plugin_t next_keyword_plugin; 13515 static OP *my_keyword_plugin(pTHX_ 13516 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 13517 { 13518 if (memEQs(keyword_ptr, keyword_len, 13519 "my_new_keyword")) { 13520 ... 13521 } else { 13522 return next_keyword_plugin(aTHX_ 13523 keyword_ptr, keyword_len, op_ptr); 13524 } 13525 } 13526 BOOT: 13527 wrap_keyword_plugin(my_keyword_plugin, 13528 &next_keyword_plugin); 13529 13530 Direct access to L</PL_keyword_plugin> should be avoided. 13531 13532 =cut 13533 */ 13534 13535 void 13536 Perl_wrap_keyword_plugin(pTHX_ 13537 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) 13538 { 13539 13540 PERL_UNUSED_CONTEXT; 13541 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; 13542 if (*old_plugin_p) return; 13543 KEYWORD_PLUGIN_MUTEX_LOCK; 13544 if (!*old_plugin_p) { 13545 *old_plugin_p = PL_keyword_plugin; 13546 PL_keyword_plugin = new_plugin; 13547 } 13548 KEYWORD_PLUGIN_MUTEX_UNLOCK; 13549 } 13550 13551 /* 13552 =for apidoc wrap_infix_plugin 13553 13554 B<NOTE:> This API exists entirely for the purpose of making the CPAN module 13555 C<XS::Parse::Infix> work. It is not expected that additional modules will make 13556 use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing 13557 of new infix operators. 13558 13559 Puts a C function into the chain of infix plugins. This is the preferred 13560 way to manipulate the L</PL_infix_plugin> variable. C<new_plugin> is a 13561 pointer to the C function that is to be added to the infix plugin chain, and 13562 C<old_plugin_p> points to a storage location where a pointer to the next 13563 function in the chain will be stored. The value of C<new_plugin> is written 13564 into the L</PL_infix_plugin> variable, while the value previously stored there 13565 is written to C<*old_plugin_p>. 13566 13567 Direct access to L</PL_infix_plugin> should be avoided. 13568 13569 =cut 13570 */ 13571 13572 void 13573 Perl_wrap_infix_plugin(pTHX_ 13574 Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p) 13575 { 13576 13577 PERL_UNUSED_CONTEXT; 13578 PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN; 13579 if (*old_plugin_p) return; 13580 /* We use the same mutex as for PL_keyword_plugin as it's so rare either 13581 * of them is actually updated; no need for a dedicated one each */ 13582 KEYWORD_PLUGIN_MUTEX_LOCK; 13583 if (!*old_plugin_p) { 13584 *old_plugin_p = PL_infix_plugin; 13585 PL_infix_plugin = new_plugin; 13586 } 13587 KEYWORD_PLUGIN_MUTEX_UNLOCK; 13588 } 13589 13590 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 13591 static void 13592 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 13593 { 13594 SAVEI32(PL_lex_brackets); 13595 if (PL_lex_brackets > 100) 13596 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 13597 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 13598 SAVEI32(PL_lex_allbrackets); 13599 PL_lex_allbrackets = 0; 13600 SAVEI8(PL_lex_fakeeof); 13601 PL_lex_fakeeof = (U8)fakeeof; 13602 if(yyparse(gramtype) && !PL_parser->error_count) 13603 qerror(Perl_mess(aTHX_ "Parse error")); 13604 } 13605 13606 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 13607 static OP * 13608 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 13609 { 13610 OP *o; 13611 ENTER; 13612 SAVEVPTR(PL_eval_root); 13613 PL_eval_root = NULL; 13614 parse_recdescent(gramtype, fakeeof); 13615 o = PL_eval_root; 13616 LEAVE; 13617 return o; 13618 } 13619 13620 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 13621 static OP * 13622 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 13623 { 13624 OP *exprop; 13625 if (flags & ~PARSE_OPTIONAL) 13626 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 13627 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 13628 if (!exprop && !(flags & PARSE_OPTIONAL)) { 13629 if (!PL_parser->error_count) 13630 qerror(Perl_mess(aTHX_ "Parse error")); 13631 exprop = newOP(OP_NULL, 0); 13632 } 13633 return exprop; 13634 } 13635 13636 /* 13637 =for apidoc parse_arithexpr 13638 13639 Parse a Perl arithmetic expression. This may contain operators of precedence 13640 down to the bit shift operators. The expression must be followed (and thus 13641 terminated) either by a comparison or lower-precedence operator or by 13642 something that would normally terminate an expression such as semicolon. 13643 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13644 otherwise it is mandatory. It is up to the caller to ensure that the 13645 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13646 the source of the code to be parsed and the lexical context for the 13647 expression. 13648 13649 The op tree representing the expression is returned. If an optional 13650 expression is absent, a null pointer is returned, otherwise the pointer 13651 will be non-null. 13652 13653 If an error occurs in parsing or compilation, in most cases a valid op 13654 tree is returned anyway. The error is reflected in the parser state, 13655 normally resulting in a single exception at the top level of parsing 13656 which covers all the compilation errors that occurred. Some compilation 13657 errors, however, will throw an exception immediately. 13658 13659 =for apidoc Amnh||PARSE_OPTIONAL 13660 13661 =cut 13662 13663 */ 13664 13665 OP * 13666 Perl_parse_arithexpr(pTHX_ U32 flags) 13667 { 13668 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 13669 } 13670 13671 /* 13672 =for apidoc parse_termexpr 13673 13674 Parse a Perl term expression. This may contain operators of precedence 13675 down to the assignment operators. The expression must be followed (and thus 13676 terminated) either by a comma or lower-precedence operator or by 13677 something that would normally terminate an expression such as semicolon. 13678 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13679 otherwise it is mandatory. It is up to the caller to ensure that the 13680 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13681 the source of the code to be parsed and the lexical context for the 13682 expression. 13683 13684 The op tree representing the expression is returned. If an optional 13685 expression is absent, a null pointer is returned, otherwise the pointer 13686 will be non-null. 13687 13688 If an error occurs in parsing or compilation, in most cases a valid op 13689 tree is returned anyway. The error is reflected in the parser state, 13690 normally resulting in a single exception at the top level of parsing 13691 which covers all the compilation errors that occurred. Some compilation 13692 errors, however, will throw an exception immediately. 13693 13694 =cut 13695 */ 13696 13697 OP * 13698 Perl_parse_termexpr(pTHX_ U32 flags) 13699 { 13700 return parse_expr(LEX_FAKEEOF_COMMA, flags); 13701 } 13702 13703 /* 13704 =for apidoc parse_listexpr 13705 13706 Parse a Perl list expression. This may contain operators of precedence 13707 down to the comma operator. The expression must be followed (and thus 13708 terminated) either by a low-precedence logic operator such as C<or> or by 13709 something that would normally terminate an expression such as semicolon. 13710 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13711 otherwise it is mandatory. It is up to the caller to ensure that the 13712 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13713 the source of the code to be parsed and the lexical context for the 13714 expression. 13715 13716 The op tree representing the expression is returned. If an optional 13717 expression is absent, a null pointer is returned, otherwise the pointer 13718 will be non-null. 13719 13720 If an error occurs in parsing or compilation, in most cases a valid op 13721 tree is returned anyway. The error is reflected in the parser state, 13722 normally resulting in a single exception at the top level of parsing 13723 which covers all the compilation errors that occurred. Some compilation 13724 errors, however, will throw an exception immediately. 13725 13726 =cut 13727 */ 13728 13729 OP * 13730 Perl_parse_listexpr(pTHX_ U32 flags) 13731 { 13732 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 13733 } 13734 13735 /* 13736 =for apidoc parse_fullexpr 13737 13738 Parse a single complete Perl expression. This allows the full 13739 expression grammar, including the lowest-precedence operators such 13740 as C<or>. The expression must be followed (and thus terminated) by a 13741 token that an expression would normally be terminated by: end-of-file, 13742 closing bracketing punctuation, semicolon, or one of the keywords that 13743 signals a postfix expression-statement modifier. If C<flags> has the 13744 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is 13745 mandatory. It is up to the caller to ensure that the dynamic parser 13746 state (L</PL_parser> et al) is correctly set to reflect the source of 13747 the code to be parsed and the lexical context for the expression. 13748 13749 The op tree representing the expression is returned. If an optional 13750 expression is absent, a null pointer is returned, otherwise the pointer 13751 will be non-null. 13752 13753 If an error occurs in parsing or compilation, in most cases a valid op 13754 tree is returned anyway. The error is reflected in the parser state, 13755 normally resulting in a single exception at the top level of parsing 13756 which covers all the compilation errors that occurred. Some compilation 13757 errors, however, will throw an exception immediately. 13758 13759 =cut 13760 */ 13761 13762 OP * 13763 Perl_parse_fullexpr(pTHX_ U32 flags) 13764 { 13765 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 13766 } 13767 13768 /* 13769 =for apidoc parse_block 13770 13771 Parse a single complete Perl code block. This consists of an opening 13772 brace, a sequence of statements, and a closing brace. The block 13773 constitutes a lexical scope, so C<my> variables and various compile-time 13774 effects can be contained within it. It is up to the caller to ensure 13775 that the dynamic parser state (L</PL_parser> et al) is correctly set to 13776 reflect the source of the code to be parsed and the lexical context for 13777 the statement. 13778 13779 The op tree representing the code block is returned. This is always a 13780 real op, never a null pointer. It will normally be a C<lineseq> list, 13781 including C<nextstate> or equivalent ops. No ops to construct any kind 13782 of runtime scope are included by virtue of it being a block. 13783 13784 If an error occurs in parsing or compilation, in most cases a valid op 13785 tree (most likely null) is returned anyway. The error is reflected in 13786 the parser state, normally resulting in a single exception at the top 13787 level of parsing which covers all the compilation errors that occurred. 13788 Some compilation errors, however, will throw an exception immediately. 13789 13790 The C<flags> parameter is reserved for future use, and must always 13791 be zero. 13792 13793 =cut 13794 */ 13795 13796 OP * 13797 Perl_parse_block(pTHX_ U32 flags) 13798 { 13799 if (flags) 13800 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 13801 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 13802 } 13803 13804 /* 13805 =for apidoc parse_barestmt 13806 13807 Parse a single unadorned Perl statement. This may be a normal imperative 13808 statement or a declaration that has compile-time effect. It does not 13809 include any label or other affixture. It is up to the caller to ensure 13810 that the dynamic parser state (L</PL_parser> et al) is correctly set to 13811 reflect the source of the code to be parsed and the lexical context for 13812 the statement. 13813 13814 The op tree representing the statement is returned. This may be a 13815 null pointer if the statement is null, for example if it was actually 13816 a subroutine definition (which has compile-time side effects). If not 13817 null, it will be ops directly implementing the statement, suitable to 13818 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 13819 equivalent op (except for those embedded in a scope contained entirely 13820 within the statement). 13821 13822 If an error occurs in parsing or compilation, in most cases a valid op 13823 tree (most likely null) is returned anyway. The error is reflected in 13824 the parser state, normally resulting in a single exception at the top 13825 level of parsing which covers all the compilation errors that occurred. 13826 Some compilation errors, however, will throw an exception immediately. 13827 13828 The C<flags> parameter is reserved for future use, and must always 13829 be zero. 13830 13831 =cut 13832 */ 13833 13834 OP * 13835 Perl_parse_barestmt(pTHX_ U32 flags) 13836 { 13837 if (flags) 13838 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 13839 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 13840 } 13841 13842 /* 13843 =for apidoc parse_label 13844 13845 Parse a single label, possibly optional, of the type that may prefix a 13846 Perl statement. It is up to the caller to ensure that the dynamic parser 13847 state (L</PL_parser> et al) is correctly set to reflect the source of 13848 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the 13849 label is optional, otherwise it is mandatory. 13850 13851 The name of the label is returned in the form of a fresh scalar. If an 13852 optional label is absent, a null pointer is returned. 13853 13854 If an error occurs in parsing, which can only occur if the label is 13855 mandatory, a valid label is returned anyway. The error is reflected in 13856 the parser state, normally resulting in a single exception at the top 13857 level of parsing which covers all the compilation errors that occurred. 13858 13859 =cut 13860 */ 13861 13862 SV * 13863 Perl_parse_label(pTHX_ U32 flags) 13864 { 13865 if (flags & ~PARSE_OPTIONAL) 13866 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 13867 if (PL_nexttoke) { 13868 PL_parser->yychar = yylex(); 13869 if (PL_parser->yychar == LABEL) { 13870 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv; 13871 PL_parser->yychar = YYEMPTY; 13872 cSVOPx(pl_yylval.opval)->op_sv = NULL; 13873 op_free(pl_yylval.opval); 13874 return labelsv; 13875 } else { 13876 yyunlex(); 13877 goto no_label; 13878 } 13879 } else { 13880 char *s, *t; 13881 STRLEN wlen, bufptr_pos; 13882 lex_read_space(0); 13883 t = s = PL_bufptr; 13884 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 13885 goto no_label; 13886 t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE); 13887 if (word_takes_any_delimiter(s, wlen)) 13888 goto no_label; 13889 bufptr_pos = s - SvPVX(PL_linestr); 13890 PL_bufptr = t; 13891 lex_read_space(LEX_KEEP_PREVIOUS); 13892 t = PL_bufptr; 13893 s = SvPVX(PL_linestr) + bufptr_pos; 13894 if (t[0] == ':' && t[1] != ':') { 13895 PL_oldoldbufptr = PL_oldbufptr; 13896 PL_oldbufptr = s; 13897 PL_bufptr = t+1; 13898 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 13899 } else { 13900 PL_bufptr = s; 13901 no_label: 13902 if (flags & PARSE_OPTIONAL) { 13903 return NULL; 13904 } else { 13905 qerror(Perl_mess(aTHX_ "Parse error")); 13906 return newSVpvs("x"); 13907 } 13908 } 13909 } 13910 } 13911 13912 /* 13913 =for apidoc parse_fullstmt 13914 13915 Parse a single complete Perl statement. This may be a normal imperative 13916 statement or a declaration that has compile-time effect, and may include 13917 optional labels. It is up to the caller to ensure that the dynamic 13918 parser state (L</PL_parser> et al) is correctly set to reflect the source 13919 of the code to be parsed and the lexical context for the statement. 13920 13921 The op tree representing the statement is returned. This may be a 13922 null pointer if the statement is null, for example if it was actually 13923 a subroutine definition (which has compile-time side effects). If not 13924 null, it will be the result of a L</newSTATEOP> call, normally including 13925 a C<nextstate> or equivalent op. 13926 13927 If an error occurs in parsing or compilation, in most cases a valid op 13928 tree (most likely null) is returned anyway. The error is reflected in 13929 the parser state, normally resulting in a single exception at the top 13930 level of parsing which covers all the compilation errors that occurred. 13931 Some compilation errors, however, will throw an exception immediately. 13932 13933 The C<flags> parameter is reserved for future use, and must always 13934 be zero. 13935 13936 =cut 13937 */ 13938 13939 OP * 13940 Perl_parse_fullstmt(pTHX_ U32 flags) 13941 { 13942 if (flags) 13943 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 13944 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 13945 } 13946 13947 /* 13948 =for apidoc parse_stmtseq 13949 13950 Parse a sequence of zero or more Perl statements. These may be normal 13951 imperative statements, including optional labels, or declarations 13952 that have compile-time effect, or any mixture thereof. The statement 13953 sequence ends when a closing brace or end-of-file is encountered in a 13954 place where a new statement could have validly started. It is up to 13955 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 13956 is correctly set to reflect the source of the code to be parsed and the 13957 lexical context for the statements. 13958 13959 The op tree representing the statement sequence is returned. This may 13960 be a null pointer if the statements were all null, for example if there 13961 were no statements or if there were only subroutine definitions (which 13962 have compile-time side effects). If not null, it will be a C<lineseq> 13963 list, normally including C<nextstate> or equivalent ops. 13964 13965 If an error occurs in parsing or compilation, in most cases a valid op 13966 tree is returned anyway. The error is reflected in the parser state, 13967 normally resulting in a single exception at the top level of parsing 13968 which covers all the compilation errors that occurred. Some compilation 13969 errors, however, will throw an exception immediately. 13970 13971 The C<flags> parameter is reserved for future use, and must always 13972 be zero. 13973 13974 =cut 13975 */ 13976 13977 OP * 13978 Perl_parse_stmtseq(pTHX_ U32 flags) 13979 { 13980 OP *stmtseqop; 13981 I32 c; 13982 if (flags) 13983 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 13984 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 13985 c = lex_peek_unichar(0); 13986 if (c != -1 && c != /*{*/'}') 13987 qerror(Perl_mess(aTHX_ "Parse error")); 13988 return stmtseqop; 13989 } 13990 13991 /* 13992 =for apidoc parse_subsignature 13993 13994 Parse a subroutine signature declaration. This is the contents of the 13995 parentheses following a named or anonymous subroutine declaration when the 13996 C<signatures> feature is enabled. Note that this function neither expects 13997 nor consumes the opening and closing parentheses around the signature; it 13998 is the caller's job to handle these. 13999 14000 This function must only be called during parsing of a subroutine; after 14001 L</start_subparse> has been called. It might allocate lexical variables on 14002 the pad for the current subroutine. 14003 14004 The op tree to unpack the arguments from the stack at runtime is returned. 14005 This op tree should appear at the beginning of the compiled function. The 14006 caller may wish to use L</op_append_list> to build their function body 14007 after it, or splice it together with the body before calling L</newATTRSUB>. 14008 14009 The C<flags> parameter is reserved for future use, and must always 14010 be zero. 14011 14012 =cut 14013 */ 14014 14015 OP * 14016 Perl_parse_subsignature(pTHX_ U32 flags) 14017 { 14018 if (flags) 14019 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature"); 14020 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); 14021 } 14022 14023 /* 14024 * ex: set ts=8 sts=4 sw=4 et: 14025 */ 14026