1 /* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11 /*
12 * "It all comes from here, the stench and the peril." --Frodo
13 */
14
15 /*
16 * This file is the lexer for Perl. It's closely linked to the
17 * parser, perly.y.
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
22 #include "EXTERN.h"
23 #define PERL_IN_TOKE_C
24 #include "perl.h"
25
26 #define yychar PL_yychar
27 #define yylval PL_yylval
28
29 static char ident_too_long[] = "Identifier too long";
30 static char c_without_g[] = "Use of /c modifier is meaningless without /g";
31 static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
32
33 static void restore_rsfp(pTHX_ void *f);
34 #ifndef PERL_NO_UTF16_FILTER
35 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
37 #endif
38
39 #define XFAKEBRACK 128
40 #define XENUMMASK 127
41
42 #ifdef USE_UTF8_SCRIPTS
43 # define UTF (!IN_BYTES)
44 #else
45 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
46 #endif
47
48 /* In variables named $^X, these are the legal values for X.
49 * 1999-02-27 mjd-perl-patch@plover.com */
50 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
52 /* On MacOS, respect nonbreaking spaces */
53 #ifdef MACOS_TRADITIONAL
54 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #else
56 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57 #endif
58
59 /* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
64 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65
66 #define LEX_NORMAL 10
67 #define LEX_INTERPNORMAL 9
68 #define LEX_INTERPCASEMOD 8
69 #define LEX_INTERPPUSH 7
70 #define LEX_INTERPSTART 6
71 #define LEX_INTERPEND 5
72 #define LEX_INTERPENDMAYBE 4
73 #define LEX_INTERPCONCAT 3
74 #define LEX_INTERPCONST 2
75 #define LEX_FORMLINE 1
76 #define LEX_KNOWNEXT 0
77
78 #ifdef ff_next
79 #undef ff_next
80 #endif
81
82 #ifdef USE_PURE_BISON
83 # ifndef YYMAXLEVEL
84 # define YYMAXLEVEL 100
85 # endif
86 YYSTYPE* yylval_pointer[YYMAXLEVEL];
87 int* yychar_pointer[YYMAXLEVEL];
88 int yyactlevel = -1;
89 # undef yylval
90 # undef yychar
91 # define yylval (*yylval_pointer[yyactlevel])
92 # define yychar (*yychar_pointer[yyactlevel])
93 # define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
94 # undef yylex
95 # define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
96 #endif
97
98 #include "keywords.h"
99
100 /* CLINE is a macro that ensures PL_copline has a sane value */
101
102 #ifdef CLINE
103 #undef CLINE
104 #endif
105 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
106
107 /*
108 * Convenience functions to return different tokens and prime the
109 * lexer for the next token. They all take an argument.
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
121 * FUN1 : not used, except for not, which isn't a UNIOP
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
126 * PMop : pattern-matching operator
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
135 /* Note that REPORT() and REPORT2() will be expressions that supply
136 * their own trailing comma, not suitable for statements as such. */
137 #ifdef DEBUGGING /* Serve -DT. */
138 # define REPORT(x,retval) tokereport(x,s,(int)retval),
139 # define REPORT2(x,retval) tokereport(x,s, yylval.ival),
140 #else
141 # define REPORT(x,retval)
142 # define REPORT2(x,retval)
143 #endif
144
145 #define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
146 #define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
147 #define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
148 #define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
149 #define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
150 #define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
151 #define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
152 #define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
153 #define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
154 #define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
155 #define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
156 #define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
157 #define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
158 #define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
159 #define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
160 #define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
161 #define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
162 #define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
163 #define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
164 #define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
165
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 */
169 #define UNI(f) return(yylval.ival = f, \
170 REPORT("uni",f) \
171 PL_expect = XTERM, \
172 PL_bufptr = s, \
173 PL_last_uni = PL_oldbufptr, \
174 PL_last_lop_op = f, \
175 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
176
177 #define UNIBRACK(f) return(yylval.ival = f, \
178 REPORT("uni",f) \
179 PL_bufptr = s, \
180 PL_last_uni = PL_oldbufptr, \
181 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
182
183 /* grandfather return to old style */
184 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
185
186 #ifdef DEBUGGING
187
188 STATIC void
S_tokereport(pTHX_ char * thing,char * s,I32 rv)189 S_tokereport(pTHX_ char *thing, char* s, I32 rv)
190 {
191 DEBUG_T({
192 SV* report = newSVpv(thing, 0);
193 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
194 (IV)rv);
195
196 if (s - PL_bufptr > 0)
197 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
198 else {
199 if (PL_oldbufptr && *PL_oldbufptr)
200 sv_catpv(report, PL_tokenbuf);
201 }
202 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
203 });
204 }
205
206 #endif
207
208 /*
209 * S_ao
210 *
211 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
212 * into an OP_ANDASSIGN or OP_ORASSIGN
213 */
214
215 STATIC int
S_ao(pTHX_ int toketype)216 S_ao(pTHX_ int toketype)
217 {
218 if (*PL_bufptr == '=') {
219 PL_bufptr++;
220 if (toketype == ANDAND)
221 yylval.ival = OP_ANDASSIGN;
222 else if (toketype == OROR)
223 yylval.ival = OP_ORASSIGN;
224 toketype = ASSIGNOP;
225 }
226 return toketype;
227 }
228
229 /*
230 * S_no_op
231 * When Perl expects an operator and finds something else, no_op
232 * prints the warning. It always prints "<something> found where
233 * operator expected. It prints "Missing semicolon on previous line?"
234 * if the surprise occurs at the start of the line. "do you need to
235 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
236 * where the compiler doesn't know if foo is a method call or a function.
237 * It prints "Missing operator before end of line" if there's nothing
238 * after the missing operator, or "... before <...>" if there is something
239 * after the missing operator.
240 */
241
242 STATIC void
S_no_op(pTHX_ char * what,char * s)243 S_no_op(pTHX_ char *what, char *s)
244 {
245 char *oldbp = PL_bufptr;
246 bool is_first = (PL_oldbufptr == PL_linestart);
247
248 if (!s)
249 s = oldbp;
250 else
251 PL_bufptr = s;
252 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
253 if (ckWARN_d(WARN_SYNTAX)) {
254 if (is_first)
255 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
256 "\t(Missing semicolon on previous line?)\n");
257 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
258 char *t;
259 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
260 if (t < PL_bufptr && isSPACE(*t))
261 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
262 "\t(Do you need to predeclare %.*s?)\n",
263 t - PL_oldoldbufptr, PL_oldoldbufptr);
264 }
265 else {
266 assert(s >= oldbp);
267 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
268 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
269 }
270 }
271 PL_bufptr = oldbp;
272 }
273
274 /*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
283 STATIC void
S_missingterm(pTHX_ char * s)284 S_missingterm(pTHX_ char *s)
285 {
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
290 if (nl)
291 *nl = '\0';
292 }
293 else if (
294 #ifdef EBCDIC
295 iscntrl(PL_multi_close)
296 #else
297 PL_multi_close < 32 || PL_multi_close == 127
298 #endif
299 ) {
300 *tmpbuf = '^';
301 tmpbuf[1] = toCTRL(PL_multi_close);
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
307 *tmpbuf = (char)PL_multi_close;
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
313 }
314
315 /*
316 * Perl_deprecate
317 */
318
319 void
Perl_deprecate(pTHX_ char * s)320 Perl_deprecate(pTHX_ char *s)
321 {
322 if (ckWARN(WARN_DEPRECATED))
323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
324 }
325
326 void
Perl_deprecate_old(pTHX_ char * s)327 Perl_deprecate_old(pTHX_ char *s)
328 {
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340 }
341
342 /*
343 * depcom
344 * Deprecate a comma-less variable list.
345 */
346
347 STATIC void
S_depcom(pTHX)348 S_depcom(pTHX)
349 {
350 deprecate_old("comma-less variable list");
351 }
352
353 /*
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
356 */
357
358 #ifdef PERL_CR_FILTER
359 static void
strip_return(SV * sv)360 strip_return(SV *sv)
361 {
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379 }
380
381 STATIC I32
S_cr_textfilter(pTHX_ int idx,SV * sv,int maxlen)382 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
383 {
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
388 }
389 #endif
390
391 /*
392 * Perl_lex_start
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
395 */
396
397 void
Perl_lex_start(pTHX_ SV * line)398 Perl_lex_start(pTHX_ SV *line)
399 {
400 char *s;
401 STRLEN len;
402
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
408 SAVEVPTR(PL_lex_inpat);
409 SAVEI32(PL_lex_inwhat);
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
417 }
418 SAVECOPLINE(PL_curcop);
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
427 SAVEGENERICPV(PL_lex_brackstack);
428 SAVEGENERICPV(PL_lex_casestack);
429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
432 SAVEI32(PL_sublex_info.sub_inwhat);
433 SAVESPTR(PL_lex_repl);
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
443 PL_lex_casemods = 0;
444 *PL_lex_casestack = '\0';
445 PL_lex_dojoin = 0;
446 PL_lex_starts = 0;
447 PL_lex_stuff = Nullsv;
448 PL_lex_repl = Nullsv;
449 PL_lex_inpat = 0;
450 PL_nexttoke = 0;
451 PL_lex_inwhat = 0;
452 PL_sublex_info.sub_inwhat = 0;
453 PL_linestr = line;
454 if (SvREADONLY(PL_linestr))
455 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
456 s = SvPV(PL_linestr, len);
457 if (!len || s[len-1] != ';') {
458 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
459 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
460 sv_catpvn(PL_linestr, "\n;", 2);
461 }
462 SvTEMP_off(PL_linestr);
463 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
464 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
465 PL_last_lop = PL_last_uni = Nullch;
466 PL_rsfp = 0;
467 }
468
469 /*
470 * Perl_lex_end
471 * Finalizer for lexing operations. Must be called when the parser is
472 * done with the lexer.
473 */
474
475 void
Perl_lex_end(pTHX)476 Perl_lex_end(pTHX)
477 {
478 PL_doextract = FALSE;
479 }
480
481 /*
482 * S_incline
483 * This subroutine has nothing to do with tilting, whether at windmills
484 * or pinball tables. Its name is short for "increment line". It
485 * increments the current line number in CopLINE(PL_curcop) and checks
486 * to see whether the line starts with a comment of the form
487 * # line 500 "foo.pm"
488 * If so, it sets the current line number and file to the values in the comment.
489 */
490
491 STATIC void
S_incline(pTHX_ char * s)492 S_incline(pTHX_ char *s)
493 {
494 char *t;
495 char *n;
496 char *e;
497 char ch;
498
499 CopLINE_inc(PL_curcop);
500 if (*s++ != '#')
501 return;
502 while (SPACE_OR_TAB(*s)) s++;
503 if (strnEQ(s, "line", 4))
504 s += 4;
505 else
506 return;
507 if (SPACE_OR_TAB(*s))
508 s++;
509 else
510 return;
511 while (SPACE_OR_TAB(*s)) s++;
512 if (!isDIGIT(*s))
513 return;
514 n = s;
515 while (isDIGIT(*s))
516 s++;
517 while (SPACE_OR_TAB(*s))
518 s++;
519 if (*s == '"' && (t = strchr(s+1, '"'))) {
520 s++;
521 e = t + 1;
522 }
523 else {
524 for (t = s; !isSPACE(*t); t++) ;
525 e = t;
526 }
527 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
528 e++;
529 if (*e != '\n' && *e != '\0')
530 return; /* false alarm */
531
532 ch = *t;
533 *t = '\0';
534 if (t - s > 0) {
535 CopFILE_free(PL_curcop);
536 CopFILE_set(PL_curcop, s);
537 }
538 *t = ch;
539 CopLINE_set(PL_curcop, atoi(n)-1);
540 }
541
542 /*
543 * S_skipspace
544 * Called to gobble the appropriate amount and type of whitespace.
545 * Skips comments as well.
546 */
547
548 STATIC char *
S_skipspace(pTHX_ register char * s)549 S_skipspace(pTHX_ register char *s)
550 {
551 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
552 while (s < PL_bufend && SPACE_OR_TAB(*s))
553 s++;
554 return s;
555 }
556 for (;;) {
557 STRLEN prevlen;
558 SSize_t oldprevlen, oldoldprevlen;
559 SSize_t oldloplen = 0, oldunilen = 0;
560 while (s < PL_bufend && isSPACE(*s)) {
561 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
562 incline(s);
563 }
564
565 /* comment */
566 if (s < PL_bufend && *s == '#') {
567 while (s < PL_bufend && *s != '\n')
568 s++;
569 if (s < PL_bufend) {
570 s++;
571 if (PL_in_eval && !PL_rsfp) {
572 incline(s);
573 continue;
574 }
575 }
576 }
577
578 /* only continue to recharge the buffer if we're at the end
579 * of the buffer, we're not reading from a source filter, and
580 * we're in normal lexing mode
581 */
582 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
583 PL_lex_state == LEX_FORMLINE)
584 return s;
585
586 /* try to recharge the buffer */
587 if ((s = filter_gets(PL_linestr, PL_rsfp,
588 (prevlen = SvCUR(PL_linestr)))) == Nullch)
589 {
590 /* end of file. Add on the -p or -n magic */
591 if (PL_minus_n || PL_minus_p) {
592 sv_setpv(PL_linestr,PL_minus_p ?
593 ";}continue{print or die qq(-p destination: $!\\n)" :
594 "");
595 sv_catpv(PL_linestr,";}");
596 PL_minus_n = PL_minus_p = 0;
597 }
598 else
599 sv_setpv(PL_linestr,";");
600
601 /* reset variables for next time we lex */
602 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
603 = SvPVX(PL_linestr);
604 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
605 PL_last_lop = PL_last_uni = Nullch;
606
607 /* Close the filehandle. Could be from -P preprocessor,
608 * STDIN, or a regular file. If we were reading code from
609 * STDIN (because the commandline held no -e or filename)
610 * then we don't close it, we reset it so the code can
611 * read from STDIN too.
612 */
613
614 if (PL_preprocess && !PL_in_eval)
615 (void)PerlProc_pclose(PL_rsfp);
616 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
617 PerlIO_clearerr(PL_rsfp);
618 else
619 (void)PerlIO_close(PL_rsfp);
620 PL_rsfp = Nullfp;
621 return s;
622 }
623
624 /* not at end of file, so we only read another line */
625 /* make corresponding updates to old pointers, for yyerror() */
626 oldprevlen = PL_oldbufptr - PL_bufend;
627 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
628 if (PL_last_uni)
629 oldunilen = PL_last_uni - PL_bufend;
630 if (PL_last_lop)
631 oldloplen = PL_last_lop - PL_bufend;
632 PL_linestart = PL_bufptr = s + prevlen;
633 PL_bufend = s + SvCUR(PL_linestr);
634 s = PL_bufptr;
635 PL_oldbufptr = s + oldprevlen;
636 PL_oldoldbufptr = s + oldoldprevlen;
637 if (PL_last_uni)
638 PL_last_uni = s + oldunilen;
639 if (PL_last_lop)
640 PL_last_lop = s + oldloplen;
641 incline(s);
642
643 /* debugger active and we're not compiling the debugger code,
644 * so store the line into the debugger's array of lines
645 */
646 if (PERLDB_LINE && PL_curstash != PL_debstash) {
647 SV *sv = NEWSV(85,0);
648
649 sv_upgrade(sv, SVt_PVMG);
650 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
651 (void)SvIOK_on(sv);
652 SvIVX(sv) = 0;
653 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
654 }
655 }
656 }
657
658 /*
659 * S_check_uni
660 * Check the unary operators to ensure there's no ambiguity in how they're
661 * used. An ambiguous piece of code would be:
662 * rand + 5
663 * This doesn't mean rand() + 5. Because rand() is a unary operator,
664 * the +5 is its argument.
665 */
666
667 STATIC void
S_check_uni(pTHX)668 S_check_uni(pTHX)
669 {
670 char *s;
671 char *t;
672
673 if (PL_oldoldbufptr != PL_last_uni)
674 return;
675 while (isSPACE(*PL_last_uni))
676 PL_last_uni++;
677 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
678 if ((t = strchr(s, '(')) && t < PL_bufptr)
679 return;
680 if (ckWARN_d(WARN_AMBIGUOUS)){
681 char ch = *s;
682 *s = '\0';
683 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
684 "Warning: Use of \"%s\" without parentheses is ambiguous",
685 PL_last_uni);
686 *s = ch;
687 }
688 }
689
690 /*
691 * LOP : macro to build a list operator. Its behaviour has been replaced
692 * with a subroutine, S_lop() for which LOP is just another name.
693 */
694
695 #define LOP(f,x) return lop(f,x,s)
696
697 /*
698 * S_lop
699 * Build a list operator (or something that might be one). The rules:
700 * - if we have a next token, then it's a list operator [why?]
701 * - if the next thing is an opening paren, then it's a function
702 * - else it's a list operator
703 */
704
705 STATIC I32
S_lop(pTHX_ I32 f,int x,char * s)706 S_lop(pTHX_ I32 f, int x, char *s)
707 {
708 yylval.ival = f;
709 CLINE;
710 REPORT("lop", f)
711 PL_expect = x;
712 PL_bufptr = s;
713 PL_last_lop = PL_oldbufptr;
714 PL_last_lop_op = (OPCODE)f;
715 if (PL_nexttoke)
716 return LSTOP;
717 if (*s == '(')
718 return FUNC;
719 s = skipspace(s);
720 if (*s == '(')
721 return FUNC;
722 else
723 return LSTOP;
724 }
725
726 /*
727 * S_force_next
728 * When the lexer realizes it knows the next token (for instance,
729 * it is reordering tokens for the parser) then it can call S_force_next
730 * to know what token to return the next time the lexer is called. Caller
731 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
732 * handles the token correctly.
733 */
734
735 STATIC void
S_force_next(pTHX_ I32 type)736 S_force_next(pTHX_ I32 type)
737 {
738 PL_nexttype[PL_nexttoke] = type;
739 PL_nexttoke++;
740 if (PL_lex_state != LEX_KNOWNEXT) {
741 PL_lex_defer = PL_lex_state;
742 PL_lex_expect = PL_expect;
743 PL_lex_state = LEX_KNOWNEXT;
744 }
745 }
746
747 /*
748 * S_force_word
749 * When the lexer knows the next thing is a word (for instance, it has
750 * just seen -> and it knows that the next char is a word char, then
751 * it calls S_force_word to stick the next word into the PL_next lookahead.
752 *
753 * Arguments:
754 * char *start : buffer position (must be within PL_linestr)
755 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
756 * int check_keyword : if true, Perl checks to make sure the word isn't
757 * a keyword (do this if the word is a label, e.g. goto FOO)
758 * int allow_pack : if true, : characters will also be allowed (require,
759 * use, etc. do this)
760 * int allow_initial_tick : used by the "sub" lexer only.
761 */
762
763 STATIC char *
S_force_word(pTHX_ register char * start,int token,int check_keyword,int allow_pack,int allow_initial_tick)764 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
765 {
766 register char *s;
767 STRLEN len;
768
769 start = skipspace(start);
770 s = start;
771 if (isIDFIRST_lazy_if(s,UTF) ||
772 (allow_pack && *s == ':') ||
773 (allow_initial_tick && *s == '\'') )
774 {
775 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
776 if (check_keyword && keyword(PL_tokenbuf, len))
777 return start;
778 if (token == METHOD) {
779 s = skipspace(s);
780 if (*s == '(')
781 PL_expect = XTERM;
782 else {
783 PL_expect = XOPERATOR;
784 }
785 }
786 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
787 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
788 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
789 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
790 force_next(token);
791 }
792 return s;
793 }
794
795 /*
796 * S_force_ident
797 * Called when the lexer wants $foo *foo &foo etc, but the program
798 * text only contains the "foo" portion. The first argument is a pointer
799 * to the "foo", and the second argument is the type symbol to prefix.
800 * Forces the next token to be a "WORD".
801 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
802 */
803
804 STATIC void
S_force_ident(pTHX_ register char * s,int kind)805 S_force_ident(pTHX_ register char *s, int kind)
806 {
807 if (s && *s) {
808 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
809 PL_nextval[PL_nexttoke].opval = o;
810 force_next(WORD);
811 if (kind) {
812 o->op_private = OPpCONST_ENTERED;
813 /* XXX see note in pp_entereval() for why we forgo typo
814 warnings if the symbol must be introduced in an eval.
815 GSAR 96-10-12 */
816 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
817 kind == '$' ? SVt_PV :
818 kind == '@' ? SVt_PVAV :
819 kind == '%' ? SVt_PVHV :
820 SVt_PVGV
821 );
822 }
823 }
824 }
825
826 NV
Perl_str_to_version(pTHX_ SV * sv)827 Perl_str_to_version(pTHX_ SV *sv)
828 {
829 NV retval = 0.0;
830 NV nshift = 1.0;
831 STRLEN len;
832 char *start = SvPVx(sv,len);
833 bool utf = SvUTF8(sv) ? TRUE : FALSE;
834 char *end = start + len;
835 while (start < end) {
836 STRLEN skip;
837 UV n;
838 if (utf)
839 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
840 else {
841 n = *(U8*)start;
842 skip = 1;
843 }
844 retval += ((NV)n)/nshift;
845 start += skip;
846 nshift *= 1000;
847 }
848 return retval;
849 }
850
851 /*
852 * S_force_version
853 * Forces the next token to be a version number.
854 * If the next token appears to be an invalid version number, (e.g. "v2b"),
855 * and if "guessing" is TRUE, then no new token is created (and the caller
856 * must use an alternative parsing method).
857 */
858
859 STATIC char *
S_force_version(pTHX_ char * s,int guessing)860 S_force_version(pTHX_ char *s, int guessing)
861 {
862 OP *version = Nullop;
863 char *d;
864
865 s = skipspace(s);
866
867 d = s;
868 if (*d == 'v')
869 d++;
870 if (isDIGIT(*d)) {
871 while (isDIGIT(*d) || *d == '_' || *d == '.')
872 d++;
873 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
874 SV *ver;
875 s = scan_num(s, &yylval);
876 version = yylval.opval;
877 ver = cSVOPx(version)->op_sv;
878 if (SvPOK(ver) && !SvNIOK(ver)) {
879 (void)SvUPGRADE(ver, SVt_PVNV);
880 SvNVX(ver) = str_to_version(ver);
881 SvNOK_on(ver); /* hint that it is a version */
882 }
883 }
884 else if (guessing)
885 return s;
886 }
887
888 /* NOTE: The parser sees the package name and the VERSION swapped */
889 PL_nextval[PL_nexttoke].opval = version;
890 force_next(WORD);
891
892 return s;
893 }
894
895 /*
896 * S_tokeq
897 * Tokenize a quoted string passed in as an SV. It finds the next
898 * chunk, up to end of string or a backslash. It may make a new
899 * SV containing that chunk (if HINT_NEW_STRING is on). It also
900 * turns \\ into \.
901 */
902
903 STATIC SV *
S_tokeq(pTHX_ SV * sv)904 S_tokeq(pTHX_ SV *sv)
905 {
906 register char *s;
907 register char *send;
908 register char *d;
909 STRLEN len = 0;
910 SV *pv = sv;
911
912 if (!SvLEN(sv))
913 goto finish;
914
915 s = SvPV_force(sv, len);
916 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
917 goto finish;
918 send = s + len;
919 while (s < send && *s != '\\')
920 s++;
921 if (s == send)
922 goto finish;
923 d = s;
924 if ( PL_hints & HINT_NEW_STRING ) {
925 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
926 if (SvUTF8(sv))
927 SvUTF8_on(pv);
928 }
929 while (s < send) {
930 if (*s == '\\') {
931 if (s + 1 < send && (s[1] == '\\'))
932 s++; /* all that, just for this */
933 }
934 *d++ = *s++;
935 }
936 *d = '\0';
937 SvCUR_set(sv, d - SvPVX(sv));
938 finish:
939 if ( PL_hints & HINT_NEW_STRING )
940 return new_constant(NULL, 0, "q", sv, pv, "q");
941 return sv;
942 }
943
944 /*
945 * Now come three functions related to double-quote context,
946 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
947 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
948 * interact with PL_lex_state, and create fake ( ... ) argument lists
949 * to handle functions and concatenation.
950 * They assume that whoever calls them will be setting up a fake
951 * join call, because each subthing puts a ',' after it. This lets
952 * "lower \luPpEr"
953 * become
954 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
955 *
956 * (I'm not sure whether the spurious commas at the end of lcfirst's
957 * arguments and join's arguments are created or not).
958 */
959
960 /*
961 * S_sublex_start
962 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
963 *
964 * Pattern matching will set PL_lex_op to the pattern-matching op to
965 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
966 *
967 * OP_CONST and OP_READLINE are easy--just make the new op and return.
968 *
969 * Everything else becomes a FUNC.
970 *
971 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
972 * had an OP_CONST or OP_READLINE). This just sets us up for a
973 * call to S_sublex_push().
974 */
975
976 STATIC I32
S_sublex_start(pTHX)977 S_sublex_start(pTHX)
978 {
979 register I32 op_type = yylval.ival;
980
981 if (op_type == OP_NULL) {
982 yylval.opval = PL_lex_op;
983 PL_lex_op = Nullop;
984 return THING;
985 }
986 if (op_type == OP_CONST || op_type == OP_READLINE) {
987 SV *sv = tokeq(PL_lex_stuff);
988
989 if (SvTYPE(sv) == SVt_PVIV) {
990 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
991 STRLEN len;
992 char *p;
993 SV *nsv;
994
995 p = SvPV(sv, len);
996 nsv = newSVpvn(p, len);
997 if (SvUTF8(sv))
998 SvUTF8_on(nsv);
999 SvREFCNT_dec(sv);
1000 sv = nsv;
1001 }
1002 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1003 PL_lex_stuff = Nullsv;
1004 return THING;
1005 }
1006
1007 PL_sublex_info.super_state = PL_lex_state;
1008 PL_sublex_info.sub_inwhat = op_type;
1009 PL_sublex_info.sub_op = PL_lex_op;
1010 PL_lex_state = LEX_INTERPPUSH;
1011
1012 PL_expect = XTERM;
1013 if (PL_lex_op) {
1014 yylval.opval = PL_lex_op;
1015 PL_lex_op = Nullop;
1016 return PMFUNC;
1017 }
1018 else
1019 return FUNC;
1020 }
1021
1022 /*
1023 * S_sublex_push
1024 * Create a new scope to save the lexing state. The scope will be
1025 * ended in S_sublex_done. Returns a '(', starting the function arguments
1026 * to the uc, lc, etc. found before.
1027 * Sets PL_lex_state to LEX_INTERPCONCAT.
1028 */
1029
1030 STATIC I32
S_sublex_push(pTHX)1031 S_sublex_push(pTHX)
1032 {
1033 ENTER;
1034
1035 PL_lex_state = PL_sublex_info.super_state;
1036 SAVEI32(PL_lex_dojoin);
1037 SAVEI32(PL_lex_brackets);
1038 SAVEI32(PL_lex_casemods);
1039 SAVEI32(PL_lex_starts);
1040 SAVEI32(PL_lex_state);
1041 SAVEVPTR(PL_lex_inpat);
1042 SAVEI32(PL_lex_inwhat);
1043 SAVECOPLINE(PL_curcop);
1044 SAVEPPTR(PL_bufptr);
1045 SAVEPPTR(PL_bufend);
1046 SAVEPPTR(PL_oldbufptr);
1047 SAVEPPTR(PL_oldoldbufptr);
1048 SAVEPPTR(PL_last_lop);
1049 SAVEPPTR(PL_last_uni);
1050 SAVEPPTR(PL_linestart);
1051 SAVESPTR(PL_linestr);
1052 SAVEGENERICPV(PL_lex_brackstack);
1053 SAVEGENERICPV(PL_lex_casestack);
1054
1055 PL_linestr = PL_lex_stuff;
1056 PL_lex_stuff = Nullsv;
1057
1058 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1059 = SvPVX(PL_linestr);
1060 PL_bufend += SvCUR(PL_linestr);
1061 PL_last_lop = PL_last_uni = Nullch;
1062 SAVEFREESV(PL_linestr);
1063
1064 PL_lex_dojoin = FALSE;
1065 PL_lex_brackets = 0;
1066 New(899, PL_lex_brackstack, 120, char);
1067 New(899, PL_lex_casestack, 12, char);
1068 PL_lex_casemods = 0;
1069 *PL_lex_casestack = '\0';
1070 PL_lex_starts = 0;
1071 PL_lex_state = LEX_INTERPCONCAT;
1072 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1073
1074 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1075 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1076 PL_lex_inpat = PL_sublex_info.sub_op;
1077 else
1078 PL_lex_inpat = Nullop;
1079
1080 return '(';
1081 }
1082
1083 /*
1084 * S_sublex_done
1085 * Restores lexer state after a S_sublex_push.
1086 */
1087
1088 STATIC I32
S_sublex_done(pTHX)1089 S_sublex_done(pTHX)
1090 {
1091 if (!PL_lex_starts++) {
1092 SV *sv = newSVpvn("",0);
1093 if (SvUTF8(PL_linestr))
1094 SvUTF8_on(sv);
1095 PL_expect = XOPERATOR;
1096 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1097 return THING;
1098 }
1099
1100 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1101 PL_lex_state = LEX_INTERPCASEMOD;
1102 return yylex();
1103 }
1104
1105 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1106 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1107 PL_linestr = PL_lex_repl;
1108 PL_lex_inpat = 0;
1109 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1110 PL_bufend += SvCUR(PL_linestr);
1111 PL_last_lop = PL_last_uni = Nullch;
1112 SAVEFREESV(PL_linestr);
1113 PL_lex_dojoin = FALSE;
1114 PL_lex_brackets = 0;
1115 PL_lex_casemods = 0;
1116 *PL_lex_casestack = '\0';
1117 PL_lex_starts = 0;
1118 if (SvEVALED(PL_lex_repl)) {
1119 PL_lex_state = LEX_INTERPNORMAL;
1120 PL_lex_starts++;
1121 /* we don't clear PL_lex_repl here, so that we can check later
1122 whether this is an evalled subst; that means we rely on the
1123 logic to ensure sublex_done() is called again only via the
1124 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1125 }
1126 else {
1127 PL_lex_state = LEX_INTERPCONCAT;
1128 PL_lex_repl = Nullsv;
1129 }
1130 return ',';
1131 }
1132 else {
1133 LEAVE;
1134 PL_bufend = SvPVX(PL_linestr);
1135 PL_bufend += SvCUR(PL_linestr);
1136 PL_expect = XOPERATOR;
1137 PL_sublex_info.sub_inwhat = 0;
1138 return ')';
1139 }
1140 }
1141
1142 /*
1143 scan_const
1144
1145 Extracts a pattern, double-quoted string, or transliteration. This
1146 is terrifying code.
1147
1148 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1149 processing a pattern (PL_lex_inpat is true), a transliteration
1150 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1151
1152 Returns a pointer to the character scanned up to. Iff this is
1153 advanced from the start pointer supplied (ie if anything was
1154 successfully parsed), will leave an OP for the substring scanned
1155 in yylval. Caller must intuit reason for not parsing further
1156 by looking at the next characters herself.
1157
1158 In patterns:
1159 backslashes:
1160 double-quoted style: \r and \n
1161 regexp special ones: \D \s
1162 constants: \x3
1163 backrefs: \1 (deprecated in substitution replacements)
1164 case and quoting: \U \Q \E
1165 stops on @ and $, but not for $ as tail anchor
1166
1167 In transliterations:
1168 characters are VERY literal, except for - not at the start or end
1169 of the string, which indicates a range. scan_const expands the
1170 range to the full set of intermediate characters.
1171
1172 In double-quoted strings:
1173 backslashes:
1174 double-quoted style: \r and \n
1175 constants: \x3
1176 backrefs: \1 (deprecated)
1177 case and quoting: \U \Q \E
1178 stops on @ and $
1179
1180 scan_const does *not* construct ops to handle interpolated strings.
1181 It stops processing as soon as it finds an embedded $ or @ variable
1182 and leaves it to the caller to work out what's going on.
1183
1184 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1185
1186 $ in pattern could be $foo or could be tail anchor. Assumption:
1187 it's a tail anchor if $ is the last thing in the string, or if it's
1188 followed by one of ")| \n\t"
1189
1190 \1 (backreferences) are turned into $1
1191
1192 The structure of the code is
1193 while (there's a character to process) {
1194 handle transliteration ranges
1195 skip regexp comments
1196 skip # initiated comments in //x patterns
1197 check for embedded @foo
1198 check for embedded scalars
1199 if (backslash) {
1200 leave intact backslashes from leave (below)
1201 deprecate \1 in strings and sub replacements
1202 handle string-changing backslashes \l \U \Q \E, etc.
1203 switch (what was escaped) {
1204 handle - in a transliteration (becomes a literal -)
1205 handle \132 octal characters
1206 handle 0x15 hex characters
1207 handle \cV (control V)
1208 handle printf backslashes (\f, \r, \n, etc)
1209 } (end switch)
1210 } (end if backslash)
1211 } (end while character to read)
1212
1213 */
1214
1215 STATIC char *
S_scan_const(pTHX_ char * start)1216 S_scan_const(pTHX_ char *start)
1217 {
1218 register char *send = PL_bufend; /* end of the constant */
1219 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1220 register char *s = start; /* start of the constant */
1221 register char *d = SvPVX(sv); /* destination for copies */
1222 bool dorange = FALSE; /* are we in a translit range? */
1223 bool didrange = FALSE; /* did we just finish a range? */
1224 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1225 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1226 UV uv;
1227
1228 const char *leaveit = /* set of acceptably-backslashed characters */
1229 PL_lex_inpat
1230 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
1231 : "";
1232
1233 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1234 /* If we are doing a trans and we know we want UTF8 set expectation */
1235 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1236 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1237 }
1238
1239
1240 while (s < send || dorange) {
1241 /* get transliterations out of the way (they're most literal) */
1242 if (PL_lex_inwhat == OP_TRANS) {
1243 /* expand a range A-Z to the full set of characters. AIE! */
1244 if (dorange) {
1245 I32 i; /* current expanded character */
1246 I32 min; /* first character in range */
1247 I32 max; /* last character in range */
1248
1249 if (has_utf8) {
1250 char *c = (char*)utf8_hop((U8*)d, -1);
1251 char *e = d++;
1252 while (e-- > c)
1253 *(e + 1) = *e;
1254 *c = (char)UTF_TO_NATIVE(0xff);
1255 /* mark the range as done, and continue */
1256 dorange = FALSE;
1257 didrange = TRUE;
1258 continue;
1259 }
1260
1261 i = d - SvPVX(sv); /* remember current offset */
1262 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1263 d = SvPVX(sv) + i; /* refresh d after realloc */
1264 d -= 2; /* eat the first char and the - */
1265
1266 min = (U8)*d; /* first char in range */
1267 max = (U8)d[1]; /* last char in range */
1268
1269 if (min > max) {
1270 Perl_croak(aTHX_
1271 "Invalid range \"%c-%c\" in transliteration operator",
1272 (char)min, (char)max);
1273 }
1274
1275 #ifdef EBCDIC
1276 if ((isLOWER(min) && isLOWER(max)) ||
1277 (isUPPER(min) && isUPPER(max))) {
1278 if (isLOWER(min)) {
1279 for (i = min; i <= max; i++)
1280 if (isLOWER(i))
1281 *d++ = NATIVE_TO_NEED(has_utf8,i);
1282 } else {
1283 for (i = min; i <= max; i++)
1284 if (isUPPER(i))
1285 *d++ = NATIVE_TO_NEED(has_utf8,i);
1286 }
1287 }
1288 else
1289 #endif
1290 for (i = min; i <= max; i++)
1291 *d++ = (char)i;
1292
1293 /* mark the range as done, and continue */
1294 dorange = FALSE;
1295 didrange = TRUE;
1296 continue;
1297 }
1298
1299 /* range begins (ignore - as first or last char) */
1300 else if (*s == '-' && s+1 < send && s != start) {
1301 if (didrange) {
1302 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1303 }
1304 if (has_utf8) {
1305 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1306 s++;
1307 continue;
1308 }
1309 dorange = TRUE;
1310 s++;
1311 }
1312 else {
1313 didrange = FALSE;
1314 }
1315 }
1316
1317 /* if we get here, we're not doing a transliteration */
1318
1319 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1320 except for the last char, which will be done separately. */
1321 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1322 if (s[2] == '#') {
1323 while (s+1 < send && *s != ')')
1324 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1325 }
1326 else if (s[2] == '{' /* This should match regcomp.c */
1327 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1328 {
1329 I32 count = 1;
1330 char *regparse = s + (s[2] == '{' ? 3 : 4);
1331 char c;
1332
1333 while (count && (c = *regparse)) {
1334 if (c == '\\' && regparse[1])
1335 regparse++;
1336 else if (c == '{')
1337 count++;
1338 else if (c == '}')
1339 count--;
1340 regparse++;
1341 }
1342 if (*regparse != ')')
1343 regparse--; /* Leave one char for continuation. */
1344 while (s < regparse)
1345 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1346 }
1347 }
1348
1349 /* likewise skip #-initiated comments in //x patterns */
1350 else if (*s == '#' && PL_lex_inpat &&
1351 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1352 while (s+1 < send && *s != '\n')
1353 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1354 }
1355
1356 /* check for embedded arrays
1357 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1358 */
1359 else if (*s == '@' && s[1]
1360 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1361 break;
1362
1363 /* check for embedded scalars. only stop if we're sure it's a
1364 variable.
1365 */
1366 else if (*s == '$') {
1367 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1368 break;
1369 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1370 break; /* in regexp, $ might be tail anchor */
1371 }
1372
1373 /* End of else if chain - OP_TRANS rejoin rest */
1374
1375 /* backslashes */
1376 if (*s == '\\' && s+1 < send) {
1377 s++;
1378
1379 /* some backslashes we leave behind */
1380 if (*leaveit && *s && strchr(leaveit, *s)) {
1381 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1382 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1383 continue;
1384 }
1385
1386 /* deprecate \1 in strings and substitution replacements */
1387 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1388 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1389 {
1390 if (ckWARN(WARN_SYNTAX))
1391 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1392 *--s = '$';
1393 break;
1394 }
1395
1396 /* string-change backslash escapes */
1397 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1398 --s;
1399 break;
1400 }
1401
1402 /* if we get here, it's either a quoted -, or a digit */
1403 switch (*s) {
1404
1405 /* quoted - in transliterations */
1406 case '-':
1407 if (PL_lex_inwhat == OP_TRANS) {
1408 *d++ = *s++;
1409 continue;
1410 }
1411 /* FALL THROUGH */
1412 default:
1413 {
1414 if (ckWARN(WARN_MISC) &&
1415 isALNUM(*s) &&
1416 *s != '_')
1417 Perl_warner(aTHX_ packWARN(WARN_MISC),
1418 "Unrecognized escape \\%c passed through",
1419 *s);
1420 /* default action is to copy the quoted character */
1421 goto default_action;
1422 }
1423
1424 /* \132 indicates an octal constant */
1425 case '0': case '1': case '2': case '3':
1426 case '4': case '5': case '6': case '7':
1427 {
1428 I32 flags = 0;
1429 STRLEN len = 3;
1430 uv = grok_oct(s, &len, &flags, NULL);
1431 s += len;
1432 }
1433 goto NUM_ESCAPE_INSERT;
1434
1435 /* \x24 indicates a hex constant */
1436 case 'x':
1437 ++s;
1438 if (*s == '{') {
1439 char* e = strchr(s, '}');
1440 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1441 PERL_SCAN_DISALLOW_PREFIX;
1442 STRLEN len;
1443
1444 ++s;
1445 if (!e) {
1446 yyerror("Missing right brace on \\x{}");
1447 continue;
1448 }
1449 len = e - s;
1450 uv = grok_hex(s, &len, &flags, NULL);
1451 s = e + 1;
1452 }
1453 else {
1454 {
1455 STRLEN len = 2;
1456 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1457 uv = grok_hex(s, &len, &flags, NULL);
1458 s += len;
1459 }
1460 }
1461
1462 NUM_ESCAPE_INSERT:
1463 /* Insert oct or hex escaped character.
1464 * There will always enough room in sv since such
1465 * escapes will be longer than any UTF-8 sequence
1466 * they can end up as. */
1467
1468 /* We need to map to chars to ASCII before doing the tests
1469 to cover EBCDIC
1470 */
1471 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1472 if (!has_utf8 && uv > 255) {
1473 /* Might need to recode whatever we have
1474 * accumulated so far if it contains any
1475 * hibit chars.
1476 *
1477 * (Can't we keep track of that and avoid
1478 * this rescan? --jhi)
1479 */
1480 int hicount = 0;
1481 U8 *c;
1482 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1483 if (!NATIVE_IS_INVARIANT(*c)) {
1484 hicount++;
1485 }
1486 }
1487 if (hicount) {
1488 STRLEN offset = d - SvPVX(sv);
1489 U8 *src, *dst;
1490 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1491 src = (U8 *)d - 1;
1492 dst = src+hicount;
1493 d += hicount;
1494 while (src >= (U8 *)SvPVX(sv)) {
1495 if (!NATIVE_IS_INVARIANT(*src)) {
1496 U8 ch = NATIVE_TO_ASCII(*src);
1497 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1498 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1499 }
1500 else {
1501 *dst-- = *src;
1502 }
1503 src--;
1504 }
1505 }
1506 }
1507
1508 if (has_utf8 || uv > 255) {
1509 d = (char*)uvchr_to_utf8((U8*)d, uv);
1510 has_utf8 = TRUE;
1511 if (PL_lex_inwhat == OP_TRANS &&
1512 PL_sublex_info.sub_op) {
1513 PL_sublex_info.sub_op->op_private |=
1514 (PL_lex_repl ? OPpTRANS_FROM_UTF
1515 : OPpTRANS_TO_UTF);
1516 }
1517 }
1518 else {
1519 *d++ = (char)uv;
1520 }
1521 }
1522 else {
1523 *d++ = (char) uv;
1524 }
1525 continue;
1526
1527 /* \N{LATIN SMALL LETTER A} is a named character */
1528 case 'N':
1529 ++s;
1530 if (*s == '{') {
1531 char* e = strchr(s, '}');
1532 SV *res;
1533 STRLEN len;
1534 char *str;
1535
1536 if (!e) {
1537 yyerror("Missing right brace on \\N{}");
1538 e = s - 1;
1539 goto cont_scan;
1540 }
1541 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1542 /* \N{U+...} */
1543 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1544 PERL_SCAN_DISALLOW_PREFIX;
1545 s += 3;
1546 len = e - s;
1547 uv = grok_hex(s, &len, &flags, NULL);
1548 s = e + 1;
1549 goto NUM_ESCAPE_INSERT;
1550 }
1551 res = newSVpvn(s + 1, e - s - 1);
1552 res = new_constant( Nullch, 0, "charnames",
1553 res, Nullsv, "\\N{...}" );
1554 if (has_utf8)
1555 sv_utf8_upgrade(res);
1556 str = SvPV(res,len);
1557 #ifdef EBCDIC_NEVER_MIND
1558 /* charnames uses pack U and that has been
1559 * recently changed to do the below uni->native
1560 * mapping, so this would be redundant (and wrong,
1561 * the code point would be doubly converted).
1562 * But leave this in just in case the pack U change
1563 * gets revoked, but the semantics is still
1564 * desireable for charnames. --jhi */
1565 {
1566 UV uv = utf8_to_uvchr((U8*)str, 0);
1567
1568 if (uv < 0x100) {
1569 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1570
1571 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1572 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1573 str = SvPV(res, len);
1574 }
1575 }
1576 #endif
1577 if (!has_utf8 && SvUTF8(res)) {
1578 char *ostart = SvPVX(sv);
1579 SvCUR_set(sv, d - ostart);
1580 SvPOK_on(sv);
1581 *d = '\0';
1582 sv_utf8_upgrade(sv);
1583 /* this just broke our allocation above... */
1584 SvGROW(sv, (STRLEN)(send - start));
1585 d = SvPVX(sv) + SvCUR(sv);
1586 has_utf8 = TRUE;
1587 }
1588 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1589 char *odest = SvPVX(sv);
1590
1591 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1592 d = SvPVX(sv) + (d - odest);
1593 }
1594 Copy(str, d, len, char);
1595 d += len;
1596 SvREFCNT_dec(res);
1597 cont_scan:
1598 s = e + 1;
1599 }
1600 else
1601 yyerror("Missing braces on \\N{}");
1602 continue;
1603
1604 /* \c is a control character */
1605 case 'c':
1606 s++;
1607 if (s < send) {
1608 U8 c = *s++;
1609 #ifdef EBCDIC
1610 if (isLOWER(c))
1611 c = toUPPER(c);
1612 #endif
1613 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1614 }
1615 else {
1616 yyerror("Missing control char name in \\c");
1617 }
1618 continue;
1619
1620 /* printf-style backslashes, formfeeds, newlines, etc */
1621 case 'b':
1622 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1623 break;
1624 case 'n':
1625 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1626 break;
1627 case 'r':
1628 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1629 break;
1630 case 'f':
1631 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1632 break;
1633 case 't':
1634 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1635 break;
1636 case 'e':
1637 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1638 break;
1639 case 'a':
1640 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1641 break;
1642 } /* end switch */
1643
1644 s++;
1645 continue;
1646 } /* end if (backslash) */
1647
1648 default_action:
1649 /* If we started with encoded form, or already know we want it
1650 and then encode the next character */
1651 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1652 STRLEN len = 1;
1653 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1654 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1655 s += len;
1656 if (need > len) {
1657 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1658 STRLEN off = d - SvPVX(sv);
1659 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1660 }
1661 d = (char*)uvchr_to_utf8((U8*)d, uv);
1662 has_utf8 = TRUE;
1663 }
1664 else {
1665 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1666 }
1667 } /* while loop to process each character */
1668
1669 /* terminate the string and set up the sv */
1670 *d = '\0';
1671 SvCUR_set(sv, d - SvPVX(sv));
1672 if (SvCUR(sv) >= SvLEN(sv))
1673 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1674
1675 SvPOK_on(sv);
1676 if (PL_encoding && !has_utf8) {
1677 sv_recode_to_utf8(sv, PL_encoding);
1678 if (SvUTF8(sv))
1679 has_utf8 = TRUE;
1680 }
1681 if (has_utf8) {
1682 SvUTF8_on(sv);
1683 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1684 PL_sublex_info.sub_op->op_private |=
1685 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1686 }
1687 }
1688
1689 /* shrink the sv if we allocated more than we used */
1690 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1691 SvLEN_set(sv, SvCUR(sv) + 1);
1692 Renew(SvPVX(sv), SvLEN(sv), char);
1693 }
1694
1695 /* return the substring (via yylval) only if we parsed anything */
1696 if (s > PL_bufptr) {
1697 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1698 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1699 sv, Nullsv,
1700 ( PL_lex_inwhat == OP_TRANS
1701 ? "tr"
1702 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1703 ? "s"
1704 : "qq")));
1705 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1706 } else
1707 SvREFCNT_dec(sv);
1708 return s;
1709 }
1710
1711 /* S_intuit_more
1712 * Returns TRUE if there's more to the expression (e.g., a subscript),
1713 * FALSE otherwise.
1714 *
1715 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1716 *
1717 * ->[ and ->{ return TRUE
1718 * { and [ outside a pattern are always subscripts, so return TRUE
1719 * if we're outside a pattern and it's not { or [, then return FALSE
1720 * if we're in a pattern and the first char is a {
1721 * {4,5} (any digits around the comma) returns FALSE
1722 * if we're in a pattern and the first char is a [
1723 * [] returns FALSE
1724 * [SOMETHING] has a funky algorithm to decide whether it's a
1725 * character class or not. It has to deal with things like
1726 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1727 * anything else returns TRUE
1728 */
1729
1730 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1731
1732 STATIC int
S_intuit_more(pTHX_ register char * s)1733 S_intuit_more(pTHX_ register char *s)
1734 {
1735 if (PL_lex_brackets)
1736 return TRUE;
1737 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1738 return TRUE;
1739 if (*s != '{' && *s != '[')
1740 return FALSE;
1741 if (!PL_lex_inpat)
1742 return TRUE;
1743
1744 /* In a pattern, so maybe we have {n,m}. */
1745 if (*s == '{') {
1746 s++;
1747 if (!isDIGIT(*s))
1748 return TRUE;
1749 while (isDIGIT(*s))
1750 s++;
1751 if (*s == ',')
1752 s++;
1753 while (isDIGIT(*s))
1754 s++;
1755 if (*s == '}')
1756 return FALSE;
1757 return TRUE;
1758
1759 }
1760
1761 /* On the other hand, maybe we have a character class */
1762
1763 s++;
1764 if (*s == ']' || *s == '^')
1765 return FALSE;
1766 else {
1767 /* this is terrifying, and it works */
1768 int weight = 2; /* let's weigh the evidence */
1769 char seen[256];
1770 unsigned char un_char = 255, last_un_char;
1771 char *send = strchr(s,']');
1772 char tmpbuf[sizeof PL_tokenbuf * 4];
1773
1774 if (!send) /* has to be an expression */
1775 return TRUE;
1776
1777 Zero(seen,256,char);
1778 if (*s == '$')
1779 weight -= 3;
1780 else if (isDIGIT(*s)) {
1781 if (s[1] != ']') {
1782 if (isDIGIT(s[1]) && s[2] == ']')
1783 weight -= 10;
1784 }
1785 else
1786 weight -= 100;
1787 }
1788 for (; s < send; s++) {
1789 last_un_char = un_char;
1790 un_char = (unsigned char)*s;
1791 switch (*s) {
1792 case '@':
1793 case '&':
1794 case '$':
1795 weight -= seen[un_char] * 10;
1796 if (isALNUM_lazy_if(s+1,UTF)) {
1797 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1798 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1799 weight -= 100;
1800 else
1801 weight -= 10;
1802 }
1803 else if (*s == '$' && s[1] &&
1804 strchr("[#!%*<>()-=",s[1])) {
1805 if (/*{*/ strchr("])} =",s[2]))
1806 weight -= 10;
1807 else
1808 weight -= 1;
1809 }
1810 break;
1811 case '\\':
1812 un_char = 254;
1813 if (s[1]) {
1814 if (strchr("wds]",s[1]))
1815 weight += 100;
1816 else if (seen['\''] || seen['"'])
1817 weight += 1;
1818 else if (strchr("rnftbxcav",s[1]))
1819 weight += 40;
1820 else if (isDIGIT(s[1])) {
1821 weight += 40;
1822 while (s[1] && isDIGIT(s[1]))
1823 s++;
1824 }
1825 }
1826 else
1827 weight += 100;
1828 break;
1829 case '-':
1830 if (s[1] == '\\')
1831 weight += 50;
1832 if (strchr("aA01! ",last_un_char))
1833 weight += 30;
1834 if (strchr("zZ79~",s[1]))
1835 weight += 30;
1836 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1837 weight -= 5; /* cope with negative subscript */
1838 break;
1839 default:
1840 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
1841 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1842 char *d = tmpbuf;
1843 while (isALPHA(*s))
1844 *d++ = *s++;
1845 *d = '\0';
1846 if (keyword(tmpbuf, d - tmpbuf))
1847 weight -= 150;
1848 }
1849 if (un_char == last_un_char + 1)
1850 weight += 5;
1851 weight -= seen[un_char];
1852 break;
1853 }
1854 seen[un_char]++;
1855 }
1856 if (weight >= 0) /* probably a character class */
1857 return FALSE;
1858 }
1859
1860 return TRUE;
1861 }
1862
1863 /*
1864 * S_intuit_method
1865 *
1866 * Does all the checking to disambiguate
1867 * foo bar
1868 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1869 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1870 *
1871 * First argument is the stuff after the first token, e.g. "bar".
1872 *
1873 * Not a method if bar is a filehandle.
1874 * Not a method if foo is a subroutine prototyped to take a filehandle.
1875 * Not a method if it's really "Foo $bar"
1876 * Method if it's "foo $bar"
1877 * Not a method if it's really "print foo $bar"
1878 * Method if it's really "foo package::" (interpreted as package->foo)
1879 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
1880 * Not a method if bar is a filehandle or package, but is quoted with
1881 * =>
1882 */
1883
1884 STATIC int
S_intuit_method(pTHX_ char * start,GV * gv)1885 S_intuit_method(pTHX_ char *start, GV *gv)
1886 {
1887 char *s = start + (*start == '$');
1888 char tmpbuf[sizeof PL_tokenbuf];
1889 STRLEN len;
1890 GV* indirgv;
1891
1892 if (gv) {
1893 CV *cv;
1894 if (GvIO(gv))
1895 return 0;
1896 if ((cv = GvCVu(gv))) {
1897 char *proto = SvPVX(cv);
1898 if (proto) {
1899 if (*proto == ';')
1900 proto++;
1901 if (*proto == '*')
1902 return 0;
1903 }
1904 } else
1905 gv = 0;
1906 }
1907 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
1908 /* start is the beginning of the possible filehandle/object,
1909 * and s is the end of it
1910 * tmpbuf is a copy of it
1911 */
1912
1913 if (*start == '$') {
1914 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
1915 return 0;
1916 s = skipspace(s);
1917 PL_bufptr = start;
1918 PL_expect = XREF;
1919 return *s == '(' ? FUNCMETH : METHOD;
1920 }
1921 if (!keyword(tmpbuf, len)) {
1922 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1923 len -= 2;
1924 tmpbuf[len] = '\0';
1925 goto bare_package;
1926 }
1927 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
1928 if (indirgv && GvCVu(indirgv))
1929 return 0;
1930 /* filehandle or package name makes it a method */
1931 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
1932 s = skipspace(s);
1933 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
1934 return 0; /* no assumptions -- "=>" quotes bearword */
1935 bare_package:
1936 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
1937 newSVpvn(tmpbuf,len));
1938 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1939 PL_expect = XTERM;
1940 force_next(WORD);
1941 PL_bufptr = s;
1942 return *s == '(' ? FUNCMETH : METHOD;
1943 }
1944 }
1945 return 0;
1946 }
1947
1948 /*
1949 * S_incl_perldb
1950 * Return a string of Perl code to load the debugger. If PERL5DB
1951 * is set, it will return the contents of that, otherwise a
1952 * compile-time require of perl5db.pl.
1953 */
1954
1955 STATIC char*
S_incl_perldb(pTHX)1956 S_incl_perldb(pTHX)
1957 {
1958 if (PL_perldb) {
1959 char *pdb = PerlEnv_getenv("PERL5DB");
1960
1961 if (pdb)
1962 return pdb;
1963 SETERRNO(0,SS_NORMAL);
1964 return "BEGIN { require 'perl5db.pl' }";
1965 }
1966 return "";
1967 }
1968
1969
1970 /* Encoded script support. filter_add() effectively inserts a
1971 * 'pre-processing' function into the current source input stream.
1972 * Note that the filter function only applies to the current source file
1973 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1974 *
1975 * The datasv parameter (which may be NULL) can be used to pass
1976 * private data to this instance of the filter. The filter function
1977 * can recover the SV using the FILTER_DATA macro and use it to
1978 * store private buffers and state information.
1979 *
1980 * The supplied datasv parameter is upgraded to a PVIO type
1981 * and the IoDIRP/IoANY field is used to store the function pointer,
1982 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
1983 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1984 * private use must be set using malloc'd pointers.
1985 */
1986
1987 SV *
Perl_filter_add(pTHX_ filter_t funcp,SV * datasv)1988 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
1989 {
1990 if (!funcp)
1991 return Nullsv;
1992
1993 if (!PL_rsfp_filters)
1994 PL_rsfp_filters = newAV();
1995 if (!datasv)
1996 datasv = NEWSV(255,0);
1997 if (!SvUPGRADE(datasv, SVt_PVIO))
1998 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
1999 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
2000 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2001 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2002 (void*)funcp, SvPV_nolen(datasv)));
2003 av_unshift(PL_rsfp_filters, 1);
2004 av_store(PL_rsfp_filters, 0, datasv) ;
2005 return(datasv);
2006 }
2007
2008
2009 /* Delete most recently added instance of this filter function. */
2010 void
Perl_filter_del(pTHX_ filter_t funcp)2011 Perl_filter_del(pTHX_ filter_t funcp)
2012 {
2013 SV *datasv;
2014 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
2015 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2016 return;
2017 /* if filter is on top of stack (usual case) just pop it off */
2018 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2019 if (IoANY(datasv) == (void *)funcp) {
2020 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2021 IoANY(datasv) = (void *)NULL;
2022 sv_free(av_pop(PL_rsfp_filters));
2023
2024 return;
2025 }
2026 /* we need to search for the correct entry and clear it */
2027 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2028 }
2029
2030
2031 /* Invoke the n'th filter function for the current rsfp. */
2032 I32
Perl_filter_read(pTHX_ int idx,SV * buf_sv,int maxlen)2033 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2034
2035
2036 /* 0 = read one text line */
2037 {
2038 filter_t funcp;
2039 SV *datasv = NULL;
2040
2041 if (!PL_rsfp_filters)
2042 return -1;
2043 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
2044 /* Provide a default input filter to make life easy. */
2045 /* Note that we append to the line. This is handy. */
2046 DEBUG_P(PerlIO_printf(Perl_debug_log,
2047 "filter_read %d: from rsfp\n", idx));
2048 if (maxlen) {
2049 /* Want a block */
2050 int len ;
2051 int old_len = SvCUR(buf_sv) ;
2052
2053 /* ensure buf_sv is large enough */
2054 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2055 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2056 if (PerlIO_error(PL_rsfp))
2057 return -1; /* error */
2058 else
2059 return 0 ; /* end of file */
2060 }
2061 SvCUR_set(buf_sv, old_len + len) ;
2062 } else {
2063 /* Want a line */
2064 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2065 if (PerlIO_error(PL_rsfp))
2066 return -1; /* error */
2067 else
2068 return 0 ; /* end of file */
2069 }
2070 }
2071 return SvCUR(buf_sv);
2072 }
2073 /* Skip this filter slot if filter has been deleted */
2074 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
2075 DEBUG_P(PerlIO_printf(Perl_debug_log,
2076 "filter_read %d: skipped (filter deleted)\n",
2077 idx));
2078 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2079 }
2080 /* Get function pointer hidden within datasv */
2081 funcp = (filter_t)IoANY(datasv);
2082 DEBUG_P(PerlIO_printf(Perl_debug_log,
2083 "filter_read %d: via function %p (%s)\n",
2084 idx, (void*)funcp, SvPV_nolen(datasv)));
2085 /* Call function. The function is expected to */
2086 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2087 /* Return: <0:error, =0:eof, >0:not eof */
2088 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2089 }
2090
2091 STATIC char *
S_filter_gets(pTHX_ register SV * sv,register PerlIO * fp,STRLEN append)2092 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2093 {
2094 #ifdef PERL_CR_FILTER
2095 if (!PL_rsfp_filters) {
2096 filter_add(S_cr_textfilter,NULL);
2097 }
2098 #endif
2099 if (PL_rsfp_filters) {
2100
2101 if (!append)
2102 SvCUR_set(sv, 0); /* start with empty line */
2103 if (FILTER_READ(0, sv, 0) > 0)
2104 return ( SvPVX(sv) ) ;
2105 else
2106 return Nullch ;
2107 }
2108 else
2109 return (sv_gets(sv, fp, append));
2110 }
2111
2112 STATIC HV *
S_find_in_my_stash(pTHX_ char * pkgname,I32 len)2113 S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
2114 {
2115 GV *gv;
2116
2117 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2118 return PL_curstash;
2119
2120 if (len > 2 &&
2121 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2122 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2123 {
2124 return GvHV(gv); /* Foo:: */
2125 }
2126
2127 /* use constant CLASS => 'MyClass' */
2128 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2129 SV *sv;
2130 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2131 pkgname = SvPV_nolen(sv);
2132 }
2133 }
2134
2135 return gv_stashpv(pkgname, FALSE);
2136 }
2137
2138 #ifdef DEBUGGING
2139 static char* exp_name[] =
2140 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2141 "ATTRTERM", "TERMBLOCK"
2142 };
2143 #endif
2144
2145 /*
2146 yylex
2147
2148 Works out what to call the token just pulled out of the input
2149 stream. The yacc parser takes care of taking the ops we return and
2150 stitching them into a tree.
2151
2152 Returns:
2153 PRIVATEREF
2154
2155 Structure:
2156 if read an identifier
2157 if we're in a my declaration
2158 croak if they tried to say my($foo::bar)
2159 build the ops for a my() declaration
2160 if it's an access to a my() variable
2161 are we in a sort block?
2162 croak if my($a); $a <=> $b
2163 build ops for access to a my() variable
2164 if in a dq string, and they've said @foo and we can't find @foo
2165 croak
2166 build ops for a bareword
2167 if we already built the token before, use it.
2168 */
2169
2170 #ifdef USE_PURE_BISON
2171 int
Perl_yylex_r(pTHX_ YYSTYPE * lvalp,int * lcharp)2172 Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
2173 {
2174 int r;
2175
2176 yyactlevel++;
2177 yylval_pointer[yyactlevel] = lvalp;
2178 yychar_pointer[yyactlevel] = lcharp;
2179 if (yyactlevel >= YYMAXLEVEL)
2180 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
2181
2182 r = Perl_yylex(aTHX);
2183
2184 if (yyactlevel > 0)
2185 yyactlevel--;
2186
2187 return r;
2188 }
2189 #endif
2190
2191 #ifdef __SC__
2192 #pragma segment Perl_yylex
2193 #endif
2194 int
Perl_yylex(pTHX)2195 Perl_yylex(pTHX)
2196 {
2197 register char *s;
2198 register char *d;
2199 register I32 tmp;
2200 STRLEN len;
2201 GV *gv = Nullgv;
2202 GV **gvp = 0;
2203 bool bof = FALSE;
2204 I32 orig_keyword = 0;
2205
2206 /* check if there's an identifier for us to look at */
2207 if (PL_pending_ident)
2208 return S_pending_ident(aTHX);
2209
2210 /* no identifier pending identification */
2211
2212 switch (PL_lex_state) {
2213 #ifdef COMMENTARY
2214 case LEX_NORMAL: /* Some compilers will produce faster */
2215 case LEX_INTERPNORMAL: /* code if we comment these out. */
2216 break;
2217 #endif
2218
2219 /* when we've already built the next token, just pull it out of the queue */
2220 case LEX_KNOWNEXT:
2221 PL_nexttoke--;
2222 yylval = PL_nextval[PL_nexttoke];
2223 if (!PL_nexttoke) {
2224 PL_lex_state = PL_lex_defer;
2225 PL_expect = PL_lex_expect;
2226 PL_lex_defer = LEX_NORMAL;
2227 }
2228 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2229 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2230 (IV)PL_nexttype[PL_nexttoke]); });
2231
2232 return(PL_nexttype[PL_nexttoke]);
2233
2234 /* interpolated case modifiers like \L \U, including \Q and \E.
2235 when we get here, PL_bufptr is at the \
2236 */
2237 case LEX_INTERPCASEMOD:
2238 #ifdef DEBUGGING
2239 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2240 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2241 #endif
2242 /* handle \E or end of string */
2243 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2244 char oldmod;
2245
2246 /* if at a \E */
2247 if (PL_lex_casemods) {
2248 oldmod = PL_lex_casestack[--PL_lex_casemods];
2249 PL_lex_casestack[PL_lex_casemods] = '\0';
2250
2251 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2252 PL_bufptr += 2;
2253 PL_lex_state = LEX_INTERPCONCAT;
2254 }
2255 return ')';
2256 }
2257 if (PL_bufptr != PL_bufend)
2258 PL_bufptr += 2;
2259 PL_lex_state = LEX_INTERPCONCAT;
2260 return yylex();
2261 }
2262 else {
2263 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2264 "### Saw case modifier at '%s'\n", PL_bufptr); });
2265 s = PL_bufptr + 1;
2266 if (s[1] == '\\' && s[2] == 'E') {
2267 PL_bufptr = s + 3;
2268 PL_lex_state = LEX_INTERPCONCAT;
2269 return yylex();
2270 }
2271 else {
2272 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2273 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2274 if (strchr("LU", *s) &&
2275 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2276 PL_lex_casestack[--PL_lex_casemods] = '\0';
2277 return ')';
2278 }
2279 if (PL_lex_casemods > 10)
2280 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2281 PL_lex_casestack[PL_lex_casemods++] = *s;
2282 PL_lex_casestack[PL_lex_casemods] = '\0';
2283 PL_lex_state = LEX_INTERPCONCAT;
2284 PL_nextval[PL_nexttoke].ival = 0;
2285 force_next('(');
2286 if (*s == 'l')
2287 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2288 else if (*s == 'u')
2289 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2290 else if (*s == 'L')
2291 PL_nextval[PL_nexttoke].ival = OP_LC;
2292 else if (*s == 'U')
2293 PL_nextval[PL_nexttoke].ival = OP_UC;
2294 else if (*s == 'Q')
2295 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2296 else
2297 Perl_croak(aTHX_ "panic: yylex");
2298 PL_bufptr = s + 1;
2299 }
2300 force_next(FUNC);
2301 if (PL_lex_starts) {
2302 s = PL_bufptr;
2303 PL_lex_starts = 0;
2304 Aop(OP_CONCAT);
2305 }
2306 else
2307 return yylex();
2308 }
2309
2310 case LEX_INTERPPUSH:
2311 return sublex_push();
2312
2313 case LEX_INTERPSTART:
2314 if (PL_bufptr == PL_bufend)
2315 return sublex_done();
2316 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2317 "### Interpolated variable at '%s'\n", PL_bufptr); });
2318 PL_expect = XTERM;
2319 PL_lex_dojoin = (*PL_bufptr == '@');
2320 PL_lex_state = LEX_INTERPNORMAL;
2321 if (PL_lex_dojoin) {
2322 PL_nextval[PL_nexttoke].ival = 0;
2323 force_next(',');
2324 #ifdef USE_5005THREADS
2325 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2326 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
2327 force_next(PRIVATEREF);
2328 #else
2329 force_ident("\"", '$');
2330 #endif /* USE_5005THREADS */
2331 PL_nextval[PL_nexttoke].ival = 0;
2332 force_next('$');
2333 PL_nextval[PL_nexttoke].ival = 0;
2334 force_next('(');
2335 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2336 force_next(FUNC);
2337 }
2338 if (PL_lex_starts++) {
2339 s = PL_bufptr;
2340 Aop(OP_CONCAT);
2341 }
2342 return yylex();
2343
2344 case LEX_INTERPENDMAYBE:
2345 if (intuit_more(PL_bufptr)) {
2346 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2347 break;
2348 }
2349 /* FALL THROUGH */
2350
2351 case LEX_INTERPEND:
2352 if (PL_lex_dojoin) {
2353 PL_lex_dojoin = FALSE;
2354 PL_lex_state = LEX_INTERPCONCAT;
2355 return ')';
2356 }
2357 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2358 && SvEVALED(PL_lex_repl))
2359 {
2360 if (PL_bufptr != PL_bufend)
2361 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2362 PL_lex_repl = Nullsv;
2363 }
2364 /* FALLTHROUGH */
2365 case LEX_INTERPCONCAT:
2366 #ifdef DEBUGGING
2367 if (PL_lex_brackets)
2368 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2369 #endif
2370 if (PL_bufptr == PL_bufend)
2371 return sublex_done();
2372
2373 if (SvIVX(PL_linestr) == '\'') {
2374 SV *sv = newSVsv(PL_linestr);
2375 if (!PL_lex_inpat)
2376 sv = tokeq(sv);
2377 else if ( PL_hints & HINT_NEW_RE )
2378 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2379 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2380 s = PL_bufend;
2381 }
2382 else {
2383 s = scan_const(PL_bufptr);
2384 if (*s == '\\')
2385 PL_lex_state = LEX_INTERPCASEMOD;
2386 else
2387 PL_lex_state = LEX_INTERPSTART;
2388 }
2389
2390 if (s != PL_bufptr) {
2391 PL_nextval[PL_nexttoke] = yylval;
2392 PL_expect = XTERM;
2393 force_next(THING);
2394 if (PL_lex_starts++)
2395 Aop(OP_CONCAT);
2396 else {
2397 PL_bufptr = s;
2398 return yylex();
2399 }
2400 }
2401
2402 return yylex();
2403 case LEX_FORMLINE:
2404 PL_lex_state = LEX_NORMAL;
2405 s = scan_formline(PL_bufptr);
2406 if (!PL_lex_formbrack)
2407 goto rightbracket;
2408 OPERATOR(';');
2409 }
2410
2411 s = PL_bufptr;
2412 PL_oldoldbufptr = PL_oldbufptr;
2413 PL_oldbufptr = s;
2414 DEBUG_T( {
2415 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2416 exp_name[PL_expect], s);
2417 } );
2418
2419 retry:
2420 switch (*s) {
2421 default:
2422 if (isIDFIRST_lazy_if(s,UTF))
2423 goto keylookup;
2424 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2425 case 4:
2426 case 26:
2427 goto fake_eof; /* emulate EOF on ^D or ^Z */
2428 case 0:
2429 if (!PL_rsfp) {
2430 PL_last_uni = 0;
2431 PL_last_lop = 0;
2432 if (PL_lex_brackets) {
2433 if (PL_lex_formbrack)
2434 yyerror("Format not terminated");
2435 else
2436 yyerror("Missing right curly or square bracket");
2437 }
2438 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2439 "### Tokener got EOF\n");
2440 } );
2441 TOKEN(0);
2442 }
2443 if (s++ < PL_bufend)
2444 goto retry; /* ignore stray nulls */
2445 PL_last_uni = 0;
2446 PL_last_lop = 0;
2447 if (!PL_in_eval && !PL_preambled) {
2448 PL_preambled = TRUE;
2449 sv_setpv(PL_linestr,incl_perldb());
2450 if (SvCUR(PL_linestr))
2451 sv_catpv(PL_linestr,";");
2452 if (PL_preambleav){
2453 while(AvFILLp(PL_preambleav) >= 0) {
2454 SV *tmpsv = av_shift(PL_preambleav);
2455 sv_catsv(PL_linestr, tmpsv);
2456 sv_catpv(PL_linestr, ";");
2457 sv_free(tmpsv);
2458 }
2459 sv_free((SV*)PL_preambleav);
2460 PL_preambleav = NULL;
2461 }
2462 if (PL_minus_n || PL_minus_p) {
2463 sv_catpv(PL_linestr, "LINE: while (<>) {");
2464 if (PL_minus_l)
2465 sv_catpv(PL_linestr,"chomp;");
2466 if (PL_minus_a) {
2467 if (PL_minus_F) {
2468 if (strchr("/'\"", *PL_splitstr)
2469 && strchr(PL_splitstr + 1, *PL_splitstr))
2470 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2471 else {
2472 char delim;
2473 s = "'~#\200\1'"; /* surely one char is unused...*/
2474 while (s[1] && strchr(PL_splitstr, *s)) s++;
2475 delim = *s;
2476 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
2477 "q" + (delim == '\''), delim);
2478 for (s = PL_splitstr; *s; s++) {
2479 if (*s == '\\')
2480 sv_catpvn(PL_linestr, "\\", 1);
2481 sv_catpvn(PL_linestr, s, 1);
2482 }
2483 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
2484 }
2485 }
2486 else
2487 sv_catpv(PL_linestr,"our @F=split(' ');");
2488 }
2489 }
2490 sv_catpv(PL_linestr, "\n");
2491 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2492 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2493 PL_last_lop = PL_last_uni = Nullch;
2494 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2495 SV *sv = NEWSV(85,0);
2496
2497 sv_upgrade(sv, SVt_PVMG);
2498 sv_setsv(sv,PL_linestr);
2499 (void)SvIOK_on(sv);
2500 SvIVX(sv) = 0;
2501 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2502 }
2503 goto retry;
2504 }
2505 do {
2506 bof = PL_rsfp ? TRUE : FALSE;
2507 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2508 fake_eof:
2509 if (PL_rsfp) {
2510 if (PL_preprocess && !PL_in_eval)
2511 (void)PerlProc_pclose(PL_rsfp);
2512 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2513 PerlIO_clearerr(PL_rsfp);
2514 else
2515 (void)PerlIO_close(PL_rsfp);
2516 PL_rsfp = Nullfp;
2517 PL_doextract = FALSE;
2518 }
2519 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2520 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2521 sv_catpv(PL_linestr,";}");
2522 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2523 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2524 PL_last_lop = PL_last_uni = Nullch;
2525 PL_minus_n = PL_minus_p = 0;
2526 goto retry;
2527 }
2528 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2529 PL_last_lop = PL_last_uni = Nullch;
2530 sv_setpv(PL_linestr,"");
2531 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2532 }
2533 /* If it looks like the start of a BOM or raw UTF-16,
2534 * check if it in fact is. */
2535 else if (bof &&
2536 (*s == 0 ||
2537 *(U8*)s == 0xEF ||
2538 *(U8*)s >= 0xFE ||
2539 s[1] == 0)) {
2540 #ifdef PERLIO_IS_STDIO
2541 # ifdef __GNU_LIBRARY__
2542 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2543 # define FTELL_FOR_PIPE_IS_BROKEN
2544 # endif
2545 # else
2546 # ifdef __GLIBC__
2547 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2548 # define FTELL_FOR_PIPE_IS_BROKEN
2549 # endif
2550 # endif
2551 # endif
2552 #endif
2553 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2554 /* This loses the possibility to detect the bof
2555 * situation on perl -P when the libc5 is being used.
2556 * Workaround? Maybe attach some extra state to PL_rsfp?
2557 */
2558 if (!PL_preprocess)
2559 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2560 #else
2561 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2562 #endif
2563 if (bof) {
2564 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2565 s = swallow_bom((U8*)s);
2566 }
2567 }
2568 if (PL_doextract) {
2569 /* Incest with pod. */
2570 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2571 sv_setpv(PL_linestr, "");
2572 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2573 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2574 PL_last_lop = PL_last_uni = Nullch;
2575 PL_doextract = FALSE;
2576 }
2577 }
2578 incline(s);
2579 } while (PL_doextract);
2580 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2581 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2582 SV *sv = NEWSV(85,0);
2583
2584 sv_upgrade(sv, SVt_PVMG);
2585 sv_setsv(sv,PL_linestr);
2586 (void)SvIOK_on(sv);
2587 SvIVX(sv) = 0;
2588 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2589 }
2590 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2591 PL_last_lop = PL_last_uni = Nullch;
2592 if (CopLINE(PL_curcop) == 1) {
2593 while (s < PL_bufend && isSPACE(*s))
2594 s++;
2595 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2596 s++;
2597 d = Nullch;
2598 if (!PL_in_eval) {
2599 if (*s == '#' && *(s+1) == '!')
2600 d = s + 2;
2601 #ifdef ALTERNATE_SHEBANG
2602 else {
2603 static char as[] = ALTERNATE_SHEBANG;
2604 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2605 d = s + (sizeof(as) - 1);
2606 }
2607 #endif /* ALTERNATE_SHEBANG */
2608 }
2609 if (d) {
2610 char *ipath;
2611 char *ipathend;
2612
2613 while (isSPACE(*d))
2614 d++;
2615 ipath = d;
2616 while (*d && !isSPACE(*d))
2617 d++;
2618 ipathend = d;
2619
2620 #ifdef ARG_ZERO_IS_SCRIPT
2621 if (ipathend > ipath) {
2622 /*
2623 * HP-UX (at least) sets argv[0] to the script name,
2624 * which makes $^X incorrect. And Digital UNIX and Linux,
2625 * at least, set argv[0] to the basename of the Perl
2626 * interpreter. So, having found "#!", we'll set it right.
2627 */
2628 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2629 assert(SvPOK(x) || SvGMAGICAL(x));
2630 if (sv_eq(x, CopFILESV(PL_curcop))) {
2631 sv_setpvn(x, ipath, ipathend - ipath);
2632 SvSETMAGIC(x);
2633 }
2634 else {
2635 STRLEN blen;
2636 STRLEN llen;
2637 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2638 char *lstart = SvPV(x,llen);
2639 if (llen < blen) {
2640 bstart += blen - llen;
2641 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2642 sv_setpvn(x, ipath, ipathend - ipath);
2643 SvSETMAGIC(x);
2644 }
2645 }
2646 }
2647 TAINT_NOT; /* $^X is always tainted, but that's OK */
2648 }
2649 #endif /* ARG_ZERO_IS_SCRIPT */
2650
2651 /*
2652 * Look for options.
2653 */
2654 d = instr(s,"perl -");
2655 if (!d) {
2656 d = instr(s,"perl");
2657 #if defined(DOSISH)
2658 /* avoid getting into infinite loops when shebang
2659 * line contains "Perl" rather than "perl" */
2660 if (!d) {
2661 for (d = ipathend-4; d >= ipath; --d) {
2662 if ((*d == 'p' || *d == 'P')
2663 && !ibcmp(d, "perl", 4))
2664 {
2665 break;
2666 }
2667 }
2668 if (d < ipath)
2669 d = Nullch;
2670 }
2671 #endif
2672 }
2673 #ifdef ALTERNATE_SHEBANG
2674 /*
2675 * If the ALTERNATE_SHEBANG on this system starts with a
2676 * character that can be part of a Perl expression, then if
2677 * we see it but not "perl", we're probably looking at the
2678 * start of Perl code, not a request to hand off to some
2679 * other interpreter. Similarly, if "perl" is there, but
2680 * not in the first 'word' of the line, we assume the line
2681 * contains the start of the Perl program.
2682 */
2683 if (d && *s != '#') {
2684 char *c = ipath;
2685 while (*c && !strchr("; \t\r\n\f\v#", *c))
2686 c++;
2687 if (c < d)
2688 d = Nullch; /* "perl" not in first word; ignore */
2689 else
2690 *s = '#'; /* Don't try to parse shebang line */
2691 }
2692 #endif /* ALTERNATE_SHEBANG */
2693 #ifndef MACOS_TRADITIONAL
2694 if (!d &&
2695 *s == '#' &&
2696 ipathend > ipath &&
2697 !PL_minus_c &&
2698 !instr(s,"indir") &&
2699 instr(PL_origargv[0],"perl"))
2700 {
2701 char **newargv;
2702
2703 *ipathend = '\0';
2704 s = ipathend + 1;
2705 while (s < PL_bufend && isSPACE(*s))
2706 s++;
2707 if (s < PL_bufend) {
2708 Newz(899,newargv,PL_origargc+3,char*);
2709 newargv[1] = s;
2710 while (s < PL_bufend && !isSPACE(*s))
2711 s++;
2712 *s = '\0';
2713 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2714 }
2715 else
2716 newargv = PL_origargv;
2717 newargv[0] = ipath;
2718 PERL_FPU_PRE_EXEC
2719 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2720 PERL_FPU_POST_EXEC
2721 Perl_croak(aTHX_ "Can't exec %s", ipath);
2722 }
2723 #endif
2724 if (d) {
2725 U32 oldpdb = PL_perldb;
2726 bool oldn = PL_minus_n;
2727 bool oldp = PL_minus_p;
2728
2729 while (*d && !isSPACE(*d)) d++;
2730 while (SPACE_OR_TAB(*d)) d++;
2731
2732 if (*d++ == '-') {
2733 bool switches_done = PL_doswitches;
2734 do {
2735 if (*d == 'M' || *d == 'm') {
2736 char *m = d;
2737 while (*d && !isSPACE(*d)) d++;
2738 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2739 (int)(d - m), m);
2740 }
2741 d = moreswitches(d);
2742 } while (d);
2743 if (PL_doswitches && !switches_done) {
2744 int argc = PL_origargc;
2745 char **argv = PL_origargv;
2746 do {
2747 argc--,argv++;
2748 } while (argc && argv[0][0] == '-' && argv[0][1]);
2749 init_argv_symbols(argc,argv);
2750 }
2751 if ((PERLDB_LINE && !oldpdb) ||
2752 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2753 /* if we have already added "LINE: while (<>) {",
2754 we must not do it again */
2755 {
2756 sv_setpv(PL_linestr, "");
2757 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2758 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2759 PL_last_lop = PL_last_uni = Nullch;
2760 PL_preambled = FALSE;
2761 if (PERLDB_LINE)
2762 (void)gv_fetchfile(PL_origfilename);
2763 goto retry;
2764 }
2765 if (PL_doswitches && !switches_done) {
2766 int argc = PL_origargc;
2767 char **argv = PL_origargv;
2768 do {
2769 argc--,argv++;
2770 } while (argc && argv[0][0] == '-' && argv[0][1]);
2771 init_argv_symbols(argc,argv);
2772 }
2773 }
2774 }
2775 }
2776 }
2777 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2778 PL_bufptr = s;
2779 PL_lex_state = LEX_FORMLINE;
2780 return yylex();
2781 }
2782 goto retry;
2783 case '\r':
2784 #ifdef PERL_STRICT_CR
2785 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2786 Perl_croak(aTHX_
2787 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2788 #endif
2789 case ' ': case '\t': case '\f': case 013:
2790 #ifdef MACOS_TRADITIONAL
2791 case '\312':
2792 #endif
2793 s++;
2794 goto retry;
2795 case '#':
2796 case '\n':
2797 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2798 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2799 /* handle eval qq[#line 1 "foo"\n ...] */
2800 CopLINE_dec(PL_curcop);
2801 incline(s);
2802 }
2803 d = PL_bufend;
2804 while (s < d && *s != '\n')
2805 s++;
2806 if (s < d)
2807 s++;
2808 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2809 Perl_croak(aTHX_ "panic: input overflow");
2810 incline(s);
2811 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2812 PL_bufptr = s;
2813 PL_lex_state = LEX_FORMLINE;
2814 return yylex();
2815 }
2816 }
2817 else {
2818 *s = '\0';
2819 PL_bufend = s;
2820 }
2821 goto retry;
2822 case '-':
2823 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2824 I32 ftst = 0;
2825
2826 s++;
2827 PL_bufptr = s;
2828 tmp = *s++;
2829
2830 while (s < PL_bufend && SPACE_OR_TAB(*s))
2831 s++;
2832
2833 if (strnEQ(s,"=>",2)) {
2834 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2835 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2836 "### Saw unary minus before =>, forcing word '%s'\n", s);
2837 } );
2838 OPERATOR('-'); /* unary minus */
2839 }
2840 PL_last_uni = PL_oldbufptr;
2841 switch (tmp) {
2842 case 'r': ftst = OP_FTEREAD; break;
2843 case 'w': ftst = OP_FTEWRITE; break;
2844 case 'x': ftst = OP_FTEEXEC; break;
2845 case 'o': ftst = OP_FTEOWNED; break;
2846 case 'R': ftst = OP_FTRREAD; break;
2847 case 'W': ftst = OP_FTRWRITE; break;
2848 case 'X': ftst = OP_FTREXEC; break;
2849 case 'O': ftst = OP_FTROWNED; break;
2850 case 'e': ftst = OP_FTIS; break;
2851 case 'z': ftst = OP_FTZERO; break;
2852 case 's': ftst = OP_FTSIZE; break;
2853 case 'f': ftst = OP_FTFILE; break;
2854 case 'd': ftst = OP_FTDIR; break;
2855 case 'l': ftst = OP_FTLINK; break;
2856 case 'p': ftst = OP_FTPIPE; break;
2857 case 'S': ftst = OP_FTSOCK; break;
2858 case 'u': ftst = OP_FTSUID; break;
2859 case 'g': ftst = OP_FTSGID; break;
2860 case 'k': ftst = OP_FTSVTX; break;
2861 case 'b': ftst = OP_FTBLK; break;
2862 case 'c': ftst = OP_FTCHR; break;
2863 case 't': ftst = OP_FTTTY; break;
2864 case 'T': ftst = OP_FTTEXT; break;
2865 case 'B': ftst = OP_FTBINARY; break;
2866 case 'M': case 'A': case 'C':
2867 gv_fetchpv("\024",TRUE, SVt_PV);
2868 switch (tmp) {
2869 case 'M': ftst = OP_FTMTIME; break;
2870 case 'A': ftst = OP_FTATIME; break;
2871 case 'C': ftst = OP_FTCTIME; break;
2872 default: break;
2873 }
2874 break;
2875 default:
2876 break;
2877 }
2878 if (ftst) {
2879 PL_last_lop_op = (OPCODE)ftst;
2880 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2881 "### Saw file test %c\n", (int)ftst);
2882 } );
2883 FTST(ftst);
2884 }
2885 else {
2886 /* Assume it was a minus followed by a one-letter named
2887 * subroutine call (or a -bareword), then. */
2888 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2889 "### '-%c' looked like a file test but was not\n",
2890 tmp);
2891 } );
2892 s = --PL_bufptr;
2893 }
2894 }
2895 tmp = *s++;
2896 if (*s == tmp) {
2897 s++;
2898 if (PL_expect == XOPERATOR)
2899 TERM(POSTDEC);
2900 else
2901 OPERATOR(PREDEC);
2902 }
2903 else if (*s == '>') {
2904 s++;
2905 s = skipspace(s);
2906 if (isIDFIRST_lazy_if(s,UTF)) {
2907 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
2908 TOKEN(ARROW);
2909 }
2910 else if (*s == '$')
2911 OPERATOR(ARROW);
2912 else
2913 TERM(ARROW);
2914 }
2915 if (PL_expect == XOPERATOR)
2916 Aop(OP_SUBTRACT);
2917 else {
2918 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2919 check_uni();
2920 OPERATOR('-'); /* unary minus */
2921 }
2922
2923 case '+':
2924 tmp = *s++;
2925 if (*s == tmp) {
2926 s++;
2927 if (PL_expect == XOPERATOR)
2928 TERM(POSTINC);
2929 else
2930 OPERATOR(PREINC);
2931 }
2932 if (PL_expect == XOPERATOR)
2933 Aop(OP_ADD);
2934 else {
2935 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2936 check_uni();
2937 OPERATOR('+');
2938 }
2939
2940 case '*':
2941 if (PL_expect != XOPERATOR) {
2942 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2943 PL_expect = XOPERATOR;
2944 force_ident(PL_tokenbuf, '*');
2945 if (!*PL_tokenbuf)
2946 PREREF('*');
2947 TERM('*');
2948 }
2949 s++;
2950 if (*s == '*') {
2951 s++;
2952 PWop(OP_POW);
2953 }
2954 Mop(OP_MULTIPLY);
2955
2956 case '%':
2957 if (PL_expect == XOPERATOR) {
2958 ++s;
2959 Mop(OP_MODULO);
2960 }
2961 PL_tokenbuf[0] = '%';
2962 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2963 if (!PL_tokenbuf[1]) {
2964 PREREF('%');
2965 }
2966 PL_pending_ident = '%';
2967 TERM('%');
2968
2969 case '^':
2970 s++;
2971 BOop(OP_BIT_XOR);
2972 case '[':
2973 PL_lex_brackets++;
2974 /* FALL THROUGH */
2975 case '~':
2976 case ',':
2977 tmp = *s++;
2978 OPERATOR(tmp);
2979 case ':':
2980 if (s[1] == ':') {
2981 len = 0;
2982 goto just_a_word;
2983 }
2984 s++;
2985 switch (PL_expect) {
2986 OP *attrs;
2987 case XOPERATOR:
2988 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2989 break;
2990 PL_bufptr = s; /* update in case we back off */
2991 goto grabattrs;
2992 case XATTRBLOCK:
2993 PL_expect = XBLOCK;
2994 goto grabattrs;
2995 case XATTRTERM:
2996 PL_expect = XTERMBLOCK;
2997 grabattrs:
2998 s = skipspace(s);
2999 attrs = Nullop;
3000 while (isIDFIRST_lazy_if(s,UTF)) {
3001 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3002 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3003 if (tmp < 0) tmp = -tmp;
3004 switch (tmp) {
3005 case KEY_or:
3006 case KEY_and:
3007 case KEY_for:
3008 case KEY_unless:
3009 case KEY_if:
3010 case KEY_while:
3011 case KEY_until:
3012 goto got_attrs;
3013 default:
3014 break;
3015 }
3016 }
3017 if (*d == '(') {
3018 d = scan_str(d,TRUE,TRUE);
3019 if (!d) {
3020 /* MUST advance bufptr here to avoid bogus
3021 "at end of line" context messages from yyerror().
3022 */
3023 PL_bufptr = s + len;
3024 yyerror("Unterminated attribute parameter in attribute list");
3025 if (attrs)
3026 op_free(attrs);
3027 return 0; /* EOF indicator */
3028 }
3029 }
3030 if (PL_lex_stuff) {
3031 SV *sv = newSVpvn(s, len);
3032 sv_catsv(sv, PL_lex_stuff);
3033 attrs = append_elem(OP_LIST, attrs,
3034 newSVOP(OP_CONST, 0, sv));
3035 SvREFCNT_dec(PL_lex_stuff);
3036 PL_lex_stuff = Nullsv;
3037 }
3038 else {
3039 if (len == 6 && strnEQ(s, "unique", len)) {
3040 if (PL_in_my == KEY_our)
3041 #ifdef USE_ITHREADS
3042 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3043 #else
3044 ; /* skip to avoid loading attributes.pm */
3045 #endif
3046 else
3047 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3048 }
3049
3050 /* NOTE: any CV attrs applied here need to be part of
3051 the CVf_BUILTIN_ATTRS define in cv.h! */
3052 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3053 CvLVALUE_on(PL_compcv);
3054 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3055 CvLOCKED_on(PL_compcv);
3056 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3057 CvMETHOD_on(PL_compcv);
3058 /* After we've set the flags, it could be argued that
3059 we don't need to do the attributes.pm-based setting
3060 process, and shouldn't bother appending recognized
3061 flags. To experiment with that, uncomment the
3062 following "else". (Note that's already been
3063 uncommented. That keeps the above-applied built-in
3064 attributes from being intercepted (and possibly
3065 rejected) by a package's attribute routines, but is
3066 justified by the performance win for the common case
3067 of applying only built-in attributes.) */
3068 else
3069 attrs = append_elem(OP_LIST, attrs,
3070 newSVOP(OP_CONST, 0,
3071 newSVpvn(s, len)));
3072 }
3073 s = skipspace(d);
3074 if (*s == ':' && s[1] != ':')
3075 s = skipspace(s+1);
3076 else if (s == d)
3077 break; /* require real whitespace or :'s */
3078 }
3079 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3080 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3081 char q = ((*s == '\'') ? '"' : '\'');
3082 /* If here for an expression, and parsed no attrs, back off. */
3083 if (tmp == '=' && !attrs) {
3084 s = PL_bufptr;
3085 break;
3086 }
3087 /* MUST advance bufptr here to avoid bogus "at end of line"
3088 context messages from yyerror().
3089 */
3090 PL_bufptr = s;
3091 if (!*s)
3092 yyerror("Unterminated attribute list");
3093 else
3094 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3095 q, *s, q));
3096 if (attrs)
3097 op_free(attrs);
3098 OPERATOR(':');
3099 }
3100 got_attrs:
3101 if (attrs) {
3102 PL_nextval[PL_nexttoke].opval = attrs;
3103 force_next(THING);
3104 }
3105 TOKEN(COLONATTR);
3106 }
3107 OPERATOR(':');
3108 case '(':
3109 s++;
3110 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3111 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3112 else
3113 PL_expect = XTERM;
3114 s = skipspace(s);
3115 TOKEN('(');
3116 case ';':
3117 CLINE;
3118 tmp = *s++;
3119 OPERATOR(tmp);
3120 case ')':
3121 tmp = *s++;
3122 s = skipspace(s);
3123 if (*s == '{')
3124 PREBLOCK(tmp);
3125 TERM(tmp);
3126 case ']':
3127 s++;
3128 if (PL_lex_brackets <= 0)
3129 yyerror("Unmatched right square bracket");
3130 else
3131 --PL_lex_brackets;
3132 if (PL_lex_state == LEX_INTERPNORMAL) {
3133 if (PL_lex_brackets == 0) {
3134 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3135 PL_lex_state = LEX_INTERPEND;
3136 }
3137 }
3138 TERM(']');
3139 case '{':
3140 leftbracket:
3141 s++;
3142 if (PL_lex_brackets > 100) {
3143 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3144 }
3145 switch (PL_expect) {
3146 case XTERM:
3147 if (PL_lex_formbrack) {
3148 s--;
3149 PRETERMBLOCK(DO);
3150 }
3151 if (PL_oldoldbufptr == PL_last_lop)
3152 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3153 else
3154 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3155 OPERATOR(HASHBRACK);
3156 case XOPERATOR:
3157 while (s < PL_bufend && SPACE_OR_TAB(*s))
3158 s++;
3159 d = s;
3160 PL_tokenbuf[0] = '\0';
3161 if (d < PL_bufend && *d == '-') {
3162 PL_tokenbuf[0] = '-';
3163 d++;
3164 while (d < PL_bufend && SPACE_OR_TAB(*d))
3165 d++;
3166 }
3167 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3168 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3169 FALSE, &len);
3170 while (d < PL_bufend && SPACE_OR_TAB(*d))
3171 d++;
3172 if (*d == '}') {
3173 char minus = (PL_tokenbuf[0] == '-');
3174 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3175 if (minus)
3176 force_next('-');
3177 }
3178 }
3179 /* FALL THROUGH */
3180 case XATTRBLOCK:
3181 case XBLOCK:
3182 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3183 PL_expect = XSTATE;
3184 break;
3185 case XATTRTERM:
3186 case XTERMBLOCK:
3187 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3188 PL_expect = XSTATE;
3189 break;
3190 default: {
3191 char *t;
3192 if (PL_oldoldbufptr == PL_last_lop)
3193 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3194 else
3195 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3196 s = skipspace(s);
3197 if (*s == '}') {
3198 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3199 PL_expect = XTERM;
3200 /* This hack is to get the ${} in the message. */
3201 PL_bufptr = s+1;
3202 yyerror("syntax error");
3203 break;
3204 }
3205 OPERATOR(HASHBRACK);
3206 }
3207 /* This hack serves to disambiguate a pair of curlies
3208 * as being a block or an anon hash. Normally, expectation
3209 * determines that, but in cases where we're not in a
3210 * position to expect anything in particular (like inside
3211 * eval"") we have to resolve the ambiguity. This code
3212 * covers the case where the first term in the curlies is a
3213 * quoted string. Most other cases need to be explicitly
3214 * disambiguated by prepending a `+' before the opening
3215 * curly in order to force resolution as an anon hash.
3216 *
3217 * XXX should probably propagate the outer expectation
3218 * into eval"" to rely less on this hack, but that could
3219 * potentially break current behavior of eval"".
3220 * GSAR 97-07-21
3221 */
3222 t = s;
3223 if (*s == '\'' || *s == '"' || *s == '`') {
3224 /* common case: get past first string, handling escapes */
3225 for (t++; t < PL_bufend && *t != *s;)
3226 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3227 t++;
3228 t++;
3229 }
3230 else if (*s == 'q') {
3231 if (++t < PL_bufend
3232 && (!isALNUM(*t)
3233 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3234 && !isALNUM(*t))))
3235 {
3236 /* skip q//-like construct */
3237 char *tmps;
3238 char open, close, term;
3239 I32 brackets = 1;
3240
3241 while (t < PL_bufend && isSPACE(*t))
3242 t++;
3243 /* check for q => */
3244 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3245 OPERATOR(HASHBRACK);
3246 }
3247 term = *t;
3248 open = term;
3249 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3250 term = tmps[5];
3251 close = term;
3252 if (open == close)
3253 for (t++; t < PL_bufend; t++) {
3254 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3255 t++;
3256 else if (*t == open)
3257 break;
3258 }
3259 else {
3260 for (t++; t < PL_bufend; t++) {
3261 if (*t == '\\' && t+1 < PL_bufend)
3262 t++;
3263 else if (*t == close && --brackets <= 0)
3264 break;
3265 else if (*t == open)
3266 brackets++;
3267 }
3268 }
3269 t++;
3270 }
3271 else
3272 /* skip plain q word */
3273 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3274 t += UTF8SKIP(t);
3275 }
3276 else if (isALNUM_lazy_if(t,UTF)) {
3277 t += UTF8SKIP(t);
3278 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3279 t += UTF8SKIP(t);
3280 }
3281 while (t < PL_bufend && isSPACE(*t))
3282 t++;
3283 /* if comma follows first term, call it an anon hash */
3284 /* XXX it could be a comma expression with loop modifiers */
3285 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3286 || (*t == '=' && t[1] == '>')))
3287 OPERATOR(HASHBRACK);
3288 if (PL_expect == XREF)
3289 PL_expect = XTERM;
3290 else {
3291 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3292 PL_expect = XSTATE;
3293 }
3294 }
3295 break;
3296 }
3297 yylval.ival = CopLINE(PL_curcop);
3298 if (isSPACE(*s) || *s == '#')
3299 PL_copline = NOLINE; /* invalidate current command line number */
3300 TOKEN('{');
3301 case '}':
3302 rightbracket:
3303 s++;
3304 if (PL_lex_brackets <= 0)
3305 yyerror("Unmatched right curly bracket");
3306 else
3307 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3308 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3309 PL_lex_formbrack = 0;
3310 if (PL_lex_state == LEX_INTERPNORMAL) {
3311 if (PL_lex_brackets == 0) {
3312 if (PL_expect & XFAKEBRACK) {
3313 PL_expect &= XENUMMASK;
3314 PL_lex_state = LEX_INTERPEND;
3315 PL_bufptr = s;
3316 return yylex(); /* ignore fake brackets */
3317 }
3318 if (*s == '-' && s[1] == '>')
3319 PL_lex_state = LEX_INTERPENDMAYBE;
3320 else if (*s != '[' && *s != '{')
3321 PL_lex_state = LEX_INTERPEND;
3322 }
3323 }
3324 if (PL_expect & XFAKEBRACK) {
3325 PL_expect &= XENUMMASK;
3326 PL_bufptr = s;
3327 return yylex(); /* ignore fake brackets */
3328 }
3329 force_next('}');
3330 TOKEN(';');
3331 case '&':
3332 s++;
3333 tmp = *s++;
3334 if (tmp == '&')
3335 AOPERATOR(ANDAND);
3336 s--;
3337 if (PL_expect == XOPERATOR) {
3338 if (ckWARN(WARN_SEMICOLON)
3339 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3340 {
3341 CopLINE_dec(PL_curcop);
3342 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3343 CopLINE_inc(PL_curcop);
3344 }
3345 BAop(OP_BIT_AND);
3346 }
3347
3348 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3349 if (*PL_tokenbuf) {
3350 PL_expect = XOPERATOR;
3351 force_ident(PL_tokenbuf, '&');
3352 }
3353 else
3354 PREREF('&');
3355 yylval.ival = (OPpENTERSUB_AMPER<<8);
3356 TERM('&');
3357
3358 case '|':
3359 s++;
3360 tmp = *s++;
3361 if (tmp == '|')
3362 AOPERATOR(OROR);
3363 s--;
3364 BOop(OP_BIT_OR);
3365 case '=':
3366 s++;
3367 tmp = *s++;
3368 if (tmp == '=')
3369 Eop(OP_EQ);
3370 if (tmp == '>')
3371 OPERATOR(',');
3372 if (tmp == '~')
3373 PMop(OP_MATCH);
3374 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3375 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3376 s--;
3377 if (PL_expect == XSTATE && isALPHA(tmp) &&
3378 (s == PL_linestart+1 || s[-2] == '\n') )
3379 {
3380 if (PL_in_eval && !PL_rsfp) {
3381 d = PL_bufend;
3382 while (s < d) {
3383 if (*s++ == '\n') {
3384 incline(s);
3385 if (strnEQ(s,"=cut",4)) {
3386 s = strchr(s,'\n');
3387 if (s)
3388 s++;
3389 else
3390 s = d;
3391 incline(s);
3392 goto retry;
3393 }
3394 }
3395 }
3396 goto retry;
3397 }
3398 s = PL_bufend;
3399 PL_doextract = TRUE;
3400 goto retry;
3401 }
3402 if (PL_lex_brackets < PL_lex_formbrack) {
3403 char *t;
3404 #ifdef PERL_STRICT_CR
3405 for (t = s; SPACE_OR_TAB(*t); t++) ;
3406 #else
3407 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3408 #endif
3409 if (*t == '\n' || *t == '#') {
3410 s--;
3411 PL_expect = XBLOCK;
3412 goto leftbracket;
3413 }
3414 }
3415 yylval.ival = 0;
3416 OPERATOR(ASSIGNOP);
3417 case '!':
3418 s++;
3419 tmp = *s++;
3420 if (tmp == '=')
3421 Eop(OP_NE);
3422 if (tmp == '~')
3423 PMop(OP_NOT);
3424 s--;
3425 OPERATOR('!');
3426 case '<':
3427 if (PL_expect != XOPERATOR) {
3428 if (s[1] != '<' && !strchr(s,'>'))
3429 check_uni();
3430 if (s[1] == '<')
3431 s = scan_heredoc(s);
3432 else
3433 s = scan_inputsymbol(s);
3434 TERM(sublex_start());
3435 }
3436 s++;
3437 tmp = *s++;
3438 if (tmp == '<')
3439 SHop(OP_LEFT_SHIFT);
3440 if (tmp == '=') {
3441 tmp = *s++;
3442 if (tmp == '>')
3443 Eop(OP_NCMP);
3444 s--;
3445 Rop(OP_LE);
3446 }
3447 s--;
3448 Rop(OP_LT);
3449 case '>':
3450 s++;
3451 tmp = *s++;
3452 if (tmp == '>')
3453 SHop(OP_RIGHT_SHIFT);
3454 if (tmp == '=')
3455 Rop(OP_GE);
3456 s--;
3457 Rop(OP_GT);
3458
3459 case '$':
3460 CLINE;
3461
3462 if (PL_expect == XOPERATOR) {
3463 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3464 PL_expect = XTERM;
3465 depcom();
3466 return ','; /* grandfather non-comma-format format */
3467 }
3468 }
3469
3470 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3471 PL_tokenbuf[0] = '@';
3472 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3473 sizeof PL_tokenbuf - 1, FALSE);
3474 if (PL_expect == XOPERATOR)
3475 no_op("Array length", s);
3476 if (!PL_tokenbuf[1])
3477 PREREF(DOLSHARP);
3478 PL_expect = XOPERATOR;
3479 PL_pending_ident = '#';
3480 TOKEN(DOLSHARP);
3481 }
3482
3483 PL_tokenbuf[0] = '$';
3484 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3485 sizeof PL_tokenbuf - 1, FALSE);
3486 if (PL_expect == XOPERATOR)
3487 no_op("Scalar", s);
3488 if (!PL_tokenbuf[1]) {
3489 if (s == PL_bufend)
3490 yyerror("Final $ should be \\$ or $name");
3491 PREREF('$');
3492 }
3493
3494 /* This kludge not intended to be bulletproof. */
3495 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3496 yylval.opval = newSVOP(OP_CONST, 0,
3497 newSViv(PL_compiling.cop_arybase));
3498 yylval.opval->op_private = OPpCONST_ARYBASE;
3499 TERM(THING);
3500 }
3501
3502 d = s;
3503 tmp = (I32)*s;
3504 if (PL_lex_state == LEX_NORMAL)
3505 s = skipspace(s);
3506
3507 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3508 char *t;
3509 if (*s == '[') {
3510 PL_tokenbuf[0] = '@';
3511 if (ckWARN(WARN_SYNTAX)) {
3512 for(t = s + 1;
3513 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3514 t++) ;
3515 if (*t++ == ',') {
3516 PL_bufptr = skipspace(PL_bufptr);
3517 while (t < PL_bufend && *t != ']')
3518 t++;
3519 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3520 "Multidimensional syntax %.*s not supported",
3521 (t - PL_bufptr) + 1, PL_bufptr);
3522 }
3523 }
3524 }
3525 else if (*s == '{') {
3526 PL_tokenbuf[0] = '%';
3527 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3528 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3529 {
3530 char tmpbuf[sizeof PL_tokenbuf];
3531 STRLEN len;
3532 for (t++; isSPACE(*t); t++) ;
3533 if (isIDFIRST_lazy_if(t,UTF)) {
3534 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3535 for (; isSPACE(*t); t++) ;
3536 if (*t == ';' && get_cv(tmpbuf, FALSE))
3537 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3538 "You need to quote \"%s\"", tmpbuf);
3539 }
3540 }
3541 }
3542 }
3543
3544 PL_expect = XOPERATOR;
3545 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3546 bool islop = (PL_last_lop == PL_oldoldbufptr);
3547 if (!islop || PL_last_lop_op == OP_GREPSTART)
3548 PL_expect = XOPERATOR;
3549 else if (strchr("$@\"'`q", *s))
3550 PL_expect = XTERM; /* e.g. print $fh "foo" */
3551 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3552 PL_expect = XTERM; /* e.g. print $fh &sub */
3553 else if (isIDFIRST_lazy_if(s,UTF)) {
3554 char tmpbuf[sizeof PL_tokenbuf];
3555 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3556 if ((tmp = keyword(tmpbuf, len))) {
3557 /* binary operators exclude handle interpretations */
3558 switch (tmp) {
3559 case -KEY_x:
3560 case -KEY_eq:
3561 case -KEY_ne:
3562 case -KEY_gt:
3563 case -KEY_lt:
3564 case -KEY_ge:
3565 case -KEY_le:
3566 case -KEY_cmp:
3567 break;
3568 default:
3569 PL_expect = XTERM; /* e.g. print $fh length() */
3570 break;
3571 }
3572 }
3573 else {
3574 PL_expect = XTERM; /* e.g. print $fh subr() */
3575 }
3576 }
3577 else if (isDIGIT(*s))
3578 PL_expect = XTERM; /* e.g. print $fh 3 */
3579 else if (*s == '.' && isDIGIT(s[1]))
3580 PL_expect = XTERM; /* e.g. print $fh .3 */
3581 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3582 PL_expect = XTERM; /* e.g. print $fh -1 */
3583 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3584 PL_expect = XTERM; /* print $fh <<"EOF" */
3585 }
3586 PL_pending_ident = '$';
3587 TOKEN('$');
3588
3589 case '@':
3590 if (PL_expect == XOPERATOR)
3591 no_op("Array", s);
3592 PL_tokenbuf[0] = '@';
3593 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3594 if (!PL_tokenbuf[1]) {
3595 PREREF('@');
3596 }
3597 if (PL_lex_state == LEX_NORMAL)
3598 s = skipspace(s);
3599 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3600 if (*s == '{')
3601 PL_tokenbuf[0] = '%';
3602
3603 /* Warn about @ where they meant $. */
3604 if (ckWARN(WARN_SYNTAX)) {
3605 if (*s == '[' || *s == '{') {
3606 char *t = s + 1;
3607 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3608 t++;
3609 if (*t == '}' || *t == ']') {
3610 t++;
3611 PL_bufptr = skipspace(PL_bufptr);
3612 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3613 "Scalar value %.*s better written as $%.*s",
3614 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3615 }
3616 }
3617 }
3618 }
3619 PL_pending_ident = '@';
3620 TERM('@');
3621
3622 case '/': /* may either be division or pattern */
3623 case '?': /* may either be conditional or pattern */
3624 if (PL_expect != XOPERATOR) {
3625 /* Disable warning on "study /blah/" */
3626 if (PL_oldoldbufptr == PL_last_uni
3627 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3628 || memNE(PL_last_uni, "study", 5)
3629 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
3630 check_uni();
3631 s = scan_pat(s,OP_MATCH);
3632 TERM(sublex_start());
3633 }
3634 tmp = *s++;
3635 if (tmp == '/')
3636 Mop(OP_DIVIDE);
3637 OPERATOR(tmp);
3638
3639 case '.':
3640 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3641 #ifdef PERL_STRICT_CR
3642 && s[1] == '\n'
3643 #else
3644 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3645 #endif
3646 && (s == PL_linestart || s[-1] == '\n') )
3647 {
3648 PL_lex_formbrack = 0;
3649 PL_expect = XSTATE;
3650 goto rightbracket;
3651 }
3652 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3653 tmp = *s++;
3654 if (*s == tmp) {
3655 s++;
3656 if (*s == tmp) {
3657 s++;
3658 yylval.ival = OPf_SPECIAL;
3659 }
3660 else
3661 yylval.ival = 0;
3662 OPERATOR(DOTDOT);
3663 }
3664 if (PL_expect != XOPERATOR)
3665 check_uni();
3666 Aop(OP_CONCAT);
3667 }
3668 /* FALL THROUGH */
3669 case '0': case '1': case '2': case '3': case '4':
3670 case '5': case '6': case '7': case '8': case '9':
3671 s = scan_num(s, &yylval);
3672 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3673 "### Saw number before '%s'\n", s);
3674 } );
3675 if (PL_expect == XOPERATOR)
3676 no_op("Number",s);
3677 TERM(THING);
3678
3679 case '\'':
3680 s = scan_str(s,FALSE,FALSE);
3681 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3682 "### Saw string before '%s'\n", s);
3683 } );
3684 if (PL_expect == XOPERATOR) {
3685 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3686 PL_expect = XTERM;
3687 depcom();
3688 return ','; /* grandfather non-comma-format format */
3689 }
3690 else
3691 no_op("String",s);
3692 }
3693 if (!s)
3694 missingterm((char*)0);
3695 yylval.ival = OP_CONST;
3696 TERM(sublex_start());
3697
3698 case '"':
3699 s = scan_str(s,FALSE,FALSE);
3700 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3701 "### Saw string before '%s'\n", s);
3702 } );
3703 if (PL_expect == XOPERATOR) {
3704 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3705 PL_expect = XTERM;
3706 depcom();
3707 return ','; /* grandfather non-comma-format format */
3708 }
3709 else
3710 no_op("String",s);
3711 }
3712 if (!s)
3713 missingterm((char*)0);
3714 yylval.ival = OP_CONST;
3715 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3716 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3717 yylval.ival = OP_STRINGIFY;
3718 break;
3719 }
3720 }
3721 TERM(sublex_start());
3722
3723 case '`':
3724 s = scan_str(s,FALSE,FALSE);
3725 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3726 "### Saw backtick string before '%s'\n", s);
3727 } );
3728 if (PL_expect == XOPERATOR)
3729 no_op("Backticks",s);
3730 if (!s)
3731 missingterm((char*)0);
3732 yylval.ival = OP_BACKTICK;
3733 set_csh();
3734 TERM(sublex_start());
3735
3736 case '\\':
3737 s++;
3738 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3739 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3740 *s, *s);
3741 if (PL_expect == XOPERATOR)
3742 no_op("Backslash",s);
3743 OPERATOR(REFGEN);
3744
3745 case 'v':
3746 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3747 char *start = s;
3748 start++;
3749 start++;
3750 while (isDIGIT(*start) || *start == '_')
3751 start++;
3752 if (*start == '.' && isDIGIT(start[1])) {
3753 s = scan_num(s, &yylval);
3754 TERM(THING);
3755 }
3756 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3757 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) {
3758 char c = *start;
3759 GV *gv;
3760 *start = '\0';
3761 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3762 *start = c;
3763 if (!gv) {
3764 s = scan_num(s, &yylval);
3765 TERM(THING);
3766 }
3767 }
3768 }
3769 goto keylookup;
3770 case 'x':
3771 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3772 s++;
3773 Mop(OP_REPEAT);
3774 }
3775 goto keylookup;
3776
3777 case '_':
3778 case 'a': case 'A':
3779 case 'b': case 'B':
3780 case 'c': case 'C':
3781 case 'd': case 'D':
3782 case 'e': case 'E':
3783 case 'f': case 'F':
3784 case 'g': case 'G':
3785 case 'h': case 'H':
3786 case 'i': case 'I':
3787 case 'j': case 'J':
3788 case 'k': case 'K':
3789 case 'l': case 'L':
3790 case 'm': case 'M':
3791 case 'n': case 'N':
3792 case 'o': case 'O':
3793 case 'p': case 'P':
3794 case 'q': case 'Q':
3795 case 'r': case 'R':
3796 case 's': case 'S':
3797 case 't': case 'T':
3798 case 'u': case 'U':
3799 case 'V':
3800 case 'w': case 'W':
3801 case 'X':
3802 case 'y': case 'Y':
3803 case 'z': case 'Z':
3804
3805 keylookup: {
3806 orig_keyword = 0;
3807 gv = Nullgv;
3808 gvp = 0;
3809
3810 PL_bufptr = s;
3811 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3812
3813 /* Some keywords can be followed by any delimiter, including ':' */
3814 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3815 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3816 (PL_tokenbuf[0] == 'q' &&
3817 strchr("qwxr", PL_tokenbuf[1])))));
3818
3819 /* x::* is just a word, unless x is "CORE" */
3820 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
3821 goto just_a_word;
3822
3823 d = s;
3824 while (d < PL_bufend && isSPACE(*d))
3825 d++; /* no comments skipped here, or s### is misparsed */
3826
3827 /* Is this a label? */
3828 if (!tmp && PL_expect == XSTATE
3829 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
3830 s = d + 1;
3831 yylval.pval = savepv(PL_tokenbuf);
3832 CLINE;
3833 TOKEN(LABEL);
3834 }
3835
3836 /* Check for keywords */
3837 tmp = keyword(PL_tokenbuf, len);
3838
3839 /* Is this a word before a => operator? */
3840 if (*d == '=' && d[1] == '>') {
3841 CLINE;
3842 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
3843 yylval.opval->op_private = OPpCONST_BARE;
3844 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
3845 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
3846 TERM(WORD);
3847 }
3848
3849 if (tmp < 0) { /* second-class keyword? */
3850 GV *ogv = Nullgv; /* override (winner) */
3851 GV *hgv = Nullgv; /* hidden (loser) */
3852 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
3853 CV *cv;
3854 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
3855 (cv = GvCVu(gv)))
3856 {
3857 if (GvIMPORTED_CV(gv))
3858 ogv = gv;
3859 else if (! CvMETHOD(cv))
3860 hgv = gv;
3861 }
3862 if (!ogv &&
3863 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3864 (gv = *gvp) != (GV*)&PL_sv_undef &&
3865 GvCVu(gv) && GvIMPORTED_CV(gv))
3866 {
3867 ogv = gv;
3868 }
3869 }
3870 if (ogv) {
3871 orig_keyword = tmp;
3872 tmp = 0; /* overridden by import or by GLOBAL */
3873 }
3874 else if (gv && !gvp
3875 && -tmp==KEY_lock /* XXX generalizable kludge */
3876 && GvCVu(gv)
3877 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
3878 {
3879 tmp = 0; /* any sub overrides "weak" keyword */
3880 }
3881 else { /* no override */
3882 tmp = -tmp;
3883 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
3884 Perl_warner(aTHX_ packWARN(WARN_MISC),
3885 "dump() better written as CORE::dump()");
3886 }
3887 gv = Nullgv;
3888 gvp = 0;
3889 if (ckWARN(WARN_AMBIGUOUS) && hgv
3890 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
3891 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3892 "Ambiguous call resolved as CORE::%s(), %s",
3893 GvENAME(hgv), "qualify as such or use &");
3894 }
3895 }
3896
3897 reserved_word:
3898 switch (tmp) {
3899
3900 default: /* not a keyword */
3901 just_a_word: {
3902 SV *sv;
3903 int pkgname = 0;
3904 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
3905
3906 /* Get the rest if it looks like a package qualifier */
3907
3908 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
3909 STRLEN morelen;
3910 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
3911 TRUE, &morelen);
3912 if (!morelen)
3913 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
3914 *s == '\'' ? "'" : "::");
3915 len += morelen;
3916 pkgname = 1;
3917 }
3918
3919 if (PL_expect == XOPERATOR) {
3920 if (PL_bufptr == PL_linestart) {
3921 CopLINE_dec(PL_curcop);
3922 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3923 CopLINE_inc(PL_curcop);
3924 }
3925 else
3926 no_op("Bareword",s);
3927 }
3928
3929 /* Look for a subroutine with this name in current package,
3930 unless name is "Foo::", in which case Foo is a bearword
3931 (and a package name). */
3932
3933 if (len > 2 &&
3934 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
3935 {
3936 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
3937 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
3938 "Bareword \"%s\" refers to nonexistent package",
3939 PL_tokenbuf);
3940 len -= 2;
3941 PL_tokenbuf[len] = '\0';
3942 gv = Nullgv;
3943 gvp = 0;
3944 }
3945 else {
3946 len = 0;
3947 if (!gv)
3948 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
3949 }
3950
3951 /* if we saw a global override before, get the right name */
3952
3953 if (gvp) {
3954 sv = newSVpvn("CORE::GLOBAL::",14);
3955 sv_catpv(sv,PL_tokenbuf);
3956 }
3957 else
3958 sv = newSVpv(PL_tokenbuf,0);
3959
3960 /* Presume this is going to be a bareword of some sort. */
3961
3962 CLINE;
3963 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3964 yylval.opval->op_private = OPpCONST_BARE;
3965 /* UTF-8 package name? */
3966 if (UTF && !IN_BYTES &&
3967 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
3968 SvUTF8_on(sv);
3969
3970 /* And if "Foo::", then that's what it certainly is. */
3971
3972 if (len)
3973 goto safe_bareword;
3974
3975 /* See if it's the indirect object for a list operator. */
3976
3977 if (PL_oldoldbufptr &&
3978 PL_oldoldbufptr < PL_bufptr &&
3979 (PL_oldoldbufptr == PL_last_lop
3980 || PL_oldoldbufptr == PL_last_uni) &&
3981 /* NO SKIPSPACE BEFORE HERE! */
3982 (PL_expect == XREF ||
3983 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
3984 {
3985 bool immediate_paren = *s == '(';
3986
3987 /* (Now we can afford to cross potential line boundary.) */
3988 s = skipspace(s);
3989
3990 /* Two barewords in a row may indicate method call. */
3991
3992 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
3993 return tmp;
3994
3995 /* If not a declared subroutine, it's an indirect object. */
3996 /* (But it's an indir obj regardless for sort.) */
3997
3998 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
3999 ((!gv || !GvCVu(gv)) &&
4000 (PL_last_lop_op != OP_MAPSTART &&
4001 PL_last_lop_op != OP_GREPSTART))))
4002 {
4003 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4004 goto bareword;
4005 }
4006 }
4007
4008 PL_expect = XOPERATOR;
4009 s = skipspace(s);
4010
4011 /* Is this a word before a => operator? */
4012 if (*s == '=' && s[1] == '>' && !pkgname) {
4013 CLINE;
4014 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4015 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4016 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4017 TERM(WORD);
4018 }
4019
4020 /* If followed by a paren, it's certainly a subroutine. */
4021 if (*s == '(') {
4022 CLINE;
4023 if (gv && GvCVu(gv)) {
4024 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4025 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4026 s = d + 1;
4027 goto its_constant;
4028 }
4029 }
4030 PL_nextval[PL_nexttoke].opval = yylval.opval;
4031 PL_expect = XOPERATOR;
4032 force_next(WORD);
4033 yylval.ival = 0;
4034 TOKEN('&');
4035 }
4036
4037 /* If followed by var or block, call it a method (unless sub) */
4038
4039 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4040 PL_last_lop = PL_oldbufptr;
4041 PL_last_lop_op = OP_METHOD;
4042 PREBLOCK(METHOD);
4043 }
4044
4045 /* If followed by a bareword, see if it looks like indir obj. */
4046
4047 if (!orig_keyword
4048 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4049 && (tmp = intuit_method(s,gv)))
4050 return tmp;
4051
4052 /* Not a method, so call it a subroutine (if defined) */
4053
4054 if (gv && GvCVu(gv)) {
4055 CV* cv;
4056 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4057 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4058 "Ambiguous use of -%s resolved as -&%s()",
4059 PL_tokenbuf, PL_tokenbuf);
4060 /* Check for a constant sub */
4061 cv = GvCV(gv);
4062 if ((sv = cv_const_sv(cv))) {
4063 its_constant:
4064 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4065 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4066 yylval.opval->op_private = 0;
4067 TOKEN(WORD);
4068 }
4069
4070 /* Resolve to GV now. */
4071 op_free(yylval.opval);
4072 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4073 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4074 PL_last_lop = PL_oldbufptr;
4075 PL_last_lop_op = OP_ENTERSUB;
4076 /* Is there a prototype? */
4077 if (SvPOK(cv)) {
4078 STRLEN len;
4079 char *proto = SvPV((SV*)cv, len);
4080 if (!len)
4081 TERM(FUNC0SUB);
4082 if (strEQ(proto, "$"))
4083 OPERATOR(UNIOPSUB);
4084 while (*proto == ';')
4085 proto++;
4086 if (*proto == '&' && *s == '{') {
4087 sv_setpv(PL_subname, PL_curstash ?
4088 "__ANON__" : "__ANON__::__ANON__");
4089 PREBLOCK(LSTOPSUB);
4090 }
4091 }
4092 PL_nextval[PL_nexttoke].opval = yylval.opval;
4093 PL_expect = XTERM;
4094 force_next(WORD);
4095 TOKEN(NOAMP);
4096 }
4097
4098 /* Call it a bare word */
4099
4100 if (PL_hints & HINT_STRICT_SUBS)
4101 yylval.opval->op_private |= OPpCONST_STRICT;
4102 else {
4103 bareword:
4104 if (ckWARN(WARN_RESERVED)) {
4105 if (lastchar != '-') {
4106 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4107 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4108 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4109 PL_tokenbuf);
4110 }
4111 }
4112 }
4113
4114 safe_bareword:
4115 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
4116 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4117 "Operator or semicolon missing before %c%s",
4118 lastchar, PL_tokenbuf);
4119 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4120 "Ambiguous use of %c resolved as operator %c",
4121 lastchar, lastchar);
4122 }
4123 TOKEN(WORD);
4124 }
4125
4126 case KEY___FILE__:
4127 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4128 newSVpv(CopFILE(PL_curcop),0));
4129 TERM(THING);
4130
4131 case KEY___LINE__:
4132 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4133 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4134 TERM(THING);
4135
4136 case KEY___PACKAGE__:
4137 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4138 (PL_curstash
4139 ? newSVsv(PL_curstname)
4140 : &PL_sv_undef));
4141 TERM(THING);
4142
4143 case KEY___DATA__:
4144 case KEY___END__: {
4145 GV *gv;
4146
4147 /*SUPPRESS 560*/
4148 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4149 char *pname = "main";
4150 if (PL_tokenbuf[2] == 'D')
4151 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
4152 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4153 GvMULTI_on(gv);
4154 if (!GvIO(gv))
4155 GvIOp(gv) = newIO();
4156 IoIFP(GvIOp(gv)) = PL_rsfp;
4157 #if defined(HAS_FCNTL) && defined(F_SETFD)
4158 {
4159 int fd = PerlIO_fileno(PL_rsfp);
4160 fcntl(fd,F_SETFD,fd >= 3);
4161 }
4162 #endif
4163 /* Mark this internal pseudo-handle as clean */
4164 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4165 if (PL_preprocess)
4166 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4167 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4168 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4169 else
4170 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4171 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4172 /* if the script was opened in binmode, we need to revert
4173 * it to text mode for compatibility; but only iff it has CRs
4174 * XXX this is a questionable hack at best. */
4175 if (PL_bufend-PL_bufptr > 2
4176 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4177 {
4178 Off_t loc = 0;
4179 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4180 loc = PerlIO_tell(PL_rsfp);
4181 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4182 }
4183 #ifdef NETWARE
4184 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4185 #else
4186 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4187 #endif /* NETWARE */
4188 #ifdef PERLIO_IS_STDIO /* really? */
4189 # if defined(__BORLANDC__)
4190 /* XXX see note in do_binmode() */
4191 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4192 # endif
4193 #endif
4194 if (loc > 0)
4195 PerlIO_seek(PL_rsfp, loc, 0);
4196 }
4197 }
4198 #endif
4199 #ifdef PERLIO_LAYERS
4200 if (!IN_BYTES) {
4201 if (UTF)
4202 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4203 else if (PL_encoding) {
4204 SV *name;
4205 dSP;
4206 ENTER;
4207 SAVETMPS;
4208 PUSHMARK(sp);
4209 EXTEND(SP, 1);
4210 XPUSHs(PL_encoding);
4211 PUTBACK;
4212 call_method("name", G_SCALAR);
4213 SPAGAIN;
4214 name = POPs;
4215 PUTBACK;
4216 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4217 Perl_form(aTHX_ ":encoding(%"SVf")",
4218 name));
4219 FREETMPS;
4220 LEAVE;
4221 }
4222 }
4223 #endif
4224 PL_rsfp = Nullfp;
4225 }
4226 goto fake_eof;
4227 }
4228
4229 case KEY_AUTOLOAD:
4230 case KEY_DESTROY:
4231 case KEY_BEGIN:
4232 case KEY_CHECK:
4233 case KEY_INIT:
4234 case KEY_END:
4235 if (PL_expect == XSTATE) {
4236 s = PL_bufptr;
4237 goto really_sub;
4238 }
4239 goto just_a_word;
4240
4241 case KEY_CORE:
4242 if (*s == ':' && s[1] == ':') {
4243 s += 2;
4244 d = s;
4245 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4246 if (!(tmp = keyword(PL_tokenbuf, len)))
4247 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4248 if (tmp < 0)
4249 tmp = -tmp;
4250 goto reserved_word;
4251 }
4252 goto just_a_word;
4253
4254 case KEY_abs:
4255 UNI(OP_ABS);
4256
4257 case KEY_alarm:
4258 UNI(OP_ALARM);
4259
4260 case KEY_accept:
4261 LOP(OP_ACCEPT,XTERM);
4262
4263 case KEY_and:
4264 OPERATOR(ANDOP);
4265
4266 case KEY_atan2:
4267 LOP(OP_ATAN2,XTERM);
4268
4269 case KEY_bind:
4270 LOP(OP_BIND,XTERM);
4271
4272 case KEY_binmode:
4273 LOP(OP_BINMODE,XTERM);
4274
4275 case KEY_bless:
4276 LOP(OP_BLESS,XTERM);
4277
4278 case KEY_chop:
4279 UNI(OP_CHOP);
4280
4281 case KEY_continue:
4282 PREBLOCK(CONTINUE);
4283
4284 case KEY_chdir:
4285 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4286 UNI(OP_CHDIR);
4287
4288 case KEY_close:
4289 UNI(OP_CLOSE);
4290
4291 case KEY_closedir:
4292 UNI(OP_CLOSEDIR);
4293
4294 case KEY_cmp:
4295 Eop(OP_SCMP);
4296
4297 case KEY_caller:
4298 UNI(OP_CALLER);
4299
4300 case KEY_crypt:
4301 #ifdef FCRYPT
4302 if (!PL_cryptseen) {
4303 PL_cryptseen = TRUE;
4304 init_des();
4305 }
4306 #endif
4307 LOP(OP_CRYPT,XTERM);
4308
4309 case KEY_chmod:
4310 LOP(OP_CHMOD,XTERM);
4311
4312 case KEY_chown:
4313 LOP(OP_CHOWN,XTERM);
4314
4315 case KEY_connect:
4316 LOP(OP_CONNECT,XTERM);
4317
4318 case KEY_chr:
4319 UNI(OP_CHR);
4320
4321 case KEY_cos:
4322 UNI(OP_COS);
4323
4324 case KEY_chroot:
4325 UNI(OP_CHROOT);
4326
4327 case KEY_do:
4328 s = skipspace(s);
4329 if (*s == '{')
4330 PRETERMBLOCK(DO);
4331 if (*s != '\'')
4332 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4333 OPERATOR(DO);
4334
4335 case KEY_die:
4336 PL_hints |= HINT_BLOCK_SCOPE;
4337 LOP(OP_DIE,XTERM);
4338
4339 case KEY_defined:
4340 UNI(OP_DEFINED);
4341
4342 case KEY_delete:
4343 UNI(OP_DELETE);
4344
4345 case KEY_dbmopen:
4346 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4347 LOP(OP_DBMOPEN,XTERM);
4348
4349 case KEY_dbmclose:
4350 UNI(OP_DBMCLOSE);
4351
4352 case KEY_dump:
4353 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4354 LOOPX(OP_DUMP);
4355
4356 case KEY_else:
4357 PREBLOCK(ELSE);
4358
4359 case KEY_elsif:
4360 yylval.ival = CopLINE(PL_curcop);
4361 OPERATOR(ELSIF);
4362
4363 case KEY_eq:
4364 Eop(OP_SEQ);
4365
4366 case KEY_exists:
4367 UNI(OP_EXISTS);
4368
4369 case KEY_exit:
4370 UNI(OP_EXIT);
4371
4372 case KEY_eval:
4373 s = skipspace(s);
4374 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4375 UNIBRACK(OP_ENTEREVAL);
4376
4377 case KEY_eof:
4378 UNI(OP_EOF);
4379
4380 case KEY_exp:
4381 UNI(OP_EXP);
4382
4383 case KEY_each:
4384 UNI(OP_EACH);
4385
4386 case KEY_exec:
4387 set_csh();
4388 LOP(OP_EXEC,XREF);
4389
4390 case KEY_endhostent:
4391 FUN0(OP_EHOSTENT);
4392
4393 case KEY_endnetent:
4394 FUN0(OP_ENETENT);
4395
4396 case KEY_endservent:
4397 FUN0(OP_ESERVENT);
4398
4399 case KEY_endprotoent:
4400 FUN0(OP_EPROTOENT);
4401
4402 case KEY_endpwent:
4403 FUN0(OP_EPWENT);
4404
4405 case KEY_endgrent:
4406 FUN0(OP_EGRENT);
4407
4408 case KEY_for:
4409 case KEY_foreach:
4410 yylval.ival = CopLINE(PL_curcop);
4411 s = skipspace(s);
4412 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4413 char *p = s;
4414 if ((PL_bufend - p) >= 3 &&
4415 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4416 p += 2;
4417 else if ((PL_bufend - p) >= 4 &&
4418 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4419 p += 3;
4420 p = skipspace(p);
4421 if (isIDFIRST_lazy_if(p,UTF)) {
4422 p = scan_ident(p, PL_bufend,
4423 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4424 p = skipspace(p);
4425 }
4426 if (*p != '$')
4427 Perl_croak(aTHX_ "Missing $ on loop variable");
4428 }
4429 OPERATOR(FOR);
4430
4431 case KEY_formline:
4432 LOP(OP_FORMLINE,XTERM);
4433
4434 case KEY_fork:
4435 FUN0(OP_FORK);
4436
4437 case KEY_fcntl:
4438 LOP(OP_FCNTL,XTERM);
4439
4440 case KEY_fileno:
4441 UNI(OP_FILENO);
4442
4443 case KEY_flock:
4444 LOP(OP_FLOCK,XTERM);
4445
4446 case KEY_gt:
4447 Rop(OP_SGT);
4448
4449 case KEY_ge:
4450 Rop(OP_SGE);
4451
4452 case KEY_grep:
4453 LOP(OP_GREPSTART, XREF);
4454
4455 case KEY_goto:
4456 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4457 LOOPX(OP_GOTO);
4458
4459 case KEY_gmtime:
4460 UNI(OP_GMTIME);
4461
4462 case KEY_getc:
4463 UNI(OP_GETC);
4464
4465 case KEY_getppid:
4466 FUN0(OP_GETPPID);
4467
4468 case KEY_getpgrp:
4469 UNI(OP_GETPGRP);
4470
4471 case KEY_getpriority:
4472 LOP(OP_GETPRIORITY,XTERM);
4473
4474 case KEY_getprotobyname:
4475 UNI(OP_GPBYNAME);
4476
4477 case KEY_getprotobynumber:
4478 LOP(OP_GPBYNUMBER,XTERM);
4479
4480 case KEY_getprotoent:
4481 FUN0(OP_GPROTOENT);
4482
4483 case KEY_getpwent:
4484 FUN0(OP_GPWENT);
4485
4486 case KEY_getpwnam:
4487 UNI(OP_GPWNAM);
4488
4489 case KEY_getpwuid:
4490 UNI(OP_GPWUID);
4491
4492 case KEY_getpeername:
4493 UNI(OP_GETPEERNAME);
4494
4495 case KEY_gethostbyname:
4496 UNI(OP_GHBYNAME);
4497
4498 case KEY_gethostbyaddr:
4499 LOP(OP_GHBYADDR,XTERM);
4500
4501 case KEY_gethostent:
4502 FUN0(OP_GHOSTENT);
4503
4504 case KEY_getnetbyname:
4505 UNI(OP_GNBYNAME);
4506
4507 case KEY_getnetbyaddr:
4508 LOP(OP_GNBYADDR,XTERM);
4509
4510 case KEY_getnetent:
4511 FUN0(OP_GNETENT);
4512
4513 case KEY_getservbyname:
4514 LOP(OP_GSBYNAME,XTERM);
4515
4516 case KEY_getservbyport:
4517 LOP(OP_GSBYPORT,XTERM);
4518
4519 case KEY_getservent:
4520 FUN0(OP_GSERVENT);
4521
4522 case KEY_getsockname:
4523 UNI(OP_GETSOCKNAME);
4524
4525 case KEY_getsockopt:
4526 LOP(OP_GSOCKOPT,XTERM);
4527
4528 case KEY_getgrent:
4529 FUN0(OP_GGRENT);
4530
4531 case KEY_getgrnam:
4532 UNI(OP_GGRNAM);
4533
4534 case KEY_getgrgid:
4535 UNI(OP_GGRGID);
4536
4537 case KEY_getlogin:
4538 FUN0(OP_GETLOGIN);
4539
4540 case KEY_glob:
4541 set_csh();
4542 LOP(OP_GLOB,XTERM);
4543
4544 case KEY_hex:
4545 UNI(OP_HEX);
4546
4547 case KEY_if:
4548 yylval.ival = CopLINE(PL_curcop);
4549 OPERATOR(IF);
4550
4551 case KEY_index:
4552 LOP(OP_INDEX,XTERM);
4553
4554 case KEY_int:
4555 UNI(OP_INT);
4556
4557 case KEY_ioctl:
4558 LOP(OP_IOCTL,XTERM);
4559
4560 case KEY_join:
4561 LOP(OP_JOIN,XTERM);
4562
4563 case KEY_keys:
4564 UNI(OP_KEYS);
4565
4566 case KEY_kill:
4567 LOP(OP_KILL,XTERM);
4568
4569 case KEY_last:
4570 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4571 LOOPX(OP_LAST);
4572
4573 case KEY_lc:
4574 UNI(OP_LC);
4575
4576 case KEY_lcfirst:
4577 UNI(OP_LCFIRST);
4578
4579 case KEY_local:
4580 yylval.ival = 0;
4581 OPERATOR(LOCAL);
4582
4583 case KEY_length:
4584 UNI(OP_LENGTH);
4585
4586 case KEY_lt:
4587 Rop(OP_SLT);
4588
4589 case KEY_le:
4590 Rop(OP_SLE);
4591
4592 case KEY_localtime:
4593 UNI(OP_LOCALTIME);
4594
4595 case KEY_log:
4596 UNI(OP_LOG);
4597
4598 case KEY_link:
4599 LOP(OP_LINK,XTERM);
4600
4601 case KEY_listen:
4602 LOP(OP_LISTEN,XTERM);
4603
4604 case KEY_lock:
4605 UNI(OP_LOCK);
4606
4607 case KEY_lstat:
4608 UNI(OP_LSTAT);
4609
4610 case KEY_m:
4611 s = scan_pat(s,OP_MATCH);
4612 TERM(sublex_start());
4613
4614 case KEY_map:
4615 LOP(OP_MAPSTART, XREF);
4616
4617 case KEY_mkdir:
4618 LOP(OP_MKDIR,XTERM);
4619
4620 case KEY_msgctl:
4621 LOP(OP_MSGCTL,XTERM);
4622
4623 case KEY_msgget:
4624 LOP(OP_MSGGET,XTERM);
4625
4626 case KEY_msgrcv:
4627 LOP(OP_MSGRCV,XTERM);
4628
4629 case KEY_msgsnd:
4630 LOP(OP_MSGSND,XTERM);
4631
4632 case KEY_our:
4633 case KEY_my:
4634 PL_in_my = tmp;
4635 s = skipspace(s);
4636 if (isIDFIRST_lazy_if(s,UTF)) {
4637 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4638 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4639 goto really_sub;
4640 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4641 if (!PL_in_my_stash) {
4642 char tmpbuf[1024];
4643 PL_bufptr = s;
4644 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4645 yyerror(tmpbuf);
4646 }
4647 }
4648 yylval.ival = 1;
4649 OPERATOR(MY);
4650
4651 case KEY_next:
4652 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4653 LOOPX(OP_NEXT);
4654
4655 case KEY_ne:
4656 Eop(OP_SNE);
4657
4658 case KEY_no:
4659 if (PL_expect != XSTATE)
4660 yyerror("\"no\" not allowed in expression");
4661 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4662 s = force_version(s, FALSE);
4663 yylval.ival = 0;
4664 OPERATOR(USE);
4665
4666 case KEY_not:
4667 if (*s == '(' || (s = skipspace(s), *s == '('))
4668 FUN1(OP_NOT);
4669 else
4670 OPERATOR(NOTOP);
4671
4672 case KEY_open:
4673 s = skipspace(s);
4674 if (isIDFIRST_lazy_if(s,UTF)) {
4675 char *t;
4676 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4677 for (t=d; *t && isSPACE(*t); t++) ;
4678 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4679 /* [perl #16184] */
4680 && !(t[0] == '=' && t[1] == '>')
4681 ) {
4682 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4683 "Precedence problem: open %.*s should be open(%.*s)",
4684 d - s, s, d - s, s);
4685 }
4686 }
4687 LOP(OP_OPEN,XTERM);
4688
4689 case KEY_or:
4690 yylval.ival = OP_OR;
4691 OPERATOR(OROP);
4692
4693 case KEY_ord:
4694 UNI(OP_ORD);
4695
4696 case KEY_oct:
4697 UNI(OP_OCT);
4698
4699 case KEY_opendir:
4700 LOP(OP_OPEN_DIR,XTERM);
4701
4702 case KEY_print:
4703 checkcomma(s,PL_tokenbuf,"filehandle");
4704 LOP(OP_PRINT,XREF);
4705
4706 case KEY_printf:
4707 checkcomma(s,PL_tokenbuf,"filehandle");
4708 LOP(OP_PRTF,XREF);
4709
4710 case KEY_prototype:
4711 UNI(OP_PROTOTYPE);
4712
4713 case KEY_push:
4714 LOP(OP_PUSH,XTERM);
4715
4716 case KEY_pop:
4717 UNI(OP_POP);
4718
4719 case KEY_pos:
4720 UNI(OP_POS);
4721
4722 case KEY_pack:
4723 LOP(OP_PACK,XTERM);
4724
4725 case KEY_package:
4726 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4727 OPERATOR(PACKAGE);
4728
4729 case KEY_pipe:
4730 LOP(OP_PIPE_OP,XTERM);
4731
4732 case KEY_q:
4733 s = scan_str(s,FALSE,FALSE);
4734 if (!s)
4735 missingterm((char*)0);
4736 yylval.ival = OP_CONST;
4737 TERM(sublex_start());
4738
4739 case KEY_quotemeta:
4740 UNI(OP_QUOTEMETA);
4741
4742 case KEY_qw:
4743 s = scan_str(s,FALSE,FALSE);
4744 if (!s)
4745 missingterm((char*)0);
4746 force_next(')');
4747 if (SvCUR(PL_lex_stuff)) {
4748 OP *words = Nullop;
4749 int warned = 0;
4750 d = SvPV_force(PL_lex_stuff, len);
4751 while (len) {
4752 SV *sv;
4753 for (; isSPACE(*d) && len; --len, ++d) ;
4754 if (len) {
4755 char *b = d;
4756 if (!warned && ckWARN(WARN_QW)) {
4757 for (; !isSPACE(*d) && len; --len, ++d) {
4758 if (*d == ',') {
4759 Perl_warner(aTHX_ packWARN(WARN_QW),
4760 "Possible attempt to separate words with commas");
4761 ++warned;
4762 }
4763 else if (*d == '#') {
4764 Perl_warner(aTHX_ packWARN(WARN_QW),
4765 "Possible attempt to put comments in qw() list");
4766 ++warned;
4767 }
4768 }
4769 }
4770 else {
4771 for (; !isSPACE(*d) && len; --len, ++d) ;
4772 }
4773 sv = newSVpvn(b, d-b);
4774 if (DO_UTF8(PL_lex_stuff))
4775 SvUTF8_on(sv);
4776 words = append_elem(OP_LIST, words,
4777 newSVOP(OP_CONST, 0, tokeq(sv)));
4778 }
4779 }
4780 if (words) {
4781 PL_nextval[PL_nexttoke].opval = words;
4782 force_next(THING);
4783 }
4784 }
4785 if (PL_lex_stuff) {
4786 SvREFCNT_dec(PL_lex_stuff);
4787 PL_lex_stuff = Nullsv;
4788 }
4789 PL_expect = XTERM;
4790 TOKEN('(');
4791
4792 case KEY_qq:
4793 s = scan_str(s,FALSE,FALSE);
4794 if (!s)
4795 missingterm((char*)0);
4796 yylval.ival = OP_STRINGIFY;
4797 if (SvIVX(PL_lex_stuff) == '\'')
4798 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
4799 TERM(sublex_start());
4800
4801 case KEY_qr:
4802 s = scan_pat(s,OP_QR);
4803 TERM(sublex_start());
4804
4805 case KEY_qx:
4806 s = scan_str(s,FALSE,FALSE);
4807 if (!s)
4808 missingterm((char*)0);
4809 yylval.ival = OP_BACKTICK;
4810 set_csh();
4811 TERM(sublex_start());
4812
4813 case KEY_return:
4814 OLDLOP(OP_RETURN);
4815
4816 case KEY_require:
4817 s = skipspace(s);
4818 if (isDIGIT(*s)) {
4819 s = force_version(s, FALSE);
4820 }
4821 else if (*s != 'v' || !isDIGIT(s[1])
4822 || (s = force_version(s, TRUE), *s == 'v'))
4823 {
4824 *PL_tokenbuf = '\0';
4825 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4826 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
4827 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4828 else if (*s == '<')
4829 yyerror("<> should be quotes");
4830 }
4831 UNI(OP_REQUIRE);
4832
4833 case KEY_reset:
4834 UNI(OP_RESET);
4835
4836 case KEY_redo:
4837 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4838 LOOPX(OP_REDO);
4839
4840 case KEY_rename:
4841 LOP(OP_RENAME,XTERM);
4842
4843 case KEY_rand:
4844 UNI(OP_RAND);
4845
4846 case KEY_rmdir:
4847 UNI(OP_RMDIR);
4848
4849 case KEY_rindex:
4850 LOP(OP_RINDEX,XTERM);
4851
4852 case KEY_read:
4853 LOP(OP_READ,XTERM);
4854
4855 case KEY_readdir:
4856 UNI(OP_READDIR);
4857
4858 case KEY_readline:
4859 set_csh();
4860 UNI(OP_READLINE);
4861
4862 case KEY_readpipe:
4863 set_csh();
4864 UNI(OP_BACKTICK);
4865
4866 case KEY_rewinddir:
4867 UNI(OP_REWINDDIR);
4868
4869 case KEY_recv:
4870 LOP(OP_RECV,XTERM);
4871
4872 case KEY_reverse:
4873 LOP(OP_REVERSE,XTERM);
4874
4875 case KEY_readlink:
4876 UNI(OP_READLINK);
4877
4878 case KEY_ref:
4879 UNI(OP_REF);
4880
4881 case KEY_s:
4882 s = scan_subst(s);
4883 if (yylval.opval)
4884 TERM(sublex_start());
4885 else
4886 TOKEN(1); /* force error */
4887
4888 case KEY_chomp:
4889 UNI(OP_CHOMP);
4890
4891 case KEY_scalar:
4892 UNI(OP_SCALAR);
4893
4894 case KEY_select:
4895 LOP(OP_SELECT,XTERM);
4896
4897 case KEY_seek:
4898 LOP(OP_SEEK,XTERM);
4899
4900 case KEY_semctl:
4901 LOP(OP_SEMCTL,XTERM);
4902
4903 case KEY_semget:
4904 LOP(OP_SEMGET,XTERM);
4905
4906 case KEY_semop:
4907 LOP(OP_SEMOP,XTERM);
4908
4909 case KEY_send:
4910 LOP(OP_SEND,XTERM);
4911
4912 case KEY_setpgrp:
4913 LOP(OP_SETPGRP,XTERM);
4914
4915 case KEY_setpriority:
4916 LOP(OP_SETPRIORITY,XTERM);
4917
4918 case KEY_sethostent:
4919 UNI(OP_SHOSTENT);
4920
4921 case KEY_setnetent:
4922 UNI(OP_SNETENT);
4923
4924 case KEY_setservent:
4925 UNI(OP_SSERVENT);
4926
4927 case KEY_setprotoent:
4928 UNI(OP_SPROTOENT);
4929
4930 case KEY_setpwent:
4931 FUN0(OP_SPWENT);
4932
4933 case KEY_setgrent:
4934 FUN0(OP_SGRENT);
4935
4936 case KEY_seekdir:
4937 LOP(OP_SEEKDIR,XTERM);
4938
4939 case KEY_setsockopt:
4940 LOP(OP_SSOCKOPT,XTERM);
4941
4942 case KEY_shift:
4943 UNI(OP_SHIFT);
4944
4945 case KEY_shmctl:
4946 LOP(OP_SHMCTL,XTERM);
4947
4948 case KEY_shmget:
4949 LOP(OP_SHMGET,XTERM);
4950
4951 case KEY_shmread:
4952 LOP(OP_SHMREAD,XTERM);
4953
4954 case KEY_shmwrite:
4955 LOP(OP_SHMWRITE,XTERM);
4956
4957 case KEY_shutdown:
4958 LOP(OP_SHUTDOWN,XTERM);
4959
4960 case KEY_sin:
4961 UNI(OP_SIN);
4962
4963 case KEY_sleep:
4964 UNI(OP_SLEEP);
4965
4966 case KEY_socket:
4967 LOP(OP_SOCKET,XTERM);
4968
4969 case KEY_socketpair:
4970 LOP(OP_SOCKPAIR,XTERM);
4971
4972 case KEY_sort:
4973 checkcomma(s,PL_tokenbuf,"subroutine name");
4974 s = skipspace(s);
4975 if (*s == ';' || *s == ')') /* probably a close */
4976 Perl_croak(aTHX_ "sort is now a reserved word");
4977 PL_expect = XTERM;
4978 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4979 LOP(OP_SORT,XREF);
4980
4981 case KEY_split:
4982 LOP(OP_SPLIT,XTERM);
4983
4984 case KEY_sprintf:
4985 LOP(OP_SPRINTF,XTERM);
4986
4987 case KEY_splice:
4988 LOP(OP_SPLICE,XTERM);
4989
4990 case KEY_sqrt:
4991 UNI(OP_SQRT);
4992
4993 case KEY_srand:
4994 UNI(OP_SRAND);
4995
4996 case KEY_stat:
4997 UNI(OP_STAT);
4998
4999 case KEY_study:
5000 UNI(OP_STUDY);
5001
5002 case KEY_substr:
5003 LOP(OP_SUBSTR,XTERM);
5004
5005 case KEY_format:
5006 case KEY_sub:
5007 really_sub:
5008 {
5009 char tmpbuf[sizeof PL_tokenbuf];
5010 SSize_t tboffset = 0;
5011 expectation attrful;
5012 bool have_name, have_proto, bad_proto;
5013 int key = tmp;
5014
5015 s = skipspace(s);
5016
5017 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5018 (*s == ':' && s[1] == ':'))
5019 {
5020 PL_expect = XBLOCK;
5021 attrful = XATTRBLOCK;
5022 /* remember buffer pos'n for later force_word */
5023 tboffset = s - PL_oldbufptr;
5024 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5025 if (strchr(tmpbuf, ':'))
5026 sv_setpv(PL_subname, tmpbuf);
5027 else {
5028 sv_setsv(PL_subname,PL_curstname);
5029 sv_catpvn(PL_subname,"::",2);
5030 sv_catpvn(PL_subname,tmpbuf,len);
5031 }
5032 s = skipspace(d);
5033 have_name = TRUE;
5034 }
5035 else {
5036 if (key == KEY_my)
5037 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5038 PL_expect = XTERMBLOCK;
5039 attrful = XATTRTERM;
5040 sv_setpv(PL_subname,"?");
5041 have_name = FALSE;
5042 }
5043
5044 if (key == KEY_format) {
5045 if (*s == '=')
5046 PL_lex_formbrack = PL_lex_brackets + 1;
5047 if (have_name)
5048 (void) force_word(PL_oldbufptr + tboffset, WORD,
5049 FALSE, TRUE, TRUE);
5050 OPERATOR(FORMAT);
5051 }
5052
5053 /* Look for a prototype */
5054 if (*s == '(') {
5055 char *p;
5056
5057 s = scan_str(s,FALSE,FALSE);
5058 if (!s)
5059 Perl_croak(aTHX_ "Prototype not terminated");
5060 /* strip spaces and check for bad characters */
5061 d = SvPVX(PL_lex_stuff);
5062 tmp = 0;
5063 bad_proto = FALSE;
5064 for (p = d; *p; ++p) {
5065 if (!isSPACE(*p)) {
5066 d[tmp++] = *p;
5067 if (!strchr("$@%*;[]&\\", *p))
5068 bad_proto = TRUE;
5069 }
5070 }
5071 d[tmp] = '\0';
5072 if (bad_proto && ckWARN(WARN_SYNTAX))
5073 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5074 "Illegal character in prototype for %"SVf" : %s",
5075 PL_subname, d);
5076 SvCUR(PL_lex_stuff) = tmp;
5077 have_proto = TRUE;
5078
5079 s = skipspace(s);
5080 }
5081 else
5082 have_proto = FALSE;
5083
5084 if (*s == ':' && s[1] != ':')
5085 PL_expect = attrful;
5086 else if (!have_name && *s != '{' && key == KEY_sub)
5087 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5088
5089 if (have_proto) {
5090 PL_nextval[PL_nexttoke].opval =
5091 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5092 PL_lex_stuff = Nullsv;
5093 force_next(THING);
5094 }
5095 if (!have_name) {
5096 sv_setpv(PL_subname,
5097 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5098 TOKEN(ANONSUB);
5099 }
5100 (void) force_word(PL_oldbufptr + tboffset, WORD,
5101 FALSE, TRUE, TRUE);
5102 if (key == KEY_my)
5103 TOKEN(MYSUB);
5104 TOKEN(SUB);
5105 }
5106
5107 case KEY_system:
5108 set_csh();
5109 LOP(OP_SYSTEM,XREF);
5110
5111 case KEY_symlink:
5112 LOP(OP_SYMLINK,XTERM);
5113
5114 case KEY_syscall:
5115 LOP(OP_SYSCALL,XTERM);
5116
5117 case KEY_sysopen:
5118 LOP(OP_SYSOPEN,XTERM);
5119
5120 case KEY_sysseek:
5121 LOP(OP_SYSSEEK,XTERM);
5122
5123 case KEY_sysread:
5124 LOP(OP_SYSREAD,XTERM);
5125
5126 case KEY_syswrite:
5127 LOP(OP_SYSWRITE,XTERM);
5128
5129 case KEY_tr:
5130 s = scan_trans(s);
5131 TERM(sublex_start());
5132
5133 case KEY_tell:
5134 UNI(OP_TELL);
5135
5136 case KEY_telldir:
5137 UNI(OP_TELLDIR);
5138
5139 case KEY_tie:
5140 LOP(OP_TIE,XTERM);
5141
5142 case KEY_tied:
5143 UNI(OP_TIED);
5144
5145 case KEY_time:
5146 FUN0(OP_TIME);
5147
5148 case KEY_times:
5149 FUN0(OP_TMS);
5150
5151 case KEY_truncate:
5152 LOP(OP_TRUNCATE,XTERM);
5153
5154 case KEY_uc:
5155 UNI(OP_UC);
5156
5157 case KEY_ucfirst:
5158 UNI(OP_UCFIRST);
5159
5160 case KEY_untie:
5161 UNI(OP_UNTIE);
5162
5163 case KEY_until:
5164 yylval.ival = CopLINE(PL_curcop);
5165 OPERATOR(UNTIL);
5166
5167 case KEY_unless:
5168 yylval.ival = CopLINE(PL_curcop);
5169 OPERATOR(UNLESS);
5170
5171 case KEY_unlink:
5172 LOP(OP_UNLINK,XTERM);
5173
5174 case KEY_undef:
5175 UNI(OP_UNDEF);
5176
5177 case KEY_unpack:
5178 LOP(OP_UNPACK,XTERM);
5179
5180 case KEY_utime:
5181 LOP(OP_UTIME,XTERM);
5182
5183 case KEY_umask:
5184 UNI(OP_UMASK);
5185
5186 case KEY_unshift:
5187 LOP(OP_UNSHIFT,XTERM);
5188
5189 case KEY_use:
5190 if (PL_expect != XSTATE)
5191 yyerror("\"use\" not allowed in expression");
5192 s = skipspace(s);
5193 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5194 s = force_version(s, TRUE);
5195 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5196 PL_nextval[PL_nexttoke].opval = Nullop;
5197 force_next(WORD);
5198 }
5199 else if (*s == 'v') {
5200 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5201 s = force_version(s, FALSE);
5202 }
5203 }
5204 else {
5205 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5206 s = force_version(s, FALSE);
5207 }
5208 yylval.ival = 1;
5209 OPERATOR(USE);
5210
5211 case KEY_values:
5212 UNI(OP_VALUES);
5213
5214 case KEY_vec:
5215 LOP(OP_VEC,XTERM);
5216
5217 case KEY_while:
5218 yylval.ival = CopLINE(PL_curcop);
5219 OPERATOR(WHILE);
5220
5221 case KEY_warn:
5222 PL_hints |= HINT_BLOCK_SCOPE;
5223 LOP(OP_WARN,XTERM);
5224
5225 case KEY_wait:
5226 FUN0(OP_WAIT);
5227
5228 case KEY_waitpid:
5229 LOP(OP_WAITPID,XTERM);
5230
5231 case KEY_wantarray:
5232 FUN0(OP_WANTARRAY);
5233
5234 case KEY_write:
5235 #ifdef EBCDIC
5236 {
5237 char ctl_l[2];
5238 ctl_l[0] = toCTRL('L');
5239 ctl_l[1] = '\0';
5240 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5241 }
5242 #else
5243 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5244 #endif
5245 UNI(OP_ENTERWRITE);
5246
5247 case KEY_x:
5248 if (PL_expect == XOPERATOR)
5249 Mop(OP_REPEAT);
5250 check_uni();
5251 goto just_a_word;
5252
5253 case KEY_xor:
5254 yylval.ival = OP_XOR;
5255 OPERATOR(OROP);
5256
5257 case KEY_y:
5258 s = scan_trans(s);
5259 TERM(sublex_start());
5260 }
5261 }}
5262 }
5263 #ifdef __SC__
5264 #pragma segment Main
5265 #endif
5266
5267 static int
5268 S_pending_ident(pTHX)
5269 {
5270 register char *d;
5271 register I32 tmp = 0;
5272 /* pit holds the identifier we read and pending_ident is reset */
5273 char pit = PL_pending_ident;
5274 PL_pending_ident = 0;
5275
5276 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5277 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5278
5279 /* if we're in a my(), we can't allow dynamics here.
5280 $foo'bar has already been turned into $foo::bar, so
5281 just check for colons.
5282
5283 if it's a legal name, the OP is a PADANY.
5284 */
5285 if (PL_in_my) {
5286 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5287 if (strchr(PL_tokenbuf,':'))
5288 yyerror(Perl_form(aTHX_ "No package name allowed for "
5289 "variable %s in \"our\"",
5290 PL_tokenbuf));
5291 tmp = allocmy(PL_tokenbuf);
5292 }
5293 else {
5294 if (strchr(PL_tokenbuf,':'))
5295 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5296
5297 yylval.opval = newOP(OP_PADANY, 0);
5298 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5299 return PRIVATEREF;
5300 }
5301 }
5302
5303 /*
5304 build the ops for accesses to a my() variable.
5305
5306 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5307 then used in a comparison. This catches most, but not
5308 all cases. For instance, it catches
5309 sort { my($a); $a <=> $b }
5310 but not
5311 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5312 (although why you'd do that is anyone's guess).
5313 */
5314
5315 if (!strchr(PL_tokenbuf,':')) {
5316 #ifdef USE_5005THREADS
5317 /* Check for single character per-thread SVs */
5318 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
5319 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
5320 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
5321 {
5322 yylval.opval = newOP(OP_THREADSV, 0);
5323 yylval.opval->op_targ = tmp;
5324 return PRIVATEREF;
5325 }
5326 #endif /* USE_5005THREADS */
5327 if (!PL_in_my)
5328 tmp = pad_findmy(PL_tokenbuf);
5329 if (tmp != NOT_IN_PAD) {
5330 /* might be an "our" variable" */
5331 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5332 /* build ops for a bareword */
5333 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
5334 sv_catpvn(sym, "::", 2);
5335 sv_catpv(sym, PL_tokenbuf+1);
5336 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5337 yylval.opval->op_private = OPpCONST_ENTERED;
5338 gv_fetchpv(SvPVX(sym),
5339 (PL_in_eval
5340 ? (GV_ADDMULTI | GV_ADDINEVAL)
5341 : GV_ADDMULTI
5342 ),
5343 ((PL_tokenbuf[0] == '$') ? SVt_PV
5344 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5345 : SVt_PVHV));
5346 return WORD;
5347 }
5348
5349 /* if it's a sort block and they're naming $a or $b */
5350 if (PL_last_lop_op == OP_SORT &&
5351 PL_tokenbuf[0] == '$' &&
5352 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5353 && !PL_tokenbuf[2])
5354 {
5355 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5356 d < PL_bufend && *d != '\n';
5357 d++)
5358 {
5359 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5360 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5361 PL_tokenbuf);
5362 }
5363 }
5364 }
5365
5366 yylval.opval = newOP(OP_PADANY, 0);
5367 yylval.opval->op_targ = tmp;
5368 return PRIVATEREF;
5369 }
5370 }
5371
5372 /*
5373 Whine if they've said @foo in a doublequoted string,
5374 and @foo isn't a variable we can find in the symbol
5375 table.
5376 */
5377 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5378 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5379 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5380 && ckWARN(WARN_AMBIGUOUS))
5381 {
5382 /* Downgraded from fatal to warning 20000522 mjd */
5383 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5384 "Possible unintended interpolation of %s in string",
5385 PL_tokenbuf);
5386 }
5387 }
5388
5389 /* build ops for a bareword */
5390 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5391 yylval.opval->op_private = OPpCONST_ENTERED;
5392 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5393 ((PL_tokenbuf[0] == '$') ? SVt_PV
5394 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5395 : SVt_PVHV));
5396 return WORD;
5397 }
5398
5399 I32
5400 Perl_keyword(pTHX_ register char *d, I32 len)
5401 {
5402 switch (*d) {
5403 case '_':
5404 if (d[1] == '_') {
5405 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
5406 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5407 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
5408 if (strEQ(d,"__DATA__")) return KEY___DATA__;
5409 if (strEQ(d,"__END__")) return KEY___END__;
5410 }
5411 break;
5412 case 'A':
5413 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5414 break;
5415 case 'a':
5416 switch (len) {
5417 case 3:
5418 if (strEQ(d,"and")) return -KEY_and;
5419 if (strEQ(d,"abs")) return -KEY_abs;
5420 break;
5421 case 5:
5422 if (strEQ(d,"alarm")) return -KEY_alarm;
5423 if (strEQ(d,"atan2")) return -KEY_atan2;
5424 break;
5425 case 6:
5426 if (strEQ(d,"accept")) return -KEY_accept;
5427 break;
5428 }
5429 break;
5430 case 'B':
5431 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
5432 break;
5433 case 'b':
5434 if (strEQ(d,"bless")) return -KEY_bless;
5435 if (strEQ(d,"bind")) return -KEY_bind;
5436 if (strEQ(d,"binmode")) return -KEY_binmode;
5437 break;
5438 case 'C':
5439 if (strEQ(d,"CORE")) return -KEY_CORE;
5440 if (strEQ(d,"CHECK")) return KEY_CHECK;
5441 break;
5442 case 'c':
5443 switch (len) {
5444 case 3:
5445 if (strEQ(d,"cmp")) return -KEY_cmp;
5446 if (strEQ(d,"chr")) return -KEY_chr;
5447 if (strEQ(d,"cos")) return -KEY_cos;
5448 break;
5449 case 4:
5450 if (strEQ(d,"chop")) return -KEY_chop;
5451 break;
5452 case 5:
5453 if (strEQ(d,"close")) return -KEY_close;
5454 if (strEQ(d,"chdir")) return -KEY_chdir;
5455 if (strEQ(d,"chomp")) return -KEY_chomp;
5456 if (strEQ(d,"chmod")) return -KEY_chmod;
5457 if (strEQ(d,"chown")) return -KEY_chown;
5458 if (strEQ(d,"crypt")) return -KEY_crypt;
5459 break;
5460 case 6:
5461 if (strEQ(d,"chroot")) return -KEY_chroot;
5462 if (strEQ(d,"caller")) return -KEY_caller;
5463 break;
5464 case 7:
5465 if (strEQ(d,"connect")) return -KEY_connect;
5466 break;
5467 case 8:
5468 if (strEQ(d,"closedir")) return -KEY_closedir;
5469 if (strEQ(d,"continue")) return -KEY_continue;
5470 break;
5471 }
5472 break;
5473 case 'D':
5474 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5475 break;
5476 case 'd':
5477 switch (len) {
5478 case 2:
5479 if (strEQ(d,"do")) return KEY_do;
5480 break;
5481 case 3:
5482 if (strEQ(d,"die")) return -KEY_die;
5483 break;
5484 case 4:
5485 if (strEQ(d,"dump")) return -KEY_dump;
5486 break;
5487 case 6:
5488 if (strEQ(d,"delete")) return KEY_delete;
5489 break;
5490 case 7:
5491 if (strEQ(d,"defined")) return KEY_defined;
5492 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
5493 break;
5494 case 8:
5495 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
5496 break;
5497 }
5498 break;
5499 case 'E':
5500 if (strEQ(d,"END")) return KEY_END;
5501 break;
5502 case 'e':
5503 switch (len) {
5504 case 2:
5505 if (strEQ(d,"eq")) return -KEY_eq;
5506 break;
5507 case 3:
5508 if (strEQ(d,"eof")) return -KEY_eof;
5509 if (strEQ(d,"exp")) return -KEY_exp;
5510 break;
5511 case 4:
5512 if (strEQ(d,"else")) return KEY_else;
5513 if (strEQ(d,"exit")) return -KEY_exit;
5514 if (strEQ(d,"eval")) return KEY_eval;
5515 if (strEQ(d,"exec")) return -KEY_exec;
5516 if (strEQ(d,"each")) return -KEY_each;
5517 break;
5518 case 5:
5519 if (strEQ(d,"elsif")) return KEY_elsif;
5520 break;
5521 case 6:
5522 if (strEQ(d,"exists")) return KEY_exists;
5523 if (strEQ(d,"elseif") && ckWARN_d(WARN_SYNTAX))
5524 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5525 "elseif should be elsif");
5526 break;
5527 case 8:
5528 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5529 if (strEQ(d,"endpwent")) return -KEY_endpwent;
5530 break;
5531 case 9:
5532 if (strEQ(d,"endnetent")) return -KEY_endnetent;
5533 break;
5534 case 10:
5535 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5536 if (strEQ(d,"endservent")) return -KEY_endservent;
5537 break;
5538 case 11:
5539 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
5540 break;
5541 }
5542 break;
5543 case 'f':
5544 switch (len) {
5545 case 3:
5546 if (strEQ(d,"for")) return KEY_for;
5547 break;
5548 case 4:
5549 if (strEQ(d,"fork")) return -KEY_fork;
5550 break;
5551 case 5:
5552 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5553 if (strEQ(d,"flock")) return -KEY_flock;
5554 break;
5555 case 6:
5556 if (strEQ(d,"format")) return KEY_format;
5557 if (strEQ(d,"fileno")) return -KEY_fileno;
5558 break;
5559 case 7:
5560 if (strEQ(d,"foreach")) return KEY_foreach;
5561 break;
5562 case 8:
5563 if (strEQ(d,"formline")) return -KEY_formline;
5564 break;
5565 }
5566 break;
5567 case 'g':
5568 if (strnEQ(d,"get",3)) {
5569 d += 3;
5570 if (*d == 'p') {
5571 switch (len) {
5572 case 7:
5573 if (strEQ(d,"ppid")) return -KEY_getppid;
5574 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
5575 break;
5576 case 8:
5577 if (strEQ(d,"pwent")) return -KEY_getpwent;
5578 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5579 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
5580 break;
5581 case 11:
5582 if (strEQ(d,"peername")) return -KEY_getpeername;
5583 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5584 if (strEQ(d,"priority")) return -KEY_getpriority;
5585 break;
5586 case 14:
5587 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
5588 break;
5589 case 16:
5590 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
5591 break;
5592 }
5593 }
5594 else if (*d == 'h') {
5595 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5596 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5597 if (strEQ(d,"hostent")) return -KEY_gethostent;
5598 }
5599 else if (*d == 'n') {
5600 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5601 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5602 if (strEQ(d,"netent")) return -KEY_getnetent;
5603 }
5604 else if (*d == 's') {
5605 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5606 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5607 if (strEQ(d,"servent")) return -KEY_getservent;
5608 if (strEQ(d,"sockname")) return -KEY_getsockname;
5609 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
5610 }
5611 else if (*d == 'g') {
5612 if (strEQ(d,"grent")) return -KEY_getgrent;
5613 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5614 if (strEQ(d,"grgid")) return -KEY_getgrgid;
5615 }
5616 else if (*d == 'l') {
5617 if (strEQ(d,"login")) return -KEY_getlogin;
5618 }
5619 else if (strEQ(d,"c")) return -KEY_getc;
5620 break;
5621 }
5622 switch (len) {
5623 case 2:
5624 if (strEQ(d,"gt")) return -KEY_gt;
5625 if (strEQ(d,"ge")) return -KEY_ge;
5626 break;
5627 case 4:
5628 if (strEQ(d,"grep")) return KEY_grep;
5629 if (strEQ(d,"goto")) return KEY_goto;
5630 if (strEQ(d,"glob")) return KEY_glob;
5631 break;
5632 case 6:
5633 if (strEQ(d,"gmtime")) return -KEY_gmtime;
5634 break;
5635 }
5636 break;
5637 case 'h':
5638 if (strEQ(d,"hex")) return -KEY_hex;
5639 break;
5640 case 'I':
5641 if (strEQ(d,"INIT")) return KEY_INIT;
5642 break;
5643 case 'i':
5644 switch (len) {
5645 case 2:
5646 if (strEQ(d,"if")) return KEY_if;
5647 break;
5648 case 3:
5649 if (strEQ(d,"int")) return -KEY_int;
5650 break;
5651 case 5:
5652 if (strEQ(d,"index")) return -KEY_index;
5653 if (strEQ(d,"ioctl")) return -KEY_ioctl;
5654 break;
5655 }
5656 break;
5657 case 'j':
5658 if (strEQ(d,"join")) return -KEY_join;
5659 break;
5660 case 'k':
5661 if (len == 4) {
5662 if (strEQ(d,"keys")) return -KEY_keys;
5663 if (strEQ(d,"kill")) return -KEY_kill;
5664 }
5665 break;
5666 case 'l':
5667 switch (len) {
5668 case 2:
5669 if (strEQ(d,"lt")) return -KEY_lt;
5670 if (strEQ(d,"le")) return -KEY_le;
5671 if (strEQ(d,"lc")) return -KEY_lc;
5672 break;
5673 case 3:
5674 if (strEQ(d,"log")) return -KEY_log;
5675 break;
5676 case 4:
5677 if (strEQ(d,"last")) return KEY_last;
5678 if (strEQ(d,"link")) return -KEY_link;
5679 if (strEQ(d,"lock")) return -KEY_lock;
5680 break;
5681 case 5:
5682 if (strEQ(d,"local")) return KEY_local;
5683 if (strEQ(d,"lstat")) return -KEY_lstat;
5684 break;
5685 case 6:
5686 if (strEQ(d,"length")) return -KEY_length;
5687 if (strEQ(d,"listen")) return -KEY_listen;
5688 break;
5689 case 7:
5690 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
5691 break;
5692 case 9:
5693 if (strEQ(d,"localtime")) return -KEY_localtime;
5694 break;
5695 }
5696 break;
5697 case 'm':
5698 switch (len) {
5699 case 1: return KEY_m;
5700 case 2:
5701 if (strEQ(d,"my")) return KEY_my;
5702 break;
5703 case 3:
5704 if (strEQ(d,"map")) return KEY_map;
5705 break;
5706 case 5:
5707 if (strEQ(d,"mkdir")) return -KEY_mkdir;
5708 break;
5709 case 6:
5710 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5711 if (strEQ(d,"msgget")) return -KEY_msgget;
5712 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5713 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
5714 break;
5715 }
5716 break;
5717 case 'n':
5718 if (strEQ(d,"next")) return KEY_next;
5719 if (strEQ(d,"ne")) return -KEY_ne;
5720 if (strEQ(d,"not")) return -KEY_not;
5721 if (strEQ(d,"no")) return KEY_no;
5722 break;
5723 case 'o':
5724 switch (len) {
5725 case 2:
5726 if (strEQ(d,"or")) return -KEY_or;
5727 break;
5728 case 3:
5729 if (strEQ(d,"ord")) return -KEY_ord;
5730 if (strEQ(d,"oct")) return -KEY_oct;
5731 if (strEQ(d,"our")) return KEY_our;
5732 break;
5733 case 4:
5734 if (strEQ(d,"open")) return -KEY_open;
5735 break;
5736 case 7:
5737 if (strEQ(d,"opendir")) return -KEY_opendir;
5738 break;
5739 }
5740 break;
5741 case 'p':
5742 switch (len) {
5743 case 3:
5744 if (strEQ(d,"pop")) return -KEY_pop;
5745 if (strEQ(d,"pos")) return KEY_pos;
5746 break;
5747 case 4:
5748 if (strEQ(d,"push")) return -KEY_push;
5749 if (strEQ(d,"pack")) return -KEY_pack;
5750 if (strEQ(d,"pipe")) return -KEY_pipe;
5751 break;
5752 case 5:
5753 if (strEQ(d,"print")) return KEY_print;
5754 break;
5755 case 6:
5756 if (strEQ(d,"printf")) return KEY_printf;
5757 break;
5758 case 7:
5759 if (strEQ(d,"package")) return KEY_package;
5760 break;
5761 case 9:
5762 if (strEQ(d,"prototype")) return KEY_prototype;
5763 }
5764 break;
5765 case 'q':
5766 if (len <= 2) {
5767 if (strEQ(d,"q")) return KEY_q;
5768 if (strEQ(d,"qr")) return KEY_qr;
5769 if (strEQ(d,"qq")) return KEY_qq;
5770 if (strEQ(d,"qw")) return KEY_qw;
5771 if (strEQ(d,"qx")) return KEY_qx;
5772 }
5773 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
5774 break;
5775 case 'r':
5776 switch (len) {
5777 case 3:
5778 if (strEQ(d,"ref")) return -KEY_ref;
5779 break;
5780 case 4:
5781 if (strEQ(d,"read")) return -KEY_read;
5782 if (strEQ(d,"rand")) return -KEY_rand;
5783 if (strEQ(d,"recv")) return -KEY_recv;
5784 if (strEQ(d,"redo")) return KEY_redo;
5785 break;
5786 case 5:
5787 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5788 if (strEQ(d,"reset")) return -KEY_reset;
5789 break;
5790 case 6:
5791 if (strEQ(d,"return")) return KEY_return;
5792 if (strEQ(d,"rename")) return -KEY_rename;
5793 if (strEQ(d,"rindex")) return -KEY_rindex;
5794 break;
5795 case 7:
5796 if (strEQ(d,"require")) return KEY_require;
5797 if (strEQ(d,"reverse")) return -KEY_reverse;
5798 if (strEQ(d,"readdir")) return -KEY_readdir;
5799 break;
5800 case 8:
5801 if (strEQ(d,"readlink")) return -KEY_readlink;
5802 if (strEQ(d,"readline")) return -KEY_readline;
5803 if (strEQ(d,"readpipe")) return -KEY_readpipe;
5804 break;
5805 case 9:
5806 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
5807 break;
5808 }
5809 break;
5810 case 's':
5811 switch (d[1]) {
5812 case 0: return KEY_s;
5813 case 'c':
5814 if (strEQ(d,"scalar")) return KEY_scalar;
5815 break;
5816 case 'e':
5817 switch (len) {
5818 case 4:
5819 if (strEQ(d,"seek")) return -KEY_seek;
5820 if (strEQ(d,"send")) return -KEY_send;
5821 break;
5822 case 5:
5823 if (strEQ(d,"semop")) return -KEY_semop;
5824 break;
5825 case 6:
5826 if (strEQ(d,"select")) return -KEY_select;
5827 if (strEQ(d,"semctl")) return -KEY_semctl;
5828 if (strEQ(d,"semget")) return -KEY_semget;
5829 break;
5830 case 7:
5831 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5832 if (strEQ(d,"seekdir")) return -KEY_seekdir;
5833 break;
5834 case 8:
5835 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5836 if (strEQ(d,"setgrent")) return -KEY_setgrent;
5837 break;
5838 case 9:
5839 if (strEQ(d,"setnetent")) return -KEY_setnetent;
5840 break;
5841 case 10:
5842 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5843 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5844 if (strEQ(d,"setservent")) return -KEY_setservent;
5845 break;
5846 case 11:
5847 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5848 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
5849 break;
5850 }
5851 break;
5852 case 'h':
5853 switch (len) {
5854 case 5:
5855 if (strEQ(d,"shift")) return -KEY_shift;
5856 break;
5857 case 6:
5858 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5859 if (strEQ(d,"shmget")) return -KEY_shmget;
5860 break;
5861 case 7:
5862 if (strEQ(d,"shmread")) return -KEY_shmread;
5863 break;
5864 case 8:
5865 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5866 if (strEQ(d,"shutdown")) return -KEY_shutdown;
5867 break;
5868 }
5869 break;
5870 case 'i':
5871 if (strEQ(d,"sin")) return -KEY_sin;
5872 break;
5873 case 'l':
5874 if (strEQ(d,"sleep")) return -KEY_sleep;
5875 break;
5876 case 'o':
5877 if (strEQ(d,"sort")) return KEY_sort;
5878 if (strEQ(d,"socket")) return -KEY_socket;
5879 if (strEQ(d,"socketpair")) return -KEY_socketpair;
5880 break;
5881 case 'p':
5882 if (strEQ(d,"split")) return KEY_split;
5883 if (strEQ(d,"sprintf")) return -KEY_sprintf;
5884 if (strEQ(d,"splice")) return -KEY_splice;
5885 break;
5886 case 'q':
5887 if (strEQ(d,"sqrt")) return -KEY_sqrt;
5888 break;
5889 case 'r':
5890 if (strEQ(d,"srand")) return -KEY_srand;
5891 break;
5892 case 't':
5893 if (strEQ(d,"stat")) return -KEY_stat;
5894 if (strEQ(d,"study")) return KEY_study;
5895 break;
5896 case 'u':
5897 if (strEQ(d,"substr")) return -KEY_substr;
5898 if (strEQ(d,"sub")) return KEY_sub;
5899 break;
5900 case 'y':
5901 switch (len) {
5902 case 6:
5903 if (strEQ(d,"system")) return -KEY_system;
5904 break;
5905 case 7:
5906 if (strEQ(d,"symlink")) return -KEY_symlink;
5907 if (strEQ(d,"syscall")) return -KEY_syscall;
5908 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5909 if (strEQ(d,"sysread")) return -KEY_sysread;
5910 if (strEQ(d,"sysseek")) return -KEY_sysseek;
5911 break;
5912 case 8:
5913 if (strEQ(d,"syswrite")) return -KEY_syswrite;
5914 break;
5915 }
5916 break;
5917 }
5918 break;
5919 case 't':
5920 switch (len) {
5921 case 2:
5922 if (strEQ(d,"tr")) return KEY_tr;
5923 break;
5924 case 3:
5925 if (strEQ(d,"tie")) return KEY_tie;
5926 break;
5927 case 4:
5928 if (strEQ(d,"tell")) return -KEY_tell;
5929 if (strEQ(d,"tied")) return KEY_tied;
5930 if (strEQ(d,"time")) return -KEY_time;
5931 break;
5932 case 5:
5933 if (strEQ(d,"times")) return -KEY_times;
5934 break;
5935 case 7:
5936 if (strEQ(d,"telldir")) return -KEY_telldir;
5937 break;
5938 case 8:
5939 if (strEQ(d,"truncate")) return -KEY_truncate;
5940 break;
5941 }
5942 break;
5943 case 'u':
5944 switch (len) {
5945 case 2:
5946 if (strEQ(d,"uc")) return -KEY_uc;
5947 break;
5948 case 3:
5949 if (strEQ(d,"use")) return KEY_use;
5950 break;
5951 case 5:
5952 if (strEQ(d,"undef")) return KEY_undef;
5953 if (strEQ(d,"until")) return KEY_until;
5954 if (strEQ(d,"untie")) return KEY_untie;
5955 if (strEQ(d,"utime")) return -KEY_utime;
5956 if (strEQ(d,"umask")) return -KEY_umask;
5957 break;
5958 case 6:
5959 if (strEQ(d,"unless")) return KEY_unless;
5960 if (strEQ(d,"unpack")) return -KEY_unpack;
5961 if (strEQ(d,"unlink")) return -KEY_unlink;
5962 break;
5963 case 7:
5964 if (strEQ(d,"unshift")) return -KEY_unshift;
5965 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
5966 break;
5967 }
5968 break;
5969 case 'v':
5970 if (strEQ(d,"values")) return -KEY_values;
5971 if (strEQ(d,"vec")) return -KEY_vec;
5972 break;
5973 case 'w':
5974 switch (len) {
5975 case 4:
5976 if (strEQ(d,"warn")) return -KEY_warn;
5977 if (strEQ(d,"wait")) return -KEY_wait;
5978 break;
5979 case 5:
5980 if (strEQ(d,"while")) return KEY_while;
5981 if (strEQ(d,"write")) return -KEY_write;
5982 break;
5983 case 7:
5984 if (strEQ(d,"waitpid")) return -KEY_waitpid;
5985 break;
5986 case 9:
5987 if (strEQ(d,"wantarray")) return -KEY_wantarray;
5988 break;
5989 }
5990 break;
5991 case 'x':
5992 if (len == 1) return -KEY_x;
5993 if (strEQ(d,"xor")) return -KEY_xor;
5994 break;
5995 case 'y':
5996 if (len == 1) return KEY_y;
5997 break;
5998 case 'z':
5999 break;
6000 }
6001 return 0;
6002 }
6003
6004 STATIC void
6005 S_checkcomma(pTHX_ register char *s, char *name, char *what)
6006 {
6007 char *w;
6008
6009 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
6010 if (ckWARN(WARN_SYNTAX)) {
6011 int level = 1;
6012 for (w = s+2; *w && level; w++) {
6013 if (*w == '(')
6014 ++level;
6015 else if (*w == ')')
6016 --level;
6017 }
6018 if (*w)
6019 for (; *w && isSPACE(*w); w++) ;
6020 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
6021 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6022 "%s (...) interpreted as function",name);
6023 }
6024 }
6025 while (s < PL_bufend && isSPACE(*s))
6026 s++;
6027 if (*s == '(')
6028 s++;
6029 while (s < PL_bufend && isSPACE(*s))
6030 s++;
6031 if (isIDFIRST_lazy_if(s,UTF)) {
6032 w = s++;
6033 while (isALNUM_lazy_if(s,UTF))
6034 s++;
6035 while (s < PL_bufend && isSPACE(*s))
6036 s++;
6037 if (*s == ',') {
6038 int kw;
6039 *s = '\0';
6040 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
6041 *s = ',';
6042 if (kw)
6043 return;
6044 Perl_croak(aTHX_ "No comma allowed after %s", what);
6045 }
6046 }
6047 }
6048
6049 /* Either returns sv, or mortalizes sv and returns a new SV*.
6050 Best used as sv=new_constant(..., sv, ...).
6051 If s, pv are NULL, calls subroutine with one argument,
6052 and type is used with error messages only. */
6053
6054 STATIC SV *
6055 S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
6056 const char *type)
6057 {
6058 dSP;
6059 HV *table = GvHV(PL_hintgv); /* ^H */
6060 SV *res;
6061 SV **cvp;
6062 SV *cv, *typesv;
6063 const char *why1, *why2, *why3;
6064
6065 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
6066 SV *msg;
6067
6068 why2 = strEQ(key,"charnames")
6069 ? "(possibly a missing \"use charnames ...\")"
6070 : "";
6071 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
6072 (type ? type: "undef"), why2);
6073
6074 /* This is convoluted and evil ("goto considered harmful")
6075 * but I do not understand the intricacies of all the different
6076 * failure modes of %^H in here. The goal here is to make
6077 * the most probable error message user-friendly. --jhi */
6078
6079 goto msgdone;
6080
6081 report:
6082 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
6083 (type ? type: "undef"), why1, why2, why3);
6084 msgdone:
6085 yyerror(SvPVX(msg));
6086 SvREFCNT_dec(msg);
6087 return sv;
6088 }
6089 cvp = hv_fetch(table, key, strlen(key), FALSE);
6090 if (!cvp || !SvOK(*cvp)) {
6091 why1 = "$^H{";
6092 why2 = key;
6093 why3 = "} is not defined";
6094 goto report;
6095 }
6096 sv_2mortal(sv); /* Parent created it permanently */
6097 cv = *cvp;
6098 if (!pv && s)
6099 pv = sv_2mortal(newSVpvn(s, len));
6100 if (type && pv)
6101 typesv = sv_2mortal(newSVpv(type, 0));
6102 else
6103 typesv = &PL_sv_undef;
6104
6105 PUSHSTACKi(PERLSI_OVERLOAD);
6106 ENTER ;
6107 SAVETMPS;
6108
6109 PUSHMARK(SP) ;
6110 EXTEND(sp, 3);
6111 if (pv)
6112 PUSHs(pv);
6113 PUSHs(sv);
6114 if (pv)
6115 PUSHs(typesv);
6116 PUTBACK;
6117 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
6118
6119 SPAGAIN ;
6120
6121 /* Check the eval first */
6122 if (!PL_in_eval && SvTRUE(ERRSV)) {
6123 STRLEN n_a;
6124 sv_catpv(ERRSV, "Propagated");
6125 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
6126 (void)POPs;
6127 res = SvREFCNT_inc(sv);
6128 }
6129 else {
6130 res = POPs;
6131 (void)SvREFCNT_inc(res);
6132 }
6133
6134 PUTBACK ;
6135 FREETMPS ;
6136 LEAVE ;
6137 POPSTACK;
6138
6139 if (!SvOK(res)) {
6140 why1 = "Call to &{$^H{";
6141 why2 = key;
6142 why3 = "}} did not return a defined value";
6143 sv = res;
6144 goto report;
6145 }
6146
6147 return res;
6148 }
6149
6150 STATIC char *
6151 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
6152 {
6153 register char *d = dest;
6154 register char *e = d + destlen - 3; /* two-character token, ending NUL */
6155 for (;;) {
6156 if (d >= e)
6157 Perl_croak(aTHX_ ident_too_long);
6158 if (isALNUM(*s)) /* UTF handled below */
6159 *d++ = *s++;
6160 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
6161 *d++ = ':';
6162 *d++ = ':';
6163 s++;
6164 }
6165 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
6166 *d++ = *s++;
6167 *d++ = *s++;
6168 }
6169 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
6170 char *t = s + UTF8SKIP(s);
6171 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
6172 t += UTF8SKIP(t);
6173 if (d + (t - s) > e)
6174 Perl_croak(aTHX_ ident_too_long);
6175 Copy(s, d, t - s, char);
6176 d += t - s;
6177 s = t;
6178 }
6179 else {
6180 *d = '\0';
6181 *slp = d - dest;
6182 return s;
6183 }
6184 }
6185 }
6186
6187 STATIC char *
6188 S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
6189 {
6190 register char *d;
6191 register char *e;
6192 char *bracket = 0;
6193 char funny = *s++;
6194
6195 if (isSPACE(*s))
6196 s = skipspace(s);
6197 d = dest;
6198 e = d + destlen - 3; /* two-character token, ending NUL */
6199 if (isDIGIT(*s)) {
6200 while (isDIGIT(*s)) {
6201 if (d >= e)
6202 Perl_croak(aTHX_ ident_too_long);
6203 *d++ = *s++;
6204 }
6205 }
6206 else {
6207 for (;;) {
6208 if (d >= e)
6209 Perl_croak(aTHX_ ident_too_long);
6210 if (isALNUM(*s)) /* UTF handled below */
6211 *d++ = *s++;
6212 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
6213 *d++ = ':';
6214 *d++ = ':';
6215 s++;
6216 }
6217 else if (*s == ':' && s[1] == ':') {
6218 *d++ = *s++;
6219 *d++ = *s++;
6220 }
6221 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
6222 char *t = s + UTF8SKIP(s);
6223 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
6224 t += UTF8SKIP(t);
6225 if (d + (t - s) > e)
6226 Perl_croak(aTHX_ ident_too_long);
6227 Copy(s, d, t - s, char);
6228 d += t - s;
6229 s = t;
6230 }
6231 else
6232 break;
6233 }
6234 }
6235 *d = '\0';
6236 d = dest;
6237 if (*d) {
6238 if (PL_lex_state != LEX_NORMAL)
6239 PL_lex_state = LEX_INTERPENDMAYBE;
6240 return s;
6241 }
6242 if (*s == '$' && s[1] &&
6243 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
6244 {
6245 return s;
6246 }
6247 if (*s == '{') {
6248 bracket = s;
6249 s++;
6250 }
6251 else if (ck_uni)
6252 check_uni();
6253 if (s < send)
6254 *d = *s++;
6255 d[1] = '\0';
6256 if (*d == '^' && *s && isCONTROLVAR(*s)) {
6257 *d = toCTRL(*s);
6258 s++;
6259 }
6260 if (bracket) {
6261 if (isSPACE(s[-1])) {
6262 while (s < send) {
6263 char ch = *s++;
6264 if (!SPACE_OR_TAB(ch)) {
6265 *d = ch;
6266 break;
6267 }
6268 }
6269 }
6270 if (isIDFIRST_lazy_if(d,UTF)) {
6271 d++;
6272 if (UTF) {
6273 e = s;
6274 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
6275 e += UTF8SKIP(e);
6276 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
6277 e += UTF8SKIP(e);
6278 }
6279 Copy(s, d, e - s, char);
6280 d += e - s;
6281 s = e;
6282 }
6283 else {
6284 while ((isALNUM(*s) || *s == ':') && d < e)
6285 *d++ = *s++;
6286 if (d >= e)
6287 Perl_croak(aTHX_ ident_too_long);
6288 }
6289 *d = '\0';
6290 while (s < send && SPACE_OR_TAB(*s)) s++;
6291 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
6292 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
6293 const char *brack = *s == '[' ? "[...]" : "{...}";
6294 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6295 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
6296 funny, dest, brack, funny, dest, brack);
6297 }
6298 bracket++;
6299 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
6300 return s;
6301 }
6302 }
6303 /* Handle extended ${^Foo} variables
6304 * 1999-02-27 mjd-perl-patch@plover.com */
6305 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6306 && isALNUM(*s))
6307 {
6308 d++;
6309 while (isALNUM(*s) && d < e) {
6310 *d++ = *s++;
6311 }
6312 if (d >= e)
6313 Perl_croak(aTHX_ ident_too_long);
6314 *d = '\0';
6315 }
6316 if (*s == '}') {
6317 s++;
6318 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
6319 PL_lex_state = LEX_INTERPEND;
6320 PL_expect = XREF;
6321 }
6322 if (funny == '#')
6323 funny = '@';
6324 if (PL_lex_state == LEX_NORMAL) {
6325 if (ckWARN(WARN_AMBIGUOUS) &&
6326 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
6327 {
6328 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6329 "Ambiguous use of %c{%s} resolved to %c%s",
6330 funny, dest, funny, dest);
6331 }
6332 }
6333 }
6334 else {
6335 s = bracket; /* let the parser handle it */
6336 *dest = '\0';
6337 }
6338 }
6339 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6340 PL_lex_state = LEX_INTERPEND;
6341 return s;
6342 }
6343
6344 void
6345 Perl_pmflag(pTHX_ U32* pmfl, int ch)
6346 {
6347 if (ch == 'i')
6348 *pmfl |= PMf_FOLD;
6349 else if (ch == 'g')
6350 *pmfl |= PMf_GLOBAL;
6351 else if (ch == 'c')
6352 *pmfl |= PMf_CONTINUE;
6353 else if (ch == 'o')
6354 *pmfl |= PMf_KEEP;
6355 else if (ch == 'm')
6356 *pmfl |= PMf_MULTILINE;
6357 else if (ch == 's')
6358 *pmfl |= PMf_SINGLELINE;
6359 else if (ch == 'x')
6360 *pmfl |= PMf_EXTENDED;
6361 }
6362
6363 STATIC char *
6364 S_scan_pat(pTHX_ char *start, I32 type)
6365 {
6366 PMOP *pm;
6367 char *s;
6368
6369 s = scan_str(start,FALSE,FALSE);
6370 if (!s)
6371 Perl_croak(aTHX_ "Search pattern not terminated");
6372
6373 pm = (PMOP*)newPMOP(type, 0);
6374 if (PL_multi_open == '?')
6375 pm->op_pmflags |= PMf_ONCE;
6376 if(type == OP_QR) {
6377 while (*s && strchr("iomsx", *s))
6378 pmflag(&pm->op_pmflags,*s++);
6379 }
6380 else {
6381 while (*s && strchr("iogcmsx", *s))
6382 pmflag(&pm->op_pmflags,*s++);
6383 }
6384 /* issue a warning if /c is specified,but /g is not */
6385 if (ckWARN(WARN_REGEXP) &&
6386 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
6387 {
6388 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
6389 }
6390
6391 pm->op_pmpermflags = pm->op_pmflags;
6392
6393 PL_lex_op = (OP*)pm;
6394 yylval.ival = OP_MATCH;
6395 return s;
6396 }
6397
6398 STATIC char *
6399 S_scan_subst(pTHX_ char *start)
6400 {
6401 register char *s;
6402 register PMOP *pm;
6403 I32 first_start;
6404 I32 es = 0;
6405
6406 yylval.ival = OP_NULL;
6407
6408 s = scan_str(start,FALSE,FALSE);
6409
6410 if (!s)
6411 Perl_croak(aTHX_ "Substitution pattern not terminated");
6412
6413 if (s[-1] == PL_multi_open)
6414 s--;
6415
6416 first_start = PL_multi_start;
6417 s = scan_str(s,FALSE,FALSE);
6418 if (!s) {
6419 if (PL_lex_stuff) {
6420 SvREFCNT_dec(PL_lex_stuff);
6421 PL_lex_stuff = Nullsv;
6422 }
6423 Perl_croak(aTHX_ "Substitution replacement not terminated");
6424 }
6425 PL_multi_start = first_start; /* so whole substitution is taken together */
6426
6427 pm = (PMOP*)newPMOP(OP_SUBST, 0);
6428 while (*s) {
6429 if (*s == 'e') {
6430 s++;
6431 es++;
6432 }
6433 else if (strchr("iogcmsx", *s))
6434 pmflag(&pm->op_pmflags,*s++);
6435 else
6436 break;
6437 }
6438
6439 /* /c is not meaningful with s/// */
6440 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
6441 {
6442 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
6443 }
6444
6445 if (es) {
6446 SV *repl;
6447 PL_sublex_info.super_bufptr = s;
6448 PL_sublex_info.super_bufend = PL_bufend;
6449 PL_multi_end = 0;
6450 pm->op_pmflags |= PMf_EVAL;
6451 repl = newSVpvn("",0);
6452 while (es-- > 0)
6453 sv_catpv(repl, es ? "eval " : "do ");
6454 sv_catpvn(repl, "{ ", 2);
6455 sv_catsv(repl, PL_lex_repl);
6456 sv_catpvn(repl, " };", 2);
6457 SvEVALED_on(repl);
6458 SvREFCNT_dec(PL_lex_repl);
6459 PL_lex_repl = repl;
6460 }
6461
6462 pm->op_pmpermflags = pm->op_pmflags;
6463 PL_lex_op = (OP*)pm;
6464 yylval.ival = OP_SUBST;
6465 return s;
6466 }
6467
6468 STATIC char *
6469 S_scan_trans(pTHX_ char *start)
6470 {
6471 register char* s;
6472 OP *o;
6473 short *tbl;
6474 I32 squash;
6475 I32 del;
6476 I32 complement;
6477
6478 yylval.ival = OP_NULL;
6479
6480 s = scan_str(start,FALSE,FALSE);
6481 if (!s)
6482 Perl_croak(aTHX_ "Transliteration pattern not terminated");
6483 if (s[-1] == PL_multi_open)
6484 s--;
6485
6486 s = scan_str(s,FALSE,FALSE);
6487 if (!s) {
6488 if (PL_lex_stuff) {
6489 SvREFCNT_dec(PL_lex_stuff);
6490 PL_lex_stuff = Nullsv;
6491 }
6492 Perl_croak(aTHX_ "Transliteration replacement not terminated");
6493 }
6494
6495 complement = del = squash = 0;
6496 while (strchr("cds", *s)) {
6497 if (*s == 'c')
6498 complement = OPpTRANS_COMPLEMENT;
6499 else if (*s == 'd')
6500 del = OPpTRANS_DELETE;
6501 else if (*s == 's')
6502 squash = OPpTRANS_SQUASH;
6503 s++;
6504 }
6505
6506 New(803, tbl, complement&&!del?258:256, short);
6507 o = newPVOP(OP_TRANS, 0, (char*)tbl);
6508 o->op_private = del|squash|complement|
6509 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6510 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
6511
6512 PL_lex_op = o;
6513 yylval.ival = OP_TRANS;
6514 return s;
6515 }
6516
6517 STATIC char *
6518 S_scan_heredoc(pTHX_ register char *s)
6519 {
6520 SV *herewas;
6521 I32 op_type = OP_SCALAR;
6522 I32 len;
6523 SV *tmpstr;
6524 char term;
6525 register char *d;
6526 register char *e;
6527 char *peek;
6528 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
6529
6530 s += 2;
6531 d = PL_tokenbuf;
6532 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
6533 if (!outer)
6534 *d++ = '\n';
6535 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
6536 if (*peek && strchr("`'\"",*peek)) {
6537 s = peek;
6538 term = *s++;
6539 s = delimcpy(d, e, s, PL_bufend, term, &len);
6540 d += len;
6541 if (s < PL_bufend)
6542 s++;
6543 }
6544 else {
6545 if (*s == '\\')
6546 s++, term = '\'';
6547 else
6548 term = '"';
6549 if (!isALNUM_lazy_if(s,UTF))
6550 deprecate_old("bare << to mean <<\"\"");
6551 for (; isALNUM_lazy_if(s,UTF); s++) {
6552 if (d < e)
6553 *d++ = *s;
6554 }
6555 }
6556 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
6557 Perl_croak(aTHX_ "Delimiter for here document is too long");
6558 *d++ = '\n';
6559 *d = '\0';
6560 len = d - PL_tokenbuf;
6561 #ifndef PERL_STRICT_CR
6562 d = strchr(s, '\r');
6563 if (d) {
6564 char *olds = s;
6565 s = d;
6566 while (s < PL_bufend) {
6567 if (*s == '\r') {
6568 *d++ = '\n';
6569 if (*++s == '\n')
6570 s++;
6571 }
6572 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6573 *d++ = *s++;
6574 s++;
6575 }
6576 else
6577 *d++ = *s++;
6578 }
6579 *d = '\0';
6580 PL_bufend = d;
6581 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
6582 s = olds;
6583 }
6584 #endif
6585 d = "\n";
6586 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
6587 herewas = newSVpvn(s,PL_bufend-s);
6588 else
6589 s--, herewas = newSVpvn(s,d-s);
6590 s += SvCUR(herewas);
6591
6592 tmpstr = NEWSV(87,79);
6593 sv_upgrade(tmpstr, SVt_PVIV);
6594 if (term == '\'') {
6595 op_type = OP_CONST;
6596 SvIVX(tmpstr) = -1;
6597 }
6598 else if (term == '`') {
6599 op_type = OP_BACKTICK;
6600 SvIVX(tmpstr) = '\\';
6601 }
6602
6603 CLINE;
6604 PL_multi_start = CopLINE(PL_curcop);
6605 PL_multi_open = PL_multi_close = '<';
6606 term = *PL_tokenbuf;
6607 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6608 char *bufptr = PL_sublex_info.super_bufptr;
6609 char *bufend = PL_sublex_info.super_bufend;
6610 char *olds = s - SvCUR(herewas);
6611 s = strchr(bufptr, '\n');
6612 if (!s)
6613 s = bufend;
6614 d = s;
6615 while (s < bufend &&
6616 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6617 if (*s++ == '\n')
6618 CopLINE_inc(PL_curcop);
6619 }
6620 if (s >= bufend) {
6621 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
6622 missingterm(PL_tokenbuf);
6623 }
6624 sv_setpvn(herewas,bufptr,d-bufptr+1);
6625 sv_setpvn(tmpstr,d+1,s-d);
6626 s += len - 1;
6627 sv_catpvn(herewas,s,bufend-s);
6628 (void)strcpy(bufptr,SvPVX(herewas));
6629
6630 s = olds;
6631 goto retval;
6632 }
6633 else if (!outer) {
6634 d = s;
6635 while (s < PL_bufend &&
6636 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6637 if (*s++ == '\n')
6638 CopLINE_inc(PL_curcop);
6639 }
6640 if (s >= PL_bufend) {
6641 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
6642 missingterm(PL_tokenbuf);
6643 }
6644 sv_setpvn(tmpstr,d+1,s-d);
6645 s += len - 1;
6646 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
6647
6648 sv_catpvn(herewas,s,PL_bufend-s);
6649 sv_setsv(PL_linestr,herewas);
6650 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6651 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6652 PL_last_lop = PL_last_uni = Nullch;
6653 }
6654 else
6655 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
6656 while (s >= PL_bufend) { /* multiple line string? */
6657 if (!outer ||
6658 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
6659 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
6660 missingterm(PL_tokenbuf);
6661 }
6662 CopLINE_inc(PL_curcop);
6663 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6664 PL_last_lop = PL_last_uni = Nullch;
6665 #ifndef PERL_STRICT_CR
6666 if (PL_bufend - PL_linestart >= 2) {
6667 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6668 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
6669 {
6670 PL_bufend[-2] = '\n';
6671 PL_bufend--;
6672 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
6673 }
6674 else if (PL_bufend[-1] == '\r')
6675 PL_bufend[-1] = '\n';
6676 }
6677 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6678 PL_bufend[-1] = '\n';
6679 #endif
6680 if (PERLDB_LINE && PL_curstash != PL_debstash) {
6681 SV *sv = NEWSV(88,0);
6682
6683 sv_upgrade(sv, SVt_PVMG);
6684 sv_setsv(sv,PL_linestr);
6685 (void)SvIOK_on(sv);
6686 SvIVX(sv) = 0;
6687 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
6688 }
6689 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6690 s = PL_bufend - 1;
6691 *s = ' ';
6692 sv_catsv(PL_linestr,herewas);
6693 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6694 }
6695 else {
6696 s = PL_bufend;
6697 sv_catsv(tmpstr,PL_linestr);
6698 }
6699 }
6700 s++;
6701 retval:
6702 PL_multi_end = CopLINE(PL_curcop);
6703 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6704 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
6705 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
6706 }
6707 SvREFCNT_dec(herewas);
6708 if (!IN_BYTES) {
6709 if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
6710 SvUTF8_on(tmpstr);
6711 else if (PL_encoding)
6712 sv_recode_to_utf8(tmpstr, PL_encoding);
6713 }
6714 PL_lex_stuff = tmpstr;
6715 yylval.ival = op_type;
6716 return s;
6717 }
6718
6719 /* scan_inputsymbol
6720 takes: current position in input buffer
6721 returns: new position in input buffer
6722 side-effects: yylval and lex_op are set.
6723
6724 This code handles:
6725
6726 <> read from ARGV
6727 <FH> read from filehandle
6728 <pkg::FH> read from package qualified filehandle
6729 <pkg'FH> read from package qualified filehandle
6730 <$fh> read from filehandle in $fh
6731 <*.h> filename glob
6732
6733 */
6734
6735 STATIC char *
6736 S_scan_inputsymbol(pTHX_ char *start)
6737 {
6738 register char *s = start; /* current position in buffer */
6739 register char *d;
6740 register char *e;
6741 char *end;
6742 I32 len;
6743
6744 d = PL_tokenbuf; /* start of temp holding space */
6745 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
6746 end = strchr(s, '\n');
6747 if (!end)
6748 end = PL_bufend;
6749 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
6750
6751 /* die if we didn't have space for the contents of the <>,
6752 or if it didn't end, or if we see a newline
6753 */
6754
6755 if (len >= sizeof PL_tokenbuf)
6756 Perl_croak(aTHX_ "Excessively long <> operator");
6757 if (s >= end)
6758 Perl_croak(aTHX_ "Unterminated <> operator");
6759
6760 s++;
6761
6762 /* check for <$fh>
6763 Remember, only scalar variables are interpreted as filehandles by
6764 this code. Anything more complex (e.g., <$fh{$num}>) will be
6765 treated as a glob() call.
6766 This code makes use of the fact that except for the $ at the front,
6767 a scalar variable and a filehandle look the same.
6768 */
6769 if (*d == '$' && d[1]) d++;
6770
6771 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
6772 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
6773 d++;
6774
6775 /* If we've tried to read what we allow filehandles to look like, and
6776 there's still text left, then it must be a glob() and not a getline.
6777 Use scan_str to pull out the stuff between the <> and treat it
6778 as nothing more than a string.
6779 */
6780
6781 if (d - PL_tokenbuf != len) {
6782 yylval.ival = OP_GLOB;
6783 set_csh();
6784 s = scan_str(start,FALSE,FALSE);
6785 if (!s)
6786 Perl_croak(aTHX_ "Glob not terminated");
6787 return s;
6788 }
6789 else {
6790 bool readline_overriden = FALSE;
6791 GV *gv_readline = Nullgv;
6792 GV **gvp;
6793 /* we're in a filehandle read situation */
6794 d = PL_tokenbuf;
6795
6796 /* turn <> into <ARGV> */
6797 if (!len)
6798 (void)strcpy(d,"ARGV");
6799
6800 /* Check whether readline() is overriden */
6801 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
6802 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
6803 ||
6804 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
6805 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
6806 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
6807 readline_overriden = TRUE;
6808
6809 /* if <$fh>, create the ops to turn the variable into a
6810 filehandle
6811 */
6812 if (*d == '$') {
6813 I32 tmp;
6814
6815 /* try to find it in the pad for this block, otherwise find
6816 add symbol table ops
6817 */
6818 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
6819 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
6820 SV *sym = sv_2mortal(
6821 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
6822 sv_catpvn(sym, "::", 2);
6823 sv_catpv(sym, d+1);
6824 d = SvPVX(sym);
6825 goto intro_sym;
6826 }
6827 else {
6828 OP *o = newOP(OP_PADSV, 0);
6829 o->op_targ = tmp;
6830 PL_lex_op = readline_overriden
6831 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6832 append_elem(OP_LIST, o,
6833 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
6834 : (OP*)newUNOP(OP_READLINE, 0, o);
6835 }
6836 }
6837 else {
6838 GV *gv;
6839 ++d;
6840 intro_sym:
6841 gv = gv_fetchpv(d,
6842 (PL_in_eval
6843 ? (GV_ADDMULTI | GV_ADDINEVAL)
6844 : GV_ADDMULTI),
6845 SVt_PV);
6846 PL_lex_op = readline_overriden
6847 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6848 append_elem(OP_LIST,
6849 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
6850 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6851 : (OP*)newUNOP(OP_READLINE, 0,
6852 newUNOP(OP_RV2SV, 0,
6853 newGVOP(OP_GV, 0, gv)));
6854 }
6855 if (!readline_overriden)
6856 PL_lex_op->op_flags |= OPf_SPECIAL;
6857 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
6858 yylval.ival = OP_NULL;
6859 }
6860
6861 /* If it's none of the above, it must be a literal filehandle
6862 (<Foo::BAR> or <FOO>) so build a simple readline OP */
6863 else {
6864 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
6865 PL_lex_op = readline_overriden
6866 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6867 append_elem(OP_LIST,
6868 newGVOP(OP_GV, 0, gv),
6869 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6870 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6871 yylval.ival = OP_NULL;
6872 }
6873 }
6874
6875 return s;
6876 }
6877
6878
6879 /* scan_str
6880 takes: start position in buffer
6881 keep_quoted preserve \ on the embedded delimiter(s)
6882 keep_delims preserve the delimiters around the string
6883 returns: position to continue reading from buffer
6884 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6885 updates the read buffer.
6886
6887 This subroutine pulls a string out of the input. It is called for:
6888 q single quotes q(literal text)
6889 ' single quotes 'literal text'
6890 qq double quotes qq(interpolate $here please)
6891 " double quotes "interpolate $here please"
6892 qx backticks qx(/bin/ls -l)
6893 ` backticks `/bin/ls -l`
6894 qw quote words @EXPORT_OK = qw( func() $spam )
6895 m// regexp match m/this/
6896 s/// regexp substitute s/this/that/
6897 tr/// string transliterate tr/this/that/
6898 y/// string transliterate y/this/that/
6899 ($*@) sub prototypes sub foo ($)
6900 (stuff) sub attr parameters sub foo : attr(stuff)
6901 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6902
6903 In most of these cases (all but <>, patterns and transliterate)
6904 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6905 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6906 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6907 calls scan_str().
6908
6909 It skips whitespace before the string starts, and treats the first
6910 character as the delimiter. If the delimiter is one of ([{< then
6911 the corresponding "close" character )]}> is used as the closing
6912 delimiter. It allows quoting of delimiters, and if the string has
6913 balanced delimiters ([{<>}]) it allows nesting.
6914
6915 On success, the SV with the resulting string is put into lex_stuff or,
6916 if that is already non-NULL, into lex_repl. The second case occurs only
6917 when parsing the RHS of the special constructs s/// and tr/// (y///).
6918 For convenience, the terminating delimiter character is stuffed into
6919 SvIVX of the SV.
6920 */
6921
6922 STATIC char *
6923 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
6924 {
6925 SV *sv; /* scalar value: string */
6926 char *tmps; /* temp string, used for delimiter matching */
6927 register char *s = start; /* current position in the buffer */
6928 register char term; /* terminating character */
6929 register char *to; /* current position in the sv's data */
6930 I32 brackets = 1; /* bracket nesting level */
6931 bool has_utf8 = FALSE; /* is there any utf8 content? */
6932 I32 termcode; /* terminating char. code */
6933 U8 termstr[UTF8_MAXLEN]; /* terminating string */
6934 STRLEN termlen; /* length of terminating string */
6935 char *last = NULL; /* last position for nesting bracket */
6936
6937 /* skip space before the delimiter */
6938 if (isSPACE(*s))
6939 s = skipspace(s);
6940
6941 /* mark where we are, in case we need to report errors */
6942 CLINE;
6943
6944 /* after skipping whitespace, the next character is the terminator */
6945 term = *s;
6946 if (!UTF) {
6947 termcode = termstr[0] = term;
6948 termlen = 1;
6949 }
6950 else {
6951 termcode = utf8_to_uvchr((U8*)s, &termlen);
6952 Copy(s, termstr, termlen, U8);
6953 if (!UTF8_IS_INVARIANT(term))
6954 has_utf8 = TRUE;
6955 }
6956
6957 /* mark where we are */
6958 PL_multi_start = CopLINE(PL_curcop);
6959 PL_multi_open = term;
6960
6961 /* find corresponding closing delimiter */
6962 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6963 termcode = termstr[0] = term = tmps[5];
6964
6965 PL_multi_close = term;
6966
6967 /* create a new SV to hold the contents. 87 is leak category, I'm
6968 assuming. 79 is the SV's initial length. What a random number. */
6969 sv = NEWSV(87,79);
6970 sv_upgrade(sv, SVt_PVIV);
6971 SvIVX(sv) = termcode;
6972 (void)SvPOK_only(sv); /* validate pointer */
6973
6974 /* move past delimiter and try to read a complete string */
6975 if (keep_delims)
6976 sv_catpvn(sv, s, termlen);
6977 s += termlen;
6978 for (;;) {
6979 if (PL_encoding && !UTF) {
6980 bool cont = TRUE;
6981
6982 while (cont) {
6983 int offset = s - SvPVX(PL_linestr);
6984 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
6985 &offset, (char*)termstr, termlen);
6986 char *ns = SvPVX(PL_linestr) + offset;
6987 char *svlast = SvEND(sv) - 1;
6988
6989 for (; s < ns; s++) {
6990 if (*s == '\n' && !PL_rsfp)
6991 CopLINE_inc(PL_curcop);
6992 }
6993 if (!found)
6994 goto read_more_line;
6995 else {
6996 /* handle quoted delimiters */
6997 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
6998 char *t;
6999 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
7000 t--;
7001 if ((svlast-1 - t) % 2) {
7002 if (!keep_quoted) {
7003 *(svlast-1) = term;
7004 *svlast = '\0';
7005 SvCUR_set(sv, SvCUR(sv) - 1);
7006 }
7007 continue;
7008 }
7009 }
7010 if (PL_multi_open == PL_multi_close) {
7011 cont = FALSE;
7012 }
7013 else {
7014 char *t, *w;
7015 if (!last)
7016 last = SvPVX(sv);
7017 for (w = t = last; t < svlast; w++, t++) {
7018 /* At here, all closes are "was quoted" one,
7019 so we don't check PL_multi_close. */
7020 if (*t == '\\') {
7021 if (!keep_quoted && *(t+1) == PL_multi_open)
7022 t++;
7023 else
7024 *w++ = *t++;
7025 }
7026 else if (*t == PL_multi_open)
7027 brackets++;
7028
7029 *w = *t;
7030 }
7031 if (w < t) {
7032 *w++ = term;
7033 *w = '\0';
7034 SvCUR_set(sv, w - SvPVX(sv));
7035 }
7036 last = w;
7037 if (--brackets <= 0)
7038 cont = FALSE;
7039 }
7040 }
7041 }
7042 if (!keep_delims) {
7043 SvCUR_set(sv, SvCUR(sv) - 1);
7044 *SvEND(sv) = '\0';
7045 }
7046 break;
7047 }
7048
7049 /* extend sv if need be */
7050 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
7051 /* set 'to' to the next character in the sv's string */
7052 to = SvPVX(sv)+SvCUR(sv);
7053
7054 /* if open delimiter is the close delimiter read unbridle */
7055 if (PL_multi_open == PL_multi_close) {
7056 for (; s < PL_bufend; s++,to++) {
7057 /* embedded newlines increment the current line number */
7058 if (*s == '\n' && !PL_rsfp)
7059 CopLINE_inc(PL_curcop);
7060 /* handle quoted delimiters */
7061 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
7062 if (!keep_quoted && s[1] == term)
7063 s++;
7064 /* any other quotes are simply copied straight through */
7065 else
7066 *to++ = *s++;
7067 }
7068 /* terminate when run out of buffer (the for() condition), or
7069 have found the terminator */
7070 else if (*s == term) {
7071 if (termlen == 1)
7072 break;
7073 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
7074 break;
7075 }
7076 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
7077 has_utf8 = TRUE;
7078 *to = *s;
7079 }
7080 }
7081
7082 /* if the terminator isn't the same as the start character (e.g.,
7083 matched brackets), we have to allow more in the quoting, and
7084 be prepared for nested brackets.
7085 */
7086 else {
7087 /* read until we run out of string, or we find the terminator */
7088 for (; s < PL_bufend; s++,to++) {
7089 /* embedded newlines increment the line count */
7090 if (*s == '\n' && !PL_rsfp)
7091 CopLINE_inc(PL_curcop);
7092 /* backslashes can escape the open or closing characters */
7093 if (*s == '\\' && s+1 < PL_bufend) {
7094 if (!keep_quoted &&
7095 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
7096 s++;
7097 else
7098 *to++ = *s++;
7099 }
7100 /* allow nested opens and closes */
7101 else if (*s == PL_multi_close && --brackets <= 0)
7102 break;
7103 else if (*s == PL_multi_open)
7104 brackets++;
7105 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
7106 has_utf8 = TRUE;
7107 *to = *s;
7108 }
7109 }
7110 /* terminate the copied string and update the sv's end-of-string */
7111 *to = '\0';
7112 SvCUR_set(sv, to - SvPVX(sv));
7113
7114 /*
7115 * this next chunk reads more into the buffer if we're not done yet
7116 */
7117
7118 if (s < PL_bufend)
7119 break; /* handle case where we are done yet :-) */
7120
7121 #ifndef PERL_STRICT_CR
7122 if (to - SvPVX(sv) >= 2) {
7123 if ((to[-2] == '\r' && to[-1] == '\n') ||
7124 (to[-2] == '\n' && to[-1] == '\r'))
7125 {
7126 to[-2] = '\n';
7127 to--;
7128 SvCUR_set(sv, to - SvPVX(sv));
7129 }
7130 else if (to[-1] == '\r')
7131 to[-1] = '\n';
7132 }
7133 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
7134 to[-1] = '\n';
7135 #endif
7136
7137 read_more_line:
7138 /* if we're out of file, or a read fails, bail and reset the current
7139 line marker so we can report where the unterminated string began
7140 */
7141 if (!PL_rsfp ||
7142 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
7143 sv_free(sv);
7144 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7145 return Nullch;
7146 }
7147 /* we read a line, so increment our line counter */
7148 CopLINE_inc(PL_curcop);
7149
7150 /* update debugger info */
7151 if (PERLDB_LINE && PL_curstash != PL_debstash) {
7152 SV *sv = NEWSV(88,0);
7153
7154 sv_upgrade(sv, SVt_PVMG);
7155 sv_setsv(sv,PL_linestr);
7156 (void)SvIOK_on(sv);
7157 SvIVX(sv) = 0;
7158 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
7159 }
7160
7161 /* having changed the buffer, we must update PL_bufend */
7162 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7163 PL_last_lop = PL_last_uni = Nullch;
7164 }
7165
7166 /* at this point, we have successfully read the delimited string */
7167
7168 if (!PL_encoding || UTF) {
7169 if (keep_delims)
7170 sv_catpvn(sv, s, termlen);
7171 s += termlen;
7172 }
7173 if (has_utf8 || PL_encoding)
7174 SvUTF8_on(sv);
7175
7176 PL_multi_end = CopLINE(PL_curcop);
7177
7178 /* if we allocated too much space, give some back */
7179 if (SvCUR(sv) + 5 < SvLEN(sv)) {
7180 SvLEN_set(sv, SvCUR(sv) + 1);
7181 Renew(SvPVX(sv), SvLEN(sv), char);
7182 }
7183
7184 /* decide whether this is the first or second quoted string we've read
7185 for this op
7186 */
7187
7188 if (PL_lex_stuff)
7189 PL_lex_repl = sv;
7190 else
7191 PL_lex_stuff = sv;
7192 return s;
7193 }
7194
7195 /*
7196 scan_num
7197 takes: pointer to position in buffer
7198 returns: pointer to new position in buffer
7199 side-effects: builds ops for the constant in yylval.op
7200
7201 Read a number in any of the formats that Perl accepts:
7202
7203 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
7204 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
7205 0b[01](_?[01])*
7206 0[0-7](_?[0-7])*
7207 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
7208
7209 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
7210 thing it reads.
7211
7212 If it reads a number without a decimal point or an exponent, it will
7213 try converting the number to an integer and see if it can do so
7214 without loss of precision.
7215 */
7216
7217 char *
7218 Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
7219 {
7220 register char *s = start; /* current position in buffer */
7221 register char *d; /* destination in temp buffer */
7222 register char *e; /* end of temp buffer */
7223 NV nv; /* number read, as a double */
7224 SV *sv = Nullsv; /* place to put the converted number */
7225 bool floatit; /* boolean: int or float? */
7226 char *lastub = 0; /* position of last underbar */
7227 static char number_too_long[] = "Number too long";
7228
7229 /* We use the first character to decide what type of number this is */
7230
7231 switch (*s) {
7232 default:
7233 Perl_croak(aTHX_ "panic: scan_num");
7234
7235 /* if it starts with a 0, it could be an octal number, a decimal in
7236 0.13 disguise, or a hexadecimal number, or a binary number. */
7237 case '0':
7238 {
7239 /* variables:
7240 u holds the "number so far"
7241 shift the power of 2 of the base
7242 (hex == 4, octal == 3, binary == 1)
7243 overflowed was the number more than we can hold?
7244
7245 Shift is used when we add a digit. It also serves as an "are
7246 we in octal/hex/binary?" indicator to disallow hex characters
7247 when in octal mode.
7248 */
7249 NV n = 0.0;
7250 UV u = 0;
7251 I32 shift;
7252 bool overflowed = FALSE;
7253 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
7254 static char* bases[5] = { "", "binary", "", "octal",
7255 "hexadecimal" };
7256 static char* Bases[5] = { "", "Binary", "", "Octal",
7257 "Hexadecimal" };
7258 static char *maxima[5] = { "",
7259 "0b11111111111111111111111111111111",
7260 "",
7261 "037777777777",
7262 "0xffffffff" };
7263 char *base, *Base, *max;
7264
7265 /* check for hex */
7266 if (s[1] == 'x') {
7267 shift = 4;
7268 s += 2;
7269 } else if (s[1] == 'b') {
7270 shift = 1;
7271 s += 2;
7272 }
7273 /* check for a decimal in disguise */
7274 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
7275 goto decimal;
7276 /* so it must be octal */
7277 else {
7278 shift = 3;
7279 s++;
7280 }
7281
7282 if (*s == '_') {
7283 if (ckWARN(WARN_SYNTAX))
7284 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7285 "Misplaced _ in number");
7286 lastub = s++;
7287 }
7288
7289 base = bases[shift];
7290 Base = Bases[shift];
7291 max = maxima[shift];
7292
7293 /* read the rest of the number */
7294 for (;;) {
7295 /* x is used in the overflow test,
7296 b is the digit we're adding on. */
7297 UV x, b;
7298
7299 switch (*s) {
7300
7301 /* if we don't mention it, we're done */
7302 default:
7303 goto out;
7304
7305 /* _ are ignored -- but warned about if consecutive */
7306 case '_':
7307 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7308 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7309 "Misplaced _ in number");
7310 lastub = s++;
7311 break;
7312
7313 /* 8 and 9 are not octal */
7314 case '8': case '9':
7315 if (shift == 3)
7316 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
7317 /* FALL THROUGH */
7318
7319 /* octal digits */
7320 case '2': case '3': case '4':
7321 case '5': case '6': case '7':
7322 if (shift == 1)
7323 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
7324 /* FALL THROUGH */
7325
7326 case '0': case '1':
7327 b = *s++ & 15; /* ASCII digit -> value of digit */
7328 goto digit;
7329
7330 /* hex digits */
7331 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7332 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
7333 /* make sure they said 0x */
7334 if (shift != 4)
7335 goto out;
7336 b = (*s++ & 7) + 9;
7337
7338 /* Prepare to put the digit we have onto the end
7339 of the number so far. We check for overflows.
7340 */
7341
7342 digit:
7343 if (!overflowed) {
7344 x = u << shift; /* make room for the digit */
7345
7346 if ((x >> shift) != u
7347 && !(PL_hints & HINT_NEW_BINARY)) {
7348 overflowed = TRUE;
7349 n = (NV) u;
7350 if (ckWARN_d(WARN_OVERFLOW))
7351 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
7352 "Integer overflow in %s number",
7353 base);
7354 } else
7355 u = x | b; /* add the digit to the end */
7356 }
7357 if (overflowed) {
7358 n *= nvshift[shift];
7359 /* If an NV has not enough bits in its
7360 * mantissa to represent an UV this summing of
7361 * small low-order numbers is a waste of time
7362 * (because the NV cannot preserve the
7363 * low-order bits anyway): we could just
7364 * remember when did we overflow and in the
7365 * end just multiply n by the right
7366 * amount. */
7367 n += (NV) b;
7368 }
7369 break;
7370 }
7371 }
7372
7373 /* if we get here, we had success: make a scalar value from
7374 the number.
7375 */
7376 out:
7377
7378 /* final misplaced underbar check */
7379 if (s[-1] == '_') {
7380 if (ckWARN(WARN_SYNTAX))
7381 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
7382 }
7383
7384 sv = NEWSV(92,0);
7385 if (overflowed) {
7386 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
7387 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
7388 "%s number > %s non-portable",
7389 Base, max);
7390 sv_setnv(sv, n);
7391 }
7392 else {
7393 #if UVSIZE > 4
7394 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
7395 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
7396 "%s number > %s non-portable",
7397 Base, max);
7398 #endif
7399 sv_setuv(sv, u);
7400 }
7401 if (PL_hints & HINT_NEW_BINARY)
7402 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
7403 }
7404 break;
7405
7406 /*
7407 handle decimal numbers.
7408 we're also sent here when we read a 0 as the first digit
7409 */
7410 case '1': case '2': case '3': case '4': case '5':
7411 case '6': case '7': case '8': case '9': case '.':
7412 decimal:
7413 d = PL_tokenbuf;
7414 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
7415 floatit = FALSE;
7416
7417 /* read next group of digits and _ and copy into d */
7418 while (isDIGIT(*s) || *s == '_') {
7419 /* skip underscores, checking for misplaced ones
7420 if -w is on
7421 */
7422 if (*s == '_') {
7423 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7424 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7425 "Misplaced _ in number");
7426 lastub = s++;
7427 }
7428 else {
7429 /* check for end of fixed-length buffer */
7430 if (d >= e)
7431 Perl_croak(aTHX_ number_too_long);
7432 /* if we're ok, copy the character */
7433 *d++ = *s++;
7434 }
7435 }
7436
7437 /* final misplaced underbar check */
7438 if (lastub && s == lastub + 1) {
7439 if (ckWARN(WARN_SYNTAX))
7440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
7441 }
7442
7443 /* read a decimal portion if there is one. avoid
7444 3..5 being interpreted as the number 3. followed
7445 by .5
7446 */
7447 if (*s == '.' && s[1] != '.') {
7448 floatit = TRUE;
7449 *d++ = *s++;
7450
7451 if (*s == '_') {
7452 if (ckWARN(WARN_SYNTAX))
7453 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7454 "Misplaced _ in number");
7455 lastub = s;
7456 }
7457
7458 /* copy, ignoring underbars, until we run out of digits.
7459 */
7460 for (; isDIGIT(*s) || *s == '_'; s++) {
7461 /* fixed length buffer check */
7462 if (d >= e)
7463 Perl_croak(aTHX_ number_too_long);
7464 if (*s == '_') {
7465 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7467 "Misplaced _ in number");
7468 lastub = s;
7469 }
7470 else
7471 *d++ = *s;
7472 }
7473 /* fractional part ending in underbar? */
7474 if (s[-1] == '_') {
7475 if (ckWARN(WARN_SYNTAX))
7476 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7477 "Misplaced _ in number");
7478 }
7479 if (*s == '.' && isDIGIT(s[1])) {
7480 /* oops, it's really a v-string, but without the "v" */
7481 s = start;
7482 goto vstring;
7483 }
7484 }
7485
7486 /* read exponent part, if present */
7487 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
7488 floatit = TRUE;
7489 s++;
7490
7491 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
7492 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
7493
7494 /* stray preinitial _ */
7495 if (*s == '_') {
7496 if (ckWARN(WARN_SYNTAX))
7497 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7498 "Misplaced _ in number");
7499 lastub = s++;
7500 }
7501
7502 /* allow positive or negative exponent */
7503 if (*s == '+' || *s == '-')
7504 *d++ = *s++;
7505
7506 /* stray initial _ */
7507 if (*s == '_') {
7508 if (ckWARN(WARN_SYNTAX))
7509 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7510 "Misplaced _ in number");
7511 lastub = s++;
7512 }
7513
7514 /* read digits of exponent */
7515 while (isDIGIT(*s) || *s == '_') {
7516 if (isDIGIT(*s)) {
7517 if (d >= e)
7518 Perl_croak(aTHX_ number_too_long);
7519 *d++ = *s++;
7520 }
7521 else {
7522 if (ckWARN(WARN_SYNTAX) &&
7523 ((lastub && s == lastub + 1) ||
7524 (!isDIGIT(s[1]) && s[1] != '_')))
7525 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7526 "Misplaced _ in number");
7527 lastub = s++;
7528 }
7529 }
7530 }
7531
7532
7533 /* make an sv from the string */
7534 sv = NEWSV(92,0);
7535
7536 /*
7537 We try to do an integer conversion first if no characters
7538 indicating "float" have been found.
7539 */
7540
7541 if (!floatit) {
7542 UV uv;
7543 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
7544
7545 if (flags == IS_NUMBER_IN_UV) {
7546 if (uv <= IV_MAX)
7547 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
7548 else
7549 sv_setuv(sv, uv);
7550 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
7551 if (uv <= (UV) IV_MIN)
7552 sv_setiv(sv, -(IV)uv);
7553 else
7554 floatit = TRUE;
7555 } else
7556 floatit = TRUE;
7557 }
7558 if (floatit) {
7559 /* terminate the string */
7560 *d = '\0';
7561 nv = Atof(PL_tokenbuf);
7562 sv_setnv(sv, nv);
7563 }
7564
7565 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7566 (PL_hints & HINT_NEW_INTEGER) )
7567 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
7568 (floatit ? "float" : "integer"),
7569 sv, Nullsv, NULL);
7570 break;
7571
7572 /* if it starts with a v, it could be a v-string */
7573 case 'v':
7574 vstring:
7575 sv = NEWSV(92,5); /* preallocate storage space */
7576 s = scan_vstring(s,sv);
7577 DEBUG_T( { PerlIO_printf(Perl_debug_log,
7578 "### Saw v-string before '%s'\n", s);
7579 } );
7580 break;
7581 }
7582
7583 /* make the op for the constant and return */
7584
7585 if (sv)
7586 lvalp->opval = newSVOP(OP_CONST, 0, sv);
7587 else
7588 lvalp->opval = Nullop;
7589
7590 return s;
7591 }
7592
7593 STATIC char *
7594 S_scan_formline(pTHX_ register char *s)
7595 {
7596 register char *eol;
7597 register char *t;
7598 SV *stuff = newSVpvn("",0);
7599 bool needargs = FALSE;
7600 bool eofmt = FALSE;
7601
7602 while (!needargs) {
7603 if (*s == '.') {
7604 /*SUPPRESS 530*/
7605 #ifdef PERL_STRICT_CR
7606 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
7607 #else
7608 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
7609 #endif
7610 if (*t == '\n' || t == PL_bufend) {
7611 eofmt = TRUE;
7612 break;
7613 }
7614 }
7615 if (PL_in_eval && !PL_rsfp) {
7616 eol = memchr(s,'\n',PL_bufend-s);
7617 if (!eol++)
7618 eol = PL_bufend;
7619 }
7620 else
7621 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7622 if (*s != '#') {
7623 for (t = s; t < eol; t++) {
7624 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7625 needargs = FALSE;
7626 goto enough; /* ~~ must be first line in formline */
7627 }
7628 if (*t == '@' || *t == '^')
7629 needargs = TRUE;
7630 }
7631 if (eol > s) {
7632 sv_catpvn(stuff, s, eol-s);
7633 #ifndef PERL_STRICT_CR
7634 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7635 char *end = SvPVX(stuff) + SvCUR(stuff);
7636 end[-2] = '\n';
7637 end[-1] = '\0';
7638 SvCUR(stuff)--;
7639 }
7640 #endif
7641 }
7642 else
7643 break;
7644 }
7645 s = eol;
7646 if (PL_rsfp) {
7647 s = filter_gets(PL_linestr, PL_rsfp, 0);
7648 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7649 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
7650 PL_last_lop = PL_last_uni = Nullch;
7651 if (!s) {
7652 s = PL_bufptr;
7653 break;
7654 }
7655 }
7656 incline(s);
7657 }
7658 enough:
7659 if (SvCUR(stuff)) {
7660 PL_expect = XTERM;
7661 if (needargs) {
7662 PL_lex_state = LEX_NORMAL;
7663 PL_nextval[PL_nexttoke].ival = 0;
7664 force_next(',');
7665 }
7666 else
7667 PL_lex_state = LEX_FORMLINE;
7668 if (!IN_BYTES) {
7669 if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
7670 SvUTF8_on(stuff);
7671 else if (PL_encoding)
7672 sv_recode_to_utf8(stuff, PL_encoding);
7673 }
7674 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
7675 force_next(THING);
7676 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
7677 force_next(LSTOP);
7678 }
7679 else {
7680 SvREFCNT_dec(stuff);
7681 if (eofmt)
7682 PL_lex_formbrack = 0;
7683 PL_bufptr = s;
7684 }
7685 return s;
7686 }
7687
7688 STATIC void
7689 S_set_csh(pTHX)
7690 {
7691 #ifdef CSH
7692 if (!PL_cshlen)
7693 PL_cshlen = strlen(PL_cshname);
7694 #endif
7695 }
7696
7697 I32
7698 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
7699 {
7700 I32 oldsavestack_ix = PL_savestack_ix;
7701 CV* outsidecv = PL_compcv;
7702
7703 if (PL_compcv) {
7704 assert(SvTYPE(PL_compcv) == SVt_PVCV);
7705 }
7706 SAVEI32(PL_subline);
7707 save_item(PL_subname);
7708 SAVESPTR(PL_compcv);
7709
7710 PL_compcv = (CV*)NEWSV(1104,0);
7711 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7712 CvFLAGS(PL_compcv) |= flags;
7713
7714 PL_subline = CopLINE(PL_curcop);
7715 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
7716 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
7717 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
7718 #ifdef USE_5005THREADS
7719 CvOWNER(PL_compcv) = 0;
7720 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex);
7721 MUTEX_INIT(CvMUTEXP(PL_compcv));
7722 #endif /* USE_5005THREADS */
7723
7724 return oldsavestack_ix;
7725 }
7726
7727 #ifdef __SC__
7728 #pragma segment Perl_yylex
7729 #endif
7730 int
7731 Perl_yywarn(pTHX_ char *s)
7732 {
7733 PL_in_eval |= EVAL_WARNONLY;
7734 yyerror(s);
7735 PL_in_eval &= ~EVAL_WARNONLY;
7736 return 0;
7737 }
7738
7739 int
7740 Perl_yyerror(pTHX_ char *s)
7741 {
7742 char *where = NULL;
7743 char *context = NULL;
7744 int contlen = -1;
7745 SV *msg;
7746
7747 if (!yychar || (yychar == ';' && !PL_rsfp))
7748 where = "at EOF";
7749 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7750 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
7751 /*
7752 Only for NetWare:
7753 The code below is removed for NetWare because it abends/crashes on NetWare
7754 when the script has error such as not having the closing quotes like:
7755 if ($var eq "value)
7756 Checking of white spaces is anyway done in NetWare code.
7757 */
7758 #ifndef NETWARE
7759 while (isSPACE(*PL_oldoldbufptr))
7760 PL_oldoldbufptr++;
7761 #endif
7762 context = PL_oldoldbufptr;
7763 contlen = PL_bufptr - PL_oldoldbufptr;
7764 }
7765 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7766 PL_oldbufptr != PL_bufptr) {
7767 /*
7768 Only for NetWare:
7769 The code below is removed for NetWare because it abends/crashes on NetWare
7770 when the script has error such as not having the closing quotes like:
7771 if ($var eq "value)
7772 Checking of white spaces is anyway done in NetWare code.
7773 */
7774 #ifndef NETWARE
7775 while (isSPACE(*PL_oldbufptr))
7776 PL_oldbufptr++;
7777 #endif
7778 context = PL_oldbufptr;
7779 contlen = PL_bufptr - PL_oldbufptr;
7780 }
7781 else if (yychar > 255)
7782 where = "next token ???";
7783 #ifdef USE_PURE_BISON
7784 /* GNU Bison sets the value -2 */
7785 else if (yychar == -2) {
7786 #else
7787 else if ((yychar & 127) == 127) {
7788 #endif
7789 if (PL_lex_state == LEX_NORMAL ||
7790 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
7791 where = "at end of line";
7792 else if (PL_lex_inpat)
7793 where = "within pattern";
7794 else
7795 where = "within string";
7796 }
7797 else {
7798 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
7799 if (yychar < 32)
7800 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
7801 else if (isPRINT_LC(yychar))
7802 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
7803 else
7804 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
7805 where = SvPVX(where_sv);
7806 }
7807 msg = sv_2mortal(newSVpv(s, 0));
7808 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
7809 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
7810 if (context)
7811 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
7812 else
7813 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
7814 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
7815 Perl_sv_catpvf(aTHX_ msg,
7816 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
7817 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
7818 PL_multi_end = 0;
7819 }
7820 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
7821 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
7822 else
7823 qerror(msg);
7824 if (PL_error_count >= 10) {
7825 if (PL_in_eval && SvCUR(ERRSV))
7826 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
7827 ERRSV, OutCopFILE(PL_curcop));
7828 else
7829 Perl_croak(aTHX_ "%s has too many errors.\n",
7830 OutCopFILE(PL_curcop));
7831 }
7832 PL_in_my = 0;
7833 PL_in_my_stash = Nullhv;
7834 return 0;
7835 }
7836 #ifdef __SC__
7837 #pragma segment Main
7838 #endif
7839
7840 STATIC char*
7841 S_swallow_bom(pTHX_ U8 *s)
7842 {
7843 STRLEN slen;
7844 slen = SvCUR(PL_linestr);
7845 switch (s[0]) {
7846 case 0xFF:
7847 if (s[1] == 0xFE) {
7848 /* UTF-16 little-endian? (or UTF32-LE?) */
7849 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
7850 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
7851 #ifndef PERL_NO_UTF16_FILTER
7852 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
7853 s += 2;
7854 utf16le:
7855 if (PL_bufend > (char*)s) {
7856 U8 *news;
7857 I32 newlen;
7858
7859 filter_add(utf16rev_textfilter, NULL);
7860 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7861 PL_bufend =
7862 (char*)utf16_to_utf8_reversed(s, news,
7863 PL_bufend - (char*)s - 1,
7864 &newlen);
7865 sv_setpvn(PL_linestr, (const char*)news, newlen);
7866 Safefree(news);
7867 SvUTF8_on(PL_linestr);
7868 s = (U8*)SvPVX(PL_linestr);
7869 PL_bufend = SvPVX(PL_linestr) + newlen;
7870 }
7871 #else
7872 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
7873 #endif
7874 }
7875 break;
7876 case 0xFE:
7877 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
7878 #ifndef PERL_NO_UTF16_FILTER
7879 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
7880 s += 2;
7881 utf16be:
7882 if (PL_bufend > (char *)s) {
7883 U8 *news;
7884 I32 newlen;
7885
7886 filter_add(utf16_textfilter, NULL);
7887 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7888 PL_bufend =
7889 (char*)utf16_to_utf8(s, news,
7890 PL_bufend - (char*)s,
7891 &newlen);
7892 sv_setpvn(PL_linestr, (const char*)news, newlen);
7893 Safefree(news);
7894 SvUTF8_on(PL_linestr);
7895 s = (U8*)SvPVX(PL_linestr);
7896 PL_bufend = SvPVX(PL_linestr) + newlen;
7897 }
7898 #else
7899 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
7900 #endif
7901 }
7902 break;
7903 case 0xEF:
7904 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7905 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
7906 s += 3; /* UTF-8 */
7907 }
7908 break;
7909 case 0:
7910 if (slen > 3) {
7911 if (s[1] == 0) {
7912 if (s[2] == 0xFE && s[3] == 0xFF) {
7913 /* UTF-32 big-endian */
7914 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
7915 }
7916 }
7917 else if (s[2] == 0 && s[3] != 0) {
7918 /* Leading bytes
7919 * 00 xx 00 xx
7920 * are a good indicator of UTF-16BE. */
7921 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
7922 goto utf16be;
7923 }
7924 }
7925 default:
7926 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
7927 /* Leading bytes
7928 * xx 00 xx 00
7929 * are a good indicator of UTF-16LE. */
7930 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
7931 goto utf16le;
7932 }
7933 }
7934 return (char*)s;
7935 }
7936
7937 /*
7938 * restore_rsfp
7939 * Restore a source filter.
7940 */
7941
7942 static void
7943 restore_rsfp(pTHX_ void *f)
7944 {
7945 PerlIO *fp = (PerlIO*)f;
7946
7947 if (PL_rsfp == PerlIO_stdin())
7948 PerlIO_clearerr(PL_rsfp);
7949 else if (PL_rsfp && (PL_rsfp != fp))
7950 PerlIO_close(PL_rsfp);
7951 PL_rsfp = fp;
7952 }
7953
7954 #ifndef PERL_NO_UTF16_FILTER
7955 static I32
7956 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
7957 {
7958 I32 count = FILTER_READ(idx+1, sv, maxlen);
7959 if (count) {
7960 U8* tmps;
7961 U8* tend;
7962 I32 newlen;
7963 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
7964 if (!*SvPV_nolen(sv))
7965 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7966 return count;
7967
7968 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
7969 sv_usepvn(sv, (char*)tmps, tend - tmps);
7970 }
7971 return count;
7972 }
7973
7974 static I32
7975 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
7976 {
7977 I32 count = FILTER_READ(idx+1, sv, maxlen);
7978 if (count) {
7979 U8* tmps;
7980 U8* tend;
7981 I32 newlen;
7982 if (!*SvPV_nolen(sv))
7983 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7984 return count;
7985
7986 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
7987 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
7988 sv_usepvn(sv, (char*)tmps, tend - tmps);
7989 }
7990 return count;
7991 }
7992 #endif
7993
7994 /*
7995 Returns a pointer to the next character after the parsed
7996 vstring, as well as updating the passed in sv.
7997
7998 Function must be called like
7999
8000 sv = NEWSV(92,5);
8001 s = scan_vstring(s,sv);
8002
8003 The sv should already be large enough to store the vstring
8004 passed in, for performance reasons.
8005
8006 */
8007
8008 char *
8009 Perl_scan_vstring(pTHX_ char *s, SV *sv)
8010 {
8011 char *pos = s;
8012 char *start = s;
8013 if (*pos == 'v') pos++; /* get past 'v' */
8014 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
8015 pos++;
8016 if ( *pos != '.') {
8017 /* this may not be a v-string if followed by => */
8018 char *next = pos;
8019 while (next < PL_bufend && isSPACE(*next))
8020 ++next;
8021 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
8022 /* return string not v-string */
8023 sv_setpvn(sv,(char *)s,pos-s);
8024 return pos;
8025 }
8026 }
8027
8028 if (!isALPHA(*pos)) {
8029 UV rev;
8030 U8 tmpbuf[UTF8_MAXLEN+1];
8031 U8 *tmpend;
8032
8033 if (*s == 'v') s++; /* get past 'v' */
8034
8035 sv_setpvn(sv, "", 0);
8036
8037 for (;;) {
8038 rev = 0;
8039 {
8040 /* this is atoi() that tolerates underscores */
8041 char *end = pos;
8042 UV mult = 1;
8043 while (--end >= s) {
8044 UV orev;
8045 if (*end == '_')
8046 continue;
8047 orev = rev;
8048 rev += (*end - '0') * mult;
8049 mult *= 10;
8050 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
8051 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
8052 "Integer overflow in decimal number");
8053 }
8054 }
8055 #ifdef EBCDIC
8056 if (rev > 0x7FFFFFFF)
8057 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
8058 #endif
8059 /* Append native character for the rev point */
8060 tmpend = uvchr_to_utf8(tmpbuf, rev);
8061 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
8062 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
8063 SvUTF8_on(sv);
8064 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
8065 s = ++pos;
8066 else {
8067 s = pos;
8068 break;
8069 }
8070 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
8071 pos++;
8072 }
8073 SvPOK_on(sv);
8074 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
8075 SvRMAGICAL_on(sv);
8076 }
8077 return s;
8078 }
8079
8080