xref: /openbsd-src/gnu/usr.bin/perl/toke.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*    toke.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'It all comes from here, the stench and the peril.'    --Frodo
13  *
14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15  */
16 
17 /*
18  * This file is the lexer for Perl.  It's closely linked to the
19  * parser, perly.y.
20  *
21  * The main routine is yylex(), which returns the next token.
22  */
23 
24 /*
25 =head1 Lexer interface
26 
27 This is the lower layer of the Perl parser, managing characters and tokens.
28 
29 =for apidoc AmU|yy_parser *|PL_parser
30 
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress.  The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
35 
36 =cut
37 */
38 
39 #include "EXTERN.h"
40 #define PERL_IN_TOKE_C
41 #include "perl.h"
42 #include "dquote_static.c"
43 
44 #define new_constant(a,b,c,d,e,f,g)	\
45 	S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 
47 #define pl_yylval	(PL_parser->yylval)
48 
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets		(PL_parser->lex_brackets)
51 #define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack	(PL_parser->lex_brackstack)
54 #define PL_lex_casemods		(PL_parser->lex_casemods)
55 #define PL_lex_casestack        (PL_parser->lex_casestack)
56 #define PL_lex_defer		(PL_parser->lex_defer)
57 #define PL_lex_dojoin		(PL_parser->lex_dojoin)
58 #define PL_lex_expect		(PL_parser->lex_expect)
59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
60 #define PL_lex_inpat		(PL_parser->lex_inpat)
61 #define PL_lex_inwhat		(PL_parser->lex_inwhat)
62 #define PL_lex_op		(PL_parser->lex_op)
63 #define PL_lex_repl		(PL_parser->lex_repl)
64 #define PL_lex_starts		(PL_parser->lex_starts)
65 #define PL_lex_stuff		(PL_parser->lex_stuff)
66 #define PL_multi_start		(PL_parser->multi_start)
67 #define PL_multi_open		(PL_parser->multi_open)
68 #define PL_multi_close		(PL_parser->multi_close)
69 #define PL_preambled		(PL_parser->preambled)
70 #define PL_sublex_info		(PL_parser->sublex_info)
71 #define PL_linestr		(PL_parser->linestr)
72 #define PL_expect		(PL_parser->expect)
73 #define PL_copline		(PL_parser->copline)
74 #define PL_bufptr		(PL_parser->bufptr)
75 #define PL_oldbufptr		(PL_parser->oldbufptr)
76 #define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
77 #define PL_linestart		(PL_parser->linestart)
78 #define PL_bufend		(PL_parser->bufend)
79 #define PL_last_uni		(PL_parser->last_uni)
80 #define PL_last_lop		(PL_parser->last_lop)
81 #define PL_last_lop_op		(PL_parser->last_lop_op)
82 #define PL_lex_state		(PL_parser->lex_state)
83 #define PL_rsfp			(PL_parser->rsfp)
84 #define PL_rsfp_filters		(PL_parser->rsfp_filters)
85 #define PL_in_my		(PL_parser->in_my)
86 #define PL_in_my_stash		(PL_parser->in_my_stash)
87 #define PL_tokenbuf		(PL_parser->tokenbuf)
88 #define PL_multi_end		(PL_parser->multi_end)
89 #define PL_error_count		(PL_parser->error_count)
90 
91 #ifdef PERL_MAD
92 #  define PL_endwhite		(PL_parser->endwhite)
93 #  define PL_faketokens		(PL_parser->faketokens)
94 #  define PL_lasttoke		(PL_parser->lasttoke)
95 #  define PL_nextwhite		(PL_parser->nextwhite)
96 #  define PL_realtokenstart	(PL_parser->realtokenstart)
97 #  define PL_skipwhite		(PL_parser->skipwhite)
98 #  define PL_thisclose		(PL_parser->thisclose)
99 #  define PL_thismad		(PL_parser->thismad)
100 #  define PL_thisopen		(PL_parser->thisopen)
101 #  define PL_thisstuff		(PL_parser->thisstuff)
102 #  define PL_thistoken		(PL_parser->thistoken)
103 #  define PL_thiswhite		(PL_parser->thiswhite)
104 #  define PL_thiswhite		(PL_parser->thiswhite)
105 #  define PL_nexttoke		(PL_parser->nexttoke)
106 #  define PL_curforce		(PL_parser->curforce)
107 #else
108 #  define PL_nexttoke		(PL_parser->nexttoke)
109 #  define PL_nexttype		(PL_parser->nexttype)
110 #  define PL_nextval		(PL_parser->nextval)
111 #endif
112 
113 static const char* const ident_too_long = "Identifier too long";
114 
115 #ifdef PERL_MAD
116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
118 #else
119 #  define CURMAD(slot,sv)
120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
121 #endif
122 
123 #define XENUMMASK  0x3f
124 #define XFAKEEOF   0x40
125 #define XFAKEBRACK 0x80
126 
127 #ifdef USE_UTF8_SCRIPTS
128 #   define UTF (!IN_BYTES)
129 #else
130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
131 #endif
132 
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
135 
136 /* In variables named $^X, these are the legal values for X.
137  * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139 
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
141 
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143  * They are arranged oddly so that the guard on the switch statement
144  * can get by with a single comparison (if the compiler is smart enough).
145  *
146  * These values refer to the various states within a sublex parse,
147  * i.e. within a double quotish string
148  */
149 
150 /* #define LEX_NOTPARSING		11 is done in perl.h. */
151 
152 #define LEX_NORMAL		10 /* normal code (ie not within "...")     */
153 #define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
155 #define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
156 #define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
157 
158 				   /* at end of code, eg "$x" followed by:  */
159 #define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
160 #define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
161 
162 #define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
163 				        string or after \E, $foo, etc       */
164 #define LEX_INTERPCONST		 2 /* NOT USED */
165 #define LEX_FORMLINE		 1 /* expecting a format line               */
166 #define LEX_KNOWNEXT		 0 /* next token known; just return it      */
167 
168 
169 #ifdef DEBUGGING
170 static const char* const lex_state_names[] = {
171     "KNOWNEXT",
172     "FORMLINE",
173     "INTERPCONST",
174     "INTERPCONCAT",
175     "INTERPENDMAYBE",
176     "INTERPEND",
177     "INTERPSTART",
178     "INTERPPUSH",
179     "INTERPCASEMOD",
180     "INTERPNORMAL",
181     "NORMAL"
182 };
183 #endif
184 
185 #include "keywords.h"
186 
187 /* CLINE is a macro that ensures PL_copline has a sane value */
188 
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 
191 #ifdef PERL_MAD
192 #  define SKIPSPACE0(s) skipspace0(s)
193 #  define SKIPSPACE1(s) skipspace1(s)
194 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 #  define PEEKSPACE(s) skipspace2(s,0)
196 #else
197 #  define SKIPSPACE0(s) skipspace(s)
198 #  define SKIPSPACE1(s) skipspace(s)
199 #  define SKIPSPACE2(s,tsv) skipspace(s)
200 #  define PEEKSPACE(s) skipspace(s)
201 #endif
202 
203 /*
204  * Convenience functions to return different tokens and prime the
205  * lexer for the next token.  They all take an argument.
206  *
207  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
208  * OPERATOR     : generic operator
209  * AOPERATOR    : assignment operator
210  * PREBLOCK     : beginning the block after an if, while, foreach, ...
211  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212  * PREREF       : *EXPR where EXPR is not a simple identifier
213  * TERM         : expression term
214  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
215  * LOOPX        : loop exiting command (goto, last, dump, etc)
216  * FTST         : file test operator
217  * FUN0         : zero-argument function
218  * FUN0OP       : zero-argument function, with its op created in this file
219  * FUN1         : not used, except for not, which isn't a UNIOP
220  * BOop         : bitwise or or xor
221  * BAop         : bitwise and
222  * SHop         : shift operator
223  * PWop         : power operator
224  * PMop         : pattern-matching operator
225  * Aop          : addition-level operator
226  * Mop          : multiplication-level operator
227  * Eop          : equality-testing operator
228  * Rop          : relational operator <= != gt
229  *
230  * Also see LOP and lop() below.
231  */
232 
233 #ifdef DEBUGGING /* Serve -DT. */
234 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
235 #else
236 #   define REPORT(retval) (retval)
237 #endif
238 
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
261 
262 /* This bit of chicanery makes a unary function followed by
263  * a parenthesis into a function with one argument, highest precedence.
264  * The UNIDOR macro is for unary functions that can be followed by the //
265  * operator (such as C<shift // 0>).
266  */
267 #define UNI3(f,x,have_x) { \
268 	pl_yylval.ival = f; \
269 	if (have_x) PL_expect = x; \
270 	PL_bufptr = s; \
271 	PL_last_uni = PL_oldbufptr; \
272 	PL_last_lop_op = f; \
273 	if (*s == '(') \
274 	    return REPORT( (int)FUNC1 ); \
275 	s = PEEKSPACE(s); \
276 	return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
277 	}
278 #define UNI(f)    UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 	if (optional) PL_last_uni = PL_oldbufptr; \
282 	OPERATOR(f); \
283 	}
284 
285 #define UNIBRACK(f) UNI3(f,0,0)
286 
287 /* grandfather return to old style */
288 #define OLDLOP(f) \
289 	do { \
290 	    if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 		PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 	    pl_yylval.ival = (f); \
293 	    PL_expect = XTERM; \
294 	    PL_bufptr = s; \
295 	    return (int)LSTOP; \
296 	} while(0)
297 
298 #define COPLINE_INC_WITH_HERELINES		    \
299     STMT_START {				     \
300 	CopLINE_inc(PL_curcop);			      \
301 	if (PL_parser->herelines)		       \
302 	    CopLINE(PL_curcop) += PL_parser->herelines, \
303 	    PL_parser->herelines = 0;			 \
304     } STMT_END
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306  * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END	      \
308     STMT_START {			       \
309 	CopLINE_set(PL_curcop, PL_multi_end);	\
310 	if (PL_multi_end != PL_multi_start)	 \
311 	    PL_parser->herelines = 0;		  \
312     } STMT_END
313 
314 
315 #ifdef DEBUGGING
316 
317 /* how to interpret the pl_yylval associated with the token */
318 enum token_type {
319     TOKENTYPE_NONE,
320     TOKENTYPE_IVAL,
321     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
322     TOKENTYPE_PVAL,
323     TOKENTYPE_OPVAL
324 };
325 
326 static struct debug_tokens {
327     const int token;
328     enum token_type type;
329     const char *name;
330 } const debug_tokens[] =
331 {
332     { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
333     { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
334     { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
335     { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
336     { ARROW,		TOKENTYPE_NONE,		"ARROW" },
337     { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
338     { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
339     { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
340     { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
341     { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
342     { DEFAULT,		TOKENTYPE_NONE,		"DEFAULT" },
343     { DO,		TOKENTYPE_NONE,		"DO" },
344     { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
345     { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
346     { DOROP,		TOKENTYPE_OPNUM,	"DOROP" },
347     { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
348     { ELSE,		TOKENTYPE_NONE,		"ELSE" },
349     { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
350     { EQOP,		TOKENTYPE_OPNUM,	"EQOP" },
351     { FOR,		TOKENTYPE_IVAL,		"FOR" },
352     { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
353     { FORMLBRACK,	TOKENTYPE_NONE,		"FORMLBRACK" },
354     { FORMRBRACK,	TOKENTYPE_NONE,		"FORMRBRACK" },
355     { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
356     { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
357     { FUNC0OP,		TOKENTYPE_OPVAL,	"FUNC0OP" },
358     { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
359     { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
360     { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
361     { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
362     { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
363     { IF,		TOKENTYPE_IVAL,		"IF" },
364     { LABEL,		TOKENTYPE_PVAL,		"LABEL" },
365     { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
366     { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
367     { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
368     { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
369     { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
370     { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
371     { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
372     { MY,		TOKENTYPE_IVAL,		"MY" },
373     { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
374     { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
375     { OROP,		TOKENTYPE_IVAL,		"OROP" },
376     { OROR,		TOKENTYPE_NONE,		"OROR" },
377     { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
378     { PEG,		TOKENTYPE_NONE,		"PEG" },
379     { PLUGEXPR,		TOKENTYPE_OPVAL,	"PLUGEXPR" },
380     { PLUGSTMT,		TOKENTYPE_OPVAL,	"PLUGSTMT" },
381     { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
382     { POSTJOIN,		TOKENTYPE_NONE,		"POSTJOIN" },
383     { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
384     { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
385     { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
386     { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
387     { PREINC,		TOKENTYPE_NONE,		"PREINC" },
388     { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
389     { QWLIST,		TOKENTYPE_OPVAL,	"QWLIST" },
390     { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
391     { RELOP,		TOKENTYPE_OPNUM,	"RELOP" },
392     { REQUIRE,		TOKENTYPE_NONE,		"REQUIRE" },
393     { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
394     { SUB,		TOKENTYPE_NONE,		"SUB" },
395     { THING,		TOKENTYPE_OPVAL,	"THING" },
396     { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
397     { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
398     { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
399     { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
400     { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
401     { USE,		TOKENTYPE_IVAL,		"USE" },
402     { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
403     { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
404     { WORD,		TOKENTYPE_OPVAL,	"WORD" },
405     { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
406     { 0,		TOKENTYPE_NONE,		NULL }
407 };
408 
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
410 
411 STATIC int
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
413 {
414     dVAR;
415 
416     PERL_ARGS_ASSERT_TOKEREPORT;
417 
418     if (DEBUG_T_TEST) {
419 	const char *name = NULL;
420 	enum token_type type = TOKENTYPE_NONE;
421 	const struct debug_tokens *p;
422 	SV* const report = newSVpvs("<== ");
423 
424 	for (p = debug_tokens; p->token; p++) {
425 	    if (p->token == (int)rv) {
426 		name = p->name;
427 		type = p->type;
428 		break;
429 	    }
430 	}
431 	if (name)
432 	    Perl_sv_catpv(aTHX_ report, name);
433 	else if ((char)rv > ' ' && (char)rv <= '~')
434 	{
435 	    Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
436 	    if ((char)rv == 'p')
437 		sv_catpvs(report, " (pending identifier)");
438 	}
439 	else if (!rv)
440 	    sv_catpvs(report, "EOF");
441 	else
442 	    Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
443 	switch (type) {
444 	case TOKENTYPE_NONE:
445 	    break;
446 	case TOKENTYPE_IVAL:
447 	    Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
448 	    break;
449 	case TOKENTYPE_OPNUM:
450 	    Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 				    PL_op_name[lvalp->ival]);
452 	    break;
453 	case TOKENTYPE_PVAL:
454 	    Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
455 	    break;
456 	case TOKENTYPE_OPVAL:
457 	    if (lvalp->opval) {
458 		Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 				    PL_op_name[lvalp->opval->op_type]);
460 		if (lvalp->opval->op_type == OP_CONST) {
461 		    Perl_sv_catpvf(aTHX_ report, " %s",
462 			SvPEEK(cSVOPx_sv(lvalp->opval)));
463 		}
464 
465 	    }
466 	    else
467 		sv_catpvs(report, "(opval=null)");
468 	    break;
469 	}
470         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
471     };
472     return (int)rv;
473 }
474 
475 
476 /* print the buffer with suitable escapes */
477 
478 STATIC void
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
480 {
481     SV* const tmp = newSVpvs("");
482 
483     PERL_ARGS_ASSERT_PRINTBUF;
484 
485     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
486     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
487     GCC_DIAG_RESTORE;
488     SvREFCNT_dec(tmp);
489 }
490 
491 #endif
492 
493 static int
494 S_deprecate_commaless_var_list(pTHX) {
495     PL_expect = XTERM;
496     deprecate("comma-less variable list");
497     return REPORT(','); /* grandfather non-comma-format format */
498 }
499 
500 /*
501  * S_ao
502  *
503  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
504  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
505  */
506 
507 STATIC int
508 S_ao(pTHX_ int toketype)
509 {
510     dVAR;
511     if (*PL_bufptr == '=') {
512 	PL_bufptr++;
513 	if (toketype == ANDAND)
514 	    pl_yylval.ival = OP_ANDASSIGN;
515 	else if (toketype == OROR)
516 	    pl_yylval.ival = OP_ORASSIGN;
517 	else if (toketype == DORDOR)
518 	    pl_yylval.ival = OP_DORASSIGN;
519 	toketype = ASSIGNOP;
520     }
521     return toketype;
522 }
523 
524 /*
525  * S_no_op
526  * When Perl expects an operator and finds something else, no_op
527  * prints the warning.  It always prints "<something> found where
528  * operator expected.  It prints "Missing semicolon on previous line?"
529  * if the surprise occurs at the start of the line.  "do you need to
530  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
531  * where the compiler doesn't know if foo is a method call or a function.
532  * It prints "Missing operator before end of line" if there's nothing
533  * after the missing operator, or "... before <...>" if there is something
534  * after the missing operator.
535  */
536 
537 STATIC void
538 S_no_op(pTHX_ const char *const what, char *s)
539 {
540     dVAR;
541     char * const oldbp = PL_bufptr;
542     const bool is_first = (PL_oldbufptr == PL_linestart);
543 
544     PERL_ARGS_ASSERT_NO_OP;
545 
546     if (!s)
547 	s = oldbp;
548     else
549 	PL_bufptr = s;
550     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
551     if (ckWARN_d(WARN_SYNTAX)) {
552 	if (is_first)
553 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
554 		    "\t(Missing semicolon on previous line?)\n");
555 	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
556 	    const char *t;
557 	    for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
558                                                             t += UTF ? UTF8SKIP(t) : 1)
559 		NOOP;
560 	    if (t < PL_bufptr && isSPACE(*t))
561 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
562 			"\t(Do you need to predeclare %"UTF8f"?)\n",
563 		      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
564 	}
565 	else {
566 	    assert(s >= oldbp);
567 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
568 		    "\t(Missing operator before %"UTF8f"?)\n",
569 		     UTF8fARG(UTF, s - oldbp, oldbp));
570 	}
571     }
572     PL_bufptr = oldbp;
573 }
574 
575 /*
576  * S_missingterm
577  * Complain about missing quote/regexp/heredoc terminator.
578  * If it's called with NULL then it cauterizes the line buffer.
579  * If we're in a delimited string and the delimiter is a control
580  * character, it's reformatted into a two-char sequence like ^C.
581  * This is fatal.
582  */
583 
584 STATIC void
585 S_missingterm(pTHX_ char *s)
586 {
587     dVAR;
588     char tmpbuf[3];
589     char q;
590     if (s) {
591 	char * const nl = strrchr(s,'\n');
592 	if (nl)
593 	    *nl = '\0';
594     }
595     else if ((U8) PL_multi_close < 32) {
596 	*tmpbuf = '^';
597 	tmpbuf[1] = (char)toCTRL(PL_multi_close);
598 	tmpbuf[2] = '\0';
599 	s = tmpbuf;
600     }
601     else {
602 	*tmpbuf = (char)PL_multi_close;
603 	tmpbuf[1] = '\0';
604 	s = tmpbuf;
605     }
606     q = strchr(s,'"') ? '\'' : '"';
607     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
608 }
609 
610 #include "feature.h"
611 
612 /*
613  * Check whether the named feature is enabled.
614  */
615 bool
616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
617 {
618     dVAR;
619     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
620 
621     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
622 
623     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
624 
625     if (namelen > MAX_FEATURE_LEN)
626 	return FALSE;
627     memcpy(&he_name[8], name, namelen);
628 
629     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
630 				     REFCOUNTED_HE_EXISTS));
631 }
632 
633 /*
634  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
635  * utf16-to-utf8-reversed.
636  */
637 
638 #ifdef PERL_CR_FILTER
639 static void
640 strip_return(SV *sv)
641 {
642     const char *s = SvPVX_const(sv);
643     const char * const e = s + SvCUR(sv);
644 
645     PERL_ARGS_ASSERT_STRIP_RETURN;
646 
647     /* outer loop optimized to do nothing if there are no CR-LFs */
648     while (s < e) {
649 	if (*s++ == '\r' && *s == '\n') {
650 	    /* hit a CR-LF, need to copy the rest */
651 	    char *d = s - 1;
652 	    *d++ = *s++;
653 	    while (s < e) {
654 		if (*s == '\r' && s[1] == '\n')
655 		    s++;
656 		*d++ = *s++;
657 	    }
658 	    SvCUR(sv) -= s - d;
659 	    return;
660 	}
661     }
662 }
663 
664 STATIC I32
665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
666 {
667     const I32 count = FILTER_READ(idx+1, sv, maxlen);
668     if (count > 0 && !maxlen)
669 	strip_return(sv);
670     return count;
671 }
672 #endif
673 
674 /*
675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
676 
677 Creates and initialises a new lexer/parser state object, supplying
678 a context in which to lex and parse from a new source of Perl code.
679 A pointer to the new state object is placed in L</PL_parser>.  An entry
680 is made on the save stack so that upon unwinding the new state object
681 will be destroyed and the former value of L</PL_parser> will be restored.
682 Nothing else need be done to clean up the parsing context.
683 
684 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
685 non-null, provides a string (in SV form) containing code to be parsed.
686 A copy of the string is made, so subsequent modification of I<line>
687 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
688 from which code will be read to be parsed.  If both are non-null, the
689 code in I<line> comes first and must consist of complete lines of input,
690 and I<rsfp> supplies the remainder of the source.
691 
692 The I<flags> parameter is reserved for future use.  Currently it is only
693 used by perl internally, so extensions should always pass zero.
694 
695 =cut
696 */
697 
698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
699    can share filters with the current parser.
700    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
701    caller, hence isn't owned by the parser, so shouldn't be closed on parser
702    destruction. This is used to handle the case of defaulting to reading the
703    script from the standard input because no filename was given on the command
704    line (without getting confused by situation where STDIN has been closed, so
705    the script handle is opened on fd 0)  */
706 
707 void
708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
709 {
710     dVAR;
711     const char *s = NULL;
712     yy_parser *parser, *oparser;
713     if (flags && flags & ~LEX_START_FLAGS)
714 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
715 
716     /* create and initialise a parser */
717 
718     Newxz(parser, 1, yy_parser);
719     parser->old_parser = oparser = PL_parser;
720     PL_parser = parser;
721 
722     parser->stack = NULL;
723     parser->ps = NULL;
724     parser->stack_size = 0;
725 
726     /* on scope exit, free this parser and restore any outer one */
727     SAVEPARSER(parser);
728     parser->saved_curcop = PL_curcop;
729 
730     /* initialise lexer state */
731 
732 #ifdef PERL_MAD
733     parser->curforce = -1;
734 #else
735     parser->nexttoke = 0;
736 #endif
737     parser->error_count = oparser ? oparser->error_count : 0;
738     parser->copline = parser->preambling = NOLINE;
739     parser->lex_state = LEX_NORMAL;
740     parser->expect = XSTATE;
741     parser->rsfp = rsfp;
742     parser->rsfp_filters =
743       !(flags & LEX_START_SAME_FILTER) || !oparser
744         ? NULL
745         : MUTABLE_AV(SvREFCNT_inc(
746             oparser->rsfp_filters
747              ? oparser->rsfp_filters
748              : (oparser->rsfp_filters = newAV())
749           ));
750 
751     Newx(parser->lex_brackstack, 120, char);
752     Newx(parser->lex_casestack, 12, char);
753     *parser->lex_casestack = '\0';
754     Newxz(parser->lex_shared, 1, LEXSHARED);
755 
756     if (line) {
757 	STRLEN len;
758 	s = SvPV_const(line, len);
759 	parser->linestr = flags & LEX_START_COPIED
760 			    ? SvREFCNT_inc_simple_NN(line)
761 			    : newSVpvn_flags(s, len, SvUTF8(line));
762 	sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
763     } else {
764 	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
765     }
766     parser->oldoldbufptr =
767 	parser->oldbufptr =
768 	parser->bufptr =
769 	parser->linestart = SvPVX(parser->linestr);
770     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
771     parser->last_lop = parser->last_uni = NULL;
772 
773     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
774                                                         |LEX_DONT_CLOSE_RSFP));
775     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
776                                                         |LEX_DONT_CLOSE_RSFP));
777 
778     parser->in_pod = parser->filtered = 0;
779 }
780 
781 
782 /* delete a parser object */
783 
784 void
785 Perl_parser_free(pTHX_  const yy_parser *parser)
786 {
787     PERL_ARGS_ASSERT_PARSER_FREE;
788 
789     PL_curcop = parser->saved_curcop;
790     SvREFCNT_dec(parser->linestr);
791 
792     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
793 	PerlIO_clearerr(parser->rsfp);
794     else if (parser->rsfp && (!parser->old_parser ||
795 		(parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
796 	PerlIO_close(parser->rsfp);
797     SvREFCNT_dec(parser->rsfp_filters);
798     SvREFCNT_dec(parser->lex_stuff);
799     SvREFCNT_dec(parser->sublex_info.repl);
800 
801     Safefree(parser->lex_brackstack);
802     Safefree(parser->lex_casestack);
803     Safefree(parser->lex_shared);
804     PL_parser = parser->old_parser;
805     Safefree(parser);
806 }
807 
808 void
809 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
810 {
811 #ifdef PERL_MAD
812     I32 nexttoke = parser->lasttoke;
813 #else
814     I32 nexttoke = parser->nexttoke;
815 #endif
816     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
817     while (nexttoke--) {
818 #ifdef PERL_MAD
819 	if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
820 				& 0xffff)
821 	 && parser->nexttoke[nexttoke].next_val.opval
822 	 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
823 	 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
824 		op_free(parser->nexttoke[nexttoke].next_val.opval);
825 		parser->nexttoke[nexttoke].next_val.opval = NULL;
826 	}
827 #else
828 	if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
829 	 && parser->nextval[nexttoke].opval
830 	 && parser->nextval[nexttoke].opval->op_slabbed
831 	 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
832 	    op_free(parser->nextval[nexttoke].opval);
833 	    parser->nextval[nexttoke].opval = NULL;
834 	}
835 #endif
836     }
837 }
838 
839 
840 /*
841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
842 
843 Buffer scalar containing the chunk currently under consideration of the
844 text currently being lexed.  This is always a plain string scalar (for
845 which C<SvPOK> is true).  It is not intended to be used as a scalar by
846 normal scalar means; instead refer to the buffer directly by the pointer
847 variables described below.
848 
849 The lexer maintains various C<char*> pointers to things in the
850 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
851 reallocated, all of these pointers must be updated.  Don't attempt to
852 do this manually, but rather use L</lex_grow_linestr> if you need to
853 reallocate the buffer.
854 
855 The content of the text chunk in the buffer is commonly exactly one
856 complete line of input, up to and including a newline terminator,
857 but there are situations where it is otherwise.  The octets of the
858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
859 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
860 flag on this scalar, which may disagree with it.
861 
862 For direct examination of the buffer, the variable
863 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
865 of these pointers is usually preferable to examination of the scalar
866 through normal scalar means.
867 
868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
869 
870 Direct pointer to the end of the chunk of text currently being lexed, the
871 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
872 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
873 always located at the end of the buffer, and does not count as part of
874 the buffer's contents.
875 
876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
877 
878 Points to the current position of lexing inside the lexer buffer.
879 Characters around this point may be freely examined, within
880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
881 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
883 
884 Lexing code (whether in the Perl core or not) moves this pointer past
885 the characters that it consumes.  It is also expected to perform some
886 bookkeeping whenever a newline character is consumed.  This movement
887 can be more conveniently performed by the function L</lex_read_to>,
888 which handles newlines appropriately.
889 
890 Interpretation of the buffer's octets can be abstracted out by
891 using the slightly higher-level functions L</lex_peek_unichar> and
892 L</lex_read_unichar>.
893 
894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
895 
896 Points to the start of the current line inside the lexer buffer.
897 This is useful for indicating at which column an error occurred, and
898 not much else.  This must be updated by any lexing code that consumes
899 a newline; the function L</lex_read_to> handles this detail.
900 
901 =cut
902 */
903 
904 /*
905 =for apidoc Amx|bool|lex_bufutf8
906 
907 Indicates whether the octets in the lexer buffer
908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
909 of Unicode characters.  If not, they should be interpreted as Latin-1
910 characters.  This is analogous to the C<SvUTF8> flag for scalars.
911 
912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
913 contains valid UTF-8.  Lexing code must be robust in the face of invalid
914 encoding.
915 
916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
917 is significant, but not the whole story regarding the input character
918 encoding.  Normally, when a file is being read, the scalar contains octets
919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
920 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
923 is in effect.  This logic may change in the future; use this function
924 instead of implementing the logic yourself.
925 
926 =cut
927 */
928 
929 bool
930 Perl_lex_bufutf8(pTHX)
931 {
932     return UTF;
933 }
934 
935 /*
936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
937 
938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
939 at least I<len> octets (including terminating C<NUL>).  Returns a
940 pointer to the reallocated buffer.  This is necessary before making
941 any direct modification of the buffer that would increase its length.
942 L</lex_stuff_pvn> provides a more convenient way to insert text into
943 the buffer.
944 
945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
946 this function updates all of the lexer's variables that point directly
947 into the buffer.
948 
949 =cut
950 */
951 
952 char *
953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
954 {
955     SV *linestr;
956     char *buf;
957     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
958     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
959     linestr = PL_parser->linestr;
960     buf = SvPVX(linestr);
961     if (len <= SvLEN(linestr))
962 	return buf;
963     bufend_pos = PL_parser->bufend - buf;
964     bufptr_pos = PL_parser->bufptr - buf;
965     oldbufptr_pos = PL_parser->oldbufptr - buf;
966     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
967     linestart_pos = PL_parser->linestart - buf;
968     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
969     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
970     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
971                             PL_parser->lex_shared->re_eval_start - buf : 0;
972 
973     buf = sv_grow(linestr, len);
974 
975     PL_parser->bufend = buf + bufend_pos;
976     PL_parser->bufptr = buf + bufptr_pos;
977     PL_parser->oldbufptr = buf + oldbufptr_pos;
978     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
979     PL_parser->linestart = buf + linestart_pos;
980     if (PL_parser->last_uni)
981 	PL_parser->last_uni = buf + last_uni_pos;
982     if (PL_parser->last_lop)
983 	PL_parser->last_lop = buf + last_lop_pos;
984     if (PL_parser->lex_shared->re_eval_start)
985         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
986     return buf;
987 }
988 
989 /*
990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
991 
992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
994 reallocating the buffer if necessary.  This means that lexing code that
995 runs later will see the characters as if they had appeared in the input.
996 It is not recommended to do this as part of normal parsing, and most
997 uses of this facility run the risk of the inserted characters being
998 interpreted in an unintended manner.
999 
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1007 
1008 =cut
1009 */
1010 
1011 void
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1013 {
1014     dVAR;
1015     char *bufptr;
1016     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017     if (flags & ~(LEX_STUFF_UTF8))
1018 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1019     if (UTF) {
1020 	if (flags & LEX_STUFF_UTF8) {
1021 	    goto plain_copy;
1022 	} else {
1023 	    STRLEN highhalf = 0;    /* Count of variants */
1024 	    const char *p, *e = pv+len;
1025 	    for (p = pv; p != e; p++) {
1026 		if (! UTF8_IS_INVARIANT(*p)) {
1027                     highhalf++;
1028                 }
1029             }
1030 	    if (!highhalf)
1031 		goto plain_copy;
1032 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033 	    bufptr = PL_parser->bufptr;
1034 	    Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035 	    SvCUR_set(PL_parser->linestr,
1036 	    	SvCUR(PL_parser->linestr) + len+highhalf);
1037 	    PL_parser->bufend += len+highhalf;
1038 	    for (p = pv; p != e; p++) {
1039 		U8 c = (U8)*p;
1040 		if (! UTF8_IS_INVARIANT(c)) {
1041 		    *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042 		    *bufptr++ = UTF8_TWO_BYTE_LO(c);
1043 		} else {
1044 		    *bufptr++ = (char)c;
1045 		}
1046 	    }
1047 	}
1048     } else {
1049 	if (flags & LEX_STUFF_UTF8) {
1050 	    STRLEN highhalf = 0;
1051 	    const char *p, *e = pv+len;
1052 	    for (p = pv; p != e; p++) {
1053 		U8 c = (U8)*p;
1054 		if (UTF8_IS_ABOVE_LATIN1(c)) {
1055 		    Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056 				"non-Latin-1 character into Latin-1 input");
1057 		} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1058 		    p++;
1059 		    highhalf++;
1060 		} else if (! UTF8_IS_INVARIANT(c)) {
1061 		    /* malformed UTF-8 */
1062 		    ENTER;
1063 		    SAVESPTR(PL_warnhook);
1064 		    PL_warnhook = PERL_WARNHOOK_FATAL;
1065 		    utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1066 		    LEAVE;
1067 		}
1068 	    }
1069 	    if (!highhalf)
1070 		goto plain_copy;
1071 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072 	    bufptr = PL_parser->bufptr;
1073 	    Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074 	    SvCUR_set(PL_parser->linestr,
1075 	    	SvCUR(PL_parser->linestr) + len-highhalf);
1076 	    PL_parser->bufend += len-highhalf;
1077 	    p = pv;
1078 	    while (p < e) {
1079 		if (UTF8_IS_INVARIANT(*p)) {
1080 		    *bufptr++ = *p;
1081                     p++;
1082 		}
1083 		else {
1084                     assert(p < e -1 );
1085 		    *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1086 		    p += 2;
1087                 }
1088 	    }
1089 	} else {
1090 	  plain_copy:
1091 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092 	    bufptr = PL_parser->bufptr;
1093 	    Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094 	    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095 	    PL_parser->bufend += len;
1096 	    Copy(pv, bufptr, len, char);
1097 	}
1098     }
1099 }
1100 
1101 /*
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1103 
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary.  This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1111 
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul.  These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>.  The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1119 
1120 =cut
1121 */
1122 
1123 void
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1125 {
1126     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127     lex_stuff_pvn(pv, strlen(pv), flags);
1128 }
1129 
1130 /*
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1132 
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary.  This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1140 
1141 The string to be inserted is the string value of I<sv>.  The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1146 
1147 =cut
1148 */
1149 
1150 void
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1152 {
1153     char *pv;
1154     STRLEN len;
1155     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1156     if (flags)
1157 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1158     pv = SvPV(sv, len);
1159     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1160 }
1161 
1162 /*
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1164 
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1169 
1170 This is not the normal way to consume lexed text.  For that, use
1171 L</lex_read_to>.
1172 
1173 =cut
1174 */
1175 
1176 void
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1178 {
1179     char *buf, *bufend;
1180     STRLEN unstuff_len;
1181     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182     buf = PL_parser->bufptr;
1183     if (ptr < buf)
1184 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185     if (ptr == buf)
1186 	return;
1187     bufend = PL_parser->bufend;
1188     if (ptr > bufend)
1189 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190     unstuff_len = ptr - buf;
1191     Move(ptr, buf, bufend+1-ptr, char);
1192     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193     PL_parser->bufend = bufend - unstuff_len;
1194 }
1195 
1196 /*
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1198 
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1203 
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1207 
1208 =cut
1209 */
1210 
1211 void
1212 Perl_lex_read_to(pTHX_ char *ptr)
1213 {
1214     char *s;
1215     PERL_ARGS_ASSERT_LEX_READ_TO;
1216     s = PL_parser->bufptr;
1217     if (ptr < s || ptr > PL_parser->bufend)
1218 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219     for (; s != ptr; s++)
1220 	if (*s == '\n') {
1221 	    COPLINE_INC_WITH_HERELINES;
1222 	    PL_parser->linestart = s+1;
1223 	}
1224     PL_parser->bufptr = ptr;
1225 }
1226 
1227 /*
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1229 
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>.  The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately.  I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1235 
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it.  However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1243 
1244 =cut
1245 */
1246 
1247 void
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1249 {
1250     char *buf;
1251     STRLEN discard_len;
1252     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253     buf = SvPVX(PL_parser->linestr);
1254     if (ptr < buf)
1255 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256     if (ptr == buf)
1257 	return;
1258     if (ptr > PL_parser->bufptr)
1259 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260     discard_len = ptr - buf;
1261     if (PL_parser->oldbufptr < ptr)
1262 	PL_parser->oldbufptr = ptr;
1263     if (PL_parser->oldoldbufptr < ptr)
1264 	PL_parser->oldoldbufptr = ptr;
1265     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266 	PL_parser->last_uni = NULL;
1267     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268 	PL_parser->last_lop = NULL;
1269     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271     PL_parser->bufend -= discard_len;
1272     PL_parser->bufptr -= discard_len;
1273     PL_parser->oldbufptr -= discard_len;
1274     PL_parser->oldoldbufptr -= discard_len;
1275     if (PL_parser->last_uni)
1276 	PL_parser->last_uni -= discard_len;
1277     if (PL_parser->last_lop)
1278 	PL_parser->last_lop -= discard_len;
1279 }
1280 
1281 /*
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1283 
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more.  It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1289 
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded.  If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1296 
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1299 
1300 =cut
1301 */
1302 
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM  0x40000000 /* here-doc */
1305 
1306 bool
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1308 {
1309     SV *linestr;
1310     char *buf;
1311     STRLEN old_bufend_pos, new_bufend_pos;
1312     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314     bool got_some_for_debugger = 0;
1315     bool got_some;
1316     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1319 	return FALSE;
1320     linestr = PL_parser->linestr;
1321     buf = SvPVX(linestr);
1322     if (!(flags & LEX_KEEP_PREVIOUS) &&
1323 	    PL_parser->bufptr == PL_parser->bufend) {
1324 	old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1325 	linestart_pos = 0;
1326 	if (PL_parser->last_uni != PL_parser->bufend)
1327 	    PL_parser->last_uni = NULL;
1328 	if (PL_parser->last_lop != PL_parser->bufend)
1329 	    PL_parser->last_lop = NULL;
1330 	last_uni_pos = last_lop_pos = 0;
1331 	*buf = 0;
1332 	SvCUR(linestr) = 0;
1333     } else {
1334 	old_bufend_pos = PL_parser->bufend - buf;
1335 	bufptr_pos = PL_parser->bufptr - buf;
1336 	oldbufptr_pos = PL_parser->oldbufptr - buf;
1337 	oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1338 	linestart_pos = PL_parser->linestart - buf;
1339 	last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1340 	last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1341     }
1342     if (flags & LEX_FAKE_EOF) {
1343 	goto eof;
1344     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1345 	got_some = 0;
1346     } else if (filter_gets(linestr, old_bufend_pos)) {
1347 	got_some = 1;
1348 	got_some_for_debugger = 1;
1349     } else if (flags & LEX_NO_TERM) {
1350 	got_some = 0;
1351     } else {
1352 	if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1353 	    sv_setpvs(linestr, "");
1354 	eof:
1355 	/* End of real input.  Close filehandle (unless it was STDIN),
1356 	 * then add implicit termination.
1357 	 */
1358 	if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1359 	    PerlIO_clearerr(PL_parser->rsfp);
1360 	else if (PL_parser->rsfp)
1361 	    (void)PerlIO_close(PL_parser->rsfp);
1362 	PL_parser->rsfp = NULL;
1363 	PL_parser->in_pod = PL_parser->filtered = 0;
1364 #ifdef PERL_MAD
1365 	if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1366 	    PL_faketokens = 1;
1367 #endif
1368 	if (!PL_in_eval && PL_minus_p) {
1369 	    sv_catpvs(linestr,
1370 		/*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1371 	    PL_minus_n = PL_minus_p = 0;
1372 	} else if (!PL_in_eval && PL_minus_n) {
1373 	    sv_catpvs(linestr, /*{*/";}");
1374 	    PL_minus_n = 0;
1375 	} else
1376 	    sv_catpvs(linestr, ";");
1377 	got_some = 1;
1378     }
1379     buf = SvPVX(linestr);
1380     new_bufend_pos = SvCUR(linestr);
1381     PL_parser->bufend = buf + new_bufend_pos;
1382     PL_parser->bufptr = buf + bufptr_pos;
1383     PL_parser->oldbufptr = buf + oldbufptr_pos;
1384     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1385     PL_parser->linestart = buf + linestart_pos;
1386     if (PL_parser->last_uni)
1387 	PL_parser->last_uni = buf + last_uni_pos;
1388     if (PL_parser->last_lop)
1389 	PL_parser->last_lop = buf + last_lop_pos;
1390     if (PL_parser->preambling != NOLINE) {
1391 	CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1392 	PL_parser->preambling = NOLINE;
1393     }
1394     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1395 	    PL_curstash != PL_debstash) {
1396 	/* debugger active and we're not compiling the debugger code,
1397 	 * so store the line into the debugger's array of lines
1398 	 */
1399 	update_debugger_info(NULL, buf+old_bufend_pos,
1400 	    new_bufend_pos-old_bufend_pos);
1401     }
1402     return got_some;
1403 }
1404 
1405 /*
1406 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1407 
1408 Looks ahead one (Unicode) character in the text currently being lexed.
1409 Returns the codepoint (unsigned integer value) of the next character,
1410 or -1 if lexing has reached the end of the input text.  To consume the
1411 peeked character, use L</lex_read_unichar>.
1412 
1413 If the next character is in (or extends into) the next chunk of input
1414 text, the next chunk will be read in.  Normally the current chunk will be
1415 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1416 then the current chunk will not be discarded.
1417 
1418 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1419 is encountered, an exception is generated.
1420 
1421 =cut
1422 */
1423 
1424 I32
1425 Perl_lex_peek_unichar(pTHX_ U32 flags)
1426 {
1427     dVAR;
1428     char *s, *bufend;
1429     if (flags & ~(LEX_KEEP_PREVIOUS))
1430 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1431     s = PL_parser->bufptr;
1432     bufend = PL_parser->bufend;
1433     if (UTF) {
1434 	U8 head;
1435 	I32 unichar;
1436 	STRLEN len, retlen;
1437 	if (s == bufend) {
1438 	    if (!lex_next_chunk(flags))
1439 		return -1;
1440 	    s = PL_parser->bufptr;
1441 	    bufend = PL_parser->bufend;
1442 	}
1443 	head = (U8)*s;
1444 	if (UTF8_IS_INVARIANT(head))
1445 	    return head;
1446 	if (UTF8_IS_START(head)) {
1447 	    len = UTF8SKIP(&head);
1448 	    while ((STRLEN)(bufend-s) < len) {
1449 		if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1450 		    break;
1451 		s = PL_parser->bufptr;
1452 		bufend = PL_parser->bufend;
1453 	    }
1454 	}
1455 	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1456 	if (retlen == (STRLEN)-1) {
1457 	    /* malformed UTF-8 */
1458 	    ENTER;
1459 	    SAVESPTR(PL_warnhook);
1460 	    PL_warnhook = PERL_WARNHOOK_FATAL;
1461 	    utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1462 	    LEAVE;
1463 	}
1464 	return unichar;
1465     } else {
1466 	if (s == bufend) {
1467 	    if (!lex_next_chunk(flags))
1468 		return -1;
1469 	    s = PL_parser->bufptr;
1470 	}
1471 	return (U8)*s;
1472     }
1473 }
1474 
1475 /*
1476 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1477 
1478 Reads the next (Unicode) character in the text currently being lexed.
1479 Returns the codepoint (unsigned integer value) of the character read,
1480 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1481 if lexing has reached the end of the input text.  To non-destructively
1482 examine the next character, use L</lex_peek_unichar> instead.
1483 
1484 If the next character is in (or extends into) the next chunk of input
1485 text, the next chunk will be read in.  Normally the current chunk will be
1486 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1487 then the current chunk will not be discarded.
1488 
1489 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1490 is encountered, an exception is generated.
1491 
1492 =cut
1493 */
1494 
1495 I32
1496 Perl_lex_read_unichar(pTHX_ U32 flags)
1497 {
1498     I32 c;
1499     if (flags & ~(LEX_KEEP_PREVIOUS))
1500 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1501     c = lex_peek_unichar(flags);
1502     if (c != -1) {
1503 	if (c == '\n')
1504 	    COPLINE_INC_WITH_HERELINES;
1505 	if (UTF)
1506 	    PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1507 	else
1508 	    ++(PL_parser->bufptr);
1509     }
1510     return c;
1511 }
1512 
1513 /*
1514 =for apidoc Amx|void|lex_read_space|U32 flags
1515 
1516 Reads optional spaces, in Perl style, in the text currently being
1517 lexed.  The spaces may include ordinary whitespace characters and
1518 Perl-style comments.  C<#line> directives are processed if encountered.
1519 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1520 at a non-space character (or the end of the input text).
1521 
1522 If spaces extend into the next chunk of input text, the next chunk will
1523 be read in.  Normally the current chunk will be discarded at the same
1524 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1525 chunk will not be discarded.
1526 
1527 =cut
1528 */
1529 
1530 #define LEX_NO_INCLINE    0x40000000
1531 #define LEX_NO_NEXT_CHUNK 0x80000000
1532 
1533 void
1534 Perl_lex_read_space(pTHX_ U32 flags)
1535 {
1536     char *s, *bufend;
1537     const bool can_incline = !(flags & LEX_NO_INCLINE);
1538     bool need_incline = 0;
1539     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1540 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1541 #ifdef PERL_MAD
1542     if (PL_skipwhite) {
1543 	sv_free(PL_skipwhite);
1544 	PL_skipwhite = NULL;
1545     }
1546     if (PL_madskills)
1547 	PL_skipwhite = newSVpvs("");
1548 #endif /* PERL_MAD */
1549     s = PL_parser->bufptr;
1550     bufend = PL_parser->bufend;
1551     while (1) {
1552 	char c = *s;
1553 	if (c == '#') {
1554 	    do {
1555 		c = *++s;
1556 	    } while (!(c == '\n' || (c == 0 && s == bufend)));
1557 	} else if (c == '\n') {
1558 	    s++;
1559 	    if (can_incline) {
1560 		PL_parser->linestart = s;
1561 		if (s == bufend)
1562 		    need_incline = 1;
1563 		else
1564 		    incline(s);
1565 	    }
1566 	} else if (isSPACE(c)) {
1567 	    s++;
1568 	} else if (c == 0 && s == bufend) {
1569 	    bool got_more;
1570 	    line_t l;
1571 #ifdef PERL_MAD
1572 	    if (PL_madskills)
1573 		sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1574 #endif /* PERL_MAD */
1575 	    if (flags & LEX_NO_NEXT_CHUNK)
1576 		break;
1577 	    PL_parser->bufptr = s;
1578 	    l = CopLINE(PL_curcop);
1579 	    CopLINE(PL_curcop) += PL_parser->herelines + 1;
1580 	    got_more = lex_next_chunk(flags);
1581 	    CopLINE_set(PL_curcop, l);
1582 	    s = PL_parser->bufptr;
1583 	    bufend = PL_parser->bufend;
1584 	    if (!got_more)
1585 		break;
1586 	    if (can_incline && need_incline && PL_parser->rsfp) {
1587 		incline(s);
1588 		need_incline = 0;
1589 	    }
1590 	} else {
1591 	    break;
1592 	}
1593     }
1594 #ifdef PERL_MAD
1595     if (PL_madskills)
1596 	sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1597 #endif /* PERL_MAD */
1598     PL_parser->bufptr = s;
1599 }
1600 
1601 /*
1602 
1603 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1604 
1605 This function performs syntax checking on a prototype, C<proto>.
1606 If C<warn> is true, any illegal characters or mismatched brackets
1607 will trigger illegalproto warnings, declaring that they were
1608 detected in the prototype for C<name>.
1609 
1610 The return value is C<true> if this is a valid prototype, and
1611 C<false> if it is not, regardless of whether C<warn> was C<true> or
1612 C<false>.
1613 
1614 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1615 
1616 =cut
1617 
1618  */
1619 
1620 bool
1621 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1622 {
1623     STRLEN len, origlen;
1624     char *p = proto ? SvPV(proto, len) : NULL;
1625     bool bad_proto = FALSE;
1626     bool in_brackets = FALSE;
1627     bool after_slash = FALSE;
1628     char greedy_proto = ' ';
1629     bool proto_after_greedy_proto = FALSE;
1630     bool must_be_last = FALSE;
1631     bool underscore = FALSE;
1632     bool bad_proto_after_underscore = FALSE;
1633 
1634     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1635 
1636     if (!proto)
1637 	return TRUE;
1638 
1639     origlen = len;
1640     for (; len--; p++) {
1641 	if (!isSPACE(*p)) {
1642 	    if (must_be_last)
1643 		proto_after_greedy_proto = TRUE;
1644 	    if (underscore) {
1645 		if (!strchr(";@%", *p))
1646 		    bad_proto_after_underscore = TRUE;
1647 		underscore = FALSE;
1648 	    }
1649 	    if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1650 		bad_proto = TRUE;
1651 	    }
1652 	    else {
1653 		if (*p == '[')
1654 		    in_brackets = TRUE;
1655 		else if (*p == ']')
1656 		    in_brackets = FALSE;
1657 		else if ((*p == '@' || *p == '%') &&
1658 		    !after_slash &&
1659 		    !in_brackets ) {
1660 		    must_be_last = TRUE;
1661 		    greedy_proto = *p;
1662 		}
1663 		else if (*p == '_')
1664 		    underscore = TRUE;
1665 	    }
1666 	    if (*p == '\\')
1667 		after_slash = TRUE;
1668 	    else
1669 		after_slash = FALSE;
1670 	}
1671     }
1672 
1673     if (warn) {
1674 	SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1675 	p -= origlen;
1676 	p = SvUTF8(proto)
1677 	    ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1678 	                     origlen, UNI_DISPLAY_ISPRINT)
1679 	    : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1680 
1681 	if (proto_after_greedy_proto)
1682 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1683 			"Prototype after '%c' for %"SVf" : %s",
1684 			greedy_proto, SVfARG(name), p);
1685 	if (in_brackets)
1686 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1687 			"Missing ']' in prototype for %"SVf" : %s",
1688 			SVfARG(name), p);
1689 	if (bad_proto)
1690 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1691 			"Illegal character in prototype for %"SVf" : %s",
1692 			SVfARG(name), p);
1693 	if (bad_proto_after_underscore)
1694 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1695 			"Illegal character after '_' in prototype for %"SVf" : %s",
1696 			SVfARG(name), p);
1697     }
1698 
1699     return (! (proto_after_greedy_proto || bad_proto) );
1700 }
1701 
1702 /*
1703  * S_incline
1704  * This subroutine has nothing to do with tilting, whether at windmills
1705  * or pinball tables.  Its name is short for "increment line".  It
1706  * increments the current line number in CopLINE(PL_curcop) and checks
1707  * to see whether the line starts with a comment of the form
1708  *    # line 500 "foo.pm"
1709  * If so, it sets the current line number and file to the values in the comment.
1710  */
1711 
1712 STATIC void
1713 S_incline(pTHX_ const char *s)
1714 {
1715     dVAR;
1716     const char *t;
1717     const char *n;
1718     const char *e;
1719     line_t line_num;
1720 
1721     PERL_ARGS_ASSERT_INCLINE;
1722 
1723     COPLINE_INC_WITH_HERELINES;
1724     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1725      && s+1 == PL_bufend && *s == ';') {
1726 	/* fake newline in string eval */
1727 	CopLINE_dec(PL_curcop);
1728 	return;
1729     }
1730     if (*s++ != '#')
1731 	return;
1732     while (SPACE_OR_TAB(*s))
1733 	s++;
1734     if (strnEQ(s, "line", 4))
1735 	s += 4;
1736     else
1737 	return;
1738     if (SPACE_OR_TAB(*s))
1739 	s++;
1740     else
1741 	return;
1742     while (SPACE_OR_TAB(*s))
1743 	s++;
1744     if (!isDIGIT(*s))
1745 	return;
1746 
1747     n = s;
1748     while (isDIGIT(*s))
1749 	s++;
1750     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1751 	return;
1752     while (SPACE_OR_TAB(*s))
1753 	s++;
1754     if (*s == '"' && (t = strchr(s+1, '"'))) {
1755 	s++;
1756 	e = t + 1;
1757     }
1758     else {
1759 	t = s;
1760 	while (!isSPACE(*t))
1761 	    t++;
1762 	e = t;
1763     }
1764     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1765 	e++;
1766     if (*e != '\n' && *e != '\0')
1767 	return;		/* false alarm */
1768 
1769     line_num = atoi(n)-1;
1770 
1771     if (t - s > 0) {
1772 	const STRLEN len = t - s;
1773 
1774 	if (!PL_rsfp && !PL_parser->filtered) {
1775 	    /* must copy *{"::_<(eval N)[oldfilename:L]"}
1776 	     * to *{"::_<newfilename"} */
1777 	    /* However, the long form of evals is only turned on by the
1778 	       debugger - usually they're "(eval %lu)" */
1779 	    GV * const cfgv = CopFILEGV(PL_curcop);
1780 	    if (cfgv) {
1781 		char smallbuf[128];
1782 		STRLEN tmplen2 = len;
1783 		char *tmpbuf2;
1784 		GV *gv2;
1785 
1786 		if (tmplen2 + 2 <= sizeof smallbuf)
1787 		    tmpbuf2 = smallbuf;
1788 		else
1789 		    Newx(tmpbuf2, tmplen2 + 2, char);
1790 
1791 		tmpbuf2[0] = '_';
1792 		tmpbuf2[1] = '<';
1793 
1794 		memcpy(tmpbuf2 + 2, s, tmplen2);
1795 		tmplen2 += 2;
1796 
1797 		gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1798 		if (!isGV(gv2)) {
1799 		    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1800 		    /* adjust ${"::_<newfilename"} to store the new file name */
1801 		    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1802 		    /* The line number may differ. If that is the case,
1803 		       alias the saved lines that are in the array.
1804 		       Otherwise alias the whole array. */
1805 		    if (CopLINE(PL_curcop) == line_num) {
1806 			GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1807 			GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1808 		    }
1809 		    else if (GvAV(cfgv)) {
1810 			AV * const av = GvAV(cfgv);
1811 			const I32 start = CopLINE(PL_curcop)+1;
1812 			I32 items = AvFILLp(av) - start;
1813 			if (items > 0) {
1814 			    AV * const av2 = GvAVn(gv2);
1815 			    SV **svp = AvARRAY(av) + start;
1816 			    I32 l = (I32)line_num+1;
1817 			    while (items--)
1818 				av_store(av2, l++, SvREFCNT_inc(*svp++));
1819 			}
1820 		    }
1821 		}
1822 
1823 		if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1824 	    }
1825 	}
1826 	CopFILE_free(PL_curcop);
1827 	CopFILE_setn(PL_curcop, s, len);
1828     }
1829     CopLINE_set(PL_curcop, line_num);
1830 }
1831 
1832 #define skipspace(s) skipspace_flags(s, 0)
1833 
1834 #ifdef PERL_MAD
1835 /* skip space before PL_thistoken */
1836 
1837 STATIC char *
1838 S_skipspace0(pTHX_ char *s)
1839 {
1840     PERL_ARGS_ASSERT_SKIPSPACE0;
1841 
1842     s = skipspace(s);
1843     if (!PL_madskills)
1844 	return s;
1845     if (PL_skipwhite) {
1846 	if (!PL_thiswhite)
1847 	    PL_thiswhite = newSVpvs("");
1848 	sv_catsv(PL_thiswhite, PL_skipwhite);
1849 	sv_free(PL_skipwhite);
1850 	PL_skipwhite = 0;
1851     }
1852     PL_realtokenstart = s - SvPVX(PL_linestr);
1853     return s;
1854 }
1855 
1856 /* skip space after PL_thistoken */
1857 
1858 STATIC char *
1859 S_skipspace1(pTHX_ char *s)
1860 {
1861     const char *start = s;
1862     I32 startoff = start - SvPVX(PL_linestr);
1863 
1864     PERL_ARGS_ASSERT_SKIPSPACE1;
1865 
1866     s = skipspace(s);
1867     if (!PL_madskills)
1868 	return s;
1869     start = SvPVX(PL_linestr) + startoff;
1870     if (!PL_thistoken && PL_realtokenstart >= 0) {
1871 	const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1872 	PL_thistoken = newSVpvn(tstart, start - tstart);
1873     }
1874     PL_realtokenstart = -1;
1875     if (PL_skipwhite) {
1876 	if (!PL_nextwhite)
1877 	    PL_nextwhite = newSVpvs("");
1878 	sv_catsv(PL_nextwhite, PL_skipwhite);
1879 	sv_free(PL_skipwhite);
1880 	PL_skipwhite = 0;
1881     }
1882     return s;
1883 }
1884 
1885 STATIC char *
1886 S_skipspace2(pTHX_ char *s, SV **svp)
1887 {
1888     char *start;
1889     const I32 startoff = s - SvPVX(PL_linestr);
1890 
1891     PERL_ARGS_ASSERT_SKIPSPACE2;
1892 
1893     s = skipspace(s);
1894     if (!PL_madskills || !svp)
1895 	return s;
1896     start = SvPVX(PL_linestr) + startoff;
1897     if (!PL_thistoken && PL_realtokenstart >= 0) {
1898 	char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1899 	PL_thistoken = newSVpvn(tstart, start - tstart);
1900 	PL_realtokenstart = -1;
1901     }
1902     if (PL_skipwhite) {
1903 	if (!*svp)
1904 	    *svp = newSVpvs("");
1905 	sv_setsv(*svp, PL_skipwhite);
1906 	sv_free(PL_skipwhite);
1907 	PL_skipwhite = 0;
1908     }
1909 
1910     return s;
1911 }
1912 #endif
1913 
1914 STATIC void
1915 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1916 {
1917     AV *av = CopFILEAVx(PL_curcop);
1918     if (av) {
1919 	SV * sv;
1920 	if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1921 	else {
1922 	    sv = *av_fetch(av, 0, 1);
1923 	    SvUPGRADE(sv, SVt_PVMG);
1924 	}
1925 	if (!SvPOK(sv)) sv_setpvs(sv,"");
1926 	if (orig_sv)
1927 	    sv_catsv(sv, orig_sv);
1928 	else
1929 	    sv_catpvn(sv, buf, len);
1930 	if (!SvIOK(sv)) {
1931 	    (void)SvIOK_on(sv);
1932 	    SvIV_set(sv, 0);
1933 	}
1934 	if (PL_parser->preambling == NOLINE)
1935 	    av_store(av, CopLINE(PL_curcop), sv);
1936     }
1937 }
1938 
1939 /*
1940  * S_skipspace
1941  * Called to gobble the appropriate amount and type of whitespace.
1942  * Skips comments as well.
1943  */
1944 
1945 STATIC char *
1946 S_skipspace_flags(pTHX_ char *s, U32 flags)
1947 {
1948 #ifdef PERL_MAD
1949     char *start = s;
1950 #endif /* PERL_MAD */
1951     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1952 #ifdef PERL_MAD
1953     if (PL_skipwhite) {
1954 	sv_free(PL_skipwhite);
1955 	PL_skipwhite = NULL;
1956     }
1957 #endif /* PERL_MAD */
1958     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1959 	while (s < PL_bufend && SPACE_OR_TAB(*s))
1960 	    s++;
1961     } else {
1962 	STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1963 	PL_bufptr = s;
1964 	lex_read_space(flags | LEX_KEEP_PREVIOUS |
1965 		(PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1966 		    LEX_NO_NEXT_CHUNK : 0));
1967 	s = PL_bufptr;
1968 	PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1969 	if (PL_linestart > PL_bufptr)
1970 	    PL_bufptr = PL_linestart;
1971 	return s;
1972     }
1973 #ifdef PERL_MAD
1974     if (PL_madskills)
1975 	PL_skipwhite = newSVpvn(start, s-start);
1976 #endif /* PERL_MAD */
1977     return s;
1978 }
1979 
1980 /*
1981  * S_check_uni
1982  * Check the unary operators to ensure there's no ambiguity in how they're
1983  * used.  An ambiguous piece of code would be:
1984  *     rand + 5
1985  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1986  * the +5 is its argument.
1987  */
1988 
1989 STATIC void
1990 S_check_uni(pTHX)
1991 {
1992     dVAR;
1993     const char *s;
1994     const char *t;
1995 
1996     if (PL_oldoldbufptr != PL_last_uni)
1997 	return;
1998     while (isSPACE(*PL_last_uni))
1999 	PL_last_uni++;
2000     s = PL_last_uni;
2001     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2002 	s += UTF ? UTF8SKIP(s) : 1;
2003     if ((t = strchr(s, '(')) && t < PL_bufptr)
2004 	return;
2005 
2006     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2007 		     "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2008 		     (int)(s - PL_last_uni), PL_last_uni);
2009 }
2010 
2011 /*
2012  * LOP : macro to build a list operator.  Its behaviour has been replaced
2013  * with a subroutine, S_lop() for which LOP is just another name.
2014  */
2015 
2016 #define LOP(f,x) return lop(f,x,s)
2017 
2018 /*
2019  * S_lop
2020  * Build a list operator (or something that might be one).  The rules:
2021  *  - if we have a next token, then it's a list operator [why?]
2022  *  - if the next thing is an opening paren, then it's a function
2023  *  - else it's a list operator
2024  */
2025 
2026 STATIC I32
2027 S_lop(pTHX_ I32 f, int x, char *s)
2028 {
2029     dVAR;
2030 
2031     PERL_ARGS_ASSERT_LOP;
2032 
2033     pl_yylval.ival = f;
2034     CLINE;
2035     PL_expect = x;
2036     PL_bufptr = s;
2037     PL_last_lop = PL_oldbufptr;
2038     PL_last_lop_op = (OPCODE)f;
2039 #ifdef PERL_MAD
2040     if (PL_lasttoke)
2041 	goto lstop;
2042 #else
2043     if (PL_nexttoke)
2044 	goto lstop;
2045 #endif
2046     if (*s == '(')
2047 	return REPORT(FUNC);
2048     s = PEEKSPACE(s);
2049     if (*s == '(')
2050 	return REPORT(FUNC);
2051     else {
2052 	lstop:
2053 	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2054 	    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2055 	return REPORT(LSTOP);
2056     }
2057 }
2058 
2059 #ifdef PERL_MAD
2060  /*
2061  * S_start_force
2062  * Sets up for an eventual force_next().  start_force(0) basically does
2063  * an unshift, while start_force(-1) does a push.  yylex removes items
2064  * on the "pop" end.
2065  */
2066 
2067 STATIC void
2068 S_start_force(pTHX_ int where)
2069 {
2070     int i;
2071 
2072     if (where < 0)	/* so people can duplicate start_force(PL_curforce) */
2073 	where = PL_lasttoke;
2074     assert(PL_curforce < 0 || PL_curforce == where);
2075     if (PL_curforce != where) {
2076 	for (i = PL_lasttoke; i > where; --i) {
2077 	    PL_nexttoke[i] = PL_nexttoke[i-1];
2078 	}
2079 	PL_lasttoke++;
2080     }
2081     if (PL_curforce < 0)	/* in case of duplicate start_force() */
2082 	Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2083     PL_curforce = where;
2084     if (PL_nextwhite) {
2085 	if (PL_madskills)
2086 	    curmad('^', newSVpvs(""));
2087 	CURMAD('_', PL_nextwhite);
2088     }
2089 }
2090 
2091 STATIC void
2092 S_curmad(pTHX_ char slot, SV *sv)
2093 {
2094     MADPROP **where;
2095 
2096     if (!sv)
2097 	return;
2098     if (PL_curforce < 0)
2099 	where = &PL_thismad;
2100     else
2101 	where = &PL_nexttoke[PL_curforce].next_mad;
2102 
2103     if (PL_faketokens)
2104 	sv_setpvs(sv, "");
2105     else {
2106 	if (!IN_BYTES) {
2107 	    if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2108 		SvUTF8_on(sv);
2109 	    else if (PL_encoding) {
2110 		sv_recode_to_utf8(sv, PL_encoding);
2111 	    }
2112 	}
2113     }
2114 
2115     /* keep a slot open for the head of the list? */
2116     if (slot != '_' && *where && (*where)->mad_key == '^') {
2117 	(*where)->mad_key = slot;
2118 	sv_free(MUTABLE_SV(((*where)->mad_val)));
2119 	(*where)->mad_val = (void*)sv;
2120     }
2121     else
2122 	addmad(newMADsv(slot, sv), where, 0);
2123 }
2124 #else
2125 #  define start_force(where)    NOOP
2126 #  define curmad(slot, sv)      NOOP
2127 #endif
2128 
2129 /*
2130  * S_force_next
2131  * When the lexer realizes it knows the next token (for instance,
2132  * it is reordering tokens for the parser) then it can call S_force_next
2133  * to know what token to return the next time the lexer is called.  Caller
2134  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2135  * and possibly PL_expect to ensure the lexer handles the token correctly.
2136  */
2137 
2138 STATIC void
2139 S_force_next(pTHX_ I32 type)
2140 {
2141     dVAR;
2142 #ifdef DEBUGGING
2143     if (DEBUG_T_TEST) {
2144         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2145 	tokereport(type, &NEXTVAL_NEXTTOKE);
2146     }
2147 #endif
2148 #ifdef PERL_MAD
2149     if (PL_curforce < 0)
2150 	start_force(PL_lasttoke);
2151     PL_nexttoke[PL_curforce].next_type = type;
2152     if (PL_lex_state != LEX_KNOWNEXT)
2153  	PL_lex_defer = PL_lex_state;
2154     PL_lex_state = LEX_KNOWNEXT;
2155     PL_lex_expect = PL_expect;
2156     PL_curforce = -1;
2157 #else
2158     PL_nexttype[PL_nexttoke] = type;
2159     PL_nexttoke++;
2160     if (PL_lex_state != LEX_KNOWNEXT) {
2161 	PL_lex_defer = PL_lex_state;
2162 	PL_lex_expect = PL_expect;
2163 	PL_lex_state = LEX_KNOWNEXT;
2164     }
2165 #endif
2166 }
2167 
2168 /*
2169  * S_postderef
2170  *
2171  * This subroutine handles postfix deref syntax after the arrow has already
2172  * been emitted.  @* $* etc. are emitted as two separate token right here.
2173  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2174  * only the first, leaving yylex to find the next.
2175  */
2176 
2177 static int
2178 S_postderef(pTHX_ int const funny, char const next)
2179 {
2180     dVAR;
2181     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2182     assert(strchr("*[{", next));
2183     if (next == '*') {
2184 	PL_expect = XOPERATOR;
2185 	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186 	    assert('@' == funny || '$' == funny || DOLSHARP == funny);
2187 	    PL_lex_state = LEX_INTERPEND;
2188 	    start_force(PL_curforce);
2189 	    force_next(POSTJOIN);
2190 	}
2191 	start_force(PL_curforce);
2192 	force_next(next);
2193 	PL_bufptr+=2;
2194     }
2195     else {
2196 	if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2197 	 && !PL_lex_brackets)
2198 	    PL_lex_dojoin = 2;
2199 	PL_expect = XOPERATOR;
2200 	PL_bufptr++;
2201     }
2202     return funny;
2203 }
2204 
2205 void
2206 Perl_yyunlex(pTHX)
2207 {
2208     int yyc = PL_parser->yychar;
2209     if (yyc != YYEMPTY) {
2210 	if (yyc) {
2211 	    start_force(-1);
2212 	    NEXTVAL_NEXTTOKE = PL_parser->yylval;
2213 	    if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2214 		PL_lex_allbrackets--;
2215 		PL_lex_brackets--;
2216 		yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2217 	    } else if (yyc == '('/*)*/) {
2218 		PL_lex_allbrackets--;
2219 		yyc |= (2<<24);
2220 	    }
2221 	    force_next(yyc);
2222 	}
2223 	PL_parser->yychar = YYEMPTY;
2224     }
2225 }
2226 
2227 STATIC SV *
2228 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2229 {
2230     dVAR;
2231     SV * const sv = newSVpvn_utf8(start, len,
2232 				  !IN_BYTES
2233 				  && UTF
2234 				  && !is_ascii_string((const U8*)start, len)
2235 				  && is_utf8_string((const U8*)start, len));
2236     return sv;
2237 }
2238 
2239 /*
2240  * S_force_word
2241  * When the lexer knows the next thing is a word (for instance, it has
2242  * just seen -> and it knows that the next char is a word char, then
2243  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2244  * lookahead.
2245  *
2246  * Arguments:
2247  *   char *start : buffer position (must be within PL_linestr)
2248  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2249  *   int check_keyword : if true, Perl checks to make sure the word isn't
2250  *       a keyword (do this if the word is a label, e.g. goto FOO)
2251  *   int allow_pack : if true, : characters will also be allowed (require,
2252  *       use, etc. do this)
2253  *   int allow_initial_tick : used by the "sub" lexer only.
2254  */
2255 
2256 STATIC char *
2257 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2258 {
2259     dVAR;
2260     char *s;
2261     STRLEN len;
2262 
2263     PERL_ARGS_ASSERT_FORCE_WORD;
2264 
2265     start = SKIPSPACE1(start);
2266     s = start;
2267     if (isIDFIRST_lazy_if(s,UTF) ||
2268 	(allow_pack && *s == ':') )
2269     {
2270 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2271 	if (check_keyword) {
2272 	  char *s2 = PL_tokenbuf;
2273 	  STRLEN len2 = len;
2274 	  if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2275 	    s2 += 6, len2 -= 6;
2276 	  if (keyword(s2, len2, 0))
2277 	    return start;
2278 	}
2279 	start_force(PL_curforce);
2280 	if (PL_madskills)
2281 	    curmad('X', newSVpvn(start,s-start));
2282 	if (token == METHOD) {
2283 	    s = SKIPSPACE1(s);
2284 	    if (*s == '(')
2285 		PL_expect = XTERM;
2286 	    else {
2287 		PL_expect = XOPERATOR;
2288 	    }
2289 	}
2290 	if (PL_madskills)
2291 	    curmad('g', newSVpvs( "forced" ));
2292 	NEXTVAL_NEXTTOKE.opval
2293 	    = (OP*)newSVOP(OP_CONST,0,
2294 			   S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2295 	NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2296 	force_next(token);
2297     }
2298     return s;
2299 }
2300 
2301 /*
2302  * S_force_ident
2303  * Called when the lexer wants $foo *foo &foo etc, but the program
2304  * text only contains the "foo" portion.  The first argument is a pointer
2305  * to the "foo", and the second argument is the type symbol to prefix.
2306  * Forces the next token to be a "WORD".
2307  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2308  */
2309 
2310 STATIC void
2311 S_force_ident(pTHX_ const char *s, int kind)
2312 {
2313     dVAR;
2314 
2315     PERL_ARGS_ASSERT_FORCE_IDENT;
2316 
2317     if (s[0]) {
2318 	const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2319 	OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2320                                                                 UTF ? SVf_UTF8 : 0));
2321 	start_force(PL_curforce);
2322 	NEXTVAL_NEXTTOKE.opval = o;
2323 	force_next(WORD);
2324 	if (kind) {
2325 	    o->op_private = OPpCONST_ENTERED;
2326 	    /* XXX see note in pp_entereval() for why we forgo typo
2327 	       warnings if the symbol must be introduced in an eval.
2328 	       GSAR 96-10-12 */
2329 	    gv_fetchpvn_flags(s, len,
2330 			      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2331 			      : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2332 			      kind == '$' ? SVt_PV :
2333 			      kind == '@' ? SVt_PVAV :
2334 			      kind == '%' ? SVt_PVHV :
2335 			      SVt_PVGV
2336 			      );
2337 	}
2338     }
2339 }
2340 
2341 static void
2342 S_force_ident_maybe_lex(pTHX_ char pit)
2343 {
2344     start_force(PL_curforce);
2345     NEXTVAL_NEXTTOKE.ival = pit;
2346     force_next('p');
2347 }
2348 
2349 NV
2350 Perl_str_to_version(pTHX_ SV *sv)
2351 {
2352     NV retval = 0.0;
2353     NV nshift = 1.0;
2354     STRLEN len;
2355     const char *start = SvPV_const(sv,len);
2356     const char * const end = start + len;
2357     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2358 
2359     PERL_ARGS_ASSERT_STR_TO_VERSION;
2360 
2361     while (start < end) {
2362 	STRLEN skip;
2363 	UV n;
2364 	if (utf)
2365 	    n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2366 	else {
2367 	    n = *(U8*)start;
2368 	    skip = 1;
2369 	}
2370 	retval += ((NV)n)/nshift;
2371 	start += skip;
2372 	nshift *= 1000;
2373     }
2374     return retval;
2375 }
2376 
2377 /*
2378  * S_force_version
2379  * Forces the next token to be a version number.
2380  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2381  * and if "guessing" is TRUE, then no new token is created (and the caller
2382  * must use an alternative parsing method).
2383  */
2384 
2385 STATIC char *
2386 S_force_version(pTHX_ char *s, int guessing)
2387 {
2388     dVAR;
2389     OP *version = NULL;
2390     char *d;
2391 #ifdef PERL_MAD
2392     I32 startoff = s - SvPVX(PL_linestr);
2393 #endif
2394 
2395     PERL_ARGS_ASSERT_FORCE_VERSION;
2396 
2397     s = SKIPSPACE1(s);
2398 
2399     d = s;
2400     if (*d == 'v')
2401 	d++;
2402     if (isDIGIT(*d)) {
2403 	while (isDIGIT(*d) || *d == '_' || *d == '.')
2404 	    d++;
2405 #ifdef PERL_MAD
2406 	if (PL_madskills) {
2407 	    start_force(PL_curforce);
2408 	    curmad('X', newSVpvn(s,d-s));
2409 	}
2410 #endif
2411         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2412 	    SV *ver;
2413             s = scan_num(s, &pl_yylval);
2414             version = pl_yylval.opval;
2415 	    ver = cSVOPx(version)->op_sv;
2416 	    if (SvPOK(ver) && !SvNIOK(ver)) {
2417 		SvUPGRADE(ver, SVt_PVNV);
2418 		SvNV_set(ver, str_to_version(ver));
2419 		SvNOK_on(ver);		/* hint that it is a version */
2420 	    }
2421         }
2422 	else if (guessing) {
2423 #ifdef PERL_MAD
2424 	    if (PL_madskills) {
2425 		sv_free(PL_nextwhite);	/* let next token collect whitespace */
2426 		PL_nextwhite = 0;
2427 		s = SvPVX(PL_linestr) + startoff;
2428 	    }
2429 #endif
2430 	    return s;
2431 	}
2432     }
2433 
2434 #ifdef PERL_MAD
2435     if (PL_madskills && !version) {
2436 	sv_free(PL_nextwhite);	/* let next token collect whitespace */
2437 	PL_nextwhite = 0;
2438 	s = SvPVX(PL_linestr) + startoff;
2439     }
2440 #endif
2441     /* NOTE: The parser sees the package name and the VERSION swapped */
2442     start_force(PL_curforce);
2443     NEXTVAL_NEXTTOKE.opval = version;
2444     force_next(WORD);
2445 
2446     return s;
2447 }
2448 
2449 /*
2450  * S_force_strict_version
2451  * Forces the next token to be a version number using strict syntax rules.
2452  */
2453 
2454 STATIC char *
2455 S_force_strict_version(pTHX_ char *s)
2456 {
2457     dVAR;
2458     OP *version = NULL;
2459 #ifdef PERL_MAD
2460     I32 startoff = s - SvPVX(PL_linestr);
2461 #endif
2462     const char *errstr = NULL;
2463 
2464     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2465 
2466     while (isSPACE(*s)) /* leading whitespace */
2467 	s++;
2468 
2469     if (is_STRICT_VERSION(s,&errstr)) {
2470 	SV *ver = newSV(0);
2471 	s = (char *)scan_version(s, ver, 0);
2472 	version = newSVOP(OP_CONST, 0, ver);
2473     }
2474     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2475 	    (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2476     {
2477 	PL_bufptr = s;
2478 	if (errstr)
2479 	    yyerror(errstr); /* version required */
2480 	return s;
2481     }
2482 
2483 #ifdef PERL_MAD
2484     if (PL_madskills && !version) {
2485 	sv_free(PL_nextwhite);	/* let next token collect whitespace */
2486 	PL_nextwhite = 0;
2487 	s = SvPVX(PL_linestr) + startoff;
2488     }
2489 #endif
2490     /* NOTE: The parser sees the package name and the VERSION swapped */
2491     start_force(PL_curforce);
2492     NEXTVAL_NEXTTOKE.opval = version;
2493     force_next(WORD);
2494 
2495     return s;
2496 }
2497 
2498 /*
2499  * S_tokeq
2500  * Tokenize a quoted string passed in as an SV.  It finds the next
2501  * chunk, up to end of string or a backslash.  It may make a new
2502  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2503  * turns \\ into \.
2504  */
2505 
2506 STATIC SV *
2507 S_tokeq(pTHX_ SV *sv)
2508 {
2509     dVAR;
2510     char *s;
2511     char *send;
2512     char *d;
2513     SV *pv = sv;
2514 
2515     PERL_ARGS_ASSERT_TOKEQ;
2516 
2517     assert (SvPOK(sv));
2518     assert (SvLEN(sv));
2519     assert (!SvIsCOW(sv));
2520     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2521 	goto finish;
2522     s = SvPVX(sv);
2523     send = SvEND(sv);
2524     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2525     while (s < send && !(*s == '\\' && s[1] == '\\'))
2526 	s++;
2527     if (s == send)
2528 	goto finish;
2529     d = s;
2530     if ( PL_hints & HINT_NEW_STRING ) {
2531 	pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2532 			    SVs_TEMP | SvUTF8(sv));
2533     }
2534     while (s < send) {
2535 	if (*s == '\\') {
2536 	    if (s + 1 < send && (s[1] == '\\'))
2537 		s++;		/* all that, just for this */
2538 	}
2539 	*d++ = *s++;
2540     }
2541     *d = '\0';
2542     SvCUR_set(sv, d - SvPVX_const(sv));
2543   finish:
2544     if ( PL_hints & HINT_NEW_STRING )
2545        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2546     return sv;
2547 }
2548 
2549 /*
2550  * Now come three functions related to double-quote context,
2551  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2552  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2553  * interact with PL_lex_state, and create fake ( ... ) argument lists
2554  * to handle functions and concatenation.
2555  * For example,
2556  *   "foo\lbar"
2557  * is tokenised as
2558  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2559  */
2560 
2561 /*
2562  * S_sublex_start
2563  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2564  *
2565  * Pattern matching will set PL_lex_op to the pattern-matching op to
2566  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2567  *
2568  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2569  *
2570  * Everything else becomes a FUNC.
2571  *
2572  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2573  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2574  * call to S_sublex_push().
2575  */
2576 
2577 STATIC I32
2578 S_sublex_start(pTHX)
2579 {
2580     dVAR;
2581     const I32 op_type = pl_yylval.ival;
2582 
2583     if (op_type == OP_NULL) {
2584 	pl_yylval.opval = PL_lex_op;
2585 	PL_lex_op = NULL;
2586 	return THING;
2587     }
2588     if (op_type == OP_CONST) {
2589 	SV *sv = tokeq(PL_lex_stuff);
2590 
2591 	if (SvTYPE(sv) == SVt_PVIV) {
2592 	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2593 	    STRLEN len;
2594 	    const char * const p = SvPV_const(sv, len);
2595 	    SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2596 	    SvREFCNT_dec(sv);
2597 	    sv = nsv;
2598 	}
2599 	pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2600 	PL_lex_stuff = NULL;
2601 	return THING;
2602     }
2603 
2604     PL_sublex_info.super_state = PL_lex_state;
2605     PL_sublex_info.sub_inwhat = (U16)op_type;
2606     PL_sublex_info.sub_op = PL_lex_op;
2607     PL_lex_state = LEX_INTERPPUSH;
2608 
2609     PL_expect = XTERM;
2610     if (PL_lex_op) {
2611 	pl_yylval.opval = PL_lex_op;
2612 	PL_lex_op = NULL;
2613 	return PMFUNC;
2614     }
2615     else
2616 	return FUNC;
2617 }
2618 
2619 /*
2620  * S_sublex_push
2621  * Create a new scope to save the lexing state.  The scope will be
2622  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2623  * to the uc, lc, etc. found before.
2624  * Sets PL_lex_state to LEX_INTERPCONCAT.
2625  */
2626 
2627 STATIC I32
2628 S_sublex_push(pTHX)
2629 {
2630     dVAR;
2631     LEXSHARED *shared;
2632     const bool is_heredoc = PL_multi_close == '<';
2633     ENTER;
2634 
2635     PL_lex_state = PL_sublex_info.super_state;
2636     SAVEI8(PL_lex_dojoin);
2637     SAVEI32(PL_lex_brackets);
2638     SAVEI32(PL_lex_allbrackets);
2639     SAVEI32(PL_lex_formbrack);
2640     SAVEI8(PL_lex_fakeeof);
2641     SAVEI32(PL_lex_casemods);
2642     SAVEI32(PL_lex_starts);
2643     SAVEI8(PL_lex_state);
2644     SAVESPTR(PL_lex_repl);
2645     SAVEVPTR(PL_lex_inpat);
2646     SAVEI16(PL_lex_inwhat);
2647     if (is_heredoc)
2648     {
2649 	SAVECOPLINE(PL_curcop);
2650 	SAVEI32(PL_multi_end);
2651 	SAVEI32(PL_parser->herelines);
2652 	PL_parser->herelines = 0;
2653     }
2654     SAVEI8(PL_multi_close);
2655     SAVEPPTR(PL_bufptr);
2656     SAVEPPTR(PL_bufend);
2657     SAVEPPTR(PL_oldbufptr);
2658     SAVEPPTR(PL_oldoldbufptr);
2659     SAVEPPTR(PL_last_lop);
2660     SAVEPPTR(PL_last_uni);
2661     SAVEPPTR(PL_linestart);
2662     SAVESPTR(PL_linestr);
2663     SAVEGENERICPV(PL_lex_brackstack);
2664     SAVEGENERICPV(PL_lex_casestack);
2665     SAVEGENERICPV(PL_parser->lex_shared);
2666     SAVEBOOL(PL_parser->lex_re_reparsing);
2667     SAVEI32(PL_copline);
2668 
2669     /* The here-doc parser needs to be able to peek into outer lexing
2670        scopes to find the body of the here-doc.  So we put PL_linestr and
2671        PL_bufptr into lex_shared, to ‘share’ those values.
2672      */
2673     PL_parser->lex_shared->ls_linestr = PL_linestr;
2674     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2675 
2676     PL_linestr = PL_lex_stuff;
2677     PL_lex_repl = PL_sublex_info.repl;
2678     PL_lex_stuff = NULL;
2679     PL_sublex_info.repl = NULL;
2680 
2681     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2682 	= SvPVX(PL_linestr);
2683     PL_bufend += SvCUR(PL_linestr);
2684     PL_last_lop = PL_last_uni = NULL;
2685     SAVEFREESV(PL_linestr);
2686     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2687 
2688     PL_lex_dojoin = FALSE;
2689     PL_lex_brackets = PL_lex_formbrack = 0;
2690     PL_lex_allbrackets = 0;
2691     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2692     Newx(PL_lex_brackstack, 120, char);
2693     Newx(PL_lex_casestack, 12, char);
2694     PL_lex_casemods = 0;
2695     *PL_lex_casestack = '\0';
2696     PL_lex_starts = 0;
2697     PL_lex_state = LEX_INTERPCONCAT;
2698     if (is_heredoc)
2699 	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2700     PL_copline = NOLINE;
2701 
2702     Newxz(shared, 1, LEXSHARED);
2703     shared->ls_prev = PL_parser->lex_shared;
2704     PL_parser->lex_shared = shared;
2705 
2706     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2707     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2708     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2709 	PL_lex_inpat = PL_sublex_info.sub_op;
2710     else
2711 	PL_lex_inpat = NULL;
2712 
2713     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2714     PL_in_eval &= ~EVAL_RE_REPARSING;
2715 
2716     return '(';
2717 }
2718 
2719 /*
2720  * S_sublex_done
2721  * Restores lexer state after a S_sublex_push.
2722  */
2723 
2724 STATIC I32
2725 S_sublex_done(pTHX)
2726 {
2727     dVAR;
2728     if (!PL_lex_starts++) {
2729 	SV * const sv = newSVpvs("");
2730 	if (SvUTF8(PL_linestr))
2731 	    SvUTF8_on(sv);
2732 	PL_expect = XOPERATOR;
2733 	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2734 	return THING;
2735     }
2736 
2737     if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2738 	PL_lex_state = LEX_INTERPCASEMOD;
2739 	return yylex();
2740     }
2741 
2742     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2743     assert(PL_lex_inwhat != OP_TRANSR);
2744     if (PL_lex_repl) {
2745 	assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2746 	PL_linestr = PL_lex_repl;
2747 	PL_lex_inpat = 0;
2748 	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2749 	PL_bufend += SvCUR(PL_linestr);
2750 	PL_last_lop = PL_last_uni = NULL;
2751 	PL_lex_dojoin = FALSE;
2752 	PL_lex_brackets = 0;
2753 	PL_lex_allbrackets = 0;
2754 	PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2755 	PL_lex_casemods = 0;
2756 	*PL_lex_casestack = '\0';
2757 	PL_lex_starts = 0;
2758 	if (SvEVALED(PL_lex_repl)) {
2759 	    PL_lex_state = LEX_INTERPNORMAL;
2760 	    PL_lex_starts++;
2761 	    /*	we don't clear PL_lex_repl here, so that we can check later
2762 		whether this is an evalled subst; that means we rely on the
2763 		logic to ensure sublex_done() is called again only via the
2764 		branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2765 	}
2766 	else {
2767 	    PL_lex_state = LEX_INTERPCONCAT;
2768 	    PL_lex_repl = NULL;
2769 	}
2770 	if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2771 	    CopLINE(PL_curcop) +=
2772 		((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2773 		 + PL_parser->herelines;
2774 	    PL_parser->herelines = 0;
2775 	}
2776 	return ',';
2777     }
2778     else {
2779 	const line_t l = CopLINE(PL_curcop);
2780 #ifdef PERL_MAD
2781 	if (PL_madskills) {
2782 	    if (PL_thiswhite) {
2783 		if (!PL_endwhite)
2784 		    PL_endwhite = newSVpvs("");
2785 		sv_catsv(PL_endwhite, PL_thiswhite);
2786 		PL_thiswhite = 0;
2787 	    }
2788 	    if (PL_thistoken)
2789 		sv_setpvs(PL_thistoken,"");
2790 	    else
2791 		PL_realtokenstart = -1;
2792 	}
2793 #endif
2794 	LEAVE;
2795 	if (PL_multi_close == '<')
2796 	    PL_parser->herelines += l - PL_multi_end;
2797 	PL_bufend = SvPVX(PL_linestr);
2798 	PL_bufend += SvCUR(PL_linestr);
2799 	PL_expect = XOPERATOR;
2800 	return ')';
2801     }
2802 }
2803 
2804 PERL_STATIC_INLINE SV*
2805 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2806 {
2807     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2808      * interior, hence to the "}".  Finds what the name resolves to, returning
2809      * an SV* containing it; NULL if no valid one found */
2810 
2811     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2812 
2813     HV * table;
2814     SV **cvp;
2815     SV *cv;
2816     SV *rv;
2817     HV *stash;
2818     const U8* first_bad_char_loc;
2819     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2820 
2821     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2822 
2823     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2824                                      e - backslash_ptr,
2825                                      &first_bad_char_loc))
2826     {
2827         /* If warnings are on, this will print a more detailed analysis of what
2828          * is wrong than the error message below */
2829         utf8n_to_uvchr(first_bad_char_loc,
2830                        e - ((char *) first_bad_char_loc),
2831                        NULL, 0);
2832 
2833         /* We deliberately don't try to print the malformed character, which
2834          * might not print very well; it also may be just the first of many
2835          * malformations, so don't print what comes after it */
2836         yyerror(Perl_form(aTHX_
2837             "Malformed UTF-8 character immediately after '%.*s'",
2838             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2839 	return NULL;
2840     }
2841 
2842     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2843                         /* include the <}> */
2844                         e - backslash_ptr + 1);
2845     if (! SvPOK(res)) {
2846         SvREFCNT_dec_NN(res);
2847         return NULL;
2848     }
2849 
2850     /* See if the charnames handler is the Perl core's, and if so, we can skip
2851      * the validation needed for a user-supplied one, as Perl's does its own
2852      * validation. */
2853     table = GvHV(PL_hintgv);		 /* ^H */
2854     cvp = hv_fetchs(table, "charnames", FALSE);
2855     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2856         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2857     {
2858         const char * const name = HvNAME(stash);
2859         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2860          && strEQ(name, "_charnames")) {
2861            return res;
2862        }
2863     }
2864 
2865     /* Here, it isn't Perl's charname handler.  We can't rely on a
2866      * user-supplied handler to validate the input name.  For non-ut8 input,
2867      * look to see that the first character is legal.  Then loop through the
2868      * rest checking that each is a continuation */
2869 
2870     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2871      * the same thing */
2872 
2873     if (! UTF) {
2874         if (! isALPHAU(*s)) {
2875             goto bad_charname;
2876         }
2877         s++;
2878         while (s < e) {
2879             if (! isCHARNAME_CONT(*s)) {
2880                 goto bad_charname;
2881             }
2882 	    if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2883                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2884                            "A sequence of multiple spaces in a charnames "
2885                            "alias definition is deprecated");
2886             }
2887             s++;
2888         }
2889         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2890             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2891                         "Trailing white-space in a charnames alias "
2892                         "definition is deprecated");
2893         }
2894     }
2895     else {
2896         /* Similarly for utf8.  For invariants can check directly; for other
2897          * Latin1, can calculate their code point and check; otherwise  use a
2898          * swash */
2899         if (UTF8_IS_INVARIANT(*s)) {
2900             if (! isALPHAU(*s)) {
2901                 goto bad_charname;
2902             }
2903             s++;
2904         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2905             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2906                 goto bad_charname;
2907             }
2908             s += 2;
2909         }
2910         else {
2911             if (! PL_utf8_charname_begin) {
2912                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2913                 PL_utf8_charname_begin = _core_swash_init("utf8",
2914                                                         "_Perl_Charname_Begin",
2915                                                         &PL_sv_undef,
2916                                                         1, 0, NULL, &flags);
2917             }
2918             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2919                 goto bad_charname;
2920             }
2921             s += UTF8SKIP(s);
2922         }
2923 
2924         while (s < e) {
2925             if (UTF8_IS_INVARIANT(*s)) {
2926                 if (! isCHARNAME_CONT(*s)) {
2927                     goto bad_charname;
2928                 }
2929                 if (*s == ' ' && *(s-1) == ' '
2930                  && ckWARN_d(WARN_DEPRECATED)) {
2931                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2932                                "A sequence of multiple spaces in a charnam"
2933                                "es alias definition is deprecated");
2934                 }
2935                 s++;
2936             }
2937             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2938                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2939                 {
2940                     goto bad_charname;
2941                 }
2942                 s += 2;
2943             }
2944             else {
2945                 if (! PL_utf8_charname_continue) {
2946                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2947                     PL_utf8_charname_continue = _core_swash_init("utf8",
2948                                                 "_Perl_Charname_Continue",
2949                                                 &PL_sv_undef,
2950                                                 1, 0, NULL, &flags);
2951                 }
2952                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2953                     goto bad_charname;
2954                 }
2955                 s += UTF8SKIP(s);
2956             }
2957         }
2958         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2959             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2960                        "Trailing white-space in a charnames alias "
2961                        "definition is deprecated");
2962         }
2963     }
2964 
2965     if (SvUTF8(res)) { /* Don't accept malformed input */
2966         const U8* first_bad_char_loc;
2967         STRLEN len;
2968         const char* const str = SvPV_const(res, len);
2969         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2970             /* If warnings are on, this will print a more detailed analysis of
2971              * what is wrong than the error message below */
2972             utf8n_to_uvchr(first_bad_char_loc,
2973                            (char *) first_bad_char_loc - str,
2974                            NULL, 0);
2975 
2976             /* We deliberately don't try to print the malformed character,
2977              * which might not print very well; it also may be just the first
2978              * of many malformations, so don't print what comes after it */
2979             yyerror_pv(
2980               Perl_form(aTHX_
2981                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2982                  (int) (e - backslash_ptr + 1), backslash_ptr,
2983                  (int) ((char *) first_bad_char_loc - str), str
2984               ),
2985               SVf_UTF8);
2986             return NULL;
2987         }
2988     }
2989 
2990     return res;
2991 
2992   bad_charname: {
2993         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2994 
2995         /* The final %.*s makes sure that should the trailing NUL be missing
2996          * that this print won't run off the end of the string */
2997         yyerror_pv(
2998           Perl_form(aTHX_
2999             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3000             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3001             (int)(e - s + bad_char_size), s + bad_char_size
3002           ),
3003           UTF ? SVf_UTF8 : 0);
3004         return NULL;
3005     }
3006 }
3007 
3008 /*
3009   scan_const
3010 
3011   Extracts the next constant part of a pattern, double-quoted string,
3012   or transliteration.  This is terrifying code.
3013 
3014   For example, in parsing the double-quoted string "ab\x63$d", it would
3015   stop at the '$' and return an OP_CONST containing 'abc'.
3016 
3017   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3018   processing a pattern (PL_lex_inpat is true), a transliteration
3019   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3020 
3021   Returns a pointer to the character scanned up to. If this is
3022   advanced from the start pointer supplied (i.e. if anything was
3023   successfully parsed), will leave an OP_CONST for the substring scanned
3024   in pl_yylval. Caller must intuit reason for not parsing further
3025   by looking at the next characters herself.
3026 
3027   In patterns:
3028     expand:
3029       \N{FOO}  => \N{U+hex_for_character_FOO}
3030       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3031 
3032     pass through:
3033 	all other \-char, including \N and \N{ apart from \N{ABC}
3034 
3035     stops on:
3036 	@ and $ where it appears to be a var, but not for $ as tail anchor
3037         \l \L \u \U \Q \E
3038 	(?{  or  (??{
3039 
3040 
3041   In transliterations:
3042     characters are VERY literal, except for - not at the start or end
3043     of the string, which indicates a range. If the range is in bytes,
3044     scan_const expands the range to the full set of intermediate
3045     characters. If the range is in utf8, the hyphen is replaced with
3046     a certain range mark which will be handled by pmtrans() in op.c.
3047 
3048   In double-quoted strings:
3049     backslashes:
3050       double-quoted style: \r and \n
3051       constants: \x31, etc.
3052       deprecated backrefs: \1 (in substitution replacements)
3053       case and quoting: \U \Q \E
3054     stops on @ and $
3055 
3056   scan_const does *not* construct ops to handle interpolated strings.
3057   It stops processing as soon as it finds an embedded $ or @ variable
3058   and leaves it to the caller to work out what's going on.
3059 
3060   embedded arrays (whether in pattern or not) could be:
3061       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3062 
3063   $ in double-quoted strings must be the symbol of an embedded scalar.
3064 
3065   $ in pattern could be $foo or could be tail anchor.  Assumption:
3066   it's a tail anchor if $ is the last thing in the string, or if it's
3067   followed by one of "()| \r\n\t"
3068 
3069   \1 (backreferences) are turned into $1 in substitutions
3070 
3071   The structure of the code is
3072       while (there's a character to process) {
3073 	  handle transliteration ranges
3074 	  skip regexp comments /(?#comment)/ and codes /(?{code})/
3075 	  skip #-initiated comments in //x patterns
3076 	  check for embedded arrays
3077 	  check for embedded scalars
3078 	  if (backslash) {
3079 	      deprecate \1 in substitution replacements
3080 	      handle string-changing backslashes \l \U \Q \E, etc.
3081 	      switch (what was escaped) {
3082 		  handle \- in a transliteration (becomes a literal -)
3083 		  if a pattern and not \N{, go treat as regular character
3084 		  handle \132 (octal characters)
3085 		  handle \x15 and \x{1234} (hex characters)
3086 		  handle \N{name} (named characters, also \N{3,5} in a pattern)
3087 		  handle \cV (control characters)
3088 		  handle printf-style backslashes (\f, \r, \n, etc)
3089 	      } (end switch)
3090 	      continue
3091 	  } (end if backslash)
3092           handle regular character
3093     } (end while character to read)
3094 
3095 */
3096 
3097 STATIC char *
3098 S_scan_const(pTHX_ char *start)
3099 {
3100     dVAR;
3101     char *send = PL_bufend;		/* end of the constant */
3102     SV *sv = newSV(send - start);		/* sv for the constant.  See
3103 						   note below on sizing. */
3104     char *s = start;			/* start of the constant */
3105     char *d = SvPVX(sv);		/* destination for copies */
3106     bool dorange = FALSE;			/* are we in a translit range? */
3107     bool didrange = FALSE;		        /* did we just finish a range? */
3108     bool in_charclass = FALSE;			/* within /[...]/ */
3109     bool has_utf8 = FALSE;			/* Output constant is UTF8 */
3110     bool  this_utf8 = cBOOL(UTF);		/* Is the source string assumed
3111 						   to be UTF8?  But, this can
3112 						   show as true when the source
3113 						   isn't utf8, as for example
3114 						   when it is entirely composed
3115 						   of hex constants */
3116     SV *res;		                /* result from charnames */
3117 
3118     /* Note on sizing:  The scanned constant is placed into sv, which is
3119      * initialized by newSV() assuming one byte of output for every byte of
3120      * input.  This routine expects newSV() to allocate an extra byte for a
3121      * trailing NUL, which this routine will append if it gets to the end of
3122      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3123      * CAPITAL LETTER A}), or more output than input if the constant ends up
3124      * recoded to utf8, but each time a construct is found that might increase
3125      * the needed size, SvGROW() is called.  Its size parameter each time is
3126      * based on the best guess estimate at the time, namely the length used so
3127      * far, plus the length the current construct will occupy, plus room for
3128      * the trailing NUL, plus one byte for every input byte still unscanned */
3129 
3130     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3131                        before set */
3132 #ifdef EBCDIC
3133     UV literal_endpoint = 0;
3134     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3135 #endif
3136 
3137     PERL_ARGS_ASSERT_SCAN_CONST;
3138 
3139     assert(PL_lex_inwhat != OP_TRANSR);
3140     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3141 	/* If we are doing a trans and we know we want UTF8 set expectation */
3142 	has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3143 	this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3144     }
3145 
3146     /* Protect sv from errors and fatal warnings. */
3147     ENTER_with_name("scan_const");
3148     SAVEFREESV(sv);
3149 
3150     while (s < send || dorange) {
3151 
3152         /* get transliterations out of the way (they're most literal) */
3153 	if (PL_lex_inwhat == OP_TRANS) {
3154 	    /* expand a range A-Z to the full set of characters.  AIE! */
3155 	    if (dorange) {
3156 		I32 i;				/* current expanded character */
3157 		I32 min;			/* first character in range */
3158 		I32 max;			/* last character in range */
3159 
3160 #ifdef EBCDIC
3161 		UV uvmax = 0;
3162 #endif
3163 
3164 		if (has_utf8
3165 #ifdef EBCDIC
3166 		    && !native_range
3167 #endif
3168                 ) {
3169 		    char * const c = (char*)utf8_hop((U8*)d, -1);
3170 		    char *e = d++;
3171 		    while (e-- > c)
3172 			*(e + 1) = *e;
3173 		    *c = (char) ILLEGAL_UTF8_BYTE;
3174 		    /* mark the range as done, and continue */
3175 		    dorange = FALSE;
3176 		    didrange = TRUE;
3177 		    continue;
3178 		}
3179 
3180 		i = d - SvPVX_const(sv);		/* remember current offset */
3181 #ifdef EBCDIC
3182                 SvGROW(sv,
3183 		       SvLEN(sv) + (has_utf8 ?
3184 				    (512 - UTF_CONTINUATION_MARK +
3185 				     UNISKIP(0x100))
3186 				    : 256));
3187                 /* How many two-byte within 0..255: 128 in UTF-8,
3188 		 * 96 in UTF-8-mod. */
3189 #else
3190 		SvGROW(sv, SvLEN(sv) + 256);	/* never more than 256 chars in a range */
3191 #endif
3192 		d = SvPVX(sv) + i;		/* refresh d after realloc */
3193 #ifdef EBCDIC
3194                 if (has_utf8) {
3195                     int j;
3196                     for (j = 0; j <= 1; j++) {
3197                         char * const c = (char*)utf8_hop((U8*)d, -1);
3198                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3199                         if (j)
3200                             min = (U8)uv;
3201                         else if (uv < 256)
3202                             max = (U8)uv;
3203                         else {
3204                             max = (U8)0xff; /* only to \xff */
3205                             uvmax = uv; /* \x{100} to uvmax */
3206                         }
3207                         d = c; /* eat endpoint chars */
3208                      }
3209                 }
3210                else {
3211 #endif
3212 		   d -= 2;		/* eat the first char and the - */
3213 		   min = (U8)*d;	/* first char in range */
3214 		   max = (U8)d[1];	/* last char in range  */
3215 #ifdef EBCDIC
3216 	       }
3217 #endif
3218 
3219                 if (min > max) {
3220 		    Perl_croak(aTHX_
3221 			       "Invalid range \"%c-%c\" in transliteration operator",
3222 			       (char)min, (char)max);
3223                 }
3224 
3225 #ifdef EBCDIC
3226 		if (literal_endpoint == 2 &&
3227 		    ((isLOWER_A(min) && isLOWER_A(max)) ||
3228 		     (isUPPER_A(min) && isUPPER_A(max))))
3229                 {
3230                     for (i = min; i <= max; i++) {
3231                         if (isALPHA_A(i))
3232                             *d++ = i;
3233 		    }
3234 		}
3235 		else
3236 #endif
3237 		    for (i = min; i <= max; i++)
3238 #ifdef EBCDIC
3239                         if (has_utf8) {
3240                             append_utf8_from_native_byte(i, &d);
3241                         }
3242                         else
3243 #endif
3244                             *d++ = (char)i;
3245 
3246 #ifdef EBCDIC
3247                 if (uvmax) {
3248                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3249                     if (uvmax > 0x101)
3250                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3251                     if (uvmax > 0x100)
3252                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3253                 }
3254 #endif
3255 
3256 		/* mark the range as done, and continue */
3257 		dorange = FALSE;
3258 		didrange = TRUE;
3259 #ifdef EBCDIC
3260 		literal_endpoint = 0;
3261 #endif
3262 		continue;
3263 	    }
3264 
3265 	    /* range begins (ignore - as first or last char) */
3266 	    else if (*s == '-' && s+1 < send  && s != start) {
3267 		if (didrange) {
3268 		    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3269 		}
3270 		if (has_utf8
3271 #ifdef EBCDIC
3272 		    && !native_range
3273 #endif
3274 		    ) {
3275 		    *d++ = (char) ILLEGAL_UTF8_BYTE;	/* use illegal utf8 byte--see pmtrans */
3276 		    s++;
3277 		    continue;
3278 		}
3279 		dorange = TRUE;
3280 		s++;
3281 	    }
3282 	    else {
3283 		didrange = FALSE;
3284 #ifdef EBCDIC
3285 		literal_endpoint = 0;
3286 		native_range = TRUE;
3287 #endif
3288 	    }
3289 	}
3290 
3291 	/* if we get here, we're not doing a transliteration */
3292 
3293 	else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3294 	    char *s1 = s-1;
3295 	    int esc = 0;
3296 	    while (s1 >= start && *s1-- == '\\')
3297 		esc = !esc;
3298 	    if (!esc)
3299 		in_charclass = TRUE;
3300 	}
3301 
3302 	else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3303 	    char *s1 = s-1;
3304 	    int esc = 0;
3305 	    while (s1 >= start && *s1-- == '\\')
3306 		esc = !esc;
3307 	    if (!esc)
3308 		in_charclass = FALSE;
3309 	}
3310 
3311 	/* skip for regexp comments /(?#comment)/, except for the last
3312 	 * char, which will be done separately.
3313 	 * Stop on (?{..}) and friends */
3314 
3315 	else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3316 	    if (s[2] == '#') {
3317 		while (s+1 < send && *s != ')')
3318 		    *d++ = *s++;
3319 	    }
3320 	    else if (!PL_lex_casemods &&
3321 		     (    s[2] == '{' /* This should match regcomp.c */
3322 		      || (s[2] == '?' && s[3] == '{')))
3323 	    {
3324 		break;
3325 	    }
3326 	}
3327 
3328 	/* likewise skip #-initiated comments in //x patterns */
3329 	else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3330 	  ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3331 	    while (s+1 < send && *s != '\n')
3332 		*d++ = *s++;
3333 	}
3334 
3335 	/* no further processing of single-quoted regex */
3336 	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3337 	    goto default_action;
3338 
3339 	/* check for embedded arrays
3340 	   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3341 	   */
3342 	else if (*s == '@' && s[1]) {
3343 	    if (isWORDCHAR_lazy_if(s+1,UTF))
3344 		break;
3345 	    if (strchr(":'{$", s[1]))
3346 		break;
3347 	    if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3348 		break; /* in regexp, neither @+ nor @- are interpolated */
3349 	}
3350 
3351 	/* check for embedded scalars.  only stop if we're sure it's a
3352 	   variable.
3353         */
3354 	else if (*s == '$') {
3355 	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3356 		break;
3357 	    if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3358 		if (s[1] == '\\') {
3359 		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3360 				   "Possible unintended interpolation of $\\ in regex");
3361 		}
3362 		break;		/* in regexp, $ might be tail anchor */
3363             }
3364 	}
3365 
3366 	/* End of else if chain - OP_TRANS rejoin rest */
3367 
3368 	/* backslashes */
3369 	if (*s == '\\' && s+1 < send) {
3370 	    char* e;	/* Can be used for ending '}', etc. */
3371 
3372 	    s++;
3373 
3374 	    /* warn on \1 - \9 in substitution replacements, but note that \11
3375 	     * is an octal; and \19 is \1 followed by '9' */
3376 	    if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3377 		isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3378 	    {
3379 		/* diag_listed_as: \%d better written as $%d */
3380 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3381 		*--s = '$';
3382 		break;
3383 	    }
3384 
3385 	    /* string-change backslash escapes */
3386 	    if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3387 		--s;
3388 		break;
3389 	    }
3390 	    /* In a pattern, process \N, but skip any other backslash escapes.
3391 	     * This is because we don't want to translate an escape sequence
3392 	     * into a meta symbol and have the regex compiler use the meta
3393 	     * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3394 	     * in spite of this, we do have to process \N here while the proper
3395 	     * charnames handler is in scope.  See bugs #56444 and #62056.
3396 	     * There is a complication because \N in a pattern may also stand
3397 	     * for 'match a non-nl', and not mean a charname, in which case its
3398 	     * processing should be deferred to the regex compiler.  To be a
3399 	     * charname it must be followed immediately by a '{', and not look
3400 	     * like \N followed by a curly quantifier, i.e., not something like
3401 	     * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3402 	     * quantifier */
3403 	    else if (PL_lex_inpat
3404 		    && (*s != 'N'
3405 			|| s[1] != '{'
3406 			|| regcurly(s + 1, FALSE)))
3407 	    {
3408 		*d++ = '\\';
3409 		goto default_action;
3410 	    }
3411 
3412 	    switch (*s) {
3413 
3414 	    /* quoted - in transliterations */
3415 	    case '-':
3416 		if (PL_lex_inwhat == OP_TRANS) {
3417 		    *d++ = *s++;
3418 		    continue;
3419 		}
3420 		/* FALL THROUGH */
3421 	    default:
3422 	        {
3423 		    if ((isALPHANUMERIC(*s)))
3424 			Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3425 				       "Unrecognized escape \\%c passed through",
3426 				       *s);
3427 		    /* default action is to copy the quoted character */
3428 		    goto default_action;
3429 		}
3430 
3431 	    /* eg. \132 indicates the octal constant 0132 */
3432 	    case '0': case '1': case '2': case '3':
3433 	    case '4': case '5': case '6': case '7':
3434 		{
3435                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3436                     STRLEN len = 3;
3437 		    uv = grok_oct(s, &len, &flags, NULL);
3438 		    s += len;
3439                     if (len < 3 && s < send && isDIGIT(*s)
3440                         && ckWARN(WARN_MISC))
3441                     {
3442                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3443                                     "%s", form_short_octal_warning(s, len));
3444                     }
3445 		}
3446 		goto NUM_ESCAPE_INSERT;
3447 
3448 	    /* eg. \o{24} indicates the octal constant \024 */
3449 	    case 'o':
3450 		{
3451 		    const char* error;
3452 
3453 		    bool valid = grok_bslash_o(&s, &uv, &error,
3454                                                TRUE, /* Output warning */
3455                                                FALSE, /* Not strict */
3456                                                TRUE, /* Output warnings for
3457                                                          non-portables */
3458                                                UTF);
3459 		    if (! valid) {
3460 			yyerror(error);
3461 			continue;
3462 		    }
3463 		    goto NUM_ESCAPE_INSERT;
3464 		}
3465 
3466 	    /* eg. \x24 indicates the hex constant 0x24 */
3467 	    case 'x':
3468 		{
3469 		    const char* error;
3470 
3471 		    bool valid = grok_bslash_x(&s, &uv, &error,
3472                                                TRUE, /* Output warning */
3473                                                FALSE, /* Not strict */
3474                                                TRUE,  /* Output warnings for
3475                                                          non-portables */
3476                                                UTF);
3477 		    if (! valid) {
3478 			yyerror(error);
3479 			continue;
3480 		    }
3481 		}
3482 
3483 	      NUM_ESCAPE_INSERT:
3484 		/* Insert oct or hex escaped character.  There will always be
3485 		 * enough room in sv since such escapes will be longer than any
3486 		 * UTF-8 sequence they can end up as, except if they force us
3487 		 * to recode the rest of the string into utf8 */
3488 
3489 		/* Here uv is the ordinal of the next character being added */
3490 		if (!UVCHR_IS_INVARIANT(uv)) {
3491 		    if (!has_utf8 && uv > 255) {
3492 			/* Might need to recode whatever we have accumulated so
3493 			 * far if it contains any chars variant in utf8 or
3494 			 * utf-ebcdic. */
3495 
3496 			SvCUR_set(sv, d - SvPVX_const(sv));
3497 			SvPOK_on(sv);
3498 			*d = '\0';
3499 			/* See Note on sizing above.  */
3500 			sv_utf8_upgrade_flags_grow(sv,
3501 					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3502 					UNISKIP(uv) + (STRLEN)(send - s) + 1);
3503 			d = SvPVX(sv) + SvCUR(sv);
3504 			has_utf8 = TRUE;
3505                     }
3506 
3507                     if (has_utf8) {
3508 		        d = (char*)uvchr_to_utf8((U8*)d, uv);
3509 			if (PL_lex_inwhat == OP_TRANS &&
3510 			    PL_sublex_info.sub_op) {
3511 			    PL_sublex_info.sub_op->op_private |=
3512 				(PL_lex_repl ? OPpTRANS_FROM_UTF
3513 					     : OPpTRANS_TO_UTF);
3514 			}
3515 #ifdef EBCDIC
3516 			if (uv > 255 && !dorange)
3517 			    native_range = FALSE;
3518 #endif
3519                     }
3520 		    else {
3521 		        *d++ = (char)uv;
3522 		    }
3523 		}
3524 		else {
3525 		    *d++ = (char) uv;
3526 		}
3527 		continue;
3528 
3529  	    case 'N':
3530 		/* In a non-pattern \N must be a named character, like \N{LATIN
3531 		 * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3532 		 * mean to match a non-newline.  For non-patterns, named
3533 		 * characters are converted to their string equivalents. In
3534 		 * patterns, named characters are not converted to their
3535 		 * ultimate forms for the same reasons that other escapes
3536 		 * aren't.  Instead, they are converted to the \N{U+...} form
3537 		 * to get the value from the charnames that is in effect right
3538 		 * now, while preserving the fact that it was a named character
3539 		 * so that the regex compiler knows this */
3540 
3541 		/* The structure of this section of code (besides checking for
3542 		 * errors and upgrading to utf8) is:
3543 		 *  Further disambiguate between the two meanings of \N, and if
3544 		 *	not a charname, go process it elsewhere
3545 		 *  If of form \N{U+...}, pass it through if a pattern;
3546 		 *	otherwise convert to utf8
3547 		 *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3548 		 *  pattern; otherwise convert to utf8 */
3549 
3550 		/* Here, s points to the 'N'; the test below is guaranteed to
3551 		 * succeed if we are being called on a pattern as we already
3552 		 * know from a test above that the next character is a '{'.
3553 		 * On a non-pattern \N must mean 'named sequence, which
3554 		 * requires braces */
3555 		s++;
3556 		if (*s != '{') {
3557 		    yyerror("Missing braces on \\N{}");
3558 		    continue;
3559 		}
3560 		s++;
3561 
3562 		/* If there is no matching '}', it is an error. */
3563 		if (! (e = strchr(s, '}'))) {
3564 		    if (! PL_lex_inpat) {
3565 			yyerror("Missing right brace on \\N{}");
3566 		    } else {
3567 			yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3568 		    }
3569 		    continue;
3570 		}
3571 
3572 		/* Here it looks like a named character */
3573 
3574 		if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3575 		    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3576 				| PERL_SCAN_DISALLOW_PREFIX;
3577 		    STRLEN len;
3578 
3579 		    /* For \N{U+...}, the '...' is a unicode value even on
3580 		     * EBCDIC machines */
3581 		    s += 2;	    /* Skip to next char after the 'U+' */
3582 		    len = e - s;
3583 		    uv = grok_hex(s, &len, &flags, NULL);
3584 		    if (len == 0 || len != (STRLEN)(e - s)) {
3585 			yyerror("Invalid hexadecimal number in \\N{U+...}");
3586 			s = e + 1;
3587 			continue;
3588 		    }
3589 
3590 		    if (PL_lex_inpat) {
3591 
3592 			/* On non-EBCDIC platforms, pass through to the regex
3593 			 * compiler unchanged.  The reason we evaluated the
3594 			 * number above is to make sure there wasn't a syntax
3595 			 * error.  But on EBCDIC we convert to native so
3596 			 * downstream code can continue to assume it's native
3597 			 */
3598 			s -= 5;	    /* Include the '\N{U+' */
3599 #ifdef EBCDIC
3600 			d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3601 							       and the \0 */
3602 				    "\\N{U+%X}",
3603 				    (unsigned int) UNI_TO_NATIVE(uv));
3604 #else
3605 			Copy(s, d, e - s + 1, char);	/* 1 = include the } */
3606 			d += e - s + 1;
3607 #endif
3608 		    }
3609 		    else {  /* Not a pattern: convert the hex to string */
3610 
3611 			 /* If destination is not in utf8, unconditionally
3612 			  * recode it to be so.  This is because \N{} implies
3613 			  * Unicode semantics, and scalars have to be in utf8
3614 			  * to guarantee those semantics */
3615 			if (! has_utf8) {
3616 			    SvCUR_set(sv, d - SvPVX_const(sv));
3617 			    SvPOK_on(sv);
3618 			    *d = '\0';
3619 			    /* See Note on sizing above.  */
3620 			    sv_utf8_upgrade_flags_grow(
3621 					sv,
3622 					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3623 					UNISKIP(uv) + (STRLEN)(send - e) + 1);
3624 			    d = SvPVX(sv) + SvCUR(sv);
3625 			    has_utf8 = TRUE;
3626 			}
3627 
3628                         /* Add the (Unicode) code point to the output. */
3629 			if (UNI_IS_INVARIANT(uv)) {
3630 			    *d++ = (char) LATIN1_TO_NATIVE(uv);
3631 			}
3632 			else {
3633                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3634                         }
3635 		    }
3636 		}
3637 		else /* Here is \N{NAME} but not \N{U+...}. */
3638                      if ((res = get_and_check_backslash_N_name(s, e)))
3639                 {
3640                     STRLEN len;
3641                     const char *str = SvPV_const(res, len);
3642                     if (PL_lex_inpat) {
3643 
3644 			if (! len) { /* The name resolved to an empty string */
3645 			    Copy("\\N{}", d, 4, char);
3646 			    d += 4;
3647 			}
3648 			else {
3649 			    /* In order to not lose information for the regex
3650 			    * compiler, pass the result in the specially made
3651 			    * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3652 			    * the code points in hex of each character
3653 			    * returned by charnames */
3654 
3655 			    const char *str_end = str + len;
3656 			    const STRLEN off = d - SvPVX_const(sv);
3657 
3658                             if (! SvUTF8(res)) {
3659                                 /* For the non-UTF-8 case, we can determine the
3660                                  * exact length needed without having to parse
3661                                  * through the string.  Each character takes up
3662                                  * 2 hex digits plus either a trailing dot or
3663                                  * the "}" */
3664                                 d = off + SvGROW(sv, off
3665                                                     + 3 * len
3666                                                     + 6 /* For the "\N{U+", and
3667                                                            trailing NUL */
3668                                                     + (STRLEN)(send - e));
3669                                 Copy("\\N{U+", d, 5, char);
3670                                 d += 5;
3671                                 while (str < str_end) {
3672                                     char hex_string[4];
3673                                     my_snprintf(hex_string, sizeof(hex_string),
3674                                                 "%02X.", (U8) *str);
3675                                     Copy(hex_string, d, 3, char);
3676                                     d += 3;
3677                                     str++;
3678                                 }
3679                                 d--;    /* We will overwrite below the final
3680                                            dot with a right brace */
3681                             }
3682                             else {
3683                                 STRLEN char_length; /* cur char's byte length */
3684 
3685                                 /* and the number of bytes after this is
3686                                  * translated into hex digits */
3687                                 STRLEN output_length;
3688 
3689                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3690                                  * for max('U+', '.'); and 1 for NUL */
3691                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3692 
3693                                 /* Get the first character of the result. */
3694                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3695                                                         len,
3696                                                         &char_length,
3697                                                         UTF8_ALLOW_ANYUV);
3698                                 /* Convert first code point to hex, including
3699                                  * the boiler plate before it. */
3700                                 output_length =
3701                                     my_snprintf(hex_string, sizeof(hex_string),
3702                                                 "\\N{U+%X",
3703                                                 (unsigned int) uv);
3704 
3705                                 /* Make sure there is enough space to hold it */
3706                                 d = off + SvGROW(sv, off
3707                                                     + output_length
3708                                                     + (STRLEN)(send - e)
3709                                                     + 2);	/* '}' + NUL */
3710                                 /* And output it */
3711                                 Copy(hex_string, d, output_length, char);
3712                                 d += output_length;
3713 
3714                                 /* For each subsequent character, append dot and
3715                                 * its ordinal in hex */
3716                                 while ((str += char_length) < str_end) {
3717                                     const STRLEN off = d - SvPVX_const(sv);
3718                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3719                                                             str_end - str,
3720                                                             &char_length,
3721                                                             UTF8_ALLOW_ANYUV);
3722                                     output_length =
3723                                         my_snprintf(hex_string,
3724                                                     sizeof(hex_string),
3725                                                     ".%X",
3726                                                     (unsigned int) uv);
3727 
3728                                     d = off + SvGROW(sv, off
3729                                                         + output_length
3730                                                         + (STRLEN)(send - e)
3731                                                         + 2);	/* '}' +  NUL */
3732                                     Copy(hex_string, d, output_length, char);
3733                                     d += output_length;
3734                                 }
3735 			    }
3736 
3737 			    *d++ = '}';	/* Done.  Add the trailing brace */
3738 			}
3739 		    }
3740 		    else { /* Here, not in a pattern.  Convert the name to a
3741 			    * string. */
3742 
3743 			 /* If destination is not in utf8, unconditionally
3744 			  * recode it to be so.  This is because \N{} implies
3745 			  * Unicode semantics, and scalars have to be in utf8
3746 			  * to guarantee those semantics */
3747 			if (! has_utf8) {
3748 			    SvCUR_set(sv, d - SvPVX_const(sv));
3749 			    SvPOK_on(sv);
3750 			    *d = '\0';
3751 			    /* See Note on sizing above.  */
3752 			    sv_utf8_upgrade_flags_grow(sv,
3753 						SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3754 						len + (STRLEN)(send - s) + 1);
3755 			    d = SvPVX(sv) + SvCUR(sv);
3756 			    has_utf8 = TRUE;
3757 			} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3758 
3759 			    /* See Note on sizing above.  (NOTE: SvCUR() is not
3760 			     * set correctly here). */
3761 			    const STRLEN off = d - SvPVX_const(sv);
3762 			    d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3763 			}
3764 			Copy(str, d, len, char);
3765 			d += len;
3766 		    }
3767 
3768 		    SvREFCNT_dec(res);
3769 
3770 		} /* End \N{NAME} */
3771 #ifdef EBCDIC
3772 		if (!dorange)
3773 		    native_range = FALSE; /* \N{} is defined to be Unicode */
3774 #endif
3775 		s = e + 1;  /* Point to just after the '}' */
3776 		continue;
3777 
3778 	    /* \c is a control character */
3779 	    case 'c':
3780 		s++;
3781 		if (s < send) {
3782 		    *d++ = grok_bslash_c(*s++, 1);
3783 		}
3784 		else {
3785 		    yyerror("Missing control char name in \\c");
3786 		}
3787 		continue;
3788 
3789 	    /* printf-style backslashes, formfeeds, newlines, etc */
3790 	    case 'b':
3791 		*d++ = '\b';
3792 		break;
3793 	    case 'n':
3794 		*d++ = '\n';
3795 		break;
3796 	    case 'r':
3797 		*d++ = '\r';
3798 		break;
3799 	    case 'f':
3800 		*d++ = '\f';
3801 		break;
3802 	    case 't':
3803 		*d++ = '\t';
3804 		break;
3805 	    case 'e':
3806 		*d++ = ASCII_TO_NATIVE('\033');
3807 		break;
3808 	    case 'a':
3809 		*d++ = '\a';
3810 		break;
3811 	    } /* end switch */
3812 
3813 	    s++;
3814 	    continue;
3815 	} /* end if (backslash) */
3816 #ifdef EBCDIC
3817 	else
3818 	    literal_endpoint++;
3819 #endif
3820 
3821     default_action:
3822 	/* If we started with encoded form, or already know we want it,
3823 	   then encode the next character */
3824 	if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3825 	    STRLEN len  = 1;
3826 
3827 
3828 	    /* One might think that it is wasted effort in the case of the
3829 	     * source being utf8 (this_utf8 == TRUE) to take the next character
3830 	     * in the source, convert it to an unsigned value, and then convert
3831 	     * it back again.  But the source has not been validated here.  The
3832 	     * routine that does the conversion checks for errors like
3833 	     * malformed utf8 */
3834 
3835 	    const UV nextuv   = (this_utf8)
3836                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3837                                 : (UV) ((U8) *s);
3838 	    const STRLEN need = UNISKIP(nextuv);
3839 	    if (!has_utf8) {
3840 		SvCUR_set(sv, d - SvPVX_const(sv));
3841 		SvPOK_on(sv);
3842 		*d = '\0';
3843 		/* See Note on sizing above.  */
3844 		sv_utf8_upgrade_flags_grow(sv,
3845 					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3846 					need + (STRLEN)(send - s) + 1);
3847 		d = SvPVX(sv) + SvCUR(sv);
3848 		has_utf8 = TRUE;
3849 	    } else if (need > len) {
3850 		/* encoded value larger than old, may need extra space (NOTE:
3851 		 * SvCUR() is not set correctly here).   See Note on sizing
3852 		 * above.  */
3853 		const STRLEN off = d - SvPVX_const(sv);
3854 		d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3855 	    }
3856 	    s += len;
3857 
3858 	    d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3859 #ifdef EBCDIC
3860 	    if (uv > 255 && !dorange)
3861 		native_range = FALSE;
3862 #endif
3863 	}
3864 	else {
3865 	    *d++ = *s++;
3866 	}
3867     } /* while loop to process each character */
3868 
3869     /* terminate the string and set up the sv */
3870     *d = '\0';
3871     SvCUR_set(sv, d - SvPVX_const(sv));
3872     if (SvCUR(sv) >= SvLEN(sv))
3873 	Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3874 		   " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3875 
3876     SvPOK_on(sv);
3877     if (PL_encoding && !has_utf8) {
3878 	sv_recode_to_utf8(sv, PL_encoding);
3879 	if (SvUTF8(sv))
3880 	    has_utf8 = TRUE;
3881     }
3882     if (has_utf8) {
3883 	SvUTF8_on(sv);
3884 	if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3885 	    PL_sublex_info.sub_op->op_private |=
3886 		    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3887 	}
3888     }
3889 
3890     /* shrink the sv if we allocated more than we used */
3891     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3892 	SvPV_shrink_to_cur(sv);
3893     }
3894 
3895     /* return the substring (via pl_yylval) only if we parsed anything */
3896     if (s > start) {
3897 	char *s2 = start;
3898 	for (; s2 < s; s2++) {
3899 	    if (*s2 == '\n')
3900 		COPLINE_INC_WITH_HERELINES;
3901 	}
3902 	SvREFCNT_inc_simple_void_NN(sv);
3903 	if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3904             && ! PL_parser->lex_re_reparsing)
3905         {
3906 	    const char *const key = PL_lex_inpat ? "qr" : "q";
3907 	    const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3908 	    const char *type;
3909 	    STRLEN typelen;
3910 
3911 	    if (PL_lex_inwhat == OP_TRANS) {
3912 		type = "tr";
3913 		typelen = 2;
3914 	    } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3915 		type = "s";
3916 		typelen = 1;
3917 	    } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3918 		type = "q";
3919 		typelen = 1;
3920 	    } else  {
3921 		type = "qq";
3922 		typelen = 2;
3923 	    }
3924 
3925 	    sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3926 				type, typelen);
3927 	}
3928 	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3929     }
3930     LEAVE_with_name("scan_const");
3931     return s;
3932 }
3933 
3934 /* S_intuit_more
3935  * Returns TRUE if there's more to the expression (e.g., a subscript),
3936  * FALSE otherwise.
3937  *
3938  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3939  *
3940  * ->[ and ->{ return TRUE
3941  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3942  * { and [ outside a pattern are always subscripts, so return TRUE
3943  * if we're outside a pattern and it's not { or [, then return FALSE
3944  * if we're in a pattern and the first char is a {
3945  *   {4,5} (any digits around the comma) returns FALSE
3946  * if we're in a pattern and the first char is a [
3947  *   [] returns FALSE
3948  *   [SOMETHING] has a funky algorithm to decide whether it's a
3949  *      character class or not.  It has to deal with things like
3950  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3951  * anything else returns TRUE
3952  */
3953 
3954 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3955 
3956 STATIC int
3957 S_intuit_more(pTHX_ char *s)
3958 {
3959     dVAR;
3960 
3961     PERL_ARGS_ASSERT_INTUIT_MORE;
3962 
3963     if (PL_lex_brackets)
3964 	return TRUE;
3965     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3966 	return TRUE;
3967     if (*s == '-' && s[1] == '>'
3968      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3969      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3970 	||(s[2] == '@' && strchr("*[{",s[3])) ))
3971 	return TRUE;
3972     if (*s != '{' && *s != '[')
3973 	return FALSE;
3974     if (!PL_lex_inpat)
3975 	return TRUE;
3976 
3977     /* In a pattern, so maybe we have {n,m}. */
3978     if (*s == '{') {
3979 	if (regcurly(s, FALSE)) {
3980 	    return FALSE;
3981 	}
3982 	return TRUE;
3983     }
3984 
3985     /* On the other hand, maybe we have a character class */
3986 
3987     s++;
3988     if (*s == ']' || *s == '^')
3989 	return FALSE;
3990     else {
3991         /* this is terrifying, and it works */
3992 	int weight;
3993 	char seen[256];
3994 	const char * const send = strchr(s,']');
3995 	unsigned char un_char, last_un_char;
3996 	char tmpbuf[sizeof PL_tokenbuf * 4];
3997 
3998 	if (!send)		/* has to be an expression */
3999 	    return TRUE;
4000 	weight = 2;		/* let's weigh the evidence */
4001 
4002 	if (*s == '$')
4003 	    weight -= 3;
4004 	else if (isDIGIT(*s)) {
4005 	    if (s[1] != ']') {
4006 		if (isDIGIT(s[1]) && s[2] == ']')
4007 		    weight -= 10;
4008 	    }
4009 	    else
4010 		weight -= 100;
4011 	}
4012 	Zero(seen,256,char);
4013 	un_char = 255;
4014 	for (; s < send; s++) {
4015 	    last_un_char = un_char;
4016 	    un_char = (unsigned char)*s;
4017 	    switch (*s) {
4018 	    case '@':
4019 	    case '&':
4020 	    case '$':
4021 		weight -= seen[un_char] * 10;
4022 		if (isWORDCHAR_lazy_if(s+1,UTF)) {
4023 		    int len;
4024                     char *tmp = PL_bufend;
4025                     PL_bufend = (char*)send;
4026                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4027                     PL_bufend = tmp;
4028 		    len = (int)strlen(tmpbuf);
4029 		    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4030                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4031 			weight -= 100;
4032 		    else
4033 			weight -= 10;
4034 		}
4035 		else if (*s == '$' && s[1] &&
4036 		  strchr("[#!%*<>()-=",s[1])) {
4037 		    if (/*{*/ strchr("])} =",s[2]))
4038 			weight -= 10;
4039 		    else
4040 			weight -= 1;
4041 		}
4042 		break;
4043 	    case '\\':
4044 		un_char = 254;
4045 		if (s[1]) {
4046 		    if (strchr("wds]",s[1]))
4047 			weight += 100;
4048 		    else if (seen[(U8)'\''] || seen[(U8)'"'])
4049 			weight += 1;
4050 		    else if (strchr("rnftbxcav",s[1]))
4051 			weight += 40;
4052 		    else if (isDIGIT(s[1])) {
4053 			weight += 40;
4054 			while (s[1] && isDIGIT(s[1]))
4055 			    s++;
4056 		    }
4057 		}
4058 		else
4059 		    weight += 100;
4060 		break;
4061 	    case '-':
4062 		if (s[1] == '\\')
4063 		    weight += 50;
4064 		if (strchr("aA01! ",last_un_char))
4065 		    weight += 30;
4066 		if (strchr("zZ79~",s[1]))
4067 		    weight += 30;
4068 		if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4069 		    weight -= 5;	/* cope with negative subscript */
4070 		break;
4071 	    default:
4072 		if (!isWORDCHAR(last_un_char)
4073 		    && !(last_un_char == '$' || last_un_char == '@'
4074 			 || last_un_char == '&')
4075 		    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4076 		    char *d = s;
4077 		    while (isALPHA(*s))
4078 			s++;
4079 		    if (keyword(d, s - d, 0))
4080 			weight -= 150;
4081 		}
4082 		if (un_char == last_un_char + 1)
4083 		    weight += 5;
4084 		weight -= seen[un_char];
4085 		break;
4086 	    }
4087 	    seen[un_char]++;
4088 	}
4089 	if (weight >= 0)	/* probably a character class */
4090 	    return FALSE;
4091     }
4092 
4093     return TRUE;
4094 }
4095 
4096 /*
4097  * S_intuit_method
4098  *
4099  * Does all the checking to disambiguate
4100  *   foo bar
4101  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4102  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4103  *
4104  * First argument is the stuff after the first token, e.g. "bar".
4105  *
4106  * Not a method if foo is a filehandle.
4107  * Not a method if foo is a subroutine prototyped to take a filehandle.
4108  * Not a method if it's really "Foo $bar"
4109  * Method if it's "foo $bar"
4110  * Not a method if it's really "print foo $bar"
4111  * Method if it's really "foo package::" (interpreted as package->foo)
4112  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4113  * Not a method if bar is a filehandle or package, but is quoted with
4114  *   =>
4115  */
4116 
4117 STATIC int
4118 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4119 {
4120     dVAR;
4121     char *s = start + (*start == '$');
4122     char tmpbuf[sizeof PL_tokenbuf];
4123     STRLEN len;
4124     GV* indirgv;
4125 #ifdef PERL_MAD
4126     int soff;
4127 #endif
4128 
4129     PERL_ARGS_ASSERT_INTUIT_METHOD;
4130 
4131     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4132 	    return 0;
4133     if (cv && SvPOK(cv)) {
4134 	const char *proto = CvPROTO(cv);
4135 	if (proto) {
4136 	    while (*proto && (isSPACE(*proto) || *proto == ';'))
4137 		proto++;
4138 	    if (*proto == '*')
4139 		return 0;
4140 	}
4141     }
4142 
4143     if (*start == '$') {
4144 	if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4145 		isUPPER(*PL_tokenbuf))
4146 	    return 0;
4147 #ifdef PERL_MAD
4148 	len = start - SvPVX(PL_linestr);
4149 #endif
4150 	s = PEEKSPACE(s);
4151 #ifdef PERL_MAD
4152 	start = SvPVX(PL_linestr) + len;
4153 #endif
4154 	PL_bufptr = start;
4155 	PL_expect = XREF;
4156 	return *s == '(' ? FUNCMETH : METHOD;
4157     }
4158 
4159     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4160     /* start is the beginning of the possible filehandle/object,
4161      * and s is the end of it
4162      * tmpbuf is a copy of it (but with single quotes as double colons)
4163      */
4164 
4165     if (!keyword(tmpbuf, len, 0)) {
4166 	if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4167 	    len -= 2;
4168 	    tmpbuf[len] = '\0';
4169 #ifdef PERL_MAD
4170 	    soff = s - SvPVX(PL_linestr);
4171 #endif
4172 	    goto bare_package;
4173 	}
4174 	indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4175 	if (indirgv && GvCVu(indirgv))
4176 	    return 0;
4177 	/* filehandle or package name makes it a method */
4178 	if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4179 #ifdef PERL_MAD
4180 	    soff = s - SvPVX(PL_linestr);
4181 #endif
4182 	    s = PEEKSPACE(s);
4183 	    if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4184 		return 0;	/* no assumptions -- "=>" quotes bareword */
4185       bare_package:
4186 	    start_force(PL_curforce);
4187 	    NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4188 						  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4189 	    NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4190 	    if (PL_madskills)
4191 		curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4192                                                             ( UTF ? SVf_UTF8 : 0 )));
4193 	    PL_expect = XTERM;
4194 	    force_next(WORD);
4195 	    PL_bufptr = s;
4196 #ifdef PERL_MAD
4197 	    PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4198 #endif
4199 	    return *s == '(' ? FUNCMETH : METHOD;
4200 	}
4201     }
4202     return 0;
4203 }
4204 
4205 /* Encoded script support. filter_add() effectively inserts a
4206  * 'pre-processing' function into the current source input stream.
4207  * Note that the filter function only applies to the current source file
4208  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4209  *
4210  * The datasv parameter (which may be NULL) can be used to pass
4211  * private data to this instance of the filter. The filter function
4212  * can recover the SV using the FILTER_DATA macro and use it to
4213  * store private buffers and state information.
4214  *
4215  * The supplied datasv parameter is upgraded to a PVIO type
4216  * and the IoDIRP/IoANY field is used to store the function pointer,
4217  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4218  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4219  * private use must be set using malloc'd pointers.
4220  */
4221 
4222 SV *
4223 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4224 {
4225     dVAR;
4226     if (!funcp)
4227 	return NULL;
4228 
4229     if (!PL_parser)
4230 	return NULL;
4231 
4232     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4233 	Perl_croak(aTHX_ "Source filters apply only to byte streams");
4234 
4235     if (!PL_rsfp_filters)
4236 	PL_rsfp_filters = newAV();
4237     if (!datasv)
4238 	datasv = newSV(0);
4239     SvUPGRADE(datasv, SVt_PVIO);
4240     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4241     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4242     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4243 			  FPTR2DPTR(void *, IoANY(datasv)),
4244 			  SvPV_nolen(datasv)));
4245     av_unshift(PL_rsfp_filters, 1);
4246     av_store(PL_rsfp_filters, 0, datasv) ;
4247     if (
4248 	!PL_parser->filtered
4249      && PL_parser->lex_flags & LEX_EVALBYTES
4250      && PL_bufptr < PL_bufend
4251     ) {
4252 	const char *s = PL_bufptr;
4253 	while (s < PL_bufend) {
4254 	    if (*s == '\n') {
4255 		SV *linestr = PL_parser->linestr;
4256 		char *buf = SvPVX(linestr);
4257 		STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4258 		STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4259 		STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4260 		STRLEN const linestart_pos = PL_parser->linestart - buf;
4261 		STRLEN const last_uni_pos =
4262 		    PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4263 		STRLEN const last_lop_pos =
4264 		    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4265 		av_push(PL_rsfp_filters, linestr);
4266 		PL_parser->linestr =
4267 		    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4268 		buf = SvPVX(PL_parser->linestr);
4269 		PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4270 		PL_parser->bufptr = buf + bufptr_pos;
4271 		PL_parser->oldbufptr = buf + oldbufptr_pos;
4272 		PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4273 		PL_parser->linestart = buf + linestart_pos;
4274 		if (PL_parser->last_uni)
4275 		    PL_parser->last_uni = buf + last_uni_pos;
4276 		if (PL_parser->last_lop)
4277 		    PL_parser->last_lop = buf + last_lop_pos;
4278 		SvLEN(linestr) = SvCUR(linestr);
4279 		SvCUR(linestr) = s-SvPVX(linestr);
4280 		PL_parser->filtered = 1;
4281 		break;
4282 	    }
4283 	    s++;
4284 	}
4285     }
4286     return(datasv);
4287 }
4288 
4289 
4290 /* Delete most recently added instance of this filter function.	*/
4291 void
4292 Perl_filter_del(pTHX_ filter_t funcp)
4293 {
4294     dVAR;
4295     SV *datasv;
4296 
4297     PERL_ARGS_ASSERT_FILTER_DEL;
4298 
4299 #ifdef DEBUGGING
4300     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4301 			  FPTR2DPTR(void*, funcp)));
4302 #endif
4303     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4304 	return;
4305     /* if filter is on top of stack (usual case) just pop it off */
4306     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4307     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4308 	sv_free(av_pop(PL_rsfp_filters));
4309 
4310         return;
4311     }
4312     /* we need to search for the correct entry and clear it	*/
4313     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4314 }
4315 
4316 
4317 /* Invoke the idxth filter function for the current rsfp.	 */
4318 /* maxlen 0 = read one text line */
4319 I32
4320 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4321 {
4322     dVAR;
4323     filter_t funcp;
4324     SV *datasv = NULL;
4325     /* This API is bad. It should have been using unsigned int for maxlen.
4326        Not sure if we want to change the API, but if not we should sanity
4327        check the value here.  */
4328     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4329 
4330     PERL_ARGS_ASSERT_FILTER_READ;
4331 
4332     if (!PL_parser || !PL_rsfp_filters)
4333 	return -1;
4334     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4335 	/* Provide a default input filter to make life easy.	*/
4336 	/* Note that we append to the line. This is handy.	*/
4337 	DEBUG_P(PerlIO_printf(Perl_debug_log,
4338 			      "filter_read %d: from rsfp\n", idx));
4339 	if (correct_length) {
4340  	    /* Want a block */
4341 	    int len ;
4342 	    const int old_len = SvCUR(buf_sv);
4343 
4344 	    /* ensure buf_sv is large enough */
4345 	    SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4346 	    if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4347 				   correct_length)) <= 0) {
4348 		if (PerlIO_error(PL_rsfp))
4349 	            return -1;		/* error */
4350 	        else
4351 		    return 0 ;		/* end of file */
4352 	    }
4353 	    SvCUR_set(buf_sv, old_len + len) ;
4354 	    SvPVX(buf_sv)[old_len + len] = '\0';
4355 	} else {
4356 	    /* Want a line */
4357             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4358 		if (PerlIO_error(PL_rsfp))
4359 	            return -1;		/* error */
4360 	        else
4361 		    return 0 ;		/* end of file */
4362 	    }
4363 	}
4364 	return SvCUR(buf_sv);
4365     }
4366     /* Skip this filter slot if filter has been deleted	*/
4367     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4368 	DEBUG_P(PerlIO_printf(Perl_debug_log,
4369 			      "filter_read %d: skipped (filter deleted)\n",
4370 			      idx));
4371 	return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4372     }
4373     if (SvTYPE(datasv) != SVt_PVIO) {
4374 	if (correct_length) {
4375  	    /* Want a block */
4376 	    const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4377 	    if (!remainder) return 0; /* eof */
4378 	    if (correct_length > remainder) correct_length = remainder;
4379 	    sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4380 	    SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4381 	} else {
4382 	    /* Want a line */
4383 	    const char *s = SvEND(datasv);
4384 	    const char *send = SvPVX(datasv) + SvLEN(datasv);
4385 	    while (s < send) {
4386 		if (*s == '\n') {
4387 		    s++;
4388 		    break;
4389 		}
4390 		s++;
4391 	    }
4392 	    if (s == send) return 0; /* eof */
4393 	    sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4394 	    SvCUR_set(datasv, s-SvPVX(datasv));
4395 	}
4396 	return SvCUR(buf_sv);
4397     }
4398     /* Get function pointer hidden within datasv	*/
4399     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4400     DEBUG_P(PerlIO_printf(Perl_debug_log,
4401 			  "filter_read %d: via function %p (%s)\n",
4402 			  idx, (void*)datasv, SvPV_nolen_const(datasv)));
4403     /* Call function. The function is expected to 	*/
4404     /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4405     /* Return: <0:error, =0:eof, >0:not eof 		*/
4406     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4407 }
4408 
4409 STATIC char *
4410 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4411 {
4412     dVAR;
4413 
4414     PERL_ARGS_ASSERT_FILTER_GETS;
4415 
4416 #ifdef PERL_CR_FILTER
4417     if (!PL_rsfp_filters) {
4418 	filter_add(S_cr_textfilter,NULL);
4419     }
4420 #endif
4421     if (PL_rsfp_filters) {
4422 	if (!append)
4423             SvCUR_set(sv, 0);	/* start with empty line	*/
4424         if (FILTER_READ(0, sv, 0) > 0)
4425             return ( SvPVX(sv) ) ;
4426         else
4427 	    return NULL ;
4428     }
4429     else
4430         return (sv_gets(sv, PL_rsfp, append));
4431 }
4432 
4433 STATIC HV *
4434 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4435 {
4436     dVAR;
4437     GV *gv;
4438 
4439     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4440 
4441     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4442         return PL_curstash;
4443 
4444     if (len > 2 &&
4445         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4446         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4447     {
4448         return GvHV(gv);			/* Foo:: */
4449     }
4450 
4451     /* use constant CLASS => 'MyClass' */
4452     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4453     if (gv && GvCV(gv)) {
4454 	SV * const sv = cv_const_sv(GvCV(gv));
4455 	if (sv)
4456             pkgname = SvPV_const(sv, len);
4457     }
4458 
4459     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4460 }
4461 
4462 #ifdef PERL_MAD
4463  /*
4464  * Perl_madlex
4465  * The intent of this yylex wrapper is to minimize the changes to the
4466  * tokener when we aren't interested in collecting madprops.  It remains
4467  * to be seen how successful this strategy will be...
4468  */
4469 
4470 int
4471 Perl_madlex(pTHX)
4472 {
4473     int optype;
4474     char *s = PL_bufptr;
4475 
4476     /* make sure PL_thiswhite is initialized */
4477     PL_thiswhite = 0;
4478     PL_thismad = 0;
4479 
4480     /* previous token ate up our whitespace? */
4481     if (!PL_lasttoke && PL_nextwhite) {
4482 	PL_thiswhite = PL_nextwhite;
4483 	PL_nextwhite = 0;
4484     }
4485 
4486     /* isolate the token, and figure out where it is without whitespace */
4487     PL_realtokenstart = -1;
4488     PL_thistoken = 0;
4489     optype = yylex();
4490     s = PL_bufptr;
4491     assert(PL_curforce < 0);
4492 
4493     if (!PL_thismad || PL_thismad->mad_key == '^') {	/* not forced already? */
4494 	if (!PL_thistoken) {
4495 	    if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4496 		PL_thistoken = newSVpvs("");
4497 	    else {
4498 		char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4499 		PL_thistoken = newSVpvn(tstart, s - tstart);
4500 	    }
4501 	}
4502 	if (PL_thismad)	/* install head */
4503 	    CURMAD('X', PL_thistoken);
4504     }
4505 
4506     /* last whitespace of a sublex? */
4507     if (optype == ')' && PL_endwhite) {
4508 	CURMAD('X', PL_endwhite);
4509     }
4510 
4511     if (!PL_thismad) {
4512 
4513 	/* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4514 	if (!PL_thiswhite && !PL_endwhite && !optype) {
4515 	    sv_free(PL_thistoken);
4516 	    PL_thistoken = 0;
4517 	    return 0;
4518 	}
4519 
4520 	/* put off final whitespace till peg */
4521 	if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4522 	    PL_nextwhite = PL_thiswhite;
4523 	    PL_thiswhite = 0;
4524 	}
4525 	else if (PL_thisopen) {
4526 	    CURMAD('q', PL_thisopen);
4527 	    if (PL_thistoken)
4528 		sv_free(PL_thistoken);
4529 	    PL_thistoken = 0;
4530 	}
4531 	else {
4532 	    /* Store actual token text as madprop X */
4533 	    CURMAD('X', PL_thistoken);
4534 	}
4535 
4536 	if (PL_thiswhite) {
4537 	    /* add preceding whitespace as madprop _ */
4538 	    CURMAD('_', PL_thiswhite);
4539 	}
4540 
4541 	if (PL_thisstuff) {
4542 	    /* add quoted material as madprop = */
4543 	    CURMAD('=', PL_thisstuff);
4544 	}
4545 
4546 	if (PL_thisclose) {
4547 	    /* add terminating quote as madprop Q */
4548 	    CURMAD('Q', PL_thisclose);
4549 	}
4550     }
4551 
4552     /* special processing based on optype */
4553 
4554     switch (optype) {
4555 
4556     /* opval doesn't need a TOKEN since it can already store mp */
4557     case WORD:
4558     case METHOD:
4559     case FUNCMETH:
4560     case THING:
4561     case PMFUNC:
4562     case PRIVATEREF:
4563     case FUNC0SUB:
4564     case UNIOPSUB:
4565     case LSTOPSUB:
4566 	if (pl_yylval.opval)
4567 	    append_madprops(PL_thismad, pl_yylval.opval, 0);
4568 	PL_thismad = 0;
4569 	return optype;
4570 
4571     /* fake EOF */
4572     case 0:
4573 	optype = PEG;
4574 	if (PL_endwhite) {
4575 	    addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4576 	    PL_endwhite = 0;
4577 	}
4578 	break;
4579 
4580     /* pval */
4581     case LABEL:
4582 	break;
4583 
4584     case ']':
4585     case '}':
4586 	if (PL_faketokens)
4587 	    break;
4588 	/* remember any fake bracket that lexer is about to discard */
4589 	if (PL_lex_brackets == 1 &&
4590 	    ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4591 	{
4592 	    s = PL_bufptr;
4593 	    while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4594 		s++;
4595 	    if (*s == '}') {
4596 		PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4597 		addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4598 		PL_thiswhite = 0;
4599 		PL_bufptr = s - 1;
4600 		break;	/* don't bother looking for trailing comment */
4601 	    }
4602 	    else
4603 		s = PL_bufptr;
4604 	}
4605 	if (optype == ']')
4606 	    break;
4607 	/* FALLTHROUGH */
4608 
4609     /* attach a trailing comment to its statement instead of next token */
4610     case ';':
4611 	if (PL_faketokens)
4612 	    break;
4613 	if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4614 	    s = PL_bufptr;
4615 	    while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4616 		s++;
4617 	    if (*s == '\n' || *s == '#') {
4618 		while (s < PL_bufend && *s != '\n')
4619 		    s++;
4620 		if (s < PL_bufend)
4621 		    s++;
4622 		PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4623 		addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4624 		PL_thiswhite = 0;
4625 		PL_bufptr = s;
4626 	    }
4627 	}
4628 	break;
4629 
4630     /* ival */
4631     default:
4632 	break;
4633 
4634     }
4635 
4636     /* Create new token struct.  Note: opvals return early above. */
4637     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4638     PL_thismad = 0;
4639     return optype;
4640 }
4641 #endif
4642 
4643 STATIC char *
4644 S_tokenize_use(pTHX_ int is_use, char *s) {
4645     dVAR;
4646 
4647     PERL_ARGS_ASSERT_TOKENIZE_USE;
4648 
4649     if (PL_expect != XSTATE)
4650 	yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4651 		    is_use ? "use" : "no"));
4652     PL_expect = XTERM;
4653     s = SKIPSPACE1(s);
4654     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4655 	s = force_version(s, TRUE);
4656 	if (*s == ';' || *s == '}'
4657 		|| (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4658 	    start_force(PL_curforce);
4659 	    NEXTVAL_NEXTTOKE.opval = NULL;
4660 	    force_next(WORD);
4661 	}
4662 	else if (*s == 'v') {
4663 	    s = force_word(s,WORD,FALSE,TRUE);
4664 	    s = force_version(s, FALSE);
4665 	}
4666     }
4667     else {
4668 	s = force_word(s,WORD,FALSE,TRUE);
4669 	s = force_version(s, FALSE);
4670     }
4671     pl_yylval.ival = is_use;
4672     return s;
4673 }
4674 #ifdef DEBUGGING
4675     static const char* const exp_name[] =
4676 	{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4677 	  "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4678 	};
4679 #endif
4680 
4681 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4682 STATIC bool
4683 S_word_takes_any_delimeter(char *p, STRLEN len)
4684 {
4685     return (len == 1 && strchr("msyq", p[0])) ||
4686 	   (len == 2 && (
4687 	    (p[0] == 't' && p[1] == 'r') ||
4688 	    (p[0] == 'q' && strchr("qwxr", p[1]))));
4689 }
4690 
4691 static void
4692 S_check_scalar_slice(pTHX_ char *s)
4693 {
4694     s++;
4695     while (*s == ' ' || *s == '\t') s++;
4696     if (*s == 'q' && s[1] == 'w'
4697      && !isWORDCHAR_lazy_if(s+2,UTF))
4698 	return;
4699     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4700 	s += UTF ? UTF8SKIP(s) : 1;
4701     if (*s == '}' || *s == ']')
4702 	pl_yylval.ival = OPpSLICEWARNING;
4703 }
4704 
4705 /*
4706   yylex
4707 
4708   Works out what to call the token just pulled out of the input
4709   stream.  The yacc parser takes care of taking the ops we return and
4710   stitching them into a tree.
4711 
4712   Returns:
4713     The type of the next token
4714 
4715   Structure:
4716       Switch based on the current state:
4717 	  - if we already built the token before, use it
4718 	  - if we have a case modifier in a string, deal with that
4719 	  - handle other cases of interpolation inside a string
4720 	  - scan the next line if we are inside a format
4721       In the normal state switch on the next character:
4722 	  - default:
4723 	    if alphabetic, go to key lookup
4724 	    unrecoginized character - croak
4725 	  - 0/4/26: handle end-of-line or EOF
4726 	  - cases for whitespace
4727 	  - \n and #: handle comments and line numbers
4728 	  - various operators, brackets and sigils
4729 	  - numbers
4730 	  - quotes
4731 	  - 'v': vstrings (or go to key lookup)
4732 	  - 'x' repetition operator (or go to key lookup)
4733 	  - other ASCII alphanumerics (key lookup begins here):
4734 	      word before => ?
4735 	      keyword plugin
4736 	      scan built-in keyword (but do nothing with it yet)
4737 	      check for statement label
4738 	      check for lexical subs
4739 		  goto just_a_word if there is one
4740 	      see whether built-in keyword is overridden
4741 	      switch on keyword number:
4742 		  - default: just_a_word:
4743 		      not a built-in keyword; handle bareword lookup
4744 		      disambiguate between method and sub call
4745 		      fall back to bareword
4746 		  - cases for built-in keywords
4747 */
4748 
4749 
4750 int
4751 Perl_yylex(pTHX)
4752 {
4753     dVAR;
4754     char *s = PL_bufptr;
4755     char *d;
4756     STRLEN len;
4757     bool bof = FALSE;
4758     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4759     U8 formbrack = 0;
4760     U32 fake_eof = 0;
4761 
4762     /* orig_keyword, gvp, and gv are initialized here because
4763      * jump to the label just_a_word_zero can bypass their
4764      * initialization later. */
4765     I32 orig_keyword = 0;
4766     GV *gv = NULL;
4767     GV **gvp = NULL;
4768 
4769     DEBUG_T( {
4770 	SV* tmp = newSVpvs("");
4771 	PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4772 	    (IV)CopLINE(PL_curcop),
4773 	    lex_state_names[PL_lex_state],
4774 	    exp_name[PL_expect],
4775 	    pv_display(tmp, s, strlen(s), 0, 60));
4776 	SvREFCNT_dec(tmp);
4777     } );
4778 
4779     switch (PL_lex_state) {
4780     case LEX_NORMAL:
4781     case LEX_INTERPNORMAL:
4782 	break;
4783 
4784     /* when we've already built the next token, just pull it out of the queue */
4785     case LEX_KNOWNEXT:
4786 #ifdef PERL_MAD
4787 	PL_lasttoke--;
4788 	pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4789 	if (PL_madskills) {
4790 	    PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4791 	    PL_nexttoke[PL_lasttoke].next_mad = 0;
4792 	    if (PL_thismad && PL_thismad->mad_key == '_') {
4793 		PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4794 		PL_thismad->mad_val = 0;
4795 		mad_free(PL_thismad);
4796 		PL_thismad = 0;
4797 	    }
4798 	}
4799 	if (!PL_lasttoke) {
4800 	    PL_lex_state = PL_lex_defer;
4801   	    PL_expect = PL_lex_expect;
4802   	    PL_lex_defer = LEX_NORMAL;
4803 	    if (!PL_nexttoke[PL_lasttoke].next_type)
4804 		return yylex();
4805   	}
4806 #else
4807 	PL_nexttoke--;
4808 	pl_yylval = PL_nextval[PL_nexttoke];
4809 	if (!PL_nexttoke) {
4810 	    PL_lex_state = PL_lex_defer;
4811 	    PL_expect = PL_lex_expect;
4812 	    PL_lex_defer = LEX_NORMAL;
4813 	}
4814 #endif
4815 	{
4816 	    I32 next_type;
4817 #ifdef PERL_MAD
4818 	    next_type = PL_nexttoke[PL_lasttoke].next_type;
4819 #else
4820 	    next_type = PL_nexttype[PL_nexttoke];
4821 #endif
4822 	    if (next_type & (7<<24)) {
4823 		if (next_type & (1<<24)) {
4824 		    if (PL_lex_brackets > 100)
4825 			Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4826 		    PL_lex_brackstack[PL_lex_brackets++] =
4827 			(char) ((next_type >> 16) & 0xff);
4828 		}
4829 		if (next_type & (2<<24))
4830 		    PL_lex_allbrackets++;
4831 		if (next_type & (4<<24))
4832 		    PL_lex_allbrackets--;
4833 		next_type &= 0xffff;
4834 	    }
4835 	    return REPORT(next_type == 'p' ? pending_ident() : next_type);
4836 	}
4837 
4838     /* interpolated case modifiers like \L \U, including \Q and \E.
4839        when we get here, PL_bufptr is at the \
4840     */
4841     case LEX_INTERPCASEMOD:
4842 #ifdef DEBUGGING
4843 	if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4844 	    Perl_croak(aTHX_
4845 		       "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4846 		       PL_bufptr, PL_bufend, *PL_bufptr);
4847 #endif
4848 	/* handle \E or end of string */
4849        	if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4850 	    /* if at a \E */
4851 	    if (PL_lex_casemods) {
4852 		const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4853 		PL_lex_casestack[PL_lex_casemods] = '\0';
4854 
4855 		if (PL_bufptr != PL_bufend
4856 		    && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4857                         || oldmod == 'F')) {
4858 		    PL_bufptr += 2;
4859 		    PL_lex_state = LEX_INTERPCONCAT;
4860 #ifdef PERL_MAD
4861 		    if (PL_madskills)
4862 			PL_thistoken = newSVpvs("\\E");
4863 #endif
4864 		}
4865 		PL_lex_allbrackets--;
4866 		return REPORT(')');
4867 	    }
4868             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4869                /* Got an unpaired \E */
4870                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4871                         "Useless use of \\E");
4872             }
4873 #ifdef PERL_MAD
4874 	    while (PL_bufptr != PL_bufend &&
4875 	      PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4876 		if (PL_madskills) {
4877 		  if (!PL_thiswhite)
4878 		    PL_thiswhite = newSVpvs("");
4879 		  sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4880 		}
4881 		PL_bufptr += 2;
4882 	    }
4883 #else
4884 	    if (PL_bufptr != PL_bufend)
4885 		PL_bufptr += 2;
4886 #endif
4887 	    PL_lex_state = LEX_INTERPCONCAT;
4888 	    return yylex();
4889 	}
4890 	else {
4891 	    DEBUG_T({ PerlIO_printf(Perl_debug_log,
4892               "### Saw case modifier\n"); });
4893 	    s = PL_bufptr + 1;
4894 	    if (s[1] == '\\' && s[2] == 'E') {
4895 #ifdef PERL_MAD
4896 		if (PL_madskills) {
4897 		  if (!PL_thiswhite)
4898 		    PL_thiswhite = newSVpvs("");
4899 		  sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4900 		}
4901 #endif
4902 	        PL_bufptr = s + 3;
4903 		PL_lex_state = LEX_INTERPCONCAT;
4904 		return yylex();
4905 	    }
4906 	    else {
4907 		I32 tmp;
4908 		if (!PL_madskills) /* when just compiling don't need correct */
4909 		    if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4910 			tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
4911 		if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4912 		    (strchr(PL_lex_casestack, 'L')
4913                         || strchr(PL_lex_casestack, 'U')
4914                         || strchr(PL_lex_casestack, 'F'))) {
4915 		    PL_lex_casestack[--PL_lex_casemods] = '\0';
4916 		    PL_lex_allbrackets--;
4917 		    return REPORT(')');
4918 		}
4919 		if (PL_lex_casemods > 10)
4920 		    Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4921 		PL_lex_casestack[PL_lex_casemods++] = *s;
4922 		PL_lex_casestack[PL_lex_casemods] = '\0';
4923 		PL_lex_state = LEX_INTERPCONCAT;
4924 		start_force(PL_curforce);
4925 		NEXTVAL_NEXTTOKE.ival = 0;
4926 		force_next((2<<24)|'(');
4927 		start_force(PL_curforce);
4928 		if (*s == 'l')
4929 		    NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4930 		else if (*s == 'u')
4931 		    NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4932 		else if (*s == 'L')
4933 		    NEXTVAL_NEXTTOKE.ival = OP_LC;
4934 		else if (*s == 'U')
4935 		    NEXTVAL_NEXTTOKE.ival = OP_UC;
4936 		else if (*s == 'Q')
4937 		    NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4938                 else if (*s == 'F')
4939 		    NEXTVAL_NEXTTOKE.ival = OP_FC;
4940 		else
4941 		    Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4942 		if (PL_madskills) {
4943 		    SV* const tmpsv = newSVpvs("\\ ");
4944 		    /* replace the space with the character we want to escape
4945 		     */
4946 		    SvPVX(tmpsv)[1] = *s;
4947 		    curmad('_', tmpsv);
4948 		}
4949 		PL_bufptr = s + 1;
4950 	    }
4951 	    force_next(FUNC);
4952 	    if (PL_lex_starts) {
4953 		s = PL_bufptr;
4954 		PL_lex_starts = 0;
4955 #ifdef PERL_MAD
4956 		if (PL_madskills) {
4957 		    if (PL_thistoken)
4958 			sv_free(PL_thistoken);
4959 		    PL_thistoken = newSVpvs("");
4960 		}
4961 #endif
4962 		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4963 		if (PL_lex_casemods == 1 && PL_lex_inpat)
4964 		    OPERATOR(',');
4965 		else
4966 		    Aop(OP_CONCAT);
4967 	    }
4968 	    else
4969 		return yylex();
4970 	}
4971 
4972     case LEX_INTERPPUSH:
4973         return REPORT(sublex_push());
4974 
4975     case LEX_INTERPSTART:
4976 	if (PL_bufptr == PL_bufend)
4977 	    return REPORT(sublex_done());
4978 	DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4979               "### Interpolated variable\n"); });
4980 	PL_expect = XTERM;
4981         /* for /@a/, we leave the joining for the regex engine to do
4982          * (unless we're within \Q etc) */
4983 	PL_lex_dojoin = (*PL_bufptr == '@'
4984                             && (!PL_lex_inpat || PL_lex_casemods));
4985 	PL_lex_state = LEX_INTERPNORMAL;
4986 	if (PL_lex_dojoin) {
4987 	    start_force(PL_curforce);
4988 	    NEXTVAL_NEXTTOKE.ival = 0;
4989 	    force_next(',');
4990 	    start_force(PL_curforce);
4991 	    force_ident("\"", '$');
4992 	    start_force(PL_curforce);
4993 	    NEXTVAL_NEXTTOKE.ival = 0;
4994 	    force_next('$');
4995 	    start_force(PL_curforce);
4996 	    NEXTVAL_NEXTTOKE.ival = 0;
4997 	    force_next((2<<24)|'(');
4998 	    start_force(PL_curforce);
4999 	    NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
5000 	    force_next(FUNC);
5001 	}
5002 	/* Convert (?{...}) and friends to 'do {...}' */
5003 	if (PL_lex_inpat && *PL_bufptr == '(') {
5004 	    PL_parser->lex_shared->re_eval_start = PL_bufptr;
5005 	    PL_bufptr += 2;
5006 	    if (*PL_bufptr != '{')
5007 		PL_bufptr++;
5008 	    start_force(PL_curforce);
5009 	    /* XXX probably need a CURMAD(something) here */
5010 	    PL_expect = XTERMBLOCK;
5011 	    force_next(DO);
5012 	}
5013 
5014 	if (PL_lex_starts++) {
5015 	    s = PL_bufptr;
5016 #ifdef PERL_MAD
5017 	    if (PL_madskills) {
5018 		if (PL_thistoken)
5019 		    sv_free(PL_thistoken);
5020 		PL_thistoken = newSVpvs("");
5021 	    }
5022 #endif
5023 	    /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5024 	    if (!PL_lex_casemods && PL_lex_inpat)
5025 		OPERATOR(',');
5026 	    else
5027 		Aop(OP_CONCAT);
5028 	}
5029 	return yylex();
5030 
5031     case LEX_INTERPENDMAYBE:
5032 	if (intuit_more(PL_bufptr)) {
5033 	    PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
5034 	    break;
5035 	}
5036 	/* FALL THROUGH */
5037 
5038     case LEX_INTERPEND:
5039 	if (PL_lex_dojoin) {
5040 	    const U8 dojoin_was = PL_lex_dojoin;
5041 	    PL_lex_dojoin = FALSE;
5042 	    PL_lex_state = LEX_INTERPCONCAT;
5043 #ifdef PERL_MAD
5044 	    if (PL_madskills) {
5045 		if (PL_thistoken)
5046 		    sv_free(PL_thistoken);
5047 		PL_thistoken = newSVpvs("");
5048 	    }
5049 #endif
5050 	    PL_lex_allbrackets--;
5051 	    return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5052 	}
5053 	if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5054 	    && SvEVALED(PL_lex_repl))
5055 	{
5056 	    if (PL_bufptr != PL_bufend)
5057 		Perl_croak(aTHX_ "Bad evalled substitution pattern");
5058 	    PL_lex_repl = NULL;
5059 	}
5060 	/* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5061 	   re_eval_str.  If the here-doc body’s length equals the previous
5062 	   value of re_eval_start, re_eval_start will now be null.  So
5063 	   check re_eval_str as well. */
5064 	if (PL_parser->lex_shared->re_eval_start
5065 	 || PL_parser->lex_shared->re_eval_str) {
5066 	    SV *sv;
5067 	    if (*PL_bufptr != ')')
5068 		Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5069 	    PL_bufptr++;
5070 	    /* having compiled a (?{..}) expression, return the original
5071 	     * text too, as a const */
5072 	    if (PL_parser->lex_shared->re_eval_str) {
5073 		sv = PL_parser->lex_shared->re_eval_str;
5074 		PL_parser->lex_shared->re_eval_str = NULL;
5075 		SvCUR_set(sv,
5076 			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5077 		SvPV_shrink_to_cur(sv);
5078 	    }
5079 	    else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5080 			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5081 	    start_force(PL_curforce);
5082 	    /* XXX probably need a CURMAD(something) here */
5083 	    NEXTVAL_NEXTTOKE.opval =
5084 		    (OP*)newSVOP(OP_CONST, 0,
5085 				 sv);
5086 	    force_next(THING);
5087 	    PL_parser->lex_shared->re_eval_start = NULL;
5088 	    PL_expect = XTERM;
5089 	    return REPORT(',');
5090 	}
5091 
5092 	/* FALLTHROUGH */
5093     case LEX_INTERPCONCAT:
5094 #ifdef DEBUGGING
5095 	if (PL_lex_brackets)
5096 	    Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5097 		       (long) PL_lex_brackets);
5098 #endif
5099 	if (PL_bufptr == PL_bufend)
5100 	    return REPORT(sublex_done());
5101 
5102 	/* m'foo' still needs to be parsed for possible (?{...}) */
5103 	if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5104 	    SV *sv = newSVsv(PL_linestr);
5105 	    sv = tokeq(sv);
5106 	    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5107 	    s = PL_bufend;
5108 	}
5109 	else {
5110 	    s = scan_const(PL_bufptr);
5111 	    if (*s == '\\')
5112 		PL_lex_state = LEX_INTERPCASEMOD;
5113 	    else
5114 		PL_lex_state = LEX_INTERPSTART;
5115 	}
5116 
5117 	if (s != PL_bufptr) {
5118 	    start_force(PL_curforce);
5119 	    if (PL_madskills) {
5120 		curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5121 	    }
5122 	    NEXTVAL_NEXTTOKE = pl_yylval;
5123 	    PL_expect = XTERM;
5124 	    force_next(THING);
5125 	    if (PL_lex_starts++) {
5126 #ifdef PERL_MAD
5127 		if (PL_madskills) {
5128 		    if (PL_thistoken)
5129 			sv_free(PL_thistoken);
5130 		    PL_thistoken = newSVpvs("");
5131 		}
5132 #endif
5133 		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5134 		if (!PL_lex_casemods && PL_lex_inpat)
5135 		    OPERATOR(',');
5136 		else
5137 		    Aop(OP_CONCAT);
5138 	    }
5139 	    else {
5140 		PL_bufptr = s;
5141 		return yylex();
5142 	    }
5143 	}
5144 
5145 	return yylex();
5146     case LEX_FORMLINE:
5147 	s = scan_formline(PL_bufptr);
5148 	if (!PL_lex_formbrack)
5149 	{
5150 	    formbrack = 1;
5151 	    goto rightbracket;
5152 	}
5153 	PL_bufptr = s;
5154 	return yylex();
5155     }
5156 
5157     /* We really do *not* want PL_linestr ever becoming a COW. */
5158     assert (!SvIsCOW(PL_linestr));
5159     s = PL_bufptr;
5160     PL_oldoldbufptr = PL_oldbufptr;
5161     PL_oldbufptr = s;
5162     PL_parser->saw_infix_sigil = 0;
5163 
5164   retry:
5165 #ifdef PERL_MAD
5166     if (PL_thistoken) {
5167 	sv_free(PL_thistoken);
5168 	PL_thistoken = 0;
5169     }
5170     PL_realtokenstart = s - SvPVX(PL_linestr);	/* assume but undo on ws */
5171 #endif
5172     switch (*s) {
5173     default:
5174 	if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5175 	    goto keylookup;
5176 	{
5177         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5178         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5179                                                     UTF8SKIP(s),
5180                                                     SVs_TEMP | SVf_UTF8),
5181                                             10, UNI_DISPLAY_ISPRINT)
5182                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5183         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5184         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5185             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5186         } else {
5187             d = PL_linestart;
5188         }
5189         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5190                           UTF8fARG(UTF, (s - d), d),
5191                          (int) len + 1);
5192     }
5193     case 4:
5194     case 26:
5195 	goto fake_eof;			/* emulate EOF on ^D or ^Z */
5196     case 0:
5197 #ifdef PERL_MAD
5198 	if (PL_madskills)
5199 	    PL_faketokens = 0;
5200 #endif
5201 	if ((!PL_rsfp || PL_lex_inwhat)
5202 	 && (!PL_parser->filtered || s+1 < PL_bufend)) {
5203 	    PL_last_uni = 0;
5204 	    PL_last_lop = 0;
5205 	    if (PL_lex_brackets &&
5206 		    PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5207 		yyerror((const char *)
5208 			(PL_lex_formbrack
5209 			 ? "Format not terminated"
5210 			 : "Missing right curly or square bracket"));
5211 	    }
5212             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5213                         "### Tokener got EOF\n");
5214             } );
5215 	    TOKEN(0);
5216 	}
5217 	if (s++ < PL_bufend)
5218 	    goto retry;			/* ignore stray nulls */
5219 	PL_last_uni = 0;
5220 	PL_last_lop = 0;
5221 	if (!PL_in_eval && !PL_preambled) {
5222 	    PL_preambled = TRUE;
5223 #ifdef PERL_MAD
5224 	    if (PL_madskills)
5225 		PL_faketokens = 1;
5226 #endif
5227 	    if (PL_perldb) {
5228 		/* Generate a string of Perl code to load the debugger.
5229 		 * If PERL5DB is set, it will return the contents of that,
5230 		 * otherwise a compile-time require of perl5db.pl.  */
5231 
5232 		const char * const pdb = PerlEnv_getenv("PERL5DB");
5233 
5234 		if (pdb) {
5235 		    sv_setpv(PL_linestr, pdb);
5236 		    sv_catpvs(PL_linestr,";");
5237 		} else {
5238 		    SETERRNO(0,SS_NORMAL);
5239 		    sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5240 		}
5241 		PL_parser->preambling = CopLINE(PL_curcop);
5242 	    } else
5243 		sv_setpvs(PL_linestr,"");
5244 	    if (PL_preambleav) {
5245 		SV **svp = AvARRAY(PL_preambleav);
5246 		SV **const end = svp + AvFILLp(PL_preambleav);
5247 		while(svp <= end) {
5248 		    sv_catsv(PL_linestr, *svp);
5249 		    ++svp;
5250 		    sv_catpvs(PL_linestr, ";");
5251 		}
5252 		sv_free(MUTABLE_SV(PL_preambleav));
5253 		PL_preambleav = NULL;
5254 	    }
5255 	    if (PL_minus_E)
5256 		sv_catpvs(PL_linestr,
5257 			  "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5258 	    if (PL_minus_n || PL_minus_p) {
5259 		sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5260 		if (PL_minus_l)
5261 		    sv_catpvs(PL_linestr,"chomp;");
5262 		if (PL_minus_a) {
5263 		    if (PL_minus_F) {
5264 			if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5265 			     || *PL_splitstr == '"')
5266 			      && strchr(PL_splitstr + 1, *PL_splitstr))
5267 			    Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5268 			else {
5269 			    /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5270 			       bytes can be used as quoting characters.  :-) */
5271 			    const char *splits = PL_splitstr;
5272 			    sv_catpvs(PL_linestr, "our @F=split(q\0");
5273 			    do {
5274 				/* Need to \ \s  */
5275 				if (*splits == '\\')
5276 				    sv_catpvn(PL_linestr, splits, 1);
5277 				sv_catpvn(PL_linestr, splits, 1);
5278 			    } while (*splits++);
5279 			    /* This loop will embed the trailing NUL of
5280 			       PL_linestr as the last thing it does before
5281 			       terminating.  */
5282 			    sv_catpvs(PL_linestr, ");");
5283 			}
5284 		    }
5285 		    else
5286 		        sv_catpvs(PL_linestr,"our @F=split(' ');");
5287 		}
5288 	    }
5289 	    sv_catpvs(PL_linestr, "\n");
5290 	    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5291 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5292 	    PL_last_lop = PL_last_uni = NULL;
5293 	    if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5294 		update_debugger_info(PL_linestr, NULL, 0);
5295 	    goto retry;
5296 	}
5297 	do {
5298 	    fake_eof = 0;
5299 	    bof = PL_rsfp ? TRUE : FALSE;
5300 	    if (0) {
5301 	      fake_eof:
5302 		fake_eof = LEX_FAKE_EOF;
5303 	    }
5304 	    PL_bufptr = PL_bufend;
5305 	    COPLINE_INC_WITH_HERELINES;
5306 	    if (!lex_next_chunk(fake_eof)) {
5307 		CopLINE_dec(PL_curcop);
5308 		s = PL_bufptr;
5309 		TOKEN(';');	/* not infinite loop because rsfp is NULL now */
5310 	    }
5311 	    CopLINE_dec(PL_curcop);
5312 #ifdef PERL_MAD
5313 	    if (!PL_rsfp)
5314 		PL_realtokenstart = -1;
5315 #endif
5316 	    s = PL_bufptr;
5317 	    /* If it looks like the start of a BOM or raw UTF-16,
5318 	     * check if it in fact is. */
5319 	    if (bof && PL_rsfp &&
5320 		     (*s == 0 ||
5321 		      *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5322 		      *(U8*)s >= 0xFE ||
5323 		      s[1] == 0)) {
5324 		Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5325 		bof = (offset == (Off_t)SvCUR(PL_linestr));
5326 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5327 		/* offset may include swallowed CR */
5328 		if (!bof)
5329 		    bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5330 #endif
5331 		if (bof) {
5332 		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5333 		    s = swallow_bom((U8*)s);
5334 		}
5335 	    }
5336 	    if (PL_parser->in_pod) {
5337 		/* Incest with pod. */
5338 #ifdef PERL_MAD
5339 		if (PL_madskills)
5340 		    sv_catsv(PL_thiswhite, PL_linestr);
5341 #endif
5342 		if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5343 		    sv_setpvs(PL_linestr, "");
5344 		    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5345 		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5346 		    PL_last_lop = PL_last_uni = NULL;
5347 		    PL_parser->in_pod = 0;
5348 		}
5349 	    }
5350 	    if (PL_rsfp || PL_parser->filtered)
5351 		incline(s);
5352 	} while (PL_parser->in_pod);
5353 	PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5354 	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5355 	PL_last_lop = PL_last_uni = NULL;
5356 	if (CopLINE(PL_curcop) == 1) {
5357 	    while (s < PL_bufend && isSPACE(*s))
5358 		s++;
5359 	    if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5360 		s++;
5361 #ifdef PERL_MAD
5362 	    if (PL_madskills)
5363 		PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5364 #endif
5365 	    d = NULL;
5366 	    if (!PL_in_eval) {
5367 		if (*s == '#' && *(s+1) == '!')
5368 		    d = s + 2;
5369 #ifdef ALTERNATE_SHEBANG
5370 		else {
5371 		    static char const as[] = ALTERNATE_SHEBANG;
5372 		    if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5373 			d = s + (sizeof(as) - 1);
5374 		}
5375 #endif /* ALTERNATE_SHEBANG */
5376 	    }
5377 	    if (d) {
5378 		char *ipath;
5379 		char *ipathend;
5380 
5381 		while (isSPACE(*d))
5382 		    d++;
5383 		ipath = d;
5384 		while (*d && !isSPACE(*d))
5385 		    d++;
5386 		ipathend = d;
5387 
5388 #ifdef ARG_ZERO_IS_SCRIPT
5389 		if (ipathend > ipath) {
5390 		    /*
5391 		     * HP-UX (at least) sets argv[0] to the script name,
5392 		     * which makes $^X incorrect.  And Digital UNIX and Linux,
5393 		     * at least, set argv[0] to the basename of the Perl
5394 		     * interpreter. So, having found "#!", we'll set it right.
5395 		     */
5396 		    SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5397 						    SVt_PV)); /* $^X */
5398 		    assert(SvPOK(x) || SvGMAGICAL(x));
5399 		    if (sv_eq(x, CopFILESV(PL_curcop))) {
5400 			sv_setpvn(x, ipath, ipathend - ipath);
5401 			SvSETMAGIC(x);
5402 		    }
5403 		    else {
5404 			STRLEN blen;
5405 			STRLEN llen;
5406 			const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5407 			const char * const lstart = SvPV_const(x,llen);
5408 			if (llen < blen) {
5409 			    bstart += blen - llen;
5410 			    if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
5411 				sv_setpvn(x, ipath, ipathend - ipath);
5412 				SvSETMAGIC(x);
5413 			    }
5414 			}
5415 		    }
5416 		    TAINT_NOT;	/* $^X is always tainted, but that's OK */
5417 		}
5418 #endif /* ARG_ZERO_IS_SCRIPT */
5419 
5420 		/*
5421 		 * Look for options.
5422 		 */
5423 		d = instr(s,"perl -");
5424 		if (!d) {
5425 		    d = instr(s,"perl");
5426 #if defined(DOSISH)
5427 		    /* avoid getting into infinite loops when shebang
5428 		     * line contains "Perl" rather than "perl" */
5429 		    if (!d) {
5430 			for (d = ipathend-4; d >= ipath; --d) {
5431 			    if ((*d == 'p' || *d == 'P')
5432 				&& !ibcmp(d, "perl", 4))
5433 			    {
5434 				break;
5435 			    }
5436 			}
5437 			if (d < ipath)
5438 			    d = NULL;
5439 		    }
5440 #endif
5441 		}
5442 #ifdef ALTERNATE_SHEBANG
5443 		/*
5444 		 * If the ALTERNATE_SHEBANG on this system starts with a
5445 		 * character that can be part of a Perl expression, then if
5446 		 * we see it but not "perl", we're probably looking at the
5447 		 * start of Perl code, not a request to hand off to some
5448 		 * other interpreter.  Similarly, if "perl" is there, but
5449 		 * not in the first 'word' of the line, we assume the line
5450 		 * contains the start of the Perl program.
5451 		 */
5452 		if (d && *s != '#') {
5453 		    const char *c = ipath;
5454 		    while (*c && !strchr("; \t\r\n\f\v#", *c))
5455 			c++;
5456 		    if (c < d)
5457 			d = NULL;	/* "perl" not in first word; ignore */
5458 		    else
5459 			*s = '#';	/* Don't try to parse shebang line */
5460 		}
5461 #endif /* ALTERNATE_SHEBANG */
5462 		if (!d &&
5463 		    *s == '#' &&
5464 		    ipathend > ipath &&
5465 		    !PL_minus_c &&
5466 		    !instr(s,"indir") &&
5467 		    instr(PL_origargv[0],"perl"))
5468 		{
5469 		    dVAR;
5470 		    char **newargv;
5471 
5472 		    *ipathend = '\0';
5473 		    s = ipathend + 1;
5474 		    while (s < PL_bufend && isSPACE(*s))
5475 			s++;
5476 		    if (s < PL_bufend) {
5477 			Newx(newargv,PL_origargc+3,char*);
5478 			newargv[1] = s;
5479 			while (s < PL_bufend && !isSPACE(*s))
5480 			    s++;
5481 			*s = '\0';
5482 			Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5483 		    }
5484 		    else
5485 			newargv = PL_origargv;
5486 		    newargv[0] = ipath;
5487 		    PERL_FPU_PRE_EXEC
5488 		    PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5489 		    PERL_FPU_POST_EXEC
5490 		    Perl_croak(aTHX_ "Can't exec %s", ipath);
5491 		}
5492 		if (d) {
5493 		    while (*d && !isSPACE(*d))
5494 			d++;
5495 		    while (SPACE_OR_TAB(*d))
5496 			d++;
5497 
5498 		    if (*d++ == '-') {
5499 			const bool switches_done = PL_doswitches;
5500 			const U32 oldpdb = PL_perldb;
5501 			const bool oldn = PL_minus_n;
5502 			const bool oldp = PL_minus_p;
5503 			const char *d1 = d;
5504 
5505 			do {
5506 			    bool baduni = FALSE;
5507 			    if (*d1 == 'C') {
5508 				const char *d2 = d1 + 1;
5509 				if (parse_unicode_opts((const char **)&d2)
5510 				    != PL_unicode)
5511 				    baduni = TRUE;
5512 			    }
5513 			    if (baduni || *d1 == 'M' || *d1 == 'm') {
5514 				const char * const m = d1;
5515 				while (*d1 && !isSPACE(*d1))
5516 				    d1++;
5517 				Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5518 				      (int)(d1 - m), m);
5519 			    }
5520 			    d1 = moreswitches(d1);
5521 			} while (d1);
5522 			if (PL_doswitches && !switches_done) {
5523 			    int argc = PL_origargc;
5524 			    char **argv = PL_origargv;
5525 			    do {
5526 				argc--,argv++;
5527 			    } while (argc && argv[0][0] == '-' && argv[0][1]);
5528 			    init_argv_symbols(argc,argv);
5529 			}
5530 			if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5531 			    ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5532 			      /* if we have already added "LINE: while (<>) {",
5533 			         we must not do it again */
5534 			{
5535 			    sv_setpvs(PL_linestr, "");
5536 			    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5537 			    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5538 			    PL_last_lop = PL_last_uni = NULL;
5539 			    PL_preambled = FALSE;
5540 			    if (PERLDB_LINE || PERLDB_SAVESRC)
5541 				(void)gv_fetchfile(PL_origfilename);
5542 			    goto retry;
5543 			}
5544 		    }
5545 		}
5546 	    }
5547 	}
5548 	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5549 	    PL_lex_state = LEX_FORMLINE;
5550 	    start_force(PL_curforce);
5551 	    NEXTVAL_NEXTTOKE.ival = 0;
5552 	    force_next(FORMRBRACK);
5553 	    TOKEN(';');
5554 	}
5555 	goto retry;
5556     case '\r':
5557 #ifdef PERL_STRICT_CR
5558 	Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5559 	Perl_croak(aTHX_
5560       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5561 #endif
5562     case ' ': case '\t': case '\f': case 013:
5563 #ifdef PERL_MAD
5564 	PL_realtokenstart = -1;
5565 	if (PL_madskills) {
5566 	  if (!PL_thiswhite)
5567 	    PL_thiswhite = newSVpvs("");
5568 	  sv_catpvn(PL_thiswhite, s, 1);
5569 	}
5570 #endif
5571 	s++;
5572 	goto retry;
5573     case '#':
5574     case '\n':
5575 #ifdef PERL_MAD
5576 	PL_realtokenstart = -1;
5577 	if (PL_madskills)
5578 	    PL_faketokens = 0;
5579 #endif
5580 	if (PL_lex_state != LEX_NORMAL ||
5581 	     (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5582 	    if (*s == '#' && s == PL_linestart && PL_in_eval
5583 	     && !PL_rsfp && !PL_parser->filtered) {
5584 		/* handle eval qq[#line 1 "foo"\n ...] */
5585 		CopLINE_dec(PL_curcop);
5586 		incline(s);
5587 	    }
5588 	    if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5589 		s = SKIPSPACE0(s);
5590 		if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5591 		    incline(s);
5592 	    }
5593 	    else {
5594 		const bool in_comment = *s == '#';
5595 		d = s;
5596 		while (d < PL_bufend && *d != '\n')
5597 		    d++;
5598 		if (d < PL_bufend)
5599 		    d++;
5600 		else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5601 		    Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5602 			       d, PL_bufend);
5603 #ifdef PERL_MAD
5604 		if (PL_madskills)
5605 		    PL_thiswhite = newSVpvn(s, d - s);
5606 #endif
5607 		s = d;
5608 		if (in_comment && d == PL_bufend
5609 		 && PL_lex_state == LEX_INTERPNORMAL
5610 		 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5611 		 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5612 		else incline(s);
5613 	    }
5614 	    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5615 		PL_lex_state = LEX_FORMLINE;
5616 		start_force(PL_curforce);
5617 		NEXTVAL_NEXTTOKE.ival = 0;
5618 		force_next(FORMRBRACK);
5619 		TOKEN(';');
5620 	    }
5621 	}
5622 	else {
5623 #ifdef PERL_MAD
5624 	    if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5625 		if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5626 		    PL_faketokens = 0;
5627 		    s = SKIPSPACE0(s);
5628 		    TOKEN(PEG);	/* make sure any #! line is accessible */
5629 		}
5630 		s = SKIPSPACE0(s);
5631 	    }
5632 	    else {
5633 #endif
5634 		    if (PL_madskills) d = s;
5635 		    while (s < PL_bufend && *s != '\n')
5636 			s++;
5637 		    if (s < PL_bufend)
5638 		    {
5639 			s++;
5640 			if (s < PL_bufend)
5641 			    incline(s);
5642 		    }
5643 		    else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5644 		      Perl_croak(aTHX_ "panic: input overflow");
5645 #ifdef PERL_MAD
5646 		    if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5647 			if (!PL_thiswhite)
5648 			    PL_thiswhite = newSVpvs("");
5649 			if (CopLINE(PL_curcop) == 1) {
5650 			    sv_setpvs(PL_thiswhite, "");
5651 			    PL_faketokens = 0;
5652 			}
5653 			sv_catpvn(PL_thiswhite, d, s - d);
5654 		    }
5655 	    }
5656 #endif
5657 	}
5658 	goto retry;
5659     case '-':
5660 	if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5661 	    I32 ftst = 0;
5662 	    char tmp;
5663 
5664 	    s++;
5665 	    PL_bufptr = s;
5666 	    tmp = *s++;
5667 
5668 	    while (s < PL_bufend && SPACE_OR_TAB(*s))
5669 		s++;
5670 
5671 	    if (strnEQ(s,"=>",2)) {
5672 		s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5673 		DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5674 		OPERATOR('-');		/* unary minus */
5675 	    }
5676 	    switch (tmp) {
5677 	    case 'r': ftst = OP_FTEREAD;	break;
5678 	    case 'w': ftst = OP_FTEWRITE;	break;
5679 	    case 'x': ftst = OP_FTEEXEC;	break;
5680 	    case 'o': ftst = OP_FTEOWNED;	break;
5681 	    case 'R': ftst = OP_FTRREAD;	break;
5682 	    case 'W': ftst = OP_FTRWRITE;	break;
5683 	    case 'X': ftst = OP_FTREXEC;	break;
5684 	    case 'O': ftst = OP_FTROWNED;	break;
5685 	    case 'e': ftst = OP_FTIS;		break;
5686 	    case 'z': ftst = OP_FTZERO;		break;
5687 	    case 's': ftst = OP_FTSIZE;		break;
5688 	    case 'f': ftst = OP_FTFILE;		break;
5689 	    case 'd': ftst = OP_FTDIR;		break;
5690 	    case 'l': ftst = OP_FTLINK;		break;
5691 	    case 'p': ftst = OP_FTPIPE;		break;
5692 	    case 'S': ftst = OP_FTSOCK;		break;
5693 	    case 'u': ftst = OP_FTSUID;		break;
5694 	    case 'g': ftst = OP_FTSGID;		break;
5695 	    case 'k': ftst = OP_FTSVTX;		break;
5696 	    case 'b': ftst = OP_FTBLK;		break;
5697 	    case 'c': ftst = OP_FTCHR;		break;
5698 	    case 't': ftst = OP_FTTTY;		break;
5699 	    case 'T': ftst = OP_FTTEXT;		break;
5700 	    case 'B': ftst = OP_FTBINARY;	break;
5701 	    case 'M': case 'A': case 'C':
5702 		gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5703 		switch (tmp) {
5704 		case 'M': ftst = OP_FTMTIME;	break;
5705 		case 'A': ftst = OP_FTATIME;	break;
5706 		case 'C': ftst = OP_FTCTIME;	break;
5707 		default:			break;
5708 		}
5709 		break;
5710 	    default:
5711 		break;
5712 	    }
5713 	    if (ftst) {
5714                 PL_last_uni = PL_oldbufptr;
5715 		PL_last_lop_op = (OPCODE)ftst;
5716 		DEBUG_T( { PerlIO_printf(Perl_debug_log,
5717                         "### Saw file test %c\n", (int)tmp);
5718 		} );
5719 		FTST(ftst);
5720 	    }
5721 	    else {
5722 		/* Assume it was a minus followed by a one-letter named
5723 		 * subroutine call (or a -bareword), then. */
5724 		DEBUG_T( { PerlIO_printf(Perl_debug_log,
5725 			"### '-%c' looked like a file test but was not\n",
5726 			(int) tmp);
5727 		} );
5728 		s = --PL_bufptr;
5729 	    }
5730 	}
5731 	{
5732 	    const char tmp = *s++;
5733 	    if (*s == tmp) {
5734 		s++;
5735 		if (PL_expect == XOPERATOR)
5736 		    TERM(POSTDEC);
5737 		else
5738 		    OPERATOR(PREDEC);
5739 	    }
5740 	    else if (*s == '>') {
5741 		s++;
5742 		s = SKIPSPACE1(s);
5743 		if (FEATURE_POSTDEREF_IS_ENABLED && (
5744 		    ((*s == '$' || *s == '&') && s[1] == '*')
5745 		  ||(*s == '$' && s[1] == '#' && s[2] == '*')
5746 		  ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5747 		  ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5748 		 ))
5749 		{
5750 		    Perl_ck_warner_d(aTHX_
5751 			packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5752 			"Postfix dereference is experimental"
5753 		    );
5754 		    PL_expect = XPOSTDEREF;
5755 		    TOKEN(ARROW);
5756 		}
5757 		if (isIDFIRST_lazy_if(s,UTF)) {
5758 		    s = force_word(s,METHOD,FALSE,TRUE);
5759 		    TOKEN(ARROW);
5760 		}
5761 		else if (*s == '$')
5762 		    OPERATOR(ARROW);
5763 		else
5764 		    TERM(ARROW);
5765 	    }
5766 	    if (PL_expect == XOPERATOR) {
5767 		if (*s == '=' && !PL_lex_allbrackets &&
5768 			PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5769 		    s--;
5770 		    TOKEN(0);
5771 		}
5772 		Aop(OP_SUBTRACT);
5773 	    }
5774 	    else {
5775 		if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5776 		    check_uni();
5777 		OPERATOR('-');		/* unary minus */
5778 	    }
5779 	}
5780 
5781     case '+':
5782 	{
5783 	    const char tmp = *s++;
5784 	    if (*s == tmp) {
5785 		s++;
5786 		if (PL_expect == XOPERATOR)
5787 		    TERM(POSTINC);
5788 		else
5789 		    OPERATOR(PREINC);
5790 	    }
5791 	    if (PL_expect == XOPERATOR) {
5792 		if (*s == '=' && !PL_lex_allbrackets &&
5793 			PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5794 		    s--;
5795 		    TOKEN(0);
5796 		}
5797 		Aop(OP_ADD);
5798 	    }
5799 	    else {
5800 		if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5801 		    check_uni();
5802 		OPERATOR('+');
5803 	    }
5804 	}
5805 
5806     case '*':
5807 	if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5808 	if (PL_expect != XOPERATOR) {
5809 	    s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5810 	    PL_expect = XOPERATOR;
5811 	    force_ident(PL_tokenbuf, '*');
5812 	    if (!*PL_tokenbuf)
5813 		PREREF('*');
5814 	    TERM('*');
5815 	}
5816 	s++;
5817 	if (*s == '*') {
5818 	    s++;
5819 	    if (*s == '=' && !PL_lex_allbrackets &&
5820 		    PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5821 		s -= 2;
5822 		TOKEN(0);
5823 	    }
5824 	    PWop(OP_POW);
5825 	}
5826 	if (*s == '=' && !PL_lex_allbrackets &&
5827 		PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5828 	    s--;
5829 	    TOKEN(0);
5830 	}
5831 	PL_parser->saw_infix_sigil = 1;
5832 	Mop(OP_MULTIPLY);
5833 
5834     case '%':
5835     {
5836 	if (PL_expect == XOPERATOR) {
5837 	    if (s[1] == '=' && !PL_lex_allbrackets &&
5838 		    PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5839 		TOKEN(0);
5840 	    ++s;
5841 	    PL_parser->saw_infix_sigil = 1;
5842 	    Mop(OP_MODULO);
5843 	}
5844 	else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5845 	PL_tokenbuf[0] = '%';
5846 	s = scan_ident(s, PL_tokenbuf + 1,
5847 		sizeof PL_tokenbuf - 1, FALSE);
5848 	pl_yylval.ival = 0;
5849 	if (!PL_tokenbuf[1]) {
5850 	    PREREF('%');
5851 	}
5852 	if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5853 	    if (*s == '[')
5854 		PL_tokenbuf[0] = '@';
5855 	}
5856 	PL_expect = XOPERATOR;
5857 	force_ident_maybe_lex('%');
5858 	TERM('%');
5859     }
5860     case '^':
5861 	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5862 		(s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5863 	    TOKEN(0);
5864 	s++;
5865 	BOop(OP_BIT_XOR);
5866     case '[':
5867 	if (PL_lex_brackets > 100)
5868 	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5869 	PL_lex_brackstack[PL_lex_brackets++] = 0;
5870 	PL_lex_allbrackets++;
5871 	{
5872 	    const char tmp = *s++;
5873 	    OPERATOR(tmp);
5874 	}
5875     case '~':
5876 	if (s[1] == '~'
5877 	    && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5878 	{
5879 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5880 		TOKEN(0);
5881 	    s += 2;
5882             Perl_ck_warner_d(aTHX_
5883                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5884                 "Smartmatch is experimental");
5885 	    Eop(OP_SMARTMATCH);
5886 	}
5887 	s++;
5888 	OPERATOR('~');
5889     case ',':
5890 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5891 	    TOKEN(0);
5892 	s++;
5893 	OPERATOR(',');
5894     case ':':
5895 	if (s[1] == ':') {
5896 	    len = 0;
5897 	    goto just_a_word_zero_gv;
5898 	}
5899 	s++;
5900 	switch (PL_expect) {
5901 	    OP *attrs;
5902 #ifdef PERL_MAD
5903 	    I32 stuffstart;
5904 #endif
5905 	case XOPERATOR:
5906 	    if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5907 		break;
5908 	    PL_bufptr = s;	/* update in case we back off */
5909 	    if (*s == '=') {
5910 		Perl_croak(aTHX_
5911 			   "Use of := for an empty attribute list is not allowed");
5912 	    }
5913 	    goto grabattrs;
5914 	case XATTRBLOCK:
5915 	    PL_expect = XBLOCK;
5916 	    goto grabattrs;
5917 	case XATTRTERM:
5918 	    PL_expect = XTERMBLOCK;
5919 	 grabattrs:
5920 #ifdef PERL_MAD
5921 	    stuffstart = s - SvPVX(PL_linestr) - 1;
5922 #endif
5923 	    s = PEEKSPACE(s);
5924 	    attrs = NULL;
5925 	    while (isIDFIRST_lazy_if(s,UTF)) {
5926 		I32 tmp;
5927 		SV *sv;
5928 		d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5929 		if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5930 		    if (tmp < 0) tmp = -tmp;
5931 		    switch (tmp) {
5932 		    case KEY_or:
5933 		    case KEY_and:
5934 		    case KEY_for:
5935 		    case KEY_foreach:
5936 		    case KEY_unless:
5937 		    case KEY_if:
5938 		    case KEY_while:
5939 		    case KEY_until:
5940 			goto got_attrs;
5941 		    default:
5942 			break;
5943 		    }
5944 		}
5945 		sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5946 		if (*d == '(') {
5947 		    d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5948 		    COPLINE_SET_FROM_MULTI_END;
5949 		    if (!d) {
5950 			/* MUST advance bufptr here to avoid bogus
5951 			   "at end of line" context messages from yyerror().
5952 			 */
5953 			PL_bufptr = s + len;
5954 			yyerror("Unterminated attribute parameter in attribute list");
5955 			if (attrs)
5956 			    op_free(attrs);
5957 			sv_free(sv);
5958 			return REPORT(0);	/* EOF indicator */
5959 		    }
5960 		}
5961 		if (PL_lex_stuff) {
5962 		    sv_catsv(sv, PL_lex_stuff);
5963 		    attrs = op_append_elem(OP_LIST, attrs,
5964 					newSVOP(OP_CONST, 0, sv));
5965 		    SvREFCNT_dec(PL_lex_stuff);
5966 		    PL_lex_stuff = NULL;
5967 		}
5968 		else {
5969 		    if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5970 			sv_free(sv);
5971 			if (PL_in_my == KEY_our) {
5972 			    deprecate(":unique");
5973 			}
5974 			else
5975 			    Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5976 		    }
5977 
5978 		    /* NOTE: any CV attrs applied here need to be part of
5979 		       the CVf_BUILTIN_ATTRS define in cv.h! */
5980 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5981 			sv_free(sv);
5982 			CvLVALUE_on(PL_compcv);
5983 		    }
5984 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5985 			sv_free(sv);
5986 			deprecate(":locked");
5987 		    }
5988 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5989 			sv_free(sv);
5990 			CvMETHOD_on(PL_compcv);
5991 		    }
5992 		    /* After we've set the flags, it could be argued that
5993 		       we don't need to do the attributes.pm-based setting
5994 		       process, and shouldn't bother appending recognized
5995 		       flags.  To experiment with that, uncomment the
5996 		       following "else".  (Note that's already been
5997 		       uncommented.  That keeps the above-applied built-in
5998 		       attributes from being intercepted (and possibly
5999 		       rejected) by a package's attribute routines, but is
6000 		       justified by the performance win for the common case
6001 		       of applying only built-in attributes.) */
6002 		    else
6003 		        attrs = op_append_elem(OP_LIST, attrs,
6004 					    newSVOP(OP_CONST, 0,
6005 					      	    sv));
6006 		}
6007 		s = PEEKSPACE(d);
6008 		if (*s == ':' && s[1] != ':')
6009 		    s = PEEKSPACE(s+1);
6010 		else if (s == d)
6011 		    break;	/* require real whitespace or :'s */
6012 		/* XXX losing whitespace on sequential attributes here */
6013 	    }
6014 	    {
6015 		if (*s != ';' && *s != '}' &&
6016 		    !(PL_expect == XOPERATOR
6017 			? (*s == '=' ||  *s == ')')
6018 			: (*s == '{' ||  *s == '('))) {
6019 		    const char q = ((*s == '\'') ? '"' : '\'');
6020 		    /* If here for an expression, and parsed no attrs, back
6021 		       off. */
6022 		    if (PL_expect == XOPERATOR && !attrs) {
6023 			s = PL_bufptr;
6024 			break;
6025 		    }
6026 		    /* MUST advance bufptr here to avoid bogus "at end of line"
6027 		       context messages from yyerror().
6028 		    */
6029 		    PL_bufptr = s;
6030 		    yyerror( (const char *)
6031 			     (*s
6032 			      ? Perl_form(aTHX_ "Invalid separator character "
6033 					  "%c%c%c in attribute list", q, *s, q)
6034 			      : "Unterminated attribute list" ) );
6035 		    if (attrs)
6036 			op_free(attrs);
6037 		    OPERATOR(':');
6038 		}
6039 	    }
6040 	got_attrs:
6041 	    if (attrs) {
6042 		start_force(PL_curforce);
6043 		NEXTVAL_NEXTTOKE.opval = attrs;
6044 		CURMAD('_', PL_nextwhite);
6045 		force_next(THING);
6046 	    }
6047 #ifdef PERL_MAD
6048 	    if (PL_madskills) {
6049 		PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6050 				     (s - SvPVX(PL_linestr)) - stuffstart);
6051 	    }
6052 #endif
6053 	    TOKEN(COLONATTR);
6054 	}
6055 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6056 	    s--;
6057 	    TOKEN(0);
6058 	}
6059 	PL_lex_allbrackets--;
6060 	OPERATOR(':');
6061     case '(':
6062 	s++;
6063 	if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6064 	    PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6065 	else
6066 	    PL_expect = XTERM;
6067 	s = SKIPSPACE1(s);
6068 	PL_lex_allbrackets++;
6069 	TOKEN('(');
6070     case ';':
6071 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6072 	    TOKEN(0);
6073 	CLINE;
6074 	s++;
6075 	OPERATOR(';');
6076     case ')':
6077 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6078 	    TOKEN(0);
6079 	s++;
6080 	PL_lex_allbrackets--;
6081 	s = SKIPSPACE1(s);
6082 	if (*s == '{')
6083 	    PREBLOCK(')');
6084 	TERM(')');
6085     case ']':
6086 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6087 	    TOKEN(0);
6088 	s++;
6089 	if (PL_lex_brackets <= 0)
6090 	    /* diag_listed_as: Unmatched right %s bracket */
6091 	    yyerror("Unmatched right square bracket");
6092 	else
6093 	    --PL_lex_brackets;
6094 	PL_lex_allbrackets--;
6095 	if (PL_lex_state == LEX_INTERPNORMAL) {
6096 	    if (PL_lex_brackets == 0) {
6097 		if (*s == '-' && s[1] == '>')
6098 		    PL_lex_state = LEX_INTERPENDMAYBE;
6099 		else if (*s != '[' && *s != '{')
6100 		    PL_lex_state = LEX_INTERPEND;
6101 	    }
6102 	}
6103 	TERM(']');
6104     case '{':
6105 	s++;
6106       leftbracket:
6107 	if (PL_lex_brackets > 100) {
6108 	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6109 	}
6110 	switch (PL_expect) {
6111 	case XTERM:
6112 	case XTERMORDORDOR:
6113 	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6114 	    PL_lex_allbrackets++;
6115 	    OPERATOR(HASHBRACK);
6116 	case XOPERATOR:
6117 	    while (s < PL_bufend && SPACE_OR_TAB(*s))
6118 		s++;
6119 	    d = s;
6120 	    PL_tokenbuf[0] = '\0';
6121 	    if (d < PL_bufend && *d == '-') {
6122 		PL_tokenbuf[0] = '-';
6123 		d++;
6124 		while (d < PL_bufend && SPACE_OR_TAB(*d))
6125 		    d++;
6126 	    }
6127 	    if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6128 		d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6129 			      FALSE, &len);
6130 		while (d < PL_bufend && SPACE_OR_TAB(*d))
6131 		    d++;
6132 		if (*d == '}') {
6133 		    const char minus = (PL_tokenbuf[0] == '-');
6134 		    s = force_word(s + minus, WORD, FALSE, TRUE);
6135 		    if (minus)
6136 			force_next('-');
6137 		}
6138 	    }
6139 	    /* FALL THROUGH */
6140 	case XATTRBLOCK:
6141 	case XBLOCK:
6142 	    PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6143 	    PL_lex_allbrackets++;
6144 	    PL_expect = XSTATE;
6145 	    break;
6146 	case XATTRTERM:
6147 	case XTERMBLOCK:
6148 	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6149 	    PL_lex_allbrackets++;
6150 	    PL_expect = XSTATE;
6151 	    break;
6152 	default: {
6153 		const char *t;
6154 		if (PL_oldoldbufptr == PL_last_lop)
6155 		    PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6156 		else
6157 		    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6158 		PL_lex_allbrackets++;
6159 		s = SKIPSPACE1(s);
6160 		if (*s == '}') {
6161 		    if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6162 			PL_expect = XTERM;
6163 			/* This hack is to get the ${} in the message. */
6164 			PL_bufptr = s+1;
6165 			yyerror("syntax error");
6166 			break;
6167 		    }
6168 		    OPERATOR(HASHBRACK);
6169 		}
6170 		/* This hack serves to disambiguate a pair of curlies
6171 		 * as being a block or an anon hash.  Normally, expectation
6172 		 * determines that, but in cases where we're not in a
6173 		 * position to expect anything in particular (like inside
6174 		 * eval"") we have to resolve the ambiguity.  This code
6175 		 * covers the case where the first term in the curlies is a
6176 		 * quoted string.  Most other cases need to be explicitly
6177 		 * disambiguated by prepending a "+" before the opening
6178 		 * curly in order to force resolution as an anon hash.
6179 		 *
6180 		 * XXX should probably propagate the outer expectation
6181 		 * into eval"" to rely less on this hack, but that could
6182 		 * potentially break current behavior of eval"".
6183 		 * GSAR 97-07-21
6184 		 */
6185 		t = s;
6186 		if (*s == '\'' || *s == '"' || *s == '`') {
6187 		    /* common case: get past first string, handling escapes */
6188 		    for (t++; t < PL_bufend && *t != *s;)
6189 			if (*t++ == '\\' && (*t == '\\' || *t == *s))
6190 			    t++;
6191 		    t++;
6192 		}
6193 		else if (*s == 'q') {
6194 		    if (++t < PL_bufend
6195 			&& (!isWORDCHAR(*t)
6196 			    || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6197 				&& !isWORDCHAR(*t))))
6198 		    {
6199 			/* skip q//-like construct */
6200 			const char *tmps;
6201 			char open, close, term;
6202 			I32 brackets = 1;
6203 
6204 			while (t < PL_bufend && isSPACE(*t))
6205 			    t++;
6206 			/* check for q => */
6207 			if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6208 			    OPERATOR(HASHBRACK);
6209 			}
6210 			term = *t;
6211 			open = term;
6212 			if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6213 			    term = tmps[5];
6214 			close = term;
6215 			if (open == close)
6216 			    for (t++; t < PL_bufend; t++) {
6217 				if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6218 				    t++;
6219 				else if (*t == open)
6220 				    break;
6221 			    }
6222 			else {
6223 			    for (t++; t < PL_bufend; t++) {
6224 				if (*t == '\\' && t+1 < PL_bufend)
6225 				    t++;
6226 				else if (*t == close && --brackets <= 0)
6227 				    break;
6228 				else if (*t == open)
6229 				    brackets++;
6230 			    }
6231 			}
6232 			t++;
6233 		    }
6234 		    else
6235 			/* skip plain q word */
6236 			while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6237 			     t += UTF8SKIP(t);
6238 		}
6239 		else if (isWORDCHAR_lazy_if(t,UTF)) {
6240 		    t += UTF8SKIP(t);
6241 		    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6242 			 t += UTF8SKIP(t);
6243 		}
6244 		while (t < PL_bufend && isSPACE(*t))
6245 		    t++;
6246 		/* if comma follows first term, call it an anon hash */
6247 		/* XXX it could be a comma expression with loop modifiers */
6248 		if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6249 				   || (*t == '=' && t[1] == '>')))
6250 		    OPERATOR(HASHBRACK);
6251 		if (PL_expect == XREF)
6252 		    PL_expect = XTERM;
6253 		else {
6254 		    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6255 		    PL_expect = XSTATE;
6256 		}
6257 	    }
6258 	    break;
6259 	}
6260 	pl_yylval.ival = CopLINE(PL_curcop);
6261 	if (isSPACE(*s) || *s == '#')
6262 	    PL_copline = NOLINE;   /* invalidate current command line number */
6263 	TOKEN(formbrack ? '=' : '{');
6264     case '}':
6265 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6266 	    TOKEN(0);
6267       rightbracket:
6268 	s++;
6269 	if (PL_lex_brackets <= 0)
6270 	    /* diag_listed_as: Unmatched right %s bracket */
6271 	    yyerror("Unmatched right curly bracket");
6272 	else
6273 	    PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6274 	PL_lex_allbrackets--;
6275 	if (PL_lex_state == LEX_INTERPNORMAL) {
6276 	    if (PL_lex_brackets == 0) {
6277 		if (PL_expect & XFAKEBRACK) {
6278 		    PL_expect &= XENUMMASK;
6279 		    PL_lex_state = LEX_INTERPEND;
6280 		    PL_bufptr = s;
6281 #if 0
6282 		    if (PL_madskills) {
6283 			if (!PL_thiswhite)
6284 			    PL_thiswhite = newSVpvs("");
6285 			sv_catpvs(PL_thiswhite,"}");
6286 		    }
6287 #endif
6288 		    return yylex();	/* ignore fake brackets */
6289 		}
6290 		if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6291 		 && SvEVALED(PL_lex_repl))
6292 		    PL_lex_state = LEX_INTERPEND;
6293 		else if (*s == '-' && s[1] == '>')
6294 		    PL_lex_state = LEX_INTERPENDMAYBE;
6295 		else if (*s != '[' && *s != '{')
6296 		    PL_lex_state = LEX_INTERPEND;
6297 	    }
6298 	}
6299 	if (PL_expect & XFAKEBRACK) {
6300 	    PL_expect &= XENUMMASK;
6301 	    PL_bufptr = s;
6302 	    return yylex();		/* ignore fake brackets */
6303 	}
6304 	start_force(PL_curforce);
6305 	if (PL_madskills) {
6306 	    curmad('X', newSVpvn(s-1,1));
6307 	    CURMAD('_', PL_thiswhite);
6308 	}
6309 	force_next(formbrack ? '.' : '}');
6310 	if (formbrack) LEAVE;
6311 #ifdef PERL_MAD
6312 	if (PL_madskills && !PL_thistoken)
6313 	    PL_thistoken = newSVpvs("");
6314 #endif
6315 	if (formbrack == 2) { /* means . where arguments were expected */
6316 	    start_force(PL_curforce);
6317 	    force_next(';');
6318 	    TOKEN(FORMRBRACK);
6319 	}
6320 	TOKEN(';');
6321     case '&':
6322 	if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6323 	s++;
6324 	if (*s++ == '&') {
6325 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6326 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6327 		s -= 2;
6328 		TOKEN(0);
6329 	    }
6330 	    AOPERATOR(ANDAND);
6331 	}
6332 	s--;
6333 	if (PL_expect == XOPERATOR) {
6334 	    if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6335 		&& isIDFIRST_lazy_if(s,UTF))
6336 	    {
6337 		CopLINE_dec(PL_curcop);
6338 		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6339 		CopLINE_inc(PL_curcop);
6340 	    }
6341 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6342 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6343 		s--;
6344 		TOKEN(0);
6345 	    }
6346 	    PL_parser->saw_infix_sigil = 1;
6347 	    BAop(OP_BIT_AND);
6348 	}
6349 
6350 	PL_tokenbuf[0] = '&';
6351 	s = scan_ident(s - 1, PL_tokenbuf + 1,
6352 		       sizeof PL_tokenbuf - 1, TRUE);
6353 	if (PL_tokenbuf[1]) {
6354 	    PL_expect = XOPERATOR;
6355 	    force_ident_maybe_lex('&');
6356 	}
6357 	else
6358 	    PREREF('&');
6359 	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6360 	TERM('&');
6361 
6362     case '|':
6363 	s++;
6364 	if (*s++ == '|') {
6365 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6366 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6367 		s -= 2;
6368 		TOKEN(0);
6369 	    }
6370 	    AOPERATOR(OROR);
6371 	}
6372 	s--;
6373 	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6374 		(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6375 	    s--;
6376 	    TOKEN(0);
6377 	}
6378 	BOop(OP_BIT_OR);
6379     case '=':
6380 	s++;
6381 	{
6382 	    const char tmp = *s++;
6383 	    if (tmp == '=') {
6384 		if (!PL_lex_allbrackets &&
6385 			PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6386 		    s -= 2;
6387 		    TOKEN(0);
6388 		}
6389 		Eop(OP_EQ);
6390 	    }
6391 	    if (tmp == '>') {
6392 		if (!PL_lex_allbrackets &&
6393 			PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6394 		    s -= 2;
6395 		    TOKEN(0);
6396 		}
6397 		OPERATOR(',');
6398 	    }
6399 	    if (tmp == '~')
6400 		PMop(OP_MATCH);
6401 	    if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6402 		&& strchr("+-*/%.^&|<",tmp))
6403 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6404 			    "Reversed %c= operator",(int)tmp);
6405 	    s--;
6406 	    if (PL_expect == XSTATE && isALPHA(tmp) &&
6407 		(s == PL_linestart+1 || s[-2] == '\n') )
6408 		{
6409 		    if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6410 			|| PL_lex_state != LEX_NORMAL) {
6411 			d = PL_bufend;
6412 			while (s < d) {
6413 			    if (*s++ == '\n') {
6414 				incline(s);
6415 				if (strnEQ(s,"=cut",4)) {
6416 				    s = strchr(s,'\n');
6417 				    if (s)
6418 					s++;
6419 				    else
6420 					s = d;
6421 				    incline(s);
6422 				    goto retry;
6423 				}
6424 			    }
6425 			}
6426 			goto retry;
6427 		    }
6428 #ifdef PERL_MAD
6429 		    if (PL_madskills) {
6430 			if (!PL_thiswhite)
6431 			    PL_thiswhite = newSVpvs("");
6432 			sv_catpvn(PL_thiswhite, PL_linestart,
6433 				  PL_bufend - PL_linestart);
6434 		    }
6435 #endif
6436 		    s = PL_bufend;
6437 		    PL_parser->in_pod = 1;
6438 		    goto retry;
6439 		}
6440 	}
6441 	if (PL_expect == XBLOCK) {
6442 	    const char *t = s;
6443 #ifdef PERL_STRICT_CR
6444 	    while (SPACE_OR_TAB(*t))
6445 #else
6446 	    while (SPACE_OR_TAB(*t) || *t == '\r')
6447 #endif
6448 		t++;
6449 	    if (*t == '\n' || *t == '#') {
6450 		formbrack = 1;
6451 		ENTER;
6452 		SAVEI8(PL_parser->form_lex_state);
6453 		SAVEI32(PL_lex_formbrack);
6454 		PL_parser->form_lex_state = PL_lex_state;
6455 		PL_lex_formbrack = PL_lex_brackets + 1;
6456 		goto leftbracket;
6457 	    }
6458 	}
6459 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6460 	    s--;
6461 	    TOKEN(0);
6462 	}
6463 	pl_yylval.ival = 0;
6464 	OPERATOR(ASSIGNOP);
6465     case '!':
6466 	s++;
6467 	{
6468 	    const char tmp = *s++;
6469 	    if (tmp == '=') {
6470 		/* was this !=~ where !~ was meant?
6471 		 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6472 
6473 		if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6474 		    const char *t = s+1;
6475 
6476 		    while (t < PL_bufend && isSPACE(*t))
6477 			++t;
6478 
6479 		    if (*t == '/' || *t == '?' ||
6480 			((*t == 'm' || *t == 's' || *t == 'y')
6481 			 && !isWORDCHAR(t[1])) ||
6482 			(*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6483 			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6484 				    "!=~ should be !~");
6485 		}
6486 		if (!PL_lex_allbrackets &&
6487 			PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6488 		    s -= 2;
6489 		    TOKEN(0);
6490 		}
6491 		Eop(OP_NE);
6492 	    }
6493 	    if (tmp == '~')
6494 		PMop(OP_NOT);
6495 	}
6496 	s--;
6497 	OPERATOR('!');
6498     case '<':
6499 	if (PL_expect != XOPERATOR) {
6500 	    if (s[1] != '<' && !strchr(s,'>'))
6501 		check_uni();
6502 	    if (s[1] == '<')
6503 		s = scan_heredoc(s);
6504 	    else
6505 		s = scan_inputsymbol(s);
6506 	    PL_expect = XOPERATOR;
6507 	    TOKEN(sublex_start());
6508 	}
6509 	s++;
6510 	{
6511 	    char tmp = *s++;
6512 	    if (tmp == '<') {
6513 		if (*s == '=' && !PL_lex_allbrackets &&
6514 			PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6515 		    s -= 2;
6516 		    TOKEN(0);
6517 		}
6518 		SHop(OP_LEFT_SHIFT);
6519 	    }
6520 	    if (tmp == '=') {
6521 		tmp = *s++;
6522 		if (tmp == '>') {
6523 		    if (!PL_lex_allbrackets &&
6524 			    PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6525 			s -= 3;
6526 			TOKEN(0);
6527 		    }
6528 		    Eop(OP_NCMP);
6529 		}
6530 		s--;
6531 		if (!PL_lex_allbrackets &&
6532 			PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6533 		    s -= 2;
6534 		    TOKEN(0);
6535 		}
6536 		Rop(OP_LE);
6537 	    }
6538 	}
6539 	s--;
6540 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6541 	    s--;
6542 	    TOKEN(0);
6543 	}
6544 	Rop(OP_LT);
6545     case '>':
6546 	s++;
6547 	{
6548 	    const char tmp = *s++;
6549 	    if (tmp == '>') {
6550 		if (*s == '=' && !PL_lex_allbrackets &&
6551 			PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6552 		    s -= 2;
6553 		    TOKEN(0);
6554 		}
6555 		SHop(OP_RIGHT_SHIFT);
6556 	    }
6557 	    else if (tmp == '=') {
6558 		if (!PL_lex_allbrackets &&
6559 			PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6560 		    s -= 2;
6561 		    TOKEN(0);
6562 		}
6563 		Rop(OP_GE);
6564 	    }
6565 	}
6566 	s--;
6567 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6568 	    s--;
6569 	    TOKEN(0);
6570 	}
6571 	Rop(OP_GT);
6572 
6573     case '$':
6574 	CLINE;
6575 
6576 	if (PL_expect == XOPERATOR) {
6577 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6578 		return deprecate_commaless_var_list();
6579 	    }
6580 	}
6581 	else if (PL_expect == XPOSTDEREF) {
6582 	    if (s[1] == '#') {
6583 		s++;
6584 		POSTDEREF(DOLSHARP);
6585 	    }
6586 	    POSTDEREF('$');
6587 	}
6588 
6589 	if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6590 	    PL_tokenbuf[0] = '@';
6591 	    s = scan_ident(s + 1, PL_tokenbuf + 1,
6592 			   sizeof PL_tokenbuf - 1, FALSE);
6593 	    if (PL_expect == XOPERATOR)
6594 		no_op("Array length", s);
6595 	    if (!PL_tokenbuf[1])
6596 		PREREF(DOLSHARP);
6597 	    PL_expect = XOPERATOR;
6598 	    force_ident_maybe_lex('#');
6599 	    TOKEN(DOLSHARP);
6600 	}
6601 
6602 	PL_tokenbuf[0] = '$';
6603 	s = scan_ident(s, PL_tokenbuf + 1,
6604 		       sizeof PL_tokenbuf - 1, FALSE);
6605 	if (PL_expect == XOPERATOR)
6606 	    no_op("Scalar", s);
6607 	if (!PL_tokenbuf[1]) {
6608 	    if (s == PL_bufend)
6609 		yyerror("Final $ should be \\$ or $name");
6610 	    PREREF('$');
6611 	}
6612 
6613 	d = s;
6614 	{
6615 	    const char tmp = *s;
6616 	    if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6617 		s = SKIPSPACE1(s);
6618 
6619 	    if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6620 		&& intuit_more(s)) {
6621 		if (*s == '[') {
6622 		    PL_tokenbuf[0] = '@';
6623 		    if (ckWARN(WARN_SYNTAX)) {
6624 			char *t = s+1;
6625 
6626 			while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6627 			    t += UTF ? UTF8SKIP(t) : 1;
6628 			if (*t++ == ',') {
6629 			    PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6630 			    while (t < PL_bufend && *t != ']')
6631 				t++;
6632 			    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6633 					"Multidimensional syntax %.*s not supported",
6634 				    (int)((t - PL_bufptr) + 1), PL_bufptr);
6635 			}
6636 		    }
6637 		}
6638 		else if (*s == '{') {
6639 		    char *t;
6640 		    PL_tokenbuf[0] = '%';
6641 		    if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6642 			&& (t = strchr(s, '}')) && (t = strchr(t, '=')))
6643 			{
6644 			    char tmpbuf[sizeof PL_tokenbuf];
6645 			    do {
6646 				t++;
6647 			    } while (isSPACE(*t));
6648 			    if (isIDFIRST_lazy_if(t,UTF)) {
6649 				STRLEN len;
6650 				t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6651 					      &len);
6652 				while (isSPACE(*t))
6653 				    t++;
6654 				if (*t == ';'
6655                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6656 				    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6657 					"You need to quote \"%"UTF8f"\"",
6658 					 UTF8fARG(UTF, len, tmpbuf));
6659 			    }
6660 			}
6661 		}
6662 	    }
6663 
6664 	    PL_expect = XOPERATOR;
6665 	    if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6666 		const bool islop = (PL_last_lop == PL_oldoldbufptr);
6667 		if (!islop || PL_last_lop_op == OP_GREPSTART)
6668 		    PL_expect = XOPERATOR;
6669 		else if (strchr("$@\"'`q", *s))
6670 		    PL_expect = XTERM;		/* e.g. print $fh "foo" */
6671 		else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6672 		    PL_expect = XTERM;		/* e.g. print $fh &sub */
6673 		else if (isIDFIRST_lazy_if(s,UTF)) {
6674 		    char tmpbuf[sizeof PL_tokenbuf];
6675 		    int t2;
6676 		    scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6677 		    if ((t2 = keyword(tmpbuf, len, 0))) {
6678 			/* binary operators exclude handle interpretations */
6679 			switch (t2) {
6680 			case -KEY_x:
6681 			case -KEY_eq:
6682 			case -KEY_ne:
6683 			case -KEY_gt:
6684 			case -KEY_lt:
6685 			case -KEY_ge:
6686 			case -KEY_le:
6687 			case -KEY_cmp:
6688 			    break;
6689 			default:
6690 			    PL_expect = XTERM;	/* e.g. print $fh length() */
6691 			    break;
6692 			}
6693 		    }
6694 		    else {
6695 			PL_expect = XTERM;	/* e.g. print $fh subr() */
6696 		    }
6697 		}
6698 		else if (isDIGIT(*s))
6699 		    PL_expect = XTERM;		/* e.g. print $fh 3 */
6700 		else if (*s == '.' && isDIGIT(s[1]))
6701 		    PL_expect = XTERM;		/* e.g. print $fh .3 */
6702 		else if ((*s == '?' || *s == '-' || *s == '+')
6703 			 && !isSPACE(s[1]) && s[1] != '=')
6704 		    PL_expect = XTERM;		/* e.g. print $fh -1 */
6705 		else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6706 			 && s[1] != '/')
6707 		    PL_expect = XTERM;		/* e.g. print $fh /.../
6708 						   XXX except DORDOR operator
6709 						*/
6710 		else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6711 			 && s[2] != '=')
6712 		    PL_expect = XTERM;		/* print $fh <<"EOF" */
6713 	    }
6714 	}
6715 	force_ident_maybe_lex('$');
6716 	TOKEN('$');
6717 
6718     case '@':
6719 	if (PL_expect == XOPERATOR)
6720 	    no_op("Array", s);
6721 	else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6722 	PL_tokenbuf[0] = '@';
6723 	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6724 	pl_yylval.ival = 0;
6725 	if (!PL_tokenbuf[1]) {
6726 	    PREREF('@');
6727 	}
6728 	if (PL_lex_state == LEX_NORMAL)
6729 	    s = SKIPSPACE1(s);
6730 	if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6731 	    if (*s == '{')
6732 		PL_tokenbuf[0] = '%';
6733 
6734 	    /* Warn about @ where they meant $. */
6735 	    if (*s == '[' || *s == '{') {
6736 		if (ckWARN(WARN_SYNTAX)) {
6737 		    S_check_scalar_slice(aTHX_ s);
6738 		}
6739 	    }
6740 	}
6741 	PL_expect = XOPERATOR;
6742 	force_ident_maybe_lex('@');
6743 	TERM('@');
6744 
6745      case '/':			/* may be division, defined-or, or pattern */
6746 	if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6747 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6748 		    (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6749 		TOKEN(0);
6750 	    s += 2;
6751 	    AOPERATOR(DORDOR);
6752 	}
6753      case '?':			/* may either be conditional or pattern */
6754 	if (PL_expect == XOPERATOR) {
6755 	     char tmp = *s++;
6756 	     if(tmp == '?') {
6757 		if (!PL_lex_allbrackets &&
6758 			PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6759 		    s--;
6760 		    TOKEN(0);
6761 		}
6762 		PL_lex_allbrackets++;
6763 		OPERATOR('?');
6764 	     }
6765              else {
6766 	         tmp = *s++;
6767 	         if(tmp == '/') {
6768 	             /* A // operator. */
6769 		    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6770 			    (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6771 					    LEX_FAKEEOF_LOGIC)) {
6772 			s -= 2;
6773 			TOKEN(0);
6774 		    }
6775 	            AOPERATOR(DORDOR);
6776 	         }
6777 	         else {
6778 	             s--;
6779 		     if (*s == '=' && !PL_lex_allbrackets &&
6780 			     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6781 			 s--;
6782 			 TOKEN(0);
6783 		     }
6784 	             Mop(OP_DIVIDE);
6785 	         }
6786 	     }
6787 	 }
6788 	 else {
6789 	     /* Disable warning on "study /blah/" */
6790 	     if (PL_oldoldbufptr == PL_last_uni
6791 	      && (*PL_last_uni != 's' || s - PL_last_uni < 5
6792 	          || memNE(PL_last_uni, "study", 5)
6793 	          || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6794 	      ))
6795 	         check_uni();
6796 	     if (*s == '?')
6797 		 deprecate("?PATTERN? without explicit operator");
6798 	     s = scan_pat(s,OP_MATCH);
6799 	     TERM(sublex_start());
6800 	 }
6801 
6802     case '.':
6803 	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6804 #ifdef PERL_STRICT_CR
6805 	    && s[1] == '\n'
6806 #else
6807 	    && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6808 #endif
6809 	    && (s == PL_linestart || s[-1] == '\n') )
6810 	{
6811 	    PL_expect = XSTATE;
6812 	    formbrack = 2; /* dot seen where arguments expected */
6813 	    goto rightbracket;
6814 	}
6815 	if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6816 	    s += 3;
6817 	    OPERATOR(YADAYADA);
6818 	}
6819 	if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6820 	    char tmp = *s++;
6821 	    if (*s == tmp) {
6822 		if (!PL_lex_allbrackets &&
6823 			PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6824 		    s--;
6825 		    TOKEN(0);
6826 		}
6827 		s++;
6828 		if (*s == tmp) {
6829 		    s++;
6830 		    pl_yylval.ival = OPf_SPECIAL;
6831 		}
6832 		else
6833 		    pl_yylval.ival = 0;
6834 		OPERATOR(DOTDOT);
6835 	    }
6836 	    if (*s == '=' && !PL_lex_allbrackets &&
6837 		    PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6838 		s--;
6839 		TOKEN(0);
6840 	    }
6841 	    Aop(OP_CONCAT);
6842 	}
6843 	/* FALL THROUGH */
6844     case '0': case '1': case '2': case '3': case '4':
6845     case '5': case '6': case '7': case '8': case '9':
6846 	s = scan_num(s, &pl_yylval);
6847 	DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6848 	if (PL_expect == XOPERATOR)
6849 	    no_op("Number",s);
6850 	TERM(THING);
6851 
6852     case '\'':
6853 	s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6854 	if (!s)
6855 	    missingterm(NULL);
6856 	COPLINE_SET_FROM_MULTI_END;
6857 	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6858 	if (PL_expect == XOPERATOR) {
6859 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6860 		return deprecate_commaless_var_list();
6861 	    }
6862 	    else
6863 		no_op("String",s);
6864 	}
6865 	pl_yylval.ival = OP_CONST;
6866 	TERM(sublex_start());
6867 
6868     case '"':
6869 	s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6870 	DEBUG_T( {
6871 	    if (s)
6872 		printbuf("### Saw string before %s\n", s);
6873 	    else
6874 		PerlIO_printf(Perl_debug_log,
6875 			     "### Saw unterminated string\n");
6876 	} );
6877 	if (PL_expect == XOPERATOR) {
6878 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6879 		return deprecate_commaless_var_list();
6880 	    }
6881 	    else
6882 		no_op("String",s);
6883 	}
6884 	if (!s)
6885 	    missingterm(NULL);
6886 	pl_yylval.ival = OP_CONST;
6887 	/* FIXME. I think that this can be const if char *d is replaced by
6888 	   more localised variables.  */
6889 	for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6890 	    if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6891 		pl_yylval.ival = OP_STRINGIFY;
6892 		break;
6893 	    }
6894 	}
6895 	if (pl_yylval.ival == OP_CONST)
6896 	    COPLINE_SET_FROM_MULTI_END;
6897 	TERM(sublex_start());
6898 
6899     case '`':
6900 	s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6901 	DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6902 	if (PL_expect == XOPERATOR)
6903 	    no_op("Backticks",s);
6904 	if (!s)
6905 	    missingterm(NULL);
6906 	pl_yylval.ival = OP_BACKTICK;
6907 	TERM(sublex_start());
6908 
6909     case '\\':
6910 	s++;
6911 	if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6912 	 && isDIGIT(*s))
6913 	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6914 			   *s, *s);
6915 	if (PL_expect == XOPERATOR)
6916 	    no_op("Backslash",s);
6917 	OPERATOR(REFGEN);
6918 
6919     case 'v':
6920 	if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6921 	    char *start = s + 2;
6922 	    while (isDIGIT(*start) || *start == '_')
6923 		start++;
6924 	    if (*start == '.' && isDIGIT(start[1])) {
6925 		s = scan_num(s, &pl_yylval);
6926 		TERM(THING);
6927 	    }
6928 	    else if ((*start == ':' && start[1] == ':')
6929 		  || (PL_expect == XSTATE && *start == ':'))
6930 		goto keylookup;
6931 	    else if (PL_expect == XSTATE) {
6932 		d = start;
6933 		while (d < PL_bufend && isSPACE(*d)) d++;
6934 		if (*d == ':') goto keylookup;
6935 	    }
6936 	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6937 	    if (!isALPHA(*start) && (PL_expect == XTERM
6938 			|| PL_expect == XREF || PL_expect == XSTATE
6939 			|| PL_expect == XTERMORDORDOR)) {
6940 		GV *const gv = gv_fetchpvn_flags(s, start - s,
6941                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6942 		if (!gv) {
6943 		    s = scan_num(s, &pl_yylval);
6944 		    TERM(THING);
6945 		}
6946 	    }
6947 	}
6948 	goto keylookup;
6949     case 'x':
6950 	if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6951 	    s++;
6952 	    Mop(OP_REPEAT);
6953 	}
6954 	goto keylookup;
6955 
6956     case '_':
6957     case 'a': case 'A':
6958     case 'b': case 'B':
6959     case 'c': case 'C':
6960     case 'd': case 'D':
6961     case 'e': case 'E':
6962     case 'f': case 'F':
6963     case 'g': case 'G':
6964     case 'h': case 'H':
6965     case 'i': case 'I':
6966     case 'j': case 'J':
6967     case 'k': case 'K':
6968     case 'l': case 'L':
6969     case 'm': case 'M':
6970     case 'n': case 'N':
6971     case 'o': case 'O':
6972     case 'p': case 'P':
6973     case 'q': case 'Q':
6974     case 'r': case 'R':
6975     case 's': case 'S':
6976     case 't': case 'T':
6977     case 'u': case 'U':
6978 	      case 'V':
6979     case 'w': case 'W':
6980 	      case 'X':
6981     case 'y': case 'Y':
6982     case 'z': case 'Z':
6983 
6984       keylookup: {
6985 	bool anydelim;
6986 	bool lex;
6987 	I32 tmp;
6988 	SV *sv;
6989 	CV *cv;
6990 	PADOFFSET off;
6991 	OP *rv2cv_op;
6992 
6993 	lex = FALSE;
6994 	orig_keyword = 0;
6995 	off = 0;
6996 	sv = NULL;
6997 	cv = NULL;
6998 	gv = NULL;
6999 	gvp = NULL;
7000 	rv2cv_op = NULL;
7001 
7002 	PL_bufptr = s;
7003 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7004 
7005 	/* Some keywords can be followed by any delimiter, including ':' */
7006 	anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7007 
7008 	/* x::* is just a word, unless x is "CORE" */
7009 	if (!anydelim && *s == ':' && s[1] == ':') {
7010 	    if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7011 	    goto just_a_word;
7012 	}
7013 
7014 	d = s;
7015 	while (d < PL_bufend && isSPACE(*d))
7016 		d++;	/* no comments skipped here, or s### is misparsed */
7017 
7018 	/* Is this a word before a => operator? */
7019 	if (*d == '=' && d[1] == '>') {
7020 	  fat_arrow:
7021 	    CLINE;
7022 	    pl_yylval.opval
7023 		= (OP*)newSVOP(OP_CONST, 0,
7024 			       S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7025 	    pl_yylval.opval->op_private = OPpCONST_BARE;
7026 	    TERM(WORD);
7027 	}
7028 
7029 	/* Check for plugged-in keyword */
7030 	{
7031 	    OP *o;
7032 	    int result;
7033 	    char *saved_bufptr = PL_bufptr;
7034 	    PL_bufptr = s;
7035 	    result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7036 	    s = PL_bufptr;
7037 	    if (result == KEYWORD_PLUGIN_DECLINE) {
7038 		/* not a plugged-in keyword */
7039 		PL_bufptr = saved_bufptr;
7040 	    } else if (result == KEYWORD_PLUGIN_STMT) {
7041 		pl_yylval.opval = o;
7042 		CLINE;
7043 		PL_expect = XSTATE;
7044 		return REPORT(PLUGSTMT);
7045 	    } else if (result == KEYWORD_PLUGIN_EXPR) {
7046 		pl_yylval.opval = o;
7047 		CLINE;
7048 		PL_expect = XOPERATOR;
7049 		return REPORT(PLUGEXPR);
7050 	    } else {
7051 		Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7052 					PL_tokenbuf);
7053 	    }
7054 	}
7055 
7056 	/* Check for built-in keyword */
7057 	tmp = keyword(PL_tokenbuf, len, 0);
7058 
7059 	/* Is this a label? */
7060 	if (!anydelim && PL_expect == XSTATE
7061 	      && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7062 	    s = d + 1;
7063 	    pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7064 	    pl_yylval.pval[len] = '\0';
7065 	    pl_yylval.pval[len+1] = UTF ? 1 : 0;
7066 	    CLINE;
7067 	    TOKEN(LABEL);
7068 	}
7069 
7070 	/* Check for lexical sub */
7071 	if (PL_expect != XOPERATOR) {
7072 	    char tmpbuf[sizeof PL_tokenbuf + 1];
7073 	    *tmpbuf = '&';
7074 	    Copy(PL_tokenbuf, tmpbuf+1, len, char);
7075 	    off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7076 	    if (off != NOT_IN_PAD) {
7077 		assert(off); /* we assume this is boolean-true below */
7078 		if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7079 		    HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7080 		    HEK * const stashname = HvNAME_HEK(stash);
7081 		    sv = newSVhek(stashname);
7082                     sv_catpvs(sv, "::");
7083                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7084 				    (UTF ? SV_CATUTF8 : SV_CATBYTES));
7085 		    gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7086 				    SVt_PVCV);
7087 		    off = 0;
7088 		    if (!gv) {
7089 			sv_free(sv);
7090 			sv = NULL;
7091 			goto just_a_word;
7092 		    }
7093 		}
7094 		else {
7095 		    rv2cv_op = newOP(OP_PADANY, 0);
7096 		    rv2cv_op->op_targ = off;
7097 		    cv = find_lexical_cv(off);
7098 		}
7099 		lex = TRUE;
7100 		goto just_a_word;
7101 	    }
7102 	    off = 0;
7103 	}
7104 
7105 	if (tmp < 0) {			/* second-class keyword? */
7106 	    GV *ogv = NULL;	/* override (winner) */
7107 	    GV *hgv = NULL;	/* hidden (loser) */
7108 	    if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7109 		CV *cv;
7110 		if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7111 					    (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7112 					    SVt_PVCV)) &&
7113 		    (cv = GvCVu(gv)))
7114 		{
7115 		    if (GvIMPORTED_CV(gv))
7116 			ogv = gv;
7117 		    else if (! CvMETHOD(cv))
7118 			hgv = gv;
7119 		}
7120 		if (!ogv &&
7121 		    (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7122 					  len, FALSE)) &&
7123 		    (gv = *gvp) && (
7124 			isGV_with_GP(gv)
7125 			    ? GvCVu(gv) && GvIMPORTED_CV(gv)
7126 			    :   SvPCS_IMPORTED(gv)
7127 			     && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7128 					 len, 0), 1)
7129 		   ))
7130 		{
7131 		    ogv = gv;
7132 		}
7133 	    }
7134 	    if (ogv) {
7135 		orig_keyword = tmp;
7136 		tmp = 0;		/* overridden by import or by GLOBAL */
7137 	    }
7138 	    else if (gv && !gvp
7139 		     && -tmp==KEY_lock	/* XXX generalizable kludge */
7140 		     && GvCVu(gv))
7141 	    {
7142 		tmp = 0;		/* any sub overrides "weak" keyword */
7143 	    }
7144 	    else {			/* no override */
7145 		tmp = -tmp;
7146 		if (tmp == KEY_dump) {
7147 		    Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7148 				   "dump() better written as CORE::dump()");
7149 		}
7150 		gv = NULL;
7151 		gvp = 0;
7152 		if (hgv && tmp != KEY_x)	/* never ambiguous */
7153 		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7154 				   "Ambiguous call resolved as CORE::%s(), "
7155 				   "qualify as such or use &",
7156 				   GvENAME(hgv));
7157 	    }
7158 	}
7159 
7160 	if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7161 	 && (!anydelim || *s != '#')) {
7162 	    /* no override, and not s### either; skipspace is safe here
7163 	     * check for => on following line */
7164 	    bool arrow;
7165 	    STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7166 	    STRLEN   soff = s         - SvPVX(PL_linestr);
7167 	    s = skipspace_flags(s, LEX_NO_INCLINE);
7168 	    arrow = *s == '=' && s[1] == '>';
7169 	    PL_bufptr = SvPVX(PL_linestr) + bufoff;
7170 	    s         = SvPVX(PL_linestr) +   soff;
7171 	    if (arrow)
7172 		goto fat_arrow;
7173 	}
7174 
7175       reserved_word:
7176 	switch (tmp) {
7177 
7178 	default:			/* not a keyword */
7179 	    /* Trade off - by using this evil construction we can pull the
7180 	       variable gv into the block labelled keylookup. If not, then
7181 	       we have to give it function scope so that the goto from the
7182 	       earlier ':' case doesn't bypass the initialisation.  */
7183 	    if (0) {
7184 	    just_a_word_zero_gv:
7185 		sv = NULL;
7186 		cv = NULL;
7187 		gv = NULL;
7188 		gvp = NULL;
7189 		rv2cv_op = NULL;
7190 		orig_keyword = 0;
7191 		lex = 0;
7192 		off = 0;
7193 	    }
7194 	  just_a_word: {
7195 		int pkgname = 0;
7196 		const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7197 		const char penultchar =
7198 		    lastchar && PL_bufptr - 2 >= PL_linestart
7199 			 ? PL_bufptr[-2]
7200 			 : 0;
7201 #ifdef PERL_MAD
7202 		SV *nextPL_nextwhite = 0;
7203 #endif
7204 
7205 
7206 		/* Get the rest if it looks like a package qualifier */
7207 
7208 		if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7209 		    STRLEN morelen;
7210 		    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7211 				  TRUE, &morelen);
7212 		    if (!morelen)
7213 			Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7214 				UTF8fARG(UTF, len, PL_tokenbuf),
7215 				*s == '\'' ? "'" : "::");
7216 		    len += morelen;
7217 		    pkgname = 1;
7218 		}
7219 
7220 		if (PL_expect == XOPERATOR) {
7221 		    if (PL_bufptr == PL_linestart) {
7222 			CopLINE_dec(PL_curcop);
7223 			Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7224 			CopLINE_inc(PL_curcop);
7225 		    }
7226 		    else
7227 			no_op("Bareword",s);
7228 		}
7229 
7230 		/* Look for a subroutine with this name in current package,
7231 		   unless this is a lexical sub, or name is "Foo::",
7232 		   in which case Foo is a bareword
7233 		   (and a package name). */
7234 
7235 		if (len > 2 && !PL_madskills &&
7236 		    PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7237 		{
7238 		    if (ckWARN(WARN_BAREWORD)
7239 			&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7240 			Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7241 		  	  "Bareword \"%"UTF8f"\" refers to nonexistent package",
7242 			   UTF8fARG(UTF, len, PL_tokenbuf));
7243 		    len -= 2;
7244 		    PL_tokenbuf[len] = '\0';
7245 		    gv = NULL;
7246 		    gvp = 0;
7247 		}
7248 		else {
7249 		    if (!lex && !gv) {
7250 			/* Mustn't actually add anything to a symbol table.
7251 			   But also don't want to "initialise" any placeholder
7252 			   constants that might already be there into full
7253 			   blown PVGVs with attached PVCV.  */
7254 			gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7255 					       GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7256 					       SVt_PVCV);
7257 		    }
7258 		    len = 0;
7259 		}
7260 
7261 		/* if we saw a global override before, get the right name */
7262 
7263 		if (!sv)
7264 		  sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7265 		    len ? len : strlen(PL_tokenbuf));
7266 		if (gvp) {
7267 		    SV * const tmp_sv = sv;
7268 		    sv = newSVpvs("CORE::GLOBAL::");
7269 		    sv_catsv(sv, tmp_sv);
7270 		    SvREFCNT_dec(tmp_sv);
7271 		}
7272 
7273 #ifdef PERL_MAD
7274 		if (PL_madskills && !PL_thistoken) {
7275 		    char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7276 		    PL_thistoken = newSVpvn(start,s - start);
7277 		    PL_realtokenstart = s - SvPVX(PL_linestr);
7278 		}
7279 #endif
7280 
7281 		/* Presume this is going to be a bareword of some sort. */
7282 		CLINE;
7283 		pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7284 		pl_yylval.opval->op_private = OPpCONST_BARE;
7285 
7286 		/* And if "Foo::", then that's what it certainly is. */
7287 		if (len)
7288 		    goto safe_bareword;
7289 
7290 		if (!off)
7291 		{
7292 		    OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7293 		    const_op->op_private = OPpCONST_BARE;
7294 		    rv2cv_op = newCVREF(0, const_op);
7295 		    cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7296 		}
7297 
7298 		/* See if it's the indirect object for a list operator. */
7299 
7300 		if (PL_oldoldbufptr &&
7301 		    PL_oldoldbufptr < PL_bufptr &&
7302 		    (PL_oldoldbufptr == PL_last_lop
7303 		     || PL_oldoldbufptr == PL_last_uni) &&
7304 		    /* NO SKIPSPACE BEFORE HERE! */
7305 		    (PL_expect == XREF ||
7306 		     ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7307 		{
7308 		    bool immediate_paren = *s == '(';
7309 
7310 		    /* (Now we can afford to cross potential line boundary.) */
7311 		    s = SKIPSPACE2(s,nextPL_nextwhite);
7312 #ifdef PERL_MAD
7313 		    PL_nextwhite = nextPL_nextwhite;	/* assume no & deception */
7314 #endif
7315 
7316 		    /* Two barewords in a row may indicate method call. */
7317 
7318 		    if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7319 			(tmp = intuit_method(s, gv, cv))) {
7320 			op_free(rv2cv_op);
7321 			if (tmp == METHOD && !PL_lex_allbrackets &&
7322 				PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7323 			    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7324 			return REPORT(tmp);
7325 		    }
7326 
7327 		    /* If not a declared subroutine, it's an indirect object. */
7328 		    /* (But it's an indir obj regardless for sort.) */
7329 		    /* Also, if "_" follows a filetest operator, it's a bareword */
7330 
7331 		    if (
7332 			( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7333                          (!cv &&
7334                         (PL_last_lop_op != OP_MAPSTART &&
7335 			 PL_last_lop_op != OP_GREPSTART))))
7336 		       || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7337 			    && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7338 		       )
7339 		    {
7340 			PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7341 			goto bareword;
7342 		    }
7343 		}
7344 
7345 		PL_expect = XOPERATOR;
7346 #ifdef PERL_MAD
7347 		if (isSPACE(*s))
7348 		    s = SKIPSPACE2(s,nextPL_nextwhite);
7349 		PL_nextwhite = nextPL_nextwhite;
7350 #else
7351 		s = skipspace(s);
7352 #endif
7353 
7354 		/* Is this a word before a => operator? */
7355 		if (*s == '=' && s[1] == '>' && !pkgname) {
7356 		    op_free(rv2cv_op);
7357 		    CLINE;
7358 		    /* This is our own scalar, created a few lines above,
7359 		       so this is safe. */
7360 		    SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7361 		    sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7362 		    if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7363 		      SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7364 		    SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7365 		    TERM(WORD);
7366 		}
7367 
7368 		/* If followed by a paren, it's certainly a subroutine. */
7369 		if (*s == '(') {
7370 		    CLINE;
7371 		    if (cv) {
7372 			d = s + 1;
7373 			while (SPACE_OR_TAB(*d))
7374 			    d++;
7375 			if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7376 			    s = d + 1;
7377 			    goto its_constant;
7378 			}
7379 		    }
7380 #ifdef PERL_MAD
7381 		    if (PL_madskills) {
7382 			PL_nextwhite = PL_thiswhite;
7383 			PL_thiswhite = 0;
7384 		    }
7385 		    start_force(PL_curforce);
7386 #endif
7387 		    NEXTVAL_NEXTTOKE.opval =
7388 			off ? rv2cv_op : pl_yylval.opval;
7389 		    PL_expect = XOPERATOR;
7390 #ifdef PERL_MAD
7391 		    if (PL_madskills) {
7392 			PL_nextwhite = nextPL_nextwhite;
7393 			curmad('X', PL_thistoken);
7394 			PL_thistoken = newSVpvs("");
7395 		    }
7396 #endif
7397 		    if (off)
7398 			 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7399 		    else op_free(rv2cv_op),	   force_next(WORD);
7400 		    pl_yylval.ival = 0;
7401 		    TOKEN('&');
7402 		}
7403 
7404 		/* If followed by var or block, call it a method (unless sub) */
7405 
7406 		if ((*s == '$' || *s == '{') && !cv) {
7407 		    op_free(rv2cv_op);
7408 		    PL_last_lop = PL_oldbufptr;
7409 		    PL_last_lop_op = OP_METHOD;
7410 		    if (!PL_lex_allbrackets &&
7411 			    PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7412 			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7413 		    PREBLOCK(METHOD);
7414 		}
7415 
7416 		/* If followed by a bareword, see if it looks like indir obj. */
7417 
7418 		if (!orig_keyword
7419 			&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7420 			&& (tmp = intuit_method(s, gv, cv))) {
7421 		    op_free(rv2cv_op);
7422 		    if (tmp == METHOD && !PL_lex_allbrackets &&
7423 			    PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7424 			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7425 		    return REPORT(tmp);
7426 		}
7427 
7428 		/* Not a method, so call it a subroutine (if defined) */
7429 
7430 		if (cv) {
7431 		    if (lastchar == '-' && penultchar != '-') {
7432 			const STRLEN l = len ? len : strlen(PL_tokenbuf);
7433  			Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7434 			    "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7435 			     UTF8fARG(UTF, l, PL_tokenbuf),
7436 			     UTF8fARG(UTF, l, PL_tokenbuf));
7437                     }
7438 		    /* Check for a constant sub */
7439 		    if ((sv = cv_const_sv_or_av(cv))) {
7440 		  its_constant:
7441 			op_free(rv2cv_op);
7442 			SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7443 			((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7444 			if (SvTYPE(sv) == SVt_PVAV)
7445 			    pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7446 						      pl_yylval.opval);
7447 			else {
7448 			    pl_yylval.opval->op_private = 0;
7449 			    pl_yylval.opval->op_folded = 1;
7450 			    pl_yylval.opval->op_flags |= OPf_SPECIAL;
7451 			}
7452 			TOKEN(WORD);
7453 		    }
7454 
7455 		    op_free(pl_yylval.opval);
7456 		    pl_yylval.opval =
7457 			off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7458 		    pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7459 		    PL_last_lop = PL_oldbufptr;
7460 		    PL_last_lop_op = OP_ENTERSUB;
7461 		    /* Is there a prototype? */
7462 		    if (
7463 #ifdef PERL_MAD
7464 			cv &&
7465 #endif
7466 			SvPOK(cv))
7467 		    {
7468 			STRLEN protolen = CvPROTOLEN(cv);
7469 			const char *proto = CvPROTO(cv);
7470 			bool optional;
7471 			proto = S_strip_spaces(aTHX_ proto, &protolen);
7472 			if (!protolen)
7473 			    TERM(FUNC0SUB);
7474 			if ((optional = *proto == ';'))
7475 			  do
7476 			    proto++;
7477 			  while (*proto == ';');
7478 			if (
7479 			    (
7480 			        (
7481 			            *proto == '$' || *proto == '_'
7482 			         || *proto == '*' || *proto == '+'
7483 			        )
7484 			     && proto[1] == '\0'
7485 			    )
7486 			 || (
7487 			     *proto == '\\' && proto[1] && proto[2] == '\0'
7488 			    )
7489 			)
7490 			    UNIPROTO(UNIOPSUB,optional);
7491 			if (*proto == '\\' && proto[1] == '[') {
7492 			    const char *p = proto + 2;
7493 			    while(*p && *p != ']')
7494 				++p;
7495 			    if(*p == ']' && !p[1])
7496 				UNIPROTO(UNIOPSUB,optional);
7497 			}
7498 			if (*proto == '&' && *s == '{') {
7499 			    if (PL_curstash)
7500 				sv_setpvs(PL_subname, "__ANON__");
7501 			    else
7502 				sv_setpvs(PL_subname, "__ANON__::__ANON__");
7503 			    if (!PL_lex_allbrackets &&
7504 				    PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7505 				PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7506 			    PREBLOCK(LSTOPSUB);
7507 			}
7508 		    }
7509 #ifdef PERL_MAD
7510 		    {
7511 			if (PL_madskills) {
7512 			    PL_nextwhite = PL_thiswhite;
7513 			    PL_thiswhite = 0;
7514 			}
7515 			start_force(PL_curforce);
7516 			NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7517 			PL_expect = XTERM;
7518 			if (PL_madskills) {
7519 			    PL_nextwhite = nextPL_nextwhite;
7520 			    curmad('X', PL_thistoken);
7521 			    PL_thistoken = newSVpvs("");
7522 			}
7523 			force_next(off ? PRIVATEREF : WORD);
7524 			if (!PL_lex_allbrackets &&
7525 				PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7526 			    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7527 			TOKEN(NOAMP);
7528 		    }
7529 		}
7530 
7531 		/* Guess harder when madskills require "best effort". */
7532 		if (PL_madskills && (!gv || !GvCVu(gv))) {
7533 		    int probable_sub = 0;
7534 		    if (strchr("\"'`$@%0123456789!*+{[<", *s))
7535 			probable_sub = 1;
7536 		    else if (isALPHA(*s)) {
7537 			char tmpbuf[1024];
7538 			STRLEN tmplen;
7539 			d = s;
7540 			d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7541 			if (!keyword(tmpbuf, tmplen, 0))
7542 			    probable_sub = 1;
7543 			else {
7544 			    while (d < PL_bufend && isSPACE(*d))
7545 				d++;
7546 			    if (*d == '=' && d[1] == '>')
7547 				probable_sub = 1;
7548 			}
7549 		    }
7550 		    if (probable_sub) {
7551 			gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7552                                         SVt_PVCV);
7553 			op_free(pl_yylval.opval);
7554 			pl_yylval.opval =
7555 			    off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7556 			pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7557 			PL_last_lop = PL_oldbufptr;
7558 			PL_last_lop_op = OP_ENTERSUB;
7559 			PL_nextwhite = PL_thiswhite;
7560 			PL_thiswhite = 0;
7561 			start_force(PL_curforce);
7562 			NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7563 			PL_expect = XTERM;
7564 			PL_nextwhite = nextPL_nextwhite;
7565 			curmad('X', PL_thistoken);
7566 			PL_thistoken = newSVpvs("");
7567 			force_next(off ? PRIVATEREF : WORD);
7568 			if (!PL_lex_allbrackets &&
7569 				PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7570 			    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7571 			TOKEN(NOAMP);
7572 		    }
7573 #else
7574 		    NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7575 		    PL_expect = XTERM;
7576 		    force_next(off ? PRIVATEREF : WORD);
7577 		    if (!PL_lex_allbrackets &&
7578 			    PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7579 			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7580 		    TOKEN(NOAMP);
7581 #endif
7582 		}
7583 
7584 		/* Call it a bare word */
7585 
7586 		if (PL_hints & HINT_STRICT_SUBS)
7587 		    pl_yylval.opval->op_private |= OPpCONST_STRICT;
7588 		else {
7589 		bareword:
7590 		    /* after "print" and similar functions (corresponding to
7591 		     * "F? L" in opcode.pl), whatever wasn't already parsed as
7592 		     * a filehandle should be subject to "strict subs".
7593 		     * Likewise for the optional indirect-object argument to system
7594 		     * or exec, which can't be a bareword */
7595 		    if ((PL_last_lop_op == OP_PRINT
7596 			    || PL_last_lop_op == OP_PRTF
7597 			    || PL_last_lop_op == OP_SAY
7598 			    || PL_last_lop_op == OP_SYSTEM
7599 			    || PL_last_lop_op == OP_EXEC)
7600 			    && (PL_hints & HINT_STRICT_SUBS))
7601 			pl_yylval.opval->op_private |= OPpCONST_STRICT;
7602 		    if (lastchar != '-') {
7603 			if (ckWARN(WARN_RESERVED)) {
7604 			    d = PL_tokenbuf;
7605 			    while (isLOWER(*d))
7606 				d++;
7607 			    if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7608                             {
7609                                 /* PL_warn_reserved is constant */
7610                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7611 				Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7612 				       PL_tokenbuf);
7613                                 GCC_DIAG_RESTORE;
7614                             }
7615 			}
7616 		    }
7617 		}
7618 		op_free(rv2cv_op);
7619 
7620 	    safe_bareword:
7621 		if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7622 		 && saw_infix_sigil) {
7623 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7624 				     "Operator or semicolon missing before %c%"UTF8f,
7625 				     lastchar,
7626 				     UTF8fARG(UTF, strlen(PL_tokenbuf),
7627 					      PL_tokenbuf));
7628 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7629 				     "Ambiguous use of %c resolved as operator %c",
7630 				     lastchar, lastchar);
7631 		}
7632 		TOKEN(WORD);
7633 	    }
7634 
7635 	case KEY___FILE__:
7636 	    FUN0OP(
7637 		(OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7638 	    );
7639 
7640 	case KEY___LINE__:
7641 	    FUN0OP(
7642         	(OP*)newSVOP(OP_CONST, 0,
7643 		    Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7644 	    );
7645 
7646 	case KEY___PACKAGE__:
7647 	    FUN0OP(
7648 		(OP*)newSVOP(OP_CONST, 0,
7649 					(PL_curstash
7650 					 ? newSVhek(HvNAME_HEK(PL_curstash))
7651 					 : &PL_sv_undef))
7652 	    );
7653 
7654 	case KEY___DATA__:
7655 	case KEY___END__: {
7656 	    GV *gv;
7657 	    if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7658 		HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7659 					? PL_curstash
7660 					: PL_defstash;
7661 		gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7662 		if (!isGV(gv))
7663 		    gv_init(gv,stash,"DATA",4,0);
7664 		GvMULTI_on(gv);
7665 		if (!GvIO(gv))
7666 		    GvIOp(gv) = newIO();
7667 		IoIFP(GvIOp(gv)) = PL_rsfp;
7668 #if defined(HAS_FCNTL) && defined(F_SETFD)
7669 		{
7670 		    const int fd = PerlIO_fileno(PL_rsfp);
7671 		    fcntl(fd,F_SETFD,fd >= 3);
7672 		}
7673 #endif
7674 		/* Mark this internal pseudo-handle as clean */
7675 		IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7676 		if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7677 		    IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7678 		else
7679 		    IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7680 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7681 		/* if the script was opened in binmode, we need to revert
7682 		 * it to text mode for compatibility; but only iff it has CRs
7683 		 * XXX this is a questionable hack at best. */
7684 		if (PL_bufend-PL_bufptr > 2
7685 		    && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7686 		{
7687 		    Off_t loc = 0;
7688 		    if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7689 			loc = PerlIO_tell(PL_rsfp);
7690 			(void)PerlIO_seek(PL_rsfp, 0L, 0);
7691 		    }
7692 #ifdef NETWARE
7693 			if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7694 #else
7695 		    if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7696 #endif	/* NETWARE */
7697 			if (loc > 0)
7698 			    PerlIO_seek(PL_rsfp, loc, 0);
7699 		    }
7700 		}
7701 #endif
7702 #ifdef PERLIO_LAYERS
7703 		if (!IN_BYTES) {
7704 		    if (UTF)
7705 			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7706 		    else if (PL_encoding) {
7707 			SV *name;
7708 			dSP;
7709 			ENTER;
7710 			SAVETMPS;
7711 			PUSHMARK(sp);
7712 			XPUSHs(PL_encoding);
7713 			PUTBACK;
7714 			call_method("name", G_SCALAR);
7715 			SPAGAIN;
7716 			name = POPs;
7717 			PUTBACK;
7718 			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7719 					    Perl_form(aTHX_ ":encoding(%"SVf")",
7720 						      SVfARG(name)));
7721 			FREETMPS;
7722 			LEAVE;
7723 		    }
7724 		}
7725 #endif
7726 #ifdef PERL_MAD
7727 		if (PL_madskills) {
7728 		    if (PL_realtokenstart >= 0) {
7729 			char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7730 			if (!PL_endwhite)
7731 			    PL_endwhite = newSVpvs("");
7732 			sv_catsv(PL_endwhite, PL_thiswhite);
7733 			PL_thiswhite = 0;
7734 			sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7735 			PL_realtokenstart = -1;
7736 		    }
7737 		    while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7738 			   != NULL) ;
7739 		}
7740 #endif
7741 		PL_rsfp = NULL;
7742 	    }
7743 	    goto fake_eof;
7744 	}
7745 
7746 	case KEY___SUB__:
7747 	    FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7748 
7749 	case KEY_AUTOLOAD:
7750 	case KEY_DESTROY:
7751 	case KEY_BEGIN:
7752 	case KEY_UNITCHECK:
7753 	case KEY_CHECK:
7754 	case KEY_INIT:
7755 	case KEY_END:
7756 	    if (PL_expect == XSTATE) {
7757 		s = PL_bufptr;
7758 		goto really_sub;
7759 	    }
7760 	    goto just_a_word;
7761 
7762 	case_KEY_CORE:
7763 	    {
7764 		STRLEN olen = len;
7765 		d = s;
7766 		s += 2;
7767 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7768 		if ((*s == ':' && s[1] == ':')
7769 		 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7770 		{
7771 		    s = d;
7772 		    len = olen;
7773 		    Copy(PL_bufptr, PL_tokenbuf, olen, char);
7774 		    goto just_a_word;
7775 		}
7776 		if (!tmp)
7777 		    Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7778 				      UTF8fARG(UTF, len, PL_tokenbuf));
7779 		if (tmp < 0)
7780 		    tmp = -tmp;
7781 		else if (tmp == KEY_require || tmp == KEY_do
7782 		      || tmp == KEY_glob)
7783 		    /* that's a way to remember we saw "CORE::" */
7784 		    orig_keyword = tmp;
7785 		goto reserved_word;
7786 	    }
7787 
7788 	case KEY_abs:
7789 	    UNI(OP_ABS);
7790 
7791 	case KEY_alarm:
7792 	    UNI(OP_ALARM);
7793 
7794 	case KEY_accept:
7795 	    LOP(OP_ACCEPT,XTERM);
7796 
7797 	case KEY_and:
7798 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7799 		return REPORT(0);
7800 	    OPERATOR(ANDOP);
7801 
7802 	case KEY_atan2:
7803 	    LOP(OP_ATAN2,XTERM);
7804 
7805 	case KEY_bind:
7806 	    LOP(OP_BIND,XTERM);
7807 
7808 	case KEY_binmode:
7809 	    LOP(OP_BINMODE,XTERM);
7810 
7811 	case KEY_bless:
7812 	    LOP(OP_BLESS,XTERM);
7813 
7814 	case KEY_break:
7815 	    FUN0(OP_BREAK);
7816 
7817 	case KEY_chop:
7818 	    UNI(OP_CHOP);
7819 
7820 	case KEY_continue:
7821 		    /* We have to disambiguate the two senses of
7822 		      "continue". If the next token is a '{' then
7823 		      treat it as the start of a continue block;
7824 		      otherwise treat it as a control operator.
7825 		     */
7826 		    s = skipspace(s);
7827 		    if (*s == '{')
7828 	    PREBLOCK(CONTINUE);
7829 		    else
7830 			FUN0(OP_CONTINUE);
7831 
7832 	case KEY_chdir:
7833 	    /* may use HOME */
7834 	    (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7835 	    UNI(OP_CHDIR);
7836 
7837 	case KEY_close:
7838 	    UNI(OP_CLOSE);
7839 
7840 	case KEY_closedir:
7841 	    UNI(OP_CLOSEDIR);
7842 
7843 	case KEY_cmp:
7844 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7845 		return REPORT(0);
7846 	    Eop(OP_SCMP);
7847 
7848 	case KEY_caller:
7849 	    UNI(OP_CALLER);
7850 
7851 	case KEY_crypt:
7852 #ifdef FCRYPT
7853 	    if (!PL_cryptseen) {
7854 		PL_cryptseen = TRUE;
7855 		init_des();
7856 	    }
7857 #endif
7858 	    LOP(OP_CRYPT,XTERM);
7859 
7860 	case KEY_chmod:
7861 	    LOP(OP_CHMOD,XTERM);
7862 
7863 	case KEY_chown:
7864 	    LOP(OP_CHOWN,XTERM);
7865 
7866 	case KEY_connect:
7867 	    LOP(OP_CONNECT,XTERM);
7868 
7869 	case KEY_chr:
7870 	    UNI(OP_CHR);
7871 
7872 	case KEY_cos:
7873 	    UNI(OP_COS);
7874 
7875 	case KEY_chroot:
7876 	    UNI(OP_CHROOT);
7877 
7878 	case KEY_default:
7879 	    PREBLOCK(DEFAULT);
7880 
7881 	case KEY_do:
7882 	    s = SKIPSPACE1(s);
7883 	    if (*s == '{')
7884 		PRETERMBLOCK(DO);
7885 	    if (*s != '\'') {
7886 		*PL_tokenbuf = '&';
7887 		d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7888 			      1, &len);
7889 		if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7890 		 && !keyword(PL_tokenbuf + 1, len, 0)) {
7891 		    d = SKIPSPACE1(d);
7892 		    if (*d == '(') {
7893 			force_ident_maybe_lex('&');
7894 			s = d;
7895 		    }
7896 		}
7897 	    }
7898 	    if (orig_keyword == KEY_do) {
7899 		orig_keyword = 0;
7900 		pl_yylval.ival = 1;
7901 	    }
7902 	    else
7903 		pl_yylval.ival = 0;
7904 	    OPERATOR(DO);
7905 
7906 	case KEY_die:
7907 	    PL_hints |= HINT_BLOCK_SCOPE;
7908 	    LOP(OP_DIE,XTERM);
7909 
7910 	case KEY_defined:
7911 	    UNI(OP_DEFINED);
7912 
7913 	case KEY_delete:
7914 	    UNI(OP_DELETE);
7915 
7916 	case KEY_dbmopen:
7917 	    Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7918 			      STR_WITH_LEN("NDBM_File::"),
7919 			      STR_WITH_LEN("DB_File::"),
7920 			      STR_WITH_LEN("GDBM_File::"),
7921 			      STR_WITH_LEN("SDBM_File::"),
7922 			      STR_WITH_LEN("ODBM_File::"),
7923 			      NULL);
7924 	    LOP(OP_DBMOPEN,XTERM);
7925 
7926 	case KEY_dbmclose:
7927 	    UNI(OP_DBMCLOSE);
7928 
7929 	case KEY_dump:
7930 	    PL_expect = XOPERATOR;
7931 	    s = force_word(s,WORD,TRUE,FALSE);
7932 	    LOOPX(OP_DUMP);
7933 
7934 	case KEY_else:
7935 	    PREBLOCK(ELSE);
7936 
7937 	case KEY_elsif:
7938 	    pl_yylval.ival = CopLINE(PL_curcop);
7939 	    OPERATOR(ELSIF);
7940 
7941 	case KEY_eq:
7942 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7943 		return REPORT(0);
7944 	    Eop(OP_SEQ);
7945 
7946 	case KEY_exists:
7947 	    UNI(OP_EXISTS);
7948 
7949 	case KEY_exit:
7950 	    if (PL_madskills)
7951 		UNI(OP_INT);
7952 	    UNI(OP_EXIT);
7953 
7954 	case KEY_eval:
7955 	    s = SKIPSPACE1(s);
7956 	    if (*s == '{') { /* block eval */
7957 		PL_expect = XTERMBLOCK;
7958 		UNIBRACK(OP_ENTERTRY);
7959 	    }
7960 	    else { /* string eval */
7961 		PL_expect = XTERM;
7962 		UNIBRACK(OP_ENTEREVAL);
7963 	    }
7964 
7965 	case KEY_evalbytes:
7966 	    PL_expect = XTERM;
7967 	    UNIBRACK(-OP_ENTEREVAL);
7968 
7969 	case KEY_eof:
7970 	    UNI(OP_EOF);
7971 
7972 	case KEY_exp:
7973 	    UNI(OP_EXP);
7974 
7975 	case KEY_each:
7976 	    UNI(OP_EACH);
7977 
7978 	case KEY_exec:
7979 	    LOP(OP_EXEC,XREF);
7980 
7981 	case KEY_endhostent:
7982 	    FUN0(OP_EHOSTENT);
7983 
7984 	case KEY_endnetent:
7985 	    FUN0(OP_ENETENT);
7986 
7987 	case KEY_endservent:
7988 	    FUN0(OP_ESERVENT);
7989 
7990 	case KEY_endprotoent:
7991 	    FUN0(OP_EPROTOENT);
7992 
7993 	case KEY_endpwent:
7994 	    FUN0(OP_EPWENT);
7995 
7996 	case KEY_endgrent:
7997 	    FUN0(OP_EGRENT);
7998 
7999 	case KEY_for:
8000 	case KEY_foreach:
8001 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8002 		return REPORT(0);
8003 	    pl_yylval.ival = CopLINE(PL_curcop);
8004 	    s = SKIPSPACE1(s);
8005 	    if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8006 		char *p = s;
8007 #ifdef PERL_MAD
8008 		int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8009 #endif
8010 
8011 		if ((PL_bufend - p) >= 3 &&
8012 		    strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8013 		    p += 2;
8014 		else if ((PL_bufend - p) >= 4 &&
8015 		    strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8016 		    p += 3;
8017 		p = PEEKSPACE(p);
8018                 /* skip optional package name, as in "for my abc $x (..)" */
8019 		if (isIDFIRST_lazy_if(p,UTF)) {
8020 		    p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8021 		    p = PEEKSPACE(p);
8022 		}
8023 		if (*p != '$')
8024 		    Perl_croak(aTHX_ "Missing $ on loop variable");
8025 #ifdef PERL_MAD
8026 		s = SvPVX(PL_linestr) + soff;
8027 #endif
8028 	    }
8029 	    OPERATOR(FOR);
8030 
8031 	case KEY_formline:
8032 	    LOP(OP_FORMLINE,XTERM);
8033 
8034 	case KEY_fork:
8035 	    FUN0(OP_FORK);
8036 
8037 	case KEY_fc:
8038 	    UNI(OP_FC);
8039 
8040 	case KEY_fcntl:
8041 	    LOP(OP_FCNTL,XTERM);
8042 
8043 	case KEY_fileno:
8044 	    UNI(OP_FILENO);
8045 
8046 	case KEY_flock:
8047 	    LOP(OP_FLOCK,XTERM);
8048 
8049 	case KEY_gt:
8050 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8051 		return REPORT(0);
8052 	    Rop(OP_SGT);
8053 
8054 	case KEY_ge:
8055 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8056 		return REPORT(0);
8057 	    Rop(OP_SGE);
8058 
8059 	case KEY_grep:
8060 	    LOP(OP_GREPSTART, XREF);
8061 
8062 	case KEY_goto:
8063 	    PL_expect = XOPERATOR;
8064 	    s = force_word(s,WORD,TRUE,FALSE);
8065 	    LOOPX(OP_GOTO);
8066 
8067 	case KEY_gmtime:
8068 	    UNI(OP_GMTIME);
8069 
8070 	case KEY_getc:
8071 	    UNIDOR(OP_GETC);
8072 
8073 	case KEY_getppid:
8074 	    FUN0(OP_GETPPID);
8075 
8076 	case KEY_getpgrp:
8077 	    UNI(OP_GETPGRP);
8078 
8079 	case KEY_getpriority:
8080 	    LOP(OP_GETPRIORITY,XTERM);
8081 
8082 	case KEY_getprotobyname:
8083 	    UNI(OP_GPBYNAME);
8084 
8085 	case KEY_getprotobynumber:
8086 	    LOP(OP_GPBYNUMBER,XTERM);
8087 
8088 	case KEY_getprotoent:
8089 	    FUN0(OP_GPROTOENT);
8090 
8091 	case KEY_getpwent:
8092 	    FUN0(OP_GPWENT);
8093 
8094 	case KEY_getpwnam:
8095 	    UNI(OP_GPWNAM);
8096 
8097 	case KEY_getpwuid:
8098 	    UNI(OP_GPWUID);
8099 
8100 	case KEY_getpeername:
8101 	    UNI(OP_GETPEERNAME);
8102 
8103 	case KEY_gethostbyname:
8104 	    UNI(OP_GHBYNAME);
8105 
8106 	case KEY_gethostbyaddr:
8107 	    LOP(OP_GHBYADDR,XTERM);
8108 
8109 	case KEY_gethostent:
8110 	    FUN0(OP_GHOSTENT);
8111 
8112 	case KEY_getnetbyname:
8113 	    UNI(OP_GNBYNAME);
8114 
8115 	case KEY_getnetbyaddr:
8116 	    LOP(OP_GNBYADDR,XTERM);
8117 
8118 	case KEY_getnetent:
8119 	    FUN0(OP_GNETENT);
8120 
8121 	case KEY_getservbyname:
8122 	    LOP(OP_GSBYNAME,XTERM);
8123 
8124 	case KEY_getservbyport:
8125 	    LOP(OP_GSBYPORT,XTERM);
8126 
8127 	case KEY_getservent:
8128 	    FUN0(OP_GSERVENT);
8129 
8130 	case KEY_getsockname:
8131 	    UNI(OP_GETSOCKNAME);
8132 
8133 	case KEY_getsockopt:
8134 	    LOP(OP_GSOCKOPT,XTERM);
8135 
8136 	case KEY_getgrent:
8137 	    FUN0(OP_GGRENT);
8138 
8139 	case KEY_getgrnam:
8140 	    UNI(OP_GGRNAM);
8141 
8142 	case KEY_getgrgid:
8143 	    UNI(OP_GGRGID);
8144 
8145 	case KEY_getlogin:
8146 	    FUN0(OP_GETLOGIN);
8147 
8148 	case KEY_given:
8149 	    pl_yylval.ival = CopLINE(PL_curcop);
8150             Perl_ck_warner_d(aTHX_
8151                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8152                 "given is experimental");
8153 	    OPERATOR(GIVEN);
8154 
8155 	case KEY_glob:
8156 	    LOP(
8157 	     orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8158 	     XTERM
8159 	    );
8160 
8161 	case KEY_hex:
8162 	    UNI(OP_HEX);
8163 
8164 	case KEY_if:
8165 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8166 		return REPORT(0);
8167 	    pl_yylval.ival = CopLINE(PL_curcop);
8168 	    OPERATOR(IF);
8169 
8170 	case KEY_index:
8171 	    LOP(OP_INDEX,XTERM);
8172 
8173 	case KEY_int:
8174 	    UNI(OP_INT);
8175 
8176 	case KEY_ioctl:
8177 	    LOP(OP_IOCTL,XTERM);
8178 
8179 	case KEY_join:
8180 	    LOP(OP_JOIN,XTERM);
8181 
8182 	case KEY_keys:
8183 	    UNI(OP_KEYS);
8184 
8185 	case KEY_kill:
8186 	    LOP(OP_KILL,XTERM);
8187 
8188 	case KEY_last:
8189 	    PL_expect = XOPERATOR;
8190 	    s = force_word(s,WORD,TRUE,FALSE);
8191 	    LOOPX(OP_LAST);
8192 
8193 	case KEY_lc:
8194 	    UNI(OP_LC);
8195 
8196 	case KEY_lcfirst:
8197 	    UNI(OP_LCFIRST);
8198 
8199 	case KEY_local:
8200 	    pl_yylval.ival = 0;
8201 	    OPERATOR(LOCAL);
8202 
8203 	case KEY_length:
8204 	    UNI(OP_LENGTH);
8205 
8206 	case KEY_lt:
8207 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8208 		return REPORT(0);
8209 	    Rop(OP_SLT);
8210 
8211 	case KEY_le:
8212 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8213 		return REPORT(0);
8214 	    Rop(OP_SLE);
8215 
8216 	case KEY_localtime:
8217 	    UNI(OP_LOCALTIME);
8218 
8219 	case KEY_log:
8220 	    UNI(OP_LOG);
8221 
8222 	case KEY_link:
8223 	    LOP(OP_LINK,XTERM);
8224 
8225 	case KEY_listen:
8226 	    LOP(OP_LISTEN,XTERM);
8227 
8228 	case KEY_lock:
8229 	    UNI(OP_LOCK);
8230 
8231 	case KEY_lstat:
8232 	    UNI(OP_LSTAT);
8233 
8234 	case KEY_m:
8235 	    s = scan_pat(s,OP_MATCH);
8236 	    TERM(sublex_start());
8237 
8238 	case KEY_map:
8239 	    LOP(OP_MAPSTART, XREF);
8240 
8241 	case KEY_mkdir:
8242 	    LOP(OP_MKDIR,XTERM);
8243 
8244 	case KEY_msgctl:
8245 	    LOP(OP_MSGCTL,XTERM);
8246 
8247 	case KEY_msgget:
8248 	    LOP(OP_MSGGET,XTERM);
8249 
8250 	case KEY_msgrcv:
8251 	    LOP(OP_MSGRCV,XTERM);
8252 
8253 	case KEY_msgsnd:
8254 	    LOP(OP_MSGSND,XTERM);
8255 
8256 	case KEY_our:
8257 	case KEY_my:
8258 	case KEY_state:
8259 	    PL_in_my = (U16)tmp;
8260 	    s = SKIPSPACE1(s);
8261 	    if (isIDFIRST_lazy_if(s,UTF)) {
8262 #ifdef PERL_MAD
8263 		char* start = s;
8264 #endif
8265 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8266 		if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8267 		{
8268 		    if (!FEATURE_LEXSUBS_IS_ENABLED)
8269 			Perl_croak(aTHX_
8270 				  "Experimental \"%s\" subs not enabled",
8271 				   tmp == KEY_my    ? "my"    :
8272 				   tmp == KEY_state ? "state" : "our");
8273 		    Perl_ck_warner_d(aTHX_
8274 			packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8275 			"The lexical_subs feature is experimental");
8276 		    goto really_sub;
8277 		}
8278 		PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8279 		if (!PL_in_my_stash) {
8280 		    char tmpbuf[1024];
8281 		    PL_bufptr = s;
8282 		    my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8283 		    yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8284 		}
8285 #ifdef PERL_MAD
8286 		if (PL_madskills) {	/* just add type to declarator token */
8287 		    sv_catsv(PL_thistoken, PL_nextwhite);
8288 		    PL_nextwhite = 0;
8289 		    sv_catpvn(PL_thistoken, start, s - start);
8290 		}
8291 #endif
8292 	    }
8293 	    pl_yylval.ival = 1;
8294 	    OPERATOR(MY);
8295 
8296 	case KEY_next:
8297 	    PL_expect = XOPERATOR;
8298 	    s = force_word(s,WORD,TRUE,FALSE);
8299 	    LOOPX(OP_NEXT);
8300 
8301 	case KEY_ne:
8302 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8303 		return REPORT(0);
8304 	    Eop(OP_SNE);
8305 
8306 	case KEY_no:
8307 	    s = tokenize_use(0, s);
8308 	    TERM(USE);
8309 
8310 	case KEY_not:
8311 	    if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8312 		FUN1(OP_NOT);
8313 	    else {
8314 		if (!PL_lex_allbrackets &&
8315 			PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8316 		    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8317 		OPERATOR(NOTOP);
8318 	    }
8319 
8320 	case KEY_open:
8321 	    s = SKIPSPACE1(s);
8322 	    if (isIDFIRST_lazy_if(s,UTF)) {
8323           const char *t;
8324           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8325               &len);
8326 		for (t=d; isSPACE(*t);)
8327 		    t++;
8328 		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8329 		    /* [perl #16184] */
8330 		    && !(t[0] == '=' && t[1] == '>')
8331 		    && !(t[0] == ':' && t[1] == ':')
8332 		    && !keyword(s, d-s, 0)
8333 		) {
8334 		    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8335 		       "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8336 			UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8337 		}
8338 	    }
8339 	    LOP(OP_OPEN,XTERM);
8340 
8341 	case KEY_or:
8342 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8343 		return REPORT(0);
8344 	    pl_yylval.ival = OP_OR;
8345 	    OPERATOR(OROP);
8346 
8347 	case KEY_ord:
8348 	    UNI(OP_ORD);
8349 
8350 	case KEY_oct:
8351 	    UNI(OP_OCT);
8352 
8353 	case KEY_opendir:
8354 	    LOP(OP_OPEN_DIR,XTERM);
8355 
8356 	case KEY_print:
8357 	    checkcomma(s,PL_tokenbuf,"filehandle");
8358 	    LOP(OP_PRINT,XREF);
8359 
8360 	case KEY_printf:
8361 	    checkcomma(s,PL_tokenbuf,"filehandle");
8362 	    LOP(OP_PRTF,XREF);
8363 
8364 	case KEY_prototype:
8365 	    UNI(OP_PROTOTYPE);
8366 
8367 	case KEY_push:
8368 	    LOP(OP_PUSH,XTERM);
8369 
8370 	case KEY_pop:
8371 	    UNIDOR(OP_POP);
8372 
8373 	case KEY_pos:
8374 	    UNIDOR(OP_POS);
8375 
8376 	case KEY_pack:
8377 	    LOP(OP_PACK,XTERM);
8378 
8379 	case KEY_package:
8380 	    s = force_word(s,WORD,FALSE,TRUE);
8381 	    s = SKIPSPACE1(s);
8382 	    s = force_strict_version(s);
8383 	    PL_lex_expect = XBLOCK;
8384 	    OPERATOR(PACKAGE);
8385 
8386 	case KEY_pipe:
8387 	    LOP(OP_PIPE_OP,XTERM);
8388 
8389 	case KEY_q:
8390 	    s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8391 	    if (!s)
8392 		missingterm(NULL);
8393 	    COPLINE_SET_FROM_MULTI_END;
8394 	    pl_yylval.ival = OP_CONST;
8395 	    TERM(sublex_start());
8396 
8397 	case KEY_quotemeta:
8398 	    UNI(OP_QUOTEMETA);
8399 
8400 	case KEY_qw: {
8401 	    OP *words = NULL;
8402 	    s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8403 	    if (!s)
8404 		missingterm(NULL);
8405 	    COPLINE_SET_FROM_MULTI_END;
8406 	    PL_expect = XOPERATOR;
8407 	    if (SvCUR(PL_lex_stuff)) {
8408 		int warned_comma = !ckWARN(WARN_QW);
8409 		int warned_comment = warned_comma;
8410 		d = SvPV_force(PL_lex_stuff, len);
8411 		while (len) {
8412 		    for (; isSPACE(*d) && len; --len, ++d)
8413 			/**/;
8414 		    if (len) {
8415 			SV *sv;
8416 			const char *b = d;
8417 			if (!warned_comma || !warned_comment) {
8418 			    for (; !isSPACE(*d) && len; --len, ++d) {
8419 				if (!warned_comma && *d == ',') {
8420 				    Perl_warner(aTHX_ packWARN(WARN_QW),
8421 					"Possible attempt to separate words with commas");
8422 				    ++warned_comma;
8423 				}
8424 				else if (!warned_comment && *d == '#') {
8425 				    Perl_warner(aTHX_ packWARN(WARN_QW),
8426 					"Possible attempt to put comments in qw() list");
8427 				    ++warned_comment;
8428 				}
8429 			    }
8430 			}
8431 			else {
8432 			    for (; !isSPACE(*d) && len; --len, ++d)
8433 				/**/;
8434 			}
8435 			sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8436 			words = op_append_elem(OP_LIST, words,
8437 					    newSVOP(OP_CONST, 0, tokeq(sv)));
8438 		    }
8439 		}
8440 	    }
8441 	    if (!words)
8442 		words = newNULLLIST();
8443 	    if (PL_lex_stuff) {
8444 		SvREFCNT_dec(PL_lex_stuff);
8445 		PL_lex_stuff = NULL;
8446 	    }
8447 	    PL_expect = XOPERATOR;
8448 	    pl_yylval.opval = sawparens(words);
8449 	    TOKEN(QWLIST);
8450 	}
8451 
8452 	case KEY_qq:
8453 	    s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8454 	    if (!s)
8455 		missingterm(NULL);
8456 	    pl_yylval.ival = OP_STRINGIFY;
8457 	    if (SvIVX(PL_lex_stuff) == '\'')
8458 		SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8459 	    TERM(sublex_start());
8460 
8461 	case KEY_qr:
8462 	    s = scan_pat(s,OP_QR);
8463 	    TERM(sublex_start());
8464 
8465 	case KEY_qx:
8466 	    s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8467 	    if (!s)
8468 		missingterm(NULL);
8469 	    pl_yylval.ival = OP_BACKTICK;
8470 	    TERM(sublex_start());
8471 
8472 	case KEY_return:
8473 	    OLDLOP(OP_RETURN);
8474 
8475 	case KEY_require:
8476 	    s = SKIPSPACE1(s);
8477 	    PL_expect = XOPERATOR;
8478 	    if (isDIGIT(*s)) {
8479 		s = force_version(s, FALSE);
8480 	    }
8481 	    else if (*s != 'v' || !isDIGIT(s[1])
8482 		    || (s = force_version(s, TRUE), *s == 'v'))
8483 	    {
8484 		*PL_tokenbuf = '\0';
8485 		s = force_word(s,WORD,TRUE,TRUE);
8486 		if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8487 		    gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8488                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8489 		else if (*s == '<')
8490 		    yyerror("<> should be quotes");
8491 	    }
8492 	    if (orig_keyword == KEY_require) {
8493 		orig_keyword = 0;
8494 		pl_yylval.ival = 1;
8495 	    }
8496 	    else
8497 		pl_yylval.ival = 0;
8498 	    PL_expect = XTERM;
8499 	    PL_bufptr = s;
8500 	    PL_last_uni = PL_oldbufptr;
8501 	    PL_last_lop_op = OP_REQUIRE;
8502 	    s = skipspace(s);
8503 	    return REPORT( (int)REQUIRE );
8504 
8505 	case KEY_reset:
8506 	    UNI(OP_RESET);
8507 
8508 	case KEY_redo:
8509 	    PL_expect = XOPERATOR;
8510 	    s = force_word(s,WORD,TRUE,FALSE);
8511 	    LOOPX(OP_REDO);
8512 
8513 	case KEY_rename:
8514 	    LOP(OP_RENAME,XTERM);
8515 
8516 	case KEY_rand:
8517 	    UNI(OP_RAND);
8518 
8519 	case KEY_rmdir:
8520 	    UNI(OP_RMDIR);
8521 
8522 	case KEY_rindex:
8523 	    LOP(OP_RINDEX,XTERM);
8524 
8525 	case KEY_read:
8526 	    LOP(OP_READ,XTERM);
8527 
8528 	case KEY_readdir:
8529 	    UNI(OP_READDIR);
8530 
8531 	case KEY_readline:
8532 	    UNIDOR(OP_READLINE);
8533 
8534 	case KEY_readpipe:
8535 	    UNIDOR(OP_BACKTICK);
8536 
8537 	case KEY_rewinddir:
8538 	    UNI(OP_REWINDDIR);
8539 
8540 	case KEY_recv:
8541 	    LOP(OP_RECV,XTERM);
8542 
8543 	case KEY_reverse:
8544 	    LOP(OP_REVERSE,XTERM);
8545 
8546 	case KEY_readlink:
8547 	    UNIDOR(OP_READLINK);
8548 
8549 	case KEY_ref:
8550 	    UNI(OP_REF);
8551 
8552 	case KEY_s:
8553 	    s = scan_subst(s);
8554 	    if (pl_yylval.opval)
8555 		TERM(sublex_start());
8556 	    else
8557 		TOKEN(1);	/* force error */
8558 
8559 	case KEY_say:
8560 	    checkcomma(s,PL_tokenbuf,"filehandle");
8561 	    LOP(OP_SAY,XREF);
8562 
8563 	case KEY_chomp:
8564 	    UNI(OP_CHOMP);
8565 
8566 	case KEY_scalar:
8567 	    UNI(OP_SCALAR);
8568 
8569 	case KEY_select:
8570 	    LOP(OP_SELECT,XTERM);
8571 
8572 	case KEY_seek:
8573 	    LOP(OP_SEEK,XTERM);
8574 
8575 	case KEY_semctl:
8576 	    LOP(OP_SEMCTL,XTERM);
8577 
8578 	case KEY_semget:
8579 	    LOP(OP_SEMGET,XTERM);
8580 
8581 	case KEY_semop:
8582 	    LOP(OP_SEMOP,XTERM);
8583 
8584 	case KEY_send:
8585 	    LOP(OP_SEND,XTERM);
8586 
8587 	case KEY_setpgrp:
8588 	    LOP(OP_SETPGRP,XTERM);
8589 
8590 	case KEY_setpriority:
8591 	    LOP(OP_SETPRIORITY,XTERM);
8592 
8593 	case KEY_sethostent:
8594 	    UNI(OP_SHOSTENT);
8595 
8596 	case KEY_setnetent:
8597 	    UNI(OP_SNETENT);
8598 
8599 	case KEY_setservent:
8600 	    UNI(OP_SSERVENT);
8601 
8602 	case KEY_setprotoent:
8603 	    UNI(OP_SPROTOENT);
8604 
8605 	case KEY_setpwent:
8606 	    FUN0(OP_SPWENT);
8607 
8608 	case KEY_setgrent:
8609 	    FUN0(OP_SGRENT);
8610 
8611 	case KEY_seekdir:
8612 	    LOP(OP_SEEKDIR,XTERM);
8613 
8614 	case KEY_setsockopt:
8615 	    LOP(OP_SSOCKOPT,XTERM);
8616 
8617 	case KEY_shift:
8618 	    UNIDOR(OP_SHIFT);
8619 
8620 	case KEY_shmctl:
8621 	    LOP(OP_SHMCTL,XTERM);
8622 
8623 	case KEY_shmget:
8624 	    LOP(OP_SHMGET,XTERM);
8625 
8626 	case KEY_shmread:
8627 	    LOP(OP_SHMREAD,XTERM);
8628 
8629 	case KEY_shmwrite:
8630 	    LOP(OP_SHMWRITE,XTERM);
8631 
8632 	case KEY_shutdown:
8633 	    LOP(OP_SHUTDOWN,XTERM);
8634 
8635 	case KEY_sin:
8636 	    UNI(OP_SIN);
8637 
8638 	case KEY_sleep:
8639 	    UNI(OP_SLEEP);
8640 
8641 	case KEY_socket:
8642 	    LOP(OP_SOCKET,XTERM);
8643 
8644 	case KEY_socketpair:
8645 	    LOP(OP_SOCKPAIR,XTERM);
8646 
8647 	case KEY_sort:
8648 	    checkcomma(s,PL_tokenbuf,"subroutine name");
8649 	    s = SKIPSPACE1(s);
8650 	    PL_expect = XTERM;
8651 	    s = force_word(s,WORD,TRUE,TRUE);
8652 	    LOP(OP_SORT,XREF);
8653 
8654 	case KEY_split:
8655 	    LOP(OP_SPLIT,XTERM);
8656 
8657 	case KEY_sprintf:
8658 	    LOP(OP_SPRINTF,XTERM);
8659 
8660 	case KEY_splice:
8661 	    LOP(OP_SPLICE,XTERM);
8662 
8663 	case KEY_sqrt:
8664 	    UNI(OP_SQRT);
8665 
8666 	case KEY_srand:
8667 	    UNI(OP_SRAND);
8668 
8669 	case KEY_stat:
8670 	    UNI(OP_STAT);
8671 
8672 	case KEY_study:
8673 	    UNI(OP_STUDY);
8674 
8675 	case KEY_substr:
8676 	    LOP(OP_SUBSTR,XTERM);
8677 
8678 	case KEY_format:
8679 	case KEY_sub:
8680 	  really_sub:
8681 	    {
8682 		char * const tmpbuf = PL_tokenbuf + 1;
8683 		expectation attrful;
8684 		bool have_name, have_proto;
8685 		const int key = tmp;
8686 #ifndef PERL_MAD
8687                 SV *format_name = NULL;
8688 #endif
8689 
8690 #ifdef PERL_MAD
8691 		SV *tmpwhite = 0;
8692 
8693 		char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8694 		SV *subtoken = PL_madskills
8695 		   ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8696 		   : NULL;
8697 		PL_thistoken = 0;
8698 
8699 		d = s;
8700 		s = SKIPSPACE2(s,tmpwhite);
8701 #else
8702 		d = s;
8703 		s = skipspace(s);
8704 #endif
8705 
8706 		if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8707 		    (*s == ':' && s[1] == ':'))
8708 		{
8709 #ifdef PERL_MAD
8710 		    SV *nametoke = NULL;
8711 #endif
8712 
8713 		    PL_expect = XBLOCK;
8714 		    attrful = XATTRBLOCK;
8715 		    d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8716 				  &len);
8717 #ifdef PERL_MAD
8718 		    if (PL_madskills)
8719 			nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8720 #else
8721                     if (key == KEY_format)
8722 			format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8723 #endif
8724 		    *PL_tokenbuf = '&';
8725 		    if (memchr(tmpbuf, ':', len) || key != KEY_sub
8726 		     || pad_findmy_pvn(
8727 			    PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8728 			) != NOT_IN_PAD)
8729 			sv_setpvn(PL_subname, tmpbuf, len);
8730 		    else {
8731 			sv_setsv(PL_subname,PL_curstname);
8732 			sv_catpvs(PL_subname,"::");
8733 			sv_catpvn(PL_subname,tmpbuf,len);
8734 		    }
8735                     if (SvUTF8(PL_linestr))
8736                         SvUTF8_on(PL_subname);
8737 		    have_name = TRUE;
8738 
8739 
8740 #ifdef PERL_MAD
8741 		    start_force(0);
8742 		    CURMAD('X', nametoke);
8743 		    CURMAD('_', tmpwhite);
8744 		    force_ident_maybe_lex('&');
8745 
8746 		    s = SKIPSPACE2(d,tmpwhite);
8747 #else
8748 		    s = skipspace(d);
8749 #endif
8750 		}
8751 		else {
8752 		    if (key == KEY_my || key == KEY_our || key==KEY_state)
8753 		    {
8754 			*d = '\0';
8755 			/* diag_listed_as: Missing name in "%s sub" */
8756 			Perl_croak(aTHX_
8757 				  "Missing name in \"%s\"", PL_bufptr);
8758 		    }
8759 		    PL_expect = XTERMBLOCK;
8760 		    attrful = XATTRTERM;
8761 		    sv_setpvs(PL_subname,"?");
8762 		    have_name = FALSE;
8763 		}
8764 
8765 		if (key == KEY_format) {
8766 #ifdef PERL_MAD
8767 		    PL_thistoken = subtoken;
8768 		    s = d;
8769 #else
8770 		    if (format_name) {
8771                         start_force(PL_curforce);
8772                         NEXTVAL_NEXTTOKE.opval
8773                             = (OP*)newSVOP(OP_CONST,0, format_name);
8774                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8775                         force_next(WORD);
8776                     }
8777 #endif
8778 		    PREBLOCK(FORMAT);
8779 		}
8780 
8781 		/* Look for a prototype */
8782 		if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8783 		    s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8784 		    COPLINE_SET_FROM_MULTI_END;
8785 		    if (!s)
8786 			Perl_croak(aTHX_ "Prototype not terminated");
8787 		    (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8788 		    have_proto = TRUE;
8789 
8790 #ifdef PERL_MAD
8791 		    start_force(0);
8792 		    CURMAD('q', PL_thisopen);
8793 		    CURMAD('_', tmpwhite);
8794 		    CURMAD('=', PL_thisstuff);
8795 		    CURMAD('Q', PL_thisclose);
8796 		    NEXTVAL_NEXTTOKE.opval =
8797 			(OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8798 		    PL_lex_stuff = NULL;
8799 		    force_next(THING);
8800 
8801 		    s = SKIPSPACE2(s,tmpwhite);
8802 #else
8803 		    s = skipspace(s);
8804 #endif
8805 		}
8806 		else
8807 		    have_proto = FALSE;
8808 
8809 		if (*s == ':' && s[1] != ':')
8810 		    PL_expect = attrful;
8811 		else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8812 		    if (!have_name)
8813 			Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8814 		    else if (*s != ';' && *s != '}')
8815 			Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8816 		}
8817 
8818 #ifdef PERL_MAD
8819 		start_force(0);
8820 		if (tmpwhite) {
8821 		    if (PL_madskills)
8822 			curmad('^', newSVpvs(""));
8823 		    CURMAD('_', tmpwhite);
8824 		}
8825 		force_next(0);
8826 
8827 		PL_thistoken = subtoken;
8828                 PERL_UNUSED_VAR(have_proto);
8829 #else
8830 		if (have_proto) {
8831 		    NEXTVAL_NEXTTOKE.opval =
8832 			(OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8833 		    PL_lex_stuff = NULL;
8834 		    force_next(THING);
8835 		}
8836 #endif
8837 		if (!have_name) {
8838 		    if (PL_curstash)
8839 			sv_setpvs(PL_subname, "__ANON__");
8840 		    else
8841 			sv_setpvs(PL_subname, "__ANON__::__ANON__");
8842 		    TOKEN(ANONSUB);
8843 		}
8844 #ifndef PERL_MAD
8845 		force_ident_maybe_lex('&');
8846 #endif
8847 		TOKEN(SUB);
8848 	    }
8849 
8850 	case KEY_system:
8851 	    LOP(OP_SYSTEM,XREF);
8852 
8853 	case KEY_symlink:
8854 	    LOP(OP_SYMLINK,XTERM);
8855 
8856 	case KEY_syscall:
8857 	    LOP(OP_SYSCALL,XTERM);
8858 
8859 	case KEY_sysopen:
8860 	    LOP(OP_SYSOPEN,XTERM);
8861 
8862 	case KEY_sysseek:
8863 	    LOP(OP_SYSSEEK,XTERM);
8864 
8865 	case KEY_sysread:
8866 	    LOP(OP_SYSREAD,XTERM);
8867 
8868 	case KEY_syswrite:
8869 	    LOP(OP_SYSWRITE,XTERM);
8870 
8871 	case KEY_tr:
8872 	case KEY_y:
8873 	    s = scan_trans(s);
8874 	    TERM(sublex_start());
8875 
8876 	case KEY_tell:
8877 	    UNI(OP_TELL);
8878 
8879 	case KEY_telldir:
8880 	    UNI(OP_TELLDIR);
8881 
8882 	case KEY_tie:
8883 	    LOP(OP_TIE,XTERM);
8884 
8885 	case KEY_tied:
8886 	    UNI(OP_TIED);
8887 
8888 	case KEY_time:
8889 	    FUN0(OP_TIME);
8890 
8891 	case KEY_times:
8892 	    FUN0(OP_TMS);
8893 
8894 	case KEY_truncate:
8895 	    LOP(OP_TRUNCATE,XTERM);
8896 
8897 	case KEY_uc:
8898 	    UNI(OP_UC);
8899 
8900 	case KEY_ucfirst:
8901 	    UNI(OP_UCFIRST);
8902 
8903 	case KEY_untie:
8904 	    UNI(OP_UNTIE);
8905 
8906 	case KEY_until:
8907 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8908 		return REPORT(0);
8909 	    pl_yylval.ival = CopLINE(PL_curcop);
8910 	    OPERATOR(UNTIL);
8911 
8912 	case KEY_unless:
8913 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8914 		return REPORT(0);
8915 	    pl_yylval.ival = CopLINE(PL_curcop);
8916 	    OPERATOR(UNLESS);
8917 
8918 	case KEY_unlink:
8919 	    LOP(OP_UNLINK,XTERM);
8920 
8921 	case KEY_undef:
8922 	    UNIDOR(OP_UNDEF);
8923 
8924 	case KEY_unpack:
8925 	    LOP(OP_UNPACK,XTERM);
8926 
8927 	case KEY_utime:
8928 	    LOP(OP_UTIME,XTERM);
8929 
8930 	case KEY_umask:
8931 	    UNIDOR(OP_UMASK);
8932 
8933 	case KEY_unshift:
8934 	    LOP(OP_UNSHIFT,XTERM);
8935 
8936 	case KEY_use:
8937 	    s = tokenize_use(1, s);
8938 	    OPERATOR(USE);
8939 
8940 	case KEY_values:
8941 	    UNI(OP_VALUES);
8942 
8943 	case KEY_vec:
8944 	    LOP(OP_VEC,XTERM);
8945 
8946 	case KEY_when:
8947 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8948 		return REPORT(0);
8949 	    pl_yylval.ival = CopLINE(PL_curcop);
8950             Perl_ck_warner_d(aTHX_
8951                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8952                 "when is experimental");
8953 	    OPERATOR(WHEN);
8954 
8955 	case KEY_while:
8956 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8957 		return REPORT(0);
8958 	    pl_yylval.ival = CopLINE(PL_curcop);
8959 	    OPERATOR(WHILE);
8960 
8961 	case KEY_warn:
8962 	    PL_hints |= HINT_BLOCK_SCOPE;
8963 	    LOP(OP_WARN,XTERM);
8964 
8965 	case KEY_wait:
8966 	    FUN0(OP_WAIT);
8967 
8968 	case KEY_waitpid:
8969 	    LOP(OP_WAITPID,XTERM);
8970 
8971 	case KEY_wantarray:
8972 	    FUN0(OP_WANTARRAY);
8973 
8974 	case KEY_write:
8975             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8976              * we use the same number on EBCDIC */
8977 	    gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8978 	    UNI(OP_ENTERWRITE);
8979 
8980 	case KEY_x:
8981 	    if (PL_expect == XOPERATOR) {
8982 		if (*s == '=' && !PL_lex_allbrackets &&
8983 			PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8984 		    return REPORT(0);
8985 		Mop(OP_REPEAT);
8986 	    }
8987 	    check_uni();
8988 	    goto just_a_word;
8989 
8990 	case KEY_xor:
8991 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8992 		return REPORT(0);
8993 	    pl_yylval.ival = OP_XOR;
8994 	    OPERATOR(OROP);
8995 	}
8996     }}
8997 }
8998 
8999 /*
9000   S_pending_ident
9001 
9002   Looks up an identifier in the pad or in a package
9003 
9004   Returns:
9005     PRIVATEREF if this is a lexical name.
9006     WORD       if this belongs to a package.
9007 
9008   Structure:
9009       if we're in a my declaration
9010 	  croak if they tried to say my($foo::bar)
9011 	  build the ops for a my() declaration
9012       if it's an access to a my() variable
9013 	  build ops for access to a my() variable
9014       if in a dq string, and they've said @foo and we can't find @foo
9015 	  warn
9016       build ops for a bareword
9017 */
9018 
9019 static int
9020 S_pending_ident(pTHX)
9021 {
9022     dVAR;
9023     PADOFFSET tmp = 0;
9024     const char pit = (char)pl_yylval.ival;
9025     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9026     /* All routes through this function want to know if there is a colon.  */
9027     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9028 
9029     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9030           "### Pending identifier '%s'\n", PL_tokenbuf); });
9031 
9032     /* if we're in a my(), we can't allow dynamics here.
9033        $foo'bar has already been turned into $foo::bar, so
9034        just check for colons.
9035 
9036        if it's a legal name, the OP is a PADANY.
9037     */
9038     if (PL_in_my) {
9039         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
9040             if (has_colon)
9041                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9042                                   "variable %s in \"our\"",
9043                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9044             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9045         }
9046         else {
9047             if (has_colon) {
9048                 /* PL_no_myglob is constant */
9049                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9050                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9051 			    PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9052                             UTF ? SVf_UTF8 : 0);
9053                 GCC_DIAG_RESTORE;
9054             }
9055 
9056             pl_yylval.opval = newOP(OP_PADANY, 0);
9057             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9058                                                         UTF ? SVf_UTF8 : 0);
9059 	    return PRIVATEREF;
9060         }
9061     }
9062 
9063     /*
9064        build the ops for accesses to a my() variable.
9065     */
9066 
9067     if (!has_colon) {
9068 	if (!PL_in_my)
9069 	    tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9070                                     UTF ? SVf_UTF8 : 0);
9071         if (tmp != NOT_IN_PAD) {
9072             /* might be an "our" variable" */
9073             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9074                 /* build ops for a bareword */
9075 		HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9076 		HEK * const stashname = HvNAME_HEK(stash);
9077 		SV *  const sym = newSVhek(stashname);
9078                 sv_catpvs(sym, "::");
9079                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9080                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9081                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9082                 if (pit != '&')
9083                   gv_fetchsv(sym,
9084                     (PL_in_eval
9085                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9086                         : GV_ADDMULTI
9087                     ),
9088                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9089                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9090                      : SVt_PVHV));
9091                 return WORD;
9092             }
9093 
9094             pl_yylval.opval = newOP(OP_PADANY, 0);
9095             pl_yylval.opval->op_targ = tmp;
9096             return PRIVATEREF;
9097         }
9098     }
9099 
9100     /*
9101        Whine if they've said @foo in a doublequoted string,
9102        and @foo isn't a variable we can find in the symbol
9103        table.
9104     */
9105     if (ckWARN(WARN_AMBIGUOUS) &&
9106 	pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9107         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9108                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9109         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9110 		/* DO NOT warn for @- and @+ */
9111 		&& !( PL_tokenbuf[2] == '\0' &&
9112 		    ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9113 	   )
9114         {
9115             /* Downgraded from fatal to warning 20000522 mjd */
9116             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9117 			"Possible unintended interpolation of %"UTF8f
9118 			" in string",
9119 			UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9120         }
9121     }
9122 
9123     /* build ops for a bareword */
9124     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9125 				   newSVpvn_flags(PL_tokenbuf + 1,
9126 						      tokenbuf_len - 1,
9127                                                       UTF ? SVf_UTF8 : 0 ));
9128     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9129     if (pit != '&')
9130 	gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9131 		     (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9132                      | ( UTF ? SVf_UTF8 : 0 ),
9133 		     ((PL_tokenbuf[0] == '$') ? SVt_PV
9134 		      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9135 		      : SVt_PVHV));
9136     return WORD;
9137 }
9138 
9139 STATIC void
9140 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9141 {
9142     dVAR;
9143 
9144     PERL_ARGS_ASSERT_CHECKCOMMA;
9145 
9146     if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
9147 	if (ckWARN(WARN_SYNTAX)) {
9148 	    int level = 1;
9149 	    const char *w;
9150 	    for (w = s+2; *w && level; w++) {
9151 		if (*w == '(')
9152 		    ++level;
9153 		else if (*w == ')')
9154 		    --level;
9155 	    }
9156 	    while (isSPACE(*w))
9157 		++w;
9158 	    /* the list of chars below is for end of statements or
9159 	     * block / parens, boolean operators (&&, ||, //) and branch
9160 	     * constructs (or, and, if, until, unless, while, err, for).
9161 	     * Not a very solid hack... */
9162 	    if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9163 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9164 			    "%s (...) interpreted as function",name);
9165 	}
9166     }
9167     while (s < PL_bufend && isSPACE(*s))
9168 	s++;
9169     if (*s == '(')
9170 	s++;
9171     while (s < PL_bufend && isSPACE(*s))
9172 	s++;
9173     if (isIDFIRST_lazy_if(s,UTF)) {
9174 	const char * const w = s;
9175         s += UTF ? UTF8SKIP(s) : 1;
9176 	while (isWORDCHAR_lazy_if(s,UTF))
9177 	    s += UTF ? UTF8SKIP(s) : 1;
9178 	while (s < PL_bufend && isSPACE(*s))
9179 	    s++;
9180 	if (*s == ',') {
9181 	    GV* gv;
9182 	    if (keyword(w, s - w, 0))
9183 		return;
9184 
9185 	    gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9186 	    if (gv && GvCVu(gv))
9187 		return;
9188 	    Perl_croak(aTHX_ "No comma allowed after %s", what);
9189 	}
9190     }
9191 }
9192 
9193 /* S_new_constant(): do any overload::constant lookup.
9194 
9195    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9196    Best used as sv=new_constant(..., sv, ...).
9197    If s, pv are NULL, calls subroutine with one argument,
9198    and <type> is used with error messages only.
9199    <type> is assumed to be well formed UTF-8 */
9200 
9201 STATIC SV *
9202 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9203 	       SV *sv, SV *pv, const char *type, STRLEN typelen)
9204 {
9205     dVAR; dSP;
9206     HV * table = GvHV(PL_hintgv);		 /* ^H */
9207     SV *res;
9208     SV *errsv = NULL;
9209     SV **cvp;
9210     SV *cv, *typesv;
9211     const char *why1 = "", *why2 = "", *why3 = "";
9212 
9213     PERL_ARGS_ASSERT_NEW_CONSTANT;
9214     /* We assume that this is true: */
9215     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9216     assert(type || s);
9217 
9218     /* charnames doesn't work well if there have been errors found */
9219     if (PL_error_count > 0 && *key == 'c')
9220     {
9221 	SvREFCNT_dec_NN(sv);
9222 	return &PL_sv_undef;
9223     }
9224 
9225     sv_2mortal(sv);			/* Parent created it permanently */
9226     if (!table
9227 	|| ! (PL_hints & HINT_LOCALIZE_HH)
9228 	|| ! (cvp = hv_fetch(table, key, keylen, FALSE))
9229 	|| ! SvOK(*cvp))
9230     {
9231 	char *msg;
9232 
9233 	/* Here haven't found what we're looking for.  If it is charnames,
9234 	 * perhaps it needs to be loaded.  Try doing that before giving up */
9235 	if (*key == 'c') {
9236 	    Perl_load_module(aTHX_
9237 		            0,
9238 			    newSVpvs("_charnames"),
9239 			     /* version parameter; no need to specify it, as if
9240 			      * we get too early a version, will fail anyway,
9241 			      * not being able to find '_charnames' */
9242 			    NULL,
9243 			    newSVpvs(":full"),
9244 			    newSVpvs(":short"),
9245 			    NULL);
9246             assert(sp == PL_stack_sp);
9247 	    table = GvHV(PL_hintgv);
9248 	    if (table
9249 		&& (PL_hints & HINT_LOCALIZE_HH)
9250 		&& (cvp = hv_fetch(table, key, keylen, FALSE))
9251 		&& SvOK(*cvp))
9252 	    {
9253 		goto now_ok;
9254 	    }
9255 	}
9256 	if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9257 	    msg = Perl_form(aTHX_
9258 			       "Constant(%.*s) unknown",
9259 				(int)(type ? typelen : len),
9260 				(type ? type: s));
9261 	}
9262 	else {
9263             why1 = "$^H{";
9264             why2 = key;
9265             why3 = "} is not defined";
9266         report:
9267             if (*key == 'c') {
9268                 msg = Perl_form(aTHX_
9269                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9270                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9271                       );
9272             }
9273             else {
9274                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9275                                     (int)(type ? typelen : len),
9276                                     (type ? type: s), why1, why2, why3);
9277             }
9278         }
9279 	yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9280   	return SvREFCNT_inc_simple_NN(sv);
9281     }
9282 now_ok:
9283     cv = *cvp;
9284     if (!pv && s)
9285   	pv = newSVpvn_flags(s, len, SVs_TEMP);
9286     if (type && pv)
9287   	typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9288     else
9289   	typesv = &PL_sv_undef;
9290 
9291     PUSHSTACKi(PERLSI_OVERLOAD);
9292     ENTER ;
9293     SAVETMPS;
9294 
9295     PUSHMARK(SP) ;
9296     EXTEND(sp, 3);
9297     if (pv)
9298  	PUSHs(pv);
9299     PUSHs(sv);
9300     if (pv)
9301  	PUSHs(typesv);
9302     PUTBACK;
9303     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9304 
9305     SPAGAIN ;
9306 
9307     /* Check the eval first */
9308     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9309 	STRLEN errlen;
9310 	const char * errstr;
9311 	sv_catpvs(errsv, "Propagated");
9312 	errstr = SvPV_const(errsv, errlen);
9313 	yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9314 	(void)POPs;
9315 	res = SvREFCNT_inc_simple_NN(sv);
9316     }
9317     else {
9318  	res = POPs;
9319 	SvREFCNT_inc_simple_void_NN(res);
9320     }
9321 
9322     PUTBACK ;
9323     FREETMPS ;
9324     LEAVE ;
9325     POPSTACK;
9326 
9327     if (!SvOK(res)) {
9328  	why1 = "Call to &{$^H{";
9329  	why2 = key;
9330  	why3 = "}} did not return a defined value";
9331  	sv = res;
9332 	(void)sv_2mortal(sv);
9333  	goto report;
9334     }
9335 
9336     return res;
9337 }
9338 
9339 PERL_STATIC_INLINE void
9340 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9341     dVAR;
9342     PERL_ARGS_ASSERT_PARSE_IDENT;
9343 
9344     for (;;) {
9345         if (*d >= e)
9346             Perl_croak(aTHX_ "%s", ident_too_long);
9347         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9348              /* The UTF-8 case must come first, otherwise things
9349              * like c\N{COMBINING TILDE} would start failing, as the
9350              * isWORDCHAR_A case below would gobble the 'c' up.
9351              */
9352 
9353             char *t = *s + UTF8SKIP(*s);
9354             while (isIDCONT_utf8((U8*)t))
9355                 t += UTF8SKIP(t);
9356             if (*d + (t - *s) > e)
9357                 Perl_croak(aTHX_ "%s", ident_too_long);
9358             Copy(*s, *d, t - *s, char);
9359             *d += t - *s;
9360             *s = t;
9361         }
9362         else if ( isWORDCHAR_A(**s) ) {
9363             do {
9364                 *(*d)++ = *(*s)++;
9365             } while (isWORDCHAR_A(**s) && *d < e);
9366         }
9367         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9368             *(*d)++ = ':';
9369             *(*d)++ = ':';
9370             (*s)++;
9371         }
9372         else if (allow_package && **s == ':' && (*s)[1] == ':'
9373            /* Disallow things like Foo::$bar. For the curious, this is
9374             * the code path that triggers the "Bad name after" warning
9375             * when looking for barewords.
9376             */
9377            && (*s)[2] != '$') {
9378             *(*d)++ = *(*s)++;
9379             *(*d)++ = *(*s)++;
9380         }
9381         else
9382             break;
9383     }
9384     return;
9385 }
9386 
9387 /* Returns a NUL terminated string, with the length of the string written to
9388    *slp
9389    */
9390 STATIC char *
9391 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9392 {
9393     dVAR;
9394     char *d = dest;
9395     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9396     bool is_utf8 = cBOOL(UTF);
9397 
9398     PERL_ARGS_ASSERT_SCAN_WORD;
9399 
9400     parse_ident(&s, &d, e, allow_package, is_utf8);
9401     *d = '\0';
9402     *slp = d - dest;
9403     return s;
9404 }
9405 
9406 STATIC char *
9407 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9408 {
9409     dVAR;
9410     I32 herelines = PL_parser->herelines;
9411     SSize_t bracket = -1;
9412     char funny = *s++;
9413     char *d = dest;
9414     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9415     bool is_utf8 = cBOOL(UTF);
9416     I32 orig_copline = 0, tmp_copline = 0;
9417 
9418     PERL_ARGS_ASSERT_SCAN_IDENT;
9419 
9420     if (isSPACE(*s))
9421 	s = PEEKSPACE(s);
9422     if (isDIGIT(*s)) {
9423 	while (isDIGIT(*s)) {
9424 	    if (d >= e)
9425 		Perl_croak(aTHX_ "%s", ident_too_long);
9426 	    *d++ = *s++;
9427 	}
9428     }
9429     else {
9430         parse_ident(&s, &d, e, 1, is_utf8);
9431     }
9432     *d = '\0';
9433     d = dest;
9434     if (*d) {
9435         /* Either a digit variable, or parse_ident() found an identifier
9436            (anything valid as a bareword), so job done and return.  */
9437 	if (PL_lex_state != LEX_NORMAL)
9438 	    PL_lex_state = LEX_INTERPENDMAYBE;
9439 	return s;
9440     }
9441     if (*s == '$' && s[1] &&
9442       (isIDFIRST_lazy_if(s+1,is_utf8)
9443          || isDIGIT_A((U8)s[1])
9444          || s[1] == '$'
9445          || s[1] == '{'
9446          || strnEQ(s+1,"::",2)) )
9447     {
9448         /* Dereferencing a value in a scalar variable.
9449            The alternatives are different syntaxes for a scalar variable.
9450            Using ' as a leading package separator isn't allowed. :: is.   */
9451 	return s;
9452     }
9453     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9454     if (*s == '{') {
9455 	bracket = s - SvPVX(PL_linestr);
9456 	s++;
9457 	orig_copline = CopLINE(PL_curcop);
9458         if (s < PL_bufend && isSPACE(*s)) {
9459             s = PEEKSPACE(s);
9460         }
9461     }
9462 
9463 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9464  * iff Unicode semantics are to be used.  The legal ones are any of:
9465  *  a) ASCII digits
9466  *  b) ASCII punctuation
9467  *  c) When not under Unicode rules, any upper Latin1 character
9468  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9469  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
9470  *     the \s ones. */
9471 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
9472                                    || isDIGIT_A((U8)(d))                    \
9473                                    || (!(u) && !isASCII((U8)(d)))           \
9474                                    || ((((U8)(d)) < 32)                     \
9475                                        && (((((U8)(d)) >= 14)               \
9476                                            || (((U8)(d)) <= 8 && (d) != 0) \
9477                                            || (((U8)(d)) == 13))))          \
9478                                    || (((U8)(d)) == toCTRL('?')))
9479     if (s < PL_bufend
9480         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9481     {
9482         if ( isCNTRL_A((U8)*s) ) {
9483             deprecate("literal control characters in variable names");
9484         }
9485 
9486         if (is_utf8) {
9487             const STRLEN skip = UTF8SKIP(s);
9488             STRLEN i;
9489             d[skip] = '\0';
9490             for ( i = 0; i < skip; i++ )
9491                 d[i] = *s++;
9492         }
9493         else {
9494             *d = *s++;
9495             d[1] = '\0';
9496         }
9497     }
9498     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9499     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9500 	*d = toCTRL(*s);
9501 	s++;
9502     }
9503     /* Warn about ambiguous code after unary operators if {...} notation isn't
9504        used.  There's no difference in ambiguity; it's merely a heuristic
9505        about when not to warn.  */
9506     else if (ck_uni && bracket == -1)
9507 	check_uni();
9508     if (bracket != -1) {
9509         /* If we were processing {...} notation then...  */
9510 	if (isIDFIRST_lazy_if(d,is_utf8)) {
9511             /* if it starts as a valid identifier, assume that it is one.
9512                (the later check for } being at the expected point will trap
9513                cases where this doesn't pan out.)  */
9514         d += is_utf8 ? UTF8SKIP(d) : 1;
9515         parse_ident(&s, &d, e, 1, is_utf8);
9516 	    *d = '\0';
9517             tmp_copline = CopLINE(PL_curcop);
9518             if (s < PL_bufend && isSPACE(*s)) {
9519                 s = PEEKSPACE(s);
9520             }
9521 	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9522                 /* ${foo[0]} and ${foo{bar}} notation.  */
9523 		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9524 		    const char * const brack =
9525 			(const char *)
9526 			((*s == '[') ? "[...]" : "{...}");
9527                     orig_copline = CopLINE(PL_curcop);
9528                     CopLINE_set(PL_curcop, tmp_copline);
9529    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9530 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9531 			"Ambiguous use of %c{%s%s} resolved to %c%s%s",
9532 			funny, dest, brack, funny, dest, brack);
9533                     CopLINE_set(PL_curcop, orig_copline);
9534 		}
9535 		bracket++;
9536 		PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9537 		PL_lex_allbrackets++;
9538 		return s;
9539 	    }
9540 	}
9541 	/* Handle extended ${^Foo} variables
9542 	 * 1999-02-27 mjd-perl-patch@plover.com */
9543 	else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9544 		 && isWORDCHAR(*s))
9545 	{
9546 	    d++;
9547 	    while (isWORDCHAR(*s) && d < e) {
9548 		*d++ = *s++;
9549 	    }
9550 	    if (d >= e)
9551 		Perl_croak(aTHX_ "%s", ident_too_long);
9552 	    *d = '\0';
9553 	}
9554 
9555         if ( !tmp_copline )
9556             tmp_copline = CopLINE(PL_curcop);
9557         if (s < PL_bufend && isSPACE(*s)) {
9558             s = PEEKSPACE(s);
9559         }
9560 
9561         /* Expect to find a closing } after consuming any trailing whitespace.
9562          */
9563 	if (*s == '}') {
9564 	    s++;
9565 	    if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9566 		PL_lex_state = LEX_INTERPEND;
9567 		PL_expect = XREF;
9568 	    }
9569 	    if (PL_lex_state == LEX_NORMAL) {
9570 		if (ckWARN(WARN_AMBIGUOUS) &&
9571 		    (keyword(dest, d - dest, 0)
9572 		     || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9573 		{
9574                     SV *tmp = newSVpvn_flags( dest, d - dest,
9575                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9576 		    if (funny == '#')
9577 			funny = '@';
9578                     orig_copline = CopLINE(PL_curcop);
9579                     CopLINE_set(PL_curcop, tmp_copline);
9580 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9581 			"Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9582 			funny, tmp, funny, tmp);
9583                     CopLINE_set(PL_curcop, orig_copline);
9584 		}
9585 	    }
9586 	}
9587 	else {
9588             /* Didn't find the closing } at the point we expected, so restore
9589                state such that the next thing to process is the opening { and */
9590 	    s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9591             CopLINE_set(PL_curcop, orig_copline);
9592             PL_parser->herelines = herelines;
9593 	    *dest = '\0';
9594 	}
9595     }
9596     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9597 	PL_lex_state = LEX_INTERPEND;
9598     return s;
9599 }
9600 
9601 static bool
9602 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9603 
9604     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9605      * the parse starting at 's', based on the subset that are valid in this
9606      * context input to this routine in 'valid_flags'. Advances s.  Returns
9607      * TRUE if the input should be treated as a valid flag, so the next char
9608      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9609      * first call on the current regex.  This routine will set it to any
9610      * charset modifier found.  The caller shouldn't change it.  This way,
9611      * another charset modifier encountered in the parse can be detected as an
9612      * error, as we have decided to allow only one */
9613 
9614     const char c = **s;
9615     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9616 
9617     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9618         if (isWORDCHAR_lazy_if(*s, UTF)) {
9619             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9620                        UTF ? SVf_UTF8 : 0);
9621             (*s) += charlen;
9622             /* Pretend that it worked, so will continue processing before
9623              * dieing */
9624             return TRUE;
9625         }
9626         return FALSE;
9627     }
9628 
9629     switch (c) {
9630 
9631         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9632         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9633         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9634         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9635         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9636         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9637 	case LOCALE_PAT_MOD:
9638 	    if (*charset) {
9639 		goto multiple_charsets;
9640 	    }
9641 	    set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9642 	    *charset = c;
9643 	    break;
9644 	case UNICODE_PAT_MOD:
9645 	    if (*charset) {
9646 		goto multiple_charsets;
9647 	    }
9648 	    set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9649 	    *charset = c;
9650 	    break;
9651 	case ASCII_RESTRICT_PAT_MOD:
9652 	    if (! *charset) {
9653 		set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9654 	    }
9655 	    else {
9656 
9657 		/* Error if previous modifier wasn't an 'a', but if it was, see
9658 		 * if, and accept, a second occurrence (only) */
9659 		if (*charset != 'a'
9660 		    || get_regex_charset(*pmfl)
9661 			!= REGEX_ASCII_RESTRICTED_CHARSET)
9662 		{
9663 			goto multiple_charsets;
9664 		}
9665 		set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9666 	    }
9667 	    *charset = c;
9668 	    break;
9669 	case DEPENDS_PAT_MOD:
9670 	    if (*charset) {
9671 		goto multiple_charsets;
9672 	    }
9673 	    set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9674 	    *charset = c;
9675 	    break;
9676     }
9677 
9678     (*s)++;
9679     return TRUE;
9680 
9681     multiple_charsets:
9682 	if (*charset != c) {
9683 	    yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9684 	}
9685 	else if (c == 'a') {
9686   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9687 	    yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9688 	}
9689 	else {
9690 	    yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9691 	}
9692 
9693 	/* Pretend that it worked, so will continue processing before dieing */
9694 	(*s)++;
9695 	return TRUE;
9696 }
9697 
9698 STATIC char *
9699 S_scan_pat(pTHX_ char *start, I32 type)
9700 {
9701     dVAR;
9702     PMOP *pm;
9703     char *s;
9704     const char * const valid_flags =
9705 	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9706     char charset = '\0';    /* character set modifier */
9707 #ifdef PERL_MAD
9708     char *modstart;
9709 #endif
9710 
9711     PERL_ARGS_ASSERT_SCAN_PAT;
9712 
9713     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9714                        TRUE /* look for escaped bracketed metas */, NULL);
9715 
9716     if (!s) {
9717 	const char * const delimiter = skipspace(start);
9718 	Perl_croak(aTHX_
9719 		   (const char *)
9720 		   (*delimiter == '?'
9721 		    ? "Search pattern not terminated or ternary operator parsed as search pattern"
9722 		    : "Search pattern not terminated" ));
9723     }
9724 
9725     pm = (PMOP*)newPMOP(type, 0);
9726     if (PL_multi_open == '?') {
9727 	/* This is the only point in the code that sets PMf_ONCE:  */
9728 	pm->op_pmflags |= PMf_ONCE;
9729 
9730 	/* Hence it's safe to do this bit of PMOP book-keeping here, which
9731 	   allows us to restrict the list needed by reset to just the ??
9732 	   matches.  */
9733 	assert(type != OP_TRANS);
9734 	if (PL_curstash) {
9735 	    MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9736 	    U32 elements;
9737 	    if (!mg) {
9738 		mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9739 				 0);
9740 	    }
9741 	    elements = mg->mg_len / sizeof(PMOP**);
9742 	    Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9743 	    ((PMOP**)mg->mg_ptr) [elements++] = pm;
9744 	    mg->mg_len = elements * sizeof(PMOP**);
9745 	    PmopSTASH_set(pm,PL_curstash);
9746 	}
9747     }
9748 #ifdef PERL_MAD
9749     modstart = s;
9750 #endif
9751 
9752     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9753      * anon CV. False positives like qr/[(?{]/ are harmless */
9754 
9755     if (type == OP_QR) {
9756 	STRLEN len;
9757 	char *e, *p = SvPV(PL_lex_stuff, len);
9758 	e = p + len;
9759 	for (; p < e; p++) {
9760 	    if (p[0] == '(' && p[1] == '?'
9761 		&& (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9762 	    {
9763 		pm->op_pmflags |= PMf_HAS_CV;
9764 		break;
9765 	    }
9766 	}
9767 	pm->op_pmflags |= PMf_IS_QR;
9768     }
9769 
9770     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9771 #ifdef PERL_MAD
9772     if (PL_madskills && modstart != s) {
9773 	SV* tmptoken = newSVpvn(modstart, s - modstart);
9774 	append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9775     }
9776 #endif
9777     /* issue a warning if /c is specified,but /g is not */
9778     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9779     {
9780         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9781 		       "Use of /c modifier is meaningless without /g" );
9782     }
9783 
9784     PL_lex_op = (OP*)pm;
9785     pl_yylval.ival = OP_MATCH;
9786     return s;
9787 }
9788 
9789 STATIC char *
9790 S_scan_subst(pTHX_ char *start)
9791 {
9792     dVAR;
9793     char *s;
9794     PMOP *pm;
9795     I32 first_start;
9796     line_t first_line;
9797     I32 es = 0;
9798     char charset = '\0';    /* character set modifier */
9799 #ifdef PERL_MAD
9800     char *modstart;
9801 #endif
9802     char *t;
9803 
9804     PERL_ARGS_ASSERT_SCAN_SUBST;
9805 
9806     pl_yylval.ival = OP_NULL;
9807 
9808     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9809                  TRUE /* look for escaped bracketed metas */, &t);
9810 
9811     if (!s)
9812 	Perl_croak(aTHX_ "Substitution pattern not terminated");
9813 
9814     s = t;
9815 #ifdef PERL_MAD
9816     if (PL_madskills) {
9817 	CURMAD('q', PL_thisopen);
9818 	CURMAD('_', PL_thiswhite);
9819 	CURMAD('E', PL_thisstuff);
9820 	CURMAD('Q', PL_thisclose);
9821 	PL_realtokenstart = s - SvPVX(PL_linestr);
9822     }
9823 #endif
9824 
9825     first_start = PL_multi_start;
9826     first_line = CopLINE(PL_curcop);
9827     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9828     if (!s) {
9829 	if (PL_lex_stuff) {
9830 	    SvREFCNT_dec(PL_lex_stuff);
9831 	    PL_lex_stuff = NULL;
9832 	}
9833 	Perl_croak(aTHX_ "Substitution replacement not terminated");
9834     }
9835     PL_multi_start = first_start;	/* so whole substitution is taken together */
9836 
9837     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9838 
9839 #ifdef PERL_MAD
9840     if (PL_madskills) {
9841 	CURMAD('z', PL_thisopen);
9842 	CURMAD('R', PL_thisstuff);
9843 	CURMAD('Z', PL_thisclose);
9844     }
9845     modstart = s;
9846 #endif
9847 
9848     while (*s) {
9849 	if (*s == EXEC_PAT_MOD) {
9850 	    s++;
9851 	    es++;
9852 	}
9853 	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9854 	{
9855 	    break;
9856 	}
9857     }
9858 
9859 #ifdef PERL_MAD
9860     if (PL_madskills) {
9861 	if (modstart != s)
9862 	    curmad('m', newSVpvn(modstart, s - modstart));
9863 	append_madprops(PL_thismad, (OP*)pm, 0);
9864 	PL_thismad = 0;
9865     }
9866 #endif
9867     if ((pm->op_pmflags & PMf_CONTINUE)) {
9868         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9869     }
9870 
9871     if (es) {
9872 	SV * const repl = newSVpvs("");
9873 
9874 	PL_multi_end = 0;
9875 	pm->op_pmflags |= PMf_EVAL;
9876 	while (es-- > 0) {
9877 	    if (es)
9878 		sv_catpvs(repl, "eval ");
9879 	    else
9880 		sv_catpvs(repl, "do ");
9881 	}
9882 	sv_catpvs(repl, "{");
9883 	sv_catsv(repl, PL_sublex_info.repl);
9884 	sv_catpvs(repl, "}");
9885 	SvEVALED_on(repl);
9886 	SvREFCNT_dec(PL_sublex_info.repl);
9887 	PL_sublex_info.repl = repl;
9888     }
9889     if (CopLINE(PL_curcop) != first_line) {
9890 	sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9891 	((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9892 	    CopLINE(PL_curcop) - first_line;
9893 	CopLINE_set(PL_curcop, first_line);
9894     }
9895 
9896     PL_lex_op = (OP*)pm;
9897     pl_yylval.ival = OP_SUBST;
9898     return s;
9899 }
9900 
9901 STATIC char *
9902 S_scan_trans(pTHX_ char *start)
9903 {
9904     dVAR;
9905     char* s;
9906     OP *o;
9907     U8 squash;
9908     U8 del;
9909     U8 complement;
9910     bool nondestruct = 0;
9911 #ifdef PERL_MAD
9912     char *modstart;
9913 #endif
9914     char *t;
9915 
9916     PERL_ARGS_ASSERT_SCAN_TRANS;
9917 
9918     pl_yylval.ival = OP_NULL;
9919 
9920     s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9921     if (!s)
9922 	Perl_croak(aTHX_ "Transliteration pattern not terminated");
9923 
9924     s = t;
9925 #ifdef PERL_MAD
9926     if (PL_madskills) {
9927 	CURMAD('q', PL_thisopen);
9928 	CURMAD('_', PL_thiswhite);
9929 	CURMAD('E', PL_thisstuff);
9930 	CURMAD('Q', PL_thisclose);
9931 	PL_realtokenstart = s - SvPVX(PL_linestr);
9932     }
9933 #endif
9934 
9935     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9936     if (!s) {
9937 	if (PL_lex_stuff) {
9938 	    SvREFCNT_dec(PL_lex_stuff);
9939 	    PL_lex_stuff = NULL;
9940 	}
9941 	Perl_croak(aTHX_ "Transliteration replacement not terminated");
9942     }
9943     if (PL_madskills) {
9944 	CURMAD('z', PL_thisopen);
9945 	CURMAD('R', PL_thisstuff);
9946 	CURMAD('Z', PL_thisclose);
9947     }
9948 
9949     complement = del = squash = 0;
9950 #ifdef PERL_MAD
9951     modstart = s;
9952 #endif
9953     while (1) {
9954 	switch (*s) {
9955 	case 'c':
9956 	    complement = OPpTRANS_COMPLEMENT;
9957 	    break;
9958 	case 'd':
9959 	    del = OPpTRANS_DELETE;
9960 	    break;
9961 	case 's':
9962 	    squash = OPpTRANS_SQUASH;
9963 	    break;
9964 	case 'r':
9965 	    nondestruct = 1;
9966 	    break;
9967 	default:
9968 	    goto no_more;
9969 	}
9970 	s++;
9971     }
9972   no_more:
9973 
9974     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9975     o->op_private &= ~OPpTRANS_ALL;
9976     o->op_private |= del|squash|complement|
9977       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9978       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9979 
9980     PL_lex_op = o;
9981     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9982 
9983 #ifdef PERL_MAD
9984     if (PL_madskills) {
9985 	if (modstart != s)
9986 	    curmad('m', newSVpvn(modstart, s - modstart));
9987 	append_madprops(PL_thismad, o, 0);
9988 	PL_thismad = 0;
9989     }
9990 #endif
9991 
9992     return s;
9993 }
9994 
9995 /* scan_heredoc
9996    Takes a pointer to the first < in <<FOO.
9997    Returns a pointer to the byte following <<FOO.
9998 
9999    This function scans a heredoc, which involves different methods
10000    depending on whether we are in a string eval, quoted construct, etc.
10001    This is because PL_linestr could containing a single line of input, or
10002    a whole string being evalled, or the contents of the current quote-
10003    like operator.
10004 
10005    The two basic methods are:
10006     - Steal lines from the input stream
10007     - Scan the heredoc in PL_linestr and remove it therefrom
10008 
10009    In a file scope or filtered eval, the first method is used; in a
10010    string eval, the second.
10011 
10012    In a quote-like operator, we have to choose between the two,
10013    depending on where we can find a newline.  We peek into outer lex-
10014    ing scopes until we find one with a newline in it.  If we reach the
10015    outermost lexing scope and it is a file, we use the stream method.
10016    Otherwise it is treated as an eval.
10017 */
10018 
10019 STATIC char *
10020 S_scan_heredoc(pTHX_ char *s)
10021 {
10022     dVAR;
10023     I32 op_type = OP_SCALAR;
10024     I32 len;
10025     SV *tmpstr;
10026     char term;
10027     char *d;
10028     char *e;
10029     char *peek;
10030     const bool infile = PL_rsfp || PL_parser->filtered;
10031     const line_t origline = CopLINE(PL_curcop);
10032     LEXSHARED *shared = PL_parser->lex_shared;
10033 #ifdef PERL_MAD
10034     I32 stuffstart = s - SvPVX(PL_linestr);
10035     char *tstart;
10036 
10037     PL_realtokenstart = -1;
10038 #endif
10039 
10040     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10041 
10042     s += 2;
10043     d = PL_tokenbuf + 1;
10044     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10045     *PL_tokenbuf = '\n';
10046     peek = s;
10047     while (SPACE_OR_TAB(*peek))
10048 	peek++;
10049     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10050 	s = peek;
10051 	term = *s++;
10052 	s = delimcpy(d, e, s, PL_bufend, term, &len);
10053 	if (s == PL_bufend)
10054 	    Perl_croak(aTHX_ "Unterminated delimiter for here document");
10055 	d += len;
10056 	s++;
10057     }
10058     else {
10059 	if (*s == '\\')
10060             /* <<\FOO is equivalent to <<'FOO' */
10061 	    s++, term = '\'';
10062 	else
10063 	    term = '"';
10064 	if (!isWORDCHAR_lazy_if(s,UTF))
10065 	    deprecate("bare << to mean <<\"\"");
10066 	peek = s;
10067 	while (isWORDCHAR_lazy_if(peek,UTF)) {
10068 	    peek += UTF ? UTF8SKIP(peek) : 1;
10069 	}
10070 	len = (peek - s >= e - d) ? (e - d) : (peek - s);
10071 	Copy(s, d, len, char);
10072 	s += len;
10073 	d += len;
10074     }
10075     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10076 	Perl_croak(aTHX_ "Delimiter for here document is too long");
10077     *d++ = '\n';
10078     *d = '\0';
10079     len = d - PL_tokenbuf;
10080 
10081 #ifdef PERL_MAD
10082     if (PL_madskills) {
10083 	tstart = PL_tokenbuf + 1;
10084 	PL_thisclose = newSVpvn(tstart, len - 1);
10085 	tstart = SvPVX(PL_linestr) + stuffstart;
10086 	PL_thisopen = newSVpvn(tstart, s - tstart);
10087 	stuffstart = s - SvPVX(PL_linestr);
10088     }
10089 #endif
10090 #ifndef PERL_STRICT_CR
10091     d = strchr(s, '\r');
10092     if (d) {
10093 	char * const olds = s;
10094 	s = d;
10095 	while (s < PL_bufend) {
10096 	    if (*s == '\r') {
10097 		*d++ = '\n';
10098 		if (*++s == '\n')
10099 		    s++;
10100 	    }
10101 	    else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
10102 		*d++ = *s++;
10103 		s++;
10104 	    }
10105 	    else
10106 		*d++ = *s++;
10107 	}
10108 	*d = '\0';
10109 	PL_bufend = d;
10110 	SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10111 	s = olds;
10112     }
10113 #endif
10114 #ifdef PERL_MAD
10115     if (PL_madskills) {
10116 	tstart = SvPVX(PL_linestr) + stuffstart;
10117 	if (PL_thisstuff)
10118 	    sv_catpvn(PL_thisstuff, tstart, s - tstart);
10119 	else
10120 	    PL_thisstuff = newSVpvn(tstart, s - tstart);
10121     }
10122 
10123     stuffstart = s - SvPVX(PL_linestr);
10124 #endif
10125 
10126     tmpstr = newSV_type(SVt_PVIV);
10127     SvGROW(tmpstr, 80);
10128     if (term == '\'') {
10129 	op_type = OP_CONST;
10130 	SvIV_set(tmpstr, -1);
10131     }
10132     else if (term == '`') {
10133 	op_type = OP_BACKTICK;
10134 	SvIV_set(tmpstr, '\\');
10135     }
10136 
10137     PL_multi_start = origline + 1 + PL_parser->herelines;
10138     PL_multi_open = PL_multi_close = '<';
10139     /* inside a string eval or quote-like operator */
10140     if (!infile || PL_lex_inwhat) {
10141 	SV *linestr;
10142 	char *bufend;
10143 	char * const olds = s;
10144 	PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10145 	/* These two fields are not set until an inner lexing scope is
10146 	   entered.  But we need them set here. */
10147 	shared->ls_bufptr  = s;
10148 	shared->ls_linestr = PL_linestr;
10149 	if (PL_lex_inwhat)
10150 	  /* Look for a newline.  If the current buffer does not have one,
10151 	     peek into the line buffer of the parent lexing scope, going
10152  	     up as many levels as necessary to find one with a newline
10153 	     after bufptr.
10154 	   */
10155 	  while (!(s = (char *)memchr(
10156 		    (void *)shared->ls_bufptr, '\n',
10157 		    SvEND(shared->ls_linestr)-shared->ls_bufptr
10158 		))) {
10159 	    shared = shared->ls_prev;
10160 	    /* shared is only null if we have gone beyond the outermost
10161 	       lexing scope.  In a file, we will have broken out of the
10162 	       loop in the previous iteration.  In an eval, the string buf-
10163 	       fer ends with "\n;", so the while condition above will have
10164 	       evaluated to false.  So shared can never be null. */
10165 	    assert(shared);
10166 	    /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10167 	       most lexing scope.  In a file, shared->ls_linestr at that
10168 	       level is just one line, so there is no body to steal. */
10169 	    if (infile && !shared->ls_prev) {
10170 		s = olds;
10171 		goto streaming;
10172 	    }
10173 	  }
10174 	else {	/* eval */
10175 	    s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10176 	    assert(s);
10177 	}
10178 	linestr = shared->ls_linestr;
10179 	bufend = SvEND(linestr);
10180 	d = s;
10181 	while (s < bufend - len + 1 &&
10182           memNE(s,PL_tokenbuf,len) ) {
10183 	    if (*s++ == '\n')
10184 		++PL_parser->herelines;
10185 	}
10186 	if (s >= bufend - len + 1) {
10187 	    goto interminable;
10188 	}
10189 	sv_setpvn(tmpstr,d+1,s-d);
10190 #ifdef PERL_MAD
10191 	if (PL_madskills) {
10192 	    if (PL_thisstuff)
10193 		sv_catpvn(PL_thisstuff, d + 1, s - d);
10194 	    else
10195 		PL_thisstuff = newSVpvn(d + 1, s - d);
10196 	    stuffstart = s - SvPVX(PL_linestr);
10197 	}
10198 #endif
10199 	s += len - 1;
10200 	/* the preceding stmt passes a newline */
10201 	PL_parser->herelines++;
10202 
10203 	/* s now points to the newline after the heredoc terminator.
10204 	   d points to the newline before the body of the heredoc.
10205 	 */
10206 
10207 	/* We are going to modify linestr in place here, so set
10208 	   aside copies of the string if necessary for re-evals or
10209 	   (caller $n)[6]. */
10210 	/* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10211 	   check shared->re_eval_str. */
10212 	if (shared->re_eval_start || shared->re_eval_str) {
10213 	    /* Set aside the rest of the regexp */
10214 	    if (!shared->re_eval_str)
10215 		shared->re_eval_str =
10216 		       newSVpvn(shared->re_eval_start,
10217 				bufend - shared->re_eval_start);
10218 	    shared->re_eval_start -= s-d;
10219 	}
10220 	if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10221             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10222             cx->blk_eval.cur_text == linestr)
10223         {
10224 	    cx->blk_eval.cur_text = newSVsv(linestr);
10225 	    SvSCREAM_on(cx->blk_eval.cur_text);
10226 	}
10227 	/* Copy everything from s onwards back to d. */
10228 	Move(s,d,bufend-s + 1,char);
10229 	SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10230 	/* Setting PL_bufend only applies when we have not dug deeper
10231 	   into other scopes, because sublex_done sets PL_bufend to
10232 	   SvEND(PL_linestr). */
10233 	if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10234 	s = olds;
10235     }
10236     else
10237     {
10238       SV *linestr_save;
10239      streaming:
10240       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10241       term = PL_tokenbuf[1];
10242       len--;
10243       linestr_save = PL_linestr; /* must restore this afterwards */
10244       d = s;			 /* and this */
10245       PL_linestr = newSVpvs("");
10246       PL_bufend = SvPVX(PL_linestr);
10247       while (1) {
10248 #ifdef PERL_MAD
10249 	if (PL_madskills) {
10250 	    tstart = SvPVX(PL_linestr) + stuffstart;
10251 	    if (PL_thisstuff)
10252 		sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10253 	    else
10254 		PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10255 	}
10256 #endif
10257 	PL_bufptr = PL_bufend;
10258 	CopLINE_set(PL_curcop,
10259 		    origline + 1 + PL_parser->herelines);
10260 	if (!lex_next_chunk(LEX_NO_TERM)
10261 	 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10262 	    SvREFCNT_dec(linestr_save);
10263 	    goto interminable;
10264 	}
10265 	CopLINE_set(PL_curcop, origline);
10266 	if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10267             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10268             /* ^That should be enough to avoid this needing to grow:  */
10269 	    sv_catpvs(PL_linestr, "\n\0");
10270             assert(s == SvPVX(PL_linestr));
10271             PL_bufend = SvEND(PL_linestr);
10272 	}
10273 	s = PL_bufptr;
10274 #ifdef PERL_MAD
10275 	stuffstart = s - SvPVX(PL_linestr);
10276 #endif
10277 	PL_parser->herelines++;
10278 	PL_last_lop = PL_last_uni = NULL;
10279 #ifndef PERL_STRICT_CR
10280 	if (PL_bufend - PL_linestart >= 2) {
10281 	    if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10282 		(PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10283 	    {
10284 		PL_bufend[-2] = '\n';
10285 		PL_bufend--;
10286 		SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10287 	    }
10288 	    else if (PL_bufend[-1] == '\r')
10289 		PL_bufend[-1] = '\n';
10290 	}
10291 	else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10292 	    PL_bufend[-1] = '\n';
10293 #endif
10294 	if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10295 	    SvREFCNT_dec(PL_linestr);
10296 	    PL_linestr = linestr_save;
10297 	    PL_linestart = SvPVX(linestr_save);
10298 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10299 	    s = d;
10300 	    break;
10301 	}
10302 	else {
10303 	    sv_catsv(tmpstr,PL_linestr);
10304 	}
10305       }
10306     }
10307     PL_multi_end = origline + PL_parser->herelines;
10308     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10309 	SvPV_shrink_to_cur(tmpstr);
10310     }
10311     if (!IN_BYTES) {
10312 	if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10313 	    SvUTF8_on(tmpstr);
10314 	else if (PL_encoding)
10315 	    sv_recode_to_utf8(tmpstr, PL_encoding);
10316     }
10317     PL_lex_stuff = tmpstr;
10318     pl_yylval.ival = op_type;
10319     return s;
10320 
10321   interminable:
10322     SvREFCNT_dec(tmpstr);
10323     CopLINE_set(PL_curcop, origline);
10324     missingterm(PL_tokenbuf + 1);
10325 }
10326 
10327 /* scan_inputsymbol
10328    takes: current position in input buffer
10329    returns: new position in input buffer
10330    side-effects: pl_yylval and lex_op are set.
10331 
10332    This code handles:
10333 
10334    <>		read from ARGV
10335    <FH> 	read from filehandle
10336    <pkg::FH>	read from package qualified filehandle
10337    <pkg'FH>	read from package qualified filehandle
10338    <$fh>	read from filehandle in $fh
10339    <*.h>	filename glob
10340 
10341 */
10342 
10343 STATIC char *
10344 S_scan_inputsymbol(pTHX_ char *start)
10345 {
10346     dVAR;
10347     char *s = start;		/* current position in buffer */
10348     char *end;
10349     I32 len;
10350     char *d = PL_tokenbuf;					/* start of temp holding space */
10351     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
10352 
10353     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10354 
10355     end = strchr(s, '\n');
10356     if (!end)
10357 	end = PL_bufend;
10358     s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
10359 
10360     /* die if we didn't have space for the contents of the <>,
10361        or if it didn't end, or if we see a newline
10362     */
10363 
10364     if (len >= (I32)sizeof PL_tokenbuf)
10365 	Perl_croak(aTHX_ "Excessively long <> operator");
10366     if (s >= end)
10367 	Perl_croak(aTHX_ "Unterminated <> operator");
10368 
10369     s++;
10370 
10371     /* check for <$fh>
10372        Remember, only scalar variables are interpreted as filehandles by
10373        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10374        treated as a glob() call.
10375        This code makes use of the fact that except for the $ at the front,
10376        a scalar variable and a filehandle look the same.
10377     */
10378     if (*d == '$' && d[1]) d++;
10379 
10380     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10381     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10382 	d += UTF ? UTF8SKIP(d) : 1;
10383 
10384     /* If we've tried to read what we allow filehandles to look like, and
10385        there's still text left, then it must be a glob() and not a getline.
10386        Use scan_str to pull out the stuff between the <> and treat it
10387        as nothing more than a string.
10388     */
10389 
10390     if (d - PL_tokenbuf != len) {
10391 	pl_yylval.ival = OP_GLOB;
10392 	s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10393 	if (!s)
10394 	   Perl_croak(aTHX_ "Glob not terminated");
10395 	return s;
10396     }
10397     else {
10398 	bool readline_overriden = FALSE;
10399 	GV *gv_readline;
10400     	/* we're in a filehandle read situation */
10401 	d = PL_tokenbuf;
10402 
10403 	/* turn <> into <ARGV> */
10404 	if (!len)
10405 	    Copy("ARGV",d,5,char);
10406 
10407 	/* Check whether readline() is overriden */
10408 	gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10409 	if ((gv_readline = gv_override("readline",8)))
10410 	    readline_overriden = TRUE;
10411 
10412 	/* if <$fh>, create the ops to turn the variable into a
10413 	   filehandle
10414 	*/
10415 	if (*d == '$') {
10416 	    /* try to find it in the pad for this block, otherwise find
10417 	       add symbol table ops
10418 	    */
10419 	    const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10420 	    if (tmp != NOT_IN_PAD) {
10421 		if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10422 		    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10423 		    HEK * const stashname = HvNAME_HEK(stash);
10424 		    SV * const sym = sv_2mortal(newSVhek(stashname));
10425 		    sv_catpvs(sym, "::");
10426 		    sv_catpv(sym, d+1);
10427 		    d = SvPVX(sym);
10428 		    goto intro_sym;
10429 		}
10430 		else {
10431 		    OP * const o = newOP(OP_PADSV, 0);
10432 		    o->op_targ = tmp;
10433 		    PL_lex_op = readline_overriden
10434 			? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10435 				op_append_elem(OP_LIST, o,
10436 				    newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10437 			: (OP*)newUNOP(OP_READLINE, 0, o);
10438 		}
10439 	    }
10440 	    else {
10441 		GV *gv;
10442 		++d;
10443 intro_sym:
10444 		gv = gv_fetchpv(d,
10445 				(PL_in_eval
10446 				 ? (GV_ADDMULTI | GV_ADDINEVAL)
10447 				 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10448 				SVt_PV);
10449 		PL_lex_op = readline_overriden
10450 		    ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10451 			    op_append_elem(OP_LIST,
10452 				newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10453 				newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10454 		    : (OP*)newUNOP(OP_READLINE, 0,
10455 			    newUNOP(OP_RV2SV, 0,
10456 				newGVOP(OP_GV, 0, gv)));
10457 	    }
10458 	    if (!readline_overriden)
10459 		PL_lex_op->op_flags |= OPf_SPECIAL;
10460 	    /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10461 	    pl_yylval.ival = OP_NULL;
10462 	}
10463 
10464 	/* If it's none of the above, it must be a literal filehandle
10465 	   (<Foo::BAR> or <FOO>) so build a simple readline OP */
10466 	else {
10467 	    GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10468 	    PL_lex_op = readline_overriden
10469 		? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10470 			op_append_elem(OP_LIST,
10471 			    newGVOP(OP_GV, 0, gv),
10472 			    newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10473 		: (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10474 	    pl_yylval.ival = OP_NULL;
10475 	}
10476     }
10477 
10478     return s;
10479 }
10480 
10481 
10482 /* scan_str
10483    takes:
10484 	start			position in buffer
10485 	keep_quoted		preserve \ on the embedded delimiter(s)
10486 	keep_delims		preserve the delimiters around the string
10487 	re_reparse		compiling a run-time /(?{})/:
10488 				   collapse // to /,  and skip encoding src
10489 	deprecate_escaped_meta	issue a deprecation warning for cer-
10490 				tain paired metacharacters that appear
10491 				escaped within it
10492 	delimp			if non-null, this is set to the position of
10493 				the closing delimiter, or just after it if
10494 				the closing and opening delimiters differ
10495 				(i.e., the opening delimiter of a substitu-
10496 				tion replacement)
10497    returns: position to continue reading from buffer
10498    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10499    	updates the read buffer.
10500 
10501    This subroutine pulls a string out of the input.  It is called for:
10502    	q		single quotes		q(literal text)
10503 	'		single quotes		'literal text'
10504 	qq		double quotes		qq(interpolate $here please)
10505 	"		double quotes		"interpolate $here please"
10506 	qx		backticks		qx(/bin/ls -l)
10507 	`		backticks		`/bin/ls -l`
10508 	qw		quote words		@EXPORT_OK = qw( func() $spam )
10509 	m//		regexp match		m/this/
10510 	s///		regexp substitute	s/this/that/
10511 	tr///		string transliterate	tr/this/that/
10512 	y///		string transliterate	y/this/that/
10513 	($*@)		sub prototypes		sub foo ($)
10514 	(stuff)		sub attr parameters	sub foo : attr(stuff)
10515 	<>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
10516 
10517    In most of these cases (all but <>, patterns and transliterate)
10518    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10519    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10520    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10521    calls scan_str().
10522 
10523    It skips whitespace before the string starts, and treats the first
10524    character as the delimiter.  If the delimiter is one of ([{< then
10525    the corresponding "close" character )]}> is used as the closing
10526    delimiter.  It allows quoting of delimiters, and if the string has
10527    balanced delimiters ([{<>}]) it allows nesting.
10528 
10529    On success, the SV with the resulting string is put into lex_stuff or,
10530    if that is already non-NULL, into lex_repl. The second case occurs only
10531    when parsing the RHS of the special constructs s/// and tr/// (y///).
10532    For convenience, the terminating delimiter character is stuffed into
10533    SvIVX of the SV.
10534 */
10535 
10536 STATIC char *
10537 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10538 		 bool deprecate_escaped_meta, char **delimp
10539     )
10540 {
10541     dVAR;
10542     SV *sv;			/* scalar value: string */
10543     const char *tmps;		/* temp string, used for delimiter matching */
10544     char *s = start;		/* current position in the buffer */
10545     char term;			/* terminating character */
10546     char *to;			/* current position in the sv's data */
10547     I32 brackets = 1;		/* bracket nesting level */
10548     bool has_utf8 = FALSE;	/* is there any utf8 content? */
10549     I32 termcode;		/* terminating char. code */
10550     U8 termstr[UTF8_MAXBYTES];	/* terminating string */
10551     STRLEN termlen;		/* length of terminating string */
10552     int last_off = 0;		/* last position for nesting bracket */
10553     char *escaped_open = NULL;
10554     line_t herelines;
10555 #ifdef PERL_MAD
10556     int stuffstart;
10557     char *tstart;
10558 #endif
10559 
10560     PERL_ARGS_ASSERT_SCAN_STR;
10561 
10562     /* skip space before the delimiter */
10563     if (isSPACE(*s)) {
10564 	s = PEEKSPACE(s);
10565     }
10566 
10567 #ifdef PERL_MAD
10568     if (PL_realtokenstart >= 0) {
10569 	stuffstart = PL_realtokenstart;
10570 	PL_realtokenstart = -1;
10571     }
10572     else
10573 	stuffstart = start - SvPVX(PL_linestr);
10574 #endif
10575     /* mark where we are, in case we need to report errors */
10576     CLINE;
10577 
10578     /* after skipping whitespace, the next character is the terminator */
10579     term = *s;
10580     if (!UTF) {
10581 	termcode = termstr[0] = term;
10582 	termlen = 1;
10583     }
10584     else {
10585 	termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10586 	Copy(s, termstr, termlen, U8);
10587 	if (!UTF8_IS_INVARIANT(term))
10588 	    has_utf8 = TRUE;
10589     }
10590 
10591     /* mark where we are */
10592     PL_multi_start = CopLINE(PL_curcop);
10593     PL_multi_open = term;
10594     herelines = PL_parser->herelines;
10595 
10596     /* find corresponding closing delimiter */
10597     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10598 	termcode = termstr[0] = term = tmps[5];
10599 
10600     PL_multi_close = term;
10601 
10602     /* A warning is raised if the input parameter requires it for escaped (by a
10603      * backslash) paired metacharacters {} [] and () when the delimiters are
10604      * those same characters, and the backslash is ineffective.  This doesn't
10605      * happen for <>, as they aren't metas. */
10606     if (deprecate_escaped_meta
10607         && (PL_multi_open == PL_multi_close
10608             || PL_multi_open == '<'
10609             || ! ckWARN_d(WARN_DEPRECATED)))
10610     {
10611         deprecate_escaped_meta = FALSE;
10612     }
10613 
10614     /* create a new SV to hold the contents.  79 is the SV's initial length.
10615        What a random number. */
10616     sv = newSV_type(SVt_PVIV);
10617     SvGROW(sv, 80);
10618     SvIV_set(sv, termcode);
10619     (void)SvPOK_only(sv);		/* validate pointer */
10620 
10621     /* move past delimiter and try to read a complete string */
10622     if (keep_delims)
10623 	sv_catpvn(sv, s, termlen);
10624     s += termlen;
10625 #ifdef PERL_MAD
10626     tstart = SvPVX(PL_linestr) + stuffstart;
10627     if (PL_madskills && !PL_thisopen && !keep_delims) {
10628 	PL_thisopen = newSVpvn(tstart, s - tstart);
10629 	stuffstart = s - SvPVX(PL_linestr);
10630     }
10631 #endif
10632     for (;;) {
10633 	if (PL_encoding && !UTF && !re_reparse) {
10634 	    bool cont = TRUE;
10635 
10636 	    while (cont) {
10637 		int offset = s - SvPVX_const(PL_linestr);
10638 		const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10639 					   &offset, (char*)termstr, termlen);
10640 		const char *ns;
10641 		char *svlast;
10642 
10643 		if (SvIsCOW(PL_linestr)) {
10644 		    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10645 		    STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10646 		    STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10647 		    char *buf = SvPVX(PL_linestr);
10648 		    bufend_pos = PL_parser->bufend - buf;
10649 		    bufptr_pos = PL_parser->bufptr - buf;
10650 		    oldbufptr_pos = PL_parser->oldbufptr - buf;
10651 		    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10652 		    linestart_pos = PL_parser->linestart - buf;
10653 		    last_uni_pos = PL_parser->last_uni
10654 			? PL_parser->last_uni - buf
10655 			: 0;
10656 		    last_lop_pos = PL_parser->last_lop
10657 			? PL_parser->last_lop - buf
10658 			: 0;
10659 		    re_eval_start_pos =
10660 			PL_parser->lex_shared->re_eval_start ?
10661                             PL_parser->lex_shared->re_eval_start - buf : 0;
10662 		    s_pos = s - buf;
10663 
10664 		    sv_force_normal(PL_linestr);
10665 
10666 		    buf = SvPVX(PL_linestr);
10667 		    PL_parser->bufend = buf + bufend_pos;
10668 		    PL_parser->bufptr = buf + bufptr_pos;
10669 		    PL_parser->oldbufptr = buf + oldbufptr_pos;
10670 		    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10671 		    PL_parser->linestart = buf + linestart_pos;
10672 		    if (PL_parser->last_uni)
10673 			PL_parser->last_uni = buf + last_uni_pos;
10674 		    if (PL_parser->last_lop)
10675 			PL_parser->last_lop = buf + last_lop_pos;
10676 		    if (PL_parser->lex_shared->re_eval_start)
10677 		        PL_parser->lex_shared->re_eval_start  =
10678 			    buf + re_eval_start_pos;
10679 		    s = buf + s_pos;
10680 		}
10681 		ns = SvPVX_const(PL_linestr) + offset;
10682 		svlast = SvEND(sv) - 1;
10683 
10684 		for (; s < ns; s++) {
10685 		    if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10686 			COPLINE_INC_WITH_HERELINES;
10687 		}
10688 		if (!found)
10689 		    goto read_more_line;
10690 		else {
10691 		    /* handle quoted delimiters */
10692 		    if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10693 			const char *t;
10694 			for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10695 			    t--;
10696 			if ((svlast-1 - t) % 2) {
10697 			    if (!keep_quoted) {
10698 				*(svlast-1) = term;
10699 				*svlast = '\0';
10700 				SvCUR_set(sv, SvCUR(sv) - 1);
10701 			    }
10702 			    continue;
10703 			}
10704 		    }
10705 		    if (PL_multi_open == PL_multi_close) {
10706 			cont = FALSE;
10707 		    }
10708 		    else {
10709 			const char *t;
10710 			char *w;
10711 			for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10712 			    /* At here, all closes are "was quoted" one,
10713 			       so we don't check PL_multi_close. */
10714 			    if (*t == '\\') {
10715 				if (!keep_quoted && *(t+1) == PL_multi_open)
10716 				    t++;
10717 				else
10718 				    *w++ = *t++;
10719 			    }
10720 			    else if (*t == PL_multi_open)
10721 				brackets++;
10722 
10723 			    *w = *t;
10724 			}
10725 			if (w < t) {
10726 			    *w++ = term;
10727 			    *w = '\0';
10728 			    SvCUR_set(sv, w - SvPVX_const(sv));
10729 			}
10730 			last_off = w - SvPVX(sv);
10731 			if (--brackets <= 0)
10732 			    cont = FALSE;
10733 		    }
10734 		}
10735 	    }
10736 	    if (!keep_delims) {
10737 		SvCUR_set(sv, SvCUR(sv) - 1);
10738 		*SvEND(sv) = '\0';
10739 	    }
10740 	    break;
10741 	}
10742 
10743     	/* extend sv if need be */
10744 	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10745 	/* set 'to' to the next character in the sv's string */
10746 	to = SvPVX(sv)+SvCUR(sv);
10747 
10748 	/* if open delimiter is the close delimiter read unbridle */
10749 	if (PL_multi_open == PL_multi_close) {
10750 	    for (; s < PL_bufend; s++,to++) {
10751 	    	/* embedded newlines increment the current line number */
10752 		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10753 		    COPLINE_INC_WITH_HERELINES;
10754 		/* handle quoted delimiters */
10755 		if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10756 		    if (!keep_quoted
10757 		        && (s[1] == term
10758 			    || (re_reparse && s[1] == '\\'))
10759 		    )
10760 			s++;
10761 		    /* any other quotes are simply copied straight through */
10762 		    else
10763 			*to++ = *s++;
10764 		}
10765 		/* terminate when run out of buffer (the for() condition), or
10766 		   have found the terminator */
10767 		else if (*s == term) {
10768 		    if (termlen == 1)
10769 			break;
10770 		    if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10771 			break;
10772 		}
10773 		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10774 		    has_utf8 = TRUE;
10775 		*to = *s;
10776 	    }
10777 	}
10778 
10779 	/* if the terminator isn't the same as the start character (e.g.,
10780 	   matched brackets), we have to allow more in the quoting, and
10781 	   be prepared for nested brackets.
10782 	*/
10783 	else {
10784 	    /* read until we run out of string, or we find the terminator */
10785 	    for (; s < PL_bufend; s++,to++) {
10786 	    	/* embedded newlines increment the line count */
10787 		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10788 		    COPLINE_INC_WITH_HERELINES;
10789 		/* backslashes can escape the open or closing characters */
10790 		if (*s == '\\' && s+1 < PL_bufend) {
10791 		    if (!keep_quoted &&
10792 			((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10793                     {
10794 			s++;
10795 
10796                         /* Here, 'deprecate_escaped_meta' is true iff the
10797                          * delimiters are paired metacharacters, and 's' points
10798                          * to an occurrence of one of them within the string,
10799                          * which was preceded by a backslash.  If this is a
10800                          * context where the delimiter is also a metacharacter,
10801                          * the backslash is useless, and deprecated.  () and []
10802                          * are meta in any context. {} are meta only when
10803                          * appearing in a quantifier or in things like '\p{'
10804                          * (but '\\p{' isn't meta).  They also aren't meta
10805                          * unless there is a matching closed, escaped char
10806                          * later on within the string.  If 's' points to an
10807                          * open, set a flag; if to a close, test that flag, and
10808                          * raise a warning if it was set */
10809 
10810 			if (deprecate_escaped_meta) {
10811                             if (*s == PL_multi_open) {
10812                                 if (*s != '{') {
10813                                     escaped_open = s;
10814                                 }
10815                                      /* Look for a closing '\}' */
10816                                 else if (regcurly(s, TRUE)) {
10817                                     escaped_open = s;
10818                                 }
10819                                      /* Look for e.g.  '\x{' */
10820                                 else if (s - start > 2
10821                                          && _generic_isCC(*(s-2),
10822                                              _CC_BACKSLASH_FOO_LBRACE_IS_META))
10823                                 { /* Exclude '\\x', '\\\\x', etc. */
10824                                     char *lookbehind = s - 4;
10825                                     bool is_meta = TRUE;
10826                                     while (lookbehind >= start
10827                                            && *lookbehind == '\\')
10828                                     {
10829                                         is_meta = ! is_meta;
10830                                         lookbehind--;
10831                                     }
10832                                     if (is_meta) {
10833                                         escaped_open = s;
10834                                     }
10835                                 }
10836                             }
10837                             else if (escaped_open) {
10838                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10839                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10840                                 escaped_open = NULL;
10841                             }
10842                         }
10843                     }
10844 		    else
10845 			*to++ = *s++;
10846 		}
10847 		/* allow nested opens and closes */
10848 		else if (*s == PL_multi_close && --brackets <= 0)
10849 		    break;
10850 		else if (*s == PL_multi_open)
10851 		    brackets++;
10852 		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10853 		    has_utf8 = TRUE;
10854 		*to = *s;
10855 	    }
10856 	}
10857 	/* terminate the copied string and update the sv's end-of-string */
10858 	*to = '\0';
10859 	SvCUR_set(sv, to - SvPVX_const(sv));
10860 
10861 	/*
10862 	 * this next chunk reads more into the buffer if we're not done yet
10863 	 */
10864 
10865   	if (s < PL_bufend)
10866 	    break;		/* handle case where we are done yet :-) */
10867 
10868 #ifndef PERL_STRICT_CR
10869 	if (to - SvPVX_const(sv) >= 2) {
10870 	    if ((to[-2] == '\r' && to[-1] == '\n') ||
10871 		(to[-2] == '\n' && to[-1] == '\r'))
10872 	    {
10873 		to[-2] = '\n';
10874 		to--;
10875 		SvCUR_set(sv, to - SvPVX_const(sv));
10876 	    }
10877 	    else if (to[-1] == '\r')
10878 		to[-1] = '\n';
10879 	}
10880 	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10881 	    to[-1] = '\n';
10882 #endif
10883 
10884      read_more_line:
10885 	/* if we're out of file, or a read fails, bail and reset the current
10886 	   line marker so we can report where the unterminated string began
10887 	*/
10888 #ifdef PERL_MAD
10889 	if (PL_madskills) {
10890 	    char * const tstart = SvPVX(PL_linestr) + stuffstart;
10891 	    if (PL_thisstuff)
10892 		sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10893 	    else
10894 		PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10895 	}
10896 #endif
10897 	COPLINE_INC_WITH_HERELINES;
10898 	PL_bufptr = PL_bufend;
10899 	if (!lex_next_chunk(0)) {
10900 	    sv_free(sv);
10901 	    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10902 	    return NULL;
10903 	}
10904 	s = PL_bufptr;
10905 #ifdef PERL_MAD
10906 	stuffstart = 0;
10907 #endif
10908     }
10909 
10910     /* at this point, we have successfully read the delimited string */
10911 
10912     if (!PL_encoding || UTF || re_reparse) {
10913 #ifdef PERL_MAD
10914 	if (PL_madskills) {
10915 	    char * const tstart = SvPVX(PL_linestr) + stuffstart;
10916 	    const int len = s - tstart;
10917 	    if (PL_thisstuff)
10918 		sv_catpvn(PL_thisstuff, tstart, len);
10919 	    else
10920 		PL_thisstuff = newSVpvn(tstart, len);
10921 	    if (!PL_thisclose && !keep_delims)
10922 		PL_thisclose = newSVpvn(s,termlen);
10923 	}
10924 #endif
10925 
10926 	if (keep_delims)
10927 	    sv_catpvn(sv, s, termlen);
10928 	s += termlen;
10929     }
10930 #ifdef PERL_MAD
10931     else {
10932 	if (PL_madskills) {
10933 	    char * const tstart = SvPVX(PL_linestr) + stuffstart;
10934 	    const int len = s - tstart - termlen;
10935 	    if (PL_thisstuff)
10936 		sv_catpvn(PL_thisstuff, tstart, len);
10937 	    else
10938 		PL_thisstuff = newSVpvn(tstart, len);
10939 	    if (!PL_thisclose && !keep_delims)
10940 		PL_thisclose = newSVpvn(s - termlen,termlen);
10941 	}
10942     }
10943 #endif
10944     if (has_utf8 || (PL_encoding && !re_reparse))
10945 	SvUTF8_on(sv);
10946 
10947     PL_multi_end = CopLINE(PL_curcop);
10948     CopLINE_set(PL_curcop, PL_multi_start);
10949     PL_parser->herelines = herelines;
10950 
10951     /* if we allocated too much space, give some back */
10952     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10953 	SvLEN_set(sv, SvCUR(sv) + 1);
10954 	SvPV_renew(sv, SvLEN(sv));
10955     }
10956 
10957     /* decide whether this is the first or second quoted string we've read
10958        for this op
10959     */
10960 
10961     if (PL_lex_stuff)
10962 	PL_sublex_info.repl = sv;
10963     else
10964 	PL_lex_stuff = sv;
10965     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10966     return s;
10967 }
10968 
10969 /*
10970   scan_num
10971   takes: pointer to position in buffer
10972   returns: pointer to new position in buffer
10973   side-effects: builds ops for the constant in pl_yylval.op
10974 
10975   Read a number in any of the formats that Perl accepts:
10976 
10977   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
10978   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
10979   0b[01](_?[01])*
10980   0[0-7](_?[0-7])*
10981   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10982 
10983   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10984   thing it reads.
10985 
10986   If it reads a number without a decimal point or an exponent, it will
10987   try converting the number to an integer and see if it can do so
10988   without loss of precision.
10989 */
10990 
10991 char *
10992 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10993 {
10994     dVAR;
10995     const char *s = start;	/* current position in buffer */
10996     char *d;			/* destination in temp buffer */
10997     char *e;			/* end of temp buffer */
10998     NV nv;				/* number read, as a double */
10999     SV *sv = NULL;			/* place to put the converted number */
11000     bool floatit;			/* boolean: int or float? */
11001     const char *lastub = NULL;		/* position of last underbar */
11002     static const char* const number_too_long = "Number too long";
11003 
11004     PERL_ARGS_ASSERT_SCAN_NUM;
11005 
11006     /* We use the first character to decide what type of number this is */
11007 
11008     switch (*s) {
11009     default:
11010 	Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11011 
11012     /* if it starts with a 0, it could be an octal number, a decimal in
11013        0.13 disguise, or a hexadecimal number, or a binary number. */
11014     case '0':
11015 	{
11016 	  /* variables:
11017 	     u		holds the "number so far"
11018 	     shift	the power of 2 of the base
11019 			(hex == 4, octal == 3, binary == 1)
11020 	     overflowed	was the number more than we can hold?
11021 
11022 	     Shift is used when we add a digit.  It also serves as an "are
11023 	     we in octal/hex/binary?" indicator to disallow hex characters
11024 	     when in octal mode.
11025 	   */
11026 	    NV n = 0.0;
11027 	    UV u = 0;
11028 	    I32 shift;
11029 	    bool overflowed = FALSE;
11030 	    bool just_zero  = TRUE;	/* just plain 0 or binary number? */
11031 	    static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11032 	    static const char* const bases[5] =
11033 	      { "", "binary", "", "octal", "hexadecimal" };
11034 	    static const char* const Bases[5] =
11035 	      { "", "Binary", "", "Octal", "Hexadecimal" };
11036 	    static const char* const maxima[5] =
11037 	      { "",
11038 		"0b11111111111111111111111111111111",
11039 		"",
11040 		"037777777777",
11041 		"0xffffffff" };
11042 	    const char *base, *Base, *max;
11043 
11044 	    /* check for hex */
11045 	    if (s[1] == 'x' || s[1] == 'X') {
11046 		shift = 4;
11047 		s += 2;
11048 		just_zero = FALSE;
11049 	    } else if (s[1] == 'b' || s[1] == 'B') {
11050 		shift = 1;
11051 		s += 2;
11052 		just_zero = FALSE;
11053 	    }
11054 	    /* check for a decimal in disguise */
11055 	    else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11056 		goto decimal;
11057 	    /* so it must be octal */
11058 	    else {
11059 		shift = 3;
11060 		s++;
11061 	    }
11062 
11063 	    if (*s == '_') {
11064 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11065 			       "Misplaced _ in number");
11066 	       lastub = s++;
11067 	    }
11068 
11069 	    base = bases[shift];
11070 	    Base = Bases[shift];
11071 	    max  = maxima[shift];
11072 
11073 	    /* read the rest of the number */
11074 	    for (;;) {
11075 		/* x is used in the overflow test,
11076 		   b is the digit we're adding on. */
11077 		UV x, b;
11078 
11079 		switch (*s) {
11080 
11081 		/* if we don't mention it, we're done */
11082 		default:
11083 		    goto out;
11084 
11085 		/* _ are ignored -- but warned about if consecutive */
11086 		case '_':
11087 		    if (lastub && s == lastub + 1)
11088 		        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11089 				       "Misplaced _ in number");
11090 		    lastub = s++;
11091 		    break;
11092 
11093 		/* 8 and 9 are not octal */
11094 		case '8': case '9':
11095 		    if (shift == 3)
11096 			yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11097 		    /* FALL THROUGH */
11098 
11099 	        /* octal digits */
11100 		case '2': case '3': case '4':
11101 		case '5': case '6': case '7':
11102 		    if (shift == 1)
11103 			yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11104 		    /* FALL THROUGH */
11105 
11106 		case '0': case '1':
11107 		    b = *s++ & 15;		/* ASCII digit -> value of digit */
11108 		    goto digit;
11109 
11110 	        /* hex digits */
11111 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11112 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11113 		    /* make sure they said 0x */
11114 		    if (shift != 4)
11115 			goto out;
11116 		    b = (*s++ & 7) + 9;
11117 
11118 		    /* Prepare to put the digit we have onto the end
11119 		       of the number so far.  We check for overflows.
11120 		    */
11121 
11122 		  digit:
11123 		    just_zero = FALSE;
11124 		    if (!overflowed) {
11125 			x = u << shift;	/* make room for the digit */
11126 
11127 			if ((x >> shift) != u
11128 			    && !(PL_hints & HINT_NEW_BINARY)) {
11129 			    overflowed = TRUE;
11130 			    n = (NV) u;
11131 			    Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11132 					     "Integer overflow in %s number",
11133 					     base);
11134 			} else
11135 			    u = x | b;		/* add the digit to the end */
11136 		    }
11137 		    if (overflowed) {
11138 			n *= nvshift[shift];
11139 			/* If an NV has not enough bits in its
11140 			 * mantissa to represent an UV this summing of
11141 			 * small low-order numbers is a waste of time
11142 			 * (because the NV cannot preserve the
11143 			 * low-order bits anyway): we could just
11144 			 * remember when did we overflow and in the
11145 			 * end just multiply n by the right
11146 			 * amount. */
11147 			n += (NV) b;
11148 		    }
11149 		    break;
11150 		}
11151 	    }
11152 
11153 	  /* if we get here, we had success: make a scalar value from
11154 	     the number.
11155 	  */
11156 	  out:
11157 
11158 	    /* final misplaced underbar check */
11159 	    if (s[-1] == '_') {
11160 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11161 	    }
11162 
11163 	    if (overflowed) {
11164 		if (n > 4294967295.0)
11165 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11166 				   "%s number > %s non-portable",
11167 				   Base, max);
11168 		sv = newSVnv(n);
11169 	    }
11170 	    else {
11171 #if UVSIZE > 4
11172 		if (u > 0xffffffff)
11173 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11174 				   "%s number > %s non-portable",
11175 				   Base, max);
11176 #endif
11177 		sv = newSVuv(u);
11178 	    }
11179 	    if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11180 		sv = new_constant(start, s - start, "integer",
11181 				  sv, NULL, NULL, 0);
11182 	    else if (PL_hints & HINT_NEW_BINARY)
11183 		sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11184 	}
11185 	break;
11186 
11187     /*
11188       handle decimal numbers.
11189       we're also sent here when we read a 0 as the first digit
11190     */
11191     case '1': case '2': case '3': case '4': case '5':
11192     case '6': case '7': case '8': case '9': case '.':
11193       decimal:
11194 	d = PL_tokenbuf;
11195 	e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11196 	floatit = FALSE;
11197 
11198 	/* read next group of digits and _ and copy into d */
11199 	while (isDIGIT(*s) || *s == '_') {
11200 	    /* skip underscores, checking for misplaced ones
11201 	       if -w is on
11202 	    */
11203 	    if (*s == '_') {
11204 		if (lastub && s == lastub + 1)
11205 		    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11206 				   "Misplaced _ in number");
11207 		lastub = s++;
11208 	    }
11209 	    else {
11210 	        /* check for end of fixed-length buffer */
11211 		if (d >= e)
11212 		    Perl_croak(aTHX_ "%s", number_too_long);
11213 		/* if we're ok, copy the character */
11214 		*d++ = *s++;
11215 	    }
11216 	}
11217 
11218 	/* final misplaced underbar check */
11219 	if (lastub && s == lastub + 1) {
11220 	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11221 	}
11222 
11223 	/* read a decimal portion if there is one.  avoid
11224 	   3..5 being interpreted as the number 3. followed
11225 	   by .5
11226 	*/
11227 	if (*s == '.' && s[1] != '.') {
11228 	    floatit = TRUE;
11229 	    *d++ = *s++;
11230 
11231 	    if (*s == '_') {
11232 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11233 			       "Misplaced _ in number");
11234 		lastub = s;
11235 	    }
11236 
11237 	    /* copy, ignoring underbars, until we run out of digits.
11238 	    */
11239 	    for (; isDIGIT(*s) || *s == '_'; s++) {
11240 	        /* fixed length buffer check */
11241 		if (d >= e)
11242 		    Perl_croak(aTHX_ "%s", number_too_long);
11243 		if (*s == '_') {
11244 		   if (lastub && s == lastub + 1)
11245 		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11246 				      "Misplaced _ in number");
11247 		   lastub = s;
11248 		}
11249 		else
11250 		    *d++ = *s;
11251 	    }
11252 	    /* fractional part ending in underbar? */
11253 	    if (s[-1] == '_') {
11254 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11255 			       "Misplaced _ in number");
11256 	    }
11257 	    if (*s == '.' && isDIGIT(s[1])) {
11258 		/* oops, it's really a v-string, but without the "v" */
11259 		s = start;
11260 		goto vstring;
11261 	    }
11262 	}
11263 
11264 	/* read exponent part, if present */
11265 	if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11266 	    floatit = TRUE;
11267 	    s++;
11268 
11269 	    /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11270 	    *d++ = 'e';		/* At least some Mach atof()s don't grok 'E' */
11271 
11272 	    /* stray preinitial _ */
11273 	    if (*s == '_') {
11274 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11275 			       "Misplaced _ in number");
11276 	        lastub = s++;
11277 	    }
11278 
11279 	    /* allow positive or negative exponent */
11280 	    if (*s == '+' || *s == '-')
11281 		*d++ = *s++;
11282 
11283 	    /* stray initial _ */
11284 	    if (*s == '_') {
11285 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11286 			       "Misplaced _ in number");
11287 	        lastub = s++;
11288 	    }
11289 
11290 	    /* read digits of exponent */
11291 	    while (isDIGIT(*s) || *s == '_') {
11292 	        if (isDIGIT(*s)) {
11293 		    if (d >= e)
11294 		        Perl_croak(aTHX_ "%s", number_too_long);
11295 		    *d++ = *s++;
11296 		}
11297 		else {
11298 		   if (((lastub && s == lastub + 1) ||
11299 			(!isDIGIT(s[1]) && s[1] != '_')))
11300 		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11301 				      "Misplaced _ in number");
11302 		   lastub = s++;
11303 		}
11304 	    }
11305 	}
11306 
11307 
11308 	/*
11309            We try to do an integer conversion first if no characters
11310            indicating "float" have been found.
11311 	 */
11312 
11313 	if (!floatit) {
11314     	    UV uv;
11315 	    const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11316 
11317             if (flags == IS_NUMBER_IN_UV) {
11318               if (uv <= IV_MAX)
11319 		sv = newSViv(uv); /* Prefer IVs over UVs. */
11320               else
11321 	    	sv = newSVuv(uv);
11322             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11323               if (uv <= (UV) IV_MIN)
11324                 sv = newSViv(-(IV)uv);
11325               else
11326 	    	floatit = TRUE;
11327             } else
11328               floatit = TRUE;
11329         }
11330 	if (floatit) {
11331             STORE_NUMERIC_LOCAL_SET_STANDARD();
11332 	    /* terminate the string */
11333 	    *d = '\0';
11334 	    nv = Atof(PL_tokenbuf);
11335             RESTORE_NUMERIC_LOCAL();
11336 	    sv = newSVnv(nv);
11337 	}
11338 
11339 	if ( floatit
11340 	     ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11341 	    const char *const key = floatit ? "float" : "integer";
11342 	    const STRLEN keylen = floatit ? 5 : 7;
11343 	    sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11344 				key, keylen, sv, NULL, NULL, 0);
11345 	}
11346 	break;
11347 
11348     /* if it starts with a v, it could be a v-string */
11349     case 'v':
11350 vstring:
11351 		sv = newSV(5); /* preallocate storage space */
11352 		ENTER_with_name("scan_vstring");
11353 		SAVEFREESV(sv);
11354 		s = scan_vstring(s, PL_bufend, sv);
11355 		SvREFCNT_inc_simple_void_NN(sv);
11356 		LEAVE_with_name("scan_vstring");
11357 	break;
11358     }
11359 
11360     /* make the op for the constant and return */
11361 
11362     if (sv)
11363 	lvalp->opval = newSVOP(OP_CONST, 0, sv);
11364     else
11365 	lvalp->opval = NULL;
11366 
11367     return (char *)s;
11368 }
11369 
11370 STATIC char *
11371 S_scan_formline(pTHX_ char *s)
11372 {
11373     dVAR;
11374     char *eol;
11375     char *t;
11376     SV * const stuff = newSVpvs("");
11377     bool needargs = FALSE;
11378     bool eofmt = FALSE;
11379 #ifdef PERL_MAD
11380     char *tokenstart = s;
11381     SV* savewhite = NULL;
11382 
11383     if (PL_madskills) {
11384 	savewhite = PL_thiswhite;
11385 	PL_thiswhite = 0;
11386     }
11387 #endif
11388 
11389     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11390 
11391     while (!needargs) {
11392 	if (*s == '.') {
11393 	    t = s+1;
11394 #ifdef PERL_STRICT_CR
11395 	    while (SPACE_OR_TAB(*t))
11396 		t++;
11397 #else
11398 	    while (SPACE_OR_TAB(*t) || *t == '\r')
11399 		t++;
11400 #endif
11401 	    if (*t == '\n' || t == PL_bufend) {
11402 	        eofmt = TRUE;
11403 		break;
11404             }
11405 	}
11406 	eol = (char *) memchr(s,'\n',PL_bufend-s);
11407 	if (!eol++)
11408 		eol = PL_bufend;
11409 	if (*s != '#') {
11410 	    for (t = s; t < eol; t++) {
11411 		if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11412 		    needargs = FALSE;
11413 		    goto enough;	/* ~~ must be first line in formline */
11414 		}
11415 		if (*t == '@' || *t == '^')
11416 		    needargs = TRUE;
11417 	    }
11418 	    if (eol > s) {
11419 	        sv_catpvn(stuff, s, eol-s);
11420 #ifndef PERL_STRICT_CR
11421 		if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11422 		    char *end = SvPVX(stuff) + SvCUR(stuff);
11423 		    end[-2] = '\n';
11424 		    end[-1] = '\0';
11425 		    SvCUR_set(stuff, SvCUR(stuff) - 1);
11426 		}
11427 #endif
11428 	    }
11429 	    else
11430 	      break;
11431 	}
11432 	s = (char*)eol;
11433 	if ((PL_rsfp || PL_parser->filtered)
11434 	 && PL_parser->form_lex_state == LEX_NORMAL) {
11435 	    bool got_some;
11436 #ifdef PERL_MAD
11437 	    if (PL_madskills) {
11438 		if (PL_thistoken)
11439 		    sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11440 		else
11441 		    PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11442 	    }
11443 #endif
11444 	    PL_bufptr = PL_bufend;
11445 	    COPLINE_INC_WITH_HERELINES;
11446 	    got_some = lex_next_chunk(0);
11447 	    CopLINE_dec(PL_curcop);
11448 	    s = PL_bufptr;
11449 #ifdef PERL_MAD
11450 	    tokenstart = PL_bufptr;
11451 #endif
11452 	    if (!got_some)
11453 		break;
11454 	}
11455 	incline(s);
11456     }
11457   enough:
11458     if (!SvCUR(stuff) || needargs)
11459 	PL_lex_state = PL_parser->form_lex_state;
11460     if (SvCUR(stuff)) {
11461 	PL_expect = XSTATE;
11462 	if (needargs) {
11463 	    const char *s2 = s;
11464 	    while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11465 		|| *s2 == 013)
11466 		s2++;
11467 	    if (*s2 == '{') {
11468 		start_force(PL_curforce);
11469 		PL_expect = XTERMBLOCK;
11470 		NEXTVAL_NEXTTOKE.ival = 0;
11471 		force_next(DO);
11472 	    }
11473 	    start_force(PL_curforce);
11474 	    NEXTVAL_NEXTTOKE.ival = 0;
11475 	    force_next(FORMLBRACK);
11476 	}
11477 	if (!IN_BYTES) {
11478 	    if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11479 		SvUTF8_on(stuff);
11480 	    else if (PL_encoding)
11481 		sv_recode_to_utf8(stuff, PL_encoding);
11482 	}
11483 	start_force(PL_curforce);
11484 	NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11485 	force_next(THING);
11486     }
11487     else {
11488 	SvREFCNT_dec(stuff);
11489 	if (eofmt)
11490 	    PL_lex_formbrack = 0;
11491     }
11492 #ifdef PERL_MAD
11493     if (PL_madskills) {
11494 	if (PL_thistoken)
11495 	    sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11496 	else
11497 	    PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11498 	PL_thiswhite = savewhite;
11499     }
11500 #endif
11501     return s;
11502 }
11503 
11504 I32
11505 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11506 {
11507     dVAR;
11508     const I32 oldsavestack_ix = PL_savestack_ix;
11509     CV* const outsidecv = PL_compcv;
11510 
11511     SAVEI32(PL_subline);
11512     save_item(PL_subname);
11513     SAVESPTR(PL_compcv);
11514 
11515     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11516     CvFLAGS(PL_compcv) |= flags;
11517 
11518     PL_subline = CopLINE(PL_curcop);
11519     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11520     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11521     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11522     if (outsidecv && CvPADLIST(outsidecv))
11523 	CvPADLIST(PL_compcv)->xpadl_outid =
11524 	    PadlistNAMES(CvPADLIST(outsidecv));
11525 
11526     return oldsavestack_ix;
11527 }
11528 
11529 static int
11530 S_yywarn(pTHX_ const char *const s, U32 flags)
11531 {
11532     dVAR;
11533 
11534     PERL_ARGS_ASSERT_YYWARN;
11535 
11536     PL_in_eval |= EVAL_WARNONLY;
11537     yyerror_pv(s, flags);
11538     PL_in_eval &= ~EVAL_WARNONLY;
11539     return 0;
11540 }
11541 
11542 int
11543 Perl_yyerror(pTHX_ const char *const s)
11544 {
11545     PERL_ARGS_ASSERT_YYERROR;
11546     return yyerror_pvn(s, strlen(s), 0);
11547 }
11548 
11549 int
11550 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11551 {
11552     PERL_ARGS_ASSERT_YYERROR_PV;
11553     return yyerror_pvn(s, strlen(s), flags);
11554 }
11555 
11556 int
11557 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11558 {
11559     dVAR;
11560     const char *context = NULL;
11561     int contlen = -1;
11562     SV *msg;
11563     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11564     int yychar  = PL_parser->yychar;
11565 
11566     PERL_ARGS_ASSERT_YYERROR_PVN;
11567 
11568     if (!yychar || (yychar == ';' && !PL_rsfp))
11569 	sv_catpvs(where_sv, "at EOF");
11570     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11571       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11572       PL_oldbufptr != PL_bufptr) {
11573 	/*
11574 		Only for NetWare:
11575 		The code below is removed for NetWare because it abends/crashes on NetWare
11576 		when the script has error such as not having the closing quotes like:
11577 		    if ($var eq "value)
11578 		Checking of white spaces is anyway done in NetWare code.
11579 	*/
11580 #ifndef NETWARE
11581 	while (isSPACE(*PL_oldoldbufptr))
11582 	    PL_oldoldbufptr++;
11583 #endif
11584 	context = PL_oldoldbufptr;
11585 	contlen = PL_bufptr - PL_oldoldbufptr;
11586     }
11587     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11588       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11589 	/*
11590 		Only for NetWare:
11591 		The code below is removed for NetWare because it abends/crashes on NetWare
11592 		when the script has error such as not having the closing quotes like:
11593 		    if ($var eq "value)
11594 		Checking of white spaces is anyway done in NetWare code.
11595 	*/
11596 #ifndef NETWARE
11597 	while (isSPACE(*PL_oldbufptr))
11598 	    PL_oldbufptr++;
11599 #endif
11600 	context = PL_oldbufptr;
11601 	contlen = PL_bufptr - PL_oldbufptr;
11602     }
11603     else if (yychar > 255)
11604 	sv_catpvs(where_sv, "next token ???");
11605     else if (yychar == -2) { /* YYEMPTY */
11606 	if (PL_lex_state == LEX_NORMAL ||
11607 	   (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11608 	    sv_catpvs(where_sv, "at end of line");
11609 	else if (PL_lex_inpat)
11610 	    sv_catpvs(where_sv, "within pattern");
11611 	else
11612 	    sv_catpvs(where_sv, "within string");
11613     }
11614     else {
11615 	sv_catpvs(where_sv, "next char ");
11616 	if (yychar < 32)
11617 	    Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11618 	else if (isPRINT_LC(yychar)) {
11619 	    const char string = yychar;
11620 	    sv_catpvn(where_sv, &string, 1);
11621 	}
11622 	else
11623 	    Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11624     }
11625     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11626     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11627         OutCopFILE(PL_curcop),
11628         (IV)(PL_parser->preambling == NOLINE
11629                ? CopLINE(PL_curcop)
11630                : PL_parser->preambling));
11631     if (context)
11632 	Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11633 			     UTF8fARG(UTF, contlen, context));
11634     else
11635 	Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11636     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11637         Perl_sv_catpvf(aTHX_ msg,
11638         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11639                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11640         PL_multi_end = 0;
11641     }
11642     if (PL_in_eval & EVAL_WARNONLY) {
11643 	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11644     }
11645     else
11646 	qerror(msg);
11647     if (PL_error_count >= 10) {
11648 	SV * errsv;
11649 	if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11650 	    Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11651 		       SVfARG(errsv), OutCopFILE(PL_curcop));
11652 	else
11653 	    Perl_croak(aTHX_ "%s has too many errors.\n",
11654             OutCopFILE(PL_curcop));
11655     }
11656     PL_in_my = 0;
11657     PL_in_my_stash = NULL;
11658     return 0;
11659 }
11660 
11661 STATIC char*
11662 S_swallow_bom(pTHX_ U8 *s)
11663 {
11664     dVAR;
11665     const STRLEN slen = SvCUR(PL_linestr);
11666 
11667     PERL_ARGS_ASSERT_SWALLOW_BOM;
11668 
11669     switch (s[0]) {
11670     case 0xFF:
11671 	if (s[1] == 0xFE) {
11672 	    /* UTF-16 little-endian? (or UTF-32LE?) */
11673 	    if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11674 		/* diag_listed_as: Unsupported script encoding %s */
11675 		Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11676 #ifndef PERL_NO_UTF16_FILTER
11677 	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11678 	    s += 2;
11679 	    if (PL_bufend > (char*)s) {
11680 		s = add_utf16_textfilter(s, TRUE);
11681 	    }
11682 #else
11683 	    /* diag_listed_as: Unsupported script encoding %s */
11684 	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11685 #endif
11686 	}
11687 	break;
11688     case 0xFE:
11689 	if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11690 #ifndef PERL_NO_UTF16_FILTER
11691 	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11692 	    s += 2;
11693 	    if (PL_bufend > (char *)s) {
11694 		s = add_utf16_textfilter(s, FALSE);
11695 	    }
11696 #else
11697 	    /* diag_listed_as: Unsupported script encoding %s */
11698 	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11699 #endif
11700 	}
11701 	break;
11702     case BOM_UTF8_FIRST_BYTE: {
11703         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11704         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11705             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11706             s += len + 1;                      /* UTF-8 */
11707         }
11708         break;
11709     }
11710     case 0:
11711 	if (slen > 3) {
11712 	     if (s[1] == 0) {
11713 		  if (s[2] == 0xFE && s[3] == 0xFF) {
11714 		       /* UTF-32 big-endian */
11715 		       /* diag_listed_as: Unsupported script encoding %s */
11716 		       Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11717 		  }
11718 	     }
11719 	     else if (s[2] == 0 && s[3] != 0) {
11720 		  /* Leading bytes
11721 		   * 00 xx 00 xx
11722 		   * are a good indicator of UTF-16BE. */
11723 #ifndef PERL_NO_UTF16_FILTER
11724 		  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11725 		  s = add_utf16_textfilter(s, FALSE);
11726 #else
11727 		  /* diag_listed_as: Unsupported script encoding %s */
11728 		  Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11729 #endif
11730 	     }
11731 	}
11732 
11733     default:
11734 	 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11735 		  /* Leading bytes
11736 		   * xx 00 xx 00
11737 		   * are a good indicator of UTF-16LE. */
11738 #ifndef PERL_NO_UTF16_FILTER
11739 	      if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11740 	      s = add_utf16_textfilter(s, TRUE);
11741 #else
11742 	      /* diag_listed_as: Unsupported script encoding %s */
11743 	      Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11744 #endif
11745 	 }
11746     }
11747     return (char*)s;
11748 }
11749 
11750 
11751 #ifndef PERL_NO_UTF16_FILTER
11752 static I32
11753 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11754 {
11755     dVAR;
11756     SV *const filter = FILTER_DATA(idx);
11757     /* We re-use this each time round, throwing the contents away before we
11758        return.  */
11759     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11760     SV *const utf8_buffer = filter;
11761     IV status = IoPAGE(filter);
11762     const bool reverse = cBOOL(IoLINES(filter));
11763     I32 retval;
11764 
11765     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11766 
11767     /* As we're automatically added, at the lowest level, and hence only called
11768        from this file, we can be sure that we're not called in block mode. Hence
11769        don't bother writing code to deal with block mode.  */
11770     if (maxlen) {
11771 	Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11772     }
11773     if (status < 0) {
11774 	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11775     }
11776     DEBUG_P(PerlIO_printf(Perl_debug_log,
11777 			  "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11778 			  FPTR2DPTR(void *, S_utf16_textfilter),
11779 			  reverse ? 'l' : 'b', idx, maxlen, status,
11780 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11781 
11782     while (1) {
11783 	STRLEN chars;
11784 	STRLEN have;
11785 	I32 newlen;
11786 	U8 *end;
11787 	/* First, look in our buffer of existing UTF-8 data:  */
11788 	char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11789 
11790 	if (nl) {
11791 	    ++nl;
11792 	} else if (status == 0) {
11793 	    /* EOF */
11794 	    IoPAGE(filter) = 0;
11795 	    nl = SvEND(utf8_buffer);
11796 	}
11797 	if (nl) {
11798 	    STRLEN got = nl - SvPVX(utf8_buffer);
11799 	    /* Did we have anything to append?  */
11800 	    retval = got != 0;
11801 	    sv_catpvn(sv, SvPVX(utf8_buffer), got);
11802 	    /* Everything else in this code works just fine if SVp_POK isn't
11803 	       set.  This, however, needs it, and we need it to work, else
11804 	       we loop infinitely because the buffer is never consumed.  */
11805 	    sv_chop(utf8_buffer, nl);
11806 	    break;
11807 	}
11808 
11809 	/* OK, not a complete line there, so need to read some more UTF-16.
11810 	   Read an extra octect if the buffer currently has an odd number. */
11811 	while (1) {
11812 	    if (status <= 0)
11813 		break;
11814 	    if (SvCUR(utf16_buffer) >= 2) {
11815 		/* Location of the high octet of the last complete code point.
11816 		   Gosh, UTF-16 is a pain. All the benefits of variable length,
11817 		   *coupled* with all the benefits of partial reads and
11818 		   endianness.  */
11819 		const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11820 		    + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11821 
11822 		if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11823 		    break;
11824 		}
11825 
11826 		/* We have the first half of a surrogate. Read more.  */
11827 		DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11828 	    }
11829 
11830 	    status = FILTER_READ(idx + 1, utf16_buffer,
11831 				 160 + (SvCUR(utf16_buffer) & 1));
11832 	    DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11833 	    DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11834 	    if (status < 0) {
11835 		/* Error */
11836 		IoPAGE(filter) = status;
11837 		return status;
11838 	    }
11839 	}
11840 
11841 	chars = SvCUR(utf16_buffer) >> 1;
11842 	have = SvCUR(utf8_buffer);
11843 	SvGROW(utf8_buffer, have + chars * 3 + 1);
11844 
11845 	if (reverse) {
11846 	    end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11847 					 (U8*)SvPVX_const(utf8_buffer) + have,
11848 					 chars * 2, &newlen);
11849 	} else {
11850 	    end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11851 				(U8*)SvPVX_const(utf8_buffer) + have,
11852 				chars * 2, &newlen);
11853 	}
11854 	SvCUR_set(utf8_buffer, have + newlen);
11855 	*end = '\0';
11856 
11857 	/* No need to keep this SV "well-formed" with a '\0' after the end, as
11858 	   it's private to us, and utf16_to_utf8{,reversed} take a
11859 	   (pointer,length) pair, rather than a NUL-terminated string.  */
11860 	if(SvCUR(utf16_buffer) & 1) {
11861 	    *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11862 	    SvCUR_set(utf16_buffer, 1);
11863 	} else {
11864 	    SvCUR_set(utf16_buffer, 0);
11865 	}
11866     }
11867     DEBUG_P(PerlIO_printf(Perl_debug_log,
11868 			  "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11869 			  status,
11870 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11871     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11872     return retval;
11873 }
11874 
11875 static U8 *
11876 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11877 {
11878     SV *filter = filter_add(S_utf16_textfilter, NULL);
11879 
11880     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11881 
11882     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11883     sv_setpvs(filter, "");
11884     IoLINES(filter) = reversed;
11885     IoPAGE(filter) = 1; /* Not EOF */
11886 
11887     /* Sadly, we have to return a valid pointer, come what may, so we have to
11888        ignore any error return from this.  */
11889     SvCUR_set(PL_linestr, 0);
11890     if (FILTER_READ(0, PL_linestr, 0)) {
11891 	SvUTF8_on(PL_linestr);
11892     } else {
11893 	SvUTF8_on(PL_linestr);
11894     }
11895     PL_bufend = SvEND(PL_linestr);
11896     return (U8*)SvPVX(PL_linestr);
11897 }
11898 #endif
11899 
11900 /*
11901 Returns a pointer to the next character after the parsed
11902 vstring, as well as updating the passed in sv.
11903 
11904 Function must be called like
11905 
11906 	sv = sv_2mortal(newSV(5));
11907 	s = scan_vstring(s,e,sv);
11908 
11909 where s and e are the start and end of the string.
11910 The sv should already be large enough to store the vstring
11911 passed in, for performance reasons.
11912 
11913 This function may croak if fatal warnings are enabled in the
11914 calling scope, hence the sv_2mortal in the example (to prevent
11915 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11916 sv_2mortal.
11917 
11918 */
11919 
11920 char *
11921 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11922 {
11923     dVAR;
11924     const char *pos = s;
11925     const char *start = s;
11926 
11927     PERL_ARGS_ASSERT_SCAN_VSTRING;
11928 
11929     if (*pos == 'v') pos++;  /* get past 'v' */
11930     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11931 	pos++;
11932     if ( *pos != '.') {
11933 	/* this may not be a v-string if followed by => */
11934 	const char *next = pos;
11935 	while (next < e && isSPACE(*next))
11936 	    ++next;
11937 	if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11938 	    /* return string not v-string */
11939 	    sv_setpvn(sv,(char *)s,pos-s);
11940 	    return (char *)pos;
11941 	}
11942     }
11943 
11944     if (!isALPHA(*pos)) {
11945 	U8 tmpbuf[UTF8_MAXBYTES+1];
11946 
11947 	if (*s == 'v')
11948 	    s++;  /* get past 'v' */
11949 
11950 	sv_setpvs(sv, "");
11951 
11952 	for (;;) {
11953 	    /* this is atoi() that tolerates underscores */
11954 	    U8 *tmpend;
11955 	    UV rev = 0;
11956 	    const char *end = pos;
11957 	    UV mult = 1;
11958 	    while (--end >= s) {
11959 		if (*end != '_') {
11960 		    const UV orev = rev;
11961 		    rev += (*end - '0') * mult;
11962 		    mult *= 10;
11963 		    if (orev > rev)
11964 			/* diag_listed_as: Integer overflow in %s number */
11965 			Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11966 					 "Integer overflow in decimal number");
11967 		}
11968 	    }
11969 #ifdef EBCDIC
11970 	    if (rev > 0x7FFFFFFF)
11971 		 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11972 #endif
11973 	    /* Append native character for the rev point */
11974 	    tmpend = uvchr_to_utf8(tmpbuf, rev);
11975 	    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11976 	    if (!UVCHR_IS_INVARIANT(rev))
11977 		 SvUTF8_on(sv);
11978 	    if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11979 		 s = ++pos;
11980 	    else {
11981 		 s = pos;
11982 		 break;
11983 	    }
11984 	    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11985 		 pos++;
11986 	}
11987 	SvPOK_on(sv);
11988 	sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11989 	SvRMAGICAL_on(sv);
11990     }
11991     return (char *)s;
11992 }
11993 
11994 int
11995 Perl_keyword_plugin_standard(pTHX_
11996 	char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11997 {
11998     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11999     PERL_UNUSED_CONTEXT;
12000     PERL_UNUSED_ARG(keyword_ptr);
12001     PERL_UNUSED_ARG(keyword_len);
12002     PERL_UNUSED_ARG(op_ptr);
12003     return KEYWORD_PLUGIN_DECLINE;
12004 }
12005 
12006 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12007 static void
12008 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12009 {
12010     SAVEI32(PL_lex_brackets);
12011     if (PL_lex_brackets > 100)
12012 	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12013     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12014     SAVEI32(PL_lex_allbrackets);
12015     PL_lex_allbrackets = 0;
12016     SAVEI8(PL_lex_fakeeof);
12017     PL_lex_fakeeof = (U8)fakeeof;
12018     if(yyparse(gramtype) && !PL_parser->error_count)
12019 	qerror(Perl_mess(aTHX_ "Parse error"));
12020 }
12021 
12022 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12023 static OP *
12024 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12025 {
12026     OP *o;
12027     ENTER;
12028     SAVEVPTR(PL_eval_root);
12029     PL_eval_root = NULL;
12030     parse_recdescent(gramtype, fakeeof);
12031     o = PL_eval_root;
12032     LEAVE;
12033     return o;
12034 }
12035 
12036 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12037 static OP *
12038 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12039 {
12040     OP *exprop;
12041     if (flags & ~PARSE_OPTIONAL)
12042 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12043     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12044     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12045 	if (!PL_parser->error_count)
12046 	    qerror(Perl_mess(aTHX_ "Parse error"));
12047 	exprop = newOP(OP_NULL, 0);
12048     }
12049     return exprop;
12050 }
12051 
12052 /*
12053 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12054 
12055 Parse a Perl arithmetic expression.  This may contain operators of precedence
12056 down to the bit shift operators.  The expression must be followed (and thus
12057 terminated) either by a comparison or lower-precedence operator or by
12058 something that would normally terminate an expression such as semicolon.
12059 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12060 otherwise it is mandatory.  It is up to the caller to ensure that the
12061 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12062 the source of the code to be parsed and the lexical context for the
12063 expression.
12064 
12065 The op tree representing the expression is returned.  If an optional
12066 expression is absent, a null pointer is returned, otherwise the pointer
12067 will be non-null.
12068 
12069 If an error occurs in parsing or compilation, in most cases a valid op
12070 tree is returned anyway.  The error is reflected in the parser state,
12071 normally resulting in a single exception at the top level of parsing
12072 which covers all the compilation errors that occurred.  Some compilation
12073 errors, however, will throw an exception immediately.
12074 
12075 =cut
12076 */
12077 
12078 OP *
12079 Perl_parse_arithexpr(pTHX_ U32 flags)
12080 {
12081     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12082 }
12083 
12084 /*
12085 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12086 
12087 Parse a Perl term expression.  This may contain operators of precedence
12088 down to the assignment operators.  The expression must be followed (and thus
12089 terminated) either by a comma or lower-precedence operator or by
12090 something that would normally terminate an expression such as semicolon.
12091 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12092 otherwise it is mandatory.  It is up to the caller to ensure that the
12093 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12094 the source of the code to be parsed and the lexical context for the
12095 expression.
12096 
12097 The op tree representing the expression is returned.  If an optional
12098 expression is absent, a null pointer is returned, otherwise the pointer
12099 will be non-null.
12100 
12101 If an error occurs in parsing or compilation, in most cases a valid op
12102 tree is returned anyway.  The error is reflected in the parser state,
12103 normally resulting in a single exception at the top level of parsing
12104 which covers all the compilation errors that occurred.  Some compilation
12105 errors, however, will throw an exception immediately.
12106 
12107 =cut
12108 */
12109 
12110 OP *
12111 Perl_parse_termexpr(pTHX_ U32 flags)
12112 {
12113     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12114 }
12115 
12116 /*
12117 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12118 
12119 Parse a Perl list expression.  This may contain operators of precedence
12120 down to the comma operator.  The expression must be followed (and thus
12121 terminated) either by a low-precedence logic operator such as C<or> or by
12122 something that would normally terminate an expression such as semicolon.
12123 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12124 otherwise it is mandatory.  It is up to the caller to ensure that the
12125 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12126 the source of the code to be parsed and the lexical context for the
12127 expression.
12128 
12129 The op tree representing the expression is returned.  If an optional
12130 expression is absent, a null pointer is returned, otherwise the pointer
12131 will be non-null.
12132 
12133 If an error occurs in parsing or compilation, in most cases a valid op
12134 tree is returned anyway.  The error is reflected in the parser state,
12135 normally resulting in a single exception at the top level of parsing
12136 which covers all the compilation errors that occurred.  Some compilation
12137 errors, however, will throw an exception immediately.
12138 
12139 =cut
12140 */
12141 
12142 OP *
12143 Perl_parse_listexpr(pTHX_ U32 flags)
12144 {
12145     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12146 }
12147 
12148 /*
12149 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12150 
12151 Parse a single complete Perl expression.  This allows the full
12152 expression grammar, including the lowest-precedence operators such
12153 as C<or>.  The expression must be followed (and thus terminated) by a
12154 token that an expression would normally be terminated by: end-of-file,
12155 closing bracketing punctuation, semicolon, or one of the keywords that
12156 signals a postfix expression-statement modifier.  If I<flags> includes
12157 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12158 mandatory.  It is up to the caller to ensure that the dynamic parser
12159 state (L</PL_parser> et al) is correctly set to reflect the source of
12160 the code to be parsed and the lexical context for the expression.
12161 
12162 The op tree representing the expression is returned.  If an optional
12163 expression is absent, a null pointer is returned, otherwise the pointer
12164 will be non-null.
12165 
12166 If an error occurs in parsing or compilation, in most cases a valid op
12167 tree is returned anyway.  The error is reflected in the parser state,
12168 normally resulting in a single exception at the top level of parsing
12169 which covers all the compilation errors that occurred.  Some compilation
12170 errors, however, will throw an exception immediately.
12171 
12172 =cut
12173 */
12174 
12175 OP *
12176 Perl_parse_fullexpr(pTHX_ U32 flags)
12177 {
12178     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12179 }
12180 
12181 /*
12182 =for apidoc Amx|OP *|parse_block|U32 flags
12183 
12184 Parse a single complete Perl code block.  This consists of an opening
12185 brace, a sequence of statements, and a closing brace.  The block
12186 constitutes a lexical scope, so C<my> variables and various compile-time
12187 effects can be contained within it.  It is up to the caller to ensure
12188 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12189 reflect the source of the code to be parsed and the lexical context for
12190 the statement.
12191 
12192 The op tree representing the code block is returned.  This is always a
12193 real op, never a null pointer.  It will normally be a C<lineseq> list,
12194 including C<nextstate> or equivalent ops.  No ops to construct any kind
12195 of runtime scope are included by virtue of it being a block.
12196 
12197 If an error occurs in parsing or compilation, in most cases a valid op
12198 tree (most likely null) is returned anyway.  The error is reflected in
12199 the parser state, normally resulting in a single exception at the top
12200 level of parsing which covers all the compilation errors that occurred.
12201 Some compilation errors, however, will throw an exception immediately.
12202 
12203 The I<flags> parameter is reserved for future use, and must always
12204 be zero.
12205 
12206 =cut
12207 */
12208 
12209 OP *
12210 Perl_parse_block(pTHX_ U32 flags)
12211 {
12212     if (flags)
12213 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12214     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12215 }
12216 
12217 /*
12218 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12219 
12220 Parse a single unadorned Perl statement.  This may be a normal imperative
12221 statement or a declaration that has compile-time effect.  It does not
12222 include any label or other affixture.  It is up to the caller to ensure
12223 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12224 reflect the source of the code to be parsed and the lexical context for
12225 the statement.
12226 
12227 The op tree representing the statement is returned.  This may be a
12228 null pointer if the statement is null, for example if it was actually
12229 a subroutine definition (which has compile-time side effects).  If not
12230 null, it will be ops directly implementing the statement, suitable to
12231 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12232 equivalent op (except for those embedded in a scope contained entirely
12233 within the statement).
12234 
12235 If an error occurs in parsing or compilation, in most cases a valid op
12236 tree (most likely null) is returned anyway.  The error is reflected in
12237 the parser state, normally resulting in a single exception at the top
12238 level of parsing which covers all the compilation errors that occurred.
12239 Some compilation errors, however, will throw an exception immediately.
12240 
12241 The I<flags> parameter is reserved for future use, and must always
12242 be zero.
12243 
12244 =cut
12245 */
12246 
12247 OP *
12248 Perl_parse_barestmt(pTHX_ U32 flags)
12249 {
12250     if (flags)
12251 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12252     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12253 }
12254 
12255 /*
12256 =for apidoc Amx|SV *|parse_label|U32 flags
12257 
12258 Parse a single label, possibly optional, of the type that may prefix a
12259 Perl statement.  It is up to the caller to ensure that the dynamic parser
12260 state (L</PL_parser> et al) is correctly set to reflect the source of
12261 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12262 label is optional, otherwise it is mandatory.
12263 
12264 The name of the label is returned in the form of a fresh scalar.  If an
12265 optional label is absent, a null pointer is returned.
12266 
12267 If an error occurs in parsing, which can only occur if the label is
12268 mandatory, a valid label is returned anyway.  The error is reflected in
12269 the parser state, normally resulting in a single exception at the top
12270 level of parsing which covers all the compilation errors that occurred.
12271 
12272 =cut
12273 */
12274 
12275 SV *
12276 Perl_parse_label(pTHX_ U32 flags)
12277 {
12278     if (flags & ~PARSE_OPTIONAL)
12279 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12280     if (PL_lex_state == LEX_KNOWNEXT) {
12281 	PL_parser->yychar = yylex();
12282 	if (PL_parser->yychar == LABEL) {
12283 	    char * const lpv = pl_yylval.pval;
12284 	    STRLEN llen = strlen(lpv);
12285 	    PL_parser->yychar = YYEMPTY;
12286 	    return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12287 	} else {
12288 	    yyunlex();
12289 	    goto no_label;
12290 	}
12291     } else {
12292 	char *s, *t;
12293 	STRLEN wlen, bufptr_pos;
12294 	lex_read_space(0);
12295 	t = s = PL_bufptr;
12296         if (!isIDFIRST_lazy_if(s, UTF))
12297 	    goto no_label;
12298 	t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12299 	if (word_takes_any_delimeter(s, wlen))
12300 	    goto no_label;
12301 	bufptr_pos = s - SvPVX(PL_linestr);
12302 	PL_bufptr = t;
12303 	lex_read_space(LEX_KEEP_PREVIOUS);
12304 	t = PL_bufptr;
12305 	s = SvPVX(PL_linestr) + bufptr_pos;
12306 	if (t[0] == ':' && t[1] != ':') {
12307 	    PL_oldoldbufptr = PL_oldbufptr;
12308 	    PL_oldbufptr = s;
12309 	    PL_bufptr = t+1;
12310 	    return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12311 	} else {
12312 	    PL_bufptr = s;
12313 	    no_label:
12314 	    if (flags & PARSE_OPTIONAL) {
12315 		return NULL;
12316 	    } else {
12317 		qerror(Perl_mess(aTHX_ "Parse error"));
12318 		return newSVpvs("x");
12319 	    }
12320 	}
12321     }
12322 }
12323 
12324 /*
12325 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12326 
12327 Parse a single complete Perl statement.  This may be a normal imperative
12328 statement or a declaration that has compile-time effect, and may include
12329 optional labels.  It is up to the caller to ensure that the dynamic
12330 parser state (L</PL_parser> et al) is correctly set to reflect the source
12331 of the code to be parsed and the lexical context for the statement.
12332 
12333 The op tree representing the statement is returned.  This may be a
12334 null pointer if the statement is null, for example if it was actually
12335 a subroutine definition (which has compile-time side effects).  If not
12336 null, it will be the result of a L</newSTATEOP> call, normally including
12337 a C<nextstate> or equivalent op.
12338 
12339 If an error occurs in parsing or compilation, in most cases a valid op
12340 tree (most likely null) is returned anyway.  The error is reflected in
12341 the parser state, normally resulting in a single exception at the top
12342 level of parsing which covers all the compilation errors that occurred.
12343 Some compilation errors, however, will throw an exception immediately.
12344 
12345 The I<flags> parameter is reserved for future use, and must always
12346 be zero.
12347 
12348 =cut
12349 */
12350 
12351 OP *
12352 Perl_parse_fullstmt(pTHX_ U32 flags)
12353 {
12354     if (flags)
12355 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12356     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12357 }
12358 
12359 /*
12360 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12361 
12362 Parse a sequence of zero or more Perl statements.  These may be normal
12363 imperative statements, including optional labels, or declarations
12364 that have compile-time effect, or any mixture thereof.  The statement
12365 sequence ends when a closing brace or end-of-file is encountered in a
12366 place where a new statement could have validly started.  It is up to
12367 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12368 is correctly set to reflect the source of the code to be parsed and the
12369 lexical context for the statements.
12370 
12371 The op tree representing the statement sequence is returned.  This may
12372 be a null pointer if the statements were all null, for example if there
12373 were no statements or if there were only subroutine definitions (which
12374 have compile-time side effects).  If not null, it will be a C<lineseq>
12375 list, normally including C<nextstate> or equivalent ops.
12376 
12377 If an error occurs in parsing or compilation, in most cases a valid op
12378 tree is returned anyway.  The error is reflected in the parser state,
12379 normally resulting in a single exception at the top level of parsing
12380 which covers all the compilation errors that occurred.  Some compilation
12381 errors, however, will throw an exception immediately.
12382 
12383 The I<flags> parameter is reserved for future use, and must always
12384 be zero.
12385 
12386 =cut
12387 */
12388 
12389 OP *
12390 Perl_parse_stmtseq(pTHX_ U32 flags)
12391 {
12392     OP *stmtseqop;
12393     I32 c;
12394     if (flags)
12395 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12396     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12397     c = lex_peek_unichar(0);
12398     if (c != -1 && c != /*{*/'}')
12399 	qerror(Perl_mess(aTHX_ "Parse error"));
12400     return stmtseqop;
12401 }
12402 
12403 #define lex_token_boundary() S_lex_token_boundary(aTHX)
12404 static void
12405 S_lex_token_boundary(pTHX)
12406 {
12407     PL_oldoldbufptr = PL_oldbufptr;
12408     PL_oldbufptr = PL_bufptr;
12409 }
12410 
12411 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
12412 static OP *
12413 S_parse_opt_lexvar(pTHX)
12414 {
12415     I32 sigil, c;
12416     char *s, *d;
12417     OP *var;
12418     lex_token_boundary();
12419     sigil = lex_read_unichar(0);
12420     if (lex_peek_unichar(0) == '#') {
12421 	qerror(Perl_mess(aTHX_ "Parse error"));
12422 	return NULL;
12423     }
12424     lex_read_space(0);
12425     c = lex_peek_unichar(0);
12426     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
12427 	return NULL;
12428     s = PL_bufptr;
12429     d = PL_tokenbuf + 1;
12430     PL_tokenbuf[0] = (char)sigil;
12431     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
12432     PL_bufptr = s;
12433     if (d == PL_tokenbuf+1)
12434 	return NULL;
12435     *d = 0;
12436     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
12437 		OPf_MOD | (OPpLVAL_INTRO<<8));
12438     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
12439     return var;
12440 }
12441 
12442 OP *
12443 Perl_parse_subsignature(pTHX)
12444 {
12445     I32 c;
12446     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
12447     OP *initops = NULL;
12448     lex_read_space(0);
12449     c = lex_peek_unichar(0);
12450     while (c != /*(*/')') {
12451 	switch (c) {
12452 	    case '$': {
12453 		OP *var, *expr;
12454 		if (prev_type == 2)
12455 		    qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12456 		var = parse_opt_lexvar();
12457 		expr = var ?
12458 		    newBINOP(OP_AELEM, 0,
12459 			ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
12460 			    OP_RV2AV),
12461 			newSVOP(OP_CONST, 0, newSViv(pos))) :
12462 		    NULL;
12463 		lex_read_space(0);
12464 		c = lex_peek_unichar(0);
12465 		if (c == '=') {
12466 		    lex_token_boundary();
12467 		    lex_read_unichar(0);
12468 		    lex_read_space(0);
12469 		    c = lex_peek_unichar(0);
12470 		    if (c == ',' || c == /*(*/')') {
12471 			if (var)
12472 			    qerror(Perl_mess(aTHX_ "Optional parameter "
12473 				    "lacks default expression"));
12474 		    } else {
12475 			OP *defexpr = parse_termexpr(0);
12476 			if (defexpr->op_type == OP_UNDEF &&
12477 				!(defexpr->op_flags & OPf_KIDS)) {
12478 			    op_free(defexpr);
12479 			} else {
12480 			    OP *ifop =
12481 				newBINOP(OP_GE, 0,
12482 				    scalar(newUNOP(OP_RV2AV, 0,
12483 					    newGVOP(OP_GV, 0, PL_defgv))),
12484 				    newSVOP(OP_CONST, 0, newSViv(pos+1)));
12485 			    expr = var ?
12486 				newCONDOP(0, ifop, expr, defexpr) :
12487 				newLOGOP(OP_OR, 0, ifop, defexpr);
12488 			}
12489 		    }
12490 		    prev_type = 1;
12491 		} else {
12492 		    if (prev_type == 1)
12493 			qerror(Perl_mess(aTHX_ "Mandatory parameter "
12494 				"follows optional parameter"));
12495 		    prev_type = 0;
12496 		    min_arity = pos + 1;
12497 		}
12498 		if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
12499 		if (expr)
12500 		    initops = op_append_list(OP_LINESEQ, initops,
12501 				newSTATEOP(0, NULL, expr));
12502 		max_arity = ++pos;
12503 	    } break;
12504 	    case '@':
12505 	    case '%': {
12506 		OP *var;
12507 		if (prev_type == 2)
12508 		    qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12509 		var = parse_opt_lexvar();
12510 		if (c == '%') {
12511 		    OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
12512 			    newBINOP(OP_BIT_AND, 0,
12513 				scalar(newUNOP(OP_RV2AV, 0,
12514 				    newGVOP(OP_GV, 0, PL_defgv))),
12515 				newSVOP(OP_CONST, 0, newSViv(1))),
12516 			    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12517 				newSVOP(OP_CONST, 0,
12518 				    newSVpvs("Odd name/value argument "
12519 					"for subroutine"))));
12520 		    if (pos != min_arity)
12521 			chkop = newLOGOP(OP_AND, 0,
12522 				    newBINOP(OP_GT, 0,
12523 					scalar(newUNOP(OP_RV2AV, 0,
12524 					    newGVOP(OP_GV, 0, PL_defgv))),
12525 					newSVOP(OP_CONST, 0, newSViv(pos))),
12526 				    chkop);
12527 		    initops = op_append_list(OP_LINESEQ,
12528 				newSTATEOP(0, NULL, chkop),
12529 				initops);
12530 		}
12531 		if (var) {
12532 		    OP *slice = pos ?
12533 			op_prepend_elem(OP_ASLICE,
12534 			    newOP(OP_PUSHMARK, 0),
12535 			    newLISTOP(OP_ASLICE, 0,
12536 				list(newRANGE(0,
12537 				    newSVOP(OP_CONST, 0, newSViv(pos)),
12538 				    newUNOP(OP_AV2ARYLEN, 0,
12539 					ref(newUNOP(OP_RV2AV, 0,
12540 						newGVOP(OP_GV, 0, PL_defgv)),
12541 					    OP_AV2ARYLEN)))),
12542 				ref(newUNOP(OP_RV2AV, 0,
12543 					newGVOP(OP_GV, 0, PL_defgv)),
12544 				    OP_ASLICE))) :
12545 			newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12546 		    initops = op_append_list(OP_LINESEQ, initops,
12547 			newSTATEOP(0, NULL,
12548 			    newASSIGNOP(OPf_STACKED, var, 0, slice)));
12549 		}
12550 		prev_type = 2;
12551 		max_arity = -1;
12552 	    } break;
12553 	    default:
12554 		parse_error:
12555 		qerror(Perl_mess(aTHX_ "Parse error"));
12556 		return NULL;
12557 	}
12558 	lex_read_space(0);
12559 	c = lex_peek_unichar(0);
12560 	switch (c) {
12561 	    case /*(*/')': break;
12562 	    case ',':
12563 		do {
12564 		    lex_token_boundary();
12565 		    lex_read_unichar(0);
12566 		    lex_read_space(0);
12567 		    c = lex_peek_unichar(0);
12568 		} while (c == ',');
12569 		break;
12570 	    default:
12571 		goto parse_error;
12572 	}
12573     }
12574     if (min_arity != 0) {
12575 	initops = op_append_list(OP_LINESEQ,
12576 	    newSTATEOP(0, NULL,
12577 		newLOGOP(OP_OR, 0,
12578 		    newBINOP(OP_GE, 0,
12579 			scalar(newUNOP(OP_RV2AV, 0,
12580 			    newGVOP(OP_GV, 0, PL_defgv))),
12581 			newSVOP(OP_CONST, 0, newSViv(min_arity))),
12582 		    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12583 			newSVOP(OP_CONST, 0,
12584 			    newSVpvs("Too few arguments for subroutine"))))),
12585 	    initops);
12586     }
12587     if (max_arity != -1) {
12588 	initops = op_append_list(OP_LINESEQ,
12589 	    newSTATEOP(0, NULL,
12590 		newLOGOP(OP_OR, 0,
12591 		    newBINOP(OP_LE, 0,
12592 			scalar(newUNOP(OP_RV2AV, 0,
12593 			    newGVOP(OP_GV, 0, PL_defgv))),
12594 			newSVOP(OP_CONST, 0, newSViv(max_arity))),
12595 		    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12596 			newSVOP(OP_CONST, 0,
12597 			    newSVpvs("Too many arguments for subroutine"))))),
12598 	    initops);
12599     }
12600     return initops;
12601 }
12602 
12603 /*
12604  * Local variables:
12605  * c-indentation-style: bsd
12606  * c-basic-offset: 4
12607  * indent-tabs-mode: nil
12608  * End:
12609  *
12610  * ex: set ts=8 sts=4 sw=4 et:
12611  */
12612