xref: /openbsd-src/gnu/usr.bin/perl/toke.c (revision d59bb9942320b767f2a19aaa7690c8c6e30b724c)
1 /*    toke.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'It all comes from here, the stench and the peril.'    --Frodo
13  *
14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15  */
16 
17 /*
18  * This file is the lexer for Perl.  It's closely linked to the
19  * parser, perly.y.
20  *
21  * The main routine is yylex(), which returns the next token.
22  */
23 
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27 
28 =for apidoc AmU|yy_parser *|PL_parser
29 
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress.  The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34 
35 =cut
36 */
37 
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "dquote_inline.h"
42 
43 #define new_constant(a,b,c,d,e,f,g)	\
44 	S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45 
46 #define pl_yylval	(PL_parser->yylval)
47 
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets		(PL_parser->lex_brackets)
50 #define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack	(PL_parser->lex_brackstack)
53 #define PL_lex_casemods		(PL_parser->lex_casemods)
54 #define PL_lex_casestack        (PL_parser->lex_casestack)
55 #define PL_lex_defer		(PL_parser->lex_defer)
56 #define PL_lex_dojoin		(PL_parser->lex_dojoin)
57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
58 #define PL_lex_inpat		(PL_parser->lex_inpat)
59 #define PL_lex_inwhat		(PL_parser->lex_inwhat)
60 #define PL_lex_op		(PL_parser->lex_op)
61 #define PL_lex_repl		(PL_parser->lex_repl)
62 #define PL_lex_starts		(PL_parser->lex_starts)
63 #define PL_lex_stuff		(PL_parser->lex_stuff)
64 #define PL_multi_start		(PL_parser->multi_start)
65 #define PL_multi_open		(PL_parser->multi_open)
66 #define PL_multi_close		(PL_parser->multi_close)
67 #define PL_preambled		(PL_parser->preambled)
68 #define PL_sublex_info		(PL_parser->sublex_info)
69 #define PL_linestr		(PL_parser->linestr)
70 #define PL_expect		(PL_parser->expect)
71 #define PL_copline		(PL_parser->copline)
72 #define PL_bufptr		(PL_parser->bufptr)
73 #define PL_oldbufptr		(PL_parser->oldbufptr)
74 #define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
75 #define PL_linestart		(PL_parser->linestart)
76 #define PL_bufend		(PL_parser->bufend)
77 #define PL_last_uni		(PL_parser->last_uni)
78 #define PL_last_lop		(PL_parser->last_lop)
79 #define PL_last_lop_op		(PL_parser->last_lop_op)
80 #define PL_lex_state		(PL_parser->lex_state)
81 #define PL_rsfp			(PL_parser->rsfp)
82 #define PL_rsfp_filters		(PL_parser->rsfp_filters)
83 #define PL_in_my		(PL_parser->in_my)
84 #define PL_in_my_stash		(PL_parser->in_my_stash)
85 #define PL_tokenbuf		(PL_parser->tokenbuf)
86 #define PL_multi_end		(PL_parser->multi_end)
87 #define PL_error_count		(PL_parser->error_count)
88 
89 #  define PL_nexttoke		(PL_parser->nexttoke)
90 #  define PL_nexttype		(PL_parser->nexttype)
91 #  define PL_nextval		(PL_parser->nextval)
92 
93 static const char* const ident_too_long = "Identifier too long";
94 
95 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
96 
97 #define XENUMMASK  0x3f
98 #define XFAKEEOF   0x40
99 #define XFAKEBRACK 0x80
100 
101 #ifdef USE_UTF8_SCRIPTS
102 #   define UTF cBOOL(!IN_BYTES)
103 #else
104 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
105 #endif
106 
107 /* The maximum number of characters preceding the unrecognized one to display */
108 #define UNRECOGNIZED_PRECEDE_COUNT 10
109 
110 /* In variables named $^X, these are the legal values for X.
111  * 1999-02-27 mjd-perl-patch@plover.com */
112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
113 
114 #define SPACE_OR_TAB(c) isBLANK_A(c)
115 
116 #define HEXFP_PEEK(s)     \
117     (((s[0] == '.') && \
118       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
119      isALPHA_FOLD_EQ(s[0], 'p'))
120 
121 /* LEX_* are values for PL_lex_state, the state of the lexer.
122  * They are arranged oddly so that the guard on the switch statement
123  * can get by with a single comparison (if the compiler is smart enough).
124  *
125  * These values refer to the various states within a sublex parse,
126  * i.e. within a double quotish string
127  */
128 
129 /* #define LEX_NOTPARSING		11 is done in perl.h. */
130 
131 #define LEX_NORMAL		10 /* normal code (ie not within "...")     */
132 #define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
133 #define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
134 #define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
135 #define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
136 
137 				   /* at end of code, eg "$x" followed by:  */
138 #define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
139 #define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
140 
141 #define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
142 				        string or after \E, $foo, etc       */
143 #define LEX_INTERPCONST		 2 /* NOT USED */
144 #define LEX_FORMLINE		 1 /* expecting a format line               */
145 #define LEX_KNOWNEXT		 0 /* next token known; just return it      */
146 
147 
148 #ifdef DEBUGGING
149 static const char* const lex_state_names[] = {
150     "KNOWNEXT",
151     "FORMLINE",
152     "INTERPCONST",
153     "INTERPCONCAT",
154     "INTERPENDMAYBE",
155     "INTERPEND",
156     "INTERPSTART",
157     "INTERPPUSH",
158     "INTERPCASEMOD",
159     "INTERPNORMAL",
160     "NORMAL"
161 };
162 #endif
163 
164 #include "keywords.h"
165 
166 /* CLINE is a macro that ensures PL_copline has a sane value */
167 
168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
169 
170 /*
171  * Convenience functions to return different tokens and prime the
172  * lexer for the next token.  They all take an argument.
173  *
174  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
175  * OPERATOR     : generic operator
176  * AOPERATOR    : assignment operator
177  * PREBLOCK     : beginning the block after an if, while, foreach, ...
178  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
179  * PREREF       : *EXPR where EXPR is not a simple identifier
180  * TERM         : expression term
181  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
182  * LOOPX        : loop exiting command (goto, last, dump, etc)
183  * FTST         : file test operator
184  * FUN0         : zero-argument function
185  * FUN0OP       : zero-argument function, with its op created in this file
186  * FUN1         : not used, except for not, which isn't a UNIOP
187  * BOop         : bitwise or or xor
188  * BAop         : bitwise and
189  * BCop         : bitwise complement
190  * SHop         : shift operator
191  * PWop         : power operator
192  * PMop         : pattern-matching operator
193  * Aop          : addition-level operator
194  * AopNOASSIGN  : addition-level operator that is never part of .=
195  * Mop          : multiplication-level operator
196  * Eop          : equality-testing operator
197  * Rop          : relational operator <= != gt
198  *
199  * Also see LOP and lop() below.
200  */
201 
202 #ifdef DEBUGGING /* Serve -DT. */
203 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
204 #else
205 #   define REPORT(retval) (retval)
206 #endif
207 
208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
217 			 pl_yylval.ival=f, \
218 			 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
219 			 REPORT((int)LOOPEX))
220 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
221 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
222 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
223 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
224 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
225 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
227 		       REPORT('~')
228 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
229 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
230 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
231 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
233 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
234 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
235 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
236 
237 /* This bit of chicanery makes a unary function followed by
238  * a parenthesis into a function with one argument, highest precedence.
239  * The UNIDOR macro is for unary functions that can be followed by the //
240  * operator (such as C<shift // 0>).
241  */
242 #define UNI3(f,x,have_x) { \
243 	pl_yylval.ival = f; \
244 	if (have_x) PL_expect = x; \
245 	PL_bufptr = s; \
246 	PL_last_uni = PL_oldbufptr; \
247 	PL_last_lop_op = f; \
248 	if (*s == '(') \
249 	    return REPORT( (int)FUNC1 ); \
250 	s = skipspace(s); \
251 	return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
252 	}
253 #define UNI(f)    UNI3(f,XTERM,1)
254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
255 #define UNIPROTO(f,optional) { \
256 	if (optional) PL_last_uni = PL_oldbufptr; \
257 	OPERATOR(f); \
258 	}
259 
260 #define UNIBRACK(f) UNI3(f,0,0)
261 
262 /* grandfather return to old style */
263 #define OLDLOP(f) \
264 	do { \
265 	    if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
266 		PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
267 	    pl_yylval.ival = (f); \
268 	    PL_expect = XTERM; \
269 	    PL_bufptr = s; \
270 	    return (int)LSTOP; \
271 	} while(0)
272 
273 #define COPLINE_INC_WITH_HERELINES		    \
274     STMT_START {				     \
275 	CopLINE_inc(PL_curcop);			      \
276 	if (PL_parser->herelines)		       \
277 	    CopLINE(PL_curcop) += PL_parser->herelines, \
278 	    PL_parser->herelines = 0;			 \
279     } STMT_END
280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
281  * is no sublex_push to follow. */
282 #define COPLINE_SET_FROM_MULTI_END	      \
283     STMT_START {			       \
284 	CopLINE_set(PL_curcop, PL_multi_end);	\
285 	if (PL_multi_end != PL_multi_start)	 \
286 	    PL_parser->herelines = 0;		  \
287     } STMT_END
288 
289 
290 #ifdef DEBUGGING
291 
292 /* how to interpret the pl_yylval associated with the token */
293 enum token_type {
294     TOKENTYPE_NONE,
295     TOKENTYPE_IVAL,
296     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
297     TOKENTYPE_PVAL,
298     TOKENTYPE_OPVAL
299 };
300 
301 static struct debug_tokens {
302     const int token;
303     enum token_type type;
304     const char *name;
305 } const debug_tokens[] =
306 {
307     { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
308     { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
309     { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
310     { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
311     { ARROW,		TOKENTYPE_NONE,		"ARROW" },
312     { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
313     { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
314     { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
315     { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
316     { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
317     { DEFAULT,		TOKENTYPE_NONE,		"DEFAULT" },
318     { DO,		TOKENTYPE_NONE,		"DO" },
319     { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
320     { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
321     { DOROP,		TOKENTYPE_OPNUM,	"DOROP" },
322     { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
323     { ELSE,		TOKENTYPE_NONE,		"ELSE" },
324     { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
325     { EQOP,		TOKENTYPE_OPNUM,	"EQOP" },
326     { FOR,		TOKENTYPE_IVAL,		"FOR" },
327     { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
328     { FORMLBRACK,	TOKENTYPE_NONE,		"FORMLBRACK" },
329     { FORMRBRACK,	TOKENTYPE_NONE,		"FORMRBRACK" },
330     { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
331     { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
332     { FUNC0OP,		TOKENTYPE_OPVAL,	"FUNC0OP" },
333     { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
334     { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
335     { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
336     { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
337     { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
338     { IF,		TOKENTYPE_IVAL,		"IF" },
339     { LABEL,		TOKENTYPE_PVAL,		"LABEL" },
340     { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
341     { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
342     { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
343     { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
344     { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
345     { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
346     { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
347     { MY,		TOKENTYPE_IVAL,		"MY" },
348     { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
349     { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
350     { OROP,		TOKENTYPE_IVAL,		"OROP" },
351     { OROR,		TOKENTYPE_NONE,		"OROR" },
352     { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
353     { PLUGEXPR,		TOKENTYPE_OPVAL,	"PLUGEXPR" },
354     { PLUGSTMT,		TOKENTYPE_OPVAL,	"PLUGSTMT" },
355     { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
356     { POSTJOIN,		TOKENTYPE_NONE,		"POSTJOIN" },
357     { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
358     { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
359     { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
360     { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
361     { PREINC,		TOKENTYPE_NONE,		"PREINC" },
362     { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
363     { QWLIST,		TOKENTYPE_OPVAL,	"QWLIST" },
364     { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
365     { RELOP,		TOKENTYPE_OPNUM,	"RELOP" },
366     { REQUIRE,		TOKENTYPE_NONE,		"REQUIRE" },
367     { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
368     { SUB,		TOKENTYPE_NONE,		"SUB" },
369     { THING,		TOKENTYPE_OPVAL,	"THING" },
370     { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
371     { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
372     { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
373     { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
374     { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
375     { USE,		TOKENTYPE_IVAL,		"USE" },
376     { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
377     { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
378     { WORD,		TOKENTYPE_OPVAL,	"WORD" },
379     { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
380     { 0,		TOKENTYPE_NONE,		NULL }
381 };
382 
383 /* dump the returned token in rv, plus any optional arg in pl_yylval */
384 
385 STATIC int
386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
387 {
388     PERL_ARGS_ASSERT_TOKEREPORT;
389 
390     if (DEBUG_T_TEST) {
391 	const char *name = NULL;
392 	enum token_type type = TOKENTYPE_NONE;
393 	const struct debug_tokens *p;
394 	SV* const report = newSVpvs("<== ");
395 
396 	for (p = debug_tokens; p->token; p++) {
397 	    if (p->token == (int)rv) {
398 		name = p->name;
399 		type = p->type;
400 		break;
401 	    }
402 	}
403 	if (name)
404 	    Perl_sv_catpv(aTHX_ report, name);
405 	else if (isGRAPH(rv))
406 	{
407 	    Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
408 	    if ((char)rv == 'p')
409 		sv_catpvs(report, " (pending identifier)");
410 	}
411 	else if (!rv)
412 	    sv_catpvs(report, "EOF");
413 	else
414 	    Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
415 	switch (type) {
416 	case TOKENTYPE_NONE:
417 	    break;
418 	case TOKENTYPE_IVAL:
419 	    Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
420 	    break;
421 	case TOKENTYPE_OPNUM:
422 	    Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
423 				    PL_op_name[lvalp->ival]);
424 	    break;
425 	case TOKENTYPE_PVAL:
426 	    Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
427 	    break;
428 	case TOKENTYPE_OPVAL:
429 	    if (lvalp->opval) {
430 		Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
431 				    PL_op_name[lvalp->opval->op_type]);
432 		if (lvalp->opval->op_type == OP_CONST) {
433 		    Perl_sv_catpvf(aTHX_ report, " %s",
434 			SvPEEK(cSVOPx_sv(lvalp->opval)));
435 		}
436 
437 	    }
438 	    else
439 		sv_catpvs(report, "(opval=null)");
440 	    break;
441 	}
442         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
443     };
444     return (int)rv;
445 }
446 
447 
448 /* print the buffer with suitable escapes */
449 
450 STATIC void
451 S_printbuf(pTHX_ const char *const fmt, const char *const s)
452 {
453     SV* const tmp = newSVpvs("");
454 
455     PERL_ARGS_ASSERT_PRINTBUF;
456 
457     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
458     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
459     GCC_DIAG_RESTORE;
460     SvREFCNT_dec(tmp);
461 }
462 
463 #endif
464 
465 static int
466 S_deprecate_commaless_var_list(pTHX) {
467     PL_expect = XTERM;
468     deprecate("comma-less variable list");
469     return REPORT(','); /* grandfather non-comma-format format */
470 }
471 
472 /*
473  * S_ao
474  *
475  * This subroutine looks for an '=' next to the operator that has just been
476  * parsed and turns it into an ASSIGNOP if it finds one.
477  */
478 
479 STATIC int
480 S_ao(pTHX_ int toketype)
481 {
482     if (*PL_bufptr == '=') {
483 	PL_bufptr++;
484 	if (toketype == ANDAND)
485 	    pl_yylval.ival = OP_ANDASSIGN;
486 	else if (toketype == OROR)
487 	    pl_yylval.ival = OP_ORASSIGN;
488 	else if (toketype == DORDOR)
489 	    pl_yylval.ival = OP_DORASSIGN;
490 	toketype = ASSIGNOP;
491     }
492     return REPORT(toketype);
493 }
494 
495 /*
496  * S_no_op
497  * When Perl expects an operator and finds something else, no_op
498  * prints the warning.  It always prints "<something> found where
499  * operator expected.  It prints "Missing semicolon on previous line?"
500  * if the surprise occurs at the start of the line.  "do you need to
501  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502  * where the compiler doesn't know if foo is a method call or a function.
503  * It prints "Missing operator before end of line" if there's nothing
504  * after the missing operator, or "... before <...>" if there is something
505  * after the missing operator.
506  *
507  * PL_bufptr is expected to point to the start of the thing that was found,
508  * and s after the next token or partial token.
509  */
510 
511 STATIC void
512 S_no_op(pTHX_ const char *const what, char *s)
513 {
514     char * const oldbp = PL_bufptr;
515     const bool is_first = (PL_oldbufptr == PL_linestart);
516 
517     PERL_ARGS_ASSERT_NO_OP;
518 
519     if (!s)
520 	s = oldbp;
521     else
522 	PL_bufptr = s;
523     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
524     if (ckWARN_d(WARN_SYNTAX)) {
525 	if (is_first)
526 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 		    "\t(Missing semicolon on previous line?)\n");
528 	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
529 	    const char *t;
530 	    for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
531                                                             t += UTF ? UTF8SKIP(t) : 1)
532 		NOOP;
533 	    if (t < PL_bufptr && isSPACE(*t))
534 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 			"\t(Do you need to predeclare %"UTF8f"?)\n",
536 		      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
537 	}
538 	else {
539 	    assert(s >= oldbp);
540 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541 		    "\t(Missing operator before %"UTF8f"?)\n",
542 		     UTF8fARG(UTF, s - oldbp, oldbp));
543 	}
544     }
545     PL_bufptr = oldbp;
546 }
547 
548 /*
549  * S_missingterm
550  * Complain about missing quote/regexp/heredoc terminator.
551  * If it's called with NULL then it cauterizes the line buffer.
552  * If we're in a delimited string and the delimiter is a control
553  * character, it's reformatted into a two-char sequence like ^C.
554  * This is fatal.
555  */
556 
557 STATIC void
558 S_missingterm(pTHX_ char *s)
559 {
560     char tmpbuf[3];
561     char q;
562     if (s) {
563 	char * const nl = strrchr(s,'\n');
564 	if (nl)
565 	    *nl = '\0';
566     }
567     else if ((U8) PL_multi_close < 32) {
568 	*tmpbuf = '^';
569 	tmpbuf[1] = (char)toCTRL(PL_multi_close);
570 	tmpbuf[2] = '\0';
571 	s = tmpbuf;
572     }
573     else {
574 	*tmpbuf = (char)PL_multi_close;
575 	tmpbuf[1] = '\0';
576 	s = tmpbuf;
577     }
578     q = strchr(s,'"') ? '\'' : '"';
579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
580 }
581 
582 #include "feature.h"
583 
584 /*
585  * Check whether the named feature is enabled.
586  */
587 bool
588 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
589 {
590     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
591 
592     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
593 
594     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
595 
596     if (namelen > MAX_FEATURE_LEN)
597 	return FALSE;
598     memcpy(&he_name[8], name, namelen);
599 
600     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
601 				     REFCOUNTED_HE_EXISTS));
602 }
603 
604 /*
605  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606  * utf16-to-utf8-reversed.
607  */
608 
609 #ifdef PERL_CR_FILTER
610 static void
611 strip_return(SV *sv)
612 {
613     const char *s = SvPVX_const(sv);
614     const char * const e = s + SvCUR(sv);
615 
616     PERL_ARGS_ASSERT_STRIP_RETURN;
617 
618     /* outer loop optimized to do nothing if there are no CR-LFs */
619     while (s < e) {
620 	if (*s++ == '\r' && *s == '\n') {
621 	    /* hit a CR-LF, need to copy the rest */
622 	    char *d = s - 1;
623 	    *d++ = *s++;
624 	    while (s < e) {
625 		if (*s == '\r' && s[1] == '\n')
626 		    s++;
627 		*d++ = *s++;
628 	    }
629 	    SvCUR(sv) -= s - d;
630 	    return;
631 	}
632     }
633 }
634 
635 STATIC I32
636 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
637 {
638     const I32 count = FILTER_READ(idx+1, sv, maxlen);
639     if (count > 0 && !maxlen)
640 	strip_return(sv);
641     return count;
642 }
643 #endif
644 
645 /*
646 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
647 
648 Creates and initialises a new lexer/parser state object, supplying
649 a context in which to lex and parse from a new source of Perl code.
650 A pointer to the new state object is placed in L</PL_parser>.  An entry
651 is made on the save stack so that upon unwinding the new state object
652 will be destroyed and the former value of L</PL_parser> will be restored.
653 Nothing else need be done to clean up the parsing context.
654 
655 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
656 non-null, provides a string (in SV form) containing code to be parsed.
657 A copy of the string is made, so subsequent modification of C<line>
658 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
659 from which code will be read to be parsed.  If both are non-null, the
660 code in C<line> comes first and must consist of complete lines of input,
661 and C<rsfp> supplies the remainder of the source.
662 
663 The C<flags> parameter is reserved for future use.  Currently it is only
664 used by perl internally, so extensions should always pass zero.
665 
666 =cut
667 */
668 
669 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
670    can share filters with the current parser.
671    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
672    caller, hence isn't owned by the parser, so shouldn't be closed on parser
673    destruction. This is used to handle the case of defaulting to reading the
674    script from the standard input because no filename was given on the command
675    line (without getting confused by situation where STDIN has been closed, so
676    the script handle is opened on fd 0)  */
677 
678 void
679 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
680 {
681     const char *s = NULL;
682     yy_parser *parser, *oparser;
683     if (flags && flags & ~LEX_START_FLAGS)
684 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
685 
686     /* create and initialise a parser */
687 
688     Newxz(parser, 1, yy_parser);
689     parser->old_parser = oparser = PL_parser;
690     PL_parser = parser;
691 
692     parser->stack = NULL;
693     parser->ps = NULL;
694     parser->stack_size = 0;
695 
696     /* on scope exit, free this parser and restore any outer one */
697     SAVEPARSER(parser);
698     parser->saved_curcop = PL_curcop;
699 
700     /* initialise lexer state */
701 
702     parser->nexttoke = 0;
703     parser->error_count = oparser ? oparser->error_count : 0;
704     parser->copline = parser->preambling = NOLINE;
705     parser->lex_state = LEX_NORMAL;
706     parser->expect = XSTATE;
707     parser->rsfp = rsfp;
708     parser->rsfp_filters =
709       !(flags & LEX_START_SAME_FILTER) || !oparser
710         ? NULL
711         : MUTABLE_AV(SvREFCNT_inc(
712             oparser->rsfp_filters
713              ? oparser->rsfp_filters
714              : (oparser->rsfp_filters = newAV())
715           ));
716 
717     Newx(parser->lex_brackstack, 120, char);
718     Newx(parser->lex_casestack, 12, char);
719     *parser->lex_casestack = '\0';
720     Newxz(parser->lex_shared, 1, LEXSHARED);
721 
722     if (line) {
723 	STRLEN len;
724 	s = SvPV_const(line, len);
725 	parser->linestr = flags & LEX_START_COPIED
726 			    ? SvREFCNT_inc_simple_NN(line)
727 			    : newSVpvn_flags(s, len, SvUTF8(line));
728 	sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
729     } else {
730 	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
731     }
732     parser->oldoldbufptr =
733 	parser->oldbufptr =
734 	parser->bufptr =
735 	parser->linestart = SvPVX(parser->linestr);
736     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
737     parser->last_lop = parser->last_uni = NULL;
738 
739     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
740                                                         |LEX_DONT_CLOSE_RSFP));
741     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
742                                                         |LEX_DONT_CLOSE_RSFP));
743 
744     parser->in_pod = parser->filtered = 0;
745 }
746 
747 
748 /* delete a parser object */
749 
750 void
751 Perl_parser_free(pTHX_  const yy_parser *parser)
752 {
753     PERL_ARGS_ASSERT_PARSER_FREE;
754 
755     PL_curcop = parser->saved_curcop;
756     SvREFCNT_dec(parser->linestr);
757 
758     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
759 	PerlIO_clearerr(parser->rsfp);
760     else if (parser->rsfp && (!parser->old_parser
761           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
762 	PerlIO_close(parser->rsfp);
763     SvREFCNT_dec(parser->rsfp_filters);
764     SvREFCNT_dec(parser->lex_stuff);
765     SvREFCNT_dec(parser->sublex_info.repl);
766 
767     Safefree(parser->lex_brackstack);
768     Safefree(parser->lex_casestack);
769     Safefree(parser->lex_shared);
770     PL_parser = parser->old_parser;
771     Safefree(parser);
772 }
773 
774 void
775 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
776 {
777     I32 nexttoke = parser->nexttoke;
778     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
779     while (nexttoke--) {
780 	if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
781 	 && parser->nextval[nexttoke].opval
782 	 && parser->nextval[nexttoke].opval->op_slabbed
783 	 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
784 	    op_free(parser->nextval[nexttoke].opval);
785 	    parser->nextval[nexttoke].opval = NULL;
786 	}
787     }
788 }
789 
790 
791 /*
792 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
793 
794 Buffer scalar containing the chunk currently under consideration of the
795 text currently being lexed.  This is always a plain string scalar (for
796 which C<SvPOK> is true).  It is not intended to be used as a scalar by
797 normal scalar means; instead refer to the buffer directly by the pointer
798 variables described below.
799 
800 The lexer maintains various C<char*> pointers to things in the
801 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
802 reallocated, all of these pointers must be updated.  Don't attempt to
803 do this manually, but rather use L</lex_grow_linestr> if you need to
804 reallocate the buffer.
805 
806 The content of the text chunk in the buffer is commonly exactly one
807 complete line of input, up to and including a newline terminator,
808 but there are situations where it is otherwise.  The octets of the
809 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
810 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
811 flag on this scalar, which may disagree with it.
812 
813 For direct examination of the buffer, the variable
814 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
815 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
816 of these pointers is usually preferable to examination of the scalar
817 through normal scalar means.
818 
819 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
820 
821 Direct pointer to the end of the chunk of text currently being lexed, the
822 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
823 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
824 always located at the end of the buffer, and does not count as part of
825 the buffer's contents.
826 
827 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
828 
829 Points to the current position of lexing inside the lexer buffer.
830 Characters around this point may be freely examined, within
831 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
832 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
833 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
834 
835 Lexing code (whether in the Perl core or not) moves this pointer past
836 the characters that it consumes.  It is also expected to perform some
837 bookkeeping whenever a newline character is consumed.  This movement
838 can be more conveniently performed by the function L</lex_read_to>,
839 which handles newlines appropriately.
840 
841 Interpretation of the buffer's octets can be abstracted out by
842 using the slightly higher-level functions L</lex_peek_unichar> and
843 L</lex_read_unichar>.
844 
845 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
846 
847 Points to the start of the current line inside the lexer buffer.
848 This is useful for indicating at which column an error occurred, and
849 not much else.  This must be updated by any lexing code that consumes
850 a newline; the function L</lex_read_to> handles this detail.
851 
852 =cut
853 */
854 
855 /*
856 =for apidoc Amx|bool|lex_bufutf8
857 
858 Indicates whether the octets in the lexer buffer
859 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
860 of Unicode characters.  If not, they should be interpreted as Latin-1
861 characters.  This is analogous to the C<SvUTF8> flag for scalars.
862 
863 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
864 contains valid UTF-8.  Lexing code must be robust in the face of invalid
865 encoding.
866 
867 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
868 is significant, but not the whole story regarding the input character
869 encoding.  Normally, when a file is being read, the scalar contains octets
870 and its C<SvUTF8> flag is off, but the octets should be interpreted as
871 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
872 however, the scalar may have the C<SvUTF8> flag on, and in this case its
873 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
874 is in effect.  This logic may change in the future; use this function
875 instead of implementing the logic yourself.
876 
877 =cut
878 */
879 
880 bool
881 Perl_lex_bufutf8(pTHX)
882 {
883     return UTF;
884 }
885 
886 /*
887 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
888 
889 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
890 at least C<len> octets (including terminating C<NUL>).  Returns a
891 pointer to the reallocated buffer.  This is necessary before making
892 any direct modification of the buffer that would increase its length.
893 L</lex_stuff_pvn> provides a more convenient way to insert text into
894 the buffer.
895 
896 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
897 this function updates all of the lexer's variables that point directly
898 into the buffer.
899 
900 =cut
901 */
902 
903 char *
904 Perl_lex_grow_linestr(pTHX_ STRLEN len)
905 {
906     SV *linestr;
907     char *buf;
908     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
909     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
910     linestr = PL_parser->linestr;
911     buf = SvPVX(linestr);
912     if (len <= SvLEN(linestr))
913 	return buf;
914     bufend_pos = PL_parser->bufend - buf;
915     bufptr_pos = PL_parser->bufptr - buf;
916     oldbufptr_pos = PL_parser->oldbufptr - buf;
917     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
918     linestart_pos = PL_parser->linestart - buf;
919     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
920     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
921     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
922                             PL_parser->lex_shared->re_eval_start - buf : 0;
923 
924     buf = sv_grow(linestr, len);
925 
926     PL_parser->bufend = buf + bufend_pos;
927     PL_parser->bufptr = buf + bufptr_pos;
928     PL_parser->oldbufptr = buf + oldbufptr_pos;
929     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
930     PL_parser->linestart = buf + linestart_pos;
931     if (PL_parser->last_uni)
932 	PL_parser->last_uni = buf + last_uni_pos;
933     if (PL_parser->last_lop)
934 	PL_parser->last_lop = buf + last_lop_pos;
935     if (PL_parser->lex_shared->re_eval_start)
936         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
937     return buf;
938 }
939 
940 /*
941 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
942 
943 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
944 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
945 reallocating the buffer if necessary.  This means that lexing code that
946 runs later will see the characters as if they had appeared in the input.
947 It is not recommended to do this as part of normal parsing, and most
948 uses of this facility run the risk of the inserted characters being
949 interpreted in an unintended manner.
950 
951 The string to be inserted is represented by C<len> octets starting
952 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
953 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
954 The characters are recoded for the lexer buffer, according to how the
955 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
956 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
957 function is more convenient.
958 
959 =cut
960 */
961 
962 void
963 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
964 {
965     dVAR;
966     char *bufptr;
967     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
968     if (flags & ~(LEX_STUFF_UTF8))
969 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
970     if (UTF) {
971 	if (flags & LEX_STUFF_UTF8) {
972 	    goto plain_copy;
973 	} else {
974 	    STRLEN highhalf = 0;    /* Count of variants */
975 	    const char *p, *e = pv+len;
976 	    for (p = pv; p != e; p++) {
977 		if (! UTF8_IS_INVARIANT(*p)) {
978                     highhalf++;
979                 }
980             }
981 	    if (!highhalf)
982 		goto plain_copy;
983 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
984 	    bufptr = PL_parser->bufptr;
985 	    Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
986 	    SvCUR_set(PL_parser->linestr,
987 	    	SvCUR(PL_parser->linestr) + len+highhalf);
988 	    PL_parser->bufend += len+highhalf;
989 	    for (p = pv; p != e; p++) {
990 		U8 c = (U8)*p;
991 		if (! UTF8_IS_INVARIANT(c)) {
992 		    *bufptr++ = UTF8_TWO_BYTE_HI(c);
993 		    *bufptr++ = UTF8_TWO_BYTE_LO(c);
994 		} else {
995 		    *bufptr++ = (char)c;
996 		}
997 	    }
998 	}
999     } else {
1000 	if (flags & LEX_STUFF_UTF8) {
1001 	    STRLEN highhalf = 0;
1002 	    const char *p, *e = pv+len;
1003 	    for (p = pv; p != e; p++) {
1004 		U8 c = (U8)*p;
1005 		if (UTF8_IS_ABOVE_LATIN1(c)) {
1006 		    Perl_croak(aTHX_ "Lexing code attempted to stuff "
1007 				"non-Latin-1 character into Latin-1 input");
1008 		} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1009 		    p++;
1010 		    highhalf++;
1011 		} else if (! UTF8_IS_INVARIANT(c)) {
1012 		    /* malformed UTF-8 */
1013 		    ENTER;
1014 		    SAVESPTR(PL_warnhook);
1015 		    PL_warnhook = PERL_WARNHOOK_FATAL;
1016 		    utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1017 		    LEAVE;
1018 		}
1019 	    }
1020 	    if (!highhalf)
1021 		goto plain_copy;
1022 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023 	    bufptr = PL_parser->bufptr;
1024 	    Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1025 	    SvCUR_set(PL_parser->linestr,
1026 	    	SvCUR(PL_parser->linestr) + len-highhalf);
1027 	    PL_parser->bufend += len-highhalf;
1028 	    p = pv;
1029 	    while (p < e) {
1030 		if (UTF8_IS_INVARIANT(*p)) {
1031 		    *bufptr++ = *p;
1032                     p++;
1033 		}
1034 		else {
1035                     assert(p < e -1 );
1036 		    *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1037 		    p += 2;
1038                 }
1039 	    }
1040 	} else {
1041 	  plain_copy:
1042 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043 	    bufptr = PL_parser->bufptr;
1044 	    Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045 	    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046 	    PL_parser->bufend += len;
1047 	    Copy(pv, bufptr, len, char);
1048 	}
1049     }
1050 }
1051 
1052 /*
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1054 
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary.  This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1062 
1063 The string to be inserted is represented by octets starting at C<pv>
1064 and continuing to the first nul.  These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in C<flags>.  The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1070 
1071 =cut
1072 */
1073 
1074 void
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1076 {
1077     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078     lex_stuff_pvn(pv, strlen(pv), flags);
1079 }
1080 
1081 /*
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1083 
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary.  This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1091 
1092 The string to be inserted is the string value of C<sv>.  The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1097 
1098 =cut
1099 */
1100 
1101 void
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 {
1104     char *pv;
1105     STRLEN len;
1106     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1107     if (flags)
1108 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1109     pv = SvPV(sv, len);
1110     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 }
1112 
1113 /*
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1115 
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1120 
1121 This is not the normal way to consume lexed text.  For that, use
1122 L</lex_read_to>.
1123 
1124 =cut
1125 */
1126 
1127 void
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1129 {
1130     char *buf, *bufend;
1131     STRLEN unstuff_len;
1132     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133     buf = PL_parser->bufptr;
1134     if (ptr < buf)
1135 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1136     if (ptr == buf)
1137 	return;
1138     bufend = PL_parser->bufend;
1139     if (ptr > bufend)
1140 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141     unstuff_len = ptr - buf;
1142     Move(ptr, buf, bufend+1-ptr, char);
1143     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144     PL_parser->bufend = bufend - unstuff_len;
1145 }
1146 
1147 /*
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1149 
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1154 
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1158 
1159 =cut
1160 */
1161 
1162 void
1163 Perl_lex_read_to(pTHX_ char *ptr)
1164 {
1165     char *s;
1166     PERL_ARGS_ASSERT_LEX_READ_TO;
1167     s = PL_parser->bufptr;
1168     if (ptr < s || ptr > PL_parser->bufend)
1169 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170     for (; s != ptr; s++)
1171 	if (*s == '\n') {
1172 	    COPLINE_INC_WITH_HERELINES;
1173 	    PL_parser->linestart = s+1;
1174 	}
1175     PL_parser->bufptr = ptr;
1176 }
1177 
1178 /*
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1180 
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to C<ptr>.  The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately.  C<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1186 
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it.  However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1194 
1195 =cut
1196 */
1197 
1198 void
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1200 {
1201     char *buf;
1202     STRLEN discard_len;
1203     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204     buf = SvPVX(PL_parser->linestr);
1205     if (ptr < buf)
1206 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1207     if (ptr == buf)
1208 	return;
1209     if (ptr > PL_parser->bufptr)
1210 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211     discard_len = ptr - buf;
1212     if (PL_parser->oldbufptr < ptr)
1213 	PL_parser->oldbufptr = ptr;
1214     if (PL_parser->oldoldbufptr < ptr)
1215 	PL_parser->oldoldbufptr = ptr;
1216     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217 	PL_parser->last_uni = NULL;
1218     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219 	PL_parser->last_lop = NULL;
1220     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222     PL_parser->bufend -= discard_len;
1223     PL_parser->bufptr -= discard_len;
1224     PL_parser->oldbufptr -= discard_len;
1225     PL_parser->oldoldbufptr -= discard_len;
1226     if (PL_parser->last_uni)
1227 	PL_parser->last_uni -= discard_len;
1228     if (PL_parser->last_lop)
1229 	PL_parser->last_lop -= discard_len;
1230 }
1231 
1232 /*
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1234 
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more.  It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1240 
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1245 will not be discarded.  If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1247 
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1250 
1251 =cut
1252 */
1253 
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM  0x40000000 /* here-doc */
1256 
1257 bool
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1259 {
1260     SV *linestr;
1261     char *buf;
1262     STRLEN old_bufend_pos, new_bufend_pos;
1263     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265     bool got_some_for_debugger = 0;
1266     bool got_some;
1267     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1270 	return FALSE;
1271     linestr = PL_parser->linestr;
1272     buf = SvPVX(linestr);
1273     if (!(flags & LEX_KEEP_PREVIOUS)
1274           && PL_parser->bufptr == PL_parser->bufend)
1275     {
1276 	old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1277 	linestart_pos = 0;
1278 	if (PL_parser->last_uni != PL_parser->bufend)
1279 	    PL_parser->last_uni = NULL;
1280 	if (PL_parser->last_lop != PL_parser->bufend)
1281 	    PL_parser->last_lop = NULL;
1282 	last_uni_pos = last_lop_pos = 0;
1283 	*buf = 0;
1284 	SvCUR(linestr) = 0;
1285     } else {
1286 	old_bufend_pos = PL_parser->bufend - buf;
1287 	bufptr_pos = PL_parser->bufptr - buf;
1288 	oldbufptr_pos = PL_parser->oldbufptr - buf;
1289 	oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1290 	linestart_pos = PL_parser->linestart - buf;
1291 	last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1292 	last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1293     }
1294     if (flags & LEX_FAKE_EOF) {
1295 	goto eof;
1296     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1297 	got_some = 0;
1298     } else if (filter_gets(linestr, old_bufend_pos)) {
1299 	got_some = 1;
1300 	got_some_for_debugger = 1;
1301     } else if (flags & LEX_NO_TERM) {
1302 	got_some = 0;
1303     } else {
1304 	if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1305 	    sv_setpvs(linestr, "");
1306 	eof:
1307 	/* End of real input.  Close filehandle (unless it was STDIN),
1308 	 * then add implicit termination.
1309 	 */
1310 	if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1311 	    PerlIO_clearerr(PL_parser->rsfp);
1312 	else if (PL_parser->rsfp)
1313 	    (void)PerlIO_close(PL_parser->rsfp);
1314 	PL_parser->rsfp = NULL;
1315 	PL_parser->in_pod = PL_parser->filtered = 0;
1316 	if (!PL_in_eval && PL_minus_p) {
1317 	    sv_catpvs(linestr,
1318 		/*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1319 	    PL_minus_n = PL_minus_p = 0;
1320 	} else if (!PL_in_eval && PL_minus_n) {
1321 	    sv_catpvs(linestr, /*{*/";}");
1322 	    PL_minus_n = 0;
1323 	} else
1324 	    sv_catpvs(linestr, ";");
1325 	got_some = 1;
1326     }
1327     buf = SvPVX(linestr);
1328     new_bufend_pos = SvCUR(linestr);
1329     PL_parser->bufend = buf + new_bufend_pos;
1330     PL_parser->bufptr = buf + bufptr_pos;
1331     PL_parser->oldbufptr = buf + oldbufptr_pos;
1332     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1333     PL_parser->linestart = buf + linestart_pos;
1334     if (PL_parser->last_uni)
1335 	PL_parser->last_uni = buf + last_uni_pos;
1336     if (PL_parser->last_lop)
1337 	PL_parser->last_lop = buf + last_lop_pos;
1338     if (PL_parser->preambling != NOLINE) {
1339 	CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1340 	PL_parser->preambling = NOLINE;
1341     }
1342     if (   got_some_for_debugger
1343         && PERLDB_LINE_OR_SAVESRC
1344         && PL_curstash != PL_debstash)
1345     {
1346 	/* debugger active and we're not compiling the debugger code,
1347 	 * so store the line into the debugger's array of lines
1348 	 */
1349 	update_debugger_info(NULL, buf+old_bufend_pos,
1350 	    new_bufend_pos-old_bufend_pos);
1351     }
1352     return got_some;
1353 }
1354 
1355 /*
1356 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1357 
1358 Looks ahead one (Unicode) character in the text currently being lexed.
1359 Returns the codepoint (unsigned integer value) of the next character,
1360 or -1 if lexing has reached the end of the input text.  To consume the
1361 peeked character, use L</lex_read_unichar>.
1362 
1363 If the next character is in (or extends into) the next chunk of input
1364 text, the next chunk will be read in.  Normally the current chunk will be
1365 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1366 bit set, then the current chunk will not be discarded.
1367 
1368 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1369 is encountered, an exception is generated.
1370 
1371 =cut
1372 */
1373 
1374 I32
1375 Perl_lex_peek_unichar(pTHX_ U32 flags)
1376 {
1377     dVAR;
1378     char *s, *bufend;
1379     if (flags & ~(LEX_KEEP_PREVIOUS))
1380 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1381     s = PL_parser->bufptr;
1382     bufend = PL_parser->bufend;
1383     if (UTF) {
1384 	U8 head;
1385 	I32 unichar;
1386 	STRLEN len, retlen;
1387 	if (s == bufend) {
1388 	    if (!lex_next_chunk(flags))
1389 		return -1;
1390 	    s = PL_parser->bufptr;
1391 	    bufend = PL_parser->bufend;
1392 	}
1393 	head = (U8)*s;
1394 	if (UTF8_IS_INVARIANT(head))
1395 	    return head;
1396 	if (UTF8_IS_START(head)) {
1397 	    len = UTF8SKIP(&head);
1398 	    while ((STRLEN)(bufend-s) < len) {
1399 		if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1400 		    break;
1401 		s = PL_parser->bufptr;
1402 		bufend = PL_parser->bufend;
1403 	    }
1404 	}
1405 	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1406 	if (retlen == (STRLEN)-1) {
1407 	    /* malformed UTF-8 */
1408 	    ENTER;
1409 	    SAVESPTR(PL_warnhook);
1410 	    PL_warnhook = PERL_WARNHOOK_FATAL;
1411 	    utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1412 	    LEAVE;
1413 	}
1414 	return unichar;
1415     } else {
1416 	if (s == bufend) {
1417 	    if (!lex_next_chunk(flags))
1418 		return -1;
1419 	    s = PL_parser->bufptr;
1420 	}
1421 	return (U8)*s;
1422     }
1423 }
1424 
1425 /*
1426 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1427 
1428 Reads the next (Unicode) character in the text currently being lexed.
1429 Returns the codepoint (unsigned integer value) of the character read,
1430 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1431 if lexing has reached the end of the input text.  To non-destructively
1432 examine the next character, use L</lex_peek_unichar> instead.
1433 
1434 If the next character is in (or extends into) the next chunk of input
1435 text, the next chunk will be read in.  Normally the current chunk will be
1436 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1437 bit set, then the current chunk will not be discarded.
1438 
1439 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1440 is encountered, an exception is generated.
1441 
1442 =cut
1443 */
1444 
1445 I32
1446 Perl_lex_read_unichar(pTHX_ U32 flags)
1447 {
1448     I32 c;
1449     if (flags & ~(LEX_KEEP_PREVIOUS))
1450 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1451     c = lex_peek_unichar(flags);
1452     if (c != -1) {
1453 	if (c == '\n')
1454 	    COPLINE_INC_WITH_HERELINES;
1455 	if (UTF)
1456 	    PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1457 	else
1458 	    ++(PL_parser->bufptr);
1459     }
1460     return c;
1461 }
1462 
1463 /*
1464 =for apidoc Amx|void|lex_read_space|U32 flags
1465 
1466 Reads optional spaces, in Perl style, in the text currently being
1467 lexed.  The spaces may include ordinary whitespace characters and
1468 Perl-style comments.  C<#line> directives are processed if encountered.
1469 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1470 at a non-space character (or the end of the input text).
1471 
1472 If spaces extend into the next chunk of input text, the next chunk will
1473 be read in.  Normally the current chunk will be discarded at the same
1474 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1475 chunk will not be discarded.
1476 
1477 =cut
1478 */
1479 
1480 #define LEX_NO_INCLINE    0x40000000
1481 #define LEX_NO_NEXT_CHUNK 0x80000000
1482 
1483 void
1484 Perl_lex_read_space(pTHX_ U32 flags)
1485 {
1486     char *s, *bufend;
1487     const bool can_incline = !(flags & LEX_NO_INCLINE);
1488     bool need_incline = 0;
1489     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1490 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1491     s = PL_parser->bufptr;
1492     bufend = PL_parser->bufend;
1493     while (1) {
1494 	char c = *s;
1495 	if (c == '#') {
1496 	    do {
1497 		c = *++s;
1498 	    } while (!(c == '\n' || (c == 0 && s == bufend)));
1499 	} else if (c == '\n') {
1500 	    s++;
1501 	    if (can_incline) {
1502 		PL_parser->linestart = s;
1503 		if (s == bufend)
1504 		    need_incline = 1;
1505 		else
1506 		    incline(s);
1507 	    }
1508 	} else if (isSPACE(c)) {
1509 	    s++;
1510 	} else if (c == 0 && s == bufend) {
1511 	    bool got_more;
1512 	    line_t l;
1513 	    if (flags & LEX_NO_NEXT_CHUNK)
1514 		break;
1515 	    PL_parser->bufptr = s;
1516 	    l = CopLINE(PL_curcop);
1517 	    CopLINE(PL_curcop) += PL_parser->herelines + 1;
1518 	    got_more = lex_next_chunk(flags);
1519 	    CopLINE_set(PL_curcop, l);
1520 	    s = PL_parser->bufptr;
1521 	    bufend = PL_parser->bufend;
1522 	    if (!got_more)
1523 		break;
1524 	    if (can_incline && need_incline && PL_parser->rsfp) {
1525 		incline(s);
1526 		need_incline = 0;
1527 	    }
1528 	} else if (!c) {
1529 	    s++;
1530 	} else {
1531 	    break;
1532 	}
1533     }
1534     PL_parser->bufptr = s;
1535 }
1536 
1537 /*
1538 
1539 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1540 
1541 This function performs syntax checking on a prototype, C<proto>.
1542 If C<warn> is true, any illegal characters or mismatched brackets
1543 will trigger illegalproto warnings, declaring that they were
1544 detected in the prototype for C<name>.
1545 
1546 The return value is C<true> if this is a valid prototype, and
1547 C<false> if it is not, regardless of whether C<warn> was C<true> or
1548 C<false>.
1549 
1550 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1551 
1552 =cut
1553 
1554  */
1555 
1556 bool
1557 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1558 {
1559     STRLEN len, origlen;
1560     char *p = proto ? SvPV(proto, len) : NULL;
1561     bool bad_proto = FALSE;
1562     bool in_brackets = FALSE;
1563     bool after_slash = FALSE;
1564     char greedy_proto = ' ';
1565     bool proto_after_greedy_proto = FALSE;
1566     bool must_be_last = FALSE;
1567     bool underscore = FALSE;
1568     bool bad_proto_after_underscore = FALSE;
1569 
1570     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1571 
1572     if (!proto)
1573 	return TRUE;
1574 
1575     origlen = len;
1576     for (; len--; p++) {
1577 	if (!isSPACE(*p)) {
1578 	    if (must_be_last)
1579 		proto_after_greedy_proto = TRUE;
1580 	    if (underscore) {
1581 		if (!strchr(";@%", *p))
1582 		    bad_proto_after_underscore = TRUE;
1583 		underscore = FALSE;
1584 	    }
1585 	    if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1586 		bad_proto = TRUE;
1587 	    }
1588 	    else {
1589 		if (*p == '[')
1590 		    in_brackets = TRUE;
1591 		else if (*p == ']')
1592 		    in_brackets = FALSE;
1593 		else if ((*p == '@' || *p == '%')
1594                          && !after_slash
1595                          && !in_brackets )
1596                 {
1597 		    must_be_last = TRUE;
1598 		    greedy_proto = *p;
1599 		}
1600 		else if (*p == '_')
1601 		    underscore = TRUE;
1602 	    }
1603 	    if (*p == '\\')
1604 		after_slash = TRUE;
1605 	    else
1606 		after_slash = FALSE;
1607 	}
1608     }
1609 
1610     if (warn) {
1611 	SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1612 	p -= origlen;
1613 	p = SvUTF8(proto)
1614 	    ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1615 	                     origlen, UNI_DISPLAY_ISPRINT)
1616 	    : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1617 
1618 	if (proto_after_greedy_proto)
1619 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1620 			"Prototype after '%c' for %"SVf" : %s",
1621 			greedy_proto, SVfARG(name), p);
1622 	if (in_brackets)
1623 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1624 			"Missing ']' in prototype for %"SVf" : %s",
1625 			SVfARG(name), p);
1626 	if (bad_proto)
1627 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1628 			"Illegal character in prototype for %"SVf" : %s",
1629 			SVfARG(name), p);
1630 	if (bad_proto_after_underscore)
1631 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1632 			"Illegal character after '_' in prototype for %"SVf" : %s",
1633 			SVfARG(name), p);
1634     }
1635 
1636     return (! (proto_after_greedy_proto || bad_proto) );
1637 }
1638 
1639 /*
1640  * S_incline
1641  * This subroutine has nothing to do with tilting, whether at windmills
1642  * or pinball tables.  Its name is short for "increment line".  It
1643  * increments the current line number in CopLINE(PL_curcop) and checks
1644  * to see whether the line starts with a comment of the form
1645  *    # line 500 "foo.pm"
1646  * If so, it sets the current line number and file to the values in the comment.
1647  */
1648 
1649 STATIC void
1650 S_incline(pTHX_ const char *s)
1651 {
1652     const char *t;
1653     const char *n;
1654     const char *e;
1655     line_t line_num;
1656     UV uv;
1657 
1658     PERL_ARGS_ASSERT_INCLINE;
1659 
1660     COPLINE_INC_WITH_HERELINES;
1661     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1662      && s+1 == PL_bufend && *s == ';') {
1663 	/* fake newline in string eval */
1664 	CopLINE_dec(PL_curcop);
1665 	return;
1666     }
1667     if (*s++ != '#')
1668 	return;
1669     while (SPACE_OR_TAB(*s))
1670 	s++;
1671     if (strnEQ(s, "line", 4))
1672 	s += 4;
1673     else
1674 	return;
1675     if (SPACE_OR_TAB(*s))
1676 	s++;
1677     else
1678 	return;
1679     while (SPACE_OR_TAB(*s))
1680 	s++;
1681     if (!isDIGIT(*s))
1682 	return;
1683 
1684     n = s;
1685     while (isDIGIT(*s))
1686 	s++;
1687     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1688 	return;
1689     while (SPACE_OR_TAB(*s))
1690 	s++;
1691     if (*s == '"' && (t = strchr(s+1, '"'))) {
1692 	s++;
1693 	e = t + 1;
1694     }
1695     else {
1696 	t = s;
1697 	while (*t && !isSPACE(*t))
1698 	    t++;
1699 	e = t;
1700     }
1701     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1702 	e++;
1703     if (*e != '\n' && *e != '\0')
1704 	return;		/* false alarm */
1705 
1706     if (!grok_atoUV(n, &uv, &e))
1707         return;
1708     line_num = ((line_t)uv) - 1;
1709 
1710     if (t - s > 0) {
1711 	const STRLEN len = t - s;
1712 
1713 	if (!PL_rsfp && !PL_parser->filtered) {
1714 	    /* must copy *{"::_<(eval N)[oldfilename:L]"}
1715 	     * to *{"::_<newfilename"} */
1716 	    /* However, the long form of evals is only turned on by the
1717 	       debugger - usually they're "(eval %lu)" */
1718 	    GV * const cfgv = CopFILEGV(PL_curcop);
1719 	    if (cfgv) {
1720 		char smallbuf[128];
1721 		STRLEN tmplen2 = len;
1722 		char *tmpbuf2;
1723 		GV *gv2;
1724 
1725 		if (tmplen2 + 2 <= sizeof smallbuf)
1726 		    tmpbuf2 = smallbuf;
1727 		else
1728 		    Newx(tmpbuf2, tmplen2 + 2, char);
1729 
1730 		tmpbuf2[0] = '_';
1731 		tmpbuf2[1] = '<';
1732 
1733 		memcpy(tmpbuf2 + 2, s, tmplen2);
1734 		tmplen2 += 2;
1735 
1736 		gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1737 		if (!isGV(gv2)) {
1738 		    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1739 		    /* adjust ${"::_<newfilename"} to store the new file name */
1740 		    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1741 		    /* The line number may differ. If that is the case,
1742 		       alias the saved lines that are in the array.
1743 		       Otherwise alias the whole array. */
1744 		    if (CopLINE(PL_curcop) == line_num) {
1745 			GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1746 			GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1747 		    }
1748 		    else if (GvAV(cfgv)) {
1749 			AV * const av = GvAV(cfgv);
1750 			const I32 start = CopLINE(PL_curcop)+1;
1751 			I32 items = AvFILLp(av) - start;
1752 			if (items > 0) {
1753 			    AV * const av2 = GvAVn(gv2);
1754 			    SV **svp = AvARRAY(av) + start;
1755 			    I32 l = (I32)line_num+1;
1756 			    while (items--)
1757 				av_store(av2, l++, SvREFCNT_inc(*svp++));
1758 			}
1759 		    }
1760 		}
1761 
1762 		if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1763 	    }
1764 	}
1765 	CopFILE_free(PL_curcop);
1766 	CopFILE_setn(PL_curcop, s, len);
1767     }
1768     CopLINE_set(PL_curcop, line_num);
1769 }
1770 
1771 #define skipspace(s) skipspace_flags(s, 0)
1772 
1773 
1774 STATIC void
1775 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1776 {
1777     AV *av = CopFILEAVx(PL_curcop);
1778     if (av) {
1779 	SV * sv;
1780 	if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1781 	else {
1782 	    sv = *av_fetch(av, 0, 1);
1783 	    SvUPGRADE(sv, SVt_PVMG);
1784 	}
1785 	if (!SvPOK(sv)) sv_setpvs(sv,"");
1786 	if (orig_sv)
1787 	    sv_catsv(sv, orig_sv);
1788 	else
1789 	    sv_catpvn(sv, buf, len);
1790 	if (!SvIOK(sv)) {
1791 	    (void)SvIOK_on(sv);
1792 	    SvIV_set(sv, 0);
1793 	}
1794 	if (PL_parser->preambling == NOLINE)
1795 	    av_store(av, CopLINE(PL_curcop), sv);
1796     }
1797 }
1798 
1799 /*
1800  * S_skipspace
1801  * Called to gobble the appropriate amount and type of whitespace.
1802  * Skips comments as well.
1803  */
1804 
1805 STATIC char *
1806 S_skipspace_flags(pTHX_ char *s, U32 flags)
1807 {
1808     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1809     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1810 	while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1811 	    s++;
1812     } else {
1813 	STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1814 	PL_bufptr = s;
1815 	lex_read_space(flags | LEX_KEEP_PREVIOUS |
1816 		(PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1817 		    LEX_NO_NEXT_CHUNK : 0));
1818 	s = PL_bufptr;
1819 	PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1820 	if (PL_linestart > PL_bufptr)
1821 	    PL_bufptr = PL_linestart;
1822 	return s;
1823     }
1824     return s;
1825 }
1826 
1827 /*
1828  * S_check_uni
1829  * Check the unary operators to ensure there's no ambiguity in how they're
1830  * used.  An ambiguous piece of code would be:
1831  *     rand + 5
1832  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1833  * the +5 is its argument.
1834  */
1835 
1836 STATIC void
1837 S_check_uni(pTHX)
1838 {
1839     const char *s;
1840     const char *t;
1841 
1842     if (PL_oldoldbufptr != PL_last_uni)
1843 	return;
1844     while (isSPACE(*PL_last_uni))
1845 	PL_last_uni++;
1846     s = PL_last_uni;
1847     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1848 	s += UTF ? UTF8SKIP(s) : 1;
1849     if ((t = strchr(s, '(')) && t < PL_bufptr)
1850 	return;
1851 
1852     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1853 		     "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
1854 		     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1855 }
1856 
1857 /*
1858  * LOP : macro to build a list operator.  Its behaviour has been replaced
1859  * with a subroutine, S_lop() for which LOP is just another name.
1860  */
1861 
1862 #define LOP(f,x) return lop(f,x,s)
1863 
1864 /*
1865  * S_lop
1866  * Build a list operator (or something that might be one).  The rules:
1867  *  - if we have a next token, then it's a list operator (no parens) for
1868  *    which the next token has already been parsed; e.g.,
1869  *       sort foo @args
1870  *       sort foo (@args)
1871  *  - if the next thing is an opening paren, then it's a function
1872  *  - else it's a list operator
1873  */
1874 
1875 STATIC I32
1876 S_lop(pTHX_ I32 f, int x, char *s)
1877 {
1878     PERL_ARGS_ASSERT_LOP;
1879 
1880     pl_yylval.ival = f;
1881     CLINE;
1882     PL_bufptr = s;
1883     PL_last_lop = PL_oldbufptr;
1884     PL_last_lop_op = (OPCODE)f;
1885     if (PL_nexttoke)
1886 	goto lstop;
1887     PL_expect = x;
1888     if (*s == '(')
1889 	return REPORT(FUNC);
1890     s = skipspace(s);
1891     if (*s == '(')
1892 	return REPORT(FUNC);
1893     else {
1894 	lstop:
1895 	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1896 	    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1897 	return REPORT(LSTOP);
1898     }
1899 }
1900 
1901 /*
1902  * S_force_next
1903  * When the lexer realizes it knows the next token (for instance,
1904  * it is reordering tokens for the parser) then it can call S_force_next
1905  * to know what token to return the next time the lexer is called.  Caller
1906  * will need to set PL_nextval[] and possibly PL_expect to ensure
1907  * the lexer handles the token correctly.
1908  */
1909 
1910 STATIC void
1911 S_force_next(pTHX_ I32 type)
1912 {
1913 #ifdef DEBUGGING
1914     if (DEBUG_T_TEST) {
1915         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1916 	tokereport(type, &NEXTVAL_NEXTTOKE);
1917     }
1918 #endif
1919     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1920     PL_nexttype[PL_nexttoke] = type;
1921     PL_nexttoke++;
1922     if (PL_lex_state != LEX_KNOWNEXT) {
1923 	PL_lex_defer = PL_lex_state;
1924 	PL_lex_state = LEX_KNOWNEXT;
1925     }
1926 }
1927 
1928 /*
1929  * S_postderef
1930  *
1931  * This subroutine handles postfix deref syntax after the arrow has already
1932  * been emitted.  @* $* etc. are emitted as two separate token right here.
1933  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1934  * only the first, leaving yylex to find the next.
1935  */
1936 
1937 static int
1938 S_postderef(pTHX_ int const funny, char const next)
1939 {
1940     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1941     assert(strchr("*[{", next));
1942     if (next == '*') {
1943 	PL_expect = XOPERATOR;
1944 	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1945 	    assert('@' == funny || '$' == funny || DOLSHARP == funny);
1946 	    PL_lex_state = LEX_INTERPEND;
1947 	    force_next(POSTJOIN);
1948 	}
1949 	force_next(next);
1950 	PL_bufptr+=2;
1951     }
1952     else {
1953 	if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1954 	 && !PL_lex_brackets)
1955 	    PL_lex_dojoin = 2;
1956 	PL_expect = XOPERATOR;
1957 	PL_bufptr++;
1958     }
1959     return funny;
1960 }
1961 
1962 void
1963 Perl_yyunlex(pTHX)
1964 {
1965     int yyc = PL_parser->yychar;
1966     if (yyc != YYEMPTY) {
1967 	if (yyc) {
1968 	    NEXTVAL_NEXTTOKE = PL_parser->yylval;
1969 	    if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1970 		PL_lex_allbrackets--;
1971 		PL_lex_brackets--;
1972 		yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1973 	    } else if (yyc == '('/*)*/) {
1974 		PL_lex_allbrackets--;
1975 		yyc |= (2<<24);
1976 	    }
1977 	    force_next(yyc);
1978 	}
1979 	PL_parser->yychar = YYEMPTY;
1980     }
1981 }
1982 
1983 STATIC SV *
1984 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1985 {
1986     SV * const sv = newSVpvn_utf8(start, len,
1987 				  !IN_BYTES
1988 				  && UTF
1989 				  && !is_invariant_string((const U8*)start, len)
1990 				  && is_utf8_string((const U8*)start, len));
1991     return sv;
1992 }
1993 
1994 /*
1995  * S_force_word
1996  * When the lexer knows the next thing is a word (for instance, it has
1997  * just seen -> and it knows that the next char is a word char, then
1998  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1999  * lookahead.
2000  *
2001  * Arguments:
2002  *   char *start : buffer position (must be within PL_linestr)
2003  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2004  *   int check_keyword : if true, Perl checks to make sure the word isn't
2005  *       a keyword (do this if the word is a label, e.g. goto FOO)
2006  *   int allow_pack : if true, : characters will also be allowed (require,
2007  *       use, etc. do this)
2008  */
2009 
2010 STATIC char *
2011 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2012 {
2013     char *s;
2014     STRLEN len;
2015 
2016     PERL_ARGS_ASSERT_FORCE_WORD;
2017 
2018     start = skipspace(start);
2019     s = start;
2020     if (isIDFIRST_lazy_if(s,UTF)
2021         || (allow_pack && *s == ':') )
2022     {
2023 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2024 	if (check_keyword) {
2025 	  char *s2 = PL_tokenbuf;
2026 	  STRLEN len2 = len;
2027 	  if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2028 	    s2 += 6, len2 -= 6;
2029 	  if (keyword(s2, len2, 0))
2030 	    return start;
2031 	}
2032 	if (token == METHOD) {
2033 	    s = skipspace(s);
2034 	    if (*s == '(')
2035 		PL_expect = XTERM;
2036 	    else {
2037 		PL_expect = XOPERATOR;
2038 	    }
2039 	}
2040 	NEXTVAL_NEXTTOKE.opval
2041 	    = (OP*)newSVOP(OP_CONST,0,
2042 			   S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2043 	NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2044 	force_next(token);
2045     }
2046     return s;
2047 }
2048 
2049 /*
2050  * S_force_ident
2051  * Called when the lexer wants $foo *foo &foo etc, but the program
2052  * text only contains the "foo" portion.  The first argument is a pointer
2053  * to the "foo", and the second argument is the type symbol to prefix.
2054  * Forces the next token to be a "WORD".
2055  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2056  */
2057 
2058 STATIC void
2059 S_force_ident(pTHX_ const char *s, int kind)
2060 {
2061     PERL_ARGS_ASSERT_FORCE_IDENT;
2062 
2063     if (s[0]) {
2064 	const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2065 	OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2066                                                                 UTF ? SVf_UTF8 : 0));
2067 	NEXTVAL_NEXTTOKE.opval = o;
2068 	force_next(WORD);
2069 	if (kind) {
2070 	    o->op_private = OPpCONST_ENTERED;
2071 	    /* XXX see note in pp_entereval() for why we forgo typo
2072 	       warnings if the symbol must be introduced in an eval.
2073 	       GSAR 96-10-12 */
2074 	    gv_fetchpvn_flags(s, len,
2075 			      (PL_in_eval ? GV_ADDMULTI
2076 			      : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2077 			      kind == '$' ? SVt_PV :
2078 			      kind == '@' ? SVt_PVAV :
2079 			      kind == '%' ? SVt_PVHV :
2080 			      SVt_PVGV
2081 			      );
2082 	}
2083     }
2084 }
2085 
2086 static void
2087 S_force_ident_maybe_lex(pTHX_ char pit)
2088 {
2089     NEXTVAL_NEXTTOKE.ival = pit;
2090     force_next('p');
2091 }
2092 
2093 NV
2094 Perl_str_to_version(pTHX_ SV *sv)
2095 {
2096     NV retval = 0.0;
2097     NV nshift = 1.0;
2098     STRLEN len;
2099     const char *start = SvPV_const(sv,len);
2100     const char * const end = start + len;
2101     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2102 
2103     PERL_ARGS_ASSERT_STR_TO_VERSION;
2104 
2105     while (start < end) {
2106 	STRLEN skip;
2107 	UV n;
2108 	if (utf)
2109 	    n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2110 	else {
2111 	    n = *(U8*)start;
2112 	    skip = 1;
2113 	}
2114 	retval += ((NV)n)/nshift;
2115 	start += skip;
2116 	nshift *= 1000;
2117     }
2118     return retval;
2119 }
2120 
2121 /*
2122  * S_force_version
2123  * Forces the next token to be a version number.
2124  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2125  * and if "guessing" is TRUE, then no new token is created (and the caller
2126  * must use an alternative parsing method).
2127  */
2128 
2129 STATIC char *
2130 S_force_version(pTHX_ char *s, int guessing)
2131 {
2132     OP *version = NULL;
2133     char *d;
2134 
2135     PERL_ARGS_ASSERT_FORCE_VERSION;
2136 
2137     s = skipspace(s);
2138 
2139     d = s;
2140     if (*d == 'v')
2141 	d++;
2142     if (isDIGIT(*d)) {
2143 	while (isDIGIT(*d) || *d == '_' || *d == '.')
2144 	    d++;
2145         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2146 	    SV *ver;
2147             s = scan_num(s, &pl_yylval);
2148             version = pl_yylval.opval;
2149 	    ver = cSVOPx(version)->op_sv;
2150 	    if (SvPOK(ver) && !SvNIOK(ver)) {
2151 		SvUPGRADE(ver, SVt_PVNV);
2152 		SvNV_set(ver, str_to_version(ver));
2153 		SvNOK_on(ver);		/* hint that it is a version */
2154 	    }
2155         }
2156 	else if (guessing) {
2157 	    return s;
2158 	}
2159     }
2160 
2161     /* NOTE: The parser sees the package name and the VERSION swapped */
2162     NEXTVAL_NEXTTOKE.opval = version;
2163     force_next(WORD);
2164 
2165     return s;
2166 }
2167 
2168 /*
2169  * S_force_strict_version
2170  * Forces the next token to be a version number using strict syntax rules.
2171  */
2172 
2173 STATIC char *
2174 S_force_strict_version(pTHX_ char *s)
2175 {
2176     OP *version = NULL;
2177     const char *errstr = NULL;
2178 
2179     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2180 
2181     while (isSPACE(*s)) /* leading whitespace */
2182 	s++;
2183 
2184     if (is_STRICT_VERSION(s,&errstr)) {
2185 	SV *ver = newSV(0);
2186 	s = (char *)scan_version(s, ver, 0);
2187 	version = newSVOP(OP_CONST, 0, ver);
2188     }
2189     else if ((*s != ';' && *s != '{' && *s != '}' )
2190              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2191     {
2192 	PL_bufptr = s;
2193 	if (errstr)
2194 	    yyerror(errstr); /* version required */
2195 	return s;
2196     }
2197 
2198     /* NOTE: The parser sees the package name and the VERSION swapped */
2199     NEXTVAL_NEXTTOKE.opval = version;
2200     force_next(WORD);
2201 
2202     return s;
2203 }
2204 
2205 /*
2206  * S_tokeq
2207  * Tokenize a quoted string passed in as an SV.  It finds the next
2208  * chunk, up to end of string or a backslash.  It may make a new
2209  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2210  * turns \\ into \.
2211  */
2212 
2213 STATIC SV *
2214 S_tokeq(pTHX_ SV *sv)
2215 {
2216     char *s;
2217     char *send;
2218     char *d;
2219     SV *pv = sv;
2220 
2221     PERL_ARGS_ASSERT_TOKEQ;
2222 
2223     assert (SvPOK(sv));
2224     assert (SvLEN(sv));
2225     assert (!SvIsCOW(sv));
2226     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2227 	goto finish;
2228     s = SvPVX(sv);
2229     send = SvEND(sv);
2230     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2231     while (s < send && !(*s == '\\' && s[1] == '\\'))
2232 	s++;
2233     if (s == send)
2234 	goto finish;
2235     d = s;
2236     if ( PL_hints & HINT_NEW_STRING ) {
2237 	pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2238 			    SVs_TEMP | SvUTF8(sv));
2239     }
2240     while (s < send) {
2241 	if (*s == '\\') {
2242 	    if (s + 1 < send && (s[1] == '\\'))
2243 		s++;		/* all that, just for this */
2244 	}
2245 	*d++ = *s++;
2246     }
2247     *d = '\0';
2248     SvCUR_set(sv, d - SvPVX_const(sv));
2249   finish:
2250     if ( PL_hints & HINT_NEW_STRING )
2251        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2252     return sv;
2253 }
2254 
2255 /*
2256  * Now come three functions related to double-quote context,
2257  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2258  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2259  * interact with PL_lex_state, and create fake ( ... ) argument lists
2260  * to handle functions and concatenation.
2261  * For example,
2262  *   "foo\lbar"
2263  * is tokenised as
2264  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2265  */
2266 
2267 /*
2268  * S_sublex_start
2269  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2270  *
2271  * Pattern matching will set PL_lex_op to the pattern-matching op to
2272  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2273  *
2274  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2275  *
2276  * Everything else becomes a FUNC.
2277  *
2278  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2279  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2280  * call to S_sublex_push().
2281  */
2282 
2283 STATIC I32
2284 S_sublex_start(pTHX)
2285 {
2286     const I32 op_type = pl_yylval.ival;
2287 
2288     if (op_type == OP_NULL) {
2289 	pl_yylval.opval = PL_lex_op;
2290 	PL_lex_op = NULL;
2291 	return THING;
2292     }
2293     if (op_type == OP_CONST) {
2294 	SV *sv = PL_lex_stuff;
2295 	PL_lex_stuff = NULL;
2296 	sv = tokeq(sv);
2297 
2298 	if (SvTYPE(sv) == SVt_PVIV) {
2299 	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2300 	    STRLEN len;
2301 	    const char * const p = SvPV_const(sv, len);
2302 	    SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2303 	    SvREFCNT_dec(sv);
2304 	    sv = nsv;
2305 	}
2306 	pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2307 	return THING;
2308     }
2309 
2310     PL_sublex_info.super_state = PL_lex_state;
2311     PL_sublex_info.sub_inwhat = (U16)op_type;
2312     PL_sublex_info.sub_op = PL_lex_op;
2313     PL_lex_state = LEX_INTERPPUSH;
2314 
2315     PL_expect = XTERM;
2316     if (PL_lex_op) {
2317 	pl_yylval.opval = PL_lex_op;
2318 	PL_lex_op = NULL;
2319 	return PMFUNC;
2320     }
2321     else
2322 	return FUNC;
2323 }
2324 
2325 /*
2326  * S_sublex_push
2327  * Create a new scope to save the lexing state.  The scope will be
2328  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2329  * to the uc, lc, etc. found before.
2330  * Sets PL_lex_state to LEX_INTERPCONCAT.
2331  */
2332 
2333 STATIC I32
2334 S_sublex_push(pTHX)
2335 {
2336     LEXSHARED *shared;
2337     const bool is_heredoc = PL_multi_close == '<';
2338     ENTER;
2339 
2340     PL_lex_state = PL_sublex_info.super_state;
2341     SAVEI8(PL_lex_dojoin);
2342     SAVEI32(PL_lex_brackets);
2343     SAVEI32(PL_lex_allbrackets);
2344     SAVEI32(PL_lex_formbrack);
2345     SAVEI8(PL_lex_fakeeof);
2346     SAVEI32(PL_lex_casemods);
2347     SAVEI32(PL_lex_starts);
2348     SAVEI8(PL_lex_state);
2349     SAVEI8(PL_lex_defer);
2350     SAVESPTR(PL_lex_repl);
2351     SAVEVPTR(PL_lex_inpat);
2352     SAVEI16(PL_lex_inwhat);
2353     if (is_heredoc)
2354     {
2355 	SAVECOPLINE(PL_curcop);
2356 	SAVEI32(PL_multi_end);
2357 	SAVEI32(PL_parser->herelines);
2358 	PL_parser->herelines = 0;
2359     }
2360     SAVEI8(PL_multi_close);
2361     SAVEPPTR(PL_bufptr);
2362     SAVEPPTR(PL_bufend);
2363     SAVEPPTR(PL_oldbufptr);
2364     SAVEPPTR(PL_oldoldbufptr);
2365     SAVEPPTR(PL_last_lop);
2366     SAVEPPTR(PL_last_uni);
2367     SAVEPPTR(PL_linestart);
2368     SAVESPTR(PL_linestr);
2369     SAVEGENERICPV(PL_lex_brackstack);
2370     SAVEGENERICPV(PL_lex_casestack);
2371     SAVEGENERICPV(PL_parser->lex_shared);
2372     SAVEBOOL(PL_parser->lex_re_reparsing);
2373     SAVEI32(PL_copline);
2374 
2375     /* The here-doc parser needs to be able to peek into outer lexing
2376        scopes to find the body of the here-doc.  So we put PL_linestr and
2377        PL_bufptr into lex_shared, to ‘share’ those values.
2378      */
2379     PL_parser->lex_shared->ls_linestr = PL_linestr;
2380     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2381 
2382     PL_linestr = PL_lex_stuff;
2383     PL_lex_repl = PL_sublex_info.repl;
2384     PL_lex_stuff = NULL;
2385     PL_sublex_info.repl = NULL;
2386 
2387     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2388        set for an inner quote-like operator and then an error causes scope-
2389        popping.  We must not have a PL_lex_stuff value left dangling, as
2390        that breaks assumptions elsewhere.  See bug #123617.  */
2391     SAVEGENERICSV(PL_lex_stuff);
2392     SAVEGENERICSV(PL_sublex_info.repl);
2393 
2394     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2395 	= SvPVX(PL_linestr);
2396     PL_bufend += SvCUR(PL_linestr);
2397     PL_last_lop = PL_last_uni = NULL;
2398     SAVEFREESV(PL_linestr);
2399     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2400 
2401     PL_lex_dojoin = FALSE;
2402     PL_lex_brackets = PL_lex_formbrack = 0;
2403     PL_lex_allbrackets = 0;
2404     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2405     Newx(PL_lex_brackstack, 120, char);
2406     Newx(PL_lex_casestack, 12, char);
2407     PL_lex_casemods = 0;
2408     *PL_lex_casestack = '\0';
2409     PL_lex_starts = 0;
2410     PL_lex_state = LEX_INTERPCONCAT;
2411     if (is_heredoc)
2412 	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2413     PL_copline = NOLINE;
2414 
2415     Newxz(shared, 1, LEXSHARED);
2416     shared->ls_prev = PL_parser->lex_shared;
2417     PL_parser->lex_shared = shared;
2418 
2419     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2420     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2421     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2422 	PL_lex_inpat = PL_sublex_info.sub_op;
2423     else
2424 	PL_lex_inpat = NULL;
2425 
2426     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2427     PL_in_eval &= ~EVAL_RE_REPARSING;
2428 
2429     return '(';
2430 }
2431 
2432 /*
2433  * S_sublex_done
2434  * Restores lexer state after a S_sublex_push.
2435  */
2436 
2437 STATIC I32
2438 S_sublex_done(pTHX)
2439 {
2440     if (!PL_lex_starts++) {
2441 	SV * const sv = newSVpvs("");
2442 	if (SvUTF8(PL_linestr))
2443 	    SvUTF8_on(sv);
2444 	PL_expect = XOPERATOR;
2445 	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2446 	return THING;
2447     }
2448 
2449     if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2450 	PL_lex_state = LEX_INTERPCASEMOD;
2451 	return yylex();
2452     }
2453 
2454     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2455     assert(PL_lex_inwhat != OP_TRANSR);
2456     if (PL_lex_repl) {
2457 	assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2458 	PL_linestr = PL_lex_repl;
2459 	PL_lex_inpat = 0;
2460 	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2461 	PL_bufend += SvCUR(PL_linestr);
2462 	PL_last_lop = PL_last_uni = NULL;
2463 	PL_lex_dojoin = FALSE;
2464 	PL_lex_brackets = 0;
2465 	PL_lex_allbrackets = 0;
2466 	PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2467 	PL_lex_casemods = 0;
2468 	*PL_lex_casestack = '\0';
2469 	PL_lex_starts = 0;
2470 	if (SvEVALED(PL_lex_repl)) {
2471 	    PL_lex_state = LEX_INTERPNORMAL;
2472 	    PL_lex_starts++;
2473 	    /*	we don't clear PL_lex_repl here, so that we can check later
2474 		whether this is an evalled subst; that means we rely on the
2475 		logic to ensure sublex_done() is called again only via the
2476 		branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2477 	}
2478 	else {
2479 	    PL_lex_state = LEX_INTERPCONCAT;
2480 	    PL_lex_repl = NULL;
2481 	}
2482 	if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2483 	    CopLINE(PL_curcop) +=
2484 		((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2485 		 + PL_parser->herelines;
2486 	    PL_parser->herelines = 0;
2487 	}
2488 	return '/';
2489     }
2490     else {
2491 	const line_t l = CopLINE(PL_curcop);
2492 	LEAVE;
2493 	if (PL_multi_close == '<')
2494 	    PL_parser->herelines += l - PL_multi_end;
2495 	PL_bufend = SvPVX(PL_linestr);
2496 	PL_bufend += SvCUR(PL_linestr);
2497 	PL_expect = XOPERATOR;
2498 	return ')';
2499     }
2500 }
2501 
2502 PERL_STATIC_INLINE SV*
2503 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2504 {
2505     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2506      * interior, hence to the "}".  Finds what the name resolves to, returning
2507      * an SV* containing it; NULL if no valid one found */
2508 
2509     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2510 
2511     HV * table;
2512     SV **cvp;
2513     SV *cv;
2514     SV *rv;
2515     HV *stash;
2516     const U8* first_bad_char_loc;
2517     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2518 
2519     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2520 
2521     if (!SvCUR(res)) {
2522         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2523                        "Unknown charname '' is deprecated");
2524         return res;
2525     }
2526 
2527     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2528                                      e - backslash_ptr,
2529                                      &first_bad_char_loc))
2530     {
2531         /* If warnings are on, this will print a more detailed analysis of what
2532          * is wrong than the error message below */
2533         utf8n_to_uvchr(first_bad_char_loc,
2534                        e - ((char *) first_bad_char_loc),
2535                        NULL, 0);
2536 
2537         /* We deliberately don't try to print the malformed character, which
2538          * might not print very well; it also may be just the first of many
2539          * malformations, so don't print what comes after it */
2540         yyerror_pv(Perl_form(aTHX_
2541             "Malformed UTF-8 character immediately after '%.*s'",
2542             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2543                    SVf_UTF8);
2544 	return NULL;
2545     }
2546 
2547     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2548                         /* include the <}> */
2549                         e - backslash_ptr + 1);
2550     if (! SvPOK(res)) {
2551         SvREFCNT_dec_NN(res);
2552         return NULL;
2553     }
2554 
2555     /* See if the charnames handler is the Perl core's, and if so, we can skip
2556      * the validation needed for a user-supplied one, as Perl's does its own
2557      * validation. */
2558     table = GvHV(PL_hintgv);		 /* ^H */
2559     cvp = hv_fetchs(table, "charnames", FALSE);
2560     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2561         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2562     {
2563         const char * const name = HvNAME(stash);
2564         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2565          && strEQ(name, "_charnames")) {
2566            return res;
2567        }
2568     }
2569 
2570     /* Here, it isn't Perl's charname handler.  We can't rely on a
2571      * user-supplied handler to validate the input name.  For non-ut8 input,
2572      * look to see that the first character is legal.  Then loop through the
2573      * rest checking that each is a continuation */
2574 
2575     /* This code makes the reasonable assumption that the only Latin1-range
2576      * characters that begin a character name alias are alphabetic, otherwise
2577      * would have to create a isCHARNAME_BEGIN macro */
2578 
2579     if (! UTF) {
2580         if (! isALPHAU(*s)) {
2581             goto bad_charname;
2582         }
2583         s++;
2584         while (s < e) {
2585             if (! isCHARNAME_CONT(*s)) {
2586                 goto bad_charname;
2587             }
2588 	    if (*s == ' ' && *(s-1) == ' ') {
2589                 goto multi_spaces;
2590             }
2591 	    if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2592                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2593                            "NO-BREAK SPACE in a charnames "
2594                            "alias definition is deprecated");
2595             }
2596             s++;
2597         }
2598     }
2599     else {
2600         /* Similarly for utf8.  For invariants can check directly; for other
2601          * Latin1, can calculate their code point and check; otherwise  use a
2602          * swash */
2603         if (UTF8_IS_INVARIANT(*s)) {
2604             if (! isALPHAU(*s)) {
2605                 goto bad_charname;
2606             }
2607             s++;
2608         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2609             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2610                 goto bad_charname;
2611             }
2612             s += 2;
2613         }
2614         else {
2615             if (! PL_utf8_charname_begin) {
2616                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2617                 PL_utf8_charname_begin = _core_swash_init("utf8",
2618                                                         "_Perl_Charname_Begin",
2619                                                         &PL_sv_undef,
2620                                                         1, 0, NULL, &flags);
2621             }
2622             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2623                 goto bad_charname;
2624             }
2625             s += UTF8SKIP(s);
2626         }
2627 
2628         while (s < e) {
2629             if (UTF8_IS_INVARIANT(*s)) {
2630                 if (! isCHARNAME_CONT(*s)) {
2631                     goto bad_charname;
2632                 }
2633                 if (*s == ' ' && *(s-1) == ' ') {
2634                     goto multi_spaces;
2635                 }
2636                 s++;
2637             }
2638             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2639                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2640                 {
2641                     goto bad_charname;
2642                 }
2643                 if (*s == *NBSP_UTF8
2644                     && *(s+1) == *(NBSP_UTF8+1)
2645                     && ckWARN_d(WARN_DEPRECATED))
2646                 {
2647                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2648                                 "NO-BREAK SPACE in a charnames "
2649                                 "alias definition is deprecated");
2650                 }
2651                 s += 2;
2652             }
2653             else {
2654                 if (! PL_utf8_charname_continue) {
2655                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2656                     PL_utf8_charname_continue = _core_swash_init("utf8",
2657                                                 "_Perl_Charname_Continue",
2658                                                 &PL_sv_undef,
2659                                                 1, 0, NULL, &flags);
2660                 }
2661                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2662                     goto bad_charname;
2663                 }
2664                 s += UTF8SKIP(s);
2665             }
2666         }
2667     }
2668     if (*(s-1) == ' ') {
2669         yyerror_pv(
2670             Perl_form(aTHX_
2671             "charnames alias definitions may not contain trailing "
2672             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2673             (int)(s - backslash_ptr + 1), backslash_ptr,
2674             (int)(e - s + 1), s + 1
2675             ),
2676         UTF ? SVf_UTF8 : 0);
2677         return NULL;
2678     }
2679 
2680     if (SvUTF8(res)) { /* Don't accept malformed input */
2681         const U8* first_bad_char_loc;
2682         STRLEN len;
2683         const char* const str = SvPV_const(res, len);
2684         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2685             /* If warnings are on, this will print a more detailed analysis of
2686              * what is wrong than the error message below */
2687             utf8n_to_uvchr(first_bad_char_loc,
2688                            (char *) first_bad_char_loc - str,
2689                            NULL, 0);
2690 
2691             /* We deliberately don't try to print the malformed character,
2692              * which might not print very well; it also may be just the first
2693              * of many malformations, so don't print what comes after it */
2694             yyerror_pv(
2695               Perl_form(aTHX_
2696                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2697                  (int) (e - backslash_ptr + 1), backslash_ptr,
2698                  (int) ((char *) first_bad_char_loc - str), str
2699               ),
2700               SVf_UTF8);
2701             return NULL;
2702         }
2703     }
2704 
2705     return res;
2706 
2707   bad_charname: {
2708 
2709         /* The final %.*s makes sure that should the trailing NUL be missing
2710          * that this print won't run off the end of the string */
2711         yyerror_pv(
2712           Perl_form(aTHX_
2713             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2714             (int)(s - backslash_ptr + 1), backslash_ptr,
2715             (int)(e - s + 1), s + 1
2716           ),
2717           UTF ? SVf_UTF8 : 0);
2718         return NULL;
2719     }
2720 
2721   multi_spaces:
2722         yyerror_pv(
2723           Perl_form(aTHX_
2724             "charnames alias definitions may not contain a sequence of "
2725             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2726             (int)(s - backslash_ptr + 1), backslash_ptr,
2727             (int)(e - s + 1), s + 1
2728           ),
2729           UTF ? SVf_UTF8 : 0);
2730         return NULL;
2731 }
2732 
2733 /*
2734   scan_const
2735 
2736   Extracts the next constant part of a pattern, double-quoted string,
2737   or transliteration.  This is terrifying code.
2738 
2739   For example, in parsing the double-quoted string "ab\x63$d", it would
2740   stop at the '$' and return an OP_CONST containing 'abc'.
2741 
2742   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2743   processing a pattern (PL_lex_inpat is true), a transliteration
2744   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2745 
2746   Returns a pointer to the character scanned up to. If this is
2747   advanced from the start pointer supplied (i.e. if anything was
2748   successfully parsed), will leave an OP_CONST for the substring scanned
2749   in pl_yylval. Caller must intuit reason for not parsing further
2750   by looking at the next characters herself.
2751 
2752   In patterns:
2753     expand:
2754       \N{FOO}  => \N{U+hex_for_character_FOO}
2755       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2756 
2757     pass through:
2758 	all other \-char, including \N and \N{ apart from \N{ABC}
2759 
2760     stops on:
2761 	@ and $ where it appears to be a var, but not for $ as tail anchor
2762         \l \L \u \U \Q \E
2763 	(?{  or  (??{
2764 
2765   In transliterations:
2766     characters are VERY literal, except for - not at the start or end
2767     of the string, which indicates a range. If the range is in bytes,
2768     scan_const expands the range to the full set of intermediate
2769     characters. If the range is in utf8, the hyphen is replaced with
2770     a certain range mark which will be handled by pmtrans() in op.c.
2771 
2772   In double-quoted strings:
2773     backslashes:
2774       double-quoted style: \r and \n
2775       constants: \x31, etc.
2776       deprecated backrefs: \1 (in substitution replacements)
2777       case and quoting: \U \Q \E
2778     stops on @ and $
2779 
2780   scan_const does *not* construct ops to handle interpolated strings.
2781   It stops processing as soon as it finds an embedded $ or @ variable
2782   and leaves it to the caller to work out what's going on.
2783 
2784   embedded arrays (whether in pattern or not) could be:
2785       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2786 
2787   $ in double-quoted strings must be the symbol of an embedded scalar.
2788 
2789   $ in pattern could be $foo or could be tail anchor.  Assumption:
2790   it's a tail anchor if $ is the last thing in the string, or if it's
2791   followed by one of "()| \r\n\t"
2792 
2793   \1 (backreferences) are turned into $1 in substitutions
2794 
2795   The structure of the code is
2796       while (there's a character to process) {
2797 	  handle transliteration ranges
2798 	  skip regexp comments /(?#comment)/ and codes /(?{code})/
2799 	  skip #-initiated comments in //x patterns
2800 	  check for embedded arrays
2801 	  check for embedded scalars
2802 	  if (backslash) {
2803 	      deprecate \1 in substitution replacements
2804 	      handle string-changing backslashes \l \U \Q \E, etc.
2805 	      switch (what was escaped) {
2806 		  handle \- in a transliteration (becomes a literal -)
2807 		  if a pattern and not \N{, go treat as regular character
2808 		  handle \132 (octal characters)
2809 		  handle \x15 and \x{1234} (hex characters)
2810 		  handle \N{name} (named characters, also \N{3,5} in a pattern)
2811 		  handle \cV (control characters)
2812 		  handle printf-style backslashes (\f, \r, \n, etc)
2813 	      } (end switch)
2814 	      continue
2815 	  } (end if backslash)
2816           handle regular character
2817     } (end while character to read)
2818 
2819 */
2820 
2821 STATIC char *
2822 S_scan_const(pTHX_ char *start)
2823 {
2824     char *send = PL_bufend;		/* end of the constant */
2825     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2826                                            on sizing. */
2827     char *s = start;			/* start of the constant */
2828     char *d = SvPVX(sv);		/* destination for copies */
2829     bool dorange = FALSE;               /* are we in a translit range? */
2830     bool didrange = FALSE;              /* did we just finish a range? */
2831     bool in_charclass = FALSE;          /* within /[...]/ */
2832     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2833     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2834                                            UTF8?  But, this can show as true
2835                                            when the source isn't utf8, as for
2836                                            example when it is entirely composed
2837                                            of hex constants */
2838     SV *res;		                /* result from charnames */
2839     STRLEN offset_to_max;   /* The offset in the output to where the range
2840                                high-end character is temporarily placed */
2841 
2842     /* Note on sizing:  The scanned constant is placed into sv, which is
2843      * initialized by newSV() assuming one byte of output for every byte of
2844      * input.  This routine expects newSV() to allocate an extra byte for a
2845      * trailing NUL, which this routine will append if it gets to the end of
2846      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2847      * CAPITAL LETTER A}), or more output than input if the constant ends up
2848      * recoded to utf8, but each time a construct is found that might increase
2849      * the needed size, SvGROW() is called.  Its size parameter each time is
2850      * based on the best guess estimate at the time, namely the length used so
2851      * far, plus the length the current construct will occupy, plus room for
2852      * the trailing NUL, plus one byte for every input byte still unscanned */
2853 
2854     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2855                        before set */
2856 #ifdef EBCDIC
2857     int backslash_N = 0;            /* ? was the character from \N{} */
2858     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2859                                        platform-specific like \x65 */
2860 #endif
2861 
2862     PERL_ARGS_ASSERT_SCAN_CONST;
2863 
2864     assert(PL_lex_inwhat != OP_TRANSR);
2865     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2866 	/* If we are doing a trans and we know we want UTF8 set expectation */
2867 	has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2868 	this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2869     }
2870 
2871     /* Protect sv from errors and fatal warnings. */
2872     ENTER_with_name("scan_const");
2873     SAVEFREESV(sv);
2874 
2875     while (s < send
2876            || dorange   /* Handle tr/// range at right edge of input */
2877     ) {
2878 
2879         /* get transliterations out of the way (they're most literal) */
2880 	if (PL_lex_inwhat == OP_TRANS) {
2881 
2882             /* But there isn't any special handling necessary unless there is a
2883              * range, so for most cases we just drop down and handle the value
2884              * as any other.  There are two exceptions.
2885              *
2886              * 1.  A minus sign indicates that we are actually going to have
2887              *     a range.  In this case, skip the '-', set a flag, then drop
2888              *     down to handle what should be the end range value.
2889              * 2.  After we've handled that value, the next time through, that
2890              *     flag is set and we fix up the range.
2891              *
2892              * Ranges entirely within Latin1 are expanded out entirely, in
2893              * order to avoid the significant overhead of making a swash.
2894              * Ranges that extend above Latin1 have to have a swash, so there
2895              * is no advantage to abbreviate them here, so they are stored here
2896              * as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies a
2897              * hyphen without any possible ambiguity.  On EBCDIC machines, if
2898              * the range is expressed as Unicode, the Latin1 portion is
2899              * expanded out even if the entire range extends above Latin1.
2900              * This is because each code point in it has to be processed here
2901              * individually to get its native translation */
2902 
2903 	    if (! dorange) {
2904 
2905                 /* Here, we don't think we're in a range.  If we've processed
2906                  * at least one character, then see if this next one is a '-',
2907                  * indicating the previous one was the start of a range.  But
2908                  * don't bother if we're too close to the end for the minus to
2909                  * mean that. */
2910                 if (*s != '-' || s >= send - 1 || s == start) {
2911 
2912                     /* A regular character.  Process like any other, but first
2913                      * clear any flags */
2914                     didrange = FALSE;
2915                     dorange = FALSE;
2916 #ifdef EBCDIC
2917                     non_portable_endpoint = 0;
2918                     backslash_N = 0;
2919 #endif
2920                     /* Drops down to generic code to process current byte */
2921                 }
2922                 else {
2923                     if (didrange) { /* Something like y/A-C-Z// */
2924                         Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2925                     }
2926 
2927                     dorange = TRUE;
2928 
2929                     s++;    /* Skip past the minus */
2930 
2931                     /* d now points to where the end-range character will be
2932                      * placed.  Save it so won't have to go finding it later,
2933                      * and drop down to get that character.  (Actually we
2934                      * instead save the offset, to handle the case where a
2935                      * realloc in the meantime could change the actual
2936                      * pointer).  We'll finish processing the range the next
2937                      * time through the loop */
2938                     offset_to_max = d - SvPVX_const(sv);
2939                 }
2940             }  /* End of not a range */
2941             else {
2942                 /* Here we have parsed a range.  Now must handle it.  At this
2943                  * point:
2944                  * 'sv' is a SV* that contains the output string we are
2945                  *      constructing.  The final two characters in that string
2946                  *      are the range start and range end, in order.
2947                  * 'd'  points to just beyond the range end in the 'sv' string,
2948                  *      where we would next place something
2949                  * 'offset_to_max' is the offset in 'sv' at which the character
2950                  *      before 'd' begins.
2951                  */
2952                 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2953                 const char * min_ptr;
2954                 IV range_min;
2955 		IV range_max;	/* last character in range */
2956                 STRLEN save_offset;
2957                 STRLEN grow;
2958 #ifndef EBCDIC  /* Not meaningful except in EBCDIC, so initialize to false */
2959                 const bool convert_unicode = FALSE;
2960                 const IV real_range_max = 0;
2961 #else
2962                 bool convert_unicode;
2963                 IV real_range_max = 0;
2964 #endif
2965 
2966                 /* Get the range-ends code point values. */
2967                 if (has_utf8) {
2968                     /* We know the utf8 is valid, because we just constructed
2969                      * it ourselves in previous loop iterations */
2970                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2971                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2972                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2973                 }
2974                 else {
2975                     min_ptr = max_ptr - 1;
2976                     range_min = * (U8*) min_ptr;
2977                     range_max = * (U8*) max_ptr;
2978                 }
2979 
2980 #ifdef EBCDIC
2981                 /* On EBCDIC platforms, we may have to deal with portable
2982                  * ranges.  These happen if at least one range endpoint is a
2983                  * Unicode value (\N{...}), or if the range is a subset of
2984                  * [A-Z] or [a-z], and both ends are literal characters,
2985                  * like 'A', and not like \x{C1} */
2986                 if ((convert_unicode
2987                      = cBOOL(backslash_N)   /* \N{} forces Unicode, hence
2988                                                portable range */
2989                       || (   ! non_portable_endpoint
2990                           && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
2991                              || (isUPPER_A(range_min) && isUPPER_A(range_max))))
2992                 )) {
2993 
2994                     /* Special handling is needed for these portable ranges.
2995                      * They are defined to all be in Unicode terms, which
2996                      * include all Unicode code points between the end points.
2997                      * Convert to Unicode to get the Unicode range.  Later we
2998                      * will convert each code point in the range back to
2999                      * native.  */
3000                     range_min = NATIVE_TO_UNI(range_min);
3001                     range_max = NATIVE_TO_UNI(range_max);
3002                 }
3003 #endif
3004 
3005                 if (range_min > range_max) {
3006                     if (convert_unicode) {
3007                         /* Need to convert back to native for meaningful
3008                          * messages for this platform */
3009                         range_min = UNI_TO_NATIVE(range_min);
3010                         range_max = UNI_TO_NATIVE(range_max);
3011                     }
3012 
3013                     /* Use the characters themselves for the error message if
3014                      * ASCII printables; otherwise some visible representation
3015                      * of them */
3016                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3017                         Perl_croak(aTHX_
3018 			 "Invalid range \"%c-%c\" in transliteration operator",
3019 			 (char)range_min, (char)range_max);
3020                     }
3021                     else if (convert_unicode) {
3022                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3023                         Perl_croak(aTHX_
3024 			       "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
3025                                " in transliteration operator",
3026 			       range_min, range_max);
3027                     }
3028                     else {
3029                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3030                         Perl_croak(aTHX_
3031 			       "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
3032                                " in transliteration operator",
3033 			       range_min, range_max);
3034                     }
3035                 }
3036 
3037 		if (has_utf8) {
3038 
3039                     /* We try to avoid creating a swash.  If the upper end of
3040                      * this range is below 256, this range won't force a swash;
3041                      * otherwise it does force a swash, and as long as we have
3042                      * to have one, we might as well not expand things out.
3043                      * But if it's EBCDIC, we may have to look at each
3044                      * character below 256 if we have to convert to/from
3045                      * Unicode values */
3046                     if (range_max > 255
3047 #ifdef EBCDIC
3048 		        && (range_min > 255 || ! convert_unicode)
3049 #endif
3050                     ) {
3051                         /* Move the high character one byte to the right; then
3052                          * insert between it and the range begin, an illegal
3053                          * byte which serves to indicate this is a range (using
3054                          * a '-' could be ambiguous). */
3055                         char *e = d++;
3056                         while (e-- > max_ptr) {
3057                             *(e + 1) = *e;
3058                         }
3059                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3060                         goto range_done;
3061                     }
3062 
3063                     /* Here, we're going to expand out the range.  For EBCDIC
3064                      * the range can extend above 255 (not so in ASCII), so
3065                      * for EBCDIC, split it into the parts above and below
3066                      * 255/256 */
3067 #ifdef EBCDIC
3068                     if (range_max > 255) {
3069                         real_range_max = range_max;
3070                         range_max = 255;
3071                     }
3072 #endif
3073 		}
3074 
3075                 /* Here we need to expand out the string to contain each
3076                  * character in the range.  Grow the output to handle this */
3077 
3078                 save_offset  = min_ptr - SvPVX_const(sv);
3079 
3080                 /* The base growth is the number of code points in the range */
3081                 grow = range_max - range_min + 1;
3082                 if (has_utf8) {
3083 
3084                     /* But if the output is UTF-8, some of those characters may
3085                      * need two bytes (since the maximum range value here is
3086                      * 255, the max bytes per character is two).  On ASCII
3087                      * platforms, it's not much trouble to get an accurate
3088                      * count of what's needed.  But on EBCDIC, the ones that
3089                      * need 2 bytes are scattered around, so just use a worst
3090                      * case value instead of calculating for that platform.  */
3091 #ifdef EBCDIC
3092                     grow *= 2;
3093 #else
3094                     /* Only those above 127 require 2 bytes.  This may be
3095                      * everything in the range, or not */
3096                     if (range_min > 127) {
3097                         grow *= 2;
3098                     }
3099                     else if (range_max > 127) {
3100                         grow += range_max - 127;
3101                     }
3102 #endif
3103                 }
3104 
3105                 /* Subtract 3 for the bytes that were already accounted for
3106                  * (min, max, and the hyphen) */
3107                 SvGROW(sv, SvLEN(sv) + grow - 3);
3108 		d = SvPVX(sv) + save_offset;	/* refresh d after realloc */
3109 
3110                 /* Here, we expand out the range.  On ASCII platforms, the
3111                  * compiler should optimize out the 'convert_unicode==TRUE'
3112                  * portion of this */
3113                 if (convert_unicode) {
3114                     IV i;
3115 
3116                     /* Recall that the min and max are now in Unicode terms, so
3117                      * we have to convert each character to its native
3118                      * equivalent */
3119                     if (has_utf8) {
3120                         for (i = range_min; i <= range_max; i++) {
3121                             append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3122                                                          (U8 **) &d);
3123                         }
3124                     }
3125                     else {
3126                         for (i = range_min; i <= range_max; i++) {
3127                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3128                         }
3129 		    }
3130 		}
3131                 else {
3132                     IV i;
3133 
3134                     /* Here, no conversions are necessary, which means that the
3135                      * first character in the range is already in 'd' and
3136                      * valid, so we can skip overwriting it */
3137                     if (has_utf8) {
3138                         d += UTF8SKIP(d);
3139                         for (i = range_min + 1; i <= range_max; i++) {
3140                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3141                         }
3142                     }
3143                     else {
3144                         d++;
3145                         for (i = range_min + 1; i <= range_max; i++) {
3146                             *d++ = (char)i;
3147                         }
3148 		    }
3149 		}
3150 
3151                 /* (Compilers should optimize this out for non-EBCDIC).  If the
3152                  * original range extended above 255, add in that portion */
3153                 if (real_range_max) {
3154                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3155                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3156                     if (real_range_max > 0x101)
3157                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3158                     if (real_range_max > 0x100)
3159                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3160                 }
3161 
3162               range_done:
3163 		/* mark the range as done, and continue */
3164 		didrange = TRUE;
3165 		dorange = FALSE;
3166 #ifdef EBCDIC
3167 		non_portable_endpoint = 0;
3168                 backslash_N = 0;
3169 #endif
3170 		continue;
3171 	    } /* End of is a range */
3172         } /* End of transliteration.  Joins main code after these else's */
3173 	else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3174 	    char *s1 = s-1;
3175 	    int esc = 0;
3176 	    while (s1 >= start && *s1-- == '\\')
3177 		esc = !esc;
3178 	    if (!esc)
3179 		in_charclass = TRUE;
3180 	}
3181 
3182 	else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3183 	    char *s1 = s-1;
3184 	    int esc = 0;
3185 	    while (s1 >= start && *s1-- == '\\')
3186 		esc = !esc;
3187 	    if (!esc)
3188 		in_charclass = FALSE;
3189 	}
3190 
3191 	/* skip for regexp comments /(?#comment)/, except for the last
3192 	 * char, which will be done separately.
3193 	 * Stop on (?{..}) and friends */
3194 
3195 	else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3196 	    if (s[2] == '#') {
3197 		while (s+1 < send && *s != ')')
3198 		    *d++ = *s++;
3199 	    }
3200 	    else if (!PL_lex_casemods
3201                      && (    s[2] == '{' /* This should match regcomp.c */
3202 		         || (s[2] == '?' && s[3] == '{')))
3203 	    {
3204 		break;
3205 	    }
3206 	}
3207 
3208 	/* likewise skip #-initiated comments in //x patterns */
3209 	else if (*s == '#'
3210                  && PL_lex_inpat
3211                  && !in_charclass
3212                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3213         {
3214 	    while (s+1 < send && *s != '\n')
3215 		*d++ = *s++;
3216 	}
3217 
3218 	/* no further processing of single-quoted regex */
3219 	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3220 	    goto default_action;
3221 
3222 	/* check for embedded arrays
3223 	   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3224 	   */
3225 	else if (*s == '@' && s[1]) {
3226 	    if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3227 		break;
3228 	    if (strchr(":'{$", s[1]))
3229 		break;
3230 	    if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3231 		break; /* in regexp, neither @+ nor @- are interpolated */
3232 	}
3233 
3234 	/* check for embedded scalars.  only stop if we're sure it's a
3235 	   variable.
3236         */
3237 	else if (*s == '$') {
3238 	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3239 		break;
3240 	    if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3241 		if (s[1] == '\\') {
3242 		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3243 				   "Possible unintended interpolation of $\\ in regex");
3244 		}
3245 		break;		/* in regexp, $ might be tail anchor */
3246             }
3247 	}
3248 
3249 	/* End of else if chain - OP_TRANS rejoin rest */
3250 
3251 	/* backslashes */
3252 	if (*s == '\\' && s+1 < send) {
3253 	    char* e;	/* Can be used for ending '}', etc. */
3254 
3255 	    s++;
3256 
3257 	    /* warn on \1 - \9 in substitution replacements, but note that \11
3258 	     * is an octal; and \19 is \1 followed by '9' */
3259 	    if (PL_lex_inwhat == OP_SUBST
3260                 && !PL_lex_inpat
3261                 && isDIGIT(*s)
3262                 && *s != '0'
3263                 && !isDIGIT(s[1]))
3264 	    {
3265 		/* diag_listed_as: \%d better written as $%d */
3266 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3267 		*--s = '$';
3268 		break;
3269 	    }
3270 
3271 	    /* string-change backslash escapes */
3272 	    if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3273 		--s;
3274 		break;
3275 	    }
3276 	    /* In a pattern, process \N, but skip any other backslash escapes.
3277 	     * This is because we don't want to translate an escape sequence
3278 	     * into a meta symbol and have the regex compiler use the meta
3279 	     * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3280 	     * in spite of this, we do have to process \N here while the proper
3281 	     * charnames handler is in scope.  See bugs #56444 and #62056.
3282              *
3283 	     * There is a complication because \N in a pattern may also stand
3284 	     * for 'match a non-nl', and not mean a charname, in which case its
3285 	     * processing should be deferred to the regex compiler.  To be a
3286 	     * charname it must be followed immediately by a '{', and not look
3287 	     * like \N followed by a curly quantifier, i.e., not something like
3288 	     * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3289 	     * quantifier */
3290 	    else if (PL_lex_inpat
3291 		    && (*s != 'N'
3292 			|| s[1] != '{'
3293 			|| regcurly(s + 1)))
3294 	    {
3295 		*d++ = '\\';
3296 		goto default_action;
3297 	    }
3298 
3299 	    switch (*s) {
3300 	    default:
3301 	        {
3302 		    if ((isALPHANUMERIC(*s)))
3303 			Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3304 				       "Unrecognized escape \\%c passed through",
3305 				       *s);
3306 		    /* default action is to copy the quoted character */
3307 		    goto default_action;
3308 		}
3309 
3310 	    /* eg. \132 indicates the octal constant 0132 */
3311 	    case '0': case '1': case '2': case '3':
3312 	    case '4': case '5': case '6': case '7':
3313 		{
3314                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3315                     STRLEN len = 3;
3316 		    uv = grok_oct(s, &len, &flags, NULL);
3317 		    s += len;
3318                     if (len < 3 && s < send && isDIGIT(*s)
3319                         && ckWARN(WARN_MISC))
3320                     {
3321                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3322                                     "%s", form_short_octal_warning(s, len));
3323                     }
3324 		}
3325 		goto NUM_ESCAPE_INSERT;
3326 
3327 	    /* eg. \o{24} indicates the octal constant \024 */
3328 	    case 'o':
3329 		{
3330 		    const char* error;
3331 
3332 		    bool valid = grok_bslash_o(&s, &uv, &error,
3333                                                TRUE, /* Output warning */
3334                                                FALSE, /* Not strict */
3335                                                TRUE, /* Output warnings for
3336                                                          non-portables */
3337                                                UTF);
3338 		    if (! valid) {
3339 			yyerror(error);
3340 			continue;
3341 		    }
3342 		    goto NUM_ESCAPE_INSERT;
3343 		}
3344 
3345 	    /* eg. \x24 indicates the hex constant 0x24 */
3346 	    case 'x':
3347 		{
3348 		    const char* error;
3349 
3350 		    bool valid = grok_bslash_x(&s, &uv, &error,
3351                                                TRUE, /* Output warning */
3352                                                FALSE, /* Not strict */
3353                                                TRUE,  /* Output warnings for
3354                                                          non-portables */
3355                                                UTF);
3356 		    if (! valid) {
3357 			yyerror(error);
3358 			continue;
3359 		    }
3360 		}
3361 
3362 	      NUM_ESCAPE_INSERT:
3363 		/* Insert oct or hex escaped character. */
3364 
3365 		/* Here uv is the ordinal of the next character being added */
3366 		if (UVCHR_IS_INVARIANT(uv)) {
3367 		    *d++ = (char) uv;
3368 		}
3369 		else {
3370 		    if (!has_utf8 && uv > 255) {
3371 			/* Might need to recode whatever we have accumulated so
3372 			 * far if it contains any chars variant in utf8 or
3373 			 * utf-ebcdic. */
3374 
3375 			SvCUR_set(sv, d - SvPVX_const(sv));
3376 			SvPOK_on(sv);
3377 			*d = '\0';
3378 			/* See Note on sizing above.  */
3379 			sv_utf8_upgrade_flags_grow(
3380                                        sv,
3381                                        SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3382                                                   /* Above-latin1 in string
3383                                                    * implies no encoding */
3384                                                   |SV_UTF8_NO_ENCODING,
3385                                        UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3386 			d = SvPVX(sv) + SvCUR(sv);
3387 			has_utf8 = TRUE;
3388                     }
3389 
3390                     if (has_utf8) {
3391                        /* Usually, there will already be enough room in 'sv'
3392                         * since such escapes are likely longer than any UTF-8
3393                         * sequence they can end up as.  This isn't the case on
3394                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3395                         * UTF-8 for it contains 14.  And, we have to allow for
3396                         * a trailing NUL.  It probably can't happen on ASCII
3397                         * platforms, but be safe */
3398                         const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3399                                             + 1;
3400                         if (UNLIKELY(needed > SvLEN(sv))) {
3401                             SvCUR_set(sv, d - SvPVX_const(sv));
3402                             d = sv_grow(sv, needed) + SvCUR(sv);
3403                         }
3404 
3405 		        d = (char*)uvchr_to_utf8((U8*)d, uv);
3406 			if (PL_lex_inwhat == OP_TRANS
3407                             && PL_sublex_info.sub_op)
3408                         {
3409 			    PL_sublex_info.sub_op->op_private |=
3410 				(PL_lex_repl ? OPpTRANS_FROM_UTF
3411 					     : OPpTRANS_TO_UTF);
3412 			}
3413                     }
3414 		    else {
3415 		        *d++ = (char)uv;
3416 		    }
3417 		}
3418 #ifdef EBCDIC
3419                 non_portable_endpoint++;
3420 #endif
3421 		continue;
3422 
3423  	    case 'N':
3424                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3425                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3426                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3427                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3428                  * convenience all three forms are referred to as "named
3429                  * characters" below.
3430                  *
3431                  * For patterns, \N also can mean to match a non-newline.  Code
3432                  * before this 'switch' statement should already have handled
3433                  * this situation, and hence this code only has to deal with
3434                  * the named character cases.
3435                  *
3436                  * For non-patterns, the named characters are converted to
3437                  * their string equivalents.  In patterns, named characters are
3438                  * not converted to their ultimate forms for the same reasons
3439                  * that other escapes aren't.  Instead, they are converted to
3440                  * the \N{U+...} form to get the value from the charnames that
3441                  * is in effect right now, while preserving the fact that it
3442                  * was a named character, so that the regex compiler knows
3443                  * this.
3444                  *
3445 		 * The structure of this section of code (besides checking for
3446 		 * errors and upgrading to utf8) is:
3447                  *    If the named character is of the form \N{U+...}, pass it
3448                  *      through if a pattern; otherwise convert the code point
3449                  *      to utf8
3450                  *    Otherwise must be some \N{NAME}: convert to
3451                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3452                  *
3453                  * Transliteration is an exception.  The conversion to utf8 is
3454                  * only done if the code point requires it to be representable.
3455                  *
3456                  * Here, 's' points to the 'N'; the test below is guaranteed to
3457 		 * succeed if we are being called on a pattern, as we already
3458                  * know from a test above that the next character is a '{'.  A
3459                  * non-pattern \N must mean 'named character', which requires
3460                  * braces */
3461 		s++;
3462 		if (*s != '{') {
3463 		    yyerror("Missing braces on \\N{}");
3464 		    continue;
3465 		}
3466 		s++;
3467 
3468 		/* If there is no matching '}', it is an error. */
3469 		if (! (e = strchr(s, '}'))) {
3470 		    if (! PL_lex_inpat) {
3471 			yyerror("Missing right brace on \\N{}");
3472 		    } else {
3473 			yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3474 		    }
3475 		    continue;
3476 		}
3477 
3478 		/* Here it looks like a named character */
3479 
3480 		if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3481 		    s += 2;	    /* Skip to next char after the 'U+' */
3482 		    if (PL_lex_inpat) {
3483 
3484                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3485                         /* Check the syntax.  */
3486                         const char *orig_s;
3487                         orig_s = s - 5;
3488                         if (!isXDIGIT(*s)) {
3489                           bad_NU:
3490                             yyerror(
3491                                 "Invalid hexadecimal number in \\N{U+...}"
3492                             );
3493                             s = e + 1;
3494                             continue;
3495                         }
3496                         while (++s < e) {
3497                             if (isXDIGIT(*s))
3498                                 continue;
3499                             else if ((*s == '.' || *s == '_')
3500                                   && isXDIGIT(s[1]))
3501                                 continue;
3502                             goto bad_NU;
3503                         }
3504 
3505                         /* Pass everything through unchanged.
3506                          * +1 is for the '}' */
3507                         Copy(orig_s, d, e - orig_s + 1, char);
3508                         d += e - orig_s + 1;
3509 		    }
3510 		    else {  /* Not a pattern: convert the hex to string */
3511                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3512 				| PERL_SCAN_SILENT_ILLDIGIT
3513 				| PERL_SCAN_DISALLOW_PREFIX;
3514                         STRLEN len = e - s;
3515                         uv = grok_hex(s, &len, &flags, NULL);
3516                         if (len == 0 || (len != (STRLEN)(e - s)))
3517                             goto bad_NU;
3518 
3519                          /* For non-tr///, if the destination is not in utf8,
3520                           * unconditionally recode it to be so.  This is
3521                           * because \N{} implies Unicode semantics, and scalars
3522                           * have to be in utf8 to guarantee those semantics.
3523                           * tr/// doesn't care about Unicode rules, so no need
3524                           * there to upgrade to UTF-8 for small enough code
3525                           * points */
3526 			if (! has_utf8 && (   uv > 0xFF
3527                                            || PL_lex_inwhat != OP_TRANS))
3528                         {
3529 			    SvCUR_set(sv, d - SvPVX_const(sv));
3530 			    SvPOK_on(sv);
3531 			    *d = '\0';
3532 			    /* See Note on sizing above.  */
3533 			    sv_utf8_upgrade_flags_grow(
3534                                     sv,
3535                                     SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3536 				    UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
3537 			    d = SvPVX(sv) + SvCUR(sv);
3538 			    has_utf8 = TRUE;
3539 			}
3540 
3541                         /* Add the (Unicode) code point to the output. */
3542 			if (OFFUNI_IS_INVARIANT(uv)) {
3543 			    *d++ = (char) LATIN1_TO_NATIVE(uv);
3544 			}
3545 			else {
3546                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3547                         }
3548 		    }
3549 		}
3550 		else /* Here is \N{NAME} but not \N{U+...}. */
3551                      if ((res = get_and_check_backslash_N_name(s, e)))
3552                 {
3553                     STRLEN len;
3554                     const char *str = SvPV_const(res, len);
3555                     if (PL_lex_inpat) {
3556 
3557 			if (! len) { /* The name resolved to an empty string */
3558 			    Copy("\\N{}", d, 4, char);
3559 			    d += 4;
3560 			}
3561 			else {
3562 			    /* In order to not lose information for the regex
3563 			    * compiler, pass the result in the specially made
3564 			    * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3565 			    * the code points in hex of each character
3566 			    * returned by charnames */
3567 
3568 			    const char *str_end = str + len;
3569 			    const STRLEN off = d - SvPVX_const(sv);
3570 
3571                             if (! SvUTF8(res)) {
3572                                 /* For the non-UTF-8 case, we can determine the
3573                                  * exact length needed without having to parse
3574                                  * through the string.  Each character takes up
3575                                  * 2 hex digits plus either a trailing dot or
3576                                  * the "}" */
3577                                 const char initial_text[] = "\\N{U+";
3578                                 const STRLEN initial_len = sizeof(initial_text)
3579                                                            - 1;
3580                                 d = off + SvGROW(sv, off
3581                                                     + 3 * len
3582 
3583                                                     /* +1 for trailing NUL */
3584                                                     + initial_len + 1
3585 
3586                                                     + (STRLEN)(send - e));
3587                                 Copy(initial_text, d, initial_len, char);
3588                                 d += initial_len;
3589                                 while (str < str_end) {
3590                                     char hex_string[4];
3591                                     int len =
3592                                         my_snprintf(hex_string,
3593                                                   sizeof(hex_string),
3594                                                   "%02X.",
3595 
3596                                                   /* The regex compiler is
3597                                                    * expecting Unicode, not
3598                                                    * native */
3599                                                   NATIVE_TO_LATIN1(*str));
3600                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3601                                                            sizeof(hex_string));
3602                                     Copy(hex_string, d, 3, char);
3603                                     d += 3;
3604                                     str++;
3605                                 }
3606                                 d--;    /* Below, we will overwrite the final
3607                                            dot with a right brace */
3608                             }
3609                             else {
3610                                 STRLEN char_length; /* cur char's byte length */
3611 
3612                                 /* and the number of bytes after this is
3613                                  * translated into hex digits */
3614                                 STRLEN output_length;
3615 
3616                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3617                                  * for max('U+', '.'); and 1 for NUL */
3618                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3619 
3620                                 /* Get the first character of the result. */
3621                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3622                                                         len,
3623                                                         &char_length,
3624                                                         UTF8_ALLOW_ANYUV);
3625                                 /* Convert first code point to Unicode hex,
3626                                  * including the boiler plate before it. */
3627                                 output_length =
3628                                     my_snprintf(hex_string, sizeof(hex_string),
3629                                              "\\N{U+%X",
3630                                              (unsigned int) NATIVE_TO_UNI(uv));
3631 
3632                                 /* Make sure there is enough space to hold it */
3633                                 d = off + SvGROW(sv, off
3634                                                     + output_length
3635                                                     + (STRLEN)(send - e)
3636                                                     + 2);	/* '}' + NUL */
3637                                 /* And output it */
3638                                 Copy(hex_string, d, output_length, char);
3639                                 d += output_length;
3640 
3641                                 /* For each subsequent character, append dot and
3642                                 * its Unicode code point in hex */
3643                                 while ((str += char_length) < str_end) {
3644                                     const STRLEN off = d - SvPVX_const(sv);
3645                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3646                                                             str_end - str,
3647                                                             &char_length,
3648                                                             UTF8_ALLOW_ANYUV);
3649                                     output_length =
3650                                         my_snprintf(hex_string,
3651                                              sizeof(hex_string),
3652                                              ".%X",
3653                                              (unsigned int) NATIVE_TO_UNI(uv));
3654 
3655                                     d = off + SvGROW(sv, off
3656                                                         + output_length
3657                                                         + (STRLEN)(send - e)
3658                                                         + 2);	/* '}' +  NUL */
3659                                     Copy(hex_string, d, output_length, char);
3660                                     d += output_length;
3661                                 }
3662 			    }
3663 
3664 			    *d++ = '}';	/* Done.  Add the trailing brace */
3665 			}
3666 		    }
3667 		    else { /* Here, not in a pattern.  Convert the name to a
3668 			    * string. */
3669 
3670                         if (PL_lex_inwhat == OP_TRANS) {
3671                             str = SvPV_const(res, len);
3672                             if (len > ((SvUTF8(res))
3673                                        ? UTF8SKIP(str)
3674                                        : 1U))
3675                             {
3676                                 yyerror(Perl_form(aTHX_
3677                                     "%.*s must not be a named sequence"
3678                                     " in transliteration operator",
3679                                         /*  +1 to include the "}" */
3680                                     (int) (e + 1 - start), start));
3681                                 goto end_backslash_N;
3682                             }
3683                         }
3684                         else if (! SvUTF8(res)) {
3685                             /* Make sure \N{} return is UTF-8.  This is because
3686                             * \N{} implies Unicode semantics, and scalars have to
3687                             * be in utf8 to guarantee those semantics; but not
3688                             * needed in tr/// */
3689                             sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3690                             str = SvPV_const(res, len);
3691                         }
3692 
3693                          /* Upgrade destination to be utf8 if this new
3694                           * component is */
3695 			if (! has_utf8 && SvUTF8(res)) {
3696 			    SvCUR_set(sv, d - SvPVX_const(sv));
3697 			    SvPOK_on(sv);
3698 			    *d = '\0';
3699 			    /* See Note on sizing above.  */
3700 			    sv_utf8_upgrade_flags_grow(sv,
3701 						SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3702 						len + (STRLEN)(send - s) + 1);
3703 			    d = SvPVX(sv) + SvCUR(sv);
3704 			    has_utf8 = TRUE;
3705 			} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3706 
3707 			    /* See Note on sizing above.  (NOTE: SvCUR() is not
3708 			     * set correctly here). */
3709 			    const STRLEN off = d - SvPVX_const(sv);
3710 			    d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3711 			}
3712 			Copy(str, d, len, char);
3713 			d += len;
3714 		    }
3715 
3716 		    SvREFCNT_dec(res);
3717 
3718 		} /* End \N{NAME} */
3719 
3720               end_backslash_N:
3721 #ifdef EBCDIC
3722                 backslash_N++; /* \N{} is defined to be Unicode */
3723 #endif
3724 		s = e + 1;  /* Point to just after the '}' */
3725 		continue;
3726 
3727 	    /* \c is a control character */
3728 	    case 'c':
3729 		s++;
3730 		if (s < send) {
3731 		    *d++ = grok_bslash_c(*s++, 1);
3732 		}
3733 		else {
3734 		    yyerror("Missing control char name in \\c");
3735 		}
3736 #ifdef EBCDIC
3737                 non_portable_endpoint++;
3738 #endif
3739 		continue;
3740 
3741 	    /* printf-style backslashes, formfeeds, newlines, etc */
3742 	    case 'b':
3743 		*d++ = '\b';
3744 		break;
3745 	    case 'n':
3746 		*d++ = '\n';
3747 		break;
3748 	    case 'r':
3749 		*d++ = '\r';
3750 		break;
3751 	    case 'f':
3752 		*d++ = '\f';
3753 		break;
3754 	    case 't':
3755 		*d++ = '\t';
3756 		break;
3757 	    case 'e':
3758 		*d++ = ESC_NATIVE;
3759 		break;
3760 	    case 'a':
3761 		*d++ = '\a';
3762 		break;
3763 	    } /* end switch */
3764 
3765 	    s++;
3766 	    continue;
3767 	} /* end if (backslash) */
3768 
3769     default_action:
3770 	/* If we started with encoded form, or already know we want it,
3771 	   then encode the next character */
3772 	if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3773 	    STRLEN len  = 1;
3774 
3775 	    /* One might think that it is wasted effort in the case of the
3776 	     * source being utf8 (this_utf8 == TRUE) to take the next character
3777 	     * in the source, convert it to an unsigned value, and then convert
3778 	     * it back again.  But the source has not been validated here.  The
3779 	     * routine that does the conversion checks for errors like
3780 	     * malformed utf8 */
3781 
3782 	    const UV nextuv   = (this_utf8)
3783                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3784                                 : (UV) ((U8) *s);
3785 	    const STRLEN need = UVCHR_SKIP(nextuv);
3786 	    if (!has_utf8) {
3787 		SvCUR_set(sv, d - SvPVX_const(sv));
3788 		SvPOK_on(sv);
3789 		*d = '\0';
3790 		/* See Note on sizing above.  */
3791 		sv_utf8_upgrade_flags_grow(sv,
3792 					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3793 					need + (STRLEN)(send - s) + 1);
3794 		d = SvPVX(sv) + SvCUR(sv);
3795 		has_utf8 = TRUE;
3796 	    } else if (need > len) {
3797 		/* encoded value larger than old, may need extra space (NOTE:
3798 		 * SvCUR() is not set correctly here).   See Note on sizing
3799 		 * above.  */
3800 		const STRLEN off = d - SvPVX_const(sv);
3801 		d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3802 	    }
3803 	    s += len;
3804 
3805 	    d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3806 	}
3807 	else {
3808 	    *d++ = *s++;
3809 	}
3810     } /* while loop to process each character */
3811 
3812     /* terminate the string and set up the sv */
3813     *d = '\0';
3814     SvCUR_set(sv, d - SvPVX_const(sv));
3815     if (SvCUR(sv) >= SvLEN(sv))
3816 	Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3817 		   " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3818 
3819     SvPOK_on(sv);
3820     if (IN_ENCODING && !has_utf8) {
3821 	sv_recode_to_utf8(sv, _get_encoding());
3822 	if (SvUTF8(sv))
3823 	    has_utf8 = TRUE;
3824     }
3825     if (has_utf8) {
3826 	SvUTF8_on(sv);
3827 	if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3828 	    PL_sublex_info.sub_op->op_private |=
3829 		    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3830 	}
3831     }
3832 
3833     /* shrink the sv if we allocated more than we used */
3834     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3835 	SvPV_shrink_to_cur(sv);
3836     }
3837 
3838     /* return the substring (via pl_yylval) only if we parsed anything */
3839     if (s > start) {
3840 	char *s2 = start;
3841 	for (; s2 < s; s2++) {
3842 	    if (*s2 == '\n')
3843 		COPLINE_INC_WITH_HERELINES;
3844 	}
3845 	SvREFCNT_inc_simple_void_NN(sv);
3846 	if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3847             && ! PL_parser->lex_re_reparsing)
3848         {
3849 	    const char *const key = PL_lex_inpat ? "qr" : "q";
3850 	    const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3851 	    const char *type;
3852 	    STRLEN typelen;
3853 
3854 	    if (PL_lex_inwhat == OP_TRANS) {
3855 		type = "tr";
3856 		typelen = 2;
3857 	    } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3858 		type = "s";
3859 		typelen = 1;
3860 	    } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3861 		type = "q";
3862 		typelen = 1;
3863 	    } else  {
3864 		type = "qq";
3865 		typelen = 2;
3866 	    }
3867 
3868 	    sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3869 				type, typelen);
3870 	}
3871 	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3872     }
3873     LEAVE_with_name("scan_const");
3874     return s;
3875 }
3876 
3877 /* S_intuit_more
3878  * Returns TRUE if there's more to the expression (e.g., a subscript),
3879  * FALSE otherwise.
3880  *
3881  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3882  *
3883  * ->[ and ->{ return TRUE
3884  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3885  * { and [ outside a pattern are always subscripts, so return TRUE
3886  * if we're outside a pattern and it's not { or [, then return FALSE
3887  * if we're in a pattern and the first char is a {
3888  *   {4,5} (any digits around the comma) returns FALSE
3889  * if we're in a pattern and the first char is a [
3890  *   [] returns FALSE
3891  *   [SOMETHING] has a funky algorithm to decide whether it's a
3892  *      character class or not.  It has to deal with things like
3893  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3894  * anything else returns TRUE
3895  */
3896 
3897 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3898 
3899 STATIC int
3900 S_intuit_more(pTHX_ char *s)
3901 {
3902     PERL_ARGS_ASSERT_INTUIT_MORE;
3903 
3904     if (PL_lex_brackets)
3905 	return TRUE;
3906     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3907 	return TRUE;
3908     if (*s == '-' && s[1] == '>'
3909      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3910      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3911 	||(s[2] == '@' && strchr("*[{",s[3])) ))
3912 	return TRUE;
3913     if (*s != '{' && *s != '[')
3914 	return FALSE;
3915     if (!PL_lex_inpat)
3916 	return TRUE;
3917 
3918     /* In a pattern, so maybe we have {n,m}. */
3919     if (*s == '{') {
3920 	if (regcurly(s)) {
3921 	    return FALSE;
3922 	}
3923 	return TRUE;
3924     }
3925 
3926     /* On the other hand, maybe we have a character class */
3927 
3928     s++;
3929     if (*s == ']' || *s == '^')
3930 	return FALSE;
3931     else {
3932         /* this is terrifying, and it works */
3933 	int weight;
3934 	char seen[256];
3935 	const char * const send = strchr(s,']');
3936 	unsigned char un_char, last_un_char;
3937 	char tmpbuf[sizeof PL_tokenbuf * 4];
3938 
3939 	if (!send)		/* has to be an expression */
3940 	    return TRUE;
3941 	weight = 2;		/* let's weigh the evidence */
3942 
3943 	if (*s == '$')
3944 	    weight -= 3;
3945 	else if (isDIGIT(*s)) {
3946 	    if (s[1] != ']') {
3947 		if (isDIGIT(s[1]) && s[2] == ']')
3948 		    weight -= 10;
3949 	    }
3950 	    else
3951 		weight -= 100;
3952 	}
3953 	Zero(seen,256,char);
3954 	un_char = 255;
3955 	for (; s < send; s++) {
3956 	    last_un_char = un_char;
3957 	    un_char = (unsigned char)*s;
3958 	    switch (*s) {
3959 	    case '@':
3960 	    case '&':
3961 	    case '$':
3962 		weight -= seen[un_char] * 10;
3963 		if (isWORDCHAR_lazy_if(s+1,UTF)) {
3964 		    int len;
3965                     char *tmp = PL_bufend;
3966                     PL_bufend = (char*)send;
3967                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3968                     PL_bufend = tmp;
3969 		    len = (int)strlen(tmpbuf);
3970 		    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3971                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3972 			weight -= 100;
3973 		    else
3974 			weight -= 10;
3975 		}
3976 		else if (*s == '$'
3977                          && s[1]
3978                          && strchr("[#!%*<>()-=",s[1]))
3979                 {
3980 		    if (/*{*/ strchr("])} =",s[2]))
3981 			weight -= 10;
3982 		    else
3983 			weight -= 1;
3984 		}
3985 		break;
3986 	    case '\\':
3987 		un_char = 254;
3988 		if (s[1]) {
3989 		    if (strchr("wds]",s[1]))
3990 			weight += 100;
3991 		    else if (seen[(U8)'\''] || seen[(U8)'"'])
3992 			weight += 1;
3993 		    else if (strchr("rnftbxcav",s[1]))
3994 			weight += 40;
3995 		    else if (isDIGIT(s[1])) {
3996 			weight += 40;
3997 			while (s[1] && isDIGIT(s[1]))
3998 			    s++;
3999 		    }
4000 		}
4001 		else
4002 		    weight += 100;
4003 		break;
4004 	    case '-':
4005 		if (s[1] == '\\')
4006 		    weight += 50;
4007 		if (strchr("aA01! ",last_un_char))
4008 		    weight += 30;
4009 		if (strchr("zZ79~",s[1]))
4010 		    weight += 30;
4011 		if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4012 		    weight -= 5;	/* cope with negative subscript */
4013 		break;
4014 	    default:
4015 		if (!isWORDCHAR(last_un_char)
4016 		    && !(last_un_char == '$' || last_un_char == '@'
4017 			 || last_un_char == '&')
4018 		    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4019 		    char *d = s;
4020 		    while (isALPHA(*s))
4021 			s++;
4022 		    if (keyword(d, s - d, 0))
4023 			weight -= 150;
4024 		}
4025 		if (un_char == last_un_char + 1)
4026 		    weight += 5;
4027 		weight -= seen[un_char];
4028 		break;
4029 	    }
4030 	    seen[un_char]++;
4031 	}
4032 	if (weight >= 0)	/* probably a character class */
4033 	    return FALSE;
4034     }
4035 
4036     return TRUE;
4037 }
4038 
4039 /*
4040  * S_intuit_method
4041  *
4042  * Does all the checking to disambiguate
4043  *   foo bar
4044  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4045  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4046  *
4047  * First argument is the stuff after the first token, e.g. "bar".
4048  *
4049  * Not a method if foo is a filehandle.
4050  * Not a method if foo is a subroutine prototyped to take a filehandle.
4051  * Not a method if it's really "Foo $bar"
4052  * Method if it's "foo $bar"
4053  * Not a method if it's really "print foo $bar"
4054  * Method if it's really "foo package::" (interpreted as package->foo)
4055  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4056  * Not a method if bar is a filehandle or package, but is quoted with
4057  *   =>
4058  */
4059 
4060 STATIC int
4061 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4062 {
4063     char *s = start + (*start == '$');
4064     char tmpbuf[sizeof PL_tokenbuf];
4065     STRLEN len;
4066     GV* indirgv;
4067 	/* Mustn't actually add anything to a symbol table.
4068 	   But also don't want to "initialise" any placeholder
4069 	   constants that might already be there into full
4070 	   blown PVGVs with attached PVCV.  */
4071     GV * const gv =
4072 	ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4073 
4074     PERL_ARGS_ASSERT_INTUIT_METHOD;
4075 
4076     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4077 	    return 0;
4078     if (cv && SvPOK(cv)) {
4079 	const char *proto = CvPROTO(cv);
4080 	if (proto) {
4081 	    while (*proto && (isSPACE(*proto) || *proto == ';'))
4082 		proto++;
4083 	    if (*proto == '*')
4084 		return 0;
4085 	}
4086     }
4087 
4088     if (*start == '$') {
4089 	if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4090             || isUPPER(*PL_tokenbuf))
4091 	    return 0;
4092 	s = skipspace(s);
4093 	PL_bufptr = start;
4094 	PL_expect = XREF;
4095 	return *s == '(' ? FUNCMETH : METHOD;
4096     }
4097 
4098     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4099     /* start is the beginning of the possible filehandle/object,
4100      * and s is the end of it
4101      * tmpbuf is a copy of it (but with single quotes as double colons)
4102      */
4103 
4104     if (!keyword(tmpbuf, len, 0)) {
4105 	if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4106 	    len -= 2;
4107 	    tmpbuf[len] = '\0';
4108 	    goto bare_package;
4109 	}
4110 	indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4111 	if (indirgv && GvCVu(indirgv))
4112 	    return 0;
4113 	/* filehandle or package name makes it a method */
4114 	if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4115 	    s = skipspace(s);
4116 	    if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4117 		return 0;	/* no assumptions -- "=>" quotes bareword */
4118       bare_package:
4119 	    NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4120 						  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4121 	    NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4122 	    PL_expect = XTERM;
4123 	    force_next(WORD);
4124 	    PL_bufptr = s;
4125 	    return *s == '(' ? FUNCMETH : METHOD;
4126 	}
4127     }
4128     return 0;
4129 }
4130 
4131 /* Encoded script support. filter_add() effectively inserts a
4132  * 'pre-processing' function into the current source input stream.
4133  * Note that the filter function only applies to the current source file
4134  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4135  *
4136  * The datasv parameter (which may be NULL) can be used to pass
4137  * private data to this instance of the filter. The filter function
4138  * can recover the SV using the FILTER_DATA macro and use it to
4139  * store private buffers and state information.
4140  *
4141  * The supplied datasv parameter is upgraded to a PVIO type
4142  * and the IoDIRP/IoANY field is used to store the function pointer,
4143  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4144  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4145  * private use must be set using malloc'd pointers.
4146  */
4147 
4148 SV *
4149 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4150 {
4151     if (!funcp)
4152 	return NULL;
4153 
4154     if (!PL_parser)
4155 	return NULL;
4156 
4157     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4158 	Perl_croak(aTHX_ "Source filters apply only to byte streams");
4159 
4160     if (!PL_rsfp_filters)
4161 	PL_rsfp_filters = newAV();
4162     if (!datasv)
4163 	datasv = newSV(0);
4164     SvUPGRADE(datasv, SVt_PVIO);
4165     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4166     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4167     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4168 			  FPTR2DPTR(void *, IoANY(datasv)),
4169 			  SvPV_nolen(datasv)));
4170     av_unshift(PL_rsfp_filters, 1);
4171     av_store(PL_rsfp_filters, 0, datasv) ;
4172     if (
4173 	!PL_parser->filtered
4174      && PL_parser->lex_flags & LEX_EVALBYTES
4175      && PL_bufptr < PL_bufend
4176     ) {
4177 	const char *s = PL_bufptr;
4178 	while (s < PL_bufend) {
4179 	    if (*s == '\n') {
4180 		SV *linestr = PL_parser->linestr;
4181 		char *buf = SvPVX(linestr);
4182 		STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4183 		STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4184 		STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4185 		STRLEN const linestart_pos = PL_parser->linestart - buf;
4186 		STRLEN const last_uni_pos =
4187 		    PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4188 		STRLEN const last_lop_pos =
4189 		    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4190 		av_push(PL_rsfp_filters, linestr);
4191 		PL_parser->linestr =
4192 		    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4193 		buf = SvPVX(PL_parser->linestr);
4194 		PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4195 		PL_parser->bufptr = buf + bufptr_pos;
4196 		PL_parser->oldbufptr = buf + oldbufptr_pos;
4197 		PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4198 		PL_parser->linestart = buf + linestart_pos;
4199 		if (PL_parser->last_uni)
4200 		    PL_parser->last_uni = buf + last_uni_pos;
4201 		if (PL_parser->last_lop)
4202 		    PL_parser->last_lop = buf + last_lop_pos;
4203 		SvLEN(linestr) = SvCUR(linestr);
4204 		SvCUR(linestr) = s-SvPVX(linestr);
4205 		PL_parser->filtered = 1;
4206 		break;
4207 	    }
4208 	    s++;
4209 	}
4210     }
4211     return(datasv);
4212 }
4213 
4214 
4215 /* Delete most recently added instance of this filter function.	*/
4216 void
4217 Perl_filter_del(pTHX_ filter_t funcp)
4218 {
4219     SV *datasv;
4220 
4221     PERL_ARGS_ASSERT_FILTER_DEL;
4222 
4223 #ifdef DEBUGGING
4224     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4225 			  FPTR2DPTR(void*, funcp)));
4226 #endif
4227     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4228 	return;
4229     /* if filter is on top of stack (usual case) just pop it off */
4230     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4231     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4232 	sv_free(av_pop(PL_rsfp_filters));
4233 
4234         return;
4235     }
4236     /* we need to search for the correct entry and clear it	*/
4237     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4238 }
4239 
4240 
4241 /* Invoke the idxth filter function for the current rsfp.	 */
4242 /* maxlen 0 = read one text line */
4243 I32
4244 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4245 {
4246     filter_t funcp;
4247     SV *datasv = NULL;
4248     /* This API is bad. It should have been using unsigned int for maxlen.
4249        Not sure if we want to change the API, but if not we should sanity
4250        check the value here.  */
4251     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4252 
4253     PERL_ARGS_ASSERT_FILTER_READ;
4254 
4255     if (!PL_parser || !PL_rsfp_filters)
4256 	return -1;
4257     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4258 	/* Provide a default input filter to make life easy.	*/
4259 	/* Note that we append to the line. This is handy.	*/
4260 	DEBUG_P(PerlIO_printf(Perl_debug_log,
4261 			      "filter_read %d: from rsfp\n", idx));
4262 	if (correct_length) {
4263  	    /* Want a block */
4264 	    int len ;
4265 	    const int old_len = SvCUR(buf_sv);
4266 
4267 	    /* ensure buf_sv is large enough */
4268 	    SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4269 	    if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4270 				   correct_length)) <= 0) {
4271 		if (PerlIO_error(PL_rsfp))
4272 	            return -1;		/* error */
4273 	        else
4274 		    return 0 ;		/* end of file */
4275 	    }
4276 	    SvCUR_set(buf_sv, old_len + len) ;
4277 	    SvPVX(buf_sv)[old_len + len] = '\0';
4278 	} else {
4279 	    /* Want a line */
4280             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4281 		if (PerlIO_error(PL_rsfp))
4282 	            return -1;		/* error */
4283 	        else
4284 		    return 0 ;		/* end of file */
4285 	    }
4286 	}
4287 	return SvCUR(buf_sv);
4288     }
4289     /* Skip this filter slot if filter has been deleted	*/
4290     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4291 	DEBUG_P(PerlIO_printf(Perl_debug_log,
4292 			      "filter_read %d: skipped (filter deleted)\n",
4293 			      idx));
4294 	return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4295     }
4296     if (SvTYPE(datasv) != SVt_PVIO) {
4297 	if (correct_length) {
4298  	    /* Want a block */
4299 	    const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4300 	    if (!remainder) return 0; /* eof */
4301 	    if (correct_length > remainder) correct_length = remainder;
4302 	    sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4303 	    SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4304 	} else {
4305 	    /* Want a line */
4306 	    const char *s = SvEND(datasv);
4307 	    const char *send = SvPVX(datasv) + SvLEN(datasv);
4308 	    while (s < send) {
4309 		if (*s == '\n') {
4310 		    s++;
4311 		    break;
4312 		}
4313 		s++;
4314 	    }
4315 	    if (s == send) return 0; /* eof */
4316 	    sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4317 	    SvCUR_set(datasv, s-SvPVX(datasv));
4318 	}
4319 	return SvCUR(buf_sv);
4320     }
4321     /* Get function pointer hidden within datasv	*/
4322     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4323     DEBUG_P(PerlIO_printf(Perl_debug_log,
4324 			  "filter_read %d: via function %p (%s)\n",
4325 			  idx, (void*)datasv, SvPV_nolen_const(datasv)));
4326     /* Call function. The function is expected to 	*/
4327     /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4328     /* Return: <0:error, =0:eof, >0:not eof 		*/
4329     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4330 }
4331 
4332 STATIC char *
4333 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4334 {
4335     PERL_ARGS_ASSERT_FILTER_GETS;
4336 
4337 #ifdef PERL_CR_FILTER
4338     if (!PL_rsfp_filters) {
4339 	filter_add(S_cr_textfilter,NULL);
4340     }
4341 #endif
4342     if (PL_rsfp_filters) {
4343 	if (!append)
4344             SvCUR_set(sv, 0);	/* start with empty line	*/
4345         if (FILTER_READ(0, sv, 0) > 0)
4346             return ( SvPVX(sv) ) ;
4347         else
4348 	    return NULL ;
4349     }
4350     else
4351         return (sv_gets(sv, PL_rsfp, append));
4352 }
4353 
4354 STATIC HV *
4355 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4356 {
4357     GV *gv;
4358 
4359     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4360 
4361     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4362         return PL_curstash;
4363 
4364     if (len > 2
4365         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4366         && (gv = gv_fetchpvn_flags(pkgname,
4367                                    len,
4368                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4369     {
4370         return GvHV(gv);			/* Foo:: */
4371     }
4372 
4373     /* use constant CLASS => 'MyClass' */
4374     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4375     if (gv && GvCV(gv)) {
4376 	SV * const sv = cv_const_sv(GvCV(gv));
4377 	if (sv)
4378 	    return gv_stashsv(sv, 0);
4379     }
4380 
4381     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4382 }
4383 
4384 
4385 STATIC char *
4386 S_tokenize_use(pTHX_ int is_use, char *s) {
4387     PERL_ARGS_ASSERT_TOKENIZE_USE;
4388 
4389     if (PL_expect != XSTATE)
4390 	yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4391 		    is_use ? "use" : "no"));
4392     PL_expect = XTERM;
4393     s = skipspace(s);
4394     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4395 	s = force_version(s, TRUE);
4396 	if (*s == ';' || *s == '}'
4397 		|| (s = skipspace(s), (*s == ';' || *s == '}'))) {
4398 	    NEXTVAL_NEXTTOKE.opval = NULL;
4399 	    force_next(WORD);
4400 	}
4401 	else if (*s == 'v') {
4402 	    s = force_word(s,WORD,FALSE,TRUE);
4403 	    s = force_version(s, FALSE);
4404 	}
4405     }
4406     else {
4407 	s = force_word(s,WORD,FALSE,TRUE);
4408 	s = force_version(s, FALSE);
4409     }
4410     pl_yylval.ival = is_use;
4411     return s;
4412 }
4413 #ifdef DEBUGGING
4414     static const char* const exp_name[] =
4415 	{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4416 	  "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4417 	  "TERMORDORDOR"
4418 	};
4419 #endif
4420 
4421 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4422 STATIC bool
4423 S_word_takes_any_delimeter(char *p, STRLEN len)
4424 {
4425     return (len == 1 && strchr("msyq", p[0]))
4426             || (len == 2
4427                 && ((p[0] == 't' && p[1] == 'r')
4428                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4429 }
4430 
4431 static void
4432 S_check_scalar_slice(pTHX_ char *s)
4433 {
4434     s++;
4435     while (*s == ' ' || *s == '\t') s++;
4436     if (*s == 'q' && s[1] == 'w'
4437      && !isWORDCHAR_lazy_if(s+2,UTF))
4438 	return;
4439     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4440 	s += UTF ? UTF8SKIP(s) : 1;
4441     if (*s == '}' || *s == ']')
4442 	pl_yylval.ival = OPpSLICEWARNING;
4443 }
4444 
4445 /*
4446   yylex
4447 
4448   Works out what to call the token just pulled out of the input
4449   stream.  The yacc parser takes care of taking the ops we return and
4450   stitching them into a tree.
4451 
4452   Returns:
4453     The type of the next token
4454 
4455   Structure:
4456       Switch based on the current state:
4457 	  - if we already built the token before, use it
4458 	  - if we have a case modifier in a string, deal with that
4459 	  - handle other cases of interpolation inside a string
4460 	  - scan the next line if we are inside a format
4461       In the normal state switch on the next character:
4462 	  - default:
4463 	    if alphabetic, go to key lookup
4464 	    unrecoginized character - croak
4465 	  - 0/4/26: handle end-of-line or EOF
4466 	  - cases for whitespace
4467 	  - \n and #: handle comments and line numbers
4468 	  - various operators, brackets and sigils
4469 	  - numbers
4470 	  - quotes
4471 	  - 'v': vstrings (or go to key lookup)
4472 	  - 'x' repetition operator (or go to key lookup)
4473 	  - other ASCII alphanumerics (key lookup begins here):
4474 	      word before => ?
4475 	      keyword plugin
4476 	      scan built-in keyword (but do nothing with it yet)
4477 	      check for statement label
4478 	      check for lexical subs
4479 		  goto just_a_word if there is one
4480 	      see whether built-in keyword is overridden
4481 	      switch on keyword number:
4482 		  - default: just_a_word:
4483 		      not a built-in keyword; handle bareword lookup
4484 		      disambiguate between method and sub call
4485 		      fall back to bareword
4486 		  - cases for built-in keywords
4487 */
4488 
4489 
4490 int
4491 Perl_yylex(pTHX)
4492 {
4493     dVAR;
4494     char *s = PL_bufptr;
4495     char *d;
4496     STRLEN len;
4497     bool bof = FALSE;
4498     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4499     U8 formbrack = 0;
4500     U32 fake_eof = 0;
4501 
4502     /* orig_keyword, gvp, and gv are initialized here because
4503      * jump to the label just_a_word_zero can bypass their
4504      * initialization later. */
4505     I32 orig_keyword = 0;
4506     GV *gv = NULL;
4507     GV **gvp = NULL;
4508 
4509     DEBUG_T( {
4510 	SV* tmp = newSVpvs("");
4511 	PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4512 	    (IV)CopLINE(PL_curcop),
4513 	    lex_state_names[PL_lex_state],
4514 	    exp_name[PL_expect],
4515 	    pv_display(tmp, s, strlen(s), 0, 60));
4516 	SvREFCNT_dec(tmp);
4517     } );
4518 
4519     /* when we've already built the next token, just pull it out of the queue */
4520     if (PL_nexttoke) {
4521 	PL_nexttoke--;
4522 	pl_yylval = PL_nextval[PL_nexttoke];
4523 	if (!PL_nexttoke) {
4524 	    PL_lex_state = PL_lex_defer;
4525 	    PL_lex_defer = LEX_NORMAL;
4526 	}
4527 	{
4528 	    I32 next_type;
4529 	    next_type = PL_nexttype[PL_nexttoke];
4530 	    if (next_type & (7<<24)) {
4531 		if (next_type & (1<<24)) {
4532 		    if (PL_lex_brackets > 100)
4533 			Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4534 		    PL_lex_brackstack[PL_lex_brackets++] =
4535 			(char) ((next_type >> 16) & 0xff);
4536 		}
4537 		if (next_type & (2<<24))
4538 		    PL_lex_allbrackets++;
4539 		if (next_type & (4<<24))
4540 		    PL_lex_allbrackets--;
4541 		next_type &= 0xffff;
4542 	    }
4543 	    return REPORT(next_type == 'p' ? pending_ident() : next_type);
4544 	}
4545     }
4546 
4547     switch (PL_lex_state) {
4548     case LEX_NORMAL:
4549     case LEX_INTERPNORMAL:
4550 	break;
4551 
4552     /* interpolated case modifiers like \L \U, including \Q and \E.
4553        when we get here, PL_bufptr is at the \
4554     */
4555     case LEX_INTERPCASEMOD:
4556 #ifdef DEBUGGING
4557 	if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4558 	    Perl_croak(aTHX_
4559 		       "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4560 		       PL_bufptr, PL_bufend, *PL_bufptr);
4561 #endif
4562 	/* handle \E or end of string */
4563        	if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4564 	    /* if at a \E */
4565 	    if (PL_lex_casemods) {
4566 		const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4567 		PL_lex_casestack[PL_lex_casemods] = '\0';
4568 
4569 		if (PL_bufptr != PL_bufend
4570 		    && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4571                         || oldmod == 'F')) {
4572 		    PL_bufptr += 2;
4573 		    PL_lex_state = LEX_INTERPCONCAT;
4574 		}
4575 		PL_lex_allbrackets--;
4576 		return REPORT(')');
4577 	    }
4578             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4579                /* Got an unpaired \E */
4580                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4581                         "Useless use of \\E");
4582             }
4583 	    if (PL_bufptr != PL_bufend)
4584 		PL_bufptr += 2;
4585 	    PL_lex_state = LEX_INTERPCONCAT;
4586 	    return yylex();
4587 	}
4588 	else {
4589 	    DEBUG_T({ PerlIO_printf(Perl_debug_log,
4590               "### Saw case modifier\n"); });
4591 	    s = PL_bufptr + 1;
4592 	    if (s[1] == '\\' && s[2] == 'E') {
4593 	        PL_bufptr = s + 3;
4594 		PL_lex_state = LEX_INTERPCONCAT;
4595 		return yylex();
4596 	    }
4597 	    else {
4598 		I32 tmp;
4599                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4600                     tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
4601 		if ((*s == 'L' || *s == 'U' || *s == 'F')
4602                     && (strchr(PL_lex_casestack, 'L')
4603                         || strchr(PL_lex_casestack, 'U')
4604                         || strchr(PL_lex_casestack, 'F')))
4605                 {
4606 		    PL_lex_casestack[--PL_lex_casemods] = '\0';
4607 		    PL_lex_allbrackets--;
4608 		    return REPORT(')');
4609 		}
4610 		if (PL_lex_casemods > 10)
4611 		    Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4612 		PL_lex_casestack[PL_lex_casemods++] = *s;
4613 		PL_lex_casestack[PL_lex_casemods] = '\0';
4614 		PL_lex_state = LEX_INTERPCONCAT;
4615 		NEXTVAL_NEXTTOKE.ival = 0;
4616 		force_next((2<<24)|'(');
4617 		if (*s == 'l')
4618 		    NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4619 		else if (*s == 'u')
4620 		    NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4621 		else if (*s == 'L')
4622 		    NEXTVAL_NEXTTOKE.ival = OP_LC;
4623 		else if (*s == 'U')
4624 		    NEXTVAL_NEXTTOKE.ival = OP_UC;
4625 		else if (*s == 'Q')
4626 		    NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4627                 else if (*s == 'F')
4628 		    NEXTVAL_NEXTTOKE.ival = OP_FC;
4629 		else
4630 		    Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4631 		PL_bufptr = s + 1;
4632 	    }
4633 	    force_next(FUNC);
4634 	    if (PL_lex_starts) {
4635 		s = PL_bufptr;
4636 		PL_lex_starts = 0;
4637 		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4638 		if (PL_lex_casemods == 1 && PL_lex_inpat)
4639 		    TOKEN(',');
4640 		else
4641 		    AopNOASSIGN(OP_CONCAT);
4642 	    }
4643 	    else
4644 		return yylex();
4645 	}
4646 
4647     case LEX_INTERPPUSH:
4648         return REPORT(sublex_push());
4649 
4650     case LEX_INTERPSTART:
4651 	if (PL_bufptr == PL_bufend)
4652 	    return REPORT(sublex_done());
4653 	DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4654               "### Interpolated variable\n"); });
4655 	PL_expect = XTERM;
4656         /* for /@a/, we leave the joining for the regex engine to do
4657          * (unless we're within \Q etc) */
4658 	PL_lex_dojoin = (*PL_bufptr == '@'
4659                             && (!PL_lex_inpat || PL_lex_casemods));
4660 	PL_lex_state = LEX_INTERPNORMAL;
4661 	if (PL_lex_dojoin) {
4662 	    NEXTVAL_NEXTTOKE.ival = 0;
4663 	    force_next(',');
4664 	    force_ident("\"", '$');
4665 	    NEXTVAL_NEXTTOKE.ival = 0;
4666 	    force_next('$');
4667 	    NEXTVAL_NEXTTOKE.ival = 0;
4668 	    force_next((2<<24)|'(');
4669 	    NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
4670 	    force_next(FUNC);
4671 	}
4672 	/* Convert (?{...}) and friends to 'do {...}' */
4673 	if (PL_lex_inpat && *PL_bufptr == '(') {
4674 	    PL_parser->lex_shared->re_eval_start = PL_bufptr;
4675 	    PL_bufptr += 2;
4676 	    if (*PL_bufptr != '{')
4677 		PL_bufptr++;
4678 	    PL_expect = XTERMBLOCK;
4679 	    force_next(DO);
4680 	}
4681 
4682 	if (PL_lex_starts++) {
4683 	    s = PL_bufptr;
4684 	    /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4685 	    if (!PL_lex_casemods && PL_lex_inpat)
4686 		TOKEN(',');
4687 	    else
4688 		AopNOASSIGN(OP_CONCAT);
4689 	}
4690 	return yylex();
4691 
4692     case LEX_INTERPENDMAYBE:
4693 	if (intuit_more(PL_bufptr)) {
4694 	    PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
4695 	    break;
4696 	}
4697 	/* FALLTHROUGH */
4698 
4699     case LEX_INTERPEND:
4700 	/* Treat state as LEX_NORMAL if we have no inner lexing scope.
4701 	   XXX This hack can be removed if we stop setting PL_lex_state to
4702 	   LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
4703 	if (UNLIKELY(!PL_lex_inwhat)) {
4704 	    PL_lex_state = LEX_NORMAL;
4705 	    break;
4706 	}
4707 
4708 	if (PL_lex_dojoin) {
4709 	    const U8 dojoin_was = PL_lex_dojoin;
4710 	    PL_lex_dojoin = FALSE;
4711 	    PL_lex_state = LEX_INTERPCONCAT;
4712 	    PL_lex_allbrackets--;
4713 	    return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4714 	}
4715 	if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4716 	    && SvEVALED(PL_lex_repl))
4717 	{
4718 	    if (PL_bufptr != PL_bufend)
4719 		Perl_croak(aTHX_ "Bad evalled substitution pattern");
4720 	    PL_lex_repl = NULL;
4721 	}
4722 	/* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4723 	   re_eval_str.  If the here-doc body’s length equals the previous
4724 	   value of re_eval_start, re_eval_start will now be null.  So
4725 	   check re_eval_str as well. */
4726 	if (PL_parser->lex_shared->re_eval_start
4727 	 || PL_parser->lex_shared->re_eval_str) {
4728 	    SV *sv;
4729 	    if (*PL_bufptr != ')')
4730 		Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4731 	    PL_bufptr++;
4732 	    /* having compiled a (?{..}) expression, return the original
4733 	     * text too, as a const */
4734 	    if (PL_parser->lex_shared->re_eval_str) {
4735 		sv = PL_parser->lex_shared->re_eval_str;
4736 		PL_parser->lex_shared->re_eval_str = NULL;
4737 		SvCUR_set(sv,
4738 			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4739 		SvPV_shrink_to_cur(sv);
4740 	    }
4741 	    else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4742 			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4743 	    NEXTVAL_NEXTTOKE.opval =
4744 		    (OP*)newSVOP(OP_CONST, 0,
4745 				 sv);
4746 	    force_next(THING);
4747 	    PL_parser->lex_shared->re_eval_start = NULL;
4748 	    PL_expect = XTERM;
4749 	    return REPORT(',');
4750 	}
4751 
4752 	/* FALLTHROUGH */
4753     case LEX_INTERPCONCAT:
4754 #ifdef DEBUGGING
4755 	if (PL_lex_brackets)
4756 	    Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4757 		       (long) PL_lex_brackets);
4758 #endif
4759 	/* Treat state as LEX_NORMAL when not in an inner lexing scope.
4760 	   XXX This hack can be removed if we stop setting PL_lex_state to
4761 	   LEX_KNOWNEXT.  */
4762 	if (UNLIKELY(!PL_lex_inwhat)) {
4763 	    PL_lex_state = LEX_NORMAL;
4764 	    break;
4765 	}
4766 
4767 	if (PL_bufptr == PL_bufend)
4768 	    return REPORT(sublex_done());
4769 
4770 	/* m'foo' still needs to be parsed for possible (?{...}) */
4771 	if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4772 	    SV *sv = newSVsv(PL_linestr);
4773 	    sv = tokeq(sv);
4774 	    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4775 	    s = PL_bufend;
4776 	}
4777 	else {
4778 	    s = scan_const(PL_bufptr);
4779 	    if (*s == '\\')
4780 		PL_lex_state = LEX_INTERPCASEMOD;
4781 	    else
4782 		PL_lex_state = LEX_INTERPSTART;
4783 	}
4784 
4785 	if (s != PL_bufptr) {
4786 	    NEXTVAL_NEXTTOKE = pl_yylval;
4787 	    PL_expect = XTERM;
4788 	    force_next(THING);
4789 	    if (PL_lex_starts++) {
4790 		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4791 		if (!PL_lex_casemods && PL_lex_inpat)
4792 		    TOKEN(',');
4793 		else
4794 		    AopNOASSIGN(OP_CONCAT);
4795 	    }
4796 	    else {
4797 		PL_bufptr = s;
4798 		return yylex();
4799 	    }
4800 	}
4801 
4802 	return yylex();
4803     case LEX_FORMLINE:
4804 	s = scan_formline(PL_bufptr);
4805 	if (!PL_lex_formbrack)
4806 	{
4807 	    formbrack = 1;
4808 	    goto rightbracket;
4809 	}
4810 	PL_bufptr = s;
4811 	return yylex();
4812     }
4813 
4814     /* We really do *not* want PL_linestr ever becoming a COW. */
4815     assert (!SvIsCOW(PL_linestr));
4816     s = PL_bufptr;
4817     PL_oldoldbufptr = PL_oldbufptr;
4818     PL_oldbufptr = s;
4819     PL_parser->saw_infix_sigil = 0;
4820 
4821   retry:
4822     switch (*s) {
4823     default:
4824 	if (UTF) {
4825             if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4826                 ENTER;
4827                 SAVESPTR(PL_warnhook);
4828                 PL_warnhook = PERL_WARNHOOK_FATAL;
4829                 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4830                 LEAVE;
4831             }
4832             if (isIDFIRST_utf8((U8*)s)) {
4833                 goto keylookup;
4834             }
4835         }
4836         else if (isALNUMC(*s)) {
4837 	    goto keylookup;
4838 	}
4839     {
4840         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4841         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4842                                                     UTF8SKIP(s),
4843                                                     SVs_TEMP | SVf_UTF8),
4844                                             10, UNI_DISPLAY_ISPRINT)
4845                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4846         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4847         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4848             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4849         } else {
4850             d = PL_linestart;
4851         }
4852         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4853                           UTF8fARG(UTF, (s - d), d),
4854                          (int) len + 1);
4855     }
4856     case 4:
4857     case 26:
4858 	goto fake_eof;			/* emulate EOF on ^D or ^Z */
4859     case 0:
4860 	if ((!PL_rsfp || PL_lex_inwhat)
4861 	 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4862 	    PL_last_uni = 0;
4863 	    PL_last_lop = 0;
4864 	    if (PL_lex_brackets
4865                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4866             {
4867 		yyerror((const char *)
4868 			(PL_lex_formbrack
4869 			 ? "Format not terminated"
4870 			 : "Missing right curly or square bracket"));
4871 	    }
4872             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4873                         "### Tokener got EOF\n");
4874             } );
4875 	    TOKEN(0);
4876 	}
4877 	if (s++ < PL_bufend)
4878 	    goto retry;			/* ignore stray nulls */
4879 	PL_last_uni = 0;
4880 	PL_last_lop = 0;
4881 	if (!PL_in_eval && !PL_preambled) {
4882 	    PL_preambled = TRUE;
4883 	    if (PL_perldb) {
4884 		/* Generate a string of Perl code to load the debugger.
4885 		 * If PERL5DB is set, it will return the contents of that,
4886 		 * otherwise a compile-time require of perl5db.pl.  */
4887 
4888 		const char * const pdb = PerlEnv_getenv("PERL5DB");
4889 
4890 		if (pdb) {
4891 		    sv_setpv(PL_linestr, pdb);
4892 		    sv_catpvs(PL_linestr,";");
4893 		} else {
4894 		    SETERRNO(0,SS_NORMAL);
4895 		    sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4896 		}
4897 		PL_parser->preambling = CopLINE(PL_curcop);
4898 	    } else
4899 		sv_setpvs(PL_linestr,"");
4900 	    if (PL_preambleav) {
4901 		SV **svp = AvARRAY(PL_preambleav);
4902 		SV **const end = svp + AvFILLp(PL_preambleav);
4903 		while(svp <= end) {
4904 		    sv_catsv(PL_linestr, *svp);
4905 		    ++svp;
4906 		    sv_catpvs(PL_linestr, ";");
4907 		}
4908 		sv_free(MUTABLE_SV(PL_preambleav));
4909 		PL_preambleav = NULL;
4910 	    }
4911 	    if (PL_minus_E)
4912 		sv_catpvs(PL_linestr,
4913 			  "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4914 	    if (PL_minus_n || PL_minus_p) {
4915 		sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4916 		if (PL_minus_l)
4917 		    sv_catpvs(PL_linestr,"chomp;");
4918 		if (PL_minus_a) {
4919 		    if (PL_minus_F) {
4920 			if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4921 			     || *PL_splitstr == '"')
4922 			      && strchr(PL_splitstr + 1, *PL_splitstr))
4923 			    Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4924 			else {
4925 			    /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4926 			       bytes can be used as quoting characters.  :-) */
4927 			    const char *splits = PL_splitstr;
4928 			    sv_catpvs(PL_linestr, "our @F=split(q\0");
4929 			    do {
4930 				/* Need to \ \s  */
4931 				if (*splits == '\\')
4932 				    sv_catpvn(PL_linestr, splits, 1);
4933 				sv_catpvn(PL_linestr, splits, 1);
4934 			    } while (*splits++);
4935 			    /* This loop will embed the trailing NUL of
4936 			       PL_linestr as the last thing it does before
4937 			       terminating.  */
4938 			    sv_catpvs(PL_linestr, ");");
4939 			}
4940 		    }
4941 		    else
4942 		        sv_catpvs(PL_linestr,"our @F=split(' ');");
4943 		}
4944 	    }
4945 	    sv_catpvs(PL_linestr, "\n");
4946 	    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4947 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4948 	    PL_last_lop = PL_last_uni = NULL;
4949 	    if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
4950 		update_debugger_info(PL_linestr, NULL, 0);
4951 	    goto retry;
4952 	}
4953 	do {
4954 	    fake_eof = 0;
4955 	    bof = PL_rsfp ? TRUE : FALSE;
4956 	    if (0) {
4957 	      fake_eof:
4958 		fake_eof = LEX_FAKE_EOF;
4959 	    }
4960 	    PL_bufptr = PL_bufend;
4961 	    COPLINE_INC_WITH_HERELINES;
4962 	    if (!lex_next_chunk(fake_eof)) {
4963 		CopLINE_dec(PL_curcop);
4964 		s = PL_bufptr;
4965 		TOKEN(';');	/* not infinite loop because rsfp is NULL now */
4966 	    }
4967 	    CopLINE_dec(PL_curcop);
4968 	    s = PL_bufptr;
4969 	    /* If it looks like the start of a BOM or raw UTF-16,
4970 	     * check if it in fact is. */
4971 	    if (bof && PL_rsfp
4972                 && (*s == 0
4973                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
4974                         || *(U8*)s >= 0xFE
4975                         || s[1] == 0))
4976             {
4977 		Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4978 		bof = (offset == (Off_t)SvCUR(PL_linestr));
4979 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4980 		/* offset may include swallowed CR */
4981 		if (!bof)
4982 		    bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4983 #endif
4984 		if (bof) {
4985 		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4986 		    s = swallow_bom((U8*)s);
4987 		}
4988 	    }
4989 	    if (PL_parser->in_pod) {
4990 		/* Incest with pod. */
4991 		if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4992 		    sv_setpvs(PL_linestr, "");
4993 		    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4994 		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4995 		    PL_last_lop = PL_last_uni = NULL;
4996 		    PL_parser->in_pod = 0;
4997 		}
4998 	    }
4999 	    if (PL_rsfp || PL_parser->filtered)
5000 		incline(s);
5001 	} while (PL_parser->in_pod);
5002 	PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5003 	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5004 	PL_last_lop = PL_last_uni = NULL;
5005 	if (CopLINE(PL_curcop) == 1) {
5006 	    while (s < PL_bufend && isSPACE(*s))
5007 		s++;
5008 	    if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5009 		s++;
5010 	    d = NULL;
5011 	    if (!PL_in_eval) {
5012 		if (*s == '#' && *(s+1) == '!')
5013 		    d = s + 2;
5014 #ifdef ALTERNATE_SHEBANG
5015 		else {
5016 		    static char const as[] = ALTERNATE_SHEBANG;
5017 		    if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5018 			d = s + (sizeof(as) - 1);
5019 		}
5020 #endif /* ALTERNATE_SHEBANG */
5021 	    }
5022 	    if (d) {
5023 		char *ipath;
5024 		char *ipathend;
5025 
5026 		while (isSPACE(*d))
5027 		    d++;
5028 		ipath = d;
5029 		while (*d && !isSPACE(*d))
5030 		    d++;
5031 		ipathend = d;
5032 
5033 #ifdef ARG_ZERO_IS_SCRIPT
5034 		if (ipathend > ipath) {
5035 		    /*
5036 		     * HP-UX (at least) sets argv[0] to the script name,
5037 		     * which makes $^X incorrect.  And Digital UNIX and Linux,
5038 		     * at least, set argv[0] to the basename of the Perl
5039 		     * interpreter. So, having found "#!", we'll set it right.
5040 		     */
5041                     SV* copfilesv = CopFILESV(PL_curcop);
5042                     if (copfilesv) {
5043                         SV * const x =
5044                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5045                                              SVt_PV)); /* $^X */
5046                         assert(SvPOK(x) || SvGMAGICAL(x));
5047                         if (sv_eq(x, copfilesv)) {
5048                             sv_setpvn(x, ipath, ipathend - ipath);
5049                             SvSETMAGIC(x);
5050                         }
5051                         else {
5052                             STRLEN blen;
5053                             STRLEN llen;
5054                             const char *bstart = SvPV_const(copfilesv, blen);
5055                             const char * const lstart = SvPV_const(x, llen);
5056                             if (llen < blen) {
5057                                 bstart += blen - llen;
5058                                 if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
5059                                     sv_setpvn(x, ipath, ipathend - ipath);
5060                                     SvSETMAGIC(x);
5061                                 }
5062                             }
5063 			}
5064                     }
5065                     else {
5066                         /* Anything to do if no copfilesv? */
5067 		    }
5068 		    TAINT_NOT;	/* $^X is always tainted, but that's OK */
5069 		}
5070 #endif /* ARG_ZERO_IS_SCRIPT */
5071 
5072 		/*
5073 		 * Look for options.
5074 		 */
5075 		d = instr(s,"perl -");
5076 		if (!d) {
5077 		    d = instr(s,"perl");
5078 #if defined(DOSISH)
5079 		    /* avoid getting into infinite loops when shebang
5080 		     * line contains "Perl" rather than "perl" */
5081 		    if (!d) {
5082 			for (d = ipathend-4; d >= ipath; --d) {
5083 			    if (isALPHA_FOLD_EQ(*d, 'p')
5084 				&& !ibcmp(d, "perl", 4))
5085 			    {
5086 				break;
5087 			    }
5088 			}
5089 			if (d < ipath)
5090 			    d = NULL;
5091 		    }
5092 #endif
5093 		}
5094 #ifdef ALTERNATE_SHEBANG
5095 		/*
5096 		 * If the ALTERNATE_SHEBANG on this system starts with a
5097 		 * character that can be part of a Perl expression, then if
5098 		 * we see it but not "perl", we're probably looking at the
5099 		 * start of Perl code, not a request to hand off to some
5100 		 * other interpreter.  Similarly, if "perl" is there, but
5101 		 * not in the first 'word' of the line, we assume the line
5102 		 * contains the start of the Perl program.
5103 		 */
5104 		if (d && *s != '#') {
5105 		    const char *c = ipath;
5106 		    while (*c && !strchr("; \t\r\n\f\v#", *c))
5107 			c++;
5108 		    if (c < d)
5109 			d = NULL;	/* "perl" not in first word; ignore */
5110 		    else
5111 			*s = '#';	/* Don't try to parse shebang line */
5112 		}
5113 #endif /* ALTERNATE_SHEBANG */
5114 		if (!d
5115                     && *s == '#'
5116                     && ipathend > ipath
5117                     && !PL_minus_c
5118                     && !instr(s,"indir")
5119                     && instr(PL_origargv[0],"perl"))
5120 		{
5121 		    dVAR;
5122 		    char **newargv;
5123 
5124 		    *ipathend = '\0';
5125 		    s = ipathend + 1;
5126 		    while (s < PL_bufend && isSPACE(*s))
5127 			s++;
5128 		    if (s < PL_bufend) {
5129 			Newx(newargv,PL_origargc+3,char*);
5130 			newargv[1] = s;
5131 			while (s < PL_bufend && !isSPACE(*s))
5132 			    s++;
5133 			*s = '\0';
5134 			Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5135 		    }
5136 		    else
5137 			newargv = PL_origargv;
5138 		    newargv[0] = ipath;
5139 		    PERL_FPU_PRE_EXEC
5140 		    PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5141 		    PERL_FPU_POST_EXEC
5142 		    Perl_croak(aTHX_ "Can't exec %s", ipath);
5143 		}
5144 		if (d) {
5145 		    while (*d && !isSPACE(*d))
5146 			d++;
5147 		    while (SPACE_OR_TAB(*d))
5148 			d++;
5149 
5150 		    if (*d++ == '-') {
5151 			const bool switches_done = PL_doswitches;
5152 			const U32 oldpdb = PL_perldb;
5153 			const bool oldn = PL_minus_n;
5154 			const bool oldp = PL_minus_p;
5155 			const char *d1 = d;
5156 
5157 			do {
5158 			    bool baduni = FALSE;
5159 			    if (*d1 == 'C') {
5160 				const char *d2 = d1 + 1;
5161 				if (parse_unicode_opts((const char **)&d2)
5162 				    != PL_unicode)
5163 				    baduni = TRUE;
5164 			    }
5165 			    if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5166 				const char * const m = d1;
5167 				while (*d1 && !isSPACE(*d1))
5168 				    d1++;
5169 				Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5170 				      (int)(d1 - m), m);
5171 			    }
5172 			    d1 = moreswitches(d1);
5173 			} while (d1);
5174 			if (PL_doswitches && !switches_done) {
5175 			    int argc = PL_origargc;
5176 			    char **argv = PL_origargv;
5177 			    do {
5178 				argc--,argv++;
5179 			    } while (argc && argv[0][0] == '-' && argv[0][1]);
5180 			    init_argv_symbols(argc,argv);
5181 			}
5182 			if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5183                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5184 			      /* if we have already added "LINE: while (<>) {",
5185 			         we must not do it again */
5186 			{
5187 			    sv_setpvs(PL_linestr, "");
5188 			    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5189 			    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5190 			    PL_last_lop = PL_last_uni = NULL;
5191 			    PL_preambled = FALSE;
5192 			    if (PERLDB_LINE_OR_SAVESRC)
5193 				(void)gv_fetchfile(PL_origfilename);
5194 			    goto retry;
5195 			}
5196 		    }
5197 		}
5198 	    }
5199 	}
5200 	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5201 	    PL_lex_state = LEX_FORMLINE;
5202 	    force_next(FORMRBRACK);
5203 	    TOKEN(';');
5204 	}
5205 	goto retry;
5206     case '\r':
5207 #ifdef PERL_STRICT_CR
5208 	Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5209 	Perl_croak(aTHX_
5210       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5211 #endif
5212     case ' ': case '\t': case '\f': case '\v':
5213 	s++;
5214 	goto retry;
5215     case '#':
5216     case '\n':
5217 	if (PL_lex_state != LEX_NORMAL
5218             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5219         {
5220             const bool in_comment = *s == '#';
5221 	    if (*s == '#' && s == PL_linestart && PL_in_eval
5222 	     && !PL_rsfp && !PL_parser->filtered) {
5223 		/* handle eval qq[#line 1 "foo"\n ...] */
5224 		CopLINE_dec(PL_curcop);
5225 		incline(s);
5226 	    }
5227             d = s;
5228             while (d < PL_bufend && *d != '\n')
5229                 d++;
5230             if (d < PL_bufend)
5231                 d++;
5232             else if (d > PL_bufend)
5233                 /* Found by Ilya: feed random input to Perl. */
5234                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5235                            d, PL_bufend);
5236             s = d;
5237             if (in_comment && d == PL_bufend
5238                 && PL_lex_state == LEX_INTERPNORMAL
5239                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5240                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5241             else
5242                 incline(s);
5243 	    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5244 		PL_lex_state = LEX_FORMLINE;
5245 		force_next(FORMRBRACK);
5246 		TOKEN(';');
5247 	    }
5248 	}
5249 	else {
5250             while (s < PL_bufend && *s != '\n')
5251                 s++;
5252             if (s < PL_bufend)
5253                 {
5254                     s++;
5255                     if (s < PL_bufend)
5256                         incline(s);
5257                 }
5258             else if (s > PL_bufend)
5259                 /* Found by Ilya: feed random input to Perl. */
5260                 Perl_croak(aTHX_ "panic: input overflow");
5261 	}
5262 	goto retry;
5263     case '-':
5264 	if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5265 	    I32 ftst = 0;
5266 	    char tmp;
5267 
5268 	    s++;
5269 	    PL_bufptr = s;
5270 	    tmp = *s++;
5271 
5272 	    while (s < PL_bufend && SPACE_OR_TAB(*s))
5273 		s++;
5274 
5275 	    if (strnEQ(s,"=>",2)) {
5276 		s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5277 		DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5278 		OPERATOR('-');		/* unary minus */
5279 	    }
5280 	    switch (tmp) {
5281 	    case 'r': ftst = OP_FTEREAD;	break;
5282 	    case 'w': ftst = OP_FTEWRITE;	break;
5283 	    case 'x': ftst = OP_FTEEXEC;	break;
5284 	    case 'o': ftst = OP_FTEOWNED;	break;
5285 	    case 'R': ftst = OP_FTRREAD;	break;
5286 	    case 'W': ftst = OP_FTRWRITE;	break;
5287 	    case 'X': ftst = OP_FTREXEC;	break;
5288 	    case 'O': ftst = OP_FTROWNED;	break;
5289 	    case 'e': ftst = OP_FTIS;		break;
5290 	    case 'z': ftst = OP_FTZERO;		break;
5291 	    case 's': ftst = OP_FTSIZE;		break;
5292 	    case 'f': ftst = OP_FTFILE;		break;
5293 	    case 'd': ftst = OP_FTDIR;		break;
5294 	    case 'l': ftst = OP_FTLINK;		break;
5295 	    case 'p': ftst = OP_FTPIPE;		break;
5296 	    case 'S': ftst = OP_FTSOCK;		break;
5297 	    case 'u': ftst = OP_FTSUID;		break;
5298 	    case 'g': ftst = OP_FTSGID;		break;
5299 	    case 'k': ftst = OP_FTSVTX;		break;
5300 	    case 'b': ftst = OP_FTBLK;		break;
5301 	    case 'c': ftst = OP_FTCHR;		break;
5302 	    case 't': ftst = OP_FTTTY;		break;
5303 	    case 'T': ftst = OP_FTTEXT;		break;
5304 	    case 'B': ftst = OP_FTBINARY;	break;
5305 	    case 'M': case 'A': case 'C':
5306 		gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5307 		switch (tmp) {
5308 		case 'M': ftst = OP_FTMTIME;	break;
5309 		case 'A': ftst = OP_FTATIME;	break;
5310 		case 'C': ftst = OP_FTCTIME;	break;
5311 		default:			break;
5312 		}
5313 		break;
5314 	    default:
5315 		break;
5316 	    }
5317 	    if (ftst) {
5318                 PL_last_uni = PL_oldbufptr;
5319 		PL_last_lop_op = (OPCODE)ftst;
5320 		DEBUG_T( { PerlIO_printf(Perl_debug_log,
5321                         "### Saw file test %c\n", (int)tmp);
5322 		} );
5323 		FTST(ftst);
5324 	    }
5325 	    else {
5326 		/* Assume it was a minus followed by a one-letter named
5327 		 * subroutine call (or a -bareword), then. */
5328 		DEBUG_T( { PerlIO_printf(Perl_debug_log,
5329 			"### '-%c' looked like a file test but was not\n",
5330 			(int) tmp);
5331 		} );
5332 		s = --PL_bufptr;
5333 	    }
5334 	}
5335 	{
5336 	    const char tmp = *s++;
5337 	    if (*s == tmp) {
5338 		s++;
5339 		if (PL_expect == XOPERATOR)
5340 		    TERM(POSTDEC);
5341 		else
5342 		    OPERATOR(PREDEC);
5343 	    }
5344 	    else if (*s == '>') {
5345 		s++;
5346 		s = skipspace(s);
5347 		if (((*s == '$' || *s == '&') && s[1] == '*')
5348 		  ||(*s == '$' && s[1] == '#' && s[2] == '*')
5349 		  ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5350 		  ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5351 		 )
5352 		{
5353 		    PL_expect = XPOSTDEREF;
5354 		    TOKEN(ARROW);
5355 		}
5356 		if (isIDFIRST_lazy_if(s,UTF)) {
5357 		    s = force_word(s,METHOD,FALSE,TRUE);
5358 		    TOKEN(ARROW);
5359 		}
5360 		else if (*s == '$')
5361 		    OPERATOR(ARROW);
5362 		else
5363 		    TERM(ARROW);
5364 	    }
5365 	    if (PL_expect == XOPERATOR) {
5366 		if (*s == '='
5367                     && !PL_lex_allbrackets
5368                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5369                 {
5370 		    s--;
5371 		    TOKEN(0);
5372 		}
5373 		Aop(OP_SUBTRACT);
5374 	    }
5375 	    else {
5376 		if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5377 		    check_uni();
5378 		OPERATOR('-');		/* unary minus */
5379 	    }
5380 	}
5381 
5382     case '+':
5383 	{
5384 	    const char tmp = *s++;
5385 	    if (*s == tmp) {
5386 		s++;
5387 		if (PL_expect == XOPERATOR)
5388 		    TERM(POSTINC);
5389 		else
5390 		    OPERATOR(PREINC);
5391 	    }
5392 	    if (PL_expect == XOPERATOR) {
5393 		if (*s == '='
5394                     && !PL_lex_allbrackets
5395                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5396                 {
5397 		    s--;
5398 		    TOKEN(0);
5399 		}
5400 		Aop(OP_ADD);
5401 	    }
5402 	    else {
5403 		if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5404 		    check_uni();
5405 		OPERATOR('+');
5406 	    }
5407 	}
5408 
5409     case '*':
5410 	if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5411 	if (PL_expect != XOPERATOR) {
5412 	    s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5413 	    PL_expect = XOPERATOR;
5414 	    force_ident(PL_tokenbuf, '*');
5415 	    if (!*PL_tokenbuf)
5416 		PREREF('*');
5417 	    TERM('*');
5418 	}
5419 	s++;
5420 	if (*s == '*') {
5421 	    s++;
5422 	    if (*s == '=' && !PL_lex_allbrackets
5423                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5424             {
5425 		s -= 2;
5426 		TOKEN(0);
5427 	    }
5428 	    PWop(OP_POW);
5429 	}
5430 	if (*s == '='
5431             && !PL_lex_allbrackets
5432             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5433         {
5434 	    s--;
5435 	    TOKEN(0);
5436 	}
5437 	PL_parser->saw_infix_sigil = 1;
5438 	Mop(OP_MULTIPLY);
5439 
5440     case '%':
5441     {
5442 	if (PL_expect == XOPERATOR) {
5443 	    if (s[1] == '='
5444                 && !PL_lex_allbrackets
5445                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5446             {
5447 		TOKEN(0);
5448             }
5449 	    ++s;
5450 	    PL_parser->saw_infix_sigil = 1;
5451 	    Mop(OP_MODULO);
5452 	}
5453 	else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5454 	PL_tokenbuf[0] = '%';
5455 	s = scan_ident(s, PL_tokenbuf + 1,
5456 		sizeof PL_tokenbuf - 1, FALSE);
5457 	pl_yylval.ival = 0;
5458 	if (!PL_tokenbuf[1]) {
5459 	    PREREF('%');
5460 	}
5461 	if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5462 	    if (*s == '[')
5463 		PL_tokenbuf[0] = '@';
5464 	}
5465 	PL_expect = XOPERATOR;
5466 	force_ident_maybe_lex('%');
5467 	TERM('%');
5468     }
5469     case '^':
5470 	d = s;
5471 	bof = FEATURE_BITWISE_IS_ENABLED;
5472 	if (bof && s[1] == '.')
5473 	    s++;
5474 	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5475 		(s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5476 	{
5477 	    s = d;
5478 	    TOKEN(0);
5479 	}
5480 	s++;
5481 	BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5482     case '[':
5483 	if (PL_lex_brackets > 100)
5484 	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5485 	PL_lex_brackstack[PL_lex_brackets++] = 0;
5486 	PL_lex_allbrackets++;
5487 	{
5488 	    const char tmp = *s++;
5489 	    OPERATOR(tmp);
5490 	}
5491     case '~':
5492 	if (s[1] == '~'
5493 	    && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5494 	{
5495 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5496 		TOKEN(0);
5497 	    s += 2;
5498             Perl_ck_warner_d(aTHX_
5499                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5500                 "Smartmatch is experimental");
5501 	    Eop(OP_SMARTMATCH);
5502 	}
5503 	s++;
5504 	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5505 	    s++;
5506 	    BCop(OP_SCOMPLEMENT);
5507 	}
5508 	BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5509     case ',':
5510 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5511 	    TOKEN(0);
5512 	s++;
5513 	OPERATOR(',');
5514     case ':':
5515 	if (s[1] == ':') {
5516 	    len = 0;
5517 	    goto just_a_word_zero_gv;
5518 	}
5519 	s++;
5520         {
5521         OP *attrs;
5522 
5523 	switch (PL_expect) {
5524 	case XOPERATOR:
5525 	    if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5526 		break;
5527 	    PL_bufptr = s;	/* update in case we back off */
5528 	    if (*s == '=') {
5529 		Perl_croak(aTHX_
5530 			   "Use of := for an empty attribute list is not allowed");
5531 	    }
5532 	    goto grabattrs;
5533 	case XATTRBLOCK:
5534 	    PL_expect = XBLOCK;
5535 	    goto grabattrs;
5536 	case XATTRTERM:
5537 	    PL_expect = XTERMBLOCK;
5538 	 grabattrs:
5539 	    s = skipspace(s);
5540 	    attrs = NULL;
5541 	    while (isIDFIRST_lazy_if(s,UTF)) {
5542 		I32 tmp;
5543 		SV *sv;
5544 		d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5545 		if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5546 		    if (tmp < 0) tmp = -tmp;
5547 		    switch (tmp) {
5548 		    case KEY_or:
5549 		    case KEY_and:
5550 		    case KEY_for:
5551 		    case KEY_foreach:
5552 		    case KEY_unless:
5553 		    case KEY_if:
5554 		    case KEY_while:
5555 		    case KEY_until:
5556 			goto got_attrs;
5557 		    default:
5558 			break;
5559 		    }
5560 		}
5561 		sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5562 		if (*d == '(') {
5563 		    d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5564 		    COPLINE_SET_FROM_MULTI_END;
5565 		    if (!d) {
5566 			/* MUST advance bufptr here to avoid bogus
5567 			   "at end of line" context messages from yyerror().
5568 			 */
5569 			PL_bufptr = s + len;
5570 			yyerror("Unterminated attribute parameter in attribute list");
5571 			if (attrs)
5572 			    op_free(attrs);
5573 			sv_free(sv);
5574 			return REPORT(0);	/* EOF indicator */
5575 		    }
5576 		}
5577 		if (PL_lex_stuff) {
5578 		    sv_catsv(sv, PL_lex_stuff);
5579 		    attrs = op_append_elem(OP_LIST, attrs,
5580 					newSVOP(OP_CONST, 0, sv));
5581 		    SvREFCNT_dec_NN(PL_lex_stuff);
5582 		    PL_lex_stuff = NULL;
5583 		}
5584 		else {
5585 		    if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5586 			sv_free(sv);
5587 			if (PL_in_my == KEY_our) {
5588 			    deprecate(":unique");
5589 			}
5590 			else
5591 			    Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5592 		    }
5593 
5594 		    /* NOTE: any CV attrs applied here need to be part of
5595 		       the CVf_BUILTIN_ATTRS define in cv.h! */
5596 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5597 			sv_free(sv);
5598 			CvLVALUE_on(PL_compcv);
5599 		    }
5600 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5601 			sv_free(sv);
5602 			deprecate(":locked");
5603 		    }
5604 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5605 			sv_free(sv);
5606 			CvMETHOD_on(PL_compcv);
5607 		    }
5608 		    else if (!PL_in_my && len == 5
5609 			  && strnEQ(SvPVX(sv), "const", len))
5610 		    {
5611 			sv_free(sv);
5612 			Perl_ck_warner_d(aTHX_
5613 			    packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5614 			   ":const is experimental"
5615 			);
5616 			CvANONCONST_on(PL_compcv);
5617 			if (!CvANON(PL_compcv))
5618 			    yyerror(":const is not permitted on named "
5619 				    "subroutines");
5620 		    }
5621 		    /* After we've set the flags, it could be argued that
5622 		       we don't need to do the attributes.pm-based setting
5623 		       process, and shouldn't bother appending recognized
5624 		       flags.  To experiment with that, uncomment the
5625 		       following "else".  (Note that's already been
5626 		       uncommented.  That keeps the above-applied built-in
5627 		       attributes from being intercepted (and possibly
5628 		       rejected) by a package's attribute routines, but is
5629 		       justified by the performance win for the common case
5630 		       of applying only built-in attributes.) */
5631 		    else
5632 		        attrs = op_append_elem(OP_LIST, attrs,
5633 					    newSVOP(OP_CONST, 0,
5634 					      	    sv));
5635 		}
5636 		s = skipspace(d);
5637 		if (*s == ':' && s[1] != ':')
5638 		    s = skipspace(s+1);
5639 		else if (s == d)
5640 		    break;	/* require real whitespace or :'s */
5641 		/* XXX losing whitespace on sequential attributes here */
5642 	    }
5643 	    {
5644 		if (*s != ';'
5645                     && *s != '}'
5646                     && !(PL_expect == XOPERATOR
5647 			 ? (*s == '=' ||  *s == ')')
5648 			 : (*s == '{' ||  *s == '(')))
5649                 {
5650 		    const char q = ((*s == '\'') ? '"' : '\'');
5651 		    /* If here for an expression, and parsed no attrs, back
5652 		       off. */
5653 		    if (PL_expect == XOPERATOR && !attrs) {
5654 			s = PL_bufptr;
5655 			break;
5656 		    }
5657 		    /* MUST advance bufptr here to avoid bogus "at end of line"
5658 		       context messages from yyerror().
5659 		    */
5660 		    PL_bufptr = s;
5661 		    yyerror( (const char *)
5662 			     (*s
5663 			      ? Perl_form(aTHX_ "Invalid separator character "
5664 					  "%c%c%c in attribute list", q, *s, q)
5665 			      : "Unterminated attribute list" ) );
5666 		    if (attrs)
5667 			op_free(attrs);
5668 		    OPERATOR(':');
5669 		}
5670 	    }
5671 	got_attrs:
5672 	    if (attrs) {
5673 		NEXTVAL_NEXTTOKE.opval = attrs;
5674 		force_next(THING);
5675 	    }
5676 	    TOKEN(COLONATTR);
5677 	}
5678 	}
5679 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5680 	    s--;
5681 	    TOKEN(0);
5682 	}
5683 	PL_lex_allbrackets--;
5684 	OPERATOR(':');
5685     case '(':
5686 	s++;
5687 	if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5688 	    PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
5689 	else
5690 	    PL_expect = XTERM;
5691 	s = skipspace(s);
5692 	PL_lex_allbrackets++;
5693 	TOKEN('(');
5694     case ';':
5695 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5696 	    TOKEN(0);
5697 	CLINE;
5698 	s++;
5699 	PL_expect = XSTATE;
5700 	TOKEN(';');
5701     case ')':
5702 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5703 	    TOKEN(0);
5704 	s++;
5705 	PL_lex_allbrackets--;
5706 	s = skipspace(s);
5707 	if (*s == '{')
5708 	    PREBLOCK(')');
5709 	TERM(')');
5710     case ']':
5711 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5712 	    TOKEN(0);
5713 	s++;
5714 	if (PL_lex_brackets <= 0)
5715 	    /* diag_listed_as: Unmatched right %s bracket */
5716 	    yyerror("Unmatched right square bracket");
5717 	else
5718 	    --PL_lex_brackets;
5719 	PL_lex_allbrackets--;
5720 	if (PL_lex_state == LEX_INTERPNORMAL) {
5721 	    if (PL_lex_brackets == 0) {
5722 		if (*s == '-' && s[1] == '>')
5723 		    PL_lex_state = LEX_INTERPENDMAYBE;
5724 		else if (*s != '[' && *s != '{')
5725 		    PL_lex_state = LEX_INTERPEND;
5726 	    }
5727 	}
5728 	TERM(']');
5729     case '{':
5730 	s++;
5731       leftbracket:
5732 	if (PL_lex_brackets > 100) {
5733 	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5734 	}
5735 	switch (PL_expect) {
5736 	case XTERM:
5737 	case XTERMORDORDOR:
5738 	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5739 	    PL_lex_allbrackets++;
5740 	    OPERATOR(HASHBRACK);
5741 	case XOPERATOR:
5742 	    while (s < PL_bufend && SPACE_OR_TAB(*s))
5743 		s++;
5744 	    d = s;
5745 	    PL_tokenbuf[0] = '\0';
5746 	    if (d < PL_bufend && *d == '-') {
5747 		PL_tokenbuf[0] = '-';
5748 		d++;
5749 		while (d < PL_bufend && SPACE_OR_TAB(*d))
5750 		    d++;
5751 	    }
5752 	    if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5753 		d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5754 			      FALSE, &len);
5755 		while (d < PL_bufend && SPACE_OR_TAB(*d))
5756 		    d++;
5757 		if (*d == '}') {
5758 		    const char minus = (PL_tokenbuf[0] == '-');
5759 		    s = force_word(s + minus, WORD, FALSE, TRUE);
5760 		    if (minus)
5761 			force_next('-');
5762 		}
5763 	    }
5764 	    /* FALLTHROUGH */
5765 	case XATTRTERM:
5766 	case XTERMBLOCK:
5767 	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5768 	    PL_lex_allbrackets++;
5769 	    PL_expect = XSTATE;
5770 	    break;
5771 	case XATTRBLOCK:
5772 	case XBLOCK:
5773 	    PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5774 	    PL_lex_allbrackets++;
5775 	    PL_expect = XSTATE;
5776 	    break;
5777 	case XBLOCKTERM:
5778 	    PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5779 	    PL_lex_allbrackets++;
5780 	    PL_expect = XSTATE;
5781 	    break;
5782 	default: {
5783 		const char *t;
5784 		if (PL_oldoldbufptr == PL_last_lop)
5785 		    PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5786 		else
5787 		    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5788 		PL_lex_allbrackets++;
5789 		s = skipspace(s);
5790 		if (*s == '}') {
5791 		    if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5792 			PL_expect = XTERM;
5793 			/* This hack is to get the ${} in the message. */
5794 			PL_bufptr = s+1;
5795 			yyerror("syntax error");
5796 			break;
5797 		    }
5798 		    OPERATOR(HASHBRACK);
5799 		}
5800 		if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5801 		    /* ${...} or @{...} etc., but not print {...}
5802 		     * Skip the disambiguation and treat this as a block.
5803 		     */
5804 		    goto block_expectation;
5805 		}
5806 		/* This hack serves to disambiguate a pair of curlies
5807 		 * as being a block or an anon hash.  Normally, expectation
5808 		 * determines that, but in cases where we're not in a
5809 		 * position to expect anything in particular (like inside
5810 		 * eval"") we have to resolve the ambiguity.  This code
5811 		 * covers the case where the first term in the curlies is a
5812 		 * quoted string.  Most other cases need to be explicitly
5813 		 * disambiguated by prepending a "+" before the opening
5814 		 * curly in order to force resolution as an anon hash.
5815 		 *
5816 		 * XXX should probably propagate the outer expectation
5817 		 * into eval"" to rely less on this hack, but that could
5818 		 * potentially break current behavior of eval"".
5819 		 * GSAR 97-07-21
5820 		 */
5821 		t = s;
5822 		if (*s == '\'' || *s == '"' || *s == '`') {
5823 		    /* common case: get past first string, handling escapes */
5824 		    for (t++; t < PL_bufend && *t != *s;)
5825 			if (*t++ == '\\')
5826 			    t++;
5827 		    t++;
5828 		}
5829 		else if (*s == 'q') {
5830 		    if (++t < PL_bufend
5831 			&& (!isWORDCHAR(*t)
5832 			    || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5833 				&& !isWORDCHAR(*t))))
5834 		    {
5835 			/* skip q//-like construct */
5836 			const char *tmps;
5837 			char open, close, term;
5838 			I32 brackets = 1;
5839 
5840 			while (t < PL_bufend && isSPACE(*t))
5841 			    t++;
5842 			/* check for q => */
5843 			if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5844 			    OPERATOR(HASHBRACK);
5845 			}
5846 			term = *t;
5847 			open = term;
5848 			if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5849 			    term = tmps[5];
5850 			close = term;
5851 			if (open == close)
5852 			    for (t++; t < PL_bufend; t++) {
5853 				if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5854 				    t++;
5855 				else if (*t == open)
5856 				    break;
5857 			    }
5858 			else {
5859 			    for (t++; t < PL_bufend; t++) {
5860 				if (*t == '\\' && t+1 < PL_bufend)
5861 				    t++;
5862 				else if (*t == close && --brackets <= 0)
5863 				    break;
5864 				else if (*t == open)
5865 				    brackets++;
5866 			    }
5867 			}
5868 			t++;
5869 		    }
5870 		    else
5871 			/* skip plain q word */
5872 			while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5873 			    t += UTF ? UTF8SKIP(t) : 1;
5874 		}
5875 		else if (isWORDCHAR_lazy_if(t,UTF)) {
5876 		    t += UTF ? UTF8SKIP(t) : 1;
5877 		    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5878 			t += UTF ? UTF8SKIP(t) : 1;
5879 		}
5880 		while (t < PL_bufend && isSPACE(*t))
5881 		    t++;
5882 		/* if comma follows first term, call it an anon hash */
5883 		/* XXX it could be a comma expression with loop modifiers */
5884 		if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5885 				   || (*t == '=' && t[1] == '>')))
5886 		    OPERATOR(HASHBRACK);
5887 		if (PL_expect == XREF)
5888 		{
5889 		  block_expectation:
5890 		    /* If there is an opening brace or 'sub:', treat it
5891 		       as a term to make ${{...}}{k} and &{sub:attr...}
5892 		       dwim.  Otherwise, treat it as a statement, so
5893 		       map {no strict; ...} works.
5894 		     */
5895 		    s = skipspace(s);
5896 		    if (*s == '{') {
5897 			PL_expect = XTERM;
5898 			break;
5899 		    }
5900 		    if (strnEQ(s, "sub", 3)) {
5901 			d = s + 3;
5902 			d = skipspace(d);
5903 			if (*d == ':') {
5904 			    PL_expect = XTERM;
5905 			    break;
5906 			}
5907 		    }
5908 		    PL_expect = XSTATE;
5909 		}
5910 		else {
5911 		    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5912 		    PL_expect = XSTATE;
5913 		}
5914 	    }
5915 	    break;
5916 	}
5917 	pl_yylval.ival = CopLINE(PL_curcop);
5918 	PL_copline = NOLINE;   /* invalidate current command line number */
5919 	TOKEN(formbrack ? '=' : '{');
5920     case '}':
5921 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5922 	    TOKEN(0);
5923       rightbracket:
5924 	s++;
5925 	if (PL_lex_brackets <= 0)
5926 	    /* diag_listed_as: Unmatched right %s bracket */
5927 	    yyerror("Unmatched right curly bracket");
5928 	else
5929 	    PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5930 	PL_lex_allbrackets--;
5931 	if (PL_lex_state == LEX_INTERPNORMAL) {
5932 	    if (PL_lex_brackets == 0) {
5933 		if (PL_expect & XFAKEBRACK) {
5934 		    PL_expect &= XENUMMASK;
5935 		    PL_lex_state = LEX_INTERPEND;
5936 		    PL_bufptr = s;
5937 		    return yylex();	/* ignore fake brackets */
5938 		}
5939 		if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5940 		 && SvEVALED(PL_lex_repl))
5941 		    PL_lex_state = LEX_INTERPEND;
5942 		else if (*s == '-' && s[1] == '>')
5943 		    PL_lex_state = LEX_INTERPENDMAYBE;
5944 		else if (*s != '[' && *s != '{')
5945 		    PL_lex_state = LEX_INTERPEND;
5946 	    }
5947 	}
5948 	if (PL_expect & XFAKEBRACK) {
5949 	    PL_expect &= XENUMMASK;
5950 	    PL_bufptr = s;
5951 	    return yylex();		/* ignore fake brackets */
5952 	}
5953 	force_next(formbrack ? '.' : '}');
5954 	if (formbrack) LEAVE;
5955 	if (formbrack == 2) { /* means . where arguments were expected */
5956 	    force_next(';');
5957 	    TOKEN(FORMRBRACK);
5958 	}
5959 	TOKEN(';');
5960     case '&':
5961 	if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5962 	s++;
5963 	if (*s++ == '&') {
5964 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5965 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5966 		s -= 2;
5967 		TOKEN(0);
5968 	    }
5969 	    AOPERATOR(ANDAND);
5970 	}
5971 	s--;
5972 	if (PL_expect == XOPERATOR) {
5973 	    if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5974 		&& isIDFIRST_lazy_if(s,UTF))
5975 	    {
5976 		CopLINE_dec(PL_curcop);
5977 		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5978 		CopLINE_inc(PL_curcop);
5979 	    }
5980 	    d = s;
5981 	    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5982 		s++;
5983 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5984 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5985 		s = d;
5986 		s--;
5987 		TOKEN(0);
5988 	    }
5989 	    if (d == s) {
5990 		PL_parser->saw_infix_sigil = 1;
5991 		BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5992 	    }
5993 	    else
5994 		BAop(OP_SBIT_AND);
5995 	}
5996 
5997 	PL_tokenbuf[0] = '&';
5998 	s = scan_ident(s - 1, PL_tokenbuf + 1,
5999 		       sizeof PL_tokenbuf - 1, TRUE);
6000 	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6001 	if (PL_tokenbuf[1]) {
6002 	    force_ident_maybe_lex('&');
6003 	}
6004 	else
6005 	    PREREF('&');
6006 	TERM('&');
6007 
6008     case '|':
6009 	s++;
6010 	if (*s++ == '|') {
6011 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6012 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6013 		s -= 2;
6014 		TOKEN(0);
6015 	    }
6016 	    AOPERATOR(OROR);
6017 	}
6018 	s--;
6019 	d = s;
6020 	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6021 	    s++;
6022 	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6023 		(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6024 	    s = d - 1;
6025 	    TOKEN(0);
6026 	}
6027 	BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6028     case '=':
6029 	s++;
6030 	{
6031 	    const char tmp = *s++;
6032 	    if (tmp == '=') {
6033 		if (!PL_lex_allbrackets
6034                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6035                 {
6036 		    s -= 2;
6037 		    TOKEN(0);
6038 		}
6039 		Eop(OP_EQ);
6040 	    }
6041 	    if (tmp == '>') {
6042 		if (!PL_lex_allbrackets
6043                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6044                 {
6045 		    s -= 2;
6046 		    TOKEN(0);
6047 		}
6048 		OPERATOR(',');
6049 	    }
6050 	    if (tmp == '~')
6051 		PMop(OP_MATCH);
6052 	    if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6053 		&& strchr("+-*/%.^&|<",tmp))
6054 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6055 			    "Reversed %c= operator",(int)tmp);
6056 	    s--;
6057 	    if (PL_expect == XSTATE
6058                 && isALPHA(tmp)
6059                 && (s == PL_linestart+1 || s[-2] == '\n') )
6060             {
6061                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6062                     || PL_lex_state != LEX_NORMAL) {
6063                     d = PL_bufend;
6064                     while (s < d) {
6065                         if (*s++ == '\n') {
6066                             incline(s);
6067                             if (strnEQ(s,"=cut",4)) {
6068                                 s = strchr(s,'\n');
6069                                 if (s)
6070                                     s++;
6071                                 else
6072                                     s = d;
6073                                 incline(s);
6074                                 goto retry;
6075                             }
6076                         }
6077                     }
6078                     goto retry;
6079                 }
6080                 s = PL_bufend;
6081                 PL_parser->in_pod = 1;
6082                 goto retry;
6083             }
6084 	}
6085 	if (PL_expect == XBLOCK) {
6086 	    const char *t = s;
6087 #ifdef PERL_STRICT_CR
6088 	    while (SPACE_OR_TAB(*t))
6089 #else
6090 	    while (SPACE_OR_TAB(*t) || *t == '\r')
6091 #endif
6092 		t++;
6093 	    if (*t == '\n' || *t == '#') {
6094 		formbrack = 1;
6095 		ENTER;
6096 		SAVEI8(PL_parser->form_lex_state);
6097 		SAVEI32(PL_lex_formbrack);
6098 		PL_parser->form_lex_state = PL_lex_state;
6099 		PL_lex_formbrack = PL_lex_brackets + 1;
6100 		goto leftbracket;
6101 	    }
6102 	}
6103 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6104 	    s--;
6105 	    TOKEN(0);
6106 	}
6107 	pl_yylval.ival = 0;
6108 	OPERATOR(ASSIGNOP);
6109     case '!':
6110 	s++;
6111 	{
6112 	    const char tmp = *s++;
6113 	    if (tmp == '=') {
6114 		/* was this !=~ where !~ was meant?
6115 		 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6116 
6117 		if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6118 		    const char *t = s+1;
6119 
6120 		    while (t < PL_bufend && isSPACE(*t))
6121 			++t;
6122 
6123 		    if (*t == '/' || *t == '?'
6124                         || ((*t == 'm' || *t == 's' || *t == 'y')
6125 			    && !isWORDCHAR(t[1]))
6126                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6127 			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6128 				    "!=~ should be !~");
6129 		}
6130 		if (!PL_lex_allbrackets
6131                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6132                 {
6133 		    s -= 2;
6134 		    TOKEN(0);
6135 		}
6136 		Eop(OP_NE);
6137 	    }
6138 	    if (tmp == '~')
6139 		PMop(OP_NOT);
6140 	}
6141 	s--;
6142 	OPERATOR('!');
6143     case '<':
6144 	if (PL_expect != XOPERATOR) {
6145 	    if (s[1] != '<' && !strchr(s,'>'))
6146 		check_uni();
6147 	    if (s[1] == '<' && s[2] != '>')
6148 		s = scan_heredoc(s);
6149 	    else
6150 		s = scan_inputsymbol(s);
6151 	    PL_expect = XOPERATOR;
6152 	    TOKEN(sublex_start());
6153 	}
6154 	s++;
6155 	{
6156 	    char tmp = *s++;
6157 	    if (tmp == '<') {
6158 		if (*s == '=' && !PL_lex_allbrackets
6159                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6160                 {
6161 		    s -= 2;
6162 		    TOKEN(0);
6163 		}
6164 		SHop(OP_LEFT_SHIFT);
6165 	    }
6166 	    if (tmp == '=') {
6167 		tmp = *s++;
6168 		if (tmp == '>') {
6169 		    if (!PL_lex_allbrackets
6170                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6171                     {
6172 			s -= 3;
6173 			TOKEN(0);
6174 		    }
6175 		    Eop(OP_NCMP);
6176 		}
6177 		s--;
6178 		if (!PL_lex_allbrackets
6179                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6180                 {
6181 		    s -= 2;
6182 		    TOKEN(0);
6183 		}
6184 		Rop(OP_LE);
6185 	    }
6186 	}
6187 	s--;
6188 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6189 	    s--;
6190 	    TOKEN(0);
6191 	}
6192 	Rop(OP_LT);
6193     case '>':
6194 	s++;
6195 	{
6196 	    const char tmp = *s++;
6197 	    if (tmp == '>') {
6198 		if (*s == '=' && !PL_lex_allbrackets
6199                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6200                 {
6201 		    s -= 2;
6202 		    TOKEN(0);
6203 		}
6204 		SHop(OP_RIGHT_SHIFT);
6205 	    }
6206 	    else if (tmp == '=') {
6207 		if (!PL_lex_allbrackets
6208                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6209                 {
6210 		    s -= 2;
6211 		    TOKEN(0);
6212 		}
6213 		Rop(OP_GE);
6214 	    }
6215 	}
6216 	s--;
6217 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6218 	    s--;
6219 	    TOKEN(0);
6220 	}
6221 	Rop(OP_GT);
6222 
6223     case '$':
6224 	CLINE;
6225 
6226 	if (PL_expect == XOPERATOR) {
6227 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6228 		return deprecate_commaless_var_list();
6229 	    }
6230 	}
6231 	else if (PL_expect == XPOSTDEREF) {
6232 	    if (s[1] == '#') {
6233 		s++;
6234 		POSTDEREF(DOLSHARP);
6235 	    }
6236 	    POSTDEREF('$');
6237 	}
6238 
6239 	if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6240 	    PL_tokenbuf[0] = '@';
6241 	    s = scan_ident(s + 1, PL_tokenbuf + 1,
6242 			   sizeof PL_tokenbuf - 1, FALSE);
6243             if (PL_expect == XOPERATOR) {
6244                 d = s;
6245                 if (PL_bufptr > s) {
6246                     d = PL_bufptr-1;
6247                     PL_bufptr = PL_oldbufptr;
6248                 }
6249 		no_op("Array length", d);
6250             }
6251 	    if (!PL_tokenbuf[1])
6252 		PREREF(DOLSHARP);
6253 	    PL_expect = XOPERATOR;
6254 	    force_ident_maybe_lex('#');
6255 	    TOKEN(DOLSHARP);
6256 	}
6257 
6258 	PL_tokenbuf[0] = '$';
6259 	s = scan_ident(s, PL_tokenbuf + 1,
6260 		       sizeof PL_tokenbuf - 1, FALSE);
6261 	if (PL_expect == XOPERATOR) {
6262 	    d = s;
6263 	    if (PL_bufptr > s) {
6264 		d = PL_bufptr-1;
6265 		PL_bufptr = PL_oldbufptr;
6266 	    }
6267 	    no_op("Scalar", d);
6268 	}
6269 	if (!PL_tokenbuf[1]) {
6270 	    if (s == PL_bufend)
6271 		yyerror("Final $ should be \\$ or $name");
6272 	    PREREF('$');
6273 	}
6274 
6275 	d = s;
6276 	{
6277 	    const char tmp = *s;
6278 	    if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6279 		s = skipspace(s);
6280 
6281 	    if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6282 		&& intuit_more(s)) {
6283 		if (*s == '[') {
6284 		    PL_tokenbuf[0] = '@';
6285 		    if (ckWARN(WARN_SYNTAX)) {
6286 			char *t = s+1;
6287 
6288 			while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6289 			    t += UTF ? UTF8SKIP(t) : 1;
6290 			if (*t++ == ',') {
6291 			    PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6292 			    while (t < PL_bufend && *t != ']')
6293 				t++;
6294 			    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6295 					"Multidimensional syntax %"UTF8f" not supported",
6296                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6297 			}
6298 		    }
6299 		}
6300 		else if (*s == '{') {
6301 		    char *t;
6302 		    PL_tokenbuf[0] = '%';
6303 		    if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6304 			&& (t = strchr(s, '}')) && (t = strchr(t, '=')))
6305 			{
6306 			    char tmpbuf[sizeof PL_tokenbuf];
6307 			    do {
6308 				t++;
6309 			    } while (isSPACE(*t));
6310 			    if (isIDFIRST_lazy_if(t,UTF)) {
6311 				STRLEN len;
6312 				t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6313 					      &len);
6314 				while (isSPACE(*t))
6315 				    t++;
6316 				if (*t == ';'
6317                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6318 				    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6319 					"You need to quote \"%"UTF8f"\"",
6320 					 UTF8fARG(UTF, len, tmpbuf));
6321 			    }
6322 			}
6323 		}
6324 	    }
6325 
6326 	    PL_expect = XOPERATOR;
6327 	    if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6328 		const bool islop = (PL_last_lop == PL_oldoldbufptr);
6329 		if (!islop || PL_last_lop_op == OP_GREPSTART)
6330 		    PL_expect = XOPERATOR;
6331 		else if (strchr("$@\"'`q", *s))
6332 		    PL_expect = XTERM;		/* e.g. print $fh "foo" */
6333 		else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6334 		    PL_expect = XTERM;		/* e.g. print $fh &sub */
6335 		else if (isIDFIRST_lazy_if(s,UTF)) {
6336 		    char tmpbuf[sizeof PL_tokenbuf];
6337 		    int t2;
6338 		    scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6339 		    if ((t2 = keyword(tmpbuf, len, 0))) {
6340 			/* binary operators exclude handle interpretations */
6341 			switch (t2) {
6342 			case -KEY_x:
6343 			case -KEY_eq:
6344 			case -KEY_ne:
6345 			case -KEY_gt:
6346 			case -KEY_lt:
6347 			case -KEY_ge:
6348 			case -KEY_le:
6349 			case -KEY_cmp:
6350 			    break;
6351 			default:
6352 			    PL_expect = XTERM;	/* e.g. print $fh length() */
6353 			    break;
6354 			}
6355 		    }
6356 		    else {
6357 			PL_expect = XTERM;	/* e.g. print $fh subr() */
6358 		    }
6359 		}
6360 		else if (isDIGIT(*s))
6361 		    PL_expect = XTERM;		/* e.g. print $fh 3 */
6362 		else if (*s == '.' && isDIGIT(s[1]))
6363 		    PL_expect = XTERM;		/* e.g. print $fh .3 */
6364 		else if ((*s == '?' || *s == '-' || *s == '+')
6365 			 && !isSPACE(s[1]) && s[1] != '=')
6366 		    PL_expect = XTERM;		/* e.g. print $fh -1 */
6367 		else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6368 			 && s[1] != '/')
6369 		    PL_expect = XTERM;		/* e.g. print $fh /.../
6370 						   XXX except DORDOR operator
6371 						*/
6372 		else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6373 			 && s[2] != '=')
6374 		    PL_expect = XTERM;		/* print $fh <<"EOF" */
6375 	    }
6376 	}
6377 	force_ident_maybe_lex('$');
6378 	TOKEN('$');
6379 
6380     case '@':
6381         if (PL_expect == XPOSTDEREF)
6382             POSTDEREF('@');
6383 	PL_tokenbuf[0] = '@';
6384 	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6385 	if (PL_expect == XOPERATOR) {
6386             d = s;
6387             if (PL_bufptr > s) {
6388                 d = PL_bufptr-1;
6389                 PL_bufptr = PL_oldbufptr;
6390             }
6391 	    no_op("Array", d);
6392         }
6393 	pl_yylval.ival = 0;
6394 	if (!PL_tokenbuf[1]) {
6395 	    PREREF('@');
6396 	}
6397 	if (PL_lex_state == LEX_NORMAL)
6398 	    s = skipspace(s);
6399 	if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6400 	    if (*s == '{')
6401 		PL_tokenbuf[0] = '%';
6402 
6403 	    /* Warn about @ where they meant $. */
6404 	    if (*s == '[' || *s == '{') {
6405 		if (ckWARN(WARN_SYNTAX)) {
6406 		    S_check_scalar_slice(aTHX_ s);
6407 		}
6408 	    }
6409 	}
6410 	PL_expect = XOPERATOR;
6411 	force_ident_maybe_lex('@');
6412 	TERM('@');
6413 
6414      case '/':			/* may be division, defined-or, or pattern */
6415 	if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6416 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6417 		    (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6418 		TOKEN(0);
6419 	    s += 2;
6420 	    AOPERATOR(DORDOR);
6421 	}
6422 	else if (PL_expect == XOPERATOR) {
6423 	    s++;
6424 	    if (*s == '=' && !PL_lex_allbrackets
6425                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6426             {
6427 		s--;
6428 		TOKEN(0);
6429 	    }
6430 	    Mop(OP_DIVIDE);
6431         }
6432 	else {
6433 	    /* Disable warning on "study /blah/" */
6434 	    if (PL_oldoldbufptr == PL_last_uni
6435 	     && (*PL_last_uni != 's' || s - PL_last_uni < 5
6436 	         || memNE(PL_last_uni, "study", 5)
6437 	         || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6438 	     ))
6439 	        check_uni();
6440 	    s = scan_pat(s,OP_MATCH);
6441 	    TERM(sublex_start());
6442 	}
6443 
6444      case '?':			/* conditional */
6445 	s++;
6446 	if (!PL_lex_allbrackets
6447             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6448         {
6449 	    s--;
6450 	    TOKEN(0);
6451 	}
6452 	PL_lex_allbrackets++;
6453 	OPERATOR('?');
6454 
6455     case '.':
6456 	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6457 #ifdef PERL_STRICT_CR
6458 	    && s[1] == '\n'
6459 #else
6460 	    && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6461 #endif
6462 	    && (s == PL_linestart || s[-1] == '\n') )
6463 	{
6464 	    PL_expect = XSTATE;
6465 	    formbrack = 2; /* dot seen where arguments expected */
6466 	    goto rightbracket;
6467 	}
6468 	if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6469 	    s += 3;
6470 	    OPERATOR(YADAYADA);
6471 	}
6472 	if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6473 	    char tmp = *s++;
6474 	    if (*s == tmp) {
6475 		if (!PL_lex_allbrackets
6476                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6477                 {
6478 		    s--;
6479 		    TOKEN(0);
6480 		}
6481 		s++;
6482 		if (*s == tmp) {
6483 		    s++;
6484 		    pl_yylval.ival = OPf_SPECIAL;
6485 		}
6486 		else
6487 		    pl_yylval.ival = 0;
6488 		OPERATOR(DOTDOT);
6489 	    }
6490 	    if (*s == '=' && !PL_lex_allbrackets
6491                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6492             {
6493 		s--;
6494 		TOKEN(0);
6495 	    }
6496 	    Aop(OP_CONCAT);
6497 	}
6498 	/* FALLTHROUGH */
6499     case '0': case '1': case '2': case '3': case '4':
6500     case '5': case '6': case '7': case '8': case '9':
6501 	s = scan_num(s, &pl_yylval);
6502 	DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6503 	if (PL_expect == XOPERATOR)
6504 	    no_op("Number",s);
6505 	TERM(THING);
6506 
6507     case '\'':
6508 	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6509 	if (!s)
6510 	    missingterm(NULL);
6511 	COPLINE_SET_FROM_MULTI_END;
6512 	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6513 	if (PL_expect == XOPERATOR) {
6514 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6515 		return deprecate_commaless_var_list();
6516 	    }
6517 	    else
6518 		no_op("String",s);
6519 	}
6520 	pl_yylval.ival = OP_CONST;
6521 	TERM(sublex_start());
6522 
6523     case '"':
6524 	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6525 	DEBUG_T( {
6526 	    if (s)
6527 		printbuf("### Saw string before %s\n", s);
6528 	    else
6529 		PerlIO_printf(Perl_debug_log,
6530 			     "### Saw unterminated string\n");
6531 	} );
6532 	if (PL_expect == XOPERATOR) {
6533 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6534 		return deprecate_commaless_var_list();
6535 	    }
6536 	    else
6537 		no_op("String",s);
6538 	}
6539 	if (!s)
6540 	    missingterm(NULL);
6541 	pl_yylval.ival = OP_CONST;
6542 	/* FIXME. I think that this can be const if char *d is replaced by
6543 	   more localised variables.  */
6544 	for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6545 	    if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6546 		pl_yylval.ival = OP_STRINGIFY;
6547 		break;
6548 	    }
6549 	}
6550 	if (pl_yylval.ival == OP_CONST)
6551 	    COPLINE_SET_FROM_MULTI_END;
6552 	TERM(sublex_start());
6553 
6554     case '`':
6555 	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6556 	DEBUG_T( {
6557             if (s)
6558                 printbuf("### Saw backtick string before %s\n", s);
6559             else
6560 		PerlIO_printf(Perl_debug_log,
6561 			     "### Saw unterminated backtick string\n");
6562         } );
6563 	if (PL_expect == XOPERATOR)
6564 	    no_op("Backticks",s);
6565 	if (!s)
6566 	    missingterm(NULL);
6567 	pl_yylval.ival = OP_BACKTICK;
6568 	TERM(sublex_start());
6569 
6570     case '\\':
6571 	s++;
6572 	if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6573 	 && isDIGIT(*s))
6574 	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6575 			   *s, *s);
6576 	if (PL_expect == XOPERATOR)
6577 	    no_op("Backslash",s);
6578 	OPERATOR(REFGEN);
6579 
6580     case 'v':
6581 	if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6582 	    char *start = s + 2;
6583 	    while (isDIGIT(*start) || *start == '_')
6584 		start++;
6585 	    if (*start == '.' && isDIGIT(start[1])) {
6586 		s = scan_num(s, &pl_yylval);
6587 		TERM(THING);
6588 	    }
6589 	    else if ((*start == ':' && start[1] == ':')
6590 		  || (PL_expect == XSTATE && *start == ':'))
6591 		goto keylookup;
6592 	    else if (PL_expect == XSTATE) {
6593 		d = start;
6594 		while (d < PL_bufend && isSPACE(*d)) d++;
6595 		if (*d == ':') goto keylookup;
6596 	    }
6597 	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6598 	    if (!isALPHA(*start) && (PL_expect == XTERM
6599 			|| PL_expect == XREF || PL_expect == XSTATE
6600 			|| PL_expect == XTERMORDORDOR)) {
6601 		GV *const gv = gv_fetchpvn_flags(s, start - s,
6602                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6603 		if (!gv) {
6604 		    s = scan_num(s, &pl_yylval);
6605 		    TERM(THING);
6606 		}
6607 	    }
6608 	}
6609 	goto keylookup;
6610     case 'x':
6611 	if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6612 	    s++;
6613 	    Mop(OP_REPEAT);
6614 	}
6615 	goto keylookup;
6616 
6617     case '_':
6618     case 'a': case 'A':
6619     case 'b': case 'B':
6620     case 'c': case 'C':
6621     case 'd': case 'D':
6622     case 'e': case 'E':
6623     case 'f': case 'F':
6624     case 'g': case 'G':
6625     case 'h': case 'H':
6626     case 'i': case 'I':
6627     case 'j': case 'J':
6628     case 'k': case 'K':
6629     case 'l': case 'L':
6630     case 'm': case 'M':
6631     case 'n': case 'N':
6632     case 'o': case 'O':
6633     case 'p': case 'P':
6634     case 'q': case 'Q':
6635     case 'r': case 'R':
6636     case 's': case 'S':
6637     case 't': case 'T':
6638     case 'u': case 'U':
6639 	      case 'V':
6640     case 'w': case 'W':
6641 	      case 'X':
6642     case 'y': case 'Y':
6643     case 'z': case 'Z':
6644 
6645       keylookup: {
6646 	bool anydelim;
6647 	bool lex;
6648 	I32 tmp;
6649 	SV *sv;
6650 	CV *cv;
6651 	PADOFFSET off;
6652 	OP *rv2cv_op;
6653 
6654 	lex = FALSE;
6655 	orig_keyword = 0;
6656 	off = 0;
6657 	sv = NULL;
6658 	cv = NULL;
6659 	gv = NULL;
6660 	gvp = NULL;
6661 	rv2cv_op = NULL;
6662 
6663 	PL_bufptr = s;
6664 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6665 
6666 	/* Some keywords can be followed by any delimiter, including ':' */
6667 	anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6668 
6669 	/* x::* is just a word, unless x is "CORE" */
6670 	if (!anydelim && *s == ':' && s[1] == ':') {
6671 	    if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6672 	    goto just_a_word;
6673 	}
6674 
6675 	d = s;
6676 	while (d < PL_bufend && isSPACE(*d))
6677 		d++;	/* no comments skipped here, or s### is misparsed */
6678 
6679 	/* Is this a word before a => operator? */
6680 	if (*d == '=' && d[1] == '>') {
6681 	  fat_arrow:
6682 	    CLINE;
6683 	    pl_yylval.opval
6684 		= (OP*)newSVOP(OP_CONST, 0,
6685 			       S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6686 	    pl_yylval.opval->op_private = OPpCONST_BARE;
6687 	    TERM(WORD);
6688 	}
6689 
6690 	/* Check for plugged-in keyword */
6691 	{
6692 	    OP *o;
6693 	    int result;
6694 	    char *saved_bufptr = PL_bufptr;
6695 	    PL_bufptr = s;
6696 	    result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6697 	    s = PL_bufptr;
6698 	    if (result == KEYWORD_PLUGIN_DECLINE) {
6699 		/* not a plugged-in keyword */
6700 		PL_bufptr = saved_bufptr;
6701 	    } else if (result == KEYWORD_PLUGIN_STMT) {
6702 		pl_yylval.opval = o;
6703 		CLINE;
6704 		if (!PL_nexttoke) PL_expect = XSTATE;
6705 		return REPORT(PLUGSTMT);
6706 	    } else if (result == KEYWORD_PLUGIN_EXPR) {
6707 		pl_yylval.opval = o;
6708 		CLINE;
6709 		if (!PL_nexttoke) PL_expect = XOPERATOR;
6710 		return REPORT(PLUGEXPR);
6711 	    } else {
6712 		Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6713 					PL_tokenbuf);
6714 	    }
6715 	}
6716 
6717 	/* Check for built-in keyword */
6718 	tmp = keyword(PL_tokenbuf, len, 0);
6719 
6720 	/* Is this a label? */
6721 	if (!anydelim && PL_expect == XSTATE
6722 	      && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6723 	    s = d + 1;
6724 	    pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6725 	    pl_yylval.pval[len] = '\0';
6726 	    pl_yylval.pval[len+1] = UTF ? 1 : 0;
6727 	    CLINE;
6728 	    TOKEN(LABEL);
6729 	}
6730 
6731 	/* Check for lexical sub */
6732 	if (PL_expect != XOPERATOR) {
6733 	    char tmpbuf[sizeof PL_tokenbuf + 1];
6734 	    *tmpbuf = '&';
6735 	    Copy(PL_tokenbuf, tmpbuf+1, len, char);
6736 	    off = pad_findmy_pvn(tmpbuf, len+1, 0);
6737 	    if (off != NOT_IN_PAD) {
6738 		assert(off); /* we assume this is boolean-true below */
6739 		if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6740 		    HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6741 		    HEK * const stashname = HvNAME_HEK(stash);
6742 		    sv = newSVhek(stashname);
6743                     sv_catpvs(sv, "::");
6744                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6745 				    (UTF ? SV_CATUTF8 : SV_CATBYTES));
6746 		    gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6747 				    SVt_PVCV);
6748 		    off = 0;
6749 		    if (!gv) {
6750 			sv_free(sv);
6751 			sv = NULL;
6752 			goto just_a_word;
6753 		    }
6754 		}
6755 		else {
6756 		    rv2cv_op = newOP(OP_PADANY, 0);
6757 		    rv2cv_op->op_targ = off;
6758 		    cv = find_lexical_cv(off);
6759 		}
6760 		lex = TRUE;
6761 		goto just_a_word;
6762 	    }
6763 	    off = 0;
6764 	}
6765 
6766 	if (tmp < 0) {			/* second-class keyword? */
6767 	    GV *ogv = NULL;	/* override (winner) */
6768 	    GV *hgv = NULL;	/* hidden (loser) */
6769 	    if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6770 		CV *cv;
6771 		if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6772 					    (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6773 					    SVt_PVCV))
6774                     && (cv = GvCVu(gv)))
6775 		{
6776 		    if (GvIMPORTED_CV(gv))
6777 			ogv = gv;
6778 		    else if (! CvMETHOD(cv))
6779 			hgv = gv;
6780 		}
6781 		if (!ogv
6782                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6783                                                               len, FALSE))
6784                     && (gv = *gvp)
6785                     && (isGV_with_GP(gv)
6786 			? GvCVu(gv) && GvIMPORTED_CV(gv)
6787 			:   SvPCS_IMPORTED(gv)
6788 			&& (gv_init(gv, PL_globalstash, PL_tokenbuf,
6789                                                                  len, 0), 1)))
6790 		{
6791 		    ogv = gv;
6792 		}
6793 	    }
6794 	    if (ogv) {
6795 		orig_keyword = tmp;
6796 		tmp = 0;		/* overridden by import or by GLOBAL */
6797 	    }
6798 	    else if (gv && !gvp
6799 		     && -tmp==KEY_lock	/* XXX generalizable kludge */
6800 		     && GvCVu(gv))
6801 	    {
6802 		tmp = 0;		/* any sub overrides "weak" keyword */
6803 	    }
6804 	    else {			/* no override */
6805 		tmp = -tmp;
6806 		if (tmp == KEY_dump) {
6807 		    Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6808 				   "dump() better written as CORE::dump()");
6809 		}
6810 		gv = NULL;
6811 		gvp = 0;
6812 		if (hgv && tmp != KEY_x)	/* never ambiguous */
6813 		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6814 				   "Ambiguous call resolved as CORE::%s(), "
6815 				   "qualify as such or use &",
6816 				   GvENAME(hgv));
6817 	    }
6818 	}
6819 
6820 	if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6821 	 && (!anydelim || *s != '#')) {
6822 	    /* no override, and not s### either; skipspace is safe here
6823 	     * check for => on following line */
6824 	    bool arrow;
6825 	    STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6826 	    STRLEN   soff = s         - SvPVX(PL_linestr);
6827 	    s = skipspace_flags(s, LEX_NO_INCLINE);
6828 	    arrow = *s == '=' && s[1] == '>';
6829 	    PL_bufptr = SvPVX(PL_linestr) + bufoff;
6830 	    s         = SvPVX(PL_linestr) +   soff;
6831 	    if (arrow)
6832 		goto fat_arrow;
6833 	}
6834 
6835       reserved_word:
6836 	switch (tmp) {
6837 
6838 	default:			/* not a keyword */
6839 	    /* Trade off - by using this evil construction we can pull the
6840 	       variable gv into the block labelled keylookup. If not, then
6841 	       we have to give it function scope so that the goto from the
6842 	       earlier ':' case doesn't bypass the initialisation.  */
6843 	    if (0) {
6844 	    just_a_word_zero_gv:
6845 		sv = NULL;
6846 		cv = NULL;
6847 		gv = NULL;
6848 		gvp = NULL;
6849 		rv2cv_op = NULL;
6850 		orig_keyword = 0;
6851 		lex = 0;
6852 		off = 0;
6853 	    }
6854 	  just_a_word: {
6855 		int pkgname = 0;
6856 		const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6857 		bool safebw;
6858 
6859 
6860 		/* Get the rest if it looks like a package qualifier */
6861 
6862 		if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6863 		    STRLEN morelen;
6864 		    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6865 				  TRUE, &morelen);
6866 		    if (!morelen)
6867 			Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6868 				UTF8fARG(UTF, len, PL_tokenbuf),
6869 				*s == '\'' ? "'" : "::");
6870 		    len += morelen;
6871 		    pkgname = 1;
6872 		}
6873 
6874 		if (PL_expect == XOPERATOR) {
6875 		    if (PL_bufptr == PL_linestart) {
6876 			CopLINE_dec(PL_curcop);
6877 			Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6878 			CopLINE_inc(PL_curcop);
6879 		    }
6880 		    else
6881 			no_op("Bareword",s);
6882 		}
6883 
6884 		/* See if the name is "Foo::",
6885 		   in which case Foo is a bareword
6886 		   (and a package name). */
6887 
6888 		if (len > 2
6889                     && PL_tokenbuf[len - 2] == ':'
6890                     && PL_tokenbuf[len - 1] == ':')
6891 		{
6892 		    if (ckWARN(WARN_BAREWORD)
6893 			&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6894 			Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6895 		  	  "Bareword \"%"UTF8f"\" refers to nonexistent package",
6896 			   UTF8fARG(UTF, len, PL_tokenbuf));
6897 		    len -= 2;
6898 		    PL_tokenbuf[len] = '\0';
6899 		    gv = NULL;
6900 		    gvp = 0;
6901 		    safebw = TRUE;
6902 		}
6903 		else {
6904 		    safebw = FALSE;
6905 		}
6906 
6907 		/* if we saw a global override before, get the right name */
6908 
6909 		if (!sv)
6910 		  sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6911 						len);
6912 		if (gvp) {
6913 		    SV * const tmp_sv = sv;
6914 		    sv = newSVpvs("CORE::GLOBAL::");
6915 		    sv_catsv(sv, tmp_sv);
6916 		    SvREFCNT_dec(tmp_sv);
6917 		}
6918 
6919 
6920 		/* Presume this is going to be a bareword of some sort. */
6921 		CLINE;
6922 		pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6923 		pl_yylval.opval->op_private = OPpCONST_BARE;
6924 
6925 		/* And if "Foo::", then that's what it certainly is. */
6926 		if (safebw)
6927 		    goto safe_bareword;
6928 
6929 		if (!off)
6930 		{
6931 		    OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6932 		    const_op->op_private = OPpCONST_BARE;
6933 		    rv2cv_op =
6934 			newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6935 		    cv = lex
6936 			? isGV(gv)
6937 			    ? GvCV(gv)
6938 			    : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6939 				? (CV *)SvRV(gv)
6940 				: ((CV *)gv)
6941 			: rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6942 		}
6943 
6944 		/* Use this var to track whether intuit_method has been
6945 		   called.  intuit_method returns 0 or > 255.  */
6946 		tmp = 1;
6947 
6948 		/* See if it's the indirect object for a list operator. */
6949 
6950 		if (PL_oldoldbufptr
6951                     && PL_oldoldbufptr < PL_bufptr
6952                     && (PL_oldoldbufptr == PL_last_lop
6953 		        || PL_oldoldbufptr == PL_last_uni)
6954                     && /* NO SKIPSPACE BEFORE HERE! */
6955 		       (PL_expect == XREF
6956                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
6957                                                                == OA_FILEREF))
6958 		{
6959 		    bool immediate_paren = *s == '(';
6960 
6961 		    /* (Now we can afford to cross potential line boundary.) */
6962 		    s = skipspace(s);
6963 
6964 		    /* Two barewords in a row may indicate method call. */
6965 
6966 		    if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
6967                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
6968                     {
6969 			goto method;
6970 		    }
6971 
6972 		    /* If not a declared subroutine, it's an indirect object. */
6973 		    /* (But it's an indir obj regardless for sort.) */
6974 		    /* Also, if "_" follows a filetest operator, it's a bareword */
6975 
6976 		    if (
6977 			( !immediate_paren && (PL_last_lop_op == OP_SORT
6978                          || (!cv
6979                              && (PL_last_lop_op != OP_MAPSTART
6980                                  && PL_last_lop_op != OP_GREPSTART))))
6981 		       || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6982 			    && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
6983                                                             == OA_FILESTATOP))
6984 		       )
6985 		    {
6986 			PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6987 			goto bareword;
6988 		    }
6989 		}
6990 
6991 		PL_expect = XOPERATOR;
6992 		s = skipspace(s);
6993 
6994 		/* Is this a word before a => operator? */
6995 		if (*s == '=' && s[1] == '>' && !pkgname) {
6996 		    op_free(rv2cv_op);
6997 		    CLINE;
6998 		    if (gvp || (lex && !off)) {
6999 			assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7000 			/* This is our own scalar, created a few lines
7001 			   above, so this is safe. */
7002 			SvREADONLY_off(sv);
7003 			sv_setpv(sv, PL_tokenbuf);
7004 			if (UTF && !IN_BYTES
7005 			 && is_utf8_string((U8*)PL_tokenbuf, len))
7006 			      SvUTF8_on(sv);
7007 			SvREADONLY_on(sv);
7008 		    }
7009 		    TERM(WORD);
7010 		}
7011 
7012 		/* If followed by a paren, it's certainly a subroutine. */
7013 		if (*s == '(') {
7014 		    CLINE;
7015 		    if (cv) {
7016 			d = s + 1;
7017 			while (SPACE_OR_TAB(*d))
7018 			    d++;
7019 			if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7020 			    s = d + 1;
7021 			    goto its_constant;
7022 			}
7023 		    }
7024 		    NEXTVAL_NEXTTOKE.opval =
7025 			off ? rv2cv_op : pl_yylval.opval;
7026 		    if (off)
7027 			 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7028 		    else op_free(rv2cv_op),	   force_next(WORD);
7029 		    pl_yylval.ival = 0;
7030 		    TOKEN('&');
7031 		}
7032 
7033 		/* If followed by var or block, call it a method (unless sub) */
7034 
7035 		if ((*s == '$' || *s == '{') && !cv) {
7036 		    op_free(rv2cv_op);
7037 		    PL_last_lop = PL_oldbufptr;
7038 		    PL_last_lop_op = OP_METHOD;
7039 		    if (!PL_lex_allbrackets
7040                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7041                     {
7042 			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7043                     }
7044 		    PL_expect = XBLOCKTERM;
7045 		    PL_bufptr = s;
7046 		    return REPORT(METHOD);
7047 		}
7048 
7049 		/* If followed by a bareword, see if it looks like indir obj. */
7050 
7051 		if (tmp == 1 && !orig_keyword
7052 			&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7053 			&& (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7054 		  method:
7055 		    if (lex && !off) {
7056 			assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7057 			SvREADONLY_off(sv);
7058 			sv_setpvn(sv, PL_tokenbuf, len);
7059 			if (UTF && !IN_BYTES
7060 			 && is_utf8_string((U8*)PL_tokenbuf, len))
7061 			    SvUTF8_on (sv);
7062 			else SvUTF8_off(sv);
7063 		    }
7064 		    op_free(rv2cv_op);
7065 		    if (tmp == METHOD && !PL_lex_allbrackets
7066                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7067                     {
7068 			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7069                     }
7070 		    return REPORT(tmp);
7071 		}
7072 
7073 		/* Not a method, so call it a subroutine (if defined) */
7074 
7075 		if (cv) {
7076 		    /* Check for a constant sub */
7077 		    if ((sv = cv_const_sv_or_av(cv))) {
7078 		  its_constant:
7079 			op_free(rv2cv_op);
7080 			SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7081 			((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7082 			if (SvTYPE(sv) == SVt_PVAV)
7083 			    pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7084 						      pl_yylval.opval);
7085 			else {
7086 			    pl_yylval.opval->op_private = 0;
7087 			    pl_yylval.opval->op_folded = 1;
7088 			    pl_yylval.opval->op_flags |= OPf_SPECIAL;
7089 			}
7090 			TOKEN(WORD);
7091 		    }
7092 
7093 		    op_free(pl_yylval.opval);
7094 		    pl_yylval.opval =
7095 			off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7096 		    pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7097 		    PL_last_lop = PL_oldbufptr;
7098 		    PL_last_lop_op = OP_ENTERSUB;
7099 		    /* Is there a prototype? */
7100 		    if (
7101 			SvPOK(cv))
7102 		    {
7103 			STRLEN protolen = CvPROTOLEN(cv);
7104 			const char *proto = CvPROTO(cv);
7105 			bool optional;
7106 			proto = S_strip_spaces(aTHX_ proto, &protolen);
7107 			if (!protolen)
7108 			    TERM(FUNC0SUB);
7109 			if ((optional = *proto == ';'))
7110 			  do
7111 			    proto++;
7112 			  while (*proto == ';');
7113 			if (
7114 			    (
7115 			        (
7116 			            *proto == '$' || *proto == '_'
7117 			         || *proto == '*' || *proto == '+'
7118 			        )
7119 			     && proto[1] == '\0'
7120 			    )
7121 			 || (
7122 			     *proto == '\\' && proto[1] && proto[2] == '\0'
7123 			    )
7124 			)
7125 			    UNIPROTO(UNIOPSUB,optional);
7126 			if (*proto == '\\' && proto[1] == '[') {
7127 			    const char *p = proto + 2;
7128 			    while(*p && *p != ']')
7129 				++p;
7130 			    if(*p == ']' && !p[1])
7131 				UNIPROTO(UNIOPSUB,optional);
7132 			}
7133 			if (*proto == '&' && *s == '{') {
7134 			    if (PL_curstash)
7135 				sv_setpvs(PL_subname, "__ANON__");
7136 			    else
7137 				sv_setpvs(PL_subname, "__ANON__::__ANON__");
7138 			    if (!PL_lex_allbrackets
7139                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7140                             {
7141 				PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7142                             }
7143 			    PREBLOCK(LSTOPSUB);
7144 			}
7145 		    }
7146 		    NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7147 		    PL_expect = XTERM;
7148 		    force_next(off ? PRIVATEREF : WORD);
7149 		    if (!PL_lex_allbrackets
7150                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7151                     {
7152 			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7153                     }
7154 		    TOKEN(NOAMP);
7155 		}
7156 
7157 		/* Call it a bare word */
7158 
7159 		if (PL_hints & HINT_STRICT_SUBS)
7160 		    pl_yylval.opval->op_private |= OPpCONST_STRICT;
7161 		else {
7162 		bareword:
7163 		    /* after "print" and similar functions (corresponding to
7164 		     * "F? L" in opcode.pl), whatever wasn't already parsed as
7165 		     * a filehandle should be subject to "strict subs".
7166 		     * Likewise for the optional indirect-object argument to system
7167 		     * or exec, which can't be a bareword */
7168 		    if ((PL_last_lop_op == OP_PRINT
7169 			    || PL_last_lop_op == OP_PRTF
7170 			    || PL_last_lop_op == OP_SAY
7171 			    || PL_last_lop_op == OP_SYSTEM
7172 			    || PL_last_lop_op == OP_EXEC)
7173 			    && (PL_hints & HINT_STRICT_SUBS))
7174 			pl_yylval.opval->op_private |= OPpCONST_STRICT;
7175 		    if (lastchar != '-') {
7176 			if (ckWARN(WARN_RESERVED)) {
7177 			    d = PL_tokenbuf;
7178 			    while (isLOWER(*d))
7179 				d++;
7180 			    if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7181                             {
7182                                 /* PL_warn_reserved is constant */
7183                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7184 				Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7185 				       PL_tokenbuf);
7186                                 GCC_DIAG_RESTORE;
7187                             }
7188 			}
7189 		    }
7190 		}
7191 		op_free(rv2cv_op);
7192 
7193 	    safe_bareword:
7194 		if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7195 		 && saw_infix_sigil) {
7196 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7197 				     "Operator or semicolon missing before %c%"UTF8f,
7198 				     lastchar,
7199 				     UTF8fARG(UTF, strlen(PL_tokenbuf),
7200 					      PL_tokenbuf));
7201 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7202 				     "Ambiguous use of %c resolved as operator %c",
7203 				     lastchar, lastchar);
7204 		}
7205 		TOKEN(WORD);
7206 	    }
7207 
7208 	case KEY___FILE__:
7209 	    FUN0OP(
7210 		(OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7211 	    );
7212 
7213 	case KEY___LINE__:
7214 	    FUN0OP(
7215         	(OP*)newSVOP(OP_CONST, 0,
7216 		    Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7217 	    );
7218 
7219 	case KEY___PACKAGE__:
7220 	    FUN0OP(
7221 		(OP*)newSVOP(OP_CONST, 0,
7222 					(PL_curstash
7223 					 ? newSVhek(HvNAME_HEK(PL_curstash))
7224 					 : &PL_sv_undef))
7225 	    );
7226 
7227 	case KEY___DATA__:
7228 	case KEY___END__: {
7229 	    GV *gv;
7230 	    if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7231 		HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7232 					? PL_curstash
7233 					: PL_defstash;
7234 		gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7235 		if (!isGV(gv))
7236 		    gv_init(gv,stash,"DATA",4,0);
7237 		GvMULTI_on(gv);
7238 		if (!GvIO(gv))
7239 		    GvIOp(gv) = newIO();
7240 		IoIFP(GvIOp(gv)) = PL_rsfp;
7241 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7242 		{
7243 		    const int fd = PerlIO_fileno(PL_rsfp);
7244                     if (fd >= 3) {
7245                         fcntl(fd,F_SETFD, FD_CLOEXEC);
7246                     }
7247 		}
7248 #endif
7249 		/* Mark this internal pseudo-handle as clean */
7250 		IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7251 		if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7252 		    IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7253 		else
7254 		    IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7255 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7256 		/* if the script was opened in binmode, we need to revert
7257 		 * it to text mode for compatibility; but only iff it has CRs
7258 		 * XXX this is a questionable hack at best. */
7259 		if (PL_bufend-PL_bufptr > 2
7260 		    && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7261 		{
7262 		    Off_t loc = 0;
7263 		    if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7264 			loc = PerlIO_tell(PL_rsfp);
7265 			(void)PerlIO_seek(PL_rsfp, 0L, 0);
7266 		    }
7267 #ifdef NETWARE
7268 			if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7269 #else
7270 		    if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7271 #endif	/* NETWARE */
7272 			if (loc > 0)
7273 			    PerlIO_seek(PL_rsfp, loc, 0);
7274 		    }
7275 		}
7276 #endif
7277 #ifdef PERLIO_LAYERS
7278 		if (!IN_BYTES) {
7279 		    if (UTF)
7280 			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7281 		    else if (IN_ENCODING) {
7282 			SV *name;
7283 			dSP;
7284 			ENTER;
7285 			SAVETMPS;
7286 			PUSHMARK(sp);
7287 			XPUSHs(_get_encoding());
7288 			PUTBACK;
7289 			call_method("name", G_SCALAR);
7290 			SPAGAIN;
7291 			name = POPs;
7292 			PUTBACK;
7293 			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7294 					    Perl_form(aTHX_ ":encoding(%"SVf")",
7295 						      SVfARG(name)));
7296 			FREETMPS;
7297 			LEAVE;
7298 		    }
7299 		}
7300 #endif
7301 		PL_rsfp = NULL;
7302 	    }
7303 	    goto fake_eof;
7304 	}
7305 
7306 	case KEY___SUB__:
7307 	    FUN0OP(CvCLONE(PL_compcv)
7308 			? newOP(OP_RUNCV, 0)
7309 			: newPVOP(OP_RUNCV,0,NULL));
7310 
7311 	case KEY_AUTOLOAD:
7312 	case KEY_DESTROY:
7313 	case KEY_BEGIN:
7314 	case KEY_UNITCHECK:
7315 	case KEY_CHECK:
7316 	case KEY_INIT:
7317 	case KEY_END:
7318 	    if (PL_expect == XSTATE) {
7319 		s = PL_bufptr;
7320 		goto really_sub;
7321 	    }
7322 	    goto just_a_word;
7323 
7324 	case_KEY_CORE:
7325 	    {
7326 		STRLEN olen = len;
7327 		d = s;
7328 		s += 2;
7329 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7330 		if ((*s == ':' && s[1] == ':')
7331 		 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7332 		{
7333 		    s = d;
7334 		    len = olen;
7335 		    Copy(PL_bufptr, PL_tokenbuf, olen, char);
7336 		    goto just_a_word;
7337 		}
7338 		if (!tmp)
7339 		    Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7340 				      UTF8fARG(UTF, len, PL_tokenbuf));
7341 		if (tmp < 0)
7342 		    tmp = -tmp;
7343 		else if (tmp == KEY_require || tmp == KEY_do
7344 		      || tmp == KEY_glob)
7345 		    /* that's a way to remember we saw "CORE::" */
7346 		    orig_keyword = tmp;
7347 		goto reserved_word;
7348 	    }
7349 
7350 	case KEY_abs:
7351 	    UNI(OP_ABS);
7352 
7353 	case KEY_alarm:
7354 	    UNI(OP_ALARM);
7355 
7356 	case KEY_accept:
7357 	    LOP(OP_ACCEPT,XTERM);
7358 
7359 	case KEY_and:
7360 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7361 		return REPORT(0);
7362 	    OPERATOR(ANDOP);
7363 
7364 	case KEY_atan2:
7365 	    LOP(OP_ATAN2,XTERM);
7366 
7367 	case KEY_bind:
7368 	    LOP(OP_BIND,XTERM);
7369 
7370 	case KEY_binmode:
7371 	    LOP(OP_BINMODE,XTERM);
7372 
7373 	case KEY_bless:
7374 	    LOP(OP_BLESS,XTERM);
7375 
7376 	case KEY_break:
7377 	    FUN0(OP_BREAK);
7378 
7379 	case KEY_chop:
7380 	    UNI(OP_CHOP);
7381 
7382 	case KEY_continue:
7383 		    /* We have to disambiguate the two senses of
7384 		      "continue". If the next token is a '{' then
7385 		      treat it as the start of a continue block;
7386 		      otherwise treat it as a control operator.
7387 		     */
7388 		    s = skipspace(s);
7389 		    if (*s == '{')
7390 	    PREBLOCK(CONTINUE);
7391 		    else
7392 			FUN0(OP_CONTINUE);
7393 
7394 	case KEY_chdir:
7395 	    /* may use HOME */
7396 	    (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7397 	    UNI(OP_CHDIR);
7398 
7399 	case KEY_close:
7400 	    UNI(OP_CLOSE);
7401 
7402 	case KEY_closedir:
7403 	    UNI(OP_CLOSEDIR);
7404 
7405 	case KEY_cmp:
7406 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7407 		return REPORT(0);
7408 	    Eop(OP_SCMP);
7409 
7410 	case KEY_caller:
7411 	    UNI(OP_CALLER);
7412 
7413 	case KEY_crypt:
7414 #ifdef FCRYPT
7415 	    if (!PL_cryptseen) {
7416 		PL_cryptseen = TRUE;
7417 		init_des();
7418 	    }
7419 #endif
7420 	    LOP(OP_CRYPT,XTERM);
7421 
7422 	case KEY_chmod:
7423 	    LOP(OP_CHMOD,XTERM);
7424 
7425 	case KEY_chown:
7426 	    LOP(OP_CHOWN,XTERM);
7427 
7428 	case KEY_connect:
7429 	    LOP(OP_CONNECT,XTERM);
7430 
7431 	case KEY_chr:
7432 	    UNI(OP_CHR);
7433 
7434 	case KEY_cos:
7435 	    UNI(OP_COS);
7436 
7437 	case KEY_chroot:
7438 	    UNI(OP_CHROOT);
7439 
7440 	case KEY_default:
7441 	    PREBLOCK(DEFAULT);
7442 
7443 	case KEY_do:
7444 	    s = skipspace(s);
7445 	    if (*s == '{')
7446 		PRETERMBLOCK(DO);
7447 	    if (*s != '\'') {
7448 		*PL_tokenbuf = '&';
7449 		d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7450 			      1, &len);
7451 		if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7452 		 && !keyword(PL_tokenbuf + 1, len, 0)) {
7453 		    d = skipspace(d);
7454 		    if (*d == '(') {
7455 			force_ident_maybe_lex('&');
7456 			s = d;
7457 		    }
7458 		}
7459 	    }
7460 	    if (orig_keyword == KEY_do) {
7461 		orig_keyword = 0;
7462 		pl_yylval.ival = 1;
7463 	    }
7464 	    else
7465 		pl_yylval.ival = 0;
7466 	    OPERATOR(DO);
7467 
7468 	case KEY_die:
7469 	    PL_hints |= HINT_BLOCK_SCOPE;
7470 	    LOP(OP_DIE,XTERM);
7471 
7472 	case KEY_defined:
7473 	    UNI(OP_DEFINED);
7474 
7475 	case KEY_delete:
7476 	    UNI(OP_DELETE);
7477 
7478 	case KEY_dbmopen:
7479 	    Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7480 			      STR_WITH_LEN("NDBM_File::"),
7481 			      STR_WITH_LEN("DB_File::"),
7482 			      STR_WITH_LEN("GDBM_File::"),
7483 			      STR_WITH_LEN("SDBM_File::"),
7484 			      STR_WITH_LEN("ODBM_File::"),
7485 			      NULL);
7486 	    LOP(OP_DBMOPEN,XTERM);
7487 
7488 	case KEY_dbmclose:
7489 	    UNI(OP_DBMCLOSE);
7490 
7491 	case KEY_dump:
7492 	    LOOPX(OP_DUMP);
7493 
7494 	case KEY_else:
7495 	    PREBLOCK(ELSE);
7496 
7497 	case KEY_elsif:
7498 	    pl_yylval.ival = CopLINE(PL_curcop);
7499 	    OPERATOR(ELSIF);
7500 
7501 	case KEY_eq:
7502 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7503 		return REPORT(0);
7504 	    Eop(OP_SEQ);
7505 
7506 	case KEY_exists:
7507 	    UNI(OP_EXISTS);
7508 
7509 	case KEY_exit:
7510 	    UNI(OP_EXIT);
7511 
7512 	case KEY_eval:
7513 	    s = skipspace(s);
7514 	    if (*s == '{') { /* block eval */
7515 		PL_expect = XTERMBLOCK;
7516 		UNIBRACK(OP_ENTERTRY);
7517 	    }
7518 	    else { /* string eval */
7519 		PL_expect = XTERM;
7520 		UNIBRACK(OP_ENTEREVAL);
7521 	    }
7522 
7523 	case KEY_evalbytes:
7524 	    PL_expect = XTERM;
7525 	    UNIBRACK(-OP_ENTEREVAL);
7526 
7527 	case KEY_eof:
7528 	    UNI(OP_EOF);
7529 
7530 	case KEY_exp:
7531 	    UNI(OP_EXP);
7532 
7533 	case KEY_each:
7534 	    UNI(OP_EACH);
7535 
7536 	case KEY_exec:
7537 	    LOP(OP_EXEC,XREF);
7538 
7539 	case KEY_endhostent:
7540 	    FUN0(OP_EHOSTENT);
7541 
7542 	case KEY_endnetent:
7543 	    FUN0(OP_ENETENT);
7544 
7545 	case KEY_endservent:
7546 	    FUN0(OP_ESERVENT);
7547 
7548 	case KEY_endprotoent:
7549 	    FUN0(OP_EPROTOENT);
7550 
7551 	case KEY_endpwent:
7552 	    FUN0(OP_EPWENT);
7553 
7554 	case KEY_endgrent:
7555 	    FUN0(OP_EGRENT);
7556 
7557 	case KEY_for:
7558 	case KEY_foreach:
7559 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7560 		return REPORT(0);
7561 	    pl_yylval.ival = CopLINE(PL_curcop);
7562 	    s = skipspace(s);
7563 	    if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7564 		char *p = s;
7565 
7566 		if ((PL_bufend - p) >= 3
7567                     && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7568                 {
7569 		    p += 2;
7570                 }
7571 		else if ((PL_bufend - p) >= 4
7572                          && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7573 		    p += 3;
7574 		p = skipspace(p);
7575                 /* skip optional package name, as in "for my abc $x (..)" */
7576 		if (isIDFIRST_lazy_if(p,UTF)) {
7577 		    p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7578 		    p = skipspace(p);
7579 		}
7580 		if (*p != '$')
7581 		    Perl_croak(aTHX_ "Missing $ on loop variable");
7582 	    }
7583 	    OPERATOR(FOR);
7584 
7585 	case KEY_formline:
7586 	    LOP(OP_FORMLINE,XTERM);
7587 
7588 	case KEY_fork:
7589 	    FUN0(OP_FORK);
7590 
7591 	case KEY_fc:
7592 	    UNI(OP_FC);
7593 
7594 	case KEY_fcntl:
7595 	    LOP(OP_FCNTL,XTERM);
7596 
7597 	case KEY_fileno:
7598 	    UNI(OP_FILENO);
7599 
7600 	case KEY_flock:
7601 	    LOP(OP_FLOCK,XTERM);
7602 
7603 	case KEY_gt:
7604 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7605 		return REPORT(0);
7606 	    Rop(OP_SGT);
7607 
7608 	case KEY_ge:
7609 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7610 		return REPORT(0);
7611 	    Rop(OP_SGE);
7612 
7613 	case KEY_grep:
7614 	    LOP(OP_GREPSTART, XREF);
7615 
7616 	case KEY_goto:
7617 	    LOOPX(OP_GOTO);
7618 
7619 	case KEY_gmtime:
7620 	    UNI(OP_GMTIME);
7621 
7622 	case KEY_getc:
7623 	    UNIDOR(OP_GETC);
7624 
7625 	case KEY_getppid:
7626 	    FUN0(OP_GETPPID);
7627 
7628 	case KEY_getpgrp:
7629 	    UNI(OP_GETPGRP);
7630 
7631 	case KEY_getpriority:
7632 	    LOP(OP_GETPRIORITY,XTERM);
7633 
7634 	case KEY_getprotobyname:
7635 	    UNI(OP_GPBYNAME);
7636 
7637 	case KEY_getprotobynumber:
7638 	    LOP(OP_GPBYNUMBER,XTERM);
7639 
7640 	case KEY_getprotoent:
7641 	    FUN0(OP_GPROTOENT);
7642 
7643 	case KEY_getpwent:
7644 	    FUN0(OP_GPWENT);
7645 
7646 	case KEY_getpwnam:
7647 	    UNI(OP_GPWNAM);
7648 
7649 	case KEY_getpwuid:
7650 	    UNI(OP_GPWUID);
7651 
7652 	case KEY_getpeername:
7653 	    UNI(OP_GETPEERNAME);
7654 
7655 	case KEY_gethostbyname:
7656 	    UNI(OP_GHBYNAME);
7657 
7658 	case KEY_gethostbyaddr:
7659 	    LOP(OP_GHBYADDR,XTERM);
7660 
7661 	case KEY_gethostent:
7662 	    FUN0(OP_GHOSTENT);
7663 
7664 	case KEY_getnetbyname:
7665 	    UNI(OP_GNBYNAME);
7666 
7667 	case KEY_getnetbyaddr:
7668 	    LOP(OP_GNBYADDR,XTERM);
7669 
7670 	case KEY_getnetent:
7671 	    FUN0(OP_GNETENT);
7672 
7673 	case KEY_getservbyname:
7674 	    LOP(OP_GSBYNAME,XTERM);
7675 
7676 	case KEY_getservbyport:
7677 	    LOP(OP_GSBYPORT,XTERM);
7678 
7679 	case KEY_getservent:
7680 	    FUN0(OP_GSERVENT);
7681 
7682 	case KEY_getsockname:
7683 	    UNI(OP_GETSOCKNAME);
7684 
7685 	case KEY_getsockopt:
7686 	    LOP(OP_GSOCKOPT,XTERM);
7687 
7688 	case KEY_getgrent:
7689 	    FUN0(OP_GGRENT);
7690 
7691 	case KEY_getgrnam:
7692 	    UNI(OP_GGRNAM);
7693 
7694 	case KEY_getgrgid:
7695 	    UNI(OP_GGRGID);
7696 
7697 	case KEY_getlogin:
7698 	    FUN0(OP_GETLOGIN);
7699 
7700 	case KEY_given:
7701 	    pl_yylval.ival = CopLINE(PL_curcop);
7702             Perl_ck_warner_d(aTHX_
7703                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7704                 "given is experimental");
7705 	    OPERATOR(GIVEN);
7706 
7707 	case KEY_glob:
7708 	    LOP(
7709 	     orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7710 	     XTERM
7711 	    );
7712 
7713 	case KEY_hex:
7714 	    UNI(OP_HEX);
7715 
7716 	case KEY_if:
7717 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7718 		return REPORT(0);
7719 	    pl_yylval.ival = CopLINE(PL_curcop);
7720 	    OPERATOR(IF);
7721 
7722 	case KEY_index:
7723 	    LOP(OP_INDEX,XTERM);
7724 
7725 	case KEY_int:
7726 	    UNI(OP_INT);
7727 
7728 	case KEY_ioctl:
7729 	    LOP(OP_IOCTL,XTERM);
7730 
7731 	case KEY_join:
7732 	    LOP(OP_JOIN,XTERM);
7733 
7734 	case KEY_keys:
7735 	    UNI(OP_KEYS);
7736 
7737 	case KEY_kill:
7738 	    LOP(OP_KILL,XTERM);
7739 
7740 	case KEY_last:
7741 	    LOOPX(OP_LAST);
7742 
7743 	case KEY_lc:
7744 	    UNI(OP_LC);
7745 
7746 	case KEY_lcfirst:
7747 	    UNI(OP_LCFIRST);
7748 
7749 	case KEY_local:
7750 	    pl_yylval.ival = 0;
7751 	    OPERATOR(LOCAL);
7752 
7753 	case KEY_length:
7754 	    UNI(OP_LENGTH);
7755 
7756 	case KEY_lt:
7757 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7758 		return REPORT(0);
7759 	    Rop(OP_SLT);
7760 
7761 	case KEY_le:
7762 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7763 		return REPORT(0);
7764 	    Rop(OP_SLE);
7765 
7766 	case KEY_localtime:
7767 	    UNI(OP_LOCALTIME);
7768 
7769 	case KEY_log:
7770 	    UNI(OP_LOG);
7771 
7772 	case KEY_link:
7773 	    LOP(OP_LINK,XTERM);
7774 
7775 	case KEY_listen:
7776 	    LOP(OP_LISTEN,XTERM);
7777 
7778 	case KEY_lock:
7779 	    UNI(OP_LOCK);
7780 
7781 	case KEY_lstat:
7782 	    UNI(OP_LSTAT);
7783 
7784 	case KEY_m:
7785 	    s = scan_pat(s,OP_MATCH);
7786 	    TERM(sublex_start());
7787 
7788 	case KEY_map:
7789 	    LOP(OP_MAPSTART, XREF);
7790 
7791 	case KEY_mkdir:
7792 	    LOP(OP_MKDIR,XTERM);
7793 
7794 	case KEY_msgctl:
7795 	    LOP(OP_MSGCTL,XTERM);
7796 
7797 	case KEY_msgget:
7798 	    LOP(OP_MSGGET,XTERM);
7799 
7800 	case KEY_msgrcv:
7801 	    LOP(OP_MSGRCV,XTERM);
7802 
7803 	case KEY_msgsnd:
7804 	    LOP(OP_MSGSND,XTERM);
7805 
7806 	case KEY_our:
7807 	case KEY_my:
7808 	case KEY_state:
7809 	    if (PL_in_my) {
7810 	        yyerror(Perl_form(aTHX_
7811 	                          "Can't redeclare \"%s\" in \"%s\"",
7812 	                           tmp      == KEY_my    ? "my" :
7813 	                           tmp      == KEY_state ? "state" : "our",
7814 	                           PL_in_my == KEY_my    ? "my" :
7815 	                           PL_in_my == KEY_state ? "state" : "our"));
7816 	    }
7817 	    PL_in_my = (U16)tmp;
7818 	    s = skipspace(s);
7819 	    if (isIDFIRST_lazy_if(s,UTF)) {
7820 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7821 		if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7822 		{
7823 		    if (!FEATURE_LEXSUBS_IS_ENABLED)
7824 			Perl_croak(aTHX_
7825 				  "Experimental \"%s\" subs not enabled",
7826 				   tmp == KEY_my    ? "my"    :
7827 				   tmp == KEY_state ? "state" : "our");
7828 		    Perl_ck_warner_d(aTHX_
7829 			packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7830 			"The lexical_subs feature is experimental");
7831 		    goto really_sub;
7832 		}
7833 		PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7834 		if (!PL_in_my_stash) {
7835 		    char tmpbuf[1024];
7836                     int len;
7837 		    PL_bufptr = s;
7838 		    len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7839                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7840 		    yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7841 		}
7842 	    }
7843 	    pl_yylval.ival = 1;
7844 	    OPERATOR(MY);
7845 
7846 	case KEY_next:
7847 	    LOOPX(OP_NEXT);
7848 
7849 	case KEY_ne:
7850 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7851 		return REPORT(0);
7852 	    Eop(OP_SNE);
7853 
7854 	case KEY_no:
7855 	    s = tokenize_use(0, s);
7856 	    TOKEN(USE);
7857 
7858 	case KEY_not:
7859 	    if (*s == '(' || (s = skipspace(s), *s == '('))
7860 		FUN1(OP_NOT);
7861 	    else {
7862 		if (!PL_lex_allbrackets
7863                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7864                 {
7865 		    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7866                 }
7867 		OPERATOR(NOTOP);
7868 	    }
7869 
7870 	case KEY_open:
7871 	    s = skipspace(s);
7872 	    if (isIDFIRST_lazy_if(s,UTF)) {
7873           const char *t;
7874           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7875               &len);
7876 		for (t=d; isSPACE(*t);)
7877 		    t++;
7878 		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7879 		    /* [perl #16184] */
7880 		    && !(t[0] == '=' && t[1] == '>')
7881 		    && !(t[0] == ':' && t[1] == ':')
7882 		    && !keyword(s, d-s, 0)
7883 		) {
7884 		    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7885 		       "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7886 			UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7887 		}
7888 	    }
7889 	    LOP(OP_OPEN,XTERM);
7890 
7891 	case KEY_or:
7892 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7893 		return REPORT(0);
7894 	    pl_yylval.ival = OP_OR;
7895 	    OPERATOR(OROP);
7896 
7897 	case KEY_ord:
7898 	    UNI(OP_ORD);
7899 
7900 	case KEY_oct:
7901 	    UNI(OP_OCT);
7902 
7903 	case KEY_opendir:
7904 	    LOP(OP_OPEN_DIR,XTERM);
7905 
7906 	case KEY_print:
7907 	    checkcomma(s,PL_tokenbuf,"filehandle");
7908 	    LOP(OP_PRINT,XREF);
7909 
7910 	case KEY_printf:
7911 	    checkcomma(s,PL_tokenbuf,"filehandle");
7912 	    LOP(OP_PRTF,XREF);
7913 
7914 	case KEY_prototype:
7915 	    UNI(OP_PROTOTYPE);
7916 
7917 	case KEY_push:
7918 	    LOP(OP_PUSH,XTERM);
7919 
7920 	case KEY_pop:
7921 	    UNIDOR(OP_POP);
7922 
7923 	case KEY_pos:
7924 	    UNIDOR(OP_POS);
7925 
7926 	case KEY_pack:
7927 	    LOP(OP_PACK,XTERM);
7928 
7929 	case KEY_package:
7930 	    s = force_word(s,WORD,FALSE,TRUE);
7931 	    s = skipspace(s);
7932 	    s = force_strict_version(s);
7933 	    PREBLOCK(PACKAGE);
7934 
7935 	case KEY_pipe:
7936 	    LOP(OP_PIPE_OP,XTERM);
7937 
7938 	case KEY_q:
7939 	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7940 	    if (!s)
7941 		missingterm(NULL);
7942 	    COPLINE_SET_FROM_MULTI_END;
7943 	    pl_yylval.ival = OP_CONST;
7944 	    TERM(sublex_start());
7945 
7946 	case KEY_quotemeta:
7947 	    UNI(OP_QUOTEMETA);
7948 
7949 	case KEY_qw: {
7950 	    OP *words = NULL;
7951 	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7952 	    if (!s)
7953 		missingterm(NULL);
7954 	    COPLINE_SET_FROM_MULTI_END;
7955 	    PL_expect = XOPERATOR;
7956 	    if (SvCUR(PL_lex_stuff)) {
7957 		int warned_comma = !ckWARN(WARN_QW);
7958 		int warned_comment = warned_comma;
7959 		d = SvPV_force(PL_lex_stuff, len);
7960 		while (len) {
7961 		    for (; isSPACE(*d) && len; --len, ++d)
7962 			/**/;
7963 		    if (len) {
7964 			SV *sv;
7965 			const char *b = d;
7966 			if (!warned_comma || !warned_comment) {
7967 			    for (; !isSPACE(*d) && len; --len, ++d) {
7968 				if (!warned_comma && *d == ',') {
7969 				    Perl_warner(aTHX_ packWARN(WARN_QW),
7970 					"Possible attempt to separate words with commas");
7971 				    ++warned_comma;
7972 				}
7973 				else if (!warned_comment && *d == '#') {
7974 				    Perl_warner(aTHX_ packWARN(WARN_QW),
7975 					"Possible attempt to put comments in qw() list");
7976 				    ++warned_comment;
7977 				}
7978 			    }
7979 			}
7980 			else {
7981 			    for (; !isSPACE(*d) && len; --len, ++d)
7982 				/**/;
7983 			}
7984 			sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7985 			words = op_append_elem(OP_LIST, words,
7986 					    newSVOP(OP_CONST, 0, tokeq(sv)));
7987 		    }
7988 		}
7989 	    }
7990 	    if (!words)
7991 		words = newNULLLIST();
7992 	    SvREFCNT_dec_NN(PL_lex_stuff);
7993 	    PL_lex_stuff = NULL;
7994 	    PL_expect = XOPERATOR;
7995 	    pl_yylval.opval = sawparens(words);
7996 	    TOKEN(QWLIST);
7997 	}
7998 
7999 	case KEY_qq:
8000 	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8001 	    if (!s)
8002 		missingterm(NULL);
8003 	    pl_yylval.ival = OP_STRINGIFY;
8004 	    if (SvIVX(PL_lex_stuff) == '\'')
8005 		SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8006 	    TERM(sublex_start());
8007 
8008 	case KEY_qr:
8009 	    s = scan_pat(s,OP_QR);
8010 	    TERM(sublex_start());
8011 
8012 	case KEY_qx:
8013 	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8014 	    if (!s)
8015 		missingterm(NULL);
8016 	    pl_yylval.ival = OP_BACKTICK;
8017 	    TERM(sublex_start());
8018 
8019 	case KEY_return:
8020 	    OLDLOP(OP_RETURN);
8021 
8022 	case KEY_require:
8023 	    s = skipspace(s);
8024 	    if (isDIGIT(*s)) {
8025 		s = force_version(s, FALSE);
8026 	    }
8027 	    else if (*s != 'v' || !isDIGIT(s[1])
8028 		    || (s = force_version(s, TRUE), *s == 'v'))
8029 	    {
8030 		*PL_tokenbuf = '\0';
8031 		s = force_word(s,WORD,TRUE,TRUE);
8032 		if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8033 		    gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8034                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8035 		else if (*s == '<')
8036 		    yyerror("<> at require-statement should be quotes");
8037 	    }
8038 	    if (orig_keyword == KEY_require) {
8039 		orig_keyword = 0;
8040 		pl_yylval.ival = 1;
8041 	    }
8042 	    else
8043 		pl_yylval.ival = 0;
8044 	    PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8045 	    PL_bufptr = s;
8046 	    PL_last_uni = PL_oldbufptr;
8047 	    PL_last_lop_op = OP_REQUIRE;
8048 	    s = skipspace(s);
8049 	    return REPORT( (int)REQUIRE );
8050 
8051 	case KEY_reset:
8052 	    UNI(OP_RESET);
8053 
8054 	case KEY_redo:
8055 	    LOOPX(OP_REDO);
8056 
8057 	case KEY_rename:
8058 	    LOP(OP_RENAME,XTERM);
8059 
8060 	case KEY_rand:
8061 	    UNI(OP_RAND);
8062 
8063 	case KEY_rmdir:
8064 	    UNI(OP_RMDIR);
8065 
8066 	case KEY_rindex:
8067 	    LOP(OP_RINDEX,XTERM);
8068 
8069 	case KEY_read:
8070 	    LOP(OP_READ,XTERM);
8071 
8072 	case KEY_readdir:
8073 	    UNI(OP_READDIR);
8074 
8075 	case KEY_readline:
8076 	    UNIDOR(OP_READLINE);
8077 
8078 	case KEY_readpipe:
8079 	    UNIDOR(OP_BACKTICK);
8080 
8081 	case KEY_rewinddir:
8082 	    UNI(OP_REWINDDIR);
8083 
8084 	case KEY_recv:
8085 	    LOP(OP_RECV,XTERM);
8086 
8087 	case KEY_reverse:
8088 	    LOP(OP_REVERSE,XTERM);
8089 
8090 	case KEY_readlink:
8091 	    UNIDOR(OP_READLINK);
8092 
8093 	case KEY_ref:
8094 	    UNI(OP_REF);
8095 
8096 	case KEY_s:
8097 	    s = scan_subst(s);
8098 	    if (pl_yylval.opval)
8099 		TERM(sublex_start());
8100 	    else
8101 		TOKEN(1);	/* force error */
8102 
8103 	case KEY_say:
8104 	    checkcomma(s,PL_tokenbuf,"filehandle");
8105 	    LOP(OP_SAY,XREF);
8106 
8107 	case KEY_chomp:
8108 	    UNI(OP_CHOMP);
8109 
8110 	case KEY_scalar:
8111 	    UNI(OP_SCALAR);
8112 
8113 	case KEY_select:
8114 	    LOP(OP_SELECT,XTERM);
8115 
8116 	case KEY_seek:
8117 	    LOP(OP_SEEK,XTERM);
8118 
8119 	case KEY_semctl:
8120 	    LOP(OP_SEMCTL,XTERM);
8121 
8122 	case KEY_semget:
8123 	    LOP(OP_SEMGET,XTERM);
8124 
8125 	case KEY_semop:
8126 	    LOP(OP_SEMOP,XTERM);
8127 
8128 	case KEY_send:
8129 	    LOP(OP_SEND,XTERM);
8130 
8131 	case KEY_setpgrp:
8132 	    LOP(OP_SETPGRP,XTERM);
8133 
8134 	case KEY_setpriority:
8135 	    LOP(OP_SETPRIORITY,XTERM);
8136 
8137 	case KEY_sethostent:
8138 	    UNI(OP_SHOSTENT);
8139 
8140 	case KEY_setnetent:
8141 	    UNI(OP_SNETENT);
8142 
8143 	case KEY_setservent:
8144 	    UNI(OP_SSERVENT);
8145 
8146 	case KEY_setprotoent:
8147 	    UNI(OP_SPROTOENT);
8148 
8149 	case KEY_setpwent:
8150 	    FUN0(OP_SPWENT);
8151 
8152 	case KEY_setgrent:
8153 	    FUN0(OP_SGRENT);
8154 
8155 	case KEY_seekdir:
8156 	    LOP(OP_SEEKDIR,XTERM);
8157 
8158 	case KEY_setsockopt:
8159 	    LOP(OP_SSOCKOPT,XTERM);
8160 
8161 	case KEY_shift:
8162 	    UNIDOR(OP_SHIFT);
8163 
8164 	case KEY_shmctl:
8165 	    LOP(OP_SHMCTL,XTERM);
8166 
8167 	case KEY_shmget:
8168 	    LOP(OP_SHMGET,XTERM);
8169 
8170 	case KEY_shmread:
8171 	    LOP(OP_SHMREAD,XTERM);
8172 
8173 	case KEY_shmwrite:
8174 	    LOP(OP_SHMWRITE,XTERM);
8175 
8176 	case KEY_shutdown:
8177 	    LOP(OP_SHUTDOWN,XTERM);
8178 
8179 	case KEY_sin:
8180 	    UNI(OP_SIN);
8181 
8182 	case KEY_sleep:
8183 	    UNI(OP_SLEEP);
8184 
8185 	case KEY_socket:
8186 	    LOP(OP_SOCKET,XTERM);
8187 
8188 	case KEY_socketpair:
8189 	    LOP(OP_SOCKPAIR,XTERM);
8190 
8191 	case KEY_sort:
8192 	    checkcomma(s,PL_tokenbuf,"subroutine name");
8193 	    s = skipspace(s);
8194 	    PL_expect = XTERM;
8195 	    s = force_word(s,WORD,TRUE,TRUE);
8196 	    LOP(OP_SORT,XREF);
8197 
8198 	case KEY_split:
8199 	    LOP(OP_SPLIT,XTERM);
8200 
8201 	case KEY_sprintf:
8202 	    LOP(OP_SPRINTF,XTERM);
8203 
8204 	case KEY_splice:
8205 	    LOP(OP_SPLICE,XTERM);
8206 
8207 	case KEY_sqrt:
8208 	    UNI(OP_SQRT);
8209 
8210 	case KEY_srand:
8211 	    UNI(OP_SRAND);
8212 
8213 	case KEY_stat:
8214 	    UNI(OP_STAT);
8215 
8216 	case KEY_study:
8217 	    UNI(OP_STUDY);
8218 
8219 	case KEY_substr:
8220 	    LOP(OP_SUBSTR,XTERM);
8221 
8222 	case KEY_format:
8223 	case KEY_sub:
8224 	  really_sub:
8225 	    {
8226 		char * const tmpbuf = PL_tokenbuf + 1;
8227 		expectation attrful;
8228 		bool have_name, have_proto;
8229 		const int key = tmp;
8230                 SV *format_name = NULL;
8231 
8232 		d = s;
8233 		s = skipspace(s);
8234 
8235 		if (isIDFIRST_lazy_if(s,UTF)
8236                     || *s == '\''
8237                     || (*s == ':' && s[1] == ':'))
8238 		{
8239 
8240 		    PL_expect = XBLOCK;
8241 		    attrful = XATTRBLOCK;
8242 		    d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8243 				  &len);
8244                     if (key == KEY_format)
8245 			format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8246 		    *PL_tokenbuf = '&';
8247 		    if (memchr(tmpbuf, ':', len) || key != KEY_sub
8248 		     || pad_findmy_pvn(
8249 			    PL_tokenbuf, len + 1, 0
8250 			) != NOT_IN_PAD)
8251 			sv_setpvn(PL_subname, tmpbuf, len);
8252 		    else {
8253 			sv_setsv(PL_subname,PL_curstname);
8254 			sv_catpvs(PL_subname,"::");
8255 			sv_catpvn(PL_subname,tmpbuf,len);
8256 		    }
8257                     if (SvUTF8(PL_linestr))
8258                         SvUTF8_on(PL_subname);
8259 		    have_name = TRUE;
8260 
8261 
8262 		    s = skipspace(d);
8263 		}
8264 		else {
8265 		    if (key == KEY_my || key == KEY_our || key==KEY_state)
8266 		    {
8267 			*d = '\0';
8268 			/* diag_listed_as: Missing name in "%s sub" */
8269 			Perl_croak(aTHX_
8270 				  "Missing name in \"%s\"", PL_bufptr);
8271 		    }
8272 		    PL_expect = XTERMBLOCK;
8273 		    attrful = XATTRTERM;
8274 		    sv_setpvs(PL_subname,"?");
8275 		    have_name = FALSE;
8276 		}
8277 
8278 		if (key == KEY_format) {
8279 		    if (format_name) {
8280                         NEXTVAL_NEXTTOKE.opval
8281                             = (OP*)newSVOP(OP_CONST,0, format_name);
8282                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8283                         force_next(WORD);
8284                     }
8285 		    PREBLOCK(FORMAT);
8286 		}
8287 
8288 		/* Look for a prototype */
8289 		if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8290 		    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8291 		    COPLINE_SET_FROM_MULTI_END;
8292 		    if (!s)
8293 			Perl_croak(aTHX_ "Prototype not terminated");
8294 		    (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8295 		    have_proto = TRUE;
8296 
8297 		    s = skipspace(s);
8298 		}
8299 		else
8300 		    have_proto = FALSE;
8301 
8302 		if (*s == ':' && s[1] != ':')
8303 		    PL_expect = attrful;
8304 		else if ((*s != '{' && *s != '(') && key != KEY_format) {
8305                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8306                            key == KEY_DESTROY || key == KEY_BEGIN ||
8307                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8308                            key == KEY_INIT || key == KEY_END ||
8309                            key == KEY_my || key == KEY_state ||
8310                            key == KEY_our);
8311 		    if (!have_name)
8312 			Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8313 		    else if (*s != ';' && *s != '}')
8314 			Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8315 		}
8316 
8317 		if (have_proto) {
8318 		    NEXTVAL_NEXTTOKE.opval =
8319 			(OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8320 		    PL_lex_stuff = NULL;
8321 		    force_next(THING);
8322 		}
8323 		if (!have_name) {
8324 		    if (PL_curstash)
8325 			sv_setpvs(PL_subname, "__ANON__");
8326 		    else
8327 			sv_setpvs(PL_subname, "__ANON__::__ANON__");
8328 		    TOKEN(ANONSUB);
8329 		}
8330 		force_ident_maybe_lex('&');
8331 		TOKEN(SUB);
8332 	    }
8333 
8334 	case KEY_system:
8335 	    LOP(OP_SYSTEM,XREF);
8336 
8337 	case KEY_symlink:
8338 	    LOP(OP_SYMLINK,XTERM);
8339 
8340 	case KEY_syscall:
8341 	    LOP(OP_SYSCALL,XTERM);
8342 
8343 	case KEY_sysopen:
8344 	    LOP(OP_SYSOPEN,XTERM);
8345 
8346 	case KEY_sysseek:
8347 	    LOP(OP_SYSSEEK,XTERM);
8348 
8349 	case KEY_sysread:
8350 	    LOP(OP_SYSREAD,XTERM);
8351 
8352 	case KEY_syswrite:
8353 	    LOP(OP_SYSWRITE,XTERM);
8354 
8355 	case KEY_tr:
8356 	case KEY_y:
8357 	    s = scan_trans(s);
8358 	    TERM(sublex_start());
8359 
8360 	case KEY_tell:
8361 	    UNI(OP_TELL);
8362 
8363 	case KEY_telldir:
8364 	    UNI(OP_TELLDIR);
8365 
8366 	case KEY_tie:
8367 	    LOP(OP_TIE,XTERM);
8368 
8369 	case KEY_tied:
8370 	    UNI(OP_TIED);
8371 
8372 	case KEY_time:
8373 	    FUN0(OP_TIME);
8374 
8375 	case KEY_times:
8376 	    FUN0(OP_TMS);
8377 
8378 	case KEY_truncate:
8379 	    LOP(OP_TRUNCATE,XTERM);
8380 
8381 	case KEY_uc:
8382 	    UNI(OP_UC);
8383 
8384 	case KEY_ucfirst:
8385 	    UNI(OP_UCFIRST);
8386 
8387 	case KEY_untie:
8388 	    UNI(OP_UNTIE);
8389 
8390 	case KEY_until:
8391 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8392 		return REPORT(0);
8393 	    pl_yylval.ival = CopLINE(PL_curcop);
8394 	    OPERATOR(UNTIL);
8395 
8396 	case KEY_unless:
8397 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8398 		return REPORT(0);
8399 	    pl_yylval.ival = CopLINE(PL_curcop);
8400 	    OPERATOR(UNLESS);
8401 
8402 	case KEY_unlink:
8403 	    LOP(OP_UNLINK,XTERM);
8404 
8405 	case KEY_undef:
8406 	    UNIDOR(OP_UNDEF);
8407 
8408 	case KEY_unpack:
8409 	    LOP(OP_UNPACK,XTERM);
8410 
8411 	case KEY_utime:
8412 	    LOP(OP_UTIME,XTERM);
8413 
8414 	case KEY_umask:
8415 	    UNIDOR(OP_UMASK);
8416 
8417 	case KEY_unshift:
8418 	    LOP(OP_UNSHIFT,XTERM);
8419 
8420 	case KEY_use:
8421 	    s = tokenize_use(1, s);
8422 	    TOKEN(USE);
8423 
8424 	case KEY_values:
8425 	    UNI(OP_VALUES);
8426 
8427 	case KEY_vec:
8428 	    LOP(OP_VEC,XTERM);
8429 
8430 	case KEY_when:
8431 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8432 		return REPORT(0);
8433 	    pl_yylval.ival = CopLINE(PL_curcop);
8434             Perl_ck_warner_d(aTHX_
8435                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8436                 "when is experimental");
8437 	    OPERATOR(WHEN);
8438 
8439 	case KEY_while:
8440 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8441 		return REPORT(0);
8442 	    pl_yylval.ival = CopLINE(PL_curcop);
8443 	    OPERATOR(WHILE);
8444 
8445 	case KEY_warn:
8446 	    PL_hints |= HINT_BLOCK_SCOPE;
8447 	    LOP(OP_WARN,XTERM);
8448 
8449 	case KEY_wait:
8450 	    FUN0(OP_WAIT);
8451 
8452 	case KEY_waitpid:
8453 	    LOP(OP_WAITPID,XTERM);
8454 
8455 	case KEY_wantarray:
8456 	    FUN0(OP_WANTARRAY);
8457 
8458 	case KEY_write:
8459             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8460              * we use the same number on EBCDIC */
8461 	    gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8462 	    UNI(OP_ENTERWRITE);
8463 
8464 	case KEY_x:
8465 	    if (PL_expect == XOPERATOR) {
8466 		if (*s == '=' && !PL_lex_allbrackets
8467                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8468                 {
8469 		    return REPORT(0);
8470                 }
8471 		Mop(OP_REPEAT);
8472 	    }
8473 	    check_uni();
8474 	    goto just_a_word;
8475 
8476 	case KEY_xor:
8477 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8478 		return REPORT(0);
8479 	    pl_yylval.ival = OP_XOR;
8480 	    OPERATOR(OROP);
8481 	}
8482     }}
8483 }
8484 
8485 /*
8486   S_pending_ident
8487 
8488   Looks up an identifier in the pad or in a package
8489 
8490   Returns:
8491     PRIVATEREF if this is a lexical name.
8492     WORD       if this belongs to a package.
8493 
8494   Structure:
8495       if we're in a my declaration
8496 	  croak if they tried to say my($foo::bar)
8497 	  build the ops for a my() declaration
8498       if it's an access to a my() variable
8499 	  build ops for access to a my() variable
8500       if in a dq string, and they've said @foo and we can't find @foo
8501 	  warn
8502       build ops for a bareword
8503 */
8504 
8505 static int
8506 S_pending_ident(pTHX)
8507 {
8508     PADOFFSET tmp = 0;
8509     const char pit = (char)pl_yylval.ival;
8510     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8511     /* All routes through this function want to know if there is a colon.  */
8512     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8513 
8514     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8515           "### Pending identifier '%s'\n", PL_tokenbuf); });
8516 
8517     /* if we're in a my(), we can't allow dynamics here.
8518        $foo'bar has already been turned into $foo::bar, so
8519        just check for colons.
8520 
8521        if it's a legal name, the OP is a PADANY.
8522     */
8523     if (PL_in_my) {
8524         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
8525             if (has_colon)
8526                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8527                                   "variable %s in \"our\"",
8528                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8529             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8530         }
8531         else {
8532             if (has_colon) {
8533                 /* "my" variable %s can't be in a package */
8534                 /* PL_no_myglob is constant */
8535                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8536                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8537                             PL_in_my == KEY_my ? "my" : "state",
8538                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8539                             PL_tokenbuf),
8540                             UTF ? SVf_UTF8 : 0);
8541                 GCC_DIAG_RESTORE;
8542             }
8543 
8544             pl_yylval.opval = newOP(OP_PADANY, 0);
8545             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8546                                                         UTF ? SVf_UTF8 : 0);
8547 	    return PRIVATEREF;
8548         }
8549     }
8550 
8551     /*
8552        build the ops for accesses to a my() variable.
8553     */
8554 
8555     if (!has_colon) {
8556 	if (!PL_in_my)
8557 	    tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8558                                  0);
8559         if (tmp != NOT_IN_PAD) {
8560             /* might be an "our" variable" */
8561             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8562                 /* build ops for a bareword */
8563 		HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8564 		HEK * const stashname = HvNAME_HEK(stash);
8565 		SV *  const sym = newSVhek(stashname);
8566                 sv_catpvs(sym, "::");
8567                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8568                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8569                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8570                 if (pit != '&')
8571                   gv_fetchsv(sym,
8572                     GV_ADDMULTI,
8573                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8574                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8575                      : SVt_PVHV));
8576                 return WORD;
8577             }
8578 
8579             pl_yylval.opval = newOP(OP_PADANY, 0);
8580             pl_yylval.opval->op_targ = tmp;
8581             return PRIVATEREF;
8582         }
8583     }
8584 
8585     /*
8586        Whine if they've said @foo in a doublequoted string,
8587        and @foo isn't a variable we can find in the symbol
8588        table.
8589     */
8590     if (ckWARN(WARN_AMBIGUOUS)
8591         && pit == '@'
8592         && PL_lex_state != LEX_NORMAL
8593         && !PL_lex_brackets)
8594     {
8595         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8596                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8597         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8598 		/* DO NOT warn for @- and @+ */
8599 		&& !( PL_tokenbuf[2] == '\0'
8600                       && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8601 	   )
8602         {
8603             /* Downgraded from fatal to warning 20000522 mjd */
8604             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8605 			"Possible unintended interpolation of %"UTF8f
8606 			" in string",
8607 			UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8608         }
8609     }
8610 
8611     /* build ops for a bareword */
8612     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8613 				   newSVpvn_flags(PL_tokenbuf + 1,
8614 						      tokenbuf_len - 1,
8615                                                       UTF ? SVf_UTF8 : 0 ));
8616     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8617     if (pit != '&')
8618 	gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8619 		     (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8620                      | ( UTF ? SVf_UTF8 : 0 ),
8621 		     ((PL_tokenbuf[0] == '$') ? SVt_PV
8622 		      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8623 		      : SVt_PVHV));
8624     return WORD;
8625 }
8626 
8627 STATIC void
8628 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8629 {
8630     PERL_ARGS_ASSERT_CHECKCOMMA;
8631 
8632     if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
8633 	if (ckWARN(WARN_SYNTAX)) {
8634 	    int level = 1;
8635 	    const char *w;
8636 	    for (w = s+2; *w && level; w++) {
8637 		if (*w == '(')
8638 		    ++level;
8639 		else if (*w == ')')
8640 		    --level;
8641 	    }
8642 	    while (isSPACE(*w))
8643 		++w;
8644 	    /* the list of chars below is for end of statements or
8645 	     * block / parens, boolean operators (&&, ||, //) and branch
8646 	     * constructs (or, and, if, until, unless, while, err, for).
8647 	     * Not a very solid hack... */
8648 	    if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8649 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8650 			    "%s (...) interpreted as function",name);
8651 	}
8652     }
8653     while (s < PL_bufend && isSPACE(*s))
8654 	s++;
8655     if (*s == '(')
8656 	s++;
8657     while (s < PL_bufend && isSPACE(*s))
8658 	s++;
8659     if (isIDFIRST_lazy_if(s,UTF)) {
8660 	const char * const w = s;
8661         s += UTF ? UTF8SKIP(s) : 1;
8662 	while (isWORDCHAR_lazy_if(s,UTF))
8663 	    s += UTF ? UTF8SKIP(s) : 1;
8664 	while (s < PL_bufend && isSPACE(*s))
8665 	    s++;
8666 	if (*s == ',') {
8667 	    GV* gv;
8668 	    PADOFFSET off;
8669 	    if (keyword(w, s - w, 0))
8670 		return;
8671 
8672 	    gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8673 	    if (gv && GvCVu(gv))
8674 		return;
8675 	    if (s - w <= 254) {
8676 		char tmpbuf[256];
8677 		Copy(w, tmpbuf+1, s - w, char);
8678 		*tmpbuf = '&';
8679 		off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8680 		if (off != NOT_IN_PAD) return;
8681 	    }
8682 	    Perl_croak(aTHX_ "No comma allowed after %s", what);
8683 	}
8684     }
8685 }
8686 
8687 /* S_new_constant(): do any overload::constant lookup.
8688 
8689    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8690    Best used as sv=new_constant(..., sv, ...).
8691    If s, pv are NULL, calls subroutine with one argument,
8692    and <type> is used with error messages only.
8693    <type> is assumed to be well formed UTF-8 */
8694 
8695 STATIC SV *
8696 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8697 	       SV *sv, SV *pv, const char *type, STRLEN typelen)
8698 {
8699     dSP;
8700     HV * table = GvHV(PL_hintgv);		 /* ^H */
8701     SV *res;
8702     SV *errsv = NULL;
8703     SV **cvp;
8704     SV *cv, *typesv;
8705     const char *why1 = "", *why2 = "", *why3 = "";
8706 
8707     PERL_ARGS_ASSERT_NEW_CONSTANT;
8708     /* We assume that this is true: */
8709     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8710     assert(type || s);
8711 
8712     /* charnames doesn't work well if there have been errors found */
8713     if (PL_error_count > 0 && *key == 'c')
8714     {
8715 	SvREFCNT_dec_NN(sv);
8716 	return &PL_sv_undef;
8717     }
8718 
8719     sv_2mortal(sv);			/* Parent created it permanently */
8720     if (!table
8721 	|| ! (PL_hints & HINT_LOCALIZE_HH)
8722 	|| ! (cvp = hv_fetch(table, key, keylen, FALSE))
8723 	|| ! SvOK(*cvp))
8724     {
8725 	char *msg;
8726 
8727 	/* Here haven't found what we're looking for.  If it is charnames,
8728 	 * perhaps it needs to be loaded.  Try doing that before giving up */
8729 	if (*key == 'c') {
8730 	    Perl_load_module(aTHX_
8731 		            0,
8732 			    newSVpvs("_charnames"),
8733 			     /* version parameter; no need to specify it, as if
8734 			      * we get too early a version, will fail anyway,
8735 			      * not being able to find '_charnames' */
8736 			    NULL,
8737 			    newSVpvs(":full"),
8738 			    newSVpvs(":short"),
8739 			    NULL);
8740             assert(sp == PL_stack_sp);
8741 	    table = GvHV(PL_hintgv);
8742 	    if (table
8743 		&& (PL_hints & HINT_LOCALIZE_HH)
8744 		&& (cvp = hv_fetch(table, key, keylen, FALSE))
8745 		&& SvOK(*cvp))
8746 	    {
8747 		goto now_ok;
8748 	    }
8749 	}
8750 	if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8751 	    msg = Perl_form(aTHX_
8752 			       "Constant(%.*s) unknown",
8753 				(int)(type ? typelen : len),
8754 				(type ? type: s));
8755 	}
8756 	else {
8757             why1 = "$^H{";
8758             why2 = key;
8759             why3 = "} is not defined";
8760         report:
8761             if (*key == 'c') {
8762                 msg = Perl_form(aTHX_
8763                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8764                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8765                       );
8766             }
8767             else {
8768                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8769                                     (int)(type ? typelen : len),
8770                                     (type ? type: s), why1, why2, why3);
8771             }
8772         }
8773 	yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8774   	return SvREFCNT_inc_simple_NN(sv);
8775     }
8776   now_ok:
8777     cv = *cvp;
8778     if (!pv && s)
8779   	pv = newSVpvn_flags(s, len, SVs_TEMP);
8780     if (type && pv)
8781   	typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8782     else
8783   	typesv = &PL_sv_undef;
8784 
8785     PUSHSTACKi(PERLSI_OVERLOAD);
8786     ENTER ;
8787     SAVETMPS;
8788 
8789     PUSHMARK(SP) ;
8790     EXTEND(sp, 3);
8791     if (pv)
8792  	PUSHs(pv);
8793     PUSHs(sv);
8794     if (pv)
8795  	PUSHs(typesv);
8796     PUTBACK;
8797     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8798 
8799     SPAGAIN ;
8800 
8801     /* Check the eval first */
8802     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8803 	STRLEN errlen;
8804 	const char * errstr;
8805 	sv_catpvs(errsv, "Propagated");
8806 	errstr = SvPV_const(errsv, errlen);
8807 	yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8808 	(void)POPs;
8809 	res = SvREFCNT_inc_simple_NN(sv);
8810     }
8811     else {
8812  	res = POPs;
8813 	SvREFCNT_inc_simple_void_NN(res);
8814     }
8815 
8816     PUTBACK ;
8817     FREETMPS ;
8818     LEAVE ;
8819     POPSTACK;
8820 
8821     if (!SvOK(res)) {
8822  	why1 = "Call to &{$^H{";
8823  	why2 = key;
8824  	why3 = "}} did not return a defined value";
8825  	sv = res;
8826 	(void)sv_2mortal(sv);
8827  	goto report;
8828     }
8829 
8830     return res;
8831 }
8832 
8833 PERL_STATIC_INLINE void
8834 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8835     PERL_ARGS_ASSERT_PARSE_IDENT;
8836 
8837     for (;;) {
8838         if (*d >= e)
8839             Perl_croak(aTHX_ "%s", ident_too_long);
8840         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8841              /* The UTF-8 case must come first, otherwise things
8842              * like c\N{COMBINING TILDE} would start failing, as the
8843              * isWORDCHAR_A case below would gobble the 'c' up.
8844              */
8845 
8846             char *t = *s + UTF8SKIP(*s);
8847             while (isIDCONT_utf8((U8*)t))
8848                 t += UTF8SKIP(t);
8849             if (*d + (t - *s) > e)
8850                 Perl_croak(aTHX_ "%s", ident_too_long);
8851             Copy(*s, *d, t - *s, char);
8852             *d += t - *s;
8853             *s = t;
8854         }
8855         else if ( isWORDCHAR_A(**s) ) {
8856             do {
8857                 *(*d)++ = *(*s)++;
8858             } while (isWORDCHAR_A(**s) && *d < e);
8859         }
8860         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8861             *(*d)++ = ':';
8862             *(*d)++ = ':';
8863             (*s)++;
8864         }
8865         else if (allow_package && **s == ':' && (*s)[1] == ':'
8866            /* Disallow things like Foo::$bar. For the curious, this is
8867             * the code path that triggers the "Bad name after" warning
8868             * when looking for barewords.
8869             */
8870            && (*s)[2] != '$') {
8871             *(*d)++ = *(*s)++;
8872             *(*d)++ = *(*s)++;
8873         }
8874         else
8875             break;
8876     }
8877     return;
8878 }
8879 
8880 /* Returns a NUL terminated string, with the length of the string written to
8881    *slp
8882    */
8883 STATIC char *
8884 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8885 {
8886     char *d = dest;
8887     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8888     bool is_utf8 = cBOOL(UTF);
8889 
8890     PERL_ARGS_ASSERT_SCAN_WORD;
8891 
8892     parse_ident(&s, &d, e, allow_package, is_utf8);
8893     *d = '\0';
8894     *slp = d - dest;
8895     return s;
8896 }
8897 
8898 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8899  * iff Unicode semantics are to be used.  The legal ones are any of:
8900  *  a) all ASCII characters except:
8901  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
8902  *          2) '{'
8903  *     The final case currently doesn't get this far in the program, so we
8904  *     don't test for it.  If that were to change, it would be ok to allow it.
8905  *  c) When not under Unicode rules, any upper Latin1 character
8906  *  d) Otherwise, when unicode rules are used, all XIDS characters.
8907  *
8908  *      Because all ASCII characters have the same representation whether
8909  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8910  *      '{' without knowing if is UTF-8 or not.
8911  * EBCDIC already uses the rules that ASCII platforms will use after the
8912  * deprecation cycle; see comment below about the deprecation. */
8913 #ifdef EBCDIC
8914 #   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
8915     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8916                          ? isIDFIRST_utf8((U8*) (s))                          \
8917                          : (isGRAPH_L1(*s)                                    \
8918                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8919 #else
8920 #   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
8921     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8922                          ? isIDFIRST_utf8((U8*) (s))                          \
8923                          : ! isASCII_utf8((U8*) (s))))
8924 #endif
8925 
8926 STATIC char *
8927 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8928 {
8929     I32 herelines = PL_parser->herelines;
8930     SSize_t bracket = -1;
8931     char funny = *s++;
8932     char *d = dest;
8933     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8934     bool is_utf8 = cBOOL(UTF);
8935     I32 orig_copline = 0, tmp_copline = 0;
8936 
8937     PERL_ARGS_ASSERT_SCAN_IDENT;
8938 
8939     if (isSPACE(*s) || !*s)
8940 	s = skipspace(s);
8941     if (isDIGIT(*s)) {
8942 	while (isDIGIT(*s)) {
8943 	    if (d >= e)
8944 		Perl_croak(aTHX_ "%s", ident_too_long);
8945 	    *d++ = *s++;
8946 	}
8947     }
8948     else {  /* See if it is a "normal" identifier */
8949         parse_ident(&s, &d, e, 1, is_utf8);
8950     }
8951     *d = '\0';
8952     d = dest;
8953     if (*d) {
8954         /* Either a digit variable, or parse_ident() found an identifier
8955            (anything valid as a bareword), so job done and return.  */
8956 	if (PL_lex_state != LEX_NORMAL)
8957 	    PL_lex_state = LEX_INTERPENDMAYBE;
8958 	return s;
8959     }
8960 
8961     /* Here, it is not a run-of-the-mill identifier name */
8962 
8963     if (*s == '$' && s[1]
8964         && (isIDFIRST_lazy_if(s+1,is_utf8)
8965             || isDIGIT_A((U8)s[1])
8966             || s[1] == '$'
8967             || s[1] == '{'
8968             || strnEQ(s+1,"::",2)) )
8969     {
8970         /* Dereferencing a value in a scalar variable.
8971            The alternatives are different syntaxes for a scalar variable.
8972            Using ' as a leading package separator isn't allowed. :: is.   */
8973 	return s;
8974     }
8975     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8976     if (*s == '{') {
8977 	bracket = s - SvPVX(PL_linestr);
8978 	s++;
8979 	orig_copline = CopLINE(PL_curcop);
8980         if (s < PL_bufend && isSPACE(*s)) {
8981             s = skipspace(s);
8982         }
8983     }
8984     if ((s <= PL_bufend - (is_utf8)
8985                           ? UTF8SKIP(s)
8986                           : 1)
8987         && VALID_LEN_ONE_IDENT(s, is_utf8))
8988     {
8989         /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
8990          * because often it has no graphic representation.  (We can't get to
8991          * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8992          * test for it.) */
8993         if ((is_utf8)
8994             ? ! isGRAPH_utf8( (U8*) s)
8995             : (! isGRAPH_L1( (U8) *s)
8996                || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
8997         {
8998             deprecate("literal non-graphic characters in variable names");
8999         }
9000 
9001         if (is_utf8) {
9002             const STRLEN skip = UTF8SKIP(s);
9003             STRLEN i;
9004             d[skip] = '\0';
9005             for ( i = 0; i < skip; i++ )
9006                 d[i] = *s++;
9007         }
9008         else {
9009             *d = *s++;
9010             d[1] = '\0';
9011         }
9012     }
9013     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9014     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9015 	*d = toCTRL(*s);
9016 	s++;
9017     }
9018     /* Warn about ambiguous code after unary operators if {...} notation isn't
9019        used.  There's no difference in ambiguity; it's merely a heuristic
9020        about when not to warn.  */
9021     else if (ck_uni && bracket == -1)
9022 	check_uni();
9023     if (bracket != -1) {
9024         /* If we were processing {...} notation then...  */
9025 	if (isIDFIRST_lazy_if(d,is_utf8)) {
9026             /* if it starts as a valid identifier, assume that it is one.
9027                (the later check for } being at the expected point will trap
9028                cases where this doesn't pan out.)  */
9029             d += is_utf8 ? UTF8SKIP(d) : 1;
9030             parse_ident(&s, &d, e, 1, is_utf8);
9031 	    *d = '\0';
9032             tmp_copline = CopLINE(PL_curcop);
9033             if (s < PL_bufend && isSPACE(*s)) {
9034                 s = skipspace(s);
9035             }
9036 	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9037                 /* ${foo[0]} and ${foo{bar}} notation.  */
9038 		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9039 		    const char * const brack =
9040 			(const char *)
9041 			((*s == '[') ? "[...]" : "{...}");
9042                     orig_copline = CopLINE(PL_curcop);
9043                     CopLINE_set(PL_curcop, tmp_copline);
9044    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9045 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9046 			"Ambiguous use of %c{%s%s} resolved to %c%s%s",
9047 			funny, dest, brack, funny, dest, brack);
9048                     CopLINE_set(PL_curcop, orig_copline);
9049 		}
9050 		bracket++;
9051 		PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9052 		PL_lex_allbrackets++;
9053 		return s;
9054 	    }
9055 	}
9056 	/* Handle extended ${^Foo} variables
9057 	 * 1999-02-27 mjd-perl-patch@plover.com */
9058 	else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9059 		 && isWORDCHAR(*s))
9060 	{
9061 	    d++;
9062 	    while (isWORDCHAR(*s) && d < e) {
9063 		*d++ = *s++;
9064 	    }
9065 	    if (d >= e)
9066 		Perl_croak(aTHX_ "%s", ident_too_long);
9067 	    *d = '\0';
9068 	}
9069 
9070         if ( !tmp_copline )
9071             tmp_copline = CopLINE(PL_curcop);
9072         if (s < PL_bufend && isSPACE(*s)) {
9073             s = skipspace(s);
9074         }
9075 
9076         /* Expect to find a closing } after consuming any trailing whitespace.
9077          */
9078 	if (*s == '}') {
9079 	    s++;
9080 	    if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9081 		PL_lex_state = LEX_INTERPEND;
9082 		PL_expect = XREF;
9083 	    }
9084 	    if (PL_lex_state == LEX_NORMAL) {
9085 		if (ckWARN(WARN_AMBIGUOUS)
9086                     && (keyword(dest, d - dest, 0)
9087 		        || get_cvn_flags(dest, d - dest, is_utf8
9088                            ? SVf_UTF8
9089                            : 0)))
9090 		{
9091                     SV *tmp = newSVpvn_flags( dest, d - dest,
9092                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9093 		    if (funny == '#')
9094 			funny = '@';
9095                     orig_copline = CopLINE(PL_curcop);
9096                     CopLINE_set(PL_curcop, tmp_copline);
9097 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9098 			"Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9099 			funny, SVfARG(tmp), funny, SVfARG(tmp));
9100                     CopLINE_set(PL_curcop, orig_copline);
9101 		}
9102 	    }
9103 	}
9104 	else {
9105             /* Didn't find the closing } at the point we expected, so restore
9106                state such that the next thing to process is the opening { and */
9107 	    s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9108             CopLINE_set(PL_curcop, orig_copline);
9109             PL_parser->herelines = herelines;
9110 	    *dest = '\0';
9111 	}
9112     }
9113     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9114 	PL_lex_state = LEX_INTERPEND;
9115     return s;
9116 }
9117 
9118 static bool
9119 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9120 
9121     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9122      * found in the parse starting at 's', based on the subset that are valid
9123      * in this context input to this routine in 'valid_flags'. Advances s.
9124      * Returns TRUE if the input should be treated as a valid flag, so the next
9125      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9126      * upon first call on the current regex.  This routine will set it to any
9127      * charset modifier found.  The caller shouldn't change it.  This way,
9128      * another charset modifier encountered in the parse can be detected as an
9129      * error, as we have decided to allow only one */
9130 
9131     const char c = **s;
9132     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9133 
9134     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9135         if (isWORDCHAR_lazy_if(*s, UTF)) {
9136             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9137                        UTF ? SVf_UTF8 : 0);
9138             (*s) += charlen;
9139             /* Pretend that it worked, so will continue processing before
9140              * dieing */
9141             return TRUE;
9142         }
9143         return FALSE;
9144     }
9145 
9146     switch (c) {
9147 
9148         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9149         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9150         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9151         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9152         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9153         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9154 	case LOCALE_PAT_MOD:
9155 	    if (*charset) {
9156 		goto multiple_charsets;
9157 	    }
9158 	    set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9159 	    *charset = c;
9160 	    break;
9161 	case UNICODE_PAT_MOD:
9162 	    if (*charset) {
9163 		goto multiple_charsets;
9164 	    }
9165 	    set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9166 	    *charset = c;
9167 	    break;
9168 	case ASCII_RESTRICT_PAT_MOD:
9169 	    if (! *charset) {
9170 		set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9171 	    }
9172 	    else {
9173 
9174 		/* Error if previous modifier wasn't an 'a', but if it was, see
9175 		 * if, and accept, a second occurrence (only) */
9176 		if (*charset != 'a'
9177 		    || get_regex_charset(*pmfl)
9178 			!= REGEX_ASCII_RESTRICTED_CHARSET)
9179 		{
9180 			goto multiple_charsets;
9181 		}
9182 		set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9183 	    }
9184 	    *charset = c;
9185 	    break;
9186 	case DEPENDS_PAT_MOD:
9187 	    if (*charset) {
9188 		goto multiple_charsets;
9189 	    }
9190 	    set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9191 	    *charset = c;
9192 	    break;
9193     }
9194 
9195     (*s)++;
9196     return TRUE;
9197 
9198     multiple_charsets:
9199 	if (*charset != c) {
9200 	    yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9201 	}
9202 	else if (c == 'a') {
9203   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9204 	    yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9205 	}
9206 	else {
9207 	    yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9208 	}
9209 
9210 	/* Pretend that it worked, so will continue processing before dieing */
9211 	(*s)++;
9212 	return TRUE;
9213 }
9214 
9215 STATIC char *
9216 S_scan_pat(pTHX_ char *start, I32 type)
9217 {
9218     PMOP *pm;
9219     char *s;
9220     const char * const valid_flags =
9221 	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9222     char charset = '\0';    /* character set modifier */
9223     unsigned int x_mod_count = 0;
9224 
9225     PERL_ARGS_ASSERT_SCAN_PAT;
9226 
9227     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9228     if (!s)
9229 	Perl_croak(aTHX_ "Search pattern not terminated");
9230 
9231     pm = (PMOP*)newPMOP(type, 0);
9232     if (PL_multi_open == '?') {
9233 	/* This is the only point in the code that sets PMf_ONCE:  */
9234 	pm->op_pmflags |= PMf_ONCE;
9235 
9236 	/* Hence it's safe to do this bit of PMOP book-keeping here, which
9237 	   allows us to restrict the list needed by reset to just the ??
9238 	   matches.  */
9239 	assert(type != OP_TRANS);
9240 	if (PL_curstash) {
9241 	    MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9242 	    U32 elements;
9243 	    if (!mg) {
9244 		mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9245 				 0);
9246 	    }
9247 	    elements = mg->mg_len / sizeof(PMOP**);
9248 	    Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9249 	    ((PMOP**)mg->mg_ptr) [elements++] = pm;
9250 	    mg->mg_len = elements * sizeof(PMOP**);
9251 	    PmopSTASH_set(pm,PL_curstash);
9252 	}
9253     }
9254 
9255     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9256      * anon CV. False positives like qr/[(?{]/ are harmless */
9257 
9258     if (type == OP_QR) {
9259 	STRLEN len;
9260 	char *e, *p = SvPV(PL_lex_stuff, len);
9261 	e = p + len;
9262 	for (; p < e; p++) {
9263 	    if (p[0] == '(' && p[1] == '?'
9264 		&& (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9265 	    {
9266 		pm->op_pmflags |= PMf_HAS_CV;
9267 		break;
9268 	    }
9269 	}
9270 	pm->op_pmflags |= PMf_IS_QR;
9271     }
9272 
9273     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9274                                 &s, &charset, &x_mod_count))
9275     {};
9276     /* issue a warning if /c is specified,but /g is not */
9277     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9278     {
9279         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9280 		       "Use of /c modifier is meaningless without /g" );
9281     }
9282 
9283     STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9284 
9285     PL_lex_op = (OP*)pm;
9286     pl_yylval.ival = OP_MATCH;
9287     return s;
9288 }
9289 
9290 STATIC char *
9291 S_scan_subst(pTHX_ char *start)
9292 {
9293     char *s;
9294     PMOP *pm;
9295     I32 first_start;
9296     line_t first_line;
9297     I32 es = 0;
9298     char charset = '\0';    /* character set modifier */
9299     unsigned int x_mod_count = 0;
9300     char *t;
9301 
9302     PERL_ARGS_ASSERT_SCAN_SUBST;
9303 
9304     pl_yylval.ival = OP_NULL;
9305 
9306     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9307 
9308     if (!s)
9309 	Perl_croak(aTHX_ "Substitution pattern not terminated");
9310 
9311     s = t;
9312 
9313     first_start = PL_multi_start;
9314     first_line = CopLINE(PL_curcop);
9315     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9316     if (!s) {
9317 	SvREFCNT_dec_NN(PL_lex_stuff);
9318 	PL_lex_stuff = NULL;
9319 	Perl_croak(aTHX_ "Substitution replacement not terminated");
9320     }
9321     PL_multi_start = first_start;	/* so whole substitution is taken together */
9322 
9323     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9324 
9325 
9326     while (*s) {
9327 	if (*s == EXEC_PAT_MOD) {
9328 	    s++;
9329 	    es++;
9330 	}
9331 	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9332                                   &s, &charset, &x_mod_count))
9333 	{
9334 	    break;
9335 	}
9336     }
9337 
9338     STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9339 
9340     if ((pm->op_pmflags & PMf_CONTINUE)) {
9341         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9342     }
9343 
9344     if (es) {
9345 	SV * const repl = newSVpvs("");
9346 
9347 	PL_multi_end = 0;
9348 	pm->op_pmflags |= PMf_EVAL;
9349 	while (es-- > 0) {
9350 	    if (es)
9351 		sv_catpvs(repl, "eval ");
9352 	    else
9353 		sv_catpvs(repl, "do ");
9354 	}
9355 	sv_catpvs(repl, "{");
9356 	sv_catsv(repl, PL_sublex_info.repl);
9357 	sv_catpvs(repl, "}");
9358 	SvEVALED_on(repl);
9359 	SvREFCNT_dec(PL_sublex_info.repl);
9360 	PL_sublex_info.repl = repl;
9361     }
9362     if (CopLINE(PL_curcop) != first_line) {
9363 	sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9364 	((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9365 	    CopLINE(PL_curcop) - first_line;
9366 	CopLINE_set(PL_curcop, first_line);
9367     }
9368 
9369     PL_lex_op = (OP*)pm;
9370     pl_yylval.ival = OP_SUBST;
9371     return s;
9372 }
9373 
9374 STATIC char *
9375 S_scan_trans(pTHX_ char *start)
9376 {
9377     char* s;
9378     OP *o;
9379     U8 squash;
9380     U8 del;
9381     U8 complement;
9382     bool nondestruct = 0;
9383     char *t;
9384 
9385     PERL_ARGS_ASSERT_SCAN_TRANS;
9386 
9387     pl_yylval.ival = OP_NULL;
9388 
9389     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9390     if (!s)
9391 	Perl_croak(aTHX_ "Transliteration pattern not terminated");
9392 
9393     s = t;
9394 
9395     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9396     if (!s) {
9397 	SvREFCNT_dec_NN(PL_lex_stuff);
9398 	PL_lex_stuff = NULL;
9399 	Perl_croak(aTHX_ "Transliteration replacement not terminated");
9400     }
9401 
9402     complement = del = squash = 0;
9403     while (1) {
9404 	switch (*s) {
9405 	case 'c':
9406 	    complement = OPpTRANS_COMPLEMENT;
9407 	    break;
9408 	case 'd':
9409 	    del = OPpTRANS_DELETE;
9410 	    break;
9411 	case 's':
9412 	    squash = OPpTRANS_SQUASH;
9413 	    break;
9414 	case 'r':
9415 	    nondestruct = 1;
9416 	    break;
9417 	default:
9418 	    goto no_more;
9419 	}
9420 	s++;
9421     }
9422   no_more:
9423 
9424     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9425     o->op_private &= ~OPpTRANS_ALL;
9426     o->op_private |= del|squash|complement|
9427       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9428       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9429 
9430     PL_lex_op = o;
9431     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9432 
9433 
9434     return s;
9435 }
9436 
9437 /* scan_heredoc
9438    Takes a pointer to the first < in <<FOO.
9439    Returns a pointer to the byte following <<FOO.
9440 
9441    This function scans a heredoc, which involves different methods
9442    depending on whether we are in a string eval, quoted construct, etc.
9443    This is because PL_linestr could containing a single line of input, or
9444    a whole string being evalled, or the contents of the current quote-
9445    like operator.
9446 
9447    The two basic methods are:
9448     - Steal lines from the input stream
9449     - Scan the heredoc in PL_linestr and remove it therefrom
9450 
9451    In a file scope or filtered eval, the first method is used; in a
9452    string eval, the second.
9453 
9454    In a quote-like operator, we have to choose between the two,
9455    depending on where we can find a newline.  We peek into outer lex-
9456    ing scopes until we find one with a newline in it.  If we reach the
9457    outermost lexing scope and it is a file, we use the stream method.
9458    Otherwise it is treated as an eval.
9459 */
9460 
9461 STATIC char *
9462 S_scan_heredoc(pTHX_ char *s)
9463 {
9464     I32 op_type = OP_SCALAR;
9465     I32 len;
9466     SV *tmpstr;
9467     char term;
9468     char *d;
9469     char *e;
9470     char *peek;
9471     const bool infile = PL_rsfp || PL_parser->filtered;
9472     const line_t origline = CopLINE(PL_curcop);
9473     LEXSHARED *shared = PL_parser->lex_shared;
9474 
9475     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9476 
9477     s += 2;
9478     d = PL_tokenbuf + 1;
9479     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9480     *PL_tokenbuf = '\n';
9481     peek = s;
9482     while (SPACE_OR_TAB(*peek))
9483 	peek++;
9484     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9485 	s = peek;
9486 	term = *s++;
9487 	s = delimcpy(d, e, s, PL_bufend, term, &len);
9488 	if (s == PL_bufend)
9489 	    Perl_croak(aTHX_ "Unterminated delimiter for here document");
9490 	d += len;
9491 	s++;
9492     }
9493     else {
9494 	if (*s == '\\')
9495             /* <<\FOO is equivalent to <<'FOO' */
9496 	    s++, term = '\'';
9497 	else
9498 	    term = '"';
9499 	if (!isWORDCHAR_lazy_if(s,UTF))
9500 	    deprecate("bare << to mean <<\"\"");
9501 	peek = s;
9502 	while (isWORDCHAR_lazy_if(peek,UTF)) {
9503 	    peek += UTF ? UTF8SKIP(peek) : 1;
9504 	}
9505 	len = (peek - s >= e - d) ? (e - d) : (peek - s);
9506 	Copy(s, d, len, char);
9507 	s += len;
9508 	d += len;
9509     }
9510     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9511 	Perl_croak(aTHX_ "Delimiter for here document is too long");
9512     *d++ = '\n';
9513     *d = '\0';
9514     len = d - PL_tokenbuf;
9515 
9516 #ifndef PERL_STRICT_CR
9517     d = strchr(s, '\r');
9518     if (d) {
9519 	char * const olds = s;
9520 	s = d;
9521 	while (s < PL_bufend) {
9522 	    if (*s == '\r') {
9523 		*d++ = '\n';
9524 		if (*++s == '\n')
9525 		    s++;
9526 	    }
9527 	    else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
9528 		*d++ = *s++;
9529 		s++;
9530 	    }
9531 	    else
9532 		*d++ = *s++;
9533 	}
9534 	*d = '\0';
9535 	PL_bufend = d;
9536 	SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9537 	s = olds;
9538     }
9539 #endif
9540 
9541     tmpstr = newSV_type(SVt_PVIV);
9542     SvGROW(tmpstr, 80);
9543     if (term == '\'') {
9544 	op_type = OP_CONST;
9545 	SvIV_set(tmpstr, -1);
9546     }
9547     else if (term == '`') {
9548 	op_type = OP_BACKTICK;
9549 	SvIV_set(tmpstr, '\\');
9550     }
9551 
9552     PL_multi_start = origline + 1 + PL_parser->herelines;
9553     PL_multi_open = PL_multi_close = '<';
9554     /* inside a string eval or quote-like operator */
9555     if (!infile || PL_lex_inwhat) {
9556 	SV *linestr;
9557 	char *bufend;
9558 	char * const olds = s;
9559 	PERL_CONTEXT * const cx = CX_CUR();
9560 	/* These two fields are not set until an inner lexing scope is
9561 	   entered.  But we need them set here. */
9562 	shared->ls_bufptr  = s;
9563 	shared->ls_linestr = PL_linestr;
9564 	if (PL_lex_inwhat)
9565 	  /* Look for a newline.  If the current buffer does not have one,
9566 	     peek into the line buffer of the parent lexing scope, going
9567  	     up as many levels as necessary to find one with a newline
9568 	     after bufptr.
9569 	   */
9570 	  while (!(s = (char *)memchr(
9571 		    (void *)shared->ls_bufptr, '\n',
9572 		    SvEND(shared->ls_linestr)-shared->ls_bufptr
9573 		))) {
9574 	    shared = shared->ls_prev;
9575 	    /* shared is only null if we have gone beyond the outermost
9576 	       lexing scope.  In a file, we will have broken out of the
9577 	       loop in the previous iteration.  In an eval, the string buf-
9578 	       fer ends with "\n;", so the while condition above will have
9579 	       evaluated to false.  So shared can never be null.  Or so you
9580 	       might think.  Odd syntax errors like s;@{<<; can gobble up
9581 	       the implicit semicolon at the end of a flie, causing the
9582 	       file handle to be closed even when we are not in a string
9583 	       eval.  So shared may be null in that case.  */
9584 	    if (UNLIKELY(!shared))
9585 		goto interminable;
9586 	    /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9587 	       most lexing scope.  In a file, shared->ls_linestr at that
9588 	       level is just one line, so there is no body to steal. */
9589 	    if (infile && !shared->ls_prev) {
9590 		s = olds;
9591 		goto streaming;
9592 	    }
9593 	  }
9594 	else {	/* eval or we've already hit EOF */
9595 	    s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9596 	    if (!s)
9597                 goto interminable;
9598 	}
9599 	linestr = shared->ls_linestr;
9600 	bufend = SvEND(linestr);
9601 	d = s;
9602 	while (s < bufend - len + 1
9603                && memNE(s,PL_tokenbuf,len) )
9604         {
9605 	    if (*s++ == '\n')
9606 		++PL_parser->herelines;
9607 	}
9608 	if (s >= bufend - len + 1) {
9609 	    goto interminable;
9610 	}
9611 	sv_setpvn(tmpstr,d+1,s-d);
9612 	s += len - 1;
9613 	/* the preceding stmt passes a newline */
9614 	PL_parser->herelines++;
9615 
9616 	/* s now points to the newline after the heredoc terminator.
9617 	   d points to the newline before the body of the heredoc.
9618 	 */
9619 
9620 	/* We are going to modify linestr in place here, so set
9621 	   aside copies of the string if necessary for re-evals or
9622 	   (caller $n)[6]. */
9623 	/* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9624 	   check shared->re_eval_str. */
9625 	if (shared->re_eval_start || shared->re_eval_str) {
9626 	    /* Set aside the rest of the regexp */
9627 	    if (!shared->re_eval_str)
9628 		shared->re_eval_str =
9629 		       newSVpvn(shared->re_eval_start,
9630 				bufend - shared->re_eval_start);
9631 	    shared->re_eval_start -= s-d;
9632 	}
9633 	if (cxstack_ix >= 0
9634             && CxTYPE(cx) == CXt_EVAL
9635             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9636             && cx->blk_eval.cur_text == linestr)
9637         {
9638 	    cx->blk_eval.cur_text = newSVsv(linestr);
9639 	    SvSCREAM_on(cx->blk_eval.cur_text);
9640 	}
9641 	/* Copy everything from s onwards back to d. */
9642 	Move(s,d,bufend-s + 1,char);
9643 	SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9644 	/* Setting PL_bufend only applies when we have not dug deeper
9645 	   into other scopes, because sublex_done sets PL_bufend to
9646 	   SvEND(PL_linestr). */
9647 	if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9648 	s = olds;
9649     }
9650     else
9651     {
9652       SV *linestr_save;
9653       char *oldbufptr_save;
9654      streaming:
9655       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9656       term = PL_tokenbuf[1];
9657       len--;
9658       linestr_save = PL_linestr; /* must restore this afterwards */
9659       d = s;			 /* and this */
9660       oldbufptr_save = PL_oldbufptr;
9661       PL_linestr = newSVpvs("");
9662       PL_bufend = SvPVX(PL_linestr);
9663       while (1) {
9664 	PL_bufptr = PL_bufend;
9665 	CopLINE_set(PL_curcop,
9666 		    origline + 1 + PL_parser->herelines);
9667 	if (!lex_next_chunk(LEX_NO_TERM)
9668 	 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9669 	    /* Simply freeing linestr_save might seem simpler here, as it
9670 	       does not matter what PL_linestr points to, since we are
9671 	       about to croak; but in a quote-like op, linestr_save
9672 	       will have been prospectively freed already, via
9673 	       SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9674 	       restore PL_linestr. */
9675 	    SvREFCNT_dec_NN(PL_linestr);
9676 	    PL_linestr = linestr_save;
9677             PL_oldbufptr = oldbufptr_save;
9678 	    goto interminable;
9679 	}
9680 	CopLINE_set(PL_curcop, origline);
9681 	if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9682             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9683             /* ^That should be enough to avoid this needing to grow:  */
9684 	    sv_catpvs(PL_linestr, "\n\0");
9685             assert(s == SvPVX(PL_linestr));
9686             PL_bufend = SvEND(PL_linestr);
9687 	}
9688 	s = PL_bufptr;
9689 	PL_parser->herelines++;
9690 	PL_last_lop = PL_last_uni = NULL;
9691 #ifndef PERL_STRICT_CR
9692 	if (PL_bufend - PL_linestart >= 2) {
9693 	    if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9694                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9695 	    {
9696 		PL_bufend[-2] = '\n';
9697 		PL_bufend--;
9698 		SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9699 	    }
9700 	    else if (PL_bufend[-1] == '\r')
9701 		PL_bufend[-1] = '\n';
9702 	}
9703 	else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9704 	    PL_bufend[-1] = '\n';
9705 #endif
9706 	if (*s == term && PL_bufend-s >= len
9707 	 && memEQ(s,PL_tokenbuf + 1,len)) {
9708 	    SvREFCNT_dec(PL_linestr);
9709 	    PL_linestr = linestr_save;
9710 	    PL_linestart = SvPVX(linestr_save);
9711 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9712             PL_oldbufptr = oldbufptr_save;
9713 	    s = d;
9714 	    break;
9715 	}
9716 	else {
9717 	    sv_catsv(tmpstr,PL_linestr);
9718 	}
9719       }
9720     }
9721     PL_multi_end = origline + PL_parser->herelines;
9722     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9723 	SvPV_shrink_to_cur(tmpstr);
9724     }
9725     if (!IN_BYTES) {
9726 	if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9727 	    SvUTF8_on(tmpstr);
9728 	else if (IN_ENCODING)
9729 	    sv_recode_to_utf8(tmpstr, _get_encoding());
9730     }
9731     PL_lex_stuff = tmpstr;
9732     pl_yylval.ival = op_type;
9733     return s;
9734 
9735   interminable:
9736     SvREFCNT_dec(tmpstr);
9737     CopLINE_set(PL_curcop, origline);
9738     missingterm(PL_tokenbuf + 1);
9739 }
9740 
9741 /* scan_inputsymbol
9742    takes: current position in input buffer
9743    returns: new position in input buffer
9744    side-effects: pl_yylval and lex_op are set.
9745 
9746    This code handles:
9747 
9748    <>		read from ARGV
9749    <<>>		read from ARGV without magic open
9750    <FH> 	read from filehandle
9751    <pkg::FH>	read from package qualified filehandle
9752    <pkg'FH>	read from package qualified filehandle
9753    <$fh>	read from filehandle in $fh
9754    <*.h>	filename glob
9755 
9756 */
9757 
9758 STATIC char *
9759 S_scan_inputsymbol(pTHX_ char *start)
9760 {
9761     char *s = start;		/* current position in buffer */
9762     char *end;
9763     I32 len;
9764     bool nomagicopen = FALSE;
9765     char *d = PL_tokenbuf;					/* start of temp holding space */
9766     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
9767 
9768     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9769 
9770     end = strchr(s, '\n');
9771     if (!end)
9772 	end = PL_bufend;
9773     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9774         nomagicopen = TRUE;
9775         *d = '\0';
9776         len = 0;
9777         s += 3;
9778     }
9779     else
9780         s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
9781 
9782     /* die if we didn't have space for the contents of the <>,
9783        or if it didn't end, or if we see a newline
9784     */
9785 
9786     if (len >= (I32)sizeof PL_tokenbuf)
9787 	Perl_croak(aTHX_ "Excessively long <> operator");
9788     if (s >= end)
9789 	Perl_croak(aTHX_ "Unterminated <> operator");
9790 
9791     s++;
9792 
9793     /* check for <$fh>
9794        Remember, only scalar variables are interpreted as filehandles by
9795        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9796        treated as a glob() call.
9797        This code makes use of the fact that except for the $ at the front,
9798        a scalar variable and a filehandle look the same.
9799     */
9800     if (*d == '$' && d[1]) d++;
9801 
9802     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9803     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9804 	d += UTF ? UTF8SKIP(d) : 1;
9805 
9806     /* If we've tried to read what we allow filehandles to look like, and
9807        there's still text left, then it must be a glob() and not a getline.
9808        Use scan_str to pull out the stuff between the <> and treat it
9809        as nothing more than a string.
9810     */
9811 
9812     if (d - PL_tokenbuf != len) {
9813 	pl_yylval.ival = OP_GLOB;
9814 	s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9815 	if (!s)
9816 	   Perl_croak(aTHX_ "Glob not terminated");
9817 	return s;
9818     }
9819     else {
9820 	bool readline_overriden = FALSE;
9821 	GV *gv_readline;
9822     	/* we're in a filehandle read situation */
9823 	d = PL_tokenbuf;
9824 
9825 	/* turn <> into <ARGV> */
9826 	if (!len)
9827 	    Copy("ARGV",d,5,char);
9828 
9829 	/* Check whether readline() is overriden */
9830 	if ((gv_readline = gv_override("readline",8)))
9831 	    readline_overriden = TRUE;
9832 
9833 	/* if <$fh>, create the ops to turn the variable into a
9834 	   filehandle
9835 	*/
9836 	if (*d == '$') {
9837 	    /* try to find it in the pad for this block, otherwise find
9838 	       add symbol table ops
9839 	    */
9840 	    const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9841 	    if (tmp != NOT_IN_PAD) {
9842 		if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9843 		    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9844 		    HEK * const stashname = HvNAME_HEK(stash);
9845 		    SV * const sym = sv_2mortal(newSVhek(stashname));
9846 		    sv_catpvs(sym, "::");
9847 		    sv_catpv(sym, d+1);
9848 		    d = SvPVX(sym);
9849 		    goto intro_sym;
9850 		}
9851 		else {
9852 		    OP * const o = newOP(OP_PADSV, 0);
9853 		    o->op_targ = tmp;
9854 		    PL_lex_op = readline_overriden
9855 			? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9856 				op_append_elem(OP_LIST, o,
9857 				    newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9858 			: (OP*)newUNOP(OP_READLINE, 0, o);
9859 		}
9860 	    }
9861 	    else {
9862 		GV *gv;
9863 		++d;
9864               intro_sym:
9865 		gv = gv_fetchpv(d,
9866 				GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9867 				SVt_PV);
9868 		PL_lex_op = readline_overriden
9869 		    ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9870 			    op_append_elem(OP_LIST,
9871 				newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9872 				newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9873 		    : (OP*)newUNOP(OP_READLINE, 0,
9874 			    newUNOP(OP_RV2SV, 0,
9875 				newGVOP(OP_GV, 0, gv)));
9876 	    }
9877 	    /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9878 	    pl_yylval.ival = OP_NULL;
9879 	}
9880 
9881 	/* If it's none of the above, it must be a literal filehandle
9882 	   (<Foo::BAR> or <FOO>) so build a simple readline OP */
9883 	else {
9884 	    GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9885 	    PL_lex_op = readline_overriden
9886 		? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9887 			op_append_elem(OP_LIST,
9888 			    newGVOP(OP_GV, 0, gv),
9889 			    newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9890 		: (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9891 	    pl_yylval.ival = OP_NULL;
9892 	}
9893     }
9894 
9895     return s;
9896 }
9897 
9898 
9899 /* scan_str
9900    takes:
9901 	start			position in buffer
9902         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9903                                 only if they are of the open/close form
9904 	keep_delims		preserve the delimiters around the string
9905 	re_reparse		compiling a run-time /(?{})/:
9906 				   collapse // to /,  and skip encoding src
9907 	delimp			if non-null, this is set to the position of
9908 				the closing delimiter, or just after it if
9909 				the closing and opening delimiters differ
9910 				(i.e., the opening delimiter of a substitu-
9911 				tion replacement)
9912    returns: position to continue reading from buffer
9913    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9914    	updates the read buffer.
9915 
9916    This subroutine pulls a string out of the input.  It is called for:
9917    	q		single quotes		q(literal text)
9918 	'		single quotes		'literal text'
9919 	qq		double quotes		qq(interpolate $here please)
9920 	"		double quotes		"interpolate $here please"
9921 	qx		backticks		qx(/bin/ls -l)
9922 	`		backticks		`/bin/ls -l`
9923 	qw		quote words		@EXPORT_OK = qw( func() $spam )
9924 	m//		regexp match		m/this/
9925 	s///		regexp substitute	s/this/that/
9926 	tr///		string transliterate	tr/this/that/
9927 	y///		string transliterate	y/this/that/
9928 	($*@)		sub prototypes		sub foo ($)
9929 	(stuff)		sub attr parameters	sub foo : attr(stuff)
9930 	<>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
9931 
9932    In most of these cases (all but <>, patterns and transliterate)
9933    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9934    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9935    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9936    calls scan_str().
9937 
9938    It skips whitespace before the string starts, and treats the first
9939    character as the delimiter.  If the delimiter is one of ([{< then
9940    the corresponding "close" character )]}> is used as the closing
9941    delimiter.  It allows quoting of delimiters, and if the string has
9942    balanced delimiters ([{<>}]) it allows nesting.
9943 
9944    On success, the SV with the resulting string is put into lex_stuff or,
9945    if that is already non-NULL, into lex_repl. The second case occurs only
9946    when parsing the RHS of the special constructs s/// and tr/// (y///).
9947    For convenience, the terminating delimiter character is stuffed into
9948    SvIVX of the SV.
9949 */
9950 
9951 STATIC char *
9952 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9953 		 char **delimp
9954     )
9955 {
9956     SV *sv;			/* scalar value: string */
9957     const char *tmps;		/* temp string, used for delimiter matching */
9958     char *s = start;		/* current position in the buffer */
9959     char term;			/* terminating character */
9960     char *to;			/* current position in the sv's data */
9961     I32 brackets = 1;		/* bracket nesting level */
9962     bool has_utf8 = FALSE;	/* is there any utf8 content? */
9963     I32 termcode;		/* terminating char. code */
9964     U8 termstr[UTF8_MAXBYTES];	/* terminating string */
9965     STRLEN termlen;		/* length of terminating string */
9966     int last_off = 0;		/* last position for nesting bracket */
9967     line_t herelines;
9968 
9969     PERL_ARGS_ASSERT_SCAN_STR;
9970 
9971     /* skip space before the delimiter */
9972     if (isSPACE(*s)) {
9973 	s = skipspace(s);
9974     }
9975 
9976     /* mark where we are, in case we need to report errors */
9977     CLINE;
9978 
9979     /* after skipping whitespace, the next character is the terminator */
9980     term = *s;
9981     if (!UTF) {
9982 	termcode = termstr[0] = term;
9983 	termlen = 1;
9984     }
9985     else {
9986 	termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9987 	Copy(s, termstr, termlen, U8);
9988 	if (!UTF8_IS_INVARIANT(term))
9989 	    has_utf8 = TRUE;
9990     }
9991 
9992     /* mark where we are */
9993     PL_multi_start = CopLINE(PL_curcop);
9994     PL_multi_open = term;
9995     herelines = PL_parser->herelines;
9996 
9997     /* find corresponding closing delimiter */
9998     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9999 	termcode = termstr[0] = term = tmps[5];
10000 
10001     PL_multi_close = term;
10002 
10003     if (PL_multi_open == PL_multi_close) {
10004         keep_bracketed_quoted = FALSE;
10005     }
10006 
10007     /* create a new SV to hold the contents.  79 is the SV's initial length.
10008        What a random number. */
10009     sv = newSV_type(SVt_PVIV);
10010     SvGROW(sv, 80);
10011     SvIV_set(sv, termcode);
10012     (void)SvPOK_only(sv);		/* validate pointer */
10013 
10014     /* move past delimiter and try to read a complete string */
10015     if (keep_delims)
10016 	sv_catpvn(sv, s, termlen);
10017     s += termlen;
10018     for (;;) {
10019 	if (IN_ENCODING && !UTF && !re_reparse) {
10020 	    bool cont = TRUE;
10021 
10022 	    while (cont) {
10023 		int offset = s - SvPVX_const(PL_linestr);
10024 		const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
10025 					   &offset, (char*)termstr, termlen);
10026 		const char *ns;
10027 		char *svlast;
10028 
10029 		if (SvIsCOW(PL_linestr)) {
10030 		    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10031 		    STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10032 		    STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10033 		    char *buf = SvPVX(PL_linestr);
10034 		    bufend_pos = PL_parser->bufend - buf;
10035 		    bufptr_pos = PL_parser->bufptr - buf;
10036 		    oldbufptr_pos = PL_parser->oldbufptr - buf;
10037 		    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10038 		    linestart_pos = PL_parser->linestart - buf;
10039 		    last_uni_pos = PL_parser->last_uni
10040 			? PL_parser->last_uni - buf
10041 			: 0;
10042 		    last_lop_pos = PL_parser->last_lop
10043 			? PL_parser->last_lop - buf
10044 			: 0;
10045 		    re_eval_start_pos =
10046 			PL_parser->lex_shared->re_eval_start ?
10047                             PL_parser->lex_shared->re_eval_start - buf : 0;
10048 		    s_pos = s - buf;
10049 
10050 		    sv_force_normal(PL_linestr);
10051 
10052 		    buf = SvPVX(PL_linestr);
10053 		    PL_parser->bufend = buf + bufend_pos;
10054 		    PL_parser->bufptr = buf + bufptr_pos;
10055 		    PL_parser->oldbufptr = buf + oldbufptr_pos;
10056 		    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10057 		    PL_parser->linestart = buf + linestart_pos;
10058 		    if (PL_parser->last_uni)
10059 			PL_parser->last_uni = buf + last_uni_pos;
10060 		    if (PL_parser->last_lop)
10061 			PL_parser->last_lop = buf + last_lop_pos;
10062 		    if (PL_parser->lex_shared->re_eval_start)
10063 		        PL_parser->lex_shared->re_eval_start  =
10064 			    buf + re_eval_start_pos;
10065 		    s = buf + s_pos;
10066 		}
10067 		ns = SvPVX_const(PL_linestr) + offset;
10068 		svlast = SvEND(sv) - 1;
10069 
10070 		for (; s < ns; s++) {
10071 		    if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10072 			COPLINE_INC_WITH_HERELINES;
10073 		}
10074 		if (!found)
10075 		    goto read_more_line;
10076 		else {
10077 		    /* handle quoted delimiters */
10078 		    if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10079 			const char *t;
10080 			for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10081 			    t--;
10082 			if ((svlast-1 - t) % 2) {
10083 			    if (!keep_bracketed_quoted) {
10084 				*(svlast-1) = term;
10085 				*svlast = '\0';
10086 				SvCUR_set(sv, SvCUR(sv) - 1);
10087 			    }
10088 			    continue;
10089 			}
10090 		    }
10091 		    if (PL_multi_open == PL_multi_close) {
10092 			cont = FALSE;
10093 		    }
10094 		    else {
10095 			const char *t;
10096 			char *w;
10097 			for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10098 			    /* At here, all closes are "was quoted" one,
10099 			       so we don't check PL_multi_close. */
10100 			    if (*t == '\\') {
10101 				if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
10102 				    t++;
10103 				else
10104 				    *w++ = *t++;
10105 			    }
10106 			    else if (*t == PL_multi_open)
10107 				brackets++;
10108 
10109 			    *w = *t;
10110 			}
10111 			if (w < t) {
10112 			    *w++ = term;
10113 			    *w = '\0';
10114 			    SvCUR_set(sv, w - SvPVX_const(sv));
10115 			}
10116 			last_off = w - SvPVX(sv);
10117 			if (--brackets <= 0)
10118 			    cont = FALSE;
10119 		    }
10120 		}
10121 	    }
10122 	    if (!keep_delims) {
10123 		SvCUR_set(sv, SvCUR(sv) - 1);
10124 		*SvEND(sv) = '\0';
10125 	    }
10126 	    break;
10127 	}
10128 
10129     	/* extend sv if need be */
10130 	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10131 	/* set 'to' to the next character in the sv's string */
10132 	to = SvPVX(sv)+SvCUR(sv);
10133 
10134 	/* if open delimiter is the close delimiter read unbridle */
10135 	if (PL_multi_open == PL_multi_close) {
10136 	    for (; s < PL_bufend; s++,to++) {
10137 	    	/* embedded newlines increment the current line number */
10138 		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10139 		    COPLINE_INC_WITH_HERELINES;
10140 		/* handle quoted delimiters */
10141 		if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10142 		    if (!keep_bracketed_quoted
10143 		        && (s[1] == term
10144 			    || (re_reparse && s[1] == '\\'))
10145 		    )
10146 			s++;
10147 		    else /* any other quotes are simply copied straight through */
10148 			*to++ = *s++;
10149 		}
10150 		/* terminate when run out of buffer (the for() condition), or
10151 		   have found the terminator */
10152 		else if (*s == term) {
10153 		    if (termlen == 1)
10154 			break;
10155 		    if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10156 			break;
10157 		}
10158 		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10159 		    has_utf8 = TRUE;
10160 		*to = *s;
10161 	    }
10162 	}
10163 
10164 	/* if the terminator isn't the same as the start character (e.g.,
10165 	   matched brackets), we have to allow more in the quoting, and
10166 	   be prepared for nested brackets.
10167 	*/
10168 	else {
10169 	    /* read until we run out of string, or we find the terminator */
10170 	    for (; s < PL_bufend; s++,to++) {
10171 	    	/* embedded newlines increment the line count */
10172 		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10173 		    COPLINE_INC_WITH_HERELINES;
10174 		/* backslashes can escape the open or closing characters */
10175 		if (*s == '\\' && s+1 < PL_bufend) {
10176 		    if (!keep_bracketed_quoted
10177                        && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10178                     {
10179 			s++;
10180                     }
10181 		    else
10182 			*to++ = *s++;
10183                 }
10184 		/* allow nested opens and closes */
10185 		else if (*s == PL_multi_close && --brackets <= 0)
10186 		    break;
10187 		else if (*s == PL_multi_open)
10188 		    brackets++;
10189 		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10190 		    has_utf8 = TRUE;
10191 		*to = *s;
10192 	    }
10193 	}
10194 	/* terminate the copied string and update the sv's end-of-string */
10195 	*to = '\0';
10196 	SvCUR_set(sv, to - SvPVX_const(sv));
10197 
10198 	/*
10199 	 * this next chunk reads more into the buffer if we're not done yet
10200 	 */
10201 
10202   	if (s < PL_bufend)
10203 	    break;		/* handle case where we are done yet :-) */
10204 
10205 #ifndef PERL_STRICT_CR
10206 	if (to - SvPVX_const(sv) >= 2) {
10207 	    if (   (to[-2] == '\r' && to[-1] == '\n')
10208                 || (to[-2] == '\n' && to[-1] == '\r'))
10209 	    {
10210 		to[-2] = '\n';
10211 		to--;
10212 		SvCUR_set(sv, to - SvPVX_const(sv));
10213 	    }
10214 	    else if (to[-1] == '\r')
10215 		to[-1] = '\n';
10216 	}
10217 	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10218 	    to[-1] = '\n';
10219 #endif
10220 
10221      read_more_line:
10222 	/* if we're out of file, or a read fails, bail and reset the current
10223 	   line marker so we can report where the unterminated string began
10224 	*/
10225 	COPLINE_INC_WITH_HERELINES;
10226 	PL_bufptr = PL_bufend;
10227 	if (!lex_next_chunk(0)) {
10228 	    sv_free(sv);
10229 	    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10230 	    return NULL;
10231 	}
10232 	s = PL_bufptr;
10233     }
10234 
10235     /* at this point, we have successfully read the delimited string */
10236 
10237     if (!IN_ENCODING || UTF || re_reparse) {
10238 
10239 	if (keep_delims)
10240 	    sv_catpvn(sv, s, termlen);
10241 	s += termlen;
10242     }
10243     if (has_utf8 || (IN_ENCODING && !re_reparse))
10244 	SvUTF8_on(sv);
10245 
10246     PL_multi_end = CopLINE(PL_curcop);
10247     CopLINE_set(PL_curcop, PL_multi_start);
10248     PL_parser->herelines = herelines;
10249 
10250     /* if we allocated too much space, give some back */
10251     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10252 	SvLEN_set(sv, SvCUR(sv) + 1);
10253 	SvPV_renew(sv, SvLEN(sv));
10254     }
10255 
10256     /* decide whether this is the first or second quoted string we've read
10257        for this op
10258     */
10259 
10260     if (PL_lex_stuff)
10261 	PL_sublex_info.repl = sv;
10262     else
10263 	PL_lex_stuff = sv;
10264     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10265     return s;
10266 }
10267 
10268 /*
10269   scan_num
10270   takes: pointer to position in buffer
10271   returns: pointer to new position in buffer
10272   side-effects: builds ops for the constant in pl_yylval.op
10273 
10274   Read a number in any of the formats that Perl accepts:
10275 
10276   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
10277   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
10278   0b[01](_?[01])*                                       binary integers
10279   0[0-7](_?[0-7])*                                      octal integers
10280   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10281   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10282 
10283   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10284   thing it reads.
10285 
10286   If it reads a number without a decimal point or an exponent, it will
10287   try converting the number to an integer and see if it can do so
10288   without loss of precision.
10289 */
10290 
10291 char *
10292 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10293 {
10294     const char *s = start;	/* current position in buffer */
10295     char *d;			/* destination in temp buffer */
10296     char *e;			/* end of temp buffer */
10297     NV nv;				/* number read, as a double */
10298     SV *sv = NULL;			/* place to put the converted number */
10299     bool floatit;			/* boolean: int or float? */
10300     const char *lastub = NULL;		/* position of last underbar */
10301     static const char* const number_too_long = "Number too long";
10302     /* Hexadecimal floating point.
10303      *
10304      * In many places (where we have quads and NV is IEEE 754 double)
10305      * we can fit the mantissa bits of a NV into an unsigned quad.
10306      * (Note that UVs might not be quads even when we have quads.)
10307      * This will not work everywhere, though (either no quads, or
10308      * using long doubles), in which case we have to resort to NV,
10309      * which will probably mean horrible loss of precision due to
10310      * multiple fp operations. */
10311     bool hexfp = FALSE;
10312     int total_bits = 0;
10313     int significant_bits = 0;
10314 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10315 #  define HEXFP_UQUAD
10316     Uquad_t hexfp_uquad = 0;
10317     int hexfp_frac_bits = 0;
10318 #else
10319 #  define HEXFP_NV
10320     NV hexfp_nv = 0.0;
10321 #endif
10322     NV hexfp_mult = 1.0;
10323     UV high_non_zero = 0; /* highest digit */
10324     int non_zero_integer_digits = 0;
10325 
10326     PERL_ARGS_ASSERT_SCAN_NUM;
10327 
10328     /* We use the first character to decide what type of number this is */
10329 
10330     switch (*s) {
10331     default:
10332 	Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10333 
10334     /* if it starts with a 0, it could be an octal number, a decimal in
10335        0.13 disguise, or a hexadecimal number, or a binary number. */
10336     case '0':
10337 	{
10338 	  /* variables:
10339 	     u		holds the "number so far"
10340 	     shift	the power of 2 of the base
10341 			(hex == 4, octal == 3, binary == 1)
10342 	     overflowed	was the number more than we can hold?
10343 
10344 	     Shift is used when we add a digit.  It also serves as an "are
10345 	     we in octal/hex/binary?" indicator to disallow hex characters
10346 	     when in octal mode.
10347 	   */
10348 	    NV n = 0.0;
10349 	    UV u = 0;
10350 	    I32 shift;
10351 	    bool overflowed = FALSE;
10352 	    bool just_zero  = TRUE;	/* just plain 0 or binary number? */
10353 	    static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10354 	    static const char* const bases[5] =
10355 	      { "", "binary", "", "octal", "hexadecimal" };
10356 	    static const char* const Bases[5] =
10357 	      { "", "Binary", "", "Octal", "Hexadecimal" };
10358 	    static const char* const maxima[5] =
10359 	      { "",
10360 		"0b11111111111111111111111111111111",
10361 		"",
10362 		"037777777777",
10363 		"0xffffffff" };
10364 	    const char *base, *Base, *max;
10365 
10366 	    /* check for hex */
10367 	    if (isALPHA_FOLD_EQ(s[1], 'x')) {
10368 		shift = 4;
10369 		s += 2;
10370 		just_zero = FALSE;
10371 	    } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10372 		shift = 1;
10373 		s += 2;
10374 		just_zero = FALSE;
10375 	    }
10376 	    /* check for a decimal in disguise */
10377 	    else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10378 		goto decimal;
10379 	    /* so it must be octal */
10380 	    else {
10381 		shift = 3;
10382 		s++;
10383 	    }
10384 
10385 	    if (*s == '_') {
10386 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10387 			       "Misplaced _ in number");
10388 	       lastub = s++;
10389 	    }
10390 
10391 	    base = bases[shift];
10392 	    Base = Bases[shift];
10393 	    max  = maxima[shift];
10394 
10395 	    /* read the rest of the number */
10396 	    for (;;) {
10397 		/* x is used in the overflow test,
10398 		   b is the digit we're adding on. */
10399 		UV x, b;
10400 
10401 		switch (*s) {
10402 
10403 		/* if we don't mention it, we're done */
10404 		default:
10405 		    goto out;
10406 
10407 		/* _ are ignored -- but warned about if consecutive */
10408 		case '_':
10409 		    if (lastub && s == lastub + 1)
10410 		        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10411 				       "Misplaced _ in number");
10412 		    lastub = s++;
10413 		    break;
10414 
10415 		/* 8 and 9 are not octal */
10416 		case '8': case '9':
10417 		    if (shift == 3)
10418 			yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10419 		    /* FALLTHROUGH */
10420 
10421 	        /* octal digits */
10422 		case '2': case '3': case '4':
10423 		case '5': case '6': case '7':
10424 		    if (shift == 1)
10425 			yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10426 		    /* FALLTHROUGH */
10427 
10428 		case '0': case '1':
10429 		    b = *s++ & 15;		/* ASCII digit -> value of digit */
10430 		    goto digit;
10431 
10432 	        /* hex digits */
10433 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10434 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10435 		    /* make sure they said 0x */
10436 		    if (shift != 4)
10437 			goto out;
10438 		    b = (*s++ & 7) + 9;
10439 
10440 		    /* Prepare to put the digit we have onto the end
10441 		       of the number so far.  We check for overflows.
10442 		    */
10443 
10444 		  digit:
10445 		    just_zero = FALSE;
10446 		    if (!overflowed) {
10447 			x = u << shift;	/* make room for the digit */
10448 
10449                         total_bits += shift;
10450 
10451 			if ((x >> shift) != u
10452 			    && !(PL_hints & HINT_NEW_BINARY)) {
10453 			    overflowed = TRUE;
10454 			    n = (NV) u;
10455 			    Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10456 					     "Integer overflow in %s number",
10457 					     base);
10458 			} else
10459 			    u = x | b;		/* add the digit to the end */
10460 		    }
10461 		    if (overflowed) {
10462 			n *= nvshift[shift];
10463 			/* If an NV has not enough bits in its
10464 			 * mantissa to represent an UV this summing of
10465 			 * small low-order numbers is a waste of time
10466 			 * (because the NV cannot preserve the
10467 			 * low-order bits anyway): we could just
10468 			 * remember when did we overflow and in the
10469 			 * end just multiply n by the right
10470 			 * amount. */
10471 			n += (NV) b;
10472 		    }
10473 
10474                     if (high_non_zero == 0 && b > 0)
10475                         high_non_zero = b;
10476 
10477                     if (high_non_zero)
10478                         non_zero_integer_digits++;
10479 
10480                     /* this could be hexfp, but peek ahead
10481                      * to avoid matching ".." */
10482                     if (UNLIKELY(HEXFP_PEEK(s))) {
10483                         goto out;
10484                     }
10485 
10486 		    break;
10487 		}
10488 	    }
10489 
10490 	  /* if we get here, we had success: make a scalar value from
10491 	     the number.
10492 	  */
10493 	  out:
10494 
10495 	    /* final misplaced underbar check */
10496 	    if (s[-1] == '_') {
10497 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10498 	    }
10499 
10500             if (UNLIKELY(HEXFP_PEEK(s))) {
10501                 /* Do sloppy (on the underbars) but quick detection
10502                  * (and value construction) for hexfp, the decimal
10503                  * detection will shortly be more thorough with the
10504                  * underbar checks. */
10505                 const char* h = s;
10506                 significant_bits = non_zero_integer_digits * shift;
10507 #ifdef HEXFP_UQUAD
10508                 hexfp_uquad = u;
10509 #else /* HEXFP_NV */
10510                 hexfp_nv = u;
10511 #endif
10512                 /* Ignore the leading zero bits of
10513                  * the high (first) non-zero digit. */
10514                 if (high_non_zero) {
10515                     if (high_non_zero < 0x8)
10516                         significant_bits--;
10517                     if (high_non_zero < 0x4)
10518                         significant_bits--;
10519                     if (high_non_zero < 0x2)
10520                         significant_bits--;
10521                 }
10522 
10523                 if (*h == '.') {
10524 #ifdef HEXFP_NV
10525                     NV nv_mult = 1.0;
10526 #endif
10527                     bool accumulate = TRUE;
10528                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10529                         if (isXDIGIT(*h)) {
10530                             U8 b = XDIGIT_VALUE(*h);
10531                             significant_bits += shift;
10532 #ifdef HEXFP_UQUAD
10533                             if (accumulate) {
10534                                 if (significant_bits < NV_MANT_DIG) {
10535                                     /* We are in the long "run" of xdigits,
10536                                      * accumulate the full four bits. */
10537                                     hexfp_uquad <<= shift;
10538                                     hexfp_uquad |= b;
10539                                     hexfp_frac_bits += shift;
10540                                 } else {
10541                                     /* We are at a hexdigit either at,
10542                                      * or straddling, the edge of mantissa.
10543                                      * We will try grabbing as many as
10544                                      * possible bits. */
10545                                     int tail =
10546                                       significant_bits - NV_MANT_DIG;
10547                                     if (tail <= 0)
10548                                        tail += shift;
10549                                     hexfp_uquad <<= tail;
10550                                     hexfp_uquad |= b >> (shift - tail);
10551                                     hexfp_frac_bits += tail;
10552 
10553                                     /* Ignore the trailing zero bits
10554                                      * of the last non-zero xdigit.
10555                                      *
10556                                      * The assumption here is that if
10557                                      * one has input of e.g. the xdigit
10558                                      * eight (0x8), there is only one
10559                                      * bit being input, not the full
10560                                      * four bits.  Conversely, if one
10561                                      * specifies a zero xdigit, the
10562                                      * assumption is that one really
10563                                      * wants all those bits to be zero. */
10564                                     if (b) {
10565                                         if ((b & 0x1) == 0x0) {
10566                                             significant_bits--;
10567                                             if ((b & 0x2) == 0x0) {
10568                                                 significant_bits--;
10569                                                 if ((b & 0x4) == 0x0) {
10570                                                     significant_bits--;
10571                                                 }
10572                                             }
10573                                         }
10574                                     }
10575 
10576                                     accumulate = FALSE;
10577                                 }
10578                             } else {
10579                                 /* Keep skipping the xdigits, and
10580                                  * accumulating the significant bits,
10581                                  * but do not shift the uquad
10582                                  * (which would catastrophically drop
10583                                  * high-order bits) or accumulate the
10584                                  * xdigits anymore. */
10585                             }
10586 #else /* HEXFP_NV */
10587                             if (accumulate) {
10588                                 nv_mult /= 16.0;
10589                                 if (nv_mult > 0.0)
10590                                     hexfp_nv += b * nv_mult;
10591                                 else
10592                                     accumulate = FALSE;
10593                             }
10594 #endif
10595                         }
10596                         if (significant_bits >= NV_MANT_DIG)
10597                             accumulate = FALSE;
10598                     }
10599                 }
10600 
10601                 if ((total_bits > 0 || significant_bits > 0) &&
10602                     isALPHA_FOLD_EQ(*h, 'p')) {
10603                     bool negexp = FALSE;
10604                     h++;
10605                     if (*h == '+')
10606                         h++;
10607                     else if (*h == '-') {
10608                         negexp = TRUE;
10609                         h++;
10610                     }
10611                     if (isDIGIT(*h)) {
10612                         I32 hexfp_exp = 0;
10613                         while (isDIGIT(*h) || *h == '_') {
10614                             if (isDIGIT(*h)) {
10615                                 hexfp_exp *= 10;
10616                                 hexfp_exp += *h - '0';
10617 #ifdef NV_MIN_EXP
10618                                 if (negexp
10619                                     && -hexfp_exp < NV_MIN_EXP - 1) {
10620                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10621                                                    "Hexadecimal float: exponent underflow");
10622                                     break;
10623                                 }
10624 #endif
10625 #ifdef NV_MAX_EXP
10626                                 if (!negexp
10627                                     && hexfp_exp > NV_MAX_EXP - 1) {
10628                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10629                                                    "Hexadecimal float: exponent overflow");
10630                                     break;
10631                                 }
10632 #endif
10633                             }
10634                             h++;
10635                         }
10636                         if (negexp)
10637                             hexfp_exp = -hexfp_exp;
10638 #ifdef HEXFP_UQUAD
10639                         hexfp_exp -= hexfp_frac_bits;
10640 #endif
10641                         hexfp_mult = pow(2.0, hexfp_exp);
10642                         hexfp = TRUE;
10643                         goto decimal;
10644                     }
10645                 }
10646             }
10647 
10648 	    if (overflowed) {
10649 		if (n > 4294967295.0)
10650 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10651 				   "%s number > %s non-portable",
10652 				   Base, max);
10653 		sv = newSVnv(n);
10654 	    }
10655 	    else {
10656 #if UVSIZE > 4
10657 		if (u > 0xffffffff)
10658 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10659 				   "%s number > %s non-portable",
10660 				   Base, max);
10661 #endif
10662 		sv = newSVuv(u);
10663 	    }
10664 	    if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10665 		sv = new_constant(start, s - start, "integer",
10666 				  sv, NULL, NULL, 0);
10667 	    else if (PL_hints & HINT_NEW_BINARY)
10668 		sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10669 	}
10670 	break;
10671 
10672     /*
10673       handle decimal numbers.
10674       we're also sent here when we read a 0 as the first digit
10675     */
10676     case '1': case '2': case '3': case '4': case '5':
10677     case '6': case '7': case '8': case '9': case '.':
10678       decimal:
10679 	d = PL_tokenbuf;
10680 	e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10681         floatit = FALSE;
10682         if (hexfp) {
10683             floatit = TRUE;
10684             *d++ = '0';
10685             *d++ = 'x';
10686             s = start + 2;
10687         }
10688 
10689 	/* read next group of digits and _ and copy into d */
10690 	while (isDIGIT(*s)
10691                || *s == '_'
10692                || UNLIKELY(hexfp && isXDIGIT(*s)))
10693         {
10694 	    /* skip underscores, checking for misplaced ones
10695 	       if -w is on
10696 	    */
10697 	    if (*s == '_') {
10698 		if (lastub && s == lastub + 1)
10699 		    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10700 				   "Misplaced _ in number");
10701 		lastub = s++;
10702 	    }
10703 	    else {
10704 	        /* check for end of fixed-length buffer */
10705 		if (d >= e)
10706 		    Perl_croak(aTHX_ "%s", number_too_long);
10707 		/* if we're ok, copy the character */
10708 		*d++ = *s++;
10709 	    }
10710 	}
10711 
10712 	/* final misplaced underbar check */
10713 	if (lastub && s == lastub + 1) {
10714 	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10715 	}
10716 
10717 	/* read a decimal portion if there is one.  avoid
10718 	   3..5 being interpreted as the number 3. followed
10719 	   by .5
10720 	*/
10721 	if (*s == '.' && s[1] != '.') {
10722 	    floatit = TRUE;
10723 	    *d++ = *s++;
10724 
10725 	    if (*s == '_') {
10726 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10727 			       "Misplaced _ in number");
10728 		lastub = s;
10729 	    }
10730 
10731 	    /* copy, ignoring underbars, until we run out of digits.
10732 	    */
10733 	    for (; isDIGIT(*s)
10734                    || *s == '_'
10735                    || UNLIKELY(hexfp && isXDIGIT(*s));
10736                  s++)
10737             {
10738 	        /* fixed length buffer check */
10739 		if (d >= e)
10740 		    Perl_croak(aTHX_ "%s", number_too_long);
10741 		if (*s == '_') {
10742 		   if (lastub && s == lastub + 1)
10743 		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10744 				      "Misplaced _ in number");
10745 		   lastub = s;
10746 		}
10747 		else
10748 		    *d++ = *s;
10749 	    }
10750 	    /* fractional part ending in underbar? */
10751 	    if (s[-1] == '_') {
10752 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10753 			       "Misplaced _ in number");
10754 	    }
10755 	    if (*s == '.' && isDIGIT(s[1])) {
10756 		/* oops, it's really a v-string, but without the "v" */
10757 		s = start;
10758 		goto vstring;
10759 	    }
10760 	}
10761 
10762 	/* read exponent part, if present */
10763 	if ((isALPHA_FOLD_EQ(*s, 'e')
10764               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10765             && strchr("+-0123456789_", s[1]))
10766         {
10767             floatit = TRUE;
10768 
10769 	    /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10770                ditto for p (hexfloats) */
10771             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10772 		/* At least some Mach atof()s don't grok 'E' */
10773                 *d++ = 'e';
10774             }
10775             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10776                 *d++ = 'p';
10777             }
10778 
10779 	    s++;
10780 
10781 
10782 	    /* stray preinitial _ */
10783 	    if (*s == '_') {
10784 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10785 			       "Misplaced _ in number");
10786 	        lastub = s++;
10787 	    }
10788 
10789 	    /* allow positive or negative exponent */
10790 	    if (*s == '+' || *s == '-')
10791 		*d++ = *s++;
10792 
10793 	    /* stray initial _ */
10794 	    if (*s == '_') {
10795 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10796 			       "Misplaced _ in number");
10797 	        lastub = s++;
10798 	    }
10799 
10800 	    /* read digits of exponent */
10801 	    while (isDIGIT(*s) || *s == '_') {
10802 	        if (isDIGIT(*s)) {
10803 		    if (d >= e)
10804 		        Perl_croak(aTHX_ "%s", number_too_long);
10805 		    *d++ = *s++;
10806 		}
10807 		else {
10808 		   if (((lastub && s == lastub + 1)
10809                         || (!isDIGIT(s[1]) && s[1] != '_')))
10810 		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10811 				      "Misplaced _ in number");
10812 		   lastub = s++;
10813 		}
10814 	    }
10815 	}
10816 
10817 
10818 	/*
10819            We try to do an integer conversion first if no characters
10820            indicating "float" have been found.
10821 	 */
10822 
10823 	if (!floatit) {
10824     	    UV uv;
10825 	    const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10826 
10827             if (flags == IS_NUMBER_IN_UV) {
10828               if (uv <= IV_MAX)
10829 		sv = newSViv(uv); /* Prefer IVs over UVs. */
10830               else
10831 	    	sv = newSVuv(uv);
10832             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10833               if (uv <= (UV) IV_MIN)
10834                 sv = newSViv(-(IV)uv);
10835               else
10836 	    	floatit = TRUE;
10837             } else
10838               floatit = TRUE;
10839         }
10840 	if (floatit) {
10841             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10842 	    /* terminate the string */
10843 	    *d = '\0';
10844             if (UNLIKELY(hexfp)) {
10845 #  ifdef NV_MANT_DIG
10846                 if (significant_bits > NV_MANT_DIG)
10847                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10848                                    "Hexadecimal float: mantissa overflow");
10849 #  endif
10850 #ifdef HEXFP_UQUAD
10851                 nv = hexfp_uquad * hexfp_mult;
10852 #else /* HEXFP_NV */
10853                 nv = hexfp_nv * hexfp_mult;
10854 #endif
10855             } else {
10856                 nv = Atof(PL_tokenbuf);
10857             }
10858             RESTORE_LC_NUMERIC_UNDERLYING();
10859             sv = newSVnv(nv);
10860 	}
10861 
10862 	if ( floatit
10863 	     ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10864 	    const char *const key = floatit ? "float" : "integer";
10865 	    const STRLEN keylen = floatit ? 5 : 7;
10866 	    sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10867 				key, keylen, sv, NULL, NULL, 0);
10868 	}
10869 	break;
10870 
10871     /* if it starts with a v, it could be a v-string */
10872     case 'v':
10873     vstring:
10874 		sv = newSV(5); /* preallocate storage space */
10875 		ENTER_with_name("scan_vstring");
10876 		SAVEFREESV(sv);
10877 		s = scan_vstring(s, PL_bufend, sv);
10878 		SvREFCNT_inc_simple_void_NN(sv);
10879 		LEAVE_with_name("scan_vstring");
10880 	break;
10881     }
10882 
10883     /* make the op for the constant and return */
10884 
10885     if (sv)
10886 	lvalp->opval = newSVOP(OP_CONST, 0, sv);
10887     else
10888 	lvalp->opval = NULL;
10889 
10890     return (char *)s;
10891 }
10892 
10893 STATIC char *
10894 S_scan_formline(pTHX_ char *s)
10895 {
10896     char *eol;
10897     char *t;
10898     SV * const stuff = newSVpvs("");
10899     bool needargs = FALSE;
10900     bool eofmt = FALSE;
10901 
10902     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10903 
10904     while (!needargs) {
10905 	if (*s == '.') {
10906 	    t = s+1;
10907 #ifdef PERL_STRICT_CR
10908 	    while (SPACE_OR_TAB(*t))
10909 		t++;
10910 #else
10911 	    while (SPACE_OR_TAB(*t) || *t == '\r')
10912 		t++;
10913 #endif
10914 	    if (*t == '\n' || t == PL_bufend) {
10915 	        eofmt = TRUE;
10916 		break;
10917             }
10918 	}
10919 	eol = (char *) memchr(s,'\n',PL_bufend-s);
10920 	if (!eol++)
10921 		eol = PL_bufend;
10922 	if (*s != '#') {
10923 	    for (t = s; t < eol; t++) {
10924 		if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10925 		    needargs = FALSE;
10926 		    goto enough;	/* ~~ must be first line in formline */
10927 		}
10928 		if (*t == '@' || *t == '^')
10929 		    needargs = TRUE;
10930 	    }
10931 	    if (eol > s) {
10932 	        sv_catpvn(stuff, s, eol-s);
10933 #ifndef PERL_STRICT_CR
10934 		if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10935 		    char *end = SvPVX(stuff) + SvCUR(stuff);
10936 		    end[-2] = '\n';
10937 		    end[-1] = '\0';
10938 		    SvCUR_set(stuff, SvCUR(stuff) - 1);
10939 		}
10940 #endif
10941 	    }
10942 	    else
10943 	      break;
10944 	}
10945 	s = (char*)eol;
10946 	if ((PL_rsfp || PL_parser->filtered)
10947 	 && PL_parser->form_lex_state == LEX_NORMAL) {
10948 	    bool got_some;
10949 	    PL_bufptr = PL_bufend;
10950 	    COPLINE_INC_WITH_HERELINES;
10951 	    got_some = lex_next_chunk(0);
10952 	    CopLINE_dec(PL_curcop);
10953 	    s = PL_bufptr;
10954 	    if (!got_some)
10955 		break;
10956 	}
10957 	incline(s);
10958     }
10959   enough:
10960     if (!SvCUR(stuff) || needargs)
10961 	PL_lex_state = PL_parser->form_lex_state;
10962     if (SvCUR(stuff)) {
10963 	PL_expect = XSTATE;
10964 	if (needargs) {
10965 	    const char *s2 = s;
10966 	    while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10967 		|| *s2 == '\v')
10968 		s2++;
10969 	    if (*s2 == '{') {
10970 		PL_expect = XTERMBLOCK;
10971 		NEXTVAL_NEXTTOKE.ival = 0;
10972 		force_next(DO);
10973 	    }
10974 	    NEXTVAL_NEXTTOKE.ival = 0;
10975 	    force_next(FORMLBRACK);
10976 	}
10977 	if (!IN_BYTES) {
10978 	    if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10979 		SvUTF8_on(stuff);
10980 	    else if (IN_ENCODING)
10981 		sv_recode_to_utf8(stuff, _get_encoding());
10982 	}
10983 	NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10984 	force_next(THING);
10985     }
10986     else {
10987 	SvREFCNT_dec(stuff);
10988 	if (eofmt)
10989 	    PL_lex_formbrack = 0;
10990     }
10991     return s;
10992 }
10993 
10994 I32
10995 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10996 {
10997     const I32 oldsavestack_ix = PL_savestack_ix;
10998     CV* const outsidecv = PL_compcv;
10999 
11000     SAVEI32(PL_subline);
11001     save_item(PL_subname);
11002     SAVESPTR(PL_compcv);
11003 
11004     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11005     CvFLAGS(PL_compcv) |= flags;
11006 
11007     PL_subline = CopLINE(PL_curcop);
11008     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11009     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11010     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11011     if (outsidecv && CvPADLIST(outsidecv))
11012 	CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11013 
11014     return oldsavestack_ix;
11015 }
11016 
11017 static int
11018 S_yywarn(pTHX_ const char *const s, U32 flags)
11019 {
11020     PERL_ARGS_ASSERT_YYWARN;
11021 
11022     PL_in_eval |= EVAL_WARNONLY;
11023     yyerror_pv(s, flags);
11024     return 0;
11025 }
11026 
11027 int
11028 Perl_yyerror(pTHX_ const char *const s)
11029 {
11030     PERL_ARGS_ASSERT_YYERROR;
11031     return yyerror_pvn(s, strlen(s), 0);
11032 }
11033 
11034 int
11035 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11036 {
11037     PERL_ARGS_ASSERT_YYERROR_PV;
11038     return yyerror_pvn(s, strlen(s), flags);
11039 }
11040 
11041 int
11042 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11043 {
11044     const char *context = NULL;
11045     int contlen = -1;
11046     SV *msg;
11047     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11048     int yychar  = PL_parser->yychar;
11049 
11050     PERL_ARGS_ASSERT_YYERROR_PVN;
11051 
11052     if (!yychar || (yychar == ';' && !PL_rsfp))
11053 	sv_catpvs(where_sv, "at EOF");
11054     else if (   PL_oldoldbufptr
11055              && PL_bufptr > PL_oldoldbufptr
11056              && PL_bufptr - PL_oldoldbufptr < 200
11057              && PL_oldoldbufptr != PL_oldbufptr
11058              && PL_oldbufptr != PL_bufptr)
11059     {
11060 	/*
11061 		Only for NetWare:
11062 		The code below is removed for NetWare because it abends/crashes on NetWare
11063 		when the script has error such as not having the closing quotes like:
11064 		    if ($var eq "value)
11065 		Checking of white spaces is anyway done in NetWare code.
11066 	*/
11067 #ifndef NETWARE
11068 	while (isSPACE(*PL_oldoldbufptr))
11069 	    PL_oldoldbufptr++;
11070 #endif
11071 	context = PL_oldoldbufptr;
11072 	contlen = PL_bufptr - PL_oldoldbufptr;
11073     }
11074     else if (  PL_oldbufptr
11075             && PL_bufptr > PL_oldbufptr
11076             && PL_bufptr - PL_oldbufptr < 200
11077             && PL_oldbufptr != PL_bufptr) {
11078 	/*
11079 		Only for NetWare:
11080 		The code below is removed for NetWare because it abends/crashes on NetWare
11081 		when the script has error such as not having the closing quotes like:
11082 		    if ($var eq "value)
11083 		Checking of white spaces is anyway done in NetWare code.
11084 	*/
11085 #ifndef NETWARE
11086 	while (isSPACE(*PL_oldbufptr))
11087 	    PL_oldbufptr++;
11088 #endif
11089 	context = PL_oldbufptr;
11090 	contlen = PL_bufptr - PL_oldbufptr;
11091     }
11092     else if (yychar > 255)
11093 	sv_catpvs(where_sv, "next token ???");
11094     else if (yychar == YYEMPTY) {
11095 	if (    PL_lex_state == LEX_NORMAL
11096             || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11097 	    sv_catpvs(where_sv, "at end of line");
11098 	else if (PL_lex_inpat)
11099 	    sv_catpvs(where_sv, "within pattern");
11100 	else
11101 	    sv_catpvs(where_sv, "within string");
11102     }
11103     else {
11104 	sv_catpvs(where_sv, "next char ");
11105 	if (yychar < 32)
11106 	    Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11107 	else if (isPRINT_LC(yychar)) {
11108 	    const char string = yychar;
11109 	    sv_catpvn(where_sv, &string, 1);
11110 	}
11111 	else
11112 	    Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11113     }
11114     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11115     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11116         OutCopFILE(PL_curcop),
11117         (IV)(PL_parser->preambling == NOLINE
11118                ? CopLINE(PL_curcop)
11119                : PL_parser->preambling));
11120     if (context)
11121 	Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11122 			     UTF8fARG(UTF, contlen, context));
11123     else
11124 	Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11125     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11126         Perl_sv_catpvf(aTHX_ msg,
11127         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11128                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11129         PL_multi_end = 0;
11130     }
11131     if (PL_in_eval & EVAL_WARNONLY) {
11132 	PL_in_eval &= ~EVAL_WARNONLY;
11133 	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11134     }
11135     else
11136 	qerror(msg);
11137     if (PL_error_count >= 10) {
11138 	SV * errsv;
11139 	if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11140 	    Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11141 		       SVfARG(errsv), OutCopFILE(PL_curcop));
11142 	else
11143 	    Perl_croak(aTHX_ "%s has too many errors.\n",
11144             OutCopFILE(PL_curcop));
11145     }
11146     PL_in_my = 0;
11147     PL_in_my_stash = NULL;
11148     return 0;
11149 }
11150 
11151 STATIC char*
11152 S_swallow_bom(pTHX_ U8 *s)
11153 {
11154     const STRLEN slen = SvCUR(PL_linestr);
11155 
11156     PERL_ARGS_ASSERT_SWALLOW_BOM;
11157 
11158     switch (s[0]) {
11159     case 0xFF:
11160 	if (s[1] == 0xFE) {
11161 	    /* UTF-16 little-endian? (or UTF-32LE?) */
11162 	    if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11163 		/* diag_listed_as: Unsupported script encoding %s */
11164 		Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11165 #ifndef PERL_NO_UTF16_FILTER
11166 	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11167 	    s += 2;
11168 	    if (PL_bufend > (char*)s) {
11169 		s = add_utf16_textfilter(s, TRUE);
11170 	    }
11171 #else
11172 	    /* diag_listed_as: Unsupported script encoding %s */
11173 	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11174 #endif
11175 	}
11176 	break;
11177     case 0xFE:
11178 	if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11179 #ifndef PERL_NO_UTF16_FILTER
11180 	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11181 	    s += 2;
11182 	    if (PL_bufend > (char *)s) {
11183 		s = add_utf16_textfilter(s, FALSE);
11184 	    }
11185 #else
11186 	    /* diag_listed_as: Unsupported script encoding %s */
11187 	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11188 #endif
11189 	}
11190 	break;
11191     case BOM_UTF8_FIRST_BYTE: {
11192         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11193         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11194             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11195             s += len + 1;                      /* UTF-8 */
11196         }
11197         break;
11198     }
11199     case 0:
11200 	if (slen > 3) {
11201 	     if (s[1] == 0) {
11202 		  if (s[2] == 0xFE && s[3] == 0xFF) {
11203 		       /* UTF-32 big-endian */
11204 		       /* diag_listed_as: Unsupported script encoding %s */
11205 		       Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11206 		  }
11207 	     }
11208 	     else if (s[2] == 0 && s[3] != 0) {
11209 		  /* Leading bytes
11210 		   * 00 xx 00 xx
11211 		   * are a good indicator of UTF-16BE. */
11212 #ifndef PERL_NO_UTF16_FILTER
11213 		  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11214 		  s = add_utf16_textfilter(s, FALSE);
11215 #else
11216 		  /* diag_listed_as: Unsupported script encoding %s */
11217 		  Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11218 #endif
11219 	     }
11220 	}
11221         break;
11222 
11223     default:
11224 	 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11225 		  /* Leading bytes
11226 		   * xx 00 xx 00
11227 		   * are a good indicator of UTF-16LE. */
11228 #ifndef PERL_NO_UTF16_FILTER
11229 	      if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11230 	      s = add_utf16_textfilter(s, TRUE);
11231 #else
11232 	      /* diag_listed_as: Unsupported script encoding %s */
11233 	      Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11234 #endif
11235 	 }
11236     }
11237     return (char*)s;
11238 }
11239 
11240 
11241 #ifndef PERL_NO_UTF16_FILTER
11242 static I32
11243 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11244 {
11245     SV *const filter = FILTER_DATA(idx);
11246     /* We re-use this each time round, throwing the contents away before we
11247        return.  */
11248     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11249     SV *const utf8_buffer = filter;
11250     IV status = IoPAGE(filter);
11251     const bool reverse = cBOOL(IoLINES(filter));
11252     I32 retval;
11253 
11254     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11255 
11256     /* As we're automatically added, at the lowest level, and hence only called
11257        from this file, we can be sure that we're not called in block mode. Hence
11258        don't bother writing code to deal with block mode.  */
11259     if (maxlen) {
11260 	Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11261     }
11262     if (status < 0) {
11263 	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11264     }
11265     DEBUG_P(PerlIO_printf(Perl_debug_log,
11266 			  "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11267 			  FPTR2DPTR(void *, S_utf16_textfilter),
11268 			  reverse ? 'l' : 'b', idx, maxlen, status,
11269 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11270 
11271     while (1) {
11272 	STRLEN chars;
11273 	STRLEN have;
11274 	I32 newlen;
11275 	U8 *end;
11276 	/* First, look in our buffer of existing UTF-8 data:  */
11277 	char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11278 
11279 	if (nl) {
11280 	    ++nl;
11281 	} else if (status == 0) {
11282 	    /* EOF */
11283 	    IoPAGE(filter) = 0;
11284 	    nl = SvEND(utf8_buffer);
11285 	}
11286 	if (nl) {
11287 	    STRLEN got = nl - SvPVX(utf8_buffer);
11288 	    /* Did we have anything to append?  */
11289 	    retval = got != 0;
11290 	    sv_catpvn(sv, SvPVX(utf8_buffer), got);
11291 	    /* Everything else in this code works just fine if SVp_POK isn't
11292 	       set.  This, however, needs it, and we need it to work, else
11293 	       we loop infinitely because the buffer is never consumed.  */
11294 	    sv_chop(utf8_buffer, nl);
11295 	    break;
11296 	}
11297 
11298 	/* OK, not a complete line there, so need to read some more UTF-16.
11299 	   Read an extra octect if the buffer currently has an odd number. */
11300 	while (1) {
11301 	    if (status <= 0)
11302 		break;
11303 	    if (SvCUR(utf16_buffer) >= 2) {
11304 		/* Location of the high octet of the last complete code point.
11305 		   Gosh, UTF-16 is a pain. All the benefits of variable length,
11306 		   *coupled* with all the benefits of partial reads and
11307 		   endianness.  */
11308 		const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11309 		    + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11310 
11311 		if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11312 		    break;
11313 		}
11314 
11315 		/* We have the first half of a surrogate. Read more.  */
11316 		DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11317 	    }
11318 
11319 	    status = FILTER_READ(idx + 1, utf16_buffer,
11320 				 160 + (SvCUR(utf16_buffer) & 1));
11321 	    DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11322 	    DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11323 	    if (status < 0) {
11324 		/* Error */
11325 		IoPAGE(filter) = status;
11326 		return status;
11327 	    }
11328 	}
11329 
11330 	chars = SvCUR(utf16_buffer) >> 1;
11331 	have = SvCUR(utf8_buffer);
11332 	SvGROW(utf8_buffer, have + chars * 3 + 1);
11333 
11334 	if (reverse) {
11335 	    end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11336 					 (U8*)SvPVX_const(utf8_buffer) + have,
11337 					 chars * 2, &newlen);
11338 	} else {
11339 	    end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11340 				(U8*)SvPVX_const(utf8_buffer) + have,
11341 				chars * 2, &newlen);
11342 	}
11343 	SvCUR_set(utf8_buffer, have + newlen);
11344 	*end = '\0';
11345 
11346 	/* No need to keep this SV "well-formed" with a '\0' after the end, as
11347 	   it's private to us, and utf16_to_utf8{,reversed} take a
11348 	   (pointer,length) pair, rather than a NUL-terminated string.  */
11349 	if(SvCUR(utf16_buffer) & 1) {
11350 	    *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11351 	    SvCUR_set(utf16_buffer, 1);
11352 	} else {
11353 	    SvCUR_set(utf16_buffer, 0);
11354 	}
11355     }
11356     DEBUG_P(PerlIO_printf(Perl_debug_log,
11357 			  "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11358 			  status,
11359 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11360     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11361     return retval;
11362 }
11363 
11364 static U8 *
11365 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11366 {
11367     SV *filter = filter_add(S_utf16_textfilter, NULL);
11368 
11369     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11370 
11371     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11372     sv_setpvs(filter, "");
11373     IoLINES(filter) = reversed;
11374     IoPAGE(filter) = 1; /* Not EOF */
11375 
11376     /* Sadly, we have to return a valid pointer, come what may, so we have to
11377        ignore any error return from this.  */
11378     SvCUR_set(PL_linestr, 0);
11379     if (FILTER_READ(0, PL_linestr, 0)) {
11380 	SvUTF8_on(PL_linestr);
11381     } else {
11382 	SvUTF8_on(PL_linestr);
11383     }
11384     PL_bufend = SvEND(PL_linestr);
11385     return (U8*)SvPVX(PL_linestr);
11386 }
11387 #endif
11388 
11389 /*
11390 Returns a pointer to the next character after the parsed
11391 vstring, as well as updating the passed in sv.
11392 
11393 Function must be called like
11394 
11395 	sv = sv_2mortal(newSV(5));
11396 	s = scan_vstring(s,e,sv);
11397 
11398 where s and e are the start and end of the string.
11399 The sv should already be large enough to store the vstring
11400 passed in, for performance reasons.
11401 
11402 This function may croak if fatal warnings are enabled in the
11403 calling scope, hence the sv_2mortal in the example (to prevent
11404 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11405 sv_2mortal.
11406 
11407 */
11408 
11409 char *
11410 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11411 {
11412     const char *pos = s;
11413     const char *start = s;
11414 
11415     PERL_ARGS_ASSERT_SCAN_VSTRING;
11416 
11417     if (*pos == 'v') pos++;  /* get past 'v' */
11418     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11419 	pos++;
11420     if ( *pos != '.') {
11421 	/* this may not be a v-string if followed by => */
11422 	const char *next = pos;
11423 	while (next < e && isSPACE(*next))
11424 	    ++next;
11425 	if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11426 	    /* return string not v-string */
11427 	    sv_setpvn(sv,(char *)s,pos-s);
11428 	    return (char *)pos;
11429 	}
11430     }
11431 
11432     if (!isALPHA(*pos)) {
11433 	U8 tmpbuf[UTF8_MAXBYTES+1];
11434 
11435 	if (*s == 'v')
11436 	    s++;  /* get past 'v' */
11437 
11438 	sv_setpvs(sv, "");
11439 
11440 	for (;;) {
11441 	    /* this is atoi() that tolerates underscores */
11442 	    U8 *tmpend;
11443 	    UV rev = 0;
11444 	    const char *end = pos;
11445 	    UV mult = 1;
11446 	    while (--end >= s) {
11447 		if (*end != '_') {
11448 		    const UV orev = rev;
11449 		    rev += (*end - '0') * mult;
11450 		    mult *= 10;
11451 		    if (orev > rev)
11452 			/* diag_listed_as: Integer overflow in %s number */
11453 			Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11454 					 "Integer overflow in decimal number");
11455 		}
11456 	    }
11457 
11458 	    /* Append native character for the rev point */
11459 	    tmpend = uvchr_to_utf8(tmpbuf, rev);
11460 	    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11461 	    if (!UVCHR_IS_INVARIANT(rev))
11462 		 SvUTF8_on(sv);
11463 	    if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11464 		 s = ++pos;
11465 	    else {
11466 		 s = pos;
11467 		 break;
11468 	    }
11469 	    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11470 		 pos++;
11471 	}
11472 	SvPOK_on(sv);
11473 	sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11474 	SvRMAGICAL_on(sv);
11475     }
11476     return (char *)s;
11477 }
11478 
11479 int
11480 Perl_keyword_plugin_standard(pTHX_
11481 	char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11482 {
11483     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11484     PERL_UNUSED_CONTEXT;
11485     PERL_UNUSED_ARG(keyword_ptr);
11486     PERL_UNUSED_ARG(keyword_len);
11487     PERL_UNUSED_ARG(op_ptr);
11488     return KEYWORD_PLUGIN_DECLINE;
11489 }
11490 
11491 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11492 static void
11493 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11494 {
11495     SAVEI32(PL_lex_brackets);
11496     if (PL_lex_brackets > 100)
11497 	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11498     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11499     SAVEI32(PL_lex_allbrackets);
11500     PL_lex_allbrackets = 0;
11501     SAVEI8(PL_lex_fakeeof);
11502     PL_lex_fakeeof = (U8)fakeeof;
11503     if(yyparse(gramtype) && !PL_parser->error_count)
11504 	qerror(Perl_mess(aTHX_ "Parse error"));
11505 }
11506 
11507 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11508 static OP *
11509 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11510 {
11511     OP *o;
11512     ENTER;
11513     SAVEVPTR(PL_eval_root);
11514     PL_eval_root = NULL;
11515     parse_recdescent(gramtype, fakeeof);
11516     o = PL_eval_root;
11517     LEAVE;
11518     return o;
11519 }
11520 
11521 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11522 static OP *
11523 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11524 {
11525     OP *exprop;
11526     if (flags & ~PARSE_OPTIONAL)
11527 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11528     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11529     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11530 	if (!PL_parser->error_count)
11531 	    qerror(Perl_mess(aTHX_ "Parse error"));
11532 	exprop = newOP(OP_NULL, 0);
11533     }
11534     return exprop;
11535 }
11536 
11537 /*
11538 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11539 
11540 Parse a Perl arithmetic expression.  This may contain operators of precedence
11541 down to the bit shift operators.  The expression must be followed (and thus
11542 terminated) either by a comparison or lower-precedence operator or by
11543 something that would normally terminate an expression such as semicolon.
11544 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11545 otherwise it is mandatory.  It is up to the caller to ensure that the
11546 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11547 the source of the code to be parsed and the lexical context for the
11548 expression.
11549 
11550 The op tree representing the expression is returned.  If an optional
11551 expression is absent, a null pointer is returned, otherwise the pointer
11552 will be non-null.
11553 
11554 If an error occurs in parsing or compilation, in most cases a valid op
11555 tree is returned anyway.  The error is reflected in the parser state,
11556 normally resulting in a single exception at the top level of parsing
11557 which covers all the compilation errors that occurred.  Some compilation
11558 errors, however, will throw an exception immediately.
11559 
11560 =cut
11561 */
11562 
11563 OP *
11564 Perl_parse_arithexpr(pTHX_ U32 flags)
11565 {
11566     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11567 }
11568 
11569 /*
11570 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11571 
11572 Parse a Perl term expression.  This may contain operators of precedence
11573 down to the assignment operators.  The expression must be followed (and thus
11574 terminated) either by a comma or lower-precedence operator or by
11575 something that would normally terminate an expression such as semicolon.
11576 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11577 otherwise it is mandatory.  It is up to the caller to ensure that the
11578 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11579 the source of the code to be parsed and the lexical context for the
11580 expression.
11581 
11582 The op tree representing the expression is returned.  If an optional
11583 expression is absent, a null pointer is returned, otherwise the pointer
11584 will be non-null.
11585 
11586 If an error occurs in parsing or compilation, in most cases a valid op
11587 tree is returned anyway.  The error is reflected in the parser state,
11588 normally resulting in a single exception at the top level of parsing
11589 which covers all the compilation errors that occurred.  Some compilation
11590 errors, however, will throw an exception immediately.
11591 
11592 =cut
11593 */
11594 
11595 OP *
11596 Perl_parse_termexpr(pTHX_ U32 flags)
11597 {
11598     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11599 }
11600 
11601 /*
11602 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11603 
11604 Parse a Perl list expression.  This may contain operators of precedence
11605 down to the comma operator.  The expression must be followed (and thus
11606 terminated) either by a low-precedence logic operator such as C<or> or by
11607 something that would normally terminate an expression such as semicolon.
11608 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11609 otherwise it is mandatory.  It is up to the caller to ensure that the
11610 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11611 the source of the code to be parsed and the lexical context for the
11612 expression.
11613 
11614 The op tree representing the expression is returned.  If an optional
11615 expression is absent, a null pointer is returned, otherwise the pointer
11616 will be non-null.
11617 
11618 If an error occurs in parsing or compilation, in most cases a valid op
11619 tree is returned anyway.  The error is reflected in the parser state,
11620 normally resulting in a single exception at the top level of parsing
11621 which covers all the compilation errors that occurred.  Some compilation
11622 errors, however, will throw an exception immediately.
11623 
11624 =cut
11625 */
11626 
11627 OP *
11628 Perl_parse_listexpr(pTHX_ U32 flags)
11629 {
11630     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11631 }
11632 
11633 /*
11634 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11635 
11636 Parse a single complete Perl expression.  This allows the full
11637 expression grammar, including the lowest-precedence operators such
11638 as C<or>.  The expression must be followed (and thus terminated) by a
11639 token that an expression would normally be terminated by: end-of-file,
11640 closing bracketing punctuation, semicolon, or one of the keywords that
11641 signals a postfix expression-statement modifier.  If C<flags> has the
11642 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11643 mandatory.  It is up to the caller to ensure that the dynamic parser
11644 state (L</PL_parser> et al) is correctly set to reflect the source of
11645 the code to be parsed and the lexical context for the expression.
11646 
11647 The op tree representing the expression is returned.  If an optional
11648 expression is absent, a null pointer is returned, otherwise the pointer
11649 will be non-null.
11650 
11651 If an error occurs in parsing or compilation, in most cases a valid op
11652 tree is returned anyway.  The error is reflected in the parser state,
11653 normally resulting in a single exception at the top level of parsing
11654 which covers all the compilation errors that occurred.  Some compilation
11655 errors, however, will throw an exception immediately.
11656 
11657 =cut
11658 */
11659 
11660 OP *
11661 Perl_parse_fullexpr(pTHX_ U32 flags)
11662 {
11663     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11664 }
11665 
11666 /*
11667 =for apidoc Amx|OP *|parse_block|U32 flags
11668 
11669 Parse a single complete Perl code block.  This consists of an opening
11670 brace, a sequence of statements, and a closing brace.  The block
11671 constitutes a lexical scope, so C<my> variables and various compile-time
11672 effects can be contained within it.  It is up to the caller to ensure
11673 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11674 reflect the source of the code to be parsed and the lexical context for
11675 the statement.
11676 
11677 The op tree representing the code block is returned.  This is always a
11678 real op, never a null pointer.  It will normally be a C<lineseq> list,
11679 including C<nextstate> or equivalent ops.  No ops to construct any kind
11680 of runtime scope are included by virtue of it being a block.
11681 
11682 If an error occurs in parsing or compilation, in most cases a valid op
11683 tree (most likely null) is returned anyway.  The error is reflected in
11684 the parser state, normally resulting in a single exception at the top
11685 level of parsing which covers all the compilation errors that occurred.
11686 Some compilation errors, however, will throw an exception immediately.
11687 
11688 The C<flags> parameter is reserved for future use, and must always
11689 be zero.
11690 
11691 =cut
11692 */
11693 
11694 OP *
11695 Perl_parse_block(pTHX_ U32 flags)
11696 {
11697     if (flags)
11698 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11699     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11700 }
11701 
11702 /*
11703 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11704 
11705 Parse a single unadorned Perl statement.  This may be a normal imperative
11706 statement or a declaration that has compile-time effect.  It does not
11707 include any label or other affixture.  It is up to the caller to ensure
11708 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11709 reflect the source of the code to be parsed and the lexical context for
11710 the statement.
11711 
11712 The op tree representing the statement is returned.  This may be a
11713 null pointer if the statement is null, for example if it was actually
11714 a subroutine definition (which has compile-time side effects).  If not
11715 null, it will be ops directly implementing the statement, suitable to
11716 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11717 equivalent op (except for those embedded in a scope contained entirely
11718 within the statement).
11719 
11720 If an error occurs in parsing or compilation, in most cases a valid op
11721 tree (most likely null) is returned anyway.  The error is reflected in
11722 the parser state, normally resulting in a single exception at the top
11723 level of parsing which covers all the compilation errors that occurred.
11724 Some compilation errors, however, will throw an exception immediately.
11725 
11726 The C<flags> parameter is reserved for future use, and must always
11727 be zero.
11728 
11729 =cut
11730 */
11731 
11732 OP *
11733 Perl_parse_barestmt(pTHX_ U32 flags)
11734 {
11735     if (flags)
11736 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11737     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11738 }
11739 
11740 /*
11741 =for apidoc Amx|SV *|parse_label|U32 flags
11742 
11743 Parse a single label, possibly optional, of the type that may prefix a
11744 Perl statement.  It is up to the caller to ensure that the dynamic parser
11745 state (L</PL_parser> et al) is correctly set to reflect the source of
11746 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11747 label is optional, otherwise it is mandatory.
11748 
11749 The name of the label is returned in the form of a fresh scalar.  If an
11750 optional label is absent, a null pointer is returned.
11751 
11752 If an error occurs in parsing, which can only occur if the label is
11753 mandatory, a valid label is returned anyway.  The error is reflected in
11754 the parser state, normally resulting in a single exception at the top
11755 level of parsing which covers all the compilation errors that occurred.
11756 
11757 =cut
11758 */
11759 
11760 SV *
11761 Perl_parse_label(pTHX_ U32 flags)
11762 {
11763     if (flags & ~PARSE_OPTIONAL)
11764 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11765     if (PL_lex_state == LEX_KNOWNEXT) {
11766 	PL_parser->yychar = yylex();
11767 	if (PL_parser->yychar == LABEL) {
11768 	    char * const lpv = pl_yylval.pval;
11769 	    STRLEN llen = strlen(lpv);
11770 	    PL_parser->yychar = YYEMPTY;
11771 	    return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11772 	} else {
11773 	    yyunlex();
11774 	    goto no_label;
11775 	}
11776     } else {
11777 	char *s, *t;
11778 	STRLEN wlen, bufptr_pos;
11779 	lex_read_space(0);
11780 	t = s = PL_bufptr;
11781         if (!isIDFIRST_lazy_if(s, UTF))
11782 	    goto no_label;
11783 	t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11784 	if (word_takes_any_delimeter(s, wlen))
11785 	    goto no_label;
11786 	bufptr_pos = s - SvPVX(PL_linestr);
11787 	PL_bufptr = t;
11788 	lex_read_space(LEX_KEEP_PREVIOUS);
11789 	t = PL_bufptr;
11790 	s = SvPVX(PL_linestr) + bufptr_pos;
11791 	if (t[0] == ':' && t[1] != ':') {
11792 	    PL_oldoldbufptr = PL_oldbufptr;
11793 	    PL_oldbufptr = s;
11794 	    PL_bufptr = t+1;
11795 	    return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11796 	} else {
11797 	    PL_bufptr = s;
11798 	    no_label:
11799 	    if (flags & PARSE_OPTIONAL) {
11800 		return NULL;
11801 	    } else {
11802 		qerror(Perl_mess(aTHX_ "Parse error"));
11803 		return newSVpvs("x");
11804 	    }
11805 	}
11806     }
11807 }
11808 
11809 /*
11810 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11811 
11812 Parse a single complete Perl statement.  This may be a normal imperative
11813 statement or a declaration that has compile-time effect, and may include
11814 optional labels.  It is up to the caller to ensure that the dynamic
11815 parser state (L</PL_parser> et al) is correctly set to reflect the source
11816 of the code to be parsed and the lexical context for the statement.
11817 
11818 The op tree representing the statement is returned.  This may be a
11819 null pointer if the statement is null, for example if it was actually
11820 a subroutine definition (which has compile-time side effects).  If not
11821 null, it will be the result of a L</newSTATEOP> call, normally including
11822 a C<nextstate> or equivalent op.
11823 
11824 If an error occurs in parsing or compilation, in most cases a valid op
11825 tree (most likely null) is returned anyway.  The error is reflected in
11826 the parser state, normally resulting in a single exception at the top
11827 level of parsing which covers all the compilation errors that occurred.
11828 Some compilation errors, however, will throw an exception immediately.
11829 
11830 The C<flags> parameter is reserved for future use, and must always
11831 be zero.
11832 
11833 =cut
11834 */
11835 
11836 OP *
11837 Perl_parse_fullstmt(pTHX_ U32 flags)
11838 {
11839     if (flags)
11840 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11841     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11842 }
11843 
11844 /*
11845 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11846 
11847 Parse a sequence of zero or more Perl statements.  These may be normal
11848 imperative statements, including optional labels, or declarations
11849 that have compile-time effect, or any mixture thereof.  The statement
11850 sequence ends when a closing brace or end-of-file is encountered in a
11851 place where a new statement could have validly started.  It is up to
11852 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11853 is correctly set to reflect the source of the code to be parsed and the
11854 lexical context for the statements.
11855 
11856 The op tree representing the statement sequence is returned.  This may
11857 be a null pointer if the statements were all null, for example if there
11858 were no statements or if there were only subroutine definitions (which
11859 have compile-time side effects).  If not null, it will be a C<lineseq>
11860 list, normally including C<nextstate> or equivalent ops.
11861 
11862 If an error occurs in parsing or compilation, in most cases a valid op
11863 tree is returned anyway.  The error is reflected in the parser state,
11864 normally resulting in a single exception at the top level of parsing
11865 which covers all the compilation errors that occurred.  Some compilation
11866 errors, however, will throw an exception immediately.
11867 
11868 The C<flags> parameter is reserved for future use, and must always
11869 be zero.
11870 
11871 =cut
11872 */
11873 
11874 OP *
11875 Perl_parse_stmtseq(pTHX_ U32 flags)
11876 {
11877     OP *stmtseqop;
11878     I32 c;
11879     if (flags)
11880 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11881     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11882     c = lex_peek_unichar(0);
11883     if (c != -1 && c != /*{*/'}')
11884 	qerror(Perl_mess(aTHX_ "Parse error"));
11885     return stmtseqop;
11886 }
11887 
11888 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11889 static void
11890 S_lex_token_boundary(pTHX)
11891 {
11892     PL_oldoldbufptr = PL_oldbufptr;
11893     PL_oldbufptr = PL_bufptr;
11894 }
11895 
11896 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11897 static OP *
11898 S_parse_opt_lexvar(pTHX)
11899 {
11900     I32 sigil, c;
11901     char *s, *d;
11902     OP *var;
11903     lex_token_boundary();
11904     sigil = lex_read_unichar(0);
11905     if (lex_peek_unichar(0) == '#') {
11906 	qerror(Perl_mess(aTHX_ "Parse error"));
11907 	return NULL;
11908     }
11909     lex_read_space(0);
11910     c = lex_peek_unichar(0);
11911     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11912 	return NULL;
11913     s = PL_bufptr;
11914     d = PL_tokenbuf + 1;
11915     PL_tokenbuf[0] = (char)sigil;
11916     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11917     PL_bufptr = s;
11918     if (d == PL_tokenbuf+1)
11919 	return NULL;
11920     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11921 		OPf_MOD | (OPpLVAL_INTRO<<8));
11922     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11923     return var;
11924 }
11925 
11926 OP *
11927 Perl_parse_subsignature(pTHX)
11928 {
11929     I32 c;
11930     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11931     OP *initops = NULL;
11932     lex_read_space(0);
11933     c = lex_peek_unichar(0);
11934     while (c != /*(*/')') {
11935 	switch (c) {
11936 	    case '$': {
11937 		OP *var, *expr;
11938 		if (prev_type == 2)
11939 		    qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11940 		var = parse_opt_lexvar();
11941 		expr = var ?
11942 		    newBINOP(OP_AELEM, 0,
11943 			ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11944 			    OP_RV2AV),
11945 			newSVOP(OP_CONST, 0, newSViv(pos))) :
11946 		    NULL;
11947 		lex_read_space(0);
11948 		c = lex_peek_unichar(0);
11949 		if (c == '=') {
11950 		    lex_token_boundary();
11951 		    lex_read_unichar(0);
11952 		    lex_read_space(0);
11953 		    c = lex_peek_unichar(0);
11954 		    if (c == ',' || c == /*(*/')') {
11955 			if (var)
11956 			    qerror(Perl_mess(aTHX_ "Optional parameter "
11957 				    "lacks default expression"));
11958 		    } else {
11959 			OP *defexpr = parse_termexpr(0);
11960 			if (defexpr->op_type == OP_UNDEF
11961                             && !(defexpr->op_flags & OPf_KIDS))
11962                         {
11963 			    op_free(defexpr);
11964 			} else {
11965 			    OP *ifop =
11966 				newBINOP(OP_GE, 0,
11967 				    scalar(newUNOP(OP_RV2AV, 0,
11968 					    newGVOP(OP_GV, 0, PL_defgv))),
11969 				    newSVOP(OP_CONST, 0, newSViv(pos+1)));
11970 			    expr = var ?
11971 				newCONDOP(0, ifop, expr, defexpr) :
11972 				newLOGOP(OP_OR, 0, ifop, defexpr);
11973 			}
11974 		    }
11975 		    prev_type = 1;
11976 		} else {
11977 		    if (prev_type == 1)
11978 			qerror(Perl_mess(aTHX_ "Mandatory parameter "
11979 				"follows optional parameter"));
11980 		    prev_type = 0;
11981 		    min_arity = pos + 1;
11982 		}
11983 		if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11984 		if (expr)
11985 		    initops = op_append_list(OP_LINESEQ, initops,
11986 				newSTATEOP(0, NULL, expr));
11987 		max_arity = ++pos;
11988 	    } break;
11989 	    case '@':
11990 	    case '%': {
11991 		OP *var;
11992 		if (prev_type == 2)
11993 		    qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11994 		var = parse_opt_lexvar();
11995 		if (c == '%') {
11996 		    OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11997 			    newBINOP(OP_BIT_AND, 0,
11998 				scalar(newUNOP(OP_RV2AV, 0,
11999 				    newGVOP(OP_GV, 0, PL_defgv))),
12000 				newSVOP(OP_CONST, 0, newSViv(1))),
12001 		            op_convert_list(OP_DIE, 0,
12002 		                op_convert_list(OP_SPRINTF, 0,
12003 		                    op_append_list(OP_LIST,
12004 		                        newSVOP(OP_CONST, 0,
12005 		                            newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
12006 		                        newSLICEOP(0,
12007 		                            op_append_list(OP_LIST,
12008 		                                newSVOP(OP_CONST, 0, newSViv(1)),
12009 		                                newSVOP(OP_CONST, 0, newSViv(2))),
12010 		                            newOP(OP_CALLER, 0))))));
12011 		    if (pos != min_arity)
12012 			chkop = newLOGOP(OP_AND, 0,
12013 				    newBINOP(OP_GT, 0,
12014 					scalar(newUNOP(OP_RV2AV, 0,
12015 					    newGVOP(OP_GV, 0, PL_defgv))),
12016 					newSVOP(OP_CONST, 0, newSViv(pos))),
12017 				    chkop);
12018 		    initops = op_append_list(OP_LINESEQ,
12019 				newSTATEOP(0, NULL, chkop),
12020 				initops);
12021 		}
12022 		if (var) {
12023 		    OP *slice = pos ?
12024 			op_prepend_elem(OP_ASLICE,
12025 			    newOP(OP_PUSHMARK, 0),
12026 			    newLISTOP(OP_ASLICE, 0,
12027 				list(newRANGE(0,
12028 				    newSVOP(OP_CONST, 0, newSViv(pos)),
12029 				    newUNOP(OP_AV2ARYLEN, 0,
12030 					ref(newUNOP(OP_RV2AV, 0,
12031 						newGVOP(OP_GV, 0, PL_defgv)),
12032 					    OP_AV2ARYLEN)))),
12033 				ref(newUNOP(OP_RV2AV, 0,
12034 					newGVOP(OP_GV, 0, PL_defgv)),
12035 				    OP_ASLICE))) :
12036 			newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12037 		    initops = op_append_list(OP_LINESEQ, initops,
12038 			newSTATEOP(0, NULL,
12039 			    newASSIGNOP(OPf_STACKED, var, 0, slice)));
12040 		}
12041 		prev_type = 2;
12042 		max_arity = -1;
12043 	    } break;
12044 	    default:
12045 		parse_error:
12046 		qerror(Perl_mess(aTHX_ "Parse error"));
12047 		return NULL;
12048 	}
12049 	lex_read_space(0);
12050 	c = lex_peek_unichar(0);
12051 	switch (c) {
12052 	    case /*(*/')': break;
12053 	    case ',':
12054 		do {
12055 		    lex_token_boundary();
12056 		    lex_read_unichar(0);
12057 		    lex_read_space(0);
12058 		    c = lex_peek_unichar(0);
12059 		} while (c == ',');
12060 		break;
12061 	    default:
12062 		goto parse_error;
12063 	}
12064     }
12065     if (min_arity != 0) {
12066 	initops = op_append_list(OP_LINESEQ,
12067 	    newSTATEOP(0, NULL,
12068 		newLOGOP(OP_OR, 0,
12069 		    newBINOP(OP_GE, 0,
12070 			scalar(newUNOP(OP_RV2AV, 0,
12071 			    newGVOP(OP_GV, 0, PL_defgv))),
12072 			newSVOP(OP_CONST, 0, newSViv(min_arity))),
12073 		    op_convert_list(OP_DIE, 0,
12074 		        op_convert_list(OP_SPRINTF, 0,
12075 		            op_append_list(OP_LIST,
12076 		                newSVOP(OP_CONST, 0,
12077 		                    newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
12078 		                newSLICEOP(0,
12079 		                    op_append_list(OP_LIST,
12080 		                        newSVOP(OP_CONST, 0, newSViv(1)),
12081 		                        newSVOP(OP_CONST, 0, newSViv(2))),
12082 		                    newOP(OP_CALLER, 0))))))),
12083 	    initops);
12084     }
12085     if (max_arity != -1) {
12086 	initops = op_append_list(OP_LINESEQ,
12087 	    newSTATEOP(0, NULL,
12088 		newLOGOP(OP_OR, 0,
12089 		    newBINOP(OP_LE, 0,
12090 			scalar(newUNOP(OP_RV2AV, 0,
12091 			    newGVOP(OP_GV, 0, PL_defgv))),
12092 			newSVOP(OP_CONST, 0, newSViv(max_arity))),
12093 		    op_convert_list(OP_DIE, 0,
12094 		        op_convert_list(OP_SPRINTF, 0,
12095 		            op_append_list(OP_LIST,
12096 		                newSVOP(OP_CONST, 0,
12097 		                    newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
12098 		                newSLICEOP(0,
12099 		                    op_append_list(OP_LIST,
12100 		                        newSVOP(OP_CONST, 0, newSViv(1)),
12101 		                        newSVOP(OP_CONST, 0, newSViv(2))),
12102 		                    newOP(OP_CALLER, 0))))))),
12103 	    initops);
12104     }
12105     return initops;
12106 }
12107 
12108 /*
12109  * ex: set ts=8 sts=4 sw=4 et:
12110  */
12111