File Coverage

toke.c
Criterion Covered Total %
statement 4203 4568 92.0
branch 5015 6734 74.5
condition n/a
subroutine n/a
total 9218 11302 81.6


line stmt bran cond sub time code
1           /* toke.c
2           *
3           * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4           * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5           *
6           * You may distribute under the terms of either the GNU General Public
7           * License or the Artistic License, as specified in the README file.
8           *
9           */
10            
11           /*
12           * 'It all comes from here, the stench and the peril.' --Frodo
13           *
14           * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15           */
16            
17           /*
18           * This file is the lexer for Perl. It's closely linked to the
19           * parser, perly.y.
20           *
21           * The main routine is yylex(), which returns the next token.
22           */
23            
24           /*
25           =head1 Lexer interface
26            
27           This is the lower layer of the Perl parser, managing characters and tokens.
28            
29           =for apidoc AmU|yy_parser *|PL_parser
30            
31           Pointer to a structure encapsulating the state of the parsing operation
32           currently in progress. The pointer can be locally changed to perform
33           a nested parse without interfering with the state of an outer parse.
34           Individual members of C have their own documentation.
35            
36           =cut
37           */
38            
39           #include "EXTERN.h"
40           #define PERL_IN_TOKE_C
41           #include "perl.h"
42           #include "dquote_static.c"
43            
44           #define new_constant(a,b,c,d,e,f,g) \
45           S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46            
47           #define pl_yylval (PL_parser->yylval)
48            
49           /* XXX temporary backwards compatibility */
50           #define PL_lex_brackets (PL_parser->lex_brackets)
51           #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52           #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53           #define PL_lex_brackstack (PL_parser->lex_brackstack)
54           #define PL_lex_casemods (PL_parser->lex_casemods)
55           #define PL_lex_casestack (PL_parser->lex_casestack)
56           #define PL_lex_defer (PL_parser->lex_defer)
57           #define PL_lex_dojoin (PL_parser->lex_dojoin)
58           #define PL_lex_expect (PL_parser->lex_expect)
59           #define PL_lex_formbrack (PL_parser->lex_formbrack)
60           #define PL_lex_inpat (PL_parser->lex_inpat)
61           #define PL_lex_inwhat (PL_parser->lex_inwhat)
62           #define PL_lex_op (PL_parser->lex_op)
63           #define PL_lex_repl (PL_parser->lex_repl)
64           #define PL_lex_starts (PL_parser->lex_starts)
65           #define PL_lex_stuff (PL_parser->lex_stuff)
66           #define PL_multi_start (PL_parser->multi_start)
67           #define PL_multi_open (PL_parser->multi_open)
68           #define PL_multi_close (PL_parser->multi_close)
69           #define PL_preambled (PL_parser->preambled)
70           #define PL_sublex_info (PL_parser->sublex_info)
71           #define PL_linestr (PL_parser->linestr)
72           #define PL_expect (PL_parser->expect)
73           #define PL_copline (PL_parser->copline)
74           #define PL_bufptr (PL_parser->bufptr)
75           #define PL_oldbufptr (PL_parser->oldbufptr)
76           #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77           #define PL_linestart (PL_parser->linestart)
78           #define PL_bufend (PL_parser->bufend)
79           #define PL_last_uni (PL_parser->last_uni)
80           #define PL_last_lop (PL_parser->last_lop)
81           #define PL_last_lop_op (PL_parser->last_lop_op)
82           #define PL_lex_state (PL_parser->lex_state)
83           #define PL_rsfp (PL_parser->rsfp)
84           #define PL_rsfp_filters (PL_parser->rsfp_filters)
85           #define PL_in_my (PL_parser->in_my)
86           #define PL_in_my_stash (PL_parser->in_my_stash)
87           #define PL_tokenbuf (PL_parser->tokenbuf)
88           #define PL_multi_end (PL_parser->multi_end)
89           #define PL_error_count (PL_parser->error_count)
90            
91           #ifdef PERL_MAD
92           # define PL_endwhite (PL_parser->endwhite)
93           # define PL_faketokens (PL_parser->faketokens)
94           # define PL_lasttoke (PL_parser->lasttoke)
95           # define PL_nextwhite (PL_parser->nextwhite)
96           # define PL_realtokenstart (PL_parser->realtokenstart)
97           # define PL_skipwhite (PL_parser->skipwhite)
98           # define PL_thisclose (PL_parser->thisclose)
99           # define PL_thismad (PL_parser->thismad)
100           # define PL_thisopen (PL_parser->thisopen)
101           # define PL_thisstuff (PL_parser->thisstuff)
102           # define PL_thistoken (PL_parser->thistoken)
103           # define PL_thiswhite (PL_parser->thiswhite)
104           # define PL_thiswhite (PL_parser->thiswhite)
105           # define PL_nexttoke (PL_parser->nexttoke)
106           # define PL_curforce (PL_parser->curforce)
107           #else
108           # define PL_nexttoke (PL_parser->nexttoke)
109           # define PL_nexttype (PL_parser->nexttype)
110           # define PL_nextval (PL_parser->nextval)
111           #endif
112            
113           static const char* const ident_too_long = "Identifier too long";
114            
115           #ifdef PERL_MAD
116           # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117           # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
118           #else
119           # define CURMAD(slot,sv)
120           # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
121           #endif
122            
123           #define XENUMMASK 0x3f
124           #define XFAKEEOF 0x40
125           #define XFAKEBRACK 0x80
126            
127           #ifdef USE_UTF8_SCRIPTS
128           # define UTF (!IN_BYTES)
129           #else
130           # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
131           #endif
132            
133           /* The maximum number of characters preceding the unrecognized one to display */
134           #define UNRECOGNIZED_PRECEDE_COUNT 10
135            
136           /* In variables named $^X, these are the legal values for X.
137           * 1999-02-27 mjd-perl-patch@plover.com */
138           #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139            
140           #define SPACE_OR_TAB(c) isBLANK_A(c)
141            
142           /* LEX_* are values for PL_lex_state, the state of the lexer.
143           * They are arranged oddly so that the guard on the switch statement
144           * can get by with a single comparison (if the compiler is smart enough).
145           *
146           * These values refer to the various states within a sublex parse,
147           * i.e. within a double quotish string
148           */
149            
150           /* #define LEX_NOTPARSING 11 is done in perl.h. */
151            
152           #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153           #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154           #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155           #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156           #define LEX_INTERPSTART 6 /* expecting the start of a $var */
157            
158           /* at end of code, eg "$x" followed by: */
159           #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160           #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161            
162           #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163           string or after \E, $foo, etc */
164           #define LEX_INTERPCONST 2 /* NOT USED */
165           #define LEX_FORMLINE 1 /* expecting a format line */
166           #define LEX_KNOWNEXT 0 /* next token known; just return it */
167            
168            
169           #ifdef DEBUGGING
170           static const char* const lex_state_names[] = {
171           "KNOWNEXT",
172           "FORMLINE",
173           "INTERPCONST",
174           "INTERPCONCAT",
175           "INTERPENDMAYBE",
176           "INTERPEND",
177           "INTERPSTART",
178           "INTERPPUSH",
179           "INTERPCASEMOD",
180           "INTERPNORMAL",
181           "NORMAL"
182           };
183           #endif
184            
185           #ifdef ff_next
186           #undef ff_next
187           #endif
188            
189           #include "keywords.h"
190            
191           /* CLINE is a macro that ensures PL_copline has a sane value */
192            
193           #ifdef CLINE
194           #undef CLINE
195           #endif
196           #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
197            
198           #ifdef PERL_MAD
199           # define SKIPSPACE0(s) skipspace0(s)
200           # define SKIPSPACE1(s) skipspace1(s)
201           # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202           # define PEEKSPACE(s) skipspace2(s,0)
203           #else
204           # define SKIPSPACE0(s) skipspace(s)
205           # define SKIPSPACE1(s) skipspace(s)
206           # define SKIPSPACE2(s,tsv) skipspace(s)
207           # define PEEKSPACE(s) skipspace(s)
208           #endif
209            
210           /*
211           * Convenience functions to return different tokens and prime the
212           * lexer for the next token. They all take an argument.
213           *
214           * TOKEN : generic token (used for '(', DOLSHARP, etc)
215           * OPERATOR : generic operator
216           * AOPERATOR : assignment operator
217           * PREBLOCK : beginning the block after an if, while, foreach, ...
218           * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219           * PREREF : *EXPR where EXPR is not a simple identifier
220           * TERM : expression term
221           * LOOPX : loop exiting command (goto, last, dump, etc)
222           * FTST : file test operator
223           * FUN0 : zero-argument function
224           * FUN0OP : zero-argument function, with its op created in this file
225           * FUN1 : not used, except for not, which isn't a UNIOP
226           * BOop : bitwise or or xor
227           * BAop : bitwise and
228           * SHop : shift operator
229           * PWop : power operator
230           * PMop : pattern-matching operator
231           * Aop : addition-level operator
232           * Mop : multiplication-level operator
233           * Eop : equality-testing operator
234           * Rop : relational operator <= != gt
235           *
236           * Also see LOP and lop() below.
237           */
238            
239           #ifdef DEBUGGING /* Serve -DT. */
240           # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
241           #else
242           # define REPORT(retval) (retval)
243           #endif
244            
245           #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246           #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247           #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248           #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249           #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250           #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251           #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252           #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253           #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254           #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255           #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256           #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257           #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258           #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259           #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260           #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261           #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262           #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263           #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264           #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265           #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
266            
267           /* This bit of chicanery makes a unary function followed by
268           * a parenthesis into a function with one argument, highest precedence.
269           * The UNIDOR macro is for unary functions that can be followed by the //
270           * operator (such as C).
271           */
272           #define UNI3(f,x,have_x) { \
273           pl_yylval.ival = f; \
274           if (have_x) PL_expect = x; \
275           PL_bufptr = s; \
276           PL_last_uni = PL_oldbufptr; \
277           PL_last_lop_op = f; \
278           if (*s == '(') \
279           return REPORT( (int)FUNC1 ); \
280           s = PEEKSPACE(s); \
281           return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282           }
283           #define UNI(f) UNI3(f,XTERM,1)
284           #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285           #define UNIPROTO(f,optional) { \
286           if (optional) PL_last_uni = PL_oldbufptr; \
287           OPERATOR(f); \
288           }
289            
290           #define UNIBRACK(f) UNI3(f,0,0)
291            
292           /* grandfather return to old style */
293           #define OLDLOP(f) \
294           do { \
295           if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296           PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297           pl_yylval.ival = (f); \
298           PL_expect = XTERM; \
299           PL_bufptr = s; \
300           return (int)LSTOP; \
301           } while(0)
302            
303           #define COPLINE_INC_WITH_HERELINES \
304           STMT_START { \
305           CopLINE_inc(PL_curcop); \
306           if (PL_parser->lex_shared->herelines) \
307           CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308           PL_parser->lex_shared->herelines = 0; \
309           } STMT_END
310           /* Called after scan_str to update CopLINE(PL_curcop), but only when there
311           * is no sublex_push to follow. */
312           #define COPLINE_SET_FROM_MULTI_END \
313           STMT_START { \
314           CopLINE_set(PL_curcop, PL_multi_end); \
315           if (PL_multi_end != PL_multi_start) \
316           PL_parser->lex_shared->herelines = 0; \
317           } STMT_END
318            
319            
320           #ifdef DEBUGGING
321            
322           /* how to interpret the pl_yylval associated with the token */
323           enum token_type {
324           TOKENTYPE_NONE,
325           TOKENTYPE_IVAL,
326           TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
327           TOKENTYPE_PVAL,
328           TOKENTYPE_OPVAL
329           };
330            
331           static struct debug_tokens {
332           const int token;
333           enum token_type type;
334           const char *name;
335           } const debug_tokens[] =
336           {
337           { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
338           { ANDAND, TOKENTYPE_NONE, "ANDAND" },
339           { ANDOP, TOKENTYPE_NONE, "ANDOP" },
340           { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
341           { ARROW, TOKENTYPE_NONE, "ARROW" },
342           { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
343           { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
344           { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
345           { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
346           { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
347           { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
348           { DO, TOKENTYPE_NONE, "DO" },
349           { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
350           { DORDOR, TOKENTYPE_NONE, "DORDOR" },
351           { DOROP, TOKENTYPE_OPNUM, "DOROP" },
352           { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
353           { ELSE, TOKENTYPE_NONE, "ELSE" },
354           { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
355           { EQOP, TOKENTYPE_OPNUM, "EQOP" },
356           { FOR, TOKENTYPE_IVAL, "FOR" },
357           { FORMAT, TOKENTYPE_NONE, "FORMAT" },
358           { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
359           { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
360           { FUNC, TOKENTYPE_OPNUM, "FUNC" },
361           { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
362           { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
363           { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
364           { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
365           { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
366           { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
367           { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
368           { IF, TOKENTYPE_IVAL, "IF" },
369           { LABEL, TOKENTYPE_PVAL, "LABEL" },
370           { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
371           { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
372           { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
373           { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
374           { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
375           { METHOD, TOKENTYPE_OPVAL, "METHOD" },
376           { MULOP, TOKENTYPE_OPNUM, "MULOP" },
377           { MY, TOKENTYPE_IVAL, "MY" },
378           { NOAMP, TOKENTYPE_NONE, "NOAMP" },
379           { NOTOP, TOKENTYPE_NONE, "NOTOP" },
380           { OROP, TOKENTYPE_IVAL, "OROP" },
381           { OROR, TOKENTYPE_NONE, "OROR" },
382           { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
383           { PEG, TOKENTYPE_NONE, "PEG" },
384           { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
385           { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
386           { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
387           { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
388           { POSTINC, TOKENTYPE_NONE, "POSTINC" },
389           { POWOP, TOKENTYPE_OPNUM, "POWOP" },
390           { PREDEC, TOKENTYPE_NONE, "PREDEC" },
391           { PREINC, TOKENTYPE_NONE, "PREINC" },
392           { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
393           { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
394           { REFGEN, TOKENTYPE_NONE, "REFGEN" },
395           { RELOP, TOKENTYPE_OPNUM, "RELOP" },
396           { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
397           { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
398           { SUB, TOKENTYPE_NONE, "SUB" },
399           { THING, TOKENTYPE_OPVAL, "THING" },
400           { UMINUS, TOKENTYPE_NONE, "UMINUS" },
401           { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
402           { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
403           { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
404           { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
405           { USE, TOKENTYPE_IVAL, "USE" },
406           { WHEN, TOKENTYPE_IVAL, "WHEN" },
407           { WHILE, TOKENTYPE_IVAL, "WHILE" },
408           { WORD, TOKENTYPE_OPVAL, "WORD" },
409           { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
410           { 0, TOKENTYPE_NONE, NULL }
411           };
412            
413           /* dump the returned token in rv, plus any optional arg in pl_yylval */
414            
415           STATIC int
416           S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
417           {
418           dVAR;
419            
420           PERL_ARGS_ASSERT_TOKEREPORT;
421            
422           if (DEBUG_T_TEST) {
423           const char *name = NULL;
424           enum token_type type = TOKENTYPE_NONE;
425           const struct debug_tokens *p;
426           SV* const report = newSVpvs("<== ");
427            
428           for (p = debug_tokens; p->token; p++) {
429           if (p->token == (int)rv) {
430           name = p->name;
431           type = p->type;
432           break;
433           }
434           }
435           if (name)
436           Perl_sv_catpv(aTHX_ report, name);
437           else if ((char)rv > ' ' && (char)rv <= '~')
438           {
439           Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
440           if ((char)rv == 'p')
441           sv_catpvs(report, " (pending identifier)");
442           }
443           else if (!rv)
444           sv_catpvs(report, "EOF");
445           else
446           Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447           switch (type) {
448           case TOKENTYPE_NONE:
449           break;
450           case TOKENTYPE_IVAL:
451           Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
452           break;
453           case TOKENTYPE_OPNUM:
454           Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
455           PL_op_name[lvalp->ival]);
456           break;
457           case TOKENTYPE_PVAL:
458           Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
459           break;
460           case TOKENTYPE_OPVAL:
461           if (lvalp->opval) {
462           Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
463           PL_op_name[lvalp->opval->op_type]);
464           if (lvalp->opval->op_type == OP_CONST) {
465           Perl_sv_catpvf(aTHX_ report, " %s",
466           SvPEEK(cSVOPx_sv(lvalp->opval)));
467           }
468            
469           }
470           else
471           sv_catpvs(report, "(opval=null)");
472           break;
473           }
474           PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
475           };
476           return (int)rv;
477           }
478            
479            
480           /* print the buffer with suitable escapes */
481            
482           STATIC void
483           S_printbuf(pTHX_ const char *const fmt, const char *const s)
484           {
485           SV* const tmp = newSVpvs("");
486            
487           PERL_ARGS_ASSERT_PRINTBUF;
488            
489           PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
490           SvREFCNT_dec(tmp);
491           }
492            
493           #endif
494            
495           static int
496 12         S_deprecate_commaless_var_list(pTHX) {
497 12         PL_expect = XTERM;
498 12         deprecate("comma-less variable list");
499 12         return REPORT(','); /* grandfather non-comma-format format */
500           }
501            
502           /*
503           * S_ao
504           *
505           * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
506           * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
507           */
508            
509           STATIC int
510 25594627         S_ao(pTHX_ int toketype)
511           {
512           dVAR;
513 25594627 100       if (*PL_bufptr == '=') {
514 2374478         PL_bufptr++;
515 2374478 100       if (toketype == ANDAND)
516 54         pl_yylval.ival = OP_ANDASSIGN;
517 2374424 100       else if (toketype == OROR)
518 533831         pl_yylval.ival = OP_ORASSIGN;
519 1840593 100       else if (toketype == DORDOR)
520 1010         pl_yylval.ival = OP_DORASSIGN;
521           toketype = ASSIGNOP;
522           }
523 25594627         return toketype;
524           }
525            
526           /*
527           * S_no_op
528           * When Perl expects an operator and finds something else, no_op
529           * prints the warning. It always prints " found where
530           * operator expected. It prints "Missing semicolon on previous line?"
531           * if the surprise occurs at the start of the line. "do you need to
532           * predeclare ..." is printed out for code like "sub bar; foo bar $x"
533           * where the compiler doesn't know if foo is a method call or a function.
534           * It prints "Missing operator before end of line" if there's nothing
535           * after the missing operator, or "... before <...>" if there is something
536           * after the missing operator.
537           */
538            
539           STATIC void
540 108         S_no_op(pTHX_ const char *const what, char *s)
541           {
542           dVAR;
543 108         char * const oldbp = PL_bufptr;
544 108         const bool is_first = (PL_oldbufptr == PL_linestart);
545            
546           PERL_ARGS_ASSERT_NO_OP;
547            
548 108 50       if (!s)
549           s = oldbp;
550           else
551 108         PL_bufptr = s;
552 108 50       yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
    100        
    50        
    100        
    100        
553 108 100       if (ckWARN_d(WARN_SYNTAX)) {
554 62 50       if (is_first)
555 0         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
556           "\t(Missing semicolon on previous line?)\n");
557 62 50       else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
    50        
    50        
    50        
    0        
    50        
    100        
    100        
    100        
    50        
    50        
    0        
    0        
    0        
    50        
558           const char *t;
559 297 50       for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
    50        
    50        
    0        
    50        
    100        
    100        
    100        
    100        
    100        
    50        
    50        
    50        
    50        
    100        
560 240 50       t += UTF ? UTF8SKIP(t) : 1)
    50        
    0        
    50        
    100        
561           NOOP;
562 38 100       if (t < PL_bufptr && isSPACE(*t))
    100        
563 70 50       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
    50        
564           "\t(Do you need to predeclare %"UTF8f"?)\n",
565 60 50       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
    0        
    100        
566           }
567           else {
568           assert(s >= oldbp);
569 84 50       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
    50        
570           "\t(Missing operator before %"UTF8f"?)\n",
571 72 50       UTF8fARG(UTF, s - oldbp, oldbp));
    0        
    50        
572           }
573           }
574 108         PL_bufptr = oldbp;
575 108         }
576            
577           /*
578           * S_missingterm
579           * Complain about missing quote/regexp/heredoc terminator.
580           * If it's called with NULL then it cauterizes the line buffer.
581           * If we're in a delimited string and the delimiter is a control
582           * character, it's reformatted into a two-char sequence like ^C.
583           * This is fatal.
584           */
585            
586           STATIC void
587 98         S_missingterm(pTHX_ char *s)
588           {
589           dVAR;
590           char tmpbuf[3];
591           char q;
592 98 100       if (s) {
593 86         char * const nl = strrchr(s,'\n');
594 86 50       if (nl)
595 86         *nl = '\0';
596           }
597 12 50       else if ((U8) PL_multi_close < 32) {
598 0         *tmpbuf = '^';
599 0 0       tmpbuf[1] = (char)toCTRL(PL_multi_close);
600 0         tmpbuf[2] = '\0';
601           s = tmpbuf;
602           }
603           else {
604 12         *tmpbuf = (char)PL_multi_close;
605 12         tmpbuf[1] = '\0';
606           s = tmpbuf;
607           }
608 98 50       q = strchr(s,'"') ? '\'' : '"';
609 98         Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
610           }
611            
612           #include "feature.h"
613            
614           /*
615           * Check whether the named feature is enabled.
616           */
617           bool
618 3312         Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
619           {
620           dVAR;
621 3312         char he_name[8 + MAX_FEATURE_LEN] = "feature_";
622            
623           PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
624            
625           assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
626            
627 3312 50       if (namelen > MAX_FEATURE_LEN)
628           return FALSE;
629 3312         memcpy(&he_name[8], name, namelen);
630            
631 3312         return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
632           REFCOUNTED_HE_EXISTS));
633           }
634            
635           /*
636           * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
637           * utf16-to-utf8-reversed.
638           */
639            
640           #ifdef PERL_CR_FILTER
641           static void
642           strip_return(SV *sv)
643           {
644           const char *s = SvPVX_const(sv);
645           const char * const e = s + SvCUR(sv);
646            
647           PERL_ARGS_ASSERT_STRIP_RETURN;
648            
649           /* outer loop optimized to do nothing if there are no CR-LFs */
650           while (s < e) {
651           if (*s++ == '\r' && *s == '\n') {
652           /* hit a CR-LF, need to copy the rest */
653           char *d = s - 1;
654           *d++ = *s++;
655           while (s < e) {
656           if (*s == '\r' && s[1] == '\n')
657           s++;
658           *d++ = *s++;
659           }
660           SvCUR(sv) -= s - d;
661           return;
662           }
663           }
664           }
665            
666           STATIC I32
667           S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
668           {
669           const I32 count = FILTER_READ(idx+1, sv, maxlen);
670           if (count > 0 && !maxlen)
671           strip_return(sv);
672           return count;
673           }
674           #endif
675            
676           /*
677           =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
678            
679           Creates and initialises a new lexer/parser state object, supplying
680           a context in which to lex and parse from a new source of Perl code.
681           A pointer to the new state object is placed in L. An entry
682           is made on the save stack so that upon unwinding the new state object
683           will be destroyed and the former value of L will be restored.
684           Nothing else need be done to clean up the parsing context.
685            
686           The code to be parsed comes from I and I. I, if
687           non-null, provides a string (in SV form) containing code to be parsed.
688           A copy of the string is made, so subsequent modification of I
689           does not affect parsing. I, if non-null, provides an input stream
690           from which code will be read to be parsed. If both are non-null, the
691           code in I comes first and must consist of complete lines of input,
692           and I supplies the remainder of the source.
693            
694           The I parameter is reserved for future use. Currently it is only
695           used by perl internally, so extensions should always pass zero.
696            
697           =cut
698           */
699            
700           /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
701           can share filters with the current parser.
702           LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
703           caller, hence isn't owned by the parser, so shouldn't be closed on parser
704           destruction. This is used to handle the case of defaulting to reading the
705           script from the standard input because no filename was given on the command
706           line (without getting confused by situation where STDIN has been closed, so
707           the script handle is opened on fd 0) */
708            
709           void
710 4388977         Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
711           {
712           dVAR;
713           const char *s = NULL;
714           yy_parser *parser, *oparser;
715 4388977 100       if (flags && flags & ~LEX_START_FLAGS)
    50        
716 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
717            
718           /* create and initialise a parser */
719            
720 4388977         Newxz(parser, 1, yy_parser);
721 4388977         parser->old_parser = oparser = PL_parser;
722 4388977         PL_parser = parser;
723            
724 4388977         parser->stack = NULL;
725 4388977         parser->ps = NULL;
726 4388977         parser->stack_size = 0;
727            
728           /* on scope exit, free this parser and restore any outer one */
729 4388977         SAVEPARSER(parser);
730 4388977         parser->saved_curcop = PL_curcop;
731            
732           /* initialise lexer state */
733            
734           #ifdef PERL_MAD
735           parser->curforce = -1;
736           #else
737 4388977         parser->nexttoke = 0;
738           #endif
739 4388977 100       parser->error_count = oparser ? oparser->error_count : 0;
740 4388977         parser->copline = NOLINE;
741 4388977         parser->lex_state = LEX_NORMAL;
742 4388977         parser->expect = XSTATE;
743 4388977         parser->rsfp = rsfp;
744 4388977         parser->rsfp_filters =
745 4388977         !(flags & LEX_START_SAME_FILTER) || !oparser
746           ? NULL
747 4388977 100       : MUTABLE_AV(SvREFCNT_inc(
    100        
748           oparser->rsfp_filters
749           ? oparser->rsfp_filters
750           : (oparser->rsfp_filters = newAV())
751           ));
752            
753 4388977         Newx(parser->lex_brackstack, 120, char);
754 4388977         Newx(parser->lex_casestack, 12, char);
755 4388977         *parser->lex_casestack = '\0';
756 4388977         Newxz(parser->lex_shared, 1, LEXSHARED);
757            
758 4388977 100       if (line) {
759           STRLEN len;
760 3759876 100       s = SvPV_const(line, len);
761 7519752         parser->linestr = flags & LEX_START_COPIED
762 318         ? SvREFCNT_inc_simple_NN(line)
763 5628496 100       : newSVpvn_flags(s, len, SvUTF8(line));
764 3759876 100       sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
765           } else {
766 629101 100       parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
767           }
768 4388977         parser->oldoldbufptr =
769 4388977         parser->oldbufptr =
770 4388977         parser->bufptr =
771 4388977         parser->linestart = SvPVX(parser->linestr);
772 4388977         parser->bufend = parser->bufptr + SvCUR(parser->linestr);
773 4388977         parser->last_lop = parser->last_uni = NULL;
774 4388977         parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
775           |LEX_DONT_CLOSE_RSFP);
776            
777 4388977         parser->in_pod = parser->filtered = 0;
778 4388977         }
779            
780            
781           /* delete a parser object */
782            
783           void
784 4388977         Perl_parser_free(pTHX_ const yy_parser *parser)
785           {
786           PERL_ARGS_ASSERT_PARSER_FREE;
787            
788 4388977         PL_curcop = parser->saved_curcop;
789 4388977         SvREFCNT_dec(parser->linestr);
790            
791 4388977 100       if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
792 44         PerlIO_clearerr(parser->rsfp);
793 4389224 100       else if (parser->rsfp && (!parser->old_parser ||
    100        
    50        
794 582         (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
795 1454         PerlIO_close(parser->rsfp);
796 4388977         SvREFCNT_dec(parser->rsfp_filters);
797 4388977         SvREFCNT_dec(parser->lex_stuff);
798 4388977         SvREFCNT_dec(parser->sublex_info.repl);
799            
800 4388977         Safefree(parser->lex_brackstack);
801 4388977         Safefree(parser->lex_casestack);
802 4388977         Safefree(parser->lex_shared);
803 4388977         PL_parser = parser->old_parser;
804 4388977         Safefree(parser);
805 4388977         }
806            
807           void
808 237692         Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
809           {
810           #ifdef PERL_MAD
811           I32 nexttoke = parser->lasttoke;
812           #else
813 237692         I32 nexttoke = parser->nexttoke;
814           #endif
815           PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
816 352762 100       while (nexttoke--) {
817           #ifdef PERL_MAD
818           if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
819           & 0xffff)
820           && parser->nexttoke[nexttoke].next_val.opval
821           && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
822           && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
823           op_free(parser->nexttoke[nexttoke].next_val.opval);
824           parser->nexttoke[nexttoke].next_val.opval = NULL;
825           }
826           #else
827 6 50       if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
828 4 50       && parser->nextval[nexttoke].opval
829 4 50       && parser->nextval[nexttoke].opval->op_slabbed
830 4 50       && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
831 4         op_free(parser->nextval[nexttoke].opval);
832 4         parser->nextval[nexttoke].opval = NULL;
833           }
834           #endif
835           }
836 237692         }
837            
838            
839           /*
840           =for apidoc AmxU|SV *|PL_parser-Elinestr
841            
842           Buffer scalar containing the chunk currently under consideration of the
843           text currently being lexed. This is always a plain string scalar (for
844           which C is true). It is not intended to be used as a scalar by
845           normal scalar means; instead refer to the buffer directly by the pointer
846           variables described below.
847            
848           The lexer maintains various C pointers to things in the
849           Clinestr> buffer. If Clinestr> is ever
850           reallocated, all of these pointers must be updated. Don't attempt to
851           do this manually, but rather use L if you need to
852           reallocate the buffer.
853            
854           The content of the text chunk in the buffer is commonly exactly one
855           complete line of input, up to and including a newline terminator,
856           but there are situations where it is otherwise. The octets of the
857           buffer may be intended to be interpreted as either UTF-8 or Latin-1.
858           The function L tells you which. Do not use the C
859           flag on this scalar, which may disagree with it.
860            
861           For direct examination of the buffer, the variable
862           Lbufend> points to the end of the buffer. The current
863           lexing position is pointed to by Lbufptr>. Direct use
864           of these pointers is usually preferable to examination of the scalar
865           through normal scalar means.
866            
867           =for apidoc AmxU|char *|PL_parser-Ebufend
868            
869           Direct pointer to the end of the chunk of text currently being lexed, the
870           end of the lexer buffer. This is equal to Clinestr)
871           + SvCUR(PL_parser-Elinestr)>. A NUL character (zero octet) is
872           always located at the end of the buffer, and does not count as part of
873           the buffer's contents.
874            
875           =for apidoc AmxU|char *|PL_parser-Ebufptr
876            
877           Points to the current position of lexing inside the lexer buffer.
878           Characters around this point may be freely examined, within
879           the range delimited by Clinestr>)> and
880           Lbufend>. The octets of the buffer may be intended to be
881           interpreted as either UTF-8 or Latin-1, as indicated by L.
882            
883           Lexing code (whether in the Perl core or not) moves this pointer past
884           the characters that it consumes. It is also expected to perform some
885           bookkeeping whenever a newline character is consumed. This movement
886           can be more conveniently performed by the function L,
887           which handles newlines appropriately.
888            
889           Interpretation of the buffer's octets can be abstracted out by
890           using the slightly higher-level functions L and
891           L.
892            
893           =for apidoc AmxU|char *|PL_parser-Elinestart
894            
895           Points to the start of the current line inside the lexer buffer.
896           This is useful for indicating at which column an error occurred, and
897           not much else. This must be updated by any lexing code that consumes
898           a newline; the function L handles this detail.
899            
900           =cut
901           */
902            
903           /*
904           =for apidoc Amx|bool|lex_bufutf8
905            
906           Indicates whether the octets in the lexer buffer
907           (Llinestr>) should be interpreted as the UTF-8 encoding
908           of Unicode characters. If not, they should be interpreted as Latin-1
909           characters. This is analogous to the C flag for scalars.
910            
911           In UTF-8 mode, it is not guaranteed that the lexer buffer actually
912           contains valid UTF-8. Lexing code must be robust in the face of invalid
913           encoding.
914            
915           The actual C flag of the Llinestr> scalar
916           is significant, but not the whole story regarding the input character
917           encoding. Normally, when a file is being read, the scalar contains octets
918           and its C flag is off, but the octets should be interpreted as
919           UTF-8 if the C pragma is in effect. During a string eval,
920           however, the scalar may have the C flag on, and in this case its
921           octets should be interpreted as UTF-8 unless the C pragma
922           is in effect. This logic may change in the future; use this function
923           instead of implementing the logic yourself.
924            
925           =cut
926           */
927            
928           bool
929 0         Perl_lex_bufutf8(pTHX)
930           {
931 0 0       return UTF;
    0        
    0        
    0        
    0        
932           }
933            
934           /*
935           =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
936            
937           Reallocates the lexer buffer (Llinestr>) to accommodate
938           at least I octets (including terminating NUL). Returns a
939           pointer to the reallocated buffer. This is necessary before making
940           any direct modification of the buffer that would increase its length.
941           L provides a more convenient way to insert text into
942           the buffer.
943            
944           Do not use C or C directly on Clinestr>;
945           this function updates all of the lexer's variables that point directly
946           into the buffer.
947            
948           =cut
949           */
950            
951           char *
952 76         Perl_lex_grow_linestr(pTHX_ STRLEN len)
953           {
954           SV *linestr;
955           char *buf;
956           STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
957           STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
958 76         linestr = PL_parser->linestr;
959 76         buf = SvPVX(linestr);
960 76 100       if (len <= SvLEN(linestr))
961           return buf;
962 72         bufend_pos = PL_parser->bufend - buf;
963 72         bufptr_pos = PL_parser->bufptr - buf;
964 72         oldbufptr_pos = PL_parser->oldbufptr - buf;
965 72         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
966 72         linestart_pos = PL_parser->linestart - buf;
967 72 50       last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
968 72 50       last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
969 72 50       re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
970 0         PL_parser->lex_shared->re_eval_start - buf : 0;
971            
972 72         buf = sv_grow(linestr, len);
973            
974 72         PL_parser->bufend = buf + bufend_pos;
975 72         PL_parser->bufptr = buf + bufptr_pos;
976 72         PL_parser->oldbufptr = buf + oldbufptr_pos;
977 72         PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
978 72         PL_parser->linestart = buf + linestart_pos;
979 72 50       if (PL_parser->last_uni)
980 0         PL_parser->last_uni = buf + last_uni_pos;
981 72 50       if (PL_parser->last_lop)
982 0         PL_parser->last_lop = buf + last_lop_pos;
983 72 50       if (PL_parser->lex_shared->re_eval_start)
984 38         PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
985           return buf;
986           }
987            
988           /*
989           =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
990            
991           Insert characters into the lexer buffer (Llinestr>),
992           immediately after the current lexing point (Lbufptr>),
993           reallocating the buffer if necessary. This means that lexing code that
994           runs later will see the characters as if they had appeared in the input.
995           It is not recommended to do this as part of normal parsing, and most
996           uses of this facility run the risk of the inserted characters being
997           interpreted in an unintended manner.
998            
999           The string to be inserted is represented by I octets starting
1000           at I. These octets are interpreted as either UTF-8 or Latin-1,
1001           according to whether the C flag is set in I.
1002           The characters are recoded for the lexer buffer, according to how the
1003           buffer is currently being interpreted (L). If a string
1004           to be inserted is available as a Perl scalar, the L
1005           function is more convenient.
1006            
1007           =cut
1008           */
1009            
1010           void
1011 4         Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1012           {
1013           dVAR;
1014           char *bufptr;
1015           PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1016 4 50       if (flags & ~(LEX_STUFF_UTF8))
1017 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1018 4 50       if (UTF) {
    50        
    0        
    50        
    50        
1019 0 0       if (flags & LEX_STUFF_UTF8) {
1020           goto plain_copy;
1021           } else {
1022           STRLEN highhalf = 0; /* Count of variants */
1023 0         const char *p, *e = pv+len;
1024 0 0       for (p = pv; p != e; p++) {
1025 0 0       if (! UTF8_IS_INVARIANT(*p)) {
1026 0         highhalf++;
1027           }
1028           }
1029 0 0       if (!highhalf)
1030           goto plain_copy;
1031 0         lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1032 0         bufptr = PL_parser->bufptr;
1033 0         Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1034 0         SvCUR_set(PL_parser->linestr,
1035           SvCUR(PL_parser->linestr) + len+highhalf);
1036 0         PL_parser->bufend += len+highhalf;
1037 0 0       for (p = pv; p != e; p++) {
1038 0         U8 c = (U8)*p;
1039 0 0       if (! UTF8_IS_INVARIANT(c)) {
1040 0         *bufptr++ = UTF8_TWO_BYTE_HI(c);
1041 0         *bufptr++ = UTF8_TWO_BYTE_LO(c);
1042           } else {
1043 0         *bufptr++ = (char)c;
1044           }
1045           }
1046           }
1047           } else {
1048 4 50       if (flags & LEX_STUFF_UTF8) {
1049           STRLEN highhalf = 0;
1050 0         const char *p, *e = pv+len;
1051 0 0       for (p = pv; p != e; p++) {
1052 0         U8 c = (U8)*p;
1053 0 0       if (UTF8_IS_ABOVE_LATIN1(c)) {
1054 0         Perl_croak(aTHX_ "Lexing code attempted to stuff "
1055           "non-Latin-1 character into Latin-1 input");
1056 0 0       } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
    0        
    0        
1057 0         p++;
1058 0         highhalf++;
1059 0 0       } else if (! UTF8_IS_INVARIANT(c)) {
1060           /* malformed UTF-8 */
1061 0         ENTER;
1062 0         SAVESPTR(PL_warnhook);
1063 0         PL_warnhook = PERL_WARNHOOK_FATAL;
1064 0         utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1065 0         LEAVE;
1066           }
1067           }
1068 0 0       if (!highhalf)
1069           goto plain_copy;
1070 0         lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1071 0         bufptr = PL_parser->bufptr;
1072 0         Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1073 0         SvCUR_set(PL_parser->linestr,
1074           SvCUR(PL_parser->linestr) + len-highhalf);
1075 0         PL_parser->bufend += len-highhalf;
1076           p = pv;
1077 0 0       while (p < e) {
1078 0 0       if (UTF8_IS_INVARIANT(*p)) {
1079 0         *bufptr++ = *p;
1080 0         p++;
1081           }
1082           else {
1083           assert(p < e -1 );
1084 0         *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1085 0         p += 2;
1086           }
1087           }
1088           } else {
1089           plain_copy:
1090 4         lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1091 4         bufptr = PL_parser->bufptr;
1092 4         Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1093 4         SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1094 4         PL_parser->bufend += len;
1095 4         Copy(pv, bufptr, len, char);
1096           }
1097           }
1098 4         }
1099            
1100           /*
1101           =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1102            
1103           Insert characters into the lexer buffer (Llinestr>),
1104           immediately after the current lexing point (Lbufptr>),
1105           reallocating the buffer if necessary. This means that lexing code that
1106           runs later will see the characters as if they had appeared in the input.
1107           It is not recommended to do this as part of normal parsing, and most
1108           uses of this facility run the risk of the inserted characters being
1109           interpreted in an unintended manner.
1110            
1111           The string to be inserted is represented by octets starting at I
1112           and continuing to the first nul. These octets are interpreted as either
1113           UTF-8 or Latin-1, according to whether the C flag is set
1114           in I. The characters are recoded for the lexer buffer, according
1115           to how the buffer is currently being interpreted (L).
1116           If it is not convenient to nul-terminate a string to be inserted, the
1117           L function is more appropriate.
1118            
1119           =cut
1120           */
1121            
1122           void
1123 0         Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1124           {
1125           PERL_ARGS_ASSERT_LEX_STUFF_PV;
1126 0         lex_stuff_pvn(pv, strlen(pv), flags);
1127 0         }
1128            
1129           /*
1130           =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1131            
1132           Insert characters into the lexer buffer (Llinestr>),
1133           immediately after the current lexing point (Lbufptr>),
1134           reallocating the buffer if necessary. This means that lexing code that
1135           runs later will see the characters as if they had appeared in the input.
1136           It is not recommended to do this as part of normal parsing, and most
1137           uses of this facility run the risk of the inserted characters being
1138           interpreted in an unintended manner.
1139            
1140           The string to be inserted is the string value of I. The characters
1141           are recoded for the lexer buffer, according to how the buffer is currently
1142           being interpreted (L). If a string to be inserted is
1143           not already a Perl scalar, the L function avoids the
1144           need to construct a scalar.
1145            
1146           =cut
1147           */
1148            
1149           void
1150 0         Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1151           {
1152           char *pv;
1153           STRLEN len;
1154           PERL_ARGS_ASSERT_LEX_STUFF_SV;
1155 0 0       if (flags)
1156 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1157 0 0       pv = SvPV(sv, len);
1158 0 0       lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1159 0         }
1160            
1161           /*
1162           =for apidoc Amx|void|lex_unstuff|char *ptr
1163            
1164           Discards text about to be lexed, from Lbufptr> up to
1165           I. Text following I will be moved, and the buffer shortened.
1166           This hides the discarded text from any lexing code that runs later,
1167           as if the text had never appeared.
1168            
1169           This is not the normal way to consume lexed text. For that, use
1170           L.
1171            
1172           =cut
1173           */
1174            
1175           void
1176 0         Perl_lex_unstuff(pTHX_ char *ptr)
1177           {
1178           char *buf, *bufend;
1179           STRLEN unstuff_len;
1180           PERL_ARGS_ASSERT_LEX_UNSTUFF;
1181 0         buf = PL_parser->bufptr;
1182 0 0       if (ptr < buf)
1183 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1184 0 0       if (ptr == buf)
1185 0         return;
1186 0         bufend = PL_parser->bufend;
1187 0 0       if (ptr > bufend)
1188 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1189 0         unstuff_len = ptr - buf;
1190 0         Move(ptr, buf, bufend+1-ptr, char);
1191 0         SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1192 0         PL_parser->bufend = bufend - unstuff_len;
1193           }
1194            
1195           /*
1196           =for apidoc Amx|void|lex_read_to|char *ptr
1197            
1198           Consume text in the lexer buffer, from Lbufptr> up
1199           to I. This advances Lbufptr> to match I,
1200           performing the correct bookkeeping whenever a newline character is passed.
1201           This is the normal way to consume lexed text.
1202            
1203           Interpretation of the buffer's octets can be abstracted out by
1204           using the slightly higher-level functions L and
1205           L.
1206            
1207           =cut
1208           */
1209            
1210           void
1211 40         Perl_lex_read_to(pTHX_ char *ptr)
1212           {
1213           char *s;
1214           PERL_ARGS_ASSERT_LEX_READ_TO;
1215 40         s = PL_parser->bufptr;
1216 40 50       if (ptr < s || ptr > PL_parser->bufend)
    50        
1217 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1218 184 100       for (; s != ptr; s++)
1219 164 50       if (*s == '\n') {
1220 0 0       COPLINE_INC_WITH_HERELINES;
1221 0         PL_parser->linestart = s+1;
1222           }
1223 40         PL_parser->bufptr = ptr;
1224 40         }
1225            
1226           /*
1227           =for apidoc Amx|void|lex_discard_to|char *ptr
1228            
1229           Discards the first part of the Llinestr> buffer,
1230           up to I. The remaining content of the buffer will be moved, and
1231           all pointers into the buffer updated appropriately. I must not
1232           be later in the buffer than the position of Lbufptr>:
1233           it is not permitted to discard text that has yet to be lexed.
1234            
1235           Normally it is not necessarily to do this directly, because it suffices to
1236           use the implicit discarding behaviour of L and things
1237           based on it. However, if a token stretches across multiple lines,
1238           and the lexing code has kept multiple lines of text in the buffer for
1239           that purpose, then after completion of the token it would be wise to
1240           explicitly discard the now-unneeded earlier lines, to avoid future
1241           multi-line tokens growing the buffer without bound.
1242            
1243           =cut
1244           */
1245            
1246           void
1247 0         Perl_lex_discard_to(pTHX_ char *ptr)
1248           {
1249           char *buf;
1250           STRLEN discard_len;
1251           PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1252 0         buf = SvPVX(PL_parser->linestr);
1253 0 0       if (ptr < buf)
1254 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1255 0 0       if (ptr == buf)
1256 0         return;
1257 0 0       if (ptr > PL_parser->bufptr)
1258 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1259 0         discard_len = ptr - buf;
1260 0 0       if (PL_parser->oldbufptr < ptr)
1261 0         PL_parser->oldbufptr = ptr;
1262 0 0       if (PL_parser->oldoldbufptr < ptr)
1263 0         PL_parser->oldoldbufptr = ptr;
1264 0 0       if (PL_parser->last_uni && PL_parser->last_uni < ptr)
    0        
1265 0         PL_parser->last_uni = NULL;
1266 0 0       if (PL_parser->last_lop && PL_parser->last_lop < ptr)
    0        
1267 0         PL_parser->last_lop = NULL;
1268 0         Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1269 0         SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1270 0         PL_parser->bufend -= discard_len;
1271 0         PL_parser->bufptr -= discard_len;
1272 0         PL_parser->oldbufptr -= discard_len;
1273 0         PL_parser->oldoldbufptr -= discard_len;
1274 0 0       if (PL_parser->last_uni)
1275 0         PL_parser->last_uni -= discard_len;
1276 0 0       if (PL_parser->last_lop)
1277 0         PL_parser->last_lop -= discard_len;
1278           }
1279            
1280           /*
1281           =for apidoc Amx|bool|lex_next_chunk|U32 flags
1282            
1283           Reads in the next chunk of text to be lexed, appending it to
1284           Llinestr>. This should be called when lexing code has
1285           looked to the end of the current chunk and wants to know more. It is
1286           usual, but not necessary, for lexing to have consumed the entirety of
1287           the current chunk at this time.
1288            
1289           If Lbufptr> is pointing to the very end of the current
1290           chunk (i.e., the current chunk has been entirely consumed), normally the
1291           current chunk will be discarded at the same time that the new chunk is
1292           read in. If I includes C, the current chunk
1293           will not be discarded. If the current chunk has not been entirely
1294           consumed, then it will not be discarded regardless of the flag.
1295            
1296           Returns true if some new text was added to the buffer, or false if the
1297           buffer has reached the end of the input text.
1298            
1299           =cut
1300           */
1301            
1302           #define LEX_FAKE_EOF 0x80000000
1303           #define LEX_NO_TERM 0x40000000
1304            
1305           bool
1306 215833189         Perl_lex_next_chunk(pTHX_ U32 flags)
1307           {
1308           SV *linestr;
1309           char *buf;
1310           STRLEN old_bufend_pos, new_bufend_pos;
1311           STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1312           STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1313           bool got_some_for_debugger = 0;
1314           bool got_some;
1315 215833189 50       if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1316 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1317 215833189         linestr = PL_parser->linestr;
1318 215833189         buf = SvPVX(linestr);
1319 317086085 100       if (!(flags & LEX_KEEP_PREVIOUS) &&
    50        
1320 209530253         PL_parser->bufptr == PL_parser->bufend) {
1321           old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1322           linestart_pos = 0;
1323 209530253 50       if (PL_parser->last_uni != PL_parser->bufend)
1324 209530253         PL_parser->last_uni = NULL;
1325 209530253 50       if (PL_parser->last_lop != PL_parser->bufend)
1326 209530253         PL_parser->last_lop = NULL;
1327           last_uni_pos = last_lop_pos = 0;
1328 209530253         *buf = 0;
1329 209530253         SvCUR(linestr) = 0;
1330           } else {
1331 6302936         old_bufend_pos = PL_parser->bufend - buf;
1332 6302936         bufptr_pos = PL_parser->bufptr - buf;
1333 6302936         oldbufptr_pos = PL_parser->oldbufptr - buf;
1334 6302936         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1335 6302936         linestart_pos = PL_parser->linestart - buf;
1336 6302936 100       last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1337 6302936 100       last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1338           }
1339 215833189 100       if (flags & LEX_FAKE_EOF) {
1340           goto eof;
1341 215443259 100       } else if (!PL_parser->rsfp && !PL_parser->filtered) {
    100        
1342           got_some = 0;
1343 215443149 100       } else if (filter_gets(linestr, old_bufend_pos)) {
1344           got_some = 1;
1345           got_some_for_debugger = 1;
1346 224157 100       } else if (flags & LEX_NO_TERM) {
1347           got_some = 0;
1348           } else {
1349 223977 100       if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1350 14         sv_setpvs(linestr, "");
1351           eof:
1352           /* End of real input. Close filehandle (unless it was STDIN),
1353           * then add implicit termination.
1354           */
1355 613907 100       if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1356 40         PerlIO_clearerr(PL_parser->rsfp);
1357 613867 100       else if (PL_parser->rsfp)
1358 611619         (void)PerlIO_close(PL_parser->rsfp);
1359 613907         PL_parser->rsfp = NULL;
1360 613907         PL_parser->in_pod = PL_parser->filtered = 0;
1361           #ifdef PERL_MAD
1362           if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1363           PL_faketokens = 1;
1364           #endif
1365 613907 100       if (!PL_in_eval && PL_minus_p) {
    100        
1366 26         sv_catpvs(linestr,
1367           /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1368 26         PL_minus_n = PL_minus_p = 0;
1369 613881 100       } else if (!PL_in_eval && PL_minus_n) {
    100        
1370 90         sv_catpvs(linestr, /*{*/";}");
1371 90         PL_minus_n = 0;
1372           } else
1373 613791         sv_catpvs(linestr, ";");
1374           got_some = 1;
1375           }
1376 215833179         buf = SvPVX(linestr);
1377 215833179         new_bufend_pos = SvCUR(linestr);
1378 215833179         PL_parser->bufend = buf + new_bufend_pos;
1379 215833179         PL_parser->bufptr = buf + bufptr_pos;
1380 215833179         PL_parser->oldbufptr = buf + oldbufptr_pos;
1381 215833179         PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1382 215833179         PL_parser->linestart = buf + linestart_pos;
1383 215833179 100       if (PL_parser->last_uni)
1384 825798         PL_parser->last_uni = buf + last_uni_pos;
1385 215833179 100       if (PL_parser->last_lop)
1386 571803         PL_parser->last_lop = buf + last_lop_pos;
1387 217243210 100       if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
    100        
    100        
    100        
1388 2820062         PL_curstash != PL_debstash) {
1389           /* debugger active and we're not compiling the debugger code,
1390           * so store the line into the debugger's array of lines
1391           */
1392 1041768         update_debugger_info(NULL, buf+old_bufend_pos,
1393           new_bufend_pos-old_bufend_pos);
1394           }
1395 215833179         return got_some;
1396           }
1397            
1398           /*
1399           =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1400            
1401           Looks ahead one (Unicode) character in the text currently being lexed.
1402           Returns the codepoint (unsigned integer value) of the next character,
1403           or -1 if lexing has reached the end of the input text. To consume the
1404           peeked character, use L.
1405            
1406           If the next character is in (or extends into) the next chunk of input
1407           text, the next chunk will be read in. Normally the current chunk will be
1408           discarded at the same time, but if I includes C
1409           then the current chunk will not be discarded.
1410            
1411           If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1412           is encountered, an exception is generated.
1413            
1414           =cut
1415           */
1416            
1417           I32
1418 536         Perl_lex_peek_unichar(pTHX_ U32 flags)
1419           {
1420           dVAR;
1421           char *s, *bufend;
1422 536 50       if (flags & ~(LEX_KEEP_PREVIOUS))
1423 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1424 536         s = PL_parser->bufptr;
1425 536         bufend = PL_parser->bufend;
1426 536 50       if (UTF) {
    50        
    0        
    50        
    50        
1427           U8 head;
1428           I32 unichar;
1429           STRLEN len, retlen;
1430 0 0       if (s == bufend) {
1431 0 0       if (!lex_next_chunk(flags))
1432           return -1;
1433 0         s = PL_parser->bufptr;
1434 0         bufend = PL_parser->bufend;
1435           }
1436 0         head = (U8)*s;
1437 0 0       if (UTF8_IS_INVARIANT(head))
1438 0         return head;
1439 0 0       if (UTF8_IS_START(head)) {
1440 0         len = UTF8SKIP(&head);
1441 0 0       while ((STRLEN)(bufend-s) < len) {
1442 0 0       if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1443           break;
1444 0         s = PL_parser->bufptr;
1445 0         bufend = PL_parser->bufend;
1446           }
1447           }
1448 0         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1449 0 0       if (retlen == (STRLEN)-1) {
1450           /* malformed UTF-8 */
1451 0         ENTER;
1452 0         SAVESPTR(PL_warnhook);
1453 0         PL_warnhook = PERL_WARNHOOK_FATAL;
1454 0         utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1455 0         LEAVE;
1456           }
1457           return unichar;
1458           } else {
1459 536 100       if (s == bufend) {
1460 6 50       if (!lex_next_chunk(flags))
1461           return -1;
1462 0         s = PL_parser->bufptr;
1463           }
1464 533         return (U8)*s;
1465           }
1466           }
1467            
1468           /*
1469           =for apidoc Amx|I32|lex_read_unichar|U32 flags
1470            
1471           Reads the next (Unicode) character in the text currently being lexed.
1472           Returns the codepoint (unsigned integer value) of the character read,
1473           and moves Lbufptr> past the character, or returns -1
1474           if lexing has reached the end of the input text. To non-destructively
1475           examine the next character, use L instead.
1476            
1477           If the next character is in (or extends into) the next chunk of input
1478           text, the next chunk will be read in. Normally the current chunk will be
1479           discarded at the same time, but if I includes C
1480           then the current chunk will not be discarded.
1481            
1482           If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1483           is encountered, an exception is generated.
1484            
1485           =cut
1486           */
1487            
1488           I32
1489 202         Perl_lex_read_unichar(pTHX_ U32 flags)
1490           {
1491           I32 c;
1492 202 50       if (flags & ~(LEX_KEEP_PREVIOUS))
1493 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1494 202         c = lex_peek_unichar(flags);
1495 202 50       if (c != -1) {
1496 202 50       if (c == '\n')
1497 0 0       COPLINE_INC_WITH_HERELINES;
1498 202 50       if (UTF)
    50        
    0        
    50        
    50        
1499 0         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1500           else
1501 202         ++(PL_parser->bufptr);
1502           }
1503 202         return c;
1504           }
1505            
1506           /*
1507           =for apidoc Amx|void|lex_read_space|U32 flags
1508            
1509           Reads optional spaces, in Perl style, in the text currently being
1510           lexed. The spaces may include ordinary whitespace characters and
1511           Perl-style comments. C<#line> directives are processed if encountered.
1512           Lbufptr> is moved past the spaces, so that it points
1513           at a non-space character (or the end of the input text).
1514            
1515           If spaces extend into the next chunk of input text, the next chunk will
1516           be read in. Normally the current chunk will be discarded at the same
1517           time, but if I includes C then the current
1518           chunk will not be discarded.
1519            
1520           =cut
1521           */
1522            
1523           #define LEX_NO_INCLINE 0x40000000
1524           #define LEX_NO_NEXT_CHUNK 0x80000000
1525            
1526           void
1527 470855641         Perl_lex_read_space(pTHX_ U32 flags)
1528           {
1529           char *s, *bufend;
1530 470855641         const bool can_incline = !(flags & LEX_NO_INCLINE);
1531           bool need_incline = 0;
1532 470855641 50       if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1533 0         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1534           #ifdef PERL_MAD
1535           if (PL_skipwhite) {
1536           sv_free(PL_skipwhite);
1537           PL_skipwhite = NULL;
1538           }
1539           if (PL_madskills)
1540           PL_skipwhite = newSVpvs("");
1541           #endif /* PERL_MAD */
1542 470855641         s = PL_parser->bufptr;
1543 616830542         bufend = PL_parser->bufend;
1544           while (1) {
1545 753100338         char c = *s;
1546 753100338 100       if (c == '#') {
1547           do {
1548 15208990         c = *++s;
1549 15208990 100       } while (!(c == '\n' || (c == 0 && s == bufend)));
    100        
1550 752663352 100       } else if (c == '\n') {
1551 6828186         s++;
1552 6828186 100       if (can_incline) {
1553 6015990         PL_parser->linestart = s;
1554 6015990 100       if (s == bufend)
1555           need_incline = 1;
1556           else
1557 523464         incline(s);
1558           }
1559 745835166 100       } else if (isSPACE(c)) {
1560 268676675         s++;
1561 477158491 100       } else if (c == 0 && s == bufend) {
1562           bool got_more;
1563           #ifdef PERL_MAD
1564           if (PL_madskills)
1565           sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1566           #endif /* PERL_MAD */
1567 6302958 100       if (flags & LEX_NO_NEXT_CHUNK)
1568           break;
1569 6302956         PL_parser->bufptr = s;
1570 6302956 100       if (can_incline) COPLINE_INC_WITH_HERELINES;
    100        
1571 6302956         got_more = lex_next_chunk(flags);
1572 6302956 100       if (can_incline) CopLINE_dec(PL_curcop);
1573 6302956         s = PL_parser->bufptr;
1574 6302956         bufend = PL_parser->bufend;
1575 6302956 100       if (!got_more)
1576           break;
1577 6302850 100       if (can_incline && need_incline && PL_parser->rsfp) {
    100        
    100        
1578 5483412         incline(s);
1579           need_incline = 0;
1580           }
1581           } else {
1582           break;
1583           }
1584           }
1585           #ifdef PERL_MAD
1586           if (PL_madskills)
1587           sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1588           #endif /* PERL_MAD */
1589 470855641         PL_parser->bufptr = s;
1590 470855641         }
1591            
1592           /*
1593            
1594           =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1595            
1596           This function performs syntax checking on a prototype, C.
1597           If C is true, any illegal characters or mismatched brackets
1598           will trigger illegalproto warnings, declaring that they were
1599           detected in the prototype for C.
1600            
1601           The return value is C if this is a valid prototype, and
1602           C if it is not, regardless of whether C was C or
1603           C.
1604            
1605           Note that C is a valid C and will always return C.
1606            
1607           =cut
1608            
1609           */
1610            
1611           bool
1612 519944         Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1613           {
1614           STRLEN len, origlen;
1615 519944 50       char *p = proto ? SvPV(proto, len) : NULL;
    50        
1616           bool bad_proto = FALSE;
1617           bool in_brackets = FALSE;
1618           bool after_slash = FALSE;
1619           char greedy_proto = ' ';
1620           bool proto_after_greedy_proto = FALSE;
1621           bool must_be_last = FALSE;
1622           bool underscore = FALSE;
1623           bool bad_proto_after_underscore = FALSE;
1624            
1625           PERL_ARGS_ASSERT_VALIDATE_PROTO;
1626            
1627 519944 50       if (!proto)
1628           return TRUE;
1629            
1630 519944         origlen = len;
1631 1014054 100       for (; len--; p++) {
1632 494110 100       if (!isSPACE(*p)) {
1633 494058 100       if (must_be_last)
1634           proto_after_greedy_proto = TRUE;
1635 494058 100       if (underscore) {
1636 78 100       if (!strchr(";@%", *p))
1637           bad_proto_after_underscore = TRUE;
1638           underscore = FALSE;
1639           }
1640 494058 100       if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
    100        
1641           bad_proto = TRUE;
1642           }
1643           else {
1644 493976 100       if (*p == '[')
1645           in_brackets = TRUE;
1646 491036 100       else if (*p == ']')
1647           in_brackets = FALSE;
1648 488098 100       else if ((*p == '@' || *p == '%') &&
    100        
1649 34816 100       !after_slash &&
1650           !in_brackets ) {
1651           must_be_last = TRUE;
1652 28962         greedy_proto = *p;
1653           }
1654 459136 100       else if (*p == '_')
1655           underscore = TRUE;
1656           }
1657 494058 100       if (*p == '\\')
1658           after_slash = TRUE;
1659           else
1660           after_slash = FALSE;
1661           }
1662           }
1663            
1664 519944 100       if (warn) {
1665 147014         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1666 147014         p -= origlen;
1667 147014         p = SvUTF8(proto)
1668 4         ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1669           origlen, UNI_DISPLAY_ISPRINT)
1670 147018 100       : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1671            
1672 147014 100       if (proto_after_greedy_proto)
1673 38         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1674           "Prototype after '%c' for %"SVf" : %s",
1675           greedy_proto, SVfARG(name), p);
1676 147014 100       if (in_brackets)
1677 2         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1678           "Missing ']' in prototype for %"SVf" : %s",
1679           SVfARG(name), p);
1680 147014 100       if (bad_proto)
1681 30         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1682           "Illegal character in prototype for %"SVf" : %s",
1683           SVfARG(name), p);
1684 147014 100       if (bad_proto_after_underscore)
1685 8         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1686           "Illegal character after '_' in prototype for %"SVf" : %s",
1687           SVfARG(name), p);
1688           }
1689            
1690 519944 100       return (! (proto_after_greedy_proto || bad_proto) );
    100        
1691           }
1692            
1693           /*
1694           * S_incline
1695           * This subroutine has nothing to do with tilting, whether at windmills
1696           * or pinball tables. Its name is short for "increment line". It
1697           * increments the current line number in CopLINE(PL_curcop) and checks
1698           * to see whether the line starts with a comment of the form
1699           * # line 500 "foo.pm"
1700           * If so, it sets the current line number and file to the values in the comment.
1701           */
1702            
1703           STATIC void
1704 202428658         S_incline(pTHX_ const char *s)
1705           {
1706           dVAR;
1707           const char *t;
1708           const char *n;
1709           const char *e;
1710           line_t line_num;
1711            
1712           PERL_ARGS_ASSERT_INCLINE;
1713            
1714 202428658 100       COPLINE_INC_WITH_HERELINES;
1715 202428658 100       if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
    100        
    100        
1716 7378420 100       && s+1 == PL_bufend && *s == ';') {
    50        
1717           /* fake newline in string eval */
1718 3626370         CopLINE_dec(PL_curcop);
1719 3626370         return;
1720           }
1721 198802288 100       if (*s++ != '#')
1722           return;
1723 17709658 100       while (SPACE_OR_TAB(*s))
1724 9237789         s++;
1725 8471869 100       if (strnEQ(s, "line", 4))
1726           s += 4;
1727           else
1728           return;
1729 315040 100       if (SPACE_OR_TAB(*s))
1730 310300         s++;
1731           else
1732           return;
1733 465592 100       while (SPACE_OR_TAB(*s))
1734 142         s++;
1735 310300 100       if (!isDIGIT(*s))
1736           return;
1737            
1738           n = s;
1739 1153442 100       while (isDIGIT(*s))
1740 844318         s++;
1741 309124 100       if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
    50        
    100        
    50        
1742           return;
1743 615496 100       while (SPACE_OR_TAB(*s))
1744 306374         s++;
1745 309122 100       if (*s == '"' && (t = strchr(s+1, '"'))) {
    100        
1746 291766         s++;
1747 300444         e = t + 1;
1748           }
1749           else {
1750           t = s;
1751 259114 100       while (!isSPACE(*t))
1752 241758         t++;
1753           e = t;
1754           }
1755 309130 100       while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
    50        
    50        
1756 8         e++;
1757 309122 100       if (*e != '\n' && *e != '\0')
1758           return; /* false alarm */
1759            
1760 309116         line_num = atoi(n)-1;
1761            
1762 309116 100       if (t - s > 0) {
1763 306364         const STRLEN len = t - s;
1764            
1765 306364 100       if (!PL_rsfp && !PL_parser->filtered) {
    50        
1766           /* must copy *{"::_<(eval N)[oldfilename:L]"}
1767           * to *{"::_
1768           /* However, the long form of evals is only turned on by the
1769           debugger - usually they're "(eval %lu)" */
1770 284802         GV * const cfgv = CopFILEGV(PL_curcop);
1771 284802 50       if (cfgv) {
1772           char smallbuf[128];
1773           STRLEN tmplen2 = len;
1774           char *tmpbuf2;
1775           GV *gv2;
1776            
1777 284802 100       if (tmplen2 + 2 <= sizeof smallbuf)
1778           tmpbuf2 = smallbuf;
1779           else
1780 4         Newx(tmpbuf2, tmplen2 + 2, char);
1781            
1782 284802         tmpbuf2[0] = '_';
1783 284802         tmpbuf2[1] = '<';
1784            
1785 284802         memcpy(tmpbuf2 + 2, s, tmplen2);
1786 284802         tmplen2 += 2;
1787            
1788 284802         gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1789 284802 100       if (!isGV(gv2)) {
1790 2146         gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1791           /* adjust ${"::_
1792 2146         GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1793           /* The line number may differ. If that is the case,
1794           alias the saved lines that are in the array.
1795           Otherwise alias the whole array. */
1796 2146 100       if (CopLINE(PL_curcop) == line_num) {
1797 20         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1798 15         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1799           }
1800 2136 100       else if (GvAV(cfgv)) {
1801 22         AV * const av = GvAV(cfgv);
1802 22         const I32 start = CopLINE(PL_curcop)+1;
1803 22         I32 items = AvFILLp(av) - start;
1804 22 50       if (items > 0) {
1805 22 50       AV * const av2 = GvAVn(gv2);
1806 22         SV **svp = AvARRAY(av) + start;
1807 22         I32 l = (I32)line_num+1;
1808 79 100       while (items--)
1809 69         av_store(av2, l++, SvREFCNT_inc(*svp++));
1810           }
1811           }
1812           }
1813            
1814 284802 100       if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1815           }
1816           }
1817 306364         CopFILE_free(PL_curcop);
1818 612728         CopFILE_setn(PL_curcop, s, len);
1819           }
1820 104779884         CopLINE_set(PL_curcop, line_num);
1821           }
1822            
1823           #define skipspace(s) skipspace_flags(s, 0)
1824            
1825           #ifdef PERL_MAD
1826           /* skip space before PL_thistoken */
1827            
1828           STATIC char *
1829           S_skipspace0(pTHX_ char *s)
1830           {
1831           PERL_ARGS_ASSERT_SKIPSPACE0;
1832            
1833           s = skipspace(s);
1834           if (!PL_madskills)
1835           return s;
1836           if (PL_skipwhite) {
1837           if (!PL_thiswhite)
1838           PL_thiswhite = newSVpvs("");
1839           sv_catsv(PL_thiswhite, PL_skipwhite);
1840           sv_free(PL_skipwhite);
1841           PL_skipwhite = 0;
1842           }
1843           PL_realtokenstart = s - SvPVX(PL_linestr);
1844           return s;
1845           }
1846            
1847           /* skip space after PL_thistoken */
1848            
1849           STATIC char *
1850           S_skipspace1(pTHX_ char *s)
1851           {
1852           const char *start = s;
1853           I32 startoff = start - SvPVX(PL_linestr);
1854            
1855           PERL_ARGS_ASSERT_SKIPSPACE1;
1856            
1857           s = skipspace(s);
1858           if (!PL_madskills)
1859           return s;
1860           start = SvPVX(PL_linestr) + startoff;
1861           if (!PL_thistoken && PL_realtokenstart >= 0) {
1862           const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1863           PL_thistoken = newSVpvn(tstart, start - tstart);
1864           }
1865           PL_realtokenstart = -1;
1866           if (PL_skipwhite) {
1867           if (!PL_nextwhite)
1868           PL_nextwhite = newSVpvs("");
1869           sv_catsv(PL_nextwhite, PL_skipwhite);
1870           sv_free(PL_skipwhite);
1871           PL_skipwhite = 0;
1872           }
1873           return s;
1874           }
1875            
1876           STATIC char *
1877           S_skipspace2(pTHX_ char *s, SV **svp)
1878           {
1879           char *start;
1880           const I32 startoff = s - SvPVX(PL_linestr);
1881            
1882           PERL_ARGS_ASSERT_SKIPSPACE2;
1883            
1884           s = skipspace(s);
1885           if (!PL_madskills || !svp)
1886           return s;
1887           start = SvPVX(PL_linestr) + startoff;
1888           if (!PL_thistoken && PL_realtokenstart >= 0) {
1889           char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1890           PL_thistoken = newSVpvn(tstart, start - tstart);
1891           PL_realtokenstart = -1;
1892           }
1893           if (PL_skipwhite) {
1894           if (!*svp)
1895           *svp = newSVpvs("");
1896           sv_setsv(*svp, PL_skipwhite);
1897           sv_free(PL_skipwhite);
1898           PL_skipwhite = 0;
1899           }
1900          
1901           return s;
1902           }
1903           #endif
1904            
1905           STATIC void
1906 1041982         S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1907           {
1908 1041982         AV *av = CopFILEAVx(PL_curcop);
1909 1041982 100       if (av) {
1910 1041900         SV * const sv = newSV_type(SVt_PVMG);
1911 1041900 100       if (orig_sv)
1912 214         sv_setsv_flags(sv, orig_sv, 0); /* no cow */
1913           else
1914 1041686         sv_setpvn(sv, buf, len);
1915 1041900         (void)SvIOK_on(sv);
1916 1041900         SvIV_set(sv, 0);
1917 1041900         av_store(av, CopLINE(PL_curcop), sv);
1918           }
1919 1041982         }
1920            
1921           /*
1922           * S_skipspace
1923           * Called to gobble the appropriate amount and type of whitespace.
1924           * Skips comments as well.
1925           */
1926            
1927           STATIC char *
1928 470855659         S_skipspace_flags(pTHX_ char *s, U32 flags)
1929           {
1930           #ifdef PERL_MAD
1931           char *start = s;
1932           #endif /* PERL_MAD */
1933           PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1934           #ifdef PERL_MAD
1935           if (PL_skipwhite) {
1936           sv_free(PL_skipwhite);
1937           PL_skipwhite = NULL;
1938           }
1939           #endif /* PERL_MAD */
1940 470855659 100       if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
    100        
1941 562 50       while (s < PL_bufend && SPACE_OR_TAB(*s))
    100        
1942 144         s++;
1943           } else {
1944 470855241         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1945 470855241         PL_bufptr = s;
1946 470855241 100       lex_read_space(flags | LEX_KEEP_PREVIOUS |
    50        
1947           (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1948           LEX_NO_NEXT_CHUNK : 0));
1949 470855241         s = PL_bufptr;
1950 470855241         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1951 470855241 100       if (PL_linestart > PL_bufptr)
1952 245610822         PL_bufptr = PL_linestart;
1953           return s;
1954           }
1955           #ifdef PERL_MAD
1956           if (PL_madskills)
1957           PL_skipwhite = newSVpvn(start, s-start);
1958           #endif /* PERL_MAD */
1959           return s;
1960           }
1961            
1962           /*
1963           * S_check_uni
1964           * Check the unary operators to ensure there's no ambiguity in how they're
1965           * used. An ambiguous piece of code would be:
1966           * rand + 5
1967           * This doesn't mean rand() + 5. Because rand() is a unary operator,
1968           * the +5 is its argument.
1969           */
1970            
1971           STATIC void
1972 304762         S_check_uni(pTHX)
1973           {
1974           dVAR;
1975           const char *s;
1976           const char *t;
1977            
1978 304762 100       if (PL_oldoldbufptr != PL_last_uni)
1979           return;
1980 162 100       while (isSPACE(*PL_last_uni))
1981 66         PL_last_uni++;
1982 96         s = PL_last_uni;
1983 492 100       while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
    50        
    50        
    0        
    50        
    50        
    100        
    0        
    0        
    0        
    0        
    0        
    0        
    0        
    50        
1984 348         s++;
1985 96 100       if ((t = strchr(s, '(')) && t < PL_bufptr)
    50        
1986           return;
1987            
1988 173087         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1989           "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1990 12         (int)(s - PL_last_uni), PL_last_uni);
1991           }
1992            
1993           /*
1994           * LOP : macro to build a list operator. Its behaviour has been replaced
1995           * with a subroutine, S_lop() for which LOP is just another name.
1996           */
1997            
1998           #define LOP(f,x) return lop(f,x,s)
1999            
2000           /*
2001           * S_lop
2002           * Build a list operator (or something that might be one). The rules:
2003           * - if we have a next token, then it's a list operator [why?]
2004           * - if the next thing is an opening paren, then it's a function
2005           * - else it's a list operator
2006           */
2007            
2008           STATIC I32
2009 8624847         S_lop(pTHX_ I32 f, int x, char *s)
2010           {
2011           dVAR;
2012            
2013           PERL_ARGS_ASSERT_LOP;
2014            
2015 8624847         pl_yylval.ival = f;
2016 8624847         CLINE;
2017 8624847         PL_expect = x;
2018 8624847         PL_bufptr = s;
2019 8624847         PL_last_lop = PL_oldbufptr;
2020 8624847         PL_last_lop_op = (OPCODE)f;
2021           #ifdef PERL_MAD
2022           if (PL_lasttoke)
2023           goto lstop;
2024           #else
2025 8624847 100       if (PL_nexttoke)
2026           goto lstop;
2027           #endif
2028 8624385 100       if (*s == '(')
2029           return REPORT(FUNC);
2030 5827704         s = PEEKSPACE(s);
2031 5827704 100       if (*s == '(')
2032           return REPORT(FUNC);
2033           else {
2034           lstop:
2035 5737004 100       if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
    100        
2036 4507879         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2037           return REPORT(LSTOP);
2038           }
2039           }
2040            
2041           #ifdef PERL_MAD
2042           /*
2043           * S_start_force
2044           * Sets up for an eventual force_next(). start_force(0) basically does
2045           * an unshift, while start_force(-1) does a push. yylex removes items
2046           * on the "pop" end.
2047           */
2048            
2049           STATIC void
2050           S_start_force(pTHX_ int where)
2051           {
2052           int i;
2053            
2054           if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2055           where = PL_lasttoke;
2056           assert(PL_curforce < 0 || PL_curforce == where);
2057           if (PL_curforce != where) {
2058           for (i = PL_lasttoke; i > where; --i) {
2059           PL_nexttoke[i] = PL_nexttoke[i-1];
2060           }
2061           PL_lasttoke++;
2062           }
2063           if (PL_curforce < 0) /* in case of duplicate start_force() */
2064           Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2065           PL_curforce = where;
2066           if (PL_nextwhite) {
2067           if (PL_madskills)
2068           curmad('^', newSVpvs(""));
2069           CURMAD('_', PL_nextwhite);
2070           }
2071           }
2072            
2073           STATIC void
2074           S_curmad(pTHX_ char slot, SV *sv)
2075           {
2076           MADPROP **where;
2077            
2078           if (!sv)
2079           return;
2080           if (PL_curforce < 0)
2081           where = &PL_thismad;
2082           else
2083           where = &PL_nexttoke[PL_curforce].next_mad;
2084            
2085           if (PL_faketokens)
2086           sv_setpvs(sv, "");
2087           else {
2088           if (!IN_BYTES) {
2089           if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2090           SvUTF8_on(sv);
2091           else if (PL_encoding) {
2092           sv_recode_to_utf8(sv, PL_encoding);
2093           }
2094           }
2095           }
2096            
2097           /* keep a slot open for the head of the list? */
2098           if (slot != '_' && *where && (*where)->mad_key == '^') {
2099           (*where)->mad_key = slot;
2100           sv_free(MUTABLE_SV(((*where)->mad_val)));
2101           (*where)->mad_val = (void*)sv;
2102           }
2103           else
2104           addmad(newMADsv(slot, sv), where, 0);
2105           }
2106           #else
2107           # define start_force(where) NOOP
2108           # define curmad(slot, sv) NOOP
2109           #endif
2110            
2111           /*
2112           * S_force_next
2113           * When the lexer realizes it knows the next token (for instance,
2114           * it is reordering tokens for the parser) then it can call S_force_next
2115           * to know what token to return the next time the lexer is called. Caller
2116           * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2117           * and possibly PL_expect to ensure the lexer handles the token correctly.
2118           */
2119            
2120           STATIC void
2121 253471872         S_force_next(pTHX_ I32 type)
2122           {
2123           dVAR;
2124           #ifdef DEBUGGING
2125           if (DEBUG_T_TEST) {
2126           PerlIO_printf(Perl_debug_log, "### forced token:\n");
2127           tokereport(type, &NEXTVAL_NEXTTOKE);
2128           }
2129           #endif
2130           #ifdef PERL_MAD
2131           if (PL_curforce < 0)
2132           start_force(PL_lasttoke);
2133           PL_nexttoke[PL_curforce].next_type = type;
2134           if (PL_lex_state != LEX_KNOWNEXT)
2135           PL_lex_defer = PL_lex_state;
2136           PL_lex_state = LEX_KNOWNEXT;
2137           PL_lex_expect = PL_expect;
2138           PL_curforce = -1;
2139           #else
2140 253471872         PL_nexttype[PL_nexttoke] = type;
2141 253471872         PL_nexttoke++;
2142 253471872 100       if (PL_lex_state != LEX_KNOWNEXT) {
2143 246791240         PL_lex_defer = PL_lex_state;
2144 246791240         PL_lex_expect = PL_expect;
2145 246791240         PL_lex_state = LEX_KNOWNEXT;
2146           }
2147           #endif
2148 253471872         }
2149            
2150           void
2151 214         Perl_yyunlex(pTHX)
2152           {
2153 214         int yyc = PL_parser->yychar;
2154 214 100       if (yyc != YYEMPTY) {
2155 78 50       if (yyc) {
2156           start_force(-1);
2157 78         NEXTVAL_NEXTTOKE = PL_parser->yylval;
2158 78 100       if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
    100        
2159 8         PL_lex_allbrackets--;
2160 8         PL_lex_brackets--;
2161 8         yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2162 70 50       } else if (yyc == '('/*)*/) {
2163 0         PL_lex_allbrackets--;
2164 0         yyc |= (2<<24);
2165           }
2166 78         force_next(yyc);
2167           }
2168 78         PL_parser->yychar = YYEMPTY;
2169           }
2170 214         }
2171            
2172           STATIC SV *
2173 46609495         S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2174           {
2175           dVAR;
2176 46609495 100       SV * const sv = newSVpvn_utf8(start, len,
    50        
    100        
    50        
    100        
    100        
    100        
    50        
2177           !IN_BYTES
2178           && UTF
2179           && !is_ascii_string((const U8*)start, len)
2180           && is_utf8_string((const U8*)start, len));
2181 46609495         return sv;
2182           }
2183            
2184           /*
2185           * S_force_word
2186           * When the lexer knows the next thing is a word (for instance, it has
2187           * just seen -> and it knows that the next char is a word char, then
2188           * it calls S_force_word to stick the next word into the PL_nexttoke/val
2189           * lookahead.
2190           *
2191           * Arguments:
2192           * char *start : buffer position (must be within PL_linestr)
2193           * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2194           * int check_keyword : if true, Perl checks to make sure the word isn't
2195           * a keyword (do this if the word is a label, e.g. goto FOO)
2196           * int allow_pack : if true, : characters will also be allowed (require,
2197           * use, etc. do this)
2198           * int allow_initial_tick : used by the "sub" lexer only.
2199           */
2200            
2201           STATIC char *
2202 29956093         S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2203           {
2204           dVAR;
2205           char *s;
2206           STRLEN len;
2207            
2208           PERL_ARGS_ASSERT_FORCE_WORD;
2209            
2210 29956093         start = SKIPSPACE1(start);
2211           s = start;
2212 29956093 100       if (isIDFIRST_lazy_if(s,UTF) ||
    50        
    100        
    50        
    100        
    100        
    100        
    100        
    100        
    100        
    50        
    50        
    50        
    50        
    100        
2213 125237 100       (allow_pack && *s == ':') )
2214           {
2215 28983389         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2216 28983389 100       if (check_keyword) {
2217 2490986         char *s2 = PL_tokenbuf;
2218 2490986 100       if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
    100        
    100        
2219 4         s2 += 6, len -= 6;
2220 2490986 100       if (keyword(s2, len, 0))
2221           return start;
2222           }
2223           start_force(PL_curforce);
2224           if (PL_madskills)
2225           curmad('X', newSVpvn(start,s-start));
2226 28004531 100       if (token == METHOD) {
2227 12627485         s = SKIPSPACE1(s);
2228 12627485 100       if (*s == '(')
2229 6208113         PL_expect = XTERM;
2230           else {
2231 6419372         PL_expect = XOPERATOR;
2232           }
2233           }
2234           if (PL_madskills)
2235           curmad('g', newSVpvs( "forced" ));
2236 28004531         NEXTVAL_NEXTTOKE.opval
2237 28004531         = (OP*)newSVOP(OP_CONST,0,
2238           S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2239 28004531         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2240 28004531         force_next(token);
2241           }
2242 29484303         return s;
2243           }
2244            
2245           /*
2246           * S_force_ident
2247           * Called when the lexer wants $foo *foo &foo etc, but the program
2248           * text only contains the "foo" portion. The first argument is a pointer
2249           * to the "foo", and the second argument is the type symbol to prefix.
2250           * Forces the next token to be a "WORD".
2251           * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2252           */
2253            
2254           STATIC void
2255 1802178         S_force_ident(pTHX_ const char *s, int kind)
2256           {
2257           dVAR;
2258            
2259           PERL_ARGS_ASSERT_FORCE_IDENT;
2260            
2261 1802178 100       if (s[0]) {
2262 1080726 100       const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2263 1080726 50       OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
    100        
    50        
    50        
    100        
2264           UTF ? SVf_UTF8 : 0));
2265           start_force(PL_curforce);
2266 1080726         NEXTVAL_NEXTTOKE.opval = o;
2267 1080726         force_next(WORD);
2268 1080726 50       if (kind) {
2269 1080726         o->op_private = OPpCONST_ENTERED;
2270           /* XXX see note in pp_entereval() for why we forgo typo
2271           warnings if the symbol must be introduced in an eval.
2272           GSAR 96-10-12 */
2273 1080726 100       gv_fetchpvn_flags(s, len,
    50        
    50        
    100        
    50        
    100        
    50        
    50        
    100        
2274           (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2275           : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2276           kind == '$' ? SVt_PV :
2277           kind == '@' ? SVt_PVAV :
2278           kind == '%' ? SVt_PVHV :
2279           SVt_PVGV
2280           );
2281           }
2282           }
2283 1802178         }
2284            
2285           static void
2286 142444391         S_force_ident_maybe_lex(pTHX_ char pit)
2287           {
2288           start_force(PL_curforce);
2289 142444391         NEXTVAL_NEXTTOKE.ival = pit;
2290 142444391         force_next('p');
2291 142444391         }
2292            
2293           NV
2294 552         Perl_str_to_version(pTHX_ SV *sv)
2295           {
2296           NV retval = 0.0;
2297           NV nshift = 1.0;
2298           STRLEN len;
2299 552 50       const char *start = SvPV_const(sv,len);
2300 552         const char * const end = start + len;
2301 552         const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2302            
2303           PERL_ARGS_ASSERT_STR_TO_VERSION;
2304            
2305 2270 100       while (start < end) {
2306           STRLEN skip;
2307           UV n;
2308 1622 100       if (utf)
2309 36         n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2310           else {
2311 1586         n = *(U8*)start;
2312 1586         skip = 1;
2313           }
2314 1622         retval += ((NV)n)/nshift;
2315 1622         start += skip;
2316 1622         nshift *= 1000;
2317           }
2318 552         return retval;
2319           }
2320            
2321           /*
2322           * S_force_version
2323           * Forces the next token to be a version number.
2324           * If the next token appears to be an invalid version number, (e.g. "v2b"),
2325           * and if "guessing" is TRUE, then no new token is created (and the caller
2326           * must use an alternative parsing method).
2327           */
2328            
2329           STATIC char *
2330 4697200         S_force_version(pTHX_ char *s, int guessing)
2331           {
2332           dVAR;
2333           OP *version = NULL;
2334           char *d;
2335           #ifdef PERL_MAD
2336           I32 startoff = s - SvPVX(PL_linestr);
2337           #endif
2338            
2339           PERL_ARGS_ASSERT_FORCE_VERSION;
2340            
2341 4697200         s = SKIPSPACE1(s);
2342            
2343           d = s;
2344 4697200 100       if (*d == 'v')
2345 148         d++;
2346 4697200 100       if (isDIGIT(*d)) {
2347 1218624 100       while (isDIGIT(*d) || *d == '_' || *d == '.')
    100        
    100        
2348 1030870         d++;
2349           #ifdef PERL_MAD
2350           if (PL_madskills) {
2351           start_force(PL_curforce);
2352           curmad('X', newSVpvn(s,d-s));
2353           }
2354           #endif
2355 187754 100       if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
    100        
    50        
    100        
    50        
2356           SV *ver;
2357           #ifdef USE_LOCALE_NUMERIC
2358 187720         char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2359 187720         setlocale(LC_NUMERIC, "C");
2360           #endif
2361 187720         s = scan_num(s, &pl_yylval);
2362           #ifdef USE_LOCALE_NUMERIC
2363 187720         setlocale(LC_NUMERIC, loc);
2364 187720         Safefree(loc);
2365           #endif
2366 187720         version = pl_yylval.opval;
2367 187720         ver = cSVOPx(version)->op_sv;
2368 187816 100       if (SvPOK(ver) && !SvNIOK(ver)) {
    50        
2369 96         SvUPGRADE(ver, SVt_PVNV);
2370 552         SvNV_set(ver, str_to_version(ver));
2371 552         SvNOK_on(ver); /* hint that it is a version */
2372           }
2373           }
2374 34 50       else if (guessing) {
2375           #ifdef PERL_MAD
2376           if (PL_madskills) {
2377           sv_free(PL_nextwhite); /* let next token collect whitespace */
2378           PL_nextwhite = 0;
2379           s = SvPVX(PL_linestr) + startoff;
2380           }
2381           #endif
2382           return s;
2383           }
2384           }
2385            
2386           #ifdef PERL_MAD
2387           if (PL_madskills && !version) {
2388           sv_free(PL_nextwhite); /* let next token collect whitespace */
2389           PL_nextwhite = 0;
2390           s = SvPVX(PL_linestr) + startoff;
2391           }
2392           #endif
2393           /* NOTE: The parser sees the package name and the VERSION swapped */
2394           start_force(PL_curforce);
2395 4697200         NEXTVAL_NEXTTOKE.opval = version;
2396 4697200         force_next(WORD);
2397            
2398 4697200         return s;
2399           }
2400            
2401           /*
2402           * S_force_strict_version
2403           * Forces the next token to be a version number using strict syntax rules.
2404           */
2405            
2406           STATIC char *
2407 700686         S_force_strict_version(pTHX_ char *s)
2408           {
2409           dVAR;
2410           OP *version = NULL;
2411           #ifdef PERL_MAD
2412           I32 startoff = s - SvPVX(PL_linestr);
2413           #endif
2414 700686         const char *errstr = NULL;
2415            
2416           PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2417            
2418 1037351 50       while (isSPACE(*s)) /* leading whitespace */
2419 0         s++;
2420            
2421 700686 100       if (is_STRICT_VERSION(s,&errstr)) {
2422 88         SV *ver = newSV(0);
2423 88         s = (char *)scan_version(s, ver, 0);
2424 88         version = newSVOP(OP_CONST, 0, ver);
2425           }
2426 700854 100       else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
    50        
2427 256 50       (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2428           {
2429 128         PL_bufptr = s;
2430 128 50       if (errstr)
2431 128         yyerror(errstr); /* version required */
2432           return s;
2433           }
2434            
2435           #ifdef PERL_MAD
2436           if (PL_madskills && !version) {
2437           sv_free(PL_nextwhite); /* let next token collect whitespace */
2438           PL_nextwhite = 0;
2439           s = SvPVX(PL_linestr) + startoff;
2440           }
2441           #endif
2442           /* NOTE: The parser sees the package name and the VERSION swapped */
2443           start_force(PL_curforce);
2444 700558         NEXTVAL_NEXTTOKE.opval = version;
2445 700558         force_next(WORD);
2446            
2447 700622         return s;
2448           }
2449            
2450           /*
2451           * S_tokeq
2452           * Tokenize a quoted string passed in as an SV. It finds the next
2453           * chunk, up to end of string or a backslash. It may make a new
2454           * SV containing that chunk (if HINT_NEW_STRING is on). It also
2455           * turns \\ into \.
2456           */
2457            
2458           STATIC SV *
2459 46589553         S_tokeq(pTHX_ SV *sv)
2460           {
2461           dVAR;
2462           char *s;
2463           char *send;
2464           char *d;
2465 46589553         STRLEN len = 0;
2466           SV *pv = sv;
2467            
2468           PERL_ARGS_ASSERT_TOKEQ;
2469            
2470 46589553 50       if (!SvLEN(sv))
2471           goto finish;
2472            
2473 46589553 50       s = SvPV_force(sv, len);
2474 46589553 100       if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
    100        
2475           goto finish;
2476 46367387         send = s + len;
2477           /* This is relying on the SV being "well formed" with a trailing '\0' */
2478 443066293 100       while (s < send && !(*s == '\\' && s[1] == '\\'))
    100        
    100        
2479 374216179         s++;
2480 46367387 100       if (s == send)
2481           goto finish;
2482           d = s;
2483 208390 50       if ( PL_hints & HINT_NEW_STRING ) {
2484 107255         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2485           }
2486 1425704 100       while (s < send) {
2487 1217314 100       if (*s == '\\') {
2488 253070 50       if (s + 1 < send && (s[1] == '\\'))
    100        
2489 231280         s++; /* all that, just for this */
2490           }
2491 1217314         *d++ = *s++;
2492           }
2493 208390         *d = '\0';
2494 208390         SvCUR_set(sv, d - SvPVX_const(sv));
2495           finish:
2496 46589553 100       if ( PL_hints & HINT_NEW_STRING )
2497 23996463         return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2498           return sv;
2499           }
2500            
2501           /*
2502           * Now come three functions related to double-quote context,
2503           * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2504           * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2505           * interact with PL_lex_state, and create fake ( ... ) argument lists
2506           * to handle functions and concatenation.
2507           * For example,
2508           * "foo\lbar"
2509           * is tokenised as
2510           * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2511           */
2512            
2513           /*
2514           * S_sublex_start
2515           * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2516           *
2517           * Pattern matching will set PL_lex_op to the pattern-matching op to
2518           * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2519           *
2520           * OP_CONST and OP_READLINE are easy--just make the new op and return.
2521           *
2522           * Everything else becomes a FUNC.
2523           *
2524           * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2525           * had an OP_CONST or OP_READLINE). This just sets us up for a
2526           * call to S_sublex_push().
2527           */
2528            
2529           STATIC I32
2530 53038691         S_sublex_start(pTHX)
2531           {
2532           dVAR;
2533 53038691         const I32 op_type = pl_yylval.ival;
2534            
2535 53038691 100       if (op_type == OP_NULL) {
2536 90680         pl_yylval.opval = PL_lex_op;
2537 90680         PL_lex_op = NULL;
2538 90680         return THING;
2539           }
2540 52948011 100       if (op_type == OP_CONST || op_type == OP_READLINE) {
2541 37751218         SV *sv = tokeq(PL_lex_stuff);
2542            
2543 37751218 100       if (SvTYPE(sv) == SVt_PVIV) {
2544           /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2545           STRLEN len;
2546 37751216 50       const char * const p = SvPV_const(sv, len);
2547 37751216         SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2548 37751216         SvREFCNT_dec(sv);
2549           sv = nsv;
2550           }
2551 37751218         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2552 37751218         PL_lex_stuff = NULL;
2553           /* Allow // "foo" */
2554 37751218 50       if (op_type == OP_READLINE)
2555 0         PL_expect = XTERMORDORDOR;
2556           return THING;
2557           }
2558 15196793 100       else if (op_type == OP_BACKTICK && PL_lex_op) {
    100        
2559           /* readpipe() was overridden */
2560 8         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2561 8         pl_yylval.opval = PL_lex_op;
2562 8         PL_lex_op = NULL;
2563 8         PL_lex_stuff = NULL;
2564 8         return THING;
2565           }
2566            
2567 15196785         PL_sublex_info.super_state = PL_lex_state;
2568 15196785         PL_sublex_info.sub_inwhat = (U16)op_type;
2569 15196785         PL_sublex_info.sub_op = PL_lex_op;
2570 15196785         PL_lex_state = LEX_INTERPPUSH;
2571            
2572 15196785         PL_expect = XTERM;
2573 15196785 100       if (PL_lex_op) {
2574 4846039         pl_yylval.opval = PL_lex_op;
2575 4846039         PL_lex_op = NULL;
2576 29691837         return PMFUNC;
2577           }
2578           else
2579           return FUNC;
2580           }
2581            
2582           /*
2583           * S_sublex_push
2584           * Create a new scope to save the lexing state. The scope will be
2585           * ended in S_sublex_done. Returns a '(', starting the function arguments
2586           * to the uc, lc, etc. found before.
2587           * Sets PL_lex_state to LEX_INTERPCONCAT.
2588           */
2589            
2590           STATIC I32
2591 15196785         S_sublex_push(pTHX)
2592           {
2593           dVAR;
2594           LEXSHARED *shared;
2595 15196785         const bool is_heredoc =
2596 15196785         CopLINE(PL_curcop) == (line_t)PL_multi_start - 1;
2597 15196785         ENTER;
2598            
2599           assert(CopLINE(PL_curcop) == (line_t)PL_multi_start
2600           || CopLINE(PL_curcop) == (line_t)PL_multi_start - 1);
2601            
2602 15196785         PL_lex_state = PL_sublex_info.super_state;
2603 15196785         SAVEBOOL(PL_lex_dojoin);
2604 15196785         SAVEI32(PL_lex_brackets);
2605 15196785         SAVEI32(PL_lex_allbrackets);
2606 15196785         SAVEI32(PL_lex_formbrack);
2607 15196785         SAVEI8(PL_lex_fakeeof);
2608 15196785         SAVEI32(PL_lex_casemods);
2609 15196785         SAVEI32(PL_lex_starts);
2610 15196785         SAVEI8(PL_lex_state);
2611 15196785         SAVESPTR(PL_lex_repl);
2612 15196785         SAVEVPTR(PL_lex_inpat);
2613 15196785         SAVEI16(PL_lex_inwhat);
2614 15196785 100       if (is_heredoc)
2615 134157         SAVECOPLINE(PL_curcop);
2616 15196785         SAVEPPTR(PL_bufptr);
2617 15196785         SAVEPPTR(PL_bufend);
2618 15196785         SAVEPPTR(PL_oldbufptr);
2619 15196785         SAVEPPTR(PL_oldoldbufptr);
2620 15196785         SAVEPPTR(PL_last_lop);
2621 15196785         SAVEPPTR(PL_last_uni);
2622 15196785         SAVEPPTR(PL_linestart);
2623 15196785         SAVESPTR(PL_linestr);
2624 15196785         SAVEGENERICPV(PL_lex_brackstack);
2625 15196785         SAVEGENERICPV(PL_lex_casestack);
2626 15196785         SAVEGENERICPV(PL_parser->lex_shared);
2627 15196785         SAVEBOOL(PL_parser->lex_re_reparsing);
2628 15196785         SAVEI32(PL_copline);
2629            
2630           /* The here-doc parser needs to be able to peek into outer lexing
2631           scopes to find the body of the here-doc. So we put PL_linestr and
2632           PL_bufptr into lex_shared, to ‘share’ those values.
2633           */
2634 15196785         PL_parser->lex_shared->ls_linestr = PL_linestr;
2635 15196785         PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2636            
2637 15196785         PL_linestr = PL_lex_stuff;
2638 15196785         PL_lex_repl = PL_sublex_info.repl;
2639 15196785         PL_lex_stuff = NULL;
2640 15196785         PL_sublex_info.repl = NULL;
2641            
2642 15196785         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2643 15196785         = SvPVX(PL_linestr);
2644 15196785         PL_bufend += SvCUR(PL_linestr);
2645 15196785         PL_last_lop = PL_last_uni = NULL;
2646 15196785         SAVEFREESV(PL_linestr);
2647 15196785 100       if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2648            
2649 15196785         PL_lex_dojoin = FALSE;
2650 15196785         PL_lex_brackets = PL_lex_formbrack = 0;
2651 15196785         PL_lex_allbrackets = 0;
2652 15196785         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2653 15196785         Newx(PL_lex_brackstack, 120, char);
2654 15196785         Newx(PL_lex_casestack, 12, char);
2655 15196785         PL_lex_casemods = 0;
2656 15196785         *PL_lex_casestack = '\0';
2657 15196785         PL_lex_starts = 0;
2658 15196785         PL_lex_state = LEX_INTERPCONCAT;
2659 15196785 100       if (is_heredoc)
2660 134157         CopLINE_inc(PL_curcop);
2661 15196785         PL_copline = NOLINE;
2662          
2663 15196785         Newxz(shared, 1, LEXSHARED);
2664 15196785         shared->ls_prev = PL_parser->lex_shared;
2665 15196785         PL_parser->lex_shared = shared;
2666 15196785 100       if (!is_heredoc && PL_multi_start != PL_multi_end) {
    100        
2667 282217         shared->herelines = shared->ls_prev->herelines;
2668 282217         shared->ls_prev->herelines = 0;
2669           }
2670            
2671 15196785         PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2672 15196785 100       if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2673 15196785 100       if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2674 4736789         PL_lex_inpat = PL_sublex_info.sub_op;
2675           else
2676 10459996         PL_lex_inpat = NULL;
2677            
2678 15196785         PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2679 15196785         PL_in_eval &= ~EVAL_RE_REPARSING;
2680            
2681 15196785         return '(';
2682           }
2683            
2684           /*
2685           * S_sublex_done
2686           * Restores lexer state after a S_sublex_push.
2687           */
2688            
2689           STATIC I32
2690 17555602         S_sublex_done(pTHX)
2691           {
2692           dVAR;
2693 17555602 100       if (!PL_lex_starts++) {
2694 730414         SV * const sv = newSVpvs("");
2695 730414 100       if (SvUTF8(PL_linestr))
2696 2         SvUTF8_on(sv);
2697 730414         PL_expect = XOPERATOR;
2698 730414         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2699 730414         return THING;
2700           }
2701            
2702 16825188 100       if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2703 31382         PL_lex_state = LEX_INTERPCASEMOD;
2704 31382         return yylex();
2705           }
2706            
2707           /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2708           assert(PL_lex_inwhat != OP_TRANSR);
2709 16793806 100       if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
    50        
2710 1597241         PL_linestr = PL_lex_repl;
2711 1597241         PL_lex_inpat = 0;
2712 1597241         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2713 1597241         PL_bufend += SvCUR(PL_linestr);
2714 1597241         PL_last_lop = PL_last_uni = NULL;
2715 1597241         PL_lex_dojoin = FALSE;
2716 1597241         PL_lex_brackets = 0;
2717 1597241         PL_lex_allbrackets = 0;
2718 1597241         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2719 1597241         PL_lex_casemods = 0;
2720 1597241         *PL_lex_casestack = '\0';
2721 1597241         PL_lex_starts = 0;
2722 1597241 100       if (SvEVALED(PL_lex_repl)) {
2723 175369         PL_lex_state = LEX_INTERPNORMAL;
2724 175369         PL_lex_starts++;
2725           /* we don't clear PL_lex_repl here, so that we can check later
2726           whether this is an evalled subst; that means we rely on the
2727           logic to ensure sublex_done() is called again only via the
2728           branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2729           }
2730           else {
2731 1421872         PL_lex_state = LEX_INTERPCONCAT;
2732 1421872         PL_lex_repl = NULL;
2733           }
2734 1597241 100       if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2735 56570         CopLINE(PL_curcop) +=
2736 28285         ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2737 28285         + PL_parser->lex_shared->herelines;
2738 28285         PL_parser->lex_shared->herelines = 0;
2739           }
2740           return ',';
2741           }
2742           else {
2743           #ifdef PERL_MAD
2744           if (PL_madskills) {
2745           if (PL_thiswhite) {
2746           if (!PL_endwhite)
2747           PL_endwhite = newSVpvs("");
2748           sv_catsv(PL_endwhite, PL_thiswhite);
2749           PL_thiswhite = 0;
2750           }
2751           if (PL_thistoken)
2752           sv_setpvs(PL_thistoken,"");
2753           else
2754           PL_realtokenstart = -1;
2755           }
2756           #endif
2757 15196565         LEAVE;
2758 15196565         PL_bufend = SvPVX(PL_linestr);
2759 15196565         PL_bufend += SvCUR(PL_linestr);
2760 15196565         PL_expect = XOPERATOR;
2761 15196565         PL_sublex_info.sub_inwhat = 0;
2762 16420353         return ')';
2763           }
2764           }
2765            
2766           PERL_STATIC_INLINE SV*
2767 2578         S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2768           {
2769           /* points to first character of interior of \N{}, to one beyond the
2770           * interior, hence to the "}". Finds what the name resolves to, returning
2771           * an SV* containing it; NULL if no valid one found */
2772            
2773 2578 50       SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
    100        
    50        
    50        
    100        
2774            
2775           HV * table;
2776           SV **cvp;
2777           SV *cv;
2778           SV *rv;
2779           HV *stash;
2780           const U8* first_bad_char_loc;
2781 2578         const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2782            
2783           PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2784            
2785 2578 50       if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
    100        
    50        
    50        
    100        
    100        
2786           e - backslash_ptr,
2787           &first_bad_char_loc))
2788           {
2789           /* If warnings are on, this will print a more detailed analysis of what
2790           * is wrong than the error message below */
2791 2         utf8n_to_uvchr(first_bad_char_loc,
2792           e - ((char *) first_bad_char_loc),
2793           NULL, 0);
2794            
2795           /* We deliberately don't try to print the malformed character, which
2796           * might not print very well; it also may be just the first of many
2797           * malformations, so don't print what comes after it */
2798 2         yyerror(Perl_form(aTHX_
2799           "Malformed UTF-8 character immediately after '%.*s'",
2800           (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2801 2         return NULL;
2802           }
2803            
2804 2576         res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2805           /* include the <}> */
2806           e - backslash_ptr + 1);
2807 2572 100       if (! SvPOK(res)) {
2808           SvREFCNT_dec_NN(res);
2809           return NULL;
2810           }
2811            
2812           /* See if the charnames handler is the Perl core's, and if so, we can skip
2813           * the validation needed for a user-supplied one, as Perl's does its own
2814           * validation. */
2815 2462         table = GvHV(PL_hintgv); /* ^H */
2816 2462         cvp = hv_fetchs(table, "charnames", FALSE);
2817 2462 100       if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
    50        
    100        
    50        
2818 2456 100       && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
    50        
2819           {
2820 2454 50       const char * const name = HvNAME(stash);
    50        
    50        
    0        
    50        
    50        
2821 2454 100       if strEQ(name, "_charnames") {
2822           return res;
2823           }
2824           }
2825            
2826           /* Here, it isn't Perl's charname handler. We can't rely on a
2827           * user-supplied handler to validate the input name. For non-ut8 input,
2828           * look to see that the first character is legal. Then loop through the
2829           * rest checking that each is a continuation */
2830            
2831           /* This code needs to be sync'ed with a regex in _charnames.pm which does
2832           * the same thing */
2833            
2834 100 50       if (! UTF) {
    100        
    50        
    50        
    100        
2835 72 100       if (! isALPHAU(*s)) {
2836           goto bad_charname;
2837           }
2838 70         s++;
2839 561 100       while (s < e) {
2840 462 100       if (! isCHARNAME_CONT(*s)) {
2841           goto bad_charname;
2842           }
2843 458 100       if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
    100        
    100        
2844 6         Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2845           "A sequence of multiple spaces in a charnames "
2846           "alias definition is deprecated");
2847           }
2848 456         s++;
2849           }
2850 64 100       if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
    100        
2851 6         Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2852           "Trailing white-space in a charnames alias "
2853           "definition is deprecated");
2854           }
2855           }
2856           else {
2857           /* Similarly for utf8. For invariants can check directly; for other
2858           * Latin1, can calculate their code point and check; otherwise use a
2859           * swash */
2860 28 100       if (UTF8_IS_INVARIANT(*s)) {
2861 24 100       if (! isALPHAU(*s)) {
2862           goto bad_charname;
2863           }
2864 23         s++;
2865 4 50       } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2866 0 0       if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
    0        
2867           goto bad_charname;
2868           }
2869 0         s += 2;
2870           }
2871           else {
2872 4 100       if (! PL_utf8_charname_begin) {
2873 2         U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2874 2         PL_utf8_charname_begin = _core_swash_init("utf8",
2875           "_Perl_Charname_Begin",
2876           &PL_sv_undef,
2877           1, 0, NULL, &flags);
2878           }
2879 4 100       if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2880           goto bad_charname;
2881           }
2882 2         s += UTF8SKIP(s);
2883           }
2884            
2885 260 100       while (s < e) {
2886 244 100       if (UTF8_IS_INVARIANT(*s)) {
2887 238 100       if (! isCHARNAME_CONT(*s)) {
2888           goto bad_charname;
2889           }
2890 236 100       if (*s == ' ' && *(s-1) == ' '
    100        
2891 8 100       && ckWARN_d(WARN_DEPRECATED)) {
2892 6         Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2893           "A sequence of multiple spaces in a charnam"
2894           "es alias definition is deprecated");
2895           }
2896 234         s++;
2897           }
2898 6 100       else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2899 2 50       if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
    50        
2900           {
2901           goto bad_charname;
2902           }
2903 0         s += 2;
2904           }
2905           else {
2906 4 100       if (! PL_utf8_charname_continue) {
2907 2         U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2908 2         PL_utf8_charname_continue = _core_swash_init("utf8",
2909           "_Perl_Charname_Continue",
2910           &PL_sv_undef,
2911           1, 0, NULL, &flags);
2912           }
2913 4 100       if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2914           goto bad_charname;
2915           }
2916 119         s += UTF8SKIP(s);
2917           }
2918           }
2919 16 100       if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
    100        
2920 6         Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2921           "Trailing white-space in a charnames alias "
2922           "definition is deprecated");
2923           }
2924           }
2925            
2926 76 100       if (SvUTF8(res)) { /* Don't accept malformed input */
2927           const U8* first_bad_char_loc;
2928           STRLEN len;
2929 16 50       const char* const str = SvPV_const(res, len);
2930 16 100       if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2931           /* If warnings are on, this will print a more detailed analysis of
2932           * what is wrong than the error message below */
2933 2         utf8n_to_uvchr(first_bad_char_loc,
2934           (char *) first_bad_char_loc - str,
2935           NULL, 0);
2936            
2937           /* We deliberately don't try to print the malformed character,
2938           * which might not print very well; it also may be just the first
2939           * of many malformations, so don't print what comes after it */
2940 2         yyerror_pv(
2941           Perl_form(aTHX_
2942           "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2943           (int) (e - backslash_ptr + 1), backslash_ptr,
2944           (int) ((char *) first_bad_char_loc - str), str
2945           ),
2946           SVf_UTF8);
2947 2         return NULL;
2948           }
2949           }
2950            
2951           return res;
2952            
2953           bad_charname: {
2954 16 50       int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
    100        
    50        
    50        
    100        
2955            
2956           /* The final %.*s makes sure that should the trailing NUL be missing
2957           * that this print won't run off the end of the string */
2958 16 50       yyerror_pv(
    100        
    50        
    50        
    100        
2959           Perl_form(aTHX_
2960           "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2961           (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2962           (int)(e - s + bad_char_size), s + bad_char_size
2963           ),
2964           UTF ? SVf_UTF8 : 0);
2965 1291         return NULL;
2966           }
2967           }
2968            
2969           /*
2970           scan_const
2971            
2972           Extracts the next constant part of a pattern, double-quoted string,
2973           or transliteration. This is terrifying code.
2974            
2975           For example, in parsing the double-quoted string "ab\x63$d", it would
2976           stop at the '$' and return an OP_CONST containing 'abc'.
2977            
2978           It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2979           processing a pattern (PL_lex_inpat is true), a transliteration
2980           (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2981            
2982           Returns a pointer to the character scanned up to. If this is
2983           advanced from the start pointer supplied (i.e. if anything was
2984           successfully parsed), will leave an OP_CONST for the substring scanned
2985           in pl_yylval. Caller must intuit reason for not parsing further
2986           by looking at the next characters herself.
2987            
2988           In patterns:
2989           expand:
2990           \N{FOO} => \N{U+hex_for_character_FOO}
2991           (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2992            
2993           pass through:
2994           all other \-char, including \N and \N{ apart from \N{ABC}
2995            
2996           stops on:
2997           @ and $ where it appears to be a var, but not for $ as tail anchor
2998           \l \L \u \U \Q \E
2999           (?{ or (??{
3000            
3001            
3002           In transliterations:
3003           characters are VERY literal, except for - not at the start or end
3004           of the string, which indicates a range. If the range is in bytes,
3005           scan_const expands the range to the full set of intermediate
3006           characters. If the range is in utf8, the hyphen is replaced with
3007           a certain range mark which will be handled by pmtrans() in op.c.
3008            
3009           In double-quoted strings:
3010           backslashes:
3011           double-quoted style: \r and \n
3012           constants: \x31, etc.
3013           deprecated backrefs: \1 (in substitution replacements)
3014           case and quoting: \U \Q \E
3015           stops on @ and $
3016            
3017           scan_const does *not* construct ops to handle interpolated strings.
3018           It stops processing as soon as it finds an embedded $ or @ variable
3019           and leaves it to the caller to work out what's going on.
3020            
3021           embedded arrays (whether in pattern or not) could be:
3022           @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3023            
3024           $ in double-quoted strings must be the symbol of an embedded scalar.
3025            
3026           $ in pattern could be $foo or could be tail anchor. Assumption:
3027           it's a tail anchor if $ is the last thing in the string, or if it's
3028           followed by one of "()| \r\n\t"
3029            
3030           \1 (backreferences) are turned into $1 in substitutions
3031            
3032           The structure of the code is
3033           while (there's a character to process) {
3034           handle transliteration ranges
3035           skip regexp comments /(?#comment)/ and codes /(?{code})/
3036           skip #-initiated comments in //x patterns
3037           check for embedded arrays
3038           check for embedded scalars
3039           if (backslash) {
3040           deprecate \1 in substitution replacements
3041           handle string-changing backslashes \l \U \Q \E, etc.
3042           switch (what was escaped) {
3043           handle \- in a transliteration (becomes a literal -)
3044           if a pattern and not \N{, go treat as regular character
3045           handle \132 (octal characters)
3046           handle \x15 and \x{1234} (hex characters)
3047           handle \N{name} (named characters, also \N{3,5} in a pattern)
3048           handle \cV (control characters)
3049           handle printf-style backslashes (\f, \r, \n, etc)
3050           } (end switch)
3051           continue
3052           } (end if backslash)
3053           handle regular character
3054           } (end while character to read)
3055          
3056           */
3057            
3058           STATIC char *
3059 22710265         S_scan_const(pTHX_ char *start)
3060           {
3061           dVAR;
3062 22710265         char *send = PL_bufend; /* end of the constant */
3063 22710265         SV *sv = newSV(send - start); /* sv for the constant. See
3064           note below on sizing. */
3065 22710265         char *s = start; /* start of the constant */
3066 22710265         char *d = SvPVX(sv); /* destination for copies */
3067           bool dorange = FALSE; /* are we in a translit range? */
3068           bool didrange = FALSE; /* did we just finish a range? */
3069           bool in_charclass = FALSE; /* within /[...]/ */
3070           bool has_utf8 = FALSE; /* Output constant is UTF8 */
3071 22710265 50       bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
    100        
    50        
    100        
    100        
3072           to be UTF8? But, this can
3073           show as true when the source
3074           isn't utf8, as for example
3075           when it is entirely composed
3076           of hex constants */
3077           SV *res; /* result from charnames */
3078            
3079           /* Note on sizing: The scanned constant is placed into sv, which is
3080           * initialized by newSV() assuming one byte of output for every byte of
3081           * input. This routine expects newSV() to allocate an extra byte for a
3082           * trailing NUL, which this routine will append if it gets to the end of
3083           * the input. There may be more bytes of input than output (eg., \N{LATIN
3084           * CAPITAL LETTER A}), or more output than input if the constant ends up
3085           * recoded to utf8, but each time a construct is found that might increase
3086           * the needed size, SvGROW() is called. Its size parameter each time is
3087           * based on the best guess estimate at the time, namely the length used so
3088           * far, plus the length the current construct will occupy, plus room for
3089           * the trailing NUL, plus one byte for every input byte still unscanned */
3090            
3091 22710265         UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3092           before set */
3093           #ifdef EBCDIC
3094           UV literal_endpoint = 0;
3095           bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3096           #endif
3097            
3098           PERL_ARGS_ASSERT_SCAN_CONST;
3099            
3100           assert(PL_lex_inwhat != OP_TRANSR);
3101 22710265 100       if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
    50        
3102           /* If we are doing a trans and we know we want UTF8 set expectation */
3103 135970         has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3104 135970 100       this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3105           }
3106            
3107           /* Protect sv from errors and fatal warnings. */
3108 22710265         ENTER_with_name("scan_const");
3109 22710265         SAVEFREESV(sv);
3110            
3111 258448946 100       while (s < send || dorange) {
    100        
3112            
3113           /* get transliterations out of the way (they're most literal) */
3114 234247528 100       if (PL_lex_inwhat == OP_TRANS) {
3115           /* expand a range A-Z to the full set of characters. AIE! */
3116 327926 100       if (dorange) {
3117           I32 i; /* current expanded character */
3118           I32 min; /* first character in range */
3119           I32 max; /* last character in range */
3120            
3121           #ifdef EBCDIC
3122           UV uvmax = 0;
3123           #endif
3124            
3125 58448 100       if (has_utf8
3126           #ifdef EBCDIC
3127           && !native_range
3128           #endif
3129           ) {
3130 8         char * const c = (char*)utf8_hop((U8*)d, -1);
3131 8         char *e = d++;
3132 30 100       while (e-- > c)
3133 18         *(e + 1) = *e;
3134 8         *c = (char) ILLEGAL_UTF8_BYTE;
3135           /* mark the range as done, and continue */
3136           dorange = FALSE;
3137           didrange = TRUE;
3138 8         continue;
3139           }
3140            
3141 58440         i = d - SvPVX_const(sv); /* remember current offset */
3142           #ifdef EBCDIC
3143           SvGROW(sv,
3144           SvLEN(sv) + (has_utf8 ?
3145           (512 - UTF_CONTINUATION_MARK +
3146           UNISKIP(0x100))
3147           : 256));
3148           /* How many two-byte within 0..255: 128 in UTF-8,
3149           * 96 in UTF-8-mod. */
3150           #else
3151 58440 50       SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
    50        
3152           #endif
3153 58440         d = SvPVX(sv) + i; /* refresh d after realloc */
3154           #ifdef EBCDIC
3155           if (has_utf8) {
3156           int j;
3157           for (j = 0; j <= 1; j++) {
3158           char * const c = (char*)utf8_hop((U8*)d, -1);
3159           const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3160           if (j)
3161           min = (U8)uv;
3162           else if (uv < 256)
3163           max = (U8)uv;
3164           else {
3165           max = (U8)0xff; /* only to \xff */
3166           uvmax = uv; /* \x{100} to uvmax */
3167           }
3168           d = c; /* eat endpoint chars */
3169           }
3170           }
3171           else {
3172           #endif
3173 58440         d -= 2; /* eat the first char and the - */
3174 58440         min = (U8)*d; /* first char in range */
3175 58440         max = (U8)d[1]; /* last char in range */
3176           #ifdef EBCDIC
3177           }
3178           #endif
3179            
3180 58440 100       if (min > max) {
3181 9         Perl_croak(aTHX_
3182           "Invalid range \"%c-%c\" in transliteration operator",
3183 12         (char)min, (char)max);
3184           }
3185            
3186           #ifdef EBCDIC
3187           if (literal_endpoint == 2 &&
3188           ((isLOWER_A(min) && isLOWER_A(max)) ||
3189           (isUPPER_A(min) && isUPPER_A(max))))
3190           {
3191           for (i = min; i <= max; i++) {
3192           if (isALPHA_A(i))
3193           *d++ = i;
3194           }
3195           }
3196           else
3197           #endif
3198 2732291 100       for (i = min; i <= max; i++)
3199           #ifdef EBCDIC
3200           if (has_utf8) {
3201           append_utf8_from_native_byte(i, &d);
3202           }
3203           else
3204           #endif
3205 2701994         *d++ = (char)i;
3206          
3207           #ifdef EBCDIC
3208           if (uvmax) {
3209           d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3210           if (uvmax > 0x101)
3211           *d++ = (char) ILLEGAL_UTF8_BYTE;
3212           if (uvmax > 0x100)
3213           d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3214           }
3215           #endif
3216            
3217           /* mark the range as done, and continue */
3218           dorange = FALSE;
3219           didrange = TRUE;
3220           #ifdef EBCDIC
3221           literal_endpoint = 0;
3222           #endif
3223 58434         continue;
3224           }
3225            
3226           /* range begins (ignore - as first or last char) */
3227 269478 100       else if (*s == '-' && s+1 < send && s != start) {
    100        
3228 58540 100       if (didrange) {
3229 6         Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3230           }
3231 58534 100       if (has_utf8
3232           #ifdef EBCDIC
3233           && !native_range
3234           #endif
3235           ) {
3236 86         *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3237 86         s++;
3238 86         continue;
3239           }
3240           dorange = TRUE;
3241 58448         s++;
3242           }
3243           else {
3244           didrange = FALSE;
3245           #ifdef EBCDIC
3246           literal_endpoint = 0;
3247           native_range = TRUE;
3248           #endif
3249           }
3250           }
3251            
3252           /* if we get here, we're not doing a transliteration */
3253            
3254 233919602 100       else if (*s == '[' && PL_lex_inpat && !in_charclass) {
    100        
    100        
3255 1812839         char *s1 = s-1;
3256           int esc = 0;
3257 2721771 100       while (s1 >= start && *s1-- == '\\')
    100        
3258 41752         esc = !esc;
3259 1812839 100       if (!esc)
3260           in_charclass = TRUE;
3261           }
3262            
3263 232106763 100       else if (*s == ']' && PL_lex_inpat && in_charclass) {
    100        
    100        
3264 1807945         char *s1 = s-1;
3265           int esc = 0;
3266 2882018 50       while (s1 >= start && *s1-- == '\\')
    100        
3267 209340         esc = !esc;
3268 1807945 100       if (!esc)
3269           in_charclass = FALSE;
3270           }
3271            
3272           /* skip for regexp comments /(?#comment)/, except for the last
3273           * char, which will be done separately.
3274           * Stop on (?{..}) and friends */
3275            
3276 230298818 100       else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
    100        
    100        
    100        
3277 791588 100       if (s[2] == '#') {
3278 3048 100       while (s+1 < send && *s != ')')
    100        
3279 2608         *d++ = *s++;
3280           }
3281 1173025 100       else if (!PL_lex_casemods &&
    100        
3282 791112         ( s[2] == '{' /* This should match regcomp.c */
3283 788696 100       || (s[2] == '?' && s[3] == '{')))
    50        
3284           {
3285           break;
3286           }
3287           }
3288            
3289           /* likewise skip #-initiated comments in //x patterns */
3290 229616858 100       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
    100        
    100        
    100        
3291 232936         ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3292 4351078 100       while (s+1 < send && *s != '\n')
    100        
3293 4146494         *d++ = *s++;
3294           }
3295            
3296           /* no further processing of single-quoted regex */
3297 229302646 100       else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
    100        
3298           goto default_action;
3299            
3300           /* check for embedded arrays
3301           (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3302           */
3303 228531966 100       else if (*s == '@' && s[1]) {
    100        
3304 248862 100       if (isWORDCHAR_lazy_if(s+1,UTF))
    50        
    100        
    50        
    50        
    100        
    50        
    0        
    0        
    0        
    100        
3305           break;
3306 64220 100       if (strchr(":'{$", s[1]))
3307           break;
3308 37642 100       if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
    100        
3309           break; /* in regexp, neither @+ nor @- are interpolated */
3310           }
3311            
3312           /* check for embedded scalars. only stop if we're sure it's a
3313           variable.
3314           */
3315 228283104 100       else if (*s == '$') {
3316 10113463 100       if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3317           break;
3318 1486465 100       if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
    100        
3319 405558 100       if (s[1] == '\\') {
3320 12         Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3321           "Possible unintended interpolation of $\\ in regex");
3322           }
3323           break; /* in regexp, $ might be tail anchor */
3324           }
3325           }
3326            
3327           /* End of else if chain - OP_TRANS rejoin rest */
3328            
3329           /* backslashes */
3330 224163272 100       if (*s == '\\' && s+1 < send) {
    100        
3331           char* e; /* Can be used for ending '}', etc. */
3332            
3333 44060493         s++;
3334            
3335           /* warn on \1 - \9 in substitution replacements, but note that \11
3336           * is an octal; and \19 is \1 followed by '9' */
3337 44255036 100       if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
    100        
    100        
3338 198671 100       isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
    100        
3339           {
3340 10         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3341 6         *--s = '$';
3342 6         break;
3343           }
3344            
3345           /* string-change backslash escapes */
3346 44060483 100       if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
    100        
    100        
3347 200626         --s;
3348 200626         break;
3349           }
3350           /* In a pattern, process \N, but skip any other backslash escapes.
3351           * This is because we don't want to translate an escape sequence
3352           * into a meta symbol and have the regex compiler use the meta
3353           * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3354           * in spite of this, we do have to process \N here while the proper
3355           * charnames handler is in scope. See bugs #56444 and #62056.
3356           * There is a complication because \N in a pattern may also stand
3357           * for 'match a non-nl', and not mean a charname, in which case its
3358           * processing should be deferred to the regex compiler. To be a
3359           * charname it must be followed immediately by a '{', and not look
3360           * like \N followed by a curly quantifier, i.e., not something like
3361           * \N{3,}. regcurly returns a boolean indicating if it is a legal
3362           * quantifier */
3363 43859857 100       else if (PL_lex_inpat
3364 5740825 100       && (*s != 'N'
3365 1068 100       || s[1] != '{'
3366 590 100       || regcurly(s + 1, FALSE)))
3367           {
3368 5740239         *d++ = '\\';
3369 5740239         goto default_action;
3370           }
3371            
3372 38119618         switch (*s) {
3373            
3374           /* quoted - in transliterations */
3375           case '-':
3376 5306 100       if (PL_lex_inwhat == OP_TRANS) {
3377 532         *d++ = *s++;
3378 532         continue;
3379           }
3380           /* FALL THROUGH */
3381           default:
3382           {
3383 1678285 100       if ((isALPHANUMERIC(*s)))
3384 10         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3385           "Unrecognized escape \\%c passed through",
3386 10         *s);
3387           /* default action is to copy the quoted character */
3388           goto default_action;
3389           }
3390            
3391           /* eg. \132 indicates the octal constant 0132 */
3392           case '0': case '1': case '2': case '3':
3393           case '4': case '5': case '6': case '7':
3394           {
3395 317094         I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3396 317094         STRLEN len = 3;
3397 317094         uv = grok_oct(s, &len, &flags, NULL);
3398 317094         s += len;
3399 317094 100       if (len < 3 && s < send && isDIGIT(*s)
    50        
3400 0 0       && ckWARN(WARN_MISC))
3401           {
3402 0         Perl_warner(aTHX_ packWARN(WARN_MISC),
3403           "%s", form_short_octal_warning(s, len));
3404           }
3405           }
3406           goto NUM_ESCAPE_INSERT;
3407            
3408           /* eg. \o{24} indicates the octal constant \024 */
3409           case 'o':
3410           {
3411           const char* error;
3412            
3413 64 50       bool valid = grok_bslash_o(&s, &uv, &error,
    50        
    0        
    50        
    50        
3414           TRUE, /* Output warning */
3415           FALSE, /* Not strict */
3416           TRUE, /* Output warnings for
3417           non-portables */
3418           UTF);
3419 60 100       if (! valid) {
3420 50         yyerror(error);
3421 46         continue;
3422           }
3423           goto NUM_ESCAPE_INSERT;
3424           }
3425            
3426           /* eg. \x24 indicates the hex constant 0x24 */
3427           case 'x':
3428           {
3429           const char* error;
3430            
3431           bool valid = grok_bslash_x(&s, &uv, &error,
3432           TRUE, /* Output warning */
3433           FALSE, /* Not strict */
3434           TRUE, /* Output warnings for
3435           non-portables */
3436           UTF);
3437 32465268 100       if (! valid) {
3438 42         yyerror(error);
3439 38         continue;
3440           }
3441           }
3442            
3443           NUM_ESCAPE_INSERT:
3444           /* Insert oct or hex escaped character. There will always be
3445           * enough room in sv since such escapes will be longer than any
3446           * UTF-8 sequence they can end up as, except if they force us
3447           * to recode the rest of the string into utf8 */
3448          
3449           /* Here uv is the ordinal of the next character being added */
3450 32782330 100       if (!NATIVE_IS_INVARIANT(uv)) {
3451 1122234 100       if (!has_utf8 && uv > 255) {
    100        
3452           /* Might need to recode whatever we have accumulated so
3453           * far if it contains any chars variant in utf8 or
3454           * utf-ebcdic. */
3455          
3456 264432         SvCUR_set(sv, d - SvPVX_const(sv));
3457 264432         SvPOK_on(sv);
3458 264432         *d = '\0';
3459           /* See Note on sizing above. */
3460 264432 50       sv_utf8_upgrade_flags_grow(sv,
    100        
    100        
    100        
    100        
    50        
    100        
3461           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3462           UNISKIP(uv) + (STRLEN)(send - s) + 1);
3463 264432         d = SvPVX(sv) + SvCUR(sv);
3464           has_utf8 = TRUE;
3465           }
3466            
3467 1122234 100       if (has_utf8) {
3468 300990         d = (char*)uvchr_to_utf8((U8*)d, uv);
3469 301066 100       if (PL_lex_inwhat == OP_TRANS &&
    50        
3470 152         PL_sublex_info.sub_op) {
3471 228 100       PL_sublex_info.sub_op->op_private |=
3472 152         (PL_lex_repl ? OPpTRANS_FROM_UTF
3473           : OPpTRANS_TO_UTF);
3474           }
3475           #ifdef EBCDIC
3476           if (uv > 255 && !dorange)
3477           native_range = FALSE;
3478           #endif
3479           }
3480           else {
3481 821244         *d++ = (char)uv;
3482           }
3483           }
3484           else {
3485 31660096         *d++ = (char) uv;
3486           }
3487 32782330         continue;
3488            
3489           case 'N':
3490           /* In a non-pattern \N must be a named character, like \N{LATIN
3491           * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3492           * mean to match a non-newline. For non-patterns, named
3493           * characters are converted to their string equivalents. In
3494           * patterns, named characters are not converted to their
3495           * ultimate forms for the same reasons that other escapes
3496           * aren't. Instead, they are converted to the \N{U+...} form
3497           * to get the value from the charnames that is in effect right
3498           * now, while preserving the fact that it was a named character
3499           * so that the regex compiler knows this */
3500            
3501           /* The structure of this section of code (besides checking for
3502           * errors and upgrading to utf8) is:
3503           * Further disambiguate between the two meanings of \N, and if
3504           * not a charname, go process it elsewhere
3505           * If of form \N{U+...}, pass it through if a pattern;
3506           * otherwise convert to utf8
3507           * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3508           * pattern; otherwise convert to utf8 */
3509            
3510           /* Here, s points to the 'N'; the test below is guaranteed to
3511           * succeed if we are being called on a pattern as we already
3512           * know from a test above that the next character is a '{'.
3513           * On a non-pattern \N must mean 'named sequence, which
3514           * requires braces */
3515 3180         s++;
3516 3180 100       if (*s != '{') {
3517 42         yyerror("Missing braces on \\N{}");
3518 38         continue;
3519           }
3520 3138         s++;
3521            
3522           /* If there is no matching '}', it is an error. */
3523 3138 100       if (! (e = strchr(s, '}'))) {
3524 56 100       if (! PL_lex_inpat) {
3525 42         yyerror("Missing right brace on \\N{}");
3526           } else {
3527 14         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3528           }
3529 52         continue;
3530           }
3531            
3532           /* Here it looks like a named character */
3533            
3534 3082 100       if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
    100        
3535 504         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3536           | PERL_SCAN_DISALLOW_PREFIX;
3537           STRLEN len;
3538            
3539           /* For \N{U+...}, the '...' is a unicode value even on
3540           * EBCDIC machines */
3541 504         s += 2; /* Skip to next char after the 'U+' */
3542 504         len = e - s;
3543 504         uv = grok_hex(s, &len, &flags, NULL);
3544 448 100       if (len == 0 || len != (STRLEN)(e - s)) {
    50        
3545 82         yyerror("Invalid hexadecimal number in \\N{U+...}");
3546 78         s = e + 1;
3547 78         continue;
3548           }
3549            
3550 366 100       if (PL_lex_inpat) {
3551            
3552           /* On non-EBCDIC platforms, pass through to the regex
3553           * compiler unchanged. The reason we evaluated the
3554           * number above is to make sure there wasn't a syntax
3555           * error. But on EBCDIC we convert to native so
3556           * downstream code can continue to assume it's native
3557           */
3558 284         s -= 5; /* Include the '\N{U+' */
3559           #ifdef EBCDIC
3560           d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3561           and the \0 */
3562           "\\N{U+%X}",
3563           (unsigned int) UNI_TO_NATIVE(uv));
3564           #else
3565 284         Copy(s, d, e - s + 1, char); /* 1 = include the } */
3566 284         d += e - s + 1;
3567           #endif
3568           }
3569           else { /* Not a pattern: convert the hex to string */
3570            
3571           /* If destination is not in utf8, unconditionally
3572           * recode it to be so. This is because \N{} implies
3573           * Unicode semantics, and scalars have to be in utf8
3574           * to guarantee those semantics */
3575 82 100       if (! has_utf8) {
3576 56         SvCUR_set(sv, d - SvPVX_const(sv));
3577 56         SvPOK_on(sv);
3578 56         *d = '\0';
3579           /* See Note on sizing above. */
3580 56 50       sv_utf8_upgrade_flags_grow(
    100        
    100        
    50        
    0        
    0        
    0        
3581           sv,
3582           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3583           UNISKIP(uv) + (STRLEN)(send - e) + 1);
3584 56         d = SvPVX(sv) + SvCUR(sv);
3585           has_utf8 = TRUE;
3586           }
3587            
3588           /* Add the (Unicode) code point to the output. */
3589 82 100       if (UNI_IS_INVARIANT(uv)) {
3590 8         *d++ = (char) LATIN1_TO_NATIVE(uv);
3591           }
3592           else {
3593 74         d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3594           }
3595           }
3596           }
3597           else /* Here is \N{NAME} but not \N{U+...}. */
3598 2578 100       if ((res = get_and_check_backslash_N_name(s, e)))
3599           {
3600           STRLEN len;
3601 2436 50       const char *str = SvPV_const(res, len);
3602 2436 100       if (PL_lex_inpat) {
3603            
3604 172 100       if (! len) { /* The name resolved to an empty string */
3605 12         Copy("\\N{}", d, 4, char);
3606 12         d += 4;
3607           }
3608           else {
3609           /* In order to not lose information for the regex
3610           * compiler, pass the result in the specially made
3611           * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3612           * the code points in hex of each character
3613           * returned by charnames */
3614            
3615 160         const char *str_end = str + len;
3616 160         const STRLEN off = d - SvPVX_const(sv);
3617            
3618 160 100       if (! SvUTF8(res)) {
3619           /* For the non-UTF-8 case, we can determine the
3620           * exact length needed without having to parse
3621           * through the string. Each character takes up
3622           * 2 hex digits plus either a trailing dot or
3623           * the "}" */
3624 22 50       d = off + SvGROW(sv, off
    100        
3625           + 3 * len
3626           + 6 /* For the "\N{U+", and
3627           trailing NUL */
3628           + (STRLEN)(send - e));
3629 22         Copy("\\N{U+", d, 5, char);
3630 22         d += 5;
3631 1603 100       while (str < str_end) {
3632           char hex_string[4];
3633 2355 50       my_snprintf(hex_string, sizeof(hex_string),
3634           "%02X.", (U8) *str);
3635 1570         Copy(hex_string, d, 3, char);
3636 1570         d += 3;
3637 1570         str++;
3638           }
3639 22         d--; /* We will overwrite below the final
3640           dot with a right brace */
3641           }
3642           else {
3643           STRLEN char_length; /* cur char's byte length */
3644            
3645           /* and the number of bytes after this is
3646           * translated into hex digits */
3647           STRLEN output_length;
3648            
3649           /* 2 hex per byte; 2 chars for '\N'; 2 chars
3650           * for max('U+', '.'); and 1 for NUL */
3651           char hex_string[2 * UTF8_MAXBYTES + 5];
3652            
3653           /* Get the first character of the result. */
3654 138         U32 uv = utf8n_to_uvchr((U8 *) str,
3655           len,
3656           &char_length,
3657           UTF8_ALLOW_ANYUV);
3658           /* Convert first code point to hex, including
3659           * the boiler plate before it. */
3660 138         output_length =
3661 138 50       my_snprintf(hex_string, sizeof(hex_string),
3662           "\\N{U+%X",
3663           (unsigned int) uv);
3664            
3665           /* Make sure there is enough space to hold it */
3666 138 50       d = off + SvGROW(sv, off
    50        
3667           + output_length
3668           + (STRLEN)(send - e)
3669           + 2); /* '}' + NUL */
3670           /* And output it */
3671 138         Copy(hex_string, d, output_length, char);
3672 138         d += output_length;
3673            
3674           /* For each subsequent character, append dot and
3675           * its ordinal in hex */
3676 211 100       while ((str += char_length) < str_end) {
3677 4         const STRLEN off = d - SvPVX_const(sv);
3678 4         U32 uv = utf8n_to_uvchr((U8 *) str,
3679           str_end - str,
3680           &char_length,
3681           UTF8_ALLOW_ANYUV);
3682 4         output_length =
3683 4 50       my_snprintf(hex_string,
3684           sizeof(hex_string),
3685           ".%X",
3686           (unsigned int) uv);
3687            
3688 4 50       d = off + SvGROW(sv, off
    50        
3689           + output_length
3690           + (STRLEN)(send - e)
3691           + 2); /* '}' + NUL */
3692 4         Copy(hex_string, d, output_length, char);
3693 4         d += output_length;
3694           }
3695           }
3696            
3697 160         *d++ = '}'; /* Done. Add the trailing brace */
3698           }
3699           }
3700           else { /* Here, not in a pattern. Convert the name to a
3701           * string. */
3702            
3703           /* If destination is not in utf8, unconditionally
3704           * recode it to be so. This is because \N{} implies
3705           * Unicode semantics, and scalars have to be in utf8
3706           * to guarantee those semantics */
3707 2264 100       if (! has_utf8) {
3708 2232         SvCUR_set(sv, d - SvPVX_const(sv));
3709 2232         SvPOK_on(sv);
3710 2232         *d = '\0';
3711           /* See Note on sizing above. */
3712 2232         sv_utf8_upgrade_flags_grow(sv,
3713           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3714           len + (STRLEN)(send - s) + 1);
3715 2232         d = SvPVX(sv) + SvCUR(sv);
3716           has_utf8 = TRUE;
3717 32 50       } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3718            
3719           /* See Note on sizing above. (NOTE: SvCUR() is not
3720           * set correctly here). */
3721 0         const STRLEN off = d - SvPVX_const(sv);
3722 0 0       d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
    0        
3723           }
3724 2264         Copy(str, d, len, char);
3725 2264         d += len;
3726           }
3727            
3728 2436         SvREFCNT_dec(res);
3729            
3730           } /* End \N{NAME} */
3731           #ifdef EBCDIC
3732           if (!dorange)
3733           native_range = FALSE; /* \N{} is defined to be Unicode */
3734           #endif
3735 2932         s = e + 1; /* Point to just after the '}' */
3736 2932         continue;
3737            
3738           /* \c is a control character */
3739           case 'c':
3740 231466         s++;
3741 231466 100       if (s < send) {
3742 231424         *d++ = grok_bslash_c(*s++, has_utf8, 1);
3743           }
3744           else {
3745 42         yyerror("Missing control char name in \\c");
3746           }
3747 231454         continue;
3748            
3749           /* printf-style backslashes, formfeeds, newlines, etc */
3750           case 'b':
3751 77326         *d++ = '\b';
3752 77326         break;
3753           case 'n':
3754 2602192         *d++ = '\n';
3755 2602192         break;
3756           case 'r':
3757 25598         *d++ = '\r';
3758 25598         break;
3759           case 'f':
3760 59390         *d++ = '\f';
3761 59390         break;
3762           case 't':
3763 632333         *d++ = '\t';
3764 632333         break;
3765           case 'e':
3766 11906         *d++ = ASCII_TO_NATIVE('\033');
3767 11906         break;
3768           case 'a':
3769 14984         *d++ = '\a';
3770 14984         break;
3771           } /* end switch */
3772            
3773 3423729         s++;
3774 3423729         continue;
3775           } /* end if (backslash) */
3776           #ifdef EBCDIC
3777           else
3778           literal_endpoint++;
3779           #endif
3780            
3781           default_action:
3782           /* If we started with encoded form, or already know we want it,
3783           then encode the next character */
3784 188335713 100       if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
    100        
    100        
3785 43734         STRLEN len = 1;
3786            
3787            
3788           /* One might think that it is wasted effort in the case of the
3789           * source being utf8 (this_utf8 == TRUE) to take the next character
3790           * in the source, convert it to an unsigned value, and then convert
3791           * it back again. But the source has not been validated here. The
3792           * routine that does the conversion checks for errors like
3793           * malformed utf8 */
3794            
3795           const UV nextuv = (this_utf8)
3796 43730         ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3797 65599 100       : (UV) ((U8) *s);
3798 43734 100       const STRLEN need = UNISKIP(nextuv);
    100        
    100        
    50        
    0        
    0        
    0        
3799 43734 100       if (!has_utf8) {
3800 38318         SvCUR_set(sv, d - SvPVX_const(sv));
3801 38318         SvPOK_on(sv);
3802 38318         *d = '\0';
3803           /* See Note on sizing above. */
3804 38318         sv_utf8_upgrade_flags_grow(sv,
3805           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3806           need + (STRLEN)(send - s) + 1);
3807 38318         d = SvPVX(sv) + SvCUR(sv);
3808           has_utf8 = TRUE;
3809 5416 100       } else if (need > len) {
3810           /* encoded value larger than old, may need extra space (NOTE:
3811           * SvCUR() is not set correctly here). See Note on sizing
3812           * above. */
3813 4         const STRLEN off = d - SvPVX_const(sv);
3814 4 50       d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
    50        
3815           }
3816 43734         s += len;
3817            
3818 43734         d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3819           #ifdef EBCDIC
3820           if (uv > 255 && !dorange)
3821           native_range = FALSE;
3822           #endif
3823           }
3824           else {
3825 206923360         *d++ = *s++;
3826           }
3827           } /* while loop to process each character */
3828            
3829           /* terminate the string and set up the sv */
3830 22710137         *d = '\0';
3831 22710137         SvCUR_set(sv, d - SvPVX_const(sv));
3832 22710137 50       if (SvCUR(sv) >= SvLEN(sv))
3833 0         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3834 0         " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3835            
3836 22710137         SvPOK_on(sv);
3837 22710137 100       if (PL_encoding && !has_utf8) {
    100        
3838 732         sv_recode_to_utf8(sv, PL_encoding);
3839 732 50       if (SvUTF8(sv))
3840           has_utf8 = TRUE;
3841           }
3842 22710137 100       if (has_utf8) {
3843 305900         SvUTF8_on(sv);
3844 305900 100       if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
    50        
3845 324 100       PL_sublex_info.sub_op->op_private |=
3846 216         (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3847           }
3848           }
3849            
3850           /* shrink the sv if we allocated more than we used */
3851 22710137 100       if (SvCUR(sv) + 5 < SvLEN(sv)) {
3852 20928118         SvPV_shrink_to_cur(sv);
3853           }
3854            
3855           /* return the substring (via pl_yylval) only if we parsed anything */
3856 22710137 100       if (s > PL_bufptr) {
3857 19981035         char *s2 = PL_bufptr;
3858 359563858 100       for (; s2 < s; s2++) {
3859 339582823 100       if (*s2 == '\n')
3860 1632227 100       COPLINE_INC_WITH_HERELINES;
3861           }
3862 19981035         SvREFCNT_inc_simple_void_NN(sv);
3863 19981035 100       if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
    100        
3864 118 100       && ! PL_parser->lex_re_reparsing)
3865           {
3866 86 100       const char *const key = PL_lex_inpat ? "qr" : "q";
3867 86 100       const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3868           const char *type;
3869           STRLEN typelen;
3870            
3871 86 100       if (PL_lex_inwhat == OP_TRANS) {
3872           type = "tr";
3873           typelen = 2;
3874 82 100       } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
    50        
3875           type = "s";
3876           typelen = 1;
3877 78 100       } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
    100        
3878           type = "q";
3879           typelen = 1;
3880           } else {
3881           type = "qq";
3882           typelen = 2;
3883           }
3884            
3885 86         sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3886           type, typelen);
3887           }
3888 19981035         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3889           }
3890 22710137         LEAVE_with_name("scan_const");
3891 22710137         return s;
3892           }
3893            
3894           /* S_intuit_more
3895           * Returns TRUE if there's more to the expression (e.g., a subscript),
3896           * FALSE otherwise.
3897           *
3898           * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3899           *
3900           * ->[ and ->{ return TRUE
3901           * { and [ outside a pattern are always subscripts, so return TRUE
3902           * if we're outside a pattern and it's not { or [, then return FALSE
3903           * if we're in a pattern and the first char is a {
3904           * {4,5} (any digits around the comma) returns FALSE
3905           * if we're in a pattern and the first char is a [
3906           * [] returns FALSE
3907           * [SOMETHING] has a funky algorithm to decide whether it's a
3908           * character class or not. It has to deal with things like
3909           * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3910           * anything else returns TRUE
3911           */
3912            
3913           /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3914            
3915           STATIC int
3916 136239962         S_intuit_more(pTHX_ char *s)
3917           {
3918           dVAR;
3919            
3920           PERL_ARGS_ASSERT_INTUIT_MORE;
3921            
3922 136239962 100       if (PL_lex_brackets)
3923           return TRUE;
3924 23666468 100       if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
    100        
    100        
3925           return TRUE;
3926 22970064 100       if (*s != '{' && *s != '[')
3927           return FALSE;
3928 1521148 100       if (!PL_lex_inpat)
3929           return TRUE;
3930            
3931           /* In a pattern, so maybe we have {n,m}. */
3932 20826 100       if (*s == '{') {
3933 13974 100       if (regcurly(s, FALSE)) {
3934           return FALSE;
3935           }
3936 7942         return TRUE;
3937           }
3938            
3939           /* On the other hand, maybe we have a character class */
3940            
3941 6852         s++;
3942 6852 100       if (*s == ']' || *s == '^')
3943           return FALSE;
3944           else {
3945           /* this is terrifying, and it works */
3946           int weight;
3947           char seen[256];
3948 6792         const char * const send = strchr(s,']');
3949           unsigned char un_char, last_un_char;
3950           char tmpbuf[sizeof PL_tokenbuf * 4];
3951            
3952 6792 50       if (!send) /* has to be an expression */
3953           return TRUE;
3954           weight = 2; /* let's weigh the evidence */
3955            
3956 6792 100       if (*s == '$')
3957           weight -= 3;
3958 6452 100       else if (isDIGIT(*s)) {
3959 5916 50       if (s[1] != ']') {
3960 0 0       if (isDIGIT(s[1]) && s[2] == ']')
    0        
3961           weight -= 10;
3962           }
3963           else
3964           weight -= 100;
3965           }
3966           Zero(seen,256,char);
3967           un_char = 255;
3968 16812 100       for (; s < send; s++) {
3969           last_un_char = un_char;
3970 10020         un_char = (unsigned char)*s;
3971 10020         switch (*s) {
3972           case '@':
3973           case '&':
3974           case '$':
3975 340         weight -= seen[un_char] * 10;
3976 340 50       if (isWORDCHAR_lazy_if(s+1,UTF)) {
    50        
    50        
    0        
    50        
    50        
    0        
    0        
    0        
    0        
    50        
3977           int len;
3978 340         scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3979 340         len = (int)strlen(tmpbuf);
3980 340 100       if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
    50        
    50        
    0        
    50        
    50        
    50        
3981           UTF ? SVf_UTF8 : 0, SVt_PV))
3982 0         weight -= 100;
3983           else
3984 340         weight -= 10;
3985           }
3986 0 0       else if (*s == '$' && s[1] &&
    0        
    0        
3987 0         strchr("[#!%*<>()-=",s[1])) {
3988 0 0       if (/*{*/ strchr("])} =",s[2]))
3989 0         weight -= 10;
3990           else
3991 0         weight -= 1;
3992           }
3993           break;
3994           case '\\':
3995           un_char = 254;
3996 1044 50       if (s[1]) {
3997 1044 100       if (strchr("wds]",s[1]))
3998 4         weight += 100;
3999 1040 50       else if (seen[(U8)'\''] || seen[(U8)'"'])
    50        
4000 0         weight += 1;
4001 1040 50       else if (strchr("rnftbxcav",s[1]))
4002 0         weight += 40;
4003 1040 50       else if (isDIGIT(s[1])) {
4004 0         weight += 40;
4005 0 0       while (s[1] && isDIGIT(s[1]))
    0        
4006 0         s++;
4007           }
4008           }
4009           else
4010 0         weight += 100;
4011           break;
4012           case '-':
4013 12 50       if (s[1] == '\\')
4014 0         weight += 50;
4015 12 100       if (strchr("aA01! ",last_un_char))
4016 4         weight += 30;
4017 12 100       if (strchr("zZ79~",s[1]))
4018 4         weight += 30;
4019 12 100       if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
    50        
    0        
4020 8         weight -= 5; /* cope with negative subscript */
4021           break;
4022           default:
4023 8624 100       if (!isWORDCHAR(last_un_char)
4024 7308 100       && !(last_un_char == '$' || last_un_char == '@'
    50        
4025           || last_un_char == '&')
4026 6968 100       && isALPHA(*s) && s[1] && isALPHA(s[1])) {
    50        
    50        
4027           char *d = tmpbuf;
4028 0 0       while (isALPHA(*s))
4029 0         *d++ = *s++;
4030 0         *d = '\0';
4031 0 0       if (keyword(tmpbuf, d - tmpbuf, 0))
4032 0         weight -= 150;
4033           }
4034 8624 50       if (un_char == last_un_char + 1)
4035 0         weight += 5;
4036 8624         weight -= seen[un_char];
4037 8624         break;
4038           }
4039 10020         seen[un_char]++;
4040           }
4041 6792 100       if (weight >= 0) /* probably a character class */
4042           return FALSE;
4043           }
4044            
4045 70304664         return TRUE;
4046           }
4047            
4048           /*
4049           * S_intuit_method
4050           *
4051           * Does all the checking to disambiguate
4052           * foo bar
4053           * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4054           * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4055           *
4056           * First argument is the stuff after the first token, e.g. "bar".
4057           *
4058           * Not a method if foo is a filehandle.
4059           * Not a method if foo is a subroutine prototyped to take a filehandle.
4060           * Not a method if it's really "Foo $bar"
4061           * Method if it's "foo $bar"
4062           * Not a method if it's really "print foo $bar"
4063           * Method if it's really "foo package::" (interpreted as package->foo)
4064           * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4065           * Not a method if bar is a filehandle or package, but is quoted with
4066           * =>
4067           */
4068            
4069           STATIC int
4070 765059         S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4071           {
4072           dVAR;
4073 765059         char *s = start + (*start == '$');
4074           char tmpbuf[sizeof PL_tokenbuf];
4075           STRLEN len;
4076           GV* indirgv;
4077           #ifdef PERL_MAD
4078           int soff;
4079           #endif
4080            
4081           PERL_ARGS_ASSERT_INTUIT_METHOD;
4082            
4083 765059 100       if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
    50        
    50        
    50        
    50        
    100        
4084           return 0;
4085 683315 100       if (cv && SvPOK(cv)) {
    100        
4086 164088 50       const char *proto = CvPROTO(cv);
    50        
    100        
4087 164088 50       if (proto) {
4088 164212 100       while (*proto && (isSPACE(*proto) || *proto == ';'))
    100        
    100        
4089 124         proto++;
4090 164088 100       if (*proto == '*')
4091           return 0;
4092           }
4093           }
4094            
4095 682693 100       if (*start == '$') {
4096 347732 100       if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
    50        
    0        
    0        
4097 0         isUPPER(*PL_tokenbuf))
4098           return 0;
4099           #ifdef PERL_MAD
4100           len = start - SvPVX(PL_linestr);
4101           #endif
4102 0         s = PEEKSPACE(s);
4103           #ifdef PERL_MAD
4104           start = SvPVX(PL_linestr) + len;
4105           #endif
4106 0         PL_bufptr = start;
4107 0         PL_expect = XREF;
4108 0 0       return *s == '(' ? FUNCMETH : METHOD;
4109           }
4110            
4111 334961         s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4112           /* start is the beginning of the possible filehandle/object,
4113           * and s is the end of it
4114           * tmpbuf is a copy of it (but with single quotes as double colons)
4115           */
4116            
4117 334961 100       if (!keyword(tmpbuf, len, 0)) {
4118 177227 100       if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
    100        
    100        
4119 4         len -= 2;
4120 4         tmpbuf[len] = '\0';
4121           #ifdef PERL_MAD
4122           soff = s - SvPVX(PL_linestr);
4123           #endif
4124 4         goto bare_package;
4125           }
4126 177223 50       indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
    100        
    50        
    50        
    100        
4127 177223 100       if (indirgv && GvCVu(indirgv))
    50        
    100        
4128           return 0;
4129           /* filehandle or package name makes it a method */
4130 165865 100       if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
    100        
    50        
    50        
    50        
    50        
    100        
    50        
    50        
    100        
    100        
4131           #ifdef PERL_MAD
4132           soff = s - SvPVX(PL_linestr);
4133           #endif
4134 92615         s = PEEKSPACE(s);
4135 92615 100       if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
    100        
    50        
4136           return 0; /* no assumptions -- "=>" quotes bareword */
4137           bare_package:
4138           start_force(PL_curforce);
4139 62949         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4140           S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4141 62949         NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4142           if (PL_madskills)
4143           curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4144           ( UTF ? SVf_UTF8 : 0 )));
4145 62949         PL_expect = XTERM;
4146 62949         force_next(WORD);
4147 62949         PL_bufptr = s;
4148           #ifdef PERL_MAD
4149           PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4150           #endif
4151 433261 100       return *s == '(' ? FUNCMETH : METHOD;
4152           }
4153           }
4154           return 0;
4155           }
4156            
4157           /* Encoded script support. filter_add() effectively inserts a
4158           * 'pre-processing' function into the current source input stream.
4159           * Note that the filter function only applies to the current source file
4160           * (e.g., it will not affect files 'require'd or 'use'd by this one).
4161           *
4162           * The datasv parameter (which may be NULL) can be used to pass
4163           * private data to this instance of the filter. The filter function
4164           * can recover the SV using the FILTER_DATA macro and use it to
4165           * store private buffers and state information.
4166           *
4167           * The supplied datasv parameter is upgraded to a PVIO type
4168           * and the IoDIRP/IoANY field is used to store the function pointer,
4169           * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4170           * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4171           * private use must be set using malloc'd pointers.
4172           */
4173            
4174           SV *
4175 19730         Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4176 19728 50       {
4177           dVAR;
4178 19730 50       if (!funcp)
4179           return NULL;