File Coverage

/usr/local/lib/perl5/5.42.0/x86_64-linux/CORE/inline.h
Criterion Covered Total %
statement 8 25 32.0
branch 2 20 10.0
condition n/a
subroutine n/a
pod n/a
total 10 45 22.2


line stmt bran cond sub pod time code
1             /*> inline.h
2             *
3             * Copyright (C) 2012 by Larry Wall and others
4             *
5             * You may distribute under the terms of either the GNU General Public
6             * License or the Artistic License, as specified in the README file.
7             *
8             * This file contains tables and code adapted from
9             * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this
10             * copyright notice:
11              
12             Copyright (c) 2008-2009 Bjoern Hoehrmann
13              
14             Permission is hereby granted, free of charge, to any person obtaining a copy of
15             this software and associated documentation files (the "Software"), to deal in
16             the Software without restriction, including without limitation the rights to
17             use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
18             of the Software, and to permit persons to whom the Software is furnished to do
19             so, subject to the following conditions:
20              
21             The above copyright notice and this permission notice shall be included in all
22             copies or substantial portions of the Software.
23              
24             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25             IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26             FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27             AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28             LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29             OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30             SOFTWARE.
31              
32             *
33             * This file is a home for static inline functions that cannot go in other
34             * header files, because they depend on proto.h (included after most other
35             * headers) or struct definitions.
36             *
37             * Note also perlstatic.h for functions that can't or shouldn't be inlined, but
38             * whose details should be exposed to the compiler, for such things as tail
39             * call optimization.
40             *
41             * Each section names the header file that the functions "belong" to.
42             */
43              
44             /* ------------------------------- av.h ------------------------------- */
45              
46             /*
47             =for apidoc_section $AV
48             =for apidoc av_count
49             Returns the number of elements in the array C. This is the true length of
50             the array, including any undefined elements. It is always the same as
51             S>.
52              
53             =cut
54             */
55             PERL_STATIC_INLINE Size_t
56             Perl_av_count(pTHX_ AV *av)
57             {
58             PERL_ARGS_ASSERT_AV_COUNT;
59             assert(SvTYPE(av) == SVt_PVAV);
60              
61             return AvFILL(av) + 1;
62             }
63              
64             /* ------------------------------- av.c ------------------------------- */
65              
66             /*
67             =for apidoc av_store_simple
68              
69             This is a cut-down version of av_store that assumes that the array is
70             very straightforward - no magic, not readonly, and AvREAL - and that
71             C is not negative. This function MUST NOT be used in situations
72             where any of those assumptions may not hold.
73              
74             Stores an SV in an array. The array index is specified as C. It
75             can be dereferenced to get the C that was stored there (= C)).
76              
77             Note that the caller is responsible for suitably incrementing the reference
78             count of C before the call.
79              
80             Approximate Perl equivalent: C.
81              
82             =cut
83             */
84              
85             PERL_STATIC_INLINE SV**
86             Perl_av_store_simple(pTHX_ AV *av, SSize_t key, SV *val)
87             {
88             SV** ary;
89              
90             PERL_ARGS_ASSERT_AV_STORE_SIMPLE;
91             assert(SvTYPE(av) == SVt_PVAV);
92             assert(!SvMAGICAL(av));
93             assert(!SvREADONLY(av));
94             assert(AvREAL(av));
95             assert(key > -1);
96              
97             ary = AvARRAY(av);
98              
99             if (AvFILLp(av) < key) {
100             if (key > AvMAX(av)) {
101             av_extend(av,key);
102             ary = AvARRAY(av);
103             }
104             AvFILLp(av) = key;
105             } else
106             SvREFCNT_dec(ary[key]);
107              
108             ary[key] = val;
109             return &ary[key];
110             }
111              
112             /*
113             =for apidoc av_fetch_simple
114              
115             This is a cut-down version of av_fetch that assumes that the array is
116             very straightforward - no magic, not readonly, and AvREAL - and that
117             C is not negative. This function MUST NOT be used in situations
118             where any of those assumptions may not hold.
119              
120             Returns the SV at the specified index in the array. The C is the
121             index. If lval is true, you are guaranteed to get a real SV back (in case
122             it wasn't real before), which you can then modify. Check that the return
123             value is non-null before dereferencing it to a C.
124              
125             The rough perl equivalent is C<$myarray[$key]>.
126              
127             =cut
128             */
129              
130             PERL_STATIC_INLINE SV**
131             Perl_av_fetch_simple(pTHX_ AV *av, SSize_t key, I32 lval)
132             {
133             PERL_ARGS_ASSERT_AV_FETCH_SIMPLE;
134             assert(SvTYPE(av) == SVt_PVAV);
135             assert(!SvMAGICAL(av));
136             assert(!SvREADONLY(av));
137             assert(AvREAL(av));
138             assert(key > -1);
139              
140             if ( (key > AvFILLp(av)) || !AvARRAY(av)[key]) {
141             return lval ? av_store_simple(av,key,newSV_type(SVt_NULL)) : NULL;
142             } else {
143             return &AvARRAY(av)[key];
144             }
145             }
146              
147             PERL_STATIC_INLINE void
148             Perl_av_push_simple(pTHX_ AV *av, SV *val)
149             {
150             PERL_ARGS_ASSERT_AV_PUSH_SIMPLE;
151             assert(SvTYPE(av) == SVt_PVAV);
152             assert(!SvMAGICAL(av));
153             assert(!SvREADONLY(av));
154             assert(AvREAL(av));
155             assert(AvFILLp(av) > -2);
156              
157             (void)av_store_simple(av,AvFILLp(av)+1,val);
158             }
159              
160             /*
161             =for apidoc av_new_alloc
162              
163             This implements L>
164             and L>, which are the public API for this
165             functionality.
166              
167             Creates a new AV and allocates its SV* array.
168              
169             This is similar to, but more efficient than doing:
170              
171             AV *av = newAV();
172             av_extend(av, key);
173              
174             The size parameter is used to pre-allocate a SV* array large enough to
175             hold at least elements C<0..(size-1)>. C must be at least 1.
176              
177             The C parameter controls whether or not the array is NULL
178             initialized.
179              
180             =cut
181             */
182              
183             PERL_STATIC_INLINE AV *
184             Perl_av_new_alloc(pTHX_ SSize_t size, bool zeroflag)
185             {
186             AV * const av = newAV();
187             SV** ary;
188             PERL_ARGS_ASSERT_AV_NEW_ALLOC;
189             assert(size > 0);
190              
191             Newx(ary, size, SV*); /* Newx performs the memwrap check */
192             AvALLOC(av) = ary;
193             AvARRAY(av) = ary;
194             AvMAX(av) = size - 1;
195              
196             if (zeroflag)
197             Zero(ary, size, SV*);
198              
199             return av;
200             }
201              
202              
203             /* remove (AvARRAY(av) - AvALLOC(av)) offset from empty array */
204              
205             PERL_STATIC_INLINE void
206             Perl_av_remove_offset(pTHX_ AV *av)
207             {
208             PERL_ARGS_ASSERT_AV_REMOVE_OFFSET;
209             assert(AvFILLp(av) == -1);
210             SSize_t i = AvARRAY(av) - AvALLOC(av);
211             if (i) {
212             AvARRAY(av) = AvALLOC(av);
213             AvMAX(av) += i;
214             #ifdef PERL_RC_STACK
215             Zero(AvALLOC(av), i, SV*);
216             #endif
217             }
218             }
219              
220              
221             /* ------------------------------- cv.h ------------------------------- */
222              
223             /*
224             =for apidoc_section $CV
225             =for apidoc CvGV
226             Returns the GV associated with the CV C, reifying it if necessary.
227              
228             =cut
229             */
230             PERL_STATIC_INLINE GV *
231             Perl_CvGV(pTHX_ CV *sv)
232             {
233             PERL_ARGS_ASSERT_CVGV;
234              
235             return CvNAMED(sv)
236             ? Perl_cvgv_from_hek(aTHX_ sv)
237             : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
238             }
239              
240             /*
241             =for apidoc CvDEPTH
242             Returns the recursion level of the CV C. Hence >= 2 indicates we are in a
243             recursive call.
244              
245             =cut
246             */
247             PERL_STATIC_INLINE I32 *
248             Perl_CvDEPTH(const CV * const sv)
249             {
250             PERL_ARGS_ASSERT_CVDEPTH;
251             assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
252              
253             return &((XPVCV*)SvANY(sv))->xcv_depth;
254             }
255              
256             /*
257             CvPROTO returns the prototype as stored, which is not necessarily what
258             the interpreter should be using. Specifically, the interpreter assumes
259             that spaces have been stripped, which has been the case if the prototype
260             was added by toke.c, but is generally not the case if it was added elsewhere.
261             Since we can't enforce the spacelessness at assignment time, this routine
262             provides a temporary copy at parse time with spaces removed.
263             I is the start of the original buffer, I is the length of the
264             prototype and will be updated when this returns.
265             */
266              
267             #ifdef PERL_CORE
268             PERL_STATIC_INLINE char *
269             S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
270             {
271             SV * tmpsv;
272             char * tmps;
273             tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
274             tmps = SvPVX(tmpsv);
275             while ((*len)--) {
276             if (!isSPACE(*orig))
277             *tmps++ = *orig;
278             orig++;
279             }
280             *tmps = '\0';
281             *len = tmps - SvPVX(tmpsv);
282             return SvPVX(tmpsv);
283             }
284             #endif
285              
286             /* ------------------------------- iperlsys.h ------------------------------- */
287             #if ! defined(PERL_IMPLICIT_SYS) && defined(USE_ITHREADS)
288              
289             /* Otherwise this function is implemented as macros in iperlsys.h */
290              
291             PERL_STATIC_INLINE bool
292             S_PerlEnv_putenv(pTHX_ char * str)
293             {
294             PERL_ARGS_ASSERT_PERLENV_PUTENV;
295              
296             ENV_LOCK;
297             bool retval = putenv(str);
298             ENV_UNLOCK;
299              
300             return retval;
301             }
302              
303             #endif
304              
305             /* ------------------------------- mg.h ------------------------------- */
306              
307             #if defined(PERL_CORE) || defined(PERL_EXT)
308             /* assumes get-magic and stringification have already occurred */
309             PERL_STATIC_INLINE STRLEN
310             S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
311             {
312             assert(mg->mg_type == PERL_MAGIC_regex_global);
313             assert(mg->mg_len != -1);
314             if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
315             return (STRLEN)mg->mg_len;
316             else {
317             const STRLEN pos = (STRLEN)mg->mg_len;
318             /* Without this check, we may read past the end of the buffer: */
319             if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
320             return sv_or_pv_pos_u2b(sv, s, pos, NULL);
321             }
322             }
323             #endif
324              
325             /* ------------------------------- pad.h ------------------------------ */
326              
327             #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
328             PERL_STATIC_INLINE bool
329             S_PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
330             {
331             PERL_ARGS_ASSERT_PADNAMEIN_SCOPE;
332              
333             /* is seq within the range _LOW to _HIGH ?
334             * This is complicated by the fact that PL_cop_seqmax
335             * may have wrapped around at some point */
336             if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
337             return FALSE; /* not yet introduced */
338              
339             if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
340             /* in compiling scope */
341             if (
342             (seq > COP_SEQ_RANGE_LOW(pn))
343             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
344             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
345             )
346             return TRUE;
347             }
348             else if (
349             (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
350             ?
351             ( seq > COP_SEQ_RANGE_LOW(pn)
352             || seq <= COP_SEQ_RANGE_HIGH(pn))
353              
354             : ( seq > COP_SEQ_RANGE_LOW(pn)
355             && seq <= COP_SEQ_RANGE_HIGH(pn))
356             )
357             return TRUE;
358             return FALSE;
359             }
360             #endif
361              
362             /* ------------------------------- pp.h ------------------------------- */
363              
364             PERL_STATIC_INLINE Stack_off_t
365             Perl_TOPMARK(pTHX)
366             {
367             DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
368             "MARK top %p %" IVdf "\n",
369             PL_markstack_ptr,
370             (IV)*PL_markstack_ptr)));
371             return *PL_markstack_ptr;
372             }
373              
374             PERL_STATIC_INLINE Stack_off_t
375 214098           Perl_POPMARK(pTHX)
376             {
377             DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
378             "MARK pop %p %" IVdf "\n",
379             (PL_markstack_ptr-1),
380             (IV)*(PL_markstack_ptr-1))));
381             assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
382 214098           return *PL_markstack_ptr--;
383             }
384              
385             /*
386             =for apidoc_section $rpp
387              
388             =for apidoc rpp_extend
389             Ensures that there is space on the stack to push C items, extending it
390             if necessary.
391              
392             =cut
393             */
394              
395             PERL_STATIC_INLINE void
396             Perl_rpp_extend(pTHX_ SSize_t n)
397             {
398             PERL_ARGS_ASSERT_RPP_EXTEND;
399              
400             EXTEND_HWM_SET(PL_stack_sp, n);
401             #ifndef STRESS_REALLOC
402             if (UNLIKELY(_EXTEND_NEEDS_GROW(PL_stack_sp, n)))
403             #endif
404             {
405             (void)stack_grow(PL_stack_sp, PL_stack_sp, n);
406             }
407             }
408              
409              
410             /*
411             =for apidoc rpp_popfree_to
412              
413             Pop and free all items on the argument stack above C. On return,
414             C will be equal to C.
415              
416             =cut
417             */
418              
419             PERL_STATIC_INLINE void
420             Perl_rpp_popfree_to(pTHX_ SV **sp)
421             {
422             PERL_ARGS_ASSERT_RPP_POPFREE_TO;
423              
424             assert(sp <= PL_stack_sp);
425             #ifdef PERL_RC_STACK
426             assert(rpp_stack_is_rc());
427             while (PL_stack_sp > sp) {
428             SV *sv = *PL_stack_sp--;
429             SvREFCNT_dec(sv);
430             }
431             #else
432             PL_stack_sp = sp;
433             #endif
434             }
435              
436              
437             /*
438             =for apidoc rpp_popfree_to_NN
439              
440             A variant of rpp_popfree_to() which assumes that all the pointers being
441             popped off the stack are non-NULL.
442              
443             =cut
444             */
445              
446             PERL_STATIC_INLINE void
447             Perl_rpp_popfree_to_NN(pTHX_ SV **sp)
448             {
449             PERL_ARGS_ASSERT_RPP_POPFREE_TO_NN;
450              
451             assert(sp <= PL_stack_sp);
452             #ifdef PERL_RC_STACK
453             assert(rpp_stack_is_rc());
454             while (PL_stack_sp > sp) {
455             SV *sv = *PL_stack_sp--;
456             assert(sv);
457             SvREFCNT_dec_NN(sv);
458             }
459             #else
460             PL_stack_sp = sp;
461             #endif
462             }
463              
464              
465             /*
466             =for apidoc rpp_popfree_1
467              
468             Pop and free the top item on the argument stack and update C.
469              
470             =cut
471             */
472              
473             PERL_STATIC_INLINE void
474             Perl_rpp_popfree_1(pTHX)
475             {
476             PERL_ARGS_ASSERT_RPP_POPFREE_1;
477              
478             #ifdef PERL_RC_STACK
479             assert(rpp_stack_is_rc());
480             SV *sv = *PL_stack_sp--;
481             SvREFCNT_dec(sv);
482             #else
483             PL_stack_sp--;
484             #endif
485             }
486              
487              
488             /*
489             =for apidoc rpp_popfree_1_NN
490              
491             A variant of rpp_popfree_1() which assumes that the pointer being popped
492             off the stack is non-NULL.
493              
494             =cut
495             */
496              
497             PERL_STATIC_INLINE void
498             Perl_rpp_popfree_1_NN(pTHX)
499             {
500             PERL_ARGS_ASSERT_RPP_POPFREE_1_NN;
501              
502             assert(*PL_stack_sp);
503             #ifdef PERL_RC_STACK
504             assert(rpp_stack_is_rc());
505             SV *sv = *PL_stack_sp--;
506             SvREFCNT_dec_NN(sv);
507             #else
508             PL_stack_sp--;
509             #endif
510             }
511              
512              
513             /*
514             =for apidoc rpp_popfree_2
515              
516             Pop and free the top two items on the argument stack and update
517             C.
518              
519             =cut
520             */
521              
522              
523             PERL_STATIC_INLINE void
524             Perl_rpp_popfree_2(pTHX)
525             {
526             PERL_ARGS_ASSERT_RPP_POPFREE_2;
527              
528             #ifdef PERL_RC_STACK
529             assert(rpp_stack_is_rc());
530             for (int i = 0; i < 2; i++) {
531             SV *sv = *PL_stack_sp--;
532             SvREFCNT_dec(sv);
533             }
534             #else
535             PL_stack_sp -= 2;
536             #endif
537             }
538              
539              
540             /*
541             =for apidoc rpp_popfree_2_NN
542              
543             A variant of rpp_popfree_2() which assumes that the two pointers being
544             popped off the stack are non-NULL.
545              
546             =cut
547             */
548              
549              
550             PERL_STATIC_INLINE void
551             Perl_rpp_popfree_2_NN(pTHX)
552             {
553             PERL_ARGS_ASSERT_RPP_POPFREE_2_NN;
554             #ifdef PERL_RC_STACK
555             SV *sv2 = *PL_stack_sp--;
556             assert(sv2);
557             SV *sv1 = *PL_stack_sp;
558             assert(sv1);
559              
560             assert(rpp_stack_is_rc());
561             U32 rc1 = SvREFCNT(sv1);
562             U32 rc2 = SvREFCNT(sv2);
563             /* This expression is intended to be true if either of rc1 or rc2 has
564             * the value 0 or 1, but using only a single branch test, rather
565             * than the two branches that a compiler would plant for a boolean
566             * expression. We are working on the assumption that, most of the
567             * time, neither of the args to a binary function will need to be
568             * freed - they're likely to lex vars, or PADTMPs or whatever.
569             * So give the CPU a single branch that is rarely taken. */
570             if (UNLIKELY( !(rc1>>1) + !(rc2>>1) ))
571             /* at least one of the old SVs needs freeing. Do it the long way */
572             Perl_rpp_free_2_(aTHX_ sv1, sv2, rc1, rc2);
573             else {
574             SvREFCNT(sv1) = rc1 - 1;
575             SvREFCNT(sv2) = rc2 - 1;
576             }
577             PL_stack_sp--;
578             #else
579             PL_stack_sp -= 2;
580             #endif
581             }
582              
583              
584             /*
585             =for apidoc rpp_pop_1_norc
586              
587             Pop and return the top item off the argument stack and update
588             C. It's similar to rpp_popfree_1(), except that it actually
589             returns a value, and it I decrement the SV's reference count.
590             On non-C builds it actually increments the SV's reference
591             count.
592              
593             This is useful in cases where the popped value is immediately embedded
594             somewhere e.g. via av_store(), allowing you skip decrementing and then
595             immediately incrementing the reference count again (and risk prematurely
596             freeing the SV if it had a RC of 1). On non-RC builds, the reference count
597             bookkeeping still works too, which is why it should be used rather than
598             a simple C<*PL_stack_sp-->.
599              
600             =cut
601             */
602              
603             PERL_STATIC_INLINE SV*
604             Perl_rpp_pop_1_norc(pTHX)
605             {
606             PERL_ARGS_ASSERT_RPP_POP_1_NORC
607              
608             SV *sv = *PL_stack_sp--;
609              
610             #ifndef PERL_RC_STACK
611             SvREFCNT_inc(sv);
612             #else
613             assert(rpp_stack_is_rc());
614             #endif
615             return sv;
616             }
617              
618              
619              
620             /*
621             =for apidoc rpp_push_1
622             =for apidoc_item rpp_push_IMM
623             =for apidoc_item rpp_push_2
624             =for apidoc_item rpp_xpush_1
625             =for apidoc_item rpp_xpush_IMM
626             =for apidoc_item rpp_xpush_2
627              
628             Push one or two SVs onto the stack, incrementing their reference counts
629             and updating C. With the C variants, it extends the stack
630             first. The C variants assume that the single argument is an immortal
631             such as <&PL_sv_undef> and, for efficiency, will skip incrementing its
632             reference count.
633              
634             =cut
635             */
636              
637             PERL_STATIC_INLINE void
638             Perl_rpp_push_1(pTHX_ SV *sv)
639             {
640             PERL_ARGS_ASSERT_RPP_PUSH_1;
641              
642             *++PL_stack_sp = sv;
643             #ifdef PERL_RC_STACK
644             assert(rpp_stack_is_rc());
645             SvREFCNT_inc_simple_void_NN(sv);
646             #endif
647             }
648              
649             PERL_STATIC_INLINE void
650             Perl_rpp_push_IMM(pTHX_ SV *sv)
651             {
652             PERL_ARGS_ASSERT_RPP_PUSH_IMM;
653              
654             assert(SvIMMORTAL(sv));
655             *++PL_stack_sp = sv;
656             #ifdef PERL_RC_STACK
657             assert(rpp_stack_is_rc());
658             #endif
659             }
660              
661             PERL_STATIC_INLINE void
662             Perl_rpp_push_2(pTHX_ SV *sv1, SV *sv2)
663             {
664             PERL_ARGS_ASSERT_RPP_PUSH_2;
665              
666             *++PL_stack_sp = sv1;
667             *++PL_stack_sp = sv2;
668             #ifdef PERL_RC_STACK
669             assert(rpp_stack_is_rc());
670             SvREFCNT_inc_simple_void_NN(sv1);
671             SvREFCNT_inc_simple_void_NN(sv2);
672             #endif
673             }
674              
675             PERL_STATIC_INLINE void
676             Perl_rpp_xpush_1(pTHX_ SV *sv)
677             {
678             PERL_ARGS_ASSERT_RPP_XPUSH_1;
679              
680             rpp_extend(1);
681             rpp_push_1(sv);
682             }
683              
684             PERL_STATIC_INLINE void
685             Perl_rpp_xpush_IMM(pTHX_ SV *sv)
686             {
687             PERL_ARGS_ASSERT_RPP_XPUSH_IMM;
688              
689             rpp_extend(1);
690             rpp_push_IMM(sv);
691             }
692              
693             PERL_STATIC_INLINE void
694             Perl_rpp_xpush_2(pTHX_ SV *sv1, SV *sv2)
695             {
696             PERL_ARGS_ASSERT_RPP_XPUSH_2;
697              
698             rpp_extend(2);
699             rpp_push_2(sv1, sv2);
700             }
701              
702              
703             /*
704             =for apidoc rpp_push_1_norc
705              
706             Push C onto the stack without incrementing its reference count, and
707             update C. On non-PERL_RC_STACK builds, mortalise too.
708              
709             This is most useful where an SV has just been created and already has a
710             reference count of 1, but has not yet been anchored anywhere.
711              
712             =cut
713             */
714              
715             PERL_STATIC_INLINE void
716             Perl_rpp_push_1_norc(pTHX_ SV *sv)
717             {
718             PERL_ARGS_ASSERT_RPP_PUSH_1;
719              
720             *++PL_stack_sp = sv;
721             #ifdef PERL_RC_STACK
722             assert(rpp_stack_is_rc());
723             #else
724             sv_2mortal(sv);
725             #endif
726             }
727              
728              
729             /*
730             =for apidoc rpp_replace_1_1
731             =for apidoc_item rpp_replace_1_1_NN
732             =for apidoc_item rpp_replace_1_IMM_NN
733              
734             Replace the current top stack item with C, while suitably adjusting
735             reference counts. Equivalent to rpp_popfree_1(); rpp_push_1(sv), but
736             is more efficient and handles both SVs being the same.
737              
738             The C<_NN> variant assumes that the pointer on the stack to the SV being
739             freed is non-NULL.
740              
741             The C variant is like the C<_NN> variant, but in addition, assumes
742             that the single argument is an immortal such as <&PL_sv_undef> and, for
743             efficiency, will skip incrementing its reference count.
744              
745             =cut
746             */
747              
748             PERL_STATIC_INLINE void
749             Perl_rpp_replace_1_1(pTHX_ SV *sv)
750             {
751             PERL_ARGS_ASSERT_RPP_REPLACE_1_1;
752              
753             assert(sv);
754             #ifdef PERL_RC_STACK
755             assert(rpp_stack_is_rc());
756             SV *oldsv = *PL_stack_sp;
757             *PL_stack_sp = sv;
758             SvREFCNT_inc_simple_void_NN(sv);
759             SvREFCNT_dec(oldsv);
760             #else
761             *PL_stack_sp = sv;
762             #endif
763             }
764              
765              
766             PERL_STATIC_INLINE void
767             Perl_rpp_replace_1_1_NN(pTHX_ SV *sv)
768             {
769             PERL_ARGS_ASSERT_RPP_REPLACE_1_1_NN;
770              
771             assert(sv);
772             assert(*PL_stack_sp);
773             #ifdef PERL_RC_STACK
774             assert(rpp_stack_is_rc());
775             SV *oldsv = *PL_stack_sp;
776             *PL_stack_sp = sv;
777             SvREFCNT_inc_simple_void_NN(sv);
778             SvREFCNT_dec_NN(oldsv);
779             #else
780             *PL_stack_sp = sv;
781             #endif
782             }
783              
784              
785             PERL_STATIC_INLINE void
786             Perl_rpp_replace_1_IMM_NN(pTHX_ SV *sv)
787             {
788             PERL_ARGS_ASSERT_RPP_REPLACE_1_IMM_NN;
789              
790             assert(sv);
791             assert(SvIMMORTAL(sv));
792             assert(*PL_stack_sp);
793             #ifdef PERL_RC_STACK
794             assert(rpp_stack_is_rc());
795             SV *oldsv = *PL_stack_sp;
796             *PL_stack_sp = sv;
797             SvREFCNT_dec_NN(oldsv);
798             #else
799             *PL_stack_sp = sv;
800             #endif
801             }
802              
803              
804             /*
805             =for apidoc rpp_replace_2_1
806             =for apidoc_item rpp_replace_2_1_NN
807             =for apidoc_item rpp_replace_2_IMM_NN
808              
809             Replace the current top to stacks item with C, while suitably
810             adjusting reference counts. Equivalent to rpp_popfree_2(); rpp_push_1(sv),
811             but is more efficient and handles SVs being the same.
812              
813             The C<_NN> variant assumes that the pointers on the stack to the SVs being
814             freed are non-NULL.
815              
816             The C variant is like the C<_NN> variant, but in addition, assumes
817             that the single argument is an immortal such as <&PL_sv_undef> and, for
818             efficiency, will skip incrementing its reference count.
819             =cut
820             */
821              
822             PERL_STATIC_INLINE void
823             Perl_rpp_replace_2_1(pTHX_ SV *sv)
824             {
825             PERL_ARGS_ASSERT_RPP_REPLACE_2_1;
826              
827             #ifdef PERL_RC_STACK
828             assert(rpp_stack_is_rc());
829             /* replace PL_stack_sp[-1] first; leave PL_stack_sp[0] in place while
830             * we free [-1], so if an exception occurs, [0] will still be freed.
831             */
832             SV *oldsv = PL_stack_sp[-1];
833             PL_stack_sp[-1] = sv;
834             SvREFCNT_inc_simple_void_NN(sv);
835             SvREFCNT_dec(oldsv);
836             oldsv = *PL_stack_sp--;
837             SvREFCNT_dec(oldsv);
838             #else
839             *--PL_stack_sp = sv;
840             #endif
841             }
842              
843              
844             /* Private helper function for _NN and _IMM_NN variants.
845             * Assumes sv has already had its ref count incremented,
846             * ready for being put on the stack.
847             * Intended to be small and fast, since it's inlined into many hot parts of
848             * code.
849             */
850              
851             PERL_STATIC_INLINE void
852             Perl_rpp_replace_2_1_COMMON(pTHX_ SV *sv)
853             {
854              
855             assert(sv);
856             #ifdef PERL_RC_STACK
857             SV *sv2 = *PL_stack_sp--;
858             assert(sv2);
859             SV *sv1 = *PL_stack_sp;
860             assert(sv1);
861              
862             *PL_stack_sp = sv;
863             assert(rpp_stack_is_rc());
864             U32 rc1 = SvREFCNT(sv1);
865             U32 rc2 = SvREFCNT(sv2);
866             /* This expression is intended to be true if either of rc1 or rc2 has
867             * the value 0 or 1, but using only a single branch test, rather
868             * than the two branches that a compiler would plant for a boolean
869             * expression. We are working on the assumption that, most of the
870             * time, neither of the args to a binary function will need to be
871             * freed - they're likely to lex vars, or PADTMPs or whatever.
872             * So give the CPU a single branch that is rarely taken. */
873             if (UNLIKELY( !(rc1>>1) + !(rc2>>1) ))
874             /* at least one of the old SVs needs freeing. Do it the long way */
875             Perl_rpp_free_2_(aTHX_ sv1, sv2, rc1, rc2);
876             else {
877             SvREFCNT(sv1) = rc1 - 1;
878             SvREFCNT(sv2) = rc2 - 1;
879             }
880             #else
881             *--PL_stack_sp = sv;
882             #endif
883             }
884              
885              
886             PERL_STATIC_INLINE void
887             Perl_rpp_replace_2_1_NN(pTHX_ SV *sv)
888             {
889             PERL_ARGS_ASSERT_RPP_REPLACE_2_1_NN;
890              
891             assert(sv);
892             #ifdef PERL_RC_STACK
893             SvREFCNT_inc_simple_void_NN(sv);
894             #endif
895             rpp_replace_2_1_COMMON(sv);
896             }
897              
898              
899             PERL_STATIC_INLINE void
900             Perl_rpp_replace_2_IMM_NN(pTHX_ SV *sv)
901             {
902             PERL_ARGS_ASSERT_RPP_REPLACE_2_IMM_NN;
903              
904             assert(sv);
905             assert(SvIMMORTAL(sv));
906             rpp_replace_2_1_COMMON(sv);
907             }
908              
909              
910             /*
911             =for apidoc rpp_replace_at
912              
913             Replace the SV at address sp within the stack with C, while suitably
914             adjusting reference counts. Equivalent to C<*sp = sv>, except with proper
915             reference count handling.
916              
917             =cut
918             */
919              
920             PERL_STATIC_INLINE void
921             Perl_rpp_replace_at(pTHX_ SV **sp, SV *sv)
922             {
923             PERL_ARGS_ASSERT_RPP_REPLACE_AT;
924              
925             #ifdef PERL_RC_STACK
926             assert(rpp_stack_is_rc());
927             SV *oldsv = *sp;
928             *sp = sv;
929             SvREFCNT_inc_simple_void_NN(sv);
930             SvREFCNT_dec(oldsv);
931             #else
932             *sp = sv;
933             #endif
934             }
935              
936              
937             /*
938             =for apidoc rpp_replace_at_NN
939              
940             A variant of rpp_replace_at() which assumes that the SV pointer on the
941             stack is non-NULL.
942              
943             =cut
944             */
945              
946             PERL_STATIC_INLINE void
947             Perl_rpp_replace_at_NN(pTHX_ SV **sp, SV *sv)
948             {
949             PERL_ARGS_ASSERT_RPP_REPLACE_AT_NN;
950              
951             assert(sv);
952             assert(*sp);
953             #ifdef PERL_RC_STACK
954             assert(rpp_stack_is_rc());
955             SV *oldsv = *sp;
956             *sp = sv;
957             SvREFCNT_inc_simple_void_NN(sv);
958             SvREFCNT_dec_NN(oldsv);
959             #else
960             *sp = sv;
961             #endif
962             }
963              
964              
965             /*
966             =for apidoc rpp_replace_at_norc
967              
968             Replace the SV at address sp within the stack with C, while suitably
969             adjusting the reference count of the old SV. Equivalent to C<*sp = sv>,
970             except with proper reference count handling.
971              
972             C's reference count doesn't get incremented. On non-C
973             builds, it gets mortalised too.
974              
975             This is most useful where an SV has just been created and already has a
976             reference count of 1, but has not yet been anchored anywhere.
977              
978             =cut
979             */
980              
981             PERL_STATIC_INLINE void
982             Perl_rpp_replace_at_norc(pTHX_ SV **sp, SV *sv)
983             {
984             PERL_ARGS_ASSERT_RPP_REPLACE_AT_NORC;
985              
986             #ifdef PERL_RC_STACK
987             assert(rpp_stack_is_rc());
988             SV *oldsv = *sp;
989             *sp = sv;
990             SvREFCNT_dec(oldsv);
991             #else
992             *sp = sv;
993             sv_2mortal(sv);
994             #endif
995             }
996              
997              
998             /*
999             =for apidoc rpp_replace_at_norc_NN
1000              
1001             A variant of rpp_replace_at_norc() which assumes that the SV pointer on the
1002             stack is non-NULL.
1003              
1004             =cut
1005             */
1006              
1007             PERL_STATIC_INLINE void
1008             Perl_rpp_replace_at_norc_NN(pTHX_ SV **sp, SV *sv)
1009             {
1010             PERL_ARGS_ASSERT_RPP_REPLACE_AT_NORC_NN;
1011              
1012             assert(*sp);
1013             #ifdef PERL_RC_STACK
1014             assert(rpp_stack_is_rc());
1015             SV *oldsv = *sp;
1016             *sp = sv;
1017             SvREFCNT_dec_NN(oldsv);
1018             #else
1019             *sp = sv;
1020             sv_2mortal(sv);
1021             #endif
1022             }
1023              
1024              
1025             /*
1026             =for apidoc rpp_context
1027              
1028             Impose void, scalar or list context on the stack.
1029             First, pop C items off the stack, then when C is:
1030             C: return as-is.
1031             C: pop everything back to C
1032             C: move the top stack item (or C<&PL_sv_undef> if none) to
1033             C and free everything above it.
1034              
1035             =cut
1036             */
1037              
1038             PERL_STATIC_INLINE void
1039             Perl_rpp_context(pTHX_ SV **mark, U8 gimme, SSize_t extra)
1040             {
1041             PERL_ARGS_ASSERT_RPP_CONTEXT;
1042             assert(extra >= 0);
1043             assert(mark <= PL_stack_sp - extra);
1044              
1045             if (gimme == G_LIST)
1046             mark = PL_stack_sp - extra;
1047             else if (gimme == G_SCALAR) {
1048             SV **svp = PL_stack_sp - extra;
1049             mark++;
1050             if (mark > svp) {
1051             /* empty list (plus extra) */
1052             rpp_popfree_to(svp);
1053             rpp_extend(1);
1054             *++PL_stack_sp = &PL_sv_undef;
1055             return;
1056             }
1057             /* swap top and bottom list items */
1058             SV *top = *svp;
1059             *svp = *mark;
1060             *mark = top;
1061             }
1062             rpp_popfree_to(mark);
1063             }
1064              
1065              
1066              
1067              
1068             /*
1069             =for apidoc rpp_try_AMAGIC_1
1070             =for apidoc_item rpp_try_AMAGIC_2
1071              
1072             Check whether either of the one or two SVs at the top of the stack is
1073             magical or a ref, and in either case handle it specially: invoke get
1074             magic, call an overload method, or replace a ref with a temporary numeric
1075             value, as appropriate. If this function returns true, it indicates that
1076             the correct return value is already on the stack. Intended to be used at
1077             the beginning of the PP function for unary or binary ops.
1078              
1079             =cut
1080             */
1081              
1082             PERL_STATIC_INLINE bool
1083             Perl_rpp_try_AMAGIC_1(pTHX_ int method, int flags)
1084             {
1085             return UNLIKELY((SvFLAGS(*PL_stack_sp) & (SVf_ROK|SVs_GMG)))
1086             && Perl_try_amagic_un(aTHX_ method, flags);
1087             }
1088              
1089             PERL_STATIC_INLINE bool
1090             Perl_rpp_try_AMAGIC_2(pTHX_ int method, int flags)
1091             {
1092             return UNLIKELY(((SvFLAGS(PL_stack_sp[-1])|SvFLAGS(PL_stack_sp[0]))
1093             & (SVf_ROK|SVs_GMG)))
1094             && Perl_try_amagic_bin(aTHX_ method, flags);
1095             }
1096              
1097              
1098             /*
1099             =for apidoc rpp_stack_is_rc
1100              
1101             Returns a boolean value indicating whether the stack is currently
1102             reference-counted. Note that if the stack is split (bottom half RC, top
1103             half non-RC), this function returns false, even if the top half currently
1104             contains zero items.
1105              
1106             =cut
1107             */
1108              
1109             PERL_STATIC_INLINE bool
1110             Perl_rpp_stack_is_rc(pTHX)
1111             {
1112             #ifdef PERL_RC_STACK
1113             return AvREAL(PL_curstack) && !PL_curstackinfo->si_stack_nonrc_base;
1114             #else
1115             return 0;
1116             #endif
1117              
1118             }
1119              
1120              
1121             /*
1122             =for apidoc rpp_is_lone
1123              
1124             Indicates whether the stacked SV C (assumed to be not yet popped off
1125             the stack) is only kept alive due to a single reference from the argument
1126             stack and/or and the temps stack.
1127              
1128             This can used for example to decide whether the copying of return values
1129             in rvalue context can be skipped, or whether it shouldn't be assigned to
1130             in lvalue context.
1131              
1132             =cut
1133             */
1134              
1135             PERL_STATIC_INLINE bool
1136             Perl_rpp_is_lone(pTHX_ SV *sv)
1137             {
1138             #ifdef PERL_RC_STACK
1139             /* note that rpp_is_lone() can be used in wrapped pp functions,
1140             * where technically the stack is no longer ref-counted; but because
1141             * the args are non-RC copies of RC args further down the stack, we
1142             * can't be in a *completely* non-ref stack.
1143             */
1144             assert(AvREAL(PL_curstack));
1145             #endif
1146              
1147             return SvREFCNT(sv) <= (U32)cBOOL(SvTEMP(sv))
1148             #ifdef PERL_RC_STACK
1149             + 1u
1150             && !SvIMMORTAL(sv) /* PL_sv_undef etc are never stealable */
1151             #endif
1152             ;
1153             }
1154              
1155              
1156             /*
1157             =for apidoc rpp_invoke_xs
1158              
1159             Call the XS function associated with C. Wraps the call if necessary to
1160             handle XS functions which are not aware of reference-counted stacks.
1161              
1162             =cut
1163             */
1164              
1165              
1166             PERL_STATIC_INLINE void
1167             Perl_rpp_invoke_xs(pTHX_ CV *cv)
1168             {
1169             PERL_ARGS_ASSERT_RPP_INVOKE_XS;
1170              
1171             #ifdef PERL_RC_STACK
1172             if (!CvXS_RCSTACK(cv))
1173             Perl_xs_wrap(aTHX_ CvXSUB(cv), cv);
1174             else
1175             #endif
1176             CvXSUB(cv)(aTHX_ cv);
1177             }
1178              
1179              
1180             /* for SvCANEXISTDELETE() macro in pp.h */
1181             PERL_STATIC_INLINE bool
1182             Perl_sv_can_existdelete(pTHX_ SV *sv)
1183             {
1184             /* Anything without tie magic is fine */
1185             MAGIC *mg;
1186             if(!SvRMAGICAL(sv) || !(mg = mg_find(sv, PERL_MAGIC_tied)))
1187             return true;
1188              
1189             HV *stash = SvSTASH(SvRV(SvTIED_obj(sv, mg)));
1190             return stash &&
1191             gv_fetchmethod_autoload(stash, "EXISTS", TRUE) &&
1192             gv_fetchmethod_autoload(stash, "DELETE", TRUE);
1193             }
1194              
1195              
1196             /* ----------------------------- regexp.h ----------------------------- */
1197              
1198             /* PVLVs need to act as a superset of all scalar types - they are basically
1199             * PVMGs with a few extra fields.
1200             * REGEXPs are first class scalars, but have many fields that can't be copied
1201             * into a PVLV body.
1202             *
1203             * Hence we take a different approach - instead of a copy, PVLVs store a pointer
1204             * back to the original body. To avoid increasing the size of PVLVs just for the
1205             * rare case of REGEXP assignment, this pointer is stored in the memory usually
1206             * used for SvLEN(). Hence the check for SVt_PVLV below, and the ? : ternary to
1207             * read the pointer from the two possible locations. The macro SvLEN() wraps the
1208             * access to the union's member xpvlenu_len, but there is no equivalent macro
1209             * for wrapping the union's member xpvlenu_rx, hence the direct reference here.
1210             *
1211             * See commit df6b4bd56551f2d3 for more details. */
1212              
1213             PERL_STATIC_INLINE struct regexp *
1214             Perl_ReANY(const REGEXP * const re)
1215             {
1216             XPV* const p = (XPV*)SvANY(re);
1217              
1218             PERL_ARGS_ASSERT_REANY;
1219             assert(isREGEXP(re));
1220              
1221             return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
1222             : (struct regexp *)p;
1223             }
1224              
1225             /* ------------------------------- utf8.h ------------------------------- */
1226              
1227             /*
1228             =for apidoc_section $unicode
1229             */
1230              
1231             PERL_STATIC_INLINE void
1232             Perl_append_utf8_from_native_byte(const U8 byte, U8** dest)
1233             {
1234             /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
1235             * encoded string at '*dest', updating '*dest' to include it */
1236              
1237             PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
1238              
1239             if (NATIVE_BYTE_IS_INVARIANT(byte))
1240             *((*dest)++) = byte;
1241             else {
1242             *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
1243             *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
1244             }
1245             }
1246              
1247             PERL_STATIC_INLINE U8 *
1248             Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp)
1249             {
1250             return bytes_to_utf8_free_me(s, lenp, NULL);
1251             }
1252              
1253             PERL_STATIC_INLINE U8 *
1254             Perl_bytes_to_utf8_temp_pv(pTHX_ const U8 *s, STRLEN *lenp)
1255             {
1256             void * free_me = NULL;
1257             U8 * converted = bytes_to_utf8_free_me(s, lenp, &free_me);
1258              
1259             if (free_me) {
1260             SAVEFREEPV(free_me);
1261             }
1262              
1263             return converted;
1264             }
1265              
1266             PERL_STATIC_INLINE bool
1267             Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void ** free_me)
1268             {
1269             /* utf8_to_bytes_() is declared to take a non-const s_ptr because it may
1270             * change it, but NOT when called with PL_utf8_to_bytes_new_memory, so it
1271             * is ok to cast away const */
1272             return utf8_to_bytes_((U8 **) s_ptr, lenp, free_me,
1273             PL_utf8_to_bytes_new_memory);
1274             }
1275              
1276             PERL_STATIC_INLINE bool
1277             Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp)
1278             {
1279             /* utf8_to_bytes_() requires a non-NULL pointer, but doesn't use it when
1280             * called with PL_utf8_to_bytes_use_temporary */
1281             void* dummy = NULL;
1282              
1283             /* utf8_to_bytes_() is declared to take a non-const s_ptr because it may
1284             * change it, but NOT when called with PL_utf8_to_bytes_use_temporary, so
1285             * it is ok to cast away const */
1286             return utf8_to_bytes_((U8 **) s_ptr, lenp, &dummy,
1287             PL_utf8_to_bytes_use_temporary);
1288             }
1289              
1290             PERL_STATIC_INLINE bool
1291             Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp)
1292             {
1293             /* utf8_to_bytes_() requires a non-NULL pointer, but doesn't use it when
1294             * called with PL_utf8_to_bytes_overwrite */
1295             void* dummy = NULL;
1296              
1297             return utf8_to_bytes_(s_ptr, lenp, &dummy, PL_utf8_to_bytes_overwrite);
1298             }
1299              
1300             /*
1301             =for apidoc valid_utf8_to_uvchr
1302             Like C>, but should only be called when it is
1303             known that the next character in the input UTF-8 string C is well-formed
1304             (I, it passes C>. Surrogates, non-character code
1305             points, and non-Unicode code points are allowed.
1306              
1307             =cut
1308              
1309             */
1310              
1311             PERL_STATIC_INLINE UV
1312             Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
1313             {
1314             const UV expectlen = UTF8SKIP(s);
1315             const U8* send = s + expectlen;
1316             UV uv = *s;
1317              
1318             PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
1319              
1320             if (retlen) {
1321             *retlen = expectlen;
1322             }
1323              
1324             /* An invariant is trivially returned */
1325             if (expectlen == 1) {
1326             return uv;
1327             }
1328              
1329             /* Remove the leading bits that indicate the number of bytes, leaving just
1330             * the bits that are part of the value */
1331             uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
1332              
1333             /* Now, loop through the remaining bytes, accumulating each into the
1334             * working total as we go. (I khw tried unrolling the loop for up to 4
1335             * bytes, but there was no performance improvement) */
1336             for (++s; s < send; s++) {
1337             uv = UTF8_ACCUMULATE(uv, *s);
1338             }
1339              
1340             return UNI_TO_NATIVE(uv);
1341              
1342             }
1343              
1344             /* This looks like 0x010101... */
1345             # define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
1346              
1347             /* This looks like 0x808080... */
1348             # define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
1349             # define PERL_WORDSIZE sizeof(PERL_UINTMAX_T)
1350             # define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
1351              
1352             /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
1353             * or'ing together the lowest bits of 'x'. Hopefully the final term gets
1354             * optimized out completely on a 32-bit system, and its mask gets optimized out
1355             * on a 64-bit system */
1356             # define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
1357             | ( PTR2nat(x) >> 1) \
1358             | ( ( (PTR2nat(x) \
1359             & PERL_WORD_BOUNDARY_MASK) >> 2))))
1360              
1361             /*
1362             =for apidoc is_utf8_invariant_string
1363             =for apidoc_item is_utf8_invariant_string_loc
1364             =for apidoc_item is_ascii_string
1365             =for apidoc_item is_invariant_string
1366              
1367             These each return TRUE if the first C bytes of the string C are the
1368             same regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
1369             EBCDIC machines); otherwise they returns FALSE. That is, they return TRUE if
1370             they are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and
1371             only the ASCII characters fit this definition. On EBCDIC machines, the
1372             ASCII-range characters are invariant, but so also are the C1 controls.
1373              
1374             If C is 0, it will be calculated using C, (which means if you
1375             use this option, that C can't have embedded C characters and has to
1376             have a terminating C byte).
1377              
1378             All forms except C have identical behavior. The
1379             only difference with it is that it has an extra pointer parameter, C, into
1380             which, if it isn't NULL, the location of the first UTF-8 variant character in
1381             the C pointer will be stored upon failure. If all characters are UTF-8
1382             invariant, this function does not change the contents of C<*ep>.
1383              
1384             C is somewhat misleadingly named.
1385             C is preferred, as it indicates under what conditions
1386             the string is invariant.
1387              
1388             C is misleadingly-named. On ASCII-ish platforms, the name
1389             isn't misleading: the ASCII-range characters are exactly the UTF-8 invariants.
1390             But EBCDIC machines have more UTF-8 invariants than just the ASCII characters,
1391             so the name C is preferred.
1392              
1393             See also
1394             C> and C>.
1395              
1396             =for apidoc_defn ARTm|bool|is_utf8_invariant_string|NN const U8 * const s|STRLEN len
1397              
1398             =cut
1399              
1400             */
1401              
1402             #define is_utf8_invariant_string(s, len) \
1403             is_utf8_invariant_string_loc(s, len, NULL)
1404              
1405             PERL_STATIC_INLINE bool
1406             Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1407             {
1408             const U8* send;
1409             const U8* x = s;
1410              
1411             PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
1412              
1413             if (len == 0) {
1414             len = strlen((const char *)s);
1415             }
1416              
1417             send = s + len;
1418              
1419             #ifndef EBCDIC
1420              
1421             /* Do the word-at-a-time iff there is at least one usable full word. That
1422             * means that after advancing to a word boundary, there still is at least a
1423             * full word left. The number of bytes needed to advance is 'wordsize -
1424             * offset' unless offset is 0. */
1425             if ((STRLEN) (send - x) >= PERL_WORDSIZE
1426              
1427             /* This term is wordsize if subword; 0 if not */
1428             + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
1429              
1430             /* 'offset' */
1431             - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
1432             {
1433              
1434             /* Process per-byte until reach word boundary. XXX This loop could be
1435             * eliminated if we knew that this platform had fast unaligned reads */
1436             while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
1437             if (! UTF8_IS_INVARIANT(*x)) {
1438             if (ep) {
1439             *ep = x;
1440             }
1441              
1442             return FALSE;
1443             }
1444             x++;
1445             }
1446              
1447             /* Here, we know we have at least one full word to process. Process
1448             * per-word as long as we have at least a full word left */
1449             do {
1450             if ((* (const PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
1451              
1452             /* Found a variant. Just return if caller doesn't want its
1453             * exact position */
1454             if (! ep) {
1455             return FALSE;
1456             }
1457              
1458             # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
1459             || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
1460              
1461             *ep = x + variant_byte_number(* (const PERL_UINTMAX_T *) x);
1462             assert(*ep >= s && *ep < send);
1463              
1464             return FALSE;
1465              
1466             # else /* If weird byte order, drop into next loop to do byte-at-a-time
1467             checks. */
1468              
1469             break;
1470             # endif
1471             }
1472              
1473             x += PERL_WORDSIZE;
1474              
1475             } while (x + PERL_WORDSIZE <= send);
1476             }
1477              
1478             #endif /* End of ! EBCDIC */
1479              
1480             /* Process per-byte. (Can't use libc functions like strpbrk() because
1481             * input isn't necessarily a C string) */
1482             while (x < send) {
1483             if (! UTF8_IS_INVARIANT(*x)) {
1484             if (ep) {
1485             *ep = x;
1486             }
1487              
1488             return FALSE;
1489             }
1490              
1491             x++;
1492             }
1493              
1494             return TRUE;
1495             }
1496              
1497             /* See if the platform has builtins for finding the most/least significant bit,
1498             * and which one is right for using on 32 and 64 bit operands */
1499             #if (__has_builtin(__builtin_clz) || PERL_GCC_VERSION_GE(3,4,0))
1500             # if U32SIZE == INTSIZE
1501             # define PERL_CLZ_32 __builtin_clz
1502             # endif
1503             # if defined(U64TYPE) && U64SIZE == INTSIZE
1504             # define PERL_CLZ_64 __builtin_clz
1505             # endif
1506             #endif
1507             #if (__has_builtin(__builtin_ctz) || PERL_GCC_VERSION_GE(3,4,0))
1508             # if U32SIZE == INTSIZE
1509             # define PERL_CTZ_32 __builtin_ctz
1510             # endif
1511             # if defined(U64TYPE) && U64SIZE == INTSIZE
1512             # define PERL_CTZ_64 __builtin_ctz
1513             # endif
1514             #endif
1515              
1516             #if (__has_builtin(__builtin_clzl) || PERL_GCC_VERSION_GE(3,4,0))
1517             # if U32SIZE == LONGSIZE && ! defined(PERL_CLZ_32)
1518             # define PERL_CLZ_32 __builtin_clzl
1519             # endif
1520             # if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CLZ_64)
1521             # define PERL_CLZ_64 __builtin_clzl
1522             # endif
1523             #endif
1524             #if (__has_builtin(__builtin_ctzl) || PERL_GCC_VERSION_GE(3,4,0))
1525             # if U32SIZE == LONGSIZE && ! defined(PERL_CTZ_32)
1526             # define PERL_CTZ_32 __builtin_ctzl
1527             # endif
1528             # if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CTZ_64)
1529             # define PERL_CTZ_64 __builtin_ctzl
1530             # endif
1531             #endif
1532              
1533             #if (__has_builtin(__builtin_clzll) || PERL_GCC_VERSION_GE(3,4,0))
1534             # if U32SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_32)
1535             # define PERL_CLZ_32 __builtin_clzll
1536             # endif
1537             # if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_64)
1538             # define PERL_CLZ_64 __builtin_clzll
1539             # endif
1540             #endif
1541             #if (__has_builtin(__builtin_ctzll) || PERL_GCC_VERSION_GE(3,4,0))
1542             # if U32SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_32)
1543             # define PERL_CTZ_32 __builtin_ctzll
1544             # endif
1545             # if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_64)
1546             # define PERL_CTZ_64 __builtin_ctzll
1547             # endif
1548             #endif
1549              
1550             #if defined(WIN32)
1551             # include
1552             /* MinGW warns that it ignores "pragma intrinsic". */
1553             # if defined(_MSC_VER)
1554             # pragma intrinsic(_BitScanForward)
1555             # pragma intrinsic(_BitScanReverse)
1556             # if defined(_WIN64)
1557             # pragma intrinsic(_BitScanForward64)
1558             # pragma intrinsic(_BitScanReverse64)
1559             # endif
1560             # endif
1561             #endif
1562              
1563             /* The reason there are not checks to see if ffs() and ffsl() are available for
1564             * determining the lsb, is because these don't improve on the deBruijn method
1565             * fallback, which is just a branchless integer multiply, array element
1566             * retrieval, and shift. The others, even if the function call overhead is
1567             * optimized out, have to cope with the possibility of the input being all
1568             * zeroes, and almost certainly will have conditionals for this eventuality.
1569             * khw, at the time of this commit, looked at the source for both gcc and clang
1570             * to verify this. (gcc used a method inferior to deBruijn.) */
1571              
1572             /* Below are functions to find the first, last, or only set bit in a word. On
1573             * platforms with 64-bit capability, there is a pair for each operation; the
1574             * first taking a 64 bit operand, and the second a 32 bit one. The logic is
1575             * the same in each pair, so the second is stripped of most comments. */
1576              
1577             #ifdef U64TYPE /* HAS_QUAD not usable outside the core */
1578              
1579             PERL_STATIC_INLINE unsigned
1580             Perl_lsbit_pos64(U64 word)
1581             {
1582             /* Find the position (0..63) of the least significant set bit in the input
1583             * word */
1584              
1585             ASSUME(word != 0);
1586              
1587             /* If we can determine that the platform has a usable fast method to get
1588             * this info, use that */
1589              
1590             # if defined(PERL_CTZ_64)
1591             # define PERL_HAS_FAST_GET_LSB_POS64
1592              
1593             return (unsigned) PERL_CTZ_64(word);
1594              
1595             # elif U64SIZE == 8 && defined(_WIN64)
1596             # define PERL_HAS_FAST_GET_LSB_POS64
1597              
1598             {
1599             unsigned long index;
1600             _BitScanForward64(&index, word);
1601             return (unsigned)index;
1602             }
1603              
1604             # else
1605              
1606             /* Here, we didn't find a fast method for finding the lsb. Fall back to
1607             * making the lsb the only set bit in the word, and use our function that
1608             * works on words with a single bit set.
1609             *
1610             * Isolate the lsb;
1611             * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
1612             *
1613             * The word will look like this, with a rightmost set bit in position 's':
1614             * ('x's are don't cares, and 'y's are their complements)
1615             * s
1616             * x..x100..00
1617             * y..y011..11 Complement
1618             * y..y100..00 Add 1
1619             * 0..0100..00 And with the original
1620             *
1621             * (Yes, complementing and adding 1 is just taking the negative on 2's
1622             * complement machines, but not on 1's complement ones, and some compilers
1623             * complain about negating an unsigned.)
1624             */
1625             return single_1bit_pos64(word & (~word + 1));
1626              
1627             # endif
1628              
1629             }
1630              
1631             # define lsbit_pos_uintmax_(word) lsbit_pos64(word)
1632             #else /* ! QUAD */
1633             # define lsbit_pos_uintmax_(word) lsbit_pos32(word)
1634             #endif
1635              
1636             PERL_STATIC_INLINE unsigned /* Like above for 32 bit word */
1637             Perl_lsbit_pos32(U32 word)
1638             {
1639             /* Find the position (0..31) of the least significant set bit in the input
1640             * word */
1641              
1642             ASSUME(word != 0);
1643              
1644             #if defined(PERL_CTZ_32)
1645             # define PERL_HAS_FAST_GET_LSB_POS32
1646              
1647             return (unsigned) PERL_CTZ_32(word);
1648              
1649             #elif U32SIZE == 4 && defined(WIN32)
1650             # define PERL_HAS_FAST_GET_LSB_POS32
1651              
1652             {
1653             unsigned long index;
1654             _BitScanForward(&index, word);
1655             return (unsigned)index;
1656             }
1657              
1658             #elif defined(PERL_HAS_FAST_GET_LSB_POS64)
1659             # define PERL_HAS_FAST_GET_LSB_POS32
1660              
1661             /* Unlikely, but possible for the platform to have a wider fast operation
1662             * but not a narrower one. But easy enough to handle the case by widening
1663             * the parameter size. */
1664             return lsbit_pos64(word);
1665              
1666             #else
1667              
1668             return single_1bit_pos32(word & (~word + 1));
1669              
1670             #endif
1671              
1672             }
1673              
1674              
1675             /* Convert the leading zeros count to the bit position of the first set bit.
1676             * This just subtracts from the highest position, 31 or 63. But some compilers
1677             * don't optimize this optimally, and so a bit of bit twiddling encourages them
1678             * to do the right thing. It turns out that subtracting a smaller non-negative
1679             * number 'x' from 2**n-1 for any n is the same as taking the exclusive-or of
1680             * the two numbers. To see why, first note that the sum of any number, x, and
1681             * its complement, x', is all ones. So all ones minus x is x'. Then note that
1682             * the xor of x and all ones is x'. */
1683             #define LZC_TO_MSBIT_POS_(size, lzc) ((size##SIZE * CHARBITS - 1) ^ (lzc))
1684              
1685             #ifdef U64TYPE /* HAS_QUAD not usable outside the core */
1686              
1687             PERL_STATIC_INLINE unsigned
1688             Perl_msbit_pos64(U64 word)
1689             {
1690             /* Find the position (0..63) of the most significant set bit in the input
1691             * word */
1692              
1693             ASSUME(word != 0);
1694              
1695             /* If we can determine that the platform has a usable fast method to get
1696             * this, use that */
1697              
1698             # if defined(PERL_CLZ_64)
1699             # define PERL_HAS_FAST_GET_MSB_POS64
1700              
1701             return (unsigned) LZC_TO_MSBIT_POS_(U64, PERL_CLZ_64(word));
1702              
1703             # elif U64SIZE == 8 && defined(_WIN64)
1704             # define PERL_HAS_FAST_GET_MSB_POS64
1705              
1706             {
1707             unsigned long index;
1708             _BitScanReverse64(&index, word);
1709             return (unsigned)index;
1710             }
1711              
1712             # else
1713              
1714             /* Here, we didn't find a fast method for finding the msb. Fall back to
1715             * making the msb the only set bit in the word, and use our function that
1716             * works on words with a single bit set.
1717             *
1718             * Isolate the msb; http://codeforces.com/blog/entry/10330
1719             *
1720             * Only the most significant set bit matters. Or'ing word with its right
1721             * shift of 1 makes that bit and the next one to its right both 1.
1722             * Repeating that with the right shift of 2 makes for 4 1-bits in a row.
1723             * ... We end with the msb and all to the right being 1. */
1724             word |= (word >> 1);
1725             word |= (word >> 2);
1726             word |= (word >> 4);
1727             word |= (word >> 8);
1728             word |= (word >> 16);
1729             word |= (word >> 32);
1730              
1731             /* Then subtracting the right shift by 1 clears all but the left-most of
1732             * the 1 bits, which is our desired result */
1733             word -= (word >> 1);
1734              
1735             /* Now we have a single bit set */
1736             return single_1bit_pos64(word);
1737              
1738             # endif
1739              
1740             }
1741              
1742             # define msbit_pos_uintmax_(word) msbit_pos64(word)
1743             #else /* ! QUAD */
1744             # define msbit_pos_uintmax_(word) msbit_pos32(word)
1745             #endif
1746              
1747             PERL_STATIC_INLINE unsigned
1748             Perl_msbit_pos32(U32 word)
1749             {
1750             /* Find the position (0..31) of the most significant set bit in the input
1751             * word */
1752              
1753             ASSUME(word != 0);
1754              
1755             #if defined(PERL_CLZ_32)
1756             # define PERL_HAS_FAST_GET_MSB_POS32
1757              
1758             return (unsigned) LZC_TO_MSBIT_POS_(U32, PERL_CLZ_32(word));
1759             #elif U32SIZE == 4 && defined(WIN32)
1760             # define PERL_HAS_FAST_GET_MSB_POS32
1761              
1762             {
1763             unsigned long index;
1764             _BitScanReverse(&index, word);
1765             return (unsigned)index;
1766             }
1767              
1768             #elif defined(PERL_HAS_FAST_GET_MSB_POS64)
1769             # define PERL_HAS_FAST_GET_MSB_POS32
1770              
1771             return msbit_pos64(word); /* Let compiler widen parameter */
1772              
1773             #else
1774              
1775             word |= (word >> 1);
1776             word |= (word >> 2);
1777             word |= (word >> 4);
1778             word |= (word >> 8);
1779             word |= (word >> 16);
1780             word -= (word >> 1);
1781             return single_1bit_pos32(word);
1782              
1783             #endif
1784              
1785             }
1786              
1787             /* Note that if you are working through all the 1 bits in a word, and don't
1788             * care which order you process them in, it is better to use lsbit_pos. This
1789             * is because some platforms have a fast way to find the msb but not the lsb,
1790             * and others vice versa. The code above falls back to use the single
1791             * available fast method when the desired one is missing, and it is cheaper to
1792             * fall back from lsb to msb than the other way around */
1793              
1794             #if UVSIZE == U64SIZE
1795             # define msbit_pos(word) msbit_pos64(word)
1796             # define lsbit_pos(word) lsbit_pos64(word)
1797             #elif UVSIZE == U32SIZE
1798             # define msbit_pos(word) msbit_pos32(word)
1799             # define lsbit_pos(word) lsbit_pos32(word)
1800             #endif
1801              
1802             #ifdef U64TYPE /* HAS_QUAD not usable outside the core */
1803              
1804             PERL_STATIC_INLINE unsigned
1805             Perl_single_1bit_pos64(U64 word)
1806             {
1807             /* Given a 64-bit word known to contain all zero bits except one 1 bit,
1808             * find and return the 1's position: 0..63 */
1809              
1810             # ifdef PERL_CORE /* macro not exported */
1811             ASSUME(isPOWER_OF_2(word));
1812             # else
1813             ASSUME(word && (word & (word-1)) == 0);
1814             # endif
1815              
1816             /* The only set bit is both the most and least significant bit. If we have
1817             * a fast way of finding either one, use that.
1818             *
1819             * It may appear at first glance that those functions call this one, but
1820             * they don't if the corresponding #define is set */
1821              
1822             # ifdef PERL_HAS_FAST_GET_MSB_POS64
1823              
1824             return msbit_pos64(word);
1825              
1826             # elif defined(PERL_HAS_FAST_GET_LSB_POS64)
1827              
1828             return lsbit_pos64(word);
1829              
1830             # else
1831              
1832             /* The position of the only set bit in a word can be quickly calculated
1833             * using deBruijn sequences. See for example
1834             * https://en.wikipedia.org/wiki/De_Bruijn_sequence */
1835             return PL_deBruijn_bitpos_tab64[(word * PERL_deBruijnMagic64_)
1836             >> PERL_deBruijnShift64_];
1837             # endif
1838              
1839             }
1840              
1841             #endif
1842              
1843             PERL_STATIC_INLINE unsigned
1844             Perl_single_1bit_pos32(U32 word)
1845             {
1846             /* Given a 32-bit word known to contain all zero bits except one 1 bit,
1847             * find and return the 1's position: 0..31 */
1848              
1849             #ifdef PERL_CORE /* macro not exported */
1850             ASSUME(isPOWER_OF_2(word));
1851             #else
1852             ASSUME(word && (word & (word-1)) == 0);
1853             #endif
1854             #ifdef PERL_HAS_FAST_GET_MSB_POS32
1855              
1856             return msbit_pos32(word);
1857              
1858             #elif defined(PERL_HAS_FAST_GET_LSB_POS32)
1859              
1860             return lsbit_pos32(word);
1861              
1862             #else
1863              
1864             return PL_deBruijn_bitpos_tab32[(word * PERL_deBruijnMagic32_)
1865             >> PERL_deBruijnShift32_];
1866             #endif
1867              
1868             }
1869              
1870             #ifndef EBCDIC
1871              
1872             PERL_STATIC_INLINE unsigned int
1873             Perl_variant_byte_number(PERL_UINTMAX_T word)
1874             {
1875             /* This returns the position in a word (0..7) of the first variant byte in
1876             * it. This is a helper function. Note that there are no branches */
1877              
1878             /* Get just the msb bits of each byte */
1879             word &= PERL_VARIANTS_WORD_MASK;
1880              
1881             /* This should only be called if we know there is a variant byte in the
1882             * word */
1883             assert(word);
1884              
1885             # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
1886              
1887             /* Bytes are stored like
1888             * Byte8 ... Byte2 Byte1
1889             * 63..56...15...8 7...0
1890             * so getting the lsb of the whole modified word is getting the msb of the
1891             * first byte that has its msb set */
1892             word = lsbit_pos_uintmax_(word);
1893              
1894             /* Here, word contains the position 7,15,23,...55,63 of that bit. Convert
1895             * to 0..7 */
1896             return (unsigned int) ((word + 1) >> 3) - 1;
1897              
1898             # elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
1899              
1900             /* Bytes are stored like
1901             * Byte1 Byte2 ... Byte8
1902             * 63..56 55..47 ... 7...0
1903             * so getting the msb of the whole modified word is getting the msb of the
1904             * first byte that has its msb set */
1905             word = msbit_pos_uintmax_(word);
1906              
1907             /* Here, word contains the position 63,55,...,23,15,7 of that bit. Convert
1908             * to 0..7 */
1909             word = ((word + 1) >> 3) - 1;
1910              
1911             /* And invert the result because of the reversed byte order on this
1912             * platform */
1913             word = CHARBITS - word - 1;
1914              
1915             return (unsigned int) word;
1916              
1917             # else
1918             # error Unexpected byte order
1919             # endif
1920              
1921             }
1922              
1923             #endif
1924             #if defined(PERL_CORE) || defined(PERL_EXT)
1925              
1926             /*
1927             =for apidoc variant_under_utf8_count
1928              
1929             This function looks at the sequence of bytes between C and C, which are
1930             assumed to be encoded in ASCII/Latin1, and returns how many of them would
1931             change should the string be translated into UTF-8. Due to the nature of UTF-8,
1932             each of these would occupy two bytes instead of the single one in the input
1933             string. Thus, this function returns the precise number of bytes the string
1934             would expand by when translated to UTF-8.
1935              
1936             Unlike most of the other functions that have C in their name, the input
1937             to this function is NOT a UTF-8-encoded string. The function name is slightly
1938             I to emphasize this.
1939              
1940             This function is internal to Perl because khw thinks that any XS code that
1941             would want this is probably operating too close to the internals. Presenting a
1942             valid use case could change that.
1943              
1944             See also
1945             C>
1946             and
1947             C>,
1948              
1949             =cut
1950              
1951             */
1952              
1953             PERL_STATIC_INLINE Size_t
1954             S_variant_under_utf8_count(const U8* const s, const U8* const e)
1955             {
1956             const U8* x = s;
1957             Size_t count = 0;
1958              
1959             PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
1960              
1961             # ifndef EBCDIC
1962              
1963             /* Test if the string is long enough to use word-at-a-time. (Logic is the
1964             * same as for is_utf8_invariant_string()) */
1965             if ((STRLEN) (e - x) >= PERL_WORDSIZE
1966             + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
1967             - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
1968             {
1969              
1970             /* Process per-byte until reach word boundary. XXX This loop could be
1971             * eliminated if we knew that this platform had fast unaligned reads */
1972             while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
1973             count += ! UTF8_IS_INVARIANT(*x++);
1974             }
1975              
1976             /* Process per-word as long as we have at least a full word left */
1977             do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
1978             explanation of how this works */
1979             PERL_UINTMAX_T increment
1980             = ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
1981             * PERL_COUNT_MULTIPLIER)
1982             >> ((PERL_WORDSIZE - 1) * CHARBITS);
1983             count += (Size_t) increment;
1984             x += PERL_WORDSIZE;
1985             } while (x + PERL_WORDSIZE <= e);
1986             }
1987              
1988             # endif
1989              
1990             /* Process per-byte */
1991             while (x < e) {
1992             if (! UTF8_IS_INVARIANT(*x)) {
1993             count++;
1994             }
1995              
1996             x++;
1997             }
1998              
1999             return count;
2000             }
2001              
2002             #endif
2003              
2004             /* Keep these around for these files */
2005             #if ! defined(PERL_IN_REGEXEC_C) && ! defined(PERL_IN_UTF8_C)
2006             # undef PERL_WORDSIZE
2007             # undef PERL_COUNT_MULTIPLIER
2008             # undef PERL_WORD_BOUNDARY_MASK
2009             # undef PERL_VARIANTS_WORD_MASK
2010             #endif
2011              
2012             #define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
2013              
2014             #if defined(PERL_CORE) || defined (PERL_EXT)
2015              
2016             /*
2017             =for apidoc is_utf8_non_invariant_string
2018              
2019             Returns TRUE if L returns FALSE for the first
2020             C bytes of the string C, but they are, nonetheless, legal Perl-extended
2021             UTF-8; otherwise returns FALSE.
2022              
2023             A TRUE return means that at least one code point represented by the sequence
2024             either is a wide character not representable as a single byte, or the
2025             representation differs depending on whether the sequence is encoded in UTF-8 or
2026             not.
2027              
2028             See also C>.
2029              
2030             =cut
2031              
2032             This is commonly used to determine if a SV's UTF-8 flag should be turned on.
2033             It generally needn't be if its string is entirely UTF-8 invariant, and it
2034             shouldn't be if it otherwise contains invalid UTF-8.
2035              
2036             It is an internal function because khw thinks that XS code shouldn't be working
2037             at this low a level. A valid use case could change that.
2038              
2039             */
2040              
2041             PERL_STATIC_INLINE bool
2042             Perl_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
2043             {
2044             const U8 * first_variant;
2045              
2046             PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
2047              
2048             if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2049             return FALSE;
2050             }
2051              
2052             return is_utf8_string(first_variant, len - (first_variant - s));
2053             }
2054              
2055             #endif
2056              
2057             /*
2058             =for apidoc is_utf8_string
2059             =for apidoc_item is_utf8_string_loc
2060             =for apidoc_item is_utf8_string_loclen
2061             =for apidoc_item is_strict_utf8_string
2062             =for apidoc_item is_strict_utf8_string_loc
2063             =for apidoc_item is_strict_utf8_string_loclen
2064             =for apidoc_item is_c9strict_utf8_string
2065             =for apidoc_item is_c9strict_utf8_string_loc
2066             =for apidoc_item is_c9strict_utf8_string_loclen
2067             =for apidoc_item is_utf8_string_flags
2068             =for apidoc_item is_utf8_string_loc_flags
2069             =for apidoc_item is_utf8_string_loclen_flags
2070              
2071             These each return TRUE if the first C bytes of string C form a valid
2072             UTF-8 string for varying degrees of strictness, FALSE otherwise. If C is
2073             0, it will be calculated using C (which means if you use this
2074             option, that C can't have embedded C characters and has to have a
2075             terminating C byte). Note that all characters being ASCII constitute 'a
2076             valid UTF-8 string'.
2077              
2078             Some of the functions also return information about the string. Those that
2079             have the suffix C<_loc> in their names have an extra parameter, C. If that
2080             is not NULL, the function stores into it the location of how far it got in
2081             parsing C. If the function is returning TRUE, this will be a pointer to the
2082             byte immediately after the end of C. If FALSE, it will be the location of
2083             the first byte that fails the criteria.
2084              
2085             The functions that instead have the suffix C<_loclen> have a second extra
2086             parameter, C. They act as the plain C<_loc> functions do with their C
2087             parameter, but if C is not null, the functions store into it the number of
2088             UTF-8 encoded characters found at the point where parsing stopped. If the
2089             function is returning TRUE, this will be the full count of the UTF-8 characters
2090             in C; if FALSE, it will be the count before the first invalid one.
2091              
2092             C (and C and C)
2093             consider Perl's extended UTF-8 to be valid. That means that
2094             code points above Unicode, surrogates, and non-character code points are
2095             all considered valid by this function. Problems may arise in interchange with
2096             non-Perl applications, or (unlikely) between machines with different word
2097             sizes.
2098              
2099             C (and C and
2100             C) consider only Unicode-range (0 to 0x10FFFF)
2101             code points to be valid, with the surrogates and non-character code points
2102             invalid. This level of strictness is what is safe to accept from outside
2103             sources that use Unicode rules.
2104              
2105             The forms whose names contain C conform to the level of strictness
2106             given in
2107             L.
2108             This means Unicode-range code points including non-character ones are
2109             considered valid, but not the surrogates. This level of strictness is
2110             considered safe for cooperating components that know how the other components
2111             handle non-character code points.
2112              
2113             The forms whose names contain C<_flags> allow you to customize the acceptable
2114             level of strictness. They have an extra parameter, C to indicate the
2115             types of code points that are acceptable. If C is 0, they give the
2116             same results as C> (and kin); if C is
2117             C, they give the same results as
2118             C> (and kin); and if C is
2119             C, they give the same results as
2120             C> (and kin). Otherwise C may be any
2121             combination of the C> flags understood by
2122             C>, with the same meanings.
2123              
2124             It's better to use one of the non-C<_flags> functions if they give you the
2125             desired strictness, as those have a better chance of being inlined by the C
2126             compiler.
2127              
2128             See also
2129             C>,
2130             C>,
2131              
2132             =cut
2133             */
2134              
2135             #define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
2136              
2137             #define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
2138              
2139             PERL_STATIC_INLINE bool
2140             Perl_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
2141             {
2142             const U8 * first_variant;
2143              
2144             PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
2145             assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
2146              
2147             if (len == 0) {
2148             len = strlen((const char *)s);
2149             }
2150              
2151             if (flags == 0) {
2152             return is_utf8_string(s, len);
2153             }
2154              
2155             if ((flags & UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2156             == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2157             {
2158             return is_strict_utf8_string(s, len);
2159             }
2160              
2161             if ((flags & UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2162             == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2163             {
2164             return is_c9strict_utf8_string(s, len);
2165             }
2166              
2167             if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
2168             const U8* const send = s + len;
2169             const U8* x = first_variant;
2170              
2171             while (x < send) {
2172             STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
2173             if (UNLIKELY(! cur_len)) {
2174             return FALSE;
2175             }
2176             x += cur_len;
2177             }
2178             }
2179              
2180             return TRUE;
2181             }
2182              
2183             #define Perl_is_utf8_string_loc(s, len, ep) \
2184             Perl_is_utf8_string_loclen(s, len, ep, 0)
2185              
2186             PERL_STATIC_INLINE bool
2187             Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
2188             {
2189             const U8 * first_variant;
2190              
2191             PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
2192              
2193             if (len == 0) {
2194             len = strlen((const char *) s);
2195             }
2196              
2197             if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2198             if (el)
2199             *el = len;
2200              
2201             if (ep) {
2202             *ep = s + len;
2203             }
2204              
2205             return TRUE;
2206             }
2207              
2208             {
2209             const U8* const send = s + len;
2210             const U8* x = first_variant;
2211             STRLEN outlen = first_variant - s;
2212              
2213             while (x < send) {
2214             const STRLEN cur_len = isUTF8_CHAR(x, send);
2215             if (UNLIKELY(! cur_len)) {
2216             break;
2217             }
2218             x += cur_len;
2219             outlen++;
2220             }
2221              
2222             if (el)
2223             *el = outlen;
2224              
2225             if (ep) {
2226             *ep = x;
2227             }
2228              
2229             return (x == send);
2230             }
2231             }
2232              
2233             /* The perl core arranges to never call the DFA below without there being at
2234             * least one byte available to look at. This allows the DFA to use a do {}
2235             * while loop which means that calling it with a UTF-8 invariant has a single
2236             * conditional, same as the calling code checking for invariance ahead of time.
2237             * And having the calling code remove that conditional speeds up by that
2238             * conditional, the case where it wasn't invariant. So there's no reason to
2239             * check before calling this.
2240             *
2241             * But we don't know this for non-core calls, so have to retain the check for
2242             * them. */
2243             #ifdef PERL_CORE
2244             # define PERL_NON_CORE_CHECK_EMPTY(s,e) assert((e) > (s))
2245             #else
2246             # define PERL_NON_CORE_CHECK_EMPTY(s,e) if ((e) <= (s)) return FALSE
2247             #endif
2248              
2249             /*
2250             * DFA for checking input is valid UTF-8 syntax.
2251             *
2252             * This uses adaptations of the table and algorithm given in
2253             * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
2254             * documentation of the original version. A copyright notice for the original
2255             * version is given at the beginning of this file. The Perl adaptations are
2256             * documented at the definition of PL_extended_utf8_dfa_tab[].
2257             *
2258             * This dfa is fast. There are three exit conditions:
2259             * 1) a well-formed code point, acceptable to the table
2260             * 2) the beginning bytes of an incomplete character, whose completion might
2261             * or might not be acceptable
2262             * 3) unacceptable to the table. Some of the adaptations have certain,
2263             * hopefully less likely to occur, legal inputs be unacceptable to the
2264             * table, so these must be sorted out afterwards.
2265             *
2266             * This macro is a complete implementation of the code executing the DFA. It
2267             * is passed the input sequence bounds and the table to use, and what to do
2268             * for each of the exit conditions. There are three canned actions, likely to
2269             * be the ones you want:
2270             * DFA_RETURN_SUCCESS_
2271             * DFA_RETURN_FAILURE_
2272             * DFA_GOTO_TEASE_APART_FF_
2273             *
2274             * You pass a parameter giving the action to take for each of the three
2275             * possible exit conditions:
2276             *
2277             * 'accept_action' This is executed when the DFA accepts the input.
2278             * DFA_RETURN_SUCCESS_ is the most likely candidate.
2279             * 'reject_action' This is executed when the DFA rejects the input.
2280             * DFA_RETURN_FAILURE_ is a candidate, or 'goto label' where
2281             * you have written code to distinguish the rejecting state
2282             * results. Because it happens in several places, and
2283             * involves #ifdefs, the special action
2284             * DFA_GOTO_TEASE_APART_FF_ is what you want with
2285             * PL_extended_utf8_dfa_tab. On platforms without
2286             * EXTRA_LONG_UTF8, there is no need to tease anything apart,
2287             * so this evaluates to DFA_RETURN_FAILURE_; otherwise you
2288             * need to have a label 'tease_apart_FF' that it will transfer
2289             * to.
2290             * 'incomplete_char_action' This is executed when the DFA ran off the end
2291             * before accepting or rejecting the input.
2292             * DFA_RETURN_FAILURE_ is the likely action, but you could
2293             * have a 'goto', or NOOP. In the latter case the DFA drops
2294             * off the end, and you place your code to handle this case
2295             * immediately after it.
2296             */
2297              
2298             #define DFA_RETURN_SUCCESS_ return (s8dfa_ - s0)
2299             #define DFA_RETURN_FAILURE_ return 0
2300             #ifdef HAS_EXTRA_LONG_UTF8
2301             # define DFA_TEASE_APART_FF_ goto tease_apart_FF
2302             #else
2303             # define DFA_TEASE_APART_FF_ DFA_RETURN_FAILURE_
2304             #endif
2305              
2306             #define PERL_IS_UTF8_CHAR_DFA(s0, e, dfa_tab, \
2307             accept_action, \
2308             reject_action, \
2309             incomplete_char_action) \
2310             STMT_START { \
2311             const U8 * s8dfa_ = s0; \
2312             const U8 * const e8dfa_ = e; \
2313             PERL_UINT_FAST16_T state = 0; \
2314             \
2315             PERL_NON_CORE_CHECK_EMPTY(s8dfa_, e8dfa_); \
2316             \
2317             do { \
2318             state = dfa_tab[256 + state + dfa_tab[*s8dfa_]]; \
2319             } while (++s8dfa_ < e8dfa_ && state > 1); \
2320             \
2321             if (LIKELY(state == 0)) { /* Accepting state */ \
2322             accept_action; \
2323             } \
2324             \
2325             if (state == 1) { /* Rejecting state */ \
2326             reject_action; \
2327             } \
2328             \
2329             /* Here, dropped out of loop before end-of-char */ \
2330             incomplete_char_action; \
2331             } STMT_END
2332              
2333              
2334             /*
2335              
2336             =for apidoc isUTF8_CHAR
2337             =for apidoc_item isSTRICT_UTF8_CHAR
2338             =for apidoc_item isC9_STRICT_UTF8_CHAR
2339             =for apidoc_item isUTF8_CHAR_flags
2340             =for apidoc_item is_utf8_char_buf
2341              
2342             These each evaluate to non-zero if the first few bytes of the string starting
2343             at C and looking no further than S> are well-formed UTF-8 that
2344             represents some code point, for varying degrees of strictness. Otherwise they
2345             evaluate to 0. If non-zero, the value gives how many bytes starting at C
2346             comprise the code point's representation. Any bytes remaining before C, but
2347             beyond the ones needed to form the first code point in C, are not examined.
2348              
2349             These are used to efficiently decide if the next few bytes in C are
2350             legal UTF-8 for a single character.
2351              
2352             With C, the code point can be any that will fit in an IV on this
2353             machine, using Perl's extension to official UTF-8 to represent those higher
2354             than the Unicode maximum of 0x10FFFF. That means that this will consider byte
2355             sequences to be valid that are unrecognized or considered illegal by non-Perl
2356             applications.
2357              
2358             With C>, acceptable code points are restricted to those
2359             defined by Unicode to be fully interchangeable across applications.
2360             This means code points above the Unicode range (max legal is 0x10FFFF),
2361             surrogates, and non-character code points are rejected.
2362              
2363             With C>, acceptable code points are restricted to
2364             those defined by Unicode to be fully interchangeable within an application.
2365             This means code points above the Unicode range and surrogates are rejected, but
2366             non-character code points are accepted. See L
2367             #9|http://www.unicode.org/versions/corrigendum9.html>.
2368              
2369             Use C> to customize what code points are acceptable.
2370             If C is 0, this gives the same results as C>;
2371             if C is C, this gives the same results
2372             as C>;
2373             and if C is C, this gives
2374             the same results as C>.
2375             Otherwise C may be any combination of the C> flags
2376             understood by C>, with the same meanings.
2377              
2378             The three alternative macros are for the most commonly needed validations; they
2379             are likely to run somewhat faster than this more general one, as they can be
2380             inlined into your code.
2381              
2382             Use one of the C> forms to check entire strings.
2383              
2384             Note also that a UTF-8 "invariant" character (i.e. ASCII on non-EBCDIC
2385             machines) is a valid UTF-8 character.
2386              
2387             C is the old name for C. Do not use it in new
2388             code.
2389              
2390             =cut
2391              
2392             All the functions except isUTF8_CHAR_flags) use adaptations of the table and
2393             algorithm given in https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which
2394             provides comprehensive documentation of the original version. A copyright
2395             notice for the original version is given at the beginning of this file.
2396              
2397             The Perl adaptation for isUTF8_CHAR is documented at the definition of
2398             PL_extended_utf8_dfa_tab[].
2399              
2400             The Perl adaptation for isSTRICT_UTF8_CHAR is documented at the definition of
2401             PL_strict_utf8_dfa_tab[];
2402              
2403             The Perl adaptation for isC9_STRICT_UTF8_CHAR is documented at the definition
2404             of PL_c9_utf8_dfa_tab[].
2405              
2406             */
2407              
2408             PERL_STATIC_INLINE Size_t
2409             Perl_isSTRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
2410             {
2411             PERL_ARGS_ASSERT_ISSTRICT_UTF8_CHAR;
2412              
2413             PERL_IS_UTF8_CHAR_DFA(s0, e, PL_strict_utf8_dfa_tab,
2414             DFA_RETURN_SUCCESS_,
2415             goto check_hanguls,
2416             DFA_RETURN_FAILURE_);
2417             check_hanguls:
2418              
2419             /* Here, we didn't return success, but dropped out of the loop. In the
2420             * case of PL_strict_utf8_dfa_tab, this means the input is either
2421             * malformed, or was for certain Hanguls; handle them specially */
2422              
2423             /* The dfa above drops out for incomplete or illegal inputs, and certain
2424             * legal Hanguls; check and return accordingly */
2425             return is_HANGUL_ED_utf8_safe(s0, e);
2426             }
2427              
2428             PERL_STATIC_INLINE Size_t
2429             Perl_isUTF8_CHAR(const U8 * const s0, const U8 * const e)
2430             {
2431             PERL_ARGS_ASSERT_ISUTF8_CHAR;
2432              
2433             PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
2434             DFA_RETURN_SUCCESS_,
2435             DFA_TEASE_APART_FF_,
2436             DFA_RETURN_FAILURE_);
2437              
2438             /* Here, we didn't return success, but dropped out of the loop. In the
2439             * case of PL_extended_utf8_dfa_tab, this means the input is either
2440             * malformed, or the start byte was FF on a platform that the dfa doesn't
2441             * handle FF's. Call a helper function. */
2442              
2443             #ifdef HAS_EXTRA_LONG_UTF8
2444              
2445             tease_apart_FF:
2446              
2447             /* In the case of PL_extended_utf8_dfa_tab, getting here means the input is
2448             * either malformed, or was for the largest possible start byte, which we
2449             * now check, not inline */
2450             if (*s0 != I8_TO_NATIVE_UTF8(0xFF)) {
2451             return 0;
2452             }
2453              
2454             return is_utf8_FF_helper_(s0, e,
2455             FALSE /* require full, not partial char */
2456             );
2457             #endif
2458              
2459             }
2460              
2461             PERL_STATIC_INLINE Size_t
2462             Perl_isC9_STRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
2463             {
2464             PERL_ARGS_ASSERT_ISC9_STRICT_UTF8_CHAR;
2465              
2466             PERL_IS_UTF8_CHAR_DFA(s0, e, PL_c9_utf8_dfa_tab,
2467             DFA_RETURN_SUCCESS_,
2468             DFA_RETURN_FAILURE_,
2469             DFA_RETURN_FAILURE_);
2470             }
2471              
2472             #define is_strict_utf8_string_loc(s, len, ep) \
2473             is_strict_utf8_string_loclen(s, len, ep, 0)
2474              
2475             PERL_STATIC_INLINE bool
2476             Perl_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
2477             {
2478             const U8 * first_variant;
2479              
2480             PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
2481              
2482             if (len == 0) {
2483             len = strlen((const char *) s);
2484             }
2485              
2486             if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2487             if (el)
2488             *el = len;
2489              
2490             if (ep) {
2491             *ep = s + len;
2492             }
2493              
2494             return TRUE;
2495             }
2496              
2497             {
2498             const U8* const send = s + len;
2499             const U8* x = first_variant;
2500             STRLEN outlen = first_variant - s;
2501              
2502             while (x < send) {
2503             const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
2504             if (UNLIKELY(! cur_len)) {
2505             break;
2506             }
2507             x += cur_len;
2508             outlen++;
2509             }
2510              
2511             if (el)
2512             *el = outlen;
2513              
2514             if (ep) {
2515             *ep = x;
2516             }
2517              
2518             return (x == send);
2519             }
2520             }
2521              
2522             #define is_c9strict_utf8_string_loc(s, len, ep) \
2523             is_c9strict_utf8_string_loclen(s, len, ep, 0)
2524              
2525             PERL_STATIC_INLINE bool
2526             Perl_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
2527             {
2528             const U8 * first_variant;
2529              
2530             PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
2531              
2532             if (len == 0) {
2533             len = strlen((const char *) s);
2534             }
2535              
2536             if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2537             if (el)
2538             *el = len;
2539              
2540             if (ep) {
2541             *ep = s + len;
2542             }
2543              
2544             return TRUE;
2545             }
2546              
2547             {
2548             const U8* const send = s + len;
2549             const U8* x = first_variant;
2550             STRLEN outlen = first_variant - s;
2551              
2552             while (x < send) {
2553             const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
2554             if (UNLIKELY(! cur_len)) {
2555             break;
2556             }
2557             x += cur_len;
2558             outlen++;
2559             }
2560              
2561             if (el)
2562             *el = outlen;
2563              
2564             if (ep) {
2565             *ep = x;
2566             }
2567              
2568             return (x == send);
2569             }
2570             }
2571              
2572             #define is_utf8_string_loc_flags(s, len, ep, flags) \
2573             is_utf8_string_loclen_flags(s, len, ep, 0, flags)
2574              
2575              
2576             /* The above 3 actual functions could have been moved into the more general one
2577             * just below, and made #defines that call it with the right 'flags'. They are
2578             * currently kept separate to increase their chances of getting inlined */
2579              
2580             PERL_STATIC_INLINE bool
2581             Perl_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
2582             {
2583             const U8 * first_variant;
2584              
2585             PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
2586             assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
2587              
2588             if (flags == 0) {
2589             return is_utf8_string_loclen(s, len, ep, el);
2590             }
2591              
2592             if ((flags & UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2593             == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2594             {
2595             return is_strict_utf8_string_loclen(s, len, ep, el);
2596             }
2597              
2598             if ((flags & UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2599             == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2600             {
2601             return is_c9strict_utf8_string_loclen(s, len, ep, el);
2602             }
2603              
2604             if (len == 0) {
2605             len = strlen((const char *) s);
2606             }
2607              
2608             if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2609             if (el)
2610             *el = len;
2611              
2612             if (ep) {
2613             *ep = s + len;
2614             }
2615              
2616             return TRUE;
2617             }
2618              
2619             {
2620             const U8* send = s + len;
2621             const U8* x = first_variant;
2622             STRLEN outlen = first_variant - s;
2623              
2624             while (x < send) {
2625             const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
2626             if (UNLIKELY(! cur_len)) {
2627             break;
2628             }
2629             x += cur_len;
2630             outlen++;
2631             }
2632              
2633             if (el)
2634             *el = outlen;
2635              
2636             if (ep) {
2637             *ep = x;
2638             }
2639              
2640             return (x == send);
2641             }
2642             }
2643              
2644             /*
2645             =for apidoc utf8_distance
2646              
2647             Returns the number of UTF-8 characters between the UTF-8 pointers C
2648             and C.
2649              
2650             WARNING: use only if you *know* that the pointers point inside the
2651             same UTF-8 buffer.
2652              
2653             =cut
2654             */
2655              
2656             PERL_STATIC_INLINE IV
2657 34           Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
2658             {
2659             PERL_ARGS_ASSERT_UTF8_DISTANCE;
2660              
2661 34 50         return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
2662             }
2663              
2664             /*
2665             =for apidoc utf8_hop
2666              
2667             Return the UTF-8 pointer C displaced by C characters, either
2668             forward (if C is positive) or backward (if negative). C does not need
2669             to be pointing to the starting byte of a character. If it isn't, one count of
2670             C will be used up to get to the start of the next character for forward
2671             hops, and to the start of the current character for negative ones.
2672              
2673             WARNING: Prefer L to this one.
2674              
2675             Do NOT use this function unless you B C is within
2676             the UTF-8 data pointed to by C B that on entry C is aligned
2677             on the first byte of a character or just after the last byte of a character.
2678              
2679             =cut
2680             */
2681              
2682             PERL_STATIC_INLINE U8 *
2683 0           Perl_utf8_hop(const U8 *s, SSize_t off)
2684             {
2685             PERL_ARGS_ASSERT_UTF8_HOP;
2686              
2687             /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
2688             * the XXX bitops (especially ~) can create illegal UTF-8.
2689             * In other words: in Perl UTF-8 is not just for Unicode. */
2690              
2691 0 0         if (off > 0) {
2692              
2693             /* Get to next non-continuation byte */
2694 0 0         if (UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
2695             do {
2696 0           s++;
2697             }
2698 0 0         while (UTF8_IS_CONTINUATION(*s));
2699 0           off--;
2700             }
2701              
2702 0 0         while (off--)
2703 0           s += UTF8SKIP(s);
2704             }
2705             else {
2706 0 0         while (off++) {
2707 0           s--;
2708 0 0         while (UTF8_IS_CONTINUATION(*s))
2709 0           s--;
2710             }
2711             }
2712              
2713             GCC_DIAG_IGNORE(-Wcast-qual)
2714 0           return (U8 *)s;
2715             GCC_DIAG_RESTORE
2716             }
2717              
2718             /*
2719             =for apidoc utf8_hop_forward
2720             =for apidoc_item utf8_hop_forward_overshoot
2721              
2722             These each take as input a position, C, into a string encoded as UTF-8
2723             which ends at the byte before C, and return the position within it that is
2724             C displaced by up to C characters forwards.
2725              
2726             If there are fewer than C characters between C and C, the
2727             functions return C.
2728              
2729             The functions differ in two ways
2730              
2731             =over 4
2732              
2733             =item *
2734              
2735             C can return how many characters beyond the edge
2736             the request was for. When its parameter, C<&remaining>, is not NULL, the
2737             function stores into it the count of the excess; zero if the request was
2738             completely fulfilled. The actual number of characters that were displaced can
2739             then be calculated as S>.
2740              
2741             =item *
2742              
2743             C will panic if called with C already positioned at or
2744             beyond the edge of the string ending at C and the request is to go even
2745             further over the edge. C presumes the caller will
2746             handle any errors, and just stores C into C without doing
2747             anything else.
2748              
2749             =back
2750              
2751             (The above contains a slight lie. When C is NULL, the two functions
2752             act identically.)
2753              
2754             C does not need to be pointing to the starting byte of a character. If it
2755             isn't, one count of C will be used up to get to that start.
2756              
2757             C must be non-negative, and if zero, no action is taken; C is returned
2758             unchanged.
2759              
2760             =cut
2761             */
2762             # define Perl_utf8_hop_forward( s, off, end) \
2763             Perl_utf8_hop_forward_overshoot(s, off, end, NULL)
2764              
2765             PERL_STATIC_INLINE U8 *
2766             Perl_utf8_hop_forward_overshoot(const U8 * s, SSize_t off,
2767             const U8 * const end, SSize_t *remaining)
2768             {
2769             PERL_ARGS_ASSERT_UTF8_HOP_FORWARD_OVERSHOOT;
2770             assert(off >= 0);
2771              
2772             if (off != 0) {
2773             if (UNLIKELY(s >= end && ! remaining)) {
2774             Perl_croak_nocontext("panic: Start of forward hop (0x%p) is %zd"
2775             " bytes beyond legal end position (0x%p)",
2776             s, 1 + s - end, end);
2777             }
2778              
2779             if (UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
2780             do { /* Get to next non-continuation byte */
2781             if (! UTF8_IS_CONTINUATION(*s)) {
2782             off--;
2783             break;
2784             }
2785             s++;
2786             } while (s < end);
2787             }
2788              
2789             while (off > 0 && s < end) {
2790             STRLEN skip = UTF8SKIP(s);
2791              
2792             /* Quit without counting this character if it overshoots the edge.
2793             * */
2794             if ((STRLEN)(end - s) < skip) {
2795             s = end;
2796             break;
2797             }
2798              
2799             s += skip;
2800             off--;
2801             }
2802             }
2803              
2804             if (remaining) {
2805             *remaining = off;
2806             }
2807              
2808             GCC_DIAG_IGNORE(-Wcast-qual)
2809             return (U8 *)s;
2810             GCC_DIAG_RESTORE
2811             }
2812              
2813             /*
2814             =for apidoc utf8_hop_back
2815             =for apidoc_item utf8_hop_back_overshoot
2816              
2817             These each take as input a string encoded as UTF-8 which starts at C,
2818             and a position into it given by C, and return the position within it that is
2819             C displaced by up to C characters backwards.
2820              
2821             If there are fewer than C characters between C and C, the
2822             functions return C.
2823              
2824             The functions differ in that C can return how many
2825             characters C beyond the edge the request was for. When its parameter,
2826             C<&remaining>, is not NULL, the function stores into it the count of the
2827             excess; zero if the request was completely fulfilled. The actual number of
2828             characters that were displaced can then be calculated as S>.
2829             This function acts identically to plain C when this parameter is
2830             NULL.
2831              
2832             C does not need to be pointing to the starting byte of a character. If it
2833             isn't, one count of C will be used up to get to that start.
2834              
2835             C must be non-positive, and if zero, no action is taken; C is returned
2836             unchanged. That it otherwise must be negative means that the earlier
2837             description is a lie, to avoid burdening you with this detail too soon. An
2838             C of C<-2> means to displace two characters backwards, so the displacement
2839             is actually the absolute value of C. C will also be
2840             non-positive. If there was only one character between C and C, and a
2841             displacement of C<-2> was requested, C would be set to C<-1>. The
2842             subtraction formula works, yielding the result that only C<-1> character was
2843             displaced.
2844              
2845             =cut
2846             */
2847              
2848             # define Perl_utf8_hop_back( s, off, start) \
2849             Perl_utf8_hop_back_overshoot(s, off, start, NULL)
2850              
2851             PERL_STATIC_INLINE U8 *
2852             Perl_utf8_hop_back_overshoot(const U8 *s, SSize_t off,
2853             const U8 * const start, SSize_t *remaining)
2854             {
2855             PERL_ARGS_ASSERT_UTF8_HOP_BACK_OVERSHOOT;
2856             assert(start <= s);
2857             assert(off <= 0);
2858              
2859             /* Note: if we know that the input is well-formed, we can do per-word
2860             * hop-back. Commit d6ad3b72778369a84a215b498d8d60d5b03aa1af implemented
2861             * that. But it was reverted because doing per-word has some
2862             * start-up/tear-down overhead, so only makes sense if the distance to be
2863             * moved is large, and core perl doesn't currently move more than a few
2864             * characters at a time. You can reinstate it if it does become
2865             * advantageous. */
2866             while (off < 0 && s > start) {
2867             do { /* Find the beginning of this character */
2868             s--;
2869             if (! UTF8_IS_CONTINUATION(*s)) {
2870             off++;
2871             break;
2872             }
2873             } while (s > start);
2874             }
2875              
2876             if (remaining) {
2877             *remaining = off;
2878             }
2879              
2880             GCC_DIAG_IGNORE(-Wcast-qual)
2881             return (U8 *)s;
2882             GCC_DIAG_RESTORE
2883             }
2884              
2885             /*
2886             =for apidoc utf8_hop_safe
2887             =for apidoc_item utf8_hop_overshoot
2888              
2889             These each take as input a string encoded as UTF-8 which starts at C,
2890             ending at C, and a position into it given by C, and return the
2891             position within it that is C displaced by up to C characters, either
2892             forwards if C is positive, or backwards if C is negative. (Nothing
2893             is done if C is 0.)
2894              
2895             If there are fewer than C characters between C and the respective edge,
2896             the functions return that edge.
2897              
2898             The functions differ in that C can return how many
2899             characters beyond the edge the request was for. When its parameter,
2900             C<&remaining>, is not NULL, the function stores into it the count of the
2901             excess; zero if the request was completely fulfilled. The actual number of
2902             characters that were displaced can then be calculated as S>.
2903             This function acts identically to plain C when this parameter is
2904             NULL.
2905              
2906             C does not need to be pointing to the starting byte of a character. If it
2907             isn't, one count of C will be used up to get to that start.
2908              
2909             To be more precise, the displacement is by the absolute value of C, and
2910             the excess count is the absolute value of C.
2911              
2912             =cut
2913             */
2914              
2915             #define Perl_utf8_hop_safe(s, o, b, e) Perl_utf8_hop_overshoot(s, o, b, e, 0)
2916              
2917             PERL_STATIC_INLINE U8 *
2918             Perl_utf8_hop_overshoot(const U8 *s, SSize_t off,
2919             const U8 * const start, const U8 * const end,
2920             SSize_t * remaining)
2921             {
2922             PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT;
2923              
2924             assert(start <= s && s <= end);
2925              
2926             if (off >= 0) {
2927             return utf8_hop_forward_overshoot(s, off, end, remaining);
2928             }
2929             else {
2930             return utf8_hop_back_overshoot(s, off, start, remaining);
2931             }
2932             }
2933              
2934             PERL_STATIC_INLINE STRLEN
2935             Perl_isUTF8_CHAR_flags(const U8 * const s0, const U8 * const e, const U32 flags)
2936             {
2937             PERL_ARGS_ASSERT_ISUTF8_CHAR_FLAGS;
2938             assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
2939              
2940             PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
2941             goto check_success,
2942             DFA_TEASE_APART_FF_,
2943             DFA_RETURN_FAILURE_);
2944              
2945             check_success:
2946              
2947             return is_utf8_char_helper_(s0, e, flags);
2948              
2949             #ifdef HAS_EXTRA_LONG_UTF8
2950              
2951             tease_apart_FF:
2952              
2953             /* In the case of PL_extended_utf8_dfa_tab, getting here means the input is
2954             * either malformed, or was for the largest possible start byte, which
2955             * indicates perl extended UTF-8, well above the Unicode maximum */
2956             if ( *s0 != I8_TO_NATIVE_UTF8(0xFF)
2957             || (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_PERL_EXTENDED)))
2958             {
2959             return 0;
2960             }
2961              
2962             /* Otherwise examine the sequence not inline */
2963             return is_utf8_FF_helper_(s0, e,
2964             FALSE /* require full, not partial char */
2965             );
2966             #endif
2967              
2968             }
2969              
2970             /*
2971              
2972             =for apidoc is_utf8_valid_partial_char
2973             =for apidoc_item is_utf8_valid_partial_char_flags
2974              
2975             These each return FALSE if the sequence of bytes starting at C and looking no
2976             further than S> is the UTF-8 encoding for one or more code points.
2977             That is, FALSE is returned if C points to at least one entire UTF-8 encoded
2978             character.
2979              
2980             Otherwise, they return TRUE if there exists at least one non-empty sequence of
2981             bytes that when appended to sequence C, starting at position C causes the
2982             entire sequence to be the well-formed UTF-8 of some code point
2983              
2984             In other words they return TRUE if C points to an incomplete UTF-8-encoded
2985             code point; FALSE otherwise.
2986              
2987             This is useful when a fixed-length buffer is being tested for being well-formed
2988             UTF-8, but the final few bytes in it don't comprise a full character; that is,
2989             it is split somewhere in the middle of the final code point's UTF-8
2990             representation. (Presumably when the buffer is refreshed with the next chunk
2991             of data, the new first bytes will complete the partial code point.) This
2992             function is used to verify that the final bytes in the current buffer are in
2993             fact the legal beginning of some code point, so that if they aren't, the
2994             failure can be signalled without having to wait for the next read.
2995              
2996             C behaves identically to
2997             C when the latter is called with a zero
2998             C parameter. This parameter is used to restrict the classes of code
2999             points that are considered to be valid. When zero, Perl's extended UTF-8 is
3000             used. Otherwise C can be any combination of the C>
3001             flags accepted by C>. If there is any sequence of bytes
3002             that can complete the input partial character in such a way that a
3003             non-prohibited character is formed, the function returns TRUE; otherwise FALSE.
3004             Non-character code points cannot be determined based on partial character
3005             input, so TRUE is always returned if C looks like it could be the beginning
3006             on one of those. But many of the other possible excluded types can be
3007             determined from just the first one or two bytes.
3008              
3009             =cut
3010             */
3011             #define is_utf8_valid_partial_char(s, e) \
3012             is_utf8_valid_partial_char_flags(s, e, 0)
3013              
3014             PERL_STATIC_INLINE bool
3015             Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
3016             {
3017             PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
3018             assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
3019              
3020             PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
3021             DFA_RETURN_FAILURE_,
3022             DFA_TEASE_APART_FF_,
3023             NOOP);
3024              
3025             /* The NOOP above causes the DFA to drop down here iff the input was a
3026             * partial character. flags=0 => can return TRUE immediately; otherwise we
3027             * need to check (not inline) if the partial character is the beginning of
3028             * a disallowed one */
3029             if (flags == 0) {
3030             return TRUE;
3031             }
3032              
3033             return cBOOL(is_utf8_char_helper_(s0, e, flags));
3034              
3035             #ifdef HAS_EXTRA_LONG_UTF8
3036              
3037             tease_apart_FF:
3038              
3039             /* Getting here means the input is either malformed, or, in the case of
3040             * PL_extended_utf8_dfa_tab, was for the largest possible start byte. The
3041             * latter case has to be extended UTF-8, so can fail immediately if that is
3042             * forbidden */
3043              
3044             if ( *s0 != I8_TO_NATIVE_UTF8(0xFF)
3045             || (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_PERL_EXTENDED)))
3046             {
3047             return FALSE;
3048             }
3049              
3050             return is_utf8_FF_helper_(s0, e,
3051             TRUE /* Require to be a partial character */
3052             );
3053             #endif
3054              
3055             }
3056              
3057             /*
3058              
3059             =for apidoc is_utf8_fixed_width_buf_flags
3060             =for apidoc_item is_utf8_fixed_width_buf_loc_flags
3061             =for apidoc_item is_utf8_fixed_width_buf_loclen_flags
3062              
3063             These each return TRUE if the fixed-width buffer starting at C with length
3064             C is entirely valid UTF-8, subject to the restrictions given by C;
3065             otherwise they return FALSE.
3066              
3067             If C is 0, any well-formed UTF-8, as extended by Perl, is accepted
3068             without restriction. If the final few bytes of the buffer do not form a
3069             complete code point, this will return TRUE anyway, provided that
3070             C> returns TRUE for them.
3071              
3072             C can be zero or any combination of the C> flags
3073             accepted by C>, and with the same meanings.
3074              
3075             The functions differ from C> only in that the latter
3076             returns FALSE if the final few bytes of the string don't form a complete code
3077             point.
3078              
3079             C> does all the preceding, but takes an
3080             extra parameter, C into which it stores the location of the failure, if
3081             C is not NULL. If instead the function returns TRUE, C<*ep> will point to
3082             the beginning of any partial character at the end of the buffer; if there is no
3083             partial character C<*ep> will contain C+C.
3084              
3085             C> does all the preceding, but takes
3086             another extra parameter, C into which it stores the number of complete,
3087             valid characters found, if C is not NULL.
3088              
3089             =cut
3090             */
3091             #define is_utf8_fixed_width_buf_flags(s, len, flags) \
3092             is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
3093              
3094             #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
3095             is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
3096              
3097             PERL_STATIC_INLINE bool
3098             Perl_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
3099             STRLEN len,
3100             const U8 **ep,
3101             STRLEN *el,
3102             const U32 flags)
3103             {
3104             const U8 * maybe_partial;
3105              
3106             PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
3107              
3108             if (! ep) {
3109             ep = &maybe_partial;
3110             }
3111              
3112             /* If it's entirely valid, return that; otherwise see if the only error is
3113             * that the final few bytes are for a partial character */
3114             return is_utf8_string_loclen_flags(s, len, ep, el, flags)
3115             || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
3116             }
3117              
3118             PERL_STATIC_INLINE bool
3119             Perl_utf8_to_uv_msgs(const U8 * const s0,
3120             const U8 * const e,
3121             UV * cp_p,
3122             Size_t *advance_p,
3123             U32 flags,
3124             U32 * errors,
3125             AV ** msgs)
3126             {
3127             PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS;
3128              
3129             /* This is the inlined portion of utf8_to_uv_msgs. It handles the simple
3130             * cases, and, if necessary calls a helper function to deal with the more
3131             * complex ones. Almost all well-formed non-problematic code points are
3132             * considered simple, so that it's unlikely that the helper function will
3133             * need to be called. */
3134              
3135             /* Assume that isn't malformed; the vast majority of calls won't be */
3136             if (errors) {
3137             *errors = 0;
3138             }
3139             if (msgs) {
3140             *msgs = NULL;
3141             }
3142              
3143              
3144             /* No calls from core pass in an empty string; non-core need a check */
3145             #ifdef PERL_CORE
3146             assert(e > s0);
3147             #else
3148             if (LIKELY(e > s0))
3149             #endif
3150              
3151             {
3152             /* UTF-8 invariants are returned unchanged. The code below is quite
3153             * capable of handling this, but this shortcuts this very common case
3154             * */
3155             if (UTF8_IS_INVARIANT(*s0)) {
3156             if (advance_p) {
3157             *advance_p = 1;
3158             }
3159              
3160             *cp_p = *s0;
3161             return true;
3162             }
3163              
3164             const U8 * s = s0;
3165              
3166             /* This dfa is fast. If it accepts the input, it was for a
3167             * well-formed, non-problematic code point, which can be returned
3168             * immediately. Otherwise we call a helper function to figure out the
3169             * more complicated cases.
3170             *
3171             * It is an adaptation of the tables and algorithm given in
3172             * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides
3173             * comprehensive documentation of the original version. A copyright
3174             * notice for the original version is given at the beginning of this
3175             * file. The Perl adaptation is documented at the definition of
3176             * PL_strict_utf8_dfa_tab[].
3177             *
3178             * The terminology of the dfa refers to a 'class'. The variable 'type'
3179             * would have been named 'class' except that is a reserved word in C++
3180             *
3181             * The table can be a U16 on EBCDIC platforms, so 'state' is declared
3182             * as U16; 'type' is likely to never occupy more than 5 bits. */
3183             PERL_UINT_FAST8_T type = PL_strict_utf8_dfa_tab[*s];
3184             PERL_UINT_FAST16_T state = PL_strict_utf8_dfa_tab[256 + type];
3185             UV uv = (0xff >> type) & NATIVE_UTF8_TO_I8(*s);
3186              
3187             while (state > 1 && ++s < e) {
3188             type = PL_strict_utf8_dfa_tab[*s];
3189             state = PL_strict_utf8_dfa_tab[256 + state + type];
3190              
3191             uv = UTF8_ACCUMULATE(uv, *s);
3192             }
3193              
3194             if (LIKELY(state == 0)) {
3195             if (advance_p) {
3196             *advance_p = s - s0 + 1;
3197             }
3198              
3199             *cp_p = UNI_TO_NATIVE(uv);
3200             return true;
3201             }
3202             }
3203              
3204             /* Here is potentially problematic. Use the full mechanism */
3205             return utf8_to_uv_msgs_helper_(s0, e, cp_p, advance_p, flags, errors, msgs);
3206             }
3207              
3208             PERL_STATIC_INLINE UV
3209             Perl_utf8_to_uv_or_die(const U8 *s, const U8 *e, STRLEN *advance_p)
3210             {
3211             PERL_ARGS_ASSERT_UTF8_TO_UV_OR_DIE;
3212              
3213             UV cp;
3214             (void) utf8_to_uv_flags(s, e, &cp, advance_p, UTF8_DIE_IF_MALFORMED);
3215             return cp;
3216             }
3217              
3218             PERL_STATIC_INLINE UV
3219             Perl_utf8n_to_uvchr_msgs(const U8 * const s0,
3220             STRLEN curlen,
3221             STRLEN *retlen,
3222             U32 flags,
3223             U32 * errors,
3224             AV ** msgs)
3225             {
3226             PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
3227              
3228             UV cp;
3229             if (LIKELY(utf8_to_uv_msgs(s0, s0 + curlen, &cp, retlen, flags, errors,
3230             msgs)))
3231             {
3232             return cp;
3233             }
3234              
3235             if ((flags & UTF8_CHECK_ONLY) && retlen) {
3236             *retlen = ((STRLEN) -1);
3237             }
3238              
3239             return 0;
3240             }
3241              
3242              
3243             PERL_STATIC_INLINE UV
3244             Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
3245             {
3246             PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
3247              
3248             UV cp;
3249              
3250             /* When everything is legal, just return that; but when not:
3251             * 1) if warnings are enabled return 0 and retlen to -1
3252             * 2) if warnings are disabled, set 'flags' to accept any malformation,
3253             * but that will just cause the REPLACEMENT CHARACTER to be returned,
3254             * as the documentation indicates. EMPTY is not really allowed, and
3255             * asserts on debugging builds. But on non-debugging we have to deal
3256             * with it.
3257             * This API means 0 can mean a legal NUL, or the input is malformed; and
3258             * the caller has to know if warnings are disabled to know if it can rely on
3259             * 'retlen'. Best to use utf8_to_uv() instead */
3260             U32 flags = (ckWARN_d(WARN_UTF8)) ? 0 : (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY);
3261              
3262             if ( LIKELY(utf8_to_uv_flags(s, send, &cp, retlen, flags))
3263             || flags)
3264             {
3265             return cp;
3266             }
3267              
3268             if (retlen) {
3269             *retlen = (STRLEN) -1;
3270             }
3271              
3272             return 0;
3273             }
3274              
3275             PERL_STATIC_INLINE U8 *
3276             Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
3277             {
3278             return uv_to_utf8_msgs(d, uv, 0, 0);
3279             }
3280              
3281             PERL_STATIC_INLINE U8 *
3282             Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
3283             {
3284             return uv_to_utf8_msgs(d, uv, flags, 0);
3285             }
3286              
3287             PERL_STATIC_INLINE U8 *
3288             Perl_uv_to_utf8_msgs(pTHX_ U8 *d, UV uv, UV flags , HV **msgs)
3289             {
3290             return uvoffuni_to_utf8_flags_msgs(d, NATIVE_TO_UNI(uv), flags, msgs);
3291             }
3292              
3293             /* ------------------------------- perl.h ----------------------------- */
3294              
3295             /*
3296             =for apidoc_section $utility
3297              
3298             =for apidoc is_safe_syscall
3299              
3300             Test that the given C (with length C) doesn't contain any internal
3301             C characters.
3302             If it does, set C to C, optionally warn using the C
3303             category, and return FALSE.
3304              
3305             Return TRUE if the name is safe.
3306              
3307             C and C are used in any warning.
3308              
3309             Used by the C macro.
3310              
3311             =cut
3312             */
3313              
3314             PERL_STATIC_INLINE bool
3315             Perl_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name)
3316             {
3317             /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
3318             * perl itself uses xce*() functions which accept 8-bit strings.
3319             */
3320              
3321             PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
3322              
3323             if (len > 1) {
3324             char *null_at;
3325             if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
3326             SETERRNO(ENOENT, LIB_INVARG);
3327             Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
3328             "Invalid \\0 character in %s for %s: %s\\0%s",
3329             what, op_name, pv, null_at+1);
3330             return FALSE;
3331             }
3332             }
3333              
3334             return TRUE;
3335             }
3336              
3337             /*
3338              
3339             Return true if the supplied filename has a newline character
3340             immediately before the first (hopefully only) NUL.
3341              
3342             My original look at this incorrectly used the len from SvPV(), but
3343             that's incorrect, since we allow for a NUL in pv[len-1].
3344              
3345             So instead, strlen() and work from there.
3346              
3347             This allow for the user reading a filename, forgetting to chomp it,
3348             then calling:
3349              
3350             open my $foo, "$file\0";
3351              
3352             */
3353              
3354             #ifdef PERL_CORE
3355              
3356             PERL_STATIC_INLINE bool
3357             S_should_warn_nl(const char *pv)
3358             {
3359             STRLEN len;
3360              
3361             PERL_ARGS_ASSERT_SHOULD_WARN_NL;
3362              
3363             len = strlen(pv);
3364              
3365             return len > 0 && pv[len-1] == '\n';
3366             }
3367              
3368             #endif
3369              
3370             #if defined(PERL_IN_PP_C) || defined(PERL_IN_PP_HOT_C)
3371              
3372             PERL_STATIC_INLINE bool
3373             S_lossless_NV_to_IV(const NV nv, IV *ivp)
3374             {
3375             /* This function determines if the input NV 'nv' may be converted without
3376             * loss of data to an IV. If not, it returns FALSE taking no other action.
3377             * But if it is possible, it does the conversion, returning TRUE, and
3378             * storing the converted result in '*ivp' */
3379              
3380             PERL_ARGS_ASSERT_LOSSLESS_NV_TO_IV;
3381              
3382             # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3383             /* Normally any comparison with a NaN returns false; if we can't rely
3384             * on that behaviour, check explicitly */
3385             if (UNLIKELY(Perl_isnan(nv))) {
3386             return FALSE;
3387             }
3388             # endif
3389              
3390             # ifndef NV_PRESERVES_UV
3391             STATIC_ASSERT_STMT(((UV)1 << NV_PRESERVES_UV_BITS) - 1 <= (UV)IV_MAX);
3392             # endif
3393              
3394             /* Written this way so that with an always-false NaN comparison we
3395             * return false */
3396             if (
3397             # ifdef NV_PRESERVES_UV
3398             LIKELY(nv >= (NV) IV_MIN) && LIKELY(nv < IV_MAX_P1) &&
3399             # else
3400             /* If the condition below is not satisfied, lower bits of nv's
3401             * integral part is already lost and accurate conversion to integer
3402             * is impossible.
3403             * Note this should be consistent with S_sv_2iuv_common in sv.c. */
3404             Perl_fabs(nv) < (NV) ((UV)1 << NV_PRESERVES_UV_BITS) &&
3405             # endif
3406             (IV) nv == nv) {
3407             *ivp = (IV) nv;
3408             return TRUE;
3409             }
3410             return FALSE;
3411             }
3412              
3413             #endif
3414              
3415             /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
3416              
3417             #if defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
3418              
3419             #define MAX_CHARSET_NAME_LENGTH 2
3420              
3421             PERL_STATIC_INLINE const char *
3422             S_get_regex_charset_name(const U32 flags, STRLEN* const lenp)
3423             {
3424             PERL_ARGS_ASSERT_GET_REGEX_CHARSET_NAME;
3425              
3426             /* Returns a string that corresponds to the name of the regex character set
3427             * given by 'flags', and *lenp is set the length of that string, which
3428             * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
3429              
3430             *lenp = 1;
3431             switch (get_regex_charset(flags)) {
3432             case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
3433             case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
3434             case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
3435             case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
3436             case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
3437             *lenp = 2;
3438             return ASCII_MORE_RESTRICT_PAT_MODS;
3439             }
3440             /* The NOT_REACHED; hides an assert() which has a rather complex
3441             * definition in perl.h. */
3442             NOT_REACHED; /* NOTREACHED */
3443             return "?"; /* Unknown */
3444             }
3445              
3446             #endif
3447              
3448             /*
3449              
3450             Return false if any get magic is on the SV other than taint magic.
3451              
3452             */
3453              
3454             PERL_STATIC_INLINE bool
3455             Perl_sv_only_taint_gmagic(SV *sv)
3456             {
3457             MAGIC *mg = SvMAGIC(sv);
3458              
3459             PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
3460              
3461             while (mg) {
3462             if (mg->mg_type != PERL_MAGIC_taint
3463             && !(mg->mg_flags & MGf_GSKIP)
3464             && mg->mg_virtual->svt_get) {
3465             return FALSE;
3466             }
3467             mg = mg->mg_moremagic;
3468             }
3469              
3470             return TRUE;
3471             }
3472              
3473             /* ------------------ cop.h ------------------------------------------- */
3474              
3475             /* implement GIMME_V() macro */
3476              
3477             PERL_STATIC_INLINE U8
3478 945           Perl_gimme_V(pTHX)
3479             {
3480             I32 cxix;
3481 945           U8 gimme = (PL_op->op_flags & OPf_WANT);
3482              
3483 945 50         if (gimme)
3484 945           return gimme;
3485 0           cxix = PL_curstackinfo->si_cxsubix;
3486 0 0         if (cxix < 0)
3487 0 0         return PL_curstackinfo->si_type == PERLSI_SORT ? G_SCALAR: G_VOID;
3488             assert(cxstack[cxix].blk_gimme & G_WANT);
3489 0           return (cxstack[cxix].blk_gimme & G_WANT);
3490             }
3491              
3492              
3493             /* Enter a block. Push a new base context and return its address. */
3494              
3495             PERL_STATIC_INLINE PERL_CONTEXT *
3496             Perl_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
3497             {
3498             PERL_CONTEXT * cx;
3499              
3500             PERL_ARGS_ASSERT_CX_PUSHBLOCK;
3501              
3502             CXINC;
3503             cx = CX_CUR();
3504             cx->cx_type = type;
3505             cx->blk_gimme = gimme;
3506             cx->blk_oldsaveix = saveix;
3507             cx->blk_oldsp = (Stack_off_t)(sp - PL_stack_base);
3508             assert(cxstack_ix <= 0
3509             || CxTYPE(cx-1) == CXt_SUBST
3510             || cx->blk_oldsp >= (cx-1)->blk_oldsp);
3511             cx->blk_oldcop = PL_curcop;
3512             cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
3513             cx->blk_oldscopesp = PL_scopestack_ix;
3514             cx->blk_oldpm = PL_curpm;
3515             cx->blk_old_tmpsfloor = PL_tmps_floor;
3516              
3517             PL_tmps_floor = PL_tmps_ix;
3518             CX_DEBUG(cx, "PUSH");
3519             return cx;
3520             }
3521              
3522              
3523             /* Exit a block (RETURN and LAST). */
3524              
3525             PERL_STATIC_INLINE void
3526             Perl_cx_popblock(pTHX_ PERL_CONTEXT *cx)
3527             {
3528             PERL_ARGS_ASSERT_CX_POPBLOCK;
3529              
3530             CX_DEBUG(cx, "POP");
3531             /* these 3 are common to cx_popblock and cx_topblock */
3532             PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
3533             PL_scopestack_ix = cx->blk_oldscopesp;
3534             PL_curpm = cx->blk_oldpm;
3535              
3536             /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
3537             * and leaves a CX entry lying around for repeated use, so
3538             * skip for multicall */ \
3539             assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
3540             || PL_savestack_ix == cx->blk_oldsaveix);
3541             PL_curcop = cx->blk_oldcop;
3542             PL_tmps_floor = cx->blk_old_tmpsfloor;
3543             }
3544              
3545             /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
3546             * Whereas cx_popblock() restores the state to the point just before
3547             * cx_pushblock() was called, cx_topblock() restores it to the point just
3548             * *after* cx_pushblock() was called. */
3549              
3550             PERL_STATIC_INLINE void
3551             Perl_cx_topblock(pTHX_ PERL_CONTEXT *cx)
3552             {
3553             PERL_ARGS_ASSERT_CX_TOPBLOCK;
3554              
3555             CX_DEBUG(cx, "TOP");
3556             /* these 3 are common to cx_popblock and cx_topblock */
3557             PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
3558             PL_scopestack_ix = cx->blk_oldscopesp;
3559             PL_curpm = cx->blk_oldpm;
3560             Perl_rpp_popfree_to(aTHX_ PL_stack_base + cx->blk_oldsp);
3561             }
3562              
3563              
3564             PERL_STATIC_INLINE void
3565             Perl_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
3566             {
3567             U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
3568              
3569             PERL_ARGS_ASSERT_CX_PUSHSUB;
3570              
3571             PERL_DTRACE_PROBE_ENTRY(cv);
3572             cx->blk_sub.old_cxsubix = PL_curstackinfo->si_cxsubix;
3573             PL_curstackinfo->si_cxsubix = (I32)(cx - PL_curstackinfo->si_cxstack);
3574             cx->blk_sub.cv = cv;
3575             cx->blk_sub.olddepth = CvDEPTH(cv);
3576             cx->blk_sub.prevcomppad = PL_comppad;
3577             cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
3578             cx->blk_sub.retop = retop;
3579             SvREFCNT_inc_simple_void_NN(cv);
3580             cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
3581             }
3582              
3583              
3584             /* subsets of cx_popsub() */
3585              
3586             PERL_STATIC_INLINE void
3587             Perl_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
3588             {
3589             CV *cv;
3590              
3591             PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
3592             assert(CxTYPE(cx) == CXt_SUB);
3593              
3594             PL_comppad = cx->blk_sub.prevcomppad;
3595             PL_curpad = LIKELY(PL_comppad != NULL) ? AvARRAY(PL_comppad) : NULL;
3596             cv = cx->blk_sub.cv;
3597             CvDEPTH(cv) = cx->blk_sub.olddepth;
3598             cx->blk_sub.cv = NULL;
3599             SvREFCNT_dec(cv);
3600             PL_curstackinfo->si_cxsubix = cx->blk_sub.old_cxsubix;
3601             }
3602              
3603              
3604             /* handle the @_ part of leaving a sub */
3605              
3606             PERL_STATIC_INLINE void
3607             Perl_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
3608             {
3609             AV *av;
3610              
3611             PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
3612             assert(CxTYPE(cx) == CXt_SUB);
3613             assert(AvARRAY(MUTABLE_AV(
3614             PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
3615             CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
3616              
3617             CX_POP_SAVEARRAY(cx);
3618             av = MUTABLE_AV(PAD_SVl(0));
3619             if (!SvMAGICAL(av) && SvREFCNT(av) == 1
3620             #ifndef PERL_RC_STACK
3621             && !AvREAL(av)
3622             #endif
3623             )
3624             clear_defarray_simple(av);
3625             else
3626             /* abandon @_ if it got reified */
3627             clear_defarray(av, 0);
3628             }
3629              
3630              
3631             PERL_STATIC_INLINE void
3632             Perl_cx_popsub(pTHX_ PERL_CONTEXT *cx)
3633             {
3634             PERL_ARGS_ASSERT_CX_POPSUB;
3635             assert(CxTYPE(cx) == CXt_SUB);
3636              
3637             PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
3638              
3639             if (CxHASARGS(cx))
3640             cx_popsub_args(cx);
3641             cx_popsub_common(cx);
3642             }
3643              
3644              
3645             PERL_STATIC_INLINE void
3646             Perl_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
3647             {
3648             PERL_ARGS_ASSERT_CX_PUSHFORMAT;
3649              
3650             cx->blk_format.old_cxsubix = PL_curstackinfo->si_cxsubix;
3651             PL_curstackinfo->si_cxsubix= (I32)(cx - PL_curstackinfo->si_cxstack);
3652             cx->blk_format.cv = cv;
3653             cx->blk_format.retop = retop;
3654             cx->blk_format.gv = gv;
3655             cx->blk_format.dfoutgv = PL_defoutgv;
3656             cx->blk_format.prevcomppad = PL_comppad;
3657             cx->blk_u16 = 0;
3658              
3659             SvREFCNT_inc_simple_void_NN(cv);
3660             CvDEPTH(cv)++;
3661             SvREFCNT_inc_void(cx->blk_format.dfoutgv);
3662             }
3663              
3664              
3665             PERL_STATIC_INLINE void
3666             Perl_cx_popformat(pTHX_ PERL_CONTEXT *cx)
3667             {
3668             CV *cv;
3669             GV *dfout;
3670              
3671             PERL_ARGS_ASSERT_CX_POPFORMAT;
3672             assert(CxTYPE(cx) == CXt_FORMAT);
3673              
3674             dfout = cx->blk_format.dfoutgv;
3675             setdefout(dfout);
3676             cx->blk_format.dfoutgv = NULL;
3677             SvREFCNT_dec_NN(dfout);
3678              
3679             PL_comppad = cx->blk_format.prevcomppad;
3680             PL_curpad = LIKELY(PL_comppad != NULL) ? AvARRAY(PL_comppad) : NULL;
3681             cv = cx->blk_format.cv;
3682             cx->blk_format.cv = NULL;
3683             --CvDEPTH(cv);
3684             SvREFCNT_dec_NN(cv);
3685             PL_curstackinfo->si_cxsubix = cx->blk_format.old_cxsubix;
3686             }
3687              
3688              
3689             PERL_STATIC_INLINE void
3690             Perl_push_evalortry_common(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
3691             {
3692             cx->blk_eval.retop = retop;
3693             cx->blk_eval.old_namesv = namesv;
3694             cx->blk_eval.old_eval_root = PL_eval_root;
3695             cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
3696             cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
3697             cx->blk_eval.cur_top_env = PL_top_env;
3698              
3699             assert(!(PL_in_eval & ~ 0x3F));
3700             assert(!(PL_op->op_type & ~0x1FF));
3701             cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
3702             }
3703              
3704             PERL_STATIC_INLINE void
3705             Perl_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
3706             {
3707             PERL_ARGS_ASSERT_CX_PUSHEVAL;
3708              
3709             Perl_push_evalortry_common(aTHX_ cx, retop, namesv);
3710              
3711             cx->blk_eval.old_cxsubix = PL_curstackinfo->si_cxsubix;
3712             PL_curstackinfo->si_cxsubix = (I32)(cx - PL_curstackinfo->si_cxstack);
3713             }
3714              
3715             PERL_STATIC_INLINE void
3716             Perl_cx_pushtry(pTHX_ PERL_CONTEXT *cx, OP *retop)
3717             {
3718             PERL_ARGS_ASSERT_CX_PUSHTRY;
3719              
3720             Perl_push_evalortry_common(aTHX_ cx, retop, NULL);
3721              
3722             /* Don't actually change it, just store the current value so it's restored
3723             * by the common popeval */
3724             cx->blk_eval.old_cxsubix = PL_curstackinfo->si_cxsubix;
3725             }
3726              
3727              
3728             PERL_STATIC_INLINE void
3729             Perl_cx_popeval(pTHX_ PERL_CONTEXT *cx)
3730             {
3731             SV *sv;
3732              
3733             PERL_ARGS_ASSERT_CX_POPEVAL;
3734             assert(CxTYPE(cx) == CXt_EVAL);
3735              
3736             PL_in_eval = CxOLD_IN_EVAL(cx);
3737             assert(!(PL_in_eval & 0xc0));
3738             PL_eval_root = cx->blk_eval.old_eval_root;
3739             sv = cx->blk_eval.cur_text;
3740             if (sv && CxEVAL_TXT_REFCNTED(cx)) {
3741             cx->blk_eval.cur_text = NULL;
3742             SvREFCNT_dec_NN(sv);
3743             }
3744              
3745             sv = cx->blk_eval.old_namesv;
3746             if (sv) {
3747             cx->blk_eval.old_namesv = NULL;
3748             SvREFCNT_dec_NN(sv);
3749             }
3750             PL_curstackinfo->si_cxsubix = cx->blk_eval.old_cxsubix;
3751             }
3752              
3753              
3754             /* push a plain loop, i.e.
3755             * { block }
3756             * while (cond) { block }
3757             * for (init;cond;continue) { block }
3758             * This loop can be last/redo'ed etc.
3759             */
3760              
3761             PERL_STATIC_INLINE void
3762             Perl_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
3763             {
3764             PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
3765             cx->blk_loop.my_op = cLOOP;
3766             }
3767              
3768              
3769             /* push a true for loop, i.e.
3770             * for var (list) { block }
3771             */
3772              
3773             PERL_STATIC_INLINE void
3774             Perl_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
3775             {
3776             PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
3777              
3778             /* this one line is common with cx_pushloop_plain */
3779             cx->blk_loop.my_op = cLOOP;
3780              
3781             cx->blk_loop.itervar_u.svp = (SV**)itervarp;
3782             cx->blk_loop.itersave = itersave;
3783             #ifdef USE_ITHREADS
3784             cx->blk_loop.oldcomppad = PL_comppad;
3785             #endif
3786             }
3787              
3788              
3789             /* pop all loop types, including plain */
3790              
3791             PERL_STATIC_INLINE void
3792             Perl_cx_poploop(pTHX_ PERL_CONTEXT *cx)
3793             {
3794             PERL_ARGS_ASSERT_CX_POPLOOP;
3795              
3796             assert(CxTYPE_is_LOOP(cx));
3797             if ( CxTYPE(cx) == CXt_LOOP_ARY
3798             || CxTYPE(cx) == CXt_LOOP_LAZYSV)
3799             {
3800             /* Free ary or cur. This assumes that state_u.ary.ary
3801             * aligns with state_u.lazysv.cur. See cx_dup() */
3802             SV *sv = cx->blk_loop.state_u.lazysv.cur;
3803             cx->blk_loop.state_u.lazysv.cur = NULL;
3804             SvREFCNT_dec_NN(sv);
3805             if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
3806             sv = cx->blk_loop.state_u.lazysv.end;
3807             cx->blk_loop.state_u.lazysv.end = NULL;
3808             SvREFCNT_dec_NN(sv);
3809             }
3810             }
3811             if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
3812             SV *cursv;
3813             SV **svp = (cx)->blk_loop.itervar_u.svp;
3814             if ((cx->cx_type & CXp_FOR_GV))
3815             svp = &GvSV((GV*)svp);
3816             cursv = *svp;
3817             *svp = cx->blk_loop.itersave;
3818             cx->blk_loop.itersave = NULL;
3819             SvREFCNT_dec(cursv);
3820             }
3821             if (cx->cx_type & (CXp_FOR_GV|CXp_FOR_LVREF))
3822             SvREFCNT_dec(cx->blk_loop.itervar_u.svp);
3823             }
3824              
3825              
3826             PERL_STATIC_INLINE void
3827             Perl_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
3828             {
3829             PERL_ARGS_ASSERT_CX_PUSHWHEN;
3830              
3831             cx->blk_givwhen.leave_op = cLOGOP->op_other;
3832             }
3833              
3834              
3835             PERL_STATIC_INLINE void
3836             Perl_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
3837             {
3838             PERL_ARGS_ASSERT_CX_POPWHEN;
3839             assert(CxTYPE(cx) == CXt_WHEN);
3840              
3841             PERL_UNUSED_ARG(cx);
3842             PERL_UNUSED_CONTEXT;
3843             /* currently NOOP */
3844             }
3845              
3846              
3847             PERL_STATIC_INLINE void
3848             Perl_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
3849             {
3850             PERL_ARGS_ASSERT_CX_PUSHGIVEN;
3851              
3852             cx->blk_givwhen.leave_op = cLOGOP->op_other;
3853             cx->blk_givwhen.defsv_save = orig_defsv;
3854             }
3855              
3856              
3857             PERL_STATIC_INLINE void
3858             Perl_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
3859             {
3860             SV *sv;
3861              
3862             PERL_ARGS_ASSERT_CX_POPGIVEN;
3863             assert(CxTYPE(cx) == CXt_GIVEN);
3864              
3865             sv = GvSV(PL_defgv);
3866             GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
3867             cx->blk_givwhen.defsv_save = NULL;
3868             SvREFCNT_dec(sv);
3869             }
3870              
3871              
3872             /* Make @_ empty in-place in simple cases: a cheap av_clear().
3873             * See Perl_clear_defarray() for non-simple cases */
3874              
3875              
3876             PERL_STATIC_INLINE void
3877             Perl_clear_defarray_simple(pTHX_ AV *av)
3878             {
3879             PERL_ARGS_ASSERT_CLEAR_DEFARRAY_SIMPLE;
3880              
3881             assert(SvTYPE(av) == SVt_PVAV);
3882             assert(!SvREADONLY(av));
3883             assert(!SvMAGICAL(av));
3884             assert(SvREFCNT(av) == 1);
3885              
3886             #ifdef PERL_RC_STACK
3887             assert(AvREAL(av));
3888             /* this code assumes that destructors called here can't free av
3889             * itself, because pad[0] and/or CX pointers will keep it alive */
3890             SSize_t i = AvFILLp(av);
3891             while (i >= 0) {
3892             SV *sv = AvARRAY(av)[i];
3893             AvARRAY(av)[i--] = NULL;
3894             SvREFCNT_dec(sv);
3895             }
3896             #else
3897             assert(!AvREAL(av));
3898             #endif
3899             AvFILLp(av) = -1;
3900             Perl_av_remove_offset(aTHX_ av);
3901             }
3902              
3903             /* Switch to a different argument stack.
3904             *
3905             * Note that it doesn't update PL_curstackinfo->si_stack_nonrc_base,
3906             * so this should only be used as part of a general switching between
3907             * stackinfos.
3908             */
3909              
3910             PERL_STATIC_INLINE void
3911             Perl_switch_argstack(pTHX_ AV *to)
3912             {
3913             PERL_ARGS_ASSERT_SWITCH_ARGSTACK;
3914              
3915             AvFILLp(PL_curstack) = PL_stack_sp - PL_stack_base;
3916             PL_stack_base = AvARRAY(to);
3917             PL_stack_max = PL_stack_base + AvMAX(to);
3918             PL_stack_sp = PL_stack_base + AvFILLp(to);
3919             PL_curstack = to;
3920             }
3921              
3922              
3923             /* Push, and switch to a new stackinfo, allocating one if none are spare,
3924             * to get a fresh set of stacks.
3925             * Update all the interpreter variables like PL_curstackinfo,
3926             * PL_stack_sp, etc.
3927             * current flag meanings:
3928             * 1 make the new arg stack AvREAL
3929             */
3930              
3931              
3932             PERL_STATIC_INLINE void
3933             Perl_push_stackinfo(pTHX_ I32 type, UV flags)
3934             {
3935             PERL_ARGS_ASSERT_PUSH_STACKINFO;
3936              
3937             PERL_SI *next = PL_curstackinfo->si_next;
3938             DEBUG_l({
3939             int i = 0; PERL_SI *p = PL_curstackinfo;
3940             while (p) { i++; p = p->si_prev; }
3941             Perl_deb(aTHX_ "push STACKINFO %d in %s at %s:%d\n",
3942             i, SAFE_FUNCTION__, __FILE__, __LINE__);
3943             })
3944              
3945             if (!next) {
3946             next = new_stackinfo_flags(32, 2048/sizeof(PERL_CONTEXT) - 1, flags);
3947             next->si_prev = PL_curstackinfo;
3948             PL_curstackinfo->si_next = next;
3949             }
3950             next->si_type = type;
3951             next->si_cxix = -1;
3952             next->si_cxsubix = -1;
3953             PUSHSTACK_INIT_HWM(next);
3954             #ifdef PERL_RC_STACK
3955             next->si_stack_nonrc_base = 0;
3956             #endif
3957             if (flags & 1)
3958             AvREAL_on(next->si_stack);
3959             else
3960             AvREAL_off(next->si_stack);
3961             AvFILLp(next->si_stack) = 0;
3962             switch_argstack(next->si_stack);
3963             PL_curstackinfo = next;
3964             SET_MARK_OFFSET;
3965             }
3966              
3967              
3968             /* Pop, then switch to the previous stackinfo and set of stacks.
3969             * Update all the interpreter variables like PL_curstackinfo,
3970             * PL_stack_sp, etc. */
3971              
3972             PERL_STATIC_INLINE void
3973             Perl_pop_stackinfo(pTHX)
3974             {
3975             PERL_ARGS_ASSERT_POP_STACKINFO;
3976              
3977             PERL_SI * const prev = PL_curstackinfo->si_prev;
3978             DEBUG_l({
3979             int i = -1; PERL_SI *p = PL_curstackinfo;
3980             while (p) { i++; p = p->si_prev; }
3981             Perl_deb(aTHX_ "pop STACKINFO %d in %s at %s:%d\n",
3982             i, SAFE_FUNCTION__, __FILE__, __LINE__);})
3983             if (!prev) {
3984             Perl_croak_popstack();
3985             }
3986              
3987             switch_argstack(prev->si_stack);
3988             /* don't free prev here, free them all at the END{} */
3989             PL_curstackinfo = prev;
3990             }
3991              
3992              
3993              
3994             /*
3995             =for apidoc newPADxVOP
3996              
3997             Constructs, checks and returns an op containing a pad offset. C is
3998             the opcode, which should be one of C, C, C
3999             or C. The returned op will have the C field set by
4000             the C argument.
4001              
4002             This is convenient when constructing a large optree in nested function
4003             calls, as it avoids needing to store the pad op directly to set the
4004             C field as a side-effect. For example
4005              
4006             o = op_append_elem(OP_LINESEQ, o,
4007             newPADxVOP(OP_PADSV, 0, padix));
4008              
4009             =cut
4010             */
4011              
4012             PERL_STATIC_INLINE OP *
4013             Perl_newPADxVOP(pTHX_ I32 type, I32 flags, PADOFFSET padix)
4014             {
4015             PERL_ARGS_ASSERT_NEWPADXVOP;
4016              
4017             assert(type == OP_PADSV || type == OP_PADAV || type == OP_PADHV
4018             || type == OP_PADCV);
4019             OP *o = newOP(type, flags);
4020             o->op_targ = padix;
4021             return o;
4022             }
4023              
4024             /* ------------------ util.h ------------------------------------------- */
4025              
4026             /*
4027             =for apidoc_section $string
4028              
4029             =for apidoc foldEQ
4030             =for apidoc_item foldEQ_locale
4031              
4032             These each return true if the leading C bytes of the strings C and
4033             C are the same case-insensitively; false otherwise.
4034              
4035             In C, uppercase and lowercase ASCII range bytes match themselves and
4036             their opposite case counterparts. Non-cased and non-ASCII range bytes match
4037             only themselves.
4038              
4039             In C, the comparison is based on the current locale.
4040             If that locale is UTF-8, the results are the same as C, leading to
4041             incorrect values for non-ASCII range code points. Use C>
4042             instead.
4043              
4044             =cut
4045             */
4046              
4047             PERL_STATIC_INLINE I32
4048             Perl_foldEQ(pTHX_ const char *s1, const char *s2, I32 len)
4049             {
4050             PERL_UNUSED_CONTEXT;
4051              
4052             const U8 *a = (const U8 *)s1;
4053             const U8 *b = (const U8 *)s2;
4054              
4055             PERL_ARGS_ASSERT_FOLDEQ;
4056              
4057             assert(len >= 0);
4058              
4059             while (len--) {
4060             if (*a != *b && *a != PL_fold[*b])
4061             return 0;
4062             a++,b++;
4063             }
4064             return 1;
4065             }
4066              
4067             PERL_STATIC_INLINE I32
4068             Perl_foldEQ_latin1(pTHX_ const char *s1, const char *s2, I32 len)
4069             {
4070             /* Compare non-UTF-8 using Unicode (Latin1) semantics. Works on all folds
4071             * representable without UTF-8, except for LATIN_SMALL_LETTER_SHARP_S, and
4072             * does not check for this. Nor does it check that the strings each have
4073             * at least 'len' characters. */
4074              
4075             PERL_UNUSED_CONTEXT;
4076              
4077             const U8 *a = (const U8 *)s1;
4078             const U8 *b = (const U8 *)s2;
4079              
4080             PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
4081              
4082             assert(len >= 0);
4083              
4084             while (len--) {
4085             if (*a != *b && *a != PL_fold_latin1[*b]) {
4086             return 0;
4087             }
4088             a++, b++;
4089             }
4090             return 1;
4091             }
4092              
4093             PERL_STATIC_INLINE I32
4094             Perl_foldEQ_locale(pTHX_ const char *s1, const char *s2, I32 len)
4095             {
4096             const U8 *a = (const U8 *)s1;
4097             const U8 *b = (const U8 *)s2;
4098              
4099             PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
4100              
4101             assert(len >= 0);
4102              
4103             while (len--) {
4104             if (*a != *b && *a != PL_fold_locale[*b]) {
4105             DEBUG_Lv(PerlIO_printf(Perl_debug_log,
4106             "%s:%d: Our records indicate %02x is not a fold of %02x"
4107             " or its mate %02x\n",
4108             __FILE__, __LINE__, *a, *b, PL_fold_locale[*b]));
4109              
4110             return 0;
4111             }
4112             a++,b++;
4113             }
4114             return 1;
4115             }
4116              
4117             /*
4118             =for apidoc_section $string
4119             =for apidoc my_strnlen
4120              
4121             The C library C if available, or a Perl implementation of it.
4122              
4123             C computes the length of the string, up to C
4124             bytes. It will never attempt to address more than C
4125             bytes, making it suitable for use with strings that are not
4126             guaranteed to be NUL-terminated.
4127              
4128             =cut
4129              
4130             Description stolen from http://man.openbsd.org/strnlen.3,
4131             implementation stolen from PostgreSQL.
4132             */
4133             #ifndef HAS_STRNLEN
4134              
4135             PERL_STATIC_INLINE Size_t
4136             Perl_my_strnlen(const char *str, Size_t maxlen)
4137             {
4138             PERL_ARGS_ASSERT_MY_STRNLEN;
4139              
4140             const char *end = (const char *) memchr(str, '\0', maxlen);
4141              
4142             if (end == NULL) return maxlen;
4143             return end - str;
4144             }
4145              
4146             #endif
4147              
4148             #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
4149              
4150             PERL_STATIC_INLINE void *
4151             S_my_memrchr(const char * s, const char c, const STRLEN len)
4152             {
4153             /* memrchr(), since many platforms lack it */
4154              
4155             const char * t = s + len - 1;
4156              
4157             PERL_ARGS_ASSERT_MY_MEMRCHR;
4158              
4159             while (t >= s) {
4160             if (*t == c) {
4161             return (void *) t;
4162             }
4163             t--;
4164             }
4165              
4166             return NULL;
4167             }
4168              
4169             #endif
4170              
4171             PERL_STATIC_INLINE char *
4172             Perl_mortal_getenv(const char * str)
4173             {
4174             /* This implements a (mostly) thread-safe, sequential-call-safe getenv().
4175             *
4176             * It's (mostly) thread-safe because it uses a mutex to prevent other
4177             * threads (that look at this mutex) from destroying the result before this
4178             * routine has a chance to copy the result to a place that won't be
4179             * destroyed before the caller gets a chance to handle it. That place is a
4180             * mortal SV. khw chose this over SAVEFREEPV because he is under the
4181             * impression that the SV will hang around longer under more circumstances
4182             *
4183             * The reason it isn't completely thread-safe is that other code could
4184             * simply not pay attention to the mutex. All of the Perl core uses the
4185             * mutex, but it is possible for code from, say XS, to not use this mutex,
4186             * defeating the safety.
4187             *
4188             * getenv() returns, in some implementations, a pointer to a spot in the
4189             * **environ array, which could be invalidated at any time by this or
4190             * another thread changing the environment. Other implementations copy the
4191             * **environ value to a static buffer, returning a pointer to that. That
4192             * buffer might or might not be invalidated by a getenv() call in another
4193             * thread. If it does get zapped, we need an exclusive lock. Otherwise,
4194             * many getenv() calls can safely be running simultaneously, so a
4195             * many-reader (but no simultaneous writers) lock is ok. There is a
4196             * Configure probe to see if another thread destroys the buffer, and the
4197             * mutex is defined accordingly.
4198             *
4199             * But in all cases, using the mutex prevents these problems, as long as
4200             * all code uses the same mutex.
4201             *
4202             * A complication is that this can be called during phases where the
4203             * mortalization process isn't available. These are in interpreter
4204             * destruction or early in construction. khw believes that at these times
4205             * there shouldn't be anything else going on, so plain getenv is safe AS
4206             * LONG AS the caller acts on the return before calling it again. */
4207              
4208             char * ret;
4209             dTHX;
4210              
4211             PERL_ARGS_ASSERT_MORTAL_GETENV;
4212              
4213             /* Can't mortalize without stacks. khw believes that no other threads
4214             * should be running, so no need to lock things, and this may be during a
4215             * phase when locking isn't even available */
4216             if (UNLIKELY(PL_scopestack_ix == 0)) {
4217             return getenv(str);
4218             }
4219              
4220             #ifdef PERL_MEM_LOG
4221              
4222             /* A major complication arises under PERL_MEM_LOG. When that is active,
4223             * every memory allocation may result in logging, depending on the value of
4224             * ENV{PERL_MEM_LOG} at the moment. That means, as we create the SV for
4225             * saving ENV{foo}'s value (but before saving it), the logging code will
4226             * call us recursively to find out what ENV{PERL_MEM_LOG} is. Without some
4227             * care that could lead to: 1) infinite recursion; or 2) deadlock (trying to
4228             * lock a boolean mutex recursively); 3) destroying the getenv() static
4229             * buffer; or 4) destroying the temporary created by this for the copy
4230             * causes a log entry to be made which could cause a new temporary to be
4231             * created, which will need to be destroyed at some point, leading to an
4232             * infinite loop.
4233             *
4234             * The solution adopted here (after some gnashing of teeth) is to detect
4235             * the recursive calls and calls from the logger, and treat them specially.
4236             * Let's say we want to do getenv("foo"). We first find
4237             * getenv(PERL_MEM_LOG) and save it to a fixed-length per-interpreter
4238             * variable, so no temporary is required. Then we do getenv(foo), and in
4239             * the process of creating a temporary to save it, this function will be
4240             * called recursively to do a getenv(PERL_MEM_LOG). On the recursed call,
4241             * we detect that it is such a call and return our saved value instead of
4242             * locking and doing a new getenv(). This solves all of problems 1), 2),
4243             * and 3). Because all the getenv()s are done while the mutex is locked,
4244             * the state cannot have changed. To solve 4), we don't create a temporary
4245             * when this is called from the logging code. That code disposes of the
4246             * return value while the mutex is still locked.
4247             *
4248             * The value of getenv(PERL_MEM_LOG) can be anything, but only initial
4249             * digits and 3 particular letters are significant; the rest are ignored by
4250             * the memory logging code. Thus the per-interpreter variable only needs
4251             * to be large enough to save the significant information, the size of
4252             * which is known at compile time. The first byte is extra, reserved for
4253             * flags for our use. To protect against overflowing, only the reserved
4254             * byte, as many digits as don't overflow, and the three letters are
4255             * stored.
4256             *
4257             * The reserved byte has two bits:
4258             * 0x1 if set indicates that if we get here, it is a recursive call of
4259             * getenv()
4260             * 0x2 if set indicates that the call is from the logging code.
4261             *
4262             * If the flag indicates this is a recursive call, just return the stored
4263             * value of PL_mem_log; An empty value gets turned into NULL. */
4264             if (strEQ(str, "PERL_MEM_LOG") && PL_mem_log[0] & 0x1) {
4265             if (PL_mem_log[1] == '\0') {
4266             return NULL;
4267             } else {
4268             return PL_mem_log + 1;
4269             }
4270             }
4271              
4272             #endif
4273              
4274             GETENV_LOCK;
4275              
4276             #ifdef PERL_MEM_LOG
4277              
4278             /* Here we are in a critical section. As explained above, we do our own
4279             * getenv(PERL_MEM_LOG), saving the result safely. */
4280             ret = getenv("PERL_MEM_LOG");
4281             if (ret == NULL) { /* No logging active */
4282              
4283             /* Return that immediately if called from the logging code */
4284             if (PL_mem_log[0] & 0x2) {
4285             GETENV_UNLOCK;
4286             return NULL;
4287             }
4288              
4289             PL_mem_log[1] = '\0';
4290             }
4291             else {
4292             char *mem_log_meat = PL_mem_log + 1; /* first byte reserved */
4293              
4294             /* There is nothing to prevent the value of PERL_MEM_LOG from being an
4295             * extremely long string. But we want only a few characters from it.
4296             * PL_mem_log has been made large enough to hold just the ones we need.
4297             * First the file descriptor. */
4298             if (isDIGIT(*ret)) {
4299             const char * s = ret;
4300             if (UNLIKELY(*s == '0')) {
4301              
4302             /* Reduce multiple leading zeros to a single one. This is to
4303             * allow the caller to change what to do with leading zeros. */
4304             *mem_log_meat++ = '0';
4305             s++;
4306             while (*s == '0') {
4307             s++;
4308             }
4309             }
4310              
4311             /* If the input overflows, copy just enough for the result to also
4312             * overflow, plus 1 to make sure */
4313             while (isDIGIT(*s) && s < ret + TYPE_DIGITS(UV) + 1) {
4314             *mem_log_meat++ = *s++;
4315             }
4316             }
4317              
4318             /* Then each of the four significant characters */
4319             if (strchr(ret, 'm')) {
4320             *mem_log_meat++ = 'm';
4321             }
4322             if (strchr(ret, 's')) {
4323             *mem_log_meat++ = 's';
4324             }
4325             if (strchr(ret, 't')) {
4326             *mem_log_meat++ = 't';
4327             }
4328             if (strchr(ret, 'c')) {
4329             *mem_log_meat++ = 'c';
4330             }
4331             *mem_log_meat = '\0';
4332              
4333             assert(mem_log_meat < PL_mem_log + sizeof(PL_mem_log));
4334             }
4335              
4336             /* If we are being called from the logger, it only needs the significant
4337             * portion of PERL_MEM_LOG, and doesn't need a safe copy */
4338             if (PL_mem_log[0] & 0x2) {
4339             assert(strEQ(str, "PERL_MEM_LOG"));
4340             GETENV_UNLOCK;
4341             return PL_mem_log + 1;
4342             }
4343              
4344             /* Here is a generic getenv(). This could be a getenv("PERL_MEM_LOG") that
4345             * is coming from other than the logging code, so it should be treated the
4346             * same as any other getenv(), returning the full value, not just the
4347             * significant part, and having its value saved. Set the flag that
4348             * indicates any call to this routine will be a recursion from here */
4349             PL_mem_log[0] = 0x1;
4350              
4351             #endif
4352              
4353             /* Now get the value of the real desired variable, and save a copy */
4354             ret = getenv(str);
4355              
4356             if (ret != NULL) {
4357             ret = SvPVX( newSVpvn_flags(ret, strlen(ret) ,SVs_TEMP) );
4358             }
4359              
4360             GETENV_UNLOCK;
4361              
4362             #ifdef PERL_MEM_LOG
4363              
4364             /* Clear the buffer */
4365             Zero(PL_mem_log, sizeof(PL_mem_log), char);
4366              
4367             #endif
4368              
4369             return ret;
4370             }
4371              
4372             PERL_STATIC_INLINE bool
4373             Perl_sv_isbool(pTHX_ const SV *sv)
4374             {
4375             PERL_UNUSED_CONTEXT;
4376             return SvBoolFlagsOK(sv) && BOOL_INTERNALS_sv_isbool(sv);
4377             }
4378              
4379             #ifdef USE_ITHREADS
4380              
4381             PERL_STATIC_INLINE AV *
4382             Perl_cop_file_avn(pTHX_ const COP *cop) {
4383              
4384             PERL_ARGS_ASSERT_COP_FILE_AVN;
4385              
4386             const char *file = CopFILE(cop);
4387             if (file) {
4388             GV *gv = gv_fetchfile_flags(file, strlen(file), GVF_NOADD);
4389             if (gv) {
4390             return GvAVn(gv);
4391             }
4392             else
4393             return NULL;
4394             }
4395             else
4396             return NULL;
4397             }
4398              
4399             #endif
4400              
4401             PERL_STATIC_INLINE PADNAME *
4402             Perl_padname_refcnt_inc(PADNAME *pn)
4403             {
4404             PadnameREFCNT(pn)++;
4405             return pn;
4406             }
4407              
4408             PERL_STATIC_INLINE PADNAMELIST *
4409             Perl_padnamelist_refcnt_inc(PADNAMELIST *pnl)
4410             {
4411             PadnamelistREFCNT(pnl)++;
4412             return pnl;
4413             }
4414              
4415             /* copy a string to a safe spot */
4416              
4417             /*
4418             =for apidoc_section $string
4419             =for apidoc savepv
4420             =for apidoc_item savepvn
4421             =for apidoc_item savepvs
4422             =for apidoc_item savesvpv
4423             =for apidoc_item savesharedpv
4424             =for apidoc_item savesharedpvn
4425             =for apidoc_item savesharedpvs
4426             =for apidoc_item savesharedsvpv
4427              
4428             Perl's version of C (or C would be if it existed).
4429              
4430             These each return a pointer to a newly allocated string which is a duplicate of
4431             the input string.
4432              
4433             The forms differ in how the string to be copied is specified, and where the new
4434             memory is allocated from.
4435              
4436             To prevent memory leaks, the memory allocated for the new string needs to be
4437             freed when no longer needed. This can be done with the C>
4438             function, or L|perlguts/SAVEFREEPV(p)>.
4439              
4440             The forms whose names contain C differ from the corresponding form
4441             without that in its name, only in that the memory in the former comes from
4442             memory shared between threads. This is needed, because on some platforms,
4443             Windows for example, all allocated memory owned by a thread is deallocated when
4444             that thread ends. So if you need that not to happen, you need to use the
4445             shared memory forms.
4446              
4447             The string to copy in C is a C language string literal surrounded by
4448             double quotes.
4449              
4450             The string to copy in the forms whose name contains C comes from the PV
4451             in the SV argument C, using C
4452              
4453             The string to copy in the remaining forms comes from the C argument.
4454              
4455             In the case of C, the size of the string is determined by C,
4456             which means it may not contain embedded C characters, and must have a
4457             trailing C.
4458              
4459             In the case of C, C gives the length of C, hence it may
4460             contain embedded C characters. The copy will be guaranteed to have a
4461             trailing NUL added if not already present.
4462              
4463             =cut
4464             */
4465              
4466             PERL_STATIC_INLINE char *
4467             Perl_savepv(pTHX_ const char *pv)
4468             {
4469             PERL_UNUSED_CONTEXT;
4470             if (!pv)
4471             return NULL;
4472             else {
4473             char *newaddr;
4474             const STRLEN pvlen = strlen(pv)+1;
4475             Newx(newaddr, pvlen, char);
4476             return (char*)memcpy(newaddr, pv, pvlen);
4477             }
4478             }
4479              
4480             /* same thing but with a known length */
4481              
4482             PERL_STATIC_INLINE char *
4483             Perl_savepvn(pTHX_ const char *pv, Size_t len)
4484             {
4485             char *newaddr;
4486             PERL_UNUSED_CONTEXT;
4487              
4488             Newx(newaddr,len+1,char);
4489             /* Give a meaning to NULL pointer mainly for the use in sv_magic() */
4490             if (pv) {
4491             /* might not be null terminated */
4492             newaddr[len] = '\0';
4493             return (char *) CopyD(pv,newaddr,len,char);
4494             }
4495             else {
4496             return (char *) ZeroD(newaddr,len+1,char);
4497             }
4498             }
4499              
4500             PERL_STATIC_INLINE char *
4501             Perl_savesvpv(pTHX_ SV *sv)
4502             {
4503             STRLEN len;
4504             const char * const pv = SvPV_const(sv, len);
4505             char *newaddr;
4506              
4507             PERL_ARGS_ASSERT_SAVESVPV;
4508              
4509             ++len;
4510             Newx(newaddr,len,char);
4511             return (char *) CopyD(pv,newaddr,len,char);
4512             }
4513              
4514             PERL_STATIC_INLINE char *
4515             Perl_savesharedsvpv(pTHX_ SV *sv)
4516             {
4517             STRLEN len;
4518             const char * const pv = SvPV_const(sv, len);
4519              
4520             PERL_ARGS_ASSERT_SAVESHAREDSVPV;
4521              
4522             return savesharedpvn(pv, len);
4523             }
4524              
4525             #ifndef PERL_GET_CONTEXT_DEFINED
4526              
4527             /*
4528             =for apidoc_section $embedding
4529             =for apidoc get_context
4530              
4531             Implements L>, which you should use instead.
4532              
4533             =cut
4534             */
4535              
4536             PERL_STATIC_INLINE void *
4537             Perl_get_context(void)
4538             {
4539             # if defined(USE_ITHREADS)
4540             # ifdef OLD_PTHREADS_API
4541             pthread_addr_t t;
4542             int error = pthread_getspecific(PL_thr_key, &t);
4543             if (error)
4544             Perl_croak_nocontext("panic: pthread_getspecific, error=%d", error);
4545             return (void*)t;
4546             # elif defined(I_MACH_CTHREADS)
4547             return (void*)cthread_data(cthread_self());
4548             # else
4549             return (void*)PTHREAD_GETSPECIFIC(PL_thr_key);
4550             # endif
4551             # else
4552             return (void*)NULL;
4553             # endif
4554             }
4555              
4556             #endif
4557              
4558             PERL_STATIC_INLINE MGVTBL*
4559             Perl_get_vtbl(pTHX_ int vtbl_id)
4560             {
4561             PERL_UNUSED_CONTEXT;
4562              
4563             return (vtbl_id < 0 || vtbl_id >= magic_vtable_max)
4564             ? NULL : (MGVTBL*)PL_magic_vtables + vtbl_id;
4565             }
4566              
4567             /*
4568             =for apidoc_section $string
4569             =for apidoc my_strlcat
4570              
4571             The C library C if available, or a Perl implementation of it.
4572             This operates on C C-terminated strings.
4573              
4574             C appends string C to the end of C. It will append at
4575             most S> bytes. It will then C-terminate,
4576             unless C is 0 or the original C string was longer than C (in
4577             practice this should not happen as it means that either C is incorrect or
4578             that C is not a proper C-terminated string).
4579              
4580             Note that C is the full size of the destination buffer and
4581             the result is guaranteed to be C-terminated if there is room. Note that
4582             room for the C should be included in C.
4583              
4584             The return value is the total length that C would have if C is
4585             sufficiently large. Thus it is the initial length of C plus the length of
4586             C. If C is smaller than the return, the excess was not appended.
4587              
4588             =cut
4589              
4590             Description stolen from http://man.openbsd.org/strlcat.3
4591             */
4592             #ifndef HAS_STRLCAT
4593             PERL_STATIC_INLINE Size_t
4594             Perl_my_strlcat(char *dst, const char *src, Size_t size)
4595             {
4596             Size_t used, length, copy;
4597              
4598             used = strlen(dst);
4599             length = strlen(src);
4600             if (size > 0 && used < size - 1) {
4601             copy = (length >= size - used) ? size - used - 1 : length;
4602             memcpy(dst + used, src, copy);
4603             dst[used + copy] = '\0';
4604             }
4605             return used + length;
4606             }
4607             #endif
4608              
4609              
4610             /*
4611             =for apidoc my_strlcpy
4612              
4613             The C library C if available, or a Perl implementation of it.
4614             This operates on C C-terminated strings.
4615              
4616             C copies up to S> bytes from the string C
4617             to C, C-terminating the result if C is not 0.
4618              
4619             The return value is the total length C would be if the copy completely
4620             succeeded. If it is larger than C, the excess was not copied.
4621              
4622             =cut
4623              
4624             Description stolen from http://man.openbsd.org/strlcpy.3
4625             */
4626             #ifndef HAS_STRLCPY
4627             PERL_STATIC_INLINE Size_t
4628             Perl_my_strlcpy(char *dst, const char *src, Size_t size)
4629             {
4630             Size_t length, copy;
4631              
4632             length = strlen(src);
4633             if (size > 0) {
4634             copy = (length >= size) ? size - 1 : length;
4635             memcpy(dst, src, copy);
4636             dst[copy] = '\0';
4637             }
4638             return length;
4639             }
4640             #endif
4641              
4642             /*
4643             * ex: set ts=8 sts=4 sw=4 et:
4644             */