File Coverage

/usr/local/lib/perl5/5.42.0/x86_64-linux/CORE/inline.h

Criterion	Covered	Total	%
statement	8	25	32.0
branch	2	20	10.0
condition			n/a
subroutine			n/a
pod			n/a
total	10	45	22.2

line	stmt	bran	code
1			/*> inline.h
2			*
3			* Copyright (C) 2012 by Larry Wall and others
4			*
5			* You may distribute under the terms of either the GNU General Public
6			* License or the Artistic License, as specified in the README file.
7			*
8			* This file contains tables and code adapted from
9			* https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this
10			* copyright notice:
11
12			Copyright (c) 2008-2009 Bjoern Hoehrmann
13
14			Permission is hereby granted, free of charge, to any person obtaining a copy of
15			this software and associated documentation files (the "Software"), to deal in
16			the Software without restriction, including without limitation the rights to
17			use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
18			of the Software, and to permit persons to whom the Software is furnished to do
19			so, subject to the following conditions:
20
21			The above copyright notice and this permission notice shall be included in all
22			copies or substantial portions of the Software.
23
24			THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25			IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26			FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27			AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28			LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29			OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30			SOFTWARE.
31
32			*
33			* This file is a home for static inline functions that cannot go in other
34			* header files, because they depend on proto.h (included after most other
35			* headers) or struct definitions.
36			*
37			* Note also perlstatic.h for functions that can't or shouldn't be inlined, but
38			* whose details should be exposed to the compiler, for such things as tail
39			* call optimization.
40			*
41			* Each section names the header file that the functions "belong" to.
42			*/
43
44			/* ------------------------------- av.h ------------------------------- */
45
46			/*
47			=for apidoc_section $AV
48			=for apidoc av_count
49			Returns the number of elements in the array C. This is the true length of
50			the array, including any undefined elements. It is always the same as
51			S>.
52
53			=cut
54			*/
55			PERL_STATIC_INLINE Size_t
56			Perl_av_count(pTHX_ AV *av)
57			{
58			PERL_ARGS_ASSERT_AV_COUNT;
59			assert(SvTYPE(av) == SVt_PVAV);
60
61			return AvFILL(av) + 1;
62			}
63
64			/* ------------------------------- av.c ------------------------------- */
65
66			/*
67			=for apidoc av_store_simple
68
69			This is a cut-down version of av_store that assumes that the array is
70			very straightforward - no magic, not readonly, and AvREAL - and that
71			C is not negative. This function MUST NOT be used in situations
72			where any of those assumptions may not hold.
73
74			Stores an SV in an array. The array index is specified as C. It
75			can be dereferenced to get the C that was stored there (= C)).
76
77			Note that the caller is responsible for suitably incrementing the reference
78			count of C before the call.
79
80			Approximate Perl equivalent: C.
81
82			=cut
83			*/
84
85			PERL_STATIC_INLINE SV**
86			Perl_av_store_simple(pTHX_ AV av, SSize_t key, SV val)
87			{
88			SV** ary;
89
90			PERL_ARGS_ASSERT_AV_STORE_SIMPLE;
91			assert(SvTYPE(av) == SVt_PVAV);
92			assert(!SvMAGICAL(av));
93			assert(!SvREADONLY(av));
94			assert(AvREAL(av));
95			assert(key > -1);
96
97			ary = AvARRAY(av);
98
99			if (AvFILLp(av) < key) {
100			if (key > AvMAX(av)) {
101			av_extend(av,key);
102			ary = AvARRAY(av);
103			}
104			AvFILLp(av) = key;
105			} else
106			SvREFCNT_dec(ary[key]);
107
108			ary[key] = val;
109			return &ary[key];
110			}
111
112			/*
113			=for apidoc av_fetch_simple
114
115			This is a cut-down version of av_fetch that assumes that the array is
116			very straightforward - no magic, not readonly, and AvREAL - and that
117			C is not negative. This function MUST NOT be used in situations
118			where any of those assumptions may not hold.
119
120			Returns the SV at the specified index in the array. The C is the
121			index. If lval is true, you are guaranteed to get a real SV back (in case
122			it wasn't real before), which you can then modify. Check that the return
123			value is non-null before dereferencing it to a C.
124
125			The rough perl equivalent is C<$myarray[$key]>.
126
127			=cut
128			*/
129
130			PERL_STATIC_INLINE SV**
131			Perl_av_fetch_simple(pTHX_ AV *av, SSize_t key, I32 lval)
132			{
133			PERL_ARGS_ASSERT_AV_FETCH_SIMPLE;
134			assert(SvTYPE(av) == SVt_PVAV);
135			assert(!SvMAGICAL(av));
136			assert(!SvREADONLY(av));
137			assert(AvREAL(av));
138			assert(key > -1);
139
140			if ( (key > AvFILLp(av)) \|\| !AvARRAY(av)[key]) {
141			return lval ? av_store_simple(av,key,newSV_type(SVt_NULL)) : NULL;
142			} else {
143			return &AvARRAY(av)[key];
144			}
145			}
146
147			PERL_STATIC_INLINE void
148			Perl_av_push_simple(pTHX_ AV av, SV val)
149			{
150			PERL_ARGS_ASSERT_AV_PUSH_SIMPLE;
151			assert(SvTYPE(av) == SVt_PVAV);
152			assert(!SvMAGICAL(av));
153			assert(!SvREADONLY(av));
154			assert(AvREAL(av));
155			assert(AvFILLp(av) > -2);
156
157			(void)av_store_simple(av,AvFILLp(av)+1,val);
158			}
159
160			/*
161			=for apidoc av_new_alloc
162
163			This implements L>
164			and L>, which are the public API for this
165			functionality.
166
167			Creates a new AV and allocates its SV* array.
168
169			This is similar to, but more efficient than doing:
170
171			AV *av = newAV();
172			av_extend(av, key);
173
174			The size parameter is used to pre-allocate a SV* array large enough to
175			hold at least elements C<0..(size-1)>. C must be at least 1.
176
177			The C parameter controls whether or not the array is NULL
178			initialized.
179
180			=cut
181			*/
182
183			PERL_STATIC_INLINE AV *
184			Perl_av_new_alloc(pTHX_ SSize_t size, bool zeroflag)
185			{
186			AV * const av = newAV();
187			SV** ary;
188			PERL_ARGS_ASSERT_AV_NEW_ALLOC;
189			assert(size > 0);
190
191			Newx(ary, size, SV); / Newx performs the memwrap check */
192			AvALLOC(av) = ary;
193			AvARRAY(av) = ary;
194			AvMAX(av) = size - 1;
195
196			if (zeroflag)
197			Zero(ary, size, SV*);
198
199			return av;
200			}
201
202
203			/* remove (AvARRAY(av) - AvALLOC(av)) offset from empty array */
204
205			PERL_STATIC_INLINE void
206			Perl_av_remove_offset(pTHX_ AV *av)
207			{
208			PERL_ARGS_ASSERT_AV_REMOVE_OFFSET;
209			assert(AvFILLp(av) == -1);
210			SSize_t i = AvARRAY(av) - AvALLOC(av);
211			if (i) {
212			AvARRAY(av) = AvALLOC(av);
213			AvMAX(av) += i;
214			#ifdef PERL_RC_STACK
215			Zero(AvALLOC(av), i, SV*);
216			#endif
217			}
218			}
219
220
221			/* ------------------------------- cv.h ------------------------------- */
222
223			/*
224			=for apidoc_section $CV
225			=for apidoc CvGV
226			Returns the GV associated with the CV C, reifying it if necessary.
227
228			=cut
229			*/
230			PERL_STATIC_INLINE GV *
231			Perl_CvGV(pTHX_ CV *sv)
232			{
233			PERL_ARGS_ASSERT_CVGV;
234
235			return CvNAMED(sv)
236			? Perl_cvgv_from_hek(aTHX_ sv)
237			: ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
238			}
239
240			/*
241			=for apidoc CvDEPTH
242			Returns the recursion level of the CV C. Hence >= 2 indicates we are in a
243			recursive call.
244
245			=cut
246			*/
247			PERL_STATIC_INLINE I32 *
248			Perl_CvDEPTH(const CV * const sv)
249			{
250			PERL_ARGS_ASSERT_CVDEPTH;
251			assert(SvTYPE(sv) == SVt_PVCV \|\| SvTYPE(sv) == SVt_PVFM);
252
253			return &((XPVCV*)SvANY(sv))->xcv_depth;
254			}
255
256			/*
257			CvPROTO returns the prototype as stored, which is not necessarily what
258			the interpreter should be using. Specifically, the interpreter assumes
259			that spaces have been stripped, which has been the case if the prototype
260			was added by toke.c, but is generally not the case if it was added elsewhere.
261			Since we can't enforce the spacelessness at assignment time, this routine
262			provides a temporary copy at parse time with spaces removed.
263			I is the start of the original buffer, I is the length of the
264			prototype and will be updated when this returns.
265			*/
266
267			#ifdef PERL_CORE
268			PERL_STATIC_INLINE char *
269			S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
270			{
271			SV * tmpsv;
272			char * tmps;
273			tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
274			tmps = SvPVX(tmpsv);
275			while ((*len)--) {
276			if (!isSPACE(*orig))
277			tmps++ = orig;
278			orig++;
279			}
280			*tmps = '\0';
281			*len = tmps - SvPVX(tmpsv);
282			return SvPVX(tmpsv);
283			}
284			#endif
285
286			/* ------------------------------- iperlsys.h ------------------------------- */
287			#if ! defined(PERL_IMPLICIT_SYS) && defined(USE_ITHREADS)
288
289			/* Otherwise this function is implemented as macros in iperlsys.h */
290
291			PERL_STATIC_INLINE bool
292			S_PerlEnv_putenv(pTHX_ char * str)
293			{
294			PERL_ARGS_ASSERT_PERLENV_PUTENV;
295
296			ENV_LOCK;
297			bool retval = putenv(str);
298			ENV_UNLOCK;
299
300			return retval;
301			}
302
303			#endif
304
305			/* ------------------------------- mg.h ------------------------------- */
306
307			#if defined(PERL_CORE) \|\| defined(PERL_EXT)
308			/* assumes get-magic and stringification have already occurred */
309			PERL_STATIC_INLINE STRLEN
310			S_MgBYTEPOS(pTHX_ MAGIC mg, SV sv, const char *s, STRLEN len)
311			{
312			assert(mg->mg_type == PERL_MAGIC_regex_global);
313			assert(mg->mg_len != -1);
314			if (mg->mg_flags & MGf_BYTES \|\| !DO_UTF8(sv))
315			return (STRLEN)mg->mg_len;
316			else {
317			const STRLEN pos = (STRLEN)mg->mg_len;
318			/* Without this check, we may read past the end of the buffer: */
319			if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
320			return sv_or_pv_pos_u2b(sv, s, pos, NULL);
321			}
322			}
323			#endif
324
325			/* ------------------------------- pad.h ------------------------------ */
326
327			#if defined(PERL_IN_PAD_C) \|\| defined(PERL_IN_OP_C)
328			PERL_STATIC_INLINE bool
329			S_PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
330			{
331			PERL_ARGS_ASSERT_PADNAMEIN_SCOPE;
332
333			/* is seq within the range _LOW to _HIGH ?
334			* This is complicated by the fact that PL_cop_seqmax
335			* may have wrapped around at some point */
336			if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
337			return FALSE; /* not yet introduced */
338
339			if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
340			/* in compiling scope */
341			if (
342			(seq > COP_SEQ_RANGE_LOW(pn))
343			? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
344			: (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
345			)
346			return TRUE;
347			}
348			else if (
349			(COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
350			?
351			( seq > COP_SEQ_RANGE_LOW(pn)
352			\|\| seq <= COP_SEQ_RANGE_HIGH(pn))
353
354			: ( seq > COP_SEQ_RANGE_LOW(pn)
355			&& seq <= COP_SEQ_RANGE_HIGH(pn))
356			)
357			return TRUE;
358			return FALSE;
359			}
360			#endif
361
362			/* ------------------------------- pp.h ------------------------------- */
363
364			PERL_STATIC_INLINE Stack_off_t
365			Perl_TOPMARK(pTHX)
366			{
367			DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
368			"MARK top %p %" IVdf "\n",
369			PL_markstack_ptr,
370			(IV)*PL_markstack_ptr)));
371			return *PL_markstack_ptr;
372			}
373
374			PERL_STATIC_INLINE Stack_off_t
375	214098		Perl_POPMARK(pTHX)
376			{
377			DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
378			"MARK pop %p %" IVdf "\n",
379			(PL_markstack_ptr-1),
380			(IV)*(PL_markstack_ptr-1))));
381			assert((PL_markstack_ptr > PL_markstack) \|\| !"MARK underflow");
382	214098		return *PL_markstack_ptr--;
383			}
384
385			/*
386			=for apidoc_section $rpp
387
388			=for apidoc rpp_extend
389			Ensures that there is space on the stack to push C items, extending it
390			if necessary.
391
392			=cut
393			*/
394
395			PERL_STATIC_INLINE void
396			Perl_rpp_extend(pTHX_ SSize_t n)
397			{
398			PERL_ARGS_ASSERT_RPP_EXTEND;
399
400			EXTEND_HWM_SET(PL_stack_sp, n);
401			#ifndef STRESS_REALLOC
402			if (UNLIKELY(_EXTEND_NEEDS_GROW(PL_stack_sp, n)))
403			#endif
404			{
405			(void)stack_grow(PL_stack_sp, PL_stack_sp, n);
406			}
407			}
408
409
410			/*
411			=for apidoc rpp_popfree_to
412
413			Pop and free all items on the argument stack above C. On return,
414			C will be equal to C.
415
416			=cut
417			*/
418
419			PERL_STATIC_INLINE void
420			Perl_rpp_popfree_to(pTHX_ SV **sp)
421			{
422			PERL_ARGS_ASSERT_RPP_POPFREE_TO;
423
424			assert(sp <= PL_stack_sp);
425			#ifdef PERL_RC_STACK
426			assert(rpp_stack_is_rc());
427			while (PL_stack_sp > sp) {
428			SV sv = PL_stack_sp--;
429			SvREFCNT_dec(sv);
430			}
431			#else
432			PL_stack_sp = sp;
433			#endif
434			}
435
436
437			/*
438			=for apidoc rpp_popfree_to_NN
439
440			A variant of rpp_popfree_to() which assumes that all the pointers being
441			popped off the stack are non-NULL.
442
443			=cut
444			*/
445
446			PERL_STATIC_INLINE void
447			Perl_rpp_popfree_to_NN(pTHX_ SV **sp)
448			{
449			PERL_ARGS_ASSERT_RPP_POPFREE_TO_NN;
450
451			assert(sp <= PL_stack_sp);
452			#ifdef PERL_RC_STACK
453			assert(rpp_stack_is_rc());
454			while (PL_stack_sp > sp) {
455			SV sv = PL_stack_sp--;
456			assert(sv);
457			SvREFCNT_dec_NN(sv);
458			}
459			#else
460			PL_stack_sp = sp;
461			#endif
462			}
463
464
465			/*
466			=for apidoc rpp_popfree_1
467
468			Pop and free the top item on the argument stack and update C.
469
470			=cut
471			*/
472
473			PERL_STATIC_INLINE void
474			Perl_rpp_popfree_1(pTHX)
475			{
476			PERL_ARGS_ASSERT_RPP_POPFREE_1;
477
478			#ifdef PERL_RC_STACK
479			assert(rpp_stack_is_rc());
480			SV sv = PL_stack_sp--;
481			SvREFCNT_dec(sv);
482			#else
483			PL_stack_sp--;
484			#endif
485			}
486
487
488			/*
489			=for apidoc rpp_popfree_1_NN
490
491			A variant of rpp_popfree_1() which assumes that the pointer being popped
492			off the stack is non-NULL.
493
494			=cut
495			*/
496
497			PERL_STATIC_INLINE void
498			Perl_rpp_popfree_1_NN(pTHX)
499			{
500			PERL_ARGS_ASSERT_RPP_POPFREE_1_NN;
501
502			assert(*PL_stack_sp);
503			#ifdef PERL_RC_STACK
504			assert(rpp_stack_is_rc());
505			SV sv = PL_stack_sp--;
506			SvREFCNT_dec_NN(sv);
507			#else
508			PL_stack_sp--;
509			#endif
510			}
511
512
513			/*
514			=for apidoc rpp_popfree_2
515
516			Pop and free the top two items on the argument stack and update
517			C.
518
519			=cut
520			*/
521
522
523			PERL_STATIC_INLINE void
524			Perl_rpp_popfree_2(pTHX)
525			{
526			PERL_ARGS_ASSERT_RPP_POPFREE_2;
527
528			#ifdef PERL_RC_STACK
529			assert(rpp_stack_is_rc());
530			for (int i = 0; i < 2; i++) {
531			SV sv = PL_stack_sp--;
532			SvREFCNT_dec(sv);
533			}
534			#else
535			PL_stack_sp -= 2;
536			#endif
537			}
538
539
540			/*
541			=for apidoc rpp_popfree_2_NN
542
543			A variant of rpp_popfree_2() which assumes that the two pointers being
544			popped off the stack are non-NULL.
545
546			=cut
547			*/
548
549
550			PERL_STATIC_INLINE void
551			Perl_rpp_popfree_2_NN(pTHX)
552			{
553			PERL_ARGS_ASSERT_RPP_POPFREE_2_NN;
554			#ifdef PERL_RC_STACK
555			SV sv2 = PL_stack_sp--;
556			assert(sv2);
557			SV sv1 = PL_stack_sp;
558			assert(sv1);
559
560			assert(rpp_stack_is_rc());
561			U32 rc1 = SvREFCNT(sv1);
562			U32 rc2 = SvREFCNT(sv2);
563			/* This expression is intended to be true if either of rc1 or rc2 has
564			* the value 0 or 1, but using only a single branch test, rather
565			* than the two branches that a compiler would plant for a boolean
566			* expression. We are working on the assumption that, most of the
567			* time, neither of the args to a binary function will need to be
568			* freed - they're likely to lex vars, or PADTMPs or whatever.
569			* So give the CPU a single branch that is rarely taken. */
570			if (UNLIKELY( !(rc1>>1) + !(rc2>>1) ))
571			/* at least one of the old SVs needs freeing. Do it the long way */
572			Perl_rpp_free_2_(aTHX_ sv1, sv2, rc1, rc2);
573			else {
574			SvREFCNT(sv1) = rc1 - 1;
575			SvREFCNT(sv2) = rc2 - 1;
576			}
577			PL_stack_sp--;
578			#else
579			PL_stack_sp -= 2;
580			#endif
581			}
582
583
584			/*
585			=for apidoc rpp_pop_1_norc
586
587			Pop and return the top item off the argument stack and update
588			C. It's similar to rpp_popfree_1(), except that it actually
589			returns a value, and it I decrement the SV's reference count.
590			On non-C builds it actually increments the SV's reference
591			count.
592
593			This is useful in cases where the popped value is immediately embedded
594			somewhere e.g. via av_store(), allowing you skip decrementing and then
595			immediately incrementing the reference count again (and risk prematurely
596			freeing the SV if it had a RC of 1). On non-RC builds, the reference count
597			bookkeeping still works too, which is why it should be used rather than
598			a simple C<*PL_stack_sp-->.
599
600			=cut
601			*/
602
603			PERL_STATIC_INLINE SV*
604			Perl_rpp_pop_1_norc(pTHX)
605			{
606			PERL_ARGS_ASSERT_RPP_POP_1_NORC
607
608			SV sv = PL_stack_sp--;
609
610			#ifndef PERL_RC_STACK
611			SvREFCNT_inc(sv);
612			#else
613			assert(rpp_stack_is_rc());
614			#endif
615			return sv;
616			}
617
618
619
620			/*
621			=for apidoc rpp_push_1
622			=for apidoc_item rpp_push_IMM
623			=for apidoc_item rpp_push_2
624			=for apidoc_item rpp_xpush_1
625			=for apidoc_item rpp_xpush_IMM
626			=for apidoc_item rpp_xpush_2
627
628			Push one or two SVs onto the stack, incrementing their reference counts
629			and updating C. With the C variants, it extends the stack
630			first. The C variants assume that the single argument is an immortal
631			such as <&PL_sv_undef> and, for efficiency, will skip incrementing its
632			reference count.
633
634			=cut
635			*/
636
637			PERL_STATIC_INLINE void
638			Perl_rpp_push_1(pTHX_ SV *sv)
639			{
640			PERL_ARGS_ASSERT_RPP_PUSH_1;
641
642			*++PL_stack_sp = sv;
643			#ifdef PERL_RC_STACK
644			assert(rpp_stack_is_rc());
645			SvREFCNT_inc_simple_void_NN(sv);
646			#endif
647			}
648
649			PERL_STATIC_INLINE void
650			Perl_rpp_push_IMM(pTHX_ SV *sv)
651			{
652			PERL_ARGS_ASSERT_RPP_PUSH_IMM;
653
654			assert(SvIMMORTAL(sv));
655			*++PL_stack_sp = sv;
656			#ifdef PERL_RC_STACK
657			assert(rpp_stack_is_rc());
658			#endif
659			}
660
661			PERL_STATIC_INLINE void
662			Perl_rpp_push_2(pTHX_ SV sv1, SV sv2)
663			{
664			PERL_ARGS_ASSERT_RPP_PUSH_2;
665
666			*++PL_stack_sp = sv1;
667			*++PL_stack_sp = sv2;
668			#ifdef PERL_RC_STACK
669			assert(rpp_stack_is_rc());
670			SvREFCNT_inc_simple_void_NN(sv1);
671			SvREFCNT_inc_simple_void_NN(sv2);
672			#endif
673			}
674
675			PERL_STATIC_INLINE void
676			Perl_rpp_xpush_1(pTHX_ SV *sv)
677			{
678			PERL_ARGS_ASSERT_RPP_XPUSH_1;
679
680			rpp_extend(1);
681			rpp_push_1(sv);
682			}
683
684			PERL_STATIC_INLINE void
685			Perl_rpp_xpush_IMM(pTHX_ SV *sv)
686			{
687			PERL_ARGS_ASSERT_RPP_XPUSH_IMM;
688
689			rpp_extend(1);
690			rpp_push_IMM(sv);
691			}
692
693			PERL_STATIC_INLINE void
694			Perl_rpp_xpush_2(pTHX_ SV sv1, SV sv2)
695			{
696			PERL_ARGS_ASSERT_RPP_XPUSH_2;
697
698			rpp_extend(2);
699			rpp_push_2(sv1, sv2);
700			}
701
702
703			/*
704			=for apidoc rpp_push_1_norc
705
706			Push C onto the stack without incrementing its reference count, and
707			update C. On non-PERL_RC_STACK builds, mortalise too.
708
709			This is most useful where an SV has just been created and already has a
710			reference count of 1, but has not yet been anchored anywhere.
711
712			=cut
713			*/
714
715			PERL_STATIC_INLINE void
716			Perl_rpp_push_1_norc(pTHX_ SV *sv)
717			{
718			PERL_ARGS_ASSERT_RPP_PUSH_1;
719
720			*++PL_stack_sp = sv;
721			#ifdef PERL_RC_STACK
722			assert(rpp_stack_is_rc());
723			#else
724			sv_2mortal(sv);
725			#endif
726			}
727
728
729			/*
730			=for apidoc rpp_replace_1_1
731			=for apidoc_item rpp_replace_1_1_NN
732			=for apidoc_item rpp_replace_1_IMM_NN
733
734			Replace the current top stack item with C, while suitably adjusting
735			reference counts. Equivalent to rpp_popfree_1(); rpp_push_1(sv), but
736			is more efficient and handles both SVs being the same.
737
738			The C<_NN> variant assumes that the pointer on the stack to the SV being
739			freed is non-NULL.
740
741			The C variant is like the C<_NN> variant, but in addition, assumes
742			that the single argument is an immortal such as <&PL_sv_undef> and, for
743			efficiency, will skip incrementing its reference count.
744
745			=cut
746			*/
747
748			PERL_STATIC_INLINE void
749			Perl_rpp_replace_1_1(pTHX_ SV *sv)
750			{
751			PERL_ARGS_ASSERT_RPP_REPLACE_1_1;
752
753			assert(sv);
754			#ifdef PERL_RC_STACK
755			assert(rpp_stack_is_rc());
756			SV oldsv = PL_stack_sp;
757			*PL_stack_sp = sv;
758			SvREFCNT_inc_simple_void_NN(sv);
759			SvREFCNT_dec(oldsv);
760			#else
761			*PL_stack_sp = sv;
762			#endif
763			}
764
765
766			PERL_STATIC_INLINE void
767			Perl_rpp_replace_1_1_NN(pTHX_ SV *sv)
768			{
769			PERL_ARGS_ASSERT_RPP_REPLACE_1_1_NN;
770
771			assert(sv);
772			assert(*PL_stack_sp);
773			#ifdef PERL_RC_STACK
774			assert(rpp_stack_is_rc());
775			SV oldsv = PL_stack_sp;
776			*PL_stack_sp = sv;
777			SvREFCNT_inc_simple_void_NN(sv);
778			SvREFCNT_dec_NN(oldsv);
779			#else
780			*PL_stack_sp = sv;
781			#endif
782			}
783
784
785			PERL_STATIC_INLINE void
786			Perl_rpp_replace_1_IMM_NN(pTHX_ SV *sv)
787			{
788			PERL_ARGS_ASSERT_RPP_REPLACE_1_IMM_NN;
789
790			assert(sv);
791			assert(SvIMMORTAL(sv));
792			assert(*PL_stack_sp);
793			#ifdef PERL_RC_STACK
794			assert(rpp_stack_is_rc());
795			SV oldsv = PL_stack_sp;
796			*PL_stack_sp = sv;
797			SvREFCNT_dec_NN(oldsv);
798			#else
799			*PL_stack_sp = sv;
800			#endif
801			}
802
803
804			/*
805			=for apidoc rpp_replace_2_1
806			=for apidoc_item rpp_replace_2_1_NN
807			=for apidoc_item rpp_replace_2_IMM_NN
808
809			Replace the current top to stacks item with C, while suitably
810			adjusting reference counts. Equivalent to rpp_popfree_2(); rpp_push_1(sv),
811			but is more efficient and handles SVs being the same.
812
813			The C<_NN> variant assumes that the pointers on the stack to the SVs being
814			freed are non-NULL.
815
816			The C variant is like the C<_NN> variant, but in addition, assumes
817			that the single argument is an immortal such as <&PL_sv_undef> and, for
818			efficiency, will skip incrementing its reference count.
819			=cut
820			*/
821
822			PERL_STATIC_INLINE void
823			Perl_rpp_replace_2_1(pTHX_ SV *sv)
824			{
825			PERL_ARGS_ASSERT_RPP_REPLACE_2_1;
826
827			#ifdef PERL_RC_STACK
828			assert(rpp_stack_is_rc());
829			/* replace PL_stack_sp[-1] first; leave PL_stack_sp[0] in place while
830			* we free [-1], so if an exception occurs, [0] will still be freed.
831			*/
832			SV *oldsv = PL_stack_sp[-1];
833			PL_stack_sp[-1] = sv;
834			SvREFCNT_inc_simple_void_NN(sv);
835			SvREFCNT_dec(oldsv);
836			oldsv = *PL_stack_sp--;
837			SvREFCNT_dec(oldsv);
838			#else
839			*--PL_stack_sp = sv;
840			#endif
841			}
842
843
844			/* Private helper function for _NN and _IMM_NN variants.
845			* Assumes sv has already had its ref count incremented,
846			* ready for being put on the stack.
847			* Intended to be small and fast, since it's inlined into many hot parts of
848			* code.
849			*/
850
851			PERL_STATIC_INLINE void
852			Perl_rpp_replace_2_1_COMMON(pTHX_ SV *sv)
853			{
854
855			assert(sv);
856			#ifdef PERL_RC_STACK
857			SV sv2 = PL_stack_sp--;
858			assert(sv2);
859			SV sv1 = PL_stack_sp;
860			assert(sv1);
861
862			*PL_stack_sp = sv;
863			assert(rpp_stack_is_rc());
864			U32 rc1 = SvREFCNT(sv1);
865			U32 rc2 = SvREFCNT(sv2);
866			/* This expression is intended to be true if either of rc1 or rc2 has
867			* the value 0 or 1, but using only a single branch test, rather
868			* than the two branches that a compiler would plant for a boolean
869			* expression. We are working on the assumption that, most of the
870			* time, neither of the args to a binary function will need to be
871			* freed - they're likely to lex vars, or PADTMPs or whatever.
872			* So give the CPU a single branch that is rarely taken. */
873			if (UNLIKELY( !(rc1>>1) + !(rc2>>1) ))
874			/* at least one of the old SVs needs freeing. Do it the long way */
875			Perl_rpp_free_2_(aTHX_ sv1, sv2, rc1, rc2);
876			else {
877			SvREFCNT(sv1) = rc1 - 1;
878			SvREFCNT(sv2) = rc2 - 1;
879			}
880			#else
881			*--PL_stack_sp = sv;
882			#endif
883			}
884
885
886			PERL_STATIC_INLINE void
887			Perl_rpp_replace_2_1_NN(pTHX_ SV *sv)
888			{
889			PERL_ARGS_ASSERT_RPP_REPLACE_2_1_NN;
890
891			assert(sv);
892			#ifdef PERL_RC_STACK
893			SvREFCNT_inc_simple_void_NN(sv);
894			#endif
895			rpp_replace_2_1_COMMON(sv);
896			}
897
898
899			PERL_STATIC_INLINE void
900			Perl_rpp_replace_2_IMM_NN(pTHX_ SV *sv)
901			{
902			PERL_ARGS_ASSERT_RPP_REPLACE_2_IMM_NN;
903
904			assert(sv);
905			assert(SvIMMORTAL(sv));
906			rpp_replace_2_1_COMMON(sv);
907			}
908
909
910			/*
911			=for apidoc rpp_replace_at
912
913			Replace the SV at address sp within the stack with C, while suitably
914			adjusting reference counts. Equivalent to C<*sp = sv>, except with proper
915			reference count handling.
916
917			=cut
918			*/
919
920			PERL_STATIC_INLINE void
921			Perl_rpp_replace_at(pTHX_ SV *sp, SV sv)
922			{
923			PERL_ARGS_ASSERT_RPP_REPLACE_AT;
924
925			#ifdef PERL_RC_STACK
926			assert(rpp_stack_is_rc());
927			SV oldsv = sp;
928			*sp = sv;
929			SvREFCNT_inc_simple_void_NN(sv);
930			SvREFCNT_dec(oldsv);
931			#else
932			*sp = sv;
933			#endif
934			}
935
936
937			/*
938			=for apidoc rpp_replace_at_NN
939
940			A variant of rpp_replace_at() which assumes that the SV pointer on the
941			stack is non-NULL.
942
943			=cut
944			*/
945
946			PERL_STATIC_INLINE void
947			Perl_rpp_replace_at_NN(pTHX_ SV *sp, SV sv)
948			{
949			PERL_ARGS_ASSERT_RPP_REPLACE_AT_NN;
950
951			assert(sv);
952			assert(*sp);
953			#ifdef PERL_RC_STACK
954			assert(rpp_stack_is_rc());
955			SV oldsv = sp;
956			*sp = sv;
957			SvREFCNT_inc_simple_void_NN(sv);
958			SvREFCNT_dec_NN(oldsv);
959			#else
960			*sp = sv;
961			#endif
962			}
963
964
965			/*
966			=for apidoc rpp_replace_at_norc
967
968			Replace the SV at address sp within the stack with C, while suitably
969			adjusting the reference count of the old SV. Equivalent to C<*sp = sv>,
970			except with proper reference count handling.
971
972			C's reference count doesn't get incremented. On non-C
973			builds, it gets mortalised too.
974
975			This is most useful where an SV has just been created and already has a
976			reference count of 1, but has not yet been anchored anywhere.
977
978			=cut
979			*/
980
981			PERL_STATIC_INLINE void
982			Perl_rpp_replace_at_norc(pTHX_ SV *sp, SV sv)
983			{
984			PERL_ARGS_ASSERT_RPP_REPLACE_AT_NORC;
985
986			#ifdef PERL_RC_STACK
987			assert(rpp_stack_is_rc());
988			SV oldsv = sp;
989			*sp = sv;
990			SvREFCNT_dec(oldsv);
991			#else
992			*sp = sv;
993			sv_2mortal(sv);
994			#endif
995			}
996
997
998			/*
999			=for apidoc rpp_replace_at_norc_NN
1000
1001			A variant of rpp_replace_at_norc() which assumes that the SV pointer on the
1002			stack is non-NULL.
1003
1004			=cut
1005			*/
1006
1007			PERL_STATIC_INLINE void
1008			Perl_rpp_replace_at_norc_NN(pTHX_ SV *sp, SV sv)
1009			{
1010			PERL_ARGS_ASSERT_RPP_REPLACE_AT_NORC_NN;
1011
1012			assert(*sp);
1013			#ifdef PERL_RC_STACK
1014			assert(rpp_stack_is_rc());
1015			SV oldsv = sp;
1016			*sp = sv;
1017			SvREFCNT_dec_NN(oldsv);
1018			#else
1019			*sp = sv;
1020			sv_2mortal(sv);
1021			#endif
1022			}
1023
1024
1025			/*
1026			=for apidoc rpp_context
1027
1028			Impose void, scalar or list context on the stack.
1029			First, pop C items off the stack, then when C is:
1030			C: return as-is.
1031			C: pop everything back to C
1032			C: move the top stack item (or C<&PL_sv_undef> if none) to
1033			C and free everything above it.
1034
1035			=cut
1036			*/
1037
1038			PERL_STATIC_INLINE void
1039			Perl_rpp_context(pTHX_ SV **mark, U8 gimme, SSize_t extra)
1040			{
1041			PERL_ARGS_ASSERT_RPP_CONTEXT;
1042			assert(extra >= 0);
1043			assert(mark <= PL_stack_sp - extra);
1044
1045			if (gimme == G_LIST)
1046			mark = PL_stack_sp - extra;
1047			else if (gimme == G_SCALAR) {
1048			SV **svp = PL_stack_sp - extra;
1049			mark++;
1050			if (mark > svp) {
1051			/* empty list (plus extra) */
1052			rpp_popfree_to(svp);
1053			rpp_extend(1);
1054			*++PL_stack_sp = &PL_sv_undef;
1055			return;
1056			}
1057			/* swap top and bottom list items */
1058			SV top = svp;
1059			svp = mark;
1060			*mark = top;
1061			}
1062			rpp_popfree_to(mark);
1063			}
1064
1065
1066
1067
1068			/*
1069			=for apidoc rpp_try_AMAGIC_1
1070			=for apidoc_item rpp_try_AMAGIC_2
1071
1072			Check whether either of the one or two SVs at the top of the stack is
1073			magical or a ref, and in either case handle it specially: invoke get
1074			magic, call an overload method, or replace a ref with a temporary numeric
1075			value, as appropriate. If this function returns true, it indicates that
1076			the correct return value is already on the stack. Intended to be used at
1077			the beginning of the PP function for unary or binary ops.
1078
1079			=cut
1080			*/
1081
1082			PERL_STATIC_INLINE bool
1083			Perl_rpp_try_AMAGIC_1(pTHX_ int method, int flags)
1084			{
1085			return UNLIKELY((SvFLAGS(*PL_stack_sp) & (SVf_ROK\|SVs_GMG)))
1086			&& Perl_try_amagic_un(aTHX_ method, flags);
1087			}
1088
1089			PERL_STATIC_INLINE bool
1090			Perl_rpp_try_AMAGIC_2(pTHX_ int method, int flags)
1091			{
1092			return UNLIKELY(((SvFLAGS(PL_stack_sp[-1])\|SvFLAGS(PL_stack_sp[0]))
1093			& (SVf_ROK\|SVs_GMG)))
1094			&& Perl_try_amagic_bin(aTHX_ method, flags);
1095			}
1096
1097
1098			/*
1099			=for apidoc rpp_stack_is_rc
1100
1101			Returns a boolean value indicating whether the stack is currently
1102			reference-counted. Note that if the stack is split (bottom half RC, top
1103			half non-RC), this function returns false, even if the top half currently
1104			contains zero items.
1105
1106			=cut
1107			*/
1108
1109			PERL_STATIC_INLINE bool
1110			Perl_rpp_stack_is_rc(pTHX)
1111			{
1112			#ifdef PERL_RC_STACK
1113			return AvREAL(PL_curstack) && !PL_curstackinfo->si_stack_nonrc_base;
1114			#else
1115			return 0;
1116			#endif
1117
1118			}
1119
1120
1121			/*
1122			=for apidoc rpp_is_lone
1123
1124			Indicates whether the stacked SV C (assumed to be not yet popped off
1125			the stack) is only kept alive due to a single reference from the argument
1126			stack and/or and the temps stack.
1127
1128			This can used for example to decide whether the copying of return values
1129			in rvalue context can be skipped, or whether it shouldn't be assigned to
1130			in lvalue context.
1131
1132			=cut
1133			*/
1134
1135			PERL_STATIC_INLINE bool
1136			Perl_rpp_is_lone(pTHX_ SV *sv)
1137			{
1138			#ifdef PERL_RC_STACK
1139			/* note that rpp_is_lone() can be used in wrapped pp functions,
1140			* where technically the stack is no longer ref-counted; but because
1141			* the args are non-RC copies of RC args further down the stack, we
1142			* can't be in a completely non-ref stack.
1143			*/
1144			assert(AvREAL(PL_curstack));
1145			#endif
1146
1147			return SvREFCNT(sv) <= (U32)cBOOL(SvTEMP(sv))
1148			#ifdef PERL_RC_STACK
1149			+ 1u
1150			&& !SvIMMORTAL(sv) /* PL_sv_undef etc are never stealable */
1151			#endif
1152			;
1153			}
1154
1155
1156			/*
1157			=for apidoc rpp_invoke_xs
1158
1159			Call the XS function associated with C. Wraps the call if necessary to
1160			handle XS functions which are not aware of reference-counted stacks.
1161
1162			=cut
1163			*/
1164
1165
1166			PERL_STATIC_INLINE void
1167			Perl_rpp_invoke_xs(pTHX_ CV *cv)
1168			{
1169			PERL_ARGS_ASSERT_RPP_INVOKE_XS;
1170
1171			#ifdef PERL_RC_STACK
1172			if (!CvXS_RCSTACK(cv))
1173			Perl_xs_wrap(aTHX_ CvXSUB(cv), cv);
1174			else
1175			#endif
1176			CvXSUB(cv)(aTHX_ cv);
1177			}
1178
1179
1180			/* for SvCANEXISTDELETE() macro in pp.h */
1181			PERL_STATIC_INLINE bool
1182			Perl_sv_can_existdelete(pTHX_ SV *sv)
1183			{
1184			/* Anything without tie magic is fine */
1185			MAGIC *mg;
1186			if(!SvRMAGICAL(sv) \|\| !(mg = mg_find(sv, PERL_MAGIC_tied)))
1187			return true;
1188
1189			HV *stash = SvSTASH(SvRV(SvTIED_obj(sv, mg)));
1190			return stash &&
1191			gv_fetchmethod_autoload(stash, "EXISTS", TRUE) &&
1192			gv_fetchmethod_autoload(stash, "DELETE", TRUE);
1193			}
1194
1195
1196			/* ----------------------------- regexp.h ----------------------------- */
1197
1198			/* PVLVs need to act as a superset of all scalar types - they are basically
1199			* PVMGs with a few extra fields.
1200			* REGEXPs are first class scalars, but have many fields that can't be copied
1201			* into a PVLV body.
1202			*
1203			* Hence we take a different approach - instead of a copy, PVLVs store a pointer
1204			* back to the original body. To avoid increasing the size of PVLVs just for the
1205			* rare case of REGEXP assignment, this pointer is stored in the memory usually
1206			* used for SvLEN(). Hence the check for SVt_PVLV below, and the ? : ternary to
1207			* read the pointer from the two possible locations. The macro SvLEN() wraps the
1208			* access to the union's member xpvlenu_len, but there is no equivalent macro
1209			* for wrapping the union's member xpvlenu_rx, hence the direct reference here.
1210			*
1211			* See commit df6b4bd56551f2d3 for more details. */
1212
1213			PERL_STATIC_INLINE struct regexp *
1214			Perl_ReANY(const REGEXP * const re)
1215			{
1216			XPV* const p = (XPV*)SvANY(re);
1217
1218			PERL_ARGS_ASSERT_REANY;
1219			assert(isREGEXP(re));
1220
1221			return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
1222			: (struct regexp *)p;
1223			}
1224
1225			/* ------------------------------- utf8.h ------------------------------- */
1226
1227			/*
1228			=for apidoc_section $unicode
1229			*/
1230
1231			PERL_STATIC_INLINE void
1232			Perl_append_utf8_from_native_byte(const U8 byte, U8** dest)
1233			{
1234			/* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
1235			* encoded string at 'dest', updating 'dest' to include it */
1236
1237			PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
1238
1239			if (NATIVE_BYTE_IS_INVARIANT(byte))
1240			((dest)++) = byte;
1241			else {
1242			((dest)++) = UTF8_EIGHT_BIT_HI(byte);
1243			((dest)++) = UTF8_EIGHT_BIT_LO(byte);
1244			}
1245			}
1246
1247			PERL_STATIC_INLINE U8 *
1248			Perl_bytes_to_utf8(pTHX_ const U8 s, STRLEN lenp)
1249			{
1250			return bytes_to_utf8_free_me(s, lenp, NULL);
1251			}
1252
1253			PERL_STATIC_INLINE U8 *
1254			Perl_bytes_to_utf8_temp_pv(pTHX_ const U8 s, STRLEN lenp)
1255			{
1256			void * free_me = NULL;
1257			U8 * converted = bytes_to_utf8_free_me(s, lenp, &free_me);
1258
1259			if (free_me) {
1260			SAVEFREEPV(free_me);
1261			}
1262
1263			return converted;
1264			}
1265
1266			PERL_STATIC_INLINE bool
1267			Perl_utf8_to_bytes_new_pv(pTHX_ U8 const *s_ptr, STRLEN lenp, void ** free_me)
1268			{
1269			/* utf8_to_bytes_() is declared to take a non-const s_ptr because it may
1270			* change it, but NOT when called with PL_utf8_to_bytes_new_memory, so it
1271			* is ok to cast away const */
1272			return utf8_to_bytes_((U8 **) s_ptr, lenp, free_me,
1273			PL_utf8_to_bytes_new_memory);
1274			}
1275
1276			PERL_STATIC_INLINE bool
1277			Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const *s_ptr, STRLEN lenp)
1278			{
1279			/* utf8_to_bytes_() requires a non-NULL pointer, but doesn't use it when
1280			* called with PL_utf8_to_bytes_use_temporary */
1281			void* dummy = NULL;
1282
1283			/* utf8_to_bytes_() is declared to take a non-const s_ptr because it may
1284			* change it, but NOT when called with PL_utf8_to_bytes_use_temporary, so
1285			* it is ok to cast away const */
1286			return utf8_to_bytes_((U8 **) s_ptr, lenp, &dummy,
1287			PL_utf8_to_bytes_use_temporary);
1288			}
1289
1290			PERL_STATIC_INLINE bool
1291			Perl_utf8_to_bytes_overwrite(pTHX_ U8 *s_ptr, STRLEN lenp)
1292			{
1293			/* utf8_to_bytes_() requires a non-NULL pointer, but doesn't use it when
1294			* called with PL_utf8_to_bytes_overwrite */
1295			void* dummy = NULL;
1296
1297			return utf8_to_bytes_(s_ptr, lenp, &dummy, PL_utf8_to_bytes_overwrite);
1298			}
1299
1300			/*
1301			=for apidoc valid_utf8_to_uvchr
1302			Like C>, but should only be called when it is
1303			known that the next character in the input UTF-8 string C ~~is well-formed~~
1304			(I, it passes C>. Surrogates, non-character code
1305			points, and non-Unicode code points are allowed.
1306
1307			=cut
1308
1309			*/
1310
1311			PERL_STATIC_INLINE UV
1312			Perl_valid_utf8_to_uvchr(const U8 s, STRLEN retlen)
1313			{
1314			const UV expectlen = UTF8SKIP(s);
1315			const U8* send = s + expectlen;
1316			UV uv = *s;
1317
1318			PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
1319
1320			if (retlen) {
1321			*retlen = expectlen;
1322			}
1323
1324			/* An invariant is trivially returned */
1325			if (expectlen == 1) {
1326			return uv;
1327			}
1328
1329			/* Remove the leading bits that indicate the number of bytes, leaving just
1330			* the bits that are part of the value */
1331			uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
1332
1333			/* Now, loop through the remaining bytes, accumulating each into the
1334			* working total as we go. (I khw tried unrolling the loop for up to 4
1335			* bytes, but there was no performance improvement) */
1336			for (++s; s < send; s++) {
1337			uv = UTF8_ACCUMULATE(uv, *s);
1338			}
1339
1340			return UNI_TO_NATIVE(uv);
1341
1342			}
1343
1344			/* This looks like 0x010101... */
1345			# define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
1346
1347			/* This looks like 0x808080... */
1348			# define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
1349			# define PERL_WORDSIZE sizeof(PERL_UINTMAX_T)
1350			# define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
1351
1352			/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
1353			* or'ing together the lowest bits of 'x'. Hopefully the final term gets
1354			* optimized out completely on a 32-bit system, and its mask gets optimized out
1355			* on a 64-bit system */
1356			# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
1357			\| ( PTR2nat(x) >> 1) \
1358			\| ( ( (PTR2nat(x) \
1359			& PERL_WORD_BOUNDARY_MASK) >> 2))))
1360
1361			/*
1362			=for apidoc is_utf8_invariant_string
1363			=for apidoc_item is_utf8_invariant_string_loc
1364			=for apidoc_item is_ascii_string
1365			=for apidoc_item is_invariant_string
1366
1367			These each return TRUE if the first C bytes of the string C ~~are the~~
1368			same regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
1369			EBCDIC machines); otherwise they returns FALSE. That is, they return TRUE if
1370			they are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and
1371			only the ASCII characters fit this definition. On EBCDIC machines, the
1372			ASCII-range characters are invariant, but so also are the C1 controls.
1373
1374			If C is 0, it will be calculated using C, (which means if you
1375			use this option, that C ~~can't have embedded C characters and has to~~
1376			have a terminating C byte).
1377
1378			All forms except C have identical behavior. The
1379			only difference with it is that it has an extra pointer parameter, C, into
1380			which, if it isn't NULL, the location of the first UTF-8 variant character in
1381			the C pointer will be stored upon failure. If all characters are UTF-8
1382			invariant, this function does not change the contents of C<*ep>.
1383
1384			C is somewhat misleadingly named.
1385			C is preferred, as it indicates under what conditions
1386			the string is invariant.
1387
1388			C is misleadingly-named. On ASCII-ish platforms, the name
1389			isn't misleading: the ASCII-range characters are exactly the UTF-8 invariants.
1390			But EBCDIC machines have more UTF-8 invariants than just the ASCII characters,
1391			so the name C is preferred.
1392
1393			See also
1394			C> and C>.
1395
1396			=for apidoc_defn ARTm\|bool\|is_utf8_invariant_string\|NN const U8 * const s\|STRLEN len
1397
1398			=cut
1399
1400			*/
1401
1402			#define is_utf8_invariant_string(s, len) \
1403			is_utf8_invariant_string_loc(s, len, NULL)
1404
1405			PERL_STATIC_INLINE bool
1406			Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1407			{
1408			const U8* send;
1409			const U8* x = s;
1410
1411			PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
1412
1413			if (len == 0) {
1414			len = strlen((const char *)s);
1415			}
1416
1417			send = s + len;
1418
1419			#ifndef EBCDIC
1420
1421			/* Do the word-at-a-time iff there is at least one usable full word. That
1422			* means that after advancing to a word boundary, there still is at least a
1423			* full word left. The number of bytes needed to advance is 'wordsize -
1424			* offset' unless offset is 0. */
1425			if ((STRLEN) (send - x) >= PERL_WORDSIZE
1426
1427			/* This term is wordsize if subword; 0 if not */
1428			+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
1429
1430			/* 'offset' */
1431			- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
1432			{
1433
1434			/* Process per-byte until reach word boundary. XXX This loop could be
1435			* eliminated if we knew that this platform had fast unaligned reads */
1436			while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
1437			if (! UTF8_IS_INVARIANT(*x)) {
1438			if (ep) {
1439			*ep = x;
1440			}
1441
1442			return FALSE;
1443			}
1444			x++;
1445			}
1446
1447			/* Here, we know we have at least one full word to process. Process
1448			* per-word as long as we have at least a full word left */
1449			do {
1450			if ((* (const PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
1451
1452			/* Found a variant. Just return if caller doesn't want its
1453			* exact position */
1454			if (! ep) {
1455			return FALSE;
1456			}
1457
1458			# if BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 \
1459			\|\| BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
1460
1461			ep = x + variant_byte_number( (const PERL_UINTMAX_T *) x);
1462			assert(ep >= s && ep < send);
1463
1464			return FALSE;
1465
1466			# else /* If weird byte order, drop into next loop to do byte-at-a-time
1467			checks. */
1468
1469			break;
1470			# endif
1471			}
1472
1473			x += PERL_WORDSIZE;
1474
1475			} while (x + PERL_WORDSIZE <= send);
1476			}
1477
1478			#endif /* End of ! EBCDIC */
1479
1480			/* Process per-byte. (Can't use libc functions like strpbrk() because
1481			* input isn't necessarily a C string) */
1482			while (x < send) {
1483			if (! UTF8_IS_INVARIANT(*x)) {
1484			if (ep) {
1485			*ep = x;
1486			}
1487
1488			return FALSE;
1489			}
1490
1491			x++;
1492			}
1493
1494			return TRUE;
1495			}
1496
1497			/* See if the platform has builtins for finding the most/least significant bit,
1498			* and which one is right for using on 32 and 64 bit operands */
1499			#if (__has_builtin(__builtin_clz) \|\| PERL_GCC_VERSION_GE(3,4,0))
1500			# if U32SIZE == INTSIZE
1501			# define PERL_CLZ_32 __builtin_clz
1502			# endif
1503			# if defined(U64TYPE) && U64SIZE == INTSIZE
1504			# define PERL_CLZ_64 __builtin_clz
1505			# endif
1506			#endif
1507			#if (__has_builtin(__builtin_ctz) \|\| PERL_GCC_VERSION_GE(3,4,0))
1508			# if U32SIZE == INTSIZE
1509			# define PERL_CTZ_32 __builtin_ctz
1510			# endif
1511			# if defined(U64TYPE) && U64SIZE == INTSIZE
1512			# define PERL_CTZ_64 __builtin_ctz
1513			# endif
1514			#endif
1515
1516			#if (__has_builtin(__builtin_clzl) \|\| PERL_GCC_VERSION_GE(3,4,0))
1517			# if U32SIZE == LONGSIZE && ! defined(PERL_CLZ_32)
1518			# define PERL_CLZ_32 __builtin_clzl
1519			# endif
1520			# if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CLZ_64)
1521			# define PERL_CLZ_64 __builtin_clzl
1522			# endif
1523			#endif
1524			#if (__has_builtin(__builtin_ctzl) \|\| PERL_GCC_VERSION_GE(3,4,0))
1525			# if U32SIZE == LONGSIZE && ! defined(PERL_CTZ_32)
1526			# define PERL_CTZ_32 __builtin_ctzl
1527			# endif
1528			# if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CTZ_64)
1529			# define PERL_CTZ_64 __builtin_ctzl
1530			# endif
1531			#endif
1532
1533			#if (__has_builtin(__builtin_clzll) \|\| PERL_GCC_VERSION_GE(3,4,0))
1534			# if U32SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_32)
1535			# define PERL_CLZ_32 __builtin_clzll
1536			# endif
1537			# if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_64)
1538			# define PERL_CLZ_64 __builtin_clzll
1539			# endif
1540			#endif
1541			#if (__has_builtin(__builtin_ctzll) \|\| PERL_GCC_VERSION_GE(3,4,0))
1542			# if U32SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_32)
1543			# define PERL_CTZ_32 __builtin_ctzll
1544			# endif
1545			# if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_64)
1546			# define PERL_CTZ_64 __builtin_ctzll
1547			# endif
1548			#endif
1549
1550			#if defined(WIN32)
1551			# include
1552			/* MinGW warns that it ignores "pragma intrinsic". */
1553			# if defined(_MSC_VER)
1554			# pragma intrinsic(_BitScanForward)
1555			# pragma intrinsic(_BitScanReverse)
1556			# if defined(_WIN64)
1557			# pragma intrinsic(_BitScanForward64)
1558			# pragma intrinsic(_BitScanReverse64)
1559			# endif
1560			# endif
1561			#endif
1562
1563			/* The reason there are not checks to see if ffs() and ffsl() are available for
1564			* determining the lsb, is because these don't improve on the deBruijn method
1565			* fallback, which is just a branchless integer multiply, array element
1566			* retrieval, and shift. The others, even if the function call overhead is
1567			* optimized out, have to cope with the possibility of the input being all
1568			* zeroes, and almost certainly will have conditionals for this eventuality.
1569			* khw, at the time of this commit, looked at the source for both gcc and clang
1570			* to verify this. (gcc used a method inferior to deBruijn.) */
1571
1572			/* Below are functions to find the first, last, or only set bit in a word. On
1573			* platforms with 64-bit capability, there is a pair for each operation; the
1574			* first taking a 64 bit operand, and the second a 32 bit one. The logic is
1575			* the same in each pair, so the second is stripped of most comments. */
1576
1577			#ifdef U64TYPE /* HAS_QUAD not usable outside the core */
1578
1579			PERL_STATIC_INLINE unsigned
1580			Perl_lsbit_pos64(U64 word)
1581			{
1582			/* Find the position (0..63) of the least significant set bit in the input
1583			* word */
1584
1585			ASSUME(word != 0);
1586
1587			/* If we can determine that the platform has a usable fast method to get
1588			* this info, use that */
1589
1590			# if defined(PERL_CTZ_64)
1591			# define PERL_HAS_FAST_GET_LSB_POS64
1592
1593			return (unsigned) PERL_CTZ_64(word);
1594
1595			# elif U64SIZE == 8 && defined(_WIN64)
1596			# define PERL_HAS_FAST_GET_LSB_POS64
1597
1598			{
1599			unsigned long index;
1600			_BitScanForward64(&index, word);
1601			return (unsigned)index;
1602			}
1603
1604			# else
1605
1606			/* Here, we didn't find a fast method for finding the lsb. Fall back to
1607			* making the lsb the only set bit in the word, and use our function that
1608			* works on words with a single bit set.
1609			*
1610			* Isolate the lsb;
1611			* https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
1612			*
1613			* The word will look like this, with a rightmost set bit in position 's':
1614			* ('x's are don't cares, and 'y's are their complements)
1615			* s
1616			* x..x100..00
1617			* y..y011..11 Complement
1618			* y..y100..00 Add 1
1619			* 0..0100..00 And with the original
1620			*
1621			* (Yes, complementing and adding 1 is just taking the negative on 2's
1622			* complement machines, but not on 1's complement ones, and some compilers
1623			* complain about negating an unsigned.)
1624			*/
1625			return single_1bit_pos64(word & (~word + 1));
1626
1627			# endif
1628
1629			}
1630
1631			# define lsbit_pos_uintmax_(word) lsbit_pos64(word)
1632			#else /* ! QUAD */
1633			# define lsbit_pos_uintmax_(word) lsbit_pos32(word)
1634			#endif
1635
1636			PERL_STATIC_INLINE unsigned /* Like above for 32 bit word */
1637			Perl_lsbit_pos32(U32 word)
1638			{
1639			/* Find the position (0..31) of the least significant set bit in the input
1640			* word */
1641
1642			ASSUME(word != 0);
1643
1644			#if defined(PERL_CTZ_32)
1645			# define PERL_HAS_FAST_GET_LSB_POS32
1646
1647			return (unsigned) PERL_CTZ_32(word);
1648
1649			#elif U32SIZE == 4 && defined(WIN32)
1650			# define PERL_HAS_FAST_GET_LSB_POS32
1651
1652			{
1653			unsigned long index;
1654			_BitScanForward(&index, word);
1655			return (unsigned)index;
1656			}
1657
1658			#elif defined(PERL_HAS_FAST_GET_LSB_POS64)
1659			# define PERL_HAS_FAST_GET_LSB_POS32
1660
1661			/* Unlikely, but possible for the platform to have a wider fast operation
1662			* but not a narrower one. But easy enough to handle the case by widening
1663			* the parameter size. */
1664			return lsbit_pos64(word);
1665
1666			#else
1667
1668			return single_1bit_pos32(word & (~word + 1));
1669
1670			#endif
1671
1672			}
1673
1674
1675			/* Convert the leading zeros count to the bit position of the first set bit.
1676			* This just subtracts from the highest position, 31 or 63. But some compilers
1677			* don't optimize this optimally, and so a bit of bit twiddling encourages them
1678			* to do the right thing. It turns out that subtracting a smaller non-negative
1679			* number 'x' from 2**n-1 for any n is the same as taking the exclusive-or of
1680			* the two numbers. To see why, first note that the sum of any number, x, and
1681			* its complement, x', is all ones. So all ones minus x is x'. Then note that
1682			* the xor of x and all ones is x'. */
1683			#define LZC_TO_MSBIT_POS_(size, lzc) ((size##SIZE * CHARBITS - 1) ^ (lzc))
1684
1685			#ifdef U64TYPE /* HAS_QUAD not usable outside the core */
1686
1687			PERL_STATIC_INLINE unsigned
1688			Perl_msbit_pos64(U64 word)
1689			{
1690			/* Find the position (0..63) of the most significant set bit in the input
1691			* word */
1692
1693			ASSUME(word != 0);
1694
1695			/* If we can determine that the platform has a usable fast method to get
1696			* this, use that */
1697
1698			# if defined(PERL_CLZ_64)
1699			# define PERL_HAS_FAST_GET_MSB_POS64
1700
1701			return (unsigned) LZC_TO_MSBIT_POS_(U64, PERL_CLZ_64(word));
1702
1703			# elif U64SIZE == 8 && defined(_WIN64)
1704			# define PERL_HAS_FAST_GET_MSB_POS64
1705
1706			{
1707			unsigned long index;
1708			_BitScanReverse64(&index, word);
1709			return (unsigned)index;
1710			}
1711
1712			# else
1713
1714			/* Here, we didn't find a fast method for finding the msb. Fall back to
1715			* making the msb the only set bit in the word, and use our function that
1716			* works on words with a single bit set.
1717			*
1718			* Isolate the msb; http://codeforces.com/blog/entry/10330
1719			*
1720			* Only the most significant set bit matters. Or'ing word with its right
1721			* shift of 1 makes that bit and the next one to its right both 1.
1722			* Repeating that with the right shift of 2 makes for 4 1-bits in a row.
1723			* ... We end with the msb and all to the right being 1. */
1724			word \|= (word >> 1);
1725			word \|= (word >> 2);
1726			word \|= (word >> 4);
1727			word \|= (word >> 8);
1728			word \|= (word >> 16);
1729			word \|= (word >> 32);
1730
1731			/* Then subtracting the right shift by 1 clears all but the left-most of
1732			* the 1 bits, which is our desired result */
1733			word -= (word >> 1);
1734
1735			/* Now we have a single bit set */
1736			return single_1bit_pos64(word);
1737
1738			# endif
1739
1740			}
1741
1742			# define msbit_pos_uintmax_(word) msbit_pos64(word)
1743			#else /* ! QUAD */
1744			# define msbit_pos_uintmax_(word) msbit_pos32(word)
1745			#endif
1746
1747			PERL_STATIC_INLINE unsigned
1748			Perl_msbit_pos32(U32 word)
1749			{
1750			/* Find the position (0..31) of the most significant set bit in the input
1751			* word */
1752
1753			ASSUME(word != 0);
1754
1755			#if defined(PERL_CLZ_32)
1756			# define PERL_HAS_FAST_GET_MSB_POS32
1757
1758			return (unsigned) LZC_TO_MSBIT_POS_(U32, PERL_CLZ_32(word));
1759			#elif U32SIZE == 4 && defined(WIN32)
1760			# define PERL_HAS_FAST_GET_MSB_POS32
1761
1762			{
1763			unsigned long index;
1764			_BitScanReverse(&index, word);
1765			return (unsigned)index;
1766			}
1767
1768			#elif defined(PERL_HAS_FAST_GET_MSB_POS64)
1769			# define PERL_HAS_FAST_GET_MSB_POS32
1770
1771			return msbit_pos64(word); /* Let compiler widen parameter */
1772
1773			#else
1774
1775			word \|= (word >> 1);
1776			word \|= (word >> 2);
1777			word \|= (word >> 4);
1778			word \|= (word >> 8);
1779			word \|= (word >> 16);
1780			word -= (word >> 1);
1781			return single_1bit_pos32(word);
1782
1783			#endif
1784
1785			}
1786
1787			/* Note that if you are working through all the 1 bits in a word, and don't
1788			* care which order you process them in, it is better to use lsbit_pos. This
1789			* is because some platforms have a fast way to find the msb but not the lsb,
1790			* and others vice versa. The code above falls back to use the single
1791			* available fast method when the desired one is missing, and it is cheaper to
1792			* fall back from lsb to msb than the other way around */
1793
1794			#if UVSIZE == U64SIZE
1795			# define msbit_pos(word) msbit_pos64(word)
1796			# define lsbit_pos(word) lsbit_pos64(word)
1797			#elif UVSIZE == U32SIZE
1798			# define msbit_pos(word) msbit_pos32(word)
1799			# define lsbit_pos(word) lsbit_pos32(word)
1800			#endif
1801
1802			#ifdef U64TYPE /* HAS_QUAD not usable outside the core */
1803
1804			PERL_STATIC_INLINE unsigned
1805			Perl_single_1bit_pos64(U64 word)
1806			{
1807			/* Given a 64-bit word known to contain all zero bits except one 1 bit,
1808			* find and return the 1's position: 0..63 */
1809
1810			# ifdef PERL_CORE /* macro not exported */
1811			ASSUME(isPOWER_OF_2(word));
1812			# else
1813			ASSUME(word && (word & (word-1)) == 0);
1814			# endif
1815
1816			/* The only set bit is both the most and least significant bit. If we have
1817			* a fast way of finding either one, use that.
1818			*
1819			* It may appear at first glance that those functions call this one, but
1820			* they don't if the corresponding #define is set */
1821
1822			# ifdef PERL_HAS_FAST_GET_MSB_POS64
1823
1824			return msbit_pos64(word);
1825
1826			# elif defined(PERL_HAS_FAST_GET_LSB_POS64)
1827
1828			return lsbit_pos64(word);
1829
1830			# else
1831
1832			/* The position of the only set bit in a word can be quickly calculated
1833			* using deBruijn sequences. See for example
1834			* https://en.wikipedia.org/wiki/De_Bruijn_sequence */
1835			return PL_deBruijn_bitpos_tab64[(word * PERL_deBruijnMagic64_)
1836			>> PERL_deBruijnShift64_];
1837			# endif
1838
1839			}
1840
1841			#endif
1842
1843			PERL_STATIC_INLINE unsigned
1844			Perl_single_1bit_pos32(U32 word)
1845			{
1846			/* Given a 32-bit word known to contain all zero bits except one 1 bit,
1847			* find and return the 1's position: 0..31 */
1848
1849			#ifdef PERL_CORE /* macro not exported */
1850			ASSUME(isPOWER_OF_2(word));
1851			#else
1852			ASSUME(word && (word & (word-1)) == 0);
1853			#endif
1854			#ifdef PERL_HAS_FAST_GET_MSB_POS32
1855
1856			return msbit_pos32(word);
1857
1858			#elif defined(PERL_HAS_FAST_GET_LSB_POS32)
1859
1860			return lsbit_pos32(word);
1861
1862			#else
1863
1864			return PL_deBruijn_bitpos_tab32[(word * PERL_deBruijnMagic32_)
1865			>> PERL_deBruijnShift32_];
1866			#endif
1867
1868			}
1869
1870			#ifndef EBCDIC
1871
1872			PERL_STATIC_INLINE unsigned int
1873			Perl_variant_byte_number(PERL_UINTMAX_T word)
1874			{
1875			/* This returns the position in a word (0..7) of the first variant byte in
1876			* it. This is a helper function. Note that there are no branches */
1877
1878			/* Get just the msb bits of each byte */
1879			word &= PERL_VARIANTS_WORD_MASK;
1880
1881			/* This should only be called if we know there is a variant byte in the
1882			* word */
1883			assert(word);
1884
1885			# if BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678
1886
1887			/* Bytes are stored like
1888			* Byte8 ... Byte2 Byte1
1889			* 63..56...15...8 7...0
1890			* so getting the lsb of the whole modified word is getting the msb of the
1891			* first byte that has its msb set */
1892			word = lsbit_pos_uintmax_(word);
1893
1894			/* Here, word contains the position 7,15,23,...55,63 of that bit. Convert
1895			* to 0..7 */
1896			return (unsigned int) ((word + 1) >> 3) - 1;
1897
1898			# elif BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
1899
1900			/* Bytes are stored like
1901			* Byte1 Byte2 ... Byte8
1902			* 63..56 55..47 ... 7...0
1903			* so getting the msb of the whole modified word is getting the msb of the
1904			* first byte that has its msb set */
1905			word = msbit_pos_uintmax_(word);
1906
1907			/* Here, word contains the position 63,55,...,23,15,7 of that bit. Convert
1908			* to 0..7 */
1909			word = ((word + 1) >> 3) - 1;
1910
1911			/* And invert the result because of the reversed byte order on this
1912			* platform */
1913			word = CHARBITS - word - 1;
1914
1915			return (unsigned int) word;
1916
1917			# else
1918			# error Unexpected byte order
1919			# endif
1920
1921			}
1922
1923			#endif
1924			#if defined(PERL_CORE) \|\| defined(PERL_EXT)
1925
1926			/*
1927			=for apidoc variant_under_utf8_count
1928
1929			This function looks at the sequence of bytes between C ~~and C, which are~~
1930			assumed to be encoded in ASCII/Latin1, and returns how many of them would
1931			change should the string be translated into UTF-8. Due to the nature of UTF-8,
1932			each of these would occupy two bytes instead of the single one in the input
1933			string. Thus, this function returns the precise number of bytes the string
1934			would expand by when translated to UTF-8.
1935
1936			Unlike most of the other functions that have C in their name, the input
1937			to this function is NOT a UTF-8-encoded string. The function name is slightly
1938			I to emphasize this.
1939
1940			This function is internal to Perl because khw thinks that any XS code that
1941			would want this is probably operating too close to the internals. Presenting a
1942			valid use case could change that.
1943
1944			See also
1945			C>
1946			and
1947			C>,
1948
1949			=cut
1950
1951			*/
1952
1953			PERL_STATIC_INLINE Size_t
1954			S_variant_under_utf8_count(const U8* const s, const U8* const e)
1955			{
1956			const U8* x = s;
1957			Size_t count = 0;
1958
1959			PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
1960
1961			# ifndef EBCDIC
1962
1963			/* Test if the string is long enough to use word-at-a-time. (Logic is the
1964			* same as for is_utf8_invariant_string()) */
1965			if ((STRLEN) (e - x) >= PERL_WORDSIZE
1966			+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
1967			- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
1968			{
1969
1970			/* Process per-byte until reach word boundary. XXX This loop could be
1971			* eliminated if we knew that this platform had fast unaligned reads */
1972			while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
1973			count += ! UTF8_IS_INVARIANT(*x++);
1974			}
1975
1976			/* Process per-word as long as we have at least a full word left */
1977			do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
1978			explanation of how this works */
1979			PERL_UINTMAX_T increment
1980			= ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
1981			* PERL_COUNT_MULTIPLIER)
1982			>> ((PERL_WORDSIZE - 1) * CHARBITS);
1983			count += (Size_t) increment;
1984			x += PERL_WORDSIZE;
1985			} while (x + PERL_WORDSIZE <= e);
1986			}
1987
1988			# endif
1989
1990			/* Process per-byte */
1991			while (x < e) {
1992			if (! UTF8_IS_INVARIANT(*x)) {
1993			count++;
1994			}
1995
1996			x++;
1997			}
1998
1999			return count;
2000			}
2001
2002			#endif
2003
2004			/* Keep these around for these files */
2005			#if ! defined(PERL_IN_REGEXEC_C) && ! defined(PERL_IN_UTF8_C)
2006			# undef PERL_WORDSIZE
2007			# undef PERL_COUNT_MULTIPLIER
2008			# undef PERL_WORD_BOUNDARY_MASK
2009			# undef PERL_VARIANTS_WORD_MASK
2010			#endif
2011
2012			#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
2013
2014			#if defined(PERL_CORE) \|\| defined (PERL_EXT)
2015
2016			/*
2017			=for apidoc is_utf8_non_invariant_string
2018
2019			Returns TRUE if L returns FALSE for the first
2020			C bytes of the string C~~, but they are, nonetheless, legal Perl-extended~~
2021			UTF-8; otherwise returns FALSE.
2022
2023			A TRUE return means that at least one code point represented by the sequence
2024			either is a wide character not representable as a single byte, or the
2025			representation differs depending on whether the sequence is encoded in UTF-8 or
2026			not.
2027
2028			See also C>.
2029
2030			=cut
2031
2032			This is commonly used to determine if a SV's UTF-8 flag should be turned on.
2033			It generally needn't be if its string is entirely UTF-8 invariant, and it
2034			shouldn't be if it otherwise contains invalid UTF-8.
2035
2036			It is an internal function because khw thinks that XS code shouldn't be working
2037			at this low a level. A valid use case could change that.
2038
2039			*/
2040
2041			PERL_STATIC_INLINE bool
2042			Perl_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
2043			{
2044			const U8 * first_variant;
2045
2046			PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
2047
2048			if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2049			return FALSE;
2050			}
2051
2052			return is_utf8_string(first_variant, len - (first_variant - s));
2053			}
2054
2055			#endif
2056
2057			/*
2058			=for apidoc is_utf8_string
2059			=for apidoc_item is_utf8_string_loc
2060			=for apidoc_item is_utf8_string_loclen
2061			=for apidoc_item is_strict_utf8_string
2062			=for apidoc_item is_strict_utf8_string_loc
2063			=for apidoc_item is_strict_utf8_string_loclen
2064			=for apidoc_item is_c9strict_utf8_string
2065			=for apidoc_item is_c9strict_utf8_string_loc
2066			=for apidoc_item is_c9strict_utf8_string_loclen
2067			=for apidoc_item is_utf8_string_flags
2068			=for apidoc_item is_utf8_string_loc_flags
2069			=for apidoc_item is_utf8_string_loclen_flags
2070
2071			These each return TRUE if the first C bytes of string C ~~form a valid~~
2072			UTF-8 string for varying degrees of strictness, FALSE otherwise. If C is
2073			0, it will be calculated using C (which means if you use this
2074			option, that C ~~can't have embedded C characters and has to have a~~
2075			terminating C byte). Note that all characters being ASCII constitute 'a
2076			valid UTF-8 string'.
2077
2078			Some of the functions also return information about the string. Those that
2079			have the suffix C<_loc> in their names have an extra parameter, C. If that
2080			is not NULL, the function stores into it the location of how far it got in
2081			parsing C~~. If the function is returning TRUE, this will be a pointer to the~~
2082			byte immediately after the end of C~~. If FALSE, it will be the location of~~
2083			the first byte that fails the criteria.
2084
2085			The functions that instead have the suffix C<_loclen> have a second extra
2086			parameter, C. They act as the plain C<_loc> functions do with their C
2087			parameter, but if C is not null, the functions store into it the number of
2088			UTF-8 encoded characters found at the point where parsing stopped. If the
2089			function is returning TRUE, this will be the full count of the UTF-8 characters
2090			in C~~; if FALSE, it will be the count before the first invalid one.~~
2091
2092			C (and C and C)
2093			consider Perl's extended UTF-8 to be valid. That means that
2094			code points above Unicode, surrogates, and non-character code points are
2095			all considered valid by this function. Problems may arise in interchange with
2096			non-Perl applications, or (unlikely) between machines with different word
2097			sizes.
2098
2099			C (and C and
2100			C) consider only Unicode-range (0 to 0x10FFFF)
2101			code points to be valid, with the surrogates and non-character code points
2102			invalid. This level of strictness is what is safe to accept from outside
2103			sources that use Unicode rules.
2104
2105			The forms whose names contain C conform to the level of strictness
2106			given in
2107			L.
2108			This means Unicode-range code points including non-character ones are
2109			considered valid, but not the surrogates. This level of strictness is
2110			considered safe for cooperating components that know how the other components
2111			handle non-character code points.
2112
2113			The forms whose names contain C<_flags> allow you to customize the acceptable
2114			level of strictness. They have an extra parameter, C to indicate the
2115			types of code points that are acceptable. If C is 0, they give the
2116			same results as C> (and kin); if C is
2117			C, they give the same results as
2118			C> (and kin); and if C is
2119			C, they give the same results as
2120			C> (and kin). Otherwise C may be any
2121			combination of the C> flags understood by
2122			C>, with the same meanings.
2123
2124			It's better to use one of the non-C<_flags> functions if they give you the
2125			desired strictness, as those have a better chance of being inlined by the C
2126			compiler.
2127
2128			See also
2129			C>,
2130			C>,
2131
2132			=cut
2133			*/
2134
2135			#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
2136
2137			#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
2138
2139			PERL_STATIC_INLINE bool
2140			Perl_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
2141			{
2142			const U8 * first_variant;
2143
2144			PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
2145			assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
2146
2147			if (len == 0) {
2148			len = strlen((const char *)s);
2149			}
2150
2151			if (flags == 0) {
2152			return is_utf8_string(s, len);
2153			}
2154
2155			if ((flags & UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2156			== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2157			{
2158			return is_strict_utf8_string(s, len);
2159			}
2160
2161			if ((flags & UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2162			== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2163			{
2164			return is_c9strict_utf8_string(s, len);
2165			}
2166
2167			if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
2168			const U8* const send = s + len;
2169			const U8* x = first_variant;
2170
2171			while (x < send) {
2172			STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
2173			if (UNLIKELY(! cur_len)) {
2174			return FALSE;
2175			}
2176			x += cur_len;
2177			}
2178			}
2179
2180			return TRUE;
2181			}
2182
2183			#define Perl_is_utf8_string_loc(s, len, ep) \
2184			Perl_is_utf8_string_loclen(s, len, ep, 0)
2185
2186			PERL_STATIC_INLINE bool
2187			Perl_is_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
2188			{
2189			const U8 * first_variant;
2190
2191			PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
2192
2193			if (len == 0) {
2194			len = strlen((const char *) s);
2195			}
2196
2197			if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2198			if (el)
2199			*el = len;
2200
2201			if (ep) {
2202			*ep = s + len;
2203			}
2204
2205			return TRUE;
2206			}
2207
2208			{
2209			const U8* const send = s + len;
2210			const U8* x = first_variant;
2211			STRLEN outlen = first_variant - s;
2212
2213			while (x < send) {
2214			const STRLEN cur_len = isUTF8_CHAR(x, send);
2215			if (UNLIKELY(! cur_len)) {
2216			break;
2217			}
2218			x += cur_len;
2219			outlen++;
2220			}
2221
2222			if (el)
2223			*el = outlen;
2224
2225			if (ep) {
2226			*ep = x;
2227			}
2228
2229			return (x == send);
2230			}
2231			}
2232
2233			/* The perl core arranges to never call the DFA below without there being at
2234			* least one byte available to look at. This allows the DFA to use a do {}
2235			* while loop which means that calling it with a UTF-8 invariant has a single
2236			* conditional, same as the calling code checking for invariance ahead of time.
2237			* And having the calling code remove that conditional speeds up by that
2238			* conditional, the case where it wasn't invariant. So there's no reason to
2239			* check before calling this.
2240			*
2241			* But we don't know this for non-core calls, so have to retain the check for
2242			* them. */
2243			#ifdef PERL_CORE
2244			# define PERL_NON_CORE_CHECK_EMPTY(s,e) assert((e) > (s))
2245			#else
2246			# define PERL_NON_CORE_CHECK_EMPTY(s,e) if ((e) <= (s)) return FALSE
2247			#endif
2248
2249			/*
2250			* DFA for checking input is valid UTF-8 syntax.
2251			*
2252			* This uses adaptations of the table and algorithm given in
2253			* https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
2254			* documentation of the original version. A copyright notice for the original
2255			* version is given at the beginning of this file. The Perl adaptations are
2256			* documented at the definition of PL_extended_utf8_dfa_tab[].
2257			*
2258			* This dfa is fast. There are three exit conditions:
2259			* 1) a well-formed code point, acceptable to the table
2260			* 2) the beginning bytes of an incomplete character, whose completion might
2261			* or might not be acceptable
2262			* 3) unacceptable to the table. Some of the adaptations have certain,
2263			* hopefully less likely to occur, legal inputs be unacceptable to the
2264			* table, so these must be sorted out afterwards.
2265			*
2266			* This macro is a complete implementation of the code executing the DFA. It
2267			* is passed the input sequence bounds and the table to use, and what to do
2268			* for each of the exit conditions. There are three canned actions, likely to
2269			* be the ones you want:
2270			* DFA_RETURN_SUCCESS_
2271			* DFA_RETURN_FAILURE_
2272			* DFA_GOTO_TEASE_APART_FF_
2273			*
2274			* You pass a parameter giving the action to take for each of the three
2275			* possible exit conditions:
2276			*
2277			* 'accept_action' This is executed when the DFA accepts the input.
2278			* DFA_RETURN_SUCCESS_ is the most likely candidate.
2279			* 'reject_action' This is executed when the DFA rejects the input.
2280			* DFA_RETURN_FAILURE_ is a candidate, or 'goto label' where
2281			* you have written code to distinguish the rejecting state
2282			* results. Because it happens in several places, and
2283			* involves #ifdefs, the special action
2284			* DFA_GOTO_TEASE_APART_FF_ is what you want with
2285			* PL_extended_utf8_dfa_tab. On platforms without
2286			* EXTRA_LONG_UTF8, there is no need to tease anything apart,
2287			* so this evaluates to DFA_RETURN_FAILURE_; otherwise you
2288			* need to have a label 'tease_apart_FF' that it will transfer
2289			* to.
2290			* 'incomplete_char_action' This is executed when the DFA ran off the end
2291			* before accepting or rejecting the input.
2292			* DFA_RETURN_FAILURE_ is the likely action, but you could
2293			* have a 'goto', or NOOP. In the latter case the DFA drops
2294			* off the end, and you place your code to handle this case
2295			* immediately after it.
2296			*/
2297
2298			#define DFA_RETURN_SUCCESS_ return (s8dfa_ - s0)
2299			#define DFA_RETURN_FAILURE_ return 0
2300			#ifdef HAS_EXTRA_LONG_UTF8
2301			# define DFA_TEASE_APART_FF_ goto tease_apart_FF
2302			#else
2303			# define DFA_TEASE_APART_FF_ DFA_RETURN_FAILURE_
2304			#endif
2305
2306			#define PERL_IS_UTF8_CHAR_DFA(s0, e, dfa_tab, \
2307			accept_action, \
2308			reject_action, \
2309			incomplete_char_action) \
2310			STMT_START { \
2311			const U8 * s8dfa_ = s0; \
2312			const U8 * const e8dfa_ = e; \
2313			PERL_UINT_FAST16_T state = 0; \
2314			\
2315			PERL_NON_CORE_CHECK_EMPTY(s8dfa_, e8dfa_); \
2316			\
2317			do { \
2318			state = dfa_tab[256 + state + dfa_tab[*s8dfa_]]; \
2319			} while (++s8dfa_ < e8dfa_ && state > 1); \
2320			\
2321			if (LIKELY(state == 0)) { /* Accepting state */ \
2322			accept_action; \
2323			} \
2324			\
2325			if (state == 1) { /* Rejecting state */ \
2326			reject_action; \
2327			} \
2328			\
2329			/* Here, dropped out of loop before end-of-char */ \
2330			incomplete_char_action; \
2331			} STMT_END
2332
2333
2334			/*
2335
2336			=for apidoc isUTF8_CHAR
2337			=for apidoc_item isSTRICT_UTF8_CHAR
2338			=for apidoc_item isC9_STRICT_UTF8_CHAR
2339			=for apidoc_item isUTF8_CHAR_flags
2340			=for apidoc_item is_utf8_char_buf
2341
2342			These each evaluate to non-zero if the first few bytes of the string starting
2343			at C ~~and looking no further than S> are well-formed UTF-8 that~~
2344			represents some code point, for varying degrees of strictness. Otherwise they
2345			evaluate to 0. If non-zero, the value gives how many bytes starting at C
2346			comprise the code point's representation. Any bytes remaining before C, but
2347			beyond the ones needed to form the first code point in C~~, are not examined.~~
2348
2349			These are used to efficiently decide if the next few bytes in C ~~are~~
2350			legal UTF-8 for a single character.
2351
2352			With C, the code point can be any that will fit in an IV on this
2353			machine, using Perl's extension to official UTF-8 to represent those higher
2354			than the Unicode maximum of 0x10FFFF. That means that this will consider byte
2355			sequences to be valid that are unrecognized or considered illegal by non-Perl
2356			applications.
2357
2358			With C>, acceptable code points are restricted to those
2359			defined by Unicode to be fully interchangeable across applications.
2360			This means code points above the Unicode range (max legal is 0x10FFFF),
2361			surrogates, and non-character code points are rejected.
2362
2363			With C>, acceptable code points are restricted to
2364			those defined by Unicode to be fully interchangeable within an application.
2365			This means code points above the Unicode range and surrogates are rejected, but
2366			non-character code points are accepted. See L
2367			#9\|http://www.unicode.org/versions/corrigendum9.html>.
2368
2369			Use C> to customize what code points are acceptable.
2370			If C is 0, this gives the same results as C>;
2371			if C is C, this gives the same results
2372			as C>;
2373			and if C is C, this gives
2374			the same results as C>.
2375			Otherwise C may be any combination of the C> flags
2376			understood by C>, with the same meanings.
2377
2378			The three alternative macros are for the most commonly needed validations; they
2379			are likely to run somewhat faster than this more general one, as they can be
2380			inlined into your code.
2381
2382			Use one of the C> forms to check entire strings.
2383
2384			Note also that a UTF-8 "invariant" character (i.e. ASCII on non-EBCDIC
2385			machines) is a valid UTF-8 character.
2386
2387			C is the old name for C. Do not use it in new
2388			code.
2389
2390			=cut
2391
2392			All the functions except isUTF8_CHAR_flags) use adaptations of the table and
2393			algorithm given in https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which
2394			provides comprehensive documentation of the original version. A copyright
2395			notice for the original version is given at the beginning of this file.
2396
2397			The Perl adaptation for isUTF8_CHAR is documented at the definition of
2398			PL_extended_utf8_dfa_tab[].
2399
2400			The Perl adaptation for isSTRICT_UTF8_CHAR is documented at the definition of
2401			PL_strict_utf8_dfa_tab[];
2402
2403			The Perl adaptation for isC9_STRICT_UTF8_CHAR is documented at the definition
2404			of PL_c9_utf8_dfa_tab[].
2405
2406			*/
2407
2408			PERL_STATIC_INLINE Size_t
2409			Perl_isSTRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
2410			{
2411			PERL_ARGS_ASSERT_ISSTRICT_UTF8_CHAR;
2412
2413			PERL_IS_UTF8_CHAR_DFA(s0, e, PL_strict_utf8_dfa_tab,
2414			DFA_RETURN_SUCCESS_,
2415			goto check_hanguls,
2416			DFA_RETURN_FAILURE_);
2417			check_hanguls:
2418
2419			/* Here, we didn't return success, but dropped out of the loop. In the
2420			* case of PL_strict_utf8_dfa_tab, this means the input is either
2421			* malformed, or was for certain Hanguls; handle them specially */
2422
2423			/* The dfa above drops out for incomplete or illegal inputs, and certain
2424			* legal Hanguls; check and return accordingly */
2425			return is_HANGUL_ED_utf8_safe(s0, e);
2426			}
2427
2428			PERL_STATIC_INLINE Size_t
2429			Perl_isUTF8_CHAR(const U8 * const s0, const U8 * const e)
2430			{
2431			PERL_ARGS_ASSERT_ISUTF8_CHAR;
2432
2433			PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
2434			DFA_RETURN_SUCCESS_,
2435			DFA_TEASE_APART_FF_,
2436			DFA_RETURN_FAILURE_);
2437
2438			/* Here, we didn't return success, but dropped out of the loop. In the
2439			* case of PL_extended_utf8_dfa_tab, this means the input is either
2440			* malformed, or the start byte was FF on a platform that the dfa doesn't
2441			* handle FF's. Call a helper function. */
2442
2443			#ifdef HAS_EXTRA_LONG_UTF8
2444
2445			tease_apart_FF:
2446
2447			/* In the case of PL_extended_utf8_dfa_tab, getting here means the input is
2448			* either malformed, or was for the largest possible start byte, which we
2449			* now check, not inline */
2450			if (*s0 != I8_TO_NATIVE_UTF8(0xFF)) {
2451			return 0;
2452			}
2453
2454			return is_utf8_FF_helper_(s0, e,
2455			FALSE /* require full, not partial char */
2456			);
2457			#endif
2458
2459			}
2460
2461			PERL_STATIC_INLINE Size_t
2462			Perl_isC9_STRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
2463			{
2464			PERL_ARGS_ASSERT_ISC9_STRICT_UTF8_CHAR;
2465
2466			PERL_IS_UTF8_CHAR_DFA(s0, e, PL_c9_utf8_dfa_tab,
2467			DFA_RETURN_SUCCESS_,
2468			DFA_RETURN_FAILURE_,
2469			DFA_RETURN_FAILURE_);
2470			}
2471
2472			#define is_strict_utf8_string_loc(s, len, ep) \
2473			is_strict_utf8_string_loclen(s, len, ep, 0)
2474
2475			PERL_STATIC_INLINE bool
2476			Perl_is_strict_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
2477			{
2478			const U8 * first_variant;
2479
2480			PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
2481
2482			if (len == 0) {
2483			len = strlen((const char *) s);
2484			}
2485
2486			if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2487			if (el)
2488			*el = len;
2489
2490			if (ep) {
2491			*ep = s + len;
2492			}
2493
2494			return TRUE;
2495			}
2496
2497			{
2498			const U8* const send = s + len;
2499			const U8* x = first_variant;
2500			STRLEN outlen = first_variant - s;
2501
2502			while (x < send) {
2503			const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
2504			if (UNLIKELY(! cur_len)) {
2505			break;
2506			}
2507			x += cur_len;
2508			outlen++;
2509			}
2510
2511			if (el)
2512			*el = outlen;
2513
2514			if (ep) {
2515			*ep = x;
2516			}
2517
2518			return (x == send);
2519			}
2520			}
2521
2522			#define is_c9strict_utf8_string_loc(s, len, ep) \
2523			is_c9strict_utf8_string_loclen(s, len, ep, 0)
2524
2525			PERL_STATIC_INLINE bool
2526			Perl_is_c9strict_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
2527			{
2528			const U8 * first_variant;
2529
2530			PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
2531
2532			if (len == 0) {
2533			len = strlen((const char *) s);
2534			}
2535
2536			if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2537			if (el)
2538			*el = len;
2539
2540			if (ep) {
2541			*ep = s + len;
2542			}
2543
2544			return TRUE;
2545			}
2546
2547			{
2548			const U8* const send = s + len;
2549			const U8* x = first_variant;
2550			STRLEN outlen = first_variant - s;
2551
2552			while (x < send) {
2553			const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
2554			if (UNLIKELY(! cur_len)) {
2555			break;
2556			}
2557			x += cur_len;
2558			outlen++;
2559			}
2560
2561			if (el)
2562			*el = outlen;
2563
2564			if (ep) {
2565			*ep = x;
2566			}
2567
2568			return (x == send);
2569			}
2570			}
2571
2572			#define is_utf8_string_loc_flags(s, len, ep, flags) \
2573			is_utf8_string_loclen_flags(s, len, ep, 0, flags)
2574
2575
2576			/* The above 3 actual functions could have been moved into the more general one
2577			* just below, and made #defines that call it with the right 'flags'. They are
2578			* currently kept separate to increase their chances of getting inlined */
2579
2580			PERL_STATIC_INLINE bool
2581			Perl_is_utf8_string_loclen_flags(const U8 s, STRLEN len, const U8 ep, STRLEN el, const U32 flags)
2582			{
2583			const U8 * first_variant;
2584
2585			PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
2586			assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
2587
2588			if (flags == 0) {
2589			return is_utf8_string_loclen(s, len, ep, el);
2590			}
2591
2592			if ((flags & UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2593			== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
2594			{
2595			return is_strict_utf8_string_loclen(s, len, ep, el);
2596			}
2597
2598			if ((flags & UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2599			== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
2600			{
2601			return is_c9strict_utf8_string_loclen(s, len, ep, el);
2602			}
2603
2604			if (len == 0) {
2605			len = strlen((const char *) s);
2606			}
2607
2608			if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
2609			if (el)
2610			*el = len;
2611
2612			if (ep) {
2613			*ep = s + len;
2614			}
2615
2616			return TRUE;
2617			}
2618
2619			{
2620			const U8* send = s + len;
2621			const U8* x = first_variant;
2622			STRLEN outlen = first_variant - s;
2623
2624			while (x < send) {
2625			const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
2626			if (UNLIKELY(! cur_len)) {
2627			break;
2628			}
2629			x += cur_len;
2630			outlen++;
2631			}
2632
2633			if (el)
2634			*el = outlen;
2635
2636			if (ep) {
2637			*ep = x;
2638			}
2639
2640			return (x == send);
2641			}
2642			}
2643
2644			/*
2645			=for apidoc utf8_distance
2646
2647			Returns the number of UTF-8 characters between the UTF-8 pointers C
2648			and C.
2649
2650			WARNING: use only if you know that the pointers point inside the
2651			same UTF-8 buffer.
2652
2653			=cut
2654			*/
2655
2656			PERL_STATIC_INLINE IV
2657	34		Perl_utf8_distance(pTHX_ const U8 a, const U8 b)
2658			{
2659			PERL_ARGS_ASSERT_UTF8_DISTANCE;
2660
2661	34	50	return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
2662			}
2663
2664			/*
2665			=for apidoc utf8_hop
2666
2667			Return the UTF-8 pointer C ~~displaced by C characters, either~~
2668			forward (if C is positive) or backward (if negative). C ~~does not need~~
2669			to be pointing to the starting byte of a character. If it isn't, one count of
2670			C will be used up to get to the start of the next character for forward
2671			hops, and to the start of the current character for negative ones.
2672
2673			WARNING: Prefer L to this one.
2674
2675			Do NOT use this function unless you B C is within
2676			the UTF-8 data pointed to by C ~~B that on entry C ~~is aligned~~~~
2677			on the first byte of a character or just after the last byte of a character.
2678
2679			=cut
2680			*/
2681
2682			PERL_STATIC_INLINE U8 *
2683	0		Perl_utf8_hop(const U8 *s, SSize_t off)
2684			{
2685			PERL_ARGS_ASSERT_UTF8_HOP;
2686
2687			/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
2688			* the XXX bitops (especially ~) can create illegal UTF-8.
2689			* In other words: in Perl UTF-8 is not just for Unicode. */
2690
2691	0	0	if (off > 0) {
2692
2693			/* Get to next non-continuation byte */
2694	0	0	if (UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
2695			do {
2696	0		s++;
2697			}
2698	0	0	while (UTF8_IS_CONTINUATION(*s));
2699	0		off--;
2700			}
2701
2702	0	0	while (off--)
2703	0		s += UTF8SKIP(s);
2704			}
2705			else {
2706	0	0	while (off++) {
2707	0		s--;
2708	0	0	while (UTF8_IS_CONTINUATION(*s))
2709	0		s--;
2710			}
2711			}
2712
2713			GCC_DIAG_IGNORE(-Wcast-qual)
2714	0		return (U8 *)s;
2715			GCC_DIAG_RESTORE
2716			}
2717
2718			/*
2719			=for apidoc utf8_hop_forward
2720			=for apidoc_item utf8_hop_forward_overshoot
2721
2722			These each take as input a position, C, into a string encoded as UTF-8
2723			which ends at the byte before C, and return the position within it that is
2724			C displaced by up to C characters forwards.
2725
2726			If there are fewer than C characters between C and C, the
2727			functions return C.
2728
2729			The functions differ in two ways
2730
2731			=over 4
2732
2733			=item *
2734
2735			C can return how many characters beyond the edge
2736			the request was for. When its parameter, C<&remaining>, is not NULL, the
2737			function stores into it the count of the excess; zero if the request was
2738			completely fulfilled. The actual number of characters that were displaced can
2739			then be calculated as S>.
2740
2741			=item *
2742
2743			C will panic if called with C already positioned at or
2744			beyond the edge of the string ending at C and the request is to go even
2745			further over the edge. C presumes the caller will
2746			handle any errors, and just stores C into C without doing
2747			anything else.
2748
2749			=back
2750
2751			(The above contains a slight lie. When C is NULL, the two functions
2752			act identically.)
2753
2754			C does not need to be pointing to the starting byte of a character. If it
2755			isn't, one count of C will be used up to get to that start.
2756
2757			C must be non-negative, and if zero, no action is taken; C is returned
2758			unchanged.
2759
2760			=cut
2761			*/
2762			# define Perl_utf8_hop_forward( s, off, end) \
2763			Perl_utf8_hop_forward_overshoot(s, off, end, NULL)
2764
2765			PERL_STATIC_INLINE U8 *
2766			Perl_utf8_hop_forward_overshoot(const U8 * s, SSize_t off,
2767			const U8 * const end, SSize_t *remaining)
2768			{
2769			PERL_ARGS_ASSERT_UTF8_HOP_FORWARD_OVERSHOOT;
2770			assert(off >= 0);
2771
2772			if (off != 0) {
2773			if (UNLIKELY(s >= end && ! remaining)) {
2774			Perl_croak_nocontext("panic: Start of forward hop (0x%p) is %zd"
2775			" bytes beyond legal end position (0x%p)",
2776			s, 1 + s - end, end);
2777			}
2778
2779			if (UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
2780			do { /* Get to next non-continuation byte */
2781			if (! UTF8_IS_CONTINUATION(*s)) {
2782			off--;
2783			break;
2784			}
2785			s++;
2786			} while (s < end);
2787			}
2788
2789			while (off > 0 && s < end) {
2790			STRLEN skip = UTF8SKIP(s);
2791
2792			/* Quit without counting this character if it overshoots the edge.
2793			* */
2794			if ((STRLEN)(end - s) < skip) {
2795			s = end;
2796			break;
2797			}
2798
2799			s += skip;
2800			off--;
2801			}
2802			}
2803
2804			if (remaining) {
2805			*remaining = off;
2806			}
2807
2808			GCC_DIAG_IGNORE(-Wcast-qual)
2809			return (U8 *)s;
2810			GCC_DIAG_RESTORE
2811			}
2812
2813			/*
2814			=for apidoc utf8_hop_back
2815			=for apidoc_item utf8_hop_back_overshoot
2816
2817			These each take as input a string encoded as UTF-8 which starts at C,
2818			and a position into it given by C~~, and return the position within it that is~~
2819			C ~~displaced by up to C characters backwards.~~
2820
2821			If there are fewer than C characters between C and C~~, the~~
2822			functions return C.
2823
2824			The functions differ in that C can return how many
2825			characters C beyond the edge the request was for. When its parameter,
2826			C<&remaining>, is not NULL, the function stores into it the count of the
2827			excess; zero if the request was completely fulfilled. The actual number of
2828			characters that were displaced can then be calculated as S>.
2829			This function acts identically to plain C when this parameter is
2830			NULL.
2831
2832			C ~~does not need to be pointing to the starting byte of a character. If it~~
2833			isn't, one count of C will be used up to get to that start.
2834
2835			C must be non-positive, and if zero, no action is taken; C ~~is returned~~
2836			unchanged. That it otherwise must be negative means that the earlier
2837			description is a lie, to avoid burdening you with this detail too soon. An
2838			C of C<-2> means to displace two characters backwards, so the displacement
2839			is actually the absolute value of C. C will also be
2840			non-positive. If there was only one character between C and C~~, and a~~
2841			displacement of C<-2> was requested, C would be set to C<-1>. The
2842			subtraction formula works, yielding the result that only C<-1> character was
2843			displaced.
2844
2845			=cut
2846			*/
2847
2848			# define Perl_utf8_hop_back( s, off, start) \
2849			Perl_utf8_hop_back_overshoot(s, off, start, NULL)
2850
2851			PERL_STATIC_INLINE U8 *
2852			Perl_utf8_hop_back_overshoot(const U8 *s, SSize_t off,
2853			const U8 * const start, SSize_t *remaining)
2854			{
2855			PERL_ARGS_ASSERT_UTF8_HOP_BACK_OVERSHOOT;
2856			assert(start <= s);
2857			assert(off <= 0);
2858
2859			/* Note: if we know that the input is well-formed, we can do per-word
2860			* hop-back. Commit d6ad3b72778369a84a215b498d8d60d5b03aa1af implemented
2861			* that. But it was reverted because doing per-word has some
2862			* start-up/tear-down overhead, so only makes sense if the distance to be
2863			* moved is large, and core perl doesn't currently move more than a few
2864			* characters at a time. You can reinstate it if it does become
2865			* advantageous. */
2866			while (off < 0 && s > start) {
2867			do { /* Find the beginning of this character */
2868			s--;
2869			if (! UTF8_IS_CONTINUATION(*s)) {
2870			off++;
2871			break;
2872			}
2873			} while (s > start);
2874			}
2875
2876			if (remaining) {
2877			*remaining = off;
2878			}
2879
2880			GCC_DIAG_IGNORE(-Wcast-qual)
2881			return (U8 *)s;
2882			GCC_DIAG_RESTORE
2883			}
2884
2885			/*
2886			=for apidoc utf8_hop_safe
2887			=for apidoc_item utf8_hop_overshoot
2888
2889			These each take as input a string encoded as UTF-8 which starts at C,
2890			ending at C, and a position into it given by C~~, and return the~~
2891			position within it that is C ~~displaced by up to C characters, either~~
2892			forwards if C is positive, or backwards if C is negative. (Nothing
2893			is done if C is 0.)
2894
2895			If there are fewer than C characters between C ~~and the respective edge,~~
2896			the functions return that edge.
2897
2898			The functions differ in that C can return how many
2899			characters beyond the edge the request was for. When its parameter,
2900			C<&remaining>, is not NULL, the function stores into it the count of the
2901			excess; zero if the request was completely fulfilled. The actual number of
2902			characters that were displaced can then be calculated as S>.
2903			This function acts identically to plain C when this parameter is
2904			NULL.
2905
2906			C ~~does not need to be pointing to the starting byte of a character. If it~~
2907			isn't, one count of C will be used up to get to that start.
2908
2909			To be more precise, the displacement is by the absolute value of C, and
2910			the excess count is the absolute value of C.
2911
2912			=cut
2913			*/
2914
2915			#define Perl_utf8_hop_safe(s, o, b, e) Perl_utf8_hop_overshoot(s, o, b, e, 0)
2916
2917			PERL_STATIC_INLINE U8 *
2918			Perl_utf8_hop_overshoot(const U8 *s, SSize_t off,
2919			const U8 * const start, const U8 * const end,
2920			SSize_t * remaining)
2921			{
2922			PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT;
2923
2924			assert(start <= s && s <= end);
2925
2926			if (off >= 0) {
2927			return utf8_hop_forward_overshoot(s, off, end, remaining);
2928			}
2929			else {
2930			return utf8_hop_back_overshoot(s, off, start, remaining);
2931			}
2932			}
2933
2934			PERL_STATIC_INLINE STRLEN
2935			Perl_isUTF8_CHAR_flags(const U8 * const s0, const U8 * const e, const U32 flags)
2936			{
2937			PERL_ARGS_ASSERT_ISUTF8_CHAR_FLAGS;
2938			assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
2939
2940			PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
2941			goto check_success,
2942			DFA_TEASE_APART_FF_,
2943			DFA_RETURN_FAILURE_);
2944
2945			check_success:
2946
2947			return is_utf8_char_helper_(s0, e, flags);
2948
2949			#ifdef HAS_EXTRA_LONG_UTF8
2950
2951			tease_apart_FF:
2952
2953			/* In the case of PL_extended_utf8_dfa_tab, getting here means the input is
2954			* either malformed, or was for the largest possible start byte, which
2955			* indicates perl extended UTF-8, well above the Unicode maximum */
2956			if ( *s0 != I8_TO_NATIVE_UTF8(0xFF)
2957			\|\| (flags & (UTF8_DISALLOW_SUPER\|UTF8_DISALLOW_PERL_EXTENDED)))
2958			{
2959			return 0;
2960			}
2961
2962			/* Otherwise examine the sequence not inline */
2963			return is_utf8_FF_helper_(s0, e,
2964			FALSE /* require full, not partial char */
2965			);
2966			#endif
2967
2968			}
2969
2970			/*
2971
2972			=for apidoc is_utf8_valid_partial_char
2973			=for apidoc_item is_utf8_valid_partial_char_flags
2974
2975			These each return FALSE if the sequence of bytes starting at C ~~and looking no~~
2976			further than S> is the UTF-8 encoding for one or more code points.
2977			That is, FALSE is returned if C ~~points to at least one entire UTF-8 encoded~~
2978			character.
2979
2980			Otherwise, they return TRUE if there exists at least one non-empty sequence of
2981			bytes that when appended to sequence C~~, starting at position C causes the~~
2982			entire sequence to be the well-formed UTF-8 of some code point
2983
2984			In other words they return TRUE if C ~~points to an incomplete UTF-8-encoded~~
2985			code point; FALSE otherwise.
2986
2987			This is useful when a fixed-length buffer is being tested for being well-formed
2988			UTF-8, but the final few bytes in it don't comprise a full character; that is,
2989			it is split somewhere in the middle of the final code point's UTF-8
2990			representation. (Presumably when the buffer is refreshed with the next chunk
2991			of data, the new first bytes will complete the partial code point.) This
2992			function is used to verify that the final bytes in the current buffer are in
2993			fact the legal beginning of some code point, so that if they aren't, the
2994			failure can be signalled without having to wait for the next read.
2995
2996			C behaves identically to
2997			C when the latter is called with a zero
2998			C parameter. This parameter is used to restrict the classes of code
2999			points that are considered to be valid. When zero, Perl's extended UTF-8 is
3000			used. Otherwise C can be any combination of the C>
3001			flags accepted by C>. If there is any sequence of bytes
3002			that can complete the input partial character in such a way that a
3003			non-prohibited character is formed, the function returns TRUE; otherwise FALSE.
3004			Non-character code points cannot be determined based on partial character
3005			input, so TRUE is always returned if C ~~looks like it could be the beginning~~
3006			on one of those. But many of the other possible excluded types can be
3007			determined from just the first one or two bytes.
3008
3009			=cut
3010			*/
3011			#define is_utf8_valid_partial_char(s, e) \
3012			is_utf8_valid_partial_char_flags(s, e, 0)
3013
3014			PERL_STATIC_INLINE bool
3015			Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
3016			{
3017			PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
3018			assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
3019
3020			PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
3021			DFA_RETURN_FAILURE_,
3022			DFA_TEASE_APART_FF_,
3023			NOOP);
3024
3025			/* The NOOP above causes the DFA to drop down here iff the input was a
3026			* partial character. flags=0 => can return TRUE immediately; otherwise we
3027			* need to check (not inline) if the partial character is the beginning of
3028			* a disallowed one */
3029			if (flags == 0) {
3030			return TRUE;
3031			}
3032
3033			return cBOOL(is_utf8_char_helper_(s0, e, flags));
3034
3035			#ifdef HAS_EXTRA_LONG_UTF8
3036
3037			tease_apart_FF:
3038
3039			/* Getting here means the input is either malformed, or, in the case of
3040			* PL_extended_utf8_dfa_tab, was for the largest possible start byte. The
3041			* latter case has to be extended UTF-8, so can fail immediately if that is
3042			* forbidden */
3043
3044			if ( *s0 != I8_TO_NATIVE_UTF8(0xFF)
3045			\|\| (flags & (UTF8_DISALLOW_SUPER\|UTF8_DISALLOW_PERL_EXTENDED)))
3046			{
3047			return FALSE;
3048			}
3049
3050			return is_utf8_FF_helper_(s0, e,
3051			TRUE /* Require to be a partial character */
3052			);
3053			#endif
3054
3055			}
3056
3057			/*
3058
3059			=for apidoc is_utf8_fixed_width_buf_flags
3060			=for apidoc_item is_utf8_fixed_width_buf_loc_flags
3061			=for apidoc_item is_utf8_fixed_width_buf_loclen_flags
3062
3063			These each return TRUE if the fixed-width buffer starting at C ~~with length~~
3064			C is entirely valid UTF-8, subject to the restrictions given by C;
3065			otherwise they return FALSE.
3066
3067			If C is 0, any well-formed UTF-8, as extended by Perl, is accepted
3068			without restriction. If the final few bytes of the buffer do not form a
3069			complete code point, this will return TRUE anyway, provided that
3070			C> returns TRUE for them.
3071
3072			C can be zero or any combination of the C> flags
3073			accepted by C>, and with the same meanings.
3074
3075			The functions differ from C> only in that the latter
3076			returns FALSE if the final few bytes of the string don't form a complete code
3077			point.
3078
3079			C> does all the preceding, but takes an
3080			extra parameter, C into which it stores the location of the failure, if
3081			C is not NULL. If instead the function returns TRUE, C<*ep> will point to
3082			the beginning of any partial character at the end of the buffer; if there is no
3083			partial character C<*ep> will contain C~~+C.~~
3084
3085			C> does all the preceding, but takes
3086			another extra parameter, C into which it stores the number of complete,
3087			valid characters found, if C is not NULL.
3088
3089			=cut
3090			*/
3091			#define is_utf8_fixed_width_buf_flags(s, len, flags) \
3092			is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
3093
3094			#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
3095			is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
3096
3097			PERL_STATIC_INLINE bool
3098			Perl_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
3099			STRLEN len,
3100			const U8 **ep,
3101			STRLEN *el,
3102			const U32 flags)
3103			{
3104			const U8 * maybe_partial;
3105
3106			PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
3107
3108			if (! ep) {
3109			ep = &maybe_partial;
3110			}
3111
3112			/* If it's entirely valid, return that; otherwise see if the only error is
3113			* that the final few bytes are for a partial character */
3114			return is_utf8_string_loclen_flags(s, len, ep, el, flags)
3115			\|\| is_utf8_valid_partial_char_flags(*ep, s + len, flags);
3116			}
3117
3118			PERL_STATIC_INLINE bool
3119			Perl_utf8_to_uv_msgs(const U8 * const s0,
3120			const U8 * const e,
3121			UV * cp_p,
3122			Size_t *advance_p,
3123			U32 flags,
3124			U32 * errors,
3125			AV ** msgs)
3126			{
3127			PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS;
3128
3129			/* This is the inlined portion of utf8_to_uv_msgs. It handles the simple
3130			* cases, and, if necessary calls a helper function to deal with the more
3131			* complex ones. Almost all well-formed non-problematic code points are
3132			* considered simple, so that it's unlikely that the helper function will
3133			* need to be called. */
3134
3135			/* Assume that isn't malformed; the vast majority of calls won't be */
3136			if (errors) {
3137			*errors = 0;
3138			}
3139			if (msgs) {
3140			*msgs = NULL;
3141			}
3142
3143
3144			/* No calls from core pass in an empty string; non-core need a check */
3145			#ifdef PERL_CORE
3146			assert(e > s0);
3147			#else
3148			if (LIKELY(e > s0))
3149			#endif
3150
3151			{
3152			/* UTF-8 invariants are returned unchanged. The code below is quite
3153			* capable of handling this, but this shortcuts this very common case
3154			* */
3155			if (UTF8_IS_INVARIANT(*s0)) {
3156			if (advance_p) {
3157			*advance_p = 1;
3158			}
3159
3160			cp_p = s0;
3161			return true;
3162			}
3163
3164			const U8 * s = s0;
3165
3166			/* This dfa is fast. If it accepts the input, it was for a
3167			* well-formed, non-problematic code point, which can be returned
3168			* immediately. Otherwise we call a helper function to figure out the
3169			* more complicated cases.
3170			*
3171			* It is an adaptation of the tables and algorithm given in
3172			* https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides
3173			* comprehensive documentation of the original version. A copyright
3174			* notice for the original version is given at the beginning of this
3175			* file. The Perl adaptation is documented at the definition of
3176			* PL_strict_utf8_dfa_tab[].
3177			*
3178			* The terminology of the dfa refers to a 'class'. The variable 'type'
3179			* would have been named 'class' except that is a reserved word in C++
3180			*
3181			* The table can be a U16 on EBCDIC platforms, so 'state' is declared
3182			* as U16; 'type' is likely to never occupy more than 5 bits. */
3183			PERL_UINT_FAST8_T type = PL_strict_utf8_dfa_tab[*s];
3184			PERL_UINT_FAST16_T state = PL_strict_utf8_dfa_tab[256 + type];
3185			UV uv = (0xff >> type) & NATIVE_UTF8_TO_I8(*s);
3186
3187			while (state > 1 && ++s < e) {
3188			type = PL_strict_utf8_dfa_tab[*s];
3189			state = PL_strict_utf8_dfa_tab[256 + state + type];
3190
3191			uv = UTF8_ACCUMULATE(uv, *s);
3192			}
3193
3194			if (LIKELY(state == 0)) {
3195			if (advance_p) {
3196			*advance_p = s - s0 + 1;
3197			}
3198
3199			*cp_p = UNI_TO_NATIVE(uv);
3200			return true;
3201			}
3202			}
3203
3204			/* Here is potentially problematic. Use the full mechanism */
3205			return utf8_to_uv_msgs_helper_(s0, e, cp_p, advance_p, flags, errors, msgs);
3206			}
3207
3208			PERL_STATIC_INLINE UV
3209			Perl_utf8_to_uv_or_die(const U8 s, const U8 e, STRLEN *advance_p)
3210			{
3211			PERL_ARGS_ASSERT_UTF8_TO_UV_OR_DIE;
3212
3213			UV cp;
3214			(void) utf8_to_uv_flags(s, e, &cp, advance_p, UTF8_DIE_IF_MALFORMED);
3215			return cp;
3216			}
3217
3218			PERL_STATIC_INLINE UV
3219			Perl_utf8n_to_uvchr_msgs(const U8 * const s0,
3220			STRLEN curlen,
3221			STRLEN *retlen,
3222			U32 flags,
3223			U32 * errors,
3224			AV ** msgs)
3225			{
3226			PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
3227
3228			UV cp;
3229			if (LIKELY(utf8_to_uv_msgs(s0, s0 + curlen, &cp, retlen, flags, errors,
3230			msgs)))
3231			{
3232			return cp;
3233			}
3234
3235			if ((flags & UTF8_CHECK_ONLY) && retlen) {
3236			*retlen = ((STRLEN) -1);
3237			}
3238
3239			return 0;
3240			}
3241
3242
3243			PERL_STATIC_INLINE UV
3244			Perl_utf8_to_uvchr_buf(pTHX_ const U8 s, const U8 send, STRLEN *retlen)
3245			{
3246			PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
3247
3248			UV cp;
3249
3250			/* When everything is legal, just return that; but when not:
3251			* 1) if warnings are enabled return 0 and retlen to -1
3252			* 2) if warnings are disabled, set 'flags' to accept any malformation,
3253			* but that will just cause the REPLACEMENT CHARACTER to be returned,
3254			* as the documentation indicates. EMPTY is not really allowed, and
3255			* asserts on debugging builds. But on non-debugging we have to deal
3256			* with it.
3257			* This API means 0 can mean a legal NUL, or the input is malformed; and
3258			* the caller has to know if warnings are disabled to know if it can rely on
3259			* 'retlen'. Best to use utf8_to_uv() instead */
3260			U32 flags = (ckWARN_d(WARN_UTF8)) ? 0 : (UTF8_ALLOW_ANY \| UTF8_ALLOW_EMPTY);
3261
3262			if ( LIKELY(utf8_to_uv_flags(s, send, &cp, retlen, flags))
3263			\|\| flags)
3264			{
3265			return cp;
3266			}
3267
3268			if (retlen) {
3269			*retlen = (STRLEN) -1;
3270			}
3271
3272			return 0;
3273			}
3274
3275			PERL_STATIC_INLINE U8 *
3276			Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
3277			{
3278			return uv_to_utf8_msgs(d, uv, 0, 0);
3279			}
3280
3281			PERL_STATIC_INLINE U8 *
3282			Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
3283			{
3284			return uv_to_utf8_msgs(d, uv, flags, 0);
3285			}
3286
3287			PERL_STATIC_INLINE U8 *
3288			Perl_uv_to_utf8_msgs(pTHX_ U8 d, UV uv, UV flags , HV *msgs)
3289			{
3290			return uvoffuni_to_utf8_flags_msgs(d, NATIVE_TO_UNI(uv), flags, msgs);
3291			}
3292
3293			/* ------------------------------- perl.h ----------------------------- */
3294
3295			/*
3296			=for apidoc_section $utility
3297
3298			=for apidoc is_safe_syscall
3299
3300			Test that the given C (with length C) doesn't contain any internal
3301			C characters.
3302			If it does, set C to C, optionally warn using the C
3303			category, and return FALSE.
3304
3305			Return TRUE if the name is safe.
3306
3307			C and C are used in any warning.
3308
3309			Used by the C macro.
3310
3311			=cut
3312			*/
3313
3314			PERL_STATIC_INLINE bool
3315			Perl_is_safe_syscall(pTHX_ const char pv, STRLEN len, const char what, const char *op_name)
3316			{
3317			/* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
3318			* perl itself uses xce*() functions which accept 8-bit strings.
3319			*/
3320
3321			PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
3322
3323			if (len > 1) {
3324			char *null_at;
3325			if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
3326			SETERRNO(ENOENT, LIB_INVARG);
3327			Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
3328			"Invalid \\0 character in %s for %s: %s\\0%s",
3329			what, op_name, pv, null_at+1);
3330			return FALSE;
3331			}
3332			}
3333
3334			return TRUE;
3335			}
3336
3337			/*
3338
3339			Return true if the supplied filename has a newline character
3340			immediately before the first (hopefully only) NUL.
3341
3342			My original look at this incorrectly used the len from SvPV(), but
3343			that's incorrect, since we allow for a NUL in pv[len-1].
3344
3345			So instead, strlen() and work from there.
3346
3347			This allow for the user reading a filename, forgetting to chomp it,
3348			then calling:
3349
3350			open my $foo, "$file\0";
3351
3352			*/
3353
3354			#ifdef PERL_CORE
3355
3356			PERL_STATIC_INLINE bool
3357			S_should_warn_nl(const char *pv)
3358			{
3359			STRLEN len;
3360
3361			PERL_ARGS_ASSERT_SHOULD_WARN_NL;
3362
3363			len = strlen(pv);
3364
3365			return len > 0 && pv[len-1] == '\n';
3366			}
3367
3368			#endif
3369
3370			#if defined(PERL_IN_PP_C) \|\| defined(PERL_IN_PP_HOT_C)
3371
3372			PERL_STATIC_INLINE bool
3373			S_lossless_NV_to_IV(const NV nv, IV *ivp)
3374			{
3375			/* This function determines if the input NV 'nv' may be converted without
3376			* loss of data to an IV. If not, it returns FALSE taking no other action.
3377			* But if it is possible, it does the conversion, returning TRUE, and
3378			* storing the converted result in 'ivp' /
3379
3380			PERL_ARGS_ASSERT_LOSSLESS_NV_TO_IV;
3381
3382			# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3383			/* Normally any comparison with a NaN returns false; if we can't rely
3384			* on that behaviour, check explicitly */
3385			if (UNLIKELY(Perl_isnan(nv))) {
3386			return FALSE;
3387			}
3388			# endif
3389
3390			# ifndef NV_PRESERVES_UV
3391			STATIC_ASSERT_STMT(((UV)1 << NV_PRESERVES_UV_BITS) - 1 <= (UV)IV_MAX);
3392			# endif
3393
3394			/* Written this way so that with an always-false NaN comparison we
3395			* return false */
3396			if (
3397			# ifdef NV_PRESERVES_UV
3398			LIKELY(nv >= (NV) IV_MIN) && LIKELY(nv < IV_MAX_P1) &&
3399			# else
3400			/* If the condition below is not satisfied, lower bits of nv's
3401			* integral part is already lost and accurate conversion to integer
3402			* is impossible.
3403			* Note this should be consistent with S_sv_2iuv_common in sv.c. */
3404			Perl_fabs(nv) < (NV) ((UV)1 << NV_PRESERVES_UV_BITS) &&
3405			# endif
3406			(IV) nv == nv) {
3407			*ivp = (IV) nv;
3408			return TRUE;
3409			}
3410			return FALSE;
3411			}
3412
3413			#endif
3414
3415			/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
3416
3417			#if defined(PERL_IN_PP_C) \|\| defined(PERL_IN_REGCOMP_ANY) \|\| defined(PERL_IN_TOKE_C) \|\| defined(PERL_IN_UNIVERSAL_C)
3418
3419			#define MAX_CHARSET_NAME_LENGTH 2
3420
3421			PERL_STATIC_INLINE const char *
3422			S_get_regex_charset_name(const U32 flags, STRLEN* const lenp)
3423			{
3424			PERL_ARGS_ASSERT_GET_REGEX_CHARSET_NAME;
3425
3426			/* Returns a string that corresponds to the name of the regex character set
3427			* given by 'flags', and *lenp is set the length of that string, which
3428			* cannot exceed MAX_CHARSET_NAME_LENGTH characters */
3429
3430			*lenp = 1;
3431			switch (get_regex_charset(flags)) {
3432			case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
3433			case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
3434			case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
3435			case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
3436			case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
3437			*lenp = 2;
3438			return ASCII_MORE_RESTRICT_PAT_MODS;
3439			}
3440			/* The NOT_REACHED; hides an assert() which has a rather complex
3441			* definition in perl.h. */
3442			NOT_REACHED; /* NOTREACHED */
3443			return "?"; /* Unknown */
3444			}
3445
3446			#endif
3447
3448			/*
3449
3450			Return false if any get magic is on the SV other than taint magic.
3451
3452			*/
3453
3454			PERL_STATIC_INLINE bool
3455			Perl_sv_only_taint_gmagic(SV *sv)
3456			{
3457			MAGIC *mg = SvMAGIC(sv);
3458
3459			PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
3460
3461			while (mg) {
3462			if (mg->mg_type != PERL_MAGIC_taint
3463			&& !(mg->mg_flags & MGf_GSKIP)
3464			&& mg->mg_virtual->svt_get) {
3465			return FALSE;
3466			}
3467			mg = mg->mg_moremagic;
3468			}
3469
3470			return TRUE;
3471			}
3472
3473			/* ------------------ cop.h ------------------------------------------- */
3474
3475			/* implement GIMME_V() macro */
3476
3477			PERL_STATIC_INLINE U8
3478	945		Perl_gimme_V(pTHX)
3479			{
3480			I32 cxix;
3481	945		U8 gimme = (PL_op->op_flags & OPf_WANT);
3482
3483	945	50	if (gimme)
3484	945		return gimme;
3485	0		cxix = PL_curstackinfo->si_cxsubix;
3486	0	0	if (cxix < 0)
3487	0	0	return PL_curstackinfo->si_type == PERLSI_SORT ? G_SCALAR: G_VOID;
3488			assert(cxstack[cxix].blk_gimme & G_WANT);
3489	0		return (cxstack[cxix].blk_gimme & G_WANT);
3490			}
3491
3492
3493			/* Enter a block. Push a new base context and return its address. */
3494
3495			PERL_STATIC_INLINE PERL_CONTEXT *
3496			Perl_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
3497			{
3498			PERL_CONTEXT * cx;
3499
3500			PERL_ARGS_ASSERT_CX_PUSHBLOCK;
3501
3502			CXINC;
3503			cx = CX_CUR();
3504			cx->cx_type = type;
3505			cx->blk_gimme = gimme;
3506			cx->blk_oldsaveix = saveix;
3507			cx->blk_oldsp = (Stack_off_t)(sp - PL_stack_base);
3508			assert(cxstack_ix <= 0
3509			\|\| CxTYPE(cx-1) == CXt_SUBST
3510			\|\| cx->blk_oldsp >= (cx-1)->blk_oldsp);
3511			cx->blk_oldcop = PL_curcop;
3512			cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
3513			cx->blk_oldscopesp = PL_scopestack_ix;
3514			cx->blk_oldpm = PL_curpm;
3515			cx->blk_old_tmpsfloor = PL_tmps_floor;
3516
3517			PL_tmps_floor = PL_tmps_ix;
3518			CX_DEBUG(cx, "PUSH");
3519			return cx;
3520			}
3521
3522
3523			/* Exit a block (RETURN and LAST). */
3524
3525			PERL_STATIC_INLINE void
3526			Perl_cx_popblock(pTHX_ PERL_CONTEXT *cx)
3527			{
3528			PERL_ARGS_ASSERT_CX_POPBLOCK;
3529
3530			CX_DEBUG(cx, "POP");
3531			/* these 3 are common to cx_popblock and cx_topblock */
3532			PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
3533			PL_scopestack_ix = cx->blk_oldscopesp;
3534			PL_curpm = cx->blk_oldpm;
3535
3536			/* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
3537			* and leaves a CX entry lying around for repeated use, so
3538			* skip for multicall */ \
3539			assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
3540			\|\| PL_savestack_ix == cx->blk_oldsaveix);
3541			PL_curcop = cx->blk_oldcop;
3542			PL_tmps_floor = cx->blk_old_tmpsfloor;
3543			}
3544
3545			/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
3546			* Whereas cx_popblock() restores the state to the point just before
3547			* cx_pushblock() was called, cx_topblock() restores it to the point just
3548			* after cx_pushblock() was called. */
3549
3550			PERL_STATIC_INLINE void
3551			Perl_cx_topblock(pTHX_ PERL_CONTEXT *cx)
3552			{
3553			PERL_ARGS_ASSERT_CX_TOPBLOCK;
3554
3555			CX_DEBUG(cx, "TOP");
3556			/* these 3 are common to cx_popblock and cx_topblock */
3557			PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
3558			PL_scopestack_ix = cx->blk_oldscopesp;
3559			PL_curpm = cx->blk_oldpm;
3560			Perl_rpp_popfree_to(aTHX_ PL_stack_base + cx->blk_oldsp);
3561			}
3562
3563
3564			PERL_STATIC_INLINE void
3565			Perl_cx_pushsub(pTHX_ PERL_CONTEXT cx, CV cv, OP *retop, bool hasargs)
3566			{
3567			U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
3568
3569			PERL_ARGS_ASSERT_CX_PUSHSUB;
3570
3571			PERL_DTRACE_PROBE_ENTRY(cv);
3572			cx->blk_sub.old_cxsubix = PL_curstackinfo->si_cxsubix;
3573			PL_curstackinfo->si_cxsubix = (I32)(cx - PL_curstackinfo->si_cxstack);
3574			cx->blk_sub.cv = cv;
3575			cx->blk_sub.olddepth = CvDEPTH(cv);
3576			cx->blk_sub.prevcomppad = PL_comppad;
3577			cx->cx_type \|= (hasargs) ? CXp_HASARGS : 0;
3578			cx->blk_sub.retop = retop;
3579			SvREFCNT_inc_simple_void_NN(cv);
3580			cx->blk_u16 = PL_op->op_private & (phlags\|OPpDEREF);
3581			}
3582
3583
3584			/* subsets of cx_popsub() */
3585
3586			PERL_STATIC_INLINE void
3587			Perl_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
3588			{
3589			CV *cv;
3590
3591			PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
3592			assert(CxTYPE(cx) == CXt_SUB);
3593
3594			PL_comppad = cx->blk_sub.prevcomppad;
3595			PL_curpad = LIKELY(PL_comppad != NULL) ? AvARRAY(PL_comppad) : NULL;
3596			cv = cx->blk_sub.cv;
3597			CvDEPTH(cv) = cx->blk_sub.olddepth;
3598			cx->blk_sub.cv = NULL;
3599			SvREFCNT_dec(cv);
3600			PL_curstackinfo->si_cxsubix = cx->blk_sub.old_cxsubix;
3601			}
3602
3603
3604			/* handle the @_ part of leaving a sub */
3605
3606			PERL_STATIC_INLINE void
3607			Perl_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
3608			{
3609			AV *av;
3610
3611			PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
3612			assert(CxTYPE(cx) == CXt_SUB);
3613			assert(AvARRAY(MUTABLE_AV(
3614			PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
3615			CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
3616
3617			CX_POP_SAVEARRAY(cx);
3618			av = MUTABLE_AV(PAD_SVl(0));
3619			if (!SvMAGICAL(av) && SvREFCNT(av) == 1
3620			#ifndef PERL_RC_STACK
3621			&& !AvREAL(av)
3622			#endif
3623			)
3624			clear_defarray_simple(av);
3625			else
3626			/* abandon @_ if it got reified */
3627			clear_defarray(av, 0);
3628			}
3629
3630
3631			PERL_STATIC_INLINE void
3632			Perl_cx_popsub(pTHX_ PERL_CONTEXT *cx)
3633			{
3634			PERL_ARGS_ASSERT_CX_POPSUB;
3635			assert(CxTYPE(cx) == CXt_SUB);
3636
3637			PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
3638
3639			if (CxHASARGS(cx))
3640			cx_popsub_args(cx);
3641			cx_popsub_common(cx);
3642			}
3643
3644
3645			PERL_STATIC_INLINE void
3646			Perl_cx_pushformat(pTHX_ PERL_CONTEXT cx, CV cv, OP retop, GV gv)
3647			{
3648			PERL_ARGS_ASSERT_CX_PUSHFORMAT;
3649
3650			cx->blk_format.old_cxsubix = PL_curstackinfo->si_cxsubix;
3651			PL_curstackinfo->si_cxsubix= (I32)(cx - PL_curstackinfo->si_cxstack);
3652			cx->blk_format.cv = cv;
3653			cx->blk_format.retop = retop;
3654			cx->blk_format.gv = gv;
3655			cx->blk_format.dfoutgv = PL_defoutgv;
3656			cx->blk_format.prevcomppad = PL_comppad;
3657			cx->blk_u16 = 0;
3658
3659			SvREFCNT_inc_simple_void_NN(cv);
3660			CvDEPTH(cv)++;
3661			SvREFCNT_inc_void(cx->blk_format.dfoutgv);
3662			}
3663
3664
3665			PERL_STATIC_INLINE void
3666			Perl_cx_popformat(pTHX_ PERL_CONTEXT *cx)
3667			{
3668			CV *cv;
3669			GV *dfout;
3670
3671			PERL_ARGS_ASSERT_CX_POPFORMAT;
3672			assert(CxTYPE(cx) == CXt_FORMAT);
3673
3674			dfout = cx->blk_format.dfoutgv;
3675			setdefout(dfout);
3676			cx->blk_format.dfoutgv = NULL;
3677			SvREFCNT_dec_NN(dfout);
3678
3679			PL_comppad = cx->blk_format.prevcomppad;
3680			PL_curpad = LIKELY(PL_comppad != NULL) ? AvARRAY(PL_comppad) : NULL;
3681			cv = cx->blk_format.cv;
3682			cx->blk_format.cv = NULL;
3683			--CvDEPTH(cv);
3684			SvREFCNT_dec_NN(cv);
3685			PL_curstackinfo->si_cxsubix = cx->blk_format.old_cxsubix;
3686			}
3687
3688
3689			PERL_STATIC_INLINE void
3690			Perl_push_evalortry_common(pTHX_ PERL_CONTEXT cx, OP retop, SV *namesv)
3691			{
3692			cx->blk_eval.retop = retop;
3693			cx->blk_eval.old_namesv = namesv;
3694			cx->blk_eval.old_eval_root = PL_eval_root;
3695			cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
3696			cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
3697			cx->blk_eval.cur_top_env = PL_top_env;
3698
3699			assert(!(PL_in_eval & ~ 0x3F));
3700			assert(!(PL_op->op_type & ~0x1FF));
3701			cx->blk_u16 = (PL_in_eval & 0x3F) \| ((U16)PL_op->op_type << 7);
3702			}
3703
3704			PERL_STATIC_INLINE void
3705			Perl_cx_pusheval(pTHX_ PERL_CONTEXT cx, OP retop, SV *namesv)
3706			{
3707			PERL_ARGS_ASSERT_CX_PUSHEVAL;
3708
3709			Perl_push_evalortry_common(aTHX_ cx, retop, namesv);
3710
3711			cx->blk_eval.old_cxsubix = PL_curstackinfo->si_cxsubix;
3712			PL_curstackinfo->si_cxsubix = (I32)(cx - PL_curstackinfo->si_cxstack);
3713			}
3714
3715			PERL_STATIC_INLINE void
3716			Perl_cx_pushtry(pTHX_ PERL_CONTEXT cx, OP retop)
3717			{
3718			PERL_ARGS_ASSERT_CX_PUSHTRY;
3719
3720			Perl_push_evalortry_common(aTHX_ cx, retop, NULL);
3721
3722			/* Don't actually change it, just store the current value so it's restored
3723			* by the common popeval */
3724			cx->blk_eval.old_cxsubix = PL_curstackinfo->si_cxsubix;
3725			}
3726
3727
3728			PERL_STATIC_INLINE void
3729			Perl_cx_popeval(pTHX_ PERL_CONTEXT *cx)
3730			{
3731			SV *sv;
3732
3733			PERL_ARGS_ASSERT_CX_POPEVAL;
3734			assert(CxTYPE(cx) == CXt_EVAL);
3735
3736			PL_in_eval = CxOLD_IN_EVAL(cx);
3737			assert(!(PL_in_eval & 0xc0));
3738			PL_eval_root = cx->blk_eval.old_eval_root;
3739			sv = cx->blk_eval.cur_text;
3740			if (sv && CxEVAL_TXT_REFCNTED(cx)) {
3741			cx->blk_eval.cur_text = NULL;
3742			SvREFCNT_dec_NN(sv);
3743			}
3744
3745			sv = cx->blk_eval.old_namesv;
3746			if (sv) {
3747			cx->blk_eval.old_namesv = NULL;
3748			SvREFCNT_dec_NN(sv);
3749			}
3750			PL_curstackinfo->si_cxsubix = cx->blk_eval.old_cxsubix;
3751			}
3752
3753
3754			/* push a plain loop, i.e.
3755			* { block }
3756			* while (cond) { block }
3757			* for (init;cond;continue) { block }
3758			* This loop can be last/redo'ed etc.
3759			*/
3760
3761			PERL_STATIC_INLINE void
3762			Perl_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
3763			{
3764			PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
3765			cx->blk_loop.my_op = cLOOP;
3766			}
3767
3768
3769			/* push a true for loop, i.e.
3770			* for var (list) { block }
3771			*/
3772
3773			PERL_STATIC_INLINE void
3774			Perl_cx_pushloop_for(pTHX_ PERL_CONTEXT cx, void itervarp, SV* itersave)
3775			{
3776			PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
3777
3778			/* this one line is common with cx_pushloop_plain */
3779			cx->blk_loop.my_op = cLOOP;
3780
3781			cx->blk_loop.itervar_u.svp = (SV**)itervarp;
3782			cx->blk_loop.itersave = itersave;
3783			#ifdef USE_ITHREADS
3784			cx->blk_loop.oldcomppad = PL_comppad;
3785			#endif
3786			}
3787
3788
3789			/* pop all loop types, including plain */
3790
3791			PERL_STATIC_INLINE void
3792			Perl_cx_poploop(pTHX_ PERL_CONTEXT *cx)
3793			{
3794			PERL_ARGS_ASSERT_CX_POPLOOP;
3795
3796			assert(CxTYPE_is_LOOP(cx));
3797			if ( CxTYPE(cx) == CXt_LOOP_ARY
3798			\|\| CxTYPE(cx) == CXt_LOOP_LAZYSV)
3799			{
3800			/* Free ary or cur. This assumes that state_u.ary.ary
3801			* aligns with state_u.lazysv.cur. See cx_dup() */
3802			SV *sv = cx->blk_loop.state_u.lazysv.cur;
3803			cx->blk_loop.state_u.lazysv.cur = NULL;
3804			SvREFCNT_dec_NN(sv);
3805			if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
3806			sv = cx->blk_loop.state_u.lazysv.end;
3807			cx->blk_loop.state_u.lazysv.end = NULL;
3808			SvREFCNT_dec_NN(sv);
3809			}
3810			}
3811			if (cx->cx_type & (CXp_FOR_PAD\|CXp_FOR_GV)) {
3812			SV *cursv;
3813			SV **svp = (cx)->blk_loop.itervar_u.svp;
3814			if ((cx->cx_type & CXp_FOR_GV))
3815			svp = &GvSV((GV*)svp);
3816			cursv = *svp;
3817			*svp = cx->blk_loop.itersave;
3818			cx->blk_loop.itersave = NULL;
3819			SvREFCNT_dec(cursv);
3820			}
3821			if (cx->cx_type & (CXp_FOR_GV\|CXp_FOR_LVREF))
3822			SvREFCNT_dec(cx->blk_loop.itervar_u.svp);
3823			}
3824
3825
3826			PERL_STATIC_INLINE void
3827			Perl_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
3828			{
3829			PERL_ARGS_ASSERT_CX_PUSHWHEN;
3830
3831			cx->blk_givwhen.leave_op = cLOGOP->op_other;
3832			}
3833
3834
3835			PERL_STATIC_INLINE void
3836			Perl_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
3837			{
3838			PERL_ARGS_ASSERT_CX_POPWHEN;
3839			assert(CxTYPE(cx) == CXt_WHEN);
3840
3841			PERL_UNUSED_ARG(cx);
3842			PERL_UNUSED_CONTEXT;
3843			/* currently NOOP */
3844			}
3845
3846
3847			PERL_STATIC_INLINE void
3848			Perl_cx_pushgiven(pTHX_ PERL_CONTEXT cx, SV orig_defsv)
3849			{
3850			PERL_ARGS_ASSERT_CX_PUSHGIVEN;
3851
3852			cx->blk_givwhen.leave_op = cLOGOP->op_other;
3853			cx->blk_givwhen.defsv_save = orig_defsv;
3854			}
3855
3856
3857			PERL_STATIC_INLINE void
3858			Perl_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
3859			{
3860			SV *sv;
3861
3862			PERL_ARGS_ASSERT_CX_POPGIVEN;
3863			assert(CxTYPE(cx) == CXt_GIVEN);
3864
3865			sv = GvSV(PL_defgv);
3866			GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
3867			cx->blk_givwhen.defsv_save = NULL;
3868			SvREFCNT_dec(sv);
3869			}
3870
3871
3872			/* Make @_ empty in-place in simple cases: a cheap av_clear().
3873			* See Perl_clear_defarray() for non-simple cases */
3874
3875
3876			PERL_STATIC_INLINE void
3877			Perl_clear_defarray_simple(pTHX_ AV *av)
3878			{
3879			PERL_ARGS_ASSERT_CLEAR_DEFARRAY_SIMPLE;
3880
3881			assert(SvTYPE(av) == SVt_PVAV);
3882			assert(!SvREADONLY(av));
3883			assert(!SvMAGICAL(av));
3884			assert(SvREFCNT(av) == 1);
3885
3886			#ifdef PERL_RC_STACK
3887			assert(AvREAL(av));
3888			/* this code assumes that destructors called here can't free av
3889			* itself, because pad[0] and/or CX pointers will keep it alive */
3890			SSize_t i = AvFILLp(av);
3891			while (i >= 0) {
3892			SV *sv = AvARRAY(av)[i];
3893			AvARRAY(av)[i--] = NULL;
3894			SvREFCNT_dec(sv);
3895			}
3896			#else
3897			assert(!AvREAL(av));
3898			#endif
3899			AvFILLp(av) = -1;
3900			Perl_av_remove_offset(aTHX_ av);
3901			}
3902
3903			/* Switch to a different argument stack.
3904			*
3905			* Note that it doesn't update PL_curstackinfo->si_stack_nonrc_base,
3906			* so this should only be used as part of a general switching between
3907			* stackinfos.
3908			*/
3909
3910			PERL_STATIC_INLINE void
3911			Perl_switch_argstack(pTHX_ AV *to)
3912			{
3913			PERL_ARGS_ASSERT_SWITCH_ARGSTACK;
3914
3915			AvFILLp(PL_curstack) = PL_stack_sp - PL_stack_base;
3916			PL_stack_base = AvARRAY(to);
3917			PL_stack_max = PL_stack_base + AvMAX(to);
3918			PL_stack_sp = PL_stack_base + AvFILLp(to);
3919			PL_curstack = to;
3920			}
3921
3922
3923			/* Push, and switch to a new stackinfo, allocating one if none are spare,
3924			* to get a fresh set of stacks.
3925			* Update all the interpreter variables like PL_curstackinfo,
3926			* PL_stack_sp, etc.
3927			* current flag meanings:
3928			* 1 make the new arg stack AvREAL
3929			*/
3930
3931
3932			PERL_STATIC_INLINE void
3933			Perl_push_stackinfo(pTHX_ I32 type, UV flags)
3934			{
3935			PERL_ARGS_ASSERT_PUSH_STACKINFO;
3936
3937			PERL_SI *next = PL_curstackinfo->si_next;
3938			DEBUG_l({
3939			int i = 0; PERL_SI *p = PL_curstackinfo;
3940			while (p) { i++; p = p->si_prev; }
3941			Perl_deb(aTHX_ "push STACKINFO %d in %s at %s:%d\n",
3942			i, SAFE_FUNCTION__, __FILE__, __LINE__);
3943			})
3944
3945			if (!next) {
3946			next = new_stackinfo_flags(32, 2048/sizeof(PERL_CONTEXT) - 1, flags);
3947			next->si_prev = PL_curstackinfo;
3948			PL_curstackinfo->si_next = next;
3949			}
3950			next->si_type = type;
3951			next->si_cxix = -1;
3952			next->si_cxsubix = -1;
3953			PUSHSTACK_INIT_HWM(next);
3954			#ifdef PERL_RC_STACK
3955			next->si_stack_nonrc_base = 0;
3956			#endif
3957			if (flags & 1)
3958			AvREAL_on(next->si_stack);
3959			else
3960			AvREAL_off(next->si_stack);
3961			AvFILLp(next->si_stack) = 0;
3962			switch_argstack(next->si_stack);
3963			PL_curstackinfo = next;
3964			SET_MARK_OFFSET;
3965			}
3966
3967
3968			/* Pop, then switch to the previous stackinfo and set of stacks.
3969			* Update all the interpreter variables like PL_curstackinfo,
3970			* PL_stack_sp, etc. */
3971
3972			PERL_STATIC_INLINE void
3973			Perl_pop_stackinfo(pTHX)
3974			{
3975			PERL_ARGS_ASSERT_POP_STACKINFO;
3976
3977			PERL_SI * const prev = PL_curstackinfo->si_prev;
3978			DEBUG_l({
3979			int i = -1; PERL_SI *p = PL_curstackinfo;
3980			while (p) { i++; p = p->si_prev; }
3981			Perl_deb(aTHX_ "pop STACKINFO %d in %s at %s:%d\n",
3982			i, SAFE_FUNCTION__, __FILE__, __LINE__);})
3983			if (!prev) {
3984			Perl_croak_popstack();
3985			}
3986
3987			switch_argstack(prev->si_stack);
3988			/* don't free prev here, free them all at the END{} */
3989			PL_curstackinfo = prev;
3990			}
3991
3992
3993
3994			/*
3995			=for apidoc newPADxVOP
3996
3997			Constructs, checks and returns an op containing a pad offset. C is
3998			the opcode, which should be one of C, C, C
3999			or C. The returned op will have the C field set by
4000			the C argument.
4001
4002			This is convenient when constructing a large optree in nested function
4003			calls, as it avoids needing to store the pad op directly to set the
4004			C field as a side-effect. For example
4005
4006			o = op_append_elem(OP_LINESEQ, o,
4007			newPADxVOP(OP_PADSV, 0, padix));
4008
4009			=cut
4010			*/
4011
4012			PERL_STATIC_INLINE OP *
4013			Perl_newPADxVOP(pTHX_ I32 type, I32 flags, PADOFFSET padix)
4014			{
4015			PERL_ARGS_ASSERT_NEWPADXVOP;
4016
4017			assert(type == OP_PADSV \|\| type == OP_PADAV \|\| type == OP_PADHV
4018			\|\| type == OP_PADCV);
4019			OP *o = newOP(type, flags);
4020			o->op_targ = padix;
4021			return o;
4022			}
4023
4024			/* ------------------ util.h ------------------------------------------- */
4025
4026			/*
4027			=for apidoc_section $string
4028
4029			=for apidoc foldEQ
4030			=for apidoc_item foldEQ_locale
4031
4032			These each return true if the leading C bytes of the strings C and
4033			C are the same case-insensitively; false otherwise.
4034
4035			In C, uppercase and lowercase ASCII range bytes match themselves and
4036			their opposite case counterparts. Non-cased and non-ASCII range bytes match
4037			only themselves.
4038
4039			In C, the comparison is based on the current locale.
4040			If that locale is UTF-8, the results are the same as C, leading to
4041			incorrect values for non-ASCII range code points. Use C>
4042			instead.
4043
4044			=cut
4045			*/
4046
4047			PERL_STATIC_INLINE I32
4048			Perl_foldEQ(pTHX_ const char s1, const char s2, I32 len)
4049			{
4050			PERL_UNUSED_CONTEXT;
4051
4052			const U8 a = (const U8 )s1;
4053			const U8 b = (const U8 )s2;
4054
4055			PERL_ARGS_ASSERT_FOLDEQ;
4056
4057			assert(len >= 0);
4058
4059			while (len--) {
4060			if (a != b && a != PL_fold[b])
4061			return 0;
4062			a++,b++;
4063			}
4064			return 1;
4065			}
4066
4067			PERL_STATIC_INLINE I32
4068			Perl_foldEQ_latin1(pTHX_ const char s1, const char s2, I32 len)
4069			{
4070			/* Compare non-UTF-8 using Unicode (Latin1) semantics. Works on all folds
4071			* representable without UTF-8, except for LATIN_SMALL_LETTER_SHARP_S, and
4072			* does not check for this. Nor does it check that the strings each have
4073			* at least 'len' characters. */
4074
4075			PERL_UNUSED_CONTEXT;
4076
4077			const U8 a = (const U8 )s1;
4078			const U8 b = (const U8 )s2;
4079
4080			PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
4081
4082			assert(len >= 0);
4083
4084			while (len--) {
4085			if (a != b && a != PL_fold_latin1[b]) {
4086			return 0;
4087			}
4088			a++, b++;
4089			}
4090			return 1;
4091			}
4092
4093			PERL_STATIC_INLINE I32
4094			Perl_foldEQ_locale(pTHX_ const char s1, const char s2, I32 len)
4095			{
4096			const U8 a = (const U8 )s1;
4097			const U8 b = (const U8 )s2;
4098
4099			PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
4100
4101			assert(len >= 0);
4102
4103			while (len--) {
4104			if (a != b && a != PL_fold_locale[b]) {
4105			DEBUG_Lv(PerlIO_printf(Perl_debug_log,
4106			"%s:%d: Our records indicate %02x is not a fold of %02x"
4107			" or its mate %02x\n",
4108			__FILE__, __LINE__, a, b, PL_fold_locale[*b]));
4109
4110			return 0;
4111			}
4112			a++,b++;
4113			}
4114			return 1;
4115			}
4116
4117			/*
4118			=for apidoc_section $string
4119			=for apidoc my_strnlen
4120
4121			The C library C if available, or a Perl implementation of it.
4122
4123			C computes the length of the string, up to C
4124			bytes. It will never attempt to address more than C
4125			bytes, making it suitable for use with strings that are not
4126			guaranteed to be NUL-terminated.
4127
4128			=cut
4129
4130			Description stolen from http://man.openbsd.org/strnlen.3,
4131			implementation stolen from PostgreSQL.
4132			*/
4133			#ifndef HAS_STRNLEN
4134
4135			PERL_STATIC_INLINE Size_t
4136			Perl_my_strnlen(const char *str, Size_t maxlen)
4137			{
4138			PERL_ARGS_ASSERT_MY_STRNLEN;
4139
4140			const char end = (const char ) memchr(str, '\0', maxlen);
4141
4142			if (end == NULL) return maxlen;
4143			return end - str;
4144			}
4145
4146			#endif
4147
4148			#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) \|\| defined(PERL_EXT))
4149
4150			PERL_STATIC_INLINE void *
4151			S_my_memrchr(const char * s, const char c, const STRLEN len)
4152			{
4153			/* memrchr(), since many platforms lack it */
4154
4155			const char * t = s + len - 1;
4156
4157			PERL_ARGS_ASSERT_MY_MEMRCHR;
4158
4159			while (t >= s) {
4160			if (*t == c) {
4161			return (void *) t;
4162			}
4163			t--;
4164			}
4165
4166			return NULL;
4167			}
4168
4169			#endif
4170
4171			PERL_STATIC_INLINE char *
4172			Perl_mortal_getenv(const char * str)
4173			{
4174			/* This implements a (mostly) thread-safe, sequential-call-safe getenv().
4175			*
4176			* It's (mostly) thread-safe because it uses a mutex to prevent other
4177			* threads (that look at this mutex) from destroying the result before this
4178			* routine has a chance to copy the result to a place that won't be
4179			* destroyed before the caller gets a chance to handle it. That place is a
4180			* mortal SV. khw chose this over SAVEFREEPV because he is under the
4181			* impression that the SV will hang around longer under more circumstances
4182			*
4183			* The reason it isn't completely thread-safe is that other code could
4184			* simply not pay attention to the mutex. All of the Perl core uses the
4185			* mutex, but it is possible for code from, say XS, to not use this mutex,
4186			* defeating the safety.
4187			*
4188			* getenv() returns, in some implementations, a pointer to a spot in the
4189			* **environ array, which could be invalidated at any time by this or
4190			* another thread changing the environment. Other implementations copy the
4191			* **environ value to a static buffer, returning a pointer to that. That
4192			* buffer might or might not be invalidated by a getenv() call in another
4193			* thread. If it does get zapped, we need an exclusive lock. Otherwise,
4194			* many getenv() calls can safely be running simultaneously, so a
4195			* many-reader (but no simultaneous writers) lock is ok. There is a
4196			* Configure probe to see if another thread destroys the buffer, and the
4197			* mutex is defined accordingly.
4198			*
4199			* But in all cases, using the mutex prevents these problems, as long as
4200			* all code uses the same mutex.
4201			*
4202			* A complication is that this can be called during phases where the
4203			* mortalization process isn't available. These are in interpreter
4204			* destruction or early in construction. khw believes that at these times
4205			* there shouldn't be anything else going on, so plain getenv is safe AS
4206			* LONG AS the caller acts on the return before calling it again. */
4207
4208			char * ret;
4209			dTHX;
4210
4211			PERL_ARGS_ASSERT_MORTAL_GETENV;
4212
4213			/* Can't mortalize without stacks. khw believes that no other threads
4214			* should be running, so no need to lock things, and this may be during a
4215			* phase when locking isn't even available */
4216			if (UNLIKELY(PL_scopestack_ix == 0)) {
4217			return getenv(str);
4218			}
4219
4220			#ifdef PERL_MEM_LOG
4221
4222			/* A major complication arises under PERL_MEM_LOG. When that is active,
4223			* every memory allocation may result in logging, depending on the value of
4224			* ENV{PERL_MEM_LOG} at the moment. That means, as we create the SV for
4225			* saving ENV{foo}'s value (but before saving it), the logging code will
4226			* call us recursively to find out what ENV{PERL_MEM_LOG} is. Without some
4227			* care that could lead to: 1) infinite recursion; or 2) deadlock (trying to
4228			* lock a boolean mutex recursively); 3) destroying the getenv() static
4229			* buffer; or 4) destroying the temporary created by this for the copy
4230			* causes a log entry to be made which could cause a new temporary to be
4231			* created, which will need to be destroyed at some point, leading to an
4232			* infinite loop.
4233			*
4234			* The solution adopted here (after some gnashing of teeth) is to detect
4235			* the recursive calls and calls from the logger, and treat them specially.
4236			* Let's say we want to do getenv("foo"). We first find
4237			* getenv(PERL_MEM_LOG) and save it to a fixed-length per-interpreter
4238			* variable, so no temporary is required. Then we do getenv(foo), and in
4239			* the process of creating a temporary to save it, this function will be
4240			* called recursively to do a getenv(PERL_MEM_LOG). On the recursed call,
4241			* we detect that it is such a call and return our saved value instead of
4242			* locking and doing a new getenv(). This solves all of problems 1), 2),
4243			* and 3). Because all the getenv()s are done while the mutex is locked,
4244			* the state cannot have changed. To solve 4), we don't create a temporary
4245			* when this is called from the logging code. That code disposes of the
4246			* return value while the mutex is still locked.
4247			*
4248			* The value of getenv(PERL_MEM_LOG) can be anything, but only initial
4249			* digits and 3 particular letters are significant; the rest are ignored by
4250			* the memory logging code. Thus the per-interpreter variable only needs
4251			* to be large enough to save the significant information, the size of
4252			* which is known at compile time. The first byte is extra, reserved for
4253			* flags for our use. To protect against overflowing, only the reserved
4254			* byte, as many digits as don't overflow, and the three letters are
4255			* stored.
4256			*
4257			* The reserved byte has two bits:
4258			* 0x1 if set indicates that if we get here, it is a recursive call of
4259			* getenv()
4260			* 0x2 if set indicates that the call is from the logging code.
4261			*
4262			* If the flag indicates this is a recursive call, just return the stored
4263			* value of PL_mem_log; An empty value gets turned into NULL. */
4264			if (strEQ(str, "PERL_MEM_LOG") && PL_mem_log[0] & 0x1) {
4265			if (PL_mem_log[1] == '\0') {
4266			return NULL;
4267			} else {
4268			return PL_mem_log + 1;
4269			}
4270			}
4271
4272			#endif
4273
4274			GETENV_LOCK;
4275
4276			#ifdef PERL_MEM_LOG
4277
4278			/* Here we are in a critical section. As explained above, we do our own
4279			* getenv(PERL_MEM_LOG), saving the result safely. */
4280			ret = getenv("PERL_MEM_LOG");
4281			if (ret == NULL) { /* No logging active */
4282
4283			/* Return that immediately if called from the logging code */
4284			if (PL_mem_log[0] & 0x2) {
4285			GETENV_UNLOCK;
4286			return NULL;
4287			}
4288
4289			PL_mem_log[1] = '\0';
4290			}
4291			else {
4292			char mem_log_meat = PL_mem_log + 1; / first byte reserved */
4293
4294			/* There is nothing to prevent the value of PERL_MEM_LOG from being an
4295			* extremely long string. But we want only a few characters from it.
4296			* PL_mem_log has been made large enough to hold just the ones we need.
4297			* First the file descriptor. */
4298			if (isDIGIT(*ret)) {
4299			const char * s = ret;
4300			if (UNLIKELY(*s == '0')) {
4301
4302			/* Reduce multiple leading zeros to a single one. This is to
4303			* allow the caller to change what to do with leading zeros. */
4304			*mem_log_meat++ = '0';
4305			s++;
4306			while (*s == '0') {
4307			s++;
4308			}
4309			}
4310
4311			/* If the input overflows, copy just enough for the result to also
4312			* overflow, plus 1 to make sure */
4313			while (isDIGIT(*s) && s < ret + TYPE_DIGITS(UV) + 1) {
4314			mem_log_meat++ = s++;
4315			}
4316			}
4317
4318			/* Then each of the four significant characters */
4319			if (strchr(ret, 'm')) {
4320			*mem_log_meat++ = 'm';
4321			}
4322			if (strchr(ret, 's')) {
4323			*mem_log_meat++ = 's';
4324			}
4325			if (strchr(ret, 't')) {
4326			*mem_log_meat++ = 't';
4327			}
4328			if (strchr(ret, 'c')) {
4329			*mem_log_meat++ = 'c';
4330			}
4331			*mem_log_meat = '\0';
4332
4333			assert(mem_log_meat < PL_mem_log + sizeof(PL_mem_log));
4334			}
4335
4336			/* If we are being called from the logger, it only needs the significant
4337			* portion of PERL_MEM_LOG, and doesn't need a safe copy */
4338			if (PL_mem_log[0] & 0x2) {
4339			assert(strEQ(str, "PERL_MEM_LOG"));
4340			GETENV_UNLOCK;
4341			return PL_mem_log + 1;
4342			}
4343
4344			/* Here is a generic getenv(). This could be a getenv("PERL_MEM_LOG") that
4345			* is coming from other than the logging code, so it should be treated the
4346			* same as any other getenv(), returning the full value, not just the
4347			* significant part, and having its value saved. Set the flag that
4348			* indicates any call to this routine will be a recursion from here */
4349			PL_mem_log[0] = 0x1;
4350
4351			#endif
4352
4353			/* Now get the value of the real desired variable, and save a copy */
4354			ret = getenv(str);
4355
4356			if (ret != NULL) {
4357			ret = SvPVX( newSVpvn_flags(ret, strlen(ret) ,SVs_TEMP) );
4358			}
4359
4360			GETENV_UNLOCK;
4361
4362			#ifdef PERL_MEM_LOG
4363
4364			/* Clear the buffer */
4365			Zero(PL_mem_log, sizeof(PL_mem_log), char);
4366
4367			#endif
4368
4369			return ret;
4370			}
4371
4372			PERL_STATIC_INLINE bool
4373			Perl_sv_isbool(pTHX_ const SV *sv)
4374			{
4375			PERL_UNUSED_CONTEXT;
4376			return SvBoolFlagsOK(sv) && BOOL_INTERNALS_sv_isbool(sv);
4377			}
4378
4379			#ifdef USE_ITHREADS
4380
4381			PERL_STATIC_INLINE AV *
4382			Perl_cop_file_avn(pTHX_ const COP *cop) {
4383
4384			PERL_ARGS_ASSERT_COP_FILE_AVN;
4385
4386			const char *file = CopFILE(cop);
4387			if (file) {
4388			GV *gv = gv_fetchfile_flags(file, strlen(file), GVF_NOADD);
4389			if (gv) {
4390			return GvAVn(gv);
4391			}
4392			else
4393			return NULL;
4394			}
4395			else
4396			return NULL;
4397			}
4398
4399			#endif
4400
4401			PERL_STATIC_INLINE PADNAME *
4402			Perl_padname_refcnt_inc(PADNAME *pn)
4403			{
4404			PadnameREFCNT(pn)++;
4405			return pn;
4406			}
4407
4408			PERL_STATIC_INLINE PADNAMELIST *
4409			Perl_padnamelist_refcnt_inc(PADNAMELIST *pnl)
4410			{
4411			PadnamelistREFCNT(pnl)++;
4412			return pnl;
4413			}
4414
4415			/* copy a string to a safe spot */
4416
4417			/*
4418			=for apidoc_section $string
4419			=for apidoc savepv
4420			=for apidoc_item savepvn
4421			=for apidoc_item savepvs
4422			=for apidoc_item savesvpv
4423			=for apidoc_item savesharedpv
4424			=for apidoc_item savesharedpvn
4425			=for apidoc_item savesharedpvs
4426			=for apidoc_item savesharedsvpv
4427
4428			Perl's version of C (or C would be if it existed).
4429
4430			These each return a pointer to a newly allocated string which is a duplicate of
4431			the input string.
4432
4433			The forms differ in how the string to be copied is specified, and where the new
4434			memory is allocated from.
4435
4436			To prevent memory leaks, the memory allocated for the new string needs to be
4437			freed when no longer needed. This can be done with the C>
4438			function, or L\|perlguts/SAVEFREEPV(p)>.
4439
4440			The forms whose names contain C differ from the corresponding form
4441			without that in its name, only in that the memory in the former comes from
4442			memory shared between threads. This is needed, because on some platforms,
4443			Windows for example, all allocated memory owned by a thread is deallocated when
4444			that thread ends. So if you need that not to happen, you need to use the
4445			shared memory forms.
4446
4447			The string to copy in C is a C language string literal surrounded by
4448			double quotes.
4449
4450			The string to copy in the forms whose name contains C comes from the PV
4451			in the SV argument C, using C
4452
4453			The string to copy in the remaining forms comes from the C argument.
4454
4455			In the case of C, the size of the string is determined by C,
4456			which means it may not contain embedded C characters, and must have a
4457			trailing C.
4458
4459			In the case of C, C gives the length of C, hence it may
4460			contain embedded C characters. The copy will be guaranteed to have a
4461			trailing NUL added if not already present.
4462
4463			=cut
4464			*/
4465
4466			PERL_STATIC_INLINE char *
4467			Perl_savepv(pTHX_ const char *pv)
4468			{
4469			PERL_UNUSED_CONTEXT;
4470			if (!pv)
4471			return NULL;
4472			else {
4473			char *newaddr;
4474			const STRLEN pvlen = strlen(pv)+1;
4475			Newx(newaddr, pvlen, char);
4476			return (char*)memcpy(newaddr, pv, pvlen);
4477			}
4478			}
4479
4480			/* same thing but with a known length */
4481
4482			PERL_STATIC_INLINE char *
4483			Perl_savepvn(pTHX_ const char *pv, Size_t len)
4484			{
4485			char *newaddr;
4486			PERL_UNUSED_CONTEXT;
4487
4488			Newx(newaddr,len+1,char);
4489			/* Give a meaning to NULL pointer mainly for the use in sv_magic() */
4490			if (pv) {
4491			/* might not be null terminated */
4492			newaddr[len] = '\0';
4493			return (char *) CopyD(pv,newaddr,len,char);
4494			}
4495			else {
4496			return (char *) ZeroD(newaddr,len+1,char);
4497			}
4498			}
4499
4500			PERL_STATIC_INLINE char *
4501			Perl_savesvpv(pTHX_ SV *sv)
4502			{
4503			STRLEN len;
4504			const char * const pv = SvPV_const(sv, len);
4505			char *newaddr;
4506
4507			PERL_ARGS_ASSERT_SAVESVPV;
4508
4509			++len;
4510			Newx(newaddr,len,char);
4511			return (char *) CopyD(pv,newaddr,len,char);
4512			}
4513
4514			PERL_STATIC_INLINE char *
4515			Perl_savesharedsvpv(pTHX_ SV *sv)
4516			{
4517			STRLEN len;
4518			const char * const pv = SvPV_const(sv, len);
4519
4520			PERL_ARGS_ASSERT_SAVESHAREDSVPV;
4521
4522			return savesharedpvn(pv, len);
4523			}
4524
4525			#ifndef PERL_GET_CONTEXT_DEFINED
4526
4527			/*
4528			=for apidoc_section $embedding
4529			=for apidoc get_context
4530
4531			Implements L>, which you should use instead.
4532
4533			=cut
4534			*/
4535
4536			PERL_STATIC_INLINE void *
4537			Perl_get_context(void)
4538			{
4539			# if defined(USE_ITHREADS)
4540			# ifdef OLD_PTHREADS_API
4541			pthread_addr_t t;
4542			int error = pthread_getspecific(PL_thr_key, &t);
4543			if (error)
4544			Perl_croak_nocontext("panic: pthread_getspecific, error=%d", error);
4545			return (void*)t;
4546			# elif defined(I_MACH_CTHREADS)
4547			return (void*)cthread_data(cthread_self());
4548			# else
4549			return (void*)PTHREAD_GETSPECIFIC(PL_thr_key);
4550			# endif
4551			# else
4552			return (void*)NULL;
4553			# endif
4554			}
4555
4556			#endif
4557
4558			PERL_STATIC_INLINE MGVTBL*
4559			Perl_get_vtbl(pTHX_ int vtbl_id)
4560			{
4561			PERL_UNUSED_CONTEXT;
4562
4563			return (vtbl_id < 0 \|\| vtbl_id >= magic_vtable_max)
4564			? NULL : (MGVTBL*)PL_magic_vtables + vtbl_id;
4565			}
4566
4567			/*
4568			=for apidoc_section $string
4569			=for apidoc my_strlcat
4570
4571			The C library C if available, or a Perl implementation of it.
4572			This operates on C C-terminated strings.
4573
4574			C appends string C to the end of C. It will append at
4575			most S> bytes. It will then C-terminate,
4576			unless C is 0 or the original C string was longer than C (in
4577			practice this should not happen as it means that either C is incorrect or
4578			that C is not a proper C-terminated string).
4579
4580			Note that C is the full size of the destination buffer and
4581			the result is guaranteed to be C-terminated if there is room. Note that
4582			room for the C should be included in C.
4583
4584			The return value is the total length that C would have if C is
4585			sufficiently large. Thus it is the initial length of C plus the length of
4586			C. If C is smaller than the return, the excess was not appended.
4587
4588			=cut
4589
4590			Description stolen from http://man.openbsd.org/strlcat.3
4591			*/
4592			#ifndef HAS_STRLCAT
4593			PERL_STATIC_INLINE Size_t
4594			Perl_my_strlcat(char dst, const char src, Size_t size)
4595			{
4596			Size_t used, length, copy;
4597
4598			used = strlen(dst);
4599			length = strlen(src);
4600			if (size > 0 && used < size - 1) {
4601			copy = (length >= size - used) ? size - used - 1 : length;
4602			memcpy(dst + used, src, copy);
4603			dst[used + copy] = '\0';
4604			}
4605			return used + length;
4606			}
4607			#endif
4608
4609
4610			/*
4611			=for apidoc my_strlcpy
4612
4613			The C library C if available, or a Perl implementation of it.
4614			This operates on C C-terminated strings.
4615
4616			C copies up to S> bytes from the string C
4617			to C, C-terminating the result if C is not 0.
4618
4619			The return value is the total length C would be if the copy completely
4620			succeeded. If it is larger than C, the excess was not copied.
4621
4622			=cut
4623
4624			Description stolen from http://man.openbsd.org/strlcpy.3
4625			*/
4626			#ifndef HAS_STRLCPY
4627			PERL_STATIC_INLINE Size_t
4628			Perl_my_strlcpy(char dst, const char src, Size_t size)
4629			{
4630			Size_t length, copy;
4631
4632			length = strlen(src);
4633			if (size > 0) {
4634			copy = (length >= size) ? size - 1 : length;
4635			memcpy(dst, src, copy);
4636			dst[copy] = '\0';
4637			}
4638			return length;
4639			}
4640			#endif
4641
4642			/*
4643			* ex: set ts=8 sts=4 sw=4 et:
4644			*/