1 /* sv.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall 5 * and others 6 * 7 * You may distribute under the terms of either the GNU General Public 8 * License or the Artistic License, as specified in the README file. 9 * 10 */ 11 12 /* 13 * 'I wonder what the Entish is for "yes" and "no",' he thought. 14 * --Pippin 15 * 16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"] 17 */ 18 19 /* 20 * 21 * 22 * This file contains the code that creates, manipulates and destroys 23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the 24 * structure of an SV, so their creation and destruction is handled 25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode 26 * level functions (eg. substr, split, join) for each of the types are 27 * in the pp*.c files. 28 */ 29 30 #include "EXTERN.h" 31 #define PERL_IN_SV_C 32 #include "perl.h" 33 #include "regcomp.h" 34 #ifdef __VMS 35 # include <rms.h> 36 #endif 37 38 #ifdef __Lynx__ 39 /* Missing proto on LynxOS */ 40 char *gconvert(double, int, int, char *); 41 #endif 42 43 #ifdef USE_QUADMATH 44 # define SNPRINTF_G(nv, buffer, size, ndig) \ 45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv)) 46 #else 47 # define SNPRINTF_G(nv, buffer, size, ndig) \ 48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer)) 49 #endif 50 51 #ifndef SV_COW_THRESHOLD 52 # define SV_COW_THRESHOLD 0 /* COW iff len > K */ 53 #endif 54 #ifndef SV_COWBUF_THRESHOLD 55 # define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */ 56 #endif 57 #ifndef SV_COW_MAX_WASTE_THRESHOLD 58 # define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */ 59 #endif 60 #ifndef SV_COWBUF_WASTE_THRESHOLD 61 # define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */ 62 #endif 63 #ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD 64 # define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */ 65 #endif 66 #ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD 67 # define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */ 68 #endif 69 /* Work around compiler warnings about unsigned >= THRESHOLD when thres- 70 hold is 0. */ 71 #if SV_COW_THRESHOLD 72 # define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD) 73 #else 74 # define GE_COW_THRESHOLD(cur) 1 75 #endif 76 #if SV_COWBUF_THRESHOLD 77 # define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD) 78 #else 79 # define GE_COWBUF_THRESHOLD(cur) 1 80 #endif 81 #if SV_COW_MAX_WASTE_THRESHOLD 82 # define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD) 83 #else 84 # define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1 85 #endif 86 #if SV_COWBUF_WASTE_THRESHOLD 87 # define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD) 88 #else 89 # define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1 90 #endif 91 #if SV_COW_MAX_WASTE_FACTOR_THRESHOLD 92 # define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur)) 93 #else 94 # define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1 95 #endif 96 #if SV_COWBUF_WASTE_FACTOR_THRESHOLD 97 # define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur)) 98 #else 99 # define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1 100 #endif 101 102 #define CHECK_COW_THRESHOLD(cur,len) (\ 103 GE_COW_THRESHOLD((cur)) && \ 104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \ 105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \ 106 ) 107 #define CHECK_COWBUF_THRESHOLD(cur,len) (\ 108 GE_COWBUF_THRESHOLD((cur)) && \ 109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \ 110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \ 111 ) 112 113 #ifdef PERL_UTF8_CACHE_ASSERT 114 /* if adding more checks watch out for the following tests: 115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t 116 * lib/utf8.t lib/Unicode/Collate/t/index.t 117 * --jhi 118 */ 119 # define ASSERT_UTF8_CACHE(cache) \ 120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \ 121 assert((cache)[2] <= (cache)[3]); \ 122 assert((cache)[3] <= (cache)[1]);} \ 123 } STMT_END 124 #else 125 # define ASSERT_UTF8_CACHE(cache) NOOP 126 #endif 127 128 static const char S_destroy[] = "DESTROY"; 129 #define S_destroy_len (sizeof(S_destroy)-1) 130 131 /* ============================================================================ 132 133 An SV (or AV, HV, etc.) is allocated in two parts: the head (struct 134 sv, av, hv...) contains type and reference count information, and for 135 many types, a pointer to the body (struct xrv, xpv, xpviv...), which 136 contains fields specific to each type. Some types store all they need 137 in the head, so don't have a body. 138 139 In all but the most memory-paranoid configurations (ex: PURIFY), heads 140 and bodies are allocated out of arenas, which by default are 141 approximately 4K chunks of memory parcelled up into N heads or bodies. 142 Sv-bodies are allocated by their sv-type, guaranteeing size 143 consistency needed to allocate safely from arrays. 144 145 For SV-heads, the first slot in each arena is reserved, and holds a 146 link to the next arena, some flags, and a note of the number of slots. 147 Snaked through each arena chain is a linked list of free items; when 148 this becomes empty, an extra arena is allocated and divided up into N 149 items which are threaded into the free list. 150 151 SV-bodies are similar, but they use arena-sets by default, which 152 separate the link and info from the arena itself, and reclaim the 1st 153 slot in the arena. SV-bodies are further described later. 154 155 The following global variables are associated with arenas: 156 157 PL_sv_arenaroot pointer to list of SV arenas 158 PL_sv_root pointer to list of free SV structures 159 160 PL_body_arenas head of linked-list of body arenas 161 PL_body_roots[] array of pointers to list of free bodies of svtype 162 arrays are indexed by the svtype needed 163 164 A few special SV heads are not allocated from an arena, but are 165 instead directly created in the interpreter structure, eg PL_sv_undef. 166 The size of arenas can be changed from the default by setting 167 PERL_ARENA_SIZE appropriately at compile time. 168 169 The SV arena serves the secondary purpose of allowing still-live SVs 170 to be located and destroyed during final cleanup. 171 172 At the lowest level, the macros new_SV() and del_SV() grab and free 173 an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv() 174 to return the SV to the free list with error checking.) new_SV() calls 175 more_sv() / sv_add_arena() to add an extra arena if the free list is empty. 176 SVs in the free list have their SvTYPE field set to all ones. 177 178 At the time of very final cleanup, sv_free_arenas() is called from 179 perl_destruct() to physically free all the arenas allocated since the 180 start of the interpreter. 181 182 The internal function visit() scans the SV arenas list, and calls a specified 183 function for each SV it finds which is still live, I<i.e.> which has an SvTYPE 184 other than all 1's, and a non-zero SvREFCNT. visit() is used by the 185 following functions (specified as [function that calls visit()] / [function 186 called by visit() for each SV]): 187 188 sv_report_used() / do_report_used() 189 dump all remaining SVs (debugging aid) 190 191 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(), 192 do_clean_named_io_objs(),do_curse() 193 Attempt to free all objects pointed to by RVs, 194 try to do the same for all objects indir- 195 ectly referenced by typeglobs too, and 196 then do a final sweep, cursing any 197 objects that remain. Called once from 198 perl_destruct(), prior to calling sv_clean_all() 199 below. 200 201 sv_clean_all() / do_clean_all() 202 SvREFCNT_dec(sv) each remaining SV, possibly 203 triggering an sv_free(). It also sets the 204 SVf_BREAK flag on the SV to indicate that the 205 refcnt has been artificially lowered, and thus 206 stopping sv_free() from giving spurious warnings 207 about SVs which unexpectedly have a refcnt 208 of zero. called repeatedly from perl_destruct() 209 until there are no SVs left. 210 211 =head2 Arena allocator API Summary 212 213 Private API to rest of sv.c 214 215 new_SV(), del_SV(), 216 217 new_XPVNV(), del_body() 218 etc 219 220 Public API: 221 222 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas() 223 224 =cut 225 226 * ========================================================================= */ 227 228 /* 229 * "A time to plant, and a time to uproot what was planted..." 230 */ 231 232 #ifdef DEBUG_LEAKING_SCALARS 233 # define FREE_SV_DEBUG_FILE(sv) STMT_START { \ 234 if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \ 235 } STMT_END 236 # define DEBUG_SV_SERIAL(sv) \ 237 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \ 238 PTR2UV(sv), (long)(sv)->sv_debug_serial)) 239 #else 240 # define FREE_SV_DEBUG_FILE(sv) 241 # define DEBUG_SV_SERIAL(sv) NOOP 242 #endif 243 244 /* Mark an SV head as unused, and add to free list. 245 * 246 * If SVf_BREAK is set, skip adding it to the free list, as this SV had 247 * its refcount artificially decremented during global destruction, so 248 * there may be dangling pointers to it. The last thing we want in that 249 * case is for it to be reused. */ 250 251 #define plant_SV(p) \ 252 STMT_START { \ 253 const U32 old_flags = SvFLAGS(p); \ 254 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \ 255 DEBUG_SV_SERIAL(p); \ 256 FREE_SV_DEBUG_FILE(p); \ 257 POISON_SV_HEAD(p); \ 258 SvFLAGS(p) = SVTYPEMASK; \ 259 if (!(old_flags & SVf_BREAK)) { \ 260 SvARENA_CHAIN_SET(p, PL_sv_root); \ 261 PL_sv_root = (p); \ 262 } \ 263 --PL_sv_count; \ 264 } STMT_END 265 266 267 /* make some more SVs by adding another arena */ 268 269 SV* 270 Perl_more_sv(pTHX) 271 { 272 SV* sv; 273 char *chunk; /* must use New here to match call to */ 274 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */ 275 sv_add_arena(chunk, PERL_ARENA_SIZE, 0); 276 uproot_SV(sv); 277 return sv; 278 } 279 280 /* del_SV(): return an empty SV head to the free list */ 281 282 #ifdef DEBUGGING 283 284 #define del_SV(p) \ 285 STMT_START { \ 286 if (DEBUG_D_TEST) \ 287 del_sv(p); \ 288 else \ 289 plant_SV(p); \ 290 } STMT_END 291 292 STATIC void 293 S_del_sv(pTHX_ SV *p) 294 { 295 PERL_ARGS_ASSERT_DEL_SV; 296 297 if (DEBUG_D_TEST) { 298 SV* sva; 299 bool ok = 0; 300 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) { 301 const SV * const sv = sva + 1; 302 const SV * const svend = &sva[SvREFCNT(sva)]; 303 if (p >= sv && p < svend) { 304 ok = 1; 305 break; 306 } 307 } 308 if (!ok) { 309 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 310 "Attempt to free non-arena SV: 0x%" UVxf 311 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE); 312 return; 313 } 314 } 315 plant_SV(p); 316 } 317 318 #else /* ! DEBUGGING */ 319 320 #define del_SV(p) plant_SV(p) 321 322 #endif /* DEBUGGING */ 323 324 325 /* 326 =for apidoc_section $SV 327 328 =for apidoc sv_add_arena 329 330 Given a chunk of memory, link it to the head of the list of arenas, 331 and split it into a list of free SVs. 332 333 =cut 334 */ 335 336 static void 337 S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags) 338 { 339 SV *const sva = MUTABLE_SV(ptr); 340 SV* sv; 341 SV* svend; 342 343 PERL_ARGS_ASSERT_SV_ADD_ARENA; 344 345 /* The first SV in an arena isn't an SV. */ 346 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */ 347 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */ 348 SvFLAGS(sva) = flags; /* FAKE if not to be freed */ 349 350 PL_sv_arenaroot = sva; 351 PL_sv_root = sva + 1; 352 353 svend = &sva[SvREFCNT(sva) - 1]; 354 sv = sva + 1; 355 while (sv < svend) { 356 SvARENA_CHAIN_SET(sv, (sv + 1)); 357 #ifdef DEBUGGING 358 SvREFCNT(sv) = 0; 359 #endif 360 /* Must always set typemask because it's always checked in on cleanup 361 when the arenas are walked looking for objects. */ 362 SvFLAGS(sv) = SVTYPEMASK; 363 sv++; 364 } 365 SvARENA_CHAIN_SET(sv, 0); 366 #ifdef DEBUGGING 367 SvREFCNT(sv) = 0; 368 #endif 369 SvFLAGS(sv) = SVTYPEMASK; 370 } 371 372 /* visit(): call the named function for each non-free SV in the arenas 373 * whose flags field matches the flags/mask args. */ 374 375 STATIC I32 376 S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask) 377 { 378 SV* sva; 379 I32 visited = 0; 380 381 PERL_ARGS_ASSERT_VISIT; 382 383 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) { 384 const SV * const svend = &sva[SvREFCNT(sva)]; 385 SV* sv; 386 for (sv = sva + 1; sv < svend; ++sv) { 387 if (SvTYPE(sv) != (svtype)SVTYPEMASK 388 && (sv->sv_flags & mask) == flags 389 && SvREFCNT(sv)) 390 { 391 (*f)(aTHX_ sv); 392 ++visited; 393 } 394 } 395 } 396 return visited; 397 } 398 399 #ifdef DEBUGGING 400 401 /* called by sv_report_used() for each live SV */ 402 403 static void 404 do_report_used(pTHX_ SV *const sv) 405 { 406 if (SvTYPE(sv) != (svtype)SVTYPEMASK) { 407 PerlIO_printf(Perl_debug_log, "****\n"); 408 sv_dump(sv); 409 } 410 } 411 #endif 412 413 /* 414 =for apidoc sv_report_used 415 416 Dump the contents of all SVs not yet freed (debugging aid). 417 418 =cut 419 */ 420 421 void 422 Perl_sv_report_used(pTHX) 423 { 424 #ifdef DEBUGGING 425 visit(do_report_used, 0, 0); 426 #else 427 PERL_UNUSED_CONTEXT; 428 #endif 429 } 430 431 /* called by sv_clean_objs() for each live SV */ 432 433 static void 434 do_clean_objs(pTHX_ SV *const ref) 435 { 436 assert (SvROK(ref)); 437 { 438 SV * const target = SvRV(ref); 439 if (SvOBJECT(target)) { 440 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref))); 441 if (SvWEAKREF(ref)) { 442 sv_del_backref(target, ref); 443 SvWEAKREF_off(ref); 444 SvRV_set(ref, NULL); 445 } else { 446 SvROK_off(ref); 447 SvRV_set(ref, NULL); 448 SvREFCNT_dec_NN(target); 449 } 450 } 451 } 452 } 453 454 455 /* clear any slots in a GV which hold objects - except IO; 456 * called by sv_clean_objs() for each live GV */ 457 458 static void 459 do_clean_named_objs(pTHX_ SV *const sv) 460 { 461 SV *obj; 462 assert(SvTYPE(sv) == SVt_PVGV); 463 assert(isGV_with_GP(sv)); 464 if (!GvGP(sv)) 465 return; 466 467 /* freeing GP entries may indirectly free the current GV; 468 * hold onto it while we mess with the GP slots */ 469 SvREFCNT_inc(sv); 470 471 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) { 472 DEBUG_D((PerlIO_printf(Perl_debug_log, 473 "Cleaning named glob SV object:\n "), sv_dump(obj))); 474 GvSV(sv) = NULL; 475 SvREFCNT_dec_NN(obj); 476 } 477 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) { 478 DEBUG_D((PerlIO_printf(Perl_debug_log, 479 "Cleaning named glob AV object:\n "), sv_dump(obj))); 480 GvAV(sv) = NULL; 481 SvREFCNT_dec_NN(obj); 482 } 483 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) { 484 DEBUG_D((PerlIO_printf(Perl_debug_log, 485 "Cleaning named glob HV object:\n "), sv_dump(obj))); 486 GvHV(sv) = NULL; 487 SvREFCNT_dec_NN(obj); 488 } 489 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) { 490 DEBUG_D((PerlIO_printf(Perl_debug_log, 491 "Cleaning named glob CV object:\n "), sv_dump(obj))); 492 GvCV_set(sv, NULL); 493 SvREFCNT_dec_NN(obj); 494 } 495 SvREFCNT_dec_NN(sv); /* undo the inc above */ 496 } 497 498 /* clear any IO slots in a GV which hold objects (except stderr, defout); 499 * called by sv_clean_objs() for each live GV */ 500 501 static void 502 do_clean_named_io_objs(pTHX_ SV *const sv) 503 { 504 SV *obj; 505 assert(SvTYPE(sv) == SVt_PVGV); 506 assert(isGV_with_GP(sv)); 507 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv) 508 return; 509 510 SvREFCNT_inc(sv); 511 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) { 512 DEBUG_D((PerlIO_printf(Perl_debug_log, 513 "Cleaning named glob IO object:\n "), sv_dump(obj))); 514 GvIOp(sv) = NULL; 515 SvREFCNT_dec_NN(obj); 516 } 517 SvREFCNT_dec_NN(sv); /* undo the inc above */ 518 } 519 520 /* Void wrapper to pass to visit() */ 521 static void 522 do_curse(pTHX_ SV * const sv) { 523 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv) 524 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv)) 525 return; 526 (void)curse(sv, 0); 527 } 528 529 /* 530 =for apidoc sv_clean_objs 531 532 Attempt to destroy all objects not yet freed. 533 534 =cut 535 */ 536 537 void 538 Perl_sv_clean_objs(pTHX) 539 { 540 GV *olddef, *olderr; 541 PL_in_clean_objs = TRUE; 542 visit(do_clean_objs, SVf_ROK, SVf_ROK); 543 /* Some barnacles may yet remain, clinging to typeglobs. 544 * Run the non-IO destructors first: they may want to output 545 * error messages, close files etc */ 546 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP); 547 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP); 548 /* And if there are some very tenacious barnacles clinging to arrays, 549 closures, or what have you.... */ 550 visit(do_curse, SVs_OBJECT, SVs_OBJECT); 551 olddef = PL_defoutgv; 552 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */ 553 if (olddef && isGV_with_GP(olddef)) 554 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef)); 555 olderr = PL_stderrgv; 556 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */ 557 if (olderr && isGV_with_GP(olderr)) 558 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr)); 559 SvREFCNT_dec(olddef); 560 PL_in_clean_objs = FALSE; 561 } 562 563 /* called by sv_clean_all() for each live SV */ 564 565 static void 566 do_clean_all(pTHX_ SV *const sv) 567 { 568 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) { 569 /* don't clean pid table and strtab */ 570 return; 571 } 572 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) )); 573 SvFLAGS(sv) |= SVf_BREAK; 574 SvREFCNT_dec_NN(sv); 575 } 576 577 /* 578 =for apidoc sv_clean_all 579 580 Decrement the refcnt of each remaining SV, possibly triggering a 581 cleanup. This function may have to be called multiple times to free 582 SVs which are in complex self-referential hierarchies. 583 584 =cut 585 */ 586 587 I32 588 Perl_sv_clean_all(pTHX) 589 { 590 I32 cleaned; 591 PL_in_clean_all = TRUE; 592 cleaned = visit(do_clean_all, 0,0); 593 return cleaned; 594 } 595 596 /* 597 ARENASETS: a meta-arena implementation which separates arena-info 598 into struct arena_set, which contains an array of struct 599 arena_descs, each holding info for a single arena. By separating 600 the meta-info from the arena, we recover the 1st slot, formerly 601 borrowed for list management. The arena_set is about the size of an 602 arena, avoiding the needless malloc overhead of a naive linked-list. 603 604 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused 605 memory in the last arena-set (1/2 on average). In trade, we get 606 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for 607 smaller types). The recovery of the wasted space allows use of 608 small arenas for large, rare body types, by changing array* fields 609 in body_details_by_type[] below. 610 */ 611 struct arena_desc { 612 char *arena; /* the raw storage, allocated aligned */ 613 size_t size; /* its size ~4k typ */ 614 svtype utype; /* bodytype stored in arena */ 615 }; 616 617 struct arena_set; 618 619 /* Get the maximum number of elements in set[] such that struct arena_set 620 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and 621 therefore likely to be 1 aligned memory page. */ 622 623 #define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \ 624 - 2 * sizeof(int)) / sizeof (struct arena_desc)) 625 626 struct arena_set { 627 struct arena_set* next; 628 unsigned int set_size; /* ie ARENAS_PER_SET */ 629 unsigned int curr; /* index of next available arena-desc */ 630 struct arena_desc set[ARENAS_PER_SET]; 631 }; 632 633 /* 634 =for apidoc sv_free_arenas 635 636 Deallocate the memory used by all arenas. Note that all the individual SV 637 heads and bodies within the arenas must already have been freed. 638 639 =cut 640 641 */ 642 void 643 Perl_sv_free_arenas(pTHX) 644 { 645 SV* sva; 646 SV* svanext; 647 unsigned int i; 648 649 /* Free arenas here, but be careful about fake ones. (We assume 650 contiguity of the fake ones with the corresponding real ones.) */ 651 652 for (sva = PL_sv_arenaroot; sva; sva = svanext) { 653 svanext = MUTABLE_SV(SvANY(sva)); 654 while (svanext && SvFAKE(svanext)) 655 svanext = MUTABLE_SV(SvANY(svanext)); 656 657 if (!SvFAKE(sva)) 658 Safefree(sva); 659 } 660 661 { 662 struct arena_set *aroot = (struct arena_set*) PL_body_arenas; 663 664 while (aroot) { 665 struct arena_set *current = aroot; 666 i = aroot->curr; 667 while (i--) { 668 assert(aroot->set[i].arena); 669 Safefree(aroot->set[i].arena); 670 } 671 aroot = aroot->next; 672 Safefree(current); 673 } 674 } 675 PL_body_arenas = 0; 676 677 i = PERL_ARENA_ROOTS_SIZE; 678 while (i--) 679 PL_body_roots[i] = 0; 680 681 PL_sv_arenaroot = 0; 682 PL_sv_root = 0; 683 } 684 685 /* 686 Historically, here were mid-level routines that manage the 687 allocation of bodies out of the various arenas. Some of these 688 routines and related definitions remain here, but otherse were 689 moved into sv_inline.h to facilitate inlining of newSV_type(). 690 691 There are 4 kinds of arenas: 692 693 1. SV-head arenas, which are discussed and handled above 694 2. regular body arenas 695 3. arenas for reduced-size bodies 696 4. Hash-Entry arenas 697 698 Arena types 2 & 3 are chained by body-type off an array of 699 arena-root pointers, which is indexed by svtype. Some of the 700 larger/less used body types are malloced singly, since a large 701 unused block of them is wasteful. Also, several svtypes dont have 702 bodies; the data fits into the sv-head itself. The arena-root 703 pointer thus has a few unused root-pointers (which may be hijacked 704 later for arena type 4) 705 706 3 differs from 2 as an optimization; some body types have several 707 unused fields in the front of the structure (which are kept in-place 708 for consistency). These bodies can be allocated in smaller chunks, 709 because the leading fields arent accessed. Pointers to such bodies 710 are decremented to point at the unused 'ghost' memory, knowing that 711 the pointers are used with offsets to the real memory. 712 713 Allocation of SV-bodies is similar to SV-heads, differing as follows; 714 the allocation mechanism is used for many body types, so is somewhat 715 more complicated, it uses arena-sets, and has no need for still-live 716 SV detection. 717 718 At the outermost level, (new|del)_X*V macros return bodies of the 719 appropriate type. These macros call either (new|del)_body_type or 720 (new|del)_body_allocated macro pairs, depending on specifics of the 721 type. Most body types use the former pair, the latter pair is used to 722 allocate body types with "ghost fields". 723 724 "ghost fields" are fields that are unused in certain types, and 725 consequently don't need to actually exist. They are declared because 726 they're part of a "base type", which allows use of functions as 727 methods. The simplest examples are AVs and HVs, 2 aggregate types 728 which don't use the fields which support SCALAR semantics. 729 730 For these types, the arenas are carved up into appropriately sized 731 chunks, we thus avoid wasted memory for those unaccessed members. 732 When bodies are allocated, we adjust the pointer back in memory by the 733 size of the part not allocated, so it's as if we allocated the full 734 structure. (But things will all go boom if you write to the part that 735 is "not there", because you'll be overwriting the last members of the 736 preceding structure in memory.) 737 738 We calculate the correction using the STRUCT_OFFSET macro on the first 739 member present. If the allocated structure is smaller (no initial NV 740 actually allocated) then the net effect is to subtract the size of the NV 741 from the pointer, to return a new pointer as if an initial NV were actually 742 allocated. (We were using structures named *_allocated for this, but 743 this turned out to be a subtle bug, because a structure without an NV 744 could have a lower alignment constraint, but the compiler is allowed to 745 optimised accesses based on the alignment constraint of the actual pointer 746 to the full structure, for example, using a single 64 bit load instruction 747 because it "knows" that two adjacent 32 bit members will be 8-byte aligned.) 748 749 This is the same trick as was used for NV and IV bodies. Ironically it 750 doesn't need to be used for NV bodies any more, because NV is now at 751 the start of the structure. IV bodies, and also in some builds NV bodies, 752 don't need it either, because they are no longer allocated. 753 754 In turn, the new_body_* allocators call S_new_body(), which invokes 755 new_body_from_arena macro, which takes a lock, and takes a body off the 756 linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if 757 necessary to refresh an empty list. Then the lock is released, and 758 the body is returned. 759 760 Perl_more_bodies allocates a new arena, and carves it up into an array of N 761 bodies, which it strings into a linked list. It looks up arena-size 762 and body-size from the body_details table described below, thus 763 supporting the multiple body-types. 764 765 If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and 766 the (new|del)_X*V macros are mapped directly to malloc/free. 767 768 For each sv-type, struct body_details bodies_by_type[] carries 769 parameters which control these aspects of SV handling: 770 771 Arena_size determines whether arenas are used for this body type, and if 772 so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to 773 zero, forcing individual mallocs and frees. 774 775 Body_size determines how big a body is, and therefore how many fit into 776 each arena. Offset carries the body-pointer adjustment needed for 777 "ghost fields", and is used in *_allocated macros. 778 779 But its main purpose is to parameterize info needed in 780 Perl_sv_upgrade(). The info here dramatically simplifies the function 781 vs the implementation in 5.8.8, making it table-driven. All fields 782 are used for this, except for arena_size. 783 784 For the sv-types that have no bodies, arenas are not used, so those 785 PL_body_roots[sv_type] are unused, and can be overloaded. In 786 something of a special case, SVt_NULL is borrowed for HE arenas; 787 PL_body_roots[HE_ARENA_ROOT_IX=SVt_NULL] is filled by S_more_he, but the 788 bodies_by_type[SVt_NULL] slot is not used, as the table is not 789 available in hv.c. Similarly SVt_IV is re-used for HVAUX_ARENA_ROOT_IX. 790 791 */ 792 793 /* return a thing to the free list */ 794 795 #define del_body(thing, root) \ 796 STMT_START { \ 797 void ** const thing_copy = (void **)thing; \ 798 *thing_copy = *root; \ 799 *root = (void*)thing_copy; \ 800 } STMT_END 801 802 803 void * 804 Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, 805 const size_t arena_size) 806 { 807 void ** const root = &PL_body_roots[sv_type]; 808 struct arena_desc *adesc; 809 struct arena_set *aroot = (struct arena_set *) PL_body_arenas; 810 unsigned int curr; 811 char *start; 812 const char *end; 813 const size_t good_arena_size = Perl_malloc_good_size(arena_size); 814 #if defined(DEBUGGING) 815 static bool done_sanity_check; 816 817 if (!done_sanity_check) { 818 unsigned int i = SVt_LAST; 819 820 done_sanity_check = TRUE; 821 822 while (i--) 823 assert (bodies_by_type[i].type == i); 824 } 825 #endif 826 827 assert(arena_size); 828 829 /* may need new arena-set to hold new arena */ 830 if (!aroot || aroot->curr >= aroot->set_size) { 831 struct arena_set *newroot; 832 Newxz(newroot, 1, struct arena_set); 833 newroot->set_size = ARENAS_PER_SET; 834 newroot->next = aroot; 835 aroot = newroot; 836 PL_body_arenas = (void *) newroot; 837 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot)); 838 } 839 840 /* ok, now have arena-set with at least 1 empty/available arena-desc */ 841 curr = aroot->curr++; 842 adesc = &(aroot->set[curr]); 843 assert(!adesc->arena); 844 845 Newx(adesc->arena, good_arena_size, char); 846 adesc->size = good_arena_size; 847 adesc->utype = sv_type; 848 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n", 849 curr, (void*)adesc->arena, (UV)good_arena_size)); 850 851 start = (char *) adesc->arena; 852 853 /* Get the address of the byte after the end of the last body we can fit. 854 Remember, this is integer division: */ 855 end = start + good_arena_size / body_size * body_size; 856 857 /* computed count doesn't reflect the 1st slot reservation */ 858 #if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE) 859 DEBUG_m(PerlIO_printf(Perl_debug_log, 860 "arena %p end %p arena-size %d (from %d) type %d " 861 "size %d ct %d\n", 862 (void*)start, (void*)end, (int)good_arena_size, 863 (int)arena_size, sv_type, (int)body_size, 864 (int)good_arena_size / (int)body_size)); 865 #else 866 DEBUG_m(PerlIO_printf(Perl_debug_log, 867 "arena %p end %p arena-size %d type %d size %d ct %d\n", 868 (void*)start, (void*)end, 869 (int)arena_size, sv_type, (int)body_size, 870 (int)good_arena_size / (int)body_size)); 871 #endif 872 *root = (void *)start; 873 874 while (1) { 875 /* Where the next body would start: */ 876 char * const next = start + body_size; 877 878 if (next >= end) { 879 /* This is the last body: */ 880 assert(next == end); 881 882 *(void **)start = 0; 883 return *root; 884 } 885 886 *(void**) start = (void *)next; 887 start = next; 888 } 889 } 890 891 /* 892 =for apidoc sv_upgrade 893 894 Upgrade an SV to a more complex form. Generally adds a new body type to the 895 SV, then copies across as much information as possible from the old body. 896 It croaks if the SV is already in a more complex form than requested. You 897 generally want to use the C<SvUPGRADE> macro wrapper, which checks the type 898 before calling C<sv_upgrade>, and hence does not croak. See also 899 C<L</svtype>>. 900 901 =cut 902 */ 903 904 void 905 Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) 906 { 907 void* old_body; 908 void* new_body; 909 const svtype old_type = SvTYPE(sv); 910 const struct body_details *new_type_details; 911 const struct body_details *old_type_details 912 = bodies_by_type + old_type; 913 SV *referent = NULL; 914 915 PERL_ARGS_ASSERT_SV_UPGRADE; 916 917 if (old_type == new_type) 918 return; 919 920 /* This clause was purposefully added ahead of the early return above to 921 the shared string hackery for (sort {$a <=> $b} keys %hash), with the 922 inference by Nick I-S that it would fix other troublesome cases. See 923 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent) 924 925 Given that shared hash key scalars are no longer PVIV, but PV, there is 926 no longer need to unshare so as to free up the IVX slot for its proper 927 purpose. So it's safe to move the early return earlier. */ 928 929 if (new_type > SVt_PVMG && SvIsCOW(sv)) { 930 sv_force_normal_flags(sv, 0); 931 } 932 933 old_body = SvANY(sv); 934 935 /* Copying structures onto other structures that have been neatly zeroed 936 has a subtle gotcha. Consider XPVMG 937 938 +------+------+------+------+------+-------+-------+ 939 | NV | CUR | LEN | IV | MAGIC | STASH | 940 +------+------+------+------+------+-------+-------+ 941 0 4 8 12 16 20 24 28 942 943 where NVs are aligned to 8 bytes, so that sizeof that structure is 944 actually 32 bytes long, with 4 bytes of padding at the end: 945 946 +------+------+------+------+------+-------+-------+------+ 947 | NV | CUR | LEN | IV | MAGIC | STASH | ??? | 948 +------+------+------+------+------+-------+-------+------+ 949 0 4 8 12 16 20 24 28 32 950 951 so what happens if you allocate memory for this structure: 952 953 +------+------+------+------+------+-------+-------+------+------+... 954 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME | 955 +------+------+------+------+------+-------+-------+------+------+... 956 0 4 8 12 16 20 24 28 32 36 957 958 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you 959 expect, because you copy the area marked ??? onto GP. Now, ??? may have 960 started out as zero once, but it's quite possible that it isn't. So now, 961 rather than a nicely zeroed GP, you have it pointing somewhere random. 962 Bugs ensue. 963 964 (In fact, GP ends up pointing at a previous GP structure, because the 965 principle cause of the padding in XPVMG getting garbage is a copy of 966 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now 967 this happens to be moot because XPVGV has been re-ordered, with GP 968 no longer after STASH) 969 970 So we are careful and work out the size of used parts of all the 971 structures. */ 972 973 switch (old_type) { 974 case SVt_NULL: 975 break; 976 case SVt_IV: 977 if (SvROK(sv)) { 978 referent = SvRV(sv); 979 old_type_details = &fake_rv; 980 if (new_type == SVt_NV) 981 new_type = SVt_PVNV; 982 } else { 983 if (new_type < SVt_PVIV) { 984 new_type = (new_type == SVt_NV) 985 ? SVt_PVNV : SVt_PVIV; 986 } 987 } 988 break; 989 case SVt_NV: 990 if (new_type < SVt_PVNV) { 991 new_type = SVt_PVNV; 992 } 993 break; 994 case SVt_PV: 995 assert(new_type > SVt_PV); 996 STATIC_ASSERT_STMT(SVt_IV < SVt_PV); 997 STATIC_ASSERT_STMT(SVt_NV < SVt_PV); 998 break; 999 case SVt_PVIV: 1000 break; 1001 case SVt_PVNV: 1002 break; 1003 case SVt_PVMG: 1004 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena, 1005 there's no way that it can be safely upgraded, because perl.c 1006 expects to Safefree(SvANY(PL_mess_sv)) */ 1007 assert(sv != PL_mess_sv); 1008 break; 1009 default: 1010 if (UNLIKELY(old_type_details->cant_upgrade)) 1011 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf, 1012 sv_reftype(sv, 0), (UV) old_type, (UV) new_type); 1013 } 1014 1015 if (UNLIKELY(old_type > new_type)) 1016 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d", 1017 (int)old_type, (int)new_type); 1018 1019 new_type_details = bodies_by_type + new_type; 1020 1021 SvFLAGS(sv) &= ~SVTYPEMASK; 1022 SvFLAGS(sv) |= new_type; 1023 1024 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of 1025 the return statements above will have triggered. */ 1026 assert (new_type != SVt_NULL); 1027 switch (new_type) { 1028 case SVt_IV: 1029 assert(old_type == SVt_NULL); 1030 SET_SVANY_FOR_BODYLESS_IV(sv); 1031 SvIV_set(sv, 0); 1032 return; 1033 case SVt_NV: 1034 assert(old_type == SVt_NULL); 1035 #if NVSIZE <= IVSIZE 1036 SET_SVANY_FOR_BODYLESS_NV(sv); 1037 #else 1038 SvANY(sv) = new_XNV(); 1039 #endif 1040 SvNV_set(sv, 0); 1041 return; 1042 case SVt_PVHV: 1043 case SVt_PVAV: 1044 assert(new_type_details->body_size); 1045 1046 #ifndef PURIFY 1047 assert(new_type_details->arena); 1048 assert(new_type_details->arena_size); 1049 /* This points to the start of the allocated area. */ 1050 new_body = S_new_body(aTHX_ new_type); 1051 /* xpvav and xpvhv have no offset, so no need to adjust new_body */ 1052 assert(!(new_type_details->offset)); 1053 #else 1054 /* We always allocated the full length item with PURIFY. To do this 1055 we fake things so that arena is false for all 16 types.. */ 1056 new_body = new_NOARENAZ(new_type_details); 1057 #endif 1058 SvANY(sv) = new_body; 1059 if (new_type == SVt_PVAV) { 1060 *((XPVAV*) SvANY(sv)) = (XPVAV) { 1061 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL}, 1062 .xav_fill = -1, .xav_max = -1, .xav_alloc = 0 1063 }; 1064 1065 AvREAL_only(sv); 1066 } else { 1067 *((XPVHV*) SvANY(sv)) = (XPVHV) { 1068 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL}, 1069 .xhv_keys = 0, 1070 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */ 1071 .xhv_max = PERL_HASH_DEFAULT_HvMAX 1072 }; 1073 1074 assert(!SvOK(sv)); 1075 SvOK_off(sv); 1076 #ifndef NODEFAULT_SHAREKEYS 1077 HvSHAREKEYS_on(sv); /* key-sharing on by default */ 1078 #endif 1079 } 1080 1081 /* SVt_NULL isn't the only thing upgraded to AV or HV. 1082 The target created by newSVrv also is, and it can have magic. 1083 However, it never has SvPVX set. 1084 */ 1085 if (old_type == SVt_IV) { 1086 assert(!SvROK(sv)); 1087 } else if (old_type >= SVt_PV) { 1088 assert(SvPVX_const(sv) == 0); 1089 } 1090 1091 if (old_type >= SVt_PVMG) { 1092 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic); 1093 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash); 1094 } else { 1095 sv->sv_u.svu_array = NULL; /* or svu_hash */ 1096 } 1097 break; 1098 1099 case SVt_PVIV: 1100 /* XXX Is this still needed? Was it ever needed? Surely as there is 1101 no route from NV to PVIV, NOK can never be true */ 1102 assert(!SvNOKp(sv)); 1103 assert(!SvNOK(sv)); 1104 /* FALLTHROUGH */ 1105 case SVt_PVIO: 1106 case SVt_PVFM: 1107 case SVt_PVGV: 1108 case SVt_PVCV: 1109 case SVt_PVLV: 1110 case SVt_INVLIST: 1111 case SVt_REGEXP: 1112 case SVt_PVMG: 1113 case SVt_PVNV: 1114 case SVt_PV: 1115 1116 assert(new_type_details->body_size); 1117 /* We always allocated the full length item with PURIFY. To do this 1118 we fake things so that arena is false for all 16 types.. */ 1119 #ifndef PURIFY 1120 if(new_type_details->arena) { 1121 /* This points to the start of the allocated area. */ 1122 new_body = S_new_body(aTHX_ new_type); 1123 Zero(new_body, new_type_details->body_size, char); 1124 new_body = ((char *)new_body) - new_type_details->offset; 1125 } else 1126 #endif 1127 { 1128 new_body = new_NOARENAZ(new_type_details); 1129 } 1130 SvANY(sv) = new_body; 1131 1132 if (old_type_details->copy) { 1133 /* There is now the potential for an upgrade from something without 1134 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */ 1135 int offset = old_type_details->offset; 1136 int length = old_type_details->copy; 1137 1138 if (new_type_details->offset > old_type_details->offset) { 1139 const int difference 1140 = new_type_details->offset - old_type_details->offset; 1141 offset += difference; 1142 length -= difference; 1143 } 1144 assert (length >= 0); 1145 1146 Copy((char *)old_body + offset, (char *)new_body + offset, length, 1147 char); 1148 } 1149 1150 #ifndef NV_ZERO_IS_ALLBITS_ZERO 1151 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a 1152 * correct 0.0 for us. Otherwise, if the old body didn't have an 1153 * NV slot, but the new one does, then we need to initialise the 1154 * freshly created NV slot with whatever the correct bit pattern is 1155 * for 0.0 */ 1156 if (old_type_details->zero_nv && !new_type_details->zero_nv 1157 && !isGV_with_GP(sv)) 1158 SvNV_set(sv, 0); 1159 #endif 1160 1161 if (UNLIKELY(new_type == SVt_PVIO)) { 1162 IO * const io = MUTABLE_IO(sv); 1163 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV); 1164 1165 SvOBJECT_on(io); 1166 /* Clear the stashcache because a new IO could overrule a package 1167 name */ 1168 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n")); 1169 hv_clear(PL_stashcache); 1170 1171 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv)))); 1172 IoPAGE_LEN(sv) = 60; 1173 } 1174 if (old_type < SVt_PV) { 1175 /* referent will be NULL unless the old type was SVt_IV emulating 1176 SVt_RV */ 1177 sv->sv_u.svu_rv = referent; 1178 } 1179 break; 1180 default: 1181 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu", 1182 (unsigned long)new_type); 1183 } 1184 1185 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV, 1186 and sometimes SVt_NV */ 1187 if (old_type_details->body_size) { 1188 #ifdef PURIFY 1189 safefree(old_body); 1190 #else 1191 /* Note that there is an assumption that all bodies of types that 1192 can be upgraded came from arenas. Only the more complex non- 1193 upgradable types are allowed to be directly malloc()ed. */ 1194 assert(old_type_details->arena); 1195 del_body((void*)((char*)old_body + old_type_details->offset), 1196 &PL_body_roots[old_type]); 1197 #endif 1198 } 1199 } 1200 1201 struct xpvhv_aux* 1202 Perl_hv_auxalloc(pTHX_ HV *hv) { 1203 const struct body_details *old_type_details = bodies_by_type + SVt_PVHV; 1204 void *old_body; 1205 void *new_body; 1206 1207 PERL_ARGS_ASSERT_HV_AUXALLOC; 1208 assert(SvTYPE(hv) == SVt_PVHV); 1209 assert(!SvOOK(hv)); 1210 1211 #ifdef PURIFY 1212 new_body = new_NOARENAZ(&fake_hv_with_aux); 1213 #else 1214 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux); 1215 #endif 1216 1217 old_body = SvANY(hv); 1218 1219 Copy((char *)old_body + old_type_details->offset, 1220 (char *)new_body + fake_hv_with_aux.offset, 1221 old_type_details->copy, 1222 char); 1223 1224 #ifdef PURIFY 1225 safefree(old_body); 1226 #else 1227 assert(old_type_details->arena); 1228 del_body((void*)((char*)old_body + old_type_details->offset), 1229 &PL_body_roots[SVt_PVHV]); 1230 #endif 1231 1232 SvANY(hv) = (XPVHV *) new_body; 1233 SvOOK_on(hv); 1234 return HvAUX(hv); 1235 } 1236 1237 /* 1238 =for apidoc sv_backoff 1239 1240 Remove any string offset. You should normally use the C<SvOOK_off> macro 1241 wrapper instead. 1242 1243 =cut 1244 */ 1245 1246 /* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS 1247 prior to 5.23.4 this function always returned 0 1248 */ 1249 1250 void 1251 Perl_sv_backoff(SV *const sv) 1252 { 1253 STRLEN delta; 1254 const char * const s = SvPVX_const(sv); 1255 1256 PERL_ARGS_ASSERT_SV_BACKOFF; 1257 1258 assert(SvOOK(sv)); 1259 assert(SvTYPE(sv) != SVt_PVHV); 1260 assert(SvTYPE(sv) != SVt_PVAV); 1261 1262 SvOOK_offset(sv, delta); 1263 1264 SvLEN_set(sv, SvLEN(sv) + delta); 1265 SvPV_set(sv, SvPVX(sv) - delta); 1266 SvFLAGS(sv) &= ~SVf_OOK; 1267 Move(s, SvPVX(sv), SvCUR(sv)+1, char); 1268 return; 1269 } 1270 1271 1272 /* forward declaration */ 1273 static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags); 1274 1275 1276 /* 1277 =for apidoc sv_grow 1278 1279 Expands the character buffer in the SV. If necessary, uses C<sv_unref> and 1280 upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer. 1281 Use the C<SvGROW> wrapper instead. 1282 1283 =cut 1284 */ 1285 1286 1287 char * 1288 Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen) 1289 { 1290 char *s; 1291 1292 PERL_ARGS_ASSERT_SV_GROW; 1293 1294 if (SvROK(sv)) 1295 sv_unref(sv); 1296 if (SvTYPE(sv) < SVt_PV) { 1297 sv_upgrade(sv, SVt_PV); 1298 s = SvPVX_mutable(sv); 1299 } 1300 else if (SvOOK(sv)) { /* pv is offset? */ 1301 sv_backoff(sv); 1302 s = SvPVX_mutable(sv); 1303 if (newlen > SvLEN(sv)) 1304 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */ 1305 } 1306 else 1307 { 1308 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0); 1309 s = SvPVX_mutable(sv); 1310 } 1311 1312 #ifdef PERL_COPY_ON_WRITE 1313 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare) 1314 * to store the COW count. So in general, allocate one more byte than 1315 * asked for, to make it likely this byte is always spare: and thus 1316 * make more strings COW-able. 1317 * 1318 * Only increment if the allocation isn't MEM_SIZE_MAX, 1319 * otherwise it will wrap to 0. 1320 */ 1321 if ( newlen != MEM_SIZE_MAX ) 1322 newlen++; 1323 #endif 1324 1325 #if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size) 1326 #define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC 1327 #endif 1328 1329 if (newlen > SvLEN(sv)) { /* need more room? */ 1330 STRLEN minlen = SvCUR(sv); 1331 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10; 1332 if (newlen < minlen) 1333 newlen = minlen; 1334 #ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC 1335 1336 /* Don't round up on the first allocation, as odds are pretty good that 1337 * the initial request is accurate as to what is really needed */ 1338 if (SvLEN(sv)) { 1339 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen); 1340 if (rounded > newlen) 1341 newlen = rounded; 1342 } 1343 #endif 1344 if (SvLEN(sv) && s) { 1345 s = (char*)saferealloc(s, newlen); 1346 } 1347 else { 1348 s = (char*)safemalloc(newlen); 1349 if (SvPVX_const(sv) && SvCUR(sv)) { 1350 Move(SvPVX_const(sv), s, SvCUR(sv), char); 1351 } 1352 } 1353 SvPV_set(sv, s); 1354 #ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC 1355 /* Do this here, do it once, do it right, and then we will never get 1356 called back into sv_grow() unless there really is some growing 1357 needed. */ 1358 SvLEN_set(sv, Perl_safesysmalloc_size(s)); 1359 #else 1360 SvLEN_set(sv, newlen); 1361 #endif 1362 } 1363 return s; 1364 } 1365 1366 /* 1367 =for apidoc sv_grow_fresh 1368 1369 A cut-down version of sv_grow intended only for when sv is a freshly-minted 1370 SVt_PV, SVt_PVIV, SVt_PVNV, or SVt_PVMG. i.e. sv has the default flags, has 1371 never been any other type, and does not have an existing string. Basically, 1372 just assigns a char buffer and returns a pointer to it. 1373 1374 =cut 1375 */ 1376 1377 1378 char * 1379 Perl_sv_grow_fresh(pTHX_ SV *const sv, STRLEN newlen) 1380 { 1381 char *s; 1382 1383 PERL_ARGS_ASSERT_SV_GROW_FRESH; 1384 1385 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG); 1386 assert(!SvROK(sv)); 1387 assert(!SvOOK(sv)); 1388 assert(!SvIsCOW(sv)); 1389 assert(!SvLEN(sv)); 1390 assert(!SvCUR(sv)); 1391 1392 #ifdef PERL_COPY_ON_WRITE 1393 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare) 1394 * to store the COW count. So in general, allocate one more byte than 1395 * asked for, to make it likely this byte is always spare: and thus 1396 * make more strings COW-able. 1397 * 1398 * Only increment if the allocation isn't MEM_SIZE_MAX, 1399 * otherwise it will wrap to 0. 1400 */ 1401 if ( newlen != MEM_SIZE_MAX ) 1402 newlen++; 1403 #endif 1404 1405 /* 10 is a longstanding, hardcoded minimum length in sv_grow. */ 1406 /* Just doing the same here for consistency. */ 1407 if (newlen < 10) 1408 newlen = 10; 1409 1410 s = (char*)safemalloc(newlen); 1411 SvPV_set(sv, s); 1412 1413 /* No PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC here, since many strings */ 1414 /* will never be grown once set. Let the real sv_grow worry about that. */ 1415 SvLEN_set(sv, newlen); 1416 return s; 1417 } 1418 1419 /* 1420 =for apidoc sv_setiv 1421 =for apidoc_item sv_setiv_mg 1422 1423 These copy an integer into the given SV, upgrading first if necessary. 1424 1425 They differ only in that C<sv_setiv_mg> handles 'set' magic; C<sv_setiv> does 1426 not. 1427 1428 =cut 1429 */ 1430 1431 void 1432 Perl_sv_setiv(pTHX_ SV *const sv, const IV i) 1433 { 1434 PERL_ARGS_ASSERT_SV_SETIV; 1435 1436 SV_CHECK_THINKFIRST_COW_DROP(sv); 1437 switch (SvTYPE(sv)) { 1438 case SVt_NULL: 1439 case SVt_NV: 1440 sv_upgrade(sv, SVt_IV); 1441 break; 1442 case SVt_PV: 1443 sv_upgrade(sv, SVt_PVIV); 1444 break; 1445 1446 case SVt_PVGV: 1447 if (!isGV_with_GP(sv)) 1448 break; 1449 /* FALLTHROUGH */ 1450 case SVt_PVAV: 1451 case SVt_PVHV: 1452 case SVt_PVCV: 1453 case SVt_PVFM: 1454 case SVt_PVIO: 1455 /* diag_listed_as: Can't coerce %s to %s in %s */ 1456 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), 1457 OP_DESC(PL_op)); 1458 NOT_REACHED; /* NOTREACHED */ 1459 break; 1460 default: NOOP; 1461 } 1462 (void)SvIOK_only(sv); /* validate number */ 1463 SvIV_set(sv, i); 1464 SvTAINT(sv); 1465 } 1466 1467 void 1468 Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i) 1469 { 1470 PERL_ARGS_ASSERT_SV_SETIV_MG; 1471 1472 sv_setiv(sv,i); 1473 SvSETMAGIC(sv); 1474 } 1475 1476 /* 1477 =for apidoc sv_setuv 1478 =for apidoc_item sv_setuv_mg 1479 1480 These copy an unsigned integer into the given SV, upgrading first if necessary. 1481 1482 1483 They differ only in that C<sv_setuv_mg> handles 'set' magic; C<sv_setuv> does 1484 not. 1485 1486 =cut 1487 */ 1488 1489 void 1490 Perl_sv_setuv(pTHX_ SV *const sv, const UV u) 1491 { 1492 PERL_ARGS_ASSERT_SV_SETUV; 1493 1494 /* With the if statement to ensure that integers are stored as IVs whenever 1495 possible: 1496 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 1497 1498 without 1499 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 1500 1501 If you wish to remove the following if statement, so that this routine 1502 (and its callers) always return UVs, please benchmark to see what the 1503 effect is. Modern CPUs may be different. Or may not :-) 1504 */ 1505 if (u <= (UV)IV_MAX) { 1506 sv_setiv(sv, (IV)u); 1507 return; 1508 } 1509 sv_setiv(sv, 0); 1510 SvIsUV_on(sv); 1511 SvUV_set(sv, u); 1512 } 1513 1514 void 1515 Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u) 1516 { 1517 PERL_ARGS_ASSERT_SV_SETUV_MG; 1518 1519 sv_setuv(sv,u); 1520 SvSETMAGIC(sv); 1521 } 1522 1523 /* 1524 =for apidoc sv_setnv 1525 =for apidoc_item sv_setnv_mg 1526 1527 These copy a double into the given SV, upgrading first if necessary. 1528 1529 They differ only in that C<sv_setnv_mg> handles 'set' magic; C<sv_setnv> does 1530 not. 1531 1532 =cut 1533 */ 1534 1535 void 1536 Perl_sv_setnv(pTHX_ SV *const sv, const NV num) 1537 { 1538 PERL_ARGS_ASSERT_SV_SETNV; 1539 1540 SV_CHECK_THINKFIRST_COW_DROP(sv); 1541 switch (SvTYPE(sv)) { 1542 case SVt_NULL: 1543 case SVt_IV: 1544 sv_upgrade(sv, SVt_NV); 1545 break; 1546 case SVt_PV: 1547 case SVt_PVIV: 1548 sv_upgrade(sv, SVt_PVNV); 1549 break; 1550 1551 case SVt_PVGV: 1552 if (!isGV_with_GP(sv)) 1553 break; 1554 /* FALLTHROUGH */ 1555 case SVt_PVAV: 1556 case SVt_PVHV: 1557 case SVt_PVCV: 1558 case SVt_PVFM: 1559 case SVt_PVIO: 1560 /* diag_listed_as: Can't coerce %s to %s in %s */ 1561 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), 1562 OP_DESC(PL_op)); 1563 NOT_REACHED; /* NOTREACHED */ 1564 break; 1565 default: NOOP; 1566 } 1567 SvNV_set(sv, num); 1568 (void)SvNOK_only(sv); /* validate number */ 1569 SvTAINT(sv); 1570 } 1571 1572 void 1573 Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num) 1574 { 1575 PERL_ARGS_ASSERT_SV_SETNV_MG; 1576 1577 sv_setnv(sv,num); 1578 SvSETMAGIC(sv); 1579 } 1580 1581 /* 1582 =for apidoc sv_setrv_noinc 1583 =for apidoc_item sv_setrv_noinc_mg 1584 1585 Copies an SV pointer into the given SV as an SV reference, upgrading it if 1586 necessary. After this, C<SvRV(sv)> is equal to I<ref>. This does not adjust 1587 the reference count of I<ref>. The reference I<ref> must not be NULL. 1588 1589 C<sv_setrv_noinc_mg> will invoke 'set' magic on the SV; C<sv_setrv_noinc> will 1590 not. 1591 1592 =cut 1593 */ 1594 1595 void 1596 Perl_sv_setrv_noinc(pTHX_ SV *const sv, SV *const ref) 1597 { 1598 PERL_ARGS_ASSERT_SV_SETRV_NOINC; 1599 1600 SV_CHECK_THINKFIRST_COW_DROP(sv); 1601 prepare_SV_for_RV(sv); 1602 1603 SvOK_off(sv); 1604 SvRV_set(sv, ref); 1605 SvROK_on(sv); 1606 } 1607 1608 void 1609 Perl_sv_setrv_noinc_mg(pTHX_ SV *const sv, SV *const ref) 1610 { 1611 PERL_ARGS_ASSERT_SV_SETRV_NOINC_MG; 1612 1613 sv_setrv_noinc(sv, ref); 1614 SvSETMAGIC(sv); 1615 } 1616 1617 /* 1618 =for apidoc sv_setrv_inc 1619 =for apidoc_item sv_setrv_inc_mg 1620 1621 As C<sv_setrv_noinc> but increments the reference count of I<ref>. 1622 1623 C<sv_setrv_inc_mg> will invoke 'set' magic on the SV; C<sv_setrv_inc> will 1624 not. 1625 1626 =cut 1627 */ 1628 1629 void 1630 Perl_sv_setrv_inc(pTHX_ SV *const sv, SV *const ref) 1631 { 1632 PERL_ARGS_ASSERT_SV_SETRV_INC; 1633 1634 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref)); 1635 } 1636 1637 void 1638 Perl_sv_setrv_inc_mg(pTHX_ SV *const sv, SV *const ref) 1639 { 1640 PERL_ARGS_ASSERT_SV_SETRV_INC_MG; 1641 1642 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref)); 1643 SvSETMAGIC(sv); 1644 } 1645 1646 /* Return a cleaned-up, printable version of sv, for non-numeric, or 1647 * not incrementable warning display. 1648 * Originally part of S_not_a_number(). 1649 * The return value may be != tmpbuf. 1650 */ 1651 1652 STATIC const char * 1653 S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) { 1654 const char *pv; 1655 1656 PERL_ARGS_ASSERT_SV_DISPLAY; 1657 1658 if (DO_UTF8(sv)) { 1659 SV *dsv = newSVpvs_flags("", SVs_TEMP); 1660 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT); 1661 } else { 1662 char *d = tmpbuf; 1663 const char * const limit = tmpbuf + tmpbuf_size - 8; 1664 /* each *s can expand to 4 chars + "...\0", 1665 i.e. need room for 8 chars */ 1666 1667 const char *s = SvPVX_const(sv); 1668 const char * const end = s + SvCUR(sv); 1669 for ( ; s < end && d < limit; s++ ) { 1670 int ch = (U8) *s; 1671 if (! isASCII(ch) && !isPRINT_LC(ch)) { 1672 *d++ = 'M'; 1673 *d++ = '-'; 1674 1675 /* Map to ASCII "equivalent" of Latin1 */ 1676 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127); 1677 } 1678 if (ch == '\n') { 1679 *d++ = '\\'; 1680 *d++ = 'n'; 1681 } 1682 else if (ch == '\r') { 1683 *d++ = '\\'; 1684 *d++ = 'r'; 1685 } 1686 else if (ch == '\f') { 1687 *d++ = '\\'; 1688 *d++ = 'f'; 1689 } 1690 else if (ch == '\\') { 1691 *d++ = '\\'; 1692 *d++ = '\\'; 1693 } 1694 else if (ch == '\0') { 1695 *d++ = '\\'; 1696 *d++ = '0'; 1697 } 1698 else if (isPRINT_LC(ch)) 1699 *d++ = ch; 1700 else { 1701 *d++ = '^'; 1702 *d++ = toCTRL(ch); 1703 } 1704 } 1705 if (s < end) { 1706 *d++ = '.'; 1707 *d++ = '.'; 1708 *d++ = '.'; 1709 } 1710 *d = '\0'; 1711 pv = tmpbuf; 1712 } 1713 1714 return pv; 1715 } 1716 1717 /* Print an "isn't numeric" warning, using a cleaned-up, 1718 * printable version of the offending string 1719 */ 1720 1721 STATIC void 1722 S_not_a_number(pTHX_ SV *const sv) 1723 { 1724 char tmpbuf[64]; 1725 const char *pv; 1726 1727 PERL_ARGS_ASSERT_NOT_A_NUMBER; 1728 1729 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf)); 1730 1731 if (PL_op) 1732 Perl_warner(aTHX_ packWARN(WARN_NUMERIC), 1733 /* diag_listed_as: Argument "%s" isn't numeric%s */ 1734 "Argument \"%s\" isn't numeric in %s", pv, 1735 OP_DESC(PL_op)); 1736 else 1737 Perl_warner(aTHX_ packWARN(WARN_NUMERIC), 1738 /* diag_listed_as: Argument "%s" isn't numeric%s */ 1739 "Argument \"%s\" isn't numeric", pv); 1740 } 1741 1742 STATIC void 1743 S_not_incrementable(pTHX_ SV *const sv) { 1744 char tmpbuf[64]; 1745 const char *pv; 1746 1747 PERL_ARGS_ASSERT_NOT_INCREMENTABLE; 1748 1749 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf)); 1750 1751 Perl_warner(aTHX_ packWARN(WARN_NUMERIC), 1752 "Argument \"%s\" treated as 0 in increment (++)", pv); 1753 } 1754 1755 /* 1756 =for apidoc looks_like_number 1757 1758 Test if the content of an SV looks like a number (or is a number). 1759 C<Inf> and C<Infinity> are treated as numbers (so will not issue a 1760 non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is 1761 ignored. 1762 1763 =cut 1764 */ 1765 1766 I32 1767 Perl_looks_like_number(pTHX_ SV *const sv) 1768 { 1769 const char *sbegin; 1770 STRLEN len; 1771 int numtype; 1772 1773 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER; 1774 1775 if (SvPOK(sv) || SvPOKp(sv)) { 1776 sbegin = SvPV_nomg_const(sv, len); 1777 } 1778 else 1779 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK); 1780 numtype = grok_number(sbegin, len, NULL); 1781 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype; 1782 } 1783 1784 STATIC bool 1785 S_glob_2number(pTHX_ GV * const gv) 1786 { 1787 PERL_ARGS_ASSERT_GLOB_2NUMBER; 1788 1789 /* We know that all GVs stringify to something that is not-a-number, 1790 so no need to test that. */ 1791 if (ckWARN(WARN_NUMERIC)) 1792 { 1793 SV *const buffer = sv_newmortal(); 1794 gv_efullname3(buffer, gv, "*"); 1795 not_a_number(buffer); 1796 } 1797 /* We just want something true to return, so that S_sv_2iuv_common 1798 can tail call us and return true. */ 1799 return TRUE; 1800 } 1801 1802 /* Actually, ISO C leaves conversion of UV to IV undefined, but 1803 until proven guilty, assume that things are not that bad... */ 1804 1805 /* 1806 NV_PRESERVES_UV: 1807 1808 As 64 bit platforms often have an NV that doesn't preserve all bits of 1809 an IV (an assumption perl has been based on to date) it becomes necessary 1810 to remove the assumption that the NV always carries enough precision to 1811 recreate the IV whenever needed, and that the NV is the canonical form. 1812 Instead, IV/UV and NV need to be given equal rights. So as to not lose 1813 precision as a side effect of conversion (which would lead to insanity 1814 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is 1815 1) to distinguish between IV/UV/NV slots that have a valid conversion cached 1816 where precision was lost, and IV/UV/NV slots that have a valid conversion 1817 which has lost no precision 1818 2) to ensure that if a numeric conversion to one form is requested that 1819 would lose precision, the precise conversion (or differently 1820 imprecise conversion) is also performed and cached, to prevent 1821 requests for different numeric formats on the same SV causing 1822 lossy conversion chains. (lossless conversion chains are perfectly 1823 acceptable (still)) 1824 1825 1826 flags are used: 1827 SvIOKp is true if the IV slot contains a valid value 1828 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true) 1829 SvNOKp is true if the NV slot contains a valid value 1830 SvNOK is true only if the NV value is accurate 1831 1832 so 1833 while converting from PV to NV, check to see if converting that NV to an 1834 IV(or UV) would lose accuracy over a direct conversion from PV to 1835 IV(or UV). If it would, cache both conversions, return NV, but mark 1836 SV as IOK NOKp (ie not NOK). 1837 1838 While converting from PV to IV, check to see if converting that IV to an 1839 NV would lose accuracy over a direct conversion from PV to NV. If it 1840 would, cache both conversions, flag similarly. 1841 1842 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite 1843 correctly because if IV & NV were set NV *always* overruled. 1844 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning 1845 changes - now IV and NV together means that the two are interchangeable: 1846 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; 1847 1848 The benefit of this is that operations such as pp_add know that if 1849 SvIOK is true for both left and right operands, then integer addition 1850 can be used instead of floating point (for cases where the result won't 1851 overflow). Before, floating point was always used, which could lead to 1852 loss of precision compared with integer addition. 1853 1854 * making IV and NV equal status should make maths accurate on 64 bit 1855 platforms 1856 * may speed up maths somewhat if pp_add and friends start to use 1857 integers when possible instead of fp. (Hopefully the overhead in 1858 looking for SvIOK and checking for overflow will not outweigh the 1859 fp to integer speedup) 1860 * will slow down integer operations (callers of SvIV) on "inaccurate" 1861 values, as the change from SvIOK to SvIOKp will cause a call into 1862 sv_2iv each time rather than a macro access direct to the IV slot 1863 * should speed up number->string conversion on integers as IV is 1864 favoured when IV and NV are equally accurate 1865 1866 #################################################################### 1867 You had better be using SvIOK_notUV if you want an IV for arithmetic: 1868 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV. 1869 On the other hand, SvUOK is true iff UV. 1870 #################################################################### 1871 1872 Your mileage will vary depending your CPU's relative fp to integer 1873 performance ratio. 1874 */ 1875 1876 #ifndef NV_PRESERVES_UV 1877 # define IS_NUMBER_UNDERFLOW_IV 1 1878 # define IS_NUMBER_UNDERFLOW_UV 2 1879 # define IS_NUMBER_IV_AND_UV 2 1880 # define IS_NUMBER_OVERFLOW_IV 4 1881 # define IS_NUMBER_OVERFLOW_UV 5 1882 1883 /* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */ 1884 1885 /* For sv_2nv these three cases are "SvNOK and don't bother casting" */ 1886 STATIC int 1887 S_sv_2iuv_non_preserve(pTHX_ SV *const sv 1888 # ifdef DEBUGGING 1889 , I32 numtype 1890 # endif 1891 ) 1892 { 1893 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE; 1894 PERL_UNUSED_CONTEXT; 1895 1896 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); 1897 if (SvNVX(sv) < (NV)IV_MIN) { 1898 (void)SvIOKp_on(sv); 1899 (void)SvNOK_on(sv); 1900 SvIV_set(sv, IV_MIN); 1901 return IS_NUMBER_UNDERFLOW_IV; 1902 } 1903 if (SvNVX(sv) > (NV)UV_MAX) { 1904 (void)SvIOKp_on(sv); 1905 (void)SvNOK_on(sv); 1906 SvIsUV_on(sv); 1907 SvUV_set(sv, UV_MAX); 1908 return IS_NUMBER_OVERFLOW_UV; 1909 } 1910 (void)SvIOKp_on(sv); 1911 (void)SvNOK_on(sv); 1912 /* Can't use strtol etc to convert this string. (See truth table in 1913 sv_2iv */ 1914 if (SvNVX(sv) < IV_MAX_P1) { 1915 SvIV_set(sv, I_V(SvNVX(sv))); 1916 if ((NV)(SvIVX(sv)) == SvNVX(sv)) { 1917 SvIOK_on(sv); /* Integer is precise. NOK, IOK */ 1918 } else { 1919 /* Integer is imprecise. NOK, IOKp */ 1920 } 1921 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV; 1922 } 1923 SvIsUV_on(sv); 1924 SvUV_set(sv, U_V(SvNVX(sv))); 1925 if ((NV)(SvUVX(sv)) == SvNVX(sv)) { 1926 if (SvUVX(sv) == UV_MAX) { 1927 /* As we know that NVs don't preserve UVs, UV_MAX cannot 1928 possibly be preserved by NV. Hence, it must be overflow. 1929 NOK, IOKp */ 1930 return IS_NUMBER_OVERFLOW_UV; 1931 } 1932 SvIOK_on(sv); /* Integer is precise. NOK, UOK */ 1933 } else { 1934 /* Integer is imprecise. NOK, IOKp */ 1935 } 1936 return IS_NUMBER_OVERFLOW_IV; 1937 } 1938 #endif /* !NV_PRESERVES_UV*/ 1939 1940 /* If numtype is infnan, set the NV of the sv accordingly. 1941 * If numtype is anything else, try setting the NV using Atof(PV). */ 1942 static void 1943 S_sv_setnv(pTHX_ SV* sv, int numtype) 1944 { 1945 bool pok = cBOOL(SvPOK(sv)); 1946 bool nok = FALSE; 1947 #ifdef NV_INF 1948 if ((numtype & IS_NUMBER_INFINITY)) { 1949 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF); 1950 nok = TRUE; 1951 } else 1952 #endif 1953 #ifdef NV_NAN 1954 if ((numtype & IS_NUMBER_NAN)) { 1955 SvNV_set(sv, NV_NAN); 1956 nok = TRUE; 1957 } else 1958 #endif 1959 if (pok) { 1960 SvNV_set(sv, Atof(SvPVX_const(sv))); 1961 /* Purposefully no true nok here, since we don't want to blow 1962 * away the possible IOK/UV of an existing sv. */ 1963 } 1964 if (nok) { 1965 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */ 1966 if (pok) 1967 SvPOK_on(sv); /* PV is okay, though. */ 1968 } 1969 } 1970 1971 STATIC bool 1972 S_sv_2iuv_common(pTHX_ SV *const sv) 1973 { 1974 PERL_ARGS_ASSERT_SV_2IUV_COMMON; 1975 1976 if (SvNOKp(sv)) { 1977 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv 1978 * without also getting a cached IV/UV from it at the same time 1979 * (ie PV->NV conversion should detect loss of accuracy and cache 1980 * IV or UV at same time to avoid this. */ 1981 /* IV-over-UV optimisation - choose to cache IV if possible */ 1982 1983 if (SvTYPE(sv) == SVt_NV) 1984 sv_upgrade(sv, SVt_PVNV); 1985 1986 got_nv: 1987 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ 1988 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost 1989 certainly cast into the IV range at IV_MAX, whereas the correct 1990 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary 1991 cases go to UV */ 1992 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 1993 if (Perl_isnan(SvNVX(sv))) { 1994 SvUV_set(sv, 0); 1995 SvIsUV_on(sv); 1996 return FALSE; 1997 } 1998 #endif 1999 if (SvNVX(sv) < (NV)IV_MAX + 0.5) { 2000 SvIV_set(sv, I_V(SvNVX(sv))); 2001 if (SvNVX(sv) == (NV) SvIVX(sv) 2002 #ifndef NV_PRESERVES_UV 2003 && SvIVX(sv) != IV_MIN /* avoid negating IV_MIN below */ 2004 && (((UV)1 << NV_PRESERVES_UV_BITS) > 2005 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv))) 2006 /* Don't flag it as "accurately an integer" if the number 2007 came from a (by definition imprecise) NV operation, and 2008 we're outside the range of NV integer precision */ 2009 #endif 2010 ) { 2011 if (SvNOK(sv)) 2012 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ 2013 else { 2014 /* scalar has trailing garbage, eg "42a" */ 2015 } 2016 DEBUG_c(PerlIO_printf(Perl_debug_log, 2017 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n", 2018 PTR2UV(sv), 2019 SvNVX(sv), 2020 SvIVX(sv))); 2021 2022 } else { 2023 /* IV not precise. No need to convert from PV, as NV 2024 conversion would already have cached IV if it detected 2025 that PV->IV would be better than PV->NV->IV 2026 flags already correct - don't set public IOK. */ 2027 DEBUG_c(PerlIO_printf(Perl_debug_log, 2028 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n", 2029 PTR2UV(sv), 2030 SvNVX(sv), 2031 SvIVX(sv))); 2032 } 2033 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, 2034 but the cast (NV)IV_MIN rounds to a the value less (more 2035 negative) than IV_MIN which happens to be equal to SvNVX ?? 2036 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and 2037 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and 2038 (NV)UVX == NVX are both true, but the values differ. :-( 2039 Hopefully for 2s complement IV_MIN is something like 2040 0x8000000000000000 which will be exact. NWC */ 2041 } 2042 else { 2043 SvUV_set(sv, U_V(SvNVX(sv))); 2044 if ( 2045 (SvNVX(sv) == (NV) SvUVX(sv)) 2046 #ifndef NV_PRESERVES_UV 2047 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ 2048 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ 2049 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) 2050 /* Don't flag it as "accurately an integer" if the number 2051 came from a (by definition imprecise) NV operation, and 2052 we're outside the range of NV integer precision */ 2053 #endif 2054 && SvNOK(sv) 2055 ) 2056 SvIOK_on(sv); 2057 SvIsUV_on(sv); 2058 DEBUG_c(PerlIO_printf(Perl_debug_log, 2059 "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n", 2060 PTR2UV(sv), 2061 SvUVX(sv), 2062 SvUVX(sv))); 2063 } 2064 } 2065 else if (SvPOKp(sv)) { 2066 UV value; 2067 int numtype; 2068 const char *s = SvPVX_const(sv); 2069 const STRLEN cur = SvCUR(sv); 2070 2071 /* short-cut for a single digit string like "1" */ 2072 2073 if (cur == 1) { 2074 char c = *s; 2075 if (isDIGIT(c)) { 2076 if (SvTYPE(sv) < SVt_PVIV) 2077 sv_upgrade(sv, SVt_PVIV); 2078 (void)SvIOK_on(sv); 2079 SvIV_set(sv, (IV)(c - '0')); 2080 return FALSE; 2081 } 2082 } 2083 2084 numtype = grok_number(s, cur, &value); 2085 /* We want to avoid a possible problem when we cache an IV/ a UV which 2086 may be later translated to an NV, and the resulting NV is not 2087 the same as the direct translation of the initial string 2088 (eg 123.456 can shortcut to the IV 123 with atol(), but we must 2089 be careful to ensure that the value with the .456 is around if the 2090 NV value is requested in the future). 2091 2092 This means that if we cache such an IV/a UV, we need to cache the 2093 NV as well. Moreover, we trade speed for space, and do not 2094 cache the NV if we are sure it's not needed. 2095 */ 2096 2097 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */ 2098 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2099 == IS_NUMBER_IN_UV) { 2100 /* It's definitely an integer, only upgrade to PVIV */ 2101 if (SvTYPE(sv) < SVt_PVIV) 2102 sv_upgrade(sv, SVt_PVIV); 2103 (void)SvIOK_on(sv); 2104 } else if (SvTYPE(sv) < SVt_PVNV) 2105 sv_upgrade(sv, SVt_PVNV); 2106 2107 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) { 2108 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING))) 2109 not_a_number(sv); 2110 S_sv_setnv(aTHX_ sv, numtype); 2111 goto got_nv; /* Fill IV/UV slot and set IOKp */ 2112 } 2113 2114 /* If NVs preserve UVs then we only use the UV value if we know that 2115 we aren't going to call atof() below. If NVs don't preserve UVs 2116 then the value returned may have more precision than atof() will 2117 return, even though value isn't perfectly accurate. */ 2118 if ((numtype & (IS_NUMBER_IN_UV 2119 #ifdef NV_PRESERVES_UV 2120 | IS_NUMBER_NOT_INT 2121 #endif 2122 )) == IS_NUMBER_IN_UV) { 2123 /* This won't turn off the public IOK flag if it was set above */ 2124 (void)SvIOKp_on(sv); 2125 2126 if (!(numtype & IS_NUMBER_NEG)) { 2127 /* positive */; 2128 if (value <= (UV)IV_MAX) { 2129 SvIV_set(sv, (IV)value); 2130 } else { 2131 /* it didn't overflow, and it was positive. */ 2132 SvUV_set(sv, value); 2133 SvIsUV_on(sv); 2134 } 2135 } else { 2136 /* 2s complement assumption */ 2137 if (value <= (UV)IV_MIN) { 2138 SvIV_set(sv, value == (UV)IV_MIN 2139 ? IV_MIN : -(IV)value); 2140 } else { 2141 /* Too negative for an IV. This is a double upgrade, but 2142 I'm assuming it will be rare. */ 2143 if (SvTYPE(sv) < SVt_PVNV) 2144 sv_upgrade(sv, SVt_PVNV); 2145 SvNOK_on(sv); 2146 SvIOK_off(sv); 2147 SvIOKp_on(sv); 2148 SvNV_set(sv, -(NV)value); 2149 SvIV_set(sv, IV_MIN); 2150 } 2151 } 2152 } 2153 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we 2154 will be in the previous block to set the IV slot, and the next 2155 block to set the NV slot. So no else here. */ 2156 2157 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2158 != IS_NUMBER_IN_UV) { 2159 /* It wasn't an (integer that doesn't overflow the UV). */ 2160 S_sv_setnv(aTHX_ sv, numtype); 2161 2162 if (! numtype && ckWARN(WARN_NUMERIC)) 2163 not_a_number(sv); 2164 2165 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n", 2166 PTR2UV(sv), SvNVX(sv))); 2167 2168 #ifdef NV_PRESERVES_UV 2169 SvNOKp_on(sv); 2170 if (numtype) 2171 SvNOK_on(sv); 2172 goto got_nv; /* Fill IV/UV slot and set IOKp, maybe IOK */ 2173 #else /* NV_PRESERVES_UV */ 2174 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2175 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) { 2176 /* The IV/UV slot will have been set from value returned by 2177 grok_number above. The NV slot has just been set using 2178 Atof. */ 2179 SvNOK_on(sv); 2180 assert (SvIOKp(sv)); 2181 } else { 2182 if (((UV)1 << NV_PRESERVES_UV_BITS) > 2183 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { 2184 /* Small enough to preserve all bits. */ 2185 (void)SvIOKp_on(sv); 2186 SvNOK_on(sv); 2187 SvIV_set(sv, I_V(SvNVX(sv))); 2188 if ((NV)(SvIVX(sv)) == SvNVX(sv)) 2189 SvIOK_on(sv); 2190 /* Assumption: first non-preserved integer is < IV_MAX, 2191 this NV is in the preserved range, therefore: */ 2192 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) 2193 < (UV)IV_MAX)) { 2194 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%" NVgf " U_V is 0x%" UVxf ", IV_MAX is 0x%" UVxf "\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); 2195 } 2196 } else { 2197 /* IN_UV NOT_INT 2198 0 0 already failed to read UV. 2199 0 1 already failed to read UV. 2200 1 0 you won't get here in this case. IV/UV 2201 slot set, public IOK, Atof() unneeded. 2202 1 1 already read UV. 2203 so there's no point in sv_2iuv_non_preserve() attempting 2204 to use atol, strtol, strtoul etc. */ 2205 # ifdef DEBUGGING 2206 sv_2iuv_non_preserve (sv, numtype); 2207 # else 2208 sv_2iuv_non_preserve (sv); 2209 # endif 2210 } 2211 } 2212 /* It might be more code efficient to go through the entire logic above 2213 and conditionally set with SvIOKp_on() rather than SvIOK(), but it 2214 gets complex and potentially buggy, so more programmer efficient 2215 to do it this way, by turning off the public flags: */ 2216 if (!numtype) 2217 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK); 2218 #endif /* NV_PRESERVES_UV */ 2219 } 2220 } 2221 else { 2222 if (isGV_with_GP(sv)) 2223 return glob_2number(MUTABLE_GV(sv)); 2224 2225 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) 2226 report_uninit(sv); 2227 if (SvTYPE(sv) < SVt_IV) 2228 /* Typically the caller expects that sv_any is not NULL now. */ 2229 sv_upgrade(sv, SVt_IV); 2230 /* Return 0 from the caller. */ 2231 return TRUE; 2232 } 2233 return FALSE; 2234 } 2235 2236 /* 2237 =for apidoc sv_2iv_flags 2238 2239 Return the integer value of an SV, doing any necessary string 2240 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. 2241 Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros. 2242 2243 =cut 2244 */ 2245 2246 IV 2247 Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags) 2248 { 2249 PERL_ARGS_ASSERT_SV_2IV_FLAGS; 2250 2251 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV 2252 && SvTYPE(sv) != SVt_PVFM); 2253 2254 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 2255 mg_get(sv); 2256 2257 if (SvROK(sv)) { 2258 if (SvAMAGIC(sv)) { 2259 SV * tmpstr; 2260 if (flags & SV_SKIP_OVERLOAD) 2261 return 0; 2262 tmpstr = AMG_CALLunary(sv, numer_amg); 2263 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2264 return SvIV(tmpstr); 2265 } 2266 } 2267 return PTR2IV(SvRV(sv)); 2268 } 2269 2270 if (SvVALID(sv) || isREGEXP(sv)) { 2271 /* FBMs use the space for SvIVX and SvNVX for other purposes, so 2272 must not let them cache IVs. 2273 In practice they are extremely unlikely to actually get anywhere 2274 accessible by user Perl code - the only way that I'm aware of is when 2275 a constant subroutine which is used as the second argument to index. 2276 2277 Regexps have no SvIVX and SvNVX fields. 2278 */ 2279 assert(SvPOKp(sv)); 2280 { 2281 UV value; 2282 const char * const ptr = 2283 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv); 2284 const int numtype 2285 = grok_number(ptr, SvCUR(sv), &value); 2286 2287 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2288 == IS_NUMBER_IN_UV) { 2289 /* It's definitely an integer */ 2290 if (numtype & IS_NUMBER_NEG) { 2291 if (value < (UV)IV_MIN) 2292 return -(IV)value; 2293 } else { 2294 if (value < (UV)IV_MAX) 2295 return (IV)value; 2296 } 2297 } 2298 2299 /* Quite wrong but no good choices. */ 2300 if ((numtype & IS_NUMBER_INFINITY)) { 2301 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX; 2302 } else if ((numtype & IS_NUMBER_NAN)) { 2303 return 0; /* So wrong. */ 2304 } 2305 2306 if (!numtype) { 2307 if (ckWARN(WARN_NUMERIC)) 2308 not_a_number(sv); 2309 } 2310 return I_V(Atof(ptr)); 2311 } 2312 } 2313 2314 if (SvTHINKFIRST(sv)) { 2315 if (SvREADONLY(sv) && !SvOK(sv)) { 2316 if (ckWARN(WARN_UNINITIALIZED)) 2317 report_uninit(sv); 2318 return 0; 2319 } 2320 } 2321 2322 if (!SvIOKp(sv)) { 2323 if (S_sv_2iuv_common(aTHX_ sv)) 2324 return 0; 2325 } 2326 2327 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n", 2328 PTR2UV(sv),SvIVX(sv))); 2329 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv); 2330 } 2331 2332 /* 2333 =for apidoc sv_2uv_flags 2334 2335 Return the unsigned integer value of an SV, doing any necessary string 2336 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. 2337 Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros. 2338 2339 =for apidoc Amnh||SV_GMAGIC 2340 2341 =cut 2342 */ 2343 2344 UV 2345 Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags) 2346 { 2347 PERL_ARGS_ASSERT_SV_2UV_FLAGS; 2348 2349 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 2350 mg_get(sv); 2351 2352 if (SvROK(sv)) { 2353 if (SvAMAGIC(sv)) { 2354 SV *tmpstr; 2355 if (flags & SV_SKIP_OVERLOAD) 2356 return 0; 2357 tmpstr = AMG_CALLunary(sv, numer_amg); 2358 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2359 return SvUV(tmpstr); 2360 } 2361 } 2362 return PTR2UV(SvRV(sv)); 2363 } 2364 2365 if (SvVALID(sv) || isREGEXP(sv)) { 2366 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use 2367 the same flag bit as SVf_IVisUV, so must not let them cache IVs. 2368 Regexps have no SvIVX and SvNVX fields. */ 2369 assert(SvPOKp(sv)); 2370 { 2371 UV value; 2372 const char * const ptr = 2373 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv); 2374 const int numtype 2375 = grok_number(ptr, SvCUR(sv), &value); 2376 2377 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2378 == IS_NUMBER_IN_UV) { 2379 /* It's definitely an integer */ 2380 if (!(numtype & IS_NUMBER_NEG)) 2381 return value; 2382 } 2383 2384 /* Quite wrong but no good choices. */ 2385 if ((numtype & IS_NUMBER_INFINITY)) { 2386 return UV_MAX; /* So wrong. */ 2387 } else if ((numtype & IS_NUMBER_NAN)) { 2388 return 0; /* So wrong. */ 2389 } 2390 2391 if (!numtype) { 2392 if (ckWARN(WARN_NUMERIC)) 2393 not_a_number(sv); 2394 } 2395 return U_V(Atof(ptr)); 2396 } 2397 } 2398 2399 if (SvTHINKFIRST(sv)) { 2400 if (SvREADONLY(sv) && !SvOK(sv)) { 2401 if (ckWARN(WARN_UNINITIALIZED)) 2402 report_uninit(sv); 2403 return 0; 2404 } 2405 } 2406 2407 if (!SvIOKp(sv)) { 2408 if (S_sv_2iuv_common(aTHX_ sv)) 2409 return 0; 2410 } 2411 2412 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n", 2413 PTR2UV(sv),SvUVX(sv))); 2414 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv); 2415 } 2416 2417 /* 2418 =for apidoc sv_2nv_flags 2419 2420 Return the num value of an SV, doing any necessary string or integer 2421 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. 2422 Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros. 2423 2424 =cut 2425 */ 2426 2427 NV 2428 Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) 2429 { 2430 PERL_ARGS_ASSERT_SV_2NV_FLAGS; 2431 2432 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV 2433 && SvTYPE(sv) != SVt_PVFM); 2434 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) { 2435 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use 2436 the same flag bit as SVf_IVisUV, so must not let them cache NVs. 2437 Regexps have no SvIVX and SvNVX fields. */ 2438 const char *ptr; 2439 if (flags & SV_GMAGIC) 2440 mg_get(sv); 2441 if (SvNOKp(sv)) 2442 return SvNVX(sv); 2443 if (SvPOKp(sv) && !SvIOKp(sv)) { 2444 ptr = SvPVX_const(sv); 2445 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) && 2446 !grok_number(ptr, SvCUR(sv), NULL)) 2447 not_a_number(sv); 2448 return Atof(ptr); 2449 } 2450 if (SvIOKp(sv)) { 2451 if (SvIsUV(sv)) 2452 return (NV)SvUVX(sv); 2453 else 2454 return (NV)SvIVX(sv); 2455 } 2456 if (SvROK(sv)) { 2457 goto return_rok; 2458 } 2459 assert(SvTYPE(sv) >= SVt_PVMG); 2460 /* This falls through to the report_uninit near the end of the 2461 function. */ 2462 } else if (SvTHINKFIRST(sv)) { 2463 if (SvROK(sv)) { 2464 return_rok: 2465 if (SvAMAGIC(sv)) { 2466 SV *tmpstr; 2467 if (flags & SV_SKIP_OVERLOAD) 2468 return 0; 2469 tmpstr = AMG_CALLunary(sv, numer_amg); 2470 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2471 return SvNV(tmpstr); 2472 } 2473 } 2474 return PTR2NV(SvRV(sv)); 2475 } 2476 if (SvREADONLY(sv) && !SvOK(sv)) { 2477 if (ckWARN(WARN_UNINITIALIZED)) 2478 report_uninit(sv); 2479 return 0.0; 2480 } 2481 } 2482 if (SvTYPE(sv) < SVt_NV) { 2483 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */ 2484 sv_upgrade(sv, SVt_NV); 2485 CLANG_DIAG_IGNORE_STMT(-Wthread-safety); 2486 DEBUG_c({ 2487 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 2488 STORE_LC_NUMERIC_SET_STANDARD(); 2489 PerlIO_printf(Perl_debug_log, 2490 "0x%" UVxf " num(%" NVgf ")\n", 2491 PTR2UV(sv), SvNVX(sv)); 2492 RESTORE_LC_NUMERIC(); 2493 }); 2494 CLANG_DIAG_RESTORE_STMT; 2495 2496 } 2497 else if (SvTYPE(sv) < SVt_PVNV) 2498 sv_upgrade(sv, SVt_PVNV); 2499 if (SvNOKp(sv)) { 2500 return SvNVX(sv); 2501 } 2502 if (SvIOKp(sv)) { 2503 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv)); 2504 #ifdef NV_PRESERVES_UV 2505 if (SvIOK(sv)) 2506 SvNOK_on(sv); 2507 else 2508 SvNOKp_on(sv); 2509 #else 2510 /* Only set the public NV OK flag if this NV preserves the IV */ 2511 /* Check it's not 0xFFFFFFFFFFFFFFFF */ 2512 if (SvIOK(sv) && 2513 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv)))) 2514 : (SvIVX(sv) == I_V(SvNVX(sv)))) 2515 SvNOK_on(sv); 2516 else 2517 SvNOKp_on(sv); 2518 #endif 2519 } 2520 else if (SvPOKp(sv)) { 2521 UV value; 2522 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); 2523 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC)) 2524 not_a_number(sv); 2525 #ifdef NV_PRESERVES_UV 2526 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2527 == IS_NUMBER_IN_UV) { 2528 /* It's definitely an integer */ 2529 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value); 2530 } else { 2531 S_sv_setnv(aTHX_ sv, numtype); 2532 } 2533 if (numtype) 2534 SvNOK_on(sv); 2535 else 2536 SvNOKp_on(sv); 2537 #else 2538 SvNV_set(sv, Atof(SvPVX_const(sv))); 2539 /* Only set the public NV OK flag if this NV preserves the value in 2540 the PV at least as well as an IV/UV would. 2541 Not sure how to do this 100% reliably. */ 2542 /* if that shift count is out of range then Configure's test is 2543 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS == 2544 UV_BITS */ 2545 if (((UV)1 << NV_PRESERVES_UV_BITS) > 2546 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { 2547 SvNOK_on(sv); /* Definitely small enough to preserve all bits */ 2548 } else if (!(numtype & IS_NUMBER_IN_UV)) { 2549 /* Can't use strtol etc to convert this string, so don't try. 2550 sv_2iv and sv_2uv will use the NV to convert, not the PV. */ 2551 SvNOK_on(sv); 2552 } else { 2553 /* value has been set. It may not be precise. */ 2554 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) { 2555 /* 2s complement assumption for (UV)IV_MIN */ 2556 SvNOK_on(sv); /* Integer is too negative. */ 2557 } else { 2558 SvNOKp_on(sv); 2559 SvIOKp_on(sv); 2560 2561 if (numtype & IS_NUMBER_NEG) { 2562 /* -IV_MIN is undefined, but we should never reach 2563 * this point with both IS_NUMBER_NEG and value == 2564 * (UV)IV_MIN */ 2565 assert(value != (UV)IV_MIN); 2566 SvIV_set(sv, -(IV)value); 2567 } else if (value <= (UV)IV_MAX) { 2568 SvIV_set(sv, (IV)value); 2569 } else { 2570 SvUV_set(sv, value); 2571 SvIsUV_on(sv); 2572 } 2573 2574 if (numtype & IS_NUMBER_NOT_INT) { 2575 /* I believe that even if the original PV had decimals, 2576 they are lost beyond the limit of the FP precision. 2577 However, neither is canonical, so both only get p 2578 flags. NWC, 2000/11/25 */ 2579 /* Both already have p flags, so do nothing */ 2580 } else { 2581 const NV nv = SvNVX(sv); 2582 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */ 2583 if (SvNVX(sv) < (NV)IV_MAX + 0.5) { 2584 if (SvIVX(sv) == I_V(nv)) { 2585 SvNOK_on(sv); 2586 } else { 2587 /* It had no "." so it must be integer. */ 2588 } 2589 SvIOK_on(sv); 2590 } else { 2591 /* between IV_MAX and NV(UV_MAX). 2592 Could be slightly > UV_MAX */ 2593 2594 if (numtype & IS_NUMBER_NOT_INT) { 2595 /* UV and NV both imprecise. */ 2596 } else { 2597 const UV nv_as_uv = U_V(nv); 2598 2599 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) { 2600 SvNOK_on(sv); 2601 } 2602 SvIOK_on(sv); 2603 } 2604 } 2605 } 2606 } 2607 } 2608 /* It might be more code efficient to go through the entire logic above 2609 and conditionally set with SvNOKp_on() rather than SvNOK(), but it 2610 gets complex and potentially buggy, so more programmer efficient 2611 to do it this way, by turning off the public flags: */ 2612 if (!numtype) 2613 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK); 2614 #endif /* NV_PRESERVES_UV */ 2615 } 2616 else { 2617 if (isGV_with_GP(sv)) { 2618 glob_2number(MUTABLE_GV(sv)); 2619 return 0.0; 2620 } 2621 2622 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) 2623 report_uninit(sv); 2624 assert (SvTYPE(sv) >= SVt_NV); 2625 /* Typically the caller expects that sv_any is not NULL now. */ 2626 /* XXX Ilya implies that this is a bug in callers that assume this 2627 and ideally should be fixed. */ 2628 return 0.0; 2629 } 2630 CLANG_DIAG_IGNORE_STMT(-Wthread-safety); 2631 DEBUG_c({ 2632 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 2633 STORE_LC_NUMERIC_SET_STANDARD(); 2634 PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n", 2635 PTR2UV(sv), SvNVX(sv)); 2636 RESTORE_LC_NUMERIC(); 2637 }); 2638 CLANG_DIAG_RESTORE_STMT; 2639 return SvNVX(sv); 2640 } 2641 2642 /* 2643 =for apidoc sv_2num 2644 2645 Return an SV with the numeric value of the source SV, doing any necessary 2646 reference or overload conversion. The caller is expected to have handled 2647 get-magic already. 2648 2649 =cut 2650 */ 2651 2652 SV * 2653 Perl_sv_2num(pTHX_ SV *const sv) 2654 { 2655 PERL_ARGS_ASSERT_SV_2NUM; 2656 2657 if (!SvROK(sv)) 2658 return sv; 2659 if (SvAMAGIC(sv)) { 2660 SV * const tmpsv = AMG_CALLunary(sv, numer_amg); 2661 TAINT_IF(tmpsv && SvTAINTED(tmpsv)); 2662 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) 2663 return sv_2num(tmpsv); 2664 } 2665 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv)))); 2666 } 2667 2668 /* int2str_table: lookup table containing string representations of all 2669 * two digit numbers. For example, int2str_table.arr[0] is "00" and 2670 * int2str_table.arr[12*2] is "12". 2671 * 2672 * We are going to read two bytes at a time, so we have to ensure that 2673 * the array is aligned to a 2 byte boundary. That's why it was made a 2674 * union with a dummy U16 member. */ 2675 static const union { 2676 char arr[200]; 2677 U16 dummy; 2678 } int2str_table = {{ 2679 '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', 2680 '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', 2681 '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', 2682 '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', 2683 '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', 2684 '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', 2685 '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', 2686 '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', 2687 '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', 2688 '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', 2689 '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', 2690 '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', 2691 '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', 2692 '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', 2693 '9', '8', '9', '9' 2694 }}; 2695 2696 /* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or 2697 * UV as a string towards the end of buf, and return pointers to start and 2698 * end of it. 2699 * 2700 * We assume that buf is at least TYPE_CHARS(UV) long. 2701 */ 2702 2703 PERL_STATIC_INLINE char * 2704 S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob) 2705 { 2706 char *ptr = buf + TYPE_CHARS(UV); 2707 char * const ebuf = ptr; 2708 int sign; 2709 U16 *word_ptr, *word_table; 2710 2711 PERL_ARGS_ASSERT_UIV_2BUF; 2712 2713 /* ptr has to be properly aligned, because we will cast it to U16* */ 2714 assert(PTR2nat(ptr) % 2 == 0); 2715 /* we are going to read/write two bytes at a time */ 2716 word_ptr = (U16*)ptr; 2717 word_table = (U16*)int2str_table.arr; 2718 2719 if (UNLIKELY(is_uv)) 2720 sign = 0; 2721 else if (iv >= 0) { 2722 uv = iv; 2723 sign = 0; 2724 } else { 2725 /* Using 0- here to silence bogus warning from MS VC */ 2726 uv = (UV) (0 - (UV) iv); 2727 sign = 1; 2728 } 2729 2730 while (uv > 99) { 2731 *--word_ptr = word_table[uv % 100]; 2732 uv /= 100; 2733 } 2734 ptr = (char*)word_ptr; 2735 2736 if (uv < 10) 2737 *--ptr = (char)uv + '0'; 2738 else { 2739 *--word_ptr = word_table[uv]; 2740 ptr = (char*)word_ptr; 2741 } 2742 2743 if (sign) 2744 *--ptr = '-'; 2745 2746 *peob = ebuf; 2747 return ptr; 2748 } 2749 2750 /* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an 2751 * infinity or a not-a-number, writes the appropriate strings to the 2752 * buffer, including a zero byte. On success returns the written length, 2753 * excluding the zero byte, on failure (not an infinity, not a nan) 2754 * returns zero, assert-fails on maxlen being too short. 2755 * 2756 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only 2757 * shared string constants we point to, instead of generating a new 2758 * string for each instance. */ 2759 STATIC size_t 2760 S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) { 2761 char* s = buffer; 2762 assert(maxlen >= 4); 2763 if (Perl_isinf(nv)) { 2764 if (nv < 0) { 2765 if (maxlen < 5) /* "-Inf\0" */ 2766 return 0; 2767 *s++ = '-'; 2768 } else if (plus) { 2769 *s++ = '+'; 2770 } 2771 *s++ = 'I'; 2772 *s++ = 'n'; 2773 *s++ = 'f'; 2774 } 2775 else if (Perl_isnan(nv)) { 2776 *s++ = 'N'; 2777 *s++ = 'a'; 2778 *s++ = 'N'; 2779 /* XXX optionally output the payload mantissa bits as 2780 * "(unsigned)" (to match the nan("...") C99 function, 2781 * or maybe as "(0xhhh...)" would make more sense... 2782 * provide a format string so that the user can decide? 2783 * NOTE: would affect the maxlen and assert() logic.*/ 2784 } 2785 else { 2786 return 0; 2787 } 2788 assert((s == buffer + 3) || (s == buffer + 4)); 2789 *s = 0; 2790 return s - buffer; 2791 } 2792 2793 /* 2794 =for apidoc sv_2pv 2795 =for apidoc_item sv_2pv_flags 2796 2797 These implement the various forms of the L<perlapi/C<SvPV>> macros. 2798 The macros are the preferred interface. 2799 2800 These return a pointer to the string value of an SV (coercing it to a string if 2801 necessary), and set C<*lp> to its length in bytes. 2802 2803 The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and 2804 C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains 2805 C<SV_GMAGIC>. 2806 2807 =for apidoc Amnh||SV_GMAGIC 2808 2809 =cut 2810 */ 2811 2812 char * 2813 Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags) 2814 { 2815 char *s; 2816 2817 PERL_ARGS_ASSERT_SV_2PV_FLAGS; 2818 2819 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV 2820 && SvTYPE(sv) != SVt_PVFM); 2821 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 2822 mg_get(sv); 2823 if (SvROK(sv)) { 2824 if (SvAMAGIC(sv)) { 2825 SV *tmpstr; 2826 if (flags & SV_SKIP_OVERLOAD) 2827 return NULL; 2828 tmpstr = AMG_CALLunary(sv, string_amg); 2829 TAINT_IF(tmpstr && SvTAINTED(tmpstr)); 2830 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2831 /* Unwrap this: */ 2832 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr); 2833 */ 2834 2835 char *pv; 2836 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) { 2837 if (flags & SV_CONST_RETURN) { 2838 pv = (char *) SvPVX_const(tmpstr); 2839 } else { 2840 pv = (flags & SV_MUTABLE_RETURN) 2841 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr); 2842 } 2843 if (lp) 2844 *lp = SvCUR(tmpstr); 2845 } else { 2846 pv = sv_2pv_flags(tmpstr, lp, flags); 2847 } 2848 if (SvUTF8(tmpstr)) 2849 SvUTF8_on(sv); 2850 else 2851 SvUTF8_off(sv); 2852 return pv; 2853 } 2854 } 2855 { 2856 STRLEN len; 2857 char *retval; 2858 char *buffer; 2859 SV *const referent = SvRV(sv); 2860 2861 if (!referent) { 2862 len = 7; 2863 retval = buffer = savepvn("NULLREF", len); 2864 } else if (SvTYPE(referent) == SVt_REGEXP && 2865 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) || 2866 amagic_is_enabled(string_amg))) { 2867 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent); 2868 2869 assert(re); 2870 2871 /* If the regex is UTF-8 we want the containing scalar to 2872 have an UTF-8 flag too */ 2873 if (RX_UTF8(re)) 2874 SvUTF8_on(sv); 2875 else 2876 SvUTF8_off(sv); 2877 2878 if (lp) 2879 *lp = RX_WRAPLEN(re); 2880 2881 return RX_WRAPPED(re); 2882 } else { 2883 const char *const typestring = sv_reftype(referent, 0); 2884 const STRLEN typelen = strlen(typestring); 2885 UV addr = PTR2UV(referent); 2886 const char *stashname = NULL; 2887 STRLEN stashnamelen = 0; /* hush, gcc */ 2888 const char *buffer_end; 2889 2890 if (SvOBJECT(referent)) { 2891 const HEK *const name = HvNAME_HEK(SvSTASH(referent)); 2892 2893 if (name) { 2894 stashname = HEK_KEY(name); 2895 stashnamelen = HEK_LEN(name); 2896 2897 if (HEK_UTF8(name)) { 2898 SvUTF8_on(sv); 2899 } else { 2900 SvUTF8_off(sv); 2901 } 2902 } else { 2903 stashname = "__ANON__"; 2904 stashnamelen = 8; 2905 } 2906 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */ 2907 + 2 * sizeof(UV) + 2 /* )\0 */; 2908 } else { 2909 len = typelen + 3 /* (0x */ 2910 + 2 * sizeof(UV) + 2 /* )\0 */; 2911 } 2912 2913 Newx(buffer, len, char); 2914 buffer_end = retval = buffer + len; 2915 2916 /* Working backwards */ 2917 *--retval = '\0'; 2918 *--retval = ')'; 2919 do { 2920 *--retval = PL_hexdigit[addr & 15]; 2921 } while (addr >>= 4); 2922 *--retval = 'x'; 2923 *--retval = '0'; 2924 *--retval = '('; 2925 2926 retval -= typelen; 2927 memcpy(retval, typestring, typelen); 2928 2929 if (stashname) { 2930 *--retval = '='; 2931 retval -= stashnamelen; 2932 memcpy(retval, stashname, stashnamelen); 2933 } 2934 /* retval may not necessarily have reached the start of the 2935 buffer here. */ 2936 assert (retval >= buffer); 2937 2938 len = buffer_end - retval - 1; /* -1 for that \0 */ 2939 } 2940 if (lp) 2941 *lp = len; 2942 SAVEFREEPV(buffer); 2943 return retval; 2944 } 2945 } 2946 2947 if (SvPOKp(sv)) { 2948 if (lp) 2949 *lp = SvCUR(sv); 2950 if (flags & SV_MUTABLE_RETURN) 2951 return SvPVX_mutable(sv); 2952 if (flags & SV_CONST_RETURN) 2953 return (char *)SvPVX_const(sv); 2954 return SvPVX(sv); 2955 } 2956 2957 if (SvIOK(sv)) { 2958 /* I'm assuming that if both IV and NV are equally valid then 2959 converting the IV is going to be more efficient */ 2960 const U32 isUIOK = SvIsUV(sv); 2961 /* The purpose of this union is to ensure that arr is aligned on 2962 a 2 byte boundary, because that is what uiv_2buf() requires */ 2963 union { 2964 char arr[TYPE_CHARS(UV)]; 2965 U16 dummy; 2966 } buf; 2967 char *ebuf, *ptr; 2968 STRLEN len; 2969 2970 if (SvTYPE(sv) < SVt_PVIV) 2971 sv_upgrade(sv, SVt_PVIV); 2972 ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf); 2973 len = ebuf - ptr; 2974 /* inlined from sv_setpvn */ 2975 s = SvGROW_mutable(sv, len + 1); 2976 Move(ptr, s, len, char); 2977 s += len; 2978 *s = '\0'; 2979 /* We used to call SvPOK_on(). Whilst this is fine for (most) Perl code, 2980 it means that after this stringification is cached, there is no way 2981 to distinguish between values originally assigned as $a = 42; and 2982 $a = "42"; (or results of string operators vs numeric operators) 2983 where the value has subsequently been used in the other sense 2984 and had a value cached. 2985 This (somewhat) hack means that we retain the cached stringification, 2986 but don't set SVf_POK. Hence if a value is SVf_IOK|SVf_POK then it 2987 originated as "42", whereas if it's SVf_IOK then it originated as 42. 2988 (ignore SVp_IOK and SVp_POK) 2989 The SvPV macros are now updated to recognise this specific case 2990 (and that there isn't overloading or magic that could alter the 2991 cached value) and so return the cached value immediately without 2992 re-entering this function, getting back here to this block of code, 2993 and repeating the same conversion. */ 2994 SvPOKp_on(sv); 2995 } 2996 else if (SvNOK(sv)) { 2997 if (SvTYPE(sv) < SVt_PVNV) 2998 sv_upgrade(sv, SVt_PVNV); 2999 if (SvNVX(sv) == 0.0 3000 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 3001 && !Perl_isnan(SvNVX(sv)) 3002 #endif 3003 ) { 3004 s = SvGROW_mutable(sv, 2); 3005 *s++ = '0'; 3006 *s = '\0'; 3007 } else { 3008 STRLEN len; 3009 STRLEN size = 5; /* "-Inf\0" */ 3010 3011 s = SvGROW_mutable(sv, size); 3012 len = S_infnan_2pv(SvNVX(sv), s, size, 0); 3013 if (len > 0) { 3014 s += len; 3015 SvPOKp_on(sv); 3016 } 3017 else { 3018 /* some Xenix systems wipe out errno here */ 3019 dSAVE_ERRNO; 3020 3021 size = 3022 1 + /* sign */ 3023 1 + /* "." */ 3024 NV_DIG + 3025 1 + /* "e" */ 3026 1 + /* sign */ 3027 5 + /* exponent digits */ 3028 1 + /* \0 */ 3029 2; /* paranoia */ 3030 3031 s = SvGROW_mutable(sv, size); 3032 #ifndef USE_LOCALE_NUMERIC 3033 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG); 3034 3035 SvPOKp_on(sv); 3036 #else 3037 { 3038 bool local_radix; 3039 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 3040 STORE_LC_NUMERIC_SET_TO_NEEDED(); 3041 3042 local_radix = _NOT_IN_NUMERIC_STANDARD; 3043 if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) { 3044 size += SvCUR(PL_numeric_radix_sv) - 1; 3045 s = SvGROW_mutable(sv, size); 3046 } 3047 3048 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG); 3049 3050 /* If the radix character is UTF-8, and actually is in the 3051 * output, turn on the UTF-8 flag for the scalar */ 3052 if ( local_radix 3053 && SvUTF8(PL_numeric_radix_sv) 3054 && instr(s, SvPVX_const(PL_numeric_radix_sv))) 3055 { 3056 SvUTF8_on(sv); 3057 } 3058 3059 RESTORE_LC_NUMERIC(); 3060 } 3061 3062 /* We don't call SvPOK_on(), because it may come to 3063 * pass that the locale changes so that the 3064 * stringification we just did is no longer correct. We 3065 * will have to re-stringify every time it is needed */ 3066 #endif 3067 RESTORE_ERRNO; 3068 } 3069 while (*s) s++; 3070 } 3071 } 3072 else if (isGV_with_GP(sv)) { 3073 GV *const gv = MUTABLE_GV(sv); 3074 SV *const buffer = sv_newmortal(); 3075 3076 gv_efullname3(buffer, gv, "*"); 3077 3078 assert(SvPOK(buffer)); 3079 if (SvUTF8(buffer)) 3080 SvUTF8_on(sv); 3081 else 3082 SvUTF8_off(sv); 3083 if (lp) 3084 *lp = SvCUR(buffer); 3085 return SvPVX(buffer); 3086 } 3087 else { 3088 if (lp) 3089 *lp = 0; 3090 if (flags & SV_UNDEF_RETURNS_NULL) 3091 return NULL; 3092 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) 3093 report_uninit(sv); 3094 /* Typically the caller expects that sv_any is not NULL now. */ 3095 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV) 3096 sv_upgrade(sv, SVt_PV); 3097 return (char *)""; 3098 } 3099 3100 { 3101 const STRLEN len = s - SvPVX_const(sv); 3102 if (lp) 3103 *lp = len; 3104 SvCUR_set(sv, len); 3105 } 3106 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n", 3107 PTR2UV(sv),SvPVX_const(sv))); 3108 if (flags & SV_CONST_RETURN) 3109 return (char *)SvPVX_const(sv); 3110 if (flags & SV_MUTABLE_RETURN) 3111 return SvPVX_mutable(sv); 3112 return SvPVX(sv); 3113 } 3114 3115 /* 3116 =for apidoc sv_copypv 3117 =for apidoc_item sv_copypv_nomg 3118 =for apidoc_item sv_copypv_flags 3119 3120 These copy a stringified representation of the source SV into the 3121 destination SV. They automatically perform coercion of numeric values into 3122 strings. Guaranteed to preserve the C<UTF8> flag even from overloaded objects. 3123 Similar in nature to C<sv_2pv[_flags]> but they operate directly on an SV 3124 instead of just the string. Mostly they use L</C<sv_2pv_flags>> to 3125 do the work, except when that would lose the UTF-8'ness of the PV. 3126 3127 The three forms differ only in whether or not they perform 'get magic' on 3128 C<sv>. C<sv_copypv_nomg> skips 'get magic'; C<sv_copypv> performs it; and 3129 C<sv_copypv_flags> either performs it (if the C<SV_GMAGIC> bit is set in 3130 C<flags>) or doesn't (if that bit is cleared). 3131 3132 =cut 3133 */ 3134 3135 void 3136 Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags) 3137 { 3138 STRLEN len; 3139 const char *s; 3140 3141 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS; 3142 3143 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC)); 3144 sv_setpvn(dsv,s,len); 3145 if (SvUTF8(ssv)) 3146 SvUTF8_on(dsv); 3147 else 3148 SvUTF8_off(dsv); 3149 } 3150 3151 /* 3152 =for apidoc sv_2pvbyte 3153 =for apidoc_item sv_2pvbyte_flags 3154 3155 These implement the various forms of the L<perlapi/C<SvPVbyte>> macros. 3156 The macros are the preferred interface. 3157 3158 These return a pointer to the byte-encoded representation of the SV, and set 3159 C<*lp> to its length. If the SV is marked as being encoded as UTF-8, it will 3160 be downgraded, if possible, to a byte string. If the SV cannot be downgraded, 3161 they croak. 3162 3163 The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and 3164 C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains 3165 C<SV_GMAGIC>. 3166 3167 =for apidoc Amnh||SV_GMAGIC 3168 3169 =cut 3170 */ 3171 3172 char * 3173 Perl_sv_2pvbyte_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags) 3174 { 3175 PERL_ARGS_ASSERT_SV_2PVBYTE_FLAGS; 3176 3177 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 3178 mg_get(sv); 3179 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv)) 3180 || isGV_with_GP(sv) || SvROK(sv)) { 3181 SV *sv2 = sv_newmortal(); 3182 sv_copypv_nomg(sv2,sv); 3183 sv = sv2; 3184 } 3185 sv_utf8_downgrade_nomg(sv,0); 3186 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv); 3187 } 3188 3189 /* 3190 =for apidoc sv_2pvutf8 3191 =for apidoc_item sv_2pvutf8_flags 3192 3193 These implement the various forms of the L<perlapi/C<SvPVutf8>> macros. 3194 The macros are the preferred interface. 3195 3196 These return a pointer to the UTF-8-encoded representation of the SV, and set 3197 C<*lp> to its length in bytes. They may cause the SV to be upgraded to UTF-8 3198 as a side-effect. 3199 3200 The forms differ in that plain C<sv_2pvutf8> always processes 'get' magic; and 3201 C<sv_2pvutf8_flags> processes 'get' magic if and only if C<flags> contains 3202 C<SV_GMAGIC>. 3203 3204 =cut 3205 */ 3206 3207 char * 3208 Perl_sv_2pvutf8_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags) 3209 { 3210 PERL_ARGS_ASSERT_SV_2PVUTF8_FLAGS; 3211 3212 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 3213 mg_get(sv); 3214 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv)) 3215 || isGV_with_GP(sv) || SvROK(sv)) { 3216 SV *sv2 = sv_newmortal(); 3217 sv_copypv_nomg(sv2,sv); 3218 sv = sv2; 3219 } 3220 sv_utf8_upgrade_nomg(sv); 3221 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv); 3222 } 3223 3224 3225 /* 3226 =for apidoc sv_2bool 3227 3228 This macro is only used by C<sv_true()> or its macro equivalent, and only if 3229 the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. 3230 It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag. 3231 3232 =for apidoc sv_2bool_flags 3233 3234 This function is only used by C<sv_true()> and friends, and only if 3235 the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags 3236 contain C<SV_GMAGIC>, then it does an C<mg_get()> first. 3237 3238 3239 =cut 3240 */ 3241 3242 bool 3243 Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags) 3244 { 3245 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS; 3246 3247 restart: 3248 if(flags & SV_GMAGIC) SvGETMAGIC(sv); 3249 3250 if (!SvOK(sv)) 3251 return 0; 3252 if (SvROK(sv)) { 3253 if (SvAMAGIC(sv)) { 3254 SV * const tmpsv = AMG_CALLunary(sv, bool__amg); 3255 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) { 3256 bool svb; 3257 sv = tmpsv; 3258 if(SvGMAGICAL(sv)) { 3259 flags = SV_GMAGIC; 3260 goto restart; /* call sv_2bool */ 3261 } 3262 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */ 3263 else if(!SvOK(sv)) { 3264 svb = 0; 3265 } 3266 else if(SvPOK(sv)) { 3267 svb = SvPVXtrue(sv); 3268 } 3269 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) { 3270 svb = (SvIOK(sv) && SvIVX(sv) != 0) 3271 || (SvNOK(sv) && SvNVX(sv) != 0.0); 3272 } 3273 else { 3274 flags = 0; 3275 goto restart; /* call sv_2bool_nomg */ 3276 } 3277 return cBOOL(svb); 3278 } 3279 } 3280 assert(SvRV(sv)); 3281 return TRUE; 3282 } 3283 if (isREGEXP(sv)) 3284 return 3285 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0'); 3286 3287 if (SvNOK(sv) && !SvPOK(sv)) 3288 return SvNVX(sv) != 0.0; 3289 3290 return SvTRUE_common(sv, 0); 3291 } 3292 3293 /* 3294 =for apidoc sv_utf8_upgrade 3295 =for apidoc_item sv_utf8_upgrade_nomg 3296 =for apidoc_item sv_utf8_upgrade_flags 3297 =for apidoc_item sv_utf8_upgrade_flags_grow 3298 3299 These convert the PV of an SV to its UTF-8-encoded form. 3300 The SV is forced to string form if it is not already. 3301 They always set the C<SvUTF8> flag to avoid future validity checks even if the 3302 whole string is the same in UTF-8 as not. 3303 They return the number of bytes in the converted string 3304 3305 The forms differ in just two ways. The main difference is whether or not they 3306 perform 'get magic' on C<sv>. C<sv_utf8_upgrade_nomg> skips 'get magic'; 3307 C<sv_utf8_upgrade> performs it; and C<sv_utf8_upgrade_flags> and 3308 C<sv_utf8_upgrade_flags_grow> either perform it (if the C<SV_GMAGIC> bit is set 3309 in C<flags>) or don't (if that bit is cleared). 3310 3311 The other difference is that C<sv_utf8_upgrade_flags_grow> has an additional 3312 parameter, C<extra>, which allows the caller to specify an amount of space to 3313 be reserved as spare beyond what is needed for the actual conversion. This is 3314 used when the caller knows it will soon be needing yet more space, and it is 3315 more efficient to request space from the system in a single call. 3316 This form is otherwise identical to C<sv_utf8_upgrade_flags>. 3317 3318 These are not a general purpose byte encoding to Unicode interface: use the 3319 Encode extension for that. 3320 3321 The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored. 3322 3323 =for apidoc Amnh||SV_GMAGIC| 3324 =for apidoc Amnh||SV_FORCE_UTF8_UPGRADE| 3325 3326 =cut 3327 3328 If the routine itself changes the string, it adds a trailing C<NUL>. Such a 3329 C<NUL> isn't guaranteed due to having other routines do the work in some input 3330 cases, or if the input is already flagged as being in utf8. 3331 3332 */ 3333 3334 STRLEN 3335 Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra) 3336 { 3337 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW; 3338 3339 if (sv == &PL_sv_undef) 3340 return 0; 3341 if (!SvPOK_nog(sv)) { 3342 STRLEN len = 0; 3343 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) { 3344 (void) sv_2pv_flags(sv,&len, flags); 3345 if (SvUTF8(sv)) { 3346 if (extra) SvGROW(sv, SvCUR(sv) + extra); 3347 return len; 3348 } 3349 } else { 3350 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC); 3351 } 3352 } 3353 3354 /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already 3355 * compiled and individual nodes will remain non-utf8 even if the 3356 * stringified version of the pattern gets upgraded. Whether the 3357 * PVX of a REGEXP should be grown or we should just croak, I don't 3358 * know - DAPM */ 3359 if (SvUTF8(sv) || isREGEXP(sv)) { 3360 if (extra) SvGROW(sv, SvCUR(sv) + extra); 3361 return SvCUR(sv); 3362 } 3363 3364 if (SvIsCOW(sv)) { 3365 S_sv_uncow(aTHX_ sv, 0); 3366 } 3367 3368 if (SvCUR(sv) == 0) { 3369 if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing 3370 byte */ 3371 } else { /* Assume Latin-1/EBCDIC */ 3372 /* This function could be much more efficient if we 3373 * had a FLAG in SVs to signal if there are any variant 3374 * chars in the PV. Given that there isn't such a flag 3375 * make the loop as fast as possible. */ 3376 U8 * s = (U8 *) SvPVX_const(sv); 3377 U8 *t = s; 3378 3379 if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) { 3380 3381 /* utf8 conversion not needed because all are invariants. Mark 3382 * as UTF-8 even if no variant - saves scanning loop */ 3383 SvUTF8_on(sv); 3384 if (extra) SvGROW(sv, SvCUR(sv) + extra); 3385 return SvCUR(sv); 3386 } 3387 3388 /* Here, there is at least one variant (t points to the first one), so 3389 * the string should be converted to utf8. Everything from 's' to 3390 * 't - 1' will occupy only 1 byte each on output. 3391 * 3392 * Note that the incoming SV may not have a trailing '\0', as certain 3393 * code in pp_formline can send us partially built SVs. 3394 * 3395 * There are two main ways to convert. One is to create a new string 3396 * and go through the input starting from the beginning, appending each 3397 * converted value onto the new string as we go along. Going this 3398 * route, it's probably best to initially allocate enough space in the 3399 * string rather than possibly running out of space and having to 3400 * reallocate and then copy what we've done so far. Since everything 3401 * from 's' to 't - 1' is invariant, the destination can be initialized 3402 * with these using a fast memory copy. To be sure to allocate enough 3403 * space, one could use the worst case scenario, where every remaining 3404 * byte expands to two under UTF-8, or one could parse it and count 3405 * exactly how many do expand. 3406 * 3407 * The other way is to unconditionally parse the remainder of the 3408 * string to figure out exactly how big the expanded string will be, 3409 * growing if needed. Then start at the end of the string and place 3410 * the character there at the end of the unfilled space in the expanded 3411 * one, working backwards until reaching 't'. 3412 * 3413 * The problem with assuming the worst case scenario is that for very 3414 * long strings, we could allocate much more memory than actually 3415 * needed, which can create performance problems. If we have to parse 3416 * anyway, the second method is the winner as it may avoid an extra 3417 * copy. The code used to use the first method under some 3418 * circumstances, but now that there is faster variant counting on 3419 * ASCII platforms, the second method is used exclusively, eliminating 3420 * some code that no longer has to be maintained. */ 3421 3422 { 3423 /* Count the total number of variants there are. We can start 3424 * just beyond the first one, which is known to be at 't' */ 3425 const Size_t invariant_length = t - s; 3426 U8 * e = (U8 *) SvEND(sv); 3427 3428 /* The length of the left overs, plus 1. */ 3429 const Size_t remaining_length_p1 = e - t; 3430 3431 /* We expand by 1 for the variant at 't' and one for each remaining 3432 * variant (we start looking at 't+1') */ 3433 Size_t expansion = 1 + variant_under_utf8_count(t + 1, e); 3434 3435 /* +1 = trailing NUL */ 3436 Size_t need = SvCUR(sv) + expansion + extra + 1; 3437 U8 * d; 3438 3439 /* Grow if needed */ 3440 if (SvLEN(sv) < need) { 3441 t = invariant_length + (U8*) SvGROW(sv, need); 3442 e = t + remaining_length_p1; 3443 } 3444 SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion); 3445 3446 /* Set the NUL at the end */ 3447 d = (U8 *) SvEND(sv); 3448 *d-- = '\0'; 3449 3450 /* Having decremented d, it points to the position to put the 3451 * very last byte of the expanded string. Go backwards through 3452 * the string, copying and expanding as we go, stopping when we 3453 * get to the part that is invariant the rest of the way down */ 3454 3455 e--; 3456 while (e >= t) { 3457 if (NATIVE_BYTE_IS_INVARIANT(*e)) { 3458 *d-- = *e; 3459 } else { 3460 *d-- = UTF8_EIGHT_BIT_LO(*e); 3461 *d-- = UTF8_EIGHT_BIT_HI(*e); 3462 } 3463 e--; 3464 } 3465 3466 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 3467 /* Update pos. We do it at the end rather than during 3468 * the upgrade, to avoid slowing down the common case 3469 * (upgrade without pos). 3470 * pos can be stored as either bytes or characters. Since 3471 * this was previously a byte string we can just turn off 3472 * the bytes flag. */ 3473 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 3474 if (mg) { 3475 mg->mg_flags &= ~MGf_BYTES; 3476 } 3477 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 3478 magic_setutf8(sv,mg); /* clear UTF8 cache */ 3479 } 3480 } 3481 } 3482 3483 SvUTF8_on(sv); 3484 return SvCUR(sv); 3485 } 3486 3487 /* 3488 =for apidoc sv_utf8_downgrade 3489 =for apidoc_item sv_utf8_downgrade_flags 3490 =for apidoc_item sv_utf8_downgrade_nomg 3491 3492 These attempt to convert the PV of an SV from characters to bytes. If the PV 3493 contains a character that cannot fit in a byte, this conversion will fail; in 3494 this case, C<FALSE> is returned if C<fail_ok> is true; otherwise they croak. 3495 3496 They are not a general purpose Unicode to byte encoding interface: 3497 use the C<Encode> extension for that. 3498 3499 They differ only in that: 3500 3501 C<sv_utf8_downgrade> processes 'get' magic on C<sv>. 3502 3503 C<sv_utf8_downgrade_nomg> does not. 3504 3505 C<sv_utf8_downgrade_flags> has an additional C<flags> parameter in which you can specify 3506 C<SV_GMAGIC> to process 'get' magic, or leave it cleared to not process 'get' magic. 3507 3508 =cut 3509 */ 3510 3511 bool 3512 Perl_sv_utf8_downgrade_flags(pTHX_ SV *const sv, const bool fail_ok, const U32 flags) 3513 { 3514 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE_FLAGS; 3515 3516 if (SvPOKp(sv) && SvUTF8(sv)) { 3517 if (SvCUR(sv)) { 3518 U8 *s; 3519 STRLEN len; 3520 U32 mg_flags = flags & SV_GMAGIC; 3521 3522 if (SvIsCOW(sv)) { 3523 S_sv_uncow(aTHX_ sv, 0); 3524 } 3525 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 3526 /* update pos */ 3527 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 3528 if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) { 3529 mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len, 3530 mg_flags|SV_CONST_RETURN); 3531 mg_flags = 0; /* sv_pos_b2u does get magic */ 3532 } 3533 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 3534 magic_setutf8(sv,mg); /* clear UTF8 cache */ 3535 3536 } 3537 s = (U8 *) SvPV_flags(sv, len, mg_flags); 3538 3539 if (!utf8_to_bytes(s, &len)) { 3540 if (fail_ok) 3541 return FALSE; 3542 else { 3543 if (PL_op) 3544 Perl_croak(aTHX_ "Wide character in %s", 3545 OP_DESC(PL_op)); 3546 else 3547 Perl_croak(aTHX_ "Wide character"); 3548 } 3549 } 3550 SvCUR_set(sv, len); 3551 } 3552 } 3553 SvUTF8_off(sv); 3554 return TRUE; 3555 } 3556 3557 /* 3558 =for apidoc sv_utf8_encode 3559 3560 Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8> 3561 flag off so that it looks like octets again. 3562 3563 =cut 3564 */ 3565 3566 void 3567 Perl_sv_utf8_encode(pTHX_ SV *const sv) 3568 { 3569 PERL_ARGS_ASSERT_SV_UTF8_ENCODE; 3570 3571 if (SvREADONLY(sv)) { 3572 sv_force_normal_flags(sv, 0); 3573 } 3574 (void) sv_utf8_upgrade(sv); 3575 SvUTF8_off(sv); 3576 } 3577 3578 /* 3579 =for apidoc sv_utf8_decode 3580 3581 If the PV of the SV is an octet sequence in Perl's extended UTF-8 3582 and contains a multiple-byte character, the C<SvUTF8> flag is turned on 3583 so that it looks like a character. If the PV contains only single-byte 3584 characters, the C<SvUTF8> flag stays off. 3585 Scans PV for validity and returns FALSE if the PV is invalid UTF-8. 3586 3587 =cut 3588 */ 3589 3590 bool 3591 Perl_sv_utf8_decode(pTHX_ SV *const sv) 3592 { 3593 PERL_ARGS_ASSERT_SV_UTF8_DECODE; 3594 3595 if (SvPOKp(sv)) { 3596 const U8 *start, *c, *first_variant; 3597 3598 /* The octets may have got themselves encoded - get them back as 3599 * bytes 3600 */ 3601 if (!sv_utf8_downgrade(sv, TRUE)) 3602 return FALSE; 3603 3604 /* it is actually just a matter of turning the utf8 flag on, but 3605 * we want to make sure everything inside is valid utf8 first. 3606 */ 3607 c = start = (const U8 *) SvPVX_const(sv); 3608 if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) { 3609 if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c))) 3610 return FALSE; 3611 SvUTF8_on(sv); 3612 } 3613 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 3614 /* XXX Is this dead code? XS_utf8_decode calls SvSETMAGIC 3615 after this, clearing pos. Does anything on CPAN 3616 need this? */ 3617 /* adjust pos to the start of a UTF8 char sequence */ 3618 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 3619 if (mg) { 3620 I32 pos = mg->mg_len; 3621 if (pos > 0) { 3622 for (c = start + pos; c > start; c--) { 3623 if (UTF8_IS_START(*c)) 3624 break; 3625 } 3626 mg->mg_len = c - start; 3627 } 3628 } 3629 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 3630 magic_setutf8(sv,mg); /* clear UTF8 cache */ 3631 } 3632 } 3633 return TRUE; 3634 } 3635 3636 /* 3637 =for apidoc sv_setsv 3638 =for apidoc_item sv_setsv_flags 3639 =for apidoc_item sv_setsv_mg 3640 =for apidoc_item sv_setsv_nomg 3641 3642 These copy the contents of the source SV C<ssv> into the destination SV C<dsv>. 3643 C<ssv> may be destroyed if it is mortal, so don't use these functions if 3644 the source SV needs to be reused. 3645 Loosely speaking, they perform a copy-by-value, obliterating any previous 3646 content of the destination. 3647 3648 They differ only in that: 3649 3650 C<sv_setsv> calls 'get' magic on C<ssv>, but skips 'set' magic on C<dsv>. 3651 3652 C<sv_setsv_mg> calls both 'get' magic on C<ssv> and 'set' magic on C<dsv>. 3653 3654 C<sv_setsv_nomg> skips all magic. 3655 3656 C<sv_setsv_flags> has a C<flags> parameter which you can use to specify any 3657 combination of magic handling, and also you can specify C<SV_NOSTEAL> so that 3658 the buffers of temps will not be stolen. 3659 3660 You probably want to instead use one of the assortment of wrappers, such as 3661 C<L</SvSetSV>>, C<L</SvSetSV_nosteal>>, C<L</SvSetMagicSV>> and 3662 C<L</SvSetMagicSV_nosteal>>. 3663 3664 C<sv_setsv_flags> is the primary function for copying scalars, and most other 3665 copy-ish functions and macros use it underneath. 3666 3667 =for apidoc Amnh||SV_NOSTEAL 3668 3669 =cut 3670 */ 3671 3672 static void 3673 S_glob_assign_glob(pTHX_ SV *const dsv, SV *const ssv, const int dtype) 3674 { 3675 I32 mro_changes = 0; /* 1 = method, 2 = isa, 3 = recursive isa */ 3676 HV *old_stash = NULL; 3677 3678 PERL_ARGS_ASSERT_GLOB_ASSIGN_GLOB; 3679 3680 if (dtype != SVt_PVGV && !isGV_with_GP(dsv)) { 3681 const char * const name = GvNAME(ssv); 3682 const STRLEN len = GvNAMELEN(ssv); 3683 { 3684 if (dtype >= SVt_PV) { 3685 SvPV_free(dsv); 3686 SvPV_set(dsv, 0); 3687 SvLEN_set(dsv, 0); 3688 SvCUR_set(dsv, 0); 3689 } 3690 SvUPGRADE(dsv, SVt_PVGV); 3691 (void)SvOK_off(dsv); 3692 isGV_with_GP_on(dsv); 3693 } 3694 GvSTASH(dsv) = GvSTASH(ssv); 3695 if (GvSTASH(dsv)) 3696 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dsv)), dsv); 3697 gv_name_set(MUTABLE_GV(dsv), name, len, 3698 GV_ADD | (GvNAMEUTF8(ssv) ? SVf_UTF8 : 0 )); 3699 SvFAKE_on(dsv); /* can coerce to non-glob */ 3700 } 3701 3702 if(GvGP(MUTABLE_GV(ssv))) { 3703 /* If source has method cache entry, clear it */ 3704 if(GvCVGEN(ssv)) { 3705 SvREFCNT_dec(GvCV(ssv)); 3706 GvCV_set(ssv, NULL); 3707 GvCVGEN(ssv) = 0; 3708 } 3709 /* If source has a real method, then a method is 3710 going to change */ 3711 else if( 3712 GvCV((const GV *)ssv) && GvSTASH(dsv) && HvENAME(GvSTASH(dsv)) 3713 ) { 3714 mro_changes = 1; 3715 } 3716 } 3717 3718 /* If dest already had a real method, that's a change as well */ 3719 if( 3720 !mro_changes && GvGP(MUTABLE_GV(dsv)) && GvCVu((const GV *)dsv) 3721 && GvSTASH(dsv) && HvENAME(GvSTASH(dsv)) 3722 ) { 3723 mro_changes = 1; 3724 } 3725 3726 /* We don't need to check the name of the destination if it was not a 3727 glob to begin with. */ 3728 if(dtype == SVt_PVGV) { 3729 const char * const name = GvNAME((const GV *)dsv); 3730 const STRLEN len = GvNAMELEN(dsv); 3731 if(memEQs(name, len, "ISA") 3732 /* The stash may have been detached from the symbol table, so 3733 check its name. */ 3734 && GvSTASH(dsv) && HvENAME(GvSTASH(dsv)) 3735 ) 3736 mro_changes = 2; 3737 else { 3738 if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') 3739 || (len == 1 && name[0] == ':')) { 3740 mro_changes = 3; 3741 3742 /* Set aside the old stash, so we can reset isa caches on 3743 its subclasses. */ 3744 if((old_stash = GvHV(dsv))) 3745 /* Make sure we do not lose it early. */ 3746 SvREFCNT_inc_simple_void_NN( 3747 sv_2mortal((SV *)old_stash) 3748 ); 3749 } 3750 } 3751 3752 SvREFCNT_inc_simple_void_NN(sv_2mortal(dsv)); 3753 } 3754 3755 /* freeing dsv's GP might free ssv (e.g. *x = $x), 3756 * so temporarily protect it */ 3757 ENTER; 3758 SAVEFREESV(SvREFCNT_inc_simple_NN(ssv)); 3759 gp_free(MUTABLE_GV(dsv)); 3760 GvINTRO_off(dsv); /* one-shot flag */ 3761 GvGP_set(dsv, gp_ref(GvGP(ssv))); 3762 LEAVE; 3763 3764 if (SvTAINTED(ssv)) 3765 SvTAINT(dsv); 3766 if (GvIMPORTED(dsv) != GVf_IMPORTED 3767 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) 3768 { 3769 GvIMPORTED_on(dsv); 3770 } 3771 GvMULTI_on(dsv); 3772 if(mro_changes == 2) { 3773 if (GvAV((const GV *)ssv)) { 3774 MAGIC *mg; 3775 SV * const sref = (SV *)GvAV((const GV *)dsv); 3776 if (SvSMAGICAL(sref) && (mg = mg_find(sref, PERL_MAGIC_isa))) { 3777 if (SvTYPE(mg->mg_obj) != SVt_PVAV) { 3778 AV * const ary = newAV(); 3779 av_push(ary, mg->mg_obj); /* takes the refcount */ 3780 mg->mg_obj = (SV *)ary; 3781 } 3782 av_push((AV *)mg->mg_obj, SvREFCNT_inc_simple_NN(dsv)); 3783 } 3784 else sv_magic(sref, dsv, PERL_MAGIC_isa, NULL, 0); 3785 } 3786 mro_isa_changed_in(GvSTASH(dsv)); 3787 } 3788 else if(mro_changes == 3) { 3789 HV * const stash = GvHV(dsv); 3790 if(old_stash ? (HV *)HvENAME_get(old_stash) : stash) 3791 mro_package_moved( 3792 stash, old_stash, 3793 (GV *)dsv, 0 3794 ); 3795 } 3796 else if(mro_changes) mro_method_changed_in(GvSTASH(dsv)); 3797 if (GvIO(dsv) && dtype == SVt_PVGV) { 3798 DEBUG_o(Perl_deb(aTHX_ 3799 "glob_assign_glob clearing PL_stashcache\n")); 3800 /* It's a cache. It will rebuild itself quite happily. 3801 It's a lot of effort to work out exactly which key (or keys) 3802 might be invalidated by the creation of the this file handle. 3803 */ 3804 hv_clear(PL_stashcache); 3805 } 3806 return; 3807 } 3808 3809 void 3810 Perl_gv_setref(pTHX_ SV *const dsv, SV *const ssv) 3811 { 3812 SV * const sref = SvRV(ssv); 3813 SV *dref; 3814 const int intro = GvINTRO(dsv); 3815 SV **location; 3816 U8 import_flag = 0; 3817 const U32 stype = SvTYPE(sref); 3818 3819 PERL_ARGS_ASSERT_GV_SETREF; 3820 3821 if (intro) { 3822 GvINTRO_off(dsv); /* one-shot flag */ 3823 GvLINE(dsv) = CopLINE(PL_curcop); 3824 GvEGV(dsv) = MUTABLE_GV(dsv); 3825 } 3826 GvMULTI_on(dsv); 3827 switch (stype) { 3828 case SVt_PVCV: 3829 location = (SV **) &(GvGP(dsv)->gp_cv); /* XXX bypassing GvCV_set */ 3830 import_flag = GVf_IMPORTED_CV; 3831 goto common; 3832 case SVt_PVHV: 3833 location = (SV **) &GvHV(dsv); 3834 import_flag = GVf_IMPORTED_HV; 3835 goto common; 3836 case SVt_PVAV: 3837 location = (SV **) &GvAV(dsv); 3838 import_flag = GVf_IMPORTED_AV; 3839 goto common; 3840 case SVt_PVIO: 3841 location = (SV **) &GvIOp(dsv); 3842 goto common; 3843 case SVt_PVFM: 3844 location = (SV **) &GvFORM(dsv); 3845 goto common; 3846 default: 3847 location = &GvSV(dsv); 3848 import_flag = GVf_IMPORTED_SV; 3849 common: 3850 if (intro) { 3851 if (stype == SVt_PVCV) { 3852 /*if (GvCVGEN(dsv) && (GvCV(dsv) != (const CV *)sref || GvCVGEN(dsv))) {*/ 3853 if (GvCVGEN(dsv)) { 3854 SvREFCNT_dec(GvCV(dsv)); 3855 GvCV_set(dsv, NULL); 3856 GvCVGEN(dsv) = 0; /* Switch off cacheness. */ 3857 } 3858 } 3859 /* SAVEt_GVSLOT takes more room on the savestack and has more 3860 overhead in leave_scope than SAVEt_GENERIC_SV. But for CVs 3861 leave_scope needs access to the GV so it can reset method 3862 caches. We must use SAVEt_GVSLOT whenever the type is 3863 SVt_PVCV, even if the stash is anonymous, as the stash may 3864 gain a name somehow before leave_scope. */ 3865 if (stype == SVt_PVCV) { 3866 /* There is no save_pushptrptrptr. Creating it for this 3867 one call site would be overkill. So inline the ss add 3868 routines here. */ 3869 dSS_ADD; 3870 SS_ADD_PTR(dsv); 3871 SS_ADD_PTR(location); 3872 SS_ADD_PTR(SvREFCNT_inc(*location)); 3873 SS_ADD_UV(SAVEt_GVSLOT); 3874 SS_ADD_END(4); 3875 } 3876 else SAVEGENERICSV(*location); 3877 } 3878 dref = *location; 3879 if (stype == SVt_PVCV && (*location != sref || GvCVGEN(dsv))) { 3880 CV* const cv = MUTABLE_CV(*location); 3881 if (cv) { 3882 if (!GvCVGEN((const GV *)dsv) && 3883 (CvROOT(cv) || CvXSUB(cv)) && 3884 /* redundant check that avoids creating the extra SV 3885 most of the time: */ 3886 (CvCONST(cv) || (ckWARN(WARN_REDEFINE) && !intro))) 3887 { 3888 SV * const new_const_sv = 3889 CvCONST((const CV *)sref) 3890 ? cv_const_sv((const CV *)sref) 3891 : NULL; 3892 HV * const stash = GvSTASH((const GV *)dsv); 3893 report_redefined_cv( 3894 sv_2mortal( 3895 stash 3896 ? Perl_newSVpvf(aTHX_ 3897 "%" HEKf "::%" HEKf, 3898 HEKfARG(HvNAME_HEK(stash)), 3899 HEKfARG(GvENAME_HEK(MUTABLE_GV(dsv)))) 3900 : Perl_newSVpvf(aTHX_ 3901 "%" HEKf, 3902 HEKfARG(GvENAME_HEK(MUTABLE_GV(dsv)))) 3903 ), 3904 cv, 3905 CvCONST((const CV *)sref) ? &new_const_sv : NULL 3906 ); 3907 } 3908 if (!intro) 3909 cv_ckproto_len_flags(cv, (const GV *)dsv, 3910 SvPOK(sref) ? CvPROTO(sref) : NULL, 3911 SvPOK(sref) ? CvPROTOLEN(sref) : 0, 3912 SvPOK(sref) ? SvUTF8(sref) : 0); 3913 } 3914 GvCVGEN(dsv) = 0; /* Switch off cacheness. */ 3915 GvASSUMECV_on(dsv); 3916 if(GvSTASH(dsv)) { /* sub foo { 1 } sub bar { 2 } *bar = \&foo */ 3917 if (intro && GvREFCNT(dsv) > 1) { 3918 /* temporary remove extra savestack's ref */ 3919 --GvREFCNT(dsv); 3920 gv_method_changed(dsv); 3921 ++GvREFCNT(dsv); 3922 } 3923 else gv_method_changed(dsv); 3924 } 3925 } 3926 *location = SvREFCNT_inc_simple_NN(sref); 3927 if (import_flag && !(GvFLAGS(dsv) & import_flag) 3928 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) { 3929 GvFLAGS(dsv) |= import_flag; 3930 } 3931 3932 if (stype == SVt_PVHV) { 3933 const char * const name = GvNAME((GV*)dsv); 3934 const STRLEN len = GvNAMELEN(dsv); 3935 if ( 3936 ( 3937 (len > 1 && name[len-2] == ':' && name[len-1] == ':') 3938 || (len == 1 && name[0] == ':') 3939 ) 3940 && (!dref || HvENAME_get(dref)) 3941 ) { 3942 mro_package_moved( 3943 (HV *)sref, (HV *)dref, 3944 (GV *)dsv, 0 3945 ); 3946 } 3947 } 3948 else if ( 3949 stype == SVt_PVAV && sref != dref 3950 && memEQs(GvNAME((GV*)dsv), GvNAMELEN((GV*)dsv), "ISA") 3951 /* The stash may have been detached from the symbol table, so 3952 check its name before doing anything. */ 3953 && GvSTASH(dsv) && HvENAME(GvSTASH(dsv)) 3954 ) { 3955 MAGIC *mg; 3956 MAGIC * const omg = dref && SvSMAGICAL(dref) 3957 ? mg_find(dref, PERL_MAGIC_isa) 3958 : NULL; 3959 if (SvSMAGICAL(sref) && (mg = mg_find(sref, PERL_MAGIC_isa))) { 3960 if (SvTYPE(mg->mg_obj) != SVt_PVAV) { 3961 AV * const ary = newAV(); 3962 av_push(ary, mg->mg_obj); /* takes the refcount */ 3963 mg->mg_obj = (SV *)ary; 3964 } 3965 if (omg) { 3966 if (SvTYPE(omg->mg_obj) == SVt_PVAV) { 3967 SV **svp = AvARRAY((AV *)omg->mg_obj); 3968 I32 items = AvFILLp((AV *)omg->mg_obj) + 1; 3969 while (items--) 3970 av_push( 3971 (AV *)mg->mg_obj, 3972 SvREFCNT_inc_simple_NN(*svp++) 3973 ); 3974 } 3975 else 3976 av_push( 3977 (AV *)mg->mg_obj, 3978 SvREFCNT_inc_simple_NN(omg->mg_obj) 3979 ); 3980 } 3981 else 3982 av_push((AV *)mg->mg_obj,SvREFCNT_inc_simple_NN(dsv)); 3983 } 3984 else 3985 { 3986 SSize_t i; 3987 sv_magic( 3988 sref, omg ? omg->mg_obj : dsv, PERL_MAGIC_isa, NULL, 0 3989 ); 3990 for (i = 0; i <= AvFILL(sref); ++i) { 3991 SV **elem = av_fetch ((AV*)sref, i, 0); 3992 if (elem) { 3993 sv_magic( 3994 *elem, sref, PERL_MAGIC_isaelem, NULL, i 3995 ); 3996 } 3997 } 3998 mg = mg_find(sref, PERL_MAGIC_isa); 3999 } 4000 /* Since the *ISA assignment could have affected more than 4001 one stash, don't call mro_isa_changed_in directly, but let 4002 magic_clearisa do it for us, as it already has the logic for 4003 dealing with globs vs arrays of globs. */ 4004 assert(mg); 4005 Perl_magic_clearisa(aTHX_ NULL, mg); 4006 } 4007 else if (stype == SVt_PVIO) { 4008 DEBUG_o(Perl_deb(aTHX_ "gv_setref clearing PL_stashcache\n")); 4009 /* It's a cache. It will rebuild itself quite happily. 4010 It's a lot of effort to work out exactly which key (or keys) 4011 might be invalidated by the creation of the this file handle. 4012 */ 4013 hv_clear(PL_stashcache); 4014 } 4015 break; 4016 } 4017 if (!intro) SvREFCNT_dec(dref); 4018 if (SvTAINTED(ssv)) 4019 SvTAINT(dsv); 4020 return; 4021 } 4022 4023 4024 4025 4026 #ifdef PERL_DEBUG_READONLY_COW 4027 # include <sys/mman.h> 4028 4029 # ifndef PERL_MEMORY_DEBUG_HEADER_SIZE 4030 # define PERL_MEMORY_DEBUG_HEADER_SIZE 0 4031 # endif 4032 4033 void 4034 Perl_sv_buf_to_ro(pTHX_ SV *sv) 4035 { 4036 struct perl_memory_debug_header * const header = 4037 (struct perl_memory_debug_header *)(SvPVX(sv)-PERL_MEMORY_DEBUG_HEADER_SIZE); 4038 const MEM_SIZE len = header->size; 4039 PERL_ARGS_ASSERT_SV_BUF_TO_RO; 4040 # ifdef PERL_TRACK_MEMPOOL 4041 if (!header->readonly) header->readonly = 1; 4042 # endif 4043 if (mprotect(header, len, PROT_READ)) 4044 Perl_warn(aTHX_ "mprotect RW for COW string %p %lu failed with %d", 4045 header, len, errno); 4046 } 4047 4048 static void 4049 S_sv_buf_to_rw(pTHX_ SV *sv) 4050 { 4051 struct perl_memory_debug_header * const header = 4052 (struct perl_memory_debug_header *)(SvPVX(sv)-PERL_MEMORY_DEBUG_HEADER_SIZE); 4053 const MEM_SIZE len = header->size; 4054 PERL_ARGS_ASSERT_SV_BUF_TO_RW; 4055 if (mprotect(header, len, PROT_READ|PROT_WRITE)) 4056 Perl_warn(aTHX_ "mprotect for COW string %p %lu failed with %d", 4057 header, len, errno); 4058 # ifdef PERL_TRACK_MEMPOOL 4059 header->readonly = 0; 4060 # endif 4061 } 4062 4063 #else 4064 # define sv_buf_to_ro(sv) NOOP 4065 # define sv_buf_to_rw(sv) NOOP 4066 #endif 4067 4068 void 4069 Perl_sv_setsv_flags(pTHX_ SV *dsv, SV* ssv, const I32 flags) 4070 { 4071 U32 sflags; 4072 int dtype; 4073 svtype stype; 4074 unsigned int both_type; 4075 4076 PERL_ARGS_ASSERT_SV_SETSV_FLAGS; 4077 4078 if (UNLIKELY( ssv == dsv )) 4079 return; 4080 4081 if (UNLIKELY( !ssv )) 4082 ssv = &PL_sv_undef; 4083 4084 stype = SvTYPE(ssv); 4085 dtype = SvTYPE(dsv); 4086 both_type = (stype | dtype); 4087 4088 /* with these values, we can check that both SVs are NULL/IV (and not 4089 * freed) just by testing the or'ed types */ 4090 STATIC_ASSERT_STMT(SVt_NULL == 0); 4091 STATIC_ASSERT_STMT(SVt_IV == 1); 4092 if (both_type <= 1) { 4093 /* both src and dst are UNDEF/IV/RV, so we can do a lot of 4094 * special-casing */ 4095 U32 sflags; 4096 U32 new_dflags; 4097 SV *old_rv = NULL; 4098 4099 /* minimal subset of SV_CHECK_THINKFIRST_COW_DROP(dsv) */ 4100 if (SvREADONLY(dsv)) 4101 Perl_croak_no_modify(); 4102 if (SvROK(dsv)) { 4103 if (SvWEAKREF(dsv)) 4104 sv_unref_flags(dsv, 0); 4105 else 4106 old_rv = SvRV(dsv); 4107 } 4108 4109 assert(!SvGMAGICAL(ssv)); 4110 assert(!SvGMAGICAL(dsv)); 4111 4112 sflags = SvFLAGS(ssv); 4113 if (sflags & (SVf_IOK|SVf_ROK)) { 4114 SET_SVANY_FOR_BODYLESS_IV(dsv); 4115 new_dflags = SVt_IV; 4116 4117 if (sflags & SVf_ROK) { 4118 dsv->sv_u.svu_rv = SvREFCNT_inc(SvRV(ssv)); 4119 new_dflags |= SVf_ROK; 4120 } 4121 else { 4122 /* both src and dst are <= SVt_IV, so sv_any points to the 4123 * head; so access the head directly 4124 */ 4125 assert( &(ssv->sv_u.svu_iv) 4126 == &(((XPVIV*) SvANY(ssv))->xiv_iv)); 4127 assert( &(dsv->sv_u.svu_iv) 4128 == &(((XPVIV*) SvANY(dsv))->xiv_iv)); 4129 dsv->sv_u.svu_iv = ssv->sv_u.svu_iv; 4130 new_dflags |= (SVf_IOK|SVp_IOK|(sflags & SVf_IVisUV)); 4131 } 4132 } 4133 else { 4134 new_dflags = dtype; /* turn off everything except the type */ 4135 } 4136 SvFLAGS(dsv) = new_dflags; 4137 SvREFCNT_dec(old_rv); 4138 4139 return; 4140 } 4141 4142 if (UNLIKELY(both_type == SVTYPEMASK)) { 4143 if (SvIS_FREED(dsv)) { 4144 Perl_croak(aTHX_ "panic: attempt to copy value %" SVf 4145 " to a freed scalar %p", SVfARG(ssv), (void *)dsv); 4146 } 4147 if (SvIS_FREED(ssv)) { 4148 Perl_croak(aTHX_ "panic: attempt to copy freed scalar %p to %p", 4149 (void*)ssv, (void*)dsv); 4150 } 4151 } 4152 4153 4154 4155 SV_CHECK_THINKFIRST_COW_DROP(dsv); 4156 dtype = SvTYPE(dsv); /* THINKFIRST may have changed type */ 4157 4158 /* There's a lot of redundancy below but we're going for speed here */ 4159 4160 switch (stype) { 4161 case SVt_NULL: 4162 undef_sstr: 4163 if (LIKELY( dtype != SVt_PVGV && dtype != SVt_PVLV )) { 4164 (void)SvOK_off(dsv); 4165 return; 4166 } 4167 break; 4168 case SVt_IV: 4169 if (SvIOK(ssv)) { 4170 switch (dtype) { 4171 case SVt_NULL: 4172 /* For performance, we inline promoting to type SVt_IV. */ 4173 /* We're starting from SVt_NULL, so provided that define is 4174 * actual 0, we don't have to unset any SV type flags 4175 * to promote to SVt_IV. */ 4176 STATIC_ASSERT_STMT(SVt_NULL == 0); 4177 SET_SVANY_FOR_BODYLESS_IV(dsv); 4178 SvFLAGS(dsv) |= SVt_IV; 4179 break; 4180 case SVt_NV: 4181 case SVt_PV: 4182 sv_upgrade(dsv, SVt_PVIV); 4183 break; 4184 case SVt_PVGV: 4185 case SVt_PVLV: 4186 goto end_of_first_switch; 4187 } 4188 (void)SvIOK_only(dsv); 4189 SvIV_set(dsv, SvIVX(ssv)); 4190 if (SvIsUV(ssv)) 4191 SvIsUV_on(dsv); 4192 /* SvTAINTED can only be true if the SV has taint magic, which in 4193 turn means that the SV type is PVMG (or greater). This is the 4194 case statement for SVt_IV, so this cannot be true (whatever gcov 4195 may say). */ 4196 assert(!SvTAINTED(ssv)); 4197 return; 4198 } 4199 if (!SvROK(ssv)) 4200 goto undef_sstr; 4201 if (dtype < SVt_PV && dtype != SVt_IV) 4202 sv_upgrade(dsv, SVt_IV); 4203 break; 4204 4205 case SVt_NV: 4206 if (LIKELY( SvNOK(ssv) )) { 4207 switch (dtype) { 4208 case SVt_NULL: 4209 case SVt_IV: 4210 sv_upgrade(dsv, SVt_NV); 4211 break; 4212 case SVt_PV: 4213 case SVt_PVIV: 4214 sv_upgrade(dsv, SVt_PVNV); 4215 break; 4216 case SVt_PVGV: 4217 case SVt_PVLV: 4218 goto end_of_first_switch; 4219 } 4220 SvNV_set(dsv, SvNVX(ssv)); 4221 (void)SvNOK_only(dsv); 4222 /* SvTAINTED can only be true if the SV has taint magic, which in 4223 turn means that the SV type is PVMG (or greater). This is the 4224 case statement for SVt_NV, so this cannot be true (whatever gcov 4225 may say). */ 4226 assert(!SvTAINTED(ssv)); 4227 return; 4228 } 4229 goto undef_sstr; 4230 4231 case SVt_PV: 4232 if (dtype < SVt_PV) 4233 sv_upgrade(dsv, SVt_PV); 4234 break; 4235 case SVt_PVIV: 4236 if (dtype < SVt_PVIV) 4237 sv_upgrade(dsv, SVt_PVIV); 4238 break; 4239 case SVt_PVNV: 4240 if (dtype < SVt_PVNV) 4241 sv_upgrade(dsv, SVt_PVNV); 4242 break; 4243 4244 case SVt_INVLIST: 4245 invlist_clone(ssv, dsv); 4246 break; 4247 default: 4248 { 4249 const char * const type = sv_reftype(ssv,0); 4250 if (PL_op) 4251 /* diag_listed_as: Bizarre copy of %s */ 4252 Perl_croak(aTHX_ "Bizarre copy of %s in %s", type, OP_DESC(PL_op)); 4253 else 4254 Perl_croak(aTHX_ "Bizarre copy of %s", type); 4255 } 4256 NOT_REACHED; /* NOTREACHED */ 4257 4258 case SVt_REGEXP: 4259 upgregexp: 4260 if (dtype < SVt_REGEXP) 4261 sv_upgrade(dsv, SVt_REGEXP); 4262 break; 4263 4264 case SVt_PVLV: 4265 case SVt_PVGV: 4266 case SVt_PVMG: 4267 if (SvGMAGICAL(ssv) && (flags & SV_GMAGIC)) { 4268 mg_get(ssv); 4269 if (SvTYPE(ssv) != stype) 4270 stype = SvTYPE(ssv); 4271 } 4272 if (isGV_with_GP(ssv) && dtype <= SVt_PVLV) { 4273 glob_assign_glob(dsv, ssv, dtype); 4274 return; 4275 } 4276 if (stype == SVt_PVLV) 4277 { 4278 if (isREGEXP(ssv)) goto upgregexp; 4279 SvUPGRADE(dsv, SVt_PVNV); 4280 } 4281 else 4282 SvUPGRADE(dsv, (svtype)stype); 4283 } 4284 end_of_first_switch: 4285 4286 /* dsv may have been upgraded. */ 4287 dtype = SvTYPE(dsv); 4288 sflags = SvFLAGS(ssv); 4289 4290 if (UNLIKELY( dtype == SVt_PVCV )) { 4291 /* Assigning to a subroutine sets the prototype. */ 4292 if (SvOK(ssv)) { 4293 STRLEN len; 4294 const char *const ptr = SvPV_const(ssv, len); 4295 4296 SvGROW(dsv, len + 1); 4297 Copy(ptr, SvPVX(dsv), len + 1, char); 4298 SvCUR_set(dsv, len); 4299 SvPOK_only(dsv); 4300 SvFLAGS(dsv) |= sflags & SVf_UTF8; 4301 CvAUTOLOAD_off(dsv); 4302 } else { 4303 SvOK_off(dsv); 4304 } 4305 } 4306 else if (UNLIKELY(dtype == SVt_PVAV || dtype == SVt_PVHV 4307 || dtype == SVt_PVFM)) 4308 { 4309 const char * const type = sv_reftype(dsv,0); 4310 if (PL_op) 4311 /* diag_listed_as: Cannot copy to %s */ 4312 Perl_croak(aTHX_ "Cannot copy to %s in %s", type, OP_DESC(PL_op)); 4313 else 4314 Perl_croak(aTHX_ "Cannot copy to %s", type); 4315 } else if (sflags & SVf_ROK) { 4316 if (isGV_with_GP(dsv) 4317 && SvTYPE(SvRV(ssv)) == SVt_PVGV && isGV_with_GP(SvRV(ssv))) { 4318 ssv = SvRV(ssv); 4319 if (ssv == dsv) { 4320 if (GvIMPORTED(dsv) != GVf_IMPORTED 4321 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) 4322 { 4323 GvIMPORTED_on(dsv); 4324 } 4325 GvMULTI_on(dsv); 4326 return; 4327 } 4328 glob_assign_glob(dsv, ssv, dtype); 4329 return; 4330 } 4331 4332 if (dtype >= SVt_PV) { 4333 if (isGV_with_GP(dsv)) { 4334 gv_setref(dsv, ssv); 4335 return; 4336 } 4337 if (SvPVX_const(dsv)) { 4338 SvPV_free(dsv); 4339 SvLEN_set(dsv, 0); 4340 SvCUR_set(dsv, 0); 4341 } 4342 } 4343 (void)SvOK_off(dsv); 4344 SvRV_set(dsv, SvREFCNT_inc(SvRV(ssv))); 4345 SvFLAGS(dsv) |= sflags & SVf_ROK; 4346 assert(!(sflags & SVp_NOK)); 4347 assert(!(sflags & SVp_IOK)); 4348 assert(!(sflags & SVf_NOK)); 4349 assert(!(sflags & SVf_IOK)); 4350 } 4351 else if (isGV_with_GP(dsv)) { 4352 if (!(sflags & SVf_OK)) { 4353 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4354 "Undefined value assigned to typeglob"); 4355 } 4356 else { 4357 GV *gv = gv_fetchsv_nomg(ssv, GV_ADD, SVt_PVGV); 4358 if (dsv != (const SV *)gv) { 4359 const char * const name = GvNAME((const GV *)dsv); 4360 const STRLEN len = GvNAMELEN(dsv); 4361 HV *old_stash = NULL; 4362 bool reset_isa = FALSE; 4363 if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') 4364 || (len == 1 && name[0] == ':')) { 4365 /* Set aside the old stash, so we can reset isa caches 4366 on its subclasses. */ 4367 if((old_stash = GvHV(dsv))) { 4368 /* Make sure we do not lose it early. */ 4369 SvREFCNT_inc_simple_void_NN( 4370 sv_2mortal((SV *)old_stash) 4371 ); 4372 } 4373 reset_isa = TRUE; 4374 } 4375 4376 if (GvGP(dsv)) { 4377 SvREFCNT_inc_simple_void_NN(sv_2mortal(dsv)); 4378 gp_free(MUTABLE_GV(dsv)); 4379 } 4380 GvGP_set(dsv, gp_ref(GvGP(gv))); 4381 4382 if (reset_isa) { 4383 HV * const stash = GvHV(dsv); 4384 if( 4385 old_stash ? (HV *)HvENAME_get(old_stash) : stash 4386 ) 4387 mro_package_moved( 4388 stash, old_stash, 4389 (GV *)dsv, 0 4390 ); 4391 } 4392 } 4393 } 4394 } 4395 else if ((dtype == SVt_REGEXP || dtype == SVt_PVLV) 4396 && (stype == SVt_REGEXP || isREGEXP(ssv))) { 4397 reg_temp_copy((REGEXP*)dsv, (REGEXP*)ssv); 4398 } 4399 else if (sflags & SVp_POK) { 4400 const STRLEN cur = SvCUR(ssv); 4401 const STRLEN len = SvLEN(ssv); 4402 4403 /* 4404 * We have three basic ways to copy the string: 4405 * 4406 * 1. Swipe 4407 * 2. Copy-on-write 4408 * 3. Actual copy 4409 * 4410 * Which we choose is based on various factors. The following 4411 * things are listed in order of speed, fastest to slowest: 4412 * - Swipe 4413 * - Copying a short string 4414 * - Copy-on-write bookkeeping 4415 * - malloc 4416 * - Copying a long string 4417 * 4418 * We swipe the string (steal the string buffer) if the SV on the 4419 * rhs is about to be freed anyway (TEMP and refcnt==1). This is a 4420 * big win on long strings. It should be a win on short strings if 4421 * SvPVX_const(dsv) has to be allocated. If not, it should not 4422 * slow things down, as SvPVX_const(ssv) would have been freed 4423 * soon anyway. 4424 * 4425 * We also steal the buffer from a PADTMP (operator target) if it 4426 * is ‘long enough’. For short strings, a swipe does not help 4427 * here, as it causes more malloc calls the next time the target 4428 * is used. Benchmarks show that even if SvPVX_const(dsv) has to 4429 * be allocated it is still not worth swiping PADTMPs for short 4430 * strings, as the savings here are small. 4431 * 4432 * If swiping is not an option, then we see whether it is 4433 * worth using copy-on-write. If the lhs already has a buf- 4434 * fer big enough and the string is short, we skip it and fall back 4435 * to method 3, since memcpy is faster for short strings than the 4436 * later bookkeeping overhead that copy-on-write entails. 4437 4438 * If the rhs is not a copy-on-write string yet, then we also 4439 * consider whether the buffer is too large relative to the string 4440 * it holds. Some operations such as readline allocate a large 4441 * buffer in the expectation of reusing it. But turning such into 4442 * a COW buffer is counter-productive because it increases memory 4443 * usage by making readline allocate a new large buffer the sec- 4444 * ond time round. So, if the buffer is too large, again, we use 4445 * method 3 (copy). 4446 * 4447 * Finally, if there is no buffer on the left, or the buffer is too 4448 * small, then we use copy-on-write and make both SVs share the 4449 * string buffer. 4450 * 4451 */ 4452 4453 /* Whichever path we take through the next code, we want this true, 4454 and doing it now facilitates the COW check. */ 4455 (void)SvPOK_only(dsv); 4456 4457 if ( 4458 ( /* Either ... */ 4459 /* slated for free anyway (and not COW)? */ 4460 (sflags & (SVs_TEMP|SVf_IsCOW)) == SVs_TEMP 4461 /* or a swipable TARG */ 4462 || ((sflags & 4463 (SVs_PADTMP|SVf_READONLY|SVf_PROTECT|SVf_IsCOW)) 4464 == SVs_PADTMP 4465 /* whose buffer is worth stealing */ 4466 && CHECK_COWBUF_THRESHOLD(cur,len) 4467 ) 4468 ) && 4469 !(sflags & SVf_OOK) && /* and not involved in OOK hack? */ 4470 (!(flags & SV_NOSTEAL)) && 4471 /* and we're allowed to steal temps */ 4472 SvREFCNT(ssv) == 1 && /* and no other references to it? */ 4473 len) /* and really is a string */ 4474 { /* Passes the swipe test. */ 4475 if (SvPVX_const(dsv)) /* we know that dtype >= SVt_PV */ 4476 SvPV_free(dsv); 4477 SvPV_set(dsv, SvPVX_mutable(ssv)); 4478 SvLEN_set(dsv, SvLEN(ssv)); 4479 SvCUR_set(dsv, SvCUR(ssv)); 4480 4481 SvTEMP_off(dsv); 4482 (void)SvOK_off(ssv); /* NOTE: nukes most SvFLAGS on ssv */ 4483 SvPV_set(ssv, NULL); 4484 SvLEN_set(ssv, 0); 4485 SvCUR_set(ssv, 0); 4486 SvTEMP_off(ssv); 4487 } 4488 /* We must check for SvIsCOW_static() even without 4489 * SV_COW_SHARED_HASH_KEYS being set or else we'll break SvIsBOOL() 4490 */ 4491 else if (SvIsCOW_static(ssv)) { 4492 if (SvPVX_const(dsv)) { /* we know that dtype >= SVt_PV */ 4493 SvPV_free(dsv); 4494 } 4495 SvPV_set(dsv, SvPVX(ssv)); 4496 SvLEN_set(dsv, 0); 4497 SvCUR_set(dsv, cur); 4498 SvFLAGS(dsv) |= (SVf_IsCOW|SVppv_STATIC); 4499 } 4500 else if (flags & SV_COW_SHARED_HASH_KEYS 4501 && 4502 #ifdef PERL_COPY_ON_WRITE 4503 (sflags & SVf_IsCOW 4504 ? (!len || 4505 ( (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dsv) < cur+1) 4506 /* If this is a regular (non-hek) COW, only so 4507 many COW "copies" are possible. */ 4508 && CowREFCNT(ssv) != SV_COW_REFCNT_MAX )) 4509 : ( (sflags & CAN_COW_MASK) == CAN_COW_FLAGS 4510 && !(SvFLAGS(dsv) & SVf_BREAK) 4511 && CHECK_COW_THRESHOLD(cur,len) && cur+1 < len 4512 && (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dsv) < cur+1) 4513 )) 4514 #else 4515 sflags & SVf_IsCOW 4516 && !(SvFLAGS(dsv) & SVf_BREAK) 4517 #endif 4518 ) { 4519 /* Either it's a shared hash key, or it's suitable for 4520 copy-on-write. */ 4521 #ifdef DEBUGGING 4522 if (DEBUG_C_TEST) { 4523 PerlIO_printf(Perl_debug_log, "Copy on write: ssv --> dsv\n"); 4524 sv_dump(ssv); 4525 sv_dump(dsv); 4526 } 4527 #endif 4528 #ifdef PERL_ANY_COW 4529 if (!(sflags & SVf_IsCOW)) { 4530 SvIsCOW_on(ssv); 4531 CowREFCNT(ssv) = 0; 4532 } 4533 #endif 4534 if (SvPVX_const(dsv)) { /* we know that dtype >= SVt_PV */ 4535 SvPV_free(dsv); 4536 } 4537 4538 #ifdef PERL_ANY_COW 4539 if (len) { 4540 if (sflags & SVf_IsCOW) { 4541 sv_buf_to_rw(ssv); 4542 } 4543 CowREFCNT(ssv)++; 4544 SvPV_set(dsv, SvPVX_mutable(ssv)); 4545 sv_buf_to_ro(ssv); 4546 } else 4547 #endif 4548 { 4549 /* SvIsCOW_shared_hash */ 4550 DEBUG_C(PerlIO_printf(Perl_debug_log, 4551 "Copy on write: Sharing hash\n")); 4552 4553 assert (SvTYPE(dsv) >= SVt_PV); 4554 SvPV_set(dsv, 4555 HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv))))); 4556 } 4557 SvLEN_set(dsv, len); 4558 SvCUR_set(dsv, cur); 4559 SvIsCOW_on(dsv); 4560 } else { 4561 /* Failed the swipe test, and we cannot do copy-on-write either. 4562 Have to copy the string. */ 4563 SvGROW(dsv, cur + 1); /* inlined from sv_setpvn */ 4564 Move(SvPVX_const(ssv),SvPVX(dsv),cur,char); 4565 SvCUR_set(dsv, cur); 4566 *SvEND(dsv) = '\0'; 4567 } 4568 if (sflags & SVp_NOK) { 4569 SvNV_set(dsv, SvNVX(ssv)); 4570 if ((sflags & SVf_NOK) && !(sflags & SVf_POK)) { 4571 /* Source was SVf_NOK|SVp_NOK|SVp_POK but not SVf_POK, meaning 4572 a value set as floating point and later stringified, where 4573 the value happens to be one of the few that we know aren't 4574 affected by the numeric locale, hence we can cache the 4575 stringification. Currently that's +Inf, -Inf and NaN, but 4576 conceivably we might extend this to -9 .. +9 (excluding -0). 4577 So mark destination the same: */ 4578 SvFLAGS(dsv) &= ~SVf_POK; 4579 } 4580 } 4581 if (sflags & SVp_IOK) { 4582 SvIV_set(dsv, SvIVX(ssv)); 4583 if (sflags & SVf_IVisUV) 4584 SvIsUV_on(dsv); 4585 if ((sflags & SVf_IOK) && !(sflags & SVf_POK)) { 4586 /* Source was SVf_IOK|SVp_IOK|SVp_POK but not SVf_POK, meaning 4587 a value set as an integer and later stringified. So mark 4588 destination the same: */ 4589 SvFLAGS(dsv) &= ~SVf_POK; 4590 } 4591 } 4592 SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8); 4593 { 4594 const MAGIC * const smg = SvVSTRING_mg(ssv); 4595 if (smg) { 4596 sv_magic(dsv, NULL, PERL_MAGIC_vstring, 4597 smg->mg_ptr, smg->mg_len); 4598 SvRMAGICAL_on(dsv); 4599 } 4600 } 4601 } 4602 else if (sflags & (SVp_IOK|SVp_NOK)) { 4603 (void)SvOK_off(dsv); 4604 SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_IVisUV|SVf_NOK|SVp_NOK); 4605 if (sflags & SVp_IOK) { 4606 /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ 4607 SvIV_set(dsv, SvIVX(ssv)); 4608 } 4609 if (sflags & SVp_NOK) { 4610 SvNV_set(dsv, SvNVX(ssv)); 4611 } 4612 } 4613 else { 4614 if (isGV_with_GP(ssv)) { 4615 gv_efullname3(dsv, MUTABLE_GV(ssv), "*"); 4616 } 4617 else 4618 (void)SvOK_off(dsv); 4619 } 4620 if (SvTAINTED(ssv)) 4621 SvTAINT(dsv); 4622 } 4623 4624 4625 /* 4626 =for apidoc sv_set_undef 4627 4628 Equivalent to C<sv_setsv(sv, &PL_sv_undef)>, but more efficient. 4629 Doesn't handle set magic. 4630 4631 The perl equivalent is C<$sv = undef;>. Note that it doesn't free any string 4632 buffer, unlike C<undef $sv>. 4633 4634 Introduced in perl 5.25.12. 4635 4636 =cut 4637 */ 4638 4639 void 4640 Perl_sv_set_undef(pTHX_ SV *sv) 4641 { 4642 U32 type = SvTYPE(sv); 4643 4644 PERL_ARGS_ASSERT_SV_SET_UNDEF; 4645 4646 /* shortcut, NULL, IV, RV */ 4647 4648 if (type <= SVt_IV) { 4649 assert(!SvGMAGICAL(sv)); 4650 if (SvREADONLY(sv)) { 4651 /* does undeffing PL_sv_undef count as modifying a read-only 4652 * variable? Some XS code does this */ 4653 if (sv == &PL_sv_undef) 4654 return; 4655 Perl_croak_no_modify(); 4656 } 4657 4658 if (SvROK(sv)) { 4659 if (SvWEAKREF(sv)) 4660 sv_unref_flags(sv, 0); 4661 else { 4662 SV *rv = SvRV(sv); 4663 SvFLAGS(sv) = type; /* quickly turn off all flags */ 4664 SvREFCNT_dec_NN(rv); 4665 return; 4666 } 4667 } 4668 SvFLAGS(sv) = type; /* quickly turn off all flags */ 4669 return; 4670 } 4671 4672 if (SvIS_FREED(sv)) 4673 Perl_croak(aTHX_ "panic: attempt to undefine a freed scalar %p", 4674 (void *)sv); 4675 4676 SV_CHECK_THINKFIRST_COW_DROP(sv); 4677 4678 if (isGV_with_GP(sv)) 4679 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4680 "Undefined value assigned to typeglob"); 4681 else 4682 SvOK_off(sv); 4683 } 4684 4685 void 4686 Perl_sv_setsv_mg(pTHX_ SV *const dsv, SV *const ssv) 4687 { 4688 PERL_ARGS_ASSERT_SV_SETSV_MG; 4689 4690 sv_setsv(dsv,ssv); 4691 SvSETMAGIC(dsv); 4692 } 4693 4694 #ifdef PERL_ANY_COW 4695 # define SVt_COW SVt_PV 4696 SV * 4697 Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) 4698 { 4699 STRLEN cur = SvCUR(ssv); 4700 STRLEN len = SvLEN(ssv); 4701 char *new_pv; 4702 U32 new_flags = (SVt_COW|SVf_POK|SVp_POK|SVf_IsCOW); 4703 #if defined(PERL_DEBUG_READONLY_COW) && defined(PERL_COPY_ON_WRITE) 4704 const bool already = cBOOL(SvIsCOW(ssv)); 4705 #endif 4706 4707 PERL_ARGS_ASSERT_SV_SETSV_COW; 4708 #ifdef DEBUGGING 4709 if (DEBUG_C_TEST) { 4710 PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", 4711 (void*)ssv, (void*)dsv); 4712 sv_dump(ssv); 4713 if (dsv) 4714 sv_dump(dsv); 4715 } 4716 #endif 4717 if (dsv) { 4718 if (SvTHINKFIRST(dsv)) 4719 sv_force_normal_flags(dsv, SV_COW_DROP_PV); 4720 else if (SvPVX_const(dsv)) 4721 Safefree(SvPVX_mutable(dsv)); 4722 } 4723 else 4724 new_SV(dsv); 4725 SvUPGRADE(dsv, SVt_COW); 4726 4727 assert (SvPOK(ssv)); 4728 assert (SvPOKp(ssv)); 4729 4730 if (SvIsCOW(ssv)) { 4731 if (SvIsCOW_shared_hash(ssv)) { 4732 /* source is a COW shared hash key. */ 4733 DEBUG_C(PerlIO_printf(Perl_debug_log, 4734 "Fast copy on write: Sharing hash\n")); 4735 new_pv = HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv)))); 4736 goto common_exit; 4737 } 4738 else if (SvIsCOW_static(ssv)) { 4739 /* source is static constant; preserve this */ 4740 new_pv = SvPVX(ssv); 4741 new_flags |= SVppv_STATIC; 4742 goto common_exit; 4743 } 4744 assert(SvCUR(ssv)+1 < SvLEN(ssv)); 4745 assert(CowREFCNT(ssv) < SV_COW_REFCNT_MAX); 4746 } else { 4747 assert ((SvFLAGS(ssv) & CAN_COW_MASK) == CAN_COW_FLAGS); 4748 SvUPGRADE(ssv, SVt_COW); 4749 SvIsCOW_on(ssv); 4750 DEBUG_C(PerlIO_printf(Perl_debug_log, 4751 "Fast copy on write: Converting ssv to COW\n")); 4752 CowREFCNT(ssv) = 0; 4753 } 4754 # ifdef PERL_DEBUG_READONLY_COW 4755 if (already) sv_buf_to_rw(ssv); 4756 # endif 4757 CowREFCNT(ssv)++; 4758 new_pv = SvPVX_mutable(ssv); 4759 sv_buf_to_ro(ssv); 4760 4761 common_exit: 4762 SvPV_set(dsv, new_pv); 4763 SvFLAGS(dsv) = new_flags; 4764 if (SvUTF8(ssv)) 4765 SvUTF8_on(dsv); 4766 SvLEN_set(dsv, len); 4767 SvCUR_set(dsv, cur); 4768 #ifdef DEBUGGING 4769 if (DEBUG_C_TEST) 4770 sv_dump(dsv); 4771 #endif 4772 return dsv; 4773 } 4774 #endif 4775 4776 /* 4777 =for apidoc sv_setpv_bufsize 4778 4779 Sets the SV to be a string of cur bytes length, with at least 4780 len bytes available. Ensures that there is a null byte at SvEND. 4781 Returns a char * pointer to the SvPV buffer. 4782 4783 =cut 4784 */ 4785 4786 char * 4787 Perl_sv_setpv_bufsize(pTHX_ SV *const sv, const STRLEN cur, const STRLEN len) 4788 { 4789 char *pv; 4790 4791 PERL_ARGS_ASSERT_SV_SETPV_BUFSIZE; 4792 4793 SV_CHECK_THINKFIRST_COW_DROP(sv); 4794 SvUPGRADE(sv, SVt_PV); 4795 pv = SvGROW(sv, len + 1); 4796 SvCUR_set(sv, cur); 4797 *(SvEND(sv))= '\0'; 4798 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 4799 4800 SvTAINT(sv); 4801 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); 4802 return pv; 4803 } 4804 4805 /* 4806 =for apidoc sv_setpv 4807 =for apidoc_item sv_setpv_mg 4808 =for apidoc_item sv_setpvn 4809 =for apidoc_item sv_setpvn_fresh 4810 =for apidoc_item sv_setpvn_mg 4811 =for apidoc_item |void|sv_setpvs|SV* sv|"literal string" 4812 =for apidoc_item |void|sv_setpvs_mg|SV* sv|"literal string" 4813 4814 These copy a string into the SV C<sv>, making sure it is C<L</SvPOK_only>>. 4815 4816 In the C<pvs> forms, the string must be a C literal string, enclosed in double 4817 quotes. 4818 4819 In the C<pvn> forms, the first byte of the string is pointed to by C<ptr>, and 4820 C<len> indicates the number of bytes to be copied, potentially including 4821 embedded C<NUL> characters. 4822 4823 In the plain C<pv> forms, C<ptr> points to a NUL-terminated C string. That is, 4824 it points to the first byte of the string, and the copy proceeds up through the 4825 first enountered C<NUL> byte. 4826 4827 In the forms that take a C<ptr> argument, if it is NULL, the SV will become 4828 undefined. 4829 4830 The UTF-8 flag is not changed by these functions. A terminating NUL byte is 4831 guaranteed in the result. 4832 4833 The C<_mg> forms handle 'set' magic; the other forms skip all magic. 4834 4835 C<sv_setpvn_fresh> is a cut-down alternative to C<sv_setpvn>, intended ONLY 4836 to be used with a fresh sv that has been upgraded to a SVt_PV, SVt_PVIV, 4837 SVt_PVNV, or SVt_PVMG. 4838 4839 =cut 4840 */ 4841 4842 void 4843 Perl_sv_setpvn(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) 4844 { 4845 char *dptr; 4846 4847 PERL_ARGS_ASSERT_SV_SETPVN; 4848 4849 SV_CHECK_THINKFIRST_COW_DROP(sv); 4850 if (isGV_with_GP(sv)) 4851 Perl_croak_no_modify(); 4852 if (!ptr) { 4853 (void)SvOK_off(sv); 4854 return; 4855 } 4856 else { 4857 /* len is STRLEN which is unsigned, need to copy to signed */ 4858 const IV iv = len; 4859 if (iv < 0) 4860 Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen %" 4861 IVdf, iv); 4862 } 4863 SvUPGRADE(sv, SVt_PV); 4864 4865 dptr = SvGROW(sv, len + 1); 4866 Move(ptr,dptr,len,char); 4867 dptr[len] = '\0'; 4868 SvCUR_set(sv, len); 4869 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 4870 SvTAINT(sv); 4871 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); 4872 } 4873 4874 void 4875 Perl_sv_setpvn_mg(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) 4876 { 4877 PERL_ARGS_ASSERT_SV_SETPVN_MG; 4878 4879 sv_setpvn(sv,ptr,len); 4880 SvSETMAGIC(sv); 4881 } 4882 4883 void 4884 Perl_sv_setpvn_fresh(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) 4885 { 4886 char *dptr; 4887 4888 PERL_ARGS_ASSERT_SV_SETPVN_FRESH; 4889 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG); 4890 assert(!SvTHINKFIRST(sv)); 4891 assert(!isGV_with_GP(sv)); 4892 4893 if (ptr) { 4894 const IV iv = len; 4895 /* len is STRLEN which is unsigned, need to copy to signed */ 4896 if (iv < 0) 4897 Perl_croak(aTHX_ "panic: sv_setpvn_fresh called with negative strlen %" 4898 IVdf, iv); 4899 4900 dptr = sv_grow_fresh(sv, len + 1); 4901 Move(ptr,dptr,len,char); 4902 dptr[len] = '\0'; 4903 SvCUR_set(sv, len); 4904 SvPOK_on(sv); 4905 SvTAINT(sv); 4906 } 4907 } 4908 4909 void 4910 Perl_sv_setpv(pTHX_ SV *const sv, const char *const ptr) 4911 { 4912 STRLEN len; 4913 4914 PERL_ARGS_ASSERT_SV_SETPV; 4915 4916 SV_CHECK_THINKFIRST_COW_DROP(sv); 4917 if (!ptr) { 4918 (void)SvOK_off(sv); 4919 return; 4920 } 4921 len = strlen(ptr); 4922 SvUPGRADE(sv, SVt_PV); 4923 4924 SvGROW(sv, len + 1); 4925 Move(ptr,SvPVX(sv),len+1,char); 4926 SvCUR_set(sv, len); 4927 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 4928 SvTAINT(sv); 4929 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); 4930 } 4931 4932 void 4933 Perl_sv_setpv_mg(pTHX_ SV *const sv, const char *const ptr) 4934 { 4935 PERL_ARGS_ASSERT_SV_SETPV_MG; 4936 4937 sv_setpv(sv,ptr); 4938 SvSETMAGIC(sv); 4939 } 4940 4941 void 4942 Perl_sv_sethek(pTHX_ SV *const sv, const HEK *const hek) 4943 { 4944 PERL_ARGS_ASSERT_SV_SETHEK; 4945 4946 if (!hek) { 4947 return; 4948 } 4949 4950 if (HEK_LEN(hek) == HEf_SVKEY) { 4951 sv_setsv(sv, *(SV**)HEK_KEY(hek)); 4952 return; 4953 } else { 4954 const int flags = HEK_FLAGS(hek); 4955 if (flags & HVhek_WASUTF8) { 4956 STRLEN utf8_len = HEK_LEN(hek); 4957 char *as_utf8 = (char *)bytes_to_utf8((U8*)HEK_KEY(hek), &utf8_len); 4958 sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); 4959 SvUTF8_on(sv); 4960 return; 4961 } else if (flags & HVhek_NOTSHARED) { 4962 sv_setpvn(sv, HEK_KEY(hek), HEK_LEN(hek)); 4963 if (HEK_UTF8(hek)) 4964 SvUTF8_on(sv); 4965 else SvUTF8_off(sv); 4966 return; 4967 } 4968 { 4969 SV_CHECK_THINKFIRST_COW_DROP(sv); 4970 SvUPGRADE(sv, SVt_PV); 4971 SvPV_free(sv); 4972 SvPV_set(sv,(char *)HEK_KEY(share_hek_hek(hek))); 4973 SvCUR_set(sv, HEK_LEN(hek)); 4974 SvLEN_set(sv, 0); 4975 SvIsCOW_on(sv); 4976 SvPOK_on(sv); 4977 if (HEK_UTF8(hek)) 4978 SvUTF8_on(sv); 4979 else SvUTF8_off(sv); 4980 return; 4981 } 4982 } 4983 } 4984 4985 4986 /* 4987 =for apidoc sv_usepvn 4988 =for apidoc_item sv_usepvn_mg 4989 =for apidoc_item sv_usepvn_flags 4990 4991 These tell an SV to use C<ptr> for its string value. Normally SVs have 4992 their string stored inside the SV, but these tell the SV to use an 4993 external string instead. 4994 4995 C<ptr> should point to memory that was allocated 4996 by L</C<Newx>>. It must be 4997 the start of a C<Newx>-ed block of memory, and not a pointer to the 4998 middle of it (beware of L<C<OOK>|perlguts/Offsets> and copy-on-write), 4999 and not be from a non-C<Newx> memory allocator like C<malloc>. The 5000 string length, C<len>, must be supplied. By default this function 5001 will L</C<Renew>> (i.e. realloc, move) the memory pointed to by C<ptr>, 5002 so that the pointer should not be freed or used by the programmer after giving 5003 it to C<sv_usepvn>, and neither should any pointers from "behind" that pointer 5004 (I<e.g.>, S<C<ptr> + 1>) be used. 5005 5006 In the C<sv_usepvn_flags> form, if S<C<flags & SV_SMAGIC>> is true, 5007 C<SvSETMAGIC> is called before returning. 5008 And if S<C<flags & SV_HAS_TRAILING_NUL>> is true, then C<ptr[len]> must be 5009 C<NUL>, and the realloc will be skipped (I<i.e.>, the buffer is actually at 5010 least 1 byte longer than C<len>, and already meets the requirements for storing 5011 in C<SvPVX>). 5012 5013 C<sv_usepvn> is merely C<sv_usepvn_flags> with C<flags> set to 0, so 'set' 5014 magic is skipped. 5015 5016 C<sv_usepvn_mg> is merely C<sv_usepvn_flags> with C<flags> set to C<SV_SMAGIC>, 5017 so 'set' magic is performed. 5018 5019 =for apidoc Amnh||SV_SMAGIC 5020 =for apidoc Amnh||SV_HAS_TRAILING_NUL 5021 5022 =cut 5023 */ 5024 5025 void 5026 Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32 flags) 5027 { 5028 STRLEN allocate; 5029 5030 PERL_ARGS_ASSERT_SV_USEPVN_FLAGS; 5031 5032 SV_CHECK_THINKFIRST_COW_DROP(sv); 5033 SvUPGRADE(sv, SVt_PV); 5034 if (!ptr) { 5035 (void)SvOK_off(sv); 5036 if (flags & SV_SMAGIC) 5037 SvSETMAGIC(sv); 5038 return; 5039 } 5040 if (SvPVX_const(sv)) 5041 SvPV_free(sv); 5042 5043 #ifdef DEBUGGING 5044 if (flags & SV_HAS_TRAILING_NUL) 5045 assert(ptr[len] == '\0'); 5046 #endif 5047 5048 allocate = (flags & SV_HAS_TRAILING_NUL) 5049 ? len + 1 : 5050 #ifdef Perl_safesysmalloc_size 5051 len + 1; 5052 #else 5053 PERL_STRLEN_ROUNDUP(len + 1); 5054 #endif 5055 if (flags & SV_HAS_TRAILING_NUL) { 5056 /* It's long enough - do nothing. 5057 Specifically Perl_newCONSTSUB is relying on this. */ 5058 } else { 5059 #ifdef DEBUGGING 5060 /* Force a move to shake out bugs in callers. */ 5061 char *new_ptr = (char*)safemalloc(allocate); 5062 Copy(ptr, new_ptr, len, char); 5063 PoisonFree(ptr,len,char); 5064 Safefree(ptr); 5065 ptr = new_ptr; 5066 #else 5067 ptr = (char*) saferealloc (ptr, allocate); 5068 #endif 5069 } 5070 #ifdef Perl_safesysmalloc_size 5071 SvLEN_set(sv, Perl_safesysmalloc_size(ptr)); 5072 #else 5073 SvLEN_set(sv, allocate); 5074 #endif 5075 SvCUR_set(sv, len); 5076 SvPV_set(sv, ptr); 5077 if (!(flags & SV_HAS_TRAILING_NUL)) { 5078 ptr[len] = '\0'; 5079 } 5080 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 5081 SvTAINT(sv); 5082 if (flags & SV_SMAGIC) 5083 SvSETMAGIC(sv); 5084 } 5085 5086 5087 static void 5088 S_sv_uncow(pTHX_ SV * const sv, const U32 flags) 5089 { 5090 assert(SvIsCOW(sv)); 5091 { 5092 #ifdef PERL_ANY_COW 5093 const char * const pvx = SvPVX_const(sv); 5094 const STRLEN len = SvLEN(sv); 5095 const STRLEN cur = SvCUR(sv); 5096 const bool was_shared_hek = SvIsCOW_shared_hash(sv); 5097 5098 #ifdef DEBUGGING 5099 if (DEBUG_C_TEST) { 5100 PerlIO_printf(Perl_debug_log, 5101 "Copy on write: Force normal %ld\n", 5102 (long) flags); 5103 sv_dump(sv); 5104 } 5105 #endif 5106 SvIsCOW_off(sv); 5107 # ifdef PERL_COPY_ON_WRITE 5108 if (len) { 5109 /* Must do this first, since the CowREFCNT uses SvPVX and 5110 we need to write to CowREFCNT, or de-RO the whole buffer if we are 5111 the only owner left of the buffer. */ 5112 sv_buf_to_rw(sv); /* NOOP if RO-ing not supported */ 5113 { 5114 U8 cowrefcnt = CowREFCNT(sv); 5115 if(cowrefcnt != 0) { 5116 cowrefcnt--; 5117 CowREFCNT(sv) = cowrefcnt; 5118 sv_buf_to_ro(sv); 5119 goto copy_over; 5120 } 5121 } 5122 /* Else we are the only owner of the buffer. */ 5123 } 5124 else 5125 # endif 5126 { 5127 /* This SV doesn't own the buffer, so need to Newx() a new one: */ 5128 copy_over: 5129 SvPV_set(sv, NULL); 5130 SvCUR_set(sv, 0); 5131 SvLEN_set(sv, 0); 5132 if (flags & SV_COW_DROP_PV) { 5133 /* OK, so we don't need to copy our buffer. */ 5134 SvPOK_off(sv); 5135 } else { 5136 SvGROW(sv, cur + 1); 5137 Move(pvx,SvPVX(sv),cur,char); 5138 SvCUR_set(sv, cur); 5139 *SvEND(sv) = '\0'; 5140 } 5141 if (was_shared_hek) { 5142 unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); 5143 } 5144 #ifdef DEBUGGING 5145 if (DEBUG_C_TEST) 5146 sv_dump(sv); 5147 #endif 5148 } 5149 #else 5150 const char * const pvx = SvPVX_const(sv); 5151 const STRLEN len = SvCUR(sv); 5152 SvIsCOW_off(sv); 5153 SvPV_set(sv, NULL); 5154 SvLEN_set(sv, 0); 5155 if (flags & SV_COW_DROP_PV) { 5156 /* OK, so we don't need to copy our buffer. */ 5157 SvPOK_off(sv); 5158 } else { 5159 SvGROW(sv, len + 1); 5160 Move(pvx,SvPVX(sv),len,char); 5161 *SvEND(sv) = '\0'; 5162 } 5163 unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); 5164 #endif 5165 } 5166 } 5167 5168 5169 /* 5170 =for apidoc sv_force_normal_flags 5171 5172 Undo various types of fakery on an SV, where fakery means 5173 "more than" a string: if the PV is a shared string, make 5174 a private copy; if we're a ref, stop refing; if we're a glob, downgrade to 5175 an C<xpvmg>; if we're a copy-on-write scalar, this is the on-write time when 5176 we do the copy, and is also used locally; if this is a 5177 vstring, drop the vstring magic. If C<SV_COW_DROP_PV> is set 5178 then a copy-on-write scalar drops its PV buffer (if any) and becomes 5179 C<SvPOK_off> rather than making a copy. (Used where this 5180 scalar is about to be set to some other value.) In addition, 5181 the C<flags> parameter gets passed to C<sv_unref_flags()> 5182 when unreffing. C<sv_force_normal> calls this function 5183 with flags set to 0. 5184 5185 This function is expected to be used to signal to perl that this SV is 5186 about to be written to, and any extra book-keeping needs to be taken care 5187 of. Hence, it croaks on read-only values. 5188 5189 =for apidoc Amnh||SV_COW_DROP_PV 5190 5191 =cut 5192 */ 5193 5194 void 5195 Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) 5196 { 5197 PERL_ARGS_ASSERT_SV_FORCE_NORMAL_FLAGS; 5198 5199 if (SvREADONLY(sv)) 5200 Perl_croak_no_modify(); 5201 else if (SvIsCOW(sv) && LIKELY(SvTYPE(sv) != SVt_PVHV)) 5202 S_sv_uncow(aTHX_ sv, flags); 5203 if (SvROK(sv)) 5204 sv_unref_flags(sv, flags); 5205 else if (SvFAKE(sv) && isGV_with_GP(sv)) 5206 sv_unglob(sv, flags); 5207 else if (SvFAKE(sv) && isREGEXP(sv)) { 5208 /* Need to downgrade the REGEXP to a simple(r) scalar. This is analogous 5209 to sv_unglob. We only need it here, so inline it. */ 5210 const bool islv = SvTYPE(sv) == SVt_PVLV; 5211 const svtype new_type = 5212 islv ? SVt_NULL : SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV; 5213 SV *const temp = newSV_type(new_type); 5214 regexp *old_rx_body; 5215 5216 if (new_type == SVt_PVMG) { 5217 SvMAGIC_set(temp, SvMAGIC(sv)); 5218 SvMAGIC_set(sv, NULL); 5219 SvSTASH_set(temp, SvSTASH(sv)); 5220 SvSTASH_set(sv, NULL); 5221 } 5222 if (!islv) 5223 SvCUR_set(temp, SvCUR(sv)); 5224 /* Remember that SvPVX is in the head, not the body. */ 5225 assert(ReANY((REGEXP *)sv)->mother_re); 5226 5227 if (islv) { 5228 /* LV-as-regex has sv->sv_any pointing to an XPVLV body, 5229 * whose xpvlenu_rx field points to the regex body */ 5230 XPV *xpv = (XPV*)(SvANY(sv)); 5231 old_rx_body = xpv->xpv_len_u.xpvlenu_rx; 5232 xpv->xpv_len_u.xpvlenu_rx = NULL; 5233 } 5234 else 5235 old_rx_body = ReANY((REGEXP *)sv); 5236 5237 /* Their buffer is already owned by someone else. */ 5238 if (flags & SV_COW_DROP_PV) { 5239 /* SvLEN is already 0. For SVt_REGEXP, we have a brand new 5240 zeroed body. For SVt_PVLV, we zeroed it above (len field 5241 a union with xpvlenu_rx) */ 5242 assert(!SvLEN(islv ? sv : temp)); 5243 sv->sv_u.svu_pv = 0; 5244 } 5245 else { 5246 sv->sv_u.svu_pv = savepvn(RX_WRAPPED((REGEXP *)sv), SvCUR(sv)); 5247 SvLEN_set(islv ? sv : temp, SvCUR(sv)+1); 5248 SvPOK_on(sv); 5249 } 5250 5251 /* Now swap the rest of the bodies. */ 5252 5253 SvFAKE_off(sv); 5254 if (!islv) { 5255 SvFLAGS(sv) &= ~SVTYPEMASK; 5256 SvFLAGS(sv) |= new_type; 5257 SvANY(sv) = SvANY(temp); 5258 } 5259 5260 SvFLAGS(temp) &= ~(SVTYPEMASK); 5261 SvFLAGS(temp) |= SVt_REGEXP|SVf_FAKE; 5262 SvANY(temp) = old_rx_body; 5263 5264 /* temp is now rebuilt as a correctly structured SVt_REGEXP, so this 5265 * will trigger a call to sv_clear() which will correctly free the 5266 * body. */ 5267 SvREFCNT_dec_NN(temp); 5268 } 5269 else if (SvVOK(sv)) sv_unmagic(sv, PERL_MAGIC_vstring); 5270 } 5271 5272 /* 5273 =for apidoc sv_chop 5274 5275 Efficient removal of characters from the beginning of the string buffer. 5276 C<SvPOK(sv)>, or at least C<SvPOKp(sv)>, must be true and C<ptr> must be a 5277 pointer to somewhere inside the string buffer. C<ptr> becomes the first 5278 character of the adjusted string. Uses the C<OOK> hack. On return, only 5279 C<SvPOK(sv)> and C<SvPOKp(sv)> among the C<OK> flags will be true. 5280 5281 Beware: after this function returns, C<ptr> and SvPVX_const(sv) may no longer 5282 refer to the same chunk of data. 5283 5284 The unfortunate similarity of this function's name to that of Perl's C<chop> 5285 operator is strictly coincidental. This function works from the left; 5286 C<chop> works from the right. 5287 5288 =cut 5289 */ 5290 5291 void 5292 Perl_sv_chop(pTHX_ SV *const sv, const char *const ptr) 5293 { 5294 STRLEN delta; 5295 STRLEN old_delta; 5296 U8 *p; 5297 #ifdef DEBUGGING 5298 const U8 *evacp; 5299 STRLEN evacn; 5300 #endif 5301 STRLEN max_delta; 5302 5303 PERL_ARGS_ASSERT_SV_CHOP; 5304 5305 if (!ptr || !SvPOKp(sv)) 5306 return; 5307 delta = ptr - SvPVX_const(sv); 5308 if (!delta) { 5309 /* Nothing to do. */ 5310 return; 5311 } 5312 max_delta = SvLEN(sv) ? SvLEN(sv) : SvCUR(sv); 5313 if (delta > max_delta) 5314 Perl_croak(aTHX_ "panic: sv_chop ptr=%p, start=%p, end=%p", 5315 ptr, SvPVX_const(sv), SvPVX_const(sv) + max_delta); 5316 /* SvPVX(sv) may move in SV_CHECK_THINKFIRST(sv), so don't use ptr any more */ 5317 SV_CHECK_THINKFIRST(sv); 5318 SvPOK_only_UTF8(sv); 5319 5320 if (!SvOOK(sv)) { 5321 if (!SvLEN(sv)) { /* make copy of shared string */ 5322 const char *pvx = SvPVX_const(sv); 5323 const STRLEN len = SvCUR(sv); 5324 SvGROW(sv, len + 1); 5325 Move(pvx,SvPVX(sv),len,char); 5326 *SvEND(sv) = '\0'; 5327 } 5328 SvOOK_on(sv); 5329 old_delta = 0; 5330 } else { 5331 SvOOK_offset(sv, old_delta); 5332 } 5333 SvLEN_set(sv, SvLEN(sv) - delta); 5334 SvCUR_set(sv, SvCUR(sv) - delta); 5335 SvPV_set(sv, SvPVX(sv) + delta); 5336 5337 p = (U8 *)SvPVX_const(sv); 5338 5339 #ifdef DEBUGGING 5340 /* how many bytes were evacuated? we will fill them with sentinel 5341 bytes, except for the part holding the new offset of course. */ 5342 evacn = delta; 5343 if (old_delta) 5344 evacn += (old_delta < 0x100 ? 1 : 1 + sizeof(STRLEN)); 5345 assert(evacn); 5346 assert(evacn <= delta + old_delta); 5347 evacp = p - evacn; 5348 #endif 5349 5350 /* This sets 'delta' to the accumulated value of all deltas so far */ 5351 delta += old_delta; 5352 assert(delta); 5353 5354 /* If 'delta' fits in a byte, store it just prior to the new beginning of 5355 * the string; otherwise store a 0 byte there and store 'delta' just prior 5356 * to that, using as many bytes as a STRLEN occupies. Thus it overwrites a 5357 * portion of the chopped part of the string */ 5358 if (delta < 0x100) { 5359 *--p = (U8) delta; 5360 } else { 5361 *--p = 0; 5362 p -= sizeof(STRLEN); 5363 Copy((U8*)&delta, p, sizeof(STRLEN), U8); 5364 } 5365 5366 #ifdef DEBUGGING 5367 /* Fill the preceding buffer with sentinals to verify that no-one is 5368 using it. */ 5369 while (p > evacp) { 5370 --p; 5371 *p = (U8)PTR2UV(p); 5372 } 5373 #endif 5374 } 5375 5376 /* 5377 =for apidoc sv_catpvn 5378 =for apidoc_item sv_catpvn_flags 5379 =for apidoc_item sv_catpvn_mg 5380 =for apidoc_item sv_catpvn_nomg 5381 5382 These concatenate the C<len> bytes of the string beginning at C<ptr> onto the 5383 end of the string which is in C<dsv>. The caller must make sure C<ptr> 5384 contains at least C<len> bytes. 5385 5386 For all but C<sv_catpvn_flags>, the string appended is assumed to be valid 5387 UTF-8 if the SV has the UTF-8 status set, and a string of bytes otherwise. 5388 5389 They differ in that: 5390 5391 C<sv_catpvn_mg> performs both 'get' and 'set' magic on C<dsv>. 5392 5393 C<sv_catpvn> performs only 'get' magic. 5394 5395 C<sv_catpvn_nomg> skips all magic. 5396 5397 C<sv_catpvn_flags> has an extra C<flags> parameter which allows you to specify 5398 any combination of magic handling (using C<SV_GMAGIC> and/or C<SV_SMAGIC>) and 5399 to also override the UTF-8 handling. By supplying the C<SV_CATBYTES> flag, the 5400 appended string is interpreted as plain bytes; by supplying instead the 5401 C<SV_CATUTF8> flag, it will be interpreted as UTF-8, and the C<dsv> will be 5402 upgraded to UTF-8 if necessary. 5403 5404 C<sv_catpvn>, C<sv_catpvn_mg>, and C<sv_catpvn_nomg> are implemented 5405 in terms of C<sv_catpvn_flags>. 5406 5407 =for apidoc Amnh||SV_CATUTF8 5408 =for apidoc Amnh||SV_CATBYTES 5409 5410 =cut 5411 */ 5412 5413 void 5414 Perl_sv_catpvn_flags(pTHX_ SV *const dsv, const char *sstr, const STRLEN slen, const I32 flags) 5415 { 5416 STRLEN dlen; 5417 const char * const dstr = SvPV_force_flags(dsv, dlen, flags); 5418 5419 PERL_ARGS_ASSERT_SV_CATPVN_FLAGS; 5420 assert((flags & (SV_CATBYTES|SV_CATUTF8)) != (SV_CATBYTES|SV_CATUTF8)); 5421 5422 if (!(flags & SV_CATBYTES) || !SvUTF8(dsv)) { 5423 if (flags & SV_CATUTF8 && !SvUTF8(dsv)) { 5424 sv_utf8_upgrade_flags_grow(dsv, 0, slen + 1); 5425 dlen = SvCUR(dsv); 5426 } 5427 else SvGROW(dsv, dlen + slen + 3); 5428 if (sstr == dstr) 5429 sstr = SvPVX_const(dsv); 5430 Move(sstr, SvPVX(dsv) + dlen, slen, char); 5431 SvCUR_set(dsv, SvCUR(dsv) + slen); 5432 } 5433 else { 5434 /* We inline bytes_to_utf8, to avoid an extra malloc. */ 5435 const char * const send = sstr + slen; 5436 U8 *d; 5437 5438 /* Something this code does not account for, which I think is 5439 impossible; it would require the same pv to be treated as 5440 bytes *and* utf8, which would indicate a bug elsewhere. */ 5441 assert(sstr != dstr); 5442 5443 SvGROW(dsv, dlen + slen * 2 + 3); 5444 d = (U8 *)SvPVX(dsv) + dlen; 5445 5446 while (sstr < send) { 5447 append_utf8_from_native_byte(*sstr, &d); 5448 sstr++; 5449 } 5450 SvCUR_set(dsv, d-(const U8 *)SvPVX(dsv)); 5451 } 5452 *SvEND(dsv) = '\0'; 5453 (void)SvPOK_only_UTF8(dsv); /* validate pointer */ 5454 SvTAINT(dsv); 5455 if (flags & SV_SMAGIC) 5456 SvSETMAGIC(dsv); 5457 } 5458 5459 /* 5460 =for apidoc sv_catsv 5461 =for apidoc_item sv_catsv_flags 5462 =for apidoc_item sv_catsv_mg 5463 =for apidoc_item sv_catsv_nomg 5464 5465 These concatenate the string from SV C<sstr> onto the end of the string in SV 5466 C<dsv>. If C<sstr> is null, these are no-ops; otherwise only C<dsv> is 5467 modified. 5468 5469 They differ only in what magic they perform: 5470 5471 C<sv_catsv_mg> performs 'get' magic on both SVs before the copy, and 'set' magic 5472 on C<dsv> afterwards. 5473 5474 C<sv_catsv> performs just 'get' magic, on both SVs. 5475 5476 C<sv_catsv_nomg> skips all magic. 5477 5478 C<sv_catsv_flags> has an extra C<flags> parameter which allows you to use 5479 C<SV_GMAGIC> and/or C<SV_SMAGIC> to specify any combination of magic handling 5480 (although either both or neither SV will have 'get' magic applied to it.) 5481 5482 C<sv_catsv>, C<sv_catsv_mg>, and C<sv_catsv_nomg> are implemented 5483 in terms of C<sv_catsv_flags>. 5484 5485 =cut */ 5486 5487 void 5488 Perl_sv_catsv_flags(pTHX_ SV *const dsv, SV *const sstr, const I32 flags) 5489 { 5490 PERL_ARGS_ASSERT_SV_CATSV_FLAGS; 5491 5492 if (sstr) { 5493 STRLEN slen; 5494 const char *spv = SvPV_flags_const(sstr, slen, flags); 5495 if (flags & SV_GMAGIC) 5496 SvGETMAGIC(dsv); 5497 sv_catpvn_flags(dsv, spv, slen, 5498 DO_UTF8(sstr) ? SV_CATUTF8 : SV_CATBYTES); 5499 if (flags & SV_SMAGIC) 5500 SvSETMAGIC(dsv); 5501 } 5502 } 5503 5504 /* 5505 =for apidoc sv_catpv 5506 =for apidoc_item sv_catpv_flags 5507 =for apidoc_item sv_catpv_mg 5508 =for apidoc_item sv_catpv_nomg 5509 5510 These concatenate the C<NUL>-terminated string C<sstr> onto the end of the 5511 string which is in the SV. 5512 If the SV has the UTF-8 status set, then the bytes appended should be 5513 valid UTF-8. 5514 5515 They differ only in how they handle magic: 5516 5517 C<sv_catpv_mg> performs both 'get' and 'set' magic. 5518 5519 C<sv_catpv> performs only 'get' magic. 5520 5521 C<sv_catpv_nomg> skips all magic. 5522 5523 C<sv_catpv_flags> has an extra C<flags> parameter which allows you to specify 5524 any combination of magic handling (using C<SV_GMAGIC> and/or C<SV_SMAGIC>), and 5525 to also override the UTF-8 handling. By supplying the C<SV_CATUTF8> flag, the 5526 appended string is forced to be interpreted as UTF-8; by supplying instead the 5527 C<SV_CATBYTES> flag, it will be interpreted as just bytes. Either the SV or 5528 the string appended will be upgraded to UTF-8 if necessary. 5529 5530 =cut 5531 */ 5532 5533 void 5534 Perl_sv_catpv(pTHX_ SV *const dsv, const char *sstr) 5535 { 5536 STRLEN len; 5537 STRLEN tlen; 5538 char *junk; 5539 5540 PERL_ARGS_ASSERT_SV_CATPV; 5541 5542 if (!sstr) 5543 return; 5544 junk = SvPV_force(dsv, tlen); 5545 len = strlen(sstr); 5546 SvGROW(dsv, tlen + len + 1); 5547 if (sstr == junk) 5548 sstr = SvPVX_const(dsv); 5549 Move(sstr,SvPVX(dsv)+tlen,len+1,char); 5550 SvCUR_set(dsv, SvCUR(dsv) + len); 5551 (void)SvPOK_only_UTF8(dsv); /* validate pointer */ 5552 SvTAINT(dsv); 5553 } 5554 5555 void 5556 Perl_sv_catpv_flags(pTHX_ SV *dsv, const char *sstr, const I32 flags) 5557 { 5558 PERL_ARGS_ASSERT_SV_CATPV_FLAGS; 5559 sv_catpvn_flags(dsv, sstr, strlen(sstr), flags); 5560 } 5561 5562 void 5563 Perl_sv_catpv_mg(pTHX_ SV *const dsv, const char *const sstr) 5564 { 5565 PERL_ARGS_ASSERT_SV_CATPV_MG; 5566 5567 sv_catpv(dsv,sstr); 5568 SvSETMAGIC(dsv); 5569 } 5570 5571 /* 5572 =for apidoc newSV 5573 5574 Creates a new SV. A non-zero C<len> parameter indicates the number of 5575 bytes of preallocated string space the SV should have. An extra byte for a 5576 trailing C<NUL> is also reserved. (C<SvPOK> is not set for the SV even if string 5577 space is allocated.) The reference count for the new SV is set to 1. 5578 5579 In 5.9.3, C<newSV()> replaces the older C<NEWSV()> API, and drops the first 5580 parameter, I<x>, a debug aid which allowed callers to identify themselves. 5581 This aid has been superseded by a new build option, C<PERL_MEM_LOG> (see 5582 L<perlhacktips/PERL_MEM_LOG>). The older API is still there for use in XS 5583 modules supporting older perls. 5584 5585 =cut 5586 */ 5587 5588 SV * 5589 Perl_newSV(pTHX_ const STRLEN len) 5590 { 5591 SV *sv; 5592 5593 if (!len) 5594 new_SV(sv); 5595 else { 5596 sv = newSV_type(SVt_PV); 5597 sv_grow_fresh(sv, len + 1); 5598 } 5599 return sv; 5600 } 5601 /* 5602 =for apidoc sv_magicext 5603 5604 Adds magic to an SV, upgrading it if necessary. Applies the 5605 supplied C<vtable> and returns a pointer to the magic added. 5606 5607 Note that C<sv_magicext> will allow things that C<sv_magic> will not. 5608 In particular, you can add magic to C<SvREADONLY> SVs, and add more than 5609 one instance of the same C<how>. 5610 5611 If C<namlen> is greater than zero then a C<savepvn> I<copy> of C<name> is 5612 stored, if C<namlen> is zero then C<name> is stored as-is and - as another 5613 special case - if C<(name && namlen == HEf_SVKEY)> then C<name> is assumed 5614 to contain an SV* and is stored as-is with its C<REFCNT> incremented. 5615 5616 (This is now used as a subroutine by C<sv_magic>.) 5617 5618 =cut 5619 */ 5620 MAGIC * 5621 Perl_sv_magicext(pTHX_ SV *const sv, SV *const obj, const int how, 5622 const MGVTBL *const vtable, const char *const name, const I32 namlen) 5623 { 5624 MAGIC* mg; 5625 5626 PERL_ARGS_ASSERT_SV_MAGICEXT; 5627 5628 SvUPGRADE(sv, SVt_PVMG); 5629 Newxz(mg, 1, MAGIC); 5630 mg->mg_moremagic = SvMAGIC(sv); 5631 SvMAGIC_set(sv, mg); 5632 5633 /* Sometimes a magic contains a reference loop, where the sv and 5634 object refer to each other. To prevent a reference loop that 5635 would prevent such objects being freed, we look for such loops 5636 and if we find one we avoid incrementing the object refcount. 5637 5638 Note we cannot do this to avoid self-tie loops as intervening RV must 5639 have its REFCNT incremented to keep it in existence. 5640 5641 */ 5642 if (!obj || obj == sv || 5643 how == PERL_MAGIC_arylen || 5644 how == PERL_MAGIC_regdata || 5645 how == PERL_MAGIC_regdatum || 5646 how == PERL_MAGIC_symtab || 5647 (SvTYPE(obj) == SVt_PVGV && 5648 (GvSV(obj) == sv || GvHV(obj) == (const HV *)sv 5649 || GvAV(obj) == (const AV *)sv || GvCV(obj) == (const CV *)sv 5650 || GvIOp(obj) == (const IO *)sv || GvFORM(obj) == (const CV *)sv))) 5651 { 5652 mg->mg_obj = obj; 5653 } 5654 else { 5655 mg->mg_obj = SvREFCNT_inc_simple(obj); 5656 mg->mg_flags |= MGf_REFCOUNTED; 5657 } 5658 5659 /* Normal self-ties simply pass a null object, and instead of 5660 using mg_obj directly, use the SvTIED_obj macro to produce a 5661 new RV as needed. For glob "self-ties", we are tieing the PVIO 5662 with an RV obj pointing to the glob containing the PVIO. In 5663 this case, to avoid a reference loop, we need to weaken the 5664 reference. 5665 */ 5666 5667 if (how == PERL_MAGIC_tiedscalar && SvTYPE(sv) == SVt_PVIO && 5668 obj && SvROK(obj) && GvIO(SvRV(obj)) == (const IO *)sv) 5669 { 5670 sv_rvweaken(obj); 5671 } 5672 5673 mg->mg_type = how; 5674 mg->mg_len = namlen; 5675 if (name) { 5676 if (namlen > 0) 5677 mg->mg_ptr = savepvn(name, namlen); 5678 else if (namlen == HEf_SVKEY) { 5679 /* Yes, this is casting away const. This is only for the case of 5680 HEf_SVKEY. I think we need to document this aberation of the 5681 constness of the API, rather than making name non-const, as 5682 that change propagating outwards a long way. */ 5683 mg->mg_ptr = (char*)SvREFCNT_inc_simple_NN((SV *)name); 5684 } else 5685 mg->mg_ptr = (char *) name; 5686 } 5687 mg->mg_virtual = (MGVTBL *) vtable; 5688 5689 mg_magical(sv); 5690 return mg; 5691 } 5692 5693 MAGIC * 5694 Perl_sv_magicext_mglob(pTHX_ SV *sv) 5695 { 5696 PERL_ARGS_ASSERT_SV_MAGICEXT_MGLOB; 5697 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') { 5698 /* This sv is only a delegate. //g magic must be attached to 5699 its target. */ 5700 vivify_defelem(sv); 5701 sv = LvTARG(sv); 5702 } 5703 return sv_magicext(sv, NULL, PERL_MAGIC_regex_global, 5704 &PL_vtbl_mglob, 0, 0); 5705 } 5706 5707 /* 5708 =for apidoc sv_magic 5709 5710 Adds magic to an SV. First upgrades C<sv> to type C<SVt_PVMG> if 5711 necessary, then adds a new magic item of type C<how> to the head of the 5712 magic list. 5713 5714 See C<L</sv_magicext>> (which C<sv_magic> now calls) for a description of the 5715 handling of the C<name> and C<namlen> arguments. 5716 5717 You need to use C<sv_magicext> to add magic to C<SvREADONLY> SVs and also 5718 to add more than one instance of the same C<how>. 5719 5720 =cut 5721 */ 5722 5723 void 5724 Perl_sv_magic(pTHX_ SV *const sv, SV *const obj, const int how, 5725 const char *const name, const I32 namlen) 5726 { 5727 const MGVTBL *vtable; 5728 MAGIC* mg; 5729 unsigned int flags; 5730 unsigned int vtable_index; 5731 5732 PERL_ARGS_ASSERT_SV_MAGIC; 5733 5734 if (how < 0 || (unsigned)how >= C_ARRAY_LENGTH(PL_magic_data) 5735 || ((flags = PL_magic_data[how]), 5736 (vtable_index = flags & PERL_MAGIC_VTABLE_MASK) 5737 > magic_vtable_max)) 5738 Perl_croak(aTHX_ "Don't know how to handle magic of type \\%o", how); 5739 5740 /* PERL_MAGIC_ext is reserved for use by extensions not perl internals. 5741 Useful for attaching extension internal data to perl vars. 5742 Note that multiple extensions may clash if magical scalars 5743 etc holding private data from one are passed to another. */ 5744 5745 vtable = (vtable_index == magic_vtable_max) 5746 ? NULL : PL_magic_vtables + vtable_index; 5747 5748 if (SvREADONLY(sv)) { 5749 if ( 5750 !PERL_MAGIC_TYPE_READONLY_ACCEPTABLE(how) 5751 ) 5752 { 5753 Perl_croak_no_modify(); 5754 } 5755 } 5756 if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) { 5757 if (SvMAGIC(sv) && (mg = mg_find(sv, how))) { 5758 /* sv_magic() refuses to add a magic of the same 'how' as an 5759 existing one 5760 */ 5761 if (how == PERL_MAGIC_taint) 5762 mg->mg_len |= 1; 5763 return; 5764 } 5765 } 5766 5767 /* Force pos to be stored as characters, not bytes. */ 5768 if (SvMAGICAL(sv) && DO_UTF8(sv) 5769 && (mg = mg_find(sv, PERL_MAGIC_regex_global)) 5770 && mg->mg_len != -1 5771 && mg->mg_flags & MGf_BYTES) { 5772 mg->mg_len = (SSize_t)sv_pos_b2u_flags(sv, (STRLEN)mg->mg_len, 5773 SV_CONST_RETURN); 5774 mg->mg_flags &= ~MGf_BYTES; 5775 } 5776 5777 /* Rest of work is done else where */ 5778 mg = sv_magicext(sv,obj,how,vtable,name,namlen); 5779 5780 switch (how) { 5781 case PERL_MAGIC_taint: 5782 mg->mg_len = 1; 5783 break; 5784 case PERL_MAGIC_ext: 5785 case PERL_MAGIC_dbfile: 5786 SvRMAGICAL_on(sv); 5787 break; 5788 } 5789 } 5790 5791 static int 5792 S_sv_unmagicext_flags(pTHX_ SV *const sv, const int type, MGVTBL *vtbl, const U32 flags) 5793 { 5794 MAGIC* mg; 5795 MAGIC** mgp; 5796 5797 assert(flags <= 1); 5798 5799 if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv)) 5800 return 0; 5801 mgp = &(((XPVMG*) SvANY(sv))->xmg_u.xmg_magic); 5802 for (mg = *mgp; mg; mg = *mgp) { 5803 const MGVTBL* const virt = mg->mg_virtual; 5804 if (mg->mg_type == type && (!flags || virt == vtbl)) { 5805 *mgp = mg->mg_moremagic; 5806 if (virt && virt->svt_free) 5807 virt->svt_free(aTHX_ sv, mg); 5808 if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { 5809 if (mg->mg_len > 0) 5810 Safefree(mg->mg_ptr); 5811 else if (mg->mg_len == HEf_SVKEY) 5812 SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr)); 5813 else if (mg->mg_type == PERL_MAGIC_utf8) 5814 Safefree(mg->mg_ptr); 5815 } 5816 if (mg->mg_flags & MGf_REFCOUNTED) 5817 SvREFCNT_dec(mg->mg_obj); 5818 Safefree(mg); 5819 } 5820 else 5821 mgp = &mg->mg_moremagic; 5822 } 5823 if (SvMAGIC(sv)) { 5824 if (SvMAGICAL(sv)) /* if we're under save_magic, wait for restore_magic; */ 5825 mg_magical(sv); /* else fix the flags now */ 5826 } 5827 else 5828 SvMAGICAL_off(sv); 5829 5830 return 0; 5831 } 5832 5833 /* 5834 =for apidoc sv_unmagic 5835 5836 Removes all magic of type C<type> from an SV. 5837 5838 =cut 5839 */ 5840 5841 int 5842 Perl_sv_unmagic(pTHX_ SV *const sv, const int type) 5843 { 5844 PERL_ARGS_ASSERT_SV_UNMAGIC; 5845 return S_sv_unmagicext_flags(aTHX_ sv, type, NULL, 0); 5846 } 5847 5848 /* 5849 =for apidoc sv_unmagicext 5850 5851 Removes all magic of type C<type> with the specified C<vtbl> from an SV. 5852 5853 =cut 5854 */ 5855 5856 int 5857 Perl_sv_unmagicext(pTHX_ SV *const sv, const int type, MGVTBL *vtbl) 5858 { 5859 PERL_ARGS_ASSERT_SV_UNMAGICEXT; 5860 return S_sv_unmagicext_flags(aTHX_ sv, type, vtbl, 1); 5861 } 5862 5863 /* 5864 =for apidoc sv_rvweaken 5865 5866 Weaken a reference: set the C<SvWEAKREF> flag on this RV; give the 5867 referred-to SV C<PERL_MAGIC_backref> magic if it hasn't already; and 5868 push a back-reference to this RV onto the array of backreferences 5869 associated with that magic. If the RV is magical, set magic will be 5870 called after the RV is cleared. Silently ignores C<undef> and warns 5871 on already-weak references. 5872 5873 =cut 5874 */ 5875 5876 SV * 5877 Perl_sv_rvweaken(pTHX_ SV *const sv) 5878 { 5879 SV *tsv; 5880 5881 PERL_ARGS_ASSERT_SV_RVWEAKEN; 5882 5883 if (!SvOK(sv)) /* let undefs pass */ 5884 return sv; 5885 if (!SvROK(sv)) 5886 Perl_croak(aTHX_ "Can't weaken a nonreference"); 5887 else if (SvWEAKREF(sv)) { 5888 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is already weak"); 5889 return sv; 5890 } 5891 else if (SvREADONLY(sv)) croak_no_modify(); 5892 tsv = SvRV(sv); 5893 Perl_sv_add_backref(aTHX_ tsv, sv); 5894 SvWEAKREF_on(sv); 5895 SvREFCNT_dec_NN(tsv); 5896 return sv; 5897 } 5898 5899 /* 5900 =for apidoc sv_rvunweaken 5901 5902 Unweaken a reference: Clear the C<SvWEAKREF> flag on this RV; remove 5903 the backreference to this RV from the array of backreferences 5904 associated with the target SV, increment the refcount of the target. 5905 Silently ignores C<undef> and warns on non-weak references. 5906 5907 =cut 5908 */ 5909 5910 SV * 5911 Perl_sv_rvunweaken(pTHX_ SV *const sv) 5912 { 5913 SV *tsv; 5914 5915 PERL_ARGS_ASSERT_SV_RVUNWEAKEN; 5916 5917 if (!SvOK(sv)) /* let undefs pass */ 5918 return sv; 5919 if (!SvROK(sv)) 5920 Perl_croak(aTHX_ "Can't unweaken a nonreference"); 5921 else if (!SvWEAKREF(sv)) { 5922 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is not weak"); 5923 return sv; 5924 } 5925 else if (SvREADONLY(sv)) croak_no_modify(); 5926 5927 tsv = SvRV(sv); 5928 SvWEAKREF_off(sv); 5929 SvROK_on(sv); 5930 SvREFCNT_inc_NN(tsv); 5931 Perl_sv_del_backref(aTHX_ tsv, sv); 5932 return sv; 5933 } 5934 5935 /* 5936 =for apidoc sv_get_backrefs 5937 5938 If C<sv> is the target of a weak reference then it returns the back 5939 references structure associated with the sv; otherwise return C<NULL>. 5940 5941 When returning a non-null result the type of the return is relevant. If it 5942 is an AV then the elements of the AV are the weak reference RVs which 5943 point at this item. If it is any other type then the item itself is the 5944 weak reference. 5945 5946 See also C<Perl_sv_add_backref()>, C<Perl_sv_del_backref()>, 5947 C<Perl_sv_kill_backrefs()> 5948 5949 =cut 5950 */ 5951 5952 SV * 5953 Perl_sv_get_backrefs(SV *const sv) 5954 { 5955 SV *backrefs= NULL; 5956 5957 PERL_ARGS_ASSERT_SV_GET_BACKREFS; 5958 5959 /* find slot to store array or singleton backref */ 5960 5961 if (SvTYPE(sv) == SVt_PVHV) { 5962 if (SvOOK(sv)) { 5963 struct xpvhv_aux * const iter = HvAUX((HV *)sv); 5964 backrefs = (SV *)iter->xhv_backreferences; 5965 } 5966 } else if (SvMAGICAL(sv)) { 5967 MAGIC *mg = mg_find(sv, PERL_MAGIC_backref); 5968 if (mg) 5969 backrefs = mg->mg_obj; 5970 } 5971 return backrefs; 5972 } 5973 5974 /* Give tsv backref magic if it hasn't already got it, then push a 5975 * back-reference to sv onto the array associated with the backref magic. 5976 * 5977 * As an optimisation, if there's only one backref and it's not an AV, 5978 * store it directly in the HvAUX or mg_obj slot, avoiding the need to 5979 * allocate an AV. (Whether the slot holds an AV tells us whether this is 5980 * active.) 5981 */ 5982 5983 /* A discussion about the backreferences array and its refcount: 5984 * 5985 * The AV holding the backreferences is pointed to either as the mg_obj of 5986 * PERL_MAGIC_backref, or in the specific case of a HV, from the 5987 * xhv_backreferences field. The array is created with a refcount 5988 * of 2. This means that if during global destruction the array gets 5989 * picked on before its parent to have its refcount decremented by the 5990 * random zapper, it won't actually be freed, meaning it's still there for 5991 * when its parent gets freed. 5992 * 5993 * When the parent SV is freed, the extra ref is killed by 5994 * Perl_sv_kill_backrefs. The other ref is killed, in the case of magic, 5995 * by mg_free() / MGf_REFCOUNTED, or for a hash, by Perl_hv_kill_backrefs. 5996 * 5997 * When a single backref SV is stored directly, it is not reference 5998 * counted. 5999 */ 6000 6001 void 6002 Perl_sv_add_backref(pTHX_ SV *const tsv, SV *const sv) 6003 { 6004 SV **svp; 6005 AV *av = NULL; 6006 MAGIC *mg = NULL; 6007 6008 PERL_ARGS_ASSERT_SV_ADD_BACKREF; 6009 6010 /* find slot to store array or singleton backref */ 6011 6012 if (SvTYPE(tsv) == SVt_PVHV) { 6013 svp = (SV**)Perl_hv_backreferences_p(aTHX_ MUTABLE_HV(tsv)); 6014 } else { 6015 if (SvMAGICAL(tsv)) 6016 mg = mg_find(tsv, PERL_MAGIC_backref); 6017 if (!mg) 6018 mg = sv_magicext(tsv, NULL, PERL_MAGIC_backref, &PL_vtbl_backref, NULL, 0); 6019 svp = &(mg->mg_obj); 6020 } 6021 6022 /* create or retrieve the array */ 6023 6024 if ( (!*svp && SvTYPE(sv) == SVt_PVAV) 6025 || (*svp && SvTYPE(*svp) != SVt_PVAV) 6026 ) { 6027 /* create array */ 6028 if (mg) 6029 mg->mg_flags |= MGf_REFCOUNTED; 6030 av = newAV(); 6031 AvREAL_off(av); 6032 SvREFCNT_inc_simple_void_NN(av); 6033 /* av now has a refcnt of 2; see discussion above */ 6034 av_extend(av, *svp ? 2 : 1); 6035 if (*svp) { 6036 /* move single existing backref to the array */ 6037 AvARRAY(av)[++AvFILLp(av)] = *svp; /* av_push() */ 6038 } 6039 *svp = (SV*)av; 6040 } 6041 else { 6042 av = MUTABLE_AV(*svp); 6043 if (!av) { 6044 /* optimisation: store single backref directly in HvAUX or mg_obj */ 6045 *svp = sv; 6046 return; 6047 } 6048 assert(SvTYPE(av) == SVt_PVAV); 6049 if (AvFILLp(av) >= AvMAX(av)) { 6050 av_extend(av, AvFILLp(av)+1); 6051 } 6052 } 6053 /* push new backref */ 6054 AvARRAY(av)[++AvFILLp(av)] = sv; /* av_push() */ 6055 } 6056 6057 /* delete a back-reference to ourselves from the backref magic associated 6058 * with the SV we point to. 6059 */ 6060 6061 void 6062 Perl_sv_del_backref(pTHX_ SV *const tsv, SV *const sv) 6063 { 6064 SV **svp = NULL; 6065 6066 PERL_ARGS_ASSERT_SV_DEL_BACKREF; 6067 6068 if (SvTYPE(tsv) == SVt_PVHV) { 6069 if (SvOOK(tsv)) 6070 svp = (SV**)Perl_hv_backreferences_p(aTHX_ MUTABLE_HV(tsv)); 6071 } 6072 else if (SvIS_FREED(tsv) && PL_phase == PERL_PHASE_DESTRUCT) { 6073 /* It's possible for the the last (strong) reference to tsv to have 6074 become freed *before* the last thing holding a weak reference. 6075 If both survive longer than the backreferences array, then when 6076 the referent's reference count drops to 0 and it is freed, it's 6077 not able to chase the backreferences, so they aren't NULLed. 6078 6079 For example, a CV holds a weak reference to its stash. If both the 6080 CV and the stash survive longer than the backreferences array, 6081 and the CV gets picked for the SvBREAK() treatment first, 6082 *and* it turns out that the stash is only being kept alive because 6083 of an our variable in the pad of the CV, then midway during CV 6084 destruction the stash gets freed, but CvSTASH() isn't set to NULL. 6085 It ends up pointing to the freed HV. Hence it's chased in here, and 6086 if this block wasn't here, it would hit the !svp panic just below. 6087 6088 I don't believe that "better" destruction ordering is going to help 6089 here - during global destruction there's always going to be the 6090 chance that something goes out of order. We've tried to make it 6091 foolproof before, and it only resulted in evolutionary pressure on 6092 fools. Which made us look foolish for our hubris. :-( 6093 */ 6094 return; 6095 } 6096 else { 6097 MAGIC *const mg 6098 = SvMAGICAL(tsv) ? mg_find(tsv, PERL_MAGIC_backref) : NULL; 6099 svp = mg ? &(mg->mg_obj) : NULL; 6100 } 6101 6102 if (!svp) 6103 Perl_croak(aTHX_ "panic: del_backref, svp=0"); 6104 if (!*svp) { 6105 /* It's possible that sv is being freed recursively part way through the 6106 freeing of tsv. If this happens, the backreferences array of tsv has 6107 already been freed, and so svp will be NULL. If this is the case, 6108 we should not panic. Instead, nothing needs doing, so return. */ 6109 if (PL_phase == PERL_PHASE_DESTRUCT && SvREFCNT(tsv) == 0) 6110 return; 6111 Perl_croak(aTHX_ "panic: del_backref, *svp=%p phase=%s refcnt=%" UVuf, 6112 (void*)*svp, PL_phase_names[PL_phase], (UV)SvREFCNT(tsv)); 6113 } 6114 6115 if (SvTYPE(*svp) == SVt_PVAV) { 6116 #ifdef DEBUGGING 6117 int count = 1; 6118 #endif 6119 AV * const av = (AV*)*svp; 6120 SSize_t fill; 6121 assert(!SvIS_FREED(av)); 6122 fill = AvFILLp(av); 6123 assert(fill > -1); 6124 svp = AvARRAY(av); 6125 /* for an SV with N weak references to it, if all those 6126 * weak refs are deleted, then sv_del_backref will be called 6127 * N times and O(N^2) compares will be done within the backref 6128 * array. To ameliorate this potential slowness, we: 6129 * 1) make sure this code is as tight as possible; 6130 * 2) when looking for SV, look for it at both the head and tail of the 6131 * array first before searching the rest, since some create/destroy 6132 * patterns will cause the backrefs to be freed in order. 6133 */ 6134 if (*svp == sv) { 6135 AvARRAY(av)++; 6136 AvMAX(av)--; 6137 } 6138 else { 6139 SV **p = &svp[fill]; 6140 SV *const topsv = *p; 6141 if (topsv != sv) { 6142 #ifdef DEBUGGING 6143 count = 0; 6144 #endif 6145 while (--p > svp) { 6146 if (*p == sv) { 6147 /* We weren't the last entry. 6148 An unordered list has this property that you 6149 can take the last element off the end to fill 6150 the hole, and it's still an unordered list :-) 6151 */ 6152 *p = topsv; 6153 #ifdef DEBUGGING 6154 count++; 6155 #else 6156 break; /* should only be one */ 6157 #endif 6158 } 6159 } 6160 } 6161 } 6162 assert(count ==1); 6163 AvFILLp(av) = fill-1; 6164 } 6165 else if (SvIS_FREED(*svp) && PL_phase == PERL_PHASE_DESTRUCT) { 6166 /* freed AV; skip */ 6167 } 6168 else { 6169 /* optimisation: only a single backref, stored directly */ 6170 if (*svp != sv) 6171 Perl_croak(aTHX_ "panic: del_backref, *svp=%p, sv=%p", 6172 (void*)*svp, (void*)sv); 6173 *svp = NULL; 6174 } 6175 6176 } 6177 6178 void 6179 Perl_sv_kill_backrefs(pTHX_ SV *const sv, AV *const av) 6180 { 6181 SV **svp; 6182 SV **last; 6183 bool is_array; 6184 6185 PERL_ARGS_ASSERT_SV_KILL_BACKREFS; 6186 6187 if (!av) 6188 return; 6189 6190 /* after multiple passes through Perl_sv_clean_all() for a thingy 6191 * that has badly leaked, the backref array may have gotten freed, 6192 * since we only protect it against 1 round of cleanup */ 6193 if (SvIS_FREED(av)) { 6194 if (PL_in_clean_all) /* All is fair */ 6195 return; 6196 Perl_croak(aTHX_ 6197 "panic: magic_killbackrefs (freed backref AV/SV)"); 6198 } 6199 6200 6201 is_array = (SvTYPE(av) == SVt_PVAV); 6202 if (is_array) { 6203 assert(!SvIS_FREED(av)); 6204 svp = AvARRAY(av); 6205 if (svp) 6206 last = svp + AvFILLp(av); 6207 } 6208 else { 6209 /* optimisation: only a single backref, stored directly */ 6210 svp = (SV**)&av; 6211 last = svp; 6212 } 6213 6214 if (svp) { 6215 while (svp <= last) { 6216 if (*svp) { 6217 SV *const referrer = *svp; 6218 if (SvWEAKREF(referrer)) { 6219 /* XXX Should we check that it hasn't changed? */ 6220 assert(SvROK(referrer)); 6221 SvRV_set(referrer, 0); 6222 SvOK_off(referrer); 6223 SvWEAKREF_off(referrer); 6224 SvSETMAGIC(referrer); 6225 } else if (SvTYPE(referrer) == SVt_PVGV || 6226 SvTYPE(referrer) == SVt_PVLV) { 6227 assert(SvTYPE(sv) == SVt_PVHV); /* stash backref */ 6228 /* You lookin' at me? */ 6229 assert(GvSTASH(referrer)); 6230 assert(GvSTASH(referrer) == (const HV *)sv); 6231 GvSTASH(referrer) = 0; 6232 } else if (SvTYPE(referrer) == SVt_PVCV || 6233 SvTYPE(referrer) == SVt_PVFM) { 6234 if (SvTYPE(sv) == SVt_PVHV) { /* stash backref */ 6235 /* You lookin' at me? */ 6236 assert(CvSTASH(referrer)); 6237 assert(CvSTASH(referrer) == (const HV *)sv); 6238 SvANY(MUTABLE_CV(referrer))->xcv_stash = 0; 6239 } 6240 else { 6241 assert(SvTYPE(sv) == SVt_PVGV); 6242 /* You lookin' at me? */ 6243 assert(CvGV(referrer)); 6244 assert(CvGV(referrer) == (const GV *)sv); 6245 anonymise_cv_maybe(MUTABLE_GV(sv), 6246 MUTABLE_CV(referrer)); 6247 } 6248 6249 } else { 6250 Perl_croak(aTHX_ 6251 "panic: magic_killbackrefs (flags=%" UVxf ")", 6252 (UV)SvFLAGS(referrer)); 6253 } 6254 6255 if (is_array) 6256 *svp = NULL; 6257 } 6258 svp++; 6259 } 6260 } 6261 if (is_array) { 6262 AvFILLp(av) = -1; 6263 SvREFCNT_dec_NN(av); /* remove extra count added by sv_add_backref() */ 6264 } 6265 return; 6266 } 6267 6268 /* 6269 =for apidoc sv_insert 6270 6271 Inserts and/or replaces a string at the specified offset/length within the SV. 6272 Similar to the Perl C<substr()> function, with C<littlelen> bytes starting at 6273 C<little> replacing C<len> bytes of the string in C<bigstr> starting at 6274 C<offset>. Handles get magic. 6275 6276 =for apidoc sv_insert_flags 6277 6278 Same as C<sv_insert>, but the extra C<flags> are passed to the 6279 C<SvPV_force_flags> that applies to C<bigstr>. 6280 6281 =cut 6282 */ 6283 6284 void 6285 Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags) 6286 { 6287 char *big; 6288 char *mid; 6289 char *midend; 6290 char *bigend; 6291 SSize_t i; /* better be sizeof(STRLEN) or bad things happen */ 6292 STRLEN curlen; 6293 6294 PERL_ARGS_ASSERT_SV_INSERT_FLAGS; 6295 6296 SvPV_force_flags(bigstr, curlen, flags); 6297 (void)SvPOK_only_UTF8(bigstr); 6298 6299 if (little >= SvPVX(bigstr) && 6300 little < SvPVX(bigstr) + (SvLEN(bigstr) ? SvLEN(bigstr) : SvCUR(bigstr))) { 6301 /* little is a pointer to within bigstr, since we can reallocate bigstr, 6302 or little...little+littlelen might overlap offset...offset+len we make a copy 6303 */ 6304 little = savepvn(little, littlelen); 6305 SAVEFREEPV(little); 6306 } 6307 6308 if (offset + len > curlen) { 6309 SvGROW(bigstr, offset+len+1); 6310 Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char); 6311 SvCUR_set(bigstr, offset+len); 6312 } 6313 6314 SvTAINT(bigstr); 6315 i = littlelen - len; 6316 if (i > 0) { /* string might grow */ 6317 big = SvGROW(bigstr, SvCUR(bigstr) + i + 1); 6318 mid = big + offset + len; 6319 midend = bigend = big + SvCUR(bigstr); 6320 bigend += i; 6321 *bigend = '\0'; 6322 while (midend > mid) /* shove everything down */ 6323 *--bigend = *--midend; 6324 Move(little,big+offset,littlelen,char); 6325 SvCUR_set(bigstr, SvCUR(bigstr) + i); 6326 SvSETMAGIC(bigstr); 6327 return; 6328 } 6329 else if (i == 0) { 6330 Move(little,SvPVX(bigstr)+offset,len,char); 6331 SvSETMAGIC(bigstr); 6332 return; 6333 } 6334 6335 big = SvPVX(bigstr); 6336 mid = big + offset; 6337 midend = mid + len; 6338 bigend = big + SvCUR(bigstr); 6339 6340 if (midend > bigend) 6341 Perl_croak(aTHX_ "panic: sv_insert, midend=%p, bigend=%p", 6342 midend, bigend); 6343 6344 if (mid - big > bigend - midend) { /* faster to shorten from end */ 6345 if (littlelen) { 6346 Move(little, mid, littlelen,char); 6347 mid += littlelen; 6348 } 6349 i = bigend - midend; 6350 if (i > 0) { 6351 Move(midend, mid, i,char); 6352 mid += i; 6353 } 6354 *mid = '\0'; 6355 SvCUR_set(bigstr, mid - big); 6356 } 6357 else if ((i = mid - big)) { /* faster from front */ 6358 midend -= littlelen; 6359 mid = midend; 6360 Move(big, midend - i, i, char); 6361 sv_chop(bigstr,midend-i); 6362 if (littlelen) 6363 Move(little, mid, littlelen,char); 6364 } 6365 else if (littlelen) { 6366 midend -= littlelen; 6367 sv_chop(bigstr,midend); 6368 Move(little,midend,littlelen,char); 6369 } 6370 else { 6371 sv_chop(bigstr,midend); 6372 } 6373 SvSETMAGIC(bigstr); 6374 } 6375 6376 /* 6377 =for apidoc sv_replace 6378 6379 Make the first argument a copy of the second, then delete the original. 6380 The target SV physically takes over ownership of the body of the source SV 6381 and inherits its flags; however, the target keeps any magic it owns, 6382 and any magic in the source is discarded. 6383 Note that this is a rather specialist SV copying operation; most of the 6384 time you'll want to use C<sv_setsv> or one of its many macro front-ends. 6385 6386 =cut 6387 */ 6388 6389 void 6390 Perl_sv_replace(pTHX_ SV *const sv, SV *const nsv) 6391 { 6392 const U32 refcnt = SvREFCNT(sv); 6393 6394 PERL_ARGS_ASSERT_SV_REPLACE; 6395 6396 SV_CHECK_THINKFIRST_COW_DROP(sv); 6397 if (SvREFCNT(nsv) != 1) { 6398 Perl_croak(aTHX_ "panic: reference miscount on nsv in sv_replace()" 6399 " (%" UVuf " != 1)", (UV) SvREFCNT(nsv)); 6400 } 6401 if (SvMAGICAL(sv)) { 6402 if (SvMAGICAL(nsv)) 6403 mg_free(nsv); 6404 else 6405 sv_upgrade(nsv, SVt_PVMG); 6406 SvMAGIC_set(nsv, SvMAGIC(sv)); 6407 SvFLAGS(nsv) |= SvMAGICAL(sv); 6408 SvMAGICAL_off(sv); 6409 SvMAGIC_set(sv, NULL); 6410 } 6411 SvREFCNT(sv) = 0; 6412 sv_clear(sv); 6413 assert(!SvREFCNT(sv)); 6414 #ifdef DEBUG_LEAKING_SCALARS 6415 sv->sv_flags = nsv->sv_flags; 6416 sv->sv_any = nsv->sv_any; 6417 sv->sv_refcnt = nsv->sv_refcnt; 6418 sv->sv_u = nsv->sv_u; 6419 #else 6420 StructCopy(nsv,sv,SV); 6421 #endif 6422 if(SvTYPE(sv) == SVt_IV) { 6423 SET_SVANY_FOR_BODYLESS_IV(sv); 6424 } 6425 6426 6427 SvREFCNT(sv) = refcnt; 6428 SvFLAGS(nsv) |= SVTYPEMASK; /* Mark as freed */ 6429 SvREFCNT(nsv) = 0; 6430 del_SV(nsv); 6431 } 6432 6433 /* We're about to free a GV which has a CV that refers back to us. 6434 * If that CV will outlive us, make it anonymous (i.e. fix up its CvGV 6435 * field) */ 6436 6437 STATIC void 6438 S_anonymise_cv_maybe(pTHX_ GV *gv, CV* cv) 6439 { 6440 SV *gvname; 6441 GV *anongv; 6442 6443 PERL_ARGS_ASSERT_ANONYMISE_CV_MAYBE; 6444 6445 /* be assertive! */ 6446 assert(SvREFCNT(gv) == 0); 6447 assert(isGV(gv) && isGV_with_GP(gv)); 6448 assert(GvGP(gv)); 6449 assert(!CvANON(cv)); 6450 assert(CvGV(cv) == gv); 6451 assert(!CvNAMED(cv)); 6452 6453 /* will the CV shortly be freed by gp_free() ? */ 6454 if (GvCV(gv) == cv && GvGP(gv)->gp_refcnt < 2 && SvREFCNT(cv) < 2) { 6455 SvANY(cv)->xcv_gv_u.xcv_gv = NULL; 6456 return; 6457 } 6458 6459 /* if not, anonymise: */ 6460 gvname = (GvSTASH(gv) && HvNAME(GvSTASH(gv)) && HvENAME(GvSTASH(gv))) 6461 ? newSVhek(HvENAME_HEK(GvSTASH(gv))) 6462 : newSVpvn_flags( "__ANON__", 8, 0 ); 6463 sv_catpvs(gvname, "::__ANON__"); 6464 anongv = gv_fetchsv(gvname, GV_ADDMULTI, SVt_PVCV); 6465 SvREFCNT_dec_NN(gvname); 6466 6467 CvANON_on(cv); 6468 CvCVGV_RC_on(cv); 6469 SvANY(cv)->xcv_gv_u.xcv_gv = MUTABLE_GV(SvREFCNT_inc(anongv)); 6470 } 6471 6472 6473 /* 6474 =for apidoc sv_clear 6475 6476 Clear an SV: call any destructors, free up any memory used by the body, 6477 and free the body itself. The SV's head is I<not> freed, although 6478 its type is set to all 1's so that it won't inadvertently be assumed 6479 to be live during global destruction etc. 6480 This function should only be called when C<REFCNT> is zero. Most of the time 6481 you'll want to call C<sv_free()> (or its macro wrapper C<SvREFCNT_dec>) 6482 instead. 6483 6484 =cut 6485 */ 6486 6487 void 6488 Perl_sv_clear(pTHX_ SV *const orig_sv) 6489 { 6490 SV* iter_sv = NULL; 6491 SV* next_sv = NULL; 6492 SV *sv = orig_sv; 6493 STRLEN hash_index = 0; /* initialise to make Coverity et al happy. 6494 Not strictly necessary */ 6495 6496 PERL_ARGS_ASSERT_SV_CLEAR; 6497 6498 /* within this loop, sv is the SV currently being freed, and 6499 * iter_sv is the most recent AV or whatever that's being iterated 6500 * over to provide more SVs */ 6501 6502 while (sv) { 6503 U32 type = SvTYPE(sv); 6504 HV *stash; 6505 6506 assert(SvREFCNT(sv) == 0); 6507 assert(SvTYPE(sv) != (svtype)SVTYPEMASK); 6508 6509 if (type <= SVt_IV) { 6510 /* Historically this check on type was needed so that the code to 6511 * free bodies wasn't reached for these types, because the arena 6512 * slots were re-used for HEs and pointer table entries. The 6513 * metadata table `bodies_by_type` had the information for the sizes 6514 * for HEs and PTEs, hence the code here had to have a special-case 6515 * check to ensure that the "regular" body freeing code wasn't 6516 * reached, and get confused by the "lies" in `bodies_by_type`. 6517 * 6518 * However, it hasn't actually been needed for that reason since 6519 * Aug 2010 (commit 829cd18aa7f45221), because `bodies_by_type` was 6520 * changed to always hold the accurate metadata for the SV types. 6521 * This was possible because PTEs were no longer allocated from the 6522 * "SVt_IV" arena, and the code to allocate HEs from the "SVt_NULL" 6523 * arena is entirely in hv.c, so doesn't access the table. 6524 * 6525 * Some sort of check is still needed to handle SVt_IVs - pure RVs 6526 * need to take one code path which is common with RVs stored in 6527 * SVt_PV (or larger), but pure IVs mustn't take the "PV but not RV" 6528 * path, as SvPVX() doesn't point to valid memory. 6529 * 6530 * Hence this code is still the most efficient way to handle this. 6531 */ 6532 6533 if (SvROK(sv)) 6534 goto free_rv; 6535 SvFLAGS(sv) &= SVf_BREAK; 6536 SvFLAGS(sv) |= SVTYPEMASK; 6537 goto free_head; 6538 } 6539 6540 /* objs are always >= MG, but pad names use the SVs_OBJECT flag 6541 for another purpose */ 6542 assert(!SvOBJECT(sv) || type >= SVt_PVMG); 6543 6544 if (type >= SVt_PVMG) { 6545 if (SvOBJECT(sv)) { 6546 if (!curse(sv, 1)) goto get_next_sv; 6547 type = SvTYPE(sv); /* destructor may have changed it */ 6548 } 6549 /* Free back-references before magic, in case the magic calls 6550 * Perl code that has weak references to sv. */ 6551 if (type == SVt_PVHV) { 6552 Perl_hv_kill_backrefs(aTHX_ MUTABLE_HV(sv)); 6553 if (SvMAGIC(sv)) 6554 mg_free(sv); 6555 } 6556 else if (SvMAGIC(sv)) { 6557 /* Free back-references before other types of magic. */ 6558 sv_unmagic(sv, PERL_MAGIC_backref); 6559 mg_free(sv); 6560 } 6561 SvMAGICAL_off(sv); 6562 } 6563 switch (type) { 6564 /* case SVt_INVLIST: */ 6565 case SVt_PVIO: 6566 if (IoIFP(sv) && 6567 IoIFP(sv) != PerlIO_stdin() && 6568 IoIFP(sv) != PerlIO_stdout() && 6569 IoIFP(sv) != PerlIO_stderr() && 6570 !(IoFLAGS(sv) & IOf_FAKE_DIRP)) 6571 { 6572 io_close(MUTABLE_IO(sv), NULL, FALSE, 6573 (IoTYPE(sv) == IoTYPE_WRONLY || 6574 IoTYPE(sv) == IoTYPE_RDWR || 6575 IoTYPE(sv) == IoTYPE_APPEND)); 6576 } 6577 if (IoDIRP(sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP)) 6578 PerlDir_close(IoDIRP(sv)); 6579 IoDIRP(sv) = (DIR*)NULL; 6580 Safefree(IoTOP_NAME(sv)); 6581 Safefree(IoFMT_NAME(sv)); 6582 Safefree(IoBOTTOM_NAME(sv)); 6583 if ((const GV *)sv == PL_statgv) 6584 PL_statgv = NULL; 6585 goto freescalar; 6586 case SVt_REGEXP: 6587 /* FIXME for plugins */ 6588 pregfree2((REGEXP*) sv); 6589 goto freescalar; 6590 case SVt_PVCV: 6591 case SVt_PVFM: 6592 cv_undef(MUTABLE_CV(sv)); 6593 /* If we're in a stash, we don't own a reference to it. 6594 * However it does have a back reference to us, which needs to 6595 * be cleared. */ 6596 if ((stash = CvSTASH(sv))) 6597 sv_del_backref(MUTABLE_SV(stash), sv); 6598 goto freescalar; 6599 case SVt_PVHV: 6600 if (HvTOTALKEYS((HV*)sv) > 0) { 6601 const HEK *hek; 6602 /* this statement should match the one at the beginning of 6603 * hv_undef_flags() */ 6604 if ( PL_phase != PERL_PHASE_DESTRUCT 6605 && (hek = HvNAME_HEK((HV*)sv))) 6606 { 6607 if (PL_stashcache) { 6608 DEBUG_o(Perl_deb(aTHX_ 6609 "sv_clear clearing PL_stashcache for '%" HEKf 6610 "'\n", 6611 HEKfARG(hek))); 6612 (void)hv_deletehek(PL_stashcache, 6613 hek, G_DISCARD); 6614 } 6615 hv_name_set((HV*)sv, NULL, 0, 0); 6616 } 6617 6618 /* save old iter_sv in unused SvSTASH field */ 6619 assert(!SvOBJECT(sv)); 6620 SvSTASH(sv) = (HV*)iter_sv; 6621 iter_sv = sv; 6622 6623 /* save old hash_index in unused SvMAGIC field */ 6624 assert(!SvMAGICAL(sv)); 6625 assert(!SvMAGIC(sv)); 6626 ((XPVMG*) SvANY(sv))->xmg_u.xmg_hash_index = hash_index; 6627 hash_index = 0; 6628 6629 next_sv = Perl_hfree_next_entry(aTHX_ (HV*)sv, &hash_index); 6630 goto get_next_sv; /* process this new sv */ 6631 } 6632 /* free empty hash */ 6633 Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL); 6634 assert(!HvARRAY((HV*)sv)); 6635 break; 6636 case SVt_PVAV: 6637 { 6638 AV* av = MUTABLE_AV(sv); 6639 if (PL_comppad == av) { 6640 PL_comppad = NULL; 6641 PL_curpad = NULL; 6642 } 6643 if (AvREAL(av) && AvFILLp(av) > -1) { 6644 next_sv = AvARRAY(av)[AvFILLp(av)--]; 6645 /* save old iter_sv in top-most slot of AV, 6646 * and pray that it doesn't get wiped in the meantime */ 6647 AvARRAY(av)[AvMAX(av)] = iter_sv; 6648 iter_sv = sv; 6649 goto get_next_sv; /* process this new sv */ 6650 } 6651 Safefree(AvALLOC(av)); 6652 } 6653 6654 break; 6655 case SVt_PVLV: 6656 if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */ 6657 SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv))); 6658 HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh; 6659 PL_hv_fetch_ent_mh = (HE*)LvTARG(sv); 6660 } 6661 else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ 6662 SvREFCNT_dec(LvTARG(sv)); 6663 if (isREGEXP(sv)) { 6664 /* This PVLV has had a REGEXP assigned to it - the memory 6665 * normally used to store SvLEN instead points to a regex body. 6666 * Retrieving the pointer to the regex body from the correct 6667 * location is normally abstracted by ReANY(), which handles 6668 * both SVt_PVLV and SVt_REGEXP 6669 * 6670 * This code is unwinding the storage specific to SVt_PVLV. 6671 * We get the body pointer directly from the union, free it, 6672 * then set SvLEN to whatever value was in the now-freed regex 6673 * body. The PVX buffer is shared by multiple re's and only 6674 * freed once, by the re whose SvLEN is non-null. 6675 * 6676 * Perl_sv_force_normal_flags() also has code to free this 6677 * hidden body - it swaps the body into a temporary SV it has 6678 * just allocated, then frees that SV. That causes execution 6679 * to reach the SVt_REGEXP: case about 60 lines earlier in this 6680 * function. 6681 * 6682 * See Perl_reg_temp_copy() for the code that sets up this 6683 * REGEXP body referenced by the PVLV. */ 6684 struct regexp *r = ((XPV*)SvANY(sv))->xpv_len_u.xpvlenu_rx; 6685 STRLEN len = r->xpv_len; 6686 pregfree2((REGEXP*) sv); 6687 del_body_by_type(r, SVt_REGEXP); 6688 SvLEN_set((sv), len); 6689 goto freescalar; 6690 } 6691 /* FALLTHROUGH */ 6692 case SVt_PVGV: 6693 if (isGV_with_GP(sv)) { 6694 if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv))) 6695 && HvENAME_get(stash)) 6696 mro_method_changed_in(stash); 6697 gp_free(MUTABLE_GV(sv)); 6698 if (GvNAME_HEK(sv)) 6699 unshare_hek(GvNAME_HEK(sv)); 6700 /* If we're in a stash, we don't own a reference to it. 6701 * However it does have a back reference to us, which 6702 * needs to be cleared. */ 6703 if ((stash = GvSTASH(sv))) 6704 sv_del_backref(MUTABLE_SV(stash), sv); 6705 } 6706 /* FIXME. There are probably more unreferenced pointers to SVs 6707 * in the interpreter struct that we should check and tidy in 6708 * a similar fashion to this: */ 6709 /* See also S_sv_unglob, which does the same thing. */ 6710 if ((const GV *)sv == PL_last_in_gv) 6711 PL_last_in_gv = NULL; 6712 else if ((const GV *)sv == PL_statgv) 6713 PL_statgv = NULL; 6714 else if ((const GV *)sv == PL_stderrgv) 6715 PL_stderrgv = NULL; 6716 /* FALLTHROUGH */ 6717 case SVt_PVMG: 6718 case SVt_PVNV: 6719 case SVt_PVIV: 6720 case SVt_INVLIST: 6721 case SVt_PV: 6722 freescalar: 6723 /* Don't bother with SvOOK_off(sv); as we're only going to 6724 * free it. */ 6725 if (SvOOK(sv)) { 6726 STRLEN offset; 6727 SvOOK_offset(sv, offset); 6728 SvPV_set(sv, SvPVX_mutable(sv) - offset); 6729 /* Don't even bother with turning off the OOK flag. */ 6730 } 6731 if (SvROK(sv)) { 6732 free_rv: 6733 { 6734 SV * const target = SvRV(sv); 6735 if (SvWEAKREF(sv)) 6736 sv_del_backref(target, sv); 6737 else 6738 next_sv = target; 6739 } 6740 } 6741 #ifdef PERL_ANY_COW 6742 else if (SvPVX_const(sv) 6743 && !(SvTYPE(sv) == SVt_PVIO 6744 && !(IoFLAGS(sv) & IOf_FAKE_DIRP))) 6745 { 6746 if (SvIsCOW(sv)) { 6747 #ifdef DEBUGGING 6748 if (DEBUG_C_TEST) { 6749 PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); 6750 sv_dump(sv); 6751 } 6752 #endif 6753 if (SvIsCOW_static(sv)) { 6754 SvLEN_set(sv, 0); 6755 } 6756 else if (SvIsCOW_shared_hash(sv)) { 6757 unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); 6758 } 6759 else { 6760 if (CowREFCNT(sv)) { 6761 sv_buf_to_rw(sv); 6762 CowREFCNT(sv)--; 6763 sv_buf_to_ro(sv); 6764 SvLEN_set(sv, 0); 6765 } 6766 } 6767 } 6768 if (SvLEN(sv)) { 6769 Safefree(SvPVX_mutable(sv)); 6770 } 6771 } 6772 #else 6773 else if (SvPVX_const(sv) && SvLEN(sv) 6774 && !(SvTYPE(sv) == SVt_PVIO 6775 && !(IoFLAGS(sv) & IOf_FAKE_DIRP))) 6776 Safefree(SvPVX_mutable(sv)); 6777 else if (SvPVX_const(sv) && SvIsCOW(sv)) { 6778 unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); 6779 } 6780 #endif 6781 break; 6782 case SVt_NV: 6783 break; 6784 } 6785 6786 free_body: 6787 6788 { 6789 U32 arena_index; 6790 const struct body_details *sv_type_details; 6791 6792 if (type == SVt_PVHV && SvOOK(sv)) { 6793 arena_index = HVAUX_ARENA_ROOT_IX; 6794 sv_type_details = &fake_hv_with_aux; 6795 } 6796 else { 6797 arena_index = type; 6798 sv_type_details = bodies_by_type + arena_index; 6799 } 6800 6801 SvFLAGS(sv) &= SVf_BREAK; 6802 SvFLAGS(sv) |= SVTYPEMASK; 6803 6804 if (sv_type_details->arena) { 6805 del_body(((char *)SvANY(sv) + sv_type_details->offset), 6806 &PL_body_roots[arena_index]); 6807 } 6808 else if (sv_type_details->body_size) { 6809 safefree(SvANY(sv)); 6810 } 6811 } 6812 6813 free_head: 6814 /* caller is responsible for freeing the head of the original sv */ 6815 if (sv != orig_sv && !SvREFCNT(sv)) 6816 del_SV(sv); 6817 6818 /* grab and free next sv, if any */ 6819 get_next_sv: 6820 while (1) { 6821 sv = NULL; 6822 if (next_sv) { 6823 sv = next_sv; 6824 next_sv = NULL; 6825 } 6826 else if (!iter_sv) { 6827 break; 6828 } else if (SvTYPE(iter_sv) == SVt_PVAV) { 6829 AV *const av = (AV*)iter_sv; 6830 if (AvFILLp(av) > -1) { 6831 sv = AvARRAY(av)[AvFILLp(av)--]; 6832 } 6833 else { /* no more elements of current AV to free */ 6834 sv = iter_sv; 6835 type = SvTYPE(sv); 6836 /* restore previous value, squirrelled away */ 6837 iter_sv = AvARRAY(av)[AvMAX(av)]; 6838 Safefree(AvALLOC(av)); 6839 goto free_body; 6840 } 6841 } else if (SvTYPE(iter_sv) == SVt_PVHV) { 6842 sv = Perl_hfree_next_entry(aTHX_ (HV*)iter_sv, &hash_index); 6843 if (!sv && !HvTOTALKEYS((HV *)iter_sv)) { 6844 /* no more elements of current HV to free */ 6845 sv = iter_sv; 6846 type = SvTYPE(sv); 6847 /* Restore previous values of iter_sv and hash_index, 6848 * squirrelled away */ 6849 assert(!SvOBJECT(sv)); 6850 iter_sv = (SV*)SvSTASH(sv); 6851 assert(!SvMAGICAL(sv)); 6852 hash_index = ((XPVMG*) SvANY(sv))->xmg_u.xmg_hash_index; 6853 #ifdef DEBUGGING 6854 /* perl -DA does not like rubbish in SvMAGIC. */ 6855 SvMAGIC_set(sv, 0); 6856 #endif 6857 6858 /* free any remaining detritus from the hash struct */ 6859 Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL); 6860 assert(!HvARRAY((HV*)sv)); 6861 goto free_body; 6862 } 6863 } 6864 6865 /* unrolled SvREFCNT_dec and sv_free2 follows: */ 6866 6867 if (!sv) 6868 continue; 6869 if (!SvREFCNT(sv)) { 6870 sv_free(sv); 6871 continue; 6872 } 6873 if (--(SvREFCNT(sv))) 6874 continue; 6875 #ifdef DEBUGGING 6876 if (SvTEMP(sv)) { 6877 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), 6878 "Attempt to free temp prematurely: SV 0x%" UVxf 6879 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); 6880 continue; 6881 } 6882 #endif 6883 if (SvIMMORTAL(sv)) { 6884 /* make sure SvREFCNT(sv)==0 happens very seldom */ 6885 SvREFCNT(sv) = SvREFCNT_IMMORTAL; 6886 continue; 6887 } 6888 break; 6889 } /* while 1 */ 6890 6891 } /* while sv */ 6892 } 6893 6894 /* This routine curses the sv itself, not the object referenced by sv. So 6895 sv does not have to be ROK. */ 6896 6897 static bool 6898 S_curse(pTHX_ SV * const sv, const bool check_refcnt) { 6899 PERL_ARGS_ASSERT_CURSE; 6900 assert(SvOBJECT(sv)); 6901 6902 if (PL_defstash && /* Still have a symbol table? */ 6903 SvDESTROYABLE(sv)) 6904 { 6905 dSP; 6906 HV* stash; 6907 do { 6908 stash = SvSTASH(sv); 6909 assert(SvTYPE(stash) == SVt_PVHV); 6910 if (HvNAME(stash)) { 6911 CV* destructor = NULL; 6912 struct mro_meta *meta; 6913 6914 assert (SvOOK(stash)); 6915 6916 DEBUG_o( Perl_deb(aTHX_ "Looking for DESTROY method for %s\n", 6917 HvNAME(stash)) ); 6918 6919 /* don't make this an initialization above the assert, since it needs 6920 an AUX structure */ 6921 meta = HvMROMETA(stash); 6922 if (meta->destroy_gen && meta->destroy_gen == PL_sub_generation) { 6923 destructor = meta->destroy; 6924 DEBUG_o( Perl_deb(aTHX_ "Using cached DESTROY method %p for %s\n", 6925 (void *)destructor, HvNAME(stash)) ); 6926 } 6927 else { 6928 bool autoload = FALSE; 6929 GV *gv = 6930 gv_fetchmeth_pvn(stash, S_destroy, S_destroy_len, -1, 0); 6931 if (gv) 6932 destructor = GvCV(gv); 6933 if (!destructor) { 6934 gv = gv_autoload_pvn(stash, S_destroy, S_destroy_len, 6935 GV_AUTOLOAD_ISMETHOD); 6936 if (gv) 6937 destructor = GvCV(gv); 6938 if (destructor) 6939 autoload = TRUE; 6940 } 6941 /* we don't cache AUTOLOAD for DESTROY, since this code 6942 would then need to set $__PACKAGE__::AUTOLOAD, or the 6943 equivalent for XS AUTOLOADs */ 6944 if (!autoload) { 6945 meta->destroy_gen = PL_sub_generation; 6946 meta->destroy = destructor; 6947 6948 DEBUG_o( Perl_deb(aTHX_ "Set cached DESTROY method %p for %s\n", 6949 (void *)destructor, HvNAME(stash)) ); 6950 } 6951 else { 6952 DEBUG_o( Perl_deb(aTHX_ "Not caching AUTOLOAD for DESTROY method for %s\n", 6953 HvNAME(stash)) ); 6954 } 6955 } 6956 assert(!destructor || SvTYPE(destructor) == SVt_PVCV); 6957 if (destructor 6958 /* A constant subroutine can have no side effects, so 6959 don't bother calling it. */ 6960 && !CvCONST(destructor) 6961 /* Don't bother calling an empty destructor or one that 6962 returns immediately. */ 6963 && (CvISXSUB(destructor) 6964 || (CvSTART(destructor) 6965 && (CvSTART(destructor)->op_next->op_type 6966 != OP_LEAVESUB) 6967 && (CvSTART(destructor)->op_next->op_type 6968 != OP_PUSHMARK 6969 || CvSTART(destructor)->op_next->op_next->op_type 6970 != OP_RETURN 6971 ) 6972 )) 6973 ) 6974 { 6975 SV* const tmpref = newRV(sv); 6976 SvREADONLY_on(tmpref); /* DESTROY() could be naughty */ 6977 ENTER; 6978 PUSHSTACKi(PERLSI_DESTROY); 6979 EXTEND(SP, 2); 6980 PUSHMARK(SP); 6981 PUSHs(tmpref); 6982 PUTBACK; 6983 call_sv(MUTABLE_SV(destructor), 6984 G_DISCARD|G_EVAL|G_KEEPERR|G_VOID); 6985 POPSTACK; 6986 SPAGAIN; 6987 LEAVE; 6988 if(SvREFCNT(tmpref) < 2) { 6989 /* tmpref is not kept alive! */ 6990 SvREFCNT(sv)--; 6991 SvRV_set(tmpref, NULL); 6992 SvROK_off(tmpref); 6993 } 6994 SvREFCNT_dec_NN(tmpref); 6995 } 6996 } 6997 } while (SvOBJECT(sv) && SvSTASH(sv) != stash); 6998 6999 7000 if (check_refcnt && SvREFCNT(sv)) { 7001 if (PL_in_clean_objs) 7002 Perl_croak(aTHX_ 7003 "DESTROY created new reference to dead object '%" HEKf "'", 7004 HEKfARG(HvNAME_HEK(stash))); 7005 /* DESTROY gave object new lease on life */ 7006 return FALSE; 7007 } 7008 } 7009 7010 if (SvOBJECT(sv)) { 7011 HV * const stash = SvSTASH(sv); 7012 /* Curse before freeing the stash, as freeing the stash could cause 7013 a recursive call into S_curse. */ 7014 SvOBJECT_off(sv); /* Curse the object. */ 7015 SvSTASH_set(sv,0); /* SvREFCNT_dec may try to read this */ 7016 SvREFCNT_dec(stash); /* possibly of changed persuasion */ 7017 } 7018 return TRUE; 7019 } 7020 7021 /* 7022 =for apidoc sv_newref 7023 7024 Increment an SV's reference count. Use the C<SvREFCNT_inc()> wrapper 7025 instead. 7026 7027 =cut 7028 */ 7029 7030 SV * 7031 Perl_sv_newref(pTHX_ SV *const sv) 7032 { 7033 PERL_UNUSED_CONTEXT; 7034 if (sv) 7035 (SvREFCNT(sv))++; 7036 return sv; 7037 } 7038 7039 /* 7040 =for apidoc sv_free 7041 7042 Decrement an SV's reference count, and if it drops to zero, call 7043 C<sv_clear> to invoke destructors and free up any memory used by 7044 the body; finally, deallocating the SV's head itself. 7045 Normally called via a wrapper macro C<SvREFCNT_dec>. 7046 7047 =cut 7048 */ 7049 7050 void 7051 Perl_sv_free(pTHX_ SV *const sv) 7052 { 7053 SvREFCNT_dec(sv); 7054 } 7055 7056 7057 /* Private helper function for SvREFCNT_dec(). 7058 * Called with rc set to original SvREFCNT(sv), where rc == 0 or 1 */ 7059 7060 void 7061 Perl_sv_free2(pTHX_ SV *const sv, const U32 rc) 7062 { 7063 7064 PERL_ARGS_ASSERT_SV_FREE2; 7065 7066 if (LIKELY( rc == 1 )) { 7067 /* normal case */ 7068 SvREFCNT(sv) = 0; 7069 7070 #ifdef DEBUGGING 7071 if (SvTEMP(sv)) { 7072 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), 7073 "Attempt to free temp prematurely: SV 0x%" UVxf 7074 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); 7075 return; 7076 } 7077 #endif 7078 if (SvIMMORTAL(sv)) { 7079 /* make sure SvREFCNT(sv)==0 happens very seldom */ 7080 SvREFCNT(sv) = SvREFCNT_IMMORTAL; 7081 return; 7082 } 7083 sv_clear(sv); 7084 if (! SvREFCNT(sv)) /* may have have been resurrected */ 7085 del_SV(sv); 7086 return; 7087 } 7088 7089 /* handle exceptional cases */ 7090 7091 assert(rc == 0); 7092 7093 if (SvFLAGS(sv) & SVf_BREAK) 7094 /* this SV's refcnt has been artificially decremented to 7095 * trigger cleanup */ 7096 return; 7097 if (PL_in_clean_all) /* All is fair */ 7098 return; 7099 if (SvIMMORTAL(sv)) { 7100 /* make sure SvREFCNT(sv)==0 happens very seldom */ 7101 SvREFCNT(sv) = SvREFCNT_IMMORTAL; 7102 return; 7103 } 7104 if (ckWARN_d(WARN_INTERNAL)) { 7105 #ifdef DEBUG_LEAKING_SCALARS_FORK_DUMP 7106 Perl_dump_sv_child(aTHX_ sv); 7107 #else 7108 #ifdef DEBUG_LEAKING_SCALARS 7109 sv_dump(sv); 7110 #endif 7111 #ifdef DEBUG_LEAKING_SCALARS_ABORT 7112 if (PL_warnhook == PERL_WARNHOOK_FATAL 7113 || ckDEAD(packWARN(WARN_INTERNAL))) { 7114 /* Don't let Perl_warner cause us to escape our fate: */ 7115 abort(); 7116 } 7117 #endif 7118 /* This may not return: */ 7119 Perl_warner(aTHX_ packWARN(WARN_INTERNAL), 7120 "Attempt to free unreferenced scalar: SV 0x%" UVxf 7121 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); 7122 #endif 7123 } 7124 #ifdef DEBUG_LEAKING_SCALARS_ABORT 7125 abort(); 7126 #endif 7127 7128 } 7129 7130 7131 /* 7132 =for apidoc sv_len 7133 7134 Returns the length of the string in the SV. Handles magic and type 7135 coercion and sets the UTF8 flag appropriately. See also C<L</SvCUR>>, which 7136 gives raw access to the C<xpv_cur> slot. 7137 7138 =cut 7139 */ 7140 7141 STRLEN 7142 Perl_sv_len(pTHX_ SV *const sv) 7143 { 7144 STRLEN len; 7145 7146 if (!sv) 7147 return 0; 7148 7149 (void)SvPV_const(sv, len); 7150 return len; 7151 } 7152 7153 /* 7154 =for apidoc sv_len_utf8 7155 =for apidoc_item sv_len_utf8_nomg 7156 7157 These return the number of characters in the string in an SV, counting wide 7158 UTF-8 bytes as a single character. Both handle type coercion. 7159 They differ only in that C<sv_len_utf8> performs 'get' magic; 7160 C<sv_len_utf8_nomg> skips any magic. 7161 7162 =cut 7163 */ 7164 7165 /* 7166 * The length is cached in PERL_MAGIC_utf8, in the mg_len field. Also the 7167 * mg_ptr is used, by sv_pos_u2b() and sv_pos_b2u() - see the comments below. 7168 * (Note that the mg_len is not the length of the mg_ptr field. 7169 * This allows the cache to store the character length of the string without 7170 * needing to malloc() extra storage to attach to the mg_ptr.) 7171 * 7172 */ 7173 7174 STRLEN 7175 Perl_sv_len_utf8(pTHX_ SV *const sv) 7176 { 7177 if (!sv) 7178 return 0; 7179 7180 SvGETMAGIC(sv); 7181 return sv_len_utf8_nomg(sv); 7182 } 7183 7184 STRLEN 7185 Perl_sv_len_utf8_nomg(pTHX_ SV * const sv) 7186 { 7187 STRLEN len; 7188 const U8 *s = (U8*)SvPV_nomg_const(sv, len); 7189 7190 PERL_ARGS_ASSERT_SV_LEN_UTF8_NOMG; 7191 7192 if (PL_utf8cache && SvUTF8(sv)) { 7193 STRLEN ulen; 7194 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; 7195 7196 if (mg && (mg->mg_len != -1 || mg->mg_ptr)) { 7197 if (mg->mg_len != -1) 7198 ulen = mg->mg_len; 7199 else { 7200 /* We can use the offset cache for a headstart. 7201 The longer value is stored in the first pair. */ 7202 STRLEN *cache = (STRLEN *) mg->mg_ptr; 7203 7204 ulen = cache[0] + Perl_utf8_length(aTHX_ s + cache[1], 7205 s + len); 7206 } 7207 7208 if (PL_utf8cache < 0) { 7209 const STRLEN real = Perl_utf8_length(aTHX_ s, s + len); 7210 assert_uft8_cache_coherent("sv_len_utf8", ulen, real, sv); 7211 } 7212 } 7213 else { 7214 ulen = Perl_utf8_length(aTHX_ s, s + len); 7215 utf8_mg_len_cache_update(sv, &mg, ulen); 7216 } 7217 return ulen; 7218 } 7219 return SvUTF8(sv) ? Perl_utf8_length(aTHX_ s, s + len) : len; 7220 } 7221 7222 /* Walk forwards to find the byte corresponding to the passed in UTF-8 7223 offset. */ 7224 static STRLEN 7225 S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, 7226 STRLEN *const uoffset_p, bool *const at_end, 7227 bool* canonical_position) 7228 { 7229 const U8 *s = start; 7230 STRLEN uoffset = *uoffset_p; 7231 7232 PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS; 7233 7234 while (s < send && uoffset) { 7235 --uoffset; 7236 s += UTF8SKIP(s); 7237 } 7238 if (s == send) { 7239 *at_end = TRUE; 7240 } 7241 else if (s > send) { 7242 *at_end = TRUE; 7243 /* This is the existing behaviour. Possibly it should be a croak, as 7244 it's actually a bounds error */ 7245 s = send; 7246 } 7247 /* If the unicode position is beyond the end, we return the end but 7248 shouldn't cache that position */ 7249 *canonical_position = (uoffset == 0); 7250 *uoffset_p -= uoffset; 7251 return s - start; 7252 } 7253 7254 /* Given the length of the string in both bytes and UTF-8 characters, decide 7255 whether to walk forwards or backwards to find the byte corresponding to 7256 the passed in UTF-8 offset. */ 7257 static STRLEN 7258 S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, 7259 STRLEN uoffset, const STRLEN uend) 7260 { 7261 STRLEN backw = uend - uoffset; 7262 7263 PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY; 7264 7265 if (uoffset < 2 * backw) { 7266 /* The assumption is that going forwards is twice the speed of going 7267 forward (that's where the 2 * backw comes from). 7268 (The real figure of course depends on the UTF-8 data.) */ 7269 const U8 *s = start; 7270 7271 while (s < send && uoffset--) 7272 s += UTF8SKIP(s); 7273 assert (s <= send); 7274 if (s > send) 7275 s = send; 7276 return s - start; 7277 } 7278 7279 while (backw--) { 7280 send--; 7281 while (UTF8_IS_CONTINUATION(*send)) 7282 send--; 7283 } 7284 return send - start; 7285 } 7286 7287 /* For the string representation of the given scalar, find the byte 7288 corresponding to the passed in UTF-8 offset. uoffset0 and boffset0 7289 give another position in the string, *before* the sought offset, which 7290 (which is always true, as 0, 0 is a valid pair of positions), which should 7291 help reduce the amount of linear searching. 7292 If *mgp is non-NULL, it should point to the UTF-8 cache magic, which 7293 will be used to reduce the amount of linear searching. The cache will be 7294 created if necessary, and the found value offered to it for update. */ 7295 static STRLEN 7296 S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, 7297 const U8 *const send, STRLEN uoffset, 7298 STRLEN uoffset0, STRLEN boffset0) 7299 { 7300 STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */ 7301 bool found = FALSE; 7302 bool at_end = FALSE; 7303 bool canonical_position = FALSE; 7304 7305 PERL_ARGS_ASSERT_SV_POS_U2B_CACHED; 7306 7307 assert (uoffset >= uoffset0); 7308 7309 if (!uoffset) 7310 return 0; 7311 7312 if (!SvREADONLY(sv) && !SvGMAGICAL(sv) && SvPOK(sv) 7313 && PL_utf8cache 7314 && (*mgp || (SvTYPE(sv) >= SVt_PVMG && 7315 (*mgp = mg_find(sv, PERL_MAGIC_utf8))))) { 7316 if ((*mgp)->mg_ptr) { 7317 STRLEN *cache = (STRLEN *) (*mgp)->mg_ptr; 7318 if (cache[0] == uoffset) { 7319 /* An exact match. */ 7320 return cache[1]; 7321 } 7322 if (cache[2] == uoffset) { 7323 /* An exact match. */ 7324 return cache[3]; 7325 } 7326 7327 if (cache[0] < uoffset) { 7328 /* The cache already knows part of the way. */ 7329 if (cache[0] > uoffset0) { 7330 /* The cache knows more than the passed in pair */ 7331 uoffset0 = cache[0]; 7332 boffset0 = cache[1]; 7333 } 7334 if ((*mgp)->mg_len != -1) { 7335 /* And we know the end too. */ 7336 boffset = boffset0 7337 + sv_pos_u2b_midway(start + boffset0, send, 7338 uoffset - uoffset0, 7339 (*mgp)->mg_len - uoffset0); 7340 } else { 7341 uoffset -= uoffset0; 7342 boffset = boffset0 7343 + sv_pos_u2b_forwards(start + boffset0, 7344 send, &uoffset, &at_end, 7345 &canonical_position); 7346 uoffset += uoffset0; 7347 } 7348 } 7349 else if (cache[2] < uoffset) { 7350 /* We're between the two cache entries. */ 7351 if (cache[2] > uoffset0) { 7352 /* and the cache knows more than the passed in pair */ 7353 uoffset0 = cache[2]; 7354 boffset0 = cache[3]; 7355 } 7356 7357 boffset = boffset0 7358 + sv_pos_u2b_midway(start + boffset0, 7359 start + cache[1], 7360 uoffset - uoffset0, 7361 cache[0] - uoffset0); 7362 } else { 7363 boffset = boffset0 7364 + sv_pos_u2b_midway(start + boffset0, 7365 start + cache[3], 7366 uoffset - uoffset0, 7367 cache[2] - uoffset0); 7368 } 7369 found = TRUE; 7370 } 7371 else if ((*mgp)->mg_len != -1) { 7372 /* If we can take advantage of a passed in offset, do so. */ 7373 /* In fact, offset0 is either 0, or less than offset, so don't 7374 need to worry about the other possibility. */ 7375 boffset = boffset0 7376 + sv_pos_u2b_midway(start + boffset0, send, 7377 uoffset - uoffset0, 7378 (*mgp)->mg_len - uoffset0); 7379 found = TRUE; 7380 } 7381 } 7382 7383 if (!found || PL_utf8cache < 0) { 7384 STRLEN real_boffset; 7385 uoffset -= uoffset0; 7386 real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0, 7387 send, &uoffset, &at_end, 7388 &canonical_position); 7389 uoffset += uoffset0; 7390 7391 if (found && PL_utf8cache < 0) 7392 assert_uft8_cache_coherent("sv_pos_u2b_cache", boffset, 7393 real_boffset, sv); 7394 boffset = real_boffset; 7395 } 7396 7397 if (PL_utf8cache && canonical_position && !SvGMAGICAL(sv) && SvPOK(sv)) { 7398 if (at_end) 7399 utf8_mg_len_cache_update(sv, mgp, uoffset); 7400 else 7401 utf8_mg_pos_cache_update(sv, mgp, boffset, uoffset, send - start); 7402 } 7403 return boffset; 7404 } 7405 7406 7407 /* 7408 =for apidoc sv_pos_u2b_flags 7409 7410 Converts the offset from a count of UTF-8 chars from 7411 the start of the string, to a count of the equivalent number of bytes; if 7412 C<lenp> is non-zero, it does the same to C<lenp>, but this time starting from 7413 C<offset>, rather than from the start 7414 of the string. Handles type coercion. 7415 C<flags> is passed to C<SvPV_flags>, and usually should be 7416 C<SV_GMAGIC|SV_CONST_RETURN> to handle magic. 7417 7418 =cut 7419 */ 7420 7421 /* 7422 * sv_pos_u2b_flags() uses, like sv_pos_b2u(), the mg_ptr of the potential 7423 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and 7424 * byte offsets. See also the comments of S_utf8_mg_pos_cache_update(). 7425 * 7426 */ 7427 7428 STRLEN 7429 Perl_sv_pos_u2b_flags(pTHX_ SV *const sv, STRLEN uoffset, STRLEN *const lenp, 7430 U32 flags) 7431 { 7432 const U8 *start; 7433 STRLEN len; 7434 STRLEN boffset; 7435 7436 PERL_ARGS_ASSERT_SV_POS_U2B_FLAGS; 7437 7438 start = (U8*)SvPV_flags(sv, len, flags); 7439 if (len) { 7440 const U8 * const send = start + len; 7441 MAGIC *mg = NULL; 7442 boffset = sv_pos_u2b_cached(sv, &mg, start, send, uoffset, 0, 0); 7443 7444 if (lenp 7445 && *lenp /* don't bother doing work for 0, as its bytes equivalent 7446 is 0, and *lenp is already set to that. */) { 7447 /* Convert the relative offset to absolute. */ 7448 const STRLEN uoffset2 = uoffset + *lenp; 7449 const STRLEN boffset2 7450 = sv_pos_u2b_cached(sv, &mg, start, send, uoffset2, 7451 uoffset, boffset) - boffset; 7452 7453 *lenp = boffset2; 7454 } 7455 } else { 7456 if (lenp) 7457 *lenp = 0; 7458 boffset = 0; 7459 } 7460 7461 return boffset; 7462 } 7463 7464 /* 7465 =for apidoc sv_pos_u2b 7466 7467 Converts the value pointed to by C<offsetp> from a count of UTF-8 chars from 7468 the start of the string, to a count of the equivalent number of bytes; if 7469 C<lenp> is non-zero, it does the same to C<lenp>, but this time starting from 7470 the offset, rather than from the start of the string. Handles magic and 7471 type coercion. 7472 7473 Use C<sv_pos_u2b_flags> in preference, which correctly handles strings longer 7474 than 2Gb. 7475 7476 =cut 7477 */ 7478 7479 /* 7480 * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential 7481 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and 7482 * byte offsets. See also the comments of S_utf8_mg_pos_cache_update(). 7483 * 7484 */ 7485 7486 /* This function is subject to size and sign problems */ 7487 7488 void 7489 Perl_sv_pos_u2b(pTHX_ SV *const sv, I32 *const offsetp, I32 *const lenp) 7490 { 7491 PERL_ARGS_ASSERT_SV_POS_U2B; 7492 7493 if (lenp) { 7494 STRLEN ulen = (STRLEN)*lenp; 7495 *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, &ulen, 7496 SV_GMAGIC|SV_CONST_RETURN); 7497 *lenp = (I32)ulen; 7498 } else { 7499 *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, NULL, 7500 SV_GMAGIC|SV_CONST_RETURN); 7501 } 7502 } 7503 7504 static void 7505 S_utf8_mg_len_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, 7506 const STRLEN ulen) 7507 { 7508 PERL_ARGS_ASSERT_UTF8_MG_LEN_CACHE_UPDATE; 7509 if (SvREADONLY(sv) || SvGMAGICAL(sv) || !SvPOK(sv)) 7510 return; 7511 7512 if (!*mgp && (SvTYPE(sv) < SVt_PVMG || 7513 !(*mgp = mg_find(sv, PERL_MAGIC_utf8)))) { 7514 *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, &PL_vtbl_utf8, 0, 0); 7515 } 7516 assert(*mgp); 7517 7518 (*mgp)->mg_len = ulen; 7519 } 7520 7521 /* Create and update the UTF8 magic offset cache, with the proffered utf8/ 7522 byte length pairing. The (byte) length of the total SV is passed in too, 7523 as blen, because for some (more esoteric) SVs, the call to SvPV_const() 7524 may not have updated SvCUR, so we can't rely on reading it directly. 7525 7526 The proffered utf8/byte length pairing isn't used if the cache already has 7527 two pairs, and swapping either for the proffered pair would increase the 7528 RMS of the intervals between known byte offsets. 7529 7530 The cache itself consists of 4 STRLEN values 7531 0: larger UTF-8 offset 7532 1: corresponding byte offset 7533 2: smaller UTF-8 offset 7534 3: corresponding byte offset 7535 7536 Unused cache pairs have the value 0, 0. 7537 Keeping the cache "backwards" means that the invariant of 7538 cache[0] >= cache[2] is maintained even with empty slots, which means that 7539 the code that uses it doesn't need to worry if only 1 entry has actually 7540 been set to non-zero. It also makes the "position beyond the end of the 7541 cache" logic much simpler, as the first slot is always the one to start 7542 from. 7543 */ 7544 static void 7545 S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN byte, 7546 const STRLEN utf8, const STRLEN blen) 7547 { 7548 STRLEN *cache; 7549 7550 PERL_ARGS_ASSERT_UTF8_MG_POS_CACHE_UPDATE; 7551 7552 if (SvREADONLY(sv)) 7553 return; 7554 7555 if (!*mgp && (SvTYPE(sv) < SVt_PVMG || 7556 !(*mgp = mg_find(sv, PERL_MAGIC_utf8)))) { 7557 *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 7558 0); 7559 (*mgp)->mg_len = -1; 7560 } 7561 assert(*mgp); 7562 7563 if (!(cache = (STRLEN *)(*mgp)->mg_ptr)) { 7564 Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); 7565 (*mgp)->mg_ptr = (char *) cache; 7566 } 7567 assert(cache); 7568 7569 if (PL_utf8cache < 0 && SvPOKp(sv)) { 7570 /* SvPOKp() because, if sv is a reference, then SvPVX() is actually 7571 a pointer. Note that we no longer cache utf8 offsets on refer- 7572 ences, but this check is still a good idea, for robustness. */ 7573 const U8 *start = (const U8 *) SvPVX_const(sv); 7574 const STRLEN realutf8 = utf8_length(start, start + byte); 7575 7576 assert_uft8_cache_coherent("utf8_mg_pos_cache_update", utf8, realutf8, 7577 sv); 7578 } 7579 7580 /* Cache is held with the later position first, to simplify the code 7581 that deals with unbounded ends. */ 7582 7583 ASSERT_UTF8_CACHE(cache); 7584 if (cache[1] == 0) { 7585 /* Cache is totally empty */ 7586 cache[0] = utf8; 7587 cache[1] = byte; 7588 } else if (cache[3] == 0) { 7589 if (byte > cache[1]) { 7590 /* New one is larger, so goes first. */ 7591 cache[2] = cache[0]; 7592 cache[3] = cache[1]; 7593 cache[0] = utf8; 7594 cache[1] = byte; 7595 } else { 7596 cache[2] = utf8; 7597 cache[3] = byte; 7598 } 7599 } else { 7600 /* float casts necessary? XXX */ 7601 #define THREEWAY_SQUARE(a,b,c,d) \ 7602 ((float)((d) - (c))) * ((float)((d) - (c))) \ 7603 + ((float)((c) - (b))) * ((float)((c) - (b))) \ 7604 + ((float)((b) - (a))) * ((float)((b) - (a))) 7605 7606 /* Cache has 2 slots in use, and we know three potential pairs. 7607 Keep the two that give the lowest RMS distance. Do the 7608 calculation in bytes simply because we always know the byte 7609 length. squareroot has the same ordering as the positive value, 7610 so don't bother with the actual square root. */ 7611 if (byte > cache[1]) { 7612 /* New position is after the existing pair of pairs. */ 7613 const float keep_earlier 7614 = THREEWAY_SQUARE(0, cache[3], byte, blen); 7615 const float keep_later 7616 = THREEWAY_SQUARE(0, cache[1], byte, blen); 7617 7618 if (keep_later < keep_earlier) { 7619 cache[2] = cache[0]; 7620 cache[3] = cache[1]; 7621 } 7622 cache[0] = utf8; 7623 cache[1] = byte; 7624 } 7625 else { 7626 const float keep_later = THREEWAY_SQUARE(0, byte, cache[1], blen); 7627 float b, c, keep_earlier; 7628 if (byte > cache[3]) { 7629 /* New position is between the existing pair of pairs. */ 7630 b = (float)cache[3]; 7631 c = (float)byte; 7632 } else { 7633 /* New position is before the existing pair of pairs. */ 7634 b = (float)byte; 7635 c = (float)cache[3]; 7636 } 7637 keep_earlier = THREEWAY_SQUARE(0, b, c, blen); 7638 if (byte > cache[3]) { 7639 if (keep_later < keep_earlier) { 7640 cache[2] = utf8; 7641 cache[3] = byte; 7642 } 7643 else { 7644 cache[0] = utf8; 7645 cache[1] = byte; 7646 } 7647 } 7648 else { 7649 if (! (keep_later < keep_earlier)) { 7650 cache[0] = cache[2]; 7651 cache[1] = cache[3]; 7652 } 7653 cache[2] = utf8; 7654 cache[3] = byte; 7655 } 7656 } 7657 } 7658 ASSERT_UTF8_CACHE(cache); 7659 } 7660 7661 /* We already know all of the way, now we may be able to walk back. The same 7662 assumption is made as in S_sv_pos_u2b_midway(), namely that walking 7663 backward is half the speed of walking forward. */ 7664 static STRLEN 7665 S_sv_pos_b2u_midway(pTHX_ const U8 *const s, const U8 *const target, 7666 const U8 *end, STRLEN endu) 7667 { 7668 const STRLEN forw = target - s; 7669 STRLEN backw = end - target; 7670 7671 PERL_ARGS_ASSERT_SV_POS_B2U_MIDWAY; 7672 7673 if (forw < 2 * backw) { 7674 return utf8_length(s, target); 7675 } 7676 7677 while (end > target) { 7678 end--; 7679 while (UTF8_IS_CONTINUATION(*end)) { 7680 end--; 7681 } 7682 endu--; 7683 } 7684 return endu; 7685 } 7686 7687 /* 7688 =for apidoc sv_pos_b2u_flags 7689 7690 Converts C<offset> from a count of bytes from the start of the string, to 7691 a count of the equivalent number of UTF-8 chars. Handles type coercion. 7692 C<flags> is passed to C<SvPV_flags>, and usually should be 7693 C<SV_GMAGIC|SV_CONST_RETURN> to handle magic. 7694 7695 =cut 7696 */ 7697 7698 /* 7699 * sv_pos_b2u_flags() uses, like sv_pos_u2b_flags(), the mg_ptr of the 7700 * potential PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 7701 * and byte offsets. 7702 * 7703 */ 7704 STRLEN 7705 Perl_sv_pos_b2u_flags(pTHX_ SV *const sv, STRLEN const offset, U32 flags) 7706 { 7707 const U8* s; 7708 STRLEN len = 0; /* Actually always set, but let's keep gcc happy. */ 7709 STRLEN blen; 7710 MAGIC* mg = NULL; 7711 const U8* send; 7712 bool found = FALSE; 7713 7714 PERL_ARGS_ASSERT_SV_POS_B2U_FLAGS; 7715 7716 s = (const U8*)SvPV_flags(sv, blen, flags); 7717 7718 if (blen < offset) 7719 Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset, blen=%" UVuf 7720 ", byte=%" UVuf, (UV)blen, (UV)offset); 7721 7722 send = s + offset; 7723 7724 if (!SvREADONLY(sv) 7725 && PL_utf8cache 7726 && SvTYPE(sv) >= SVt_PVMG 7727 && (mg = mg_find(sv, PERL_MAGIC_utf8))) 7728 { 7729 if (mg->mg_ptr) { 7730 STRLEN * const cache = (STRLEN *) mg->mg_ptr; 7731 if (cache[1] == offset) { 7732 /* An exact match. */ 7733 return cache[0]; 7734 } 7735 if (cache[3] == offset) { 7736 /* An exact match. */ 7737 return cache[2]; 7738 } 7739 7740 if (cache[1] < offset) { 7741 /* We already know part of the way. */ 7742 if (mg->mg_len != -1) { 7743 /* Actually, we know the end too. */ 7744 len = cache[0] 7745 + S_sv_pos_b2u_midway(aTHX_ s + cache[1], send, 7746 s + blen, mg->mg_len - cache[0]); 7747 } else { 7748 len = cache[0] + utf8_length(s + cache[1], send); 7749 } 7750 } 7751 else if (cache[3] < offset) { 7752 /* We're between the two cached pairs, so we do the calculation 7753 offset by the byte/utf-8 positions for the earlier pair, 7754 then add the utf-8 characters from the string start to 7755 there. */ 7756 len = S_sv_pos_b2u_midway(aTHX_ s + cache[3], send, 7757 s + cache[1], cache[0] - cache[2]) 7758 + cache[2]; 7759 7760 } 7761 else { /* cache[3] > offset */ 7762 len = S_sv_pos_b2u_midway(aTHX_ s, send, s + cache[3], 7763 cache[2]); 7764 7765 } 7766 ASSERT_UTF8_CACHE(cache); 7767 found = TRUE; 7768 } else if (mg->mg_len != -1) { 7769 len = S_sv_pos_b2u_midway(aTHX_ s, send, s + blen, mg->mg_len); 7770 found = TRUE; 7771 } 7772 } 7773 if (!found || PL_utf8cache < 0) { 7774 const STRLEN real_len = utf8_length(s, send); 7775 7776 if (found && PL_utf8cache < 0) 7777 assert_uft8_cache_coherent("sv_pos_b2u", len, real_len, sv); 7778 len = real_len; 7779 } 7780 7781 if (PL_utf8cache) { 7782 if (blen == offset) 7783 utf8_mg_len_cache_update(sv, &mg, len); 7784 else 7785 utf8_mg_pos_cache_update(sv, &mg, offset, len, blen); 7786 } 7787 7788 return len; 7789 } 7790 7791 /* 7792 =for apidoc sv_pos_b2u 7793 7794 Converts the value pointed to by C<offsetp> from a count of bytes from the 7795 start of the string, to a count of the equivalent number of UTF-8 chars. 7796 Handles magic and type coercion. 7797 7798 Use C<sv_pos_b2u_flags> in preference, which correctly handles strings 7799 longer than 2Gb. 7800 7801 =cut 7802 */ 7803 7804 /* 7805 * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential 7806 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and 7807 * byte offsets. 7808 * 7809 */ 7810 void 7811 Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp) 7812 { 7813 PERL_ARGS_ASSERT_SV_POS_B2U; 7814 7815 if (!sv) 7816 return; 7817 7818 *offsetp = (I32)sv_pos_b2u_flags(sv, (STRLEN)*offsetp, 7819 SV_GMAGIC|SV_CONST_RETURN); 7820 } 7821 7822 static void 7823 S_assert_uft8_cache_coherent(pTHX_ const char *const func, STRLEN from_cache, 7824 STRLEN real, SV *const sv) 7825 { 7826 PERL_ARGS_ASSERT_ASSERT_UFT8_CACHE_COHERENT; 7827 7828 /* As this is debugging only code, save space by keeping this test here, 7829 rather than inlining it in all the callers. */ 7830 if (from_cache == real) 7831 return; 7832 7833 /* Need to turn the assertions off otherwise we may recurse infinitely 7834 while printing error messages. */ 7835 SAVEI8(PL_utf8cache); 7836 PL_utf8cache = 0; 7837 Perl_croak(aTHX_ "panic: %s cache %" UVuf " real %" UVuf " for %" SVf, 7838 func, (UV) from_cache, (UV) real, SVfARG(sv)); 7839 } 7840 7841 /* 7842 =for apidoc sv_eq 7843 7844 Returns a boolean indicating whether the strings in the two SVs are 7845 identical. Is UTF-8 and S<C<'use bytes'>> aware, handles get magic, and will 7846 coerce its args to strings if necessary. 7847 7848 This function does not handle operator overloading. For a version that does, 7849 see instead C<sv_streq>. 7850 7851 =for apidoc sv_eq_flags 7852 7853 Returns a boolean indicating whether the strings in the two SVs are 7854 identical. Is UTF-8 and S<C<'use bytes'>> aware and coerces its args to strings 7855 if necessary. If the flags has the C<SV_GMAGIC> bit set, it handles get-magic, too. 7856 7857 This function does not handle operator overloading. For a version that does, 7858 see instead C<sv_streq_flags>. 7859 7860 =cut 7861 */ 7862 7863 I32 7864 Perl_sv_eq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) 7865 { 7866 const char *pv1; 7867 STRLEN cur1; 7868 const char *pv2; 7869 STRLEN cur2; 7870 7871 if (!sv1) { 7872 pv1 = ""; 7873 cur1 = 0; 7874 } 7875 else { 7876 /* if pv1 and pv2 are the same, second SvPV_const call may 7877 * invalidate pv1 (if we are handling magic), so we may need to 7878 * make a copy */ 7879 if (sv1 == sv2 && flags & SV_GMAGIC 7880 && (SvTHINKFIRST(sv1) || SvGMAGICAL(sv1))) { 7881 pv1 = SvPV_const(sv1, cur1); 7882 sv1 = newSVpvn_flags(pv1, cur1, SVs_TEMP | SvUTF8(sv2)); 7883 } 7884 pv1 = SvPV_flags_const(sv1, cur1, flags); 7885 } 7886 7887 if (!sv2){ 7888 pv2 = ""; 7889 cur2 = 0; 7890 } 7891 else 7892 pv2 = SvPV_flags_const(sv2, cur2, flags); 7893 7894 if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { 7895 /* Differing utf8ness. */ 7896 if (SvUTF8(sv1)) { 7897 /* sv1 is the UTF-8 one */ 7898 return bytes_cmp_utf8((const U8*)pv2, cur2, 7899 (const U8*)pv1, cur1) == 0; 7900 } 7901 else { 7902 /* sv2 is the UTF-8 one */ 7903 return bytes_cmp_utf8((const U8*)pv1, cur1, 7904 (const U8*)pv2, cur2) == 0; 7905 } 7906 } 7907 7908 if (cur1 == cur2) 7909 return (pv1 == pv2) || memEQ(pv1, pv2, cur1); 7910 else 7911 return 0; 7912 } 7913 7914 /* 7915 =for apidoc sv_streq_flags 7916 7917 Returns a boolean indicating whether the strings in the two SVs are 7918 identical. If the flags argument has the C<SV_GMAGIC> bit set, it handles 7919 get-magic too. Will coerce its args to strings if necessary. Treats 7920 C<NULL> as undef. Correctly handles the UTF8 flag. 7921 7922 If flags does not have the C<SV_SKIP_OVERLOAD> bit set, an attempt to use 7923 C<eq> overloading will be made. If such overloading does not exist or the 7924 flag is set, then regular string comparison will be used instead. 7925 7926 =for apidoc sv_streq 7927 7928 A convenient shortcut for calling C<sv_streq_flags> with the C<SV_GMAGIC> 7929 flag. This function basically behaves like the Perl code C<$sv1 eq $sv2>. 7930 7931 =cut 7932 */ 7933 7934 bool 7935 Perl_sv_streq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) 7936 { 7937 PERL_ARGS_ASSERT_SV_STREQ_FLAGS; 7938 7939 if(flags & SV_GMAGIC) { 7940 if(sv1) 7941 SvGETMAGIC(sv1); 7942 if(sv2) 7943 SvGETMAGIC(sv2); 7944 } 7945 7946 /* Treat NULL as undef */ 7947 if(!sv1) 7948 sv1 = &PL_sv_undef; 7949 if(!sv2) 7950 sv2 = &PL_sv_undef; 7951 7952 if(!(flags & SV_SKIP_OVERLOAD) && 7953 (SvAMAGIC(sv1) || SvAMAGIC(sv2))) { 7954 SV *ret = amagic_call(sv1, sv2, seq_amg, 0); 7955 if(ret) 7956 return SvTRUE(ret); 7957 } 7958 7959 return sv_eq_flags(sv1, sv2, 0); 7960 } 7961 7962 /* 7963 =for apidoc sv_numeq_flags 7964 7965 Returns a boolean indicating whether the numbers in the two SVs are 7966 identical. If the flags argument has the C<SV_GMAGIC> bit set, it handles 7967 get-magic too. Will coerce its args to numbers if necessary. Treats 7968 C<NULL> as undef. 7969 7970 If flags does not have the C<SV_SKIP_OVERLOAD> bit set, an attempt to use 7971 C<==> overloading will be made. If such overloading does not exist or the 7972 flag is set, then regular numerical comparison will be used instead. 7973 7974 =for apidoc sv_numeq 7975 7976 A convenient shortcut for calling C<sv_numeq_flags> with the C<SV_GMAGIC> 7977 flag. This function basically behaves like the Perl code C<$sv1 == $sv2>. 7978 7979 =cut 7980 */ 7981 7982 bool 7983 Perl_sv_numeq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) 7984 { 7985 PERL_ARGS_ASSERT_SV_NUMEQ_FLAGS; 7986 7987 if(flags & SV_GMAGIC) { 7988 if(sv1) 7989 SvGETMAGIC(sv1); 7990 if(sv2) 7991 SvGETMAGIC(sv2); 7992 } 7993 7994 /* Treat NULL as undef */ 7995 if(!sv1) 7996 sv1 = &PL_sv_undef; 7997 if(!sv2) 7998 sv2 = &PL_sv_undef; 7999 8000 if(!(flags & SV_SKIP_OVERLOAD) && 8001 (SvAMAGIC(sv1) || SvAMAGIC(sv2))) { 8002 SV *ret = amagic_call(sv1, sv2, eq_amg, 0); 8003 if(ret) 8004 return SvTRUE(ret); 8005 } 8006 8007 return do_ncmp(sv1, sv2) == 0; 8008 } 8009 8010 /* 8011 =for apidoc sv_cmp 8012 8013 Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the 8014 string in C<sv1> is less than, equal to, or greater than the string in 8015 C<sv2>. Is UTF-8 and S<C<'use bytes'>> aware, handles get magic, and will 8016 coerce its args to strings if necessary. See also C<L</sv_cmp_locale>>. 8017 8018 =for apidoc sv_cmp_flags 8019 8020 Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the 8021 string in C<sv1> is less than, equal to, or greater than the string in 8022 C<sv2>. Is UTF-8 and S<C<'use bytes'>> aware and will coerce its args to strings 8023 if necessary. If the flags has the C<SV_GMAGIC> bit set, it handles get magic. See 8024 also C<L</sv_cmp_locale_flags>>. 8025 8026 =cut 8027 */ 8028 8029 I32 8030 Perl_sv_cmp(pTHX_ SV *const sv1, SV *const sv2) 8031 { 8032 return sv_cmp_flags(sv1, sv2, SV_GMAGIC); 8033 } 8034 8035 I32 8036 Perl_sv_cmp_flags(pTHX_ SV *const sv1, SV *const sv2, 8037 const U32 flags) 8038 { 8039 STRLEN cur1, cur2; 8040 const char *pv1, *pv2; 8041 I32 cmp; 8042 SV *svrecode = NULL; 8043 8044 if (!sv1) { 8045 pv1 = ""; 8046 cur1 = 0; 8047 } 8048 else 8049 pv1 = SvPV_flags_const(sv1, cur1, flags); 8050 8051 if (!sv2) { 8052 pv2 = ""; 8053 cur2 = 0; 8054 } 8055 else 8056 pv2 = SvPV_flags_const(sv2, cur2, flags); 8057 8058 if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { 8059 /* Differing utf8ness. */ 8060 if (SvUTF8(sv1)) { 8061 const int retval = -bytes_cmp_utf8((const U8*)pv2, cur2, 8062 (const U8*)pv1, cur1); 8063 return retval ? retval < 0 ? -1 : +1 : 0; 8064 } 8065 else { 8066 const int retval = bytes_cmp_utf8((const U8*)pv1, cur1, 8067 (const U8*)pv2, cur2); 8068 return retval ? retval < 0 ? -1 : +1 : 0; 8069 } 8070 } 8071 8072 /* Here, if both are non-NULL, then they have the same UTF8ness. */ 8073 8074 if (!cur1) { 8075 cmp = cur2 ? -1 : 0; 8076 } else if (!cur2) { 8077 cmp = 1; 8078 } else { 8079 STRLEN shortest_len = cur1 < cur2 ? cur1 : cur2; 8080 8081 #ifdef EBCDIC 8082 if (! DO_UTF8(sv1)) { 8083 #endif 8084 const I32 retval = memcmp((const void*)pv1, 8085 (const void*)pv2, 8086 shortest_len); 8087 if (retval) { 8088 cmp = retval < 0 ? -1 : 1; 8089 } else if (cur1 == cur2) { 8090 cmp = 0; 8091 } else { 8092 cmp = cur1 < cur2 ? -1 : 1; 8093 } 8094 #ifdef EBCDIC 8095 } 8096 else { /* Both are to be treated as UTF-EBCDIC */ 8097 8098 /* EBCDIC UTF-8 is complicated by the fact that it is based on I8 8099 * which remaps code points 0-255. We therefore generally have to 8100 * unmap back to the original values to get an accurate comparison. 8101 * But we don't have to do that for UTF-8 invariants, as by 8102 * definition, they aren't remapped, nor do we have to do it for 8103 * above-latin1 code points, as they also aren't remapped. (This 8104 * code also works on ASCII platforms, but the memcmp() above is 8105 * much faster). */ 8106 8107 const char *e = pv1 + shortest_len; 8108 8109 /* Find the first bytes that differ between the two strings */ 8110 while (pv1 < e && *pv1 == *pv2) { 8111 pv1++; 8112 pv2++; 8113 } 8114 8115 8116 if (pv1 == e) { /* Are the same all the way to the end */ 8117 if (cur1 == cur2) { 8118 cmp = 0; 8119 } else { 8120 cmp = cur1 < cur2 ? -1 : 1; 8121 } 8122 } 8123 else /* Here *pv1 and *pv2 are not equal, but all bytes earlier 8124 * in the strings were. The current bytes may or may not be 8125 * at the beginning of a character. But neither or both are 8126 * (or else earlier bytes would have been different). And 8127 * if we are in the middle of a character, the two 8128 * characters are comprised of the same number of bytes 8129 * (because in this case the start bytes are the same, and 8130 * the start bytes encode the character's length). */ 8131 if (UTF8_IS_INVARIANT(*pv1)) 8132 { 8133 /* If both are invariants; can just compare directly */ 8134 if (UTF8_IS_INVARIANT(*pv2)) { 8135 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1; 8136 } 8137 else /* Since *pv1 is invariant, it is the whole character, 8138 which means it is at the beginning of a character. 8139 That means pv2 is also at the beginning of a 8140 character (see earlier comment). Since it isn't 8141 invariant, it must be a start byte. If it starts a 8142 character whose code point is above 255, that 8143 character is greater than any single-byte char, which 8144 *pv1 is */ 8145 if (UTF8_IS_ABOVE_LATIN1_START(*pv2)) 8146 { 8147 cmp = -1; 8148 } 8149 else { 8150 /* Here, pv2 points to a character composed of 2 bytes 8151 * whose code point is < 256. Get its code point and 8152 * compare with *pv1 */ 8153 cmp = ((U8) *pv1 < EIGHT_BIT_UTF8_TO_NATIVE(*pv2, *(pv2 + 1))) 8154 ? -1 8155 : 1; 8156 } 8157 } 8158 else /* The code point starting at pv1 isn't a single byte */ 8159 if (UTF8_IS_INVARIANT(*pv2)) 8160 { 8161 /* But here, the code point starting at *pv2 is a single byte, 8162 * and so *pv1 must begin a character, hence is a start byte. 8163 * If that character is above 255, it is larger than any 8164 * single-byte char, which *pv2 is */ 8165 if (UTF8_IS_ABOVE_LATIN1_START(*pv1)) { 8166 cmp = 1; 8167 } 8168 else { 8169 /* Here, pv1 points to a character composed of 2 bytes 8170 * whose code point is < 256. Get its code point and 8171 * compare with the single byte character *pv2 */ 8172 cmp = (EIGHT_BIT_UTF8_TO_NATIVE(*pv1, *(pv1 + 1)) < (U8) *pv2) 8173 ? -1 8174 : 1; 8175 } 8176 } 8177 else /* Here, we've ruled out either *pv1 and *pv2 being 8178 invariant. That means both are part of variants, but not 8179 necessarily at the start of a character */ 8180 if ( UTF8_IS_ABOVE_LATIN1_START(*pv1) 8181 || UTF8_IS_ABOVE_LATIN1_START(*pv2)) 8182 { 8183 /* Here, at least one is the start of a character, which means 8184 * the other is also a start byte. And the code point of at 8185 * least one of the characters is above 255. It is a 8186 * characteristic of UTF-EBCDIC that all start bytes for 8187 * above-latin1 code points are well behaved as far as code 8188 * point comparisons go, and all are larger than all other 8189 * start bytes, so the comparison with those is also well 8190 * behaved */ 8191 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1; 8192 } 8193 else { 8194 /* Here both *pv1 and *pv2 are part of variant characters. 8195 * They could be both continuations, or both start characters. 8196 * (One or both could even be an illegal start character (for 8197 * an overlong) which for the purposes of sorting we treat as 8198 * legal. */ 8199 if (UTF8_IS_CONTINUATION(*pv1)) { 8200 8201 /* If they are continuations for code points above 255, 8202 * then comparing the current byte is sufficient, as there 8203 * is no remapping of these and so the comparison is 8204 * well-behaved. We determine if they are such 8205 * continuations by looking at the preceding byte. It 8206 * could be a start byte, from which we can tell if it is 8207 * for an above 255 code point. Or it could be a 8208 * continuation, which means the character occupies at 8209 * least 3 bytes, so must be above 255. */ 8210 if ( UTF8_IS_CONTINUATION(*(pv2 - 1)) 8211 || UTF8_IS_ABOVE_LATIN1_START(*(pv2 -1))) 8212 { 8213 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1; 8214 goto cmp_done; 8215 } 8216 8217 /* Here, the continuations are for code points below 256; 8218 * back up one to get to the start byte */ 8219 pv1--; 8220 pv2--; 8221 } 8222 8223 /* We need to get the actual native code point of each of these 8224 * variants in order to compare them */ 8225 cmp = ( EIGHT_BIT_UTF8_TO_NATIVE(*pv1, *(pv1 + 1)) 8226 < EIGHT_BIT_UTF8_TO_NATIVE(*pv2, *(pv2 + 1))) 8227 ? -1 8228 : 1; 8229 } 8230 } 8231 cmp_done: ; 8232 #endif 8233 } 8234 8235 SvREFCNT_dec(svrecode); 8236 8237 return cmp; 8238 } 8239 8240 /* 8241 =for apidoc sv_cmp_locale 8242 8243 Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and 8244 S<C<'use bytes'>> aware, handles get magic, and will coerce its args to strings 8245 if necessary. See also C<L</sv_cmp>>. 8246 8247 =for apidoc sv_cmp_locale_flags 8248 8249 Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and 8250 S<C<'use bytes'>> aware and will coerce its args to strings if necessary. If 8251 the flags contain C<SV_GMAGIC>, it handles get magic. See also 8252 C<L</sv_cmp_flags>>. 8253 8254 =cut 8255 */ 8256 8257 I32 8258 Perl_sv_cmp_locale(pTHX_ SV *const sv1, SV *const sv2) 8259 { 8260 return sv_cmp_locale_flags(sv1, sv2, SV_GMAGIC); 8261 } 8262 8263 I32 8264 Perl_sv_cmp_locale_flags(pTHX_ SV *const sv1, SV *const sv2, 8265 const U32 flags) 8266 { 8267 #ifdef USE_LOCALE_COLLATE 8268 8269 char *pv1, *pv2; 8270 STRLEN len1, len2; 8271 I32 retval; 8272 8273 if (PL_collation_standard) 8274 goto raw_compare; 8275 8276 len1 = len2 = 0; 8277 8278 /* Revert to using raw compare if both operands exist, but either one 8279 * doesn't transform properly for collation */ 8280 if (sv1 && sv2) { 8281 pv1 = sv_collxfrm_flags(sv1, &len1, flags); 8282 if (! pv1) { 8283 goto raw_compare; 8284 } 8285 pv2 = sv_collxfrm_flags(sv2, &len2, flags); 8286 if (! pv2) { 8287 goto raw_compare; 8288 } 8289 } 8290 else { 8291 pv1 = sv1 ? sv_collxfrm_flags(sv1, &len1, flags) : (char *) NULL; 8292 pv2 = sv2 ? sv_collxfrm_flags(sv2, &len2, flags) : (char *) NULL; 8293 } 8294 8295 if (!pv1 || !len1) { 8296 if (pv2 && len2) 8297 return -1; 8298 else 8299 goto raw_compare; 8300 } 8301 else { 8302 if (!pv2 || !len2) 8303 return 1; 8304 } 8305 8306 retval = memcmp((void*)pv1, (void*)pv2, len1 < len2 ? len1 : len2); 8307 8308 if (retval) 8309 return retval < 0 ? -1 : 1; 8310 8311 /* 8312 * When the result of collation is equality, that doesn't mean 8313 * that there are no differences -- some locales exclude some 8314 * characters from consideration. So to avoid false equalities, 8315 * we use the raw string as a tiebreaker. 8316 */ 8317 8318 raw_compare: 8319 /* FALLTHROUGH */ 8320 8321 #else 8322 PERL_UNUSED_ARG(flags); 8323 #endif /* USE_LOCALE_COLLATE */ 8324 8325 return sv_cmp(sv1, sv2); 8326 } 8327 8328 8329 #ifdef USE_LOCALE_COLLATE 8330 8331 /* 8332 =for apidoc sv_collxfrm 8333 8334 This calls C<sv_collxfrm_flags> with the SV_GMAGIC flag. See 8335 C<L</sv_collxfrm_flags>>. 8336 8337 =for apidoc sv_collxfrm_flags 8338 8339 Add Collate Transform magic to an SV if it doesn't already have it. If the 8340 flags contain C<SV_GMAGIC>, it handles get-magic. 8341 8342 Any scalar variable may carry C<PERL_MAGIC_collxfrm> magic that contains the 8343 scalar data of the variable, but transformed to such a format that a normal 8344 memory comparison can be used to compare the data according to the locale 8345 settings. 8346 8347 =cut 8348 */ 8349 8350 char * 8351 Perl_sv_collxfrm_flags(pTHX_ SV *const sv, STRLEN *const nxp, const I32 flags) 8352 { 8353 MAGIC *mg; 8354 8355 PERL_ARGS_ASSERT_SV_COLLXFRM_FLAGS; 8356 8357 mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_collxfrm) : (MAGIC *) NULL; 8358 8359 /* If we don't have collation magic on 'sv', or the locale has changed 8360 * since the last time we calculated it, get it and save it now */ 8361 if (!mg || !mg->mg_ptr || *(U32*)mg->mg_ptr != PL_collation_ix) { 8362 const char *s; 8363 char *xf; 8364 STRLEN len, xlen; 8365 8366 /* Free the old space */ 8367 if (mg) 8368 Safefree(mg->mg_ptr); 8369 8370 s = SvPV_flags_const(sv, len, flags); 8371 if ((xf = _mem_collxfrm(s, len, &xlen, cBOOL(SvUTF8(sv))))) { 8372 if (! mg) { 8373 mg = sv_magicext(sv, 0, PERL_MAGIC_collxfrm, &PL_vtbl_collxfrm, 8374 0, 0); 8375 assert(mg); 8376 } 8377 mg->mg_ptr = xf; 8378 mg->mg_len = xlen; 8379 } 8380 else { 8381 if (mg) { 8382 mg->mg_ptr = NULL; 8383 mg->mg_len = -1; 8384 } 8385 } 8386 } 8387 8388 if (mg && mg->mg_ptr) { 8389 *nxp = mg->mg_len; 8390 return mg->mg_ptr + sizeof(PL_collation_ix); 8391 } 8392 else { 8393 *nxp = 0; 8394 return NULL; 8395 } 8396 } 8397 8398 #endif /* USE_LOCALE_COLLATE */ 8399 8400 static char * 8401 S_sv_gets_append_to_utf8(pTHX_ SV *const sv, PerlIO *const fp, I32 append) 8402 { 8403 SV * const tsv = newSV_type(SVt_NULL); 8404 ENTER; 8405 SAVEFREESV(tsv); 8406 sv_gets(tsv, fp, 0); 8407 sv_utf8_upgrade_nomg(tsv); 8408 SvCUR_set(sv,append); 8409 sv_catsv(sv,tsv); 8410 LEAVE; 8411 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL; 8412 } 8413 8414 static char * 8415 S_sv_gets_read_record(pTHX_ SV *const sv, PerlIO *const fp, I32 append) 8416 { 8417 SSize_t bytesread; 8418 const STRLEN recsize = SvUV(SvRV(PL_rs)); /* RsRECORD() guarantees > 0. */ 8419 /* Grab the size of the record we're getting */ 8420 char *buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append; 8421 8422 /* Go yank in */ 8423 #ifdef __VMS 8424 int fd; 8425 Stat_t st; 8426 8427 /* With a true, record-oriented file on VMS, we need to use read directly 8428 * to ensure that we respect RMS record boundaries. The user is responsible 8429 * for providing a PL_rs value that corresponds to the FAB$W_MRS (maximum 8430 * record size) field. N.B. This is likely to produce invalid results on 8431 * varying-width character data when a record ends mid-character. 8432 */ 8433 fd = PerlIO_fileno(fp); 8434 if (fd != -1 8435 && PerlLIO_fstat(fd, &st) == 0 8436 && (st.st_fab_rfm == FAB$C_VAR 8437 || st.st_fab_rfm == FAB$C_VFC 8438 || st.st_fab_rfm == FAB$C_FIX)) { 8439 8440 bytesread = PerlLIO_read(fd, buffer, recsize); 8441 } 8442 else /* in-memory file from PerlIO::Scalar 8443 * or not a record-oriented file 8444 */ 8445 #endif 8446 { 8447 bytesread = PerlIO_read(fp, buffer, recsize); 8448 8449 /* At this point, the logic in sv_get() means that sv will 8450 be treated as utf-8 if the handle is utf8. 8451 */ 8452 if (PerlIO_isutf8(fp) && bytesread > 0) { 8453 char *bend = buffer + bytesread; 8454 char *bufp = buffer; 8455 size_t charcount = 0; 8456 bool charstart = TRUE; 8457 STRLEN skip = 0; 8458 8459 while (charcount < recsize) { 8460 /* count accumulated characters */ 8461 while (bufp < bend) { 8462 if (charstart) { 8463 skip = UTF8SKIP(bufp); 8464 } 8465 if (bufp + skip > bend) { 8466 /* partial at the end */ 8467 charstart = FALSE; 8468 break; 8469 } 8470 else { 8471 ++charcount; 8472 bufp += skip; 8473 charstart = TRUE; 8474 } 8475 } 8476 8477 if (charcount < recsize) { 8478 STRLEN readsize; 8479 STRLEN bufp_offset = bufp - buffer; 8480 SSize_t morebytesread; 8481 8482 /* originally I read enough to fill any incomplete 8483 character and the first byte of the next 8484 character if needed, but if there's many 8485 multi-byte encoded characters we're going to be 8486 making a read call for every character beyond 8487 the original read size. 8488 8489 So instead, read the rest of the character if 8490 any, and enough bytes to match at least the 8491 start bytes for each character we're going to 8492 read. 8493 */ 8494 if (charstart) 8495 readsize = recsize - charcount; 8496 else 8497 readsize = skip - (bend - bufp) + recsize - charcount - 1; 8498 buffer = SvGROW(sv, append + bytesread + readsize + 1) + append; 8499 bend = buffer + bytesread; 8500 morebytesread = PerlIO_read(fp, bend, readsize); 8501 if (morebytesread <= 0) { 8502 /* we're done, if we still have incomplete 8503 characters the check code in sv_gets() will 8504 warn about them. 8505 8506 I'd originally considered doing 8507 PerlIO_ungetc() on all but the lead 8508 character of the incomplete character, but 8509 read() doesn't do that, so I don't. 8510 */ 8511 break; 8512 } 8513 8514 /* prepare to scan some more */ 8515 bytesread += morebytesread; 8516 bend = buffer + bytesread; 8517 bufp = buffer + bufp_offset; 8518 } 8519 } 8520 } 8521 } 8522 8523 if (bytesread < 0) 8524 bytesread = 0; 8525 SvCUR_set(sv, bytesread + append); 8526 buffer[bytesread] = '\0'; 8527 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL; 8528 } 8529 8530 /* 8531 =for apidoc sv_gets 8532 8533 Get a line from the filehandle and store it into the SV, optionally 8534 appending to the currently-stored string. If C<append> is not 0, the 8535 line is appended to the SV instead of overwriting it. C<append> should 8536 be set to the byte offset that the appended string should start at 8537 in the SV (typically, C<SvCUR(sv)> is a suitable choice). 8538 8539 =cut 8540 */ 8541 8542 char * 8543 Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) 8544 { 8545 const char *rsptr; 8546 STRLEN rslen; 8547 STDCHAR rslast; 8548 STDCHAR *bp; 8549 SSize_t cnt; 8550 int i = 0; 8551 int rspara = 0; 8552 8553 PERL_ARGS_ASSERT_SV_GETS; 8554 8555 if (SvTHINKFIRST(sv)) 8556 sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV); 8557 /* XXX. If you make this PVIV, then copy on write can copy scalars read 8558 from <>. 8559 However, perlbench says it's slower, because the existing swipe code 8560 is faster than copy on write. 8561 Swings and roundabouts. */ 8562 SvUPGRADE(sv, SVt_PV); 8563 8564 if (append) { 8565 /* line is going to be appended to the existing buffer in the sv */ 8566 if (PerlIO_isutf8(fp)) { 8567 if (!SvUTF8(sv)) { 8568 sv_utf8_upgrade_nomg(sv); 8569 sv_pos_u2b(sv,&append,0); 8570 } 8571 } else if (SvUTF8(sv)) { 8572 return S_sv_gets_append_to_utf8(aTHX_ sv, fp, append); 8573 } 8574 } 8575 8576 SvPOK_only(sv); 8577 if (!append) { 8578 /* not appending - "clear" the string by setting SvCUR to 0, 8579 * the pv is still avaiable. */ 8580 SvCUR_set(sv,0); 8581 } 8582 if (PerlIO_isutf8(fp)) 8583 SvUTF8_on(sv); 8584 8585 if (IN_PERL_COMPILETIME) { 8586 /* we always read code in line mode */ 8587 rsptr = "\n"; 8588 rslen = 1; 8589 } 8590 else if (RsSNARF(PL_rs)) { 8591 /* If it is a regular disk file use size from stat() as estimate 8592 of amount we are going to read -- may result in mallocing 8593 more memory than we really need if the layers below reduce 8594 the size we read (e.g. CRLF or a gzip layer). 8595 */ 8596 Stat_t st; 8597 int fd = PerlIO_fileno(fp); 8598 if (fd >= 0 && (PerlLIO_fstat(fd, &st) == 0) && S_ISREG(st.st_mode)) { 8599 const Off_t offset = PerlIO_tell(fp); 8600 if (offset != (Off_t) -1 && st.st_size + append > offset) { 8601 #ifdef PERL_COPY_ON_WRITE 8602 /* Add an extra byte for the sake of copy-on-write's 8603 * buffer reference count. */ 8604 (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 2)); 8605 #else 8606 (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 1)); 8607 #endif 8608 } 8609 } 8610 rsptr = NULL; 8611 rslen = 0; 8612 } 8613 else if (RsRECORD(PL_rs)) { 8614 return S_sv_gets_read_record(aTHX_ sv, fp, append); 8615 } 8616 else if (RsPARA(PL_rs)) { 8617 rsptr = "\n\n"; 8618 rslen = 2; 8619 rspara = 1; 8620 } 8621 else { 8622 /* Get $/ i.e. PL_rs into same encoding as stream wants */ 8623 if (PerlIO_isutf8(fp)) { 8624 rsptr = SvPVutf8(PL_rs, rslen); 8625 } 8626 else { 8627 if (SvUTF8(PL_rs)) { 8628 if (!sv_utf8_downgrade(PL_rs, TRUE)) { 8629 Perl_croak(aTHX_ "Wide character in $/"); 8630 } 8631 } 8632 /* extract the raw pointer to the record separator */ 8633 rsptr = SvPV_const(PL_rs, rslen); 8634 } 8635 } 8636 8637 /* rslast is the last character in the record separator 8638 * note we don't use rslast except when rslen is true, so the 8639 * null assign is a placeholder. */ 8640 rslast = rslen ? rsptr[rslen - 1] : '\0'; 8641 8642 if (rspara) { /* have to do this both before and after */ 8643 /* to make sure file boundaries work right */ 8644 while (1) { 8645 if (PerlIO_eof(fp)) 8646 return 0; 8647 i = PerlIO_getc(fp); 8648 if (i != '\n') { 8649 if (i == -1) 8650 return 0; 8651 PerlIO_ungetc(fp,i); 8652 break; 8653 } 8654 } 8655 } 8656 8657 /* See if we know enough about I/O mechanism to cheat it ! */ 8658 8659 /* This used to be #ifdef test - it is made run-time test for ease 8660 of abstracting out stdio interface. One call should be cheap 8661 enough here - and may even be a macro allowing compile 8662 time optimization. 8663 */ 8664 8665 if (PerlIO_fast_gets(fp)) { 8666 /* 8667 * We can do buffer based IO operations on this filehandle. 8668 * 8669 * This means we can bypass a lot of subcalls and process 8670 * the buffer directly, it also means we know the upper bound 8671 * on the amount of data we might read of the current buffer 8672 * into our sv. Knowing this allows us to preallocate the pv 8673 * to be able to hold that maximum, which allows us to simplify 8674 * a lot of logic. */ 8675 8676 /* 8677 * We're going to steal some values from the stdio struct 8678 * and put EVERYTHING in the innermost loop into registers. 8679 */ 8680 STDCHAR *ptr; /* pointer into fp's read-ahead buffer */ 8681 STRLEN bpx; /* length of the data in the target sv 8682 used to fix pointers after a SvGROW */ 8683 I32 shortbuffered; /* If the pv buffer is shorter than the amount 8684 of data left in the read-ahead buffer. 8685 If 0 then the pv buffer can hold the full 8686 amount left, otherwise this is the amount it 8687 can hold. */ 8688 8689 /* Here is some breathtakingly efficient cheating */ 8690 8691 /* When you read the following logic resist the urge to think 8692 * of record separators that are 1 byte long. They are an 8693 * uninteresting special (simple) case. 8694 * 8695 * Instead think of record separators which are at least 2 bytes 8696 * long, and keep in mind that we need to deal with such 8697 * separators when they cross a read-ahead buffer boundary. 8698 * 8699 * Also consider that we need to gracefully deal with separators 8700 * that may be longer than a single read ahead buffer. 8701 * 8702 * Lastly do not forget we want to copy the delimiter as well. We 8703 * are copying all data in the file _up_to_and_including_ the separator 8704 * itself. 8705 * 8706 * Now that you have all that in mind here is what is happening below: 8707 * 8708 * 1. When we first enter the loop we do some memory book keeping to see 8709 * how much free space there is in the target SV. (This sub assumes that 8710 * it is operating on the same SV most of the time via $_ and that it is 8711 * going to be able to reuse the same pv buffer each call.) If there is 8712 * "enough" room then we set "shortbuffered" to how much space there is 8713 * and start reading forward. 8714 * 8715 * 2. When we scan forward we copy from the read-ahead buffer to the target 8716 * SV's pv buffer. While we go we watch for the end of the read-ahead buffer, 8717 * and the end of the of pv, as well as for the "rslast", which is the last 8718 * char of the separator. 8719 * 8720 * 3. When scanning forward if we see rslast then we jump backwards in *pv* 8721 * (which has a "complete" record up to the point we saw rslast) and check 8722 * it to see if it matches the separator. If it does we are done. If it doesn't 8723 * we continue on with the scan/copy. 8724 * 8725 * 4. If we run out of read-ahead buffer (cnt goes to 0) then we have to get 8726 * the IO system to read the next buffer. We do this by doing a getc(), which 8727 * returns a single char read (or EOF), and prefills the buffer, and also 8728 * allows us to find out how full the buffer is. We use this information to 8729 * SvGROW() the sv to the size remaining in the buffer, after which we copy 8730 * the returned single char into the target sv, and then go back into scan 8731 * forward mode. 8732 * 8733 * 5. If we run out of write-buffer then we SvGROW() it by the size of the 8734 * remaining space in the read-buffer. 8735 * 8736 * Note that this code despite its twisty-turny nature is pretty darn slick. 8737 * It manages single byte separators, multi-byte cross boundary separators, 8738 * and cross-read-buffer separators cleanly and efficiently at the cost 8739 * of potentially greatly overallocating the target SV. 8740 * 8741 * Yves 8742 */ 8743 8744 8745 /* get the number of bytes remaining in the read-ahead buffer 8746 * on first call on a given fp this will return 0.*/ 8747 cnt = PerlIO_get_cnt(fp); 8748 8749 /* make sure we have the room */ 8750 if ((I32)(SvLEN(sv) - append) <= cnt + 1) { 8751 /* Not room for all of it 8752 if we are looking for a separator and room for some 8753 */ 8754 if (rslen && cnt > 80 && (I32)SvLEN(sv) > append) { 8755 /* just process what we have room for */ 8756 shortbuffered = cnt - SvLEN(sv) + append + 1; 8757 cnt -= shortbuffered; 8758 } 8759 else { 8760 /* ensure that the target sv has enough room to hold 8761 * the rest of the read-ahead buffer */ 8762 shortbuffered = 0; 8763 /* remember that cnt can be negative */ 8764 SvGROW(sv, (STRLEN)(append + (cnt <= 0 ? 2 : (cnt + 1)))); 8765 } 8766 } 8767 else { 8768 /* we have enough room to hold the full buffer, lets scream */ 8769 shortbuffered = 0; 8770 } 8771 8772 /* extract the pointer to sv's string buffer, offset by append as necessary */ 8773 bp = (STDCHAR*)SvPVX_const(sv) + append; /* move these two too to registers */ 8774 /* extract the point to the read-ahead buffer */ 8775 ptr = (STDCHAR*)PerlIO_get_ptr(fp); 8776 8777 /* some trace debug output */ 8778 DEBUG_P(PerlIO_printf(Perl_debug_log, 8779 "Screamer: entering, ptr=%" UVuf ", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); 8780 DEBUG_P(PerlIO_printf(Perl_debug_log, 8781 "Screamer: entering: PerlIO * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" 8782 UVuf "\n", 8783 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 8784 PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0))); 8785 8786 for (;;) { 8787 screamer: 8788 /* if there is stuff left in the read-ahead buffer */ 8789 if (cnt > 0) { 8790 /* if there is a separator */ 8791 if (rslen) { 8792 /* find next rslast */ 8793 STDCHAR *p; 8794 8795 /* shortcut common case of blank line */ 8796 cnt--; 8797 if ((*bp++ = *ptr++) == rslast) 8798 goto thats_all_folks; 8799 8800 p = (STDCHAR *)memchr(ptr, rslast, cnt); 8801 if (p) { 8802 SSize_t got = p - ptr + 1; 8803 Copy(ptr, bp, got, STDCHAR); 8804 ptr += got; 8805 bp += got; 8806 cnt -= got; 8807 goto thats_all_folks; 8808 } 8809 Copy(ptr, bp, cnt, STDCHAR); 8810 ptr += cnt; 8811 bp += cnt; 8812 cnt = 0; 8813 } 8814 else { 8815 /* no separator, slurp the full buffer */ 8816 Copy(ptr, bp, cnt, char); /* this | eat */ 8817 bp += cnt; /* screams | dust */ 8818 ptr += cnt; /* louder | sed :-) */ 8819 cnt = 0; 8820 assert (!shortbuffered); 8821 goto cannot_be_shortbuffered; 8822 } 8823 } 8824 8825 if (shortbuffered) { /* oh well, must extend */ 8826 /* we didnt have enough room to fit the line into the target buffer 8827 * so we must extend the target buffer and keep going */ 8828 cnt = shortbuffered; 8829 shortbuffered = 0; 8830 bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ 8831 SvCUR_set(sv, bpx); 8832 /* extned the target sv's buffer so it can hold the full read-ahead buffer */ 8833 SvGROW(sv, SvLEN(sv) + append + cnt + 2); 8834 bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ 8835 continue; 8836 } 8837 8838 cannot_be_shortbuffered: 8839 /* we need to refill the read-ahead buffer if possible */ 8840 8841 DEBUG_P(PerlIO_printf(Perl_debug_log, 8842 "Screamer: going to getc, ptr=%" UVuf ", cnt=%" IVdf "\n", 8843 PTR2UV(ptr),(IV)cnt)); 8844 PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* deregisterize cnt and ptr */ 8845 8846 DEBUG_Pv(PerlIO_printf(Perl_debug_log, 8847 "Screamer: pre: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", 8848 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 8849 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); 8850 8851 /* 8852 call PerlIO_getc() to let it prefill the lookahead buffer 8853 8854 This used to call 'filbuf' in stdio form, but as that behaves like 8855 getc when cnt <= 0 we use PerlIO_getc here to avoid introducing 8856 another abstraction. 8857 8858 Note we have to deal with the char in 'i' if we are not at EOF 8859 */ 8860 bpx = bp - (STDCHAR*)SvPVX_const(sv); 8861 /* signals might be called here, possibly modifying sv */ 8862 i = PerlIO_getc(fp); /* get more characters */ 8863 bp = (STDCHAR*)SvPVX_const(sv) + bpx; 8864 8865 DEBUG_Pv(PerlIO_printf(Perl_debug_log, 8866 "Screamer: post: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", 8867 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 8868 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); 8869 8870 /* find out how much is left in the read-ahead buffer, and rextract its pointer */ 8871 cnt = PerlIO_get_cnt(fp); 8872 ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */ 8873 DEBUG_P(PerlIO_printf(Perl_debug_log, 8874 "Screamer: after getc, ptr=%" UVuf ", cnt=%" IVdf "\n", 8875 PTR2UV(ptr),(IV)cnt)); 8876 8877 if (i == EOF) /* all done for ever? */ 8878 goto thats_really_all_folks; 8879 8880 /* make sure we have enough space in the target sv */ 8881 bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ 8882 SvCUR_set(sv, bpx); 8883 SvGROW(sv, bpx + cnt + 2); 8884 bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ 8885 8886 /* copy of the char we got from getc() */ 8887 *bp++ = (STDCHAR)i; /* store character from PerlIO_getc */ 8888 8889 /* make sure we deal with the i being the last character of a separator */ 8890 if (rslen && (STDCHAR)i == rslast) /* all done for now? */ 8891 goto thats_all_folks; 8892 } 8893 8894 thats_all_folks: 8895 /* check if we have actually found the separator - only really applies 8896 * when rslen > 1 */ 8897 if ((rslen > 1 && (STRLEN)(bp - (STDCHAR*)SvPVX_const(sv)) < rslen) || 8898 memNE((char*)bp - rslen, rsptr, rslen)) 8899 goto screamer; /* go back to the fray */ 8900 thats_really_all_folks: 8901 if (shortbuffered) 8902 cnt += shortbuffered; 8903 DEBUG_P(PerlIO_printf(Perl_debug_log, 8904 "Screamer: quitting, ptr=%" UVuf ", cnt=%" IVdf "\n",PTR2UV(ptr),(IV)cnt)); 8905 PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* put these back or we're in trouble */ 8906 DEBUG_P(PerlIO_printf(Perl_debug_log, 8907 "Screamer: end: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf 8908 "\n", 8909 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 8910 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); 8911 *bp = '\0'; 8912 SvCUR_set(sv, bp - (STDCHAR*)SvPVX_const(sv)); /* set length */ 8913 DEBUG_P(PerlIO_printf(Perl_debug_log, 8914 "Screamer: done, len=%ld, string=|%.*s|\n", 8915 (long)SvCUR(sv),(int)SvCUR(sv),SvPVX_const(sv))); 8916 } 8917 else 8918 { 8919 /*The big, slow, and stupid way. */ 8920 STDCHAR buf[8192]; 8921 8922 screamer2: 8923 if (rslen) { 8924 const STDCHAR * const bpe = buf + sizeof(buf); 8925 bp = buf; 8926 while ((i = PerlIO_getc(fp)) != EOF && (*bp++ = (STDCHAR)i) != rslast && bp < bpe) 8927 ; /* keep reading */ 8928 cnt = bp - buf; 8929 } 8930 else { 8931 cnt = PerlIO_read(fp,(char*)buf, sizeof(buf)); 8932 /* Accommodate broken VAXC compiler, which applies U8 cast to 8933 * both args of ?: operator, causing EOF to change into 255 8934 */ 8935 if (cnt > 0) 8936 i = (U8)buf[cnt - 1]; 8937 else 8938 i = EOF; 8939 } 8940 8941 if (cnt < 0) 8942 cnt = 0; /* we do need to re-set the sv even when cnt <= 0 */ 8943 if (append) 8944 sv_catpvn_nomg(sv, (char *) buf, cnt); 8945 else 8946 sv_setpvn(sv, (char *) buf, cnt); /* "nomg" is implied */ 8947 8948 if (i != EOF && /* joy */ 8949 (!rslen || 8950 SvCUR(sv) < rslen || 8951 memNE(SvPVX_const(sv) + SvCUR(sv) - rslen, rsptr, rslen))) 8952 { 8953 append = -1; 8954 /* 8955 * If we're reading from a TTY and we get a short read, 8956 * indicating that the user hit his EOF character, we need 8957 * to notice it now, because if we try to read from the TTY 8958 * again, the EOF condition will disappear. 8959 * 8960 * The comparison of cnt to sizeof(buf) is an optimization 8961 * that prevents unnecessary calls to feof(). 8962 * 8963 * - jik 9/25/96 8964 */ 8965 if (!(cnt < (I32)sizeof(buf) && PerlIO_eof(fp))) 8966 goto screamer2; 8967 } 8968 8969 } 8970 8971 if (rspara) { /* have to do this both before and after */ 8972 while (i != EOF) { /* to make sure file boundaries work right */ 8973 i = PerlIO_getc(fp); 8974 if (i != '\n') { 8975 PerlIO_ungetc(fp,i); 8976 break; 8977 } 8978 } 8979 } 8980 8981 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL; 8982 } 8983 8984 /* 8985 =for apidoc sv_inc 8986 =for apidoc_item sv_inc_nomg 8987 8988 These auto-increment the value in the SV, doing string to numeric conversion 8989 if necessary. They both handle operator overloading. 8990 8991 They differ only in that C<sv_inc> performs 'get' magic; C<sv_inc_nomg> skips 8992 any magic. 8993 8994 =cut 8995 */ 8996 8997 void 8998 Perl_sv_inc(pTHX_ SV *const sv) 8999 { 9000 if (!sv) 9001 return; 9002 SvGETMAGIC(sv); 9003 sv_inc_nomg(sv); 9004 } 9005 9006 void 9007 Perl_sv_inc_nomg(pTHX_ SV *const sv) 9008 { 9009 char *d; 9010 int flags; 9011 9012 if (!sv) 9013 return; 9014 if (SvTHINKFIRST(sv)) { 9015 if (SvREADONLY(sv)) { 9016 Perl_croak_no_modify(); 9017 } 9018 if (SvROK(sv)) { 9019 IV i; 9020 if (SvAMAGIC(sv) && AMG_CALLunary(sv, inc_amg)) 9021 return; 9022 i = PTR2IV(SvRV(sv)); 9023 sv_unref(sv); 9024 sv_setiv(sv, i); 9025 } 9026 else sv_force_normal_flags(sv, 0); 9027 } 9028 flags = SvFLAGS(sv); 9029 if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { 9030 /* It's (privately or publicly) a float, but not tested as an 9031 integer, so test it to see. */ 9032 (void) SvIV(sv); 9033 flags = SvFLAGS(sv); 9034 } 9035 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { 9036 /* It's publicly an integer, or privately an integer-not-float */ 9037 #ifdef PERL_PRESERVE_IVUV 9038 oops_its_int: 9039 #endif 9040 if (SvIsUV(sv)) { 9041 if (SvUVX(sv) == UV_MAX) 9042 sv_setnv(sv, UV_MAX_P1); 9043 else { 9044 (void)SvIOK_only_UV(sv); 9045 SvUV_set(sv, SvUVX(sv) + 1); 9046 } 9047 } else { 9048 if (SvIVX(sv) == IV_MAX) 9049 sv_setuv(sv, (UV)IV_MAX + 1); 9050 else { 9051 (void)SvIOK_only(sv); 9052 SvIV_set(sv, SvIVX(sv) + 1); 9053 } 9054 } 9055 return; 9056 } 9057 if (flags & SVp_NOK) { 9058 const NV was = SvNVX(sv); 9059 if (NV_OVERFLOWS_INTEGERS_AT != 0.0 && 9060 /* If NVX was NaN, the following comparisons return always false */ 9061 UNLIKELY(was >= NV_OVERFLOWS_INTEGERS_AT || 9062 was < -NV_OVERFLOWS_INTEGERS_AT) && 9063 #if defined(NAN_COMPARE_BROKEN) 9064 LIKELY(!Perl_isinfnan(was)) 9065 #else 9066 LIKELY(!Perl_isinf(was)) 9067 #endif 9068 ) { 9069 /* diag_listed_as: Lost precision when %s %f by 1 */ 9070 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), 9071 "Lost precision when incrementing %" NVff " by 1", 9072 was); 9073 } 9074 (void)SvNOK_only(sv); 9075 SvNV_set(sv, was + 1.0); 9076 return; 9077 } 9078 9079 /* treat AV/HV/CV/FM/IO and non-fake GVs as immutable */ 9080 if (SvTYPE(sv) >= SVt_PVAV || (isGV_with_GP(sv) && !SvFAKE(sv))) 9081 Perl_croak_no_modify(); 9082 9083 if (!(flags & SVp_POK) || !*SvPVX_const(sv)) { 9084 if ((flags & SVTYPEMASK) < SVt_PVIV) 9085 sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV ? SVt_PVIV : SVt_IV)); 9086 (void)SvIOK_only(sv); 9087 SvIV_set(sv, 1); 9088 return; 9089 } 9090 d = SvPVX(sv); 9091 while (isALPHA(*d)) d++; 9092 while (isDIGIT(*d)) d++; 9093 if (d < SvEND(sv)) { 9094 const int numtype = grok_number_flags(SvPVX_const(sv), SvCUR(sv), NULL, PERL_SCAN_TRAILING); 9095 #ifdef PERL_PRESERVE_IVUV 9096 /* Got to punt this as an integer if needs be, but we don't issue 9097 warnings. Probably ought to make the sv_iv_please() that does 9098 the conversion if possible, and silently. */ 9099 if (numtype && !(numtype & IS_NUMBER_INFINITY)) { 9100 /* Need to try really hard to see if it's an integer. 9101 9.22337203685478e+18 is an integer. 9102 but "9.22337203685478e+18" + 0 is UV=9223372036854779904 9103 so $a="9.22337203685478e+18"; $a+0; $a++ 9104 needs to be the same as $a="9.22337203685478e+18"; $a++ 9105 or we go insane. */ 9106 9107 (void) sv_2iv(sv); 9108 if (SvIOK(sv)) 9109 goto oops_its_int; 9110 9111 /* sv_2iv *should* have made this an NV */ 9112 if (flags & SVp_NOK) { 9113 (void)SvNOK_only(sv); 9114 SvNV_set(sv, SvNVX(sv) + 1.0); 9115 return; 9116 } 9117 /* I don't think we can get here. Maybe I should assert this 9118 And if we do get here I suspect that sv_setnv will croak. NWC 9119 Fall through. */ 9120 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n", 9121 SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); 9122 } 9123 #endif /* PERL_PRESERVE_IVUV */ 9124 if (!numtype && ckWARN(WARN_NUMERIC)) 9125 not_incrementable(sv); 9126 sv_setnv(sv,Atof(SvPVX_const(sv)) + 1.0); 9127 return; 9128 } 9129 d--; 9130 while (d >= SvPVX_const(sv)) { 9131 if (isDIGIT(*d)) { 9132 if (++*d <= '9') 9133 return; 9134 *(d--) = '0'; 9135 } 9136 else { 9137 #ifdef EBCDIC 9138 /* MKS: The original code here died if letters weren't consecutive. 9139 * at least it didn't have to worry about non-C locales. The 9140 * new code assumes that ('z'-'a')==('Z'-'A'), letters are 9141 * arranged in order (although not consecutively) and that only 9142 * [A-Za-z] are accepted by isALPHA in the C locale. 9143 */ 9144 if (isALPHA_FOLD_NE(*d, 'z')) { 9145 do { ++*d; } while (!isALPHA(*d)); 9146 return; 9147 } 9148 *(d--) -= 'z' - 'a'; 9149 #else 9150 ++*d; 9151 if (isALPHA(*d)) 9152 return; 9153 *(d--) -= 'z' - 'a' + 1; 9154 #endif 9155 } 9156 } 9157 /* oh,oh, the number grew */ 9158 SvGROW(sv, SvCUR(sv) + 2); 9159 SvCUR_set(sv, SvCUR(sv) + 1); 9160 for (d = SvPVX(sv) + SvCUR(sv); d > SvPVX_const(sv); d--) 9161 *d = d[-1]; 9162 if (isDIGIT(d[1])) 9163 *d = '1'; 9164 else 9165 *d = d[1]; 9166 } 9167 9168 /* 9169 =for apidoc sv_dec 9170 =for apidoc_item sv_dec_nomg 9171 9172 These auto-decrement the value in the SV, doing string to numeric conversion 9173 if necessary. They both handle operator overloading. 9174 9175 They differ only in that: 9176 9177 C<sv_dec> handles 'get' magic; C<sv_dec_nomg> skips 'get' magic. 9178 9179 =cut 9180 */ 9181 9182 void 9183 Perl_sv_dec(pTHX_ SV *const sv) 9184 { 9185 if (!sv) 9186 return; 9187 SvGETMAGIC(sv); 9188 sv_dec_nomg(sv); 9189 } 9190 9191 void 9192 Perl_sv_dec_nomg(pTHX_ SV *const sv) 9193 { 9194 int flags; 9195 9196 if (!sv) 9197 return; 9198 if (SvTHINKFIRST(sv)) { 9199 if (SvREADONLY(sv)) { 9200 Perl_croak_no_modify(); 9201 } 9202 if (SvROK(sv)) { 9203 IV i; 9204 if (SvAMAGIC(sv) && AMG_CALLunary(sv, dec_amg)) 9205 return; 9206 i = PTR2IV(SvRV(sv)); 9207 sv_unref(sv); 9208 sv_setiv(sv, i); 9209 } 9210 else sv_force_normal_flags(sv, 0); 9211 } 9212 /* Unlike sv_inc we don't have to worry about string-never-numbers 9213 and keeping them magic. But we mustn't warn on punting */ 9214 flags = SvFLAGS(sv); 9215 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { 9216 /* It's publicly an integer, or privately an integer-not-float */ 9217 #ifdef PERL_PRESERVE_IVUV 9218 oops_its_int: 9219 #endif 9220 if (SvIsUV(sv)) { 9221 if (SvUVX(sv) == 0) { 9222 (void)SvIOK_only(sv); 9223 SvIV_set(sv, -1); 9224 } 9225 else { 9226 (void)SvIOK_only_UV(sv); 9227 SvUV_set(sv, SvUVX(sv) - 1); 9228 } 9229 } else { 9230 if (SvIVX(sv) == IV_MIN) { 9231 sv_setnv(sv, (NV)IV_MIN); 9232 goto oops_its_num; 9233 } 9234 else { 9235 (void)SvIOK_only(sv); 9236 SvIV_set(sv, SvIVX(sv) - 1); 9237 } 9238 } 9239 return; 9240 } 9241 if (flags & SVp_NOK) { 9242 oops_its_num: 9243 { 9244 const NV was = SvNVX(sv); 9245 if (NV_OVERFLOWS_INTEGERS_AT != 0.0 && 9246 /* If NVX was NaN, these comparisons return always false */ 9247 UNLIKELY(was <= -NV_OVERFLOWS_INTEGERS_AT || 9248 was > NV_OVERFLOWS_INTEGERS_AT) && 9249 #if defined(NAN_COMPARE_BROKEN) 9250 LIKELY(!Perl_isinfnan(was)) 9251 #else 9252 LIKELY(!Perl_isinf(was)) 9253 #endif 9254 ) { 9255 /* diag_listed_as: Lost precision when %s %f by 1 */ 9256 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), 9257 "Lost precision when decrementing %" NVff " by 1", 9258 was); 9259 } 9260 (void)SvNOK_only(sv); 9261 SvNV_set(sv, was - 1.0); 9262 return; 9263 } 9264 } 9265 9266 /* treat AV/HV/CV/FM/IO and non-fake GVs as immutable */ 9267 if (SvTYPE(sv) >= SVt_PVAV || (isGV_with_GP(sv) && !SvFAKE(sv))) 9268 Perl_croak_no_modify(); 9269 9270 if (!(flags & SVp_POK)) { 9271 if ((flags & SVTYPEMASK) < SVt_PVIV) 9272 sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV) ? SVt_PVIV : SVt_IV); 9273 SvIV_set(sv, -1); 9274 (void)SvIOK_only(sv); 9275 return; 9276 } 9277 #ifdef PERL_PRESERVE_IVUV 9278 { 9279 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL); 9280 if (numtype && !(numtype & IS_NUMBER_INFINITY)) { 9281 /* Need to try really hard to see if it's an integer. 9282 9.22337203685478e+18 is an integer. 9283 but "9.22337203685478e+18" + 0 is UV=9223372036854779904 9284 so $a="9.22337203685478e+18"; $a+0; $a-- 9285 needs to be the same as $a="9.22337203685478e+18"; $a-- 9286 or we go insane. */ 9287 9288 (void) sv_2iv(sv); 9289 if (SvIOK(sv)) 9290 goto oops_its_int; 9291 9292 /* sv_2iv *should* have made this an NV */ 9293 if (flags & SVp_NOK) { 9294 (void)SvNOK_only(sv); 9295 SvNV_set(sv, SvNVX(sv) - 1.0); 9296 return; 9297 } 9298 /* I don't think we can get here. Maybe I should assert this 9299 And if we do get here I suspect that sv_setnv will croak. NWC 9300 Fall through. */ 9301 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n", 9302 SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); 9303 } 9304 } 9305 #endif /* PERL_PRESERVE_IVUV */ 9306 sv_setnv(sv,Atof(SvPVX_const(sv)) - 1.0); /* punt */ 9307 } 9308 9309 /* this define is used to eliminate a chunk of duplicated but shared logic 9310 * it has the suffix __SV_C to signal that it isnt API, and isnt meant to be 9311 * used anywhere but here - yves 9312 */ 9313 #define PUSH_EXTEND_MORTAL__SV_C(AnSv) \ 9314 STMT_START { \ 9315 SSize_t ix = ++PL_tmps_ix; \ 9316 if (UNLIKELY(ix >= PL_tmps_max)) \ 9317 ix = tmps_grow_p(ix); \ 9318 PL_tmps_stack[ix] = (AnSv); \ 9319 } STMT_END 9320 9321 /* 9322 =for apidoc sv_mortalcopy 9323 9324 Creates a new SV which is a copy of the original SV (using C<sv_setsv>). 9325 The new SV is marked as mortal. It will be destroyed "soon", either by an 9326 explicit call to C<FREETMPS>, or by an implicit call at places such as 9327 statement boundaries. See also C<L</sv_newmortal>> and C<L</sv_2mortal>>. 9328 9329 =for apidoc sv_mortalcopy_flags 9330 9331 Like C<sv_mortalcopy>, but the extra C<flags> are passed to the 9332 C<sv_setsv_flags>. 9333 9334 =cut 9335 */ 9336 9337 /* Make a string that will exist for the duration of the expression 9338 * evaluation. Actually, it may have to last longer than that, but 9339 * hopefully we won't free it until it has been assigned to a 9340 * permanent location. */ 9341 9342 SV * 9343 Perl_sv_mortalcopy_flags(pTHX_ SV *const oldstr, U32 flags) 9344 { 9345 SV *sv; 9346 9347 if (flags & SV_GMAGIC) 9348 SvGETMAGIC(oldstr); /* before new_SV, in case it dies */ 9349 new_SV(sv); 9350 sv_setsv_flags(sv,oldstr,flags & ~SV_GMAGIC); 9351 PUSH_EXTEND_MORTAL__SV_C(sv); 9352 SvTEMP_on(sv); 9353 return sv; 9354 } 9355 9356 /* 9357 =for apidoc sv_newmortal 9358 9359 Creates a new null SV which is mortal. The reference count of the SV is 9360 set to 1. It will be destroyed "soon", either by an explicit call to 9361 C<FREETMPS>, or by an implicit call at places such as statement boundaries. 9362 See also C<L</sv_mortalcopy>> and C<L</sv_2mortal>>. 9363 9364 =cut 9365 */ 9366 9367 SV * 9368 Perl_sv_newmortal(pTHX) 9369 { 9370 SV *sv; 9371 9372 new_SV(sv); 9373 SvFLAGS(sv) = SVs_TEMP; 9374 PUSH_EXTEND_MORTAL__SV_C(sv); 9375 return sv; 9376 } 9377 9378 9379 /* 9380 =for apidoc newSVpvn_flags 9381 9382 Creates a new SV and copies a string (which may contain C<NUL> (C<\0>) 9383 characters) into it. The reference count for the 9384 SV is set to 1. Note that if C<len> is zero, Perl will create a zero length 9385 string. You are responsible for ensuring that the source string is at least 9386 C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined. 9387 Currently the only flag bits accepted are C<SVf_UTF8> and C<SVs_TEMP>. 9388 If C<SVs_TEMP> is set, then C<sv_2mortal()> is called on the result before 9389 returning. If C<SVf_UTF8> is set, C<s> 9390 is considered to be in UTF-8 and the 9391 C<SVf_UTF8> flag will be set on the new SV. 9392 C<newSVpvn_utf8()> is a convenience wrapper for this function, defined as 9393 9394 #define newSVpvn_utf8(s, len, u) \ 9395 newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) 9396 9397 =for apidoc Amnh||SVs_TEMP 9398 9399 =cut 9400 */ 9401 9402 SV * 9403 Perl_newSVpvn_flags(pTHX_ const char *const s, const STRLEN len, const U32 flags) 9404 { 9405 SV *sv; 9406 9407 /* All the flags we don't support must be zero. 9408 And we're new code so I'm going to assert this from the start. */ 9409 assert(!(flags & ~(SVf_UTF8|SVs_TEMP))); 9410 sv = newSV_type(SVt_PV); 9411 sv_setpvn_fresh(sv,s,len); 9412 9413 /* This code used to do a sv_2mortal(), however we now unroll the call to 9414 * sv_2mortal() and do what it does ourselves here. Since we have asserted 9415 * that flags can only have the SVf_UTF8 and/or SVs_TEMP flags set above we 9416 * can use it to enable the sv flags directly (bypassing SvTEMP_on), which 9417 * in turn means we dont need to mask out the SVf_UTF8 flag below, which 9418 * means that we eliminate quite a few steps than it looks - Yves 9419 * (explaining patch by gfx) */ 9420 9421 SvFLAGS(sv) |= flags; 9422 9423 if(flags & SVs_TEMP){ 9424 PUSH_EXTEND_MORTAL__SV_C(sv); 9425 } 9426 9427 return sv; 9428 } 9429 9430 /* 9431 =for apidoc sv_2mortal 9432 9433 Marks an existing SV as mortal. The SV will be destroyed "soon", either 9434 by an explicit call to C<FREETMPS>, or by an implicit call at places such as 9435 statement boundaries. C<SvTEMP()> is turned on which means that the SV's 9436 string buffer can be "stolen" if this SV is copied. See also 9437 C<L</sv_newmortal>> and C<L</sv_mortalcopy>>. 9438 9439 =cut 9440 */ 9441 9442 SV * 9443 Perl_sv_2mortal(pTHX_ SV *const sv) 9444 { 9445 if (!sv) 9446 return sv; 9447 if (SvIMMORTAL(sv)) 9448 return sv; 9449 PUSH_EXTEND_MORTAL__SV_C(sv); 9450 SvTEMP_on(sv); 9451 return sv; 9452 } 9453 9454 /* 9455 =for apidoc newSVpv 9456 9457 Creates a new SV and copies a string (which may contain C<NUL> (C<\0>) 9458 characters) into it. The reference count for the 9459 SV is set to 1. If C<len> is zero, Perl will compute the length using 9460 C<strlen()>, (which means if you use this option, that C<s> can't have embedded 9461 C<NUL> characters and has to have a terminating C<NUL> byte). 9462 9463 This function can cause reliability issues if you are likely to pass in 9464 empty strings that are not null terminated, because it will run 9465 strlen on the string and potentially run past valid memory. 9466 9467 Using L</newSVpvn> is a safer alternative for non C<NUL> terminated strings. 9468 For string literals use L</newSVpvs> instead. This function will work fine for 9469 C<NUL> terminated strings, but if you want to avoid the if statement on whether 9470 to call C<strlen> use C<newSVpvn> instead (calling C<strlen> yourself). 9471 9472 =cut 9473 */ 9474 9475 SV * 9476 Perl_newSVpv(pTHX_ const char *const s, const STRLEN len) 9477 { 9478 SV *sv = newSV_type(SVt_PV); 9479 sv_setpvn_fresh(sv, s, len || s == NULL ? len : strlen(s)); 9480 return sv; 9481 } 9482 9483 /* 9484 =for apidoc newSVpvn 9485 9486 Creates a new SV and copies a string into it, which may contain C<NUL> characters 9487 (C<\0>) and other binary data. The reference count for the SV is set to 1. 9488 Note that if C<len> is zero, Perl will create a zero length (Perl) string. You 9489 are responsible for ensuring that the source buffer is at least 9490 C<len> bytes long. If the C<buffer> argument is NULL the new SV will be 9491 undefined. 9492 9493 =cut 9494 */ 9495 9496 SV * 9497 Perl_newSVpvn(pTHX_ const char *const buffer, const STRLEN len) 9498 { 9499 SV *sv = newSV_type(SVt_PV); 9500 sv_setpvn_fresh(sv,buffer,len); 9501 return sv; 9502 } 9503 9504 /* 9505 =for apidoc newSVhek 9506 9507 Creates a new SV from the hash key structure. It will generate scalars that 9508 point to the shared string table where possible. Returns a new (undefined) 9509 SV if C<hek> is NULL. 9510 9511 =cut 9512 */ 9513 9514 SV * 9515 Perl_newSVhek(pTHX_ const HEK *const hek) 9516 { 9517 if (!hek) { 9518 SV *sv; 9519 9520 new_SV(sv); 9521 return sv; 9522 } 9523 9524 if (HEK_LEN(hek) == HEf_SVKEY) { 9525 return newSVsv(*(SV**)HEK_KEY(hek)); 9526 } else { 9527 const int flags = HEK_FLAGS(hek); 9528 if (flags & HVhek_WASUTF8) { 9529 /* Trouble :-) 9530 Andreas would like keys he put in as utf8 to come back as utf8 9531 */ 9532 STRLEN utf8_len = HEK_LEN(hek); 9533 SV * const sv = newSV_type(SVt_PV); 9534 char *as_utf8 = (char *)bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len); 9535 /* bytes_to_utf8() allocates a new string, which we can repurpose: */ 9536 sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); 9537 SvUTF8_on (sv); 9538 return sv; 9539 } else if (flags & HVhek_NOTSHARED) { 9540 /* A hash that isn't using shared hash keys has to have 9541 the flag in every key so that we know not to try to call 9542 share_hek_hek on it. */ 9543 9544 SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); 9545 if (HEK_UTF8(hek)) 9546 SvUTF8_on (sv); 9547 return sv; 9548 } 9549 /* This will be overwhelminly the most common case. */ 9550 { 9551 /* Inline most of newSVpvn_share(), because share_hek_hek() is far 9552 more efficient than sharepvn(). */ 9553 SV *sv = newSV_type(SVt_PV); 9554 9555 SvPV_set(sv, (char *)HEK_KEY(share_hek_hek(hek))); 9556 SvCUR_set(sv, HEK_LEN(hek)); 9557 SvLEN_set(sv, 0); 9558 SvIsCOW_on(sv); 9559 SvPOK_on(sv); 9560 if (HEK_UTF8(hek)) 9561 SvUTF8_on(sv); 9562 return sv; 9563 } 9564 } 9565 } 9566 9567 /* 9568 =for apidoc newSVpvn_share 9569 9570 Creates a new SV with its C<SvPVX_const> pointing to a shared string in the string 9571 table. If the string does not already exist in the table, it is 9572 created first. Turns on the C<SvIsCOW> flag (or C<READONLY> 9573 and C<FAKE> in 5.16 and earlier). If the C<hash> parameter 9574 is non-zero, that value is used; otherwise the hash is computed. 9575 The string's hash can later be retrieved from the SV 9576 with the C<L</SvSHARED_HASH>> macro. The idea here is 9577 that as the string table is used for shared hash keys these strings will have 9578 C<SvPVX_const == HeKEY> and hash lookup will avoid string compare. 9579 9580 =cut 9581 */ 9582 9583 SV * 9584 Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) 9585 { 9586 SV *sv; 9587 bool is_utf8 = FALSE; 9588 const char *const orig_src = src; 9589 9590 if (len < 0) { 9591 STRLEN tmplen = -len; 9592 is_utf8 = TRUE; 9593 /* See the note in hv.c:hv_fetch() --jhi */ 9594 src = (char*)bytes_from_utf8((const U8*)src, &tmplen, &is_utf8); 9595 len = tmplen; 9596 } 9597 if (!hash) 9598 PERL_HASH(hash, src, len); 9599 sv = newSV_type(SVt_PV); 9600 /* The logic for this is inlined in S_mro_get_linear_isa_dfs(), so if it 9601 changes here, update it there too. */ 9602 SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash)); 9603 SvCUR_set(sv, len); 9604 SvLEN_set(sv, 0); 9605 SvIsCOW_on(sv); 9606 SvPOK_on(sv); 9607 if (is_utf8) 9608 SvUTF8_on(sv); 9609 if (src != orig_src) 9610 Safefree(src); 9611 return sv; 9612 } 9613 9614 /* 9615 =for apidoc newSVpv_share 9616 9617 Like C<newSVpvn_share>, but takes a C<NUL>-terminated string instead of a 9618 string/length pair. 9619 9620 =cut 9621 */ 9622 9623 SV * 9624 Perl_newSVpv_share(pTHX_ const char *src, U32 hash) 9625 { 9626 return newSVpvn_share(src, strlen(src), hash); 9627 } 9628 9629 #if defined(MULTIPLICITY) 9630 9631 /* pTHX_ magic can't cope with varargs, so this is a no-context 9632 * version of the main function, (which may itself be aliased to us). 9633 * Don't access this version directly. 9634 */ 9635 9636 SV * 9637 Perl_newSVpvf_nocontext(const char *const pat, ...) 9638 { 9639 dTHX; 9640 SV *sv; 9641 va_list args; 9642 9643 PERL_ARGS_ASSERT_NEWSVPVF_NOCONTEXT; 9644 9645 va_start(args, pat); 9646 sv = vnewSVpvf(pat, &args); 9647 va_end(args); 9648 return sv; 9649 } 9650 #endif 9651 9652 /* 9653 =for apidoc newSVpvf 9654 9655 Creates a new SV and initializes it with the string formatted like 9656 C<sv_catpvf>. 9657 9658 =for apidoc newSVpvf_nocontext 9659 Like C<L</newSVpvf>> but does not take a thread context (C<aTHX>) parameter, 9660 so is used in situations where the caller doesn't already have the thread 9661 context. 9662 9663 =for apidoc vnewSVpvf 9664 Like C<L</newSVpvf>> but the arguments are an encapsulated argument list. 9665 9666 =cut 9667 */ 9668 9669 SV * 9670 Perl_newSVpvf(pTHX_ const char *const pat, ...) 9671 { 9672 SV *sv; 9673 va_list args; 9674 9675 PERL_ARGS_ASSERT_NEWSVPVF; 9676 9677 va_start(args, pat); 9678 sv = vnewSVpvf(pat, &args); 9679 va_end(args); 9680 return sv; 9681 } 9682 9683 /* backend for newSVpvf() and newSVpvf_nocontext() */ 9684 9685 SV * 9686 Perl_vnewSVpvf(pTHX_ const char *const pat, va_list *const args) 9687 { 9688 SV *sv; 9689 9690 PERL_ARGS_ASSERT_VNEWSVPVF; 9691 9692 new_SV(sv); 9693 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 9694 return sv; 9695 } 9696 9697 /* 9698 =for apidoc newSVnv 9699 9700 Creates a new SV and copies a floating point value into it. 9701 The reference count for the SV is set to 1. 9702 9703 =cut 9704 */ 9705 9706 SV * 9707 Perl_newSVnv(pTHX_ const NV n) 9708 { 9709 SV *sv = newSV_type(SVt_NV); 9710 (void)SvNOK_on(sv); 9711 9712 SvNV_set(sv, n); 9713 SvTAINT(sv); 9714 9715 return sv; 9716 } 9717 9718 /* 9719 =for apidoc newSViv 9720 9721 Creates a new SV and copies an integer into it. The reference count for the 9722 SV is set to 1. 9723 9724 =cut 9725 */ 9726 9727 SV * 9728 Perl_newSViv(pTHX_ const IV i) 9729 { 9730 SV *sv = newSV_type(SVt_IV); 9731 (void)SvIOK_on(sv); 9732 9733 SvIV_set(sv, i); 9734 SvTAINT(sv); 9735 9736 return sv; 9737 } 9738 9739 /* 9740 =for apidoc newSVuv 9741 9742 Creates a new SV and copies an unsigned integer into it. 9743 The reference count for the SV is set to 1. 9744 9745 =cut 9746 */ 9747 9748 SV * 9749 Perl_newSVuv(pTHX_ const UV u) 9750 { 9751 SV *sv; 9752 9753 /* Inlining ONLY the small relevant subset of sv_setuv here 9754 * for performance. Makes a significant difference. */ 9755 9756 /* Using ivs is more efficient than using uvs - see sv_setuv */ 9757 if (u <= (UV)IV_MAX) { 9758 return newSViv((IV)u); 9759 } 9760 9761 new_SV(sv); 9762 9763 /* We're starting from SVt_FIRST, so provided that's 9764 * actual 0, we don't have to unset any SV type flags 9765 * to promote to SVt_IV. */ 9766 STATIC_ASSERT_STMT(SVt_FIRST == 0); 9767 9768 SET_SVANY_FOR_BODYLESS_IV(sv); 9769 SvFLAGS(sv) |= SVt_IV; 9770 (void)SvIOK_on(sv); 9771 (void)SvIsUV_on(sv); 9772 9773 SvUV_set(sv, u); 9774 SvTAINT(sv); 9775 9776 return sv; 9777 } 9778 9779 /* 9780 =for apidoc newRV_noinc 9781 9782 Creates an RV wrapper for an SV. The reference count for the original 9783 SV is B<not> incremented. 9784 9785 =cut 9786 */ 9787 9788 SV * 9789 Perl_newRV_noinc(pTHX_ SV *const tmpRef) 9790 { 9791 SV *sv; 9792 9793 PERL_ARGS_ASSERT_NEWRV_NOINC; 9794 9795 new_SV(sv); 9796 9797 /* We're starting from SVt_FIRST, so provided that's 9798 * actual 0, we don't have to unset any SV type flags 9799 * to promote to SVt_IV. */ 9800 STATIC_ASSERT_STMT(SVt_FIRST == 0); 9801 9802 SET_SVANY_FOR_BODYLESS_IV(sv); 9803 SvFLAGS(sv) |= SVt_IV; 9804 9805 SvTEMP_off(tmpRef); 9806 9807 sv_setrv_noinc(sv, tmpRef); 9808 9809 return sv; 9810 } 9811 9812 /* newRV_inc is the official function name to use now. 9813 * newRV_inc is in fact #defined to newRV in sv.h 9814 */ 9815 9816 SV * 9817 Perl_newRV(pTHX_ SV *const sv) 9818 { 9819 PERL_ARGS_ASSERT_NEWRV; 9820 9821 return newRV_noinc(SvREFCNT_inc_simple_NN(sv)); 9822 } 9823 9824 /* 9825 =for apidoc newSVsv 9826 =for apidoc_item newSVsv_nomg 9827 =for apidoc_item newSVsv_flags 9828 9829 These create a new SV which is an exact duplicate of the original SV 9830 (using C<sv_setsv>.) 9831 9832 They differ only in that C<newSVsv> performs 'get' magic; C<newSVsv_nomg> skips 9833 any magic; and C<newSVsv_flags> allows you to explicitly set a C<flags> 9834 parameter. 9835 9836 =cut 9837 */ 9838 9839 SV * 9840 Perl_newSVsv_flags(pTHX_ SV *const old, I32 flags) 9841 { 9842 SV *sv; 9843 9844 if (!old) 9845 return NULL; 9846 if (SvTYPE(old) == (svtype)SVTYPEMASK) { 9847 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), "semi-panic: attempt to dup freed string"); 9848 return NULL; 9849 } 9850 /* Do this here, otherwise we leak the new SV if this croaks. */ 9851 if (flags & SV_GMAGIC) 9852 SvGETMAGIC(old); 9853 new_SV(sv); 9854 sv_setsv_flags(sv, old, flags & ~SV_GMAGIC); 9855 return sv; 9856 } 9857 9858 /* 9859 =for apidoc sv_reset 9860 9861 Underlying implementation for the C<reset> Perl function. 9862 Note that the perl-level function is vaguely deprecated. 9863 9864 =cut 9865 */ 9866 9867 void 9868 Perl_sv_reset(pTHX_ const char *s, HV *const stash) 9869 { 9870 PERL_ARGS_ASSERT_SV_RESET; 9871 9872 sv_resetpvn(*s ? s : NULL, strlen(s), stash); 9873 } 9874 9875 void 9876 Perl_sv_resetpvn(pTHX_ const char *s, STRLEN len, HV * const stash) 9877 { 9878 char todo[PERL_UCHAR_MAX+1]; 9879 const char *send; 9880 9881 if (!stash || SvTYPE(stash) != SVt_PVHV) 9882 return; 9883 9884 if (!s) { /* reset ?? searches */ 9885 MAGIC * const mg = mg_find((const SV *)stash, PERL_MAGIC_symtab); 9886 if (mg) { 9887 const U32 count = mg->mg_len / sizeof(PMOP**); 9888 PMOP **pmp = (PMOP**) mg->mg_ptr; 9889 PMOP *const *const end = pmp + count; 9890 9891 while (pmp < end) { 9892 #ifdef USE_ITHREADS 9893 SvREADONLY_off(PL_regex_pad[(*pmp)->op_pmoffset]); 9894 #else 9895 (*pmp)->op_pmflags &= ~PMf_USED; 9896 #endif 9897 ++pmp; 9898 } 9899 } 9900 return; 9901 } 9902 9903 /* reset variables */ 9904 9905 if (!HvTOTALKEYS(stash)) 9906 return; 9907 9908 Zero(todo, 256, char); 9909 send = s + len; 9910 while (s < send) { 9911 I32 max; 9912 I32 i = (unsigned char)*s; 9913 if (s[1] == '-') { 9914 s += 2; 9915 } 9916 max = (unsigned char)*s++; 9917 for ( ; i <= max; i++) { 9918 todo[i] = 1; 9919 } 9920 for (i = 0; i <= (I32) HvMAX(stash); i++) { 9921 HE *entry; 9922 for (entry = HvARRAY(stash)[i]; 9923 entry; 9924 entry = HeNEXT(entry)) 9925 { 9926 GV *gv; 9927 SV *sv; 9928 9929 if (!todo[(U8)*HeKEY(entry)]) 9930 continue; 9931 gv = MUTABLE_GV(HeVAL(entry)); 9932 if (!isGV(gv)) 9933 continue; 9934 sv = GvSV(gv); 9935 if (sv && !SvREADONLY(sv)) { 9936 SV_CHECK_THINKFIRST_COW_DROP(sv); 9937 if (!isGV(sv)) SvOK_off(sv); 9938 } 9939 if (GvAV(gv)) { 9940 av_clear(GvAV(gv)); 9941 } 9942 if (GvHV(gv) && !HvNAME_get(GvHV(gv))) { 9943 hv_clear(GvHV(gv)); 9944 } 9945 } 9946 } 9947 } 9948 } 9949 9950 /* 9951 =for apidoc sv_2io 9952 9953 Using various gambits, try to get an IO from an SV: the IO slot if its a 9954 GV; or the recursive result if we're an RV; or the IO slot of the symbol 9955 named after the PV if we're a string. 9956 9957 'Get' magic is ignored on the C<sv> passed in, but will be called on 9958 C<SvRV(sv)> if C<sv> is an RV. 9959 9960 =cut 9961 */ 9962 9963 IO* 9964 Perl_sv_2io(pTHX_ SV *const sv) 9965 { 9966 IO* io; 9967 GV* gv; 9968 9969 PERL_ARGS_ASSERT_SV_2IO; 9970 9971 switch (SvTYPE(sv)) { 9972 case SVt_PVIO: 9973 io = MUTABLE_IO(sv); 9974 break; 9975 case SVt_PVGV: 9976 case SVt_PVLV: 9977 if (isGV_with_GP(sv)) { 9978 gv = MUTABLE_GV(sv); 9979 io = GvIO(gv); 9980 if (!io) 9981 Perl_croak(aTHX_ "Bad filehandle: %" HEKf, 9982 HEKfARG(GvNAME_HEK(gv))); 9983 break; 9984 } 9985 /* FALLTHROUGH */ 9986 default: 9987 if (!SvOK(sv)) 9988 Perl_croak(aTHX_ PL_no_usym, "filehandle"); 9989 if (SvROK(sv)) { 9990 SvGETMAGIC(SvRV(sv)); 9991 return sv_2io(SvRV(sv)); 9992 } 9993 gv = gv_fetchsv_nomg(sv, 0, SVt_PVIO); 9994 if (gv) 9995 io = GvIO(gv); 9996 else 9997 io = 0; 9998 if (!io) { 9999 SV *newsv = sv; 10000 if (SvGMAGICAL(sv)) { 10001 newsv = sv_newmortal(); 10002 sv_setsv_nomg(newsv, sv); 10003 } 10004 Perl_croak(aTHX_ "Bad filehandle: %" SVf, SVfARG(newsv)); 10005 } 10006 break; 10007 } 10008 return io; 10009 } 10010 10011 /* 10012 =for apidoc sv_2cv 10013 10014 Using various gambits, try to get a CV from an SV; in addition, try if 10015 possible to set C<*st> and C<*gvp> to the stash and GV associated with it. 10016 The flags in C<lref> are passed to C<gv_fetchsv>. 10017 10018 =cut 10019 */ 10020 10021 CV * 10022 Perl_sv_2cv(pTHX_ SV *sv, HV **const st, GV **const gvp, const I32 lref) 10023 { 10024 GV *gv = NULL; 10025 CV *cv = NULL; 10026 10027 PERL_ARGS_ASSERT_SV_2CV; 10028 10029 if (!sv) { 10030 *st = NULL; 10031 *gvp = NULL; 10032 return NULL; 10033 } 10034 switch (SvTYPE(sv)) { 10035 case SVt_PVCV: 10036 *st = CvSTASH(sv); 10037 *gvp = NULL; 10038 return MUTABLE_CV(sv); 10039 case SVt_PVHV: 10040 case SVt_PVAV: 10041 *st = NULL; 10042 *gvp = NULL; 10043 return NULL; 10044 default: 10045 SvGETMAGIC(sv); 10046 if (SvROK(sv)) { 10047 if (SvAMAGIC(sv)) 10048 sv = amagic_deref_call(sv, to_cv_amg); 10049 10050 sv = SvRV(sv); 10051 if (SvTYPE(sv) == SVt_PVCV) { 10052 cv = MUTABLE_CV(sv); 10053 *gvp = NULL; 10054 *st = CvSTASH(cv); 10055 return cv; 10056 } 10057 else if(SvGETMAGIC(sv), isGV_with_GP(sv)) 10058 gv = MUTABLE_GV(sv); 10059 else 10060 Perl_croak(aTHX_ "Not a subroutine reference"); 10061 } 10062 else if (isGV_with_GP(sv)) { 10063 gv = MUTABLE_GV(sv); 10064 } 10065 else { 10066 gv = gv_fetchsv_nomg(sv, lref, SVt_PVCV); 10067 } 10068 *gvp = gv; 10069 if (!gv) { 10070 *st = NULL; 10071 return NULL; 10072 } 10073 /* Some flags to gv_fetchsv mean don't really create the GV */ 10074 if (!isGV_with_GP(gv)) { 10075 *st = NULL; 10076 return NULL; 10077 } 10078 *st = GvESTASH(gv); 10079 if (lref & ~GV_ADDMG && !GvCVu(gv)) { 10080 /* XXX this is probably not what they think they're getting. 10081 * It has the same effect as "sub name;", i.e. just a forward 10082 * declaration! */ 10083 newSTUB(gv,0); 10084 } 10085 return GvCVu(gv); 10086 } 10087 } 10088 10089 /* 10090 =for apidoc sv_true 10091 10092 Returns true if the SV has a true value by Perl's rules. 10093 Use the C<SvTRUE> macro instead, which may call C<sv_true()> or may 10094 instead use an in-line version. 10095 10096 =cut 10097 */ 10098 10099 I32 10100 Perl_sv_true(pTHX_ SV *const sv) 10101 { 10102 if (!sv) 10103 return 0; 10104 if (SvPOK(sv)) { 10105 const XPV* const tXpv = (XPV*)SvANY(sv); 10106 if (tXpv && 10107 (tXpv->xpv_cur > 1 || 10108 (tXpv->xpv_cur && *sv->sv_u.svu_pv != '0'))) 10109 return 1; 10110 else 10111 return 0; 10112 } 10113 else { 10114 if (SvIOK(sv)) 10115 return SvIVX(sv) != 0; 10116 else { 10117 if (SvNOK(sv)) 10118 return SvNVX(sv) != 0.0; 10119 else 10120 return sv_2bool(sv); 10121 } 10122 } 10123 } 10124 10125 /* 10126 =for apidoc sv_pvn_force 10127 10128 Get a sensible string out of the SV somehow. 10129 A private implementation of the C<SvPV_force> macro for compilers which 10130 can't cope with complex macro expressions. Always use the macro instead. 10131 10132 =for apidoc sv_pvn_force_flags 10133 10134 Get a sensible string out of the SV somehow. 10135 If C<flags> has the C<SV_GMAGIC> bit set, will C<L</mg_get>> on C<sv> if 10136 appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are 10137 implemented in terms of this function. 10138 You normally want to use the various wrapper macros instead: see 10139 C<L</SvPV_force>> and C<L</SvPV_force_nomg>>. 10140 10141 =cut 10142 */ 10143 10144 char * 10145 Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags) 10146 { 10147 PERL_ARGS_ASSERT_SV_PVN_FORCE_FLAGS; 10148 10149 if (flags & SV_GMAGIC) SvGETMAGIC(sv); 10150 if (SvTHINKFIRST(sv) && (!SvROK(sv) || SvREADONLY(sv))) 10151 sv_force_normal_flags(sv, 0); 10152 10153 if (SvPOK(sv)) { 10154 if (lp) 10155 *lp = SvCUR(sv); 10156 } 10157 else { 10158 char *s; 10159 STRLEN len; 10160 10161 if (SvTYPE(sv) > SVt_PVLV 10162 || isGV_with_GP(sv)) 10163 /* diag_listed_as: Can't coerce %s to %s in %s */ 10164 Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0), 10165 OP_DESC(PL_op)); 10166 s = sv_2pv_flags(sv, &len, flags &~ SV_GMAGIC); 10167 if (!s) { 10168 s = (char *)""; 10169 } 10170 if (lp) 10171 *lp = len; 10172 10173 if (SvTYPE(sv) < SVt_PV || 10174 s != SvPVX_const(sv)) { /* Almost, but not quite, sv_setpvn() */ 10175 if (SvROK(sv)) 10176 sv_unref(sv); 10177 SvUPGRADE(sv, SVt_PV); /* Never FALSE */ 10178 SvGROW(sv, len + 1); 10179 Move(s,SvPVX(sv),len,char); 10180 SvCUR_set(sv, len); 10181 SvPVX(sv)[len] = '\0'; 10182 } 10183 if (!SvPOK(sv)) { 10184 SvPOK_on(sv); /* validate pointer */ 10185 SvTAINT(sv); 10186 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n", 10187 PTR2UV(sv),SvPVX_const(sv))); 10188 } 10189 } 10190 (void)SvPOK_only_UTF8(sv); 10191 return SvPVX_mutable(sv); 10192 } 10193 10194 /* 10195 =for apidoc sv_pvbyten_force 10196 10197 The backend for the C<SvPVbytex_force> macro. Always use the macro 10198 instead. If the SV cannot be downgraded from UTF-8, this croaks. 10199 10200 =cut 10201 */ 10202 10203 char * 10204 Perl_sv_pvbyten_force(pTHX_ SV *const sv, STRLEN *const lp) 10205 { 10206 PERL_ARGS_ASSERT_SV_PVBYTEN_FORCE; 10207 10208 sv_pvn_force(sv,lp); 10209 sv_utf8_downgrade(sv,0); 10210 *lp = SvCUR(sv); 10211 return SvPVX(sv); 10212 } 10213 10214 /* 10215 =for apidoc sv_pvutf8n_force 10216 10217 The backend for the C<SvPVutf8x_force> macro. Always use the macro 10218 instead. 10219 10220 =cut 10221 */ 10222 10223 char * 10224 Perl_sv_pvutf8n_force(pTHX_ SV *const sv, STRLEN *const lp) 10225 { 10226 PERL_ARGS_ASSERT_SV_PVUTF8N_FORCE; 10227 10228 sv_pvn_force(sv,0); 10229 sv_utf8_upgrade_nomg(sv); 10230 *lp = SvCUR(sv); 10231 return SvPVX(sv); 10232 } 10233 10234 /* 10235 =for apidoc sv_reftype 10236 10237 Returns a string describing what the SV is a reference to. 10238 10239 If ob is true and the SV is blessed, the string is the class name, 10240 otherwise it is the type of the SV, "SCALAR", "ARRAY" etc. 10241 10242 =cut 10243 */ 10244 10245 const char * 10246 Perl_sv_reftype(pTHX_ const SV *const sv, const int ob) 10247 { 10248 PERL_ARGS_ASSERT_SV_REFTYPE; 10249 if (ob && SvOBJECT(sv)) { 10250 return SvPV_nolen_const(sv_ref(NULL, sv, ob)); 10251 } 10252 else { 10253 /* WARNING - There is code, for instance in mg.c, that assumes that 10254 * the only reason that sv_reftype(sv,0) would return a string starting 10255 * with 'L' or 'S' is that it is a LVALUE or a SCALAR. 10256 * Yes this a dodgy way to do type checking, but it saves practically reimplementing 10257 * this routine inside other subs, and it saves time. 10258 * Do not change this assumption without searching for "dodgy type check" in 10259 * the code. 10260 * - Yves */ 10261 switch (SvTYPE(sv)) { 10262 case SVt_NULL: 10263 case SVt_IV: 10264 case SVt_NV: 10265 case SVt_PV: 10266 case SVt_PVIV: 10267 case SVt_PVNV: 10268 case SVt_PVMG: 10269 if (SvVOK(sv)) 10270 return "VSTRING"; 10271 if (SvROK(sv)) 10272 return "REF"; 10273 else 10274 return "SCALAR"; 10275 10276 case SVt_PVLV: return (char *) (SvROK(sv) ? "REF" 10277 /* tied lvalues should appear to be 10278 * scalars for backwards compatibility */ 10279 : (isALPHA_FOLD_EQ(LvTYPE(sv), 't')) 10280 ? "SCALAR" : "LVALUE"); 10281 case SVt_PVAV: return "ARRAY"; 10282 case SVt_PVHV: return "HASH"; 10283 case SVt_PVCV: return "CODE"; 10284 case SVt_PVGV: return (char *) (isGV_with_GP(sv) 10285 ? "GLOB" : "SCALAR"); 10286 case SVt_PVFM: return "FORMAT"; 10287 case SVt_PVIO: return "IO"; 10288 case SVt_INVLIST: return "INVLIST"; 10289 case SVt_REGEXP: return "REGEXP"; 10290 default: return "UNKNOWN"; 10291 } 10292 } 10293 } 10294 10295 /* 10296 =for apidoc sv_ref 10297 10298 Returns a SV describing what the SV passed in is a reference to. 10299 10300 dst can be a SV to be set to the description or NULL, in which case a 10301 mortal SV is returned. 10302 10303 If ob is true and the SV is blessed, the description is the class 10304 name, otherwise it is the type of the SV, "SCALAR", "ARRAY" etc. 10305 10306 =cut 10307 */ 10308 10309 SV * 10310 Perl_sv_ref(pTHX_ SV *dst, const SV *const sv, const int ob) 10311 { 10312 PERL_ARGS_ASSERT_SV_REF; 10313 10314 if (!dst) 10315 dst = sv_newmortal(); 10316 10317 if (ob && SvOBJECT(sv)) { 10318 HvNAME_get(SvSTASH(sv)) 10319 ? sv_sethek(dst, HvNAME_HEK(SvSTASH(sv))) 10320 : sv_setpvs(dst, "__ANON__"); 10321 } 10322 else { 10323 const char * reftype = sv_reftype(sv, 0); 10324 sv_setpv(dst, reftype); 10325 } 10326 return dst; 10327 } 10328 10329 /* 10330 =for apidoc sv_isobject 10331 10332 Returns a boolean indicating whether the SV is an RV pointing to a blessed 10333 object. If the SV is not an RV, or if the object is not blessed, then this 10334 will return false. 10335 10336 =cut 10337 */ 10338 10339 int 10340 Perl_sv_isobject(pTHX_ SV *sv) 10341 { 10342 if (!sv) 10343 return 0; 10344 SvGETMAGIC(sv); 10345 if (!SvROK(sv)) 10346 return 0; 10347 sv = SvRV(sv); 10348 if (!SvOBJECT(sv)) 10349 return 0; 10350 return 1; 10351 } 10352 10353 /* 10354 =for apidoc sv_isa 10355 10356 Returns a boolean indicating whether the SV is blessed into the specified 10357 class. 10358 10359 This does not check for subtypes or method overloading. Use C<sv_isa_sv> to 10360 verify an inheritance relationship in the same way as the C<isa> operator by 10361 respecting any C<isa()> method overloading; or C<sv_derived_from_sv> to test 10362 directly on the actual object type. 10363 10364 =cut 10365 */ 10366 10367 int 10368 Perl_sv_isa(pTHX_ SV *sv, const char *const name) 10369 { 10370 const char *hvname; 10371 10372 PERL_ARGS_ASSERT_SV_ISA; 10373 10374 if (!sv) 10375 return 0; 10376 SvGETMAGIC(sv); 10377 if (!SvROK(sv)) 10378 return 0; 10379 sv = SvRV(sv); 10380 if (!SvOBJECT(sv)) 10381 return 0; 10382 hvname = HvNAME_get(SvSTASH(sv)); 10383 if (!hvname) 10384 return 0; 10385 10386 return strEQ(hvname, name); 10387 } 10388 10389 /* 10390 =for apidoc newSVrv 10391 10392 Creates a new SV for the existing RV, C<rv>, to point to. If C<rv> is not an 10393 RV then it will be upgraded to one. If C<classname> is non-null then the new 10394 SV will be blessed in the specified package. The new SV is returned and its 10395 reference count is 1. The reference count 1 is owned by C<rv>. See also 10396 newRV_inc() and newRV_noinc() for creating a new RV properly. 10397 10398 =cut 10399 */ 10400 10401 SV* 10402 Perl_newSVrv(pTHX_ SV *const rv, const char *const classname) 10403 { 10404 SV *sv; 10405 10406 PERL_ARGS_ASSERT_NEWSVRV; 10407 10408 new_SV(sv); 10409 10410 SV_CHECK_THINKFIRST_COW_DROP(rv); 10411 10412 if (UNLIKELY( SvTYPE(rv) >= SVt_PVMG )) { 10413 const U32 refcnt = SvREFCNT(rv); 10414 SvREFCNT(rv) = 0; 10415 sv_clear(rv); 10416 SvFLAGS(rv) = 0; 10417 SvREFCNT(rv) = refcnt; 10418 10419 sv_upgrade(rv, SVt_IV); 10420 } else if (SvROK(rv)) { 10421 SvREFCNT_dec(SvRV(rv)); 10422 } else { 10423 prepare_SV_for_RV(rv); 10424 } 10425 10426 SvOK_off(rv); 10427 SvRV_set(rv, sv); 10428 SvROK_on(rv); 10429 10430 if (classname) { 10431 HV* const stash = gv_stashpv(classname, GV_ADD); 10432 (void)sv_bless(rv, stash); 10433 } 10434 return sv; 10435 } 10436 10437 SV * 10438 Perl_newSVavdefelem(pTHX_ AV *av, SSize_t ix, bool extendible) 10439 { 10440 SV * const lv = newSV_type(SVt_PVLV); 10441 PERL_ARGS_ASSERT_NEWSVAVDEFELEM; 10442 LvTYPE(lv) = 'y'; 10443 sv_magic(lv, NULL, PERL_MAGIC_defelem, NULL, 0); 10444 LvTARG(lv) = SvREFCNT_inc_simple_NN(av); 10445 LvSTARGOFF(lv) = ix; 10446 LvTARGLEN(lv) = extendible ? 1 : (STRLEN)UV_MAX; 10447 return lv; 10448 } 10449 10450 /* 10451 =for apidoc sv_setref_pv 10452 10453 Copies a pointer into a new SV, optionally blessing the SV. The C<rv> 10454 argument will be upgraded to an RV. That RV will be modified to point to 10455 the new SV. If the C<pv> argument is C<NULL>, then C<PL_sv_undef> will be placed 10456 into the SV. The C<classname> argument indicates the package for the 10457 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10458 will have a reference count of 1, and the RV will be returned. 10459 10460 Do not use with other Perl types such as HV, AV, SV, CV, because those 10461 objects will become corrupted by the pointer copy process. 10462 10463 Note that C<sv_setref_pvn> copies the string while this copies the pointer. 10464 10465 =cut 10466 */ 10467 10468 SV* 10469 Perl_sv_setref_pv(pTHX_ SV *const rv, const char *const classname, void *const pv) 10470 { 10471 PERL_ARGS_ASSERT_SV_SETREF_PV; 10472 10473 if (!pv) { 10474 sv_set_undef(rv); 10475 SvSETMAGIC(rv); 10476 } 10477 else 10478 sv_setiv(newSVrv(rv,classname), PTR2IV(pv)); 10479 return rv; 10480 } 10481 10482 /* 10483 =for apidoc sv_setref_iv 10484 10485 Copies an integer into a new SV, optionally blessing the SV. The C<rv> 10486 argument will be upgraded to an RV. That RV will be modified to point to 10487 the new SV. The C<classname> argument indicates the package for the 10488 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10489 will have a reference count of 1, and the RV will be returned. 10490 10491 =cut 10492 */ 10493 10494 SV* 10495 Perl_sv_setref_iv(pTHX_ SV *const rv, const char *const classname, const IV iv) 10496 { 10497 PERL_ARGS_ASSERT_SV_SETREF_IV; 10498 10499 sv_setiv(newSVrv(rv,classname), iv); 10500 return rv; 10501 } 10502 10503 /* 10504 =for apidoc sv_setref_uv 10505 10506 Copies an unsigned integer into a new SV, optionally blessing the SV. The C<rv> 10507 argument will be upgraded to an RV. That RV will be modified to point to 10508 the new SV. The C<classname> argument indicates the package for the 10509 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10510 will have a reference count of 1, and the RV will be returned. 10511 10512 =cut 10513 */ 10514 10515 SV* 10516 Perl_sv_setref_uv(pTHX_ SV *const rv, const char *const classname, const UV uv) 10517 { 10518 PERL_ARGS_ASSERT_SV_SETREF_UV; 10519 10520 sv_setuv(newSVrv(rv,classname), uv); 10521 return rv; 10522 } 10523 10524 /* 10525 =for apidoc sv_setref_nv 10526 10527 Copies a double into a new SV, optionally blessing the SV. The C<rv> 10528 argument will be upgraded to an RV. That RV will be modified to point to 10529 the new SV. The C<classname> argument indicates the package for the 10530 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10531 will have a reference count of 1, and the RV will be returned. 10532 10533 =cut 10534 */ 10535 10536 SV* 10537 Perl_sv_setref_nv(pTHX_ SV *const rv, const char *const classname, const NV nv) 10538 { 10539 PERL_ARGS_ASSERT_SV_SETREF_NV; 10540 10541 sv_setnv(newSVrv(rv,classname), nv); 10542 return rv; 10543 } 10544 10545 /* 10546 =for apidoc sv_setref_pvn 10547 10548 Copies a string into a new SV, optionally blessing the SV. The length of the 10549 string must be specified with C<n>. The C<rv> argument will be upgraded to 10550 an RV. That RV will be modified to point to the new SV. The C<classname> 10551 argument indicates the package for the blessing. Set C<classname> to 10552 C<NULL> to avoid the blessing. The new SV will have a reference count 10553 of 1, and the RV will be returned. 10554 10555 Note that C<sv_setref_pv> copies the pointer while this copies the string. 10556 10557 =cut 10558 */ 10559 10560 SV* 10561 Perl_sv_setref_pvn(pTHX_ SV *const rv, const char *const classname, 10562 const char *const pv, const STRLEN n) 10563 { 10564 PERL_ARGS_ASSERT_SV_SETREF_PVN; 10565 10566 sv_setpvn(newSVrv(rv,classname), pv, n); 10567 return rv; 10568 } 10569 10570 /* 10571 =for apidoc sv_bless 10572 10573 Blesses an SV into a specified package. The SV must be an RV. The package 10574 must be designated by its stash (see C<L</gv_stashpv>>). The reference count 10575 of the SV is unaffected. 10576 10577 =cut 10578 */ 10579 10580 SV* 10581 Perl_sv_bless(pTHX_ SV *const sv, HV *const stash) 10582 { 10583 SV *tmpRef; 10584 HV *oldstash = NULL; 10585 10586 PERL_ARGS_ASSERT_SV_BLESS; 10587 10588 SvGETMAGIC(sv); 10589 if (!SvROK(sv)) 10590 Perl_croak(aTHX_ "Can't bless non-reference value"); 10591 tmpRef = SvRV(sv); 10592 if (SvFLAGS(tmpRef) & (SVs_OBJECT|SVf_READONLY|SVf_PROTECT)) { 10593 if (SvREADONLY(tmpRef)) 10594 Perl_croak_no_modify(); 10595 if (SvOBJECT(tmpRef)) { 10596 oldstash = SvSTASH(tmpRef); 10597 } 10598 } 10599 SvOBJECT_on(tmpRef); 10600 SvUPGRADE(tmpRef, SVt_PVMG); 10601 SvSTASH_set(tmpRef, MUTABLE_HV(SvREFCNT_inc_simple(stash))); 10602 SvREFCNT_dec(oldstash); 10603 10604 if(SvSMAGICAL(tmpRef)) 10605 if(mg_find(tmpRef, PERL_MAGIC_ext) || mg_find(tmpRef, PERL_MAGIC_uvar)) 10606 mg_set(tmpRef); 10607 10608 10609 10610 return sv; 10611 } 10612 10613 /* Downgrades a PVGV to a PVMG. If it's actually a PVLV, we leave the type 10614 * as it is after unglobbing it. 10615 */ 10616 10617 PERL_STATIC_INLINE void 10618 S_sv_unglob(pTHX_ SV *const sv, U32 flags) 10619 { 10620 void *xpvmg; 10621 HV *stash; 10622 SV * const temp = flags & SV_COW_DROP_PV ? NULL : sv_newmortal(); 10623 10624 PERL_ARGS_ASSERT_SV_UNGLOB; 10625 10626 assert(SvTYPE(sv) == SVt_PVGV || SvTYPE(sv) == SVt_PVLV); 10627 SvFAKE_off(sv); 10628 if (!(flags & SV_COW_DROP_PV)) 10629 gv_efullname3(temp, MUTABLE_GV(sv), "*"); 10630 10631 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv)); 10632 if (GvGP(sv)) { 10633 if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv))) 10634 && HvNAME_get(stash)) 10635 mro_method_changed_in(stash); 10636 gp_free(MUTABLE_GV(sv)); 10637 } 10638 if (GvSTASH(sv)) { 10639 sv_del_backref(MUTABLE_SV(GvSTASH(sv)), sv); 10640 GvSTASH(sv) = NULL; 10641 } 10642 GvMULTI_off(sv); 10643 if (GvNAME_HEK(sv)) { 10644 unshare_hek(GvNAME_HEK(sv)); 10645 } 10646 isGV_with_GP_off(sv); 10647 10648 if(SvTYPE(sv) == SVt_PVGV) { 10649 /* need to keep SvANY(sv) in the right arena */ 10650 xpvmg = new_XPVMG(); 10651 StructCopy(SvANY(sv), xpvmg, XPVMG); 10652 del_body_by_type(SvANY(sv), SVt_PVGV); 10653 SvANY(sv) = xpvmg; 10654 10655 SvFLAGS(sv) &= ~SVTYPEMASK; 10656 SvFLAGS(sv) |= SVt_PVMG; 10657 } 10658 10659 /* Intentionally not calling any local SET magic, as this isn't so much a 10660 set operation as merely an internal storage change. */ 10661 if (flags & SV_COW_DROP_PV) SvOK_off(sv); 10662 else sv_setsv_flags(sv, temp, 0); 10663 10664 if ((const GV *)sv == PL_last_in_gv) 10665 PL_last_in_gv = NULL; 10666 else if ((const GV *)sv == PL_statgv) 10667 PL_statgv = NULL; 10668 } 10669 10670 /* 10671 =for apidoc sv_unref_flags 10672 10673 Unsets the RV status of the SV, and decrements the reference count of 10674 whatever was being referenced by the RV. This can almost be thought of 10675 as a reversal of C<newSVrv>. The C<cflags> argument can contain 10676 C<SV_IMMEDIATE_UNREF> to force the reference count to be decremented 10677 (otherwise the decrementing is conditional on the reference count being 10678 different from one or the reference being a readonly SV). 10679 See C<L</SvROK_off>>. 10680 10681 =for apidoc Amnh||SV_IMMEDIATE_UNREF 10682 10683 =cut 10684 */ 10685 10686 void 10687 Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags) 10688 { 10689 SV* const target = SvRV(ref); 10690 10691 PERL_ARGS_ASSERT_SV_UNREF_FLAGS; 10692 10693 if (SvWEAKREF(ref)) { 10694 sv_del_backref(target, ref); 10695 SvWEAKREF_off(ref); 10696 SvRV_set(ref, NULL); 10697 return; 10698 } 10699 SvRV_set(ref, NULL); 10700 SvROK_off(ref); 10701 /* You can't have a || SvREADONLY(target) here, as $a = $$a, where $a was 10702 assigned to as BEGIN {$a = \"Foo"} will fail. */ 10703 if (SvREFCNT(target) != 1 || (flags & SV_IMMEDIATE_UNREF)) 10704 SvREFCNT_dec_NN(target); 10705 else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */ 10706 sv_2mortal(target); /* Schedule for freeing later */ 10707 } 10708 10709 /* 10710 =for apidoc sv_untaint 10711 10712 Untaint an SV. Use C<SvTAINTED_off> instead. 10713 10714 =cut 10715 */ 10716 10717 void 10718 Perl_sv_untaint(pTHX_ SV *const sv) 10719 { 10720 PERL_ARGS_ASSERT_SV_UNTAINT; 10721 PERL_UNUSED_CONTEXT; 10722 10723 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 10724 MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); 10725 if (mg) 10726 mg->mg_len &= ~1; 10727 } 10728 } 10729 10730 /* 10731 =for apidoc sv_tainted 10732 10733 Test an SV for taintedness. Use C<SvTAINTED> instead. 10734 10735 =cut 10736 */ 10737 10738 bool 10739 Perl_sv_tainted(pTHX_ SV *const sv) 10740 { 10741 PERL_ARGS_ASSERT_SV_TAINTED; 10742 PERL_UNUSED_CONTEXT; 10743 10744 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 10745 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); 10746 if (mg && (mg->mg_len & 1) ) 10747 return TRUE; 10748 } 10749 return FALSE; 10750 } 10751 10752 #ifndef NO_MATHOMS /* Can't move these to mathoms.c because call uiv_2buf(), 10753 private to this file */ 10754 10755 /* 10756 =for apidoc sv_setpviv 10757 =for apidoc_item sv_setpviv_mg 10758 10759 These copy an integer into the given SV, also updating its string value. 10760 10761 They differ only in that C<sv_setpviv_mg> performs 'set' magic; C<sv_setpviv> 10762 skips any magic. 10763 10764 =cut 10765 */ 10766 10767 void 10768 Perl_sv_setpviv(pTHX_ SV *const sv, const IV iv) 10769 { 10770 /* The purpose of this union is to ensure that arr is aligned on 10771 a 2 byte boundary, because that is what uiv_2buf() requires */ 10772 union { 10773 char arr[TYPE_CHARS(UV)]; 10774 U16 dummy; 10775 } buf; 10776 char *ebuf; 10777 char * const ptr = uiv_2buf(buf.arr, iv, 0, 0, &ebuf); 10778 10779 PERL_ARGS_ASSERT_SV_SETPVIV; 10780 10781 sv_setpvn(sv, ptr, ebuf - ptr); 10782 } 10783 10784 void 10785 Perl_sv_setpviv_mg(pTHX_ SV *const sv, const IV iv) 10786 { 10787 PERL_ARGS_ASSERT_SV_SETPVIV_MG; 10788 10789 GCC_DIAG_IGNORE_STMT(-Wdeprecated-declarations); 10790 10791 sv_setpviv(sv, iv); 10792 10793 GCC_DIAG_RESTORE_STMT; 10794 10795 SvSETMAGIC(sv); 10796 } 10797 10798 #endif /* NO_MATHOMS */ 10799 10800 #if defined(MULTIPLICITY) 10801 10802 /* pTHX_ magic can't cope with varargs, so this is a no-context 10803 * version of the main function, (which may itself be aliased to us). 10804 * Don't access this version directly. 10805 */ 10806 10807 void 10808 Perl_sv_setpvf_nocontext(SV *const sv, const char *const pat, ...) 10809 { 10810 dTHX; 10811 va_list args; 10812 10813 PERL_ARGS_ASSERT_SV_SETPVF_NOCONTEXT; 10814 10815 va_start(args, pat); 10816 sv_vsetpvf(sv, pat, &args); 10817 va_end(args); 10818 } 10819 10820 /* pTHX_ magic can't cope with varargs, so this is a no-context 10821 * version of the main function, (which may itself be aliased to us). 10822 * Don't access this version directly. 10823 */ 10824 10825 void 10826 Perl_sv_setpvf_mg_nocontext(SV *const sv, const char *const pat, ...) 10827 { 10828 dTHX; 10829 va_list args; 10830 10831 PERL_ARGS_ASSERT_SV_SETPVF_MG_NOCONTEXT; 10832 10833 va_start(args, pat); 10834 sv_vsetpvf_mg(sv, pat, &args); 10835 va_end(args); 10836 } 10837 #endif 10838 10839 /* 10840 =for apidoc sv_setpvf 10841 =for apidoc_item sv_setpvf_nocontext 10842 =for apidoc_item sv_setpvf_mg 10843 =for apidoc_item sv_setpvf_mg_nocontext 10844 10845 These work like C<L</sv_catpvf>> but copy the text into the SV instead of 10846 appending it. 10847 10848 The differences between these are: 10849 10850 C<sv_setpvf_mg> and C<sv_setpvf_mg_nocontext> perform 'set' magic; C<sv_setpvf> 10851 and C<sv_setpvf_nocontext> skip all magic. 10852 10853 C<sv_setpvf_nocontext> and C<sv_setpvf_mg_nocontext> do not take a thread 10854 context (C<aTHX>) parameter, so are used in situations where the caller 10855 doesn't already have the thread context. 10856 10857 =cut 10858 */ 10859 10860 void 10861 Perl_sv_setpvf(pTHX_ SV *const sv, const char *const pat, ...) 10862 { 10863 va_list args; 10864 10865 PERL_ARGS_ASSERT_SV_SETPVF; 10866 10867 va_start(args, pat); 10868 sv_vsetpvf(sv, pat, &args); 10869 va_end(args); 10870 } 10871 10872 /* 10873 =for apidoc sv_vsetpvf 10874 =for apidoc_item sv_vsetpvf_mg 10875 10876 These work like C<L</sv_vcatpvf>> but copy the text into the SV instead of 10877 appending it. 10878 10879 They differ only in that C<sv_vsetpvf_mg> performs 'set' magic; 10880 C<sv_vsetpvf> skips all magic. 10881 10882 They are usually used via their frontends, C<L</sv_setpvf>> and 10883 C<L</sv_setpvf_mg>>. 10884 10885 =cut 10886 */ 10887 10888 void 10889 Perl_sv_vsetpvf(pTHX_ SV *const sv, const char *const pat, va_list *const args) 10890 { 10891 PERL_ARGS_ASSERT_SV_VSETPVF; 10892 10893 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 10894 } 10895 10896 void 10897 Perl_sv_setpvf_mg(pTHX_ SV *const sv, const char *const pat, ...) 10898 { 10899 va_list args; 10900 10901 PERL_ARGS_ASSERT_SV_SETPVF_MG; 10902 10903 va_start(args, pat); 10904 sv_vsetpvf_mg(sv, pat, &args); 10905 va_end(args); 10906 } 10907 10908 void 10909 Perl_sv_vsetpvf_mg(pTHX_ SV *const sv, const char *const pat, va_list *const args) 10910 { 10911 PERL_ARGS_ASSERT_SV_VSETPVF_MG; 10912 10913 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 10914 SvSETMAGIC(sv); 10915 } 10916 10917 #if defined(MULTIPLICITY) 10918 10919 /* pTHX_ magic can't cope with varargs, so this is a no-context 10920 * version of the main function, (which may itself be aliased to us). 10921 * Don't access this version directly. 10922 */ 10923 10924 void 10925 Perl_sv_catpvf_nocontext(SV *const sv, const char *const pat, ...) 10926 { 10927 dTHX; 10928 va_list args; 10929 10930 PERL_ARGS_ASSERT_SV_CATPVF_NOCONTEXT; 10931 10932 va_start(args, pat); 10933 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 10934 va_end(args); 10935 } 10936 10937 /* pTHX_ magic can't cope with varargs, so this is a no-context 10938 * version of the main function, (which may itself be aliased to us). 10939 * Don't access this version directly. 10940 */ 10941 10942 void 10943 Perl_sv_catpvf_mg_nocontext(SV *const sv, const char *const pat, ...) 10944 { 10945 dTHX; 10946 va_list args; 10947 10948 PERL_ARGS_ASSERT_SV_CATPVF_MG_NOCONTEXT; 10949 10950 va_start(args, pat); 10951 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 10952 SvSETMAGIC(sv); 10953 va_end(args); 10954 } 10955 #endif 10956 10957 /* 10958 =for apidoc sv_catpvf 10959 =for apidoc_item sv_catpvf_nocontext 10960 =for apidoc_item sv_catpvf_mg 10961 =for apidoc_item sv_catpvf_mg_nocontext 10962 10963 These process their arguments like C<sprintf>, and append the formatted 10964 output to an SV. As with C<sv_vcatpvfn>, argument reordering is not supporte 10965 when called with a non-null C-style variable argument list. 10966 10967 If the appended data contains "wide" characters 10968 (including, but not limited to, SVs with a UTF-8 PV formatted with C<%s>, 10969 and characters >255 formatted with C<%c>), the original SV might get 10970 upgraded to UTF-8. 10971 10972 If the original SV was UTF-8, the pattern should be 10973 valid UTF-8; if the original SV was bytes, the pattern should be too. 10974 10975 All perform 'get' magic, but only C<sv_catpvf_mg> and C<sv_catpvf_mg_nocontext> 10976 perform 'set' magic. 10977 10978 C<sv_catpvf_nocontext> and C<sv_catpvf_mg_nocontext> do not take a thread 10979 context (C<aTHX>) parameter, so are used in situations where the caller 10980 doesn't already have the thread context. 10981 10982 =cut 10983 */ 10984 10985 void 10986 Perl_sv_catpvf(pTHX_ SV *const sv, const char *const pat, ...) 10987 { 10988 va_list args; 10989 10990 PERL_ARGS_ASSERT_SV_CATPVF; 10991 10992 va_start(args, pat); 10993 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 10994 va_end(args); 10995 } 10996 10997 /* 10998 =for apidoc sv_vcatpvf 10999 =for apidoc_item sv_vcatpvf_mg 11000 11001 These process their arguments like C<sv_vcatpvfn> called with a non-null 11002 C-style variable argument list, and append the formatted output to C<sv>. 11003 11004 They differ only in that C<sv_vcatpvf_mg> performs 'set' magic; 11005 C<sv_vcatpvf> skips 'set' magic. 11006 11007 Both perform 'get' magic. 11008 11009 They are usually accessed via their frontends C<L</sv_catpvf>> and 11010 C<L</sv_catpvf_mg>>. 11011 11012 =cut 11013 */ 11014 11015 void 11016 Perl_sv_vcatpvf(pTHX_ SV *const sv, const char *const pat, va_list *const args) 11017 { 11018 PERL_ARGS_ASSERT_SV_VCATPVF; 11019 11020 sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11021 } 11022 11023 void 11024 Perl_sv_catpvf_mg(pTHX_ SV *const sv, const char *const pat, ...) 11025 { 11026 va_list args; 11027 11028 PERL_ARGS_ASSERT_SV_CATPVF_MG; 11029 11030 va_start(args, pat); 11031 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11032 SvSETMAGIC(sv); 11033 va_end(args); 11034 } 11035 11036 void 11037 Perl_sv_vcatpvf_mg(pTHX_ SV *const sv, const char *const pat, va_list *const args) 11038 { 11039 PERL_ARGS_ASSERT_SV_VCATPVF_MG; 11040 11041 sv_vcatpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 11042 SvSETMAGIC(sv); 11043 } 11044 11045 /* 11046 =for apidoc sv_vsetpvfn 11047 11048 Works like C<sv_vcatpvfn> but copies the text into the SV instead of 11049 appending it. 11050 11051 Usually used via one of its frontends L</C<sv_vsetpvf>> and 11052 L</C<sv_vsetpvf_mg>>. 11053 11054 =cut 11055 */ 11056 11057 void 11058 Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, 11059 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) 11060 { 11061 PERL_ARGS_ASSERT_SV_VSETPVFN; 11062 11063 SvPVCLEAR(sv); 11064 sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0); 11065 } 11066 11067 11068 /* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK */ 11069 11070 PERL_STATIC_INLINE void 11071 S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len) 11072 { 11073 STRLEN const need = len + SvCUR(sv) + 1; 11074 char *end; 11075 11076 /* can't wrap as both len and SvCUR() are allocated in 11077 * memory and together can't consume all the address space 11078 */ 11079 assert(need > len); 11080 11081 assert(SvPOK(sv)); 11082 SvGROW(sv, need); 11083 end = SvEND(sv); 11084 Copy(buf, end, len, char); 11085 end += len; 11086 *end = '\0'; 11087 SvCUR_set(sv, need - 1); 11088 } 11089 11090 11091 /* 11092 * Warn of missing argument to sprintf. The value used in place of such 11093 * arguments should be &PL_sv_no; an undefined value would yield 11094 * inappropriate "use of uninit" warnings [perl #71000]. 11095 */ 11096 STATIC void 11097 S_warn_vcatpvfn_missing_argument(pTHX) { 11098 if (ckWARN(WARN_MISSING)) { 11099 Perl_warner(aTHX_ packWARN(WARN_MISSING), "Missing argument in %s", 11100 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); 11101 } 11102 } 11103 11104 11105 static void 11106 S_croak_overflow() 11107 { 11108 dTHX; 11109 Perl_croak(aTHX_ "Integer overflow in format string for %s", 11110 (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn")); 11111 } 11112 11113 11114 /* Given an int i from the next arg (if args is true) or an sv from an arg 11115 * (if args is false), try to extract a STRLEN-ranged value from the arg, 11116 * with overflow checking. 11117 * Sets *neg to true if the value was negative (untouched otherwise. 11118 * Returns the absolute value. 11119 * As an extra margin of safety, it croaks if the returned value would 11120 * exceed the maximum value of a STRLEN / 4. 11121 */ 11122 11123 static STRLEN 11124 S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg) 11125 { 11126 IV iv; 11127 11128 if (args) { 11129 iv = i; 11130 goto do_iv; 11131 } 11132 11133 if (!sv) 11134 return 0; 11135 11136 SvGETMAGIC(sv); 11137 11138 if (UNLIKELY(SvIsUV(sv))) { 11139 UV uv = SvUV_nomg(sv); 11140 if (uv > IV_MAX) 11141 S_croak_overflow(); 11142 iv = uv; 11143 } 11144 else { 11145 iv = SvIV_nomg(sv); 11146 do_iv: 11147 if (iv < 0) { 11148 if (iv < -IV_MAX) 11149 S_croak_overflow(); 11150 iv = -iv; 11151 *neg = TRUE; 11152 } 11153 } 11154 11155 if (iv > (IV)(((STRLEN)~0) / 4)) 11156 S_croak_overflow(); 11157 11158 return (STRLEN)iv; 11159 } 11160 11161 /* Read in and return a number. Updates *pattern to point to the char 11162 * following the number. Expects the first char to 1..9. 11163 * Croaks if the number exceeds 1/4 of the maximum value of STRLEN. 11164 * This is a belt-and-braces safety measure to complement any 11165 * overflow/wrap checks done in the main body of sv_vcatpvfn_flags. 11166 * It means that e.g. on a 32-bit system the width/precision can't be more 11167 * than 1G, which seems reasonable. 11168 */ 11169 11170 STATIC STRLEN 11171 S_expect_number(pTHX_ const char **const pattern) 11172 { 11173 STRLEN var; 11174 11175 PERL_ARGS_ASSERT_EXPECT_NUMBER; 11176 11177 assert(inRANGE(**pattern, '1', '9')); 11178 11179 var = *(*pattern)++ - '0'; 11180 while (isDIGIT(**pattern)) { 11181 /* if var * 10 + 9 would exceed 1/4 max strlen, croak */ 11182 if (var > ((((STRLEN)~0) / 4 - 9) / 10)) 11183 S_croak_overflow(); 11184 var = var * 10 + (*(*pattern)++ - '0'); 11185 } 11186 return var; 11187 } 11188 11189 /* Implement a fast "%.0f": given a pointer to the end of a buffer (caller 11190 * ensures it's big enough), back fill it with the rounded integer part of 11191 * nv. Returns ptr to start of string, and sets *len to its length. 11192 * Returns NULL if not convertible. 11193 */ 11194 11195 STATIC char * 11196 S_F0convert(NV nv, char *const endbuf, STRLEN *const len) 11197 { 11198 const int neg = nv < 0; 11199 UV uv; 11200 11201 PERL_ARGS_ASSERT_F0CONVERT; 11202 11203 assert(!Perl_isinfnan(nv)); 11204 if (neg) 11205 nv = -nv; 11206 if (nv != 0.0 && nv < (NV) UV_MAX) { 11207 char *p = endbuf; 11208 uv = (UV)nv; 11209 if (uv != nv) { 11210 nv += 0.5; 11211 uv = (UV)nv; 11212 if (uv & 1 && uv == nv) 11213 uv--; /* Round to even */ 11214 } 11215 do { 11216 const unsigned dig = uv % 10; 11217 *--p = '0' + dig; 11218 } while (uv /= 10); 11219 if (neg) 11220 *--p = '-'; 11221 *len = endbuf - p; 11222 return p; 11223 } 11224 return NULL; 11225 } 11226 11227 11228 /* XXX maybe_tainted is never assigned to, so the doc above is lying. */ 11229 11230 void 11231 Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, 11232 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) 11233 { 11234 PERL_ARGS_ASSERT_SV_VCATPVFN; 11235 11236 sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC); 11237 } 11238 11239 11240 /* For the vcatpvfn code, we need a long double target in case 11241 * HAS_LONG_DOUBLE, even without USE_LONG_DOUBLE, so that we can printf 11242 * with long double formats, even without NV being long double. But we 11243 * call the target 'fv' instead of 'nv', since most of the time it is not 11244 * (most compilers these days recognize "long double", even if only as a 11245 * synonym for "double"). 11246 */ 11247 #if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \ 11248 defined(PERL_PRIgldbl) && !defined(USE_QUADMATH) 11249 # define VCATPVFN_FV_GF PERL_PRIgldbl 11250 # if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT) 11251 /* Work around breakage in OTS$CVT_FLOAT_T_X */ 11252 # define VCATPVFN_NV_TO_FV(nv,fv) \ 11253 STMT_START { \ 11254 double _dv = nv; \ 11255 fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \ 11256 } STMT_END 11257 # else 11258 # define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv) 11259 # endif 11260 typedef long double vcatpvfn_long_double_t; 11261 #else 11262 # define VCATPVFN_FV_GF NVgf 11263 # define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv) 11264 typedef NV vcatpvfn_long_double_t; 11265 #endif 11266 11267 #ifdef LONGDOUBLE_DOUBLEDOUBLE 11268 /* The first double can be as large as 2**1023, or '1' x '0' x 1023. 11269 * The second double can be as small as 2**-1074, or '0' x 1073 . '1'. 11270 * The sum of them can be '1' . '0' x 2096 . '1', with implied radix point 11271 * after the first 1023 zero bits. 11272 * 11273 * XXX The 2098 is quite large (262.25 bytes) and therefore some sort 11274 * of dynamically growing buffer might be better, start at just 16 bytes 11275 * (for example) and grow only when necessary. Or maybe just by looking 11276 * at the exponents of the two doubles? */ 11277 # define DOUBLEDOUBLE_MAXBITS 2098 11278 #endif 11279 11280 /* vhex will contain the values (0..15) of the hex digits ("nybbles" 11281 * of 4 bits); 1 for the implicit 1, and the mantissa bits, four bits 11282 * per xdigit. For the double-double case, this can be rather many. 11283 * The non-double-double-long-double overshoots since all bits of NV 11284 * are not mantissa bits, there are also exponent bits. */ 11285 #ifdef LONGDOUBLE_DOUBLEDOUBLE 11286 # define VHEX_SIZE (3+DOUBLEDOUBLE_MAXBITS/4) 11287 #else 11288 # define VHEX_SIZE (1+(NVSIZE * 8)/4) 11289 #endif 11290 11291 /* If we do not have a known long double format, (including not using 11292 * long doubles, or long doubles being equal to doubles) then we will 11293 * fall back to the ldexp/frexp route, with which we can retrieve at 11294 * most as many bits as our widest unsigned integer type is. We try 11295 * to get a 64-bit unsigned integer even if we are not using a 64-bit UV. 11296 * 11297 * (If you want to test the case of UVSIZE == 4, NVSIZE == 8, 11298 * set the MANTISSATYPE to int and the MANTISSASIZE to 4.) 11299 */ 11300 #if defined(HAS_QUAD) && defined(Uquad_t) 11301 # define MANTISSATYPE Uquad_t 11302 # define MANTISSASIZE 8 11303 #else 11304 # define MANTISSATYPE UV 11305 # define MANTISSASIZE UVSIZE 11306 #endif 11307 11308 #if defined(DOUBLE_LITTLE_ENDIAN) || defined(LONGDOUBLE_LITTLE_ENDIAN) 11309 # define HEXTRACT_LITTLE_ENDIAN 11310 #elif defined(DOUBLE_BIG_ENDIAN) || defined(LONGDOUBLE_BIG_ENDIAN) 11311 # define HEXTRACT_BIG_ENDIAN 11312 #else 11313 # define HEXTRACT_MIX_ENDIAN 11314 #endif 11315 11316 /* S_hextract() is a helper for S_format_hexfp, for extracting 11317 * the hexadecimal values (for %a/%A). The nv is the NV where the value 11318 * are being extracted from (either directly from the long double in-memory 11319 * presentation, or from the uquad computed via frexp+ldexp). frexp also 11320 * is used to update the exponent. The subnormal is set to true 11321 * for IEEE 754 subnormals/denormals (including the x86 80-bit format). 11322 * The vhex is the pointer to the beginning of the output buffer of VHEX_SIZE. 11323 * 11324 * The tricky part is that S_hextract() needs to be called twice: 11325 * the first time with vend as NULL, and the second time with vend as 11326 * the pointer returned by the first call. What happens is that on 11327 * the first round the output size is computed, and the intended 11328 * extraction sanity checked. On the second round the actual output 11329 * (the extraction of the hexadecimal values) takes place. 11330 * Sanity failures cause fatal failures during both rounds. */ 11331 STATIC U8* 11332 S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, 11333 U8* vhex, U8* vend) 11334 { 11335 U8* v = vhex; 11336 int ix; 11337 int ixmin = 0, ixmax = 0; 11338 11339 /* XXX Inf/NaN are not handled here, since it is 11340 * assumed they are to be output as "Inf" and "NaN". */ 11341 11342 /* These macros are just to reduce typos, they have multiple 11343 * repetitions below, but usually only one (or sometimes two) 11344 * of them is really being used. */ 11345 /* HEXTRACT_OUTPUT() extracts the high nybble first. */ 11346 #define HEXTRACT_OUTPUT_HI(ix) (*v++ = nvp[ix] >> 4) 11347 #define HEXTRACT_OUTPUT_LO(ix) (*v++ = nvp[ix] & 0xF) 11348 #define HEXTRACT_OUTPUT(ix) \ 11349 STMT_START { \ 11350 HEXTRACT_OUTPUT_HI(ix); HEXTRACT_OUTPUT_LO(ix); \ 11351 } STMT_END 11352 #define HEXTRACT_COUNT(ix, c) \ 11353 STMT_START { \ 11354 v += c; if (ix < ixmin) ixmin = ix; else if (ix > ixmax) ixmax = ix; \ 11355 } STMT_END 11356 #define HEXTRACT_BYTE(ix) \ 11357 STMT_START { \ 11358 if (vend) HEXTRACT_OUTPUT(ix); else HEXTRACT_COUNT(ix, 2); \ 11359 } STMT_END 11360 #define HEXTRACT_LO_NYBBLE(ix) \ 11361 STMT_START { \ 11362 if (vend) HEXTRACT_OUTPUT_LO(ix); else HEXTRACT_COUNT(ix, 1); \ 11363 } STMT_END 11364 /* HEXTRACT_TOP_NYBBLE is just convenience disguise, 11365 * to make it look less odd when the top bits of a NV 11366 * are extracted using HEXTRACT_LO_NYBBLE: the highest 11367 * order bits can be in the "low nybble" of a byte. */ 11368 #define HEXTRACT_TOP_NYBBLE(ix) HEXTRACT_LO_NYBBLE(ix) 11369 #define HEXTRACT_BYTES_LE(a, b) \ 11370 for (ix = a; ix >= b; ix--) { HEXTRACT_BYTE(ix); } 11371 #define HEXTRACT_BYTES_BE(a, b) \ 11372 for (ix = a; ix <= b; ix++) { HEXTRACT_BYTE(ix); } 11373 #define HEXTRACT_GET_SUBNORMAL(nv) *subnormal = Perl_fp_class_denorm(nv) 11374 #define HEXTRACT_IMPLICIT_BIT(nv) \ 11375 STMT_START { \ 11376 if (!*subnormal) { \ 11377 if (vend) *v++ = ((nv) == 0.0) ? 0 : 1; else v++; \ 11378 } \ 11379 } STMT_END 11380 11381 /* Most formats do. Those which don't should undef this. 11382 * 11383 * But also note that IEEE 754 subnormals do not have it, or, 11384 * expressed alternatively, their implicit bit is zero. */ 11385 #define HEXTRACT_HAS_IMPLICIT_BIT 11386 11387 /* Many formats do. Those which don't should undef this. */ 11388 #define HEXTRACT_HAS_TOP_NYBBLE 11389 11390 /* HEXTRACTSIZE is the maximum number of xdigits. */ 11391 #if defined(USE_LONG_DOUBLE) && defined(LONGDOUBLE_DOUBLEDOUBLE) 11392 # define HEXTRACTSIZE (2+DOUBLEDOUBLE_MAXBITS/4) 11393 #else 11394 # define HEXTRACTSIZE 2 * NVSIZE 11395 #endif 11396 11397 const U8* vmaxend = vhex + HEXTRACTSIZE; 11398 11399 assert(HEXTRACTSIZE <= VHEX_SIZE); 11400 11401 PERL_UNUSED_VAR(ix); /* might happen */ 11402 (void)Perl_frexp(PERL_ABS(nv), exponent); 11403 *subnormal = FALSE; 11404 if (vend && (vend <= vhex || vend > vmaxend)) { 11405 /* diag_listed_as: Hexadecimal float: internal error (%s) */ 11406 Perl_croak(aTHX_ "Hexadecimal float: internal error (entry)"); 11407 } 11408 { 11409 /* First check if using long doubles. */ 11410 #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) 11411 # if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN 11412 /* Used in e.g. VMS and HP-UX IA-64, e.g. -0.1L: 11413 * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb bf */ 11414 /* The bytes 13..0 are the mantissa/fraction, 11415 * the 15,14 are the sign+exponent. */ 11416 const U8* nvp = (const U8*)(&nv); 11417 HEXTRACT_GET_SUBNORMAL(nv); 11418 HEXTRACT_IMPLICIT_BIT(nv); 11419 # undef HEXTRACT_HAS_TOP_NYBBLE 11420 HEXTRACT_BYTES_LE(13, 0); 11421 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN 11422 /* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L: 11423 * bf fb 99 99 99 99 99 99 99 99 99 99 99 99 99 9a */ 11424 /* The bytes 2..15 are the mantissa/fraction, 11425 * the 0,1 are the sign+exponent. */ 11426 const U8* nvp = (const U8*)(&nv); 11427 HEXTRACT_GET_SUBNORMAL(nv); 11428 HEXTRACT_IMPLICIT_BIT(nv); 11429 # undef HEXTRACT_HAS_TOP_NYBBLE 11430 HEXTRACT_BYTES_BE(2, 15); 11431 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 11432 /* x86 80-bit "extended precision", 64 bits of mantissa / fraction / 11433 * significand, 15 bits of exponent, 1 bit of sign. No implicit bit. 11434 * NVSIZE can be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux 11435 * and OS X), meaning that 2 or 6 bytes are empty padding. */ 11436 /* The bytes 0..1 are the sign+exponent, 11437 * the bytes 2..9 are the mantissa/fraction. */ 11438 const U8* nvp = (const U8*)(&nv); 11439 # undef HEXTRACT_HAS_IMPLICIT_BIT 11440 # undef HEXTRACT_HAS_TOP_NYBBLE 11441 HEXTRACT_GET_SUBNORMAL(nv); 11442 HEXTRACT_BYTES_LE(7, 0); 11443 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN 11444 /* Does this format ever happen? (Wikipedia says the Motorola 11445 * 6888x math coprocessors used format _like_ this but padded 11446 * to 96 bits with 16 unused bits between the exponent and the 11447 * mantissa.) */ 11448 const U8* nvp = (const U8*)(&nv); 11449 # undef HEXTRACT_HAS_IMPLICIT_BIT 11450 # undef HEXTRACT_HAS_TOP_NYBBLE 11451 HEXTRACT_GET_SUBNORMAL(nv); 11452 HEXTRACT_BYTES_BE(0, 7); 11453 # else 11454 # define HEXTRACT_FALLBACK 11455 /* Double-double format: two doubles next to each other. 11456 * The first double is the high-order one, exactly like 11457 * it would be for a "lone" double. The second double 11458 * is shifted down using the exponent so that that there 11459 * are no common bits. The tricky part is that the value 11460 * of the double-double is the SUM of the two doubles and 11461 * the second one can be also NEGATIVE. 11462 * 11463 * Because of this tricky construction the bytewise extraction we 11464 * use for the other long double formats doesn't work, we must 11465 * extract the values bit by bit. 11466 * 11467 * The little-endian double-double is used .. somewhere? 11468 * 11469 * The big endian double-double is used in e.g. PPC/Power (AIX) 11470 * and MIPS (SGI). 11471 * 11472 * The mantissa bits are in two separate stretches, e.g. for -0.1L: 11473 * 9a 99 99 99 99 99 59 bc 9a 99 99 99 99 99 b9 3f (LE) 11474 * 3f b9 99 99 99 99 99 9a bc 59 99 99 99 99 99 9a (BE) 11475 */ 11476 # endif 11477 #else /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) */ 11478 /* Using normal doubles, not long doubles. 11479 * 11480 * We generate 4-bit xdigits (nybble/nibble) instead of 8-bit 11481 * bytes, since we might need to handle printf precision, and 11482 * also need to insert the radix. */ 11483 # if NVSIZE == 8 11484 # ifdef HEXTRACT_LITTLE_ENDIAN 11485 /* 0 1 2 3 4 5 6 7 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */ 11486 const U8* nvp = (const U8*)(&nv); 11487 HEXTRACT_GET_SUBNORMAL(nv); 11488 HEXTRACT_IMPLICIT_BIT(nv); 11489 HEXTRACT_TOP_NYBBLE(6); 11490 HEXTRACT_BYTES_LE(5, 0); 11491 # elif defined(HEXTRACT_BIG_ENDIAN) 11492 /* 7 6 5 4 3 2 1 0 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */ 11493 const U8* nvp = (const U8*)(&nv); 11494 HEXTRACT_GET_SUBNORMAL(nv); 11495 HEXTRACT_IMPLICIT_BIT(nv); 11496 HEXTRACT_TOP_NYBBLE(1); 11497 HEXTRACT_BYTES_BE(2, 7); 11498 # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_LE_BE 11499 /* 4 5 6 7 0 1 2 3 (MSB = 7, LSB = 0, 6:7 = nybble:exponent:sign) */ 11500 const U8* nvp = (const U8*)(&nv); 11501 HEXTRACT_GET_SUBNORMAL(nv); 11502 HEXTRACT_IMPLICIT_BIT(nv); 11503 HEXTRACT_TOP_NYBBLE(2); /* 6 */ 11504 HEXTRACT_BYTE(1); /* 5 */ 11505 HEXTRACT_BYTE(0); /* 4 */ 11506 HEXTRACT_BYTE(7); /* 3 */ 11507 HEXTRACT_BYTE(6); /* 2 */ 11508 HEXTRACT_BYTE(5); /* 1 */ 11509 HEXTRACT_BYTE(4); /* 0 */ 11510 # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_BE_LE 11511 /* 3 2 1 0 7 6 5 4 (MSB = 7, LSB = 0, 7:6 = sign:exponent:nybble) */ 11512 const U8* nvp = (const U8*)(&nv); 11513 HEXTRACT_GET_SUBNORMAL(nv); 11514 HEXTRACT_IMPLICIT_BIT(nv); 11515 HEXTRACT_TOP_NYBBLE(5); /* 6 */ 11516 HEXTRACT_BYTE(6); /* 5 */ 11517 HEXTRACT_BYTE(7); /* 4 */ 11518 HEXTRACT_BYTE(0); /* 3 */ 11519 HEXTRACT_BYTE(1); /* 2 */ 11520 HEXTRACT_BYTE(2); /* 1 */ 11521 HEXTRACT_BYTE(3); /* 0 */ 11522 # else 11523 # define HEXTRACT_FALLBACK 11524 # endif 11525 # else 11526 # define HEXTRACT_FALLBACK 11527 # endif 11528 #endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */ 11529 11530 #ifdef HEXTRACT_FALLBACK 11531 HEXTRACT_GET_SUBNORMAL(nv); 11532 # undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */ 11533 /* The fallback is used for the double-double format, and 11534 * for unknown long double formats, and for unknown double 11535 * formats, or in general unknown NV formats. */ 11536 if (nv == (NV)0.0) { 11537 if (vend) 11538 *v++ = 0; 11539 else 11540 v++; 11541 *exponent = 0; 11542 } 11543 else { 11544 NV d = nv < 0 ? -nv : nv; 11545 NV e = (NV)1.0; 11546 U8 ha = 0x0; /* hexvalue accumulator */ 11547 U8 hd = 0x8; /* hexvalue digit */ 11548 11549 /* Shift d and e (and update exponent) so that e <= d < 2*e, 11550 * this is essentially manual frexp(). Multiplying by 0.5 and 11551 * doubling should be lossless in binary floating point. */ 11552 11553 *exponent = 1; 11554 11555 while (e > d) { 11556 e *= (NV)0.5; 11557 (*exponent)--; 11558 } 11559 /* Now d >= e */ 11560 11561 while (d >= e + e) { 11562 e += e; 11563 (*exponent)++; 11564 } 11565 /* Now e <= d < 2*e */ 11566 11567 /* First extract the leading hexdigit (the implicit bit). */ 11568 if (d >= e) { 11569 d -= e; 11570 if (vend) 11571 *v++ = 1; 11572 else 11573 v++; 11574 } 11575 else { 11576 if (vend) 11577 *v++ = 0; 11578 else 11579 v++; 11580 } 11581 e *= (NV)0.5; 11582 11583 /* Then extract the remaining hexdigits. */ 11584 while (d > (NV)0.0) { 11585 if (d >= e) { 11586 ha |= hd; 11587 d -= e; 11588 } 11589 if (hd == 1) { 11590 /* Output or count in groups of four bits, 11591 * that is, when the hexdigit is down to one. */ 11592 if (vend) 11593 *v++ = ha; 11594 else 11595 v++; 11596 /* Reset the hexvalue. */ 11597 ha = 0x0; 11598 hd = 0x8; 11599 } 11600 else 11601 hd >>= 1; 11602 e *= (NV)0.5; 11603 } 11604 11605 /* Flush possible pending hexvalue. */ 11606 if (ha) { 11607 if (vend) 11608 *v++ = ha; 11609 else 11610 v++; 11611 } 11612 } 11613 #endif 11614 } 11615 /* Croak for various reasons: if the output pointer escaped the 11616 * output buffer, if the extraction index escaped the extraction 11617 * buffer, or if the ending output pointer didn't match the 11618 * previously computed value. */ 11619 if (v <= vhex || v - vhex >= VHEX_SIZE || 11620 /* For double-double the ixmin and ixmax stay at zero, 11621 * which is convenient since the HEXTRACTSIZE is tricky 11622 * for double-double. */ 11623 ixmin < 0 || ixmax >= NVSIZE || 11624 (vend && v != vend)) { 11625 /* diag_listed_as: Hexadecimal float: internal error (%s) */ 11626 Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)"); 11627 } 11628 return v; 11629 } 11630 11631 11632 /* S_format_hexfp(): helper function for Perl_sv_vcatpvfn_flags(). 11633 * 11634 * Processes the %a/%A hexadecimal floating-point format, since the 11635 * built-in snprintf()s which are used for most of the f/p formats, don't 11636 * universally handle %a/%A. 11637 * Populates buf of length bufsize, and returns the length of the created 11638 * string. 11639 * The rest of the args have the same meaning as the local vars of the 11640 * same name within Perl_sv_vcatpvfn_flags(). 11641 * 11642 * The caller's determination of IN_LC(LC_NUMERIC), passed as in_lc_numeric, 11643 * is used to ensure we do the right thing when we need to access the locale's 11644 * numeric radix. 11645 * 11646 * It requires the caller to make buf large enough. 11647 */ 11648 11649 static STRLEN 11650 S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, 11651 const NV nv, const vcatpvfn_long_double_t fv, 11652 bool has_precis, STRLEN precis, STRLEN width, 11653 bool alt, char plus, bool left, bool fill, bool in_lc_numeric) 11654 { 11655 /* Hexadecimal floating point. */ 11656 char* p = buf; 11657 U8 vhex[VHEX_SIZE]; 11658 U8* v = vhex; /* working pointer to vhex */ 11659 U8* vend; /* pointer to one beyond last digit of vhex */ 11660 U8* vfnz = NULL; /* first non-zero */ 11661 U8* vlnz = NULL; /* last non-zero */ 11662 U8* v0 = NULL; /* first output */ 11663 const bool lower = (c == 'a'); 11664 /* At output the values of vhex (up to vend) will 11665 * be mapped through the xdig to get the actual 11666 * human-readable xdigits. */ 11667 const char* xdig = PL_hexdigit; 11668 STRLEN zerotail = 0; /* how many extra zeros to append */ 11669 int exponent = 0; /* exponent of the floating point input */ 11670 bool hexradix = FALSE; /* should we output the radix */ 11671 bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */ 11672 bool negative = FALSE; 11673 STRLEN elen; 11674 11675 /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf". 11676 * 11677 * For example with denormals, (assuming the vanilla 11678 * 64-bit double): the exponent is zero. 1xp-1074 is 11679 * the smallest denormal and the smallest double, it 11680 * could be output also as 0x0.0000000000001p-1022 to 11681 * match its internal structure. */ 11682 11683 vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL); 11684 S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend); 11685 11686 #if NVSIZE > DOUBLESIZE 11687 # ifdef HEXTRACT_HAS_IMPLICIT_BIT 11688 /* In this case there is an implicit bit, 11689 * and therefore the exponent is shifted by one. */ 11690 exponent--; 11691 # elif defined(NV_X86_80_BIT) 11692 if (subnormal) { 11693 /* The subnormals of the x86-80 have a base exponent of -16382, 11694 * (while the physical exponent bits are zero) but the frexp() 11695 * returned the scientific-style floating exponent. We want 11696 * to map the last one as: 11697 * -16831..-16384 -> -16382 (the last normal is 0x1p-16382) 11698 * -16835..-16388 -> -16384 11699 * since we want to keep the first hexdigit 11700 * as one of the [8421]. */ 11701 exponent = -4 * ( (exponent + 1) / -4) - 2; 11702 } else { 11703 exponent -= 4; 11704 } 11705 /* TBD: other non-implicit-bit platforms than the x86-80. */ 11706 # endif 11707 #endif 11708 11709 negative = fv < 0 || Perl_signbit(nv); 11710 if (negative) 11711 *p++ = '-'; 11712 else if (plus) 11713 *p++ = plus; 11714 *p++ = '0'; 11715 if (lower) { 11716 *p++ = 'x'; 11717 } 11718 else { 11719 *p++ = 'X'; 11720 xdig += 16; /* Use uppercase hex. */ 11721 } 11722 11723 /* Find the first non-zero xdigit. */ 11724 for (v = vhex; v < vend; v++) { 11725 if (*v) { 11726 vfnz = v; 11727 break; 11728 } 11729 } 11730 11731 if (vfnz) { 11732 /* Find the last non-zero xdigit. */ 11733 for (v = vend - 1; v >= vhex; v--) { 11734 if (*v) { 11735 vlnz = v; 11736 break; 11737 } 11738 } 11739 11740 #if NVSIZE == DOUBLESIZE 11741 if (fv != 0.0) 11742 exponent--; 11743 #endif 11744 11745 if (subnormal) { 11746 #ifndef NV_X86_80_BIT 11747 if (vfnz[0] > 1) { 11748 /* IEEE 754 subnormals (but not the x86 80-bit): 11749 * we want "normalize" the subnormal, 11750 * so we need to right shift the hex nybbles 11751 * so that the output of the subnormal starts 11752 * from the first true bit. (Another, equally 11753 * valid, policy would be to dump the subnormal 11754 * nybbles as-is, to display the "physical" layout.) */ 11755 int i, n; 11756 U8 *vshr; 11757 /* Find the ceil(log2(v[0])) of 11758 * the top non-zero nybble. */ 11759 for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { } 11760 assert(n < 4); 11761 assert(vlnz); 11762 vlnz[1] = 0; 11763 for (vshr = vlnz; vshr >= vfnz; vshr--) { 11764 vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n); 11765 vshr[0] >>= n; 11766 } 11767 if (vlnz[1]) { 11768 vlnz++; 11769 } 11770 } 11771 #endif 11772 v0 = vfnz; 11773 } else { 11774 v0 = vhex; 11775 } 11776 11777 if (has_precis) { 11778 U8* ve = (subnormal ? vlnz + 1 : vend); 11779 SSize_t vn = ve - v0; 11780 assert(vn >= 1); 11781 if (precis < (Size_t)(vn - 1)) { 11782 bool overflow = FALSE; 11783 if (v0[precis + 1] < 0x8) { 11784 /* Round down, nothing to do. */ 11785 } else if (v0[precis + 1] > 0x8) { 11786 /* Round up. */ 11787 v0[precis]++; 11788 overflow = v0[precis] > 0xF; 11789 v0[precis] &= 0xF; 11790 } else { /* v0[precis] == 0x8 */ 11791 /* Half-point: round towards the one 11792 * with the even least-significant digit: 11793 * 08 -> 0 88 -> 8 11794 * 18 -> 2 98 -> a 11795 * 28 -> 2 a8 -> a 11796 * 38 -> 4 b8 -> c 11797 * 48 -> 4 c8 -> c 11798 * 58 -> 6 d8 -> e 11799 * 68 -> 6 e8 -> e 11800 * 78 -> 8 f8 -> 10 */ 11801 if ((v0[precis] & 0x1)) { 11802 v0[precis]++; 11803 } 11804 overflow = v0[precis] > 0xF; 11805 v0[precis] &= 0xF; 11806 } 11807 11808 if (overflow) { 11809 for (v = v0 + precis - 1; v >= v0; v--) { 11810 (*v)++; 11811 overflow = *v > 0xF; 11812 (*v) &= 0xF; 11813 if (!overflow) { 11814 break; 11815 } 11816 } 11817 if (v == v0 - 1 && overflow) { 11818 /* If the overflow goes all the 11819 * way to the front, we need to 11820 * insert 0x1 in front, and adjust 11821 * the exponent. */ 11822 Move(v0, v0 + 1, vn - 1, char); 11823 *v0 = 0x1; 11824 exponent += 4; 11825 } 11826 } 11827 11828 /* The new effective "last non zero". */ 11829 vlnz = v0 + precis; 11830 } 11831 else { 11832 zerotail = 11833 subnormal ? precis - vn + 1 : 11834 precis - (vlnz - vhex); 11835 } 11836 } 11837 11838 v = v0; 11839 *p++ = xdig[*v++]; 11840 11841 /* If there are non-zero xdigits, the radix 11842 * is output after the first one. */ 11843 if (vfnz < vlnz) { 11844 hexradix = TRUE; 11845 } 11846 } 11847 else { 11848 *p++ = '0'; 11849 exponent = 0; 11850 zerotail = has_precis ? precis : 0; 11851 } 11852 11853 /* The radix is always output if precis, or if alt. */ 11854 if ((has_precis && precis > 0) || alt) { 11855 hexradix = TRUE; 11856 } 11857 11858 if (hexradix) { 11859 #ifndef USE_LOCALE_NUMERIC 11860 PERL_UNUSED_ARG(in_lc_numeric); 11861 11862 *p++ = '.'; 11863 #else 11864 if (in_lc_numeric) { 11865 STRLEN n; 11866 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(TRUE, { 11867 const char* r = SvPV(PL_numeric_radix_sv, n); 11868 Copy(r, p, n, char); 11869 }); 11870 p += n; 11871 } 11872 else { 11873 *p++ = '.'; 11874 } 11875 #endif 11876 } 11877 11878 if (vlnz) { 11879 while (v <= vlnz) 11880 *p++ = xdig[*v++]; 11881 } 11882 11883 if (zerotail > 0) { 11884 while (zerotail--) { 11885 *p++ = '0'; 11886 } 11887 } 11888 11889 elen = p - buf; 11890 11891 /* sanity checks */ 11892 if (elen >= bufsize || width >= bufsize) 11893 /* diag_listed_as: Hexadecimal float: internal error (%s) */ 11894 Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)"); 11895 11896 elen += my_snprintf(p, bufsize - elen, 11897 "%c%+d", lower ? 'p' : 'P', 11898 exponent); 11899 11900 if (elen < width) { 11901 STRLEN gap = (STRLEN)(width - elen); 11902 if (left) { 11903 /* Pad the back with spaces. */ 11904 memset(buf + elen, ' ', gap); 11905 } 11906 else if (fill) { 11907 /* Insert the zeros after the "0x" and the 11908 * the potential sign, but before the digits, 11909 * otherwise we end up with "0000xH.HHH...", 11910 * when we want "0x000H.HHH..." */ 11911 STRLEN nzero = gap; 11912 char* zerox = buf + 2; 11913 STRLEN nmove = elen - 2; 11914 if (negative || plus) { 11915 zerox++; 11916 nmove--; 11917 } 11918 Move(zerox, zerox + nzero, nmove, char); 11919 memset(zerox, fill ? '0' : ' ', nzero); 11920 } 11921 else { 11922 /* Move it to the right. */ 11923 Move(buf, buf + gap, 11924 elen, char); 11925 /* Pad the front with spaces. */ 11926 memset(buf, ' ', gap); 11927 } 11928 elen = width; 11929 } 11930 return elen; 11931 } 11932 11933 /* 11934 =for apidoc sv_vcatpvfn 11935 =for apidoc_item sv_vcatpvfn_flags 11936 11937 These process their arguments like C<L<vsprintf(3)>> and append the formatted output 11938 to an SV. They use an array of SVs if the C-style variable argument list is 11939 missing (C<NULL>). Argument reordering (using format specifiers like C<%2$d> or 11940 C<%*2$d>) is supported only when using an array of SVs; using a C-style 11941 C<va_list> argument list with a format string that uses argument reordering 11942 will yield an exception. 11943 11944 When running with taint checks enabled, they indicate via C<maybe_tainted> if 11945 results are untrustworthy (often due to the use of locales). 11946 11947 They assume that C<pat> has the same utf8-ness as C<sv>. It's the caller's 11948 responsibility to ensure that this is so. 11949 11950 They differ in that C<sv_vcatpvfn_flags> has a C<flags> parameter in which you 11951 can set or clear the C<SV_GMAGIC> and/or S<SV_SMAGIC> flags, to specify which 11952 magic to handle or not handle; whereas plain C<sv_vcatpvfn> always specifies 11953 both 'get' and 'set' magic. 11954 11955 They are usually used via one of the frontends L</C<sv_vcatpvf>> and 11956 L</C<sv_vcatpvf_mg>>. 11957 11958 =cut 11959 */ 11960 11961 11962 void 11963 Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, 11964 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted, 11965 const U32 flags) 11966 { 11967 const char *fmtstart; /* character following the current '%' */ 11968 const char *q; /* current position within format */ 11969 const char *patend; 11970 STRLEN origlen; 11971 Size_t svix = 0; 11972 static const char nullstr[] = "(null)"; 11973 bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ 11974 const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ 11975 /* Times 4: a decimal digit takes more than 3 binary digits. 11976 * NV_DIG: mantissa takes that many decimal digits. 11977 * Plus 32: Playing safe. */ 11978 char ebuf[IV_DIG * 4 + NV_DIG + 32]; 11979 bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ 11980 #ifdef USE_LOCALE_NUMERIC 11981 bool have_in_lc_numeric = FALSE; 11982 #endif 11983 /* we never change this unless USE_LOCALE_NUMERIC */ 11984 bool in_lc_numeric = FALSE; 11985 11986 PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS; 11987 PERL_UNUSED_ARG(maybe_tainted); 11988 11989 if (flags & SV_GMAGIC) 11990 SvGETMAGIC(sv); 11991 11992 /* no matter what, this is a string now */ 11993 (void)SvPV_force_nomg(sv, origlen); 11994 11995 /* the code that scans for flags etc following a % relies on 11996 * a '\0' being present to avoid falling off the end. Ideally that 11997 * should be fixed */ 11998 assert(pat[patlen] == '\0'); 11999 12000 12001 /* Special-case "", "%s", "%-p" (SVf - see below) and "%.0f". 12002 * In each case, if there isn't the correct number of args, instead 12003 * fall through to the main code to handle the issuing of any 12004 * warnings etc. 12005 */ 12006 12007 if (patlen == 0 && (args || sv_count == 0)) 12008 return; 12009 12010 if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) { 12011 12012 /* "%s" */ 12013 if (patlen == 2 && pat[1] == 's') { 12014 if (args) { 12015 const char * const s = va_arg(*args, char*); 12016 sv_catpv_nomg(sv, s ? s : nullstr); 12017 } 12018 else { 12019 /* we want get magic on the source but not the target. 12020 * sv_catsv can't do that, though */ 12021 SvGETMAGIC(*svargs); 12022 sv_catsv_nomg(sv, *svargs); 12023 } 12024 return; 12025 } 12026 12027 /* "%-p" */ 12028 if (args) { 12029 if (patlen == 3 && pat[1] == '-' && pat[2] == 'p') { 12030 SV *asv = MUTABLE_SV(va_arg(*args, void*)); 12031 sv_catsv_nomg(sv, asv); 12032 return; 12033 } 12034 } 12035 #if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH) 12036 /* special-case "%.0f" */ 12037 else if ( patlen == 4 12038 && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f') 12039 { 12040 const NV nv = SvNV(*svargs); 12041 if (LIKELY(!Perl_isinfnan(nv))) { 12042 STRLEN l; 12043 char *p; 12044 12045 if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { 12046 sv_catpvn_nomg(sv, p, l); 12047 return; 12048 } 12049 } 12050 } 12051 #endif /* !USE_LONG_DOUBLE */ 12052 } 12053 12054 12055 patend = (char*)pat + patlen; 12056 for (fmtstart = pat; fmtstart < patend; fmtstart = q) { 12057 char intsize = 0; /* size qualifier in "%hi..." etc */ 12058 bool alt = FALSE; /* has "%#..." */ 12059 bool left = FALSE; /* has "%-..." */ 12060 bool fill = FALSE; /* has "%0..." */ 12061 char plus = 0; /* has "%+..." */ 12062 STRLEN width = 0; /* value of "%NNN..." */ 12063 bool has_precis = FALSE; /* has "%.NNN..." */ 12064 STRLEN precis = 0; /* value of "%.NNN..." */ 12065 int base = 0; /* base to print in, e.g. 8 for %o */ 12066 UV uv = 0; /* the value to print of int-ish args */ 12067 12068 bool vectorize = FALSE; /* has "%v..." */ 12069 bool vec_utf8 = FALSE; /* SvUTF8(vec arg) */ 12070 const U8 *vecstr = NULL; /* SvPVX(vec arg) */ 12071 STRLEN veclen = 0; /* SvCUR(vec arg) */ 12072 const char *dotstr = NULL; /* separator string for %v */ 12073 STRLEN dotstrlen; /* length of separator string for %v */ 12074 12075 Size_t efix = 0; /* explicit format parameter index */ 12076 const Size_t osvix = svix; /* original index in case of bad fmt */ 12077 12078 SV *argsv = NULL; 12079 bool is_utf8 = FALSE; /* is this item utf8? */ 12080 bool arg_missing = FALSE; /* give "Missing argument" warning */ 12081 char esignbuf[4]; /* holds sign prefix, e.g. "-0x" */ 12082 STRLEN esignlen = 0; /* length of e.g. "-0x" */ 12083 STRLEN zeros = 0; /* how many '0' to prepend */ 12084 12085 const char *eptr = NULL; /* the address of the element string */ 12086 STRLEN elen = 0; /* the length of the element string */ 12087 12088 char c; /* the actual format ('d', s' etc) */ 12089 12090 12091 /* echo everything up to the next format specification */ 12092 for (q = fmtstart; q < patend && *q != '%'; ++q) 12093 {}; 12094 12095 if (q > fmtstart) { 12096 if (has_utf8 && !pat_utf8) { 12097 /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on 12098 * the fly */ 12099 const char *p; 12100 char *dst; 12101 STRLEN need = SvCUR(sv) + (q - fmtstart) + 1; 12102 12103 for (p = fmtstart; p < q; p++) 12104 if (!NATIVE_BYTE_IS_INVARIANT(*p)) 12105 need++; 12106 SvGROW(sv, need); 12107 12108 dst = SvEND(sv); 12109 for (p = fmtstart; p < q; p++) 12110 append_utf8_from_native_byte((U8)*p, (U8**)&dst); 12111 *dst = '\0'; 12112 SvCUR_set(sv, need - 1); 12113 } 12114 else 12115 S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart); 12116 } 12117 if (q++ >= patend) 12118 break; 12119 12120 fmtstart = q; /* fmtstart is char following the '%' */ 12121 12122 /* 12123 We allow format specification elements in this order: 12124 \d+\$ explicit format parameter index 12125 [-+ 0#]+ flags 12126 v|\*(\d+\$)?v vector with optional (optionally specified) arg 12127 0 flag (as above): repeated to allow "v02" 12128 \d+|\*(\d+\$)? width using optional (optionally specified) arg 12129 \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg 12130 [hlqLV] size 12131 [%bcdefginopsuxDFOUX] format (mandatory) 12132 */ 12133 12134 if (inRANGE(*q, '1', '9')) { 12135 width = expect_number(&q); 12136 if (*q == '$') { 12137 if (args) 12138 Perl_croak_nocontext( 12139 "Cannot yet reorder sv_vcatpvfn() arguments from va_list"); 12140 ++q; 12141 efix = (Size_t)width; 12142 width = 0; 12143 no_redundant_warning = TRUE; 12144 } else { 12145 goto gotwidth; 12146 } 12147 } 12148 12149 /* FLAGS */ 12150 12151 while (*q) { 12152 switch (*q) { 12153 case ' ': 12154 case '+': 12155 if (plus == '+' && *q == ' ') /* '+' over ' ' */ 12156 q++; 12157 else 12158 plus = *q++; 12159 continue; 12160 12161 case '-': 12162 left = TRUE; 12163 q++; 12164 continue; 12165 12166 case '0': 12167 fill = TRUE; 12168 q++; 12169 continue; 12170 12171 case '#': 12172 alt = TRUE; 12173 q++; 12174 continue; 12175 12176 default: 12177 break; 12178 } 12179 break; 12180 } 12181 12182 /* at this point we can expect one of: 12183 * 12184 * 123 an explicit width 12185 * * width taken from next arg 12186 * *12$ width taken from 12th arg 12187 * or no width 12188 * 12189 * But any width specification may be preceded by a v, in one of its 12190 * forms: 12191 * v 12192 * *v 12193 * *12$v 12194 * So an asterisk may be either a width specifier or a vector 12195 * separator arg specifier, and we don't know which initially 12196 */ 12197 12198 tryasterisk: 12199 if (*q == '*') { 12200 STRLEN ix; /* explicit width/vector separator index */ 12201 q++; 12202 if (inRANGE(*q, '1', '9')) { 12203 ix = expect_number(&q); 12204 if (*q++ == '$') { 12205 if (args) 12206 Perl_croak_nocontext( 12207 "Cannot yet reorder sv_vcatpvfn() arguments from va_list"); 12208 no_redundant_warning = TRUE; 12209 } else 12210 goto unknown; 12211 } 12212 else 12213 ix = 0; 12214 12215 if (*q == 'v') { 12216 SV *vecsv; 12217 /* The asterisk was for *v, *NNN$v: vectorizing, but not 12218 * with the default "." */ 12219 q++; 12220 if (vectorize) 12221 goto unknown; 12222 if (args) 12223 vecsv = va_arg(*args, SV*); 12224 else { 12225 ix = ix ? ix - 1 : svix++; 12226 vecsv = ix < sv_count ? svargs[ix] 12227 : (arg_missing = TRUE, &PL_sv_no); 12228 } 12229 dotstr = SvPV_const(vecsv, dotstrlen); 12230 /* Keep the DO_UTF8 test *after* the SvPV call, else things go 12231 bad with tied or overloaded values that return UTF8. */ 12232 if (DO_UTF8(vecsv)) 12233 is_utf8 = TRUE; 12234 else if (has_utf8) { 12235 vecsv = sv_mortalcopy(vecsv); 12236 sv_utf8_upgrade(vecsv); 12237 dotstr = SvPV_const(vecsv, dotstrlen); 12238 is_utf8 = TRUE; 12239 } 12240 vectorize = TRUE; 12241 goto tryasterisk; 12242 } 12243 12244 /* the asterisk specified a width */ 12245 { 12246 int i = 0; 12247 SV *width_sv = NULL; 12248 if (args) 12249 i = va_arg(*args, int); 12250 else { 12251 ix = ix ? ix - 1 : svix++; 12252 width_sv = (ix < sv_count) ? svargs[ix] 12253 : (arg_missing = TRUE, (SV*)NULL); 12254 } 12255 width = S_sprintf_arg_num_val(aTHX_ args, i, width_sv, &left); 12256 } 12257 } 12258 else if (*q == 'v') { 12259 q++; 12260 if (vectorize) 12261 goto unknown; 12262 vectorize = TRUE; 12263 dotstr = "."; 12264 dotstrlen = 1; 12265 goto tryasterisk; 12266 12267 } 12268 else { 12269 /* explicit width? */ 12270 if(*q == '0') { 12271 fill = TRUE; 12272 q++; 12273 } 12274 if (inRANGE(*q, '1', '9')) 12275 width = expect_number(&q); 12276 } 12277 12278 gotwidth: 12279 12280 /* PRECISION */ 12281 12282 if (*q == '.') { 12283 q++; 12284 if (*q == '*') { 12285 STRLEN ix; /* explicit precision index */ 12286 q++; 12287 if (inRANGE(*q, '1', '9')) { 12288 ix = expect_number(&q); 12289 if (*q++ == '$') { 12290 if (args) 12291 Perl_croak_nocontext( 12292 "Cannot yet reorder sv_vcatpvfn() arguments from va_list"); 12293 no_redundant_warning = TRUE; 12294 } else 12295 goto unknown; 12296 } 12297 else 12298 ix = 0; 12299 12300 { 12301 int i = 0; 12302 SV *width_sv = NULL; 12303 bool neg = FALSE; 12304 12305 if (args) 12306 i = va_arg(*args, int); 12307 else { 12308 ix = ix ? ix - 1 : svix++; 12309 width_sv = (ix < sv_count) ? svargs[ix] 12310 : (arg_missing = TRUE, (SV*)NULL); 12311 } 12312 precis = S_sprintf_arg_num_val(aTHX_ args, i, width_sv, &neg); 12313 has_precis = !neg; 12314 /* ignore negative precision */ 12315 if (!has_precis) 12316 precis = 0; 12317 } 12318 } 12319 else { 12320 /* although it doesn't seem documented, this code has long 12321 * behaved so that: 12322 * no digits following the '.' is treated like '.0' 12323 * the number may be preceded by any number of zeroes, 12324 * e.g. "%.0001f", which is the same as "%.1f" 12325 * so I've kept that behaviour. DAPM May 2017 12326 */ 12327 while (*q == '0') 12328 q++; 12329 precis = inRANGE(*q, '1', '9') ? expect_number(&q) : 0; 12330 has_precis = TRUE; 12331 } 12332 } 12333 12334 /* SIZE */ 12335 12336 switch (*q) { 12337 #ifdef WIN32 12338 case 'I': /* Ix, I32x, and I64x */ 12339 # ifdef USE_64_BIT_INT 12340 if (q[1] == '6' && q[2] == '4') { 12341 q += 3; 12342 intsize = 'q'; 12343 break; 12344 } 12345 # endif 12346 if (q[1] == '3' && q[2] == '2') { 12347 q += 3; 12348 break; 12349 } 12350 # ifdef USE_64_BIT_INT 12351 intsize = 'q'; 12352 # endif 12353 q++; 12354 break; 12355 #endif 12356 #if (IVSIZE >= 8 || defined(HAS_LONG_DOUBLE)) || \ 12357 (IVSIZE == 4 && !defined(HAS_LONG_DOUBLE)) 12358 case 'L': /* Ld */ 12359 /* FALLTHROUGH */ 12360 # if IVSIZE >= 8 12361 case 'q': /* qd */ 12362 # endif 12363 intsize = 'q'; 12364 q++; 12365 break; 12366 #endif 12367 case 'l': 12368 ++q; 12369 #if (IVSIZE >= 8 || defined(HAS_LONG_DOUBLE)) || \ 12370 (IVSIZE == 4 && !defined(HAS_LONG_DOUBLE)) 12371 if (*q == 'l') { /* lld, llf */ 12372 intsize = 'q'; 12373 ++q; 12374 } 12375 else 12376 #endif 12377 intsize = 'l'; 12378 break; 12379 case 'h': 12380 if (*++q == 'h') { /* hhd, hhu */ 12381 intsize = 'c'; 12382 ++q; 12383 } 12384 else 12385 intsize = 'h'; 12386 break; 12387 #ifdef USE_QUADMATH 12388 case 'Q': 12389 #endif 12390 case 'V': 12391 case 'z': 12392 case 't': 12393 case 'j': 12394 intsize = *q++; 12395 break; 12396 } 12397 12398 /* CONVERSION */ 12399 12400 c = *q++; /* c now holds the conversion type */ 12401 12402 /* '%' doesn't have an arg, so skip arg processing */ 12403 if (c == '%') { 12404 eptr = q - 1; 12405 elen = 1; 12406 if (vectorize) 12407 goto unknown; 12408 goto string; 12409 } 12410 12411 if (vectorize && !memCHRs("BbDdiOouUXx", c)) 12412 goto unknown; 12413 12414 /* get next arg (individual branches do their own va_arg() 12415 * handling for the args case) */ 12416 12417 if (!args) { 12418 efix = efix ? efix - 1 : svix++; 12419 argsv = efix < sv_count ? svargs[efix] 12420 : (arg_missing = TRUE, &PL_sv_no); 12421 } 12422 12423 12424 switch (c) { 12425 12426 /* STRINGS */ 12427 12428 case 's': 12429 if (args) { 12430 eptr = va_arg(*args, char*); 12431 if (eptr) 12432 if (has_precis) 12433 elen = my_strnlen(eptr, precis); 12434 else 12435 elen = strlen(eptr); 12436 else { 12437 eptr = (char *)nullstr; 12438 elen = sizeof nullstr - 1; 12439 } 12440 } 12441 else { 12442 eptr = SvPV_const(argsv, elen); 12443 if (DO_UTF8(argsv)) { 12444 STRLEN old_precis = precis; 12445 if (has_precis && precis < elen) { 12446 STRLEN ulen = sv_or_pv_len_utf8(argsv, eptr, elen); 12447 STRLEN p = precis > ulen ? ulen : precis; 12448 precis = sv_or_pv_pos_u2b(argsv, eptr, p, 0); 12449 /* sticks at end */ 12450 } 12451 if (width) { /* fudge width (can't fudge elen) */ 12452 if (has_precis && precis < elen) 12453 width += precis - old_precis; 12454 else 12455 width += 12456 elen - sv_or_pv_len_utf8(argsv,eptr,elen); 12457 } 12458 is_utf8 = TRUE; 12459 } 12460 } 12461 12462 string: 12463 if (has_precis && precis < elen) 12464 elen = precis; 12465 break; 12466 12467 /* INTEGERS */ 12468 12469 case 'p': 12470 12471 /* %p extensions: 12472 * 12473 * "%...p" is normally treated like "%...x", except that the 12474 * number to print is the SV's address (or a pointer address 12475 * for C-ish sprintf). 12476 * 12477 * However, the C-ish sprintf variant allows a few special 12478 * extensions. These are currently: 12479 * 12480 * %-p (SVf) Like %s, but gets the string from an SV* 12481 * arg rather than a char* arg. 12482 * (This was previously %_). 12483 * 12484 * %-<num>p Ditto but like %.<num>s (i.e. num is max width) 12485 * 12486 * %2p (HEKf) Like %s, but using the key string in a HEK 12487 * 12488 * %3p (HEKf256) Ditto but like %.256s 12489 * 12490 * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args: 12491 * (cBOOL(utf8), len, string_buf). 12492 * It's handled by the "case 'd'" branch 12493 * rather than here. 12494 * 12495 * %<num>p where num is 1 or > 4: reserved for future 12496 * extensions. Warns, but then is treated as a 12497 * general %p (print hex address) format. 12498 */ 12499 12500 if ( args 12501 && !intsize 12502 && !fill 12503 && !plus 12504 && !has_precis 12505 /* not %*p or %*1$p - any width was explicit */ 12506 && q[-2] != '*' 12507 && q[-2] != '$' 12508 ) { 12509 if (left) { /* %-p (SVf), %-NNNp */ 12510 if (width) { 12511 precis = width; 12512 has_precis = TRUE; 12513 } 12514 argsv = MUTABLE_SV(va_arg(*args, void*)); 12515 eptr = SvPV_const(argsv, elen); 12516 if (DO_UTF8(argsv)) 12517 is_utf8 = TRUE; 12518 width = 0; 12519 goto string; 12520 } 12521 else if (width == 2 || width == 3) { /* HEKf, HEKf256 */ 12522 HEK * const hek = va_arg(*args, HEK *); 12523 eptr = HEK_KEY(hek); 12524 elen = HEK_LEN(hek); 12525 if (HEK_UTF8(hek)) 12526 is_utf8 = TRUE; 12527 if (width == 3) { 12528 precis = 256; 12529 has_precis = TRUE; 12530 } 12531 width = 0; 12532 goto string; 12533 } 12534 else if (width) { 12535 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 12536 "internal %%<num>p might conflict with future printf extensions"); 12537 } 12538 } 12539 12540 /* treat as normal %...p */ 12541 12542 uv = PTR2UV(args ? va_arg(*args, void*) : argsv); 12543 base = 16; 12544 c = 'x'; /* in case the format string contains '#' */ 12545 goto do_integer; 12546 12547 case 'c': 12548 /* Ignore any size specifiers, since they're not documented as 12549 * being allowed for %c (ideally we should warn on e.g. '%hc'). 12550 * Setting a default intsize, along with a positive 12551 * (which signals unsigned) base, causes, for C-ish use, the 12552 * va_arg to be interpreted as an unsigned int, when it's 12553 * actually signed, which will convert -ve values to high +ve 12554 * values. Note that unlike the libc %c, values > 255 will 12555 * convert to high unicode points rather than being truncated 12556 * to 8 bits. For perlish use, it will do SvUV(argsv), which 12557 * will again convert -ve args to high -ve values. 12558 */ 12559 intsize = 0; 12560 base = 1; /* special value that indicates we're doing a 'c' */ 12561 goto get_int_arg_val; 12562 12563 case 'D': 12564 #ifdef IV_IS_QUAD 12565 intsize = 'q'; 12566 #else 12567 intsize = 'l'; 12568 #endif 12569 base = -10; 12570 goto get_int_arg_val; 12571 12572 case 'd': 12573 /* probably just a plain %d, but it might be the start of the 12574 * special UTF8f format, which usually looks something like 12575 * "%d%lu%4p" (the lu may vary by platform) 12576 */ 12577 assert((UTF8f)[0] == 'd'); 12578 assert((UTF8f)[1] == '%'); 12579 12580 if ( args /* UTF8f only valid for C-ish sprintf */ 12581 && q == fmtstart + 1 /* plain %d, not %....d */ 12582 && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */ 12583 && *q == '%' 12584 && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 3)) 12585 { 12586 /* The argument has already gone through cBOOL, so the cast 12587 is safe. */ 12588 is_utf8 = (bool)va_arg(*args, int); 12589 elen = va_arg(*args, UV); 12590 /* if utf8 length is larger than 0x7ffff..., then it might 12591 * have been a signed value that wrapped */ 12592 if (elen > ((~(STRLEN)0) >> 1)) { 12593 assert(0); /* in DEBUGGING build we want to crash */ 12594 elen = 0; /* otherwise we want to treat this as an empty string */ 12595 } 12596 eptr = va_arg(*args, char *); 12597 q += sizeof(UTF8f) - 2; 12598 goto string; 12599 } 12600 12601 /* FALLTHROUGH */ 12602 case 'i': 12603 base = -10; 12604 goto get_int_arg_val; 12605 12606 case 'U': 12607 #ifdef IV_IS_QUAD 12608 intsize = 'q'; 12609 #else 12610 intsize = 'l'; 12611 #endif 12612 /* FALLTHROUGH */ 12613 case 'u': 12614 base = 10; 12615 goto get_int_arg_val; 12616 12617 case 'B': 12618 case 'b': 12619 base = 2; 12620 goto get_int_arg_val; 12621 12622 case 'O': 12623 #ifdef IV_IS_QUAD 12624 intsize = 'q'; 12625 #else 12626 intsize = 'l'; 12627 #endif 12628 /* FALLTHROUGH */ 12629 case 'o': 12630 base = 8; 12631 goto get_int_arg_val; 12632 12633 case 'X': 12634 case 'x': 12635 base = 16; 12636 12637 get_int_arg_val: 12638 12639 if (vectorize) { 12640 STRLEN ulen; 12641 SV *vecsv; 12642 12643 if (base < 0) { 12644 base = -base; 12645 if (plus) 12646 esignbuf[esignlen++] = plus; 12647 } 12648 12649 /* initialise the vector string to iterate over */ 12650 12651 vecsv = args ? va_arg(*args, SV*) : argsv; 12652 12653 /* if this is a version object, we need to convert 12654 * back into v-string notation and then let the 12655 * vectorize happen normally 12656 */ 12657 if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) { 12658 if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) { 12659 Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF), 12660 "vector argument not supported with alpha versions"); 12661 vecsv = &PL_sv_no; 12662 } 12663 else { 12664 vecstr = (U8*)SvPV_const(vecsv,veclen); 12665 vecsv = sv_newmortal(); 12666 scan_vstring((char *)vecstr, (char *)vecstr + veclen, 12667 vecsv); 12668 } 12669 } 12670 vecstr = (U8*)SvPV_const(vecsv, veclen); 12671 vec_utf8 = DO_UTF8(vecsv); 12672 12673 /* This is the re-entry point for when we're iterating 12674 * over the individual characters of a vector arg */ 12675 vector: 12676 if (!veclen) 12677 goto done_valid_conversion; 12678 if (vec_utf8) 12679 uv = utf8n_to_uvchr(vecstr, veclen, &ulen, 12680 UTF8_ALLOW_ANYUV); 12681 else { 12682 uv = *vecstr; 12683 ulen = 1; 12684 } 12685 vecstr += ulen; 12686 veclen -= ulen; 12687 } 12688 else { 12689 /* test arg for inf/nan. This can trigger an unwanted 12690 * 'str' overload, so manually force 'num' overload first 12691 * if necessary */ 12692 if (argsv) { 12693 SvGETMAGIC(argsv); 12694 if (UNLIKELY(SvAMAGIC(argsv))) 12695 argsv = sv_2num(argsv); 12696 if (UNLIKELY(isinfnansv(argsv))) 12697 goto handle_infnan_argsv; 12698 } 12699 12700 if (base < 0) { 12701 /* signed int type */ 12702 IV iv; 12703 base = -base; 12704 if (args) { 12705 switch (intsize) { 12706 case 'c': iv = (char)va_arg(*args, int); break; 12707 case 'h': iv = (short)va_arg(*args, int); break; 12708 case 'l': iv = va_arg(*args, long); break; 12709 case 'V': iv = va_arg(*args, IV); break; 12710 case 'z': iv = va_arg(*args, SSize_t); break; 12711 #ifdef HAS_PTRDIFF_T 12712 case 't': iv = va_arg(*args, ptrdiff_t); break; 12713 #endif 12714 default: iv = va_arg(*args, int); break; 12715 case 'j': iv = (IV) va_arg(*args, PERL_INTMAX_T); break; 12716 case 'q': 12717 #if IVSIZE >= 8 12718 iv = va_arg(*args, Quad_t); break; 12719 #else 12720 goto unknown; 12721 #endif 12722 } 12723 } 12724 else { 12725 /* assign to tiv then cast to iv to work around 12726 * 2003 GCC cast bug (gnu.org bugzilla #13488) */ 12727 IV tiv = SvIV_nomg(argsv); 12728 switch (intsize) { 12729 case 'c': iv = (char)tiv; break; 12730 case 'h': iv = (short)tiv; break; 12731 case 'l': iv = (long)tiv; break; 12732 case 'V': 12733 default: iv = tiv; break; 12734 case 'q': 12735 #if IVSIZE >= 8 12736 iv = (Quad_t)tiv; break; 12737 #else 12738 goto unknown; 12739 #endif 12740 } 12741 } 12742 12743 /* now convert iv to uv */ 12744 if (iv >= 0) { 12745 uv = iv; 12746 if (plus) 12747 esignbuf[esignlen++] = plus; 12748 } 12749 else { 12750 /* Using 0- here to silence bogus warning from MS VC */ 12751 uv = (UV) (0 - (UV) iv); 12752 esignbuf[esignlen++] = '-'; 12753 } 12754 } 12755 else { 12756 /* unsigned int type */ 12757 if (args) { 12758 switch (intsize) { 12759 case 'c': uv = (unsigned char)va_arg(*args, unsigned); 12760 break; 12761 case 'h': uv = (unsigned short)va_arg(*args, unsigned); 12762 break; 12763 case 'l': uv = va_arg(*args, unsigned long); break; 12764 case 'V': uv = va_arg(*args, UV); break; 12765 case 'z': uv = va_arg(*args, Size_t); break; 12766 #ifdef HAS_PTRDIFF_T 12767 /* will sign extend, but there is no 12768 * uptrdiff_t, so oh well */ 12769 case 't': uv = va_arg(*args, ptrdiff_t); break; 12770 #endif 12771 case 'j': uv = (UV) va_arg(*args, PERL_UINTMAX_T); break; 12772 default: uv = va_arg(*args, unsigned); break; 12773 case 'q': 12774 #if IVSIZE >= 8 12775 uv = va_arg(*args, Uquad_t); break; 12776 #else 12777 goto unknown; 12778 #endif 12779 } 12780 } 12781 else { 12782 /* assign to tiv then cast to iv to work around 12783 * 2003 GCC cast bug (gnu.org bugzilla #13488) */ 12784 UV tuv = SvUV_nomg(argsv); 12785 switch (intsize) { 12786 case 'c': uv = (unsigned char)tuv; break; 12787 case 'h': uv = (unsigned short)tuv; break; 12788 case 'l': uv = (unsigned long)tuv; break; 12789 case 'V': 12790 default: uv = tuv; break; 12791 case 'q': 12792 #if IVSIZE >= 8 12793 uv = (Uquad_t)tuv; break; 12794 #else 12795 goto unknown; 12796 #endif 12797 } 12798 } 12799 } 12800 } 12801 12802 do_integer: 12803 { 12804 char *ptr = ebuf + sizeof ebuf; 12805 unsigned dig; 12806 zeros = 0; 12807 12808 switch (base) { 12809 case 16: 12810 { 12811 const char * const p = 12812 (c == 'X') ? PL_hexdigit + 16 : PL_hexdigit; 12813 12814 do { 12815 dig = uv & 15; 12816 *--ptr = p[dig]; 12817 } while (uv >>= 4); 12818 if (alt && *ptr != '0') { 12819 esignbuf[esignlen++] = '0'; 12820 esignbuf[esignlen++] = c; /* 'x' or 'X' */ 12821 } 12822 break; 12823 } 12824 case 8: 12825 do { 12826 dig = uv & 7; 12827 *--ptr = '0' + dig; 12828 } while (uv >>= 3); 12829 if (alt && *ptr != '0') 12830 *--ptr = '0'; 12831 break; 12832 case 2: 12833 do { 12834 dig = uv & 1; 12835 *--ptr = '0' + dig; 12836 } while (uv >>= 1); 12837 if (alt && *ptr != '0') { 12838 esignbuf[esignlen++] = '0'; 12839 esignbuf[esignlen++] = c; /* 'b' or 'B' */ 12840 } 12841 break; 12842 12843 case 1: 12844 /* special-case: base 1 indicates a 'c' format: 12845 * we use the common code for extracting a uv, 12846 * but handle that value differently here than 12847 * all the other int types */ 12848 if ((uv > 255 || 12849 (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) 12850 && !IN_BYTES) 12851 { 12852 STATIC_ASSERT_STMT(sizeof(ebuf) >= UTF8_MAXBYTES + 1); 12853 eptr = ebuf; 12854 elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf; 12855 is_utf8 = TRUE; 12856 } 12857 else { 12858 eptr = ebuf; 12859 ebuf[0] = (char)uv; 12860 elen = 1; 12861 } 12862 goto string; 12863 12864 default: /* it had better be ten or less */ 12865 do { 12866 dig = uv % base; 12867 *--ptr = '0' + dig; 12868 } while (uv /= base); 12869 break; 12870 } 12871 elen = (ebuf + sizeof ebuf) - ptr; 12872 eptr = ptr; 12873 if (has_precis) { 12874 if (precis > elen) 12875 zeros = precis - elen; 12876 else if (precis == 0 && elen == 1 && *eptr == '0' 12877 && !(base == 8 && alt)) /* "%#.0o" prints "0" */ 12878 elen = 0; 12879 12880 /* a precision nullifies the 0 flag. */ 12881 fill = FALSE; 12882 } 12883 } 12884 break; 12885 12886 /* FLOATING POINT */ 12887 12888 case 'F': 12889 c = 'f'; /* maybe %F isn't supported here */ 12890 /* FALLTHROUGH */ 12891 case 'e': case 'E': 12892 case 'f': 12893 case 'g': case 'G': 12894 case 'a': case 'A': 12895 12896 { 12897 STRLEN float_need; /* what PL_efloatsize needs to become */ 12898 bool hexfp; /* hexadecimal floating point? */ 12899 12900 vcatpvfn_long_double_t fv; 12901 NV nv; 12902 12903 /* This is evil, but floating point is even more evil */ 12904 12905 /* for SV-style calling, we can only get NV 12906 for C-style calling, we assume %f is double; 12907 for simplicity we allow any of %Lf, %llf, %qf for long double 12908 */ 12909 switch (intsize) { 12910 #if defined(USE_QUADMATH) 12911 case 'Q': 12912 break; 12913 #endif 12914 case 'V': 12915 #if defined(USE_LONG_DOUBLE) || defined(USE_QUADMATH) 12916 intsize = 'q'; 12917 #endif 12918 break; 12919 /* [perl #20339] - we should accept and ignore %lf rather than die */ 12920 case 'l': 12921 /* FALLTHROUGH */ 12922 default: 12923 #if defined(USE_LONG_DOUBLE) || defined(USE_QUADMATH) 12924 intsize = args ? 0 : 'q'; 12925 #endif 12926 break; 12927 case 'q': 12928 #if defined(HAS_LONG_DOUBLE) 12929 break; 12930 #else 12931 /* FALLTHROUGH */ 12932 #endif 12933 case 'c': 12934 case 'h': 12935 case 'z': 12936 case 't': 12937 case 'j': 12938 goto unknown; 12939 } 12940 12941 /* Now we need (long double) if intsize == 'q', else (double). */ 12942 if (args) { 12943 /* Note: do not pull NVs off the va_list with va_arg() 12944 * (pull doubles instead) because if you have a build 12945 * with long doubles, you would always be pulling long 12946 * doubles, which would badly break anyone using only 12947 * doubles (i.e. the majority of builds). In other 12948 * words, you cannot mix doubles and long doubles. 12949 * The only case where you can pull off long doubles 12950 * is when the format specifier explicitly asks so with 12951 * e.g. "%Lg". */ 12952 #ifdef USE_QUADMATH 12953 nv = intsize == 'Q' ? va_arg(*args, NV) : 12954 intsize == 'q' ? va_arg(*args, long double) : 12955 va_arg(*args, double); 12956 fv = nv; 12957 #elif LONG_DOUBLESIZE > DOUBLESIZE 12958 if (intsize == 'q') { 12959 fv = va_arg(*args, long double); 12960 nv = fv; 12961 } else { 12962 nv = va_arg(*args, double); 12963 VCATPVFN_NV_TO_FV(nv, fv); 12964 } 12965 #else 12966 nv = va_arg(*args, double); 12967 fv = nv; 12968 #endif 12969 } 12970 else 12971 { 12972 SvGETMAGIC(argsv); 12973 /* we jump here if an int-ish format encountered an 12974 * infinite/Nan argsv. After setting nv/fv, it falls 12975 * into the isinfnan block which follows */ 12976 handle_infnan_argsv: 12977 nv = SvNV_nomg(argsv); 12978 VCATPVFN_NV_TO_FV(nv, fv); 12979 } 12980 12981 if (Perl_isinfnan(nv)) { 12982 if (c == 'c') 12983 Perl_croak(aTHX_ "Cannot printf %" NVgf " with '%c'", 12984 nv, (int)c); 12985 12986 elen = S_infnan_2pv(nv, ebuf, sizeof(ebuf), plus); 12987 assert(elen); 12988 eptr = ebuf; 12989 zeros = 0; 12990 esignlen = 0; 12991 dotstrlen = 0; 12992 break; 12993 } 12994 12995 /* special-case "%.0f" */ 12996 if ( c == 'f' 12997 && !precis 12998 && has_precis 12999 && !(width || left || plus || alt) 13000 && !fill 13001 && intsize != 'q' 13002 && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) 13003 ) 13004 goto float_concat; 13005 13006 /* Determine the buffer size needed for the various 13007 * floating-point formats. 13008 * 13009 * The basic possibilities are: 13010 * 13011 * <---P---> 13012 * %f 1111111.123456789 13013 * %e 1.111111123e+06 13014 * %a 0x1.0f4471f9bp+20 13015 * %g 1111111.12 13016 * %g 1.11111112e+15 13017 * 13018 * where P is the value of the precision in the format, or 6 13019 * if not specified. Note the two possible output formats of 13020 * %g; in both cases the number of significant digits is <= 13021 * precision. 13022 * 13023 * For most of the format types the maximum buffer size needed 13024 * is precision, plus: any leading 1 or 0x1, the radix 13025 * point, and an exponent. The difficult one is %f: for a 13026 * large positive exponent it can have many leading digits, 13027 * which needs to be calculated specially. Also %a is slightly 13028 * different in that in the absence of a specified precision, 13029 * it uses as many digits as necessary to distinguish 13030 * different values. 13031 * 13032 * First, here are the constant bits. For ease of calculation 13033 * we over-estimate the needed buffer size, for example by 13034 * assuming all formats have an exponent and a leading 0x1. 13035 * 13036 * Also for production use, add a little extra overhead for 13037 * safety's sake. Under debugging don't, as it means we're 13038 * more likely to quickly spot issues during development. 13039 */ 13040 13041 float_need = 1 /* possible unary minus */ 13042 + 4 /* "0x1" plus very unlikely carry */ 13043 + 1 /* default radix point '.' */ 13044 + 2 /* "e-", "p+" etc */ 13045 + 6 /* exponent: up to 16383 (quad fp) */ 13046 #ifndef DEBUGGING 13047 + 20 /* safety net */ 13048 #endif 13049 + 1; /* \0 */ 13050 13051 13052 /* determine the radix point len, e.g. length(".") in "1.2" */ 13053 #ifdef USE_LOCALE_NUMERIC 13054 /* note that we may either explicitly use PL_numeric_radix_sv 13055 * below, or implicitly, via an snprintf() variant. 13056 * Note also things like ps_AF.utf8 which has 13057 * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */ 13058 if (! have_in_lc_numeric) { 13059 in_lc_numeric = IN_LC(LC_NUMERIC); 13060 have_in_lc_numeric = TRUE; 13061 } 13062 13063 if (in_lc_numeric) { 13064 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(TRUE, { 13065 /* this can't wrap unless PL_numeric_radix_sv is a string 13066 * consuming virtually all the 32-bit or 64-bit address 13067 * space 13068 */ 13069 float_need += (SvCUR(PL_numeric_radix_sv) - 1); 13070 13071 /* floating-point formats only get utf8 if the radix point 13072 * is utf8. All other characters in the string are < 128 13073 * and so can be safely appended to both a non-utf8 and utf8 13074 * string as-is. 13075 * Note that this will convert the output to utf8 even if 13076 * the radix point didn't get output. 13077 */ 13078 if (SvUTF8(PL_numeric_radix_sv) && !has_utf8) { 13079 sv_utf8_upgrade(sv); 13080 has_utf8 = TRUE; 13081 } 13082 }); 13083 } 13084 #endif 13085 13086 hexfp = FALSE; 13087 13088 if (isALPHA_FOLD_EQ(c, 'f')) { 13089 /* Determine how many digits before the radix point 13090 * might be emitted. frexp() (or frexpl) has some 13091 * unspecified behaviour for nan/inf/-inf, so lucky we've 13092 * already handled them above */ 13093 STRLEN digits; 13094 int i = PERL_INT_MIN; 13095 (void)Perl_frexp((NV)fv, &i); 13096 if (i == PERL_INT_MIN) 13097 Perl_die(aTHX_ "panic: frexp: %" VCATPVFN_FV_GF, fv); 13098 13099 if (i > 0) { 13100 digits = BIT_DIGITS(i); 13101 /* this can't overflow. 'digits' will only be a few 13102 * thousand even for the largest floating-point types. 13103 * And up until now float_need is just some small 13104 * constants plus radix len, which can't be in 13105 * overflow territory unless the radix SV is consuming 13106 * over 1/2 the address space */ 13107 assert(float_need < ((STRLEN)~0) - digits); 13108 float_need += digits; 13109 } 13110 } 13111 else if (UNLIKELY(isALPHA_FOLD_EQ(c, 'a'))) { 13112 hexfp = TRUE; 13113 if (!has_precis) { 13114 /* %a in the absence of precision may print as many 13115 * digits as needed to represent the entire mantissa 13116 * bit pattern. 13117 * This estimate seriously overshoots in most cases, 13118 * but better the undershooting. Firstly, all bytes 13119 * of the NV are not mantissa, some of them are 13120 * exponent. Secondly, for the reasonably common 13121 * long doubles case, the "80-bit extended", two 13122 * or six bytes of the NV are unused. Also, we'll 13123 * still pick up an extra +6 from the default 13124 * precision calculation below. */ 13125 STRLEN digits = 13126 #ifdef LONGDOUBLE_DOUBLEDOUBLE 13127 /* For the "double double", we need more. 13128 * Since each double has their own exponent, the 13129 * doubles may float (haha) rather far from each 13130 * other, and the number of required bits is much 13131 * larger, up to total of DOUBLEDOUBLE_MAXBITS bits. 13132 * See the definition of DOUBLEDOUBLE_MAXBITS. 13133 * 13134 * Need 2 hexdigits for each byte. */ 13135 (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2; 13136 #else 13137 NVSIZE * 2; /* 2 hexdigits for each byte */ 13138 #endif 13139 /* see "this can't overflow" comment above */ 13140 assert(float_need < ((STRLEN)~0) - digits); 13141 float_need += digits; 13142 } 13143 } 13144 /* special-case "%.<number>g" if it will fit in ebuf */ 13145 else if (c == 'g' 13146 && precis /* See earlier comment about buggy Gconvert 13147 when digits, aka precis, is 0 */ 13148 && has_precis 13149 /* check that "%.<number>g" formatting will fit in ebuf */ 13150 && sizeof(ebuf) - float_need > precis 13151 /* sizeof(ebuf) - float_need will have wrapped if float_need > sizeof(ebuf). * 13152 * Therefore we should check that float_need < sizeof(ebuf). Normally, we would * 13153 * have run this check first, but that triggers incorrect -Wformat-overflow * 13154 * compilation warnings with some versions of gcc if Gconvert invokes sprintf(). * 13155 * ( See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89161 ) * 13156 * So, instead, we check it next: */ 13157 && float_need < sizeof(ebuf) 13158 && !(width || left || plus || alt) 13159 && !fill 13160 && intsize != 'q' 13161 ) { 13162 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13163 SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis) 13164 ); 13165 elen = strlen(ebuf); 13166 eptr = ebuf; 13167 goto float_concat; 13168 } 13169 13170 13171 { 13172 STRLEN pr = has_precis ? precis : 6; /* known default */ 13173 /* this probably can't wrap, since precis is limited 13174 * to 1/4 address space size, but better safe than sorry 13175 */ 13176 if (float_need >= ((STRLEN)~0) - pr) 13177 croak_memory_wrap(); 13178 float_need += pr; 13179 } 13180 13181 if (float_need < width) 13182 float_need = width; 13183 13184 if (float_need > INT_MAX) { 13185 /* snprintf() returns an int, and we use that return value, 13186 so die horribly if the expected size is too large for int 13187 */ 13188 Perl_croak(aTHX_ "Numeric format result too large"); 13189 } 13190 13191 if (PL_efloatsize <= float_need) { 13192 /* PL_efloatbuf should be at least 1 greater than 13193 * float_need to allow a trailing \0 to be returned by 13194 * snprintf(). If we need to grow, overgrow for the 13195 * benefit of future generations */ 13196 const STRLEN extra = 0x20; 13197 if (float_need >= ((STRLEN)~0) - extra) 13198 croak_memory_wrap(); 13199 float_need += extra; 13200 Safefree(PL_efloatbuf); 13201 PL_efloatsize = float_need; 13202 Newx(PL_efloatbuf, PL_efloatsize, char); 13203 PL_efloatbuf[0] = '\0'; 13204 } 13205 13206 if (UNLIKELY(hexfp)) { 13207 elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c, 13208 nv, fv, has_precis, precis, width, 13209 alt, plus, left, fill, in_lc_numeric); 13210 } 13211 else { 13212 char *ptr = ebuf + sizeof ebuf; 13213 *--ptr = '\0'; 13214 *--ptr = c; 13215 #if defined(USE_QUADMATH) 13216 /* always use Q here. my_snprint() throws an exception if we 13217 fallthrough to the double/long double code, even when the 13218 format is correct, presumably to avoid any accidentally 13219 missing Q. 13220 */ 13221 *--ptr = 'Q'; 13222 /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */ 13223 #elif defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl) 13224 /* Note that this is HAS_LONG_DOUBLE and PERL_PRIfldbl, 13225 * not USE_LONG_DOUBLE and NVff. In other words, 13226 * this needs to work without USE_LONG_DOUBLE. */ 13227 if (intsize == 'q') { 13228 /* Copy the one or more characters in a long double 13229 * format before the 'base' ([efgEFG]) character to 13230 * the format string. */ 13231 static char const ldblf[] = PERL_PRIfldbl; 13232 char const *p = ldblf + sizeof(ldblf) - 3; 13233 while (p >= ldblf) { *--ptr = *p--; } 13234 } 13235 #endif 13236 if (has_precis) { 13237 base = precis; 13238 do { *--ptr = '0' + (base % 10); } while (base /= 10); 13239 *--ptr = '.'; 13240 } 13241 if (width) { 13242 base = width; 13243 do { *--ptr = '0' + (base % 10); } while (base /= 10); 13244 } 13245 if (fill) 13246 *--ptr = '0'; 13247 if (left) 13248 *--ptr = '-'; 13249 if (plus) 13250 *--ptr = plus; 13251 if (alt) 13252 *--ptr = '#'; 13253 *--ptr = '%'; 13254 13255 /* No taint. Otherwise we are in the strange situation 13256 * where printf() taints but print($float) doesn't. 13257 * --jhi */ 13258 13259 /* hopefully the above makes ptr a very constrained format 13260 * that is safe to use, even though it's not literal */ 13261 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 13262 #ifdef USE_QUADMATH 13263 { 13264 if (!quadmath_format_valid(ptr)) 13265 Perl_croak_nocontext("panic: quadmath invalid format \"%s\"", ptr); 13266 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13267 elen = quadmath_snprintf(PL_efloatbuf, PL_efloatsize, 13268 ptr, nv); 13269 ); 13270 if ((IV)elen == -1) { 13271 Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", ptr); 13272 } 13273 } 13274 #elif defined(HAS_LONG_DOUBLE) 13275 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13276 elen = ((intsize == 'q') 13277 ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv) 13278 : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)fv)) 13279 ); 13280 #else 13281 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13282 elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv) 13283 ); 13284 #endif 13285 GCC_DIAG_RESTORE_STMT; 13286 } 13287 13288 eptr = PL_efloatbuf; 13289 13290 float_concat: 13291 13292 /* Since floating-point formats do their own formatting and 13293 * padding, we skip the main block of code at the end of this 13294 * loop which handles appending eptr to sv, and do our own 13295 * stripped-down version */ 13296 13297 assert(!zeros); 13298 assert(!esignlen); 13299 assert(elen); 13300 assert(elen >= width); 13301 13302 S_sv_catpvn_simple(aTHX_ sv, eptr, elen); 13303 13304 goto done_valid_conversion; 13305 } 13306 13307 /* SPECIAL */ 13308 13309 case 'n': 13310 { 13311 STRLEN len; 13312 /* XXX ideally we should warn if any flags etc have been 13313 * set, e.g. "%-4.5n" */ 13314 /* XXX if sv was originally non-utf8 with a char in the 13315 * range 0x80-0xff, then if it got upgraded, we should 13316 * calculate char len rather than byte len here */ 13317 len = SvCUR(sv) - origlen; 13318 if (args) { 13319 int i = (len > PERL_INT_MAX) ? PERL_INT_MAX : (int)len; 13320 13321 switch (intsize) { 13322 case 'c': *(va_arg(*args, char*)) = i; break; 13323 case 'h': *(va_arg(*args, short*)) = i; break; 13324 default: *(va_arg(*args, int*)) = i; break; 13325 case 'l': *(va_arg(*args, long*)) = i; break; 13326 case 'V': *(va_arg(*args, IV*)) = i; break; 13327 case 'z': *(va_arg(*args, SSize_t*)) = i; break; 13328 #ifdef HAS_PTRDIFF_T 13329 case 't': *(va_arg(*args, ptrdiff_t*)) = i; break; 13330 #endif 13331 case 'j': *(va_arg(*args, PERL_INTMAX_T*)) = i; break; 13332 case 'q': 13333 #if IVSIZE >= 8 13334 *(va_arg(*args, Quad_t*)) = i; break; 13335 #else 13336 goto unknown; 13337 #endif 13338 } 13339 } 13340 else { 13341 if (arg_missing) 13342 Perl_croak_nocontext( 13343 "Missing argument for %%n in %s", 13344 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); 13345 sv_setuv_mg(argsv, has_utf8 13346 ? (UV)utf8_length((U8*)SvPVX(sv), (U8*)SvEND(sv)) 13347 : (UV)len); 13348 } 13349 goto done_valid_conversion; 13350 } 13351 13352 /* UNKNOWN */ 13353 13354 default: 13355 unknown: 13356 if (!args 13357 && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF) 13358 && ckWARN(WARN_PRINTF)) 13359 { 13360 SV * const msg = sv_newmortal(); 13361 Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ", 13362 (PL_op->op_type == OP_PRTF) ? "" : "s"); 13363 if (fmtstart < patend) { 13364 const char * const fmtend = q < patend ? q : patend; 13365 const char * f; 13366 sv_catpvs(msg, "\"%"); 13367 for (f = fmtstart; f < fmtend; f++) { 13368 if (isPRINT(*f)) { 13369 sv_catpvn_nomg(msg, f, 1); 13370 } else { 13371 Perl_sv_catpvf(aTHX_ msg, "\\%03o", (U8) *f); 13372 } 13373 } 13374 sv_catpvs(msg, "\""); 13375 } else { 13376 sv_catpvs(msg, "end of string"); 13377 } 13378 Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%" SVf, SVfARG(msg)); /* yes, this is reentrant */ 13379 } 13380 13381 /* mangled format: output the '%', then continue from the 13382 * character following that */ 13383 sv_catpvn_nomg(sv, fmtstart-1, 1); 13384 q = fmtstart; 13385 svix = osvix; 13386 /* Any "redundant arg" warning from now onwards will probably 13387 * just be misleading, so don't bother. */ 13388 no_redundant_warning = TRUE; 13389 continue; /* not "break" */ 13390 } 13391 13392 if (is_utf8 != has_utf8) { 13393 if (is_utf8) { 13394 if (SvCUR(sv)) 13395 sv_utf8_upgrade(sv); 13396 } 13397 else { 13398 const STRLEN old_elen = elen; 13399 SV * const nsv = newSVpvn_flags(eptr, elen, SVs_TEMP); 13400 sv_utf8_upgrade(nsv); 13401 eptr = SvPVX_const(nsv); 13402 elen = SvCUR(nsv); 13403 13404 if (width) { /* fudge width (can't fudge elen) */ 13405 width += elen - old_elen; 13406 } 13407 is_utf8 = TRUE; 13408 } 13409 } 13410 13411 13412 /* append esignbuf, filler, zeros, eptr and dotstr to sv */ 13413 13414 { 13415 STRLEN need, have, gap; 13416 STRLEN i; 13417 char *s; 13418 13419 /* signed value that's wrapped? */ 13420 assert(elen <= ((~(STRLEN)0) >> 1)); 13421 13422 /* if zeros is non-zero, then it represents filler between 13423 * elen and precis. So adding elen and zeros together will 13424 * always be <= precis, and the addition can never wrap */ 13425 assert(!zeros || (precis > elen && precis - elen == zeros)); 13426 have = elen + zeros; 13427 13428 if (have >= (((STRLEN)~0) - esignlen)) 13429 croak_memory_wrap(); 13430 have += esignlen; 13431 13432 need = (have > width ? have : width); 13433 gap = need - have; 13434 13435 if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1))) 13436 croak_memory_wrap(); 13437 need += (SvCUR(sv) + 1); 13438 13439 SvGROW(sv, need); 13440 13441 s = SvEND(sv); 13442 13443 if (left) { 13444 for (i = 0; i < esignlen; i++) 13445 *s++ = esignbuf[i]; 13446 for (i = zeros; i; i--) 13447 *s++ = '0'; 13448 Copy(eptr, s, elen, char); 13449 s += elen; 13450 for (i = gap; i; i--) 13451 *s++ = ' '; 13452 } 13453 else { 13454 if (fill) { 13455 for (i = 0; i < esignlen; i++) 13456 *s++ = esignbuf[i]; 13457 assert(!zeros); 13458 zeros = gap; 13459 } 13460 else { 13461 for (i = gap; i; i--) 13462 *s++ = ' '; 13463 for (i = 0; i < esignlen; i++) 13464 *s++ = esignbuf[i]; 13465 } 13466 13467 for (i = zeros; i; i--) 13468 *s++ = '0'; 13469 Copy(eptr, s, elen, char); 13470 s += elen; 13471 } 13472 13473 *s = '\0'; 13474 SvCUR_set(sv, s - SvPVX_const(sv)); 13475 13476 if (is_utf8) 13477 has_utf8 = TRUE; 13478 if (has_utf8) 13479 SvUTF8_on(sv); 13480 } 13481 13482 if (vectorize && veclen) { 13483 /* we append the vector separator separately since %v isn't 13484 * very common: don't slow down the general case by adding 13485 * dotstrlen to need etc */ 13486 sv_catpvn_nomg(sv, dotstr, dotstrlen); 13487 esignlen = 0; 13488 goto vector; /* do next iteration */ 13489 } 13490 13491 done_valid_conversion: 13492 13493 if (arg_missing) 13494 S_warn_vcatpvfn_missing_argument(aTHX); 13495 } 13496 13497 /* Now that we've consumed all our printf format arguments (svix) 13498 * do we have things left on the stack that we didn't use? 13499 */ 13500 if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) { 13501 Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", 13502 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); 13503 } 13504 13505 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 13506 /* while we shouldn't set the cache, it may have been previously 13507 set in the caller, so clear it */ 13508 MAGIC *mg = mg_find(sv, PERL_MAGIC_utf8); 13509 if (mg) 13510 magic_setutf8(sv,mg); /* clear UTF8 cache */ 13511 } 13512 SvTAINT(sv); 13513 } 13514 13515 /* ========================================================================= 13516 13517 =for apidoc_section $embedding 13518 13519 =cut 13520 13521 All the macros and functions in this section are for the private use of 13522 the main function, perl_clone(). 13523 13524 The foo_dup() functions make an exact copy of an existing foo thingy. 13525 During the course of a cloning, a hash table is used to map old addresses 13526 to new addresses. The table is created and manipulated with the 13527 ptr_table_* functions. 13528 13529 * =========================================================================*/ 13530 13531 13532 #if defined(USE_ITHREADS) 13533 13534 /* XXX Remove this so it doesn't have to go thru the macro and return for nothing */ 13535 #ifndef GpREFCNT_inc 13536 # define GpREFCNT_inc(gp) ((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL) 13537 #endif 13538 13539 13540 /* Certain cases in Perl_ss_dup have been merged, by relying on the fact 13541 that currently av_dup, gv_dup and hv_dup are the same as sv_dup. 13542 If this changes, please unmerge ss_dup. 13543 Likewise, sv_dup_inc_multiple() relies on this fact. */ 13544 #define sv_dup_inc_NN(s,t) SvREFCNT_inc_NN(sv_dup_inc(s,t)) 13545 #define av_dup(s,t) MUTABLE_AV(sv_dup((const SV *)s,t)) 13546 #define av_dup_inc(s,t) MUTABLE_AV(sv_dup_inc((const SV *)s,t)) 13547 #define hv_dup(s,t) MUTABLE_HV(sv_dup((const SV *)s,t)) 13548 #define hv_dup_inc(s,t) MUTABLE_HV(sv_dup_inc((const SV *)s,t)) 13549 #define cv_dup(s,t) MUTABLE_CV(sv_dup((const SV *)s,t)) 13550 #define cv_dup_inc(s,t) MUTABLE_CV(sv_dup_inc((const SV *)s,t)) 13551 #define io_dup(s,t) MUTABLE_IO(sv_dup((const SV *)s,t)) 13552 #define io_dup_inc(s,t) MUTABLE_IO(sv_dup_inc((const SV *)s,t)) 13553 #define gv_dup(s,t) MUTABLE_GV(sv_dup((const SV *)s,t)) 13554 #define gv_dup_inc(s,t) MUTABLE_GV(sv_dup_inc((const SV *)s,t)) 13555 #define SAVEPV(p) ((p) ? savepv(p) : NULL) 13556 #define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL) 13557 13558 /* clone a parser */ 13559 13560 yy_parser * 13561 Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) 13562 { 13563 yy_parser *parser; 13564 13565 PERL_ARGS_ASSERT_PARSER_DUP; 13566 13567 if (!proto) 13568 return NULL; 13569 13570 /* look for it in the table first */ 13571 parser = (yy_parser *)ptr_table_fetch(PL_ptr_table, proto); 13572 if (parser) 13573 return parser; 13574 13575 /* create anew and remember what it is */ 13576 Newxz(parser, 1, yy_parser); 13577 ptr_table_store(PL_ptr_table, proto, parser); 13578 13579 /* XXX eventually, just Copy() most of the parser struct ? */ 13580 13581 parser->lex_brackets = proto->lex_brackets; 13582 parser->lex_casemods = proto->lex_casemods; 13583 parser->lex_brackstack = savepvn(proto->lex_brackstack, 13584 (proto->lex_brackets < 120 ? 120 : proto->lex_brackets)); 13585 parser->lex_casestack = savepvn(proto->lex_casestack, 13586 (proto->lex_casemods < 12 ? 12 : proto->lex_casemods)); 13587 parser->lex_defer = proto->lex_defer; 13588 parser->lex_dojoin = proto->lex_dojoin; 13589 parser->lex_formbrack = proto->lex_formbrack; 13590 parser->lex_inpat = proto->lex_inpat; 13591 parser->lex_inwhat = proto->lex_inwhat; 13592 parser->lex_op = proto->lex_op; 13593 parser->lex_repl = sv_dup_inc(proto->lex_repl, param); 13594 parser->lex_starts = proto->lex_starts; 13595 parser->lex_stuff = sv_dup_inc(proto->lex_stuff, param); 13596 parser->multi_close = proto->multi_close; 13597 parser->multi_open = proto->multi_open; 13598 parser->multi_start = proto->multi_start; 13599 parser->multi_end = proto->multi_end; 13600 parser->preambled = proto->preambled; 13601 parser->lex_super_state = proto->lex_super_state; 13602 parser->lex_sub_inwhat = proto->lex_sub_inwhat; 13603 parser->lex_sub_op = proto->lex_sub_op; 13604 parser->lex_sub_repl= sv_dup_inc(proto->lex_sub_repl, param); 13605 parser->linestr = sv_dup_inc(proto->linestr, param); 13606 parser->expect = proto->expect; 13607 parser->copline = proto->copline; 13608 parser->last_lop_op = proto->last_lop_op; 13609 parser->lex_state = proto->lex_state; 13610 parser->rsfp = fp_dup(proto->rsfp, '<', param); 13611 /* rsfp_filters entries have fake IoDIRP() */ 13612 parser->rsfp_filters= av_dup_inc(proto->rsfp_filters, param); 13613 parser->in_my = proto->in_my; 13614 parser->in_my_stash = hv_dup(proto->in_my_stash, param); 13615 parser->error_count = proto->error_count; 13616 parser->sig_elems = proto->sig_elems; 13617 parser->sig_optelems= proto->sig_optelems; 13618 parser->sig_slurpy = proto->sig_slurpy; 13619 parser->recheck_utf8_validity = proto->recheck_utf8_validity; 13620 13621 { 13622 char * const ols = SvPVX(proto->linestr); 13623 char * const ls = SvPVX(parser->linestr); 13624 13625 parser->bufptr = ls + (proto->bufptr >= ols ? 13626 proto->bufptr - ols : 0); 13627 parser->oldbufptr = ls + (proto->oldbufptr >= ols ? 13628 proto->oldbufptr - ols : 0); 13629 parser->oldoldbufptr= ls + (proto->oldoldbufptr >= ols ? 13630 proto->oldoldbufptr - ols : 0); 13631 parser->linestart = ls + (proto->linestart >= ols ? 13632 proto->linestart - ols : 0); 13633 parser->last_uni = ls + (proto->last_uni >= ols ? 13634 proto->last_uni - ols : 0); 13635 parser->last_lop = ls + (proto->last_lop >= ols ? 13636 proto->last_lop - ols : 0); 13637 13638 parser->bufend = ls + SvCUR(parser->linestr); 13639 } 13640 13641 Copy(proto->tokenbuf, parser->tokenbuf, 256, char); 13642 13643 13644 Copy(proto->nextval, parser->nextval, 5, YYSTYPE); 13645 Copy(proto->nexttype, parser->nexttype, 5, I32); 13646 parser->nexttoke = proto->nexttoke; 13647 13648 /* XXX should clone saved_curcop here, but we aren't passed 13649 * proto_perl; so do it in perl_clone_using instead */ 13650 13651 return parser; 13652 } 13653 13654 /* 13655 =for apidoc_section $io 13656 =for apidoc fp_dup 13657 13658 Duplicate a file handle, returning a pointer to the cloned object. 13659 13660 =cut 13661 */ 13662 13663 PerlIO * 13664 Perl_fp_dup(pTHX_ PerlIO *const fp, const char type, CLONE_PARAMS *const param) 13665 { 13666 PerlIO *ret; 13667 13668 PERL_ARGS_ASSERT_FP_DUP; 13669 PERL_UNUSED_ARG(type); 13670 13671 if (!fp) 13672 return (PerlIO*)NULL; 13673 13674 /* look for it in the table first */ 13675 ret = (PerlIO*)ptr_table_fetch(PL_ptr_table, fp); 13676 if (ret) 13677 return ret; 13678 13679 /* create anew and remember what it is */ 13680 #ifdef __amigaos4__ 13681 ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE|PERLIO_DUP_FD); 13682 #else 13683 ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE); 13684 #endif 13685 ptr_table_store(PL_ptr_table, fp, ret); 13686 return ret; 13687 } 13688 13689 /* 13690 =for apidoc_section $io 13691 =for apidoc dirp_dup 13692 13693 Duplicate a directory handle, returning a pointer to the cloned object. 13694 13695 =cut 13696 */ 13697 13698 DIR * 13699 Perl_dirp_dup(pTHX_ DIR *const dp, CLONE_PARAMS *const param) 13700 { 13701 DIR *ret; 13702 13703 #if defined(HAS_FCHDIR) && defined(HAS_TELLDIR) && defined(HAS_SEEKDIR) 13704 DIR *pwd; 13705 const Direntry_t *dirent; 13706 char smallbuf[256]; /* XXX MAXPATHLEN, surely? */ 13707 char *name = NULL; 13708 STRLEN len = 0; 13709 long pos; 13710 #endif 13711 13712 PERL_UNUSED_CONTEXT; 13713 PERL_ARGS_ASSERT_DIRP_DUP; 13714 13715 if (!dp) 13716 return (DIR*)NULL; 13717 13718 /* look for it in the table first */ 13719 ret = (DIR*)ptr_table_fetch(PL_ptr_table, dp); 13720 if (ret) 13721 return ret; 13722 13723 #if defined(HAS_FCHDIR) && defined(HAS_TELLDIR) && defined(HAS_SEEKDIR) 13724 13725 PERL_UNUSED_ARG(param); 13726 13727 /* create anew */ 13728 13729 /* open the current directory (so we can switch back) */ 13730 if (!(pwd = PerlDir_open("."))) return (DIR *)NULL; 13731 13732 /* chdir to our dir handle and open the present working directory */ 13733 if (fchdir(my_dirfd(dp)) < 0 || !(ret = PerlDir_open("."))) { 13734 PerlDir_close(pwd); 13735 return (DIR *)NULL; 13736 } 13737 /* Now we should have two dir handles pointing to the same dir. */ 13738 13739 /* Be nice to the calling code and chdir back to where we were. */ 13740 /* XXX If this fails, then what? */ 13741 PERL_UNUSED_RESULT(fchdir(my_dirfd(pwd))); 13742 13743 /* We have no need of the pwd handle any more. */ 13744 PerlDir_close(pwd); 13745 13746 #ifdef DIRNAMLEN 13747 # define d_namlen(d) (d)->d_namlen 13748 #else 13749 # define d_namlen(d) strlen((d)->d_name) 13750 #endif 13751 /* Iterate once through dp, to get the file name at the current posi- 13752 tion. Then step back. */ 13753 pos = PerlDir_tell(dp); 13754 if ((dirent = PerlDir_read(dp))) { 13755 len = d_namlen(dirent); 13756 if (len > sizeof(dirent->d_name) && sizeof(dirent->d_name) > PTRSIZE) { 13757 /* If the len is somehow magically longer than the 13758 * maximum length of the directory entry, even though 13759 * we could fit it in a buffer, we could not copy it 13760 * from the dirent. Bail out. */ 13761 PerlDir_close(ret); 13762 return (DIR*)NULL; 13763 } 13764 if (len <= sizeof smallbuf) name = smallbuf; 13765 else Newx(name, len, char); 13766 Move(dirent->d_name, name, len, char); 13767 } 13768 PerlDir_seek(dp, pos); 13769 13770 /* Iterate through the new dir handle, till we find a file with the 13771 right name. */ 13772 if (!dirent) /* just before the end */ 13773 for(;;) { 13774 pos = PerlDir_tell(ret); 13775 if (PerlDir_read(ret)) continue; /* not there yet */ 13776 PerlDir_seek(ret, pos); /* step back */ 13777 break; 13778 } 13779 else { 13780 const long pos0 = PerlDir_tell(ret); 13781 for(;;) { 13782 pos = PerlDir_tell(ret); 13783 if ((dirent = PerlDir_read(ret))) { 13784 if (len == (STRLEN)d_namlen(dirent) 13785 && memEQ(name, dirent->d_name, len)) { 13786 /* found it */ 13787 PerlDir_seek(ret, pos); /* step back */ 13788 break; 13789 } 13790 /* else we are not there yet; keep iterating */ 13791 } 13792 else { /* This is not meant to happen. The best we can do is 13793 reset the iterator to the beginning. */ 13794 PerlDir_seek(ret, pos0); 13795 break; 13796 } 13797 } 13798 } 13799 #undef d_namlen 13800 13801 if (name && name != smallbuf) 13802 Safefree(name); 13803 #endif 13804 13805 #ifdef WIN32 13806 ret = win32_dirp_dup(dp, param); 13807 #endif 13808 13809 /* pop it in the pointer table */ 13810 if (ret) 13811 ptr_table_store(PL_ptr_table, dp, ret); 13812 13813 return ret; 13814 } 13815 13816 /* 13817 =for apidoc_section $GV 13818 =for apidoc gp_dup 13819 13820 Duplicate a typeglob, returning a pointer to the cloned object. 13821 13822 =cut 13823 */ 13824 13825 GP * 13826 Perl_gp_dup(pTHX_ GP *const gp, CLONE_PARAMS *const param) 13827 { 13828 GP *ret; 13829 13830 PERL_ARGS_ASSERT_GP_DUP; 13831 13832 if (!gp) 13833 return (GP*)NULL; 13834 /* look for it in the table first */ 13835 ret = (GP*)ptr_table_fetch(PL_ptr_table, gp); 13836 if (ret) 13837 return ret; 13838 13839 /* create anew and remember what it is */ 13840 Newxz(ret, 1, GP); 13841 ptr_table_store(PL_ptr_table, gp, ret); 13842 13843 /* clone */ 13844 /* ret->gp_refcnt must be 0 before any other dups are called. We're relying 13845 on Newxz() to do this for us. */ 13846 ret->gp_sv = sv_dup_inc(gp->gp_sv, param); 13847 ret->gp_io = io_dup_inc(gp->gp_io, param); 13848 ret->gp_form = cv_dup_inc(gp->gp_form, param); 13849 ret->gp_av = av_dup_inc(gp->gp_av, param); 13850 ret->gp_hv = hv_dup_inc(gp->gp_hv, param); 13851 ret->gp_egv = gv_dup(gp->gp_egv, param);/* GvEGV is not refcounted */ 13852 ret->gp_cv = cv_dup_inc(gp->gp_cv, param); 13853 ret->gp_cvgen = gp->gp_cvgen; 13854 ret->gp_line = gp->gp_line; 13855 ret->gp_file_hek = hek_dup(gp->gp_file_hek, param); 13856 return ret; 13857 } 13858 13859 13860 /* 13861 =for apidoc_section $magic 13862 =for apidoc mg_dup 13863 13864 Duplicate a chain of magic, returning a pointer to the cloned object. 13865 13866 =cut 13867 */ 13868 13869 MAGIC * 13870 Perl_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS *const param) 13871 { 13872 MAGIC *mgret = NULL; 13873 MAGIC **mgprev_p = &mgret; 13874 13875 PERL_ARGS_ASSERT_MG_DUP; 13876 13877 for (; mg; mg = mg->mg_moremagic) { 13878 MAGIC *nmg; 13879 13880 if ((param->flags & CLONEf_JOIN_IN) 13881 && mg->mg_type == PERL_MAGIC_backref) 13882 /* when joining, we let the individual SVs add themselves to 13883 * backref as needed. */ 13884 continue; 13885 13886 Newx(nmg, 1, MAGIC); 13887 *mgprev_p = nmg; 13888 mgprev_p = &(nmg->mg_moremagic); 13889 13890 /* There was a comment "XXX copy dynamic vtable?" but as we don't have 13891 dynamic vtables, I'm not sure why Sarathy wrote it. The comment dates 13892 from the original commit adding Perl_mg_dup() - revision 4538. 13893 Similarly there is the annotation "XXX random ptr?" next to the 13894 assignment to nmg->mg_ptr. */ 13895 *nmg = *mg; 13896 13897 /* FIXME for plugins 13898 if (nmg->mg_type == PERL_MAGIC_qr) { 13899 nmg->mg_obj = MUTABLE_SV(CALLREGDUPE((REGEXP*)nmg->mg_obj, param)); 13900 } 13901 else 13902 */ 13903 nmg->mg_obj = (nmg->mg_flags & MGf_REFCOUNTED) 13904 ? nmg->mg_type == PERL_MAGIC_backref 13905 /* The backref AV has its reference 13906 * count deliberately bumped by 1 */ 13907 ? SvREFCNT_inc(av_dup_inc((const AV *) 13908 nmg->mg_obj, param)) 13909 : sv_dup_inc(nmg->mg_obj, param) 13910 : (nmg->mg_type == PERL_MAGIC_regdatum || 13911 nmg->mg_type == PERL_MAGIC_regdata) 13912 ? nmg->mg_obj 13913 : sv_dup(nmg->mg_obj, param); 13914 13915 if (nmg->mg_ptr && nmg->mg_type != PERL_MAGIC_regex_global) { 13916 if (nmg->mg_len > 0) { 13917 nmg->mg_ptr = SAVEPVN(nmg->mg_ptr, nmg->mg_len); 13918 if (nmg->mg_type == PERL_MAGIC_overload_table && 13919 AMT_AMAGIC((AMT*)nmg->mg_ptr)) 13920 { 13921 AMT * const namtp = (AMT*)nmg->mg_ptr; 13922 sv_dup_inc_multiple((SV**)(namtp->table), 13923 (SV**)(namtp->table), NofAMmeth, param); 13924 } 13925 } 13926 else if (nmg->mg_len == HEf_SVKEY) 13927 nmg->mg_ptr = (char*)sv_dup_inc((const SV *)nmg->mg_ptr, param); 13928 } 13929 if ((nmg->mg_flags & MGf_DUP) && nmg->mg_virtual && nmg->mg_virtual->svt_dup) { 13930 nmg->mg_virtual->svt_dup(aTHX_ nmg, param); 13931 } 13932 } 13933 return mgret; 13934 } 13935 13936 #endif /* USE_ITHREADS */ 13937 13938 struct ptr_tbl_arena { 13939 struct ptr_tbl_arena *next; 13940 struct ptr_tbl_ent array[1023/3]; /* as ptr_tbl_ent has 3 pointers. */ 13941 }; 13942 13943 /* 13944 =for apidoc ptr_table_new 13945 13946 Create a new pointer-mapping table 13947 13948 =cut 13949 */ 13950 13951 PTR_TBL_t * 13952 Perl_ptr_table_new(pTHX) 13953 { 13954 PTR_TBL_t *tbl; 13955 PERL_UNUSED_CONTEXT; 13956 13957 Newx(tbl, 1, PTR_TBL_t); 13958 tbl->tbl_max = 511; 13959 tbl->tbl_items = 0; 13960 tbl->tbl_arena = NULL; 13961 tbl->tbl_arena_next = NULL; 13962 tbl->tbl_arena_end = NULL; 13963 Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTR_TBL_ENT_t*); 13964 return tbl; 13965 } 13966 13967 #define PTR_TABLE_HASH(ptr) \ 13968 ((PTR2UV(ptr) >> 3) ^ (PTR2UV(ptr) >> (3 + 7)) ^ (PTR2UV(ptr) >> (3 + 17))) 13969 13970 /* map an existing pointer using a table */ 13971 13972 STATIC PTR_TBL_ENT_t * 13973 S_ptr_table_find(PTR_TBL_t *const tbl, const void *const sv) 13974 { 13975 PTR_TBL_ENT_t *tblent; 13976 const UV hash = PTR_TABLE_HASH(sv); 13977 13978 PERL_ARGS_ASSERT_PTR_TABLE_FIND; 13979 13980 tblent = tbl->tbl_ary[hash & tbl->tbl_max]; 13981 for (; tblent; tblent = tblent->next) { 13982 if (tblent->oldval == sv) 13983 return tblent; 13984 } 13985 return NULL; 13986 } 13987 13988 /* 13989 =for apidoc ptr_table_fetch 13990 13991 Look for C<sv> in the pointer-mapping table C<tbl>, returning its value, or 13992 NULL if not found. 13993 13994 =cut 13995 */ 13996 13997 void * 13998 Perl_ptr_table_fetch(pTHX_ PTR_TBL_t *const tbl, const void *const sv) 13999 { 14000 PTR_TBL_ENT_t const *const tblent = ptr_table_find(tbl, sv); 14001 14002 PERL_ARGS_ASSERT_PTR_TABLE_FETCH; 14003 PERL_UNUSED_CONTEXT; 14004 14005 return tblent ? tblent->newval : NULL; 14006 } 14007 14008 /* 14009 =for apidoc ptr_table_store 14010 14011 Add a new entry to a pointer-mapping table C<tbl>. 14012 In hash terms, C<oldsv> is the key; Cnewsv> is the value. 14013 14014 The names "old" and "new" are specific to the core's typical use of ptr_tables 14015 in thread cloning. 14016 14017 =cut 14018 */ 14019 14020 void 14021 Perl_ptr_table_store(pTHX_ PTR_TBL_t *const tbl, const void *const oldsv, void *const newsv) 14022 { 14023 PTR_TBL_ENT_t *tblent = ptr_table_find(tbl, oldsv); 14024 14025 PERL_ARGS_ASSERT_PTR_TABLE_STORE; 14026 PERL_UNUSED_CONTEXT; 14027 14028 if (tblent) { 14029 tblent->newval = newsv; 14030 } else { 14031 const UV entry = PTR_TABLE_HASH(oldsv) & tbl->tbl_max; 14032 14033 if (tbl->tbl_arena_next == tbl->tbl_arena_end) { 14034 struct ptr_tbl_arena *new_arena; 14035 14036 Newx(new_arena, 1, struct ptr_tbl_arena); 14037 new_arena->next = tbl->tbl_arena; 14038 tbl->tbl_arena = new_arena; 14039 tbl->tbl_arena_next = new_arena->array; 14040 tbl->tbl_arena_end = C_ARRAY_END(new_arena->array); 14041 } 14042 14043 tblent = tbl->tbl_arena_next++; 14044 14045 tblent->oldval = oldsv; 14046 tblent->newval = newsv; 14047 tblent->next = tbl->tbl_ary[entry]; 14048 tbl->tbl_ary[entry] = tblent; 14049 tbl->tbl_items++; 14050 if (tblent->next && tbl->tbl_items > tbl->tbl_max) 14051 ptr_table_split(tbl); 14052 } 14053 } 14054 14055 /* 14056 =for apidoc ptr_table_split 14057 14058 Double the hash bucket size of an existing ptr table 14059 14060 =cut 14061 */ 14062 14063 void 14064 Perl_ptr_table_split(pTHX_ PTR_TBL_t *const tbl) 14065 { 14066 PTR_TBL_ENT_t **ary = tbl->tbl_ary; 14067 const UV oldsize = tbl->tbl_max + 1; 14068 UV newsize = oldsize * 2; 14069 UV i; 14070 14071 PERL_ARGS_ASSERT_PTR_TABLE_SPLIT; 14072 PERL_UNUSED_CONTEXT; 14073 14074 Renew(ary, newsize, PTR_TBL_ENT_t*); 14075 Zero(&ary[oldsize], newsize-oldsize, PTR_TBL_ENT_t*); 14076 tbl->tbl_max = --newsize; 14077 tbl->tbl_ary = ary; 14078 for (i=0; i < oldsize; i++, ary++) { 14079 PTR_TBL_ENT_t **entp = ary; 14080 PTR_TBL_ENT_t *ent = *ary; 14081 PTR_TBL_ENT_t **curentp; 14082 if (!ent) 14083 continue; 14084 curentp = ary + oldsize; 14085 do { 14086 if ((newsize & PTR_TABLE_HASH(ent->oldval)) != i) { 14087 *entp = ent->next; 14088 ent->next = *curentp; 14089 *curentp = ent; 14090 } 14091 else 14092 entp = &ent->next; 14093 ent = *entp; 14094 } while (ent); 14095 } 14096 } 14097 14098 /* 14099 =for apidoc ptr_table_free 14100 14101 Clear and free a ptr table 14102 14103 =cut 14104 */ 14105 14106 void 14107 Perl_ptr_table_free(pTHX_ PTR_TBL_t *const tbl) 14108 { 14109 struct ptr_tbl_arena *arena; 14110 14111 PERL_UNUSED_CONTEXT; 14112 14113 if (!tbl) { 14114 return; 14115 } 14116 14117 arena = tbl->tbl_arena; 14118 14119 while (arena) { 14120 struct ptr_tbl_arena *next = arena->next; 14121 14122 Safefree(arena); 14123 arena = next; 14124 } 14125 14126 Safefree(tbl->tbl_ary); 14127 Safefree(tbl); 14128 } 14129 14130 #if defined(USE_ITHREADS) 14131 14132 void 14133 Perl_rvpv_dup(pTHX_ SV *const dsv, const SV *const ssv, CLONE_PARAMS *const param) 14134 { 14135 PERL_ARGS_ASSERT_RVPV_DUP; 14136 14137 assert(!isREGEXP(ssv)); 14138 if (SvROK(ssv)) { 14139 if (SvWEAKREF(ssv)) { 14140 SvRV_set(dsv, sv_dup(SvRV_const(ssv), param)); 14141 if (param->flags & CLONEf_JOIN_IN) { 14142 /* if joining, we add any back references individually rather 14143 * than copying the whole backref array */ 14144 Perl_sv_add_backref(aTHX_ SvRV(dsv), dsv); 14145 } 14146 } 14147 else 14148 SvRV_set(dsv, sv_dup_inc(SvRV_const(ssv), param)); 14149 } 14150 else if (SvPVX_const(ssv)) { 14151 /* Has something there */ 14152 if (SvLEN(ssv)) { 14153 /* Normal PV - clone whole allocated space */ 14154 SvPV_set(dsv, SAVEPVN(SvPVX_const(ssv), SvLEN(ssv)-1)); 14155 /* ssv may not be that normal, but actually copy on write. 14156 But we are a true, independent SV, so: */ 14157 SvIsCOW_off(dsv); 14158 } 14159 else { 14160 /* Special case - not normally malloced for some reason */ 14161 if (isGV_with_GP(ssv)) { 14162 /* Don't need to do anything here. */ 14163 } 14164 else if ((SvIsCOW_shared_hash(ssv))) { 14165 /* A "shared" PV - clone it as "shared" PV */ 14166 SvPV_set(dsv, 14167 HEK_KEY(hek_dup(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv)), 14168 param))); 14169 } 14170 else { 14171 /* Some other special case - random pointer */ 14172 SvPV_set(dsv, (char *) SvPVX_const(ssv)); 14173 } 14174 } 14175 } 14176 else { 14177 /* Copy the NULL */ 14178 SvPV_set(dsv, NULL); 14179 } 14180 } 14181 14182 /* duplicate a list of SVs. source and dest may point to the same memory. */ 14183 static SV ** 14184 S_sv_dup_inc_multiple(pTHX_ SV *const *source, SV **dest, 14185 SSize_t items, CLONE_PARAMS *const param) 14186 { 14187 PERL_ARGS_ASSERT_SV_DUP_INC_MULTIPLE; 14188 14189 while (items-- > 0) { 14190 *dest++ = sv_dup_inc(*source++, param); 14191 } 14192 14193 return dest; 14194 } 14195 14196 /* duplicate an SV of any type (including AV, HV etc) */ 14197 14198 static SV * 14199 S_sv_dup_common(pTHX_ const SV *const ssv, CLONE_PARAMS *const param) 14200 { 14201 SV *dsv; 14202 14203 PERL_ARGS_ASSERT_SV_DUP_COMMON; 14204 14205 if (SvTYPE(ssv) == (svtype)SVTYPEMASK) { 14206 #ifdef DEBUG_LEAKING_SCALARS_ABORT 14207 abort(); 14208 #endif 14209 return NULL; 14210 } 14211 /* look for it in the table first */ 14212 dsv = MUTABLE_SV(ptr_table_fetch(PL_ptr_table, ssv)); 14213 if (dsv) 14214 return dsv; 14215 14216 if(param->flags & CLONEf_JOIN_IN) { 14217 /** We are joining here so we don't want do clone 14218 something that is bad **/ 14219 if (SvTYPE(ssv) == SVt_PVHV) { 14220 const HEK * const hvname = HvNAME_HEK(ssv); 14221 if (hvname) { 14222 /** don't clone stashes if they already exist **/ 14223 dsv = MUTABLE_SV(gv_stashpvn(HEK_KEY(hvname), HEK_LEN(hvname), 14224 HEK_UTF8(hvname) ? SVf_UTF8 : 0)); 14225 ptr_table_store(PL_ptr_table, ssv, dsv); 14226 return dsv; 14227 } 14228 } 14229 else if (SvTYPE(ssv) == SVt_PVGV && !SvFAKE(ssv)) { 14230 HV *stash = GvSTASH(ssv); 14231 const HEK * hvname; 14232 if (stash && (hvname = HvNAME_HEK(stash))) { 14233 /** don't clone GVs if they already exist **/ 14234 SV **svp; 14235 stash = gv_stashpvn(HEK_KEY(hvname), HEK_LEN(hvname), 14236 HEK_UTF8(hvname) ? SVf_UTF8 : 0); 14237 svp = hv_fetch( 14238 stash, GvNAME(ssv), 14239 GvNAMEUTF8(ssv) 14240 ? -GvNAMELEN(ssv) 14241 : GvNAMELEN(ssv), 14242 0 14243 ); 14244 if (svp && *svp && SvTYPE(*svp) == SVt_PVGV) { 14245 ptr_table_store(PL_ptr_table, ssv, *svp); 14246 return *svp; 14247 } 14248 } 14249 } 14250 } 14251 14252 /* create anew and remember what it is */ 14253 new_SV(dsv); 14254 14255 #ifdef DEBUG_LEAKING_SCALARS 14256 dsv->sv_debug_optype = ssv->sv_debug_optype; 14257 dsv->sv_debug_line = ssv->sv_debug_line; 14258 dsv->sv_debug_inpad = ssv->sv_debug_inpad; 14259 dsv->sv_debug_parent = (SV*)ssv; 14260 FREE_SV_DEBUG_FILE(dsv); 14261 dsv->sv_debug_file = savesharedpv(ssv->sv_debug_file); 14262 #endif 14263 14264 ptr_table_store(PL_ptr_table, ssv, dsv); 14265 14266 /* clone */ 14267 SvFLAGS(dsv) = SvFLAGS(ssv); 14268 SvFLAGS(dsv) &= ~SVf_OOK; /* don't propagate OOK hack */ 14269 SvREFCNT(dsv) = 0; /* must be before any other dups! */ 14270 14271 #ifdef DEBUGGING 14272 if (SvANY(ssv) && PL_watch_pvx && SvPVX_const(ssv) == PL_watch_pvx) 14273 PerlIO_printf(Perl_debug_log, "watch at %p hit, found string \"%s\"\n", 14274 (void*)PL_watch_pvx, SvPVX_const(ssv)); 14275 #endif 14276 14277 /* don't clone objects whose class has asked us not to */ 14278 if (SvOBJECT(ssv) 14279 && ! (SvFLAGS(SvSTASH(ssv)) & SVphv_CLONEABLE)) 14280 { 14281 SvFLAGS(dsv) = 0; 14282 return dsv; 14283 } 14284 14285 switch (SvTYPE(ssv)) { 14286 case SVt_NULL: 14287 SvANY(dsv) = NULL; 14288 break; 14289 case SVt_IV: 14290 SET_SVANY_FOR_BODYLESS_IV(dsv); 14291 if(SvROK(ssv)) { 14292 Perl_rvpv_dup(aTHX_ dsv, ssv, param); 14293 } else { 14294 SvIV_set(dsv, SvIVX(ssv)); 14295 } 14296 break; 14297 case SVt_NV: 14298 #if NVSIZE <= IVSIZE 14299 SET_SVANY_FOR_BODYLESS_NV(dsv); 14300 #else 14301 SvANY(dsv) = new_XNV(); 14302 #endif 14303 SvNV_set(dsv, SvNVX(ssv)); 14304 break; 14305 default: 14306 { 14307 /* These are all the types that need complex bodies allocating. */ 14308 void *new_body; 14309 const svtype sv_type = SvTYPE(ssv); 14310 const struct body_details *sv_type_details 14311 = bodies_by_type + sv_type; 14312 14313 switch (sv_type) { 14314 default: 14315 Perl_croak(aTHX_ "Bizarre SvTYPE [%" IVdf "]", (IV)SvTYPE(ssv)); 14316 NOT_REACHED; /* NOTREACHED */ 14317 break; 14318 14319 case SVt_PVHV: 14320 if (SvOOK(ssv)) { 14321 sv_type_details = &fake_hv_with_aux; 14322 #ifdef PURIFY 14323 new_body = new_NOARENA(sv_type_details); 14324 #else 14325 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux); 14326 #endif 14327 goto have_body; 14328 } 14329 /* FALLTHROUGH */ 14330 case SVt_PVGV: 14331 case SVt_PVIO: 14332 case SVt_PVFM: 14333 case SVt_PVAV: 14334 case SVt_PVCV: 14335 case SVt_PVLV: 14336 case SVt_REGEXP: 14337 case SVt_PVMG: 14338 case SVt_PVNV: 14339 case SVt_PVIV: 14340 case SVt_INVLIST: 14341 case SVt_PV: 14342 assert(sv_type_details->body_size); 14343 #ifndef PURIFY 14344 if (sv_type_details->arena) { 14345 new_body = S_new_body(aTHX_ sv_type); 14346 new_body 14347 = (void*)((char*)new_body - sv_type_details->offset); 14348 } else 14349 #endif 14350 { 14351 new_body = new_NOARENA(sv_type_details); 14352 } 14353 } 14354 have_body: 14355 assert(new_body); 14356 SvANY(dsv) = new_body; 14357 14358 #ifndef PURIFY 14359 Copy(((char*)SvANY(ssv)) + sv_type_details->offset, 14360 ((char*)SvANY(dsv)) + sv_type_details->offset, 14361 sv_type_details->copy, char); 14362 #else 14363 Copy(((char*)SvANY(ssv)), 14364 ((char*)SvANY(dsv)), 14365 sv_type_details->body_size + sv_type_details->offset, char); 14366 #endif 14367 14368 if (sv_type != SVt_PVAV && sv_type != SVt_PVHV 14369 && !isGV_with_GP(dsv) 14370 && !isREGEXP(dsv) 14371 && !(sv_type == SVt_PVIO && !(IoFLAGS(dsv) & IOf_FAKE_DIRP))) 14372 Perl_rvpv_dup(aTHX_ dsv, ssv, param); 14373 14374 /* The Copy above means that all the source (unduplicated) pointers 14375 are now in the destination. We can check the flags and the 14376 pointers in either, but it's possible that there's less cache 14377 missing by always going for the destination. 14378 FIXME - instrument and check that assumption */ 14379 if (sv_type >= SVt_PVMG) { 14380 if (SvMAGIC(dsv)) 14381 SvMAGIC_set(dsv, mg_dup(SvMAGIC(dsv), param)); 14382 if (SvOBJECT(dsv) && SvSTASH(dsv)) 14383 SvSTASH_set(dsv, hv_dup_inc(SvSTASH(dsv), param)); 14384 else SvSTASH_set(dsv, 0); /* don't copy DESTROY cache */ 14385 } 14386 14387 /* The cast silences a GCC warning about unhandled types. */ 14388 switch ((int)sv_type) { 14389 case SVt_PV: 14390 break; 14391 case SVt_PVIV: 14392 break; 14393 case SVt_PVNV: 14394 break; 14395 case SVt_PVMG: 14396 break; 14397 case SVt_REGEXP: 14398 duprex: 14399 /* FIXME for plugins */ 14400 re_dup_guts((REGEXP*) ssv, (REGEXP*) dsv, param); 14401 break; 14402 case SVt_PVLV: 14403 /* XXX LvTARGOFF sometimes holds PMOP* when DEBUGGING */ 14404 if (LvTYPE(dsv) == 't') /* for tie: unrefcnted fake (SV**) */ 14405 LvTARG(dsv) = dsv; 14406 else if (LvTYPE(dsv) == 'T') /* for tie: fake HE */ 14407 LvTARG(dsv) = MUTABLE_SV(he_dup((HE*)LvTARG(dsv), FALSE, param)); 14408 else 14409 LvTARG(dsv) = sv_dup_inc(LvTARG(dsv), param); 14410 if (isREGEXP(ssv)) goto duprex; 14411 /* FALLTHROUGH */ 14412 case SVt_PVGV: 14413 /* non-GP case already handled above */ 14414 if(isGV_with_GP(ssv)) { 14415 GvNAME_HEK(dsv) = hek_dup(GvNAME_HEK(dsv), param); 14416 /* Don't call sv_add_backref here as it's going to be 14417 created as part of the magic cloning of the symbol 14418 table--unless this is during a join and the stash 14419 is not actually being cloned. */ 14420 /* Danger Will Robinson - GvGP(dsv) isn't initialised 14421 at the point of this comment. */ 14422 GvSTASH(dsv) = hv_dup(GvSTASH(dsv), param); 14423 if (param->flags & CLONEf_JOIN_IN) 14424 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dsv)), dsv); 14425 GvGP_set(dsv, gp_dup(GvGP(ssv), param)); 14426 (void)GpREFCNT_inc(GvGP(dsv)); 14427 } 14428 break; 14429 case SVt_PVIO: 14430 /* PL_parser->rsfp_filters entries have fake IoDIRP() */ 14431 if(IoFLAGS(dsv) & IOf_FAKE_DIRP) { 14432 /* I have no idea why fake dirp (rsfps) 14433 should be treated differently but otherwise 14434 we end up with leaks -- sky*/ 14435 IoTOP_GV(dsv) = gv_dup_inc(IoTOP_GV(dsv), param); 14436 IoFMT_GV(dsv) = gv_dup_inc(IoFMT_GV(dsv), param); 14437 IoBOTTOM_GV(dsv) = gv_dup_inc(IoBOTTOM_GV(dsv), param); 14438 } else { 14439 IoTOP_GV(dsv) = gv_dup(IoTOP_GV(dsv), param); 14440 IoFMT_GV(dsv) = gv_dup(IoFMT_GV(dsv), param); 14441 IoBOTTOM_GV(dsv) = gv_dup(IoBOTTOM_GV(dsv), param); 14442 if (IoDIRP(dsv)) { 14443 IoDIRP(dsv) = dirp_dup(IoDIRP(dsv), param); 14444 } else { 14445 NOOP; 14446 /* IoDIRP(dsv) is already a copy of IoDIRP(ssv) */ 14447 } 14448 IoIFP(dsv) = fp_dup(IoIFP(ssv), IoTYPE(dsv), param); 14449 } 14450 if (IoOFP(dsv) == IoIFP(ssv)) 14451 IoOFP(dsv) = IoIFP(dsv); 14452 else 14453 IoOFP(dsv) = fp_dup(IoOFP(dsv), IoTYPE(dsv), param); 14454 IoTOP_NAME(dsv) = SAVEPV(IoTOP_NAME(dsv)); 14455 IoFMT_NAME(dsv) = SAVEPV(IoFMT_NAME(dsv)); 14456 IoBOTTOM_NAME(dsv) = SAVEPV(IoBOTTOM_NAME(dsv)); 14457 break; 14458 case SVt_PVAV: 14459 /* avoid cloning an empty array */ 14460 if (AvARRAY((const AV *)ssv) && AvFILLp((const AV *)ssv) >= 0) { 14461 SV **dst_ary, **src_ary; 14462 SSize_t items = AvFILLp((const AV *)ssv) + 1; 14463 14464 src_ary = AvARRAY((const AV *)ssv); 14465 Newx(dst_ary, AvMAX((const AV *)ssv)+1, SV*); 14466 ptr_table_store(PL_ptr_table, src_ary, dst_ary); 14467 AvARRAY(MUTABLE_AV(dsv)) = dst_ary; 14468 AvALLOC((const AV *)dsv) = dst_ary; 14469 if (AvREAL((const AV *)ssv)) { 14470 dst_ary = sv_dup_inc_multiple(src_ary, dst_ary, items, 14471 param); 14472 } 14473 else { 14474 while (items-- > 0) 14475 *dst_ary++ = sv_dup(*src_ary++, param); 14476 } 14477 items = AvMAX((const AV *)ssv) - AvFILLp((const AV *)ssv); 14478 while (items-- > 0) { 14479 *dst_ary++ = NULL; 14480 } 14481 } 14482 else { 14483 AvARRAY(MUTABLE_AV(dsv)) = NULL; 14484 AvALLOC((const AV *)dsv) = (SV**)NULL; 14485 AvMAX( (const AV *)dsv) = -1; 14486 AvFILLp((const AV *)dsv) = -1; 14487 } 14488 break; 14489 case SVt_PVHV: 14490 if (HvARRAY((const HV *)ssv)) { 14491 STRLEN i = 0; 14492 XPVHV * const dxhv = (XPVHV*)SvANY(dsv); 14493 XPVHV * const sxhv = (XPVHV*)SvANY(ssv); 14494 char *darray; 14495 Newx(darray, PERL_HV_ARRAY_ALLOC_BYTES(dxhv->xhv_max+1), 14496 char); 14497 HvARRAY(dsv) = (HE**)darray; 14498 while (i <= sxhv->xhv_max) { 14499 const HE * const source = HvARRAY(ssv)[i]; 14500 HvARRAY(dsv)[i] = source 14501 ? he_dup(source, FALSE, param) : 0; 14502 ++i; 14503 } 14504 if (SvOOK(ssv)) { 14505 const struct xpvhv_aux * const saux = HvAUX(ssv); 14506 struct xpvhv_aux * const daux = HvAUX(dsv); 14507 /* This flag isn't copied. */ 14508 SvOOK_on(dsv); 14509 14510 if (saux->xhv_name_count) { 14511 HEK ** const sname = saux->xhv_name_u.xhvnameu_names; 14512 const I32 count 14513 = saux->xhv_name_count < 0 14514 ? -saux->xhv_name_count 14515 : saux->xhv_name_count; 14516 HEK **shekp = sname + count; 14517 HEK **dhekp; 14518 Newx(daux->xhv_name_u.xhvnameu_names, count, HEK *); 14519 dhekp = daux->xhv_name_u.xhvnameu_names + count; 14520 while (shekp-- > sname) { 14521 dhekp--; 14522 *dhekp = hek_dup(*shekp, param); 14523 } 14524 } 14525 else { 14526 daux->xhv_name_u.xhvnameu_name 14527 = hek_dup(saux->xhv_name_u.xhvnameu_name, 14528 param); 14529 } 14530 daux->xhv_name_count = saux->xhv_name_count; 14531 14532 daux->xhv_aux_flags = saux->xhv_aux_flags; 14533 #ifdef PERL_HASH_RANDOMIZE_KEYS 14534 daux->xhv_rand = saux->xhv_rand; 14535 daux->xhv_last_rand = saux->xhv_last_rand; 14536 #endif 14537 daux->xhv_riter = saux->xhv_riter; 14538 daux->xhv_eiter = saux->xhv_eiter 14539 ? he_dup(saux->xhv_eiter, FALSE, param) : 0; 14540 /* backref array needs refcnt=2; see sv_add_backref */ 14541 daux->xhv_backreferences = 14542 (param->flags & CLONEf_JOIN_IN) 14543 /* when joining, we let the individual GVs and 14544 * CVs add themselves to backref as 14545 * needed. This avoids pulling in stuff 14546 * that isn't required, and simplifies the 14547 * case where stashes aren't cloned back 14548 * if they already exist in the parent 14549 * thread */ 14550 ? NULL 14551 : saux->xhv_backreferences 14552 ? (SvTYPE(saux->xhv_backreferences) == SVt_PVAV) 14553 ? MUTABLE_AV(SvREFCNT_inc( 14554 sv_dup_inc((const SV *) 14555 saux->xhv_backreferences, param))) 14556 : MUTABLE_AV(sv_dup((const SV *) 14557 saux->xhv_backreferences, param)) 14558 : 0; 14559 14560 daux->xhv_mro_meta = saux->xhv_mro_meta 14561 ? mro_meta_dup(saux->xhv_mro_meta, param) 14562 : 0; 14563 14564 /* Record stashes for possible cloning in Perl_clone(). */ 14565 if (HvNAME(ssv)) 14566 av_push(param->stashes, dsv); 14567 } 14568 } 14569 else 14570 HvARRAY(MUTABLE_HV(dsv)) = NULL; 14571 break; 14572 case SVt_PVCV: 14573 if (!(param->flags & CLONEf_COPY_STACKS)) { 14574 CvDEPTH(dsv) = 0; 14575 } 14576 /* FALLTHROUGH */ 14577 case SVt_PVFM: 14578 /* NOTE: not refcounted */ 14579 SvANY(MUTABLE_CV(dsv))->xcv_stash = 14580 hv_dup(CvSTASH(dsv), param); 14581 if ((param->flags & CLONEf_JOIN_IN) && CvSTASH(dsv)) 14582 Perl_sv_add_backref(aTHX_ MUTABLE_SV(CvSTASH(dsv)), dsv); 14583 if (!CvISXSUB(dsv)) { 14584 OP_REFCNT_LOCK; 14585 CvROOT(dsv) = OpREFCNT_inc(CvROOT(dsv)); 14586 OP_REFCNT_UNLOCK; 14587 CvSLABBED_off(dsv); 14588 } else if (CvCONST(dsv)) { 14589 CvXSUBANY(dsv).any_ptr = 14590 sv_dup_inc((const SV *)CvXSUBANY(dsv).any_ptr, param); 14591 } 14592 assert(!CvSLABBED(dsv)); 14593 if (CvDYNFILE(dsv)) CvFILE(dsv) = SAVEPV(CvFILE(dsv)); 14594 if (CvNAMED(dsv)) 14595 SvANY((CV *)dsv)->xcv_gv_u.xcv_hek = 14596 hek_dup(CvNAME_HEK((CV *)ssv), param); 14597 /* don't dup if copying back - CvGV isn't refcounted, so the 14598 * duped GV may never be freed. A bit of a hack! DAPM */ 14599 else 14600 SvANY(MUTABLE_CV(dsv))->xcv_gv_u.xcv_gv = 14601 CvCVGV_RC(dsv) 14602 ? gv_dup_inc(CvGV(ssv), param) 14603 : (param->flags & CLONEf_JOIN_IN) 14604 ? NULL 14605 : gv_dup(CvGV(ssv), param); 14606 14607 if (!CvISXSUB(ssv)) { 14608 PADLIST * padlist = CvPADLIST(ssv); 14609 if(padlist) 14610 padlist = padlist_dup(padlist, param); 14611 CvPADLIST_set(dsv, padlist); 14612 } else 14613 /* unthreaded perl can't sv_dup so we dont support unthreaded's CvHSCXT */ 14614 PoisonPADLIST(dsv); 14615 14616 CvOUTSIDE(dsv) = 14617 CvWEAKOUTSIDE(ssv) 14618 ? cv_dup( CvOUTSIDE(dsv), param) 14619 : cv_dup_inc(CvOUTSIDE(dsv), param); 14620 break; 14621 } 14622 } 14623 } 14624 14625 return dsv; 14626 } 14627 14628 SV * 14629 Perl_sv_dup_inc(pTHX_ const SV *const ssv, CLONE_PARAMS *const param) 14630 { 14631 PERL_ARGS_ASSERT_SV_DUP_INC; 14632 return ssv ? SvREFCNT_inc(sv_dup_common(ssv, param)) : NULL; 14633 } 14634 14635 SV * 14636 Perl_sv_dup(pTHX_ const SV *const ssv, CLONE_PARAMS *const param) 14637 { 14638 SV *dsv = ssv ? sv_dup_common(ssv, param) : NULL; 14639 PERL_ARGS_ASSERT_SV_DUP; 14640 14641 /* Track every SV that (at least initially) had a reference count of 0. 14642 We need to do this by holding an actual reference to it in this array. 14643 If we attempt to cheat, turn AvREAL_off(), and store only pointers 14644 (akin to the stashes hash, and the perl stack), we come unstuck if 14645 a weak reference (or other SV legitimately SvREFCNT() == 0 for this 14646 thread) is manipulated in a CLONE method, because CLONE runs before the 14647 unreferenced array is walked to find SVs still with SvREFCNT() == 0 14648 (and fix things up by giving each a reference via the temps stack). 14649 Instead, during CLONE, if the 0-referenced SV has SvREFCNT_inc() and 14650 then SvREFCNT_dec(), it will be cleaned up (and added to the free list) 14651 before the walk of unreferenced happens and a reference to that is SV 14652 added to the temps stack. At which point we have the same SV considered 14653 to be in use, and free to be re-used. Not good. 14654 */ 14655 if (dsv && !(param->flags & CLONEf_COPY_STACKS) && !SvREFCNT(dsv)) { 14656 assert(param->unreferenced); 14657 av_push(param->unreferenced, SvREFCNT_inc(dsv)); 14658 } 14659 14660 return dsv; 14661 } 14662 14663 /* duplicate a context */ 14664 14665 PERL_CONTEXT * 14666 Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param) 14667 { 14668 PERL_CONTEXT *ncxs; 14669 14670 PERL_ARGS_ASSERT_CX_DUP; 14671 14672 if (!cxs) 14673 return (PERL_CONTEXT*)NULL; 14674 14675 /* look for it in the table first */ 14676 ncxs = (PERL_CONTEXT*)ptr_table_fetch(PL_ptr_table, cxs); 14677 if (ncxs) 14678 return ncxs; 14679 14680 /* create anew and remember what it is */ 14681 Newx(ncxs, max + 1, PERL_CONTEXT); 14682 ptr_table_store(PL_ptr_table, cxs, ncxs); 14683 Copy(cxs, ncxs, max + 1, PERL_CONTEXT); 14684 14685 while (ix >= 0) { 14686 PERL_CONTEXT * const ncx = &ncxs[ix]; 14687 if (CxTYPE(ncx) == CXt_SUBST) { 14688 Perl_croak(aTHX_ "Cloning substitution context is unimplemented"); 14689 } 14690 else { 14691 ncx->blk_oldcop = (COP*)any_dup(ncx->blk_oldcop, param->proto_perl); 14692 switch (CxTYPE(ncx)) { 14693 case CXt_SUB: 14694 ncx->blk_sub.cv = cv_dup_inc(ncx->blk_sub.cv, param); 14695 if(CxHASARGS(ncx)){ 14696 ncx->blk_sub.savearray = av_dup_inc(ncx->blk_sub.savearray,param); 14697 } else { 14698 ncx->blk_sub.savearray = NULL; 14699 } 14700 ncx->blk_sub.prevcomppad = (PAD*)ptr_table_fetch(PL_ptr_table, 14701 ncx->blk_sub.prevcomppad); 14702 break; 14703 case CXt_EVAL: 14704 ncx->blk_eval.old_namesv = sv_dup_inc(ncx->blk_eval.old_namesv, 14705 param); 14706 /* XXX should this sv_dup_inc? Or only if CxEVAL_TXT_REFCNTED ???? */ 14707 ncx->blk_eval.cur_text = sv_dup(ncx->blk_eval.cur_text, param); 14708 ncx->blk_eval.cv = cv_dup(ncx->blk_eval.cv, param); 14709 /* XXX what to do with cur_top_env ???? */ 14710 break; 14711 case CXt_LOOP_LAZYSV: 14712 ncx->blk_loop.state_u.lazysv.end 14713 = sv_dup_inc(ncx->blk_loop.state_u.lazysv.end, param); 14714 /* Fallthrough: duplicate lazysv.cur by using the ary.ary 14715 duplication code instead. 14716 We are taking advantage of (1) av_dup_inc and sv_dup_inc 14717 actually being the same function, and (2) order 14718 equivalence of the two unions. 14719 We can assert the later [but only at run time :-(] */ 14720 assert ((void *) &ncx->blk_loop.state_u.ary.ary == 14721 (void *) &ncx->blk_loop.state_u.lazysv.cur); 14722 /* FALLTHROUGH */ 14723 case CXt_LOOP_ARY: 14724 ncx->blk_loop.state_u.ary.ary 14725 = av_dup_inc(ncx->blk_loop.state_u.ary.ary, param); 14726 /* FALLTHROUGH */ 14727 case CXt_LOOP_LIST: 14728 case CXt_LOOP_LAZYIV: 14729 /* code common to all 'for' CXt_LOOP_* types */ 14730 ncx->blk_loop.itersave = 14731 sv_dup_inc(ncx->blk_loop.itersave, param); 14732 if (CxPADLOOP(ncx)) { 14733 PADOFFSET off = ncx->blk_loop.itervar_u.svp 14734 - &CX_CURPAD_SV(ncx->blk_loop, 0); 14735 ncx->blk_loop.oldcomppad = 14736 (PAD*)ptr_table_fetch(PL_ptr_table, 14737 ncx->blk_loop.oldcomppad); 14738 ncx->blk_loop.itervar_u.svp = 14739 &CX_CURPAD_SV(ncx->blk_loop, off); 14740 } 14741 else { 14742 /* this copies the GV if CXp_FOR_GV, or the SV for an 14743 * alias (for \$x (...)) - relies on gv_dup being the 14744 * same as sv_dup */ 14745 ncx->blk_loop.itervar_u.gv 14746 = gv_dup((const GV *)ncx->blk_loop.itervar_u.gv, 14747 param); 14748 } 14749 break; 14750 case CXt_LOOP_PLAIN: 14751 break; 14752 case CXt_FORMAT: 14753 ncx->blk_format.prevcomppad = 14754 (PAD*)ptr_table_fetch(PL_ptr_table, 14755 ncx->blk_format.prevcomppad); 14756 ncx->blk_format.cv = cv_dup_inc(ncx->blk_format.cv, param); 14757 ncx->blk_format.gv = gv_dup(ncx->blk_format.gv, param); 14758 ncx->blk_format.dfoutgv = gv_dup_inc(ncx->blk_format.dfoutgv, 14759 param); 14760 break; 14761 case CXt_GIVEN: 14762 ncx->blk_givwhen.defsv_save = 14763 sv_dup_inc(ncx->blk_givwhen.defsv_save, param); 14764 break; 14765 case CXt_BLOCK: 14766 case CXt_NULL: 14767 case CXt_WHEN: 14768 case CXt_DEFER: 14769 break; 14770 } 14771 } 14772 --ix; 14773 } 14774 return ncxs; 14775 } 14776 14777 /* 14778 =for apidoc si_dup 14779 14780 Duplicate a stack info structure, returning a pointer to the cloned object. 14781 14782 =cut 14783 */ 14784 14785 PERL_SI * 14786 Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param) 14787 { 14788 PERL_SI *nsi; 14789 14790 PERL_ARGS_ASSERT_SI_DUP; 14791 14792 if (!si) 14793 return (PERL_SI*)NULL; 14794 14795 /* look for it in the table first */ 14796 nsi = (PERL_SI*)ptr_table_fetch(PL_ptr_table, si); 14797 if (nsi) 14798 return nsi; 14799 14800 /* create anew and remember what it is */ 14801 Newx(nsi, 1, PERL_SI); 14802 ptr_table_store(PL_ptr_table, si, nsi); 14803 14804 nsi->si_stack = av_dup_inc(si->si_stack, param); 14805 nsi->si_cxix = si->si_cxix; 14806 nsi->si_cxsubix = si->si_cxsubix; 14807 nsi->si_cxmax = si->si_cxmax; 14808 nsi->si_cxstack = cx_dup(si->si_cxstack, si->si_cxix, si->si_cxmax, param); 14809 nsi->si_type = si->si_type; 14810 nsi->si_prev = si_dup(si->si_prev, param); 14811 nsi->si_next = si_dup(si->si_next, param); 14812 nsi->si_markoff = si->si_markoff; 14813 #if defined DEBUGGING && !defined DEBUGGING_RE_ONLY 14814 nsi->si_stack_hwm = 0; 14815 #endif 14816 14817 return nsi; 14818 } 14819 14820 #define POPINT(ss,ix) ((ss)[--(ix)].any_i32) 14821 #define TOPINT(ss,ix) ((ss)[ix].any_i32) 14822 #define POPLONG(ss,ix) ((ss)[--(ix)].any_long) 14823 #define TOPLONG(ss,ix) ((ss)[ix].any_long) 14824 #define POPIV(ss,ix) ((ss)[--(ix)].any_iv) 14825 #define TOPIV(ss,ix) ((ss)[ix].any_iv) 14826 #define POPUV(ss,ix) ((ss)[--(ix)].any_uv) 14827 #define TOPUV(ss,ix) ((ss)[ix].any_uv) 14828 #define POPBOOL(ss,ix) ((ss)[--(ix)].any_bool) 14829 #define TOPBOOL(ss,ix) ((ss)[ix].any_bool) 14830 #define POPPTR(ss,ix) ((ss)[--(ix)].any_ptr) 14831 #define TOPPTR(ss,ix) ((ss)[ix].any_ptr) 14832 #define POPDPTR(ss,ix) ((ss)[--(ix)].any_dptr) 14833 #define TOPDPTR(ss,ix) ((ss)[ix].any_dptr) 14834 #define POPDXPTR(ss,ix) ((ss)[--(ix)].any_dxptr) 14835 #define TOPDXPTR(ss,ix) ((ss)[ix].any_dxptr) 14836 14837 /* XXXXX todo */ 14838 #define pv_dup_inc(p) SAVEPV(p) 14839 #define pv_dup(p) SAVEPV(p) 14840 #define svp_dup_inc(p,pp) any_dup(p,pp) 14841 14842 /* map any object to the new equivent - either something in the 14843 * ptr table, or something in the interpreter structure 14844 */ 14845 14846 void * 14847 Perl_any_dup(pTHX_ void *v, const PerlInterpreter *proto_perl) 14848 { 14849 void *ret; 14850 14851 PERL_ARGS_ASSERT_ANY_DUP; 14852 14853 if (!v) 14854 return (void*)NULL; 14855 14856 /* look for it in the table first */ 14857 ret = ptr_table_fetch(PL_ptr_table, v); 14858 if (ret) 14859 return ret; 14860 14861 /* see if it is part of the interpreter structure */ 14862 if (v >= (void*)proto_perl && v < (void*)(proto_perl+1)) 14863 ret = (void*)(((char*)aTHX) + (((char*)v) - (char*)proto_perl)); 14864 else { 14865 ret = v; 14866 } 14867 14868 return ret; 14869 } 14870 14871 /* 14872 =for apidoc ss_dup 14873 14874 Duplicate the save stack, returning a pointer to the cloned object. 14875 14876 =cut 14877 */ 14878 14879 ANY * 14880 Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) 14881 { 14882 ANY * const ss = proto_perl->Isavestack; 14883 const I32 max = proto_perl->Isavestack_max + SS_MAXPUSH; 14884 I32 ix = proto_perl->Isavestack_ix; 14885 ANY *nss; 14886 const SV *sv; 14887 const GV *gv; 14888 const AV *av; 14889 const HV *hv; 14890 void* ptr; 14891 int intval; 14892 long longval; 14893 GP *gp; 14894 IV iv; 14895 I32 i; 14896 char *c = NULL; 14897 void (*dptr) (void*); 14898 void (*dxptr) (pTHX_ void*); 14899 14900 PERL_ARGS_ASSERT_SS_DUP; 14901 14902 Newx(nss, max, ANY); 14903 14904 while (ix > 0) { 14905 const UV uv = POPUV(ss,ix); 14906 const U8 type = (U8)uv & SAVE_MASK; 14907 14908 TOPUV(nss,ix) = uv; 14909 switch (type) { 14910 case SAVEt_CLEARSV: 14911 case SAVEt_CLEARPADRANGE: 14912 break; 14913 case SAVEt_HELEM: /* hash element */ 14914 case SAVEt_SV: /* scalar reference */ 14915 sv = (const SV *)POPPTR(ss,ix); 14916 TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param)); 14917 /* FALLTHROUGH */ 14918 case SAVEt_ITEM: /* normal string */ 14919 case SAVEt_GVSV: /* scalar slot in GV */ 14920 sv = (const SV *)POPPTR(ss,ix); 14921 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 14922 if (type == SAVEt_SV) 14923 break; 14924 /* FALLTHROUGH */ 14925 case SAVEt_FREESV: 14926 case SAVEt_MORTALIZESV: 14927 case SAVEt_READONLY_OFF: 14928 sv = (const SV *)POPPTR(ss,ix); 14929 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 14930 break; 14931 case SAVEt_FREEPADNAME: 14932 ptr = POPPTR(ss,ix); 14933 TOPPTR(nss,ix) = padname_dup((PADNAME *)ptr, param); 14934 PadnameREFCNT((PADNAME *)TOPPTR(nss,ix))++; 14935 break; 14936 case SAVEt_SHARED_PVREF: /* char* in shared space */ 14937 c = (char*)POPPTR(ss,ix); 14938 TOPPTR(nss,ix) = savesharedpv(c); 14939 ptr = POPPTR(ss,ix); 14940 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 14941 break; 14942 case SAVEt_GENERIC_SVREF: /* generic sv */ 14943 case SAVEt_SVREF: /* scalar reference */ 14944 sv = (const SV *)POPPTR(ss,ix); 14945 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 14946 if (type == SAVEt_SVREF) 14947 SvREFCNT_inc_simple_void((SV *)TOPPTR(nss,ix)); 14948 ptr = POPPTR(ss,ix); 14949 TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */ 14950 break; 14951 case SAVEt_GVSLOT: /* any slot in GV */ 14952 sv = (const SV *)POPPTR(ss,ix); 14953 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 14954 ptr = POPPTR(ss,ix); 14955 TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */ 14956 sv = (const SV *)POPPTR(ss,ix); 14957 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 14958 break; 14959 case SAVEt_HV: /* hash reference */ 14960 case SAVEt_AV: /* array reference */ 14961 sv = (const SV *) POPPTR(ss,ix); 14962 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 14963 /* FALLTHROUGH */ 14964 case SAVEt_COMPPAD: 14965 case SAVEt_NSTAB: 14966 sv = (const SV *) POPPTR(ss,ix); 14967 TOPPTR(nss,ix) = sv_dup(sv, param); 14968 break; 14969 case SAVEt_INT: /* int reference */ 14970 ptr = POPPTR(ss,ix); 14971 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 14972 intval = (int)POPINT(ss,ix); 14973 TOPINT(nss,ix) = intval; 14974 break; 14975 case SAVEt_LONG: /* long reference */ 14976 ptr = POPPTR(ss,ix); 14977 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 14978 longval = (long)POPLONG(ss,ix); 14979 TOPLONG(nss,ix) = longval; 14980 break; 14981 case SAVEt_I32: /* I32 reference */ 14982 ptr = POPPTR(ss,ix); 14983 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 14984 i = POPINT(ss,ix); 14985 TOPINT(nss,ix) = i; 14986 break; 14987 case SAVEt_IV: /* IV reference */ 14988 case SAVEt_STRLEN: /* STRLEN/size_t ref */ 14989 ptr = POPPTR(ss,ix); 14990 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 14991 iv = POPIV(ss,ix); 14992 TOPIV(nss,ix) = iv; 14993 break; 14994 case SAVEt_TMPSFLOOR: 14995 iv = POPIV(ss,ix); 14996 TOPIV(nss,ix) = iv; 14997 break; 14998 case SAVEt_HPTR: /* HV* reference */ 14999 case SAVEt_APTR: /* AV* reference */ 15000 case SAVEt_SPTR: /* SV* reference */ 15001 ptr = POPPTR(ss,ix); 15002 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15003 sv = (const SV *)POPPTR(ss,ix); 15004 TOPPTR(nss,ix) = sv_dup(sv, param); 15005 break; 15006 case SAVEt_VPTR: /* random* reference */ 15007 ptr = POPPTR(ss,ix); 15008 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15009 /* FALLTHROUGH */ 15010 case SAVEt_STRLEN_SMALL: 15011 case SAVEt_INT_SMALL: 15012 case SAVEt_I32_SMALL: 15013 case SAVEt_I16: /* I16 reference */ 15014 case SAVEt_I8: /* I8 reference */ 15015 case SAVEt_BOOL: 15016 ptr = POPPTR(ss,ix); 15017 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15018 break; 15019 case SAVEt_GENERIC_PVREF: /* generic char* */ 15020 case SAVEt_PPTR: /* char* reference */ 15021 ptr = POPPTR(ss,ix); 15022 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15023 c = (char*)POPPTR(ss,ix); 15024 TOPPTR(nss,ix) = pv_dup(c); 15025 break; 15026 case SAVEt_GP: /* scalar reference */ 15027 gp = (GP*)POPPTR(ss,ix); 15028 TOPPTR(nss,ix) = gp = gp_dup(gp, param); 15029 (void)GpREFCNT_inc(gp); 15030 gv = (const GV *)POPPTR(ss,ix); 15031 TOPPTR(nss,ix) = gv_dup_inc(gv, param); 15032 break; 15033 case SAVEt_FREEOP: 15034 ptr = POPPTR(ss,ix); 15035 if (ptr && (((OP*)ptr)->op_private & OPpREFCOUNTED)) { 15036 /* these are assumed to be refcounted properly */ 15037 OP *o; 15038 switch (((OP*)ptr)->op_type) { 15039 case OP_LEAVESUB: 15040 case OP_LEAVESUBLV: 15041 case OP_LEAVEEVAL: 15042 case OP_LEAVE: 15043 case OP_SCOPE: 15044 case OP_LEAVEWRITE: 15045 TOPPTR(nss,ix) = ptr; 15046 o = (OP*)ptr; 15047 OP_REFCNT_LOCK; 15048 (void) OpREFCNT_inc(o); 15049 OP_REFCNT_UNLOCK; 15050 break; 15051 default: 15052 TOPPTR(nss,ix) = NULL; 15053 break; 15054 } 15055 } 15056 else 15057 TOPPTR(nss,ix) = NULL; 15058 break; 15059 case SAVEt_FREECOPHH: 15060 ptr = POPPTR(ss,ix); 15061 TOPPTR(nss,ix) = cophh_copy((COPHH *)ptr); 15062 break; 15063 case SAVEt_ADELETE: 15064 av = (const AV *)POPPTR(ss,ix); 15065 TOPPTR(nss,ix) = av_dup_inc(av, param); 15066 i = POPINT(ss,ix); 15067 TOPINT(nss,ix) = i; 15068 break; 15069 case SAVEt_DELETE: 15070 hv = (const HV *)POPPTR(ss,ix); 15071 TOPPTR(nss,ix) = hv_dup_inc(hv, param); 15072 i = POPINT(ss,ix); 15073 TOPINT(nss,ix) = i; 15074 /* FALLTHROUGH */ 15075 case SAVEt_FREEPV: 15076 c = (char*)POPPTR(ss,ix); 15077 TOPPTR(nss,ix) = pv_dup_inc(c); 15078 break; 15079 case SAVEt_STACK_POS: /* Position on Perl stack */ 15080 i = POPINT(ss,ix); 15081 TOPINT(nss,ix) = i; 15082 break; 15083 case SAVEt_DESTRUCTOR: 15084 ptr = POPPTR(ss,ix); 15085 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */ 15086 dptr = POPDPTR(ss,ix); 15087 TOPDPTR(nss,ix) = DPTR2FPTR(void (*)(void*), 15088 any_dup(FPTR2DPTR(void *, dptr), 15089 proto_perl)); 15090 break; 15091 case SAVEt_DESTRUCTOR_X: 15092 ptr = POPPTR(ss,ix); 15093 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */ 15094 dxptr = POPDXPTR(ss,ix); 15095 TOPDXPTR(nss,ix) = DPTR2FPTR(void (*)(pTHX_ void*), 15096 any_dup(FPTR2DPTR(void *, dxptr), 15097 proto_perl)); 15098 break; 15099 case SAVEt_REGCONTEXT: 15100 case SAVEt_ALLOC: 15101 ix -= uv >> SAVE_TIGHT_SHIFT; 15102 break; 15103 case SAVEt_AELEM: /* array element */ 15104 sv = (const SV *)POPPTR(ss,ix); 15105 TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param)); 15106 iv = POPIV(ss,ix); 15107 TOPIV(nss,ix) = iv; 15108 av = (const AV *)POPPTR(ss,ix); 15109 TOPPTR(nss,ix) = av_dup_inc(av, param); 15110 break; 15111 case SAVEt_OP: 15112 ptr = POPPTR(ss,ix); 15113 TOPPTR(nss,ix) = ptr; 15114 break; 15115 case SAVEt_HINTS_HH: 15116 hv = (const HV *)POPPTR(ss,ix); 15117 TOPPTR(nss,ix) = hv_dup_inc(hv, param); 15118 /* FALLTHROUGH */ 15119 case SAVEt_HINTS: 15120 ptr = POPPTR(ss,ix); 15121 ptr = cophh_copy((COPHH*)ptr); 15122 TOPPTR(nss,ix) = ptr; 15123 i = POPINT(ss,ix); 15124 TOPINT(nss,ix) = i; 15125 break; 15126 case SAVEt_PADSV_AND_MORTALIZE: 15127 longval = (long)POPLONG(ss,ix); 15128 TOPLONG(nss,ix) = longval; 15129 ptr = POPPTR(ss,ix); 15130 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15131 sv = (const SV *)POPPTR(ss,ix); 15132 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15133 break; 15134 case SAVEt_SET_SVFLAGS: 15135 i = POPINT(ss,ix); 15136 TOPINT(nss,ix) = i; 15137 i = POPINT(ss,ix); 15138 TOPINT(nss,ix) = i; 15139 sv = (const SV *)POPPTR(ss,ix); 15140 TOPPTR(nss,ix) = sv_dup(sv, param); 15141 break; 15142 case SAVEt_COMPILE_WARNINGS: 15143 ptr = POPPTR(ss,ix); 15144 TOPPTR(nss,ix) = DUP_WARNINGS((STRLEN*)ptr); 15145 break; 15146 case SAVEt_PARSER: 15147 ptr = POPPTR(ss,ix); 15148 TOPPTR(nss,ix) = parser_dup((const yy_parser*)ptr, param); 15149 break; 15150 default: 15151 Perl_croak(aTHX_ 15152 "panic: ss_dup inconsistency (%" IVdf ")", (IV) type); 15153 } 15154 } 15155 15156 return nss; 15157 } 15158 15159 15160 /* if sv is a stash, call $class->CLONE_SKIP(), and set the SVphv_CLONEABLE 15161 * flag to the result. This is done for each stash before cloning starts, 15162 * so we know which stashes want their objects cloned */ 15163 15164 static void 15165 do_mark_cloneable_stash(pTHX_ SV *const sv) 15166 { 15167 const HEK * const hvname = HvNAME_HEK((const HV *)sv); 15168 if (hvname) { 15169 GV* const cloner = gv_fetchmethod_autoload(MUTABLE_HV(sv), "CLONE_SKIP", 0); 15170 SvFLAGS(sv) |= SVphv_CLONEABLE; /* clone objects by default */ 15171 if (cloner && GvCV(cloner)) { 15172 dSP; 15173 UV status; 15174 15175 ENTER; 15176 SAVETMPS; 15177 PUSHMARK(SP); 15178 mXPUSHs(newSVhek(hvname)); 15179 PUTBACK; 15180 call_sv(MUTABLE_SV(GvCV(cloner)), G_SCALAR); 15181 SPAGAIN; 15182 status = POPu; 15183 PUTBACK; 15184 FREETMPS; 15185 LEAVE; 15186 if (status) 15187 SvFLAGS(sv) &= ~SVphv_CLONEABLE; 15188 } 15189 } 15190 } 15191 15192 15193 15194 /* 15195 =for apidoc perl_clone 15196 15197 Create and return a new interpreter by cloning the current one. 15198 15199 C<perl_clone> takes these flags as parameters: 15200 15201 C<CLONEf_COPY_STACKS> - is used to, well, copy the stacks also, 15202 without it we only clone the data and zero the stacks, 15203 with it we copy the stacks and the new perl interpreter is 15204 ready to run at the exact same point as the previous one. 15205 The pseudo-fork code uses C<COPY_STACKS> while the 15206 threads->create doesn't. 15207 15208 C<CLONEf_KEEP_PTR_TABLE> - 15209 C<perl_clone> keeps a ptr_table with the pointer of the old 15210 variable as a key and the new variable as a value, 15211 this allows it to check if something has been cloned and not 15212 clone it again, but rather just use the value and increase the 15213 refcount. 15214 If C<KEEP_PTR_TABLE> is not set then C<perl_clone> will kill the ptr_table 15215 using the function S<C<ptr_table_free(PL_ptr_table); PL_ptr_table = NULL;>>. 15216 A reason to keep it around is if you want to dup some of your own 15217 variables which are outside the graph that perl scans. 15218 15219 C<CLONEf_CLONE_HOST> - 15220 This is a win32 thing, it is ignored on unix, it tells perl's 15221 win32host code (which is c++) to clone itself, this is needed on 15222 win32 if you want to run two threads at the same time, 15223 if you just want to do some stuff in a separate perl interpreter 15224 and then throw it away and return to the original one, 15225 you don't need to do anything. 15226 15227 =cut 15228 */ 15229 15230 /* XXX the above needs expanding by someone who actually understands it ! */ 15231 EXTERN_C PerlInterpreter * 15232 perl_clone_host(PerlInterpreter* proto_perl, UV flags); 15233 15234 PerlInterpreter * 15235 perl_clone(PerlInterpreter *proto_perl, UV flags) 15236 { 15237 #ifdef PERL_IMPLICIT_SYS 15238 15239 PERL_ARGS_ASSERT_PERL_CLONE; 15240 15241 /* perlhost.h so we need to call into it 15242 to clone the host, CPerlHost should have a c interface, sky */ 15243 15244 #ifndef __amigaos4__ 15245 if (flags & CLONEf_CLONE_HOST) { 15246 return perl_clone_host(proto_perl,flags); 15247 } 15248 #endif 15249 return perl_clone_using(proto_perl, flags, 15250 proto_perl->IMem, 15251 proto_perl->IMemShared, 15252 proto_perl->IMemParse, 15253 proto_perl->IEnv, 15254 proto_perl->IStdIO, 15255 proto_perl->ILIO, 15256 proto_perl->IDir, 15257 proto_perl->ISock, 15258 proto_perl->IProc); 15259 } 15260 15261 PerlInterpreter * 15262 perl_clone_using(PerlInterpreter *proto_perl, UV flags, 15263 struct IPerlMem* ipM, struct IPerlMem* ipMS, 15264 struct IPerlMem* ipMP, struct IPerlEnv* ipE, 15265 struct IPerlStdIO* ipStd, struct IPerlLIO* ipLIO, 15266 struct IPerlDir* ipD, struct IPerlSock* ipS, 15267 struct IPerlProc* ipP) 15268 { 15269 /* XXX many of the string copies here can be optimized if they're 15270 * constants; they need to be allocated as common memory and just 15271 * their pointers copied. */ 15272 15273 IV i; 15274 CLONE_PARAMS clone_params; 15275 CLONE_PARAMS* const param = &clone_params; 15276 15277 PerlInterpreter * const my_perl = (PerlInterpreter*)(*ipM->pMalloc)(ipM, sizeof(PerlInterpreter)); 15278 15279 PERL_ARGS_ASSERT_PERL_CLONE_USING; 15280 #else /* !PERL_IMPLICIT_SYS */ 15281 IV i; 15282 CLONE_PARAMS clone_params; 15283 CLONE_PARAMS* param = &clone_params; 15284 PerlInterpreter * const my_perl = (PerlInterpreter*)PerlMem_malloc(sizeof(PerlInterpreter)); 15285 15286 PERL_ARGS_ASSERT_PERL_CLONE; 15287 #endif /* PERL_IMPLICIT_SYS */ 15288 15289 /* for each stash, determine whether its objects should be cloned */ 15290 S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK); 15291 PERL_SET_THX(my_perl); 15292 15293 #ifdef DEBUGGING 15294 PoisonNew(my_perl, 1, PerlInterpreter); 15295 PL_op = NULL; 15296 PL_curcop = NULL; 15297 PL_defstash = NULL; /* may be used by perl malloc() */ 15298 PL_markstack = 0; 15299 PL_scopestack = 0; 15300 PL_scopestack_name = 0; 15301 PL_savestack = 0; 15302 PL_savestack_ix = 0; 15303 PL_savestack_max = -1; 15304 PL_sig_pending = 0; 15305 PL_parser = NULL; 15306 Zero(&PL_debug_pad, 1, struct perl_debug_pad); 15307 Zero(&PL_padname_undef, 1, PADNAME); 15308 Zero(&PL_padname_const, 1, PADNAME); 15309 # ifdef DEBUG_LEAKING_SCALARS 15310 PL_sv_serial = (((UV)my_perl >> 2) & 0xfff) * 1000000; 15311 # endif 15312 # ifdef PERL_TRACE_OPS 15313 Zero(PL_op_exec_cnt, OP_max+2, UV); 15314 # endif 15315 #else /* !DEBUGGING */ 15316 Zero(my_perl, 1, PerlInterpreter); 15317 #endif /* DEBUGGING */ 15318 15319 #ifdef PERL_IMPLICIT_SYS 15320 /* host pointers */ 15321 PL_Mem = ipM; 15322 PL_MemShared = ipMS; 15323 PL_MemParse = ipMP; 15324 PL_Env = ipE; 15325 PL_StdIO = ipStd; 15326 PL_LIO = ipLIO; 15327 PL_Dir = ipD; 15328 PL_Sock = ipS; 15329 PL_Proc = ipP; 15330 #endif /* PERL_IMPLICIT_SYS */ 15331 15332 15333 param->flags = flags; 15334 /* Nothing in the core code uses this, but we make it available to 15335 extensions (using mg_dup). */ 15336 param->proto_perl = proto_perl; 15337 /* Likely nothing will use this, but it is initialised to be consistent 15338 with Perl_clone_params_new(). */ 15339 param->new_perl = my_perl; 15340 param->unreferenced = NULL; 15341 15342 15343 INIT_TRACK_MEMPOOL(my_perl->Imemory_debug_header, my_perl); 15344 15345 PL_body_arenas = NULL; 15346 Zero(&PL_body_roots, 1, PL_body_roots); 15347 15348 PL_sv_count = 0; 15349 PL_sv_root = NULL; 15350 PL_sv_arenaroot = NULL; 15351 15352 PL_debug = proto_perl->Idebug; 15353 15354 /* dbargs array probably holds garbage */ 15355 PL_dbargs = NULL; 15356 15357 PL_compiling = proto_perl->Icompiling; 15358 15359 /* pseudo environmental stuff */ 15360 PL_origargc = proto_perl->Iorigargc; 15361 PL_origargv = proto_perl->Iorigargv; 15362 15363 #ifndef NO_TAINT_SUPPORT 15364 /* Set tainting stuff before PerlIO_debug can possibly get called */ 15365 PL_tainting = proto_perl->Itainting; 15366 PL_taint_warn = proto_perl->Itaint_warn; 15367 #else 15368 PL_tainting = FALSE; 15369 PL_taint_warn = FALSE; 15370 #endif 15371 15372 PL_minus_c = proto_perl->Iminus_c; 15373 15374 PL_localpatches = proto_perl->Ilocalpatches; 15375 PL_splitstr = proto_perl->Isplitstr; 15376 PL_minus_n = proto_perl->Iminus_n; 15377 PL_minus_p = proto_perl->Iminus_p; 15378 PL_minus_l = proto_perl->Iminus_l; 15379 PL_minus_a = proto_perl->Iminus_a; 15380 PL_minus_E = proto_perl->Iminus_E; 15381 PL_minus_F = proto_perl->Iminus_F; 15382 PL_doswitches = proto_perl->Idoswitches; 15383 PL_dowarn = proto_perl->Idowarn; 15384 #ifdef PERL_SAWAMPERSAND 15385 PL_sawampersand = proto_perl->Isawampersand; 15386 #endif 15387 PL_unsafe = proto_perl->Iunsafe; 15388 PL_perldb = proto_perl->Iperldb; 15389 PL_perl_destruct_level = proto_perl->Iperl_destruct_level; 15390 PL_exit_flags = proto_perl->Iexit_flags; 15391 15392 /* XXX time(&PL_basetime) when asked for? */ 15393 PL_basetime = proto_perl->Ibasetime; 15394 15395 PL_maxsysfd = proto_perl->Imaxsysfd; 15396 PL_statusvalue = proto_perl->Istatusvalue; 15397 #ifdef __VMS 15398 PL_statusvalue_vms = proto_perl->Istatusvalue_vms; 15399 #else 15400 PL_statusvalue_posix = proto_perl->Istatusvalue_posix; 15401 #endif 15402 15403 /* RE engine related */ 15404 PL_regmatch_slab = NULL; 15405 PL_reg_curpm = NULL; 15406 15407 PL_sub_generation = proto_perl->Isub_generation; 15408 15409 /* funky return mechanisms */ 15410 PL_forkprocess = proto_perl->Iforkprocess; 15411 15412 /* internal state */ 15413 PL_main_start = proto_perl->Imain_start; 15414 PL_eval_root = proto_perl->Ieval_root; 15415 PL_eval_start = proto_perl->Ieval_start; 15416 15417 PL_filemode = proto_perl->Ifilemode; 15418 PL_lastfd = proto_perl->Ilastfd; 15419 PL_oldname = proto_perl->Ioldname; /* XXX not quite right */ 15420 PL_gensym = proto_perl->Igensym; 15421 15422 PL_laststatval = proto_perl->Ilaststatval; 15423 PL_laststype = proto_perl->Ilaststype; 15424 PL_mess_sv = NULL; 15425 15426 PL_profiledata = NULL; 15427 15428 PL_generation = proto_perl->Igeneration; 15429 15430 PL_in_clean_objs = proto_perl->Iin_clean_objs; 15431 PL_in_clean_all = proto_perl->Iin_clean_all; 15432 15433 PL_delaymagic_uid = proto_perl->Idelaymagic_uid; 15434 PL_delaymagic_euid = proto_perl->Idelaymagic_euid; 15435 PL_delaymagic_gid = proto_perl->Idelaymagic_gid; 15436 PL_delaymagic_egid = proto_perl->Idelaymagic_egid; 15437 PL_nomemok = proto_perl->Inomemok; 15438 PL_an = proto_perl->Ian; 15439 PL_evalseq = proto_perl->Ievalseq; 15440 PL_origenviron = proto_perl->Iorigenviron; /* XXX not quite right */ 15441 PL_origalen = proto_perl->Iorigalen; 15442 15443 PL_sighandlerp = proto_perl->Isighandlerp; 15444 PL_sighandler1p = proto_perl->Isighandler1p; 15445 PL_sighandler3p = proto_perl->Isighandler3p; 15446 15447 PL_runops = proto_perl->Irunops; 15448 15449 PL_subline = proto_perl->Isubline; 15450 15451 PL_cv_has_eval = proto_perl->Icv_has_eval; 15452 15453 #ifdef USE_LOCALE_COLLATE 15454 PL_collation_ix = proto_perl->Icollation_ix; 15455 PL_collation_standard = proto_perl->Icollation_standard; 15456 PL_collxfrm_base = proto_perl->Icollxfrm_base; 15457 PL_collxfrm_mult = proto_perl->Icollxfrm_mult; 15458 PL_strxfrm_max_cp = proto_perl->Istrxfrm_max_cp; 15459 #endif /* USE_LOCALE_COLLATE */ 15460 15461 #ifdef USE_LOCALE_NUMERIC 15462 PL_numeric_standard = proto_perl->Inumeric_standard; 15463 PL_numeric_underlying = proto_perl->Inumeric_underlying; 15464 PL_numeric_underlying_is_standard = proto_perl->Inumeric_underlying_is_standard; 15465 #endif /* !USE_LOCALE_NUMERIC */ 15466 15467 /* Did the locale setup indicate UTF-8? */ 15468 PL_utf8locale = proto_perl->Iutf8locale; 15469 PL_in_utf8_CTYPE_locale = proto_perl->Iin_utf8_CTYPE_locale; 15470 PL_in_utf8_COLLATE_locale = proto_perl->Iin_utf8_COLLATE_locale; 15471 my_strlcpy(PL_locale_utf8ness, proto_perl->Ilocale_utf8ness, sizeof(PL_locale_utf8ness)); 15472 #if defined(USE_ITHREADS) && ! defined(USE_THREAD_SAFE_LOCALE) 15473 PL_lc_numeric_mutex_depth = 0; 15474 #endif 15475 /* Unicode features (see perlrun/-C) */ 15476 PL_unicode = proto_perl->Iunicode; 15477 15478 /* Pre-5.8 signals control */ 15479 PL_signals = proto_perl->Isignals; 15480 15481 /* times() ticks per second */ 15482 PL_clocktick = proto_perl->Iclocktick; 15483 15484 /* Recursion stopper for PerlIO_find_layer */ 15485 PL_in_load_module = proto_perl->Iin_load_module; 15486 15487 /* Not really needed/useful since the reenrant_retint is "volatile", 15488 * but do it for consistency's sake. */ 15489 PL_reentrant_retint = proto_perl->Ireentrant_retint; 15490 15491 /* Hooks to shared SVs and locks. */ 15492 PL_sharehook = proto_perl->Isharehook; 15493 PL_lockhook = proto_perl->Ilockhook; 15494 PL_unlockhook = proto_perl->Iunlockhook; 15495 PL_threadhook = proto_perl->Ithreadhook; 15496 PL_destroyhook = proto_perl->Idestroyhook; 15497 PL_signalhook = proto_perl->Isignalhook; 15498 15499 PL_globhook = proto_perl->Iglobhook; 15500 15501 PL_srand_called = proto_perl->Isrand_called; 15502 Copy(&(proto_perl->Irandom_state), &PL_random_state, 1, PL_RANDOM_STATE_TYPE); 15503 15504 if (flags & CLONEf_COPY_STACKS) { 15505 /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */ 15506 PL_tmps_ix = proto_perl->Itmps_ix; 15507 PL_tmps_max = proto_perl->Itmps_max; 15508 PL_tmps_floor = proto_perl->Itmps_floor; 15509 15510 /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] 15511 * NOTE: unlike the others! */ 15512 PL_scopestack_ix = proto_perl->Iscopestack_ix; 15513 PL_scopestack_max = proto_perl->Iscopestack_max; 15514 15515 /* next SSPUSHFOO() sets PL_savestack[PL_savestack_ix] 15516 * NOTE: unlike the others! */ 15517 PL_savestack_ix = proto_perl->Isavestack_ix; 15518 PL_savestack_max = proto_perl->Isavestack_max; 15519 } 15520 15521 PL_start_env = proto_perl->Istart_env; /* XXXXXX */ 15522 PL_top_env = &PL_start_env; 15523 15524 PL_op = proto_perl->Iop; 15525 15526 PL_Sv = NULL; 15527 PL_Xpv = (XPV*)NULL; 15528 my_perl->Ina = proto_perl->Ina; 15529 15530 PL_statcache = proto_perl->Istatcache; 15531 15532 #ifndef NO_TAINT_SUPPORT 15533 PL_tainted = proto_perl->Itainted; 15534 #else 15535 PL_tainted = FALSE; 15536 #endif 15537 PL_curpm = proto_perl->Icurpm; /* XXX No PMOP ref count */ 15538 15539 PL_chopset = proto_perl->Ichopset; /* XXX never deallocated */ 15540 15541 PL_restartjmpenv = proto_perl->Irestartjmpenv; 15542 PL_restartop = proto_perl->Irestartop; 15543 PL_in_eval = proto_perl->Iin_eval; 15544 PL_delaymagic = proto_perl->Idelaymagic; 15545 PL_phase = proto_perl->Iphase; 15546 PL_localizing = proto_perl->Ilocalizing; 15547 15548 PL_hv_fetch_ent_mh = NULL; 15549 PL_modcount = proto_perl->Imodcount; 15550 PL_lastgotoprobe = NULL; 15551 PL_dumpindent = proto_perl->Idumpindent; 15552 15553 PL_efloatbuf = NULL; /* reinits on demand */ 15554 PL_efloatsize = 0; /* reinits on demand */ 15555 15556 /* regex stuff */ 15557 15558 PL_colorset = 0; /* reinits PL_colors[] */ 15559 /*PL_colors[6] = {0,0,0,0,0,0};*/ 15560 15561 /* Pluggable optimizer */ 15562 PL_peepp = proto_perl->Ipeepp; 15563 PL_rpeepp = proto_perl->Irpeepp; 15564 /* op_free() hook */ 15565 PL_opfreehook = proto_perl->Iopfreehook; 15566 15567 # ifdef PERL_MEM_LOG 15568 Zero(PL_mem_log, sizeof(PL_mem_log), char); 15569 # endif 15570 15571 #ifdef USE_REENTRANT_API 15572 /* XXX: things like -Dm will segfault here in perlio, but doing 15573 * PERL_SET_CONTEXT(proto_perl); 15574 * breaks too many other things 15575 */ 15576 Perl_reentrant_init(aTHX); 15577 #endif 15578 15579 /* create SV map for pointer relocation */ 15580 PL_ptr_table = ptr_table_new(); 15581 15582 /* initialize these special pointers as early as possible */ 15583 init_constants(); 15584 ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef); 15585 ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no); 15586 ptr_table_store(PL_ptr_table, &proto_perl->Isv_zero, &PL_sv_zero); 15587 ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes); 15588 ptr_table_store(PL_ptr_table, &proto_perl->Ipadname_const, 15589 &PL_padname_const); 15590 15591 /* create (a non-shared!) shared string table */ 15592 PL_strtab = newHV(); 15593 HvSHAREKEYS_off(PL_strtab); 15594 hv_ksplit(PL_strtab, HvTOTALKEYS(proto_perl->Istrtab)); 15595 ptr_table_store(PL_ptr_table, proto_perl->Istrtab, PL_strtab); 15596 15597 Zero(PL_sv_consts, SV_CONSTS_COUNT, SV*); 15598 15599 /* This PV will be free'd special way so must set it same way op.c does */ 15600 PL_compiling.cop_file = savesharedpv(PL_compiling.cop_file); 15601 ptr_table_store(PL_ptr_table, proto_perl->Icompiling.cop_file, PL_compiling.cop_file); 15602 15603 ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling); 15604 PL_compiling.cop_warnings = DUP_WARNINGS(PL_compiling.cop_warnings); 15605 CopHINTHASH_set(&PL_compiling, cophh_copy(CopHINTHASH_get(&PL_compiling))); 15606 PL_curcop = (COP*)any_dup(proto_perl->Icurcop, proto_perl); 15607 15608 param->stashes = newAV(); /* Setup array of objects to call clone on */ 15609 /* This makes no difference to the implementation, as it always pushes 15610 and shifts pointers to other SVs without changing their reference 15611 count, with the array becoming empty before it is freed. However, it 15612 makes it conceptually clear what is going on, and will avoid some 15613 work inside av.c, filling slots between AvFILL() and AvMAX() with 15614 &PL_sv_undef, and SvREFCNT_dec()ing those. */ 15615 AvREAL_off(param->stashes); 15616 15617 if (!(flags & CLONEf_COPY_STACKS)) { 15618 param->unreferenced = newAV(); 15619 } 15620 15621 #ifdef PERLIO_LAYERS 15622 /* Clone PerlIO tables as soon as we can handle general xx_dup() */ 15623 PerlIO_clone(aTHX_ proto_perl, param); 15624 #endif 15625 15626 PL_envgv = gv_dup_inc(proto_perl->Ienvgv, param); 15627 PL_incgv = gv_dup_inc(proto_perl->Iincgv, param); 15628 PL_hintgv = gv_dup_inc(proto_perl->Ihintgv, param); 15629 PL_origfilename = SAVEPV(proto_perl->Iorigfilename); 15630 PL_xsubfilename = proto_perl->Ixsubfilename; 15631 PL_diehook = sv_dup_inc(proto_perl->Idiehook, param); 15632 PL_warnhook = sv_dup_inc(proto_perl->Iwarnhook, param); 15633 15634 /* switches */ 15635 PL_patchlevel = sv_dup_inc(proto_perl->Ipatchlevel, param); 15636 PL_inplace = SAVEPV(proto_perl->Iinplace); 15637 PL_e_script = sv_dup_inc(proto_perl->Ie_script, param); 15638 15639 /* magical thingies */ 15640 15641 SvPVCLEAR(PERL_DEBUG_PAD(0)); /* For regex debugging. */ 15642 SvPVCLEAR(PERL_DEBUG_PAD(1)); /* ext/re needs these */ 15643 SvPVCLEAR(PERL_DEBUG_PAD(2)); /* even without DEBUGGING. */ 15644 15645 15646 /* Clone the regex array */ 15647 /* ORANGE FIXME for plugins, probably in the SV dup code. 15648 newSViv(PTR2IV(CALLREGDUPE( 15649 INT2PTR(REGEXP *, SvIVX(regex)), param)))) 15650 */ 15651 PL_regex_padav = av_dup_inc(proto_perl->Iregex_padav, param); 15652 PL_regex_pad = AvARRAY(PL_regex_padav); 15653 15654 PL_stashpadmax = proto_perl->Istashpadmax; 15655 PL_stashpadix = proto_perl->Istashpadix ; 15656 Newx(PL_stashpad, PL_stashpadmax, HV *); 15657 { 15658 PADOFFSET o = 0; 15659 for (; o < PL_stashpadmax; ++o) 15660 PL_stashpad[o] = hv_dup(proto_perl->Istashpad[o], param); 15661 } 15662 15663 /* shortcuts to various I/O objects */ 15664 PL_ofsgv = gv_dup_inc(proto_perl->Iofsgv, param); 15665 PL_stdingv = gv_dup(proto_perl->Istdingv, param); 15666 PL_stderrgv = gv_dup(proto_perl->Istderrgv, param); 15667 PL_defgv = gv_dup(proto_perl->Idefgv, param); 15668 PL_argvgv = gv_dup_inc(proto_perl->Iargvgv, param); 15669 PL_argvoutgv = gv_dup(proto_perl->Iargvoutgv, param); 15670 PL_argvout_stack = av_dup_inc(proto_perl->Iargvout_stack, param); 15671 15672 /* shortcuts to regexp stuff */ 15673 PL_replgv = gv_dup_inc(proto_perl->Ireplgv, param); 15674 15675 /* shortcuts to misc objects */ 15676 PL_errgv = gv_dup(proto_perl->Ierrgv, param); 15677 15678 /* shortcuts to debugging objects */ 15679 PL_DBgv = gv_dup_inc(proto_perl->IDBgv, param); 15680 PL_DBline = gv_dup_inc(proto_perl->IDBline, param); 15681 PL_DBsub = gv_dup_inc(proto_perl->IDBsub, param); 15682 PL_DBsingle = sv_dup(proto_perl->IDBsingle, param); 15683 PL_DBtrace = sv_dup(proto_perl->IDBtrace, param); 15684 PL_DBsignal = sv_dup(proto_perl->IDBsignal, param); 15685 Copy(proto_perl->IDBcontrol, PL_DBcontrol, DBVARMG_COUNT, IV); 15686 15687 /* symbol tables */ 15688 PL_defstash = hv_dup_inc(proto_perl->Idefstash, param); 15689 PL_curstash = hv_dup_inc(proto_perl->Icurstash, param); 15690 PL_debstash = hv_dup(proto_perl->Idebstash, param); 15691 PL_globalstash = hv_dup(proto_perl->Iglobalstash, param); 15692 PL_curstname = sv_dup_inc(proto_perl->Icurstname, param); 15693 15694 PL_beginav = av_dup_inc(proto_perl->Ibeginav, param); 15695 PL_beginav_save = av_dup_inc(proto_perl->Ibeginav_save, param); 15696 PL_checkav_save = av_dup_inc(proto_perl->Icheckav_save, param); 15697 PL_unitcheckav = av_dup_inc(proto_perl->Iunitcheckav, param); 15698 PL_unitcheckav_save = av_dup_inc(proto_perl->Iunitcheckav_save, param); 15699 PL_endav = av_dup_inc(proto_perl->Iendav, param); 15700 PL_checkav = av_dup_inc(proto_perl->Icheckav, param); 15701 PL_initav = av_dup_inc(proto_perl->Iinitav, param); 15702 PL_savebegin = proto_perl->Isavebegin; 15703 15704 PL_isarev = hv_dup_inc(proto_perl->Iisarev, param); 15705 15706 /* subprocess state */ 15707 PL_fdpid = av_dup_inc(proto_perl->Ifdpid, param); 15708 15709 if (proto_perl->Iop_mask) 15710 PL_op_mask = SAVEPVN(proto_perl->Iop_mask, PL_maxo); 15711 else 15712 PL_op_mask = NULL; 15713 /* PL_asserting = proto_perl->Iasserting; */ 15714 15715 /* current interpreter roots */ 15716 PL_main_cv = cv_dup_inc(proto_perl->Imain_cv, param); 15717 OP_REFCNT_LOCK; 15718 PL_main_root = OpREFCNT_inc(proto_perl->Imain_root); 15719 OP_REFCNT_UNLOCK; 15720 15721 /* runtime control stuff */ 15722 PL_curcopdb = (COP*)any_dup(proto_perl->Icurcopdb, proto_perl); 15723 15724 PL_preambleav = av_dup_inc(proto_perl->Ipreambleav, param); 15725 15726 PL_ors_sv = sv_dup_inc(proto_perl->Iors_sv, param); 15727 15728 /* interpreter atexit processing */ 15729 PL_exitlistlen = proto_perl->Iexitlistlen; 15730 if (PL_exitlistlen) { 15731 Newx(PL_exitlist, PL_exitlistlen, PerlExitListEntry); 15732 Copy(proto_perl->Iexitlist, PL_exitlist, PL_exitlistlen, PerlExitListEntry); 15733 } 15734 else 15735 PL_exitlist = (PerlExitListEntry*)NULL; 15736 15737 PL_my_cxt_size = proto_perl->Imy_cxt_size; 15738 if (PL_my_cxt_size) { 15739 Newx(PL_my_cxt_list, PL_my_cxt_size, void *); 15740 Copy(proto_perl->Imy_cxt_list, PL_my_cxt_list, PL_my_cxt_size, void *); 15741 } 15742 else { 15743 PL_my_cxt_list = (void**)NULL; 15744 } 15745 PL_modglobal = hv_dup_inc(proto_perl->Imodglobal, param); 15746 PL_custom_op_names = hv_dup_inc(proto_perl->Icustom_op_names,param); 15747 PL_custom_op_descs = hv_dup_inc(proto_perl->Icustom_op_descs,param); 15748 PL_custom_ops = hv_dup_inc(proto_perl->Icustom_ops, param); 15749 15750 PL_compcv = cv_dup(proto_perl->Icompcv, param); 15751 15752 PAD_CLONE_VARS(proto_perl, param); 15753 15754 #ifdef HAVE_INTERP_INTERN 15755 sys_intern_dup(&proto_perl->Isys_intern, &PL_sys_intern); 15756 #endif 15757 15758 PL_DBcv = cv_dup(proto_perl->IDBcv, param); 15759 15760 #ifdef PERL_USES_PL_PIDSTATUS 15761 PL_pidstatus = newHV(); /* XXX flag for cloning? */ 15762 #endif 15763 PL_osname = SAVEPV(proto_perl->Iosname); 15764 PL_parser = parser_dup(proto_perl->Iparser, param); 15765 15766 /* XXX this only works if the saved cop has already been cloned */ 15767 if (proto_perl->Iparser) { 15768 PL_parser->saved_curcop = (COP*)any_dup( 15769 proto_perl->Iparser->saved_curcop, 15770 proto_perl); 15771 } 15772 15773 PL_subname = sv_dup_inc(proto_perl->Isubname, param); 15774 15775 #if defined(USE_POSIX_2008_LOCALE) \ 15776 && defined(USE_THREAD_SAFE_LOCALE) \ 15777 && ! defined(HAS_QUERYLOCALE) 15778 for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) { 15779 PL_curlocales[i] = savepv("."); /* An illegal value */ 15780 } 15781 #endif 15782 #ifdef USE_LOCALE_CTYPE 15783 /* Should we warn if uses locale? */ 15784 PL_warn_locale = sv_dup_inc(proto_perl->Iwarn_locale, param); 15785 #endif 15786 15787 #ifdef USE_LOCALE_COLLATE 15788 PL_collation_name = SAVEPV(proto_perl->Icollation_name); 15789 #endif /* USE_LOCALE_COLLATE */ 15790 15791 #ifdef USE_LOCALE_NUMERIC 15792 PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); 15793 PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); 15794 15795 # if defined(HAS_POSIX_2008_LOCALE) 15796 PL_underlying_numeric_obj = NULL; 15797 # endif 15798 #endif /* !USE_LOCALE_NUMERIC */ 15799 15800 #ifdef HAS_MBRLEN 15801 PL_mbrlen_ps = proto_perl->Imbrlen_ps; 15802 #endif 15803 #ifdef HAS_MBRTOWC 15804 PL_mbrtowc_ps = proto_perl->Imbrtowc_ps; 15805 #endif 15806 #ifdef HAS_WCRTOMB 15807 PL_wcrtomb_ps = proto_perl->Iwcrtomb_ps; 15808 #endif 15809 15810 PL_langinfo_buf = NULL; 15811 PL_langinfo_bufsize = 0; 15812 15813 PL_setlocale_buf = NULL; 15814 PL_setlocale_bufsize = 0; 15815 15816 /* Unicode inversion lists */ 15817 15818 PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param); 15819 PL_Assigned_invlist = sv_dup_inc(proto_perl->IAssigned_invlist, param); 15820 PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param); 15821 PL_HasMultiCharFold = sv_dup_inc(proto_perl->IHasMultiCharFold, param); 15822 PL_InMultiCharFold = sv_dup_inc(proto_perl->IInMultiCharFold, param); 15823 PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); 15824 PL_LB_invlist = sv_dup_inc(proto_perl->ILB_invlist, param); 15825 PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param); 15826 PL_SCX_invlist = sv_dup_inc(proto_perl->ISCX_invlist, param); 15827 PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); 15828 PL_in_some_fold = sv_dup_inc(proto_perl->Iin_some_fold, param); 15829 PL_utf8_foldclosures = sv_dup_inc(proto_perl->Iutf8_foldclosures, param); 15830 PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); 15831 PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); 15832 PL_utf8_perl_idcont = sv_dup_inc(proto_perl->Iutf8_perl_idcont, param); 15833 PL_utf8_perl_idstart = sv_dup_inc(proto_perl->Iutf8_perl_idstart, param); 15834 PL_utf8_xidcont = sv_dup_inc(proto_perl->Iutf8_xidcont, param); 15835 PL_utf8_xidstart = sv_dup_inc(proto_perl->Iutf8_xidstart, param); 15836 PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param); 15837 for (i = 0; i < POSIX_CC_COUNT; i++) { 15838 PL_XPosix_ptrs[i] = sv_dup_inc(proto_perl->IXPosix_ptrs[i], param); 15839 if (i != _CC_CASED && i != _CC_VERTSPACE) { 15840 PL_Posix_ptrs[i] = sv_dup_inc(proto_perl->IPosix_ptrs[i], param); 15841 } 15842 } 15843 PL_Posix_ptrs[_CC_CASED] = PL_Posix_ptrs[_CC_ALPHA]; 15844 PL_Posix_ptrs[_CC_VERTSPACE] = NULL; 15845 15846 PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); 15847 PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); 15848 PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); 15849 PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); 15850 PL_utf8_tosimplefold = sv_dup_inc(proto_perl->Iutf8_tosimplefold, param); 15851 PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param); 15852 PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param); 15853 PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); 15854 PL_InBitmap = sv_dup_inc(proto_perl->IInBitmap, param); 15855 PL_CCC_non0_non230 = sv_dup_inc(proto_perl->ICCC_non0_non230, param); 15856 PL_Private_Use = sv_dup_inc(proto_perl->IPrivate_Use, param); 15857 15858 #if 0 15859 PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param); 15860 #endif 15861 15862 if (proto_perl->Ipsig_pend) { 15863 Newxz(PL_psig_pend, SIG_SIZE, int); 15864 } 15865 else { 15866 PL_psig_pend = (int*)NULL; 15867 } 15868 15869 if (proto_perl->Ipsig_name) { 15870 Newx(PL_psig_name, 2 * SIG_SIZE, SV*); 15871 sv_dup_inc_multiple(proto_perl->Ipsig_name, PL_psig_name, 2 * SIG_SIZE, 15872 param); 15873 PL_psig_ptr = PL_psig_name + SIG_SIZE; 15874 } 15875 else { 15876 PL_psig_ptr = (SV**)NULL; 15877 PL_psig_name = (SV**)NULL; 15878 } 15879 15880 if (flags & CLONEf_COPY_STACKS) { 15881 Newx(PL_tmps_stack, PL_tmps_max, SV*); 15882 sv_dup_inc_multiple(proto_perl->Itmps_stack, PL_tmps_stack, 15883 PL_tmps_ix+1, param); 15884 15885 /* next PUSHMARK() sets *(PL_markstack_ptr+1) */ 15886 i = proto_perl->Imarkstack_max - proto_perl->Imarkstack; 15887 Newx(PL_markstack, i, I32); 15888 PL_markstack_max = PL_markstack + (proto_perl->Imarkstack_max 15889 - proto_perl->Imarkstack); 15890 PL_markstack_ptr = PL_markstack + (proto_perl->Imarkstack_ptr 15891 - proto_perl->Imarkstack); 15892 Copy(proto_perl->Imarkstack, PL_markstack, 15893 PL_markstack_ptr - PL_markstack + 1, I32); 15894 15895 /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] 15896 * NOTE: unlike the others! */ 15897 Newx(PL_scopestack, PL_scopestack_max, I32); 15898 Copy(proto_perl->Iscopestack, PL_scopestack, PL_scopestack_ix, I32); 15899 15900 #ifdef DEBUGGING 15901 Newx(PL_scopestack_name, PL_scopestack_max, const char *); 15902 Copy(proto_perl->Iscopestack_name, PL_scopestack_name, PL_scopestack_ix, const char *); 15903 #endif 15904 /* reset stack AV to correct length before its duped via 15905 * PL_curstackinfo */ 15906 AvFILLp(proto_perl->Icurstack) = 15907 proto_perl->Istack_sp - proto_perl->Istack_base; 15908 15909 /* NOTE: si_dup() looks at PL_markstack */ 15910 PL_curstackinfo = si_dup(proto_perl->Icurstackinfo, param); 15911 15912 /* PL_curstack = PL_curstackinfo->si_stack; */ 15913 PL_curstack = av_dup(proto_perl->Icurstack, param); 15914 PL_mainstack = av_dup(proto_perl->Imainstack, param); 15915 15916 /* next PUSHs() etc. set *(PL_stack_sp+1) */ 15917 PL_stack_base = AvARRAY(PL_curstack); 15918 PL_stack_sp = PL_stack_base + (proto_perl->Istack_sp 15919 - proto_perl->Istack_base); 15920 PL_stack_max = PL_stack_base + AvMAX(PL_curstack); 15921 15922 /*Newxz(PL_savestack, PL_savestack_max, ANY);*/ 15923 PL_savestack = ss_dup(proto_perl, param); 15924 } 15925 else { 15926 init_stacks(); 15927 ENTER; /* perl_destruct() wants to LEAVE; */ 15928 } 15929 15930 PL_statgv = gv_dup(proto_perl->Istatgv, param); 15931 PL_statname = sv_dup_inc(proto_perl->Istatname, param); 15932 15933 PL_rs = sv_dup_inc(proto_perl->Irs, param); 15934 PL_last_in_gv = gv_dup(proto_perl->Ilast_in_gv, param); 15935 PL_defoutgv = gv_dup_inc(proto_perl->Idefoutgv, param); 15936 PL_toptarget = sv_dup_inc(proto_perl->Itoptarget, param); 15937 PL_bodytarget = sv_dup_inc(proto_perl->Ibodytarget, param); 15938 PL_formtarget = sv_dup(proto_perl->Iformtarget, param); 15939 15940 PL_errors = sv_dup_inc(proto_perl->Ierrors, param); 15941 15942 PL_sortcop = (OP*)any_dup(proto_perl->Isortcop, proto_perl); 15943 PL_firstgv = gv_dup_inc(proto_perl->Ifirstgv, param); 15944 PL_secondgv = gv_dup_inc(proto_perl->Isecondgv, param); 15945 15946 PL_stashcache = newHV(); 15947 15948 PL_watchaddr = (char **) ptr_table_fetch(PL_ptr_table, 15949 proto_perl->Iwatchaddr); 15950 PL_watchok = PL_watchaddr ? * PL_watchaddr : NULL; 15951 if (PL_debug && PL_watchaddr) { 15952 PerlIO_printf(Perl_debug_log, 15953 "WATCHING: %" UVxf " cloned as %" UVxf " with value %" UVxf "\n", 15954 PTR2UV(proto_perl->Iwatchaddr), PTR2UV(PL_watchaddr), 15955 PTR2UV(PL_watchok)); 15956 } 15957 15958 PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param); 15959 PL_blockhooks = av_dup_inc(proto_perl->Iblockhooks, param); 15960 15961 /* Call the ->CLONE method, if it exists, for each of the stashes 15962 identified by sv_dup() above. 15963 */ 15964 while(av_count(param->stashes) != 0) { 15965 HV* const stash = MUTABLE_HV(av_shift(param->stashes)); 15966 GV* const cloner = gv_fetchmethod_autoload(stash, "CLONE", 0); 15967 if (cloner && GvCV(cloner)) { 15968 dSP; 15969 ENTER; 15970 SAVETMPS; 15971 PUSHMARK(SP); 15972 mXPUSHs(newSVhek(HvNAME_HEK(stash))); 15973 PUTBACK; 15974 call_sv(MUTABLE_SV(GvCV(cloner)), G_DISCARD); 15975 FREETMPS; 15976 LEAVE; 15977 } 15978 } 15979 15980 if (!(flags & CLONEf_KEEP_PTR_TABLE)) { 15981 ptr_table_free(PL_ptr_table); 15982 PL_ptr_table = NULL; 15983 } 15984 15985 if (!(flags & CLONEf_COPY_STACKS)) { 15986 unreferenced_to_tmp_stack(param->unreferenced); 15987 } 15988 15989 SvREFCNT_dec(param->stashes); 15990 15991 /* orphaned? eg threads->new inside BEGIN or use */ 15992 if (PL_compcv && ! SvREFCNT(PL_compcv)) { 15993 SvREFCNT_inc_simple_void(PL_compcv); 15994 SAVEFREESV(PL_compcv); 15995 } 15996 15997 return my_perl; 15998 } 15999 16000 static void 16001 S_unreferenced_to_tmp_stack(pTHX_ AV *const unreferenced) 16002 { 16003 PERL_ARGS_ASSERT_UNREFERENCED_TO_TMP_STACK; 16004 16005 if (AvFILLp(unreferenced) > -1) { 16006 SV **svp = AvARRAY(unreferenced); 16007 SV **const last = svp + AvFILLp(unreferenced); 16008 SSize_t count = 0; 16009 16010 do { 16011 if (SvREFCNT(*svp) == 1) 16012 ++count; 16013 } while (++svp <= last); 16014 16015 EXTEND_MORTAL(count); 16016 svp = AvARRAY(unreferenced); 16017 16018 do { 16019 if (SvREFCNT(*svp) == 1) { 16020 /* Our reference is the only one to this SV. This means that 16021 in this thread, the scalar effectively has a 0 reference. 16022 That doesn't work (cleanup never happens), so donate our 16023 reference to it onto the save stack. */ 16024 PL_tmps_stack[++PL_tmps_ix] = *svp; 16025 } else { 16026 /* As an optimisation, because we are already walking the 16027 entire array, instead of above doing either 16028 SvREFCNT_inc(*svp) or *svp = &PL_sv_undef, we can instead 16029 release our reference to the scalar, so that at the end of 16030 the array owns zero references to the scalars it happens to 16031 point to. We are effectively converting the array from 16032 AvREAL() on to AvREAL() off. This saves the av_clear() 16033 (triggered by the SvREFCNT_dec(unreferenced) below) from 16034 walking the array a second time. */ 16035 SvREFCNT_dec(*svp); 16036 } 16037 16038 } while (++svp <= last); 16039 AvREAL_off(unreferenced); 16040 } 16041 SvREFCNT_dec_NN(unreferenced); 16042 } 16043 16044 void 16045 Perl_clone_params_del(CLONE_PARAMS *param) 16046 { 16047 PerlInterpreter *const was = PERL_GET_THX; 16048 PerlInterpreter *const to = param->new_perl; 16049 dTHXa(to); 16050 16051 PERL_ARGS_ASSERT_CLONE_PARAMS_DEL; 16052 16053 if (was != to) { 16054 PERL_SET_THX(to); 16055 } 16056 16057 SvREFCNT_dec(param->stashes); 16058 if (param->unreferenced) 16059 unreferenced_to_tmp_stack(param->unreferenced); 16060 16061 Safefree(param); 16062 16063 if (was != to) { 16064 PERL_SET_THX(was); 16065 } 16066 } 16067 16068 CLONE_PARAMS * 16069 Perl_clone_params_new(PerlInterpreter *const from, PerlInterpreter *const to) 16070 { 16071 /* Need to play this game, as newAV() can call safesysmalloc(), and that 16072 does a dTHX; to get the context from thread local storage. 16073 FIXME - under PERL_CORE Newx(), Safefree() and friends should expand to 16074 a version that passes in my_perl. */ 16075 PerlInterpreter *const was = PERL_GET_THX; 16076 CLONE_PARAMS *param; 16077 16078 PERL_ARGS_ASSERT_CLONE_PARAMS_NEW; 16079 16080 if (was != to) { 16081 PERL_SET_THX(to); 16082 } 16083 16084 /* Given that we've set the context, we can do this unshared. */ 16085 Newx(param, 1, CLONE_PARAMS); 16086 16087 param->flags = 0; 16088 param->proto_perl = from; 16089 param->new_perl = to; 16090 param->stashes = (AV *)Perl_newSV_type(to, SVt_PVAV); 16091 AvREAL_off(param->stashes); 16092 param->unreferenced = (AV *)Perl_newSV_type(to, SVt_PVAV); 16093 16094 if (was != to) { 16095 PERL_SET_THX(was); 16096 } 16097 return param; 16098 } 16099 16100 #endif /* USE_ITHREADS */ 16101 16102 void 16103 Perl_init_constants(pTHX) 16104 { 16105 16106 SvREFCNT(&PL_sv_undef) = SvREFCNT_IMMORTAL; 16107 SvFLAGS(&PL_sv_undef) = SVf_READONLY|SVf_PROTECT|SVt_NULL; 16108 SvANY(&PL_sv_undef) = NULL; 16109 16110 SvANY(&PL_sv_no) = new_XPVNV(); 16111 SvREFCNT(&PL_sv_no) = SvREFCNT_IMMORTAL; 16112 SvFLAGS(&PL_sv_no) = SVt_PVNV|SVf_READONLY|SVf_PROTECT 16113 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK 16114 |SVp_POK|SVf_POK|SVf_IsCOW|SVppv_STATIC; 16115 16116 SvANY(&PL_sv_yes) = new_XPVNV(); 16117 SvREFCNT(&PL_sv_yes) = SvREFCNT_IMMORTAL; 16118 SvFLAGS(&PL_sv_yes) = SVt_PVNV|SVf_READONLY|SVf_PROTECT 16119 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK 16120 |SVp_POK|SVf_POK|SVf_IsCOW|SVppv_STATIC; 16121 16122 SvANY(&PL_sv_zero) = new_XPVNV(); 16123 SvREFCNT(&PL_sv_zero) = SvREFCNT_IMMORTAL; 16124 SvFLAGS(&PL_sv_zero) = SVt_PVNV|SVf_READONLY|SVf_PROTECT 16125 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK 16126 |SVp_POK|SVf_POK 16127 |SVs_PADTMP; 16128 16129 SvPV_set(&PL_sv_no, (char*)PL_No); 16130 SvCUR_set(&PL_sv_no, 0); 16131 SvLEN_set(&PL_sv_no, 0); 16132 SvIV_set(&PL_sv_no, 0); 16133 SvNV_set(&PL_sv_no, 0); 16134 16135 SvPV_set(&PL_sv_yes, (char*)PL_Yes); 16136 SvCUR_set(&PL_sv_yes, 1); 16137 SvLEN_set(&PL_sv_yes, 0); 16138 SvIV_set(&PL_sv_yes, 1); 16139 SvNV_set(&PL_sv_yes, 1); 16140 16141 SvPV_set(&PL_sv_zero, (char*)PL_Zero); 16142 SvCUR_set(&PL_sv_zero, 1); 16143 SvLEN_set(&PL_sv_zero, 0); 16144 SvIV_set(&PL_sv_zero, 0); 16145 SvNV_set(&PL_sv_zero, 0); 16146 16147 PadnamePV(&PL_padname_const) = (char *)PL_No; 16148 16149 assert(SvIMMORTAL_INTERP(&PL_sv_yes)); 16150 assert(SvIMMORTAL_INTERP(&PL_sv_undef)); 16151 assert(SvIMMORTAL_INTERP(&PL_sv_no)); 16152 assert(SvIMMORTAL_INTERP(&PL_sv_zero)); 16153 16154 assert(SvIMMORTAL(&PL_sv_yes)); 16155 assert(SvIMMORTAL(&PL_sv_undef)); 16156 assert(SvIMMORTAL(&PL_sv_no)); 16157 assert(SvIMMORTAL(&PL_sv_zero)); 16158 16159 assert( SvIMMORTAL_TRUE(&PL_sv_yes)); 16160 assert(!SvIMMORTAL_TRUE(&PL_sv_undef)); 16161 assert(!SvIMMORTAL_TRUE(&PL_sv_no)); 16162 assert(!SvIMMORTAL_TRUE(&PL_sv_zero)); 16163 16164 assert( SvTRUE_nomg_NN(&PL_sv_yes)); 16165 assert(!SvTRUE_nomg_NN(&PL_sv_undef)); 16166 assert(!SvTRUE_nomg_NN(&PL_sv_no)); 16167 assert(!SvTRUE_nomg_NN(&PL_sv_zero)); 16168 } 16169 16170 /* 16171 =for apidoc_section $unicode 16172 16173 =for apidoc sv_recode_to_utf8 16174 16175 C<encoding> is assumed to be an C<Encode> object, on entry the PV 16176 of C<sv> is assumed to be octets in that encoding, and C<sv> 16177 will be converted into Unicode (and UTF-8). 16178 16179 If C<sv> already is UTF-8 (or if it is not C<POK>), or if C<encoding> 16180 is not a reference, nothing is done to C<sv>. If C<encoding> is not 16181 an C<Encode::XS> Encoding object, bad things will happen. 16182 (See L<encoding> and L<Encode>.) 16183 16184 The PV of C<sv> is returned. 16185 16186 =cut */ 16187 16188 char * 16189 Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) 16190 { 16191 PERL_ARGS_ASSERT_SV_RECODE_TO_UTF8; 16192 16193 if (SvPOK(sv) && !SvUTF8(sv) && !IN_BYTES && SvROK(encoding)) { 16194 SV *uni; 16195 STRLEN len; 16196 const char *s; 16197 dSP; 16198 SV *nsv = sv; 16199 ENTER; 16200 PUSHSTACK; 16201 SAVETMPS; 16202 if (SvPADTMP(nsv)) { 16203 nsv = sv_newmortal(); 16204 SvSetSV_nosteal(nsv, sv); 16205 } 16206 save_re_context(); 16207 PUSHMARK(sp); 16208 EXTEND(SP, 3); 16209 PUSHs(encoding); 16210 PUSHs(nsv); 16211 /* 16212 NI-S 2002/07/09 16213 Passing sv_yes is wrong - it needs to be or'ed set of constants 16214 for Encode::XS, while UTf-8 decode (currently) assumes a true value means 16215 remove converted chars from source. 16216 16217 Both will default the value - let them. 16218 16219 XPUSHs(&PL_sv_yes); 16220 */ 16221 PUTBACK; 16222 call_method("decode", G_SCALAR); 16223 SPAGAIN; 16224 uni = POPs; 16225 PUTBACK; 16226 s = SvPV_const(uni, len); 16227 if (s != SvPVX_const(sv)) { 16228 SvGROW(sv, len + 1); 16229 Move(s, SvPVX(sv), len + 1, char); 16230 SvCUR_set(sv, len); 16231 } 16232 FREETMPS; 16233 POPSTACK; 16234 LEAVE; 16235 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 16236 /* clear pos and any utf8 cache */ 16237 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 16238 if (mg) 16239 mg->mg_len = -1; 16240 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 16241 magic_setutf8(sv,mg); /* clear UTF8 cache */ 16242 } 16243 SvUTF8_on(sv); 16244 return SvPVX(sv); 16245 } 16246 return SvPOKp(sv) ? SvPVX(sv) : NULL; 16247 } 16248 16249 /* 16250 =for apidoc sv_cat_decode 16251 16252 C<encoding> is assumed to be an C<Encode> object, the PV of C<ssv> is 16253 assumed to be octets in that encoding and decoding the input starts 16254 from the position which S<C<(PV + *offset)>> pointed to. C<dsv> will be 16255 concatenated with the decoded UTF-8 string from C<ssv>. Decoding will terminate 16256 when the string C<tstr> appears in decoding output or the input ends on 16257 the PV of C<ssv>. The value which C<offset> points will be modified 16258 to the last input position on C<ssv>. 16259 16260 Returns TRUE if the terminator was found, else returns FALSE. 16261 16262 =cut */ 16263 16264 bool 16265 Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding, 16266 SV *ssv, int *offset, char *tstr, int tlen) 16267 { 16268 bool ret = FALSE; 16269 16270 PERL_ARGS_ASSERT_SV_CAT_DECODE; 16271 16272 if (SvPOK(ssv) && SvPOK(dsv) && SvROK(encoding)) { 16273 SV *offsv; 16274 dSP; 16275 ENTER; 16276 SAVETMPS; 16277 save_re_context(); 16278 PUSHMARK(sp); 16279 EXTEND(SP, 6); 16280 PUSHs(encoding); 16281 PUSHs(dsv); 16282 PUSHs(ssv); 16283 offsv = newSViv(*offset); 16284 mPUSHs(offsv); 16285 mPUSHp(tstr, tlen); 16286 PUTBACK; 16287 call_method("cat_decode", G_SCALAR); 16288 SPAGAIN; 16289 ret = SvTRUE(TOPs); 16290 *offset = SvIV(offsv); 16291 PUTBACK; 16292 FREETMPS; 16293 LEAVE; 16294 } 16295 else 16296 Perl_croak(aTHX_ "Invalid argument to sv_cat_decode"); 16297 return ret; 16298 16299 } 16300 16301 /* --------------------------------------------------------------------- 16302 * 16303 * support functions for report_uninit() 16304 */ 16305 16306 /* the maxiumum size of array or hash where we will scan looking 16307 * for the undefined element that triggered the warning */ 16308 16309 #define FUV_MAX_SEARCH_SIZE 1000 16310 16311 /* Look for an entry in the hash whose value has the same SV as val; 16312 * If so, return a mortal copy of the key. */ 16313 16314 STATIC SV* 16315 S_find_hash_subscript(pTHX_ const HV *const hv, const SV *const val) 16316 { 16317 HE **array; 16318 I32 i; 16319 16320 PERL_ARGS_ASSERT_FIND_HASH_SUBSCRIPT; 16321 16322 if (!hv || SvMAGICAL(hv) || !HvTOTALKEYS(hv) || 16323 (HvTOTALKEYS(hv) > FUV_MAX_SEARCH_SIZE)) 16324 return NULL; 16325 16326 if (val == &PL_sv_undef || val == &PL_sv_placeholder) 16327 return NULL; 16328 16329 array = HvARRAY(hv); 16330 16331 for (i=HvMAX(hv); i>=0; i--) { 16332 HE *entry; 16333 for (entry = array[i]; entry; entry = HeNEXT(entry)) { 16334 if (HeVAL(entry) == val) 16335 return sv_2mortal(newSVhek(HeKEY_hek(entry))); 16336 } 16337 } 16338 return NULL; 16339 } 16340 16341 /* Look for an entry in the array whose value has the same SV as val; 16342 * If so, return the index, otherwise return -1. */ 16343 16344 STATIC SSize_t 16345 S_find_array_subscript(pTHX_ const AV *const av, const SV *const val) 16346 { 16347 PERL_ARGS_ASSERT_FIND_ARRAY_SUBSCRIPT; 16348 16349 if (!av || SvMAGICAL(av) || !AvARRAY(av) || 16350 (AvFILLp(av) > FUV_MAX_SEARCH_SIZE)) 16351 return -1; 16352 16353 if (val != &PL_sv_undef) { 16354 SV ** const svp = AvARRAY(av); 16355 SSize_t i; 16356 16357 for (i=AvFILLp(av); i>=0; i--) 16358 if (svp[i] == val) 16359 return i; 16360 } 16361 return -1; 16362 } 16363 16364 /* varname(): return the name of a variable, optionally with a subscript. 16365 * If gv is non-zero, use the name of that global, along with gvtype (one 16366 * of "$", "@", "%"); otherwise use the name of the lexical at pad offset 16367 * targ. Depending on the value of the subscript_type flag, return: 16368 */ 16369 16370 #define FUV_SUBSCRIPT_NONE 1 /* "@foo" */ 16371 #define FUV_SUBSCRIPT_ARRAY 2 /* "$foo[aindex]" */ 16372 #define FUV_SUBSCRIPT_HASH 3 /* "$foo{keyname}" */ 16373 #define FUV_SUBSCRIPT_WITHIN 4 /* "within @foo" */ 16374 16375 SV* 16376 Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, 16377 const SV *const keyname, SSize_t aindex, int subscript_type) 16378 { 16379 16380 SV * const name = sv_newmortal(); 16381 if (gv && isGV(gv)) { 16382 char buffer[2]; 16383 buffer[0] = gvtype; 16384 buffer[1] = 0; 16385 16386 /* as gv_fullname4(), but add literal '^' for $^FOO names */ 16387 16388 gv_fullname4(name, gv, buffer, 0); 16389 16390 if ((unsigned int)SvPVX(name)[1] <= 26) { 16391 buffer[0] = '^'; 16392 buffer[1] = SvPVX(name)[1] + 'A' - 1; 16393 16394 /* Swap the 1 unprintable control character for the 2 byte pretty 16395 version - ie substr($name, 1, 1) = $buffer; */ 16396 sv_insert(name, 1, 1, buffer, 2); 16397 } 16398 } 16399 else { 16400 CV * const cv = gv ? ((CV *)gv) : find_runcv(NULL); 16401 PADNAME *sv; 16402 16403 assert(!cv || SvTYPE(cv) == SVt_PVCV || SvTYPE(cv) == SVt_PVFM); 16404 16405 if (!cv || !CvPADLIST(cv)) 16406 return NULL; 16407 sv = padnamelist_fetch(PadlistNAMES(CvPADLIST(cv)), targ); 16408 sv_setpvn(name, PadnamePV(sv), PadnameLEN(sv)); 16409 SvUTF8_on(name); 16410 } 16411 16412 if (subscript_type == FUV_SUBSCRIPT_HASH) { 16413 SV * const sv = newSV_type(SVt_NULL); 16414 STRLEN len; 16415 const char * const pv = SvPV_nomg_const((SV*)keyname, len); 16416 16417 *SvPVX(name) = '$'; 16418 Perl_sv_catpvf(aTHX_ name, "{%s}", 16419 pv_pretty(sv, pv, len, 32, NULL, NULL, 16420 PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT )); 16421 SvREFCNT_dec_NN(sv); 16422 } 16423 else if (subscript_type == FUV_SUBSCRIPT_ARRAY) { 16424 *SvPVX(name) = '$'; 16425 Perl_sv_catpvf(aTHX_ name, "[%" IVdf "]", (IV)aindex); 16426 } 16427 else if (subscript_type == FUV_SUBSCRIPT_WITHIN) { 16428 /* We know that name has no magic, so can use 0 instead of SV_GMAGIC */ 16429 Perl_sv_insert_flags(aTHX_ name, 0, 0, STR_WITH_LEN("within "), 0); 16430 } 16431 16432 return name; 16433 } 16434 16435 16436 /* 16437 =apidoc_section $warning 16438 =for apidoc find_uninit_var 16439 16440 Find the name of the undefined variable (if any) that caused the operator 16441 to issue a "Use of uninitialized value" warning. 16442 If match is true, only return a name if its value matches C<uninit_sv>. 16443 So roughly speaking, if a unary operator (such as C<OP_COS>) generates a 16444 warning, then following the direct child of the op may yield an 16445 C<OP_PADSV> or C<OP_GV> that gives the name of the undefined variable. On the 16446 other hand, with C<OP_ADD> there are two branches to follow, so we only print 16447 the variable name if we get an exact match. 16448 C<desc_p> points to a string pointer holding the description of the op. 16449 This may be updated if needed. 16450 16451 The name is returned as a mortal SV. 16452 16453 Assumes that C<PL_op> is the OP that originally triggered the error, and that 16454 C<PL_comppad>/C<PL_curpad> points to the currently executing pad. 16455 16456 =cut 16457 */ 16458 16459 STATIC SV * 16460 S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, 16461 bool match, const char **desc_p) 16462 { 16463 SV *sv; 16464 const GV *gv; 16465 const OP *o, *o2, *kid; 16466 16467 PERL_ARGS_ASSERT_FIND_UNINIT_VAR; 16468 16469 if (!obase || (match && (!uninit_sv || uninit_sv == &PL_sv_undef || 16470 uninit_sv == &PL_sv_placeholder))) 16471 return NULL; 16472 16473 switch (obase->op_type) { 16474 16475 case OP_UNDEF: 16476 /* undef should care if its args are undef - any warnings 16477 * will be from tied/magic vars */ 16478 break; 16479 16480 case OP_RV2AV: 16481 case OP_RV2HV: 16482 case OP_PADAV: 16483 case OP_PADHV: 16484 { 16485 const bool pad = ( obase->op_type == OP_PADAV 16486 || obase->op_type == OP_PADHV 16487 || obase->op_type == OP_PADRANGE 16488 ); 16489 16490 const bool hash = ( obase->op_type == OP_PADHV 16491 || obase->op_type == OP_RV2HV 16492 || (obase->op_type == OP_PADRANGE 16493 && SvTYPE(PAD_SVl(obase->op_targ)) == SVt_PVHV) 16494 ); 16495 SSize_t index = 0; 16496 SV *keysv = NULL; 16497 int subscript_type = FUV_SUBSCRIPT_WITHIN; 16498 16499 if (pad) { /* @lex, %lex */ 16500 sv = PAD_SVl(obase->op_targ); 16501 gv = NULL; 16502 } 16503 else { 16504 if (cUNOPx(obase)->op_first->op_type == OP_GV) { 16505 /* @global, %global */ 16506 gv = cGVOPx_gv(cUNOPx(obase)->op_first); 16507 if (!gv) 16508 break; 16509 sv = hash ? MUTABLE_SV(GvHV(gv)): MUTABLE_SV(GvAV(gv)); 16510 } 16511 else if (obase == PL_op) /* @{expr}, %{expr} */ 16512 return find_uninit_var(cUNOPx(obase)->op_first, 16513 uninit_sv, match, desc_p); 16514 else /* @{expr}, %{expr} as a sub-expression */ 16515 return NULL; 16516 } 16517 16518 /* attempt to find a match within the aggregate */ 16519 if (hash) { 16520 keysv = find_hash_subscript((const HV*)sv, uninit_sv); 16521 if (keysv) 16522 subscript_type = FUV_SUBSCRIPT_HASH; 16523 } 16524 else { 16525 index = find_array_subscript((const AV *)sv, uninit_sv); 16526 if (index >= 0) 16527 subscript_type = FUV_SUBSCRIPT_ARRAY; 16528 } 16529 16530 if (match && subscript_type == FUV_SUBSCRIPT_WITHIN) 16531 break; 16532 16533 return varname(gv, (char)(hash ? '%' : '@'), obase->op_targ, 16534 keysv, index, subscript_type); 16535 } 16536 16537 case OP_RV2SV: 16538 if (cUNOPx(obase)->op_first->op_type == OP_GV) { 16539 /* $global */ 16540 gv = cGVOPx_gv(cUNOPx(obase)->op_first); 16541 if (!gv || !GvSTASH(gv)) 16542 break; 16543 if (match && (GvSV(gv) != uninit_sv)) 16544 break; 16545 return varname(gv, '$', 0, NULL, 0, FUV_SUBSCRIPT_NONE); 16546 } 16547 /* ${expr} */ 16548 return find_uninit_var(cUNOPx(obase)->op_first, uninit_sv, 1, desc_p); 16549 16550 case OP_PADSV: 16551 if (match && PAD_SVl(obase->op_targ) != uninit_sv) 16552 break; 16553 return varname(NULL, '$', obase->op_targ, 16554 NULL, 0, FUV_SUBSCRIPT_NONE); 16555 16556 case OP_GVSV: 16557 gv = cGVOPx_gv(obase); 16558 if (!gv || (match && GvSV(gv) != uninit_sv) || !GvSTASH(gv)) 16559 break; 16560 return varname(gv, '$', 0, NULL, 0, FUV_SUBSCRIPT_NONE); 16561 16562 case OP_AELEMFAST_LEX: 16563 if (match) { 16564 SV **svp; 16565 AV *av = MUTABLE_AV(PAD_SV(obase->op_targ)); 16566 if (!av || SvRMAGICAL(av)) 16567 break; 16568 svp = av_fetch(av, (I8)obase->op_private, FALSE); 16569 if (!svp || *svp != uninit_sv) 16570 break; 16571 } 16572 return varname(NULL, '$', obase->op_targ, 16573 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY); 16574 case OP_AELEMFAST: 16575 { 16576 gv = cGVOPx_gv(obase); 16577 if (!gv) 16578 break; 16579 if (match) { 16580 SV **svp; 16581 AV *const av = GvAV(gv); 16582 if (!av || SvRMAGICAL(av)) 16583 break; 16584 svp = av_fetch(av, (I8)obase->op_private, FALSE); 16585 if (!svp || *svp != uninit_sv) 16586 break; 16587 } 16588 return varname(gv, '$', 0, 16589 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY); 16590 } 16591 NOT_REACHED; /* NOTREACHED */ 16592 16593 case OP_EXISTS: 16594 o = cUNOPx(obase)->op_first; 16595 if (!o || o->op_type != OP_NULL || 16596 ! (o->op_targ == OP_AELEM || o->op_targ == OP_HELEM)) 16597 break; 16598 return find_uninit_var(cBINOPo->op_last, uninit_sv, match, desc_p); 16599 16600 case OP_AELEM: 16601 case OP_HELEM: 16602 { 16603 bool negate = FALSE; 16604 16605 if (PL_op == obase) 16606 /* $a[uninit_expr] or $h{uninit_expr} */ 16607 return find_uninit_var(cBINOPx(obase)->op_last, 16608 uninit_sv, match, desc_p); 16609 16610 gv = NULL; 16611 o = cBINOPx(obase)->op_first; 16612 kid = cBINOPx(obase)->op_last; 16613 16614 /* get the av or hv, and optionally the gv */ 16615 sv = NULL; 16616 if (o->op_type == OP_PADAV || o->op_type == OP_PADHV) { 16617 sv = PAD_SV(o->op_targ); 16618 } 16619 else if ((o->op_type == OP_RV2AV || o->op_type == OP_RV2HV) 16620 && cUNOPo->op_first->op_type == OP_GV) 16621 { 16622 gv = cGVOPx_gv(cUNOPo->op_first); 16623 if (!gv) 16624 break; 16625 sv = o->op_type 16626 == OP_RV2HV ? MUTABLE_SV(GvHV(gv)) : MUTABLE_SV(GvAV(gv)); 16627 } 16628 if (!sv) 16629 break; 16630 16631 if (kid && kid->op_type == OP_NEGATE) { 16632 negate = TRUE; 16633 kid = cUNOPx(kid)->op_first; 16634 } 16635 16636 if (kid && kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) { 16637 /* index is constant */ 16638 SV* kidsv; 16639 if (negate) { 16640 kidsv = newSVpvs_flags("-", SVs_TEMP); 16641 sv_catsv(kidsv, cSVOPx_sv(kid)); 16642 } 16643 else 16644 kidsv = cSVOPx_sv(kid); 16645 if (match) { 16646 if (SvMAGICAL(sv)) 16647 break; 16648 if (obase->op_type == OP_HELEM) { 16649 HE* he = hv_fetch_ent(MUTABLE_HV(sv), kidsv, 0, 0); 16650 if (!he || HeVAL(he) != uninit_sv) 16651 break; 16652 } 16653 else { 16654 SV * const opsv = cSVOPx_sv(kid); 16655 const IV opsviv = SvIV(opsv); 16656 SV * const * const svp = av_fetch(MUTABLE_AV(sv), 16657 negate ? - opsviv : opsviv, 16658 FALSE); 16659 if (!svp || *svp != uninit_sv) 16660 break; 16661 } 16662 } 16663 if (obase->op_type == OP_HELEM) 16664 return varname(gv, '%', o->op_targ, 16665 kidsv, 0, FUV_SUBSCRIPT_HASH); 16666 else 16667 return varname(gv, '@', o->op_targ, NULL, 16668 negate ? - SvIV(cSVOPx_sv(kid)) : SvIV(cSVOPx_sv(kid)), 16669 FUV_SUBSCRIPT_ARRAY); 16670 } 16671 else { 16672 /* index is an expression; 16673 * attempt to find a match within the aggregate */ 16674 if (obase->op_type == OP_HELEM) { 16675 SV * const keysv = find_hash_subscript((const HV*)sv, uninit_sv); 16676 if (keysv) 16677 return varname(gv, '%', o->op_targ, 16678 keysv, 0, FUV_SUBSCRIPT_HASH); 16679 } 16680 else { 16681 const SSize_t index 16682 = find_array_subscript((const AV *)sv, uninit_sv); 16683 if (index >= 0) 16684 return varname(gv, '@', o->op_targ, 16685 NULL, index, FUV_SUBSCRIPT_ARRAY); 16686 } 16687 if (match) 16688 break; 16689 return varname(gv, 16690 (char)((o->op_type == OP_PADAV || o->op_type == OP_RV2AV) 16691 ? '@' : '%'), 16692 o->op_targ, NULL, 0, FUV_SUBSCRIPT_WITHIN); 16693 } 16694 NOT_REACHED; /* NOTREACHED */ 16695 } 16696 16697 case OP_MULTIDEREF: { 16698 /* If we were executing OP_MULTIDEREF when the undef warning 16699 * triggered, then it must be one of the index values within 16700 * that triggered it. If not, then the only possibility is that 16701 * the value retrieved by the last aggregate index might be the 16702 * culprit. For the former, we set PL_multideref_pc each time before 16703 * using an index, so work though the item list until we reach 16704 * that point. For the latter, just work through the entire item 16705 * list; the last aggregate retrieved will be the candidate. 16706 * There is a third rare possibility: something triggered 16707 * magic while fetching an array/hash element. Just display 16708 * nothing in this case. 16709 */ 16710 16711 /* the named aggregate, if any */ 16712 PADOFFSET agg_targ = 0; 16713 GV *agg_gv = NULL; 16714 /* the last-seen index */ 16715 UV index_type; 16716 PADOFFSET index_targ; 16717 GV *index_gv; 16718 IV index_const_iv = 0; /* init for spurious compiler warn */ 16719 SV *index_const_sv; 16720 int depth = 0; /* how many array/hash lookups we've done */ 16721 16722 UNOP_AUX_item *items = cUNOP_AUXx(obase)->op_aux; 16723 UNOP_AUX_item *last = NULL; 16724 UV actions = items->uv; 16725 bool is_hv; 16726 16727 if (PL_op == obase) { 16728 last = PL_multideref_pc; 16729 assert(last >= items && last <= items + items[-1].uv); 16730 } 16731 16732 assert(actions); 16733 16734 while (1) { 16735 is_hv = FALSE; 16736 switch (actions & MDEREF_ACTION_MASK) { 16737 16738 case MDEREF_reload: 16739 actions = (++items)->uv; 16740 continue; 16741 16742 case MDEREF_HV_padhv_helem: /* $lex{...} */ 16743 is_hv = TRUE; 16744 /* FALLTHROUGH */ 16745 case MDEREF_AV_padav_aelem: /* $lex[...] */ 16746 agg_targ = (++items)->pad_offset; 16747 agg_gv = NULL; 16748 break; 16749 16750 case MDEREF_HV_gvhv_helem: /* $pkg{...} */ 16751 is_hv = TRUE; 16752 /* FALLTHROUGH */ 16753 case MDEREF_AV_gvav_aelem: /* $pkg[...] */ 16754 agg_targ = 0; 16755 agg_gv = (GV*)UNOP_AUX_item_sv(++items); 16756 assert(isGV_with_GP(agg_gv)); 16757 break; 16758 16759 case MDEREF_HV_gvsv_vivify_rv2hv_helem: /* $pkg->{...} */ 16760 case MDEREF_HV_padsv_vivify_rv2hv_helem: /* $lex->{...} */ 16761 ++items; 16762 /* FALLTHROUGH */ 16763 case MDEREF_HV_pop_rv2hv_helem: /* expr->{...} */ 16764 case MDEREF_HV_vivify_rv2hv_helem: /* vivify, ->{...} */ 16765 agg_targ = 0; 16766 agg_gv = NULL; 16767 is_hv = TRUE; 16768 break; 16769 16770 case MDEREF_AV_gvsv_vivify_rv2av_aelem: /* $pkg->[...] */ 16771 case MDEREF_AV_padsv_vivify_rv2av_aelem: /* $lex->[...] */ 16772 ++items; 16773 /* FALLTHROUGH */ 16774 case MDEREF_AV_pop_rv2av_aelem: /* expr->[...] */ 16775 case MDEREF_AV_vivify_rv2av_aelem: /* vivify, ->[...] */ 16776 agg_targ = 0; 16777 agg_gv = NULL; 16778 } /* switch */ 16779 16780 index_targ = 0; 16781 index_gv = NULL; 16782 index_const_sv = NULL; 16783 16784 index_type = (actions & MDEREF_INDEX_MASK); 16785 switch (index_type) { 16786 case MDEREF_INDEX_none: 16787 break; 16788 case MDEREF_INDEX_const: 16789 if (is_hv) 16790 index_const_sv = UNOP_AUX_item_sv(++items) 16791 else 16792 index_const_iv = (++items)->iv; 16793 break; 16794 case MDEREF_INDEX_padsv: 16795 index_targ = (++items)->pad_offset; 16796 break; 16797 case MDEREF_INDEX_gvsv: 16798 index_gv = (GV*)UNOP_AUX_item_sv(++items); 16799 assert(isGV_with_GP(index_gv)); 16800 break; 16801 } 16802 16803 if (index_type != MDEREF_INDEX_none) 16804 depth++; 16805 16806 if ( index_type == MDEREF_INDEX_none 16807 || (actions & MDEREF_FLAG_last) 16808 || (last && items >= last) 16809 ) 16810 break; 16811 16812 actions >>= MDEREF_SHIFT; 16813 } /* while */ 16814 16815 if (PL_op == obase) { 16816 /* most likely index was undef */ 16817 16818 *desc_p = ( (actions & MDEREF_FLAG_last) 16819 && (obase->op_private 16820 & (OPpMULTIDEREF_EXISTS|OPpMULTIDEREF_DELETE))) 16821 ? 16822 (obase->op_private & OPpMULTIDEREF_EXISTS) 16823 ? "exists" 16824 : "delete" 16825 : is_hv ? "hash element" : "array element"; 16826 assert(index_type != MDEREF_INDEX_none); 16827 if (index_gv) { 16828 if (GvSV(index_gv) == uninit_sv) 16829 return varname(index_gv, '$', 0, NULL, 0, 16830 FUV_SUBSCRIPT_NONE); 16831 else 16832 return NULL; 16833 } 16834 if (index_targ) { 16835 if (PL_curpad[index_targ] == uninit_sv) 16836 return varname(NULL, '$', index_targ, 16837 NULL, 0, FUV_SUBSCRIPT_NONE); 16838 else 16839 return NULL; 16840 } 16841 /* If we got to this point it was undef on a const subscript, 16842 * so magic probably involved, e.g. $ISA[0]. Give up. */ 16843 return NULL; 16844 } 16845 16846 /* the SV returned by pp_multideref() was undef, if anything was */ 16847 16848 if (depth != 1) 16849 break; 16850 16851 if (agg_targ) 16852 sv = PAD_SV(agg_targ); 16853 else if (agg_gv) { 16854 sv = is_hv ? MUTABLE_SV(GvHV(agg_gv)) : MUTABLE_SV(GvAV(agg_gv)); 16855 if (!sv) 16856 break; 16857 } 16858 else 16859 break; 16860 16861 if (index_type == MDEREF_INDEX_const) { 16862 if (match) { 16863 if (SvMAGICAL(sv)) 16864 break; 16865 if (is_hv) { 16866 HE* he = hv_fetch_ent(MUTABLE_HV(sv), index_const_sv, 0, 0); 16867 if (!he || HeVAL(he) != uninit_sv) 16868 break; 16869 } 16870 else { 16871 SV * const * const svp = 16872 av_fetch(MUTABLE_AV(sv), index_const_iv, FALSE); 16873 if (!svp || *svp != uninit_sv) 16874 break; 16875 } 16876 } 16877 return is_hv 16878 ? varname(agg_gv, '%', agg_targ, 16879 index_const_sv, 0, FUV_SUBSCRIPT_HASH) 16880 : varname(agg_gv, '@', agg_targ, 16881 NULL, index_const_iv, FUV_SUBSCRIPT_ARRAY); 16882 } 16883 else { 16884 /* index is an var */ 16885 if (is_hv) { 16886 SV * const keysv = find_hash_subscript((const HV*)sv, uninit_sv); 16887 if (keysv) 16888 return varname(agg_gv, '%', agg_targ, 16889 keysv, 0, FUV_SUBSCRIPT_HASH); 16890 } 16891 else { 16892 const SSize_t index 16893 = find_array_subscript((const AV *)sv, uninit_sv); 16894 if (index >= 0) 16895 return varname(agg_gv, '@', agg_targ, 16896 NULL, index, FUV_SUBSCRIPT_ARRAY); 16897 } 16898 /* look for an element not found */ 16899 if (!SvMAGICAL(sv)) { 16900 SV *index_sv = NULL; 16901 if (index_targ) { 16902 index_sv = PL_curpad[index_targ]; 16903 } 16904 else if (index_gv) { 16905 index_sv = GvSV(index_gv); 16906 } 16907 if (index_sv && !SvMAGICAL(index_sv) && !SvROK(index_sv)) { 16908 if (is_hv) { 16909 SV *report_index_sv = SvOK(index_sv) ? index_sv : &PL_sv_no; 16910 HE *he = hv_fetch_ent(MUTABLE_HV(sv), report_index_sv, 0, 0); 16911 if (!he) { 16912 return varname(agg_gv, '%', agg_targ, 16913 report_index_sv, 0, FUV_SUBSCRIPT_HASH); 16914 } 16915 } 16916 else { 16917 SSize_t index = SvOK(index_sv) ? SvIV(index_sv) : 0; 16918 SV * const * const svp = 16919 av_fetch(MUTABLE_AV(sv), index, FALSE); 16920 if (!svp) { 16921 return varname(agg_gv, '@', agg_targ, 16922 NULL, index, FUV_SUBSCRIPT_ARRAY); 16923 } 16924 } 16925 } 16926 } 16927 if (match) 16928 break; 16929 return varname(agg_gv, 16930 is_hv ? '%' : '@', 16931 agg_targ, NULL, 0, FUV_SUBSCRIPT_WITHIN); 16932 } 16933 NOT_REACHED; /* NOTREACHED */ 16934 } 16935 16936 case OP_AASSIGN: 16937 /* only examine RHS */ 16938 return find_uninit_var(cBINOPx(obase)->op_first, uninit_sv, 16939 match, desc_p); 16940 16941 case OP_OPEN: 16942 o = cUNOPx(obase)->op_first; 16943 if ( o->op_type == OP_PUSHMARK 16944 || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK) 16945 ) 16946 o = OpSIBLING(o); 16947 16948 if (!OpHAS_SIBLING(o)) { 16949 /* one-arg version of open is highly magical */ 16950 16951 if (o->op_type == OP_GV) { /* open FOO; */ 16952 gv = cGVOPx_gv(o); 16953 if (match && GvSV(gv) != uninit_sv) 16954 break; 16955 return varname(gv, '$', 0, 16956 NULL, 0, FUV_SUBSCRIPT_NONE); 16957 } 16958 /* other possibilities not handled are: 16959 * open $x; or open my $x; should return '${*$x}' 16960 * open expr; should return '$'.expr ideally 16961 */ 16962 break; 16963 } 16964 match = 1; 16965 goto do_op; 16966 16967 /* ops where $_ may be an implicit arg */ 16968 case OP_TRANS: 16969 case OP_TRANSR: 16970 case OP_SUBST: 16971 case OP_MATCH: 16972 if ( !(obase->op_flags & OPf_STACKED)) { 16973 if (uninit_sv == DEFSV) 16974 return newSVpvs_flags("$_", SVs_TEMP); 16975 else if (obase->op_targ 16976 && uninit_sv == PAD_SVl(obase->op_targ)) 16977 return varname(NULL, '$', obase->op_targ, NULL, 0, 16978 FUV_SUBSCRIPT_NONE); 16979 } 16980 goto do_op; 16981 16982 case OP_PRTF: 16983 case OP_PRINT: 16984 case OP_SAY: 16985 match = 1; /* print etc can return undef on defined args */ 16986 /* skip filehandle as it can't produce 'undef' warning */ 16987 o = cUNOPx(obase)->op_first; 16988 if ((obase->op_flags & OPf_STACKED) 16989 && 16990 ( o->op_type == OP_PUSHMARK 16991 || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK))) 16992 o = OpSIBLING(OpSIBLING(o)); 16993 goto do_op2; 16994 16995 16996 case OP_ENTEREVAL: /* could be eval $undef or $x='$undef'; eval $x */ 16997 case OP_CUSTOM: /* XS or custom code could trigger random warnings */ 16998 16999 /* the following ops are capable of returning PL_sv_undef even for 17000 * defined arg(s) */ 17001 17002 case OP_BACKTICK: 17003 case OP_PIPE_OP: 17004 case OP_FILENO: 17005 case OP_BINMODE: 17006 case OP_TIED: 17007 case OP_GETC: 17008 case OP_SYSREAD: 17009 case OP_SEND: 17010 case OP_IOCTL: 17011 case OP_SOCKET: 17012 case OP_SOCKPAIR: 17013 case OP_BIND: 17014 case OP_CONNECT: 17015 case OP_LISTEN: 17016 case OP_ACCEPT: 17017 case OP_SHUTDOWN: 17018 case OP_SSOCKOPT: 17019 case OP_GETPEERNAME: 17020 case OP_FTRREAD: 17021 case OP_FTRWRITE: 17022 case OP_FTREXEC: 17023 case OP_FTROWNED: 17024 case OP_FTEREAD: 17025 case OP_FTEWRITE: 17026 case OP_FTEEXEC: 17027 case OP_FTEOWNED: 17028 case OP_FTIS: 17029 case OP_FTZERO: 17030 case OP_FTSIZE: 17031 case OP_FTFILE: 17032 case OP_FTDIR: 17033 case OP_FTLINK: 17034 case OP_FTPIPE: 17035 case OP_FTSOCK: 17036 case OP_FTBLK: 17037 case OP_FTCHR: 17038 case OP_FTTTY: 17039 case OP_FTSUID: 17040 case OP_FTSGID: 17041 case OP_FTSVTX: 17042 case OP_FTTEXT: 17043 case OP_FTBINARY: 17044 case OP_FTMTIME: 17045 case OP_FTATIME: 17046 case OP_FTCTIME: 17047 case OP_READLINK: 17048 case OP_OPEN_DIR: 17049 case OP_READDIR: 17050 case OP_TELLDIR: 17051 case OP_SEEKDIR: 17052 case OP_REWINDDIR: 17053 case OP_CLOSEDIR: 17054 case OP_GMTIME: 17055 case OP_ALARM: 17056 case OP_SEMGET: 17057 case OP_GETLOGIN: 17058 case OP_SUBSTR: 17059 case OP_AEACH: 17060 case OP_EACH: 17061 case OP_SORT: 17062 case OP_CALLER: 17063 case OP_DOFILE: 17064 case OP_PROTOTYPE: 17065 case OP_NCMP: 17066 case OP_SMARTMATCH: 17067 case OP_UNPACK: 17068 case OP_SYSOPEN: 17069 case OP_SYSSEEK: 17070 match = 1; 17071 goto do_op; 17072 17073 case OP_ENTERSUB: 17074 case OP_GOTO: 17075 /* XXX tmp hack: these two may call an XS sub, and currently 17076 XS subs don't have a SUB entry on the context stack, so CV and 17077 pad determination goes wrong, and BAD things happen. So, just 17078 don't try to determine the value under those circumstances. 17079 Need a better fix at dome point. DAPM 11/2007 */ 17080 break; 17081 17082 case OP_FLIP: 17083 case OP_FLOP: 17084 { 17085 GV * const gv = gv_fetchpvs(".", GV_NOTQUAL, SVt_PV); 17086 if (gv && GvSV(gv) == uninit_sv) 17087 return newSVpvs_flags("$.", SVs_TEMP); 17088 goto do_op; 17089 } 17090 17091 case OP_POS: 17092 /* def-ness of rval pos() is independent of the def-ness of its arg */ 17093 if ( !(obase->op_flags & OPf_MOD)) 17094 break; 17095 /* FALLTHROUGH */ 17096 17097 case OP_SCHOMP: 17098 case OP_CHOMP: 17099 if (SvROK(PL_rs) && uninit_sv == SvRV(PL_rs)) 17100 return newSVpvs_flags("${$/}", SVs_TEMP); 17101 /* FALLTHROUGH */ 17102 17103 default: 17104 do_op: 17105 if (!(obase->op_flags & OPf_KIDS)) 17106 break; 17107 o = cUNOPx(obase)->op_first; 17108 17109 do_op2: 17110 if (!o) 17111 break; 17112 17113 /* This loop checks all the kid ops, skipping any that cannot pos- 17114 * sibly be responsible for the uninitialized value; i.e., defined 17115 * constants and ops that return nothing. If there is only one op 17116 * left that is not skipped, then we *know* it is responsible for 17117 * the uninitialized value. If there is more than one op left, we 17118 * have to look for an exact match in the while() loop below. 17119 * Note that we skip padrange, because the individual pad ops that 17120 * it replaced are still in the tree, so we work on them instead. 17121 */ 17122 o2 = NULL; 17123 for (kid=o; kid; kid = OpSIBLING(kid)) { 17124 const OPCODE type = kid->op_type; 17125 if ( (type == OP_CONST && SvOK(cSVOPx_sv(kid))) 17126 || (type == OP_NULL && ! (kid->op_flags & OPf_KIDS)) 17127 || (type == OP_PUSHMARK) 17128 || (type == OP_PADRANGE) 17129 ) 17130 continue; 17131 17132 if (o2) { /* more than one found */ 17133 o2 = NULL; 17134 break; 17135 } 17136 o2 = kid; 17137 } 17138 if (o2) 17139 return find_uninit_var(o2, uninit_sv, match, desc_p); 17140 17141 /* scan all args */ 17142 while (o) { 17143 sv = find_uninit_var(o, uninit_sv, 1, desc_p); 17144 if (sv) 17145 return sv; 17146 o = OpSIBLING(o); 17147 } 17148 break; 17149 } 17150 return NULL; 17151 } 17152 17153 17154 /* 17155 =for apidoc report_uninit 17156 17157 Print appropriate "Use of uninitialized variable" warning. 17158 17159 =cut 17160 */ 17161 17162 void 17163 Perl_report_uninit(pTHX_ const SV *uninit_sv) 17164 { 17165 const char *desc = NULL; 17166 SV* varname = NULL; 17167 17168 if (PL_op) { 17169 desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded 17170 ? "join or string" 17171 : PL_op->op_type == OP_MULTICONCAT 17172 && (PL_op->op_private & OPpMULTICONCAT_FAKE) 17173 ? "sprintf" 17174 : OP_DESC(PL_op); 17175 if (uninit_sv && PL_curpad) { 17176 varname = find_uninit_var(PL_op, uninit_sv, 0, &desc); 17177 if (varname) 17178 sv_insert(varname, 0, 0, " ", 1); 17179 } 17180 } 17181 else if (PL_curstackinfo->si_type == PERLSI_SORT && cxstack_ix == 0) 17182 /* we've reached the end of a sort block or sub, 17183 * and the uninit value is probably what that code returned */ 17184 desc = "sort"; 17185 17186 /* PL_warn_uninit_sv is constant */ 17187 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 17188 if (desc) 17189 /* diag_listed_as: Use of uninitialized value%s */ 17190 Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit_sv, 17191 SVfARG(varname ? varname : &PL_sv_no), 17192 " in ", desc); 17193 else 17194 Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, 17195 "", "", ""); 17196 GCC_DIAG_RESTORE_STMT; 17197 } 17198 17199 /* 17200 * ex: set ts=8 sts=4 sw=4 et: 17201 */ 17202