1 /* sv.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall 5 * and others 6 * 7 * You may distribute under the terms of either the GNU General Public 8 * License or the Artistic License, as specified in the README file. 9 * 10 */ 11 12 /* 13 * 'I wonder what the Entish is for "yes" and "no",' he thought. 14 * --Pippin 15 * 16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"] 17 */ 18 19 /* 20 * 21 * 22 * This file contains the code that creates, manipulates and destroys 23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the 24 * structure of an SV, so their creation and destruction is handled 25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode 26 * level functions (eg. substr, split, join) for each of the types are 27 * in the pp*.c files. 28 */ 29 30 #include "EXTERN.h" 31 #define PERL_IN_SV_C 32 #include "perl.h" 33 #include "regcomp.h" 34 #ifdef __VMS 35 # include <rms.h> 36 #endif 37 38 #ifdef __Lynx__ 39 /* Missing proto on LynxOS */ 40 char *gconvert(double, int, int, char *); 41 #endif 42 43 #ifdef USE_QUADMATH 44 # define SNPRINTF_G(nv, buffer, size, ndig) \ 45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv)) 46 #else 47 # define SNPRINTF_G(nv, buffer, size, ndig) \ 48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer)) 49 #endif 50 51 #ifndef SV_COW_THRESHOLD 52 # define SV_COW_THRESHOLD 0 /* COW iff len > K */ 53 #endif 54 #ifndef SV_COWBUF_THRESHOLD 55 # define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */ 56 #endif 57 #ifndef SV_COW_MAX_WASTE_THRESHOLD 58 # define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */ 59 #endif 60 #ifndef SV_COWBUF_WASTE_THRESHOLD 61 # define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */ 62 #endif 63 #ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD 64 # define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */ 65 #endif 66 #ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD 67 # define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */ 68 #endif 69 /* Work around compiler warnings about unsigned >= THRESHOLD when thres- 70 hold is 0. */ 71 #if SV_COW_THRESHOLD 72 # define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD) 73 #else 74 # define GE_COW_THRESHOLD(cur) 1 75 #endif 76 #if SV_COWBUF_THRESHOLD 77 # define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD) 78 #else 79 # define GE_COWBUF_THRESHOLD(cur) 1 80 #endif 81 #if SV_COW_MAX_WASTE_THRESHOLD 82 # define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD) 83 #else 84 # define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1 85 #endif 86 #if SV_COWBUF_WASTE_THRESHOLD 87 # define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD) 88 #else 89 # define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1 90 #endif 91 #if SV_COW_MAX_WASTE_FACTOR_THRESHOLD 92 # define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur)) 93 #else 94 # define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1 95 #endif 96 #if SV_COWBUF_WASTE_FACTOR_THRESHOLD 97 # define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur)) 98 #else 99 # define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1 100 #endif 101 102 #define CHECK_COW_THRESHOLD(cur,len) (\ 103 GE_COW_THRESHOLD((cur)) && \ 104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \ 105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \ 106 ) 107 #define CHECK_COWBUF_THRESHOLD(cur,len) (\ 108 GE_COWBUF_THRESHOLD((cur)) && \ 109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \ 110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \ 111 ) 112 113 #ifdef PERL_UTF8_CACHE_ASSERT 114 /* if adding more checks watch out for the following tests: 115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t 116 * lib/utf8.t lib/Unicode/Collate/t/index.t 117 * --jhi 118 */ 119 # define ASSERT_UTF8_CACHE(cache) \ 120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \ 121 assert((cache)[2] <= (cache)[3]); \ 122 assert((cache)[3] <= (cache)[1]);} \ 123 } STMT_END 124 #else 125 # define ASSERT_UTF8_CACHE(cache) NOOP 126 #endif 127 128 static const char S_destroy[] = "DESTROY"; 129 #define S_destroy_len (sizeof(S_destroy)-1) 130 131 /* ============================================================================ 132 133 An SV (or AV, HV, etc.) is allocated in two parts: the head (struct 134 sv, av, hv...) contains type and reference count information, and for 135 many types, a pointer to the body (struct xrv, xpv, xpviv...), which 136 contains fields specific to each type. Some types store all they need 137 in the head, so don't have a body. 138 139 In all but the most memory-paranoid configurations (ex: PURIFY), heads 140 and bodies are allocated out of arenas, which by default are 141 approximately 4K chunks of memory parcelled up into N heads or bodies. 142 Sv-bodies are allocated by their sv-type, guaranteeing size 143 consistency needed to allocate safely from arrays. 144 145 For SV-heads, the first slot in each arena is reserved, and holds a 146 link to the next arena, some flags, and a note of the number of slots. 147 Snaked through each arena chain is a linked list of free items; when 148 this becomes empty, an extra arena is allocated and divided up into N 149 items which are threaded into the free list. 150 151 SV-bodies are similar, but they use arena-sets by default, which 152 separate the link and info from the arena itself, and reclaim the 1st 153 slot in the arena. SV-bodies are further described later. 154 155 The following global variables are associated with arenas: 156 157 PL_sv_arenaroot pointer to list of SV arenas 158 PL_sv_root pointer to list of free SV structures 159 160 PL_body_arenas head of linked-list of body arenas 161 PL_body_roots[] array of pointers to list of free bodies of svtype 162 arrays are indexed by the svtype needed 163 164 A few special SV heads are not allocated from an arena, but are 165 instead directly created in the interpreter structure, eg PL_sv_undef. 166 The size of arenas can be changed from the default by setting 167 PERL_ARENA_SIZE appropriately at compile time. 168 169 The SV arena serves the secondary purpose of allowing still-live SVs 170 to be located and destroyed during final cleanup. 171 172 At the lowest level, the macros new_SV() and del_SV() grab and free 173 an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv() 174 to return the SV to the free list with error checking.) new_SV() calls 175 more_sv() / sv_add_arena() to add an extra arena if the free list is empty. 176 SVs in the free list have their SvTYPE field set to all ones. 177 178 At the time of very final cleanup, sv_free_arenas() is called from 179 perl_destruct() to physically free all the arenas allocated since the 180 start of the interpreter. 181 182 The internal function visit() scans the SV arenas list, and calls a specified 183 function for each SV it finds which is still live, I<i.e.> which has an SvTYPE 184 other than all 1's, and a non-zero SvREFCNT. visit() is used by the 185 following functions (specified as [function that calls visit()] / [function 186 called by visit() for each SV]): 187 188 sv_report_used() / do_report_used() 189 dump all remaining SVs (debugging aid) 190 191 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(), 192 do_clean_named_io_objs(),do_curse() 193 Attempt to free all objects pointed to by RVs, 194 try to do the same for all objects indir- 195 ectly referenced by typeglobs too, and 196 then do a final sweep, cursing any 197 objects that remain. Called once from 198 perl_destruct(), prior to calling sv_clean_all() 199 below. 200 201 sv_clean_all() / do_clean_all() 202 SvREFCNT_dec(sv) each remaining SV, possibly 203 triggering an sv_free(). It also sets the 204 SVf_BREAK flag on the SV to indicate that the 205 refcnt has been artificially lowered, and thus 206 stopping sv_free() from giving spurious warnings 207 about SVs which unexpectedly have a refcnt 208 of zero. called repeatedly from perl_destruct() 209 until there are no SVs left. 210 211 =head2 Arena allocator API Summary 212 213 Private API to rest of sv.c 214 215 new_SV(), del_SV(), 216 217 new_XPVNV(), del_body() 218 etc 219 220 Public API: 221 222 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas() 223 224 =cut 225 226 * ========================================================================= */ 227 228 /* 229 * "A time to plant, and a time to uproot what was planted..." 230 */ 231 232 #ifdef DEBUG_LEAKING_SCALARS 233 # define FREE_SV_DEBUG_FILE(sv) STMT_START { \ 234 if ((sv)->sv_debug_file) { \ 235 PerlMemShared_free((sv)->sv_debug_file); \ 236 sv->sv_debug_file = NULL; \ 237 } \ 238 } STMT_END 239 # define DEBUG_SV_SERIAL(sv) \ 240 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \ 241 PTR2UV(sv), (long)(sv)->sv_debug_serial)) 242 #else 243 # define FREE_SV_DEBUG_FILE(sv) 244 # define DEBUG_SV_SERIAL(sv) NOOP 245 #endif 246 247 /* Mark an SV head as unused, and add to free list. 248 * 249 * If SVf_BREAK is set, skip adding it to the free list, as this SV had 250 * its refcount artificially decremented during global destruction, so 251 * there may be dangling pointers to it. The last thing we want in that 252 * case is for it to be reused. */ 253 254 #define plant_SV(p) \ 255 STMT_START { \ 256 const U32 old_flags = SvFLAGS(p); \ 257 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \ 258 DEBUG_SV_SERIAL(p); \ 259 FREE_SV_DEBUG_FILE(p); \ 260 POISON_SV_HEAD(p); \ 261 SvFLAGS(p) = SVTYPEMASK; \ 262 if (!(old_flags & SVf_BREAK)) { \ 263 SvARENA_CHAIN_SET(p, PL_sv_root); \ 264 PL_sv_root = (p); \ 265 } \ 266 --PL_sv_count; \ 267 } STMT_END 268 269 270 /* make some more SVs by adding another arena */ 271 272 SV* 273 Perl_more_sv(pTHX) 274 { 275 SV* sv; 276 char *chunk; /* must use New here to match call to */ 277 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */ 278 sv_add_arena(chunk, PERL_ARENA_SIZE, 0); 279 uproot_SV(sv); 280 return sv; 281 } 282 283 /* del_SV(): return an empty SV head to the free list */ 284 285 #ifdef DEBUGGING 286 287 #define del_SV(p) \ 288 STMT_START { \ 289 if (DEBUG_D_TEST) \ 290 del_sv(p); \ 291 else \ 292 plant_SV(p); \ 293 } STMT_END 294 295 STATIC void 296 S_del_sv(pTHX_ SV *p) 297 { 298 PERL_ARGS_ASSERT_DEL_SV; 299 300 if (DEBUG_D_TEST) { 301 SV* sva; 302 bool ok = 0; 303 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) { 304 const SV * const sv = sva + 1; 305 const SV * const svend = &sva[SvREFCNT(sva)]; 306 if (p >= sv && p < svend) { 307 ok = 1; 308 break; 309 } 310 } 311 if (!ok) { 312 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 313 "Attempt to free non-arena SV: 0x%" UVxf 314 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE); 315 return; 316 } 317 } 318 plant_SV(p); 319 } 320 321 #else /* ! DEBUGGING */ 322 323 #define del_SV(p) plant_SV(p) 324 325 #endif /* DEBUGGING */ 326 327 328 /* 329 =for apidoc_section $SV 330 331 =for apidoc sv_add_arena 332 333 Given a chunk of memory, link it to the head of the list of arenas, 334 and split it into a list of free SVs. 335 336 =cut 337 */ 338 339 static void 340 S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags) 341 { 342 SV *const sva = MUTABLE_SV(ptr); 343 SV* sv; 344 SV* svend; 345 346 PERL_ARGS_ASSERT_SV_ADD_ARENA; 347 348 /* The first SV in an arena isn't an SV. */ 349 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */ 350 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */ 351 SvFLAGS(sva) = flags; /* FAKE if not to be freed */ 352 353 PL_sv_arenaroot = sva; 354 PL_sv_root = sva + 1; 355 356 svend = &sva[SvREFCNT(sva) - 1]; 357 sv = sva + 1; 358 while (sv < svend) { 359 SvARENA_CHAIN_SET(sv, (sv + 1)); 360 #ifdef DEBUGGING 361 SvREFCNT(sv) = 0; 362 #endif 363 /* Must always set typemask because it's always checked in on cleanup 364 when the arenas are walked looking for objects. */ 365 SvFLAGS(sv) = SVTYPEMASK; 366 sv++; 367 } 368 SvARENA_CHAIN_SET(sv, 0); 369 #ifdef DEBUGGING 370 SvREFCNT(sv) = 0; 371 #endif 372 SvFLAGS(sv) = SVTYPEMASK; 373 } 374 375 /* visit(): call the named function for each non-free SV in the arenas 376 * whose flags field matches the flags/mask args. */ 377 378 STATIC SSize_t 379 S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask) 380 { 381 SV* sva; 382 I32 visited = 0; 383 384 PERL_ARGS_ASSERT_VISIT; 385 386 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) { 387 const SV * const svend = &sva[SvREFCNT(sva)]; 388 SV* sv; 389 for (sv = sva + 1; sv < svend; ++sv) { 390 if (!SvIS_FREED(sv) 391 && (sv->sv_flags & mask) == flags 392 && SvREFCNT(sv)) 393 { 394 (*f)(aTHX_ sv); 395 ++visited; 396 } 397 } 398 } 399 return visited; 400 } 401 402 #ifdef DEBUGGING 403 404 /* called by sv_report_used() for each live SV */ 405 406 static void 407 do_report_used(pTHX_ SV *const sv) 408 { 409 if (!SvIS_FREED(sv)) { 410 PerlIO_printf(Perl_debug_log, "****\n"); 411 sv_dump(sv); 412 } 413 } 414 #endif 415 416 /* 417 =for apidoc sv_report_used 418 419 Dump the contents of all SVs not yet freed (debugging aid). 420 421 =cut 422 */ 423 424 void 425 Perl_sv_report_used(pTHX) 426 { 427 #ifdef DEBUGGING 428 visit(do_report_used, 0, 0); 429 #else 430 PERL_UNUSED_CONTEXT; 431 #endif 432 } 433 434 /* called by sv_clean_objs() for each live SV */ 435 436 static void 437 do_clean_objs(pTHX_ SV *const ref) 438 { 439 assert (SvROK(ref)); 440 { 441 SV * const target = SvRV(ref); 442 if (SvOBJECT(target)) { 443 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref))); 444 if (SvWEAKREF(ref)) { 445 sv_del_backref(target, ref); 446 SvWEAKREF_off(ref); 447 SvRV_set(ref, NULL); 448 } else { 449 SvROK_off(ref); 450 SvRV_set(ref, NULL); 451 SvREFCNT_dec_NN(target); 452 } 453 } 454 } 455 } 456 457 458 /* clear any slots in a GV which hold objects - except IO; 459 * called by sv_clean_objs() for each live GV */ 460 461 static void 462 do_clean_named_objs(pTHX_ SV *const sv) 463 { 464 SV *obj; 465 assert(SvTYPE(sv) == SVt_PVGV); 466 assert(isGV_with_GP(sv)); 467 if (!GvGP(sv)) 468 return; 469 470 /* freeing GP entries may indirectly free the current GV; 471 * hold onto it while we mess with the GP slots */ 472 SvREFCNT_inc(sv); 473 474 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) { 475 DEBUG_D((PerlIO_printf(Perl_debug_log, 476 "Cleaning named glob SV object:\n "), sv_dump(obj))); 477 GvSV(sv) = NULL; 478 SvREFCNT_dec_NN(obj); 479 } 480 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) { 481 DEBUG_D((PerlIO_printf(Perl_debug_log, 482 "Cleaning named glob AV object:\n "), sv_dump(obj))); 483 GvAV(sv) = NULL; 484 SvREFCNT_dec_NN(obj); 485 } 486 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) { 487 DEBUG_D((PerlIO_printf(Perl_debug_log, 488 "Cleaning named glob HV object:\n "), sv_dump(obj))); 489 GvHV(sv) = NULL; 490 SvREFCNT_dec_NN(obj); 491 } 492 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) { 493 DEBUG_D((PerlIO_printf(Perl_debug_log, 494 "Cleaning named glob CV object:\n "), sv_dump(obj))); 495 GvCV_set(sv, NULL); 496 SvREFCNT_dec_NN(obj); 497 } 498 SvREFCNT_dec_NN(sv); /* undo the inc above */ 499 } 500 501 /* clear any IO slots in a GV which hold objects (except stderr, defout); 502 * called by sv_clean_objs() for each live GV */ 503 504 static void 505 do_clean_named_io_objs(pTHX_ SV *const sv) 506 { 507 SV *obj; 508 assert(SvTYPE(sv) == SVt_PVGV); 509 assert(isGV_with_GP(sv)); 510 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv) 511 return; 512 513 SvREFCNT_inc(sv); 514 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) { 515 DEBUG_D((PerlIO_printf(Perl_debug_log, 516 "Cleaning named glob IO object:\n "), sv_dump(obj))); 517 GvIOp(sv) = NULL; 518 SvREFCNT_dec_NN(obj); 519 } 520 SvREFCNT_dec_NN(sv); /* undo the inc above */ 521 } 522 523 /* Void wrapper to pass to visit() */ 524 static void 525 do_curse(pTHX_ SV * const sv) { 526 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv) 527 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv)) 528 return; 529 (void)curse(sv, 0); 530 } 531 532 /* 533 =for apidoc sv_clean_objs 534 535 Attempt to destroy all objects not yet freed. 536 537 =cut 538 */ 539 540 void 541 Perl_sv_clean_objs(pTHX) 542 { 543 GV *olddef, *olderr; 544 PL_in_clean_objs = TRUE; 545 visit(do_clean_objs, SVf_ROK, SVf_ROK); 546 /* Some barnacles may yet remain, clinging to typeglobs. 547 * Run the non-IO destructors first: they may want to output 548 * error messages, close files etc */ 549 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP); 550 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP); 551 /* And if there are some very tenacious barnacles clinging to arrays, 552 closures, or what have you.... */ 553 visit(do_curse, SVs_OBJECT, SVs_OBJECT); 554 olddef = PL_defoutgv; 555 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */ 556 if (olddef && isGV_with_GP(olddef)) 557 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef)); 558 olderr = PL_stderrgv; 559 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */ 560 if (olderr && isGV_with_GP(olderr)) 561 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr)); 562 SvREFCNT_dec(olddef); 563 PL_in_clean_objs = FALSE; 564 } 565 566 /* called by sv_clean_all() for each live SV */ 567 568 static void 569 do_clean_all(pTHX_ SV *const sv) 570 { 571 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) { 572 /* don't clean pid table and strtab */ 573 return; 574 } 575 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) )); 576 SvFLAGS(sv) |= SVf_BREAK; 577 SvREFCNT_dec_NN(sv); 578 } 579 580 /* 581 =for apidoc sv_clean_all 582 583 Decrement the refcnt of each remaining SV, possibly triggering a 584 cleanup. This function may have to be called multiple times to free 585 SVs which are in complex self-referential hierarchies. 586 587 =cut 588 */ 589 590 SSize_t 591 Perl_sv_clean_all(pTHX) 592 { 593 SSize_t cleaned; 594 PL_in_clean_all = TRUE; 595 cleaned = visit(do_clean_all, 0,0); 596 return cleaned; 597 } 598 599 600 #ifdef DEBUGGING 601 602 /* Called by sv_mark_arenas() for each live SV: set SVf_BREAK */ 603 604 static void 605 S_do_sv_mark_arenas(pTHX_ SV *const sv) 606 { 607 sv->sv_flags |= SVf_BREAK; 608 } 609 610 /* sv_mark_arenas(): for leak debugging: mark all live SVs with SVf_BREAK. 611 * Then later, use sv_sweep_arenas() to list any SVs not so marked. 612 */ 613 614 void 615 Perl_sv_mark_arenas(pTHX) 616 { 617 visit(S_do_sv_mark_arenas, 0, 0); 618 } 619 620 /* Called by sv_sweep_arenas() for each live SV, to list any SVs without 621 * SVf_BREAK set */ 622 623 static void 624 S_do_sv_sweep_arenas(pTHX_ SV *const sv) 625 { 626 if (sv->sv_flags & SVf_BREAK) { 627 sv->sv_flags &= ~SVf_BREAK; 628 return; 629 } 630 PerlIO_printf(Perl_debug_log, "Unmarked SV: 0x%p: %s\n", 631 sv, SvPEEK(sv)); 632 } 633 634 635 /* sv_sweep_arenas(): for debugging: list all live SVs that don't have 636 * SVf_BREAK set, then turn off all SVf_BREAK flags. Typically used some 637 * time after sv_mark_arenas(), to find SVs which have been created since 638 * the marking but not yet freed (they may have leaked, or been stored in 639 * an array, or whatever). 640 */ 641 642 void 643 Perl_sv_sweep_arenas(pTHX) 644 { 645 visit(S_do_sv_sweep_arenas, 0, 0); 646 } 647 648 #endif 649 650 651 /* 652 ARENASETS: a meta-arena implementation which separates arena-info 653 into struct arena_set, which contains an array of struct 654 arena_descs, each holding info for a single arena. By separating 655 the meta-info from the arena, we recover the 1st slot, formerly 656 borrowed for list management. The arena_set is about the size of an 657 arena, avoiding the needless malloc overhead of a naive linked-list. 658 659 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused 660 memory in the last arena-set (1/2 on average). In trade, we get 661 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for 662 smaller types). The recovery of the wasted space allows use of 663 small arenas for large, rare body types, by changing array* fields 664 in body_details_by_type[] below. 665 */ 666 struct arena_desc { 667 char *arena; /* the raw storage, allocated aligned */ 668 size_t size; /* its size ~4k typ */ 669 svtype utype; /* bodytype stored in arena */ 670 }; 671 672 struct arena_set; 673 674 /* Get the maximum number of elements in set[] such that struct arena_set 675 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and 676 therefore likely to be 1 aligned memory page. */ 677 678 #define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \ 679 - 2 * sizeof(int)) / sizeof (struct arena_desc)) 680 681 struct arena_set { 682 struct arena_set* next; 683 unsigned int set_size; /* ie ARENAS_PER_SET */ 684 unsigned int curr; /* index of next available arena-desc */ 685 struct arena_desc set[ARENAS_PER_SET]; 686 }; 687 688 /* 689 =for apidoc sv_free_arenas 690 691 Deallocate the memory used by all arenas. Note that all the individual SV 692 heads and bodies within the arenas must already have been freed. 693 694 =cut 695 696 */ 697 void 698 Perl_sv_free_arenas(pTHX) 699 { 700 SV* sva; 701 SV* svanext; 702 unsigned int i; 703 704 /* Free arenas here, but be careful about fake ones. (We assume 705 contiguity of the fake ones with the corresponding real ones.) */ 706 707 for (sva = PL_sv_arenaroot; sva; sva = svanext) { 708 svanext = MUTABLE_SV(SvANY(sva)); 709 while (svanext && SvFAKE(svanext)) 710 svanext = MUTABLE_SV(SvANY(svanext)); 711 712 if (!SvFAKE(sva)) 713 Safefree(sva); 714 } 715 716 { 717 struct arena_set *aroot = (struct arena_set*) PL_body_arenas; 718 719 while (aroot) { 720 struct arena_set *current = aroot; 721 i = aroot->curr; 722 while (i--) { 723 assert(aroot->set[i].arena); 724 Safefree(aroot->set[i].arena); 725 } 726 aroot = aroot->next; 727 Safefree(current); 728 } 729 } 730 PL_body_arenas = 0; 731 732 i = PERL_ARENA_ROOTS_SIZE; 733 while (i--) 734 PL_body_roots[i] = 0; 735 736 PL_sv_arenaroot = 0; 737 PL_sv_root = 0; 738 } 739 740 /* 741 Historically, here were mid-level routines that manage the 742 allocation of bodies out of the various arenas. Some of these 743 routines and related definitions remain here, but others were 744 moved into sv_inline.h to facilitate inlining of newSV_type(). 745 746 There are 4 kinds of arenas: 747 748 1. SV-head arenas, which are discussed and handled above 749 2. regular body arenas 750 3. arenas for reduced-size bodies 751 4. Hash-Entry arenas 752 753 Arena types 2 & 3 are chained by body-type off an array of 754 arena-root pointers, which is indexed by svtype. Some of the 755 larger/less used body types are malloced singly, since a large 756 unused block of them is wasteful. Also, several svtypes don't have 757 bodies; the data fits into the sv-head itself. The arena-root 758 pointer thus has a few unused root-pointers (which may be hijacked 759 later for arena type 4) 760 761 3 differs from 2 as an optimization; some body types have several 762 unused fields in the front of the structure (which are kept in-place 763 for consistency). These bodies can be allocated in smaller chunks, 764 because the leading fields arent accessed. Pointers to such bodies 765 are decremented to point at the unused 'ghost' memory, knowing that 766 the pointers are used with offsets to the real memory. 767 768 Allocation of SV-bodies is similar to SV-heads, differing as follows; 769 the allocation mechanism is used for many body types, so is somewhat 770 more complicated, it uses arena-sets, and has no need for still-live 771 SV detection. 772 773 At the outermost level, (new|del)_X*V macros return bodies of the 774 appropriate type. These macros call either (new|del)_body_type or 775 (new|del)_body_allocated macro pairs, depending on specifics of the 776 type. Most body types use the former pair, the latter pair is used to 777 allocate body types with "ghost fields". 778 779 "ghost fields" are fields that are unused in certain types, and 780 consequently don't need to actually exist. They are declared because 781 they're part of a "base type", which allows use of functions as 782 methods. The simplest examples are AVs and HVs, 2 aggregate types 783 which don't use the fields which support SCALAR semantics. 784 785 For these types, the arenas are carved up into appropriately sized 786 chunks, we thus avoid wasted memory for those unaccessed members. 787 When bodies are allocated, we adjust the pointer back in memory by the 788 size of the part not allocated, so it's as if we allocated the full 789 structure. (But things will all go boom if you write to the part that 790 is "not there", because you'll be overwriting the last members of the 791 preceding structure in memory.) 792 793 We calculate the correction using the STRUCT_OFFSET macro on the first 794 member present. If the allocated structure is smaller (no initial NV 795 actually allocated) then the net effect is to subtract the size of the NV 796 from the pointer, to return a new pointer as if an initial NV were actually 797 allocated. (We were using structures named *_allocated for this, but 798 this turned out to be a subtle bug, because a structure without an NV 799 could have a lower alignment constraint, but the compiler is allowed to 800 optimised accesses based on the alignment constraint of the actual pointer 801 to the full structure, for example, using a single 64 bit load instruction 802 because it "knows" that two adjacent 32 bit members will be 8-byte aligned.) 803 804 This is the same trick as was used for NV and IV bodies. Ironically it 805 doesn't need to be used for NV bodies any more, because NV is now at 806 the start of the structure. IV bodies, and also in some builds NV bodies, 807 don't need it either, because they are no longer allocated. 808 809 In turn, the new_body_* allocators call S_new_body(), which invokes 810 new_body_from_arena macro, which takes a lock, and takes a body off the 811 linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if 812 necessary to refresh an empty list. Then the lock is released, and 813 the body is returned. 814 815 Perl_more_bodies allocates a new arena, and carves it up into an array of N 816 bodies, which it strings into a linked list. It looks up arena-size 817 and body-size from the body_details table described below, thus 818 supporting the multiple body-types. 819 820 If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and 821 the (new|del)_X*V macros are mapped directly to malloc/free. 822 823 For each sv-type, struct body_details bodies_by_type[] carries 824 parameters which control these aspects of SV handling: 825 826 Arena_size determines whether arenas are used for this body type, and if 827 so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to 828 zero, forcing individual mallocs and frees. 829 830 Body_size determines how big a body is, and therefore how many fit into 831 each arena. Offset carries the body-pointer adjustment needed for 832 "ghost fields", and is used in *_allocated macros. 833 834 But its main purpose is to parameterize info needed in 835 Perl_sv_upgrade(). The info here dramatically simplifies the function 836 vs the implementation in 5.8.8, making it table-driven. All fields 837 are used for this, except for arena_size. 838 839 For the sv-types that have no bodies, arenas are not used, so those 840 PL_body_roots[sv_type] are unused, and can be overloaded. In 841 something of a special case, SVt_NULL is borrowed for HE arenas; 842 PL_body_roots[HE_ARENA_ROOT_IX=SVt_NULL] is filled by S_more_he, but the 843 bodies_by_type[SVt_NULL] slot is not used, as the table is not 844 available in hv.c. Similarly SVt_IV is re-used for HVAUX_ARENA_ROOT_IX. 845 846 */ 847 848 /* return a thing to the free list */ 849 850 #define del_body(thing, root) \ 851 STMT_START { \ 852 void ** const thing_copy = (void **)thing; \ 853 *thing_copy = *root; \ 854 *root = (void*)thing_copy; \ 855 } STMT_END 856 857 858 void * 859 Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, 860 const size_t arena_size) 861 { 862 void ** const root = &PL_body_roots[sv_type]; 863 struct arena_desc *adesc; 864 struct arena_set *aroot = (struct arena_set *) PL_body_arenas; 865 unsigned int curr; 866 char *start; 867 const char *end; 868 const size_t good_arena_size = Perl_malloc_good_size(arena_size); 869 #if defined(DEBUGGING) 870 static bool done_sanity_check; 871 872 if (!done_sanity_check) { 873 unsigned int i = SVt_LAST; 874 875 done_sanity_check = TRUE; 876 877 while (i--) 878 assert (bodies_by_type[i].type == i); 879 } 880 #endif 881 882 assert(arena_size); 883 884 /* may need new arena-set to hold new arena */ 885 if (!aroot || aroot->curr >= aroot->set_size) { 886 struct arena_set *newroot; 887 Newxz(newroot, 1, struct arena_set); 888 newroot->set_size = ARENAS_PER_SET; 889 newroot->next = aroot; 890 aroot = newroot; 891 PL_body_arenas = (void *) newroot; 892 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot)); 893 } 894 895 /* ok, now have arena-set with at least 1 empty/available arena-desc */ 896 curr = aroot->curr++; 897 adesc = &(aroot->set[curr]); 898 assert(!adesc->arena); 899 900 Newx(adesc->arena, good_arena_size, char); 901 adesc->size = good_arena_size; 902 adesc->utype = sv_type; 903 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n", 904 curr, (void*)adesc->arena, (UV)good_arena_size)); 905 906 start = (char *) adesc->arena; 907 908 /* Get the address of the byte after the end of the last body we can fit. 909 Remember, this is integer division: */ 910 end = start + good_arena_size / body_size * body_size; 911 912 /* computed count doesn't reflect the 1st slot reservation */ 913 #if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE) 914 DEBUG_m(PerlIO_printf(Perl_debug_log, 915 "arena %p end %p arena-size %d (from %d) type %d " 916 "size %d ct %d\n", 917 (void*)start, (void*)end, (int)good_arena_size, 918 (int)arena_size, sv_type, (int)body_size, 919 (int)good_arena_size / (int)body_size)); 920 #else 921 DEBUG_m(PerlIO_printf(Perl_debug_log, 922 "arena %p end %p arena-size %d type %d size %d ct %d\n", 923 (void*)start, (void*)end, 924 (int)arena_size, sv_type, (int)body_size, 925 (int)good_arena_size / (int)body_size)); 926 #endif 927 *root = (void *)start; 928 929 while (1) { 930 /* Where the next body would start: */ 931 char * const next = start + body_size; 932 933 if (next >= end) { 934 /* This is the last body: */ 935 assert(next == end); 936 937 *(void **)start = 0; 938 return *root; 939 } 940 941 *(void**) start = (void *)next; 942 start = next; 943 } 944 } 945 946 /* 947 =for apidoc sv_upgrade 948 949 Upgrade an SV to a more complex form. Generally adds a new body type to the 950 SV, then copies across as much information as possible from the old body. 951 It croaks if the SV is already in a more complex form than requested. You 952 generally want to use the C<SvUPGRADE> macro wrapper, which checks the type 953 before calling C<sv_upgrade>, and hence does not croak. See also 954 C<L</svtype>>. 955 956 =cut 957 */ 958 959 void 960 Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) 961 { 962 void* old_body; 963 void* new_body; 964 const svtype old_type = SvTYPE(sv); 965 const struct body_details *new_type_details; 966 const struct body_details *old_type_details 967 = bodies_by_type + old_type; 968 SV *referent = NULL; 969 970 PERL_ARGS_ASSERT_SV_UPGRADE; 971 972 if (old_type == new_type) 973 return; 974 975 /* This clause was purposefully added ahead of the early return above to 976 the shared string hackery for (sort {$a <=> $b} keys %hash), with the 977 inference by Nick I-S that it would fix other troublesome cases. See 978 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent) 979 980 Given that shared hash key scalars are no longer PVIV, but PV, there is 981 no longer need to unshare so as to free up the IVX slot for its proper 982 purpose. So it's safe to move the early return earlier. */ 983 984 if (new_type > SVt_PVMG && SvIsCOW(sv)) { 985 sv_force_normal_flags(sv, 0); 986 } 987 988 old_body = SvANY(sv); 989 990 /* Copying structures onto other structures that have been neatly zeroed 991 has a subtle gotcha. Consider XPVMG 992 993 +------+------+------+------+------+-------+-------+ 994 | NV | CUR | LEN | IV | MAGIC | STASH | 995 +------+------+------+------+------+-------+-------+ 996 0 4 8 12 16 20 24 28 997 998 where NVs are aligned to 8 bytes, so that sizeof that structure is 999 actually 32 bytes long, with 4 bytes of padding at the end: 1000 1001 +------+------+------+------+------+-------+-------+------+ 1002 | NV | CUR | LEN | IV | MAGIC | STASH | ??? | 1003 +------+------+------+------+------+-------+-------+------+ 1004 0 4 8 12 16 20 24 28 32 1005 1006 so what happens if you allocate memory for this structure: 1007 1008 +------+------+------+------+------+-------+-------+------+------+... 1009 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME | 1010 +------+------+------+------+------+-------+-------+------+------+... 1011 0 4 8 12 16 20 24 28 32 36 1012 1013 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you 1014 expect, because you copy the area marked ??? onto GP. Now, ??? may have 1015 started out as zero once, but it's quite possible that it isn't. So now, 1016 rather than a nicely zeroed GP, you have it pointing somewhere random. 1017 Bugs ensue. 1018 1019 (In fact, GP ends up pointing at a previous GP structure, because the 1020 principle cause of the padding in XPVMG getting garbage is a copy of 1021 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now 1022 this happens to be moot because XPVGV has been re-ordered, with GP 1023 no longer after STASH) 1024 1025 So we are careful and work out the size of used parts of all the 1026 structures. */ 1027 1028 switch (old_type) { 1029 case SVt_NULL: 1030 break; 1031 case SVt_IV: 1032 if (SvROK(sv)) { 1033 referent = SvRV(sv); 1034 old_type_details = &fake_rv; 1035 if (new_type == SVt_NV) 1036 new_type = SVt_PVNV; 1037 } else { 1038 if (new_type < SVt_PVIV) { 1039 new_type = (new_type == SVt_NV) 1040 ? SVt_PVNV : SVt_PVIV; 1041 } 1042 } 1043 break; 1044 case SVt_NV: 1045 if (new_type < SVt_PVNV) { 1046 new_type = SVt_PVNV; 1047 } 1048 break; 1049 case SVt_PV: 1050 assert(new_type > SVt_PV); 1051 STATIC_ASSERT_STMT(SVt_IV < SVt_PV); 1052 STATIC_ASSERT_STMT(SVt_NV < SVt_PV); 1053 break; 1054 case SVt_PVIV: 1055 break; 1056 case SVt_PVNV: 1057 break; 1058 case SVt_PVMG: 1059 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena, 1060 there's no way that it can be safely upgraded, because perl.c 1061 expects to Safefree(SvANY(PL_mess_sv)) */ 1062 assert(sv != PL_mess_sv); 1063 break; 1064 default: 1065 if (UNLIKELY(old_type_details->cant_upgrade)) 1066 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf, 1067 sv_reftype(sv, 0), (UV) old_type, (UV) new_type); 1068 } 1069 1070 if (UNLIKELY(old_type > new_type)) 1071 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d", 1072 (int)old_type, (int)new_type); 1073 1074 new_type_details = bodies_by_type + new_type; 1075 1076 SvFLAGS(sv) &= ~SVTYPEMASK; 1077 SvFLAGS(sv) |= new_type; 1078 1079 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of 1080 the return statements above will have triggered. */ 1081 assert (new_type != SVt_NULL); 1082 switch (new_type) { 1083 case SVt_IV: 1084 assert(old_type == SVt_NULL); 1085 SET_SVANY_FOR_BODYLESS_IV(sv); 1086 SvIV_set(sv, 0); 1087 return; 1088 case SVt_NV: 1089 assert(old_type == SVt_NULL); 1090 #if NVSIZE <= IVSIZE 1091 SET_SVANY_FOR_BODYLESS_NV(sv); 1092 #else 1093 SvANY(sv) = new_XNV(); 1094 #endif 1095 SvNV_set(sv, 0); 1096 return; 1097 case SVt_PVHV: 1098 case SVt_PVAV: 1099 case SVt_PVOBJ: 1100 assert(new_type_details->body_size); 1101 1102 #ifndef PURIFY 1103 assert(new_type_details->arena); 1104 assert(new_type_details->arena_size); 1105 /* This points to the start of the allocated area. */ 1106 new_body = S_new_body(aTHX_ new_type); 1107 /* xpvav and xpvhv have no offset, so no need to adjust new_body */ 1108 assert(!(new_type_details->offset)); 1109 #else 1110 /* We always allocated the full length item with PURIFY. To do this 1111 we fake things so that arena is false for all 16 types.. */ 1112 new_body = new_NOARENAZ(new_type_details); 1113 #endif 1114 SvANY(sv) = new_body; 1115 switch(new_type) { 1116 case SVt_PVAV: 1117 { 1118 XPVAV pvav = { 1119 .xmg_stash = NULL, 1120 .xmg_u = {.xmg_magic = NULL}, 1121 .xav_fill = -1, .xav_max = -1, .xav_alloc = 0 1122 }; 1123 *((XPVAV*) SvANY(sv)) = pvav; 1124 } 1125 1126 AvREAL_only(sv); 1127 break; 1128 case SVt_PVHV: 1129 { 1130 XPVHV pvhv = { 1131 .xmg_stash = NULL, 1132 .xmg_u = {.xmg_magic = NULL}, 1133 .xhv_keys = 0, 1134 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */ 1135 .xhv_max = PERL_HASH_DEFAULT_HvMAX 1136 }; 1137 *((XPVHV*) SvANY(sv)) = pvhv; 1138 } 1139 1140 assert(!SvOK(sv)); 1141 SvOK_off(sv); 1142 #ifndef NODEFAULT_SHAREKEYS 1143 HvSHAREKEYS_on(sv); /* key-sharing on by default */ 1144 #endif 1145 break; 1146 case SVt_PVOBJ: 1147 { 1148 XPVOBJ pvo = { 1149 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL}, 1150 .xobject_maxfield = -1, 1151 .xobject_iter_sv_at = 0, 1152 .xobject_fields = NULL, 1153 }; 1154 *((XPVOBJ*) SvANY(sv)) = pvo; 1155 } 1156 break; 1157 default: 1158 NOT_REACHED; 1159 } 1160 1161 /* SVt_NULL isn't the only thing upgraded to AV or HV. 1162 The target created by newSVrv also is, and it can have magic. 1163 However, it never has SvPVX set. 1164 */ 1165 if (old_type == SVt_IV) { 1166 assert(!SvROK(sv)); 1167 } else if (old_type >= SVt_PV) { 1168 assert(SvPVX_const(sv) == 0); 1169 } 1170 1171 if (old_type >= SVt_PVMG) { 1172 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic); 1173 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash); 1174 } else { 1175 sv->sv_u.svu_array = NULL; /* or svu_hash */ 1176 } 1177 break; 1178 1179 case SVt_PVIV: 1180 /* XXX Is this still needed? Was it ever needed? Surely as there is 1181 no route from NV to PVIV, NOK can never be true */ 1182 assert(!SvNOKp(sv)); 1183 assert(!SvNOK(sv)); 1184 /* FALLTHROUGH */ 1185 case SVt_PVIO: 1186 case SVt_PVFM: 1187 case SVt_PVGV: 1188 case SVt_PVCV: 1189 case SVt_PVLV: 1190 case SVt_INVLIST: 1191 case SVt_REGEXP: 1192 case SVt_PVMG: 1193 case SVt_PVNV: 1194 case SVt_PV: 1195 1196 assert(new_type_details->body_size); 1197 /* We always allocated the full length item with PURIFY. To do this 1198 we fake things so that arena is false for all 16 types.. */ 1199 #ifndef PURIFY 1200 if(new_type_details->arena) { 1201 /* This points to the start of the allocated area. */ 1202 new_body = S_new_body(aTHX_ new_type); 1203 Zero(new_body, new_type_details->body_size, char); 1204 new_body = ((char *)new_body) - new_type_details->offset; 1205 } else 1206 #endif 1207 { 1208 new_body = new_NOARENAZ(new_type_details); 1209 } 1210 SvANY(sv) = new_body; 1211 1212 if (old_type_details->copy) { 1213 /* There is now the potential for an upgrade from something without 1214 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */ 1215 int offset = old_type_details->offset; 1216 int length = old_type_details->copy; 1217 1218 if (new_type_details->offset > old_type_details->offset) { 1219 const int difference 1220 = new_type_details->offset - old_type_details->offset; 1221 offset += difference; 1222 length -= difference; 1223 } 1224 assert (length >= 0); 1225 1226 Copy((char *)old_body + offset, (char *)new_body + offset, length, 1227 char); 1228 } 1229 1230 #ifndef NV_ZERO_IS_ALLBITS_ZERO 1231 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a 1232 * correct 0.0 for us. Otherwise, if the old body didn't have an 1233 * NV slot, but the new one does, then we need to initialise the 1234 * freshly created NV slot with whatever the correct bit pattern is 1235 * for 0.0 */ 1236 if (old_type_details->zero_nv && !new_type_details->zero_nv 1237 && !isGV_with_GP(sv)) 1238 SvNV_set(sv, 0); 1239 #endif 1240 1241 if (UNLIKELY(new_type == SVt_PVIO)) { 1242 IO * const io = MUTABLE_IO(sv); 1243 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV); 1244 1245 SvOBJECT_on(io); 1246 /* Clear the stashcache because a new IO could overrule a package 1247 name */ 1248 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n")); 1249 hv_clear(PL_stashcache); 1250 1251 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv)))); 1252 IoPAGE_LEN(sv) = 60; 1253 } 1254 if (old_type < SVt_PV) { 1255 /* referent will be NULL unless the old type was SVt_IV emulating 1256 SVt_RV */ 1257 sv->sv_u.svu_rv = referent; 1258 } 1259 break; 1260 default: 1261 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu", 1262 (unsigned long)new_type); 1263 } 1264 1265 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV, 1266 and sometimes SVt_NV */ 1267 if (old_type_details->body_size) { 1268 #ifdef PURIFY 1269 safefree(old_body); 1270 #else 1271 /* Note that there is an assumption that all bodies of types that 1272 can be upgraded came from arenas. Only the more complex non- 1273 upgradable types are allowed to be directly malloc()ed. */ 1274 assert(old_type_details->arena); 1275 del_body((void*)((char*)old_body + old_type_details->offset), 1276 &PL_body_roots[old_type]); 1277 #endif 1278 } 1279 } 1280 1281 struct xpvhv_aux* 1282 Perl_hv_auxalloc(pTHX_ HV *hv) { 1283 const struct body_details *old_type_details = bodies_by_type + SVt_PVHV; 1284 void *old_body; 1285 void *new_body; 1286 1287 PERL_ARGS_ASSERT_HV_AUXALLOC; 1288 assert(SvTYPE(hv) == SVt_PVHV); 1289 assert(!HvHasAUX(hv)); 1290 1291 #ifdef PURIFY 1292 new_body = new_NOARENAZ(&fake_hv_with_aux); 1293 #else 1294 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux); 1295 #endif 1296 1297 old_body = SvANY(hv); 1298 1299 Copy((char *)old_body + old_type_details->offset, 1300 (char *)new_body + fake_hv_with_aux.offset, 1301 old_type_details->copy, 1302 char); 1303 1304 #ifdef PURIFY 1305 safefree(old_body); 1306 #else 1307 assert(old_type_details->arena); 1308 del_body((void*)((char*)old_body + old_type_details->offset), 1309 &PL_body_roots[SVt_PVHV]); 1310 #endif 1311 1312 SvANY(hv) = (XPVHV *) new_body; 1313 SvFLAGS(hv) |= SVphv_HasAUX; 1314 return HvAUX(hv); 1315 } 1316 1317 /* 1318 =for apidoc sv_backoff 1319 1320 Remove any string offset. You should normally use the C<SvOOK_off> macro 1321 wrapper instead. 1322 1323 =cut 1324 */ 1325 1326 /* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS 1327 prior to 5.23.4 this function always returned 0 1328 */ 1329 1330 void 1331 Perl_sv_backoff(SV *const sv) 1332 { 1333 STRLEN delta; 1334 const char * const s = SvPVX_const(sv); 1335 1336 PERL_ARGS_ASSERT_SV_BACKOFF; 1337 1338 assert(SvOOK(sv)); 1339 assert(SvTYPE(sv) != SVt_PVHV); 1340 assert(SvTYPE(sv) != SVt_PVAV); 1341 1342 SvOOK_offset(sv, delta); 1343 1344 SvLEN_set(sv, SvLEN(sv) + delta); 1345 SvPV_set(sv, SvPVX(sv) - delta); 1346 SvFLAGS(sv) &= ~SVf_OOK; 1347 Move(s, SvPVX(sv), SvCUR(sv)+1, char); 1348 return; 1349 } 1350 1351 1352 /* forward declaration */ 1353 static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags); 1354 1355 1356 /* 1357 =for apidoc sv_grow 1358 1359 Expands the character buffer in the SV. If necessary, uses C<sv_unref> and 1360 upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer. 1361 Use the C<SvGROW> wrapper instead. 1362 1363 =cut 1364 */ 1365 1366 1367 char * 1368 Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen) 1369 { 1370 char *s; 1371 1372 PERL_ARGS_ASSERT_SV_GROW; 1373 1374 if (SvROK(sv)) 1375 sv_unref(sv); 1376 if (SvTYPE(sv) < SVt_PV) { 1377 sv_upgrade(sv, SVt_PV); 1378 s = SvPVX_mutable(sv); 1379 } 1380 else if (SvOOK(sv)) { /* pv is offset? */ 1381 sv_backoff(sv); 1382 s = SvPVX_mutable(sv); 1383 if (newlen > SvLEN(sv)) 1384 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */ 1385 } 1386 else 1387 { 1388 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0); 1389 s = SvPVX_mutable(sv); 1390 } 1391 1392 #ifdef PERL_COPY_ON_WRITE 1393 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare) 1394 * to store the COW count. So in general, allocate one more byte than 1395 * asked for, to make it likely this byte is always spare: and thus 1396 * make more strings COW-able. 1397 * 1398 * Only increment if the allocation isn't MEM_SIZE_MAX, 1399 * otherwise it will wrap to 0. 1400 */ 1401 if ( newlen != MEM_SIZE_MAX ) 1402 newlen++; 1403 #endif 1404 1405 #if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size) 1406 #define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC 1407 #endif 1408 1409 if (newlen > SvLEN(sv)) { /* need more room? */ 1410 STRLEN minlen = SvCUR(sv); 1411 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + PERL_STRLEN_NEW_MIN; 1412 if (newlen < minlen) 1413 newlen = minlen; 1414 #ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC 1415 1416 /* Don't round up on the first allocation, as odds are pretty good that 1417 * the initial request is accurate as to what is really needed */ 1418 if (SvLEN(sv)) { 1419 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen); 1420 if (rounded > newlen) 1421 newlen = rounded; 1422 } 1423 #endif 1424 if (SvLEN(sv) && s) { 1425 s = (char*)saferealloc(s, newlen); 1426 } 1427 else { 1428 s = (char*)safemalloc(newlen); 1429 if (SvPVX_const(sv) && SvCUR(sv)) { 1430 Move(SvPVX_const(sv), s, SvCUR(sv), char); 1431 } 1432 } 1433 SvPV_set(sv, s); 1434 #ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC 1435 /* Do this here, do it once, do it right, and then we will never get 1436 called back into sv_grow() unless there really is some growing 1437 needed. */ 1438 SvLEN_set(sv, Perl_safesysmalloc_size(s)); 1439 #else 1440 SvLEN_set(sv, newlen); 1441 #endif 1442 } 1443 return s; 1444 } 1445 1446 /* 1447 =for apidoc sv_grow_fresh 1448 1449 A cut-down version of sv_grow intended only for when sv is a freshly-minted 1450 SVt_PV, SVt_PVIV, SVt_PVNV, or SVt_PVMG. i.e. sv has the default flags, has 1451 never been any other type, and does not have an existing string. Basically, 1452 just assigns a char buffer and returns a pointer to it. 1453 1454 =cut 1455 */ 1456 1457 1458 char * 1459 Perl_sv_grow_fresh(pTHX_ SV *const sv, STRLEN newlen) 1460 { 1461 char *s; 1462 1463 PERL_ARGS_ASSERT_SV_GROW_FRESH; 1464 1465 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG); 1466 assert(!SvROK(sv)); 1467 assert(!SvOOK(sv)); 1468 assert(!SvIsCOW(sv)); 1469 assert(!SvLEN(sv)); 1470 assert(!SvCUR(sv)); 1471 1472 #ifdef PERL_COPY_ON_WRITE 1473 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare) 1474 * to store the COW count. So in general, allocate one more byte than 1475 * asked for, to make it likely this byte is always spare: and thus 1476 * make more strings COW-able. 1477 * 1478 * Only increment if the allocation isn't MEM_SIZE_MAX, 1479 * otherwise it will wrap to 0. 1480 */ 1481 if ( newlen != MEM_SIZE_MAX ) 1482 newlen++; 1483 #endif 1484 1485 if (newlen < PERL_STRLEN_NEW_MIN) 1486 newlen = PERL_STRLEN_NEW_MIN; 1487 1488 s = (char*)safemalloc(newlen); 1489 SvPV_set(sv, s); 1490 1491 /* No PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC here, since many strings */ 1492 /* will never be grown once set. Let the real sv_grow worry about that. */ 1493 SvLEN_set(sv, newlen); 1494 return s; 1495 } 1496 1497 /* 1498 =for apidoc sv_setiv 1499 =for apidoc_item sv_setiv_mg 1500 1501 These copy an integer into the given SV, upgrading first if necessary. 1502 1503 They differ only in that C<sv_setiv_mg> handles 'set' magic; C<sv_setiv> does 1504 not. 1505 1506 =cut 1507 */ 1508 1509 void 1510 Perl_sv_setiv(pTHX_ SV *const sv, const IV i) 1511 { 1512 PERL_ARGS_ASSERT_SV_SETIV; 1513 1514 SV_CHECK_THINKFIRST_COW_DROP(sv); 1515 switch (SvTYPE(sv)) { 1516 #if NVSIZE <= IVSIZE 1517 case SVt_NULL: 1518 case SVt_NV: 1519 SET_SVANY_FOR_BODYLESS_IV(sv); 1520 SvFLAGS(sv) &= ~SVTYPEMASK; 1521 SvFLAGS(sv) |= SVt_IV; 1522 break; 1523 #else 1524 case SVt_NULL: 1525 SET_SVANY_FOR_BODYLESS_IV(sv); 1526 SvFLAGS(sv) &= ~SVTYPEMASK; 1527 SvFLAGS(sv) |= SVt_IV; 1528 break; 1529 case SVt_NV: 1530 sv_upgrade(sv, SVt_IV); 1531 break; 1532 #endif 1533 case SVt_PV: 1534 sv_upgrade(sv, SVt_PVIV); 1535 break; 1536 1537 case SVt_PVGV: 1538 if (!isGV_with_GP(sv)) 1539 break; 1540 /* FALLTHROUGH */ 1541 case SVt_PVAV: 1542 case SVt_PVHV: 1543 case SVt_PVCV: 1544 case SVt_PVFM: 1545 case SVt_PVIO: 1546 /* diag_listed_as: Can't coerce %s to %s in %s */ 1547 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), 1548 OP_DESC(PL_op)); 1549 NOT_REACHED; /* NOTREACHED */ 1550 break; 1551 default: NOOP; 1552 } 1553 (void)SvIOK_only(sv); /* validate number */ 1554 SvIV_set(sv, i); 1555 SvTAINT(sv); 1556 } 1557 1558 void 1559 Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i) 1560 { 1561 PERL_ARGS_ASSERT_SV_SETIV_MG; 1562 1563 sv_setiv(sv,i); 1564 SvSETMAGIC(sv); 1565 } 1566 1567 /* 1568 =for apidoc sv_setuv 1569 =for apidoc_item sv_setuv_mg 1570 1571 These copy an unsigned integer into the given SV, upgrading first if necessary. 1572 1573 1574 They differ only in that C<sv_setuv_mg> handles 'set' magic; C<sv_setuv> does 1575 not. 1576 1577 =cut 1578 */ 1579 1580 void 1581 Perl_sv_setuv(pTHX_ SV *const sv, const UV u) 1582 { 1583 PERL_ARGS_ASSERT_SV_SETUV; 1584 1585 /* With the if statement to ensure that integers are stored as IVs whenever 1586 possible: 1587 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 1588 1589 without 1590 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 1591 1592 If you wish to remove the following if statement, so that this routine 1593 (and its callers) always return UVs, please benchmark to see what the 1594 effect is. Modern CPUs may be different. Or may not :-) 1595 */ 1596 if (u <= (UV)IV_MAX) { 1597 sv_setiv(sv, (IV)u); 1598 return; 1599 } 1600 sv_setiv(sv, 0); 1601 SvIsUV_on(sv); 1602 SvUV_set(sv, u); 1603 } 1604 1605 void 1606 Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u) 1607 { 1608 PERL_ARGS_ASSERT_SV_SETUV_MG; 1609 1610 sv_setuv(sv,u); 1611 SvSETMAGIC(sv); 1612 } 1613 1614 /* 1615 =for apidoc sv_setnv 1616 =for apidoc_item sv_setnv_mg 1617 1618 These copy a double into the given SV, upgrading first if necessary. 1619 1620 They differ only in that C<sv_setnv_mg> handles 'set' magic; C<sv_setnv> does 1621 not. 1622 1623 =cut 1624 */ 1625 1626 void 1627 Perl_sv_setnv(pTHX_ SV *const sv, const NV num) 1628 { 1629 PERL_ARGS_ASSERT_SV_SETNV; 1630 1631 SV_CHECK_THINKFIRST_COW_DROP(sv); 1632 switch (SvTYPE(sv)) { 1633 case SVt_NULL: 1634 case SVt_IV: 1635 #if NVSIZE <= IVSIZE 1636 SET_SVANY_FOR_BODYLESS_NV(sv); 1637 SvFLAGS(sv) &= ~SVTYPEMASK; 1638 SvFLAGS(sv) |= SVt_NV; 1639 break; 1640 #else 1641 sv_upgrade(sv, SVt_NV); 1642 break; 1643 #endif 1644 case SVt_PV: 1645 case SVt_PVIV: 1646 sv_upgrade(sv, SVt_PVNV); 1647 break; 1648 1649 case SVt_PVGV: 1650 if (!isGV_with_GP(sv)) 1651 break; 1652 /* FALLTHROUGH */ 1653 case SVt_PVAV: 1654 case SVt_PVHV: 1655 case SVt_PVCV: 1656 case SVt_PVFM: 1657 case SVt_PVIO: 1658 /* diag_listed_as: Can't coerce %s to %s in %s */ 1659 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), 1660 OP_DESC(PL_op)); 1661 NOT_REACHED; /* NOTREACHED */ 1662 break; 1663 default: NOOP; 1664 } 1665 SvNV_set(sv, num); 1666 (void)SvNOK_only(sv); /* validate number */ 1667 SvTAINT(sv); 1668 } 1669 1670 void 1671 Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num) 1672 { 1673 PERL_ARGS_ASSERT_SV_SETNV_MG; 1674 1675 sv_setnv(sv,num); 1676 SvSETMAGIC(sv); 1677 } 1678 1679 /* 1680 =for apidoc sv_setrv_noinc 1681 =for apidoc_item sv_setrv_noinc_mg 1682 1683 Copies an SV pointer into the given SV as an SV reference, upgrading it if 1684 necessary. After this, C<SvRV(sv)> is equal to I<ref>. This does not adjust 1685 the reference count of I<ref>. The reference I<ref> must not be NULL. 1686 1687 C<sv_setrv_noinc_mg> will invoke 'set' magic on the SV; C<sv_setrv_noinc> will 1688 not. 1689 1690 =cut 1691 */ 1692 1693 void 1694 Perl_sv_setrv_noinc(pTHX_ SV *const sv, SV *const ref) 1695 { 1696 PERL_ARGS_ASSERT_SV_SETRV_NOINC; 1697 1698 SV_CHECK_THINKFIRST_COW_DROP(sv); 1699 prepare_SV_for_RV(sv); 1700 1701 SvOK_off(sv); 1702 SvRV_set(sv, ref); 1703 SvROK_on(sv); 1704 } 1705 1706 void 1707 Perl_sv_setrv_noinc_mg(pTHX_ SV *const sv, SV *const ref) 1708 { 1709 PERL_ARGS_ASSERT_SV_SETRV_NOINC_MG; 1710 1711 sv_setrv_noinc(sv, ref); 1712 SvSETMAGIC(sv); 1713 } 1714 1715 /* 1716 =for apidoc sv_setrv_inc 1717 =for apidoc_item sv_setrv_inc_mg 1718 1719 As C<sv_setrv_noinc> but increments the reference count of I<ref>. 1720 1721 C<sv_setrv_inc_mg> will invoke 'set' magic on the SV; C<sv_setrv_inc> will 1722 not. 1723 1724 =cut 1725 */ 1726 1727 void 1728 Perl_sv_setrv_inc(pTHX_ SV *const sv, SV *const ref) 1729 { 1730 PERL_ARGS_ASSERT_SV_SETRV_INC; 1731 1732 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref)); 1733 } 1734 1735 void 1736 Perl_sv_setrv_inc_mg(pTHX_ SV *const sv, SV *const ref) 1737 { 1738 PERL_ARGS_ASSERT_SV_SETRV_INC_MG; 1739 1740 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref)); 1741 SvSETMAGIC(sv); 1742 } 1743 1744 /* Return a cleaned-up, printable version of sv, for non-numeric, or 1745 * not incrementable warning display. 1746 * Originally part of S_not_a_number(). 1747 * The return value may be != tmpbuf. 1748 */ 1749 1750 STATIC const char * 1751 S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) { 1752 const char *pv; 1753 1754 PERL_ARGS_ASSERT_SV_DISPLAY; 1755 1756 if (DO_UTF8(sv)) { 1757 SV *dsv = newSVpvs_flags("", SVs_TEMP); 1758 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT); 1759 } else { 1760 char *d = tmpbuf; 1761 const char * const limit = tmpbuf + tmpbuf_size - 8; 1762 /* each *s can expand to 4 chars + "...\0", 1763 i.e. need room for 8 chars */ 1764 1765 const char *s = SvPVX_const(sv); 1766 const char * const end = s + SvCUR(sv); 1767 for ( ; s < end && d < limit; s++ ) { 1768 int ch = (U8) *s; 1769 if (! isASCII(ch) && !isPRINT_LC(ch)) { 1770 *d++ = 'M'; 1771 *d++ = '-'; 1772 1773 /* Map to ASCII "equivalent" of Latin1 */ 1774 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127); 1775 } 1776 if (ch == '\n') { 1777 *d++ = '\\'; 1778 *d++ = 'n'; 1779 } 1780 else if (ch == '\r') { 1781 *d++ = '\\'; 1782 *d++ = 'r'; 1783 } 1784 else if (ch == '\f') { 1785 *d++ = '\\'; 1786 *d++ = 'f'; 1787 } 1788 else if (ch == '\\') { 1789 *d++ = '\\'; 1790 *d++ = '\\'; 1791 } 1792 else if (ch == '\0') { 1793 *d++ = '\\'; 1794 *d++ = '0'; 1795 } 1796 else if (isPRINT_LC(ch)) 1797 *d++ = ch; 1798 else { 1799 *d++ = '^'; 1800 *d++ = toCTRL(ch); 1801 } 1802 } 1803 if (s < end) { 1804 *d++ = '.'; 1805 *d++ = '.'; 1806 *d++ = '.'; 1807 } 1808 *d = '\0'; 1809 pv = tmpbuf; 1810 } 1811 1812 return pv; 1813 } 1814 1815 /* Print an "isn't numeric" warning, using a cleaned-up, 1816 * printable version of the offending string 1817 */ 1818 1819 STATIC void 1820 S_not_a_number(pTHX_ SV *const sv) 1821 { 1822 char tmpbuf[64]; 1823 const char *pv; 1824 1825 PERL_ARGS_ASSERT_NOT_A_NUMBER; 1826 1827 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf)); 1828 1829 if (PL_op) 1830 Perl_warner(aTHX_ packWARN(WARN_NUMERIC), 1831 /* diag_listed_as: Argument "%s" isn't numeric%s */ 1832 "Argument \"%s\" isn't numeric in %s", pv, 1833 OP_DESC(PL_op)); 1834 else 1835 Perl_warner(aTHX_ packWARN(WARN_NUMERIC), 1836 /* diag_listed_as: Argument "%s" isn't numeric%s */ 1837 "Argument \"%s\" isn't numeric", pv); 1838 } 1839 1840 STATIC void 1841 S_not_incrementable(pTHX_ SV *const sv) { 1842 char tmpbuf[64]; 1843 const char *pv; 1844 1845 PERL_ARGS_ASSERT_NOT_INCREMENTABLE; 1846 1847 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf)); 1848 1849 Perl_warner(aTHX_ packWARN(WARN_NUMERIC), 1850 "Argument \"%s\" treated as 0 in increment (++)", pv); 1851 } 1852 1853 /* 1854 =for apidoc looks_like_number 1855 1856 Test if the content of an SV looks like a number (or is a number). 1857 C<Inf> and C<Infinity> are treated as numbers (so will not issue a 1858 non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is 1859 ignored. 1860 1861 =cut 1862 */ 1863 1864 I32 1865 Perl_looks_like_number(pTHX_ SV *const sv) 1866 { 1867 const char *sbegin; 1868 STRLEN len; 1869 int numtype; 1870 1871 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER; 1872 1873 if (SvPOK(sv) || SvPOKp(sv)) { 1874 sbegin = SvPV_nomg_const(sv, len); 1875 } 1876 else 1877 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK); 1878 numtype = grok_number(sbegin, len, NULL); 1879 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype; 1880 } 1881 1882 STATIC bool 1883 S_glob_2number(pTHX_ GV * const gv) 1884 { 1885 PERL_ARGS_ASSERT_GLOB_2NUMBER; 1886 1887 /* We know that all GVs stringify to something that is not-a-number, 1888 so no need to test that. */ 1889 if (ckWARN(WARN_NUMERIC)) 1890 { 1891 SV *const buffer = sv_newmortal(); 1892 gv_efullname3(buffer, gv, "*"); 1893 not_a_number(buffer); 1894 } 1895 /* We just want something true to return, so that S_sv_2iuv_common 1896 can tail call us and return true. */ 1897 return TRUE; 1898 } 1899 1900 /* Actually, ISO C leaves conversion of UV to IV undefined, but 1901 until proven guilty, assume that things are not that bad... */ 1902 1903 /* 1904 NV_PRESERVES_UV: 1905 1906 As 64 bit platforms often have an NV that doesn't preserve all bits of 1907 an IV (an assumption perl has been based on to date) it becomes necessary 1908 to remove the assumption that the NV always carries enough precision to 1909 recreate the IV whenever needed, and that the NV is the canonical form. 1910 Instead, IV/UV and NV need to be given equal rights. So as to not lose 1911 precision as a side effect of conversion (which would lead to insanity 1912 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is 1913 1) to distinguish between IV/UV/NV slots that have a valid conversion cached 1914 where precision was lost, and IV/UV/NV slots that have a valid conversion 1915 which has lost no precision 1916 2) to ensure that if a numeric conversion to one form is requested that 1917 would lose precision, the precise conversion (or differently 1918 imprecise conversion) is also performed and cached, to prevent 1919 requests for different numeric formats on the same SV causing 1920 lossy conversion chains. (lossless conversion chains are perfectly 1921 acceptable (still)) 1922 1923 1924 flags are used: 1925 SvIOKp is true if the IV slot contains a valid value 1926 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true) 1927 SvNOKp is true if the NV slot contains a valid value 1928 SvNOK is true only if the NV value is accurate 1929 1930 so 1931 while converting from PV to NV, check to see if converting that NV to an 1932 IV(or UV) would lose accuracy over a direct conversion from PV to 1933 IV(or UV). If it would, cache both conversions, return NV, but mark 1934 SV as IOK NOKp (ie not NOK). 1935 1936 While converting from PV to IV, check to see if converting that IV to an 1937 NV would lose accuracy over a direct conversion from PV to NV. If it 1938 would, cache both conversions, flag similarly. 1939 1940 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite 1941 correctly because if IV & NV were set NV *always* overruled. 1942 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning 1943 changes - now IV and NV together means that the two are interchangeable: 1944 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; 1945 1946 The benefit of this is that operations such as pp_add know that if 1947 SvIOK is true for both left and right operands, then integer addition 1948 can be used instead of floating point (for cases where the result won't 1949 overflow). Before, floating point was always used, which could lead to 1950 loss of precision compared with integer addition. 1951 1952 * making IV and NV equal status should make maths accurate on 64 bit 1953 platforms 1954 * may speed up maths somewhat if pp_add and friends start to use 1955 integers when possible instead of fp. (Hopefully the overhead in 1956 looking for SvIOK and checking for overflow will not outweigh the 1957 fp to integer speedup) 1958 * will slow down integer operations (callers of SvIV) on "inaccurate" 1959 values, as the change from SvIOK to SvIOKp will cause a call into 1960 sv_2iv each time rather than a macro access direct to the IV slot 1961 * should speed up number->string conversion on integers as IV is 1962 favoured when IV and NV are equally accurate 1963 1964 #################################################################### 1965 You had better be using SvIOK_notUV if you want an IV for arithmetic: 1966 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV. 1967 On the other hand, SvUOK is true iff UV. 1968 #################################################################### 1969 1970 Your mileage will vary depending your CPU's relative fp to integer 1971 performance ratio. 1972 */ 1973 1974 #ifndef NV_PRESERVES_UV 1975 # define IS_NUMBER_UNDERFLOW_IV 1 1976 # define IS_NUMBER_UNDERFLOW_UV 2 1977 # define IS_NUMBER_IV_AND_UV 2 1978 # define IS_NUMBER_OVERFLOW_IV 4 1979 # define IS_NUMBER_OVERFLOW_UV 5 1980 1981 /* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */ 1982 1983 /* For sv_2nv these three cases are "SvNOK and don't bother casting" */ 1984 STATIC int 1985 S_sv_2iuv_non_preserve(pTHX_ SV *const sv 1986 # ifdef DEBUGGING 1987 , I32 numtype 1988 # endif 1989 ) 1990 { 1991 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE; 1992 PERL_UNUSED_CONTEXT; 1993 1994 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); 1995 if (SvNVX(sv) < (NV)IV_MIN) { 1996 (void)SvIOKp_on(sv); 1997 (void)SvNOK_on(sv); 1998 SvIV_set(sv, IV_MIN); 1999 return IS_NUMBER_UNDERFLOW_IV; 2000 } 2001 if (SvNVX(sv) > (NV)UV_MAX) { 2002 (void)SvIOKp_on(sv); 2003 (void)SvNOK_on(sv); 2004 SvIsUV_on(sv); 2005 SvUV_set(sv, UV_MAX); 2006 return IS_NUMBER_OVERFLOW_UV; 2007 } 2008 (void)SvIOKp_on(sv); 2009 (void)SvNOK_on(sv); 2010 /* Can't use strtol etc to convert this string. (See truth table in 2011 sv_2iv */ 2012 if (SvNVX(sv) < IV_MAX_P1) { 2013 SvIV_set(sv, I_V(SvNVX(sv))); 2014 if ((NV)(SvIVX(sv)) == SvNVX(sv)) { 2015 SvIOK_on(sv); /* Integer is precise. NOK, IOK */ 2016 } else { 2017 /* Integer is imprecise. NOK, IOKp */ 2018 } 2019 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV; 2020 } 2021 SvIsUV_on(sv); 2022 SvUV_set(sv, U_V(SvNVX(sv))); 2023 if ((NV)(SvUVX(sv)) == SvNVX(sv)) { 2024 if (SvUVX(sv) == UV_MAX) { 2025 /* As we know that NVs don't preserve UVs, UV_MAX cannot 2026 possibly be preserved by NV. Hence, it must be overflow. 2027 NOK, IOKp */ 2028 return IS_NUMBER_OVERFLOW_UV; 2029 } 2030 SvIOK_on(sv); /* Integer is precise. NOK, UOK */ 2031 } else { 2032 /* Integer is imprecise. NOK, IOKp */ 2033 } 2034 return IS_NUMBER_OVERFLOW_IV; 2035 } 2036 #endif /* !NV_PRESERVES_UV*/ 2037 2038 /* If numtype is infnan, set the NV of the sv accordingly. 2039 * If numtype is anything else, try setting the NV using Atof(PV). */ 2040 static void 2041 S_sv_setnv(pTHX_ SV* sv, int numtype) 2042 { 2043 bool pok = cBOOL(SvPOK(sv)); 2044 bool nok = FALSE; 2045 #ifdef NV_INF 2046 if ((numtype & IS_NUMBER_INFINITY)) { 2047 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF); 2048 nok = TRUE; 2049 } else 2050 #endif 2051 #ifdef NV_NAN 2052 if ((numtype & IS_NUMBER_NAN)) { 2053 SvNV_set(sv, NV_NAN); 2054 nok = TRUE; 2055 } else 2056 #endif 2057 if (pok) { 2058 SvNV_set(sv, Atof(SvPVX_const(sv))); 2059 /* Purposefully no true nok here, since we don't want to blow 2060 * away the possible IOK/UV of an existing sv. */ 2061 } 2062 if (nok) { 2063 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */ 2064 if (pok) 2065 SvPOK_on(sv); /* PV is okay, though. */ 2066 } 2067 } 2068 2069 #ifndef NV_PRESERVES_UV 2070 # define MAX_UV_PRESERVED_IN_NV (((UV)1 << NV_PRESERVES_UV_BITS) - 1) 2071 # define MAX_IV_PRESERVED_IN_NV ((IV)MAX_UV_PRESERVED_IN_NV) 2072 # define MIN_IV_PRESERVED_IN_NV (-MAX_IV_PRESERVED_IN_NV) 2073 /* We presume that (IV)MAX_UV_PRESERVED_IN_NV and (-MAX_IV_PRESERVED_IN_NV) 2074 above will not overflow if the condition below holds true: */ 2075 STATIC_ASSERT_DECL(MAX_UV_PRESERVED_IN_NV <= (UV)IV_MAX); 2076 #endif 2077 2078 STATIC bool 2079 S_sv_2iuv_common(pTHX_ SV *const sv) 2080 { 2081 PERL_ARGS_ASSERT_SV_2IUV_COMMON; 2082 2083 if (SvNOKp(sv)) { 2084 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv 2085 * without also getting a cached IV/UV from it at the same time 2086 * (ie PV->NV conversion should detect loss of accuracy and cache 2087 * IV or UV at same time to avoid this. */ 2088 /* IV-over-UV optimisation - choose to cache IV if possible */ 2089 2090 if (SvTYPE(sv) == SVt_NV) 2091 sv_upgrade(sv, SVt_PVNV); 2092 2093 got_nv: 2094 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ 2095 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost 2096 certainly cast into the IV range at IV_MAX, whereas the correct 2097 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary 2098 cases go to UV */ 2099 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2100 if (Perl_isnan(SvNVX(sv))) { 2101 SvUV_set(sv, 0); 2102 SvIsUV_on(sv); 2103 return FALSE; 2104 } 2105 #endif 2106 if (SvNVX(sv) < (NV)IV_MAX + 0.5) { 2107 SvIV_set(sv, I_V(SvNVX(sv))); 2108 if (SvNVX(sv) == (NV) SvIVX(sv) 2109 #ifndef NV_PRESERVES_UV 2110 /* Optimizing compilers might merge two comparisons below 2111 into single comparison */ 2112 && MIN_IV_PRESERVED_IN_NV <= SvIVX(sv) 2113 && SvIVX(sv) <= MAX_IV_PRESERVED_IN_NV 2114 /* Don't flag it as "accurately an integer" if the number 2115 came from a (by definition imprecise) NV operation, and 2116 we're outside the range of NV integer precision */ 2117 #endif 2118 ) { 2119 if (SvNOK(sv)) 2120 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ 2121 else { 2122 /* scalar has trailing garbage, eg "42a" */ 2123 } 2124 DEBUG_c(PerlIO_printf(Perl_debug_log, 2125 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n", 2126 PTR2UV(sv), 2127 SvNVX(sv), 2128 SvIVX(sv))); 2129 2130 } else { 2131 /* IV not precise. No need to convert from PV, as NV 2132 conversion would already have cached IV if it detected 2133 that PV->IV would be better than PV->NV->IV 2134 flags already correct - don't set public IOK. */ 2135 DEBUG_c(PerlIO_printf(Perl_debug_log, 2136 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n", 2137 PTR2UV(sv), 2138 SvNVX(sv), 2139 SvIVX(sv))); 2140 } 2141 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, 2142 but the cast (NV)IV_MIN rounds to a the value less (more 2143 negative) than IV_MIN which happens to be equal to SvNVX ?? 2144 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and 2145 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and 2146 (NV)UVX == NVX are both true, but the values differ. :-( 2147 Hopefully for 2s complement IV_MIN is something like 2148 0x8000000000000000 which will be exact. NWC */ 2149 } 2150 else { 2151 SvUV_set(sv, U_V(SvNVX(sv))); 2152 if ( 2153 (SvNVX(sv) == (NV) SvUVX(sv)) 2154 #ifndef NV_PRESERVES_UV 2155 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ 2156 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ 2157 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) 2158 /* Don't flag it as "accurately an integer" if the number 2159 came from a (by definition imprecise) NV operation, and 2160 we're outside the range of NV integer precision */ 2161 #endif 2162 && SvNOK(sv) 2163 ) 2164 SvIOK_on(sv); 2165 SvIsUV_on(sv); 2166 DEBUG_c(PerlIO_printf(Perl_debug_log, 2167 "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n", 2168 PTR2UV(sv), 2169 SvUVX(sv), 2170 SvUVX(sv))); 2171 } 2172 } 2173 else if (SvPOKp(sv)) { 2174 UV value; 2175 int numtype; 2176 const char *s = SvPVX_const(sv); 2177 const STRLEN cur = SvCUR(sv); 2178 2179 /* short-cut for a single digit string like "1" */ 2180 2181 if (cur == 1) { 2182 char c = *s; 2183 if (isDIGIT(c)) { 2184 if (SvTYPE(sv) < SVt_PVIV) 2185 sv_upgrade(sv, SVt_PVIV); 2186 (void)SvIOK_on(sv); 2187 SvIV_set(sv, (IV)(c - '0')); 2188 return FALSE; 2189 } 2190 } 2191 2192 numtype = grok_number(s, cur, &value); 2193 /* We want to avoid a possible problem when we cache an IV/ a UV which 2194 may be later translated to an NV, and the resulting NV is not 2195 the same as the direct translation of the initial string 2196 (eg 123.456 can shortcut to the IV 123 with atol(), but we must 2197 be careful to ensure that the value with the .456 is around if the 2198 NV value is requested in the future). 2199 2200 This means that if we cache such an IV/a UV, we need to cache the 2201 NV as well. Moreover, we trade speed for space, and do not 2202 cache the NV if we are sure it's not needed. 2203 */ 2204 2205 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */ 2206 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2207 == IS_NUMBER_IN_UV) { 2208 /* It's definitely an integer, only upgrade to PVIV */ 2209 if (SvTYPE(sv) < SVt_PVIV) 2210 sv_upgrade(sv, SVt_PVIV); 2211 (void)SvIOK_on(sv); 2212 } else if (SvTYPE(sv) < SVt_PVNV) 2213 sv_upgrade(sv, SVt_PVNV); 2214 2215 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) { 2216 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING))) 2217 not_a_number(sv); 2218 S_sv_setnv(aTHX_ sv, numtype); 2219 goto got_nv; /* Fill IV/UV slot and set IOKp */ 2220 } 2221 2222 /* If NVs preserve UVs then we only use the UV value if we know that 2223 we aren't going to call atof() below. If NVs don't preserve UVs 2224 then the value returned may have more precision than atof() will 2225 return, even though value isn't perfectly accurate. */ 2226 if ((numtype & (IS_NUMBER_IN_UV 2227 #ifdef NV_PRESERVES_UV 2228 | IS_NUMBER_NOT_INT 2229 #endif 2230 )) == IS_NUMBER_IN_UV) { 2231 /* This won't turn off the public IOK flag if it was set above */ 2232 (void)SvIOKp_on(sv); 2233 2234 if (!(numtype & IS_NUMBER_NEG)) { 2235 /* positive */; 2236 if (value <= (UV)IV_MAX) { 2237 SvIV_set(sv, (IV)value); 2238 } else { 2239 /* it didn't overflow, and it was positive. */ 2240 SvUV_set(sv, value); 2241 SvIsUV_on(sv); 2242 } 2243 } else { 2244 /* 2s complement assumption */ 2245 if (value <= (UV)IV_MIN) { 2246 SvIV_set(sv, value == (UV)IV_MIN 2247 ? IV_MIN : -(IV)value); 2248 } else { 2249 /* Too negative for an IV. This is a double upgrade, but 2250 I'm assuming it will be rare. */ 2251 if (SvTYPE(sv) < SVt_PVNV) 2252 sv_upgrade(sv, SVt_PVNV); 2253 SvNOK_on(sv); 2254 SvIOK_off(sv); 2255 SvIOKp_on(sv); 2256 SvNV_set(sv, -(NV)value); 2257 SvIV_set(sv, IV_MIN); 2258 } 2259 } 2260 } 2261 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we 2262 will be in the previous block to set the IV slot, and the next 2263 block to set the NV slot. So no else here. */ 2264 2265 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2266 != IS_NUMBER_IN_UV) { 2267 /* It wasn't an (integer that doesn't overflow the UV). */ 2268 S_sv_setnv(aTHX_ sv, numtype); 2269 2270 if (! numtype && ckWARN(WARN_NUMERIC)) 2271 not_a_number(sv); 2272 2273 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n", 2274 PTR2UV(sv), SvNVX(sv))); 2275 2276 #ifdef NV_PRESERVES_UV 2277 SvNOKp_on(sv); 2278 if (numtype) 2279 SvNOK_on(sv); 2280 goto got_nv; /* Fill IV/UV slot and set IOKp, maybe IOK */ 2281 #else /* NV_PRESERVES_UV */ 2282 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2283 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) { 2284 /* The IV/UV slot will have been set from value returned by 2285 grok_number above. The NV slot has just been set using 2286 Atof. */ 2287 SvNOK_on(sv); 2288 assert (SvIOKp(sv)); 2289 } else { 2290 if (((UV)1 << NV_PRESERVES_UV_BITS) > 2291 U_V(Perl_fabs(SvNVX(sv)))) { 2292 /* Small enough to preserve all bits. */ 2293 (void)SvIOKp_on(sv); 2294 SvNOK_on(sv); 2295 SvIV_set(sv, I_V(SvNVX(sv))); 2296 if ((NV)(SvIVX(sv)) == SvNVX(sv)) 2297 SvIOK_on(sv); 2298 /* There had been runtime checking for 2299 "U_V(Perl_fabs(SvNVX(sv))) < (UV)IV_MAX" here to ensure 2300 that this NV is in the preserved range, but this should 2301 be always true if the following assertion is true: */ 2302 STATIC_ASSERT_STMT(((UV)1 << NV_PRESERVES_UV_BITS) <= 2303 (UV)IV_MAX); 2304 } else { 2305 /* IN_UV NOT_INT 2306 0 0 already failed to read UV. 2307 0 1 already failed to read UV. 2308 1 0 you won't get here in this case. IV/UV 2309 slot set, public IOK, Atof() unneeded. 2310 1 1 already read UV. 2311 so there's no point in sv_2iuv_non_preserve() attempting 2312 to use atol, strtol, strtoul etc. */ 2313 # ifdef DEBUGGING 2314 sv_2iuv_non_preserve (sv, numtype); 2315 # else 2316 sv_2iuv_non_preserve (sv); 2317 # endif 2318 } 2319 } 2320 /* It might be more code efficient to go through the entire logic above 2321 and conditionally set with SvIOKp_on() rather than SvIOK(), but it 2322 gets complex and potentially buggy, so more programmer efficient 2323 to do it this way, by turning off the public flags: */ 2324 if (!numtype) 2325 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK); 2326 #endif /* NV_PRESERVES_UV */ 2327 } 2328 } 2329 else { 2330 if (isGV_with_GP(sv)) 2331 return glob_2number(MUTABLE_GV(sv)); 2332 2333 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) 2334 report_uninit(sv); 2335 if (SvTYPE(sv) < SVt_IV) 2336 /* Typically the caller expects that sv_any is not NULL now. */ 2337 sv_upgrade(sv, SVt_IV); 2338 /* Return 0 from the caller. */ 2339 return TRUE; 2340 } 2341 return FALSE; 2342 } 2343 2344 /* 2345 =for apidoc sv_2iv_flags 2346 2347 Return the integer value of an SV, doing any necessary string 2348 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. 2349 Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros. 2350 2351 =cut 2352 */ 2353 2354 IV 2355 Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags) 2356 { 2357 PERL_ARGS_ASSERT_SV_2IV_FLAGS; 2358 2359 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV 2360 && SvTYPE(sv) != SVt_PVFM); 2361 2362 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 2363 mg_get(sv); 2364 2365 if (SvROK(sv)) { 2366 if (SvAMAGIC(sv)) { 2367 SV * tmpstr; 2368 if (flags & SV_SKIP_OVERLOAD) 2369 return 0; 2370 tmpstr = AMG_CALLunary(sv, numer_amg); 2371 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2372 return SvIV(tmpstr); 2373 } 2374 } 2375 return PTR2IV(SvRV(sv)); 2376 } 2377 2378 if (SvVALID(sv) || isREGEXP(sv)) { 2379 /* FBMs use the space for SvIVX and SvNVX for other purposes, so 2380 must not let them cache IVs. 2381 In practice they are extremely unlikely to actually get anywhere 2382 accessible by user Perl code - the only way that I'm aware of is when 2383 a constant subroutine which is used as the second argument to index. 2384 2385 Regexps have no SvIVX and SvNVX fields. 2386 */ 2387 assert(SvPOKp(sv)); 2388 { 2389 UV value; 2390 const char * const ptr = 2391 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv); 2392 const int numtype 2393 = grok_number(ptr, SvCUR(sv), &value); 2394 2395 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2396 == IS_NUMBER_IN_UV) { 2397 /* It's definitely an integer */ 2398 if (numtype & IS_NUMBER_NEG) { 2399 if (value < (UV)IV_MIN) 2400 return -(IV)value; 2401 } else { 2402 if (value < (UV)IV_MAX) 2403 return (IV)value; 2404 } 2405 } 2406 2407 /* Quite wrong but no good choices. */ 2408 if ((numtype & IS_NUMBER_INFINITY)) { 2409 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX; 2410 } else if ((numtype & IS_NUMBER_NAN)) { 2411 return 0; /* So wrong. */ 2412 } 2413 2414 if (!numtype) { 2415 if (ckWARN(WARN_NUMERIC)) 2416 not_a_number(sv); 2417 } 2418 return I_V(Atof(ptr)); 2419 } 2420 } 2421 2422 if (SvTHINKFIRST(sv)) { 2423 if (SvREADONLY(sv) && !SvOK(sv)) { 2424 if (ckWARN(WARN_UNINITIALIZED)) 2425 report_uninit(sv); 2426 return 0; 2427 } 2428 } 2429 2430 if (!SvIOKp(sv)) { 2431 if (S_sv_2iuv_common(aTHX_ sv)) 2432 return 0; 2433 } 2434 2435 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n", 2436 PTR2UV(sv),SvIVX(sv))); 2437 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv); 2438 } 2439 2440 /* 2441 =for apidoc sv_2uv_flags 2442 2443 Return the unsigned integer value of an SV, doing any necessary string 2444 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. 2445 Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros. 2446 2447 =for apidoc Amnh||SV_GMAGIC 2448 2449 =cut 2450 */ 2451 2452 UV 2453 Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags) 2454 { 2455 PERL_ARGS_ASSERT_SV_2UV_FLAGS; 2456 2457 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 2458 mg_get(sv); 2459 2460 if (SvROK(sv)) { 2461 if (SvAMAGIC(sv)) { 2462 SV *tmpstr; 2463 if (flags & SV_SKIP_OVERLOAD) 2464 return 0; 2465 tmpstr = AMG_CALLunary(sv, numer_amg); 2466 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2467 return SvUV(tmpstr); 2468 } 2469 } 2470 return PTR2UV(SvRV(sv)); 2471 } 2472 2473 if (SvVALID(sv) || isREGEXP(sv)) { 2474 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use 2475 the same flag bit as SVf_IVisUV, so must not let them cache IVs. 2476 Regexps have no SvIVX and SvNVX fields. */ 2477 assert(SvPOKp(sv)); 2478 { 2479 UV value; 2480 const char * const ptr = 2481 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv); 2482 const int numtype 2483 = grok_number(ptr, SvCUR(sv), &value); 2484 2485 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2486 == IS_NUMBER_IN_UV) { 2487 /* It's definitely an integer */ 2488 if (!(numtype & IS_NUMBER_NEG)) 2489 return value; 2490 } 2491 2492 /* Quite wrong but no good choices. */ 2493 if ((numtype & IS_NUMBER_INFINITY)) { 2494 return UV_MAX; /* So wrong. */ 2495 } else if ((numtype & IS_NUMBER_NAN)) { 2496 return 0; /* So wrong. */ 2497 } 2498 2499 if (!numtype) { 2500 if (ckWARN(WARN_NUMERIC)) 2501 not_a_number(sv); 2502 } 2503 return U_V(Atof(ptr)); 2504 } 2505 } 2506 2507 if (SvTHINKFIRST(sv)) { 2508 if (SvREADONLY(sv) && !SvOK(sv)) { 2509 if (ckWARN(WARN_UNINITIALIZED)) 2510 report_uninit(sv); 2511 return 0; 2512 } 2513 } 2514 2515 if (!SvIOKp(sv)) { 2516 if (S_sv_2iuv_common(aTHX_ sv)) 2517 return 0; 2518 } 2519 2520 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n", 2521 PTR2UV(sv),SvUVX(sv))); 2522 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv); 2523 } 2524 2525 /* 2526 =for apidoc sv_2nv_flags 2527 2528 Return the num value of an SV, doing any necessary string or integer 2529 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. 2530 Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros. 2531 2532 =cut 2533 */ 2534 2535 NV 2536 Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) 2537 { 2538 PERL_ARGS_ASSERT_SV_2NV_FLAGS; 2539 2540 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV 2541 && SvTYPE(sv) != SVt_PVFM); 2542 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) { 2543 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use 2544 the same flag bit as SVf_IVisUV, so must not let them cache NVs. 2545 Regexps have no SvIVX and SvNVX fields. */ 2546 const char *ptr; 2547 if (flags & SV_GMAGIC) 2548 mg_get(sv); 2549 if (SvNOKp(sv)) 2550 return SvNVX(sv); 2551 if (SvPOKp(sv) && !SvIOKp(sv)) { 2552 ptr = SvPVX_const(sv); 2553 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) && 2554 !grok_number(ptr, SvCUR(sv), NULL)) 2555 not_a_number(sv); 2556 return Atof(ptr); 2557 } 2558 if (SvIOKp(sv)) { 2559 if (SvIsUV(sv)) 2560 return (NV)SvUVX(sv); 2561 else 2562 return (NV)SvIVX(sv); 2563 } 2564 if (SvROK(sv)) { 2565 goto return_rok; 2566 } 2567 assert(SvTYPE(sv) >= SVt_PVMG); 2568 /* This falls through to the report_uninit near the end of the 2569 function. */ 2570 } else if (SvTHINKFIRST(sv)) { 2571 if (SvROK(sv)) { 2572 return_rok: 2573 if (SvAMAGIC(sv)) { 2574 SV *tmpstr; 2575 if (flags & SV_SKIP_OVERLOAD) 2576 return 0; 2577 tmpstr = AMG_CALLunary(sv, numer_amg); 2578 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { 2579 return SvNV(tmpstr); 2580 } 2581 } 2582 return PTR2NV(SvRV(sv)); 2583 } 2584 if (SvREADONLY(sv) && !SvOK(sv)) { 2585 if (ckWARN(WARN_UNINITIALIZED)) 2586 report_uninit(sv); 2587 return 0.0; 2588 } 2589 } 2590 if (SvTYPE(sv) < SVt_NV) { 2591 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */ 2592 sv_upgrade(sv, SVt_NV); 2593 CLANG_DIAG_IGNORE_STMT(-Wthread-safety); 2594 DEBUG_c({ 2595 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 2596 STORE_LC_NUMERIC_SET_STANDARD(); 2597 PerlIO_printf(Perl_debug_log, 2598 "0x%" UVxf " num(%" NVgf ")\n", 2599 PTR2UV(sv), SvNVX(sv)); 2600 RESTORE_LC_NUMERIC(); 2601 }); 2602 CLANG_DIAG_RESTORE_STMT; 2603 2604 } 2605 else if (SvTYPE(sv) < SVt_PVNV) 2606 sv_upgrade(sv, SVt_PVNV); 2607 if (SvNOKp(sv)) { 2608 return SvNVX(sv); 2609 } 2610 if (SvIOKp(sv)) { 2611 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv)); 2612 #ifdef NV_PRESERVES_UV 2613 if (SvIOK(sv)) 2614 SvNOK_on(sv); 2615 else 2616 SvNOKp_on(sv); 2617 #else 2618 /* Only set the public NV OK flag if this NV preserves the IV */ 2619 /* Check it's not 0xFFFFFFFFFFFFFFFF */ 2620 if (SvIOK(sv) && 2621 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv)))) 2622 : (SvIVX(sv) == I_V(SvNVX(sv)))) 2623 SvNOK_on(sv); 2624 else 2625 SvNOKp_on(sv); 2626 #endif 2627 } 2628 else if (SvPOKp(sv)) { 2629 UV value; 2630 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); 2631 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC)) 2632 not_a_number(sv); 2633 #ifdef NV_PRESERVES_UV 2634 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) 2635 == IS_NUMBER_IN_UV) { 2636 /* It's definitely an integer */ 2637 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value); 2638 } else { 2639 S_sv_setnv(aTHX_ sv, numtype); 2640 } 2641 if (numtype) 2642 SvNOK_on(sv); 2643 else 2644 SvNOKp_on(sv); 2645 #else 2646 SvNV_set(sv, Atof(SvPVX_const(sv))); 2647 /* Only set the public NV OK flag if this NV preserves the value in 2648 the PV at least as well as an IV/UV would. 2649 Not sure how to do this 100% reliably. */ 2650 /* if that shift count is out of range then Configure's test is 2651 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS == 2652 UV_BITS */ 2653 if (((UV)1 << NV_PRESERVES_UV_BITS) > U_V(Perl_fabs(SvNVX(sv)))) { 2654 SvNOK_on(sv); /* Definitely small enough to preserve all bits */ 2655 } else if (!(numtype & IS_NUMBER_IN_UV)) { 2656 /* Can't use strtol etc to convert this string, so don't try. 2657 sv_2iv and sv_2uv will use the NV to convert, not the PV. */ 2658 SvNOK_on(sv); 2659 } else { 2660 /* value has been set. It may not be precise. */ 2661 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) { 2662 /* 2s complement assumption for (UV)IV_MIN */ 2663 SvNOK_on(sv); /* Integer is too negative. */ 2664 } else { 2665 SvNOKp_on(sv); 2666 SvIOKp_on(sv); 2667 2668 if (numtype & IS_NUMBER_NEG) { 2669 /* -IV_MIN is undefined, but we should never reach 2670 * this point with both IS_NUMBER_NEG and value == 2671 * (UV)IV_MIN */ 2672 assert(value != (UV)IV_MIN); 2673 SvIV_set(sv, -(IV)value); 2674 } else if (value <= (UV)IV_MAX) { 2675 SvIV_set(sv, (IV)value); 2676 } else { 2677 SvUV_set(sv, value); 2678 SvIsUV_on(sv); 2679 } 2680 2681 if (numtype & IS_NUMBER_NOT_INT) { 2682 /* I believe that even if the original PV had decimals, 2683 they are lost beyond the limit of the FP precision. 2684 However, neither is canonical, so both only get p 2685 flags. NWC, 2000/11/25 */ 2686 /* Both already have p flags, so do nothing */ 2687 } else { 2688 const NV nv = SvNVX(sv); 2689 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */ 2690 if (SvNVX(sv) < (NV)IV_MAX + 0.5) { 2691 if (SvIVX(sv) == I_V(nv)) { 2692 SvNOK_on(sv); 2693 } else { 2694 /* It had no "." so it must be integer. */ 2695 } 2696 SvIOK_on(sv); 2697 } else { 2698 /* between IV_MAX and NV(UV_MAX). 2699 Could be slightly > UV_MAX */ 2700 2701 if (numtype & IS_NUMBER_NOT_INT) { 2702 /* UV and NV both imprecise. */ 2703 } else { 2704 const UV nv_as_uv = U_V(nv); 2705 2706 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) { 2707 SvNOK_on(sv); 2708 } 2709 SvIOK_on(sv); 2710 } 2711 } 2712 } 2713 } 2714 } 2715 /* It might be more code efficient to go through the entire logic above 2716 and conditionally set with SvNOKp_on() rather than SvNOK(), but it 2717 gets complex and potentially buggy, so more programmer efficient 2718 to do it this way, by turning off the public flags: */ 2719 if (!numtype) 2720 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK); 2721 #endif /* NV_PRESERVES_UV */ 2722 } 2723 else { 2724 if (isGV_with_GP(sv)) { 2725 glob_2number(MUTABLE_GV(sv)); 2726 return 0.0; 2727 } 2728 2729 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) 2730 report_uninit(sv); 2731 assert (SvTYPE(sv) >= SVt_NV); 2732 /* Typically the caller expects that sv_any is not NULL now. */ 2733 /* XXX Ilya implies that this is a bug in callers that assume this 2734 and ideally should be fixed. */ 2735 return 0.0; 2736 } 2737 CLANG_DIAG_IGNORE_STMT(-Wthread-safety); 2738 DEBUG_c({ 2739 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 2740 STORE_LC_NUMERIC_SET_STANDARD(); 2741 PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n", 2742 PTR2UV(sv), SvNVX(sv)); 2743 RESTORE_LC_NUMERIC(); 2744 }); 2745 CLANG_DIAG_RESTORE_STMT; 2746 return SvNVX(sv); 2747 } 2748 2749 /* 2750 =for apidoc sv_2num 2751 2752 Return an SV with the numeric value of the source SV, doing any necessary 2753 reference or overload conversion. The caller is expected to have handled 2754 get-magic already. 2755 2756 =cut 2757 */ 2758 2759 SV * 2760 Perl_sv_2num(pTHX_ SV *const sv) 2761 { 2762 PERL_ARGS_ASSERT_SV_2NUM; 2763 2764 if (!SvROK(sv)) 2765 return sv; 2766 if (SvAMAGIC(sv)) { 2767 SV * const tmpsv = AMG_CALLunary(sv, numer_amg); 2768 TAINT_IF(tmpsv && SvTAINTED(tmpsv)); 2769 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) 2770 return sv_2num(tmpsv); 2771 } 2772 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv)))); 2773 } 2774 2775 /* int2str_table: lookup table containing string representations of all 2776 * two digit numbers. For example, int2str_table.arr[0] is "00" and 2777 * int2str_table.arr[12*2] is "12". 2778 * 2779 * We are going to read two bytes at a time, so we have to ensure that 2780 * the array is aligned to a 2 byte boundary. That's why it was made a 2781 * union with a dummy U16 member. */ 2782 static const union { 2783 char arr[200]; 2784 U16 dummy; 2785 } int2str_table = {{ 2786 '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', 2787 '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', 2788 '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', 2789 '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', 2790 '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', 2791 '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', 2792 '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', 2793 '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', 2794 '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', 2795 '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', 2796 '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', 2797 '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', 2798 '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', 2799 '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', 2800 '9', '8', '9', '9' 2801 }}; 2802 2803 /* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or 2804 * UV as a string towards the end of buf, and return pointers to start and 2805 * end of it. 2806 * 2807 * We assume that buf is at least TYPE_CHARS(UV) long. 2808 */ 2809 2810 PERL_STATIC_INLINE char * 2811 S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob) 2812 { 2813 char *ptr = buf + TYPE_CHARS(UV); 2814 char * const ebuf = ptr; 2815 int sign; 2816 U16 *word_ptr, *word_table; 2817 2818 PERL_ARGS_ASSERT_UIV_2BUF; 2819 2820 /* ptr has to be properly aligned, because we will cast it to U16* */ 2821 assert(PTR2nat(ptr) % 2 == 0); 2822 /* we are going to read/write two bytes at a time */ 2823 word_ptr = (U16*)ptr; 2824 word_table = (U16*)int2str_table.arr; 2825 2826 if (UNLIKELY(is_uv)) 2827 sign = 0; 2828 else if (iv >= 0) { 2829 uv = iv; 2830 sign = 0; 2831 } else { 2832 /* Using 0- here to silence bogus warning from MS VC */ 2833 uv = (UV) (0 - (UV) iv); 2834 sign = 1; 2835 } 2836 2837 while (uv > 99) { 2838 *--word_ptr = word_table[uv % 100]; 2839 uv /= 100; 2840 } 2841 ptr = (char*)word_ptr; 2842 2843 if (uv < 10) 2844 *--ptr = (char)uv + '0'; 2845 else { 2846 *--word_ptr = word_table[uv]; 2847 ptr = (char*)word_ptr; 2848 } 2849 2850 if (sign) 2851 *--ptr = '-'; 2852 2853 *peob = ebuf; 2854 return ptr; 2855 } 2856 2857 /* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an 2858 * infinity or a not-a-number, writes the appropriate strings to the 2859 * buffer, including a zero byte. On success returns the written length, 2860 * excluding the zero byte, on failure (not an infinity, not a nan) 2861 * returns zero, assert-fails on maxlen being too short. 2862 * 2863 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only 2864 * shared string constants we point to, instead of generating a new 2865 * string for each instance. */ 2866 STATIC size_t 2867 S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) { 2868 char* s = buffer; 2869 assert(maxlen >= 4); 2870 if (Perl_isinf(nv)) { 2871 if (nv < 0) { 2872 if (maxlen < 5) /* "-Inf\0" */ 2873 return 0; 2874 *s++ = '-'; 2875 } else if (plus) { 2876 *s++ = '+'; 2877 } 2878 *s++ = 'I'; 2879 *s++ = 'n'; 2880 *s++ = 'f'; 2881 } 2882 else if (Perl_isnan(nv)) { 2883 *s++ = 'N'; 2884 *s++ = 'a'; 2885 *s++ = 'N'; 2886 /* XXX optionally output the payload mantissa bits as 2887 * "(unsigned)" (to match the nan("...") C99 function, 2888 * or maybe as "(0xhhh...)" would make more sense... 2889 * provide a format string so that the user can decide? 2890 * NOTE: would affect the maxlen and assert() logic.*/ 2891 } 2892 else { 2893 return 0; 2894 } 2895 assert((s == buffer + 3) || (s == buffer + 4)); 2896 *s = 0; 2897 return s - buffer; 2898 } 2899 2900 /* 2901 =for apidoc sv_2pv 2902 =for apidoc_item sv_2pv_flags 2903 2904 These implement the various forms of the L<perlapi/C<SvPV>> macros. 2905 The macros are the preferred interface. 2906 2907 These return a pointer to the string value of an SV (coercing it to a string if 2908 necessary), and set C<*lp> to its length in bytes. 2909 2910 The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and 2911 C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains 2912 C<SV_GMAGIC>. 2913 2914 =for apidoc Amnh||SV_GMAGIC 2915 2916 =cut 2917 */ 2918 2919 char * 2920 Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags) 2921 { 2922 char *s; 2923 bool done_gmagic = FALSE; 2924 2925 PERL_ARGS_ASSERT_SV_2PV_FLAGS; 2926 2927 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV 2928 && SvTYPE(sv) != SVt_PVFM); 2929 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) { 2930 mg_get(sv); 2931 done_gmagic = TRUE; 2932 } 2933 2934 if (SvROK(sv)) { 2935 if (SvAMAGIC(sv)) { 2936 SV *tmpstr; 2937 SV *nsv= (SV *)sv; 2938 if (flags & SV_SKIP_OVERLOAD) 2939 return NULL; 2940 if (done_gmagic) 2941 nsv = sv_mortalcopy_flags(sv,0); 2942 tmpstr = AMG_CALLunary(nsv, string_amg); 2943 TAINT_IF(tmpstr && SvTAINTED(tmpstr)); 2944 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(nsv)))) { 2945 /* Unwrap this: */ 2946 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr); 2947 */ 2948 2949 char *pv; 2950 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) { 2951 if (flags & SV_CONST_RETURN) { 2952 pv = (char *) SvPVX_const(tmpstr); 2953 } else { 2954 pv = (flags & SV_MUTABLE_RETURN) 2955 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr); 2956 } 2957 if (lp) 2958 *lp = SvCUR(tmpstr); 2959 } else { 2960 pv = sv_2pv_flags(tmpstr, lp, flags); 2961 } 2962 if (SvUTF8(tmpstr)) 2963 SvUTF8_on(sv); 2964 else 2965 SvUTF8_off(sv); 2966 return pv; 2967 } 2968 } 2969 { 2970 STRLEN len; 2971 char *retval; 2972 char *buffer; 2973 SV *const referent = SvRV(sv); 2974 2975 if (!referent) { 2976 len = 7; 2977 retval = buffer = savepvn("NULLREF", len); 2978 } else if (SvTYPE(referent) == SVt_REGEXP && 2979 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) || 2980 amagic_is_enabled(string_amg))) { 2981 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent); 2982 2983 assert(re); 2984 2985 /* If the regex is UTF-8 we want the containing scalar to 2986 have an UTF-8 flag too */ 2987 if (RX_UTF8(re)) 2988 SvUTF8_on(sv); 2989 else 2990 SvUTF8_off(sv); 2991 2992 if (lp) 2993 *lp = RX_WRAPLEN(re); 2994 2995 return RX_WRAPPED(re); 2996 } else { 2997 const char *const typestring = sv_reftype(referent, 0); 2998 const STRLEN typelen = strlen(typestring); 2999 UV addr = PTR2UV(referent); 3000 const char *stashname = NULL; 3001 STRLEN stashnamelen = 0; /* hush, gcc */ 3002 const char *buffer_end; 3003 3004 if (SvOBJECT(referent)) { 3005 const HEK *const name = HvNAME_HEK(SvSTASH(referent)); 3006 3007 if (name) { 3008 stashname = HEK_KEY(name); 3009 stashnamelen = HEK_LEN(name); 3010 3011 if (HEK_UTF8(name)) { 3012 SvUTF8_on(sv); 3013 } else { 3014 SvUTF8_off(sv); 3015 } 3016 } else { 3017 stashname = "__ANON__"; 3018 stashnamelen = 8; 3019 } 3020 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */ 3021 + 2 * sizeof(UV) + 2 /* )\0 */; 3022 } else { 3023 len = typelen + 3 /* (0x */ 3024 + 2 * sizeof(UV) + 2 /* )\0 */; 3025 } 3026 3027 Newx(buffer, len, char); 3028 buffer_end = retval = buffer + len; 3029 3030 /* Working backwards */ 3031 *--retval = '\0'; 3032 *--retval = ')'; 3033 do { 3034 *--retval = PL_hexdigit[addr & 15]; 3035 } while (addr >>= 4); 3036 *--retval = 'x'; 3037 *--retval = '0'; 3038 *--retval = '('; 3039 3040 retval -= typelen; 3041 memcpy(retval, typestring, typelen); 3042 3043 if (stashname) { 3044 *--retval = '='; 3045 retval -= stashnamelen; 3046 memcpy(retval, stashname, stashnamelen); 3047 } 3048 /* retval may not necessarily have reached the start of the 3049 buffer here. */ 3050 assert (retval >= buffer); 3051 3052 len = buffer_end - retval - 1; /* -1 for that \0 */ 3053 } 3054 if (lp) 3055 *lp = len; 3056 SAVEFREEPV(buffer); 3057 return retval; 3058 } 3059 } 3060 3061 if (SvPOKp(sv)) { 3062 if (lp) 3063 *lp = SvCUR(sv); 3064 if (flags & SV_MUTABLE_RETURN) 3065 return SvPVX_mutable(sv); 3066 if (flags & SV_CONST_RETURN) 3067 return (char *)SvPVX_const(sv); 3068 return SvPVX(sv); 3069 } 3070 3071 if (SvIOK(sv)) { 3072 /* I'm assuming that if both IV and NV are equally valid then 3073 converting the IV is going to be more efficient */ 3074 const U32 isUIOK = SvIsUV(sv); 3075 /* The purpose of this union is to ensure that arr is aligned on 3076 a 2 byte boundary, because that is what uiv_2buf() requires */ 3077 union { 3078 char arr[TYPE_CHARS(UV)]; 3079 U16 dummy; 3080 } buf; 3081 char *ebuf, *ptr; 3082 STRLEN len; 3083 3084 if (SvTYPE(sv) < SVt_PVIV) 3085 sv_upgrade(sv, SVt_PVIV); 3086 ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf); 3087 len = ebuf - ptr; 3088 /* inlined from sv_setpvn */ 3089 s = SvGROW_mutable(sv, len + 1); 3090 Move(ptr, s, len, char); 3091 s += len; 3092 *s = '\0'; 3093 /* We used to call SvPOK_on(). Whilst this is fine for (most) Perl code, 3094 it means that after this stringification is cached, there is no way 3095 to distinguish between values originally assigned as $a = 42; and 3096 $a = "42"; (or results of string operators vs numeric operators) 3097 where the value has subsequently been used in the other sense 3098 and had a value cached. 3099 This (somewhat) hack means that we retain the cached stringification, 3100 but don't set SVf_POK. Hence if a value is SVf_IOK|SVf_POK then it 3101 originated as "42", whereas if it's SVf_IOK then it originated as 42. 3102 (ignore SVp_IOK and SVp_POK) 3103 The SvPV macros are now updated to recognise this specific case 3104 (and that there isn't overloading or magic that could alter the 3105 cached value) and so return the cached value immediately without 3106 re-entering this function, getting back here to this block of code, 3107 and repeating the same conversion. */ 3108 SvPOKp_on(sv); 3109 } 3110 else if (SvNOK(sv)) { 3111 if (SvTYPE(sv) < SVt_PVNV) 3112 sv_upgrade(sv, SVt_PVNV); 3113 if (SvNVX(sv) == 0.0 3114 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 3115 && !Perl_isnan(SvNVX(sv)) 3116 #endif 3117 ) { 3118 s = SvGROW_mutable(sv, 2); 3119 *s++ = '0'; 3120 *s = '\0'; 3121 } else { 3122 STRLEN len; 3123 STRLEN size = 5; /* "-Inf\0" */ 3124 3125 s = SvGROW_mutable(sv, size); 3126 len = S_infnan_2pv(SvNVX(sv), s, size, 0); 3127 if (len > 0) { 3128 s += len; 3129 SvPOKp_on(sv); 3130 } 3131 else { 3132 /* some Xenix systems wipe out errno here */ 3133 dSAVE_ERRNO; 3134 3135 size = 3136 1 + /* sign */ 3137 1 + /* "." */ 3138 NV_DIG + 3139 1 + /* "e" */ 3140 1 + /* sign */ 3141 5 + /* exponent digits */ 3142 1 + /* \0 */ 3143 2; /* paranoia */ 3144 3145 s = SvGROW_mutable(sv, size); 3146 #ifndef USE_LOCALE_NUMERIC 3147 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG); 3148 3149 SvPOKp_on(sv); 3150 #else 3151 { 3152 bool local_radix; 3153 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 3154 STORE_LC_NUMERIC_SET_TO_NEEDED(); 3155 3156 local_radix = NOT_IN_NUMERIC_STANDARD_; 3157 if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) { 3158 size += SvCUR(PL_numeric_radix_sv) - 1; 3159 s = SvGROW_mutable(sv, size); 3160 } 3161 3162 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG); 3163 3164 /* If the radix character is UTF-8, and actually is in the 3165 * output, turn on the UTF-8 flag for the scalar */ 3166 if ( local_radix 3167 && SvUTF8(PL_numeric_radix_sv) 3168 && instr(s, SvPVX_const(PL_numeric_radix_sv))) 3169 { 3170 SvUTF8_on(sv); 3171 } 3172 3173 RESTORE_LC_NUMERIC(); 3174 } 3175 3176 /* We don't call SvPOK_on(), because it may come to 3177 * pass that the locale changes so that the 3178 * stringification we just did is no longer correct. We 3179 * will have to re-stringify every time it is needed */ 3180 #endif 3181 RESTORE_ERRNO; 3182 } 3183 while (*s) s++; 3184 } 3185 } 3186 else if (isGV_with_GP(sv)) { 3187 GV *const gv = MUTABLE_GV(sv); 3188 SV *const buffer = sv_newmortal(); 3189 3190 gv_efullname3(buffer, gv, "*"); 3191 3192 assert(SvPOK(buffer)); 3193 if (SvUTF8(buffer)) 3194 SvUTF8_on(sv); 3195 else 3196 SvUTF8_off(sv); 3197 if (lp) 3198 *lp = SvCUR(buffer); 3199 return SvPVX(buffer); 3200 } 3201 else { 3202 if (lp) 3203 *lp = 0; 3204 if (flags & SV_UNDEF_RETURNS_NULL) 3205 return NULL; 3206 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) 3207 report_uninit(sv); 3208 /* Typically the caller expects that sv_any is not NULL now. */ 3209 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV) 3210 sv_upgrade(sv, SVt_PV); 3211 return (char *)""; 3212 } 3213 3214 { 3215 const STRLEN len = s - SvPVX_const(sv); 3216 if (lp) 3217 *lp = len; 3218 SvCUR_set(sv, len); 3219 } 3220 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n", 3221 PTR2UV(sv),SvPVX_const(sv))); 3222 if (flags & SV_CONST_RETURN) 3223 return (char *)SvPVX_const(sv); 3224 if (flags & SV_MUTABLE_RETURN) 3225 return SvPVX_mutable(sv); 3226 return SvPVX(sv); 3227 } 3228 3229 /* 3230 =for apidoc sv_copypv 3231 =for apidoc_item sv_copypv_flags 3232 =for apidoc_item sv_copypv_nomg 3233 3234 These copy a stringified representation of the source SV into the 3235 destination SV. They automatically perform coercion of numeric values into 3236 strings. Guaranteed to preserve the C<UTF8> flag even from overloaded objects. 3237 Similar in nature to C<sv_2pv[_flags]> but they operate directly on an SV 3238 instead of just the string. Mostly they use L</C<sv_2pv_flags>> to 3239 do the work, except when that would lose the UTF-8'ness of the PV. 3240 3241 The three forms differ only in whether or not they perform 'get magic' on 3242 C<sv>. C<sv_copypv_nomg> skips 'get magic'; C<sv_copypv> performs it; and 3243 C<sv_copypv_flags> either performs it (if the C<SV_GMAGIC> bit is set in 3244 C<flags>) or doesn't (if that bit is cleared). 3245 3246 =cut 3247 */ 3248 3249 void 3250 Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags) 3251 { 3252 STRLEN len; 3253 const char *s; 3254 3255 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS; 3256 3257 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC)); 3258 sv_setpvn(dsv,s,len); 3259 if (SvUTF8(ssv)) 3260 SvUTF8_on(dsv); 3261 else 3262 SvUTF8_off(dsv); 3263 } 3264 3265 /* 3266 =for apidoc sv_2pvbyte 3267 =for apidoc_item sv_2pvbyte_flags 3268 3269 These implement the various forms of the L<perlapi/C<SvPVbyte>> macros. 3270 The macros are the preferred interface. 3271 3272 These return a pointer to the byte-encoded representation of the SV, and set 3273 C<*lp> to its length. If the SV is marked as being encoded as UTF-8, it will 3274 be downgraded, if possible, to a byte string. If the SV cannot be downgraded, 3275 they croak. 3276 3277 The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and 3278 C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains 3279 C<SV_GMAGIC>. 3280 3281 =for apidoc Amnh||SV_GMAGIC 3282 3283 =cut 3284 */ 3285 3286 char * 3287 Perl_sv_2pvbyte_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags) 3288 { 3289 PERL_ARGS_ASSERT_SV_2PVBYTE_FLAGS; 3290 3291 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 3292 mg_get(sv); 3293 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv)) 3294 || isGV_with_GP(sv) || SvROK(sv)) { 3295 SV *sv2 = sv_newmortal(); 3296 sv_copypv_nomg(sv2,sv); 3297 sv = sv2; 3298 } 3299 sv_utf8_downgrade_nomg(sv,0); 3300 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv); 3301 } 3302 3303 /* 3304 =for apidoc sv_2pvutf8 3305 =for apidoc_item sv_2pvutf8_flags 3306 3307 These implement the various forms of the L<perlapi/C<SvPVutf8>> macros. 3308 The macros are the preferred interface. 3309 3310 These return a pointer to the UTF-8-encoded representation of the SV, and set 3311 C<*lp> to its length in bytes. They may cause the SV to be upgraded to UTF-8 3312 as a side-effect. 3313 3314 The forms differ in that plain C<sv_2pvutf8> always processes 'get' magic; and 3315 C<sv_2pvutf8_flags> processes 'get' magic if and only if C<flags> contains 3316 C<SV_GMAGIC>. 3317 3318 =cut 3319 */ 3320 3321 char * 3322 Perl_sv_2pvutf8_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags) 3323 { 3324 PERL_ARGS_ASSERT_SV_2PVUTF8_FLAGS; 3325 3326 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) 3327 mg_get(sv); 3328 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv)) 3329 || isGV_with_GP(sv) || SvROK(sv)) { 3330 SV *sv2 = sv_newmortal(); 3331 sv_copypv_nomg(sv2,sv); 3332 sv = sv2; 3333 } 3334 sv_utf8_upgrade_nomg(sv); 3335 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv); 3336 } 3337 3338 3339 /* 3340 =for apidoc sv_2bool 3341 3342 This macro is only used by C<sv_true()> or its macro equivalent, and only if 3343 the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. 3344 It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag. 3345 3346 =for apidoc sv_2bool_flags 3347 3348 This function is only used by C<sv_true()> and friends, and only if 3349 the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags 3350 contain C<SV_GMAGIC>, then it does an C<mg_get()> first. 3351 3352 3353 =cut 3354 */ 3355 3356 bool 3357 Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags) 3358 { 3359 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS; 3360 3361 restart: 3362 if(flags & SV_GMAGIC) SvGETMAGIC(sv); 3363 3364 if (!SvOK(sv)) 3365 return 0; 3366 if (SvROK(sv)) { 3367 if (SvAMAGIC(sv)) { 3368 SV * const tmpsv = AMG_CALLunary(sv, bool__amg); 3369 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) { 3370 bool svb; 3371 sv = tmpsv; 3372 if(SvGMAGICAL(sv)) { 3373 flags = SV_GMAGIC; 3374 goto restart; /* call sv_2bool */ 3375 } 3376 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */ 3377 else if(!SvOK(sv)) { 3378 svb = 0; 3379 } 3380 else if(SvPOK(sv)) { 3381 svb = SvPVXtrue(sv); 3382 } 3383 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) { 3384 svb = (SvIOK(sv) && SvIVX(sv) != 0) 3385 || (SvNOK(sv) && SvNVX(sv) != 0.0); 3386 } 3387 else { 3388 flags = 0; 3389 goto restart; /* call sv_2bool_nomg */ 3390 } 3391 return cBOOL(svb); 3392 } 3393 } 3394 assert(SvRV(sv)); 3395 return TRUE; 3396 } 3397 if (isREGEXP(sv)) 3398 return 3399 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0'); 3400 3401 if (SvNOK(sv) && !SvPOK(sv)) 3402 return SvNVX(sv) != 0.0; 3403 3404 return SvTRUE_common(sv, 0); 3405 } 3406 3407 /* 3408 =for apidoc sv_utf8_upgrade 3409 =for apidoc_item sv_utf8_upgrade_flags 3410 =for apidoc_item sv_utf8_upgrade_flags_grow 3411 =for apidoc_item sv_utf8_upgrade_nomg 3412 3413 These convert the PV of an SV to its UTF-8-encoded form. 3414 The SV is forced to string form if it is not already. 3415 They always set the C<SvUTF8> flag to avoid future validity checks even if the 3416 whole string is the same in UTF-8 as not. 3417 They return the number of bytes in the converted string 3418 3419 The forms differ in just two ways. The main difference is whether or not they 3420 perform 'get magic' on C<sv>. C<sv_utf8_upgrade_nomg> skips 'get magic'; 3421 C<sv_utf8_upgrade> performs it; and C<sv_utf8_upgrade_flags> and 3422 C<sv_utf8_upgrade_flags_grow> either perform it (if the C<SV_GMAGIC> bit is set 3423 in C<flags>) or don't (if that bit is cleared). 3424 3425 The other difference is that C<sv_utf8_upgrade_flags_grow> has an additional 3426 parameter, C<extra>, which allows the caller to specify an amount of space to 3427 be reserved as spare beyond what is needed for the actual conversion. This is 3428 used when the caller knows it will soon be needing yet more space, and it is 3429 more efficient to request space from the system in a single call. 3430 This form is otherwise identical to C<sv_utf8_upgrade_flags>. 3431 3432 These are not a general purpose byte encoding to Unicode interface: use the 3433 Encode extension for that. 3434 3435 The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored. 3436 3437 =for apidoc Amnh||SV_GMAGIC| 3438 =for apidoc Amnh||SV_FORCE_UTF8_UPGRADE| 3439 3440 =cut 3441 3442 If the routine itself changes the string, it adds a trailing C<NUL>. Such a 3443 C<NUL> isn't guaranteed due to having other routines do the work in some input 3444 cases, or if the input is already flagged as being in utf8. 3445 3446 */ 3447 3448 STRLEN 3449 Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra) 3450 { 3451 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW; 3452 3453 if (sv == &PL_sv_undef) 3454 return 0; 3455 if (!SvPOK_nog(sv)) { 3456 STRLEN len = 0; 3457 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) { 3458 (void) sv_2pv_flags(sv,&len, flags); 3459 if (SvUTF8(sv)) { 3460 if (extra) SvGROW(sv, SvCUR(sv) + extra); 3461 return len; 3462 } 3463 } else { 3464 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC); 3465 } 3466 } 3467 3468 /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already 3469 * compiled and individual nodes will remain non-utf8 even if the 3470 * stringified version of the pattern gets upgraded. Whether the 3471 * PVX of a REGEXP should be grown or we should just croak, I don't 3472 * know - DAPM */ 3473 if (SvUTF8(sv) || isREGEXP(sv)) { 3474 if (extra) SvGROW(sv, SvCUR(sv) + extra); 3475 return SvCUR(sv); 3476 } 3477 3478 if (SvIsCOW(sv)) { 3479 S_sv_uncow(aTHX_ sv, 0); 3480 } 3481 3482 if (SvCUR(sv) == 0) { 3483 if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing 3484 byte */ 3485 } else { /* Assume Latin-1/EBCDIC */ 3486 /* This function could be much more efficient if we 3487 * had a FLAG in SVs to signal if there are any variant 3488 * chars in the PV. Given that there isn't such a flag 3489 * make the loop as fast as possible. */ 3490 U8 * s = (U8 *) SvPVX_const(sv); 3491 U8 *t = s; 3492 3493 if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) { 3494 3495 /* utf8 conversion not needed because all are invariants. Mark 3496 * as UTF-8 even if no variant - saves scanning loop */ 3497 SvUTF8_on(sv); 3498 if (extra) SvGROW(sv, SvCUR(sv) + extra); 3499 return SvCUR(sv); 3500 } 3501 3502 /* Here, there is at least one variant (t points to the first one), so 3503 * the string should be converted to utf8. Everything from 's' to 3504 * 't - 1' will occupy only 1 byte each on output. 3505 * 3506 * Note that the incoming SV may not have a trailing '\0', as certain 3507 * code in pp_formline can send us partially built SVs. 3508 * 3509 * There are two main ways to convert. One is to create a new string 3510 * and go through the input starting from the beginning, appending each 3511 * converted value onto the new string as we go along. Going this 3512 * route, it's probably best to initially allocate enough space in the 3513 * string rather than possibly running out of space and having to 3514 * reallocate and then copy what we've done so far. Since everything 3515 * from 's' to 't - 1' is invariant, the destination can be initialized 3516 * with these using a fast memory copy. To be sure to allocate enough 3517 * space, one could use the worst case scenario, where every remaining 3518 * byte expands to two under UTF-8, or one could parse it and count 3519 * exactly how many do expand. 3520 * 3521 * The other way is to unconditionally parse the remainder of the 3522 * string to figure out exactly how big the expanded string will be, 3523 * growing if needed. Then start at the end of the string and place 3524 * the character there at the end of the unfilled space in the expanded 3525 * one, working backwards until reaching 't'. 3526 * 3527 * The problem with assuming the worst case scenario is that for very 3528 * long strings, we could allocate much more memory than actually 3529 * needed, which can create performance problems. If we have to parse 3530 * anyway, the second method is the winner as it may avoid an extra 3531 * copy. The code used to use the first method under some 3532 * circumstances, but now that there is faster variant counting on 3533 * ASCII platforms, the second method is used exclusively, eliminating 3534 * some code that no longer has to be maintained. */ 3535 3536 { 3537 /* Count the total number of variants there are. We can start 3538 * just beyond the first one, which is known to be at 't' */ 3539 const Size_t invariant_length = t - s; 3540 U8 * e = (U8 *) SvEND(sv); 3541 3542 /* The length of the left overs, plus 1. */ 3543 const Size_t remaining_length_p1 = e - t; 3544 3545 /* We expand by 1 for the variant at 't' and one for each remaining 3546 * variant (we start looking at 't+1') */ 3547 Size_t expansion = 1 + variant_under_utf8_count(t + 1, e); 3548 3549 /* +1 = trailing NUL */ 3550 Size_t need = SvCUR(sv) + expansion + extra + 1; 3551 U8 * d; 3552 3553 /* Grow if needed */ 3554 if (SvLEN(sv) < need) { 3555 t = invariant_length + (U8*) SvGROW(sv, need); 3556 e = t + remaining_length_p1; 3557 } 3558 SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion); 3559 3560 /* Set the NUL at the end */ 3561 d = (U8 *) SvEND(sv); 3562 *d-- = '\0'; 3563 3564 /* Having decremented d, it points to the position to put the 3565 * very last byte of the expanded string. Go backwards through 3566 * the string, copying and expanding as we go, stopping when we 3567 * get to the part that is invariant the rest of the way down */ 3568 3569 e--; 3570 while (e >= t) { 3571 if (NATIVE_BYTE_IS_INVARIANT(*e)) { 3572 *d-- = *e; 3573 } else { 3574 *d-- = UTF8_EIGHT_BIT_LO(*e); 3575 *d-- = UTF8_EIGHT_BIT_HI(*e); 3576 } 3577 e--; 3578 } 3579 3580 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 3581 /* Update pos. We do it at the end rather than during 3582 * the upgrade, to avoid slowing down the common case 3583 * (upgrade without pos). 3584 * pos can be stored as either bytes or characters. Since 3585 * this was previously a byte string we can just turn off 3586 * the bytes flag. */ 3587 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 3588 if (mg) { 3589 mg->mg_flags &= ~MGf_BYTES; 3590 } 3591 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 3592 magic_setutf8(sv,mg); /* clear UTF8 cache */ 3593 } 3594 } 3595 } 3596 3597 SvUTF8_on(sv); 3598 return SvCUR(sv); 3599 } 3600 3601 /* 3602 =for apidoc sv_utf8_downgrade 3603 =for apidoc_item sv_utf8_downgrade_flags 3604 =for apidoc_item sv_utf8_downgrade_nomg 3605 3606 These attempt to convert the PV of an SV from characters to bytes. If the PV 3607 contains a character that cannot fit in a byte, this conversion will fail; in 3608 this case, C<FALSE> is returned if C<fail_ok> is true; otherwise they croak. 3609 3610 They are not a general purpose Unicode to byte encoding interface: 3611 use the C<Encode> extension for that. 3612 3613 They differ only in that: 3614 3615 C<sv_utf8_downgrade> processes 'get' magic on C<sv>. 3616 3617 C<sv_utf8_downgrade_nomg> does not. 3618 3619 C<sv_utf8_downgrade_flags> has an additional C<flags> parameter in which you can specify 3620 C<SV_GMAGIC> to process 'get' magic, or leave it cleared to not process 'get' magic. 3621 3622 =cut 3623 */ 3624 3625 bool 3626 Perl_sv_utf8_downgrade_flags(pTHX_ SV *const sv, const bool fail_ok, const U32 flags) 3627 { 3628 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE_FLAGS; 3629 3630 if (SvPOKp(sv) && SvUTF8(sv)) { 3631 if (SvCUR(sv)) { 3632 U8 *s; 3633 STRLEN len; 3634 U32 mg_flags = flags & SV_GMAGIC; 3635 3636 if (SvIsCOW(sv)) { 3637 S_sv_uncow(aTHX_ sv, 0); 3638 } 3639 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 3640 /* update pos */ 3641 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 3642 if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) { 3643 mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len, 3644 mg_flags|SV_CONST_RETURN); 3645 mg_flags = 0; /* sv_pos_b2u does get magic */ 3646 } 3647 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 3648 magic_setutf8(sv,mg); /* clear UTF8 cache */ 3649 3650 } 3651 s = (U8 *) SvPV_flags(sv, len, mg_flags); 3652 3653 if (!utf8_to_bytes(s, &len)) { 3654 if (fail_ok) 3655 return FALSE; 3656 else { 3657 if (PL_op) 3658 Perl_croak(aTHX_ "Wide character in %s", 3659 OP_DESC(PL_op)); 3660 else 3661 Perl_croak(aTHX_ "Wide character"); 3662 } 3663 } 3664 SvCUR_set(sv, len); 3665 } 3666 } 3667 SvUTF8_off(sv); 3668 return TRUE; 3669 } 3670 3671 /* 3672 =for apidoc sv_utf8_encode 3673 3674 Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8> 3675 flag off so that it looks like octets again. 3676 3677 =cut 3678 */ 3679 3680 void 3681 Perl_sv_utf8_encode(pTHX_ SV *const sv) 3682 { 3683 PERL_ARGS_ASSERT_SV_UTF8_ENCODE; 3684 3685 if (SvREADONLY(sv)) { 3686 sv_force_normal_flags(sv, 0); 3687 } 3688 (void) sv_utf8_upgrade(sv); 3689 SvUTF8_off(sv); 3690 } 3691 3692 /* 3693 =for apidoc sv_utf8_decode 3694 3695 If the PV of the SV is an octet sequence in Perl's extended UTF-8 3696 and contains a multiple-byte character, the C<SvUTF8> flag is turned on 3697 so that it looks like a character. If the PV contains only single-byte 3698 characters, the C<SvUTF8> flag stays off. 3699 Scans PV for validity and returns FALSE if the PV is invalid UTF-8. 3700 3701 =cut 3702 */ 3703 3704 bool 3705 Perl_sv_utf8_decode(pTHX_ SV *const sv) 3706 { 3707 PERL_ARGS_ASSERT_SV_UTF8_DECODE; 3708 3709 if (SvPOKp(sv)) { 3710 const U8 *start, *c, *first_variant; 3711 3712 /* The octets may have got themselves encoded - get them back as 3713 * bytes 3714 */ 3715 if (!sv_utf8_downgrade(sv, TRUE)) 3716 return FALSE; 3717 3718 /* it is actually just a matter of turning the utf8 flag on, but 3719 * we want to make sure everything inside is valid utf8 first. 3720 */ 3721 c = start = (const U8 *) SvPVX_const(sv); 3722 if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) { 3723 if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c))) 3724 return FALSE; 3725 SvUTF8_on(sv); 3726 } 3727 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 3728 /* XXX Is this dead code? XS_utf8_decode calls SvSETMAGIC 3729 after this, clearing pos. Does anything on CPAN 3730 need this? */ 3731 /* adjust pos to the start of a UTF8 char sequence */ 3732 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 3733 if (mg) { 3734 SSize_t pos = mg->mg_len; 3735 if (pos > 0) { 3736 for (c = start + pos; c > start; c--) { 3737 if (UTF8_IS_START(*c)) 3738 break; 3739 } 3740 mg->mg_len = c - start; 3741 } 3742 } 3743 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 3744 magic_setutf8(sv,mg); /* clear UTF8 cache */ 3745 } 3746 } 3747 return TRUE; 3748 } 3749 3750 /* 3751 =for apidoc sv_setsv 3752 =for apidoc_item sv_setsv_flags 3753 =for apidoc_item sv_setsv_mg 3754 =for apidoc_item sv_setsv_nomg 3755 3756 These copy the contents of the source SV C<ssv> into the destination SV C<dsv>. 3757 C<ssv> may be destroyed if it is mortal, so don't use these functions if 3758 the source SV needs to be reused. 3759 Loosely speaking, they perform a copy-by-value, obliterating any previous 3760 content of the destination. 3761 3762 They differ only in that: 3763 3764 C<sv_setsv> calls 'get' magic on C<ssv>, but skips 'set' magic on C<dsv>. 3765 3766 C<sv_setsv_mg> calls both 'get' magic on C<ssv> and 'set' magic on C<dsv>. 3767 3768 C<sv_setsv_nomg> skips all magic. 3769 3770 C<sv_setsv_flags> has a C<flags> parameter which you can use to specify any 3771 combination of magic handling, and also you can specify C<SV_NOSTEAL> so that 3772 the buffers of temps will not be stolen. 3773 3774 You probably want to instead use one of the assortment of wrappers, such as 3775 C<L</SvSetSV>>, C<L</SvSetSV_nosteal>>, C<L</SvSetMagicSV>> and 3776 C<L</SvSetMagicSV_nosteal>>. 3777 3778 C<sv_setsv_flags> is the primary function for copying scalars, and most other 3779 copy-ish functions and macros use it underneath. 3780 3781 =for apidoc Amnh||SV_NOSTEAL 3782 3783 =cut 3784 */ 3785 3786 static void 3787 S_glob_assign_glob(pTHX_ SV *const dsv, SV *const ssv, const int dtype) 3788 { 3789 I32 mro_changes = 0; /* 1 = method, 2 = isa, 3 = recursive isa */ 3790 HV *old_stash = NULL; 3791 3792 PERL_ARGS_ASSERT_GLOB_ASSIGN_GLOB; 3793 3794 if (dtype != SVt_PVGV && !isGV_with_GP(dsv)) { 3795 const char * const name = GvNAME(ssv); 3796 const STRLEN len = GvNAMELEN(ssv); 3797 { 3798 if (dtype >= SVt_PV) { 3799 SvPV_free(dsv); 3800 SvPV_set(dsv, 0); 3801 SvLEN_set(dsv, 0); 3802 SvCUR_set(dsv, 0); 3803 } 3804 SvUPGRADE(dsv, SVt_PVGV); 3805 (void)SvOK_off(dsv); 3806 isGV_with_GP_on(dsv); 3807 } 3808 GvSTASH(dsv) = GvSTASH(ssv); 3809 if (GvSTASH(dsv)) 3810 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dsv)), dsv); 3811 gv_name_set(MUTABLE_GV(dsv), name, len, 3812 GV_ADD | (GvNAMEUTF8(ssv) ? SVf_UTF8 : 0 )); 3813 SvFAKE_on(dsv); /* can coerce to non-glob */ 3814 } 3815 3816 if(GvGP(MUTABLE_GV(ssv))) { 3817 /* If source has method cache entry, clear it */ 3818 if(GvCVGEN(ssv)) { 3819 SvREFCNT_dec(GvCV(ssv)); 3820 GvCV_set(ssv, NULL); 3821 GvCVGEN(ssv) = 0; 3822 } 3823 /* If source has a real method, then a method is 3824 going to change */ 3825 else if( 3826 GvCV((const GV *)ssv) && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv)) 3827 ) { 3828 mro_changes = 1; 3829 } 3830 } 3831 3832 /* If dest already had a real method, that's a change as well */ 3833 if( 3834 !mro_changes && GvGP(MUTABLE_GV(dsv)) && GvCVu((const GV *)dsv) 3835 && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv)) 3836 ) { 3837 mro_changes = 1; 3838 } 3839 3840 /* We don't need to check the name of the destination if it was not a 3841 glob to begin with. */ 3842 if(dtype == SVt_PVGV) { 3843 const char * const name = GvNAME((const GV *)dsv); 3844 const STRLEN len = GvNAMELEN(dsv); 3845 if(memEQs(name, len, "ISA") 3846 /* The stash may have been detached from the symbol table, so 3847 check its name. */ 3848 && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv)) 3849 ) 3850 mro_changes = 2; 3851 else { 3852 if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') 3853 || (len == 1 && name[0] == ':')) { 3854 mro_changes = 3; 3855 3856 /* Set aside the old stash, so we can reset isa caches on 3857 its subclasses. */ 3858 if((old_stash = GvHV(dsv))) 3859 /* Make sure we do not lose it early. */ 3860 SvREFCNT_inc_simple_void_NN( 3861 sv_2mortal((SV *)old_stash) 3862 ); 3863 } 3864 } 3865 3866 SvREFCNT_inc_simple_void_NN(sv_2mortal(dsv)); 3867 } 3868 3869 /* freeing dsv's GP might free ssv (e.g. *x = $x), 3870 * so temporarily protect it */ 3871 ENTER; 3872 SAVEFREESV(SvREFCNT_inc_simple_NN(ssv)); 3873 gp_free(MUTABLE_GV(dsv)); 3874 GvINTRO_off(dsv); /* one-shot flag */ 3875 GvGP_set(dsv, gp_ref(GvGP(ssv))); 3876 LEAVE; 3877 3878 if (SvTAINTED(ssv)) 3879 SvTAINT(dsv); 3880 if (GvIMPORTED(dsv) != GVf_IMPORTED 3881 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) 3882 { 3883 GvIMPORTED_on(dsv); 3884 } 3885 GvMULTI_on(dsv); 3886 if(mro_changes == 2) { 3887 if (GvAV((const GV *)ssv)) { 3888 MAGIC *mg; 3889 SV * const sref = (SV *)GvAV((const GV *)dsv); 3890 if (SvSMAGICAL(sref) && (mg = mg_find(sref, PERL_MAGIC_isa))) { 3891 if (SvTYPE(mg->mg_obj) != SVt_PVAV) { 3892 AV * const ary = newAV_alloc_x(2); 3893 av_push_simple(ary, mg->mg_obj); /* takes the refcount */ 3894 av_push_simple(ary, SvREFCNT_inc_simple_NN(dsv)); 3895 mg->mg_obj = (SV *)ary; 3896 } else { 3897 av_push((AV *)mg->mg_obj, SvREFCNT_inc_simple_NN(dsv)); 3898 } 3899 } 3900 else sv_magic(sref, dsv, PERL_MAGIC_isa, NULL, 0); 3901 } 3902 mro_isa_changed_in(GvSTASH(dsv)); 3903 } 3904 else if(mro_changes == 3) { 3905 HV * const stash = GvHV(dsv); 3906 if(old_stash ? HvHasENAME(old_stash) : cBOOL(stash)) 3907 mro_package_moved( 3908 stash, old_stash, 3909 (GV *)dsv, 0 3910 ); 3911 } 3912 else if(mro_changes) mro_method_changed_in(GvSTASH(dsv)); 3913 if (GvIO(dsv) && dtype == SVt_PVGV) { 3914 DEBUG_o(Perl_deb(aTHX_ 3915 "glob_assign_glob clearing PL_stashcache\n")); 3916 /* It's a cache. It will rebuild itself quite happily. 3917 It's a lot of effort to work out exactly which key (or keys) 3918 might be invalidated by the creation of the this file handle. 3919 */ 3920 hv_clear(PL_stashcache); 3921 } 3922 return; 3923 } 3924 3925 void 3926 Perl_gv_setref(pTHX_ SV *const dsv, SV *const ssv) 3927 { 3928 SV * const sref = SvRV(ssv); 3929 SV *dref; 3930 const int intro = GvINTRO(dsv); 3931 SV **location; 3932 U8 import_flag = 0; 3933 const U32 stype = SvTYPE(sref); 3934 3935 PERL_ARGS_ASSERT_GV_SETREF; 3936 3937 if (intro) { 3938 GvINTRO_off(dsv); /* one-shot flag */ 3939 GvLINE(dsv) = CopLINE(PL_curcop); 3940 GvEGV(dsv) = MUTABLE_GV(dsv); 3941 } 3942 GvMULTI_on(dsv); 3943 switch (stype) { 3944 case SVt_PVCV: 3945 location = (SV **) &(GvGP(dsv)->gp_cv); /* XXX bypassing GvCV_set */ 3946 import_flag = GVf_IMPORTED_CV; 3947 goto common; 3948 case SVt_PVHV: 3949 location = (SV **) &GvHV(dsv); 3950 import_flag = GVf_IMPORTED_HV; 3951 goto common; 3952 case SVt_PVAV: 3953 location = (SV **) &GvAV(dsv); 3954 import_flag = GVf_IMPORTED_AV; 3955 goto common; 3956 case SVt_PVIO: 3957 location = (SV **) &GvIOp(dsv); 3958 goto common; 3959 case SVt_PVFM: 3960 location = (SV **) &GvFORM(dsv); 3961 goto common; 3962 default: 3963 location = &GvSV(dsv); 3964 import_flag = GVf_IMPORTED_SV; 3965 common: 3966 if (intro) { 3967 if (stype == SVt_PVCV) { 3968 /*if (GvCVGEN(dsv) && (GvCV(dsv) != (const CV *)sref || GvCVGEN(dsv))) {*/ 3969 if (GvCVGEN(dsv)) { 3970 SvREFCNT_dec(GvCV(dsv)); 3971 GvCV_set(dsv, NULL); 3972 GvCVGEN(dsv) = 0; /* Switch off cacheness. */ 3973 } 3974 } 3975 /* SAVEt_GVSLOT takes more room on the savestack and has more 3976 overhead in leave_scope than SAVEt_GENERIC_SV. But for CVs 3977 leave_scope needs access to the GV so it can reset method 3978 caches. We must use SAVEt_GVSLOT whenever the type is 3979 SVt_PVCV, even if the stash is anonymous, as the stash may 3980 gain a name somehow before leave_scope. */ 3981 if (stype == SVt_PVCV) { 3982 /* There is no save_pushptrptrptr. Creating it for this 3983 one call site would be overkill. So inline the ss add 3984 routines here. */ 3985 dSS_ADD; 3986 SS_ADD_PTR(dsv); 3987 SS_ADD_PTR(location); 3988 SS_ADD_PTR(SvREFCNT_inc(*location)); 3989 SS_ADD_UV(SAVEt_GVSLOT); 3990 SS_ADD_END(4); 3991 } 3992 else SAVEGENERICSV(*location); 3993 } 3994 dref = *location; 3995 if (stype == SVt_PVCV && (*location != sref || GvCVGEN(dsv))) { 3996 CV* const cv = MUTABLE_CV(*location); 3997 if (cv) { 3998 if (!GvCVGEN((const GV *)dsv) && 3999 (CvROOT(cv) || CvXSUB(cv)) && 4000 /* redundant check that avoids creating the extra SV 4001 most of the time: */ 4002 (CvCONST(cv) || (ckWARN(WARN_REDEFINE) && !intro))) 4003 { 4004 SV * const new_const_sv = 4005 CvCONST((const CV *)sref) 4006 ? cv_const_sv_or_av((const CV *)sref) 4007 : NULL; 4008 HV * const stash = GvSTASH((const GV *)dsv); 4009 report_redefined_cv( 4010 sv_2mortal( 4011 stash 4012 ? Perl_newSVpvf(aTHX_ 4013 "%" HEKf "::%" HEKf, 4014 HEKfARG(HvNAME_HEK(stash)), 4015 HEKfARG(GvENAME_HEK(MUTABLE_GV(dsv)))) 4016 : Perl_newSVpvf(aTHX_ 4017 "%" HEKf, 4018 HEKfARG(GvENAME_HEK(MUTABLE_GV(dsv)))) 4019 ), 4020 cv, 4021 CvCONST((const CV *)sref) ? &new_const_sv : NULL 4022 ); 4023 } 4024 if (!intro) 4025 cv_ckproto_len_flags(cv, (const GV *)dsv, 4026 SvPOK(sref) ? CvPROTO(sref) : NULL, 4027 SvPOK(sref) ? CvPROTOLEN(sref) : 0, 4028 SvPOK(sref) ? SvUTF8(sref) : 0); 4029 } 4030 GvCVGEN(dsv) = 0; /* Switch off cacheness. */ 4031 GvASSUMECV_on(dsv); 4032 if(GvSTASH(dsv)) { /* sub foo { 1 } sub bar { 2 } *bar = \&foo */ 4033 if (intro && GvREFCNT(dsv) > 1) { 4034 /* temporary remove extra savestack's ref */ 4035 --GvREFCNT(dsv); 4036 gv_method_changed(dsv); 4037 ++GvREFCNT(dsv); 4038 } 4039 else gv_method_changed(dsv); 4040 } 4041 } 4042 *location = SvREFCNT_inc_simple_NN(sref); 4043 if (import_flag && !(GvFLAGS(dsv) & import_flag) 4044 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) { 4045 GvFLAGS(dsv) |= import_flag; 4046 } 4047 4048 if (stype == SVt_PVHV) { 4049 const char * const name = GvNAME((GV*)dsv); 4050 const STRLEN len = GvNAMELEN(dsv); 4051 if ( 4052 ( 4053 (len > 1 && name[len-2] == ':' && name[len-1] == ':') 4054 || (len == 1 && name[0] == ':') 4055 ) 4056 && (!dref || HvHasENAME(dref)) 4057 ) { 4058 mro_package_moved( 4059 (HV *)sref, (HV *)dref, 4060 (GV *)dsv, 0 4061 ); 4062 } 4063 } 4064 else if ( 4065 stype == SVt_PVAV && sref != dref 4066 && memEQs(GvNAME((GV*)dsv), GvNAMELEN((GV*)dsv), "ISA") 4067 /* The stash may have been detached from the symbol table, so 4068 check its name before doing anything. */ 4069 && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv)) 4070 ) { 4071 MAGIC *mg; 4072 MAGIC * const omg = dref && SvSMAGICAL(dref) 4073 ? mg_find(dref, PERL_MAGIC_isa) 4074 : NULL; 4075 if (SvSMAGICAL(sref) && (mg = mg_find(sref, PERL_MAGIC_isa))) { 4076 if (SvTYPE(mg->mg_obj) != SVt_PVAV) { 4077 AV * const ary = newAV_alloc_xz(4); 4078 av_push_simple(ary, mg->mg_obj); /* takes the refcount */ 4079 mg->mg_obj = (SV *)ary; 4080 } 4081 if (omg) { 4082 if (SvTYPE(omg->mg_obj) == SVt_PVAV) { 4083 SV **svp = AvARRAY((AV *)omg->mg_obj); 4084 I32 items = AvFILLp((AV *)omg->mg_obj) + 1; 4085 while (items--) 4086 av_push( 4087 (AV *)mg->mg_obj, 4088 SvREFCNT_inc_simple_NN(*svp++) 4089 ); 4090 } 4091 else 4092 av_push( 4093 (AV *)mg->mg_obj, 4094 SvREFCNT_inc_simple_NN(omg->mg_obj) 4095 ); 4096 } 4097 else 4098 av_push((AV *)mg->mg_obj,SvREFCNT_inc_simple_NN(dsv)); 4099 } 4100 else 4101 { 4102 SSize_t i; 4103 sv_magic( 4104 sref, omg ? omg->mg_obj : dsv, PERL_MAGIC_isa, NULL, 0 4105 ); 4106 for (i = 0; i <= AvFILL(sref); ++i) { 4107 SV **elem = av_fetch ((AV*)sref, i, 0); 4108 if (elem) { 4109 sv_magic( 4110 *elem, sref, PERL_MAGIC_isaelem, NULL, i 4111 ); 4112 } 4113 } 4114 mg = mg_find(sref, PERL_MAGIC_isa); 4115 } 4116 /* Since the *ISA assignment could have affected more than 4117 one stash, don't call mro_isa_changed_in directly, but let 4118 magic_clearisa do it for us, as it already has the logic for 4119 dealing with globs vs arrays of globs. */ 4120 assert(mg); 4121 Perl_magic_clearisa(aTHX_ NULL, mg); 4122 } 4123 else if (stype == SVt_PVIO) { 4124 DEBUG_o(Perl_deb(aTHX_ "gv_setref clearing PL_stashcache\n")); 4125 /* It's a cache. It will rebuild itself quite happily. 4126 It's a lot of effort to work out exactly which key (or keys) 4127 might be invalidated by the creation of the this file handle. 4128 */ 4129 hv_clear(PL_stashcache); 4130 } 4131 break; 4132 } 4133 if (!intro) SvREFCNT_dec(dref); 4134 if (SvTAINTED(ssv)) 4135 SvTAINT(dsv); 4136 return; 4137 } 4138 4139 4140 4141 4142 #ifdef PERL_DEBUG_READONLY_COW 4143 # include <sys/mman.h> 4144 4145 # ifndef PERL_MEMORY_DEBUG_HEADER_SIZE 4146 # define PERL_MEMORY_DEBUG_HEADER_SIZE 0 4147 # endif 4148 4149 void 4150 Perl_sv_buf_to_ro(pTHX_ SV *sv) 4151 { 4152 struct perl_memory_debug_header * const header = 4153 (struct perl_memory_debug_header *)(SvPVX(sv)-PERL_MEMORY_DEBUG_HEADER_SIZE); 4154 const MEM_SIZE len = header->size; 4155 PERL_ARGS_ASSERT_SV_BUF_TO_RO; 4156 # ifdef PERL_TRACK_MEMPOOL 4157 if (!header->readonly) header->readonly = 1; 4158 # endif 4159 if (mprotect(header, len, PROT_READ)) 4160 Perl_warn(aTHX_ "mprotect RW for COW string %p %lu failed with %d", 4161 header, len, errno); 4162 } 4163 4164 static void 4165 S_sv_buf_to_rw(pTHX_ SV *sv) 4166 { 4167 struct perl_memory_debug_header * const header = 4168 (struct perl_memory_debug_header *)(SvPVX(sv)-PERL_MEMORY_DEBUG_HEADER_SIZE); 4169 const MEM_SIZE len = header->size; 4170 PERL_ARGS_ASSERT_SV_BUF_TO_RW; 4171 if (mprotect(header, len, PROT_READ|PROT_WRITE)) 4172 Perl_warn(aTHX_ "mprotect for COW string %p %lu failed with %d", 4173 header, len, errno); 4174 # ifdef PERL_TRACK_MEMPOOL 4175 header->readonly = 0; 4176 # endif 4177 } 4178 4179 #else 4180 # define sv_buf_to_ro(sv) NOOP 4181 # define sv_buf_to_rw(sv) NOOP 4182 #endif 4183 4184 void 4185 Perl_sv_setsv_flags(pTHX_ SV *dsv, SV* ssv, const I32 flags) 4186 { 4187 U32 sflags; 4188 int dtype; 4189 svtype stype; 4190 unsigned int both_type; 4191 4192 PERL_ARGS_ASSERT_SV_SETSV_FLAGS; 4193 4194 if (UNLIKELY( ssv == dsv )) 4195 return; 4196 4197 if (UNLIKELY( !ssv )) 4198 ssv = &PL_sv_undef; 4199 4200 stype = SvTYPE(ssv); 4201 dtype = SvTYPE(dsv); 4202 both_type = (stype | dtype); 4203 4204 /* with these values, we can check that both SVs are NULL/IV (and not 4205 * freed) just by testing the or'ed types */ 4206 STATIC_ASSERT_STMT(SVt_NULL == 0); 4207 STATIC_ASSERT_STMT(SVt_IV == 1); 4208 STATIC_ASSERT_STMT(SVt_NV == 2); 4209 #if NVSIZE <= IVSIZE 4210 if (both_type <= 2) { 4211 #else 4212 if (both_type <= 1) { 4213 #endif 4214 /* both src and dst are UNDEF/IV/RV - maybe NV depending on config, 4215 * so we can do a lot of special-casing */ 4216 U32 sflags; 4217 U32 new_dflags; 4218 SV *old_rv = NULL; 4219 4220 /* minimal subset of SV_CHECK_THINKFIRST_COW_DROP(dsv) */ 4221 if (SvREADONLY(dsv)) 4222 Perl_croak_no_modify(); 4223 if (SvROK(dsv)) { 4224 if (SvWEAKREF(dsv)) 4225 sv_unref_flags(dsv, 0); 4226 else 4227 old_rv = SvRV(dsv); 4228 SvROK_off(dsv); 4229 } 4230 4231 assert(!SvGMAGICAL(ssv)); 4232 assert(!SvGMAGICAL(dsv)); 4233 4234 sflags = SvFLAGS(ssv); 4235 if (sflags & (SVf_IOK|SVf_ROK)) { 4236 SET_SVANY_FOR_BODYLESS_IV(dsv); 4237 new_dflags = SVt_IV; 4238 4239 if (sflags & SVf_ROK) { 4240 dsv->sv_u.svu_rv = SvREFCNT_inc(SvRV(ssv)); 4241 new_dflags |= SVf_ROK; 4242 } 4243 else { 4244 /* both src and dst are <= SVt_IV, so sv_any points to the 4245 * head; so access the head directly 4246 */ 4247 assert( &(ssv->sv_u.svu_iv) 4248 == &(((XPVIV*) SvANY(ssv))->xiv_iv)); 4249 assert( &(dsv->sv_u.svu_iv) 4250 == &(((XPVIV*) SvANY(dsv))->xiv_iv)); 4251 dsv->sv_u.svu_iv = ssv->sv_u.svu_iv; 4252 new_dflags |= (SVf_IOK|SVp_IOK|(sflags & SVf_IVisUV)); 4253 } 4254 } 4255 #if NVSIZE <= IVSIZE 4256 else if (sflags & SVf_NOK) { 4257 SET_SVANY_FOR_BODYLESS_NV(dsv); 4258 new_dflags = (SVt_NV|SVf_NOK|SVp_NOK); 4259 4260 /* both src and dst are <= SVt_MV, so sv_any points to the 4261 * head; so access the head directly 4262 */ 4263 assert( &(ssv->sv_u.svu_nv) 4264 == &(((XPVNV*) SvANY(ssv))->xnv_u.xnv_nv)); 4265 assert( &(dsv->sv_u.svu_nv) 4266 == &(((XPVNV*) SvANY(dsv))->xnv_u.xnv_nv)); 4267 dsv->sv_u.svu_nv = ssv->sv_u.svu_nv; 4268 } 4269 #endif 4270 else { 4271 new_dflags = dtype; /* turn off everything except the type */ 4272 } 4273 /* Should preserve some dsv flags - at least SVs_TEMP, */ 4274 /* so cannot just set SvFLAGS(dsv) = new_dflags */ 4275 /* First clear the flags that we do want to clobber */ 4276 (void)SvOK_off(dsv); 4277 SvFLAGS(dsv) &= ~SVTYPEMASK; 4278 /* Now set the new flags */ 4279 SvFLAGS(dsv) |= new_dflags; 4280 4281 SvREFCNT_dec(old_rv); 4282 return; 4283 } 4284 4285 if (UNLIKELY(both_type == SVTYPEMASK)) { 4286 if (SvIS_FREED(dsv)) { 4287 Perl_croak(aTHX_ "panic: attempt to copy value %" SVf 4288 " to a freed scalar %p", SVfARG(ssv), (void *)dsv); 4289 } 4290 if (SvIS_FREED(ssv)) { 4291 Perl_croak(aTHX_ "panic: attempt to copy freed scalar %p to %p", 4292 (void*)ssv, (void*)dsv); 4293 } 4294 } 4295 4296 4297 4298 SV_CHECK_THINKFIRST_COW_DROP(dsv); 4299 dtype = SvTYPE(dsv); /* THINKFIRST may have changed type */ 4300 4301 /* There's a lot of redundancy below but we're going for speed here 4302 * Note: some of the cases below do return; rather than break; so the 4303 * if-elseif-else logic below this switch does not see all cases. */ 4304 4305 switch (stype) { 4306 case SVt_NULL: 4307 undef_sstr: 4308 if (LIKELY( dtype != SVt_PVGV && dtype != SVt_PVLV )) { 4309 (void)SvOK_off(dsv); 4310 return; 4311 } 4312 break; 4313 case SVt_IV: 4314 if (SvIOK(ssv)) { 4315 switch (dtype) { 4316 case SVt_NULL: 4317 /* For performance, we inline promoting to type SVt_IV. */ 4318 /* We're starting from SVt_NULL, so provided that define is 4319 * actual 0, we don't have to unset any SV type flags 4320 * to promote to SVt_IV. */ 4321 STATIC_ASSERT_STMT(SVt_NULL == 0); 4322 SET_SVANY_FOR_BODYLESS_IV(dsv); 4323 SvFLAGS(dsv) |= SVt_IV; 4324 break; 4325 case SVt_NV: 4326 case SVt_PV: 4327 sv_upgrade(dsv, SVt_PVIV); 4328 break; 4329 case SVt_PVGV: 4330 case SVt_PVLV: 4331 goto end_of_first_switch; 4332 } 4333 (void)SvIOK_only(dsv); 4334 SvIV_set(dsv, SvIVX(ssv)); 4335 if (SvIsUV(ssv)) 4336 SvIsUV_on(dsv); 4337 /* SvTAINTED can only be true if the SV has taint magic, which in 4338 turn means that the SV type is PVMG (or greater). This is the 4339 case statement for SVt_IV, so this cannot be true (whatever gcov 4340 may say). */ 4341 assert(!SvTAINTED(ssv)); 4342 return; 4343 } 4344 if (!SvROK(ssv)) 4345 goto undef_sstr; 4346 if (dtype < SVt_PV && dtype != SVt_IV) 4347 sv_upgrade(dsv, SVt_IV); 4348 break; 4349 4350 case SVt_NV: 4351 if (LIKELY( SvNOK(ssv) )) { 4352 switch (dtype) { 4353 case SVt_NULL: 4354 case SVt_IV: 4355 sv_upgrade(dsv, SVt_NV); 4356 break; 4357 case SVt_PV: 4358 case SVt_PVIV: 4359 sv_upgrade(dsv, SVt_PVNV); 4360 break; 4361 case SVt_PVGV: 4362 case SVt_PVLV: 4363 goto end_of_first_switch; 4364 } 4365 SvNV_set(dsv, SvNVX(ssv)); 4366 (void)SvNOK_only(dsv); 4367 /* SvTAINTED can only be true if the SV has taint magic, which in 4368 turn means that the SV type is PVMG (or greater). This is the 4369 case statement for SVt_NV, so this cannot be true (whatever gcov 4370 may say). */ 4371 assert(!SvTAINTED(ssv)); 4372 return; 4373 } 4374 goto undef_sstr; 4375 4376 case SVt_PV: 4377 if (dtype < SVt_PV) 4378 sv_upgrade(dsv, SVt_PV); 4379 break; 4380 case SVt_PVIV: 4381 if (dtype < SVt_PVIV) 4382 sv_upgrade(dsv, SVt_PVIV); 4383 break; 4384 case SVt_PVNV: 4385 if (dtype < SVt_PVNV) 4386 sv_upgrade(dsv, SVt_PVNV); 4387 break; 4388 4389 case SVt_INVLIST: 4390 invlist_clone(ssv, dsv); 4391 return; 4392 default: 4393 { 4394 const char * const type = sv_reftype(ssv,0); 4395 if (PL_op) 4396 /* diag_listed_as: Bizarre copy of %s */ 4397 Perl_croak(aTHX_ "Bizarre copy of %s in %s", type, OP_DESC(PL_op)); 4398 else 4399 Perl_croak(aTHX_ "Bizarre copy of %s", type); 4400 } 4401 NOT_REACHED; /* NOTREACHED */ 4402 4403 case SVt_REGEXP: 4404 upgregexp: 4405 if (dtype < SVt_REGEXP) 4406 sv_upgrade(dsv, SVt_REGEXP); 4407 break; 4408 4409 case SVt_PVLV: 4410 case SVt_PVGV: 4411 case SVt_PVMG: 4412 if (SvGMAGICAL(ssv) && (flags & SV_GMAGIC)) { 4413 mg_get(ssv); 4414 if (SvTYPE(ssv) != stype) 4415 stype = SvTYPE(ssv); 4416 } 4417 if (isGV_with_GP(ssv) && dtype <= SVt_PVLV) { 4418 glob_assign_glob(dsv, ssv, dtype); 4419 return; 4420 } 4421 if (stype == SVt_PVLV) 4422 { 4423 if (isREGEXP(ssv)) goto upgregexp; 4424 SvUPGRADE(dsv, SVt_PVNV); 4425 } 4426 else 4427 SvUPGRADE(dsv, (svtype)stype); 4428 } 4429 end_of_first_switch: 4430 4431 /* dsv may have been upgraded. */ 4432 dtype = SvTYPE(dsv); 4433 sflags = SvFLAGS(ssv); 4434 4435 if (UNLIKELY( dtype == SVt_PVCV )) { 4436 /* Assigning to a subroutine sets the prototype. */ 4437 if (SvOK(ssv)) { 4438 STRLEN len; 4439 const char *const ptr = SvPV_const(ssv, len); 4440 4441 SvGROW(dsv, len + 1); 4442 Copy(ptr, SvPVX(dsv), len + 1, char); 4443 SvCUR_set(dsv, len); 4444 SvPOK_only(dsv); 4445 SvFLAGS(dsv) |= sflags & SVf_UTF8; 4446 CvAUTOLOAD_off(dsv); 4447 } else { 4448 SvOK_off(dsv); 4449 } 4450 } 4451 else if (UNLIKELY(dtype == SVt_PVAV || dtype == SVt_PVHV 4452 || dtype == SVt_PVFM)) 4453 { 4454 const char * const type = sv_reftype(dsv,0); 4455 if (PL_op) 4456 /* diag_listed_as: Cannot copy to %s */ 4457 Perl_croak(aTHX_ "Cannot copy to %s in %s", type, OP_DESC(PL_op)); 4458 else 4459 Perl_croak(aTHX_ "Cannot copy to %s", type); 4460 } else if (sflags & SVf_ROK) { 4461 if (isGV_with_GP(dsv) 4462 && SvTYPE(SvRV(ssv)) == SVt_PVGV && isGV_with_GP(SvRV(ssv))) { 4463 ssv = SvRV(ssv); 4464 if (ssv == dsv) { 4465 if (GvIMPORTED(dsv) != GVf_IMPORTED 4466 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) 4467 { 4468 GvIMPORTED_on(dsv); 4469 } 4470 GvMULTI_on(dsv); 4471 return; 4472 } 4473 glob_assign_glob(dsv, ssv, dtype); 4474 return; 4475 } 4476 4477 if (dtype >= SVt_PV) { 4478 if (isGV_with_GP(dsv)) { 4479 gv_setref(dsv, ssv); 4480 return; 4481 } 4482 if (SvPVX_const(dsv)) { 4483 SvPV_free(dsv); 4484 SvLEN_set(dsv, 0); 4485 SvCUR_set(dsv, 0); 4486 } 4487 } 4488 (void)SvOK_off(dsv); 4489 SvRV_set(dsv, SvREFCNT_inc(SvRV(ssv))); 4490 SvFLAGS(dsv) |= sflags & SVf_ROK; 4491 assert(!(sflags & SVp_NOK)); 4492 assert(!(sflags & SVp_IOK)); 4493 assert(!(sflags & SVf_NOK)); 4494 assert(!(sflags & SVf_IOK)); 4495 } 4496 else if (isGV_with_GP(dsv)) { 4497 if (!(sflags & SVf_OK)) { 4498 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4499 "Undefined value assigned to typeglob"); 4500 } 4501 else { 4502 GV *gv = gv_fetchsv_nomg(ssv, GV_ADD, SVt_PVGV); 4503 if (dsv != (const SV *)gv) { 4504 const char * const name = GvNAME((const GV *)dsv); 4505 const STRLEN len = GvNAMELEN(dsv); 4506 HV *old_stash = NULL; 4507 bool reset_isa = FALSE; 4508 if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') 4509 || (len == 1 && name[0] == ':')) { 4510 /* Set aside the old stash, so we can reset isa caches 4511 on its subclasses. */ 4512 if((old_stash = GvHV(dsv))) { 4513 /* Make sure we do not lose it early. */ 4514 SvREFCNT_inc_simple_void_NN( 4515 sv_2mortal((SV *)old_stash) 4516 ); 4517 } 4518 reset_isa = TRUE; 4519 } 4520 4521 if (GvGP(dsv)) { 4522 SvREFCNT_inc_simple_void_NN(sv_2mortal(dsv)); 4523 gp_free(MUTABLE_GV(dsv)); 4524 } 4525 GvGP_set(dsv, gp_ref(GvGP(gv))); 4526 4527 if (reset_isa) { 4528 HV * const stash = GvHV(dsv); 4529 if( 4530 old_stash ? HvHasENAME(old_stash) : cBOOL(stash) 4531 ) 4532 mro_package_moved( 4533 stash, old_stash, 4534 (GV *)dsv, 0 4535 ); 4536 } 4537 } 4538 } 4539 } 4540 else if ((dtype == SVt_REGEXP || dtype == SVt_PVLV) 4541 && (stype == SVt_REGEXP || isREGEXP(ssv))) { 4542 reg_temp_copy((REGEXP*)dsv, (REGEXP*)ssv); 4543 } 4544 else if (sflags & SVp_POK) { 4545 const STRLEN cur = SvCUR(ssv); 4546 const STRLEN len = SvLEN(ssv); 4547 4548 /* 4549 * We have three basic ways to copy the string: 4550 * 4551 * 1. Swipe 4552 * 2. Copy-on-write 4553 * 3. Actual copy 4554 * 4555 * Which we choose is based on various factors. The following 4556 * things are listed in order of speed, fastest to slowest: 4557 * - Swipe 4558 * - Copying a short string 4559 * - Copy-on-write bookkeeping 4560 * - malloc 4561 * - Copying a long string 4562 * 4563 * We swipe the string (steal the string buffer) if the SV on the 4564 * rhs is about to be freed anyway (TEMP and refcnt==1). This is a 4565 * big win on long strings. It should be a win on short strings if 4566 * SvPVX_const(dsv) has to be allocated. If not, it should not 4567 * slow things down, as SvPVX_const(ssv) would have been freed 4568 * soon anyway. 4569 * 4570 * We also steal the buffer from a PADTMP (operator target) if it 4571 * is ‘long enough’. For short strings, a swipe does not help 4572 * here, as it causes more malloc calls the next time the target 4573 * is used. Benchmarks show that even if SvPVX_const(dsv) has to 4574 * be allocated it is still not worth swiping PADTMPs for short 4575 * strings, as the savings here are small. 4576 * 4577 * If swiping is not an option, then we see whether it is worth using 4578 * copy-on-write. If the lhs already has a buffer big enough and the 4579 * string is short, we skip it and fall back to method 3, since memcpy 4580 * is faster for short strings than the later bookkeeping overhead that 4581 * copy-on-write entails. 4582 4583 * If the rhs is not a copy-on-write string yet, then we also 4584 * consider whether the buffer is too large relative to the string 4585 * it holds. Some operations such as readline allocate a large 4586 * buffer in the expectation of reusing it. But turning such into 4587 * a COW buffer is counter-productive because it increases memory 4588 * usage by making readline allocate a new large buffer the sec- 4589 * ond time round. So, if the buffer is too large, again, we use 4590 * method 3 (copy). 4591 * 4592 * Finally, if there is no buffer on the left, or the buffer is too 4593 * small, then we use copy-on-write and make both SVs share the 4594 * string buffer. 4595 * 4596 */ 4597 4598 /* Whichever path we take through the next code, we want this true, 4599 and doing it now facilitates the COW check. */ 4600 (void)SvPOK_only(dsv); 4601 4602 if ( 4603 ( /* Either ... */ 4604 /* slated for free anyway (and not COW)? */ 4605 (sflags & (SVs_TEMP|SVf_IsCOW)) == SVs_TEMP 4606 /* or a swipable TARG */ 4607 || ((sflags & 4608 (SVs_PADTMP|SVf_READONLY|SVf_PROTECT|SVf_IsCOW)) 4609 == SVs_PADTMP 4610 /* whose buffer is worth stealing */ 4611 && CHECK_COWBUF_THRESHOLD(cur,len) 4612 ) 4613 ) && 4614 !(sflags & SVf_OOK) && /* and not involved in OOK hack? */ 4615 (!(flags & SV_NOSTEAL)) && 4616 /* and we're allowed to steal temps */ 4617 SvREFCNT(ssv) == 1 && /* and no other references to it? */ 4618 len) /* and really is a string */ 4619 { /* Passes the swipe test. */ 4620 if (SvPVX_const(dsv)) /* we know that dtype >= SVt_PV */ 4621 SvPV_free(dsv); 4622 SvPV_set(dsv, SvPVX_mutable(ssv)); 4623 SvLEN_set(dsv, SvLEN(ssv)); 4624 SvCUR_set(dsv, SvCUR(ssv)); 4625 4626 SvTEMP_off(dsv); 4627 (void)SvOK_off(ssv); /* NOTE: nukes most SvFLAGS on ssv */ 4628 SvPV_set(ssv, NULL); 4629 SvLEN_set(ssv, 0); 4630 SvCUR_set(ssv, 0); 4631 SvTEMP_off(ssv); 4632 } 4633 /* We must check for SvIsCOW_static() even without 4634 * SV_COW_SHARED_HASH_KEYS being set or else we'll break SvIsBOOL() 4635 */ 4636 else if (SvIsCOW_static(ssv)) { 4637 if (SvPVX_const(dsv)) { /* we know that dtype >= SVt_PV */ 4638 SvPV_free(dsv); 4639 } 4640 SvPV_set(dsv, SvPVX(ssv)); 4641 SvLEN_set(dsv, 0); 4642 SvCUR_set(dsv, cur); 4643 SvFLAGS(dsv) |= (SVf_IsCOW|SVppv_STATIC); 4644 } 4645 else if (flags & SV_COW_SHARED_HASH_KEYS 4646 && 4647 #ifdef PERL_COPY_ON_WRITE 4648 (sflags & SVf_IsCOW 4649 ? (!len || 4650 ( (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dsv) < cur+1) 4651 /* If this is a regular (non-hek) COW, only so 4652 many COW "copies" are possible. */ 4653 && CowREFCNT(ssv) != SV_COW_REFCNT_MAX )) 4654 : ( (sflags & CAN_COW_MASK) == CAN_COW_FLAGS 4655 && !(SvFLAGS(dsv) & SVf_BREAK) 4656 && CHECK_COW_THRESHOLD(cur,len) && cur+1 < len 4657 && (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dsv) < cur+1) 4658 )) 4659 #else 4660 sflags & SVf_IsCOW 4661 && !(SvFLAGS(dsv) & SVf_BREAK) 4662 #endif 4663 ) { 4664 /* Either it's a shared hash key, or it's suitable for 4665 copy-on-write. */ 4666 #ifdef DEBUGGING 4667 if (DEBUG_C_TEST) { 4668 PerlIO_printf(Perl_debug_log, "Copy on write: ssv --> dsv\n"); 4669 sv_dump(ssv); 4670 sv_dump(dsv); 4671 } 4672 #endif 4673 #ifdef PERL_ANY_COW 4674 if (!(sflags & SVf_IsCOW)) { 4675 SvIsCOW_on(ssv); 4676 CowREFCNT(ssv) = 0; 4677 } 4678 #endif 4679 if (SvPVX_const(dsv)) { /* we know that dtype >= SVt_PV */ 4680 SvPV_free(dsv); 4681 } 4682 4683 #ifdef PERL_ANY_COW 4684 if (len) { 4685 if (sflags & SVf_IsCOW) { 4686 sv_buf_to_rw(ssv); 4687 } 4688 CowREFCNT(ssv)++; 4689 SvPV_set(dsv, SvPVX_mutable(ssv)); 4690 sv_buf_to_ro(ssv); 4691 } else 4692 #endif 4693 { 4694 /* SvIsCOW_shared_hash */ 4695 DEBUG_C(PerlIO_printf(Perl_debug_log, 4696 "Copy on write: Sharing hash\n")); 4697 4698 assert (SvTYPE(dsv) >= SVt_PV); 4699 SvPV_set(dsv, 4700 HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv))))); 4701 } 4702 SvLEN_set(dsv, len); 4703 SvCUR_set(dsv, cur); 4704 SvIsCOW_on(dsv); 4705 } else { 4706 /* Failed the swipe test, and we cannot do copy-on-write either. 4707 Have to copy the string. */ 4708 SvGROW(dsv, cur + 1); /* inlined from sv_setpvn */ 4709 Move(SvPVX_const(ssv),SvPVX(dsv),cur,char); 4710 SvCUR_set(dsv, cur); 4711 *SvEND(dsv) = '\0'; 4712 } 4713 if (sflags & SVp_NOK) { 4714 SvNV_set(dsv, SvNVX(ssv)); 4715 if ((sflags & SVf_NOK) && !(sflags & SVf_POK)) { 4716 /* Source was SVf_NOK|SVp_NOK|SVp_POK but not SVf_POK, meaning 4717 a value set as floating point and later stringified, where 4718 the value happens to be one of the few that we know aren't 4719 affected by the numeric locale, hence we can cache the 4720 stringification. Currently that's +Inf, -Inf and NaN, but 4721 conceivably we might extend this to -9 .. +9 (excluding -0). 4722 So mark destination the same: */ 4723 SvFLAGS(dsv) &= ~SVf_POK; 4724 } 4725 } 4726 if (sflags & SVp_IOK) { 4727 SvIV_set(dsv, SvIVX(ssv)); 4728 if (sflags & SVf_IVisUV) 4729 SvIsUV_on(dsv); 4730 if ((sflags & SVf_IOK) && !(sflags & SVf_POK)) { 4731 /* Source was SVf_IOK|SVp_IOK|SVp_POK but not SVf_POK, meaning 4732 a value set as an integer and later stringified. So mark 4733 destination the same: */ 4734 SvFLAGS(dsv) &= ~SVf_POK; 4735 } 4736 } 4737 SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8); 4738 { 4739 const MAGIC * const smg = SvVSTRING_mg(ssv); 4740 if (smg) { 4741 sv_magic(dsv, NULL, PERL_MAGIC_vstring, 4742 smg->mg_ptr, smg->mg_len); 4743 SvRMAGICAL_on(dsv); 4744 } 4745 } 4746 } 4747 else if (sflags & (SVp_IOK|SVp_NOK)) { 4748 (void)SvOK_off(dsv); 4749 SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_IVisUV|SVf_NOK|SVp_NOK); 4750 if (sflags & SVp_IOK) { 4751 /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ 4752 SvIV_set(dsv, SvIVX(ssv)); 4753 } 4754 if (sflags & SVp_NOK) { 4755 SvNV_set(dsv, SvNVX(ssv)); 4756 } 4757 } 4758 else { 4759 if (isGV_with_GP(ssv)) { 4760 gv_efullname3(dsv, MUTABLE_GV(ssv), "*"); 4761 } 4762 else 4763 (void)SvOK_off(dsv); 4764 } 4765 if (SvTAINTED(ssv)) 4766 SvTAINT(dsv); 4767 } 4768 4769 4770 /* 4771 =for apidoc sv_set_undef 4772 4773 Equivalent to C<sv_setsv(sv, &PL_sv_undef)>, but more efficient. 4774 Doesn't handle set magic. 4775 4776 The perl equivalent is C<$sv = undef;>. Note that it doesn't free any string 4777 buffer, unlike C<undef $sv>. 4778 4779 Introduced in perl 5.25.12. 4780 4781 =cut 4782 */ 4783 4784 void 4785 Perl_sv_set_undef(pTHX_ SV *sv) 4786 { 4787 U32 type = SvTYPE(sv); 4788 4789 PERL_ARGS_ASSERT_SV_SET_UNDEF; 4790 4791 /* shortcut, NULL, IV, RV */ 4792 4793 if (type <= SVt_IV) { 4794 assert(!SvGMAGICAL(sv)); 4795 if (SvREADONLY(sv)) { 4796 /* does undeffing PL_sv_undef count as modifying a read-only 4797 * variable? Some XS code does this */ 4798 if (sv == &PL_sv_undef) 4799 return; 4800 Perl_croak_no_modify(); 4801 } 4802 4803 if (SvROK(sv)) { 4804 if (SvWEAKREF(sv)) 4805 sv_unref_flags(sv, 0); 4806 else { 4807 SV *rv = SvRV(sv); 4808 SvFLAGS(sv) = type; /* quickly turn off all flags */ 4809 SvREFCNT_dec_NN(rv); 4810 return; 4811 } 4812 } 4813 SvFLAGS(sv) = type; /* quickly turn off all flags */ 4814 return; 4815 } 4816 4817 if (SvIS_FREED(sv)) 4818 Perl_croak(aTHX_ "panic: attempt to undefine a freed scalar %p", 4819 (void *)sv); 4820 4821 SV_CHECK_THINKFIRST_COW_DROP(sv); 4822 4823 if (isGV_with_GP(sv)) 4824 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4825 "Undefined value assigned to typeglob"); 4826 else 4827 SvOK_off(sv); 4828 } 4829 4830 /* 4831 =for apidoc sv_set_true 4832 4833 Equivalent to C<sv_setsv(sv, &PL_sv_yes)>, but may be made more 4834 efficient in the future. Doesn't handle set magic. 4835 4836 The perl equivalent is C<$sv = !0;>. 4837 4838 Introduced in perl 5.35.11. 4839 4840 =cut 4841 */ 4842 4843 void 4844 Perl_sv_set_true(pTHX_ SV *sv) 4845 { 4846 PERL_ARGS_ASSERT_SV_SET_TRUE; 4847 sv_setsv(sv, &PL_sv_yes); 4848 } 4849 4850 /* 4851 =for apidoc sv_set_false 4852 4853 Equivalent to C<sv_setsv(sv, &PL_sv_no)>, but may be made more 4854 efficient in the future. Doesn't handle set magic. 4855 4856 The perl equivalent is C<$sv = !1;>. 4857 4858 Introduced in perl 5.35.11. 4859 4860 =cut 4861 */ 4862 4863 void 4864 Perl_sv_set_false(pTHX_ SV *sv) 4865 { 4866 PERL_ARGS_ASSERT_SV_SET_FALSE; 4867 sv_setsv(sv, &PL_sv_no); 4868 } 4869 4870 /* 4871 =for apidoc sv_set_bool 4872 4873 Equivalent to C<sv_setsv(sv, bool_val ? &Pl_sv_yes : &PL_sv_no)>, but 4874 may be made more efficient in the future. Doesn't handle set magic. 4875 4876 The perl equivalent is C<$sv = !!$expr;>. 4877 4878 Introduced in perl 5.35.11. 4879 4880 =cut 4881 */ 4882 4883 void 4884 Perl_sv_set_bool(pTHX_ SV *sv, const bool bool_val) 4885 { 4886 PERL_ARGS_ASSERT_SV_SET_BOOL; 4887 sv_setsv(sv, bool_val ? &PL_sv_yes : &PL_sv_no); 4888 } 4889 4890 4891 void 4892 Perl_sv_setsv_mg(pTHX_ SV *const dsv, SV *const ssv) 4893 { 4894 PERL_ARGS_ASSERT_SV_SETSV_MG; 4895 4896 sv_setsv(dsv,ssv); 4897 SvSETMAGIC(dsv); 4898 } 4899 4900 #ifdef PERL_ANY_COW 4901 # define SVt_COW SVt_PV 4902 SV * 4903 Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) 4904 { 4905 STRLEN cur = SvCUR(ssv); 4906 STRLEN len = SvLEN(ssv); 4907 char *new_pv; 4908 U32 new_flags = (SVt_COW|SVf_POK|SVp_POK|SVf_IsCOW); 4909 #if defined(PERL_DEBUG_READONLY_COW) && defined(PERL_COPY_ON_WRITE) 4910 const bool already = cBOOL(SvIsCOW(ssv)); 4911 #endif 4912 4913 PERL_ARGS_ASSERT_SV_SETSV_COW; 4914 #ifdef DEBUGGING 4915 if (DEBUG_C_TEST) { 4916 PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", 4917 (void*)ssv, (void*)dsv); 4918 sv_dump(ssv); 4919 if (dsv) 4920 sv_dump(dsv); 4921 } 4922 #endif 4923 if (dsv) { 4924 if (SvTHINKFIRST(dsv)) 4925 sv_force_normal_flags(dsv, SV_COW_DROP_PV); 4926 else if (SvPVX_const(dsv)) 4927 Safefree(SvPVX_mutable(dsv)); 4928 SvUPGRADE(dsv, SVt_COW); 4929 } 4930 else 4931 dsv = newSV_type(SVt_COW); 4932 4933 assert (SvPOK(ssv)); 4934 assert (SvPOKp(ssv)); 4935 4936 if (SvIsCOW(ssv)) { 4937 if (SvIsCOW_shared_hash(ssv)) { 4938 /* source is a COW shared hash key. */ 4939 DEBUG_C(PerlIO_printf(Perl_debug_log, 4940 "Fast copy on write: Sharing hash\n")); 4941 new_pv = HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv)))); 4942 goto common_exit; 4943 } 4944 else if (SvIsCOW_static(ssv)) { 4945 /* source is static constant; preserve this */ 4946 new_pv = SvPVX(ssv); 4947 new_flags |= SVppv_STATIC; 4948 goto common_exit; 4949 } 4950 assert(SvCUR(ssv)+1 < SvLEN(ssv)); 4951 assert(CowREFCNT(ssv) < SV_COW_REFCNT_MAX); 4952 } else { 4953 assert ((SvFLAGS(ssv) & CAN_COW_MASK) == CAN_COW_FLAGS); 4954 SvUPGRADE(ssv, SVt_COW); 4955 SvIsCOW_on(ssv); 4956 DEBUG_C(PerlIO_printf(Perl_debug_log, 4957 "Fast copy on write: Converting ssv to COW\n")); 4958 CowREFCNT(ssv) = 0; 4959 } 4960 # ifdef PERL_DEBUG_READONLY_COW 4961 if (already) sv_buf_to_rw(ssv); 4962 # endif 4963 CowREFCNT(ssv)++; 4964 new_pv = SvPVX_mutable(ssv); 4965 sv_buf_to_ro(ssv); 4966 4967 common_exit: 4968 SvPV_set(dsv, new_pv); 4969 SvFLAGS(dsv) = new_flags; 4970 if (SvUTF8(ssv)) 4971 SvUTF8_on(dsv); 4972 SvLEN_set(dsv, len); 4973 SvCUR_set(dsv, cur); 4974 #ifdef DEBUGGING 4975 if (DEBUG_C_TEST) 4976 sv_dump(dsv); 4977 #endif 4978 return dsv; 4979 } 4980 #endif 4981 4982 /* 4983 =for apidoc sv_setpv_bufsize 4984 4985 Sets the SV to be a string of C<cur> bytes length, with at least 4986 C<len> bytes available. Ensures that there is a null byte at C<SvEND>. 4987 4988 Returns a char * pointer to the SvPV buffer. 4989 4990 The caller must set the first C<cur> bytes of C<sv> before the first use of its 4991 contents. This means that if C<cur> is zero, the SV is immediately fully 4992 formed and ready to use, just like any other SV containing an empty string. 4993 4994 =cut 4995 */ 4996 4997 char * 4998 Perl_sv_setpv_bufsize(pTHX_ SV *const sv, const STRLEN cur, const STRLEN len) 4999 { 5000 char *pv; 5001 5002 PERL_ARGS_ASSERT_SV_SETPV_BUFSIZE; 5003 5004 SV_CHECK_THINKFIRST_COW_DROP(sv); 5005 SvUPGRADE(sv, SVt_PV); 5006 pv = SvGROW(sv, len + 1); 5007 SvCUR_set(sv, cur); 5008 *(SvEND(sv))= '\0'; 5009 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 5010 5011 SvTAINT(sv); 5012 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); 5013 return pv; 5014 } 5015 5016 /* 5017 =for apidoc sv_setpv 5018 =for apidoc_item sv_setpv_mg 5019 =for apidoc_item sv_setpvn 5020 =for apidoc_item sv_setpvn_fresh 5021 =for apidoc_item sv_setpvn_mg 5022 =for apidoc_item |void|sv_setpvs|SV* sv|"literal string" 5023 =for apidoc_item |void|sv_setpvs_mg|SV* sv|"literal string" 5024 5025 These copy a string into the SV C<sv>, making sure it is C<L</SvPOK_only>>. 5026 5027 In the C<pvs> forms, the string must be a C literal string, enclosed in double 5028 quotes. 5029 5030 In the C<pvn> forms, the first byte of the string is pointed to by C<ptr>, and 5031 C<len> indicates the number of bytes to be copied, potentially including 5032 embedded C<NUL> characters. 5033 5034 In the plain C<pv> forms, C<ptr> points to a NUL-terminated C string. That is, 5035 it points to the first byte of the string, and the copy proceeds up through the 5036 first encountered C<NUL> byte. 5037 5038 In the forms that take a C<ptr> argument, if it is NULL, the SV will become 5039 undefined. 5040 5041 B<The UTF-8 flag is not changed by these functions.> 5042 5043 A terminating NUL byte is guaranteed in the result. 5044 5045 The C<_mg> forms handle 'set' magic; the other forms skip all magic. 5046 5047 C<sv_setpvn_fresh> is a cut-down alternative to C<sv_setpvn>, intended ONLY 5048 to be used with a fresh sv that has been upgraded to a SVt_PV, SVt_PVIV, 5049 SVt_PVNV, or SVt_PVMG. 5050 5051 =cut 5052 */ 5053 5054 void 5055 Perl_sv_setpvn(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) 5056 { 5057 char *dptr; 5058 5059 PERL_ARGS_ASSERT_SV_SETPVN; 5060 5061 SV_CHECK_THINKFIRST_COW_DROP(sv); 5062 if (isGV_with_GP(sv)) 5063 Perl_croak_no_modify(); 5064 if (!ptr) { 5065 (void)SvOK_off(sv); 5066 return; 5067 } 5068 else { 5069 /* len is STRLEN which is unsigned, need to copy to signed */ 5070 const IV iv = len; 5071 if (iv < 0) 5072 Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen %" 5073 IVdf, iv); 5074 } 5075 SvUPGRADE(sv, SVt_PV); 5076 5077 dptr = SvGROW(sv, len + 1); 5078 Move(ptr,dptr,len,char); 5079 dptr[len] = '\0'; 5080 SvCUR_set(sv, len); 5081 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 5082 SvTAINT(sv); 5083 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); 5084 } 5085 5086 void 5087 Perl_sv_setpvn_mg(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) 5088 { 5089 PERL_ARGS_ASSERT_SV_SETPVN_MG; 5090 5091 sv_setpvn(sv,ptr,len); 5092 SvSETMAGIC(sv); 5093 } 5094 5095 void 5096 Perl_sv_setpvn_fresh(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) 5097 { 5098 char *dptr; 5099 5100 PERL_ARGS_ASSERT_SV_SETPVN_FRESH; 5101 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG); 5102 assert(!SvTHINKFIRST(sv)); 5103 assert(!isGV_with_GP(sv)); 5104 5105 if (ptr) { 5106 const IV iv = len; 5107 /* len is STRLEN which is unsigned, need to copy to signed */ 5108 if (iv < 0) 5109 Perl_croak(aTHX_ "panic: sv_setpvn_fresh called with negative strlen %" 5110 IVdf, iv); 5111 5112 dptr = sv_grow_fresh(sv, len + 1); 5113 Move(ptr,dptr,len,char); 5114 dptr[len] = '\0'; 5115 SvCUR_set(sv, len); 5116 SvPOK_on(sv); 5117 SvTAINT(sv); 5118 } 5119 } 5120 5121 void 5122 Perl_sv_setpv(pTHX_ SV *const sv, const char *const ptr) 5123 { 5124 STRLEN len; 5125 5126 PERL_ARGS_ASSERT_SV_SETPV; 5127 5128 SV_CHECK_THINKFIRST_COW_DROP(sv); 5129 if (!ptr) { 5130 (void)SvOK_off(sv); 5131 return; 5132 } 5133 len = strlen(ptr); 5134 SvUPGRADE(sv, SVt_PV); 5135 5136 SvGROW(sv, len + 1); 5137 Move(ptr,SvPVX(sv),len+1,char); 5138 SvCUR_set(sv, len); 5139 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 5140 SvTAINT(sv); 5141 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); 5142 } 5143 5144 void 5145 Perl_sv_setpv_mg(pTHX_ SV *const sv, const char *const ptr) 5146 { 5147 PERL_ARGS_ASSERT_SV_SETPV_MG; 5148 5149 sv_setpv(sv,ptr); 5150 SvSETMAGIC(sv); 5151 } 5152 5153 void 5154 Perl_sv_sethek(pTHX_ SV *const sv, const HEK *const hek) 5155 { 5156 PERL_ARGS_ASSERT_SV_SETHEK; 5157 5158 if (!hek) { 5159 return; 5160 } 5161 5162 if (HEK_LEN(hek) == HEf_SVKEY) { 5163 sv_setsv(sv, *(SV**)HEK_KEY(hek)); 5164 return; 5165 } else { 5166 const int flags = HEK_FLAGS(hek); 5167 if (flags & HVhek_WASUTF8) { 5168 STRLEN utf8_len = HEK_LEN(hek); 5169 char *as_utf8 = (char *)bytes_to_utf8((U8*)HEK_KEY(hek), &utf8_len); 5170 sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); 5171 SvUTF8_on(sv); 5172 return; 5173 } else if (flags & HVhek_NOTSHARED) { 5174 sv_setpvn(sv, HEK_KEY(hek), HEK_LEN(hek)); 5175 if (HEK_UTF8(hek)) 5176 SvUTF8_on(sv); 5177 else SvUTF8_off(sv); 5178 return; 5179 } 5180 { 5181 SV_CHECK_THINKFIRST_COW_DROP(sv); 5182 SvUPGRADE(sv, SVt_PV); 5183 SvPV_free(sv); 5184 SvPV_set(sv,(char *)HEK_KEY(share_hek_hek(hek))); 5185 SvCUR_set(sv, HEK_LEN(hek)); 5186 SvLEN_set(sv, 0); 5187 SvIsCOW_on(sv); 5188 SvPOK_on(sv); 5189 if (HEK_UTF8(hek)) 5190 SvUTF8_on(sv); 5191 else SvUTF8_off(sv); 5192 return; 5193 } 5194 } 5195 } 5196 5197 5198 /* 5199 =for apidoc sv_usepvn 5200 =for apidoc_item sv_usepvn_flags 5201 =for apidoc_item sv_usepvn_mg 5202 5203 These tell an SV to use C<ptr> for its string value. Normally SVs have 5204 their string stored inside the SV, but these tell the SV to use an 5205 external string instead. 5206 5207 C<ptr> should point to memory that was allocated 5208 by L</C<Newx>>. It must be 5209 the start of a C<Newx>-ed block of memory, and not a pointer to the 5210 middle of it (beware of L<C<OOK>|perlguts/Offsets> and copy-on-write), 5211 and not be from a non-C<Newx> memory allocator like C<malloc>. The 5212 string length, C<len>, must be supplied. By default this function 5213 will L</C<Renew>> (i.e. realloc, move) the memory pointed to by C<ptr>, 5214 so that the pointer should not be freed or used by the programmer after giving 5215 it to C<sv_usepvn>, and neither should any pointers from "behind" that pointer 5216 (I<e.g.>, S<C<ptr> + 1>) be used. 5217 5218 In the C<sv_usepvn_flags> form, if S<C<flags & SV_SMAGIC>> is true, 5219 C<SvSETMAGIC> is called before returning. 5220 And if S<C<flags & SV_HAS_TRAILING_NUL>> is true, then C<ptr[len]> must be 5221 C<NUL>, and the realloc will be skipped (I<i.e.>, the buffer is actually at 5222 least 1 byte longer than C<len>, and already meets the requirements for storing 5223 in C<SvPVX>). 5224 5225 C<sv_usepvn> is merely C<sv_usepvn_flags> with C<flags> set to 0, so 'set' 5226 magic is skipped. 5227 5228 C<sv_usepvn_mg> is merely C<sv_usepvn_flags> with C<flags> set to C<SV_SMAGIC>, 5229 so 'set' magic is performed. 5230 5231 =for apidoc Amnh||SV_SMAGIC 5232 =for apidoc Amnh||SV_HAS_TRAILING_NUL 5233 5234 =cut 5235 */ 5236 5237 void 5238 Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32 flags) 5239 { 5240 STRLEN allocate; 5241 5242 PERL_ARGS_ASSERT_SV_USEPVN_FLAGS; 5243 5244 SV_CHECK_THINKFIRST_COW_DROP(sv); 5245 SvUPGRADE(sv, SVt_PV); 5246 if (!ptr) { 5247 (void)SvOK_off(sv); 5248 if (flags & SV_SMAGIC) 5249 SvSETMAGIC(sv); 5250 return; 5251 } 5252 if (SvPVX_const(sv)) 5253 SvPV_free(sv); 5254 5255 #ifdef DEBUGGING 5256 if (flags & SV_HAS_TRAILING_NUL) 5257 assert(ptr[len] == '\0'); 5258 #endif 5259 5260 allocate = (flags & SV_HAS_TRAILING_NUL) 5261 ? len + 1 : 5262 #ifdef Perl_safesysmalloc_size 5263 len + 1; 5264 #else 5265 PERL_STRLEN_ROUNDUP(len + 1); 5266 #endif 5267 if (flags & SV_HAS_TRAILING_NUL) { 5268 /* It's long enough - do nothing. 5269 Specifically Perl_newCONSTSUB is relying on this. */ 5270 } else { 5271 #ifdef DEBUGGING 5272 /* Force a move to shake out bugs in callers. */ 5273 char *new_ptr = (char*)safemalloc(allocate); 5274 Copy(ptr, new_ptr, len, char); 5275 PoisonFree(ptr,len,char); 5276 Safefree(ptr); 5277 ptr = new_ptr; 5278 #else 5279 ptr = (char*) saferealloc (ptr, allocate); 5280 #endif 5281 } 5282 #ifdef Perl_safesysmalloc_size 5283 SvLEN_set(sv, Perl_safesysmalloc_size(ptr)); 5284 #else 5285 SvLEN_set(sv, allocate); 5286 #endif 5287 SvCUR_set(sv, len); 5288 SvPV_set(sv, ptr); 5289 if (!(flags & SV_HAS_TRAILING_NUL)) { 5290 ptr[len] = '\0'; 5291 } 5292 (void)SvPOK_only_UTF8(sv); /* validate pointer */ 5293 SvTAINT(sv); 5294 if (flags & SV_SMAGIC) 5295 SvSETMAGIC(sv); 5296 } 5297 5298 5299 static void 5300 S_sv_uncow(pTHX_ SV * const sv, const U32 flags) 5301 { 5302 assert(SvIsCOW(sv)); 5303 { 5304 #ifdef PERL_ANY_COW 5305 const char * const pvx = SvPVX_const(sv); 5306 const STRLEN len = SvLEN(sv); 5307 const STRLEN cur = SvCUR(sv); 5308 const bool was_shared_hek = SvIsCOW_shared_hash(sv); 5309 5310 #ifdef DEBUGGING 5311 if (DEBUG_C_TEST) { 5312 PerlIO_printf(Perl_debug_log, 5313 "Copy on write: Force normal %ld\n", 5314 (long) flags); 5315 sv_dump(sv); 5316 } 5317 #endif 5318 SvIsCOW_off(sv); 5319 # ifdef PERL_COPY_ON_WRITE 5320 if (len) { 5321 /* Must do this first, since the CowREFCNT uses SvPVX and 5322 we need to write to CowREFCNT, or de-RO the whole buffer if we are 5323 the only owner left of the buffer. */ 5324 sv_buf_to_rw(sv); /* NOOP if RO-ing not supported */ 5325 { 5326 U8 cowrefcnt = CowREFCNT(sv); 5327 if(cowrefcnt != 0) { 5328 cowrefcnt--; 5329 CowREFCNT(sv) = cowrefcnt; 5330 sv_buf_to_ro(sv); 5331 goto copy_over; 5332 } 5333 } 5334 /* Else we are the only owner of the buffer. */ 5335 } 5336 else 5337 # endif 5338 { 5339 /* This SV doesn't own the buffer, so need to Newx() a new one: */ 5340 copy_over: 5341 SvPV_set(sv, NULL); 5342 SvCUR_set(sv, 0); 5343 SvLEN_set(sv, 0); 5344 if (flags & SV_COW_DROP_PV) { 5345 /* OK, so we don't need to copy our buffer. */ 5346 SvPOK_off(sv); 5347 } else { 5348 SvGROW(sv, cur + 1); 5349 Move(pvx,SvPVX(sv),cur,char); 5350 SvCUR_set(sv, cur); 5351 *SvEND(sv) = '\0'; 5352 } 5353 if (was_shared_hek) { 5354 unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); 5355 } 5356 #ifdef DEBUGGING 5357 if (DEBUG_C_TEST) 5358 sv_dump(sv); 5359 #endif 5360 } 5361 #else 5362 const char * const pvx = SvPVX_const(sv); 5363 const STRLEN len = SvCUR(sv); 5364 SvIsCOW_off(sv); 5365 SvPV_set(sv, NULL); 5366 SvLEN_set(sv, 0); 5367 if (flags & SV_COW_DROP_PV) { 5368 /* OK, so we don't need to copy our buffer. */ 5369 SvPOK_off(sv); 5370 } else { 5371 SvGROW(sv, len + 1); 5372 Move(pvx,SvPVX(sv),len,char); 5373 *SvEND(sv) = '\0'; 5374 } 5375 unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); 5376 #endif 5377 } 5378 } 5379 5380 5381 /* 5382 =for apidoc sv_force_normal_flags 5383 5384 Undo various types of fakery on an SV, where fakery means 5385 "more than" a string: if the PV is a shared string, make 5386 a private copy; if we're a ref, stop refing; if we're a glob, downgrade to 5387 an C<xpvmg>; if we're a copy-on-write scalar, this is the on-write time when 5388 we do the copy, and is also used locally; if this is a 5389 vstring, drop the vstring magic. If C<SV_COW_DROP_PV> is set 5390 then a copy-on-write scalar drops its PV buffer (if any) and becomes 5391 C<SvPOK_off> rather than making a copy. (Used where this 5392 scalar is about to be set to some other value.) In addition, 5393 the C<flags> parameter gets passed to C<sv_unref_flags()> 5394 when unreffing. C<sv_force_normal> calls this function 5395 with flags set to 0. 5396 5397 This function is expected to be used to signal to perl that this SV is 5398 about to be written to, and any extra book-keeping needs to be taken care 5399 of. Hence, it croaks on read-only values. 5400 5401 =for apidoc Amnh||SV_COW_DROP_PV 5402 5403 =cut 5404 */ 5405 5406 void 5407 Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) 5408 { 5409 PERL_ARGS_ASSERT_SV_FORCE_NORMAL_FLAGS; 5410 5411 if (SvREADONLY(sv)) 5412 Perl_croak_no_modify(); 5413 else if (SvIsCOW(sv) && LIKELY(SvTYPE(sv) != SVt_PVHV)) 5414 S_sv_uncow(aTHX_ sv, flags); 5415 if (SvROK(sv)) 5416 sv_unref_flags(sv, flags); 5417 else if (SvFAKE(sv) && isGV_with_GP(sv)) 5418 sv_unglob(sv, flags); 5419 else if (SvFAKE(sv) && isREGEXP(sv)) { 5420 /* Need to downgrade the REGEXP to a simple(r) scalar. This is analogous 5421 to sv_unglob. We only need it here, so inline it. */ 5422 const bool islv = SvTYPE(sv) == SVt_PVLV; 5423 const svtype new_type = 5424 islv ? SVt_NULL : SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV; 5425 SV *const temp = newSV_type(new_type); 5426 regexp *old_rx_body; 5427 5428 if (new_type == SVt_PVMG) { 5429 SvMAGIC_set(temp, SvMAGIC(sv)); 5430 SvMAGIC_set(sv, NULL); 5431 SvSTASH_set(temp, SvSTASH(sv)); 5432 SvSTASH_set(sv, NULL); 5433 } 5434 if (!islv) 5435 SvCUR_set(temp, SvCUR(sv)); 5436 /* Remember that SvPVX is in the head, not the body. */ 5437 assert(ReANY((REGEXP *)sv)->mother_re); 5438 5439 if (islv) { 5440 /* LV-as-regex has sv->sv_any pointing to an XPVLV body, 5441 * whose xpvlenu_rx field points to the regex body */ 5442 XPV *xpv = (XPV*)(SvANY(sv)); 5443 old_rx_body = xpv->xpv_len_u.xpvlenu_rx; 5444 xpv->xpv_len_u.xpvlenu_rx = NULL; 5445 } 5446 else 5447 old_rx_body = ReANY((REGEXP *)sv); 5448 5449 /* Their buffer is already owned by someone else. */ 5450 if (flags & SV_COW_DROP_PV) { 5451 /* SvLEN is already 0. For SVt_REGEXP, we have a brand new 5452 zeroed body. For SVt_PVLV, we zeroed it above (len field 5453 a union with xpvlenu_rx) */ 5454 assert(!SvLEN(islv ? sv : temp)); 5455 sv->sv_u.svu_pv = 0; 5456 } 5457 else { 5458 sv->sv_u.svu_pv = savepvn(RX_WRAPPED((REGEXP *)sv), SvCUR(sv)); 5459 SvLEN_set(islv ? sv : temp, SvCUR(sv)+1); 5460 SvPOK_on(sv); 5461 } 5462 5463 /* Now swap the rest of the bodies. */ 5464 5465 SvFAKE_off(sv); 5466 if (!islv) { 5467 SvFLAGS(sv) &= ~SVTYPEMASK; 5468 SvFLAGS(sv) |= new_type; 5469 SvANY(sv) = SvANY(temp); 5470 } 5471 5472 SvFLAGS(temp) &= ~(SVTYPEMASK); 5473 SvFLAGS(temp) |= SVt_REGEXP|SVf_FAKE; 5474 SvANY(temp) = old_rx_body; 5475 5476 /* temp is now rebuilt as a correctly structured SVt_REGEXP, so this 5477 * will trigger a call to sv_clear() which will correctly free the 5478 * body. */ 5479 SvREFCNT_dec_NN(temp); 5480 } 5481 else if (SvVOK(sv)) sv_unmagic(sv, PERL_MAGIC_vstring); 5482 } 5483 5484 /* 5485 =for apidoc sv_chop 5486 5487 Efficient removal of characters from the beginning of the string buffer. 5488 C<SvPOK(sv)>, or at least C<SvPOKp(sv)>, must be true and C<ptr> must be a 5489 pointer to somewhere inside the string buffer. C<ptr> becomes the first 5490 character of the adjusted string. Uses the C<OOK> hack. On return, only 5491 C<SvPOK(sv)> and C<SvPOKp(sv)> among the C<OK> flags will be true. 5492 5493 Beware: after this function returns, C<ptr> and SvPVX_const(sv) may no longer 5494 refer to the same chunk of data. 5495 5496 The unfortunate similarity of this function's name to that of Perl's C<chop> 5497 operator is strictly coincidental. This function works from the left; 5498 C<chop> works from the right. 5499 5500 =cut 5501 */ 5502 5503 void 5504 Perl_sv_chop(pTHX_ SV *const sv, const char *const ptr) 5505 { 5506 STRLEN delta; 5507 STRLEN old_delta; 5508 U8 *p; 5509 #ifdef DEBUGGING 5510 const U8 *evacp; 5511 STRLEN evacn; 5512 #endif 5513 STRLEN max_delta; 5514 5515 PERL_ARGS_ASSERT_SV_CHOP; 5516 5517 if (!ptr || !SvPOKp(sv)) 5518 return; 5519 delta = ptr - SvPVX_const(sv); 5520 if (!delta) { 5521 /* Nothing to do. */ 5522 return; 5523 } 5524 max_delta = SvLEN(sv) ? SvLEN(sv) : SvCUR(sv); 5525 if (delta > max_delta) 5526 Perl_croak(aTHX_ "panic: sv_chop ptr=%p, start=%p, end=%p", 5527 ptr, SvPVX_const(sv), SvPVX_const(sv) + max_delta); 5528 /* SvPVX(sv) may move in SV_CHECK_THINKFIRST(sv), so don't use ptr any more */ 5529 SV_CHECK_THINKFIRST(sv); 5530 SvPOK_only_UTF8(sv); 5531 5532 if (!SvOOK(sv)) { 5533 if (!SvLEN(sv)) { /* make copy of shared string */ 5534 const char *pvx = SvPVX_const(sv); 5535 const STRLEN len = SvCUR(sv); 5536 SvGROW(sv, len + 1); 5537 Move(pvx,SvPVX(sv),len,char); 5538 *SvEND(sv) = '\0'; 5539 } 5540 SvOOK_on(sv); 5541 old_delta = 0; 5542 } else { 5543 SvOOK_offset(sv, old_delta); 5544 } 5545 SvLEN_set(sv, SvLEN(sv) - delta); 5546 SvCUR_set(sv, SvCUR(sv) - delta); 5547 SvPV_set(sv, SvPVX(sv) + delta); 5548 5549 p = (U8 *)SvPVX_const(sv); 5550 5551 #ifdef DEBUGGING 5552 /* how many bytes were evacuated? we will fill them with sentinel 5553 bytes, except for the part holding the new offset of course. */ 5554 evacn = delta; 5555 if (old_delta) 5556 evacn += (old_delta < 0x100 ? 1 : 1 + sizeof(STRLEN)); 5557 assert(evacn); 5558 assert(evacn <= delta + old_delta); 5559 evacp = p - evacn; 5560 #endif 5561 5562 /* This sets 'delta' to the accumulated value of all deltas so far */ 5563 delta += old_delta; 5564 assert(delta); 5565 5566 /* If 'delta' fits in a byte, store it just prior to the new beginning of 5567 * the string; otherwise store a 0 byte there and store 'delta' just prior 5568 * to that, using as many bytes as a STRLEN occupies. Thus it overwrites a 5569 * portion of the chopped part of the string */ 5570 if (delta < 0x100) { 5571 *--p = (U8) delta; 5572 } else { 5573 *--p = 0; 5574 p -= sizeof(STRLEN); 5575 Copy((U8*)&delta, p, sizeof(STRLEN), U8); 5576 } 5577 5578 #ifdef DEBUGGING 5579 /* Fill the preceding buffer with sentinals to verify that no-one is 5580 using it. */ 5581 while (p > evacp) { 5582 --p; 5583 *p = (U8)PTR2UV(p); 5584 } 5585 #endif 5586 } 5587 5588 /* 5589 =for apidoc sv_catpvn 5590 =for apidoc_item sv_catpvn_flags 5591 =for apidoc_item sv_catpvn_mg 5592 =for apidoc_item sv_catpvn_nomg 5593 5594 These concatenate the C<len> bytes of the string beginning at C<ptr> onto the 5595 end of the string which is in C<dsv>. The caller must make sure C<ptr> 5596 contains at least C<len> bytes. 5597 5598 For all but C<sv_catpvn_flags>, the string appended is assumed to be valid 5599 UTF-8 if the SV has the UTF-8 status set, and a string of bytes otherwise. 5600 5601 They differ in that: 5602 5603 C<sv_catpvn_mg> performs both 'get' and 'set' magic on C<dsv>. 5604 5605 C<sv_catpvn> performs only 'get' magic. 5606 5607 C<sv_catpvn_nomg> skips all magic. 5608 5609 C<sv_catpvn_flags> has an extra C<flags> parameter which allows you to specify 5610 any combination of magic handling (using C<SV_GMAGIC> and/or C<SV_SMAGIC>) and 5611 to also override the UTF-8 handling. By supplying the C<SV_CATBYTES> flag, the 5612 appended string is interpreted as plain bytes; by supplying instead the 5613 C<SV_CATUTF8> flag, it will be interpreted as UTF-8, and the C<dsv> will be 5614 upgraded to UTF-8 if necessary. 5615 5616 C<sv_catpvn>, C<sv_catpvn_mg>, and C<sv_catpvn_nomg> are implemented 5617 in terms of C<sv_catpvn_flags>. 5618 5619 =for apidoc Amnh||SV_CATUTF8 5620 =for apidoc Amnh||SV_CATBYTES 5621 5622 =cut 5623 */ 5624 5625 void 5626 Perl_sv_catpvn_flags(pTHX_ SV *const dsv, const char *sstr, const STRLEN slen, const I32 flags) 5627 { 5628 STRLEN dlen; 5629 const char * const dstr = SvPV_force_flags(dsv, dlen, flags); 5630 5631 PERL_ARGS_ASSERT_SV_CATPVN_FLAGS; 5632 assert((flags & (SV_CATBYTES|SV_CATUTF8)) != (SV_CATBYTES|SV_CATUTF8)); 5633 5634 if (!(flags & SV_CATBYTES) || !SvUTF8(dsv)) { 5635 if (flags & SV_CATUTF8 && !SvUTF8(dsv)) { 5636 sv_utf8_upgrade_flags_grow(dsv, 0, slen + 1); 5637 dlen = SvCUR(dsv); 5638 } 5639 else SvGROW(dsv, dlen + slen + 3); 5640 if (sstr == dstr) 5641 sstr = SvPVX_const(dsv); 5642 Move(sstr, SvPVX(dsv) + dlen, slen, char); 5643 SvCUR_set(dsv, SvCUR(dsv) + slen); 5644 } 5645 else { 5646 /* We inline bytes_to_utf8, to avoid an extra malloc. */ 5647 const char * const send = sstr + slen; 5648 U8 *d; 5649 5650 /* Something this code does not account for, which I think is 5651 impossible; it would require the same pv to be treated as 5652 bytes *and* utf8, which would indicate a bug elsewhere. */ 5653 assert(sstr != dstr); 5654 5655 SvGROW(dsv, dlen + slen * 2 + 3); 5656 d = (U8 *)SvPVX(dsv) + dlen; 5657 5658 while (sstr < send) { 5659 append_utf8_from_native_byte(*sstr, &d); 5660 sstr++; 5661 } 5662 SvCUR_set(dsv, d-(const U8 *)SvPVX(dsv)); 5663 } 5664 *SvEND(dsv) = '\0'; 5665 (void)SvPOK_only_UTF8(dsv); /* validate pointer */ 5666 SvTAINT(dsv); 5667 if (flags & SV_SMAGIC) 5668 SvSETMAGIC(dsv); 5669 } 5670 5671 /* 5672 =for apidoc sv_catsv 5673 =for apidoc_item sv_catsv_flags 5674 =for apidoc_item sv_catsv_mg 5675 =for apidoc_item sv_catsv_nomg 5676 5677 These concatenate the string from SV C<sstr> onto the end of the string in SV 5678 C<dsv>. If C<sstr> is null, these are no-ops; otherwise only C<dsv> is 5679 modified. 5680 5681 They differ only in what magic they perform: 5682 5683 C<sv_catsv_mg> performs 'get' magic on both SVs before the copy, and 'set' magic 5684 on C<dsv> afterwards. 5685 5686 C<sv_catsv> performs just 'get' magic, on both SVs. 5687 5688 C<sv_catsv_nomg> skips all magic. 5689 5690 C<sv_catsv_flags> has an extra C<flags> parameter which allows you to use 5691 C<SV_GMAGIC> and/or C<SV_SMAGIC> to specify any combination of magic handling 5692 (although either both or neither SV will have 'get' magic applied to it.) 5693 5694 C<sv_catsv>, C<sv_catsv_mg>, and C<sv_catsv_nomg> are implemented 5695 in terms of C<sv_catsv_flags>. 5696 5697 =cut */ 5698 5699 void 5700 Perl_sv_catsv_flags(pTHX_ SV *const dsv, SV *const sstr, const I32 flags) 5701 { 5702 PERL_ARGS_ASSERT_SV_CATSV_FLAGS; 5703 5704 if (sstr) { 5705 STRLEN slen; 5706 const char *spv = SvPV_flags_const(sstr, slen, flags); 5707 if (flags & SV_GMAGIC) 5708 SvGETMAGIC(dsv); 5709 sv_catpvn_flags(dsv, spv, slen, 5710 DO_UTF8(sstr) ? SV_CATUTF8 : SV_CATBYTES); 5711 if (flags & SV_SMAGIC) 5712 SvSETMAGIC(dsv); 5713 } 5714 } 5715 5716 /* 5717 =for apidoc sv_catpv 5718 =for apidoc_item sv_catpv_flags 5719 =for apidoc_item sv_catpv_mg 5720 =for apidoc_item sv_catpv_nomg 5721 5722 These concatenate the C<NUL>-terminated string C<sstr> onto the end of the 5723 string which is in the SV. 5724 If the SV has the UTF-8 status set, then the bytes appended should be 5725 valid UTF-8. 5726 5727 They differ only in how they handle magic: 5728 5729 C<sv_catpv_mg> performs both 'get' and 'set' magic. 5730 5731 C<sv_catpv> performs only 'get' magic. 5732 5733 C<sv_catpv_nomg> skips all magic. 5734 5735 C<sv_catpv_flags> has an extra C<flags> parameter which allows you to specify 5736 any combination of magic handling (using C<SV_GMAGIC> and/or C<SV_SMAGIC>), and 5737 to also override the UTF-8 handling. By supplying the C<SV_CATUTF8> flag, the 5738 appended string is forced to be interpreted as UTF-8; by supplying instead the 5739 C<SV_CATBYTES> flag, it will be interpreted as just bytes. Either the SV or 5740 the string appended will be upgraded to UTF-8 if necessary. 5741 5742 =cut 5743 */ 5744 5745 void 5746 Perl_sv_catpv(pTHX_ SV *const dsv, const char *sstr) 5747 { 5748 STRLEN len; 5749 STRLEN tlen; 5750 char *junk; 5751 5752 PERL_ARGS_ASSERT_SV_CATPV; 5753 5754 if (!sstr) 5755 return; 5756 junk = SvPV_force(dsv, tlen); 5757 len = strlen(sstr); 5758 SvGROW(dsv, tlen + len + 1); 5759 if (sstr == junk) 5760 sstr = SvPVX_const(dsv); 5761 Move(sstr,SvPVX(dsv)+tlen,len+1,char); 5762 SvCUR_set(dsv, SvCUR(dsv) + len); 5763 (void)SvPOK_only_UTF8(dsv); /* validate pointer */ 5764 SvTAINT(dsv); 5765 } 5766 5767 void 5768 Perl_sv_catpv_flags(pTHX_ SV *dsv, const char *sstr, const I32 flags) 5769 { 5770 PERL_ARGS_ASSERT_SV_CATPV_FLAGS; 5771 sv_catpvn_flags(dsv, sstr, strlen(sstr), flags); 5772 } 5773 5774 void 5775 Perl_sv_catpv_mg(pTHX_ SV *const dsv, const char *const sstr) 5776 { 5777 PERL_ARGS_ASSERT_SV_CATPV_MG; 5778 5779 sv_catpv(dsv,sstr); 5780 SvSETMAGIC(dsv); 5781 } 5782 5783 /* 5784 =for apidoc newSV 5785 5786 Creates a new SV. A non-zero C<len> parameter indicates the number of 5787 bytes of preallocated string space the SV should have. An extra byte for a 5788 trailing C<NUL> is also reserved. (C<SvPOK> is not set for the SV even if string 5789 space is allocated.) The reference count for the new SV is set to 1. 5790 5791 In 5.9.3, C<newSV()> replaces the older C<NEWSV()> API, and drops the first 5792 parameter, I<x>, a debug aid which allowed callers to identify themselves. 5793 This aid has been superseded by a new build option, C<PERL_MEM_LOG> (see 5794 L<perlhacktips/PERL_MEM_LOG>). The older API is still there for use in XS 5795 modules supporting older perls. 5796 5797 =cut 5798 */ 5799 5800 SV * 5801 Perl_newSV(pTHX_ const STRLEN len) 5802 { 5803 SV *sv; 5804 5805 if (!len) 5806 new_SV(sv); 5807 else { 5808 sv = newSV_type(SVt_PV); 5809 sv_grow_fresh(sv, len + 1); 5810 } 5811 return sv; 5812 } 5813 /* 5814 =for apidoc sv_magicext 5815 5816 Adds magic to an SV, upgrading it if necessary. Applies the 5817 supplied C<vtable> and returns a pointer to the magic added. 5818 5819 Note that C<sv_magicext> will allow things that C<sv_magic> will not. 5820 In particular, you can add magic to C<SvREADONLY> SVs, and add more than 5821 one instance of the same C<how>. 5822 5823 If C<namlen> is greater than zero then a C<savepvn> I<copy> of C<name> is 5824 stored, if C<namlen> is zero then C<name> is stored as-is and - as another 5825 special case - if C<(name && namlen == HEf_SVKEY)> then C<name> is assumed 5826 to contain an SV* and is stored as-is with its C<REFCNT> incremented. 5827 5828 (This is now used as a subroutine by C<sv_magic>.) 5829 5830 =cut 5831 */ 5832 MAGIC * 5833 Perl_sv_magicext(pTHX_ SV *const sv, SV *const obj, const int how, 5834 const MGVTBL *const vtable, const char *const name, const I32 namlen) 5835 { 5836 MAGIC* mg; 5837 5838 PERL_ARGS_ASSERT_SV_MAGICEXT; 5839 5840 SvUPGRADE(sv, SVt_PVMG); 5841 Newxz(mg, 1, MAGIC); 5842 mg->mg_moremagic = SvMAGIC(sv); 5843 SvMAGIC_set(sv, mg); 5844 5845 /* Sometimes a magic contains a reference loop, where the sv and 5846 object refer to each other. To prevent a reference loop that 5847 would prevent such objects being freed, we look for such loops 5848 and if we find one we avoid incrementing the object refcount. 5849 5850 Note we cannot do this to avoid self-tie loops as intervening RV must 5851 have its REFCNT incremented to keep it in existence. 5852 5853 */ 5854 if (!obj || obj == sv || 5855 how == PERL_MAGIC_arylen || 5856 how == PERL_MAGIC_regdata || 5857 how == PERL_MAGIC_regdatum || 5858 how == PERL_MAGIC_symtab || 5859 (SvTYPE(obj) == SVt_PVGV && 5860 (GvSV(obj) == sv || GvHV(obj) == (const HV *)sv 5861 || GvAV(obj) == (const AV *)sv || GvCV(obj) == (const CV *)sv 5862 || GvIOp(obj) == (const IO *)sv || GvFORM(obj) == (const CV *)sv))) 5863 { 5864 mg->mg_obj = obj; 5865 } 5866 else { 5867 mg->mg_obj = SvREFCNT_inc_simple(obj); 5868 mg->mg_flags |= MGf_REFCOUNTED; 5869 } 5870 5871 /* Normal self-ties simply pass a null object, and instead of 5872 using mg_obj directly, use the SvTIED_obj macro to produce a 5873 new RV as needed. For glob "self-ties", we are tieing the PVIO 5874 with an RV obj pointing to the glob containing the PVIO. In 5875 this case, to avoid a reference loop, we need to weaken the 5876 reference. 5877 */ 5878 5879 if (how == PERL_MAGIC_tiedscalar && SvTYPE(sv) == SVt_PVIO && 5880 obj && SvROK(obj) && GvIO(SvRV(obj)) == (const IO *)sv) 5881 { 5882 sv_rvweaken(obj); 5883 } 5884 5885 mg->mg_type = how; 5886 mg->mg_len = namlen; 5887 if (name) { 5888 if (namlen > 0) 5889 mg->mg_ptr = savepvn(name, namlen); 5890 else if (namlen == HEf_SVKEY) { 5891 /* Yes, this is casting away const. This is only for the case of 5892 HEf_SVKEY. I think we need to document this aberration of the 5893 constness of the API, rather than making name non-const, as 5894 that change propagating outwards a long way. */ 5895 mg->mg_ptr = (char*)SvREFCNT_inc_simple_NN((SV *)name); 5896 } else 5897 mg->mg_ptr = (char *) name; 5898 } 5899 mg->mg_virtual = (MGVTBL *) vtable; 5900 5901 mg_magical(sv); 5902 return mg; 5903 } 5904 5905 MAGIC * 5906 Perl_sv_magicext_mglob(pTHX_ SV *sv) 5907 { 5908 PERL_ARGS_ASSERT_SV_MAGICEXT_MGLOB; 5909 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') { 5910 /* This sv is only a delegate. //g magic must be attached to 5911 its target. */ 5912 vivify_defelem(sv); 5913 sv = LvTARG(sv); 5914 } 5915 return sv_magicext(sv, NULL, PERL_MAGIC_regex_global, 5916 &PL_vtbl_mglob, 0, 0); 5917 } 5918 5919 /* 5920 =for apidoc sv_magic 5921 5922 Adds magic to an SV. First upgrades C<sv> to type C<SVt_PVMG> if 5923 necessary, then adds a new magic item of type C<how> to the head of the 5924 magic list. 5925 5926 See C<L</sv_magicext>> (which C<sv_magic> now calls) for a description of the 5927 handling of the C<name> and C<namlen> arguments. 5928 5929 You need to use C<sv_magicext> to add magic to C<SvREADONLY> SVs and also 5930 to add more than one instance of the same C<how>. 5931 5932 =cut 5933 */ 5934 5935 void 5936 Perl_sv_magic(pTHX_ SV *const sv, SV *const obj, const int how, 5937 const char *const name, const I32 namlen) 5938 { 5939 const MGVTBL *vtable; 5940 MAGIC* mg; 5941 unsigned int flags; 5942 unsigned int vtable_index; 5943 5944 PERL_ARGS_ASSERT_SV_MAGIC; 5945 5946 if (how < 0 || (unsigned)how >= C_ARRAY_LENGTH(PL_magic_data) 5947 || ((flags = PL_magic_data[how]), 5948 (vtable_index = flags & PERL_MAGIC_VTABLE_MASK) 5949 > magic_vtable_max)) 5950 Perl_croak(aTHX_ "Don't know how to handle magic of type \\%o", how); 5951 5952 /* PERL_MAGIC_ext is reserved for use by extensions not perl internals. 5953 Useful for attaching extension internal data to perl vars. 5954 Note that multiple extensions may clash if magical scalars 5955 etc holding private data from one are passed to another. */ 5956 5957 vtable = (vtable_index == magic_vtable_max) 5958 ? NULL : PL_magic_vtables + vtable_index; 5959 5960 if (SvREADONLY(sv)) { 5961 if ( 5962 !PERL_MAGIC_TYPE_READONLY_ACCEPTABLE(how) 5963 ) 5964 { 5965 Perl_croak_no_modify(); 5966 } 5967 } 5968 if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) { 5969 if (SvMAGIC(sv) && (mg = mg_find(sv, how))) { 5970 /* sv_magic() refuses to add a magic of the same 'how' as an 5971 existing one 5972 */ 5973 if (how == PERL_MAGIC_taint) 5974 mg->mg_len |= 1; 5975 return; 5976 } 5977 } 5978 5979 /* Rest of work is done else where */ 5980 mg = sv_magicext(sv,obj,how,vtable,name,namlen); 5981 5982 switch (how) { 5983 case PERL_MAGIC_taint: 5984 mg->mg_len = 1; 5985 break; 5986 case PERL_MAGIC_ext: 5987 case PERL_MAGIC_dbfile: 5988 SvRMAGICAL_on(sv); 5989 break; 5990 } 5991 } 5992 5993 static int 5994 S_sv_unmagicext_flags(pTHX_ SV *const sv, const int type, const MGVTBL *vtbl, const U32 flags) 5995 { 5996 MAGIC* mg; 5997 MAGIC** mgp; 5998 5999 assert(flags <= 1); 6000 6001 if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv)) 6002 return 0; 6003 mgp = &(((XPVMG*) SvANY(sv))->xmg_u.xmg_magic); 6004 for (mg = *mgp; mg; mg = *mgp) { 6005 const MGVTBL* const virt = mg->mg_virtual; 6006 if (mg->mg_type == type && (!flags || virt == vtbl)) { 6007 *mgp = mg->mg_moremagic; 6008 if (virt && virt->svt_free) 6009 virt->svt_free(aTHX_ sv, mg); 6010 if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { 6011 if (mg->mg_len > 0) 6012 Safefree(mg->mg_ptr); 6013 else if (mg->mg_len == HEf_SVKEY) 6014 SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr)); 6015 else if (mg->mg_type == PERL_MAGIC_utf8) 6016 Safefree(mg->mg_ptr); 6017 } 6018 if (mg->mg_flags & MGf_REFCOUNTED) 6019 SvREFCNT_dec(mg->mg_obj); 6020 Safefree(mg); 6021 } 6022 else 6023 mgp = &mg->mg_moremagic; 6024 } 6025 if (SvMAGIC(sv)) { 6026 if (SvMAGICAL(sv)) /* if we're under save_magic, wait for restore_magic; */ 6027 mg_magical(sv); /* else fix the flags now */ 6028 } 6029 else 6030 SvMAGICAL_off(sv); 6031 6032 return 0; 6033 } 6034 6035 /* 6036 =for apidoc sv_unmagic 6037 6038 Removes all magic of type C<type> from an SV. 6039 6040 =cut 6041 */ 6042 6043 int 6044 Perl_sv_unmagic(pTHX_ SV *const sv, const int type) 6045 { 6046 PERL_ARGS_ASSERT_SV_UNMAGIC; 6047 return S_sv_unmagicext_flags(aTHX_ sv, type, NULL, 0); 6048 } 6049 6050 /* 6051 =for apidoc sv_unmagicext 6052 6053 Removes all magic of type C<type> with the specified C<vtbl> from an SV. 6054 6055 =cut 6056 */ 6057 6058 int 6059 Perl_sv_unmagicext(pTHX_ SV *const sv, const int type, const MGVTBL *vtbl) 6060 { 6061 PERL_ARGS_ASSERT_SV_UNMAGICEXT; 6062 return S_sv_unmagicext_flags(aTHX_ sv, type, vtbl, 1); 6063 } 6064 6065 /* 6066 =for apidoc sv_rvweaken 6067 6068 Weaken a reference: set the C<SvWEAKREF> flag on this RV; give the 6069 referred-to SV C<PERL_MAGIC_backref> magic if it hasn't already; and 6070 push a back-reference to this RV onto the array of backreferences 6071 associated with that magic. If the RV is magical, set magic will be 6072 called after the RV is cleared. Silently ignores C<undef> and warns 6073 on already-weak references. 6074 6075 =cut 6076 */ 6077 6078 SV * 6079 Perl_sv_rvweaken(pTHX_ SV *const sv) 6080 { 6081 SV *tsv; 6082 6083 PERL_ARGS_ASSERT_SV_RVWEAKEN; 6084 6085 if (!SvOK(sv)) /* let undefs pass */ 6086 return sv; 6087 if (!SvROK(sv)) 6088 Perl_croak(aTHX_ "Can't weaken a nonreference"); 6089 else if (SvWEAKREF(sv)) { 6090 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is already weak"); 6091 return sv; 6092 } 6093 else if (SvREADONLY(sv)) croak_no_modify(); 6094 tsv = SvRV(sv); 6095 Perl_sv_add_backref(aTHX_ tsv, sv); 6096 SvWEAKREF_on(sv); 6097 SvREFCNT_dec_NN(tsv); 6098 return sv; 6099 } 6100 6101 /* 6102 =for apidoc sv_rvunweaken 6103 6104 Unweaken a reference: Clear the C<SvWEAKREF> flag on this RV; remove 6105 the backreference to this RV from the array of backreferences 6106 associated with the target SV, increment the refcount of the target. 6107 Silently ignores C<undef> and warns on non-weak references. 6108 6109 =cut 6110 */ 6111 6112 SV * 6113 Perl_sv_rvunweaken(pTHX_ SV *const sv) 6114 { 6115 SV *tsv; 6116 6117 PERL_ARGS_ASSERT_SV_RVUNWEAKEN; 6118 6119 if (!SvOK(sv)) /* let undefs pass */ 6120 return sv; 6121 if (!SvROK(sv)) 6122 Perl_croak(aTHX_ "Can't unweaken a nonreference"); 6123 else if (!SvWEAKREF(sv)) { 6124 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is not weak"); 6125 return sv; 6126 } 6127 else if (SvREADONLY(sv)) croak_no_modify(); 6128 6129 tsv = SvRV(sv); 6130 SvWEAKREF_off(sv); 6131 SvROK_on(sv); 6132 SvREFCNT_inc_NN(tsv); 6133 Perl_sv_del_backref(aTHX_ tsv, sv); 6134 return sv; 6135 } 6136 6137 /* 6138 =for apidoc sv_get_backrefs 6139 6140 If C<sv> is the target of a weak reference then it returns the back 6141 references structure associated with the sv; otherwise return C<NULL>. 6142 6143 When returning a non-null result the type of the return is relevant. If it 6144 is an AV then the elements of the AV are the weak reference RVs which 6145 point at this item. If it is any other type then the item itself is the 6146 weak reference. 6147 6148 See also C<Perl_sv_add_backref()>, C<Perl_sv_del_backref()>, 6149 C<Perl_sv_kill_backrefs()> 6150 6151 =cut 6152 */ 6153 6154 SV * 6155 Perl_sv_get_backrefs(SV *const sv) 6156 { 6157 SV *backrefs= NULL; 6158 6159 PERL_ARGS_ASSERT_SV_GET_BACKREFS; 6160 6161 /* find slot to store array or singleton backref */ 6162 6163 if (SvTYPE(sv) == SVt_PVHV) { 6164 if (HvHasAUX(sv)) { 6165 struct xpvhv_aux * const iter = HvAUX((HV *)sv); 6166 backrefs = (SV *)iter->xhv_backreferences; 6167 } 6168 } else if (SvMAGICAL(sv)) { 6169 MAGIC *mg = mg_find(sv, PERL_MAGIC_backref); 6170 if (mg) 6171 backrefs = mg->mg_obj; 6172 } 6173 return backrefs; 6174 } 6175 6176 /* Give tsv backref magic if it hasn't already got it, then push a 6177 * back-reference to sv onto the array associated with the backref magic. 6178 * 6179 * As an optimisation, if there's only one backref and it's not an AV, 6180 * store it directly in the HvAUX or mg_obj slot, avoiding the need to 6181 * allocate an AV. (Whether the slot holds an AV tells us whether this is 6182 * active.) 6183 */ 6184 6185 /* A discussion about the backreferences array and its refcount: 6186 * 6187 * The AV holding the backreferences is pointed to either as the mg_obj of 6188 * PERL_MAGIC_backref, or in the specific case of a HV, from the 6189 * xhv_backreferences field. The array is created with a refcount 6190 * of 2. This means that if during global destruction the array gets 6191 * picked on before its parent to have its refcount decremented by the 6192 * random zapper, it won't actually be freed, meaning it's still there for 6193 * when its parent gets freed. 6194 * 6195 * When the parent SV is freed, the extra ref is killed by 6196 * Perl_sv_kill_backrefs. The other ref is killed, in the case of magic, 6197 * by mg_free() / MGf_REFCOUNTED, or for a hash, by Perl_hv_kill_backrefs. 6198 * 6199 * When a single backref SV is stored directly, it is not reference 6200 * counted. 6201 */ 6202 6203 void 6204 Perl_sv_add_backref(pTHX_ SV *const tsv, SV *const sv) 6205 { 6206 SV **svp; 6207 AV *av = NULL; 6208 MAGIC *mg = NULL; 6209 6210 PERL_ARGS_ASSERT_SV_ADD_BACKREF; 6211 6212 /* find slot to store array or singleton backref */ 6213 6214 if (SvTYPE(tsv) == SVt_PVHV) { 6215 svp = (SV**)Perl_hv_backreferences_p(aTHX_ MUTABLE_HV(tsv)); 6216 } else { 6217 if (SvMAGICAL(tsv)) 6218 mg = mg_find(tsv, PERL_MAGIC_backref); 6219 if (!mg) 6220 mg = sv_magicext(tsv, NULL, PERL_MAGIC_backref, &PL_vtbl_backref, NULL, 0); 6221 svp = &(mg->mg_obj); 6222 } 6223 6224 /* create or retrieve the array */ 6225 6226 if ( (!*svp && SvTYPE(sv) == SVt_PVAV) 6227 || (*svp && SvTYPE(*svp) != SVt_PVAV) 6228 ) { 6229 /* create array */ 6230 if (mg) 6231 mg->mg_flags |= MGf_REFCOUNTED; 6232 av = newAV(); 6233 AvREAL_off(av); 6234 SvREFCNT_inc_simple_void_NN(av); 6235 /* av now has a refcnt of 2; see discussion above */ 6236 av_extend(av, *svp ? 2 : 1); 6237 if (*svp) { 6238 /* move single existing backref to the array */ 6239 AvARRAY(av)[++AvFILLp(av)] = *svp; /* av_push() */ 6240 } 6241 *svp = (SV*)av; 6242 } 6243 else { 6244 av = MUTABLE_AV(*svp); 6245 if (!av) { 6246 /* optimisation: store single backref directly in HvAUX or mg_obj */ 6247 *svp = sv; 6248 return; 6249 } 6250 assert(SvTYPE(av) == SVt_PVAV); 6251 if (AvFILLp(av) >= AvMAX(av)) { 6252 av_extend(av, AvFILLp(av)+1); 6253 } 6254 } 6255 /* push new backref */ 6256 AvARRAY(av)[++AvFILLp(av)] = sv; /* av_push() */ 6257 } 6258 6259 /* delete a back-reference to ourselves from the backref magic associated 6260 * with the SV we point to. 6261 */ 6262 6263 void 6264 Perl_sv_del_backref(pTHX_ SV *const tsv, SV *const sv) 6265 { 6266 SV **svp = NULL; 6267 6268 PERL_ARGS_ASSERT_SV_DEL_BACKREF; 6269 6270 if (SvTYPE(tsv) == SVt_PVHV) { 6271 if (HvHasAUX(tsv)) 6272 svp = (SV**)Perl_hv_backreferences_p(aTHX_ MUTABLE_HV(tsv)); 6273 } 6274 else if (SvIS_FREED(tsv) && PL_phase == PERL_PHASE_DESTRUCT) { 6275 /* It's possible for the last (strong) reference to tsv to have 6276 become freed *before* the last thing holding a weak reference. 6277 If both survive longer than the backreferences array, then when 6278 the referent's reference count drops to 0 and it is freed, it's 6279 not able to chase the backreferences, so they aren't NULLed. 6280 6281 For example, a CV holds a weak reference to its stash. If both the 6282 CV and the stash survive longer than the backreferences array, 6283 and the CV gets picked for the SvBREAK() treatment first, 6284 *and* it turns out that the stash is only being kept alive because 6285 of an our variable in the pad of the CV, then midway during CV 6286 destruction the stash gets freed, but CvSTASH() isn't set to NULL. 6287 It ends up pointing to the freed HV. Hence it's chased in here, and 6288 if this block wasn't here, it would hit the !svp panic just below. 6289 6290 I don't believe that "better" destruction ordering is going to help 6291 here - during global destruction there's always going to be the 6292 chance that something goes out of order. We've tried to make it 6293 foolproof before, and it only resulted in evolutionary pressure on 6294 fools. Which made us look foolish for our hubris. :-( 6295 */ 6296 return; 6297 } 6298 else { 6299 MAGIC *const mg 6300 = SvMAGICAL(tsv) ? mg_find(tsv, PERL_MAGIC_backref) : NULL; 6301 svp = mg ? &(mg->mg_obj) : NULL; 6302 } 6303 6304 if (!svp) 6305 Perl_croak(aTHX_ "panic: del_backref, svp=0"); 6306 if (!*svp) { 6307 /* It's possible that sv is being freed recursively part way through the 6308 freeing of tsv. If this happens, the backreferences array of tsv has 6309 already been freed, and so svp will be NULL. If this is the case, 6310 we should not panic. Instead, nothing needs doing, so return. */ 6311 if (PL_phase == PERL_PHASE_DESTRUCT && SvREFCNT(tsv) == 0) 6312 return; 6313 Perl_croak(aTHX_ "panic: del_backref, *svp=%p phase=%s refcnt=%" UVuf, 6314 (void*)*svp, PL_phase_names[PL_phase], (UV)SvREFCNT(tsv)); 6315 } 6316 6317 if (SvTYPE(*svp) == SVt_PVAV) { 6318 #ifdef DEBUGGING 6319 int count = 1; 6320 #endif 6321 AV * const av = (AV*)*svp; 6322 SSize_t fill; 6323 assert(!SvIS_FREED(av)); 6324 fill = AvFILLp(av); 6325 assert(fill > -1); 6326 svp = AvARRAY(av); 6327 /* for an SV with N weak references to it, if all those 6328 * weak refs are deleted, then sv_del_backref will be called 6329 * N times and O(N^2) compares will be done within the backref 6330 * array. To ameliorate this potential slowness, we: 6331 * 1) make sure this code is as tight as possible; 6332 * 2) when looking for SV, look for it at both the head and tail of the 6333 * array first before searching the rest, since some create/destroy 6334 * patterns will cause the backrefs to be freed in order. 6335 */ 6336 if (*svp == sv) { 6337 AvARRAY(av)++; 6338 AvMAX(av)--; 6339 } 6340 else { 6341 SV **p = &svp[fill]; 6342 SV *const topsv = *p; 6343 if (topsv != sv) { 6344 #ifdef DEBUGGING 6345 count = 0; 6346 #endif 6347 while (--p > svp) { 6348 if (*p == sv) { 6349 /* We weren't the last entry. 6350 An unordered list has this property that you 6351 can take the last element off the end to fill 6352 the hole, and it's still an unordered list :-) 6353 */ 6354 *p = topsv; 6355 #ifdef DEBUGGING 6356 count++; 6357 #else 6358 break; /* should only be one */ 6359 #endif 6360 } 6361 } 6362 } 6363 } 6364 assert(count ==1); 6365 AvFILLp(av) = fill-1; 6366 } 6367 else if (SvIS_FREED(*svp) && PL_phase == PERL_PHASE_DESTRUCT) { 6368 /* freed AV; skip */ 6369 } 6370 else { 6371 /* optimisation: only a single backref, stored directly */ 6372 if (*svp != sv) 6373 Perl_croak(aTHX_ "panic: del_backref, *svp=%p, sv=%p", 6374 (void*)*svp, (void*)sv); 6375 *svp = NULL; 6376 } 6377 6378 } 6379 6380 void 6381 Perl_sv_kill_backrefs(pTHX_ SV *const sv, AV *const av) 6382 { 6383 SV **svp; 6384 SV **last; 6385 bool is_array; 6386 6387 PERL_ARGS_ASSERT_SV_KILL_BACKREFS; 6388 6389 if (!av) 6390 return; 6391 6392 /* after multiple passes through Perl_sv_clean_all() for a thingy 6393 * that has badly leaked, the backref array may have gotten freed, 6394 * since we only protect it against 1 round of cleanup */ 6395 if (SvIS_FREED(av)) { 6396 if (PL_in_clean_all) /* All is fair */ 6397 return; 6398 Perl_croak(aTHX_ 6399 "panic: magic_killbackrefs (freed backref AV/SV)"); 6400 } 6401 6402 6403 is_array = (SvTYPE(av) == SVt_PVAV); 6404 if (is_array) { 6405 assert(!SvIS_FREED(av)); 6406 svp = AvARRAY(av); 6407 if (svp) 6408 last = svp + AvFILLp(av); 6409 } 6410 else { 6411 /* optimisation: only a single backref, stored directly */ 6412 svp = (SV**)&av; 6413 last = svp; 6414 } 6415 6416 if (svp) { 6417 while (svp <= last) { 6418 if (*svp) { 6419 SV *const referrer = *svp; 6420 if (SvWEAKREF(referrer)) { 6421 /* XXX Should we check that it hasn't changed? */ 6422 assert(SvROK(referrer)); 6423 SvRV_set(referrer, 0); 6424 SvOK_off(referrer); 6425 SvWEAKREF_off(referrer); 6426 SvSETMAGIC(referrer); 6427 } else if (SvTYPE(referrer) == SVt_PVGV || 6428 SvTYPE(referrer) == SVt_PVLV) { 6429 assert(SvTYPE(sv) == SVt_PVHV); /* stash backref */ 6430 /* You lookin' at me? */ 6431 assert(GvSTASH(referrer)); 6432 assert(GvSTASH(referrer) == (const HV *)sv); 6433 GvSTASH(referrer) = 0; 6434 } else if (SvTYPE(referrer) == SVt_PVCV || 6435 SvTYPE(referrer) == SVt_PVFM) { 6436 if (SvTYPE(sv) == SVt_PVHV) { /* stash backref */ 6437 /* You lookin' at me? */ 6438 assert(CvSTASH(referrer)); 6439 assert(CvSTASH(referrer) == (const HV *)sv); 6440 SvANY(MUTABLE_CV(referrer))->xcv_stash = 0; 6441 } 6442 else { 6443 assert(SvTYPE(sv) == SVt_PVGV); 6444 /* You lookin' at me? */ 6445 assert(CvGV(referrer)); 6446 assert(CvGV(referrer) == (const GV *)sv); 6447 anonymise_cv_maybe(MUTABLE_GV(sv), 6448 MUTABLE_CV(referrer)); 6449 } 6450 6451 } else { 6452 Perl_croak(aTHX_ 6453 "panic: magic_killbackrefs (flags=%" UVxf ")", 6454 (UV)SvFLAGS(referrer)); 6455 } 6456 6457 if (is_array) 6458 *svp = NULL; 6459 } 6460 svp++; 6461 } 6462 } 6463 if (is_array) { 6464 AvFILLp(av) = -1; 6465 SvREFCNT_dec_NN(av); /* remove extra count added by sv_add_backref() */ 6466 } 6467 return; 6468 } 6469 6470 /* 6471 =for apidoc sv_insert 6472 =for apidoc_item sv_insert_flags 6473 6474 These insert and/or replace a string at the specified offset/length within the 6475 SV. Similar to the Perl C<substr()> function, with C<littlelen> bytes starting 6476 at C<little> replacing C<len> bytes of the string in C<bigstr> starting at 6477 C<offset>. They handle get magic. 6478 6479 C<sv_insert_flags> is identical to plain C<sv_insert>, but the extra C<flags> 6480 are passed to the C<SvPV_force_flags> operation that is internally applied to 6481 C<bigstr>. 6482 6483 =cut 6484 */ 6485 6486 void 6487 Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags) 6488 { 6489 char *big; 6490 char *mid; 6491 char *midend; 6492 char *bigend; 6493 SSize_t i; /* better be sizeof(STRLEN) or bad things happen */ 6494 STRLEN curlen; 6495 6496 PERL_ARGS_ASSERT_SV_INSERT_FLAGS; 6497 6498 SvPV_force_flags(bigstr, curlen, flags); 6499 (void)SvPOK_only_UTF8(bigstr); 6500 6501 if (little >= SvPVX(bigstr) && 6502 little < SvPVX(bigstr) + (SvLEN(bigstr) ? SvLEN(bigstr) : SvCUR(bigstr))) { 6503 /* little is a pointer to within bigstr, since we can reallocate bigstr, 6504 or little...little+littlelen might overlap offset...offset+len we make a copy 6505 */ 6506 little = savepvn(little, littlelen); 6507 SAVEFREEPV(little); 6508 } 6509 6510 if (offset + len > curlen) { 6511 SvGROW(bigstr, offset+len+1); 6512 Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char); 6513 SvCUR_set(bigstr, offset+len); 6514 } 6515 6516 SvTAINT(bigstr); 6517 i = littlelen - len; 6518 if (i > 0) { /* string might grow */ 6519 big = SvGROW(bigstr, SvCUR(bigstr) + i + 1); 6520 mid = big + offset + len; 6521 midend = bigend = big + SvCUR(bigstr); 6522 bigend += i; 6523 *bigend = '\0'; 6524 while (midend > mid) /* shove everything down */ 6525 *--bigend = *--midend; 6526 Move(little,big+offset,littlelen,char); 6527 SvCUR_set(bigstr, SvCUR(bigstr) + i); 6528 SvSETMAGIC(bigstr); 6529 return; 6530 } 6531 else if (i == 0) { 6532 Move(little,SvPVX(bigstr)+offset,len,char); 6533 SvSETMAGIC(bigstr); 6534 return; 6535 } 6536 6537 big = SvPVX(bigstr); 6538 mid = big + offset; 6539 midend = mid + len; 6540 bigend = big + SvCUR(bigstr); 6541 6542 if (midend > bigend) 6543 Perl_croak(aTHX_ "panic: sv_insert, midend=%p, bigend=%p", 6544 midend, bigend); 6545 6546 if (mid - big > bigend - midend) { /* faster to shorten from end */ 6547 if (littlelen) { 6548 Move(little, mid, littlelen,char); 6549 mid += littlelen; 6550 } 6551 i = bigend - midend; 6552 if (i > 0) { 6553 Move(midend, mid, i,char); 6554 mid += i; 6555 } 6556 *mid = '\0'; 6557 SvCUR_set(bigstr, mid - big); 6558 } 6559 else if ((i = mid - big)) { /* faster from front */ 6560 midend -= littlelen; 6561 mid = midend; 6562 Move(big, midend - i, i, char); 6563 sv_chop(bigstr,midend-i); 6564 if (littlelen) 6565 Move(little, mid, littlelen,char); 6566 } 6567 else if (littlelen) { 6568 midend -= littlelen; 6569 sv_chop(bigstr,midend); 6570 Move(little,midend,littlelen,char); 6571 } 6572 else { 6573 sv_chop(bigstr,midend); 6574 } 6575 SvSETMAGIC(bigstr); 6576 } 6577 6578 /* 6579 =for apidoc sv_replace 6580 6581 Make the first argument a copy of the second, then delete the original. 6582 The target SV physically takes over ownership of the body of the source SV 6583 and inherits its flags; however, the target keeps any magic it owns, 6584 and any magic in the source is discarded. 6585 Note that this is a rather specialist SV copying operation; most of the 6586 time you'll want to use C<sv_setsv> or one of its many macro front-ends. 6587 6588 =cut 6589 */ 6590 6591 void 6592 Perl_sv_replace(pTHX_ SV *const sv, SV *const nsv) 6593 { 6594 const U32 refcnt = SvREFCNT(sv); 6595 6596 PERL_ARGS_ASSERT_SV_REPLACE; 6597 6598 SV_CHECK_THINKFIRST_COW_DROP(sv); 6599 if (SvREFCNT(nsv) != 1) { 6600 Perl_croak(aTHX_ "panic: reference miscount on nsv in sv_replace()" 6601 " (%" UVuf " != 1)", (UV) SvREFCNT(nsv)); 6602 } 6603 if (SvMAGICAL(sv)) { 6604 if (SvMAGICAL(nsv)) 6605 mg_free(nsv); 6606 else 6607 sv_upgrade(nsv, SVt_PVMG); 6608 SvMAGIC_set(nsv, SvMAGIC(sv)); 6609 SvFLAGS(nsv) |= SvMAGICAL(sv); 6610 SvMAGICAL_off(sv); 6611 SvMAGIC_set(sv, NULL); 6612 } 6613 SvREFCNT(sv) = 0; 6614 sv_clear(sv); 6615 assert(!SvREFCNT(sv)); 6616 #ifdef DEBUG_LEAKING_SCALARS 6617 sv->sv_flags = nsv->sv_flags; 6618 sv->sv_any = nsv->sv_any; 6619 sv->sv_refcnt = nsv->sv_refcnt; 6620 sv->sv_u = nsv->sv_u; 6621 #else 6622 StructCopy(nsv,sv,SV); 6623 #endif 6624 if(SvTYPE(sv) == SVt_IV) { 6625 SET_SVANY_FOR_BODYLESS_IV(sv); 6626 } 6627 6628 6629 SvREFCNT(sv) = refcnt; 6630 SvFLAGS(nsv) |= SVTYPEMASK; /* Mark as freed */ 6631 SvREFCNT(nsv) = 0; 6632 del_SV(nsv); 6633 } 6634 6635 /* We're about to free a GV which has a CV that refers back to us. 6636 * If that CV will outlive us, make it anonymous (i.e. fix up its CvGV 6637 * field) */ 6638 6639 STATIC void 6640 S_anonymise_cv_maybe(pTHX_ GV *gv, CV* cv) 6641 { 6642 SV *gvname; 6643 GV *anongv; 6644 6645 PERL_ARGS_ASSERT_ANONYMISE_CV_MAYBE; 6646 6647 /* be assertive! */ 6648 assert(SvREFCNT(gv) == 0); 6649 assert(isGV(gv) && isGV_with_GP(gv)); 6650 assert(GvGP(gv)); 6651 assert(!CvANON(cv)); 6652 assert(CvGV(cv) == gv); 6653 assert(!CvNAMED(cv)); 6654 6655 /* will the CV shortly be freed by gp_free() ? */ 6656 if (GvCV(gv) == cv && GvGP(gv)->gp_refcnt < 2 && SvREFCNT(cv) < 2) { 6657 SvANY(cv)->xcv_gv_u.xcv_gv = NULL; 6658 return; 6659 } 6660 6661 /* if not, anonymise: */ 6662 gvname = (GvSTASH(gv) && HvHasNAME(GvSTASH(gv)) && HvHasENAME(GvSTASH(gv))) 6663 ? newSVhek(HvENAME_HEK(GvSTASH(gv))) 6664 : newSVpvn_flags( "__ANON__", 8, 0 ); 6665 sv_catpvs(gvname, "::__ANON__"); 6666 anongv = gv_fetchsv(gvname, GV_ADDMULTI, SVt_PVCV); 6667 SvREFCNT_dec_NN(gvname); 6668 6669 CvANON_on(cv); 6670 CvCVGV_RC_on(cv); 6671 SvANY(cv)->xcv_gv_u.xcv_gv = MUTABLE_GV(SvREFCNT_inc(anongv)); 6672 } 6673 6674 6675 /* 6676 =for apidoc sv_clear 6677 6678 Clear an SV: call any destructors, free up any memory used by the body, 6679 and free the body itself. The SV's head is I<not> freed, although 6680 its type is set to all 1's so that it won't inadvertently be assumed 6681 to be live during global destruction etc. 6682 This function should only be called when C<REFCNT> is zero. Most of the time 6683 you'll want to call C<SvREFCNT_dec> instead. 6684 6685 =cut 6686 */ 6687 6688 void 6689 Perl_sv_clear(pTHX_ SV *const orig_sv) 6690 { 6691 SV* iter_sv = NULL; 6692 SV* next_sv = NULL; 6693 SV *sv = orig_sv; 6694 STRLEN hash_index = 0; /* initialise to make Coverity et al happy. 6695 Not strictly necessary */ 6696 6697 PERL_ARGS_ASSERT_SV_CLEAR; 6698 6699 /* within this loop, sv is the SV currently being freed, and 6700 * iter_sv is the most recent AV or whatever that's being iterated 6701 * over to provide more SVs */ 6702 6703 while (sv) { 6704 U32 type = SvTYPE(sv); 6705 HV *stash; 6706 6707 assert(SvREFCNT(sv) == 0); 6708 assert(!SvIS_FREED(sv)); 6709 #if NVSIZE <= IVSIZE 6710 if (type <= SVt_NV) { 6711 #else 6712 if (type <= SVt_IV) { 6713 #endif 6714 /* Historically this check on type was needed so that the code to 6715 * free bodies wasn't reached for these types, because the arena 6716 * slots were re-used for HEs and pointer table entries. The 6717 * metadata table `bodies_by_type` had the information for the sizes 6718 * for HEs and PTEs, hence the code here had to have a special-case 6719 * check to ensure that the "regular" body freeing code wasn't 6720 * reached, and get confused by the "lies" in `bodies_by_type`. 6721 * 6722 * However, it hasn't actually been needed for that reason since 6723 * Aug 2010 (commit 829cd18aa7f45221), because `bodies_by_type` was 6724 * changed to always hold the accurate metadata for the SV types. 6725 * This was possible because PTEs were no longer allocated from the 6726 * "SVt_IV" arena, and the code to allocate HEs from the "SVt_NULL" 6727 * arena is entirely in hv.c, so doesn't access the table. 6728 * 6729 * Some sort of check is still needed to handle SVt_IVs - pure RVs 6730 * need to take one code path which is common with RVs stored in 6731 * SVt_PV (or larger), but pure IVs mustn't take the "PV but not RV" 6732 * path, as SvPVX() doesn't point to valid memory. 6733 * 6734 * Hence this code is still the most efficient way to handle this. 6735 * 6736 * Additionally, for bodyless NVs, riding this branch is more 6737 * efficient than stepping through the general logic. 6738 */ 6739 6740 if (SvROK(sv)) 6741 goto free_rv; 6742 SvFLAGS(sv) &= SVf_BREAK; 6743 SvFLAGS(sv) |= SVTYPEMASK; 6744 goto free_head; 6745 } 6746 6747 /* objs are always >= MG, but pad names use the SVs_OBJECT flag 6748 for another purpose */ 6749 assert(!SvOBJECT(sv) || type >= SVt_PVMG); 6750 6751 if (type >= SVt_PVMG) { 6752 if (SvOBJECT(sv)) { 6753 if (!curse(sv, 1)) goto get_next_sv; 6754 type = SvTYPE(sv); /* destructor may have changed it */ 6755 } 6756 /* Free back-references before magic, in case the magic calls 6757 * Perl code that has weak references to sv. */ 6758 if (type == SVt_PVHV) { 6759 Perl_hv_kill_backrefs(aTHX_ MUTABLE_HV(sv)); 6760 if (SvMAGIC(sv)) 6761 mg_free(sv); 6762 } 6763 else if (SvMAGIC(sv)) { 6764 /* Free back-references before other types of magic. */ 6765 sv_unmagic(sv, PERL_MAGIC_backref); 6766 mg_free(sv); 6767 } 6768 SvMAGICAL_off(sv); 6769 } 6770 switch (type) { 6771 /* case SVt_INVLIST: */ 6772 case SVt_PVIO: 6773 if (IoIFP(sv) && 6774 IoIFP(sv) != PerlIO_stdin() && 6775 IoIFP(sv) != PerlIO_stdout() && 6776 IoIFP(sv) != PerlIO_stderr() && 6777 !(IoFLAGS(sv) & IOf_FAKE_DIRP)) 6778 { 6779 io_close(MUTABLE_IO(sv), NULL, FALSE, 6780 (IoTYPE(sv) == IoTYPE_WRONLY || 6781 IoTYPE(sv) == IoTYPE_RDWR || 6782 IoTYPE(sv) == IoTYPE_APPEND)); 6783 } 6784 if (IoDIRP(sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP)) 6785 PerlDir_close(IoDIRP(sv)); 6786 IoDIRP(sv) = (DIR*)NULL; 6787 Safefree(IoTOP_NAME(sv)); 6788 Safefree(IoFMT_NAME(sv)); 6789 Safefree(IoBOTTOM_NAME(sv)); 6790 if ((const GV *)sv == PL_statgv) 6791 PL_statgv = NULL; 6792 goto freescalar; 6793 case SVt_REGEXP: 6794 /* FIXME for plugins */ 6795 pregfree2((REGEXP*) sv); 6796 goto freescalar; 6797 case SVt_PVCV: 6798 case SVt_PVFM: 6799 cv_undef(MUTABLE_CV(sv)); 6800 /* If we're in a stash, we don't own a reference to it. 6801 * However it does have a back reference to us, which needs to 6802 * be cleared. */ 6803 if ((stash = CvSTASH(sv))) 6804 sv_del_backref(MUTABLE_SV(stash), sv); 6805 goto freescalar; 6806 case SVt_PVHV: 6807 if (HvTOTALKEYS((HV*)sv) > 0) { 6808 const HEK *hek; 6809 /* this statement should match the one at the beginning of 6810 * hv_undef_flags() */ 6811 if ( PL_phase != PERL_PHASE_DESTRUCT 6812 && (hek = HvNAME_HEK((HV*)sv))) 6813 { 6814 if (PL_stashcache) { 6815 DEBUG_o(Perl_deb(aTHX_ 6816 "sv_clear clearing PL_stashcache for '%" HEKf 6817 "'\n", 6818 HEKfARG(hek))); 6819 (void)hv_deletehek(PL_stashcache, 6820 hek, G_DISCARD); 6821 } 6822 hv_name_set((HV*)sv, NULL, 0, 0); 6823 } 6824 6825 /* save old iter_sv in unused SvSTASH field */ 6826 assert(!SvOBJECT(sv)); 6827 SvSTASH(sv) = (HV*)iter_sv; 6828 iter_sv = sv; 6829 6830 /* save old hash_index in unused SvMAGIC field */ 6831 assert(!SvMAGICAL(sv)); 6832 assert(!SvMAGIC(sv)); 6833 ((XPVMG*) SvANY(sv))->xmg_u.xmg_hash_index = hash_index; 6834 hash_index = 0; 6835 6836 next_sv = Perl_hfree_next_entry(aTHX_ (HV*)sv, &hash_index); 6837 goto get_next_sv; /* process this new sv */ 6838 } 6839 /* free empty hash */ 6840 Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL); 6841 assert(!HvARRAY((HV*)sv)); 6842 break; 6843 case SVt_PVAV: 6844 { 6845 AV* av = MUTABLE_AV(sv); 6846 if (PL_comppad == av) { 6847 PL_comppad = NULL; 6848 PL_curpad = NULL; 6849 } 6850 if (AvREAL(av) && AvFILLp(av) > -1) { 6851 next_sv = AvARRAY(av)[AvFILLp(av)--]; 6852 /* save old iter_sv in top-most slot of AV, 6853 * and pray that it doesn't get wiped in the meantime */ 6854 AvARRAY(av)[AvMAX(av)] = iter_sv; 6855 iter_sv = sv; 6856 goto get_next_sv; /* process this new sv */ 6857 } 6858 Safefree(AvALLOC(av)); 6859 } 6860 6861 break; 6862 case SVt_PVOBJ: 6863 if(ObjectMAXFIELD(sv) > -1) { 6864 next_sv = ObjectFIELDS(sv)[ObjectMAXFIELD(sv)--]; 6865 /* save old iter_sv in top-most field, and pray that it 6866 * doesn't get wiped in the meantime */ 6867 ObjectFIELDS(sv)[(ObjectITERSVAT(sv) = ObjectMAXFIELD(sv) + 1)] = iter_sv; 6868 iter_sv = sv; 6869 goto get_next_sv; 6870 } 6871 Safefree(ObjectFIELDS(sv)); 6872 break; 6873 case SVt_PVLV: 6874 if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */ 6875 SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv))); 6876 HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh; 6877 PL_hv_fetch_ent_mh = (HE*)LvTARG(sv); 6878 } 6879 else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ 6880 SvREFCNT_dec(LvTARG(sv)); 6881 if (isREGEXP(sv)) { 6882 /* This PVLV has had a REGEXP assigned to it - the memory 6883 * normally used to store SvLEN instead points to a regex body. 6884 * Retrieving the pointer to the regex body from the correct 6885 * location is normally abstracted by ReANY(), which handles 6886 * both SVt_PVLV and SVt_REGEXP 6887 * 6888 * This code is unwinding the storage specific to SVt_PVLV. 6889 * We get the body pointer directly from the union, free it, 6890 * then set SvLEN to whatever value was in the now-freed regex 6891 * body. The PVX buffer is shared by multiple re's and only 6892 * freed once, by the re whose SvLEN is non-null. 6893 * 6894 * Perl_sv_force_normal_flags() also has code to free this 6895 * hidden body - it swaps the body into a temporary SV it has 6896 * just allocated, then frees that SV. That causes execution 6897 * to reach the SVt_REGEXP: case about 60 lines earlier in this 6898 * function. 6899 * 6900 * See Perl_reg_temp_copy() for the code that sets up this 6901 * REGEXP body referenced by the PVLV. */ 6902 struct regexp *r = ((XPV*)SvANY(sv))->xpv_len_u.xpvlenu_rx; 6903 STRLEN len = r->xpv_len; 6904 pregfree2((REGEXP*) sv); 6905 del_body_by_type(r, SVt_REGEXP); 6906 SvLEN_set((sv), len); 6907 goto freescalar; 6908 } 6909 /* FALLTHROUGH */ 6910 case SVt_PVGV: 6911 if (isGV_with_GP(sv)) { 6912 if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv))) 6913 && HvHasENAME(stash)) 6914 mro_method_changed_in(stash); 6915 gp_free(MUTABLE_GV(sv)); 6916 if (GvNAME_HEK(sv)) 6917 unshare_hek(GvNAME_HEK(sv)); 6918 /* If we're in a stash, we don't own a reference to it. 6919 * However it does have a back reference to us, which 6920 * needs to be cleared. */ 6921 if ((stash = GvSTASH(sv))) 6922 sv_del_backref(MUTABLE_SV(stash), sv); 6923 } 6924 /* FIXME. There are probably more unreferenced pointers to SVs 6925 * in the interpreter struct that we should check and tidy in 6926 * a similar fashion to this: */ 6927 /* See also S_sv_unglob, which does the same thing. */ 6928 if ((const GV *)sv == PL_last_in_gv) 6929 PL_last_in_gv = NULL; 6930 else if ((const GV *)sv == PL_statgv) 6931 PL_statgv = NULL; 6932 else if ((const GV *)sv == PL_stderrgv) 6933 PL_stderrgv = NULL; 6934 /* FALLTHROUGH */ 6935 case SVt_PVMG: 6936 case SVt_PVNV: 6937 case SVt_PVIV: 6938 case SVt_INVLIST: 6939 case SVt_PV: 6940 freescalar: 6941 /* Don't bother with SvOOK_off(sv); as we're only going to 6942 * free it. */ 6943 if (SvOOK(sv)) { 6944 STRLEN offset; 6945 SvOOK_offset(sv, offset); 6946 SvPV_set(sv, SvPVX_mutable(sv) - offset); 6947 /* Don't even bother with turning off the OOK flag. */ 6948 } 6949 if (SvROK(sv)) { 6950 free_rv: 6951 { 6952 SV * const target = SvRV(sv); 6953 if (SvWEAKREF(sv)) 6954 sv_del_backref(target, sv); 6955 else 6956 next_sv = target; 6957 } 6958 } 6959 #ifdef PERL_ANY_COW 6960 else if (SvPVX_const(sv) 6961 && !(SvTYPE(sv) == SVt_PVIO 6962 && !(IoFLAGS(sv) & IOf_FAKE_DIRP))) 6963 { 6964 if (SvIsCOW(sv)) { 6965 #ifdef DEBUGGING 6966 if (DEBUG_C_TEST) { 6967 PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); 6968 sv_dump(sv); 6969 } 6970 #endif 6971 if (SvIsCOW_static(sv)) { 6972 SvLEN_set(sv, 0); 6973 } 6974 else if (SvIsCOW_shared_hash(sv)) { 6975 unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); 6976 } 6977 else { 6978 if (CowREFCNT(sv)) { 6979 sv_buf_to_rw(sv); 6980 CowREFCNT(sv)--; 6981 sv_buf_to_ro(sv); 6982 SvLEN_set(sv, 0); 6983 } 6984 } 6985 } 6986 if (SvLEN(sv)) { 6987 Safefree(SvPVX_mutable(sv)); 6988 } 6989 } 6990 #else 6991 else if (SvPVX_const(sv) && SvLEN(sv) 6992 && !(SvTYPE(sv) == SVt_PVIO 6993 && !(IoFLAGS(sv) & IOf_FAKE_DIRP))) 6994 Safefree(SvPVX_mutable(sv)); 6995 else if (SvPVX_const(sv) && SvIsCOW(sv)) { 6996 unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); 6997 } 6998 #endif 6999 break; 7000 case SVt_NV: 7001 break; 7002 } 7003 7004 free_body: 7005 7006 { 7007 U32 arena_index; 7008 const struct body_details *sv_type_details; 7009 7010 if (type == SVt_PVHV && HvHasAUX(sv)) { 7011 arena_index = HVAUX_ARENA_ROOT_IX; 7012 sv_type_details = &fake_hv_with_aux; 7013 } 7014 else { 7015 arena_index = type; 7016 sv_type_details = bodies_by_type + arena_index; 7017 } 7018 7019 SvFLAGS(sv) &= SVf_BREAK; 7020 SvFLAGS(sv) |= SVTYPEMASK; 7021 7022 if (sv_type_details->arena) { 7023 del_body(((char *)SvANY(sv) + sv_type_details->offset), 7024 &PL_body_roots[arena_index]); 7025 } 7026 else if (sv_type_details->body_size) { 7027 safefree(SvANY(sv)); 7028 } 7029 } 7030 7031 free_head: 7032 /* caller is responsible for freeing the head of the original sv */ 7033 if (sv != orig_sv && !SvREFCNT(sv)) 7034 del_SV(sv); 7035 7036 /* grab and free next sv, if any */ 7037 get_next_sv: 7038 while (1) { 7039 sv = NULL; 7040 if (next_sv) { 7041 sv = next_sv; 7042 next_sv = NULL; 7043 } 7044 else if (!iter_sv) { 7045 break; 7046 } else if (SvTYPE(iter_sv) == SVt_PVAV) { 7047 AV *const av = (AV*)iter_sv; 7048 if (AvFILLp(av) > -1) { 7049 sv = AvARRAY(av)[AvFILLp(av)--]; 7050 } 7051 else { /* no more elements of current AV to free */ 7052 sv = iter_sv; 7053 type = SvTYPE(sv); 7054 /* restore previous value, squirrelled away */ 7055 iter_sv = AvARRAY(av)[AvMAX(av)]; 7056 Safefree(AvALLOC(av)); 7057 goto free_body; 7058 } 7059 } else if (SvTYPE(iter_sv) == SVt_PVOBJ) { 7060 if (ObjectMAXFIELD(iter_sv) > -1) { 7061 sv = ObjectFIELDS(iter_sv)[ObjectMAXFIELD(iter_sv)--]; 7062 } 7063 else { /* no more fields in the current SV to free */ 7064 sv = iter_sv; 7065 type = SvTYPE(sv); 7066 iter_sv = ObjectFIELDS(sv)[ObjectITERSVAT(sv)]; 7067 Safefree(ObjectFIELDS(sv)); 7068 goto free_body; 7069 } 7070 } else if (SvTYPE(iter_sv) == SVt_PVHV) { 7071 sv = Perl_hfree_next_entry(aTHX_ (HV*)iter_sv, &hash_index); 7072 if (!sv && !HvTOTALKEYS((HV *)iter_sv)) { 7073 /* no more elements of current HV to free */ 7074 sv = iter_sv; 7075 type = SvTYPE(sv); 7076 /* Restore previous values of iter_sv and hash_index, 7077 * squirrelled away */ 7078 assert(!SvOBJECT(sv)); 7079 iter_sv = (SV*)SvSTASH(sv); 7080 assert(!SvMAGICAL(sv)); 7081 hash_index = ((XPVMG*) SvANY(sv))->xmg_u.xmg_hash_index; 7082 #ifdef DEBUGGING 7083 /* perl -DA does not like rubbish in SvMAGIC. */ 7084 SvMAGIC_set(sv, 0); 7085 #endif 7086 7087 /* free any remaining detritus from the hash struct */ 7088 Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL); 7089 assert(!HvARRAY((HV*)sv)); 7090 goto free_body; 7091 } 7092 } 7093 7094 /* unrolled SvREFCNT_dec and sv_free2 follows: */ 7095 7096 if (!sv) 7097 continue; 7098 if (!SvREFCNT(sv)) { 7099 sv_free(sv); 7100 continue; 7101 } 7102 if (--(SvREFCNT(sv))) 7103 continue; 7104 if (SvIMMORTAL(sv)) { 7105 /* make sure SvREFCNT(sv)==0 happens very seldom */ 7106 SvREFCNT(sv) = SvREFCNT_IMMORTAL; 7107 SvTEMP_off(sv); 7108 continue; 7109 } 7110 #ifdef DEBUGGING 7111 if (SvTEMP(sv)) { 7112 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), 7113 "Attempt to free temp prematurely: SV 0x%" UVxf 7114 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); 7115 continue; 7116 } 7117 #endif 7118 break; 7119 } /* while 1 */ 7120 7121 } /* while sv */ 7122 } 7123 7124 /* This routine curses the sv itself, not the object referenced by sv. So 7125 sv does not have to be ROK. */ 7126 7127 static bool 7128 S_curse(pTHX_ SV * const sv, const bool check_refcnt) { 7129 PERL_ARGS_ASSERT_CURSE; 7130 assert(SvOBJECT(sv)); 7131 7132 if (PL_defstash && /* Still have a symbol table? */ 7133 SvDESTROYABLE(sv)) 7134 { 7135 dSP; 7136 HV* stash; 7137 do { 7138 stash = SvSTASH(sv); 7139 assert(SvTYPE(stash) == SVt_PVHV); 7140 if (HvNAME(stash)) { 7141 CV* destructor = NULL; 7142 struct mro_meta *meta; 7143 7144 assert (HvHasAUX(stash)); 7145 7146 DEBUG_o( Perl_deb(aTHX_ "Looking for DESTROY method for %s\n", 7147 HvNAME(stash)) ); 7148 7149 /* don't make this an initialization above the assert, since it needs 7150 an AUX structure */ 7151 meta = HvMROMETA(stash); 7152 if (meta->destroy_gen && meta->destroy_gen == PL_sub_generation) { 7153 destructor = meta->destroy; 7154 DEBUG_o( Perl_deb(aTHX_ "Using cached DESTROY method %p for %s\n", 7155 (void *)destructor, HvNAME(stash)) ); 7156 } 7157 else { 7158 bool autoload = FALSE; 7159 GV *gv = 7160 gv_fetchmeth_pvn(stash, S_destroy, S_destroy_len, -1, 0); 7161 if (gv) 7162 destructor = GvCV(gv); 7163 if (!destructor) { 7164 gv = gv_autoload_pvn(stash, S_destroy, S_destroy_len, 7165 GV_AUTOLOAD_ISMETHOD); 7166 if (gv) 7167 destructor = GvCV(gv); 7168 if (destructor) 7169 autoload = TRUE; 7170 } 7171 /* we don't cache AUTOLOAD for DESTROY, since this code 7172 would then need to set $__PACKAGE__::AUTOLOAD, or the 7173 equivalent for XS AUTOLOADs */ 7174 if (!autoload) { 7175 meta->destroy_gen = PL_sub_generation; 7176 meta->destroy = destructor; 7177 7178 DEBUG_o( Perl_deb(aTHX_ "Set cached DESTROY method %p for %s\n", 7179 (void *)destructor, HvNAME(stash)) ); 7180 } 7181 else { 7182 DEBUG_o( Perl_deb(aTHX_ "Not caching AUTOLOAD for DESTROY method for %s\n", 7183 HvNAME(stash)) ); 7184 } 7185 } 7186 assert(!destructor || SvTYPE(destructor) == SVt_PVCV); 7187 if (destructor 7188 /* A constant subroutine can have no side effects, so 7189 don't bother calling it. */ 7190 && !CvCONST(destructor) 7191 /* Don't bother calling an empty destructor or one that 7192 returns immediately. */ 7193 && (CvISXSUB(destructor) 7194 || (CvSTART(destructor) 7195 && (CvSTART(destructor)->op_next->op_type 7196 != OP_LEAVESUB) 7197 && (CvSTART(destructor)->op_next->op_type 7198 != OP_PUSHMARK 7199 || CvSTART(destructor)->op_next->op_next->op_type 7200 != OP_RETURN 7201 ) 7202 )) 7203 ) 7204 { 7205 SV* const tmpref = newRV(sv); 7206 SvREADONLY_on(tmpref); /* DESTROY() could be naughty */ 7207 ENTER; 7208 PUSHSTACKi(PERLSI_DESTROY); 7209 EXTEND(SP, 2); 7210 PUSHMARK(SP); 7211 PUSHs(tmpref); 7212 PUTBACK; 7213 call_sv(MUTABLE_SV(destructor), 7214 G_DISCARD|G_EVAL|G_KEEPERR|G_VOID); 7215 POPSTACK; 7216 SPAGAIN; 7217 LEAVE; 7218 if(SvREFCNT(tmpref) < 2) { 7219 /* tmpref is not kept alive! */ 7220 SvREFCNT(sv)--; 7221 SvRV_set(tmpref, NULL); 7222 SvROK_off(tmpref); 7223 } 7224 SvREFCNT_dec_NN(tmpref); 7225 } 7226 } 7227 } while (SvOBJECT(sv) && SvSTASH(sv) != stash); 7228 7229 7230 if (check_refcnt && SvREFCNT(sv)) { 7231 if (PL_in_clean_objs) 7232 Perl_croak(aTHX_ 7233 "DESTROY created new reference to dead object '%" HEKf "'", 7234 HEKfARG(HvNAME_HEK(stash))); 7235 /* DESTROY gave object new lease on life */ 7236 return FALSE; 7237 } 7238 } 7239 7240 if (SvOBJECT(sv)) { 7241 HV * const stash = SvSTASH(sv); 7242 /* Curse before freeing the stash, as freeing the stash could cause 7243 a recursive call into S_curse. */ 7244 SvOBJECT_off(sv); /* Curse the object. */ 7245 SvSTASH_set(sv,0); /* SvREFCNT_dec may try to read this */ 7246 SvREFCNT_dec(stash); /* possibly of changed persuasion */ 7247 } 7248 return TRUE; 7249 } 7250 7251 /* 7252 =for apidoc sv_newref 7253 7254 Increment an SV's reference count. Use the C<SvREFCNT_inc()> wrapper 7255 instead. 7256 7257 =cut 7258 */ 7259 7260 SV * 7261 Perl_sv_newref(pTHX_ SV *const sv) 7262 { 7263 PERL_UNUSED_CONTEXT; 7264 if (sv) 7265 (SvREFCNT(sv))++; 7266 return sv; 7267 } 7268 7269 /* 7270 =for apidoc sv_free 7271 7272 Decrement an SV's reference count, and if it drops to zero, call 7273 C<sv_clear> to invoke destructors and free up any memory used by 7274 the body; finally, deallocating the SV's head itself. 7275 Normally called via a wrapper macro C<SvREFCNT_dec>. 7276 7277 =cut 7278 */ 7279 7280 void 7281 Perl_sv_free(pTHX_ SV *const sv) 7282 { 7283 SvREFCNT_dec(sv); 7284 } 7285 7286 7287 /* Private helper function for SvREFCNT_dec(). 7288 * Called with rc set to original SvREFCNT(sv), where rc == 0 or 1 */ 7289 7290 void 7291 Perl_sv_free2(pTHX_ SV *const sv, const U32 rc) 7292 { 7293 7294 PERL_ARGS_ASSERT_SV_FREE2; 7295 7296 if (LIKELY( rc == 1 )) { 7297 /* normal case */ 7298 SvREFCNT(sv) = 0; 7299 7300 if (SvIMMORTAL(sv)) { 7301 /* make sure SvREFCNT(sv)==0 happens very seldom */ 7302 SvREFCNT(sv) = SvREFCNT_IMMORTAL; 7303 SvTEMP_off(sv); 7304 return; 7305 } 7306 #ifdef DEBUGGING 7307 if (SvTEMP(sv)) { 7308 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), 7309 "Attempt to free temp prematurely: SV 0x%" UVxf 7310 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); 7311 return; 7312 } 7313 #endif 7314 sv_clear(sv); 7315 if (! SvREFCNT(sv)) /* may have have been resurrected */ 7316 del_SV(sv); 7317 return; 7318 } 7319 7320 /* handle exceptional cases */ 7321 7322 assert(rc == 0); 7323 7324 if (SvFLAGS(sv) & SVf_BREAK) 7325 /* this SV's refcnt has been artificially decremented to 7326 * trigger cleanup */ 7327 return; 7328 if (PL_in_clean_all) /* All is fair */ 7329 return; 7330 if (SvIMMORTAL(sv)) { 7331 /* make sure SvREFCNT(sv)==0 happens very seldom */ 7332 SvREFCNT(sv) = SvREFCNT_IMMORTAL; 7333 return; 7334 } 7335 if (ckWARN_d(WARN_INTERNAL)) { 7336 #ifdef DEBUG_LEAKING_SCALARS_FORK_DUMP 7337 Perl_dump_sv_child(aTHX_ sv); 7338 #else 7339 #ifdef DEBUG_LEAKING_SCALARS 7340 sv_dump(sv); 7341 #endif 7342 #ifdef DEBUG_LEAKING_SCALARS_ABORT 7343 if (PL_warnhook == PERL_WARNHOOK_FATAL 7344 || ckDEAD(packWARN(WARN_INTERNAL))) { 7345 /* Don't let Perl_warner cause us to escape our fate: */ 7346 abort(); 7347 } 7348 #endif 7349 /* This may not return: */ 7350 Perl_warner(aTHX_ packWARN(WARN_INTERNAL), 7351 "Attempt to free unreferenced scalar: SV 0x%" UVxf 7352 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); 7353 #endif 7354 } 7355 #ifdef DEBUG_LEAKING_SCALARS_ABORT 7356 abort(); 7357 #endif 7358 7359 } 7360 7361 7362 /* 7363 =for apidoc sv_len 7364 7365 Returns the length of the string in the SV. Handles magic and type 7366 coercion and sets the UTF8 flag appropriately. See also C<L</SvCUR>>, which 7367 gives raw access to the C<xpv_cur> slot. 7368 7369 =cut 7370 */ 7371 7372 STRLEN 7373 Perl_sv_len(pTHX_ SV *const sv) 7374 { 7375 STRLEN len; 7376 7377 if (!sv) 7378 return 0; 7379 7380 (void)SvPV_const(sv, len); 7381 return len; 7382 } 7383 7384 /* 7385 =for apidoc sv_len_utf8 7386 =for apidoc_item sv_len_utf8_nomg 7387 7388 These return the number of characters in the string in an SV, counting wide 7389 UTF-8 bytes as a single character. Both handle type coercion. 7390 They differ only in that C<sv_len_utf8> performs 'get' magic; 7391 C<sv_len_utf8_nomg> skips any magic. 7392 7393 =cut 7394 */ 7395 7396 /* 7397 * The length is cached in PERL_MAGIC_utf8, in the mg_len field. Also the 7398 * mg_ptr is used, by sv_pos_u2b() and sv_pos_b2u() - see the comments below. 7399 * (Note that the mg_len is not the length of the mg_ptr field. 7400 * This allows the cache to store the character length of the string without 7401 * needing to malloc() extra storage to attach to the mg_ptr.) 7402 * 7403 */ 7404 7405 STRLEN 7406 Perl_sv_len_utf8(pTHX_ SV *const sv) 7407 { 7408 if (!sv) 7409 return 0; 7410 7411 SvGETMAGIC(sv); 7412 return sv_len_utf8_nomg(sv); 7413 } 7414 7415 STRLEN 7416 Perl_sv_len_utf8_nomg(pTHX_ SV * const sv) 7417 { 7418 STRLEN len; 7419 const U8 *s = (U8*)SvPV_nomg_const(sv, len); 7420 7421 PERL_ARGS_ASSERT_SV_LEN_UTF8_NOMG; 7422 7423 if (PL_utf8cache && SvUTF8(sv)) { 7424 STRLEN ulen; 7425 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; 7426 7427 if (mg && (mg->mg_len != -1 || mg->mg_ptr)) { 7428 if (mg->mg_len != -1) 7429 ulen = mg->mg_len; 7430 else { 7431 /* We can use the offset cache for a headstart. 7432 The longer value is stored in the first pair. */ 7433 STRLEN *cache = (STRLEN *) mg->mg_ptr; 7434 7435 ulen = cache[0] + Perl_utf8_length(aTHX_ s + cache[1], 7436 s + len); 7437 } 7438 7439 if (PL_utf8cache < 0) { 7440 const STRLEN real = Perl_utf8_length(aTHX_ s, s + len); 7441 assert_uft8_cache_coherent("sv_len_utf8", ulen, real, sv); 7442 } 7443 } 7444 else { 7445 ulen = Perl_utf8_length(aTHX_ s, s + len); 7446 utf8_mg_len_cache_update(sv, &mg, ulen); 7447 } 7448 return ulen; 7449 } 7450 return SvUTF8(sv) ? Perl_utf8_length(aTHX_ s, s + len) : len; 7451 } 7452 7453 /* Walk forwards to find the byte corresponding to the passed in UTF-8 7454 offset. */ 7455 static STRLEN 7456 S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, 7457 STRLEN *const uoffset_p, bool *const at_end, 7458 bool* canonical_position) 7459 { 7460 const U8 *s = start; 7461 STRLEN uoffset = *uoffset_p; 7462 7463 PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS; 7464 7465 while (s < send && uoffset) { 7466 --uoffset; 7467 s += UTF8SKIP(s); 7468 } 7469 if (s == send) { 7470 *at_end = TRUE; 7471 } 7472 else if (s > send) { 7473 *at_end = TRUE; 7474 /* This is the existing behaviour. Possibly it should be a croak, as 7475 it's actually a bounds error */ 7476 s = send; 7477 } 7478 /* If the unicode position is beyond the end, we return the end but 7479 shouldn't cache that position */ 7480 *canonical_position = (uoffset == 0); 7481 *uoffset_p -= uoffset; 7482 return s - start; 7483 } 7484 7485 /* Given the length of the string in both bytes and UTF-8 characters, decide 7486 whether to walk forwards or backwards to find the byte corresponding to 7487 the passed in UTF-8 offset. */ 7488 static STRLEN 7489 S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, 7490 STRLEN uoffset, const STRLEN uend) 7491 { 7492 STRLEN backw = uend - uoffset; 7493 7494 PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY; 7495 7496 if (uoffset < 2 * backw) { 7497 /* The assumption is that the average size of a character is 2 bytes, 7498 * so going forwards is twice the speed of going backwards (that's 7499 * where the 2 * backw comes from). (The real figure of course depends 7500 * on the UTF-8 data.) */ 7501 const U8 *s = start; 7502 7503 s = utf8_hop_forward(s, uoffset, send); 7504 assert (s <= send); 7505 if (s > send) 7506 s = send; 7507 return s - start; 7508 } 7509 7510 send = utf8_hop_back(send, -backw, start); 7511 return send - start; 7512 } 7513 7514 /* For the string representation of the given scalar, find the byte 7515 corresponding to the passed in UTF-8 offset. uoffset0 and boffset0 7516 give another position in the string, *before* the sought offset, which 7517 (which is always true, as 0, 0 is a valid pair of positions), which should 7518 help reduce the amount of linear searching. 7519 If *mgp is non-NULL, it should point to the UTF-8 cache magic, which 7520 will be used to reduce the amount of linear searching. The cache will be 7521 created if necessary, and the found value offered to it for update. */ 7522 static STRLEN 7523 S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, 7524 const U8 *const send, STRLEN uoffset, 7525 STRLEN uoffset0, STRLEN boffset0) 7526 { 7527 STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */ 7528 bool found = FALSE; 7529 bool at_end = FALSE; 7530 bool canonical_position = FALSE; 7531 7532 PERL_ARGS_ASSERT_SV_POS_U2B_CACHED; 7533 7534 assert (uoffset >= uoffset0); 7535 7536 if (!uoffset) 7537 return 0; 7538 7539 if (!SvREADONLY(sv) && !SvGMAGICAL(sv) && SvPOK(sv) 7540 && PL_utf8cache 7541 && (*mgp || (SvTYPE(sv) >= SVt_PVMG && 7542 (*mgp = mg_find(sv, PERL_MAGIC_utf8))))) { 7543 if ((*mgp)->mg_ptr) { 7544 STRLEN *cache = (STRLEN *) (*mgp)->mg_ptr; 7545 if (cache[0] == uoffset) { 7546 /* An exact match. */ 7547 return cache[1]; 7548 } 7549 if (cache[2] == uoffset) { 7550 /* An exact match. */ 7551 return cache[3]; 7552 } 7553 7554 if (cache[0] < uoffset) { 7555 /* The cache already knows part of the way. */ 7556 if (cache[0] > uoffset0) { 7557 /* The cache knows more than the passed in pair */ 7558 uoffset0 = cache[0]; 7559 boffset0 = cache[1]; 7560 } 7561 if ((*mgp)->mg_len != -1) { 7562 /* And we know the end too. */ 7563 boffset = boffset0 7564 + sv_pos_u2b_midway(start + boffset0, send, 7565 uoffset - uoffset0, 7566 (*mgp)->mg_len - uoffset0); 7567 } else { 7568 uoffset -= uoffset0; 7569 boffset = boffset0 7570 + sv_pos_u2b_forwards(start + boffset0, 7571 send, &uoffset, &at_end, 7572 &canonical_position); 7573 uoffset += uoffset0; 7574 } 7575 } 7576 else if (cache[2] < uoffset) { 7577 /* We're between the two cache entries. */ 7578 if (cache[2] > uoffset0) { 7579 /* and the cache knows more than the passed in pair */ 7580 uoffset0 = cache[2]; 7581 boffset0 = cache[3]; 7582 } 7583 7584 boffset = boffset0 7585 + sv_pos_u2b_midway(start + boffset0, 7586 start + cache[1], 7587 uoffset - uoffset0, 7588 cache[0] - uoffset0); 7589 } else { 7590 boffset = boffset0 7591 + sv_pos_u2b_midway(start + boffset0, 7592 start + cache[3], 7593 uoffset - uoffset0, 7594 cache[2] - uoffset0); 7595 } 7596 found = TRUE; 7597 } 7598 else if ((*mgp)->mg_len != -1) { 7599 /* If we can take advantage of a passed in offset, do so. */ 7600 /* In fact, offset0 is either 0, or less than offset, so don't 7601 need to worry about the other possibility. */ 7602 boffset = boffset0 7603 + sv_pos_u2b_midway(start + boffset0, send, 7604 uoffset - uoffset0, 7605 (*mgp)->mg_len - uoffset0); 7606 found = TRUE; 7607 } 7608 } 7609 7610 if (!found || PL_utf8cache < 0) { 7611 STRLEN real_boffset; 7612 uoffset -= uoffset0; 7613 real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0, 7614 send, &uoffset, &at_end, 7615 &canonical_position); 7616 uoffset += uoffset0; 7617 7618 if (found && PL_utf8cache < 0) 7619 assert_uft8_cache_coherent("sv_pos_u2b_cache", boffset, 7620 real_boffset, sv); 7621 boffset = real_boffset; 7622 } 7623 7624 if (PL_utf8cache && canonical_position && !SvGMAGICAL(sv) && SvPOK(sv)) { 7625 if (at_end) 7626 utf8_mg_len_cache_update(sv, mgp, uoffset); 7627 else 7628 utf8_mg_pos_cache_update(sv, mgp, boffset, uoffset, send - start); 7629 } 7630 return boffset; 7631 } 7632 7633 7634 /* 7635 =for apidoc sv_pos_u2b_flags 7636 7637 Converts the offset from a count of UTF-8 chars from 7638 the start of the string, to a count of the equivalent number of bytes; if 7639 C<lenp> is non-zero, it does the same to C<lenp>, but this time starting from 7640 C<offset>, rather than from the start 7641 of the string. Handles type coercion. 7642 C<flags> is passed to C<SvPV_flags>, and usually should be 7643 C<SV_GMAGIC|SV_CONST_RETURN> to handle magic. 7644 7645 =cut 7646 */ 7647 7648 /* 7649 * sv_pos_u2b_flags() uses, like sv_pos_b2u(), the mg_ptr of the potential 7650 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and 7651 * byte offsets. See also the comments of S_utf8_mg_pos_cache_update(). 7652 * 7653 */ 7654 7655 STRLEN 7656 Perl_sv_pos_u2b_flags(pTHX_ SV *const sv, STRLEN uoffset, STRLEN *const lenp, 7657 U32 flags) 7658 { 7659 const U8 *start; 7660 STRLEN len; 7661 STRLEN boffset; 7662 7663 PERL_ARGS_ASSERT_SV_POS_U2B_FLAGS; 7664 7665 start = (U8*)SvPV_flags(sv, len, flags); 7666 if (len) { 7667 const U8 * const send = start + len; 7668 MAGIC *mg = NULL; 7669 boffset = sv_pos_u2b_cached(sv, &mg, start, send, uoffset, 0, 0); 7670 7671 if (lenp 7672 && *lenp /* don't bother doing work for 0, as its bytes equivalent 7673 is 0, and *lenp is already set to that. */) { 7674 /* Convert the relative offset to absolute. */ 7675 const STRLEN uoffset2 = uoffset + *lenp; 7676 const STRLEN boffset2 7677 = sv_pos_u2b_cached(sv, &mg, start, send, uoffset2, 7678 uoffset, boffset) - boffset; 7679 7680 *lenp = boffset2; 7681 } 7682 } else { 7683 if (lenp) 7684 *lenp = 0; 7685 boffset = 0; 7686 } 7687 7688 return boffset; 7689 } 7690 7691 /* 7692 =for apidoc sv_pos_u2b 7693 7694 Converts the value pointed to by C<offsetp> from a count of UTF-8 chars from 7695 the start of the string, to a count of the equivalent number of bytes; if 7696 C<lenp> is non-zero, it does the same to C<lenp>, but this time starting from 7697 the offset, rather than from the start of the string. Handles magic and 7698 type coercion. 7699 7700 Use C<sv_pos_u2b_flags> in preference, which correctly handles strings longer 7701 than 2Gb. 7702 7703 =cut 7704 */ 7705 7706 /* 7707 * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential 7708 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and 7709 * byte offsets. See also the comments of S_utf8_mg_pos_cache_update(). 7710 * 7711 */ 7712 7713 /* This function is subject to size and sign problems */ 7714 7715 void 7716 Perl_sv_pos_u2b(pTHX_ SV *const sv, I32 *const offsetp, I32 *const lenp) 7717 { 7718 PERL_ARGS_ASSERT_SV_POS_U2B; 7719 7720 if (lenp) { 7721 STRLEN ulen = (STRLEN)*lenp; 7722 *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, &ulen, 7723 SV_GMAGIC|SV_CONST_RETURN); 7724 *lenp = (I32)ulen; 7725 } else { 7726 *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, NULL, 7727 SV_GMAGIC|SV_CONST_RETURN); 7728 } 7729 } 7730 7731 static void 7732 S_utf8_mg_len_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, 7733 const STRLEN ulen) 7734 { 7735 PERL_ARGS_ASSERT_UTF8_MG_LEN_CACHE_UPDATE; 7736 if (SvREADONLY(sv) || SvGMAGICAL(sv) || !SvPOK(sv)) 7737 return; 7738 7739 if (!*mgp && (SvTYPE(sv) < SVt_PVMG || 7740 !(*mgp = mg_find(sv, PERL_MAGIC_utf8)))) { 7741 *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, &PL_vtbl_utf8, 0, 0); 7742 } 7743 assert(*mgp); 7744 7745 (*mgp)->mg_len = ulen; 7746 } 7747 7748 /* Create and update the UTF8 magic offset cache, with the proffered utf8/ 7749 byte length pairing. The (byte) length of the total SV is passed in too, 7750 as blen, because for some (more esoteric) SVs, the call to SvPV_const() 7751 may not have updated SvCUR, so we can't rely on reading it directly. 7752 7753 The proffered utf8/byte length pairing isn't used if the cache already has 7754 two pairs, and swapping either for the proffered pair would increase the 7755 RMS of the intervals between known byte offsets. 7756 7757 The cache itself consists of 4 STRLEN values 7758 0: larger UTF-8 offset 7759 1: corresponding byte offset 7760 2: smaller UTF-8 offset 7761 3: corresponding byte offset 7762 7763 Unused cache pairs have the value 0, 0. 7764 Keeping the cache "backwards" means that the invariant of 7765 cache[0] >= cache[2] is maintained even with empty slots, which means that 7766 the code that uses it doesn't need to worry if only 1 entry has actually 7767 been set to non-zero. It also makes the "position beyond the end of the 7768 cache" logic much simpler, as the first slot is always the one to start 7769 from. 7770 */ 7771 static void 7772 S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN byte, 7773 const STRLEN utf8, const STRLEN blen) 7774 { 7775 STRLEN *cache; 7776 7777 PERL_ARGS_ASSERT_UTF8_MG_POS_CACHE_UPDATE; 7778 7779 if (SvREADONLY(sv)) 7780 return; 7781 7782 if (!*mgp && (SvTYPE(sv) < SVt_PVMG || 7783 !(*mgp = mg_find(sv, PERL_MAGIC_utf8)))) { 7784 *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 7785 0); 7786 (*mgp)->mg_len = -1; 7787 } 7788 assert(*mgp); 7789 7790 if (!(cache = (STRLEN *)(*mgp)->mg_ptr)) { 7791 Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); 7792 (*mgp)->mg_ptr = (char *) cache; 7793 } 7794 assert(cache); 7795 7796 if (PL_utf8cache < 0 && SvPOKp(sv)) { 7797 /* SvPOKp() because, if sv is a reference, then SvPVX() is actually 7798 a pointer. Note that we no longer cache utf8 offsets on refer- 7799 ences, but this check is still a good idea, for robustness. */ 7800 const U8 *start = (const U8 *) SvPVX_const(sv); 7801 const STRLEN realutf8 = utf8_length(start, start + byte); 7802 7803 assert_uft8_cache_coherent("utf8_mg_pos_cache_update", utf8, realutf8, 7804 sv); 7805 } 7806 7807 /* Cache is held with the later position first, to simplify the code 7808 that deals with unbounded ends. */ 7809 7810 ASSERT_UTF8_CACHE(cache); 7811 if (cache[1] == 0) { 7812 /* Cache is totally empty */ 7813 cache[0] = utf8; 7814 cache[1] = byte; 7815 } else if (cache[3] == 0) { 7816 if (byte > cache[1]) { 7817 /* New one is larger, so goes first. */ 7818 cache[2] = cache[0]; 7819 cache[3] = cache[1]; 7820 cache[0] = utf8; 7821 cache[1] = byte; 7822 } else { 7823 cache[2] = utf8; 7824 cache[3] = byte; 7825 } 7826 } else { 7827 /* float casts necessary? XXX */ 7828 #define THREEWAY_SQUARE(a,b,c,d) \ 7829 ((float)((d) - (c))) * ((float)((d) - (c))) \ 7830 + ((float)((c) - (b))) * ((float)((c) - (b))) \ 7831 + ((float)((b) - (a))) * ((float)((b) - (a))) 7832 7833 /* Cache has 2 slots in use, and we know three potential pairs. 7834 Keep the two that give the lowest RMS distance. Do the 7835 calculation in bytes simply because we always know the byte 7836 length. squareroot has the same ordering as the positive value, 7837 so don't bother with the actual square root. */ 7838 if (byte > cache[1]) { 7839 /* New position is after the existing pair of pairs. */ 7840 const float keep_earlier 7841 = THREEWAY_SQUARE(0, cache[3], byte, blen); 7842 const float keep_later 7843 = THREEWAY_SQUARE(0, cache[1], byte, blen); 7844 7845 if (keep_later < keep_earlier) { 7846 cache[2] = cache[0]; 7847 cache[3] = cache[1]; 7848 } 7849 cache[0] = utf8; 7850 cache[1] = byte; 7851 } 7852 else { 7853 const float keep_later = THREEWAY_SQUARE(0, byte, cache[1], blen); 7854 float b, c, keep_earlier; 7855 if (byte > cache[3]) { 7856 /* New position is between the existing pair of pairs. */ 7857 b = (float)cache[3]; 7858 c = (float)byte; 7859 } else { 7860 /* New position is before the existing pair of pairs. */ 7861 b = (float)byte; 7862 c = (float)cache[3]; 7863 } 7864 keep_earlier = THREEWAY_SQUARE(0, b, c, blen); 7865 if (byte > cache[3]) { 7866 if (keep_later < keep_earlier) { 7867 cache[2] = utf8; 7868 cache[3] = byte; 7869 } 7870 else { 7871 cache[0] = utf8; 7872 cache[1] = byte; 7873 } 7874 } 7875 else { 7876 if (! (keep_later < keep_earlier)) { 7877 cache[0] = cache[2]; 7878 cache[1] = cache[3]; 7879 } 7880 cache[2] = utf8; 7881 cache[3] = byte; 7882 } 7883 } 7884 } 7885 ASSERT_UTF8_CACHE(cache); 7886 } 7887 7888 /* We already know all of the way, now we may be able to walk back. The same 7889 assumption is made as in S_sv_pos_u2b_midway(), namely that walking 7890 backward is half the speed of walking forward. */ 7891 static STRLEN 7892 S_sv_pos_b2u_midway(pTHX_ const U8 *const s, const U8 *const target, 7893 const U8 *end, STRLEN endu) 7894 { 7895 const STRLEN forw = target - s; 7896 STRLEN backw = end - target; 7897 7898 PERL_ARGS_ASSERT_SV_POS_B2U_MIDWAY; 7899 7900 if (forw < 2 * backw) { 7901 return utf8_length(s, target); 7902 } 7903 7904 while (end > target) { 7905 end = utf8_hop_back(end, -1, target); 7906 endu--; 7907 } 7908 return endu; 7909 } 7910 7911 /* 7912 =for apidoc sv_pos_b2u_flags 7913 7914 Converts C<offset> from a count of bytes from the start of the string, to 7915 a count of the equivalent number of UTF-8 chars. Handles type coercion. 7916 C<flags> is passed to C<SvPV_flags>, and usually should be 7917 C<SV_GMAGIC|SV_CONST_RETURN> to handle magic. 7918 7919 =cut 7920 */ 7921 7922 /* 7923 * sv_pos_b2u_flags() uses, like sv_pos_u2b_flags(), the mg_ptr of the 7924 * potential PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 7925 * and byte offsets. 7926 * 7927 */ 7928 STRLEN 7929 Perl_sv_pos_b2u_flags(pTHX_ SV *const sv, STRLEN const offset, U32 flags) 7930 { 7931 const U8* s; 7932 STRLEN len = 0; /* Actually always set, but let's keep gcc happy. */ 7933 STRLEN blen; 7934 MAGIC* mg = NULL; 7935 const U8* send; 7936 bool found = FALSE; 7937 7938 PERL_ARGS_ASSERT_SV_POS_B2U_FLAGS; 7939 7940 s = (const U8*)SvPV_flags(sv, blen, flags); 7941 7942 if (blen < offset) 7943 Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset, blen=%" UVuf 7944 ", byte=%" UVuf, (UV)blen, (UV)offset); 7945 7946 send = s + offset; 7947 7948 if (!SvREADONLY(sv) 7949 && PL_utf8cache 7950 && SvTYPE(sv) >= SVt_PVMG 7951 && (mg = mg_find(sv, PERL_MAGIC_utf8))) 7952 { 7953 if (mg->mg_ptr) { 7954 STRLEN * const cache = (STRLEN *) mg->mg_ptr; 7955 if (cache[1] == offset) { 7956 /* An exact match. */ 7957 return cache[0]; 7958 } 7959 if (cache[3] == offset) { 7960 /* An exact match. */ 7961 return cache[2]; 7962 } 7963 7964 if (cache[1] < offset) { 7965 /* We already know part of the way. */ 7966 if (mg->mg_len != -1) { 7967 /* Actually, we know the end too. */ 7968 len = cache[0] 7969 + S_sv_pos_b2u_midway(aTHX_ s + cache[1], send, 7970 s + blen, mg->mg_len - cache[0]); 7971 } else { 7972 len = cache[0] + utf8_length(s + cache[1], send); 7973 } 7974 } 7975 else if (cache[3] < offset) { 7976 /* We're between the two cached pairs, so we do the calculation 7977 offset by the byte/utf-8 positions for the earlier pair, 7978 then add the utf-8 characters from the string start to 7979 there. */ 7980 len = S_sv_pos_b2u_midway(aTHX_ s + cache[3], send, 7981 s + cache[1], cache[0] - cache[2]) 7982 + cache[2]; 7983 7984 } 7985 else { /* cache[3] > offset */ 7986 len = S_sv_pos_b2u_midway(aTHX_ s, send, s + cache[3], 7987 cache[2]); 7988 7989 } 7990 ASSERT_UTF8_CACHE(cache); 7991 found = TRUE; 7992 } else if (mg->mg_len != -1) { 7993 len = S_sv_pos_b2u_midway(aTHX_ s, send, s + blen, mg->mg_len); 7994 found = TRUE; 7995 } 7996 } 7997 if (!found || PL_utf8cache < 0) { 7998 const STRLEN real_len = utf8_length(s, send); 7999 8000 if (found && PL_utf8cache < 0) 8001 assert_uft8_cache_coherent("sv_pos_b2u", len, real_len, sv); 8002 len = real_len; 8003 } 8004 8005 if (PL_utf8cache) { 8006 if (blen == offset) 8007 utf8_mg_len_cache_update(sv, &mg, len); 8008 else 8009 utf8_mg_pos_cache_update(sv, &mg, offset, len, blen); 8010 } 8011 8012 return len; 8013 } 8014 8015 /* 8016 =for apidoc sv_pos_b2u 8017 8018 Converts the value pointed to by C<offsetp> from a count of bytes from the 8019 start of the string, to a count of the equivalent number of UTF-8 chars. 8020 Handles magic and type coercion. 8021 8022 Use C<sv_pos_b2u_flags> in preference, which correctly handles strings 8023 longer than 2Gb. 8024 8025 =cut 8026 */ 8027 8028 /* 8029 * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential 8030 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and 8031 * byte offsets. 8032 * 8033 */ 8034 void 8035 Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp) 8036 { 8037 PERL_ARGS_ASSERT_SV_POS_B2U; 8038 8039 if (!sv) 8040 return; 8041 8042 *offsetp = (I32)sv_pos_b2u_flags(sv, (STRLEN)*offsetp, 8043 SV_GMAGIC|SV_CONST_RETURN); 8044 } 8045 8046 static void 8047 S_assert_uft8_cache_coherent(pTHX_ const char *const func, STRLEN from_cache, 8048 STRLEN real, SV *const sv) 8049 { 8050 PERL_ARGS_ASSERT_ASSERT_UFT8_CACHE_COHERENT; 8051 8052 /* As this is debugging only code, save space by keeping this test here, 8053 rather than inlining it in all the callers. */ 8054 if (from_cache == real) 8055 return; 8056 8057 /* Need to turn the assertions off otherwise we may recurse infinitely 8058 while printing error messages. */ 8059 SAVEI8(PL_utf8cache); 8060 PL_utf8cache = 0; 8061 Perl_croak(aTHX_ "panic: %s cache %" UVuf " real %" UVuf " for %" SVf, 8062 func, (UV) from_cache, (UV) real, SVfARG(sv)); 8063 } 8064 8065 /* 8066 =for apidoc sv_eq 8067 =for apidoc_item sv_eq_flags 8068 8069 These each return a boolean indicating whether or not the strings in the two 8070 SVs are equal. If S<C<'use bytes'>> is in effect, the comparison is 8071 byte-by-byte; otherwise character-by-character. Each will coerce its args to 8072 strings if necessary. 8073 8074 They differ only in that C<sv_eq> always processes get magic, while 8075 C<sv_eq_flags> processes get magic only when the C<flags> parameter has the 8076 C<SV_GMAGIC> bit set. 8077 8078 These functions do not handle operator overloading. For versions that do, 8079 see instead C<L</sv_streq>> or C<L</sv_streq_flags>>. 8080 8081 =cut 8082 */ 8083 8084 I32 8085 Perl_sv_eq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) 8086 { 8087 const char *pv1; 8088 STRLEN cur1; 8089 const char *pv2; 8090 STRLEN cur2; 8091 8092 if (!sv1) { 8093 pv1 = ""; 8094 cur1 = 0; 8095 } 8096 else { 8097 /* if pv1 and pv2 are the same, second SvPV_const call may 8098 * invalidate pv1 (if we are handling magic), so we may need to 8099 * make a copy */ 8100 if (sv1 == sv2 && flags & SV_GMAGIC 8101 && (SvTHINKFIRST(sv1) || SvGMAGICAL(sv1))) { 8102 pv1 = SvPV_const(sv1, cur1); 8103 sv1 = newSVpvn_flags(pv1, cur1, SVs_TEMP | SvUTF8(sv2)); 8104 } 8105 pv1 = SvPV_flags_const(sv1, cur1, flags); 8106 } 8107 8108 if (!sv2){ 8109 pv2 = ""; 8110 cur2 = 0; 8111 } 8112 else 8113 pv2 = SvPV_flags_const(sv2, cur2, flags); 8114 8115 if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { 8116 /* Differing utf8ness. */ 8117 if (SvUTF8(sv1)) { 8118 /* sv1 is the UTF-8 one */ 8119 return bytes_cmp_utf8((const U8*)pv2, cur2, 8120 (const U8*)pv1, cur1) == 0; 8121 } 8122 else { 8123 /* sv2 is the UTF-8 one */ 8124 return bytes_cmp_utf8((const U8*)pv1, cur1, 8125 (const U8*)pv2, cur2) == 0; 8126 } 8127 } 8128 8129 if (cur1 == cur2) 8130 return (pv1 == pv2) || memEQ(pv1, pv2, cur1); 8131 else 8132 return 0; 8133 } 8134 8135 /* 8136 =for apidoc sv_streq_flags 8137 8138 Returns a boolean indicating whether the strings in the two SVs are 8139 identical. If the flags argument has the C<SV_GMAGIC> bit set, it handles 8140 get-magic too. Will coerce its args to strings if necessary. Treats 8141 C<NULL> as undef. Correctly handles the UTF8 flag. 8142 8143 If flags does not have the C<SV_SKIP_OVERLOAD> bit set, an attempt to use 8144 C<eq> overloading will be made. If such overloading does not exist or the 8145 flag is set, then regular string comparison will be used instead. 8146 8147 =for apidoc sv_streq 8148 8149 A convenient shortcut for calling C<sv_streq_flags> with the C<SV_GMAGIC> 8150 flag. This function basically behaves like the Perl code C<$sv1 eq $sv2>. 8151 8152 =cut 8153 */ 8154 8155 bool 8156 Perl_sv_streq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) 8157 { 8158 PERL_ARGS_ASSERT_SV_STREQ_FLAGS; 8159 8160 if(flags & SV_GMAGIC) { 8161 if(sv1) 8162 SvGETMAGIC(sv1); 8163 if(sv2) 8164 SvGETMAGIC(sv2); 8165 } 8166 8167 /* Treat NULL as undef */ 8168 if(!sv1) 8169 sv1 = &PL_sv_undef; 8170 if(!sv2) 8171 sv2 = &PL_sv_undef; 8172 8173 if(!(flags & SV_SKIP_OVERLOAD) && 8174 (SvAMAGIC(sv1) || SvAMAGIC(sv2))) { 8175 SV *ret = amagic_call(sv1, sv2, seq_amg, 0); 8176 if(ret) 8177 return SvTRUE(ret); 8178 } 8179 8180 return sv_eq_flags(sv1, sv2, 0); 8181 } 8182 8183 /* 8184 =for apidoc sv_numeq_flags 8185 8186 Returns a boolean indicating whether the numbers in the two SVs are 8187 identical. If the flags argument has the C<SV_GMAGIC> bit set, it handles 8188 get-magic too. Will coerce its args to numbers if necessary. Treats 8189 C<NULL> as undef. 8190 8191 If flags does not have the C<SV_SKIP_OVERLOAD> bit set, an attempt to use 8192 C<==> overloading will be made. If such overloading does not exist or the 8193 flag is set, then regular numerical comparison will be used instead. 8194 8195 =for apidoc sv_numeq 8196 8197 A convenient shortcut for calling C<sv_numeq_flags> with the C<SV_GMAGIC> 8198 flag. This function basically behaves like the Perl code C<$sv1 == $sv2>. 8199 8200 =cut 8201 */ 8202 8203 bool 8204 Perl_sv_numeq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) 8205 { 8206 PERL_ARGS_ASSERT_SV_NUMEQ_FLAGS; 8207 8208 if(flags & SV_GMAGIC) { 8209 if(sv1) 8210 SvGETMAGIC(sv1); 8211 if(sv2) 8212 SvGETMAGIC(sv2); 8213 } 8214 8215 /* Treat NULL as undef */ 8216 if(!sv1) 8217 sv1 = &PL_sv_undef; 8218 if(!sv2) 8219 sv2 = &PL_sv_undef; 8220 8221 if(!(flags & SV_SKIP_OVERLOAD) && 8222 (SvAMAGIC(sv1) || SvAMAGIC(sv2))) { 8223 SV *ret = amagic_call(sv1, sv2, eq_amg, 0); 8224 if(ret) 8225 return SvTRUE(ret); 8226 } 8227 8228 return do_ncmp(sv1, sv2) == 0; 8229 } 8230 8231 /* 8232 =for apidoc sv_cmp 8233 8234 Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the 8235 string in C<sv1> is less than, equal to, or greater than the string in 8236 C<sv2>. Is UTF-8 and S<C<'use bytes'>> aware, handles get magic, and will 8237 coerce its args to strings if necessary. See also C<L</sv_cmp_locale>>. 8238 8239 =for apidoc sv_cmp_flags 8240 8241 Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the 8242 string in C<sv1> is less than, equal to, or greater than the string in 8243 C<sv2>. Is UTF-8 and S<C<'use bytes'>> aware and will coerce its args to strings 8244 if necessary. If the flags has the C<SV_GMAGIC> bit set, it handles get magic. See 8245 also C<L</sv_cmp_locale_flags>>. 8246 8247 =cut 8248 */ 8249 8250 I32 8251 Perl_sv_cmp(pTHX_ SV *const sv1, SV *const sv2) 8252 { 8253 return sv_cmp_flags(sv1, sv2, SV_GMAGIC); 8254 } 8255 8256 I32 8257 Perl_sv_cmp_flags(pTHX_ SV *const sv1, SV *const sv2, 8258 const U32 flags) 8259 { 8260 STRLEN cur1, cur2; 8261 const char *pv1, *pv2; 8262 I32 cmp; 8263 SV *svrecode = NULL; 8264 8265 if (!sv1) { 8266 pv1 = ""; 8267 cur1 = 0; 8268 } 8269 else 8270 pv1 = SvPV_flags_const(sv1, cur1, flags); 8271 8272 if (!sv2) { 8273 pv2 = ""; 8274 cur2 = 0; 8275 } 8276 else 8277 pv2 = SvPV_flags_const(sv2, cur2, flags); 8278 8279 if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { 8280 /* Differing utf8ness. */ 8281 if (SvUTF8(sv1)) { 8282 const int retval = -bytes_cmp_utf8((const U8*)pv2, cur2, 8283 (const U8*)pv1, cur1); 8284 return retval ? retval < 0 ? -1 : +1 : 0; 8285 } 8286 else { 8287 const int retval = bytes_cmp_utf8((const U8*)pv1, cur1, 8288 (const U8*)pv2, cur2); 8289 return retval ? retval < 0 ? -1 : +1 : 0; 8290 } 8291 } 8292 8293 /* Here, if both are non-NULL, then they have the same UTF8ness. */ 8294 8295 if (!cur1) { 8296 cmp = cur2 ? -1 : 0; 8297 } else if (!cur2) { 8298 cmp = 1; 8299 } else { 8300 STRLEN shortest_len = cur1 < cur2 ? cur1 : cur2; 8301 8302 #ifdef EBCDIC 8303 if (! DO_UTF8(sv1)) { 8304 #endif 8305 const I32 retval = memcmp((const void*)pv1, 8306 (const void*)pv2, 8307 shortest_len); 8308 if (retval) { 8309 cmp = retval < 0 ? -1 : 1; 8310 } else if (cur1 == cur2) { 8311 cmp = 0; 8312 } else { 8313 cmp = cur1 < cur2 ? -1 : 1; 8314 } 8315 #ifdef EBCDIC 8316 } 8317 else { /* Both are to be treated as UTF-EBCDIC */ 8318 8319 /* EBCDIC UTF-8 is complicated by the fact that it is based on I8 8320 * which remaps code points 0-255. We therefore generally have to 8321 * unmap back to the original values to get an accurate comparison. 8322 * But we don't have to do that for UTF-8 invariants, as by 8323 * definition, they aren't remapped, nor do we have to do it for 8324 * above-latin1 code points, as they also aren't remapped. (This 8325 * code also works on ASCII platforms, but the memcmp() above is 8326 * much faster). */ 8327 8328 const char *e = pv1 + shortest_len; 8329 8330 /* Find the first bytes that differ between the two strings */ 8331 while (pv1 < e && *pv1 == *pv2) { 8332 pv1++; 8333 pv2++; 8334 } 8335 8336 8337 if (pv1 == e) { /* Are the same all the way to the end */ 8338 if (cur1 == cur2) { 8339 cmp = 0; 8340 } else { 8341 cmp = cur1 < cur2 ? -1 : 1; 8342 } 8343 } 8344 else /* Here *pv1 and *pv2 are not equal, but all bytes earlier 8345 * in the strings were. The current bytes may or may not be 8346 * at the beginning of a character. But neither or both are 8347 * (or else earlier bytes would have been different). And 8348 * if we are in the middle of a character, the two 8349 * characters have the same number of bytes 8350 * (because in this case the start bytes are the same, and 8351 * the start bytes encode the character's length). */ 8352 if (UTF8_IS_INVARIANT(*pv1)) 8353 { 8354 /* If both are invariants; can just compare directly */ 8355 if (UTF8_IS_INVARIANT(*pv2)) { 8356 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1; 8357 } 8358 else /* Since *pv1 is invariant, it is the whole character, 8359 which means it is at the beginning of a character. 8360 That means pv2 is also at the beginning of a 8361 character (see earlier comment). Since it isn't 8362 invariant, it must be a start byte. If it starts a 8363 character whose code point is above 255, that 8364 character is greater than any single-byte char, which 8365 *pv1 is */ 8366 if (UTF8_IS_ABOVE_LATIN1_START(*pv2)) 8367 { 8368 cmp = -1; 8369 } 8370 else { 8371 /* Here, pv2 points to a character composed of 2 bytes 8372 * whose code point is < 256. Get its code point and 8373 * compare with *pv1 */ 8374 cmp = ((U8) *pv1 < EIGHT_BIT_UTF8_TO_NATIVE(*pv2, *(pv2 + 1))) 8375 ? -1 8376 : 1; 8377 } 8378 } 8379 else /* The code point starting at pv1 isn't a single byte */ 8380 if (UTF8_IS_INVARIANT(*pv2)) 8381 { 8382 /* But here, the code point starting at *pv2 is a single byte, 8383 * and so *pv1 must begin a character, hence is a start byte. 8384 * If that character is above 255, it is larger than any 8385 * single-byte char, which *pv2 is */ 8386 if (UTF8_IS_ABOVE_LATIN1_START(*pv1)) { 8387 cmp = 1; 8388 } 8389 else { 8390 /* Here, pv1 points to a character composed of 2 bytes 8391 * whose code point is < 256. Get its code point and 8392 * compare with the single byte character *pv2 */ 8393 cmp = (EIGHT_BIT_UTF8_TO_NATIVE(*pv1, *(pv1 + 1)) < (U8) *pv2) 8394 ? -1 8395 : 1; 8396 } 8397 } 8398 else /* Here, we've ruled out either *pv1 and *pv2 being 8399 invariant. That means both are part of variants, but not 8400 necessarily at the start of a character */ 8401 if ( UTF8_IS_ABOVE_LATIN1_START(*pv1) 8402 || UTF8_IS_ABOVE_LATIN1_START(*pv2)) 8403 { 8404 /* Here, at least one is the start of a character, which means 8405 * the other is also a start byte. And the code point of at 8406 * least one of the characters is above 255. It is a 8407 * characteristic of UTF-EBCDIC that all start bytes for 8408 * above-latin1 code points are well behaved as far as code 8409 * point comparisons go, and all are larger than all other 8410 * start bytes, so the comparison with those is also well 8411 * behaved */ 8412 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1; 8413 } 8414 else { 8415 /* Here both *pv1 and *pv2 are part of variant characters. 8416 * They could be both continuations, or both start characters. 8417 * (One or both could even be an illegal start character (for 8418 * an overlong) which for the purposes of sorting we treat as 8419 * legal. */ 8420 if (UTF8_IS_CONTINUATION(*pv1)) { 8421 8422 /* If they are continuations for code points above 255, 8423 * then comparing the current byte is sufficient, as there 8424 * is no remapping of these and so the comparison is 8425 * well-behaved. We determine if they are such 8426 * continuations by looking at the preceding byte. It 8427 * could be a start byte, from which we can tell if it is 8428 * for an above 255 code point. Or it could be a 8429 * continuation, which means the character occupies at 8430 * least 3 bytes, so must be above 255. */ 8431 if ( UTF8_IS_CONTINUATION(*(pv2 - 1)) 8432 || UTF8_IS_ABOVE_LATIN1_START(*(pv2 -1))) 8433 { 8434 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1; 8435 goto cmp_done; 8436 } 8437 8438 /* Here, the continuations are for code points below 256; 8439 * back up one to get to the start byte */ 8440 pv1--; 8441 pv2--; 8442 } 8443 8444 /* We need to get the actual native code point of each of these 8445 * variants in order to compare them */ 8446 cmp = ( EIGHT_BIT_UTF8_TO_NATIVE(*pv1, *(pv1 + 1)) 8447 < EIGHT_BIT_UTF8_TO_NATIVE(*pv2, *(pv2 + 1))) 8448 ? -1 8449 : 1; 8450 } 8451 } 8452 cmp_done: ; 8453 #endif 8454 } 8455 8456 SvREFCNT_dec(svrecode); 8457 8458 return cmp; 8459 } 8460 8461 /* 8462 =for apidoc sv_cmp_locale 8463 8464 Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and 8465 S<C<'use bytes'>> aware, handles get magic, and will coerce its args to strings 8466 if necessary. See also C<L</sv_cmp>>. 8467 8468 =for apidoc sv_cmp_locale_flags 8469 8470 Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and 8471 S<C<'use bytes'>> aware and will coerce its args to strings if necessary. If 8472 the flags contain C<SV_GMAGIC>, it handles get magic. See also 8473 C<L</sv_cmp_flags>>. 8474 8475 =cut 8476 */ 8477 8478 I32 8479 Perl_sv_cmp_locale(pTHX_ SV *const sv1, SV *const sv2) 8480 { 8481 return sv_cmp_locale_flags(sv1, sv2, SV_GMAGIC); 8482 } 8483 8484 I32 8485 Perl_sv_cmp_locale_flags(pTHX_ SV *const sv1, SV *const sv2, 8486 const U32 flags) 8487 { 8488 #ifdef USE_LOCALE_COLLATE 8489 8490 char *pv1, *pv2; 8491 STRLEN len1, len2; 8492 I32 retval; 8493 8494 if (PL_collation_standard) 8495 goto raw_compare; 8496 8497 len1 = len2 = 0; 8498 8499 /* Revert to using raw compare if both operands exist, but either one 8500 * doesn't transform properly for collation */ 8501 if (sv1 && sv2) { 8502 pv1 = sv_collxfrm_flags(sv1, &len1, flags); 8503 if (! pv1) { 8504 goto raw_compare; 8505 } 8506 pv2 = sv_collxfrm_flags(sv2, &len2, flags); 8507 if (! pv2) { 8508 goto raw_compare; 8509 } 8510 } 8511 else { 8512 pv1 = sv1 ? sv_collxfrm_flags(sv1, &len1, flags) : (char *) NULL; 8513 pv2 = sv2 ? sv_collxfrm_flags(sv2, &len2, flags) : (char *) NULL; 8514 } 8515 8516 if (!pv1 || !len1) { 8517 if (pv2 && len2) 8518 return -1; 8519 else 8520 goto raw_compare; 8521 } 8522 else { 8523 if (!pv2 || !len2) 8524 return 1; 8525 } 8526 8527 retval = memcmp((void*)pv1, (void*)pv2, len1 < len2 ? len1 : len2); 8528 8529 if (retval) 8530 return retval < 0 ? -1 : 1; 8531 8532 /* 8533 * When the result of collation is equality, that doesn't mean 8534 * that there are no differences -- some locales exclude some 8535 * characters from consideration. So to avoid false equalities, 8536 * we use the raw string as a tiebreaker. 8537 */ 8538 8539 raw_compare: 8540 /* FALLTHROUGH */ 8541 8542 #else 8543 PERL_UNUSED_ARG(flags); 8544 #endif /* USE_LOCALE_COLLATE */ 8545 8546 return sv_cmp(sv1, sv2); 8547 } 8548 8549 8550 #ifdef USE_LOCALE_COLLATE 8551 8552 /* 8553 =for apidoc sv_collxfrm 8554 8555 This calls C<sv_collxfrm_flags> with the SV_GMAGIC flag. See 8556 C<L</sv_collxfrm_flags>>. 8557 8558 =for apidoc sv_collxfrm_flags 8559 8560 Add Collate Transform magic to an SV if it doesn't already have it. If the 8561 flags contain C<SV_GMAGIC>, it handles get-magic. 8562 8563 Any scalar variable may carry C<PERL_MAGIC_collxfrm> magic that contains the 8564 scalar data of the variable, but transformed to such a format that a normal 8565 memory comparison can be used to compare the data according to the locale 8566 settings. 8567 8568 =cut 8569 */ 8570 8571 char * 8572 Perl_sv_collxfrm_flags(pTHX_ SV *const sv, STRLEN *const nxp, const I32 flags) 8573 { 8574 MAGIC *mg; 8575 8576 PERL_ARGS_ASSERT_SV_COLLXFRM_FLAGS; 8577 8578 mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_collxfrm) : (MAGIC *) NULL; 8579 8580 /* If we don't have collation magic on 'sv', or the locale has changed 8581 * since the last time we calculated it, get it and save it now */ 8582 if (!mg || !mg->mg_ptr || *(U32*)mg->mg_ptr != PL_collation_ix) { 8583 const char *s; 8584 char *xf; 8585 STRLEN len, xlen; 8586 8587 /* Free the old space */ 8588 if (mg) 8589 Safefree(mg->mg_ptr); 8590 8591 s = SvPV_flags_const(sv, len, flags); 8592 if ((xf = mem_collxfrm_(s, len, &xlen, cBOOL(SvUTF8(sv))))) { 8593 if (! mg) { 8594 mg = sv_magicext(sv, 0, PERL_MAGIC_collxfrm, &PL_vtbl_collxfrm, 8595 0, 0); 8596 assert(mg); 8597 } 8598 mg->mg_ptr = xf; 8599 mg->mg_len = xlen; 8600 } 8601 else { 8602 if (mg) { 8603 mg->mg_ptr = NULL; 8604 mg->mg_len = -1; 8605 } 8606 } 8607 } 8608 8609 if (mg && mg->mg_ptr) { 8610 *nxp = mg->mg_len; 8611 return mg->mg_ptr + sizeof(PL_collation_ix); 8612 } 8613 else { 8614 *nxp = 0; 8615 return NULL; 8616 } 8617 } 8618 8619 #endif /* USE_LOCALE_COLLATE */ 8620 8621 static char * 8622 S_sv_gets_append_to_utf8(pTHX_ SV *const sv, PerlIO *const fp, I32 append) 8623 { 8624 SV * const tsv = newSV_type(SVt_NULL); 8625 ENTER; 8626 SAVEFREESV(tsv); 8627 sv_gets(tsv, fp, 0); 8628 sv_utf8_upgrade_nomg(tsv); 8629 SvCUR_set(sv,append); 8630 sv_catsv(sv,tsv); 8631 LEAVE; 8632 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL; 8633 } 8634 8635 static char * 8636 S_sv_gets_read_record(pTHX_ SV *const sv, PerlIO *const fp, I32 append) 8637 { 8638 SSize_t bytesread; 8639 const STRLEN recsize = SvUV(SvRV(PL_rs)); /* RsRECORD() guarantees > 0. */ 8640 /* Grab the size of the record we're getting */ 8641 char *buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append; 8642 8643 /* Go yank in */ 8644 #ifdef __VMS 8645 int fd; 8646 Stat_t st; 8647 8648 /* With a true, record-oriented file on VMS, we need to use read directly 8649 * to ensure that we respect RMS record boundaries. The user is responsible 8650 * for providing a PL_rs value that corresponds to the FAB$W_MRS (maximum 8651 * record size) field. N.B. This is likely to produce invalid results on 8652 * varying-width character data when a record ends mid-character. 8653 */ 8654 fd = PerlIO_fileno(fp); 8655 if (fd != -1 8656 && PerlLIO_fstat(fd, &st) == 0 8657 && (st.st_fab_rfm == FAB$C_VAR 8658 || st.st_fab_rfm == FAB$C_VFC 8659 || st.st_fab_rfm == FAB$C_FIX)) { 8660 8661 bytesread = PerlLIO_read(fd, buffer, recsize); 8662 } 8663 else /* in-memory file from PerlIO::Scalar 8664 * or not a record-oriented file 8665 */ 8666 #endif 8667 { 8668 bytesread = PerlIO_read(fp, buffer, recsize); 8669 8670 /* At this point, the logic in sv_get() means that sv will 8671 be treated as utf-8 if the handle is utf8. 8672 */ 8673 if (PerlIO_isutf8(fp) && bytesread > 0) { 8674 char *bend = buffer + bytesread; 8675 char *bufp = buffer; 8676 size_t charcount = 0; 8677 bool charstart = TRUE; 8678 STRLEN skip = 0; 8679 8680 while (charcount < recsize) { 8681 /* count accumulated characters */ 8682 while (bufp < bend) { 8683 if (charstart) { 8684 skip = UTF8SKIP(bufp); 8685 } 8686 if (bufp + skip > bend) { 8687 /* partial at the end */ 8688 charstart = FALSE; 8689 break; 8690 } 8691 else { 8692 ++charcount; 8693 bufp += skip; 8694 charstart = TRUE; 8695 } 8696 } 8697 8698 if (charcount < recsize) { 8699 STRLEN readsize; 8700 STRLEN bufp_offset = bufp - buffer; 8701 SSize_t morebytesread; 8702 8703 /* originally I read enough to fill any incomplete 8704 character and the first byte of the next 8705 character if needed, but if there's many 8706 multi-byte encoded characters we're going to be 8707 making a read call for every character beyond 8708 the original read size. 8709 8710 So instead, read the rest of the character if 8711 any, and enough bytes to match at least the 8712 start bytes for each character we're going to 8713 read. 8714 */ 8715 if (charstart) 8716 readsize = recsize - charcount; 8717 else 8718 readsize = skip - (bend - bufp) + recsize - charcount - 1; 8719 buffer = SvGROW(sv, append + bytesread + readsize + 1) + append; 8720 bend = buffer + bytesread; 8721 morebytesread = PerlIO_read(fp, bend, readsize); 8722 if (morebytesread <= 0) { 8723 /* we're done, if we still have incomplete 8724 characters the check code in sv_gets() will 8725 warn about them. 8726 8727 I'd originally considered doing 8728 PerlIO_ungetc() on all but the lead 8729 character of the incomplete character, but 8730 read() doesn't do that, so I don't. 8731 */ 8732 break; 8733 } 8734 8735 /* prepare to scan some more */ 8736 bytesread += morebytesread; 8737 bend = buffer + bytesread; 8738 bufp = buffer + bufp_offset; 8739 } 8740 } 8741 } 8742 } 8743 8744 if (bytesread < 0) 8745 bytesread = 0; 8746 SvCUR_set(sv, bytesread + append); 8747 buffer[bytesread] = '\0'; 8748 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL; 8749 } 8750 8751 /* 8752 =for apidoc sv_gets 8753 8754 Get a line from the filehandle and store it into the SV, optionally 8755 appending to the currently-stored string. If C<append> is not 0, the 8756 line is appended to the SV instead of overwriting it. C<append> should 8757 be set to the byte offset that the appended string should start at 8758 in the SV (typically, C<SvCUR(sv)> is a suitable choice). 8759 8760 =cut 8761 */ 8762 8763 char * 8764 Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) 8765 { 8766 const char *rsptr; 8767 STRLEN rslen; 8768 STDCHAR rslast; 8769 STDCHAR *bp; 8770 SSize_t cnt; 8771 int i = 0; 8772 int rspara = 0; 8773 8774 PERL_ARGS_ASSERT_SV_GETS; 8775 8776 if (SvTHINKFIRST(sv)) 8777 sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV); 8778 /* XXX. If you make this PVIV, then copy on write can copy scalars read 8779 from <>. 8780 However, perlbench says it's slower, because the existing swipe code 8781 is faster than copy on write. 8782 Swings and roundabouts. */ 8783 SvUPGRADE(sv, SVt_PV); 8784 8785 if (append) { 8786 /* line is going to be appended to the existing buffer in the sv */ 8787 if (PerlIO_isutf8(fp)) { 8788 if (!SvUTF8(sv)) { 8789 sv_utf8_upgrade_nomg(sv); 8790 sv_pos_u2b(sv,&append,0); 8791 } 8792 } else if (SvUTF8(sv)) { 8793 return S_sv_gets_append_to_utf8(aTHX_ sv, fp, append); 8794 } 8795 } 8796 8797 SvPOK_only(sv); 8798 if (!append) { 8799 /* not appending - "clear" the string by setting SvCUR to 0, 8800 * the pv is still available. */ 8801 SvCUR_set(sv,0); 8802 } 8803 if (PerlIO_isutf8(fp)) 8804 SvUTF8_on(sv); 8805 8806 if (IN_PERL_COMPILETIME) { 8807 /* we always read code in line mode */ 8808 rsptr = "\n"; 8809 rslen = 1; 8810 } 8811 else if (RsSNARF(PL_rs)) { 8812 /* If it is a regular disk file use size from stat() as estimate 8813 of amount we are going to read -- may result in mallocing 8814 more memory than we really need if the layers below reduce 8815 the size we read (e.g. CRLF or a gzip layer). 8816 */ 8817 Stat_t st; 8818 int fd = PerlIO_fileno(fp); 8819 if (fd >= 0 && (PerlLIO_fstat(fd, &st) == 0) && S_ISREG(st.st_mode)) { 8820 const Off_t offset = PerlIO_tell(fp); 8821 if (offset != (Off_t) -1 && st.st_size + append > offset) { 8822 #ifdef PERL_COPY_ON_WRITE 8823 /* Add an extra byte for the sake of copy-on-write's 8824 * buffer reference count. */ 8825 (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 2)); 8826 #else 8827 (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 1)); 8828 #endif 8829 } 8830 } 8831 rsptr = NULL; 8832 rslen = 0; 8833 } 8834 else if (RsRECORD(PL_rs)) { 8835 return S_sv_gets_read_record(aTHX_ sv, fp, append); 8836 } 8837 else if (RsPARA(PL_rs)) { 8838 rsptr = "\n\n"; 8839 rslen = 2; 8840 rspara = 1; 8841 } 8842 else { 8843 /* Get $/ i.e. PL_rs into same encoding as stream wants */ 8844 if (PerlIO_isutf8(fp)) { 8845 rsptr = SvPVutf8(PL_rs, rslen); 8846 } 8847 else { 8848 if (SvUTF8(PL_rs)) { 8849 if (!sv_utf8_downgrade(PL_rs, TRUE)) { 8850 Perl_croak(aTHX_ "Wide character in $/"); 8851 } 8852 } 8853 /* extract the raw pointer to the record separator */ 8854 rsptr = SvPV_const(PL_rs, rslen); 8855 } 8856 } 8857 8858 /* rslast is the last character in the record separator 8859 * note we don't use rslast except when rslen is true, so the 8860 * null assign is a placeholder. */ 8861 rslast = rslen ? rsptr[rslen - 1] : '\0'; 8862 8863 if (rspara) { /* have to do this both before and after */ 8864 /* to make sure file boundaries work right */ 8865 while (1) { 8866 if (PerlIO_eof(fp)) 8867 return 0; 8868 i = PerlIO_getc(fp); 8869 if (i != '\n') { 8870 if (i == -1) 8871 return 0; 8872 PerlIO_ungetc(fp,i); 8873 break; 8874 } 8875 } 8876 } 8877 8878 /* See if we know enough about I/O mechanism to cheat it ! */ 8879 8880 /* This used to be #ifdef test - it is made run-time test for ease 8881 of abstracting out stdio interface. One call should be cheap 8882 enough here - and may even be a macro allowing compile 8883 time optimization. 8884 */ 8885 8886 if (PerlIO_fast_gets(fp)) { 8887 /* 8888 * We can do buffer based IO operations on this filehandle. 8889 * 8890 * This means we can bypass a lot of subcalls and process 8891 * the buffer directly, it also means we know the upper bound 8892 * on the amount of data we might read of the current buffer 8893 * into our sv. Knowing this allows us to preallocate the pv 8894 * to be able to hold that maximum, which allows us to simplify 8895 * a lot of logic. */ 8896 8897 /* 8898 * We're going to steal some values from the stdio struct 8899 * and put EVERYTHING in the innermost loop into registers. 8900 */ 8901 STDCHAR *ptr; /* pointer into fp's read-ahead buffer */ 8902 STRLEN bpx; /* length of the data in the target sv 8903 used to fix pointers after a SvGROW */ 8904 I32 shortbuffered; /* If the pv buffer is shorter than the amount 8905 of data left in the read-ahead buffer. 8906 If 0 then the pv buffer can hold the full 8907 amount left, otherwise this is the amount it 8908 can hold. */ 8909 8910 /* Here is some breathtakingly efficient cheating */ 8911 8912 /* When you read the following logic resist the urge to think 8913 * of record separators that are 1 byte long. They are an 8914 * uninteresting special (simple) case. 8915 * 8916 * Instead think of record separators which are at least 2 bytes 8917 * long, and keep in mind that we need to deal with such 8918 * separators when they cross a read-ahead buffer boundary. 8919 * 8920 * Also consider that we need to gracefully deal with separators 8921 * that may be longer than a single read ahead buffer. 8922 * 8923 * Lastly do not forget we want to copy the delimiter as well. We 8924 * are copying all data in the file _up_to_and_including_ the separator 8925 * itself. 8926 * 8927 * Now that you have all that in mind here is what is happening below: 8928 * 8929 * 1. When we first enter the loop we do some memory book keeping to see 8930 * how much free space there is in the target SV. (This sub assumes that 8931 * it is operating on the same SV most of the time via $_ and that it is 8932 * going to be able to reuse the same pv buffer each call.) If there is 8933 * "enough" room then we set "shortbuffered" to how much space there is 8934 * and start reading forward. 8935 * 8936 * 2. When we scan forward we copy from the read-ahead buffer to the target 8937 * SV's pv buffer. While we go we watch for the end of the read-ahead buffer, 8938 * and the end of the of pv, as well as for the "rslast", which is the last 8939 * char of the separator. 8940 * 8941 * 3. When scanning forward if we see rslast then we jump backwards in *pv* 8942 * (which has a "complete" record up to the point we saw rslast) and check 8943 * it to see if it matches the separator. If it does we are done. If it doesn't 8944 * we continue on with the scan/copy. 8945 * 8946 * 4. If we run out of read-ahead buffer (cnt goes to 0) then we have to get 8947 * the IO system to read the next buffer. We do this by doing a getc(), which 8948 * returns a single char read (or EOF), and prefills the buffer, and also 8949 * allows us to find out how full the buffer is. We use this information to 8950 * SvGROW() the sv to the size remaining in the buffer, after which we copy 8951 * the returned single char into the target sv, and then go back into scan 8952 * forward mode. 8953 * 8954 * 5. If we run out of write-buffer then we SvGROW() it by the size of the 8955 * remaining space in the read-buffer. 8956 * 8957 * Note that this code despite its twisty-turny nature is pretty darn slick. 8958 * It manages single byte separators, multi-byte cross boundary separators, 8959 * and cross-read-buffer separators cleanly and efficiently at the cost 8960 * of potentially greatly overallocating the target SV. 8961 * 8962 * Yves 8963 */ 8964 8965 8966 /* get the number of bytes remaining in the read-ahead buffer 8967 * on first call on a given fp this will return 0.*/ 8968 cnt = PerlIO_get_cnt(fp); 8969 8970 /* make sure we have the room */ 8971 if ((I32)(SvLEN(sv) - append) <= cnt + 1) { 8972 /* Not room for all of it 8973 if we are looking for a separator and room for some 8974 */ 8975 if (rslen && cnt > 80 && (I32)SvLEN(sv) > append) { 8976 /* just process what we have room for */ 8977 shortbuffered = cnt - SvLEN(sv) + append + 1; 8978 cnt -= shortbuffered; 8979 } 8980 else { 8981 /* ensure that the target sv has enough room to hold 8982 * the rest of the read-ahead buffer */ 8983 shortbuffered = 0; 8984 /* remember that cnt can be negative */ 8985 SvGROW(sv, (STRLEN)(append + (cnt <= 0 ? 2 : (cnt + 1)))); 8986 } 8987 } 8988 else { 8989 /* we have enough room to hold the full buffer, lets scream */ 8990 shortbuffered = 0; 8991 } 8992 8993 /* extract the pointer to sv's string buffer, offset by append as necessary */ 8994 bp = (STDCHAR*)SvPVX_const(sv) + append; /* move these two too to registers */ 8995 /* extract the point to the read-ahead buffer */ 8996 ptr = (STDCHAR*)PerlIO_get_ptr(fp); 8997 8998 /* some trace debug output */ 8999 DEBUG_P(PerlIO_printf(Perl_debug_log, 9000 "Screamer: entering, ptr=%" UVuf ", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); 9001 DEBUG_P(PerlIO_printf(Perl_debug_log, 9002 "Screamer: entering: PerlIO * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" 9003 UVuf "\n", 9004 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 9005 PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0))); 9006 9007 for (;;) { 9008 screamer: 9009 /* if there is stuff left in the read-ahead buffer */ 9010 if (cnt > 0) { 9011 /* if there is a separator */ 9012 if (rslen) { 9013 /* find next rslast */ 9014 STDCHAR *p; 9015 9016 /* shortcut common case of blank line */ 9017 cnt--; 9018 if ((*bp++ = *ptr++) == rslast) 9019 goto thats_all_folks; 9020 9021 p = (STDCHAR *)memchr(ptr, rslast, cnt); 9022 if (p) { 9023 SSize_t got = p - ptr + 1; 9024 Copy(ptr, bp, got, STDCHAR); 9025 ptr += got; 9026 bp += got; 9027 cnt -= got; 9028 goto thats_all_folks; 9029 } 9030 Copy(ptr, bp, cnt, STDCHAR); 9031 ptr += cnt; 9032 bp += cnt; 9033 cnt = 0; 9034 } 9035 else { 9036 /* no separator, slurp the full buffer */ 9037 Copy(ptr, bp, cnt, char); /* this | eat */ 9038 bp += cnt; /* screams | dust */ 9039 ptr += cnt; /* louder | sed :-) */ 9040 cnt = 0; 9041 assert (!shortbuffered); 9042 goto cannot_be_shortbuffered; 9043 } 9044 } 9045 9046 if (shortbuffered) { /* oh well, must extend */ 9047 /* we didn't have enough room to fit the line into the target buffer 9048 * so we must extend the target buffer and keep going */ 9049 cnt = shortbuffered; 9050 shortbuffered = 0; 9051 bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ 9052 SvCUR_set(sv, bpx); 9053 /* extned the target sv's buffer so it can hold the full read-ahead buffer */ 9054 SvGROW(sv, SvLEN(sv) + append + cnt + 2); 9055 bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ 9056 continue; 9057 } 9058 9059 cannot_be_shortbuffered: 9060 /* we need to refill the read-ahead buffer if possible */ 9061 9062 DEBUG_P(PerlIO_printf(Perl_debug_log, 9063 "Screamer: going to getc, ptr=%" UVuf ", cnt=%" IVdf "\n", 9064 PTR2UV(ptr),(IV)cnt)); 9065 PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* deregisterize cnt and ptr */ 9066 9067 DEBUG_Pv(PerlIO_printf(Perl_debug_log, 9068 "Screamer: pre: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", 9069 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 9070 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); 9071 9072 /* 9073 call PerlIO_getc() to let it prefill the lookahead buffer 9074 9075 This used to call 'filbuf' in stdio form, but as that behaves like 9076 getc when cnt <= 0 we use PerlIO_getc here to avoid introducing 9077 another abstraction. 9078 9079 Note we have to deal with the char in 'i' if we are not at EOF 9080 */ 9081 bpx = bp - (STDCHAR*)SvPVX_const(sv); 9082 /* signals might be called here, possibly modifying sv */ 9083 i = PerlIO_getc(fp); /* get more characters */ 9084 bp = (STDCHAR*)SvPVX_const(sv) + bpx; 9085 9086 DEBUG_Pv(PerlIO_printf(Perl_debug_log, 9087 "Screamer: post: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", 9088 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 9089 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); 9090 9091 /* find out how much is left in the read-ahead buffer, and rextract its pointer */ 9092 cnt = PerlIO_get_cnt(fp); 9093 ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */ 9094 DEBUG_P(PerlIO_printf(Perl_debug_log, 9095 "Screamer: after getc, ptr=%" UVuf ", cnt=%" IVdf "\n", 9096 PTR2UV(ptr),(IV)cnt)); 9097 9098 if (i == EOF) /* all done for ever? */ 9099 goto thats_really_all_folks; 9100 9101 /* make sure we have enough space in the target sv */ 9102 bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ 9103 SvCUR_set(sv, bpx); 9104 SvGROW(sv, bpx + cnt + 2); 9105 bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ 9106 9107 /* copy of the char we got from getc() */ 9108 *bp++ = (STDCHAR)i; /* store character from PerlIO_getc */ 9109 9110 /* make sure we deal with the i being the last character of a separator */ 9111 if (rslen && (STDCHAR)i == rslast) /* all done for now? */ 9112 goto thats_all_folks; 9113 } 9114 9115 thats_all_folks: 9116 /* check if we have actually found the separator - only really applies 9117 * when rslen > 1 */ 9118 if ((rslen > 1 && (STRLEN)(bp - (STDCHAR*)SvPVX_const(sv)) < rslen) || 9119 memNE((char*)bp - rslen, rsptr, rslen)) 9120 goto screamer; /* go back to the fray */ 9121 thats_really_all_folks: 9122 if (shortbuffered) 9123 cnt += shortbuffered; 9124 DEBUG_P(PerlIO_printf(Perl_debug_log, 9125 "Screamer: quitting, ptr=%" UVuf ", cnt=%" IVdf "\n",PTR2UV(ptr),(IV)cnt)); 9126 PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* put these back or we're in trouble */ 9127 DEBUG_P(PerlIO_printf(Perl_debug_log, 9128 "Screamer: end: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf 9129 "\n", 9130 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), 9131 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); 9132 *bp = '\0'; 9133 SvCUR_set(sv, bp - (STDCHAR*)SvPVX_const(sv)); /* set length */ 9134 DEBUG_P(PerlIO_printf(Perl_debug_log, 9135 "Screamer: done, len=%ld, string=|%.*s|\n", 9136 (long)SvCUR(sv),(int)SvCUR(sv),SvPVX_const(sv))); 9137 } 9138 else 9139 { 9140 /*The big, slow, and stupid way. */ 9141 STDCHAR buf[8192]; 9142 9143 screamer2: 9144 if (rslen) { 9145 const STDCHAR * const bpe = buf + sizeof(buf); 9146 bp = buf; 9147 while ((i = PerlIO_getc(fp)) != EOF && (*bp++ = (STDCHAR)i) != rslast && bp < bpe) 9148 ; /* keep reading */ 9149 cnt = bp - buf; 9150 } 9151 else { 9152 cnt = PerlIO_read(fp,(char*)buf, sizeof(buf)); 9153 /* Accommodate broken VAXC compiler, which applies U8 cast to 9154 * both args of ?: operator, causing EOF to change into 255 9155 */ 9156 if (cnt > 0) 9157 i = (U8)buf[cnt - 1]; 9158 else 9159 i = EOF; 9160 } 9161 9162 if (cnt < 0) 9163 cnt = 0; /* we do need to re-set the sv even when cnt <= 0 */ 9164 if (append) 9165 sv_catpvn_nomg(sv, (char *) buf, cnt); 9166 else 9167 sv_setpvn(sv, (char *) buf, cnt); /* "nomg" is implied */ 9168 9169 if (i != EOF && /* joy */ 9170 (!rslen || 9171 SvCUR(sv) < rslen || 9172 memNE(SvPVX_const(sv) + SvCUR(sv) - rslen, rsptr, rslen))) 9173 { 9174 append = -1; 9175 /* 9176 * If we're reading from a TTY and we get a short read, 9177 * indicating that the user hit his EOF character, we need 9178 * to notice it now, because if we try to read from the TTY 9179 * again, the EOF condition will disappear. 9180 * 9181 * The comparison of cnt to sizeof(buf) is an optimization 9182 * that prevents unnecessary calls to feof(). 9183 * 9184 * - jik 9/25/96 9185 */ 9186 if (!(cnt < (I32)sizeof(buf) && PerlIO_eof(fp))) 9187 goto screamer2; 9188 } 9189 9190 } 9191 9192 if (rspara) { /* have to do this both before and after */ 9193 while (i != EOF) { /* to make sure file boundaries work right */ 9194 i = PerlIO_getc(fp); 9195 if (i != '\n') { 9196 PerlIO_ungetc(fp,i); 9197 break; 9198 } 9199 } 9200 } 9201 9202 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL; 9203 } 9204 9205 /* 9206 =for apidoc sv_inc 9207 =for apidoc_item sv_inc_nomg 9208 9209 These auto-increment the value in the SV, doing string to numeric conversion 9210 if necessary. They both handle operator overloading. 9211 9212 They differ only in that C<sv_inc> performs 'get' magic; C<sv_inc_nomg> skips 9213 any magic. 9214 9215 =cut 9216 */ 9217 9218 void 9219 Perl_sv_inc(pTHX_ SV *const sv) 9220 { 9221 if (!sv) 9222 return; 9223 SvGETMAGIC(sv); 9224 sv_inc_nomg(sv); 9225 } 9226 9227 void 9228 Perl_sv_inc_nomg(pTHX_ SV *const sv) 9229 { 9230 char *d; 9231 int flags; 9232 9233 if (!sv) 9234 return; 9235 if (SvTHINKFIRST(sv)) { 9236 if (SvREADONLY(sv)) { 9237 Perl_croak_no_modify(); 9238 } 9239 if (SvROK(sv)) { 9240 IV i; 9241 if (SvAMAGIC(sv) && AMG_CALLunary(sv, inc_amg)) 9242 return; 9243 i = PTR2IV(SvRV(sv)); 9244 sv_unref(sv); 9245 sv_setiv(sv, i); 9246 } 9247 else sv_force_normal_flags(sv, 0); 9248 } 9249 flags = SvFLAGS(sv); 9250 if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { 9251 /* It's (privately or publicly) a float, but not tested as an 9252 integer, so test it to see. */ 9253 (void) SvIV(sv); 9254 flags = SvFLAGS(sv); 9255 } 9256 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { 9257 /* It's publicly an integer, or privately an integer-not-float */ 9258 #ifdef PERL_PRESERVE_IVUV 9259 oops_its_int: 9260 #endif 9261 if (SvIsUV(sv)) { 9262 if (SvUVX(sv) == UV_MAX) 9263 sv_setnv(sv, UV_MAX_P1); 9264 else { 9265 (void)SvIOK_only_UV(sv); 9266 SvUV_set(sv, SvUVX(sv) + 1); 9267 } 9268 } else { 9269 if (SvIVX(sv) == IV_MAX) 9270 sv_setuv(sv, (UV)IV_MAX + 1); 9271 else { 9272 (void)SvIOK_only(sv); 9273 SvIV_set(sv, SvIVX(sv) + 1); 9274 } 9275 } 9276 return; 9277 } 9278 if (flags & SVp_NOK) { 9279 const NV was = SvNVX(sv); 9280 if (NV_OVERFLOWS_INTEGERS_AT != 0.0 && 9281 /* If NVX was NaN, the following comparisons return always false */ 9282 UNLIKELY(was >= NV_OVERFLOWS_INTEGERS_AT || 9283 was < -NV_OVERFLOWS_INTEGERS_AT) && 9284 #if defined(NAN_COMPARE_BROKEN) 9285 LIKELY(!Perl_isinfnan(was)) 9286 #else 9287 LIKELY(!Perl_isinf(was)) 9288 #endif 9289 ) { 9290 /* diag_listed_as: Lost precision when %s %f by 1 */ 9291 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), 9292 "Lost precision when incrementing %" NVff " by 1", 9293 was); 9294 } 9295 (void)SvNOK_only(sv); 9296 SvNV_set(sv, was + 1.0); 9297 return; 9298 } 9299 9300 /* treat AV/HV/CV/FM/IO and non-fake GVs as immutable */ 9301 if (SvTYPE(sv) >= SVt_PVAV || (isGV_with_GP(sv) && !SvFAKE(sv))) 9302 Perl_croak_no_modify(); 9303 9304 if (!(flags & SVp_POK) || !*SvPVX_const(sv)) { 9305 if ((flags & SVTYPEMASK) < SVt_PVIV) 9306 sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV ? SVt_PVIV : SVt_IV)); 9307 (void)SvIOK_only(sv); 9308 SvIV_set(sv, 1); 9309 return; 9310 } 9311 d = SvPVX(sv); 9312 while (isALPHA(*d)) d++; 9313 while (isDIGIT(*d)) d++; 9314 if (d < SvEND(sv)) { 9315 const int numtype = grok_number_flags(SvPVX_const(sv), SvCUR(sv), NULL, PERL_SCAN_TRAILING); 9316 #ifdef PERL_PRESERVE_IVUV 9317 /* Got to punt this as an integer if needs be, but we don't issue 9318 warnings. Probably ought to make the sv_iv_please() that does 9319 the conversion if possible, and silently. */ 9320 if (numtype && !(numtype & IS_NUMBER_INFINITY)) { 9321 /* Need to try really hard to see if it's an integer. 9322 9.22337203685478e+18 is an integer. 9323 but "9.22337203685478e+18" + 0 is UV=9223372036854779904 9324 so $a="9.22337203685478e+18"; $a+0; $a++ 9325 needs to be the same as $a="9.22337203685478e+18"; $a++ 9326 or we go insane. */ 9327 9328 (void) sv_2iv(sv); 9329 if (SvIOK(sv)) 9330 goto oops_its_int; 9331 9332 /* sv_2iv *should* have made this an NV */ 9333 if (flags & SVp_NOK) { 9334 (void)SvNOK_only(sv); 9335 SvNV_set(sv, SvNVX(sv) + 1.0); 9336 return; 9337 } 9338 /* I don't think we can get here. Maybe I should assert this 9339 And if we do get here I suspect that sv_setnv will croak. NWC 9340 Fall through. */ 9341 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n", 9342 SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); 9343 } 9344 #endif /* PERL_PRESERVE_IVUV */ 9345 if (!numtype && ckWARN(WARN_NUMERIC)) 9346 not_incrementable(sv); 9347 sv_setnv(sv,Atof(SvPVX_const(sv)) + 1.0); 9348 return; 9349 } 9350 d--; 9351 while (d >= SvPVX_const(sv)) { 9352 if (isDIGIT(*d)) { 9353 if (++*d <= '9') 9354 return; 9355 *(d--) = '0'; 9356 } 9357 else { 9358 #ifdef EBCDIC 9359 /* MKS: The original code here died if letters weren't consecutive. 9360 * at least it didn't have to worry about non-C locales. The 9361 * new code assumes that ('z'-'a')==('Z'-'A'), letters are 9362 * arranged in order (although not consecutively) and that only 9363 * [A-Za-z] are accepted by isALPHA in the C locale. 9364 */ 9365 if (isALPHA_FOLD_NE(*d, 'z')) { 9366 do { ++*d; } while (!isALPHA(*d)); 9367 return; 9368 } 9369 *(d--) -= 'z' - 'a'; 9370 #else 9371 ++*d; 9372 if (isALPHA(*d)) 9373 return; 9374 *(d--) -= 'z' - 'a' + 1; 9375 #endif 9376 } 9377 } 9378 /* oh,oh, the number grew */ 9379 SvGROW(sv, SvCUR(sv) + 2); 9380 SvCUR_set(sv, SvCUR(sv) + 1); 9381 for (d = SvPVX(sv) + SvCUR(sv); d > SvPVX_const(sv); d--) 9382 *d = d[-1]; 9383 if (isDIGIT(d[1])) 9384 *d = '1'; 9385 else 9386 *d = d[1]; 9387 } 9388 9389 /* 9390 =for apidoc sv_dec 9391 =for apidoc_item sv_dec_nomg 9392 9393 These auto-decrement the value in the SV, doing string to numeric conversion 9394 if necessary. They both handle operator overloading. 9395 9396 They differ only in that: 9397 9398 C<sv_dec> handles 'get' magic; C<sv_dec_nomg> skips 'get' magic. 9399 9400 =cut 9401 */ 9402 9403 void 9404 Perl_sv_dec(pTHX_ SV *const sv) 9405 { 9406 if (!sv) 9407 return; 9408 SvGETMAGIC(sv); 9409 sv_dec_nomg(sv); 9410 } 9411 9412 void 9413 Perl_sv_dec_nomg(pTHX_ SV *const sv) 9414 { 9415 int flags; 9416 9417 if (!sv) 9418 return; 9419 if (SvTHINKFIRST(sv)) { 9420 if (SvREADONLY(sv)) { 9421 Perl_croak_no_modify(); 9422 } 9423 if (SvROK(sv)) { 9424 IV i; 9425 if (SvAMAGIC(sv) && AMG_CALLunary(sv, dec_amg)) 9426 return; 9427 i = PTR2IV(SvRV(sv)); 9428 sv_unref(sv); 9429 sv_setiv(sv, i); 9430 } 9431 else sv_force_normal_flags(sv, 0); 9432 } 9433 /* Unlike sv_inc we don't have to worry about string-never-numbers 9434 and keeping them magic. But we mustn't warn on punting */ 9435 flags = SvFLAGS(sv); 9436 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { 9437 /* It's publicly an integer, or privately an integer-not-float */ 9438 #ifdef PERL_PRESERVE_IVUV 9439 oops_its_int: 9440 #endif 9441 if (SvIsUV(sv)) { 9442 if (SvUVX(sv) == 0) { 9443 (void)SvIOK_only(sv); 9444 SvIV_set(sv, -1); 9445 } 9446 else { 9447 (void)SvIOK_only_UV(sv); 9448 SvUV_set(sv, SvUVX(sv) - 1); 9449 } 9450 } else { 9451 if (SvIVX(sv) == IV_MIN) { 9452 sv_setnv(sv, (NV)IV_MIN); 9453 goto oops_its_num; 9454 } 9455 else { 9456 (void)SvIOK_only(sv); 9457 SvIV_set(sv, SvIVX(sv) - 1); 9458 } 9459 } 9460 return; 9461 } 9462 if (flags & SVp_NOK) { 9463 oops_its_num: 9464 { 9465 const NV was = SvNVX(sv); 9466 if (NV_OVERFLOWS_INTEGERS_AT != 0.0 && 9467 /* If NVX was NaN, these comparisons return always false */ 9468 UNLIKELY(was <= -NV_OVERFLOWS_INTEGERS_AT || 9469 was > NV_OVERFLOWS_INTEGERS_AT) && 9470 #if defined(NAN_COMPARE_BROKEN) 9471 LIKELY(!Perl_isinfnan(was)) 9472 #else 9473 LIKELY(!Perl_isinf(was)) 9474 #endif 9475 ) { 9476 /* diag_listed_as: Lost precision when %s %f by 1 */ 9477 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), 9478 "Lost precision when decrementing %" NVff " by 1", 9479 was); 9480 } 9481 (void)SvNOK_only(sv); 9482 SvNV_set(sv, was - 1.0); 9483 return; 9484 } 9485 } 9486 9487 /* treat AV/HV/CV/FM/IO and non-fake GVs as immutable */ 9488 if (SvTYPE(sv) >= SVt_PVAV || (isGV_with_GP(sv) && !SvFAKE(sv))) 9489 Perl_croak_no_modify(); 9490 9491 if (!(flags & SVp_POK)) { 9492 if ((flags & SVTYPEMASK) < SVt_PVIV) 9493 sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV) ? SVt_PVIV : SVt_IV); 9494 SvIV_set(sv, -1); 9495 (void)SvIOK_only(sv); 9496 return; 9497 } 9498 #ifdef PERL_PRESERVE_IVUV 9499 { 9500 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL); 9501 if (numtype && !(numtype & IS_NUMBER_INFINITY)) { 9502 /* Need to try really hard to see if it's an integer. 9503 9.22337203685478e+18 is an integer. 9504 but "9.22337203685478e+18" + 0 is UV=9223372036854779904 9505 so $a="9.22337203685478e+18"; $a+0; $a-- 9506 needs to be the same as $a="9.22337203685478e+18"; $a-- 9507 or we go insane. */ 9508 9509 (void) sv_2iv(sv); 9510 if (SvIOK(sv)) 9511 goto oops_its_int; 9512 9513 /* sv_2iv *should* have made this an NV */ 9514 if (flags & SVp_NOK) { 9515 (void)SvNOK_only(sv); 9516 SvNV_set(sv, SvNVX(sv) - 1.0); 9517 return; 9518 } 9519 /* I don't think we can get here. Maybe I should assert this 9520 And if we do get here I suspect that sv_setnv will croak. NWC 9521 Fall through. */ 9522 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n", 9523 SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); 9524 } 9525 } 9526 #endif /* PERL_PRESERVE_IVUV */ 9527 sv_setnv(sv,Atof(SvPVX_const(sv)) - 1.0); /* punt */ 9528 } 9529 9530 /* this define is used to eliminate a chunk of duplicated but shared logic 9531 * it has the suffix __SV_C to signal that it isnt API, and isnt meant to be 9532 * used anywhere but here - yves 9533 */ 9534 #define PUSH_EXTEND_MORTAL__SV_C(AnSv) \ 9535 STMT_START { \ 9536 SSize_t ix = ++PL_tmps_ix; \ 9537 if (UNLIKELY(ix >= PL_tmps_max)) \ 9538 ix = tmps_grow_p(ix); \ 9539 PL_tmps_stack[ix] = (AnSv); \ 9540 } STMT_END 9541 9542 /* 9543 =for apidoc sv_mortalcopy 9544 9545 Creates a new SV which is a copy of the original SV (using C<sv_setsv>). 9546 The new SV is marked as mortal. It will be destroyed "soon", either by an 9547 explicit call to C<FREETMPS>, or by an implicit call at places such as 9548 statement boundaries. See also C<L</sv_newmortal>> and C<L</sv_2mortal>>. 9549 9550 =for apidoc sv_mortalcopy_flags 9551 9552 Like C<sv_mortalcopy>, but the extra C<flags> are passed to the 9553 C<sv_setsv_flags>. 9554 9555 =cut 9556 */ 9557 9558 /* Make a string that will exist for the duration of the expression 9559 * evaluation. Actually, it may have to last longer than that, but 9560 * hopefully we won't free it until it has been assigned to a 9561 * permanent location. */ 9562 9563 SV * 9564 Perl_sv_mortalcopy_flags(pTHX_ SV *const oldstr, U32 flags) 9565 { 9566 SV *sv; 9567 9568 if (flags & SV_GMAGIC) 9569 SvGETMAGIC(oldstr); /* before new_SV, in case it dies */ 9570 new_SV(sv); 9571 sv_setsv_flags(sv,oldstr,flags & ~SV_GMAGIC); 9572 PUSH_EXTEND_MORTAL__SV_C(sv); 9573 SvTEMP_on(sv); 9574 return sv; 9575 } 9576 9577 /* 9578 =for apidoc sv_newmortal 9579 9580 Creates a new null SV which is mortal. The reference count of the SV is 9581 set to 1. It will be destroyed "soon", either by an explicit call to 9582 C<FREETMPS>, or by an implicit call at places such as statement boundaries. 9583 See also C<L</sv_mortalcopy>> and C<L</sv_2mortal>>. 9584 9585 =cut 9586 */ 9587 9588 SV * 9589 Perl_sv_newmortal(pTHX) 9590 { 9591 SV *sv; 9592 9593 new_SV(sv); 9594 SvFLAGS(sv) = SVs_TEMP; 9595 PUSH_EXTEND_MORTAL__SV_C(sv); 9596 return sv; 9597 } 9598 9599 9600 /* 9601 =for apidoc newSVpvn_flags 9602 9603 Creates a new SV and copies a string (which may contain C<NUL> (C<\0>) 9604 characters) into it. The reference count for the 9605 SV is set to 1. Note that if C<len> is zero, Perl will create a zero length 9606 string. You are responsible for ensuring that the source string is at least 9607 C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined. 9608 Currently the only flag bits accepted are C<SVf_UTF8> and C<SVs_TEMP>. 9609 If C<SVs_TEMP> is set, then C<sv_2mortal()> is called on the result before 9610 returning. If C<SVf_UTF8> is set, C<s> 9611 is considered to be in UTF-8 and the 9612 C<SVf_UTF8> flag will be set on the new SV. 9613 C<newSVpvn_utf8()> is a convenience wrapper for this function, defined as 9614 9615 #define newSVpvn_utf8(s, len, u) \ 9616 newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) 9617 9618 =for apidoc Amnh||SVs_TEMP 9619 9620 =cut 9621 */ 9622 9623 SV * 9624 Perl_newSVpvn_flags(pTHX_ const char *const s, const STRLEN len, const U32 flags) 9625 { 9626 SV *sv; 9627 9628 /* All the flags we don't support must be zero. 9629 And we're new code so I'm going to assert this from the start. */ 9630 assert(!(flags & ~(SVf_UTF8|SVs_TEMP))); 9631 sv = newSV_type(SVt_PV); 9632 sv_setpvn_fresh(sv,s,len); 9633 9634 /* This code used to do a sv_2mortal(), however we now unroll the call to 9635 * sv_2mortal() and do what it does ourselves here. Since we have asserted 9636 * that flags can only have the SVf_UTF8 and/or SVs_TEMP flags set above we 9637 * can use it to enable the sv flags directly (bypassing SvTEMP_on), which 9638 * in turn means we don't need to mask out the SVf_UTF8 flag below, which 9639 * means that we eliminate quite a few steps than it looks - Yves 9640 * (explaining patch by gfx) */ 9641 9642 SvFLAGS(sv) |= flags; 9643 9644 if(flags & SVs_TEMP){ 9645 PUSH_EXTEND_MORTAL__SV_C(sv); 9646 } 9647 9648 return sv; 9649 } 9650 9651 /* 9652 =for apidoc sv_2mortal 9653 9654 Marks an existing SV as mortal. The SV will be destroyed "soon", either 9655 by an explicit call to C<FREETMPS>, or by an implicit call at places such as 9656 statement boundaries. C<SvTEMP()> is turned on which means that the SV's 9657 string buffer can be "stolen" if this SV is copied. See also 9658 C<L</sv_newmortal>> and C<L</sv_mortalcopy>>. 9659 9660 =cut 9661 */ 9662 9663 SV * 9664 Perl_sv_2mortal(pTHX_ SV *const sv) 9665 { 9666 if (!sv) 9667 return sv; 9668 if (SvIMMORTAL(sv)) 9669 return sv; 9670 PUSH_EXTEND_MORTAL__SV_C(sv); 9671 SvTEMP_on(sv); 9672 return sv; 9673 } 9674 9675 /* 9676 =for apidoc newSVpv 9677 9678 Creates a new SV and copies a string (which may contain C<NUL> (C<\0>) 9679 characters) into it. The reference count for the 9680 SV is set to 1. If C<len> is zero, Perl will compute the length using 9681 C<strlen()>, (which means if you use this option, that C<s> can't have embedded 9682 C<NUL> characters and has to have a terminating C<NUL> byte). 9683 9684 This function can cause reliability issues if you are likely to pass in 9685 empty strings that are not null terminated, because it will run 9686 strlen on the string and potentially run past valid memory. 9687 9688 Using L</newSVpvn> is a safer alternative for non C<NUL> terminated strings. 9689 For string literals use L</newSVpvs> instead. This function will work fine for 9690 C<NUL> terminated strings, but if you want to avoid the if statement on whether 9691 to call C<strlen> use C<newSVpvn> instead (calling C<strlen> yourself). 9692 9693 =cut 9694 */ 9695 9696 SV * 9697 Perl_newSVpv(pTHX_ const char *const s, const STRLEN len) 9698 { 9699 SV *sv = newSV_type(SVt_PV); 9700 sv_setpvn_fresh(sv, s, len || s == NULL ? len : strlen(s)); 9701 return sv; 9702 } 9703 9704 /* 9705 =for apidoc newSVpvn 9706 9707 Creates a new SV and copies a string into it, which may contain C<NUL> characters 9708 (C<\0>) and other binary data. The reference count for the SV is set to 1. 9709 Note that if C<len> is zero, Perl will create a zero length (Perl) string. You 9710 are responsible for ensuring that the source buffer is at least 9711 C<len> bytes long. If the C<buffer> argument is NULL the new SV will be 9712 undefined. 9713 9714 =cut 9715 */ 9716 9717 SV * 9718 Perl_newSVpvn(pTHX_ const char *const buffer, const STRLEN len) 9719 { 9720 SV *sv = newSV_type(SVt_PV); 9721 sv_setpvn_fresh(sv,buffer,len); 9722 return sv; 9723 } 9724 9725 /* 9726 =for apidoc newSVhek_mortal 9727 9728 Creates a new mortal SV from the hash key structure. It will generate 9729 scalars that point to the shared string table where possible. Returns 9730 a new (undefined) SV if C<hek> is NULL. 9731 9732 This is more efficient than using sv_2mortal(newSVhek( ... )) 9733 9734 =cut 9735 */ 9736 9737 SV * 9738 Perl_newSVhek_mortal(pTHX_ const HEK *const hek) 9739 { 9740 SV * const sv = newSVhek(hek); 9741 assert(sv); 9742 assert(!SvIMMORTAL(sv)); 9743 9744 PUSH_EXTEND_MORTAL__SV_C(sv); 9745 SvTEMP_on(sv); 9746 return sv; 9747 } 9748 9749 /* 9750 =for apidoc newSVhek 9751 9752 Creates a new SV from the hash key structure. It will generate scalars that 9753 point to the shared string table where possible. Returns a new (undefined) 9754 SV if C<hek> is NULL. 9755 9756 =cut 9757 */ 9758 9759 SV * 9760 Perl_newSVhek(pTHX_ const HEK *const hek) 9761 { 9762 if (!hek) { 9763 SV *sv; 9764 9765 new_SV(sv); 9766 return sv; 9767 } 9768 9769 if (HEK_LEN(hek) == HEf_SVKEY) { 9770 return newSVsv(*(SV**)HEK_KEY(hek)); 9771 } else { 9772 const int flags = HEK_FLAGS(hek); 9773 if (flags & HVhek_WASUTF8) { 9774 /* Trouble :-) 9775 Andreas would like keys he put in as utf8 to come back as utf8 9776 */ 9777 STRLEN utf8_len = HEK_LEN(hek); 9778 SV * const sv = newSV_type(SVt_PV); 9779 char *as_utf8 = (char *)bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len); 9780 /* bytes_to_utf8() allocates a new string, which we can repurpose: */ 9781 sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); 9782 SvUTF8_on (sv); 9783 return sv; 9784 } else if (flags & HVhek_NOTSHARED) { 9785 /* A hash that isn't using shared hash keys has to have 9786 the flag in every key so that we know not to try to call 9787 share_hek_hek on it. */ 9788 9789 SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); 9790 if (HEK_UTF8(hek)) 9791 SvUTF8_on (sv); 9792 return sv; 9793 } 9794 /* This will be overwhelmingly the most common case. */ 9795 { 9796 /* Inline most of newSVpvn_share(), because share_hek_hek() is far 9797 more efficient than sharepvn(). */ 9798 SV *sv = newSV_type(SVt_PV); 9799 9800 SvPV_set(sv, (char *)HEK_KEY(share_hek_hek(hek))); 9801 SvCUR_set(sv, HEK_LEN(hek)); 9802 SvLEN_set(sv, 0); 9803 SvIsCOW_on(sv); 9804 SvPOK_on(sv); 9805 if (HEK_UTF8(hek)) 9806 SvUTF8_on(sv); 9807 return sv; 9808 } 9809 } 9810 } 9811 9812 /* 9813 =for apidoc newSVpvn_share 9814 9815 Creates a new SV with its C<SvPVX_const> pointing to a shared string in the string 9816 table. If the string does not already exist in the table, it is 9817 created first. Turns on the C<SvIsCOW> flag (or C<READONLY> 9818 and C<FAKE> in 5.16 and earlier). If the C<hash> parameter 9819 is non-zero, that value is used; otherwise the hash is computed. 9820 The string's hash can later be retrieved from the SV 9821 with the C<L</SvSHARED_HASH>> macro. The idea here is 9822 that as the string table is used for shared hash keys these strings will have 9823 C<SvPVX_const == HeKEY> and hash lookup will avoid string compare. 9824 9825 =cut 9826 */ 9827 9828 SV * 9829 Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) 9830 { 9831 SV *sv; 9832 bool is_utf8 = FALSE; 9833 const char *const orig_src = src; 9834 9835 if (len < 0) { 9836 STRLEN tmplen = -len; 9837 is_utf8 = TRUE; 9838 /* See the note in hv.c:hv_fetch() --jhi */ 9839 src = (char*)bytes_from_utf8((const U8*)src, &tmplen, &is_utf8); 9840 len = tmplen; 9841 } 9842 if (!hash) 9843 PERL_HASH(hash, src, len); 9844 sv = newSV_type(SVt_PV); 9845 /* The logic for this is inlined in S_mro_get_linear_isa_dfs(), so if it 9846 changes here, update it there too. */ 9847 SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash)); 9848 SvCUR_set(sv, len); 9849 SvLEN_set(sv, 0); 9850 SvIsCOW_on(sv); 9851 SvPOK_on(sv); 9852 if (is_utf8) 9853 SvUTF8_on(sv); 9854 if (src != orig_src) 9855 Safefree(src); 9856 return sv; 9857 } 9858 9859 /* 9860 =for apidoc newSVpv_share 9861 9862 Like C<newSVpvn_share>, but takes a C<NUL>-terminated string instead of a 9863 string/length pair. 9864 9865 =cut 9866 */ 9867 9868 SV * 9869 Perl_newSVpv_share(pTHX_ const char *src, U32 hash) 9870 { 9871 return newSVpvn_share(src, strlen(src), hash); 9872 } 9873 9874 #if defined(MULTIPLICITY) 9875 9876 /* pTHX_ magic can't cope with varargs, so this is a no-context 9877 * version of the main function, (which may itself be aliased to us). 9878 * Don't access this version directly. 9879 */ 9880 9881 SV * 9882 Perl_newSVpvf_nocontext(const char *const pat, ...) 9883 { 9884 dTHX; 9885 SV *sv; 9886 va_list args; 9887 9888 PERL_ARGS_ASSERT_NEWSVPVF_NOCONTEXT; 9889 9890 va_start(args, pat); 9891 sv = vnewSVpvf(pat, &args); 9892 va_end(args); 9893 return sv; 9894 } 9895 #endif 9896 9897 /* 9898 =for apidoc newSVpvf 9899 9900 Creates a new SV and initializes it with the string formatted like 9901 C<sv_catpvf>. 9902 9903 =for apidoc newSVpvf_nocontext 9904 Like C<L</newSVpvf>> but does not take a thread context (C<aTHX>) parameter, 9905 so is used in situations where the caller doesn't already have the thread 9906 context. 9907 9908 =for apidoc vnewSVpvf 9909 Like C<L</newSVpvf>> but the arguments are an encapsulated argument list. 9910 9911 =cut 9912 */ 9913 9914 SV * 9915 Perl_newSVpvf(pTHX_ const char *const pat, ...) 9916 { 9917 SV *sv; 9918 va_list args; 9919 9920 PERL_ARGS_ASSERT_NEWSVPVF; 9921 9922 va_start(args, pat); 9923 sv = vnewSVpvf(pat, &args); 9924 va_end(args); 9925 return sv; 9926 } 9927 9928 /* backend for newSVpvf() and newSVpvf_nocontext() */ 9929 9930 SV * 9931 Perl_vnewSVpvf(pTHX_ const char *const pat, va_list *const args) 9932 { 9933 SV *sv; 9934 9935 PERL_ARGS_ASSERT_VNEWSVPVF; 9936 9937 sv = newSV(1); 9938 SvPVCLEAR_FRESH(sv); 9939 sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, 0); 9940 return sv; 9941 } 9942 9943 /* 9944 =for apidoc newSVnv 9945 9946 Creates a new SV and copies a floating point value into it. 9947 The reference count for the SV is set to 1. 9948 9949 =cut 9950 */ 9951 9952 SV * 9953 Perl_newSVnv(pTHX_ const NV n) 9954 { 9955 SV *sv = newSV_type(SVt_NV); 9956 (void)SvNOK_on(sv); 9957 9958 SvNV_set(sv, n); 9959 SvTAINT(sv); 9960 9961 return sv; 9962 } 9963 9964 /* 9965 =for apidoc newSViv 9966 9967 Creates a new SV and copies an integer into it. The reference count for the 9968 SV is set to 1. 9969 9970 =cut 9971 */ 9972 9973 SV * 9974 Perl_newSViv(pTHX_ const IV i) 9975 { 9976 SV *sv = newSV_type(SVt_IV); 9977 (void)SvIOK_on(sv); 9978 9979 SvIV_set(sv, i); 9980 SvTAINT(sv); 9981 9982 return sv; 9983 } 9984 9985 /* 9986 =for apidoc newSVuv 9987 9988 Creates a new SV and copies an unsigned integer into it. 9989 The reference count for the SV is set to 1. 9990 9991 =cut 9992 */ 9993 9994 SV * 9995 Perl_newSVuv(pTHX_ const UV u) 9996 { 9997 SV *sv; 9998 9999 /* Inlining ONLY the small relevant subset of sv_setuv here 10000 * for performance. Makes a significant difference. */ 10001 10002 /* Using ivs is more efficient than using uvs - see sv_setuv */ 10003 if (u <= (UV)IV_MAX) { 10004 return newSViv((IV)u); 10005 } 10006 10007 new_SV(sv); 10008 10009 /* We're starting from SVt_FIRST, so provided that's 10010 * actual 0, we don't have to unset any SV type flags 10011 * to promote to SVt_IV. */ 10012 STATIC_ASSERT_STMT(SVt_FIRST == 0); 10013 10014 SET_SVANY_FOR_BODYLESS_IV(sv); 10015 SvFLAGS(sv) |= SVt_IV; 10016 (void)SvIOK_on(sv); 10017 (void)SvIsUV_on(sv); 10018 10019 SvUV_set(sv, u); 10020 SvTAINT(sv); 10021 10022 return sv; 10023 } 10024 10025 /* 10026 =for apidoc newSVbool 10027 10028 Creates a new SV boolean. 10029 10030 =cut 10031 */ 10032 10033 SV * 10034 Perl_newSVbool(pTHX_ bool bool_val) 10035 { 10036 PERL_ARGS_ASSERT_NEWSVBOOL; 10037 SV *sv = newSVsv(bool_val ? &PL_sv_yes : &PL_sv_no); 10038 10039 return sv; 10040 } 10041 10042 /* 10043 =for apidoc newSV_true 10044 10045 Creates a new SV that is a boolean true. 10046 10047 =cut 10048 */ 10049 SV * 10050 Perl_newSV_true(pTHX) 10051 { 10052 PERL_ARGS_ASSERT_NEWSV_TRUE; 10053 SV *sv = newSVsv(&PL_sv_yes); 10054 10055 return sv; 10056 } 10057 10058 /* 10059 =for apidoc newSV_false 10060 10061 Creates a new SV that is a boolean false. 10062 10063 =cut 10064 */ 10065 10066 SV * 10067 Perl_newSV_false(pTHX) 10068 { 10069 PERL_ARGS_ASSERT_NEWSV_FALSE; 10070 SV *sv = newSVsv(&PL_sv_no); 10071 10072 return sv; 10073 } 10074 10075 /* newRV_inc is the official function name to use now. 10076 * newRV_inc is in fact #defined to newRV in sv.h 10077 */ 10078 10079 SV * 10080 Perl_newRV(pTHX_ SV *const sv) 10081 { 10082 PERL_ARGS_ASSERT_NEWRV; 10083 10084 return newRV_noinc(SvREFCNT_inc_simple_NN(sv)); 10085 } 10086 10087 /* 10088 =for apidoc newSVsv 10089 =for apidoc_item newSVsv_flags 10090 =for apidoc_item newSVsv_nomg 10091 10092 These create a new SV which is an exact duplicate of the original SV 10093 (using C<sv_setsv>.) 10094 10095 They differ only in that C<newSVsv> performs 'get' magic; C<newSVsv_nomg> skips 10096 any magic; and C<newSVsv_flags> allows you to explicitly set a C<flags> 10097 parameter. 10098 10099 =cut 10100 */ 10101 10102 SV * 10103 Perl_newSVsv_flags(pTHX_ SV *const old, I32 flags) 10104 { 10105 SV *sv; 10106 10107 if (!old) 10108 return NULL; 10109 if (SvIS_FREED(old)) { 10110 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), "semi-panic: attempt to dup freed string"); 10111 return NULL; 10112 } 10113 /* Do this here, otherwise we leak the new SV if this croaks. */ 10114 if (flags & SV_GMAGIC) 10115 SvGETMAGIC(old); 10116 new_SV(sv); 10117 sv_setsv_flags(sv, old, flags & ~SV_GMAGIC); 10118 return sv; 10119 } 10120 10121 /* 10122 =for apidoc sv_reset 10123 10124 Underlying implementation for the C<reset> Perl function. 10125 Note that the perl-level function is vaguely deprecated. 10126 10127 =cut 10128 */ 10129 10130 void 10131 Perl_sv_reset(pTHX_ const char *s, HV *const stash) 10132 { 10133 PERL_ARGS_ASSERT_SV_RESET; 10134 10135 sv_resetpvn(*s ? s : NULL, strlen(s), stash); 10136 } 10137 10138 void 10139 Perl_sv_resetpvn(pTHX_ const char *s, STRLEN len, HV * const stash) 10140 { 10141 char todo[PERL_UCHAR_MAX+1]; 10142 const char *send; 10143 10144 if (!stash || SvTYPE(stash) != SVt_PVHV) 10145 return; 10146 10147 if (!s) { /* reset ?? searches */ 10148 MAGIC * const mg = mg_find((const SV *)stash, PERL_MAGIC_symtab); 10149 if (mg && mg->mg_len) { 10150 const U32 count = mg->mg_len / sizeof(PMOP**); 10151 PMOP **pmp = (PMOP**) mg->mg_ptr; 10152 PMOP *const *const end = pmp + count; 10153 10154 while (pmp < end) { 10155 #ifdef USE_ITHREADS 10156 SvREADONLY_off(PL_regex_pad[(*pmp)->op_pmoffset]); 10157 #else 10158 (*pmp)->op_pmflags &= ~PMf_USED; 10159 #endif 10160 ++pmp; 10161 } 10162 } 10163 return; 10164 } 10165 10166 /* reset variables */ 10167 10168 if (!HvTOTALKEYS(stash)) 10169 return; 10170 10171 Zero(todo, 256, char); 10172 send = s + len; 10173 while (s < send) { 10174 I32 max; 10175 I32 i = (unsigned char)*s; 10176 if (s[1] == '-') { 10177 s += 2; 10178 } 10179 max = (unsigned char)*s++; 10180 for ( ; i <= max; i++) { 10181 todo[i] = 1; 10182 } 10183 for (i = 0; i <= (I32) HvMAX(stash); i++) { 10184 HE *entry; 10185 for (entry = HvARRAY(stash)[i]; 10186 entry; 10187 entry = HeNEXT(entry)) 10188 { 10189 GV *gv; 10190 SV *sv; 10191 10192 if (!todo[(U8)*HeKEY(entry)]) 10193 continue; 10194 gv = MUTABLE_GV(HeVAL(entry)); 10195 if (!isGV(gv)) 10196 continue; 10197 sv = GvSV(gv); 10198 if (sv && !SvREADONLY(sv)) { 10199 SV_CHECK_THINKFIRST_COW_DROP(sv); 10200 if (!isGV(sv)) { 10201 SvOK_off(sv); 10202 SvSETMAGIC(sv); 10203 } 10204 } 10205 if (GvAV(gv)) { 10206 av_clear(GvAV(gv)); 10207 } 10208 if (GvHV(gv) && !HvHasNAME(GvHV(gv))) { 10209 hv_clear(GvHV(gv)); 10210 } 10211 } 10212 } 10213 } 10214 } 10215 10216 /* 10217 =for apidoc sv_2io 10218 10219 Using various gambits, try to get an IO from an SV: the IO slot if its a 10220 GV; or the recursive result if we're an RV; or the IO slot of the symbol 10221 named after the PV if we're a string. 10222 10223 'Get' magic is ignored on the C<sv> passed in, but will be called on 10224 C<SvRV(sv)> if C<sv> is an RV. 10225 10226 =cut 10227 */ 10228 10229 IO* 10230 Perl_sv_2io(pTHX_ SV *const sv) 10231 { 10232 IO* io; 10233 GV* gv; 10234 10235 PERL_ARGS_ASSERT_SV_2IO; 10236 10237 switch (SvTYPE(sv)) { 10238 case SVt_PVIO: 10239 io = MUTABLE_IO(sv); 10240 break; 10241 case SVt_PVGV: 10242 case SVt_PVLV: 10243 if (isGV_with_GP(sv)) { 10244 gv = MUTABLE_GV(sv); 10245 io = GvIO(gv); 10246 if (!io) 10247 Perl_croak(aTHX_ "Bad filehandle: %" HEKf, 10248 HEKfARG(GvNAME_HEK(gv))); 10249 break; 10250 } 10251 /* FALLTHROUGH */ 10252 default: 10253 if (!SvOK(sv)) 10254 Perl_croak(aTHX_ PL_no_usym, "filehandle"); 10255 if (SvROK(sv)) { 10256 SvGETMAGIC(SvRV(sv)); 10257 return sv_2io(SvRV(sv)); 10258 } 10259 gv = gv_fetchsv_nomg(sv, 0, SVt_PVIO); 10260 if (gv) 10261 io = GvIO(gv); 10262 else 10263 io = 0; 10264 if (!io) { 10265 SV *newsv = sv; 10266 if (SvGMAGICAL(sv)) { 10267 newsv = sv_newmortal(); 10268 sv_setsv_nomg(newsv, sv); 10269 } 10270 Perl_croak(aTHX_ "Bad filehandle: %" SVf, SVfARG(newsv)); 10271 } 10272 break; 10273 } 10274 return io; 10275 } 10276 10277 /* 10278 =for apidoc sv_2cv 10279 10280 Using various gambits, try to get a CV from an SV; in addition, try if 10281 possible to set C<*st> and C<*gvp> to the stash and GV associated with it. 10282 The flags in C<lref> are passed to C<gv_fetchsv>. 10283 10284 =cut 10285 */ 10286 10287 CV * 10288 Perl_sv_2cv(pTHX_ SV *sv, HV **const st, GV **const gvp, const I32 lref) 10289 { 10290 GV *gv = NULL; 10291 CV *cv = NULL; 10292 10293 PERL_ARGS_ASSERT_SV_2CV; 10294 10295 if (!sv) { 10296 *st = NULL; 10297 *gvp = NULL; 10298 return NULL; 10299 } 10300 switch (SvTYPE(sv)) { 10301 case SVt_PVCV: 10302 *st = CvSTASH(sv); 10303 *gvp = NULL; 10304 return MUTABLE_CV(sv); 10305 case SVt_PVHV: 10306 case SVt_PVAV: 10307 *st = NULL; 10308 *gvp = NULL; 10309 return NULL; 10310 default: 10311 SvGETMAGIC(sv); 10312 if (SvROK(sv)) { 10313 if (SvAMAGIC(sv)) 10314 sv = amagic_deref_call(sv, to_cv_amg); 10315 10316 sv = SvRV(sv); 10317 if (SvTYPE(sv) == SVt_PVCV) { 10318 cv = MUTABLE_CV(sv); 10319 *gvp = NULL; 10320 *st = CvSTASH(cv); 10321 return cv; 10322 } 10323 else if(SvGETMAGIC(sv), isGV_with_GP(sv)) 10324 gv = MUTABLE_GV(sv); 10325 else 10326 Perl_croak(aTHX_ "Not a subroutine reference"); 10327 } 10328 else if (isGV_with_GP(sv)) { 10329 gv = MUTABLE_GV(sv); 10330 } 10331 else { 10332 gv = gv_fetchsv_nomg(sv, lref, SVt_PVCV); 10333 } 10334 *gvp = gv; 10335 if (!gv) { 10336 *st = NULL; 10337 return NULL; 10338 } 10339 /* Some flags to gv_fetchsv mean don't really create the GV */ 10340 if (!isGV_with_GP(gv)) { 10341 *st = NULL; 10342 return NULL; 10343 } 10344 *st = GvESTASH(gv); 10345 if (lref & ~GV_ADDMG && !GvCVu(gv)) { 10346 /* XXX this is probably not what they think they're getting. 10347 * It has the same effect as "sub name;", i.e. just a forward 10348 * declaration! */ 10349 newSTUB(gv,0); 10350 } 10351 return GvCVu(gv); 10352 } 10353 } 10354 10355 /* 10356 =for apidoc sv_true 10357 10358 Returns true if the SV has a true value by Perl's rules. 10359 Use the C<SvTRUE> macro instead, which may call C<sv_true()> or may 10360 instead use an in-line version. 10361 10362 =cut 10363 */ 10364 10365 I32 10366 Perl_sv_true(pTHX_ SV *const sv) 10367 { 10368 if (!sv) 10369 return 0; 10370 if (SvPOK(sv)) { 10371 const XPV* const tXpv = (XPV*)SvANY(sv); 10372 if (tXpv && 10373 (tXpv->xpv_cur > 1 || 10374 (tXpv->xpv_cur && *sv->sv_u.svu_pv != '0'))) 10375 return 1; 10376 else 10377 return 0; 10378 } 10379 else { 10380 if (SvIOK(sv)) 10381 return SvIVX(sv) != 0; 10382 else { 10383 if (SvNOK(sv)) 10384 return SvNVX(sv) != 0.0; 10385 else 10386 return sv_2bool(sv); 10387 } 10388 } 10389 } 10390 10391 /* 10392 =for apidoc sv_pvn_force 10393 10394 Get a sensible string out of the SV somehow. 10395 A private implementation of the C<SvPV_force> macro for compilers which 10396 can't cope with complex macro expressions. Always use the macro instead. 10397 10398 =for apidoc sv_pvn_force_flags 10399 10400 Get a sensible string out of the SV somehow. 10401 If C<flags> has the C<SV_GMAGIC> bit set, will C<L</mg_get>> on C<sv> if 10402 appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are 10403 implemented in terms of this function. 10404 You normally want to use the various wrapper macros instead: see 10405 C<L</SvPV_force>> and C<L</SvPV_force_nomg>>. 10406 10407 =cut 10408 */ 10409 10410 char * 10411 Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags) 10412 { 10413 PERL_ARGS_ASSERT_SV_PVN_FORCE_FLAGS; 10414 10415 if (flags & SV_GMAGIC) SvGETMAGIC(sv); 10416 if (SvTHINKFIRST(sv) && (!SvROK(sv) || SvREADONLY(sv))) 10417 sv_force_normal_flags(sv, 0); 10418 10419 if (SvPOK(sv)) { 10420 if (lp) 10421 *lp = SvCUR(sv); 10422 } 10423 else { 10424 char *s; 10425 STRLEN len; 10426 10427 if (SvTYPE(sv) > SVt_PVLV 10428 || isGV_with_GP(sv)) 10429 /* diag_listed_as: Can't coerce %s to %s in %s */ 10430 Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0), 10431 OP_DESC(PL_op)); 10432 s = sv_2pv_flags(sv, &len, flags &~ SV_GMAGIC); 10433 if (!s) { 10434 s = (char *)""; 10435 } 10436 if (lp) 10437 *lp = len; 10438 10439 if (SvTYPE(sv) < SVt_PV || 10440 s != SvPVX_const(sv)) { /* Almost, but not quite, sv_setpvn() */ 10441 if (SvROK(sv)) 10442 sv_unref(sv); 10443 SvUPGRADE(sv, SVt_PV); /* Never FALSE */ 10444 SvGROW(sv, len + 1); 10445 Move(s,SvPVX(sv),len,char); 10446 SvCUR_set(sv, len); 10447 SvPVX(sv)[len] = '\0'; 10448 } 10449 if (!SvPOK(sv)) { 10450 SvPOK_on(sv); /* validate pointer */ 10451 SvTAINT(sv); 10452 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n", 10453 PTR2UV(sv),SvPVX_const(sv))); 10454 } 10455 } 10456 (void)SvPOK_only_UTF8(sv); 10457 return SvPVX_mutable(sv); 10458 } 10459 10460 /* 10461 =for apidoc sv_pvbyten_force 10462 10463 The backend for the C<SvPVbytex_force> macro. Always use the macro 10464 instead. If the SV cannot be downgraded from UTF-8, this croaks. 10465 10466 =cut 10467 */ 10468 10469 char * 10470 Perl_sv_pvbyten_force(pTHX_ SV *const sv, STRLEN *const lp) 10471 { 10472 PERL_ARGS_ASSERT_SV_PVBYTEN_FORCE; 10473 10474 sv_pvn_force(sv,lp); 10475 (void)sv_utf8_downgrade(sv,0); 10476 *lp = SvCUR(sv); 10477 return SvPVX(sv); 10478 } 10479 10480 /* 10481 =for apidoc sv_pvutf8n_force 10482 10483 The backend for the C<SvPVutf8x_force> macro. Always use the macro 10484 instead. 10485 10486 =cut 10487 */ 10488 10489 char * 10490 Perl_sv_pvutf8n_force(pTHX_ SV *const sv, STRLEN *const lp) 10491 { 10492 PERL_ARGS_ASSERT_SV_PVUTF8N_FORCE; 10493 10494 sv_pvn_force(sv,0); 10495 sv_utf8_upgrade_nomg(sv); 10496 *lp = SvCUR(sv); 10497 return SvPVX(sv); 10498 } 10499 10500 /* 10501 =for apidoc sv_reftype 10502 10503 Returns a string describing what the SV is a reference to. 10504 10505 If ob is true and the SV is blessed, the string is the class name, 10506 otherwise it is the type of the SV, "SCALAR", "ARRAY" etc. 10507 10508 =cut 10509 */ 10510 10511 const char * 10512 Perl_sv_reftype(pTHX_ const SV *const sv, const int ob) 10513 { 10514 PERL_ARGS_ASSERT_SV_REFTYPE; 10515 if (ob && SvOBJECT(sv)) { 10516 return SvPV_nolen_const(sv_ref(NULL, sv, ob)); 10517 } 10518 else { 10519 /* WARNING - There is code, for instance in mg.c, that assumes that 10520 * the only reason that sv_reftype(sv,0) would return a string starting 10521 * with 'L' or 'S' is that it is a LVALUE or a SCALAR. 10522 * Yes this a dodgy way to do type checking, but it saves practically reimplementing 10523 * this routine inside other subs, and it saves time. 10524 * Do not change this assumption without searching for "dodgy type check" in 10525 * the code. 10526 * - Yves */ 10527 switch (SvTYPE(sv)) { 10528 case SVt_NULL: 10529 case SVt_IV: 10530 case SVt_NV: 10531 case SVt_PV: 10532 case SVt_PVIV: 10533 case SVt_PVNV: 10534 case SVt_PVMG: 10535 if (SvVOK(sv)) 10536 return "VSTRING"; 10537 if (SvROK(sv)) 10538 return "REF"; 10539 else 10540 return "SCALAR"; 10541 10542 case SVt_PVLV: return (char *) (SvROK(sv) ? "REF" 10543 /* tied lvalues should appear to be 10544 * scalars for backwards compatibility */ 10545 : (isALPHA_FOLD_EQ(LvTYPE(sv), 't')) 10546 ? "SCALAR" : "LVALUE"); 10547 case SVt_PVAV: return "ARRAY"; 10548 case SVt_PVHV: return "HASH"; 10549 case SVt_PVCV: return "CODE"; 10550 case SVt_PVGV: return (char *) (isGV_with_GP(sv) 10551 ? "GLOB" : "SCALAR"); 10552 case SVt_PVFM: return "FORMAT"; 10553 case SVt_PVIO: return "IO"; 10554 case SVt_INVLIST: return "INVLIST"; 10555 case SVt_REGEXP: return "REGEXP"; 10556 case SVt_PVOBJ: return "OBJECT"; 10557 default: return "UNKNOWN"; 10558 } 10559 } 10560 } 10561 10562 /* 10563 =for apidoc sv_ref 10564 10565 Returns a SV describing what the SV passed in is a reference to. 10566 10567 dst can be a SV to be set to the description or NULL, in which case a 10568 mortal SV is returned. 10569 10570 If ob is true and the SV is blessed, the description is the class 10571 name, otherwise it is the type of the SV, "SCALAR", "ARRAY" etc. 10572 10573 =cut 10574 */ 10575 10576 SV * 10577 Perl_sv_ref(pTHX_ SV *dst, const SV *const sv, const int ob) 10578 { 10579 PERL_ARGS_ASSERT_SV_REF; 10580 10581 if (!dst) 10582 dst = sv_newmortal(); 10583 10584 if (ob && SvOBJECT(sv)) { 10585 if (HvHasNAME(SvSTASH(sv))) 10586 sv_sethek(dst, HvNAME_HEK(SvSTASH(sv))); 10587 else 10588 sv_setpvs(dst, "__ANON__"); 10589 } 10590 else { 10591 const char * reftype = sv_reftype(sv, 0); 10592 sv_setpv(dst, reftype); 10593 } 10594 return dst; 10595 } 10596 10597 /* 10598 =for apidoc sv_isobject 10599 10600 Returns a boolean indicating whether the SV is an RV pointing to a blessed 10601 object. If the SV is not an RV, or if the object is not blessed, then this 10602 will return false. 10603 10604 =cut 10605 */ 10606 10607 int 10608 Perl_sv_isobject(pTHX_ SV *sv) 10609 { 10610 if (!sv) 10611 return 0; 10612 SvGETMAGIC(sv); 10613 if (!SvROK(sv)) 10614 return 0; 10615 sv = SvRV(sv); 10616 if (!SvOBJECT(sv)) 10617 return 0; 10618 return 1; 10619 } 10620 10621 /* 10622 =for apidoc sv_isa 10623 10624 Returns a boolean indicating whether the SV is blessed into the specified 10625 class. 10626 10627 This does not check for subtypes or method overloading. Use C<sv_isa_sv> to 10628 verify an inheritance relationship in the same way as the C<isa> operator by 10629 respecting any C<isa()> method overloading; or C<sv_derived_from_sv> to test 10630 directly on the actual object type. 10631 10632 =cut 10633 */ 10634 10635 int 10636 Perl_sv_isa(pTHX_ SV *sv, const char *const name) 10637 { 10638 const char *hvname; 10639 10640 PERL_ARGS_ASSERT_SV_ISA; 10641 10642 if (!sv) 10643 return 0; 10644 SvGETMAGIC(sv); 10645 if (!SvROK(sv)) 10646 return 0; 10647 sv = SvRV(sv); 10648 if (!SvOBJECT(sv)) 10649 return 0; 10650 hvname = HvNAME_get(SvSTASH(sv)); 10651 if (!hvname) 10652 return 0; 10653 10654 return strEQ(hvname, name); 10655 } 10656 10657 /* 10658 =for apidoc newSVrv 10659 10660 Creates a new SV for the existing RV, C<rv>, to point to. If C<rv> is not an 10661 RV then it will be upgraded to one. If C<classname> is non-null then the new 10662 SV will be blessed in the specified package. The new SV is returned and its 10663 reference count is 1. The reference count 1 is owned by C<rv>. See also 10664 newRV_inc() and newRV_noinc() for creating a new RV properly. 10665 10666 =cut 10667 */ 10668 10669 SV* 10670 Perl_newSVrv(pTHX_ SV *const rv, const char *const classname) 10671 { 10672 SV *sv; 10673 10674 PERL_ARGS_ASSERT_NEWSVRV; 10675 10676 new_SV(sv); 10677 10678 SV_CHECK_THINKFIRST_COW_DROP(rv); 10679 10680 if (UNLIKELY( SvTYPE(rv) >= SVt_PVMG )) { 10681 const U32 refcnt = SvREFCNT(rv); 10682 SvREFCNT(rv) = 0; 10683 sv_clear(rv); 10684 SvFLAGS(rv) = 0; 10685 SvREFCNT(rv) = refcnt; 10686 10687 sv_upgrade(rv, SVt_IV); 10688 } else if (SvROK(rv)) { 10689 SvREFCNT_dec(SvRV(rv)); 10690 } else { 10691 prepare_SV_for_RV(rv); 10692 } 10693 10694 SvOK_off(rv); 10695 SvRV_set(rv, sv); 10696 SvROK_on(rv); 10697 10698 if (classname) { 10699 HV* const stash = gv_stashpv(classname, GV_ADD); 10700 (void)sv_bless(rv, stash); 10701 } 10702 return sv; 10703 } 10704 10705 SV * 10706 Perl_newSVavdefelem(pTHX_ AV *av, SSize_t ix, bool extendible) 10707 { 10708 SV * const lv = newSV_type(SVt_PVLV); 10709 PERL_ARGS_ASSERT_NEWSVAVDEFELEM; 10710 LvTYPE(lv) = 'y'; 10711 sv_magic(lv, NULL, PERL_MAGIC_defelem, NULL, 0); 10712 LvTARG(lv) = SvREFCNT_inc_simple_NN(av); 10713 LvSTARGOFF(lv) = ix; 10714 LvTARGLEN(lv) = extendible ? 1 : (STRLEN)UV_MAX; 10715 return lv; 10716 } 10717 10718 /* 10719 =for apidoc sv_setref_pv 10720 10721 Copies a pointer into a new SV, optionally blessing the SV. The C<rv> 10722 argument will be upgraded to an RV. That RV will be modified to point to 10723 the new SV. If the C<pv> argument is C<NULL>, then C<PL_sv_undef> will be placed 10724 into the SV. The C<classname> argument indicates the package for the 10725 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10726 will have a reference count of 1, and the RV will be returned. 10727 10728 Do not use with other Perl types such as HV, AV, SV, CV, because those 10729 objects will become corrupted by the pointer copy process. 10730 10731 Note that C<sv_setref_pvn> copies the string while this copies the pointer. 10732 10733 =cut 10734 */ 10735 10736 SV* 10737 Perl_sv_setref_pv(pTHX_ SV *const rv, const char *const classname, void *const pv) 10738 { 10739 PERL_ARGS_ASSERT_SV_SETREF_PV; 10740 10741 if (!pv) { 10742 sv_set_undef(rv); 10743 SvSETMAGIC(rv); 10744 } 10745 else 10746 sv_setiv(newSVrv(rv,classname), PTR2IV(pv)); 10747 return rv; 10748 } 10749 10750 /* 10751 =for apidoc sv_setref_iv 10752 10753 Copies an integer into a new SV, optionally blessing the SV. The C<rv> 10754 argument will be upgraded to an RV. That RV will be modified to point to 10755 the new SV. The C<classname> argument indicates the package for the 10756 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10757 will have a reference count of 1, and the RV will be returned. 10758 10759 =cut 10760 */ 10761 10762 SV* 10763 Perl_sv_setref_iv(pTHX_ SV *const rv, const char *const classname, const IV iv) 10764 { 10765 PERL_ARGS_ASSERT_SV_SETREF_IV; 10766 10767 sv_setiv(newSVrv(rv,classname), iv); 10768 return rv; 10769 } 10770 10771 /* 10772 =for apidoc sv_setref_uv 10773 10774 Copies an unsigned integer into a new SV, optionally blessing the SV. The C<rv> 10775 argument will be upgraded to an RV. That RV will be modified to point to 10776 the new SV. The C<classname> argument indicates the package for the 10777 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10778 will have a reference count of 1, and the RV will be returned. 10779 10780 =cut 10781 */ 10782 10783 SV* 10784 Perl_sv_setref_uv(pTHX_ SV *const rv, const char *const classname, const UV uv) 10785 { 10786 PERL_ARGS_ASSERT_SV_SETREF_UV; 10787 10788 sv_setuv(newSVrv(rv,classname), uv); 10789 return rv; 10790 } 10791 10792 /* 10793 =for apidoc sv_setref_nv 10794 10795 Copies a double into a new SV, optionally blessing the SV. The C<rv> 10796 argument will be upgraded to an RV. That RV will be modified to point to 10797 the new SV. The C<classname> argument indicates the package for the 10798 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV 10799 will have a reference count of 1, and the RV will be returned. 10800 10801 =cut 10802 */ 10803 10804 SV* 10805 Perl_sv_setref_nv(pTHX_ SV *const rv, const char *const classname, const NV nv) 10806 { 10807 PERL_ARGS_ASSERT_SV_SETREF_NV; 10808 10809 sv_setnv(newSVrv(rv,classname), nv); 10810 return rv; 10811 } 10812 10813 /* 10814 =for apidoc sv_setref_pvn 10815 10816 Copies a string into a new SV, optionally blessing the SV. The length of the 10817 string must be specified with C<n>. The C<rv> argument will be upgraded to 10818 an RV. That RV will be modified to point to the new SV. The C<classname> 10819 argument indicates the package for the blessing. Set C<classname> to 10820 C<NULL> to avoid the blessing. The new SV will have a reference count 10821 of 1, and the RV will be returned. 10822 10823 Note that C<sv_setref_pv> copies the pointer while this copies the string. 10824 10825 =cut 10826 */ 10827 10828 SV* 10829 Perl_sv_setref_pvn(pTHX_ SV *const rv, const char *const classname, 10830 const char *const pv, const STRLEN n) 10831 { 10832 PERL_ARGS_ASSERT_SV_SETREF_PVN; 10833 10834 sv_setpvn(newSVrv(rv,classname), pv, n); 10835 return rv; 10836 } 10837 10838 /* 10839 =for apidoc sv_bless 10840 10841 Blesses an SV into a specified package. The SV must be an RV. The package 10842 must be designated by its stash (see C<L</gv_stashpv>>). The reference count 10843 of the SV is unaffected. 10844 10845 =cut 10846 */ 10847 10848 SV* 10849 Perl_sv_bless(pTHX_ SV *const sv, HV *const stash) 10850 { 10851 SV *tmpRef; 10852 HV *oldstash = NULL; 10853 10854 PERL_ARGS_ASSERT_SV_BLESS; 10855 10856 SvGETMAGIC(sv); 10857 if (!SvROK(sv)) 10858 Perl_croak(aTHX_ "Can't bless non-reference value"); 10859 if (HvSTASH_IS_CLASS(stash)) 10860 Perl_croak(aTHX_ "Attempt to bless into a class"); 10861 10862 tmpRef = SvRV(sv); 10863 if (SvFLAGS(tmpRef) & (SVs_OBJECT|SVf_READONLY|SVf_PROTECT)) { 10864 if (SvREADONLY(tmpRef)) 10865 Perl_croak_no_modify(); 10866 if (SvTYPE(tmpRef) == SVt_PVOBJ) 10867 Perl_croak(aTHX_ "Can't bless an object reference"); 10868 if (SvOBJECT(tmpRef)) { 10869 oldstash = SvSTASH(tmpRef); 10870 } 10871 } 10872 SvOBJECT_on(tmpRef); 10873 SvUPGRADE(tmpRef, SVt_PVMG); 10874 SvSTASH_set(tmpRef, MUTABLE_HV(SvREFCNT_inc_simple(stash))); 10875 SvREFCNT_dec(oldstash); 10876 10877 if(SvSMAGICAL(tmpRef)) 10878 if(mg_find(tmpRef, PERL_MAGIC_ext) || mg_find(tmpRef, PERL_MAGIC_uvar)) 10879 mg_set(tmpRef); 10880 10881 10882 10883 return sv; 10884 } 10885 10886 /* Downgrades a PVGV to a PVMG. If it's actually a PVLV, we leave the type 10887 * as it is after unglobbing it. 10888 */ 10889 10890 PERL_STATIC_INLINE void 10891 S_sv_unglob(pTHX_ SV *const sv, U32 flags) 10892 { 10893 void *xpvmg; 10894 HV *stash; 10895 SV * const temp = flags & SV_COW_DROP_PV ? NULL : sv_newmortal(); 10896 10897 PERL_ARGS_ASSERT_SV_UNGLOB; 10898 10899 assert(SvTYPE(sv) == SVt_PVGV || SvTYPE(sv) == SVt_PVLV); 10900 SvFAKE_off(sv); 10901 if (!(flags & SV_COW_DROP_PV)) 10902 gv_efullname3(temp, MUTABLE_GV(sv), "*"); 10903 10904 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv)); 10905 if (GvGP(sv)) { 10906 if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv))) 10907 && HvHasNAME(stash)) 10908 mro_method_changed_in(stash); 10909 gp_free(MUTABLE_GV(sv)); 10910 } 10911 if (GvSTASH(sv)) { 10912 sv_del_backref(MUTABLE_SV(GvSTASH(sv)), sv); 10913 GvSTASH(sv) = NULL; 10914 } 10915 GvMULTI_off(sv); 10916 if (GvNAME_HEK(sv)) { 10917 unshare_hek(GvNAME_HEK(sv)); 10918 } 10919 isGV_with_GP_off(sv); 10920 10921 if(SvTYPE(sv) == SVt_PVGV) { 10922 /* need to keep SvANY(sv) in the right arena */ 10923 xpvmg = new_XPVMG(); 10924 StructCopy(SvANY(sv), xpvmg, XPVMG); 10925 del_body_by_type(SvANY(sv), SVt_PVGV); 10926 SvANY(sv) = xpvmg; 10927 10928 SvFLAGS(sv) &= ~SVTYPEMASK; 10929 SvFLAGS(sv) |= SVt_PVMG; 10930 } 10931 10932 /* Intentionally not calling any local SET magic, as this isn't so much a 10933 set operation as merely an internal storage change. */ 10934 if (flags & SV_COW_DROP_PV) SvOK_off(sv); 10935 else sv_setsv_flags(sv, temp, 0); 10936 10937 if ((const GV *)sv == PL_last_in_gv) 10938 PL_last_in_gv = NULL; 10939 else if ((const GV *)sv == PL_statgv) 10940 PL_statgv = NULL; 10941 } 10942 10943 /* 10944 =for apidoc sv_unref_flags 10945 10946 Unsets the RV status of the SV, and decrements the reference count of 10947 whatever was being referenced by the RV. This can almost be thought of 10948 as a reversal of C<newSVrv>. The C<cflags> argument can contain 10949 C<SV_IMMEDIATE_UNREF> to force the reference count to be decremented 10950 (otherwise the decrementing is conditional on the reference count being 10951 different from one or the reference being a readonly SV). 10952 See C<L</SvROK_off>>. 10953 10954 =for apidoc Amnh||SV_IMMEDIATE_UNREF 10955 10956 =cut 10957 */ 10958 10959 void 10960 Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags) 10961 { 10962 SV* const target = SvRV(ref); 10963 10964 PERL_ARGS_ASSERT_SV_UNREF_FLAGS; 10965 10966 if (SvWEAKREF(ref)) { 10967 sv_del_backref(target, ref); 10968 SvWEAKREF_off(ref); 10969 SvRV_set(ref, NULL); 10970 return; 10971 } 10972 SvRV_set(ref, NULL); 10973 SvROK_off(ref); 10974 /* You can't have a || SvREADONLY(target) here, as $a = $$a, where $a was 10975 assigned to as BEGIN {$a = \"Foo"} will fail. */ 10976 if (SvREFCNT(target) != 1 || (flags & SV_IMMEDIATE_UNREF)) 10977 SvREFCNT_dec_NN(target); 10978 else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */ 10979 sv_2mortal(target); /* Schedule for freeing later */ 10980 } 10981 10982 /* 10983 =for apidoc sv_untaint 10984 10985 Untaint an SV. Use C<SvTAINTED_off> instead. 10986 10987 =cut 10988 */ 10989 10990 void 10991 Perl_sv_untaint(pTHX_ SV *const sv) 10992 { 10993 PERL_ARGS_ASSERT_SV_UNTAINT; 10994 PERL_UNUSED_CONTEXT; 10995 10996 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 10997 MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); 10998 if (mg) 10999 mg->mg_len &= ~1; 11000 } 11001 } 11002 11003 /* 11004 =for apidoc sv_tainted 11005 11006 Test an SV for taintedness. Use C<SvTAINTED> instead. 11007 11008 =cut 11009 */ 11010 11011 bool 11012 Perl_sv_tainted(pTHX_ SV *const sv) 11013 { 11014 PERL_ARGS_ASSERT_SV_TAINTED; 11015 PERL_UNUSED_CONTEXT; 11016 11017 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 11018 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); 11019 if (mg && (mg->mg_len & 1) ) 11020 return TRUE; 11021 } 11022 return FALSE; 11023 } 11024 11025 #if defined(MULTIPLICITY) 11026 11027 /* pTHX_ magic can't cope with varargs, so this is a no-context 11028 * version of the main function, (which may itself be aliased to us). 11029 * Don't access this version directly. 11030 */ 11031 11032 void 11033 Perl_sv_setpvf_nocontext(SV *const sv, const char *const pat, ...) 11034 { 11035 dTHX; 11036 va_list args; 11037 11038 PERL_ARGS_ASSERT_SV_SETPVF_NOCONTEXT; 11039 11040 va_start(args, pat); 11041 sv_vsetpvf(sv, pat, &args); 11042 va_end(args); 11043 } 11044 11045 /* pTHX_ magic can't cope with varargs, so this is a no-context 11046 * version of the main function, (which may itself be aliased to us). 11047 * Don't access this version directly. 11048 */ 11049 11050 void 11051 Perl_sv_setpvf_mg_nocontext(SV *const sv, const char *const pat, ...) 11052 { 11053 dTHX; 11054 va_list args; 11055 11056 PERL_ARGS_ASSERT_SV_SETPVF_MG_NOCONTEXT; 11057 11058 va_start(args, pat); 11059 sv_vsetpvf_mg(sv, pat, &args); 11060 va_end(args); 11061 } 11062 #endif 11063 11064 /* 11065 =for apidoc sv_setpvf 11066 =for apidoc_item sv_setpvf_mg 11067 =for apidoc_item sv_setpvf_mg_nocontext 11068 =for apidoc_item sv_setpvf_nocontext 11069 11070 These work like C<L</sv_catpvf>> but copy the text into the SV instead of 11071 appending it. 11072 11073 The differences between these are: 11074 11075 C<sv_setpvf_mg> and C<sv_setpvf_mg_nocontext> perform 'set' magic; C<sv_setpvf> 11076 and C<sv_setpvf_nocontext> skip all magic. 11077 11078 C<sv_setpvf_nocontext> and C<sv_setpvf_mg_nocontext> do not take a thread 11079 context (C<aTHX>) parameter, so are used in situations where the caller 11080 doesn't already have the thread context. 11081 11082 B<The UTF-8 flag is not changed by these functions.> 11083 11084 =cut 11085 */ 11086 11087 void 11088 Perl_sv_setpvf(pTHX_ SV *const sv, const char *const pat, ...) 11089 { 11090 va_list args; 11091 11092 PERL_ARGS_ASSERT_SV_SETPVF; 11093 11094 va_start(args, pat); 11095 sv_vsetpvf(sv, pat, &args); 11096 va_end(args); 11097 } 11098 11099 /* 11100 =for apidoc sv_vsetpvf 11101 =for apidoc_item sv_vsetpvf_mg 11102 11103 These work like C<L</sv_vcatpvf>> but copy the text into the SV instead of 11104 appending it. 11105 11106 They differ only in that C<sv_vsetpvf_mg> performs 'set' magic; 11107 C<sv_vsetpvf> skips all magic. 11108 11109 They are usually used via their frontends, C<L</sv_setpvf>> and 11110 C<L</sv_setpvf_mg>>. 11111 11112 B<The UTF-8 flag is not changed by these functions.> 11113 11114 =cut 11115 */ 11116 11117 void 11118 Perl_sv_vsetpvf(pTHX_ SV *const sv, const char *const pat, va_list *const args) 11119 { 11120 PERL_ARGS_ASSERT_SV_VSETPVF; 11121 11122 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 11123 } 11124 11125 void 11126 Perl_sv_setpvf_mg(pTHX_ SV *const sv, const char *const pat, ...) 11127 { 11128 va_list args; 11129 11130 PERL_ARGS_ASSERT_SV_SETPVF_MG; 11131 11132 va_start(args, pat); 11133 sv_vsetpvf_mg(sv, pat, &args); 11134 va_end(args); 11135 } 11136 11137 void 11138 Perl_sv_vsetpvf_mg(pTHX_ SV *const sv, const char *const pat, va_list *const args) 11139 { 11140 PERL_ARGS_ASSERT_SV_VSETPVF_MG; 11141 11142 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 11143 SvSETMAGIC(sv); 11144 } 11145 11146 #if defined(MULTIPLICITY) 11147 11148 /* pTHX_ magic can't cope with varargs, so this is a no-context 11149 * version of the main function, (which may itself be aliased to us). 11150 * Don't access this version directly. 11151 */ 11152 11153 void 11154 Perl_sv_catpvf_nocontext(SV *const sv, const char *const pat, ...) 11155 { 11156 dTHX; 11157 va_list args; 11158 11159 PERL_ARGS_ASSERT_SV_CATPVF_NOCONTEXT; 11160 11161 va_start(args, pat); 11162 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11163 va_end(args); 11164 } 11165 11166 /* pTHX_ magic can't cope with varargs, so this is a no-context 11167 * version of the main function, (which may itself be aliased to us). 11168 * Don't access this version directly. 11169 */ 11170 11171 void 11172 Perl_sv_catpvf_mg_nocontext(SV *const sv, const char *const pat, ...) 11173 { 11174 dTHX; 11175 va_list args; 11176 11177 PERL_ARGS_ASSERT_SV_CATPVF_MG_NOCONTEXT; 11178 11179 va_start(args, pat); 11180 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11181 SvSETMAGIC(sv); 11182 va_end(args); 11183 } 11184 #endif 11185 11186 /* 11187 =for apidoc sv_catpvf 11188 =for apidoc_item sv_catpvf_mg 11189 =for apidoc_item sv_catpvf_mg_nocontext 11190 =for apidoc_item sv_catpvf_nocontext 11191 11192 These process their arguments like C<sprintf>, and append the formatted 11193 output to an SV. As with C<sv_vcatpvfn>, argument reordering is not supporte 11194 when called with a non-null C-style variable argument list. 11195 11196 If the appended data contains "wide" characters 11197 (including, but not limited to, SVs with a UTF-8 PV formatted with C<%s>, 11198 and characters >255 formatted with C<%c>), the original SV might get 11199 upgraded to UTF-8. 11200 11201 If the original SV was UTF-8, the pattern should be 11202 valid UTF-8; if the original SV was bytes, the pattern should be too. 11203 11204 All perform 'get' magic, but only C<sv_catpvf_mg> and C<sv_catpvf_mg_nocontext> 11205 perform 'set' magic. 11206 11207 C<sv_catpvf_nocontext> and C<sv_catpvf_mg_nocontext> do not take a thread 11208 context (C<aTHX>) parameter, so are used in situations where the caller 11209 doesn't already have the thread context. 11210 11211 =cut 11212 */ 11213 11214 void 11215 Perl_sv_catpvf(pTHX_ SV *const sv, const char *const pat, ...) 11216 { 11217 va_list args; 11218 11219 PERL_ARGS_ASSERT_SV_CATPVF; 11220 11221 va_start(args, pat); 11222 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11223 va_end(args); 11224 } 11225 11226 /* 11227 =for apidoc sv_vcatpvf 11228 =for apidoc_item sv_vcatpvf_mg 11229 11230 These process their arguments like C<sv_vcatpvfn> called with a non-null 11231 C-style variable argument list, and append the formatted output to C<sv>. 11232 11233 They differ only in that C<sv_vcatpvf_mg> performs 'set' magic; 11234 C<sv_vcatpvf> skips 'set' magic. 11235 11236 Both perform 'get' magic. 11237 11238 They are usually accessed via their frontends C<L</sv_catpvf>> and 11239 C<L</sv_catpvf_mg>>. 11240 11241 =cut 11242 */ 11243 11244 void 11245 Perl_sv_vcatpvf(pTHX_ SV *const sv, const char *const pat, va_list *const args) 11246 { 11247 PERL_ARGS_ASSERT_SV_VCATPVF; 11248 11249 sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11250 } 11251 11252 void 11253 Perl_sv_catpvf_mg(pTHX_ SV *const sv, const char *const pat, ...) 11254 { 11255 va_list args; 11256 11257 PERL_ARGS_ASSERT_SV_CATPVF_MG; 11258 11259 va_start(args, pat); 11260 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC); 11261 SvSETMAGIC(sv); 11262 va_end(args); 11263 } 11264 11265 void 11266 Perl_sv_vcatpvf_mg(pTHX_ SV *const sv, const char *const pat, va_list *const args) 11267 { 11268 PERL_ARGS_ASSERT_SV_VCATPVF_MG; 11269 11270 sv_vcatpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL); 11271 SvSETMAGIC(sv); 11272 } 11273 11274 /* 11275 =for apidoc sv_vsetpvfn 11276 11277 Works like C<sv_vcatpvfn> but copies the text into the SV instead of 11278 appending it. 11279 11280 B<The UTF-8 flag is not changed by this function.> 11281 11282 Usually used via one of its frontends L</C<sv_vsetpvf>> and 11283 L</C<sv_vsetpvf_mg>>. 11284 11285 =cut 11286 */ 11287 11288 void 11289 Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, 11290 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) 11291 { 11292 PERL_ARGS_ASSERT_SV_VSETPVFN; 11293 11294 SvPVCLEAR(sv); 11295 sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0); 11296 } 11297 11298 11299 /* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK */ 11300 11301 PERL_STATIC_INLINE void 11302 S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len) 11303 { 11304 STRLEN const need = len + SvCUR(sv) + 1; 11305 char *end; 11306 11307 /* can't wrap as both len and SvCUR() are allocated in 11308 * memory and together can't consume all the address space 11309 */ 11310 assert(need > len); 11311 11312 assert(SvPOK(sv)); 11313 SvGROW(sv, need); 11314 end = SvEND(sv); 11315 Copy(buf, end, len, char); 11316 end += len; 11317 *end = '\0'; 11318 SvCUR_set(sv, need - 1); 11319 } 11320 11321 11322 /* 11323 * Warn of missing argument to sprintf. The value used in place of such 11324 * arguments should be &PL_sv_no; an undefined value would yield 11325 * inappropriate "use of uninit" warnings [perl #71000]. 11326 */ 11327 STATIC void 11328 S_warn_vcatpvfn_missing_argument(pTHX) { 11329 if (ckWARN(WARN_MISSING)) { 11330 Perl_warner(aTHX_ packWARN(WARN_MISSING), "Missing argument in %s", 11331 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); 11332 } 11333 } 11334 11335 11336 static void 11337 S_croak_overflow() 11338 { 11339 dTHX; 11340 Perl_croak(aTHX_ "Integer overflow in format string for %s", 11341 (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn")); 11342 } 11343 11344 11345 /* Given an int i from the next arg (if args is true) or an sv from an arg 11346 * (if args is false), try to extract a STRLEN-ranged value from the arg, 11347 * with overflow checking. 11348 * Sets *neg to true if the value was negative (untouched otherwise. 11349 * Returns the absolute value. 11350 * As an extra margin of safety, it croaks if the returned value would 11351 * exceed the maximum value of a STRLEN / 4. 11352 */ 11353 11354 static STRLEN 11355 S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg) 11356 { 11357 IV iv; 11358 11359 if (args) { 11360 iv = i; 11361 goto do_iv; 11362 } 11363 11364 if (!sv) 11365 return 0; 11366 11367 SvGETMAGIC(sv); 11368 11369 if (UNLIKELY(SvIsUV(sv))) { 11370 UV uv = SvUV_nomg(sv); 11371 if (uv > IV_MAX) 11372 S_croak_overflow(); 11373 iv = uv; 11374 } 11375 else { 11376 iv = SvIV_nomg(sv); 11377 do_iv: 11378 if (iv < 0) { 11379 if (iv < -IV_MAX) 11380 S_croak_overflow(); 11381 iv = -iv; 11382 *neg = TRUE; 11383 } 11384 } 11385 11386 if (iv > (IV)(((STRLEN)~0) / 4)) 11387 S_croak_overflow(); 11388 11389 return (STRLEN)iv; 11390 } 11391 11392 /* Read in and return a number. Updates *pattern to point to the char 11393 * following the number. Expects the first char to 1..9. 11394 * Croaks if the number exceeds 1/4 of the maximum value of STRLEN. 11395 * This is a belt-and-braces safety measure to complement any 11396 * overflow/wrap checks done in the main body of sv_vcatpvfn_flags. 11397 * It means that e.g. on a 32-bit system the width/precision can't be more 11398 * than 1G, which seems reasonable. 11399 */ 11400 11401 STATIC STRLEN 11402 S_expect_number(pTHX_ const char **const pattern) 11403 { 11404 STRLEN var; 11405 11406 PERL_ARGS_ASSERT_EXPECT_NUMBER; 11407 11408 assert(inRANGE(**pattern, '1', '9')); 11409 11410 var = *(*pattern)++ - '0'; 11411 while (isDIGIT(**pattern)) { 11412 /* if var * 10 + 9 would exceed 1/4 max strlen, croak */ 11413 if (var > ((((STRLEN)~0) / 4 - 9) / 10)) 11414 S_croak_overflow(); 11415 var = var * 10 + (*(*pattern)++ - '0'); 11416 } 11417 return var; 11418 } 11419 11420 /* Implement a fast "%.0f": given a pointer to the end of a buffer (caller 11421 * ensures it's big enough), back fill it with the rounded integer part of 11422 * nv. Returns ptr to start of string, and sets *len to its length. 11423 * Returns NULL if not convertible. 11424 */ 11425 11426 STATIC char * 11427 S_F0convert(NV nv, char *const endbuf, STRLEN *const len) 11428 { 11429 const int neg = nv < 0; 11430 UV uv; 11431 11432 PERL_ARGS_ASSERT_F0CONVERT; 11433 11434 assert(!Perl_isinfnan(nv)); 11435 if (neg) 11436 nv = -nv; 11437 if (nv != 0.0 && nv < (NV) UV_MAX) { 11438 char *p = endbuf; 11439 uv = (UV)nv; 11440 if (uv != nv) { 11441 nv += 0.5; 11442 uv = (UV)nv; 11443 if (uv & 1 && uv == nv) 11444 uv--; /* Round to even */ 11445 } 11446 do { 11447 const unsigned dig = uv % 10; 11448 *--p = '0' + dig; 11449 } while (uv /= 10); 11450 if (neg) 11451 *--p = '-'; 11452 *len = endbuf - p; 11453 return p; 11454 } 11455 return NULL; 11456 } 11457 11458 11459 /* XXX maybe_tainted is never assigned to, so the doc above is lying. */ 11460 11461 void 11462 Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, 11463 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) 11464 { 11465 PERL_ARGS_ASSERT_SV_VCATPVFN; 11466 11467 sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC); 11468 } 11469 11470 11471 /* For the vcatpvfn code, we need a long double target in case 11472 * HAS_LONG_DOUBLE, even without USE_LONG_DOUBLE, so that we can printf 11473 * with long double formats, even without NV being long double. But we 11474 * call the target 'fv' instead of 'nv', since most of the time it is not 11475 * (most compilers these days recognize "long double", even if only as a 11476 * synonym for "double"). 11477 */ 11478 #if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \ 11479 defined(PERL_PRIgldbl) && !defined(USE_QUADMATH) 11480 # define VCATPVFN_FV_GF PERL_PRIgldbl 11481 # if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT) 11482 /* Work around breakage in OTS$CVT_FLOAT_T_X */ 11483 # define VCATPVFN_NV_TO_FV(nv,fv) \ 11484 STMT_START { \ 11485 double _dv = nv; \ 11486 fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \ 11487 } STMT_END 11488 # else 11489 # define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv) 11490 # endif 11491 typedef long double vcatpvfn_long_double_t; 11492 #else 11493 # define VCATPVFN_FV_GF NVgf 11494 # define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv) 11495 typedef NV vcatpvfn_long_double_t; 11496 #endif 11497 11498 #ifdef LONGDOUBLE_DOUBLEDOUBLE 11499 /* The first double can be as large as 2**1023, or '1' x '0' x 1023. 11500 * The second double can be as small as 2**-1074, or '0' x 1073 . '1'. 11501 * The sum of them can be '1' . '0' x 2096 . '1', with implied radix point 11502 * after the first 1023 zero bits. 11503 * 11504 * XXX The 2098 is quite large (262.25 bytes) and therefore some sort 11505 * of dynamically growing buffer might be better, start at just 16 bytes 11506 * (for example) and grow only when necessary. Or maybe just by looking 11507 * at the exponents of the two doubles? */ 11508 # define DOUBLEDOUBLE_MAXBITS 2098 11509 #endif 11510 11511 /* vhex will contain the values (0..15) of the hex digits ("nybbles" 11512 * of 4 bits); 1 for the implicit 1, and the mantissa bits, four bits 11513 * per xdigit. For the double-double case, this can be rather many. 11514 * The non-double-double-long-double overshoots since all bits of NV 11515 * are not mantissa bits, there are also exponent bits. */ 11516 #ifdef LONGDOUBLE_DOUBLEDOUBLE 11517 # define VHEX_SIZE (3+DOUBLEDOUBLE_MAXBITS/4) 11518 #else 11519 # define VHEX_SIZE (1+(NVSIZE * 8)/4) 11520 #endif 11521 11522 /* If we do not have a known long double format, (including not using 11523 * long doubles, or long doubles being equal to doubles) then we will 11524 * fall back to the ldexp/frexp route, with which we can retrieve at 11525 * most as many bits as our widest unsigned integer type is. We try 11526 * to get a 64-bit unsigned integer even if we are not using a 64-bit UV. 11527 * 11528 * (If you want to test the case of UVSIZE == 4, NVSIZE == 8, 11529 * set the MANTISSATYPE to int and the MANTISSASIZE to 4.) 11530 */ 11531 #if defined(HAS_QUAD) && defined(Uquad_t) 11532 # define MANTISSATYPE Uquad_t 11533 # define MANTISSASIZE 8 11534 #else 11535 # define MANTISSATYPE UV 11536 # define MANTISSASIZE UVSIZE 11537 #endif 11538 11539 #if defined(DOUBLE_LITTLE_ENDIAN) || defined(LONGDOUBLE_LITTLE_ENDIAN) 11540 # define HEXTRACT_LITTLE_ENDIAN 11541 #elif defined(DOUBLE_BIG_ENDIAN) || defined(LONGDOUBLE_BIG_ENDIAN) 11542 # define HEXTRACT_BIG_ENDIAN 11543 #else 11544 # define HEXTRACT_MIX_ENDIAN 11545 #endif 11546 11547 /* S_hextract() is a helper for S_format_hexfp, for extracting 11548 * the hexadecimal values (for %a/%A). The nv is the NV where the value 11549 * are being extracted from (either directly from the long double in-memory 11550 * presentation, or from the uquad computed via frexp+ldexp). frexp also 11551 * is used to update the exponent. The subnormal is set to true 11552 * for IEEE 754 subnormals/denormals (including the x86 80-bit format). 11553 * The vhex is the pointer to the beginning of the output buffer of VHEX_SIZE. 11554 * 11555 * The tricky part is that S_hextract() needs to be called twice: 11556 * the first time with vend as NULL, and the second time with vend as 11557 * the pointer returned by the first call. What happens is that on 11558 * the first round the output size is computed, and the intended 11559 * extraction sanity checked. On the second round the actual output 11560 * (the extraction of the hexadecimal values) takes place. 11561 * Sanity failures cause fatal failures during both rounds. */ 11562 STATIC U8* 11563 S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, 11564 U8* vhex, U8* vend) 11565 { 11566 U8* v = vhex; 11567 int ix; 11568 int ixmin = 0, ixmax = 0; 11569 11570 /* XXX Inf/NaN are not handled here, since it is 11571 * assumed they are to be output as "Inf" and "NaN". */ 11572 11573 /* These macros are just to reduce typos, they have multiple 11574 * repetitions below, but usually only one (or sometimes two) 11575 * of them is really being used. */ 11576 /* HEXTRACT_OUTPUT() extracts the high nybble first. */ 11577 #define HEXTRACT_OUTPUT_HI(ix) (*v++ = nvp[ix] >> 4) 11578 #define HEXTRACT_OUTPUT_LO(ix) (*v++ = nvp[ix] & 0xF) 11579 #define HEXTRACT_OUTPUT(ix) \ 11580 STMT_START { \ 11581 HEXTRACT_OUTPUT_HI(ix); HEXTRACT_OUTPUT_LO(ix); \ 11582 } STMT_END 11583 #define HEXTRACT_COUNT(ix, c) \ 11584 STMT_START { \ 11585 v += c; if (ix < ixmin) ixmin = ix; else if (ix > ixmax) ixmax = ix; \ 11586 } STMT_END 11587 #define HEXTRACT_BYTE(ix) \ 11588 STMT_START { \ 11589 if (vend) HEXTRACT_OUTPUT(ix); else HEXTRACT_COUNT(ix, 2); \ 11590 } STMT_END 11591 #define HEXTRACT_LO_NYBBLE(ix) \ 11592 STMT_START { \ 11593 if (vend) HEXTRACT_OUTPUT_LO(ix); else HEXTRACT_COUNT(ix, 1); \ 11594 } STMT_END 11595 /* HEXTRACT_TOP_NYBBLE is just convenience disguise, 11596 * to make it look less odd when the top bits of a NV 11597 * are extracted using HEXTRACT_LO_NYBBLE: the highest 11598 * order bits can be in the "low nybble" of a byte. */ 11599 #define HEXTRACT_TOP_NYBBLE(ix) HEXTRACT_LO_NYBBLE(ix) 11600 #define HEXTRACT_BYTES_LE(a, b) \ 11601 for (ix = a; ix >= b; ix--) { HEXTRACT_BYTE(ix); } 11602 #define HEXTRACT_BYTES_BE(a, b) \ 11603 for (ix = a; ix <= b; ix++) { HEXTRACT_BYTE(ix); } 11604 #define HEXTRACT_GET_SUBNORMAL(nv) *subnormal = Perl_fp_class_denorm(nv) 11605 #define HEXTRACT_IMPLICIT_BIT(nv) \ 11606 STMT_START { \ 11607 if (!*subnormal) { \ 11608 if (vend) *v++ = ((nv) == 0.0) ? 0 : 1; else v++; \ 11609 } \ 11610 } STMT_END 11611 11612 /* Most formats do. Those which don't should undef this. 11613 * 11614 * But also note that IEEE 754 subnormals do not have it, or, 11615 * expressed alternatively, their implicit bit is zero. */ 11616 #define HEXTRACT_HAS_IMPLICIT_BIT 11617 11618 /* Many formats do. Those which don't should undef this. */ 11619 #define HEXTRACT_HAS_TOP_NYBBLE 11620 11621 /* HEXTRACTSIZE is the maximum number of xdigits. */ 11622 #if defined(USE_LONG_DOUBLE) && defined(LONGDOUBLE_DOUBLEDOUBLE) 11623 # define HEXTRACTSIZE (2+DOUBLEDOUBLE_MAXBITS/4) 11624 #else 11625 # define HEXTRACTSIZE 2 * NVSIZE 11626 #endif 11627 11628 const U8* vmaxend = vhex + HEXTRACTSIZE; 11629 11630 assert(HEXTRACTSIZE <= VHEX_SIZE); 11631 11632 PERL_UNUSED_VAR(ix); /* might happen */ 11633 (void)Perl_frexp(PERL_ABS(nv), exponent); 11634 *subnormal = FALSE; 11635 if (vend && (vend <= vhex || vend > vmaxend)) { 11636 /* diag_listed_as: Hexadecimal float: internal error (%s) */ 11637 Perl_croak(aTHX_ "Hexadecimal float: internal error (entry)"); 11638 } 11639 { 11640 /* First check if using long doubles. */ 11641 #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) 11642 # if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN 11643 /* Used in e.g. VMS and HP-UX IA-64, e.g. -0.1L: 11644 * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb bf */ 11645 /* The bytes 13..0 are the mantissa/fraction, 11646 * the 15,14 are the sign+exponent. */ 11647 const U8* nvp = (const U8*)(&nv); 11648 HEXTRACT_GET_SUBNORMAL(nv); 11649 HEXTRACT_IMPLICIT_BIT(nv); 11650 # undef HEXTRACT_HAS_TOP_NYBBLE 11651 HEXTRACT_BYTES_LE(13, 0); 11652 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN 11653 /* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L: 11654 * bf fb 99 99 99 99 99 99 99 99 99 99 99 99 99 9a */ 11655 /* The bytes 2..15 are the mantissa/fraction, 11656 * the 0,1 are the sign+exponent. */ 11657 const U8* nvp = (const U8*)(&nv); 11658 HEXTRACT_GET_SUBNORMAL(nv); 11659 HEXTRACT_IMPLICIT_BIT(nv); 11660 # undef HEXTRACT_HAS_TOP_NYBBLE 11661 HEXTRACT_BYTES_BE(2, 15); 11662 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 11663 /* x86 80-bit "extended precision", 64 bits of mantissa / fraction / 11664 * significand, 15 bits of exponent, 1 bit of sign. No implicit bit. 11665 * NVSIZE can be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux 11666 * and OS X), meaning that 2 or 6 bytes are empty padding. */ 11667 /* The bytes 0..1 are the sign+exponent, 11668 * the bytes 2..9 are the mantissa/fraction. */ 11669 const U8* nvp = (const U8*)(&nv); 11670 # undef HEXTRACT_HAS_IMPLICIT_BIT 11671 # undef HEXTRACT_HAS_TOP_NYBBLE 11672 HEXTRACT_GET_SUBNORMAL(nv); 11673 HEXTRACT_BYTES_LE(7, 0); 11674 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN 11675 /* Does this format ever happen? (Wikipedia says the Motorola 11676 * 6888x math coprocessors used format _like_ this but padded 11677 * to 96 bits with 16 unused bits between the exponent and the 11678 * mantissa.) */ 11679 const U8* nvp = (const U8*)(&nv); 11680 # undef HEXTRACT_HAS_IMPLICIT_BIT 11681 # undef HEXTRACT_HAS_TOP_NYBBLE 11682 HEXTRACT_GET_SUBNORMAL(nv); 11683 HEXTRACT_BYTES_BE(0, 7); 11684 # else 11685 # define HEXTRACT_FALLBACK 11686 /* Double-double format: two doubles next to each other. 11687 * The first double is the high-order one, exactly like 11688 * it would be for a "lone" double. The second double 11689 * is shifted down using the exponent so that that there 11690 * are no common bits. The tricky part is that the value 11691 * of the double-double is the SUM of the two doubles and 11692 * the second one can be also NEGATIVE. 11693 * 11694 * Because of this tricky construction the bytewise extraction we 11695 * use for the other long double formats doesn't work, we must 11696 * extract the values bit by bit. 11697 * 11698 * The little-endian double-double is used .. somewhere? 11699 * 11700 * The big endian double-double is used in e.g. PPC/Power (AIX) 11701 * and MIPS (SGI). 11702 * 11703 * The mantissa bits are in two separate stretches, e.g. for -0.1L: 11704 * 9a 99 99 99 99 99 59 bc 9a 99 99 99 99 99 b9 3f (LE) 11705 * 3f b9 99 99 99 99 99 9a bc 59 99 99 99 99 99 9a (BE) 11706 */ 11707 # endif 11708 #else /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) */ 11709 /* Using normal doubles, not long doubles. 11710 * 11711 * We generate 4-bit xdigits (nybble/nibble) instead of 8-bit 11712 * bytes, since we might need to handle printf precision, and 11713 * also need to insert the radix. */ 11714 # if NVSIZE == 8 11715 # ifdef HEXTRACT_LITTLE_ENDIAN 11716 /* 0 1 2 3 4 5 6 7 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */ 11717 const U8* nvp = (const U8*)(&nv); 11718 HEXTRACT_GET_SUBNORMAL(nv); 11719 HEXTRACT_IMPLICIT_BIT(nv); 11720 HEXTRACT_TOP_NYBBLE(6); 11721 HEXTRACT_BYTES_LE(5, 0); 11722 # elif defined(HEXTRACT_BIG_ENDIAN) 11723 /* 7 6 5 4 3 2 1 0 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */ 11724 const U8* nvp = (const U8*)(&nv); 11725 HEXTRACT_GET_SUBNORMAL(nv); 11726 HEXTRACT_IMPLICIT_BIT(nv); 11727 HEXTRACT_TOP_NYBBLE(1); 11728 HEXTRACT_BYTES_BE(2, 7); 11729 # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_LE_BE 11730 /* 4 5 6 7 0 1 2 3 (MSB = 7, LSB = 0, 6:7 = nybble:exponent:sign) */ 11731 const U8* nvp = (const U8*)(&nv); 11732 HEXTRACT_GET_SUBNORMAL(nv); 11733 HEXTRACT_IMPLICIT_BIT(nv); 11734 HEXTRACT_TOP_NYBBLE(2); /* 6 */ 11735 HEXTRACT_BYTE(1); /* 5 */ 11736 HEXTRACT_BYTE(0); /* 4 */ 11737 HEXTRACT_BYTE(7); /* 3 */ 11738 HEXTRACT_BYTE(6); /* 2 */ 11739 HEXTRACT_BYTE(5); /* 1 */ 11740 HEXTRACT_BYTE(4); /* 0 */ 11741 # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_BE_LE 11742 /* 3 2 1 0 7 6 5 4 (MSB = 7, LSB = 0, 7:6 = sign:exponent:nybble) */ 11743 const U8* nvp = (const U8*)(&nv); 11744 HEXTRACT_GET_SUBNORMAL(nv); 11745 HEXTRACT_IMPLICIT_BIT(nv); 11746 HEXTRACT_TOP_NYBBLE(5); /* 6 */ 11747 HEXTRACT_BYTE(6); /* 5 */ 11748 HEXTRACT_BYTE(7); /* 4 */ 11749 HEXTRACT_BYTE(0); /* 3 */ 11750 HEXTRACT_BYTE(1); /* 2 */ 11751 HEXTRACT_BYTE(2); /* 1 */ 11752 HEXTRACT_BYTE(3); /* 0 */ 11753 # else 11754 # define HEXTRACT_FALLBACK 11755 # endif 11756 # else 11757 # define HEXTRACT_FALLBACK 11758 # endif 11759 #endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */ 11760 11761 #ifdef HEXTRACT_FALLBACK 11762 HEXTRACT_GET_SUBNORMAL(nv); 11763 # undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */ 11764 /* The fallback is used for the double-double format, and 11765 * for unknown long double formats, and for unknown double 11766 * formats, or in general unknown NV formats. */ 11767 if (nv == (NV)0.0) { 11768 if (vend) 11769 *v++ = 0; 11770 else 11771 v++; 11772 *exponent = 0; 11773 } 11774 else { 11775 NV d = nv < 0 ? -nv : nv; 11776 NV e = (NV)1.0; 11777 U8 ha = 0x0; /* hexvalue accumulator */ 11778 U8 hd = 0x8; /* hexvalue digit */ 11779 11780 /* Shift d and e (and update exponent) so that e <= d < 2*e, 11781 * this is essentially manual frexp(). Multiplying by 0.5 and 11782 * doubling should be lossless in binary floating point. */ 11783 11784 *exponent = 1; 11785 11786 while (e > d) { 11787 e *= (NV)0.5; 11788 (*exponent)--; 11789 } 11790 /* Now d >= e */ 11791 11792 while (d >= e + e) { 11793 e += e; 11794 (*exponent)++; 11795 } 11796 /* Now e <= d < 2*e */ 11797 11798 /* First extract the leading hexdigit (the implicit bit). */ 11799 if (d >= e) { 11800 d -= e; 11801 if (vend) 11802 *v++ = 1; 11803 else 11804 v++; 11805 } 11806 else { 11807 if (vend) 11808 *v++ = 0; 11809 else 11810 v++; 11811 } 11812 e *= (NV)0.5; 11813 11814 /* Then extract the remaining hexdigits. */ 11815 while (d > (NV)0.0) { 11816 if (d >= e) { 11817 ha |= hd; 11818 d -= e; 11819 } 11820 if (hd == 1) { 11821 /* Output or count in groups of four bits, 11822 * that is, when the hexdigit is down to one. */ 11823 if (vend) 11824 *v++ = ha; 11825 else 11826 v++; 11827 /* Reset the hexvalue. */ 11828 ha = 0x0; 11829 hd = 0x8; 11830 } 11831 else 11832 hd >>= 1; 11833 e *= (NV)0.5; 11834 } 11835 11836 /* Flush possible pending hexvalue. */ 11837 if (ha) { 11838 if (vend) 11839 *v++ = ha; 11840 else 11841 v++; 11842 } 11843 } 11844 #endif 11845 } 11846 /* Croak for various reasons: if the output pointer escaped the 11847 * output buffer, if the extraction index escaped the extraction 11848 * buffer, or if the ending output pointer didn't match the 11849 * previously computed value. */ 11850 if (v <= vhex || v - vhex >= VHEX_SIZE || 11851 /* For double-double the ixmin and ixmax stay at zero, 11852 * which is convenient since the HEXTRACTSIZE is tricky 11853 * for double-double. */ 11854 ixmin < 0 || ixmax >= NVSIZE || 11855 (vend && v != vend)) { 11856 /* diag_listed_as: Hexadecimal float: internal error (%s) */ 11857 Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)"); 11858 } 11859 return v; 11860 } 11861 11862 11863 /* S_format_hexfp(): helper function for Perl_sv_vcatpvfn_flags(). 11864 * 11865 * Processes the %a/%A hexadecimal floating-point format, since the 11866 * built-in snprintf()s which are used for most of the f/p formats, don't 11867 * universally handle %a/%A. 11868 * Populates buf of length bufsize, and returns the length of the created 11869 * string. 11870 * The rest of the args have the same meaning as the local vars of the 11871 * same name within Perl_sv_vcatpvfn_flags(). 11872 * 11873 * The caller's determination of IN_LC(LC_NUMERIC), passed as in_lc_numeric, 11874 * is used to ensure we do the right thing when we need to access the locale's 11875 * numeric radix. 11876 * 11877 * It requires the caller to make buf large enough. 11878 */ 11879 11880 static STRLEN 11881 S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, 11882 const NV nv, const vcatpvfn_long_double_t fv, 11883 bool has_precis, STRLEN precis, STRLEN width, 11884 bool alt, char plus, bool left, bool fill, bool in_lc_numeric) 11885 { 11886 /* Hexadecimal floating point. */ 11887 char* p = buf; 11888 U8 vhex[VHEX_SIZE]; 11889 U8* v = vhex; /* working pointer to vhex */ 11890 U8* vend; /* pointer to one beyond last digit of vhex */ 11891 U8* vfnz = NULL; /* first non-zero */ 11892 U8* vlnz = NULL; /* last non-zero */ 11893 U8* v0 = NULL; /* first output */ 11894 const bool lower = (c == 'a'); 11895 /* At output the values of vhex (up to vend) will 11896 * be mapped through the xdig to get the actual 11897 * human-readable xdigits. */ 11898 const char* xdig = PL_hexdigit; 11899 STRLEN zerotail = 0; /* how many extra zeros to append */ 11900 int exponent = 0; /* exponent of the floating point input */ 11901 bool hexradix = FALSE; /* should we output the radix */ 11902 bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */ 11903 bool negative = FALSE; 11904 STRLEN elen; 11905 11906 /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf". 11907 * 11908 * For example with denormals, (assuming the vanilla 11909 * 64-bit double): the exponent is zero. 1xp-1074 is 11910 * the smallest denormal and the smallest double, it 11911 * could be output also as 0x0.0000000000001p-1022 to 11912 * match its internal structure. */ 11913 11914 vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL); 11915 S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend); 11916 11917 #if NVSIZE > DOUBLESIZE 11918 # ifdef HEXTRACT_HAS_IMPLICIT_BIT 11919 /* In this case there is an implicit bit, 11920 * and therefore the exponent is shifted by one. */ 11921 exponent--; 11922 # elif defined(NV_X86_80_BIT) 11923 if (subnormal) { 11924 /* The subnormals of the x86-80 have a base exponent of -16382, 11925 * (while the physical exponent bits are zero) but the frexp() 11926 * returned the scientific-style floating exponent. We want 11927 * to map the last one as: 11928 * -16831..-16384 -> -16382 (the last normal is 0x1p-16382) 11929 * -16835..-16388 -> -16384 11930 * since we want to keep the first hexdigit 11931 * as one of the [8421]. */ 11932 exponent = -4 * ( (exponent + 1) / -4) - 2; 11933 } else { 11934 exponent -= 4; 11935 } 11936 /* TBD: other non-implicit-bit platforms than the x86-80. */ 11937 # endif 11938 #endif 11939 11940 negative = fv < 0 || Perl_signbit(nv); 11941 if (negative) 11942 *p++ = '-'; 11943 else if (plus) 11944 *p++ = plus; 11945 *p++ = '0'; 11946 if (lower) { 11947 *p++ = 'x'; 11948 } 11949 else { 11950 *p++ = 'X'; 11951 xdig += 16; /* Use uppercase hex. */ 11952 } 11953 11954 /* Find the first non-zero xdigit. */ 11955 for (v = vhex; v < vend; v++) { 11956 if (*v) { 11957 vfnz = v; 11958 break; 11959 } 11960 } 11961 11962 if (vfnz) { 11963 /* Find the last non-zero xdigit. */ 11964 for (v = vend - 1; v >= vhex; v--) { 11965 if (*v) { 11966 vlnz = v; 11967 break; 11968 } 11969 } 11970 11971 #if NVSIZE == DOUBLESIZE 11972 if (fv != 0.0) 11973 exponent--; 11974 #endif 11975 11976 if (subnormal) { 11977 #ifndef NV_X86_80_BIT 11978 if (vfnz[0] > 1) { 11979 /* IEEE 754 subnormals (but not the x86 80-bit): 11980 * we want "normalize" the subnormal, 11981 * so we need to right shift the hex nybbles 11982 * so that the output of the subnormal starts 11983 * from the first true bit. (Another, equally 11984 * valid, policy would be to dump the subnormal 11985 * nybbles as-is, to display the "physical" layout.) */ 11986 int i, n; 11987 U8 *vshr; 11988 /* Find the ceil(log2(v[0])) of 11989 * the top non-zero nybble. */ 11990 for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { } 11991 assert(n < 4); 11992 assert(vlnz); 11993 vlnz[1] = 0; 11994 for (vshr = vlnz; vshr >= vfnz; vshr--) { 11995 vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n); 11996 vshr[0] >>= n; 11997 } 11998 if (vlnz[1]) { 11999 vlnz++; 12000 } 12001 } 12002 #endif 12003 v0 = vfnz; 12004 } else { 12005 v0 = vhex; 12006 } 12007 12008 if (has_precis) { 12009 U8* ve = (subnormal ? vlnz + 1 : vend); 12010 SSize_t vn = ve - v0; 12011 assert(vn >= 1); 12012 if (precis < (Size_t)(vn - 1)) { 12013 bool overflow = FALSE; 12014 if (v0[precis + 1] < 0x8) { 12015 /* Round down, nothing to do. */ 12016 } else if (v0[precis + 1] > 0x8) { 12017 /* Round up. */ 12018 v0[precis]++; 12019 overflow = v0[precis] > 0xF; 12020 v0[precis] &= 0xF; 12021 } else { /* v0[precis] == 0x8 */ 12022 /* Half-point: round towards the one 12023 * with the even least-significant digit: 12024 * 08 -> 0 88 -> 8 12025 * 18 -> 2 98 -> a 12026 * 28 -> 2 a8 -> a 12027 * 38 -> 4 b8 -> c 12028 * 48 -> 4 c8 -> c 12029 * 58 -> 6 d8 -> e 12030 * 68 -> 6 e8 -> e 12031 * 78 -> 8 f8 -> 10 */ 12032 if ((v0[precis] & 0x1)) { 12033 v0[precis]++; 12034 } 12035 overflow = v0[precis] > 0xF; 12036 v0[precis] &= 0xF; 12037 } 12038 12039 if (overflow) { 12040 for (v = v0 + precis - 1; v >= v0; v--) { 12041 (*v)++; 12042 overflow = *v > 0xF; 12043 (*v) &= 0xF; 12044 if (!overflow) { 12045 break; 12046 } 12047 } 12048 if (v == v0 - 1 && overflow) { 12049 /* If the overflow goes all the 12050 * way to the front, we need to 12051 * insert 0x1 in front, and adjust 12052 * the exponent. */ 12053 Move(v0, v0 + 1, vn - 1, char); 12054 *v0 = 0x1; 12055 exponent += 4; 12056 } 12057 } 12058 12059 /* The new effective "last non zero". */ 12060 vlnz = v0 + precis; 12061 } 12062 else { 12063 zerotail = 12064 subnormal ? precis - vn + 1 : 12065 precis - (vlnz - vhex); 12066 } 12067 } 12068 12069 v = v0; 12070 *p++ = xdig[*v++]; 12071 12072 /* If there are non-zero xdigits, the radix 12073 * is output after the first one. */ 12074 if (vfnz < vlnz) { 12075 hexradix = TRUE; 12076 } 12077 } 12078 else { 12079 *p++ = '0'; 12080 exponent = 0; 12081 zerotail = has_precis ? precis : 0; 12082 } 12083 12084 /* The radix is always output if precis, or if alt. */ 12085 if ((has_precis && precis > 0) || alt) { 12086 hexradix = TRUE; 12087 } 12088 12089 if (hexradix) { 12090 #ifndef USE_LOCALE_NUMERIC 12091 PERL_UNUSED_ARG(in_lc_numeric); 12092 12093 *p++ = '.'; 12094 #else 12095 if (in_lc_numeric) { 12096 STRLEN n; 12097 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(TRUE, { 12098 const char* r = SvPV(PL_numeric_radix_sv, n); 12099 Copy(r, p, n, char); 12100 }); 12101 p += n; 12102 } 12103 else { 12104 *p++ = '.'; 12105 } 12106 #endif 12107 } 12108 12109 if (vlnz) { 12110 while (v <= vlnz) 12111 *p++ = xdig[*v++]; 12112 } 12113 12114 if (zerotail > 0) { 12115 while (zerotail--) { 12116 *p++ = '0'; 12117 } 12118 } 12119 12120 elen = p - buf; 12121 12122 /* sanity checks */ 12123 if (elen >= bufsize || width >= bufsize) 12124 /* diag_listed_as: Hexadecimal float: internal error (%s) */ 12125 Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)"); 12126 12127 elen += my_snprintf(p, bufsize - elen, 12128 "%c%+d", lower ? 'p' : 'P', 12129 exponent); 12130 12131 if (elen < width) { 12132 STRLEN gap = (STRLEN)(width - elen); 12133 if (left) { 12134 /* Pad the back with spaces. */ 12135 memset(buf + elen, ' ', gap); 12136 } 12137 else if (fill) { 12138 /* Insert the zeros after the "0x" and the 12139 * the potential sign, but before the digits, 12140 * otherwise we end up with "0000xH.HHH...", 12141 * when we want "0x000H.HHH..." */ 12142 STRLEN nzero = gap; 12143 char* zerox = buf + 2; 12144 STRLEN nmove = elen - 2; 12145 if (negative || plus) { 12146 zerox++; 12147 nmove--; 12148 } 12149 Move(zerox, zerox + nzero, nmove, char); 12150 memset(zerox, fill ? '0' : ' ', nzero); 12151 } 12152 else { 12153 /* Move it to the right. */ 12154 Move(buf, buf + gap, 12155 elen, char); 12156 /* Pad the front with spaces. */ 12157 memset(buf, ' ', gap); 12158 } 12159 elen = width; 12160 } 12161 return elen; 12162 } 12163 12164 /* 12165 =for apidoc sv_vcatpvfn 12166 =for apidoc_item sv_vcatpvfn_flags 12167 12168 These process their arguments like C<L<vsprintf(3)>> and append the formatted output 12169 to an SV. They use an array of SVs if the C-style variable argument list is 12170 missing (C<NULL>). Argument reordering (using format specifiers like C<%2$d> or 12171 C<%*2$d>) is supported only when using an array of SVs; using a C-style 12172 C<va_list> argument list with a format string that uses argument reordering 12173 will yield an exception. 12174 12175 When running with taint checks enabled, they indicate via C<maybe_tainted> if 12176 results are untrustworthy (often due to the use of locales). 12177 12178 They assume that C<pat> has the same utf8-ness as C<sv>. It's the caller's 12179 responsibility to ensure that this is so. 12180 12181 They differ in that C<sv_vcatpvfn_flags> has a C<flags> parameter in which you 12182 can set or clear the C<SV_GMAGIC> and/or S<SV_SMAGIC> flags, to specify which 12183 magic to handle or not handle; whereas plain C<sv_vcatpvfn> always specifies 12184 both 'get' and 'set' magic. 12185 12186 They are usually used via one of the frontends L</C<sv_vcatpvf>> and 12187 L</C<sv_vcatpvf_mg>>. 12188 12189 =cut 12190 */ 12191 12192 12193 void 12194 Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, 12195 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted, 12196 const U32 flags) 12197 { 12198 const char *fmtstart; /* character following the current '%' */ 12199 const char *q; /* current position within format */ 12200 const char *patend; 12201 STRLEN origlen; 12202 Size_t svix = 0; 12203 static const char nullstr[] = "(null)"; 12204 bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ 12205 const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ 12206 /* Times 4: a decimal digit takes more than 3 binary digits. 12207 * NV_DIG: mantissa takes that many decimal digits. 12208 * Plus 32: Playing safe. */ 12209 char ebuf[IV_DIG * 4 + NV_DIG + 32]; 12210 bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ 12211 #ifdef USE_LOCALE_NUMERIC 12212 bool have_in_lc_numeric = FALSE; 12213 #endif 12214 /* we never change this unless USE_LOCALE_NUMERIC */ 12215 bool in_lc_numeric = FALSE; 12216 SV *tmp_sv = NULL; 12217 12218 PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS; 12219 PERL_UNUSED_ARG(maybe_tainted); 12220 12221 if (flags & SV_GMAGIC) 12222 SvGETMAGIC(sv); 12223 12224 /* no matter what, this is a string now */ 12225 (void)SvPV_force_nomg(sv, origlen); 12226 12227 /* the code that scans for flags etc following a % relies on 12228 * a '\0' being present to avoid falling off the end. Ideally that 12229 * should be fixed */ 12230 assert(pat[patlen] == '\0'); 12231 12232 12233 /* Special-case "", "%s", "%-p" (SVf - see below) and "%.0f". 12234 * In each case, if there isn't the correct number of args, instead 12235 * fall through to the main code to handle the issuing of any 12236 * warnings etc. 12237 */ 12238 12239 if (patlen == 0 && (args || sv_count == 0)) 12240 return; 12241 12242 if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) { 12243 12244 /* "%s" */ 12245 if (patlen == 2 && pat[1] == 's') { 12246 if (args) { 12247 const char * const s = va_arg(*args, char*); 12248 sv_catpv_nomg(sv, s ? s : nullstr); 12249 } 12250 else { 12251 /* we want get magic on the source but not the target. 12252 * sv_catsv can't do that, though */ 12253 SvGETMAGIC(*svargs); 12254 sv_catsv_nomg(sv, *svargs); 12255 } 12256 return; 12257 } 12258 12259 /* "%-p" */ 12260 if (args) { 12261 if (patlen == 3 && pat[1] == '-' && pat[2] == 'p') { 12262 SV *asv = MUTABLE_SV(va_arg(*args, void*)); 12263 sv_catsv_nomg(sv, asv); 12264 return; 12265 } 12266 } 12267 #if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH) 12268 /* special-case "%.0f" */ 12269 else if ( patlen == 4 12270 && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f') 12271 { 12272 const NV nv = SvNV(*svargs); 12273 if (LIKELY(!Perl_isinfnan(nv))) { 12274 STRLEN l; 12275 char *p; 12276 12277 if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { 12278 sv_catpvn_nomg(sv, p, l); 12279 return; 12280 } 12281 } 12282 } 12283 #endif /* !USE_LONG_DOUBLE */ 12284 } 12285 12286 12287 patend = (char*)pat + patlen; 12288 for (fmtstart = pat; fmtstart < patend; fmtstart = q) { 12289 char intsize = 0; /* size qualifier in "%hi..." etc */ 12290 bool alt = FALSE; /* has "%#..." */ 12291 bool left = FALSE; /* has "%-..." */ 12292 bool fill = FALSE; /* has "%0..." */ 12293 char plus = 0; /* has "%+..." */ 12294 STRLEN width = 0; /* value of "%NNN..." */ 12295 bool has_precis = FALSE; /* has "%.NNN..." */ 12296 STRLEN precis = 0; /* value of "%.NNN..." */ 12297 int base = 0; /* base to print in, e.g. 8 for %o */ 12298 UV uv = 0; /* the value to print of int-ish args */ 12299 12300 bool vectorize = FALSE; /* has "%v..." */ 12301 bool vec_utf8 = FALSE; /* SvUTF8(vec arg) */ 12302 const U8 *vecstr = NULL; /* SvPVX(vec arg) */ 12303 STRLEN veclen = 0; /* SvCUR(vec arg) */ 12304 const char *dotstr = NULL; /* separator string for %v */ 12305 STRLEN dotstrlen; /* length of separator string for %v */ 12306 12307 Size_t efix = 0; /* explicit format parameter index */ 12308 const Size_t osvix = svix; /* original index in case of bad fmt */ 12309 12310 SV *argsv = NULL; 12311 bool is_utf8 = FALSE; /* is this item utf8? */ 12312 bool arg_missing = FALSE; /* give "Missing argument" warning */ 12313 char esignbuf[4]; /* holds sign prefix, e.g. "-0x" */ 12314 STRLEN esignlen = 0; /* length of e.g. "-0x" */ 12315 STRLEN zeros = 0; /* how many '0' to prepend */ 12316 12317 const char *eptr = NULL; /* the address of the element string */ 12318 STRLEN elen = 0; /* the length of the element string */ 12319 12320 char c; /* the actual format ('d', s' etc) */ 12321 12322 bool escape_it = FALSE; /* if this is a string should we quote and escape it? */ 12323 12324 12325 /* echo everything up to the next format specification */ 12326 for (q = fmtstart; q < patend && *q != '%'; ++q) 12327 {}; 12328 12329 if (q > fmtstart) { 12330 if (has_utf8 && !pat_utf8) { 12331 /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on 12332 * the fly */ 12333 const char *p; 12334 char *dst; 12335 STRLEN need = SvCUR(sv) + (q - fmtstart) + 1; 12336 12337 for (p = fmtstart; p < q; p++) 12338 if (!NATIVE_BYTE_IS_INVARIANT(*p)) 12339 need++; 12340 SvGROW(sv, need); 12341 12342 dst = SvEND(sv); 12343 for (p = fmtstart; p < q; p++) 12344 append_utf8_from_native_byte((U8)*p, (U8**)&dst); 12345 *dst = '\0'; 12346 SvCUR_set(sv, need - 1); 12347 } 12348 else 12349 S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart); 12350 } 12351 if (q++ >= patend) 12352 break; 12353 12354 fmtstart = q; /* fmtstart is char following the '%' */ 12355 12356 /* 12357 We allow format specification elements in this order: 12358 \d+\$ explicit format parameter index 12359 [-+ 0#]+ flags 12360 v|\*(\d+\$)?v vector with optional (optionally specified) arg 12361 0 flag (as above): repeated to allow "v02" 12362 \d+|\*(\d+\$)? width using optional (optionally specified) arg 12363 \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg 12364 [hlqLV] size 12365 [%bcdefginopsuxDFOUX] format (mandatory) 12366 */ 12367 12368 if (inRANGE(*q, '1', '9')) { 12369 width = expect_number(&q); 12370 if (*q == '$') { 12371 if (args) 12372 Perl_croak_nocontext( 12373 "Cannot yet reorder sv_vcatpvfn() arguments from va_list"); 12374 ++q; 12375 efix = (Size_t)width; 12376 width = 0; 12377 no_redundant_warning = TRUE; 12378 } else { 12379 goto gotwidth; 12380 } 12381 } 12382 12383 /* FLAGS */ 12384 12385 while (*q) { 12386 switch (*q) { 12387 case ' ': 12388 case '+': 12389 if (plus == '+' && *q == ' ') /* '+' over ' ' */ 12390 q++; 12391 else 12392 plus = *q++; 12393 continue; 12394 12395 case '-': 12396 left = TRUE; 12397 q++; 12398 continue; 12399 12400 case '0': 12401 fill = TRUE; 12402 q++; 12403 continue; 12404 12405 case '#': 12406 alt = TRUE; 12407 q++; 12408 continue; 12409 12410 default: 12411 break; 12412 } 12413 break; 12414 } 12415 12416 /* at this point we can expect one of: 12417 * 12418 * 123 an explicit width 12419 * * width taken from next arg 12420 * *12$ width taken from 12th arg 12421 * or no width 12422 * 12423 * But any width specification may be preceded by a v, in one of its 12424 * forms: 12425 * v 12426 * *v 12427 * *12$v 12428 * So an asterisk may be either a width specifier or a vector 12429 * separator arg specifier, and we don't know which initially 12430 */ 12431 12432 tryasterisk: 12433 if (*q == '*') { 12434 STRLEN ix; /* explicit width/vector separator index */ 12435 q++; 12436 if (inRANGE(*q, '1', '9')) { 12437 ix = expect_number(&q); 12438 if (*q++ == '$') { 12439 if (args) 12440 Perl_croak_nocontext( 12441 "Cannot yet reorder sv_vcatpvfn() arguments from va_list"); 12442 no_redundant_warning = TRUE; 12443 } else 12444 goto unknown; 12445 } 12446 else 12447 ix = 0; 12448 12449 if (*q == 'v') { 12450 SV *vecsv; 12451 /* The asterisk was for *v, *NNN$v: vectorizing, but not 12452 * with the default "." */ 12453 q++; 12454 if (vectorize) 12455 goto unknown; 12456 if (args) 12457 vecsv = va_arg(*args, SV*); 12458 else { 12459 ix = ix ? ix - 1 : svix++; 12460 vecsv = ix < sv_count ? svargs[ix] 12461 : (arg_missing = TRUE, &PL_sv_no); 12462 } 12463 dotstr = SvPV_const(vecsv, dotstrlen); 12464 /* Keep the DO_UTF8 test *after* the SvPV call, else things go 12465 bad with tied or overloaded values that return UTF8. */ 12466 if (DO_UTF8(vecsv)) 12467 is_utf8 = TRUE; 12468 else if (has_utf8) { 12469 vecsv = sv_mortalcopy(vecsv); 12470 sv_utf8_upgrade(vecsv); 12471 dotstr = SvPV_const(vecsv, dotstrlen); 12472 is_utf8 = TRUE; 12473 } 12474 vectorize = TRUE; 12475 goto tryasterisk; 12476 } 12477 12478 /* the asterisk specified a width */ 12479 { 12480 int i = 0; 12481 SV *width_sv = NULL; 12482 if (args) 12483 i = va_arg(*args, int); 12484 else { 12485 ix = ix ? ix - 1 : svix++; 12486 width_sv = (ix < sv_count) ? svargs[ix] 12487 : (arg_missing = TRUE, (SV*)NULL); 12488 } 12489 width = S_sprintf_arg_num_val(aTHX_ args, i, width_sv, &left); 12490 } 12491 } 12492 else if (*q == 'v') { 12493 q++; 12494 if (vectorize) 12495 goto unknown; 12496 vectorize = TRUE; 12497 dotstr = "."; 12498 dotstrlen = 1; 12499 goto tryasterisk; 12500 12501 } 12502 else { 12503 /* explicit width? */ 12504 if(*q == '0') { 12505 fill = TRUE; 12506 q++; 12507 } 12508 if (inRANGE(*q, '1', '9')) 12509 width = expect_number(&q); 12510 } 12511 12512 gotwidth: 12513 12514 /* PRECISION */ 12515 12516 if (*q == '.') { 12517 q++; 12518 if (*q == '*') { 12519 STRLEN ix; /* explicit precision index */ 12520 q++; 12521 if (inRANGE(*q, '1', '9')) { 12522 ix = expect_number(&q); 12523 if (*q++ == '$') { 12524 if (args) 12525 Perl_croak_nocontext( 12526 "Cannot yet reorder sv_vcatpvfn() arguments from va_list"); 12527 no_redundant_warning = TRUE; 12528 } else 12529 goto unknown; 12530 } 12531 else 12532 ix = 0; 12533 12534 { 12535 int i = 0; 12536 SV *width_sv = NULL; 12537 bool neg = FALSE; 12538 12539 if (args) 12540 i = va_arg(*args, int); 12541 else { 12542 ix = ix ? ix - 1 : svix++; 12543 width_sv = (ix < sv_count) ? svargs[ix] 12544 : (arg_missing = TRUE, (SV*)NULL); 12545 } 12546 precis = S_sprintf_arg_num_val(aTHX_ args, i, width_sv, &neg); 12547 has_precis = !neg; 12548 /* ignore negative precision */ 12549 if (!has_precis) 12550 precis = 0; 12551 } 12552 } 12553 else { 12554 /* although it doesn't seem documented, this code has long 12555 * behaved so that: 12556 * no digits following the '.' is treated like '.0' 12557 * the number may be preceded by any number of zeroes, 12558 * e.g. "%.0001f", which is the same as "%.1f" 12559 * so I've kept that behaviour. DAPM May 2017 12560 */ 12561 while (*q == '0') 12562 q++; 12563 precis = inRANGE(*q, '1', '9') ? expect_number(&q) : 0; 12564 has_precis = TRUE; 12565 } 12566 } 12567 12568 /* SIZE */ 12569 12570 switch (*q) { 12571 #ifdef WIN32 12572 case 'I': /* Ix, I32x, and I64x */ 12573 # ifdef USE_64_BIT_INT 12574 if (q[1] == '6' && q[2] == '4') { 12575 q += 3; 12576 intsize = 'q'; 12577 break; 12578 } 12579 # endif 12580 if (q[1] == '3' && q[2] == '2') { 12581 q += 3; 12582 break; 12583 } 12584 # ifdef USE_64_BIT_INT 12585 intsize = 'q'; 12586 # endif 12587 q++; 12588 break; 12589 #endif 12590 #if (IVSIZE >= 8 || defined(HAS_LONG_DOUBLE)) || \ 12591 (IVSIZE == 4 && !defined(HAS_LONG_DOUBLE)) 12592 case 'L': /* Ld */ 12593 /* FALLTHROUGH */ 12594 # if IVSIZE >= 8 12595 case 'q': /* qd */ 12596 # endif 12597 intsize = 'q'; 12598 q++; 12599 break; 12600 #endif 12601 case 'l': 12602 ++q; 12603 #if (IVSIZE >= 8 || defined(HAS_LONG_DOUBLE)) || \ 12604 (IVSIZE == 4 && !defined(HAS_LONG_DOUBLE)) 12605 if (*q == 'l') { /* lld, llf */ 12606 intsize = 'q'; 12607 ++q; 12608 } 12609 else 12610 #endif 12611 intsize = 'l'; 12612 break; 12613 case 'h': 12614 if (*++q == 'h') { /* hhd, hhu */ 12615 intsize = 'c'; 12616 ++q; 12617 } 12618 else 12619 intsize = 'h'; 12620 break; 12621 #ifdef USE_QUADMATH 12622 case 'Q': 12623 #endif 12624 case 'V': 12625 case 'z': 12626 case 't': 12627 case 'j': 12628 intsize = *q++; 12629 break; 12630 } 12631 12632 /* CONVERSION */ 12633 12634 c = *q++; /* c now holds the conversion type */ 12635 12636 /* '%' doesn't have an arg, so skip arg processing */ 12637 if (c == '%') { 12638 eptr = q - 1; 12639 elen = 1; 12640 if (vectorize) 12641 goto unknown; 12642 goto string; 12643 } 12644 12645 if (vectorize && !memCHRs("BbDdiOouUXx", c)) 12646 goto unknown; 12647 12648 /* get next arg (individual branches do their own va_arg() 12649 * handling for the args case) */ 12650 12651 if (!args) { 12652 efix = efix ? efix - 1 : svix++; 12653 argsv = efix < sv_count ? svargs[efix] 12654 : (arg_missing = TRUE, &PL_sv_no); 12655 } 12656 12657 12658 switch (c) { 12659 12660 /* STRINGS */ 12661 12662 case 's': 12663 if (args) { 12664 eptr = va_arg(*args, char*); 12665 if (eptr) 12666 if (has_precis) 12667 elen = my_strnlen(eptr, precis); 12668 else 12669 elen = strlen(eptr); 12670 else { 12671 eptr = (char *)nullstr; 12672 elen = sizeof nullstr - 1; 12673 } 12674 } 12675 else { 12676 eptr = SvPV_const(argsv, elen); 12677 if (DO_UTF8(argsv)) { 12678 STRLEN old_precis = precis; 12679 if (has_precis && precis < elen) { 12680 STRLEN ulen = sv_or_pv_len_utf8(argsv, eptr, elen); 12681 STRLEN p = precis > ulen ? ulen : precis; 12682 precis = sv_or_pv_pos_u2b(argsv, eptr, p, 0); 12683 /* sticks at end */ 12684 } 12685 if (width) { /* fudge width (can't fudge elen) */ 12686 if (has_precis && precis < elen) 12687 width += precis - old_precis; 12688 else 12689 width += 12690 elen - sv_or_pv_len_utf8(argsv,eptr,elen); 12691 } 12692 is_utf8 = TRUE; 12693 } 12694 } 12695 12696 string: 12697 if (escape_it) { 12698 U32 flags = PERL_PV_PRETTY_QUOTEDPREFIX; 12699 if (is_utf8) 12700 flags |= PERL_PV_ESCAPE_UNI; 12701 12702 if (!tmp_sv) { 12703 /* "blah"... where blah might be made up 12704 * of characters like \x{1234} */ 12705 tmp_sv = newSV(1 + (PERL_QUOTEDPREFIX_LEN * 8) + 1 + 3); 12706 sv_2mortal(tmp_sv); 12707 } 12708 pv_pretty(tmp_sv, eptr, elen, PERL_QUOTEDPREFIX_LEN, 12709 NULL, NULL, flags); 12710 eptr = SvPV_const(tmp_sv, elen); 12711 } 12712 if (has_precis && precis < elen) 12713 elen = precis; 12714 break; 12715 12716 /* INTEGERS */ 12717 12718 case 'p': 12719 12720 /* BEGIN NOTE 12721 * 12722 * We want to extend the C level sprintf format API with 12723 * custom formats for specific types (eg SV*) and behavior. 12724 * However some C compilers are "sprintf aware" and will 12725 * throw compile time exceptions when an illegal sprintf is 12726 * encountered, so we can't just add new format letters. 12727 * 12728 * However it turns out the length argument to the %p format 12729 * is more or less useless (the size of a pointer does not 12730 * change over time) and is not really used in the C level 12731 * code. Accordingly we can map our special behavior to 12732 * specific "length" options to the %p format. We hide these 12733 * mappings behind defines anyway, so nobody needs to know 12734 * that HEKf is actually %2p. This keeps the C compiler 12735 * happy while allowing us to add new formats. 12736 * 12737 * Note the existing logic for which number is used for what 12738 * is torturous. All negative values are used for SVf, and 12739 * non-negative values have arbitrary meanings with no 12740 * structure to them. This may change in the future. 12741 * 12742 * NEVER use the raw %p values directly. Always use the define 12743 * as the underlying mapping may change in the future. 12744 * 12745 * END NOTE 12746 * 12747 * %p extensions: 12748 * 12749 * "%...p" is normally treated like "%...x", except that the 12750 * number to print is the SV's address (or a pointer address 12751 * for C-ish sprintf). 12752 * 12753 * However, the C-ish sprintf variant allows a few special 12754 * extensions. These are currently: 12755 * 12756 * %-p (SVf) Like %s, but gets the string from an SV* 12757 * arg rather than a char* arg. Use C<SVfARG()> 12758 * to set up the argument properly. 12759 * (This was previously %_). 12760 * 12761 * %-<num>p Ditto but like %.<num>s (i.e. num is max 12762 * width), there is no escaped and quoted version 12763 * of this. 12764 * 12765 * %1p (PVf_QUOTEDPREFIX). Like raw %s, but it is escaped 12766 * and quoted. 12767 * 12768 * %5p (SVf_QUOTEDPREFIX) Like SVf, but length restricted, 12769 * escaped and quoted with pv_pretty. Intended 12770 * for error messages. 12771 * 12772 * %2p (HEKf) Like %s, but using the key string in a HEK 12773 * %7p (HEKf_QUOTEDPREFIX) ... but escaped and quoted. 12774 * 12775 * %3p (HEKf256) Ditto but like %.256s 12776 * %8p (HEKf256_QUOTEDPREFIX) ... but escaped and quoted 12777 * 12778 * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args: 12779 * (cBOOL(utf8), len, string_buf). 12780 * It's handled by the "case 'd'" branch 12781 * rather than here. 12782 * %d%lu%9p (UTF8f_QUOTEDPREFIX) .. but escaped and quoted. 12783 * 12784 * %6p (HvNAMEf) Like %s, but using the HvNAME() and HvNAMELEN() 12785 * %10p (HvNAMEf_QUOTEDPREFIX) ... but escaped and quoted 12786 * 12787 * %<num>p where num is > 9: reserved for future 12788 * extensions. Warns, but then is treated as a 12789 * general %p (print hex address) format. 12790 * 12791 * NOTE: If you add a new magic %p value you will 12792 * need to update F<t/porting/diag.t> to be aware of it 12793 * on top of adding the various defines and etc. Do not 12794 * forget to add it to F<pod/perlguts.pod> as well. 12795 */ 12796 12797 if ( args 12798 && !intsize 12799 && !fill 12800 && !plus 12801 && !has_precis 12802 /* not %*p or %*1$p - any width was explicit */ 12803 && q[-2] != '*' 12804 && q[-2] != '$' 12805 ) { 12806 if (left || width == 5) { /* %-p (SVf), %-NNNp, %5p */ 12807 if (left && width) { 12808 precis = width; 12809 has_precis = TRUE; 12810 } else if (width == 5) { 12811 escape_it = TRUE; 12812 } 12813 argsv = MUTABLE_SV(va_arg(*args, void*)); 12814 eptr = SvPV_const(argsv, elen); 12815 if (DO_UTF8(argsv)) 12816 is_utf8 = TRUE; 12817 width = 0; 12818 goto string; 12819 } 12820 else if (width == 2 || width == 3 || 12821 width == 7 || width == 8) 12822 { /* HEKf, HEKf256, HEKf_QUOTEDPREFIX, HEKf256_QUOTEDPREFIX */ 12823 HEK * const hek = va_arg(*args, HEK *); 12824 eptr = HEK_KEY(hek); 12825 elen = HEK_LEN(hek); 12826 if (HEK_UTF8(hek)) 12827 is_utf8 = TRUE; 12828 if (width == 3) { 12829 precis = 256; 12830 has_precis = TRUE; 12831 } 12832 if (width > 5) 12833 escape_it = TRUE; 12834 width = 0; 12835 goto string; 12836 } 12837 else if (width == 1) { 12838 eptr = va_arg(*args,char *); 12839 elen = strlen(eptr); 12840 escape_it = TRUE; 12841 width = 0; 12842 goto string; 12843 } 12844 else if (width == 6 || width == 10) { 12845 HV *hv = va_arg(*args, HV *); 12846 eptr = HvNAME(hv); 12847 elen = HvNAMELEN(hv); 12848 if (HvNAMEUTF8(hv)) 12849 is_utf8 = TRUE; 12850 if (width == 10) 12851 escape_it = TRUE; 12852 width = 0; 12853 goto string; 12854 } 12855 else if (width) { 12856 /* note width=4 or width=9 is handled under %d */ 12857 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 12858 "internal %%<num>p might conflict with future printf extensions"); 12859 } 12860 } 12861 12862 /* treat as normal %...p */ 12863 12864 uv = PTR2UV(args ? va_arg(*args, void*) : argsv); 12865 base = 16; 12866 c = 'x'; /* in case the format string contains '#' */ 12867 goto do_integer; 12868 12869 case 'c': 12870 /* Ignore any size specifiers, since they're not documented as 12871 * being allowed for %c (ideally we should warn on e.g. '%hc'). 12872 * Setting a default intsize, along with a positive 12873 * (which signals unsigned) base, causes, for C-ish use, the 12874 * va_arg to be interpreted as an unsigned int, when it's 12875 * actually signed, which will convert -ve values to high +ve 12876 * values. Note that unlike the libc %c, values > 255 will 12877 * convert to high unicode points rather than being truncated 12878 * to 8 bits. For perlish use, it will do SvUV(argsv), which 12879 * will again convert -ve args to high -ve values. 12880 */ 12881 intsize = 0; 12882 base = 1; /* special value that indicates we're doing a 'c' */ 12883 goto get_int_arg_val; 12884 12885 case 'D': 12886 #ifdef IV_IS_QUAD 12887 intsize = 'q'; 12888 #else 12889 intsize = 'l'; 12890 #endif 12891 base = -10; 12892 goto get_int_arg_val; 12893 12894 case 'd': 12895 /* probably just a plain %d, but it might be the start of the 12896 * special UTF8f format, which usually looks something like 12897 * "%d%lu%4p" (the lu may vary by platform) or 12898 * "%d%lu%9p" for an escaped version. 12899 */ 12900 assert((UTF8f)[0] == 'd'); 12901 assert((UTF8f)[1] == '%'); 12902 12903 if ( args /* UTF8f only valid for C-ish sprintf */ 12904 && q == fmtstart + 1 /* plain %d, not %....d */ 12905 && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */ 12906 && *q == '%' 12907 && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 5) 12908 && q[sizeof(UTF8f)-3] == 'p' 12909 && (q[sizeof(UTF8f)-4] == '4' || 12910 q[sizeof(UTF8f)-4] == '9')) 12911 { 12912 /* The argument has already gone through cBOOL, so the cast 12913 is safe. */ 12914 if (q[sizeof(UTF8f)-4] == '9') 12915 escape_it = TRUE; 12916 is_utf8 = (bool)va_arg(*args, int); 12917 elen = va_arg(*args, UV); 12918 /* if utf8 length is larger than 0x7ffff..., then it might 12919 * have been a signed value that wrapped */ 12920 if (elen > ((~(STRLEN)0) >> 1)) { 12921 assert(0); /* in DEBUGGING build we want to crash */ 12922 elen = 0; /* otherwise we want to treat this as an empty string */ 12923 } 12924 eptr = va_arg(*args, char *); 12925 q += sizeof(UTF8f) - 2; 12926 goto string; 12927 } 12928 12929 /* FALLTHROUGH */ 12930 case 'i': 12931 base = -10; 12932 goto get_int_arg_val; 12933 12934 case 'U': 12935 #ifdef IV_IS_QUAD 12936 intsize = 'q'; 12937 #else 12938 intsize = 'l'; 12939 #endif 12940 /* FALLTHROUGH */ 12941 case 'u': 12942 base = 10; 12943 goto get_int_arg_val; 12944 12945 case 'B': 12946 case 'b': 12947 base = 2; 12948 goto get_int_arg_val; 12949 12950 case 'O': 12951 #ifdef IV_IS_QUAD 12952 intsize = 'q'; 12953 #else 12954 intsize = 'l'; 12955 #endif 12956 /* FALLTHROUGH */ 12957 case 'o': 12958 base = 8; 12959 goto get_int_arg_val; 12960 12961 case 'X': 12962 case 'x': 12963 base = 16; 12964 12965 get_int_arg_val: 12966 12967 if (vectorize) { 12968 STRLEN ulen; 12969 SV *vecsv; 12970 12971 if (base < 0) { 12972 base = -base; 12973 if (plus) 12974 esignbuf[esignlen++] = plus; 12975 } 12976 12977 /* initialise the vector string to iterate over */ 12978 12979 vecsv = args ? va_arg(*args, SV*) : argsv; 12980 12981 /* if this is a version object, we need to convert 12982 * back into v-string notation and then let the 12983 * vectorize happen normally 12984 */ 12985 if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) { 12986 if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) { 12987 Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF), 12988 "vector argument not supported with alpha versions"); 12989 vecsv = &PL_sv_no; 12990 } 12991 else { 12992 vecstr = (U8*)SvPV_const(vecsv,veclen); 12993 vecsv = sv_newmortal(); 12994 scan_vstring((char *)vecstr, (char *)vecstr + veclen, 12995 vecsv); 12996 } 12997 } 12998 vecstr = (U8*)SvPV_const(vecsv, veclen); 12999 vec_utf8 = DO_UTF8(vecsv); 13000 13001 /* This is the re-entry point for when we're iterating 13002 * over the individual characters of a vector arg */ 13003 vector: 13004 if (!veclen) 13005 goto done_valid_conversion; 13006 if (vec_utf8) 13007 uv = utf8n_to_uvchr(vecstr, veclen, &ulen, 13008 UTF8_ALLOW_ANYUV); 13009 else { 13010 uv = *vecstr; 13011 ulen = 1; 13012 } 13013 vecstr += ulen; 13014 veclen -= ulen; 13015 } 13016 else { 13017 /* test arg for inf/nan. This can trigger an unwanted 13018 * 'str' overload, so manually force 'num' overload first 13019 * if necessary */ 13020 if (argsv) { 13021 SvGETMAGIC(argsv); 13022 if (UNLIKELY(SvAMAGIC(argsv))) 13023 argsv = sv_2num(argsv); 13024 if (UNLIKELY(isinfnansv(argsv))) 13025 goto handle_infnan_argsv; 13026 } 13027 13028 if (base < 0) { 13029 /* signed int type */ 13030 IV iv; 13031 base = -base; 13032 if (args) { 13033 switch (intsize) { 13034 case 'c': iv = (char)va_arg(*args, int); break; 13035 case 'h': iv = (short)va_arg(*args, int); break; 13036 case 'l': iv = va_arg(*args, long); break; 13037 case 'V': iv = va_arg(*args, IV); break; 13038 case 'z': iv = va_arg(*args, SSize_t); break; 13039 #ifdef HAS_PTRDIFF_T 13040 case 't': iv = va_arg(*args, ptrdiff_t); break; 13041 #endif 13042 default: iv = va_arg(*args, int); break; 13043 case 'j': iv = (IV) va_arg(*args, PERL_INTMAX_T); break; 13044 case 'q': 13045 #if IVSIZE >= 8 13046 iv = va_arg(*args, Quad_t); break; 13047 #else 13048 goto unknown; 13049 #endif 13050 } 13051 } 13052 else { 13053 /* assign to tiv then cast to iv to work around 13054 * 2003 GCC cast bug (gnu.org bugzilla #13488) */ 13055 IV tiv = SvIV_nomg(argsv); 13056 switch (intsize) { 13057 case 'c': iv = (char)tiv; break; 13058 case 'h': iv = (short)tiv; break; 13059 case 'l': iv = (long)tiv; break; 13060 case 'V': 13061 default: iv = tiv; break; 13062 case 'q': 13063 #if IVSIZE >= 8 13064 iv = (Quad_t)tiv; break; 13065 #else 13066 goto unknown; 13067 #endif 13068 } 13069 } 13070 13071 /* now convert iv to uv */ 13072 if (iv >= 0) { 13073 uv = iv; 13074 if (plus) 13075 esignbuf[esignlen++] = plus; 13076 } 13077 else { 13078 /* Using 0- here to silence bogus warning from MS VC */ 13079 uv = (UV) (0 - (UV) iv); 13080 esignbuf[esignlen++] = '-'; 13081 } 13082 } 13083 else { 13084 /* unsigned int type */ 13085 if (args) { 13086 switch (intsize) { 13087 case 'c': uv = (unsigned char)va_arg(*args, unsigned); 13088 break; 13089 case 'h': uv = (unsigned short)va_arg(*args, unsigned); 13090 break; 13091 case 'l': uv = va_arg(*args, unsigned long); break; 13092 case 'V': uv = va_arg(*args, UV); break; 13093 case 'z': uv = va_arg(*args, Size_t); break; 13094 #ifdef HAS_PTRDIFF_T 13095 /* will sign extend, but there is no 13096 * uptrdiff_t, so oh well */ 13097 case 't': uv = va_arg(*args, ptrdiff_t); break; 13098 #endif 13099 case 'j': uv = (UV) va_arg(*args, PERL_UINTMAX_T); break; 13100 default: uv = va_arg(*args, unsigned); break; 13101 case 'q': 13102 #if IVSIZE >= 8 13103 uv = va_arg(*args, Uquad_t); break; 13104 #else 13105 goto unknown; 13106 #endif 13107 } 13108 } 13109 else { 13110 /* assign to tiv then cast to iv to work around 13111 * 2003 GCC cast bug (gnu.org bugzilla #13488) */ 13112 UV tuv = SvUV_nomg(argsv); 13113 switch (intsize) { 13114 case 'c': uv = (unsigned char)tuv; break; 13115 case 'h': uv = (unsigned short)tuv; break; 13116 case 'l': uv = (unsigned long)tuv; break; 13117 case 'V': 13118 default: uv = tuv; break; 13119 case 'q': 13120 #if IVSIZE >= 8 13121 uv = (Uquad_t)tuv; break; 13122 #else 13123 goto unknown; 13124 #endif 13125 } 13126 } 13127 } 13128 } 13129 13130 do_integer: 13131 { 13132 char *ptr = ebuf + sizeof ebuf; 13133 unsigned dig; 13134 zeros = 0; 13135 13136 switch (base) { 13137 case 16: 13138 { 13139 const char * const p = 13140 (c == 'X') ? PL_hexdigit + 16 : PL_hexdigit; 13141 13142 do { 13143 dig = uv & 15; 13144 *--ptr = p[dig]; 13145 } while (uv >>= 4); 13146 if (alt && *ptr != '0') { 13147 esignbuf[esignlen++] = '0'; 13148 esignbuf[esignlen++] = c; /* 'x' or 'X' */ 13149 } 13150 break; 13151 } 13152 case 8: 13153 do { 13154 dig = uv & 7; 13155 *--ptr = '0' + dig; 13156 } while (uv >>= 3); 13157 if (alt && *ptr != '0') 13158 *--ptr = '0'; 13159 break; 13160 case 2: 13161 do { 13162 dig = uv & 1; 13163 *--ptr = '0' + dig; 13164 } while (uv >>= 1); 13165 if (alt && *ptr != '0') { 13166 esignbuf[esignlen++] = '0'; 13167 esignbuf[esignlen++] = c; /* 'b' or 'B' */ 13168 } 13169 break; 13170 13171 case 1: 13172 /* special-case: base 1 indicates a 'c' format: 13173 * we use the common code for extracting a uv, 13174 * but handle that value differently here than 13175 * all the other int types */ 13176 if ((uv > 255 || 13177 (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) 13178 && !IN_BYTES) 13179 { 13180 STATIC_ASSERT_STMT(sizeof(ebuf) >= UTF8_MAXBYTES + 1); 13181 eptr = ebuf; 13182 elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf; 13183 is_utf8 = TRUE; 13184 } 13185 else { 13186 eptr = ebuf; 13187 ebuf[0] = (char)uv; 13188 elen = 1; 13189 } 13190 goto string; 13191 13192 default: /* it had better be ten or less */ 13193 do { 13194 dig = uv % base; 13195 *--ptr = '0' + dig; 13196 } while (uv /= base); 13197 break; 13198 } 13199 elen = (ebuf + sizeof ebuf) - ptr; 13200 eptr = ptr; 13201 if (has_precis) { 13202 if (precis > elen) 13203 zeros = precis - elen; 13204 else if (precis == 0 && elen == 1 && *eptr == '0' 13205 && !(base == 8 && alt)) /* "%#.0o" prints "0" */ 13206 elen = 0; 13207 13208 /* a precision nullifies the 0 flag. */ 13209 fill = FALSE; 13210 } 13211 } 13212 break; 13213 13214 /* FLOATING POINT */ 13215 13216 case 'F': 13217 c = 'f'; /* maybe %F isn't supported here */ 13218 /* FALLTHROUGH */ 13219 case 'e': case 'E': 13220 case 'f': 13221 case 'g': case 'G': 13222 case 'a': case 'A': 13223 13224 { 13225 STRLEN float_need; /* what PL_efloatsize needs to become */ 13226 bool hexfp; /* hexadecimal floating point? */ 13227 13228 vcatpvfn_long_double_t fv; 13229 NV nv; 13230 13231 /* This is evil, but floating point is even more evil */ 13232 13233 /* for SV-style calling, we can only get NV 13234 for C-style calling, we assume %f is double; 13235 for simplicity we allow any of %Lf, %llf, %qf for long double 13236 */ 13237 switch (intsize) { 13238 #if defined(USE_QUADMATH) 13239 case 'Q': 13240 break; 13241 #endif 13242 case 'V': 13243 #if defined(USE_LONG_DOUBLE) || defined(USE_QUADMATH) 13244 intsize = 'q'; 13245 #endif 13246 break; 13247 /* [perl #20339] - we should accept and ignore %lf rather than die */ 13248 case 'l': 13249 /* FALLTHROUGH */ 13250 default: 13251 #if defined(USE_LONG_DOUBLE) || defined(USE_QUADMATH) 13252 intsize = args ? 0 : 'q'; 13253 #endif 13254 break; 13255 case 'q': 13256 #if defined(HAS_LONG_DOUBLE) 13257 break; 13258 #else 13259 /* FALLTHROUGH */ 13260 #endif 13261 case 'c': 13262 case 'h': 13263 case 'z': 13264 case 't': 13265 case 'j': 13266 goto unknown; 13267 } 13268 13269 /* Now we need (long double) if intsize == 'q', else (double). */ 13270 if (args) { 13271 /* Note: do not pull NVs off the va_list with va_arg() 13272 * (pull doubles instead) because if you have a build 13273 * with long doubles, you would always be pulling long 13274 * doubles, which would badly break anyone using only 13275 * doubles (i.e. the majority of builds). In other 13276 * words, you cannot mix doubles and long doubles. 13277 * The only case where you can pull off long doubles 13278 * is when the format specifier explicitly asks so with 13279 * e.g. "%Lg". */ 13280 #ifdef USE_QUADMATH 13281 nv = intsize == 'Q' ? va_arg(*args, NV) : 13282 intsize == 'q' ? va_arg(*args, long double) : 13283 va_arg(*args, double); 13284 fv = nv; 13285 #elif LONG_DOUBLESIZE > DOUBLESIZE 13286 if (intsize == 'q') { 13287 fv = va_arg(*args, long double); 13288 nv = fv; 13289 } else { 13290 nv = va_arg(*args, double); 13291 VCATPVFN_NV_TO_FV(nv, fv); 13292 } 13293 #else 13294 nv = va_arg(*args, double); 13295 fv = nv; 13296 #endif 13297 } 13298 else 13299 { 13300 SvGETMAGIC(argsv); 13301 /* we jump here if an int-ish format encountered an 13302 * infinite/Nan argsv. After setting nv/fv, it falls 13303 * into the isinfnan block which follows */ 13304 handle_infnan_argsv: 13305 nv = SvNV_nomg(argsv); 13306 VCATPVFN_NV_TO_FV(nv, fv); 13307 } 13308 13309 if (Perl_isinfnan(nv)) { 13310 if (c == 'c') 13311 Perl_croak(aTHX_ "Cannot printf %" NVgf " with '%c'", 13312 nv, (int)c); 13313 13314 elen = S_infnan_2pv(nv, ebuf, sizeof(ebuf), plus); 13315 assert(elen); 13316 eptr = ebuf; 13317 zeros = 0; 13318 esignlen = 0; 13319 dotstrlen = 0; 13320 break; 13321 } 13322 13323 /* special-case "%.0f" */ 13324 if ( c == 'f' 13325 && !precis 13326 && has_precis 13327 && !(width || left || plus || alt) 13328 && !fill 13329 && intsize != 'q' 13330 && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) 13331 ) 13332 goto float_concat; 13333 13334 /* Determine the buffer size needed for the various 13335 * floating-point formats. 13336 * 13337 * The basic possibilities are: 13338 * 13339 * <---P---> 13340 * %f 1111111.123456789 13341 * %e 1.111111123e+06 13342 * %a 0x1.0f4471f9bp+20 13343 * %g 1111111.12 13344 * %g 1.11111112e+15 13345 * 13346 * where P is the value of the precision in the format, or 6 13347 * if not specified. Note the two possible output formats of 13348 * %g; in both cases the number of significant digits is <= 13349 * precision. 13350 * 13351 * For most of the format types the maximum buffer size needed 13352 * is precision, plus: any leading 1 or 0x1, the radix 13353 * point, and an exponent. The difficult one is %f: for a 13354 * large positive exponent it can have many leading digits, 13355 * which needs to be calculated specially. Also %a is slightly 13356 * different in that in the absence of a specified precision, 13357 * it uses as many digits as necessary to distinguish 13358 * different values. 13359 * 13360 * First, here are the constant bits. For ease of calculation 13361 * we over-estimate the needed buffer size, for example by 13362 * assuming all formats have an exponent and a leading 0x1. 13363 * 13364 * Also for production use, add a little extra overhead for 13365 * safety's sake. Under debugging don't, as it means we're 13366 * more likely to quickly spot issues during development. 13367 */ 13368 13369 float_need = 1 /* possible unary minus */ 13370 + 4 /* "0x1" plus very unlikely carry */ 13371 + 1 /* default radix point '.' */ 13372 + 2 /* "e-", "p+" etc */ 13373 + 6 /* exponent: up to 16383 (quad fp) */ 13374 #ifndef DEBUGGING 13375 + 20 /* safety net */ 13376 #endif 13377 + 1; /* \0 */ 13378 13379 13380 /* determine the radix point len, e.g. length(".") in "1.2" */ 13381 #ifdef USE_LOCALE_NUMERIC 13382 /* note that we may either explicitly use PL_numeric_radix_sv 13383 * below, or implicitly, via an snprintf() variant. 13384 * Note also things like ps_AF.utf8 which has 13385 * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */ 13386 if (! have_in_lc_numeric) { 13387 in_lc_numeric = IN_LC(LC_NUMERIC); 13388 have_in_lc_numeric = TRUE; 13389 } 13390 13391 if (in_lc_numeric) { 13392 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(TRUE, { 13393 /* this can't wrap unless PL_numeric_radix_sv is a string 13394 * consuming virtually all the 32-bit or 64-bit address 13395 * space 13396 */ 13397 float_need += (SvCUR(PL_numeric_radix_sv) - 1); 13398 13399 /* floating-point formats only get utf8 if the radix point 13400 * is utf8. All other characters in the string are < 128 13401 * and so can be safely appended to both a non-utf8 and utf8 13402 * string as-is. 13403 * Note that this will convert the output to utf8 even if 13404 * the radix point didn't get output. 13405 */ 13406 if (SvUTF8(PL_numeric_radix_sv) && !has_utf8) { 13407 sv_utf8_upgrade(sv); 13408 has_utf8 = TRUE; 13409 } 13410 }); 13411 } 13412 #endif 13413 13414 hexfp = FALSE; 13415 13416 if (isALPHA_FOLD_EQ(c, 'f')) { 13417 /* Determine how many digits before the radix point 13418 * might be emitted. frexp() (or frexpl) has some 13419 * unspecified behaviour for nan/inf/-inf, so lucky we've 13420 * already handled them above */ 13421 STRLEN digits; 13422 int i = PERL_INT_MIN; 13423 (void)Perl_frexp((NV)fv, &i); 13424 if (i == PERL_INT_MIN) 13425 Perl_die(aTHX_ "panic: frexp: %" VCATPVFN_FV_GF, fv); 13426 13427 if (i > 0) { 13428 digits = BIT_DIGITS(i); 13429 /* this can't overflow. 'digits' will only be a few 13430 * thousand even for the largest floating-point types. 13431 * And up until now float_need is just some small 13432 * constants plus radix len, which can't be in 13433 * overflow territory unless the radix SV is consuming 13434 * over 1/2 the address space */ 13435 assert(float_need < ((STRLEN)~0) - digits); 13436 float_need += digits; 13437 } 13438 } 13439 else if (UNLIKELY(isALPHA_FOLD_EQ(c, 'a'))) { 13440 hexfp = TRUE; 13441 if (!has_precis) { 13442 /* %a in the absence of precision may print as many 13443 * digits as needed to represent the entire mantissa 13444 * bit pattern. 13445 * This estimate seriously overshoots in most cases, 13446 * but better the undershooting. Firstly, all bytes 13447 * of the NV are not mantissa, some of them are 13448 * exponent. Secondly, for the reasonably common 13449 * long doubles case, the "80-bit extended", two 13450 * or six bytes of the NV are unused. Also, we'll 13451 * still pick up an extra +6 from the default 13452 * precision calculation below. */ 13453 STRLEN digits = 13454 #ifdef LONGDOUBLE_DOUBLEDOUBLE 13455 /* For the "double double", we need more. 13456 * Since each double has their own exponent, the 13457 * doubles may float (haha) rather far from each 13458 * other, and the number of required bits is much 13459 * larger, up to total of DOUBLEDOUBLE_MAXBITS bits. 13460 * See the definition of DOUBLEDOUBLE_MAXBITS. 13461 * 13462 * Need 2 hexdigits for each byte. */ 13463 (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2; 13464 #else 13465 NVSIZE * 2; /* 2 hexdigits for each byte */ 13466 #endif 13467 /* see "this can't overflow" comment above */ 13468 assert(float_need < ((STRLEN)~0) - digits); 13469 float_need += digits; 13470 } 13471 } 13472 /* special-case "%.<number>g" if it will fit in ebuf */ 13473 else if (c == 'g' 13474 && precis /* See earlier comment about buggy Gconvert 13475 when digits, aka precis, is 0 */ 13476 && has_precis 13477 /* check that "%.<number>g" formatting will fit in ebuf */ 13478 && sizeof(ebuf) - float_need > precis 13479 /* sizeof(ebuf) - float_need will have wrapped if float_need > sizeof(ebuf). * 13480 * Therefore we should check that float_need < sizeof(ebuf). Normally, we would * 13481 * have run this check first, but that triggers incorrect -Wformat-overflow * 13482 * compilation warnings with some versions of gcc if Gconvert invokes sprintf(). * 13483 * ( See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89161 ) * 13484 * So, instead, we check it next: */ 13485 && float_need < sizeof(ebuf) 13486 && !(width || left || plus || alt) 13487 && !fill 13488 && intsize != 'q' 13489 ) { 13490 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13491 SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis) 13492 ); 13493 elen = strlen(ebuf); 13494 eptr = ebuf; 13495 goto float_concat; 13496 } 13497 13498 13499 { 13500 STRLEN pr = has_precis ? precis : 6; /* known default */ 13501 /* this probably can't wrap, since precis is limited 13502 * to 1/4 address space size, but better safe than sorry 13503 */ 13504 if (float_need >= ((STRLEN)~0) - pr) 13505 croak_memory_wrap(); 13506 float_need += pr; 13507 } 13508 13509 if (float_need < width) 13510 float_need = width; 13511 13512 if (float_need > INT_MAX) { 13513 /* snprintf() returns an int, and we use that return value, 13514 so die horribly if the expected size is too large for int 13515 */ 13516 Perl_croak(aTHX_ "Numeric format result too large"); 13517 } 13518 13519 if (PL_efloatsize <= float_need) { 13520 /* PL_efloatbuf should be at least 1 greater than 13521 * float_need to allow a trailing \0 to be returned by 13522 * snprintf(). If we need to grow, overgrow for the 13523 * benefit of future generations */ 13524 const STRLEN extra = 0x20; 13525 if (float_need >= ((STRLEN)~0) - extra) 13526 croak_memory_wrap(); 13527 float_need += extra; 13528 Safefree(PL_efloatbuf); 13529 PL_efloatsize = float_need; 13530 Newx(PL_efloatbuf, PL_efloatsize, char); 13531 PL_efloatbuf[0] = '\0'; 13532 } 13533 13534 if (UNLIKELY(hexfp)) { 13535 elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c, 13536 nv, fv, has_precis, precis, width, 13537 alt, plus, left, fill, in_lc_numeric); 13538 } 13539 else { 13540 char *ptr = ebuf + sizeof ebuf; 13541 *--ptr = '\0'; 13542 *--ptr = c; 13543 #if defined(USE_QUADMATH) 13544 /* always use Q here. my_snprint() throws an exception if we 13545 fallthrough to the double/long double code, even when the 13546 format is correct, presumably to avoid any accidentally 13547 missing Q. 13548 */ 13549 *--ptr = 'Q'; 13550 /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */ 13551 #elif defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl) 13552 /* Note that this is HAS_LONG_DOUBLE and PERL_PRIfldbl, 13553 * not USE_LONG_DOUBLE and NVff. In other words, 13554 * this needs to work without USE_LONG_DOUBLE. */ 13555 if (intsize == 'q') { 13556 /* Copy the one or more characters in a long double 13557 * format before the 'base' ([efgEFG]) character to 13558 * the format string. */ 13559 static char const ldblf[] = PERL_PRIfldbl; 13560 char const *p = ldblf + sizeof(ldblf) - 3; 13561 while (p >= ldblf) { *--ptr = *p--; } 13562 } 13563 #endif 13564 if (has_precis) { 13565 base = precis; 13566 do { *--ptr = '0' + (base % 10); } while (base /= 10); 13567 *--ptr = '.'; 13568 } 13569 if (width) { 13570 base = width; 13571 do { *--ptr = '0' + (base % 10); } while (base /= 10); 13572 } 13573 if (fill) 13574 *--ptr = '0'; 13575 if (left) 13576 *--ptr = '-'; 13577 if (plus) 13578 *--ptr = plus; 13579 if (alt) 13580 *--ptr = '#'; 13581 *--ptr = '%'; 13582 13583 /* No taint. Otherwise we are in the strange situation 13584 * where printf() taints but print($float) doesn't. 13585 * --jhi */ 13586 13587 /* hopefully the above makes ptr a very constrained format 13588 * that is safe to use, even though it's not literal */ 13589 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 13590 #ifdef USE_QUADMATH 13591 { 13592 if (!quadmath_format_valid(ptr)) 13593 Perl_croak_nocontext("panic: quadmath invalid format \"%s\"", ptr); 13594 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13595 elen = quadmath_snprintf(PL_efloatbuf, PL_efloatsize, 13596 ptr, nv); 13597 ); 13598 if ((IV)elen == -1) { 13599 Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", ptr); 13600 } 13601 } 13602 #elif defined(HAS_LONG_DOUBLE) 13603 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13604 elen = ((intsize == 'q') 13605 ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv) 13606 : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)fv)) 13607 ); 13608 #else 13609 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, 13610 elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv) 13611 ); 13612 #endif 13613 GCC_DIAG_RESTORE_STMT; 13614 } 13615 13616 eptr = PL_efloatbuf; 13617 13618 float_concat: 13619 13620 /* Since floating-point formats do their own formatting and 13621 * padding, we skip the main block of code at the end of this 13622 * loop which handles appending eptr to sv, and do our own 13623 * stripped-down version */ 13624 13625 assert(!zeros); 13626 assert(!esignlen); 13627 assert(elen); 13628 assert(elen >= width); 13629 13630 S_sv_catpvn_simple(aTHX_ sv, eptr, elen); 13631 13632 goto done_valid_conversion; 13633 } 13634 13635 /* SPECIAL */ 13636 13637 case 'n': 13638 { 13639 STRLEN len; 13640 /* XXX ideally we should warn if any flags etc have been 13641 * set, e.g. "%-4.5n" */ 13642 /* XXX if sv was originally non-utf8 with a char in the 13643 * range 0x80-0xff, then if it got upgraded, we should 13644 * calculate char len rather than byte len here */ 13645 len = SvCUR(sv) - origlen; 13646 if (args) { 13647 int i = (len > PERL_INT_MAX) ? PERL_INT_MAX : (int)len; 13648 13649 switch (intsize) { 13650 case 'c': *(va_arg(*args, char*)) = i; break; 13651 case 'h': *(va_arg(*args, short*)) = i; break; 13652 default: *(va_arg(*args, int*)) = i; break; 13653 case 'l': *(va_arg(*args, long*)) = i; break; 13654 case 'V': *(va_arg(*args, IV*)) = i; break; 13655 case 'z': *(va_arg(*args, SSize_t*)) = i; break; 13656 #ifdef HAS_PTRDIFF_T 13657 case 't': *(va_arg(*args, ptrdiff_t*)) = i; break; 13658 #endif 13659 case 'j': *(va_arg(*args, PERL_INTMAX_T*)) = i; break; 13660 case 'q': 13661 #if IVSIZE >= 8 13662 *(va_arg(*args, Quad_t*)) = i; break; 13663 #else 13664 goto unknown; 13665 #endif 13666 } 13667 } 13668 else { 13669 if (arg_missing) 13670 Perl_croak_nocontext( 13671 "Missing argument for %%n in %s", 13672 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); 13673 sv_setuv_mg(argsv, has_utf8 13674 ? (UV)utf8_length((U8*)SvPVX(sv), (U8*)SvEND(sv)) 13675 : (UV)len); 13676 } 13677 goto done_valid_conversion; 13678 } 13679 13680 /* UNKNOWN */ 13681 13682 default: 13683 unknown: 13684 if (!args 13685 && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF) 13686 && ckWARN(WARN_PRINTF)) 13687 { 13688 SV * const msg = sv_newmortal(); 13689 Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ", 13690 (PL_op->op_type == OP_PRTF) ? "" : "s"); 13691 if (fmtstart < patend) { 13692 const char * const fmtend = q < patend ? q : patend; 13693 const char * f; 13694 sv_catpvs(msg, "\"%"); 13695 for (f = fmtstart; f < fmtend; f++) { 13696 if (isPRINT(*f)) { 13697 sv_catpvn_nomg(msg, f, 1); 13698 } else { 13699 Perl_sv_catpvf(aTHX_ msg, "\\%03o", (U8) *f); 13700 } 13701 } 13702 sv_catpvs(msg, "\""); 13703 } else { 13704 sv_catpvs(msg, "end of string"); 13705 } 13706 Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%" SVf, SVfARG(msg)); /* yes, this is reentrant */ 13707 } 13708 13709 /* mangled format: output the '%', then continue from the 13710 * character following that */ 13711 sv_catpvn_nomg(sv, fmtstart-1, 1); 13712 q = fmtstart; 13713 svix = osvix; 13714 /* Any "redundant arg" warning from now onwards will probably 13715 * just be misleading, so don't bother. */ 13716 no_redundant_warning = TRUE; 13717 continue; /* not "break" */ 13718 } 13719 13720 if (is_utf8 != has_utf8) { 13721 if (is_utf8) { 13722 if (SvCUR(sv)) 13723 sv_utf8_upgrade(sv); 13724 } 13725 else { 13726 const STRLEN old_elen = elen; 13727 SV * const nsv = newSVpvn_flags(eptr, elen, SVs_TEMP); 13728 sv_utf8_upgrade(nsv); 13729 eptr = SvPVX_const(nsv); 13730 elen = SvCUR(nsv); 13731 13732 if (width) { /* fudge width (can't fudge elen) */ 13733 width += elen - old_elen; 13734 } 13735 is_utf8 = TRUE; 13736 } 13737 } 13738 13739 13740 /* append esignbuf, filler, zeros, eptr and dotstr to sv */ 13741 13742 { 13743 STRLEN need, have, gap; 13744 STRLEN i; 13745 char *s; 13746 13747 /* signed value that's wrapped? */ 13748 assert(elen <= ((~(STRLEN)0) >> 1)); 13749 13750 /* if zeros is non-zero, then it represents filler between 13751 * elen and precis. So adding elen and zeros together will 13752 * always be <= precis, and the addition can never wrap */ 13753 assert(!zeros || (precis > elen && precis - elen == zeros)); 13754 have = elen + zeros; 13755 13756 if (have >= (((STRLEN)~0) - esignlen)) 13757 croak_memory_wrap(); 13758 have += esignlen; 13759 13760 need = (have > width ? have : width); 13761 gap = need - have; 13762 13763 if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1))) 13764 croak_memory_wrap(); 13765 need += (SvCUR(sv) + 1); 13766 13767 SvGROW(sv, need); 13768 13769 s = SvEND(sv); 13770 13771 if (left) { 13772 for (i = 0; i < esignlen; i++) 13773 *s++ = esignbuf[i]; 13774 for (i = zeros; i; i--) 13775 *s++ = '0'; 13776 Copy(eptr, s, elen, char); 13777 s += elen; 13778 for (i = gap; i; i--) 13779 *s++ = ' '; 13780 } 13781 else { 13782 if (fill) { 13783 for (i = 0; i < esignlen; i++) 13784 *s++ = esignbuf[i]; 13785 assert(!zeros); 13786 zeros = gap; 13787 } 13788 else { 13789 for (i = gap; i; i--) 13790 *s++ = ' '; 13791 for (i = 0; i < esignlen; i++) 13792 *s++ = esignbuf[i]; 13793 } 13794 13795 for (i = zeros; i; i--) 13796 *s++ = '0'; 13797 Copy(eptr, s, elen, char); 13798 s += elen; 13799 } 13800 13801 *s = '\0'; 13802 SvCUR_set(sv, s - SvPVX_const(sv)); 13803 13804 if (is_utf8) 13805 has_utf8 = TRUE; 13806 if (has_utf8) 13807 SvUTF8_on(sv); 13808 } 13809 13810 if (vectorize && veclen) { 13811 /* we append the vector separator separately since %v isn't 13812 * very common: don't slow down the general case by adding 13813 * dotstrlen to need etc */ 13814 sv_catpvn_nomg(sv, dotstr, dotstrlen); 13815 esignlen = 0; 13816 goto vector; /* do next iteration */ 13817 } 13818 13819 done_valid_conversion: 13820 13821 if (arg_missing) 13822 S_warn_vcatpvfn_missing_argument(aTHX); 13823 } 13824 13825 /* Now that we've consumed all our printf format arguments (svix) 13826 * do we have things left on the stack that we didn't use? 13827 */ 13828 if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) { 13829 Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", 13830 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); 13831 } 13832 13833 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 13834 /* while we shouldn't set the cache, it may have been previously 13835 set in the caller, so clear it */ 13836 MAGIC *mg = mg_find(sv, PERL_MAGIC_utf8); 13837 if (mg) 13838 magic_setutf8(sv,mg); /* clear UTF8 cache */ 13839 } 13840 SvTAINT(sv); 13841 } 13842 13843 /* ========================================================================= 13844 13845 =for apidoc_section $embedding 13846 13847 =cut 13848 13849 All the macros and functions in this section are for the private use of 13850 the main function, perl_clone(). 13851 13852 The foo_dup() functions make an exact copy of an existing foo thingy. 13853 During the course of a cloning, a hash table is used to map old addresses 13854 to new addresses. The table is created and manipulated with the 13855 ptr_table_* functions. 13856 13857 * =========================================================================*/ 13858 13859 13860 #if defined(USE_ITHREADS) 13861 13862 /* XXX Remove this so it doesn't have to go thru the macro and return for nothing */ 13863 #ifndef GpREFCNT_inc 13864 # define GpREFCNT_inc(gp) ((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL) 13865 #endif 13866 13867 13868 #define SAVEPV(p) ((p) ? savepv(p) : NULL) 13869 #define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL) 13870 13871 /* clone a parser */ 13872 13873 yy_parser * 13874 Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) 13875 { 13876 yy_parser *parser; 13877 13878 PERL_ARGS_ASSERT_PARSER_DUP; 13879 13880 if (!proto) 13881 return NULL; 13882 13883 /* look for it in the table first */ 13884 parser = (yy_parser *)ptr_table_fetch(PL_ptr_table, proto); 13885 if (parser) 13886 return parser; 13887 13888 /* create anew and remember what it is */ 13889 Newxz(parser, 1, yy_parser); 13890 ptr_table_store(PL_ptr_table, proto, parser); 13891 13892 /* XXX eventually, just Copy() most of the parser struct ? */ 13893 13894 parser->lex_brackets = proto->lex_brackets; 13895 parser->lex_casemods = proto->lex_casemods; 13896 parser->lex_brackstack = savepvn(proto->lex_brackstack, 13897 (proto->lex_brackets < 120 ? 120 : proto->lex_brackets)); 13898 parser->lex_casestack = savepvn(proto->lex_casestack, 13899 (proto->lex_casemods < 12 ? 12 : proto->lex_casemods)); 13900 parser->lex_defer = proto->lex_defer; 13901 parser->lex_dojoin = proto->lex_dojoin; 13902 parser->lex_formbrack = proto->lex_formbrack; 13903 parser->lex_inpat = proto->lex_inpat; 13904 parser->lex_inwhat = proto->lex_inwhat; 13905 parser->lex_op = proto->lex_op; 13906 parser->lex_repl = sv_dup_inc(proto->lex_repl, param); 13907 parser->lex_starts = proto->lex_starts; 13908 parser->lex_stuff = sv_dup_inc(proto->lex_stuff, param); 13909 parser->multi_close = proto->multi_close; 13910 parser->multi_open = proto->multi_open; 13911 parser->multi_start = proto->multi_start; 13912 parser->multi_end = proto->multi_end; 13913 parser->preambled = proto->preambled; 13914 parser->lex_super_state = proto->lex_super_state; 13915 parser->lex_sub_inwhat = proto->lex_sub_inwhat; 13916 parser->lex_sub_op = proto->lex_sub_op; 13917 parser->lex_sub_repl= sv_dup_inc(proto->lex_sub_repl, param); 13918 parser->linestr = sv_dup_inc(proto->linestr, param); 13919 parser->expect = proto->expect; 13920 parser->copline = proto->copline; 13921 parser->last_lop_op = proto->last_lop_op; 13922 parser->lex_state = proto->lex_state; 13923 parser->rsfp = fp_dup(proto->rsfp, '<', param); 13924 /* rsfp_filters entries have fake IoDIRP() */ 13925 parser->rsfp_filters= av_dup_inc(proto->rsfp_filters, param); 13926 parser->in_my = proto->in_my; 13927 parser->in_my_stash = hv_dup(proto->in_my_stash, param); 13928 parser->error_count = proto->error_count; 13929 parser->sig_elems = proto->sig_elems; 13930 parser->sig_optelems= proto->sig_optelems; 13931 parser->sig_slurpy = proto->sig_slurpy; 13932 parser->recheck_utf8_validity = proto->recheck_utf8_validity; 13933 13934 { 13935 char * const ols = SvPVX(proto->linestr); 13936 char * const ls = SvPVX(parser->linestr); 13937 13938 parser->bufptr = ls + (proto->bufptr >= ols ? 13939 proto->bufptr - ols : 0); 13940 parser->oldbufptr = ls + (proto->oldbufptr >= ols ? 13941 proto->oldbufptr - ols : 0); 13942 parser->oldoldbufptr= ls + (proto->oldoldbufptr >= ols ? 13943 proto->oldoldbufptr - ols : 0); 13944 parser->linestart = ls + (proto->linestart >= ols ? 13945 proto->linestart - ols : 0); 13946 parser->last_uni = ls + (proto->last_uni >= ols ? 13947 proto->last_uni - ols : 0); 13948 parser->last_lop = ls + (proto->last_lop >= ols ? 13949 proto->last_lop - ols : 0); 13950 13951 parser->bufend = ls + SvCUR(parser->linestr); 13952 } 13953 13954 Copy(proto->tokenbuf, parser->tokenbuf, 256, char); 13955 13956 13957 Copy(proto->nextval, parser->nextval, 5, YYSTYPE); 13958 Copy(proto->nexttype, parser->nexttype, 5, I32); 13959 parser->nexttoke = proto->nexttoke; 13960 13961 /* XXX should clone saved_curcop here, but we aren't passed 13962 * proto_perl; so do it in perl_clone_using instead */ 13963 13964 return parser; 13965 } 13966 13967 /* 13968 =for apidoc_section $io 13969 =for apidoc fp_dup 13970 13971 Duplicate a file handle, returning a pointer to the cloned object. 13972 13973 =cut 13974 */ 13975 13976 PerlIO * 13977 Perl_fp_dup(pTHX_ PerlIO *const fp, const char type, CLONE_PARAMS *const param) 13978 { 13979 PerlIO *ret; 13980 13981 PERL_ARGS_ASSERT_FP_DUP; 13982 PERL_UNUSED_ARG(type); 13983 13984 if (!fp) 13985 return (PerlIO*)NULL; 13986 13987 /* look for it in the table first */ 13988 ret = (PerlIO*)ptr_table_fetch(PL_ptr_table, fp); 13989 if (ret) 13990 return ret; 13991 13992 /* create anew and remember what it is */ 13993 #ifdef __amigaos4__ 13994 ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE|PERLIO_DUP_FD); 13995 #else 13996 ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE); 13997 #endif 13998 ptr_table_store(PL_ptr_table, fp, ret); 13999 return ret; 14000 } 14001 14002 /* 14003 =for apidoc_section $io 14004 =for apidoc dirp_dup 14005 14006 Duplicate a directory handle, returning a pointer to the cloned object. 14007 14008 =cut 14009 */ 14010 14011 DIR * 14012 Perl_dirp_dup(pTHX_ DIR *const dp, CLONE_PARAMS *const param) 14013 { 14014 DIR *ret; 14015 14016 #if defined(HAS_FCHDIR) && defined(HAS_TELLDIR) && defined(HAS_SEEKDIR) 14017 DIR *pwd; 14018 const Direntry_t *dirent; 14019 char smallbuf[256]; /* XXX MAXPATHLEN, surely? */ 14020 char *name = NULL; 14021 STRLEN len = 0; 14022 long pos; 14023 #endif 14024 14025 PERL_UNUSED_CONTEXT; 14026 PERL_ARGS_ASSERT_DIRP_DUP; 14027 14028 if (!dp) 14029 return (DIR*)NULL; 14030 14031 /* look for it in the table first */ 14032 ret = (DIR*)ptr_table_fetch(PL_ptr_table, dp); 14033 if (ret) 14034 return ret; 14035 14036 #if defined(HAS_FCHDIR) && defined(HAS_TELLDIR) && defined(HAS_SEEKDIR) 14037 14038 PERL_UNUSED_ARG(param); 14039 14040 /* create anew */ 14041 14042 /* open the current directory (so we can switch back) */ 14043 if (!(pwd = PerlDir_open("."))) return (DIR *)NULL; 14044 14045 /* chdir to our dir handle and open the present working directory */ 14046 if (fchdir(my_dirfd(dp)) < 0 || !(ret = PerlDir_open("."))) { 14047 PerlDir_close(pwd); 14048 return (DIR *)NULL; 14049 } 14050 /* Now we should have two dir handles pointing to the same dir. */ 14051 14052 /* Be nice to the calling code and chdir back to where we were. */ 14053 /* XXX If this fails, then what? */ 14054 PERL_UNUSED_RESULT(fchdir(my_dirfd(pwd))); 14055 14056 /* We have no need of the pwd handle any more. */ 14057 PerlDir_close(pwd); 14058 14059 #ifdef DIRNAMLEN 14060 # define d_namlen(d) (d)->d_namlen 14061 #else 14062 # define d_namlen(d) strlen((d)->d_name) 14063 #endif 14064 /* Iterate once through dp, to get the file name at the current posi- 14065 tion. Then step back. */ 14066 pos = PerlDir_tell(dp); 14067 if ((dirent = PerlDir_read(dp))) { 14068 len = d_namlen(dirent); 14069 if (len > sizeof(dirent->d_name) && sizeof(dirent->d_name) > PTRSIZE) { 14070 /* If the len is somehow magically longer than the 14071 * maximum length of the directory entry, even though 14072 * we could fit it in a buffer, we could not copy it 14073 * from the dirent. Bail out. */ 14074 PerlDir_close(ret); 14075 return (DIR*)NULL; 14076 } 14077 if (len <= sizeof smallbuf) name = smallbuf; 14078 else Newx(name, len, char); 14079 Move(dirent->d_name, name, len, char); 14080 } 14081 PerlDir_seek(dp, pos); 14082 14083 /* Iterate through the new dir handle, till we find a file with the 14084 right name. */ 14085 if (!dirent) /* just before the end */ 14086 for(;;) { 14087 pos = PerlDir_tell(ret); 14088 if (PerlDir_read(ret)) continue; /* not there yet */ 14089 PerlDir_seek(ret, pos); /* step back */ 14090 break; 14091 } 14092 else { 14093 const long pos0 = PerlDir_tell(ret); 14094 for(;;) { 14095 pos = PerlDir_tell(ret); 14096 if ((dirent = PerlDir_read(ret))) { 14097 if (len == (STRLEN)d_namlen(dirent) 14098 && memEQ(name, dirent->d_name, len)) { 14099 /* found it */ 14100 PerlDir_seek(ret, pos); /* step back */ 14101 break; 14102 } 14103 /* else we are not there yet; keep iterating */ 14104 } 14105 else { /* This is not meant to happen. The best we can do is 14106 reset the iterator to the beginning. */ 14107 PerlDir_seek(ret, pos0); 14108 break; 14109 } 14110 } 14111 } 14112 #undef d_namlen 14113 14114 if (name && name != smallbuf) 14115 Safefree(name); 14116 #endif 14117 14118 #ifdef WIN32 14119 ret = win32_dirp_dup(dp, param); 14120 #endif 14121 14122 /* pop it in the pointer table */ 14123 if (ret) 14124 ptr_table_store(PL_ptr_table, dp, ret); 14125 14126 return ret; 14127 } 14128 14129 /* 14130 =for apidoc_section $GV 14131 =for apidoc gp_dup 14132 14133 Duplicate a typeglob, returning a pointer to the cloned object. 14134 14135 =cut 14136 */ 14137 14138 GP * 14139 Perl_gp_dup(pTHX_ GP *const gp, CLONE_PARAMS *const param) 14140 { 14141 GP *ret; 14142 14143 PERL_ARGS_ASSERT_GP_DUP; 14144 14145 if (!gp) 14146 return (GP*)NULL; 14147 /* look for it in the table first */ 14148 ret = (GP*)ptr_table_fetch(PL_ptr_table, gp); 14149 if (ret) 14150 return ret; 14151 14152 /* create anew and remember what it is */ 14153 Newxz(ret, 1, GP); 14154 ptr_table_store(PL_ptr_table, gp, ret); 14155 14156 /* clone */ 14157 /* ret->gp_refcnt must be 0 before any other dups are called. We're relying 14158 on Newxz() to do this for us. */ 14159 ret->gp_sv = sv_dup_inc(gp->gp_sv, param); 14160 ret->gp_io = io_dup_inc(gp->gp_io, param); 14161 ret->gp_form = cv_dup_inc(gp->gp_form, param); 14162 ret->gp_av = av_dup_inc(gp->gp_av, param); 14163 ret->gp_hv = hv_dup_inc(gp->gp_hv, param); 14164 ret->gp_egv = gv_dup(gp->gp_egv, param);/* GvEGV is not refcounted */ 14165 ret->gp_cv = cv_dup_inc(gp->gp_cv, param); 14166 ret->gp_cvgen = gp->gp_cvgen; 14167 ret->gp_line = gp->gp_line; 14168 ret->gp_file_hek = hek_dup(gp->gp_file_hek, param); 14169 return ret; 14170 } 14171 14172 14173 /* 14174 =for apidoc_section $magic 14175 =for apidoc mg_dup 14176 14177 Duplicate a chain of magic, returning a pointer to the cloned object. 14178 14179 =cut 14180 */ 14181 14182 MAGIC * 14183 Perl_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS *const param) 14184 { 14185 MAGIC *mgret = NULL; 14186 MAGIC **mgprev_p = &mgret; 14187 14188 PERL_ARGS_ASSERT_MG_DUP; 14189 14190 for (; mg; mg = mg->mg_moremagic) { 14191 MAGIC *nmg; 14192 14193 if ((param->flags & CLONEf_JOIN_IN) 14194 && mg->mg_type == PERL_MAGIC_backref) 14195 /* when joining, we let the individual SVs add themselves to 14196 * backref as needed. */ 14197 continue; 14198 14199 Newx(nmg, 1, MAGIC); 14200 *mgprev_p = nmg; 14201 mgprev_p = &(nmg->mg_moremagic); 14202 14203 /* There was a comment "XXX copy dynamic vtable?" but as we don't have 14204 dynamic vtables, I'm not sure why Sarathy wrote it. The comment dates 14205 from the original commit adding Perl_mg_dup() - revision 4538. 14206 Similarly there is the annotation "XXX random ptr?" next to the 14207 assignment to nmg->mg_ptr. */ 14208 *nmg = *mg; 14209 14210 /* FIXME for plugins 14211 if (nmg->mg_type == PERL_MAGIC_qr) { 14212 nmg->mg_obj = MUTABLE_SV(CALLREGDUPE((REGEXP*)nmg->mg_obj, param)); 14213 } 14214 else 14215 */ 14216 nmg->mg_obj = (nmg->mg_flags & MGf_REFCOUNTED) 14217 ? nmg->mg_type == PERL_MAGIC_backref 14218 /* The backref AV has its reference 14219 * count deliberately bumped by 1 */ 14220 ? SvREFCNT_inc(av_dup_inc((const AV *) 14221 nmg->mg_obj, param)) 14222 : sv_dup_inc(nmg->mg_obj, param) 14223 : (nmg->mg_type == PERL_MAGIC_regdatum || 14224 nmg->mg_type == PERL_MAGIC_regdata) 14225 ? nmg->mg_obj 14226 : sv_dup(nmg->mg_obj, param); 14227 14228 if (nmg->mg_ptr && nmg->mg_type != PERL_MAGIC_regex_global) { 14229 if (nmg->mg_len > 0) { 14230 nmg->mg_ptr = SAVEPVN(nmg->mg_ptr, nmg->mg_len); 14231 if (nmg->mg_type == PERL_MAGIC_overload_table && 14232 AMT_AMAGIC((AMT*)nmg->mg_ptr)) 14233 { 14234 AMT * const namtp = (AMT*)nmg->mg_ptr; 14235 sv_dup_inc_multiple((SV**)(namtp->table), 14236 (SV**)(namtp->table), NofAMmeth, param); 14237 } 14238 } 14239 else if (nmg->mg_len == HEf_SVKEY) 14240 nmg->mg_ptr = (char*)sv_dup_inc((const SV *)nmg->mg_ptr, param); 14241 } 14242 if ((nmg->mg_flags & MGf_DUP) && nmg->mg_virtual && nmg->mg_virtual->svt_dup) { 14243 nmg->mg_virtual->svt_dup(aTHX_ nmg, param); 14244 } 14245 } 14246 return mgret; 14247 } 14248 14249 #endif /* USE_ITHREADS */ 14250 14251 struct ptr_tbl_arena { 14252 struct ptr_tbl_arena *next; 14253 struct ptr_tbl_ent array[1023/3]; /* as ptr_tbl_ent has 3 pointers. */ 14254 }; 14255 14256 /* 14257 =for apidoc_section $embedding 14258 =for apidoc ptr_table_new 14259 14260 Create a new pointer-mapping table 14261 14262 =cut 14263 */ 14264 14265 PTR_TBL_t * 14266 Perl_ptr_table_new(pTHX) 14267 { 14268 PTR_TBL_t *tbl; 14269 PERL_UNUSED_CONTEXT; 14270 14271 Newx(tbl, 1, PTR_TBL_t); 14272 tbl->tbl_max = 511; 14273 tbl->tbl_items = 0; 14274 tbl->tbl_arena = NULL; 14275 tbl->tbl_arena_next = NULL; 14276 tbl->tbl_arena_end = NULL; 14277 Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTR_TBL_ENT_t*); 14278 return tbl; 14279 } 14280 14281 #define PTR_TABLE_HASH(ptr) \ 14282 ((PTR2UV(ptr) >> 3) ^ (PTR2UV(ptr) >> (3 + 7)) ^ (PTR2UV(ptr) >> (3 + 17))) 14283 14284 /* map an existing pointer using a table */ 14285 14286 STATIC PTR_TBL_ENT_t * 14287 S_ptr_table_find(PTR_TBL_t *const tbl, const void *const sv) 14288 { 14289 PTR_TBL_ENT_t *tblent; 14290 const UV hash = PTR_TABLE_HASH(sv); 14291 14292 PERL_ARGS_ASSERT_PTR_TABLE_FIND; 14293 14294 tblent = tbl->tbl_ary[hash & tbl->tbl_max]; 14295 for (; tblent; tblent = tblent->next) { 14296 if (tblent->oldval == sv) 14297 return tblent; 14298 } 14299 return NULL; 14300 } 14301 14302 /* 14303 =for apidoc ptr_table_fetch 14304 14305 Look for C<sv> in the pointer-mapping table C<tbl>, returning its value, or 14306 NULL if not found. 14307 14308 =cut 14309 */ 14310 14311 void * 14312 Perl_ptr_table_fetch(pTHX_ PTR_TBL_t *const tbl, const void *const sv) 14313 { 14314 PTR_TBL_ENT_t const *const tblent = ptr_table_find(tbl, sv); 14315 14316 PERL_ARGS_ASSERT_PTR_TABLE_FETCH; 14317 PERL_UNUSED_CONTEXT; 14318 14319 return tblent ? tblent->newval : NULL; 14320 } 14321 14322 /* 14323 =for apidoc ptr_table_store 14324 14325 Add a new entry to a pointer-mapping table C<tbl>. 14326 In hash terms, C<oldsv> is the key; Cnewsv> is the value. 14327 14328 The names "old" and "new" are specific to the core's typical use of ptr_tables 14329 in thread cloning. 14330 14331 =cut 14332 */ 14333 14334 void 14335 Perl_ptr_table_store(pTHX_ PTR_TBL_t *const tbl, const void *const oldsv, void *const newsv) 14336 { 14337 PTR_TBL_ENT_t *tblent = ptr_table_find(tbl, oldsv); 14338 14339 PERL_ARGS_ASSERT_PTR_TABLE_STORE; 14340 PERL_UNUSED_CONTEXT; 14341 14342 if (tblent) { 14343 tblent->newval = newsv; 14344 } else { 14345 const UV entry = PTR_TABLE_HASH(oldsv) & tbl->tbl_max; 14346 14347 if (tbl->tbl_arena_next == tbl->tbl_arena_end) { 14348 struct ptr_tbl_arena *new_arena; 14349 14350 Newx(new_arena, 1, struct ptr_tbl_arena); 14351 new_arena->next = tbl->tbl_arena; 14352 tbl->tbl_arena = new_arena; 14353 tbl->tbl_arena_next = new_arena->array; 14354 tbl->tbl_arena_end = C_ARRAY_END(new_arena->array); 14355 } 14356 14357 tblent = tbl->tbl_arena_next++; 14358 14359 tblent->oldval = oldsv; 14360 tblent->newval = newsv; 14361 tblent->next = tbl->tbl_ary[entry]; 14362 tbl->tbl_ary[entry] = tblent; 14363 tbl->tbl_items++; 14364 if (tblent->next && tbl->tbl_items > tbl->tbl_max) 14365 ptr_table_split(tbl); 14366 } 14367 } 14368 14369 /* 14370 =for apidoc ptr_table_split 14371 14372 Double the hash bucket size of an existing ptr table 14373 14374 =cut 14375 */ 14376 14377 void 14378 Perl_ptr_table_split(pTHX_ PTR_TBL_t *const tbl) 14379 { 14380 PTR_TBL_ENT_t **ary = tbl->tbl_ary; 14381 const UV oldsize = tbl->tbl_max + 1; 14382 UV newsize = oldsize * 2; 14383 UV i; 14384 14385 PERL_ARGS_ASSERT_PTR_TABLE_SPLIT; 14386 PERL_UNUSED_CONTEXT; 14387 14388 Renew(ary, newsize, PTR_TBL_ENT_t*); 14389 Zero(&ary[oldsize], newsize-oldsize, PTR_TBL_ENT_t*); 14390 tbl->tbl_max = --newsize; 14391 tbl->tbl_ary = ary; 14392 for (i=0; i < oldsize; i++, ary++) { 14393 PTR_TBL_ENT_t **entp = ary; 14394 PTR_TBL_ENT_t *ent = *ary; 14395 PTR_TBL_ENT_t **curentp; 14396 if (!ent) 14397 continue; 14398 curentp = ary + oldsize; 14399 do { 14400 if ((newsize & PTR_TABLE_HASH(ent->oldval)) != i) { 14401 *entp = ent->next; 14402 ent->next = *curentp; 14403 *curentp = ent; 14404 } 14405 else 14406 entp = &ent->next; 14407 ent = *entp; 14408 } while (ent); 14409 } 14410 } 14411 14412 /* 14413 =for apidoc ptr_table_free 14414 14415 Clear and free a ptr table 14416 14417 =cut 14418 */ 14419 14420 void 14421 Perl_ptr_table_free(pTHX_ PTR_TBL_t *const tbl) 14422 { 14423 struct ptr_tbl_arena *arena; 14424 14425 PERL_UNUSED_CONTEXT; 14426 14427 if (!tbl) { 14428 return; 14429 } 14430 14431 arena = tbl->tbl_arena; 14432 14433 while (arena) { 14434 struct ptr_tbl_arena *next = arena->next; 14435 14436 Safefree(arena); 14437 arena = next; 14438 } 14439 14440 Safefree(tbl->tbl_ary); 14441 Safefree(tbl); 14442 } 14443 14444 #if defined(USE_ITHREADS) 14445 14446 void 14447 Perl_rvpv_dup(pTHX_ SV *const dsv, const SV *const ssv, CLONE_PARAMS *const param) 14448 { 14449 PERL_ARGS_ASSERT_RVPV_DUP; 14450 14451 assert(!isREGEXP(ssv)); 14452 if (SvROK(ssv)) { 14453 if (SvWEAKREF(ssv)) { 14454 SvRV_set(dsv, sv_dup(SvRV_const(ssv), param)); 14455 if (param->flags & CLONEf_JOIN_IN) { 14456 /* if joining, we add any back references individually rather 14457 * than copying the whole backref array */ 14458 Perl_sv_add_backref(aTHX_ SvRV(dsv), dsv); 14459 } 14460 } 14461 else 14462 SvRV_set(dsv, sv_dup_inc(SvRV_const(ssv), param)); 14463 } 14464 else if (SvPVX_const(ssv)) { 14465 /* Has something there */ 14466 if (SvLEN(ssv)) { 14467 /* Normal PV - clone whole allocated space */ 14468 SvPV_set(dsv, SAVEPVN(SvPVX_const(ssv), SvLEN(ssv)-1)); 14469 /* ssv may not be that normal, but actually copy on write. 14470 But we are a true, independent SV, so: */ 14471 SvIsCOW_off(dsv); 14472 } 14473 else { 14474 /* Special case - not normally malloced for some reason */ 14475 if (isGV_with_GP(ssv)) { 14476 /* Don't need to do anything here. */ 14477 } 14478 else if ((SvIsCOW_shared_hash(ssv))) { 14479 /* A "shared" PV - clone it as "shared" PV */ 14480 SvPV_set(dsv, 14481 HEK_KEY(hek_dup(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv)), 14482 param))); 14483 } 14484 else { 14485 /* Some other special case - random pointer */ 14486 SvPV_set(dsv, (char *) SvPVX_const(ssv)); 14487 } 14488 } 14489 } 14490 else { 14491 /* Copy the NULL */ 14492 SvPV_set(dsv, NULL); 14493 } 14494 } 14495 14496 /* duplicate a list of SVs. source and dest may point to the same memory. */ 14497 static SV ** 14498 S_sv_dup_inc_multiple(pTHX_ SV *const *source, SV **dest, 14499 SSize_t items, CLONE_PARAMS *const param) 14500 { 14501 PERL_ARGS_ASSERT_SV_DUP_INC_MULTIPLE; 14502 14503 while (items-- > 0) { 14504 *dest++ = sv_dup_inc(*source++, param); 14505 } 14506 14507 return dest; 14508 } 14509 14510 /* duplicate the HvAUX of an HV */ 14511 static void 14512 S_sv_dup_hvaux(pTHX_ const SV *const ssv, SV *dsv, CLONE_PARAMS *const param) 14513 { 14514 PERL_ARGS_ASSERT_SV_DUP_HVAUX; 14515 14516 const struct xpvhv_aux * const saux = HvAUX(ssv); 14517 struct xpvhv_aux * const daux = HvAUX(dsv); 14518 /* This flag isn't copied. */ 14519 SvFLAGS(dsv) |= SVphv_HasAUX; 14520 14521 if (saux->xhv_name_count) { 14522 HEK ** const sname = saux->xhv_name_u.xhvnameu_names; 14523 const I32 count = saux->xhv_name_count < 0 14524 ? -saux->xhv_name_count 14525 : saux->xhv_name_count; 14526 HEK **shekp = sname + count; 14527 HEK **dhekp; 14528 Newx(daux->xhv_name_u.xhvnameu_names, count, HEK *); 14529 dhekp = daux->xhv_name_u.xhvnameu_names + count; 14530 while (shekp-- > sname) { 14531 dhekp--; 14532 *dhekp = hek_dup(*shekp, param); 14533 } 14534 } 14535 else { 14536 daux->xhv_name_u.xhvnameu_name = hek_dup(saux->xhv_name_u.xhvnameu_name, param); 14537 } 14538 daux->xhv_name_count = saux->xhv_name_count; 14539 14540 daux->xhv_aux_flags = saux->xhv_aux_flags; 14541 #ifdef PERL_HASH_RANDOMIZE_KEYS 14542 daux->xhv_rand = saux->xhv_rand; 14543 daux->xhv_last_rand = saux->xhv_last_rand; 14544 #endif 14545 daux->xhv_riter = saux->xhv_riter; 14546 daux->xhv_eiter = saux->xhv_eiter ? he_dup(saux->xhv_eiter, FALSE, param) : 0; 14547 /* backref array needs refcnt=2; see sv_add_backref */ 14548 daux->xhv_backreferences = 14549 (param->flags & CLONEf_JOIN_IN) 14550 /* when joining, we let the individual GVs and 14551 * CVs add themselves to backref as 14552 * needed. This avoids pulling in stuff 14553 * that isn't required, and simplifies the 14554 * case where stashes aren't cloned back 14555 * if they already exist in the parent 14556 * thread */ 14557 ? NULL 14558 : saux->xhv_backreferences 14559 ? (SvTYPE(saux->xhv_backreferences) == SVt_PVAV) 14560 ? MUTABLE_AV(SvREFCNT_inc( 14561 sv_dup_inc((const SV *) 14562 saux->xhv_backreferences, param))) 14563 : MUTABLE_AV(sv_dup((const SV *) 14564 saux->xhv_backreferences, param)) 14565 : 0; 14566 14567 daux->xhv_mro_meta = saux->xhv_mro_meta 14568 ? mro_meta_dup(saux->xhv_mro_meta, param) 14569 : 0; 14570 14571 /* Record stashes for possible cloning in Perl_clone(). */ 14572 if (HvNAME(ssv)) 14573 av_push(param->stashes, dsv); 14574 14575 if (HvSTASH_IS_CLASS(ssv)) { 14576 daux->xhv_class_superclass = hv_dup_inc(saux->xhv_class_superclass, param); 14577 daux->xhv_class_initfields_cv = cv_dup_inc(saux->xhv_class_initfields_cv, param); 14578 daux->xhv_class_adjust_blocks = av_dup_inc(saux->xhv_class_adjust_blocks, param); 14579 daux->xhv_class_fields = padnamelist_dup_inc(saux->xhv_class_fields, param); 14580 daux->xhv_class_next_fieldix = saux->xhv_class_next_fieldix; 14581 daux->xhv_class_param_map = hv_dup_inc(saux->xhv_class_param_map, param); 14582 14583 /* TODO: This does mean that we can't compile more `field` expressions 14584 * in the cloned thread, but surely we're done with compiletime now..? 14585 */ 14586 daux->xhv_class_suspended_initfields_compcv = NULL; 14587 } 14588 } 14589 14590 /* duplicate an SV of any type (including AV, HV etc) */ 14591 14592 static SV * 14593 S_sv_dup_common(pTHX_ const SV *const ssv, CLONE_PARAMS *const param) 14594 { 14595 SV *dsv; 14596 14597 PERL_ARGS_ASSERT_SV_DUP_COMMON; 14598 14599 if (SvIS_FREED(ssv)) { 14600 #ifdef DEBUG_LEAKING_SCALARS_ABORT 14601 abort(); 14602 #endif 14603 return NULL; 14604 } 14605 /* look for it in the table first */ 14606 dsv = MUTABLE_SV(ptr_table_fetch(PL_ptr_table, ssv)); 14607 if (dsv) 14608 return dsv; 14609 14610 if(param->flags & CLONEf_JOIN_IN) { 14611 /** We are joining here so we don't want do clone 14612 something that is bad **/ 14613 if (SvTYPE(ssv) == SVt_PVHV) { 14614 const HEK * const hvname = HvNAME_HEK(ssv); 14615 if (hvname) { 14616 /** don't clone stashes if they already exist **/ 14617 dsv = MUTABLE_SV(gv_stashpvn(HEK_KEY(hvname), HEK_LEN(hvname), 14618 HEK_UTF8(hvname) ? SVf_UTF8 : 0)); 14619 ptr_table_store(PL_ptr_table, ssv, dsv); 14620 return dsv; 14621 } 14622 } 14623 else if (SvTYPE(ssv) == SVt_PVGV && !SvFAKE(ssv)) { 14624 HV *stash = GvSTASH(ssv); 14625 const HEK * hvname; 14626 if (stash && (hvname = HvNAME_HEK(stash))) { 14627 /** don't clone GVs if they already exist **/ 14628 SV **svp; 14629 stash = gv_stashpvn(HEK_KEY(hvname), HEK_LEN(hvname), 14630 HEK_UTF8(hvname) ? SVf_UTF8 : 0); 14631 svp = hv_fetch( 14632 stash, GvNAME(ssv), 14633 GvNAMEUTF8(ssv) 14634 ? -GvNAMELEN(ssv) 14635 : GvNAMELEN(ssv), 14636 0 14637 ); 14638 if (svp && *svp && SvTYPE(*svp) == SVt_PVGV) { 14639 ptr_table_store(PL_ptr_table, ssv, *svp); 14640 return *svp; 14641 } 14642 } 14643 } 14644 } 14645 14646 /* create anew and remember what it is */ 14647 new_SV(dsv); 14648 14649 #ifdef DEBUG_LEAKING_SCALARS 14650 dsv->sv_debug_optype = ssv->sv_debug_optype; 14651 dsv->sv_debug_line = ssv->sv_debug_line; 14652 dsv->sv_debug_inpad = ssv->sv_debug_inpad; 14653 dsv->sv_debug_parent = (SV*)ssv; 14654 FREE_SV_DEBUG_FILE(dsv); 14655 dsv->sv_debug_file = savesharedpv(ssv->sv_debug_file); 14656 #endif 14657 14658 ptr_table_store(PL_ptr_table, ssv, dsv); 14659 14660 /* clone */ 14661 SvFLAGS(dsv) = SvFLAGS(ssv); 14662 SvFLAGS(dsv) &= ~SVf_OOK; /* don't propagate OOK hack */ 14663 SvREFCNT(dsv) = 0; /* must be before any other dups! */ 14664 14665 #ifdef DEBUGGING 14666 if (SvANY(ssv) && PL_watch_pvx && SvPVX_const(ssv) == PL_watch_pvx) 14667 PerlIO_printf(Perl_debug_log, "watch at %p hit, found string \"%s\"\n", 14668 (void*)PL_watch_pvx, SvPVX_const(ssv)); 14669 #endif 14670 14671 /* don't clone objects whose class has asked us not to */ 14672 if (SvOBJECT(ssv) 14673 && ! (SvFLAGS(SvSTASH(ssv)) & SVphv_CLONEABLE)) 14674 { 14675 SvFLAGS(dsv) = 0; 14676 return dsv; 14677 } 14678 14679 switch (SvTYPE(ssv)) { 14680 case SVt_NULL: 14681 SvANY(dsv) = NULL; 14682 break; 14683 case SVt_IV: 14684 SET_SVANY_FOR_BODYLESS_IV(dsv); 14685 if(SvROK(ssv)) { 14686 Perl_rvpv_dup(aTHX_ dsv, ssv, param); 14687 } else { 14688 SvIV_set(dsv, SvIVX(ssv)); 14689 } 14690 break; 14691 case SVt_NV: 14692 #if NVSIZE <= IVSIZE 14693 SET_SVANY_FOR_BODYLESS_NV(dsv); 14694 #else 14695 SvANY(dsv) = new_XNV(); 14696 #endif 14697 SvNV_set(dsv, SvNVX(ssv)); 14698 break; 14699 default: 14700 { 14701 /* These are all the types that need complex bodies allocating. */ 14702 void *new_body; 14703 const svtype sv_type = SvTYPE(ssv); 14704 const struct body_details *sv_type_details 14705 = bodies_by_type + sv_type; 14706 14707 switch (sv_type) { 14708 default: 14709 Perl_croak(param->proto_perl, "Bizarre SvTYPE [%" IVdf "]", (IV)SvTYPE(ssv)); 14710 NOT_REACHED; /* NOTREACHED */ 14711 break; 14712 14713 case SVt_PVHV: 14714 if (HvHasAUX(ssv)) { 14715 sv_type_details = &fake_hv_with_aux; 14716 #ifdef PURIFY 14717 new_body = new_NOARENA(sv_type_details); 14718 #else 14719 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux); 14720 #endif 14721 goto have_body; 14722 } 14723 /* FALLTHROUGH */ 14724 case SVt_PVOBJ: 14725 case SVt_PVGV: 14726 case SVt_PVIO: 14727 case SVt_PVFM: 14728 case SVt_PVAV: 14729 case SVt_PVCV: 14730 case SVt_PVLV: 14731 case SVt_REGEXP: 14732 case SVt_PVMG: 14733 case SVt_PVNV: 14734 case SVt_PVIV: 14735 case SVt_INVLIST: 14736 case SVt_PV: 14737 assert(sv_type_details->body_size); 14738 #ifndef PURIFY 14739 if (sv_type_details->arena) { 14740 new_body = S_new_body(aTHX_ sv_type); 14741 new_body 14742 = (void*)((char*)new_body - sv_type_details->offset); 14743 } else 14744 #endif 14745 { 14746 new_body = new_NOARENA(sv_type_details); 14747 } 14748 } 14749 have_body: 14750 assert(new_body); 14751 SvANY(dsv) = new_body; 14752 14753 #ifndef PURIFY 14754 Copy(((char*)SvANY(ssv)) + sv_type_details->offset, 14755 ((char*)SvANY(dsv)) + sv_type_details->offset, 14756 sv_type_details->copy, char); 14757 #else 14758 Copy(((char*)SvANY(ssv)), 14759 ((char*)SvANY(dsv)), 14760 sv_type_details->body_size + sv_type_details->offset, char); 14761 #endif 14762 14763 if (sv_type != SVt_PVAV && sv_type != SVt_PVHV && sv_type != SVt_PVOBJ 14764 && !isGV_with_GP(dsv) 14765 && !isREGEXP(dsv) 14766 && !(sv_type == SVt_PVIO && !(IoFLAGS(dsv) & IOf_FAKE_DIRP))) 14767 Perl_rvpv_dup(aTHX_ dsv, ssv, param); 14768 14769 /* The Copy above means that all the source (unduplicated) pointers 14770 are now in the destination. We can check the flags and the 14771 pointers in either, but it's possible that there's less cache 14772 missing by always going for the destination. 14773 FIXME - instrument and check that assumption */ 14774 if (sv_type >= SVt_PVMG) { 14775 if (SvMAGIC(dsv)) 14776 SvMAGIC_set(dsv, mg_dup(SvMAGIC(dsv), param)); 14777 if (SvOBJECT(dsv) && SvSTASH(dsv)) 14778 SvSTASH_set(dsv, hv_dup_inc(SvSTASH(dsv), param)); 14779 else SvSTASH_set(dsv, 0); /* don't copy DESTROY cache */ 14780 } 14781 14782 /* The cast silences a GCC warning about unhandled types. */ 14783 switch ((int)sv_type) { 14784 case SVt_PV: 14785 break; 14786 case SVt_PVIV: 14787 break; 14788 case SVt_PVNV: 14789 break; 14790 case SVt_PVMG: 14791 break; 14792 case SVt_REGEXP: 14793 duprex: 14794 /* FIXME for plugins */ 14795 re_dup_guts((REGEXP*) ssv, (REGEXP*) dsv, param); 14796 break; 14797 case SVt_PVLV: 14798 /* XXX LvTARGOFF sometimes holds PMOP* when DEBUGGING */ 14799 if (LvTYPE(dsv) == 't') /* for tie: unrefcnted fake (SV**) */ 14800 LvTARG(dsv) = dsv; 14801 else if (LvTYPE(dsv) == 'T') /* for tie: fake HE */ 14802 LvTARG(dsv) = MUTABLE_SV(he_dup((HE*)LvTARG(dsv), FALSE, param)); 14803 else 14804 LvTARG(dsv) = sv_dup_inc(LvTARG(dsv), param); 14805 if (isREGEXP(ssv)) goto duprex; 14806 /* FALLTHROUGH */ 14807 case SVt_PVGV: 14808 /* non-GP case already handled above */ 14809 if(isGV_with_GP(ssv)) { 14810 GvNAME_HEK(dsv) = hek_dup(GvNAME_HEK(dsv), param); 14811 /* Don't call sv_add_backref here as it's going to be 14812 created as part of the magic cloning of the symbol 14813 table--unless this is during a join and the stash 14814 is not actually being cloned. */ 14815 /* Danger Will Robinson - GvGP(dsv) isn't initialised 14816 at the point of this comment. */ 14817 GvSTASH(dsv) = hv_dup(GvSTASH(dsv), param); 14818 if (param->flags & CLONEf_JOIN_IN) 14819 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dsv)), dsv); 14820 GvGP_set(dsv, gp_dup(GvGP(ssv), param)); 14821 (void)GpREFCNT_inc(GvGP(dsv)); 14822 } 14823 break; 14824 case SVt_PVIO: 14825 /* PL_parser->rsfp_filters entries have fake IoDIRP() */ 14826 if(IoFLAGS(dsv) & IOf_FAKE_DIRP) { 14827 /* I have no idea why fake dirp (rsfps) 14828 should be treated differently but otherwise 14829 we end up with leaks -- sky*/ 14830 IoTOP_GV(dsv) = gv_dup_inc(IoTOP_GV(dsv), param); 14831 IoFMT_GV(dsv) = gv_dup_inc(IoFMT_GV(dsv), param); 14832 IoBOTTOM_GV(dsv) = gv_dup_inc(IoBOTTOM_GV(dsv), param); 14833 } else { 14834 IoTOP_GV(dsv) = gv_dup(IoTOP_GV(dsv), param); 14835 IoFMT_GV(dsv) = gv_dup(IoFMT_GV(dsv), param); 14836 IoBOTTOM_GV(dsv) = gv_dup(IoBOTTOM_GV(dsv), param); 14837 if (IoDIRP(dsv)) { 14838 IoDIRP(dsv) = dirp_dup(IoDIRP(dsv), param); 14839 } else { 14840 NOOP; 14841 /* IoDIRP(dsv) is already a copy of IoDIRP(ssv) */ 14842 } 14843 IoIFP(dsv) = fp_dup(IoIFP(ssv), IoTYPE(dsv), param); 14844 } 14845 if (IoOFP(dsv) == IoIFP(ssv)) 14846 IoOFP(dsv) = IoIFP(dsv); 14847 else 14848 IoOFP(dsv) = fp_dup(IoOFP(dsv), IoTYPE(dsv), param); 14849 IoTOP_NAME(dsv) = SAVEPV(IoTOP_NAME(dsv)); 14850 IoFMT_NAME(dsv) = SAVEPV(IoFMT_NAME(dsv)); 14851 IoBOTTOM_NAME(dsv) = SAVEPV(IoBOTTOM_NAME(dsv)); 14852 break; 14853 case SVt_PVAV: 14854 /* avoid cloning an empty array */ 14855 if (AvARRAY((const AV *)ssv) && AvFILLp((const AV *)ssv) >= 0) { 14856 SV **dst_ary, **src_ary; 14857 SSize_t items = AvFILLp((const AV *)ssv) + 1; 14858 14859 src_ary = AvARRAY((const AV *)ssv); 14860 Newx(dst_ary, AvMAX((const AV *)ssv)+1, SV*); 14861 ptr_table_store(PL_ptr_table, src_ary, dst_ary); 14862 AvARRAY(MUTABLE_AV(dsv)) = dst_ary; 14863 AvALLOC((const AV *)dsv) = dst_ary; 14864 if (AvREAL((const AV *)ssv)) { 14865 dst_ary = sv_dup_inc_multiple(src_ary, dst_ary, items, 14866 param); 14867 } 14868 else { 14869 while (items-- > 0) 14870 *dst_ary++ = sv_dup(*src_ary++, param); 14871 } 14872 items = AvMAX((const AV *)ssv) - AvFILLp((const AV *)ssv); 14873 while (items-- > 0) { 14874 *dst_ary++ = NULL; 14875 } 14876 } 14877 else { 14878 AvARRAY(MUTABLE_AV(dsv)) = NULL; 14879 AvALLOC((const AV *)dsv) = (SV**)NULL; 14880 AvMAX( (const AV *)dsv) = -1; 14881 AvFILLp((const AV *)dsv) = -1; 14882 } 14883 break; 14884 case SVt_PVHV: 14885 if (HvARRAY((const HV *)ssv)) { 14886 STRLEN i = 0; 14887 XPVHV * const dxhv = (XPVHV*)SvANY(dsv); 14888 XPVHV * const sxhv = (XPVHV*)SvANY(ssv); 14889 char *darray; 14890 Newx(darray, PERL_HV_ARRAY_ALLOC_BYTES(dxhv->xhv_max+1), 14891 char); 14892 HvARRAY(dsv) = (HE**)darray; 14893 while (i <= sxhv->xhv_max) { 14894 const HE * const source = HvARRAY(ssv)[i]; 14895 HvARRAY(dsv)[i] = source 14896 ? he_dup(source, FALSE, param) : 0; 14897 ++i; 14898 } 14899 if (HvHasAUX(ssv)) 14900 sv_dup_hvaux(ssv, dsv, param); 14901 } 14902 else 14903 HvARRAY(MUTABLE_HV(dsv)) = NULL; 14904 break; 14905 case SVt_PVCV: 14906 if (!(param->flags & CLONEf_COPY_STACKS)) { 14907 CvDEPTH(dsv) = 0; 14908 } 14909 /* FALLTHROUGH */ 14910 case SVt_PVFM: 14911 /* NOTE: not refcounted */ 14912 SvANY(MUTABLE_CV(dsv))->xcv_stash = 14913 hv_dup(CvSTASH(dsv), param); 14914 if ((param->flags & CLONEf_JOIN_IN) && CvSTASH(dsv)) 14915 Perl_sv_add_backref(aTHX_ MUTABLE_SV(CvSTASH(dsv)), dsv); 14916 if (!CvISXSUB(dsv)) { 14917 OP_REFCNT_LOCK; 14918 CvROOT(dsv) = OpREFCNT_inc(CvROOT(dsv)); 14919 OP_REFCNT_UNLOCK; 14920 CvSLABBED_off(dsv); 14921 } else if (CvCONST(dsv)) { 14922 CvXSUBANY(dsv).any_ptr = 14923 sv_dup_inc((const SV *)CvXSUBANY(dsv).any_ptr, param); 14924 } else if (CvREFCOUNTED_ANYSV(dsv)) { 14925 CvXSUBANY(dsv).any_sv = 14926 sv_dup_inc((const SV *)CvXSUBANY(dsv).any_sv, param); 14927 } 14928 assert(!CvSLABBED(dsv)); 14929 if (CvDYNFILE(dsv)) CvFILE(dsv) = SAVEPV(CvFILE(dsv)); 14930 if (CvNAMED(dsv)) 14931 SvANY((CV *)dsv)->xcv_gv_u.xcv_hek = 14932 hek_dup(CvNAME_HEK((CV *)ssv), param); 14933 /* don't dup if copying back - CvGV isn't refcounted, so the 14934 * duped GV may never be freed. A bit of a hack! DAPM */ 14935 else 14936 SvANY(MUTABLE_CV(dsv))->xcv_gv_u.xcv_gv = 14937 CvCVGV_RC(dsv) 14938 ? gv_dup_inc(CvGV(ssv), param) 14939 : (param->flags & CLONEf_JOIN_IN) 14940 ? NULL 14941 : gv_dup(CvGV(ssv), param); 14942 14943 if (!CvISXSUB(ssv)) { 14944 PADLIST * padlist = CvPADLIST(ssv); 14945 if(padlist) 14946 padlist = padlist_dup(padlist, param); 14947 CvPADLIST_set(dsv, padlist); 14948 } else 14949 /* unthreaded perl can't sv_dup so we don't support unthreaded's CvHSCXT */ 14950 PoisonPADLIST(dsv); 14951 14952 CvOUTSIDE(dsv) = 14953 CvWEAKOUTSIDE(ssv) 14954 ? cv_dup( CvOUTSIDE(dsv), param) 14955 : cv_dup_inc(CvOUTSIDE(dsv), param); 14956 break; 14957 case SVt_PVOBJ: 14958 { 14959 Size_t fieldcount = ObjectMAXFIELD(ssv) + 1; 14960 14961 Newx(ObjectFIELDS(dsv), fieldcount, SV *); 14962 ObjectMAXFIELD(dsv) = fieldcount - 1; 14963 14964 sv_dup_inc_multiple(ObjectFIELDS(ssv), ObjectFIELDS(dsv), fieldcount, param); 14965 } 14966 break; 14967 } 14968 } 14969 } 14970 14971 return dsv; 14972 } 14973 14974 SV * 14975 Perl_sv_dup_inc(pTHX_ const SV *const ssv, CLONE_PARAMS *const param) 14976 { 14977 PERL_ARGS_ASSERT_SV_DUP_INC; 14978 return ssv ? SvREFCNT_inc(sv_dup_common(ssv, param)) : NULL; 14979 } 14980 14981 SV * 14982 Perl_sv_dup(pTHX_ const SV *const ssv, CLONE_PARAMS *const param) 14983 { 14984 SV *dsv = ssv ? sv_dup_common(ssv, param) : NULL; 14985 PERL_ARGS_ASSERT_SV_DUP; 14986 14987 /* Track every SV that (at least initially) had a reference count of 0. 14988 We need to do this by holding an actual reference to it in this array. 14989 If we attempt to cheat, turn AvREAL_off(), and store only pointers 14990 (akin to the stashes hash, and the perl stack), we come unstuck if 14991 a weak reference (or other SV legitimately SvREFCNT() == 0 for this 14992 thread) is manipulated in a CLONE method, because CLONE runs before the 14993 unreferenced array is walked to find SVs still with SvREFCNT() == 0 14994 (and fix things up by giving each a reference via the temps stack). 14995 Instead, during CLONE, if the 0-referenced SV has SvREFCNT_inc() and 14996 then SvREFCNT_dec(), it will be cleaned up (and added to the free list) 14997 before the walk of unreferenced happens and a reference to that is SV 14998 added to the temps stack. At which point we have the same SV considered 14999 to be in use, and free to be re-used. Not good. 15000 */ 15001 if (dsv && !(param->flags & CLONEf_COPY_STACKS) && !SvREFCNT(dsv)) { 15002 assert(param->unreferenced); 15003 av_push(param->unreferenced, SvREFCNT_inc(dsv)); 15004 } 15005 15006 return dsv; 15007 } 15008 15009 /* duplicate a context */ 15010 15011 PERL_CONTEXT * 15012 Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param) 15013 { 15014 PERL_CONTEXT *ncxs; 15015 15016 PERL_ARGS_ASSERT_CX_DUP; 15017 15018 if (!cxs) 15019 return (PERL_CONTEXT*)NULL; 15020 15021 /* look for it in the table first */ 15022 ncxs = (PERL_CONTEXT*)ptr_table_fetch(PL_ptr_table, cxs); 15023 if (ncxs) 15024 return ncxs; 15025 15026 /* create anew and remember what it is */ 15027 Newx(ncxs, max + 1, PERL_CONTEXT); 15028 ptr_table_store(PL_ptr_table, cxs, ncxs); 15029 Copy(cxs, ncxs, max + 1, PERL_CONTEXT); 15030 15031 while (ix >= 0) { 15032 PERL_CONTEXT * const ncx = &ncxs[ix]; 15033 if (CxTYPE(ncx) == CXt_SUBST) { 15034 Perl_croak(aTHX_ "Cloning substitution context is unimplemented"); 15035 } 15036 else { 15037 ncx->blk_oldcop = (COP*)any_dup(ncx->blk_oldcop, param->proto_perl); 15038 switch (CxTYPE(ncx)) { 15039 case CXt_SUB: 15040 ncx->blk_sub.cv = cv_dup_inc(ncx->blk_sub.cv, param); 15041 if(CxHASARGS(ncx)){ 15042 ncx->blk_sub.savearray = av_dup_inc(ncx->blk_sub.savearray,param); 15043 } else { 15044 ncx->blk_sub.savearray = NULL; 15045 } 15046 ncx->blk_sub.prevcomppad = (PAD*)ptr_table_fetch(PL_ptr_table, 15047 ncx->blk_sub.prevcomppad); 15048 break; 15049 case CXt_EVAL: 15050 ncx->blk_eval.old_namesv = sv_dup_inc(ncx->blk_eval.old_namesv, 15051 param); 15052 /* XXX should this sv_dup_inc? Or only if CxEVAL_TXT_REFCNTED ???? */ 15053 ncx->blk_eval.cur_text = sv_dup(ncx->blk_eval.cur_text, param); 15054 ncx->blk_eval.cv = cv_dup(ncx->blk_eval.cv, param); 15055 /* XXX what to do with cur_top_env ???? */ 15056 break; 15057 case CXt_LOOP_LAZYSV: 15058 ncx->blk_loop.state_u.lazysv.end 15059 = sv_dup_inc(ncx->blk_loop.state_u.lazysv.end, param); 15060 /* Fallthrough: duplicate lazysv.cur by using the ary.ary 15061 duplication code instead. 15062 We are taking advantage of (1) av_dup_inc and sv_dup_inc 15063 actually being the same function, and (2) order 15064 equivalence of the two unions. 15065 We can assert the later [but only at run time :-(] */ 15066 assert ((void *) &ncx->blk_loop.state_u.ary.ary == 15067 (void *) &ncx->blk_loop.state_u.lazysv.cur); 15068 /* FALLTHROUGH */ 15069 case CXt_LOOP_ARY: 15070 ncx->blk_loop.state_u.ary.ary 15071 = av_dup_inc(ncx->blk_loop.state_u.ary.ary, param); 15072 /* FALLTHROUGH */ 15073 case CXt_LOOP_LIST: 15074 case CXt_LOOP_LAZYIV: 15075 /* code common to all 'for' CXt_LOOP_* types */ 15076 ncx->blk_loop.itersave = 15077 sv_dup_inc(ncx->blk_loop.itersave, param); 15078 if (CxPADLOOP(ncx)) { 15079 PADOFFSET off = ncx->blk_loop.itervar_u.svp 15080 - &CX_CURPAD_SV(ncx->blk_loop, 0); 15081 ncx->blk_loop.oldcomppad = 15082 (PAD*)ptr_table_fetch(PL_ptr_table, 15083 ncx->blk_loop.oldcomppad); 15084 ncx->blk_loop.itervar_u.svp = 15085 &CX_CURPAD_SV(ncx->blk_loop, off); 15086 } 15087 else { 15088 /* this copies the GV if CXp_FOR_GV, or the SV for an 15089 * alias (for \$x (...)) - relies on gv_dup being the 15090 * same as sv_dup */ 15091 ncx->blk_loop.itervar_u.gv 15092 = gv_dup((const GV *)ncx->blk_loop.itervar_u.gv, 15093 param); 15094 } 15095 break; 15096 case CXt_LOOP_PLAIN: 15097 break; 15098 case CXt_FORMAT: 15099 ncx->blk_format.prevcomppad = 15100 (PAD*)ptr_table_fetch(PL_ptr_table, 15101 ncx->blk_format.prevcomppad); 15102 ncx->blk_format.cv = cv_dup_inc(ncx->blk_format.cv, param); 15103 ncx->blk_format.gv = gv_dup(ncx->blk_format.gv, param); 15104 ncx->blk_format.dfoutgv = gv_dup_inc(ncx->blk_format.dfoutgv, 15105 param); 15106 break; 15107 case CXt_GIVEN: 15108 ncx->blk_givwhen.defsv_save = 15109 sv_dup_inc(ncx->blk_givwhen.defsv_save, param); 15110 break; 15111 case CXt_BLOCK: 15112 case CXt_NULL: 15113 case CXt_WHEN: 15114 case CXt_DEFER: 15115 break; 15116 } 15117 } 15118 --ix; 15119 } 15120 return ncxs; 15121 } 15122 15123 /* 15124 =for apidoc si_dup 15125 15126 Duplicate a stack info structure, returning a pointer to the cloned object. 15127 15128 =cut 15129 */ 15130 15131 PERL_SI * 15132 Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param) 15133 { 15134 PERL_SI *nsi; 15135 15136 PERL_ARGS_ASSERT_SI_DUP; 15137 15138 if (!si) 15139 return (PERL_SI*)NULL; 15140 15141 /* look for it in the table first */ 15142 nsi = (PERL_SI*)ptr_table_fetch(PL_ptr_table, si); 15143 if (nsi) 15144 return nsi; 15145 15146 /* create anew and remember what it is */ 15147 Newx(nsi, 1, PERL_SI); 15148 ptr_table_store(PL_ptr_table, si, nsi); 15149 15150 nsi->si_stack = av_dup_inc(si->si_stack, param); 15151 nsi->si_cxix = si->si_cxix; 15152 nsi->si_cxsubix = si->si_cxsubix; 15153 nsi->si_cxmax = si->si_cxmax; 15154 nsi->si_cxstack = cx_dup(si->si_cxstack, si->si_cxix, si->si_cxmax, param); 15155 nsi->si_type = si->si_type; 15156 nsi->si_prev = si_dup(si->si_prev, param); 15157 nsi->si_next = si_dup(si->si_next, param); 15158 nsi->si_markoff = si->si_markoff; 15159 #ifdef PERL_RC_STACK 15160 nsi->si_stack_nonrc_base = si->si_stack_nonrc_base; 15161 #endif 15162 #ifdef PERL_USE_HWM 15163 nsi->si_stack_hwm = 0; 15164 #endif 15165 15166 return nsi; 15167 } 15168 15169 #define POPINT(ss,ix) ((ss)[--(ix)].any_i32) 15170 #define TOPINT(ss,ix) ((ss)[ix].any_i32) 15171 #define POPLONG(ss,ix) ((ss)[--(ix)].any_long) 15172 #define TOPLONG(ss,ix) ((ss)[ix].any_long) 15173 #define POPIV(ss,ix) ((ss)[--(ix)].any_iv) 15174 #define TOPIV(ss,ix) ((ss)[ix].any_iv) 15175 #define POPUV(ss,ix) ((ss)[--(ix)].any_uv) 15176 #define TOPUV(ss,ix) ((ss)[ix].any_uv) 15177 #define POPBOOL(ss,ix) ((ss)[--(ix)].any_bool) 15178 #define TOPBOOL(ss,ix) ((ss)[ix].any_bool) 15179 #define POPPTR(ss,ix) ((ss)[--(ix)].any_ptr) 15180 #define TOPPTR(ss,ix) ((ss)[ix].any_ptr) 15181 #define POPDPTR(ss,ix) ((ss)[--(ix)].any_dptr) 15182 #define TOPDPTR(ss,ix) ((ss)[ix].any_dptr) 15183 #define POPDXPTR(ss,ix) ((ss)[--(ix)].any_dxptr) 15184 #define TOPDXPTR(ss,ix) ((ss)[ix].any_dxptr) 15185 15186 /* XXXXX todo */ 15187 #define pv_dup_inc(p) SAVEPV(p) 15188 #define pv_dup(p) SAVEPV(p) 15189 #define svp_dup_inc(p,pp) any_dup(p,pp) 15190 15191 /* map any object to the new equivalent - either something in the 15192 * ptr table, or something in the interpreter structure 15193 */ 15194 15195 void * 15196 Perl_any_dup(pTHX_ void *v, const PerlInterpreter *proto_perl) 15197 { 15198 void *ret; 15199 15200 PERL_ARGS_ASSERT_ANY_DUP; 15201 15202 if (!v) 15203 return (void*)NULL; 15204 15205 /* look for it in the table first */ 15206 ret = ptr_table_fetch(PL_ptr_table, v); 15207 if (ret) 15208 return ret; 15209 15210 /* see if it is part of the interpreter structure */ 15211 if (v >= (void*)proto_perl && v < (void*)(proto_perl+1)) 15212 ret = (void*)(((char*)aTHX) + (((char*)v) - (char*)proto_perl)); 15213 else { 15214 ret = v; 15215 } 15216 15217 return ret; 15218 } 15219 15220 /* 15221 =for apidoc ss_dup 15222 15223 Duplicate the save stack, returning a pointer to the cloned object. 15224 15225 =cut 15226 */ 15227 15228 ANY * 15229 Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) 15230 { 15231 ANY * const ss = proto_perl->Isavestack; 15232 const I32 max = proto_perl->Isavestack_max + SS_MAXPUSH; 15233 I32 ix = proto_perl->Isavestack_ix; 15234 ANY *nss; 15235 const SV *sv; 15236 const GV *gv; 15237 const AV *av; 15238 const HV *hv; 15239 char *pv; /* no const deliberately */ 15240 void* ptr; 15241 int intval; 15242 long longval; 15243 GP *gp; 15244 IV iv; 15245 I32 i; 15246 char *c = NULL; 15247 void (*dptr) (void*); 15248 void (*dxptr) (pTHX_ void*); 15249 15250 PERL_ARGS_ASSERT_SS_DUP; 15251 15252 Newx(nss, max, ANY); 15253 15254 while (ix > 0) { 15255 const UV uv = POPUV(ss,ix); 15256 const U8 type = (U8)uv & SAVE_MASK; 15257 15258 TOPUV(nss,ix) = uv; 15259 switch (type) { 15260 case SAVEt_CLEARSV: 15261 case SAVEt_CLEARPADRANGE: 15262 break; 15263 case SAVEt_HELEM: /* hash element */ 15264 case SAVEt_SV: /* scalar reference */ 15265 sv = (const SV *)POPPTR(ss,ix); 15266 TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param)); 15267 /* FALLTHROUGH */ 15268 case SAVEt_ITEM: /* normal string */ 15269 case SAVEt_GVSV: /* scalar slot in GV */ 15270 sv = (const SV *)POPPTR(ss,ix); 15271 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15272 if (type == SAVEt_SV) 15273 break; 15274 /* FALLTHROUGH */ 15275 case SAVEt_FREESV: 15276 case SAVEt_MORTALIZESV: 15277 case SAVEt_READONLY_OFF: 15278 sv = (const SV *)POPPTR(ss,ix); 15279 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15280 break; 15281 case SAVEt_FREEPADNAME: 15282 ptr = POPPTR(ss,ix); 15283 TOPPTR(nss,ix) = padname_dup((PADNAME *)ptr, param); 15284 PadnameREFCNT((PADNAME *)TOPPTR(nss,ix))++; 15285 break; 15286 case SAVEt_SHARED_PVREF: /* char* in shared space */ 15287 c = (char*)POPPTR(ss,ix); 15288 TOPPTR(nss,ix) = savesharedpv(c); 15289 ptr = POPPTR(ss,ix); 15290 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15291 break; 15292 case SAVEt_GENERIC_SVREF: /* generic sv */ 15293 case SAVEt_SVREF: /* scalar reference */ 15294 sv = (const SV *)POPPTR(ss,ix); 15295 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15296 if (type == SAVEt_SVREF) 15297 SvREFCNT_inc_simple_void((SV *)TOPPTR(nss,ix)); 15298 ptr = POPPTR(ss,ix); 15299 TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */ 15300 /* this feels very strange, we have a **SV from one thread, 15301 * we copy the SV, but dont change the **SV. But in this thread 15302 * the target of the **SV could be something from the *other* thread. 15303 * So how can this possibly work correctly? */ 15304 break; 15305 case SAVEt_RCPV: 15306 pv = (char *)POPPTR(ss,ix); 15307 TOPPTR(nss,ix) = rcpv_copy(pv); 15308 ptr = POPPTR(ss,ix); 15309 (void)rcpv_copy(*((char **)ptr)); 15310 TOPPTR(nss,ix) = ptr; 15311 /* XXXXX: see comment above. */ 15312 break; 15313 case SAVEt_GVSLOT: /* any slot in GV */ 15314 sv = (const SV *)POPPTR(ss,ix); 15315 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15316 ptr = POPPTR(ss,ix); 15317 TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */ 15318 sv = (const SV *)POPPTR(ss,ix); 15319 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15320 break; 15321 case SAVEt_HV: /* hash reference */ 15322 case SAVEt_AV: /* array reference */ 15323 sv = (const SV *) POPPTR(ss,ix); 15324 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15325 /* FALLTHROUGH */ 15326 case SAVEt_COMPPAD: 15327 case SAVEt_NSTAB: 15328 sv = (const SV *) POPPTR(ss,ix); 15329 TOPPTR(nss,ix) = sv_dup(sv, param); 15330 break; 15331 case SAVEt_INT: /* int reference */ 15332 ptr = POPPTR(ss,ix); 15333 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15334 intval = (int)POPINT(ss,ix); 15335 TOPINT(nss,ix) = intval; 15336 break; 15337 case SAVEt_LONG: /* long reference */ 15338 ptr = POPPTR(ss,ix); 15339 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15340 longval = (long)POPLONG(ss,ix); 15341 TOPLONG(nss,ix) = longval; 15342 break; 15343 case SAVEt_I32: /* I32 reference */ 15344 ptr = POPPTR(ss,ix); 15345 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15346 i = POPINT(ss,ix); 15347 TOPINT(nss,ix) = i; 15348 break; 15349 case SAVEt_IV: /* IV reference */ 15350 case SAVEt_STRLEN: /* STRLEN/size_t ref */ 15351 ptr = POPPTR(ss,ix); 15352 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15353 iv = POPIV(ss,ix); 15354 TOPIV(nss,ix) = iv; 15355 break; 15356 case SAVEt_TMPSFLOOR: 15357 iv = POPIV(ss,ix); 15358 TOPIV(nss,ix) = iv; 15359 break; 15360 case SAVEt_HPTR: /* HV* reference */ 15361 case SAVEt_APTR: /* AV* reference */ 15362 case SAVEt_SPTR: /* SV* reference */ 15363 ptr = POPPTR(ss,ix); 15364 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15365 sv = (const SV *)POPPTR(ss,ix); 15366 TOPPTR(nss,ix) = sv_dup(sv, param); 15367 break; 15368 case SAVEt_VPTR: /* random* reference */ 15369 ptr = POPPTR(ss,ix); 15370 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15371 /* FALLTHROUGH */ 15372 case SAVEt_STRLEN_SMALL: 15373 case SAVEt_INT_SMALL: 15374 case SAVEt_I32_SMALL: 15375 case SAVEt_I16: /* I16 reference */ 15376 case SAVEt_I8: /* I8 reference */ 15377 case SAVEt_BOOL: 15378 ptr = POPPTR(ss,ix); 15379 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15380 break; 15381 case SAVEt_GENERIC_PVREF: /* generic char* */ 15382 case SAVEt_PPTR: /* char* reference */ 15383 ptr = POPPTR(ss,ix); 15384 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15385 c = (char*)POPPTR(ss,ix); 15386 TOPPTR(nss,ix) = pv_dup(c); 15387 break; 15388 case SAVEt_GP: /* scalar reference */ 15389 gp = (GP*)POPPTR(ss,ix); 15390 TOPPTR(nss,ix) = gp = gp_dup(gp, param); 15391 (void)GpREFCNT_inc(gp); 15392 gv = (const GV *)POPPTR(ss,ix); 15393 TOPPTR(nss,ix) = gv_dup_inc(gv, param); 15394 break; 15395 case SAVEt_FREEOP: 15396 ptr = POPPTR(ss,ix); 15397 if (ptr && (((OP*)ptr)->op_private & OPpREFCOUNTED)) { 15398 /* these are assumed to be refcounted properly */ 15399 OP *o; 15400 switch (((OP*)ptr)->op_type) { 15401 case OP_LEAVESUB: 15402 case OP_LEAVESUBLV: 15403 case OP_LEAVEEVAL: 15404 case OP_LEAVE: 15405 case OP_SCOPE: 15406 case OP_LEAVEWRITE: 15407 TOPPTR(nss,ix) = ptr; 15408 o = (OP*)ptr; 15409 OP_REFCNT_LOCK; 15410 (void) OpREFCNT_inc(o); 15411 OP_REFCNT_UNLOCK; 15412 break; 15413 default: 15414 TOPPTR(nss,ix) = NULL; 15415 break; 15416 } 15417 } 15418 else 15419 TOPPTR(nss,ix) = NULL; 15420 break; 15421 case SAVEt_FREECOPHH: 15422 ptr = POPPTR(ss,ix); 15423 TOPPTR(nss,ix) = cophh_copy((COPHH *)ptr); 15424 break; 15425 case SAVEt_ADELETE: 15426 av = (const AV *)POPPTR(ss,ix); 15427 TOPPTR(nss,ix) = av_dup_inc(av, param); 15428 i = POPINT(ss,ix); 15429 TOPINT(nss,ix) = i; 15430 break; 15431 case SAVEt_DELETE: 15432 hv = (const HV *)POPPTR(ss,ix); 15433 TOPPTR(nss,ix) = hv_dup_inc(hv, param); 15434 i = POPINT(ss,ix); 15435 TOPINT(nss,ix) = i; 15436 /* FALLTHROUGH */ 15437 case SAVEt_FREEPV: 15438 c = (char*)POPPTR(ss,ix); 15439 TOPPTR(nss,ix) = pv_dup_inc(c); 15440 break; 15441 case SAVEt_FREERCPV: 15442 c = (char *)POPPTR(ss,ix); 15443 TOPPTR(nss,ix) = rcpv_copy(c); 15444 break; 15445 case SAVEt_STACK_POS: /* Position on Perl stack */ 15446 i = POPINT(ss,ix); 15447 TOPINT(nss,ix) = i; 15448 break; 15449 case SAVEt_DESTRUCTOR: 15450 ptr = POPPTR(ss,ix); 15451 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */ 15452 dptr = POPDPTR(ss,ix); 15453 TOPDPTR(nss,ix) = DPTR2FPTR(void (*)(void*), 15454 any_dup(FPTR2DPTR(void *, dptr), 15455 proto_perl)); 15456 break; 15457 case SAVEt_DESTRUCTOR_X: 15458 ptr = POPPTR(ss,ix); 15459 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */ 15460 dxptr = POPDXPTR(ss,ix); 15461 TOPDXPTR(nss,ix) = DPTR2FPTR(void (*)(pTHX_ void*), 15462 any_dup(FPTR2DPTR(void *, dxptr), 15463 proto_perl)); 15464 break; 15465 case SAVEt_REGCONTEXT: 15466 case SAVEt_ALLOC: 15467 ix -= uv >> SAVE_TIGHT_SHIFT; 15468 break; 15469 case SAVEt_AELEM: /* array element */ 15470 sv = (const SV *)POPPTR(ss,ix); 15471 TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param)); 15472 iv = POPIV(ss,ix); 15473 TOPIV(nss,ix) = iv; 15474 av = (const AV *)POPPTR(ss,ix); 15475 TOPPTR(nss,ix) = av_dup_inc(av, param); 15476 break; 15477 case SAVEt_OP: 15478 ptr = POPPTR(ss,ix); 15479 TOPPTR(nss,ix) = ptr; 15480 break; 15481 case SAVEt_HINTS_HH: 15482 hv = (const HV *)POPPTR(ss,ix); 15483 TOPPTR(nss,ix) = hv_dup_inc(hv, param); 15484 /* FALLTHROUGH */ 15485 case SAVEt_HINTS: 15486 ptr = POPPTR(ss,ix); 15487 ptr = cophh_copy((COPHH*)ptr); 15488 TOPPTR(nss,ix) = ptr; 15489 i = POPINT(ss,ix); 15490 TOPINT(nss,ix) = i; 15491 break; 15492 case SAVEt_PADSV_AND_MORTALIZE: 15493 longval = (long)POPLONG(ss,ix); 15494 TOPLONG(nss,ix) = longval; 15495 ptr = POPPTR(ss,ix); 15496 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); 15497 sv = (const SV *)POPPTR(ss,ix); 15498 TOPPTR(nss,ix) = sv_dup_inc(sv, param); 15499 break; 15500 case SAVEt_SET_SVFLAGS: 15501 i = POPINT(ss,ix); 15502 TOPINT(nss,ix) = i; 15503 i = POPINT(ss,ix); 15504 TOPINT(nss,ix) = i; 15505 sv = (const SV *)POPPTR(ss,ix); 15506 TOPPTR(nss,ix) = sv_dup(sv, param); 15507 break; 15508 case SAVEt_CURCOP_WARNINGS: 15509 /* FALLTHROUGH */ 15510 case SAVEt_COMPILE_WARNINGS: 15511 ptr = POPPTR(ss,ix); 15512 TOPPTR(nss,ix) = DUP_WARNINGS((char*)ptr); 15513 break; 15514 case SAVEt_PARSER: 15515 ptr = POPPTR(ss,ix); 15516 TOPPTR(nss,ix) = parser_dup((const yy_parser*)ptr, param); 15517 break; 15518 default: 15519 Perl_croak(aTHX_ 15520 "panic: ss_dup inconsistency (%" IVdf ")", (IV) type); 15521 } 15522 } 15523 15524 return nss; 15525 } 15526 15527 15528 /* if sv is a stash, call $class->CLONE_SKIP(), and set the SVphv_CLONEABLE 15529 * flag to the result. This is done for each stash before cloning starts, 15530 * so we know which stashes want their objects cloned */ 15531 15532 static void 15533 do_mark_cloneable_stash(pTHX_ SV *const sv) 15534 { 15535 const HEK * const hvname = HvNAME_HEK((const HV *)sv); 15536 if (hvname) { 15537 GV* const cloner = gv_fetchmethod_autoload(MUTABLE_HV(sv), "CLONE_SKIP", 0); 15538 SvFLAGS(sv) |= SVphv_CLONEABLE; /* clone objects by default */ 15539 if (cloner && GvCV(cloner)) { 15540 dSP; 15541 UV status; 15542 15543 ENTER; 15544 SAVETMPS; 15545 PUSHMARK(SP); 15546 mXPUSHs(newSVhek(hvname)); 15547 PUTBACK; 15548 call_sv(MUTABLE_SV(GvCV(cloner)), G_SCALAR); 15549 SPAGAIN; 15550 status = POPu; 15551 PUTBACK; 15552 FREETMPS; 15553 LEAVE; 15554 if (status) 15555 SvFLAGS(sv) &= ~SVphv_CLONEABLE; 15556 } 15557 } 15558 } 15559 15560 15561 15562 /* 15563 =for apidoc perl_clone 15564 15565 Create and return a new interpreter by cloning the current one. 15566 15567 C<perl_clone> takes these flags as parameters: 15568 15569 C<CLONEf_COPY_STACKS> - is used to, well, copy the stacks also, 15570 without it we only clone the data and zero the stacks, 15571 with it we copy the stacks and the new perl interpreter is 15572 ready to run at the exact same point as the previous one. 15573 The pseudo-fork code uses C<COPY_STACKS> while the 15574 threads->create doesn't. 15575 15576 C<CLONEf_KEEP_PTR_TABLE> - 15577 C<perl_clone> keeps a ptr_table with the pointer of the old 15578 variable as a key and the new variable as a value, 15579 this allows it to check if something has been cloned and not 15580 clone it again, but rather just use the value and increase the 15581 refcount. 15582 If C<KEEP_PTR_TABLE> is not set then C<perl_clone> will kill the ptr_table 15583 using the function S<C<ptr_table_free(PL_ptr_table); PL_ptr_table = NULL;>>. 15584 A reason to keep it around is if you want to dup some of your own 15585 variables which are outside the graph that perl scans. 15586 15587 C<CLONEf_CLONE_HOST> - 15588 This is a win32 thing, it is ignored on unix, it tells perl's 15589 win32host code (which is c++) to clone itself, this is needed on 15590 win32 if you want to run two threads at the same time, 15591 if you just want to do some stuff in a separate perl interpreter 15592 and then throw it away and return to the original one, 15593 you don't need to do anything. 15594 15595 =cut 15596 */ 15597 15598 /* XXX the above needs expanding by someone who actually understands it ! */ 15599 EXTERN_C PerlInterpreter * 15600 perl_clone_host(PerlInterpreter* proto_perl, UV flags); 15601 15602 PerlInterpreter * 15603 perl_clone(PerlInterpreter *proto_perl, UV flags) 15604 { 15605 #ifdef PERL_IMPLICIT_SYS 15606 15607 PERL_ARGS_ASSERT_PERL_CLONE; 15608 15609 /* perlhost.h so we need to call into it 15610 to clone the host, CPerlHost should have a c interface, sky */ 15611 15612 #ifndef __amigaos4__ 15613 if (flags & CLONEf_CLONE_HOST) { 15614 return perl_clone_host(proto_perl,flags); 15615 } 15616 #endif 15617 return perl_clone_using(proto_perl, flags, 15618 proto_perl->IMem, 15619 proto_perl->IMemShared, 15620 proto_perl->IMemParse, 15621 proto_perl->IEnv, 15622 proto_perl->IStdIO, 15623 proto_perl->ILIO, 15624 proto_perl->IDir, 15625 proto_perl->ISock, 15626 proto_perl->IProc); 15627 } 15628 15629 PerlInterpreter * 15630 perl_clone_using(PerlInterpreter *proto_perl, UV flags, 15631 struct IPerlMem* ipM, struct IPerlMem* ipMS, 15632 struct IPerlMem* ipMP, struct IPerlEnv* ipE, 15633 struct IPerlStdIO* ipStd, struct IPerlLIO* ipLIO, 15634 struct IPerlDir* ipD, struct IPerlSock* ipS, 15635 struct IPerlProc* ipP) 15636 { 15637 /* XXX many of the string copies here can be optimized if they're 15638 * constants; they need to be allocated as common memory and just 15639 * their pointers copied. */ 15640 15641 IV i; 15642 CLONE_PARAMS clone_params; 15643 CLONE_PARAMS* const param = &clone_params; 15644 15645 PerlInterpreter * const my_perl = (PerlInterpreter*)(*ipM->pMalloc)(ipM, sizeof(PerlInterpreter)); 15646 15647 PERL_ARGS_ASSERT_PERL_CLONE_USING; 15648 #else /* !PERL_IMPLICIT_SYS */ 15649 IV i; 15650 CLONE_PARAMS clone_params; 15651 CLONE_PARAMS* param = &clone_params; 15652 PerlInterpreter * const my_perl = (PerlInterpreter*)PerlMem_malloc(sizeof(PerlInterpreter)); 15653 15654 PERL_ARGS_ASSERT_PERL_CLONE; 15655 #endif /* PERL_IMPLICIT_SYS */ 15656 15657 /* for each stash, determine whether its objects should be cloned */ 15658 S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK); 15659 my_perl->Iphase = PERL_PHASE_CONSTRUCT; 15660 PERL_SET_THX(my_perl); 15661 15662 #ifdef DEBUGGING 15663 PoisonNew(my_perl, 1, PerlInterpreter); 15664 PL_op = NULL; 15665 PL_curcop = NULL; 15666 PL_defstash = NULL; /* may be used by perl malloc() */ 15667 PL_markstack = 0; 15668 PL_scopestack = 0; 15669 PL_scopestack_name = 0; 15670 PL_savestack = 0; 15671 PL_savestack_ix = 0; 15672 PL_savestack_max = -1; 15673 PL_sig_pending = 0; 15674 PL_parser = NULL; 15675 PL_eval_begin_nest_depth = proto_perl->Ieval_begin_nest_depth; 15676 Zero(&PL_debug_pad, 1, struct perl_debug_pad); 15677 Zero(&PL_padname_undef, 1, PADNAME); 15678 Zero(&PL_padname_const, 1, PADNAME); 15679 # ifdef DEBUG_LEAKING_SCALARS 15680 PL_sv_serial = (((UV)my_perl >> 2) & 0xfff) * 1000000; 15681 # endif 15682 # ifdef PERL_TRACE_OPS 15683 Zero(PL_op_exec_cnt, OP_max+2, UV); 15684 # endif 15685 #else /* !DEBUGGING */ 15686 Zero(my_perl, 1, PerlInterpreter); 15687 #endif /* DEBUGGING */ 15688 15689 #ifdef PERL_IMPLICIT_SYS 15690 /* host pointers */ 15691 PL_Mem = ipM; 15692 PL_MemShared = ipMS; 15693 PL_MemParse = ipMP; 15694 PL_Env = ipE; 15695 PL_StdIO = ipStd; 15696 PL_LIO = ipLIO; 15697 PL_Dir = ipD; 15698 PL_Sock = ipS; 15699 PL_Proc = ipP; 15700 #endif /* PERL_IMPLICIT_SYS */ 15701 15702 15703 param->flags = flags; 15704 /* Nothing in the core code uses this, but we make it available to 15705 extensions (using mg_dup). */ 15706 param->proto_perl = proto_perl; 15707 /* Likely nothing will use this, but it is initialised to be consistent 15708 with Perl_clone_params_new(). */ 15709 param->new_perl = my_perl; 15710 param->unreferenced = NULL; 15711 15712 15713 INIT_TRACK_MEMPOOL(my_perl->Imemory_debug_header, my_perl); 15714 15715 PL_body_arenas = NULL; 15716 Zero(&PL_body_roots, 1, PL_body_roots); 15717 15718 PL_sv_count = 0; 15719 PL_sv_root = NULL; 15720 PL_sv_arenaroot = NULL; 15721 15722 PL_debug = proto_perl->Idebug; 15723 15724 /* dbargs array probably holds garbage */ 15725 PL_dbargs = NULL; 15726 15727 PL_compiling = proto_perl->Icompiling; 15728 15729 /* pseudo environmental stuff */ 15730 PL_origargc = proto_perl->Iorigargc; 15731 PL_origargv = proto_perl->Iorigargv; 15732 15733 #ifndef NO_TAINT_SUPPORT 15734 /* Set tainting stuff before PerlIO_debug can possibly get called */ 15735 PL_tainting = proto_perl->Itainting; 15736 PL_taint_warn = proto_perl->Itaint_warn; 15737 #else 15738 PL_tainting = FALSE; 15739 PL_taint_warn = FALSE; 15740 #endif 15741 15742 PL_minus_c = proto_perl->Iminus_c; 15743 15744 PL_localpatches = proto_perl->Ilocalpatches; 15745 PL_splitstr = SAVEPV(proto_perl->Isplitstr); 15746 PL_minus_n = proto_perl->Iminus_n; 15747 PL_minus_p = proto_perl->Iminus_p; 15748 PL_minus_l = proto_perl->Iminus_l; 15749 PL_minus_a = proto_perl->Iminus_a; 15750 PL_minus_E = proto_perl->Iminus_E; 15751 PL_minus_F = proto_perl->Iminus_F; 15752 PL_doswitches = proto_perl->Idoswitches; 15753 PL_dowarn = proto_perl->Idowarn; 15754 #ifdef PERL_SAWAMPERSAND 15755 PL_sawampersand = proto_perl->Isawampersand; 15756 #endif 15757 PL_unsafe = proto_perl->Iunsafe; 15758 PL_perldb = proto_perl->Iperldb; 15759 PL_perl_destruct_level = proto_perl->Iperl_destruct_level; 15760 PL_exit_flags = proto_perl->Iexit_flags; 15761 15762 /* XXX time(&PL_basetime) when asked for? */ 15763 PL_basetime = proto_perl->Ibasetime; 15764 15765 PL_maxsysfd = proto_perl->Imaxsysfd; 15766 PL_statusvalue = proto_perl->Istatusvalue; 15767 #ifdef __VMS 15768 PL_statusvalue_vms = proto_perl->Istatusvalue_vms; 15769 #else 15770 PL_statusvalue_posix = proto_perl->Istatusvalue_posix; 15771 #endif 15772 15773 /* RE engine related */ 15774 PL_regmatch_slab = NULL; 15775 PL_reg_curpm = NULL; 15776 15777 PL_sub_generation = proto_perl->Isub_generation; 15778 15779 /* funky return mechanisms */ 15780 PL_forkprocess = proto_perl->Iforkprocess; 15781 15782 /* internal state */ 15783 PL_main_start = proto_perl->Imain_start; 15784 PL_eval_root = proto_perl->Ieval_root; 15785 PL_eval_start = proto_perl->Ieval_start; 15786 15787 PL_filemode = proto_perl->Ifilemode; 15788 PL_lastfd = proto_perl->Ilastfd; 15789 PL_oldname = proto_perl->Ioldname; /* XXX not quite right */ 15790 PL_gensym = proto_perl->Igensym; 15791 15792 PL_laststatval = proto_perl->Ilaststatval; 15793 PL_laststype = proto_perl->Ilaststype; 15794 PL_mess_sv = NULL; 15795 15796 PL_profiledata = NULL; 15797 15798 PL_generation = proto_perl->Igeneration; 15799 15800 PL_in_clean_objs = proto_perl->Iin_clean_objs; 15801 PL_in_clean_all = proto_perl->Iin_clean_all; 15802 15803 PL_delaymagic_uid = proto_perl->Idelaymagic_uid; 15804 PL_delaymagic_euid = proto_perl->Idelaymagic_euid; 15805 PL_delaymagic_gid = proto_perl->Idelaymagic_gid; 15806 PL_delaymagic_egid = proto_perl->Idelaymagic_egid; 15807 PL_nomemok = proto_perl->Inomemok; 15808 PL_an = proto_perl->Ian; 15809 PL_evalseq = proto_perl->Ievalseq; 15810 PL_origalen = proto_perl->Iorigalen; 15811 15812 PL_sighandlerp = proto_perl->Isighandlerp; 15813 PL_sighandler1p = proto_perl->Isighandler1p; 15814 PL_sighandler3p = proto_perl->Isighandler3p; 15815 15816 PL_runops = proto_perl->Irunops; 15817 15818 PL_subline = proto_perl->Isubline; 15819 15820 PL_cv_has_eval = proto_perl->Icv_has_eval; 15821 /* Unicode features (see perlrun/-C) */ 15822 PL_unicode = proto_perl->Iunicode; 15823 15824 /* Pre-5.8 signals control */ 15825 PL_signals = proto_perl->Isignals; 15826 15827 /* times() ticks per second */ 15828 PL_clocktick = proto_perl->Iclocktick; 15829 15830 /* Recursion stopper for PerlIO_find_layer */ 15831 PL_in_load_module = proto_perl->Iin_load_module; 15832 15833 /* Not really needed/useful since the reenrant_retint is "volatile", 15834 * but do it for consistency's sake. */ 15835 PL_reentrant_retint = proto_perl->Ireentrant_retint; 15836 15837 /* Hooks to shared SVs and locks. */ 15838 PL_sharehook = proto_perl->Isharehook; 15839 PL_lockhook = proto_perl->Ilockhook; 15840 PL_unlockhook = proto_perl->Iunlockhook; 15841 PL_threadhook = proto_perl->Ithreadhook; 15842 PL_destroyhook = proto_perl->Idestroyhook; 15843 PL_signalhook = proto_perl->Isignalhook; 15844 15845 PL_globhook = proto_perl->Iglobhook; 15846 15847 PL_srand_called = proto_perl->Isrand_called; 15848 Copy(&(proto_perl->Irandom_state), &PL_random_state, 1, PL_RANDOM_STATE_TYPE); 15849 PL_srand_override = proto_perl->Isrand_override; 15850 PL_srand_override_next = proto_perl->Isrand_override_next; 15851 15852 if (flags & CLONEf_COPY_STACKS) { 15853 /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */ 15854 PL_tmps_ix = proto_perl->Itmps_ix; 15855 PL_tmps_max = proto_perl->Itmps_max; 15856 PL_tmps_floor = proto_perl->Itmps_floor; 15857 15858 /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] 15859 * NOTE: unlike the others! */ 15860 PL_scopestack_ix = proto_perl->Iscopestack_ix; 15861 PL_scopestack_max = proto_perl->Iscopestack_max; 15862 15863 /* next SSPUSHFOO() sets PL_savestack[PL_savestack_ix] 15864 * NOTE: unlike the others! */ 15865 PL_savestack_ix = proto_perl->Isavestack_ix; 15866 PL_savestack_max = proto_perl->Isavestack_max; 15867 } 15868 15869 PL_start_env = proto_perl->Istart_env; /* XXXXXX */ 15870 PL_top_env = &PL_start_env; 15871 15872 PL_op = proto_perl->Iop; 15873 15874 PL_Sv = NULL; 15875 PL_Xpv = (XPV*)NULL; 15876 my_perl->Ina = proto_perl->Ina; 15877 15878 PL_statcache = proto_perl->Istatcache; 15879 15880 #ifndef NO_TAINT_SUPPORT 15881 PL_tainted = proto_perl->Itainted; 15882 #else 15883 PL_tainted = FALSE; 15884 #endif 15885 PL_curpm = proto_perl->Icurpm; /* XXX No PMOP ref count */ 15886 15887 PL_chopset = proto_perl->Ichopset; /* XXX never deallocated */ 15888 15889 PL_restartjmpenv = proto_perl->Irestartjmpenv; 15890 PL_restartop = proto_perl->Irestartop; 15891 PL_in_eval = proto_perl->Iin_eval; 15892 PL_delaymagic = proto_perl->Idelaymagic; 15893 PL_phase = proto_perl->Iphase; 15894 PL_localizing = proto_perl->Ilocalizing; 15895 15896 PL_hv_fetch_ent_mh = NULL; 15897 PL_modcount = proto_perl->Imodcount; 15898 PL_lastgotoprobe = NULL; 15899 PL_dumpindent = proto_perl->Idumpindent; 15900 15901 PL_efloatbuf = NULL; /* reinits on demand */ 15902 PL_efloatsize = 0; /* reinits on demand */ 15903 15904 /* regex stuff */ 15905 15906 PL_colorset = 0; /* reinits PL_colors[] */ 15907 /*PL_colors[6] = {0,0,0,0,0,0};*/ 15908 15909 /* Pluggable optimizer */ 15910 PL_peepp = proto_perl->Ipeepp; 15911 PL_rpeepp = proto_perl->Irpeepp; 15912 /* op_free() hook */ 15913 PL_opfreehook = proto_perl->Iopfreehook; 15914 15915 # ifdef PERL_MEM_LOG 15916 Zero(PL_mem_log, sizeof(PL_mem_log), char); 15917 # endif 15918 15919 #ifdef USE_REENTRANT_API 15920 /* XXX: things like -Dm will segfault here in perlio, but doing 15921 * PERL_SET_CONTEXT(proto_perl); 15922 * breaks too many other things 15923 */ 15924 Perl_reentrant_init(aTHX); 15925 #endif 15926 15927 /* create SV map for pointer relocation */ 15928 PL_ptr_table = ptr_table_new(); 15929 15930 /* initialize these special pointers as early as possible */ 15931 init_constants(); 15932 ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef); 15933 ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no); 15934 ptr_table_store(PL_ptr_table, &proto_perl->Isv_zero, &PL_sv_zero); 15935 ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes); 15936 ptr_table_store(PL_ptr_table, &proto_perl->Ipadname_const, 15937 &PL_padname_const); 15938 15939 /* create (a non-shared!) shared string table */ 15940 PL_strtab = newHV(); 15941 HvSHAREKEYS_off(PL_strtab); 15942 hv_ksplit(PL_strtab, HvTOTALKEYS(proto_perl->Istrtab)); 15943 ptr_table_store(PL_ptr_table, proto_perl->Istrtab, PL_strtab); 15944 15945 Zero(PL_sv_consts, SV_CONSTS_COUNT, SV*); 15946 15947 PL_compiling.cop_file = rcpv_copy(proto_perl->Icompiling.cop_file); 15948 15949 ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling); 15950 PL_compiling.cop_warnings = DUP_WARNINGS(PL_compiling.cop_warnings); 15951 CopHINTHASH_set(&PL_compiling, cophh_copy(CopHINTHASH_get(&PL_compiling))); 15952 PL_curcop = (COP*)any_dup(proto_perl->Icurcop, proto_perl); 15953 15954 param->stashes = newAV(); /* Setup array of objects to call clone on */ 15955 /* This makes no difference to the implementation, as it always pushes 15956 and shifts pointers to other SVs without changing their reference 15957 count, with the array becoming empty before it is freed. However, it 15958 makes it conceptually clear what is going on, and will avoid some 15959 work inside av.c, filling slots between AvFILL() and AvMAX() with 15960 &PL_sv_undef, and SvREFCNT_dec()ing those. */ 15961 AvREAL_off(param->stashes); 15962 15963 if (!(flags & CLONEf_COPY_STACKS)) { 15964 param->unreferenced = newAV(); 15965 } 15966 15967 #ifdef PERLIO_LAYERS 15968 /* Clone PerlIO tables as soon as we can handle general xx_dup() */ 15969 PerlIO_clone(aTHX_ proto_perl, param); 15970 #endif 15971 15972 PL_envgv = gv_dup_inc(proto_perl->Ienvgv, param); 15973 PL_incgv = gv_dup_inc(proto_perl->Iincgv, param); 15974 PL_hintgv = gv_dup_inc(proto_perl->Ihintgv, param); 15975 PL_origfilename = SAVEPV(proto_perl->Iorigfilename); 15976 PL_xsubfilename = proto_perl->Ixsubfilename; 15977 PL_diehook = sv_dup_inc(proto_perl->Idiehook, param); 15978 PL_warnhook = sv_dup_inc(proto_perl->Iwarnhook, param); 15979 15980 PL_hook__require__before = sv_dup_inc(proto_perl->Ihook__require__before, param); 15981 PL_hook__require__after = sv_dup_inc(proto_perl->Ihook__require__after, param); 15982 15983 /* switches */ 15984 PL_patchlevel = sv_dup_inc(proto_perl->Ipatchlevel, param); 15985 PL_inplace = SAVEPV(proto_perl->Iinplace); 15986 PL_e_script = sv_dup_inc(proto_perl->Ie_script, param); 15987 15988 /* magical thingies */ 15989 15990 SvPVCLEAR(PERL_DEBUG_PAD(0)); /* For regex debugging. */ 15991 SvPVCLEAR(PERL_DEBUG_PAD(1)); /* ext/re needs these */ 15992 SvPVCLEAR(PERL_DEBUG_PAD(2)); /* even without DEBUGGING. */ 15993 15994 15995 /* Clone the regex array */ 15996 /* ORANGE FIXME for plugins, probably in the SV dup code. 15997 newSViv(PTR2IV(CALLREGDUPE( 15998 INT2PTR(REGEXP *, SvIVX(regex)), param)))) 15999 */ 16000 PL_regex_padav = av_dup_inc(proto_perl->Iregex_padav, param); 16001 PL_regex_pad = AvARRAY(PL_regex_padav); 16002 16003 PL_stashpadmax = proto_perl->Istashpadmax; 16004 PL_stashpadix = proto_perl->Istashpadix ; 16005 Newx(PL_stashpad, PL_stashpadmax, HV *); 16006 { 16007 PADOFFSET o = 0; 16008 for (; o < PL_stashpadmax; ++o) 16009 PL_stashpad[o] = hv_dup(proto_perl->Istashpad[o], param); 16010 } 16011 16012 /* shortcuts to various I/O objects */ 16013 PL_ofsgv = gv_dup_inc(proto_perl->Iofsgv, param); 16014 PL_stdingv = gv_dup(proto_perl->Istdingv, param); 16015 PL_stderrgv = gv_dup(proto_perl->Istderrgv, param); 16016 PL_defgv = gv_dup(proto_perl->Idefgv, param); 16017 PL_argvgv = gv_dup_inc(proto_perl->Iargvgv, param); 16018 PL_argvoutgv = gv_dup(proto_perl->Iargvoutgv, param); 16019 PL_argvout_stack = av_dup_inc(proto_perl->Iargvout_stack, param); 16020 16021 /* shortcuts to regexp stuff */ 16022 PL_replgv = gv_dup_inc(proto_perl->Ireplgv, param); 16023 16024 /* shortcuts to misc objects */ 16025 PL_errgv = gv_dup(proto_perl->Ierrgv, param); 16026 16027 /* shortcuts to debugging objects */ 16028 PL_DBgv = gv_dup_inc(proto_perl->IDBgv, param); 16029 PL_DBline = gv_dup_inc(proto_perl->IDBline, param); 16030 PL_DBsub = gv_dup_inc(proto_perl->IDBsub, param); 16031 PL_DBsingle = sv_dup(proto_perl->IDBsingle, param); 16032 PL_DBtrace = sv_dup(proto_perl->IDBtrace, param); 16033 PL_DBsignal = sv_dup(proto_perl->IDBsignal, param); 16034 Copy(proto_perl->IDBcontrol, PL_DBcontrol, DBVARMG_COUNT, IV); 16035 16036 /* symbol tables */ 16037 PL_defstash = hv_dup_inc(proto_perl->Idefstash, param); 16038 PL_curstash = hv_dup_inc(proto_perl->Icurstash, param); 16039 PL_debstash = hv_dup(proto_perl->Idebstash, param); 16040 PL_globalstash = hv_dup(proto_perl->Iglobalstash, param); 16041 PL_curstname = sv_dup_inc(proto_perl->Icurstname, param); 16042 16043 PL_beginav = av_dup_inc(proto_perl->Ibeginav, param); 16044 PL_beginav_save = av_dup_inc(proto_perl->Ibeginav_save, param); 16045 PL_checkav_save = av_dup_inc(proto_perl->Icheckav_save, param); 16046 PL_unitcheckav = av_dup_inc(proto_perl->Iunitcheckav, param); 16047 PL_unitcheckav_save = av_dup_inc(proto_perl->Iunitcheckav_save, param); 16048 PL_endav = av_dup_inc(proto_perl->Iendav, param); 16049 PL_checkav = av_dup_inc(proto_perl->Icheckav, param); 16050 PL_initav = av_dup_inc(proto_perl->Iinitav, param); 16051 PL_savebegin = proto_perl->Isavebegin; 16052 16053 PL_isarev = hv_dup_inc(proto_perl->Iisarev, param); 16054 16055 /* subprocess state */ 16056 PL_fdpid = av_dup_inc(proto_perl->Ifdpid, param); 16057 16058 if (proto_perl->Iop_mask) 16059 PL_op_mask = SAVEPVN(proto_perl->Iop_mask, PL_maxo); 16060 else 16061 PL_op_mask = NULL; 16062 /* PL_asserting = proto_perl->Iasserting; */ 16063 16064 /* current interpreter roots */ 16065 PL_main_cv = cv_dup_inc(proto_perl->Imain_cv, param); 16066 OP_REFCNT_LOCK; 16067 PL_main_root = OpREFCNT_inc(proto_perl->Imain_root); 16068 OP_REFCNT_UNLOCK; 16069 16070 /* runtime control stuff */ 16071 PL_curcopdb = (COP*)any_dup(proto_perl->Icurcopdb, proto_perl); 16072 16073 PL_preambleav = av_dup_inc(proto_perl->Ipreambleav, param); 16074 16075 PL_ors_sv = sv_dup_inc(proto_perl->Iors_sv, param); 16076 16077 /* interpreter atexit processing */ 16078 PL_exitlistlen = proto_perl->Iexitlistlen; 16079 if (PL_exitlistlen) { 16080 Newx(PL_exitlist, PL_exitlistlen, PerlExitListEntry); 16081 Copy(proto_perl->Iexitlist, PL_exitlist, PL_exitlistlen, PerlExitListEntry); 16082 } 16083 else 16084 PL_exitlist = (PerlExitListEntry*)NULL; 16085 16086 PL_my_cxt_size = proto_perl->Imy_cxt_size; 16087 if (PL_my_cxt_size) { 16088 Newx(PL_my_cxt_list, PL_my_cxt_size, void *); 16089 Copy(proto_perl->Imy_cxt_list, PL_my_cxt_list, PL_my_cxt_size, void *); 16090 } 16091 else { 16092 PL_my_cxt_list = (void**)NULL; 16093 } 16094 PL_modglobal = hv_dup_inc(proto_perl->Imodglobal, param); 16095 PL_custom_op_names = hv_dup_inc(proto_perl->Icustom_op_names,param); 16096 PL_custom_op_descs = hv_dup_inc(proto_perl->Icustom_op_descs,param); 16097 PL_custom_ops = hv_dup_inc(proto_perl->Icustom_ops, param); 16098 16099 PL_compcv = cv_dup(proto_perl->Icompcv, param); 16100 16101 PAD_CLONE_VARS(proto_perl, param); 16102 16103 #ifdef HAVE_INTERP_INTERN 16104 sys_intern_dup(&proto_perl->Isys_intern, &PL_sys_intern); 16105 #endif 16106 16107 PL_DBcv = cv_dup(proto_perl->IDBcv, param); 16108 16109 #ifdef PERL_USES_PL_PIDSTATUS 16110 PL_pidstatus = newHV(); /* XXX flag for cloning? */ 16111 #endif 16112 PL_osname = SAVEPV(proto_perl->Iosname); 16113 PL_parser = parser_dup(proto_perl->Iparser, param); 16114 16115 /* XXX this only works if the saved cop has already been cloned */ 16116 if (proto_perl->Iparser) { 16117 PL_parser->saved_curcop = (COP*)any_dup( 16118 proto_perl->Iparser->saved_curcop, 16119 proto_perl); 16120 } 16121 16122 PL_subname = sv_dup_inc(proto_perl->Isubname, param); 16123 16124 #ifdef USE_PL_CURLOCALES 16125 for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) { 16126 PL_curlocales[i] = SAVEPV("C"); 16127 } 16128 #endif 16129 #ifdef USE_PL_CUR_LC_ALL 16130 PL_cur_LC_ALL = SAVEPV("C"); 16131 #endif 16132 #ifdef USE_LOCALE_CTYPE 16133 Copy(PL_fold, PL_fold_locale, 256, U8); 16134 16135 /* Should we warn if uses locale? */ 16136 PL_ctype_name = SAVEPV("C"); 16137 PL_warn_locale = sv_dup_inc(proto_perl->Iwarn_locale, param); 16138 PL_in_utf8_CTYPE_locale = false; 16139 PL_in_utf8_turkic_locale = false; 16140 #endif 16141 16142 /* Did the locale setup indicate UTF-8? */ 16143 PL_utf8locale = false; 16144 16145 #ifdef USE_LOCALE_COLLATE 16146 PL_in_utf8_COLLATE_locale = false; 16147 PL_collation_name = SAVEPV("C"); 16148 PL_collation_ix = proto_perl->Icollation_ix; 16149 PL_collation_standard = true; 16150 PL_collxfrm_base = 0; 16151 PL_collxfrm_mult = 0; 16152 PL_strxfrm_max_cp = 0; 16153 PL_strxfrm_is_behaved = proto_perl->Istrxfrm_is_behaved; 16154 PL_strxfrm_NUL_replacement = '\0'; 16155 #endif /* USE_LOCALE_COLLATE */ 16156 16157 #ifdef USE_LOCALE_THREADS 16158 assert(PL_locale_mutex_depth <= 0); 16159 PL_locale_mutex_depth = 0; 16160 #endif 16161 16162 #ifdef USE_LOCALE_NUMERIC 16163 PL_numeric_name = SAVEPV("C"); 16164 PL_numeric_radix_sv = newSVpvs("."); 16165 PL_underlying_radix_sv = newSVpvs("."); 16166 PL_numeric_standard = true; 16167 PL_numeric_underlying = true; 16168 PL_numeric_underlying_is_standard = true; 16169 16170 #endif /* !USE_LOCALE_NUMERIC */ 16171 #if defined(USE_POSIX_2008_LOCALE) 16172 PL_scratch_locale_obj = NULL; 16173 PL_cur_locale_obj = PL_C_locale_obj; 16174 #endif 16175 16176 #ifdef HAS_MBRLEN 16177 PL_mbrlen_ps = proto_perl->Imbrlen_ps; 16178 #endif 16179 #ifdef HAS_MBRTOWC 16180 PL_mbrtowc_ps = proto_perl->Imbrtowc_ps; 16181 #endif 16182 #ifdef HAS_WCRTOMB 16183 PL_wcrtomb_ps = proto_perl->Iwcrtomb_ps; 16184 #endif 16185 16186 PL_langinfo_sv = newSVpvs(""); 16187 PL_scratch_langinfo = newSVpvs(""); 16188 16189 PL_setlocale_buf = NULL; 16190 PL_setlocale_bufsize = 0; 16191 16192 #if defined(USE_LOCALE_THREADS) && ! defined(USE_THREAD_SAFE_LOCALE) 16193 PL_less_dicey_locale_buf = NULL; 16194 PL_less_dicey_locale_bufsize = 0; 16195 #endif 16196 16197 /* Unicode inversion lists */ 16198 16199 PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param); 16200 PL_Assigned_invlist = sv_dup_inc(proto_perl->IAssigned_invlist, param); 16201 PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param); 16202 PL_HasMultiCharFold = sv_dup_inc(proto_perl->IHasMultiCharFold, param); 16203 PL_InMultiCharFold = sv_dup_inc(proto_perl->IInMultiCharFold, param); 16204 PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); 16205 PL_LB_invlist = sv_dup_inc(proto_perl->ILB_invlist, param); 16206 PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param); 16207 PL_SCX_invlist = sv_dup_inc(proto_perl->ISCX_invlist, param); 16208 PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); 16209 PL_in_some_fold = sv_dup_inc(proto_perl->Iin_some_fold, param); 16210 PL_utf8_foldclosures = sv_dup_inc(proto_perl->Iutf8_foldclosures, param); 16211 PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); 16212 PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); 16213 PL_utf8_perl_idcont = sv_dup_inc(proto_perl->Iutf8_perl_idcont, param); 16214 PL_utf8_perl_idstart = sv_dup_inc(proto_perl->Iutf8_perl_idstart, param); 16215 PL_utf8_xidcont = sv_dup_inc(proto_perl->Iutf8_xidcont, param); 16216 PL_utf8_xidstart = sv_dup_inc(proto_perl->Iutf8_xidstart, param); 16217 PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param); 16218 for (i = 0; i < POSIX_CC_COUNT; i++) { 16219 PL_XPosix_ptrs[i] = sv_dup_inc(proto_perl->IXPosix_ptrs[i], param); 16220 if (i != CC_CASED_ && i != CC_VERTSPACE_) { 16221 PL_Posix_ptrs[i] = sv_dup_inc(proto_perl->IPosix_ptrs[i], param); 16222 } 16223 } 16224 PL_Posix_ptrs[CC_CASED_] = PL_Posix_ptrs[CC_ALPHA_]; 16225 PL_Posix_ptrs[CC_VERTSPACE_] = NULL; 16226 16227 PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); 16228 PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); 16229 PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); 16230 PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); 16231 PL_utf8_tosimplefold = sv_dup_inc(proto_perl->Iutf8_tosimplefold, param); 16232 PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param); 16233 PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param); 16234 PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); 16235 PL_InBitmap = sv_dup_inc(proto_perl->IInBitmap, param); 16236 PL_CCC_non0_non230 = sv_dup_inc(proto_perl->ICCC_non0_non230, param); 16237 PL_Private_Use = sv_dup_inc(proto_perl->IPrivate_Use, param); 16238 16239 #if 0 16240 PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param); 16241 #endif 16242 16243 if (proto_perl->Ipsig_pend) { 16244 Newxz(PL_psig_pend, SIG_SIZE, int); 16245 } 16246 else { 16247 PL_psig_pend = (int*)NULL; 16248 } 16249 16250 if (proto_perl->Ipsig_name) { 16251 Newx(PL_psig_name, 2 * SIG_SIZE, SV*); 16252 sv_dup_inc_multiple(proto_perl->Ipsig_name, PL_psig_name, 2 * SIG_SIZE, 16253 param); 16254 PL_psig_ptr = PL_psig_name + SIG_SIZE; 16255 } 16256 else { 16257 PL_psig_ptr = (SV**)NULL; 16258 PL_psig_name = (SV**)NULL; 16259 } 16260 16261 if (flags & CLONEf_COPY_STACKS) { 16262 Newx(PL_tmps_stack, PL_tmps_max, SV*); 16263 sv_dup_inc_multiple(proto_perl->Itmps_stack, PL_tmps_stack, 16264 PL_tmps_ix+1, param); 16265 16266 /* next PUSHMARK() sets *(PL_markstack_ptr+1) */ 16267 i = proto_perl->Imarkstack_max - proto_perl->Imarkstack; 16268 Newx(PL_markstack, i, Stack_off_t); 16269 PL_markstack_max = PL_markstack + (proto_perl->Imarkstack_max 16270 - proto_perl->Imarkstack); 16271 PL_markstack_ptr = PL_markstack + (proto_perl->Imarkstack_ptr 16272 - proto_perl->Imarkstack); 16273 Copy(proto_perl->Imarkstack, PL_markstack, 16274 PL_markstack_ptr - PL_markstack + 1, Stack_off_t); 16275 16276 /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] 16277 * NOTE: unlike the others! */ 16278 Newx(PL_scopestack, PL_scopestack_max, I32); 16279 Copy(proto_perl->Iscopestack, PL_scopestack, PL_scopestack_ix, I32); 16280 16281 #ifdef DEBUGGING 16282 Newx(PL_scopestack_name, PL_scopestack_max, const char *); 16283 Copy(proto_perl->Iscopestack_name, PL_scopestack_name, PL_scopestack_ix, const char *); 16284 #endif 16285 /* reset stack AV to correct length before its duped via 16286 * PL_curstackinfo */ 16287 AvFILLp(proto_perl->Icurstack) = 16288 proto_perl->Istack_sp - proto_perl->Istack_base; 16289 16290 /* NOTE: si_dup() looks at PL_markstack */ 16291 PL_curstackinfo = si_dup(proto_perl->Icurstackinfo, param); 16292 16293 /* PL_curstack = PL_curstackinfo->si_stack; */ 16294 PL_curstack = av_dup(proto_perl->Icurstack, param); 16295 PL_mainstack = av_dup(proto_perl->Imainstack, param); 16296 16297 /* next PUSHs() etc. set *(PL_stack_sp+1) */ 16298 PL_stack_base = AvARRAY(PL_curstack); 16299 PL_stack_sp = PL_stack_base + (proto_perl->Istack_sp 16300 - proto_perl->Istack_base); 16301 PL_stack_max = PL_stack_base + AvMAX(PL_curstack); 16302 16303 /*Newxz(PL_savestack, PL_savestack_max, ANY);*/ 16304 PL_savestack = ss_dup(proto_perl, param); 16305 } 16306 else { 16307 init_stacks(); 16308 ENTER; /* perl_destruct() wants to LEAVE; */ 16309 } 16310 16311 PL_statgv = gv_dup(proto_perl->Istatgv, param); 16312 PL_statname = sv_dup_inc(proto_perl->Istatname, param); 16313 16314 PL_rs = sv_dup_inc(proto_perl->Irs, param); 16315 PL_last_in_gv = gv_dup(proto_perl->Ilast_in_gv, param); 16316 PL_defoutgv = gv_dup_inc(proto_perl->Idefoutgv, param); 16317 PL_toptarget = sv_dup_inc(proto_perl->Itoptarget, param); 16318 PL_bodytarget = sv_dup_inc(proto_perl->Ibodytarget, param); 16319 PL_formtarget = sv_dup(proto_perl->Iformtarget, param); 16320 16321 PL_errors = sv_dup_inc(proto_perl->Ierrors, param); 16322 16323 PL_sortcop = (OP*)any_dup(proto_perl->Isortcop, proto_perl); 16324 PL_firstgv = gv_dup_inc(proto_perl->Ifirstgv, param); 16325 PL_secondgv = gv_dup_inc(proto_perl->Isecondgv, param); 16326 16327 PL_stashcache = newHV(); 16328 16329 PL_watchaddr = (char **) ptr_table_fetch(PL_ptr_table, 16330 proto_perl->Iwatchaddr); 16331 PL_watchok = PL_watchaddr ? * PL_watchaddr : NULL; 16332 if (PL_debug && PL_watchaddr) { 16333 PerlIO_printf(Perl_debug_log, 16334 "WATCHING: %" UVxf " cloned as %" UVxf " with value %" UVxf "\n", 16335 PTR2UV(proto_perl->Iwatchaddr), PTR2UV(PL_watchaddr), 16336 PTR2UV(PL_watchok)); 16337 } 16338 16339 PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param); 16340 PL_blockhooks = av_dup_inc(proto_perl->Iblockhooks, param); 16341 16342 /* Call the ->CLONE method, if it exists, for each of the stashes 16343 identified by sv_dup() above. 16344 */ 16345 while(av_count(param->stashes) != 0) { 16346 HV* const stash = MUTABLE_HV(av_shift(param->stashes)); 16347 GV* const cloner = gv_fetchmethod_autoload(stash, "CLONE", 0); 16348 if (cloner && GvCV(cloner)) { 16349 ENTER; 16350 SAVETMPS; 16351 PUSHMARK(PL_stack_sp); 16352 rpp_extend(1); 16353 SV *newsv = newSVhek(HvNAME_HEK(stash)); 16354 *++PL_stack_sp = newsv; 16355 if (!rpp_stack_is_rc()) 16356 sv_2mortal(newsv); 16357 call_sv(MUTABLE_SV(GvCV(cloner)), G_DISCARD); 16358 FREETMPS; 16359 LEAVE; 16360 } 16361 } 16362 16363 if (!(flags & CLONEf_KEEP_PTR_TABLE)) { 16364 ptr_table_free(PL_ptr_table); 16365 PL_ptr_table = NULL; 16366 } 16367 16368 if (!(flags & CLONEf_COPY_STACKS)) { 16369 unreferenced_to_tmp_stack(param->unreferenced); 16370 } 16371 16372 SvREFCNT_dec(param->stashes); 16373 16374 /* orphaned? eg threads->new inside BEGIN or use */ 16375 if (PL_compcv && ! SvREFCNT(PL_compcv)) { 16376 SvREFCNT_inc_simple_void(PL_compcv); 16377 SAVEFREESV(PL_compcv); 16378 } 16379 16380 return my_perl; 16381 } 16382 16383 static void 16384 S_unreferenced_to_tmp_stack(pTHX_ AV *const unreferenced) 16385 { 16386 PERL_ARGS_ASSERT_UNREFERENCED_TO_TMP_STACK; 16387 16388 if (AvFILLp(unreferenced) > -1) { 16389 SV **svp = AvARRAY(unreferenced); 16390 SV **const last = svp + AvFILLp(unreferenced); 16391 SSize_t count = 0; 16392 16393 do { 16394 if (SvREFCNT(*svp) == 1) 16395 ++count; 16396 } while (++svp <= last); 16397 16398 EXTEND_MORTAL(count); 16399 svp = AvARRAY(unreferenced); 16400 16401 do { 16402 if (SvREFCNT(*svp) == 1) { 16403 /* Our reference is the only one to this SV. This means that 16404 in this thread, the scalar effectively has a 0 reference. 16405 That doesn't work (cleanup never happens), so donate our 16406 reference to it onto the save stack. */ 16407 PL_tmps_stack[++PL_tmps_ix] = *svp; 16408 } else { 16409 /* As an optimisation, because we are already walking the 16410 entire array, instead of above doing either 16411 SvREFCNT_inc(*svp) or *svp = &PL_sv_undef, we can instead 16412 release our reference to the scalar, so that at the end of 16413 the array owns zero references to the scalars it happens to 16414 point to. We are effectively converting the array from 16415 AvREAL() on to AvREAL() off. This saves the av_clear() 16416 (triggered by the SvREFCNT_dec(unreferenced) below) from 16417 walking the array a second time. */ 16418 SvREFCNT_dec(*svp); 16419 } 16420 16421 } while (++svp <= last); 16422 AvREAL_off(unreferenced); 16423 } 16424 SvREFCNT_dec_NN(unreferenced); 16425 } 16426 16427 void 16428 Perl_clone_params_del(CLONE_PARAMS *param) 16429 { 16430 PerlInterpreter *const was = PERL_GET_THX; 16431 PerlInterpreter *const to = param->new_perl; 16432 dTHXa(to); 16433 16434 PERL_ARGS_ASSERT_CLONE_PARAMS_DEL; 16435 16436 if (was != to) { 16437 PERL_SET_THX(to); 16438 } 16439 16440 SvREFCNT_dec(param->stashes); 16441 if (param->unreferenced) 16442 unreferenced_to_tmp_stack(param->unreferenced); 16443 16444 Safefree(param); 16445 16446 if (was != to) { 16447 PERL_SET_THX(was); 16448 } 16449 } 16450 16451 CLONE_PARAMS * 16452 Perl_clone_params_new(PerlInterpreter *const from, PerlInterpreter *const to) 16453 { 16454 /* Need to play this game, as newAV() can call safesysmalloc(), and that 16455 does a dTHX; to get the context from thread local storage. 16456 FIXME - under PERL_CORE Newx(), Safefree() and friends should expand to 16457 a version that passes in my_perl. */ 16458 PerlInterpreter *const was = PERL_GET_THX; 16459 CLONE_PARAMS *param; 16460 16461 PERL_ARGS_ASSERT_CLONE_PARAMS_NEW; 16462 16463 if (was != to) { 16464 PERL_SET_THX(to); 16465 } 16466 16467 /* Given that we've set the context, we can do this unshared. */ 16468 Newx(param, 1, CLONE_PARAMS); 16469 16470 param->flags = 0; 16471 param->proto_perl = from; 16472 param->new_perl = to; 16473 param->stashes = (AV *)Perl_newSV_type(to, SVt_PVAV); 16474 AvREAL_off(param->stashes); 16475 param->unreferenced = (AV *)Perl_newSV_type(to, SVt_PVAV); 16476 16477 if (was != to) { 16478 PERL_SET_THX(was); 16479 } 16480 return param; 16481 } 16482 16483 #endif /* USE_ITHREADS */ 16484 16485 void 16486 Perl_init_constants(pTHX) 16487 { 16488 16489 SvREFCNT(&PL_sv_undef) = SvREFCNT_IMMORTAL; 16490 SvFLAGS(&PL_sv_undef) = SVf_READONLY|SVf_PROTECT|SVt_NULL; 16491 SvANY(&PL_sv_undef) = NULL; 16492 16493 SvANY(&PL_sv_no) = new_XPVNV(); 16494 SvREFCNT(&PL_sv_no) = SvREFCNT_IMMORTAL; 16495 SvFLAGS(&PL_sv_no) = SVt_PVNV|SVf_READONLY|SVf_PROTECT 16496 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK 16497 |SVp_POK|SVf_POK|SVf_IsCOW|SVppv_STATIC; 16498 16499 SvANY(&PL_sv_yes) = new_XPVNV(); 16500 SvREFCNT(&PL_sv_yes) = SvREFCNT_IMMORTAL; 16501 SvFLAGS(&PL_sv_yes) = SVt_PVNV|SVf_READONLY|SVf_PROTECT 16502 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK 16503 |SVp_POK|SVf_POK|SVf_IsCOW|SVppv_STATIC; 16504 16505 SvANY(&PL_sv_zero) = new_XPVNV(); 16506 SvREFCNT(&PL_sv_zero) = SvREFCNT_IMMORTAL; 16507 SvFLAGS(&PL_sv_zero) = SVt_PVNV|SVf_READONLY|SVf_PROTECT 16508 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK 16509 |SVp_POK|SVf_POK 16510 |SVs_PADTMP; 16511 16512 SvPV_set(&PL_sv_no, (char*)PL_No); 16513 SvCUR_set(&PL_sv_no, 0); 16514 SvLEN_set(&PL_sv_no, 0); 16515 SvIV_set(&PL_sv_no, 0); 16516 SvNV_set(&PL_sv_no, 0); 16517 16518 SvPV_set(&PL_sv_yes, (char*)PL_Yes); 16519 SvCUR_set(&PL_sv_yes, 1); 16520 SvLEN_set(&PL_sv_yes, 0); 16521 SvIV_set(&PL_sv_yes, 1); 16522 SvNV_set(&PL_sv_yes, 1); 16523 16524 SvPV_set(&PL_sv_zero, (char*)PL_Zero); 16525 SvCUR_set(&PL_sv_zero, 1); 16526 SvLEN_set(&PL_sv_zero, 0); 16527 SvIV_set(&PL_sv_zero, 0); 16528 SvNV_set(&PL_sv_zero, 0); 16529 16530 PadnamePV(&PL_padname_const) = (char *)PL_No; 16531 16532 assert(SvIMMORTAL_INTERP(&PL_sv_yes)); 16533 assert(SvIMMORTAL_INTERP(&PL_sv_undef)); 16534 assert(SvIMMORTAL_INTERP(&PL_sv_no)); 16535 assert(SvIMMORTAL_INTERP(&PL_sv_zero)); 16536 16537 assert(SvIMMORTAL(&PL_sv_yes)); 16538 assert(SvIMMORTAL(&PL_sv_undef)); 16539 assert(SvIMMORTAL(&PL_sv_no)); 16540 assert(SvIMMORTAL(&PL_sv_zero)); 16541 16542 assert( SvIMMORTAL_TRUE(&PL_sv_yes)); 16543 assert(!SvIMMORTAL_TRUE(&PL_sv_undef)); 16544 assert(!SvIMMORTAL_TRUE(&PL_sv_no)); 16545 assert(!SvIMMORTAL_TRUE(&PL_sv_zero)); 16546 16547 assert( SvTRUE_nomg_NN(&PL_sv_yes)); 16548 assert(!SvTRUE_nomg_NN(&PL_sv_undef)); 16549 assert(!SvTRUE_nomg_NN(&PL_sv_no)); 16550 assert(!SvTRUE_nomg_NN(&PL_sv_zero)); 16551 } 16552 16553 /* 16554 =for apidoc_section $unicode 16555 16556 =for apidoc sv_recode_to_utf8 16557 16558 C<encoding> is assumed to be an C<Encode> object, on entry the PV 16559 of C<sv> is assumed to be octets in that encoding, and C<sv> 16560 will be converted into Unicode (and UTF-8). 16561 16562 If C<sv> already is UTF-8 (or if it is not C<POK>), or if C<encoding> 16563 is not a reference, nothing is done to C<sv>. If C<encoding> is not 16564 an C<Encode::XS> Encoding object, bad things will happen. 16565 (See L<encoding> and L<Encode>.) 16566 16567 The PV of C<sv> is returned. 16568 16569 =cut */ 16570 16571 char * 16572 Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) 16573 { 16574 PERL_ARGS_ASSERT_SV_RECODE_TO_UTF8; 16575 16576 if (SvPOK(sv) && !SvUTF8(sv) && !IN_BYTES && SvROK(encoding)) { 16577 SV *uni; 16578 STRLEN len; 16579 const char *s; 16580 dSP; 16581 SV *nsv = sv; 16582 ENTER; 16583 PUSHSTACK; 16584 SAVETMPS; 16585 if (SvPADTMP(nsv)) { 16586 nsv = sv_newmortal(); 16587 SvSetSV_nosteal(nsv, sv); 16588 } 16589 save_re_context(); 16590 PUSHMARK(sp); 16591 EXTEND(SP, 3); 16592 PUSHs(encoding); 16593 PUSHs(nsv); 16594 /* 16595 NI-S 2002/07/09 16596 Passing sv_yes is wrong - it needs to be or'ed set of constants 16597 for Encode::XS, while UTf-8 decode (currently) assumes a true value means 16598 remove converted chars from source. 16599 16600 Both will default the value - let them. 16601 16602 XPUSHs(&PL_sv_yes); 16603 */ 16604 PUTBACK; 16605 call_method("decode", G_SCALAR); 16606 SPAGAIN; 16607 uni = POPs; 16608 PUTBACK; 16609 s = SvPV_const(uni, len); 16610 if (s != SvPVX_const(sv)) { 16611 SvGROW(sv, len + 1); 16612 Move(s, SvPVX(sv), len + 1, char); 16613 SvCUR_set(sv, len); 16614 } 16615 FREETMPS; 16616 POPSTACK; 16617 LEAVE; 16618 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 16619 /* clear pos and any utf8 cache */ 16620 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); 16621 if (mg) 16622 mg->mg_len = -1; 16623 if ((mg = mg_find(sv, PERL_MAGIC_utf8))) 16624 magic_setutf8(sv,mg); /* clear UTF8 cache */ 16625 } 16626 SvUTF8_on(sv); 16627 return SvPVX(sv); 16628 } 16629 return SvPOKp(sv) ? SvPVX(sv) : NULL; 16630 } 16631 16632 /* 16633 =for apidoc sv_cat_decode 16634 16635 C<encoding> is assumed to be an C<Encode> object, the PV of C<ssv> is 16636 assumed to be octets in that encoding and decoding the input starts 16637 from the position which S<C<(PV + *offset)>> pointed to. C<dsv> will be 16638 concatenated with the decoded UTF-8 string from C<ssv>. Decoding will terminate 16639 when the string C<tstr> appears in decoding output or the input ends on 16640 the PV of C<ssv>. The value which C<offset> points will be modified 16641 to the last input position on C<ssv>. 16642 16643 Returns TRUE if the terminator was found, else returns FALSE. 16644 16645 =cut */ 16646 16647 bool 16648 Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding, 16649 SV *ssv, int *offset, char *tstr, int tlen) 16650 { 16651 bool ret = FALSE; 16652 16653 PERL_ARGS_ASSERT_SV_CAT_DECODE; 16654 16655 if (SvPOK(ssv) && SvPOK(dsv) && SvROK(encoding)) { 16656 SV *offsv; 16657 dSP; 16658 ENTER; 16659 SAVETMPS; 16660 save_re_context(); 16661 PUSHMARK(sp); 16662 EXTEND(SP, 6); 16663 PUSHs(encoding); 16664 PUSHs(dsv); 16665 PUSHs(ssv); 16666 offsv = newSViv(*offset); 16667 mPUSHs(offsv); 16668 mPUSHp(tstr, tlen); 16669 PUTBACK; 16670 call_method("cat_decode", G_SCALAR); 16671 SPAGAIN; 16672 ret = SvTRUE(TOPs); 16673 *offset = SvIV(offsv); 16674 PUTBACK; 16675 FREETMPS; 16676 LEAVE; 16677 } 16678 else 16679 Perl_croak(aTHX_ "Invalid argument to sv_cat_decode"); 16680 return ret; 16681 16682 } 16683 16684 /* --------------------------------------------------------------------- 16685 * 16686 * support functions for report_uninit() 16687 */ 16688 16689 /* the maxiumum size of array or hash where we will scan looking 16690 * for the undefined element that triggered the warning */ 16691 16692 #define FUV_MAX_SEARCH_SIZE 1000 16693 16694 /* Look for an entry in the hash whose value has the same SV as val; 16695 * If so, return a mortal copy of the key. */ 16696 16697 STATIC SV* 16698 S_find_hash_subscript(pTHX_ const HV *const hv, const SV *const val) 16699 { 16700 HE **array; 16701 I32 i; 16702 16703 PERL_ARGS_ASSERT_FIND_HASH_SUBSCRIPT; 16704 16705 if (!hv || SvMAGICAL(hv) || !HvTOTALKEYS(hv) || 16706 (HvTOTALKEYS(hv) > FUV_MAX_SEARCH_SIZE)) 16707 return NULL; 16708 16709 if (val == &PL_sv_undef || val == &PL_sv_placeholder) 16710 return NULL; 16711 16712 array = HvARRAY(hv); 16713 16714 for (i=HvMAX(hv); i>=0; i--) { 16715 HE *entry; 16716 for (entry = array[i]; entry; entry = HeNEXT(entry)) { 16717 if (HeVAL(entry) == val) 16718 return newSVhek_mortal(HeKEY_hek(entry)); 16719 } 16720 } 16721 return NULL; 16722 } 16723 16724 /* Look for an entry in the array whose value has the same SV as val; 16725 * If so, return the index, otherwise return -1. */ 16726 16727 STATIC SSize_t 16728 S_find_array_subscript(pTHX_ const AV *const av, const SV *const val) 16729 { 16730 PERL_ARGS_ASSERT_FIND_ARRAY_SUBSCRIPT; 16731 16732 if (!av || SvMAGICAL(av) || !AvARRAY(av) || 16733 (AvFILLp(av) > FUV_MAX_SEARCH_SIZE)) 16734 return -1; 16735 16736 if (val != &PL_sv_undef) { 16737 SV ** const svp = AvARRAY(av); 16738 SSize_t i; 16739 16740 for (i=AvFILLp(av); i>=0; i--) 16741 if (svp[i] == val) 16742 return i; 16743 } 16744 return -1; 16745 } 16746 16747 /* varname(): return the name of a variable, optionally with a subscript. 16748 * If gv is non-zero, use the name of that global, along with gvtype (one 16749 * of "$", "@", "%"); otherwise use the name of the lexical at pad offset 16750 * targ. Depending on the value of the subscript_type flag, return: 16751 */ 16752 16753 #define FUV_SUBSCRIPT_NONE 1 /* "@foo" */ 16754 #define FUV_SUBSCRIPT_ARRAY 2 /* "$foo[aindex]" */ 16755 #define FUV_SUBSCRIPT_HASH 3 /* "$foo{keyname}" */ 16756 #define FUV_SUBSCRIPT_WITHIN 4 /* "within @foo" */ 16757 16758 SV* 16759 Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, 16760 const SV *const keyname, SSize_t aindex, int subscript_type) 16761 { 16762 16763 SV * const name = sv_newmortal(); 16764 if (gv && isGV(gv)) { 16765 char buffer[2]; 16766 buffer[0] = gvtype; 16767 buffer[1] = 0; 16768 16769 /* as gv_fullname4(), but add literal '^' for $^FOO names */ 16770 16771 gv_fullname4(name, gv, buffer, 0); 16772 16773 if ((unsigned int)SvPVX(name)[1] <= 26) { 16774 buffer[0] = '^'; 16775 buffer[1] = SvPVX(name)[1] + 'A' - 1; 16776 16777 /* Swap the 1 unprintable control character for the 2 byte pretty 16778 version - ie substr($name, 1, 1) = $buffer; */ 16779 sv_insert(name, 1, 1, buffer, 2); 16780 } 16781 } 16782 else { 16783 CV * const cv = gv ? ((CV *)gv) : find_runcv(NULL); 16784 PADNAME *sv; 16785 16786 assert(!cv || SvTYPE(cv) == SVt_PVCV || SvTYPE(cv) == SVt_PVFM); 16787 16788 if (!cv || !CvPADLIST(cv)) 16789 return NULL; 16790 sv = padnamelist_fetch(PadlistNAMES(CvPADLIST(cv)), targ); 16791 sv_setpvn(name, PadnamePV(sv), PadnameLEN(sv)); 16792 SvUTF8_on(name); 16793 } 16794 16795 if (subscript_type == FUV_SUBSCRIPT_HASH) { 16796 SV * const sv = newSV_type(SVt_NULL); 16797 STRLEN len; 16798 const char * const pv = SvPV_nomg_const((SV*)keyname, len); 16799 16800 *SvPVX(name) = '$'; 16801 Perl_sv_catpvf(aTHX_ name, "{%s}", 16802 pv_pretty(sv, pv, len, 32, NULL, NULL, 16803 PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT )); 16804 SvREFCNT_dec_NN(sv); 16805 } 16806 else if (subscript_type == FUV_SUBSCRIPT_ARRAY) { 16807 *SvPVX(name) = '$'; 16808 Perl_sv_catpvf(aTHX_ name, "[%" IVdf "]", (IV)aindex); 16809 } 16810 else if (subscript_type == FUV_SUBSCRIPT_WITHIN) { 16811 /* We know that name has no magic, so can use 0 instead of SV_GMAGIC */ 16812 Perl_sv_insert_flags(aTHX_ name, 0, 0, STR_WITH_LEN("within "), 0); 16813 } 16814 else { 16815 assert(subscript_type == FUV_SUBSCRIPT_NONE); 16816 } 16817 16818 return name; 16819 } 16820 16821 16822 /* 16823 =apidoc_section $warning 16824 =for apidoc find_uninit_var 16825 16826 Find the name of the undefined variable (if any) that caused the operator 16827 to issue a "Use of uninitialized value" warning. 16828 If match is true, only return a name if its value matches C<uninit_sv>. 16829 So roughly speaking, if a unary operator (such as C<OP_COS>) generates a 16830 warning, then following the direct child of the op may yield an 16831 C<OP_PADSV> or C<OP_GV> that gives the name of the undefined variable. On the 16832 other hand, with C<OP_ADD> there are two branches to follow, so we only print 16833 the variable name if we get an exact match. 16834 C<desc_p> points to a string pointer holding the description of the op. 16835 This may be updated if needed. 16836 16837 The name is returned as a mortal SV. 16838 16839 Assumes that C<PL_op> is the OP that originally triggered the error, and that 16840 C<PL_comppad>/C<PL_curpad> points to the currently executing pad. 16841 16842 =cut 16843 */ 16844 16845 STATIC SV * 16846 S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, 16847 bool match, const char **desc_p) 16848 { 16849 SV *sv; 16850 const GV *gv; 16851 const OP *o, *o2, *kid; 16852 16853 PERL_ARGS_ASSERT_FIND_UNINIT_VAR; 16854 16855 if (!obase || (match && (!uninit_sv || uninit_sv == &PL_sv_undef || 16856 uninit_sv == &PL_sv_placeholder))) 16857 return NULL; 16858 16859 switch (obase->op_type) { 16860 16861 case OP_UNDEF: 16862 /* the optimizer rewrites '$x = undef' to 'undef $x' for lexical 16863 * variables, which can occur as the source of warnings: 16864 * ($x = undef) =~ s/a/b/; 16865 * The OPpUNDEF_KEEP_PV flag indicates that this used to be an 16866 * assignment op. 16867 * Otherwise undef should not care if its args are undef - any warnings 16868 * will be from tied/magic vars */ 16869 if ( 16870 (obase->op_private & (OPpTARGET_MY | OPpUNDEF_KEEP_PV)) == (OPpTARGET_MY | OPpUNDEF_KEEP_PV) 16871 && (!match || PAD_SVl(obase->op_targ) == uninit_sv) 16872 ) { 16873 return varname(NULL, '$', obase->op_targ, NULL, 0, FUV_SUBSCRIPT_NONE); 16874 } 16875 break; 16876 16877 case OP_RV2AV: 16878 case OP_RV2HV: 16879 case OP_PADAV: 16880 case OP_PADHV: 16881 { 16882 const bool pad = ( obase->op_type == OP_PADAV 16883 || obase->op_type == OP_PADHV 16884 || obase->op_type == OP_PADRANGE 16885 ); 16886 16887 const bool hash = ( obase->op_type == OP_PADHV 16888 || obase->op_type == OP_RV2HV 16889 || (obase->op_type == OP_PADRANGE 16890 && SvTYPE(PAD_SVl(obase->op_targ)) == SVt_PVHV) 16891 ); 16892 SSize_t index = 0; 16893 SV *keysv = NULL; 16894 int subscript_type = FUV_SUBSCRIPT_WITHIN; 16895 16896 if (pad) { /* @lex, %lex */ 16897 sv = PAD_SVl(obase->op_targ); 16898 gv = NULL; 16899 } 16900 else { 16901 if (cUNOPx(obase)->op_first->op_type == OP_GV) { 16902 /* @global, %global */ 16903 gv = cGVOPx_gv(cUNOPx(obase)->op_first); 16904 if (!gv) 16905 break; 16906 sv = hash ? MUTABLE_SV(GvHV(gv)): MUTABLE_SV(GvAV(gv)); 16907 } 16908 else if (obase == PL_op) /* @{expr}, %{expr} */ 16909 return find_uninit_var(cUNOPx(obase)->op_first, 16910 uninit_sv, match, desc_p); 16911 else /* @{expr}, %{expr} as a sub-expression */ 16912 return NULL; 16913 } 16914 16915 /* attempt to find a match within the aggregate */ 16916 if (hash) { 16917 keysv = find_hash_subscript((const HV*)sv, uninit_sv); 16918 if (keysv) 16919 subscript_type = FUV_SUBSCRIPT_HASH; 16920 } 16921 else { 16922 index = find_array_subscript((const AV *)sv, uninit_sv); 16923 if (index >= 0) 16924 subscript_type = FUV_SUBSCRIPT_ARRAY; 16925 } 16926 16927 if (match && subscript_type == FUV_SUBSCRIPT_WITHIN) 16928 break; 16929 16930 return varname(gv, (char)(hash ? '%' : '@'), obase->op_targ, 16931 keysv, index, subscript_type); 16932 } 16933 16934 case OP_RV2SV: 16935 if (cUNOPx(obase)->op_first->op_type == OP_GV) { 16936 /* $global */ 16937 gv = cGVOPx_gv(cUNOPx(obase)->op_first); 16938 if (!gv || !GvSTASH(gv)) 16939 break; 16940 if (match && (GvSV(gv) != uninit_sv)) 16941 break; 16942 return varname(gv, '$', 0, NULL, 0, FUV_SUBSCRIPT_NONE); 16943 } 16944 /* ${expr} */ 16945 return find_uninit_var(cUNOPx(obase)->op_first, uninit_sv, 1, desc_p); 16946 16947 case OP_PADSV: 16948 if (match && PAD_SVl(obase->op_targ) != uninit_sv) 16949 break; 16950 return varname(NULL, '$', obase->op_targ, 16951 NULL, 0, FUV_SUBSCRIPT_NONE); 16952 16953 case OP_PADSV_STORE: 16954 if (match && PAD_SVl(obase->op_targ) != uninit_sv) 16955 goto do_op; 16956 return varname(NULL, '$', obase->op_targ, 16957 NULL, 0, FUV_SUBSCRIPT_NONE); 16958 16959 case OP_GVSV: 16960 gv = cGVOPx_gv(obase); 16961 if (!gv || (match && GvSV(gv) != uninit_sv) || !GvSTASH(gv)) 16962 break; 16963 return varname(gv, '$', 0, NULL, 0, FUV_SUBSCRIPT_NONE); 16964 16965 case OP_AELEMFAST_LEX: 16966 if (match) { 16967 SV **svp; 16968 AV *av = MUTABLE_AV(PAD_SV(obase->op_targ)); 16969 if (!av || SvRMAGICAL(av)) 16970 break; 16971 svp = av_fetch(av, (I8)obase->op_private, FALSE); 16972 if (!svp || *svp != uninit_sv) 16973 break; 16974 } 16975 return varname(NULL, '$', obase->op_targ, 16976 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY); 16977 16978 case OP_AELEMFASTLEX_STORE: 16979 if (match) { 16980 SV **svp; 16981 AV *av = MUTABLE_AV(PAD_SV(obase->op_targ)); 16982 if (!av || SvRMAGICAL(av)) 16983 goto do_op; 16984 svp = av_fetch(av, (I8)obase->op_private, FALSE); 16985 if (!svp || *svp != uninit_sv) 16986 goto do_op; 16987 } 16988 return varname(NULL, '$', obase->op_targ, 16989 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY); 16990 16991 case OP_AELEMFAST: 16992 { 16993 gv = cGVOPx_gv(obase); 16994 if (!gv) 16995 break; 16996 if (match) { 16997 SV **svp; 16998 AV *const av = GvAV(gv); 16999 if (!av || SvRMAGICAL(av)) 17000 break; 17001 svp = av_fetch(av, (I8)obase->op_private, FALSE); 17002 if (!svp || *svp != uninit_sv) 17003 break; 17004 } 17005 return varname(gv, '$', 0, 17006 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY); 17007 } 17008 NOT_REACHED; /* NOTREACHED */ 17009 17010 case OP_EXISTS: 17011 o = cUNOPx(obase)->op_first; 17012 if (!o || o->op_type != OP_NULL || 17013 ! (o->op_targ == OP_AELEM || o->op_targ == OP_HELEM)) 17014 break; 17015 return find_uninit_var(cBINOPo->op_last, uninit_sv, match, desc_p); 17016 17017 case OP_AELEM: 17018 case OP_HELEM: 17019 { 17020 bool negate = FALSE; 17021 17022 if (PL_op == obase) 17023 /* $a[uninit_expr] or $h{uninit_expr} */ 17024 return find_uninit_var(cBINOPx(obase)->op_last, 17025 uninit_sv, match, desc_p); 17026 17027 gv = NULL; 17028 o = cBINOPx(obase)->op_first; 17029 kid = cBINOPx(obase)->op_last; 17030 17031 /* get the av or hv, and optionally the gv */ 17032 sv = NULL; 17033 if (o->op_type == OP_PADAV || o->op_type == OP_PADHV) { 17034 sv = PAD_SV(o->op_targ); 17035 } 17036 else if ((o->op_type == OP_RV2AV || o->op_type == OP_RV2HV) 17037 && cUNOPo->op_first->op_type == OP_GV) 17038 { 17039 gv = cGVOPx_gv(cUNOPo->op_first); 17040 if (!gv) 17041 break; 17042 sv = o->op_type 17043 == OP_RV2HV ? MUTABLE_SV(GvHV(gv)) : MUTABLE_SV(GvAV(gv)); 17044 } 17045 if (!sv) 17046 break; 17047 17048 if (kid && kid->op_type == OP_NEGATE) { 17049 negate = TRUE; 17050 kid = cUNOPx(kid)->op_first; 17051 } 17052 17053 if (kid && kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) { 17054 /* index is constant */ 17055 SV* kidsv; 17056 if (negate) { 17057 kidsv = newSVpvs_flags("-", SVs_TEMP); 17058 sv_catsv(kidsv, cSVOPx_sv(kid)); 17059 } 17060 else 17061 kidsv = cSVOPx_sv(kid); 17062 if (match) { 17063 if (SvMAGICAL(sv)) 17064 break; 17065 if (obase->op_type == OP_HELEM) { 17066 HE* he = hv_fetch_ent(MUTABLE_HV(sv), kidsv, 0, 0); 17067 if (!he || HeVAL(he) != uninit_sv) 17068 break; 17069 } 17070 else { 17071 SV * const opsv = cSVOPx_sv(kid); 17072 const IV opsviv = SvIV(opsv); 17073 SV * const * const svp = av_fetch(MUTABLE_AV(sv), 17074 negate ? - opsviv : opsviv, 17075 FALSE); 17076 if (!svp || *svp != uninit_sv) 17077 break; 17078 } 17079 } 17080 if (obase->op_type == OP_HELEM) 17081 return varname(gv, '%', o->op_targ, 17082 kidsv, 0, FUV_SUBSCRIPT_HASH); 17083 else 17084 return varname(gv, '@', o->op_targ, NULL, 17085 negate ? - SvIV(cSVOPx_sv(kid)) : SvIV(cSVOPx_sv(kid)), 17086 FUV_SUBSCRIPT_ARRAY); 17087 } 17088 else { 17089 /* index is an expression; 17090 * attempt to find a match within the aggregate */ 17091 if (obase->op_type == OP_HELEM) { 17092 SV * const keysv = find_hash_subscript((const HV*)sv, uninit_sv); 17093 if (keysv) 17094 return varname(gv, '%', o->op_targ, 17095 keysv, 0, FUV_SUBSCRIPT_HASH); 17096 } 17097 else { 17098 const SSize_t index 17099 = find_array_subscript((const AV *)sv, uninit_sv); 17100 if (index >= 0) 17101 return varname(gv, '@', o->op_targ, 17102 NULL, index, FUV_SUBSCRIPT_ARRAY); 17103 } 17104 if (match) 17105 break; 17106 return varname(gv, 17107 (char)((o->op_type == OP_PADAV || o->op_type == OP_RV2AV) 17108 ? '@' : '%'), 17109 o->op_targ, NULL, 0, FUV_SUBSCRIPT_WITHIN); 17110 } 17111 NOT_REACHED; /* NOTREACHED */ 17112 } 17113 17114 case OP_MULTIDEREF: { 17115 /* If we were executing OP_MULTIDEREF when the undef warning 17116 * triggered, then it must be one of the index values within 17117 * that triggered it. If not, then the only possibility is that 17118 * the value retrieved by the last aggregate index might be the 17119 * culprit. For the former, we set PL_multideref_pc each time before 17120 * using an index, so work though the item list until we reach 17121 * that point. For the latter, just work through the entire item 17122 * list; the last aggregate retrieved will be the candidate. 17123 * There is a third rare possibility: something triggered 17124 * magic while fetching an array/hash element. Just display 17125 * nothing in this case. 17126 */ 17127 17128 /* the named aggregate, if any */ 17129 PADOFFSET agg_targ = 0; 17130 GV *agg_gv = NULL; 17131 /* the last-seen index */ 17132 UV index_type; 17133 PADOFFSET index_targ; 17134 GV *index_gv; 17135 IV index_const_iv = 0; /* init for spurious compiler warn */ 17136 SV *index_const_sv; 17137 int depth = 0; /* how many array/hash lookups we've done */ 17138 17139 UNOP_AUX_item *items = cUNOP_AUXx(obase)->op_aux; 17140 UNOP_AUX_item *last = NULL; 17141 UV actions = items->uv; 17142 bool is_hv; 17143 17144 if (PL_op == obase) { 17145 last = PL_multideref_pc; 17146 assert(last >= items && last <= items + items[-1].uv); 17147 } 17148 17149 assert(actions); 17150 17151 while (1) { 17152 is_hv = FALSE; 17153 switch (actions & MDEREF_ACTION_MASK) { 17154 17155 case MDEREF_reload: 17156 actions = (++items)->uv; 17157 continue; 17158 17159 case MDEREF_HV_padhv_helem: /* $lex{...} */ 17160 is_hv = TRUE; 17161 /* FALLTHROUGH */ 17162 case MDEREF_AV_padav_aelem: /* $lex[...] */ 17163 agg_targ = (++items)->pad_offset; 17164 agg_gv = NULL; 17165 break; 17166 17167 case MDEREF_HV_gvhv_helem: /* $pkg{...} */ 17168 is_hv = TRUE; 17169 /* FALLTHROUGH */ 17170 case MDEREF_AV_gvav_aelem: /* $pkg[...] */ 17171 agg_targ = 0; 17172 agg_gv = (GV*)UNOP_AUX_item_sv(++items); 17173 assert(isGV_with_GP(agg_gv)); 17174 break; 17175 17176 case MDEREF_HV_gvsv_vivify_rv2hv_helem: /* $pkg->{...} */ 17177 case MDEREF_HV_padsv_vivify_rv2hv_helem: /* $lex->{...} */ 17178 ++items; 17179 /* FALLTHROUGH */ 17180 case MDEREF_HV_pop_rv2hv_helem: /* expr->{...} */ 17181 case MDEREF_HV_vivify_rv2hv_helem: /* vivify, ->{...} */ 17182 agg_targ = 0; 17183 agg_gv = NULL; 17184 is_hv = TRUE; 17185 break; 17186 17187 case MDEREF_AV_gvsv_vivify_rv2av_aelem: /* $pkg->[...] */ 17188 case MDEREF_AV_padsv_vivify_rv2av_aelem: /* $lex->[...] */ 17189 ++items; 17190 /* FALLTHROUGH */ 17191 case MDEREF_AV_pop_rv2av_aelem: /* expr->[...] */ 17192 case MDEREF_AV_vivify_rv2av_aelem: /* vivify, ->[...] */ 17193 agg_targ = 0; 17194 agg_gv = NULL; 17195 } /* switch */ 17196 17197 index_targ = 0; 17198 index_gv = NULL; 17199 index_const_sv = NULL; 17200 17201 index_type = (actions & MDEREF_INDEX_MASK); 17202 switch (index_type) { 17203 case MDEREF_INDEX_none: 17204 break; 17205 case MDEREF_INDEX_const: 17206 if (is_hv) 17207 index_const_sv = UNOP_AUX_item_sv(++items) 17208 else 17209 index_const_iv = (++items)->iv; 17210 break; 17211 case MDEREF_INDEX_padsv: 17212 index_targ = (++items)->pad_offset; 17213 break; 17214 case MDEREF_INDEX_gvsv: 17215 index_gv = (GV*)UNOP_AUX_item_sv(++items); 17216 assert(isGV_with_GP(index_gv)); 17217 break; 17218 } 17219 17220 if (index_type != MDEREF_INDEX_none) 17221 depth++; 17222 17223 if ( index_type == MDEREF_INDEX_none 17224 || (actions & MDEREF_FLAG_last) 17225 || (last && items >= last) 17226 ) 17227 break; 17228 17229 actions >>= MDEREF_SHIFT; 17230 } /* while */ 17231 17232 if (PL_op == obase) { 17233 /* most likely index was undef */ 17234 17235 *desc_p = ( (actions & MDEREF_FLAG_last) 17236 && (obase->op_private 17237 & (OPpMULTIDEREF_EXISTS|OPpMULTIDEREF_DELETE))) 17238 ? 17239 (obase->op_private & OPpMULTIDEREF_EXISTS) 17240 ? "exists" 17241 : "delete" 17242 : is_hv ? "hash element" : "array element"; 17243 assert(index_type != MDEREF_INDEX_none); 17244 if (index_gv) { 17245 if (GvSV(index_gv) == uninit_sv) 17246 return varname(index_gv, '$', 0, NULL, 0, 17247 FUV_SUBSCRIPT_NONE); 17248 else 17249 return NULL; 17250 } 17251 if (index_targ) { 17252 if (PL_curpad[index_targ] == uninit_sv) 17253 return varname(NULL, '$', index_targ, 17254 NULL, 0, FUV_SUBSCRIPT_NONE); 17255 else 17256 return NULL; 17257 } 17258 /* If we got to this point it was undef on a const subscript, 17259 * so magic probably involved, e.g. $ISA[0]. Give up. */ 17260 return NULL; 17261 } 17262 17263 /* the SV returned by pp_multideref() was undef, if anything was */ 17264 17265 if (depth != 1) 17266 break; 17267 17268 if (agg_targ) 17269 sv = PAD_SV(agg_targ); 17270 else if (agg_gv) { 17271 sv = is_hv ? MUTABLE_SV(GvHV(agg_gv)) : MUTABLE_SV(GvAV(agg_gv)); 17272 if (!sv) 17273 break; 17274 } 17275 else 17276 break; 17277 17278 if (index_type == MDEREF_INDEX_const) { 17279 if (match) { 17280 if (SvMAGICAL(sv)) 17281 break; 17282 if (is_hv) { 17283 HE* he = hv_fetch_ent(MUTABLE_HV(sv), index_const_sv, 0, 0); 17284 if (!he || HeVAL(he) != uninit_sv) 17285 break; 17286 } 17287 else { 17288 SV * const * const svp = 17289 av_fetch(MUTABLE_AV(sv), index_const_iv, FALSE); 17290 if (!svp || *svp != uninit_sv) 17291 break; 17292 } 17293 } 17294 return is_hv 17295 ? varname(agg_gv, '%', agg_targ, 17296 index_const_sv, 0, FUV_SUBSCRIPT_HASH) 17297 : varname(agg_gv, '@', agg_targ, 17298 NULL, index_const_iv, FUV_SUBSCRIPT_ARRAY); 17299 } 17300 else { 17301 /* index is an var */ 17302 if (is_hv) { 17303 SV * const keysv = find_hash_subscript((const HV*)sv, uninit_sv); 17304 if (keysv) 17305 return varname(agg_gv, '%', agg_targ, 17306 keysv, 0, FUV_SUBSCRIPT_HASH); 17307 } 17308 else { 17309 const SSize_t index 17310 = find_array_subscript((const AV *)sv, uninit_sv); 17311 if (index >= 0) 17312 return varname(agg_gv, '@', agg_targ, 17313 NULL, index, FUV_SUBSCRIPT_ARRAY); 17314 } 17315 /* look for an element not found */ 17316 if (!SvMAGICAL(sv)) { 17317 SV *index_sv = NULL; 17318 if (index_targ) { 17319 index_sv = PL_curpad[index_targ]; 17320 } 17321 else if (index_gv) { 17322 index_sv = GvSV(index_gv); 17323 } 17324 if (index_sv && !SvMAGICAL(index_sv) && !SvROK(index_sv)) { 17325 if (is_hv) { 17326 SV *report_index_sv = SvOK(index_sv) ? index_sv : &PL_sv_no; 17327 HE *he = hv_fetch_ent(MUTABLE_HV(sv), report_index_sv, 0, 0); 17328 if (!he) { 17329 return varname(agg_gv, '%', agg_targ, 17330 report_index_sv, 0, FUV_SUBSCRIPT_HASH); 17331 } 17332 } 17333 else { 17334 SSize_t index = SvOK(index_sv) ? SvIV(index_sv) : 0; 17335 SV * const * const svp = 17336 av_fetch(MUTABLE_AV(sv), index, FALSE); 17337 if (!svp) { 17338 return varname(agg_gv, '@', agg_targ, 17339 NULL, index, FUV_SUBSCRIPT_ARRAY); 17340 } 17341 } 17342 } 17343 } 17344 if (match) 17345 break; 17346 return varname(agg_gv, 17347 is_hv ? '%' : '@', 17348 agg_targ, NULL, 0, FUV_SUBSCRIPT_WITHIN); 17349 } 17350 NOT_REACHED; /* NOTREACHED */ 17351 } 17352 17353 case OP_AASSIGN: 17354 /* only examine RHS */ 17355 return find_uninit_var(cBINOPx(obase)->op_first, uninit_sv, 17356 match, desc_p); 17357 17358 case OP_OPEN: 17359 o = cUNOPx(obase)->op_first; 17360 if ( o->op_type == OP_PUSHMARK 17361 || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK) 17362 ) 17363 o = OpSIBLING(o); 17364 17365 if (!OpHAS_SIBLING(o)) { 17366 /* one-arg version of open is highly magical */ 17367 17368 if (o->op_type == OP_GV) { /* open FOO; */ 17369 gv = cGVOPx_gv(o); 17370 if (match && GvSV(gv) != uninit_sv) 17371 break; 17372 return varname(gv, '$', 0, 17373 NULL, 0, FUV_SUBSCRIPT_NONE); 17374 } 17375 /* other possibilities not handled are: 17376 * open $x; or open my $x; should return '${*$x}' 17377 * open expr; should return '$'.expr ideally 17378 */ 17379 break; 17380 } 17381 match = 1; 17382 goto do_op; 17383 17384 /* ops where $_ may be an implicit arg */ 17385 case OP_TRANS: 17386 case OP_TRANSR: 17387 case OP_SUBST: 17388 case OP_MATCH: 17389 if ( !(obase->op_flags & OPf_STACKED)) { 17390 if (uninit_sv == DEFSV) 17391 return newSVpvs_flags("$_", SVs_TEMP); 17392 else if (obase->op_targ 17393 && uninit_sv == PAD_SVl(obase->op_targ)) 17394 return varname(NULL, '$', obase->op_targ, NULL, 0, 17395 FUV_SUBSCRIPT_NONE); 17396 } 17397 goto do_op; 17398 17399 case OP_PRTF: 17400 case OP_PRINT: 17401 case OP_SAY: 17402 match = 1; /* print etc can return undef on defined args */ 17403 /* skip filehandle as it can't produce 'undef' warning */ 17404 o = cUNOPx(obase)->op_first; 17405 if ((obase->op_flags & OPf_STACKED) 17406 && 17407 ( o->op_type == OP_PUSHMARK 17408 || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK))) 17409 o = OpSIBLING(OpSIBLING(o)); 17410 goto do_op2; 17411 17412 17413 case OP_ENTEREVAL: /* could be eval $undef or $x='$undef'; eval $x */ 17414 case OP_CUSTOM: /* XS or custom code could trigger random warnings */ 17415 17416 /* the following ops are capable of returning PL_sv_undef even for 17417 * defined arg(s) */ 17418 17419 case OP_BACKTICK: 17420 case OP_PIPE_OP: 17421 case OP_FILENO: 17422 case OP_BINMODE: 17423 case OP_TIED: 17424 case OP_GETC: 17425 case OP_SYSREAD: 17426 case OP_READLINE: 17427 case OP_SEND: 17428 case OP_IOCTL: 17429 case OP_SOCKET: 17430 case OP_SOCKPAIR: 17431 case OP_BIND: 17432 case OP_CONNECT: 17433 case OP_LISTEN: 17434 case OP_ACCEPT: 17435 case OP_SHUTDOWN: 17436 case OP_SSOCKOPT: 17437 case OP_GETPEERNAME: 17438 case OP_FTRREAD: 17439 case OP_FTRWRITE: 17440 case OP_FTREXEC: 17441 case OP_FTROWNED: 17442 case OP_FTEREAD: 17443 case OP_FTEWRITE: 17444 case OP_FTEEXEC: 17445 case OP_FTEOWNED: 17446 case OP_FTIS: 17447 case OP_FTZERO: 17448 case OP_FTSIZE: 17449 case OP_FTFILE: 17450 case OP_FTDIR: 17451 case OP_FTLINK: 17452 case OP_FTPIPE: 17453 case OP_FTSOCK: 17454 case OP_FTBLK: 17455 case OP_FTCHR: 17456 case OP_FTTTY: 17457 case OP_FTSUID: 17458 case OP_FTSGID: 17459 case OP_FTSVTX: 17460 case OP_FTTEXT: 17461 case OP_FTBINARY: 17462 case OP_FTMTIME: 17463 case OP_FTATIME: 17464 case OP_FTCTIME: 17465 case OP_READLINK: 17466 case OP_OPEN_DIR: 17467 case OP_READDIR: 17468 case OP_TELLDIR: 17469 case OP_SEEKDIR: 17470 case OP_REWINDDIR: 17471 case OP_CLOSEDIR: 17472 case OP_GMTIME: 17473 case OP_ALARM: 17474 case OP_SEMGET: 17475 case OP_GETLOGIN: 17476 case OP_SUBSTR: 17477 case OP_AEACH: 17478 case OP_EACH: 17479 case OP_SORT: 17480 case OP_CALLER: 17481 case OP_DOFILE: 17482 case OP_PROTOTYPE: 17483 case OP_NCMP: 17484 case OP_SMARTMATCH: 17485 case OP_UNPACK: 17486 case OP_SYSOPEN: 17487 case OP_SYSSEEK: 17488 case OP_SPLICE: /* scalar splice(@x, $i, 0) ==> undef */ 17489 match = 1; 17490 goto do_op; 17491 17492 case OP_ENTERSUB: 17493 case OP_GOTO: 17494 /* XXX tmp hack: these two may call an XS sub, and currently 17495 XS subs don't have a SUB entry on the context stack, so CV and 17496 pad determination goes wrong, and BAD things happen. So, just 17497 don't try to determine the value under those circumstances. 17498 Need a better fix at dome point. DAPM 11/2007 */ 17499 break; 17500 17501 case OP_FLIP: 17502 case OP_FLOP: 17503 { 17504 GV * const gv = gv_fetchpvs(".", GV_NOTQUAL, SVt_PV); 17505 if (gv && GvSV(gv) == uninit_sv) 17506 return newSVpvs_flags("$.", SVs_TEMP); 17507 goto do_op; 17508 } 17509 17510 case OP_LENGTH: 17511 o = cUNOPx(obase)->op_first; 17512 sv = find_uninit_var(o, uninit_sv, match, desc_p); 17513 if (sv) { 17514 Perl_sv_insert_flags(aTHX_ sv, 0, 0, STR_WITH_LEN("length("), 0); 17515 sv_catpvs_nomg(sv, ")"); 17516 } 17517 return sv; 17518 17519 case OP_SHIFT: 17520 case OP_POP: 17521 if (match) { 17522 break; 17523 } 17524 if (!(obase->op_flags & OPf_KIDS)) { 17525 sv = newSVpvn_flags("", 0, SVs_TEMP); 17526 } 17527 else { 17528 o = cUNOPx(obase)->op_first; 17529 if (o->op_type == OP_RV2AV) { 17530 o2 = cUNOPx(o)->op_first; 17531 if (o2->op_type != OP_GV) { 17532 break; 17533 } 17534 gv = cGVOPx_gv(o2); 17535 if (!gv) { 17536 break; 17537 } 17538 } 17539 else if (o->op_type == OP_PADAV) { 17540 gv = NULL; 17541 } 17542 else { 17543 break; 17544 } 17545 sv = varname(gv, '@', o->op_targ, NULL, 0, FUV_SUBSCRIPT_NONE); 17546 } 17547 if (sv) { 17548 const char *name = OP_NAME(obase); 17549 Perl_sv_insert_flags(aTHX_ sv, 0, 0, STR_WITH_LEN("("), 0); 17550 Perl_sv_insert_flags(aTHX_ sv, 0, 0, name, strlen(name), 0); 17551 sv_catpvs_nomg(sv, ")"); 17552 } 17553 return sv; 17554 17555 case OP_POS: 17556 /* def-ness of rval pos() is independent of the def-ness of its arg */ 17557 if ( !(obase->op_flags & OPf_MOD)) 17558 break; 17559 /* FALLTHROUGH */ 17560 17561 case OP_SCHOMP: 17562 case OP_CHOMP: 17563 if (SvROK(PL_rs) && uninit_sv == SvRV(PL_rs)) 17564 return newSVpvs_flags("${$/}", SVs_TEMP); 17565 /* FALLTHROUGH */ 17566 17567 default: 17568 do_op: 17569 if (!(obase->op_flags & OPf_KIDS)) 17570 break; 17571 o = cUNOPx(obase)->op_first; 17572 17573 do_op2: 17574 if (!o) 17575 break; 17576 17577 /* This loop checks all the kid ops, skipping any that cannot pos- 17578 * sibly be responsible for the uninitialized value; i.e., defined 17579 * constants and ops that return nothing. If there is only one op 17580 * left that is not skipped, then we *know* it is responsible for 17581 * the uninitialized value. If there is more than one op left, we 17582 * have to look for an exact match in the while() loop below. 17583 * Note that we skip padrange, because the individual pad ops that 17584 * it replaced are still in the tree, so we work on them instead. 17585 */ 17586 o2 = NULL; 17587 for (kid=o; kid; kid = OpSIBLING(kid)) { 17588 const OPCODE type = kid->op_type; 17589 if ( (type == OP_CONST && SvOK(cSVOPx_sv(kid))) 17590 || (type == OP_NULL && ! (kid->op_flags & OPf_KIDS)) 17591 || (type == OP_PUSHMARK) 17592 || (type == OP_PADRANGE) 17593 ) 17594 continue; 17595 17596 if (o2) { /* more than one found */ 17597 o2 = NULL; 17598 break; 17599 } 17600 o2 = kid; 17601 } 17602 if (o2) 17603 return find_uninit_var(o2, uninit_sv, match, desc_p); 17604 17605 /* scan all args */ 17606 while (o) { 17607 sv = find_uninit_var(o, uninit_sv, 1, desc_p); 17608 if (sv) 17609 return sv; 17610 o = OpSIBLING(o); 17611 } 17612 break; 17613 } 17614 return NULL; 17615 } 17616 17617 17618 /* 17619 =for apidoc_section $warning 17620 =for apidoc report_uninit 17621 17622 Print appropriate "Use of uninitialized variable" warning. 17623 17624 =cut 17625 */ 17626 17627 void 17628 Perl_report_uninit(pTHX_ const SV *uninit_sv) 17629 { 17630 const char *desc = NULL; 17631 SV* varname = NULL; 17632 17633 if (PL_op) { 17634 desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded 17635 ? "join or string" 17636 : PL_op->op_type == OP_MULTICONCAT 17637 && (PL_op->op_private & OPpMULTICONCAT_FAKE) 17638 ? "sprintf" 17639 : OP_DESC(PL_op); 17640 if (uninit_sv && PL_curpad) { 17641 varname = find_uninit_var(PL_op, uninit_sv, 0, &desc); 17642 if (varname) 17643 sv_insert(varname, 0, 0, " ", 1); 17644 } 17645 } 17646 else if (PL_curstackinfo->si_type == PERLSI_SORT && cxstack_ix == 0) 17647 /* we've reached the end of a sort block or sub, 17648 * and the uninit value is probably what that code returned */ 17649 desc = "sort"; 17650 17651 /* PL_warn_uninit_sv is constant */ 17652 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 17653 if (desc) 17654 /* diag_listed_as: Use of uninitialized value%s */ 17655 Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit_sv, 17656 SVfARG(varname ? varname : &PL_sv_no), 17657 " in ", desc); 17658 else 17659 Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, 17660 "", "", ""); 17661 GCC_DIAG_RESTORE_STMT; 17662 } 17663 17664 /* 17665 * ex: set ts=8 sts=4 sw=4 et: 17666 */ 17667