1 /* hv.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * I sit beside the fire and think 13 * of all that I have seen. 14 * --Bilbo 15 * 16 * [p.278 of _The Lord of the Rings_, II/iii: "The Ring Goes South"] 17 */ 18 19 /* 20 =head1 HV Handling 21 A HV structure represents a Perl hash. It consists mainly of an array 22 of pointers, each of which points to a linked list of HE structures. The 23 array is indexed by the hash function of the key, so each linked list 24 represents all the hash entries with the same hash value. Each HE contains 25 a pointer to the actual value, plus a pointer to a HEK structure which 26 holds the key and hash value. 27 28 =cut 29 30 */ 31 32 #include "EXTERN.h" 33 #define PERL_IN_HV_C 34 #define PERL_HASH_INTERNAL_ACCESS 35 #include "perl.h" 36 37 /* we split when we collide and we have a load factor over 0.667. 38 * NOTE if you change this formula so we split earlier than previously 39 * you MUST change the logic in hv_ksplit() 40 */ 41 42 /* MAX_BUCKET_MAX is the maximum max bucket index, at which point we stop growing the 43 * number of buckets, 44 */ 45 #define MAX_BUCKET_MAX ((1<<26)-1) 46 #define DO_HSPLIT(xhv) ( ( ((xhv)->xhv_keys + ((xhv)->xhv_keys >> 1)) > (xhv)->xhv_max ) && \ 47 ((xhv)->xhv_max < MAX_BUCKET_MAX) ) 48 49 static const char S_strtab_error[] 50 = "Cannot modify shared string table in hv_%s"; 51 52 #define DEBUG_HASH_RAND_BITS (DEBUG_h_TEST) 53 54 /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" 55 * See also https://en.wikipedia.org/wiki/Xorshift 56 */ 57 #if IVSIZE == 8 58 /* 64 bit version */ 59 #define XORSHIFT_RAND_BITS(x) \ 60 STMT_START { \ 61 (x) ^= (x) << 13; \ 62 (x) ^= (x) >> 17; \ 63 (x) ^= (x) << 5; \ 64 } STMT_END 65 #else 66 /* 32 bit version */ 67 #define XORSHIFT_RAND_BITS(x) \ 68 STMT_START { \ 69 (x) ^= (x) << 13; \ 70 (x) ^= (x) >> 7; \ 71 (x) ^= (x) << 17; \ 72 } STMT_END 73 #endif 74 75 #define UPDATE_HASH_RAND_BITS_KEY(key,klen) \ 76 STMT_START { \ 77 XORSHIFT_RAND_BITS(PL_hash_rand_bits); \ 78 if (DEBUG_HASH_RAND_BITS) { \ 79 PerlIO_printf( Perl_debug_log, \ 80 "PL_hash_rand_bits=%016" UVxf" @ %s:%-4d", \ 81 (UV)PL_hash_rand_bits, __FILE__, __LINE__ \ 82 ); \ 83 if (DEBUG_v_TEST && key) { \ 84 PerlIO_printf( Perl_debug_log, " key:'%.*s' %" UVuf"\n", \ 85 (int)klen, \ 86 key ? key : "", /* silence warning */ \ 87 (UV)klen \ 88 ); \ 89 } else { \ 90 PerlIO_printf( Perl_debug_log, "\n"); \ 91 } \ 92 } \ 93 } STMT_END 94 95 #define MAYBE_UPDATE_HASH_RAND_BITS_KEY(key,klen) \ 96 STMT_START { \ 97 if (PL_HASH_RAND_BITS_ENABLED) \ 98 UPDATE_HASH_RAND_BITS_KEY(key,klen); \ 99 } STMT_END 100 101 102 #define UPDATE_HASH_RAND_BITS() \ 103 UPDATE_HASH_RAND_BITS_KEY(NULL,0) 104 105 #define MAYBE_UPDATE_HASH_RAND_BITS() \ 106 MAYBE_UPDATE_HASH_RAND_BITS_KEY(NULL,0) 107 108 /* HeKFLAGS(entry) is a single U8, so only provides 8 flags bits. 109 We currently use 3. All 3 we have behave differently, so if we find a use for 110 more flags it's hard to predict which they group with. 111 112 Hash keys are stored as flat octet sequences, not SVs. Hence we need a flag 113 bit to say whether those octet sequences represent ISO-8859-1 or UTF-8 - 114 HVhek_UTF8. The value of this flag bit matters for (regular) hash key 115 lookups. 116 117 To speed up comparisons, keys are normalised to octets. But we (also) 118 preserve whether the key was supplied, so we need another flag bit to say 119 whether to reverse the normalisation when iterating the keys (converting them 120 back to SVs) - HVhek_WASUTF8. The value of this flag bit must be ignored for 121 (regular) hash key lookups. 122 123 But for the shared string table (the private "hash" that manages shared hash 124 keys and their reference counts), we need to be able to store both variants 125 (HVhek_WASUTF8 set and clear), so the code performing lookups in this hash 126 must be different and consider both keys. 127 128 However, regular hashes (now) can have a mix of shared and unshared keys. 129 (This avoids the need to reallocate all the keys into unshared storage at 130 the point where hash passes the "large" hash threshold, and no longer uses 131 the shared string table - existing keys remain shared, to avoid makework.) 132 133 Meaning that HVhek_NOTSHARED *may* be set in regular hashes (but should be 134 ignored for hash lookups) but must always be clear in the keys in the shared 135 string table (because the pointers to these keys are directly copied into 136 regular hashes - this is how shared keys work.) 137 138 Hence all 3 are different, and it's hard to predict the best way to future 139 proof what is needed next. 140 141 We also have HVhek_ENABLEHVKFLAGS, which is used as a mask within the code 142 below to determine whether to set HvHASKFLAGS() true on the hash as a whole. 143 This is a public "optimisation" flag provided to serealisers, to indicate 144 (up front) that a hash contains non-8-bit keys, if they want to use different 145 storage formats for hashes where all keys are simple octet sequences 146 (avoiding needing to store an extra byte per hash key), and they need to know 147 that this holds *before* iterating the hash keys. Only Storable seems to use 148 this. (For this use case, HVhek_NOTSHARED doesn't matter) 149 150 For now, we assume that any future flag bits will need to be distinguished 151 in the shared string table, hence we create this mask for the shared string 152 table code. It happens to be the same as HVhek_ENABLEHVKFLAGS, but that might 153 change if we add a flag bit that matters to the shared string table but not 154 to Storable (or similar). */ 155 156 #define HVhek_STORAGE_MASK (0xFF & ~HVhek_NOTSHARED) 157 158 #ifdef PURIFY 159 160 #define new_HE() (HE*)safemalloc(sizeof(HE)) 161 #define del_HE(p) safefree((char*)p) 162 163 #else 164 165 STATIC HE* 166 S_new_he(pTHX) 167 { 168 HE* he; 169 void ** const root = &PL_body_roots[HE_ARENA_ROOT_IX]; 170 171 if (!*root) 172 Perl_more_bodies(aTHX_ HE_ARENA_ROOT_IX, sizeof(HE), PERL_ARENA_SIZE); 173 he = (HE*) *root; 174 assert(he); 175 *root = HeNEXT(he); 176 return he; 177 } 178 179 #define new_HE() new_he() 180 #define del_HE(p) \ 181 STMT_START { \ 182 HeNEXT(p) = (HE*)(PL_body_roots[HE_ARENA_ROOT_IX]); \ 183 PL_body_roots[HE_ARENA_ROOT_IX] = p; \ 184 } STMT_END 185 186 187 188 #endif 189 190 STATIC HEK * 191 S_save_hek_flags(const char *str, I32 len, U32 hash, int flags) 192 { 193 char *k; 194 HEK *hek; 195 196 PERL_ARGS_ASSERT_SAVE_HEK_FLAGS; 197 198 Newx(k, HEK_BASESIZE + len + 2, char); 199 hek = (HEK*)k; 200 Copy(str, HEK_KEY(hek), len, char); 201 HEK_KEY(hek)[len] = 0; 202 HEK_LEN(hek) = len; 203 HEK_HASH(hek) = hash; 204 HEK_FLAGS(hek) = HVhek_NOTSHARED | (flags & HVhek_STORAGE_MASK); 205 206 if (flags & HVhek_FREEKEY) 207 Safefree(str); 208 return hek; 209 } 210 211 /* free the pool of temporary HE/HEK pairs returned by hv_fetch_ent 212 * for tied hashes */ 213 214 void 215 Perl_free_tied_hv_pool(pTHX) 216 { 217 HE *he = PL_hv_fetch_ent_mh; 218 while (he) { 219 HE * const ohe = he; 220 Safefree(HeKEY_hek(he)); 221 he = HeNEXT(he); 222 del_HE(ohe); 223 } 224 PL_hv_fetch_ent_mh = NULL; 225 } 226 227 #if defined(USE_ITHREADS) 228 HEK * 229 Perl_hek_dup(pTHX_ HEK *source, CLONE_PARAMS* param) 230 { 231 HEK *shared; 232 233 PERL_ARGS_ASSERT_HEK_DUP; 234 PERL_UNUSED_ARG(param); 235 236 if (!source) 237 return NULL; 238 239 shared = (HEK*)ptr_table_fetch(PL_ptr_table, source); 240 if (shared) { 241 /* We already shared this hash key. */ 242 (void)share_hek_hek(shared); 243 } 244 else { 245 shared 246 = share_hek_flags(HEK_KEY(source), HEK_LEN(source), 247 HEK_HASH(source), HEK_FLAGS(source)); 248 ptr_table_store(PL_ptr_table, source, shared); 249 } 250 return shared; 251 } 252 253 HE * 254 Perl_he_dup(pTHX_ const HE *e, bool shared, CLONE_PARAMS* param) 255 { 256 HE *ret; 257 258 PERL_ARGS_ASSERT_HE_DUP; 259 260 /* All the *_dup functions are deemed to be API, despite most being deeply 261 tied to the internals. Hence we can't simply remove the parameter 262 "shared" from this function. */ 263 /* sv_dup and sv_dup_inc seem to be the only two that are used by XS code. 264 Probably the others should be dropped from the API. See #19409 */ 265 PERL_UNUSED_ARG(shared); 266 267 if (!e) 268 return NULL; 269 /* look for it in the table first */ 270 ret = (HE*)ptr_table_fetch(PL_ptr_table, e); 271 if (ret) 272 return ret; 273 274 /* create anew and remember what it is */ 275 ret = new_HE(); 276 ptr_table_store(PL_ptr_table, e, ret); 277 278 if (HeKLEN(e) == HEf_SVKEY) { 279 char *k; 280 Newx(k, HEK_BASESIZE + sizeof(const SV *), char); 281 HeKEY_hek(ret) = (HEK*)k; 282 HeKEY_sv(ret) = sv_dup_inc(HeKEY_sv(e), param); 283 } 284 else if (!(HeKFLAGS(e) & HVhek_NOTSHARED)) { 285 /* This is hek_dup inlined, which seems to be important for speed 286 reasons. */ 287 HEK * const source = HeKEY_hek(e); 288 HEK *shared = (HEK*)ptr_table_fetch(PL_ptr_table, source); 289 290 if (shared) { 291 /* We already shared this hash key. */ 292 (void)share_hek_hek(shared); 293 } 294 else { 295 shared 296 = share_hek_flags(HEK_KEY(source), HEK_LEN(source), 297 HEK_HASH(source), HEK_FLAGS(source)); 298 ptr_table_store(PL_ptr_table, source, shared); 299 } 300 HeKEY_hek(ret) = shared; 301 } 302 else 303 HeKEY_hek(ret) = save_hek_flags(HeKEY(e), HeKLEN(e), HeHASH(e), 304 HeKFLAGS(e)); 305 HeVAL(ret) = sv_dup_inc(HeVAL(e), param); 306 307 HeNEXT(ret) = he_dup(HeNEXT(e), FALSE, param); 308 return ret; 309 } 310 #endif /* USE_ITHREADS */ 311 312 static void 313 S_hv_notallowed(pTHX_ int flags, const char *key, I32 klen, 314 const char *msg) 315 { 316 /* Straight to SVt_PVN here, as needed by sv_setpvn_fresh and 317 * sv_usepvn would otherwise call it */ 318 SV * const sv = newSV_type_mortal(SVt_PV); 319 320 PERL_ARGS_ASSERT_HV_NOTALLOWED; 321 322 if (!(flags & HVhek_FREEKEY)) { 323 sv_setpvn_fresh(sv, key, klen); 324 } 325 else { 326 /* Need to free saved eventually assign to mortal SV */ 327 /* XXX is this line an error ???: SV *sv = sv_newmortal(); */ 328 sv_usepvn(sv, (char *) key, klen); 329 } 330 if (flags & HVhek_UTF8) { 331 SvUTF8_on(sv); 332 } 333 Perl_croak(aTHX_ msg, SVfARG(sv)); 334 } 335 336 /* (klen == HEf_SVKEY) is special for MAGICAL hv entries, meaning key slot 337 * contains an SV* */ 338 339 /* 340 =for apidoc hv_store 341 342 Stores an SV in a hash. The hash key is specified as C<key> and the 343 absolute value of C<klen> is the length of the key. If C<klen> is 344 negative the key is assumed to be in UTF-8-encoded Unicode. The 345 C<hash> parameter is the precomputed hash value; if it is zero then 346 Perl will compute it. 347 348 The return value will be 349 C<NULL> if the operation failed or if the value did not need to be actually 350 stored within the hash (as in the case of tied hashes). Otherwise it can 351 be dereferenced to get the original C<SV*>. Note that the caller is 352 responsible for suitably incrementing the reference count of C<val> before 353 the call, and decrementing it if the function returned C<NULL>. Effectively 354 a successful C<hv_store> takes ownership of one reference to C<val>. This is 355 usually what you want; a newly created SV has a reference count of one, so 356 if all your code does is create SVs then store them in a hash, C<hv_store> 357 will own the only reference to the new SV, and your code doesn't need to do 358 anything further to tidy up. C<hv_store> is not implemented as a call to 359 C<hv_store_ent>, and does not create a temporary SV for the key, so if your 360 key data is not already in SV form then use C<hv_store> in preference to 361 C<hv_store_ent>. 362 363 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more 364 information on how to use this function on tied hashes. 365 366 =for apidoc hv_store_ent 367 368 Stores C<val> in a hash. The hash key is specified as C<key>. The C<hash> 369 parameter is the precomputed hash value; if it is zero then Perl will 370 compute it. The return value is the new hash entry so created. It will be 371 C<NULL> if the operation failed or if the value did not need to be actually 372 stored within the hash (as in the case of tied hashes). Otherwise the 373 contents of the return value can be accessed using the C<He?> macros 374 described here. Note that the caller is responsible for suitably 375 incrementing the reference count of C<val> before the call, and 376 decrementing it if the function returned NULL. Effectively a successful 377 C<hv_store_ent> takes ownership of one reference to C<val>. This is 378 usually what you want; a newly created SV has a reference count of one, so 379 if all your code does is create SVs then store them in a hash, C<hv_store> 380 will own the only reference to the new SV, and your code doesn't need to do 381 anything further to tidy up. Note that C<hv_store_ent> only reads the C<key>; 382 unlike C<val> it does not take ownership of it, so maintaining the correct 383 reference count on C<key> is entirely the caller's responsibility. The reason 384 it does not take ownership, is that C<key> is not used after this function 385 returns, and so can be freed immediately. C<hv_store> 386 is not implemented as a call to C<hv_store_ent>, and does not create a temporary 387 SV for the key, so if your key data is not already in SV form then use 388 C<hv_store> in preference to C<hv_store_ent>. 389 390 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more 391 information on how to use this function on tied hashes. 392 393 =for apidoc hv_exists 394 395 Returns a boolean indicating whether the specified hash key exists. The 396 absolute value of C<klen> is the length of the key. If C<klen> is 397 negative the key is assumed to be in UTF-8-encoded Unicode. 398 399 =for apidoc hv_fetch 400 401 Returns the SV which corresponds to the specified key in the hash. 402 The absolute value of C<klen> is the length of the key. If C<klen> is 403 negative the key is assumed to be in UTF-8-encoded Unicode. If 404 C<lval> is set then the fetch will be part of a store. This means that if 405 there is no value in the hash associated with the given key, then one is 406 created and a pointer to it is returned. The C<SV*> it points to can be 407 assigned to. But always check that the 408 return value is non-null before dereferencing it to an C<SV*>. 409 410 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more 411 information on how to use this function on tied hashes. 412 413 =for apidoc hv_exists_ent 414 415 Returns a boolean indicating whether 416 the specified hash key exists. C<hash> 417 can be a valid precomputed hash value, or 0 to ask for it to be 418 computed. 419 420 =cut 421 */ 422 423 /* returns an HE * structure with the all fields set */ 424 /* note that hent_val will be a mortal sv for MAGICAL hashes */ 425 /* 426 =for apidoc hv_fetch_ent 427 428 Returns the hash entry which corresponds to the specified key in the hash. 429 C<hash> must be a valid precomputed hash number for the given C<key>, or 0 430 if you want the function to compute it. IF C<lval> is set then the fetch 431 will be part of a store. Make sure the return value is non-null before 432 accessing it. The return value when C<hv> is a tied hash is a pointer to a 433 static location, so be sure to make a copy of the structure if you need to 434 store it somewhere. 435 436 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more 437 information on how to use this function on tied hashes. 438 439 =cut 440 */ 441 442 /* Common code for hv_delete()/hv_exists()/hv_fetch()/hv_store() */ 443 void * 444 Perl_hv_common_key_len(pTHX_ HV *hv, const char *key, I32 klen_i32, 445 const int action, SV *val, const U32 hash) 446 { 447 STRLEN klen; 448 int flags; 449 450 PERL_ARGS_ASSERT_HV_COMMON_KEY_LEN; 451 452 if (klen_i32 < 0) { 453 klen = -klen_i32; 454 flags = HVhek_UTF8; 455 } else { 456 klen = klen_i32; 457 flags = 0; 458 } 459 return hv_common(hv, NULL, key, klen, flags, action, val, hash); 460 } 461 462 void * 463 Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, 464 int flags, int action, SV *val, U32 hash) 465 { 466 XPVHV* xhv; 467 HE *entry; 468 HE **oentry; 469 SV *sv; 470 bool is_utf8; 471 bool in_collision; 472 const int return_svp = action & HV_FETCH_JUST_SV; 473 HEK *keysv_hek = NULL; 474 475 if (!hv) 476 return NULL; 477 if (SvTYPE(hv) == (svtype)SVTYPEMASK) 478 return NULL; 479 480 assert(SvTYPE(hv) == SVt_PVHV); 481 482 if (SvSMAGICAL(hv) && SvGMAGICAL(hv) && !(action & HV_DISABLE_UVAR_XKEY)) { 483 MAGIC* mg; 484 if ((mg = mg_find((const SV *)hv, PERL_MAGIC_uvar))) { 485 struct ufuncs * const uf = (struct ufuncs *)mg->mg_ptr; 486 if (uf->uf_set == NULL) { 487 SV* obj = mg->mg_obj; 488 489 if (!keysv) { 490 keysv = newSVpvn_flags(key, klen, SVs_TEMP | 491 ((flags & HVhek_UTF8) 492 ? SVf_UTF8 : 0)); 493 } 494 495 mg->mg_obj = keysv; /* pass key */ 496 uf->uf_index = action; /* pass action */ 497 magic_getuvar(MUTABLE_SV(hv), mg); 498 keysv = mg->mg_obj; /* may have changed */ 499 mg->mg_obj = obj; 500 501 /* If the key may have changed, then we need to invalidate 502 any passed-in computed hash value. */ 503 hash = 0; 504 } 505 } 506 } 507 508 /* flags might have HVhek_NOTSHARED set. If so, we need to ignore that. 509 Some callers to hv_common() pass the flags value from an existing HEK, 510 and if that HEK is not shared, then it has the relevant flag bit set, 511 which must not be passed into share_hek_flags(). 512 513 It would be "purer" to insist that all callers clear it, but we'll end up 514 with subtle bugs if we leave it to them, or runtime assertion failures if 515 we try to enforce our documentation with landmines. 516 517 If keysv is true, all code paths assign a new value to flags with that 518 bit clear, so we're always "good". Hence we only need to explicitly clear 519 this bit in the else block. */ 520 if (keysv) { 521 if (flags & HVhek_FREEKEY) 522 Safefree(key); 523 key = SvPV_const(keysv, klen); 524 is_utf8 = (SvUTF8(keysv) != 0); 525 if (SvIsCOW_shared_hash(keysv)) { 526 flags = HVhek_KEYCANONICAL | (is_utf8 ? HVhek_UTF8 : 0); 527 } else { 528 flags = 0; 529 } 530 } else { 531 is_utf8 = cBOOL(flags & HVhek_UTF8); 532 flags &= ~HVhek_NOTSHARED; 533 } 534 535 if (action & HV_DELETE) { 536 return (void *) hv_delete_common(hv, keysv, key, klen, 537 flags | (is_utf8 ? HVhek_UTF8 : 0), 538 action, hash); 539 } 540 541 xhv = (XPVHV*)SvANY(hv); 542 if (SvMAGICAL(hv)) { 543 if (SvRMAGICAL(hv) && !(action & (HV_FETCH_ISSTORE|HV_FETCH_ISEXISTS))) { 544 if (mg_find((const SV *)hv, PERL_MAGIC_tied) 545 || SvGMAGICAL((const SV *)hv)) 546 { 547 /* FIXME should be able to skimp on the HE/HEK here when 548 HV_FETCH_JUST_SV is true. */ 549 if (!keysv) { 550 keysv = newSVpvn_utf8(key, klen, is_utf8); 551 } else { 552 keysv = newSVsv(keysv); 553 } 554 sv = sv_newmortal(); 555 mg_copy(MUTABLE_SV(hv), sv, (char *)keysv, HEf_SVKEY); 556 557 /* grab a fake HE/HEK pair from the pool or make a new one */ 558 entry = PL_hv_fetch_ent_mh; 559 if (entry) 560 PL_hv_fetch_ent_mh = HeNEXT(entry); 561 else { 562 char *k; 563 entry = new_HE(); 564 Newx(k, HEK_BASESIZE + sizeof(const SV *), char); 565 HeKEY_hek(entry) = (HEK*)k; 566 } 567 HeNEXT(entry) = NULL; 568 HeSVKEY_set(entry, keysv); 569 HeVAL(entry) = sv; 570 sv_upgrade(sv, SVt_PVLV); 571 LvTYPE(sv) = 'T'; 572 /* so we can free entry when freeing sv */ 573 LvTARG(sv) = MUTABLE_SV(entry); 574 575 /* XXX remove at some point? */ 576 if (flags & HVhek_FREEKEY) 577 Safefree(key); 578 579 if (return_svp) { 580 return entry ? (void *) &HeVAL(entry) : NULL; 581 } 582 return (void *) entry; 583 } 584 #ifdef ENV_IS_CASELESS 585 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) { 586 U32 i; 587 for (i = 0; i < klen; ++i) 588 if (isLOWER(key[i])) { 589 /* Would be nice if we had a routine to do the 590 copy and upercase in a single pass through. */ 591 const char * const nkey = strupr(savepvn(key,klen)); 592 /* Note that this fetch is for nkey (the uppercased 593 key) whereas the store is for key (the original) */ 594 void *result = hv_common(hv, NULL, nkey, klen, 595 HVhek_FREEKEY, /* free nkey */ 596 0 /* non-LVAL fetch */ 597 | HV_DISABLE_UVAR_XKEY 598 | return_svp, 599 NULL /* no value */, 600 0 /* compute hash */); 601 if (!result && (action & HV_FETCH_LVALUE)) { 602 /* This call will free key if necessary. 603 Do it this way to encourage compiler to tail 604 call optimise. */ 605 result = hv_common(hv, keysv, key, klen, flags, 606 HV_FETCH_ISSTORE 607 | HV_DISABLE_UVAR_XKEY 608 | return_svp, 609 newSV_type(SVt_NULL), hash); 610 } else { 611 if (flags & HVhek_FREEKEY) 612 Safefree(key); 613 } 614 return result; 615 } 616 } 617 #endif 618 } /* ISFETCH */ 619 else if (SvRMAGICAL(hv) && (action & HV_FETCH_ISEXISTS)) { 620 if (mg_find((const SV *)hv, PERL_MAGIC_tied) 621 || SvGMAGICAL((const SV *)hv)) { 622 /* I don't understand why hv_exists_ent has svret and sv, 623 whereas hv_exists only had one. */ 624 SV * const svret = sv_newmortal(); 625 sv = sv_newmortal(); 626 627 if (keysv || is_utf8) { 628 if (!keysv) { 629 keysv = newSVpvn_utf8(key, klen, TRUE); 630 } else { 631 keysv = newSVsv(keysv); 632 } 633 mg_copy(MUTABLE_SV(hv), sv, (char *)sv_2mortal(keysv), HEf_SVKEY); 634 } else { 635 mg_copy(MUTABLE_SV(hv), sv, key, klen); 636 } 637 if (flags & HVhek_FREEKEY) 638 Safefree(key); 639 { 640 MAGIC * const mg = mg_find(sv, PERL_MAGIC_tiedelem); 641 if (mg) 642 magic_existspack(svret, mg); 643 } 644 /* This cast somewhat evil, but I'm merely using NULL/ 645 not NULL to return the boolean exists. 646 And I know hv is not NULL. */ 647 return SvTRUE_NN(svret) ? (void *)hv : NULL; 648 } 649 #ifdef ENV_IS_CASELESS 650 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) { 651 /* XXX This code isn't UTF8 clean. */ 652 char * const keysave = (char * const)key; 653 /* Will need to free this, so set FREEKEY flag. */ 654 key = savepvn(key,klen); 655 key = (const char*)strupr((char*)key); 656 is_utf8 = FALSE; 657 hash = 0; 658 keysv = 0; 659 660 if (flags & HVhek_FREEKEY) { 661 Safefree(keysave); 662 } 663 flags |= HVhek_FREEKEY; 664 } 665 #endif 666 } /* ISEXISTS */ 667 else if (action & HV_FETCH_ISSTORE) { 668 bool needs_copy; 669 bool needs_store; 670 hv_magic_check (hv, &needs_copy, &needs_store); 671 if (needs_copy) { 672 const bool save_taint = TAINT_get; 673 if (keysv || is_utf8) { 674 if (!keysv) { 675 keysv = newSVpvn_utf8(key, klen, TRUE); 676 } 677 if (TAINTING_get) 678 TAINT_set(SvTAINTED(keysv)); 679 keysv = sv_2mortal(newSVsv(keysv)); 680 mg_copy(MUTABLE_SV(hv), val, (char*)keysv, HEf_SVKEY); 681 } else { 682 mg_copy(MUTABLE_SV(hv), val, key, klen); 683 } 684 685 TAINT_IF(save_taint); 686 #ifdef NO_TAINT_SUPPORT 687 PERL_UNUSED_VAR(save_taint); 688 #endif 689 if (!needs_store) { 690 if (flags & HVhek_FREEKEY) 691 Safefree(key); 692 return NULL; 693 } 694 #ifdef ENV_IS_CASELESS 695 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) { 696 /* XXX This code isn't UTF8 clean. */ 697 const char *keysave = key; 698 /* Will need to free this, so set FREEKEY flag. */ 699 key = savepvn(key,klen); 700 key = (const char*)strupr((char*)key); 701 is_utf8 = FALSE; 702 hash = 0; 703 keysv = 0; 704 705 if (flags & HVhek_FREEKEY) { 706 Safefree(keysave); 707 } 708 flags |= HVhek_FREEKEY; 709 } 710 #endif 711 } 712 } /* ISSTORE */ 713 } /* SvMAGICAL */ 714 715 if (!HvARRAY(hv)) { 716 if ((action & (HV_FETCH_LVALUE | HV_FETCH_ISSTORE)) 717 #ifdef DYNAMIC_ENV_FETCH /* if it's an %ENV lookup, we may get it on the fly */ 718 || (SvRMAGICAL((const SV *)hv) 719 && mg_find((const SV *)hv, PERL_MAGIC_env)) 720 #endif 721 ) { 722 char *array; 723 Newxz(array, 724 PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */), 725 char); 726 HvARRAY(hv) = (HE**)array; 727 } 728 #ifdef DYNAMIC_ENV_FETCH 729 else if (action & HV_FETCH_ISEXISTS) { 730 /* for an %ENV exists, if we do an insert it's by a recursive 731 store call, so avoid creating HvARRAY(hv) right now. */ 732 } 733 #endif 734 else { 735 /* XXX remove at some point? */ 736 if (flags & HVhek_FREEKEY) 737 Safefree(key); 738 739 return NULL; 740 } 741 } 742 743 if (is_utf8 && !(flags & HVhek_KEYCANONICAL)) { 744 char * const keysave = (char *)key; 745 key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8); 746 if (is_utf8) 747 flags |= HVhek_UTF8; 748 else 749 flags &= ~HVhek_UTF8; 750 if (key != keysave) { 751 if (flags & HVhek_FREEKEY) 752 Safefree(keysave); 753 flags |= HVhek_WASUTF8 | HVhek_FREEKEY; 754 /* If the caller calculated a hash, it was on the sequence of 755 octets that are the UTF-8 form. We've now changed the sequence 756 of octets stored to that of the equivalent byte representation, 757 so the hash we need is different. */ 758 hash = 0; 759 } 760 } 761 762 if (keysv && (SvIsCOW_shared_hash(keysv))) { 763 if (HvSHAREKEYS(hv)) 764 keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); 765 hash = SvSHARED_HASH(keysv); 766 } 767 else if (!hash) 768 PERL_HASH(hash, key, klen); 769 770 #ifdef DYNAMIC_ENV_FETCH 771 if (!HvARRAY(hv)) entry = NULL; 772 else 773 #endif 774 { 775 entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; 776 } 777 778 if (!entry) 779 goto not_found; 780 781 if (keysv_hek) { 782 /* keysv is actually a HEK in disguise, so we can match just by 783 * comparing the HEK pointers in the HE chain. There is a slight 784 * caveat: on something like "\x80", which has both plain and utf8 785 * representations, perl's hashes do encoding-insensitive lookups, 786 * but preserve the encoding of the stored key. Thus a particular 787 * key could map to two different HEKs in PL_strtab. We only 788 * conclude 'not found' if all the flags are the same; otherwise 789 * we fall back to a full search (this should only happen in rare 790 * cases). 791 */ 792 int keysv_flags = HEK_FLAGS(keysv_hek); 793 HE *orig_entry = entry; 794 795 for (; entry; entry = HeNEXT(entry)) { 796 HEK *hek = HeKEY_hek(entry); 797 if (hek == keysv_hek) 798 goto found; 799 if (HEK_FLAGS(hek) != keysv_flags) 800 break; /* need to do full match */ 801 } 802 if (!entry) 803 goto not_found; 804 /* failed on shortcut - do full search loop */ 805 entry = orig_entry; 806 } 807 808 for (; entry; entry = HeNEXT(entry)) { 809 if (HeHASH(entry) != hash) /* strings can't be equal */ 810 continue; 811 if (HeKLEN(entry) != (I32)klen) 812 continue; 813 if (memNE(HeKEY(entry),key,klen)) /* is this it? */ 814 continue; 815 if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8) 816 continue; 817 818 found: 819 if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) { 820 if ((HeKFLAGS(entry) ^ flags) & HVhek_WASUTF8) { 821 /* We match if HVhek_UTF8 bit in our flags and hash key's 822 match. But if entry was set previously with HVhek_WASUTF8 823 and key now doesn't (or vice versa) then we should change 824 the key's flag, as this is assignment. */ 825 if ((HeKFLAGS(entry) & HVhek_NOTSHARED) == 0) { 826 /* Need to swap the key we have for a key with the flags we 827 need. As keys are shared we can't just write to the 828 flag, so we share the new one, unshare the old one. */ 829 HEK * const new_hek 830 = share_hek_flags(key, klen, hash, flags & ~HVhek_FREEKEY); 831 unshare_hek (HeKEY_hek(entry)); 832 HeKEY_hek(entry) = new_hek; 833 } 834 else if (hv == PL_strtab) { 835 /* PL_strtab is usually the only hash without HvSHAREKEYS, 836 so putting this test here is cheap */ 837 if (flags & HVhek_FREEKEY) 838 Safefree(key); 839 Perl_croak(aTHX_ S_strtab_error, 840 action & HV_FETCH_LVALUE ? "fetch" : "store"); 841 } 842 else { 843 /* Effectively this is save_hek_flags() for a new version 844 of the HEK and Safefree() of the old rolled together. */ 845 HeKFLAGS(entry) ^= HVhek_WASUTF8; 846 } 847 if (flags & HVhek_ENABLEHVKFLAGS) 848 HvHASKFLAGS_on(hv); 849 } 850 if (HeVAL(entry) == &PL_sv_placeholder) { 851 /* yes, can store into placeholder slot */ 852 if (action & HV_FETCH_LVALUE) { 853 if (SvMAGICAL(hv)) { 854 /* This preserves behaviour with the old hv_fetch 855 implementation which at this point would bail out 856 with a break; (at "if we find a placeholder, we 857 pretend we haven't found anything") 858 859 That break mean that if a placeholder were found, it 860 caused a call into hv_store, which in turn would 861 check magic, and if there is no magic end up pretty 862 much back at this point (in hv_store's code). */ 863 break; 864 } 865 /* LVAL fetch which actually needs a store. */ 866 val = newSV_type(SVt_NULL); 867 HvPLACEHOLDERS(hv)--; 868 } else { 869 /* store */ 870 if (val != &PL_sv_placeholder) 871 HvPLACEHOLDERS(hv)--; 872 } 873 HeVAL(entry) = val; 874 } else if (action & HV_FETCH_ISSTORE) { 875 SvREFCNT_dec(HeVAL(entry)); 876 HeVAL(entry) = val; 877 } 878 } else if (HeVAL(entry) == &PL_sv_placeholder) { 879 /* if we find a placeholder, we pretend we haven't found 880 anything */ 881 break; 882 } 883 if (flags & HVhek_FREEKEY) 884 Safefree(key); 885 if (return_svp) { 886 return (void *) &HeVAL(entry); 887 } 888 return entry; 889 } 890 891 not_found: 892 #ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */ 893 if (!(action & HV_FETCH_ISSTORE) 894 && SvRMAGICAL((const SV *)hv) 895 && mg_find((const SV *)hv, PERL_MAGIC_env)) { 896 unsigned long len; 897 const char * const env = PerlEnv_ENVgetenv_len(key,&len); 898 if (env) { 899 sv = newSVpvn(env,len); 900 SvTAINTED_on(sv); 901 return hv_common(hv, keysv, key, klen, flags, 902 HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp, 903 sv, hash); 904 } 905 } 906 #endif 907 908 if (!entry && SvREADONLY(hv) && !(action & HV_FETCH_ISEXISTS)) { 909 hv_notallowed(flags, key, klen, 910 "Attempt to access disallowed key '%" SVf "' in" 911 " a restricted hash"); 912 } 913 if (!(action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE))) { 914 /* Not doing some form of store, so return failure. */ 915 if (flags & HVhek_FREEKEY) 916 Safefree(key); 917 return NULL; 918 } 919 if (action & HV_FETCH_LVALUE) { 920 val = action & HV_FETCH_EMPTY_HE ? NULL : newSV_type(SVt_NULL); 921 if (SvMAGICAL(hv)) { 922 /* At this point the old hv_fetch code would call to hv_store, 923 which in turn might do some tied magic. So we need to make that 924 magic check happen. */ 925 /* gonna assign to this, so it better be there */ 926 /* If a fetch-as-store fails on the fetch, then the action is to 927 recurse once into "hv_store". If we didn't do this, then that 928 recursive call would call the key conversion routine again. 929 However, as we replace the original key with the converted 930 key, this would result in a double conversion, which would show 931 up as a bug if the conversion routine is not idempotent. 932 Hence the use of HV_DISABLE_UVAR_XKEY. */ 933 return hv_common(hv, keysv, key, klen, flags, 934 HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp, 935 val, hash); 936 /* XXX Surely that could leak if the fetch-was-store fails? 937 Just like the hv_fetch. */ 938 } 939 } 940 941 /* Welcome to hv_store... */ 942 943 if (!HvARRAY(hv)) { 944 /* Not sure if we can get here. I think the only case of oentry being 945 NULL is for %ENV with dynamic env fetch. But that should disappear 946 with magic in the previous code. */ 947 char *array; 948 Newxz(array, 949 PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */), 950 char); 951 HvARRAY(hv) = (HE**)array; 952 } 953 954 oentry = &(HvARRAY(hv))[hash & (I32) xhv->xhv_max]; 955 956 /* share_hek_flags will do the free for us. This might be considered 957 bad API design. */ 958 if (LIKELY(HvSHAREKEYS(hv))) { 959 entry = new_HE(); 960 HeKEY_hek(entry) = share_hek_flags(key, klen, hash, flags); 961 } 962 else if (UNLIKELY(hv == PL_strtab)) { 963 /* PL_strtab is usually the only hash without HvSHAREKEYS, so putting 964 this test here is cheap */ 965 if (flags & HVhek_FREEKEY) 966 Safefree(key); 967 Perl_croak(aTHX_ S_strtab_error, 968 action & HV_FETCH_LVALUE ? "fetch" : "store"); 969 } 970 else { 971 /* gotta do the real thing */ 972 entry = new_HE(); 973 HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); 974 } 975 HeVAL(entry) = val; 976 in_collision = cBOOL(*oentry != NULL); 977 978 979 #ifdef PERL_HASH_RANDOMIZE_KEYS 980 /* This logic semi-randomizes the insert order in a bucket. 981 * Either we insert into the top, or the slot below the top, 982 * making it harder to see if there is a collision. We also 983 * reset the iterator randomizer if there is one. 984 */ 985 986 987 if ( *oentry && PL_HASH_RAND_BITS_ENABLED) { 988 UPDATE_HASH_RAND_BITS_KEY(key,klen); 989 if ( PL_hash_rand_bits & 1 ) { 990 HeNEXT(entry) = HeNEXT(*oentry); 991 HeNEXT(*oentry) = entry; 992 } else { 993 HeNEXT(entry) = *oentry; 994 *oentry = entry; 995 } 996 } else 997 #endif 998 { 999 HeNEXT(entry) = *oentry; 1000 *oentry = entry; 1001 } 1002 #ifdef PERL_HASH_RANDOMIZE_KEYS 1003 if (SvOOK(hv)) { 1004 /* Currently this makes various tests warn in annoying ways. 1005 * So Silenced for now. - Yves | bogus end of comment =>* / 1006 if (HvAUX(hv)->xhv_riter != -1) { 1007 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 1008 "[TESTING] Inserting into a hash during each() traversal results in undefined behavior" 1009 pTHX__FORMAT 1010 pTHX__VALUE); 1011 } 1012 */ 1013 MAYBE_UPDATE_HASH_RAND_BITS_KEY(key,klen); 1014 HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits; 1015 } 1016 #endif 1017 1018 if (val == &PL_sv_placeholder) 1019 HvPLACEHOLDERS(hv)++; 1020 if (flags & HVhek_ENABLEHVKFLAGS) 1021 HvHASKFLAGS_on(hv); 1022 1023 xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ 1024 if ( in_collision && DO_HSPLIT(xhv) ) { 1025 const STRLEN oldsize = xhv->xhv_max + 1; 1026 const U32 items = (U32)HvPLACEHOLDERS_get(hv); 1027 1028 if (items /* hash has placeholders */ 1029 && !SvREADONLY(hv) /* but is not a restricted hash */) { 1030 /* If this hash previously was a "restricted hash" and had 1031 placeholders, but the "restricted" flag has been turned off, 1032 then the placeholders no longer serve any useful purpose. 1033 However, they have the downsides of taking up RAM, and adding 1034 extra steps when finding used values. It's safe to clear them 1035 at this point, even though Storable rebuilds restricted hashes by 1036 putting in all the placeholders (first) before turning on the 1037 readonly flag, because Storable always pre-splits the hash. 1038 If we're lucky, then we may clear sufficient placeholders to 1039 avoid needing to split the hash at all. */ 1040 clear_placeholders(hv, items); 1041 if (DO_HSPLIT(xhv)) 1042 hsplit(hv, oldsize, oldsize * 2); 1043 } else 1044 hsplit(hv, oldsize, oldsize * 2); 1045 } 1046 1047 if (return_svp) { 1048 return entry ? (void *) &HeVAL(entry) : NULL; 1049 } 1050 return (void *) entry; 1051 } 1052 1053 STATIC void 1054 S_hv_magic_check(HV *hv, bool *needs_copy, bool *needs_store) 1055 { 1056 const MAGIC *mg = SvMAGIC(hv); 1057 1058 PERL_ARGS_ASSERT_HV_MAGIC_CHECK; 1059 1060 *needs_copy = FALSE; 1061 *needs_store = TRUE; 1062 while (mg) { 1063 if (isUPPER(mg->mg_type)) { 1064 *needs_copy = TRUE; 1065 if (mg->mg_type == PERL_MAGIC_tied) { 1066 *needs_store = FALSE; 1067 return; /* We've set all there is to set. */ 1068 } 1069 } 1070 mg = mg->mg_moremagic; 1071 } 1072 } 1073 1074 /* 1075 =for apidoc hv_scalar 1076 1077 Evaluates the hash in scalar context and returns the result. 1078 1079 When the hash is tied dispatches through to the SCALAR method, 1080 otherwise returns a mortal SV containing the number of keys 1081 in the hash. 1082 1083 Note, prior to 5.25 this function returned what is now 1084 returned by the hv_bucket_ratio() function. 1085 1086 =cut 1087 */ 1088 1089 SV * 1090 Perl_hv_scalar(pTHX_ HV *hv) 1091 { 1092 SV *sv; 1093 UV u; 1094 1095 PERL_ARGS_ASSERT_HV_SCALAR; 1096 1097 if (SvRMAGICAL(hv)) { 1098 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied); 1099 if (mg) 1100 return magic_scalarpack(hv, mg); 1101 } 1102 1103 sv = newSV_type_mortal(SVt_IV); 1104 1105 /* Inlined sv_setuv(sv, HvUSEDKEYS(hv)) follows:*/ 1106 u = HvUSEDKEYS(hv); 1107 1108 if (u <= (UV)IV_MAX) { 1109 SvIV_set(sv, (IV)u); 1110 (void)SvIOK_only(sv); 1111 SvTAINT(sv); 1112 } else { 1113 SvIV_set(sv, 0); 1114 SvUV_set(sv, u); 1115 (void)SvIOK_only_UV(sv); 1116 SvTAINT(sv); 1117 } 1118 1119 return sv; 1120 } 1121 1122 1123 /* 1124 hv_pushkv(): push all the keys and/or values of a hash onto the stack. 1125 The rough Perl equivalents: 1126 () = %hash; 1127 () = keys %hash; 1128 () = values %hash; 1129 1130 Resets the hash's iterator. 1131 1132 flags : 1 = push keys 1133 2 = push values 1134 1|2 = push keys and values 1135 XXX use symbolic flag constants at some point? 1136 I might unroll the non-tied hv_iternext() in here at some point - DAPM 1137 */ 1138 1139 void 1140 Perl_hv_pushkv(pTHX_ HV *hv, U32 flags) 1141 { 1142 HE *entry; 1143 bool tied = SvRMAGICAL(hv) && (mg_find(MUTABLE_SV(hv), PERL_MAGIC_tied) 1144 #ifdef DYNAMIC_ENV_FETCH /* might not know number of keys yet */ 1145 || mg_find(MUTABLE_SV(hv), PERL_MAGIC_env) 1146 #endif 1147 ); 1148 dSP; 1149 1150 PERL_ARGS_ASSERT_HV_PUSHKV; 1151 assert(flags); /* must be pushing at least one of keys and values */ 1152 1153 (void)hv_iterinit(hv); 1154 1155 if (tied) { 1156 SSize_t ext = (flags == 3) ? 2 : 1; 1157 while ((entry = hv_iternext(hv))) { 1158 EXTEND(SP, ext); 1159 if (flags & 1) 1160 PUSHs(hv_iterkeysv(entry)); 1161 if (flags & 2) 1162 PUSHs(hv_iterval(hv, entry)); 1163 } 1164 } 1165 else { 1166 Size_t nkeys = HvUSEDKEYS(hv); 1167 SSize_t ext; 1168 1169 if (!nkeys) 1170 return; 1171 1172 /* 2*nkeys() should never be big enough to truncate or wrap */ 1173 assert(nkeys <= (SSize_t_MAX >> 1)); 1174 ext = nkeys * ((flags == 3) ? 2 : 1); 1175 1176 EXTEND_MORTAL(nkeys); 1177 EXTEND(SP, ext); 1178 1179 while ((entry = hv_iternext(hv))) { 1180 if (flags & 1) { 1181 SV *keysv = newSVhek(HeKEY_hek(entry)); 1182 SvTEMP_on(keysv); 1183 PL_tmps_stack[++PL_tmps_ix] = keysv; 1184 PUSHs(keysv); 1185 } 1186 if (flags & 2) 1187 PUSHs(HeVAL(entry)); 1188 } 1189 } 1190 1191 PUTBACK; 1192 } 1193 1194 1195 /* 1196 =for apidoc hv_bucket_ratio 1197 1198 If the hash is tied dispatches through to the SCALAR tied method, 1199 otherwise if the hash contains no keys returns 0, otherwise returns 1200 a mortal sv containing a string specifying the number of used buckets, 1201 followed by a slash, followed by the number of available buckets. 1202 1203 This function is expensive, it must scan all of the buckets 1204 to determine which are used, and the count is NOT cached. 1205 In a large hash this could be a lot of buckets. 1206 1207 =cut 1208 */ 1209 1210 SV * 1211 Perl_hv_bucket_ratio(pTHX_ HV *hv) 1212 { 1213 SV *sv; 1214 1215 PERL_ARGS_ASSERT_HV_BUCKET_RATIO; 1216 1217 if (SvRMAGICAL(hv)) { 1218 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied); 1219 if (mg) 1220 return magic_scalarpack(hv, mg); 1221 } 1222 1223 if (HvUSEDKEYS((HV *)hv)) { 1224 sv = sv_newmortal(); 1225 Perl_sv_setpvf(aTHX_ sv, "%ld/%ld", 1226 (long)HvFILL(hv), (long)HvMAX(hv) + 1); 1227 } 1228 else 1229 sv = &PL_sv_zero; 1230 1231 return sv; 1232 } 1233 1234 /* 1235 =for apidoc hv_delete 1236 1237 Deletes a key/value pair in the hash. The value's SV is removed from 1238 the hash, made mortal, and returned to the caller. The absolute 1239 value of C<klen> is the length of the key. If C<klen> is negative the 1240 key is assumed to be in UTF-8-encoded Unicode. The C<flags> value 1241 will normally be zero; if set to C<G_DISCARD> then C<NULL> will be returned. 1242 C<NULL> will also be returned if the key is not found. 1243 1244 =for apidoc hv_delete_ent 1245 1246 Deletes a key/value pair in the hash. The value SV is removed from the hash, 1247 made mortal, and returned to the caller. The C<flags> value will normally be 1248 zero; if set to C<G_DISCARD> then C<NULL> will be returned. C<NULL> will also 1249 be returned if the key is not found. C<hash> can be a valid precomputed hash 1250 value, or 0 to ask for it to be computed. 1251 1252 =cut 1253 */ 1254 1255 STATIC SV * 1256 S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, 1257 int k_flags, I32 d_flags, U32 hash) 1258 { 1259 XPVHV* xhv; 1260 HE *entry; 1261 HE **oentry; 1262 HE **first_entry; 1263 bool is_utf8 = cBOOL(k_flags & HVhek_UTF8); 1264 HEK *keysv_hek = NULL; 1265 U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ 1266 SV *sv; 1267 GV *gv = NULL; 1268 HV *stash = NULL; 1269 1270 if (SvMAGICAL(hv)) { 1271 bool needs_copy; 1272 bool needs_store; 1273 hv_magic_check (hv, &needs_copy, &needs_store); 1274 1275 if (needs_copy) { 1276 SV *sv; 1277 entry = (HE *) hv_common(hv, keysv, key, klen, 1278 k_flags & ~HVhek_FREEKEY, 1279 HV_FETCH_LVALUE|HV_DISABLE_UVAR_XKEY, 1280 NULL, hash); 1281 sv = entry ? HeVAL(entry) : NULL; 1282 if (sv) { 1283 if (SvMAGICAL(sv)) { 1284 mg_clear(sv); 1285 } 1286 if (!needs_store) { 1287 if (mg_find(sv, PERL_MAGIC_tiedelem)) { 1288 /* No longer an element */ 1289 sv_unmagic(sv, PERL_MAGIC_tiedelem); 1290 return sv; 1291 } 1292 return NULL; /* element cannot be deleted */ 1293 } 1294 #ifdef ENV_IS_CASELESS 1295 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) { 1296 /* XXX This code isn't UTF8 clean. */ 1297 keysv = newSVpvn_flags(key, klen, SVs_TEMP); 1298 if (k_flags & HVhek_FREEKEY) { 1299 Safefree(key); 1300 } 1301 key = strupr(SvPVX(keysv)); 1302 is_utf8 = 0; 1303 k_flags = 0; 1304 hash = 0; 1305 } 1306 #endif 1307 } 1308 } 1309 } 1310 xhv = (XPVHV*)SvANY(hv); 1311 if (!HvTOTALKEYS(hv)) 1312 return NULL; 1313 1314 if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) { 1315 const char * const keysave = key; 1316 key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8); 1317 1318 if (is_utf8) 1319 k_flags |= HVhek_UTF8; 1320 else 1321 k_flags &= ~HVhek_UTF8; 1322 if (key != keysave) { 1323 if (k_flags & HVhek_FREEKEY) { 1324 /* This shouldn't happen if our caller does what we expect, 1325 but strictly the API allows it. */ 1326 Safefree(keysave); 1327 } 1328 k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY; 1329 } 1330 } 1331 1332 if (keysv && (SvIsCOW_shared_hash(keysv))) { 1333 if (HvSHAREKEYS(hv)) 1334 keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); 1335 hash = SvSHARED_HASH(keysv); 1336 } 1337 else if (!hash) 1338 PERL_HASH(hash, key, klen); 1339 1340 first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; 1341 entry = *oentry; 1342 1343 if (!entry) 1344 goto not_found; 1345 1346 if (keysv_hek) { 1347 /* keysv is actually a HEK in disguise, so we can match just by 1348 * comparing the HEK pointers in the HE chain. There is a slight 1349 * caveat: on something like "\x80", which has both plain and utf8 1350 * representations, perl's hashes do encoding-insensitive lookups, 1351 * but preserve the encoding of the stored key. Thus a particular 1352 * key could map to two different HEKs in PL_strtab. We only 1353 * conclude 'not found' if all the flags are the same; otherwise 1354 * we fall back to a full search (this should only happen in rare 1355 * cases). 1356 */ 1357 int keysv_flags = HEK_FLAGS(keysv_hek); 1358 1359 for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { 1360 HEK *hek = HeKEY_hek(entry); 1361 if (hek == keysv_hek) 1362 goto found; 1363 if (HEK_FLAGS(hek) != keysv_flags) 1364 break; /* need to do full match */ 1365 } 1366 if (!entry) 1367 goto not_found; 1368 /* failed on shortcut - do full search loop */ 1369 oentry = first_entry; 1370 entry = *oentry; 1371 } 1372 1373 for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { 1374 if (HeHASH(entry) != hash) /* strings can't be equal */ 1375 continue; 1376 if (HeKLEN(entry) != (I32)klen) 1377 continue; 1378 if (memNE(HeKEY(entry),key,klen)) /* is this it? */ 1379 continue; 1380 if ((HeKFLAGS(entry) ^ k_flags) & HVhek_UTF8) 1381 continue; 1382 1383 found: 1384 if (hv == PL_strtab) { 1385 if (k_flags & HVhek_FREEKEY) 1386 Safefree(key); 1387 Perl_croak(aTHX_ S_strtab_error, "delete"); 1388 } 1389 1390 sv = HeVAL(entry); 1391 1392 /* if placeholder is here, it's already been deleted.... */ 1393 if (sv == &PL_sv_placeholder) { 1394 if (k_flags & HVhek_FREEKEY) 1395 Safefree(key); 1396 return NULL; 1397 } 1398 if (SvREADONLY(hv) && sv && SvREADONLY(sv)) { 1399 hv_notallowed(k_flags, key, klen, 1400 "Attempt to delete readonly key '%" SVf "' from" 1401 " a restricted hash"); 1402 } 1403 1404 /* 1405 * If a restricted hash, rather than really deleting the entry, put 1406 * a placeholder there. This marks the key as being "approved", so 1407 * we can still access via not-really-existing key without raising 1408 * an error. 1409 */ 1410 if (SvREADONLY(hv)) { 1411 /* We'll be saving this slot, so the number of allocated keys 1412 * doesn't go down, but the number placeholders goes up */ 1413 HeVAL(entry) = &PL_sv_placeholder; 1414 HvPLACEHOLDERS(hv)++; 1415 } 1416 else { 1417 HeVAL(entry) = NULL; 1418 *oentry = HeNEXT(entry); 1419 if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */) { 1420 HvLAZYDEL_on(hv); 1421 } 1422 else { 1423 if (SvOOK(hv) && HvLAZYDEL(hv) && 1424 entry == HeNEXT(HvAUX(hv)->xhv_eiter)) 1425 HeNEXT(HvAUX(hv)->xhv_eiter) = HeNEXT(entry); 1426 hv_free_ent(NULL, entry); 1427 } 1428 xhv->xhv_keys--; /* HvTOTALKEYS(hv)-- */ 1429 if (xhv->xhv_keys == 0) 1430 HvHASKFLAGS_off(hv); 1431 } 1432 1433 /* If this is a stash and the key ends with ::, then someone is 1434 * deleting a package. 1435 */ 1436 if (sv && SvTYPE(sv) == SVt_PVGV && HvENAME_get(hv)) { 1437 gv = (GV *)sv; 1438 if (( 1439 (klen > 1 && key[klen-2] == ':' && key[klen-1] == ':') 1440 || 1441 (klen == 1 && key[0] == ':') 1442 ) 1443 && (klen != 6 || hv!=PL_defstash || memNE(key,"main::",6)) 1444 && (stash = GvHV((GV *)gv)) 1445 && HvENAME_get(stash)) { 1446 /* A previous version of this code checked that the 1447 * GV was still in the symbol table by fetching the 1448 * GV with its name. That is not necessary (and 1449 * sometimes incorrect), as HvENAME cannot be set 1450 * on hv if it is not in the symtab. */ 1451 mro_changes = 2; 1452 /* Hang on to it for a bit. */ 1453 SvREFCNT_inc_simple_void_NN( 1454 sv_2mortal((SV *)gv) 1455 ); 1456 } 1457 else if (memEQs(key, klen, "ISA") && GvAV(gv)) { 1458 AV *isa = GvAV(gv); 1459 MAGIC *mg = mg_find((SV*)isa, PERL_MAGIC_isa); 1460 1461 mro_changes = 1; 1462 if (mg) { 1463 if (mg->mg_obj == (SV*)gv) { 1464 /* This is the only stash this ISA was used for. 1465 * The isaelem magic asserts if there's no 1466 * isa magic on the array, so explicitly 1467 * remove the magic on both the array and its 1468 * elements. @ISA shouldn't be /too/ large. 1469 */ 1470 SV **svp, **end; 1471 strip_magic: 1472 svp = AvARRAY(isa); 1473 end = svp + (AvFILLp(isa)+1); 1474 while (svp < end) { 1475 if (*svp) 1476 mg_free_type(*svp, PERL_MAGIC_isaelem); 1477 ++svp; 1478 } 1479 mg_free_type((SV*)GvAV(gv), PERL_MAGIC_isa); 1480 } 1481 else { 1482 /* mg_obj is an array of stashes 1483 Note that the array doesn't keep a reference 1484 count on the stashes. 1485 */ 1486 AV *av = (AV*)mg->mg_obj; 1487 SV **svp, **arrayp; 1488 SSize_t index; 1489 SSize_t items; 1490 1491 assert(SvTYPE(mg->mg_obj) == SVt_PVAV); 1492 1493 /* remove the stash from the magic array */ 1494 arrayp = svp = AvARRAY(av); 1495 items = AvFILLp(av) + 1; 1496 if (items == 1) { 1497 assert(*arrayp == (SV *)gv); 1498 mg->mg_obj = NULL; 1499 /* avoid a double free on the last stash */ 1500 AvFILLp(av) = -1; 1501 /* The magic isn't MGf_REFCOUNTED, so release 1502 * the array manually. 1503 */ 1504 SvREFCNT_dec_NN(av); 1505 goto strip_magic; 1506 } 1507 else { 1508 while (items--) { 1509 if (*svp == (SV*)gv) 1510 break; 1511 ++svp; 1512 } 1513 index = svp - arrayp; 1514 assert(index >= 0 && index <= AvFILLp(av)); 1515 if (index < AvFILLp(av)) { 1516 arrayp[index] = arrayp[AvFILLp(av)]; 1517 } 1518 arrayp[AvFILLp(av)] = NULL; 1519 --AvFILLp(av); 1520 } 1521 } 1522 } 1523 } 1524 } 1525 1526 if (k_flags & HVhek_FREEKEY) 1527 Safefree(key); 1528 1529 if (sv) { 1530 /* deletion of method from stash */ 1531 if (isGV(sv) && isGV_with_GP(sv) && GvCVu(sv) 1532 && HvENAME_get(hv)) 1533 mro_method_changed_in(hv); 1534 1535 if (d_flags & G_DISCARD) { 1536 SvREFCNT_dec(sv); 1537 sv = NULL; 1538 } 1539 else { 1540 sv_2mortal(sv); 1541 } 1542 } 1543 1544 if (mro_changes == 1) mro_isa_changed_in(hv); 1545 else if (mro_changes == 2) 1546 mro_package_moved(NULL, stash, gv, 1); 1547 1548 return sv; 1549 } 1550 1551 not_found: 1552 if (SvREADONLY(hv)) { 1553 hv_notallowed(k_flags, key, klen, 1554 "Attempt to delete disallowed key '%" SVf "' from" 1555 " a restricted hash"); 1556 } 1557 1558 if (k_flags & HVhek_FREEKEY) 1559 Safefree(key); 1560 return NULL; 1561 } 1562 1563 /* HVs are used for (at least) three things 1564 1) objects 1565 2) symbol tables 1566 3) associative arrays 1567 1568 shared hash keys benefit the first two greatly, because keys are likely 1569 to be re-used between objects, or for constants in the optree 1570 1571 However, for large associative arrays (lookup tables, "seen" hashes) keys are 1572 unlikely to be re-used. Hence having those keys in the shared string table as 1573 well as the hash is a memory hit, if they are never actually shared with a 1574 second hash. Hence we turn off shared hash keys if a (regular) hash gets 1575 large. 1576 1577 This is a heuristic. There might be a better answer than 42, but for now 1578 we'll use it. 1579 1580 NOTE: Configure with -Accflags='-DPERL_USE_UNSHARED_KEYS_IN_LARGE_HASHES' 1581 to enable this new functionality. 1582 */ 1583 1584 #ifdef PERL_USE_UNSHARED_KEYS_IN_LARGE_HASHES 1585 static bool 1586 S_large_hash_heuristic(pTHX_ HV *hv, STRLEN size) { 1587 if (size > 42 1588 && !SvOBJECT(hv) 1589 && !(SvOOK(hv) && HvENAME_get(hv))) { 1590 /* This hash appears to be growing quite large. 1591 We gamble that it is not sharing keys with other hashes. */ 1592 return TRUE; 1593 } 1594 return FALSE; 1595 } 1596 #endif 1597 1598 STATIC void 1599 S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) 1600 { 1601 STRLEN i = 0; 1602 char *a = (char*) HvARRAY(hv); 1603 HE **aep; 1604 1605 PERL_ARGS_ASSERT_HSPLIT; 1606 if (newsize > MAX_BUCKET_MAX+1) 1607 return; 1608 1609 PL_nomemok = TRUE; 1610 Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); 1611 PL_nomemok = FALSE; 1612 if (!a) { 1613 return; 1614 } 1615 1616 #ifdef PERL_HASH_RANDOMIZE_KEYS 1617 /* the idea of this is that we create a "random" value by hashing the address of 1618 * the array, we then use the low bit to decide if we insert at the top, or insert 1619 * second from top. After each such insert we rotate the hashed value. So we can 1620 * use the same hashed value over and over, and in normal build environments use 1621 * very few ops to do so. ROTL32() should produce a single machine operation. */ 1622 MAYBE_UPDATE_HASH_RAND_BITS(); 1623 #endif 1624 HvARRAY(hv) = (HE**) a; 1625 HvMAX(hv) = newsize - 1; 1626 /* now we can safely clear the second half */ 1627 Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ 1628 1629 if (!HvTOTALKEYS(hv)) /* skip rest if no entries */ 1630 return; 1631 1632 /* don't share keys in large simple hashes */ 1633 if (LARGE_HASH_HEURISTIC(hv, HvTOTALKEYS(hv))) 1634 HvSHAREKEYS_off(hv); 1635 1636 1637 newsize--; 1638 aep = (HE**)a; 1639 do { 1640 HE **oentry = aep + i; 1641 HE *entry = aep[i]; 1642 1643 if (!entry) /* non-existent */ 1644 continue; 1645 do { 1646 U32 j = (HeHASH(entry) & newsize); 1647 if (j != (U32)i) { 1648 *oentry = HeNEXT(entry); 1649 #ifdef PERL_HASH_RANDOMIZE_KEYS 1650 /* if the target cell is empty or PL_HASH_RAND_BITS_ENABLED is false 1651 * insert to top, otherwise rotate the bucket rand 1 bit, 1652 * and use the new low bit to decide if we insert at top, 1653 * or next from top. IOW, we only rotate on a collision.*/ 1654 if (aep[j] && PL_HASH_RAND_BITS_ENABLED) { 1655 UPDATE_HASH_RAND_BITS(); 1656 if (PL_hash_rand_bits & 1) { 1657 HeNEXT(entry)= HeNEXT(aep[j]); 1658 HeNEXT(aep[j])= entry; 1659 } else { 1660 /* Note, this is structured in such a way as the optimizer 1661 * should eliminate the duplicated code here and below without 1662 * us needing to explicitly use a goto. */ 1663 HeNEXT(entry) = aep[j]; 1664 aep[j] = entry; 1665 } 1666 } else 1667 #endif 1668 { 1669 /* see comment above about duplicated code */ 1670 HeNEXT(entry) = aep[j]; 1671 aep[j] = entry; 1672 } 1673 } 1674 else { 1675 oentry = &HeNEXT(entry); 1676 } 1677 entry = *oentry; 1678 } while (entry); 1679 } while (i++ < oldsize); 1680 } 1681 1682 void 1683 Perl_hv_ksplit(pTHX_ HV *hv, IV newmax) 1684 { 1685 XPVHV* xhv = (XPVHV*)SvANY(hv); 1686 const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 */ 1687 I32 newsize; 1688 I32 wantsize; 1689 I32 trysize; 1690 char *a; 1691 1692 PERL_ARGS_ASSERT_HV_KSPLIT; 1693 1694 wantsize = (I32) newmax; /* possible truncation here */ 1695 if (wantsize != newmax) 1696 return; 1697 1698 wantsize= wantsize + (wantsize >> 1); /* wantsize *= 1.5 */ 1699 if (wantsize < newmax) /* overflow detection */ 1700 return; 1701 1702 newsize = oldsize; 1703 while (wantsize > newsize) { 1704 trysize = newsize << 1; 1705 if (trysize > newsize) { 1706 newsize = trysize; 1707 } else { 1708 /* we overflowed */ 1709 return; 1710 } 1711 } 1712 1713 if (newsize <= oldsize) 1714 return; /* overflow detection */ 1715 1716 a = (char *) HvARRAY(hv); 1717 if (a) { 1718 #ifdef PERL_HASH_RANDOMIZE_KEYS 1719 U32 was_ook = SvOOK(hv); 1720 #endif 1721 hsplit(hv, oldsize, newsize); 1722 #ifdef PERL_HASH_RANDOMIZE_KEYS 1723 if (was_ook && SvOOK(hv) && HvTOTALKEYS(hv)) { 1724 MAYBE_UPDATE_HASH_RAND_BITS(); 1725 HvAUX(hv)->xhv_rand = (U32)PL_hash_rand_bits; 1726 } 1727 #endif 1728 } else { 1729 if (LARGE_HASH_HEURISTIC(hv, newmax)) 1730 HvSHAREKEYS_off(hv); 1731 Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); 1732 xhv->xhv_max = newsize - 1; 1733 HvARRAY(hv) = (HE **) a; 1734 } 1735 } 1736 1737 /* IMO this should also handle cases where hv_max is smaller than hv_keys 1738 * as tied hashes could play silly buggers and mess us around. We will 1739 * do the right thing during hv_store() afterwards, but still - Yves */ 1740 #define HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys) STMT_START {\ 1741 /* Can we use fewer buckets? (hv_max is always 2^n-1) */ \ 1742 if (hv_max < PERL_HASH_DEFAULT_HvMAX) { \ 1743 hv_max = PERL_HASH_DEFAULT_HvMAX; \ 1744 } else { \ 1745 while (hv_max > PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \ 1746 hv_max = hv_max / 2; \ 1747 } \ 1748 HvMAX(hv) = hv_max; \ 1749 } STMT_END 1750 1751 1752 /* 1753 =for apidoc newHVhv 1754 1755 The content of C<ohv> is copied to a new hash. A pointer to the new hash is 1756 returned. 1757 1758 =cut 1759 */ 1760 1761 HV * 1762 Perl_newHVhv(pTHX_ HV *ohv) 1763 { 1764 HV * const hv = newHV(); 1765 STRLEN hv_max; 1766 1767 if (!ohv || (!HvTOTALKEYS(ohv) && !SvMAGICAL((const SV *)ohv))) 1768 return hv; 1769 hv_max = HvMAX(ohv); 1770 1771 if (!SvMAGICAL((const SV *)ohv)) { 1772 /* It's an ordinary hash, so copy it fast. AMS 20010804 */ 1773 STRLEN i; 1774 HE **ents, ** const oents = (HE **)HvARRAY(ohv); 1775 char *a; 1776 Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(hv_max+1), char); 1777 ents = (HE**)a; 1778 1779 if (HvSHAREKEYS(ohv)) { 1780 #ifdef NODEFAULT_SHAREKEYS 1781 HvSHAREKEYS_on(hv); 1782 #else 1783 /* Shared is the default - it should have been set by newHV(). */ 1784 assert(HvSHAREKEYS(hv)); 1785 #endif 1786 } 1787 else { 1788 HvSHAREKEYS_off(hv); 1789 } 1790 1791 /* In each bucket... */ 1792 for (i = 0; i <= hv_max; i++) { 1793 HE *prev = NULL; 1794 HE *oent = oents[i]; 1795 1796 if (!oent) { 1797 ents[i] = NULL; 1798 continue; 1799 } 1800 1801 /* Copy the linked list of entries. */ 1802 for (; oent; oent = HeNEXT(oent)) { 1803 HE * const ent = new_HE(); 1804 SV *const val = HeVAL(oent); 1805 const int flags = HeKFLAGS(oent); 1806 1807 HeVAL(ent) = SvIMMORTAL(val) ? val : newSVsv(val); 1808 if ((flags & HVhek_NOTSHARED) == 0) { 1809 HeKEY_hek(ent) = share_hek_hek(HeKEY_hek(oent)); 1810 } 1811 else { 1812 const U32 hash = HeHASH(oent); 1813 const char * const key = HeKEY(oent); 1814 const STRLEN len = HeKLEN(oent); 1815 HeKEY_hek(ent) = save_hek_flags(key, len, hash, flags); 1816 } 1817 if (prev) 1818 HeNEXT(prev) = ent; 1819 else 1820 ents[i] = ent; 1821 prev = ent; 1822 HeNEXT(ent) = NULL; 1823 } 1824 } 1825 1826 HvMAX(hv) = hv_max; 1827 HvTOTALKEYS(hv) = HvTOTALKEYS(ohv); 1828 HvARRAY(hv) = ents; 1829 } /* not magical */ 1830 else { 1831 /* Iterate over ohv, copying keys and values one at a time. */ 1832 HE *entry; 1833 const I32 riter = HvRITER_get(ohv); 1834 HE * const eiter = HvEITER_get(ohv); 1835 STRLEN hv_keys = HvTOTALKEYS(ohv); 1836 1837 HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); 1838 1839 hv_iterinit(ohv); 1840 while ((entry = hv_iternext_flags(ohv, 0))) { 1841 SV *val = hv_iterval(ohv,entry); 1842 SV * const keysv = HeSVKEY(entry); 1843 val = SvIMMORTAL(val) ? val : newSVsv(val); 1844 if (keysv) 1845 (void)hv_store_ent(hv, keysv, val, 0); 1846 else 1847 (void)hv_store_flags(hv, HeKEY(entry), HeKLEN(entry), val, 1848 HeHASH(entry), HeKFLAGS(entry)); 1849 } 1850 HvRITER_set(ohv, riter); 1851 HvEITER_set(ohv, eiter); 1852 } 1853 1854 return hv; 1855 } 1856 1857 /* 1858 =for apidoc hv_copy_hints_hv 1859 1860 A specialised version of L</newHVhv> for copying C<%^H>. C<ohv> must be 1861 a pointer to a hash (which may have C<%^H> magic, but should be generally 1862 non-magical), or C<NULL> (interpreted as an empty hash). The content 1863 of C<ohv> is copied to a new hash, which has the C<%^H>-specific magic 1864 added to it. A pointer to the new hash is returned. 1865 1866 =cut 1867 */ 1868 1869 HV * 1870 Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) 1871 { 1872 HV * const hv = newHV(); 1873 1874 if (ohv) { 1875 STRLEN hv_max = HvMAX(ohv); 1876 STRLEN hv_keys = HvTOTALKEYS(ohv); 1877 HE *entry; 1878 const I32 riter = HvRITER_get(ohv); 1879 HE * const eiter = HvEITER_get(ohv); 1880 1881 ENTER; 1882 SAVEFREESV(hv); 1883 1884 HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); 1885 1886 hv_iterinit(ohv); 1887 while ((entry = hv_iternext_flags(ohv, 0))) { 1888 SV *const sv = newSVsv(hv_iterval(ohv,entry)); 1889 SV *heksv = HeSVKEY(entry); 1890 if (!heksv && sv) heksv = newSVhek(HeKEY_hek(entry)); 1891 if (sv) sv_magic(sv, NULL, PERL_MAGIC_hintselem, 1892 (char *)heksv, HEf_SVKEY); 1893 if (heksv == HeSVKEY(entry)) 1894 (void)hv_store_ent(hv, heksv, sv, 0); 1895 else { 1896 (void)hv_common(hv, heksv, HeKEY(entry), HeKLEN(entry), 1897 HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, HeHASH(entry)); 1898 SvREFCNT_dec_NN(heksv); 1899 } 1900 } 1901 HvRITER_set(ohv, riter); 1902 HvEITER_set(ohv, eiter); 1903 1904 SvREFCNT_inc_simple_void_NN(hv); 1905 LEAVE; 1906 } 1907 hv_magic(hv, NULL, PERL_MAGIC_hints); 1908 return hv; 1909 } 1910 #undef HV_SET_MAX_ADJUSTED_FOR_KEYS 1911 1912 /* like hv_free_ent, but returns the SV rather than freeing it */ 1913 STATIC SV* 1914 S_hv_free_ent_ret(pTHX_ HE *entry) 1915 { 1916 PERL_ARGS_ASSERT_HV_FREE_ENT_RET; 1917 1918 SV *val = HeVAL(entry); 1919 if (HeKLEN(entry) == HEf_SVKEY) { 1920 SvREFCNT_dec(HeKEY_sv(entry)); 1921 Safefree(HeKEY_hek(entry)); 1922 } 1923 else if ((HeKFLAGS(entry) & HVhek_NOTSHARED) == 0) { 1924 unshare_hek(HeKEY_hek(entry)); 1925 } 1926 else { 1927 Safefree(HeKEY_hek(entry)); 1928 } 1929 del_HE(entry); 1930 return val; 1931 } 1932 1933 1934 void 1935 Perl_hv_free_ent(pTHX_ HV *notused, HE *entry) 1936 { 1937 PERL_UNUSED_ARG(notused); 1938 1939 if (!entry) 1940 return; 1941 1942 SV *val = hv_free_ent_ret(entry); 1943 SvREFCNT_dec(val); 1944 } 1945 1946 1947 void 1948 Perl_hv_delayfree_ent(pTHX_ HV *notused, HE *entry) 1949 { 1950 PERL_UNUSED_ARG(notused); 1951 1952 if (!entry) 1953 return; 1954 /* SvREFCNT_inc to counter the SvREFCNT_dec in hv_free_ent */ 1955 sv_2mortal(SvREFCNT_inc(HeVAL(entry))); /* free between statements */ 1956 if (HeKLEN(entry) == HEf_SVKEY) { 1957 sv_2mortal(SvREFCNT_inc(HeKEY_sv(entry))); 1958 } 1959 hv_free_ent(NULL, entry); 1960 } 1961 1962 /* 1963 =for apidoc hv_clear 1964 1965 Frees all the elements of a hash, leaving it empty. 1966 The XS equivalent of C<%hash = ()>. See also L</hv_undef>. 1967 1968 See L</av_clear> for a note about the hash possibly being invalid on 1969 return. 1970 1971 =cut 1972 */ 1973 1974 void 1975 Perl_hv_clear(pTHX_ HV *hv) 1976 { 1977 SSize_t orig_ix; 1978 1979 if (!hv) 1980 return; 1981 1982 DEBUG_A(Perl_hv_assert(aTHX_ hv)); 1983 1984 /* avoid hv being freed when calling destructors below */ 1985 EXTEND_MORTAL(1); 1986 PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hv); 1987 orig_ix = PL_tmps_ix; 1988 if (SvREADONLY(hv) && HvTOTALKEYS(hv)) { 1989 /* restricted hash: convert all keys to placeholders */ 1990 STRLEN max = HvMAX(hv); 1991 STRLEN i; 1992 for (i = 0; i <= max; i++) { 1993 HE *entry = (HvARRAY(hv))[i]; 1994 for (; entry; entry = HeNEXT(entry)) { 1995 /* not already placeholder */ 1996 if (HeVAL(entry) != &PL_sv_placeholder) { 1997 if (HeVAL(entry)) { 1998 if (SvREADONLY(HeVAL(entry))) { 1999 SV* const keysv = hv_iterkeysv(entry); 2000 Perl_croak_nocontext( 2001 "Attempt to delete readonly key '%" SVf "' from a restricted hash", 2002 (void*)keysv); 2003 } 2004 SvREFCNT_dec_NN(HeVAL(entry)); 2005 } 2006 HeVAL(entry) = &PL_sv_placeholder; 2007 HvPLACEHOLDERS(hv)++; 2008 } 2009 } 2010 } 2011 } 2012 else { 2013 hv_free_entries(hv); 2014 HvPLACEHOLDERS_set(hv, 0); 2015 2016 if (SvRMAGICAL(hv)) 2017 mg_clear(MUTABLE_SV(hv)); 2018 2019 HvHASKFLAGS_off(hv); 2020 } 2021 if (SvOOK(hv)) { 2022 if(HvENAME_get(hv)) 2023 mro_isa_changed_in(hv); 2024 HvEITER_set(hv, NULL); 2025 } 2026 /* disarm hv's premature free guard */ 2027 if (LIKELY(PL_tmps_ix == orig_ix)) 2028 PL_tmps_ix--; 2029 else 2030 PL_tmps_stack[orig_ix] = &PL_sv_undef; 2031 SvREFCNT_dec_NN(hv); 2032 } 2033 2034 /* 2035 =for apidoc hv_clear_placeholders 2036 2037 Clears any placeholders from a hash. If a restricted hash has any of its keys 2038 marked as readonly and the key is subsequently deleted, the key is not actually 2039 deleted but is marked by assigning it a value of C<&PL_sv_placeholder>. This tags 2040 it so it will be ignored by future operations such as iterating over the hash, 2041 but will still allow the hash to have a value reassigned to the key at some 2042 future point. This function clears any such placeholder keys from the hash. 2043 See C<L<Hash::Util::lock_keys()|Hash::Util/lock_keys>> for an example of its 2044 use. 2045 2046 =cut 2047 */ 2048 2049 void 2050 Perl_hv_clear_placeholders(pTHX_ HV *hv) 2051 { 2052 const U32 items = (U32)HvPLACEHOLDERS_get(hv); 2053 2054 PERL_ARGS_ASSERT_HV_CLEAR_PLACEHOLDERS; 2055 2056 if (items) 2057 clear_placeholders(hv, items); 2058 } 2059 2060 static void 2061 S_clear_placeholders(pTHX_ HV *hv, const U32 placeholders) 2062 { 2063 I32 i; 2064 U32 to_find = placeholders; 2065 2066 PERL_ARGS_ASSERT_CLEAR_PLACEHOLDERS; 2067 2068 assert(to_find); 2069 2070 i = HvMAX(hv); 2071 do { 2072 /* Loop down the linked list heads */ 2073 HE **oentry = &(HvARRAY(hv))[i]; 2074 HE *entry; 2075 2076 while ((entry = *oentry)) { 2077 if (HeVAL(entry) == &PL_sv_placeholder) { 2078 *oentry = HeNEXT(entry); 2079 if (entry == HvEITER_get(hv)) 2080 HvLAZYDEL_on(hv); 2081 else { 2082 if (SvOOK(hv) && HvLAZYDEL(hv) && 2083 entry == HeNEXT(HvAUX(hv)->xhv_eiter)) 2084 HeNEXT(HvAUX(hv)->xhv_eiter) = HeNEXT(entry); 2085 hv_free_ent(NULL, entry); 2086 } 2087 2088 if (--to_find == 0) { 2089 /* Finished. */ 2090 HvTOTALKEYS(hv) -= (IV)placeholders; 2091 if (HvTOTALKEYS(hv) == 0) 2092 HvHASKFLAGS_off(hv); 2093 HvPLACEHOLDERS_set(hv, 0); 2094 return; 2095 } 2096 } else { 2097 oentry = &HeNEXT(entry); 2098 } 2099 } 2100 } while (--i >= 0); 2101 /* You can't get here, hence assertion should always fail. */ 2102 assert (to_find == 0); 2103 NOT_REACHED; /* NOTREACHED */ 2104 } 2105 2106 STATIC void 2107 S_hv_free_entries(pTHX_ HV *hv) 2108 { 2109 STRLEN index = 0; 2110 SV *sv; 2111 2112 PERL_ARGS_ASSERT_HV_FREE_ENTRIES; 2113 2114 while ((sv = Perl_hfree_next_entry(aTHX_ hv, &index)) || HvTOTALKEYS(hv)) { 2115 SvREFCNT_dec(sv); 2116 } 2117 } 2118 2119 2120 /* hfree_next_entry() 2121 * For use only by S_hv_free_entries() and sv_clear(). 2122 * Delete the next available HE from hv and return the associated SV. 2123 * Returns null on empty hash. Nevertheless null is not a reliable 2124 * indicator that the hash is empty, as the deleted entry may have a 2125 * null value. 2126 * indexp is a pointer to the current index into HvARRAY. The index should 2127 * initially be set to 0. hfree_next_entry() may update it. */ 2128 2129 SV* 2130 Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) 2131 { 2132 struct xpvhv_aux *iter; 2133 HE *entry; 2134 HE ** array; 2135 #ifdef DEBUGGING 2136 STRLEN orig_index = *indexp; 2137 #endif 2138 2139 PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY; 2140 2141 if (SvOOK(hv) && ((iter = HvAUX(hv)))) { 2142 if ((entry = iter->xhv_eiter)) { 2143 /* the iterator may get resurrected after each 2144 * destructor call, so check each time */ 2145 if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ 2146 HvLAZYDEL_off(hv); 2147 hv_free_ent(NULL, entry); 2148 /* warning: at this point HvARRAY may have been 2149 * re-allocated, HvMAX changed etc */ 2150 } 2151 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ 2152 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ 2153 #ifdef PERL_HASH_RANDOMIZE_KEYS 2154 iter->xhv_last_rand = iter->xhv_rand; 2155 #endif 2156 } 2157 } 2158 2159 if (!((XPVHV*)SvANY(hv))->xhv_keys) 2160 return NULL; 2161 2162 array = HvARRAY(hv); 2163 assert(array); 2164 while ( ! ((entry = array[*indexp])) ) { 2165 if ((*indexp)++ >= HvMAX(hv)) 2166 *indexp = 0; 2167 assert(*indexp != orig_index); 2168 } 2169 array[*indexp] = HeNEXT(entry); 2170 ((XPVHV*) SvANY(hv))->xhv_keys--; 2171 2172 if ( PL_phase != PERL_PHASE_DESTRUCT && HvENAME(hv) 2173 && HeVAL(entry) && isGV(HeVAL(entry)) 2174 && GvHV(HeVAL(entry)) && HvENAME(GvHV(HeVAL(entry))) 2175 ) { 2176 STRLEN klen; 2177 const char * const key = HePV(entry,klen); 2178 if ((klen > 1 && key[klen-1]==':' && key[klen-2]==':') 2179 || (klen == 1 && key[0] == ':')) { 2180 mro_package_moved( 2181 NULL, GvHV(HeVAL(entry)), 2182 (GV *)HeVAL(entry), 0 2183 ); 2184 } 2185 } 2186 return hv_free_ent_ret(entry); 2187 } 2188 2189 2190 /* 2191 =for apidoc hv_undef 2192 2193 Undefines the hash. The XS equivalent of C<undef(%hash)>. 2194 2195 As well as freeing all the elements of the hash (like C<hv_clear()>), this 2196 also frees any auxiliary data and storage associated with the hash. 2197 2198 See L</av_clear> for a note about the hash possibly being invalid on 2199 return. 2200 2201 =cut 2202 */ 2203 2204 void 2205 Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) 2206 { 2207 bool save; 2208 SSize_t orig_ix = PL_tmps_ix; /* silence compiler warning about unitialized vars */ 2209 2210 if (!hv) 2211 return; 2212 save = cBOOL(SvREFCNT(hv)); 2213 DEBUG_A(Perl_hv_assert(aTHX_ hv)); 2214 2215 /* The name must be deleted before the call to hv_free_entries so that 2216 CVs are anonymised properly. But the effective name must be pre- 2217 served until after that call (and only deleted afterwards if the 2218 call originated from sv_clear). For stashes with one name that is 2219 both the canonical name and the effective name, hv_name_set has to 2220 allocate an array for storing the effective name. We can skip that 2221 during global destruction, as it does not matter where the CVs point 2222 if they will be freed anyway. */ 2223 /* note that the code following prior to hv_free_entries is duplicated 2224 * in sv_clear(), and changes here should be done there too */ 2225 if (PL_phase != PERL_PHASE_DESTRUCT && HvNAME(hv)) { 2226 if (PL_stashcache) { 2227 DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for '%" 2228 HEKf "'\n", HEKfARG(HvNAME_HEK(hv)))); 2229 (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD); 2230 } 2231 hv_name_set(hv, NULL, 0, 0); 2232 } 2233 if (save) { 2234 /* avoid hv being freed when calling destructors below */ 2235 EXTEND_MORTAL(1); 2236 PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hv); 2237 orig_ix = PL_tmps_ix; 2238 } 2239 2240 /* As well as any/all HE*s in HvARRAY(), this call also ensures that 2241 xhv_eiter is NULL, including handling the case of a tied hash partway 2242 through iteration where HvLAZYDEL() is true and xhv_eiter points to an 2243 HE* that needs to be explicitly freed. */ 2244 hv_free_entries(hv); 2245 2246 /* SvOOK() is true for a hash if it has struct xpvhv_aux allocated. That 2247 structure has several other pieces of allocated memory - hence those must 2248 be freed before the structure itself can be freed. Some can be freed when 2249 a hash is "undefined" (this function), but some must persist until it is 2250 destroyed (which might be this function's immediate caller). 2251 2252 Hence the code in this block frees what it is logical to free (and NULLs 2253 out anything freed) so that the structure is left in a logically 2254 consistent state - pointers are NULL or point to valid memory, and 2255 non-pointer values are correct for an empty hash. The structure state 2256 must remain consistent, because this code can no longer clear SVf_OOK, 2257 meaning that this structure might be read again at any point in the 2258 future without further checks or reinitialisation. */ 2259 if (SvOOK(hv)) { 2260 struct mro_meta *meta; 2261 const char *name; 2262 2263 if (HvENAME_get(hv)) { 2264 if (PL_phase != PERL_PHASE_DESTRUCT) 2265 mro_isa_changed_in(hv); 2266 if (PL_stashcache) { 2267 DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for effective name '%" 2268 HEKf "'\n", HEKfARG(HvENAME_HEK(hv)))); 2269 (void)hv_deletehek(PL_stashcache, HvENAME_HEK(hv), G_DISCARD); 2270 } 2271 } 2272 2273 /* If this call originated from sv_clear, then we must check for 2274 * effective names that need freeing, as well as the usual name. */ 2275 name = HvNAME(hv); 2276 if (flags & HV_NAME_SETALL ? !!HvAUX(hv)->xhv_name_u.xhvnameu_name : !!name) { 2277 if (name && PL_stashcache) { 2278 DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for name '%" 2279 HEKf "'\n", HEKfARG(HvNAME_HEK(hv)))); 2280 (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD); 2281 } 2282 hv_name_set(hv, NULL, 0, flags); 2283 } 2284 if((meta = HvAUX(hv)->xhv_mro_meta)) { 2285 if (meta->mro_linear_all) { 2286 SvREFCNT_dec_NN(meta->mro_linear_all); 2287 /* mro_linear_current is just acting as a shortcut pointer, 2288 hence the else. */ 2289 } 2290 else 2291 /* Only the current MRO is stored, so this owns the data. 2292 */ 2293 SvREFCNT_dec(meta->mro_linear_current); 2294 SvREFCNT_dec(meta->mro_nextmethod); 2295 SvREFCNT_dec(meta->isa); 2296 SvREFCNT_dec(meta->super); 2297 Safefree(meta); 2298 HvAUX(hv)->xhv_mro_meta = NULL; 2299 } 2300 } 2301 2302 Safefree(HvARRAY(hv)); 2303 HvMAX(hv) = PERL_HASH_DEFAULT_HvMAX; /* 7 (it's a normal hash) */ 2304 HvARRAY(hv) = 0; 2305 2306 /* if we're freeing the HV, the SvMAGIC field has been reused for 2307 * other purposes, and so there can't be any placeholder magic */ 2308 if (SvREFCNT(hv)) 2309 HvPLACEHOLDERS_set(hv, 0); 2310 2311 if (SvRMAGICAL(hv)) 2312 mg_clear(MUTABLE_SV(hv)); 2313 2314 if (save) { 2315 /* disarm hv's premature free guard */ 2316 if (LIKELY(PL_tmps_ix == orig_ix)) 2317 PL_tmps_ix--; 2318 else 2319 PL_tmps_stack[orig_ix] = &PL_sv_undef; 2320 SvREFCNT_dec_NN(hv); 2321 } 2322 } 2323 2324 /* 2325 =for apidoc hv_fill 2326 2327 Returns the number of hash buckets that happen to be in use. 2328 2329 This function implements the L<C<HvFILL> macro|perlapi/HvFILL> which you should 2330 use instead. 2331 2332 As of perl 5.25 this function is used only for debugging 2333 purposes, and the number of used hash buckets is not 2334 in any way cached, thus this function can be costly 2335 to execute as it must iterate over all the buckets in the 2336 hash. 2337 2338 =cut 2339 */ 2340 2341 STRLEN 2342 Perl_hv_fill(pTHX_ HV *const hv) 2343 { 2344 STRLEN count = 0; 2345 HE **ents = HvARRAY(hv); 2346 2347 PERL_UNUSED_CONTEXT; 2348 PERL_ARGS_ASSERT_HV_FILL; 2349 2350 /* No keys implies no buckets used. 2351 One key can only possibly mean one bucket used. */ 2352 if (HvTOTALKEYS(hv) < 2) 2353 return HvTOTALKEYS(hv); 2354 2355 if (ents) { 2356 /* I wonder why we count down here... 2357 * Is it some micro-optimisation? 2358 * I would have thought counting up was better. 2359 * - Yves 2360 */ 2361 HE *const *const last = ents + HvMAX(hv); 2362 count = last + 1 - ents; 2363 2364 do { 2365 if (!*ents) 2366 --count; 2367 } while (++ents <= last); 2368 } 2369 return count; 2370 } 2371 2372 static struct xpvhv_aux* 2373 S_hv_auxinit(pTHX_ HV *hv) { 2374 struct xpvhv_aux *iter; 2375 2376 PERL_ARGS_ASSERT_HV_AUXINIT; 2377 2378 if (!SvOOK(hv)) { 2379 char *array = (char *) HvARRAY(hv); 2380 if (!array) { 2381 Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1), char); 2382 HvARRAY(hv) = (HE**)array; 2383 } 2384 iter = Perl_hv_auxalloc(aTHX_ hv); 2385 #ifdef PERL_HASH_RANDOMIZE_KEYS 2386 MAYBE_UPDATE_HASH_RAND_BITS(); 2387 iter->xhv_rand = (U32)PL_hash_rand_bits; 2388 #endif 2389 } else { 2390 iter = HvAUX(hv); 2391 } 2392 2393 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ 2394 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ 2395 #ifdef PERL_HASH_RANDOMIZE_KEYS 2396 iter->xhv_last_rand = iter->xhv_rand; 2397 #endif 2398 iter->xhv_name_u.xhvnameu_name = 0; 2399 iter->xhv_name_count = 0; 2400 iter->xhv_backreferences = 0; 2401 iter->xhv_mro_meta = NULL; 2402 iter->xhv_aux_flags = 0; 2403 return iter; 2404 } 2405 2406 /* 2407 =for apidoc hv_iterinit 2408 2409 Prepares a starting point to traverse a hash table. Returns the number of 2410 keys in the hash, including placeholders (i.e. the same as C<HvTOTALKEYS(hv)>). 2411 The return value is currently only meaningful for hashes without tie magic. 2412 2413 NOTE: Before version 5.004_65, C<hv_iterinit> used to return the number of 2414 hash buckets that happen to be in use. If you still need that esoteric 2415 value, you can get it through the macro C<HvFILL(hv)>. 2416 2417 2418 =cut 2419 */ 2420 2421 I32 2422 Perl_hv_iterinit(pTHX_ HV *hv) 2423 { 2424 PERL_ARGS_ASSERT_HV_ITERINIT; 2425 2426 if (SvOOK(hv)) { 2427 struct xpvhv_aux * iter = HvAUX(hv); 2428 HE * const entry = iter->xhv_eiter; /* HvEITER(hv) */ 2429 if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ 2430 HvLAZYDEL_off(hv); 2431 hv_free_ent(NULL, entry); 2432 } 2433 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ 2434 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ 2435 #ifdef PERL_HASH_RANDOMIZE_KEYS 2436 iter->xhv_last_rand = iter->xhv_rand; 2437 #endif 2438 } else { 2439 hv_auxinit(hv); 2440 } 2441 2442 /* note this includes placeholders! */ 2443 return HvTOTALKEYS(hv); 2444 } 2445 2446 /* 2447 =for apidoc hv_riter_p 2448 2449 Implements C<HvRITER> which you should use instead. 2450 2451 =cut 2452 */ 2453 2454 I32 * 2455 Perl_hv_riter_p(pTHX_ HV *hv) { 2456 struct xpvhv_aux *iter; 2457 2458 PERL_ARGS_ASSERT_HV_RITER_P; 2459 2460 iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); 2461 return &(iter->xhv_riter); 2462 } 2463 2464 /* 2465 =for apidoc hv_eiter_p 2466 2467 Implements C<HvEITER> which you should use instead. 2468 2469 =cut 2470 */ 2471 2472 HE ** 2473 Perl_hv_eiter_p(pTHX_ HV *hv) { 2474 struct xpvhv_aux *iter; 2475 2476 PERL_ARGS_ASSERT_HV_EITER_P; 2477 2478 iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); 2479 return &(iter->xhv_eiter); 2480 } 2481 2482 /* 2483 =for apidoc hv_riter_set 2484 2485 Implements C<HvRITER_set> which you should use instead. 2486 2487 =cut 2488 */ 2489 2490 void 2491 Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) { 2492 struct xpvhv_aux *iter; 2493 2494 PERL_ARGS_ASSERT_HV_RITER_SET; 2495 2496 if (SvOOK(hv)) { 2497 iter = HvAUX(hv); 2498 } else { 2499 if (riter == -1) 2500 return; 2501 2502 iter = hv_auxinit(hv); 2503 } 2504 iter->xhv_riter = riter; 2505 } 2506 2507 void 2508 Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) { 2509 struct xpvhv_aux *iter; 2510 2511 PERL_ARGS_ASSERT_HV_RAND_SET; 2512 2513 #ifdef PERL_HASH_RANDOMIZE_KEYS 2514 if (SvOOK(hv)) { 2515 iter = HvAUX(hv); 2516 } else { 2517 iter = hv_auxinit(hv); 2518 } 2519 iter->xhv_rand = new_xhv_rand; 2520 #else 2521 Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set()."); 2522 #endif 2523 } 2524 2525 /* 2526 =for apidoc hv_eiter_set 2527 2528 Implements C<HvEITER_set> which you should use instead. 2529 2530 =cut 2531 */ 2532 2533 void 2534 Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) { 2535 struct xpvhv_aux *iter; 2536 2537 PERL_ARGS_ASSERT_HV_EITER_SET; 2538 2539 if (SvOOK(hv)) { 2540 iter = HvAUX(hv); 2541 } else { 2542 /* 0 is the default so don't go malloc()ing a new structure just to 2543 hold 0. */ 2544 if (!eiter) 2545 return; 2546 2547 iter = hv_auxinit(hv); 2548 } 2549 iter->xhv_eiter = eiter; 2550 } 2551 2552 void 2553 Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags) 2554 { 2555 struct xpvhv_aux *iter; 2556 U32 hash; 2557 HEK **spot; 2558 2559 PERL_ARGS_ASSERT_HV_NAME_SET; 2560 2561 if (len > I32_MAX) 2562 Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len); 2563 2564 if (SvOOK(hv)) { 2565 iter = HvAUX(hv); 2566 if (iter->xhv_name_u.xhvnameu_name) { 2567 if(iter->xhv_name_count) { 2568 if(flags & HV_NAME_SETALL) { 2569 HEK ** const this_name = HvAUX(hv)->xhv_name_u.xhvnameu_names; 2570 HEK **hekp = this_name + ( 2571 iter->xhv_name_count < 0 2572 ? -iter->xhv_name_count 2573 : iter->xhv_name_count 2574 ); 2575 while(hekp-- > this_name+1) 2576 unshare_hek_or_pvn(*hekp, 0, 0, 0); 2577 /* The first elem may be null. */ 2578 if(*this_name) unshare_hek_or_pvn(*this_name, 0, 0, 0); 2579 Safefree(this_name); 2580 spot = &iter->xhv_name_u.xhvnameu_name; 2581 iter->xhv_name_count = 0; 2582 } 2583 else { 2584 if(iter->xhv_name_count > 0) { 2585 /* shift some things over */ 2586 Renew( 2587 iter->xhv_name_u.xhvnameu_names, iter->xhv_name_count + 1, HEK * 2588 ); 2589 spot = iter->xhv_name_u.xhvnameu_names; 2590 spot[iter->xhv_name_count] = spot[1]; 2591 spot[1] = spot[0]; 2592 iter->xhv_name_count = -(iter->xhv_name_count + 1); 2593 } 2594 else if(*(spot = iter->xhv_name_u.xhvnameu_names)) { 2595 unshare_hek_or_pvn(*spot, 0, 0, 0); 2596 } 2597 } 2598 } 2599 else if (flags & HV_NAME_SETALL) { 2600 unshare_hek_or_pvn(iter->xhv_name_u.xhvnameu_name, 0, 0, 0); 2601 spot = &iter->xhv_name_u.xhvnameu_name; 2602 } 2603 else { 2604 HEK * const existing_name = iter->xhv_name_u.xhvnameu_name; 2605 Newx(iter->xhv_name_u.xhvnameu_names, 2, HEK *); 2606 iter->xhv_name_count = -2; 2607 spot = iter->xhv_name_u.xhvnameu_names; 2608 spot[1] = existing_name; 2609 } 2610 } 2611 else { spot = &iter->xhv_name_u.xhvnameu_name; iter->xhv_name_count = 0; } 2612 } else { 2613 if (name == 0) 2614 return; 2615 2616 iter = hv_auxinit(hv); 2617 spot = &iter->xhv_name_u.xhvnameu_name; 2618 } 2619 PERL_HASH(hash, name, len); 2620 *spot = name ? share_hek(name, flags & SVf_UTF8 ? -(I32)len : (I32)len, hash) : NULL; 2621 } 2622 2623 /* 2624 This is basically sv_eq_flags() in sv.c, but we avoid the magic 2625 and bytes checking. 2626 */ 2627 2628 STATIC I32 2629 hek_eq_pvn_flags(pTHX_ const HEK *hek, const char* pv, const I32 pvlen, const U32 flags) { 2630 if ( (HEK_UTF8(hek) ? 1 : 0) != (flags & SVf_UTF8 ? 1 : 0) ) { 2631 if (flags & SVf_UTF8) 2632 return (bytes_cmp_utf8( 2633 (const U8*)HEK_KEY(hek), HEK_LEN(hek), 2634 (const U8*)pv, pvlen) == 0); 2635 else 2636 return (bytes_cmp_utf8( 2637 (const U8*)pv, pvlen, 2638 (const U8*)HEK_KEY(hek), HEK_LEN(hek)) == 0); 2639 } 2640 else 2641 return HEK_LEN(hek) == pvlen && ((HEK_KEY(hek) == pv) 2642 || memEQ(HEK_KEY(hek), pv, pvlen)); 2643 } 2644 2645 /* 2646 =for apidoc hv_ename_add 2647 2648 Adds a name to a stash's internal list of effective names. See 2649 C<L</hv_ename_delete>>. 2650 2651 This is called when a stash is assigned to a new location in the symbol 2652 table. 2653 2654 =cut 2655 */ 2656 2657 void 2658 Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags) 2659 { 2660 struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); 2661 U32 hash; 2662 2663 PERL_ARGS_ASSERT_HV_ENAME_ADD; 2664 2665 if (len > I32_MAX) 2666 Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len); 2667 2668 PERL_HASH(hash, name, len); 2669 2670 if (aux->xhv_name_count) { 2671 I32 count = aux->xhv_name_count; 2672 HEK ** const xhv_name = aux->xhv_name_u.xhvnameu_names + (count<0); 2673 HEK **hekp = xhv_name + (count < 0 ? -count - 1 : count); 2674 while (hekp-- > xhv_name) 2675 { 2676 assert(*hekp); 2677 if ( 2678 (HEK_UTF8(*hekp) || (flags & SVf_UTF8)) 2679 ? hek_eq_pvn_flags(aTHX_ *hekp, name, (I32)len, flags) 2680 : (HEK_LEN(*hekp) == (I32)len && memEQ(HEK_KEY(*hekp), name, len)) 2681 ) { 2682 if (hekp == xhv_name && count < 0) 2683 aux->xhv_name_count = -count; 2684 return; 2685 } 2686 } 2687 if (count < 0) aux->xhv_name_count--, count = -count; 2688 else aux->xhv_name_count++; 2689 Renew(aux->xhv_name_u.xhvnameu_names, count + 1, HEK *); 2690 (aux->xhv_name_u.xhvnameu_names)[count] = share_hek(name, (flags & SVf_UTF8 ? -(I32)len : (I32)len), hash); 2691 } 2692 else { 2693 HEK *existing_name = aux->xhv_name_u.xhvnameu_name; 2694 if ( 2695 existing_name && ( 2696 (HEK_UTF8(existing_name) || (flags & SVf_UTF8)) 2697 ? hek_eq_pvn_flags(aTHX_ existing_name, name, (I32)len, flags) 2698 : (HEK_LEN(existing_name) == (I32)len && memEQ(HEK_KEY(existing_name), name, len)) 2699 ) 2700 ) return; 2701 Newx(aux->xhv_name_u.xhvnameu_names, 2, HEK *); 2702 aux->xhv_name_count = existing_name ? 2 : -2; 2703 *aux->xhv_name_u.xhvnameu_names = existing_name; 2704 (aux->xhv_name_u.xhvnameu_names)[1] = share_hek(name, (flags & SVf_UTF8 ? -(I32)len : (I32)len), hash); 2705 } 2706 } 2707 2708 /* 2709 =for apidoc hv_ename_delete 2710 2711 Removes a name from a stash's internal list of effective names. If this is 2712 the name returned by C<HvENAME>, then another name in the list will take 2713 its place (C<HvENAME> will use it). 2714 2715 This is called when a stash is deleted from the symbol table. 2716 2717 =cut 2718 */ 2719 2720 void 2721 Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) 2722 { 2723 struct xpvhv_aux *aux; 2724 2725 PERL_ARGS_ASSERT_HV_ENAME_DELETE; 2726 2727 if (len > I32_MAX) 2728 Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len); 2729 2730 if (!SvOOK(hv)) return; 2731 2732 aux = HvAUX(hv); 2733 if (!aux->xhv_name_u.xhvnameu_name) return; 2734 2735 if (aux->xhv_name_count) { 2736 HEK ** const namep = aux->xhv_name_u.xhvnameu_names; 2737 I32 const count = aux->xhv_name_count; 2738 HEK **victim = namep + (count < 0 ? -count : count); 2739 while (victim-- > namep + 1) 2740 if ( 2741 (HEK_UTF8(*victim) || (flags & SVf_UTF8)) 2742 ? hek_eq_pvn_flags(aTHX_ *victim, name, (I32)len, flags) 2743 : (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len)) 2744 ) { 2745 unshare_hek_or_pvn(*victim, 0, 0, 0); 2746 if (count < 0) ++aux->xhv_name_count; 2747 else --aux->xhv_name_count; 2748 if ( 2749 (aux->xhv_name_count == 1 || aux->xhv_name_count == -1) 2750 && !*namep 2751 ) { /* if there are none left */ 2752 Safefree(namep); 2753 aux->xhv_name_u.xhvnameu_names = NULL; 2754 aux->xhv_name_count = 0; 2755 } 2756 else { 2757 /* Move the last one back to fill the empty slot. It 2758 does not matter what order they are in. */ 2759 *victim = *(namep + (count < 0 ? -count : count) - 1); 2760 } 2761 return; 2762 } 2763 if ( 2764 count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8)) 2765 ? hek_eq_pvn_flags(aTHX_ *namep, name, (I32)len, flags) 2766 : (HEK_LEN(*namep) == (I32)len && memEQ(HEK_KEY(*namep), name, len)) 2767 ) 2768 ) { 2769 aux->xhv_name_count = -count; 2770 } 2771 } 2772 else if( 2773 (HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8)) 2774 ? hek_eq_pvn_flags(aTHX_ aux->xhv_name_u.xhvnameu_name, name, (I32)len, flags) 2775 : (HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len && 2776 memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len)) 2777 ) { 2778 HEK * const namehek = aux->xhv_name_u.xhvnameu_name; 2779 Newx(aux->xhv_name_u.xhvnameu_names, 1, HEK *); 2780 *aux->xhv_name_u.xhvnameu_names = namehek; 2781 aux->xhv_name_count = -1; 2782 } 2783 } 2784 2785 AV ** 2786 Perl_hv_backreferences_p(pTHX_ HV *hv) { 2787 PERL_ARGS_ASSERT_HV_BACKREFERENCES_P; 2788 /* See also Perl_sv_get_backrefs in sv.c where this logic is unrolled */ 2789 { 2790 struct xpvhv_aux * const iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); 2791 return &(iter->xhv_backreferences); 2792 } 2793 } 2794 2795 void 2796 Perl_hv_kill_backrefs(pTHX_ HV *hv) { 2797 AV *av; 2798 2799 PERL_ARGS_ASSERT_HV_KILL_BACKREFS; 2800 2801 if (!SvOOK(hv)) 2802 return; 2803 2804 av = HvAUX(hv)->xhv_backreferences; 2805 2806 if (av) { 2807 HvAUX(hv)->xhv_backreferences = 0; 2808 Perl_sv_kill_backrefs(aTHX_ MUTABLE_SV(hv), av); 2809 if (SvTYPE(av) == SVt_PVAV) 2810 SvREFCNT_dec_NN(av); 2811 } 2812 } 2813 2814 /* 2815 hv_iternext is implemented as a macro in hv.h 2816 2817 =for apidoc hv_iternext 2818 2819 Returns entries from a hash iterator. See C<L</hv_iterinit>>. 2820 2821 You may call C<hv_delete> or C<hv_delete_ent> on the hash entry that the 2822 iterator currently points to, without losing your place or invalidating your 2823 iterator. Note that in this case the current entry is deleted from the hash 2824 with your iterator holding the last reference to it. Your iterator is flagged 2825 to free the entry on the next call to C<hv_iternext>, so you must not discard 2826 your iterator immediately else the entry will leak - call C<hv_iternext> to 2827 trigger the resource deallocation. 2828 2829 =for apidoc hv_iternext_flags 2830 2831 Returns entries from a hash iterator. See C<L</hv_iterinit>> and 2832 C<L</hv_iternext>>. 2833 The C<flags> value will normally be zero; if C<HV_ITERNEXT_WANTPLACEHOLDERS> is 2834 set the placeholders keys (for restricted hashes) will be returned in addition 2835 to normal keys. By default placeholders are automatically skipped over. 2836 Currently a placeholder is implemented with a value that is 2837 C<&PL_sv_placeholder>. Note that the implementation of placeholders and 2838 restricted hashes may change, and the implementation currently is 2839 insufficiently abstracted for any change to be tidy. 2840 2841 =for apidoc Amnh||HV_ITERNEXT_WANTPLACEHOLDERS 2842 2843 =cut 2844 */ 2845 2846 HE * 2847 Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) 2848 { 2849 HE *entry; 2850 HE *oldentry; 2851 MAGIC* mg; 2852 struct xpvhv_aux *iter; 2853 2854 PERL_ARGS_ASSERT_HV_ITERNEXT_FLAGS; 2855 2856 if (!SvOOK(hv)) { 2857 /* Too many things (well, pp_each at least) merrily assume that you can 2858 call hv_iternext without calling hv_iterinit, so we'll have to deal 2859 with it. */ 2860 hv_iterinit(hv); 2861 } 2862 else if (!HvARRAY(hv)) { 2863 /* Since 5.002 calling hv_iternext() has ensured that HvARRAY() is 2864 non-NULL. There was explicit code for this added as part of commit 2865 4633a7c4bad06b47, without any explicit comment as to why, but from 2866 code inspection it seems to be a fix to ensure that the later line 2867 entry = ((HE**)xhv->xhv_array)[xhv->xhv_riter]; 2868 was accessing a valid address, because that lookup in the loop was 2869 always reached even if the hash had no keys. 2870 2871 That explicit code was removed in 2005 as part of b79f7545f218479c: 2872 Store the xhv_aux structure after the main array. 2873 This reduces the size of HV bodies from 24 to 20 bytes on a 32 bit 2874 build. It has the side effect of defined %symbol_table:: now always 2875 being true. defined %hash is already deprecated. 2876 2877 with a comment and assertion added to note that after the call to 2878 hv_iterinit() HvARRAY() will now always be non-NULL. 2879 2880 In turn, that potential NULL-pointer access within the loop was made 2881 unreachable in 2009 by commit 9eb4ebd1619c0362 2882 In Perl_hv_iternext_flags(), clarify and generalise the empty hash bailout code. 2883 2884 which skipped the entire while loop if the hash had no keys. 2885 (If the hash has any keys, HvARRAY() cannot be NULL.) 2886 Hence the code in hv_iternext_flags() has long been able to handle 2887 HvARRAY() being NULL because no keys are allocated. 2888 2889 Now that we have decoupled the aux structure from HvARRAY(), 2890 HvARRAY() can now be NULL even when SVf_OOK is true (and the aux 2891 struct is allocated and correction initialised). 2892 2893 Is this actually a guarantee that we need to make? We should check 2894 whether anything is actually relying on this, or if we are simply 2895 making work for ourselves. 2896 2897 For now, keep the behaviour as-was - after calling hv_iternext_flags 2898 ensure that HvARRAY() is non-NULL. Many (other) things are changing - 2899 no need to add risk by changing this too. But in the future we should 2900 consider changing hv_iternext_flags() to avoid allocating HvARRAY() 2901 here, and potentially also we avoid allocating HvARRAY() 2902 automatically in hv_auxinit() */ 2903 2904 char *array; 2905 Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1), char); 2906 HvARRAY(hv) = (HE**)array; 2907 } 2908 2909 iter = HvAUX(hv); 2910 2911 oldentry = entry = iter->xhv_eiter; /* HvEITER(hv) */ 2912 if (SvMAGICAL(hv) && SvRMAGICAL(hv)) { 2913 if ( ( mg = mg_find((const SV *)hv, PERL_MAGIC_tied) ) ) { 2914 SV * const key = sv_newmortal(); 2915 if (entry) { 2916 sv_setsv(key, HeSVKEY_force(entry)); 2917 SvREFCNT_dec(HeSVKEY(entry)); /* get rid of previous key */ 2918 HeSVKEY_set(entry, NULL); 2919 } 2920 else { 2921 char *k; 2922 HEK *hek; 2923 2924 /* one HE per MAGICAL hash */ 2925 iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */ 2926 HvLAZYDEL_on(hv); /* make sure entry gets freed */ 2927 Zero(entry, 1, HE); 2928 Newxz(k, HEK_BASESIZE + sizeof(const SV *), char); 2929 hek = (HEK*)k; 2930 HeKEY_hek(entry) = hek; 2931 HeKLEN(entry) = HEf_SVKEY; 2932 } 2933 magic_nextpack(MUTABLE_SV(hv),mg,key); 2934 if (SvOK(key)) { 2935 /* force key to stay around until next time */ 2936 HeSVKEY_set(entry, SvREFCNT_inc_simple_NN(key)); 2937 return entry; /* beware, hent_val is not set */ 2938 } 2939 SvREFCNT_dec(HeVAL(entry)); 2940 Safefree(HeKEY_hek(entry)); 2941 del_HE(entry); 2942 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ 2943 HvLAZYDEL_off(hv); 2944 return NULL; 2945 } 2946 } 2947 #if defined(DYNAMIC_ENV_FETCH) && defined(VMS) /* set up %ENV for iteration */ 2948 if (!entry && SvRMAGICAL((const SV *)hv) 2949 && mg_find((const SV *)hv, PERL_MAGIC_env)) { 2950 prime_env_iter(); 2951 } 2952 #endif 2953 2954 /* hv_iterinit now ensures this. */ 2955 assert (HvARRAY(hv)); 2956 2957 /* At start of hash, entry is NULL. */ 2958 if (entry) 2959 { 2960 entry = HeNEXT(entry); 2961 if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { 2962 /* 2963 * Skip past any placeholders -- don't want to include them in 2964 * any iteration. 2965 */ 2966 while (entry && HeVAL(entry) == &PL_sv_placeholder) { 2967 entry = HeNEXT(entry); 2968 } 2969 } 2970 } 2971 2972 #ifdef PERL_HASH_RANDOMIZE_KEYS 2973 if (iter->xhv_last_rand != iter->xhv_rand) { 2974 if (iter->xhv_riter != -1) { 2975 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 2976 "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior" 2977 pTHX__FORMAT 2978 pTHX__VALUE); 2979 } 2980 iter->xhv_last_rand = iter->xhv_rand; 2981 } 2982 #endif 2983 2984 /* Skip the entire loop if the hash is empty. */ 2985 if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS) 2986 ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) { 2987 STRLEN max = HvMAX(hv); 2988 while (!entry) { 2989 /* OK. Come to the end of the current list. Grab the next one. */ 2990 2991 iter->xhv_riter++; /* HvRITER(hv)++ */ 2992 if (iter->xhv_riter > (I32)max /* HvRITER(hv) > HvMAX(hv) */) { 2993 /* There is no next one. End of the hash. */ 2994 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ 2995 #ifdef PERL_HASH_RANDOMIZE_KEYS 2996 iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */ 2997 #endif 2998 break; 2999 } 3000 entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & max ]; 3001 3002 if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { 3003 /* If we have an entry, but it's a placeholder, don't count it. 3004 Try the next. */ 3005 while (entry && HeVAL(entry) == &PL_sv_placeholder) 3006 entry = HeNEXT(entry); 3007 } 3008 /* Will loop again if this linked list starts NULL 3009 (for HV_ITERNEXT_WANTPLACEHOLDERS) 3010 or if we run through it and find only placeholders. */ 3011 } 3012 } 3013 else { 3014 iter->xhv_riter = -1; 3015 #ifdef PERL_HASH_RANDOMIZE_KEYS 3016 iter->xhv_last_rand = iter->xhv_rand; 3017 #endif 3018 } 3019 3020 if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ 3021 HvLAZYDEL_off(hv); 3022 hv_free_ent(NULL, oldentry); 3023 } 3024 3025 iter->xhv_eiter = entry; /* HvEITER(hv) = entry */ 3026 return entry; 3027 } 3028 3029 /* 3030 =for apidoc hv_iterkey 3031 3032 Returns the key from the current position of the hash iterator. See 3033 C<L</hv_iterinit>>. 3034 3035 =cut 3036 */ 3037 3038 char * 3039 Perl_hv_iterkey(pTHX_ HE *entry, I32 *retlen) 3040 { 3041 PERL_ARGS_ASSERT_HV_ITERKEY; 3042 3043 if (HeKLEN(entry) == HEf_SVKEY) { 3044 STRLEN len; 3045 char * const p = SvPV(HeKEY_sv(entry), len); 3046 *retlen = len; 3047 return p; 3048 } 3049 else { 3050 *retlen = HeKLEN(entry); 3051 return HeKEY(entry); 3052 } 3053 } 3054 3055 /* unlike hv_iterval(), this always returns a mortal copy of the key */ 3056 /* 3057 =for apidoc hv_iterkeysv 3058 3059 Returns the key as an C<SV*> from the current position of the hash 3060 iterator. The return value will always be a mortal copy of the key. Also 3061 see C<L</hv_iterinit>>. 3062 3063 =cut 3064 */ 3065 3066 SV * 3067 Perl_hv_iterkeysv(pTHX_ HE *entry) 3068 { 3069 PERL_ARGS_ASSERT_HV_ITERKEYSV; 3070 3071 return sv_2mortal(newSVhek(HeKEY_hek(entry))); 3072 } 3073 3074 /* 3075 =for apidoc hv_iterval 3076 3077 Returns the value from the current position of the hash iterator. See 3078 C<L</hv_iterkey>>. 3079 3080 =cut 3081 */ 3082 3083 SV * 3084 Perl_hv_iterval(pTHX_ HV *hv, HE *entry) 3085 { 3086 PERL_ARGS_ASSERT_HV_ITERVAL; 3087 3088 if (SvRMAGICAL(hv)) { 3089 if (mg_find((const SV *)hv, PERL_MAGIC_tied)) { 3090 SV* const sv = sv_newmortal(); 3091 if (HeKLEN(entry) == HEf_SVKEY) 3092 mg_copy(MUTABLE_SV(hv), sv, (char*)HeKEY_sv(entry), HEf_SVKEY); 3093 else 3094 mg_copy(MUTABLE_SV(hv), sv, HeKEY(entry), HeKLEN(entry)); 3095 return sv; 3096 } 3097 } 3098 return HeVAL(entry); 3099 } 3100 3101 /* 3102 =for apidoc hv_iternextsv 3103 3104 Performs an C<hv_iternext>, C<hv_iterkey>, and C<hv_iterval> in one 3105 operation. 3106 3107 =cut 3108 */ 3109 3110 SV * 3111 Perl_hv_iternextsv(pTHX_ HV *hv, char **key, I32 *retlen) 3112 { 3113 HE * const he = hv_iternext_flags(hv, 0); 3114 3115 PERL_ARGS_ASSERT_HV_ITERNEXTSV; 3116 3117 if (!he) 3118 return NULL; 3119 *key = hv_iterkey(he, retlen); 3120 return hv_iterval(hv, he); 3121 } 3122 3123 /* 3124 3125 Now a macro in hv.h 3126 3127 =for apidoc hv_magic 3128 3129 Adds magic to a hash. See C<L</sv_magic>>. 3130 3131 =for apidoc unsharepvn 3132 3133 If no one has access to shared string C<str> with length C<len>, free it. 3134 3135 C<len> and C<hash> must both be valid for C<str>. 3136 3137 =cut 3138 */ 3139 3140 void 3141 Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash) 3142 { 3143 unshare_hek_or_pvn (NULL, str, len, hash); 3144 } 3145 3146 3147 void 3148 Perl_unshare_hek(pTHX_ HEK *hek) 3149 { 3150 assert(hek); 3151 unshare_hek_or_pvn(hek, NULL, 0, 0); 3152 } 3153 3154 /* possibly free a shared string if no one has access to it 3155 hek if non-NULL takes priority over the other 3, else str, len and hash 3156 are used. If so, len and hash must both be valid for str. 3157 */ 3158 STATIC void 3159 S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) 3160 { 3161 HE *entry; 3162 HE **oentry; 3163 bool is_utf8 = FALSE; 3164 int k_flags = 0; 3165 const char * const save = str; 3166 struct shared_he *he = NULL; 3167 3168 if (hek) { 3169 assert((HEK_FLAGS(hek) & HVhek_NOTSHARED) == 0); 3170 /* Find the shared he which is just before us in memory. */ 3171 he = (struct shared_he *)(((char *)hek) 3172 - STRUCT_OFFSET(struct shared_he, 3173 shared_he_hek)); 3174 3175 /* Assert that the caller passed us a genuine (or at least consistent) 3176 shared hek */ 3177 assert (he->shared_he_he.hent_hek == hek); 3178 3179 if (he->shared_he_he.he_valu.hent_refcount - 1) { 3180 --he->shared_he_he.he_valu.hent_refcount; 3181 return; 3182 } 3183 3184 hash = HEK_HASH(hek); 3185 } else if (len < 0) { 3186 STRLEN tmplen = -len; 3187 is_utf8 = TRUE; 3188 /* See the note in hv_fetch(). --jhi */ 3189 str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8); 3190 len = tmplen; 3191 if (is_utf8) 3192 k_flags = HVhek_UTF8; 3193 if (str != save) 3194 k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY; 3195 } 3196 3197 /* what follows was the moral equivalent of: 3198 if ((Svp = hv_fetch(PL_strtab, tmpsv, FALSE, hash))) { 3199 if (--*Svp == NULL) 3200 hv_delete(PL_strtab, str, len, G_DISCARD, hash); 3201 } */ 3202 3203 /* assert(xhv_array != 0) */ 3204 oentry = &(HvARRAY(PL_strtab))[hash & (I32) HvMAX(PL_strtab)]; 3205 if (he) { 3206 const HE *const he_he = &(he->shared_he_he); 3207 for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) { 3208 if (entry == he_he) 3209 break; 3210 } 3211 } else { 3212 const U8 flags_masked = k_flags & HVhek_STORAGE_MASK; 3213 for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) { 3214 if (HeHASH(entry) != hash) /* strings can't be equal */ 3215 continue; 3216 if (HeKLEN(entry) != len) 3217 continue; 3218 if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */ 3219 continue; 3220 if (HeKFLAGS(entry) != flags_masked) 3221 continue; 3222 break; 3223 } 3224 } 3225 3226 if (entry) { 3227 if (--entry->he_valu.hent_refcount == 0) { 3228 *oentry = HeNEXT(entry); 3229 Safefree(entry); 3230 HvTOTALKEYS(PL_strtab)--; 3231 } 3232 } 3233 3234 if (!entry) 3235 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), 3236 "Attempt to free nonexistent shared string '%s'%s" 3237 pTHX__FORMAT, 3238 hek ? HEK_KEY(hek) : str, 3239 ((k_flags & HVhek_UTF8) ? " (utf8)" : "") pTHX__VALUE); 3240 if (k_flags & HVhek_FREEKEY) 3241 Safefree(str); 3242 } 3243 3244 /* get a (constant) string ptr from the global string table 3245 * string will get added if it is not already there. 3246 * len and hash must both be valid for str. 3247 */ 3248 HEK * 3249 Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash) 3250 { 3251 bool is_utf8 = FALSE; 3252 int flags = 0; 3253 const char * const save = str; 3254 3255 PERL_ARGS_ASSERT_SHARE_HEK; 3256 3257 if (len < 0) { 3258 STRLEN tmplen = -len; 3259 is_utf8 = TRUE; 3260 /* See the note in hv_fetch(). --jhi */ 3261 str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8); 3262 len = tmplen; 3263 /* If we were able to downgrade here, then than means that we were passed 3264 in a key which only had chars 0-255, but was utf8 encoded. */ 3265 if (is_utf8) 3266 flags = HVhek_UTF8; 3267 /* If we found we were able to downgrade the string to bytes, then 3268 we should flag that it needs upgrading on keys or each. Also flag 3269 that we need share_hek_flags to free the string. */ 3270 if (str != save) { 3271 PERL_HASH(hash, str, len); 3272 flags |= HVhek_WASUTF8 | HVhek_FREEKEY; 3273 } 3274 } 3275 3276 return share_hek_flags (str, len, hash, flags); 3277 } 3278 3279 STATIC HEK * 3280 S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags) 3281 { 3282 HE *entry; 3283 const U8 flags_masked = flags & HVhek_STORAGE_MASK; 3284 const U32 hindex = hash & (I32) HvMAX(PL_strtab); 3285 3286 PERL_ARGS_ASSERT_SHARE_HEK_FLAGS; 3287 assert(!(flags & HVhek_NOTSHARED)); 3288 3289 if (UNLIKELY(len > (STRLEN) I32_MAX)) { 3290 Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes"); 3291 } 3292 3293 /* what follows is the moral equivalent of: 3294 3295 if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE))) 3296 hv_store(PL_strtab, str, len, NULL, hash); 3297 3298 Can't rehash the shared string table, so not sure if it's worth 3299 counting the number of entries in the linked list 3300 */ 3301 3302 /* assert(xhv_array != 0) */ 3303 entry = (HvARRAY(PL_strtab))[hindex]; 3304 for (;entry; entry = HeNEXT(entry)) { 3305 if (HeHASH(entry) != hash) /* strings can't be equal */ 3306 continue; 3307 if (HeKLEN(entry) != (SSize_t) len) 3308 continue; 3309 if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */ 3310 continue; 3311 if (HeKFLAGS(entry) != flags_masked) 3312 continue; 3313 break; 3314 } 3315 3316 if (!entry) { 3317 /* What used to be head of the list. 3318 If this is NULL, then we're the first entry for this slot, which 3319 means we need to increate fill. */ 3320 struct shared_he *new_entry; 3321 HEK *hek; 3322 char *k; 3323 HE **const head = &HvARRAY(PL_strtab)[hindex]; 3324 HE *const next = *head; 3325 XPVHV * const xhv = (XPVHV*)SvANY(PL_strtab); 3326 3327 /* We don't actually store a HE from the arena and a regular HEK. 3328 Instead we allocate one chunk of memory big enough for both, 3329 and put the HEK straight after the HE. This way we can find the 3330 HE directly from the HEK. 3331 */ 3332 3333 Newx(k, STRUCT_OFFSET(struct shared_he, 3334 shared_he_hek.hek_key[0]) + len + 2, char); 3335 new_entry = (struct shared_he *)k; 3336 entry = &(new_entry->shared_he_he); 3337 hek = &(new_entry->shared_he_hek); 3338 3339 Copy(str, HEK_KEY(hek), len, char); 3340 HEK_KEY(hek)[len] = 0; 3341 HEK_LEN(hek) = len; 3342 HEK_HASH(hek) = hash; 3343 HEK_FLAGS(hek) = (unsigned char)flags_masked; 3344 3345 /* Still "point" to the HEK, so that other code need not know what 3346 we're up to. */ 3347 HeKEY_hek(entry) = hek; 3348 entry->he_valu.hent_refcount = 0; 3349 HeNEXT(entry) = next; 3350 *head = entry; 3351 3352 xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ 3353 if (!next) { /* initial entry? */ 3354 } else if ( DO_HSPLIT(xhv) ) { 3355 const STRLEN oldsize = xhv->xhv_max + 1; 3356 hsplit(PL_strtab, oldsize, oldsize * 2); 3357 } 3358 } 3359 3360 ++entry->he_valu.hent_refcount; 3361 3362 if (flags & HVhek_FREEKEY) 3363 Safefree(str); 3364 3365 return HeKEY_hek(entry); 3366 } 3367 3368 SSize_t * 3369 Perl_hv_placeholders_p(pTHX_ HV *hv) 3370 { 3371 MAGIC *mg = mg_find((const SV *)hv, PERL_MAGIC_rhash); 3372 3373 PERL_ARGS_ASSERT_HV_PLACEHOLDERS_P; 3374 3375 if (!mg) { 3376 mg = sv_magicext(MUTABLE_SV(hv), 0, PERL_MAGIC_rhash, 0, 0, 0); 3377 3378 if (!mg) { 3379 Perl_die(aTHX_ "panic: hv_placeholders_p"); 3380 } 3381 } 3382 return &(mg->mg_len); 3383 } 3384 3385 /* 3386 =for apidoc hv_placeholders_get 3387 3388 Implements C<HvPLACEHOLDERS_get>, which you should use instead. 3389 3390 =cut 3391 */ 3392 3393 I32 3394 Perl_hv_placeholders_get(pTHX_ const HV *hv) 3395 { 3396 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash); 3397 3398 PERL_ARGS_ASSERT_HV_PLACEHOLDERS_GET; 3399 PERL_UNUSED_CONTEXT; 3400 3401 return mg ? mg->mg_len : 0; 3402 } 3403 3404 /* 3405 =for apidoc hv_placeholders_set 3406 3407 Implements C<HvPLACEHOLDERS_set>, which you should use instead. 3408 3409 =cut 3410 */ 3411 3412 void 3413 Perl_hv_placeholders_set(pTHX_ HV *hv, I32 ph) 3414 { 3415 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash); 3416 3417 PERL_ARGS_ASSERT_HV_PLACEHOLDERS_SET; 3418 3419 if (mg) { 3420 mg->mg_len = ph; 3421 } else if (ph) { 3422 if (!sv_magicext(MUTABLE_SV(hv), 0, PERL_MAGIC_rhash, 0, 0, ph)) 3423 Perl_die(aTHX_ "panic: hv_placeholders_set"); 3424 } 3425 /* else we don't need to add magic to record 0 placeholders. */ 3426 } 3427 3428 STATIC SV * 3429 S_refcounted_he_value(pTHX_ const struct refcounted_he *he) 3430 { 3431 SV *value; 3432 3433 PERL_ARGS_ASSERT_REFCOUNTED_HE_VALUE; 3434 3435 switch(he->refcounted_he_data[0] & HVrhek_typemask) { 3436 case HVrhek_undef: 3437 value = newSV_type(SVt_NULL); 3438 break; 3439 case HVrhek_delete: 3440 value = &PL_sv_placeholder; 3441 break; 3442 case HVrhek_IV: 3443 value = newSViv(he->refcounted_he_val.refcounted_he_u_iv); 3444 break; 3445 case HVrhek_UV: 3446 value = newSVuv(he->refcounted_he_val.refcounted_he_u_uv); 3447 break; 3448 case HVrhek_PV: 3449 case HVrhek_PV_UTF8: 3450 /* Create a string SV that directly points to the bytes in our 3451 structure. */ 3452 value = newSV_type(SVt_PV); 3453 SvPV_set(value, (char *) he->refcounted_he_data + 1); 3454 SvCUR_set(value, he->refcounted_he_val.refcounted_he_u_len); 3455 /* This stops anything trying to free it */ 3456 SvLEN_set(value, 0); 3457 SvPOK_on(value); 3458 SvREADONLY_on(value); 3459 if ((he->refcounted_he_data[0] & HVrhek_typemask) == HVrhek_PV_UTF8) 3460 SvUTF8_on(value); 3461 break; 3462 default: 3463 Perl_croak(aTHX_ "panic: refcounted_he_value bad flags %" UVxf, 3464 (UV)he->refcounted_he_data[0]); 3465 } 3466 return value; 3467 } 3468 3469 /* 3470 =for apidoc refcounted_he_chain_2hv 3471 3472 Generates and returns a C<HV *> representing the content of a 3473 C<refcounted_he> chain. 3474 C<flags> is currently unused and must be zero. 3475 3476 =cut 3477 */ 3478 HV * 3479 Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags) 3480 { 3481 HV *hv; 3482 U32 placeholders, max; 3483 3484 if (flags) 3485 Perl_croak(aTHX_ "panic: refcounted_he_chain_2hv bad flags %" UVxf, 3486 (UV)flags); 3487 3488 /* We could chase the chain once to get an idea of the number of keys, 3489 and call ksplit. But for now we'll make a potentially inefficient 3490 hash with only 8 entries in its array. */ 3491 hv = newHV(); 3492 #ifdef NODEFAULT_SHAREKEYS 3493 /* We share keys in the COP, so it's much easier to keep sharing keys in 3494 the hash we build from it. */ 3495 HvSHAREKEYS_on(hv); 3496 #endif 3497 max = HvMAX(hv); 3498 if (!HvARRAY(hv)) { 3499 char *array; 3500 Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(max + 1), char); 3501 HvARRAY(hv) = (HE**)array; 3502 } 3503 3504 placeholders = 0; 3505 while (chain) { 3506 #ifdef USE_ITHREADS 3507 U32 hash = chain->refcounted_he_hash; 3508 #else 3509 U32 hash = HEK_HASH(chain->refcounted_he_hek); 3510 #endif 3511 HE **oentry = &((HvARRAY(hv))[hash & max]); 3512 HE *entry = *oentry; 3513 SV *value; 3514 3515 for (; entry; entry = HeNEXT(entry)) { 3516 if (HeHASH(entry) == hash) { 3517 /* We might have a duplicate key here. If so, entry is older 3518 than the key we've already put in the hash, so if they are 3519 the same, skip adding entry. */ 3520 #ifdef USE_ITHREADS 3521 const STRLEN klen = HeKLEN(entry); 3522 const char *const key = HeKEY(entry); 3523 if (klen == chain->refcounted_he_keylen 3524 && (!!HeKUTF8(entry) 3525 == !!(chain->refcounted_he_data[0] & HVhek_UTF8)) 3526 && memEQ(key, REF_HE_KEY(chain), klen)) 3527 goto next_please; 3528 #else 3529 if (HeKEY_hek(entry) == chain->refcounted_he_hek) 3530 goto next_please; 3531 if (HeKLEN(entry) == HEK_LEN(chain->refcounted_he_hek) 3532 && HeKUTF8(entry) == HEK_UTF8(chain->refcounted_he_hek) 3533 && memEQ(HeKEY(entry), HEK_KEY(chain->refcounted_he_hek), 3534 HeKLEN(entry))) 3535 goto next_please; 3536 #endif 3537 } 3538 } 3539 assert (!entry); 3540 entry = new_HE(); 3541 3542 #ifdef USE_ITHREADS 3543 HeKEY_hek(entry) 3544 = share_hek_flags(REF_HE_KEY(chain), 3545 chain->refcounted_he_keylen, 3546 chain->refcounted_he_hash, 3547 (chain->refcounted_he_data[0] 3548 & (HVhek_UTF8|HVhek_WASUTF8))); 3549 #else 3550 HeKEY_hek(entry) = share_hek_hek(chain->refcounted_he_hek); 3551 #endif 3552 value = refcounted_he_value(chain); 3553 if (value == &PL_sv_placeholder) 3554 placeholders++; 3555 HeVAL(entry) = value; 3556 3557 /* Link it into the chain. */ 3558 HeNEXT(entry) = *oentry; 3559 *oentry = entry; 3560 3561 HvTOTALKEYS(hv)++; 3562 3563 next_please: 3564 chain = chain->refcounted_he_next; 3565 } 3566 3567 if (placeholders) { 3568 clear_placeholders(hv, placeholders); 3569 } 3570 3571 /* We could check in the loop to see if we encounter any keys with key 3572 flags, but it's probably not worth it, as this per-hash flag is only 3573 really meant as an optimisation for things like Storable. */ 3574 HvHASKFLAGS_on(hv); 3575 DEBUG_A(Perl_hv_assert(aTHX_ hv)); 3576 3577 return hv; 3578 } 3579 3580 /* 3581 =for apidoc refcounted_he_fetch_pvn 3582 3583 Search along a C<refcounted_he> chain for an entry with the key specified 3584 by C<keypv> and C<keylen>. If C<flags> has the C<REFCOUNTED_HE_KEY_UTF8> 3585 bit set, the key octets are interpreted as UTF-8, otherwise they 3586 are interpreted as Latin-1. C<hash> is a precomputed hash of the key 3587 string, or zero if it has not been precomputed. Returns a mortal scalar 3588 representing the value associated with the key, or C<&PL_sv_placeholder> 3589 if there is no value associated with the key. 3590 3591 =cut 3592 */ 3593 3594 SV * 3595 Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, 3596 const char *keypv, STRLEN keylen, U32 hash, U32 flags) 3597 { 3598 U8 utf8_flag; 3599 PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN; 3600 3601 if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS)) 3602 Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf, 3603 (UV)flags); 3604 if (!chain) 3605 goto ret; 3606 if (flags & REFCOUNTED_HE_KEY_UTF8) { 3607 /* For searching purposes, canonicalise to Latin-1 where possible. */ 3608 const char *keyend = keypv + keylen, *p; 3609 STRLEN nonascii_count = 0; 3610 for (p = keypv; p != keyend; p++) { 3611 if (! UTF8_IS_INVARIANT(*p)) { 3612 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { 3613 goto canonicalised_key; 3614 } 3615 nonascii_count++; 3616 p++; 3617 } 3618 } 3619 if (nonascii_count) { 3620 char *q; 3621 const char *p = keypv, *keyend = keypv + keylen; 3622 keylen -= nonascii_count; 3623 Newx(q, keylen, char); 3624 SAVEFREEPV(q); 3625 keypv = q; 3626 for (; p != keyend; p++, q++) { 3627 U8 c = (U8)*p; 3628 if (UTF8_IS_INVARIANT(c)) { 3629 *q = (char) c; 3630 } 3631 else { 3632 p++; 3633 *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p); 3634 } 3635 } 3636 } 3637 flags &= ~REFCOUNTED_HE_KEY_UTF8; 3638 canonicalised_key: ; 3639 } 3640 utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8) ? HVhek_UTF8 : 0; 3641 if (!hash) 3642 PERL_HASH(hash, keypv, keylen); 3643 3644 for (; chain; chain = chain->refcounted_he_next) { 3645 if ( 3646 #ifdef USE_ITHREADS 3647 hash == chain->refcounted_he_hash && 3648 keylen == chain->refcounted_he_keylen && 3649 memEQ(REF_HE_KEY(chain), keypv, keylen) && 3650 utf8_flag == (chain->refcounted_he_data[0] & HVhek_UTF8) 3651 #else 3652 hash == HEK_HASH(chain->refcounted_he_hek) && 3653 keylen == (STRLEN)HEK_LEN(chain->refcounted_he_hek) && 3654 memEQ(HEK_KEY(chain->refcounted_he_hek), keypv, keylen) && 3655 utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8) 3656 #endif 3657 ) { 3658 if (flags & REFCOUNTED_HE_EXISTS) 3659 return (chain->refcounted_he_data[0] & HVrhek_typemask) 3660 == HVrhek_delete 3661 ? NULL : &PL_sv_yes; 3662 return sv_2mortal(refcounted_he_value(chain)); 3663 } 3664 } 3665 ret: 3666 return flags & REFCOUNTED_HE_EXISTS ? NULL : &PL_sv_placeholder; 3667 } 3668 3669 /* 3670 =for apidoc refcounted_he_fetch_pv 3671 3672 Like L</refcounted_he_fetch_pvn>, but takes a nul-terminated string 3673 instead of a string/length pair. 3674 3675 =cut 3676 */ 3677 3678 SV * 3679 Perl_refcounted_he_fetch_pv(pTHX_ const struct refcounted_he *chain, 3680 const char *key, U32 hash, U32 flags) 3681 { 3682 PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PV; 3683 return refcounted_he_fetch_pvn(chain, key, strlen(key), hash, flags); 3684 } 3685 3686 /* 3687 =for apidoc refcounted_he_fetch_sv 3688 3689 Like L</refcounted_he_fetch_pvn>, but takes a Perl scalar instead of a 3690 string/length pair. 3691 3692 =cut 3693 */ 3694 3695 SV * 3696 Perl_refcounted_he_fetch_sv(pTHX_ const struct refcounted_he *chain, 3697 SV *key, U32 hash, U32 flags) 3698 { 3699 const char *keypv; 3700 STRLEN keylen; 3701 PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_SV; 3702 if (flags & REFCOUNTED_HE_KEY_UTF8) 3703 Perl_croak(aTHX_ "panic: refcounted_he_fetch_sv bad flags %" UVxf, 3704 (UV)flags); 3705 keypv = SvPV_const(key, keylen); 3706 if (SvUTF8(key)) 3707 flags |= REFCOUNTED_HE_KEY_UTF8; 3708 if (!hash && SvIsCOW_shared_hash(key)) 3709 hash = SvSHARED_HASH(key); 3710 return refcounted_he_fetch_pvn(chain, keypv, keylen, hash, flags); 3711 } 3712 3713 /* 3714 =for apidoc refcounted_he_new_pvn 3715 3716 Creates a new C<refcounted_he>. This consists of a single key/value 3717 pair and a reference to an existing C<refcounted_he> chain (which may 3718 be empty), and thus forms a longer chain. When using the longer chain, 3719 the new key/value pair takes precedence over any entry for the same key 3720 further along the chain. 3721 3722 The new key is specified by C<keypv> and C<keylen>. If C<flags> has 3723 the C<REFCOUNTED_HE_KEY_UTF8> bit set, the key octets are interpreted 3724 as UTF-8, otherwise they are interpreted as Latin-1. C<hash> is 3725 a precomputed hash of the key string, or zero if it has not been 3726 precomputed. 3727 3728 C<value> is the scalar value to store for this key. C<value> is copied 3729 by this function, which thus does not take ownership of any reference 3730 to it, and later changes to the scalar will not be reflected in the 3731 value visible in the C<refcounted_he>. Complex types of scalar will not 3732 be stored with referential integrity, but will be coerced to strings. 3733 C<value> may be either null or C<&PL_sv_placeholder> to indicate that no 3734 value is to be associated with the key; this, as with any non-null value, 3735 takes precedence over the existence of a value for the key further along 3736 the chain. 3737 3738 C<parent> points to the rest of the C<refcounted_he> chain to be 3739 attached to the new C<refcounted_he>. This function takes ownership 3740 of one reference to C<parent>, and returns one reference to the new 3741 C<refcounted_he>. 3742 3743 =cut 3744 */ 3745 3746 struct refcounted_he * 3747 Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, 3748 const char *keypv, STRLEN keylen, U32 hash, SV *value, U32 flags) 3749 { 3750 STRLEN value_len = 0; 3751 const char *value_p = NULL; 3752 bool is_pv; 3753 char value_type; 3754 char hekflags; 3755 STRLEN key_offset = 1; 3756 struct refcounted_he *he; 3757 PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN; 3758 3759 if (!value || value == &PL_sv_placeholder) { 3760 value_type = HVrhek_delete; 3761 } else if (SvPOK(value)) { 3762 value_type = HVrhek_PV; 3763 } else if (SvIOK(value)) { 3764 value_type = SvUOK((const SV *)value) ? HVrhek_UV : HVrhek_IV; 3765 } else if (!SvOK(value)) { 3766 value_type = HVrhek_undef; 3767 } else { 3768 value_type = HVrhek_PV; 3769 } 3770 is_pv = value_type == HVrhek_PV; 3771 if (is_pv) { 3772 /* Do it this way so that the SvUTF8() test is after the SvPV, in case 3773 the value is overloaded, and doesn't yet have the UTF-8flag set. */ 3774 value_p = SvPV_const(value, value_len); 3775 if (SvUTF8(value)) 3776 value_type = HVrhek_PV_UTF8; 3777 key_offset = value_len + 2; 3778 } 3779 hekflags = value_type; 3780 3781 if (flags & REFCOUNTED_HE_KEY_UTF8) { 3782 /* Canonicalise to Latin-1 where possible. */ 3783 const char *keyend = keypv + keylen, *p; 3784 STRLEN nonascii_count = 0; 3785 for (p = keypv; p != keyend; p++) { 3786 if (! UTF8_IS_INVARIANT(*p)) { 3787 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { 3788 goto canonicalised_key; 3789 } 3790 nonascii_count++; 3791 p++; 3792 } 3793 } 3794 if (nonascii_count) { 3795 char *q; 3796 const char *p = keypv, *keyend = keypv + keylen; 3797 keylen -= nonascii_count; 3798 Newx(q, keylen, char); 3799 SAVEFREEPV(q); 3800 keypv = q; 3801 for (; p != keyend; p++, q++) { 3802 U8 c = (U8)*p; 3803 if (UTF8_IS_INVARIANT(c)) { 3804 *q = (char) c; 3805 } 3806 else { 3807 p++; 3808 *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p); 3809 } 3810 } 3811 } 3812 flags &= ~REFCOUNTED_HE_KEY_UTF8; 3813 canonicalised_key: ; 3814 } 3815 if (flags & REFCOUNTED_HE_KEY_UTF8) 3816 hekflags |= HVhek_UTF8; 3817 if (!hash) 3818 PERL_HASH(hash, keypv, keylen); 3819 3820 #ifdef USE_ITHREADS 3821 he = (struct refcounted_he*) 3822 PerlMemShared_malloc(sizeof(struct refcounted_he) - 1 3823 + keylen 3824 + key_offset); 3825 #else 3826 he = (struct refcounted_he*) 3827 PerlMemShared_malloc(sizeof(struct refcounted_he) - 1 3828 + key_offset); 3829 #endif 3830 3831 he->refcounted_he_next = parent; 3832 3833 if (is_pv) { 3834 Copy(value_p, he->refcounted_he_data + 1, value_len + 1, char); 3835 he->refcounted_he_val.refcounted_he_u_len = value_len; 3836 } else if (value_type == HVrhek_IV) { 3837 he->refcounted_he_val.refcounted_he_u_iv = SvIVX(value); 3838 } else if (value_type == HVrhek_UV) { 3839 he->refcounted_he_val.refcounted_he_u_uv = SvUVX(value); 3840 } 3841 3842 #ifdef USE_ITHREADS 3843 he->refcounted_he_hash = hash; 3844 he->refcounted_he_keylen = keylen; 3845 Copy(keypv, he->refcounted_he_data + key_offset, keylen, char); 3846 #else 3847 he->refcounted_he_hek = share_hek_flags(keypv, keylen, hash, hekflags); 3848 #endif 3849 3850 he->refcounted_he_data[0] = hekflags; 3851 he->refcounted_he_refcnt = 1; 3852 3853 return he; 3854 } 3855 3856 /* 3857 =for apidoc refcounted_he_new_pv 3858 3859 Like L</refcounted_he_new_pvn>, but takes a nul-terminated string instead 3860 of a string/length pair. 3861 3862 =cut 3863 */ 3864 3865 struct refcounted_he * 3866 Perl_refcounted_he_new_pv(pTHX_ struct refcounted_he *parent, 3867 const char *key, U32 hash, SV *value, U32 flags) 3868 { 3869 PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PV; 3870 return refcounted_he_new_pvn(parent, key, strlen(key), hash, value, flags); 3871 } 3872 3873 /* 3874 =for apidoc refcounted_he_new_sv 3875 3876 Like L</refcounted_he_new_pvn>, but takes a Perl scalar instead of a 3877 string/length pair. 3878 3879 =cut 3880 */ 3881 3882 struct refcounted_he * 3883 Perl_refcounted_he_new_sv(pTHX_ struct refcounted_he *parent, 3884 SV *key, U32 hash, SV *value, U32 flags) 3885 { 3886 const char *keypv; 3887 STRLEN keylen; 3888 PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_SV; 3889 if (flags & REFCOUNTED_HE_KEY_UTF8) 3890 Perl_croak(aTHX_ "panic: refcounted_he_new_sv bad flags %" UVxf, 3891 (UV)flags); 3892 keypv = SvPV_const(key, keylen); 3893 if (SvUTF8(key)) 3894 flags |= REFCOUNTED_HE_KEY_UTF8; 3895 if (!hash && SvIsCOW_shared_hash(key)) 3896 hash = SvSHARED_HASH(key); 3897 return refcounted_he_new_pvn(parent, keypv, keylen, hash, value, flags); 3898 } 3899 3900 /* 3901 =for apidoc refcounted_he_free 3902 3903 Decrements the reference count of a C<refcounted_he> by one. If the 3904 reference count reaches zero the structure's memory is freed, which 3905 (recursively) causes a reduction of its parent C<refcounted_he>'s 3906 reference count. It is safe to pass a null pointer to this function: 3907 no action occurs in this case. 3908 3909 =cut 3910 */ 3911 3912 void 3913 Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) { 3914 PERL_UNUSED_CONTEXT; 3915 3916 while (he) { 3917 struct refcounted_he *copy; 3918 U32 new_count; 3919 3920 HINTS_REFCNT_LOCK; 3921 new_count = --he->refcounted_he_refcnt; 3922 HINTS_REFCNT_UNLOCK; 3923 3924 if (new_count) { 3925 return; 3926 } 3927 3928 #ifndef USE_ITHREADS 3929 unshare_hek_or_pvn (he->refcounted_he_hek, 0, 0, 0); 3930 #endif 3931 copy = he; 3932 he = he->refcounted_he_next; 3933 PerlMemShared_free(copy); 3934 } 3935 } 3936 3937 /* 3938 =for apidoc refcounted_he_inc 3939 3940 Increment the reference count of a C<refcounted_he>. The pointer to the 3941 C<refcounted_he> is also returned. It is safe to pass a null pointer 3942 to this function: no action occurs and a null pointer is returned. 3943 3944 =cut 3945 */ 3946 3947 struct refcounted_he * 3948 Perl_refcounted_he_inc(pTHX_ struct refcounted_he *he) 3949 { 3950 PERL_UNUSED_CONTEXT; 3951 if (he) { 3952 HINTS_REFCNT_LOCK; 3953 he->refcounted_he_refcnt++; 3954 HINTS_REFCNT_UNLOCK; 3955 } 3956 return he; 3957 } 3958 3959 /* 3960 =for apidoc_section $COP 3961 =for apidoc cop_fetch_label 3962 3963 Returns the label attached to a cop, and stores its length in bytes into 3964 C<*len>. 3965 Upon return, C<*flags> will be set to either C<SVf_UTF8> or 0. 3966 3967 Alternatively, use the macro C<L</CopLABEL_len_flags>>; 3968 or if you don't need to know if the label is UTF-8 or not, the macro 3969 C<L</CopLABEL_len>>; 3970 or if you additionally dont need to know the length, C<L</CopLABEL>>. 3971 3972 =cut 3973 */ 3974 3975 /* pp_entereval is aware that labels are stored with a key ':' at the top of 3976 the linked list. */ 3977 const char * 3978 Perl_cop_fetch_label(pTHX_ COP *const cop, STRLEN *len, U32 *flags) { 3979 struct refcounted_he *const chain = cop->cop_hints_hash; 3980 3981 PERL_ARGS_ASSERT_COP_FETCH_LABEL; 3982 PERL_UNUSED_CONTEXT; 3983 3984 if (!chain) 3985 return NULL; 3986 #ifdef USE_ITHREADS 3987 if (chain->refcounted_he_keylen != 1) 3988 return NULL; 3989 if (*REF_HE_KEY(chain) != ':') 3990 return NULL; 3991 #else 3992 if ((STRLEN)HEK_LEN(chain->refcounted_he_hek) != 1) 3993 return NULL; 3994 if (*HEK_KEY(chain->refcounted_he_hek) != ':') 3995 return NULL; 3996 #endif 3997 /* Stop anyone trying to really mess us up by adding their own value for 3998 ':' into %^H */ 3999 if ((chain->refcounted_he_data[0] & HVrhek_typemask) != HVrhek_PV 4000 && (chain->refcounted_he_data[0] & HVrhek_typemask) != HVrhek_PV_UTF8) 4001 return NULL; 4002 4003 if (len) 4004 *len = chain->refcounted_he_val.refcounted_he_u_len; 4005 if (flags) { 4006 *flags = ((chain->refcounted_he_data[0] & HVrhek_typemask) 4007 == HVrhek_PV_UTF8) ? SVf_UTF8 : 0; 4008 } 4009 return chain->refcounted_he_data + 1; 4010 } 4011 4012 /* 4013 =for apidoc cop_store_label 4014 4015 Save a label into a C<cop_hints_hash>. 4016 You need to set flags to C<SVf_UTF8> 4017 for a UTF-8 label. Any other flag is ignored. 4018 4019 =cut 4020 */ 4021 4022 void 4023 Perl_cop_store_label(pTHX_ COP *const cop, const char *label, STRLEN len, 4024 U32 flags) 4025 { 4026 SV *labelsv; 4027 PERL_ARGS_ASSERT_COP_STORE_LABEL; 4028 4029 if (flags & ~(SVf_UTF8)) 4030 Perl_croak(aTHX_ "panic: cop_store_label illegal flag bits 0x%" UVxf, 4031 (UV)flags); 4032 labelsv = newSVpvn_flags(label, len, SVs_TEMP); 4033 if (flags & SVf_UTF8) 4034 SvUTF8_on(labelsv); 4035 cop->cop_hints_hash 4036 = refcounted_he_new_pvs(cop->cop_hints_hash, ":", labelsv, 0); 4037 } 4038 4039 /* 4040 =for apidoc_section $HV 4041 =for apidoc hv_assert 4042 4043 Check that a hash is in an internally consistent state. 4044 4045 =cut 4046 */ 4047 4048 #ifdef DEBUGGING 4049 4050 void 4051 Perl_hv_assert(pTHX_ HV *hv) 4052 { 4053 HE* entry; 4054 int withflags = 0; 4055 int placeholders = 0; 4056 int real = 0; 4057 int bad = 0; 4058 const I32 riter = HvRITER_get(hv); 4059 HE *eiter = HvEITER_get(hv); 4060 4061 PERL_ARGS_ASSERT_HV_ASSERT; 4062 4063 (void)hv_iterinit(hv); 4064 4065 while ((entry = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS))) { 4066 /* sanity check the values */ 4067 if (HeVAL(entry) == &PL_sv_placeholder) 4068 placeholders++; 4069 else 4070 real++; 4071 /* sanity check the keys */ 4072 if (HeSVKEY(entry)) { 4073 NOOP; /* Don't know what to check on SV keys. */ 4074 } else if (HeKUTF8(entry)) { 4075 withflags++; 4076 if (HeKWASUTF8(entry)) { 4077 PerlIO_printf(Perl_debug_log, 4078 "hash key has both WASUTF8 and UTF8: '%.*s'\n", 4079 (int) HeKLEN(entry), HeKEY(entry)); 4080 bad = 1; 4081 } 4082 } else if (HeKWASUTF8(entry)) 4083 withflags++; 4084 } 4085 if (!SvTIED_mg((const SV *)hv, PERL_MAGIC_tied)) { 4086 static const char bad_count[] = "Count %d %s(s), but hash reports %d\n"; 4087 const int nhashkeys = HvUSEDKEYS(hv); 4088 const int nhashplaceholders = HvPLACEHOLDERS_get(hv); 4089 4090 if (nhashkeys != real) { 4091 PerlIO_printf(Perl_debug_log, bad_count, real, "keys", nhashkeys ); 4092 bad = 1; 4093 } 4094 if (nhashplaceholders != placeholders) { 4095 PerlIO_printf(Perl_debug_log, bad_count, placeholders, "placeholder", nhashplaceholders ); 4096 bad = 1; 4097 } 4098 } 4099 if (withflags && ! HvHASKFLAGS(hv)) { 4100 PerlIO_printf(Perl_debug_log, 4101 "Hash has HASKFLAGS off but I count %d key(s) with flags\n", 4102 withflags); 4103 bad = 1; 4104 } 4105 if (bad) { 4106 sv_dump(MUTABLE_SV(hv)); 4107 } 4108 HvRITER_set(hv, riter); /* Restore hash iterator state */ 4109 HvEITER_set(hv, eiter); 4110 } 4111 4112 #endif 4113 4114 /* 4115 * ex: set ts=8 sts=4 sw=4 et: 4116 */ 4117