143003dfeSmillertpackage Hash::Util; 243003dfeSmillert 343003dfeSmillertrequire 5.007003; 443003dfeSmillertuse strict; 543003dfeSmillertuse Carp; 643003dfeSmillertuse warnings; 7898184e3Ssthenno warnings 'uninitialized'; 843003dfeSmillertuse warnings::register; 9eac174f2Safresh1no warnings 'experimental::builtin'; 10eac174f2Safresh1use builtin qw(reftype); 1143003dfeSmillert 1243003dfeSmillertrequire Exporter; 1343003dfeSmillertour @EXPORT_OK = qw( 1443003dfeSmillert fieldhash fieldhashes 1543003dfeSmillert 1643003dfeSmillert all_keys 1743003dfeSmillert lock_keys unlock_keys 1843003dfeSmillert lock_value unlock_value 1943003dfeSmillert lock_hash unlock_hash 2091f110e0Safresh1 lock_keys_plus 2191f110e0Safresh1 hash_locked hash_unlocked 2291f110e0Safresh1 hashref_locked hashref_unlocked 2343003dfeSmillert hidden_keys legal_keys 2443003dfeSmillert 2543003dfeSmillert lock_ref_keys unlock_ref_keys 2643003dfeSmillert lock_ref_value unlock_ref_value 2743003dfeSmillert lock_hashref unlock_hashref 2891f110e0Safresh1 lock_ref_keys_plus 2943003dfeSmillert hidden_ref_keys legal_ref_keys 3043003dfeSmillert 3191f110e0Safresh1 hash_seed hash_value hv_store 32b8851fccSafresh1 bucket_stats bucket_stats_formatted bucket_info bucket_array 3391f110e0Safresh1 lock_hash_recurse unlock_hash_recurse 34b8851fccSafresh1 lock_hashref_recurse unlock_hashref_recurse 3543003dfeSmillert 3691f110e0Safresh1 hash_traversal_mask 379f11ffb7Safresh1 389f11ffb7Safresh1 bucket_ratio 399f11ffb7Safresh1 used_buckets 409f11ffb7Safresh1 num_buckets 4143003dfeSmillert ); 429f11ffb7Safresh1BEGIN { 439f11ffb7Safresh1 # make sure all our XS routines are available early so their prototypes 449f11ffb7Safresh1 # are correctly applied in the following code. 45*3d61058aSafresh1 our $VERSION = '0.32'; 46898184e3Ssthen require XSLoader; 47898184e3Ssthen XSLoader::load(); 489f11ffb7Safresh1} 4943003dfeSmillert 5043003dfeSmillertsub import { 5143003dfeSmillert my $class = shift; 5243003dfeSmillert if ( grep /fieldhash/, @_ ) { 5343003dfeSmillert require Hash::Util::FieldHash; 5443003dfeSmillert Hash::Util::FieldHash->import(':all'); # for re-export 5543003dfeSmillert } 5643003dfeSmillert unshift @_, $class; 5743003dfeSmillert goto &Exporter::import; 5843003dfeSmillert} 5943003dfeSmillert 6043003dfeSmillert 6143003dfeSmillert=head1 NAME 6243003dfeSmillert 6343003dfeSmillertHash::Util - A selection of general-utility hash subroutines 6443003dfeSmillert 6543003dfeSmillert=head1 SYNOPSIS 6643003dfeSmillert 6743003dfeSmillert # Restricted hashes 6843003dfeSmillert 6943003dfeSmillert use Hash::Util qw( 7091f110e0Safresh1 fieldhash fieldhashes 7191f110e0Safresh1 7291f110e0Safresh1 all_keys 7343003dfeSmillert lock_keys unlock_keys 7443003dfeSmillert lock_value unlock_value 7543003dfeSmillert lock_hash unlock_hash 7691f110e0Safresh1 lock_keys_plus 7791f110e0Safresh1 hash_locked hash_unlocked 7891f110e0Safresh1 hashref_locked hashref_unlocked 7943003dfeSmillert hidden_keys legal_keys 8091f110e0Safresh1 8191f110e0Safresh1 lock_ref_keys unlock_ref_keys 8291f110e0Safresh1 lock_ref_value unlock_ref_value 8391f110e0Safresh1 lock_hashref unlock_hashref 8491f110e0Safresh1 lock_ref_keys_plus 8591f110e0Safresh1 hidden_ref_keys legal_ref_keys 8691f110e0Safresh1 8791f110e0Safresh1 hash_seed hash_value hv_store 8891f110e0Safresh1 bucket_stats bucket_info bucket_array 8991f110e0Safresh1 lock_hash_recurse unlock_hash_recurse 90b8851fccSafresh1 lock_hashref_recurse unlock_hashref_recurse 9191f110e0Safresh1 9291f110e0Safresh1 hash_traversal_mask 9343003dfeSmillert ); 9443003dfeSmillert 95e0680481Safresh1 my %hash = (foo => 42, bar => 23); 9643003dfeSmillert # Ways to restrict a hash 9743003dfeSmillert lock_keys(%hash); 9843003dfeSmillert lock_keys(%hash, @keyset); 9943003dfeSmillert lock_keys_plus(%hash, @additional_keys); 10043003dfeSmillert 10143003dfeSmillert # Ways to inspect the properties of a restricted hash 10243003dfeSmillert my @legal = legal_keys(%hash); 10343003dfeSmillert my @hidden = hidden_keys(%hash); 10443003dfeSmillert my $ref = all_keys(%hash,@keys,@hidden); 10543003dfeSmillert my $is_locked = hash_locked(%hash); 10643003dfeSmillert 10743003dfeSmillert # Remove restrictions on the hash 10843003dfeSmillert unlock_keys(%hash); 10943003dfeSmillert 11043003dfeSmillert # Lock individual values in a hash 11143003dfeSmillert lock_value (%hash, 'foo'); 11243003dfeSmillert unlock_value(%hash, 'foo'); 11343003dfeSmillert 11443003dfeSmillert # Ways to change the restrictions on both keys and values 11543003dfeSmillert lock_hash (%hash); 11643003dfeSmillert unlock_hash(%hash); 11743003dfeSmillert 11856d68f1eSafresh1 my $hashes_are_randomised = hash_seed() !~ /^\0+$/; 11943003dfeSmillert 12091f110e0Safresh1 my $int_hash_value = hash_value( 'string' ); 12191f110e0Safresh1 12291f110e0Safresh1 my $mask= hash_traversal_mask(%hash); 12391f110e0Safresh1 12491f110e0Safresh1 hash_traversal_mask(%hash,1234); 12591f110e0Safresh1 12643003dfeSmillert=head1 DESCRIPTION 12743003dfeSmillert 12843003dfeSmillertC<Hash::Util> and C<Hash::Util::FieldHash> contain special functions 12943003dfeSmillertfor manipulating hashes that don't really warrant a keyword. 13043003dfeSmillert 13143003dfeSmillertC<Hash::Util> contains a set of functions that support 13243003dfeSmillertL<restricted hashes|/"Restricted hashes">. These are described in 13343003dfeSmillertthis document. C<Hash::Util::FieldHash> contains an (unrelated) 13443003dfeSmillertset of functions that support the use of hashes in 13543003dfeSmillertI<inside-out classes>, described in L<Hash::Util::FieldHash>. 13643003dfeSmillert 13743003dfeSmillertBy default C<Hash::Util> does not export anything. 13843003dfeSmillert 13943003dfeSmillert=head2 Restricted hashes 14043003dfeSmillert 14143003dfeSmillert5.8.0 introduces the ability to restrict a hash to a certain set of 14243003dfeSmillertkeys. No keys outside of this set can be added. It also introduces 14343003dfeSmillertthe ability to lock an individual key so it cannot be deleted and the 14443003dfeSmillertability to ensure that an individual value cannot be changed. 14543003dfeSmillert 14643003dfeSmillertThis is intended to largely replace the deprecated pseudo-hashes. 14743003dfeSmillert 14843003dfeSmillert=over 4 14943003dfeSmillert 15043003dfeSmillert=item B<lock_keys> 15143003dfeSmillert 15243003dfeSmillert=item B<unlock_keys> 15343003dfeSmillert 15443003dfeSmillert lock_keys(%hash); 15543003dfeSmillert lock_keys(%hash, @keys); 15643003dfeSmillert 15743003dfeSmillertRestricts the given %hash's set of keys to @keys. If @keys is not 15843003dfeSmillertgiven it restricts it to its current keyset. No more keys can be 15943003dfeSmillertadded. delete() and exists() will still work, but will not alter 16043003dfeSmillertthe set of allowed keys. B<Note>: the current implementation prevents 16143003dfeSmillertthe hash from being bless()ed while it is in a locked state. Any attempt 16243003dfeSmillertto do so will raise an exception. Of course you can still bless() 16343003dfeSmillertthe hash before you call lock_keys() so this shouldn't be a problem. 16443003dfeSmillert 16543003dfeSmillert unlock_keys(%hash); 16643003dfeSmillert 16743003dfeSmillertRemoves the restriction on the %hash's keyset. 16843003dfeSmillert 16991f110e0Safresh1B<Note> that if any of the values of the hash have been locked they will not 17091f110e0Safresh1be unlocked after this sub executes. 17143003dfeSmillert 17243003dfeSmillertBoth routines return a reference to the hash operated on. 17343003dfeSmillert 17443003dfeSmillert=cut 17543003dfeSmillert 17643003dfeSmillertsub lock_ref_keys { 17743003dfeSmillert my($hash, @keys) = @_; 17843003dfeSmillert 1799f11ffb7Safresh1 _clear_placeholders(%$hash); 18043003dfeSmillert if( @keys ) { 18143003dfeSmillert my %keys = map { ($_ => 1) } @keys; 18243003dfeSmillert my %original_keys = map { ($_ => 1) } keys %$hash; 18343003dfeSmillert foreach my $k (keys %original_keys) { 18443003dfeSmillert croak "Hash has key '$k' which is not in the new key set" 18543003dfeSmillert unless $keys{$k}; 18643003dfeSmillert } 18743003dfeSmillert 18843003dfeSmillert foreach my $k (@keys) { 18943003dfeSmillert $hash->{$k} = undef unless exists $hash->{$k}; 19043003dfeSmillert } 19143003dfeSmillert Internals::SvREADONLY %$hash, 1; 19243003dfeSmillert 19343003dfeSmillert foreach my $k (@keys) { 19443003dfeSmillert delete $hash->{$k} unless $original_keys{$k}; 19543003dfeSmillert } 19643003dfeSmillert } 19743003dfeSmillert else { 19843003dfeSmillert Internals::SvREADONLY %$hash, 1; 19943003dfeSmillert } 20043003dfeSmillert 20143003dfeSmillert return $hash; 20243003dfeSmillert} 20343003dfeSmillert 20443003dfeSmillertsub unlock_ref_keys { 20543003dfeSmillert my $hash = shift; 20643003dfeSmillert 20743003dfeSmillert Internals::SvREADONLY %$hash, 0; 20843003dfeSmillert return $hash; 20943003dfeSmillert} 21043003dfeSmillert 21143003dfeSmillertsub lock_keys (\%;@) { lock_ref_keys(@_) } 21243003dfeSmillertsub unlock_keys (\%) { unlock_ref_keys(@_) } 21343003dfeSmillert 2149f11ffb7Safresh1#=item B<_clear_placeholders> 2159f11ffb7Safresh1# 2169f11ffb7Safresh1# This function removes any placeholder keys from a hash. See Perl_hv_clear_placeholders() 2179f11ffb7Safresh1# in hv.c for what it does exactly. It is currently exposed as XS by universal.c and 2189f11ffb7Safresh1# injected into the Hash::Util namespace. 2199f11ffb7Safresh1# 2209f11ffb7Safresh1# It is not intended for use outside of this module, and may be changed 2219f11ffb7Safresh1# or removed without notice or deprecation cycle. 2229f11ffb7Safresh1# 2239f11ffb7Safresh1#=cut 2249f11ffb7Safresh1# 2259f11ffb7Safresh1# sub _clear_placeholders {} # just in case someone searches... 2269f11ffb7Safresh1 22743003dfeSmillert=item B<lock_keys_plus> 22843003dfeSmillert 22943003dfeSmillert lock_keys_plus(%hash,@additional_keys) 23043003dfeSmillert 23143003dfeSmillertSimilar to C<lock_keys()>, with the difference being that the optional key list 23243003dfeSmillertspecifies keys that may or may not be already in the hash. Essentially this is 23343003dfeSmillertan easier way to say 23443003dfeSmillert 23543003dfeSmillert lock_keys(%hash,@additional_keys,keys %hash); 23643003dfeSmillert 23743003dfeSmillertReturns a reference to %hash 23843003dfeSmillert 23943003dfeSmillert=cut 24043003dfeSmillert 24143003dfeSmillert 24243003dfeSmillertsub lock_ref_keys_plus { 24343003dfeSmillert my ($hash,@keys) = @_; 24443003dfeSmillert my @delete; 2459f11ffb7Safresh1 _clear_placeholders(%$hash); 24643003dfeSmillert foreach my $key (@keys) { 24743003dfeSmillert unless (exists($hash->{$key})) { 24843003dfeSmillert $hash->{$key}=undef; 24943003dfeSmillert push @delete,$key; 25043003dfeSmillert } 25143003dfeSmillert } 25243003dfeSmillert Internals::SvREADONLY(%$hash,1); 25343003dfeSmillert delete @{$hash}{@delete}; 25443003dfeSmillert return $hash 25543003dfeSmillert} 25643003dfeSmillert 25743003dfeSmillertsub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) } 25843003dfeSmillert 25943003dfeSmillert 26043003dfeSmillert=item B<lock_value> 26143003dfeSmillert 26243003dfeSmillert=item B<unlock_value> 26343003dfeSmillert 26443003dfeSmillert lock_value (%hash, $key); 26543003dfeSmillert unlock_value(%hash, $key); 26643003dfeSmillert 26743003dfeSmillertLocks and unlocks the value for an individual key of a hash. The value of a 26843003dfeSmillertlocked key cannot be changed. 26943003dfeSmillert 27043003dfeSmillertUnless %hash has already been locked the key/value could be deleted 27143003dfeSmillertregardless of this setting. 27243003dfeSmillert 27343003dfeSmillertReturns a reference to the %hash. 27443003dfeSmillert 27543003dfeSmillert=cut 27643003dfeSmillert 27743003dfeSmillertsub lock_ref_value { 27843003dfeSmillert my($hash, $key) = @_; 27943003dfeSmillert # I'm doubtful about this warning, as it seems not to be true. 28043003dfeSmillert # Marking a value in the hash as RO is useful, regardless 28143003dfeSmillert # of the status of the hash itself. 28243003dfeSmillert carp "Cannot usefully lock values in an unlocked hash" 28343003dfeSmillert if !Internals::SvREADONLY(%$hash) && warnings::enabled; 28443003dfeSmillert Internals::SvREADONLY $hash->{$key}, 1; 28543003dfeSmillert return $hash 28643003dfeSmillert} 28743003dfeSmillert 28843003dfeSmillertsub unlock_ref_value { 28943003dfeSmillert my($hash, $key) = @_; 29043003dfeSmillert Internals::SvREADONLY $hash->{$key}, 0; 29143003dfeSmillert return $hash 29243003dfeSmillert} 29343003dfeSmillert 29443003dfeSmillertsub lock_value (\%$) { lock_ref_value(@_) } 29543003dfeSmillertsub unlock_value (\%$) { unlock_ref_value(@_) } 29643003dfeSmillert 29743003dfeSmillert 29843003dfeSmillert=item B<lock_hash> 29943003dfeSmillert 30043003dfeSmillert=item B<unlock_hash> 30143003dfeSmillert 30243003dfeSmillert lock_hash(%hash); 30343003dfeSmillert 30443003dfeSmillertlock_hash() locks an entire hash, making all keys and values read-only. 30543003dfeSmillertNo value can be changed, no keys can be added or deleted. 30643003dfeSmillert 30743003dfeSmillert unlock_hash(%hash); 30843003dfeSmillert 30943003dfeSmillertunlock_hash() does the opposite of lock_hash(). All keys and values 31043003dfeSmillertare made writable. All values can be changed and keys can be added 31143003dfeSmillertand deleted. 31243003dfeSmillert 31343003dfeSmillertReturns a reference to the %hash. 31443003dfeSmillert 31543003dfeSmillert=cut 31643003dfeSmillert 31743003dfeSmillertsub lock_hashref { 31843003dfeSmillert my $hash = shift; 31943003dfeSmillert 32043003dfeSmillert lock_ref_keys($hash); 32143003dfeSmillert 32243003dfeSmillert foreach my $value (values %$hash) { 32343003dfeSmillert Internals::SvREADONLY($value,1); 32443003dfeSmillert } 32543003dfeSmillert 32643003dfeSmillert return $hash; 32743003dfeSmillert} 32843003dfeSmillert 32943003dfeSmillertsub unlock_hashref { 33043003dfeSmillert my $hash = shift; 33143003dfeSmillert 33243003dfeSmillert foreach my $value (values %$hash) { 33343003dfeSmillert Internals::SvREADONLY($value, 0); 33443003dfeSmillert } 33543003dfeSmillert 33643003dfeSmillert unlock_ref_keys($hash); 33743003dfeSmillert 33843003dfeSmillert return $hash; 33943003dfeSmillert} 34043003dfeSmillert 34143003dfeSmillertsub lock_hash (\%) { lock_hashref(@_) } 34243003dfeSmillertsub unlock_hash (\%) { unlock_hashref(@_) } 34343003dfeSmillert 34443003dfeSmillert=item B<lock_hash_recurse> 34543003dfeSmillert 34643003dfeSmillert=item B<unlock_hash_recurse> 34743003dfeSmillert 34843003dfeSmillert lock_hash_recurse(%hash); 34943003dfeSmillert 35043003dfeSmillertlock_hash() locks an entire hash and any hashes it references recursively, 35143003dfeSmillertmaking all keys and values read-only. No value can be changed, no keys can 35243003dfeSmillertbe added or deleted. 35343003dfeSmillert 35491f110e0Safresh1This method B<only> recurses into hashes that are referenced by another hash. 35591f110e0Safresh1Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of 35691f110e0Safresh1Hashes (HoAoH) will only have the top hash restricted. 35743003dfeSmillert 35843003dfeSmillert unlock_hash_recurse(%hash); 35943003dfeSmillert 36043003dfeSmillertunlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and 36143003dfeSmillertvalues are made writable. All values can be changed and keys can be added 36243003dfeSmillertand deleted. Identical recursion restrictions apply as to lock_hash_recurse(). 36343003dfeSmillert 36443003dfeSmillertReturns a reference to the %hash. 36543003dfeSmillert 36643003dfeSmillert=cut 36743003dfeSmillert 36843003dfeSmillertsub lock_hashref_recurse { 36943003dfeSmillert my $hash = shift; 37043003dfeSmillert 37143003dfeSmillert lock_ref_keys($hash); 37243003dfeSmillert foreach my $value (values %$hash) { 373898184e3Ssthen my $type = reftype($value); 374898184e3Ssthen if (defined($type) and $type eq 'HASH') { 37543003dfeSmillert lock_hashref_recurse($value); 37643003dfeSmillert } 37743003dfeSmillert Internals::SvREADONLY($value,1); 37843003dfeSmillert } 37943003dfeSmillert return $hash 38043003dfeSmillert} 38143003dfeSmillert 38243003dfeSmillertsub unlock_hashref_recurse { 38343003dfeSmillert my $hash = shift; 38443003dfeSmillert 38543003dfeSmillert foreach my $value (values %$hash) { 386898184e3Ssthen my $type = reftype($value); 387898184e3Ssthen if (defined($type) and $type eq 'HASH') { 38843003dfeSmillert unlock_hashref_recurse($value); 38943003dfeSmillert } 390b8851fccSafresh1 Internals::SvREADONLY($value,0); 39143003dfeSmillert } 39243003dfeSmillert unlock_ref_keys($hash); 39343003dfeSmillert return $hash; 39443003dfeSmillert} 39543003dfeSmillert 39643003dfeSmillertsub lock_hash_recurse (\%) { lock_hashref_recurse(@_) } 39743003dfeSmillertsub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) } 39843003dfeSmillert 39991f110e0Safresh1=item B<hashref_locked> 40091f110e0Safresh1 40191f110e0Safresh1=item B<hash_locked> 40291f110e0Safresh1 40391f110e0Safresh1 hashref_locked(\%hash) and print "Hash is locked!\n"; 40491f110e0Safresh1 hash_locked(%hash) and print "Hash is locked!\n"; 40591f110e0Safresh1 40691f110e0Safresh1Returns true if the hash and its keys are locked. 40791f110e0Safresh1 40891f110e0Safresh1=cut 40991f110e0Safresh1 41091f110e0Safresh1sub hashref_locked { 41191f110e0Safresh1 my $hash=shift; 41291f110e0Safresh1 Internals::SvREADONLY(%$hash); 41391f110e0Safresh1} 41491f110e0Safresh1 41591f110e0Safresh1sub hash_locked(\%) { hashref_locked(@_) } 41691f110e0Safresh1 41791f110e0Safresh1=item B<hashref_unlocked> 41843003dfeSmillert 41943003dfeSmillert=item B<hash_unlocked> 42043003dfeSmillert 42191f110e0Safresh1 hashref_unlocked(\%hash) and print "Hash is unlocked!\n"; 42243003dfeSmillert hash_unlocked(%hash) and print "Hash is unlocked!\n"; 42343003dfeSmillert 42443003dfeSmillertReturns true if the hash and its keys are unlocked. 42543003dfeSmillert 42643003dfeSmillert=cut 42743003dfeSmillert 42843003dfeSmillertsub hashref_unlocked { 42943003dfeSmillert my $hash=shift; 43091f110e0Safresh1 !Internals::SvREADONLY(%$hash); 43143003dfeSmillert} 43243003dfeSmillert 43343003dfeSmillertsub hash_unlocked(\%) { hashref_unlocked(@_) } 43443003dfeSmillert 43543003dfeSmillert=for demerphqs_editor 43643003dfeSmillertsub legal_ref_keys{} 43743003dfeSmillertsub hidden_ref_keys{} 43843003dfeSmillertsub all_keys{} 43943003dfeSmillert 44043003dfeSmillert=cut 44143003dfeSmillert 44243003dfeSmillertsub legal_keys(\%) { legal_ref_keys(@_) } 44343003dfeSmillertsub hidden_keys(\%){ hidden_ref_keys(@_) } 44443003dfeSmillert 44543003dfeSmillert=item B<legal_keys> 44643003dfeSmillert 44743003dfeSmillert my @keys = legal_keys(%hash); 44843003dfeSmillert 44943003dfeSmillertReturns the list of the keys that are legal in a restricted hash. 45043003dfeSmillertIn the case of an unrestricted hash this is identical to calling 45143003dfeSmillertkeys(%hash). 45243003dfeSmillert 45343003dfeSmillert=item B<hidden_keys> 45443003dfeSmillert 45543003dfeSmillert my @keys = hidden_keys(%hash); 45643003dfeSmillert 45743003dfeSmillertReturns the list of the keys that are legal in a restricted hash but 45843003dfeSmillertdo not have a value associated to them. Thus if 'foo' is a 45943003dfeSmillert"hidden" key of the %hash it will return false for both C<defined> 46043003dfeSmillertand C<exists> tests. 46143003dfeSmillert 46243003dfeSmillertIn the case of an unrestricted hash this will return an empty list. 46343003dfeSmillert 46443003dfeSmillertB<NOTE> this is an experimental feature that is heavily dependent 46543003dfeSmillerton the current implementation of restricted hashes. Should the 46643003dfeSmillertimplementation change, this routine may become meaningless, in which 46743003dfeSmillertcase it will return an empty list. 46843003dfeSmillert 46943003dfeSmillert=item B<all_keys> 47043003dfeSmillert 47143003dfeSmillert all_keys(%hash,@keys,@hidden); 47243003dfeSmillert 47343003dfeSmillertPopulates the arrays @keys with the all the keys that would pass 47443003dfeSmillertan C<exists> tests, and populates @hidden with the remaining legal 47543003dfeSmillertkeys that have not been utilized. 47643003dfeSmillert 47743003dfeSmillertReturns a reference to the hash. 47843003dfeSmillert 47943003dfeSmillertIn the case of an unrestricted hash this will be equivalent to 48043003dfeSmillert 48143003dfeSmillert $ref = do { 48243003dfeSmillert @keys = keys %hash; 48343003dfeSmillert @hidden = (); 48443003dfeSmillert \%hash 48543003dfeSmillert }; 48643003dfeSmillert 48743003dfeSmillertB<NOTE> this is an experimental feature that is heavily dependent 48843003dfeSmillerton the current implementation of restricted hashes. Should the 48943003dfeSmillertimplementation change this routine may become meaningless in which 49043003dfeSmillertcase it will behave identically to how it would behave on an 49143003dfeSmillertunrestricted hash. 49243003dfeSmillert 49343003dfeSmillert=item B<hash_seed> 49443003dfeSmillert 49543003dfeSmillert my $hash_seed = hash_seed(); 49643003dfeSmillert 49791f110e0Safresh1hash_seed() returns the seed bytes used to randomise hash ordering. 49843003dfeSmillert 49943003dfeSmillertB<Note that the hash seed is sensitive information>: by knowing it one 50043003dfeSmillertcan craft a denial-of-service attack against Perl code, even remotely, 50143003dfeSmillertsee L<perlsec/"Algorithmic Complexity Attacks"> for more information. 50243003dfeSmillertB<Do not disclose the hash seed> to people who don't need to know it. 50343003dfeSmillertSee also L<perlrun/PERL_HASH_SEED_DEBUG>. 50443003dfeSmillert 50591f110e0Safresh1Prior to Perl 5.17.6 this function returned a UV, it now returns a string, 50691f110e0Safresh1which may be of nearly any size as determined by the hash function your 50791f110e0Safresh1Perl has been built with. Possible sizes may be but are not limited to 50891f110e0Safresh14 bytes (for most hash algorithms) and 16 bytes (for siphash). 50991f110e0Safresh1 51091f110e0Safresh1=item B<hash_value> 51191f110e0Safresh1 51291f110e0Safresh1 my $hash_value = hash_value($string); 513eac174f2Safresh1 my $hash_value = hash_value($string, $seed); 51491f110e0Safresh1 515eac174f2Safresh1C<hash_value($string)> 516eac174f2Safresh1returns 517eac174f2Safresh1the current perl's internal hash value for a given string. 518eac174f2Safresh1C<hash_value($string, $seed)> 519eac174f2Safresh1returns the hash value as if computed with a different seed. 520eac174f2Safresh1If the custom seed is too short, the function errors out. 521eac174f2Safresh1The minimum length of the seed is implementation-dependent. 52291f110e0Safresh1 523eac174f2Safresh1Returns a 32-bit integer 524eac174f2Safresh1representing the hash value of the string passed in. 525eac174f2Safresh1The 1-parameter value is only reliable 526eac174f2Safresh1for the lifetime of the process. 527eac174f2Safresh1It may be different 528eac174f2Safresh1depending on invocation, environment variables, perl version, 52991f110e0Safresh1architectures, and build options. 53091f110e0Safresh1 53191f110e0Safresh1B<Note that the hash value of a given string is sensitive information>: 53291f110e0Safresh1by knowing it one can deduce the hash seed which in turn can allow one to 53391f110e0Safresh1craft a denial-of-service attack against Perl code, even remotely, 53491f110e0Safresh1see L<perlsec/"Algorithmic Complexity Attacks"> for more information. 53591f110e0Safresh1B<Do not disclose the hash value of a string> to people who don't need to 53691f110e0Safresh1know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. 53791f110e0Safresh1 53891f110e0Safresh1=item B<bucket_info> 53991f110e0Safresh1 54091f110e0Safresh1Return a set of basic information about a hash. 54191f110e0Safresh1 54291f110e0Safresh1 my ($keys, $buckets, $used, @length_counts)= bucket_info($hash); 54391f110e0Safresh1 54491f110e0Safresh1Fields are as follows: 54591f110e0Safresh1 54691f110e0Safresh1 0: Number of keys in the hash 54791f110e0Safresh1 1: Number of buckets in the hash 54891f110e0Safresh1 2: Number of used buckets in the hash 54991f110e0Safresh1 rest : list of counts, Kth element is the number of buckets 55091f110e0Safresh1 with K keys in it. 55191f110e0Safresh1 55291f110e0Safresh1See also bucket_stats() and bucket_array(). 55391f110e0Safresh1 55491f110e0Safresh1=item B<bucket_stats> 55591f110e0Safresh1 55691f110e0Safresh1Returns a list of statistics about a hash. 55791f110e0Safresh1 558b8851fccSafresh1 my ($keys, $buckets, $used, $quality, $utilization_ratio, 559b8851fccSafresh1 $collision_pct, $mean, $stddev, @length_counts) 560b8851fccSafresh1 = bucket_stats($hashref); 56191f110e0Safresh1 56291f110e0Safresh1Fields are as follows: 56391f110e0Safresh1 56491f110e0Safresh1 0: Number of keys in the hash 56591f110e0Safresh1 1: Number of buckets in the hash 56691f110e0Safresh1 2: Number of used buckets in the hash 56791f110e0Safresh1 3: Hash Quality Score 56891f110e0Safresh1 4: Percent of buckets used 56991f110e0Safresh1 5: Percent of keys which are in collision 570b8851fccSafresh1 6: Mean bucket length of occupied buckets 571b8851fccSafresh1 7: Standard Deviation of bucket lengths of occupied buckets 57291f110e0Safresh1 rest : list of counts, Kth element is the number of buckets 57391f110e0Safresh1 with K keys in it. 57491f110e0Safresh1 57591f110e0Safresh1See also bucket_info() and bucket_array(). 57691f110e0Safresh1 57791f110e0Safresh1Note that Hash Quality Score would be 1 for an ideal hash, numbers 57891f110e0Safresh1close to and below 1 indicate good hashing, and number significantly 57991f110e0Safresh1above indicate a poor score. In practice it should be around 0.95 to 1.05. 58091f110e0Safresh1It is defined as: 58191f110e0Safresh1 58291f110e0Safresh1 $score= sum( $count[$length] * ($length * ($length + 1) / 2) ) 58391f110e0Safresh1 / 58491f110e0Safresh1 ( ( $keys / 2 * $buckets ) * 58591f110e0Safresh1 ( $keys + ( 2 * $buckets ) - 1 ) ) 58691f110e0Safresh1 58791f110e0Safresh1The formula is from the Red Dragon book (reformulated to use the data available) 58891f110e0Safresh1and is documented at L<http://www.strchr.com/hash_functions> 58991f110e0Safresh1 59091f110e0Safresh1=item B<bucket_array> 59191f110e0Safresh1 59291f110e0Safresh1 my $array= bucket_array(\%hash); 59391f110e0Safresh1 59491f110e0Safresh1Returns a packed representation of the bucket array associated with a hash. Each element 59591f110e0Safresh1of the array is either an integer K, in which case it represents K empty buckets, or 59691f110e0Safresh1a reference to another array which contains the keys that are in that bucket. 59791f110e0Safresh1 59891f110e0Safresh1B<Note that the information returned by bucket_array is sensitive information>: 59991f110e0Safresh1by knowing it one can directly attack perl's hash function which in turn may allow 60091f110e0Safresh1one to craft a denial-of-service attack against Perl code, even remotely, 60191f110e0Safresh1see L<perlsec/"Algorithmic Complexity Attacks"> for more information. 60291f110e0Safresh1B<Do not disclose the output of this function> to people who don't need to 60391f110e0Safresh1know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly 60491f110e0Safresh1for debugging and diagnostics purposes only, it is hard to imagine a reason why it 60591f110e0Safresh1would be used in production code. 60691f110e0Safresh1 60743003dfeSmillert=cut 60843003dfeSmillert 60991f110e0Safresh1 61091f110e0Safresh1sub bucket_stats { 61191f110e0Safresh1 my ($hash) = @_; 61291f110e0Safresh1 my ($keys, $buckets, $used, @length_counts) = bucket_info($hash); 61391f110e0Safresh1 my $sum; 61491f110e0Safresh1 my $score; 615b8851fccSafresh1 for (1 .. $#length_counts) { 61691f110e0Safresh1 $sum += ($length_counts[$_] * $_); 61791f110e0Safresh1 $score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 ); 61891f110e0Safresh1 } 61991f110e0Safresh1 $score = $score / 62091f110e0Safresh1 (( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 )) 62191f110e0Safresh1 if $keys; 622b8851fccSafresh1 my ($mean, $stddev)= (0, 0); 623b8851fccSafresh1 if ($used) { 624b8851fccSafresh1 $mean= $sum / $used; 62591f110e0Safresh1 $sum= 0; 626b8851fccSafresh1 $sum += ($length_counts[$_] * (($_-$mean)**2)) for 1 .. $#length_counts; 62791f110e0Safresh1 628b8851fccSafresh1 $stddev= sqrt($sum/$used); 629b8851fccSafresh1 } 63091f110e0Safresh1 return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : (); 63143003dfeSmillert} 63243003dfeSmillert 633b8851fccSafresh1=item B<bucket_stats_formatted> 634b8851fccSafresh1 635b8851fccSafresh1 print bucket_stats_formatted($hashref); 636b8851fccSafresh1 637b8851fccSafresh1Return a formatted report of the information returned by bucket_stats(). 638b8851fccSafresh1An example report looks like this: 639b8851fccSafresh1 640b8851fccSafresh1 Keys: 50 Buckets: 33/64 Quality-Score: 1.01 (Good) 641b8851fccSafresh1 Utilized Buckets: 51.56% Optimal: 78.12% Keys In Collision: 34.00% 642b8851fccSafresh1 Chain Length - mean: 1.52 stddev: 0.66 643b8851fccSafresh1 Buckets 64 [0000000000000000000000000000000111111111111111111122222222222333] 644b8851fccSafresh1 Len 0 Pct: 48.44 [###############################] 645b8851fccSafresh1 Len 1 Pct: 29.69 [###################] 646b8851fccSafresh1 Len 2 Pct: 17.19 [###########] 647b8851fccSafresh1 Len 3 Pct: 4.69 [###] 648b8851fccSafresh1 Keys 50 [11111111111111111111111111111111122222222222222333] 649b8851fccSafresh1 Pos 1 Pct: 66.00 [#################################] 650b8851fccSafresh1 Pos 2 Pct: 28.00 [##############] 651b8851fccSafresh1 Pos 3 Pct: 6.00 [###] 652b8851fccSafresh1 653b8851fccSafresh1The first set of stats gives some summary statistical information, 654b8851fccSafresh1including the quality score translated into "Good", "Poor" and "Bad", 655b8851fccSafresh1(score<=1.05, score<=1.2, score>1.2). See the documentation in 656b8851fccSafresh1bucket_stats() for more details. 657b8851fccSafresh1 658b8851fccSafresh1The two sets of barcharts give stats and a visual indication of performance 659b8851fccSafresh1of the hash. 660b8851fccSafresh1 661b8851fccSafresh1The first gives data on bucket chain lengths and provides insight on how 662b8851fccSafresh1much work a fetch *miss* will take. In this case we have to inspect every item 663b8851fccSafresh1in a bucket before we can be sure the item is not in the list. The performance 664b8851fccSafresh1for an insert is equivalent to this case, as is a delete where the item 665b8851fccSafresh1is not in the hash. 666b8851fccSafresh1 667b8851fccSafresh1The second gives data on how many keys are at each depth in the chain, and 668b8851fccSafresh1gives an idea of how much work a fetch *hit* will take. The performance for 669b8851fccSafresh1an update or delete of an item in the hash is equivalent to this case. 670b8851fccSafresh1 671b8851fccSafresh1Note that these statistics are summary only. Actual performance will depend 672b8851fccSafresh1on real hit/miss ratios accessing the hash. If you are concerned by hit ratios 673b8851fccSafresh1you are recommended to "oversize" your hash by using something like: 674b8851fccSafresh1 675b8851fccSafresh1 keys(%hash)= keys(%hash) << $k; 676b8851fccSafresh1 677b8851fccSafresh1With $k chosen carefully, and likely to be a small number like 1 or 2. In 678b8851fccSafresh1theory the larger the bucket array the less chance of collision. 679b8851fccSafresh1 680b8851fccSafresh1=cut 681b8851fccSafresh1 682b8851fccSafresh1 683b8851fccSafresh1sub _bucket_stats_formatted_bars { 684b8851fccSafresh1 my ($total, $ary, $start_idx, $title, $row_title)= @_; 685b8851fccSafresh1 686b8851fccSafresh1 my $return = ""; 687b8851fccSafresh1 my $max_width= $total > 64 ? 64 : $total; 688b8851fccSafresh1 my $bar_width= $max_width / $total; 689b8851fccSafresh1 690b8851fccSafresh1 my $str= ""; 691b8851fccSafresh1 if ( @$ary < 10) { 692b8851fccSafresh1 for my $idx ($start_idx .. $#$ary) { 693b8851fccSafresh1 $str .= $idx x sprintf("%.0f", ($ary->[$idx] * $bar_width)); 694b8851fccSafresh1 } 695b8851fccSafresh1 } else { 696b8851fccSafresh1 $str= "-" x $max_width; 697b8851fccSafresh1 } 698b8851fccSafresh1 $return .= sprintf "%-7s %6d [%s]\n",$title, $total, $str; 699b8851fccSafresh1 700b8851fccSafresh1 foreach my $idx ($start_idx .. $#$ary) { 701b8851fccSafresh1 $return .= sprintf "%-.3s %3d %6.2f%% %6d [%s]\n", 702b8851fccSafresh1 $row_title, 703b8851fccSafresh1 $idx, 704b8851fccSafresh1 $ary->[$idx] / $total * 100, 705b8851fccSafresh1 $ary->[$idx], 706b8851fccSafresh1 "#" x sprintf("%.0f", ($ary->[$idx] * $bar_width)), 707b8851fccSafresh1 ; 708b8851fccSafresh1 } 709b8851fccSafresh1 return $return; 710b8851fccSafresh1} 711b8851fccSafresh1 712b8851fccSafresh1sub bucket_stats_formatted { 713b8851fccSafresh1 my ($hashref)= @_; 714b8851fccSafresh1 my ($keys, $buckets, $used, $score, $utilization_ratio, $collision_pct, 715b8851fccSafresh1 $mean, $stddev, @length_counts) = bucket_stats($hashref); 716b8851fccSafresh1 717b8851fccSafresh1 my $return= sprintf "Keys: %d Buckets: %d/%d Quality-Score: %.2f (%s)\n" 718b8851fccSafresh1 . "Utilized Buckets: %.2f%% Optimal: %.2f%% Keys In Collision: %.2f%%\n" 719b8851fccSafresh1 . "Chain Length - mean: %.2f stddev: %.2f\n", 720b8851fccSafresh1 $keys, $used, $buckets, $score, $score <= 1.05 ? "Good" : $score < 1.2 ? "Poor" : "Bad", 721b8851fccSafresh1 $utilization_ratio * 100, 722b8851fccSafresh1 $keys/$buckets * 100, 723b8851fccSafresh1 $collision_pct * 100, 724b8851fccSafresh1 $mean, $stddev; 725b8851fccSafresh1 726b8851fccSafresh1 my @key_depth; 727b8851fccSafresh1 $key_depth[$_]= $length_counts[$_] + ( $key_depth[$_+1] || 0 ) 728b8851fccSafresh1 for reverse 1 .. $#length_counts; 729b8851fccSafresh1 730b8851fccSafresh1 if ($keys) { 731b8851fccSafresh1 $return .= _bucket_stats_formatted_bars($buckets, \@length_counts, 0, "Buckets", "Len"); 732b8851fccSafresh1 $return .= _bucket_stats_formatted_bars($keys, \@key_depth, 1, "Keys", "Pos"); 733b8851fccSafresh1 } 734b8851fccSafresh1 return $return 735b8851fccSafresh1} 736b8851fccSafresh1 73743003dfeSmillert=item B<hv_store> 73843003dfeSmillert 73943003dfeSmillert my $sv = 0; 74043003dfeSmillert hv_store(%hash,$key,$sv) or die "Failed to alias!"; 74143003dfeSmillert $hash{$key} = 1; 74243003dfeSmillert print $sv; # prints 1 74343003dfeSmillert 74443003dfeSmillertStores an alias to a variable in a hash instead of copying the value. 74543003dfeSmillert 74691f110e0Safresh1=item B<hash_traversal_mask> 74791f110e0Safresh1 74891f110e0Safresh1As of Perl 5.18 every hash has its own hash traversal order, and this order 74991f110e0Safresh1changes every time a new element is inserted into the hash. This functionality 75091f110e0Safresh1is provided by maintaining an unsigned integer mask (U32) which is xor'ed 75191f110e0Safresh1with the actual bucket id during a traversal of the hash buckets using keys(), 75291f110e0Safresh1values() or each(). 75391f110e0Safresh1 75491f110e0Safresh1You can use this subroutine to get and set the traversal mask for a specific 75591f110e0Safresh1hash. Setting the mask ensures that a given hash will produce the same key 75691f110e0Safresh1order. B<Note> that this does B<not> guarantee that B<two> hashes will produce 75791f110e0Safresh1the same key order for the same hash seed and traversal mask, items that 75891f110e0Safresh1collide into one bucket may have different orders regardless of this setting. 75991f110e0Safresh1 7609f11ffb7Safresh1=item B<bucket_ratio> 7619f11ffb7Safresh1 7629f11ffb7Safresh1This function behaves the same way that scalar(%hash) behaved prior to 7639f11ffb7Safresh1Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied 7649f11ffb7Safresh1hash method, if untied then if the hash is empty it return 0, otherwise it 7659f11ffb7Safresh1returns a string containing the number of used buckets in the hash, 7669f11ffb7Safresh1followed by a slash, followed by the total number of buckets in the hash. 7679f11ffb7Safresh1 7689f11ffb7Safresh1 my %hash=("foo"=>1); 7699f11ffb7Safresh1 print Hash::Util::bucket_ratio(%hash); # prints "1/8" 7709f11ffb7Safresh1 7719f11ffb7Safresh1=item B<used_buckets> 7729f11ffb7Safresh1 7739f11ffb7Safresh1This function returns the count of used buckets in the hash. It is expensive 7749f11ffb7Safresh1to calculate and the value is NOT cached, so avoid use of this function 7759f11ffb7Safresh1in production code. 7769f11ffb7Safresh1 7779f11ffb7Safresh1=item B<num_buckets> 7789f11ffb7Safresh1 7799f11ffb7Safresh1This function returns the total number of buckets the hash holds, or would 7809f11ffb7Safresh1hold if the array were created. (When a hash is freshly created the array 7819f11ffb7Safresh1may not be allocated even though this value will be non-zero.) 7829f11ffb7Safresh1 78343003dfeSmillert=back 78443003dfeSmillert 785*3d61058aSafresh1=head2 Operating on references to hashes 78643003dfeSmillert 78743003dfeSmillertMost subroutines documented in this module have equivalent versions 78843003dfeSmillertthat operate on references to hashes instead of native hashes. 78943003dfeSmillertThe following is a list of these subs. They are identical except 79043003dfeSmillertin name and in that instead of taking a %hash they take a $hashref, 79143003dfeSmillertand additionally are not prototyped. 79243003dfeSmillert 79343003dfeSmillert=over 4 79443003dfeSmillert 79543003dfeSmillert=item lock_ref_keys 79643003dfeSmillert 79743003dfeSmillert=item unlock_ref_keys 79843003dfeSmillert 79943003dfeSmillert=item lock_ref_keys_plus 80043003dfeSmillert 80143003dfeSmillert=item lock_ref_value 80243003dfeSmillert 80343003dfeSmillert=item unlock_ref_value 80443003dfeSmillert 80543003dfeSmillert=item lock_hashref 80643003dfeSmillert 80743003dfeSmillert=item unlock_hashref 80843003dfeSmillert 80943003dfeSmillert=item lock_hashref_recurse 81043003dfeSmillert 81143003dfeSmillert=item unlock_hashref_recurse 81243003dfeSmillert 81343003dfeSmillert=item hash_ref_unlocked 81443003dfeSmillert 81543003dfeSmillert=item legal_ref_keys 81643003dfeSmillert 81743003dfeSmillert=item hidden_ref_keys 81843003dfeSmillert 81943003dfeSmillert=back 82043003dfeSmillert 82143003dfeSmillert=head1 CAVEATS 82243003dfeSmillert 82343003dfeSmillertNote that the trapping of the restricted operations is not atomic: 82443003dfeSmillertfor example 82543003dfeSmillert 82643003dfeSmillert eval { %hash = (illegal_key => 1) } 82743003dfeSmillert 82843003dfeSmillertleaves the C<%hash> empty rather than with its original contents. 82943003dfeSmillert 83043003dfeSmillert=head1 BUGS 83143003dfeSmillert 83243003dfeSmillertThe interface exposed by this module is very close to the current 83343003dfeSmillertimplementation of restricted hashes. Over time it is expected that 83443003dfeSmillertthis behavior will be extended and the interface abstracted further. 83543003dfeSmillert 83643003dfeSmillert=head1 AUTHOR 83743003dfeSmillert 83843003dfeSmillertMichael G Schwern <schwern@pobox.com> on top of code by Nick 83943003dfeSmillertIng-Simmons and Jeffrey Friedl. 84043003dfeSmillert 84143003dfeSmillerthv_store() is from Array::RefElem, Copyright 2000 Gisle Aas. 84243003dfeSmillert 84343003dfeSmillertAdditional code by Yves Orton. 84443003dfeSmillert 845eac174f2Safresh1Description of C<hash_value($string, $seed)> 846eac174f2Safresh1by Christopher Yeleighton <ne01026@shark.2a.pl> 847eac174f2Safresh1 84843003dfeSmillert=head1 SEE ALSO 84943003dfeSmillert 85043003dfeSmillertL<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">. 85143003dfeSmillert 85243003dfeSmillertL<Hash::Util::FieldHash>. 85343003dfeSmillert 85443003dfeSmillert=cut 85543003dfeSmillert 85643003dfeSmillert1; 857