xref: /openbsd-src/gnu/usr.bin/perl/ext/Hash-Util/lib/Hash/Util.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
143003dfeSmillertpackage Hash::Util;
243003dfeSmillert
343003dfeSmillertrequire 5.007003;
443003dfeSmillertuse strict;
543003dfeSmillertuse Carp;
643003dfeSmillertuse warnings;
7898184e3Ssthenno warnings 'uninitialized';
843003dfeSmillertuse warnings::register;
9eac174f2Safresh1no warnings 'experimental::builtin';
10eac174f2Safresh1use builtin qw(reftype);
1143003dfeSmillert
1243003dfeSmillertrequire Exporter;
1343003dfeSmillertour @EXPORT_OK  = qw(
1443003dfeSmillert                     fieldhash fieldhashes
1543003dfeSmillert
1643003dfeSmillert                     all_keys
1743003dfeSmillert                     lock_keys unlock_keys
1843003dfeSmillert                     lock_value unlock_value
1943003dfeSmillert                     lock_hash unlock_hash
2091f110e0Safresh1                     lock_keys_plus
2191f110e0Safresh1                     hash_locked hash_unlocked
2291f110e0Safresh1                     hashref_locked hashref_unlocked
2343003dfeSmillert                     hidden_keys legal_keys
2443003dfeSmillert
2543003dfeSmillert                     lock_ref_keys unlock_ref_keys
2643003dfeSmillert                     lock_ref_value unlock_ref_value
2743003dfeSmillert                     lock_hashref unlock_hashref
2891f110e0Safresh1                     lock_ref_keys_plus
2943003dfeSmillert                     hidden_ref_keys legal_ref_keys
3043003dfeSmillert
3191f110e0Safresh1                     hash_seed hash_value hv_store
32b8851fccSafresh1                     bucket_stats bucket_stats_formatted bucket_info bucket_array
3391f110e0Safresh1                     lock_hash_recurse unlock_hash_recurse
34b8851fccSafresh1                     lock_hashref_recurse unlock_hashref_recurse
3543003dfeSmillert
3691f110e0Safresh1                     hash_traversal_mask
379f11ffb7Safresh1
389f11ffb7Safresh1                     bucket_ratio
399f11ffb7Safresh1                     used_buckets
409f11ffb7Safresh1                     num_buckets
4143003dfeSmillert                    );
429f11ffb7Safresh1BEGIN {
439f11ffb7Safresh1    # make sure all our XS routines are available early so their prototypes
449f11ffb7Safresh1    # are correctly applied in the following code.
45*3d61058aSafresh1    our $VERSION = '0.32';
46898184e3Ssthen    require XSLoader;
47898184e3Ssthen    XSLoader::load();
489f11ffb7Safresh1}
4943003dfeSmillert
5043003dfeSmillertsub import {
5143003dfeSmillert    my $class = shift;
5243003dfeSmillert    if ( grep /fieldhash/, @_ ) {
5343003dfeSmillert        require Hash::Util::FieldHash;
5443003dfeSmillert        Hash::Util::FieldHash->import(':all'); # for re-export
5543003dfeSmillert    }
5643003dfeSmillert    unshift @_, $class;
5743003dfeSmillert    goto &Exporter::import;
5843003dfeSmillert}
5943003dfeSmillert
6043003dfeSmillert
6143003dfeSmillert=head1 NAME
6243003dfeSmillert
6343003dfeSmillertHash::Util - A selection of general-utility hash subroutines
6443003dfeSmillert
6543003dfeSmillert=head1 SYNOPSIS
6643003dfeSmillert
6743003dfeSmillert  # Restricted hashes
6843003dfeSmillert
6943003dfeSmillert  use Hash::Util qw(
7091f110e0Safresh1                     fieldhash fieldhashes
7191f110e0Safresh1
7291f110e0Safresh1                     all_keys
7343003dfeSmillert                     lock_keys unlock_keys
7443003dfeSmillert                     lock_value unlock_value
7543003dfeSmillert                     lock_hash unlock_hash
7691f110e0Safresh1                     lock_keys_plus
7791f110e0Safresh1                     hash_locked hash_unlocked
7891f110e0Safresh1                     hashref_locked hashref_unlocked
7943003dfeSmillert                     hidden_keys legal_keys
8091f110e0Safresh1
8191f110e0Safresh1                     lock_ref_keys unlock_ref_keys
8291f110e0Safresh1                     lock_ref_value unlock_ref_value
8391f110e0Safresh1                     lock_hashref unlock_hashref
8491f110e0Safresh1                     lock_ref_keys_plus
8591f110e0Safresh1                     hidden_ref_keys legal_ref_keys
8691f110e0Safresh1
8791f110e0Safresh1                     hash_seed hash_value hv_store
8891f110e0Safresh1                     bucket_stats bucket_info bucket_array
8991f110e0Safresh1                     lock_hash_recurse unlock_hash_recurse
90b8851fccSafresh1                     lock_hashref_recurse unlock_hashref_recurse
9191f110e0Safresh1
9291f110e0Safresh1                     hash_traversal_mask
9343003dfeSmillert                   );
9443003dfeSmillert
95e0680481Safresh1  my %hash = (foo => 42, bar => 23);
9643003dfeSmillert  # Ways to restrict a hash
9743003dfeSmillert  lock_keys(%hash);
9843003dfeSmillert  lock_keys(%hash, @keyset);
9943003dfeSmillert  lock_keys_plus(%hash, @additional_keys);
10043003dfeSmillert
10143003dfeSmillert  # Ways to inspect the properties of a restricted hash
10243003dfeSmillert  my @legal = legal_keys(%hash);
10343003dfeSmillert  my @hidden = hidden_keys(%hash);
10443003dfeSmillert  my $ref = all_keys(%hash,@keys,@hidden);
10543003dfeSmillert  my $is_locked = hash_locked(%hash);
10643003dfeSmillert
10743003dfeSmillert  # Remove restrictions on the hash
10843003dfeSmillert  unlock_keys(%hash);
10943003dfeSmillert
11043003dfeSmillert  # Lock individual values in a hash
11143003dfeSmillert  lock_value  (%hash, 'foo');
11243003dfeSmillert  unlock_value(%hash, 'foo');
11343003dfeSmillert
11443003dfeSmillert  # Ways to change the restrictions on both keys and values
11543003dfeSmillert  lock_hash  (%hash);
11643003dfeSmillert  unlock_hash(%hash);
11743003dfeSmillert
11856d68f1eSafresh1  my $hashes_are_randomised = hash_seed() !~ /^\0+$/;
11943003dfeSmillert
12091f110e0Safresh1  my $int_hash_value = hash_value( 'string' );
12191f110e0Safresh1
12291f110e0Safresh1  my $mask= hash_traversal_mask(%hash);
12391f110e0Safresh1
12491f110e0Safresh1  hash_traversal_mask(%hash,1234);
12591f110e0Safresh1
12643003dfeSmillert=head1 DESCRIPTION
12743003dfeSmillert
12843003dfeSmillertC<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
12943003dfeSmillertfor manipulating hashes that don't really warrant a keyword.
13043003dfeSmillert
13143003dfeSmillertC<Hash::Util> contains a set of functions that support
13243003dfeSmillertL<restricted hashes|/"Restricted hashes">. These are described in
13343003dfeSmillertthis document.  C<Hash::Util::FieldHash> contains an (unrelated)
13443003dfeSmillertset of functions that support the use of hashes in
13543003dfeSmillertI<inside-out classes>, described in L<Hash::Util::FieldHash>.
13643003dfeSmillert
13743003dfeSmillertBy default C<Hash::Util> does not export anything.
13843003dfeSmillert
13943003dfeSmillert=head2 Restricted hashes
14043003dfeSmillert
14143003dfeSmillert5.8.0 introduces the ability to restrict a hash to a certain set of
14243003dfeSmillertkeys.  No keys outside of this set can be added.  It also introduces
14343003dfeSmillertthe ability to lock an individual key so it cannot be deleted and the
14443003dfeSmillertability to ensure that an individual value cannot be changed.
14543003dfeSmillert
14643003dfeSmillertThis is intended to largely replace the deprecated pseudo-hashes.
14743003dfeSmillert
14843003dfeSmillert=over 4
14943003dfeSmillert
15043003dfeSmillert=item B<lock_keys>
15143003dfeSmillert
15243003dfeSmillert=item B<unlock_keys>
15343003dfeSmillert
15443003dfeSmillert  lock_keys(%hash);
15543003dfeSmillert  lock_keys(%hash, @keys);
15643003dfeSmillert
15743003dfeSmillertRestricts the given %hash's set of keys to @keys.  If @keys is not
15843003dfeSmillertgiven it restricts it to its current keyset.  No more keys can be
15943003dfeSmillertadded. delete() and exists() will still work, but will not alter
16043003dfeSmillertthe set of allowed keys. B<Note>: the current implementation prevents
16143003dfeSmillertthe hash from being bless()ed while it is in a locked state. Any attempt
16243003dfeSmillertto do so will raise an exception. Of course you can still bless()
16343003dfeSmillertthe hash before you call lock_keys() so this shouldn't be a problem.
16443003dfeSmillert
16543003dfeSmillert  unlock_keys(%hash);
16643003dfeSmillert
16743003dfeSmillertRemoves the restriction on the %hash's keyset.
16843003dfeSmillert
16991f110e0Safresh1B<Note> that if any of the values of the hash have been locked they will not
17091f110e0Safresh1be unlocked after this sub executes.
17143003dfeSmillert
17243003dfeSmillertBoth routines return a reference to the hash operated on.
17343003dfeSmillert
17443003dfeSmillert=cut
17543003dfeSmillert
17643003dfeSmillertsub lock_ref_keys {
17743003dfeSmillert    my($hash, @keys) = @_;
17843003dfeSmillert
1799f11ffb7Safresh1    _clear_placeholders(%$hash);
18043003dfeSmillert    if( @keys ) {
18143003dfeSmillert        my %keys = map { ($_ => 1) } @keys;
18243003dfeSmillert        my %original_keys = map { ($_ => 1) } keys %$hash;
18343003dfeSmillert        foreach my $k (keys %original_keys) {
18443003dfeSmillert            croak "Hash has key '$k' which is not in the new key set"
18543003dfeSmillert              unless $keys{$k};
18643003dfeSmillert        }
18743003dfeSmillert
18843003dfeSmillert        foreach my $k (@keys) {
18943003dfeSmillert            $hash->{$k} = undef unless exists $hash->{$k};
19043003dfeSmillert        }
19143003dfeSmillert        Internals::SvREADONLY %$hash, 1;
19243003dfeSmillert
19343003dfeSmillert        foreach my $k (@keys) {
19443003dfeSmillert            delete $hash->{$k} unless $original_keys{$k};
19543003dfeSmillert        }
19643003dfeSmillert    }
19743003dfeSmillert    else {
19843003dfeSmillert        Internals::SvREADONLY %$hash, 1;
19943003dfeSmillert    }
20043003dfeSmillert
20143003dfeSmillert    return $hash;
20243003dfeSmillert}
20343003dfeSmillert
20443003dfeSmillertsub unlock_ref_keys {
20543003dfeSmillert    my $hash = shift;
20643003dfeSmillert
20743003dfeSmillert    Internals::SvREADONLY %$hash, 0;
20843003dfeSmillert    return $hash;
20943003dfeSmillert}
21043003dfeSmillert
21143003dfeSmillertsub   lock_keys (\%;@) {   lock_ref_keys(@_) }
21243003dfeSmillertsub unlock_keys (\%)   { unlock_ref_keys(@_) }
21343003dfeSmillert
2149f11ffb7Safresh1#=item B<_clear_placeholders>
2159f11ffb7Safresh1#
2169f11ffb7Safresh1# This function removes any placeholder keys from a hash. See Perl_hv_clear_placeholders()
2179f11ffb7Safresh1# in hv.c for what it does exactly. It is currently exposed as XS by universal.c and
2189f11ffb7Safresh1# injected into the Hash::Util namespace.
2199f11ffb7Safresh1#
2209f11ffb7Safresh1# It is not intended for use outside of this module, and may be changed
2219f11ffb7Safresh1# or removed without notice or deprecation cycle.
2229f11ffb7Safresh1#
2239f11ffb7Safresh1#=cut
2249f11ffb7Safresh1#
2259f11ffb7Safresh1# sub _clear_placeholders {} # just in case someone searches...
2269f11ffb7Safresh1
22743003dfeSmillert=item B<lock_keys_plus>
22843003dfeSmillert
22943003dfeSmillert  lock_keys_plus(%hash,@additional_keys)
23043003dfeSmillert
23143003dfeSmillertSimilar to C<lock_keys()>, with the difference being that the optional key list
23243003dfeSmillertspecifies keys that may or may not be already in the hash. Essentially this is
23343003dfeSmillertan easier way to say
23443003dfeSmillert
23543003dfeSmillert  lock_keys(%hash,@additional_keys,keys %hash);
23643003dfeSmillert
23743003dfeSmillertReturns a reference to %hash
23843003dfeSmillert
23943003dfeSmillert=cut
24043003dfeSmillert
24143003dfeSmillert
24243003dfeSmillertsub lock_ref_keys_plus {
24343003dfeSmillert    my ($hash,@keys) = @_;
24443003dfeSmillert    my @delete;
2459f11ffb7Safresh1    _clear_placeholders(%$hash);
24643003dfeSmillert    foreach my $key (@keys) {
24743003dfeSmillert        unless (exists($hash->{$key})) {
24843003dfeSmillert            $hash->{$key}=undef;
24943003dfeSmillert            push @delete,$key;
25043003dfeSmillert        }
25143003dfeSmillert    }
25243003dfeSmillert    Internals::SvREADONLY(%$hash,1);
25343003dfeSmillert    delete @{$hash}{@delete};
25443003dfeSmillert    return $hash
25543003dfeSmillert}
25643003dfeSmillert
25743003dfeSmillertsub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
25843003dfeSmillert
25943003dfeSmillert
26043003dfeSmillert=item B<lock_value>
26143003dfeSmillert
26243003dfeSmillert=item B<unlock_value>
26343003dfeSmillert
26443003dfeSmillert  lock_value  (%hash, $key);
26543003dfeSmillert  unlock_value(%hash, $key);
26643003dfeSmillert
26743003dfeSmillertLocks and unlocks the value for an individual key of a hash.  The value of a
26843003dfeSmillertlocked key cannot be changed.
26943003dfeSmillert
27043003dfeSmillertUnless %hash has already been locked the key/value could be deleted
27143003dfeSmillertregardless of this setting.
27243003dfeSmillert
27343003dfeSmillertReturns a reference to the %hash.
27443003dfeSmillert
27543003dfeSmillert=cut
27643003dfeSmillert
27743003dfeSmillertsub lock_ref_value {
27843003dfeSmillert    my($hash, $key) = @_;
27943003dfeSmillert    # I'm doubtful about this warning, as it seems not to be true.
28043003dfeSmillert    # Marking a value in the hash as RO is useful, regardless
28143003dfeSmillert    # of the status of the hash itself.
28243003dfeSmillert    carp "Cannot usefully lock values in an unlocked hash"
28343003dfeSmillert      if !Internals::SvREADONLY(%$hash) && warnings::enabled;
28443003dfeSmillert    Internals::SvREADONLY $hash->{$key}, 1;
28543003dfeSmillert    return $hash
28643003dfeSmillert}
28743003dfeSmillert
28843003dfeSmillertsub unlock_ref_value {
28943003dfeSmillert    my($hash, $key) = @_;
29043003dfeSmillert    Internals::SvREADONLY $hash->{$key}, 0;
29143003dfeSmillert    return $hash
29243003dfeSmillert}
29343003dfeSmillert
29443003dfeSmillertsub   lock_value (\%$) {   lock_ref_value(@_) }
29543003dfeSmillertsub unlock_value (\%$) { unlock_ref_value(@_) }
29643003dfeSmillert
29743003dfeSmillert
29843003dfeSmillert=item B<lock_hash>
29943003dfeSmillert
30043003dfeSmillert=item B<unlock_hash>
30143003dfeSmillert
30243003dfeSmillert    lock_hash(%hash);
30343003dfeSmillert
30443003dfeSmillertlock_hash() locks an entire hash, making all keys and values read-only.
30543003dfeSmillertNo value can be changed, no keys can be added or deleted.
30643003dfeSmillert
30743003dfeSmillert    unlock_hash(%hash);
30843003dfeSmillert
30943003dfeSmillertunlock_hash() does the opposite of lock_hash().  All keys and values
31043003dfeSmillertare made writable.  All values can be changed and keys can be added
31143003dfeSmillertand deleted.
31243003dfeSmillert
31343003dfeSmillertReturns a reference to the %hash.
31443003dfeSmillert
31543003dfeSmillert=cut
31643003dfeSmillert
31743003dfeSmillertsub lock_hashref {
31843003dfeSmillert    my $hash = shift;
31943003dfeSmillert
32043003dfeSmillert    lock_ref_keys($hash);
32143003dfeSmillert
32243003dfeSmillert    foreach my $value (values %$hash) {
32343003dfeSmillert        Internals::SvREADONLY($value,1);
32443003dfeSmillert    }
32543003dfeSmillert
32643003dfeSmillert    return $hash;
32743003dfeSmillert}
32843003dfeSmillert
32943003dfeSmillertsub unlock_hashref {
33043003dfeSmillert    my $hash = shift;
33143003dfeSmillert
33243003dfeSmillert    foreach my $value (values %$hash) {
33343003dfeSmillert        Internals::SvREADONLY($value, 0);
33443003dfeSmillert    }
33543003dfeSmillert
33643003dfeSmillert    unlock_ref_keys($hash);
33743003dfeSmillert
33843003dfeSmillert    return $hash;
33943003dfeSmillert}
34043003dfeSmillert
34143003dfeSmillertsub   lock_hash (\%) {   lock_hashref(@_) }
34243003dfeSmillertsub unlock_hash (\%) { unlock_hashref(@_) }
34343003dfeSmillert
34443003dfeSmillert=item B<lock_hash_recurse>
34543003dfeSmillert
34643003dfeSmillert=item B<unlock_hash_recurse>
34743003dfeSmillert
34843003dfeSmillert    lock_hash_recurse(%hash);
34943003dfeSmillert
35043003dfeSmillertlock_hash() locks an entire hash and any hashes it references recursively,
35143003dfeSmillertmaking all keys and values read-only. No value can be changed, no keys can
35243003dfeSmillertbe added or deleted.
35343003dfeSmillert
35491f110e0Safresh1This method B<only> recurses into hashes that are referenced by another hash.
35591f110e0Safresh1Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
35691f110e0Safresh1Hashes (HoAoH) will only have the top hash restricted.
35743003dfeSmillert
35843003dfeSmillert    unlock_hash_recurse(%hash);
35943003dfeSmillert
36043003dfeSmillertunlock_hash_recurse() does the opposite of lock_hash_recurse().  All keys and
36143003dfeSmillertvalues are made writable.  All values can be changed and keys can be added
36243003dfeSmillertand deleted. Identical recursion restrictions apply as to lock_hash_recurse().
36343003dfeSmillert
36443003dfeSmillertReturns a reference to the %hash.
36543003dfeSmillert
36643003dfeSmillert=cut
36743003dfeSmillert
36843003dfeSmillertsub lock_hashref_recurse {
36943003dfeSmillert    my $hash = shift;
37043003dfeSmillert
37143003dfeSmillert    lock_ref_keys($hash);
37243003dfeSmillert    foreach my $value (values %$hash) {
373898184e3Ssthen        my $type = reftype($value);
374898184e3Ssthen        if (defined($type) and $type eq 'HASH') {
37543003dfeSmillert            lock_hashref_recurse($value);
37643003dfeSmillert        }
37743003dfeSmillert        Internals::SvREADONLY($value,1);
37843003dfeSmillert    }
37943003dfeSmillert    return $hash
38043003dfeSmillert}
38143003dfeSmillert
38243003dfeSmillertsub unlock_hashref_recurse {
38343003dfeSmillert    my $hash = shift;
38443003dfeSmillert
38543003dfeSmillert    foreach my $value (values %$hash) {
386898184e3Ssthen        my $type = reftype($value);
387898184e3Ssthen        if (defined($type) and $type eq 'HASH') {
38843003dfeSmillert            unlock_hashref_recurse($value);
38943003dfeSmillert        }
390b8851fccSafresh1        Internals::SvREADONLY($value,0);
39143003dfeSmillert    }
39243003dfeSmillert    unlock_ref_keys($hash);
39343003dfeSmillert    return $hash;
39443003dfeSmillert}
39543003dfeSmillert
39643003dfeSmillertsub   lock_hash_recurse (\%) {   lock_hashref_recurse(@_) }
39743003dfeSmillertsub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
39843003dfeSmillert
39991f110e0Safresh1=item B<hashref_locked>
40091f110e0Safresh1
40191f110e0Safresh1=item B<hash_locked>
40291f110e0Safresh1
40391f110e0Safresh1  hashref_locked(\%hash) and print "Hash is locked!\n";
40491f110e0Safresh1  hash_locked(%hash) and print "Hash is locked!\n";
40591f110e0Safresh1
40691f110e0Safresh1Returns true if the hash and its keys are locked.
40791f110e0Safresh1
40891f110e0Safresh1=cut
40991f110e0Safresh1
41091f110e0Safresh1sub hashref_locked {
41191f110e0Safresh1    my $hash=shift;
41291f110e0Safresh1    Internals::SvREADONLY(%$hash);
41391f110e0Safresh1}
41491f110e0Safresh1
41591f110e0Safresh1sub hash_locked(\%) { hashref_locked(@_) }
41691f110e0Safresh1
41791f110e0Safresh1=item B<hashref_unlocked>
41843003dfeSmillert
41943003dfeSmillert=item B<hash_unlocked>
42043003dfeSmillert
42191f110e0Safresh1  hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
42243003dfeSmillert  hash_unlocked(%hash) and print "Hash is unlocked!\n";
42343003dfeSmillert
42443003dfeSmillertReturns true if the hash and its keys are unlocked.
42543003dfeSmillert
42643003dfeSmillert=cut
42743003dfeSmillert
42843003dfeSmillertsub hashref_unlocked {
42943003dfeSmillert    my $hash=shift;
43091f110e0Safresh1    !Internals::SvREADONLY(%$hash);
43143003dfeSmillert}
43243003dfeSmillert
43343003dfeSmillertsub hash_unlocked(\%) { hashref_unlocked(@_) }
43443003dfeSmillert
43543003dfeSmillert=for demerphqs_editor
43643003dfeSmillertsub legal_ref_keys{}
43743003dfeSmillertsub hidden_ref_keys{}
43843003dfeSmillertsub all_keys{}
43943003dfeSmillert
44043003dfeSmillert=cut
44143003dfeSmillert
44243003dfeSmillertsub legal_keys(\%) { legal_ref_keys(@_)  }
44343003dfeSmillertsub hidden_keys(\%){ hidden_ref_keys(@_) }
44443003dfeSmillert
44543003dfeSmillert=item B<legal_keys>
44643003dfeSmillert
44743003dfeSmillert  my @keys = legal_keys(%hash);
44843003dfeSmillert
44943003dfeSmillertReturns the list of the keys that are legal in a restricted hash.
45043003dfeSmillertIn the case of an unrestricted hash this is identical to calling
45143003dfeSmillertkeys(%hash).
45243003dfeSmillert
45343003dfeSmillert=item B<hidden_keys>
45443003dfeSmillert
45543003dfeSmillert  my @keys = hidden_keys(%hash);
45643003dfeSmillert
45743003dfeSmillertReturns the list of the keys that are legal in a restricted hash but
45843003dfeSmillertdo not have a value associated to them. Thus if 'foo' is a
45943003dfeSmillert"hidden" key of the %hash it will return false for both C<defined>
46043003dfeSmillertand C<exists> tests.
46143003dfeSmillert
46243003dfeSmillertIn the case of an unrestricted hash this will return an empty list.
46343003dfeSmillert
46443003dfeSmillertB<NOTE> this is an experimental feature that is heavily dependent
46543003dfeSmillerton the current implementation of restricted hashes. Should the
46643003dfeSmillertimplementation change, this routine may become meaningless, in which
46743003dfeSmillertcase it will return an empty list.
46843003dfeSmillert
46943003dfeSmillert=item B<all_keys>
47043003dfeSmillert
47143003dfeSmillert  all_keys(%hash,@keys,@hidden);
47243003dfeSmillert
47343003dfeSmillertPopulates the arrays @keys with the all the keys that would pass
47443003dfeSmillertan C<exists> tests, and populates @hidden with the remaining legal
47543003dfeSmillertkeys that have not been utilized.
47643003dfeSmillert
47743003dfeSmillertReturns a reference to the hash.
47843003dfeSmillert
47943003dfeSmillertIn the case of an unrestricted hash this will be equivalent to
48043003dfeSmillert
48143003dfeSmillert  $ref = do {
48243003dfeSmillert      @keys = keys %hash;
48343003dfeSmillert      @hidden = ();
48443003dfeSmillert      \%hash
48543003dfeSmillert  };
48643003dfeSmillert
48743003dfeSmillertB<NOTE> this is an experimental feature that is heavily dependent
48843003dfeSmillerton the current implementation of restricted hashes. Should the
48943003dfeSmillertimplementation change this routine may become meaningless in which
49043003dfeSmillertcase it will behave identically to how it would behave on an
49143003dfeSmillertunrestricted hash.
49243003dfeSmillert
49343003dfeSmillert=item B<hash_seed>
49443003dfeSmillert
49543003dfeSmillert    my $hash_seed = hash_seed();
49643003dfeSmillert
49791f110e0Safresh1hash_seed() returns the seed bytes used to randomise hash ordering.
49843003dfeSmillert
49943003dfeSmillertB<Note that the hash seed is sensitive information>: by knowing it one
50043003dfeSmillertcan craft a denial-of-service attack against Perl code, even remotely,
50143003dfeSmillertsee L<perlsec/"Algorithmic Complexity Attacks"> for more information.
50243003dfeSmillertB<Do not disclose the hash seed> to people who don't need to know it.
50343003dfeSmillertSee also L<perlrun/PERL_HASH_SEED_DEBUG>.
50443003dfeSmillert
50591f110e0Safresh1Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
50691f110e0Safresh1which may be of nearly any size as determined by the hash function your
50791f110e0Safresh1Perl has been built with. Possible sizes may be but are not limited to
50891f110e0Safresh14 bytes (for most hash algorithms) and 16 bytes (for siphash).
50991f110e0Safresh1
51091f110e0Safresh1=item B<hash_value>
51191f110e0Safresh1
51291f110e0Safresh1    my $hash_value = hash_value($string);
513eac174f2Safresh1    my $hash_value = hash_value($string, $seed);
51491f110e0Safresh1
515eac174f2Safresh1C<hash_value($string)>
516eac174f2Safresh1returns
517eac174f2Safresh1the current perl's internal hash value for a given string.
518eac174f2Safresh1C<hash_value($string, $seed)>
519eac174f2Safresh1returns the hash value as if computed with a different seed.
520eac174f2Safresh1If the custom seed is too short, the function errors out.
521eac174f2Safresh1The minimum length of the seed is implementation-dependent.
52291f110e0Safresh1
523eac174f2Safresh1Returns a 32-bit integer
524eac174f2Safresh1representing the hash value of the string passed in.
525eac174f2Safresh1The 1-parameter value is only reliable
526eac174f2Safresh1for the lifetime of the process.
527eac174f2Safresh1It may be different
528eac174f2Safresh1depending on invocation, environment variables, perl version,
52991f110e0Safresh1architectures, and build options.
53091f110e0Safresh1
53191f110e0Safresh1B<Note that the hash value of a given string is sensitive information>:
53291f110e0Safresh1by knowing it one can deduce the hash seed which in turn can allow one to
53391f110e0Safresh1craft a denial-of-service attack against Perl code, even remotely,
53491f110e0Safresh1see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
53591f110e0Safresh1B<Do not disclose the hash value of a string> to people who don't need to
53691f110e0Safresh1know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
53791f110e0Safresh1
53891f110e0Safresh1=item B<bucket_info>
53991f110e0Safresh1
54091f110e0Safresh1Return a set of basic information about a hash.
54191f110e0Safresh1
54291f110e0Safresh1    my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
54391f110e0Safresh1
54491f110e0Safresh1Fields are as follows:
54591f110e0Safresh1
54691f110e0Safresh1    0: Number of keys in the hash
54791f110e0Safresh1    1: Number of buckets in the hash
54891f110e0Safresh1    2: Number of used buckets in the hash
54991f110e0Safresh1    rest : list of counts, Kth element is the number of buckets
55091f110e0Safresh1           with K keys in it.
55191f110e0Safresh1
55291f110e0Safresh1See also bucket_stats() and bucket_array().
55391f110e0Safresh1
55491f110e0Safresh1=item B<bucket_stats>
55591f110e0Safresh1
55691f110e0Safresh1Returns a list of statistics about a hash.
55791f110e0Safresh1
558b8851fccSafresh1 my ($keys, $buckets, $used, $quality, $utilization_ratio,
559b8851fccSafresh1        $collision_pct, $mean, $stddev, @length_counts)
560b8851fccSafresh1    = bucket_stats($hashref);
56191f110e0Safresh1
56291f110e0Safresh1Fields are as follows:
56391f110e0Safresh1
56491f110e0Safresh1    0: Number of keys in the hash
56591f110e0Safresh1    1: Number of buckets in the hash
56691f110e0Safresh1    2: Number of used buckets in the hash
56791f110e0Safresh1    3: Hash Quality Score
56891f110e0Safresh1    4: Percent of buckets used
56991f110e0Safresh1    5: Percent of keys which are in collision
570b8851fccSafresh1    6: Mean bucket length of occupied buckets
571b8851fccSafresh1    7: Standard Deviation of bucket lengths of occupied buckets
57291f110e0Safresh1    rest : list of counts, Kth element is the number of buckets
57391f110e0Safresh1           with K keys in it.
57491f110e0Safresh1
57591f110e0Safresh1See also bucket_info() and bucket_array().
57691f110e0Safresh1
57791f110e0Safresh1Note that Hash Quality Score would be 1 for an ideal hash, numbers
57891f110e0Safresh1close to and below 1 indicate good hashing, and number significantly
57991f110e0Safresh1above indicate a poor score. In practice it should be around 0.95 to 1.05.
58091f110e0Safresh1It is defined as:
58191f110e0Safresh1
58291f110e0Safresh1 $score= sum( $count[$length] * ($length * ($length + 1) / 2) )
58391f110e0Safresh1            /
58491f110e0Safresh1            ( ( $keys / 2 * $buckets ) *
58591f110e0Safresh1              ( $keys + ( 2 * $buckets ) - 1 ) )
58691f110e0Safresh1
58791f110e0Safresh1The formula is from the Red Dragon book (reformulated to use the data available)
58891f110e0Safresh1and is documented at L<http://www.strchr.com/hash_functions>
58991f110e0Safresh1
59091f110e0Safresh1=item B<bucket_array>
59191f110e0Safresh1
59291f110e0Safresh1    my $array= bucket_array(\%hash);
59391f110e0Safresh1
59491f110e0Safresh1Returns a packed representation of the bucket array associated with a hash. Each element
59591f110e0Safresh1of the array is either an integer K, in which case it represents K empty buckets, or
59691f110e0Safresh1a reference to another array which contains the keys that are in that bucket.
59791f110e0Safresh1
59891f110e0Safresh1B<Note that the information returned by bucket_array is sensitive information>:
59991f110e0Safresh1by knowing it one can directly attack perl's hash function which in turn may allow
60091f110e0Safresh1one to craft a denial-of-service attack against Perl code, even remotely,
60191f110e0Safresh1see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
60291f110e0Safresh1B<Do not disclose the output of this function> to people who don't need to
60391f110e0Safresh1know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
60491f110e0Safresh1for  debugging and diagnostics purposes only, it is hard to imagine a reason why it
60591f110e0Safresh1would be used in production code.
60691f110e0Safresh1
60743003dfeSmillert=cut
60843003dfeSmillert
60991f110e0Safresh1
61091f110e0Safresh1sub bucket_stats {
61191f110e0Safresh1    my ($hash) = @_;
61291f110e0Safresh1    my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
61391f110e0Safresh1    my $sum;
61491f110e0Safresh1    my $score;
615b8851fccSafresh1    for (1 .. $#length_counts) {
61691f110e0Safresh1        $sum += ($length_counts[$_] * $_);
61791f110e0Safresh1        $score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
61891f110e0Safresh1    }
61991f110e0Safresh1    $score = $score /
62091f110e0Safresh1             (( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
62191f110e0Safresh1                 if $keys;
622b8851fccSafresh1    my ($mean, $stddev)= (0, 0);
623b8851fccSafresh1    if ($used) {
624b8851fccSafresh1        $mean= $sum / $used;
62591f110e0Safresh1        $sum= 0;
626b8851fccSafresh1        $sum += ($length_counts[$_] * (($_-$mean)**2)) for 1 .. $#length_counts;
62791f110e0Safresh1
628b8851fccSafresh1        $stddev= sqrt($sum/$used);
629b8851fccSafresh1    }
63091f110e0Safresh1    return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
63143003dfeSmillert}
63243003dfeSmillert
633b8851fccSafresh1=item B<bucket_stats_formatted>
634b8851fccSafresh1
635b8851fccSafresh1  print bucket_stats_formatted($hashref);
636b8851fccSafresh1
637b8851fccSafresh1Return a formatted report of the information returned by bucket_stats().
638b8851fccSafresh1An example report looks like this:
639b8851fccSafresh1
640b8851fccSafresh1 Keys: 50 Buckets: 33/64 Quality-Score: 1.01 (Good)
641b8851fccSafresh1 Utilized Buckets: 51.56% Optimal: 78.12% Keys In Collision: 34.00%
642b8851fccSafresh1 Chain Length - mean: 1.52 stddev: 0.66
643b8851fccSafresh1 Buckets 64          [0000000000000000000000000000000111111111111111111122222222222333]
644b8851fccSafresh1 Len   0 Pct:  48.44 [###############################]
645b8851fccSafresh1 Len   1 Pct:  29.69 [###################]
646b8851fccSafresh1 Len   2 Pct:  17.19 [###########]
647b8851fccSafresh1 Len   3 Pct:   4.69 [###]
648b8851fccSafresh1 Keys    50          [11111111111111111111111111111111122222222222222333]
649b8851fccSafresh1 Pos   1 Pct:  66.00 [#################################]
650b8851fccSafresh1 Pos   2 Pct:  28.00 [##############]
651b8851fccSafresh1 Pos   3 Pct:   6.00 [###]
652b8851fccSafresh1
653b8851fccSafresh1The first set of stats gives some summary statistical information,
654b8851fccSafresh1including the quality score translated into "Good", "Poor" and "Bad",
655b8851fccSafresh1(score<=1.05, score<=1.2, score>1.2). See the documentation in
656b8851fccSafresh1bucket_stats() for more details.
657b8851fccSafresh1
658b8851fccSafresh1The two sets of barcharts give stats and a visual indication of performance
659b8851fccSafresh1of the hash.
660b8851fccSafresh1
661b8851fccSafresh1The first gives data on bucket chain lengths and provides insight on how
662b8851fccSafresh1much work a fetch *miss* will take. In this case we have to inspect every item
663b8851fccSafresh1in a bucket before we can be sure the item is not in the list. The performance
664b8851fccSafresh1for an insert is equivalent to this case, as is a delete where the item
665b8851fccSafresh1is not in the hash.
666b8851fccSafresh1
667b8851fccSafresh1The second gives data on how many keys are at each depth in the chain, and
668b8851fccSafresh1gives an idea of how much work a fetch *hit* will take. The performance for
669b8851fccSafresh1an update or delete of an item in the hash is equivalent to this case.
670b8851fccSafresh1
671b8851fccSafresh1Note that these statistics are summary only. Actual performance will depend
672b8851fccSafresh1on real hit/miss ratios accessing the hash. If you are concerned by hit ratios
673b8851fccSafresh1you are recommended to "oversize" your hash by using something like:
674b8851fccSafresh1
675b8851fccSafresh1   keys(%hash)= keys(%hash) << $k;
676b8851fccSafresh1
677b8851fccSafresh1With $k chosen carefully, and likely to be a small number like 1 or 2. In
678b8851fccSafresh1theory the larger the bucket array the less chance of collision.
679b8851fccSafresh1
680b8851fccSafresh1=cut
681b8851fccSafresh1
682b8851fccSafresh1
683b8851fccSafresh1sub _bucket_stats_formatted_bars {
684b8851fccSafresh1    my ($total, $ary, $start_idx, $title, $row_title)= @_;
685b8851fccSafresh1
686b8851fccSafresh1    my $return = "";
687b8851fccSafresh1    my $max_width= $total > 64 ? 64 : $total;
688b8851fccSafresh1    my $bar_width= $max_width / $total;
689b8851fccSafresh1
690b8851fccSafresh1    my $str= "";
691b8851fccSafresh1    if ( @$ary < 10) {
692b8851fccSafresh1        for my $idx ($start_idx .. $#$ary) {
693b8851fccSafresh1            $str .= $idx x sprintf("%.0f", ($ary->[$idx] * $bar_width));
694b8851fccSafresh1        }
695b8851fccSafresh1    } else {
696b8851fccSafresh1        $str= "-" x $max_width;
697b8851fccSafresh1    }
698b8851fccSafresh1    $return .= sprintf "%-7s         %6d [%s]\n",$title, $total, $str;
699b8851fccSafresh1
700b8851fccSafresh1    foreach my $idx ($start_idx .. $#$ary) {
701b8851fccSafresh1        $return .= sprintf "%-.3s %3d %6.2f%% %6d [%s]\n",
702b8851fccSafresh1            $row_title,
703b8851fccSafresh1            $idx,
704b8851fccSafresh1            $ary->[$idx] / $total * 100,
705b8851fccSafresh1            $ary->[$idx],
706b8851fccSafresh1            "#" x sprintf("%.0f", ($ary->[$idx] * $bar_width)),
707b8851fccSafresh1        ;
708b8851fccSafresh1    }
709b8851fccSafresh1    return $return;
710b8851fccSafresh1}
711b8851fccSafresh1
712b8851fccSafresh1sub bucket_stats_formatted {
713b8851fccSafresh1    my ($hashref)= @_;
714b8851fccSafresh1    my ($keys, $buckets, $used, $score, $utilization_ratio, $collision_pct,
715b8851fccSafresh1        $mean, $stddev, @length_counts) = bucket_stats($hashref);
716b8851fccSafresh1
717b8851fccSafresh1    my $return= sprintf   "Keys: %d Buckets: %d/%d Quality-Score: %.2f (%s)\n"
718b8851fccSafresh1                        . "Utilized Buckets: %.2f%% Optimal: %.2f%% Keys In Collision: %.2f%%\n"
719b8851fccSafresh1                        . "Chain Length - mean: %.2f stddev: %.2f\n",
720b8851fccSafresh1                $keys, $used, $buckets, $score, $score <= 1.05 ? "Good" : $score < 1.2 ? "Poor" : "Bad",
721b8851fccSafresh1                $utilization_ratio * 100,
722b8851fccSafresh1                $keys/$buckets * 100,
723b8851fccSafresh1                $collision_pct * 100,
724b8851fccSafresh1                $mean, $stddev;
725b8851fccSafresh1
726b8851fccSafresh1    my @key_depth;
727b8851fccSafresh1    $key_depth[$_]= $length_counts[$_] + ( $key_depth[$_+1] || 0 )
728b8851fccSafresh1        for reverse 1 .. $#length_counts;
729b8851fccSafresh1
730b8851fccSafresh1    if ($keys) {
731b8851fccSafresh1        $return .= _bucket_stats_formatted_bars($buckets, \@length_counts, 0, "Buckets", "Len");
732b8851fccSafresh1        $return .= _bucket_stats_formatted_bars($keys, \@key_depth, 1, "Keys", "Pos");
733b8851fccSafresh1    }
734b8851fccSafresh1    return $return
735b8851fccSafresh1}
736b8851fccSafresh1
73743003dfeSmillert=item B<hv_store>
73843003dfeSmillert
73943003dfeSmillert  my $sv = 0;
74043003dfeSmillert  hv_store(%hash,$key,$sv) or die "Failed to alias!";
74143003dfeSmillert  $hash{$key} = 1;
74243003dfeSmillert  print $sv; # prints 1
74343003dfeSmillert
74443003dfeSmillertStores an alias to a variable in a hash instead of copying the value.
74543003dfeSmillert
74691f110e0Safresh1=item B<hash_traversal_mask>
74791f110e0Safresh1
74891f110e0Safresh1As of Perl 5.18 every hash has its own hash traversal order, and this order
74991f110e0Safresh1changes every time a new element is inserted into the hash. This functionality
75091f110e0Safresh1is provided by maintaining an unsigned integer mask (U32) which is xor'ed
75191f110e0Safresh1with the actual bucket id during a traversal of the hash buckets using keys(),
75291f110e0Safresh1values() or each().
75391f110e0Safresh1
75491f110e0Safresh1You can use this subroutine to get and set the traversal mask for a specific
75591f110e0Safresh1hash. Setting the mask ensures that a given hash will produce the same key
75691f110e0Safresh1order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
75791f110e0Safresh1the same key order for the same hash seed and traversal mask, items that
75891f110e0Safresh1collide into one bucket may have different orders regardless of this setting.
75991f110e0Safresh1
7609f11ffb7Safresh1=item B<bucket_ratio>
7619f11ffb7Safresh1
7629f11ffb7Safresh1This function behaves the same way that scalar(%hash) behaved prior to
7639f11ffb7Safresh1Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied
7649f11ffb7Safresh1hash method, if untied then if the hash is empty it return 0, otherwise it
7659f11ffb7Safresh1returns a string containing the number of used buckets in the hash,
7669f11ffb7Safresh1followed by a slash, followed by the total number of buckets in the hash.
7679f11ffb7Safresh1
7689f11ffb7Safresh1    my %hash=("foo"=>1);
7699f11ffb7Safresh1    print Hash::Util::bucket_ratio(%hash); # prints "1/8"
7709f11ffb7Safresh1
7719f11ffb7Safresh1=item B<used_buckets>
7729f11ffb7Safresh1
7739f11ffb7Safresh1This function returns the count of used buckets in the hash. It is expensive
7749f11ffb7Safresh1to calculate and the value is NOT cached, so avoid use of this function
7759f11ffb7Safresh1in production code.
7769f11ffb7Safresh1
7779f11ffb7Safresh1=item B<num_buckets>
7789f11ffb7Safresh1
7799f11ffb7Safresh1This function returns the total number of buckets the hash holds, or would
7809f11ffb7Safresh1hold if the array were created. (When a hash is freshly created the array
7819f11ffb7Safresh1may not be allocated even though this value will be non-zero.)
7829f11ffb7Safresh1
78343003dfeSmillert=back
78443003dfeSmillert
785*3d61058aSafresh1=head2 Operating on references to hashes
78643003dfeSmillert
78743003dfeSmillertMost subroutines documented in this module have equivalent versions
78843003dfeSmillertthat operate on references to hashes instead of native hashes.
78943003dfeSmillertThe following is a list of these subs. They are identical except
79043003dfeSmillertin name and in that instead of taking a %hash they take a $hashref,
79143003dfeSmillertand additionally are not prototyped.
79243003dfeSmillert
79343003dfeSmillert=over 4
79443003dfeSmillert
79543003dfeSmillert=item lock_ref_keys
79643003dfeSmillert
79743003dfeSmillert=item unlock_ref_keys
79843003dfeSmillert
79943003dfeSmillert=item lock_ref_keys_plus
80043003dfeSmillert
80143003dfeSmillert=item lock_ref_value
80243003dfeSmillert
80343003dfeSmillert=item unlock_ref_value
80443003dfeSmillert
80543003dfeSmillert=item lock_hashref
80643003dfeSmillert
80743003dfeSmillert=item unlock_hashref
80843003dfeSmillert
80943003dfeSmillert=item lock_hashref_recurse
81043003dfeSmillert
81143003dfeSmillert=item unlock_hashref_recurse
81243003dfeSmillert
81343003dfeSmillert=item hash_ref_unlocked
81443003dfeSmillert
81543003dfeSmillert=item legal_ref_keys
81643003dfeSmillert
81743003dfeSmillert=item hidden_ref_keys
81843003dfeSmillert
81943003dfeSmillert=back
82043003dfeSmillert
82143003dfeSmillert=head1 CAVEATS
82243003dfeSmillert
82343003dfeSmillertNote that the trapping of the restricted operations is not atomic:
82443003dfeSmillertfor example
82543003dfeSmillert
82643003dfeSmillert    eval { %hash = (illegal_key => 1) }
82743003dfeSmillert
82843003dfeSmillertleaves the C<%hash> empty rather than with its original contents.
82943003dfeSmillert
83043003dfeSmillert=head1 BUGS
83143003dfeSmillert
83243003dfeSmillertThe interface exposed by this module is very close to the current
83343003dfeSmillertimplementation of restricted hashes. Over time it is expected that
83443003dfeSmillertthis behavior will be extended and the interface abstracted further.
83543003dfeSmillert
83643003dfeSmillert=head1 AUTHOR
83743003dfeSmillert
83843003dfeSmillertMichael G Schwern <schwern@pobox.com> on top of code by Nick
83943003dfeSmillertIng-Simmons and Jeffrey Friedl.
84043003dfeSmillert
84143003dfeSmillerthv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
84243003dfeSmillert
84343003dfeSmillertAdditional code by Yves Orton.
84443003dfeSmillert
845eac174f2Safresh1Description of C<hash_value($string, $seed)>
846eac174f2Safresh1by Christopher Yeleighton <ne01026@shark.2a.pl>
847eac174f2Safresh1
84843003dfeSmillert=head1 SEE ALSO
84943003dfeSmillert
85043003dfeSmillertL<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
85143003dfeSmillert
85243003dfeSmillertL<Hash::Util::FieldHash>.
85343003dfeSmillert
85443003dfeSmillert=cut
85543003dfeSmillert
85643003dfeSmillert1;
857