xref: /openbsd-src/gnu/usr.bin/perl/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
143003dfeSmillertpackage Hash::Util::FieldHash;
243003dfeSmillert
343003dfeSmillertuse strict;
443003dfeSmillertuse warnings;
5eac174f2Safresh1no warnings 'experimental::builtin';
6eac174f2Safresh1use builtin qw(reftype);
743003dfeSmillert
8*3d61058aSafresh1our $VERSION = '1.27';
943003dfeSmillert
10eac174f2Safresh1use Exporter 'import';
# Export configuration for Exporter: the ':all' tag lists every function
# this module offers for import.
our %EXPORT_TAGS = (
    all => [ qw(
        fieldhash
        fieldhashes
        idhash
        idhashes
        id
        id_2obj
        register
    ) ],
);

# @EXPORT_OK is a copy of the ':all' list, so each name can also be
# requested individually.
our @EXPORT_OK = @{ $EXPORT_TAGS{all} };
2343003dfeSmillert
{
    require XSLoader;

    # Private object registry.  The hash is lexical to this bare block, so
    # the only way to reach it from outside is through _ob_reg().
    my %ob_reg;

    # Returns a reference to the private registry hash.
    # NOTE(review): presumably called from the XS side -- confirm there.
    sub _ob_reg { return \%ob_reg }

    # Load the compiled part of the module; this runs after the registry
    # has been declared, as in the original statement order.
    XSLoader::load();
}
3043003dfeSmillert
# fieldhash %hash
#
# Turns the given hash into a field hash.  The (\%) prototype makes perl
# pass a reference to the hash named at the call site; callers that bypass
# the prototype with &fieldhash($ref) hand over the reference themselves.
#
# Returns the hash reference on success.  Returns nothing (empty list or
# undef, depending on context) when the argument is not a reference to a
# hash or when the requested mode is not reported back.
sub fieldhash (\%) {
    my ($href) = @_;

    # Anything that is not a reference to a hash is rejected.
    return unless ref($href) && reftype($href) eq 'HASH';

    # A true answer for mode 0 means there is nothing left to do.
    # NOTE(review): presumably a status query -- confirm against the XS code.
    return $href if Hash::Util::FieldHash::_fieldhash($href, 0);

    # Request mode 2 and succeed only if that mode is reported back.
    return $href if Hash::Util::FieldHash::_fieldhash($href, 2) == 2;

    return;
}
3943003dfeSmillert
# idhash %hash
#
# Turns the given hash into an id hash.  The (\%) prototype makes perl
# pass a reference to the hash named at the call site; callers that bypass
# the prototype with &idhash($ref) hand over the reference themselves.
#
# Returns the hash reference on success.  Returns nothing (empty list or
# undef, depending on context) when the argument is not a reference to a
# hash or when the requested mode is not reported back.
sub idhash (\%) {
    my ($href) = @_;

    # Anything that is not a reference to a hash is rejected.
    return unless ref($href) && reftype($href) eq 'HASH';

    # A true answer for mode 0 means there is nothing left to do.
    # NOTE(review): presumably a status query -- confirm against the XS code.
    return $href if Hash::Util::FieldHash::_fieldhash($href, 0);

    # Request mode 1 and succeed only if that mode is reported back.
    return $href if Hash::Util::FieldHash::_fieldhash($href, 1) == 1;

    return;
}
4843003dfeSmillert
# fieldhashes @hashrefs
#
# Applies fieldhash() to every argument.  The &-call form bypasses the
# (\%) prototype of fieldhash so that hash references can be passed
# directly.  Arguments for which fieldhash() returns nothing drop out of
# the result; in scalar context map() yields the number of results.
sub fieldhashes {
    return map &fieldhash($_), @_;
}
# idhashes @hashrefs
#
# Applies idhash() to every argument.  The &-call form bypasses the (\%)
# prototype of idhash so that hash references can be passed directly.
# Arguments for which idhash() returns nothing drop out of the result; in
# scalar context map() yields the number of results.
sub idhashes {
    return map &idhash($_), @_;
}
5143003dfeSmillert
5243003dfeSmillert1;
5343003dfeSmillert__END__
5443003dfeSmillert
5543003dfeSmillert=head1 NAME
5643003dfeSmillert
5743003dfeSmillertHash::Util::FieldHash - Support for Inside-Out Classes
5843003dfeSmillert
5943003dfeSmillert=head1 SYNOPSIS
6043003dfeSmillert
6143003dfeSmillert  ### Create fieldhashes
6243003dfeSmillert  use Hash::Util qw(fieldhash fieldhashes);
6343003dfeSmillert
6443003dfeSmillert  # Create a single field hash
6543003dfeSmillert  fieldhash my %foo;
6643003dfeSmillert
6743003dfeSmillert  # Create three at once...
6843003dfeSmillert  fieldhashes \ my(%foo, %bar, %baz);
6943003dfeSmillert  # ...or any number
7043003dfeSmillert  fieldhashes @hashrefs;
7143003dfeSmillert
7243003dfeSmillert  ### Create an idhash and register it for garbage collection
7343003dfeSmillert  use Hash::Util::FieldHash qw(idhash register);
7443003dfeSmillert  idhash my %name;
7543003dfeSmillert  my $object = \ do { my $o };
7643003dfeSmillert  # register the idhash for garbage collection with $object
7743003dfeSmillert  register($object, \ %name);
7843003dfeSmillert  # the following entry will be deleted when $object goes out of scope
7943003dfeSmillert  $name{$object} = 'John Doe';
8043003dfeSmillert
8143003dfeSmillert  ### Register an ordinary hash for garbage collection
8243003dfeSmillert  use Hash::Util::FieldHash qw(id register);
8343003dfeSmillert  my %name;
8443003dfeSmillert  my $object = \ do { my $o };
8543003dfeSmillert  # register the hash %name for garbage collection of $object's id
8643003dfeSmillert  register $object, \ %name;
8743003dfeSmillert  # the following entry will be deleted when $object goes out of scope
8843003dfeSmillert  $name{id $object} = 'John Doe';
8943003dfeSmillert
9043003dfeSmillert=head1 FUNCTIONS
9143003dfeSmillert
C<Hash::Util::FieldHash> offers a number of functions in support of
L</The Inside-out Technique> of class construction.
9443003dfeSmillert
9543003dfeSmillert=over
9643003dfeSmillert
9743003dfeSmillert=item id
9843003dfeSmillert
9943003dfeSmillert    id($obj)
10043003dfeSmillert
10143003dfeSmillertReturns the reference address of a reference $obj.  If $obj is
10243003dfeSmillertnot a reference, returns $obj.
10343003dfeSmillert
10443003dfeSmillertThis function is a stand-in replacement for
105b8851fccSafresh1L<Scalar::Util::refaddr|Scalar::Util/refaddr>,
1066fb12b70Safresh1that is, it returns
10743003dfeSmillertthe reference address of its argument as a numeric value.  The only
10843003dfeSmillertdifference is that C<refaddr()> returns C<undef> when given a
10943003dfeSmillertnon-reference while C<id()> returns its argument unchanged.
11043003dfeSmillert
11143003dfeSmillertC<id()> also uses a caching technique that makes it faster when
11243003dfeSmillertthe id of an object is requested often, but slower if it is needed
11343003dfeSmillertonly once or twice.
11443003dfeSmillert
11543003dfeSmillert=item id_2obj
11643003dfeSmillert
11743003dfeSmillert    $obj = id_2obj($id)
11843003dfeSmillert
11943003dfeSmillertIf C<$id> is the id of a registered object (see L</register>), returns
12043003dfeSmillertthe object, otherwise an undefined value.  For registered objects this
12143003dfeSmillertis the inverse function of C<id()>.
12243003dfeSmillert
12343003dfeSmillert=item register
12443003dfeSmillert
12543003dfeSmillert    register($obj)
12643003dfeSmillert    register($obj, @hashrefs)
12743003dfeSmillert
12843003dfeSmillertIn the first form, registers an object to work with for the function
12943003dfeSmillertC<id_2obj()>.  In the second form, it additionally marks the given
13043003dfeSmillerthashrefs down for garbage collection.  This means that when the object
13143003dfeSmillertgoes out of scope, any entries in the given hashes under the key of
13243003dfeSmillertC<id($obj)> will be deleted from the hashes.
13343003dfeSmillert
13443003dfeSmillertIt is a fatal error to register a non-reference $obj.  Any non-hashrefs
13543003dfeSmillertamong the following arguments are silently ignored.
13643003dfeSmillert
13743003dfeSmillertIt is I<not> an error to register the same object multiple times with
13843003dfeSmillertvarying sets of hashrefs.  Any hashrefs that are not registered yet
13943003dfeSmillertwill be added, others ignored.
14043003dfeSmillert
14143003dfeSmillertRegistry also implies thread support.  When a new thread is created,
14243003dfeSmillertall references are replaced with new ones, including all objects.
14343003dfeSmillertIf a hash uses the reference address of an object as a key, that
14443003dfeSmillertconnection would be broken.  With a registered object, its id will
14543003dfeSmillertbe updated in all hashes registered with it.
14643003dfeSmillert
14743003dfeSmillert=item idhash
14843003dfeSmillert
14943003dfeSmillert    idhash my %hash
15043003dfeSmillert
15143003dfeSmillertMakes an idhash from the argument, which must be a hash.
15243003dfeSmillert
15343003dfeSmillertAn I<idhash> works like a normal hash, except that it stringifies a
15443003dfeSmillertI<reference used as a key> differently.  A reference is stringified
15543003dfeSmillertas if the C<id()> function had been invoked on it, that is, its
15643003dfeSmillertreference address in decimal is used as the key.
15743003dfeSmillert
15843003dfeSmillert=item idhashes
15943003dfeSmillert
    idhashes \ my(%hash, %gnash, %trash)
    idhashes @hashrefs
16243003dfeSmillert
16343003dfeSmillertCreates many idhashes from its hashref arguments.  Returns those
16443003dfeSmillertarguments that could be converted or their number in scalar context.
16543003dfeSmillert
16643003dfeSmillert=item fieldhash
16743003dfeSmillert
16843003dfeSmillert    fieldhash %hash;
16943003dfeSmillert
17043003dfeSmillertCreates a single fieldhash.  The argument must be a hash.  Returns
17143003dfeSmillerta reference to the given hash if successful, otherwise nothing.
17243003dfeSmillert
17343003dfeSmillertA I<fieldhash> is, in short, an idhash with auto-registry.  When an
17443003dfeSmillertobject (or, indeed, any reference) is used as a fieldhash key, the
17543003dfeSmillertfieldhash is automatically registered for garbage collection with
17643003dfeSmillertthe object, as if C<register $obj, \ %fieldhash> had been called.
17743003dfeSmillert
17843003dfeSmillert=item fieldhashes
17943003dfeSmillert
18043003dfeSmillert    fieldhashes @hashrefs;
18143003dfeSmillert
18243003dfeSmillertCreates any number of field hashes.  Arguments must be hash references.
18343003dfeSmillertReturns the converted hashrefs in list context, their number in scalar
18443003dfeSmillertcontext.
18543003dfeSmillert
18643003dfeSmillert=back
18743003dfeSmillert
18843003dfeSmillert=head1 DESCRIPTION
18943003dfeSmillert
A word on terminology:  I shall use the term I<field> for a scalar
piece of data that a class associates with an object.  Other terms that
have been used for this concept are "object variable", "(object) property",
"(object) attribute" and more.  Especially "attribute" has some currency
among Perl programmers, but that clashes with the C<attributes> pragma.  The
term "field" also has some currency in this sense and doesn't seem
to conflict with other Perl terminology.
19743003dfeSmillert
19843003dfeSmillertIn Perl, an object is a blessed reference.  The standard way of associating
19943003dfeSmillertdata with an object is to store the data inside the object's body, that is,
20043003dfeSmillertthe piece of data pointed to by the reference.
20143003dfeSmillert
20243003dfeSmillertIn consequence, if two or more classes want to access an object they
20343003dfeSmillertI<must> agree on the type of reference and also on the organization of
20443003dfeSmillertdata within the object body.  Failure to agree on the type results in
20543003dfeSmillertimmediate death when the wrong method tries to access an object.  Failure
20643003dfeSmillertto agree on data organization may lead to one class trampling over the
20743003dfeSmillertdata of another.
20843003dfeSmillert
20943003dfeSmillertThis object model leads to a tight coupling between subclasses.
21043003dfeSmillertIf one class wants to inherit from another (and both classes access
21143003dfeSmillertobject data), the classes must agree about implementation details.
21243003dfeSmillertInheritance can only be used among classes that are maintained together,
21343003dfeSmillertin a single source or not.
21443003dfeSmillert
21543003dfeSmillertIn particular, it is not possible to write general-purpose classes
21643003dfeSmillertin this technique, classes that can advertise themselves as "Put me
21743003dfeSmillerton your @ISA list and use my methods".  If the other class has different
21843003dfeSmillertideas about how the object body is used, there is trouble.
21943003dfeSmillert
For reference, C<Name_hash> in L</Example 1> shows the standard implementation
of a simple class C<Name> in the well-known hash based way.  It also demonstrates
the predictable failure to construct a common subclass C<NamedFile>
of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs).
22443003dfeSmillert
22543003dfeSmillertThus, techniques are of interest that store object data I<not> in
22643003dfeSmillertthe object body but some other place.
22743003dfeSmillert
22843003dfeSmillert=head2 The Inside-out Technique
22943003dfeSmillert
23043003dfeSmillertWith I<inside-out> classes, each class declares a (typically lexical)
23143003dfeSmillerthash for each field it wants to use.  The reference address of an
23243003dfeSmillertobject is used as the hash key.  By definition, the reference address
23343003dfeSmillertis unique to each object so this guarantees a place for each field that
234898184e3Ssthenis private to the class and unique to each object.  See C<Name_id>
235898184e3Ssthenin L</Example 1> for a simple example.
23643003dfeSmillert
23743003dfeSmillertIn comparison to the standard implementation where the object is a
23843003dfeSmillerthash and the fields correspond to hash keys, here the fields correspond
23943003dfeSmillertto hashes, and the object determines the hash key.  Thus the hashes
24043003dfeSmillertappear to be turned I<inside out>.
24143003dfeSmillert
24243003dfeSmillertThe body of an object is never examined by an inside-out class, only
24343003dfeSmillertits reference address is used.  This allows for the body of an actual
24443003dfeSmillertobject to be I<anything at all> while the object methods of the class
24543003dfeSmillertstill work as designed.  This is a key feature of inside-out classes.
24643003dfeSmillert
24743003dfeSmillert=head2 Problems of Inside-out
24843003dfeSmillert
24943003dfeSmillertInside-out classes give us freedom of inheritance, but as usual there
25043003dfeSmillertis a price.
25143003dfeSmillert
25243003dfeSmillertMost obviously, there is the necessity of retrieving the reference
25343003dfeSmillertaddress of an object for each data access.  It's a minor inconvenience,
25443003dfeSmillertbut it does clutter the code.
25543003dfeSmillert
25643003dfeSmillertMore important (and less obvious) is the necessity of garbage
25743003dfeSmillertcollection.  When a normal object dies, anything stored in the
25843003dfeSmillertobject body is garbage-collected by perl.  With inside-out objects,
25943003dfeSmillertPerl knows nothing about the data stored in field hashes by a class,
26043003dfeSmillertbut these must be deleted when the object goes out of scope.  Thus
26143003dfeSmillertthe class must provide a C<DESTROY> method to take care of that.
26243003dfeSmillert
26343003dfeSmillertIn the presence of multiple classes it can be non-trivial
26443003dfeSmillertto make sure that every relevant destructor is called for
26543003dfeSmillertevery object.  Perl calls the first one it finds on the
26643003dfeSmillertinheritance tree (if any) and that's it.
26743003dfeSmillert
26843003dfeSmillertA related issue is thread-safety.  When a new thread is created,
26943003dfeSmillertthe Perl interpreter is cloned, which implies that all reference
27043003dfeSmillertaddresses in use will be replaced with new ones.  Thus, if a class
27143003dfeSmillerttries to access a field of a cloned object its (cloned) data will
27243003dfeSmillertstill be stored under the now invalid reference address of the
27343003dfeSmillertoriginal in the parent thread.  A general C<CLONE> method must
27443003dfeSmillertbe provided to re-establish the association.
27543003dfeSmillert
27643003dfeSmillert=head2 Solutions
27743003dfeSmillert
27843003dfeSmillertC<Hash::Util::FieldHash> addresses these issues on several
27943003dfeSmillertlevels.
28043003dfeSmillert
28143003dfeSmillertThe C<id()> function is provided in addition to the
28243003dfeSmillertexisting C<Scalar::Util::refaddr()>.  Besides its short name
28343003dfeSmillertit can be a little faster under some circumstances (and a
28443003dfeSmillertbit slower under others).  Benchmark if it matters.  The
28543003dfeSmillertworking of C<id()> also allows the use of the class name
28643003dfeSmillertas a I<generic object> as described L<further down|/"The Generic Object">.
28743003dfeSmillert
28843003dfeSmillertThe C<id()> function is incorporated in I<id hashes> in the sense
28943003dfeSmillertthat it is called automatically on every key that is used with
29043003dfeSmillertthe hash.  No explicit call is necessary.
29143003dfeSmillert
29243003dfeSmillertThe problems of garbage collection and thread safety are both
29343003dfeSmillertaddressed by the function C<register()>.  It registers an object
29443003dfeSmillerttogether with any number of hashes.  Registry means that when the
29543003dfeSmillertobject dies, an entry in any of the hashes under the reference
29643003dfeSmillertaddress of this object will be deleted.  This guarantees garbage
29743003dfeSmillertcollection in these hashes.  It also means that on thread
29843003dfeSmillertcloning the object's entries in registered hashes will be
29943003dfeSmillertreplaced with updated entries whose key is the cloned object's
30043003dfeSmillertreference address.  Thus the object-data association becomes
30143003dfeSmillertthread-safe.
30243003dfeSmillert
30343003dfeSmillertObject registry is best done when the object is initialized
30443003dfeSmillertfor use with a class.  That way, garbage collection and thread
30543003dfeSmillertsafety are established for every object and every field that is
30643003dfeSmillertinitialized.
30743003dfeSmillert
30843003dfeSmillertFinally, I<field hashes> incorporate all these functions in one
30943003dfeSmillertpackage.  Besides automatically calling the C<id()> function
31043003dfeSmillerton every object used as a key, the object is registered with
31143003dfeSmillertthe field hash on first use.  Classes based on field hashes
31243003dfeSmillertare fully garbage-collected and thread safe without further
31343003dfeSmillertmeasures.
31443003dfeSmillert
31543003dfeSmillert=head2 More Problems
31643003dfeSmillert
31743003dfeSmillertAnother problem that occurs with inside-out classes is serialization.
31843003dfeSmillertSince the object data is not in its usual place, standard routines
31943003dfeSmillertlike C<Storable::freeze()>, C<Storable::thaw()> and
32043003dfeSmillertC<Data::Dumper::Dumper()> can't deal with it on their own.  Both
32143003dfeSmillertC<Data::Dumper> and C<Storable> provide the necessary hooks to
32243003dfeSmillertmake things work, but the functions or methods used by the hooks
32343003dfeSmillertmust be provided by each inside-out class.
32443003dfeSmillert
32543003dfeSmillertA general solution to the serialization problem would require another
3266fb12b70Safresh1level of registry, one that associates I<classes> and fields.
32743003dfeSmillertSo far, the functions of C<Hash::Util::FieldHash> are unaware of
32843003dfeSmillertany classes, which I consider a feature.  Therefore C<Hash::Util::FieldHash>
32943003dfeSmillertdoesn't address the serialization problems.
33043003dfeSmillert
33143003dfeSmillert=head2 The Generic Object
33243003dfeSmillert
33343003dfeSmillertClasses based on the C<id()> function (and hence classes based on
33443003dfeSmillertC<idhash()> and C<fieldhash()>) show a peculiar behavior in that
33543003dfeSmillertthe class name can be used like an object.  Specifically, methods
33643003dfeSmillertthat set or read data associated with an object continue to work as
33743003dfeSmillertclass methods, just as if the class name were an object, distinct from
33843003dfeSmillertall other objects, with its own data.  This object may be called
33943003dfeSmillertthe I<generic object> of the class.
34043003dfeSmillert
34143003dfeSmillertThis works because field hashes respond to keys that are not references
34243003dfeSmillertlike a normal hash would and use the string offered as the hash key.
34343003dfeSmillertThus, if a method is called as a class method, the field hash is presented
34443003dfeSmillertwith the class name instead of an object and blithely uses it as a key.
34543003dfeSmillertSince the keys of real objects are decimal numbers, there is no
34643003dfeSmillertconflict and the slot in the field hash can be used like any other.
34743003dfeSmillertThe C<id()> function behaves correspondingly with respect to non-reference
34843003dfeSmillertarguments.
34943003dfeSmillert
Two possible uses (besides ignoring the property) come to mind.
A singleton class could be implemented this way using the generic object.
If necessary, an C<init()> method could die or ignore calls with
actual objects (references), so only the generic object will ever exist.
35443003dfeSmillert
35543003dfeSmillertAnother use of the generic object would be as a template.  It is
35643003dfeSmillerta convenient place to store class-specific defaults for various
35743003dfeSmillertfields to be used in actual object initialization.
35843003dfeSmillert
35943003dfeSmillertUsually, the feature can be entirely ignored.  Calling I<object
36043003dfeSmillertmethods> as I<class methods> normally leads to an error and isn't used
36143003dfeSmillertroutinely anywhere.  It may be a problem that this error isn't
36243003dfeSmillertindicated by a class with a generic object.
36343003dfeSmillert
36443003dfeSmillert=head2 How to use Field Hashes
36543003dfeSmillert
36643003dfeSmillertTraditionally, the definition of an inside-out class contains a bare
36743003dfeSmillertblock inside which a number of lexical hashes are declared and the
36843003dfeSmillertbasic accessor methods defined, usually through C<Scalar::Util::refaddr>.
36943003dfeSmillertFurther methods may be defined outside this block.  There has to be
37043003dfeSmillerta DESTROY method and, for thread support, a CLONE method.
37143003dfeSmillert
37243003dfeSmillertWhen field hashes are used, the basic structure remains the same.
37343003dfeSmillertEach lexical hash will be made a field hash.  The call to C<refaddr>
37443003dfeSmillertcan be omitted from the accessor methods.  DESTROY and CLONE methods
37543003dfeSmillertare not necessary.
37643003dfeSmillert
37743003dfeSmillertIf you have an existing inside-out class, simply making all hashes
37843003dfeSmillertfield hashes with no other change should make no difference.  Through
37943003dfeSmillertthe calls to C<refaddr> or equivalent, the field hashes never get to
38043003dfeSmillertsee a reference and work like normal hashes.  Your DESTROY (and
38143003dfeSmillertCLONE) methods are still needed.
38243003dfeSmillert
38343003dfeSmillertTo make the field hashes kick in, it is easiest to redefine C<refaddr>
38443003dfeSmillertas
38543003dfeSmillert
38643003dfeSmillert    sub refaddr { shift }
38743003dfeSmillert
38843003dfeSmillertinstead of importing it from C<Scalar::Util>.  It should now be possible
38943003dfeSmillertto disable DESTROY and CLONE.  Note that while it isn't disabled,
39043003dfeSmillertDESTROY will be called before the garbage collection of field hashes,
39143003dfeSmillertso it will be invoked with a functional object and will continue to
39243003dfeSmillertfunction.
39343003dfeSmillert
39443003dfeSmillertIt is not desirable to import the functions C<fieldhash> and/or
39543003dfeSmillertC<fieldhashes> into every class that is going to use them.  They
39643003dfeSmillertare only used once to set up the class.  When the class is up and running,
39743003dfeSmillertthese functions serve no more purpose.
39843003dfeSmillert
39943003dfeSmillertIf there are only a few field hashes to declare, it is simplest to
40043003dfeSmillert
40143003dfeSmillert    use Hash::Util::FieldHash;
40243003dfeSmillert
40343003dfeSmillertearly and call the functions qualified:
40443003dfeSmillert
40543003dfeSmillert    Hash::Util::FieldHash::fieldhash my %foo;
40643003dfeSmillert
40743003dfeSmillertOtherwise, import the functions into a convenient package like
40843003dfeSmillertC<HUF> or, more general, C<Aux>
40943003dfeSmillert
41043003dfeSmillert    {
41143003dfeSmillert        package Aux;
41243003dfeSmillert        use Hash::Util::FieldHash ':all';
41343003dfeSmillert    }
41443003dfeSmillert
41543003dfeSmillertand call
41643003dfeSmillert
41743003dfeSmillert    Aux::fieldhash my %foo;
41843003dfeSmillert
41943003dfeSmillertas needed.
42043003dfeSmillert
42143003dfeSmillert=head2 Garbage-Collected Hashes
42243003dfeSmillert
42343003dfeSmillertGarbage collection in a field hash means that entries will "spontaneously"
42443003dfeSmillertdisappear when the object that created them disappears.  That must be
42543003dfeSmillertborne in mind, especially when looping over a field hash.  If anything
42643003dfeSmillertyou do inside the loop could cause an object to go out of scope, a
42743003dfeSmillertrandom key may be deleted from the hash you are looping over.  That
42843003dfeSmillertcan throw the loop iterator, so it's best to cache a consistent snapshot
42943003dfeSmillertof the keys and/or values and loop over that.  You will still have to
43043003dfeSmillertcheck that a cached entry still exists when you get to it.
43143003dfeSmillert
43243003dfeSmillertGarbage collection can be confusing when keys are created in a field hash
43343003dfeSmillertfrom normal scalars as well as references.  Once a reference is I<used> with
43443003dfeSmillerta field hash, the entry will be collected, even if it was later overwritten
43543003dfeSmillertwith a plain scalar key (every positive integer is a candidate).  This
43643003dfeSmillertis true even if the original entry was deleted in the meantime.  In fact,
43743003dfeSmillertdeletion from a field hash, and also a test for existence constitute
43843003dfeSmillertI<use> in this sense and create a liability to delete the entry when
43943003dfeSmillertthe reference goes out of scope.  If you happen to create an entry
44043003dfeSmillertwith an identical key from a string or integer, that will be collected
44143003dfeSmillertinstead.  Thus, mixed use of references and plain scalars as field hash
44243003dfeSmillertkeys is not entirely supported.
44343003dfeSmillert
44443003dfeSmillert=head1 EXAMPLES
44543003dfeSmillert
44643003dfeSmillertThe examples show a very simple class that implements a I<name>, consisting
44743003dfeSmillertof a first and last name (no middle initial).  The name class has four
44843003dfeSmillertmethods:
44943003dfeSmillert
45043003dfeSmillert=over
45143003dfeSmillert
45243003dfeSmillert=item * C<init()>
45343003dfeSmillert
45443003dfeSmillertAn object method that initializes the first and last name to its
45543003dfeSmillerttwo arguments. If called as a class method, C<init()> creates an
45643003dfeSmillertobject in the given class and initializes that.
45743003dfeSmillert
45843003dfeSmillert=item * C<first()>
45943003dfeSmillert
46043003dfeSmillertRetrieve the first name
46143003dfeSmillert
46243003dfeSmillert=item * C<last()>
46343003dfeSmillert
46443003dfeSmillertRetrieve the last name
46543003dfeSmillert
46643003dfeSmillert=item * C<name()>
46743003dfeSmillert
46843003dfeSmillertRetrieve the full name, the first and last name joined by a blank.
46943003dfeSmillert
47043003dfeSmillert=back
47143003dfeSmillert
47243003dfeSmillertThe examples show this class implemented with different levels of
47343003dfeSmillertsupport by C<Hash::Util::FieldHash>.  All supported combinations
47443003dfeSmillertare shown.  The difference between implementations is often quite
47543003dfeSmillertsmall.  The implementations are:
47643003dfeSmillert
47743003dfeSmillert=over
47843003dfeSmillert
47943003dfeSmillert=item * C<Name_hash>
48043003dfeSmillert
48143003dfeSmillertA conventional (not inside-out) implementation where an object is
48243003dfeSmillerta hash that stores the field values, without support by
48343003dfeSmillertC<Hash::Util::FieldHash>.  This implementation doesn't allow
48443003dfeSmillertarbitrary inheritance.
48543003dfeSmillert
48643003dfeSmillert=item * C<Name_id>
48743003dfeSmillert
48843003dfeSmillertInside-out implementation based on the C<id()> function.  It needs
48943003dfeSmillerta C<DESTROY> method.  For thread support a C<CLONE> method (not shown)
49043003dfeSmillertwould also be needed.  Instead of C<Hash::Util::FieldHash::id()> the
49143003dfeSmillertfunction C<Scalar::Util::refaddr> could be used with very little
49243003dfeSmillertfunctional difference.  This is the basic pattern of an inside-out
49343003dfeSmillertclass.
49443003dfeSmillert
49543003dfeSmillert=item * C<Name_idhash>
49643003dfeSmillert
497898184e3SsthenIdhash-based inside-out implementation.  Like C<Name_id> it needs
49843003dfeSmillerta C<DESTROY> method and would need C<CLONE> for thread support.
49943003dfeSmillert
50043003dfeSmillert=item * C<Name_id_reg>
50143003dfeSmillert
50243003dfeSmillertInside-out implementation based on the C<id()> function with explicit
50343003dfeSmillertobject registry.  No destructor is needed and objects are thread safe.
50443003dfeSmillert
50543003dfeSmillert=item * C<Name_idhash_reg>
50643003dfeSmillert
50743003dfeSmillertIdhash-based inside-out implementation with explicit object registry.
50843003dfeSmillertNo destructor is needed and objects are thread safe.
50943003dfeSmillert
51043003dfeSmillert=item * C<Name_fieldhash>
51143003dfeSmillert
51243003dfeSmillertFieldHash-based inside-out implementation.  Object registry happens
51343003dfeSmillertautomatically.  No destructor is needed and objects are thread safe.
51443003dfeSmillert
51543003dfeSmillert=back
51643003dfeSmillert
51743003dfeSmillertThese examples are realized in the code below, which could be copied
51843003dfeSmillertto a file F<Example.pm>.
51943003dfeSmillert
52043003dfeSmillert=head2 Example 1
52143003dfeSmillert
52243003dfeSmillert    use strict; use warnings;
52343003dfeSmillert
52443003dfeSmillert    {
5256fb12b70Safresh1        package Name_hash;  # standard implementation: the
5266fb12b70Safresh1                            # object is a hash
52743003dfeSmillert        sub init {
52843003dfeSmillert            my $obj = shift;
52943003dfeSmillert            my ($first, $last) = @_;
53043003dfeSmillert            # create an object if called as class method
53143003dfeSmillert            $obj = bless {}, $obj unless ref $obj;
53243003dfeSmillert            $obj->{ first} = $first;
53343003dfeSmillert            $obj->{ last} = $last;
53443003dfeSmillert            $obj;
53543003dfeSmillert        }
53643003dfeSmillert
53743003dfeSmillert        sub first { shift()->{ first} }
53843003dfeSmillert        sub last { shift()->{ last} }
53943003dfeSmillert
54043003dfeSmillert        sub name {
54143003dfeSmillert            my $n = shift;
54243003dfeSmillert            join ' ' => $n->first, $n->last;
54343003dfeSmillert        }
54443003dfeSmillert
54543003dfeSmillert    }
54643003dfeSmillert
54743003dfeSmillert    {
54843003dfeSmillert        package Name_id;
54943003dfeSmillert        use Hash::Util::FieldHash qw(id);
55043003dfeSmillert
55143003dfeSmillert        my (%first, %last);
55243003dfeSmillert
55343003dfeSmillert        sub init {
55443003dfeSmillert            my $obj = shift;
55543003dfeSmillert            my ($first, $last) = @_;
55643003dfeSmillert            # create an object if called as class method
55743003dfeSmillert            $obj = bless \ my $o, $obj unless ref $obj;
55843003dfeSmillert            $first{ id $obj} = $first;
55943003dfeSmillert            $last{ id $obj} = $last;
56043003dfeSmillert            $obj;
56143003dfeSmillert        }
56243003dfeSmillert
56343003dfeSmillert        sub first { $first{ id shift()} }
56443003dfeSmillert        sub last { $last{ id shift()} }
56543003dfeSmillert
56643003dfeSmillert        sub name {
56743003dfeSmillert            my $n = shift;
56843003dfeSmillert            join ' ' => $n->first, $n->last;
56943003dfeSmillert        }
57043003dfeSmillert
57143003dfeSmillert        sub DESTROY {
57243003dfeSmillert            my $id = id shift;
57343003dfeSmillert            delete $first{ $id};
57443003dfeSmillert            delete $last{ $id};
57543003dfeSmillert        }
57643003dfeSmillert
57743003dfeSmillert    }
57843003dfeSmillert
57943003dfeSmillert    {
58043003dfeSmillert        package Name_idhash;
58143003dfeSmillert        use Hash::Util::FieldHash;
58243003dfeSmillert
58343003dfeSmillert        Hash::Util::FieldHash::idhashes( \ my (%first, %last) );
58443003dfeSmillert
58543003dfeSmillert        sub init {
58643003dfeSmillert            my $obj = shift;
58743003dfeSmillert            my ($first, $last) = @_;
58843003dfeSmillert            # create an object if called as class method
58943003dfeSmillert            $obj = bless \ my $o, $obj unless ref $obj;
59043003dfeSmillert            $first{ $obj} = $first;
59143003dfeSmillert            $last{ $obj} = $last;
59243003dfeSmillert            $obj;
59343003dfeSmillert        }
59443003dfeSmillert
59543003dfeSmillert        sub first { $first{ shift()} }
59643003dfeSmillert        sub last { $last{ shift()} }
59743003dfeSmillert
59843003dfeSmillert        sub name {
59943003dfeSmillert            my $n = shift;
60043003dfeSmillert            join ' ' => $n->first, $n->last;
60143003dfeSmillert        }
60243003dfeSmillert
60343003dfeSmillert        sub DESTROY {
60443003dfeSmillert            my $n = shift;
60543003dfeSmillert            delete $first{ $n};
60643003dfeSmillert            delete $last{ $n};
60743003dfeSmillert        }
60843003dfeSmillert
60943003dfeSmillert    }
61043003dfeSmillert
61143003dfeSmillert    {
61243003dfeSmillert        package Name_id_reg;
61343003dfeSmillert        use Hash::Util::FieldHash qw(id register);
61443003dfeSmillert
61543003dfeSmillert        my (%first, %last);
61643003dfeSmillert
61743003dfeSmillert        sub init {
61843003dfeSmillert            my $obj = shift;
61943003dfeSmillert            my ($first, $last) = @_;
62043003dfeSmillert            # create an object if called as class method
62143003dfeSmillert            $obj = bless \ my $o, $obj unless ref $obj;
62243003dfeSmillert            register( $obj, \ (%first, %last) );
62343003dfeSmillert            $first{ id $obj} = $first;
62443003dfeSmillert            $last{ id $obj} = $last;
62543003dfeSmillert            $obj;
62643003dfeSmillert        }
62743003dfeSmillert
62843003dfeSmillert        sub first { $first{ id shift()} }
62943003dfeSmillert        sub last { $last{ id shift()} }
63043003dfeSmillert
63143003dfeSmillert        sub name {
63243003dfeSmillert            my $n = shift;
63343003dfeSmillert            join ' ' => $n->first, $n->last;
63443003dfeSmillert        }
63543003dfeSmillert    }
63643003dfeSmillert
63743003dfeSmillert    {
63843003dfeSmillert        package Name_idhash_reg;
63943003dfeSmillert        use Hash::Util::FieldHash qw(register);
64043003dfeSmillert
64143003dfeSmillert        Hash::Util::FieldHash::idhashes \ my (%first, %last);
64243003dfeSmillert
64343003dfeSmillert        sub init {
64443003dfeSmillert            my $obj = shift;
64543003dfeSmillert            my ($first, $last) = @_;
64643003dfeSmillert            # create an object if called as class method
64743003dfeSmillert            $obj = bless \ my $o, $obj unless ref $obj;
64843003dfeSmillert            register( $obj, \ (%first, %last) );
64943003dfeSmillert            $first{ $obj} = $first;
65043003dfeSmillert            $last{ $obj} = $last;
65143003dfeSmillert            $obj;
65243003dfeSmillert        }
65343003dfeSmillert
65443003dfeSmillert        sub first { $first{ shift()} }
65543003dfeSmillert        sub last { $last{ shift()} }
65643003dfeSmillert
65743003dfeSmillert        sub name {
65843003dfeSmillert            my $n = shift;
65943003dfeSmillert            join ' ' => $n->first, $n->last;
66043003dfeSmillert        }
66143003dfeSmillert    }
66243003dfeSmillert
66343003dfeSmillert    {
66443003dfeSmillert        package Name_fieldhash;
66543003dfeSmillert        use Hash::Util::FieldHash;
66643003dfeSmillert
66743003dfeSmillert        Hash::Util::FieldHash::fieldhashes \ my (%first, %last);
66843003dfeSmillert
66943003dfeSmillert        sub init {
67043003dfeSmillert            my $obj = shift;
67143003dfeSmillert            my ($first, $last) = @_;
67243003dfeSmillert            # create an object if called as class method
67343003dfeSmillert            $obj = bless \ my $o, $obj unless ref $obj;
67443003dfeSmillert            $first{ $obj} = $first;
67543003dfeSmillert            $last{ $obj} = $last;
67643003dfeSmillert            $obj;
67743003dfeSmillert        }
67843003dfeSmillert
67943003dfeSmillert        sub first { $first{ shift()} }
68043003dfeSmillert        sub last { $last{ shift()} }
68143003dfeSmillert
68243003dfeSmillert        sub name {
68343003dfeSmillert            my $n = shift;
68443003dfeSmillert            join ' ' => $n->first, $n->last;
68543003dfeSmillert        }
68643003dfeSmillert    }
68743003dfeSmillert
68843003dfeSmillert    1;
68943003dfeSmillert
69043003dfeSmillertTo exercise the various implementations the script L<below|/"Example 2"> can
69143003dfeSmillertbe used.
69243003dfeSmillert
69343003dfeSmillertIt sets up a class C<Name> that is a mirror of one of the implementation
69443003dfeSmillertclasses C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>.  That determines
69543003dfeSmillertwhich implementation is run.
69643003dfeSmillert
69743003dfeSmillertThe script first verifies the function of the C<Name> class.
69843003dfeSmillert
69943003dfeSmillertIn the second step, the free inheritability of the implementation
70043003dfeSmillert(or lack thereof) is demonstrated.  For this purpose it constructs
70143003dfeSmillerta class called C<NamedFile> which is a common subclass of C<Name> and
70243003dfeSmillertthe standard class C<IO::File>.  This puts inheritability to the test
70343003dfeSmillertbecause objects of C<IO::File> I<must> be globrefs.  Objects of C<NamedFile>
70443003dfeSmillertshould behave like a file opened for reading and also support the C<name()>
70543003dfeSmillertmethod.  This class juncture works with the exception of the C<Name_hash>
70643003dfeSmillertimplementation, where object initialization fails because of the
70743003dfeSmillertincompatibility of object bodies.
70843003dfeSmillert
70943003dfeSmillert=head2 Example 2
71043003dfeSmillert
71143003dfeSmillert    use strict; use warnings; $| = 1;
71243003dfeSmillert
71343003dfeSmillert    use Example;
71443003dfeSmillert
71543003dfeSmillert    {
71643003dfeSmillert        package Name;
7176fb12b70Safresh1        use parent -norequire, 'Name_id';  # define here which implementation to run
71843003dfeSmillert    }
71943003dfeSmillert
72043003dfeSmillert
72143003dfeSmillert    # Verify that the base package works
72243003dfeSmillert    my $n = Name->init(qw(Albert Einstein));
72343003dfeSmillert    print $n->name, "\n";
72443003dfeSmillert    print "\n";
72543003dfeSmillert
72643003dfeSmillert    # Create a named file handle (See definition below)
72743003dfeSmillert    my $nf = NamedFile->init(qw(/tmp/x Filomena File));
72843003dfeSmillert    # use as a file handle...
72943003dfeSmillert    for ( 1 .. 3 ) {
73043003dfeSmillert        my $l = <$nf>;
73143003dfeSmillert        print "line $_: $l";
73243003dfeSmillert    }
73343003dfeSmillert    # ...and as a Name object
73443003dfeSmillert    print "...brought to you by ", $nf->name, "\n";
73543003dfeSmillert    exit;
73643003dfeSmillert
73743003dfeSmillert
73843003dfeSmillert    # Definition of NamedFile
73943003dfeSmillert    package NamedFile;
7406fb12b70Safresh1    use parent -norequire, 'Name';
7416fb12b70Safresh1    use parent 'IO::File';
74243003dfeSmillert
74343003dfeSmillert    sub init {
74443003dfeSmillert        my $obj = shift;
74543003dfeSmillert        my ($file, $first, $last) = @_;
74643003dfeSmillert        $obj = $obj->IO::File::new() unless ref $obj;
74743003dfeSmillert        $obj->open($file) or die "Can't read '$file': $!";
74843003dfeSmillert        $obj->Name::init($first, $last);
74943003dfeSmillert    }
75043003dfeSmillert    __END__
75143003dfeSmillert
75243003dfeSmillert
75343003dfeSmillert=head1 GUTS
75443003dfeSmillert
75543003dfeSmillertTo make C<Hash::Util::FieldHash> work, there were two changes to
756898184e3SsthenF<perl> itself.  C<PERL_MAGIC_uvar> was made available for hashes,
75743003dfeSmillertand weak references now call uvar C<set> magic after a weakref has been
75843003dfeSmillertcleared.  The first feature is used to make field hashes intercept
75943003dfeSmillerttheir keys upon access.  The second one triggers garbage collection.
76043003dfeSmillert
76143003dfeSmillert=head2 The C<PERL_MAGIC_uvar> interface for hashes
76243003dfeSmillert
76343003dfeSmillertC<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and
76443003dfeSmillertC<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which
76543003dfeSmillertdefines the interface.  The call happens for hashes with "uvar" magic
76643003dfeSmillertif the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set>
76743003dfeSmillertfields.  Hashes are unaffected if (and as long as) these fields
76843003dfeSmillerthold different values.
76943003dfeSmillert
77043003dfeSmillertUpon the call, the C<mg_obj> field will hold the hash key to be accessed.
77143003dfeSmillertUpon return, the C<SV*> value in C<mg_obj> will be used in place of the
77243003dfeSmillertoriginal key in the hash access.  The integer index value in the first
77343003dfeSmillertparameter will be the C<action> value from C<hv_fetch_common>, or -1
77443003dfeSmillertif the call is from C<hv_delete_common>.
77543003dfeSmillert
77643003dfeSmillertThis is a template for a function suitable for the C<uf_val> field in
77743003dfeSmillerta C<ufuncs> structure for this call.  The C<uf_set> and C<uf_index>
77843003dfeSmillertfields are irrelevant.
77943003dfeSmillert
78043003dfeSmillert    I32 watch_key(pTHX_ IV action, SV* field) {
78143003dfeSmillert        MAGIC* mg = mg_find(field, PERL_MAGIC_uvar);
78243003dfeSmillert        SV* keysv = mg->mg_obj;
78343003dfeSmillert        /* Do whatever you need to.  If you decide to
78443003dfeSmillert           supply a different key newkey, return it like this
78543003dfeSmillert        */
78643003dfeSmillert        sv_2mortal(newkey);
78743003dfeSmillert        mg->mg_obj = newkey;
78843003dfeSmillert        return 0;
78943003dfeSmillert    }
79043003dfeSmillert
79143003dfeSmillert=head2 Weakrefs call uvar magic
79243003dfeSmillert
79343003dfeSmillertWhen a weak reference is stored in an C<SV> that has "uvar" magic, C<set>
79443003dfeSmillertmagic is called after the reference has gone stale.  This hook can be
79543003dfeSmillertused to trigger further garbage-collection activities associated with
79643003dfeSmillertthe referenced object.
79743003dfeSmillert
79843003dfeSmillert=head2 How field hashes work
79943003dfeSmillert
80043003dfeSmillertThe three features of field hashes, I<key replacement>, I<thread support>,
80143003dfeSmillertand I<garbage collection>, are supported by a data structure called
80243003dfeSmillertthe I<object registry>.  This is a private hash where every object
80343003dfeSmillertis stored.  An "object" in this sense is any reference (blessed or
80443003dfeSmillertunblessed) that has been used as a field hash key.
80543003dfeSmillert
80643003dfeSmillertThe object registry keeps track of references that have been used as
80743003dfeSmillertfield hash keys.  The keys are generated from the reference address
80843003dfeSmillertlike in a field hash (though the registry isn't a field hash).  Each
80943003dfeSmillertvalue is a weak copy of the original reference, stored in an C<SV> that
81043003dfeSmillertis itself magical (C<PERL_MAGIC_uvar> again).  The magical structure
81143003dfeSmillertholds a list (another hash, really) of field hashes that the reference
81243003dfeSmillerthas been used with.  When the weakref becomes stale, the magic is
81343003dfeSmillertactivated and uses the list to delete the reference from all field
81443003dfeSmillerthashes it has been used with.  After that, the entry is removed from
81543003dfeSmillertthe object registry itself.  Implicitly, that frees the magic structure
81643003dfeSmillertand the storage it has been using.
81743003dfeSmillert
81843003dfeSmillertWhenever a reference is used as a field hash key, the object registry
81943003dfeSmillertis checked and a new entry is made if necessary.  The field hash is
82043003dfeSmillertthen added to the list of fields this reference has used.
82143003dfeSmillert
82243003dfeSmillertThe object registry is also used to repair a field hash after thread
82343003dfeSmillertcloning.  Here, the entire object registry is processed.  For every
82443003dfeSmillertreference found there, the field hashes it has used are visited and
82543003dfeSmillertthe entry is updated.
82643003dfeSmillert
82743003dfeSmillert=head2 Internal function Hash::Util::FieldHash::_fieldhash
82843003dfeSmillert
82943003dfeSmillert    # test if %hash is a field hash
83043003dfeSmillert    my $result = _fieldhash \ %hash, 0;
83143003dfeSmillert
83243003dfeSmillert    # make %hash a field hash
83343003dfeSmillert    my $result = _fieldhash \ %hash, 1;
83443003dfeSmillert
83543003dfeSmillertC<_fieldhash> is the internal function used to create field hashes.
83643003dfeSmillertIt takes two arguments, a hashref and a mode.  If the mode is boolean
83743003dfeSmillertfalse, the hash is not changed but tested if it is a field hash.  If
83843003dfeSmillertthe hash isn't a field hash the return value is boolean false.  If it
83943003dfeSmillertis, the return value indicates the mode of field hash.  When called with
84043003dfeSmillerta boolean true mode, it turns the given hash into a field hash of this
84143003dfeSmillertmode, returning the mode of the created field hash.  C<_fieldhash>
84243003dfeSmillertdoes not erase the given hash.
84343003dfeSmillert
84443003dfeSmillertCurrently there are two modes: 1 makes an id hash (as C<idhash> does)
84543003dfeSmillertand 2 makes a full field hash (as C<fieldhash> does).  That may change.
84643003dfeSmillert
84743003dfeSmillert=head1 AUTHOR
84843003dfeSmillert
84943003dfeSmillertAnno Siegel (ANNO) wrote the xs code and the changes in perl proper.
85043003dfeSmillertJerry Hedden (JDHEDDEN) made it faster.
85143003dfeSmillert
85243003dfeSmillert=head1 COPYRIGHT AND LICENSE
85343003dfeSmillert
85443003dfeSmillertCopyright (C) 2006-2007 by (Anno Siegel)
85543003dfeSmillert
85643003dfeSmillertThis library is free software; you can redistribute it and/or modify
85743003dfeSmillertit under the same terms as Perl itself, either Perl version 5.8.7 or,
85843003dfeSmillertat your option, any later version of Perl 5 you may have available.
85943003dfeSmillert
86043003dfeSmillert=cut
861