1*0Sstevel@tonic-gate# -*- mode: perl; perl-indent-level: 2; -*- 2*0Sstevel@tonic-gate# Memoize.pm 3*0Sstevel@tonic-gate# 4*0Sstevel@tonic-gate# Transparent memoization of idempotent functions 5*0Sstevel@tonic-gate# 6*0Sstevel@tonic-gate# Copyright 1998, 1999, 2000, 2001 M-J. Dominus. 7*0Sstevel@tonic-gate# You may copy and distribute this program under the 8*0Sstevel@tonic-gate# same terms as Perl itself. If in doubt, 9*0Sstevel@tonic-gate# write to mjd-perl-memoize+@plover.com for a license. 10*0Sstevel@tonic-gate# 11*0Sstevel@tonic-gate# Version 1.01 $Revision: 1.18 $ $Date: 2001/06/24 17:16:47 $ 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gatepackage Memoize; 14*0Sstevel@tonic-gate$VERSION = '1.01'; 15*0Sstevel@tonic-gate 16*0Sstevel@tonic-gate# Compile-time constants 17*0Sstevel@tonic-gatesub SCALAR () { 0 } 18*0Sstevel@tonic-gatesub LIST () { 1 } 19*0Sstevel@tonic-gate 20*0Sstevel@tonic-gate 21*0Sstevel@tonic-gate# 22*0Sstevel@tonic-gate# Usage memoize(functionname/ref, 23*0Sstevel@tonic-gate# { NORMALIZER => coderef, INSTALL => name, 24*0Sstevel@tonic-gate# LIST_CACHE => descriptor, SCALAR_CACHE => descriptor } 25*0Sstevel@tonic-gate# 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gateuse Carp; 28*0Sstevel@tonic-gateuse Exporter; 29*0Sstevel@tonic-gateuse vars qw($DEBUG); 30*0Sstevel@tonic-gateuse Config; # Dammit. 31*0Sstevel@tonic-gate@ISA = qw(Exporter); 32*0Sstevel@tonic-gate@EXPORT = qw(memoize); 33*0Sstevel@tonic-gate@EXPORT_OK = qw(unmemoize flush_cache); 34*0Sstevel@tonic-gateuse strict; 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gatemy %memotable; 37*0Sstevel@tonic-gatemy %revmemotable; 38*0Sstevel@tonic-gatemy @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH); 39*0Sstevel@tonic-gatemy %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS; 40*0Sstevel@tonic-gate 41*0Sstevel@tonic-gate# Raise an error if the user tries to specify one of thesepackage as a 42*0Sstevel@tonic-gate# tie for LIST_CACHE 43*0Sstevel@tonic-gate 44*0Sstevel@tonic-gatemy %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File); 45*0Sstevel@tonic-gate 46*0Sstevel@tonic-gatesub memoize { 47*0Sstevel@tonic-gate my $fn = shift; 48*0Sstevel@tonic-gate my %options = @_; 49*0Sstevel@tonic-gate my $options = \%options; 50*0Sstevel@tonic-gate 51*0Sstevel@tonic-gate unless (defined($fn) && 52*0Sstevel@tonic-gate (ref $fn eq 'CODE' || ref $fn eq '')) { 53*0Sstevel@tonic-gate croak "Usage: memoize 'functionname'|coderef {OPTIONS}"; 54*0Sstevel@tonic-gate } 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gate my $uppack = caller; # TCL me Elmo! 57*0Sstevel@tonic-gate my $cref; # Code reference to original function 58*0Sstevel@tonic-gate my $name = (ref $fn ? undef : $fn); 59*0Sstevel@tonic-gate 60*0Sstevel@tonic-gate # Convert function names to code references 61*0Sstevel@tonic-gate $cref = &_make_cref($fn, $uppack); 62*0Sstevel@tonic-gate 63*0Sstevel@tonic-gate # Locate function prototype, if any 64*0Sstevel@tonic-gate my $proto = prototype $cref; 65*0Sstevel@tonic-gate if (defined $proto) { $proto = "($proto)" } 66*0Sstevel@tonic-gate else { $proto = "" } 67*0Sstevel@tonic-gate 68*0Sstevel@tonic-gate # I would like to get rid of the eval, but there seems not to be any 69*0Sstevel@tonic-gate # other way to set the prototype properly. The switch here for 70*0Sstevel@tonic-gate # 'usethreads' works around a bug in threadperl having to do with 71*0Sstevel@tonic-gate # magic goto. It would be better to fix the bug and use the magic 72*0Sstevel@tonic-gate # goto version everywhere. 73*0Sstevel@tonic-gate my $wrapper = 74*0Sstevel@tonic-gate $Config{usethreads} 75*0Sstevel@tonic-gate ? eval "sub $proto { &_memoizer(\$cref, \@_); }" 76*0Sstevel@tonic-gate : eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }"; 77*0Sstevel@tonic-gate 78*0Sstevel@tonic-gate my $normalizer = $options{NORMALIZER}; 79*0Sstevel@tonic-gate if (defined $normalizer && ! ref $normalizer) { 80*0Sstevel@tonic-gate $normalizer = _make_cref($normalizer, $uppack); 81*0Sstevel@tonic-gate } 82*0Sstevel@tonic-gate 83*0Sstevel@tonic-gate my $install_name; 84*0Sstevel@tonic-gate if (defined $options->{INSTALL}) { 85*0Sstevel@tonic-gate # INSTALL => name 86*0Sstevel@tonic-gate $install_name = $options->{INSTALL}; 87*0Sstevel@tonic-gate } elsif (! exists $options->{INSTALL}) { 88*0Sstevel@tonic-gate # No INSTALL option provided; use original name if possible 89*0Sstevel@tonic-gate $install_name = $name; 90*0Sstevel@tonic-gate } else { 91*0Sstevel@tonic-gate # INSTALL => undef means don't install 92*0Sstevel@tonic-gate } 93*0Sstevel@tonic-gate 94*0Sstevel@tonic-gate if (defined $install_name) { 95*0Sstevel@tonic-gate $install_name = $uppack . '::' . $install_name 96*0Sstevel@tonic-gate unless $install_name =~ /::/; 97*0Sstevel@tonic-gate no strict; 98*0Sstevel@tonic-gate local($^W) = 0; # ``Subroutine $install_name redefined at ...'' 99*0Sstevel@tonic-gate *{$install_name} = $wrapper; # Install memoized version 100*0Sstevel@tonic-gate } 101*0Sstevel@tonic-gate 102*0Sstevel@tonic-gate $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key 103*0Sstevel@tonic-gate 104*0Sstevel@tonic-gate # These will be the caches 105*0Sstevel@tonic-gate my %caches; 106*0Sstevel@tonic-gate for my $context (qw(SCALAR LIST)) { 107*0Sstevel@tonic-gate # suppress subsequent 'uninitialized value' warnings 108*0Sstevel@tonic-gate $options{"${context}_CACHE"} ||= ''; 109*0Sstevel@tonic-gate 110*0Sstevel@tonic-gate my $cache_opt = $options{"${context}_CACHE"}; 111*0Sstevel@tonic-gate my @cache_opt_args; 112*0Sstevel@tonic-gate if (ref $cache_opt) { 113*0Sstevel@tonic-gate @cache_opt_args = @$cache_opt; 114*0Sstevel@tonic-gate $cache_opt = shift @cache_opt_args; 115*0Sstevel@tonic-gate } 116*0Sstevel@tonic-gate if ($cache_opt eq 'FAULT') { # no cache 117*0Sstevel@tonic-gate $caches{$context} = undef; 118*0Sstevel@tonic-gate } elsif ($cache_opt eq 'HASH') { # user-supplied hash 119*0Sstevel@tonic-gate my $cache = $cache_opt_args[0]; 120*0Sstevel@tonic-gate my $package = ref(tied %$cache); 121*0Sstevel@tonic-gate if ($context eq 'LIST' && $scalar_only{$package}) { 122*0Sstevel@tonic-gate croak("You can't use $package for LIST_CACHE because it can only store scalars"); 123*0Sstevel@tonic-gate } 124*0Sstevel@tonic-gate $caches{$context} = $cache; 125*0Sstevel@tonic-gate } elsif ($cache_opt eq '' || $IS_CACHE_TAG{$cache_opt}) { 126*0Sstevel@tonic-gate # default is that we make up an in-memory hash 127*0Sstevel@tonic-gate $caches{$context} = {}; 128*0Sstevel@tonic-gate # (this might get tied later, or MERGEd away) 129*0Sstevel@tonic-gate } else { 130*0Sstevel@tonic-gate croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting"; 131*0Sstevel@tonic-gate } 132*0Sstevel@tonic-gate } 133*0Sstevel@tonic-gate 134*0Sstevel@tonic-gate # Perhaps I should check here that you didn't supply *both* merge 135*0Sstevel@tonic-gate # options. But if you did, it does do something reasonable: They 136*0Sstevel@tonic-gate # both get merged to the same in-memory hash. 137*0Sstevel@tonic-gate if ($options{SCALAR_CACHE} eq 'MERGE') { 138*0Sstevel@tonic-gate $caches{SCALAR} = $caches{LIST}; 139*0Sstevel@tonic-gate } elsif ($options{LIST_CACHE} eq 'MERGE') { 140*0Sstevel@tonic-gate $caches{LIST} = $caches{SCALAR}; 141*0Sstevel@tonic-gate } 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate # Now deal with the TIE options 144*0Sstevel@tonic-gate { 145*0Sstevel@tonic-gate my $context; 146*0Sstevel@tonic-gate foreach $context (qw(SCALAR LIST)) { 147*0Sstevel@tonic-gate # If the relevant option wasn't `TIE', this call does nothing. 148*0Sstevel@tonic-gate _my_tie($context, $caches{$context}, $options); # Croaks on failure 149*0Sstevel@tonic-gate } 150*0Sstevel@tonic-gate } 151*0Sstevel@tonic-gate 152*0Sstevel@tonic-gate # We should put some more stuff in here eventually. 153*0Sstevel@tonic-gate # We've been saying that for serveral versions now. 154*0Sstevel@tonic-gate # And you know what? More stuff keeps going in! 155*0Sstevel@tonic-gate $memotable{$cref} = 156*0Sstevel@tonic-gate { 157*0Sstevel@tonic-gate O => $options, # Short keys here for things we need to access frequently 158*0Sstevel@tonic-gate N => $normalizer, 159*0Sstevel@tonic-gate U => $cref, 160*0Sstevel@tonic-gate MEMOIZED => $wrapper, 161*0Sstevel@tonic-gate PACKAGE => $uppack, 162*0Sstevel@tonic-gate NAME => $install_name, 163*0Sstevel@tonic-gate S => $caches{SCALAR}, 164*0Sstevel@tonic-gate L => $caches{LIST}, 165*0Sstevel@tonic-gate }; 166*0Sstevel@tonic-gate 167*0Sstevel@tonic-gate $wrapper # Return just memoized version 168*0Sstevel@tonic-gate} 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate# This function tries to load a tied hash class and tie the hash to it. 171*0Sstevel@tonic-gatesub _my_tie { 172*0Sstevel@tonic-gate my ($context, $hash, $options) = @_; 173*0Sstevel@tonic-gate my $fullopt = $options->{"${context}_CACHE"}; 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate # We already checked to make sure that this works. 176*0Sstevel@tonic-gate my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt; 177*0Sstevel@tonic-gate 178*0Sstevel@tonic-gate return unless defined $shortopt && $shortopt eq 'TIE'; 179*0Sstevel@tonic-gate carp("TIE option to memoize() is deprecated; use HASH instead") 180*0Sstevel@tonic-gate if $^W; 181*0Sstevel@tonic-gate 182*0Sstevel@tonic-gate my @args = ref $fullopt ? @$fullopt : (); 183*0Sstevel@tonic-gate shift @args; 184*0Sstevel@tonic-gate my $module = shift @args; 185*0Sstevel@tonic-gate if ($context eq 'LIST' && $scalar_only{$module}) { 186*0Sstevel@tonic-gate croak("You can't use $module for LIST_CACHE because it can only store scalars"); 187*0Sstevel@tonic-gate } 188*0Sstevel@tonic-gate my $modulefile = $module . '.pm'; 189*0Sstevel@tonic-gate $modulefile =~ s{::}{/}g; 190*0Sstevel@tonic-gate eval { require $modulefile }; 191*0Sstevel@tonic-gate if ($@) { 192*0Sstevel@tonic-gate croak "Memoize: Couldn't load hash tie module `$module': $@; aborting"; 193*0Sstevel@tonic-gate } 194*0Sstevel@tonic-gate my $rc = (tie %$hash => $module, @args); 195*0Sstevel@tonic-gate unless ($rc) { 196*0Sstevel@tonic-gate croak "Memoize: Couldn't tie hash to `$module': $!; aborting"; 197*0Sstevel@tonic-gate } 198*0Sstevel@tonic-gate 1; 199*0Sstevel@tonic-gate} 200*0Sstevel@tonic-gate 201*0Sstevel@tonic-gatesub flush_cache { 202*0Sstevel@tonic-gate my $func = _make_cref($_[0], scalar caller); 203*0Sstevel@tonic-gate my $info = $memotable{$revmemotable{$func}}; 204*0Sstevel@tonic-gate die "$func not memoized" unless defined $info; 205*0Sstevel@tonic-gate for my $context (qw(S L)) { 206*0Sstevel@tonic-gate my $cache = $info->{$context}; 207*0Sstevel@tonic-gate if (tied %$cache && ! (tied %$cache)->can('CLEAR')) { 208*0Sstevel@tonic-gate my $funcname = defined($info->{NAME}) ? 209*0Sstevel@tonic-gate "function $info->{NAME}" : "anonymous function $func"; 210*0Sstevel@tonic-gate my $context = {S => 'scalar', L => 'list'}->{$context}; 211*0Sstevel@tonic-gate croak "Tied cache hash for $context-context $funcname does not support flushing"; 212*0Sstevel@tonic-gate } else { 213*0Sstevel@tonic-gate %$cache = (); 214*0Sstevel@tonic-gate } 215*0Sstevel@tonic-gate } 216*0Sstevel@tonic-gate} 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate# This is the function that manages the memo tables. 219*0Sstevel@tonic-gatesub _memoizer { 220*0Sstevel@tonic-gate my $orig = shift; # stringized version of ref to original func. 221*0Sstevel@tonic-gate my $info = $memotable{$orig}; 222*0Sstevel@tonic-gate my $normalizer = $info->{N}; 223*0Sstevel@tonic-gate 224*0Sstevel@tonic-gate my $argstr; 225*0Sstevel@tonic-gate my $context = (wantarray() ? LIST : SCALAR); 226*0Sstevel@tonic-gate 227*0Sstevel@tonic-gate if (defined $normalizer) { 228*0Sstevel@tonic-gate no strict; 229*0Sstevel@tonic-gate if ($context == SCALAR) { 230*0Sstevel@tonic-gate $argstr = &{$normalizer}(@_); 231*0Sstevel@tonic-gate } elsif ($context == LIST) { 232*0Sstevel@tonic-gate ($argstr) = &{$normalizer}(@_); 233*0Sstevel@tonic-gate } else { 234*0Sstevel@tonic-gate croak "Internal error \#41; context was neither LIST nor SCALAR\n"; 235*0Sstevel@tonic-gate } 236*0Sstevel@tonic-gate } else { # Default normalizer 237*0Sstevel@tonic-gate local $^W = 0; 238*0Sstevel@tonic-gate $argstr = join chr(28),@_; 239*0Sstevel@tonic-gate } 240*0Sstevel@tonic-gate 241*0Sstevel@tonic-gate if ($context == SCALAR) { 242*0Sstevel@tonic-gate my $cache = $info->{S}; 243*0Sstevel@tonic-gate _crap_out($info->{NAME}, 'scalar') unless $cache; 244*0Sstevel@tonic-gate if (exists $cache->{$argstr}) { 245*0Sstevel@tonic-gate return $cache->{$argstr}; 246*0Sstevel@tonic-gate } else { 247*0Sstevel@tonic-gate my $val = &{$info->{U}}(@_); 248*0Sstevel@tonic-gate # Scalars are considered to be lists; store appropriately 249*0Sstevel@tonic-gate if ($info->{O}{SCALAR_CACHE} eq 'MERGE') { 250*0Sstevel@tonic-gate $cache->{$argstr} = [$val]; 251*0Sstevel@tonic-gate } else { 252*0Sstevel@tonic-gate $cache->{$argstr} = $val; 253*0Sstevel@tonic-gate } 254*0Sstevel@tonic-gate $val; 255*0Sstevel@tonic-gate } 256*0Sstevel@tonic-gate } elsif ($context == LIST) { 257*0Sstevel@tonic-gate my $cache = $info->{L}; 258*0Sstevel@tonic-gate _crap_out($info->{NAME}, 'list') unless $cache; 259*0Sstevel@tonic-gate if (exists $cache->{$argstr}) { 260*0Sstevel@tonic-gate my $val = $cache->{$argstr}; 261*0Sstevel@tonic-gate # If LISTCONTEXT=>MERGE, then the function never returns lists, 262*0Sstevel@tonic-gate # so we have a scalar value cached, so just return it straightaway: 263*0Sstevel@tonic-gate return ($val) if $info->{O}{LIST_CACHE} eq 'MERGE'; 264*0Sstevel@tonic-gate # Maybe in a later version we can use a faster test. 265*0Sstevel@tonic-gate 266*0Sstevel@tonic-gate # Otherwise, we cached an array containing the returned list: 267*0Sstevel@tonic-gate return @$val; 268*0Sstevel@tonic-gate } else { 269*0Sstevel@tonic-gate my $q = $cache->{$argstr} = [&{$info->{U}}(@_)]; 270*0Sstevel@tonic-gate @$q; 271*0Sstevel@tonic-gate } 272*0Sstevel@tonic-gate } else { 273*0Sstevel@tonic-gate croak "Internal error \#42; context was neither LIST nor SCALAR\n"; 274*0Sstevel@tonic-gate } 275*0Sstevel@tonic-gate} 276*0Sstevel@tonic-gate 277*0Sstevel@tonic-gatesub unmemoize { 278*0Sstevel@tonic-gate my $f = shift; 279*0Sstevel@tonic-gate my $uppack = caller; 280*0Sstevel@tonic-gate my $cref = _make_cref($f, $uppack); 281*0Sstevel@tonic-gate 282*0Sstevel@tonic-gate unless (exists $revmemotable{$cref}) { 283*0Sstevel@tonic-gate croak "Could not unmemoize function `$f', because it was not memoized to begin with"; 284*0Sstevel@tonic-gate } 285*0Sstevel@tonic-gate 286*0Sstevel@tonic-gate my $tabent = $memotable{$revmemotable{$cref}}; 287*0Sstevel@tonic-gate unless (defined $tabent) { 288*0Sstevel@tonic-gate croak "Could not figure out how to unmemoize function `$f'"; 289*0Sstevel@tonic-gate } 290*0Sstevel@tonic-gate my $name = $tabent->{NAME}; 291*0Sstevel@tonic-gate if (defined $name) { 292*0Sstevel@tonic-gate no strict; 293*0Sstevel@tonic-gate local($^W) = 0; # ``Subroutine $install_name redefined at ...'' 294*0Sstevel@tonic-gate *{$name} = $tabent->{U}; # Replace with original function 295*0Sstevel@tonic-gate } 296*0Sstevel@tonic-gate undef $memotable{$revmemotable{$cref}}; 297*0Sstevel@tonic-gate undef $revmemotable{$cref}; 298*0Sstevel@tonic-gate 299*0Sstevel@tonic-gate # This removes the last reference to the (possibly tied) memo tables 300*0Sstevel@tonic-gate # my ($old_function, $memotabs) = @{$tabent}{'U','S','L'}; 301*0Sstevel@tonic-gate # undef $tabent; 302*0Sstevel@tonic-gate 303*0Sstevel@tonic-gate# # Untie the memo tables if they were tied. 304*0Sstevel@tonic-gate# my $i; 305*0Sstevel@tonic-gate# for $i (0,1) { 306*0Sstevel@tonic-gate# if (tied %{$memotabs->[$i]}) { 307*0Sstevel@tonic-gate# warn "Untying hash #$i\n"; 308*0Sstevel@tonic-gate# untie %{$memotabs->[$i]}; 309*0Sstevel@tonic-gate# } 310*0Sstevel@tonic-gate# } 311*0Sstevel@tonic-gate 312*0Sstevel@tonic-gate $tabent->{U}; 313*0Sstevel@tonic-gate} 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gatesub _make_cref { 316*0Sstevel@tonic-gate my $fn = shift; 317*0Sstevel@tonic-gate my $uppack = shift; 318*0Sstevel@tonic-gate my $cref; 319*0Sstevel@tonic-gate my $name; 320*0Sstevel@tonic-gate 321*0Sstevel@tonic-gate if (ref $fn eq 'CODE') { 322*0Sstevel@tonic-gate $cref = $fn; 323*0Sstevel@tonic-gate } elsif (! ref $fn) { 324*0Sstevel@tonic-gate if ($fn =~ /::/) { 325*0Sstevel@tonic-gate $name = $fn; 326*0Sstevel@tonic-gate } else { 327*0Sstevel@tonic-gate $name = $uppack . '::' . $fn; 328*0Sstevel@tonic-gate } 329*0Sstevel@tonic-gate no strict; 330*0Sstevel@tonic-gate if (defined $name and !defined(&$name)) { 331*0Sstevel@tonic-gate croak "Cannot operate on nonexistent function `$fn'"; 332*0Sstevel@tonic-gate } 333*0Sstevel@tonic-gate# $cref = \&$name; 334*0Sstevel@tonic-gate $cref = *{$name}{CODE}; 335*0Sstevel@tonic-gate } else { 336*0Sstevel@tonic-gate my $parent = (caller(1))[3]; # Function that called _make_cref 337*0Sstevel@tonic-gate croak "Usage: argument 1 to `$parent' must be a function name or reference.\n"; 338*0Sstevel@tonic-gate } 339*0Sstevel@tonic-gate $DEBUG and warn "${name}($fn) => $cref in _make_cref\n"; 340*0Sstevel@tonic-gate $cref; 341*0Sstevel@tonic-gate} 342*0Sstevel@tonic-gate 343*0Sstevel@tonic-gatesub _crap_out { 344*0Sstevel@tonic-gate my ($funcname, $context) = @_; 345*0Sstevel@tonic-gate if (defined $funcname) { 346*0Sstevel@tonic-gate croak "Function `$funcname' called in forbidden $context context; faulting"; 347*0Sstevel@tonic-gate } else { 348*0Sstevel@tonic-gate croak "Anonymous function called in forbidden $context context; faulting"; 349*0Sstevel@tonic-gate } 350*0Sstevel@tonic-gate} 351*0Sstevel@tonic-gate 352*0Sstevel@tonic-gate1; 353*0Sstevel@tonic-gate 354*0Sstevel@tonic-gate 355*0Sstevel@tonic-gate 356*0Sstevel@tonic-gate 357*0Sstevel@tonic-gate 358*0Sstevel@tonic-gate=head1 NAME 359*0Sstevel@tonic-gate 360*0Sstevel@tonic-gateMemoize - Make functions faster by trading space for time 361*0Sstevel@tonic-gate 362*0Sstevel@tonic-gate=head1 SYNOPSIS 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate # This is the documentation for Memoize 1.01 365*0Sstevel@tonic-gate use Memoize; 366*0Sstevel@tonic-gate memoize('slow_function'); 367*0Sstevel@tonic-gate slow_function(arguments); # Is faster than it was before 368*0Sstevel@tonic-gate 369*0Sstevel@tonic-gate 370*0Sstevel@tonic-gateThis is normally all you need to know. However, many options are available: 371*0Sstevel@tonic-gate 372*0Sstevel@tonic-gate memoize(function, options...); 373*0Sstevel@tonic-gate 374*0Sstevel@tonic-gateOptions include: 375*0Sstevel@tonic-gate 376*0Sstevel@tonic-gate NORMALIZER => function 377*0Sstevel@tonic-gate INSTALL => new_name 378*0Sstevel@tonic-gate 379*0Sstevel@tonic-gate SCALAR_CACHE => 'MEMORY' 380*0Sstevel@tonic-gate SCALAR_CACHE => ['HASH', \%cache_hash ] 381*0Sstevel@tonic-gate SCALAR_CACHE => 'FAULT' 382*0Sstevel@tonic-gate SCALAR_CACHE => 'MERGE' 383*0Sstevel@tonic-gate 384*0Sstevel@tonic-gate LIST_CACHE => 'MEMORY' 385*0Sstevel@tonic-gate LIST_CACHE => ['HASH', \%cache_hash ] 386*0Sstevel@tonic-gate LIST_CACHE => 'FAULT' 387*0Sstevel@tonic-gate LIST_CACHE => 'MERGE' 388*0Sstevel@tonic-gate 389*0Sstevel@tonic-gate=head1 DESCRIPTION 390*0Sstevel@tonic-gate 391*0Sstevel@tonic-gate`Memoizing' a function makes it faster by trading space for time. It 392*0Sstevel@tonic-gatedoes this by caching the return values of the function in a table. 393*0Sstevel@tonic-gateIf you call the function again with the same arguments, C<memoize> 394*0Sstevel@tonic-gatejumps in and gives you the value out of the table, instead of letting 395*0Sstevel@tonic-gatethe function compute the value all over again. 396*0Sstevel@tonic-gate 397*0Sstevel@tonic-gateHere is an extreme example. Consider the Fibonacci sequence, defined 398*0Sstevel@tonic-gateby the following function: 399*0Sstevel@tonic-gate 400*0Sstevel@tonic-gate # Compute Fibonacci numbers 401*0Sstevel@tonic-gate sub fib { 402*0Sstevel@tonic-gate my $n = shift; 403*0Sstevel@tonic-gate return $n if $n < 2; 404*0Sstevel@tonic-gate fib($n-1) + fib($n-2); 405*0Sstevel@tonic-gate } 406*0Sstevel@tonic-gate 407*0Sstevel@tonic-gateThis function is very slow. Why? To compute fib(14), it first wants 408*0Sstevel@tonic-gateto compute fib(13) and fib(12), and add the results. But to compute 409*0Sstevel@tonic-gatefib(13), it first has to compute fib(12) and fib(11), and then it 410*0Sstevel@tonic-gatecomes back and computes fib(12) all over again even though the answer 411*0Sstevel@tonic-gateis the same. And both of the times that it wants to compute fib(12), 412*0Sstevel@tonic-gateit has to compute fib(11) from scratch, and then it has to do it 413*0Sstevel@tonic-gateagain each time it wants to compute fib(13). This function does so 414*0Sstevel@tonic-gatemuch recomputing of old results that it takes a really long time to 415*0Sstevel@tonic-gaterun---fib(14) makes 1,200 extra recursive calls to itself, to compute 416*0Sstevel@tonic-gateand recompute things that it already computed. 417*0Sstevel@tonic-gate 418*0Sstevel@tonic-gateThis function is a good candidate for memoization. If you memoize the 419*0Sstevel@tonic-gate`fib' function above, it will compute fib(14) exactly once, the first 420*0Sstevel@tonic-gatetime it needs to, and then save the result in a table. Then if you 421*0Sstevel@tonic-gateask for fib(14) again, it gives you the result out of the table. 422*0Sstevel@tonic-gateWhile computing fib(14), instead of computing fib(12) twice, it does 423*0Sstevel@tonic-gateit once; the second time it needs the value it gets it from the table. 424*0Sstevel@tonic-gateIt doesn't compute fib(11) four times; it computes it once, getting it 425*0Sstevel@tonic-gatefrom the table the next three times. Instead of making 1,200 426*0Sstevel@tonic-gaterecursive calls to `fib', it makes 15. This makes the function about 427*0Sstevel@tonic-gate150 times faster. 428*0Sstevel@tonic-gate 429*0Sstevel@tonic-gateYou could do the memoization yourself, by rewriting the function, like 430*0Sstevel@tonic-gatethis: 431*0Sstevel@tonic-gate 432*0Sstevel@tonic-gate # Compute Fibonacci numbers, memoized version 433*0Sstevel@tonic-gate { my @fib; 434*0Sstevel@tonic-gate sub fib { 435*0Sstevel@tonic-gate my $n = shift; 436*0Sstevel@tonic-gate return $fib[$n] if defined $fib[$n]; 437*0Sstevel@tonic-gate return $fib[$n] = $n if $n < 2; 438*0Sstevel@tonic-gate $fib[$n] = fib($n-1) + fib($n-2); 439*0Sstevel@tonic-gate } 440*0Sstevel@tonic-gate } 441*0Sstevel@tonic-gate 442*0Sstevel@tonic-gateOr you could use this module, like this: 443*0Sstevel@tonic-gate 444*0Sstevel@tonic-gate use Memoize; 445*0Sstevel@tonic-gate memoize('fib'); 446*0Sstevel@tonic-gate 447*0Sstevel@tonic-gate # Rest of the fib function just like the original version. 448*0Sstevel@tonic-gate 449*0Sstevel@tonic-gateThis makes it easy to turn memoizing on and off. 450*0Sstevel@tonic-gate 451*0Sstevel@tonic-gateHere's an even simpler example: I wrote a simple ray tracer; the 452*0Sstevel@tonic-gateprogram would look in a certain direction, figure out what it was 453*0Sstevel@tonic-gatelooking at, and then convert the `color' value (typically a string 454*0Sstevel@tonic-gatelike `red') of that object to a red, green, and blue pixel value, like 455*0Sstevel@tonic-gatethis: 456*0Sstevel@tonic-gate 457*0Sstevel@tonic-gate for ($direction = 0; $direction < 300; $direction++) { 458*0Sstevel@tonic-gate # Figure out which object is in direction $direction 459*0Sstevel@tonic-gate $color = $object->{color}; 460*0Sstevel@tonic-gate ($r, $g, $b) = @{&ColorToRGB($color)}; 461*0Sstevel@tonic-gate ... 462*0Sstevel@tonic-gate } 463*0Sstevel@tonic-gate 464*0Sstevel@tonic-gateSince there are relatively few objects in a picture, there are only a 465*0Sstevel@tonic-gatefew colors, which get looked up over and over again. Memoizing 466*0Sstevel@tonic-gateC<ColorToRGB> sped up the program by several percent. 467*0Sstevel@tonic-gate 468*0Sstevel@tonic-gate=head1 DETAILS 469*0Sstevel@tonic-gate 470*0Sstevel@tonic-gateThis module exports exactly one function, C<memoize>. The rest of the 471*0Sstevel@tonic-gatefunctions in this package are None of Your Business. 472*0Sstevel@tonic-gate 473*0Sstevel@tonic-gateYou should say 474*0Sstevel@tonic-gate 475*0Sstevel@tonic-gate memoize(function) 476*0Sstevel@tonic-gate 477*0Sstevel@tonic-gatewhere C<function> is the name of the function you want to memoize, or 478*0Sstevel@tonic-gatea reference to it. C<memoize> returns a reference to the new, 479*0Sstevel@tonic-gatememoized version of the function, or C<undef> on a non-fatal error. 480*0Sstevel@tonic-gateAt present, there are no non-fatal errors, but there might be some in 481*0Sstevel@tonic-gatethe future. 482*0Sstevel@tonic-gate 483*0Sstevel@tonic-gateIf C<function> was the name of a function, then C<memoize> hides the 484*0Sstevel@tonic-gateold version and installs the new memoized version under the old name, 485*0Sstevel@tonic-gateso that C<&function(...)> actually invokes the memoized version. 486*0Sstevel@tonic-gate 487*0Sstevel@tonic-gate=head1 OPTIONS 488*0Sstevel@tonic-gate 489*0Sstevel@tonic-gateThere are some optional options you can pass to C<memoize> to change 490*0Sstevel@tonic-gatethe way it behaves a little. To supply options, invoke C<memoize> 491*0Sstevel@tonic-gatelike this: 492*0Sstevel@tonic-gate 493*0Sstevel@tonic-gate memoize(function, NORMALIZER => function, 494*0Sstevel@tonic-gate INSTALL => newname, 495*0Sstevel@tonic-gate SCALAR_CACHE => option, 496*0Sstevel@tonic-gate LIST_CACHE => option 497*0Sstevel@tonic-gate ); 498*0Sstevel@tonic-gate 499*0Sstevel@tonic-gateEach of these options is optional; you can include some, all, or none 500*0Sstevel@tonic-gateof them. 501*0Sstevel@tonic-gate 502*0Sstevel@tonic-gate=head2 INSTALL 503*0Sstevel@tonic-gate 504*0Sstevel@tonic-gateIf you supply a function name with C<INSTALL>, memoize will install 505*0Sstevel@tonic-gatethe new, memoized version of the function under the name you give. 506*0Sstevel@tonic-gateFor example, 507*0Sstevel@tonic-gate 508*0Sstevel@tonic-gate memoize('fib', INSTALL => 'fastfib') 509*0Sstevel@tonic-gate 510*0Sstevel@tonic-gateinstalls the memoized version of C<fib> as C<fastfib>; without the 511*0Sstevel@tonic-gateC<INSTALL> option it would have replaced the old C<fib> with the 512*0Sstevel@tonic-gatememoized version. 513*0Sstevel@tonic-gate 514*0Sstevel@tonic-gateTo prevent C<memoize> from installing the memoized version anywhere, use 515*0Sstevel@tonic-gateC<INSTALL =E<gt> undef>. 516*0Sstevel@tonic-gate 517*0Sstevel@tonic-gate=head2 NORMALIZER 518*0Sstevel@tonic-gate 519*0Sstevel@tonic-gateSuppose your function looks like this: 520*0Sstevel@tonic-gate 521*0Sstevel@tonic-gate # Typical call: f('aha!', A => 11, B => 12); 522*0Sstevel@tonic-gate sub f { 523*0Sstevel@tonic-gate my $a = shift; 524*0Sstevel@tonic-gate my %hash = @_; 525*0Sstevel@tonic-gate $hash{B} ||= 2; # B defaults to 2 526*0Sstevel@tonic-gate $hash{C} ||= 7; # C defaults to 7 527*0Sstevel@tonic-gate 528*0Sstevel@tonic-gate # Do something with $a, %hash 529*0Sstevel@tonic-gate } 530*0Sstevel@tonic-gate 531*0Sstevel@tonic-gateNow, the following calls to your function are all completely equivalent: 532*0Sstevel@tonic-gate 533*0Sstevel@tonic-gate f(OUCH); 534*0Sstevel@tonic-gate f(OUCH, B => 2); 535*0Sstevel@tonic-gate f(OUCH, C => 7); 536*0Sstevel@tonic-gate f(OUCH, B => 2, C => 7); 537*0Sstevel@tonic-gate f(OUCH, C => 7, B => 2); 538*0Sstevel@tonic-gate (etc.) 539*0Sstevel@tonic-gate 540*0Sstevel@tonic-gateHowever, unless you tell C<Memoize> that these calls are equivalent, 541*0Sstevel@tonic-gateit will not know that, and it will compute the values for these 542*0Sstevel@tonic-gateinvocations of your function separately, and store them separately. 543*0Sstevel@tonic-gate 544*0Sstevel@tonic-gateTo prevent this, supply a C<NORMALIZER> function that turns the 545*0Sstevel@tonic-gateprogram arguments into a string in a way that equivalent arguments 546*0Sstevel@tonic-gateturn into the same string. A C<NORMALIZER> function for C<f> above 547*0Sstevel@tonic-gatemight look like this: 548*0Sstevel@tonic-gate 549*0Sstevel@tonic-gate sub normalize_f { 550*0Sstevel@tonic-gate my $a = shift; 551*0Sstevel@tonic-gate my %hash = @_; 552*0Sstevel@tonic-gate $hash{B} ||= 2; 553*0Sstevel@tonic-gate $hash{C} ||= 7; 554*0Sstevel@tonic-gate 555*0Sstevel@tonic-gate join(',', $a, map ($_ => $hash{$_}) sort keys %hash); 556*0Sstevel@tonic-gate } 557*0Sstevel@tonic-gate 558*0Sstevel@tonic-gateEach of the argument lists above comes out of the C<normalize_f> 559*0Sstevel@tonic-gatefunction looking exactly the same, like this: 560*0Sstevel@tonic-gate 561*0Sstevel@tonic-gate OUCH,B,2,C,7 562*0Sstevel@tonic-gate 563*0Sstevel@tonic-gateYou would tell C<Memoize> to use this normalizer this way: 564*0Sstevel@tonic-gate 565*0Sstevel@tonic-gate memoize('f', NORMALIZER => 'normalize_f'); 566*0Sstevel@tonic-gate 567*0Sstevel@tonic-gateC<memoize> knows that if the normalized version of the arguments is 568*0Sstevel@tonic-gatethe same for two argument lists, then it can safely look up the value 569*0Sstevel@tonic-gatethat it computed for one argument list and return it as the result of 570*0Sstevel@tonic-gatecalling the function with the other argument list, even if the 571*0Sstevel@tonic-gateargument lists look different. 572*0Sstevel@tonic-gate 573*0Sstevel@tonic-gateThe default normalizer just concatenates the arguments with character 574*0Sstevel@tonic-gate28 in between. (In ASCII, this is called FS or control-\.) This 575*0Sstevel@tonic-gatealways works correctly for functions with only one string argument, 576*0Sstevel@tonic-gateand also when the arguments never contain character 28. However, it 577*0Sstevel@tonic-gatecan confuse certain argument lists: 578*0Sstevel@tonic-gate 579*0Sstevel@tonic-gate normalizer("a\034", "b") 580*0Sstevel@tonic-gate normalizer("a", "\034b") 581*0Sstevel@tonic-gate normalizer("a\034\034b") 582*0Sstevel@tonic-gate 583*0Sstevel@tonic-gatefor example. 584*0Sstevel@tonic-gate 585*0Sstevel@tonic-gateSince hash keys are strings, the default normalizer will not 586*0Sstevel@tonic-gatedistinguish between C<undef> and the empty string. It also won't work 587*0Sstevel@tonic-gatewhen the function's arguments are references. For example, consider a 588*0Sstevel@tonic-gatefunction C<g> which gets two arguments: A number, and a reference to 589*0Sstevel@tonic-gatean array of numbers: 590*0Sstevel@tonic-gate 591*0Sstevel@tonic-gate g(13, [1,2,3,4,5,6,7]); 592*0Sstevel@tonic-gate 593*0Sstevel@tonic-gateThe default normalizer will turn this into something like 594*0Sstevel@tonic-gateC<"13\034ARRAY(0x436c1f)">. That would be all right, except that a 595*0Sstevel@tonic-gatesubsequent array of numbers might be stored at a different location 596*0Sstevel@tonic-gateeven though it contains the same data. If this happens, C<Memoize> 597*0Sstevel@tonic-gatewill think that the arguments are different, even though they are 598*0Sstevel@tonic-gateequivalent. In this case, a normalizer like this is appropriate: 599*0Sstevel@tonic-gate 600*0Sstevel@tonic-gate sub normalize { join ' ', $_[0], @{$_[1]} } 601*0Sstevel@tonic-gate 602*0Sstevel@tonic-gateFor the example above, this produces the key "13 1 2 3 4 5 6 7". 603*0Sstevel@tonic-gate 604*0Sstevel@tonic-gateAnother use for normalizers is when the function depends on data other 605*0Sstevel@tonic-gatethan those in its arguments. Suppose you have a function which 606*0Sstevel@tonic-gatereturns a value which depends on the current hour of the day: 607*0Sstevel@tonic-gate 608*0Sstevel@tonic-gate sub on_duty { 609*0Sstevel@tonic-gate my ($problem_type) = @_; 610*0Sstevel@tonic-gate my $hour = (localtime)[2]; 611*0Sstevel@tonic-gate open my $fh, "$DIR/$problem_type" or die...; 612*0Sstevel@tonic-gate my $line; 613*0Sstevel@tonic-gate while ($hour-- > 0) 614*0Sstevel@tonic-gate $line = <$fh>; 615*0Sstevel@tonic-gate } 616*0Sstevel@tonic-gate return $line; 617*0Sstevel@tonic-gate } 618*0Sstevel@tonic-gate 619*0Sstevel@tonic-gateAt 10:23, this function generates the 10th line of a data file; at 620*0Sstevel@tonic-gate3:45 PM it generates the 15th line instead. By default, C<Memoize> 621*0Sstevel@tonic-gatewill only see the $problem_type argument. To fix this, include the 622*0Sstevel@tonic-gatecurrent hour in the normalizer: 623*0Sstevel@tonic-gate 624*0Sstevel@tonic-gate sub normalize { join ' ', (localtime)[2], @_ } 625*0Sstevel@tonic-gate 626*0Sstevel@tonic-gateThe calling context of the function (scalar or list context) is 627*0Sstevel@tonic-gatepropagated to the normalizer. This means that if the memoized 628*0Sstevel@tonic-gatefunction will treat its arguments differently in list context than it 629*0Sstevel@tonic-gatewould in scalar context, you can have the normalizer function select 630*0Sstevel@tonic-gateits behavior based on the results of C<wantarray>. Even if called in 631*0Sstevel@tonic-gatea list context, a normalizer should still return a single string. 632*0Sstevel@tonic-gate 633*0Sstevel@tonic-gate=head2 C<SCALAR_CACHE>, C<LIST_CACHE> 634*0Sstevel@tonic-gate 635*0Sstevel@tonic-gateNormally, C<Memoize> caches your function's return values into an 636*0Sstevel@tonic-gateordinary Perl hash variable. However, you might like to have the 637*0Sstevel@tonic-gatevalues cached on the disk, so that they persist from one run of your 638*0Sstevel@tonic-gateprogram to the next, or you might like to associate some other 639*0Sstevel@tonic-gateinteresting semantics with the cached values. 640*0Sstevel@tonic-gate 641*0Sstevel@tonic-gateThere's a slight complication under the hood of C<Memoize>: There are 642*0Sstevel@tonic-gateactually I<two> caches, one for scalar values and one for list values. 643*0Sstevel@tonic-gateWhen your function is called in scalar context, its return value is 644*0Sstevel@tonic-gatecached in one hash, and when your function is called in list context, 645*0Sstevel@tonic-gateits value is cached in the other hash. You can control the caching 646*0Sstevel@tonic-gatebehavior of both contexts independently with these options. 647*0Sstevel@tonic-gate 648*0Sstevel@tonic-gateThe argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of 649*0Sstevel@tonic-gatethe following four strings: 650*0Sstevel@tonic-gate 651*0Sstevel@tonic-gate MEMORY 652*0Sstevel@tonic-gate FAULT 653*0Sstevel@tonic-gate MERGE 654*0Sstevel@tonic-gate HASH 655*0Sstevel@tonic-gate 656*0Sstevel@tonic-gateor else it must be a reference to a list whose first element is one of 657*0Sstevel@tonic-gatethese four strings, such as C<[HASH, arguments...]>. 658*0Sstevel@tonic-gate 659*0Sstevel@tonic-gate=over 4 660*0Sstevel@tonic-gate 661*0Sstevel@tonic-gate=item C<MEMORY> 662*0Sstevel@tonic-gate 663*0Sstevel@tonic-gateC<MEMORY> means that return values from the function will be cached in 664*0Sstevel@tonic-gatean ordinary Perl hash variable. The hash variable will not persist 665*0Sstevel@tonic-gateafter the program exits. This is the default. 666*0Sstevel@tonic-gate 667*0Sstevel@tonic-gate=item C<HASH> 668*0Sstevel@tonic-gate 669*0Sstevel@tonic-gateC<HASH> allows you to specify that a particular hash that you supply 670*0Sstevel@tonic-gatewill be used as the cache. You can tie this hash beforehand to give 671*0Sstevel@tonic-gateit any behavior you want. 672*0Sstevel@tonic-gate 673*0Sstevel@tonic-gateA tied hash can have any semantics at all. It is typically tied to an 674*0Sstevel@tonic-gateon-disk database, so that cached values are stored in the database and 675*0Sstevel@tonic-gateretrieved from it again when needed, and the disk file typically 676*0Sstevel@tonic-gatepersists after your program has exited. See C<perltie> for more 677*0Sstevel@tonic-gatecomplete details about C<tie>. 678*0Sstevel@tonic-gate 679*0Sstevel@tonic-gateA typical example is: 680*0Sstevel@tonic-gate 681*0Sstevel@tonic-gate use DB_File; 682*0Sstevel@tonic-gate tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666; 683*0Sstevel@tonic-gate memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 684*0Sstevel@tonic-gate 685*0Sstevel@tonic-gateThis has the effect of storing the cache in a C<DB_File> database 686*0Sstevel@tonic-gatewhose name is in C<$filename>. The cache will persist after the 687*0Sstevel@tonic-gateprogram has exited. Next time the program runs, it will find the 688*0Sstevel@tonic-gatecache already populated from the previous run of the program. Or you 689*0Sstevel@tonic-gatecan forcibly populate the cache by constructing a batch program that 690*0Sstevel@tonic-gateruns in the background and populates the cache file. Then when you 691*0Sstevel@tonic-gatecome to run your real program the memoized function will be fast 692*0Sstevel@tonic-gatebecause all its results have been precomputed. 693*0Sstevel@tonic-gate 694*0Sstevel@tonic-gate=item C<TIE> 695*0Sstevel@tonic-gate 696*0Sstevel@tonic-gateThis option is no longer supported. It is still documented only to 697*0Sstevel@tonic-gateaid in the debugging of old programs that use it. Old programs should 698*0Sstevel@tonic-gatebe converted to use the C<HASH> option instead. 699*0Sstevel@tonic-gate 700*0Sstevel@tonic-gate memoize ... [TIE, PACKAGE, ARGS...] 701*0Sstevel@tonic-gate 702*0Sstevel@tonic-gateis merely a shortcut for 703*0Sstevel@tonic-gate 704*0Sstevel@tonic-gate require PACKAGE; 705*0Sstevel@tonic-gate { my %cache; 706*0Sstevel@tonic-gate tie %cache, PACKAGE, ARGS...; 707*0Sstevel@tonic-gate } 708*0Sstevel@tonic-gate memoize ... [HASH => \%cache]; 709*0Sstevel@tonic-gate 710*0Sstevel@tonic-gate=item C<FAULT> 711*0Sstevel@tonic-gate 712*0Sstevel@tonic-gateC<FAULT> means that you never expect to call the function in scalar 713*0Sstevel@tonic-gate(or list) context, and that if C<Memoize> detects such a call, it 714*0Sstevel@tonic-gateshould abort the program. The error message is one of 715*0Sstevel@tonic-gate 716*0Sstevel@tonic-gate `foo' function called in forbidden list context at line ... 717*0Sstevel@tonic-gate `foo' function called in forbidden scalar context at line ... 718*0Sstevel@tonic-gate 719*0Sstevel@tonic-gate=item C<MERGE> 720*0Sstevel@tonic-gate 721*0Sstevel@tonic-gateC<MERGE> normally means the function does not distinguish between list 722*0Sstevel@tonic-gateand sclar context, and that return values in both contexts should be 723*0Sstevel@tonic-gatestored together. C<LIST_CACHE =E<gt> MERGE> means that list context 724*0Sstevel@tonic-gatereturn values should be stored in the same hash that is used for 725*0Sstevel@tonic-gatescalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the 726*0Sstevel@tonic-gatesame, mutatis mutandis. It is an error to specify C<MERGE> for both, 727*0Sstevel@tonic-gatebut it probably does something useful. 728*0Sstevel@tonic-gate 729*0Sstevel@tonic-gateConsider this function: 730*0Sstevel@tonic-gate 731*0Sstevel@tonic-gate sub pi { 3; } 732*0Sstevel@tonic-gate 733*0Sstevel@tonic-gateNormally, the following code will result in two calls to C<pi>: 734*0Sstevel@tonic-gate 735*0Sstevel@tonic-gate $x = pi(); 736*0Sstevel@tonic-gate ($y) = pi(); 737*0Sstevel@tonic-gate $z = pi(); 738*0Sstevel@tonic-gate 739*0Sstevel@tonic-gateThe first call caches the value C<3> in the scalar cache; the second 740*0Sstevel@tonic-gatecaches the list C<(3)> in the list cache. The third call doesn't call 741*0Sstevel@tonic-gatethe real C<pi> function; it gets the value from the scalar cache. 742*0Sstevel@tonic-gate 743*0Sstevel@tonic-gateObviously, the second call to C<pi> is a waste of time, and storing 744*0Sstevel@tonic-gateits return value is a waste of space. Specifying C<LIST_CACHE =E<gt> 745*0Sstevel@tonic-gateMERGE> will make C<memoize> use the same cache for scalar and list 746*0Sstevel@tonic-gatecontext return values, so that the second call uses the scalar cache 747*0Sstevel@tonic-gatethat was populated by the first call. C<pi> ends up being called only 748*0Sstevel@tonic-gateonce, and both subsequent calls return C<3> from the cache, regardless 749*0Sstevel@tonic-gateof the calling context. 750*0Sstevel@tonic-gate 751*0Sstevel@tonic-gateAnother use for C<MERGE> is when you want both kinds of return values 752*0Sstevel@tonic-gatestored in the same disk file; this saves you from having to deal with 753*0Sstevel@tonic-gatetwo disk files instead of one. You can use a normalizer function to 754*0Sstevel@tonic-gatekeep the two sets of return values separate. For example: 755*0Sstevel@tonic-gate 756*0Sstevel@tonic-gate tie my %cache => 'MLDBM', 'DB_File', $filename, ...; 757*0Sstevel@tonic-gate 758*0Sstevel@tonic-gate memoize 'myfunc', 759*0Sstevel@tonic-gate NORMALIZER => 'n', 760*0Sstevel@tonic-gate SCALAR_CACHE => [HASH => \%cache], 761*0Sstevel@tonic-gate LIST_CACHE => MERGE, 762*0Sstevel@tonic-gate ; 763*0Sstevel@tonic-gate 764*0Sstevel@tonic-gate sub n { 765*0Sstevel@tonic-gate my $context = wantarray() ? 'L' : 'S'; 766*0Sstevel@tonic-gate # ... now compute the hash key from the arguments ... 767*0Sstevel@tonic-gate $hashkey = "$context:$hashkey"; 768*0Sstevel@tonic-gate } 769*0Sstevel@tonic-gate 770*0Sstevel@tonic-gateThis normalizer function will store scalar context return values in 771*0Sstevel@tonic-gatethe disk file under keys that begin with C<S:>, and list context 772*0Sstevel@tonic-gatereturn values under keys that begin with C<L:>. 773*0Sstevel@tonic-gate 774*0Sstevel@tonic-gate=back 775*0Sstevel@tonic-gate 776*0Sstevel@tonic-gate=head1 OTHER FACILITIES 777*0Sstevel@tonic-gate 778*0Sstevel@tonic-gate=head2 C<unmemoize> 779*0Sstevel@tonic-gate 780*0Sstevel@tonic-gateThere's an C<unmemoize> function that you can import if you want to. 781*0Sstevel@tonic-gateWhy would you want to? Here's an example: Suppose you have your cache 782*0Sstevel@tonic-gatetied to a DBM file, and you want to make sure that the cache is 783*0Sstevel@tonic-gatewritten out to disk if someone interrupts the program. If the program 784*0Sstevel@tonic-gateexits normally, this will happen anyway, but if someone types 785*0Sstevel@tonic-gatecontrol-C or something then the program will terminate immediately 786*0Sstevel@tonic-gatewithout synchronizing the database. So what you can do instead is 787*0Sstevel@tonic-gate 788*0Sstevel@tonic-gate $SIG{INT} = sub { unmemoize 'function' }; 789*0Sstevel@tonic-gate 790*0Sstevel@tonic-gateC<unmemoize> accepts a reference to, or the name of a previously 791*0Sstevel@tonic-gatememoized function, and undoes whatever it did to provide the memoized 792*0Sstevel@tonic-gateversion in the first place, including making the name refer to the 793*0Sstevel@tonic-gateunmemoized version if appropriate. It returns a reference to the 794*0Sstevel@tonic-gateunmemoized version of the function. 795*0Sstevel@tonic-gate 796*0Sstevel@tonic-gateIf you ask it to unmemoize a function that was never memoized, it 797*0Sstevel@tonic-gatecroaks. 798*0Sstevel@tonic-gate 799*0Sstevel@tonic-gate=head2 C<flush_cache> 800*0Sstevel@tonic-gate 801*0Sstevel@tonic-gateC<flush_cache(function)> will flush out the caches, discarding I<all> 802*0Sstevel@tonic-gatethe cached data. The argument may be a function name or a reference 803*0Sstevel@tonic-gateto a function. For finer control over when data is discarded or 804*0Sstevel@tonic-gateexpired, see the documentation for C<Memoize::Expire>, included in 805*0Sstevel@tonic-gatethis package. 806*0Sstevel@tonic-gate 807*0Sstevel@tonic-gateNote that if the cache is a tied hash, C<flush_cache> will attempt to 808*0Sstevel@tonic-gateinvoke the C<CLEAR> method on the hash. If there is no C<CLEAR> 809*0Sstevel@tonic-gatemethod, this will cause a run-time error. 810*0Sstevel@tonic-gate 811*0Sstevel@tonic-gateAn alternative approach to cache flushing is to use the C<HASH> option 812*0Sstevel@tonic-gate(see above) to request that C<Memoize> use a particular hash variable 813*0Sstevel@tonic-gateas its cache. Then you can examine or modify the hash at any time in 814*0Sstevel@tonic-gateany way you desire. You may flush the cache by using C<%hash = ()>. 815*0Sstevel@tonic-gate 816*0Sstevel@tonic-gate=head1 CAVEATS 817*0Sstevel@tonic-gate 818*0Sstevel@tonic-gateMemoization is not a cure-all: 819*0Sstevel@tonic-gate 820*0Sstevel@tonic-gate=over 4 821*0Sstevel@tonic-gate 822*0Sstevel@tonic-gate=item * 823*0Sstevel@tonic-gate 824*0Sstevel@tonic-gateDo not memoize a function whose behavior depends on program 825*0Sstevel@tonic-gatestate other than its own arguments, such as global variables, the time 826*0Sstevel@tonic-gateof day, or file input. These functions will not produce correct 827*0Sstevel@tonic-gateresults when memoized. For a particularly easy example: 828*0Sstevel@tonic-gate 829*0Sstevel@tonic-gate sub f { 830*0Sstevel@tonic-gate time; 831*0Sstevel@tonic-gate } 832*0Sstevel@tonic-gate 833*0Sstevel@tonic-gateThis function takes no arguments, and as far as C<Memoize> is 834*0Sstevel@tonic-gateconcerned, it always returns the same result. C<Memoize> is wrong, of 835*0Sstevel@tonic-gatecourse, and the memoized version of this function will call C<time> once 836*0Sstevel@tonic-gateto get the current time, and it will return that same time 837*0Sstevel@tonic-gateevery time you call it after that. 838*0Sstevel@tonic-gate 839*0Sstevel@tonic-gate=item * 840*0Sstevel@tonic-gate 841*0Sstevel@tonic-gateDo not memoize a function with side effects. 842*0Sstevel@tonic-gate 843*0Sstevel@tonic-gate sub f { 844*0Sstevel@tonic-gate my ($a, $b) = @_; 845*0Sstevel@tonic-gate my $s = $a + $b; 846*0Sstevel@tonic-gate print "$a + $b = $s.\n"; 847*0Sstevel@tonic-gate } 848*0Sstevel@tonic-gate 849*0Sstevel@tonic-gateThis function accepts two arguments, adds them, and prints their sum. 850*0Sstevel@tonic-gateIts return value is the numuber of characters it printed, but you 851*0Sstevel@tonic-gateprobably didn't care about that. But C<Memoize> doesn't understand 852*0Sstevel@tonic-gatethat. If you memoize this function, you will get the result you 853*0Sstevel@tonic-gateexpect the first time you ask it to print the sum of 2 and 3, but 854*0Sstevel@tonic-gatesubsequent calls will return 1 (the return value of 855*0Sstevel@tonic-gateC<print>) without actually printing anything. 856*0Sstevel@tonic-gate 857*0Sstevel@tonic-gate=item * 858*0Sstevel@tonic-gate 859*0Sstevel@tonic-gateDo not memoize a function that returns a data structure that is 860*0Sstevel@tonic-gatemodified by its caller. 861*0Sstevel@tonic-gate 862*0Sstevel@tonic-gateConsider these functions: C<getusers> returns a list of users somehow, 863*0Sstevel@tonic-gateand then C<main> throws away the first user on the list and prints the 864*0Sstevel@tonic-gaterest: 865*0Sstevel@tonic-gate 866*0Sstevel@tonic-gate sub main { 867*0Sstevel@tonic-gate my $userlist = getusers(); 868*0Sstevel@tonic-gate shift @$userlist; 869*0Sstevel@tonic-gate foreach $u (@$userlist) { 870*0Sstevel@tonic-gate print "User $u\n"; 871*0Sstevel@tonic-gate } 872*0Sstevel@tonic-gate } 873*0Sstevel@tonic-gate 874*0Sstevel@tonic-gate sub getusers { 875*0Sstevel@tonic-gate my @users; 876*0Sstevel@tonic-gate # Do something to get a list of users; 877*0Sstevel@tonic-gate \@users; # Return reference to list. 878*0Sstevel@tonic-gate } 879*0Sstevel@tonic-gate 880*0Sstevel@tonic-gateIf you memoize C<getusers> here, it will work right exactly once. The 881*0Sstevel@tonic-gatereference to the users list will be stored in the memo table. C<main> 882*0Sstevel@tonic-gatewill discard the first element from the referenced list. The next 883*0Sstevel@tonic-gatetime you invoke C<main>, C<Memoize> will not call C<getusers>; it will 884*0Sstevel@tonic-gatejust return the same reference to the same list it got last time. But 885*0Sstevel@tonic-gatethis time the list has already had its head removed; C<main> will 886*0Sstevel@tonic-gateerroneously remove another element from it. The list will get shorter 887*0Sstevel@tonic-gateand shorter every time you call C<main>. 888*0Sstevel@tonic-gate 889*0Sstevel@tonic-gateSimilarly, this: 890*0Sstevel@tonic-gate 891*0Sstevel@tonic-gate $u1 = getusers(); 892*0Sstevel@tonic-gate $u2 = getusers(); 893*0Sstevel@tonic-gate pop @$u1; 894*0Sstevel@tonic-gate 895*0Sstevel@tonic-gatewill modify $u2 as well as $u1, because both variables are references 896*0Sstevel@tonic-gateto the same array. Had C<getusers> not been memoized, $u1 and $u2 897*0Sstevel@tonic-gatewould have referred to different arrays. 898*0Sstevel@tonic-gate 899*0Sstevel@tonic-gate=item * 900*0Sstevel@tonic-gate 901*0Sstevel@tonic-gateDo not memoize a very simple function. 902*0Sstevel@tonic-gate 903*0Sstevel@tonic-gateRecently someone mentioned to me that the Memoize module made his 904*0Sstevel@tonic-gateprogram run slower instead of faster. It turned out that he was 905*0Sstevel@tonic-gatememoizing the following function: 906*0Sstevel@tonic-gate 907*0Sstevel@tonic-gate sub square { 908*0Sstevel@tonic-gate $_[0] * $_[0]; 909*0Sstevel@tonic-gate } 910*0Sstevel@tonic-gate 911*0Sstevel@tonic-gateI pointed out that C<Memoize> uses a hash, and that looking up a 912*0Sstevel@tonic-gatenumber in the hash is necessarily going to take a lot longer than a 913*0Sstevel@tonic-gatesingle multiplication. There really is no way to speed up the 914*0Sstevel@tonic-gateC<square> function. 915*0Sstevel@tonic-gate 916*0Sstevel@tonic-gateMemoization is not magical. 917*0Sstevel@tonic-gate 918*0Sstevel@tonic-gate=back 919*0Sstevel@tonic-gate 920*0Sstevel@tonic-gate=head1 PERSISTENT CACHE SUPPORT 921*0Sstevel@tonic-gate 922*0Sstevel@tonic-gateYou can tie the cache tables to any sort of tied hash that you want 923*0Sstevel@tonic-gateto, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and 924*0Sstevel@tonic-gateC<EXISTS>. For example, 925*0Sstevel@tonic-gate 926*0Sstevel@tonic-gate tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666; 927*0Sstevel@tonic-gate memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 928*0Sstevel@tonic-gate 929*0Sstevel@tonic-gateworks just fine. For some storage methods, you need a little glue. 930*0Sstevel@tonic-gate 931*0Sstevel@tonic-gateC<SDBM_File> doesn't supply an C<EXISTS> method, so included in this 932*0Sstevel@tonic-gatepackage is a glue module called C<Memoize::SDBM_File> which does 933*0Sstevel@tonic-gateprovide one. Use this instead of plain C<SDBM_File> to store your 934*0Sstevel@tonic-gatecache table on disk in an C<SDBM_File> database: 935*0Sstevel@tonic-gate 936*0Sstevel@tonic-gate tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666; 937*0Sstevel@tonic-gate memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 938*0Sstevel@tonic-gate 939*0Sstevel@tonic-gateC<NDBM_File> has the same problem and the same solution. (Use 940*0Sstevel@tonic-gateC<Memoize::NDBM_File instead of plain NDBM_File.>) 941*0Sstevel@tonic-gate 942*0Sstevel@tonic-gateC<Storable> isn't a tied hash class at all. You can use it to store a 943*0Sstevel@tonic-gatehash to disk and retrieve it again, but you can't modify the hash while 944*0Sstevel@tonic-gateit's on the disk. So if you want to store your cache table in a 945*0Sstevel@tonic-gateC<Storable> database, use C<Memoize::Storable>, which puts a hashlike 946*0Sstevel@tonic-gatefront-end onto C<Storable>. The hash table is actually kept in 947*0Sstevel@tonic-gatememory, and is loaded from your C<Storable> file at the time you 948*0Sstevel@tonic-gatememoize the function, and stored back at the time you unmemoize the 949*0Sstevel@tonic-gatefunction (or when your program exits): 950*0Sstevel@tonic-gate 951*0Sstevel@tonic-gate tie my %cache => 'Memoize::Storable', $filename; 952*0Sstevel@tonic-gate memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 953*0Sstevel@tonic-gate 954*0Sstevel@tonic-gate tie my %cache => 'Memoize::Storable', $filename, 'nstore'; 955*0Sstevel@tonic-gate memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 956*0Sstevel@tonic-gate 957*0Sstevel@tonic-gateInclude the `nstore' option to have the C<Storable> database written 958*0Sstevel@tonic-gatein `network order'. (See L<Storable> for more details about this.) 959*0Sstevel@tonic-gate 960*0Sstevel@tonic-gateThe C<flush_cache()> function will raise a run-time error unless the 961*0Sstevel@tonic-gatetied package provides a C<CLEAR> method. 962*0Sstevel@tonic-gate 963*0Sstevel@tonic-gate=head1 EXPIRATION SUPPORT 964*0Sstevel@tonic-gate 965*0Sstevel@tonic-gateSee Memoize::Expire, which is a plug-in module that adds expiration 966*0Sstevel@tonic-gatefunctionality to Memoize. If you don't like the kinds of policies 967*0Sstevel@tonic-gatethat Memoize::Expire implements, it is easy to write your own plug-in 968*0Sstevel@tonic-gatemodule to implement whatever policy you desire. Memoize comes with 969*0Sstevel@tonic-gateseveral examples. An expiration manager that implements a LRU policy 970*0Sstevel@tonic-gateis available on CPAN as Memoize::ExpireLRU. 971*0Sstevel@tonic-gate 972*0Sstevel@tonic-gate=head1 BUGS 973*0Sstevel@tonic-gate 974*0Sstevel@tonic-gateThe test suite is much better, but always needs improvement. 975*0Sstevel@tonic-gate 976*0Sstevel@tonic-gateThere is some problem with the way C<goto &f> works under threaded 977*0Sstevel@tonic-gatePerl, perhaps because of the lexical scoping of C<@_>. This is a bug 978*0Sstevel@tonic-gatein Perl, and until it is resolved, memoized functions will see a 979*0Sstevel@tonic-gateslightly different C<caller()> and will perform a little more slowly 980*0Sstevel@tonic-gateon threaded perls than unthreaded perls. 981*0Sstevel@tonic-gate 982*0Sstevel@tonic-gateSome versions of C<DB_File> won't let you store data under a key of 983*0Sstevel@tonic-gatelength 0. That means that if you have a function C<f> which you 984*0Sstevel@tonic-gatememoized and the cache is in a C<DB_File> database, then the value of 985*0Sstevel@tonic-gateC<f()> (C<f> called with no arguments) will not be memoized. If this 986*0Sstevel@tonic-gateis a big problem, you can supply a normalizer function that prepends 987*0Sstevel@tonic-gateC<"x"> to every key. 988*0Sstevel@tonic-gate 989*0Sstevel@tonic-gate=head1 MAILING LIST 990*0Sstevel@tonic-gate 991*0Sstevel@tonic-gateTo join a very low-traffic mailing list for announcements about 992*0Sstevel@tonic-gateC<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>. 993*0Sstevel@tonic-gate 994*0Sstevel@tonic-gate=head1 AUTHOR 995*0Sstevel@tonic-gate 996*0Sstevel@tonic-gateMark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co. 997*0Sstevel@tonic-gate 998*0Sstevel@tonic-gateSee the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/ 999*0Sstevel@tonic-gatefor news and upgrades. Near this page, at 1000*0Sstevel@tonic-gatehttp://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about 1001*0Sstevel@tonic-gatememoization and about the internals of Memoize that appeared in The 1002*0Sstevel@tonic-gatePerl Journal, issue #13. (This article is also included in the 1003*0Sstevel@tonic-gateMemoize distribution as `article.html'.) 1004*0Sstevel@tonic-gate 1005*0Sstevel@tonic-gateMy upcoming book will discuss memoization (and many other fascinating 1006*0Sstevel@tonic-gatetopics) in tremendous detail. It will be published by Morgan Kaufmann 1007*0Sstevel@tonic-gatein 2002, possibly under the title I<Perl Advanced Techniques 1008*0Sstevel@tonic-gateHandbook>. It will also be available on-line for free. For more 1009*0Sstevel@tonic-gateinformation, visit http://perl.plover.com/book/ . 1010*0Sstevel@tonic-gate 1011*0Sstevel@tonic-gateTo join a mailing list for announcements about C<Memoize>, send an 1012*0Sstevel@tonic-gateempty message to C<mjd-perl-memoize-request@plover.com>. This mailing 1013*0Sstevel@tonic-gatelist is for announcements only and has extremely low traffic---about 1014*0Sstevel@tonic-gatetwo messages per year. 1015*0Sstevel@tonic-gate 1016*0Sstevel@tonic-gate=head1 COPYRIGHT AND LICENSE 1017*0Sstevel@tonic-gate 1018*0Sstevel@tonic-gateCopyright 1998, 1999, 2000, 2001 by Mark Jason Dominus 1019*0Sstevel@tonic-gate 1020*0Sstevel@tonic-gateThis library is free software; you may redistribute it and/or modify 1021*0Sstevel@tonic-gateit under the same terms as Perl itself. 1022*0Sstevel@tonic-gate 1023*0Sstevel@tonic-gate=head1 THANK YOU 1024*0Sstevel@tonic-gate 1025*0Sstevel@tonic-gateMany thanks to Jonathan Roy for bug reports and suggestions, to 1026*0Sstevel@tonic-gateMichael Schwern for other bug reports and patches, to Mike Cariaso for 1027*0Sstevel@tonic-gatehelping me to figure out the Right Thing to Do About Expiration, to 1028*0Sstevel@tonic-gateJoshua Gerth, Joshua Chamas, Jonathan Roy (again), Mark D. Anderson, 1029*0Sstevel@tonic-gateand Andrew Johnson for more suggestions about expiration, to Brent 1030*0Sstevel@tonic-gatePowers for the Memoize::ExpireLRU module, to Ariel Scolnicov for 1031*0Sstevel@tonic-gatedelightful messages about the Fibonacci function, to Dion Almaer for 1032*0Sstevel@tonic-gatethought-provoking suggestions about the default normalizer, to Walt 1033*0Sstevel@tonic-gateMankowski and Kurt Starsinic for much help investigating problems 1034*0Sstevel@tonic-gateunder threaded Perl, to Alex Dudkevich for reporting the bug in 1035*0Sstevel@tonic-gateprototyped functions and for checking my patch, to Tony Bass for many 1036*0Sstevel@tonic-gatehelpful suggestions, to Jonathan Roy (again) for finding a use for 1037*0Sstevel@tonic-gateC<unmemoize()>, to Philippe Verdret for enlightening discussion of 1038*0Sstevel@tonic-gateC<Hook::PrePostCall>, to Nat Torkington for advice I ignored, to Chris 1039*0Sstevel@tonic-gateNandor for portability advice, to Randal Schwartz for suggesting the 1040*0Sstevel@tonic-gate'C<flush_cache> function, and to Jenda Krynicky for being a light in 1041*0Sstevel@tonic-gatethe world. 1042*0Sstevel@tonic-gate 1043*0Sstevel@tonic-gateSpecial thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including 1044*0Sstevel@tonic-gatethis module in the core and for his patient and helpful guidance 1045*0Sstevel@tonic-gateduring the integration process. 1046*0Sstevel@tonic-gate 1047*0Sstevel@tonic-gate=cut 1048