xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Memoize.pm (revision 0:68f95e015346)
1*0Sstevel@tonic-gate# -*- mode: perl; perl-indent-level: 2; -*-
2*0Sstevel@tonic-gate# Memoize.pm
3*0Sstevel@tonic-gate#
4*0Sstevel@tonic-gate# Transparent memoization of idempotent functions
5*0Sstevel@tonic-gate#
6*0Sstevel@tonic-gate# Copyright 1998, 1999, 2000, 2001 M-J. Dominus.
7*0Sstevel@tonic-gate# You may copy and distribute this program under the
8*0Sstevel@tonic-gate# same terms as Perl itself.  If in doubt,
9*0Sstevel@tonic-gate# write to mjd-perl-memoize+@plover.com for a license.
10*0Sstevel@tonic-gate#
11*0Sstevel@tonic-gate# Version 1.01 $Revision: 1.18 $ $Date: 2001/06/24 17:16:47 $
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gatepackage Memoize;
14*0Sstevel@tonic-gate$VERSION = '1.01';
15*0Sstevel@tonic-gate
16*0Sstevel@tonic-gate# Compile-time constants
17*0Sstevel@tonic-gatesub SCALAR () { 0 }
18*0Sstevel@tonic-gatesub LIST () { 1 }
19*0Sstevel@tonic-gate
20*0Sstevel@tonic-gate
21*0Sstevel@tonic-gate#
22*0Sstevel@tonic-gate# Usage memoize(functionname/ref,
23*0Sstevel@tonic-gate#               { NORMALIZER => coderef, INSTALL => name,
24*0Sstevel@tonic-gate#                 LIST_CACHE => descriptor, SCALAR_CACHE => descriptor }
25*0Sstevel@tonic-gate#
26*0Sstevel@tonic-gate
27*0Sstevel@tonic-gateuse Carp;
28*0Sstevel@tonic-gateuse Exporter;
29*0Sstevel@tonic-gateuse vars qw($DEBUG);
30*0Sstevel@tonic-gateuse Config;                     # Dammit.
31*0Sstevel@tonic-gate@ISA = qw(Exporter);
32*0Sstevel@tonic-gate@EXPORT = qw(memoize);
33*0Sstevel@tonic-gate@EXPORT_OK = qw(unmemoize flush_cache);
34*0Sstevel@tonic-gateuse strict;
35*0Sstevel@tonic-gate
36*0Sstevel@tonic-gatemy %memotable;
37*0Sstevel@tonic-gatemy %revmemotable;
38*0Sstevel@tonic-gatemy @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH);
39*0Sstevel@tonic-gatemy %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS;
40*0Sstevel@tonic-gate
41*0Sstevel@tonic-gate# Raise an error if the user tries to specify one of thesepackage as a
42*0Sstevel@tonic-gate# tie for LIST_CACHE
43*0Sstevel@tonic-gate
44*0Sstevel@tonic-gatemy %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File);
45*0Sstevel@tonic-gate
46*0Sstevel@tonic-gatesub memoize {
47*0Sstevel@tonic-gate  my $fn = shift;
48*0Sstevel@tonic-gate  my %options = @_;
49*0Sstevel@tonic-gate  my $options = \%options;
50*0Sstevel@tonic-gate
51*0Sstevel@tonic-gate  unless (defined($fn) &&
52*0Sstevel@tonic-gate	  (ref $fn eq 'CODE' || ref $fn eq '')) {
53*0Sstevel@tonic-gate    croak "Usage: memoize 'functionname'|coderef {OPTIONS}";
54*0Sstevel@tonic-gate  }
55*0Sstevel@tonic-gate
56*0Sstevel@tonic-gate  my $uppack = caller;		# TCL me Elmo!
57*0Sstevel@tonic-gate  my $cref;			# Code reference to original function
58*0Sstevel@tonic-gate  my $name = (ref $fn ? undef : $fn);
59*0Sstevel@tonic-gate
60*0Sstevel@tonic-gate  # Convert function names to code references
61*0Sstevel@tonic-gate  $cref = &_make_cref($fn, $uppack);
62*0Sstevel@tonic-gate
63*0Sstevel@tonic-gate  # Locate function prototype, if any
64*0Sstevel@tonic-gate  my $proto = prototype $cref;
65*0Sstevel@tonic-gate  if (defined $proto) { $proto = "($proto)" }
66*0Sstevel@tonic-gate  else { $proto = "" }
67*0Sstevel@tonic-gate
68*0Sstevel@tonic-gate  # I would like to get rid of the eval, but there seems not to be any
69*0Sstevel@tonic-gate  # other way to set the prototype properly.  The switch here for
70*0Sstevel@tonic-gate  # 'usethreads' works around a bug in threadperl having to do with
71*0Sstevel@tonic-gate  # magic goto.  It would be better to fix the bug and use the magic
72*0Sstevel@tonic-gate  # goto version everywhere.
73*0Sstevel@tonic-gate  my $wrapper =
74*0Sstevel@tonic-gate      $Config{usethreads}
75*0Sstevel@tonic-gate        ? eval "sub $proto { &_memoizer(\$cref, \@_); }"
76*0Sstevel@tonic-gate        : eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }";
77*0Sstevel@tonic-gate
78*0Sstevel@tonic-gate  my $normalizer = $options{NORMALIZER};
79*0Sstevel@tonic-gate  if (defined $normalizer  && ! ref $normalizer) {
80*0Sstevel@tonic-gate    $normalizer = _make_cref($normalizer, $uppack);
81*0Sstevel@tonic-gate  }
82*0Sstevel@tonic-gate
83*0Sstevel@tonic-gate  my $install_name;
84*0Sstevel@tonic-gate  if (defined $options->{INSTALL}) {
85*0Sstevel@tonic-gate    # INSTALL => name
86*0Sstevel@tonic-gate    $install_name = $options->{INSTALL};
87*0Sstevel@tonic-gate  } elsif (! exists $options->{INSTALL}) {
88*0Sstevel@tonic-gate    # No INSTALL option provided; use original name if possible
89*0Sstevel@tonic-gate    $install_name = $name;
90*0Sstevel@tonic-gate  } else {
91*0Sstevel@tonic-gate    # INSTALL => undef  means don't install
92*0Sstevel@tonic-gate  }
93*0Sstevel@tonic-gate
94*0Sstevel@tonic-gate  if (defined $install_name) {
95*0Sstevel@tonic-gate    $install_name = $uppack . '::' . $install_name
96*0Sstevel@tonic-gate	unless $install_name =~ /::/;
97*0Sstevel@tonic-gate    no strict;
98*0Sstevel@tonic-gate    local($^W) = 0;	       # ``Subroutine $install_name redefined at ...''
99*0Sstevel@tonic-gate    *{$install_name} = $wrapper; # Install memoized version
100*0Sstevel@tonic-gate  }
101*0Sstevel@tonic-gate
102*0Sstevel@tonic-gate  $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key
103*0Sstevel@tonic-gate
104*0Sstevel@tonic-gate  # These will be the caches
105*0Sstevel@tonic-gate  my %caches;
106*0Sstevel@tonic-gate  for my $context (qw(SCALAR LIST)) {
107*0Sstevel@tonic-gate    # suppress subsequent 'uninitialized value' warnings
108*0Sstevel@tonic-gate    $options{"${context}_CACHE"} ||= '';
109*0Sstevel@tonic-gate
110*0Sstevel@tonic-gate    my $cache_opt = $options{"${context}_CACHE"};
111*0Sstevel@tonic-gate    my @cache_opt_args;
112*0Sstevel@tonic-gate    if (ref $cache_opt) {
113*0Sstevel@tonic-gate      @cache_opt_args = @$cache_opt;
114*0Sstevel@tonic-gate      $cache_opt = shift @cache_opt_args;
115*0Sstevel@tonic-gate    }
116*0Sstevel@tonic-gate    if ($cache_opt eq 'FAULT') { # no cache
117*0Sstevel@tonic-gate      $caches{$context} = undef;
118*0Sstevel@tonic-gate    } elsif ($cache_opt eq 'HASH') { # user-supplied hash
119*0Sstevel@tonic-gate      my $cache = $cache_opt_args[0];
120*0Sstevel@tonic-gate      my $package = ref(tied %$cache);
121*0Sstevel@tonic-gate      if ($context eq 'LIST' && $scalar_only{$package}) {
122*0Sstevel@tonic-gate        croak("You can't use $package for LIST_CACHE because it can only store scalars");
123*0Sstevel@tonic-gate      }
124*0Sstevel@tonic-gate      $caches{$context} = $cache;
125*0Sstevel@tonic-gate    } elsif ($cache_opt eq '' ||  $IS_CACHE_TAG{$cache_opt}) {
126*0Sstevel@tonic-gate      # default is that we make up an in-memory hash
127*0Sstevel@tonic-gate      $caches{$context} = {};
128*0Sstevel@tonic-gate      # (this might get tied later, or MERGEd away)
129*0Sstevel@tonic-gate    } else {
130*0Sstevel@tonic-gate      croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting";
131*0Sstevel@tonic-gate    }
132*0Sstevel@tonic-gate  }
133*0Sstevel@tonic-gate
134*0Sstevel@tonic-gate  # Perhaps I should check here that you didn't supply *both* merge
135*0Sstevel@tonic-gate  # options.  But if you did, it does do something reasonable: They
136*0Sstevel@tonic-gate  # both get merged to the same in-memory hash.
137*0Sstevel@tonic-gate  if ($options{SCALAR_CACHE} eq 'MERGE') {
138*0Sstevel@tonic-gate    $caches{SCALAR} = $caches{LIST};
139*0Sstevel@tonic-gate  } elsif ($options{LIST_CACHE} eq 'MERGE') {
140*0Sstevel@tonic-gate    $caches{LIST} = $caches{SCALAR};
141*0Sstevel@tonic-gate  }
142*0Sstevel@tonic-gate
143*0Sstevel@tonic-gate  # Now deal with the TIE options
144*0Sstevel@tonic-gate  {
145*0Sstevel@tonic-gate    my $context;
146*0Sstevel@tonic-gate    foreach $context (qw(SCALAR LIST)) {
147*0Sstevel@tonic-gate      # If the relevant option wasn't `TIE', this call does nothing.
148*0Sstevel@tonic-gate      _my_tie($context, $caches{$context}, $options);  # Croaks on failure
149*0Sstevel@tonic-gate    }
150*0Sstevel@tonic-gate  }
151*0Sstevel@tonic-gate
152*0Sstevel@tonic-gate  # We should put some more stuff in here eventually.
153*0Sstevel@tonic-gate  # We've been saying that for serveral versions now.
154*0Sstevel@tonic-gate  # And you know what?  More stuff keeps going in!
155*0Sstevel@tonic-gate  $memotable{$cref} =
156*0Sstevel@tonic-gate  {
157*0Sstevel@tonic-gate    O => $options,  # Short keys here for things we need to access frequently
158*0Sstevel@tonic-gate    N => $normalizer,
159*0Sstevel@tonic-gate    U => $cref,
160*0Sstevel@tonic-gate    MEMOIZED => $wrapper,
161*0Sstevel@tonic-gate    PACKAGE => $uppack,
162*0Sstevel@tonic-gate    NAME => $install_name,
163*0Sstevel@tonic-gate    S => $caches{SCALAR},
164*0Sstevel@tonic-gate    L => $caches{LIST},
165*0Sstevel@tonic-gate  };
166*0Sstevel@tonic-gate
167*0Sstevel@tonic-gate  $wrapper			# Return just memoized version
168*0Sstevel@tonic-gate}
169*0Sstevel@tonic-gate
170*0Sstevel@tonic-gate# This function tries to load a tied hash class and tie the hash to it.
171*0Sstevel@tonic-gatesub _my_tie {
172*0Sstevel@tonic-gate  my ($context, $hash, $options) = @_;
173*0Sstevel@tonic-gate  my $fullopt = $options->{"${context}_CACHE"};
174*0Sstevel@tonic-gate
175*0Sstevel@tonic-gate  # We already checked to make sure that this works.
176*0Sstevel@tonic-gate  my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt;
177*0Sstevel@tonic-gate
178*0Sstevel@tonic-gate  return unless defined $shortopt && $shortopt eq 'TIE';
179*0Sstevel@tonic-gate  carp("TIE option to memoize() is deprecated; use HASH instead")
180*0Sstevel@tonic-gate      if $^W;
181*0Sstevel@tonic-gate
182*0Sstevel@tonic-gate  my @args = ref $fullopt ? @$fullopt : ();
183*0Sstevel@tonic-gate  shift @args;
184*0Sstevel@tonic-gate  my $module = shift @args;
185*0Sstevel@tonic-gate  if ($context eq 'LIST' && $scalar_only{$module}) {
186*0Sstevel@tonic-gate    croak("You can't use $module for LIST_CACHE because it can only store scalars");
187*0Sstevel@tonic-gate  }
188*0Sstevel@tonic-gate  my $modulefile = $module . '.pm';
189*0Sstevel@tonic-gate  $modulefile =~ s{::}{/}g;
190*0Sstevel@tonic-gate  eval { require $modulefile };
191*0Sstevel@tonic-gate  if ($@) {
192*0Sstevel@tonic-gate    croak "Memoize: Couldn't load hash tie module `$module': $@; aborting";
193*0Sstevel@tonic-gate  }
194*0Sstevel@tonic-gate  my $rc = (tie %$hash => $module, @args);
195*0Sstevel@tonic-gate  unless ($rc) {
196*0Sstevel@tonic-gate    croak "Memoize: Couldn't tie hash to `$module': $!; aborting";
197*0Sstevel@tonic-gate  }
198*0Sstevel@tonic-gate  1;
199*0Sstevel@tonic-gate}
200*0Sstevel@tonic-gate
201*0Sstevel@tonic-gatesub flush_cache {
202*0Sstevel@tonic-gate  my $func = _make_cref($_[0], scalar caller);
203*0Sstevel@tonic-gate  my $info = $memotable{$revmemotable{$func}};
204*0Sstevel@tonic-gate  die "$func not memoized" unless defined $info;
205*0Sstevel@tonic-gate  for my $context (qw(S L)) {
206*0Sstevel@tonic-gate    my $cache = $info->{$context};
207*0Sstevel@tonic-gate    if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
208*0Sstevel@tonic-gate      my $funcname = defined($info->{NAME}) ?
209*0Sstevel@tonic-gate          "function $info->{NAME}" : "anonymous function $func";
210*0Sstevel@tonic-gate      my $context = {S => 'scalar', L => 'list'}->{$context};
211*0Sstevel@tonic-gate      croak "Tied cache hash for $context-context $funcname does not support flushing";
212*0Sstevel@tonic-gate    } else {
213*0Sstevel@tonic-gate      %$cache = ();
214*0Sstevel@tonic-gate    }
215*0Sstevel@tonic-gate  }
216*0Sstevel@tonic-gate}
217*0Sstevel@tonic-gate
218*0Sstevel@tonic-gate# This is the function that manages the memo tables.
219*0Sstevel@tonic-gatesub _memoizer {
220*0Sstevel@tonic-gate  my $orig = shift;		# stringized version of ref to original func.
221*0Sstevel@tonic-gate  my $info = $memotable{$orig};
222*0Sstevel@tonic-gate  my $normalizer = $info->{N};
223*0Sstevel@tonic-gate
224*0Sstevel@tonic-gate  my $argstr;
225*0Sstevel@tonic-gate  my $context = (wantarray() ? LIST : SCALAR);
226*0Sstevel@tonic-gate
227*0Sstevel@tonic-gate  if (defined $normalizer) {
228*0Sstevel@tonic-gate    no strict;
229*0Sstevel@tonic-gate    if ($context == SCALAR) {
230*0Sstevel@tonic-gate      $argstr = &{$normalizer}(@_);
231*0Sstevel@tonic-gate    } elsif ($context == LIST) {
232*0Sstevel@tonic-gate      ($argstr) = &{$normalizer}(@_);
233*0Sstevel@tonic-gate    } else {
234*0Sstevel@tonic-gate      croak "Internal error \#41; context was neither LIST nor SCALAR\n";
235*0Sstevel@tonic-gate    }
236*0Sstevel@tonic-gate  } else {                      # Default normalizer
237*0Sstevel@tonic-gate    local $^W = 0;
238*0Sstevel@tonic-gate    $argstr = join chr(28),@_;
239*0Sstevel@tonic-gate  }
240*0Sstevel@tonic-gate
241*0Sstevel@tonic-gate  if ($context == SCALAR) {
242*0Sstevel@tonic-gate    my $cache = $info->{S};
243*0Sstevel@tonic-gate    _crap_out($info->{NAME}, 'scalar') unless $cache;
244*0Sstevel@tonic-gate    if (exists $cache->{$argstr}) {
245*0Sstevel@tonic-gate      return $cache->{$argstr};
246*0Sstevel@tonic-gate    } else {
247*0Sstevel@tonic-gate      my $val = &{$info->{U}}(@_);
248*0Sstevel@tonic-gate      # Scalars are considered to be lists; store appropriately
249*0Sstevel@tonic-gate      if ($info->{O}{SCALAR_CACHE} eq 'MERGE') {
250*0Sstevel@tonic-gate	$cache->{$argstr} = [$val];
251*0Sstevel@tonic-gate      } else {
252*0Sstevel@tonic-gate	$cache->{$argstr} = $val;
253*0Sstevel@tonic-gate      }
254*0Sstevel@tonic-gate      $val;
255*0Sstevel@tonic-gate    }
256*0Sstevel@tonic-gate  } elsif ($context == LIST) {
257*0Sstevel@tonic-gate    my $cache = $info->{L};
258*0Sstevel@tonic-gate    _crap_out($info->{NAME}, 'list') unless $cache;
259*0Sstevel@tonic-gate    if (exists $cache->{$argstr}) {
260*0Sstevel@tonic-gate      my $val = $cache->{$argstr};
261*0Sstevel@tonic-gate      # If LISTCONTEXT=>MERGE, then the function never returns lists,
262*0Sstevel@tonic-gate      # so we have a scalar value cached, so just return it straightaway:
263*0Sstevel@tonic-gate      return ($val) if $info->{O}{LIST_CACHE} eq 'MERGE';
264*0Sstevel@tonic-gate      # Maybe in a later version we can use a faster test.
265*0Sstevel@tonic-gate
266*0Sstevel@tonic-gate      # Otherwise, we cached an array containing the returned list:
267*0Sstevel@tonic-gate      return @$val;
268*0Sstevel@tonic-gate    } else {
269*0Sstevel@tonic-gate      my $q = $cache->{$argstr} = [&{$info->{U}}(@_)];
270*0Sstevel@tonic-gate      @$q;
271*0Sstevel@tonic-gate    }
272*0Sstevel@tonic-gate  } else {
273*0Sstevel@tonic-gate    croak "Internal error \#42; context was neither LIST nor SCALAR\n";
274*0Sstevel@tonic-gate  }
275*0Sstevel@tonic-gate}
276*0Sstevel@tonic-gate
277*0Sstevel@tonic-gatesub unmemoize {
278*0Sstevel@tonic-gate  my $f = shift;
279*0Sstevel@tonic-gate  my $uppack = caller;
280*0Sstevel@tonic-gate  my $cref = _make_cref($f, $uppack);
281*0Sstevel@tonic-gate
282*0Sstevel@tonic-gate  unless (exists $revmemotable{$cref}) {
283*0Sstevel@tonic-gate    croak "Could not unmemoize function `$f', because it was not memoized to begin with";
284*0Sstevel@tonic-gate  }
285*0Sstevel@tonic-gate
286*0Sstevel@tonic-gate  my $tabent = $memotable{$revmemotable{$cref}};
287*0Sstevel@tonic-gate  unless (defined $tabent) {
288*0Sstevel@tonic-gate    croak "Could not figure out how to unmemoize function `$f'";
289*0Sstevel@tonic-gate  }
290*0Sstevel@tonic-gate  my $name = $tabent->{NAME};
291*0Sstevel@tonic-gate  if (defined $name) {
292*0Sstevel@tonic-gate    no strict;
293*0Sstevel@tonic-gate    local($^W) = 0;	       # ``Subroutine $install_name redefined at ...''
294*0Sstevel@tonic-gate    *{$name} = $tabent->{U}; # Replace with original function
295*0Sstevel@tonic-gate  }
296*0Sstevel@tonic-gate  undef $memotable{$revmemotable{$cref}};
297*0Sstevel@tonic-gate  undef $revmemotable{$cref};
298*0Sstevel@tonic-gate
299*0Sstevel@tonic-gate  # This removes the last reference to the (possibly tied) memo tables
300*0Sstevel@tonic-gate  # my ($old_function, $memotabs) = @{$tabent}{'U','S','L'};
301*0Sstevel@tonic-gate  # undef $tabent;
302*0Sstevel@tonic-gate
303*0Sstevel@tonic-gate#  # Untie the memo tables if they were tied.
304*0Sstevel@tonic-gate#  my $i;
305*0Sstevel@tonic-gate#  for $i (0,1) {
306*0Sstevel@tonic-gate#    if (tied %{$memotabs->[$i]}) {
307*0Sstevel@tonic-gate#      warn "Untying hash #$i\n";
308*0Sstevel@tonic-gate#      untie %{$memotabs->[$i]};
309*0Sstevel@tonic-gate#    }
310*0Sstevel@tonic-gate#  }
311*0Sstevel@tonic-gate
312*0Sstevel@tonic-gate  $tabent->{U};
313*0Sstevel@tonic-gate}
314*0Sstevel@tonic-gate
315*0Sstevel@tonic-gatesub _make_cref {
316*0Sstevel@tonic-gate  my $fn = shift;
317*0Sstevel@tonic-gate  my $uppack = shift;
318*0Sstevel@tonic-gate  my $cref;
319*0Sstevel@tonic-gate  my $name;
320*0Sstevel@tonic-gate
321*0Sstevel@tonic-gate  if (ref $fn eq 'CODE') {
322*0Sstevel@tonic-gate    $cref = $fn;
323*0Sstevel@tonic-gate  } elsif (! ref $fn) {
324*0Sstevel@tonic-gate    if ($fn =~ /::/) {
325*0Sstevel@tonic-gate      $name = $fn;
326*0Sstevel@tonic-gate    } else {
327*0Sstevel@tonic-gate      $name = $uppack . '::' . $fn;
328*0Sstevel@tonic-gate    }
329*0Sstevel@tonic-gate    no strict;
330*0Sstevel@tonic-gate    if (defined $name and !defined(&$name)) {
331*0Sstevel@tonic-gate      croak "Cannot operate on nonexistent function `$fn'";
332*0Sstevel@tonic-gate    }
333*0Sstevel@tonic-gate#    $cref = \&$name;
334*0Sstevel@tonic-gate    $cref = *{$name}{CODE};
335*0Sstevel@tonic-gate  } else {
336*0Sstevel@tonic-gate    my $parent = (caller(1))[3]; # Function that called _make_cref
337*0Sstevel@tonic-gate    croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
338*0Sstevel@tonic-gate  }
339*0Sstevel@tonic-gate  $DEBUG and warn "${name}($fn) => $cref in _make_cref\n";
340*0Sstevel@tonic-gate  $cref;
341*0Sstevel@tonic-gate}
342*0Sstevel@tonic-gate
343*0Sstevel@tonic-gatesub _crap_out {
344*0Sstevel@tonic-gate  my ($funcname, $context) = @_;
345*0Sstevel@tonic-gate  if (defined $funcname) {
346*0Sstevel@tonic-gate    croak "Function `$funcname' called in forbidden $context context; faulting";
347*0Sstevel@tonic-gate  } else {
348*0Sstevel@tonic-gate    croak "Anonymous function called in forbidden $context context; faulting";
349*0Sstevel@tonic-gate  }
350*0Sstevel@tonic-gate}
351*0Sstevel@tonic-gate
352*0Sstevel@tonic-gate1;
353*0Sstevel@tonic-gate
354*0Sstevel@tonic-gate
355*0Sstevel@tonic-gate
356*0Sstevel@tonic-gate
357*0Sstevel@tonic-gate
358*0Sstevel@tonic-gate=head1 NAME
359*0Sstevel@tonic-gate
360*0Sstevel@tonic-gateMemoize - Make functions faster by trading space for time
361*0Sstevel@tonic-gate
362*0Sstevel@tonic-gate=head1 SYNOPSIS
363*0Sstevel@tonic-gate
364*0Sstevel@tonic-gate        # This is the documentation for Memoize 1.01
365*0Sstevel@tonic-gate	use Memoize;
366*0Sstevel@tonic-gate	memoize('slow_function');
367*0Sstevel@tonic-gate	slow_function(arguments);    # Is faster than it was before
368*0Sstevel@tonic-gate
369*0Sstevel@tonic-gate
370*0Sstevel@tonic-gateThis is normally all you need to know.  However, many options are available:
371*0Sstevel@tonic-gate
372*0Sstevel@tonic-gate	memoize(function, options...);
373*0Sstevel@tonic-gate
374*0Sstevel@tonic-gateOptions include:
375*0Sstevel@tonic-gate
376*0Sstevel@tonic-gate	NORMALIZER => function
377*0Sstevel@tonic-gate	INSTALL => new_name
378*0Sstevel@tonic-gate
379*0Sstevel@tonic-gate	SCALAR_CACHE => 'MEMORY'
380*0Sstevel@tonic-gate        SCALAR_CACHE => ['HASH', \%cache_hash ]
381*0Sstevel@tonic-gate	SCALAR_CACHE => 'FAULT'
382*0Sstevel@tonic-gate	SCALAR_CACHE => 'MERGE'
383*0Sstevel@tonic-gate
384*0Sstevel@tonic-gate	LIST_CACHE => 'MEMORY'
385*0Sstevel@tonic-gate        LIST_CACHE => ['HASH', \%cache_hash ]
386*0Sstevel@tonic-gate	LIST_CACHE => 'FAULT'
387*0Sstevel@tonic-gate	LIST_CACHE => 'MERGE'
388*0Sstevel@tonic-gate
389*0Sstevel@tonic-gate=head1 DESCRIPTION
390*0Sstevel@tonic-gate
391*0Sstevel@tonic-gate`Memoizing' a function makes it faster by trading space for time.  It
392*0Sstevel@tonic-gatedoes this by caching the return values of the function in a table.
393*0Sstevel@tonic-gateIf you call the function again with the same arguments, C<memoize>
394*0Sstevel@tonic-gatejumps in and gives you the value out of the table, instead of letting
395*0Sstevel@tonic-gatethe function compute the value all over again.
396*0Sstevel@tonic-gate
397*0Sstevel@tonic-gateHere is an extreme example.  Consider the Fibonacci sequence, defined
398*0Sstevel@tonic-gateby the following function:
399*0Sstevel@tonic-gate
400*0Sstevel@tonic-gate	# Compute Fibonacci numbers
401*0Sstevel@tonic-gate	sub fib {
402*0Sstevel@tonic-gate	  my $n = shift;
403*0Sstevel@tonic-gate	  return $n if $n < 2;
404*0Sstevel@tonic-gate	  fib($n-1) + fib($n-2);
405*0Sstevel@tonic-gate	}
406*0Sstevel@tonic-gate
407*0Sstevel@tonic-gateThis function is very slow.  Why?  To compute fib(14), it first wants
408*0Sstevel@tonic-gateto compute fib(13) and fib(12), and add the results.  But to compute
409*0Sstevel@tonic-gatefib(13), it first has to compute fib(12) and fib(11), and then it
410*0Sstevel@tonic-gatecomes back and computes fib(12) all over again even though the answer
411*0Sstevel@tonic-gateis the same.  And both of the times that it wants to compute fib(12),
412*0Sstevel@tonic-gateit has to compute fib(11) from scratch, and then it has to do it
413*0Sstevel@tonic-gateagain each time it wants to compute fib(13).  This function does so
414*0Sstevel@tonic-gatemuch recomputing of old results that it takes a really long time to
415*0Sstevel@tonic-gaterun---fib(14) makes 1,200 extra recursive calls to itself, to compute
416*0Sstevel@tonic-gateand recompute things that it already computed.
417*0Sstevel@tonic-gate
418*0Sstevel@tonic-gateThis function is a good candidate for memoization.  If you memoize the
419*0Sstevel@tonic-gate`fib' function above, it will compute fib(14) exactly once, the first
420*0Sstevel@tonic-gatetime it needs to, and then save the result in a table.  Then if you
421*0Sstevel@tonic-gateask for fib(14) again, it gives you the result out of the table.
422*0Sstevel@tonic-gateWhile computing fib(14), instead of computing fib(12) twice, it does
423*0Sstevel@tonic-gateit once; the second time it needs the value it gets it from the table.
424*0Sstevel@tonic-gateIt doesn't compute fib(11) four times; it computes it once, getting it
425*0Sstevel@tonic-gatefrom the table the next three times.  Instead of making 1,200
426*0Sstevel@tonic-gaterecursive calls to `fib', it makes 15.  This makes the function about
427*0Sstevel@tonic-gate150 times faster.
428*0Sstevel@tonic-gate
429*0Sstevel@tonic-gateYou could do the memoization yourself, by rewriting the function, like
430*0Sstevel@tonic-gatethis:
431*0Sstevel@tonic-gate
432*0Sstevel@tonic-gate	# Compute Fibonacci numbers, memoized version
433*0Sstevel@tonic-gate	{ my @fib;
434*0Sstevel@tonic-gate  	  sub fib {
435*0Sstevel@tonic-gate	    my $n = shift;
436*0Sstevel@tonic-gate	    return $fib[$n] if defined $fib[$n];
437*0Sstevel@tonic-gate	    return $fib[$n] = $n if $n < 2;
438*0Sstevel@tonic-gate	    $fib[$n] = fib($n-1) + fib($n-2);
439*0Sstevel@tonic-gate	  }
440*0Sstevel@tonic-gate        }
441*0Sstevel@tonic-gate
442*0Sstevel@tonic-gateOr you could use this module, like this:
443*0Sstevel@tonic-gate
444*0Sstevel@tonic-gate	use Memoize;
445*0Sstevel@tonic-gate	memoize('fib');
446*0Sstevel@tonic-gate
447*0Sstevel@tonic-gate	# Rest of the fib function just like the original version.
448*0Sstevel@tonic-gate
449*0Sstevel@tonic-gateThis makes it easy to turn memoizing on and off.
450*0Sstevel@tonic-gate
451*0Sstevel@tonic-gateHere's an even simpler example: I wrote a simple ray tracer; the
452*0Sstevel@tonic-gateprogram would look in a certain direction, figure out what it was
453*0Sstevel@tonic-gatelooking at, and then convert the `color' value (typically a string
454*0Sstevel@tonic-gatelike `red') of that object to a red, green, and blue pixel value, like
455*0Sstevel@tonic-gatethis:
456*0Sstevel@tonic-gate
457*0Sstevel@tonic-gate    for ($direction = 0; $direction < 300; $direction++) {
458*0Sstevel@tonic-gate      # Figure out which object is in direction $direction
459*0Sstevel@tonic-gate      $color = $object->{color};
460*0Sstevel@tonic-gate      ($r, $g, $b) = @{&ColorToRGB($color)};
461*0Sstevel@tonic-gate      ...
462*0Sstevel@tonic-gate    }
463*0Sstevel@tonic-gate
464*0Sstevel@tonic-gateSince there are relatively few objects in a picture, there are only a
465*0Sstevel@tonic-gatefew colors, which get looked up over and over again.  Memoizing
466*0Sstevel@tonic-gateC<ColorToRGB> sped up the program by several percent.
467*0Sstevel@tonic-gate
468*0Sstevel@tonic-gate=head1 DETAILS
469*0Sstevel@tonic-gate
470*0Sstevel@tonic-gateThis module exports exactly one function, C<memoize>.  The rest of the
471*0Sstevel@tonic-gatefunctions in this package are None of Your Business.
472*0Sstevel@tonic-gate
473*0Sstevel@tonic-gateYou should say
474*0Sstevel@tonic-gate
475*0Sstevel@tonic-gate	memoize(function)
476*0Sstevel@tonic-gate
477*0Sstevel@tonic-gatewhere C<function> is the name of the function you want to memoize, or
478*0Sstevel@tonic-gatea reference to it.  C<memoize> returns a reference to the new,
479*0Sstevel@tonic-gatememoized version of the function, or C<undef> on a non-fatal error.
480*0Sstevel@tonic-gateAt present, there are no non-fatal errors, but there might be some in
481*0Sstevel@tonic-gatethe future.
482*0Sstevel@tonic-gate
483*0Sstevel@tonic-gateIf C<function> was the name of a function, then C<memoize> hides the
484*0Sstevel@tonic-gateold version and installs the new memoized version under the old name,
485*0Sstevel@tonic-gateso that C<&function(...)> actually invokes the memoized version.
486*0Sstevel@tonic-gate
487*0Sstevel@tonic-gate=head1 OPTIONS
488*0Sstevel@tonic-gate
489*0Sstevel@tonic-gateThere are some optional options you can pass to C<memoize> to change
490*0Sstevel@tonic-gatethe way it behaves a little.  To supply options, invoke C<memoize>
491*0Sstevel@tonic-gatelike this:
492*0Sstevel@tonic-gate
493*0Sstevel@tonic-gate	memoize(function, NORMALIZER => function,
494*0Sstevel@tonic-gate			  INSTALL => newname,
495*0Sstevel@tonic-gate                          SCALAR_CACHE => option,
496*0Sstevel@tonic-gate	                  LIST_CACHE => option
497*0Sstevel@tonic-gate			 );
498*0Sstevel@tonic-gate
499*0Sstevel@tonic-gateEach of these options is optional; you can include some, all, or none
500*0Sstevel@tonic-gateof them.
501*0Sstevel@tonic-gate
502*0Sstevel@tonic-gate=head2 INSTALL
503*0Sstevel@tonic-gate
504*0Sstevel@tonic-gateIf you supply a function name with C<INSTALL>, memoize will install
505*0Sstevel@tonic-gatethe new, memoized version of the function under the name you give.
506*0Sstevel@tonic-gateFor example,
507*0Sstevel@tonic-gate
508*0Sstevel@tonic-gate	memoize('fib', INSTALL => 'fastfib')
509*0Sstevel@tonic-gate
510*0Sstevel@tonic-gateinstalls the memoized version of C<fib> as C<fastfib>; without the
511*0Sstevel@tonic-gateC<INSTALL> option it would have replaced the old C<fib> with the
512*0Sstevel@tonic-gatememoized version.
513*0Sstevel@tonic-gate
514*0Sstevel@tonic-gateTo prevent C<memoize> from installing the memoized version anywhere, use
515*0Sstevel@tonic-gateC<INSTALL =E<gt> undef>.
516*0Sstevel@tonic-gate
517*0Sstevel@tonic-gate=head2 NORMALIZER
518*0Sstevel@tonic-gate
519*0Sstevel@tonic-gateSuppose your function looks like this:
520*0Sstevel@tonic-gate
521*0Sstevel@tonic-gate	# Typical call: f('aha!', A => 11, B => 12);
522*0Sstevel@tonic-gate	sub f {
523*0Sstevel@tonic-gate	  my $a = shift;
524*0Sstevel@tonic-gate	  my %hash = @_;
525*0Sstevel@tonic-gate	  $hash{B} ||= 2;  # B defaults to 2
526*0Sstevel@tonic-gate	  $hash{C} ||= 7;  # C defaults to 7
527*0Sstevel@tonic-gate
528*0Sstevel@tonic-gate	  # Do something with $a, %hash
529*0Sstevel@tonic-gate	}
530*0Sstevel@tonic-gate
531*0Sstevel@tonic-gateNow, the following calls to your function are all completely equivalent:
532*0Sstevel@tonic-gate
533*0Sstevel@tonic-gate	f(OUCH);
534*0Sstevel@tonic-gate	f(OUCH, B => 2);
535*0Sstevel@tonic-gate	f(OUCH, C => 7);
536*0Sstevel@tonic-gate	f(OUCH, B => 2, C => 7);
537*0Sstevel@tonic-gate	f(OUCH, C => 7, B => 2);
538*0Sstevel@tonic-gate	(etc.)
539*0Sstevel@tonic-gate
540*0Sstevel@tonic-gateHowever, unless you tell C<Memoize> that these calls are equivalent,
541*0Sstevel@tonic-gateit will not know that, and it will compute the values for these
542*0Sstevel@tonic-gateinvocations of your function separately, and store them separately.
543*0Sstevel@tonic-gate
544*0Sstevel@tonic-gateTo prevent this, supply a C<NORMALIZER> function that turns the
545*0Sstevel@tonic-gateprogram arguments into a string in a way that equivalent arguments
546*0Sstevel@tonic-gateturn into the same string.  A C<NORMALIZER> function for C<f> above
547*0Sstevel@tonic-gatemight look like this:
548*0Sstevel@tonic-gate
549*0Sstevel@tonic-gate	sub normalize_f {
550*0Sstevel@tonic-gate	  my $a = shift;
551*0Sstevel@tonic-gate	  my %hash = @_;
552*0Sstevel@tonic-gate	  $hash{B} ||= 2;
553*0Sstevel@tonic-gate	  $hash{C} ||= 7;
554*0Sstevel@tonic-gate
555*0Sstevel@tonic-gate	  join(',', $a, map ($_ => $hash{$_}) sort keys %hash);
556*0Sstevel@tonic-gate	}
557*0Sstevel@tonic-gate
558*0Sstevel@tonic-gateEach of the argument lists above comes out of the C<normalize_f>
559*0Sstevel@tonic-gatefunction looking exactly the same, like this:
560*0Sstevel@tonic-gate
561*0Sstevel@tonic-gate	OUCH,B,2,C,7
562*0Sstevel@tonic-gate
563*0Sstevel@tonic-gateYou would tell C<Memoize> to use this normalizer this way:
564*0Sstevel@tonic-gate
565*0Sstevel@tonic-gate	memoize('f', NORMALIZER => 'normalize_f');
566*0Sstevel@tonic-gate
567*0Sstevel@tonic-gateC<memoize> knows that if the normalized version of the arguments is
568*0Sstevel@tonic-gatethe same for two argument lists, then it can safely look up the value
569*0Sstevel@tonic-gatethat it computed for one argument list and return it as the result of
570*0Sstevel@tonic-gatecalling the function with the other argument list, even if the
571*0Sstevel@tonic-gateargument lists look different.
572*0Sstevel@tonic-gate
573*0Sstevel@tonic-gateThe default normalizer just concatenates the arguments with character
574*0Sstevel@tonic-gate28 in between.  (In ASCII, this is called FS or control-\.)  This
575*0Sstevel@tonic-gatealways works correctly for functions with only one string argument,
576*0Sstevel@tonic-gateand also when the arguments never contain character 28.  However, it
577*0Sstevel@tonic-gatecan confuse certain argument lists:
578*0Sstevel@tonic-gate
579*0Sstevel@tonic-gate	normalizer("a\034", "b")
580*0Sstevel@tonic-gate	normalizer("a", "\034b")
581*0Sstevel@tonic-gate	normalizer("a\034\034b")
582*0Sstevel@tonic-gate
583*0Sstevel@tonic-gatefor example.
584*0Sstevel@tonic-gate
585*0Sstevel@tonic-gateSince hash keys are strings, the default normalizer will not
586*0Sstevel@tonic-gatedistinguish between C<undef> and the empty string.  It also won't work
587*0Sstevel@tonic-gatewhen the function's arguments are references.  For example, consider a
588*0Sstevel@tonic-gatefunction C<g> which gets two arguments: A number, and a reference to
589*0Sstevel@tonic-gatean array of numbers:
590*0Sstevel@tonic-gate
591*0Sstevel@tonic-gate	g(13, [1,2,3,4,5,6,7]);
592*0Sstevel@tonic-gate
593*0Sstevel@tonic-gateThe default normalizer will turn this into something like
594*0Sstevel@tonic-gateC<"13\034ARRAY(0x436c1f)">.  That would be all right, except that a
595*0Sstevel@tonic-gatesubsequent array of numbers might be stored at a different location
596*0Sstevel@tonic-gateeven though it contains the same data.  If this happens, C<Memoize>
597*0Sstevel@tonic-gatewill think that the arguments are different, even though they are
598*0Sstevel@tonic-gateequivalent.  In this case, a normalizer like this is appropriate:
599*0Sstevel@tonic-gate
600*0Sstevel@tonic-gate	sub normalize { join ' ', $_[0], @{$_[1]} }
601*0Sstevel@tonic-gate
602*0Sstevel@tonic-gateFor the example above, this produces the key "13 1 2 3 4 5 6 7".
603*0Sstevel@tonic-gate
604*0Sstevel@tonic-gateAnother use for normalizers is when the function depends on data other
605*0Sstevel@tonic-gatethan those in its arguments.  Suppose you have a function which
606*0Sstevel@tonic-gatereturns a value which depends on the current hour of the day:
607*0Sstevel@tonic-gate
608*0Sstevel@tonic-gate	sub on_duty {
609*0Sstevel@tonic-gate          my ($problem_type) = @_;
610*0Sstevel@tonic-gate	  my $hour = (localtime)[2];
611*0Sstevel@tonic-gate          open my $fh, "$DIR/$problem_type" or die...;
612*0Sstevel@tonic-gate          my $line;
613*0Sstevel@tonic-gate          while ($hour-- > 0)
614*0Sstevel@tonic-gate            $line = <$fh>;
615*0Sstevel@tonic-gate          }
616*0Sstevel@tonic-gate	  return $line;
617*0Sstevel@tonic-gate	}
618*0Sstevel@tonic-gate
619*0Sstevel@tonic-gateAt 10:23, this function generates the 10th line of a data file; at
620*0Sstevel@tonic-gate3:45 PM it generates the 15th line instead.  By default, C<Memoize>
621*0Sstevel@tonic-gatewill only see the $problem_type argument.  To fix this, include the
622*0Sstevel@tonic-gatecurrent hour in the normalizer:
623*0Sstevel@tonic-gate
624*0Sstevel@tonic-gate        sub normalize { join ' ', (localtime)[2], @_ }
625*0Sstevel@tonic-gate
626*0Sstevel@tonic-gateThe calling context of the function (scalar or list context) is
627*0Sstevel@tonic-gatepropagated to the normalizer.  This means that if the memoized
628*0Sstevel@tonic-gatefunction will treat its arguments differently in list context than it
629*0Sstevel@tonic-gatewould in scalar context, you can have the normalizer function select
630*0Sstevel@tonic-gateits behavior based on the results of C<wantarray>.  Even if called in
631*0Sstevel@tonic-gatea list context, a normalizer should still return a single string.
632*0Sstevel@tonic-gate
633*0Sstevel@tonic-gate=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
634*0Sstevel@tonic-gate
635*0Sstevel@tonic-gateNormally, C<Memoize> caches your function's return values into an
636*0Sstevel@tonic-gateordinary Perl hash variable.  However, you might like to have the
637*0Sstevel@tonic-gatevalues cached on the disk, so that they persist from one run of your
638*0Sstevel@tonic-gateprogram to the next, or you might like to associate some other
639*0Sstevel@tonic-gateinteresting semantics with the cached values.
640*0Sstevel@tonic-gate
641*0Sstevel@tonic-gateThere's a slight complication under the hood of C<Memoize>: There are
642*0Sstevel@tonic-gateactually I<two> caches, one for scalar values and one for list values.
643*0Sstevel@tonic-gateWhen your function is called in scalar context, its return value is
644*0Sstevel@tonic-gatecached in one hash, and when your function is called in list context,
645*0Sstevel@tonic-gateits value is cached in the other hash.  You can control the caching
646*0Sstevel@tonic-gatebehavior of both contexts independently with these options.
647*0Sstevel@tonic-gate
648*0Sstevel@tonic-gateThe argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
649*0Sstevel@tonic-gatethe following four strings:
650*0Sstevel@tonic-gate
651*0Sstevel@tonic-gate	MEMORY
652*0Sstevel@tonic-gate	FAULT
653*0Sstevel@tonic-gate	MERGE
654*0Sstevel@tonic-gate        HASH
655*0Sstevel@tonic-gate
656*0Sstevel@tonic-gateor else it must be a reference to a list whose first element is one of
657*0Sstevel@tonic-gatethese four strings, such as C<[HASH, arguments...]>.
658*0Sstevel@tonic-gate
659*0Sstevel@tonic-gate=over 4
660*0Sstevel@tonic-gate
661*0Sstevel@tonic-gate=item C<MEMORY>
662*0Sstevel@tonic-gate
663*0Sstevel@tonic-gateC<MEMORY> means that return values from the function will be cached in
664*0Sstevel@tonic-gatean ordinary Perl hash variable.  The hash variable will not persist
665*0Sstevel@tonic-gateafter the program exits.  This is the default.
666*0Sstevel@tonic-gate
667*0Sstevel@tonic-gate=item C<HASH>
668*0Sstevel@tonic-gate
669*0Sstevel@tonic-gateC<HASH> allows you to specify that a particular hash that you supply
670*0Sstevel@tonic-gatewill be used as the cache.  You can tie this hash beforehand to give
671*0Sstevel@tonic-gateit any behavior you want.
672*0Sstevel@tonic-gate
673*0Sstevel@tonic-gateA tied hash can have any semantics at all.  It is typically tied to an
674*0Sstevel@tonic-gateon-disk database, so that cached values are stored in the database and
675*0Sstevel@tonic-gateretrieved from it again when needed, and the disk file typically
676*0Sstevel@tonic-gatepersists after your program has exited.  See C<perltie> for more
677*0Sstevel@tonic-gatecomplete details about C<tie>.
678*0Sstevel@tonic-gate
679*0Sstevel@tonic-gateA typical example is:
680*0Sstevel@tonic-gate
681*0Sstevel@tonic-gate        use DB_File;
682*0Sstevel@tonic-gate        tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666;
683*0Sstevel@tonic-gate        memoize 'function', SCALAR_CACHE => [HASH => \%cache];
684*0Sstevel@tonic-gate
685*0Sstevel@tonic-gateThis has the effect of storing the cache in a C<DB_File> database
686*0Sstevel@tonic-gatewhose name is in C<$filename>.  The cache will persist after the
687*0Sstevel@tonic-gateprogram has exited.  Next time the program runs, it will find the
688*0Sstevel@tonic-gatecache already populated from the previous run of the program.  Or you
689*0Sstevel@tonic-gatecan forcibly populate the cache by constructing a batch program that
690*0Sstevel@tonic-gateruns in the background and populates the cache file.  Then when you
691*0Sstevel@tonic-gatecome to run your real program the memoized function will be fast
692*0Sstevel@tonic-gatebecause all its results have been precomputed.
693*0Sstevel@tonic-gate
694*0Sstevel@tonic-gate=item C<TIE>
695*0Sstevel@tonic-gate
696*0Sstevel@tonic-gateThis option is no longer supported.  It is still documented only to
697*0Sstevel@tonic-gateaid in the debugging of old programs that use it.  Old programs should
698*0Sstevel@tonic-gatebe converted to use the C<HASH> option instead.
699*0Sstevel@tonic-gate
700*0Sstevel@tonic-gate        memoize ... [TIE, PACKAGE, ARGS...]
701*0Sstevel@tonic-gate
702*0Sstevel@tonic-gateis merely a shortcut for
703*0Sstevel@tonic-gate
704*0Sstevel@tonic-gate        require PACKAGE;
705*0Sstevel@tonic-gate	{ my %cache;
706*0Sstevel@tonic-gate          tie %cache, PACKAGE, ARGS...;
707*0Sstevel@tonic-gate	}
708*0Sstevel@tonic-gate        memoize ... [HASH => \%cache];
709*0Sstevel@tonic-gate
710*0Sstevel@tonic-gate=item C<FAULT>
711*0Sstevel@tonic-gate
712*0Sstevel@tonic-gateC<FAULT> means that you never expect to call the function in scalar
713*0Sstevel@tonic-gate(or list) context, and that if C<Memoize> detects such a call, it
714*0Sstevel@tonic-gateshould abort the program.  The error message is one of
715*0Sstevel@tonic-gate
716*0Sstevel@tonic-gate	`foo' function called in forbidden list context at line ...
717*0Sstevel@tonic-gate	`foo' function called in forbidden scalar context at line ...
718*0Sstevel@tonic-gate
719*0Sstevel@tonic-gate=item C<MERGE>
720*0Sstevel@tonic-gate
721*0Sstevel@tonic-gateC<MERGE> normally means the function does not distinguish between list
722*0Sstevel@tonic-gateand sclar context, and that return values in both contexts should be
723*0Sstevel@tonic-gatestored together.  C<LIST_CACHE =E<gt> MERGE> means that list context
724*0Sstevel@tonic-gatereturn values should be stored in the same hash that is used for
725*0Sstevel@tonic-gatescalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the
726*0Sstevel@tonic-gatesame, mutatis mutandis.  It is an error to specify C<MERGE> for both,
727*0Sstevel@tonic-gatebut it probably does something useful.
728*0Sstevel@tonic-gate
729*0Sstevel@tonic-gateConsider this function:
730*0Sstevel@tonic-gate
731*0Sstevel@tonic-gate	sub pi { 3; }
732*0Sstevel@tonic-gate
733*0Sstevel@tonic-gateNormally, the following code will result in two calls to C<pi>:
734*0Sstevel@tonic-gate
735*0Sstevel@tonic-gate    $x = pi();
736*0Sstevel@tonic-gate    ($y) = pi();
737*0Sstevel@tonic-gate    $z = pi();
738*0Sstevel@tonic-gate
739*0Sstevel@tonic-gateThe first call caches the value C<3> in the scalar cache; the second
740*0Sstevel@tonic-gatecaches the list C<(3)> in the list cache.  The third call doesn't call
741*0Sstevel@tonic-gatethe real C<pi> function; it gets the value from the scalar cache.
742*0Sstevel@tonic-gate
743*0Sstevel@tonic-gateObviously, the second call to C<pi> is a waste of time, and storing
744*0Sstevel@tonic-gateits return value is a waste of space.  Specifying C<LIST_CACHE =E<gt>
745*0Sstevel@tonic-gateMERGE> will make C<memoize> use the same cache for scalar and list
746*0Sstevel@tonic-gatecontext return values, so that the second call uses the scalar cache
747*0Sstevel@tonic-gatethat was populated by the first call.  C<pi> ends up being called only
748*0Sstevel@tonic-gateonce, and both subsequent calls return C<3> from the cache, regardless
749*0Sstevel@tonic-gateof the calling context.
750*0Sstevel@tonic-gate
751*0Sstevel@tonic-gateAnother use for C<MERGE> is when you want both kinds of return values
752*0Sstevel@tonic-gatestored in the same disk file; this saves you from having to deal with
753*0Sstevel@tonic-gatetwo disk files instead of one.  You can use a normalizer function to
754*0Sstevel@tonic-gatekeep the two sets of return values separate.  For example:
755*0Sstevel@tonic-gate
756*0Sstevel@tonic-gate        tie my %cache => 'MLDBM', 'DB_File', $filename, ...;
757*0Sstevel@tonic-gate
758*0Sstevel@tonic-gate	memoize 'myfunc',
759*0Sstevel@tonic-gate	  NORMALIZER => 'n',
760*0Sstevel@tonic-gate	  SCALAR_CACHE => [HASH => \%cache],
761*0Sstevel@tonic-gate	  LIST_CACHE => MERGE,
762*0Sstevel@tonic-gate	;
763*0Sstevel@tonic-gate
764*0Sstevel@tonic-gate	sub n {
765*0Sstevel@tonic-gate	  my $context = wantarray() ? 'L' : 'S';
766*0Sstevel@tonic-gate	  # ... now compute the hash key from the arguments ...
767*0Sstevel@tonic-gate	  $hashkey = "$context:$hashkey";
768*0Sstevel@tonic-gate	}
769*0Sstevel@tonic-gate
770*0Sstevel@tonic-gateThis normalizer function will store scalar context return values in
771*0Sstevel@tonic-gatethe disk file under keys that begin with C<S:>, and list context
772*0Sstevel@tonic-gatereturn values under keys that begin with C<L:>.
773*0Sstevel@tonic-gate
774*0Sstevel@tonic-gate=back
775*0Sstevel@tonic-gate
776*0Sstevel@tonic-gate=head1 OTHER FACILITIES
777*0Sstevel@tonic-gate
778*0Sstevel@tonic-gate=head2 C<unmemoize>
779*0Sstevel@tonic-gate
780*0Sstevel@tonic-gateThere's an C<unmemoize> function that you can import if you want to.
781*0Sstevel@tonic-gateWhy would you want to?  Here's an example: Suppose you have your cache
782*0Sstevel@tonic-gatetied to a DBM file, and you want to make sure that the cache is
783*0Sstevel@tonic-gatewritten out to disk if someone interrupts the program.  If the program
784*0Sstevel@tonic-gateexits normally, this will happen anyway, but if someone types
785*0Sstevel@tonic-gatecontrol-C or something then the program will terminate immediately
786*0Sstevel@tonic-gatewithout synchronizing the database.  So what you can do instead is
787*0Sstevel@tonic-gate
788*0Sstevel@tonic-gate    $SIG{INT} = sub { unmemoize 'function' };
789*0Sstevel@tonic-gate
790*0Sstevel@tonic-gateC<unmemoize> accepts a reference to, or the name of a previously
791*0Sstevel@tonic-gatememoized function, and undoes whatever it did to provide the memoized
792*0Sstevel@tonic-gateversion in the first place, including making the name refer to the
793*0Sstevel@tonic-gateunmemoized version if appropriate.  It returns a reference to the
794*0Sstevel@tonic-gateunmemoized version of the function.
795*0Sstevel@tonic-gate
796*0Sstevel@tonic-gateIf you ask it to unmemoize a function that was never memoized, it
797*0Sstevel@tonic-gatecroaks.
798*0Sstevel@tonic-gate
799*0Sstevel@tonic-gate=head2 C<flush_cache>
800*0Sstevel@tonic-gate
801*0Sstevel@tonic-gateC<flush_cache(function)> will flush out the caches, discarding I<all>
802*0Sstevel@tonic-gatethe cached data.  The argument may be a function name or a reference
803*0Sstevel@tonic-gateto a function.  For finer control over when data is discarded or
804*0Sstevel@tonic-gateexpired, see the documentation for C<Memoize::Expire>, included in
805*0Sstevel@tonic-gatethis package.
806*0Sstevel@tonic-gate
807*0Sstevel@tonic-gateNote that if the cache is a tied hash, C<flush_cache> will attempt to
808*0Sstevel@tonic-gateinvoke the C<CLEAR> method on the hash.  If there is no C<CLEAR>
809*0Sstevel@tonic-gatemethod, this will cause a run-time error.
810*0Sstevel@tonic-gate
811*0Sstevel@tonic-gateAn alternative approach to cache flushing is to use the C<HASH> option
812*0Sstevel@tonic-gate(see above) to request that C<Memoize> use a particular hash variable
813*0Sstevel@tonic-gateas its cache.  Then you can examine or modify the hash at any time in
814*0Sstevel@tonic-gateany way you desire.  You may flush the cache by using C<%hash = ()>.
815*0Sstevel@tonic-gate
816*0Sstevel@tonic-gate=head1 CAVEATS
817*0Sstevel@tonic-gate
818*0Sstevel@tonic-gateMemoization is not a cure-all:
819*0Sstevel@tonic-gate
820*0Sstevel@tonic-gate=over 4
821*0Sstevel@tonic-gate
822*0Sstevel@tonic-gate=item *
823*0Sstevel@tonic-gate
824*0Sstevel@tonic-gateDo not memoize a function whose behavior depends on program
825*0Sstevel@tonic-gatestate other than its own arguments, such as global variables, the time
826*0Sstevel@tonic-gateof day, or file input.  These functions will not produce correct
827*0Sstevel@tonic-gateresults when memoized.  For a particularly easy example:
828*0Sstevel@tonic-gate
829*0Sstevel@tonic-gate	sub f {
830*0Sstevel@tonic-gate	  time;
831*0Sstevel@tonic-gate	}
832*0Sstevel@tonic-gate
833*0Sstevel@tonic-gateThis function takes no arguments, and as far as C<Memoize> is
834*0Sstevel@tonic-gateconcerned, it always returns the same result.  C<Memoize> is wrong, of
835*0Sstevel@tonic-gatecourse, and the memoized version of this function will call C<time> once
836*0Sstevel@tonic-gateto get the current time, and it will return that same time
837*0Sstevel@tonic-gateevery time you call it after that.
838*0Sstevel@tonic-gate
839*0Sstevel@tonic-gate=item *
840*0Sstevel@tonic-gate
841*0Sstevel@tonic-gateDo not memoize a function with side effects.
842*0Sstevel@tonic-gate
843*0Sstevel@tonic-gate	sub f {
844*0Sstevel@tonic-gate	  my ($a, $b) = @_;
845*0Sstevel@tonic-gate          my $s = $a + $b;
846*0Sstevel@tonic-gate	  print "$a + $b = $s.\n";
847*0Sstevel@tonic-gate	}
848*0Sstevel@tonic-gate
849*0Sstevel@tonic-gateThis function accepts two arguments, adds them, and prints their sum.
850*0Sstevel@tonic-gateIts return value is the numuber of characters it printed, but you
851*0Sstevel@tonic-gateprobably didn't care about that.  But C<Memoize> doesn't understand
852*0Sstevel@tonic-gatethat.  If you memoize this function, you will get the result you
853*0Sstevel@tonic-gateexpect the first time you ask it to print the sum of 2 and 3, but
854*0Sstevel@tonic-gatesubsequent calls will return 1 (the return value of
855*0Sstevel@tonic-gateC<print>) without actually printing anything.
856*0Sstevel@tonic-gate
857*0Sstevel@tonic-gate=item *
858*0Sstevel@tonic-gate
859*0Sstevel@tonic-gateDo not memoize a function that returns a data structure that is
860*0Sstevel@tonic-gatemodified by its caller.
861*0Sstevel@tonic-gate
862*0Sstevel@tonic-gateConsider these functions:  C<getusers> returns a list of users somehow,
863*0Sstevel@tonic-gateand then C<main> throws away the first user on the list and prints the
864*0Sstevel@tonic-gaterest:
865*0Sstevel@tonic-gate
866*0Sstevel@tonic-gate	sub main {
867*0Sstevel@tonic-gate	  my $userlist = getusers();
868*0Sstevel@tonic-gate	  shift @$userlist;
869*0Sstevel@tonic-gate	  foreach $u (@$userlist) {
870*0Sstevel@tonic-gate	    print "User $u\n";
871*0Sstevel@tonic-gate	  }
872*0Sstevel@tonic-gate	}
873*0Sstevel@tonic-gate
874*0Sstevel@tonic-gate	sub getusers {
875*0Sstevel@tonic-gate	  my @users;
876*0Sstevel@tonic-gate	  # Do something to get a list of users;
877*0Sstevel@tonic-gate	  \@users;  # Return reference to list.
878*0Sstevel@tonic-gate	}
879*0Sstevel@tonic-gate
880*0Sstevel@tonic-gateIf you memoize C<getusers> here, it will work right exactly once.  The
881*0Sstevel@tonic-gatereference to the users list will be stored in the memo table.  C<main>
882*0Sstevel@tonic-gatewill discard the first element from the referenced list.  The next
883*0Sstevel@tonic-gatetime you invoke C<main>, C<Memoize> will not call C<getusers>; it will
884*0Sstevel@tonic-gatejust return the same reference to the same list it got last time.  But
885*0Sstevel@tonic-gatethis time the list has already had its head removed; C<main> will
886*0Sstevel@tonic-gateerroneously remove another element from it.  The list will get shorter
887*0Sstevel@tonic-gateand shorter every time you call C<main>.
888*0Sstevel@tonic-gate
889*0Sstevel@tonic-gateSimilarly, this:
890*0Sstevel@tonic-gate
891*0Sstevel@tonic-gate	$u1 = getusers();
892*0Sstevel@tonic-gate	$u2 = getusers();
893*0Sstevel@tonic-gate	pop @$u1;
894*0Sstevel@tonic-gate
895*0Sstevel@tonic-gatewill modify $u2 as well as $u1, because both variables are references
896*0Sstevel@tonic-gateto the same array.  Had C<getusers> not been memoized, $u1 and $u2
897*0Sstevel@tonic-gatewould have referred to different arrays.
898*0Sstevel@tonic-gate
899*0Sstevel@tonic-gate=item *
900*0Sstevel@tonic-gate
901*0Sstevel@tonic-gateDo not memoize a very simple function.
902*0Sstevel@tonic-gate
903*0Sstevel@tonic-gateRecently someone mentioned to me that the Memoize module made his
904*0Sstevel@tonic-gateprogram run slower instead of faster.  It turned out that he was
905*0Sstevel@tonic-gatememoizing the following function:
906*0Sstevel@tonic-gate
907*0Sstevel@tonic-gate    sub square {
908*0Sstevel@tonic-gate      $_[0] * $_[0];
909*0Sstevel@tonic-gate    }
910*0Sstevel@tonic-gate
911*0Sstevel@tonic-gateI pointed out that C<Memoize> uses a hash, and that looking up a
912*0Sstevel@tonic-gatenumber in the hash is necessarily going to take a lot longer than a
913*0Sstevel@tonic-gatesingle multiplication.  There really is no way to speed up the
914*0Sstevel@tonic-gateC<square> function.
915*0Sstevel@tonic-gate
916*0Sstevel@tonic-gateMemoization is not magical.
917*0Sstevel@tonic-gate
918*0Sstevel@tonic-gate=back
919*0Sstevel@tonic-gate
920*0Sstevel@tonic-gate=head1 PERSISTENT CACHE SUPPORT
921*0Sstevel@tonic-gate
922*0Sstevel@tonic-gateYou can tie the cache tables to any sort of tied hash that you want
923*0Sstevel@tonic-gateto, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
924*0Sstevel@tonic-gateC<EXISTS>.  For example,
925*0Sstevel@tonic-gate
926*0Sstevel@tonic-gate        tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666;
927*0Sstevel@tonic-gate        memoize 'function', SCALAR_CACHE => [HASH => \%cache];
928*0Sstevel@tonic-gate
929*0Sstevel@tonic-gateworks just fine.  For some storage methods, you need a little glue.
930*0Sstevel@tonic-gate
931*0Sstevel@tonic-gateC<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
932*0Sstevel@tonic-gatepackage is a glue module called C<Memoize::SDBM_File> which does
933*0Sstevel@tonic-gateprovide one.  Use this instead of plain C<SDBM_File> to store your
934*0Sstevel@tonic-gatecache table on disk in an C<SDBM_File> database:
935*0Sstevel@tonic-gate
936*0Sstevel@tonic-gate        tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666;
937*0Sstevel@tonic-gate        memoize 'function', SCALAR_CACHE => [HASH => \%cache];
938*0Sstevel@tonic-gate
939*0Sstevel@tonic-gateC<NDBM_File> has the same problem and the same solution.  (Use
940*0Sstevel@tonic-gateC<Memoize::NDBM_File instead of plain NDBM_File.>)
941*0Sstevel@tonic-gate
942*0Sstevel@tonic-gateC<Storable> isn't a tied hash class at all.  You can use it to store a
943*0Sstevel@tonic-gatehash to disk and retrieve it again, but you can't modify the hash while
944*0Sstevel@tonic-gateit's on the disk.  So if you want to store your cache table in a
945*0Sstevel@tonic-gateC<Storable> database, use C<Memoize::Storable>, which puts a hashlike
946*0Sstevel@tonic-gatefront-end onto C<Storable>.  The hash table is actually kept in
947*0Sstevel@tonic-gatememory, and is loaded from your C<Storable> file at the time you
948*0Sstevel@tonic-gatememoize the function, and stored back at the time you unmemoize the
949*0Sstevel@tonic-gatefunction (or when your program exits):
950*0Sstevel@tonic-gate
951*0Sstevel@tonic-gate        tie my %cache => 'Memoize::Storable', $filename;
952*0Sstevel@tonic-gate	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
953*0Sstevel@tonic-gate
954*0Sstevel@tonic-gate        tie my %cache => 'Memoize::Storable', $filename, 'nstore';
955*0Sstevel@tonic-gate	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
956*0Sstevel@tonic-gate
957*0Sstevel@tonic-gateInclude the `nstore' option to have the C<Storable> database written
958*0Sstevel@tonic-gatein `network order'.  (See L<Storable> for more details about this.)
959*0Sstevel@tonic-gate
960*0Sstevel@tonic-gateThe C<flush_cache()> function will raise a run-time error unless the
961*0Sstevel@tonic-gatetied package provides a C<CLEAR> method.
962*0Sstevel@tonic-gate
963*0Sstevel@tonic-gate=head1 EXPIRATION SUPPORT
964*0Sstevel@tonic-gate
965*0Sstevel@tonic-gateSee Memoize::Expire, which is a plug-in module that adds expiration
966*0Sstevel@tonic-gatefunctionality to Memoize.  If you don't like the kinds of policies
967*0Sstevel@tonic-gatethat Memoize::Expire implements, it is easy to write your own plug-in
968*0Sstevel@tonic-gatemodule to implement whatever policy you desire.  Memoize comes with
969*0Sstevel@tonic-gateseveral examples.  An expiration manager that implements a LRU policy
970*0Sstevel@tonic-gateis available on CPAN as Memoize::ExpireLRU.
971*0Sstevel@tonic-gate
972*0Sstevel@tonic-gate=head1 BUGS
973*0Sstevel@tonic-gate
974*0Sstevel@tonic-gateThe test suite is much better, but always needs improvement.
975*0Sstevel@tonic-gate
976*0Sstevel@tonic-gateThere is some problem with the way C<goto &f> works under threaded
977*0Sstevel@tonic-gatePerl, perhaps because of the lexical scoping of C<@_>.  This is a bug
978*0Sstevel@tonic-gatein Perl, and until it is resolved, memoized functions will see a
979*0Sstevel@tonic-gateslightly different C<caller()> and will perform a little more slowly
980*0Sstevel@tonic-gateon threaded perls than unthreaded perls.
981*0Sstevel@tonic-gate
982*0Sstevel@tonic-gateSome versions of C<DB_File> won't let you store data under a key of
983*0Sstevel@tonic-gatelength 0.  That means that if you have a function C<f> which you
984*0Sstevel@tonic-gatememoized and the cache is in a C<DB_File> database, then the value of
985*0Sstevel@tonic-gateC<f()> (C<f> called with no arguments) will not be memoized.  If this
986*0Sstevel@tonic-gateis a big problem, you can supply a normalizer function that prepends
987*0Sstevel@tonic-gateC<"x"> to every key.
988*0Sstevel@tonic-gate
989*0Sstevel@tonic-gate=head1 MAILING LIST
990*0Sstevel@tonic-gate
991*0Sstevel@tonic-gateTo join a very low-traffic mailing list for announcements about
992*0Sstevel@tonic-gateC<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>.
993*0Sstevel@tonic-gate
994*0Sstevel@tonic-gate=head1 AUTHOR
995*0Sstevel@tonic-gate
996*0Sstevel@tonic-gateMark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co.
997*0Sstevel@tonic-gate
998*0Sstevel@tonic-gateSee the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/
999*0Sstevel@tonic-gatefor news and upgrades.  Near this page, at
1000*0Sstevel@tonic-gatehttp://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about
1001*0Sstevel@tonic-gatememoization and about the internals of Memoize that appeared in The
1002*0Sstevel@tonic-gatePerl Journal, issue #13.  (This article is also included in the
1003*0Sstevel@tonic-gateMemoize distribution as `article.html'.)
1004*0Sstevel@tonic-gate
1005*0Sstevel@tonic-gateMy upcoming book will discuss memoization (and many other fascinating
1006*0Sstevel@tonic-gatetopics) in tremendous detail.  It will be published by Morgan Kaufmann
1007*0Sstevel@tonic-gatein 2002, possibly under the title I<Perl Advanced Techniques
1008*0Sstevel@tonic-gateHandbook>.  It will also be available on-line for free.  For more
1009*0Sstevel@tonic-gateinformation, visit http://perl.plover.com/book/ .
1010*0Sstevel@tonic-gate
1011*0Sstevel@tonic-gateTo join a mailing list for announcements about C<Memoize>, send an
1012*0Sstevel@tonic-gateempty message to C<mjd-perl-memoize-request@plover.com>.  This mailing
1013*0Sstevel@tonic-gatelist is for announcements only and has extremely low traffic---about
1014*0Sstevel@tonic-gatetwo messages per year.
1015*0Sstevel@tonic-gate
1016*0Sstevel@tonic-gate=head1 COPYRIGHT AND LICENSE
1017*0Sstevel@tonic-gate
1018*0Sstevel@tonic-gateCopyright 1998, 1999, 2000, 2001  by Mark Jason Dominus
1019*0Sstevel@tonic-gate
1020*0Sstevel@tonic-gateThis library is free software; you may redistribute it and/or modify
1021*0Sstevel@tonic-gateit under the same terms as Perl itself.
1022*0Sstevel@tonic-gate
1023*0Sstevel@tonic-gate=head1 THANK YOU
1024*0Sstevel@tonic-gate
1025*0Sstevel@tonic-gateMany thanks to Jonathan Roy for bug reports and suggestions, to
1026*0Sstevel@tonic-gateMichael Schwern for other bug reports and patches, to Mike Cariaso for
1027*0Sstevel@tonic-gatehelping me to figure out the Right Thing to Do About Expiration, to
1028*0Sstevel@tonic-gateJoshua Gerth, Joshua Chamas, Jonathan Roy (again), Mark D. Anderson,
1029*0Sstevel@tonic-gateand Andrew Johnson for more suggestions about expiration, to Brent
1030*0Sstevel@tonic-gatePowers for the Memoize::ExpireLRU module, to Ariel Scolnicov for
1031*0Sstevel@tonic-gatedelightful messages about the Fibonacci function, to Dion Almaer for
1032*0Sstevel@tonic-gatethought-provoking suggestions about the default normalizer, to Walt
1033*0Sstevel@tonic-gateMankowski and Kurt Starsinic for much help investigating problems
1034*0Sstevel@tonic-gateunder threaded Perl, to Alex Dudkevich for reporting the bug in
1035*0Sstevel@tonic-gateprototyped functions and for checking my patch, to Tony Bass for many
1036*0Sstevel@tonic-gatehelpful suggestions, to Jonathan Roy (again) for finding a use for
1037*0Sstevel@tonic-gateC<unmemoize()>, to Philippe Verdret for enlightening discussion of
1038*0Sstevel@tonic-gateC<Hook::PrePostCall>, to Nat Torkington for advice I ignored, to Chris
1039*0Sstevel@tonic-gateNandor for portability advice, to Randal Schwartz for suggesting the
1040*0Sstevel@tonic-gate'C<flush_cache> function, and to Jenda Krynicky for being a light in
1041*0Sstevel@tonic-gatethe world.
1042*0Sstevel@tonic-gate
1043*0Sstevel@tonic-gateSpecial thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including
1044*0Sstevel@tonic-gatethis module in the core and for his patient and helpful guidance
1045*0Sstevel@tonic-gateduring the integration process.
1046*0Sstevel@tonic-gate
1047*0Sstevel@tonic-gate=cut
1048