1*0Sstevel@tonic-gate 2*0Sstevel@tonic-gatepackage Tie::File; 3*0Sstevel@tonic-gaterequire 5.005; 4*0Sstevel@tonic-gateuse Carp ':DEFAULT', 'confess'; 5*0Sstevel@tonic-gateuse POSIX 'SEEK_SET'; 6*0Sstevel@tonic-gateuse Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY'; 7*0Sstevel@tonic-gatesub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY } 8*0Sstevel@tonic-gate 9*0Sstevel@tonic-gate 10*0Sstevel@tonic-gate$VERSION = "0.97"; 11*0Sstevel@tonic-gatemy $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes 12*0Sstevel@tonic-gatemy $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records 13*0Sstevel@tonic-gatemy $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful 14*0Sstevel@tonic-gate 15*0Sstevel@tonic-gatemy %good_opt = map {$_ => 1, "-$_" => 1} 16*0Sstevel@tonic-gate qw(memory dw_size mode recsep discipline 17*0Sstevel@tonic-gate autodefer autochomp autodefer_threshhold concurrent); 18*0Sstevel@tonic-gate 19*0Sstevel@tonic-gatesub TIEARRAY { 20*0Sstevel@tonic-gate if (@_ % 2 != 0) { 21*0Sstevel@tonic-gate croak "usage: tie \@array, $_[0], filename, [option => value]..."; 22*0Sstevel@tonic-gate } 23*0Sstevel@tonic-gate my ($pack, $file, %opts) = @_; 24*0Sstevel@tonic-gate 25*0Sstevel@tonic-gate # transform '-foo' keys into 'foo' keys 26*0Sstevel@tonic-gate for my $key (keys %opts) { 27*0Sstevel@tonic-gate unless ($good_opt{$key}) { 28*0Sstevel@tonic-gate croak("$pack: Unrecognized option '$key'\n"); 29*0Sstevel@tonic-gate } 30*0Sstevel@tonic-gate my $okey = $key; 31*0Sstevel@tonic-gate if ($key =~ s/^-+//) { 32*0Sstevel@tonic-gate $opts{$key} = delete $opts{$okey}; 33*0Sstevel@tonic-gate } 34*0Sstevel@tonic-gate } 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gate if ($opts{concurrent}) { 37*0Sstevel@tonic-gate croak("$pack: concurrent access not supported yet\n"); 38*0Sstevel@tonic-gate } 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gate unless (defined $opts{memory}) { 41*0Sstevel@tonic-gate # default is the larger of the default cache size and the 42*0Sstevel@tonic-gate # deferred-write buffer size (if specified) 43*0Sstevel@tonic-gate $opts{memory} = $DEFAULT_MEMORY_SIZE; 44*0Sstevel@tonic-gate $opts{memory} = $opts{dw_size} 45*0Sstevel@tonic-gate if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE; 46*0Sstevel@tonic-gate # Dora Winifred Read 47*0Sstevel@tonic-gate } 48*0Sstevel@tonic-gate $opts{dw_size} = $opts{memory} unless defined $opts{dw_size}; 49*0Sstevel@tonic-gate if ($opts{dw_size} > $opts{memory}) { 50*0Sstevel@tonic-gate croak("$pack: dw_size may not be larger than total memory allocation\n"); 51*0Sstevel@tonic-gate } 52*0Sstevel@tonic-gate # are we in deferred-write mode? 53*0Sstevel@tonic-gate $opts{defer} = 0 unless defined $opts{defer}; 54*0Sstevel@tonic-gate $opts{deferred} = {}; # no records are presently deferred 55*0Sstevel@tonic-gate $opts{deferred_s} = 0; # count of total bytes in ->{deferred} 56*0Sstevel@tonic-gate $opts{deferred_max} = -1; # empty 57*0Sstevel@tonic-gate 58*0Sstevel@tonic-gate # What's a good way to arrange that this class can be overridden? 59*0Sstevel@tonic-gate $opts{cache} = Tie::File::Cache->new($opts{memory}); 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gate # autodeferment is enabled by default 62*0Sstevel@tonic-gate $opts{autodefer} = 1 unless defined $opts{autodefer}; 63*0Sstevel@tonic-gate $opts{autodeferring} = 0; # but is not initially active 64*0Sstevel@tonic-gate $opts{ad_history} = []; 65*0Sstevel@tonic-gate $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD 66*0Sstevel@tonic-gate unless defined $opts{autodefer_threshhold}; 67*0Sstevel@tonic-gate $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD 68*0Sstevel@tonic-gate unless defined $opts{autodefer_filelen_threshhold}; 69*0Sstevel@tonic-gate 70*0Sstevel@tonic-gate $opts{offsets} = [0]; 71*0Sstevel@tonic-gate $opts{filename} = $file; 72*0Sstevel@tonic-gate unless (defined $opts{recsep}) { 73*0Sstevel@tonic-gate $opts{recsep} = _default_recsep(); 74*0Sstevel@tonic-gate } 75*0Sstevel@tonic-gate $opts{recseplen} = length($opts{recsep}); 76*0Sstevel@tonic-gate if ($opts{recseplen} == 0) { 77*0Sstevel@tonic-gate croak "Empty record separator not supported by $pack"; 78*0Sstevel@tonic-gate } 79*0Sstevel@tonic-gate 80*0Sstevel@tonic-gate $opts{autochomp} = 1 unless defined $opts{autochomp}; 81*0Sstevel@tonic-gate 82*0Sstevel@tonic-gate $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode}; 83*0Sstevel@tonic-gate $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); 84*0Sstevel@tonic-gate $opts{sawlastrec} = undef; 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate my $fh; 87*0Sstevel@tonic-gate 88*0Sstevel@tonic-gate if (UNIVERSAL::isa($file, 'GLOB')) { 89*0Sstevel@tonic-gate # We use 1 here on the theory that some systems 90*0Sstevel@tonic-gate # may not indicate failure if we use 0. 91*0Sstevel@tonic-gate # MSWin32 does not indicate failure with 0, but I don't know if 92*0Sstevel@tonic-gate # it will indicate failure with 1 or not. 93*0Sstevel@tonic-gate unless (seek $file, 1, SEEK_SET) { 94*0Sstevel@tonic-gate croak "$pack: your filehandle does not appear to be seekable"; 95*0Sstevel@tonic-gate } 96*0Sstevel@tonic-gate seek $file, 0, SEEK_SET # put it back 97*0Sstevel@tonic-gate $fh = $file; # setting binmode is the user's problem 98*0Sstevel@tonic-gate } elsif (ref $file) { 99*0Sstevel@tonic-gate croak "usage: tie \@array, $pack, filename, [option => value]..."; 100*0Sstevel@tonic-gate } else { 101*0Sstevel@tonic-gate # $fh = \do { local *FH }; # XXX this is buggy 102*0Sstevel@tonic-gate if ($] < 5.006) { 103*0Sstevel@tonic-gate # perl 5.005 and earlier don't autovivify filehandles 104*0Sstevel@tonic-gate require Symbol; 105*0Sstevel@tonic-gate $fh = Symbol::gensym(); 106*0Sstevel@tonic-gate } 107*0Sstevel@tonic-gate sysopen $fh, $file, $opts{mode}, 0666 or return; 108*0Sstevel@tonic-gate binmode $fh; 109*0Sstevel@tonic-gate ++$opts{ourfh}; 110*0Sstevel@tonic-gate } 111*0Sstevel@tonic-gate { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write 112*0Sstevel@tonic-gate if (defined $opts{discipline} && $] >= 5.006) { 113*0Sstevel@tonic-gate # This avoids a compile-time warning under 5.005 114*0Sstevel@tonic-gate eval 'binmode($fh, $opts{discipline})'; 115*0Sstevel@tonic-gate croak $@ if $@ =~ /unknown discipline/i; 116*0Sstevel@tonic-gate die if $@; 117*0Sstevel@tonic-gate } 118*0Sstevel@tonic-gate $opts{fh} = $fh; 119*0Sstevel@tonic-gate 120*0Sstevel@tonic-gate bless \%opts => $pack; 121*0Sstevel@tonic-gate} 122*0Sstevel@tonic-gate 123*0Sstevel@tonic-gatesub FETCH { 124*0Sstevel@tonic-gate my ($self, $n) = @_; 125*0Sstevel@tonic-gate my $rec; 126*0Sstevel@tonic-gate 127*0Sstevel@tonic-gate # check the defer buffer 128*0Sstevel@tonic-gate $rec = $self->{deferred}{$n} if exists $self->{deferred}{$n}; 129*0Sstevel@tonic-gate $rec = $self->_fetch($n) unless defined $rec; 130*0Sstevel@tonic-gate 131*0Sstevel@tonic-gate # inlined _chomp1 132*0Sstevel@tonic-gate substr($rec, - $self->{recseplen}) = "" 133*0Sstevel@tonic-gate if defined $rec && $self->{autochomp}; 134*0Sstevel@tonic-gate $rec; 135*0Sstevel@tonic-gate} 136*0Sstevel@tonic-gate 137*0Sstevel@tonic-gate# Chomp many records in-place; return nothing useful 138*0Sstevel@tonic-gatesub _chomp { 139*0Sstevel@tonic-gate my $self = shift; 140*0Sstevel@tonic-gate return unless $self->{autochomp}; 141*0Sstevel@tonic-gate if ($self->{autochomp}) { 142*0Sstevel@tonic-gate for (@_) { 143*0Sstevel@tonic-gate next unless defined; 144*0Sstevel@tonic-gate substr($_, - $self->{recseplen}) = ""; 145*0Sstevel@tonic-gate } 146*0Sstevel@tonic-gate } 147*0Sstevel@tonic-gate} 148*0Sstevel@tonic-gate 149*0Sstevel@tonic-gate# Chomp one record in-place; return modified record 150*0Sstevel@tonic-gatesub _chomp1 { 151*0Sstevel@tonic-gate my ($self, $rec) = @_; 152*0Sstevel@tonic-gate return $rec unless $self->{autochomp}; 153*0Sstevel@tonic-gate return unless defined $rec; 154*0Sstevel@tonic-gate substr($rec, - $self->{recseplen}) = ""; 155*0Sstevel@tonic-gate $rec; 156*0Sstevel@tonic-gate} 157*0Sstevel@tonic-gate 158*0Sstevel@tonic-gatesub _fetch { 159*0Sstevel@tonic-gate my ($self, $n) = @_; 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate # check the record cache 162*0Sstevel@tonic-gate { my $cached = $self->{cache}->lookup($n); 163*0Sstevel@tonic-gate return $cached if defined $cached; 164*0Sstevel@tonic-gate } 165*0Sstevel@tonic-gate 166*0Sstevel@tonic-gate if ($#{$self->{offsets}} < $n) { 167*0Sstevel@tonic-gate return if $self->{eof}; # request for record beyond end of file 168*0Sstevel@tonic-gate my $o = $self->_fill_offsets_to($n); 169*0Sstevel@tonic-gate # If it's still undefined, there is no such record, so return 'undef' 170*0Sstevel@tonic-gate return unless defined $o; 171*0Sstevel@tonic-gate } 172*0Sstevel@tonic-gate 173*0Sstevel@tonic-gate my $fh = $self->{FH}; 174*0Sstevel@tonic-gate $self->_seek($n); # we can do this now that offsets is populated 175*0Sstevel@tonic-gate my $rec = $self->_read_record; 176*0Sstevel@tonic-gate 177*0Sstevel@tonic-gate# If we happen to have just read the first record, check to see if 178*0Sstevel@tonic-gate# the length of the record matches what 'tell' says. If not, Tie::File 179*0Sstevel@tonic-gate# won't work, and should drop dead. 180*0Sstevel@tonic-gate# 181*0Sstevel@tonic-gate# if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) { 182*0Sstevel@tonic-gate# if (defined $self->{discipline}) { 183*0Sstevel@tonic-gate# croak "I/O discipline $self->{discipline} not supported"; 184*0Sstevel@tonic-gate# } else { 185*0Sstevel@tonic-gate# croak "File encoding not supported"; 186*0Sstevel@tonic-gate# } 187*0Sstevel@tonic-gate# } 188*0Sstevel@tonic-gate 189*0Sstevel@tonic-gate $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing}; 190*0Sstevel@tonic-gate $rec; 191*0Sstevel@tonic-gate} 192*0Sstevel@tonic-gate 193*0Sstevel@tonic-gatesub STORE { 194*0Sstevel@tonic-gate my ($self, $n, $rec) = @_; 195*0Sstevel@tonic-gate die "STORE called from _check_integrity!" if $DIAGNOSTIC; 196*0Sstevel@tonic-gate 197*0Sstevel@tonic-gate $self->_fixrecs($rec); 198*0Sstevel@tonic-gate 199*0Sstevel@tonic-gate if ($self->{autodefer}) { 200*0Sstevel@tonic-gate $self->_annotate_ad_history($n); 201*0Sstevel@tonic-gate } 202*0Sstevel@tonic-gate 203*0Sstevel@tonic-gate return $self->_store_deferred($n, $rec) if $self->_is_deferring; 204*0Sstevel@tonic-gate 205*0Sstevel@tonic-gate 206*0Sstevel@tonic-gate # We need this to decide whether the new record will fit 207*0Sstevel@tonic-gate # It incidentally populates the offsets table 208*0Sstevel@tonic-gate # Note we have to do this before we alter the cache 209*0Sstevel@tonic-gate # 20020324 Wait, but this DOES alter the cache. TODO BUG? 210*0Sstevel@tonic-gate my $oldrec = $self->_fetch($n); 211*0Sstevel@tonic-gate 212*0Sstevel@tonic-gate if (not defined $oldrec) { 213*0Sstevel@tonic-gate # We're storing a record beyond the end of the file 214*0Sstevel@tonic-gate $self->_extend_file_to($n+1); 215*0Sstevel@tonic-gate $oldrec = $self->{recsep}; 216*0Sstevel@tonic-gate } 217*0Sstevel@tonic-gate# return if $oldrec eq $rec; # don't bother 218*0Sstevel@tonic-gate my $len_diff = length($rec) - length($oldrec); 219*0Sstevel@tonic-gate 220*0Sstevel@tonic-gate # length($oldrec) here is not consistent with text mode TODO XXX BUG 221*0Sstevel@tonic-gate $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec)); 222*0Sstevel@tonic-gate $self->_oadjust([$n, 1, $rec]); 223*0Sstevel@tonic-gate $self->{cache}->update($n, $rec); 224*0Sstevel@tonic-gate} 225*0Sstevel@tonic-gate 226*0Sstevel@tonic-gatesub _store_deferred { 227*0Sstevel@tonic-gate my ($self, $n, $rec) = @_; 228*0Sstevel@tonic-gate $self->{cache}->remove($n); 229*0Sstevel@tonic-gate my $old_deferred = $self->{deferred}{$n}; 230*0Sstevel@tonic-gate 231*0Sstevel@tonic-gate if (defined $self->{deferred_max} && $n > $self->{deferred_max}) { 232*0Sstevel@tonic-gate $self->{deferred_max} = $n; 233*0Sstevel@tonic-gate } 234*0Sstevel@tonic-gate $self->{deferred}{$n} = $rec; 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate my $len_diff = length($rec); 237*0Sstevel@tonic-gate $len_diff -= length($old_deferred) if defined $old_deferred; 238*0Sstevel@tonic-gate $self->{deferred_s} += $len_diff; 239*0Sstevel@tonic-gate $self->{cache}->adj_limit(-$len_diff); 240*0Sstevel@tonic-gate if ($self->{deferred_s} > $self->{dw_size}) { 241*0Sstevel@tonic-gate $self->_flush; 242*0Sstevel@tonic-gate } elsif ($self->_cache_too_full) { 243*0Sstevel@tonic-gate $self->_cache_flush; 244*0Sstevel@tonic-gate } 245*0Sstevel@tonic-gate} 246*0Sstevel@tonic-gate 247*0Sstevel@tonic-gate# Remove a single record from the deferred-write buffer without writing it 248*0Sstevel@tonic-gate# The record need not be present 249*0Sstevel@tonic-gatesub _delete_deferred { 250*0Sstevel@tonic-gate my ($self, $n) = @_; 251*0Sstevel@tonic-gate my $rec = delete $self->{deferred}{$n}; 252*0Sstevel@tonic-gate return unless defined $rec; 253*0Sstevel@tonic-gate 254*0Sstevel@tonic-gate if (defined $self->{deferred_max} 255*0Sstevel@tonic-gate && $n == $self->{deferred_max}) { 256*0Sstevel@tonic-gate undef $self->{deferred_max}; 257*0Sstevel@tonic-gate } 258*0Sstevel@tonic-gate 259*0Sstevel@tonic-gate $self->{deferred_s} -= length $rec; 260*0Sstevel@tonic-gate $self->{cache}->adj_limit(length $rec); 261*0Sstevel@tonic-gate} 262*0Sstevel@tonic-gate 263*0Sstevel@tonic-gatesub FETCHSIZE { 264*0Sstevel@tonic-gate my $self = shift; 265*0Sstevel@tonic-gate my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets; 266*0Sstevel@tonic-gate 267*0Sstevel@tonic-gate my $top_deferred = $self->_defer_max; 268*0Sstevel@tonic-gate $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1; 269*0Sstevel@tonic-gate $n; 270*0Sstevel@tonic-gate} 271*0Sstevel@tonic-gate 272*0Sstevel@tonic-gatesub STORESIZE { 273*0Sstevel@tonic-gate my ($self, $len) = @_; 274*0Sstevel@tonic-gate 275*0Sstevel@tonic-gate if ($self->{autodefer}) { 276*0Sstevel@tonic-gate $self->_annotate_ad_history('STORESIZE'); 277*0Sstevel@tonic-gate } 278*0Sstevel@tonic-gate 279*0Sstevel@tonic-gate my $olen = $self->FETCHSIZE; 280*0Sstevel@tonic-gate return if $len == $olen; # Woo-hoo! 281*0Sstevel@tonic-gate 282*0Sstevel@tonic-gate # file gets longer 283*0Sstevel@tonic-gate if ($len > $olen) { 284*0Sstevel@tonic-gate if ($self->_is_deferring) { 285*0Sstevel@tonic-gate for ($olen .. $len-1) { 286*0Sstevel@tonic-gate $self->_store_deferred($_, $self->{recsep}); 287*0Sstevel@tonic-gate } 288*0Sstevel@tonic-gate } else { 289*0Sstevel@tonic-gate $self->_extend_file_to($len); 290*0Sstevel@tonic-gate } 291*0Sstevel@tonic-gate return; 292*0Sstevel@tonic-gate } 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate # file gets shorter 295*0Sstevel@tonic-gate if ($self->_is_deferring) { 296*0Sstevel@tonic-gate # TODO maybe replace this with map-plus-assignment? 297*0Sstevel@tonic-gate for (grep $_ >= $len, keys %{$self->{deferred}}) { 298*0Sstevel@tonic-gate $self->_delete_deferred($_); 299*0Sstevel@tonic-gate } 300*0Sstevel@tonic-gate $self->{deferred_max} = $len-1; 301*0Sstevel@tonic-gate } 302*0Sstevel@tonic-gate 303*0Sstevel@tonic-gate $self->_seek($len); 304*0Sstevel@tonic-gate $self->_chop_file; 305*0Sstevel@tonic-gate $#{$self->{offsets}} = $len; 306*0Sstevel@tonic-gate# $self->{offsets}[0] = 0; # in case we just chopped this 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate $self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys); 309*0Sstevel@tonic-gate} 310*0Sstevel@tonic-gate 311*0Sstevel@tonic-gate### OPTIMIZE ME 312*0Sstevel@tonic-gate### It should not be necessary to do FETCHSIZE 313*0Sstevel@tonic-gate### Just seek to the end of the file. 314*0Sstevel@tonic-gatesub PUSH { 315*0Sstevel@tonic-gate my $self = shift; 316*0Sstevel@tonic-gate $self->SPLICE($self->FETCHSIZE, scalar(@_), @_); 317*0Sstevel@tonic-gate 318*0Sstevel@tonic-gate # No need to return: 319*0Sstevel@tonic-gate # $self->FETCHSIZE; # because av.c takes care of this for me 320*0Sstevel@tonic-gate} 321*0Sstevel@tonic-gate 322*0Sstevel@tonic-gatesub POP { 323*0Sstevel@tonic-gate my $self = shift; 324*0Sstevel@tonic-gate my $size = $self->FETCHSIZE; 325*0Sstevel@tonic-gate return if $size == 0; 326*0Sstevel@tonic-gate# print STDERR "# POPPITY POP POP POP\n"; 327*0Sstevel@tonic-gate scalar $self->SPLICE($size-1, 1); 328*0Sstevel@tonic-gate} 329*0Sstevel@tonic-gate 330*0Sstevel@tonic-gatesub SHIFT { 331*0Sstevel@tonic-gate my $self = shift; 332*0Sstevel@tonic-gate scalar $self->SPLICE(0, 1); 333*0Sstevel@tonic-gate} 334*0Sstevel@tonic-gate 335*0Sstevel@tonic-gatesub UNSHIFT { 336*0Sstevel@tonic-gate my $self = shift; 337*0Sstevel@tonic-gate $self->SPLICE(0, 0, @_); 338*0Sstevel@tonic-gate # $self->FETCHSIZE; # av.c takes care of this for me 339*0Sstevel@tonic-gate} 340*0Sstevel@tonic-gate 341*0Sstevel@tonic-gatesub CLEAR { 342*0Sstevel@tonic-gate my $self = shift; 343*0Sstevel@tonic-gate 344*0Sstevel@tonic-gate if ($self->{autodefer}) { 345*0Sstevel@tonic-gate $self->_annotate_ad_history('CLEAR'); 346*0Sstevel@tonic-gate } 347*0Sstevel@tonic-gate 348*0Sstevel@tonic-gate $self->_seekb(0); 349*0Sstevel@tonic-gate $self->_chop_file; 350*0Sstevel@tonic-gate $self->{cache}->set_limit($self->{memory}); 351*0Sstevel@tonic-gate $self->{cache}->empty; 352*0Sstevel@tonic-gate @{$self->{offsets}} = (0); 353*0Sstevel@tonic-gate %{$self->{deferred}}= (); 354*0Sstevel@tonic-gate $self->{deferred_s} = 0; 355*0Sstevel@tonic-gate $self->{deferred_max} = -1; 356*0Sstevel@tonic-gate} 357*0Sstevel@tonic-gate 358*0Sstevel@tonic-gatesub EXTEND { 359*0Sstevel@tonic-gate my ($self, $n) = @_; 360*0Sstevel@tonic-gate 361*0Sstevel@tonic-gate # No need to pre-extend anything in this case 362*0Sstevel@tonic-gate return if $self->_is_deferring; 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate $self->_fill_offsets_to($n); 365*0Sstevel@tonic-gate $self->_extend_file_to($n); 366*0Sstevel@tonic-gate} 367*0Sstevel@tonic-gate 368*0Sstevel@tonic-gatesub DELETE { 369*0Sstevel@tonic-gate my ($self, $n) = @_; 370*0Sstevel@tonic-gate 371*0Sstevel@tonic-gate if ($self->{autodefer}) { 372*0Sstevel@tonic-gate $self->_annotate_ad_history('DELETE'); 373*0Sstevel@tonic-gate } 374*0Sstevel@tonic-gate 375*0Sstevel@tonic-gate my $lastrec = $self->FETCHSIZE-1; 376*0Sstevel@tonic-gate my $rec = $self->FETCH($n); 377*0Sstevel@tonic-gate $self->_delete_deferred($n) if $self->_is_deferring; 378*0Sstevel@tonic-gate if ($n == $lastrec) { 379*0Sstevel@tonic-gate $self->_seek($n); 380*0Sstevel@tonic-gate $self->_chop_file; 381*0Sstevel@tonic-gate $#{$self->{offsets}}--; 382*0Sstevel@tonic-gate $self->{cache}->remove($n); 383*0Sstevel@tonic-gate # perhaps in this case I should also remove trailing null records? 384*0Sstevel@tonic-gate # 20020316 385*0Sstevel@tonic-gate # Note that delete @a[-3..-1] deletes the records in the wrong order, 386*0Sstevel@tonic-gate # so we only chop the very last one out of the file. We could repair this 387*0Sstevel@tonic-gate # by tracking deleted records inside the object. 388*0Sstevel@tonic-gate } elsif ($n < $lastrec) { 389*0Sstevel@tonic-gate $self->STORE($n, ""); 390*0Sstevel@tonic-gate } 391*0Sstevel@tonic-gate $rec; 392*0Sstevel@tonic-gate} 393*0Sstevel@tonic-gate 394*0Sstevel@tonic-gatesub EXISTS { 395*0Sstevel@tonic-gate my ($self, $n) = @_; 396*0Sstevel@tonic-gate return 1 if exists $self->{deferred}{$n}; 397*0Sstevel@tonic-gate $n < $self->FETCHSIZE; 398*0Sstevel@tonic-gate} 399*0Sstevel@tonic-gate 400*0Sstevel@tonic-gatesub SPLICE { 401*0Sstevel@tonic-gate my $self = shift; 402*0Sstevel@tonic-gate 403*0Sstevel@tonic-gate if ($self->{autodefer}) { 404*0Sstevel@tonic-gate $self->_annotate_ad_history('SPLICE'); 405*0Sstevel@tonic-gate } 406*0Sstevel@tonic-gate 407*0Sstevel@tonic-gate $self->_flush if $self->_is_deferring; # move this up? 408*0Sstevel@tonic-gate if (wantarray) { 409*0Sstevel@tonic-gate $self->_chomp(my @a = $self->_splice(@_)); 410*0Sstevel@tonic-gate @a; 411*0Sstevel@tonic-gate } else { 412*0Sstevel@tonic-gate $self->_chomp1(scalar $self->_splice(@_)); 413*0Sstevel@tonic-gate } 414*0Sstevel@tonic-gate} 415*0Sstevel@tonic-gate 416*0Sstevel@tonic-gatesub DESTROY { 417*0Sstevel@tonic-gate my $self = shift; 418*0Sstevel@tonic-gate $self->flush if $self->_is_deferring; 419*0Sstevel@tonic-gate $self->{cache}->delink if defined $self->{cache}; # break circular link 420*0Sstevel@tonic-gate if ($self->{fh} and $self->{ourfh}) { 421*0Sstevel@tonic-gate delete $self->{ourfh}; 422*0Sstevel@tonic-gate close delete $self->{fh}; 423*0Sstevel@tonic-gate } 424*0Sstevel@tonic-gate} 425*0Sstevel@tonic-gate 426*0Sstevel@tonic-gatesub _splice { 427*0Sstevel@tonic-gate my ($self, $pos, $nrecs, @data) = @_; 428*0Sstevel@tonic-gate my @result; 429*0Sstevel@tonic-gate 430*0Sstevel@tonic-gate $pos = 0 unless defined $pos; 431*0Sstevel@tonic-gate 432*0Sstevel@tonic-gate # Deal with negative and other out-of-range positions 433*0Sstevel@tonic-gate # Also set default for $nrecs 434*0Sstevel@tonic-gate { 435*0Sstevel@tonic-gate my $oldsize = $self->FETCHSIZE; 436*0Sstevel@tonic-gate $nrecs = $oldsize unless defined $nrecs; 437*0Sstevel@tonic-gate my $oldpos = $pos; 438*0Sstevel@tonic-gate 439*0Sstevel@tonic-gate if ($pos < 0) { 440*0Sstevel@tonic-gate $pos += $oldsize; 441*0Sstevel@tonic-gate if ($pos < 0) { 442*0Sstevel@tonic-gate croak "Modification of non-creatable array value attempted, subscript $oldpos"; 443*0Sstevel@tonic-gate } 444*0Sstevel@tonic-gate } 445*0Sstevel@tonic-gate 446*0Sstevel@tonic-gate if ($pos > $oldsize) { 447*0Sstevel@tonic-gate return unless @data; 448*0Sstevel@tonic-gate $pos = $oldsize; # This is what perl does for normal arrays 449*0Sstevel@tonic-gate } 450*0Sstevel@tonic-gate 451*0Sstevel@tonic-gate # The manual is very unclear here 452*0Sstevel@tonic-gate if ($nrecs < 0) { 453*0Sstevel@tonic-gate $nrecs = $oldsize - $pos + $nrecs; 454*0Sstevel@tonic-gate $nrecs = 0 if $nrecs < 0; 455*0Sstevel@tonic-gate } 456*0Sstevel@tonic-gate 457*0Sstevel@tonic-gate # nrecs is too big---it really means "until the end" 458*0Sstevel@tonic-gate # 20030507 459*0Sstevel@tonic-gate if ($nrecs + $pos > $oldsize) { 460*0Sstevel@tonic-gate $nrecs = $oldsize - $pos; 461*0Sstevel@tonic-gate } 462*0Sstevel@tonic-gate } 463*0Sstevel@tonic-gate 464*0Sstevel@tonic-gate $self->_fixrecs(@data); 465*0Sstevel@tonic-gate my $data = join '', @data; 466*0Sstevel@tonic-gate my $datalen = length $data; 467*0Sstevel@tonic-gate my $oldlen = 0; 468*0Sstevel@tonic-gate 469*0Sstevel@tonic-gate # compute length of data being removed 470*0Sstevel@tonic-gate for ($pos .. $pos+$nrecs-1) { 471*0Sstevel@tonic-gate last unless defined $self->_fill_offsets_to($_); 472*0Sstevel@tonic-gate my $rec = $self->_fetch($_); 473*0Sstevel@tonic-gate last unless defined $rec; 474*0Sstevel@tonic-gate push @result, $rec; 475*0Sstevel@tonic-gate 476*0Sstevel@tonic-gate # Why don't we just use length($rec) here? 477*0Sstevel@tonic-gate # Because that record might have come from the cache. _splice 478*0Sstevel@tonic-gate # might have been called to flush out the deferred-write records, 479*0Sstevel@tonic-gate # and in this case length($rec) is the length of the record to be 480*0Sstevel@tonic-gate # *written*, not the length of the actual record in the file. But 481*0Sstevel@tonic-gate # the offsets are still true. 20020322 482*0Sstevel@tonic-gate $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_] 483*0Sstevel@tonic-gate if defined $self->{offsets}[$_+1]; 484*0Sstevel@tonic-gate } 485*0Sstevel@tonic-gate $self->_fill_offsets_to($pos+$nrecs); 486*0Sstevel@tonic-gate 487*0Sstevel@tonic-gate # Modify the file 488*0Sstevel@tonic-gate $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen); 489*0Sstevel@tonic-gate # Adjust the offsets table 490*0Sstevel@tonic-gate $self->_oadjust([$pos, $nrecs, @data]); 491*0Sstevel@tonic-gate 492*0Sstevel@tonic-gate { # Take this read cache stuff out into a separate function 493*0Sstevel@tonic-gate # You made a half-attempt to put it into _oadjust. 494*0Sstevel@tonic-gate # Finish something like that up eventually. 495*0Sstevel@tonic-gate # STORE also needs to do something similarish 496*0Sstevel@tonic-gate 497*0Sstevel@tonic-gate # update the read cache, part 1 498*0Sstevel@tonic-gate # modified records 499*0Sstevel@tonic-gate for ($pos .. $pos+$nrecs-1) { 500*0Sstevel@tonic-gate my $new = $data[$_-$pos]; 501*0Sstevel@tonic-gate if (defined $new) { 502*0Sstevel@tonic-gate $self->{cache}->update($_, $new); 503*0Sstevel@tonic-gate } else { 504*0Sstevel@tonic-gate $self->{cache}->remove($_); 505*0Sstevel@tonic-gate } 506*0Sstevel@tonic-gate } 507*0Sstevel@tonic-gate 508*0Sstevel@tonic-gate # update the read cache, part 2 509*0Sstevel@tonic-gate # moved records - records past the site of the change 510*0Sstevel@tonic-gate # need to be renumbered 511*0Sstevel@tonic-gate # Maybe merge this with the previous block? 512*0Sstevel@tonic-gate { 513*0Sstevel@tonic-gate my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys; 514*0Sstevel@tonic-gate my @newkeys = map $_-$nrecs+@data, @oldkeys; 515*0Sstevel@tonic-gate $self->{cache}->rekey(\@oldkeys, \@newkeys); 516*0Sstevel@tonic-gate } 517*0Sstevel@tonic-gate 518*0Sstevel@tonic-gate # Now there might be too much data in the cache, if we spliced out 519*0Sstevel@tonic-gate # some short records and spliced in some long ones. If so, flush 520*0Sstevel@tonic-gate # the cache. 521*0Sstevel@tonic-gate $self->_cache_flush; 522*0Sstevel@tonic-gate } 523*0Sstevel@tonic-gate 524*0Sstevel@tonic-gate # Yes, the return value of 'splice' *is* actually this complicated 525*0Sstevel@tonic-gate wantarray ? @result : @result ? $result[-1] : undef; 526*0Sstevel@tonic-gate} 527*0Sstevel@tonic-gate 528*0Sstevel@tonic-gate 529*0Sstevel@tonic-gate# write data into the file 530*0Sstevel@tonic-gate# $data is the data to be written. 531*0Sstevel@tonic-gate# it should be written at position $pos, and should overwrite 532*0Sstevel@tonic-gate# exactly $len of the following bytes. 533*0Sstevel@tonic-gate# Note that if length($data) > $len, the subsequent bytes will have to 534*0Sstevel@tonic-gate# be moved up, and if length($data) < $len, they will have to 535*0Sstevel@tonic-gate# be moved down 536*0Sstevel@tonic-gatesub _twrite { 537*0Sstevel@tonic-gate my ($self, $data, $pos, $len) = @_; 538*0Sstevel@tonic-gate 539*0Sstevel@tonic-gate unless (defined $pos) { 540*0Sstevel@tonic-gate die "\$pos was undefined in _twrite"; 541*0Sstevel@tonic-gate } 542*0Sstevel@tonic-gate 543*0Sstevel@tonic-gate my $len_diff = length($data) - $len; 544*0Sstevel@tonic-gate 545*0Sstevel@tonic-gate if ($len_diff == 0) { # Woo-hoo! 546*0Sstevel@tonic-gate my $fh = $self->{fh}; 547*0Sstevel@tonic-gate $self->_seekb($pos); 548*0Sstevel@tonic-gate $self->_write_record($data); 549*0Sstevel@tonic-gate return; # well, that was easy. 550*0Sstevel@tonic-gate } 551*0Sstevel@tonic-gate 552*0Sstevel@tonic-gate # the two records are of different lengths 553*0Sstevel@tonic-gate # our strategy here: rewrite the tail of the file, 554*0Sstevel@tonic-gate # reading ahead one buffer at a time 555*0Sstevel@tonic-gate # $bufsize is required to be at least as large as the data we're overwriting 556*0Sstevel@tonic-gate my $bufsize = _bufsize($len_diff); 557*0Sstevel@tonic-gate my ($writepos, $readpos) = ($pos, $pos+$len); 558*0Sstevel@tonic-gate my $next_block; 559*0Sstevel@tonic-gate my $more_data; 560*0Sstevel@tonic-gate 561*0Sstevel@tonic-gate # Seems like there ought to be a way to avoid the repeated code 562*0Sstevel@tonic-gate # and the special case here. The read(1) is also a little weird. 563*0Sstevel@tonic-gate # Think about this. 564*0Sstevel@tonic-gate do { 565*0Sstevel@tonic-gate $self->_seekb($readpos); 566*0Sstevel@tonic-gate my $br = read $self->{fh}, $next_block, $bufsize; 567*0Sstevel@tonic-gate $more_data = read $self->{fh}, my($dummy), 1; 568*0Sstevel@tonic-gate $self->_seekb($writepos); 569*0Sstevel@tonic-gate $self->_write_record($data); 570*0Sstevel@tonic-gate $readpos += $br; 571*0Sstevel@tonic-gate $writepos += length $data; 572*0Sstevel@tonic-gate $data = $next_block; 573*0Sstevel@tonic-gate } while $more_data; 574*0Sstevel@tonic-gate $self->_seekb($writepos); 575*0Sstevel@tonic-gate $self->_write_record($next_block); 576*0Sstevel@tonic-gate 577*0Sstevel@tonic-gate # There might be leftover data at the end of the file 578*0Sstevel@tonic-gate $self->_chop_file if $len_diff < 0; 579*0Sstevel@tonic-gate} 580*0Sstevel@tonic-gate 581*0Sstevel@tonic-gate# _iwrite(D, S, E) 582*0Sstevel@tonic-gate# Insert text D at position S. 583*0Sstevel@tonic-gate# Let C = E-S-|D|. If C < 0; die. 584*0Sstevel@tonic-gate# Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E). 585*0Sstevel@tonic-gate# Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched. 586*0Sstevel@tonic-gate# 587*0Sstevel@tonic-gate# In a later version, don't read the entire intervening area into 588*0Sstevel@tonic-gate# memory at once; do the copying block by block. 589*0Sstevel@tonic-gatesub _iwrite { 590*0Sstevel@tonic-gate my $self = shift; 591*0Sstevel@tonic-gate my ($D, $s, $e) = @_; 592*0Sstevel@tonic-gate my $d = length $D; 593*0Sstevel@tonic-gate my $c = $e-$s-$d; 594*0Sstevel@tonic-gate local *FH = $self->{fh}; 595*0Sstevel@tonic-gate confess "Not enough space to insert $d bytes between $s and $e" 596*0Sstevel@tonic-gate if $c < 0; 597*0Sstevel@tonic-gate confess "[$s,$e) is an invalid insertion range" if $e < $s; 598*0Sstevel@tonic-gate 599*0Sstevel@tonic-gate $self->_seekb($s); 600*0Sstevel@tonic-gate read FH, my $buf, $e-$s; 601*0Sstevel@tonic-gate 602*0Sstevel@tonic-gate $D .= substr($buf, 0, $c, ""); 603*0Sstevel@tonic-gate 604*0Sstevel@tonic-gate $self->_seekb($s); 605*0Sstevel@tonic-gate $self->_write_record($D); 606*0Sstevel@tonic-gate 607*0Sstevel@tonic-gate return $buf; 608*0Sstevel@tonic-gate} 609*0Sstevel@tonic-gate 610*0Sstevel@tonic-gate# Like _twrite, but the data-pos-len triple may be repeated; you may 611*0Sstevel@tonic-gate# write several chunks. All the writing will be done in 612*0Sstevel@tonic-gate# one pass. Chunks SHALL be in ascending order and SHALL NOT overlap. 613*0Sstevel@tonic-gatesub _mtwrite { 614*0Sstevel@tonic-gate my $self = shift; 615*0Sstevel@tonic-gate my $unwritten = ""; 616*0Sstevel@tonic-gate my $delta = 0; 617*0Sstevel@tonic-gate 618*0Sstevel@tonic-gate @_ % 3 == 0 619*0Sstevel@tonic-gate or die "Arguments to _mtwrite did not come in groups of three"; 620*0Sstevel@tonic-gate 621*0Sstevel@tonic-gate while (@_) { 622*0Sstevel@tonic-gate my ($data, $pos, $len) = splice @_, 0, 3; 623*0Sstevel@tonic-gate my $end = $pos + $len; # The OLD end of the segment to be replaced 624*0Sstevel@tonic-gate $data = $unwritten . $data; 625*0Sstevel@tonic-gate $delta -= length($unwritten); 626*0Sstevel@tonic-gate $unwritten = ""; 627*0Sstevel@tonic-gate $pos += $delta; # This is where the data goes now 628*0Sstevel@tonic-gate my $dlen = length $data; 629*0Sstevel@tonic-gate $self->_seekb($pos); 630*0Sstevel@tonic-gate if ($len >= $dlen) { # the data will fit 631*0Sstevel@tonic-gate $self->_write_record($data); 632*0Sstevel@tonic-gate $delta += ($dlen - $len); # everything following moves down by this much 633*0Sstevel@tonic-gate $data = ""; # All the data in the buffer has been written 634*0Sstevel@tonic-gate } else { # won't fit 635*0Sstevel@tonic-gate my $writable = substr($data, 0, $len - $delta, ""); 636*0Sstevel@tonic-gate $self->_write_record($writable); 637*0Sstevel@tonic-gate $delta += ($dlen - $len); # everything following moves down by this much 638*0Sstevel@tonic-gate } 639*0Sstevel@tonic-gate 640*0Sstevel@tonic-gate # At this point we've written some but maybe not all of the data. 641*0Sstevel@tonic-gate # There might be a gap to close up, or $data might still contain a 642*0Sstevel@tonic-gate # bunch of unwritten data that didn't fit. 643*0Sstevel@tonic-gate my $ndlen = length $data; 644*0Sstevel@tonic-gate if ($delta == 0) { 645*0Sstevel@tonic-gate $self->_write_record($data); 646*0Sstevel@tonic-gate } elsif ($delta < 0) { 647*0Sstevel@tonic-gate # upcopy (close up gap) 648*0Sstevel@tonic-gate if (@_) { 649*0Sstevel@tonic-gate $self->_upcopy($end, $end + $delta, $_[1] - $end); 650*0Sstevel@tonic-gate } else { 651*0Sstevel@tonic-gate $self->_upcopy($end, $end + $delta); 652*0Sstevel@tonic-gate } 653*0Sstevel@tonic-gate } else { 654*0Sstevel@tonic-gate # downcopy (insert data that didn't fit; replace this data in memory 655*0Sstevel@tonic-gate # with _later_ data that doesn't fit) 656*0Sstevel@tonic-gate if (@_) { 657*0Sstevel@tonic-gate $unwritten = $self->_downcopy($data, $end, $_[1] - $end); 658*0Sstevel@tonic-gate } else { 659*0Sstevel@tonic-gate # Make the file longer to accomodate the last segment that doesn' 660*0Sstevel@tonic-gate $unwritten = $self->_downcopy($data, $end); 661*0Sstevel@tonic-gate } 662*0Sstevel@tonic-gate } 663*0Sstevel@tonic-gate } 664*0Sstevel@tonic-gate} 665*0Sstevel@tonic-gate 666*0Sstevel@tonic-gate# Copy block of data of length $len from position $spos to position $dpos 667*0Sstevel@tonic-gate# $dpos must be <= $spos 668*0Sstevel@tonic-gate# 669*0Sstevel@tonic-gate# If $len is undefined, go all the way to the end of the file 670*0Sstevel@tonic-gate# and then truncate it ($spos - $dpos bytes will be removed) 671*0Sstevel@tonic-gatesub _upcopy { 672*0Sstevel@tonic-gate my $blocksize = 8192; 673*0Sstevel@tonic-gate my ($self, $spos, $dpos, $len) = @_; 674*0Sstevel@tonic-gate if ($dpos > $spos) { 675*0Sstevel@tonic-gate die "source ($spos) was upstream of destination ($dpos) in _upcopy"; 676*0Sstevel@tonic-gate } elsif ($dpos == $spos) { 677*0Sstevel@tonic-gate return; 678*0Sstevel@tonic-gate } 679*0Sstevel@tonic-gate 680*0Sstevel@tonic-gate while (! defined ($len) || $len > 0) { 681*0Sstevel@tonic-gate my $readsize = ! defined($len) ? $blocksize 682*0Sstevel@tonic-gate : $len > $blocksize ? $blocksize 683*0Sstevel@tonic-gate : $len; 684*0Sstevel@tonic-gate 685*0Sstevel@tonic-gate my $fh = $self->{fh}; 686*0Sstevel@tonic-gate $self->_seekb($spos); 687*0Sstevel@tonic-gate my $bytes_read = read $fh, my($data), $readsize; 688*0Sstevel@tonic-gate $self->_seekb($dpos); 689*0Sstevel@tonic-gate if ($data eq "") { 690*0Sstevel@tonic-gate $self->_chop_file; 691*0Sstevel@tonic-gate last; 692*0Sstevel@tonic-gate } 693*0Sstevel@tonic-gate $self->_write_record($data); 694*0Sstevel@tonic-gate $spos += $bytes_read; 695*0Sstevel@tonic-gate $dpos += $bytes_read; 696*0Sstevel@tonic-gate $len -= $bytes_read if defined $len; 697*0Sstevel@tonic-gate } 698*0Sstevel@tonic-gate} 699*0Sstevel@tonic-gate 700*0Sstevel@tonic-gate# Write $data into a block of length $len at position $pos, 701*0Sstevel@tonic-gate# moving everything in the block forwards to make room. 702*0Sstevel@tonic-gate# Instead of writing the last length($data) bytes from the block 703*0Sstevel@tonic-gate# (because there isn't room for them any longer) return them. 704*0Sstevel@tonic-gate# 705*0Sstevel@tonic-gate# Undefined $len means 'until the end of the file' 706*0Sstevel@tonic-gatesub _downcopy { 707*0Sstevel@tonic-gate my $blocksize = 8192; 708*0Sstevel@tonic-gate my ($self, $data, $pos, $len) = @_; 709*0Sstevel@tonic-gate my $fh = $self->{fh}; 710*0Sstevel@tonic-gate 711*0Sstevel@tonic-gate while (! defined $len || $len > 0) { 712*0Sstevel@tonic-gate my $readsize = ! defined($len) ? $blocksize 713*0Sstevel@tonic-gate : $len > $blocksize? $blocksize : $len; 714*0Sstevel@tonic-gate $self->_seekb($pos); 715*0Sstevel@tonic-gate read $fh, my($old), $readsize; 716*0Sstevel@tonic-gate my $last_read_was_short = length($old) < $readsize; 717*0Sstevel@tonic-gate $data .= $old; 718*0Sstevel@tonic-gate my $writable; 719*0Sstevel@tonic-gate if ($last_read_was_short) { 720*0Sstevel@tonic-gate # If last read was short, then $data now contains the entire rest 721*0Sstevel@tonic-gate # of the file, so there's no need to write only one block of it 722*0Sstevel@tonic-gate $writable = $data; 723*0Sstevel@tonic-gate $data = ""; 724*0Sstevel@tonic-gate } else { 725*0Sstevel@tonic-gate $writable = substr($data, 0, $readsize, ""); 726*0Sstevel@tonic-gate } 727*0Sstevel@tonic-gate last if $writable eq ""; 728*0Sstevel@tonic-gate $self->_seekb($pos); 729*0Sstevel@tonic-gate $self->_write_record($writable); 730*0Sstevel@tonic-gate last if $last_read_was_short && $data eq ""; 731*0Sstevel@tonic-gate $len -= $readsize if defined $len; 732*0Sstevel@tonic-gate $pos += $readsize; 733*0Sstevel@tonic-gate } 734*0Sstevel@tonic-gate return $data; 735*0Sstevel@tonic-gate} 736*0Sstevel@tonic-gate 737*0Sstevel@tonic-gate# Adjust the object data structures following an '_mtwrite' 738*0Sstevel@tonic-gate# Arguments are 739*0Sstevel@tonic-gate# [$pos, $nrecs, @length] items 740*0Sstevel@tonic-gate# indicating that $nrecs records were removed at $recpos (a record offset) 741*0Sstevel@tonic-gate# and replaced with records of length @length... 742*0Sstevel@tonic-gate# Arguments guarantee that $recpos is strictly increasing. 743*0Sstevel@tonic-gate# No return value 744*0Sstevel@tonic-gatesub _oadjust { 745*0Sstevel@tonic-gate my $self = shift; 746*0Sstevel@tonic-gate my $delta = 0; 747*0Sstevel@tonic-gate my $delta_recs = 0; 748*0Sstevel@tonic-gate my $prev_end = -1; 749*0Sstevel@tonic-gate my %newkeys; 750*0Sstevel@tonic-gate 751*0Sstevel@tonic-gate for (@_) { 752*0Sstevel@tonic-gate my ($pos, $nrecs, @data) = @$_; 753*0Sstevel@tonic-gate $pos += $delta_recs; 754*0Sstevel@tonic-gate 755*0Sstevel@tonic-gate # Adjust the offsets of the records after the previous batch up 756*0Sstevel@tonic-gate # to the first new one of this batch 757*0Sstevel@tonic-gate for my $i ($prev_end+2 .. $pos - 1) { 758*0Sstevel@tonic-gate $self->{offsets}[$i] += $delta; 759*0Sstevel@tonic-gate $newkey{$i} = $i + $delta_recs; 760*0Sstevel@tonic-gate } 761*0Sstevel@tonic-gate 762*0Sstevel@tonic-gate $prev_end = $pos + @data - 1; # last record moved on this pass 763*0Sstevel@tonic-gate 764*0Sstevel@tonic-gate # Remove the offsets for the removed records; 765*0Sstevel@tonic-gate # replace with the offsets for the inserted records 766*0Sstevel@tonic-gate my @newoff = ($self->{offsets}[$pos] + $delta); 767*0Sstevel@tonic-gate for my $i (0 .. $#data) { 768*0Sstevel@tonic-gate my $newlen = length $data[$i]; 769*0Sstevel@tonic-gate push @newoff, $newoff[$i] + $newlen; 770*0Sstevel@tonic-gate $delta += $newlen; 771*0Sstevel@tonic-gate } 772*0Sstevel@tonic-gate 773*0Sstevel@tonic-gate for my $i ($pos .. $pos+$nrecs-1) { 774*0Sstevel@tonic-gate last if $i+1 > $#{$self->{offsets}}; 775*0Sstevel@tonic-gate my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i]; 776*0Sstevel@tonic-gate $delta -= $oldlen; 777*0Sstevel@tonic-gate } 778*0Sstevel@tonic-gate 779*0Sstevel@tonic-gate# # also this data has changed, so update it in the cache 780*0Sstevel@tonic-gate# for (0 .. $#data) { 781*0Sstevel@tonic-gate# $self->{cache}->update($pos + $_, $data[$_]); 782*0Sstevel@tonic-gate# } 783*0Sstevel@tonic-gate# if ($delta_recs) { 784*0Sstevel@tonic-gate# my @oldkeys = grep $_ >= $pos + @data, $self->{cache}->ckeys; 785*0Sstevel@tonic-gate# my @newkeys = map $_ + $delta_recs, @oldkeys; 786*0Sstevel@tonic-gate# $self->{cache}->rekey(\@oldkeys, \@newkeys); 787*0Sstevel@tonic-gate# } 788*0Sstevel@tonic-gate 789*0Sstevel@tonic-gate # replace old offsets with new 790*0Sstevel@tonic-gate splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff; 791*0Sstevel@tonic-gate # What if we just spliced out the end of the offsets table? 792*0Sstevel@tonic-gate # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO 793*0Sstevel@tonic-gate 794*0Sstevel@tonic-gate $delta_recs += @data - $nrecs; # net change in total number of records 795*0Sstevel@tonic-gate } 796*0Sstevel@tonic-gate 797*0Sstevel@tonic-gate # The trailing records at the very end of the file 798*0Sstevel@tonic-gate if ($delta) { 799*0Sstevel@tonic-gate for my $i ($prev_end+2 .. $#{$self->{offsets}}) { 800*0Sstevel@tonic-gate $self->{offsets}[$i] += $delta; 801*0Sstevel@tonic-gate } 802*0Sstevel@tonic-gate } 803*0Sstevel@tonic-gate 804*0Sstevel@tonic-gate # If we scrubbed out all known offsets, regenerate the trivial table 805*0Sstevel@tonic-gate # that knows that the file does indeed start at 0. 806*0Sstevel@tonic-gate $self->{offsets}[0] = 0 unless @{$self->{offsets}}; 807*0Sstevel@tonic-gate # If the file got longer, the offsets table is no longer complete 808*0Sstevel@tonic-gate # $self->{eof} = 0 if $delta_recs > 0; 809*0Sstevel@tonic-gate 810*0Sstevel@tonic-gate # Now there might be too much data in the cache, if we spliced out 811*0Sstevel@tonic-gate # some short records and spliced in some long ones. If so, flush 812*0Sstevel@tonic-gate # the cache. 813*0Sstevel@tonic-gate $self->_cache_flush; 814*0Sstevel@tonic-gate} 815*0Sstevel@tonic-gate 816*0Sstevel@tonic-gate# If a record does not already end with the appropriate terminator 817*0Sstevel@tonic-gate# string, append one. 818*0Sstevel@tonic-gatesub _fixrecs { 819*0Sstevel@tonic-gate my $self = shift; 820*0Sstevel@tonic-gate for (@_) { 821*0Sstevel@tonic-gate $_ = "" unless defined $_; 822*0Sstevel@tonic-gate $_ .= $self->{recsep} 823*0Sstevel@tonic-gate unless substr($_, - $self->{recseplen}) eq $self->{recsep}; 824*0Sstevel@tonic-gate } 825*0Sstevel@tonic-gate} 826*0Sstevel@tonic-gate 827*0Sstevel@tonic-gate 828*0Sstevel@tonic-gate################################################################ 829*0Sstevel@tonic-gate# 830*0Sstevel@tonic-gate# Basic read, write, and seek 831*0Sstevel@tonic-gate# 832*0Sstevel@tonic-gate 833*0Sstevel@tonic-gate# seek to the beginning of record #$n 834*0Sstevel@tonic-gate# Assumes that the offsets table is already correctly populated 835*0Sstevel@tonic-gate# 836*0Sstevel@tonic-gate# Note that $n=-1 has a special meaning here: It means the start of 837*0Sstevel@tonic-gate# the last known record; this may or may not be the very last record 838*0Sstevel@tonic-gate# in the file, depending on whether the offsets table is fully populated. 839*0Sstevel@tonic-gate# 840*0Sstevel@tonic-gatesub _seek { 841*0Sstevel@tonic-gate my ($self, $n) = @_; 842*0Sstevel@tonic-gate my $o = $self->{offsets}[$n]; 843*0Sstevel@tonic-gate defined($o) 844*0Sstevel@tonic-gate or confess("logic error: undefined offset for record $n"); 845*0Sstevel@tonic-gate seek $self->{fh}, $o, SEEK_SET 846*0Sstevel@tonic-gate or confess "Couldn't seek filehandle: $!"; # "Should never happen." 847*0Sstevel@tonic-gate} 848*0Sstevel@tonic-gate 849*0Sstevel@tonic-gate# seek to byte $b in the file 850*0Sstevel@tonic-gatesub _seekb { 851*0Sstevel@tonic-gate my ($self, $b) = @_; 852*0Sstevel@tonic-gate seek $self->{fh}, $b, SEEK_SET 853*0Sstevel@tonic-gate or die "Couldn't seek filehandle: $!"; # "Should never happen." 854*0Sstevel@tonic-gate} 855*0Sstevel@tonic-gate 856*0Sstevel@tonic-gate# populate the offsets table up to the beginning of record $n 857*0Sstevel@tonic-gate# return the offset of record $n 858*0Sstevel@tonic-gatesub _fill_offsets_to { 859*0Sstevel@tonic-gate my ($self, $n) = @_; 860*0Sstevel@tonic-gate 861*0Sstevel@tonic-gate return $self->{offsets}[$n] if $self->{eof}; 862*0Sstevel@tonic-gate 863*0Sstevel@tonic-gate my $fh = $self->{fh}; 864*0Sstevel@tonic-gate local *OFF = $self->{offsets}; 865*0Sstevel@tonic-gate my $rec; 866*0Sstevel@tonic-gate 867*0Sstevel@tonic-gate until ($#OFF >= $n) { 868*0Sstevel@tonic-gate $self->_seek(-1); # tricky -- see comment at _seek 869*0Sstevel@tonic-gate $rec = $self->_read_record; 870*0Sstevel@tonic-gate if (defined $rec) { 871*0Sstevel@tonic-gate push @OFF, int(tell $fh); # Tels says that int() saves memory here 872*0Sstevel@tonic-gate } else { 873*0Sstevel@tonic-gate $self->{eof} = 1; 874*0Sstevel@tonic-gate return; # It turns out there is no such record 875*0Sstevel@tonic-gate } 876*0Sstevel@tonic-gate } 877*0Sstevel@tonic-gate 878*0Sstevel@tonic-gate # we have now read all the records up to record n-1, 879*0Sstevel@tonic-gate # so we can return the offset of record n 880*0Sstevel@tonic-gate $OFF[$n]; 881*0Sstevel@tonic-gate} 882*0Sstevel@tonic-gate 883*0Sstevel@tonic-gatesub _fill_offsets { 884*0Sstevel@tonic-gate my ($self) = @_; 885*0Sstevel@tonic-gate 886*0Sstevel@tonic-gate my $fh = $self->{fh}; 887*0Sstevel@tonic-gate local *OFF = $self->{offsets}; 888*0Sstevel@tonic-gate 889*0Sstevel@tonic-gate $self->_seek(-1); # tricky -- see comment at _seek 890*0Sstevel@tonic-gate 891*0Sstevel@tonic-gate # Tels says that inlining read_record() would make this loop 892*0Sstevel@tonic-gate # five times faster. 20030508 893*0Sstevel@tonic-gate while ( defined $self->_read_record()) { 894*0Sstevel@tonic-gate # int() saves us memory here 895*0Sstevel@tonic-gate push @OFF, int(tell $fh); 896*0Sstevel@tonic-gate } 897*0Sstevel@tonic-gate 898*0Sstevel@tonic-gate $self->{eof} = 1; 899*0Sstevel@tonic-gate $#OFF; 900*0Sstevel@tonic-gate} 901*0Sstevel@tonic-gate 902*0Sstevel@tonic-gate# assumes that $rec is already suitably terminated 903*0Sstevel@tonic-gatesub _write_record { 904*0Sstevel@tonic-gate my ($self, $rec) = @_; 905*0Sstevel@tonic-gate my $fh = $self->{fh}; 906*0Sstevel@tonic-gate local $\ = ""; 907*0Sstevel@tonic-gate print $fh $rec 908*0Sstevel@tonic-gate or die "Couldn't write record: $!"; # "Should never happen." 909*0Sstevel@tonic-gate# $self->{_written} += length($rec); 910*0Sstevel@tonic-gate} 911*0Sstevel@tonic-gate 912*0Sstevel@tonic-gatesub _read_record { 913*0Sstevel@tonic-gate my $self = shift; 914*0Sstevel@tonic-gate my $rec; 915*0Sstevel@tonic-gate { local $/ = $self->{recsep}; 916*0Sstevel@tonic-gate my $fh = $self->{fh}; 917*0Sstevel@tonic-gate $rec = <$fh>; 918*0Sstevel@tonic-gate } 919*0Sstevel@tonic-gate return unless defined $rec; 920*0Sstevel@tonic-gate if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) { 921*0Sstevel@tonic-gate # improperly terminated final record --- quietly fix it. 922*0Sstevel@tonic-gate# my $ac = substr($rec, -$self->{recseplen}); 923*0Sstevel@tonic-gate# $ac =~ s/\n/\\n/g; 924*0Sstevel@tonic-gate $self->{sawlastrec} = 1; 925*0Sstevel@tonic-gate unless ($self->{rdonly}) { 926*0Sstevel@tonic-gate local $\ = ""; 927*0Sstevel@tonic-gate my $fh = $self->{fh}; 928*0Sstevel@tonic-gate print $fh $self->{recsep}; 929*0Sstevel@tonic-gate } 930*0Sstevel@tonic-gate $rec .= $self->{recsep}; 931*0Sstevel@tonic-gate } 932*0Sstevel@tonic-gate# $self->{_read} += length($rec) if defined $rec; 933*0Sstevel@tonic-gate $rec; 934*0Sstevel@tonic-gate} 935*0Sstevel@tonic-gate 936*0Sstevel@tonic-gatesub _rw_stats { 937*0Sstevel@tonic-gate my $self = shift; 938*0Sstevel@tonic-gate @{$self}{'_read', '_written'}; 939*0Sstevel@tonic-gate} 940*0Sstevel@tonic-gate 941*0Sstevel@tonic-gate################################################################ 942*0Sstevel@tonic-gate# 943*0Sstevel@tonic-gate# Read cache management 944*0Sstevel@tonic-gate 945*0Sstevel@tonic-gatesub _cache_flush { 946*0Sstevel@tonic-gate my ($self) = @_; 947*0Sstevel@tonic-gate $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s}); 948*0Sstevel@tonic-gate} 949*0Sstevel@tonic-gate 950*0Sstevel@tonic-gatesub _cache_too_full { 951*0Sstevel@tonic-gate my $self = shift; 952*0Sstevel@tonic-gate $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory}; 953*0Sstevel@tonic-gate} 954*0Sstevel@tonic-gate 955*0Sstevel@tonic-gate################################################################ 956*0Sstevel@tonic-gate# 957*0Sstevel@tonic-gate# File custodial services 958*0Sstevel@tonic-gate# 959*0Sstevel@tonic-gate 960*0Sstevel@tonic-gate 961*0Sstevel@tonic-gate# We have read to the end of the file and have the offsets table 962*0Sstevel@tonic-gate# entirely populated. Now we need to write a new record beyond 963*0Sstevel@tonic-gate# the end of the file. We prepare for this by writing 964*0Sstevel@tonic-gate# empty records into the file up to the position we want 965*0Sstevel@tonic-gate# 966*0Sstevel@tonic-gate# assumes that the offsets table already contains the offset of record $n, 967*0Sstevel@tonic-gate# if it exists, and extends to the end of the file if not. 968*0Sstevel@tonic-gatesub _extend_file_to { 969*0Sstevel@tonic-gate my ($self, $n) = @_; 970*0Sstevel@tonic-gate $self->_seek(-1); # position after the end of the last record 971*0Sstevel@tonic-gate my $pos = $self->{offsets}[-1]; 972*0Sstevel@tonic-gate 973*0Sstevel@tonic-gate # the offsets table has one entry more than the total number of records 974*0Sstevel@tonic-gate my $extras = $n - $#{$self->{offsets}}; 975*0Sstevel@tonic-gate 976*0Sstevel@tonic-gate # Todo : just use $self->{recsep} x $extras here? 977*0Sstevel@tonic-gate while ($extras-- > 0) { 978*0Sstevel@tonic-gate $self->_write_record($self->{recsep}); 979*0Sstevel@tonic-gate push @{$self->{offsets}}, int(tell $self->{fh}); 980*0Sstevel@tonic-gate } 981*0Sstevel@tonic-gate} 982*0Sstevel@tonic-gate 983*0Sstevel@tonic-gate# Truncate the file at the current position 984*0Sstevel@tonic-gatesub _chop_file { 985*0Sstevel@tonic-gate my $self = shift; 986*0Sstevel@tonic-gate truncate $self->{fh}, tell($self->{fh}); 987*0Sstevel@tonic-gate} 988*0Sstevel@tonic-gate 989*0Sstevel@tonic-gate 990*0Sstevel@tonic-gate# compute the size of a buffer suitable for moving 991*0Sstevel@tonic-gate# all the data in a file forward $n bytes 992*0Sstevel@tonic-gate# ($n may be negative) 993*0Sstevel@tonic-gate# The result should be at least $n. 994*0Sstevel@tonic-gatesub _bufsize { 995*0Sstevel@tonic-gate my $n = shift; 996*0Sstevel@tonic-gate return 8192 if $n <= 0; 997*0Sstevel@tonic-gate my $b = $n & ~8191; 998*0Sstevel@tonic-gate $b += 8192 if $n & 8191; 999*0Sstevel@tonic-gate $b; 1000*0Sstevel@tonic-gate} 1001*0Sstevel@tonic-gate 1002*0Sstevel@tonic-gate################################################################ 1003*0Sstevel@tonic-gate# 1004*0Sstevel@tonic-gate# Miscellaneous public methods 1005*0Sstevel@tonic-gate# 1006*0Sstevel@tonic-gate 1007*0Sstevel@tonic-gate# Lock the file 1008*0Sstevel@tonic-gatesub flock { 1009*0Sstevel@tonic-gate my ($self, $op) = @_; 1010*0Sstevel@tonic-gate unless (@_ <= 3) { 1011*0Sstevel@tonic-gate my $pack = ref $self; 1012*0Sstevel@tonic-gate croak "Usage: $pack\->flock([OPERATION])"; 1013*0Sstevel@tonic-gate } 1014*0Sstevel@tonic-gate my $fh = $self->{fh}; 1015*0Sstevel@tonic-gate $op = LOCK_EX unless defined $op; 1016*0Sstevel@tonic-gate my $locked = flock $fh, $op; 1017*0Sstevel@tonic-gate 1018*0Sstevel@tonic-gate if ($locked && ($op & (LOCK_EX | LOCK_SH))) { 1019*0Sstevel@tonic-gate # If you're locking the file, then presumably it's because 1020*0Sstevel@tonic-gate # there might have been a write access by another process. 1021*0Sstevel@tonic-gate # In that case, the read cache contents and the offsets table 1022*0Sstevel@tonic-gate # might be invalid, so discard them. 20030508 1023*0Sstevel@tonic-gate $self->{offsets} = [0]; 1024*0Sstevel@tonic-gate $self->{cache}->empty; 1025*0Sstevel@tonic-gate } 1026*0Sstevel@tonic-gate 1027*0Sstevel@tonic-gate $locked; 1028*0Sstevel@tonic-gate} 1029*0Sstevel@tonic-gate 1030*0Sstevel@tonic-gate# Get/set autochomp option 1031*0Sstevel@tonic-gatesub autochomp { 1032*0Sstevel@tonic-gate my $self = shift; 1033*0Sstevel@tonic-gate if (@_) { 1034*0Sstevel@tonic-gate my $old = $self->{autochomp}; 1035*0Sstevel@tonic-gate $self->{autochomp} = shift; 1036*0Sstevel@tonic-gate $old; 1037*0Sstevel@tonic-gate } else { 1038*0Sstevel@tonic-gate $self->{autochomp}; 1039*0Sstevel@tonic-gate } 1040*0Sstevel@tonic-gate} 1041*0Sstevel@tonic-gate 1042*0Sstevel@tonic-gate# Get offset table entries; returns offset of nth record 1043*0Sstevel@tonic-gatesub offset { 1044*0Sstevel@tonic-gate my ($self, $n) = @_; 1045*0Sstevel@tonic-gate 1046*0Sstevel@tonic-gate if ($#{$self->{offsets}} < $n) { 1047*0Sstevel@tonic-gate return if $self->{eof}; # request for record beyond the end of file 1048*0Sstevel@tonic-gate my $o = $self->_fill_offsets_to($n); 1049*0Sstevel@tonic-gate # If it's still undefined, there is no such record, so return 'undef' 1050*0Sstevel@tonic-gate return unless defined $o; 1051*0Sstevel@tonic-gate } 1052*0Sstevel@tonic-gate 1053*0Sstevel@tonic-gate $self->{offsets}[$n]; 1054*0Sstevel@tonic-gate} 1055*0Sstevel@tonic-gate 1056*0Sstevel@tonic-gatesub discard_offsets { 1057*0Sstevel@tonic-gate my $self = shift; 1058*0Sstevel@tonic-gate $self->{offsets} = [0]; 1059*0Sstevel@tonic-gate} 1060*0Sstevel@tonic-gate 1061*0Sstevel@tonic-gate################################################################ 1062*0Sstevel@tonic-gate# 1063*0Sstevel@tonic-gate# Matters related to deferred writing 1064*0Sstevel@tonic-gate# 1065*0Sstevel@tonic-gate 1066*0Sstevel@tonic-gate# Defer writes 1067*0Sstevel@tonic-gatesub defer { 1068*0Sstevel@tonic-gate my $self = shift; 1069*0Sstevel@tonic-gate $self->_stop_autodeferring; 1070*0Sstevel@tonic-gate @{$self->{ad_history}} = (); 1071*0Sstevel@tonic-gate $self->{defer} = 1; 1072*0Sstevel@tonic-gate} 1073*0Sstevel@tonic-gate 1074*0Sstevel@tonic-gate# Flush deferred writes 1075*0Sstevel@tonic-gate# 1076*0Sstevel@tonic-gate# This could be better optimized to write the file in one pass, instead 1077*0Sstevel@tonic-gate# of one pass per block of records. But that will require modifications 1078*0Sstevel@tonic-gate# to _twrite, so I should have a good _twrite test suite first. 1079*0Sstevel@tonic-gatesub flush { 1080*0Sstevel@tonic-gate my $self = shift; 1081*0Sstevel@tonic-gate 1082*0Sstevel@tonic-gate $self->_flush; 1083*0Sstevel@tonic-gate $self->{defer} = 0; 1084*0Sstevel@tonic-gate} 1085*0Sstevel@tonic-gate 1086*0Sstevel@tonic-gatesub _old_flush { 1087*0Sstevel@tonic-gate my $self = shift; 1088*0Sstevel@tonic-gate my @writable = sort {$a<=>$b} (keys %{$self->{deferred}}); 1089*0Sstevel@tonic-gate 1090*0Sstevel@tonic-gate while (@writable) { 1091*0Sstevel@tonic-gate # gather all consecutive records from the front of @writable 1092*0Sstevel@tonic-gate my $first_rec = shift @writable; 1093*0Sstevel@tonic-gate my $last_rec = $first_rec+1; 1094*0Sstevel@tonic-gate ++$last_rec, shift @writable while @writable && $last_rec == $writable[0]; 1095*0Sstevel@tonic-gate --$last_rec; 1096*0Sstevel@tonic-gate $self->_fill_offsets_to($last_rec); 1097*0Sstevel@tonic-gate $self->_extend_file_to($last_rec); 1098*0Sstevel@tonic-gate $self->_splice($first_rec, $last_rec-$first_rec+1, 1099*0Sstevel@tonic-gate @{$self->{deferred}}{$first_rec .. $last_rec}); 1100*0Sstevel@tonic-gate } 1101*0Sstevel@tonic-gate 1102*0Sstevel@tonic-gate $self->_discard; # clear out defered-write-cache 1103*0Sstevel@tonic-gate} 1104*0Sstevel@tonic-gate 1105*0Sstevel@tonic-gatesub _flush { 1106*0Sstevel@tonic-gate my $self = shift; 1107*0Sstevel@tonic-gate my @writable = sort {$a<=>$b} (keys %{$self->{deferred}}); 1108*0Sstevel@tonic-gate my @args; 1109*0Sstevel@tonic-gate my @adjust; 1110*0Sstevel@tonic-gate 1111*0Sstevel@tonic-gate while (@writable) { 1112*0Sstevel@tonic-gate # gather all consecutive records from the front of @writable 1113*0Sstevel@tonic-gate my $first_rec = shift @writable; 1114*0Sstevel@tonic-gate my $last_rec = $first_rec+1; 1115*0Sstevel@tonic-gate ++$last_rec, shift @writable while @writable && $last_rec == $writable[0]; 1116*0Sstevel@tonic-gate --$last_rec; 1117*0Sstevel@tonic-gate my $end = $self->_fill_offsets_to($last_rec+1); 1118*0Sstevel@tonic-gate if (not defined $end) { 1119*0Sstevel@tonic-gate $self->_extend_file_to($last_rec); 1120*0Sstevel@tonic-gate $end = $self->{offsets}[$last_rec]; 1121*0Sstevel@tonic-gate } 1122*0Sstevel@tonic-gate my ($start) = $self->{offsets}[$first_rec]; 1123*0Sstevel@tonic-gate push @args, 1124*0Sstevel@tonic-gate join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data 1125*0Sstevel@tonic-gate $start, # position 1126*0Sstevel@tonic-gate $end-$start; # length 1127*0Sstevel@tonic-gate push @adjust, [$first_rec, # starting at this position... 1128*0Sstevel@tonic-gate $last_rec-$first_rec+1, # this many records... 1129*0Sstevel@tonic-gate # are replaced with these... 1130*0Sstevel@tonic-gate @{$self->{deferred}}{$first_rec .. $last_rec}, 1131*0Sstevel@tonic-gate ]; 1132*0Sstevel@tonic-gate } 1133*0Sstevel@tonic-gate 1134*0Sstevel@tonic-gate $self->_mtwrite(@args); # write multiple record groups 1135*0Sstevel@tonic-gate $self->_discard; # clear out defered-write-cache 1136*0Sstevel@tonic-gate $self->_oadjust(@adjust); 1137*0Sstevel@tonic-gate} 1138*0Sstevel@tonic-gate 1139*0Sstevel@tonic-gate# Discard deferred writes and disable future deferred writes 1140*0Sstevel@tonic-gatesub discard { 1141*0Sstevel@tonic-gate my $self = shift; 1142*0Sstevel@tonic-gate $self->_discard; 1143*0Sstevel@tonic-gate $self->{defer} = 0; 1144*0Sstevel@tonic-gate} 1145*0Sstevel@tonic-gate 1146*0Sstevel@tonic-gate# Discard deferred writes, but retain old deferred writing mode 1147*0Sstevel@tonic-gatesub _discard { 1148*0Sstevel@tonic-gate my $self = shift; 1149*0Sstevel@tonic-gate %{$self->{deferred}} = (); 1150*0Sstevel@tonic-gate $self->{deferred_s} = 0; 1151*0Sstevel@tonic-gate $self->{deferred_max} = -1; 1152*0Sstevel@tonic-gate $self->{cache}->set_limit($self->{memory}); 1153*0Sstevel@tonic-gate} 1154*0Sstevel@tonic-gate 1155*0Sstevel@tonic-gate# Deferred writing is enabled, either explicitly ($self->{defer}) 1156*0Sstevel@tonic-gate# or automatically ($self->{autodeferring}) 1157*0Sstevel@tonic-gatesub _is_deferring { 1158*0Sstevel@tonic-gate my $self = shift; 1159*0Sstevel@tonic-gate $self->{defer} || $self->{autodeferring}; 1160*0Sstevel@tonic-gate} 1161*0Sstevel@tonic-gate 1162*0Sstevel@tonic-gate# The largest record number of any deferred record 1163*0Sstevel@tonic-gatesub _defer_max { 1164*0Sstevel@tonic-gate my $self = shift; 1165*0Sstevel@tonic-gate return $self->{deferred_max} if defined $self->{deferred_max}; 1166*0Sstevel@tonic-gate my $max = -1; 1167*0Sstevel@tonic-gate for my $key (keys %{$self->{deferred}}) { 1168*0Sstevel@tonic-gate $max = $key if $key > $max; 1169*0Sstevel@tonic-gate } 1170*0Sstevel@tonic-gate $self->{deferred_max} = $max; 1171*0Sstevel@tonic-gate $max; 1172*0Sstevel@tonic-gate} 1173*0Sstevel@tonic-gate 1174*0Sstevel@tonic-gate################################################################ 1175*0Sstevel@tonic-gate# 1176*0Sstevel@tonic-gate# Matters related to autodeferment 1177*0Sstevel@tonic-gate# 1178*0Sstevel@tonic-gate 1179*0Sstevel@tonic-gate# Get/set autodefer option 1180*0Sstevel@tonic-gatesub autodefer { 1181*0Sstevel@tonic-gate my $self = shift; 1182*0Sstevel@tonic-gate if (@_) { 1183*0Sstevel@tonic-gate my $old = $self->{autodefer}; 1184*0Sstevel@tonic-gate $self->{autodefer} = shift; 1185*0Sstevel@tonic-gate if ($old) { 1186*0Sstevel@tonic-gate $self->_stop_autodeferring; 1187*0Sstevel@tonic-gate @{$self->{ad_history}} = (); 1188*0Sstevel@tonic-gate } 1189*0Sstevel@tonic-gate $old; 1190*0Sstevel@tonic-gate } else { 1191*0Sstevel@tonic-gate $self->{autodefer}; 1192*0Sstevel@tonic-gate } 1193*0Sstevel@tonic-gate} 1194*0Sstevel@tonic-gate 1195*0Sstevel@tonic-gate# The user is trying to store record #$n Record that in the history, 1196*0Sstevel@tonic-gate# and then enable (or disable) autodeferment if that seems useful. 1197*0Sstevel@tonic-gate# Note that it's OK for $n to be a non-number, as long as the function 1198*0Sstevel@tonic-gate# is prepared to deal with that. Nobody else looks at the ad_history. 1199*0Sstevel@tonic-gate# 1200*0Sstevel@tonic-gate# Now, what does the ad_history mean, and what is this function doing? 1201*0Sstevel@tonic-gate# Essentially, the idea is to enable autodeferring when we see that the 1202*0Sstevel@tonic-gate# user has made three consecutive STORE calls to three consecutive records. 1203*0Sstevel@tonic-gate# ("Three" is actually ->{autodefer_threshhold}.) 1204*0Sstevel@tonic-gate# A STORE call for record #$n inserts $n into the autodefer history, 1205*0Sstevel@tonic-gate# and if the history contains three consecutive records, we enable 1206*0Sstevel@tonic-gate# autodeferment. An ad_history of [X, Y] means that the most recent 1207*0Sstevel@tonic-gate# STOREs were for records X, X+1, ..., Y, in that order. 1208*0Sstevel@tonic-gate# 1209*0Sstevel@tonic-gate# Inserting a nonconsecutive number erases the history and starts over. 1210*0Sstevel@tonic-gate# 1211*0Sstevel@tonic-gate# Performing a special operation like SPLICE erases the history. 1212*0Sstevel@tonic-gate# 1213*0Sstevel@tonic-gate# There's one special case: CLEAR means that CLEAR was just called. 1214*0Sstevel@tonic-gate# In this case, we prime the history with [-2, -1] so that if the next 1215*0Sstevel@tonic-gate# write is for record 0, autodeferring goes on immediately. This is for 1216*0Sstevel@tonic-gate# the common special case of "@a = (...)". 1217*0Sstevel@tonic-gate# 1218*0Sstevel@tonic-gatesub _annotate_ad_history { 1219*0Sstevel@tonic-gate my ($self, $n) = @_; 1220*0Sstevel@tonic-gate return unless $self->{autodefer}; # feature is disabled 1221*0Sstevel@tonic-gate return if $self->{defer}; # already in explicit defer mode 1222*0Sstevel@tonic-gate return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold}; 1223*0Sstevel@tonic-gate 1224*0Sstevel@tonic-gate local *H = $self->{ad_history}; 1225*0Sstevel@tonic-gate if ($n eq 'CLEAR') { 1226*0Sstevel@tonic-gate @H = (-2, -1); # prime the history with fake records 1227*0Sstevel@tonic-gate $self->_stop_autodeferring; 1228*0Sstevel@tonic-gate } elsif ($n =~ /^\d+$/) { 1229*0Sstevel@tonic-gate if (@H == 0) { 1230*0Sstevel@tonic-gate @H = ($n, $n); 1231*0Sstevel@tonic-gate } else { # @H == 2 1232*0Sstevel@tonic-gate if ($H[1] == $n-1) { # another consecutive record 1233*0Sstevel@tonic-gate $H[1]++; 1234*0Sstevel@tonic-gate if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) { 1235*0Sstevel@tonic-gate $self->{autodeferring} = 1; 1236*0Sstevel@tonic-gate } 1237*0Sstevel@tonic-gate } else { # nonconsecutive- erase and start over 1238*0Sstevel@tonic-gate @H = ($n, $n); 1239*0Sstevel@tonic-gate $self->_stop_autodeferring; 1240*0Sstevel@tonic-gate } 1241*0Sstevel@tonic-gate } 1242*0Sstevel@tonic-gate } else { # SPLICE or STORESIZE or some such 1243*0Sstevel@tonic-gate @H = (); 1244*0Sstevel@tonic-gate $self->_stop_autodeferring; 1245*0Sstevel@tonic-gate } 1246*0Sstevel@tonic-gate} 1247*0Sstevel@tonic-gate 1248*0Sstevel@tonic-gate# If autodeferring was enabled, cut it out and discard the history 1249*0Sstevel@tonic-gatesub _stop_autodeferring { 1250*0Sstevel@tonic-gate my $self = shift; 1251*0Sstevel@tonic-gate if ($self->{autodeferring}) { 1252*0Sstevel@tonic-gate $self->_flush; 1253*0Sstevel@tonic-gate } 1254*0Sstevel@tonic-gate $self->{autodeferring} = 0; 1255*0Sstevel@tonic-gate} 1256*0Sstevel@tonic-gate 1257*0Sstevel@tonic-gate################################################################ 1258*0Sstevel@tonic-gate 1259*0Sstevel@tonic-gate 1260*0Sstevel@tonic-gate# This is NOT a method. It is here for two reasons: 1261*0Sstevel@tonic-gate# 1. To factor a fairly complicated block out of the constructor 1262*0Sstevel@tonic-gate# 2. To provide access for the test suite, which need to be sure 1263*0Sstevel@tonic-gate# files are being written properly. 1264*0Sstevel@tonic-gatesub _default_recsep { 1265*0Sstevel@tonic-gate my $recsep = $/; 1266*0Sstevel@tonic-gate if ($^O eq 'MSWin32') { # Dos too? 1267*0Sstevel@tonic-gate # Windows users expect files to be terminated with \r\n 1268*0Sstevel@tonic-gate # But $/ is set to \n instead 1269*0Sstevel@tonic-gate # Note that this also transforms \n\n into \r\n\r\n. 1270*0Sstevel@tonic-gate # That is a feature. 1271*0Sstevel@tonic-gate $recsep =~ s/\n/\r\n/g; 1272*0Sstevel@tonic-gate } 1273*0Sstevel@tonic-gate $recsep; 1274*0Sstevel@tonic-gate} 1275*0Sstevel@tonic-gate 1276*0Sstevel@tonic-gate# Utility function for _check_integrity 1277*0Sstevel@tonic-gatesub _ci_warn { 1278*0Sstevel@tonic-gate my $msg = shift; 1279*0Sstevel@tonic-gate $msg =~ s/\n/\\n/g; 1280*0Sstevel@tonic-gate $msg =~ s/\r/\\r/g; 1281*0Sstevel@tonic-gate print "# $msg\n"; 1282*0Sstevel@tonic-gate} 1283*0Sstevel@tonic-gate 1284*0Sstevel@tonic-gate# Given a file, make sure the cache is consistent with the 1285*0Sstevel@tonic-gate# file contents and the internal data structures are consistent with 1286*0Sstevel@tonic-gate# each other. Returns true if everything checks out, false if not 1287*0Sstevel@tonic-gate# 1288*0Sstevel@tonic-gate# The $file argument is no longer used. It is retained for compatibility 1289*0Sstevel@tonic-gate# with the existing test suite. 1290*0Sstevel@tonic-gatesub _check_integrity { 1291*0Sstevel@tonic-gate my ($self, $file, $warn) = @_; 1292*0Sstevel@tonic-gate my $rsl = $self->{recseplen}; 1293*0Sstevel@tonic-gate my $rs = $self->{recsep}; 1294*0Sstevel@tonic-gate my $good = 1; 1295*0Sstevel@tonic-gate local *_; # local $_ does not work here 1296*0Sstevel@tonic-gate local $DIAGNOSTIC = 1; 1297*0Sstevel@tonic-gate 1298*0Sstevel@tonic-gate if (not defined $rs) { 1299*0Sstevel@tonic-gate _ci_warn("recsep is undef!"); 1300*0Sstevel@tonic-gate $good = 0; 1301*0Sstevel@tonic-gate } elsif ($rs eq "") { 1302*0Sstevel@tonic-gate _ci_warn("recsep is empty!"); 1303*0Sstevel@tonic-gate $good = 0; 1304*0Sstevel@tonic-gate } elsif ($rsl != length $rs) { 1305*0Sstevel@tonic-gate my $ln = length $rs; 1306*0Sstevel@tonic-gate _ci_warn("recsep <$rs> has length $ln, should be $rsl"); 1307*0Sstevel@tonic-gate $good = 0; 1308*0Sstevel@tonic-gate } 1309*0Sstevel@tonic-gate 1310*0Sstevel@tonic-gate if (not defined $self->{offsets}[0]) { 1311*0Sstevel@tonic-gate _ci_warn("offset 0 is missing!"); 1312*0Sstevel@tonic-gate $good = 0; 1313*0Sstevel@tonic-gate 1314*0Sstevel@tonic-gate } elsif ($self->{offsets}[0] != 0) { 1315*0Sstevel@tonic-gate _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!"); 1316*0Sstevel@tonic-gate $good = 0; 1317*0Sstevel@tonic-gate } 1318*0Sstevel@tonic-gate 1319*0Sstevel@tonic-gate my $cached = 0; 1320*0Sstevel@tonic-gate { 1321*0Sstevel@tonic-gate local *F = $self->{fh}; 1322*0Sstevel@tonic-gate seek F, 0, SEEK_SET; 1323*0Sstevel@tonic-gate local $. = 0; 1324*0Sstevel@tonic-gate local $/ = $rs; 1325*0Sstevel@tonic-gate 1326*0Sstevel@tonic-gate while (<F>) { 1327*0Sstevel@tonic-gate my $n = $. - 1; 1328*0Sstevel@tonic-gate my $cached = $self->{cache}->_produce($n); 1329*0Sstevel@tonic-gate my $offset = $self->{offsets}[$.]; 1330*0Sstevel@tonic-gate my $ao = tell F; 1331*0Sstevel@tonic-gate if (defined $offset && $offset != $ao) { 1332*0Sstevel@tonic-gate _ci_warn("rec $n: offset <$offset> actual <$ao>"); 1333*0Sstevel@tonic-gate $good = 0; 1334*0Sstevel@tonic-gate } 1335*0Sstevel@tonic-gate if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) { 1336*0Sstevel@tonic-gate $good = 0; 1337*0Sstevel@tonic-gate _ci_warn("rec $n: cached <$cached> actual <$_>"); 1338*0Sstevel@tonic-gate } 1339*0Sstevel@tonic-gate if (defined $cached && substr($cached, -$rsl) ne $rs) { 1340*0Sstevel@tonic-gate $good = 0; 1341*0Sstevel@tonic-gate _ci_warn("rec $n in the cache is missing the record separator"); 1342*0Sstevel@tonic-gate } 1343*0Sstevel@tonic-gate if (! defined $offset && $self->{eof}) { 1344*0Sstevel@tonic-gate $good = 0; 1345*0Sstevel@tonic-gate _ci_warn("The offset table was marked complete, but it is missing element $."); 1346*0Sstevel@tonic-gate } 1347*0Sstevel@tonic-gate } 1348*0Sstevel@tonic-gate if (@{$self->{offsets}} > $.+1) { 1349*0Sstevel@tonic-gate $good = 0; 1350*0Sstevel@tonic-gate my $n = @{$self->{offsets}}; 1351*0Sstevel@tonic-gate _ci_warn("The offset table has $n items, but the file has only $."); 1352*0Sstevel@tonic-gate } 1353*0Sstevel@tonic-gate 1354*0Sstevel@tonic-gate my $deferring = $self->_is_deferring; 1355*0Sstevel@tonic-gate for my $n ($self->{cache}->ckeys) { 1356*0Sstevel@tonic-gate my $r = $self->{cache}->_produce($n); 1357*0Sstevel@tonic-gate $cached += length($r); 1358*0Sstevel@tonic-gate next if $n+1 <= $.; # checked this already 1359*0Sstevel@tonic-gate _ci_warn("spurious caching of record $n"); 1360*0Sstevel@tonic-gate $good = 0; 1361*0Sstevel@tonic-gate } 1362*0Sstevel@tonic-gate my $b = $self->{cache}->bytes; 1363*0Sstevel@tonic-gate if ($cached != $b) { 1364*0Sstevel@tonic-gate _ci_warn("cache size is $b, should be $cached"); 1365*0Sstevel@tonic-gate $good = 0; 1366*0Sstevel@tonic-gate } 1367*0Sstevel@tonic-gate } 1368*0Sstevel@tonic-gate 1369*0Sstevel@tonic-gate # That cache has its own set of tests 1370*0Sstevel@tonic-gate $good = 0 unless $self->{cache}->_check_integrity; 1371*0Sstevel@tonic-gate 1372*0Sstevel@tonic-gate # Now let's check the deferbuffer 1373*0Sstevel@tonic-gate # Unless deferred writing is enabled, it should be empty 1374*0Sstevel@tonic-gate if (! $self->_is_deferring && %{$self->{deferred}}) { 1375*0Sstevel@tonic-gate _ci_warn("deferred writing disabled, but deferbuffer nonempty"); 1376*0Sstevel@tonic-gate $good = 0; 1377*0Sstevel@tonic-gate } 1378*0Sstevel@tonic-gate 1379*0Sstevel@tonic-gate # Any record in the deferbuffer should *not* be present in the readcache 1380*0Sstevel@tonic-gate my $deferred_s = 0; 1381*0Sstevel@tonic-gate while (my ($n, $r) = each %{$self->{deferred}}) { 1382*0Sstevel@tonic-gate $deferred_s += length($r); 1383*0Sstevel@tonic-gate if (defined $self->{cache}->_produce($n)) { 1384*0Sstevel@tonic-gate _ci_warn("record $n is in the deferbuffer *and* the readcache"); 1385*0Sstevel@tonic-gate $good = 0; 1386*0Sstevel@tonic-gate } 1387*0Sstevel@tonic-gate if (substr($r, -$rsl) ne $rs) { 1388*0Sstevel@tonic-gate _ci_warn("rec $n in the deferbuffer is missing the record separator"); 1389*0Sstevel@tonic-gate $good = 0; 1390*0Sstevel@tonic-gate } 1391*0Sstevel@tonic-gate } 1392*0Sstevel@tonic-gate 1393*0Sstevel@tonic-gate # Total size of deferbuffer should match internal total 1394*0Sstevel@tonic-gate if ($deferred_s != $self->{deferred_s}) { 1395*0Sstevel@tonic-gate _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s"); 1396*0Sstevel@tonic-gate $good = 0; 1397*0Sstevel@tonic-gate } 1398*0Sstevel@tonic-gate 1399*0Sstevel@tonic-gate # Total size of deferbuffer should not exceed the specified limit 1400*0Sstevel@tonic-gate if ($deferred_s > $self->{dw_size}) { 1401*0Sstevel@tonic-gate _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}"); 1402*0Sstevel@tonic-gate $good = 0; 1403*0Sstevel@tonic-gate } 1404*0Sstevel@tonic-gate 1405*0Sstevel@tonic-gate # Total size of cached data should not exceed the specified limit 1406*0Sstevel@tonic-gate if ($deferred_s + $cached > $self->{memory}) { 1407*0Sstevel@tonic-gate my $total = $deferred_s + $cached; 1408*0Sstevel@tonic-gate _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}"); 1409*0Sstevel@tonic-gate $good = 0; 1410*0Sstevel@tonic-gate } 1411*0Sstevel@tonic-gate 1412*0Sstevel@tonic-gate # Stuff related to autodeferment 1413*0Sstevel@tonic-gate if (!$self->{autodefer} && @{$self->{ad_history}}) { 1414*0Sstevel@tonic-gate _ci_warn("autodefer is disabled, but ad_history is nonempty"); 1415*0Sstevel@tonic-gate $good = 0; 1416*0Sstevel@tonic-gate } 1417*0Sstevel@tonic-gate if ($self->{autodeferring} && $self->{defer}) { 1418*0Sstevel@tonic-gate _ci_warn("both autodeferring and explicit deferring are active"); 1419*0Sstevel@tonic-gate $good = 0; 1420*0Sstevel@tonic-gate } 1421*0Sstevel@tonic-gate if (@{$self->{ad_history}} == 0) { 1422*0Sstevel@tonic-gate # That's OK, no additional tests required 1423*0Sstevel@tonic-gate } elsif (@{$self->{ad_history}} == 2) { 1424*0Sstevel@tonic-gate my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}}; 1425*0Sstevel@tonic-gate if (@non_number) { 1426*0Sstevel@tonic-gate my $msg; 1427*0Sstevel@tonic-gate { local $" = ')('; 1428*0Sstevel@tonic-gate $msg = "ad_history contains non-numbers (@{$self->{ad_history}})"; 1429*0Sstevel@tonic-gate } 1430*0Sstevel@tonic-gate _ci_warn($msg); 1431*0Sstevel@tonic-gate $good = 0; 1432*0Sstevel@tonic-gate } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) { 1433*0Sstevel@tonic-gate _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}"); 1434*0Sstevel@tonic-gate $good = 0; 1435*0Sstevel@tonic-gate } 1436*0Sstevel@tonic-gate } else { 1437*0Sstevel@tonic-gate _ci_warn("ad_history has bad length <@{$self->{ad_history}}>"); 1438*0Sstevel@tonic-gate $good = 0; 1439*0Sstevel@tonic-gate } 1440*0Sstevel@tonic-gate 1441*0Sstevel@tonic-gate $good; 1442*0Sstevel@tonic-gate} 1443*0Sstevel@tonic-gate 1444*0Sstevel@tonic-gate################################################################ 1445*0Sstevel@tonic-gate# 1446*0Sstevel@tonic-gate# Tie::File::Cache 1447*0Sstevel@tonic-gate# 1448*0Sstevel@tonic-gate# Read cache 1449*0Sstevel@tonic-gate 1450*0Sstevel@tonic-gatepackage Tie::File::Cache; 1451*0Sstevel@tonic-gate$Tie::File::Cache::VERSION = $Tie::File::VERSION; 1452*0Sstevel@tonic-gateuse Carp ':DEFAULT', 'confess'; 1453*0Sstevel@tonic-gate 1454*0Sstevel@tonic-gatesub HEAP () { 0 } 1455*0Sstevel@tonic-gatesub HASH () { 1 } 1456*0Sstevel@tonic-gatesub MAX () { 2 } 1457*0Sstevel@tonic-gatesub BYTES() { 3 } 1458*0Sstevel@tonic-gate#sub STAT () { 4 } # Array with request statistics for each record 1459*0Sstevel@tonic-gate#sub MISS () { 5 } # Total number of cache misses 1460*0Sstevel@tonic-gate#sub REQ () { 6 } # Total number of cache requests 1461*0Sstevel@tonic-gateuse strict 'vars'; 1462*0Sstevel@tonic-gate 1463*0Sstevel@tonic-gatesub new { 1464*0Sstevel@tonic-gate my ($pack, $max) = @_; 1465*0Sstevel@tonic-gate local *_; 1466*0Sstevel@tonic-gate croak "missing argument to ->new" unless defined $max; 1467*0Sstevel@tonic-gate my $self = []; 1468*0Sstevel@tonic-gate bless $self => $pack; 1469*0Sstevel@tonic-gate @$self = (Tie::File::Heap->new($self), {}, $max, 0); 1470*0Sstevel@tonic-gate $self; 1471*0Sstevel@tonic-gate} 1472*0Sstevel@tonic-gate 1473*0Sstevel@tonic-gatesub adj_limit { 1474*0Sstevel@tonic-gate my ($self, $n) = @_; 1475*0Sstevel@tonic-gate $self->[MAX] += $n; 1476*0Sstevel@tonic-gate} 1477*0Sstevel@tonic-gate 1478*0Sstevel@tonic-gatesub set_limit { 1479*0Sstevel@tonic-gate my ($self, $n) = @_; 1480*0Sstevel@tonic-gate $self->[MAX] = $n; 1481*0Sstevel@tonic-gate} 1482*0Sstevel@tonic-gate 1483*0Sstevel@tonic-gate# For internal use only 1484*0Sstevel@tonic-gate# Will be called by the heap structure to notify us that a certain 1485*0Sstevel@tonic-gate# piece of data has moved from one heap element to another. 1486*0Sstevel@tonic-gate# $k is the hash key of the item 1487*0Sstevel@tonic-gate# $n is the new index into the heap at which it is stored 1488*0Sstevel@tonic-gate# If $n is undefined, the item has been removed from the heap. 1489*0Sstevel@tonic-gatesub _heap_move { 1490*0Sstevel@tonic-gate my ($self, $k, $n) = @_; 1491*0Sstevel@tonic-gate if (defined $n) { 1492*0Sstevel@tonic-gate $self->[HASH]{$k} = $n; 1493*0Sstevel@tonic-gate } else { 1494*0Sstevel@tonic-gate delete $self->[HASH]{$k}; 1495*0Sstevel@tonic-gate } 1496*0Sstevel@tonic-gate} 1497*0Sstevel@tonic-gate 1498*0Sstevel@tonic-gatesub insert { 1499*0Sstevel@tonic-gate my ($self, $key, $val) = @_; 1500*0Sstevel@tonic-gate local *_; 1501*0Sstevel@tonic-gate croak "missing argument to ->insert" unless defined $key; 1502*0Sstevel@tonic-gate unless (defined $self->[MAX]) { 1503*0Sstevel@tonic-gate confess "undefined max" ; 1504*0Sstevel@tonic-gate } 1505*0Sstevel@tonic-gate confess "undefined val" unless defined $val; 1506*0Sstevel@tonic-gate return if length($val) > $self->[MAX]; 1507*0Sstevel@tonic-gate 1508*0Sstevel@tonic-gate# if ($self->[STAT]) { 1509*0Sstevel@tonic-gate# $self->[STAT][$key] = 1; 1510*0Sstevel@tonic-gate# return; 1511*0Sstevel@tonic-gate# } 1512*0Sstevel@tonic-gate 1513*0Sstevel@tonic-gate my $oldnode = $self->[HASH]{$key}; 1514*0Sstevel@tonic-gate if (defined $oldnode) { 1515*0Sstevel@tonic-gate my $oldval = $self->[HEAP]->set_val($oldnode, $val); 1516*0Sstevel@tonic-gate $self->[BYTES] -= length($oldval); 1517*0Sstevel@tonic-gate } else { 1518*0Sstevel@tonic-gate $self->[HEAP]->insert($key, $val); 1519*0Sstevel@tonic-gate } 1520*0Sstevel@tonic-gate $self->[BYTES] += length($val); 1521*0Sstevel@tonic-gate $self->flush if $self->[BYTES] > $self->[MAX]; 1522*0Sstevel@tonic-gate} 1523*0Sstevel@tonic-gate 1524*0Sstevel@tonic-gatesub expire { 1525*0Sstevel@tonic-gate my $self = shift; 1526*0Sstevel@tonic-gate my $old_data = $self->[HEAP]->popheap; 1527*0Sstevel@tonic-gate return unless defined $old_data; 1528*0Sstevel@tonic-gate $self->[BYTES] -= length $old_data; 1529*0Sstevel@tonic-gate $old_data; 1530*0Sstevel@tonic-gate} 1531*0Sstevel@tonic-gate 1532*0Sstevel@tonic-gatesub remove { 1533*0Sstevel@tonic-gate my ($self, @keys) = @_; 1534*0Sstevel@tonic-gate my @result; 1535*0Sstevel@tonic-gate 1536*0Sstevel@tonic-gate# if ($self->[STAT]) { 1537*0Sstevel@tonic-gate# for my $key (@keys) { 1538*0Sstevel@tonic-gate# $self->[STAT][$key] = 0; 1539*0Sstevel@tonic-gate# } 1540*0Sstevel@tonic-gate# return; 1541*0Sstevel@tonic-gate# } 1542*0Sstevel@tonic-gate 1543*0Sstevel@tonic-gate for my $key (@keys) { 1544*0Sstevel@tonic-gate next unless exists $self->[HASH]{$key}; 1545*0Sstevel@tonic-gate my $old_data = $self->[HEAP]->remove($self->[HASH]{$key}); 1546*0Sstevel@tonic-gate $self->[BYTES] -= length $old_data; 1547*0Sstevel@tonic-gate push @result, $old_data; 1548*0Sstevel@tonic-gate } 1549*0Sstevel@tonic-gate @result; 1550*0Sstevel@tonic-gate} 1551*0Sstevel@tonic-gate 1552*0Sstevel@tonic-gatesub lookup { 1553*0Sstevel@tonic-gate my ($self, $key) = @_; 1554*0Sstevel@tonic-gate local *_; 1555*0Sstevel@tonic-gate croak "missing argument to ->lookup" unless defined $key; 1556*0Sstevel@tonic-gate 1557*0Sstevel@tonic-gate# if ($self->[STAT]) { 1558*0Sstevel@tonic-gate# $self->[MISS]++ if $self->[STAT][$key]++ == 0; 1559*0Sstevel@tonic-gate# $self->[REQ]++; 1560*0Sstevel@tonic-gate# my $hit_rate = 1 - $self->[MISS] / $self->[REQ]; 1561*0Sstevel@tonic-gate# # Do some testing to determine this threshhold 1562*0Sstevel@tonic-gate# $#$self = STAT - 1 if $hit_rate > 0.20; 1563*0Sstevel@tonic-gate# } 1564*0Sstevel@tonic-gate 1565*0Sstevel@tonic-gate if (exists $self->[HASH]{$key}) { 1566*0Sstevel@tonic-gate $self->[HEAP]->lookup($self->[HASH]{$key}); 1567*0Sstevel@tonic-gate } else { 1568*0Sstevel@tonic-gate return; 1569*0Sstevel@tonic-gate } 1570*0Sstevel@tonic-gate} 1571*0Sstevel@tonic-gate 1572*0Sstevel@tonic-gate# For internal use only 1573*0Sstevel@tonic-gatesub _produce { 1574*0Sstevel@tonic-gate my ($self, $key) = @_; 1575*0Sstevel@tonic-gate my $loc = $self->[HASH]{$key}; 1576*0Sstevel@tonic-gate return unless defined $loc; 1577*0Sstevel@tonic-gate $self->[HEAP][$loc][2]; 1578*0Sstevel@tonic-gate} 1579*0Sstevel@tonic-gate 1580*0Sstevel@tonic-gate# For internal use only 1581*0Sstevel@tonic-gatesub _promote { 1582*0Sstevel@tonic-gate my ($self, $key) = @_; 1583*0Sstevel@tonic-gate $self->[HEAP]->promote($self->[HASH]{$key}); 1584*0Sstevel@tonic-gate} 1585*0Sstevel@tonic-gate 1586*0Sstevel@tonic-gatesub empty { 1587*0Sstevel@tonic-gate my ($self) = @_; 1588*0Sstevel@tonic-gate %{$self->[HASH]} = (); 1589*0Sstevel@tonic-gate $self->[BYTES] = 0; 1590*0Sstevel@tonic-gate $self->[HEAP]->empty; 1591*0Sstevel@tonic-gate# @{$self->[STAT]} = (); 1592*0Sstevel@tonic-gate# $self->[MISS] = 0; 1593*0Sstevel@tonic-gate# $self->[REQ] = 0; 1594*0Sstevel@tonic-gate} 1595*0Sstevel@tonic-gate 1596*0Sstevel@tonic-gatesub is_empty { 1597*0Sstevel@tonic-gate my ($self) = @_; 1598*0Sstevel@tonic-gate keys %{$self->[HASH]} == 0; 1599*0Sstevel@tonic-gate} 1600*0Sstevel@tonic-gate 1601*0Sstevel@tonic-gatesub update { 1602*0Sstevel@tonic-gate my ($self, $key, $val) = @_; 1603*0Sstevel@tonic-gate local *_; 1604*0Sstevel@tonic-gate croak "missing argument to ->update" unless defined $key; 1605*0Sstevel@tonic-gate if (length($val) > $self->[MAX]) { 1606*0Sstevel@tonic-gate my ($oldval) = $self->remove($key); 1607*0Sstevel@tonic-gate $self->[BYTES] -= length($oldval) if defined $oldval; 1608*0Sstevel@tonic-gate } elsif (exists $self->[HASH]{$key}) { 1609*0Sstevel@tonic-gate my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val); 1610*0Sstevel@tonic-gate $self->[BYTES] += length($val); 1611*0Sstevel@tonic-gate $self->[BYTES] -= length($oldval) if defined $oldval; 1612*0Sstevel@tonic-gate } else { 1613*0Sstevel@tonic-gate $self->[HEAP]->insert($key, $val); 1614*0Sstevel@tonic-gate $self->[BYTES] += length($val); 1615*0Sstevel@tonic-gate } 1616*0Sstevel@tonic-gate $self->flush; 1617*0Sstevel@tonic-gate} 1618*0Sstevel@tonic-gate 1619*0Sstevel@tonic-gatesub rekey { 1620*0Sstevel@tonic-gate my ($self, $okeys, $nkeys) = @_; 1621*0Sstevel@tonic-gate local *_; 1622*0Sstevel@tonic-gate my %map; 1623*0Sstevel@tonic-gate @map{@$okeys} = @$nkeys; 1624*0Sstevel@tonic-gate croak "missing argument to ->rekey" unless defined $nkeys; 1625*0Sstevel@tonic-gate croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys; 1626*0Sstevel@tonic-gate my %adjusted; # map new keys to heap indices 1627*0Sstevel@tonic-gate # You should be able to cut this to one loop TODO XXX 1628*0Sstevel@tonic-gate for (0 .. $#$okeys) { 1629*0Sstevel@tonic-gate $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]}; 1630*0Sstevel@tonic-gate } 1631*0Sstevel@tonic-gate while (my ($nk, $ix) = each %adjusted) { 1632*0Sstevel@tonic-gate # @{$self->[HASH]}{keys %adjusted} = values %adjusted; 1633*0Sstevel@tonic-gate $self->[HEAP]->rekey($ix, $nk); 1634*0Sstevel@tonic-gate $self->[HASH]{$nk} = $ix; 1635*0Sstevel@tonic-gate } 1636*0Sstevel@tonic-gate} 1637*0Sstevel@tonic-gate 1638*0Sstevel@tonic-gatesub ckeys { 1639*0Sstevel@tonic-gate my $self = shift; 1640*0Sstevel@tonic-gate my @a = keys %{$self->[HASH]}; 1641*0Sstevel@tonic-gate @a; 1642*0Sstevel@tonic-gate} 1643*0Sstevel@tonic-gate 1644*0Sstevel@tonic-gate# Return total amount of cached data 1645*0Sstevel@tonic-gatesub bytes { 1646*0Sstevel@tonic-gate my $self = shift; 1647*0Sstevel@tonic-gate $self->[BYTES]; 1648*0Sstevel@tonic-gate} 1649*0Sstevel@tonic-gate 1650*0Sstevel@tonic-gate# Expire oldest item from cache until cache size is smaller than $max 1651*0Sstevel@tonic-gatesub reduce_size_to { 1652*0Sstevel@tonic-gate my ($self, $max) = @_; 1653*0Sstevel@tonic-gate until ($self->[BYTES] <= $max) { 1654*0Sstevel@tonic-gate # Note that Tie::File::Cache::expire has been inlined here 1655*0Sstevel@tonic-gate my $old_data = $self->[HEAP]->popheap; 1656*0Sstevel@tonic-gate return unless defined $old_data; 1657*0Sstevel@tonic-gate $self->[BYTES] -= length $old_data; 1658*0Sstevel@tonic-gate } 1659*0Sstevel@tonic-gate} 1660*0Sstevel@tonic-gate 1661*0Sstevel@tonic-gate# Why not just $self->reduce_size_to($self->[MAX])? 1662*0Sstevel@tonic-gate# Try this when things stabilize TODO XXX 1663*0Sstevel@tonic-gate# If the cache is too full, expire the oldest records 1664*0Sstevel@tonic-gatesub flush { 1665*0Sstevel@tonic-gate my $self = shift; 1666*0Sstevel@tonic-gate $self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX]; 1667*0Sstevel@tonic-gate} 1668*0Sstevel@tonic-gate 1669*0Sstevel@tonic-gate# For internal use only 1670*0Sstevel@tonic-gatesub _produce_lru { 1671*0Sstevel@tonic-gate my $self = shift; 1672*0Sstevel@tonic-gate $self->[HEAP]->expire_order; 1673*0Sstevel@tonic-gate} 1674*0Sstevel@tonic-gate 1675*0Sstevel@tonic-gateBEGIN { *_ci_warn = \&Tie::File::_ci_warn } 1676*0Sstevel@tonic-gate 1677*0Sstevel@tonic-gatesub _check_integrity { # For CACHE 1678*0Sstevel@tonic-gate my $self = shift; 1679*0Sstevel@tonic-gate my $good = 1; 1680*0Sstevel@tonic-gate 1681*0Sstevel@tonic-gate # Test HEAP 1682*0Sstevel@tonic-gate $self->[HEAP]->_check_integrity or $good = 0; 1683*0Sstevel@tonic-gate 1684*0Sstevel@tonic-gate # Test HASH 1685*0Sstevel@tonic-gate my $bytes = 0; 1686*0Sstevel@tonic-gate for my $k (keys %{$self->[HASH]}) { 1687*0Sstevel@tonic-gate if ($k ne '0' && $k !~ /^[1-9][0-9]*$/) { 1688*0Sstevel@tonic-gate $good = 0; 1689*0Sstevel@tonic-gate _ci_warn "Cache hash key <$k> is non-numeric"; 1690*0Sstevel@tonic-gate } 1691*0Sstevel@tonic-gate 1692*0Sstevel@tonic-gate my $h = $self->[HASH]{$k}; 1693*0Sstevel@tonic-gate if (! defined $h) { 1694*0Sstevel@tonic-gate $good = 0; 1695*0Sstevel@tonic-gate _ci_warn "Heap index number for key $k is undefined"; 1696*0Sstevel@tonic-gate } elsif ($h == 0) { 1697*0Sstevel@tonic-gate $good = 0; 1698*0Sstevel@tonic-gate _ci_warn "Heap index number for key $k is zero"; 1699*0Sstevel@tonic-gate } else { 1700*0Sstevel@tonic-gate my $j = $self->[HEAP][$h]; 1701*0Sstevel@tonic-gate if (! defined $j) { 1702*0Sstevel@tonic-gate $good = 0; 1703*0Sstevel@tonic-gate _ci_warn "Heap contents key $k (=> $h) are undefined"; 1704*0Sstevel@tonic-gate } else { 1705*0Sstevel@tonic-gate $bytes += length($j->[2]); 1706*0Sstevel@tonic-gate if ($k ne $j->[1]) { 1707*0Sstevel@tonic-gate $good = 0; 1708*0Sstevel@tonic-gate _ci_warn "Heap contents key $k (=> $h) is $j->[1], should be $k"; 1709*0Sstevel@tonic-gate } 1710*0Sstevel@tonic-gate } 1711*0Sstevel@tonic-gate } 1712*0Sstevel@tonic-gate } 1713*0Sstevel@tonic-gate 1714*0Sstevel@tonic-gate # Test BYTES 1715*0Sstevel@tonic-gate if ($bytes != $self->[BYTES]) { 1716*0Sstevel@tonic-gate $good = 0; 1717*0Sstevel@tonic-gate _ci_warn "Total data in cache is $bytes, expected $self->[BYTES]"; 1718*0Sstevel@tonic-gate } 1719*0Sstevel@tonic-gate 1720*0Sstevel@tonic-gate # Test MAX 1721*0Sstevel@tonic-gate if ($bytes > $self->[MAX]) { 1722*0Sstevel@tonic-gate $good = 0; 1723*0Sstevel@tonic-gate _ci_warn "Total data in cache is $bytes, exceeds maximum $self->[MAX]"; 1724*0Sstevel@tonic-gate } 1725*0Sstevel@tonic-gate 1726*0Sstevel@tonic-gate return $good; 1727*0Sstevel@tonic-gate} 1728*0Sstevel@tonic-gate 1729*0Sstevel@tonic-gatesub delink { 1730*0Sstevel@tonic-gate my $self = shift; 1731*0Sstevel@tonic-gate $self->[HEAP] = undef; # Bye bye heap 1732*0Sstevel@tonic-gate} 1733*0Sstevel@tonic-gate 1734*0Sstevel@tonic-gate################################################################ 1735*0Sstevel@tonic-gate# 1736*0Sstevel@tonic-gate# Tie::File::Heap 1737*0Sstevel@tonic-gate# 1738*0Sstevel@tonic-gate# Heap data structure for use by cache LRU routines 1739*0Sstevel@tonic-gate 1740*0Sstevel@tonic-gatepackage Tie::File::Heap; 1741*0Sstevel@tonic-gateuse Carp ':DEFAULT', 'confess'; 1742*0Sstevel@tonic-gate$Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION; 1743*0Sstevel@tonic-gatesub SEQ () { 0 }; 1744*0Sstevel@tonic-gatesub KEY () { 1 }; 1745*0Sstevel@tonic-gatesub DAT () { 2 }; 1746*0Sstevel@tonic-gate 1747*0Sstevel@tonic-gatesub new { 1748*0Sstevel@tonic-gate my ($pack, $cache) = @_; 1749*0Sstevel@tonic-gate die "$pack: Parent cache object $cache does not support _heap_move method" 1750*0Sstevel@tonic-gate unless eval { $cache->can('_heap_move') }; 1751*0Sstevel@tonic-gate my $self = [[0,$cache,0]]; 1752*0Sstevel@tonic-gate bless $self => $pack; 1753*0Sstevel@tonic-gate} 1754*0Sstevel@tonic-gate 1755*0Sstevel@tonic-gate# Allocate a new sequence number, larger than all previously allocated numbers 1756*0Sstevel@tonic-gatesub _nseq { 1757*0Sstevel@tonic-gate my $self = shift; 1758*0Sstevel@tonic-gate $self->[0][0]++; 1759*0Sstevel@tonic-gate} 1760*0Sstevel@tonic-gate 1761*0Sstevel@tonic-gatesub _cache { 1762*0Sstevel@tonic-gate my $self = shift; 1763*0Sstevel@tonic-gate $self->[0][1]; 1764*0Sstevel@tonic-gate} 1765*0Sstevel@tonic-gate 1766*0Sstevel@tonic-gatesub _nelts { 1767*0Sstevel@tonic-gate my $self = shift; 1768*0Sstevel@tonic-gate $self->[0][2]; 1769*0Sstevel@tonic-gate} 1770*0Sstevel@tonic-gate 1771*0Sstevel@tonic-gatesub _nelts_inc { 1772*0Sstevel@tonic-gate my $self = shift; 1773*0Sstevel@tonic-gate ++$self->[0][2]; 1774*0Sstevel@tonic-gate} 1775*0Sstevel@tonic-gate 1776*0Sstevel@tonic-gatesub _nelts_dec { 1777*0Sstevel@tonic-gate my $self = shift; 1778*0Sstevel@tonic-gate --$self->[0][2]; 1779*0Sstevel@tonic-gate} 1780*0Sstevel@tonic-gate 1781*0Sstevel@tonic-gatesub is_empty { 1782*0Sstevel@tonic-gate my $self = shift; 1783*0Sstevel@tonic-gate $self->_nelts == 0; 1784*0Sstevel@tonic-gate} 1785*0Sstevel@tonic-gate 1786*0Sstevel@tonic-gatesub empty { 1787*0Sstevel@tonic-gate my $self = shift; 1788*0Sstevel@tonic-gate $#$self = 0; 1789*0Sstevel@tonic-gate $self->[0][2] = 0; 1790*0Sstevel@tonic-gate $self->[0][0] = 0; # might as well reset the sequence numbers 1791*0Sstevel@tonic-gate} 1792*0Sstevel@tonic-gate 1793*0Sstevel@tonic-gate# notify the parent cache object that we moved something 1794*0Sstevel@tonic-gatesub _heap_move { 1795*0Sstevel@tonic-gate my $self = shift; 1796*0Sstevel@tonic-gate $self->_cache->_heap_move(@_); 1797*0Sstevel@tonic-gate} 1798*0Sstevel@tonic-gate 1799*0Sstevel@tonic-gate# Insert a piece of data into the heap with the indicated sequence number. 1800*0Sstevel@tonic-gate# The item with the smallest sequence number is always at the top. 1801*0Sstevel@tonic-gate# If no sequence number is specified, allocate a new one and insert the 1802*0Sstevel@tonic-gate# item at the bottom. 1803*0Sstevel@tonic-gatesub insert { 1804*0Sstevel@tonic-gate my ($self, $key, $data, $seq) = @_; 1805*0Sstevel@tonic-gate $seq = $self->_nseq unless defined $seq; 1806*0Sstevel@tonic-gate $self->_insert_new([$seq, $key, $data]); 1807*0Sstevel@tonic-gate} 1808*0Sstevel@tonic-gate 1809*0Sstevel@tonic-gate# Insert a new, fresh item at the bottom of the heap 1810*0Sstevel@tonic-gatesub _insert_new { 1811*0Sstevel@tonic-gate my ($self, $item) = @_; 1812*0Sstevel@tonic-gate my $i = @$self; 1813*0Sstevel@tonic-gate $i = int($i/2) until defined $self->[$i/2]; 1814*0Sstevel@tonic-gate $self->[$i] = $item; 1815*0Sstevel@tonic-gate $self->[0][1]->_heap_move($self->[$i][KEY], $i); 1816*0Sstevel@tonic-gate $self->_nelts_inc; 1817*0Sstevel@tonic-gate} 1818*0Sstevel@tonic-gate 1819*0Sstevel@tonic-gate# Insert [$data, $seq] pair at or below item $i in the heap. 1820*0Sstevel@tonic-gate# If $i is omitted, default to 1 (the top element.) 1821*0Sstevel@tonic-gatesub _insert { 1822*0Sstevel@tonic-gate my ($self, $item, $i) = @_; 1823*0Sstevel@tonic-gate# $self->_check_loc($i) if defined $i; 1824*0Sstevel@tonic-gate $i = 1 unless defined $i; 1825*0Sstevel@tonic-gate until (! defined $self->[$i]) { 1826*0Sstevel@tonic-gate if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older 1827*0Sstevel@tonic-gate ($self->[$i], $item) = ($item, $self->[$i]); 1828*0Sstevel@tonic-gate $self->[0][1]->_heap_move($self->[$i][KEY], $i); 1829*0Sstevel@tonic-gate } 1830*0Sstevel@tonic-gate # If either is undefined, go that way. Otherwise, choose at random 1831*0Sstevel@tonic-gate my $dir; 1832*0Sstevel@tonic-gate $dir = 0 if !defined $self->[2*$i]; 1833*0Sstevel@tonic-gate $dir = 1 if !defined $self->[2*$i+1]; 1834*0Sstevel@tonic-gate $dir = int(rand(2)) unless defined $dir; 1835*0Sstevel@tonic-gate $i = 2*$i + $dir; 1836*0Sstevel@tonic-gate } 1837*0Sstevel@tonic-gate $self->[$i] = $item; 1838*0Sstevel@tonic-gate $self->[0][1]->_heap_move($self->[$i][KEY], $i); 1839*0Sstevel@tonic-gate $self->_nelts_inc; 1840*0Sstevel@tonic-gate} 1841*0Sstevel@tonic-gate 1842*0Sstevel@tonic-gate# Remove the item at node $i from the heap, moving child items upwards. 1843*0Sstevel@tonic-gate# The item with the smallest sequence number is always at the top. 1844*0Sstevel@tonic-gate# Moving items upwards maintains this condition. 1845*0Sstevel@tonic-gate# Return the removed item. Return undef if there was no item at node $i. 1846*0Sstevel@tonic-gatesub remove { 1847*0Sstevel@tonic-gate my ($self, $i) = @_; 1848*0Sstevel@tonic-gate $i = 1 unless defined $i; 1849*0Sstevel@tonic-gate my $top = $self->[$i]; 1850*0Sstevel@tonic-gate return unless defined $top; 1851*0Sstevel@tonic-gate while (1) { 1852*0Sstevel@tonic-gate my $ii; 1853*0Sstevel@tonic-gate my ($L, $R) = (2*$i, 2*$i+1); 1854*0Sstevel@tonic-gate 1855*0Sstevel@tonic-gate # If either is undefined, go the other way. 1856*0Sstevel@tonic-gate # Otherwise, go towards the smallest. 1857*0Sstevel@tonic-gate last unless defined $self->[$L] || defined $self->[$R]; 1858*0Sstevel@tonic-gate $ii = $R if not defined $self->[$L]; 1859*0Sstevel@tonic-gate $ii = $L if not defined $self->[$R]; 1860*0Sstevel@tonic-gate unless (defined $ii) { 1861*0Sstevel@tonic-gate $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R; 1862*0Sstevel@tonic-gate } 1863*0Sstevel@tonic-gate 1864*0Sstevel@tonic-gate $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot 1865*0Sstevel@tonic-gate $self->[0][1]->_heap_move($self->[$i][KEY], $i); 1866*0Sstevel@tonic-gate $i = $ii; # Fill new vacated spot 1867*0Sstevel@tonic-gate } 1868*0Sstevel@tonic-gate $self->[0][1]->_heap_move($top->[KEY], undef); 1869*0Sstevel@tonic-gate undef $self->[$i]; 1870*0Sstevel@tonic-gate $self->_nelts_dec; 1871*0Sstevel@tonic-gate return $top->[DAT]; 1872*0Sstevel@tonic-gate} 1873*0Sstevel@tonic-gate 1874*0Sstevel@tonic-gatesub popheap { 1875*0Sstevel@tonic-gate my $self = shift; 1876*0Sstevel@tonic-gate $self->remove(1); 1877*0Sstevel@tonic-gate} 1878*0Sstevel@tonic-gate 1879*0Sstevel@tonic-gate# set the sequence number of the indicated item to a higher number 1880*0Sstevel@tonic-gate# than any other item in the heap, and bubble the item down to the 1881*0Sstevel@tonic-gate# bottom. 1882*0Sstevel@tonic-gatesub promote { 1883*0Sstevel@tonic-gate my ($self, $n) = @_; 1884*0Sstevel@tonic-gate# $self->_check_loc($n); 1885*0Sstevel@tonic-gate $self->[$n][SEQ] = $self->_nseq; 1886*0Sstevel@tonic-gate my $i = $n; 1887*0Sstevel@tonic-gate while (1) { 1888*0Sstevel@tonic-gate my ($L, $R) = (2*$i, 2*$i+1); 1889*0Sstevel@tonic-gate my $dir; 1890*0Sstevel@tonic-gate last unless defined $self->[$L] || defined $self->[$R]; 1891*0Sstevel@tonic-gate $dir = $R unless defined $self->[$L]; 1892*0Sstevel@tonic-gate $dir = $L unless defined $self->[$R]; 1893*0Sstevel@tonic-gate unless (defined $dir) { 1894*0Sstevel@tonic-gate $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R; 1895*0Sstevel@tonic-gate } 1896*0Sstevel@tonic-gate @{$self}[$i, $dir] = @{$self}[$dir, $i]; 1897*0Sstevel@tonic-gate for ($i, $dir) { 1898*0Sstevel@tonic-gate $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_]; 1899*0Sstevel@tonic-gate } 1900*0Sstevel@tonic-gate $i = $dir; 1901*0Sstevel@tonic-gate } 1902*0Sstevel@tonic-gate} 1903*0Sstevel@tonic-gate 1904*0Sstevel@tonic-gate# Return item $n from the heap, promoting its LRU status 1905*0Sstevel@tonic-gatesub lookup { 1906*0Sstevel@tonic-gate my ($self, $n) = @_; 1907*0Sstevel@tonic-gate# $self->_check_loc($n); 1908*0Sstevel@tonic-gate my $val = $self->[$n]; 1909*0Sstevel@tonic-gate $self->promote($n); 1910*0Sstevel@tonic-gate $val->[DAT]; 1911*0Sstevel@tonic-gate} 1912*0Sstevel@tonic-gate 1913*0Sstevel@tonic-gate 1914*0Sstevel@tonic-gate# Assign a new value for node $n, promoting it to the bottom of the heap 1915*0Sstevel@tonic-gatesub set_val { 1916*0Sstevel@tonic-gate my ($self, $n, $val) = @_; 1917*0Sstevel@tonic-gate# $self->_check_loc($n); 1918*0Sstevel@tonic-gate my $oval = $self->[$n][DAT]; 1919*0Sstevel@tonic-gate $self->[$n][DAT] = $val; 1920*0Sstevel@tonic-gate $self->promote($n); 1921*0Sstevel@tonic-gate return $oval; 1922*0Sstevel@tonic-gate} 1923*0Sstevel@tonic-gate 1924*0Sstevel@tonic-gate# The hask key has changed for an item; 1925*0Sstevel@tonic-gate# alter the heap's record of the hash key 1926*0Sstevel@tonic-gatesub rekey { 1927*0Sstevel@tonic-gate my ($self, $n, $new_key) = @_; 1928*0Sstevel@tonic-gate# $self->_check_loc($n); 1929*0Sstevel@tonic-gate $self->[$n][KEY] = $new_key; 1930*0Sstevel@tonic-gate} 1931*0Sstevel@tonic-gate 1932*0Sstevel@tonic-gatesub _check_loc { 1933*0Sstevel@tonic-gate my ($self, $n) = @_; 1934*0Sstevel@tonic-gate unless (1 || defined $self->[$n]) { 1935*0Sstevel@tonic-gate confess "_check_loc($n) failed"; 1936*0Sstevel@tonic-gate } 1937*0Sstevel@tonic-gate} 1938*0Sstevel@tonic-gate 1939*0Sstevel@tonic-gateBEGIN { *_ci_warn = \&Tie::File::_ci_warn } 1940*0Sstevel@tonic-gate 1941*0Sstevel@tonic-gatesub _check_integrity { 1942*0Sstevel@tonic-gate my $self = shift; 1943*0Sstevel@tonic-gate my $good = 1; 1944*0Sstevel@tonic-gate my %seq; 1945*0Sstevel@tonic-gate 1946*0Sstevel@tonic-gate unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) { 1947*0Sstevel@tonic-gate _ci_warn "Element 0 of heap corrupt"; 1948*0Sstevel@tonic-gate $good = 0; 1949*0Sstevel@tonic-gate } 1950*0Sstevel@tonic-gate $good = 0 unless $self->_satisfies_heap_condition(1); 1951*0Sstevel@tonic-gate for my $i (2 .. $#{$self}) { 1952*0Sstevel@tonic-gate my $p = int($i/2); # index of parent node 1953*0Sstevel@tonic-gate if (defined $self->[$i] && ! defined $self->[$p]) { 1954*0Sstevel@tonic-gate _ci_warn "Element $i of heap defined, but parent $p isn't"; 1955*0Sstevel@tonic-gate $good = 0; 1956*0Sstevel@tonic-gate } 1957*0Sstevel@tonic-gate 1958*0Sstevel@tonic-gate if (defined $self->[$i]) { 1959*0Sstevel@tonic-gate if ($seq{$self->[$i][SEQ]}) { 1960*0Sstevel@tonic-gate my $seq = $self->[$i][SEQ]; 1961*0Sstevel@tonic-gate _ci_warn "Nodes $i and $seq{$seq} both have SEQ=$seq"; 1962*0Sstevel@tonic-gate $good = 0; 1963*0Sstevel@tonic-gate } else { 1964*0Sstevel@tonic-gate $seq{$self->[$i][SEQ]} = $i; 1965*0Sstevel@tonic-gate } 1966*0Sstevel@tonic-gate } 1967*0Sstevel@tonic-gate } 1968*0Sstevel@tonic-gate 1969*0Sstevel@tonic-gate return $good; 1970*0Sstevel@tonic-gate} 1971*0Sstevel@tonic-gate 1972*0Sstevel@tonic-gatesub _satisfies_heap_condition { 1973*0Sstevel@tonic-gate my $self = shift; 1974*0Sstevel@tonic-gate my $n = shift || 1; 1975*0Sstevel@tonic-gate my $good = 1; 1976*0Sstevel@tonic-gate for (0, 1) { 1977*0Sstevel@tonic-gate my $c = $n*2 + $_; 1978*0Sstevel@tonic-gate next unless defined $self->[$c]; 1979*0Sstevel@tonic-gate if ($self->[$n][SEQ] >= $self->[$c]) { 1980*0Sstevel@tonic-gate _ci_warn "Node $n of heap does not predate node $c"; 1981*0Sstevel@tonic-gate $good = 0 ; 1982*0Sstevel@tonic-gate } 1983*0Sstevel@tonic-gate $good = 0 unless $self->_satisfies_heap_condition($c); 1984*0Sstevel@tonic-gate } 1985*0Sstevel@tonic-gate return $good; 1986*0Sstevel@tonic-gate} 1987*0Sstevel@tonic-gate 1988*0Sstevel@tonic-gate# Return a list of all the values, sorted by expiration order 1989*0Sstevel@tonic-gatesub expire_order { 1990*0Sstevel@tonic-gate my $self = shift; 1991*0Sstevel@tonic-gate my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes; 1992*0Sstevel@tonic-gate map { $_->[KEY] } @nodes; 1993*0Sstevel@tonic-gate} 1994*0Sstevel@tonic-gate 1995*0Sstevel@tonic-gatesub _nodes { 1996*0Sstevel@tonic-gate my $self = shift; 1997*0Sstevel@tonic-gate my $i = shift || 1; 1998*0Sstevel@tonic-gate return unless defined $self->[$i]; 1999*0Sstevel@tonic-gate ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1)); 2000*0Sstevel@tonic-gate} 2001*0Sstevel@tonic-gate 2002*0Sstevel@tonic-gate"Cogito, ergo sum."; # don't forget to return a true value from the file 2003*0Sstevel@tonic-gate 2004*0Sstevel@tonic-gate__END__ 2005*0Sstevel@tonic-gate 2006*0Sstevel@tonic-gate=head1 NAME 2007*0Sstevel@tonic-gate 2008*0Sstevel@tonic-gateTie::File - Access the lines of a disk file via a Perl array 2009*0Sstevel@tonic-gate 2010*0Sstevel@tonic-gate=head1 SYNOPSIS 2011*0Sstevel@tonic-gate 2012*0Sstevel@tonic-gate # This file documents Tie::File version 0.97 2013*0Sstevel@tonic-gate use Tie::File; 2014*0Sstevel@tonic-gate 2015*0Sstevel@tonic-gate tie @array, 'Tie::File', filename or die ...; 2016*0Sstevel@tonic-gate 2017*0Sstevel@tonic-gate $array[13] = 'blah'; # line 13 of the file is now 'blah' 2018*0Sstevel@tonic-gate print $array[42]; # display line 42 of the file 2019*0Sstevel@tonic-gate 2020*0Sstevel@tonic-gate $n_recs = @array; # how many records are in the file? 2021*0Sstevel@tonic-gate $#array -= 2; # chop two records off the end 2022*0Sstevel@tonic-gate 2023*0Sstevel@tonic-gate 2024*0Sstevel@tonic-gate for (@array) { 2025*0Sstevel@tonic-gate s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file 2026*0Sstevel@tonic-gate } 2027*0Sstevel@tonic-gate 2028*0Sstevel@tonic-gate # These are just like regular push, pop, unshift, shift, and splice 2029*0Sstevel@tonic-gate # Except that they modify the file in the way you would expect 2030*0Sstevel@tonic-gate 2031*0Sstevel@tonic-gate push @array, new recs...; 2032*0Sstevel@tonic-gate my $r1 = pop @array; 2033*0Sstevel@tonic-gate unshift @array, new recs...; 2034*0Sstevel@tonic-gate my $r2 = shift @array; 2035*0Sstevel@tonic-gate @old_recs = splice @array, 3, 7, new recs...; 2036*0Sstevel@tonic-gate 2037*0Sstevel@tonic-gate untie @array; # all finished 2038*0Sstevel@tonic-gate 2039*0Sstevel@tonic-gate 2040*0Sstevel@tonic-gate=head1 DESCRIPTION 2041*0Sstevel@tonic-gate 2042*0Sstevel@tonic-gateC<Tie::File> represents a regular text file as a Perl array. Each 2043*0Sstevel@tonic-gateelement in the array corresponds to a record in the file. The first 2044*0Sstevel@tonic-gateline of the file is element 0 of the array; the second line is element 2045*0Sstevel@tonic-gate1, and so on. 2046*0Sstevel@tonic-gate 2047*0Sstevel@tonic-gateThe file is I<not> loaded into memory, so this will work even for 2048*0Sstevel@tonic-gategigantic files. 2049*0Sstevel@tonic-gate 2050*0Sstevel@tonic-gateChanges to the array are reflected in the file immediately. 2051*0Sstevel@tonic-gate 2052*0Sstevel@tonic-gateLazy people and beginners may now stop reading the manual. 2053*0Sstevel@tonic-gate 2054*0Sstevel@tonic-gate=head2 C<recsep> 2055*0Sstevel@tonic-gate 2056*0Sstevel@tonic-gateWhat is a 'record'? By default, the meaning is the same as for the 2057*0Sstevel@tonic-gateC<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is 2058*0Sstevel@tonic-gateprobably C<"\n">. (Minor exception: on DOS and Win32 systems, a 2059*0Sstevel@tonic-gate'record' is a string terminated by C<"\r\n">.) You may change the 2060*0Sstevel@tonic-gatedefinition of "record" by supplying the C<recsep> option in the C<tie> 2061*0Sstevel@tonic-gatecall: 2062*0Sstevel@tonic-gate 2063*0Sstevel@tonic-gate tie @array, 'Tie::File', $file, recsep => 'es'; 2064*0Sstevel@tonic-gate 2065*0Sstevel@tonic-gateThis says that records are delimited by the string C<es>. If the file 2066*0Sstevel@tonic-gatecontained the following data: 2067*0Sstevel@tonic-gate 2068*0Sstevel@tonic-gate Curse these pesky flies!\n 2069*0Sstevel@tonic-gate 2070*0Sstevel@tonic-gatethen the C<@array> would appear to have four elements: 2071*0Sstevel@tonic-gate 2072*0Sstevel@tonic-gate "Curse th" 2073*0Sstevel@tonic-gate "e p" 2074*0Sstevel@tonic-gate "ky fli" 2075*0Sstevel@tonic-gate "!\n" 2076*0Sstevel@tonic-gate 2077*0Sstevel@tonic-gateAn undefined value is not permitted as a record separator. Perl's 2078*0Sstevel@tonic-gatespecial "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not 2079*0Sstevel@tonic-gateemulated. 2080*0Sstevel@tonic-gate 2081*0Sstevel@tonic-gateRecords read from the tied array do not have the record separator 2082*0Sstevel@tonic-gatestring on the end; this is to allow 2083*0Sstevel@tonic-gate 2084*0Sstevel@tonic-gate $array[17] .= "extra"; 2085*0Sstevel@tonic-gate 2086*0Sstevel@tonic-gateto work as expected. 2087*0Sstevel@tonic-gate 2088*0Sstevel@tonic-gate(See L<"autochomp">, below.) Records stored into the array will have 2089*0Sstevel@tonic-gatethe record separator string appended before they are written to the 2090*0Sstevel@tonic-gatefile, if they don't have one already. For example, if the record 2091*0Sstevel@tonic-gateseparator string is C<"\n">, then the following two lines do exactly 2092*0Sstevel@tonic-gatethe same thing: 2093*0Sstevel@tonic-gate 2094*0Sstevel@tonic-gate $array[17] = "Cherry pie"; 2095*0Sstevel@tonic-gate $array[17] = "Cherry pie\n"; 2096*0Sstevel@tonic-gate 2097*0Sstevel@tonic-gateThe result is that the contents of line 17 of the file will be 2098*0Sstevel@tonic-gatereplaced with "Cherry pie"; a newline character will separate line 17 2099*0Sstevel@tonic-gatefrom line 18. This means that this code will do nothing: 2100*0Sstevel@tonic-gate 2101*0Sstevel@tonic-gate chomp $array[17]; 2102*0Sstevel@tonic-gate 2103*0Sstevel@tonic-gateBecause the C<chomp>ed value will have the separator reattached when 2104*0Sstevel@tonic-gateit is written back to the file. There is no way to create a file 2105*0Sstevel@tonic-gatewhose trailing record separator string is missing. 2106*0Sstevel@tonic-gate 2107*0Sstevel@tonic-gateInserting records that I<contain> the record separator string is not 2108*0Sstevel@tonic-gatesupported by this module. It will probably produce a reasonable 2109*0Sstevel@tonic-gateresult, but what this result will be may change in a future version. 2110*0Sstevel@tonic-gateUse 'splice' to insert records or to replace one record with several. 2111*0Sstevel@tonic-gate 2112*0Sstevel@tonic-gate=head2 C<autochomp> 2113*0Sstevel@tonic-gate 2114*0Sstevel@tonic-gateNormally, array elements have the record separator removed, so that if 2115*0Sstevel@tonic-gatethe file contains the text 2116*0Sstevel@tonic-gate 2117*0Sstevel@tonic-gate Gold 2118*0Sstevel@tonic-gate Frankincense 2119*0Sstevel@tonic-gate Myrrh 2120*0Sstevel@tonic-gate 2121*0Sstevel@tonic-gatethe tied array will appear to contain C<("Gold", "Frankincense", 2122*0Sstevel@tonic-gate"Myrrh")>. If you set C<autochomp> to a false value, the record 2123*0Sstevel@tonic-gateseparator will not be removed. If the file above was tied with 2124*0Sstevel@tonic-gate 2125*0Sstevel@tonic-gate tie @gifts, "Tie::File", $gifts, autochomp => 0; 2126*0Sstevel@tonic-gate 2127*0Sstevel@tonic-gatethen the array C<@gifts> would appear to contain C<("Gold\n", 2128*0Sstevel@tonic-gate"Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n", 2129*0Sstevel@tonic-gate"Frankincense\r\n", "Myrrh\r\n")>. 2130*0Sstevel@tonic-gate 2131*0Sstevel@tonic-gate=head2 C<mode> 2132*0Sstevel@tonic-gate 2133*0Sstevel@tonic-gateNormally, the specified file will be opened for read and write access, 2134*0Sstevel@tonic-gateand will be created if it does not exist. (That is, the flags 2135*0Sstevel@tonic-gateC<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to 2136*0Sstevel@tonic-gatechange this, you may supply alternative flags in the C<mode> option. 2137*0Sstevel@tonic-gateSee L<Fcntl> for a listing of available flags. 2138*0Sstevel@tonic-gateFor example: 2139*0Sstevel@tonic-gate 2140*0Sstevel@tonic-gate # open the file if it exists, but fail if it does not exist 2141*0Sstevel@tonic-gate use Fcntl 'O_RDWR'; 2142*0Sstevel@tonic-gate tie @array, 'Tie::File', $file, mode => O_RDWR; 2143*0Sstevel@tonic-gate 2144*0Sstevel@tonic-gate # create the file if it does not exist 2145*0Sstevel@tonic-gate use Fcntl 'O_RDWR', 'O_CREAT'; 2146*0Sstevel@tonic-gate tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT; 2147*0Sstevel@tonic-gate 2148*0Sstevel@tonic-gate # open an existing file in read-only mode 2149*0Sstevel@tonic-gate use Fcntl 'O_RDONLY'; 2150*0Sstevel@tonic-gate tie @array, 'Tie::File', $file, mode => O_RDONLY; 2151*0Sstevel@tonic-gate 2152*0Sstevel@tonic-gateOpening the data file in write-only or append mode is not supported. 2153*0Sstevel@tonic-gate 2154*0Sstevel@tonic-gate=head2 C<memory> 2155*0Sstevel@tonic-gate 2156*0Sstevel@tonic-gateThis is an upper limit on the amount of memory that C<Tie::File> will 2157*0Sstevel@tonic-gateconsume at any time while managing the file. This is used for two 2158*0Sstevel@tonic-gatethings: managing the I<read cache> and managing the I<deferred write 2159*0Sstevel@tonic-gatebuffer>. 2160*0Sstevel@tonic-gate 2161*0Sstevel@tonic-gateRecords read in from the file are cached, to avoid having to re-read 2162*0Sstevel@tonic-gatethem repeatedly. If you read the same record twice, the first time it 2163*0Sstevel@tonic-gatewill be stored in memory, and the second time it will be fetched from 2164*0Sstevel@tonic-gatethe I<read cache>. The amount of data in the read cache will not 2165*0Sstevel@tonic-gateexceed the value you specified for C<memory>. If C<Tie::File> wants 2166*0Sstevel@tonic-gateto cache a new record, but the read cache is full, it will make room 2167*0Sstevel@tonic-gateby expiring the least-recently visited records from the read cache. 2168*0Sstevel@tonic-gate 2169*0Sstevel@tonic-gateThe default memory limit is 2Mib. You can adjust the maximum read 2170*0Sstevel@tonic-gatecache size by supplying the C<memory> option. The argument is the 2171*0Sstevel@tonic-gatedesired cache size, in bytes. 2172*0Sstevel@tonic-gate 2173*0Sstevel@tonic-gate # I have a lot of memory, so use a large cache to speed up access 2174*0Sstevel@tonic-gate tie @array, 'Tie::File', $file, memory => 20_000_000; 2175*0Sstevel@tonic-gate 2176*0Sstevel@tonic-gateSetting the memory limit to 0 will inhibit caching; records will be 2177*0Sstevel@tonic-gatefetched from disk every time you examine them. 2178*0Sstevel@tonic-gate 2179*0Sstevel@tonic-gateThe C<memory> value is not an absolute or exact limit on the memory 2180*0Sstevel@tonic-gateused. C<Tie::File> objects contains some structures besides the read 2181*0Sstevel@tonic-gatecache and the deferred write buffer, whose sizes are not charged 2182*0Sstevel@tonic-gateagainst C<memory>. 2183*0Sstevel@tonic-gate 2184*0Sstevel@tonic-gateThe cache itself consumes about 310 bytes per cached record, so if 2185*0Sstevel@tonic-gateyour file has many short records, you may want to decrease the cache 2186*0Sstevel@tonic-gatememory limit, or else the cache overhead may exceed the size of the 2187*0Sstevel@tonic-gatecached data. 2188*0Sstevel@tonic-gate 2189*0Sstevel@tonic-gate 2190*0Sstevel@tonic-gate=head2 C<dw_size> 2191*0Sstevel@tonic-gate 2192*0Sstevel@tonic-gate(This is an advanced feature. Skip this section on first reading.) 2193*0Sstevel@tonic-gate 2194*0Sstevel@tonic-gateIf you use deferred writing (See L<"Deferred Writing">, below) then 2195*0Sstevel@tonic-gatedata you write into the array will not be written directly to the 2196*0Sstevel@tonic-gatefile; instead, it will be saved in the I<deferred write buffer> to be 2197*0Sstevel@tonic-gatewritten out later. Data in the deferred write buffer is also charged 2198*0Sstevel@tonic-gateagainst the memory limit you set with the C<memory> option. 2199*0Sstevel@tonic-gate 2200*0Sstevel@tonic-gateYou may set the C<dw_size> option to limit the amount of data that can 2201*0Sstevel@tonic-gatebe saved in the deferred write buffer. This limit may not exceed the 2202*0Sstevel@tonic-gatetotal memory limit. For example, if you set C<dw_size> to 1000 and 2203*0Sstevel@tonic-gateC<memory> to 2500, that means that no more than 1000 bytes of deferred 2204*0Sstevel@tonic-gatewrites will be saved up. The space available for the read cache will 2205*0Sstevel@tonic-gatevary, but it will always be at least 1500 bytes (if the deferred write 2206*0Sstevel@tonic-gatebuffer is full) and it could grow as large as 2500 bytes (if the 2207*0Sstevel@tonic-gatedeferred write buffer is empty.) 2208*0Sstevel@tonic-gate 2209*0Sstevel@tonic-gateIf you don't specify a C<dw_size>, it defaults to the entire memory 2210*0Sstevel@tonic-gatelimit. 2211*0Sstevel@tonic-gate 2212*0Sstevel@tonic-gate=head2 Option Format 2213*0Sstevel@tonic-gate 2214*0Sstevel@tonic-gateC<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for 2215*0Sstevel@tonic-gateC<recsep>. C<-memory> is a synonym for C<memory>. You get the 2216*0Sstevel@tonic-gateidea. 2217*0Sstevel@tonic-gate 2218*0Sstevel@tonic-gate=head1 Public Methods 2219*0Sstevel@tonic-gate 2220*0Sstevel@tonic-gateThe C<tie> call returns an object, say C<$o>. You may call 2221*0Sstevel@tonic-gate 2222*0Sstevel@tonic-gate $rec = $o->FETCH($n); 2223*0Sstevel@tonic-gate $o->STORE($n, $rec); 2224*0Sstevel@tonic-gate 2225*0Sstevel@tonic-gateto fetch or store the record at line C<$n>, respectively; similarly 2226*0Sstevel@tonic-gatethe other tied array methods. (See L<perltie> for details.) You may 2227*0Sstevel@tonic-gatealso call the following methods on this object: 2228*0Sstevel@tonic-gate 2229*0Sstevel@tonic-gate=head2 C<flock> 2230*0Sstevel@tonic-gate 2231*0Sstevel@tonic-gate $o->flock(MODE) 2232*0Sstevel@tonic-gate 2233*0Sstevel@tonic-gatewill lock the tied file. C<MODE> has the same meaning as the second 2234*0Sstevel@tonic-gateargument to the Perl built-in C<flock> function; for example 2235*0Sstevel@tonic-gateC<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by 2236*0Sstevel@tonic-gatethe C<use Fcntl ':flock'> declaration.) 2237*0Sstevel@tonic-gate 2238*0Sstevel@tonic-gateC<MODE> is optional; the default is C<LOCK_EX>. 2239*0Sstevel@tonic-gate 2240*0Sstevel@tonic-gateC<Tie::File> maintains an internal table of the byte offset of each 2241*0Sstevel@tonic-gaterecord it has seen in the file. 2242*0Sstevel@tonic-gate 2243*0Sstevel@tonic-gateWhen you use C<flock> to lock the file, C<Tie::File> assumes that the 2244*0Sstevel@tonic-gateread cache is no longer trustworthy, because another process might 2245*0Sstevel@tonic-gatehave modified the file since the last time it was read. Therefore, a 2246*0Sstevel@tonic-gatesuccessful call to C<flock> discards the contents of the read cache 2247*0Sstevel@tonic-gateand the internal record offset table. 2248*0Sstevel@tonic-gate 2249*0Sstevel@tonic-gateC<Tie::File> promises that the following sequence of operations will 2250*0Sstevel@tonic-gatebe safe: 2251*0Sstevel@tonic-gate 2252*0Sstevel@tonic-gate my $o = tie @array, "Tie::File", $filename; 2253*0Sstevel@tonic-gate $o->flock; 2254*0Sstevel@tonic-gate 2255*0Sstevel@tonic-gateIn particular, C<Tie::File> will I<not> read or write the file during 2256*0Sstevel@tonic-gatethe C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of 2257*0Sstevel@tonic-gatecourse, erase the file during the C<tie> call. If you want to do this 2258*0Sstevel@tonic-gatesafely, then open the file without C<O_TRUNC>, lock the file, and use 2259*0Sstevel@tonic-gateC<@array = ()>.) 2260*0Sstevel@tonic-gate 2261*0Sstevel@tonic-gateThe best way to unlock a file is to discard the object and untie the 2262*0Sstevel@tonic-gatearray. It is probably unsafe to unlock the file without also untying 2263*0Sstevel@tonic-gateit, because if you do, changes may remain unwritten inside the object. 2264*0Sstevel@tonic-gateThat is why there is no shortcut for unlocking. If you really want to 2265*0Sstevel@tonic-gateunlock the file prematurely, you know what to do; if you don't know 2266*0Sstevel@tonic-gatewhat to do, then don't do it. 2267*0Sstevel@tonic-gate 2268*0Sstevel@tonic-gateAll the usual warnings about file locking apply here. In particular, 2269*0Sstevel@tonic-gatenote that file locking in Perl is B<advisory>, which means that 2270*0Sstevel@tonic-gateholding a lock will not prevent anyone else from reading, writing, or 2271*0Sstevel@tonic-gateerasing the file; it only prevents them from getting another lock at 2272*0Sstevel@tonic-gatethe same time. Locks are analogous to green traffic lights: If you 2273*0Sstevel@tonic-gatehave a green light, that does not prevent the idiot coming the other 2274*0Sstevel@tonic-gateway from plowing into you sideways; it merely guarantees to you that 2275*0Sstevel@tonic-gatethe idiot does not also have a green light at the same time. 2276*0Sstevel@tonic-gate 2277*0Sstevel@tonic-gate=head2 C<autochomp> 2278*0Sstevel@tonic-gate 2279*0Sstevel@tonic-gate my $old_value = $o->autochomp(0); # disable autochomp option 2280*0Sstevel@tonic-gate my $old_value = $o->autochomp(1); # enable autochomp option 2281*0Sstevel@tonic-gate 2282*0Sstevel@tonic-gate my $ac = $o->autochomp(); # recover current value 2283*0Sstevel@tonic-gate 2284*0Sstevel@tonic-gateSee L<"autochomp">, above. 2285*0Sstevel@tonic-gate 2286*0Sstevel@tonic-gate=head2 C<defer>, C<flush>, C<discard>, and C<autodefer> 2287*0Sstevel@tonic-gate 2288*0Sstevel@tonic-gateSee L<"Deferred Writing">, below. 2289*0Sstevel@tonic-gate 2290*0Sstevel@tonic-gate=head2 C<offset> 2291*0Sstevel@tonic-gate 2292*0Sstevel@tonic-gate $off = $o->offset($n); 2293*0Sstevel@tonic-gate 2294*0Sstevel@tonic-gateThis method returns the byte offset of the start of the C<$n>th record 2295*0Sstevel@tonic-gatein the file. If there is no such record, it returns an undefined 2296*0Sstevel@tonic-gatevalue. 2297*0Sstevel@tonic-gate 2298*0Sstevel@tonic-gate=head1 Tying to an already-opened filehandle 2299*0Sstevel@tonic-gate 2300*0Sstevel@tonic-gateIf C<$fh> is a filehandle, such as is returned by C<IO::File> or one 2301*0Sstevel@tonic-gateof the other C<IO> modules, you may use: 2302*0Sstevel@tonic-gate 2303*0Sstevel@tonic-gate tie @array, 'Tie::File', $fh, ...; 2304*0Sstevel@tonic-gate 2305*0Sstevel@tonic-gateSimilarly if you opened that handle C<FH> with regular C<open> or 2306*0Sstevel@tonic-gateC<sysopen>, you may use: 2307*0Sstevel@tonic-gate 2308*0Sstevel@tonic-gate tie @array, 'Tie::File', \*FH, ...; 2309*0Sstevel@tonic-gate 2310*0Sstevel@tonic-gateHandles that were opened write-only won't work. Handles that were 2311*0Sstevel@tonic-gateopened read-only will work as long as you don't try to modify the 2312*0Sstevel@tonic-gatearray. Handles must be attached to seekable sources of data---that 2313*0Sstevel@tonic-gatemeans no pipes or sockets. If C<Tie::File> can detect that you 2314*0Sstevel@tonic-gatesupplied a non-seekable handle, the C<tie> call will throw an 2315*0Sstevel@tonic-gateexception. (On Unix systems, it can detect this.) 2316*0Sstevel@tonic-gate 2317*0Sstevel@tonic-gateNote that Tie::File will only close any filehandles that it opened 2318*0Sstevel@tonic-gateinternally. If you passed it a filehandle as above, you "own" the 2319*0Sstevel@tonic-gatefilehandle, and are responsible for closing it after you have untied 2320*0Sstevel@tonic-gatethe @array. 2321*0Sstevel@tonic-gate 2322*0Sstevel@tonic-gate=head1 Deferred Writing 2323*0Sstevel@tonic-gate 2324*0Sstevel@tonic-gate(This is an advanced feature. Skip this section on first reading.) 2325*0Sstevel@tonic-gate 2326*0Sstevel@tonic-gateNormally, modifying a C<Tie::File> array writes to the underlying file 2327*0Sstevel@tonic-gateimmediately. Every assignment like C<$a[3] = ...> rewrites as much of 2328*0Sstevel@tonic-gatethe file as is necessary; typically, everything from line 3 through 2329*0Sstevel@tonic-gatethe end will need to be rewritten. This is the simplest and most 2330*0Sstevel@tonic-gatetransparent behavior. Performance even for large files is reasonably 2331*0Sstevel@tonic-gategood. 2332*0Sstevel@tonic-gate 2333*0Sstevel@tonic-gateHowever, under some circumstances, this behavior may be excessively 2334*0Sstevel@tonic-gateslow. For example, suppose you have a million-record file, and you 2335*0Sstevel@tonic-gatewant to do: 2336*0Sstevel@tonic-gate 2337*0Sstevel@tonic-gate for (@FILE) { 2338*0Sstevel@tonic-gate $_ = "> $_"; 2339*0Sstevel@tonic-gate } 2340*0Sstevel@tonic-gate 2341*0Sstevel@tonic-gateThe first time through the loop, you will rewrite the entire file, 2342*0Sstevel@tonic-gatefrom line 0 through the end. The second time through the loop, you 2343*0Sstevel@tonic-gatewill rewrite the entire file from line 1 through the end. The third 2344*0Sstevel@tonic-gatetime through the loop, you will rewrite the entire file from line 2 to 2345*0Sstevel@tonic-gatethe end. And so on. 2346*0Sstevel@tonic-gate 2347*0Sstevel@tonic-gateIf the performance in such cases is unacceptable, you may defer the 2348*0Sstevel@tonic-gateactual writing, and then have it done all at once. The following loop 2349*0Sstevel@tonic-gatewill perform much better for large files: 2350*0Sstevel@tonic-gate 2351*0Sstevel@tonic-gate (tied @a)->defer; 2352*0Sstevel@tonic-gate for (@a) { 2353*0Sstevel@tonic-gate $_ = "> $_"; 2354*0Sstevel@tonic-gate } 2355*0Sstevel@tonic-gate (tied @a)->flush; 2356*0Sstevel@tonic-gate 2357*0Sstevel@tonic-gateIf C<Tie::File>'s memory limit is large enough, all the writing will 2358*0Sstevel@tonic-gatedone in memory. Then, when you call C<-E<gt>flush>, the entire file 2359*0Sstevel@tonic-gatewill be rewritten in a single pass. 2360*0Sstevel@tonic-gate 2361*0Sstevel@tonic-gate(Actually, the preceding discussion is something of a fib. You don't 2362*0Sstevel@tonic-gateneed to enable deferred writing to get good performance for this 2363*0Sstevel@tonic-gatecommon case, because C<Tie::File> will do it for you automatically 2364*0Sstevel@tonic-gateunless you specifically tell it not to. See L<"autodeferring">, 2365*0Sstevel@tonic-gatebelow.) 2366*0Sstevel@tonic-gate 2367*0Sstevel@tonic-gateCalling C<-E<gt>flush> returns the array to immediate-write mode. If 2368*0Sstevel@tonic-gateyou wish to discard the deferred writes, you may call C<-E<gt>discard> 2369*0Sstevel@tonic-gateinstead of C<-E<gt>flush>. Note that in some cases, some of the data 2370*0Sstevel@tonic-gatewill have been written already, and it will be too late for 2371*0Sstevel@tonic-gateC<-E<gt>discard> to discard all the changes. Support for 2372*0Sstevel@tonic-gateC<-E<gt>discard> may be withdrawn in a future version of C<Tie::File>. 2373*0Sstevel@tonic-gate 2374*0Sstevel@tonic-gateDeferred writes are cached in memory up to the limit specified by the 2375*0Sstevel@tonic-gateC<dw_size> option (see above). If the deferred-write buffer is full 2376*0Sstevel@tonic-gateand you try to write still more deferred data, the buffer will be 2377*0Sstevel@tonic-gateflushed. All buffered data will be written immediately, the buffer 2378*0Sstevel@tonic-gatewill be emptied, and the now-empty space will be used for future 2379*0Sstevel@tonic-gatedeferred writes. 2380*0Sstevel@tonic-gate 2381*0Sstevel@tonic-gateIf the deferred-write buffer isn't yet full, but the total size of the 2382*0Sstevel@tonic-gatebuffer and the read cache would exceed the C<memory> limit, the oldest 2383*0Sstevel@tonic-gaterecords will be expired from the read cache until the total size is 2384*0Sstevel@tonic-gateunder the limit. 2385*0Sstevel@tonic-gate 2386*0Sstevel@tonic-gateC<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be 2387*0Sstevel@tonic-gatedeferred. When you perform one of these operations, any deferred data 2388*0Sstevel@tonic-gateis written to the file and the operation is performed immediately. 2389*0Sstevel@tonic-gateThis may change in a future version. 2390*0Sstevel@tonic-gate 2391*0Sstevel@tonic-gateIf you resize the array with deferred writing enabled, the file will 2392*0Sstevel@tonic-gatebe resized immediately, but deferred records will not be written. 2393*0Sstevel@tonic-gateThis has a surprising consequence: C<@a = (...)> erases the file 2394*0Sstevel@tonic-gateimmediately, but the writing of the actual data is deferred. This 2395*0Sstevel@tonic-gatemight be a bug. If it is a bug, it will be fixed in a future version. 2396*0Sstevel@tonic-gate 2397*0Sstevel@tonic-gate=head2 Autodeferring 2398*0Sstevel@tonic-gate 2399*0Sstevel@tonic-gateC<Tie::File> tries to guess when deferred writing might be helpful, 2400*0Sstevel@tonic-gateand to turn it on and off automatically. 2401*0Sstevel@tonic-gate 2402*0Sstevel@tonic-gate for (@a) { 2403*0Sstevel@tonic-gate $_ = "> $_"; 2404*0Sstevel@tonic-gate } 2405*0Sstevel@tonic-gate 2406*0Sstevel@tonic-gateIn this example, only the first two assignments will be done 2407*0Sstevel@tonic-gateimmediately; after this, all the changes to the file will be deferred 2408*0Sstevel@tonic-gateup to the user-specified memory limit. 2409*0Sstevel@tonic-gate 2410*0Sstevel@tonic-gateYou should usually be able to ignore this and just use the module 2411*0Sstevel@tonic-gatewithout thinking about deferring. However, special applications may 2412*0Sstevel@tonic-gaterequire fine control over which writes are deferred, or may require 2413*0Sstevel@tonic-gatethat all writes be immediate. To disable the autodeferment feature, 2414*0Sstevel@tonic-gateuse 2415*0Sstevel@tonic-gate 2416*0Sstevel@tonic-gate (tied @o)->autodefer(0); 2417*0Sstevel@tonic-gate 2418*0Sstevel@tonic-gateor 2419*0Sstevel@tonic-gate 2420*0Sstevel@tonic-gate tie @array, 'Tie::File', $file, autodefer => 0; 2421*0Sstevel@tonic-gate 2422*0Sstevel@tonic-gate 2423*0Sstevel@tonic-gateSimilarly, C<-E<gt>autodefer(1)> re-enables autodeferment, and 2424*0Sstevel@tonic-gateC<-E<gt>autodefer()> recovers the current value of the autodefer setting. 2425*0Sstevel@tonic-gate 2426*0Sstevel@tonic-gate 2427*0Sstevel@tonic-gate=head1 CONCURRENT ACCESS TO FILES 2428*0Sstevel@tonic-gate 2429*0Sstevel@tonic-gateCaching and deferred writing are inappropriate if you want the same 2430*0Sstevel@tonic-gatefile to be accessed simultaneously from more than one process. Other 2431*0Sstevel@tonic-gateoptimizations performed internally by this module are also 2432*0Sstevel@tonic-gateincompatible with concurrent access. A future version of this module will 2433*0Sstevel@tonic-gatesupport a C<concurrent =E<gt> 1> option that enables safe concurrent access. 2434*0Sstevel@tonic-gate 2435*0Sstevel@tonic-gatePrevious versions of this documentation suggested using C<memory 2436*0Sstevel@tonic-gate=E<gt> 0> for safe concurrent access. This was mistaken. Tie::File 2437*0Sstevel@tonic-gatewill not support safe concurrent access before version 0.98. 2438*0Sstevel@tonic-gate 2439*0Sstevel@tonic-gate=head1 CAVEATS 2440*0Sstevel@tonic-gate 2441*0Sstevel@tonic-gate(That's Latin for 'warnings'.) 2442*0Sstevel@tonic-gate 2443*0Sstevel@tonic-gate=over 4 2444*0Sstevel@tonic-gate 2445*0Sstevel@tonic-gate=item * 2446*0Sstevel@tonic-gate 2447*0Sstevel@tonic-gateReasonable effort was made to make this module efficient. Nevertheless, 2448*0Sstevel@tonic-gatechanging the size of a record in the middle of a large file will 2449*0Sstevel@tonic-gatealways be fairly slow, because everything after the new record must be 2450*0Sstevel@tonic-gatemoved. 2451*0Sstevel@tonic-gate 2452*0Sstevel@tonic-gate=item * 2453*0Sstevel@tonic-gate 2454*0Sstevel@tonic-gateThe behavior of tied arrays is not precisely the same as for regular 2455*0Sstevel@tonic-gatearrays. For example: 2456*0Sstevel@tonic-gate 2457*0Sstevel@tonic-gate # This DOES print "How unusual!" 2458*0Sstevel@tonic-gate undef $a[10]; print "How unusual!\n" if defined $a[10]; 2459*0Sstevel@tonic-gate 2460*0Sstevel@tonic-gateC<undef>-ing a C<Tie::File> array element just blanks out the 2461*0Sstevel@tonic-gatecorresponding record in the file. When you read it back again, you'll 2462*0Sstevel@tonic-gateget the empty string, so the supposedly-C<undef>'ed value will be 2463*0Sstevel@tonic-gatedefined. Similarly, if you have C<autochomp> disabled, then 2464*0Sstevel@tonic-gate 2465*0Sstevel@tonic-gate # This DOES print "How unusual!" if 'autochomp' is disabled 2466*0Sstevel@tonic-gate undef $a[10]; 2467*0Sstevel@tonic-gate print "How unusual!\n" if $a[10]; 2468*0Sstevel@tonic-gate 2469*0Sstevel@tonic-gateBecause when C<autochomp> is disabled, C<$a[10]> will read back as 2470*0Sstevel@tonic-gateC<"\n"> (or whatever the record separator string is.) 2471*0Sstevel@tonic-gate 2472*0Sstevel@tonic-gateThere are other minor differences, particularly regarding C<exists> 2473*0Sstevel@tonic-gateand C<delete>, but in general, the correspondence is extremely close. 2474*0Sstevel@tonic-gate 2475*0Sstevel@tonic-gate=item * 2476*0Sstevel@tonic-gate 2477*0Sstevel@tonic-gateI have supposed that since this module is concerned with file I/O, 2478*0Sstevel@tonic-gatealmost all normal use of it will be heavily I/O bound. This means 2479*0Sstevel@tonic-gatethat the time to maintain complicated data structures inside the 2480*0Sstevel@tonic-gatemodule will be dominated by the time to actually perform the I/O. 2481*0Sstevel@tonic-gateWhen there was an opportunity to spend CPU time to avoid doing I/O, I 2482*0Sstevel@tonic-gateusually tried to take it. 2483*0Sstevel@tonic-gate 2484*0Sstevel@tonic-gate=item * 2485*0Sstevel@tonic-gate 2486*0Sstevel@tonic-gateYou might be tempted to think that deferred writing is like 2487*0Sstevel@tonic-gatetransactions, with C<flush> as C<commit> and C<discard> as 2488*0Sstevel@tonic-gateC<rollback>, but it isn't, so don't. 2489*0Sstevel@tonic-gate 2490*0Sstevel@tonic-gate=item * 2491*0Sstevel@tonic-gate 2492*0Sstevel@tonic-gateThere is a large memory overhead for each record offset and for each 2493*0Sstevel@tonic-gatecache entry: about 310 bytes per cached data record, and about 21 bytes per offset table entry. 2494*0Sstevel@tonic-gate 2495*0Sstevel@tonic-gateThe per-record overhead will limit the maximum number of records you 2496*0Sstevel@tonic-gatecan access per file. Note that I<accessing> the length of the array 2497*0Sstevel@tonic-gatevia C<$x = scalar @tied_file> accesses B<all> records and stores their 2498*0Sstevel@tonic-gateoffsets. The same for C<foreach (@tied_file)>, even if you exit the 2499*0Sstevel@tonic-gateloop early. 2500*0Sstevel@tonic-gate 2501*0Sstevel@tonic-gate=back 2502*0Sstevel@tonic-gate 2503*0Sstevel@tonic-gate=head1 SUBCLASSING 2504*0Sstevel@tonic-gate 2505*0Sstevel@tonic-gateThis version promises absolutely nothing about the internals, which 2506*0Sstevel@tonic-gatemay change without notice. A future version of the module will have a 2507*0Sstevel@tonic-gatewell-defined and stable subclassing API. 2508*0Sstevel@tonic-gate 2509*0Sstevel@tonic-gate=head1 WHAT ABOUT C<DB_File>? 2510*0Sstevel@tonic-gate 2511*0Sstevel@tonic-gatePeople sometimes point out that L<DB_File> will do something similar, 2512*0Sstevel@tonic-gateand ask why C<Tie::File> module is necessary. 2513*0Sstevel@tonic-gate 2514*0Sstevel@tonic-gateThere are a number of reasons that you might prefer C<Tie::File>. 2515*0Sstevel@tonic-gateA list is available at C<http://perl.plover.com/TieFile/why-not-DB_File>. 2516*0Sstevel@tonic-gate 2517*0Sstevel@tonic-gate=head1 AUTHOR 2518*0Sstevel@tonic-gate 2519*0Sstevel@tonic-gateMark Jason Dominus 2520*0Sstevel@tonic-gate 2521*0Sstevel@tonic-gateTo contact the author, send email to: C<mjd-perl-tiefile+@plover.com> 2522*0Sstevel@tonic-gate 2523*0Sstevel@tonic-gateTo receive an announcement whenever a new version of this module is 2524*0Sstevel@tonic-gatereleased, send a blank email message to 2525*0Sstevel@tonic-gateC<mjd-perl-tiefile-subscribe@plover.com>. 2526*0Sstevel@tonic-gate 2527*0Sstevel@tonic-gateThe most recent version of this module, including documentation and 2528*0Sstevel@tonic-gateany news of importance, will be available at 2529*0Sstevel@tonic-gate 2530*0Sstevel@tonic-gate http://perl.plover.com/TieFile/ 2531*0Sstevel@tonic-gate 2532*0Sstevel@tonic-gate 2533*0Sstevel@tonic-gate=head1 LICENSE 2534*0Sstevel@tonic-gate 2535*0Sstevel@tonic-gateC<Tie::File> version 0.97 is copyright (C) 2003 Mark Jason Dominus. 2536*0Sstevel@tonic-gate 2537*0Sstevel@tonic-gateThis library is free software; you may redistribute it and/or modify 2538*0Sstevel@tonic-gateit under the same terms as Perl itself. 2539*0Sstevel@tonic-gate 2540*0Sstevel@tonic-gateThese terms are your choice of any of (1) the Perl Artistic Licence, 2541*0Sstevel@tonic-gateor (2) version 2 of the GNU General Public License as published by the 2542*0Sstevel@tonic-gateFree Software Foundation, or (3) any later version of the GNU General 2543*0Sstevel@tonic-gatePublic License. 2544*0Sstevel@tonic-gate 2545*0Sstevel@tonic-gateThis library is distributed in the hope that it will be useful, 2546*0Sstevel@tonic-gatebut WITHOUT ANY WARRANTY; without even the implied warranty of 2547*0Sstevel@tonic-gateMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 2548*0Sstevel@tonic-gateGNU General Public License for more details. 2549*0Sstevel@tonic-gate 2550*0Sstevel@tonic-gateYou should have received a copy of the GNU General Public License 2551*0Sstevel@tonic-gatealong with this library program; it should be in the file C<COPYING>. 2552*0Sstevel@tonic-gateIf not, write to the Free Software Foundation, Inc., 59 Temple Place, 2553*0Sstevel@tonic-gateSuite 330, Boston, MA 02111 USA 2554*0Sstevel@tonic-gate 2555*0Sstevel@tonic-gateFor licensing inquiries, contact the author at: 2556*0Sstevel@tonic-gate 2557*0Sstevel@tonic-gate Mark Jason Dominus 2558*0Sstevel@tonic-gate 255 S. Warnock St. 2559*0Sstevel@tonic-gate Philadelphia, PA 19107 2560*0Sstevel@tonic-gate 2561*0Sstevel@tonic-gate=head1 WARRANTY 2562*0Sstevel@tonic-gate 2563*0Sstevel@tonic-gateC<Tie::File> version 0.97 comes with ABSOLUTELY NO WARRANTY. 2564*0Sstevel@tonic-gateFor details, see the license. 2565*0Sstevel@tonic-gate 2566*0Sstevel@tonic-gate=head1 THANKS 2567*0Sstevel@tonic-gate 2568*0Sstevel@tonic-gateGigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the 2569*0Sstevel@tonic-gatecore when I hadn't written it yet, and for generally being helpful, 2570*0Sstevel@tonic-gatesupportive, and competent. (Usually the rule is "choose any one.") 2571*0Sstevel@tonic-gateAlso big thanks to Abhijit Menon-Sen for all of the same things. 2572*0Sstevel@tonic-gate 2573*0Sstevel@tonic-gateSpecial thanks to Craig Berry and Peter Prymmer (for VMS portability 2574*0Sstevel@tonic-gatehelp), Randy Kobes (for Win32 portability help), Clinton Pierce and 2575*0Sstevel@tonic-gateAutrijus Tang (for heroic eleventh-hour Win32 testing above and beyond 2576*0Sstevel@tonic-gatethe call of duty), Michael G Schwern (for testing advice), and the 2577*0Sstevel@tonic-gaterest of the CPAN testers (for testing generally). 2578*0Sstevel@tonic-gate 2579*0Sstevel@tonic-gateSpecial thanks to Tels for suggesting several speed and memory 2580*0Sstevel@tonic-gateoptimizations. 2581*0Sstevel@tonic-gate 2582*0Sstevel@tonic-gateAdditional thanks to: 2583*0Sstevel@tonic-gateEdward Avis / 2584*0Sstevel@tonic-gateMattia Barbon / 2585*0Sstevel@tonic-gateTom Christiansen / 2586*0Sstevel@tonic-gateGerrit Haase / 2587*0Sstevel@tonic-gateGurusamy Sarathy / 2588*0Sstevel@tonic-gateJarkko Hietaniemi (again) / 2589*0Sstevel@tonic-gateNikola Knezevic / 2590*0Sstevel@tonic-gateJohn Kominetz / 2591*0Sstevel@tonic-gateNick Ing-Simmons / 2592*0Sstevel@tonic-gateTassilo von Parseval / 2593*0Sstevel@tonic-gateH. Dieter Pearcey / 2594*0Sstevel@tonic-gateSlaven Rezic / 2595*0Sstevel@tonic-gateEric Roode / 2596*0Sstevel@tonic-gatePeter Scott / 2597*0Sstevel@tonic-gatePeter Somu / 2598*0Sstevel@tonic-gateAutrijus Tang (again) / 2599*0Sstevel@tonic-gateTels (again) / 2600*0Sstevel@tonic-gateJuerd Waalboer 2601*0Sstevel@tonic-gate 2602*0Sstevel@tonic-gate=head1 TODO 2603*0Sstevel@tonic-gate 2604*0Sstevel@tonic-gateMore tests. (Stuff I didn't think of yet.) 2605*0Sstevel@tonic-gate 2606*0Sstevel@tonic-gateParagraph mode? 2607*0Sstevel@tonic-gate 2608*0Sstevel@tonic-gateFixed-length mode. Leave-blanks mode. 2609*0Sstevel@tonic-gate 2610*0Sstevel@tonic-gateMaybe an autolocking mode? 2611*0Sstevel@tonic-gate 2612*0Sstevel@tonic-gateFor many common uses of the module, the read cache is a liability. 2613*0Sstevel@tonic-gateFor example, a program that inserts a single record, or that scans the 2614*0Sstevel@tonic-gatefile once, will have a cache hit rate of zero. This suggests a major 2615*0Sstevel@tonic-gateoptimization: The cache should be initially disabled. Here's a hybrid 2616*0Sstevel@tonic-gateapproach: Initially, the cache is disabled, but the cache code 2617*0Sstevel@tonic-gatemaintains statistics about how high the hit rate would be *if* it were 2618*0Sstevel@tonic-gateenabled. When it sees the hit rate get high enough, it enables 2619*0Sstevel@tonic-gateitself. The STAT comments in this code are the beginning of an 2620*0Sstevel@tonic-gateimplementation of this. 2621*0Sstevel@tonic-gate 2622*0Sstevel@tonic-gateRecord locking with fcntl()? Then the module might support an undo 2623*0Sstevel@tonic-gatelog and get real transactions. What a tour de force that would be. 2624*0Sstevel@tonic-gate 2625*0Sstevel@tonic-gateKeeping track of the highest cached record. This would allow reads-in-a-row 2626*0Sstevel@tonic-gateto skip the cache lookup faster (if reading from 1..N with empty cache at 2627*0Sstevel@tonic-gatestart, the last cached value will be always N-1). 2628*0Sstevel@tonic-gate 2629*0Sstevel@tonic-gateMore tests. 2630*0Sstevel@tonic-gate 2631*0Sstevel@tonic-gate=cut 2632*0Sstevel@tonic-gate 2633