cpan/DB_File/DB_File.pm

b39c5158Smillert# DB_File.pm -- Perl 5 interface to Berkeley DB
b39c5158Smillert#
6fb12b70Safresh1# Written by Paul Marquess (pmqs@cpan.org)
b39c5158Smillert#
*3d61058aSafresh1#     Copyright (c) 1995-2023 Paul Marquess. All rights reserved.
b39c5158Smillert#     This program is free software; you can redistribute it and/or
b39c5158Smillert#     modify it under the same terms as Perl itself.
b39c5158Smillert
b39c5158Smillert
b39c5158Smillertpackage DB_File::HASHINFO ;
b39c5158Smillert
b8851fccSafresh1require 5.008003;
b39c5158Smillert
b39c5158Smillertuse warnings;
b39c5158Smillertuse strict;
b39c5158Smillertuse Carp;
b39c5158Smillertrequire Tie::Hash;
b39c5158Smillert@DB_File::HASHINFO::ISA = qw(Tie::Hash);
b39c5158Smillert
# Constructor shared by the *INFO classes: ties a fresh hash to the
# invoking class and returns a reference to that tied hash, blessed into
# the same class.
sub new
{
    my ($class) = @_ ;

    my %info ;
    tie %info, $class ;

    return bless \%info, $class ;
}
b39c5158Smillert
b39c5158Smillert
# Build the object behind a tied HASHINFO hash.
#   VALID - the permitted keys; the value 2 marks a key that STORE checks
#           for a code reference, 1 marks every other key.
#   GOT   - the values stored so far (initially none).
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = (
        ( map { $_ => 1 } qw( bsize ffactor nelem cachesize lorder ) ),
        hash => 2,
    ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
b39c5158Smillert
b39c5158Smillert
# Tied-hash read.  Returns the stored value for a permitted key (undef if
# nothing has been stored yet) and croaks for a key that is not in VALID.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless ( exists $self->{VALID}{$key} )
    {
        my $class = ref $self ;
        croak "${class}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
b39c5158Smillert
b39c5158Smillert
# Tied-hash write.
#   $key   - must be one of the keys listed in VALID, otherwise croaks.
#   $value - for keys whose VALID type is 2 this must be a code reference,
#            otherwise croaks; any value is accepted for other keys.
# Returns nothing.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    my $type = $self->{VALID}{$key} ;

    if ( ! $type )
    {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    if ( $type == 2 )
    {
        # The previous test ("!ref $value && ref $value ne 'CODE'") only
        # rejected non-references, so array or hash references slipped
        # through.  reftype() looks at the underlying type, so a blessed
        # code reference is still accepted.
        require Scalar::Util ;
        my $reftype = Scalar::Util::reftype($value) ;
        croak "Key '$key' not associated with a code reference"
            unless defined $reftype && $reftype eq 'CODE' ;
    }

    $self->{GOT}{$key} = $value ;
    return ;
}
b39c5158Smillert
# Tied-hash delete.  Forgets any value stored for a permitted key and
# returns nothing; croaks for a key that is not in VALID.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Report the object's real class, as FETCH and STORE do, so that the
    # classes inheriting this method are not misreported as HASHINFO.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
b39c5158Smillert
# Tied-hash exists().  True when the key is one of the permitted keys in
# VALID, whether or not a value has been stored for it.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
b39c5158Smillert
# Croak with a message saying that the object's class does not define
# the named method.
sub NotHere
{
    my ($self, $method) = @_ ;

    my $class = ref $self ;
    croak "$class does not define the method ${method}" ;
}
b39c5158Smillert
# The remaining tied-hash operations are not supported; each one hands
# its own name to NotHere, which croaks.
sub FIRSTKEY
{
    my ($self) = @_ ;
    $self->NotHere("FIRSTKEY") ;
}

sub NEXTKEY
{
    my ($self) = @_ ;
    $self->NotHere("NEXTKEY") ;
}

sub CLEAR
{
    my ($self) = @_ ;
    $self->NotHere("CLEAR") ;
}
b39c5158Smillert
b39c5158Smillertpackage DB_File::RECNOINFO ;
b39c5158Smillert
b39c5158Smillertuse warnings;
b39c5158Smillertuse strict ;
b39c5158Smillert
b39c5158Smillert@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
b39c5158Smillert
# Build the object behind a tied RECNOINFO hash: VALID lists the
# permitted keys (all of type 1), GOT holds the values stored so far.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid ;
    $valid{$_} = 1
        for qw( bval cachesize psize flags lorder reclen bfname ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
b39c5158Smillert
b39c5158Smillertpackage DB_File::BTREEINFO ;
b39c5158Smillert
b39c5158Smillertuse warnings;
b39c5158Smillertuse strict ;
b39c5158Smillert
b39c5158Smillert@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
b39c5158Smillert
# Build the object behind a tied BTREEINFO hash.
#   VALID - the permitted keys; 'compare' and 'prefix' have type 2 (the
#           inherited STORE checks those for a code reference), every
#           other key has type 1.
#   GOT   - the values stored so far (initially none).
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = (
        ( map { $_ => 1 }
              qw( flags cachesize maxkeypage minkeypage psize lorder ) ),
        ( map { $_ => 2 } qw( compare prefix ) ),
    ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
b39c5158Smillert
b39c5158Smillert
b39c5158Smillertpackage DB_File ;
b39c5158Smillert
b39c5158Smillertuse warnings;
b39c5158Smillertuse strict;
b39c5158Smillertour ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
898184e3Ssthenour ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error);
b39c5158Smillertuse Carp;
b39c5158Smillert
# The module is not thread safe, so return true from CLONE_SKIP to stop
# Perl cloning DB_File objects into new threads.
sub CLONE_SKIP
{
    return 1 ;
}
b39c5158Smillert
*3d61058aSafresh1$VERSION = "1.859" ;
b39c5158Smillert$VERSION = eval $VERSION; # needed for dev releases
b39c5158Smillert
# Probe how this perl's builtin splice() behaves so that SPLICE can mimic
# it: run a splice with OFFSET past the end of a one-element array, once
# without LENGTH and once with it, capture whatever warning is raised,
# and reduce each capture to a boolean recording whether the
# "offset past end of array" warning was seen.
{
    local $SIG{__WARN__} =
        sub { $splice_end_array_no_length = join(" ", @_) } ;

    my @probe = (1) ;
    splice(@probe, 3) ;

    $splice_end_array_no_length =
        ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /);
}
{
    local $SIG{__WARN__} =
        sub { $splice_end_array = join(" ", @_) } ;

    my @probe = (1) ;
    splice(@probe, 3, 1) ;

    $splice_end_array =
        ($splice_end_array =~ /^splice\(\) offset past end of array at /);
}
b39c5158Smillert
b39c5158Smillert#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
eac174f2Safresh1$DB_BTREE = DB_File::BTREEINFO->new();
eac174f2Safresh1$DB_HASH  = DB_File::HASHINFO->new();
eac174f2Safresh1$DB_RECNO = DB_File::RECNOINFO->new();
b39c5158Smillert
b39c5158Smillertrequire Tie::Hash;
b39c5158Smillertrequire Exporter;
b39c5158SmillertBEGIN {
b39c5158Smillert    $use_XSLoader = 1 ;
b39c5158Smillert    { local $SIG{__DIE__} ; eval { require XSLoader } ; }
b39c5158Smillert
b39c5158Smillert    if ($@) {
b39c5158Smillert        $use_XSLoader = 0 ;
b39c5158Smillert        require DynaLoader;
b39c5158Smillert        @ISA = qw(DynaLoader);
b39c5158Smillert    }
b39c5158Smillert}
b39c5158Smillert
b39c5158Smillertpush @ISA, qw(Tie::Hash Exporter);
b39c5158Smillert@EXPORT = qw(
b39c5158Smillert        $DB_BTREE $DB_HASH $DB_RECNO
b39c5158Smillert
b39c5158Smillert        BTREEMAGIC
b39c5158Smillert        BTREEVERSION
b39c5158Smillert        DB_LOCK
b39c5158Smillert        DB_SHMEM
b39c5158Smillert        DB_TXN
b39c5158Smillert        HASHMAGIC
b39c5158Smillert        HASHVERSION
b39c5158Smillert        MAX_PAGE_NUMBER
b39c5158Smillert        MAX_PAGE_OFFSET
b39c5158Smillert        MAX_REC_NUMBER
b39c5158Smillert        RET_ERROR
b39c5158Smillert        RET_SPECIAL
b39c5158Smillert        RET_SUCCESS
b39c5158Smillert        R_CURSOR
b39c5158Smillert        R_DUP
b39c5158Smillert        R_FIRST
b39c5158Smillert        R_FIXEDLEN
b39c5158Smillert        R_IAFTER
b39c5158Smillert        R_IBEFORE
b39c5158Smillert        R_LAST
b39c5158Smillert        R_NEXT
b39c5158Smillert        R_NOKEY
b39c5158Smillert        R_NOOVERWRITE
b39c5158Smillert        R_PREV
b39c5158Smillert        R_RECNOSYNC
b39c5158Smillert        R_SETCURSOR
b39c5158Smillert        R_SNAPSHOT
b39c5158Smillert        __R_UNUSED
b39c5158Smillert
b39c5158Smillert);
b39c5158Smillert
# Resolve an undefined function name as a constant: strip the package
# qualifier, look the bare name up with constant(), croak with the error
# it reports, otherwise install a sub returning the value under the
# requested name and jump to it so later calls bypass AUTOLOAD.
sub AUTOLOAD {
    (my $constname = $AUTOLOAD) =~ s/.*:://;

    my ($error, $val) = constant($constname);
    Carp::croak($error) if $error;

    no strict 'refs';
    *{$AUTOLOAD} = sub { $val };
    goto &{$AUTOLOAD};
}
b39c5158Smillert
b39c5158Smillert
b39c5158Smillerteval {
b39c5158Smillert    # Make all Fcntl O_XXX constants available for importing
b39c5158Smillert    require Fcntl;
b39c5158Smillert    my @O = grep /^O_/, @Fcntl::EXPORT;
b39c5158Smillert    Fcntl->import(@O);  # first we import what we want to export
b39c5158Smillert    push(@EXPORT, @O);
b39c5158Smillert};
b39c5158Smillert
b39c5158Smillertif ($use_XSLoader)
b39c5158Smillert  { XSLoader::load("DB_File", $VERSION)}
b39c5158Smillertelse
eac174f2Safresh1  { DB_File->bootstrap( $VERSION ) }
b39c5158Smillert
# Common implementation behind TIEHASH and TIEARRAY.
#
# Arguments are those given to tie(): class, filename, flags, mode and
# the info object.  The arguments are normalised and then handed to
# DoTie_ together with a flag saying whether a hash is being tied.
# Returns whatever DoTie_ returns, or undef if the backing file for a
# RECNO database has to be created first and cannot be.
#
# NOTE: this must be called directly from TIEHASH/TIEARRAY, because the
# hash-or-array decision is made from the name of the calling sub.
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    use File::Spec;
    $arg[1] = File::Spec->rel2abs($arg[1])
        if defined $arg[1] ;

    # If the info argument is a reference to a tied hash, pass on the
    # object it is tied to instead.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # Defaults for flags and mode when they were passed as undef.
    $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
    $arg[3] = 0666               if @arg >=4 && ! defined $arg[3];

    # make recno in Berkeley DB version 2 (or better) work like
    # recno in version 1.
    if ($db_version >= 4 and ! $tieHASH) {
        $arg[2] |= O_CREAT();
    }

    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Create the empty backing file.  Three-argument open with a
        # lexical handle keeps the filename from being parsed as part of
        # the open mode and avoids a shared global filehandle.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
b39c5158Smillert
# tie() entry point for hashes.  This is a real call rather than a goto
# because tie_hash_or_array looks at the name of its caller to tell a
# hash tie from an array tie.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}
b39c5158Smillert
# tie() entry point for arrays.  This is a real call rather than a goto
# because tie_hash_or_array looks at the name of its caller to tell a
# hash tie from an array tie.
sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
b39c5158Smillert
# Empty the database: walk it with seq() from R_FIRST through R_NEXT
# collecting every key, then delete the collected keys in reverse order.
# The statuses returned by del() are not checked.
sub CLEAR
{
    my $self = shift;

    my ($key, $value) = (0, "") ;
    my @found ;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT()))
    {
        push @found, $key ;
    }

    foreach my $doomed (reverse @found) {
        $self->del($doomed) ;
    }
}
b39c5158Smillert
# Tied-array EXTEND hook: deliberately does nothing.
sub EXTEND
{
    return ;
}
b39c5158Smillert
# Tied-array resize.  To shrink, delete records from the current last
# index down to the new length; to grow, store an empty string at the new
# last index.  Does nothing when the length is unchanged.
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length > $current_length) {
        $self->put($length - 1, "") ;
    }
    elsif ($length < $current_length) {
        my $key = $current_length - 1 ;
        while ($key >= $length) {
            $self->del($key) ;
            -- $key ;
        }
    }
}
b39c5158Smillert
b39c5158Smillert
# Tied-array SPLICE, modelled on Perl's builtin splice(): remove LENGTH
# records starting at OFFSET, insert LIST in their place, and return the
# removed records (all of them in list context, the last one - or undef
# if none - in scalar context).
#
# Arguments: OFFSET [, LENGTH [, LIST]]
#   OFFSET - negative values count back from the end; undef is treated
#            as 0 with an 'uninitialized' warning.
#   LENGTH - when omitted, everything from OFFSET onward is removed; an
#            explicit undef is treated as 0 with an 'uninitialized'
#            warning; negative values leave that many records at the end.
#
# Dies if a negative OFFSET reaches before the start of the array, or if
# a get/del/put call on the object returns a non-zero status.
sub SPLICE
{
    my $self = shift;
    my $offset = shift;
    if (not defined $offset) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $offset = 0;
    }

    # Keep "LENGTH omitted" distinct from "LENGTH given".  Defaulting an
    # omitted LENGTH to 0 here made the 'removes everything from OFFSET
    # onward' case below unreachable, so splice(@tied, N) removed nothing.
    my $has_length = @_;
    my $length = $has_length ? shift : undef;

    my @list = @_;

    my $size = $self->FETCHSIZE();

    # Builds the message for a failed get/del/put.  $sep is the
    # punctuation placed before "error status" in that call's message.
    my $db_error = sub {
        my ($call, $status, $sep) = @_;
        my $msg = "error from Berkeley DB on $call";
        if ($status == 1) {
            $msg .= ' (no such element?)';
        }
        else {
            $msg .= "$sep error status $status";
            if (defined $! and $! ne '') {
                $msg .= ", message $!";
            }
        }
        return $msg;
    };

    # 'If OFFSET is negative then it starts that far from the end of
    # the array.'
    #
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    # Carping about definedness comes _after_ the OFFSET sanity check.
    # This is so we get the same error messages as Perl's splice().
    #
    if ($has_length and not defined $length) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $length = 0;
    }

    if ($offset > $size) {
        $offset = $size;
        warnings::warnif('misc', 'splice() offset past end of array')
            if $has_length ? $splice_end_array : $splice_end_array_no_length;
    }

    # 'If LENGTH is omitted, removes everything from OFFSET onward.'
    if (not $has_length) {
        $length = $size - $offset;
    }

    # 'If LENGTH is negative, leave that many elements off the end of
    # the array.'
    #
    if ($length < 0) {
        $length = $size - $offset + $length;

        if ($length < 0) {
            # The user must have specified a length bigger than the
            # length of the array passed in.  But perl's splice()
            # doesn't catch this, it just behaves as for length=0.
            #
            $length = 0;
        }
    }

    if ($length > $size - $offset) {
        $length = $size - $offset;
    }

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # 'Removes the elements designated by OFFSET and LENGTH from an
    # array,'...
    #
    # Always operate on index $offset: each del() is expected to shift
    # the following records down by one.
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        die $db_error->("get($offset, \$old)", $status, ':')
            if $status != 0;
        push @removed, $old;

        $status = $self->del($offset);
        die $db_error->("del($offset)", $status, ':')
            if $status != 0;

        -- $num_elems;
    }

    # ...'and replaces them with the elements of LIST, if any.'
    #
    # NOTE(review): the loop stops at the first undef in LIST, so any
    # elements after it are not inserted - confirm this is intended.
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;
        my $status;
        if ($pos >= $num_elems) {
            # Past the last record: a plain put appends.
            $status = $self->put($pos, $elem);
        }
        else {
            # Otherwise insert in front of the record currently at $pos.
            $status = $self->put($pos, $elem, $self->R_IBEFORE);
        }

        die $db_error->("put($pos, $elem, ...)", $status, ',')
            if $status != 0;

        # put() receives $pos itself, so guard against it being changed.
        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
          if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    # 'In list context, returns the elements removed from the array.'
    return @removed if wantarray;

    # Void context: nothing to return.
    return unless defined wantarray;

    # 'In scalar context, returns the last element removed, or undef if
    # no elements are removed.'
    return undef unless @removed;
    my $last = pop @removed;
    return "$last";
}
sub ::DB_File::splice { &SPLICE }
b39c5158Smillert
# Look for a specific key/value pair.  Positions with seq(R_CURSOR) at
# the key and steps forward with seq(R_NEXT) until a record whose key and
# value both match is found.  Returns 0 on a match, otherwise the
# non-zero status that ended the scan.  Croaks on wrong argument count.
sub find_dup
{
    @_ == 3
        or croak "Usage: \$db->find_dup(key,value)\n" ;

    my ($db, $origkey, $value_wanted) = @_ ;

    # seq() overwrites these with the record it lands on.
    my $key   = $origkey ;
    my $value = 0 ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0
            if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
b39c5158Smillert
# Delete one specific key/value pair: locate it with find_dup, then
# delete with the R_CURSOR flag.  Returns find_dup's status when that is
# non-zero, otherwise the status of del().  Croaks on wrong argument
# count.
sub del_dup
{
    @_ == 3
        or croak "Usage: \$db->del_dup(key,value)\n" ;

    my ($db, $key, $value) = @_ ;

    my ($found) = $db->find_dup($key, $value) ;
    return $found if $found != 0 ;

    return $db->del($key, R_CURSOR()) ;
}
b39c5158Smillert
# Report the records stored under one key.
#   scalar context         - the number of records with that key
#   list context           - the values of those records
#   list context, flag set - value => occurrence-count pairs
# Croaks on wrong argument count.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $want_list = wantarray ;

    # seq() overwrites $key and $value, so keep the requested key aside.
    my $origkey = $key ;
    my $value   = 0 ;

    my (%tally, @found) ;
    my $matches = 0 ;

    # Scan forward from the key until seq() stops succeeding (non-zero
    # status) or a different key is encountered.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {
        if    (! $want_list) { ++ $matches }
        elsif ($flag)        { ++ $tally{$value} }
        else                 { push @found, $value }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $matches unless $want_list ;
    return $flag ? %tally : @found ;
}
b39c5158Smillert
b39c5158Smillert
# Storable hook: always croaks, naming the class of the object that was
# about to be frozen.
sub STORABLE_freeze
{
    my ($obj) = @_ ;

    my $class = ref $obj ;
    croak "Cannot freeze $class object\n" ;
}
898184e3Ssthen
# Storable hook: always croaks, naming the class of the object that was
# about to be thawed.
sub STORABLE_thaw
{
    my ($obj) = @_ ;

    my $class = ref $obj ;
    croak "Cannot thaw $class object\n" ;
}
898184e3Ssthen
898184e3Ssthen
898184e3Ssthen
b39c5158Smillert1;
b39c5158Smillert__END__
b39c5158Smillert
b39c5158Smillert=head1 NAME
b39c5158Smillert
b39c5158SmillertDB_File - Perl5 access to Berkeley DB version 1.x
b39c5158Smillert
b39c5158Smillert=head1 SYNOPSIS
b39c5158Smillert
b39c5158Smillert use DB_File;
b39c5158Smillert
b39c5158Smillert [$X =] tie %hash,  'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
b39c5158Smillert [$X =] tie %hash,  'DB_File', $filename, $flags, $mode, $DB_BTREE ;
b39c5158Smillert [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
b39c5158Smillert
b39c5158Smillert $status = $X->del($key [, $flags]) ;
b39c5158Smillert $status = $X->put($key, $value [, $flags]) ;
b39c5158Smillert $status = $X->get($key, $value [, $flags]) ;
b39c5158Smillert $status = $X->seq($key, $value, $flags) ;
b39c5158Smillert $status = $X->sync([$flags]) ;
b39c5158Smillert $status = $X->fd ;
b39c5158Smillert
b39c5158Smillert # BTREE only
b39c5158Smillert $count = $X->get_dup($key) ;
b39c5158Smillert @list  = $X->get_dup($key) ;
b39c5158Smillert %list  = $X->get_dup($key, 1) ;
b39c5158Smillert $status = $X->find_dup($key, $value) ;
b39c5158Smillert $status = $X->del_dup($key, $value) ;
b39c5158Smillert
b39c5158Smillert # RECNO only
b39c5158Smillert $a = $X->length;
b39c5158Smillert $a = $X->pop ;
b39c5158Smillert $X->push(list);
b39c5158Smillert $a = $X->shift;
b39c5158Smillert $X->unshift(list);
b39c5158Smillert @r = $X->splice(offset, length, elements);
b39c5158Smillert
b39c5158Smillert # DBM Filters
b39c5158Smillert $old_filter = $db->filter_store_key  ( sub { ... } ) ;
b39c5158Smillert $old_filter = $db->filter_store_value( sub { ... } ) ;
b39c5158Smillert $old_filter = $db->filter_fetch_key  ( sub { ... } ) ;
b39c5158Smillert $old_filter = $db->filter_fetch_value( sub { ... } ) ;
b39c5158Smillert
b39c5158Smillert untie %hash ;
b39c5158Smillert untie @array ;
b39c5158Smillert
b39c5158Smillert=head1 DESCRIPTION
b39c5158Smillert
b39c5158SmillertB<DB_File> is a module which allows Perl programs to make use of the
b39c5158Smillertfacilities provided by Berkeley DB version 1.x (if you have a newer
b39c5158Smillertversion of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
b39c5158SmillertIt is assumed that you have a copy of the Berkeley DB manual pages at
b39c5158Smillerthand when reading this documentation. The interface defined here
b39c5158Smillertmirrors the Berkeley DB interface closely.
b39c5158Smillert
b39c5158SmillertBerkeley DB is a C library which provides a consistent interface to a
b39c5158Smillertnumber of database formats.  B<DB_File> provides an interface to all
b39c5158Smillertthree of the database types currently supported by Berkeley DB.
b39c5158Smillert
b39c5158SmillertThe file types are:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item B<DB_HASH>
b39c5158Smillert
b39c5158SmillertThis database type allows arbitrary key/value pairs to be stored in data
b39c5158Smillertfiles. This is equivalent to the functionality provided by other
b39c5158Smillerthashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
b39c5158Smillertthe files created using DB_HASH are not compatible with any of the
b39c5158Smillertother packages mentioned.
b39c5158Smillert
b39c5158SmillertA default hashing algorithm, which will be adequate for most
b39c5158Smillertapplications, is built into Berkeley DB. If you do need to use your own
b39c5158Smillerthashing algorithm it is possible to write your own in Perl and have
b39c5158SmillertB<DB_File> use it instead.
b39c5158Smillert
b39c5158Smillert=item B<DB_BTREE>
b39c5158Smillert
The btree format allows arbitrary key/value pairs to be stored in a
sorted, balanced tree.
b39c5158Smillert
b39c5158SmillertAs with the DB_HASH format, it is possible to provide a user defined
b39c5158SmillertPerl routine to perform the comparison of keys. By default, though, the
b39c5158Smillertkeys are stored in lexical order.
b39c5158Smillert
b39c5158Smillert=item B<DB_RECNO>
b39c5158Smillert
b39c5158SmillertDB_RECNO allows both fixed-length and variable-length flat text files
b39c5158Smillertto be manipulated using the same key/value pair interface as in DB_HASH
b39c5158Smillertand DB_BTREE.  In this case the key will consist of a record (line)
b39c5158Smillertnumber.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head2 Using DB_File with Berkeley DB version 2 or greater
b39c5158Smillert
b39c5158SmillertAlthough B<DB_File> is intended to be used with Berkeley DB version 1,
b39c5158Smillertit can also be used with version 2, 3 or 4. In this case the interface is
b39c5158Smillertlimited to the functionality provided by Berkeley DB 1.x. Anywhere the
b39c5158Smillertversion 2 or greater interface differs, B<DB_File> arranges for it to work
b39c5158Smillertlike version 1. This feature allows B<DB_File> scripts that were built
b39c5158Smillertwith version 1 to be migrated to version 2 or greater without any changes.
b39c5158Smillert
b39c5158SmillertIf you want to make use of the new features available in Berkeley DB
56d68f1eSafresh12.x or greater, use the Perl module L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB> instead.
b39c5158Smillert
b39c5158SmillertB<Note:> The database file format has changed multiple times in Berkeley
b39c5158SmillertDB version 2, 3 and 4. If you cannot recreate your databases, you
b39c5158Smillertmust dump any existing databases with either the C<db_dump> or the
b39c5158SmillertC<db_dump185> utility that comes with Berkeley DB.
b39c5158SmillertOnce you have rebuilt DB_File to use Berkeley DB version 2 or greater,
b39c5158Smillertyour databases can be recreated using C<db_load>. Refer to the Berkeley DB
b39c5158Smillertdocumentation for further details.
b39c5158Smillert
b39c5158SmillertPlease read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
b39c5158SmillertDB with DB_File.
b39c5158Smillert
b39c5158Smillert=head2 Interface to Berkeley DB
b39c5158Smillert
b39c5158SmillertB<DB_File> allows access to Berkeley DB files using the tie() mechanism
b39c5158Smillertin Perl 5 (for full details, see L<perlfunc/tie()>). This facility
b39c5158Smillertallows B<DB_File> to access Berkeley DB files using either an
b39c5158Smillertassociative array (for DB_HASH & DB_BTREE file types) or an ordinary
b39c5158Smillertarray (for the DB_RECNO file type).
b39c5158Smillert
b39c5158SmillertIn addition to the tie() interface, it is also possible to access most
b39c5158Smillertof the functions provided in the Berkeley DB API directly.
b39c5158SmillertSee L<THE API INTERFACE>.
b39c5158Smillert
b39c5158Smillert=head2 Opening a Berkeley DB Database File
b39c5158Smillert
b39c5158SmillertBerkeley DB uses the function dbopen() to open or create a database.
b39c5158SmillertHere is the C prototype for dbopen():
b39c5158Smillert
b39c5158Smillert      DB*
b39c5158Smillert      dbopen (const char * file, int flags, int mode,
b39c5158Smillert              DBTYPE type, const void * openinfo)
b39c5158Smillert
b39c5158SmillertThe parameter C<type> is an enumeration which specifies which of the 3
b39c5158Smillertinterface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
b39c5158SmillertDepending on which of these is actually chosen, the final parameter,
b39c5158SmillertI<openinfo> points to a data structure which allows tailoring of the
b39c5158Smillertspecific interface method.
b39c5158Smillert
b39c5158SmillertThis interface is handled slightly differently in B<DB_File>. Here is
b39c5158Smillertan equivalent call using B<DB_File>:
b39c5158Smillert
b39c5158Smillert        tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
b39c5158Smillert
b39c5158SmillertThe C<filename>, C<flags> and C<mode> parameters are the direct
b39c5158Smillertequivalent of their dbopen() counterparts. The final parameter $DB_HASH
b39c5158Smillertperforms the function of both the C<type> and C<openinfo> parameters in
b39c5158Smillertdbopen().
b39c5158Smillert
b39c5158SmillertIn the example above $DB_HASH is actually a pre-defined reference to a
b39c5158Smillerthash object. B<DB_File> has three of these pre-defined references.
b39c5158SmillertApart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
b39c5158Smillert
The keys allowed in each of these pre-defined references are limited to
the names used in the equivalent C structure. So, for example, the
$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
C<ffactor>, C<hash>, C<lorder> and C<nelem>.
b39c5158Smillert
b39c5158SmillertTo change one of these elements, just assign to it like this:
b39c5158Smillert
b39c5158Smillert        $DB_HASH->{'cachesize'} = 10000 ;
b39c5158Smillert
b39c5158SmillertThe three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
b39c5158Smillertusually adequate for most applications.  If you do need to create extra
b39c5158Smillertinstances of these objects, constructors are available for each file
b39c5158Smillerttype.
b39c5158Smillert
b39c5158SmillertHere are examples of the constructors and the valid options available
b39c5158Smillertfor DB_HASH, DB_BTREE and DB_RECNO respectively.
b39c5158Smillert
eac174f2Safresh1     $a = DB_File::HASHINFO->new();
b39c5158Smillert     $a->{'bsize'} ;
b39c5158Smillert     $a->{'cachesize'} ;
b39c5158Smillert     $a->{'ffactor'};
b39c5158Smillert     $a->{'hash'} ;
b39c5158Smillert     $a->{'lorder'} ;
b39c5158Smillert     $a->{'nelem'} ;
b39c5158Smillert
eac174f2Safresh1     $b = DB_File::BTREEINFO->new();
b39c5158Smillert     $b->{'flags'} ;
b39c5158Smillert     $b->{'cachesize'} ;
b39c5158Smillert     $b->{'maxkeypage'} ;
b39c5158Smillert     $b->{'minkeypage'} ;
b39c5158Smillert     $b->{'psize'} ;
b39c5158Smillert     $b->{'compare'} ;
b39c5158Smillert     $b->{'prefix'} ;
b39c5158Smillert     $b->{'lorder'} ;
b39c5158Smillert
eac174f2Safresh1     $c = DB_File::RECNOINFO->new();
b39c5158Smillert     $c->{'bval'} ;
b39c5158Smillert     $c->{'cachesize'} ;
b39c5158Smillert     $c->{'psize'} ;
b39c5158Smillert     $c->{'flags'} ;
b39c5158Smillert     $c->{'lorder'} ;
b39c5158Smillert     $c->{'reclen'} ;
b39c5158Smillert     $c->{'bfname'} ;
b39c5158Smillert
b39c5158SmillertThe values stored in the hashes above are mostly the direct equivalent
b39c5158Smillertof their C counterpart. Like their C counterparts, all are set to
b39c5158Smillertdefault values - that means you don't have to set I<all> of the
b39c5158Smillertvalues when you only want to change one. Here is an example:
b39c5158Smillert
eac174f2Safresh1     $a = DB_File::HASHINFO->new();
b39c5158Smillert     $a->{'cachesize'} =  12345 ;
b39c5158Smillert     tie %y, 'DB_File', "filename", $flags, 0777, $a ;
b39c5158Smillert
b39c5158SmillertA few of the options need extra discussion here. When used, the C
b39c5158Smillertequivalents of the keys C<hash>, C<compare> and C<prefix> store pointers
b39c5158Smillertto C functions. In B<DB_File> these keys are used to store references
b39c5158Smillertto Perl subs. Below are templates for each of the subs:
b39c5158Smillert
b39c5158Smillert    sub hash
b39c5158Smillert    {
b39c5158Smillert        my ($data) = @_ ;
b39c5158Smillert        ...
b39c5158Smillert        # return the hash value for $data
b39c5158Smillert        return $hash ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    sub compare
b39c5158Smillert    {
b39c5158Smillert        my ($key1, $key2) = @_ ;
b39c5158Smillert        ...
b39c5158Smillert        # return  0 if $key1 eq $key2
b39c5158Smillert        #        -1 if $key1 lt $key2
b39c5158Smillert        #         1 if $key1 gt $key2
b39c5158Smillert        return (-1 , 0 or 1) ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    sub prefix
b39c5158Smillert    {
b39c5158Smillert        my ($key1, $key2) = @_ ;
b39c5158Smillert        ...
b39c5158Smillert        # return number of bytes of $key2 which are
b39c5158Smillert        # necessary to determine that it is greater than $key1
b39c5158Smillert        return $bytes ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158SmillertSee L<Changing the BTREE sort order> for an example of using the
b39c5158SmillertC<compare> template.
b39c5158Smillert
b39c5158SmillertIf you are using the DB_RECNO interface and you intend making use of
b39c5158SmillertC<bval>, you should check out L<The 'bval' Option>.
b39c5158Smillert
b39c5158Smillert=head2 Default Parameters
b39c5158Smillert
b39c5158SmillertIt is possible to omit some or all of the final 4 parameters in the
b39c5158Smillertcall to C<tie> and let them take default values. As DB_HASH is the most
b39c5158Smillertcommon file format used, the call:
b39c5158Smillert
b39c5158Smillert    tie %A, "DB_File", "filename" ;
b39c5158Smillert
b39c5158Smillertis equivalent to:
b39c5158Smillert
b39c5158Smillert    tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
b39c5158Smillert
b39c5158SmillertIt is also possible to omit the filename parameter, so the
b39c5158Smillertcall:
b39c5158Smillert
b39c5158Smillert    tie %A, "DB_File" ;
b39c5158Smillert
b39c5158Smillertis equivalent to:
b39c5158Smillert
b39c5158Smillert    tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
b39c5158Smillert
b39c5158SmillertSee L<In Memory Databases> for a discussion on the use of C<undef>
b39c5158Smillertin place of a filename.
b39c5158Smillert
b39c5158Smillert=head2 In Memory Databases
b39c5158Smillert
b39c5158SmillertBerkeley DB allows the creation of in-memory databases by using NULL
b39c5158Smillert(that is, a C<(char *)0> in C) in place of the filename.  B<DB_File>
b39c5158Smillertuses C<undef> instead of NULL to provide this functionality.
b39c5158Smillert
b39c5158Smillert=head1 DB_HASH
b39c5158Smillert
b39c5158SmillertThe DB_HASH file format is probably the most commonly used of the three
b39c5158Smillertfile formats that B<DB_File> supports. It is also very straightforward
b39c5158Smillertto use.
b39c5158Smillert
b39c5158Smillert=head2 A Simple Example
b39c5158Smillert
b39c5158SmillertThis example shows how to create a database, add key/value pairs to the
b39c5158Smillertdatabase, delete key/value pairs and finally how to enumerate the
b39c5158Smillertcontents of the database.
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert    our (%h, $k, $v) ;
b39c5158Smillert
b39c5158Smillert    unlink "fruit" ;
b39c5158Smillert    tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
b39c5158Smillert        or die "Cannot open file 'fruit': $!\n";
b39c5158Smillert
b39c5158Smillert    # Add a few key/value pairs to the file
b39c5158Smillert    $h{"apple"} = "red" ;
b39c5158Smillert    $h{"orange"} = "orange" ;
b39c5158Smillert    $h{"banana"} = "yellow" ;
b39c5158Smillert    $h{"tomato"} = "red" ;
b39c5158Smillert
b39c5158Smillert    # Check for existence of a key
b39c5158Smillert    print "Banana Exists\n\n" if $h{"banana"} ;
b39c5158Smillert
b39c5158Smillert    # Delete a key/value pair.
b39c5158Smillert    delete $h{"apple"} ;
b39c5158Smillert
b39c5158Smillert    # print the contents of the file
b39c5158Smillert    while (($k, $v) = each %h)
b39c5158Smillert      { print "$k -> $v\n" }
b39c5158Smillert
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158SmillertHere is the output:
b39c5158Smillert
b39c5158Smillert    Banana Exists
b39c5158Smillert
b39c5158Smillert    orange -> orange
b39c5158Smillert    tomato -> red
b39c5158Smillert    banana -> yellow
b39c5158Smillert
b39c5158SmillertNote that, like ordinary associative arrays, the order of the keys
b39c5158Smillertretrieved is apparently random.
b39c5158Smillert
b39c5158Smillert=head1 DB_BTREE
b39c5158Smillert
b39c5158SmillertThe DB_BTREE format is useful when you want to store data in a given
b39c5158Smillertorder. By default the keys will be stored in lexical order, but as you
b39c5158Smillertwill see from the example shown in the next section, it is very easy to
b39c5158Smillertdefine your own sorting function.
b39c5158Smillert
b39c5158Smillert=head2 Changing the BTREE sort order
b39c5158Smillert
b39c5158SmillertThis script shows how to override the default sorting algorithm that
b39c5158SmillertBTREE uses. Instead of using the normal lexical ordering, a case
b39c5158Smillertinsensitive compare function will be used.
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my %h ;
b39c5158Smillert
b39c5158Smillert    sub Compare
b39c5158Smillert    {
b39c5158Smillert        my ($key1, $key2) = @_ ;
b39c5158Smillert        "\L$key1" cmp "\L$key2" ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    # specify the Perl sub that will do the comparison
b39c5158Smillert    $DB_BTREE->{'compare'} = \&Compare ;
b39c5158Smillert
b39c5158Smillert    unlink "tree" ;
b39c5158Smillert    tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open file 'tree': $!\n" ;
b39c5158Smillert
b39c5158Smillert    # Add a key/value pair to the file
b39c5158Smillert    $h{'Wall'} = 'Larry' ;
b39c5158Smillert    $h{'Smith'} = 'John' ;
b39c5158Smillert    $h{'mouse'} = 'mickey' ;
b39c5158Smillert    $h{'duck'}  = 'donald' ;
b39c5158Smillert
b39c5158Smillert    # Delete
b39c5158Smillert    delete $h{"duck"} ;
b39c5158Smillert
b39c5158Smillert    # Cycle through the keys printing them in order.
b39c5158Smillert    # Note it is not necessary to sort the keys as
b39c5158Smillert    # the btree will have kept them in order automatically.
b39c5158Smillert    foreach (keys %h)
b39c5158Smillert      { print "$_\n" }
b39c5158Smillert
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158SmillertHere is the output from the code above.
b39c5158Smillert
b39c5158Smillert    mouse
b39c5158Smillert    Smith
b39c5158Smillert    Wall
b39c5158Smillert
b39c5158SmillertThere are a few points to bear in mind if you want to change the
b39c5158Smillertordering in a BTREE database:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item 1.
b39c5158Smillert
b39c5158SmillertThe new compare function must be specified when you create the database.
b39c5158Smillert
b39c5158Smillert=item 2.
b39c5158Smillert
b39c5158SmillertYou cannot change the ordering once the database has been created. Thus
b39c5158Smillertyou must use the same compare function every time you access the
b39c5158Smillertdatabase.
b39c5158Smillert
b39c5158Smillert=item 3.
b39c5158Smillert
b39c5158SmillertDuplicate keys are entirely defined by the comparison function.
b39c5158SmillertIn the case-insensitive example above, the keys: 'KEY' and 'key'
b39c5158Smillertwould be considered duplicates, and assigning to the second one
b39c5158Smillertwould overwrite the first. If duplicates are allowed for (with the
b39c5158SmillertR_DUP flag discussed below), only a single copy of duplicate keys
b39c5158Smillertis stored in the database --- so (again with the example above) assigning
b39c5158Smillertthree values to the keys: 'KEY', 'Key', and 'key' would leave just
b39c5158Smillertthe first key: 'KEY' in the database with three values. For some
b39c5158Smillertsituations this results in information loss, so care should be taken
b39c5158Smillertto provide fully qualified comparison functions when necessary.
b39c5158SmillertFor example, the above comparison routine could be modified to
b39c5158Smillertadditionally compare case-sensitively if two keys are equal in the
b39c5158Smillertcase insensitive comparison:
b39c5158Smillert
b39c5158Smillert    sub compare {
b39c5158Smillert        my($key1, $key2) = @_;
b39c5158Smillert        lc $key1 cmp lc $key2 ||
b39c5158Smillert        $key1 cmp $key2;
b39c5158Smillert    }
b39c5158Smillert
b39c5158SmillertAnd now you will only have duplicates when the keys themselves
b39c5158Smillertare truly the same. (note: in versions of the db library prior to
b39c5158Smillertabout November 1996, such duplicate keys were retained so it was
b39c5158Smillertpossible to recover the original keys in sets of keys that
b39c5158Smillertcompared as equal).
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head2 Handling Duplicate Keys
b39c5158Smillert
b39c5158SmillertThe BTREE file type optionally allows a single key to be associated
b39c5158Smillertwith an arbitrary number of values. This option is enabled by setting
b39c5158Smillertthe flags element of C<$DB_BTREE> to R_DUP when creating the database.
b39c5158Smillert
b39c5158SmillertThere are some difficulties in using the tied hash interface if you
b39c5158Smillertwant to manipulate a BTREE database with duplicate keys. Consider this
b39c5158Smillertcode:
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my ($filename, %h) ;
b39c5158Smillert
b39c5158Smillert    $filename = "tree" ;
b39c5158Smillert    unlink $filename ;
b39c5158Smillert
b39c5158Smillert    # Enable duplicate records
b39c5158Smillert    $DB_BTREE->{'flags'} = R_DUP ;
b39c5158Smillert
b39c5158Smillert    tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open $filename: $!\n";
b39c5158Smillert
b39c5158Smillert    # Add some key/value pairs to the file
b39c5158Smillert    $h{'Wall'} = 'Larry' ;
b39c5158Smillert    $h{'Wall'} = 'Brick' ; # Note the duplicate key
b39c5158Smillert    $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
b39c5158Smillert    $h{'Smith'} = 'John' ;
b39c5158Smillert    $h{'mouse'} = 'mickey' ;
b39c5158Smillert
b39c5158Smillert    # iterate through the associative array
b39c5158Smillert    # and print each key/value pair.
b39c5158Smillert    foreach (sort keys %h)
b39c5158Smillert      { print "$_  -> $h{$_}\n" }
b39c5158Smillert
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158SmillertHere is the output:
b39c5158Smillert
b39c5158Smillert    Smith   -> John
b39c5158Smillert    Wall    -> Larry
b39c5158Smillert    Wall    -> Larry
b39c5158Smillert    Wall    -> Larry
b39c5158Smillert    mouse   -> mickey
b39c5158Smillert
b39c5158SmillertAs you can see 3 records have been successfully created with key C<Wall>
b39c5158Smillert- the only thing is, when they are retrieved from the database they
b39c5158SmillertI<seem> to have the same value, namely C<Larry>. The problem is caused
b39c5158Smillertby the way that the associative array interface works. Basically, when
b39c5158Smillertthe associative array interface is used to fetch the value associated
b39c5158Smillertwith a given key, it will only ever retrieve the first value.
b39c5158Smillert
b39c5158SmillertAlthough it may not be immediately obvious from the code above, the
b39c5158Smillertassociative array interface can be used to write values with duplicate
b39c5158Smillertkeys, but it cannot be used to read them back from the database.
b39c5158Smillert
b39c5158SmillertThe way to get around this problem is to use the Berkeley DB API method
b39c5158Smillertcalled C<seq>.  This method allows sequential access to key/value
b39c5158Smillertpairs. See L<THE API INTERFACE> for details of both the C<seq> method
b39c5158Smillertand the API in general.
b39c5158Smillert
b39c5158SmillertHere is the script above rewritten using the C<seq> API method.
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my ($filename, $x, %h, $status, $key, $value) ;
b39c5158Smillert
b39c5158Smillert    $filename = "tree" ;
b39c5158Smillert    unlink $filename ;
b39c5158Smillert
b39c5158Smillert    # Enable duplicate records
b39c5158Smillert    $DB_BTREE->{'flags'} = R_DUP ;
b39c5158Smillert
b39c5158Smillert    $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open $filename: $!\n";
b39c5158Smillert
b39c5158Smillert    # Add some key/value pairs to the file
b39c5158Smillert    $h{'Wall'} = 'Larry' ;
b39c5158Smillert    $h{'Wall'} = 'Brick' ; # Note the duplicate key
b39c5158Smillert    $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
b39c5158Smillert    $h{'Smith'} = 'John' ;
b39c5158Smillert    $h{'mouse'} = 'mickey' ;
b39c5158Smillert
b39c5158Smillert    # iterate through the btree using seq
b39c5158Smillert    # and print each key/value pair.
b39c5158Smillert    $key = $value = 0 ;
b39c5158Smillert    for ($status = $x->seq($key, $value, R_FIRST) ;
b39c5158Smillert         $status == 0 ;
b39c5158Smillert         $status = $x->seq($key, $value, R_NEXT) )
b39c5158Smillert      {  print "$key -> $value\n" }
b39c5158Smillert
b39c5158Smillert    undef $x ;
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158SmillertThat prints:
b39c5158Smillert
b39c5158Smillert    Smith   -> John
b39c5158Smillert    Wall    -> Brick
b39c5158Smillert    Wall    -> Brick
b39c5158Smillert    Wall    -> Larry
b39c5158Smillert    mouse   -> mickey
b39c5158Smillert
b39c5158SmillertThis time we have got all the key/value pairs, including the multiple
b39c5158Smillertvalues associated with the key C<Wall>.
b39c5158Smillert
b39c5158SmillertTo make life easier when dealing with duplicate keys, B<DB_File> comes with
b39c5158Smillerta few utility methods.
b39c5158Smillert
b39c5158Smillert=head2 The get_dup() Method
b39c5158Smillert
b39c5158SmillertThe C<get_dup> method assists in
b39c5158Smillertreading duplicate values from BTREE databases. The method can take the
b39c5158Smillertfollowing forms:
b39c5158Smillert
b39c5158Smillert    $count = $x->get_dup($key) ;
b39c5158Smillert    @list  = $x->get_dup($key) ;
b39c5158Smillert    %list  = $x->get_dup($key, 1) ;
b39c5158Smillert
b39c5158SmillertIn a scalar context the method returns the number of values associated
b39c5158Smillertwith the key, C<$key>.
b39c5158Smillert
b39c5158SmillertIn list context, it returns all the values which match C<$key>. Note
b39c5158Smillertthat the values will be returned in an apparently random order.
b39c5158Smillert
b39c5158SmillertIn list context, if the second parameter is present and evaluates
b39c5158SmillertTRUE, the method returns an associative array. The keys of the
b39c5158Smillertassociative array correspond to the values that matched in the BTREE
b39c5158Smillertand the values of the array are a count of the number of times that
b39c5158Smillertparticular value occurred in the BTREE.
b39c5158Smillert
b39c5158SmillertSo assuming the database created above, we can use C<get_dup> like
b39c5158Smillertthis:
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my ($filename, $x, %h) ;
b39c5158Smillert
b39c5158Smillert    $filename = "tree" ;
b39c5158Smillert
b39c5158Smillert    # Enable duplicate records
b39c5158Smillert    $DB_BTREE->{'flags'} = R_DUP ;
b39c5158Smillert
b39c5158Smillert    $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open $filename: $!\n";
b39c5158Smillert
b39c5158Smillert    my $cnt  = $x->get_dup("Wall") ;
b39c5158Smillert    print "Wall occurred $cnt times\n" ;
b39c5158Smillert
b39c5158Smillert    my %hash = $x->get_dup("Wall", 1) ;
b39c5158Smillert    print "Larry is there\n" if $hash{'Larry'} ;
b39c5158Smillert    print "There are $hash{'Brick'} Brick Walls\n" ;
b39c5158Smillert
b39c5158Smillert    my @list = sort $x->get_dup("Wall") ;
b39c5158Smillert    print "Wall =>      [@list]\n" ;
b39c5158Smillert
b39c5158Smillert    @list = $x->get_dup("Smith") ;
b39c5158Smillert    print "Smith =>     [@list]\n" ;
b39c5158Smillert
b39c5158Smillert    @list = $x->get_dup("Dog") ;
b39c5158Smillert    print "Dog =>       [@list]\n" ;
b39c5158Smillert
b39c5158Smillert
b39c5158Smillertand it will print:
b39c5158Smillert
b39c5158Smillert    Wall occurred 3 times
b39c5158Smillert    Larry is there
b39c5158Smillert    There are 2 Brick Walls
b39c5158Smillert    Wall =>     [Brick Brick Larry]
b39c5158Smillert    Smith =>    [John]
b39c5158Smillert    Dog =>      []
b39c5158Smillert
b39c5158Smillert=head2 The find_dup() Method
b39c5158Smillert
b39c5158Smillert    $status = $X->find_dup($key, $value) ;
b39c5158Smillert
b39c5158SmillertThis method checks for the existence of a specific key/value pair. If the
b39c5158Smillertpair exists, the cursor is left pointing to the pair and the method
b39c5158Smillertreturns 0. Otherwise the method returns a non-zero value.
b39c5158Smillert
b39c5158SmillertAssuming the database from the previous example:
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my ($filename, $x, %h, $found) ;
b39c5158Smillert
b39c5158Smillert    $filename = "tree" ;
b39c5158Smillert
b39c5158Smillert    # Enable duplicate records
b39c5158Smillert    $DB_BTREE->{'flags'} = R_DUP ;
b39c5158Smillert
b39c5158Smillert    $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open $filename: $!\n";
b39c5158Smillert
b39c5158Smillert    $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
b39c5158Smillert    print "Larry Wall is $found there\n" ;
b39c5158Smillert
b39c5158Smillert    $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
b39c5158Smillert    print "Harry Wall is $found there\n" ;
b39c5158Smillert
b39c5158Smillert    undef $x ;
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158Smillertprints this
b39c5158Smillert
b39c5158Smillert    Larry Wall is  there
b39c5158Smillert    Harry Wall is not there
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert=head2 The del_dup() Method
b39c5158Smillert
b39c5158Smillert    $status = $X->del_dup($key, $value) ;
b39c5158Smillert
b39c5158SmillertThis method deletes a specific key/value pair. It returns
b39c5158Smillert0 if the pair exists and has been deleted successfully.
b39c5158SmillertOtherwise the method returns a non-zero value.
b39c5158Smillert
b39c5158SmillertAgain assuming the existence of the C<tree> database:
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my ($filename, $x, %h, $found) ;
b39c5158Smillert
b39c5158Smillert    $filename = "tree" ;
b39c5158Smillert
b39c5158Smillert    # Enable duplicate records
b39c5158Smillert    $DB_BTREE->{'flags'} = R_DUP ;
b39c5158Smillert
b39c5158Smillert    $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open $filename: $!\n";
b39c5158Smillert
b39c5158Smillert    $x->del_dup("Wall", "Larry") ;
b39c5158Smillert
b39c5158Smillert    $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
b39c5158Smillert    print "Larry Wall is $found there\n" ;
b39c5158Smillert
b39c5158Smillert    undef $x ;
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158Smillertprints this
b39c5158Smillert
b39c5158Smillert    Larry Wall is not there
b39c5158Smillert
b39c5158Smillert=head2 Matching Partial Keys
b39c5158Smillert
b39c5158SmillertThe BTREE interface has a feature which allows partial keys to be
b39c5158Smillertmatched. This functionality is I<only> available when the C<seq> method
b39c5158Smillertis used along with the R_CURSOR flag.
b39c5158Smillert
b39c5158Smillert    $x->seq($key, $value, R_CURSOR) ;
b39c5158Smillert
b39c5158SmillertHere is the relevant quote from the dbopen man page where it defines
b39c5158Smillertthe use of the R_CURSOR flag with seq:
b39c5158Smillert
b39c5158Smillert    Note, for the DB_BTREE access method, the returned key is not
b39c5158Smillert    necessarily an exact match for the specified key. The returned key
b39c5158Smillert    is the smallest key greater than or equal to the specified key,
b39c5158Smillert    permitting partial key matches and range searches.
b39c5158Smillert
b39c5158SmillertIn the example script below, the C<match> sub uses this feature to find
b39c5158Smillertand print the first matching key/value pair given a partial key.
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert    use Fcntl ;
b39c5158Smillert
b39c5158Smillert    my ($filename, $x, %h, $st, $key, $value) ;
b39c5158Smillert
b39c5158Smillert    sub match
b39c5158Smillert    {
b39c5158Smillert        my $key = shift ;
b39c5158Smillert        my $value = 0;
b39c5158Smillert        my $orig_key = $key ;
b39c5158Smillert        $x->seq($key, $value, R_CURSOR) ;
b39c5158Smillert        print "$orig_key\t-> $key\t-> $value\n" ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    $filename = "tree" ;
b39c5158Smillert    unlink $filename ;
b39c5158Smillert
b39c5158Smillert    $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
b39c5158Smillert        or die "Cannot open $filename: $!\n";
b39c5158Smillert
b39c5158Smillert    # Add some key/value pairs to the file
b39c5158Smillert    $h{'mouse'} = 'mickey' ;
b39c5158Smillert    $h{'Wall'} = 'Larry' ;
b39c5158Smillert    $h{'Walls'} = 'Brick' ;
b39c5158Smillert    $h{'Smith'} = 'John' ;
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert    $key = $value = 0 ;
b39c5158Smillert    print "IN ORDER\n" ;
b39c5158Smillert    for ($st = $x->seq($key, $value, R_FIRST) ;
b39c5158Smillert         $st == 0 ;
b39c5158Smillert         $st = $x->seq($key, $value, R_NEXT) )
b39c5158Smillert
b39c5158Smillert      {  print "$key    -> $value\n" }
b39c5158Smillert
b39c5158Smillert    print "\nPARTIAL MATCH\n" ;
b39c5158Smillert
b39c5158Smillert    match "Wa" ;
b39c5158Smillert    match "A" ;
b39c5158Smillert    match "a" ;
b39c5158Smillert
b39c5158Smillert    undef $x ;
b39c5158Smillert    untie %h ;
b39c5158Smillert
b39c5158SmillertHere is the output:
b39c5158Smillert
b39c5158Smillert    IN ORDER
b39c5158Smillert    Smith -> John
b39c5158Smillert    Wall  -> Larry
b39c5158Smillert    Walls -> Brick
b39c5158Smillert    mouse -> mickey
b39c5158Smillert
b39c5158Smillert    PARTIAL MATCH
b39c5158Smillert    Wa -> Wall  -> Larry
b39c5158Smillert    A  -> Smith -> John
b39c5158Smillert    a  -> mouse -> mickey
b39c5158Smillert
b39c5158Smillert=head1 DB_RECNO
b39c5158Smillert
b39c5158SmillertDB_RECNO provides an interface to flat text files. Both variable and
b39c5158Smillertfixed length records are supported.
b39c5158Smillert
b39c5158SmillertIn order to make RECNO more compatible with Perl, the array offset for
b39c5158Smillertall RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
b39c5158Smillert
b39c5158SmillertAs with normal Perl arrays, a RECNO array can be accessed using
b39c5158Smillertnegative indexes. The index -1 refers to the last element of the array,
b39c5158Smillert-2 the second last, and so on. Attempting to access an element before
b39c5158Smillertthe start of the array will raise a fatal run-time error.
b39c5158Smillert
b39c5158Smillert=head2 The 'bval' Option
b39c5158Smillert
b39c5158SmillertThe operation of the bval option warrants some discussion. Here is the
b39c5158Smillertdefinition of bval from the Berkeley DB 1.85 recno manual page:
b39c5158Smillert
b39c5158Smillert    The delimiting byte to be used to mark  the  end  of  a
b39c5158Smillert    record for variable-length records, and the pad charac-
b39c5158Smillert    ter for fixed-length records.  If no  value  is  speci-
b39c5158Smillert    fied,  newlines  (``\n'')  are  used to mark the end of
b39c5158Smillert    variable-length records and  fixed-length  records  are
b39c5158Smillert    padded with spaces.
b39c5158Smillert
b39c5158SmillertThe second sentence is wrong. In actual fact bval will only default to
b39c5158SmillertC<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
b39c5158Smillertopeninfo parameter is used at all, the value that happens to be in bval
b39c5158Smillertwill be used. That means you always have to specify bval when making
b39c5158Smillertuse of any of the options in the openinfo parameter. This documentation
b39c5158Smillerterror will be fixed in the next release of Berkeley DB.
b39c5158Smillert
b39c5158SmillertThat clarifies the situation with regard to Berkeley DB itself. What
b39c5158Smillertabout B<DB_File>? Well, the behavior defined in the quote above is
b39c5158Smillertquite useful, so B<DB_File> conforms to it.
b39c5158Smillert
b39c5158SmillertThat means that you can specify other options (e.g. cachesize) and
b39c5158Smillertstill have bval default to C<"\n"> for variable length records, and
b39c5158Smillertspace for fixed length records.
b39c5158Smillert
b39c5158SmillertAlso note that the bval option only allows you to specify a single byte
b39c5158Smillertas a delimiter.
b39c5158Smillert
b39c5158Smillert=head2 A Simple Example
b39c5158Smillert
b39c5158SmillertHere is a simple example that uses RECNO (if you are using a version
b39c5158Smillertof Perl earlier than 5.004_57 this example won't work -- see
b39c5158SmillertL<Extra RECNO Methods> for a workaround).
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my $filename = "text" ;
b39c5158Smillert    unlink $filename ;
b39c5158Smillert
b39c5158Smillert    my @h ;
b39c5158Smillert    tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
b39c5158Smillert        or die "Cannot open file 'text': $!\n" ;
b39c5158Smillert
b39c5158Smillert    # Add a few key/value pairs to the file
b39c5158Smillert    $h[0] = "orange" ;
b39c5158Smillert    $h[1] = "blue" ;
b39c5158Smillert    $h[2] = "yellow" ;
b39c5158Smillert
b39c5158Smillert    push @h, "green", "black" ;
b39c5158Smillert
b39c5158Smillert    my $elements = scalar @h ;
b39c5158Smillert    print "The array contains $elements entries\n" ;
b39c5158Smillert
b39c5158Smillert    my $last = pop @h ;
b39c5158Smillert    print "popped $last\n" ;
b39c5158Smillert
b39c5158Smillert    unshift @h, "white" ;
b39c5158Smillert    my $first = shift @h ;
b39c5158Smillert    print "shifted $first\n" ;
b39c5158Smillert
b39c5158Smillert    # Check for existence of a key
b39c5158Smillert    print "Element 1 Exists with value $h[1]\n" if $h[1] ;
b39c5158Smillert
b39c5158Smillert    # use a negative index
b39c5158Smillert    print "The last element is $h[-1]\n" ;
b39c5158Smillert    print "The 2nd last element is $h[-2]\n" ;
b39c5158Smillert
b39c5158Smillert    untie @h ;
b39c5158Smillert
b39c5158SmillertHere is the output from the script:
b39c5158Smillert
b39c5158Smillert    The array contains 5 entries
b39c5158Smillert    popped black
b39c5158Smillert    shifted white
b39c5158Smillert    Element 1 Exists with value blue
b39c5158Smillert    The last element is green
b39c5158Smillert    The 2nd last element is yellow
b39c5158Smillert
b39c5158Smillert=head2 Extra RECNO Methods
b39c5158Smillert
b39c5158SmillertIf you are using a version of Perl earlier than 5.004_57, the tied
b39c5158Smillertarray interface is quite limited. In the example script above
b39c5158SmillertC<push>, C<pop>, C<shift>, C<unshift>
b39c5158Smillertor determining the array length will not work with a tied array.
b39c5158Smillert
b39c5158SmillertTo make the interface more useful for older versions of Perl, a number
b39c5158Smillertof methods are supplied with B<DB_File> to simulate the missing array
b39c5158Smillertoperations. All these methods are accessed via the object returned from
b39c5158Smillertthe tie call.
b39c5158Smillert
b39c5158SmillertHere are the methods:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item B<$X-E<gt>push(list) ;>
b39c5158Smillert
b39c5158SmillertPushes the elements of C<list> to the end of the array.
b39c5158Smillert
b39c5158Smillert=item B<$value = $X-E<gt>pop ;>
b39c5158Smillert
b39c5158SmillertRemoves and returns the last element of the array.
b39c5158Smillert
b39c5158Smillert=item B<$X-E<gt>shift>
b39c5158Smillert
b39c5158SmillertRemoves and returns the first element of the array.
b39c5158Smillert
b39c5158Smillert=item B<$X-E<gt>unshift(list) ;>
b39c5158Smillert
b39c5158SmillertPushes the elements of C<list> to the start of the array.
b39c5158Smillert
b39c5158Smillert=item B<$X-E<gt>length>
b39c5158Smillert
b39c5158SmillertReturns the number of elements in the array.
b39c5158Smillert
b39c5158Smillert=item B<$X-E<gt>splice(offset, length, elements);>
b39c5158Smillert
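b39c5158SmillertReturns a splice of the array, in the manner of Perl's built-in
b39c5158SmillertC<splice> function: the C<length> elements starting at C<offset> are
b39c5158Smillertremoved and returned, and C<elements>, if given, are inserted in their
b39c5158Smillertplace.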
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head2 Another Example
b39c5158Smillert
b39c5158SmillertHere is a more complete example that makes use of some of the methods
b39c5158Smillertdescribed above. It also makes use of the API interface directly (see
b39c5158SmillertL<THE API INTERFACE>).
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    my (@h, $H, $file, $i) ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert    use Fcntl ;
b39c5158Smillert
b39c5158Smillert    $file = "text" ;
b39c5158Smillert
b39c5158Smillert    unlink $file ;
b39c5158Smillert
b39c5158Smillert    $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
b39c5158Smillert        or die "Cannot open file $file: $!\n" ;
b39c5158Smillert
b39c5158Smillert    # first create a text file to play with
b39c5158Smillert    $h[0] = "zero" ;
b39c5158Smillert    $h[1] = "one" ;
b39c5158Smillert    $h[2] = "two" ;
b39c5158Smillert    $h[3] = "three" ;
b39c5158Smillert    $h[4] = "four" ;
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert    # Print the records in order.
b39c5158Smillert    #
b39c5158Smillert    # The length method is needed here because evaluating a tied
b39c5158Smillert    # array in a scalar context does not return the number of
b39c5158Smillert    # elements in the array.
b39c5158Smillert
b39c5158Smillert    print "\nORIGINAL\n" ;
b39c5158Smillert    foreach $i (0 .. $H->length - 1) {
b39c5158Smillert        print "$i: $h[$i]\n" ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    # use the push & pop methods
b39c5158Smillert    $a = $H->pop ;
b39c5158Smillert    $H->push("last") ;
b39c5158Smillert    print "\nThe last record was [$a]\n" ;
b39c5158Smillert
b39c5158Smillert    # and the shift & unshift methods
b39c5158Smillert    $a = $H->shift ;
b39c5158Smillert    $H->unshift("first") ;
b39c5158Smillert    print "The first record was [$a]\n" ;
b39c5158Smillert
b39c5158Smillert    # Use the API to add a new record after record 2.
b39c5158Smillert    $i = 2 ;
b39c5158Smillert    $H->put($i, "Newbie", R_IAFTER) ;
b39c5158Smillert
b39c5158Smillert    # and a new record before record 1.
b39c5158Smillert    $i = 1 ;
b39c5158Smillert    $H->put($i, "New One", R_IBEFORE) ;
b39c5158Smillert
b39c5158Smillert    # delete record 3
b39c5158Smillert    $H->del(3) ;
b39c5158Smillert
b39c5158Smillert    # now print the records in reverse order
b39c5158Smillert    print "\nREVERSE\n" ;
b39c5158Smillert    for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
b39c5158Smillert      { print "$i: $h[$i]\n" }
b39c5158Smillert
b39c5158Smillert    # same again, but use the API functions instead
b39c5158Smillert    print "\nREVERSE again\n" ;
b39c5158Smillert    my ($s, $k, $v)  = (0, 0, 0) ;
b39c5158Smillert    for ($s = $H->seq($k, $v, R_LAST) ;
b39c5158Smillert             $s == 0 ;
b39c5158Smillert             $s = $H->seq($k, $v, R_PREV))
b39c5158Smillert      { print "$k: $v\n" }
b39c5158Smillert
b39c5158Smillert    undef $H ;
b39c5158Smillert    untie @h ;
b39c5158Smillert
b39c5158Smillertand this is what it outputs:
b39c5158Smillert
b39c5158Smillert    ORIGINAL
b39c5158Smillert    0: zero
b39c5158Smillert    1: one
b39c5158Smillert    2: two
b39c5158Smillert    3: three
b39c5158Smillert    4: four
b39c5158Smillert
b39c5158Smillert    The last record was [four]
b39c5158Smillert    The first record was [zero]
b39c5158Smillert
b39c5158Smillert    REVERSE
b39c5158Smillert    5: last
b39c5158Smillert    4: three
b39c5158Smillert    3: Newbie
b39c5158Smillert    2: one
b39c5158Smillert    1: New One
b39c5158Smillert    0: first
b39c5158Smillert
b39c5158Smillert    REVERSE again
b39c5158Smillert    5: last
b39c5158Smillert    4: three
b39c5158Smillert    3: Newbie
b39c5158Smillert    2: one
b39c5158Smillert    1: New One
b39c5158Smillert    0: first
b39c5158Smillert
b39c5158SmillertNotes:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item 1.
b39c5158Smillert
b39c5158SmillertRather than iterating through the array, C<@h> like this:
b39c5158Smillert
b39c5158Smillert    foreach $i (@h)
b39c5158Smillert
b39c5158Smillertit is necessary to use either this:
b39c5158Smillert
b39c5158Smillert    foreach $i (0 .. $H->length - 1)
b39c5158Smillert
b39c5158Smillertor this:
b39c5158Smillert
b39c5158Smillert    for ($a = $H->seq($k, $v, R_FIRST) ;
b39c5158Smillert         $a == 0 ;
b39c5158Smillert         $a = $H->seq($k, $v, R_NEXT) )
b39c5158Smillert
b39c5158Smillert=item 2.
b39c5158Smillert
b39c5158SmillertNotice that both times the C<put> method was used the record index was
b39c5158Smillertspecified using a variable, C<$i>, rather than the literal value
b39c5158Smillertitself. This is because C<put> will return the record number of the
b39c5158Smillertinserted line via that parameter.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head1 THE API INTERFACE
b39c5158Smillert
b39c5158SmillertAs well as accessing Berkeley DB using a tied hash or array, it is also
b39c5158Smillertpossible to make direct use of most of the API functions defined in the
b39c5158SmillertBerkeley DB documentation.
b39c5158Smillert
b39c5158SmillertTo do this you need to store a copy of the object returned from the tie.
b39c5158Smillert
b39c5158Smillert        $db = tie %hash, "DB_File", "filename" ;
b39c5158Smillert
b39c5158SmillertOnce you have done that, you can access the Berkeley DB API functions
b39c5158Smillertas B<DB_File> methods directly like this:
b39c5158Smillert
b39c5158Smillert        $db->put($key, $value, R_NOOVERWRITE) ;
b39c5158Smillert
b39c5158SmillertB<Important:> If you have saved a copy of the object returned from
b39c5158SmillertC<tie>, the underlying database file will I<not> be closed until both
b39c5158Smillertthe tied variable is untied and all copies of the saved object are
b39c5158Smillertdestroyed.
b39c5158Smillert
b39c5158Smillert    use DB_File ;
b39c5158Smillert    $db = tie %hash, "DB_File", "filename"
b39c5158Smillert        or die "Cannot tie filename: $!" ;
b39c5158Smillert    ...
b39c5158Smillert    undef $db ;
b39c5158Smillert    untie %hash ;
b39c5158Smillert
b39c5158SmillertSee L<The untie() Gotcha> for more details.
b39c5158Smillert
b39c5158SmillertAll the functions defined in L<dbopen> are available except for
b39c5158Smillertclose() and dbopen() itself. The B<DB_File> method interface to the
b39c5158Smillertsupported functions has been implemented to mirror the way Berkeley DB
b39c5158Smillertworks whenever possible. In particular note that:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item *
b39c5158Smillert
b39c5158SmillertThe methods return a status value. All return 0 on success.
b39c5158SmillertAll return -1 to signify an error and set C<$!> to the exact
b39c5158Smillerterror code. The return code 1 generally (but not always) means that the
b39c5158Smillertkey specified did not exist in the database.
b39c5158Smillert
b39c5158SmillertOther return codes are defined. See below and in the Berkeley DB
b39c5158Smillertdocumentation for details. The Berkeley DB documentation should be used
b39c5158Smillertas the definitive source.
b39c5158Smillert
b39c5158Smillert=item *
b39c5158Smillert
b39c5158SmillertWhenever a Berkeley DB function returns data via one of its parameters,
b39c5158Smillertthe equivalent B<DB_File> method does exactly the same.
b39c5158Smillert
b39c5158Smillert=item *
b39c5158Smillert
b39c5158SmillertIf you are careful, it is possible to mix API calls with the tied
b39c5158Smillerthash/array interface in the same piece of code. Although only a few of
b39c5158Smillertthe methods used to implement the tied interface currently make use of
b39c5158Smillertthe cursor, you should always assume that the cursor has been changed
b39c5158Smillertany time the tied hash/array interface is used. As an example, this
b39c5158Smillertcode will probably not do what you expect:
b39c5158Smillert
b39c5158Smillert    $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
b39c5158Smillert        or die "Cannot tie $filename: $!" ;
b39c5158Smillert
b39c5158Smillert    # Get the first key/value pair and set  the cursor
b39c5158Smillert    $X->seq($key, $value, R_FIRST) ;
b39c5158Smillert
b39c5158Smillert    # this line will modify the cursor
b39c5158Smillert    $count = scalar keys %x ;
b39c5158Smillert
b39c5158Smillert    # Get the second key/value pair.
b39c5158Smillert    # oops, it didn't, it got the last key/value pair!
b39c5158Smillert    $X->seq($key, $value, R_NEXT) ;
b39c5158Smillert
b39c5158SmillertThe code above can be rearranged to get around the problem, like this:
b39c5158Smillert
b39c5158Smillert    $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
b39c5158Smillert        or die "Cannot tie $filename: $!" ;
b39c5158Smillert
b39c5158Smillert    # this line will modify the cursor
b39c5158Smillert    $count = scalar keys %x ;
b39c5158Smillert
b39c5158Smillert    # Get the first key/value pair and set  the cursor
b39c5158Smillert    $X->seq($key, $value, R_FIRST) ;
b39c5158Smillert
b39c5158Smillert    # Get the second key/value pair.
b39c5158Smillert    # worked this time.
b39c5158Smillert    $X->seq($key, $value, R_NEXT) ;
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158SmillertAll the constants defined in L<dbopen> for use in the flags parameters
b39c5158Smillertin the methods defined below are also available. Refer to the Berkeley
b39c5158SmillertDB documentation for the precise meaning of the flags values.
b39c5158Smillert
b39c5158SmillertBelow is a list of the methods available.
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
b39c5158Smillert
b39c5158SmillertGiven a key (C<$key>) this method reads the value associated with it
b39c5158Smillertfrom the database. The value read from the database is returned in the
b39c5158SmillertC<$value> parameter.
b39c5158Smillert
b39c5158SmillertIf the key does not exist the method returns 1.
b39c5158Smillert
b39c5158SmillertNo flags are currently defined for this method.
b39c5158Smillert
b39c5158Smillert=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
b39c5158Smillert
b39c5158SmillertStores the key/value pair in the database.
b39c5158Smillert
b39c5158SmillertIf you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
b39c5158Smillertwill have the record number of the inserted key/value pair set.
b39c5158Smillert
b39c5158SmillertValid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
b39c5158SmillertR_SETCURSOR.
b39c5158Smillert
b39c5158Smillert=item B<$status = $X-E<gt>del($key [, $flags]) ;>
b39c5158Smillert
b39c5158SmillertRemoves all key/value pairs with key C<$key> from the database.
b39c5158Smillert
b39c5158SmillertA return code of 1 means that the requested key was not in the
b39c5158Smillertdatabase.
b39c5158Smillert
b39c5158SmillertR_CURSOR is the only valid flag at present.
b39c5158Smillert
b39c5158Smillert=item B<$status = $X-E<gt>fd ;>
b39c5158Smillert
b39c5158SmillertReturns the file descriptor for the underlying database.
b39c5158Smillert
b39c5158SmillertSee L<Locking: The Trouble with fd> for an explanation for why you should
b39c5158Smillertnot use C<fd> to lock your database.
b39c5158Smillert
b39c5158Smillert=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
b39c5158Smillert
b39c5158SmillertThis interface allows sequential retrieval from the database. See
b39c5158SmillertL<dbopen> for full details.
b39c5158Smillert
b39c5158SmillertBoth the C<$key> and C<$value> parameters will be set to the key/value
b39c5158Smillertpair read from the database.
b39c5158Smillert
b39c5158SmillertThe flags parameter is mandatory. The valid flag values are R_CURSOR,
b39c5158SmillertR_FIRST, R_LAST, R_NEXT and R_PREV.
b39c5158Smillert
b39c5158Smillert=item B<$status = $X-E<gt>sync([$flags]) ;>
b39c5158Smillert
b39c5158SmillertFlushes any cached buffers to disk.
b39c5158Smillert
b39c5158SmillertR_RECNOSYNC is the only valid flag at present.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head1 DBM FILTERS
b39c5158Smillert
9f11ffb7Safresh1A DBM Filter is a piece of code that is used when you I<always> want to
9f11ffb7Safresh1make the same transformation to all keys and/or values in a DBM database.
9f11ffb7Safresh1An example is when you need to encode your data in UTF-8 before writing to
9f11ffb7Safresh1the database and then decode the UTF-8 when reading from the database file.
9f11ffb7Safresh1
9f11ffb7Safresh1There are two ways to use a DBM Filter.
9f11ffb7Safresh1
9f11ffb7Safresh1=over 5
9f11ffb7Safresh1
9f11ffb7Safresh1=item 1.
9f11ffb7Safresh1
9f11ffb7Safresh1Using the low-level API defined below.
9f11ffb7Safresh1
9f11ffb7Safresh1=item 2.
9f11ffb7Safresh1
9f11ffb7Safresh1Using the L<DBM_Filter> module.
9f11ffb7Safresh1This module hides the complexity of the API defined below and comes
9f11ffb7Safresh1with a number of "canned" filters that cover some of the common use-cases.
9f11ffb7Safresh1
9f11ffb7Safresh1=back
9f11ffb7Safresh1
9f11ffb7Safresh1Use of the L<DBM_Filter> module is recommended.
9f11ffb7Safresh1
9f11ffb7Safresh1=head2 DBM Filter Low-level API
b39c5158Smillert
b39c5158SmillertThere are four methods associated with DBM Filters. All work identically,
b39c5158Smillertand each is used to install (or uninstall) a single DBM Filter. Each
b39c5158Smillertexpects a single parameter, namely a reference to a sub. The only
b39c5158Smillertdifference between them is the place that the filter is installed.
b39c5158Smillert
b39c5158SmillertTo summarise:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item B<filter_store_key>
b39c5158Smillert
b39c5158SmillertIf a filter has been installed with this method, it will be invoked
b39c5158Smillertevery time you write a key to a DBM database.
b39c5158Smillert
b39c5158Smillert=item B<filter_store_value>
b39c5158Smillert
b39c5158SmillertIf a filter has been installed with this method, it will be invoked
b39c5158Smillertevery time you write a value to a DBM database.
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert=item B<filter_fetch_key>
b39c5158Smillert
b39c5158SmillertIf a filter has been installed with this method, it will be invoked
b39c5158Smillertevery time you read a key from a DBM database.
b39c5158Smillert
b39c5158Smillert=item B<filter_fetch_value>
b39c5158Smillert
b39c5158SmillertIf a filter has been installed with this method, it will be invoked
b39c5158Smillertevery time you read a value from a DBM database.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158SmillertYou can use any combination of the methods, from none, to all four.
b39c5158Smillert
b39c5158SmillertAll filter methods return the existing filter, if present, or C<undef>
b39c5158Smillertif not.
b39c5158Smillert
b39c5158SmillertTo delete a filter pass C<undef> to it.
b39c5158Smillert
b39c5158Smillert=head2 The Filter
b39c5158Smillert
b39c5158SmillertWhen each filter is called by Perl, a local copy of C<$_> will contain
b39c5158Smillertthe key or value to be filtered. Filtering is achieved by modifying
b39c5158Smillertthe contents of C<$_>. The return code from the filter is ignored.
b39c5158Smillert
b39c5158Smillert=head2 An Example -- the NULL termination problem.
b39c5158Smillert
b39c5158SmillertConsider the following scenario. You have a DBM database
b39c5158Smillertthat you need to share with a third-party C application. The C application
b39c5158Smillertassumes that I<all> keys and values are NULL terminated. Unfortunately
b39c5158Smillertwhen Perl writes to DBM databases it doesn't use NULL termination, so
b39c5158Smillertyour Perl application will have to manage NULL termination itself. When
b39c5158Smillertyou write to the database you will have to use something like this:
b39c5158Smillert
b39c5158Smillert    $hash{"$key\0"} = "$value\0" ;
b39c5158Smillert
b39c5158SmillertSimilarly the NULL needs to be taken into account when you are considering
b39c5158Smillertthe length of existing keys/values.
b39c5158Smillert
b39c5158SmillertIt would be much better if you could ignore the NULL terminations issue
b39c5158Smillertin the main application code and have a mechanism that automatically
b39c5158Smillertadded the terminating NULL to all keys and values whenever you write to
b39c5158Smillertthe database and have them removed when you read from the database. As I'm
b39c5158Smillertsure you have already guessed, this is a problem that DBM Filters can
b39c5158Smillertfix very easily.
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert
b39c5158Smillert    my %hash ;
b39c5158Smillert    my $filename = "filt" ;
b39c5158Smillert    unlink $filename ;
b39c5158Smillert
b39c5158Smillert    my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
b39c5158Smillert      or die "Cannot open $filename: $!\n" ;
b39c5158Smillert
b39c5158Smillert    # Install DBM Filters
b39c5158Smillert    $db->filter_fetch_key  ( sub { s/\0$//    } ) ;
b39c5158Smillert    $db->filter_store_key  ( sub { $_ .= "\0" } ) ;
b39c5158Smillert    $db->filter_fetch_value( sub { s/\0$//    } ) ;
b39c5158Smillert    $db->filter_store_value( sub { $_ .= "\0" } ) ;
b39c5158Smillert
b39c5158Smillert    $hash{"abc"} = "def" ;
b39c5158Smillert    my $a = $hash{"abc"} ;
b39c5158Smillert    # ...
b39c5158Smillert    undef $db ;
b39c5158Smillert    untie %hash ;
b39c5158Smillert
b39c5158SmillertHopefully the contents of each of the filters should be
b39c5158Smillertself-explanatory. Both "fetch" filters remove the terminating NULL,
b39c5158Smillertand both "store" filters add a terminating NULL.
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert=head2 Another Example -- Key is a C int.
b39c5158Smillert
b39c5158SmillertHere is another real-life example. By default, whenever Perl writes to
b39c5158Smillerta DBM database it always writes the key and value as strings. So when
b39c5158Smillertyou use this:
b39c5158Smillert
b39c5158Smillert    $hash{12345} = "something" ;
b39c5158Smillert
b39c5158Smillertthe key 12345 will get stored in the DBM database as the 5 byte string
b39c5158Smillert"12345". If you actually want the key to be stored in the DBM database
b39c5158Smillertas a C int, you will have to use C<pack> when writing, and C<unpack>
b39c5158Smillertwhen reading.
b39c5158Smillert
b39c5158SmillertHere is a DBM Filter that does it:
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert    my %hash ;
b39c5158Smillert    my $filename = "filt" ;
b39c5158Smillert    unlink $filename ;
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert    my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
b39c5158Smillert      or die "Cannot open $filename: $!\n" ;
b39c5158Smillert
b39c5158Smillert    $db->filter_fetch_key  ( sub { $_ = unpack("i", $_) } ) ;
b39c5158Smillert    $db->filter_store_key  ( sub { $_ = pack ("i", $_) } ) ;
b39c5158Smillert    $hash{123} = "def" ;
b39c5158Smillert    # ...
b39c5158Smillert    undef $db ;
b39c5158Smillert    untie %hash ;
b39c5158Smillert
b39c5158SmillertThis time only two filters have been used -- we only need to manipulate
b39c5158Smillertthe contents of the key, so it wasn't necessary to install any value
b39c5158Smillertfilters.
b39c5158Smillert
b39c5158Smillert=head1 HINTS AND TIPS
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert=head2 Locking: The Trouble with fd
b39c5158Smillert
b39c5158SmillertUntil version 1.72 of this module, the recommended technique for locking
b39c5158SmillertB<DB_File> databases was to flock the filehandle returned from the "fd"
b39c5158Smillertfunction. Unfortunately this technique has been shown to be fundamentally
b39c5158Smillertflawed (Kudos to David Harris for tracking this down). Use it at your own
b39c5158Smillertperil!
b39c5158Smillert
b39c5158SmillertThe locking technique went like this.
b39c5158Smillert
b39c5158Smillert    $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
b39c5158Smillert        || die "dbcreat foo.db $!";
b39c5158Smillert    $fd = $db->fd;
b39c5158Smillert    open(DB_FH, "+<&=$fd") || die "dup $!";
b39c5158Smillert    flock (DB_FH, LOCK_EX) || die "flock: $!";
b39c5158Smillert    ...
b39c5158Smillert    $db{"Tom"} = "Jerry" ;
b39c5158Smillert    ...
b39c5158Smillert    flock(DB_FH, LOCK_UN);
b39c5158Smillert    undef $db;
b39c5158Smillert    untie %db;
b39c5158Smillert    close(DB_FH);
b39c5158Smillert
b39c5158SmillertIn simple terms, this is what happens:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item 1.
b39c5158Smillert
b39c5158SmillertUse "tie" to open the database.
b39c5158Smillert
b39c5158Smillert=item 2.
b39c5158Smillert
b39c5158SmillertLock the database with fd & flock.
b39c5158Smillert
b39c5158Smillert=item 3.
b39c5158Smillert
b39c5158SmillertRead & Write to the database.
b39c5158Smillert
b39c5158Smillert=item 4.
b39c5158Smillert
b39c5158SmillertUnlock and close the database.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158SmillertHere is the crux of the problem. A side-effect of opening the B<DB_File>
b39c5158Smillertdatabase in step 1 is that an initial block from the database will get
b39c5158Smillertread from disk and cached in memory.
b39c5158Smillert
b39c5158SmillertTo see why this is a problem, consider what can happen when two processes,
b39c5158Smillertsay "A" and "B", both want to update the same B<DB_File> database
b39c5158Smillertusing the locking steps outlined above. Assume process "A" has already
b39c5158Smillertopened the database and has a write lock, but it hasn't actually updated
b39c5158Smillertthe database yet (it has finished step 2, but not started step 3 yet). Now
b39c5158Smillertprocess "B" tries to open the same database - step 1 will succeed,
b39c5158Smillertbut it will block on step 2 until process "A" releases the lock. The
b39c5158Smillertimportant thing to notice here is that at this point in time both
b39c5158Smillertprocesses will have cached identical initial blocks from the database.
b39c5158Smillert
b39c5158SmillertNow process "A" updates the database and happens to change some of the
b39c5158Smillertdata held in the initial buffer. Process "A" terminates, flushing
b39c5158Smillertall cached data to disk and releasing the database lock. At this point
b39c5158Smillertthe database on disk will correctly reflect the changes made by process
b39c5158Smillert"A".
b39c5158Smillert
b39c5158SmillertWith the lock released, process "B" can now continue. It also updates the
b39c5158Smillertdatabase and unfortunately it too modifies the data that was in its
b39c5158Smillertinitial buffer. Once that data gets flushed to disk it will overwrite
b39c5158Smillertsome/all of the changes process "A" made to the database.
b39c5158Smillert
b39c5158SmillertThe result of this scenario is at best a database that doesn't contain
b39c5158Smillertwhat you expect. At worst the database will corrupt.
b39c5158Smillert
b39c5158SmillertThe above won't happen every time competing processes update the same
b39c5158SmillertB<DB_File> database, but it does illustrate why the technique should
b39c5158Smillertnot be used.
b39c5158Smillert
b39c5158Smillert=head2 Safe ways to lock a database
b39c5158Smillert
b39c5158SmillertStarting with version 2.x, Berkeley DB  has internal support for locking.
56d68f1eSafresh1The companion module to this one, L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB>, provides an interface
b39c5158Smillertto this locking functionality. If you are serious about locking
56d68f1eSafresh1Berkeley DB databases, I strongly recommend using L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB>.
b39c5158Smillert
56d68f1eSafresh1If using L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB> isn't an option, there are a number of modules
b39c5158Smillertavailable on CPAN that can be used to implement locking. Each one
b39c5158Smillertimplements locking differently and has different goals in mind. It is
b39c5158Smillerttherefore worth knowing the difference, so that you can pick the right
b39c5158Smillertone for your application. Here are the three locking wrappers:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item B<Tie::DB_Lock>
b39c5158Smillert
b39c5158SmillertA B<DB_File> wrapper which creates copies of the database file for
b39c5158Smillertread access, so that you have a kind of a multiversioning concurrent read
b39c5158Smillertsystem. However, updates are still serial. Use for databases where reads
b39c5158Smillertmay be lengthy and consistency problems may occur.
b39c5158Smillert
b39c5158Smillert=item B<Tie::DB_LockFile>
b39c5158Smillert
b39c5158SmillertA B<DB_File> wrapper that has the ability to lock and unlock the database
b39c5158Smillertwhile it is being used. Avoids the tie-before-flock problem by simply
b39c5158Smillertre-tie-ing the database when you get or drop a lock.  Because of the
b39c5158Smillertflexibility in dropping and re-acquiring the lock in the middle of a
b39c5158Smillertsession, this can be massaged into a system that will work with long
b39c5158Smillertupdates and/or reads if the application follows the hints in the POD
b39c5158Smillertdocumentation.
b39c5158Smillert
b39c5158Smillert=item B<DB_File::Lock>
b39c5158Smillert
b39c5158SmillertAn extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
b39c5158Smillertbefore tie-ing the database and drops the lock after the untie. Allows
b39c5158Smillertone to use the same lockfile for multiple databases to avoid deadlock
b39c5158Smillertproblems, if desired. Use for databases where updates and reads are
b39c5158Smillertquick and simple flock locking semantics are enough.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head2 Sharing Databases With C Applications
b39c5158Smillert
b39c5158SmillertThere is no technical reason why a Berkeley DB database cannot be
b39c5158Smillertshared by both a Perl and a C application.
b39c5158Smillert
b39c5158SmillertThe vast majority of problems that are reported in this area boil down
b39c5158Smillertto the fact that C strings are NULL terminated, whilst Perl strings are
b39c5158Smillertnot. See L<DBM FILTERS> for a generic way to work around this problem.
b39c5158Smillert
b39c5158SmillertHere is a real example. Netscape 2.0 keeps a record of the locations you
b39c5158Smillertvisit along with the time you last visited them in a DB_HASH database.
b39c5158SmillertThis is usually stored in the file F<~/.netscape/history.db>. The key
b39c5158Smillertfield in the database is the location string and the value field is the
b39c5158Smillerttime the location was last visited stored as a 4 byte binary value.
b39c5158Smillert
b39c5158SmillertIf you haven't already guessed, the location string is stored with a
b39c5158Smillertterminating NULL. This means you need to be careful when accessing the
b39c5158Smillertdatabase.
b39c5158Smillert
b39c5158SmillertHere is a snippet of code that is loosely based on Tom Christiansen's
b39c5158SmillertI<ggh> script (available from your nearest CPAN archive in
b39c5158SmillertF<authors/id/TOMC/scripts/nshist.gz>).
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert    use Fcntl ;
b39c5158Smillert
b39c5158Smillert    my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
b39c5158Smillert    $dotdir = $ENV{HOME} || $ENV{LOGNAME};
b39c5158Smillert
b39c5158Smillert    $HISTORY = "$dotdir/.netscape/history.db";
b39c5158Smillert
b39c5158Smillert    tie %hist_db, 'DB_File', $HISTORY
b39c5158Smillert        or die "Cannot open $HISTORY: $!\n" ;
b39c5158Smillert
b39c5158Smillert    # Dump the complete database
b39c5158Smillert    while ( ($href, $binary_time) = each %hist_db ) {
b39c5158Smillert
b39c5158Smillert        # remove the terminating NULL
b39c5158Smillert        $href =~ s/\x00$// ;
b39c5158Smillert
b39c5158Smillert        # convert the binary time into a user friendly string
b39c5158Smillert        $date = localtime unpack("V", $binary_time);
b39c5158Smillert        print "$date $href\n" ;
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    # check for the existence of a specific key
b39c5158Smillert    # remember to add the NULL
b39c5158Smillert    if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
b39c5158Smillert        $date = localtime unpack("V", $binary_time) ;
b39c5158Smillert        print "Last visited mox.perl.com on $date\n" ;
b39c5158Smillert    }
b39c5158Smillert    else {
b39c5158Smillert        print "Never visited mox.perl.com\n"
b39c5158Smillert    }
b39c5158Smillert
b39c5158Smillert    untie %hist_db ;
b39c5158Smillert
b39c5158Smillert=head2 The untie() Gotcha
b39c5158Smillert
b39c5158SmillertIf you make use of the Berkeley DB API, it is I<very> strongly
b39c5158Smillertrecommended that you read L<perltie/The untie Gotcha>.
b39c5158Smillert
b39c5158SmillertEven if you don't currently make use of the API interface, it is still
b39c5158Smillertworth reading it.
b39c5158Smillert
b39c5158SmillertHere is an example which illustrates the problem from a B<DB_File>
b39c5158Smillertperspective:
b39c5158Smillert
b39c5158Smillert    use DB_File ;
b39c5158Smillert    use Fcntl ;
b39c5158Smillert
b39c5158Smillert    my %x ;
b39c5158Smillert    my $X ;
b39c5158Smillert
b39c5158Smillert    $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
b39c5158Smillert        or die "Cannot tie first time: $!" ;
b39c5158Smillert
b39c5158Smillert    $x{123} = 456 ;
b39c5158Smillert
b39c5158Smillert    untie %x ;
b39c5158Smillert
b39c5158Smillert    tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
b39c5158Smillert        or die "Cannot tie second time: $!" ;
b39c5158Smillert
b39c5158Smillert    untie %x ;
b39c5158Smillert
b39c5158SmillertWhen run, the script will produce this error message:
b39c5158Smillert
b39c5158Smillert    Cannot tie second time: Invalid argument at bad.file line 14.
b39c5158Smillert
b39c5158SmillertAlthough the error message above refers to the second tie() statement
b39c5158Smillertin the script, the source of the problem is really with the untie()
b39c5158Smillertstatement that precedes it.
b39c5158Smillert
b39c5158SmillertHaving read L<perltie> you will probably have already guessed that the
b39c5158Smillerterror is caused by the extra copy of the tied object stored in C<$X>.
b39c5158SmillertIf you haven't, then the problem boils down to the fact that the
b39c5158SmillertB<DB_File> destructor, DESTROY, will not be called until I<all>
b39c5158Smillertreferences to the tied object are destroyed. Both the tied variable,
b39c5158SmillertC<%x>, and C<$X> above hold a reference to the object. The call to
b39c5158Smillertuntie() will destroy the first, but C<$X> still holds a valid
b39c5158Smillertreference, so the destructor will not get called and the database file
b39c5158SmillertF<tst.fil> will remain open. The fact that Berkeley DB then reports the
b39c5158Smillertattempt to open a database that is already open via the catch-all
b39c5158Smillert"Invalid argument" doesn't help.
b39c5158Smillert
b39c5158SmillertIf you run the script with the C<-w> flag the error message becomes:
b39c5158Smillert
b39c5158Smillert    untie attempted while 1 inner references still exist at bad.file line 12.
b39c5158Smillert    Cannot tie second time: Invalid argument at bad.file line 14.
b39c5158Smillert
b39c5158Smillertwhich pinpoints the real problem. Finally the script can now be
b39c5158Smillertmodified to fix the original problem by destroying the API object
b39c5158Smillertbefore the untie:
b39c5158Smillert
b39c5158Smillert    ...
b39c5158Smillert    $x{123} = 456 ;
b39c5158Smillert
b39c5158Smillert    undef $X ;
b39c5158Smillert    untie %x ;
b39c5158Smillert
b39c5158Smillert    $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
b39c5158Smillert    ...
b39c5158Smillert
b39c5158Smillert
b39c5158Smillert=head1 COMMON QUESTIONS
b39c5158Smillert
b39c5158Smillert=head2 Why is there Perl source in my database?
b39c5158Smillert
b39c5158SmillertIf you look at the contents of a database file created by DB_File,
b39c5158Smillertthere can sometimes be part of a Perl script included in it.
b39c5158Smillert
b39c5158SmillertThis happens because Berkeley DB uses dynamic memory to allocate
b39c5158Smillertbuffers which will subsequently be written to the database file. Being
b39c5158Smillertdynamic, the memory could have been used for anything before DB
b39c5158Smillertmalloced it. As Berkeley DB doesn't clear the memory once it has been
b39c5158Smillertallocated, the unused portions will contain random junk. In the case
b39c5158Smillertwhere a Perl script gets written to the database, the random junk will
b39c5158Smillertcorrespond to an area of dynamic memory that happened to be used during
b39c5158Smillertthe compilation of the script.
b39c5158Smillert
b39c5158SmillertUnless you object to the possibility of part of your Perl scripts being
b39c5158Smillertembedded in a database file, this is nothing to worry about.
b39c5158Smillert
b39c5158Smillert=head2 How do I store complex data structures with DB_File?
b39c5158Smillert
b39c5158SmillertAlthough B<DB_File> cannot do this directly, there is a module which
b39c5158Smillertcan layer transparently over B<DB_File> to accomplish this feat.
b39c5158Smillert
b39c5158SmillertCheck out the MLDBM module, available on CPAN in the directory
b39c5158SmillertF<modules/by-module/MLDBM>.
b39c5158Smillert
9f11ffb7Safresh1=head2 What does "wide character in subroutine entry" mean?
9f11ffb7Safresh1
9f11ffb7Safresh1You will usually get this message if you are working with UTF-8 data and
9f11ffb7Safresh1want to read/write it from/to a Berkeley DB database file.
9f11ffb7Safresh1
9f11ffb7Safresh1The easiest way to deal with this issue is to use the pre-defined "utf8"
9f11ffb7Safresh1B<DBM_Filter> (see L<DBM_Filter>) that was designed to deal with this
9f11ffb7Safresh1situation.
9f11ffb7Safresh1
9f11ffb7Safresh1The example below shows what you need if I<both> the key and value are
9f11ffb7Safresh1expected to be in UTF-8.
9f11ffb7Safresh1
9f11ffb7Safresh1    use DB_File;
9f11ffb7Safresh1    use DBM_Filter;
9f11ffb7Safresh1
9f11ffb7Safresh1    my $db = tie %h, 'DB_File', '/tmp/try.db', O_CREAT|O_RDWR, 0666, $DB_BTREE;
9f11ffb7Safresh1    $db->Filter_Key_Push('utf8');
9f11ffb7Safresh1    $db->Filter_Value_Push('utf8');
9f11ffb7Safresh1
9f11ffb7Safresh1    my $key = "\N{LATIN SMALL LETTER A WITH ACUTE}";
9f11ffb7Safresh1    my $value = "\N{LATIN SMALL LETTER E WITH ACUTE}";
9f11ffb7Safresh1    $h{ $key } = $value;
9f11ffb7Safresh1
b39c5158Smillert=head2 What does "Invalid Argument" mean?
b39c5158Smillert
b39c5158SmillertYou will get this error message when one of the parameters in the
b39c5158SmillertC<tie> call is wrong. Unfortunately there are quite a few parameters to
b39c5158Smillertget wrong, so it can be difficult to figure out which one it is.
b39c5158Smillert
b39c5158SmillertHere are a couple of possibilities:
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item 1.
b39c5158Smillert
b39c5158SmillertAttempting to reopen a database without closing it.
b39c5158Smillert
b39c5158Smillert=item 2.
b39c5158Smillert
b39c5158SmillertUsing the O_WRONLY flag.
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head2 What does "Bareword 'DB_File' not allowed" mean?
b39c5158Smillert
b39c5158SmillertYou will encounter this particular error message when you have the
b39c5158SmillertC<strict 'subs'> pragma (or the full strict pragma) in your script.
b39c5158SmillertConsider this script:
b39c5158Smillert
b39c5158Smillert    use warnings ;
b39c5158Smillert    use strict ;
b39c5158Smillert    use DB_File ;
b39c5158Smillert    my %x ;
b39c5158Smillert    tie %x, DB_File, "filename" ;
b39c5158Smillert
b39c5158SmillertRunning it produces the error in question:
b39c5158Smillert
b39c5158Smillert    Bareword "DB_File" not allowed while "strict subs" in use
b39c5158Smillert
b39c5158SmillertTo get around the error, place the word C<DB_File> in either single or
b39c5158Smillertdouble quotes, like this:
b39c5158Smillert
b39c5158Smillert    tie %x, "DB_File", "filename" ;
b39c5158Smillert
b39c5158SmillertAlthough it might seem like a real pain, it is really worth the effort
b39c5158Smillertof having a C<use strict> in all your scripts.
b39c5158Smillert
b39c5158Smillert=head1 REFERENCES
b39c5158Smillert
b39c5158SmillertArticles that are either about B<DB_File> or make use of it.
b39c5158Smillert
b39c5158Smillert=over 5
b39c5158Smillert
b39c5158Smillert=item 1.
b39c5158Smillert
b39c5158SmillertI<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
b39c5158SmillertDr. Dobb's Journal, Issue 295, January 1999, pp 34-41
b39c5158Smillert
b39c5158Smillert=back
b39c5158Smillert
b39c5158Smillert=head1 HISTORY
b39c5158Smillert
b39c5158SmillertMoved to the Changes file.
b39c5158Smillert
b39c5158Smillert=head1 BUGS
b39c5158Smillert
b39c5158SmillertSome older versions of Berkeley DB had problems with fixed length
b39c5158Smillertrecords using the RECNO file format. This problem has been fixed since
b39c5158Smillertversion 1.85 of Berkeley DB.
b39c5158Smillert
b39c5158SmillertI am sure there are bugs in the code. If you do find any, or can
b39c5158Smillertsuggest any enhancements, I would welcome your comments.
b39c5158Smillert
56d68f1eSafresh1=head1 SUPPORT
56d68f1eSafresh1
56d68f1eSafresh1General feedback/questions/bug reports should be sent to
56d68f1eSafresh1L<https://github.com/pmqs/DB_File/issues> (preferred) or
56d68f1eSafresh1L<https://rt.cpan.org/Public/Dist/Display.html?Name=DB_File>.
56d68f1eSafresh1
b39c5158Smillert=head1 AVAILABILITY
b39c5158Smillert
b39c5158SmillertB<DB_File> comes with the standard Perl source distribution. Look in
b39c5158Smillertthe directory F<ext/DB_File>. Given the amount of time between releases
b39c5158Smillertof Perl, the version that ships with Perl is quite likely to be out of
b39c5158Smillertdate; the most recent version can always be found on CPAN (see
b39c5158SmillertL<perlmodlib/CPAN> for details), in the directory
b39c5158SmillertF<modules/by-module/DB_File>.
b39c5158Smillert
56d68f1eSafresh1B<DB_File> is designed to work with any version of Berkeley DB, but is limited to the functionality provided by
56d68f1eSafresh1version 1. If you want to make use of the new features available in Berkeley DB
56d68f1eSafresh12.x, or greater, use the Perl module L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB> instead.
b39c5158Smillert
56d68f1eSafresh1The official web site for Berkeley DB is L<http://www.oracle.com/technology/products/berkeley-db/db/index.html>.
b39c5158SmillertAll versions of Berkeley DB are available there.
b39c5158Smillert
b39c5158SmillertAlternatively, Berkeley DB version 1 is available at your nearest CPAN
b39c5158Smillertarchive in F<src/misc/db.1.85.tar.gz>.
b39c5158Smillert
b39c5158Smillert=head1 COPYRIGHT
b39c5158Smillert
*3d61058aSafresh1Copyright (c) 1995-2023 Paul Marquess. All rights reserved. This program
b39c5158Smillertis free software; you can redistribute it and/or modify it under the
b39c5158Smillertsame terms as Perl itself.
b39c5158Smillert
b39c5158SmillertAlthough B<DB_File> is covered by the Perl license, the library it
b39c5158Smillertmakes use of, namely Berkeley DB, is not. Berkeley DB has its own
e0680481Safresh1copyright and its own license.
e0680481Safresh1See L<AGPL|https://www.oracle.com/downloads/licenses/berkeleydb-oslicense.html> for more details.
e0680481Safresh1Please take the time to read the Berkeley DB license and decide how it impacts your use of this Perl module.
b39c5158Smillert
b39c5158Smillert=head1 SEE ALSO
b39c5158Smillert
b39c5158SmillertL<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
9f11ffb7Safresh1L<perldbmfilter>, L<DBM_Filter>
b39c5158Smillert
b39c5158Smillert=head1 AUTHOR
b39c5158Smillert
b39c5158SmillertThe DB_File interface was written by Paul Marquess
b39c5158SmillertE<lt>pmqs@cpan.orgE<gt>.
b39c5158Smillert
b39c5158Smillert=cut