xref: /openbsd-src/gnu/usr.bin/perl/cpan/Digest/lib/Digest.pm (revision 256a93a44f36679bee503f12e49566c2183f6181)
package Digest;

use strict;
use warnings;

our $VERSION = "1.20";

# Maps a public algorithm name to the implementation(s) that can provide it.
#
# A value is either a single class name, or an array ref of candidates that
# new() tries in order until one of them can be loaded.  Each candidate is
# either a class name or an array ref of [ $class, @constructor_args ]; the
# constructor args are passed to $class->new() ahead of the caller's own.
#
# Names that are not listed here fall back to "Digest::<name>" (see new()).
our %MMAP = (
    "SHA-1"    => [ [ "Digest::SHA",  1 ], "Digest::SHA1", [ "Digest::SHA2", 1 ] ],
    "SHA-224"  => [ [ "Digest::SHA",  224 ] ],
    "SHA-256"  => [ [ "Digest::SHA",  256 ], [ "Digest::SHA2", 256 ] ],
    "SHA-384"  => [ [ "Digest::SHA",  384 ], [ "Digest::SHA2", 384 ] ],
    "SHA-512"  => [ [ "Digest::SHA",  512 ], [ "Digest::SHA2", 512 ] ],
    "SHA3-224" => [ [ "Digest::SHA3", 224 ] ],
    "SHA3-256" => [ [ "Digest::SHA3", 256 ] ],
    "SHA3-384" => [ [ "Digest::SHA3", 384 ] ],
    "SHA3-512" => [ [ "Digest::SHA3", 512 ] ],
    "HMAC-MD5"   => "Digest::HMAC_MD5",
    "HMAC-SHA-1" => "Digest::HMAC_SHA1",
    "CRC-16"     => [ [ "Digest::CRC", type => "crc16" ] ],
    "CRC-32"     => [ [ "Digest::CRC", type => "crc32" ] ],
    "CRC-CCITT"  => [ [ "Digest::CRC", type => "crcccitt" ] ],
    "RIPEMD-160" => "Crypt::RIPEMD160",
);

# Digest->new( $algorithm, @args )
#
# Factory constructor: resolves $algorithm to an implementation class, loads
# that class if necessary, and returns whatever $class->new(...) returns.
#
#   $algorithm  Name looked up in %MMAP.  When there is no entry, all
#               non-word characters are stripped from the name and the
#               class "Digest::<stripped name>" is used.
#   @args       Passed through to the implementation's constructor, after
#               any constructor args configured in %MMAP.
#
# Dies if no algorithm name is given, or if none of the candidate classes
# can be loaded; in the latter case the error from the first failed load is
# rethrown.  The caller's $@ is left untouched on success.
sub new {
    shift;    # class ignored
    my $algorithm = shift;

    # Normalise undef so the lookup below does not emit uninitialized-value
    # warnings before we get a chance to report the real problem.
    my $requested = defined $algorithm ? $algorithm : '';

    my $impl = $MMAP{$requested};
    unless ($impl) {
        ( my $suffix = $requested ) =~ s/\W+//g;
        die "Digest->new: no digest algorithm name given"
          unless length $suffix;
        $impl = "Digest::$suffix";
    }
    $impl = [$impl] unless ref($impl);

    local $@;    # don't clobber it for our caller
    my $err;
    for my $candidate (@$impl) {
        my ( $class, @args ) = ref($candidate) ? @$candidate : ($candidate);

        # A package that already has a VERSION entry in its symbol table is
        # treated as loaded, so the require below is skipped for it.
        my $already_loaded = do {
            no strict 'refs';
            exists ${"$class\::"}{"VERSION"};
        };

        unless ($already_loaded) {
            ( my $pm_file = "$class.pm" ) =~ s{::}{/}g;
            eval {
                # Search a private copy of @INC without a trailing '.', so an
                # implementation is never picked up from the current
                # directory through that entry.
                local @INC = @INC;
                pop @INC if @INC && $INC[-1] eq '.';
                require $pm_file;
            };
            if ($@) {
                $err ||= $@;    # remember only the first failure
                next;
            }
        }
        return $class->new( @args, @_ );
    }

    # $err is only unset when the candidate list itself was empty.
    die $err
      || "No implementation of digest algorithm '$requested' is available";
}

our $AUTOLOAD;

# Digest->XXX(@args) is sugar for Digest->new( "XXX", @args ): the name of
# the method that was called is used as the algorithm name.
sub AUTOLOAD {
    my $class     = shift;
    my $algorithm = substr( $AUTOLOAD, rindex( $AUTOLOAD, '::' ) + 2 );

    # Perl routes DESTROY through AUTOLOAD when no DESTROY method exists;
    # that must never be mistaken for a request for an algorithm.
    return if $algorithm eq 'DESTROY';

    return $class->new( $algorithm, @_ );
}

1;
68*256a93a4Safresh1
69*256a93a4Safresh1__END__
70*256a93a4Safresh1
71*256a93a4Safresh1=head1 NAME
72*256a93a4Safresh1
73*256a93a4Safresh1Digest - Modules that calculate message digests
74*256a93a4Safresh1
75*256a93a4Safresh1=head1 SYNOPSIS
76*256a93a4Safresh1
77*256a93a4Safresh1  $md5  = Digest->new("MD5");
78*256a93a4Safresh1  $sha1 = Digest->new("SHA-1");
79*256a93a4Safresh1  $sha256 = Digest->new("SHA-256");
80*256a93a4Safresh1  $sha384 = Digest->new("SHA-384");
81*256a93a4Safresh1  $sha512 = Digest->new("SHA-512");
82*256a93a4Safresh1
83*256a93a4Safresh1  $hmac = Digest->HMAC_MD5($key);
84*256a93a4Safresh1
85*256a93a4Safresh1=head1 DESCRIPTION
86*256a93a4Safresh1
The C<Digest::> modules calculate digests, also called "fingerprints"
or "hashes", of some data, called a message.  The digest is (usually)
some small/fixed size string.  The actual size of the digest depends on
the algorithm used.  The message is simply a sequence of arbitrary
bytes or bits.
92*256a93a4Safresh1
An important property of the digest algorithms is that the digest is
I<likely> to change if the message changes in some way.  Another
property is that digest functions are one-way functions, that is, it
should be I<hard> to find a message that corresponds to some given
digest.  Algorithms differ in how "likely" and how "hard", as well as
how efficient they are to compute.
99*256a93a4Safresh1
Note that the properties of the algorithms change over time, as the
algorithms are analyzed and machines grow faster.  If your application
for instance depends on it being "impossible" to generate the same
digest for a different message, it is wise to make it easy to plug in
stronger algorithms as the one used grows weaker.  Using the interface
documented here should make it easy to change algorithms later.
106*256a93a4Safresh1
All C<Digest::> modules provide the same programming interface: a
functional interface for simple use, as well as an object oriented
interface that can handle messages of arbitrary length and which can
read files directly.
111*256a93a4Safresh1
112*256a93a4Safresh1The digest can be delivered in three formats:
113*256a93a4Safresh1
114*256a93a4Safresh1=over 8
115*256a93a4Safresh1
116*256a93a4Safresh1=item I<binary>
117*256a93a4Safresh1
118*256a93a4Safresh1This is the most compact form, but it is not well suited for printing
119*256a93a4Safresh1or embedding in places that can't handle arbitrary data.
120*256a93a4Safresh1
121*256a93a4Safresh1=item I<hex>
122*256a93a4Safresh1
123*256a93a4Safresh1A twice as long string of lowercase hexadecimal digits.
124*256a93a4Safresh1
125*256a93a4Safresh1=item I<base64>
126*256a93a4Safresh1
127*256a93a4Safresh1A string of portable printable characters.  This is the base64 encoded
128*256a93a4Safresh1representation of the digest with any trailing padding removed.  The
129*256a93a4Safresh1string will be about 30% longer than the binary version.
130*256a93a4Safresh1L<MIME::Base64> tells you more about this encoding.
131*256a93a4Safresh1
132*256a93a4Safresh1=back
133*256a93a4Safresh1
134*256a93a4Safresh1
135*256a93a4Safresh1The functional interface is simply importable functions with the same
136*256a93a4Safresh1name as the algorithm.  The functions take the message as argument and
137*256a93a4Safresh1return the digest.  Example:
138*256a93a4Safresh1
139*256a93a4Safresh1  use Digest::MD5 qw(md5);
140*256a93a4Safresh1  $digest = md5($message);
141*256a93a4Safresh1
There are also versions of the functions with "_hex" or "_base64"
appended to the name, which return the digest in the indicated form.
144*256a93a4Safresh1
145*256a93a4Safresh1=head1 OO INTERFACE
146*256a93a4Safresh1
147*256a93a4Safresh1The following methods are available for all C<Digest::> modules:
148*256a93a4Safresh1
149*256a93a4Safresh1=over 4
150*256a93a4Safresh1
151*256a93a4Safresh1=item $ctx = Digest->XXX($arg,...)
152*256a93a4Safresh1
153*256a93a4Safresh1=item $ctx = Digest->new(XXX => $arg,...)
154*256a93a4Safresh1
155*256a93a4Safresh1=item $ctx = Digest::XXX->new($arg,...)
156*256a93a4Safresh1
The constructor returns some object that encapsulates the state of the
message-digest algorithm.  You can add data to the object and finally
ask for the digest.  The "XXX" should of course be replaced by the proper
name of the digest algorithm you want to use.
161*256a93a4Safresh1
The first two forms are simply syntactic sugar which automatically
load the right module on first use.  The second form allows you to use
algorithm names which contain characters that are not legal in Perl
identifiers, e.g. "SHA-1".  If no implementation for the given algorithm
can be found, then an exception is raised.
167*256a93a4Safresh1
To know what arguments (if any) the constructor takes (the C<$arg,...> above)
consult the docs for the specific digest implementation.
170*256a93a4Safresh1
If new() is called as an instance method (i.e. $ctx->new) it will just
reset the state of the object to the state of a newly created object.  No
new object is created in this case, and the return value is the
reference to the object (i.e. $ctx).
175*256a93a4Safresh1
176*256a93a4Safresh1=item $other_ctx = $ctx->clone
177*256a93a4Safresh1
178*256a93a4Safresh1The clone method creates a copy of the digest state object and returns
179*256a93a4Safresh1a reference to the copy.
180*256a93a4Safresh1
181*256a93a4Safresh1=item $ctx->reset
182*256a93a4Safresh1
183*256a93a4Safresh1This is just an alias for $ctx->new.
184*256a93a4Safresh1
185*256a93a4Safresh1=item $ctx->add( $data )
186*256a93a4Safresh1
187*256a93a4Safresh1=item $ctx->add( $chunk1, $chunk2, ... )
188*256a93a4Safresh1
189*256a93a4Safresh1The string value of the $data provided as argument is appended to the
190*256a93a4Safresh1message we calculate the digest for.  The return value is the $ctx
191*256a93a4Safresh1object itself.
192*256a93a4Safresh1
193*256a93a4Safresh1If more arguments are provided then they are all appended to the
194*256a93a4Safresh1message, thus all these lines will have the same effect on the state
195*256a93a4Safresh1of the $ctx object:
196*256a93a4Safresh1
197*256a93a4Safresh1  $ctx->add("a"); $ctx->add("b"); $ctx->add("c");
198*256a93a4Safresh1  $ctx->add("a")->add("b")->add("c");
199*256a93a4Safresh1  $ctx->add("a", "b", "c");
200*256a93a4Safresh1  $ctx->add("abc");
201*256a93a4Safresh1
202*256a93a4Safresh1Most algorithms are only defined for strings of bytes and this method
203*256a93a4Safresh1might therefore croak if the provided arguments contain chars with
204*256a93a4Safresh1ordinal number above 255.
205*256a93a4Safresh1
206*256a93a4Safresh1=item $ctx->addfile( $io_handle )
207*256a93a4Safresh1
208*256a93a4Safresh1The $io_handle is read until EOF and the content is appended to the
209*256a93a4Safresh1message we calculate the digest for.  The return value is the $ctx
210*256a93a4Safresh1object itself.
211*256a93a4Safresh1
The addfile() method will croak() if it fails reading data for some
reason.  If it croaks, the state of the $ctx object is unpredictable:
the addfile() method might have been able to read the file partially
before it failed.  It is probably wise to discard or reset the $ctx
object if this occurs.
217*256a93a4Safresh1
218*256a93a4Safresh1In most cases you want to make sure that the $io_handle is in
219*256a93a4Safresh1"binmode" before you pass it as argument to the addfile() method.
220*256a93a4Safresh1
221*256a93a4Safresh1=item $ctx->add_bits( $data, $nbits )
222*256a93a4Safresh1
223*256a93a4Safresh1=item $ctx->add_bits( $bitstring )
224*256a93a4Safresh1
The add_bits() method is an alternative to add() that allows partial
bytes to be appended to the message.  Most users can just ignore
this method since typical applications involve only whole-byte data.
228*256a93a4Safresh1
229*256a93a4Safresh1The two argument form of add_bits() will add the first $nbits bits
230*256a93a4Safresh1from $data.  For the last potentially partial byte only the high order
231*256a93a4Safresh1C<< $nbits % 8 >> bits are used.  If $nbits is greater than C<<
232*256a93a4Safresh1length($data) * 8 >>, then this method would do the same as C<<
233*256a93a4Safresh1$ctx->add($data) >>.
234*256a93a4Safresh1
235*256a93a4Safresh1The one argument form of add_bits() takes a $bitstring of "1" and "0"
236*256a93a4Safresh1chars as argument.  It's a shorthand for C<< $ctx->add_bits(pack("B*",
237*256a93a4Safresh1$bitstring), length($bitstring)) >>.
238*256a93a4Safresh1
239*256a93a4Safresh1The return value is the $ctx object itself.
240*256a93a4Safresh1
241*256a93a4Safresh1This example shows two calls that should have the same effect:
242*256a93a4Safresh1
243*256a93a4Safresh1   $ctx->add_bits("111100001010");
244*256a93a4Safresh1   $ctx->add_bits("\xF0\xA0", 12);
245*256a93a4Safresh1
246*256a93a4Safresh1Most digest algorithms are byte based and for these it is not possible
247*256a93a4Safresh1to add bits that are not a multiple of 8, and the add_bits() method
248*256a93a4Safresh1will croak if you try.
249*256a93a4Safresh1
250*256a93a4Safresh1=item $ctx->digest
251*256a93a4Safresh1
252*256a93a4Safresh1Return the binary digest for the message.
253*256a93a4Safresh1
254*256a93a4Safresh1Note that the C<digest> operation is effectively a destructive,
255*256a93a4Safresh1read-once operation. Once it has been performed, the $ctx object is
256*256a93a4Safresh1automatically C<reset> and can be used to calculate another digest
257*256a93a4Safresh1value.  Call $ctx->clone->digest if you want to calculate the digest
258*256a93a4Safresh1without resetting the digest state.
259*256a93a4Safresh1
260*256a93a4Safresh1=item $ctx->hexdigest
261*256a93a4Safresh1
262*256a93a4Safresh1Same as $ctx->digest, but will return the digest in hexadecimal form.
263*256a93a4Safresh1
264*256a93a4Safresh1=item $ctx->b64digest
265*256a93a4Safresh1
266*256a93a4Safresh1Same as $ctx->digest, but will return the digest as a base64 encoded
267*256a93a4Safresh1string without padding.
268*256a93a4Safresh1
269*256a93a4Safresh1=item $ctx->base64_padded_digest
270*256a93a4Safresh1
271*256a93a4Safresh1Same as $ctx->digest, but will return the digest as a base64 encoded
272*256a93a4Safresh1string.
273*256a93a4Safresh1
274*256a93a4Safresh1=back
275*256a93a4Safresh1
276*256a93a4Safresh1=head1 Digest speed
277*256a93a4Safresh1
This table should give some indication of the relative speed of
different algorithms.  It is sorted by throughput based on a benchmark
done with some implementations of this API:
281*256a93a4Safresh1
282*256a93a4Safresh1 Algorithm      Size    Implementation                  MB/s
283*256a93a4Safresh1
284*256a93a4Safresh1 MD4            128     Digest::MD4 v1.3               165.0
285*256a93a4Safresh1 MD5            128     Digest::MD5 v2.33               98.8
286*256a93a4Safresh1 SHA-256        256     Digest::SHA2 v1.1.0             66.7
287*256a93a4Safresh1 SHA-1          160     Digest::SHA v4.3.1              58.9
288*256a93a4Safresh1 SHA-1          160     Digest::SHA1 v2.10              48.8
289*256a93a4Safresh1 SHA-256        256     Digest::SHA v4.3.1              41.3
290*256a93a4Safresh1 Haval-256      256     Digest::Haval256 v1.0.4         39.8
291*256a93a4Safresh1 SHA-384        384     Digest::SHA2 v1.1.0             19.6
292*256a93a4Safresh1 SHA-512        512     Digest::SHA2 v1.1.0             19.3
293*256a93a4Safresh1 SHA-384        384     Digest::SHA v4.3.1              19.2
294*256a93a4Safresh1 SHA-512        512     Digest::SHA v4.3.1              19.2
295*256a93a4Safresh1 Whirlpool      512     Digest::Whirlpool v1.0.2        13.0
296*256a93a4Safresh1 MD2            128     Digest::MD2 v2.03                9.5
297*256a93a4Safresh1
298*256a93a4Safresh1 Adler-32        32     Digest::Adler32 v0.03            1.3
299*256a93a4Safresh1 CRC-16          16     Digest::CRC v0.05                1.1
300*256a93a4Safresh1 CRC-32          32     Digest::CRC v0.05                1.1
301*256a93a4Safresh1 MD5            128     Digest::Perl::MD5 v1.5           1.0
302*256a93a4Safresh1 CRC-CCITT       16     Digest::CRC v0.05                0.8
303*256a93a4Safresh1
These numbers were achieved Apr 2004 with ActivePerl-5.8.3 running
under Linux on a P4 2.8 GHz CPU.  The last 5 entries differ by being
pure perl implementations of the algorithms, which explains why they
are so slow.
308*256a93a4Safresh1
309*256a93a4Safresh1=head1 SEE ALSO
310*256a93a4Safresh1
311*256a93a4Safresh1L<Digest::Adler32>, L<Digest::CRC>, L<Digest::Haval256>,
312*256a93a4Safresh1L<Digest::HMAC>, L<Digest::MD2>, L<Digest::MD4>, L<Digest::MD5>,
313*256a93a4Safresh1L<Digest::SHA>, L<Digest::SHA1>, L<Digest::SHA2>, L<Digest::Whirlpool>
314*256a93a4Safresh1
315*256a93a4Safresh1New digest implementations should consider subclassing from L<Digest::base>.
316*256a93a4Safresh1
317*256a93a4Safresh1L<MIME::Base64>
318*256a93a4Safresh1
319*256a93a4Safresh1http://en.wikipedia.org/wiki/Cryptographic_hash_function
320*256a93a4Safresh1
321*256a93a4Safresh1=head1 AUTHOR
322*256a93a4Safresh1
323*256a93a4Safresh1Gisle Aas <gisle@aas.no>
324*256a93a4Safresh1
325*256a93a4Safresh1The C<Digest::> interface is based on the interface originally
326*256a93a4Safresh1developed by Neil Winton for his C<MD5> module.
327*256a93a4Safresh1
328*256a93a4Safresh1This library is free software; you can redistribute it and/or
329*256a93a4Safresh1modify it under the same terms as Perl itself.
330*256a93a4Safresh1
331*256a93a4Safresh1    Copyright 1998-2006 Gisle Aas.
332*256a93a4Safresh1    Copyright 1995,1996 Neil Winton.
333*256a93a4Safresh1
334*256a93a4Safresh1=cut
335