# xref: /openbsd-src/gnu/usr.bin/perl/cpan/Digest-MD5/MD5.pm (revision e068048151d29f2562a32185e21a8ba885482260)
package Digest::MD5;

use strict;
use warnings;

our $VERSION = '2.58_01';

# Functional-interface entry points; they are only exported on request.
our @EXPORT_OK = qw(md5 md5_hex md5_base64);

# Alias Exporter's import() into this package rather than adding Exporter
# to @ISA.
require Exporter;
*import = \&Exporter::import;

# Make Digest::base the parent class when it can be loaded.  When it cannot,
# add_bits() is installed as a stub that dies with the saved load error.
our @ISA;
eval {
    require Digest::base;
    @ISA = ('Digest::base');
};
if (my $base_error = $@) {
    *add_bits = sub { die $base_error };
}

# First choice: the XS implementation, loaded through XSLoader.
eval {
    require XSLoader;
    XSLoader::load('Digest::MD5', $VERSION);
};
if (!$@) {
    # XS loaded successfully: reset() is just another name for new().
    *reset = \&new;
}
else {
    # Keep the XS failure; the nested eval below overwrites $@.
    my $xs_error = $@;

    eval {
        # Try to load the pure perl version
        require Digest::Perl::MD5;

        Digest::Perl::MD5->import(qw(md5 md5_hex md5_base64));
        unshift @ISA, "Digest::Perl::MD5";    # make OO interface work
    };

    # If the fallback failed as well, report the original XS error.
    die $xs_error if $@;
}

1;
__END__
47b39c5158Smillert
48b39c5158Smillert=head1 NAME
49b39c5158Smillert
50b39c5158SmillertDigest::MD5 - Perl interface to the MD5 Algorithm
51b39c5158Smillert
52b39c5158Smillert=head1 SYNOPSIS
53b39c5158Smillert
54b39c5158Smillert # Functional style
55b39c5158Smillert use Digest::MD5 qw(md5 md5_hex md5_base64);
56b39c5158Smillert
57b39c5158Smillert $digest = md5($data);
58b39c5158Smillert $digest = md5_hex($data);
59b39c5158Smillert $digest = md5_base64($data);
60b39c5158Smillert
61b39c5158Smillert # OO style
62b39c5158Smillert use Digest::MD5;
63b39c5158Smillert
64b39c5158Smillert $ctx = Digest::MD5->new;
65b39c5158Smillert
66b39c5158Smillert $ctx->add($data);
6791f110e0Safresh1 $ctx->addfile($file_handle);
68b39c5158Smillert
69b39c5158Smillert $digest = $ctx->digest;
70b39c5158Smillert $digest = $ctx->hexdigest;
71b39c5158Smillert $digest = $ctx->b64digest;
72b39c5158Smillert
73b39c5158Smillert=head1 DESCRIPTION
74b39c5158Smillert
75b39c5158SmillertThe C<Digest::MD5> module allows you to use the RSA Data Security
76b39c5158SmillertInc. MD5 Message Digest algorithm from within Perl programs.  The
77b39c5158Smillertalgorithm takes as input a message of arbitrary length and produces as
78b39c5158Smillertoutput a 128-bit "fingerprint" or "message digest" of the input.
79b39c5158Smillert
80b39c5158SmillertNote that the MD5 algorithm is not as strong as it used to be.  It has
81b39c5158Smillertsince 2005 been easy to generate different messages that produce the
82b39c5158Smillertsame MD5 digest.  It still seems hard to generate messages that
83b39c5158Smillertproduce a given digest, but it is probably wise to move to stronger
84b39c5158Smillertalgorithms for applications that depend on the digest to uniquely identify
85b39c5158Smillerta message.
86b39c5158Smillert
The C<Digest::MD5> module provides a procedural interface for simple
use, as well as an object oriented interface that can handle messages
of arbitrary length and which can read files directly.
90b39c5158Smillert
91b39c5158Smillert=head1 FUNCTIONS
92b39c5158Smillert
93b39c5158SmillertThe following functions are provided by the C<Digest::MD5> module.
94b39c5158SmillertNone of these functions are exported by default.
95b39c5158Smillert
96b39c5158Smillert=over 4
97b39c5158Smillert
98b39c5158Smillert=item md5($data,...)
99b39c5158Smillert
100b39c5158SmillertThis function will concatenate all arguments, calculate the MD5 digest
101b39c5158Smillertof this "message", and return it in binary form.  The returned string
102b39c5158Smillertwill be 16 bytes long.
103b39c5158Smillert
104b39c5158SmillertThe result of md5("a", "b", "c") will be exactly the same as the
105b39c5158Smillertresult of md5("abc").
106b39c5158Smillert
107b39c5158Smillert=item md5_hex($data,...)
108b39c5158Smillert
109b39c5158SmillertSame as md5(), but will return the digest in hexadecimal form. The
110b39c5158Smillertlength of the returned string will be 32 and it will only contain
111b39c5158Smillertcharacters from this set: '0'..'9' and 'a'..'f'.
112b39c5158Smillert
113b39c5158Smillert=item md5_base64($data,...)
114b39c5158Smillert
115b39c5158SmillertSame as md5(), but will return the digest as a base64 encoded string.
116b39c5158SmillertThe length of the returned string will be 22 and it will only contain
117b39c5158Smillertcharacters from this set: 'A'..'Z', 'a'..'z', '0'..'9', '+' and
118b39c5158Smillert'/'.
119b39c5158Smillert
120b39c5158SmillertNote that the base64 encoded string returned is not padded to be a
121b39c5158Smillertmultiple of 4 bytes long.  If you want interoperability with other
122b39c5158Smillertbase64 encoded md5 digests you might want to append the redundant
123b39c5158Smillertstring "==" to the result.
124b39c5158Smillert
125b39c5158Smillert=back
126b39c5158Smillert
127b39c5158Smillert=head1 METHODS
128b39c5158Smillert
129b39c5158SmillertThe object oriented interface to C<Digest::MD5> is described in this
130b39c5158Smillertsection.  After a C<Digest::MD5> object has been created, you will add
131b39c5158Smillertdata to it and finally ask for the digest in a suitable format.  A
132b39c5158Smillertsingle object can be used to calculate multiple digests.
133b39c5158Smillert
134b39c5158SmillertThe following methods are provided:
135b39c5158Smillert
136b39c5158Smillert=over 4
137b39c5158Smillert
138b39c5158Smillert=item $md5 = Digest::MD5->new
139b39c5158Smillert
The constructor returns a new C<Digest::MD5> object which encapsulates
the state of the MD5 message-digest algorithm.
142b39c5158Smillert
If called as an instance method (i.e. $md5->new) it will just reset the
state of the object to the state of a newly created object.  No new
object is created in this case.
146b39c5158Smillert
147b39c5158Smillert=item $md5->reset
148b39c5158Smillert
149b39c5158SmillertThis is just an alias for $md5->new.
150b39c5158Smillert
151b39c5158Smillert=item $md5->clone
152b39c5158Smillert
This returns a copy of the $md5 object. It is useful when you do not want to
destroy the digest's state, but need an intermediate value of the
digest, e.g. when calculating digests iteratively on a continuous data
stream.  Example:
157b39c5158Smillert
158b39c5158Smillert    my $md5 = Digest::MD5->new;
159b39c5158Smillert    while (<>) {
160b39c5158Smillert	$md5->add($_);
161b39c5158Smillert	print "Line $.: ", $md5->clone->hexdigest, "\n";
162b39c5158Smillert    }
163b39c5158Smillert
164b39c5158Smillert=item $md5->add($data,...)
165b39c5158Smillert
166b39c5158SmillertThe $data provided as argument are appended to the message we
167b39c5158Smillertcalculate the digest for.  The return value is the $md5 object itself.
168b39c5158Smillert
169b39c5158SmillertAll these lines will have the same effect on the state of the $md5
170b39c5158Smillertobject:
171b39c5158Smillert
172b39c5158Smillert    $md5->add("a"); $md5->add("b"); $md5->add("c");
173b39c5158Smillert    $md5->add("a")->add("b")->add("c");
174b39c5158Smillert    $md5->add("a", "b", "c");
175b39c5158Smillert    $md5->add("abc");
176b39c5158Smillert
177b39c5158Smillert=item $md5->addfile($io_handle)
178b39c5158Smillert
179b39c5158SmillertThe $io_handle will be read until EOF and its content appended to the
180b39c5158Smillertmessage we calculate the digest for.  The return value is the $md5
181b39c5158Smillertobject itself.
182b39c5158Smillert
The addfile() method will croak() if it fails reading data for some
reason.  If it croaks, the state of the $md5 object is unpredictable:
the addfile() method might have been able to read the file partially
before it failed.  It is probably wise to discard or reset the $md5
object if this occurs.
188b39c5158Smillert
189b39c5158SmillertIn most cases you want to make sure that the $io_handle is in
190b39c5158SmillertC<binmode> before you pass it as argument to the addfile() method.
191b39c5158Smillert
192b39c5158Smillert=item $md5->add_bits($data, $nbits)
193b39c5158Smillert
194b39c5158Smillert=item $md5->add_bits($bitstring)
195b39c5158Smillert
Since the MD5 algorithm is byte oriented you can only add bits in
multiples of 8, so you probably want to just use add() instead.  The
add_bits() method is provided for compatibility with other digest
implementations.  See L<Digest> for a description of the arguments
that add_bits() takes.
201b39c5158Smillert
202b39c5158Smillert=item $md5->digest
203b39c5158Smillert
204b39c5158SmillertReturn the binary digest for the message.  The returned string will be
205b39c5158Smillert16 bytes long.
206b39c5158Smillert
207b39c5158SmillertNote that the C<digest> operation is effectively a destructive,
208b39c5158Smillertread-once operation. Once it has been performed, the C<Digest::MD5>
209b39c5158Smillertobject is automatically C<reset> and can be used to calculate another
210b39c5158Smillertdigest value.  Call $md5->clone->digest if you want to calculate the
211b39c5158Smillertdigest without resetting the digest state.
212b39c5158Smillert
213b39c5158Smillert=item $md5->hexdigest
214b39c5158Smillert
215b39c5158SmillertSame as $md5->digest, but will return the digest in hexadecimal
216b39c5158Smillertform. The length of the returned string will be 32 and it will only
217b39c5158Smillertcontain characters from this set: '0'..'9' and 'a'..'f'.
218b39c5158Smillert
219b39c5158Smillert=item $md5->b64digest
220b39c5158Smillert
221b39c5158SmillertSame as $md5->digest, but will return the digest as a base64 encoded
222b39c5158Smillertstring.  The length of the returned string will be 22 and it will only
223b39c5158Smillertcontain characters from this set: 'A'..'Z', 'a'..'z', '0'..'9', '+'
224b39c5158Smillertand '/'.
225b39c5158Smillert
226b39c5158Smillert
227b39c5158SmillertThe base64 encoded string returned is not padded to be a multiple of 4
228b39c5158Smillertbytes long.  If you want interoperability with other base64 encoded
229b39c5158Smillertmd5 digests you might want to append the string "==" to the result.
230b39c5158Smillert
231b8851fccSafresh1=item @ctx = $md5->context
232b8851fccSafresh1
233b8851fccSafresh1=item $md5->context(@ctx)
234b8851fccSafresh1
235eac174f2Safresh1Saves or restores the internal state.
236eac174f2Safresh1When called with no arguments, returns a list:
237eac174f2Safresh1number of blocks processed,
238eac174f2Safresh1a 16-byte internal state buffer,
239eac174f2Safresh1then optionally up to 63 bytes of unprocessed data if there are any.
240eac174f2Safresh1When passed those same arguments, restores the state.
241eac174f2Safresh1This is only useful for specialised operations.
242b8851fccSafresh1
243b39c5158Smillert=back
244b39c5158Smillert
245b39c5158Smillert
246b39c5158Smillert=head1 EXAMPLES
247b39c5158Smillert
248b39c5158SmillertThe simplest way to use this library is to import the md5_hex()
249b39c5158Smillertfunction (or one of its cousins):
250b39c5158Smillert
251b39c5158Smillert    use Digest::MD5 qw(md5_hex);
252b39c5158Smillert    print "Digest is ", md5_hex("foobarbaz"), "\n";
253b39c5158Smillert
254b39c5158SmillertThe above example would print out the message:
255b39c5158Smillert
256b39c5158Smillert    Digest is 6df23dc03f9b54cc38a0fc1483df6e21
257b39c5158Smillert
258b39c5158SmillertThe same checksum can also be calculated in OO style:
259b39c5158Smillert
260b39c5158Smillert    use Digest::MD5;
261b39c5158Smillert
262b39c5158Smillert    $md5 = Digest::MD5->new;
263b39c5158Smillert    $md5->add('foo', 'bar');
264b39c5158Smillert    $md5->add('baz');
265b39c5158Smillert    $digest = $md5->hexdigest;
266b39c5158Smillert
267b39c5158Smillert    print "Digest is $digest\n";
268b39c5158Smillert
With OO style, you can break the message arbitrarily.  This means that we
are no longer limited by having to hold the whole message in memory, i.e.
we can handle messages of any size.
272b39c5158Smillert
This is useful when calculating checksums for files:
274b39c5158Smillert
275b39c5158Smillert    use Digest::MD5;
276b39c5158Smillert
27791f110e0Safresh1    my $filename = shift || "/etc/passwd";
27891f110e0Safresh1    open (my $fh, '<', $filename) or die "Can't open '$filename': $!";
27991f110e0Safresh1    binmode($fh);
280b39c5158Smillert
281b39c5158Smillert    $md5 = Digest::MD5->new;
28291f110e0Safresh1    while (<$fh>) {
283b39c5158Smillert        $md5->add($_);
284b39c5158Smillert    }
28591f110e0Safresh1    close($fh);
28691f110e0Safresh1    print $md5->b64digest, " $filename\n";
287b39c5158Smillert
288b39c5158SmillertOr we can use the addfile method for more efficient reading of
289b39c5158Smillertthe file:
290b39c5158Smillert
291b39c5158Smillert    use Digest::MD5;
292b39c5158Smillert
29391f110e0Safresh1    my $filename = shift || "/etc/passwd";
29491f110e0Safresh1    open (my $fh, '<', $filename) or die "Can't open '$filename': $!";
29591f110e0Safresh1    binmode ($fh);
296b39c5158Smillert
29791f110e0Safresh1    print Digest::MD5->new->addfile($fh)->hexdigest, " $filename\n";
298b39c5158Smillert
Since the MD5 algorithm is only defined for strings of bytes, it cannot be
used on strings that contain characters with ordinal numbers above 255
(Unicode strings).  The MD5 functions and methods will croak if you try to
feed them such input data:
303b39c5158Smillert
304b39c5158Smillert    use Digest::MD5 qw(md5_hex);
305b39c5158Smillert
306b39c5158Smillert    my $str = "abc\x{300}";
307b39c5158Smillert    print md5_hex($str), "\n";  # croaks
308b39c5158Smillert    # Wide character in subroutine entry
309b39c5158Smillert
What you can do is calculate the MD5 checksum of the UTF-8
representation of such strings.  This is achieved by filtering the
string through the encode_utf8() function:
313b39c5158Smillert
314b39c5158Smillert    use Digest::MD5 qw(md5_hex);
315b39c5158Smillert    use Encode qw(encode_utf8);
316b39c5158Smillert
317b39c5158Smillert    my $str = "abc\x{300}";
318b39c5158Smillert    print md5_hex(encode_utf8($str)), "\n";
319b39c5158Smillert    # 8c2d46911f3f5a326455f0ed7a8ed3b3
320b39c5158Smillert
321b39c5158Smillert=head1 SEE ALSO
322b39c5158Smillert
323b39c5158SmillertL<Digest>,
324b39c5158SmillertL<Digest::MD2>,
325b39c5158SmillertL<Digest::SHA>,
326b39c5158SmillertL<Digest::HMAC>
327b39c5158Smillert
328b39c5158SmillertL<md5sum(1)>
329b39c5158Smillert
330b39c5158SmillertRFC 1321
331b39c5158Smillert
332b39c5158Smillerthttp://en.wikipedia.org/wiki/MD5
333b39c5158Smillert
334b39c5158SmillertThe paper "How to Break MD5 and Other Hash Functions" by Xiaoyun Wang
335b39c5158Smillertand Hongbo Yu.
336b39c5158Smillert
337b39c5158Smillert=head1 COPYRIGHT
338b39c5158Smillert
339b39c5158SmillertThis library is free software; you can redistribute it and/or
340b39c5158Smillertmodify it under the same terms as Perl itself.
341b39c5158Smillert
342b39c5158Smillert Copyright 1998-2003 Gisle Aas.
343b39c5158Smillert Copyright 1995-1996 Neil Winton.
344b39c5158Smillert Copyright 1991-1992 RSA Data Security, Inc.
345b39c5158Smillert
346b39c5158SmillertThe MD5 algorithm is defined in RFC 1321. This implementation is
347b39c5158Smillertderived from the reference C code in RFC 1321 which is covered by
348b39c5158Smillertthe following copyright statement:
349b39c5158Smillert
350b39c5158Smillert=over 4
351b39c5158Smillert
352b39c5158Smillert=item
353b39c5158Smillert
354b39c5158SmillertCopyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
355b39c5158Smillertrights reserved.
356b39c5158Smillert
357b39c5158SmillertLicense to copy and use this software is granted provided that it
358b39c5158Smillertis identified as the "RSA Data Security, Inc. MD5 Message-Digest
359b39c5158SmillertAlgorithm" in all material mentioning or referencing this software
360b39c5158Smillertor this function.
361b39c5158Smillert
362b39c5158SmillertLicense is also granted to make and use derivative works provided
363b39c5158Smillertthat such works are identified as "derived from the RSA Data
364b39c5158SmillertSecurity, Inc. MD5 Message-Digest Algorithm" in all material
365b39c5158Smillertmentioning or referencing the derived work.
366b39c5158Smillert
367b39c5158SmillertRSA Data Security, Inc. makes no representations concerning either
368b39c5158Smillertthe merchantability of this software or the suitability of this
369b39c5158Smillertsoftware for any particular purpose. It is provided "as is"
370b39c5158Smillertwithout express or implied warranty of any kind.
371b39c5158Smillert
372b39c5158SmillertThese notices must be retained in any copies of any part of this
373b39c5158Smillertdocumentation and/or software.
374b39c5158Smillert
375b39c5158Smillert=back
376b39c5158Smillert
377b39c5158SmillertThis copyright does not prohibit distribution of any version of Perl
378b39c5158Smillertcontaining this extension under the terms of the GNU or Artistic
379b39c5158Smillertlicenses.
380b39c5158Smillert
381b39c5158Smillert=head1 AUTHORS
382b39c5158Smillert
383b39c5158SmillertThe original C<MD5> interface was written by Neil Winton
384b39c5158Smillert(C<N.Winton@axion.bt.co.uk>).
385b39c5158Smillert
386b39c5158SmillertThe C<Digest::MD5> module is written by Gisle Aas <gisle@ActiveState.com>.
387b39c5158Smillert
388b39c5158Smillert=cut
389