xref: /openbsd-src/gnu/usr.bin/perl/cpan/File-Fetch/lib/File/Fetch.pm (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
1b39c5158Smillertpackage File::Fetch;
2b39c5158Smillert
3b39c5158Smillertuse strict;
4b39c5158Smillertuse FileHandle;
5b39c5158Smillertuse File::Temp;
6b39c5158Smillertuse File::Copy;
7b39c5158Smillertuse File::Spec;
8b39c5158Smillertuse File::Spec::Unix;
9b39c5158Smillertuse File::Basename              qw[dirname];
10b39c5158Smillert
11b39c5158Smillertuse Cwd                         qw[cwd];
12b39c5158Smillertuse Carp                        qw[carp];
13b39c5158Smillertuse IPC::Cmd                    qw[can_run run QUOTE];
14b39c5158Smillertuse File::Path                  qw[mkpath];
15b39c5158Smillertuse File::Temp                  qw[tempdir];
16b39c5158Smillertuse Params::Check               qw[check];
17b39c5158Smillertuse Module::Load::Conditional   qw[can_load];
18b39c5158Smillertuse Locale::Maketext::Simple    Style => 'gettext';
19b39c5158Smillert
20b39c5158Smillertuse vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21b39c5158Smillert                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
226fb12b70Safresh1                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
23b39c5158Smillert            ];
24b39c5158Smillert
25*eac174f2Safresh1$VERSION        = '1.04';
26b39c5158Smillert$VERSION        = eval $VERSION;    # avoid warnings with development releases
27b39c5158Smillert$PREFER_BIN     = 0;                # XXX TODO implement
28b39c5158Smillert$FROM_EMAIL     = 'File-Fetch@example.com';
29b39c5158Smillert$USER_AGENT     = "File::Fetch/$VERSION";
30b39c5158Smillert$BLACKLIST      = [qw|ftp|];
319f11ffb7Safresh1push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux';
32b39c5158Smillert$METHOD_FAIL    = { };
33b39c5158Smillert$FTP_PASSIVE    = 1;
34b39c5158Smillert$TIMEOUT        = 0;
35b39c5158Smillert$DEBUG          = 0;
36b39c5158Smillert$WARN           = 1;
376fb12b70Safresh1$FORCEIPV4      = 0;
38b39c5158Smillert
39b39c5158Smillert### methods available to fetch the file depending on the scheme
40b39c5158Smillert$METHODS = {
41898184e3Ssthen    http    => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
429f11ffb7Safresh1    https   => [ qw|lwp wget curl| ],
43898184e3Ssthen    ftp     => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
44b39c5158Smillert    file    => [ qw|lwp lftp file| ],
456fb12b70Safresh1    rsync   => [ qw|rsync| ],
466fb12b70Safresh1    git     => [ qw|git| ],
47b39c5158Smillert};
48b39c5158Smillert
49b39c5158Smillert### silly warnings ###
50b39c5158Smillertlocal $Params::Check::VERBOSE               = 1;
51b39c5158Smillertlocal $Params::Check::VERBOSE               = 1;
52b39c5158Smillertlocal $Module::Load::Conditional::VERBOSE   = 0;
53b39c5158Smillertlocal $Module::Load::Conditional::VERBOSE   = 0;
54b39c5158Smillert
55b39c5158Smillert### see what OS we are on, important for file:// uris ###
56b39c5158Smillertuse constant ON_WIN     => ($^O eq 'MSWin32');
57b39c5158Smillertuse constant ON_VMS     => ($^O eq 'VMS');
58b39c5158Smillertuse constant ON_UNIX    => (!ON_WIN);
59b39c5158Smillertuse constant HAS_VOL    => (ON_WIN);
60b39c5158Smillertuse constant HAS_SHARE  => (ON_WIN);
61898184e3Ssthenuse constant HAS_FETCH  => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
62b39c5158Smillert
63b39c5158Smillert=pod
64b39c5158Smillert
65b39c5158Smillert=head1 NAME
66b39c5158Smillert
67b39c5158SmillertFile::Fetch - A generic file fetching mechanism
68b39c5158Smillert
69b39c5158Smillert=head1 SYNOPSIS
70b39c5158Smillert
71b39c5158Smillert    use File::Fetch;
72b39c5158Smillert
73b39c5158Smillert    ### build a File::Fetch object ###
74b39c5158Smillert    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
75b39c5158Smillert
76b39c5158Smillert    ### fetch the uri to cwd() ###
77b39c5158Smillert    my $where = $ff->fetch() or die $ff->error;
78b39c5158Smillert
79b39c5158Smillert    ### fetch the uri to /tmp ###
80b39c5158Smillert    my $where = $ff->fetch( to => '/tmp' );
81b39c5158Smillert
82b39c5158Smillert    ### parsed bits from the uri ###
83b39c5158Smillert    $ff->uri;
84b39c5158Smillert    $ff->scheme;
85b39c5158Smillert    $ff->host;
86b39c5158Smillert    $ff->path;
87b39c5158Smillert    $ff->file;
88b39c5158Smillert
89b39c5158Smillert=head1 DESCRIPTION
90b39c5158Smillert
91b39c5158SmillertFile::Fetch is a generic file fetching mechanism.
92b39c5158Smillert
93b39c5158SmillertIt allows you to fetch any file pointed to by a C<ftp>, C<http>,
946fb12b70Safresh1C<file>, C<git> or C<rsync> uri by a number of different means.
95b39c5158Smillert
96b39c5158SmillertSee the C<HOW IT WORKS> section further down for details.
97b39c5158Smillert
98b39c5158Smillert=head1 ACCESSORS
99b39c5158Smillert
100b39c5158SmillertA C<File::Fetch> object has the following accessors
101b39c5158Smillert
102b39c5158Smillert=over 4
103b39c5158Smillert
104b39c5158Smillert=item $ff->uri
105b39c5158Smillert
106b39c5158SmillertThe uri you passed to the constructor
107b39c5158Smillert
108b39c5158Smillert=item $ff->scheme
109b39c5158Smillert
110b39c5158SmillertThe scheme from the uri (like 'file', 'http', etc)
111b39c5158Smillert
112b39c5158Smillert=item $ff->host
113b39c5158Smillert
114b39c5158SmillertThe hostname in the uri.  Will be empty if host was originally
115b39c5158Smillert'localhost' for a 'file://' url.
116b39c5158Smillert
117b39c5158Smillert=item $ff->vol
118b39c5158Smillert
119b39c5158SmillertOn operating systems with the concept of a volume the second element
120b39c5158Smillertof a file:// is considered to the be volume specification for the file.
121b39c5158SmillertThus on Win32 this routine returns the volume, on other operating
122b39c5158Smillertsystems this returns nothing.
123b39c5158Smillert
124b39c5158SmillertOn Windows this value may be empty if the uri is to a network share, in
125b39c5158Smillertwhich case the 'share' property will be defined. Additionally, volume
126b39c5158Smillertspecifications that use '|' as ':' will be converted on read to use ':'.
127b39c5158Smillert
128b39c5158SmillertOn VMS, which has a volume concept, this field will be empty because VMS
129b39c5158Smillertfile specifications are converted to absolute UNIX format and the volume
130b39c5158Smillertinformation is transparently included.
131b39c5158Smillert
132b39c5158Smillert=item $ff->share
133b39c5158Smillert
134b39c5158SmillertOn systems with the concept of a network share (currently only Windows) returns
135b39c5158Smillertthe sharename from a file://// url.  On other operating systems returns empty.
136b39c5158Smillert
137b39c5158Smillert=item $ff->path
138b39c5158Smillert
139b39c5158SmillertThe path from the uri, will be at least a single '/'.
140b39c5158Smillert
141b39c5158Smillert=item $ff->file
142b39c5158Smillert
143b39c5158SmillertThe name of the remote file. For the local file name, the
144b39c5158Smillertresult of $ff->output_file will be used.
145b39c5158Smillert
14691f110e0Safresh1=item $ff->file_default
14791f110e0Safresh1
14891f110e0Safresh1The name of the default local file, that $ff->output_file falls back to if
14991f110e0Safresh1it would otherwise return no filename. For example when fetching a URI like
15091f110e0Safresh1http://www.abc.net.au/ the contents retrieved may be from a remote file called
15191f110e0Safresh1'index.html'. The default value of this attribute is literally 'file_default'.
15291f110e0Safresh1
153b39c5158Smillert=cut
154b39c5158Smillert
155b39c5158Smillert
156b39c5158Smillert##########################
157b39c5158Smillert### Object & Accessors ###
158b39c5158Smillert##########################
159b39c5158Smillert
160b39c5158Smillert{
161b39c5158Smillert    ### template for autogenerated accessors ###
162b39c5158Smillert    my $Tmpl = {
163b39c5158Smillert        scheme          => { default => 'http' },
164b39c5158Smillert        host            => { default => 'localhost' },
165b39c5158Smillert        path            => { default => '/' },
166b39c5158Smillert        file            => { required => 1 },
167b39c5158Smillert        uri             => { required => 1 },
1689f11ffb7Safresh1        userinfo        => { default => '' },
169b39c5158Smillert        vol             => { default => '' }, # windows for file:// uris
170b39c5158Smillert        share           => { default => '' }, # windows for file:// uris
17191f110e0Safresh1        file_default    => { default => 'file_default' },
17291f110e0Safresh1        tempdir_root    => { required => 1 }, # Should be lazy-set at ->new()
173b39c5158Smillert        _error_msg      => { no_override => 1 },
174b39c5158Smillert        _error_msg_long => { no_override => 1 },
175b39c5158Smillert    };
176b39c5158Smillert
177b39c5158Smillert    for my $method ( keys %$Tmpl ) {
178b39c5158Smillert        no strict 'refs';
179b39c5158Smillert        *$method = sub {
180b39c5158Smillert                        my $self = shift;
181b39c5158Smillert                        $self->{$method} = $_[0] if @_;
182b39c5158Smillert                        return $self->{$method};
183b39c5158Smillert                    }
184b39c5158Smillert    }
185b39c5158Smillert
186b39c5158Smillert    sub _create {
187b39c5158Smillert        my $class = shift;
188b39c5158Smillert        my %hash  = @_;
189b39c5158Smillert
190b39c5158Smillert        my $args = check( $Tmpl, \%hash ) or return;
191b39c5158Smillert
192b39c5158Smillert        bless $args, $class;
193b39c5158Smillert
194b39c5158Smillert        if( lc($args->scheme) ne 'file' and not $args->host ) {
195b39c5158Smillert            return $class->_error(loc(
196b39c5158Smillert                "Hostname required when fetching from '%1'",$args->scheme));
197b39c5158Smillert        }
198b39c5158Smillert
19991f110e0Safresh1        for (qw[path]) {
200b39c5158Smillert            unless( $args->$_() ) { # 5.5.x needs the ()
201b39c5158Smillert                return $class->_error(loc("No '%1' specified",$_));
202b39c5158Smillert            }
203b39c5158Smillert        }
204b39c5158Smillert
205b39c5158Smillert        return $args;
206b39c5158Smillert    }
207b39c5158Smillert}
208b39c5158Smillert
209b39c5158Smillert=item $ff->output_file
210b39c5158Smillert
211b39c5158SmillertThe name of the output file. This is the same as $ff->file,
212b39c5158Smillertbut any query parameters are stripped off. For example:
213b39c5158Smillert
214b39c5158Smillert    http://example.com/index.html?x=y
215b39c5158Smillert
216b39c5158Smillertwould make the output file be C<index.html> rather than
217b39c5158SmillertC<index.html?x=y>.
218b39c5158Smillert
219b39c5158Smillert=back
220b39c5158Smillert
221b39c5158Smillert=cut
222b39c5158Smillert
223b39c5158Smillertsub output_file {
224b39c5158Smillert    my $self = shift;
225b39c5158Smillert    my $file = $self->file;
226b39c5158Smillert
227b39c5158Smillert    $file =~ s/\?.*$//g;
228b39c5158Smillert
22991f110e0Safresh1    $file ||= $self->file_default;
23091f110e0Safresh1
231b39c5158Smillert    return $file;
232b39c5158Smillert}
233b39c5158Smillert
234b39c5158Smillert### XXX do this or just point to URI::Escape?
235b39c5158Smillert# =head2 $esc_uri = $ff->escaped_uri
236b39c5158Smillert#
237b39c5158Smillert# =cut
238b39c5158Smillert#
239b39c5158Smillert# ### most of this is stolen straight from URI::escape
240b39c5158Smillert# {   ### Build a char->hex map
241b39c5158Smillert#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
242b39c5158Smillert#
243b39c5158Smillert#     sub escaped_uri {
244b39c5158Smillert#         my $self = shift;
245b39c5158Smillert#         my $uri  = $self->uri;
246b39c5158Smillert#
247b39c5158Smillert#         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
248b39c5158Smillert#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
249b39c5158Smillert#                     $escapes{$1} || $self->_fail_hi($1)/ge;
250b39c5158Smillert#
251b39c5158Smillert#         return $uri;
252b39c5158Smillert#     }
253b39c5158Smillert#
254b39c5158Smillert#     sub _fail_hi {
255b39c5158Smillert#         my $self = shift;
256b39c5158Smillert#         my $char = shift;
257b39c5158Smillert#
258b39c5158Smillert#         $self->_error(loc(
259b39c5158Smillert#             "Can't escape '%1', try using the '%2' module instead",
260b39c5158Smillert#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
261b39c5158Smillert#         ));
262b39c5158Smillert#     }
263b39c5158Smillert#
264b39c5158Smillert#     sub output_file {
265b39c5158Smillert#
266b39c5158Smillert#     }
267b39c5158Smillert#
268b39c5158Smillert#
269b39c5158Smillert# }
270b39c5158Smillert
271b39c5158Smillert=head1 METHODS
272b39c5158Smillert
273b39c5158Smillert=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
274b39c5158Smillert
275b39c5158SmillertParses the uri and creates a corresponding File::Fetch::Item object,
276b39c5158Smillertthat is ready to be C<fetch>ed and returns it.
277b39c5158Smillert
278b39c5158SmillertReturns false on failure.
279b39c5158Smillert
280b39c5158Smillert=cut
281b39c5158Smillert
282b39c5158Smillertsub new {
283b39c5158Smillert    my $class = shift;
284b39c5158Smillert    my %hash  = @_;
285b39c5158Smillert
28691f110e0Safresh1    my ($uri, $file_default, $tempdir_root);
287b39c5158Smillert    my $tmpl = {
288b39c5158Smillert        uri          => { required => 1, store => \$uri },
28991f110e0Safresh1        file_default => { required => 0, store => \$file_default },
29091f110e0Safresh1        tempdir_root => { required => 0, store => \$tempdir_root },
291b39c5158Smillert    };
292b39c5158Smillert
293b39c5158Smillert    check( $tmpl, \%hash ) or return;
294b39c5158Smillert
295b39c5158Smillert    ### parse the uri to usable parts ###
296b39c5158Smillert    my $href    = $class->_parse_uri( $uri ) or return;
297b39c5158Smillert
29891f110e0Safresh1    $href->{file_default} = $file_default if $file_default;
29991f110e0Safresh1    $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
30091f110e0Safresh1    $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd      ) if not $href->{tempdir_root};
30191f110e0Safresh1
302b39c5158Smillert    ### make it into a FFI object ###
303b39c5158Smillert    my $ff      = $class->_create( %$href ) or return;
304b39c5158Smillert
305b39c5158Smillert
306b39c5158Smillert    ### return the object ###
307b39c5158Smillert    return $ff;
308b39c5158Smillert}
309b39c5158Smillert
310b39c5158Smillert### parses an uri to a hash structure:
311b39c5158Smillert###
312b39c5158Smillert### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
313b39c5158Smillert###
314b39c5158Smillert### becomes:
315b39c5158Smillert###
316b39c5158Smillert### $href = {
317b39c5158Smillert###     scheme  => 'ftp',
318b39c5158Smillert###     host    => 'ftp.cpan.org',
319b39c5158Smillert###     path    => '/pub/mirror',
320b39c5158Smillert###     file    => 'index.html'
321b39c5158Smillert### };
322b39c5158Smillert###
323b39c5158Smillert### In the case of file:// urls there maybe be additional fields
324b39c5158Smillert###
325b39c5158Smillert### For systems with volume specifications such as Win32 there will be
326b39c5158Smillert### a volume specifier provided in the 'vol' field.
327b39c5158Smillert###
328b39c5158Smillert###   'vol' => 'volumename'
329b39c5158Smillert###
330b39c5158Smillert### For windows file shares there may be a 'share' key specified
331b39c5158Smillert###
332b39c5158Smillert###   'share' => 'sharename'
333b39c5158Smillert###
334b39c5158Smillert### Note that the rules of what a file:// url means vary by the operating system
335b39c5158Smillert### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
336b39c5158Smillert### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
337b39c5158Smillert### not '/foo/bar.txt'
338b39c5158Smillert###
339b39c5158Smillert### Similarly if the host interpreting the url is VMS then
340b39c5158Smillert### file:///disk$user/my/notes/note12345.txt' means
341b39c5158Smillert### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
342b39c5158Smillert### if it is unix where it means /disk$user/my/notes/note12345.txt'.
343b39c5158Smillert### Except for some cases in the File::Spec methods, Perl on VMS will generally
344b39c5158Smillert### handle UNIX format file specifications.
345b39c5158Smillert###
346b39c5158Smillert### This means it is impossible to serve certain file:// urls on certain systems.
347b39c5158Smillert###
348b39c5158Smillert### Thus are the problems with a protocol-less specification. :-(
349b39c5158Smillert###
350b39c5158Smillert
351b39c5158Smillertsub _parse_uri {
352b39c5158Smillert    my $self = shift;
353b39c5158Smillert    my $uri  = shift or return;
354b39c5158Smillert
355b39c5158Smillert    my $href = { uri => $uri };
356b39c5158Smillert
357b39c5158Smillert    ### find the scheme ###
358b39c5158Smillert    $uri            =~ s|^(\w+)://||;
359b39c5158Smillert    $href->{scheme} = $1;
360b39c5158Smillert
361b39c5158Smillert    ### See rfc 1738 section 3.10
362*eac174f2Safresh1    ### https://datatracker.ietf.org/doc/html/rfc1738#section-3.10
363b39c5158Smillert    ### And wikipedia for more on windows file:// urls
364b39c5158Smillert    ### http://en.wikipedia.org/wiki/File://
365b39c5158Smillert    if( $href->{scheme} eq 'file' ) {
366b39c5158Smillert
367b39c5158Smillert        my @parts = split '/',$uri;
368b39c5158Smillert
369b39c5158Smillert        ### file://hostname/...
370b39c5158Smillert        ### file://hostname/...
371b39c5158Smillert        ### normalize file://localhost with file:///
372b39c5158Smillert        $href->{host} = $parts[0] || '';
373b39c5158Smillert
374b39c5158Smillert        ### index in @parts where the path components begin;
375b39c5158Smillert        my $index = 1;
376b39c5158Smillert
377b39c5158Smillert        ### file:////hostname/sharename/blah.txt
378b39c5158Smillert        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
379b39c5158Smillert
380b39c5158Smillert            $href->{host}   = $parts[2] || '';  # avoid warnings
381b39c5158Smillert            $href->{share}  = $parts[3] || '';  # avoid warnings
382b39c5158Smillert
383b39c5158Smillert            $index          = 4         # index after the share
384b39c5158Smillert
385b39c5158Smillert        ### file:///D|/blah.txt
386b39c5158Smillert        ### file:///D:/blah.txt
387b39c5158Smillert        } elsif (HAS_VOL) {
388b39c5158Smillert
389b39c5158Smillert            ### this code comes from dmq's patch, but:
390b39c5158Smillert            ### XXX if volume is empty, wouldn't that be an error? --kane
391b39c5158Smillert            ### if so, our file://localhost test needs to be fixed as wel
392b39c5158Smillert            $href->{vol}    = $parts[1] || '';
393b39c5158Smillert
394b39c5158Smillert            ### correct D| style colume descriptors
395b39c5158Smillert            $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
396b39c5158Smillert
397b39c5158Smillert            $index          = 2;        # index after the volume
398b39c5158Smillert        }
399b39c5158Smillert
400b39c5158Smillert        ### rebuild the path from the leftover parts;
401b39c5158Smillert        $href->{path} = join '/', '', splice( @parts, $index, $#parts );
402b39c5158Smillert
403b39c5158Smillert    } else {
404b39c5158Smillert        ### using anything but qw() in hash slices may produce warnings
405b39c5158Smillert        ### in older perls :-(
4069f11ffb7Safresh1        @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s;
407b39c5158Smillert    }
408b39c5158Smillert
409b39c5158Smillert    ### split the path into file + dir ###
410b39c5158Smillert    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
411b39c5158Smillert        $href->{path} = $parts[1];
412b39c5158Smillert        $href->{file} = $parts[2];
413b39c5158Smillert    }
414b39c5158Smillert
415b39c5158Smillert    ### host will be empty if the target was 'localhost' and the
416b39c5158Smillert    ### scheme was 'file'
417b39c5158Smillert    $href->{host} = '' if   ($href->{host}      eq 'localhost') and
418b39c5158Smillert                            ($href->{scheme}    eq 'file');
419b39c5158Smillert
420b39c5158Smillert    return $href;
421b39c5158Smillert}
422b39c5158Smillert
423b39c5158Smillert=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
424b39c5158Smillert
425b39c5158SmillertFetches the file you requested and returns the full path to the file.
426b39c5158Smillert
427b39c5158SmillertBy default it writes to C<cwd()>, but you can override that by specifying
428b39c5158Smillertthe C<to> argument:
429b39c5158Smillert
430b39c5158Smillert    ### file fetch to /tmp, full path to the file in $where
431b39c5158Smillert    $where = $ff->fetch( to => '/tmp' );
432b39c5158Smillert
433b39c5158Smillert    ### file slurped into $scalar, full path to the file in $where
434b39c5158Smillert    ### file is downloaded to a temp directory and cleaned up at exit time
435b39c5158Smillert    $where = $ff->fetch( to => \$scalar );
436b39c5158Smillert
437b39c5158SmillertReturns the full path to the downloaded file on success, and false
438b39c5158Smillerton failure.
439b39c5158Smillert
440b39c5158Smillert=cut
441b39c5158Smillert
442b39c5158Smillertsub fetch {
443b39c5158Smillert    my $self = shift or return;
444b39c5158Smillert    my %hash = @_;
445b39c5158Smillert
446b39c5158Smillert    my $target;
447b39c5158Smillert    my $tmpl = {
448b39c5158Smillert        to  => { default => cwd(), store => \$target },
449b39c5158Smillert    };
450b39c5158Smillert
451b39c5158Smillert    check( $tmpl, \%hash ) or return;
452b39c5158Smillert
453b39c5158Smillert    my ($to, $fh);
454b39c5158Smillert    ### you want us to slurp the contents
455b39c5158Smillert    if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
45691f110e0Safresh1        $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
457b39c5158Smillert
458b39c5158Smillert    ### plain old fetch
459b39c5158Smillert    } else {
460b39c5158Smillert        $to = $target;
461b39c5158Smillert
462b39c5158Smillert        ### On VMS force to VMS format so File::Spec will work.
463b39c5158Smillert        $to = VMS::Filespec::vmspath($to) if ON_VMS;
464b39c5158Smillert
465b39c5158Smillert        ### create the path if it doesn't exist yet ###
466b39c5158Smillert        unless( -d $to ) {
467b39c5158Smillert            eval { mkpath( $to ) };
468b39c5158Smillert
469b39c5158Smillert            return $self->_error(loc("Could not create path '%1'",$to)) if $@;
470b39c5158Smillert        }
471b39c5158Smillert    }
472b39c5158Smillert
473b39c5158Smillert    ### set passive ftp if required ###
474b39c5158Smillert    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
475b39c5158Smillert
476b39c5158Smillert    ### we dont use catfile on win32 because if we are using a cygwin tool
477b39c5158Smillert    ### under cmd.exe they wont understand windows style separators.
478b39c5158Smillert    my $out_to = ON_WIN ? $to.'/'.$self->output_file
479b39c5158Smillert                        : File::Spec->catfile( $to, $self->output_file );
480b39c5158Smillert
481b39c5158Smillert    for my $method ( @{ $METHODS->{$self->scheme} } ) {
482b39c5158Smillert        my $sub =  '_'.$method.'_fetch';
483b39c5158Smillert
484b39c5158Smillert        unless( __PACKAGE__->can($sub) ) {
485b39c5158Smillert            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
486b39c5158Smillert                        $method));
487b39c5158Smillert            next;
488b39c5158Smillert        }
489b39c5158Smillert
490b39c5158Smillert        ### method is blacklisted ###
491b39c5158Smillert        next if grep { lc $_ eq $method } @$BLACKLIST;
492b39c5158Smillert
493b39c5158Smillert        ### method is known to fail ###
494b39c5158Smillert        next if $METHOD_FAIL->{$method};
495b39c5158Smillert
496b39c5158Smillert        ### there's serious issues with IPC::Run and quoting of command
497b39c5158Smillert        ### line arguments. using quotes in the wrong place breaks things,
498b39c5158Smillert        ### and in the case of say,
499b39c5158Smillert        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
500b39c5158Smillert        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
501b39c5158Smillert        ### it doesn't matter how you quote, it always fails.
502b39c5158Smillert        local $IPC::Cmd::USE_IPC_RUN = 0;
503b39c5158Smillert
504b39c5158Smillert        if( my $file = $self->$sub(
505b39c5158Smillert                        to => $out_to
506b39c5158Smillert        )){
507b39c5158Smillert
508b39c5158Smillert            unless( -e $file && -s _ ) {
509b39c5158Smillert                $self->_error(loc("'%1' said it fetched '%2', ".
510b39c5158Smillert                     "but it was not created",$method,$file));
511b39c5158Smillert
512b39c5158Smillert                ### mark the failure ###
513b39c5158Smillert                $METHOD_FAIL->{$method} = 1;
514b39c5158Smillert
515b39c5158Smillert                next;
516b39c5158Smillert
517b39c5158Smillert            } else {
518b39c5158Smillert
519b39c5158Smillert                ### slurp mode?
520b39c5158Smillert                if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
521b39c5158Smillert
522b39c5158Smillert                    ### open the file
523898184e3Ssthen                    open my $fh, "<$file" or do {
524b39c5158Smillert                        $self->_error(
525b39c5158Smillert                            loc("Could not open '%1': %2", $file, $!));
526b39c5158Smillert                        return;
527b39c5158Smillert                    };
528b39c5158Smillert
529b39c5158Smillert                    ### slurp
530b39c5158Smillert                    $$target = do { local $/; <$fh> };
531b39c5158Smillert
532b39c5158Smillert                }
533b39c5158Smillert
534b39c5158Smillert                my $abs = File::Spec->rel2abs( $file );
535b39c5158Smillert                return $abs;
536b39c5158Smillert
537b39c5158Smillert            }
538b39c5158Smillert        }
539b39c5158Smillert    }
540b39c5158Smillert
541b39c5158Smillert
542b39c5158Smillert    ### if we got here, we looped over all methods, but we weren't able
543b39c5158Smillert    ### to fetch it.
544b39c5158Smillert    return;
545b39c5158Smillert}
546b39c5158Smillert
547b39c5158Smillert########################
548b39c5158Smillert### _*_fetch methods ###
549b39c5158Smillert########################
550b39c5158Smillert
551b39c5158Smillert### LWP fetching ###
552b39c5158Smillertsub _lwp_fetch {
553b39c5158Smillert    my $self = shift;
554b39c5158Smillert    my %hash = @_;
555b39c5158Smillert
556b39c5158Smillert    my ($to);
557b39c5158Smillert    my $tmpl = {
558b39c5158Smillert        to  => { required => 1, store => \$to }
559b39c5158Smillert    };
560b39c5158Smillert    check( $tmpl, \%hash ) or return;
561b39c5158Smillert
562b39c5158Smillert    ### modules required to download with lwp ###
563b39c5158Smillert    my $use_list = {
564b39c5158Smillert        LWP                 => '0.0',
565b39c5158Smillert        'LWP::UserAgent'    => '0.0',
566b39c5158Smillert        'HTTP::Request'     => '0.0',
567b39c5158Smillert        'HTTP::Status'      => '0.0',
568b39c5158Smillert        URI                 => '0.0',
569b39c5158Smillert
570b39c5158Smillert    };
571b39c5158Smillert
5729f11ffb7Safresh1    if ($self->scheme eq 'https') {
5739f11ffb7Safresh1        $use_list->{'LWP::Protocol::https'} = '0';
5749f11ffb7Safresh1    }
5759f11ffb7Safresh1
576*eac174f2Safresh1    ### Fix CVE-2016-1238 ###
577*eac174f2Safresh1    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
57891f110e0Safresh1    unless( can_load( modules => $use_list ) ) {
57991f110e0Safresh1        $METHOD_FAIL->{'lwp'} = 1;
58091f110e0Safresh1        return;
58191f110e0Safresh1    }
582b39c5158Smillert
583b39c5158Smillert    ### setup the uri object
584b39c5158Smillert    my $uri = URI->new( File::Spec::Unix->catfile(
585b39c5158Smillert                                $self->path, $self->file
586b39c5158Smillert                    ) );
587b39c5158Smillert
588b39c5158Smillert    ### special rules apply for file:// uris ###
589b39c5158Smillert    $uri->scheme( $self->scheme );
590b39c5158Smillert    $uri->host( $self->scheme eq 'file' ? '' : $self->host );
5919f11ffb7Safresh1
5929f11ffb7Safresh1    if ($self->userinfo) {
5939f11ffb7Safresh1        $uri->userinfo($self->userinfo);
5949f11ffb7Safresh1    } elsif ($self->scheme ne 'file') {
5959f11ffb7Safresh1        $uri->userinfo("anonymous:$FROM_EMAIL");
5969f11ffb7Safresh1    }
597b39c5158Smillert
598b39c5158Smillert    ### set up the useragent object
599b39c5158Smillert    my $ua = LWP::UserAgent->new();
600b39c5158Smillert    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
601b39c5158Smillert    $ua->agent( $USER_AGENT );
602b39c5158Smillert    $ua->from( $FROM_EMAIL );
603b39c5158Smillert    $ua->env_proxy;
604b39c5158Smillert
605b39c5158Smillert    my $res = $ua->mirror($uri, $to) or return;
606b39c5158Smillert
607b39c5158Smillert    ### uptodate or fetched ok ###
608b39c5158Smillert    if ( $res->code == 304 or $res->code == 200 ) {
609b39c5158Smillert        return $to;
610b39c5158Smillert
611b39c5158Smillert    } else {
612b39c5158Smillert        return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
613b39c5158Smillert                    $res->code, HTTP::Status::status_message($res->code),
614b39c5158Smillert                    $res->status_line));
615b39c5158Smillert    }
616b39c5158Smillert
617b39c5158Smillert}
618b39c5158Smillert
619898184e3Ssthen### HTTP::Tiny fetching ###
620898184e3Ssthensub _httptiny_fetch {
621898184e3Ssthen    my $self = shift;
622898184e3Ssthen    my %hash = @_;
623898184e3Ssthen
624898184e3Ssthen    my ($to);
625898184e3Ssthen    my $tmpl = {
626898184e3Ssthen        to  => { required => 1, store => \$to }
627898184e3Ssthen    };
628898184e3Ssthen    check( $tmpl, \%hash ) or return;
629898184e3Ssthen
630898184e3Ssthen    my $use_list = {
631898184e3Ssthen        'HTTP::Tiny'    => '0.008',
632898184e3Ssthen
633898184e3Ssthen    };
634898184e3Ssthen
635*eac174f2Safresh1    ### Fix CVE-2016-1238 ###
636*eac174f2Safresh1    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
63791f110e0Safresh1    unless( can_load(modules => $use_list) ) {
63891f110e0Safresh1        $METHOD_FAIL->{'httptiny'} = 1;
63991f110e0Safresh1        return;
64091f110e0Safresh1    }
641898184e3Ssthen
642898184e3Ssthen    my $uri = $self->uri;
643898184e3Ssthen
644898184e3Ssthen    my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
645898184e3Ssthen
646898184e3Ssthen    my $rc = $http->mirror( $uri, $to );
647898184e3Ssthen
648898184e3Ssthen    unless ( $rc->{success} ) {
649898184e3Ssthen
650898184e3Ssthen        return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
651898184e3Ssthen                    $rc->{status}, $rc->{reason} ) );
652898184e3Ssthen
653898184e3Ssthen    }
654898184e3Ssthen
655898184e3Ssthen    return $to;
656898184e3Ssthen
657898184e3Ssthen}
658898184e3Ssthen
659898184e3Ssthen### HTTP::Lite fetching ###
660898184e3Ssthensub _httplite_fetch {
661898184e3Ssthen    my $self = shift;
662898184e3Ssthen    my %hash = @_;
663898184e3Ssthen
664898184e3Ssthen    my ($to);
665898184e3Ssthen    my $tmpl = {
666898184e3Ssthen        to  => { required => 1, store => \$to }
667898184e3Ssthen    };
668898184e3Ssthen    check( $tmpl, \%hash ) or return;
669898184e3Ssthen
670898184e3Ssthen    ### modules required to download with lwp ###
671898184e3Ssthen    my $use_list = {
672898184e3Ssthen        'HTTP::Lite'    => '2.2',
6739f11ffb7Safresh1        'MIME::Base64'  => '0',
674898184e3Ssthen    };
675898184e3Ssthen
676*eac174f2Safresh1    ### Fix CVE-2016-1238 ###
677*eac174f2Safresh1    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
67891f110e0Safresh1    unless( can_load(modules => $use_list) ) {
67991f110e0Safresh1        $METHOD_FAIL->{'httplite'} = 1;
68091f110e0Safresh1        return;
68191f110e0Safresh1    }
682898184e3Ssthen
683898184e3Ssthen    my $uri = $self->uri;
684898184e3Ssthen    my $retries = 0;
685898184e3Ssthen
686898184e3Ssthen    RETRIES: while ( $retries++ < 5 ) {
687898184e3Ssthen
688898184e3Ssthen      my $http = HTTP::Lite->new();
689898184e3Ssthen      # Naughty naughty but there isn't any accessor/setter
690898184e3Ssthen      $http->{timeout} = $TIMEOUT if $TIMEOUT;
691898184e3Ssthen      $http->http11_mode(1);
692898184e3Ssthen
6939f11ffb7Safresh1      if ($self->userinfo) {
6949f11ffb7Safresh1          my $encoded = MIME::Base64::encode($self->userinfo, '');
6959f11ffb7Safresh1          $http->add_req_header("Authorization", "Basic $encoded");
6969f11ffb7Safresh1      }
6979f11ffb7Safresh1
698898184e3Ssthen      my $fh = FileHandle->new;
699898184e3Ssthen
700898184e3Ssthen      unless ( $fh->open($to,'>') ) {
701898184e3Ssthen        return $self->_error(loc(
702898184e3Ssthen             "Could not open '%1' for writing: %2",$to,$!));
703898184e3Ssthen      }
704898184e3Ssthen
705898184e3Ssthen      $fh->autoflush(1);
706898184e3Ssthen
707898184e3Ssthen      binmode $fh;
708898184e3Ssthen
709898184e3Ssthen      my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
710898184e3Ssthen
711898184e3Ssthen      close $fh;
712898184e3Ssthen
713898184e3Ssthen      if ( $rc == 301 || $rc == 302 ) {
714898184e3Ssthen          my $loc;
715898184e3Ssthen          HEADERS: for ($http->headers_array) {
716898184e3Ssthen            /Location: (\S+)/ and $loc = $1, last HEADERS;
717898184e3Ssthen          }
718898184e3Ssthen          #$loc or last; # Think we should squeal here.
719898184e3Ssthen          if ($loc =~ m!^/!) {
720898184e3Ssthen            $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
721898184e3Ssthen            $uri .= $loc;
722898184e3Ssthen          }
723898184e3Ssthen          else {
724898184e3Ssthen            $uri = $loc;
725898184e3Ssthen          }
726898184e3Ssthen          next RETRIES;
727898184e3Ssthen      }
728898184e3Ssthen      elsif ( $rc == 200 ) {
729898184e3Ssthen          return $to;
730898184e3Ssthen      }
731898184e3Ssthen      else {
732898184e3Ssthen        return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
733898184e3Ssthen                    $rc, $http->status_message));
734898184e3Ssthen      }
735898184e3Ssthen
736898184e3Ssthen    } # Loop for 5 retries.
737898184e3Ssthen
738898184e3Ssthen    return $self->_error("Fetch failed! Gave up after 5 tries");
739898184e3Ssthen
740898184e3Ssthen}
741898184e3Ssthen
742b39c5158Smillert### Simple IO::Socket::INET fetching ###
743b39c5158Smillertsub _iosock_fetch {
744b39c5158Smillert    my $self = shift;
745b39c5158Smillert    my %hash = @_;
746b39c5158Smillert
747b39c5158Smillert    my ($to);
748b39c5158Smillert    my $tmpl = {
749b39c5158Smillert        to  => { required => 1, store => \$to }
750b39c5158Smillert    };
751b39c5158Smillert    check( $tmpl, \%hash ) or return;
752b39c5158Smillert
753b39c5158Smillert    my $use_list = {
754b39c5158Smillert        'IO::Socket::INET' => '0.0',
755b39c5158Smillert        'IO::Select'       => '0.0',
756b39c5158Smillert    };
757b39c5158Smillert
758*eac174f2Safresh1    ### Fix CVE-2016-1238 ###
759*eac174f2Safresh1    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
76091f110e0Safresh1    unless( can_load(modules => $use_list) ) {
76191f110e0Safresh1        $METHOD_FAIL->{'iosock'} = 1;
76291f110e0Safresh1        return;
76391f110e0Safresh1    }
76491f110e0Safresh1
765b39c5158Smillert    my $sock = IO::Socket::INET->new(
766b39c5158Smillert        PeerHost => $self->host,
767b39c5158Smillert        ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
768b39c5158Smillert    );
769b39c5158Smillert
770b39c5158Smillert    unless ( $sock ) {
771b39c5158Smillert        return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
772b39c5158Smillert    }
773b39c5158Smillert
774b39c5158Smillert    my $fh = FileHandle->new;
775b39c5158Smillert
776b39c5158Smillert    # Check open()
777b39c5158Smillert
778b39c5158Smillert    unless ( $fh->open($to,'>') ) {
779b39c5158Smillert        return $self->_error(loc(
780b39c5158Smillert             "Could not open '%1' for writing: %2",$to,$!));
781b39c5158Smillert    }
782b39c5158Smillert
783898184e3Ssthen    $fh->autoflush(1);
784898184e3Ssthen    binmode $fh;
785898184e3Ssthen
786b39c5158Smillert    my $path = File::Spec::Unix->catfile( $self->path, $self->file );
787b39c5158Smillert    my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
788b39c5158Smillert    $sock->send( $req );
789b39c5158Smillert
790b39c5158Smillert    my $select = IO::Select->new( $sock );
791b39c5158Smillert
792b39c5158Smillert    my $resp = '';
793b39c5158Smillert    my $normal = 0;
794b39c5158Smillert    while ( $select->can_read( $TIMEOUT || 60 ) ) {
795b39c5158Smillert      my $ret = $sock->sysread( $resp, 4096, length($resp) );
796b39c5158Smillert      if ( !defined $ret or $ret == 0 ) {
797b39c5158Smillert        $select->remove( $sock );
798b39c5158Smillert        $normal++;
799b39c5158Smillert      }
800b39c5158Smillert    }
801b39c5158Smillert    close $sock;
802b39c5158Smillert
803b39c5158Smillert    unless ( $normal ) {
804b39c5158Smillert        return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
805b39c5158Smillert    }
806b39c5158Smillert
807b39c5158Smillert    # Check the "response"
808898184e3Ssthen    # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
809b39c5158Smillert    $resp =~ s/^(\x0d?\x0a)+//;
810b39c5158Smillert    # Check it is an HTTP response
811b39c5158Smillert    unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
812b39c5158Smillert        return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
813b39c5158Smillert    }
814b39c5158Smillert
815b39c5158Smillert    # Check for OK
816b39c5158Smillert    my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
817b39c5158Smillert    unless ( $code eq '200' ) {
818b39c5158Smillert        return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
819b39c5158Smillert    }
820b39c5158Smillert
821898184e3Ssthen    {
822898184e3Ssthen      local $\;
823b39c5158Smillert      print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
824898184e3Ssthen    }
825b39c5158Smillert    close $fh;
826b39c5158Smillert    return $to;
827b39c5158Smillert}
828b39c5158Smillert
829b39c5158Smillert### Net::FTP fetching
830b39c5158Smillertsub _netftp_fetch {
831b39c5158Smillert    my $self = shift;
832b39c5158Smillert    my %hash = @_;
833b39c5158Smillert
834b39c5158Smillert    my ($to);
835b39c5158Smillert    my $tmpl = {
836b39c5158Smillert        to  => { required => 1, store => \$to }
837b39c5158Smillert    };
838b39c5158Smillert    check( $tmpl, \%hash ) or return;
839b39c5158Smillert
840b39c5158Smillert    ### required modules ###
841b39c5158Smillert    my $use_list = { 'Net::FTP' => 0 };
842b39c5158Smillert
843*eac174f2Safresh1    ### Fix CVE-2016-1238 ###
844*eac174f2Safresh1    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
84591f110e0Safresh1    unless( can_load( modules => $use_list ) ) {
84691f110e0Safresh1        $METHOD_FAIL->{'netftp'} = 1;
84791f110e0Safresh1        return;
84891f110e0Safresh1    }
849b39c5158Smillert
850b39c5158Smillert    ### make connection ###
851b39c5158Smillert    my $ftp;
852b39c5158Smillert    my @options = ($self->host);
853b39c5158Smillert    push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
854b39c5158Smillert    unless( $ftp = Net::FTP->new( @options ) ) {
855b39c5158Smillert        return $self->_error(loc("Ftp creation failed: %1",$@));
856b39c5158Smillert    }
857b39c5158Smillert
858b39c5158Smillert    ### login ###
859b39c5158Smillert    unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
860b39c5158Smillert        return $self->_error(loc("Could not login to '%1'",$self->host));
861b39c5158Smillert    }
862b39c5158Smillert
863b39c5158Smillert    ### set binary mode, just in case ###
864b39c5158Smillert    $ftp->binary;
865b39c5158Smillert
866b39c5158Smillert    ### create the remote path
867b39c5158Smillert    ### remember remote paths are unix paths! [#11483]
868b39c5158Smillert    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
869b39c5158Smillert
870b39c5158Smillert    ### fetch the file ###
871b39c5158Smillert    my $target;
872b39c5158Smillert    unless( $target = $ftp->get( $remote, $to ) ) {
873b39c5158Smillert        return $self->_error(loc("Could not fetch '%1' from '%2'",
874b39c5158Smillert                    $remote, $self->host));
875b39c5158Smillert    }
876b39c5158Smillert
877b39c5158Smillert    ### log out ###
878b39c5158Smillert    $ftp->quit;
879b39c5158Smillert
880b39c5158Smillert    return $target;
881b39c5158Smillert
882b39c5158Smillert}
883b39c5158Smillert
884b39c5158Smillert### /bin/wget fetch ###
885b39c5158Smillertsub _wget_fetch {
886b39c5158Smillert    my $self = shift;
887b39c5158Smillert    my %hash = @_;
888b39c5158Smillert
889b39c5158Smillert    my ($to);
890b39c5158Smillert    my $tmpl = {
891b39c5158Smillert        to  => { required => 1, store => \$to }
892b39c5158Smillert    };
893b39c5158Smillert    check( $tmpl, \%hash ) or return;
894b39c5158Smillert
89591f110e0Safresh1    my $wget;
896b39c5158Smillert    ### see if we have a wget binary ###
89791f110e0Safresh1    unless( $wget = can_run('wget') ) {
89891f110e0Safresh1        $METHOD_FAIL->{'wget'} = 1;
89991f110e0Safresh1        return;
90091f110e0Safresh1    }
901b39c5158Smillert
902b39c5158Smillert    ### no verboseness, thanks ###
903b39c5158Smillert    my $cmd = [ $wget, '--quiet' ];
904b39c5158Smillert
905b39c5158Smillert    ### if a timeout is set, add it ###
906b39c5158Smillert    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
907b39c5158Smillert
908b39c5158Smillert    ### run passive if specified ###
909*eac174f2Safresh1    push @$cmd, '--passive-ftp' if $self->scheme eq 'ftp' && $FTP_PASSIVE;
910b39c5158Smillert
911b39c5158Smillert    ### set the output document, add the uri ###
912b39c5158Smillert    push @$cmd, '--output-document', $to, $self->uri;
913b39c5158Smillert
914b39c5158Smillert    ### with IPC::Cmd > 0.41, this is fixed in teh library,
915b39c5158Smillert    ### and there's no need for special casing any more.
916b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
917b39c5158Smillert    # $IPC::Cmd::USE_IPC_RUN
918b39c5158Smillert    #    ? ($to, $self->uri)
919b39c5158Smillert    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
920b39c5158Smillert
921b39c5158Smillert    ### shell out ###
922b39c5158Smillert    my $captured;
923b39c5158Smillert    unless(run( command => $cmd,
924b39c5158Smillert                buffer  => \$captured,
925b39c5158Smillert                verbose => $DEBUG
926b39c5158Smillert    )) {
927b39c5158Smillert        ### wget creates the output document always, even if the fetch
928b39c5158Smillert        ### fails.. so unlink it in that case
929b39c5158Smillert        1 while unlink $to;
930b39c5158Smillert
931b39c5158Smillert        return $self->_error(loc( "Command failed: %1", $captured || '' ));
932b39c5158Smillert    }
933b39c5158Smillert
934b39c5158Smillert    return $to;
935b39c5158Smillert}
936b39c5158Smillert
937b39c5158Smillert### /bin/lftp fetch ###
938b39c5158Smillertsub _lftp_fetch {
939b39c5158Smillert    my $self = shift;
940b39c5158Smillert    my %hash = @_;
941b39c5158Smillert
942b39c5158Smillert    my ($to);
943b39c5158Smillert    my $tmpl = {
944b39c5158Smillert        to  => { required => 1, store => \$to }
945b39c5158Smillert    };
946b39c5158Smillert    check( $tmpl, \%hash ) or return;
947b39c5158Smillert
94891f110e0Safresh1    ### see if we have a lftp binary ###
94991f110e0Safresh1    my $lftp;
95091f110e0Safresh1    unless( $lftp = can_run('lftp') ) {
95191f110e0Safresh1        $METHOD_FAIL->{'lftp'} = 1;
95291f110e0Safresh1        return;
95391f110e0Safresh1    }
954b39c5158Smillert
955b39c5158Smillert    ### no verboseness, thanks ###
956b39c5158Smillert    my $cmd = [ $lftp, '-f' ];
957b39c5158Smillert
958b39c5158Smillert    my $fh = File::Temp->new;
959b39c5158Smillert
960b39c5158Smillert    my $str;
961b39c5158Smillert
962b39c5158Smillert    ### if a timeout is set, add it ###
963b39c5158Smillert    $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
964b39c5158Smillert
965b39c5158Smillert    ### run passive if specified ###
966b39c5158Smillert    $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
967b39c5158Smillert
968b39c5158Smillert    ### set the output document, add the uri ###
969b39c5158Smillert    ### quote the URI, because lftp supports certain shell
970b39c5158Smillert    ### expansions, most notably & for backgrounding.
971b39c5158Smillert    ### ' quote does nto work, must be "
972b39c5158Smillert    $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
973b39c5158Smillert
974b39c5158Smillert    if( $DEBUG ) {
975b39c5158Smillert        my $pp_str = join ' ', split $/, $str;
976b39c5158Smillert        print "# lftp command: $pp_str\n";
977b39c5158Smillert    }
978b39c5158Smillert
979b39c5158Smillert    ### write straight to the file.
980b39c5158Smillert    $fh->autoflush(1);
981b39c5158Smillert    print $fh $str;
982b39c5158Smillert
983b39c5158Smillert    ### the command needs to be 1 string to be executed
984b39c5158Smillert    push @$cmd, $fh->filename;
985b39c5158Smillert
986b39c5158Smillert    ### with IPC::Cmd > 0.41, this is fixed in teh library,
987b39c5158Smillert    ### and there's no need for special casing any more.
988b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
989b39c5158Smillert    # $IPC::Cmd::USE_IPC_RUN
990b39c5158Smillert    #    ? ($to, $self->uri)
991b39c5158Smillert    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
992b39c5158Smillert
993b39c5158Smillert
994b39c5158Smillert    ### shell out ###
995b39c5158Smillert    my $captured;
996b39c5158Smillert    unless(run( command => $cmd,
997b39c5158Smillert                buffer  => \$captured,
998b39c5158Smillert                verbose => $DEBUG
999b39c5158Smillert    )) {
1000b39c5158Smillert        ### wget creates the output document always, even if the fetch
1001b39c5158Smillert        ### fails.. so unlink it in that case
1002b39c5158Smillert        1 while unlink $to;
1003b39c5158Smillert
1004b39c5158Smillert        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1005b39c5158Smillert    }
1006b39c5158Smillert
1007b39c5158Smillert    return $to;
1008b39c5158Smillert}
1009b39c5158Smillert
1010b39c5158Smillert
1011b39c5158Smillert
1012b39c5158Smillert### /bin/ftp fetch ###
1013b39c5158Smillertsub _ftp_fetch {
1014b39c5158Smillert    my $self = shift;
1015b39c5158Smillert    my %hash = @_;
1016b39c5158Smillert
1017b39c5158Smillert    my ($to);
1018b39c5158Smillert    my $tmpl = {
1019b39c5158Smillert        to  => { required => 1, store => \$to }
1020b39c5158Smillert    };
1021b39c5158Smillert    check( $tmpl, \%hash ) or return;
1022b39c5158Smillert
1023b39c5158Smillert    ### see if we have a ftp binary ###
102491f110e0Safresh1    my $ftp;
102591f110e0Safresh1    unless( $ftp = can_run('ftp') ) {
102691f110e0Safresh1        $METHOD_FAIL->{'ftp'} = 1;
102791f110e0Safresh1        return;
102891f110e0Safresh1    }
1029b39c5158Smillert
1030b39c5158Smillert    my $fh = FileHandle->new;
1031b39c5158Smillert
1032b39c5158Smillert    local $SIG{CHLD} = 'IGNORE';
1033b39c5158Smillert
103491f110e0Safresh1    unless ($fh->open("$ftp -n", '|-')) {
1035b39c5158Smillert        return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1036b39c5158Smillert    }
1037b39c5158Smillert
1038b39c5158Smillert    my @dialog = (
1039b39c5158Smillert        "lcd " . dirname($to),
1040b39c5158Smillert        "open " . $self->host,
1041b39c5158Smillert        "user anonymous $FROM_EMAIL",
1042b39c5158Smillert        "cd /",
1043b39c5158Smillert        "cd " . $self->path,
1044b39c5158Smillert        "binary",
1045b39c5158Smillert        "get " . $self->file . " " . $self->output_file,
1046b39c5158Smillert        "quit",
1047b39c5158Smillert    );
1048b39c5158Smillert
1049b39c5158Smillert    foreach (@dialog) { $fh->print($_, "\n") }
1050b39c5158Smillert    $fh->close or return;
1051b39c5158Smillert
1052b39c5158Smillert    return $to;
1053b39c5158Smillert}
1054b39c5158Smillert
1055b39c5158Smillert### lynx is stupid - it decompresses any .gz file it finds to be text
1056b39c5158Smillert### use /bin/lynx to fetch files
1057b39c5158Smillertsub _lynx_fetch {
1058b39c5158Smillert    my $self = shift;
1059b39c5158Smillert    my %hash = @_;
1060b39c5158Smillert
1061b39c5158Smillert    my ($to);
1062b39c5158Smillert    my $tmpl = {
1063b39c5158Smillert        to  => { required => 1, store => \$to }
1064b39c5158Smillert    };
1065b39c5158Smillert    check( $tmpl, \%hash ) or return;
1066b39c5158Smillert
1067b39c5158Smillert    ### see if we have a lynx binary ###
106891f110e0Safresh1    my $lynx;
106991f110e0Safresh1    unless ( $lynx = can_run('lynx') ){
107091f110e0Safresh1        $METHOD_FAIL->{'lynx'} = 1;
107191f110e0Safresh1        return;
107291f110e0Safresh1    }
1073b39c5158Smillert
1074b39c5158Smillert    unless( IPC::Cmd->can_capture_buffer ) {
1075b39c5158Smillert        $METHOD_FAIL->{'lynx'} = 1;
1076b39c5158Smillert
1077b39c5158Smillert        return $self->_error(loc(
1078b39c5158Smillert            "Can not capture buffers. Can not use '%1' to fetch files",
1079b39c5158Smillert            'lynx' ));
1080b39c5158Smillert    }
1081b39c5158Smillert
1082b39c5158Smillert    ### check if the HTTP resource exists ###
1083b39c5158Smillert    if ($self->uri =~ /^https?:\/\//i) {
1084b39c5158Smillert        my $cmd = [
1085b39c5158Smillert            $lynx,
1086b39c5158Smillert            '-head',
1087b39c5158Smillert            '-source',
1088b39c5158Smillert            "-auth=anonymous:$FROM_EMAIL",
1089b39c5158Smillert        ];
1090b39c5158Smillert
1091b39c5158Smillert        push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1092b39c5158Smillert
1093b39c5158Smillert        push @$cmd, $self->uri;
1094b39c5158Smillert
1095b39c5158Smillert        ### shell out ###
1096b39c5158Smillert        my $head;
1097b39c5158Smillert        unless(run( command => $cmd,
1098b39c5158Smillert                    buffer  => \$head,
1099b39c5158Smillert                    verbose => $DEBUG )
1100b39c5158Smillert        ) {
1101b39c5158Smillert            return $self->_error(loc("Command failed: %1", $head || ''));
1102b39c5158Smillert        }
1103b39c5158Smillert
1104b39c5158Smillert        unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1105b39c5158Smillert            return $self->_error(loc("Command failed: %1", $head || ''));
1106b39c5158Smillert        }
1107b39c5158Smillert    }
1108b39c5158Smillert
1109b39c5158Smillert    ### write to the output file ourselves, since lynx ass_u_mes to much
111091f110e0Safresh1    my $local = FileHandle->new( $to, 'w' )
1111b39c5158Smillert                    or return $self->_error(loc(
1112b39c5158Smillert                        "Could not open '%1' for writing: %2",$to,$!));
1113b39c5158Smillert
1114b39c5158Smillert    ### dump to stdout ###
1115b39c5158Smillert    my $cmd = [
1116b39c5158Smillert        $lynx,
1117b39c5158Smillert        '-source',
1118b39c5158Smillert        "-auth=anonymous:$FROM_EMAIL",
1119b39c5158Smillert    ];
1120b39c5158Smillert
1121b39c5158Smillert    push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1122b39c5158Smillert
1123b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
1124b39c5158Smillert    push @$cmd, $self->uri;
1125b39c5158Smillert
1126b39c5158Smillert    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1127b39c5158Smillert    ### and there's no need for special casing any more.
1128b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
1129b39c5158Smillert    # $IPC::Cmd::USE_IPC_RUN
1130b39c5158Smillert    #    ? $self->uri
1131b39c5158Smillert    #    : QUOTE. $self->uri .QUOTE;
1132b39c5158Smillert
1133b39c5158Smillert
1134b39c5158Smillert    ### shell out ###
1135b39c5158Smillert    my $captured;
1136b39c5158Smillert    unless(run( command => $cmd,
1137b39c5158Smillert                buffer  => \$captured,
1138b39c5158Smillert                verbose => $DEBUG )
1139b39c5158Smillert    ) {
1140b39c5158Smillert        return $self->_error(loc("Command failed: %1", $captured || ''));
1141b39c5158Smillert    }
1142b39c5158Smillert
1143b39c5158Smillert    ### print to local file ###
1144b39c5158Smillert    ### XXX on a 404 with a special error page, $captured will actually
1145b39c5158Smillert    ### hold the contents of that page, and make it *appear* like the
1146b39c5158Smillert    ### request was a success, when really it wasn't :(
1147b39c5158Smillert    ### there doesn't seem to be an option for lynx to change the exit
1148b39c5158Smillert    ### code based on a 4XX status or so.
1149b39c5158Smillert    ### the closest we can come is using --error_file and parsing that,
1150b39c5158Smillert    ### which is very unreliable ;(
1151b39c5158Smillert    $local->print( $captured );
1152b39c5158Smillert    $local->close or return;
1153b39c5158Smillert
1154b39c5158Smillert    return $to;
1155b39c5158Smillert}
1156b39c5158Smillert
1157b39c5158Smillert### use /bin/ncftp to fetch files
1158b39c5158Smillertsub _ncftp_fetch {
1159b39c5158Smillert    my $self = shift;
1160b39c5158Smillert    my %hash = @_;
1161b39c5158Smillert
1162b39c5158Smillert    my ($to);
1163b39c5158Smillert    my $tmpl = {
1164b39c5158Smillert        to  => { required => 1, store => \$to }
1165b39c5158Smillert    };
1166b39c5158Smillert    check( $tmpl, \%hash ) or return;
1167b39c5158Smillert
1168898184e3Ssthen    ### we can only set passive mode in interactive sessions, so bail out
1169b39c5158Smillert    ### if $FTP_PASSIVE is set
1170b39c5158Smillert    return if $FTP_PASSIVE;
1171b39c5158Smillert
1172b39c5158Smillert    ### see if we have a ncftp binary ###
117391f110e0Safresh1    my $ncftp;
117491f110e0Safresh1    unless( $ncftp = can_run('ncftp') ) {
117591f110e0Safresh1        $METHOD_FAIL->{'ncftp'} = 1;
117691f110e0Safresh1        return;
117791f110e0Safresh1    }
1178b39c5158Smillert
1179b39c5158Smillert    my $cmd = [
1180b39c5158Smillert        $ncftp,
1181b39c5158Smillert        '-V',                   # do not be verbose
1182b39c5158Smillert        '-p', $FROM_EMAIL,      # email as password
1183b39c5158Smillert        $self->host,            # hostname
1184b39c5158Smillert        dirname($to),           # local dir for the file
1185b39c5158Smillert                                # remote path to the file
1186b39c5158Smillert        ### DO NOT quote things for IPC::Run, it breaks stuff.
1187b39c5158Smillert        $IPC::Cmd::USE_IPC_RUN
1188b39c5158Smillert                    ? File::Spec::Unix->catdir( $self->path, $self->file )
1189b39c5158Smillert                    : QUOTE. File::Spec::Unix->catdir(
1190b39c5158Smillert                                    $self->path, $self->file ) .QUOTE
1191b39c5158Smillert
1192b39c5158Smillert    ];
1193b39c5158Smillert
1194b39c5158Smillert    ### shell out ###
1195b39c5158Smillert    my $captured;
1196b39c5158Smillert    unless(run( command => $cmd,
1197b39c5158Smillert                buffer  => \$captured,
1198b39c5158Smillert                verbose => $DEBUG )
1199b39c5158Smillert    ) {
1200b39c5158Smillert        return $self->_error(loc("Command failed: %1", $captured || ''));
1201b39c5158Smillert    }
1202b39c5158Smillert
1203b39c5158Smillert    return $to;
1204b39c5158Smillert
1205b39c5158Smillert}
1206b39c5158Smillert
1207b39c5158Smillert### use /bin/curl to fetch files
1208b39c5158Smillertsub _curl_fetch {
1209b39c5158Smillert    my $self = shift;
1210b39c5158Smillert    my %hash = @_;
1211b39c5158Smillert
1212b39c5158Smillert    my ($to);
1213b39c5158Smillert    my $tmpl = {
1214b39c5158Smillert        to  => { required => 1, store => \$to }
1215b39c5158Smillert    };
1216b39c5158Smillert    check( $tmpl, \%hash ) or return;
121791f110e0Safresh1    my $curl;
121891f110e0Safresh1    unless ( $curl = can_run('curl') ) {
121991f110e0Safresh1        $METHOD_FAIL->{'curl'} = 1;
122091f110e0Safresh1        return;
122191f110e0Safresh1    }
1222b39c5158Smillert
1223b39c5158Smillert    ### these long opts are self explanatory - I like that -jmb
1224b39c5158Smillert    my $cmd = [ $curl, '-q' ];
1225b39c5158Smillert
12266fb12b70Safresh1    push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
12276fb12b70Safresh1
1228b39c5158Smillert    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1229b39c5158Smillert
1230b39c5158Smillert    push(@$cmd, '--silent') unless $DEBUG;
1231b39c5158Smillert
1232b39c5158Smillert    ### curl does the right thing with passive, regardless ###
1233b39c5158Smillert    if ($self->scheme eq 'ftp') {
1234b39c5158Smillert        push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1235b39c5158Smillert    }
1236b39c5158Smillert
1237b39c5158Smillert    ### curl doesn't follow 302 (temporarily moved) etc automatically
1238b39c5158Smillert    ### so we add --location to enable that.
1239b39c5158Smillert    push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1240b39c5158Smillert
1241b39c5158Smillert    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1242b39c5158Smillert    ### and there's no need for special casing any more.
1243b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
1244b39c5158Smillert    # $IPC::Cmd::USE_IPC_RUN
1245b39c5158Smillert    #    ? ($to, $self->uri)
1246b39c5158Smillert    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1247b39c5158Smillert
1248b39c5158Smillert
1249b39c5158Smillert    my $captured;
1250b39c5158Smillert    unless(run( command => $cmd,
1251b39c5158Smillert                buffer  => \$captured,
1252b39c5158Smillert                verbose => $DEBUG )
1253b39c5158Smillert    ) {
1254b39c5158Smillert
1255b39c5158Smillert        return $self->_error(loc("Command failed: %1", $captured || ''));
1256b39c5158Smillert    }
1257b39c5158Smillert
1258b39c5158Smillert    return $to;
1259b39c5158Smillert
1260b39c5158Smillert}
1261b39c5158Smillert
1262898184e3Ssthen### /usr/bin/fetch fetch! ###
1263898184e3Ssthensub _fetch_fetch {
1264898184e3Ssthen    my $self = shift;
1265898184e3Ssthen    my %hash = @_;
1266898184e3Ssthen
1267898184e3Ssthen    my ($to);
1268898184e3Ssthen    my $tmpl = {
1269898184e3Ssthen        to  => { required => 1, store => \$to }
1270898184e3Ssthen    };
1271898184e3Ssthen    check( $tmpl, \%hash ) or return;
1272898184e3Ssthen
127391f110e0Safresh1    ### see if we have a fetch binary ###
127491f110e0Safresh1    my $fetch;
127591f110e0Safresh1    unless( HAS_FETCH and $fetch = can_run('fetch') ) {
127691f110e0Safresh1        $METHOD_FAIL->{'fetch'} = 1;
127791f110e0Safresh1        return;
127891f110e0Safresh1    }
1279898184e3Ssthen
1280898184e3Ssthen    ### no verboseness, thanks ###
1281898184e3Ssthen    my $cmd = [ $fetch, '-q' ];
1282898184e3Ssthen
1283898184e3Ssthen    ### if a timeout is set, add it ###
1284898184e3Ssthen    push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1285898184e3Ssthen
1286898184e3Ssthen    ### run passive if specified ###
1287898184e3Ssthen    #push @$cmd, '-p' if $FTP_PASSIVE;
1288898184e3Ssthen    local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1289898184e3Ssthen
1290898184e3Ssthen    ### set the output document, add the uri ###
1291898184e3Ssthen    push @$cmd, '-o', $to, $self->uri;
1292898184e3Ssthen
1293898184e3Ssthen    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1294898184e3Ssthen    ### and there's no need for special casing any more.
1295898184e3Ssthen    ### DO NOT quote things for IPC::Run, it breaks stuff.
1296898184e3Ssthen    # $IPC::Cmd::USE_IPC_RUN
1297898184e3Ssthen    #    ? ($to, $self->uri)
1298898184e3Ssthen    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1299898184e3Ssthen
1300898184e3Ssthen    ### shell out ###
1301898184e3Ssthen    my $captured;
1302898184e3Ssthen    unless(run( command => $cmd,
1303898184e3Ssthen                buffer  => \$captured,
1304898184e3Ssthen                verbose => $DEBUG
1305898184e3Ssthen    )) {
1306898184e3Ssthen        ### wget creates the output document always, even if the fetch
1307898184e3Ssthen        ### fails.. so unlink it in that case
1308898184e3Ssthen        1 while unlink $to;
1309898184e3Ssthen
1310898184e3Ssthen        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1311898184e3Ssthen    }
1312898184e3Ssthen
1313898184e3Ssthen    return $to;
1314898184e3Ssthen}
1315b39c5158Smillert
1316b39c5158Smillert### use File::Copy for fetching file:// urls ###
1317b39c5158Smillert###
1318*eac174f2Safresh1### See section 3.10 of RFC 1738 (https://datatracker.ietf.org/doc/html/rfc1738#section-3.10)
1319b39c5158Smillert### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1320b39c5158Smillert###
1321b39c5158Smillert
1322b39c5158Smillertsub _file_fetch {
1323b39c5158Smillert    my $self = shift;
1324b39c5158Smillert    my %hash = @_;
1325b39c5158Smillert
1326b39c5158Smillert    my ($to);
1327b39c5158Smillert    my $tmpl = {
1328b39c5158Smillert        to  => { required => 1, store => \$to }
1329b39c5158Smillert    };
1330b39c5158Smillert    check( $tmpl, \%hash ) or return;
1331b39c5158Smillert
1332b39c5158Smillert
1333b39c5158Smillert
1334b39c5158Smillert    ### prefix a / on unix systems with a file uri, since it would
1335b39c5158Smillert    ### look somewhat like this:
1336b39c5158Smillert    ###     file:///home/kane/file
1337898184e3Ssthen    ### whereas windows file uris for 'c:\some\dir\file' might look like:
1338b39c5158Smillert    ###     file:///C:/some/dir/file
1339b39c5158Smillert    ###     file:///C|/some/dir/file
1340b39c5158Smillert    ### or for a network share '\\host\share\some\dir\file':
1341b39c5158Smillert    ###     file:////host/share/some/dir/file
1342b39c5158Smillert    ###
1343b39c5158Smillert    ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1344b39c5158Smillert    ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
1345b39c5158Smillert    ###
1346b39c5158Smillert
1347b39c5158Smillert    my $path    = $self->path;
1348b39c5158Smillert    my $vol     = $self->vol;
1349b39c5158Smillert    my $share   = $self->share;
1350b39c5158Smillert
1351b39c5158Smillert    my $remote;
1352b39c5158Smillert    if (!$share and $self->host) {
1353b39c5158Smillert        return $self->_error(loc(
1354b39c5158Smillert            "Currently %1 cannot handle hosts in %2 urls",
1355b39c5158Smillert            'File::Fetch', 'file://'
1356b39c5158Smillert        ));
1357b39c5158Smillert    }
1358b39c5158Smillert
1359b39c5158Smillert    if( $vol ) {
1360b39c5158Smillert        $path   = File::Spec->catdir( split /\//, $path );
1361b39c5158Smillert        $remote = File::Spec->catpath( $vol, $path, $self->file);
1362b39c5158Smillert
1363b39c5158Smillert    } elsif( $share ) {
1364b39c5158Smillert        ### win32 specific, and a share name, so we wont bother with File::Spec
1365b39c5158Smillert        $path   =~ s|/+|\\|g;
1366b39c5158Smillert        $remote = "\\\\".$self->host."\\$share\\$path";
1367b39c5158Smillert
1368b39c5158Smillert    } else {
1369b39c5158Smillert        ### File::Spec on VMS can not currently handle UNIX syntax.
1370b39c5158Smillert        my $file_class = ON_VMS
1371b39c5158Smillert            ? 'File::Spec::Unix'
1372b39c5158Smillert            : 'File::Spec';
1373b39c5158Smillert
1374b39c5158Smillert        $remote  = $file_class->catfile( $path, $self->file );
1375b39c5158Smillert    }
1376b39c5158Smillert
1377b39c5158Smillert    ### File::Copy is littered with 'die' statements :( ###
1378b39c5158Smillert    my $rv = eval { File::Copy::copy( $remote, $to ) };
1379b39c5158Smillert
1380b39c5158Smillert    ### something went wrong ###
1381b39c5158Smillert    if( !$rv or $@ ) {
1382b39c5158Smillert        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1383b39c5158Smillert                             $remote, $to, $!, $@));
1384b39c5158Smillert    }
1385b39c5158Smillert
1386b39c5158Smillert    return $to;
1387b39c5158Smillert}
1388b39c5158Smillert
1389b39c5158Smillert### use /usr/bin/rsync to fetch files
1390b39c5158Smillertsub _rsync_fetch {
1391b39c5158Smillert    my $self = shift;
1392b39c5158Smillert    my %hash = @_;
1393b39c5158Smillert
1394b39c5158Smillert    my ($to);
1395b39c5158Smillert    my $tmpl = {
1396b39c5158Smillert        to  => { required => 1, store => \$to }
1397b39c5158Smillert    };
1398b39c5158Smillert    check( $tmpl, \%hash ) or return;
139991f110e0Safresh1    my $rsync;
140091f110e0Safresh1    unless ( $rsync = can_run('rsync') ) {
140191f110e0Safresh1        $METHOD_FAIL->{'rsync'} = 1;
140291f110e0Safresh1        return;
140391f110e0Safresh1    }
1404b39c5158Smillert
1405b39c5158Smillert    my $cmd = [ $rsync ];
1406b39c5158Smillert
1407b39c5158Smillert    ### XXX: rsync has no I/O timeouts at all, by default
1408b39c5158Smillert    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1409b39c5158Smillert
1410b39c5158Smillert    push(@$cmd, '--quiet') unless $DEBUG;
1411b39c5158Smillert
1412b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
1413b39c5158Smillert    push @$cmd, $self->uri, $to;
1414b39c5158Smillert
1415b39c5158Smillert    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1416b39c5158Smillert    ### and there's no need for special casing any more.
1417b39c5158Smillert    ### DO NOT quote things for IPC::Run, it breaks stuff.
1418b39c5158Smillert    # $IPC::Cmd::USE_IPC_RUN
1419b39c5158Smillert    #    ? ($to, $self->uri)
1420b39c5158Smillert    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1421b39c5158Smillert
1422b39c5158Smillert    my $captured;
1423b39c5158Smillert    unless(run( command => $cmd,
1424b39c5158Smillert                buffer  => \$captured,
1425b39c5158Smillert                verbose => $DEBUG )
1426b39c5158Smillert    ) {
1427b39c5158Smillert
1428b39c5158Smillert        return $self->_error(loc("Command %1 failed: %2",
1429b39c5158Smillert            "@$cmd" || '', $captured || ''));
1430b39c5158Smillert    }
1431b39c5158Smillert
1432b39c5158Smillert    return $to;
1433b39c5158Smillert
1434b39c5158Smillert}
1435b39c5158Smillert
14366fb12b70Safresh1### use git to fetch files
14376fb12b70Safresh1sub _git_fetch {
14386fb12b70Safresh1    my $self = shift;
14396fb12b70Safresh1    my %hash = @_;
14406fb12b70Safresh1
14416fb12b70Safresh1    my ($to);
14426fb12b70Safresh1    my $tmpl = {
14436fb12b70Safresh1        to  => { required => 1, store => \$to }
14446fb12b70Safresh1    };
14456fb12b70Safresh1    check( $tmpl, \%hash ) or return;
14466fb12b70Safresh1    my $git;
14476fb12b70Safresh1    unless ( $git = can_run('git') ) {
14486fb12b70Safresh1        $METHOD_FAIL->{'git'} = 1;
14496fb12b70Safresh1        return;
14506fb12b70Safresh1    }
14516fb12b70Safresh1
14526fb12b70Safresh1    my $cmd = [ $git, 'clone' ];
14536fb12b70Safresh1
14546fb12b70Safresh1    #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
14556fb12b70Safresh1
14566fb12b70Safresh1    push(@$cmd, '--quiet') unless $DEBUG;
14576fb12b70Safresh1
14586fb12b70Safresh1    ### DO NOT quote things for IPC::Run, it breaks stuff.
14596fb12b70Safresh1    push @$cmd, $self->uri, $to;
14606fb12b70Safresh1
14616fb12b70Safresh1    ### with IPC::Cmd > 0.41, this is fixed in teh library,
14626fb12b70Safresh1    ### and there's no need for special casing any more.
14636fb12b70Safresh1    ### DO NOT quote things for IPC::Run, it breaks stuff.
14646fb12b70Safresh1    # $IPC::Cmd::USE_IPC_RUN
14656fb12b70Safresh1    #    ? ($to, $self->uri)
14666fb12b70Safresh1    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
14676fb12b70Safresh1
14686fb12b70Safresh1    my $captured;
14696fb12b70Safresh1    unless(run( command => $cmd,
14706fb12b70Safresh1                buffer  => \$captured,
14716fb12b70Safresh1                verbose => $DEBUG )
14726fb12b70Safresh1    ) {
14736fb12b70Safresh1
14746fb12b70Safresh1        return $self->_error(loc("Command %1 failed: %2",
14756fb12b70Safresh1            "@$cmd" || '', $captured || ''));
14766fb12b70Safresh1    }
14776fb12b70Safresh1
14786fb12b70Safresh1    return $to;
14796fb12b70Safresh1
14806fb12b70Safresh1}
14816fb12b70Safresh1
1482b39c5158Smillert#################################
1483b39c5158Smillert#
1484b39c5158Smillert# Error code
1485b39c5158Smillert#
1486b39c5158Smillert#################################
1487b39c5158Smillert
1488b39c5158Smillert=pod
1489b39c5158Smillert
1490b39c5158Smillert=head2 $ff->error([BOOL])
1491b39c5158Smillert
1492b39c5158SmillertReturns the last encountered error as string.
1493b39c5158SmillertPass it a true value to get the C<Carp::longmess()> output instead.
1494b39c5158Smillert
1495b39c5158Smillert=cut
1496b39c5158Smillert
1497b39c5158Smillert### error handling the way Archive::Extract does it
1498b39c5158Smillertsub _error {
1499b39c5158Smillert    my $self    = shift;
1500b39c5158Smillert    my $error   = shift;
1501b39c5158Smillert
1502b39c5158Smillert    $self->_error_msg( $error );
1503b39c5158Smillert    $self->_error_msg_long( Carp::longmess($error) );
1504b39c5158Smillert
1505b39c5158Smillert    if( $WARN ) {
1506b39c5158Smillert        carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1507b39c5158Smillert    }
1508b39c5158Smillert
1509b39c5158Smillert    return;
1510b39c5158Smillert}
1511b39c5158Smillert
1512b39c5158Smillertsub error {
1513b39c5158Smillert    my $self = shift;
1514b39c5158Smillert    return shift() ? $self->_error_msg_long : $self->_error_msg;
1515b39c5158Smillert}
1516b39c5158Smillert
1517b39c5158Smillert
1518b39c5158Smillert1;
1519b39c5158Smillert
1520b39c5158Smillert=pod
1521b39c5158Smillert
1522b39c5158Smillert=head1 HOW IT WORKS
1523b39c5158Smillert
1524b39c5158SmillertFile::Fetch is able to fetch a variety of uris, by using several
1525b39c5158Smillertexternal programs and modules.
1526b39c5158Smillert
1527b39c5158SmillertBelow is a mapping of what utilities will be used in what order
1528b39c5158Smillertfor what schemes, if available:
1529b39c5158Smillert
1530b39c5158Smillert    file    => LWP, lftp, file
15319f11ffb7Safresh1    http    => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock
1532898184e3Ssthen    ftp     => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1533b39c5158Smillert    rsync   => rsync
15346fb12b70Safresh1    git     => git
1535b39c5158Smillert
1536b39c5158SmillertIf you'd like to disable the use of one or more of these utilities
1537b39c5158Smillertand/or modules, see the C<$BLACKLIST> variable further down.
1538b39c5158Smillert
1539b39c5158SmillertIf a utility or module isn't available, it will be marked in a cache
1540b39c5158Smillert(see the C<$METHOD_FAIL> variable further down), so it will not be
1541b39c5158Smillerttried again. The C<fetch> method will only fail when all options are
1542b39c5158Smillertexhausted, and it was not able to retrieve the file.
1543b39c5158Smillert
1544898184e3SsthenThe C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1545898184e3Ssthenmay also have it from C<pkgsrc>. We only check for C<fetch> on those
1546898184e3Ssthenthree platforms.
1547898184e3Ssthen
1548b39c5158SmillertC<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1549b39c5158Smillertretrieving C<http> schemed urls. It doesn't follow redirects for instance.
1550b39c5158Smillert
15516fb12b70Safresh1C<git> only supports C<git://> style urls.
15526fb12b70Safresh1
1553b39c5158SmillertA special note about fetching files from an ftp uri:
1554b39c5158Smillert
1555b39c5158SmillertBy default, all ftp connections are done in passive mode. To change
1556b39c5158Smillertthat, see the C<$FTP_PASSIVE> variable further down.
1557b39c5158Smillert
1558b39c5158SmillertFurthermore, ftp uris only support anonymous connections, so no
1559b39c5158Smillertnamed user/password pair can be passed along.
1560b39c5158Smillert
1561b39c5158SmillertC</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1562b39c5158Smillertfurther down.
1563b39c5158Smillert
1564b39c5158Smillert=head1 GLOBAL VARIABLES
1565b39c5158Smillert
1566b39c5158SmillertThe behaviour of File::Fetch can be altered by changing the following
1567b39c5158Smillertglobal variables:
1568b39c5158Smillert
1569b39c5158Smillert=head2 $File::Fetch::FROM_EMAIL
1570b39c5158Smillert
1571b39c5158SmillertThis is the email address that will be sent as your anonymous ftp
1572b39c5158Smillertpassword.
1573b39c5158Smillert
1574b39c5158SmillertDefault is C<File-Fetch@example.com>.
1575b39c5158Smillert
1576b39c5158Smillert=head2 $File::Fetch::USER_AGENT
1577b39c5158Smillert
1578b39c5158SmillertThis is the useragent as C<LWP> will report it.
1579b39c5158Smillert
1580b39c5158SmillertDefault is C<File::Fetch/$VERSION>.
1581b39c5158Smillert
1582b39c5158Smillert=head2 $File::Fetch::FTP_PASSIVE
1583b39c5158Smillert
1584b39c5158SmillertThis variable controls whether the environment variable C<FTP_PASSIVE>
1585b39c5158Smillertand any passive switches to commandline tools will be set to true.
1586b39c5158Smillert
1587b39c5158SmillertDefault value is 1.
1588b39c5158Smillert
1589b39c5158SmillertNote: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1590b39c5158Smillertfiles, since passive mode can only be set interactively for this binary
1591b39c5158Smillert
1592b39c5158Smillert=head2 $File::Fetch::TIMEOUT
1593b39c5158Smillert
1594b39c5158SmillertWhen set, controls the network timeout (counted in seconds).
1595b39c5158Smillert
1596b39c5158SmillertDefault value is 0.
1597b39c5158Smillert
1598b39c5158Smillert=head2 $File::Fetch::WARN
1599b39c5158Smillert
1600b39c5158SmillertThis variable controls whether errors encountered internally by
1601b39c5158SmillertC<File::Fetch> should be C<carp>'d or not.
1602b39c5158Smillert
1603b39c5158SmillertSet to false to silence warnings. Inspect the output of the C<error()>
1604b39c5158Smillertmethod manually to see what went wrong.
1605b39c5158Smillert
1606b39c5158SmillertDefaults to C<true>.
1607b39c5158Smillert
1608b39c5158Smillert=head2 $File::Fetch::DEBUG
1609b39c5158Smillert
1610b39c5158SmillertThis enables debugging output when calling commandline utilities to
1611b39c5158Smillertfetch files.
1612b39c5158SmillertThis also enables C<Carp::longmess> errors, instead of the regular
1613b39c5158SmillertC<carp> errors.
1614b39c5158Smillert
1615b39c5158SmillertGood for tracking down why things don't work with your particular
1616b39c5158Smillertsetup.
1617b39c5158Smillert
1618b39c5158SmillertDefault is 0.
1619b39c5158Smillert
1620b39c5158Smillert=head2 $File::Fetch::BLACKLIST
1621b39c5158Smillert
1622b39c5158SmillertThis is an array ref holding blacklisted modules/utilities for fetching
1623b39c5158Smillertfiles with.
1624b39c5158Smillert
1625b39c5158SmillertTo disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1626b39c5158Smillertset $File::Fetch::BLACKLIST to:
1627b39c5158Smillert
1628b39c5158Smillert    $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1629b39c5158Smillert
1630b39c5158SmillertThe default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1631b39c5158Smillert
1632b39c5158SmillertSee the note on C<MAPPING> below.
1633b39c5158Smillert
1634b39c5158Smillert=head2 $File::Fetch::METHOD_FAIL
1635b39c5158Smillert
1636b39c5158SmillertThis is a hashref registering what modules/utilities were known to fail
1637b39c5158Smillertfor fetching files (mostly because they weren't installed).
1638b39c5158Smillert
1639b39c5158SmillertYou can reset this cache by assigning an empty hashref to it, or
1640b39c5158Smillertindividually remove keys.
1641b39c5158Smillert
1642b39c5158SmillertSee the note on C<MAPPING> below.
1643b39c5158Smillert
1644b39c5158Smillert=head1 MAPPING
1645b39c5158Smillert
1646b39c5158Smillert
1647b39c5158SmillertHere's a quick mapping for the utilities/modules, and their names for
1648b39c5158Smillertthe $BLACKLIST, $METHOD_FAIL and other internal functions.
1649b39c5158Smillert
1650b39c5158Smillert    LWP         => lwp
1651898184e3Ssthen    HTTP::Lite  => httplite
1652898184e3Ssthen    HTTP::Tiny  => httptiny
1653b39c5158Smillert    Net::FTP    => netftp
1654b39c5158Smillert    wget        => wget
1655b39c5158Smillert    lynx        => lynx
1656b39c5158Smillert    ncftp       => ncftp
1657b39c5158Smillert    ftp         => ftp
1658b39c5158Smillert    curl        => curl
1659b39c5158Smillert    rsync       => rsync
1660b39c5158Smillert    lftp        => lftp
1661898184e3Ssthen    fetch       => fetch
1662b39c5158Smillert    IO::Socket  => iosock
1663b39c5158Smillert
1664b39c5158Smillert=head1 FREQUENTLY ASKED QUESTIONS
1665b39c5158Smillert
1666b39c5158Smillert=head2 So how do I use a proxy with File::Fetch?
1667b39c5158Smillert
1668b39c5158SmillertC<File::Fetch> currently only supports proxies with LWP::UserAgent.
1669b39c5158SmillertYou will need to set your environment variables accordingly. For
1670b39c5158Smillertexample, to use an ftp proxy:
1671b39c5158Smillert
1672b39c5158Smillert    $ENV{ftp_proxy} = 'foo.com';
1673b39c5158Smillert
1674b39c5158SmillertRefer to the LWP::UserAgent manpage for more details.
1675b39c5158Smillert
1676b39c5158Smillert=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1677b39c5158Smillert
1678b39c5158SmillertC<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1679b39c5158Smillertwhich we in turn capture. If that content is a 'custom' error file
1680b39c5158Smillert(like, say, a C<404 handler>), you will get that contents instead.
1681b39c5158Smillert
1682b39c5158SmillertSadly, C<lynx> doesn't support any options to return a different exit
1683b39c5158Smillertcode on non-C<200 OK> status, giving us no way to tell the difference
1684898184e3Ssthenbetween a 'successful' fetch and a custom error page.
1685b39c5158Smillert
1686b39c5158SmillertTherefor, we recommend to only use C<lynx> as a last resort. This is
1687b39c5158Smillertwhy it is at the back of our list of methods to try as well.
1688b39c5158Smillert
1689b39c5158Smillert=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1690b39c5158Smillert
1691b39c5158SmillertC<File::Fetch> is relatively smart about things. When trying to write
1692b39c5158Smillerta file to disk, it removes the C<query parameters> (see the
1693b39c5158SmillertC<output_file> method for details) from the file name before creating
1694b39c5158Smillertit. In most cases this suffices.
1695b39c5158Smillert
1696b39c5158SmillertIf you have any other characters you need to escape, please install
1697b39c5158Smillertthe C<URI::Escape> module from CPAN, and pre-encode your URI before
1698b39c5158Smillertpassing it to C<File::Fetch>. You can read about the details of URIs
1699b39c5158Smillertand URI encoding here:
1700b39c5158Smillert
1701*eac174f2Safresh1L<https://datatracker.ietf.org/doc/html/rfc2396>
1702b39c5158Smillert
1703b39c5158Smillert=head1 TODO
1704b39c5158Smillert
1705b39c5158Smillert=over 4
1706b39c5158Smillert
1707b39c5158Smillert=item Implement $PREFER_BIN
1708b39c5158Smillert
1709b39c5158SmillertTo indicate to rather use commandline tools than modules
1710b39c5158Smillert
1711b39c5158Smillert=back
1712b39c5158Smillert
1713b39c5158Smillert=head1 BUG REPORTS
1714b39c5158Smillert
1715b39c5158SmillertPlease report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1716b39c5158Smillert
1717b39c5158Smillert=head1 AUTHOR
1718b39c5158Smillert
1719b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1720b39c5158Smillert
1721b39c5158Smillert=head1 COPYRIGHT
1722b39c5158Smillert
1723b39c5158SmillertThis library is free software; you may redistribute and/or modify it
1724b39c5158Smillertunder the same terms as Perl itself.
1725b39c5158Smillert
1726b39c5158Smillert
1727b39c5158Smillert=cut
1728b39c5158Smillert
1729b39c5158Smillert# Local variables:
1730b39c5158Smillert# c-indentation-style: bsd
1731b39c5158Smillert# c-basic-offset: 4
1732b39c5158Smillert# indent-tabs-mode: nil
1733b39c5158Smillert# End:
1734b39c5158Smillert# vim: expandtab shiftwidth=4:
1735b39c5158Smillert
1736b39c5158Smillert
1737b39c5158Smillert
1738b39c5158Smillert
1739