xref: /openbsd-src/gnu/usr.bin/perl/cpan/File-Fetch/lib/File/Fetch.pm (revision 2777ee89d0e541ec819d05abee114837837abbec)
1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Temp;
6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
9use File::Basename              qw[dirname];
10
11use Cwd                         qw[cwd];
12use Carp                        qw[carp];
13use IPC::Cmd                    qw[can_run run QUOTE];
14use File::Path                  qw[mkpath];
15use File::Temp                  qw[tempdir];
16use Params::Check               qw[check];
17use Module::Load::Conditional   qw[can_load];
18use Locale::Maketext::Simple    Style => 'gettext';
19
20use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
23            ];
24
25$VERSION        = '0.48';
26$VERSION        = eval $VERSION;    # avoid warnings with development releases
27$PREFER_BIN     = 0;                # XXX TODO implement
28$FROM_EMAIL     = 'File-Fetch@example.com';
29$USER_AGENT     = "File::Fetch/$VERSION";
30$BLACKLIST      = [qw|ftp|];
31push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly';
32$METHOD_FAIL    = { };
33$FTP_PASSIVE    = 1;
34$TIMEOUT        = 0;
35$DEBUG          = 0;
36$WARN           = 1;
37$FORCEIPV4      = 0;
38
39### methods available to fetch the file depending on the scheme
40$METHODS = {
41    http    => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
42    ftp     => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
43    file    => [ qw|lwp lftp file| ],
44    rsync   => [ qw|rsync| ],
45    git     => [ qw|git| ],
46};
47
48### silly warnings ###
49local $Params::Check::VERBOSE               = 1;
50local $Params::Check::VERBOSE               = 1;
51local $Module::Load::Conditional::VERBOSE   = 0;
52local $Module::Load::Conditional::VERBOSE   = 0;
53
54### see what OS we are on, important for file:// uris ###
55use constant ON_WIN     => ($^O eq 'MSWin32');
56use constant ON_VMS     => ($^O eq 'VMS');
57use constant ON_UNIX    => (!ON_WIN);
58use constant HAS_VOL    => (ON_WIN);
59use constant HAS_SHARE  => (ON_WIN);
60use constant HAS_FETCH  => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
61
62=pod
63
64=head1 NAME
65
66File::Fetch - A generic file fetching mechanism
67
68=head1 SYNOPSIS
69
70    use File::Fetch;
71
72    ### build a File::Fetch object ###
73    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
74
75    ### fetch the uri to cwd() ###
76    my $where = $ff->fetch() or die $ff->error;
77
78    ### fetch the uri to /tmp ###
79    my $where = $ff->fetch( to => '/tmp' );
80
81    ### parsed bits from the uri ###
82    $ff->uri;
83    $ff->scheme;
84    $ff->host;
85    $ff->path;
86    $ff->file;
87
88=head1 DESCRIPTION
89
90File::Fetch is a generic file fetching mechanism.
91
92It allows you to fetch any file pointed to by a C<ftp>, C<http>,
93C<file>, C<git> or C<rsync> uri by a number of different means.
94
95See the C<HOW IT WORKS> section further down for details.
96
97=head1 ACCESSORS
98
99A C<File::Fetch> object has the following accessors
100
101=over 4
102
103=item $ff->uri
104
105The uri you passed to the constructor
106
107=item $ff->scheme
108
109The scheme from the uri (like 'file', 'http', etc)
110
111=item $ff->host
112
113The hostname in the uri.  Will be empty if host was originally
114'localhost' for a 'file://' url.
115
116=item $ff->vol
117
118On operating systems with the concept of a volume the second element
119of a file:// is considered to the be volume specification for the file.
120Thus on Win32 this routine returns the volume, on other operating
121systems this returns nothing.
122
123On Windows this value may be empty if the uri is to a network share, in
124which case the 'share' property will be defined. Additionally, volume
125specifications that use '|' as ':' will be converted on read to use ':'.
126
127On VMS, which has a volume concept, this field will be empty because VMS
128file specifications are converted to absolute UNIX format and the volume
129information is transparently included.
130
131=item $ff->share
132
133On systems with the concept of a network share (currently only Windows) returns
134the sharename from a file://// url.  On other operating systems returns empty.
135
136=item $ff->path
137
138The path from the uri, will be at least a single '/'.
139
140=item $ff->file
141
142The name of the remote file. For the local file name, the
143result of $ff->output_file will be used.
144
145=item $ff->file_default
146
147The name of the default local file, that $ff->output_file falls back to if
148it would otherwise return no filename. For example when fetching a URI like
149http://www.abc.net.au/ the contents retrieved may be from a remote file called
150'index.html'. The default value of this attribute is literally 'file_default'.
151
152=cut
153
154
155##########################
156### Object & Accessors ###
157##########################
158
159{
160    ### template for autogenerated accessors ###
161    my $Tmpl = {
162        scheme          => { default => 'http' },
163        host            => { default => 'localhost' },
164        path            => { default => '/' },
165        file            => { required => 1 },
166        uri             => { required => 1 },
167        vol             => { default => '' }, # windows for file:// uris
168        share           => { default => '' }, # windows for file:// uris
169        file_default    => { default => 'file_default' },
170        tempdir_root    => { required => 1 }, # Should be lazy-set at ->new()
171        _error_msg      => { no_override => 1 },
172        _error_msg_long => { no_override => 1 },
173    };
174
175    for my $method ( keys %$Tmpl ) {
176        no strict 'refs';
177        *$method = sub {
178                        my $self = shift;
179                        $self->{$method} = $_[0] if @_;
180                        return $self->{$method};
181                    }
182    }
183
184    sub _create {
185        my $class = shift;
186        my %hash  = @_;
187
188        my $args = check( $Tmpl, \%hash ) or return;
189
190        bless $args, $class;
191
192        if( lc($args->scheme) ne 'file' and not $args->host ) {
193            return $class->_error(loc(
194                "Hostname required when fetching from '%1'",$args->scheme));
195        }
196
197        for (qw[path]) {
198            unless( $args->$_() ) { # 5.5.x needs the ()
199                return $class->_error(loc("No '%1' specified",$_));
200            }
201        }
202
203        return $args;
204    }
205}
206
207=item $ff->output_file
208
209The name of the output file. This is the same as $ff->file,
210but any query parameters are stripped off. For example:
211
212    http://example.com/index.html?x=y
213
214would make the output file be C<index.html> rather than
215C<index.html?x=y>.
216
217=back
218
219=cut
220
221sub output_file {
222    my $self = shift;
223    my $file = $self->file;
224
225    $file =~ s/\?.*$//g;
226
227    $file ||= $self->file_default;
228
229    return $file;
230}
231
232### XXX do this or just point to URI::Escape?
233# =head2 $esc_uri = $ff->escaped_uri
234#
235# =cut
236#
237# ### most of this is stolen straight from URI::escape
238# {   ### Build a char->hex map
239#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
240#
241#     sub escaped_uri {
242#         my $self = shift;
243#         my $uri  = $self->uri;
244#
245#         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
246#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
247#                     $escapes{$1} || $self->_fail_hi($1)/ge;
248#
249#         return $uri;
250#     }
251#
252#     sub _fail_hi {
253#         my $self = shift;
254#         my $char = shift;
255#
256#         $self->_error(loc(
257#             "Can't escape '%1', try using the '%2' module instead",
258#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
259#         ));
260#     }
261#
262#     sub output_file {
263#
264#     }
265#
266#
267# }
268
269=head1 METHODS
270
271=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
272
273Parses the uri and creates a corresponding File::Fetch::Item object,
274that is ready to be C<fetch>ed and returns it.
275
276Returns false on failure.
277
278=cut
279
280sub new {
281    my $class = shift;
282    my %hash  = @_;
283
284    my ($uri, $file_default, $tempdir_root);
285    my $tmpl = {
286        uri          => { required => 1, store => \$uri },
287        file_default => { required => 0, store => \$file_default },
288        tempdir_root => { required => 0, store => \$tempdir_root },
289    };
290
291    check( $tmpl, \%hash ) or return;
292
293    ### parse the uri to usable parts ###
294    my $href    = $class->_parse_uri( $uri ) or return;
295
296    $href->{file_default} = $file_default if $file_default;
297    $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
298    $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd      ) if not $href->{tempdir_root};
299
300    ### make it into a FFI object ###
301    my $ff      = $class->_create( %$href ) or return;
302
303
304    ### return the object ###
305    return $ff;
306}
307
308### parses an uri to a hash structure:
309###
310### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
311###
312### becomes:
313###
314### $href = {
315###     scheme  => 'ftp',
316###     host    => 'ftp.cpan.org',
317###     path    => '/pub/mirror',
318###     file    => 'index.html'
319### };
320###
321### In the case of file:// urls there maybe be additional fields
322###
323### For systems with volume specifications such as Win32 there will be
324### a volume specifier provided in the 'vol' field.
325###
326###   'vol' => 'volumename'
327###
328### For windows file shares there may be a 'share' key specified
329###
330###   'share' => 'sharename'
331###
332### Note that the rules of what a file:// url means vary by the operating system
333### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
334### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
335### not '/foo/bar.txt'
336###
337### Similarly if the host interpreting the url is VMS then
338### file:///disk$user/my/notes/note12345.txt' means
339### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
340### if it is unix where it means /disk$user/my/notes/note12345.txt'.
341### Except for some cases in the File::Spec methods, Perl on VMS will generally
342### handle UNIX format file specifications.
343###
344### This means it is impossible to serve certain file:// urls on certain systems.
345###
346### Thus are the problems with a protocol-less specification. :-(
347###
348
349sub _parse_uri {
350    my $self = shift;
351    my $uri  = shift or return;
352
353    my $href = { uri => $uri };
354
355    ### find the scheme ###
356    $uri            =~ s|^(\w+)://||;
357    $href->{scheme} = $1;
358
359    ### See rfc 1738 section 3.10
360    ### http://www.faqs.org/rfcs/rfc1738.html
361    ### And wikipedia for more on windows file:// urls
362    ### http://en.wikipedia.org/wiki/File://
363    if( $href->{scheme} eq 'file' ) {
364
365        my @parts = split '/',$uri;
366
367        ### file://hostname/...
368        ### file://hostname/...
369        ### normalize file://localhost with file:///
370        $href->{host} = $parts[0] || '';
371
372        ### index in @parts where the path components begin;
373        my $index = 1;
374
375        ### file:////hostname/sharename/blah.txt
376        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
377
378            $href->{host}   = $parts[2] || '';  # avoid warnings
379            $href->{share}  = $parts[3] || '';  # avoid warnings
380
381            $index          = 4         # index after the share
382
383        ### file:///D|/blah.txt
384        ### file:///D:/blah.txt
385        } elsif (HAS_VOL) {
386
387            ### this code comes from dmq's patch, but:
388            ### XXX if volume is empty, wouldn't that be an error? --kane
389            ### if so, our file://localhost test needs to be fixed as wel
390            $href->{vol}    = $parts[1] || '';
391
392            ### correct D| style colume descriptors
393            $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
394
395            $index          = 2;        # index after the volume
396        }
397
398        ### rebuild the path from the leftover parts;
399        $href->{path} = join '/', '', splice( @parts, $index, $#parts );
400
401    } else {
402        ### using anything but qw() in hash slices may produce warnings
403        ### in older perls :-(
404        @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
405    }
406
407    ### split the path into file + dir ###
408    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
409        $href->{path} = $parts[1];
410        $href->{file} = $parts[2];
411    }
412
413    ### host will be empty if the target was 'localhost' and the
414    ### scheme was 'file'
415    $href->{host} = '' if   ($href->{host}      eq 'localhost') and
416                            ($href->{scheme}    eq 'file');
417
418    return $href;
419}
420
421=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
422
423Fetches the file you requested and returns the full path to the file.
424
425By default it writes to C<cwd()>, but you can override that by specifying
426the C<to> argument:
427
428    ### file fetch to /tmp, full path to the file in $where
429    $where = $ff->fetch( to => '/tmp' );
430
431    ### file slurped into $scalar, full path to the file in $where
432    ### file is downloaded to a temp directory and cleaned up at exit time
433    $where = $ff->fetch( to => \$scalar );
434
435Returns the full path to the downloaded file on success, and false
436on failure.
437
438=cut
439
440sub fetch {
441    my $self = shift or return;
442    my %hash = @_;
443
444    my $target;
445    my $tmpl = {
446        to  => { default => cwd(), store => \$target },
447    };
448
449    check( $tmpl, \%hash ) or return;
450
451    my ($to, $fh);
452    ### you want us to slurp the contents
453    if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
454        $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
455
456    ### plain old fetch
457    } else {
458        $to = $target;
459
460        ### On VMS force to VMS format so File::Spec will work.
461        $to = VMS::Filespec::vmspath($to) if ON_VMS;
462
463        ### create the path if it doesn't exist yet ###
464        unless( -d $to ) {
465            eval { mkpath( $to ) };
466
467            return $self->_error(loc("Could not create path '%1'",$to)) if $@;
468        }
469    }
470
471    ### set passive ftp if required ###
472    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
473
474    ### we dont use catfile on win32 because if we are using a cygwin tool
475    ### under cmd.exe they wont understand windows style separators.
476    my $out_to = ON_WIN ? $to.'/'.$self->output_file
477                        : File::Spec->catfile( $to, $self->output_file );
478
479    for my $method ( @{ $METHODS->{$self->scheme} } ) {
480        my $sub =  '_'.$method.'_fetch';
481
482        unless( __PACKAGE__->can($sub) ) {
483            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
484                        $method));
485            next;
486        }
487
488        ### method is blacklisted ###
489        next if grep { lc $_ eq $method } @$BLACKLIST;
490
491        ### method is known to fail ###
492        next if $METHOD_FAIL->{$method};
493
494        ### there's serious issues with IPC::Run and quoting of command
495        ### line arguments. using quotes in the wrong place breaks things,
496        ### and in the case of say,
497        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
498        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
499        ### it doesn't matter how you quote, it always fails.
500        local $IPC::Cmd::USE_IPC_RUN = 0;
501
502        if( my $file = $self->$sub(
503                        to => $out_to
504        )){
505
506            unless( -e $file && -s _ ) {
507                $self->_error(loc("'%1' said it fetched '%2', ".
508                     "but it was not created",$method,$file));
509
510                ### mark the failure ###
511                $METHOD_FAIL->{$method} = 1;
512
513                next;
514
515            } else {
516
517                ### slurp mode?
518                if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
519
520                    ### open the file
521                    open my $fh, "<$file" or do {
522                        $self->_error(
523                            loc("Could not open '%1': %2", $file, $!));
524                        return;
525                    };
526
527                    ### slurp
528                    $$target = do { local $/; <$fh> };
529
530                }
531
532                my $abs = File::Spec->rel2abs( $file );
533                return $abs;
534
535            }
536        }
537    }
538
539
540    ### if we got here, we looped over all methods, but we weren't able
541    ### to fetch it.
542    return;
543}
544
545########################
546### _*_fetch methods ###
547########################
548
549### LWP fetching ###
550sub _lwp_fetch {
551    my $self = shift;
552    my %hash = @_;
553
554    my ($to);
555    my $tmpl = {
556        to  => { required => 1, store => \$to }
557    };
558    check( $tmpl, \%hash ) or return;
559
560    ### modules required to download with lwp ###
561    my $use_list = {
562        LWP                 => '0.0',
563        'LWP::UserAgent'    => '0.0',
564        'HTTP::Request'     => '0.0',
565        'HTTP::Status'      => '0.0',
566        URI                 => '0.0',
567
568    };
569
570    unless( can_load( modules => $use_list ) ) {
571        $METHOD_FAIL->{'lwp'} = 1;
572        return;
573    }
574
575    ### setup the uri object
576    my $uri = URI->new( File::Spec::Unix->catfile(
577                                $self->path, $self->file
578                    ) );
579
580    ### special rules apply for file:// uris ###
581    $uri->scheme( $self->scheme );
582    $uri->host( $self->scheme eq 'file' ? '' : $self->host );
583    $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
584
585    ### set up the useragent object
586    my $ua = LWP::UserAgent->new();
587    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
588    $ua->agent( $USER_AGENT );
589    $ua->from( $FROM_EMAIL );
590    $ua->env_proxy;
591
592    my $res = $ua->mirror($uri, $to) or return;
593
594    ### uptodate or fetched ok ###
595    if ( $res->code == 304 or $res->code == 200 ) {
596        return $to;
597
598    } else {
599        return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
600                    $res->code, HTTP::Status::status_message($res->code),
601                    $res->status_line));
602    }
603
604}
605
606### HTTP::Tiny fetching ###
607sub _httptiny_fetch {
608    my $self = shift;
609    my %hash = @_;
610
611    my ($to);
612    my $tmpl = {
613        to  => { required => 1, store => \$to }
614    };
615    check( $tmpl, \%hash ) or return;
616
617    my $use_list = {
618        'HTTP::Tiny'    => '0.008',
619
620    };
621
622    unless( can_load(modules => $use_list) ) {
623        $METHOD_FAIL->{'httptiny'} = 1;
624        return;
625    }
626
627    my $uri = $self->uri;
628
629    my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
630
631    my $rc = $http->mirror( $uri, $to );
632
633    unless ( $rc->{success} ) {
634
635        return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
636                    $rc->{status}, $rc->{reason} ) );
637
638    }
639
640    return $to;
641
642}
643
644### HTTP::Lite fetching ###
645sub _httplite_fetch {
646    my $self = shift;
647    my %hash = @_;
648
649    my ($to);
650    my $tmpl = {
651        to  => { required => 1, store => \$to }
652    };
653    check( $tmpl, \%hash ) or return;
654
655    ### modules required to download with lwp ###
656    my $use_list = {
657        'HTTP::Lite'    => '2.2',
658
659    };
660
661    unless( can_load(modules => $use_list) ) {
662        $METHOD_FAIL->{'httplite'} = 1;
663        return;
664    }
665
666    my $uri = $self->uri;
667    my $retries = 0;
668
669    RETRIES: while ( $retries++ < 5 ) {
670
671      my $http = HTTP::Lite->new();
672      # Naughty naughty but there isn't any accessor/setter
673      $http->{timeout} = $TIMEOUT if $TIMEOUT;
674      $http->http11_mode(1);
675
676      my $fh = FileHandle->new;
677
678      unless ( $fh->open($to,'>') ) {
679        return $self->_error(loc(
680             "Could not open '%1' for writing: %2",$to,$!));
681      }
682
683      $fh->autoflush(1);
684
685      binmode $fh;
686
687      my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
688
689      close $fh;
690
691      if ( $rc == 301 || $rc == 302 ) {
692          my $loc;
693          HEADERS: for ($http->headers_array) {
694            /Location: (\S+)/ and $loc = $1, last HEADERS;
695          }
696          #$loc or last; # Think we should squeal here.
697          if ($loc =~ m!^/!) {
698            $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
699            $uri .= $loc;
700          }
701          else {
702            $uri = $loc;
703          }
704          next RETRIES;
705      }
706      elsif ( $rc == 200 ) {
707          return $to;
708      }
709      else {
710        return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
711                    $rc, $http->status_message));
712      }
713
714    } # Loop for 5 retries.
715
716    return $self->_error("Fetch failed! Gave up after 5 tries");
717
718}
719
720### Simple IO::Socket::INET fetching ###
721sub _iosock_fetch {
722    my $self = shift;
723    my %hash = @_;
724
725    my ($to);
726    my $tmpl = {
727        to  => { required => 1, store => \$to }
728    };
729    check( $tmpl, \%hash ) or return;
730
731    my $use_list = {
732        'IO::Socket::INET' => '0.0',
733        'IO::Select'       => '0.0',
734    };
735
736    unless( can_load(modules => $use_list) ) {
737        $METHOD_FAIL->{'iosock'} = 1;
738        return;
739    }
740
741    my $sock = IO::Socket::INET->new(
742        PeerHost => $self->host,
743        ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
744    );
745
746    unless ( $sock ) {
747        return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
748    }
749
750    my $fh = FileHandle->new;
751
752    # Check open()
753
754    unless ( $fh->open($to,'>') ) {
755        return $self->_error(loc(
756             "Could not open '%1' for writing: %2",$to,$!));
757    }
758
759    $fh->autoflush(1);
760    binmode $fh;
761
762    my $path = File::Spec::Unix->catfile( $self->path, $self->file );
763    my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
764    $sock->send( $req );
765
766    my $select = IO::Select->new( $sock );
767
768    my $resp = '';
769    my $normal = 0;
770    while ( $select->can_read( $TIMEOUT || 60 ) ) {
771      my $ret = $sock->sysread( $resp, 4096, length($resp) );
772      if ( !defined $ret or $ret == 0 ) {
773        $select->remove( $sock );
774        $normal++;
775      }
776    }
777    close $sock;
778
779    unless ( $normal ) {
780        return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
781    }
782
783    # Check the "response"
784    # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
785    $resp =~ s/^(\x0d?\x0a)+//;
786    # Check it is an HTTP response
787    unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
788        return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
789    }
790
791    # Check for OK
792    my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
793    unless ( $code eq '200' ) {
794        return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
795    }
796
797    {
798      local $\;
799      print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
800    }
801    close $fh;
802    return $to;
803}
804
805### Net::FTP fetching
806sub _netftp_fetch {
807    my $self = shift;
808    my %hash = @_;
809
810    my ($to);
811    my $tmpl = {
812        to  => { required => 1, store => \$to }
813    };
814    check( $tmpl, \%hash ) or return;
815
816    ### required modules ###
817    my $use_list = { 'Net::FTP' => 0 };
818
819    unless( can_load( modules => $use_list ) ) {
820        $METHOD_FAIL->{'netftp'} = 1;
821        return;
822    }
823
824    ### make connection ###
825    my $ftp;
826    my @options = ($self->host);
827    push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
828    unless( $ftp = Net::FTP->new( @options ) ) {
829        return $self->_error(loc("Ftp creation failed: %1",$@));
830    }
831
832    ### login ###
833    unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
834        return $self->_error(loc("Could not login to '%1'",$self->host));
835    }
836
837    ### set binary mode, just in case ###
838    $ftp->binary;
839
840    ### create the remote path
841    ### remember remote paths are unix paths! [#11483]
842    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
843
844    ### fetch the file ###
845    my $target;
846    unless( $target = $ftp->get( $remote, $to ) ) {
847        return $self->_error(loc("Could not fetch '%1' from '%2'",
848                    $remote, $self->host));
849    }
850
851    ### log out ###
852    $ftp->quit;
853
854    return $target;
855
856}
857
858### /bin/wget fetch ###
859sub _wget_fetch {
860    my $self = shift;
861    my %hash = @_;
862
863    my ($to);
864    my $tmpl = {
865        to  => { required => 1, store => \$to }
866    };
867    check( $tmpl, \%hash ) or return;
868
869    my $wget;
870    ### see if we have a wget binary ###
871    unless( $wget = can_run('wget') ) {
872        $METHOD_FAIL->{'wget'} = 1;
873        return;
874    }
875
876    ### no verboseness, thanks ###
877    my $cmd = [ $wget, '--quiet' ];
878
879    ### if a timeout is set, add it ###
880    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
881
882    ### run passive if specified ###
883    push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
884
885    ### set the output document, add the uri ###
886    push @$cmd, '--output-document', $to, $self->uri;
887
888    ### with IPC::Cmd > 0.41, this is fixed in teh library,
889    ### and there's no need for special casing any more.
890    ### DO NOT quote things for IPC::Run, it breaks stuff.
891    # $IPC::Cmd::USE_IPC_RUN
892    #    ? ($to, $self->uri)
893    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
894
895    ### shell out ###
896    my $captured;
897    unless(run( command => $cmd,
898                buffer  => \$captured,
899                verbose => $DEBUG
900    )) {
901        ### wget creates the output document always, even if the fetch
902        ### fails.. so unlink it in that case
903        1 while unlink $to;
904
905        return $self->_error(loc( "Command failed: %1", $captured || '' ));
906    }
907
908    return $to;
909}
910
911### /bin/lftp fetch ###
912sub _lftp_fetch {
913    my $self = shift;
914    my %hash = @_;
915
916    my ($to);
917    my $tmpl = {
918        to  => { required => 1, store => \$to }
919    };
920    check( $tmpl, \%hash ) or return;
921
922    ### see if we have a lftp binary ###
923    my $lftp;
924    unless( $lftp = can_run('lftp') ) {
925        $METHOD_FAIL->{'lftp'} = 1;
926        return;
927    }
928
929    ### no verboseness, thanks ###
930    my $cmd = [ $lftp, '-f' ];
931
932    my $fh = File::Temp->new;
933
934    my $str;
935
936    ### if a timeout is set, add it ###
937    $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
938
939    ### run passive if specified ###
940    $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
941
942    ### set the output document, add the uri ###
943    ### quote the URI, because lftp supports certain shell
944    ### expansions, most notably & for backgrounding.
945    ### ' quote does nto work, must be "
946    $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
947
948    if( $DEBUG ) {
949        my $pp_str = join ' ', split $/, $str;
950        print "# lftp command: $pp_str\n";
951    }
952
953    ### write straight to the file.
954    $fh->autoflush(1);
955    print $fh $str;
956
957    ### the command needs to be 1 string to be executed
958    push @$cmd, $fh->filename;
959
960    ### with IPC::Cmd > 0.41, this is fixed in teh library,
961    ### and there's no need for special casing any more.
962    ### DO NOT quote things for IPC::Run, it breaks stuff.
963    # $IPC::Cmd::USE_IPC_RUN
964    #    ? ($to, $self->uri)
965    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
966
967
968    ### shell out ###
969    my $captured;
970    unless(run( command => $cmd,
971                buffer  => \$captured,
972                verbose => $DEBUG
973    )) {
974        ### wget creates the output document always, even if the fetch
975        ### fails.. so unlink it in that case
976        1 while unlink $to;
977
978        return $self->_error(loc( "Command failed: %1", $captured || '' ));
979    }
980
981    return $to;
982}
983
984
985
986### /bin/ftp fetch ###
987sub _ftp_fetch {
988    my $self = shift;
989    my %hash = @_;
990
991    my ($to);
992    my $tmpl = {
993        to  => { required => 1, store => \$to }
994    };
995    check( $tmpl, \%hash ) or return;
996
997    ### see if we have a ftp binary ###
998    my $ftp;
999    unless( $ftp = can_run('ftp') ) {
1000        $METHOD_FAIL->{'ftp'} = 1;
1001        return;
1002    }
1003
1004    my $fh = FileHandle->new;
1005
1006    local $SIG{CHLD} = 'IGNORE';
1007
1008    unless ($fh->open("$ftp -n", '|-')) {
1009        return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1010    }
1011
1012    my @dialog = (
1013        "lcd " . dirname($to),
1014        "open " . $self->host,
1015        "user anonymous $FROM_EMAIL",
1016        "cd /",
1017        "cd " . $self->path,
1018        "binary",
1019        "get " . $self->file . " " . $self->output_file,
1020        "quit",
1021    );
1022
1023    foreach (@dialog) { $fh->print($_, "\n") }
1024    $fh->close or return;
1025
1026    return $to;
1027}
1028
1029### lynx is stupid - it decompresses any .gz file it finds to be text
1030### use /bin/lynx to fetch files
1031sub _lynx_fetch {
1032    my $self = shift;
1033    my %hash = @_;
1034
1035    my ($to);
1036    my $tmpl = {
1037        to  => { required => 1, store => \$to }
1038    };
1039    check( $tmpl, \%hash ) or return;
1040
1041    ### see if we have a lynx binary ###
1042    my $lynx;
1043    unless ( $lynx = can_run('lynx') ){
1044        $METHOD_FAIL->{'lynx'} = 1;
1045        return;
1046    }
1047
1048    unless( IPC::Cmd->can_capture_buffer ) {
1049        $METHOD_FAIL->{'lynx'} = 1;
1050
1051        return $self->_error(loc(
1052            "Can not capture buffers. Can not use '%1' to fetch files",
1053            'lynx' ));
1054    }
1055
1056    ### check if the HTTP resource exists ###
1057    if ($self->uri =~ /^https?:\/\//i) {
1058        my $cmd = [
1059            $lynx,
1060            '-head',
1061            '-source',
1062            "-auth=anonymous:$FROM_EMAIL",
1063        ];
1064
1065        push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1066
1067        push @$cmd, $self->uri;
1068
1069        ### shell out ###
1070        my $head;
1071        unless(run( command => $cmd,
1072                    buffer  => \$head,
1073                    verbose => $DEBUG )
1074        ) {
1075            return $self->_error(loc("Command failed: %1", $head || ''));
1076        }
1077
1078        unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1079            return $self->_error(loc("Command failed: %1", $head || ''));
1080        }
1081    }
1082
1083    ### write to the output file ourselves, since lynx ass_u_mes to much
1084    my $local = FileHandle->new( $to, 'w' )
1085                    or return $self->_error(loc(
1086                        "Could not open '%1' for writing: %2",$to,$!));
1087
1088    ### dump to stdout ###
1089    my $cmd = [
1090        $lynx,
1091        '-source',
1092        "-auth=anonymous:$FROM_EMAIL",
1093    ];
1094
1095    push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1096
1097    ### DO NOT quote things for IPC::Run, it breaks stuff.
1098    push @$cmd, $self->uri;
1099
1100    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1101    ### and there's no need for special casing any more.
1102    ### DO NOT quote things for IPC::Run, it breaks stuff.
1103    # $IPC::Cmd::USE_IPC_RUN
1104    #    ? $self->uri
1105    #    : QUOTE. $self->uri .QUOTE;
1106
1107
1108    ### shell out ###
1109    my $captured;
1110    unless(run( command => $cmd,
1111                buffer  => \$captured,
1112                verbose => $DEBUG )
1113    ) {
1114        return $self->_error(loc("Command failed: %1", $captured || ''));
1115    }
1116
1117    ### print to local file ###
1118    ### XXX on a 404 with a special error page, $captured will actually
1119    ### hold the contents of that page, and make it *appear* like the
1120    ### request was a success, when really it wasn't :(
1121    ### there doesn't seem to be an option for lynx to change the exit
1122    ### code based on a 4XX status or so.
1123    ### the closest we can come is using --error_file and parsing that,
1124    ### which is very unreliable ;(
1125    $local->print( $captured );
1126    $local->close or return;
1127
1128    return $to;
1129}
1130
1131### use /bin/ncftp to fetch files
1132sub _ncftp_fetch {
1133    my $self = shift;
1134    my %hash = @_;
1135
1136    my ($to);
1137    my $tmpl = {
1138        to  => { required => 1, store => \$to }
1139    };
1140    check( $tmpl, \%hash ) or return;
1141
1142    ### we can only set passive mode in interactive sessions, so bail out
1143    ### if $FTP_PASSIVE is set
1144    return if $FTP_PASSIVE;
1145
1146    ### see if we have a ncftp binary ###
1147    my $ncftp;
1148    unless( $ncftp = can_run('ncftp') ) {
1149        $METHOD_FAIL->{'ncftp'} = 1;
1150        return;
1151    }
1152
1153    my $cmd = [
1154        $ncftp,
1155        '-V',                   # do not be verbose
1156        '-p', $FROM_EMAIL,      # email as password
1157        $self->host,            # hostname
1158        dirname($to),           # local dir for the file
1159                                # remote path to the file
1160        ### DO NOT quote things for IPC::Run, it breaks stuff.
1161        $IPC::Cmd::USE_IPC_RUN
1162                    ? File::Spec::Unix->catdir( $self->path, $self->file )
1163                    : QUOTE. File::Spec::Unix->catdir(
1164                                    $self->path, $self->file ) .QUOTE
1165
1166    ];
1167
1168    ### shell out ###
1169    my $captured;
1170    unless(run( command => $cmd,
1171                buffer  => \$captured,
1172                verbose => $DEBUG )
1173    ) {
1174        return $self->_error(loc("Command failed: %1", $captured || ''));
1175    }
1176
1177    return $to;
1178
1179}
1180
1181### use /bin/curl to fetch files
1182sub _curl_fetch {
1183    my $self = shift;
1184    my %hash = @_;
1185
1186    my ($to);
1187    my $tmpl = {
1188        to  => { required => 1, store => \$to }
1189    };
1190    check( $tmpl, \%hash ) or return;
1191    my $curl;
1192    unless ( $curl = can_run('curl') ) {
1193        $METHOD_FAIL->{'curl'} = 1;
1194        return;
1195    }
1196
1197    ### these long opts are self explanatory - I like that -jmb
1198    my $cmd = [ $curl, '-q' ];
1199
1200    push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1201
1202    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1203
1204    push(@$cmd, '--silent') unless $DEBUG;
1205
1206    ### curl does the right thing with passive, regardless ###
1207    if ($self->scheme eq 'ftp') {
1208        push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1209    }
1210
1211    ### curl doesn't follow 302 (temporarily moved) etc automatically
1212    ### so we add --location to enable that.
1213    push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1214
1215    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1216    ### and there's no need for special casing any more.
1217    ### DO NOT quote things for IPC::Run, it breaks stuff.
1218    # $IPC::Cmd::USE_IPC_RUN
1219    #    ? ($to, $self->uri)
1220    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1221
1222
1223    my $captured;
1224    unless(run( command => $cmd,
1225                buffer  => \$captured,
1226                verbose => $DEBUG )
1227    ) {
1228
1229        return $self->_error(loc("Command failed: %1", $captured || ''));
1230    }
1231
1232    return $to;
1233
1234}
1235
1236### /usr/bin/fetch fetch! ###
1237sub _fetch_fetch {
1238    my $self = shift;
1239    my %hash = @_;
1240
1241    my ($to);
1242    my $tmpl = {
1243        to  => { required => 1, store => \$to }
1244    };
1245    check( $tmpl, \%hash ) or return;
1246
1247    ### see if we have a fetch binary ###
1248    my $fetch;
1249    unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1250        $METHOD_FAIL->{'fetch'} = 1;
1251        return;
1252    }
1253
1254    ### no verboseness, thanks ###
1255    my $cmd = [ $fetch, '-q' ];
1256
1257    ### if a timeout is set, add it ###
1258    push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1259
1260    ### run passive if specified ###
1261    #push @$cmd, '-p' if $FTP_PASSIVE;
1262    local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1263
1264    ### set the output document, add the uri ###
1265    push @$cmd, '-o', $to, $self->uri;
1266
1267    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1268    ### and there's no need for special casing any more.
1269    ### DO NOT quote things for IPC::Run, it breaks stuff.
1270    # $IPC::Cmd::USE_IPC_RUN
1271    #    ? ($to, $self->uri)
1272    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1273
1274    ### shell out ###
1275    my $captured;
1276    unless(run( command => $cmd,
1277                buffer  => \$captured,
1278                verbose => $DEBUG
1279    )) {
1280        ### wget creates the output document always, even if the fetch
1281        ### fails.. so unlink it in that case
1282        1 while unlink $to;
1283
1284        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1285    }
1286
1287    return $to;
1288}
1289
1290### use File::Copy for fetching file:// urls ###
1291###
1292### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1293### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1294###
1295
1296sub _file_fetch {
1297    my $self = shift;
1298    my %hash = @_;
1299
1300    my ($to);
1301    my $tmpl = {
1302        to  => { required => 1, store => \$to }
1303    };
1304    check( $tmpl, \%hash ) or return;
1305
1306
1307
1308    ### prefix a / on unix systems with a file uri, since it would
1309    ### look somewhat like this:
1310    ###     file:///home/kane/file
1311    ### whereas windows file uris for 'c:\some\dir\file' might look like:
1312    ###     file:///C:/some/dir/file
1313    ###     file:///C|/some/dir/file
1314    ### or for a network share '\\host\share\some\dir\file':
1315    ###     file:////host/share/some/dir/file
1316    ###
1317    ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1318    ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
1319    ###
1320
1321    my $path    = $self->path;
1322    my $vol     = $self->vol;
1323    my $share   = $self->share;
1324
1325    my $remote;
1326    if (!$share and $self->host) {
1327        return $self->_error(loc(
1328            "Currently %1 cannot handle hosts in %2 urls",
1329            'File::Fetch', 'file://'
1330        ));
1331    }
1332
1333    if( $vol ) {
1334        $path   = File::Spec->catdir( split /\//, $path );
1335        $remote = File::Spec->catpath( $vol, $path, $self->file);
1336
1337    } elsif( $share ) {
1338        ### win32 specific, and a share name, so we wont bother with File::Spec
1339        $path   =~ s|/+|\\|g;
1340        $remote = "\\\\".$self->host."\\$share\\$path";
1341
1342    } else {
1343        ### File::Spec on VMS can not currently handle UNIX syntax.
1344        my $file_class = ON_VMS
1345            ? 'File::Spec::Unix'
1346            : 'File::Spec';
1347
1348        $remote  = $file_class->catfile( $path, $self->file );
1349    }
1350
1351    ### File::Copy is littered with 'die' statements :( ###
1352    my $rv = eval { File::Copy::copy( $remote, $to ) };
1353
1354    ### something went wrong ###
1355    if( !$rv or $@ ) {
1356        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1357                             $remote, $to, $!, $@));
1358    }
1359
1360    return $to;
1361}
1362
1363### use /usr/bin/rsync to fetch files
1364sub _rsync_fetch {
1365    my $self = shift;
1366    my %hash = @_;
1367
1368    my ($to);
1369    my $tmpl = {
1370        to  => { required => 1, store => \$to }
1371    };
1372    check( $tmpl, \%hash ) or return;
1373    my $rsync;
1374    unless ( $rsync = can_run('rsync') ) {
1375        $METHOD_FAIL->{'rsync'} = 1;
1376        return;
1377    }
1378
1379    my $cmd = [ $rsync ];
1380
1381    ### XXX: rsync has no I/O timeouts at all, by default
1382    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1383
1384    push(@$cmd, '--quiet') unless $DEBUG;
1385
1386    ### DO NOT quote things for IPC::Run, it breaks stuff.
1387    push @$cmd, $self->uri, $to;
1388
1389    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1390    ### and there's no need for special casing any more.
1391    ### DO NOT quote things for IPC::Run, it breaks stuff.
1392    # $IPC::Cmd::USE_IPC_RUN
1393    #    ? ($to, $self->uri)
1394    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1395
1396    my $captured;
1397    unless(run( command => $cmd,
1398                buffer  => \$captured,
1399                verbose => $DEBUG )
1400    ) {
1401
1402        return $self->_error(loc("Command %1 failed: %2",
1403            "@$cmd" || '', $captured || ''));
1404    }
1405
1406    return $to;
1407
1408}
1409
1410### use git to fetch files
1411sub _git_fetch {
1412    my $self = shift;
1413    my %hash = @_;
1414
1415    my ($to);
1416    my $tmpl = {
1417        to  => { required => 1, store => \$to }
1418    };
1419    check( $tmpl, \%hash ) or return;
1420    my $git;
1421    unless ( $git = can_run('git') ) {
1422        $METHOD_FAIL->{'git'} = 1;
1423        return;
1424    }
1425
1426    my $cmd = [ $git, 'clone' ];
1427
1428    #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1429
1430    push(@$cmd, '--quiet') unless $DEBUG;
1431
1432    ### DO NOT quote things for IPC::Run, it breaks stuff.
1433    push @$cmd, $self->uri, $to;
1434
1435    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1436    ### and there's no need for special casing any more.
1437    ### DO NOT quote things for IPC::Run, it breaks stuff.
1438    # $IPC::Cmd::USE_IPC_RUN
1439    #    ? ($to, $self->uri)
1440    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1441
1442    my $captured;
1443    unless(run( command => $cmd,
1444                buffer  => \$captured,
1445                verbose => $DEBUG )
1446    ) {
1447
1448        return $self->_error(loc("Command %1 failed: %2",
1449            "@$cmd" || '', $captured || ''));
1450    }
1451
1452    return $to;
1453
1454}
1455
1456#################################
1457#
1458# Error code
1459#
1460#################################
1461
1462=pod
1463
1464=head2 $ff->error([BOOL])
1465
1466Returns the last encountered error as string.
1467Pass it a true value to get the C<Carp::longmess()> output instead.
1468
1469=cut
1470
1471### error handling the way Archive::Extract does it
1472sub _error {
1473    my $self    = shift;
1474    my $error   = shift;
1475
1476    $self->_error_msg( $error );
1477    $self->_error_msg_long( Carp::longmess($error) );
1478
1479    if( $WARN ) {
1480        carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1481    }
1482
1483    return;
1484}
1485
1486sub error {
1487    my $self = shift;
1488    return shift() ? $self->_error_msg_long : $self->_error_msg;
1489}
1490
1491
14921;
1493
1494=pod
1495
1496=head1 HOW IT WORKS
1497
1498File::Fetch is able to fetch a variety of uris, by using several
1499external programs and modules.
1500
1501Below is a mapping of what utilities will be used in what order
1502for what schemes, if available:
1503
1504    file    => LWP, lftp, file
1505    http    => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock
1506    ftp     => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1507    rsync   => rsync
1508    git     => git
1509
1510If you'd like to disable the use of one or more of these utilities
1511and/or modules, see the C<$BLACKLIST> variable further down.
1512
1513If a utility or module isn't available, it will be marked in a cache
1514(see the C<$METHOD_FAIL> variable further down), so it will not be
1515tried again. The C<fetch> method will only fail when all options are
1516exhausted, and it was not able to retrieve the file.
1517
1518The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1519may also have it from C<pkgsrc>. We only check for C<fetch> on those
1520three platforms.
1521
1522C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1523retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1524
1525C<git> only supports C<git://> style urls.
1526
1527A special note about fetching files from an ftp uri:
1528
1529By default, all ftp connections are done in passive mode. To change
1530that, see the C<$FTP_PASSIVE> variable further down.
1531
1532Furthermore, ftp uris only support anonymous connections, so no
1533named user/password pair can be passed along.
1534
1535C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1536further down.
1537
1538=head1 GLOBAL VARIABLES
1539
1540The behaviour of File::Fetch can be altered by changing the following
1541global variables:
1542
1543=head2 $File::Fetch::FROM_EMAIL
1544
1545This is the email address that will be sent as your anonymous ftp
1546password.
1547
1548Default is C<File-Fetch@example.com>.
1549
1550=head2 $File::Fetch::USER_AGENT
1551
1552This is the useragent as C<LWP> will report it.
1553
1554Default is C<File::Fetch/$VERSION>.
1555
1556=head2 $File::Fetch::FTP_PASSIVE
1557
1558This variable controls whether the environment variable C<FTP_PASSIVE>
1559and any passive switches to commandline tools will be set to true.
1560
1561Default value is 1.
1562
1563Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1564files, since passive mode can only be set interactively for this binary
1565
1566=head2 $File::Fetch::TIMEOUT
1567
1568When set, controls the network timeout (counted in seconds).
1569
1570Default value is 0.
1571
1572=head2 $File::Fetch::WARN
1573
1574This variable controls whether errors encountered internally by
1575C<File::Fetch> should be C<carp>'d or not.
1576
1577Set to false to silence warnings. Inspect the output of the C<error()>
1578method manually to see what went wrong.
1579
1580Defaults to C<true>.
1581
1582=head2 $File::Fetch::DEBUG
1583
1584This enables debugging output when calling commandline utilities to
1585fetch files.
1586This also enables C<Carp::longmess> errors, instead of the regular
1587C<carp> errors.
1588
1589Good for tracking down why things don't work with your particular
1590setup.
1591
1592Default is 0.
1593
1594=head2 $File::Fetch::BLACKLIST
1595
1596This is an array ref holding blacklisted modules/utilities for fetching
1597files with.
1598
1599To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1600set $File::Fetch::BLACKLIST to:
1601
1602    $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1603
1604The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1605
1606See the note on C<MAPPING> below.
1607
1608=head2 $File::Fetch::METHOD_FAIL
1609
1610This is a hashref registering what modules/utilities were known to fail
1611for fetching files (mostly because they weren't installed).
1612
1613You can reset this cache by assigning an empty hashref to it, or
1614individually remove keys.
1615
1616See the note on C<MAPPING> below.
1617
1618=head1 MAPPING
1619
1620
1621Here's a quick mapping for the utilities/modules, and their names for
1622the $BLACKLIST, $METHOD_FAIL and other internal functions.
1623
1624    LWP         => lwp
1625    HTTP::Lite  => httplite
1626    HTTP::Tiny  => httptiny
1627    Net::FTP    => netftp
1628    wget        => wget
1629    lynx        => lynx
1630    ncftp       => ncftp
1631    ftp         => ftp
1632    curl        => curl
1633    rsync       => rsync
1634    lftp        => lftp
1635    fetch       => fetch
1636    IO::Socket  => iosock
1637
1638=head1 FREQUENTLY ASKED QUESTIONS
1639
1640=head2 So how do I use a proxy with File::Fetch?
1641
1642C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1643You will need to set your environment variables accordingly. For
1644example, to use an ftp proxy:
1645
1646    $ENV{ftp_proxy} = 'foo.com';
1647
1648Refer to the LWP::UserAgent manpage for more details.
1649
1650=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1651
1652C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1653which we in turn capture. If that content is a 'custom' error file
1654(like, say, a C<404 handler>), you will get that contents instead.
1655
1656Sadly, C<lynx> doesn't support any options to return a different exit
1657code on non-C<200 OK> status, giving us no way to tell the difference
1658between a 'successful' fetch and a custom error page.
1659
1660Therefor, we recommend to only use C<lynx> as a last resort. This is
1661why it is at the back of our list of methods to try as well.
1662
1663=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1664
1665C<File::Fetch> is relatively smart about things. When trying to write
1666a file to disk, it removes the C<query parameters> (see the
1667C<output_file> method for details) from the file name before creating
1668it. In most cases this suffices.
1669
1670If you have any other characters you need to escape, please install
1671the C<URI::Escape> module from CPAN, and pre-encode your URI before
1672passing it to C<File::Fetch>. You can read about the details of URIs
1673and URI encoding here:
1674
1675  http://www.faqs.org/rfcs/rfc2396.html
1676
1677=head1 TODO
1678
1679=over 4
1680
1681=item Implement $PREFER_BIN
1682
1683To indicate to rather use commandline tools than modules
1684
1685=back
1686
1687=head1 BUG REPORTS
1688
1689Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1690
1691=head1 AUTHOR
1692
1693This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1694
1695=head1 COPYRIGHT
1696
1697This library is free software; you may redistribute and/or modify it
1698under the same terms as Perl itself.
1699
1700
1701=cut
1702
1703# Local variables:
1704# c-indentation-style: bsd
1705# c-basic-offset: 4
1706# indent-tabs-mode: nil
1707# End:
1708# vim: expandtab shiftwidth=4:
1709
1710
1711
1712
1713