xref: /openbsd-src/gnu/usr.bin/perl/cpan/File-Fetch/lib/File/Fetch.pm (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Temp;
6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
9use File::Basename              qw[dirname];
10
11use Cwd                         qw[cwd];
12use Carp                        qw[carp];
13use IPC::Cmd                    qw[can_run run QUOTE];
14use File::Path                  qw[mkpath];
15use File::Temp                  qw[tempdir];
16use Params::Check               qw[check];
17use Module::Load::Conditional   qw[can_load];
18use Locale::Maketext::Simple    Style => 'gettext';
19
20use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
23            ];
24
25$VERSION        = '0.48_01';
26$VERSION        = eval $VERSION;    # avoid warnings with development releases
27$PREFER_BIN     = 0;                # XXX TODO implement
28$FROM_EMAIL     = 'File-Fetch@example.com';
29$USER_AGENT     = "File::Fetch/$VERSION";
30$BLACKLIST      = [qw|ftp|];
31push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly';
32$METHOD_FAIL    = { };
33$FTP_PASSIVE    = 1;
34$TIMEOUT        = 0;
35$DEBUG          = 0;
36$WARN           = 1;
37$FORCEIPV4      = 0;
38
39### methods available to fetch the file depending on the scheme
40$METHODS = {
41    http    => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
42    ftp     => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
43    file    => [ qw|lwp lftp file| ],
44    rsync   => [ qw|rsync| ],
45    git     => [ qw|git| ],
46};
47
48### silly warnings ###
49local $Params::Check::VERBOSE               = 1;
50local $Params::Check::VERBOSE               = 1;
51local $Module::Load::Conditional::VERBOSE   = 0;
52local $Module::Load::Conditional::VERBOSE   = 0;
53
54### see what OS we are on, important for file:// uris ###
55use constant ON_WIN     => ($^O eq 'MSWin32');
56use constant ON_VMS     => ($^O eq 'VMS');
57use constant ON_UNIX    => (!ON_WIN);
58use constant HAS_VOL    => (ON_WIN);
59use constant HAS_SHARE  => (ON_WIN);
60use constant HAS_FETCH  => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
61
62=pod
63
64=head1 NAME
65
66File::Fetch - A generic file fetching mechanism
67
68=head1 SYNOPSIS
69
70    use File::Fetch;
71
72    ### build a File::Fetch object ###
73    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
74
75    ### fetch the uri to cwd() ###
76    my $where = $ff->fetch() or die $ff->error;
77
78    ### fetch the uri to /tmp ###
79    my $where = $ff->fetch( to => '/tmp' );
80
81    ### parsed bits from the uri ###
82    $ff->uri;
83    $ff->scheme;
84    $ff->host;
85    $ff->path;
86    $ff->file;
87
88=head1 DESCRIPTION
89
90File::Fetch is a generic file fetching mechanism.
91
92It allows you to fetch any file pointed to by a C<ftp>, C<http>,
93C<file>, C<git> or C<rsync> uri by a number of different means.
94
95See the C<HOW IT WORKS> section further down for details.
96
97=head1 ACCESSORS
98
99A C<File::Fetch> object has the following accessors
100
101=over 4
102
103=item $ff->uri
104
105The uri you passed to the constructor
106
107=item $ff->scheme
108
109The scheme from the uri (like 'file', 'http', etc)
110
111=item $ff->host
112
113The hostname in the uri.  Will be empty if host was originally
114'localhost' for a 'file://' url.
115
116=item $ff->vol
117
118On operating systems with the concept of a volume the second element
119of a file:// is considered to the be volume specification for the file.
120Thus on Win32 this routine returns the volume, on other operating
121systems this returns nothing.
122
123On Windows this value may be empty if the uri is to a network share, in
124which case the 'share' property will be defined. Additionally, volume
125specifications that use '|' as ':' will be converted on read to use ':'.
126
127On VMS, which has a volume concept, this field will be empty because VMS
128file specifications are converted to absolute UNIX format and the volume
129information is transparently included.
130
131=item $ff->share
132
133On systems with the concept of a network share (currently only Windows) returns
134the sharename from a file://// url.  On other operating systems returns empty.
135
136=item $ff->path
137
138The path from the uri, will be at least a single '/'.
139
140=item $ff->file
141
142The name of the remote file. For the local file name, the
143result of $ff->output_file will be used.
144
145=item $ff->file_default
146
147The name of the default local file, that $ff->output_file falls back to if
148it would otherwise return no filename. For example when fetching a URI like
149http://www.abc.net.au/ the contents retrieved may be from a remote file called
150'index.html'. The default value of this attribute is literally 'file_default'.
151
152=cut
153
154
155##########################
156### Object & Accessors ###
157##########################
158
159{
160    ### template for autogenerated accessors ###
161    my $Tmpl = {
162        scheme          => { default => 'http' },
163        host            => { default => 'localhost' },
164        path            => { default => '/' },
165        file            => { required => 1 },
166        uri             => { required => 1 },
167        vol             => { default => '' }, # windows for file:// uris
168        share           => { default => '' }, # windows for file:// uris
169        file_default    => { default => 'file_default' },
170        tempdir_root    => { required => 1 }, # Should be lazy-set at ->new()
171        _error_msg      => { no_override => 1 },
172        _error_msg_long => { no_override => 1 },
173    };
174
175    for my $method ( keys %$Tmpl ) {
176        no strict 'refs';
177        *$method = sub {
178                        my $self = shift;
179                        $self->{$method} = $_[0] if @_;
180                        return $self->{$method};
181                    }
182    }
183
184    sub _create {
185        my $class = shift;
186        my %hash  = @_;
187
188        my $args = check( $Tmpl, \%hash ) or return;
189
190        bless $args, $class;
191
192        if( lc($args->scheme) ne 'file' and not $args->host ) {
193            return $class->_error(loc(
194                "Hostname required when fetching from '%1'",$args->scheme));
195        }
196
197        for (qw[path]) {
198            unless( $args->$_() ) { # 5.5.x needs the ()
199                return $class->_error(loc("No '%1' specified",$_));
200            }
201        }
202
203        return $args;
204    }
205}
206
207=item $ff->output_file
208
209The name of the output file. This is the same as $ff->file,
210but any query parameters are stripped off. For example:
211
212    http://example.com/index.html?x=y
213
214would make the output file be C<index.html> rather than
215C<index.html?x=y>.
216
217=back
218
219=cut
220
221sub output_file {
222    my $self = shift;
223    my $file = $self->file;
224
225    $file =~ s/\?.*$//g;
226
227    $file ||= $self->file_default;
228
229    return $file;
230}
231
232### XXX do this or just point to URI::Escape?
233# =head2 $esc_uri = $ff->escaped_uri
234#
235# =cut
236#
237# ### most of this is stolen straight from URI::escape
238# {   ### Build a char->hex map
239#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
240#
241#     sub escaped_uri {
242#         my $self = shift;
243#         my $uri  = $self->uri;
244#
245#         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
246#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
247#                     $escapes{$1} || $self->_fail_hi($1)/ge;
248#
249#         return $uri;
250#     }
251#
252#     sub _fail_hi {
253#         my $self = shift;
254#         my $char = shift;
255#
256#         $self->_error(loc(
257#             "Can't escape '%1', try using the '%2' module instead",
258#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
259#         ));
260#     }
261#
262#     sub output_file {
263#
264#     }
265#
266#
267# }
268
269=head1 METHODS
270
271=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
272
273Parses the uri and creates a corresponding File::Fetch::Item object,
274that is ready to be C<fetch>ed and returns it.
275
276Returns false on failure.
277
278=cut
279
280sub new {
281    my $class = shift;
282    my %hash  = @_;
283
284    my ($uri, $file_default, $tempdir_root);
285    my $tmpl = {
286        uri          => { required => 1, store => \$uri },
287        file_default => { required => 0, store => \$file_default },
288        tempdir_root => { required => 0, store => \$tempdir_root },
289    };
290
291    check( $tmpl, \%hash ) or return;
292
293    ### parse the uri to usable parts ###
294    my $href    = $class->_parse_uri( $uri ) or return;
295
296    $href->{file_default} = $file_default if $file_default;
297    $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
298    $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd      ) if not $href->{tempdir_root};
299
300    ### make it into a FFI object ###
301    my $ff      = $class->_create( %$href ) or return;
302
303
304    ### return the object ###
305    return $ff;
306}
307
308### parses an uri to a hash structure:
309###
310### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
311###
312### becomes:
313###
314### $href = {
315###     scheme  => 'ftp',
316###     host    => 'ftp.cpan.org',
317###     path    => '/pub/mirror',
318###     file    => 'index.html'
319### };
320###
321### In the case of file:// urls there maybe be additional fields
322###
323### For systems with volume specifications such as Win32 there will be
324### a volume specifier provided in the 'vol' field.
325###
326###   'vol' => 'volumename'
327###
328### For windows file shares there may be a 'share' key specified
329###
330###   'share' => 'sharename'
331###
332### Note that the rules of what a file:// url means vary by the operating system
333### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
334### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
335### not '/foo/bar.txt'
336###
337### Similarly if the host interpreting the url is VMS then
338### file:///disk$user/my/notes/note12345.txt' means
339### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
340### if it is unix where it means /disk$user/my/notes/note12345.txt'.
341### Except for some cases in the File::Spec methods, Perl on VMS will generally
342### handle UNIX format file specifications.
343###
344### This means it is impossible to serve certain file:// urls on certain systems.
345###
346### Thus are the problems with a protocol-less specification. :-(
347###
348
349sub _parse_uri {
350    my $self = shift;
351    my $uri  = shift or return;
352
353    my $href = { uri => $uri };
354
355    ### find the scheme ###
356    $uri            =~ s|^(\w+)://||;
357    $href->{scheme} = $1;
358
359    ### See rfc 1738 section 3.10
360    ### http://www.faqs.org/rfcs/rfc1738.html
361    ### And wikipedia for more on windows file:// urls
362    ### http://en.wikipedia.org/wiki/File://
363    if( $href->{scheme} eq 'file' ) {
364
365        my @parts = split '/',$uri;
366
367        ### file://hostname/...
368        ### file://hostname/...
369        ### normalize file://localhost with file:///
370        $href->{host} = $parts[0] || '';
371
372        ### index in @parts where the path components begin;
373        my $index = 1;
374
375        ### file:////hostname/sharename/blah.txt
376        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
377
378            $href->{host}   = $parts[2] || '';  # avoid warnings
379            $href->{share}  = $parts[3] || '';  # avoid warnings
380
381            $index          = 4         # index after the share
382
383        ### file:///D|/blah.txt
384        ### file:///D:/blah.txt
385        } elsif (HAS_VOL) {
386
387            ### this code comes from dmq's patch, but:
388            ### XXX if volume is empty, wouldn't that be an error? --kane
389            ### if so, our file://localhost test needs to be fixed as wel
390            $href->{vol}    = $parts[1] || '';
391
392            ### correct D| style colume descriptors
393            $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
394
395            $index          = 2;        # index after the volume
396        }
397
398        ### rebuild the path from the leftover parts;
399        $href->{path} = join '/', '', splice( @parts, $index, $#parts );
400
401    } else {
402        ### using anything but qw() in hash slices may produce warnings
403        ### in older perls :-(
404        @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
405    }
406
407    ### split the path into file + dir ###
408    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
409        $href->{path} = $parts[1];
410        $href->{file} = $parts[2];
411    }
412
413    ### host will be empty if the target was 'localhost' and the
414    ### scheme was 'file'
415    $href->{host} = '' if   ($href->{host}      eq 'localhost') and
416                            ($href->{scheme}    eq 'file');
417
418    return $href;
419}
420
421=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
422
423Fetches the file you requested and returns the full path to the file.
424
425By default it writes to C<cwd()>, but you can override that by specifying
426the C<to> argument:
427
428    ### file fetch to /tmp, full path to the file in $where
429    $where = $ff->fetch( to => '/tmp' );
430
431    ### file slurped into $scalar, full path to the file in $where
432    ### file is downloaded to a temp directory and cleaned up at exit time
433    $where = $ff->fetch( to => \$scalar );
434
435Returns the full path to the downloaded file on success, and false
436on failure.
437
438=cut
439
440sub fetch {
441    my $self = shift or return;
442    my %hash = @_;
443
444    my $target;
445    my $tmpl = {
446        to  => { default => cwd(), store => \$target },
447    };
448
449    check( $tmpl, \%hash ) or return;
450
451    my ($to, $fh);
452    ### you want us to slurp the contents
453    if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
454        $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
455
456    ### plain old fetch
457    } else {
458        $to = $target;
459
460        ### On VMS force to VMS format so File::Spec will work.
461        $to = VMS::Filespec::vmspath($to) if ON_VMS;
462
463        ### create the path if it doesn't exist yet ###
464        unless( -d $to ) {
465            eval { mkpath( $to ) };
466
467            return $self->_error(loc("Could not create path '%1'",$to)) if $@;
468        }
469    }
470
471    ### set passive ftp if required ###
472    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
473
474    ### we dont use catfile on win32 because if we are using a cygwin tool
475    ### under cmd.exe they wont understand windows style separators.
476    my $out_to = ON_WIN ? $to.'/'.$self->output_file
477                        : File::Spec->catfile( $to, $self->output_file );
478
479    for my $method ( @{ $METHODS->{$self->scheme} } ) {
480        my $sub =  '_'.$method.'_fetch';
481
482        unless( __PACKAGE__->can($sub) ) {
483            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
484                        $method));
485            next;
486        }
487
488        ### method is blacklisted ###
489        next if grep { lc $_ eq $method } @$BLACKLIST;
490
491        ### method is known to fail ###
492        next if $METHOD_FAIL->{$method};
493
494        ### there's serious issues with IPC::Run and quoting of command
495        ### line arguments. using quotes in the wrong place breaks things,
496        ### and in the case of say,
497        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
498        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
499        ### it doesn't matter how you quote, it always fails.
500        local $IPC::Cmd::USE_IPC_RUN = 0;
501
502        if( my $file = $self->$sub(
503                        to => $out_to
504        )){
505
506            unless( -e $file && -s _ ) {
507                $self->_error(loc("'%1' said it fetched '%2', ".
508                     "but it was not created",$method,$file));
509
510                ### mark the failure ###
511                $METHOD_FAIL->{$method} = 1;
512
513                next;
514
515            } else {
516
517                ### slurp mode?
518                if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
519
520                    ### open the file
521                    open my $fh, "<$file" or do {
522                        $self->_error(
523                            loc("Could not open '%1': %2", $file, $!));
524                        return;
525                    };
526
527                    ### slurp
528                    $$target = do { local $/; <$fh> };
529
530                }
531
532                my $abs = File::Spec->rel2abs( $file );
533                return $abs;
534
535            }
536        }
537    }
538
539
540    ### if we got here, we looped over all methods, but we weren't able
541    ### to fetch it.
542    return;
543}
544
545########################
546### _*_fetch methods ###
547########################
548
549### LWP fetching ###
550sub _lwp_fetch {
551    my $self = shift;
552    my %hash = @_;
553
554    my ($to);
555    my $tmpl = {
556        to  => { required => 1, store => \$to }
557    };
558    check( $tmpl, \%hash ) or return;
559
560    ### modules required to download with lwp ###
561    my $use_list = {
562        LWP                 => '0.0',
563        'LWP::UserAgent'    => '0.0',
564        'HTTP::Request'     => '0.0',
565        'HTTP::Status'      => '0.0',
566        URI                 => '0.0',
567
568    };
569
570    local @INC = @INC;
571    pop @INC if $INC[-1] eq '.';
572    unless( can_load( modules => $use_list ) ) {
573        $METHOD_FAIL->{'lwp'} = 1;
574        return;
575    }
576
577    ### setup the uri object
578    my $uri = URI->new( File::Spec::Unix->catfile(
579                                $self->path, $self->file
580                    ) );
581
582    ### special rules apply for file:// uris ###
583    $uri->scheme( $self->scheme );
584    $uri->host( $self->scheme eq 'file' ? '' : $self->host );
585    $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
586
587    ### set up the useragent object
588    my $ua = LWP::UserAgent->new();
589    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
590    $ua->agent( $USER_AGENT );
591    $ua->from( $FROM_EMAIL );
592    $ua->env_proxy;
593
594    my $res = $ua->mirror($uri, $to) or return;
595
596    ### uptodate or fetched ok ###
597    if ( $res->code == 304 or $res->code == 200 ) {
598        return $to;
599
600    } else {
601        return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
602                    $res->code, HTTP::Status::status_message($res->code),
603                    $res->status_line));
604    }
605
606}
607
608### HTTP::Tiny fetching ###
609sub _httptiny_fetch {
610    my $self = shift;
611    my %hash = @_;
612
613    my ($to);
614    my $tmpl = {
615        to  => { required => 1, store => \$to }
616    };
617    check( $tmpl, \%hash ) or return;
618
619    my $use_list = {
620        'HTTP::Tiny'    => '0.008',
621
622    };
623
624    local @INC = @INC;
625    pop @INC if $INC[-1] eq '.';
626    unless( can_load(modules => $use_list) ) {
627        $METHOD_FAIL->{'httptiny'} = 1;
628        return;
629    }
630
631    my $uri = $self->uri;
632
633    my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
634
635    my $rc = $http->mirror( $uri, $to );
636
637    unless ( $rc->{success} ) {
638
639        return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
640                    $rc->{status}, $rc->{reason} ) );
641
642    }
643
644    return $to;
645
646}
647
648### HTTP::Lite fetching ###
649sub _httplite_fetch {
650    my $self = shift;
651    my %hash = @_;
652
653    my ($to);
654    my $tmpl = {
655        to  => { required => 1, store => \$to }
656    };
657    check( $tmpl, \%hash ) or return;
658
659    ### modules required to download with lwp ###
660    my $use_list = {
661        'HTTP::Lite'    => '2.2',
662
663    };
664
665    local @INC = @INC;
666    pop @INC if $INC[-1] eq '.';
667    unless( can_load(modules => $use_list) ) {
668        $METHOD_FAIL->{'httplite'} = 1;
669        return;
670    }
671
672    my $uri = $self->uri;
673    my $retries = 0;
674
675    RETRIES: while ( $retries++ < 5 ) {
676
677      my $http = HTTP::Lite->new();
678      # Naughty naughty but there isn't any accessor/setter
679      $http->{timeout} = $TIMEOUT if $TIMEOUT;
680      $http->http11_mode(1);
681
682      my $fh = FileHandle->new;
683
684      unless ( $fh->open($to,'>') ) {
685        return $self->_error(loc(
686             "Could not open '%1' for writing: %2",$to,$!));
687      }
688
689      $fh->autoflush(1);
690
691      binmode $fh;
692
693      my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
694
695      close $fh;
696
697      if ( $rc == 301 || $rc == 302 ) {
698          my $loc;
699          HEADERS: for ($http->headers_array) {
700            /Location: (\S+)/ and $loc = $1, last HEADERS;
701          }
702          #$loc or last; # Think we should squeal here.
703          if ($loc =~ m!^/!) {
704            $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
705            $uri .= $loc;
706          }
707          else {
708            $uri = $loc;
709          }
710          next RETRIES;
711      }
712      elsif ( $rc == 200 ) {
713          return $to;
714      }
715      else {
716        return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
717                    $rc, $http->status_message));
718      }
719
720    } # Loop for 5 retries.
721
722    return $self->_error("Fetch failed! Gave up after 5 tries");
723
724}
725
726### Simple IO::Socket::INET fetching ###
727sub _iosock_fetch {
728    my $self = shift;
729    my %hash = @_;
730
731    my ($to);
732    my $tmpl = {
733        to  => { required => 1, store => \$to }
734    };
735    check( $tmpl, \%hash ) or return;
736
737    my $use_list = {
738        'IO::Socket::INET' => '0.0',
739        'IO::Select'       => '0.0',
740    };
741
742    local @INC = @INC;
743    pop @INC if $INC[-1] eq '.';
744    unless( can_load(modules => $use_list) ) {
745        $METHOD_FAIL->{'iosock'} = 1;
746        return;
747    }
748
749    my $sock = IO::Socket::INET->new(
750        PeerHost => $self->host,
751        ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
752    );
753
754    unless ( $sock ) {
755        return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
756    }
757
758    my $fh = FileHandle->new;
759
760    # Check open()
761
762    unless ( $fh->open($to,'>') ) {
763        return $self->_error(loc(
764             "Could not open '%1' for writing: %2",$to,$!));
765    }
766
767    $fh->autoflush(1);
768    binmode $fh;
769
770    my $path = File::Spec::Unix->catfile( $self->path, $self->file );
771    my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
772    $sock->send( $req );
773
774    my $select = IO::Select->new( $sock );
775
776    my $resp = '';
777    my $normal = 0;
778    while ( $select->can_read( $TIMEOUT || 60 ) ) {
779      my $ret = $sock->sysread( $resp, 4096, length($resp) );
780      if ( !defined $ret or $ret == 0 ) {
781        $select->remove( $sock );
782        $normal++;
783      }
784    }
785    close $sock;
786
787    unless ( $normal ) {
788        return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
789    }
790
791    # Check the "response"
792    # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
793    $resp =~ s/^(\x0d?\x0a)+//;
794    # Check it is an HTTP response
795    unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
796        return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
797    }
798
799    # Check for OK
800    my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
801    unless ( $code eq '200' ) {
802        return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
803    }
804
805    {
806      local $\;
807      print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
808    }
809    close $fh;
810    return $to;
811}
812
813### Net::FTP fetching
814sub _netftp_fetch {
815    my $self = shift;
816    my %hash = @_;
817
818    my ($to);
819    my $tmpl = {
820        to  => { required => 1, store => \$to }
821    };
822    check( $tmpl, \%hash ) or return;
823
824    ### required modules ###
825    local @INC = @INC;
826    pop @INC if $INC[-1] eq '.';
827    my $use_list = { 'Net::FTP' => 0 };
828
829    unless( can_load( modules => $use_list ) ) {
830        $METHOD_FAIL->{'netftp'} = 1;
831        return;
832    }
833
834    ### make connection ###
835    my $ftp;
836    my @options = ($self->host);
837    push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
838    unless( $ftp = Net::FTP->new( @options ) ) {
839        return $self->_error(loc("Ftp creation failed: %1",$@));
840    }
841
842    ### login ###
843    unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
844        return $self->_error(loc("Could not login to '%1'",$self->host));
845    }
846
847    ### set binary mode, just in case ###
848    $ftp->binary;
849
850    ### create the remote path
851    ### remember remote paths are unix paths! [#11483]
852    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
853
854    ### fetch the file ###
855    my $target;
856    unless( $target = $ftp->get( $remote, $to ) ) {
857        return $self->_error(loc("Could not fetch '%1' from '%2'",
858                    $remote, $self->host));
859    }
860
861    ### log out ###
862    $ftp->quit;
863
864    return $target;
865
866}
867
868### /bin/wget fetch ###
869sub _wget_fetch {
870    my $self = shift;
871    my %hash = @_;
872
873    my ($to);
874    my $tmpl = {
875        to  => { required => 1, store => \$to }
876    };
877    check( $tmpl, \%hash ) or return;
878
879    my $wget;
880    ### see if we have a wget binary ###
881    unless( $wget = can_run('wget') ) {
882        $METHOD_FAIL->{'wget'} = 1;
883        return;
884    }
885
886    ### no verboseness, thanks ###
887    my $cmd = [ $wget, '--quiet' ];
888
889    ### if a timeout is set, add it ###
890    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
891
892    ### run passive if specified ###
893    push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
894
895    ### set the output document, add the uri ###
896    push @$cmd, '--output-document', $to, $self->uri;
897
898    ### with IPC::Cmd > 0.41, this is fixed in teh library,
899    ### and there's no need for special casing any more.
900    ### DO NOT quote things for IPC::Run, it breaks stuff.
901    # $IPC::Cmd::USE_IPC_RUN
902    #    ? ($to, $self->uri)
903    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
904
905    ### shell out ###
906    my $captured;
907    unless(run( command => $cmd,
908                buffer  => \$captured,
909                verbose => $DEBUG
910    )) {
911        ### wget creates the output document always, even if the fetch
912        ### fails.. so unlink it in that case
913        1 while unlink $to;
914
915        return $self->_error(loc( "Command failed: %1", $captured || '' ));
916    }
917
918    return $to;
919}
920
921### /bin/lftp fetch ###
922sub _lftp_fetch {
923    my $self = shift;
924    my %hash = @_;
925
926    my ($to);
927    my $tmpl = {
928        to  => { required => 1, store => \$to }
929    };
930    check( $tmpl, \%hash ) or return;
931
932    ### see if we have a lftp binary ###
933    my $lftp;
934    unless( $lftp = can_run('lftp') ) {
935        $METHOD_FAIL->{'lftp'} = 1;
936        return;
937    }
938
939    ### no verboseness, thanks ###
940    my $cmd = [ $lftp, '-f' ];
941
942    my $fh = File::Temp->new;
943
944    my $str;
945
946    ### if a timeout is set, add it ###
947    $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
948
949    ### run passive if specified ###
950    $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
951
952    ### set the output document, add the uri ###
953    ### quote the URI, because lftp supports certain shell
954    ### expansions, most notably & for backgrounding.
955    ### ' quote does nto work, must be "
956    $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
957
958    if( $DEBUG ) {
959        my $pp_str = join ' ', split $/, $str;
960        print "# lftp command: $pp_str\n";
961    }
962
963    ### write straight to the file.
964    $fh->autoflush(1);
965    print $fh $str;
966
967    ### the command needs to be 1 string to be executed
968    push @$cmd, $fh->filename;
969
970    ### with IPC::Cmd > 0.41, this is fixed in teh library,
971    ### and there's no need for special casing any more.
972    ### DO NOT quote things for IPC::Run, it breaks stuff.
973    # $IPC::Cmd::USE_IPC_RUN
974    #    ? ($to, $self->uri)
975    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
976
977
978    ### shell out ###
979    my $captured;
980    unless(run( command => $cmd,
981                buffer  => \$captured,
982                verbose => $DEBUG
983    )) {
984        ### wget creates the output document always, even if the fetch
985        ### fails.. so unlink it in that case
986        1 while unlink $to;
987
988        return $self->_error(loc( "Command failed: %1", $captured || '' ));
989    }
990
991    return $to;
992}
993
994
995
996### /bin/ftp fetch ###
997sub _ftp_fetch {
998    my $self = shift;
999    my %hash = @_;
1000
1001    my ($to);
1002    my $tmpl = {
1003        to  => { required => 1, store => \$to }
1004    };
1005    check( $tmpl, \%hash ) or return;
1006
1007    ### see if we have a ftp binary ###
1008    my $ftp;
1009    unless( $ftp = can_run('ftp') ) {
1010        $METHOD_FAIL->{'ftp'} = 1;
1011        return;
1012    }
1013
1014    my $fh = FileHandle->new;
1015
1016    local $SIG{CHLD} = 'IGNORE';
1017
1018    unless ($fh->open("$ftp -n", '|-')) {
1019        return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1020    }
1021
1022    my @dialog = (
1023        "lcd " . dirname($to),
1024        "open " . $self->host,
1025        "user anonymous $FROM_EMAIL",
1026        "cd /",
1027        "cd " . $self->path,
1028        "binary",
1029        "get " . $self->file . " " . $self->output_file,
1030        "quit",
1031    );
1032
1033    foreach (@dialog) { $fh->print($_, "\n") }
1034    $fh->close or return;
1035
1036    return $to;
1037}
1038
1039### lynx is stupid - it decompresses any .gz file it finds to be text
1040### use /bin/lynx to fetch files
1041sub _lynx_fetch {
1042    my $self = shift;
1043    my %hash = @_;
1044
1045    my ($to);
1046    my $tmpl = {
1047        to  => { required => 1, store => \$to }
1048    };
1049    check( $tmpl, \%hash ) or return;
1050
1051    ### see if we have a lynx binary ###
1052    my $lynx;
1053    unless ( $lynx = can_run('lynx') ){
1054        $METHOD_FAIL->{'lynx'} = 1;
1055        return;
1056    }
1057
1058    unless( IPC::Cmd->can_capture_buffer ) {
1059        $METHOD_FAIL->{'lynx'} = 1;
1060
1061        return $self->_error(loc(
1062            "Can not capture buffers. Can not use '%1' to fetch files",
1063            'lynx' ));
1064    }
1065
1066    ### check if the HTTP resource exists ###
1067    if ($self->uri =~ /^https?:\/\//i) {
1068        my $cmd = [
1069            $lynx,
1070            '-head',
1071            '-source',
1072            "-auth=anonymous:$FROM_EMAIL",
1073        ];
1074
1075        push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1076
1077        push @$cmd, $self->uri;
1078
1079        ### shell out ###
1080        my $head;
1081        unless(run( command => $cmd,
1082                    buffer  => \$head,
1083                    verbose => $DEBUG )
1084        ) {
1085            return $self->_error(loc("Command failed: %1", $head || ''));
1086        }
1087
1088        unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1089            return $self->_error(loc("Command failed: %1", $head || ''));
1090        }
1091    }
1092
1093    ### write to the output file ourselves, since lynx ass_u_mes to much
1094    my $local = FileHandle->new( $to, 'w' )
1095                    or return $self->_error(loc(
1096                        "Could not open '%1' for writing: %2",$to,$!));
1097
1098    ### dump to stdout ###
1099    my $cmd = [
1100        $lynx,
1101        '-source',
1102        "-auth=anonymous:$FROM_EMAIL",
1103    ];
1104
1105    push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1106
1107    ### DO NOT quote things for IPC::Run, it breaks stuff.
1108    push @$cmd, $self->uri;
1109
1110    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1111    ### and there's no need for special casing any more.
1112    ### DO NOT quote things for IPC::Run, it breaks stuff.
1113    # $IPC::Cmd::USE_IPC_RUN
1114    #    ? $self->uri
1115    #    : QUOTE. $self->uri .QUOTE;
1116
1117
1118    ### shell out ###
1119    my $captured;
1120    unless(run( command => $cmd,
1121                buffer  => \$captured,
1122                verbose => $DEBUG )
1123    ) {
1124        return $self->_error(loc("Command failed: %1", $captured || ''));
1125    }
1126
1127    ### print to local file ###
1128    ### XXX on a 404 with a special error page, $captured will actually
1129    ### hold the contents of that page, and make it *appear* like the
1130    ### request was a success, when really it wasn't :(
1131    ### there doesn't seem to be an option for lynx to change the exit
1132    ### code based on a 4XX status or so.
1133    ### the closest we can come is using --error_file and parsing that,
1134    ### which is very unreliable ;(
1135    $local->print( $captured );
1136    $local->close or return;
1137
1138    return $to;
1139}
1140
1141### use /bin/ncftp to fetch files
1142sub _ncftp_fetch {
1143    my $self = shift;
1144    my %hash = @_;
1145
1146    my ($to);
1147    my $tmpl = {
1148        to  => { required => 1, store => \$to }
1149    };
1150    check( $tmpl, \%hash ) or return;
1151
1152    ### we can only set passive mode in interactive sessions, so bail out
1153    ### if $FTP_PASSIVE is set
1154    return if $FTP_PASSIVE;
1155
1156    ### see if we have a ncftp binary ###
1157    my $ncftp;
1158    unless( $ncftp = can_run('ncftp') ) {
1159        $METHOD_FAIL->{'ncftp'} = 1;
1160        return;
1161    }
1162
1163    my $cmd = [
1164        $ncftp,
1165        '-V',                   # do not be verbose
1166        '-p', $FROM_EMAIL,      # email as password
1167        $self->host,            # hostname
1168        dirname($to),           # local dir for the file
1169                                # remote path to the file
1170        ### DO NOT quote things for IPC::Run, it breaks stuff.
1171        $IPC::Cmd::USE_IPC_RUN
1172                    ? File::Spec::Unix->catdir( $self->path, $self->file )
1173                    : QUOTE. File::Spec::Unix->catdir(
1174                                    $self->path, $self->file ) .QUOTE
1175
1176    ];
1177
1178    ### shell out ###
1179    my $captured;
1180    unless(run( command => $cmd,
1181                buffer  => \$captured,
1182                verbose => $DEBUG )
1183    ) {
1184        return $self->_error(loc("Command failed: %1", $captured || ''));
1185    }
1186
1187    return $to;
1188
1189}
1190
1191### use /bin/curl to fetch files
1192sub _curl_fetch {
1193    my $self = shift;
1194    my %hash = @_;
1195
1196    my ($to);
1197    my $tmpl = {
1198        to  => { required => 1, store => \$to }
1199    };
1200    check( $tmpl, \%hash ) or return;
1201    my $curl;
1202    unless ( $curl = can_run('curl') ) {
1203        $METHOD_FAIL->{'curl'} = 1;
1204        return;
1205    }
1206
1207    ### these long opts are self explanatory - I like that -jmb
1208    my $cmd = [ $curl, '-q' ];
1209
1210    push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1211
1212    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1213
1214    push(@$cmd, '--silent') unless $DEBUG;
1215
1216    ### curl does the right thing with passive, regardless ###
1217    if ($self->scheme eq 'ftp') {
1218        push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1219    }
1220
1221    ### curl doesn't follow 302 (temporarily moved) etc automatically
1222    ### so we add --location to enable that.
1223    push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1224
1225    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1226    ### and there's no need for special casing any more.
1227    ### DO NOT quote things for IPC::Run, it breaks stuff.
1228    # $IPC::Cmd::USE_IPC_RUN
1229    #    ? ($to, $self->uri)
1230    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1231
1232
1233    my $captured;
1234    unless(run( command => $cmd,
1235                buffer  => \$captured,
1236                verbose => $DEBUG )
1237    ) {
1238
1239        return $self->_error(loc("Command failed: %1", $captured || ''));
1240    }
1241
1242    return $to;
1243
1244}
1245
1246### /usr/bin/fetch fetch! ###
1247sub _fetch_fetch {
1248    my $self = shift;
1249    my %hash = @_;
1250
1251    my ($to);
1252    my $tmpl = {
1253        to  => { required => 1, store => \$to }
1254    };
1255    check( $tmpl, \%hash ) or return;
1256
1257    ### see if we have a fetch binary ###
1258    my $fetch;
1259    unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1260        $METHOD_FAIL->{'fetch'} = 1;
1261        return;
1262    }
1263
1264    ### no verboseness, thanks ###
1265    my $cmd = [ $fetch, '-q' ];
1266
1267    ### if a timeout is set, add it ###
1268    push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1269
1270    ### run passive if specified ###
1271    #push @$cmd, '-p' if $FTP_PASSIVE;
1272    local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1273
1274    ### set the output document, add the uri ###
1275    push @$cmd, '-o', $to, $self->uri;
1276
1277    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1278    ### and there's no need for special casing any more.
1279    ### DO NOT quote things for IPC::Run, it breaks stuff.
1280    # $IPC::Cmd::USE_IPC_RUN
1281    #    ? ($to, $self->uri)
1282    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1283
1284    ### shell out ###
1285    my $captured;
1286    unless(run( command => $cmd,
1287                buffer  => \$captured,
1288                verbose => $DEBUG
1289    )) {
1290        ### wget creates the output document always, even if the fetch
1291        ### fails.. so unlink it in that case
1292        1 while unlink $to;
1293
1294        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1295    }
1296
1297    return $to;
1298}
1299
1300### use File::Copy for fetching file:// urls ###
1301###
1302### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1303### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1304###
1305
1306sub _file_fetch {
1307    my $self = shift;
1308    my %hash = @_;
1309
1310    my ($to);
1311    my $tmpl = {
1312        to  => { required => 1, store => \$to }
1313    };
1314    check( $tmpl, \%hash ) or return;
1315
1316
1317
1318    ### prefix a / on unix systems with a file uri, since it would
1319    ### look somewhat like this:
1320    ###     file:///home/kane/file
1321    ### whereas windows file uris for 'c:\some\dir\file' might look like:
1322    ###     file:///C:/some/dir/file
1323    ###     file:///C|/some/dir/file
1324    ### or for a network share '\\host\share\some\dir\file':
1325    ###     file:////host/share/some/dir/file
1326    ###
1327    ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1328    ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
1329    ###
1330
1331    my $path    = $self->path;
1332    my $vol     = $self->vol;
1333    my $share   = $self->share;
1334
1335    my $remote;
1336    if (!$share and $self->host) {
1337        return $self->_error(loc(
1338            "Currently %1 cannot handle hosts in %2 urls",
1339            'File::Fetch', 'file://'
1340        ));
1341    }
1342
1343    if( $vol ) {
1344        $path   = File::Spec->catdir( split /\//, $path );
1345        $remote = File::Spec->catpath( $vol, $path, $self->file);
1346
1347    } elsif( $share ) {
1348        ### win32 specific, and a share name, so we wont bother with File::Spec
1349        $path   =~ s|/+|\\|g;
1350        $remote = "\\\\".$self->host."\\$share\\$path";
1351
1352    } else {
1353        ### File::Spec on VMS can not currently handle UNIX syntax.
1354        my $file_class = ON_VMS
1355            ? 'File::Spec::Unix'
1356            : 'File::Spec';
1357
1358        $remote  = $file_class->catfile( $path, $self->file );
1359    }
1360
1361    ### File::Copy is littered with 'die' statements :( ###
1362    my $rv = eval { File::Copy::copy( $remote, $to ) };
1363
1364    ### something went wrong ###
1365    if( !$rv or $@ ) {
1366        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1367                             $remote, $to, $!, $@));
1368    }
1369
1370    return $to;
1371}
1372
1373### use /usr/bin/rsync to fetch files
1374sub _rsync_fetch {
1375    my $self = shift;
1376    my %hash = @_;
1377
1378    my ($to);
1379    my $tmpl = {
1380        to  => { required => 1, store => \$to }
1381    };
1382    check( $tmpl, \%hash ) or return;
1383    my $rsync;
1384    unless ( $rsync = can_run('rsync') ) {
1385        $METHOD_FAIL->{'rsync'} = 1;
1386        return;
1387    }
1388
1389    my $cmd = [ $rsync ];
1390
1391    ### XXX: rsync has no I/O timeouts at all, by default
1392    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1393
1394    push(@$cmd, '--quiet') unless $DEBUG;
1395
1396    ### DO NOT quote things for IPC::Run, it breaks stuff.
1397    push @$cmd, $self->uri, $to;
1398
1399    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1400    ### and there's no need for special casing any more.
1401    ### DO NOT quote things for IPC::Run, it breaks stuff.
1402    # $IPC::Cmd::USE_IPC_RUN
1403    #    ? ($to, $self->uri)
1404    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1405
1406    my $captured;
1407    unless(run( command => $cmd,
1408                buffer  => \$captured,
1409                verbose => $DEBUG )
1410    ) {
1411
1412        return $self->_error(loc("Command %1 failed: %2",
1413            "@$cmd" || '', $captured || ''));
1414    }
1415
1416    return $to;
1417
1418}
1419
1420### use git to fetch files
1421sub _git_fetch {
1422    my $self = shift;
1423    my %hash = @_;
1424
1425    my ($to);
1426    my $tmpl = {
1427        to  => { required => 1, store => \$to }
1428    };
1429    check( $tmpl, \%hash ) or return;
1430    my $git;
1431    unless ( $git = can_run('git') ) {
1432        $METHOD_FAIL->{'git'} = 1;
1433        return;
1434    }
1435
1436    my $cmd = [ $git, 'clone' ];
1437
1438    #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1439
1440    push(@$cmd, '--quiet') unless $DEBUG;
1441
1442    ### DO NOT quote things for IPC::Run, it breaks stuff.
1443    push @$cmd, $self->uri, $to;
1444
1445    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1446    ### and there's no need for special casing any more.
1447    ### DO NOT quote things for IPC::Run, it breaks stuff.
1448    # $IPC::Cmd::USE_IPC_RUN
1449    #    ? ($to, $self->uri)
1450    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1451
1452    my $captured;
1453    unless(run( command => $cmd,
1454                buffer  => \$captured,
1455                verbose => $DEBUG )
1456    ) {
1457
1458        return $self->_error(loc("Command %1 failed: %2",
1459            "@$cmd" || '', $captured || ''));
1460    }
1461
1462    return $to;
1463
1464}
1465
1466#################################
1467#
1468# Error code
1469#
1470#################################
1471
1472=pod
1473
1474=head2 $ff->error([BOOL])
1475
1476Returns the last encountered error as string.
1477Pass it a true value to get the C<Carp::longmess()> output instead.
1478
1479=cut
1480
1481### error handling the way Archive::Extract does it
1482sub _error {
1483    my $self    = shift;
1484    my $error   = shift;
1485
1486    $self->_error_msg( $error );
1487    $self->_error_msg_long( Carp::longmess($error) );
1488
1489    if( $WARN ) {
1490        carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1491    }
1492
1493    return;
1494}
1495
1496sub error {
1497    my $self = shift;
1498    return shift() ? $self->_error_msg_long : $self->_error_msg;
1499}
1500
1501
15021;
1503
1504=pod
1505
1506=head1 HOW IT WORKS
1507
1508File::Fetch is able to fetch a variety of uris, by using several
1509external programs and modules.
1510
1511Below is a mapping of what utilities will be used in what order
1512for what schemes, if available:
1513
1514    file    => LWP, lftp, file
1515    http    => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock
1516    ftp     => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1517    rsync   => rsync
1518    git     => git
1519
1520If you'd like to disable the use of one or more of these utilities
1521and/or modules, see the C<$BLACKLIST> variable further down.
1522
1523If a utility or module isn't available, it will be marked in a cache
1524(see the C<$METHOD_FAIL> variable further down), so it will not be
1525tried again. The C<fetch> method will only fail when all options are
1526exhausted, and it was not able to retrieve the file.
1527
1528The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1529may also have it from C<pkgsrc>. We only check for C<fetch> on those
1530three platforms.
1531
1532C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1533retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1534
1535C<git> only supports C<git://> style urls.
1536
1537A special note about fetching files from an ftp uri:
1538
1539By default, all ftp connections are done in passive mode. To change
1540that, see the C<$FTP_PASSIVE> variable further down.
1541
1542Furthermore, ftp uris only support anonymous connections, so no
1543named user/password pair can be passed along.
1544
1545C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1546further down.
1547
1548=head1 GLOBAL VARIABLES
1549
1550The behaviour of File::Fetch can be altered by changing the following
1551global variables:
1552
1553=head2 $File::Fetch::FROM_EMAIL
1554
1555This is the email address that will be sent as your anonymous ftp
1556password.
1557
1558Default is C<File-Fetch@example.com>.
1559
1560=head2 $File::Fetch::USER_AGENT
1561
1562This is the useragent as C<LWP> will report it.
1563
1564Default is C<File::Fetch/$VERSION>.
1565
1566=head2 $File::Fetch::FTP_PASSIVE
1567
1568This variable controls whether the environment variable C<FTP_PASSIVE>
1569and any passive switches to commandline tools will be set to true.
1570
1571Default value is 1.
1572
1573Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1574files, since passive mode can only be set interactively for this binary
1575
1576=head2 $File::Fetch::TIMEOUT
1577
1578When set, controls the network timeout (counted in seconds).
1579
1580Default value is 0.
1581
1582=head2 $File::Fetch::WARN
1583
1584This variable controls whether errors encountered internally by
1585C<File::Fetch> should be C<carp>'d or not.
1586
1587Set to false to silence warnings. Inspect the output of the C<error()>
1588method manually to see what went wrong.
1589
1590Defaults to C<true>.
1591
1592=head2 $File::Fetch::DEBUG
1593
1594This enables debugging output when calling commandline utilities to
1595fetch files.
1596This also enables C<Carp::longmess> errors, instead of the regular
1597C<carp> errors.
1598
1599Good for tracking down why things don't work with your particular
1600setup.
1601
1602Default is 0.
1603
1604=head2 $File::Fetch::BLACKLIST
1605
1606This is an array ref holding blacklisted modules/utilities for fetching
1607files with.
1608
1609To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1610set $File::Fetch::BLACKLIST to:
1611
1612    $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1613
1614The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1615
1616See the note on C<MAPPING> below.
1617
1618=head2 $File::Fetch::METHOD_FAIL
1619
1620This is a hashref registering what modules/utilities were known to fail
1621for fetching files (mostly because they weren't installed).
1622
1623You can reset this cache by assigning an empty hashref to it, or
1624individually remove keys.
1625
1626See the note on C<MAPPING> below.
1627
1628=head1 MAPPING
1629
1630
1631Here's a quick mapping for the utilities/modules, and their names for
1632the $BLACKLIST, $METHOD_FAIL and other internal functions.
1633
1634    LWP         => lwp
1635    HTTP::Lite  => httplite
1636    HTTP::Tiny  => httptiny
1637    Net::FTP    => netftp
1638    wget        => wget
1639    lynx        => lynx
1640    ncftp       => ncftp
1641    ftp         => ftp
1642    curl        => curl
1643    rsync       => rsync
1644    lftp        => lftp
1645    fetch       => fetch
1646    IO::Socket  => iosock
1647
1648=head1 FREQUENTLY ASKED QUESTIONS
1649
1650=head2 So how do I use a proxy with File::Fetch?
1651
1652C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1653You will need to set your environment variables accordingly. For
1654example, to use an ftp proxy:
1655
1656    $ENV{ftp_proxy} = 'foo.com';
1657
1658Refer to the LWP::UserAgent manpage for more details.
1659
1660=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1661
1662C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1663which we in turn capture. If that content is a 'custom' error file
1664(like, say, a C<404 handler>), you will get that contents instead.
1665
1666Sadly, C<lynx> doesn't support any options to return a different exit
1667code on non-C<200 OK> status, giving us no way to tell the difference
1668between a 'successful' fetch and a custom error page.
1669
1670Therefor, we recommend to only use C<lynx> as a last resort. This is
1671why it is at the back of our list of methods to try as well.
1672
1673=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1674
1675C<File::Fetch> is relatively smart about things. When trying to write
1676a file to disk, it removes the C<query parameters> (see the
1677C<output_file> method for details) from the file name before creating
1678it. In most cases this suffices.
1679
1680If you have any other characters you need to escape, please install
1681the C<URI::Escape> module from CPAN, and pre-encode your URI before
1682passing it to C<File::Fetch>. You can read about the details of URIs
1683and URI encoding here:
1684
1685  http://www.faqs.org/rfcs/rfc2396.html
1686
1687=head1 TODO
1688
1689=over 4
1690
1691=item Implement $PREFER_BIN
1692
1693To indicate to rather use commandline tools than modules
1694
1695=back
1696
1697=head1 BUG REPORTS
1698
1699Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1700
1701=head1 AUTHOR
1702
1703This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1704
1705=head1 COPYRIGHT
1706
1707This library is free software; you may redistribute and/or modify it
1708under the same terms as Perl itself.
1709
1710
1711=cut
1712
1713# Local variables:
1714# c-indentation-style: bsd
1715# c-basic-offset: 4
1716# indent-tabs-mode: nil
1717# End:
1718# vim: expandtab shiftwidth=4:
1719
1720
1721
1722
1723