xref: /openbsd-src/gnu/usr.bin/perl/cpan/File-Fetch/lib/File/Fetch.pm (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Temp;
6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
9use File::Basename              qw[dirname];
10
11use Cwd                         qw[cwd];
12use Carp                        qw[carp];
13use IPC::Cmd                    qw[can_run run QUOTE];
14use File::Path                  qw[mkpath];
15use File::Temp                  qw[tempdir];
16use Params::Check               qw[check];
17use Module::Load::Conditional   qw[can_load];
18use Locale::Maketext::Simple    Style => 'gettext';
19
20use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
23            ];
24
25$VERSION        = '0.38';
26$VERSION        = eval $VERSION;    # avoid warnings with development releases
27$PREFER_BIN     = 0;                # XXX TODO implement
28$FROM_EMAIL     = 'File-Fetch@example.com';
29$USER_AGENT     = "File::Fetch/$VERSION";
30$BLACKLIST      = [qw|ftp|];
31$METHOD_FAIL    = { };
32$FTP_PASSIVE    = 1;
33$TIMEOUT        = 0;
34$DEBUG          = 0;
35$WARN           = 1;
36
37### methods available to fetch the file depending on the scheme
38$METHODS = {
39    http    => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
40    ftp     => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
41    file    => [ qw|lwp lftp file| ],
42    rsync   => [ qw|rsync| ]
43};
44
45### silly warnings ###
46local $Params::Check::VERBOSE               = 1;
47local $Params::Check::VERBOSE               = 1;
48local $Module::Load::Conditional::VERBOSE   = 0;
49local $Module::Load::Conditional::VERBOSE   = 0;
50
51### see what OS we are on, important for file:// uris ###
52use constant ON_WIN     => ($^O eq 'MSWin32');
53use constant ON_VMS     => ($^O eq 'VMS');
54use constant ON_UNIX    => (!ON_WIN);
55use constant HAS_VOL    => (ON_WIN);
56use constant HAS_SHARE  => (ON_WIN);
57use constant HAS_FETCH  => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
58
59=pod
60
61=head1 NAME
62
63File::Fetch - A generic file fetching mechanism
64
65=head1 SYNOPSIS
66
67    use File::Fetch;
68
69    ### build a File::Fetch object ###
70    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
71
72    ### fetch the uri to cwd() ###
73    my $where = $ff->fetch() or die $ff->error;
74
75    ### fetch the uri to /tmp ###
76    my $where = $ff->fetch( to => '/tmp' );
77
78    ### parsed bits from the uri ###
79    $ff->uri;
80    $ff->scheme;
81    $ff->host;
82    $ff->path;
83    $ff->file;
84
85=head1 DESCRIPTION
86
87File::Fetch is a generic file fetching mechanism.
88
89It allows you to fetch any file pointed to by a C<ftp>, C<http>,
90C<file>, or C<rsync> uri by a number of different means.
91
92See the C<HOW IT WORKS> section further down for details.
93
94=head1 ACCESSORS
95
96A C<File::Fetch> object has the following accessors
97
98=over 4
99
100=item $ff->uri
101
102The uri you passed to the constructor
103
104=item $ff->scheme
105
106The scheme from the uri (like 'file', 'http', etc)
107
108=item $ff->host
109
110The hostname in the uri.  Will be empty if host was originally
111'localhost' for a 'file://' url.
112
113=item $ff->vol
114
115On operating systems with the concept of a volume the second element
116of a file:// is considered to the be volume specification for the file.
117Thus on Win32 this routine returns the volume, on other operating
118systems this returns nothing.
119
120On Windows this value may be empty if the uri is to a network share, in
121which case the 'share' property will be defined. Additionally, volume
122specifications that use '|' as ':' will be converted on read to use ':'.
123
124On VMS, which has a volume concept, this field will be empty because VMS
125file specifications are converted to absolute UNIX format and the volume
126information is transparently included.
127
128=item $ff->share
129
130On systems with the concept of a network share (currently only Windows) returns
131the sharename from a file://// url.  On other operating systems returns empty.
132
133=item $ff->path
134
135The path from the uri, will be at least a single '/'.
136
137=item $ff->file
138
139The name of the remote file. For the local file name, the
140result of $ff->output_file will be used.
141
142=item $ff->file_default
143
144The name of the default local file, that $ff->output_file falls back to if
145it would otherwise return no filename. For example when fetching a URI like
146http://www.abc.net.au/ the contents retrieved may be from a remote file called
147'index.html'. The default value of this attribute is literally 'file_default'.
148
149=cut
150
151
152##########################
153### Object & Accessors ###
154##########################
155
156{
157    ### template for autogenerated accessors ###
158    my $Tmpl = {
159        scheme          => { default => 'http' },
160        host            => { default => 'localhost' },
161        path            => { default => '/' },
162        file            => { required => 1 },
163        uri             => { required => 1 },
164        vol             => { default => '' }, # windows for file:// uris
165        share           => { default => '' }, # windows for file:// uris
166        file_default    => { default => 'file_default' },
167        tempdir_root    => { required => 1 }, # Should be lazy-set at ->new()
168        _error_msg      => { no_override => 1 },
169        _error_msg_long => { no_override => 1 },
170    };
171
172    for my $method ( keys %$Tmpl ) {
173        no strict 'refs';
174        *$method = sub {
175                        my $self = shift;
176                        $self->{$method} = $_[0] if @_;
177                        return $self->{$method};
178                    }
179    }
180
181    sub _create {
182        my $class = shift;
183        my %hash  = @_;
184
185        my $args = check( $Tmpl, \%hash ) or return;
186
187        bless $args, $class;
188
189        if( lc($args->scheme) ne 'file' and not $args->host ) {
190            return $class->_error(loc(
191                "Hostname required when fetching from '%1'",$args->scheme));
192        }
193
194        for (qw[path]) {
195            unless( $args->$_() ) { # 5.5.x needs the ()
196                return $class->_error(loc("No '%1' specified",$_));
197            }
198        }
199
200        return $args;
201    }
202}
203
204=item $ff->output_file
205
206The name of the output file. This is the same as $ff->file,
207but any query parameters are stripped off. For example:
208
209    http://example.com/index.html?x=y
210
211would make the output file be C<index.html> rather than
212C<index.html?x=y>.
213
214=back
215
216=cut
217
218sub output_file {
219    my $self = shift;
220    my $file = $self->file;
221
222    $file =~ s/\?.*$//g;
223
224    $file ||= $self->file_default;
225
226    return $file;
227}
228
229### XXX do this or just point to URI::Escape?
230# =head2 $esc_uri = $ff->escaped_uri
231#
232# =cut
233#
234# ### most of this is stolen straight from URI::escape
235# {   ### Build a char->hex map
236#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
237#
238#     sub escaped_uri {
239#         my $self = shift;
240#         my $uri  = $self->uri;
241#
242#         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
243#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
244#                     $escapes{$1} || $self->_fail_hi($1)/ge;
245#
246#         return $uri;
247#     }
248#
249#     sub _fail_hi {
250#         my $self = shift;
251#         my $char = shift;
252#
253#         $self->_error(loc(
254#             "Can't escape '%1', try using the '%2' module instead",
255#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
256#         ));
257#     }
258#
259#     sub output_file {
260#
261#     }
262#
263#
264# }
265
266=head1 METHODS
267
268=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
269
270Parses the uri and creates a corresponding File::Fetch::Item object,
271that is ready to be C<fetch>ed and returns it.
272
273Returns false on failure.
274
275=cut
276
277sub new {
278    my $class = shift;
279    my %hash  = @_;
280
281    my ($uri, $file_default, $tempdir_root);
282    my $tmpl = {
283        uri          => { required => 1, store => \$uri },
284        file_default => { required => 0, store => \$file_default },
285        tempdir_root => { required => 0, store => \$tempdir_root },
286    };
287
288    check( $tmpl, \%hash ) or return;
289
290    ### parse the uri to usable parts ###
291    my $href    = $class->_parse_uri( $uri ) or return;
292
293    $href->{file_default} = $file_default if $file_default;
294    $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
295    $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd      ) if not $href->{tempdir_root};
296
297    ### make it into a FFI object ###
298    my $ff      = $class->_create( %$href ) or return;
299
300
301    ### return the object ###
302    return $ff;
303}
304
305### parses an uri to a hash structure:
306###
307### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
308###
309### becomes:
310###
311### $href = {
312###     scheme  => 'ftp',
313###     host    => 'ftp.cpan.org',
314###     path    => '/pub/mirror',
315###     file    => 'index.html'
316### };
317###
318### In the case of file:// urls there maybe be additional fields
319###
320### For systems with volume specifications such as Win32 there will be
321### a volume specifier provided in the 'vol' field.
322###
323###   'vol' => 'volumename'
324###
325### For windows file shares there may be a 'share' key specified
326###
327###   'share' => 'sharename'
328###
329### Note that the rules of what a file:// url means vary by the operating system
330### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
331### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
332### not '/foo/bar.txt'
333###
334### Similarly if the host interpreting the url is VMS then
335### file:///disk$user/my/notes/note12345.txt' means
336### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
337### if it is unix where it means /disk$user/my/notes/note12345.txt'.
338### Except for some cases in the File::Spec methods, Perl on VMS will generally
339### handle UNIX format file specifications.
340###
341### This means it is impossible to serve certain file:// urls on certain systems.
342###
343### Thus are the problems with a protocol-less specification. :-(
344###
345
346sub _parse_uri {
347    my $self = shift;
348    my $uri  = shift or return;
349
350    my $href = { uri => $uri };
351
352    ### find the scheme ###
353    $uri            =~ s|^(\w+)://||;
354    $href->{scheme} = $1;
355
356    ### See rfc 1738 section 3.10
357    ### http://www.faqs.org/rfcs/rfc1738.html
358    ### And wikipedia for more on windows file:// urls
359    ### http://en.wikipedia.org/wiki/File://
360    if( $href->{scheme} eq 'file' ) {
361
362        my @parts = split '/',$uri;
363
364        ### file://hostname/...
365        ### file://hostname/...
366        ### normalize file://localhost with file:///
367        $href->{host} = $parts[0] || '';
368
369        ### index in @parts where the path components begin;
370        my $index = 1;
371
372        ### file:////hostname/sharename/blah.txt
373        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
374
375            $href->{host}   = $parts[2] || '';  # avoid warnings
376            $href->{share}  = $parts[3] || '';  # avoid warnings
377
378            $index          = 4         # index after the share
379
380        ### file:///D|/blah.txt
381        ### file:///D:/blah.txt
382        } elsif (HAS_VOL) {
383
384            ### this code comes from dmq's patch, but:
385            ### XXX if volume is empty, wouldn't that be an error? --kane
386            ### if so, our file://localhost test needs to be fixed as wel
387            $href->{vol}    = $parts[1] || '';
388
389            ### correct D| style colume descriptors
390            $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
391
392            $index          = 2;        # index after the volume
393        }
394
395        ### rebuild the path from the leftover parts;
396        $href->{path} = join '/', '', splice( @parts, $index, $#parts );
397
398    } else {
399        ### using anything but qw() in hash slices may produce warnings
400        ### in older perls :-(
401        @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
402    }
403
404    ### split the path into file + dir ###
405    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
406        $href->{path} = $parts[1];
407        $href->{file} = $parts[2];
408    }
409
410    ### host will be empty if the target was 'localhost' and the
411    ### scheme was 'file'
412    $href->{host} = '' if   ($href->{host}      eq 'localhost') and
413                            ($href->{scheme}    eq 'file');
414
415    return $href;
416}
417
418=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
419
420Fetches the file you requested and returns the full path to the file.
421
422By default it writes to C<cwd()>, but you can override that by specifying
423the C<to> argument:
424
425    ### file fetch to /tmp, full path to the file in $where
426    $where = $ff->fetch( to => '/tmp' );
427
428    ### file slurped into $scalar, full path to the file in $where
429    ### file is downloaded to a temp directory and cleaned up at exit time
430    $where = $ff->fetch( to => \$scalar );
431
432Returns the full path to the downloaded file on success, and false
433on failure.
434
435=cut
436
437sub fetch {
438    my $self = shift or return;
439    my %hash = @_;
440
441    my $target;
442    my $tmpl = {
443        to  => { default => cwd(), store => \$target },
444    };
445
446    check( $tmpl, \%hash ) or return;
447
448    my ($to, $fh);
449    ### you want us to slurp the contents
450    if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
451        $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
452
453    ### plain old fetch
454    } else {
455        $to = $target;
456
457        ### On VMS force to VMS format so File::Spec will work.
458        $to = VMS::Filespec::vmspath($to) if ON_VMS;
459
460        ### create the path if it doesn't exist yet ###
461        unless( -d $to ) {
462            eval { mkpath( $to ) };
463
464            return $self->_error(loc("Could not create path '%1'",$to)) if $@;
465        }
466    }
467
468    ### set passive ftp if required ###
469    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
470
471    ### we dont use catfile on win32 because if we are using a cygwin tool
472    ### under cmd.exe they wont understand windows style separators.
473    my $out_to = ON_WIN ? $to.'/'.$self->output_file
474                        : File::Spec->catfile( $to, $self->output_file );
475
476    for my $method ( @{ $METHODS->{$self->scheme} } ) {
477        my $sub =  '_'.$method.'_fetch';
478
479        unless( __PACKAGE__->can($sub) ) {
480            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
481                        $method));
482            next;
483        }
484
485        ### method is blacklisted ###
486        next if grep { lc $_ eq $method } @$BLACKLIST;
487
488        ### method is known to fail ###
489        next if $METHOD_FAIL->{$method};
490
491        ### there's serious issues with IPC::Run and quoting of command
492        ### line arguments. using quotes in the wrong place breaks things,
493        ### and in the case of say,
494        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
495        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
496        ### it doesn't matter how you quote, it always fails.
497        local $IPC::Cmd::USE_IPC_RUN = 0;
498
499        if( my $file = $self->$sub(
500                        to => $out_to
501        )){
502
503            unless( -e $file && -s _ ) {
504                $self->_error(loc("'%1' said it fetched '%2', ".
505                     "but it was not created",$method,$file));
506
507                ### mark the failure ###
508                $METHOD_FAIL->{$method} = 1;
509
510                next;
511
512            } else {
513
514                ### slurp mode?
515                if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
516
517                    ### open the file
518                    open my $fh, "<$file" or do {
519                        $self->_error(
520                            loc("Could not open '%1': %2", $file, $!));
521                        return;
522                    };
523
524                    ### slurp
525                    $$target = do { local $/; <$fh> };
526
527                }
528
529                my $abs = File::Spec->rel2abs( $file );
530                return $abs;
531
532            }
533        }
534    }
535
536
537    ### if we got here, we looped over all methods, but we weren't able
538    ### to fetch it.
539    return;
540}
541
542########################
543### _*_fetch methods ###
544########################
545
546### LWP fetching ###
547sub _lwp_fetch {
548    my $self = shift;
549    my %hash = @_;
550
551    my ($to);
552    my $tmpl = {
553        to  => { required => 1, store => \$to }
554    };
555    check( $tmpl, \%hash ) or return;
556
557    ### modules required to download with lwp ###
558    my $use_list = {
559        LWP                 => '0.0',
560        'LWP::UserAgent'    => '0.0',
561        'HTTP::Request'     => '0.0',
562        'HTTP::Status'      => '0.0',
563        URI                 => '0.0',
564
565    };
566
567    unless( can_load( modules => $use_list ) ) {
568        $METHOD_FAIL->{'lwp'} = 1;
569        return;
570    }
571
572    ### setup the uri object
573    my $uri = URI->new( File::Spec::Unix->catfile(
574                                $self->path, $self->file
575                    ) );
576
577    ### special rules apply for file:// uris ###
578    $uri->scheme( $self->scheme );
579    $uri->host( $self->scheme eq 'file' ? '' : $self->host );
580    $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
581
582    ### set up the useragent object
583    my $ua = LWP::UserAgent->new();
584    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
585    $ua->agent( $USER_AGENT );
586    $ua->from( $FROM_EMAIL );
587    $ua->env_proxy;
588
589    my $res = $ua->mirror($uri, $to) or return;
590
591    ### uptodate or fetched ok ###
592    if ( $res->code == 304 or $res->code == 200 ) {
593        return $to;
594
595    } else {
596        return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
597                    $res->code, HTTP::Status::status_message($res->code),
598                    $res->status_line));
599    }
600
601}
602
603### HTTP::Tiny fetching ###
604sub _httptiny_fetch {
605    my $self = shift;
606    my %hash = @_;
607
608    my ($to);
609    my $tmpl = {
610        to  => { required => 1, store => \$to }
611    };
612    check( $tmpl, \%hash ) or return;
613
614    my $use_list = {
615        'HTTP::Tiny'    => '0.008',
616
617    };
618
619    unless( can_load(modules => $use_list) ) {
620        $METHOD_FAIL->{'httptiny'} = 1;
621        return;
622    }
623
624    my $uri = $self->uri;
625
626    my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
627
628    my $rc = $http->mirror( $uri, $to );
629
630    unless ( $rc->{success} ) {
631
632        return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
633                    $rc->{status}, $rc->{reason} ) );
634
635    }
636
637    return $to;
638
639}
640
641### HTTP::Lite fetching ###
642sub _httplite_fetch {
643    my $self = shift;
644    my %hash = @_;
645
646    my ($to);
647    my $tmpl = {
648        to  => { required => 1, store => \$to }
649    };
650    check( $tmpl, \%hash ) or return;
651
652    ### modules required to download with lwp ###
653    my $use_list = {
654        'HTTP::Lite'    => '2.2',
655
656    };
657
658    unless( can_load(modules => $use_list) ) {
659        $METHOD_FAIL->{'httplite'} = 1;
660        return;
661    }
662
663    my $uri = $self->uri;
664    my $retries = 0;
665
666    RETRIES: while ( $retries++ < 5 ) {
667
668      my $http = HTTP::Lite->new();
669      # Naughty naughty but there isn't any accessor/setter
670      $http->{timeout} = $TIMEOUT if $TIMEOUT;
671      $http->http11_mode(1);
672
673      my $fh = FileHandle->new;
674
675      unless ( $fh->open($to,'>') ) {
676        return $self->_error(loc(
677             "Could not open '%1' for writing: %2",$to,$!));
678      }
679
680      $fh->autoflush(1);
681
682      binmode $fh;
683
684      my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
685
686      close $fh;
687
688      if ( $rc == 301 || $rc == 302 ) {
689          my $loc;
690          HEADERS: for ($http->headers_array) {
691            /Location: (\S+)/ and $loc = $1, last HEADERS;
692          }
693          #$loc or last; # Think we should squeal here.
694          if ($loc =~ m!^/!) {
695            $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
696            $uri .= $loc;
697          }
698          else {
699            $uri = $loc;
700          }
701          next RETRIES;
702      }
703      elsif ( $rc == 200 ) {
704          return $to;
705      }
706      else {
707        return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
708                    $rc, $http->status_message));
709      }
710
711    } # Loop for 5 retries.
712
713    return $self->_error("Fetch failed! Gave up after 5 tries");
714
715}
716
717### Simple IO::Socket::INET fetching ###
718sub _iosock_fetch {
719    my $self = shift;
720    my %hash = @_;
721
722    my ($to);
723    my $tmpl = {
724        to  => { required => 1, store => \$to }
725    };
726    check( $tmpl, \%hash ) or return;
727
728    my $use_list = {
729        'IO::Socket::INET' => '0.0',
730        'IO::Select'       => '0.0',
731    };
732
733    unless( can_load(modules => $use_list) ) {
734        $METHOD_FAIL->{'iosock'} = 1;
735        return;
736    }
737
738    my $sock = IO::Socket::INET->new(
739        PeerHost => $self->host,
740        ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
741    );
742
743    unless ( $sock ) {
744        return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
745    }
746
747    my $fh = FileHandle->new;
748
749    # Check open()
750
751    unless ( $fh->open($to,'>') ) {
752        return $self->_error(loc(
753             "Could not open '%1' for writing: %2",$to,$!));
754    }
755
756    $fh->autoflush(1);
757    binmode $fh;
758
759    my $path = File::Spec::Unix->catfile( $self->path, $self->file );
760    my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
761    $sock->send( $req );
762
763    my $select = IO::Select->new( $sock );
764
765    my $resp = '';
766    my $normal = 0;
767    while ( $select->can_read( $TIMEOUT || 60 ) ) {
768      my $ret = $sock->sysread( $resp, 4096, length($resp) );
769      if ( !defined $ret or $ret == 0 ) {
770        $select->remove( $sock );
771        $normal++;
772      }
773    }
774    close $sock;
775
776    unless ( $normal ) {
777        return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
778    }
779
780    # Check the "response"
781    # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
782    $resp =~ s/^(\x0d?\x0a)+//;
783    # Check it is an HTTP response
784    unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
785        return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
786    }
787
788    # Check for OK
789    my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
790    unless ( $code eq '200' ) {
791        return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
792    }
793
794    {
795      local $\;
796      print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
797    }
798    close $fh;
799    return $to;
800}
801
802### Net::FTP fetching
803sub _netftp_fetch {
804    my $self = shift;
805    my %hash = @_;
806
807    my ($to);
808    my $tmpl = {
809        to  => { required => 1, store => \$to }
810    };
811    check( $tmpl, \%hash ) or return;
812
813    ### required modules ###
814    my $use_list = { 'Net::FTP' => 0 };
815
816    unless( can_load( modules => $use_list ) ) {
817        $METHOD_FAIL->{'netftp'} = 1;
818        return;
819    }
820
821    ### make connection ###
822    my $ftp;
823    my @options = ($self->host);
824    push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
825    unless( $ftp = Net::FTP->new( @options ) ) {
826        return $self->_error(loc("Ftp creation failed: %1",$@));
827    }
828
829    ### login ###
830    unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
831        return $self->_error(loc("Could not login to '%1'",$self->host));
832    }
833
834    ### set binary mode, just in case ###
835    $ftp->binary;
836
837    ### create the remote path
838    ### remember remote paths are unix paths! [#11483]
839    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
840
841    ### fetch the file ###
842    my $target;
843    unless( $target = $ftp->get( $remote, $to ) ) {
844        return $self->_error(loc("Could not fetch '%1' from '%2'",
845                    $remote, $self->host));
846    }
847
848    ### log out ###
849    $ftp->quit;
850
851    return $target;
852
853}
854
855### /bin/wget fetch ###
856sub _wget_fetch {
857    my $self = shift;
858    my %hash = @_;
859
860    my ($to);
861    my $tmpl = {
862        to  => { required => 1, store => \$to }
863    };
864    check( $tmpl, \%hash ) or return;
865
866    my $wget;
867    ### see if we have a wget binary ###
868    unless( $wget = can_run('wget') ) {
869        $METHOD_FAIL->{'wget'} = 1;
870        return;
871    }
872
873    ### no verboseness, thanks ###
874    my $cmd = [ $wget, '--quiet' ];
875
876    ### if a timeout is set, add it ###
877    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
878
879    ### run passive if specified ###
880    push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
881
882    ### set the output document, add the uri ###
883    push @$cmd, '--output-document', $to, $self->uri;
884
885    ### with IPC::Cmd > 0.41, this is fixed in teh library,
886    ### and there's no need for special casing any more.
887    ### DO NOT quote things for IPC::Run, it breaks stuff.
888    # $IPC::Cmd::USE_IPC_RUN
889    #    ? ($to, $self->uri)
890    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
891
892    ### shell out ###
893    my $captured;
894    unless(run( command => $cmd,
895                buffer  => \$captured,
896                verbose => $DEBUG
897    )) {
898        ### wget creates the output document always, even if the fetch
899        ### fails.. so unlink it in that case
900        1 while unlink $to;
901
902        return $self->_error(loc( "Command failed: %1", $captured || '' ));
903    }
904
905    return $to;
906}
907
908### /bin/lftp fetch ###
909sub _lftp_fetch {
910    my $self = shift;
911    my %hash = @_;
912
913    my ($to);
914    my $tmpl = {
915        to  => { required => 1, store => \$to }
916    };
917    check( $tmpl, \%hash ) or return;
918
919    ### see if we have a lftp binary ###
920    my $lftp;
921    unless( $lftp = can_run('lftp') ) {
922        $METHOD_FAIL->{'lftp'} = 1;
923        return;
924    }
925
926    ### no verboseness, thanks ###
927    my $cmd = [ $lftp, '-f' ];
928
929    my $fh = File::Temp->new;
930
931    my $str;
932
933    ### if a timeout is set, add it ###
934    $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
935
936    ### run passive if specified ###
937    $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
938
939    ### set the output document, add the uri ###
940    ### quote the URI, because lftp supports certain shell
941    ### expansions, most notably & for backgrounding.
942    ### ' quote does nto work, must be "
943    $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
944
945    if( $DEBUG ) {
946        my $pp_str = join ' ', split $/, $str;
947        print "# lftp command: $pp_str\n";
948    }
949
950    ### write straight to the file.
951    $fh->autoflush(1);
952    print $fh $str;
953
954    ### the command needs to be 1 string to be executed
955    push @$cmd, $fh->filename;
956
957    ### with IPC::Cmd > 0.41, this is fixed in teh library,
958    ### and there's no need for special casing any more.
959    ### DO NOT quote things for IPC::Run, it breaks stuff.
960    # $IPC::Cmd::USE_IPC_RUN
961    #    ? ($to, $self->uri)
962    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
963
964
965    ### shell out ###
966    my $captured;
967    unless(run( command => $cmd,
968                buffer  => \$captured,
969                verbose => $DEBUG
970    )) {
971        ### wget creates the output document always, even if the fetch
972        ### fails.. so unlink it in that case
973        1 while unlink $to;
974
975        return $self->_error(loc( "Command failed: %1", $captured || '' ));
976    }
977
978    return $to;
979}
980
981
982
983### /bin/ftp fetch ###
984sub _ftp_fetch {
985    my $self = shift;
986    my %hash = @_;
987
988    my ($to);
989    my $tmpl = {
990        to  => { required => 1, store => \$to }
991    };
992    check( $tmpl, \%hash ) or return;
993
994    ### see if we have a ftp binary ###
995    my $ftp;
996    unless( $ftp = can_run('ftp') ) {
997        $METHOD_FAIL->{'ftp'} = 1;
998        return;
999    }
1000
1001    my $fh = FileHandle->new;
1002
1003    local $SIG{CHLD} = 'IGNORE';
1004
1005    unless ($fh->open("$ftp -n", '|-')) {
1006        return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1007    }
1008
1009    my @dialog = (
1010        "lcd " . dirname($to),
1011        "open " . $self->host,
1012        "user anonymous $FROM_EMAIL",
1013        "cd /",
1014        "cd " . $self->path,
1015        "binary",
1016        "get " . $self->file . " " . $self->output_file,
1017        "quit",
1018    );
1019
1020    foreach (@dialog) { $fh->print($_, "\n") }
1021    $fh->close or return;
1022
1023    return $to;
1024}
1025
1026### lynx is stupid - it decompresses any .gz file it finds to be text
1027### use /bin/lynx to fetch files
1028sub _lynx_fetch {
1029    my $self = shift;
1030    my %hash = @_;
1031
1032    my ($to);
1033    my $tmpl = {
1034        to  => { required => 1, store => \$to }
1035    };
1036    check( $tmpl, \%hash ) or return;
1037
1038    ### see if we have a lynx binary ###
1039    my $lynx;
1040    unless ( $lynx = can_run('lynx') ){
1041        $METHOD_FAIL->{'lynx'} = 1;
1042        return;
1043    }
1044
1045    unless( IPC::Cmd->can_capture_buffer ) {
1046        $METHOD_FAIL->{'lynx'} = 1;
1047
1048        return $self->_error(loc(
1049            "Can not capture buffers. Can not use '%1' to fetch files",
1050            'lynx' ));
1051    }
1052
1053    ### check if the HTTP resource exists ###
1054    if ($self->uri =~ /^https?:\/\//i) {
1055        my $cmd = [
1056            $lynx,
1057            '-head',
1058            '-source',
1059            "-auth=anonymous:$FROM_EMAIL",
1060        ];
1061
1062        push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1063
1064        push @$cmd, $self->uri;
1065
1066        ### shell out ###
1067        my $head;
1068        unless(run( command => $cmd,
1069                    buffer  => \$head,
1070                    verbose => $DEBUG )
1071        ) {
1072            return $self->_error(loc("Command failed: %1", $head || ''));
1073        }
1074
1075        unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1076            return $self->_error(loc("Command failed: %1", $head || ''));
1077        }
1078    }
1079
1080    ### write to the output file ourselves, since lynx ass_u_mes to much
1081    my $local = FileHandle->new( $to, 'w' )
1082                    or return $self->_error(loc(
1083                        "Could not open '%1' for writing: %2",$to,$!));
1084
1085    ### dump to stdout ###
1086    my $cmd = [
1087        $lynx,
1088        '-source',
1089        "-auth=anonymous:$FROM_EMAIL",
1090    ];
1091
1092    push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1093
1094    ### DO NOT quote things for IPC::Run, it breaks stuff.
1095    push @$cmd, $self->uri;
1096
1097    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1098    ### and there's no need for special casing any more.
1099    ### DO NOT quote things for IPC::Run, it breaks stuff.
1100    # $IPC::Cmd::USE_IPC_RUN
1101    #    ? $self->uri
1102    #    : QUOTE. $self->uri .QUOTE;
1103
1104
1105    ### shell out ###
1106    my $captured;
1107    unless(run( command => $cmd,
1108                buffer  => \$captured,
1109                verbose => $DEBUG )
1110    ) {
1111        return $self->_error(loc("Command failed: %1", $captured || ''));
1112    }
1113
1114    ### print to local file ###
1115    ### XXX on a 404 with a special error page, $captured will actually
1116    ### hold the contents of that page, and make it *appear* like the
1117    ### request was a success, when really it wasn't :(
1118    ### there doesn't seem to be an option for lynx to change the exit
1119    ### code based on a 4XX status or so.
1120    ### the closest we can come is using --error_file and parsing that,
1121    ### which is very unreliable ;(
1122    $local->print( $captured );
1123    $local->close or return;
1124
1125    return $to;
1126}
1127
1128### use /bin/ncftp to fetch files
1129sub _ncftp_fetch {
1130    my $self = shift;
1131    my %hash = @_;
1132
1133    my ($to);
1134    my $tmpl = {
1135        to  => { required => 1, store => \$to }
1136    };
1137    check( $tmpl, \%hash ) or return;
1138
1139    ### we can only set passive mode in interactive sessions, so bail out
1140    ### if $FTP_PASSIVE is set
1141    return if $FTP_PASSIVE;
1142
1143    ### see if we have a ncftp binary ###
1144    my $ncftp;
1145    unless( $ncftp = can_run('ncftp') ) {
1146        $METHOD_FAIL->{'ncftp'} = 1;
1147        return;
1148    }
1149
1150    my $cmd = [
1151        $ncftp,
1152        '-V',                   # do not be verbose
1153        '-p', $FROM_EMAIL,      # email as password
1154        $self->host,            # hostname
1155        dirname($to),           # local dir for the file
1156                                # remote path to the file
1157        ### DO NOT quote things for IPC::Run, it breaks stuff.
1158        $IPC::Cmd::USE_IPC_RUN
1159                    ? File::Spec::Unix->catdir( $self->path, $self->file )
1160                    : QUOTE. File::Spec::Unix->catdir(
1161                                    $self->path, $self->file ) .QUOTE
1162
1163    ];
1164
1165    ### shell out ###
1166    my $captured;
1167    unless(run( command => $cmd,
1168                buffer  => \$captured,
1169                verbose => $DEBUG )
1170    ) {
1171        return $self->_error(loc("Command failed: %1", $captured || ''));
1172    }
1173
1174    return $to;
1175
1176}
1177
1178### use /bin/curl to fetch files
1179sub _curl_fetch {
1180    my $self = shift;
1181    my %hash = @_;
1182
1183    my ($to);
1184    my $tmpl = {
1185        to  => { required => 1, store => \$to }
1186    };
1187    check( $tmpl, \%hash ) or return;
1188    my $curl;
1189    unless ( $curl = can_run('curl') ) {
1190        $METHOD_FAIL->{'curl'} = 1;
1191        return;
1192    }
1193
1194    ### these long opts are self explanatory - I like that -jmb
1195    my $cmd = [ $curl, '-q' ];
1196
1197    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1198
1199    push(@$cmd, '--silent') unless $DEBUG;
1200
1201    ### curl does the right thing with passive, regardless ###
1202    if ($self->scheme eq 'ftp') {
1203        push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1204    }
1205
1206    ### curl doesn't follow 302 (temporarily moved) etc automatically
1207    ### so we add --location to enable that.
1208    push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1209
1210    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1211    ### and there's no need for special casing any more.
1212    ### DO NOT quote things for IPC::Run, it breaks stuff.
1213    # $IPC::Cmd::USE_IPC_RUN
1214    #    ? ($to, $self->uri)
1215    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1216
1217
1218    my $captured;
1219    unless(run( command => $cmd,
1220                buffer  => \$captured,
1221                verbose => $DEBUG )
1222    ) {
1223
1224        return $self->_error(loc("Command failed: %1", $captured || ''));
1225    }
1226
1227    return $to;
1228
1229}
1230
1231### /usr/bin/fetch fetch! ###
1232sub _fetch_fetch {
1233    my $self = shift;
1234    my %hash = @_;
1235
1236    my ($to);
1237    my $tmpl = {
1238        to  => { required => 1, store => \$to }
1239    };
1240    check( $tmpl, \%hash ) or return;
1241
1242    ### see if we have a fetch binary ###
1243    my $fetch;
1244    unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1245        $METHOD_FAIL->{'fetch'} = 1;
1246        return;
1247    }
1248
1249    ### no verboseness, thanks ###
1250    my $cmd = [ $fetch, '-q' ];
1251
1252    ### if a timeout is set, add it ###
1253    push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1254
1255    ### run passive if specified ###
1256    #push @$cmd, '-p' if $FTP_PASSIVE;
1257    local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1258
1259    ### set the output document, add the uri ###
1260    push @$cmd, '-o', $to, $self->uri;
1261
1262    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1263    ### and there's no need for special casing any more.
1264    ### DO NOT quote things for IPC::Run, it breaks stuff.
1265    # $IPC::Cmd::USE_IPC_RUN
1266    #    ? ($to, $self->uri)
1267    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1268
1269    ### shell out ###
1270    my $captured;
1271    unless(run( command => $cmd,
1272                buffer  => \$captured,
1273                verbose => $DEBUG
1274    )) {
1275        ### wget creates the output document always, even if the fetch
1276        ### fails.. so unlink it in that case
1277        1 while unlink $to;
1278
1279        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1280    }
1281
1282    return $to;
1283}
1284
1285### use File::Copy for fetching file:// urls ###
1286###
1287### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1288### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1289###
1290
1291sub _file_fetch {
1292    my $self = shift;
1293    my %hash = @_;
1294
1295    my ($to);
1296    my $tmpl = {
1297        to  => { required => 1, store => \$to }
1298    };
1299    check( $tmpl, \%hash ) or return;
1300
1301
1302
1303    ### prefix a / on unix systems with a file uri, since it would
1304    ### look somewhat like this:
1305    ###     file:///home/kane/file
1306    ### whereas windows file uris for 'c:\some\dir\file' might look like:
1307    ###     file:///C:/some/dir/file
1308    ###     file:///C|/some/dir/file
1309    ### or for a network share '\\host\share\some\dir\file':
1310    ###     file:////host/share/some/dir/file
1311    ###
1312    ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1313    ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
1314    ###
1315
1316    my $path    = $self->path;
1317    my $vol     = $self->vol;
1318    my $share   = $self->share;
1319
1320    my $remote;
1321    if (!$share and $self->host) {
1322        return $self->_error(loc(
1323            "Currently %1 cannot handle hosts in %2 urls",
1324            'File::Fetch', 'file://'
1325        ));
1326    }
1327
1328    if( $vol ) {
1329        $path   = File::Spec->catdir( split /\//, $path );
1330        $remote = File::Spec->catpath( $vol, $path, $self->file);
1331
1332    } elsif( $share ) {
1333        ### win32 specific, and a share name, so we wont bother with File::Spec
1334        $path   =~ s|/+|\\|g;
1335        $remote = "\\\\".$self->host."\\$share\\$path";
1336
1337    } else {
1338        ### File::Spec on VMS can not currently handle UNIX syntax.
1339        my $file_class = ON_VMS
1340            ? 'File::Spec::Unix'
1341            : 'File::Spec';
1342
1343        $remote  = $file_class->catfile( $path, $self->file );
1344    }
1345
1346    ### File::Copy is littered with 'die' statements :( ###
1347    my $rv = eval { File::Copy::copy( $remote, $to ) };
1348
1349    ### something went wrong ###
1350    if( !$rv or $@ ) {
1351        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1352                             $remote, $to, $!, $@));
1353    }
1354
1355    return $to;
1356}
1357
1358### use /usr/bin/rsync to fetch files
1359sub _rsync_fetch {
1360    my $self = shift;
1361    my %hash = @_;
1362
1363    my ($to);
1364    my $tmpl = {
1365        to  => { required => 1, store => \$to }
1366    };
1367    check( $tmpl, \%hash ) or return;
1368    my $rsync;
1369    unless ( $rsync = can_run('rsync') ) {
1370        $METHOD_FAIL->{'rsync'} = 1;
1371        return;
1372    }
1373
1374    my $cmd = [ $rsync ];
1375
1376    ### XXX: rsync has no I/O timeouts at all, by default
1377    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1378
1379    push(@$cmd, '--quiet') unless $DEBUG;
1380
1381    ### DO NOT quote things for IPC::Run, it breaks stuff.
1382    push @$cmd, $self->uri, $to;
1383
1384    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1385    ### and there's no need for special casing any more.
1386    ### DO NOT quote things for IPC::Run, it breaks stuff.
1387    # $IPC::Cmd::USE_IPC_RUN
1388    #    ? ($to, $self->uri)
1389    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1390
1391    my $captured;
1392    unless(run( command => $cmd,
1393                buffer  => \$captured,
1394                verbose => $DEBUG )
1395    ) {
1396
1397        return $self->_error(loc("Command %1 failed: %2",
1398            "@$cmd" || '', $captured || ''));
1399    }
1400
1401    return $to;
1402
1403}
1404
1405#################################
1406#
1407# Error code
1408#
1409#################################
1410
1411=pod
1412
1413=head2 $ff->error([BOOL])
1414
1415Returns the last encountered error as string.
1416Pass it a true value to get the C<Carp::longmess()> output instead.
1417
1418=cut
1419
1420### error handling the way Archive::Extract does it
1421sub _error {
1422    my $self    = shift;
1423    my $error   = shift;
1424
1425    $self->_error_msg( $error );
1426    $self->_error_msg_long( Carp::longmess($error) );
1427
1428    if( $WARN ) {
1429        carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1430    }
1431
1432    return;
1433}
1434
1435sub error {
1436    my $self = shift;
1437    return shift() ? $self->_error_msg_long : $self->_error_msg;
1438}
1439
1440
14411;
1442
1443=pod
1444
1445=head1 HOW IT WORKS
1446
1447File::Fetch is able to fetch a variety of uris, by using several
1448external programs and modules.
1449
1450Below is a mapping of what utilities will be used in what order
1451for what schemes, if available:
1452
1453    file    => LWP, lftp, file
1454    http    => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock
1455    ftp     => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1456    rsync   => rsync
1457
1458If you'd like to disable the use of one or more of these utilities
1459and/or modules, see the C<$BLACKLIST> variable further down.
1460
1461If a utility or module isn't available, it will be marked in a cache
1462(see the C<$METHOD_FAIL> variable further down), so it will not be
1463tried again. The C<fetch> method will only fail when all options are
1464exhausted, and it was not able to retrieve the file.
1465
1466The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1467may also have it from C<pkgsrc>. We only check for C<fetch> on those
1468three platforms.
1469
1470C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1471retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1472
1473A special note about fetching files from an ftp uri:
1474
1475By default, all ftp connections are done in passive mode. To change
1476that, see the C<$FTP_PASSIVE> variable further down.
1477
1478Furthermore, ftp uris only support anonymous connections, so no
1479named user/password pair can be passed along.
1480
1481C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1482further down.
1483
1484=head1 GLOBAL VARIABLES
1485
1486The behaviour of File::Fetch can be altered by changing the following
1487global variables:
1488
1489=head2 $File::Fetch::FROM_EMAIL
1490
1491This is the email address that will be sent as your anonymous ftp
1492password.
1493
1494Default is C<File-Fetch@example.com>.
1495
1496=head2 $File::Fetch::USER_AGENT
1497
1498This is the useragent as C<LWP> will report it.
1499
1500Default is C<File::Fetch/$VERSION>.
1501
1502=head2 $File::Fetch::FTP_PASSIVE
1503
1504This variable controls whether the environment variable C<FTP_PASSIVE>
1505and any passive switches to commandline tools will be set to true.
1506
1507Default value is 1.
1508
1509Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1510files, since passive mode can only be set interactively for this binary
1511
1512=head2 $File::Fetch::TIMEOUT
1513
1514When set, controls the network timeout (counted in seconds).
1515
1516Default value is 0.
1517
1518=head2 $File::Fetch::WARN
1519
1520This variable controls whether errors encountered internally by
1521C<File::Fetch> should be C<carp>'d or not.
1522
1523Set to false to silence warnings. Inspect the output of the C<error()>
1524method manually to see what went wrong.
1525
1526Defaults to C<true>.
1527
1528=head2 $File::Fetch::DEBUG
1529
1530This enables debugging output when calling commandline utilities to
1531fetch files.
1532This also enables C<Carp::longmess> errors, instead of the regular
1533C<carp> errors.
1534
1535Good for tracking down why things don't work with your particular
1536setup.
1537
1538Default is 0.
1539
1540=head2 $File::Fetch::BLACKLIST
1541
1542This is an array ref holding blacklisted modules/utilities for fetching
1543files with.
1544
1545To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1546set $File::Fetch::BLACKLIST to:
1547
1548    $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1549
1550The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1551
1552See the note on C<MAPPING> below.
1553
1554=head2 $File::Fetch::METHOD_FAIL
1555
1556This is a hashref registering what modules/utilities were known to fail
1557for fetching files (mostly because they weren't installed).
1558
1559You can reset this cache by assigning an empty hashref to it, or
1560individually remove keys.
1561
1562See the note on C<MAPPING> below.
1563
1564=head1 MAPPING
1565
1566
1567Here's a quick mapping for the utilities/modules, and their names for
1568the $BLACKLIST, $METHOD_FAIL and other internal functions.
1569
1570    LWP         => lwp
1571    HTTP::Lite  => httplite
1572    HTTP::Tiny  => httptiny
1573    Net::FTP    => netftp
1574    wget        => wget
1575    lynx        => lynx
1576    ncftp       => ncftp
1577    ftp         => ftp
1578    curl        => curl
1579    rsync       => rsync
1580    lftp        => lftp
1581    fetch       => fetch
1582    IO::Socket  => iosock
1583
1584=head1 FREQUENTLY ASKED QUESTIONS
1585
1586=head2 So how do I use a proxy with File::Fetch?
1587
1588C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1589You will need to set your environment variables accordingly. For
1590example, to use an ftp proxy:
1591
1592    $ENV{ftp_proxy} = 'foo.com';
1593
1594Refer to the LWP::UserAgent manpage for more details.
1595
1596=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1597
1598C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1599which we in turn capture. If that content is a 'custom' error file
1600(like, say, a C<404 handler>), you will get that contents instead.
1601
1602Sadly, C<lynx> doesn't support any options to return a different exit
1603code on non-C<200 OK> status, giving us no way to tell the difference
1604between a 'successful' fetch and a custom error page.
1605
1606Therefor, we recommend to only use C<lynx> as a last resort. This is
1607why it is at the back of our list of methods to try as well.
1608
1609=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1610
1611C<File::Fetch> is relatively smart about things. When trying to write
1612a file to disk, it removes the C<query parameters> (see the
1613C<output_file> method for details) from the file name before creating
1614it. In most cases this suffices.
1615
1616If you have any other characters you need to escape, please install
1617the C<URI::Escape> module from CPAN, and pre-encode your URI before
1618passing it to C<File::Fetch>. You can read about the details of URIs
1619and URI encoding here:
1620
1621  http://www.faqs.org/rfcs/rfc2396.html
1622
1623=head1 TODO
1624
1625=over 4
1626
1627=item Implement $PREFER_BIN
1628
1629To indicate to rather use commandline tools than modules
1630
1631=back
1632
1633=head1 BUG REPORTS
1634
1635Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1636
1637=head1 AUTHOR
1638
1639This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1640
1641=head1 COPYRIGHT
1642
1643This library is free software; you may redistribute and/or modify it
1644under the same terms as Perl itself.
1645
1646
1647=cut
1648
1649# Local variables:
1650# c-indentation-style: bsd
1651# c-basic-offset: 4
1652# indent-tabs-mode: nil
1653# End:
1654# vim: expandtab shiftwidth=4:
1655
1656
1657
1658
1659