1package File::Fetch; 2 3use strict; 4use FileHandle; 5use File::Temp; 6use File::Copy; 7use File::Spec; 8use File::Spec::Unix; 9use File::Basename qw[dirname]; 10 11use Cwd qw[cwd]; 12use Carp qw[carp]; 13use IPC::Cmd qw[can_run run QUOTE]; 14use File::Path qw[mkpath]; 15use File::Temp qw[tempdir]; 16use Params::Check qw[check]; 17use Module::Load::Conditional qw[can_load]; 18use Locale::Maketext::Simple Style => 'gettext'; 19 20use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT 21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS 22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN 23 ]; 24 25$VERSION = '0.38'; 26$VERSION = eval $VERSION; # avoid warnings with development releases 27$PREFER_BIN = 0; # XXX TODO implement 28$FROM_EMAIL = 'File-Fetch@example.com'; 29$USER_AGENT = "File::Fetch/$VERSION"; 30$BLACKLIST = [qw|ftp|]; 31$METHOD_FAIL = { }; 32$FTP_PASSIVE = 1; 33$TIMEOUT = 0; 34$DEBUG = 0; 35$WARN = 1; 36 37### methods available to fetch the file depending on the scheme 38$METHODS = { 39 http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], 40 ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], 41 file => [ qw|lwp lftp file| ], 42 rsync => [ qw|rsync| ] 43}; 44 45### silly warnings ### 46local $Params::Check::VERBOSE = 1; 47local $Params::Check::VERBOSE = 1; 48local $Module::Load::Conditional::VERBOSE = 0; 49local $Module::Load::Conditional::VERBOSE = 0; 50 51### see what OS we are on, important for file:// uris ### 52use constant ON_WIN => ($^O eq 'MSWin32'); 53use constant ON_VMS => ($^O eq 'VMS'); 54use constant ON_UNIX => (!ON_WIN); 55use constant HAS_VOL => (ON_WIN); 56use constant HAS_SHARE => (ON_WIN); 57use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); 58 59=pod 60 61=head1 NAME 62 63File::Fetch - A generic file fetching mechanism 64 65=head1 SYNOPSIS 66 67 use File::Fetch; 68 69 ### build a File::Fetch object ### 70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); 71 72 ### fetch the uri to cwd() ### 73 my $where = $ff->fetch() or die $ff->error; 74 75 ### fetch the uri to /tmp ### 76 my $where = $ff->fetch( to => '/tmp' ); 77 78 ### parsed bits from the uri ### 79 $ff->uri; 80 $ff->scheme; 81 $ff->host; 82 $ff->path; 83 $ff->file; 84 85=head1 DESCRIPTION 86 87File::Fetch is a generic file fetching mechanism. 88 89It allows you to fetch any file pointed to by a C<ftp>, C<http>, 90C<file>, or C<rsync> uri by a number of different means. 91 92See the C<HOW IT WORKS> section further down for details. 93 94=head1 ACCESSORS 95 96A C<File::Fetch> object has the following accessors 97 98=over 4 99 100=item $ff->uri 101 102The uri you passed to the constructor 103 104=item $ff->scheme 105 106The scheme from the uri (like 'file', 'http', etc) 107 108=item $ff->host 109 110The hostname in the uri. Will be empty if host was originally 111'localhost' for a 'file://' url. 112 113=item $ff->vol 114 115On operating systems with the concept of a volume the second element 116of a file:// is considered to the be volume specification for the file. 117Thus on Win32 this routine returns the volume, on other operating 118systems this returns nothing. 119 120On Windows this value may be empty if the uri is to a network share, in 121which case the 'share' property will be defined. Additionally, volume 122specifications that use '|' as ':' will be converted on read to use ':'. 123 124On VMS, which has a volume concept, this field will be empty because VMS 125file specifications are converted to absolute UNIX format and the volume 126information is transparently included. 127 128=item $ff->share 129 130On systems with the concept of a network share (currently only Windows) returns 131the sharename from a file://// url. On other operating systems returns empty. 132 133=item $ff->path 134 135The path from the uri, will be at least a single '/'. 136 137=item $ff->file 138 139The name of the remote file. For the local file name, the 140result of $ff->output_file will be used. 141 142=item $ff->file_default 143 144The name of the default local file, that $ff->output_file falls back to if 145it would otherwise return no filename. For example when fetching a URI like 146http://www.abc.net.au/ the contents retrieved may be from a remote file called 147'index.html'. The default value of this attribute is literally 'file_default'. 148 149=cut 150 151 152########################## 153### Object & Accessors ### 154########################## 155 156{ 157 ### template for autogenerated accessors ### 158 my $Tmpl = { 159 scheme => { default => 'http' }, 160 host => { default => 'localhost' }, 161 path => { default => '/' }, 162 file => { required => 1 }, 163 uri => { required => 1 }, 164 vol => { default => '' }, # windows for file:// uris 165 share => { default => '' }, # windows for file:// uris 166 file_default => { default => 'file_default' }, 167 tempdir_root => { required => 1 }, # Should be lazy-set at ->new() 168 _error_msg => { no_override => 1 }, 169 _error_msg_long => { no_override => 1 }, 170 }; 171 172 for my $method ( keys %$Tmpl ) { 173 no strict 'refs'; 174 *$method = sub { 175 my $self = shift; 176 $self->{$method} = $_[0] if @_; 177 return $self->{$method}; 178 } 179 } 180 181 sub _create { 182 my $class = shift; 183 my %hash = @_; 184 185 my $args = check( $Tmpl, \%hash ) or return; 186 187 bless $args, $class; 188 189 if( lc($args->scheme) ne 'file' and not $args->host ) { 190 return $class->_error(loc( 191 "Hostname required when fetching from '%1'",$args->scheme)); 192 } 193 194 for (qw[path]) { 195 unless( $args->$_() ) { # 5.5.x needs the () 196 return $class->_error(loc("No '%1' specified",$_)); 197 } 198 } 199 200 return $args; 201 } 202} 203 204=item $ff->output_file 205 206The name of the output file. This is the same as $ff->file, 207but any query parameters are stripped off. For example: 208 209 http://example.com/index.html?x=y 210 211would make the output file be C<index.html> rather than 212C<index.html?x=y>. 213 214=back 215 216=cut 217 218sub output_file { 219 my $self = shift; 220 my $file = $self->file; 221 222 $file =~ s/\?.*$//g; 223 224 $file ||= $self->file_default; 225 226 return $file; 227} 228 229### XXX do this or just point to URI::Escape? 230# =head2 $esc_uri = $ff->escaped_uri 231# 232# =cut 233# 234# ### most of this is stolen straight from URI::escape 235# { ### Build a char->hex map 236# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 237# 238# sub escaped_uri { 239# my $self = shift; 240# my $uri = $self->uri; 241# 242# ### Default unsafe characters. RFC 2732 ^(uric - reserved) 243# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ 244# $escapes{$1} || $self->_fail_hi($1)/ge; 245# 246# return $uri; 247# } 248# 249# sub _fail_hi { 250# my $self = shift; 251# my $char = shift; 252# 253# $self->_error(loc( 254# "Can't escape '%1', try using the '%2' module instead", 255# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' 256# )); 257# } 258# 259# sub output_file { 260# 261# } 262# 263# 264# } 265 266=head1 METHODS 267 268=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); 269 270Parses the uri and creates a corresponding File::Fetch::Item object, 271that is ready to be C<fetch>ed and returns it. 272 273Returns false on failure. 274 275=cut 276 277sub new { 278 my $class = shift; 279 my %hash = @_; 280 281 my ($uri, $file_default, $tempdir_root); 282 my $tmpl = { 283 uri => { required => 1, store => \$uri }, 284 file_default => { required => 0, store => \$file_default }, 285 tempdir_root => { required => 0, store => \$tempdir_root }, 286 }; 287 288 check( $tmpl, \%hash ) or return; 289 290 ### parse the uri to usable parts ### 291 my $href = $class->_parse_uri( $uri ) or return; 292 293 $href->{file_default} = $file_default if $file_default; 294 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root; 295 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root}; 296 297 ### make it into a FFI object ### 298 my $ff = $class->_create( %$href ) or return; 299 300 301 ### return the object ### 302 return $ff; 303} 304 305### parses an uri to a hash structure: 306### 307### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) 308### 309### becomes: 310### 311### $href = { 312### scheme => 'ftp', 313### host => 'ftp.cpan.org', 314### path => '/pub/mirror', 315### file => 'index.html' 316### }; 317### 318### In the case of file:// urls there maybe be additional fields 319### 320### For systems with volume specifications such as Win32 there will be 321### a volume specifier provided in the 'vol' field. 322### 323### 'vol' => 'volumename' 324### 325### For windows file shares there may be a 'share' key specified 326### 327### 'share' => 'sharename' 328### 329### Note that the rules of what a file:// url means vary by the operating system 330### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious 331### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 332### not '/foo/bar.txt' 333### 334### Similarly if the host interpreting the url is VMS then 335### file:///disk$user/my/notes/note12345.txt' means 336### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as 337### if it is unix where it means /disk$user/my/notes/note12345.txt'. 338### Except for some cases in the File::Spec methods, Perl on VMS will generally 339### handle UNIX format file specifications. 340### 341### This means it is impossible to serve certain file:// urls on certain systems. 342### 343### Thus are the problems with a protocol-less specification. :-( 344### 345 346sub _parse_uri { 347 my $self = shift; 348 my $uri = shift or return; 349 350 my $href = { uri => $uri }; 351 352 ### find the scheme ### 353 $uri =~ s|^(\w+)://||; 354 $href->{scheme} = $1; 355 356 ### See rfc 1738 section 3.10 357 ### http://www.faqs.org/rfcs/rfc1738.html 358 ### And wikipedia for more on windows file:// urls 359 ### http://en.wikipedia.org/wiki/File:// 360 if( $href->{scheme} eq 'file' ) { 361 362 my @parts = split '/',$uri; 363 364 ### file://hostname/... 365 ### file://hostname/... 366 ### normalize file://localhost with file:/// 367 $href->{host} = $parts[0] || ''; 368 369 ### index in @parts where the path components begin; 370 my $index = 1; 371 372 ### file:////hostname/sharename/blah.txt 373 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { 374 375 $href->{host} = $parts[2] || ''; # avoid warnings 376 $href->{share} = $parts[3] || ''; # avoid warnings 377 378 $index = 4 # index after the share 379 380 ### file:///D|/blah.txt 381 ### file:///D:/blah.txt 382 } elsif (HAS_VOL) { 383 384 ### this code comes from dmq's patch, but: 385 ### XXX if volume is empty, wouldn't that be an error? --kane 386 ### if so, our file://localhost test needs to be fixed as wel 387 $href->{vol} = $parts[1] || ''; 388 389 ### correct D| style colume descriptors 390 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; 391 392 $index = 2; # index after the volume 393 } 394 395 ### rebuild the path from the leftover parts; 396 $href->{path} = join '/', '', splice( @parts, $index, $#parts ); 397 398 } else { 399 ### using anything but qw() in hash slices may produce warnings 400 ### in older perls :-( 401 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s; 402 } 403 404 ### split the path into file + dir ### 405 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); 406 $href->{path} = $parts[1]; 407 $href->{file} = $parts[2]; 408 } 409 410 ### host will be empty if the target was 'localhost' and the 411 ### scheme was 'file' 412 $href->{host} = '' if ($href->{host} eq 'localhost') and 413 ($href->{scheme} eq 'file'); 414 415 return $href; 416} 417 418=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) 419 420Fetches the file you requested and returns the full path to the file. 421 422By default it writes to C<cwd()>, but you can override that by specifying 423the C<to> argument: 424 425 ### file fetch to /tmp, full path to the file in $where 426 $where = $ff->fetch( to => '/tmp' ); 427 428 ### file slurped into $scalar, full path to the file in $where 429 ### file is downloaded to a temp directory and cleaned up at exit time 430 $where = $ff->fetch( to => \$scalar ); 431 432Returns the full path to the downloaded file on success, and false 433on failure. 434 435=cut 436 437sub fetch { 438 my $self = shift or return; 439 my %hash = @_; 440 441 my $target; 442 my $tmpl = { 443 to => { default => cwd(), store => \$target }, 444 }; 445 446 check( $tmpl, \%hash ) or return; 447 448 my ($to, $fh); 449 ### you want us to slurp the contents 450 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 451 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 ); 452 453 ### plain old fetch 454 } else { 455 $to = $target; 456 457 ### On VMS force to VMS format so File::Spec will work. 458 $to = VMS::Filespec::vmspath($to) if ON_VMS; 459 460 ### create the path if it doesn't exist yet ### 461 unless( -d $to ) { 462 eval { mkpath( $to ) }; 463 464 return $self->_error(loc("Could not create path '%1'",$to)) if $@; 465 } 466 } 467 468 ### set passive ftp if required ### 469 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; 470 471 ### we dont use catfile on win32 because if we are using a cygwin tool 472 ### under cmd.exe they wont understand windows style separators. 473 my $out_to = ON_WIN ? $to.'/'.$self->output_file 474 : File::Spec->catfile( $to, $self->output_file ); 475 476 for my $method ( @{ $METHODS->{$self->scheme} } ) { 477 my $sub = '_'.$method.'_fetch'; 478 479 unless( __PACKAGE__->can($sub) ) { 480 $self->_error(loc("Cannot call method for '%1' -- WEIRD!", 481 $method)); 482 next; 483 } 484 485 ### method is blacklisted ### 486 next if grep { lc $_ eq $method } @$BLACKLIST; 487 488 ### method is known to fail ### 489 next if $METHOD_FAIL->{$method}; 490 491 ### there's serious issues with IPC::Run and quoting of command 492 ### line arguments. using quotes in the wrong place breaks things, 493 ### and in the case of say, 494 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document 495 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" 496 ### it doesn't matter how you quote, it always fails. 497 local $IPC::Cmd::USE_IPC_RUN = 0; 498 499 if( my $file = $self->$sub( 500 to => $out_to 501 )){ 502 503 unless( -e $file && -s _ ) { 504 $self->_error(loc("'%1' said it fetched '%2', ". 505 "but it was not created",$method,$file)); 506 507 ### mark the failure ### 508 $METHOD_FAIL->{$method} = 1; 509 510 next; 511 512 } else { 513 514 ### slurp mode? 515 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 516 517 ### open the file 518 open my $fh, "<$file" or do { 519 $self->_error( 520 loc("Could not open '%1': %2", $file, $!)); 521 return; 522 }; 523 524 ### slurp 525 $$target = do { local $/; <$fh> }; 526 527 } 528 529 my $abs = File::Spec->rel2abs( $file ); 530 return $abs; 531 532 } 533 } 534 } 535 536 537 ### if we got here, we looped over all methods, but we weren't able 538 ### to fetch it. 539 return; 540} 541 542######################## 543### _*_fetch methods ### 544######################## 545 546### LWP fetching ### 547sub _lwp_fetch { 548 my $self = shift; 549 my %hash = @_; 550 551 my ($to); 552 my $tmpl = { 553 to => { required => 1, store => \$to } 554 }; 555 check( $tmpl, \%hash ) or return; 556 557 ### modules required to download with lwp ### 558 my $use_list = { 559 LWP => '0.0', 560 'LWP::UserAgent' => '0.0', 561 'HTTP::Request' => '0.0', 562 'HTTP::Status' => '0.0', 563 URI => '0.0', 564 565 }; 566 567 unless( can_load( modules => $use_list ) ) { 568 $METHOD_FAIL->{'lwp'} = 1; 569 return; 570 } 571 572 ### setup the uri object 573 my $uri = URI->new( File::Spec::Unix->catfile( 574 $self->path, $self->file 575 ) ); 576 577 ### special rules apply for file:// uris ### 578 $uri->scheme( $self->scheme ); 579 $uri->host( $self->scheme eq 'file' ? '' : $self->host ); 580 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file'; 581 582 ### set up the useragent object 583 my $ua = LWP::UserAgent->new(); 584 $ua->timeout( $TIMEOUT ) if $TIMEOUT; 585 $ua->agent( $USER_AGENT ); 586 $ua->from( $FROM_EMAIL ); 587 $ua->env_proxy; 588 589 my $res = $ua->mirror($uri, $to) or return; 590 591 ### uptodate or fetched ok ### 592 if ( $res->code == 304 or $res->code == 200 ) { 593 return $to; 594 595 } else { 596 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", 597 $res->code, HTTP::Status::status_message($res->code), 598 $res->status_line)); 599 } 600 601} 602 603### HTTP::Tiny fetching ### 604sub _httptiny_fetch { 605 my $self = shift; 606 my %hash = @_; 607 608 my ($to); 609 my $tmpl = { 610 to => { required => 1, store => \$to } 611 }; 612 check( $tmpl, \%hash ) or return; 613 614 my $use_list = { 615 'HTTP::Tiny' => '0.008', 616 617 }; 618 619 unless( can_load(modules => $use_list) ) { 620 $METHOD_FAIL->{'httptiny'} = 1; 621 return; 622 } 623 624 my $uri = $self->uri; 625 626 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); 627 628 my $rc = $http->mirror( $uri, $to ); 629 630 unless ( $rc->{success} ) { 631 632 return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]", 633 $rc->{status}, $rc->{reason} ) ); 634 635 } 636 637 return $to; 638 639} 640 641### HTTP::Lite fetching ### 642sub _httplite_fetch { 643 my $self = shift; 644 my %hash = @_; 645 646 my ($to); 647 my $tmpl = { 648 to => { required => 1, store => \$to } 649 }; 650 check( $tmpl, \%hash ) or return; 651 652 ### modules required to download with lwp ### 653 my $use_list = { 654 'HTTP::Lite' => '2.2', 655 656 }; 657 658 unless( can_load(modules => $use_list) ) { 659 $METHOD_FAIL->{'httplite'} = 1; 660 return; 661 } 662 663 my $uri = $self->uri; 664 my $retries = 0; 665 666 RETRIES: while ( $retries++ < 5 ) { 667 668 my $http = HTTP::Lite->new(); 669 # Naughty naughty but there isn't any accessor/setter 670 $http->{timeout} = $TIMEOUT if $TIMEOUT; 671 $http->http11_mode(1); 672 673 my $fh = FileHandle->new; 674 675 unless ( $fh->open($to,'>') ) { 676 return $self->_error(loc( 677 "Could not open '%1' for writing: %2",$to,$!)); 678 } 679 680 $fh->autoflush(1); 681 682 binmode $fh; 683 684 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); 685 686 close $fh; 687 688 if ( $rc == 301 || $rc == 302 ) { 689 my $loc; 690 HEADERS: for ($http->headers_array) { 691 /Location: (\S+)/ and $loc = $1, last HEADERS; 692 } 693 #$loc or last; # Think we should squeal here. 694 if ($loc =~ m!^/!) { 695 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; 696 $uri .= $loc; 697 } 698 else { 699 $uri = $loc; 700 } 701 next RETRIES; 702 } 703 elsif ( $rc == 200 ) { 704 return $to; 705 } 706 else { 707 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", 708 $rc, $http->status_message)); 709 } 710 711 } # Loop for 5 retries. 712 713 return $self->_error("Fetch failed! Gave up after 5 tries"); 714 715} 716 717### Simple IO::Socket::INET fetching ### 718sub _iosock_fetch { 719 my $self = shift; 720 my %hash = @_; 721 722 my ($to); 723 my $tmpl = { 724 to => { required => 1, store => \$to } 725 }; 726 check( $tmpl, \%hash ) or return; 727 728 my $use_list = { 729 'IO::Socket::INET' => '0.0', 730 'IO::Select' => '0.0', 731 }; 732 733 unless( can_load(modules => $use_list) ) { 734 $METHOD_FAIL->{'iosock'} = 1; 735 return; 736 } 737 738 my $sock = IO::Socket::INET->new( 739 PeerHost => $self->host, 740 ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ), 741 ); 742 743 unless ( $sock ) { 744 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); 745 } 746 747 my $fh = FileHandle->new; 748 749 # Check open() 750 751 unless ( $fh->open($to,'>') ) { 752 return $self->_error(loc( 753 "Could not open '%1' for writing: %2",$to,$!)); 754 } 755 756 $fh->autoflush(1); 757 binmode $fh; 758 759 my $path = File::Spec::Unix->catfile( $self->path, $self->file ); 760 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; 761 $sock->send( $req ); 762 763 my $select = IO::Select->new( $sock ); 764 765 my $resp = ''; 766 my $normal = 0; 767 while ( $select->can_read( $TIMEOUT || 60 ) ) { 768 my $ret = $sock->sysread( $resp, 4096, length($resp) ); 769 if ( !defined $ret or $ret == 0 ) { 770 $select->remove( $sock ); 771 $normal++; 772 } 773 } 774 close $sock; 775 776 unless ( $normal ) { 777 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); 778 } 779 780 # Check the "response" 781 # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) 782 $resp =~ s/^(\x0d?\x0a)+//; 783 # Check it is an HTTP response 784 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { 785 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); 786 } 787 788 # Check for OK 789 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; 790 unless ( $code eq '200' ) { 791 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); 792 } 793 794 { 795 local $\; 796 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; 797 } 798 close $fh; 799 return $to; 800} 801 802### Net::FTP fetching 803sub _netftp_fetch { 804 my $self = shift; 805 my %hash = @_; 806 807 my ($to); 808 my $tmpl = { 809 to => { required => 1, store => \$to } 810 }; 811 check( $tmpl, \%hash ) or return; 812 813 ### required modules ### 814 my $use_list = { 'Net::FTP' => 0 }; 815 816 unless( can_load( modules => $use_list ) ) { 817 $METHOD_FAIL->{'netftp'} = 1; 818 return; 819 } 820 821 ### make connection ### 822 my $ftp; 823 my @options = ($self->host); 824 push(@options, Timeout => $TIMEOUT) if $TIMEOUT; 825 unless( $ftp = Net::FTP->new( @options ) ) { 826 return $self->_error(loc("Ftp creation failed: %1",$@)); 827 } 828 829 ### login ### 830 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { 831 return $self->_error(loc("Could not login to '%1'",$self->host)); 832 } 833 834 ### set binary mode, just in case ### 835 $ftp->binary; 836 837 ### create the remote path 838 ### remember remote paths are unix paths! [#11483] 839 my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); 840 841 ### fetch the file ### 842 my $target; 843 unless( $target = $ftp->get( $remote, $to ) ) { 844 return $self->_error(loc("Could not fetch '%1' from '%2'", 845 $remote, $self->host)); 846 } 847 848 ### log out ### 849 $ftp->quit; 850 851 return $target; 852 853} 854 855### /bin/wget fetch ### 856sub _wget_fetch { 857 my $self = shift; 858 my %hash = @_; 859 860 my ($to); 861 my $tmpl = { 862 to => { required => 1, store => \$to } 863 }; 864 check( $tmpl, \%hash ) or return; 865 866 my $wget; 867 ### see if we have a wget binary ### 868 unless( $wget = can_run('wget') ) { 869 $METHOD_FAIL->{'wget'} = 1; 870 return; 871 } 872 873 ### no verboseness, thanks ### 874 my $cmd = [ $wget, '--quiet' ]; 875 876 ### if a timeout is set, add it ### 877 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 878 879 ### run passive if specified ### 880 push @$cmd, '--passive-ftp' if $FTP_PASSIVE; 881 882 ### set the output document, add the uri ### 883 push @$cmd, '--output-document', $to, $self->uri; 884 885 ### with IPC::Cmd > 0.41, this is fixed in teh library, 886 ### and there's no need for special casing any more. 887 ### DO NOT quote things for IPC::Run, it breaks stuff. 888 # $IPC::Cmd::USE_IPC_RUN 889 # ? ($to, $self->uri) 890 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 891 892 ### shell out ### 893 my $captured; 894 unless(run( command => $cmd, 895 buffer => \$captured, 896 verbose => $DEBUG 897 )) { 898 ### wget creates the output document always, even if the fetch 899 ### fails.. so unlink it in that case 900 1 while unlink $to; 901 902 return $self->_error(loc( "Command failed: %1", $captured || '' )); 903 } 904 905 return $to; 906} 907 908### /bin/lftp fetch ### 909sub _lftp_fetch { 910 my $self = shift; 911 my %hash = @_; 912 913 my ($to); 914 my $tmpl = { 915 to => { required => 1, store => \$to } 916 }; 917 check( $tmpl, \%hash ) or return; 918 919 ### see if we have a lftp binary ### 920 my $lftp; 921 unless( $lftp = can_run('lftp') ) { 922 $METHOD_FAIL->{'lftp'} = 1; 923 return; 924 } 925 926 ### no verboseness, thanks ### 927 my $cmd = [ $lftp, '-f' ]; 928 929 my $fh = File::Temp->new; 930 931 my $str; 932 933 ### if a timeout is set, add it ### 934 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; 935 936 ### run passive if specified ### 937 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; 938 939 ### set the output document, add the uri ### 940 ### quote the URI, because lftp supports certain shell 941 ### expansions, most notably & for backgrounding. 942 ### ' quote does nto work, must be " 943 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; 944 945 if( $DEBUG ) { 946 my $pp_str = join ' ', split $/, $str; 947 print "# lftp command: $pp_str\n"; 948 } 949 950 ### write straight to the file. 951 $fh->autoflush(1); 952 print $fh $str; 953 954 ### the command needs to be 1 string to be executed 955 push @$cmd, $fh->filename; 956 957 ### with IPC::Cmd > 0.41, this is fixed in teh library, 958 ### and there's no need for special casing any more. 959 ### DO NOT quote things for IPC::Run, it breaks stuff. 960 # $IPC::Cmd::USE_IPC_RUN 961 # ? ($to, $self->uri) 962 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 963 964 965 ### shell out ### 966 my $captured; 967 unless(run( command => $cmd, 968 buffer => \$captured, 969 verbose => $DEBUG 970 )) { 971 ### wget creates the output document always, even if the fetch 972 ### fails.. so unlink it in that case 973 1 while unlink $to; 974 975 return $self->_error(loc( "Command failed: %1", $captured || '' )); 976 } 977 978 return $to; 979} 980 981 982 983### /bin/ftp fetch ### 984sub _ftp_fetch { 985 my $self = shift; 986 my %hash = @_; 987 988 my ($to); 989 my $tmpl = { 990 to => { required => 1, store => \$to } 991 }; 992 check( $tmpl, \%hash ) or return; 993 994 ### see if we have a ftp binary ### 995 my $ftp; 996 unless( $ftp = can_run('ftp') ) { 997 $METHOD_FAIL->{'ftp'} = 1; 998 return; 999 } 1000 1001 my $fh = FileHandle->new; 1002 1003 local $SIG{CHLD} = 'IGNORE'; 1004 1005 unless ($fh->open("$ftp -n", '|-')) { 1006 return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); 1007 } 1008 1009 my @dialog = ( 1010 "lcd " . dirname($to), 1011 "open " . $self->host, 1012 "user anonymous $FROM_EMAIL", 1013 "cd /", 1014 "cd " . $self->path, 1015 "binary", 1016 "get " . $self->file . " " . $self->output_file, 1017 "quit", 1018 ); 1019 1020 foreach (@dialog) { $fh->print($_, "\n") } 1021 $fh->close or return; 1022 1023 return $to; 1024} 1025 1026### lynx is stupid - it decompresses any .gz file it finds to be text 1027### use /bin/lynx to fetch files 1028sub _lynx_fetch { 1029 my $self = shift; 1030 my %hash = @_; 1031 1032 my ($to); 1033 my $tmpl = { 1034 to => { required => 1, store => \$to } 1035 }; 1036 check( $tmpl, \%hash ) or return; 1037 1038 ### see if we have a lynx binary ### 1039 my $lynx; 1040 unless ( $lynx = can_run('lynx') ){ 1041 $METHOD_FAIL->{'lynx'} = 1; 1042 return; 1043 } 1044 1045 unless( IPC::Cmd->can_capture_buffer ) { 1046 $METHOD_FAIL->{'lynx'} = 1; 1047 1048 return $self->_error(loc( 1049 "Can not capture buffers. Can not use '%1' to fetch files", 1050 'lynx' )); 1051 } 1052 1053 ### check if the HTTP resource exists ### 1054 if ($self->uri =~ /^https?:\/\//i) { 1055 my $cmd = [ 1056 $lynx, 1057 '-head', 1058 '-source', 1059 "-auth=anonymous:$FROM_EMAIL", 1060 ]; 1061 1062 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1063 1064 push @$cmd, $self->uri; 1065 1066 ### shell out ### 1067 my $head; 1068 unless(run( command => $cmd, 1069 buffer => \$head, 1070 verbose => $DEBUG ) 1071 ) { 1072 return $self->_error(loc("Command failed: %1", $head || '')); 1073 } 1074 1075 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { 1076 return $self->_error(loc("Command failed: %1", $head || '')); 1077 } 1078 } 1079 1080 ### write to the output file ourselves, since lynx ass_u_mes to much 1081 my $local = FileHandle->new( $to, 'w' ) 1082 or return $self->_error(loc( 1083 "Could not open '%1' for writing: %2",$to,$!)); 1084 1085 ### dump to stdout ### 1086 my $cmd = [ 1087 $lynx, 1088 '-source', 1089 "-auth=anonymous:$FROM_EMAIL", 1090 ]; 1091 1092 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1093 1094 ### DO NOT quote things for IPC::Run, it breaks stuff. 1095 push @$cmd, $self->uri; 1096 1097 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1098 ### and there's no need for special casing any more. 1099 ### DO NOT quote things for IPC::Run, it breaks stuff. 1100 # $IPC::Cmd::USE_IPC_RUN 1101 # ? $self->uri 1102 # : QUOTE. $self->uri .QUOTE; 1103 1104 1105 ### shell out ### 1106 my $captured; 1107 unless(run( command => $cmd, 1108 buffer => \$captured, 1109 verbose => $DEBUG ) 1110 ) { 1111 return $self->_error(loc("Command failed: %1", $captured || '')); 1112 } 1113 1114 ### print to local file ### 1115 ### XXX on a 404 with a special error page, $captured will actually 1116 ### hold the contents of that page, and make it *appear* like the 1117 ### request was a success, when really it wasn't :( 1118 ### there doesn't seem to be an option for lynx to change the exit 1119 ### code based on a 4XX status or so. 1120 ### the closest we can come is using --error_file and parsing that, 1121 ### which is very unreliable ;( 1122 $local->print( $captured ); 1123 $local->close or return; 1124 1125 return $to; 1126} 1127 1128### use /bin/ncftp to fetch files 1129sub _ncftp_fetch { 1130 my $self = shift; 1131 my %hash = @_; 1132 1133 my ($to); 1134 my $tmpl = { 1135 to => { required => 1, store => \$to } 1136 }; 1137 check( $tmpl, \%hash ) or return; 1138 1139 ### we can only set passive mode in interactive sessions, so bail out 1140 ### if $FTP_PASSIVE is set 1141 return if $FTP_PASSIVE; 1142 1143 ### see if we have a ncftp binary ### 1144 my $ncftp; 1145 unless( $ncftp = can_run('ncftp') ) { 1146 $METHOD_FAIL->{'ncftp'} = 1; 1147 return; 1148 } 1149 1150 my $cmd = [ 1151 $ncftp, 1152 '-V', # do not be verbose 1153 '-p', $FROM_EMAIL, # email as password 1154 $self->host, # hostname 1155 dirname($to), # local dir for the file 1156 # remote path to the file 1157 ### DO NOT quote things for IPC::Run, it breaks stuff. 1158 $IPC::Cmd::USE_IPC_RUN 1159 ? File::Spec::Unix->catdir( $self->path, $self->file ) 1160 : QUOTE. File::Spec::Unix->catdir( 1161 $self->path, $self->file ) .QUOTE 1162 1163 ]; 1164 1165 ### shell out ### 1166 my $captured; 1167 unless(run( command => $cmd, 1168 buffer => \$captured, 1169 verbose => $DEBUG ) 1170 ) { 1171 return $self->_error(loc("Command failed: %1", $captured || '')); 1172 } 1173 1174 return $to; 1175 1176} 1177 1178### use /bin/curl to fetch files 1179sub _curl_fetch { 1180 my $self = shift; 1181 my %hash = @_; 1182 1183 my ($to); 1184 my $tmpl = { 1185 to => { required => 1, store => \$to } 1186 }; 1187 check( $tmpl, \%hash ) or return; 1188 my $curl; 1189 unless ( $curl = can_run('curl') ) { 1190 $METHOD_FAIL->{'curl'} = 1; 1191 return; 1192 } 1193 1194 ### these long opts are self explanatory - I like that -jmb 1195 my $cmd = [ $curl, '-q' ]; 1196 1197 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; 1198 1199 push(@$cmd, '--silent') unless $DEBUG; 1200 1201 ### curl does the right thing with passive, regardless ### 1202 if ($self->scheme eq 'ftp') { 1203 push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); 1204 } 1205 1206 ### curl doesn't follow 302 (temporarily moved) etc automatically 1207 ### so we add --location to enable that. 1208 push @$cmd, '--fail', '--location', '--output', $to, $self->uri; 1209 1210 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1211 ### and there's no need for special casing any more. 1212 ### DO NOT quote things for IPC::Run, it breaks stuff. 1213 # $IPC::Cmd::USE_IPC_RUN 1214 # ? ($to, $self->uri) 1215 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1216 1217 1218 my $captured; 1219 unless(run( command => $cmd, 1220 buffer => \$captured, 1221 verbose => $DEBUG ) 1222 ) { 1223 1224 return $self->_error(loc("Command failed: %1", $captured || '')); 1225 } 1226 1227 return $to; 1228 1229} 1230 1231### /usr/bin/fetch fetch! ### 1232sub _fetch_fetch { 1233 my $self = shift; 1234 my %hash = @_; 1235 1236 my ($to); 1237 my $tmpl = { 1238 to => { required => 1, store => \$to } 1239 }; 1240 check( $tmpl, \%hash ) or return; 1241 1242 ### see if we have a fetch binary ### 1243 my $fetch; 1244 unless( HAS_FETCH and $fetch = can_run('fetch') ) { 1245 $METHOD_FAIL->{'fetch'} = 1; 1246 return; 1247 } 1248 1249 ### no verboseness, thanks ### 1250 my $cmd = [ $fetch, '-q' ]; 1251 1252 ### if a timeout is set, add it ### 1253 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; 1254 1255 ### run passive if specified ### 1256 #push @$cmd, '-p' if $FTP_PASSIVE; 1257 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; 1258 1259 ### set the output document, add the uri ### 1260 push @$cmd, '-o', $to, $self->uri; 1261 1262 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1263 ### and there's no need for special casing any more. 1264 ### DO NOT quote things for IPC::Run, it breaks stuff. 1265 # $IPC::Cmd::USE_IPC_RUN 1266 # ? ($to, $self->uri) 1267 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1268 1269 ### shell out ### 1270 my $captured; 1271 unless(run( command => $cmd, 1272 buffer => \$captured, 1273 verbose => $DEBUG 1274 )) { 1275 ### wget creates the output document always, even if the fetch 1276 ### fails.. so unlink it in that case 1277 1 while unlink $to; 1278 1279 return $self->_error(loc( "Command failed: %1", $captured || '' )); 1280 } 1281 1282 return $to; 1283} 1284 1285### use File::Copy for fetching file:// urls ### 1286### 1287### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) 1288### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) 1289### 1290 1291sub _file_fetch { 1292 my $self = shift; 1293 my %hash = @_; 1294 1295 my ($to); 1296 my $tmpl = { 1297 to => { required => 1, store => \$to } 1298 }; 1299 check( $tmpl, \%hash ) or return; 1300 1301 1302 1303 ### prefix a / on unix systems with a file uri, since it would 1304 ### look somewhat like this: 1305 ### file:///home/kane/file 1306 ### whereas windows file uris for 'c:\some\dir\file' might look like: 1307 ### file:///C:/some/dir/file 1308 ### file:///C|/some/dir/file 1309 ### or for a network share '\\host\share\some\dir\file': 1310 ### file:////host/share/some/dir/file 1311 ### 1312 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: 1313 ### file://vms.host.edu/disk$user/my/notes/note12345.txt 1314 ### 1315 1316 my $path = $self->path; 1317 my $vol = $self->vol; 1318 my $share = $self->share; 1319 1320 my $remote; 1321 if (!$share and $self->host) { 1322 return $self->_error(loc( 1323 "Currently %1 cannot handle hosts in %2 urls", 1324 'File::Fetch', 'file://' 1325 )); 1326 } 1327 1328 if( $vol ) { 1329 $path = File::Spec->catdir( split /\//, $path ); 1330 $remote = File::Spec->catpath( $vol, $path, $self->file); 1331 1332 } elsif( $share ) { 1333 ### win32 specific, and a share name, so we wont bother with File::Spec 1334 $path =~ s|/+|\\|g; 1335 $remote = "\\\\".$self->host."\\$share\\$path"; 1336 1337 } else { 1338 ### File::Spec on VMS can not currently handle UNIX syntax. 1339 my $file_class = ON_VMS 1340 ? 'File::Spec::Unix' 1341 : 'File::Spec'; 1342 1343 $remote = $file_class->catfile( $path, $self->file ); 1344 } 1345 1346 ### File::Copy is littered with 'die' statements :( ### 1347 my $rv = eval { File::Copy::copy( $remote, $to ) }; 1348 1349 ### something went wrong ### 1350 if( !$rv or $@ ) { 1351 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", 1352 $remote, $to, $!, $@)); 1353 } 1354 1355 return $to; 1356} 1357 1358### use /usr/bin/rsync to fetch files 1359sub _rsync_fetch { 1360 my $self = shift; 1361 my %hash = @_; 1362 1363 my ($to); 1364 my $tmpl = { 1365 to => { required => 1, store => \$to } 1366 }; 1367 check( $tmpl, \%hash ) or return; 1368 my $rsync; 1369 unless ( $rsync = can_run('rsync') ) { 1370 $METHOD_FAIL->{'rsync'} = 1; 1371 return; 1372 } 1373 1374 my $cmd = [ $rsync ]; 1375 1376 ### XXX: rsync has no I/O timeouts at all, by default 1377 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1378 1379 push(@$cmd, '--quiet') unless $DEBUG; 1380 1381 ### DO NOT quote things for IPC::Run, it breaks stuff. 1382 push @$cmd, $self->uri, $to; 1383 1384 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1385 ### and there's no need for special casing any more. 1386 ### DO NOT quote things for IPC::Run, it breaks stuff. 1387 # $IPC::Cmd::USE_IPC_RUN 1388 # ? ($to, $self->uri) 1389 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1390 1391 my $captured; 1392 unless(run( command => $cmd, 1393 buffer => \$captured, 1394 verbose => $DEBUG ) 1395 ) { 1396 1397 return $self->_error(loc("Command %1 failed: %2", 1398 "@$cmd" || '', $captured || '')); 1399 } 1400 1401 return $to; 1402 1403} 1404 1405################################# 1406# 1407# Error code 1408# 1409################################# 1410 1411=pod 1412 1413=head2 $ff->error([BOOL]) 1414 1415Returns the last encountered error as string. 1416Pass it a true value to get the C<Carp::longmess()> output instead. 1417 1418=cut 1419 1420### error handling the way Archive::Extract does it 1421sub _error { 1422 my $self = shift; 1423 my $error = shift; 1424 1425 $self->_error_msg( $error ); 1426 $self->_error_msg_long( Carp::longmess($error) ); 1427 1428 if( $WARN ) { 1429 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; 1430 } 1431 1432 return; 1433} 1434 1435sub error { 1436 my $self = shift; 1437 return shift() ? $self->_error_msg_long : $self->_error_msg; 1438} 1439 1440 14411; 1442 1443=pod 1444 1445=head1 HOW IT WORKS 1446 1447File::Fetch is able to fetch a variety of uris, by using several 1448external programs and modules. 1449 1450Below is a mapping of what utilities will be used in what order 1451for what schemes, if available: 1452 1453 file => LWP, lftp, file 1454 http => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock 1455 ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp 1456 rsync => rsync 1457 1458If you'd like to disable the use of one or more of these utilities 1459and/or modules, see the C<$BLACKLIST> variable further down. 1460 1461If a utility or module isn't available, it will be marked in a cache 1462(see the C<$METHOD_FAIL> variable further down), so it will not be 1463tried again. The C<fetch> method will only fail when all options are 1464exhausted, and it was not able to retrieve the file. 1465 1466The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD 1467may also have it from C<pkgsrc>. We only check for C<fetch> on those 1468three platforms. 1469 1470C<iosock> is a very limited L<IO::Socket::INET> based mechanism for 1471retrieving C<http> schemed urls. It doesn't follow redirects for instance. 1472 1473A special note about fetching files from an ftp uri: 1474 1475By default, all ftp connections are done in passive mode. To change 1476that, see the C<$FTP_PASSIVE> variable further down. 1477 1478Furthermore, ftp uris only support anonymous connections, so no 1479named user/password pair can be passed along. 1480 1481C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable 1482further down. 1483 1484=head1 GLOBAL VARIABLES 1485 1486The behaviour of File::Fetch can be altered by changing the following 1487global variables: 1488 1489=head2 $File::Fetch::FROM_EMAIL 1490 1491This is the email address that will be sent as your anonymous ftp 1492password. 1493 1494Default is C<File-Fetch@example.com>. 1495 1496=head2 $File::Fetch::USER_AGENT 1497 1498This is the useragent as C<LWP> will report it. 1499 1500Default is C<File::Fetch/$VERSION>. 1501 1502=head2 $File::Fetch::FTP_PASSIVE 1503 1504This variable controls whether the environment variable C<FTP_PASSIVE> 1505and any passive switches to commandline tools will be set to true. 1506 1507Default value is 1. 1508 1509Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch 1510files, since passive mode can only be set interactively for this binary 1511 1512=head2 $File::Fetch::TIMEOUT 1513 1514When set, controls the network timeout (counted in seconds). 1515 1516Default value is 0. 1517 1518=head2 $File::Fetch::WARN 1519 1520This variable controls whether errors encountered internally by 1521C<File::Fetch> should be C<carp>'d or not. 1522 1523Set to false to silence warnings. Inspect the output of the C<error()> 1524method manually to see what went wrong. 1525 1526Defaults to C<true>. 1527 1528=head2 $File::Fetch::DEBUG 1529 1530This enables debugging output when calling commandline utilities to 1531fetch files. 1532This also enables C<Carp::longmess> errors, instead of the regular 1533C<carp> errors. 1534 1535Good for tracking down why things don't work with your particular 1536setup. 1537 1538Default is 0. 1539 1540=head2 $File::Fetch::BLACKLIST 1541 1542This is an array ref holding blacklisted modules/utilities for fetching 1543files with. 1544 1545To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could 1546set $File::Fetch::BLACKLIST to: 1547 1548 $File::Fetch::BLACKLIST = [qw|lwp netftp|] 1549 1550The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. 1551 1552See the note on C<MAPPING> below. 1553 1554=head2 $File::Fetch::METHOD_FAIL 1555 1556This is a hashref registering what modules/utilities were known to fail 1557for fetching files (mostly because they weren't installed). 1558 1559You can reset this cache by assigning an empty hashref to it, or 1560individually remove keys. 1561 1562See the note on C<MAPPING> below. 1563 1564=head1 MAPPING 1565 1566 1567Here's a quick mapping for the utilities/modules, and their names for 1568the $BLACKLIST, $METHOD_FAIL and other internal functions. 1569 1570 LWP => lwp 1571 HTTP::Lite => httplite 1572 HTTP::Tiny => httptiny 1573 Net::FTP => netftp 1574 wget => wget 1575 lynx => lynx 1576 ncftp => ncftp 1577 ftp => ftp 1578 curl => curl 1579 rsync => rsync 1580 lftp => lftp 1581 fetch => fetch 1582 IO::Socket => iosock 1583 1584=head1 FREQUENTLY ASKED QUESTIONS 1585 1586=head2 So how do I use a proxy with File::Fetch? 1587 1588C<File::Fetch> currently only supports proxies with LWP::UserAgent. 1589You will need to set your environment variables accordingly. For 1590example, to use an ftp proxy: 1591 1592 $ENV{ftp_proxy} = 'foo.com'; 1593 1594Refer to the LWP::UserAgent manpage for more details. 1595 1596=head2 I used 'lynx' to fetch a file, but its contents is all wrong! 1597 1598C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, 1599which we in turn capture. If that content is a 'custom' error file 1600(like, say, a C<404 handler>), you will get that contents instead. 1601 1602Sadly, C<lynx> doesn't support any options to return a different exit 1603code on non-C<200 OK> status, giving us no way to tell the difference 1604between a 'successful' fetch and a custom error page. 1605 1606Therefor, we recommend to only use C<lynx> as a last resort. This is 1607why it is at the back of our list of methods to try as well. 1608 1609=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? 1610 1611C<File::Fetch> is relatively smart about things. When trying to write 1612a file to disk, it removes the C<query parameters> (see the 1613C<output_file> method for details) from the file name before creating 1614it. In most cases this suffices. 1615 1616If you have any other characters you need to escape, please install 1617the C<URI::Escape> module from CPAN, and pre-encode your URI before 1618passing it to C<File::Fetch>. You can read about the details of URIs 1619and URI encoding here: 1620 1621 http://www.faqs.org/rfcs/rfc2396.html 1622 1623=head1 TODO 1624 1625=over 4 1626 1627=item Implement $PREFER_BIN 1628 1629To indicate to rather use commandline tools than modules 1630 1631=back 1632 1633=head1 BUG REPORTS 1634 1635Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>. 1636 1637=head1 AUTHOR 1638 1639This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 1640 1641=head1 COPYRIGHT 1642 1643This library is free software; you may redistribute and/or modify it 1644under the same terms as Perl itself. 1645 1646 1647=cut 1648 1649# Local variables: 1650# c-indentation-style: bsd 1651# c-basic-offset: 4 1652# indent-tabs-mode: nil 1653# End: 1654# vim: expandtab shiftwidth=4: 1655 1656 1657 1658 1659