1package File::Fetch; 2 3use strict; 4use FileHandle; 5use File::Temp; 6use File::Copy; 7use File::Spec; 8use File::Spec::Unix; 9use File::Basename qw[dirname]; 10 11use Cwd qw[cwd]; 12use Carp qw[carp]; 13use IPC::Cmd qw[can_run run QUOTE]; 14use File::Path qw[mkpath]; 15use File::Temp qw[tempdir]; 16use Params::Check qw[check]; 17use Module::Load::Conditional qw[can_load]; 18use Locale::Maketext::Simple Style => 'gettext'; 19 20use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT 21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS 22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN 23 ]; 24 25$VERSION = '0.32'; 26$VERSION = eval $VERSION; # avoid warnings with development releases 27$PREFER_BIN = 0; # XXX TODO implement 28$FROM_EMAIL = 'File-Fetch@example.com'; 29$USER_AGENT = "File::Fetch/$VERSION"; 30$BLACKLIST = [qw|ftp|]; 31$METHOD_FAIL = { }; 32$FTP_PASSIVE = 1; 33$TIMEOUT = 0; 34$DEBUG = 0; 35$WARN = 1; 36 37### methods available to fetch the file depending on the scheme 38$METHODS = { 39 http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], 40 ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], 41 file => [ qw|lwp lftp file| ], 42 rsync => [ qw|rsync| ] 43}; 44 45### silly warnings ### 46local $Params::Check::VERBOSE = 1; 47local $Params::Check::VERBOSE = 1; 48local $Module::Load::Conditional::VERBOSE = 0; 49local $Module::Load::Conditional::VERBOSE = 0; 50 51### see what OS we are on, important for file:// uris ### 52use constant ON_WIN => ($^O eq 'MSWin32'); 53use constant ON_VMS => ($^O eq 'VMS'); 54use constant ON_UNIX => (!ON_WIN); 55use constant HAS_VOL => (ON_WIN); 56use constant HAS_SHARE => (ON_WIN); 57use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); 58 59=pod 60 61=head1 NAME 62 63File::Fetch - A generic file fetching mechanism 64 65=head1 SYNOPSIS 66 67 use File::Fetch; 68 69 ### build a File::Fetch object ### 70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); 71 72 ### fetch the uri to cwd() ### 73 my $where = $ff->fetch() or die $ff->error; 74 75 ### fetch the uri to /tmp ### 76 my $where = $ff->fetch( to => '/tmp' ); 77 78 ### parsed bits from the uri ### 79 $ff->uri; 80 $ff->scheme; 81 $ff->host; 82 $ff->path; 83 $ff->file; 84 85=head1 DESCRIPTION 86 87File::Fetch is a generic file fetching mechanism. 88 89It allows you to fetch any file pointed to by a C<ftp>, C<http>, 90C<file>, or C<rsync> uri by a number of different means. 91 92See the C<HOW IT WORKS> section further down for details. 93 94=head1 ACCESSORS 95 96A C<File::Fetch> object has the following accessors 97 98=over 4 99 100=item $ff->uri 101 102The uri you passed to the constructor 103 104=item $ff->scheme 105 106The scheme from the uri (like 'file', 'http', etc) 107 108=item $ff->host 109 110The hostname in the uri. Will be empty if host was originally 111'localhost' for a 'file://' url. 112 113=item $ff->vol 114 115On operating systems with the concept of a volume the second element 116of a file:// is considered to the be volume specification for the file. 117Thus on Win32 this routine returns the volume, on other operating 118systems this returns nothing. 119 120On Windows this value may be empty if the uri is to a network share, in 121which case the 'share' property will be defined. Additionally, volume 122specifications that use '|' as ':' will be converted on read to use ':'. 123 124On VMS, which has a volume concept, this field will be empty because VMS 125file specifications are converted to absolute UNIX format and the volume 126information is transparently included. 127 128=item $ff->share 129 130On systems with the concept of a network share (currently only Windows) returns 131the sharename from a file://// url. On other operating systems returns empty. 132 133=item $ff->path 134 135The path from the uri, will be at least a single '/'. 136 137=item $ff->file 138 139The name of the remote file. For the local file name, the 140result of $ff->output_file will be used. 141 142=cut 143 144 145########################## 146### Object & Accessors ### 147########################## 148 149{ 150 ### template for autogenerated accessors ### 151 my $Tmpl = { 152 scheme => { default => 'http' }, 153 host => { default => 'localhost' }, 154 path => { default => '/' }, 155 file => { required => 1 }, 156 uri => { required => 1 }, 157 vol => { default => '' }, # windows for file:// uris 158 share => { default => '' }, # windows for file:// uris 159 _error_msg => { no_override => 1 }, 160 _error_msg_long => { no_override => 1 }, 161 }; 162 163 for my $method ( keys %$Tmpl ) { 164 no strict 'refs'; 165 *$method = sub { 166 my $self = shift; 167 $self->{$method} = $_[0] if @_; 168 return $self->{$method}; 169 } 170 } 171 172 sub _create { 173 my $class = shift; 174 my %hash = @_; 175 176 my $args = check( $Tmpl, \%hash ) or return; 177 178 bless $args, $class; 179 180 if( lc($args->scheme) ne 'file' and not $args->host ) { 181 return $class->_error(loc( 182 "Hostname required when fetching from '%1'",$args->scheme)); 183 } 184 185 for (qw[path file]) { 186 unless( $args->$_() ) { # 5.5.x needs the () 187 return $class->_error(loc("No '%1' specified",$_)); 188 } 189 } 190 191 return $args; 192 } 193} 194 195=item $ff->output_file 196 197The name of the output file. This is the same as $ff->file, 198but any query parameters are stripped off. For example: 199 200 http://example.com/index.html?x=y 201 202would make the output file be C<index.html> rather than 203C<index.html?x=y>. 204 205=back 206 207=cut 208 209sub output_file { 210 my $self = shift; 211 my $file = $self->file; 212 213 $file =~ s/\?.*$//g; 214 215 return $file; 216} 217 218### XXX do this or just point to URI::Escape? 219# =head2 $esc_uri = $ff->escaped_uri 220# 221# =cut 222# 223# ### most of this is stolen straight from URI::escape 224# { ### Build a char->hex map 225# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 226# 227# sub escaped_uri { 228# my $self = shift; 229# my $uri = $self->uri; 230# 231# ### Default unsafe characters. RFC 2732 ^(uric - reserved) 232# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ 233# $escapes{$1} || $self->_fail_hi($1)/ge; 234# 235# return $uri; 236# } 237# 238# sub _fail_hi { 239# my $self = shift; 240# my $char = shift; 241# 242# $self->_error(loc( 243# "Can't escape '%1', try using the '%2' module instead", 244# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' 245# )); 246# } 247# 248# sub output_file { 249# 250# } 251# 252# 253# } 254 255=head1 METHODS 256 257=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); 258 259Parses the uri and creates a corresponding File::Fetch::Item object, 260that is ready to be C<fetch>ed and returns it. 261 262Returns false on failure. 263 264=cut 265 266sub new { 267 my $class = shift; 268 my %hash = @_; 269 270 my ($uri); 271 my $tmpl = { 272 uri => { required => 1, store => \$uri }, 273 }; 274 275 check( $tmpl, \%hash ) or return; 276 277 ### parse the uri to usable parts ### 278 my $href = $class->_parse_uri( $uri ) or return; 279 280 ### make it into a FFI object ### 281 my $ff = $class->_create( %$href ) or return; 282 283 284 ### return the object ### 285 return $ff; 286} 287 288### parses an uri to a hash structure: 289### 290### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) 291### 292### becomes: 293### 294### $href = { 295### scheme => 'ftp', 296### host => 'ftp.cpan.org', 297### path => '/pub/mirror', 298### file => 'index.html' 299### }; 300### 301### In the case of file:// urls there maybe be additional fields 302### 303### For systems with volume specifications such as Win32 there will be 304### a volume specifier provided in the 'vol' field. 305### 306### 'vol' => 'volumename' 307### 308### For windows file shares there may be a 'share' key specified 309### 310### 'share' => 'sharename' 311### 312### Note that the rules of what a file:// url means vary by the operating system 313### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious 314### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 315### not '/foo/bar.txt' 316### 317### Similarly if the host interpreting the url is VMS then 318### file:///disk$user/my/notes/note12345.txt' means 319### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as 320### if it is unix where it means /disk$user/my/notes/note12345.txt'. 321### Except for some cases in the File::Spec methods, Perl on VMS will generally 322### handle UNIX format file specifications. 323### 324### This means it is impossible to serve certain file:// urls on certain systems. 325### 326### Thus are the problems with a protocol-less specification. :-( 327### 328 329sub _parse_uri { 330 my $self = shift; 331 my $uri = shift or return; 332 333 my $href = { uri => $uri }; 334 335 ### find the scheme ### 336 $uri =~ s|^(\w+)://||; 337 $href->{scheme} = $1; 338 339 ### See rfc 1738 section 3.10 340 ### http://www.faqs.org/rfcs/rfc1738.html 341 ### And wikipedia for more on windows file:// urls 342 ### http://en.wikipedia.org/wiki/File:// 343 if( $href->{scheme} eq 'file' ) { 344 345 my @parts = split '/',$uri; 346 347 ### file://hostname/... 348 ### file://hostname/... 349 ### normalize file://localhost with file:/// 350 $href->{host} = $parts[0] || ''; 351 352 ### index in @parts where the path components begin; 353 my $index = 1; 354 355 ### file:////hostname/sharename/blah.txt 356 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { 357 358 $href->{host} = $parts[2] || ''; # avoid warnings 359 $href->{share} = $parts[3] || ''; # avoid warnings 360 361 $index = 4 # index after the share 362 363 ### file:///D|/blah.txt 364 ### file:///D:/blah.txt 365 } elsif (HAS_VOL) { 366 367 ### this code comes from dmq's patch, but: 368 ### XXX if volume is empty, wouldn't that be an error? --kane 369 ### if so, our file://localhost test needs to be fixed as wel 370 $href->{vol} = $parts[1] || ''; 371 372 ### correct D| style colume descriptors 373 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; 374 375 $index = 2; # index after the volume 376 } 377 378 ### rebuild the path from the leftover parts; 379 $href->{path} = join '/', '', splice( @parts, $index, $#parts ); 380 381 } else { 382 ### using anything but qw() in hash slices may produce warnings 383 ### in older perls :-( 384 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s; 385 } 386 387 ### split the path into file + dir ### 388 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); 389 $href->{path} = $parts[1]; 390 $href->{file} = $parts[2]; 391 } 392 393 ### host will be empty if the target was 'localhost' and the 394 ### scheme was 'file' 395 $href->{host} = '' if ($href->{host} eq 'localhost') and 396 ($href->{scheme} eq 'file'); 397 398 return $href; 399} 400 401=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) 402 403Fetches the file you requested and returns the full path to the file. 404 405By default it writes to C<cwd()>, but you can override that by specifying 406the C<to> argument: 407 408 ### file fetch to /tmp, full path to the file in $where 409 $where = $ff->fetch( to => '/tmp' ); 410 411 ### file slurped into $scalar, full path to the file in $where 412 ### file is downloaded to a temp directory and cleaned up at exit time 413 $where = $ff->fetch( to => \$scalar ); 414 415Returns the full path to the downloaded file on success, and false 416on failure. 417 418=cut 419 420sub fetch { 421 my $self = shift or return; 422 my %hash = @_; 423 424 my $target; 425 my $tmpl = { 426 to => { default => cwd(), store => \$target }, 427 }; 428 429 check( $tmpl, \%hash ) or return; 430 431 my ($to, $fh); 432 ### you want us to slurp the contents 433 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 434 $to = tempdir( 'FileFetch.XXXXXX', CLEANUP => 1 ); 435 436 ### plain old fetch 437 } else { 438 $to = $target; 439 440 ### On VMS force to VMS format so File::Spec will work. 441 $to = VMS::Filespec::vmspath($to) if ON_VMS; 442 443 ### create the path if it doesn't exist yet ### 444 unless( -d $to ) { 445 eval { mkpath( $to ) }; 446 447 return $self->_error(loc("Could not create path '%1'",$to)) if $@; 448 } 449 } 450 451 ### set passive ftp if required ### 452 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; 453 454 ### we dont use catfile on win32 because if we are using a cygwin tool 455 ### under cmd.exe they wont understand windows style separators. 456 my $out_to = ON_WIN ? $to.'/'.$self->output_file 457 : File::Spec->catfile( $to, $self->output_file ); 458 459 for my $method ( @{ $METHODS->{$self->scheme} } ) { 460 my $sub = '_'.$method.'_fetch'; 461 462 unless( __PACKAGE__->can($sub) ) { 463 $self->_error(loc("Cannot call method for '%1' -- WEIRD!", 464 $method)); 465 next; 466 } 467 468 ### method is blacklisted ### 469 next if grep { lc $_ eq $method } @$BLACKLIST; 470 471 ### method is known to fail ### 472 next if $METHOD_FAIL->{$method}; 473 474 ### there's serious issues with IPC::Run and quoting of command 475 ### line arguments. using quotes in the wrong place breaks things, 476 ### and in the case of say, 477 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document 478 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" 479 ### it doesn't matter how you quote, it always fails. 480 local $IPC::Cmd::USE_IPC_RUN = 0; 481 482 if( my $file = $self->$sub( 483 to => $out_to 484 )){ 485 486 unless( -e $file && -s _ ) { 487 $self->_error(loc("'%1' said it fetched '%2', ". 488 "but it was not created",$method,$file)); 489 490 ### mark the failure ### 491 $METHOD_FAIL->{$method} = 1; 492 493 next; 494 495 } else { 496 497 ### slurp mode? 498 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 499 500 ### open the file 501 open my $fh, "<$file" or do { 502 $self->_error( 503 loc("Could not open '%1': %2", $file, $!)); 504 return; 505 }; 506 507 ### slurp 508 $$target = do { local $/; <$fh> }; 509 510 } 511 512 my $abs = File::Spec->rel2abs( $file ); 513 return $abs; 514 515 } 516 } 517 } 518 519 520 ### if we got here, we looped over all methods, but we weren't able 521 ### to fetch it. 522 return; 523} 524 525######################## 526### _*_fetch methods ### 527######################## 528 529### LWP fetching ### 530sub _lwp_fetch { 531 my $self = shift; 532 my %hash = @_; 533 534 my ($to); 535 my $tmpl = { 536 to => { required => 1, store => \$to } 537 }; 538 check( $tmpl, \%hash ) or return; 539 540 ### modules required to download with lwp ### 541 my $use_list = { 542 LWP => '0.0', 543 'LWP::UserAgent' => '0.0', 544 'HTTP::Request' => '0.0', 545 'HTTP::Status' => '0.0', 546 URI => '0.0', 547 548 }; 549 550 if( can_load(modules => $use_list) ) { 551 552 ### setup the uri object 553 my $uri = URI->new( File::Spec::Unix->catfile( 554 $self->path, $self->file 555 ) ); 556 557 ### special rules apply for file:// uris ### 558 $uri->scheme( $self->scheme ); 559 $uri->host( $self->scheme eq 'file' ? '' : $self->host ); 560 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file'; 561 562 ### set up the useragent object 563 my $ua = LWP::UserAgent->new(); 564 $ua->timeout( $TIMEOUT ) if $TIMEOUT; 565 $ua->agent( $USER_AGENT ); 566 $ua->from( $FROM_EMAIL ); 567 $ua->env_proxy; 568 569 my $res = $ua->mirror($uri, $to) or return; 570 571 ### uptodate or fetched ok ### 572 if ( $res->code == 304 or $res->code == 200 ) { 573 return $to; 574 575 } else { 576 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", 577 $res->code, HTTP::Status::status_message($res->code), 578 $res->status_line)); 579 } 580 581 } else { 582 $METHOD_FAIL->{'lwp'} = 1; 583 return; 584 } 585} 586 587### HTTP::Tiny fetching ### 588sub _httptiny_fetch { 589 my $self = shift; 590 my %hash = @_; 591 592 my ($to); 593 my $tmpl = { 594 to => { required => 1, store => \$to } 595 }; 596 check( $tmpl, \%hash ) or return; 597 598 my $use_list = { 599 'HTTP::Tiny' => '0.008', 600 601 }; 602 603 if( can_load(modules => $use_list) ) { 604 605 my $uri = $self->uri; 606 607 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); 608 609 my $rc = $http->mirror( $uri, $to ); 610 611 unless ( $rc->{success} ) { 612 613 return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]", 614 $rc->{status}, $rc->{reason} ) ); 615 616 } 617 618 return $to; 619 620 } 621 else { 622 $METHOD_FAIL->{'httptiny'} = 1; 623 return; 624 } 625} 626 627### HTTP::Lite fetching ### 628sub _httplite_fetch { 629 my $self = shift; 630 my %hash = @_; 631 632 my ($to); 633 my $tmpl = { 634 to => { required => 1, store => \$to } 635 }; 636 check( $tmpl, \%hash ) or return; 637 638 ### modules required to download with lwp ### 639 my $use_list = { 640 'HTTP::Lite' => '2.2', 641 642 }; 643 644 # https://github.com/dagolden/cpanpm/compare/master...private%2Fuse-http-lite 645 646 if( can_load(modules => $use_list) ) { 647 648 my $uri = $self->uri; 649 my $retries = 0; 650 651 RETRIES: while ( $retries++ < 5 ) { 652 653 my $http = HTTP::Lite->new(); 654 # Naughty naughty but there isn't any accessor/setter 655 $http->{timeout} = $TIMEOUT if $TIMEOUT; 656 $http->http11_mode(1); 657 658 my $fh = FileHandle->new; 659 660 unless ( $fh->open($to,'>') ) { 661 return $self->_error(loc( 662 "Could not open '%1' for writing: %2",$to,$!)); 663 } 664 665 $fh->autoflush(1); 666 667 binmode $fh; 668 669 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); 670 671 close $fh; 672 673 if ( $rc == 301 || $rc == 302 ) { 674 my $loc; 675 HEADERS: for ($http->headers_array) { 676 /Location: (\S+)/ and $loc = $1, last HEADERS; 677 } 678 #$loc or last; # Think we should squeal here. 679 if ($loc =~ m!^/!) { 680 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; 681 $uri .= $loc; 682 } 683 else { 684 $uri = $loc; 685 } 686 next RETRIES; 687 } 688 elsif ( $rc == 200 ) { 689 return $to; 690 } 691 else { 692 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", 693 $rc, $http->status_message)); 694 } 695 696 } # Loop for 5 retries. 697 698 return $self->_error("Fetch failed! Gave up after 5 tries"); 699 700 } else { 701 $METHOD_FAIL->{'httplite'} = 1; 702 return; 703 } 704} 705 706### Simple IO::Socket::INET fetching ### 707sub _iosock_fetch { 708 my $self = shift; 709 my %hash = @_; 710 711 my ($to); 712 my $tmpl = { 713 to => { required => 1, store => \$to } 714 }; 715 check( $tmpl, \%hash ) or return; 716 717 my $use_list = { 718 'IO::Socket::INET' => '0.0', 719 'IO::Select' => '0.0', 720 }; 721 722 if( can_load(modules => $use_list) ) { 723 my $sock = IO::Socket::INET->new( 724 PeerHost => $self->host, 725 ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ), 726 ); 727 728 unless ( $sock ) { 729 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); 730 } 731 732 my $fh = FileHandle->new; 733 734 # Check open() 735 736 unless ( $fh->open($to,'>') ) { 737 return $self->_error(loc( 738 "Could not open '%1' for writing: %2",$to,$!)); 739 } 740 741 $fh->autoflush(1); 742 binmode $fh; 743 744 my $path = File::Spec::Unix->catfile( $self->path, $self->file ); 745 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; 746 $sock->send( $req ); 747 748 my $select = IO::Select->new( $sock ); 749 750 my $resp = ''; 751 my $normal = 0; 752 while ( $select->can_read( $TIMEOUT || 60 ) ) { 753 my $ret = $sock->sysread( $resp, 4096, length($resp) ); 754 if ( !defined $ret or $ret == 0 ) { 755 $select->remove( $sock ); 756 $normal++; 757 } 758 } 759 close $sock; 760 761 unless ( $normal ) { 762 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); 763 } 764 765 # Check the "response" 766 # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) 767 $resp =~ s/^(\x0d?\x0a)+//; 768 # Check it is an HTTP response 769 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { 770 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); 771 } 772 773 # Check for OK 774 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; 775 unless ( $code eq '200' ) { 776 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); 777 } 778 779 { 780 local $\; 781 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; 782 } 783 close $fh; 784 return $to; 785 786 } else { 787 $METHOD_FAIL->{'iosock'} = 1; 788 return; 789 } 790} 791 792### Net::FTP fetching 793sub _netftp_fetch { 794 my $self = shift; 795 my %hash = @_; 796 797 my ($to); 798 my $tmpl = { 799 to => { required => 1, store => \$to } 800 }; 801 check( $tmpl, \%hash ) or return; 802 803 ### required modules ### 804 my $use_list = { 'Net::FTP' => 0 }; 805 806 if( can_load( modules => $use_list ) ) { 807 808 ### make connection ### 809 my $ftp; 810 my @options = ($self->host); 811 push(@options, Timeout => $TIMEOUT) if $TIMEOUT; 812 unless( $ftp = Net::FTP->new( @options ) ) { 813 return $self->_error(loc("Ftp creation failed: %1",$@)); 814 } 815 816 ### login ### 817 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { 818 return $self->_error(loc("Could not login to '%1'",$self->host)); 819 } 820 821 ### set binary mode, just in case ### 822 $ftp->binary; 823 824 ### create the remote path 825 ### remember remote paths are unix paths! [#11483] 826 my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); 827 828 ### fetch the file ### 829 my $target; 830 unless( $target = $ftp->get( $remote, $to ) ) { 831 return $self->_error(loc("Could not fetch '%1' from '%2'", 832 $remote, $self->host)); 833 } 834 835 ### log out ### 836 $ftp->quit; 837 838 return $target; 839 840 } else { 841 $METHOD_FAIL->{'netftp'} = 1; 842 return; 843 } 844} 845 846### /bin/wget fetch ### 847sub _wget_fetch { 848 my $self = shift; 849 my %hash = @_; 850 851 my ($to); 852 my $tmpl = { 853 to => { required => 1, store => \$to } 854 }; 855 check( $tmpl, \%hash ) or return; 856 857 ### see if we have a wget binary ### 858 if( my $wget = can_run('wget') ) { 859 860 ### no verboseness, thanks ### 861 my $cmd = [ $wget, '--quiet' ]; 862 863 ### if a timeout is set, add it ### 864 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 865 866 ### run passive if specified ### 867 push @$cmd, '--passive-ftp' if $FTP_PASSIVE; 868 869 ### set the output document, add the uri ### 870 push @$cmd, '--output-document', $to, $self->uri; 871 872 ### with IPC::Cmd > 0.41, this is fixed in teh library, 873 ### and there's no need for special casing any more. 874 ### DO NOT quote things for IPC::Run, it breaks stuff. 875 # $IPC::Cmd::USE_IPC_RUN 876 # ? ($to, $self->uri) 877 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 878 879 ### shell out ### 880 my $captured; 881 unless(run( command => $cmd, 882 buffer => \$captured, 883 verbose => $DEBUG 884 )) { 885 ### wget creates the output document always, even if the fetch 886 ### fails.. so unlink it in that case 887 1 while unlink $to; 888 889 return $self->_error(loc( "Command failed: %1", $captured || '' )); 890 } 891 892 return $to; 893 894 } else { 895 $METHOD_FAIL->{'wget'} = 1; 896 return; 897 } 898} 899 900### /bin/lftp fetch ### 901sub _lftp_fetch { 902 my $self = shift; 903 my %hash = @_; 904 905 my ($to); 906 my $tmpl = { 907 to => { required => 1, store => \$to } 908 }; 909 check( $tmpl, \%hash ) or return; 910 911 ### see if we have a wget binary ### 912 if( my $lftp = can_run('lftp') ) { 913 914 ### no verboseness, thanks ### 915 my $cmd = [ $lftp, '-f' ]; 916 917 my $fh = File::Temp->new; 918 919 my $str; 920 921 ### if a timeout is set, add it ### 922 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; 923 924 ### run passive if specified ### 925 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; 926 927 ### set the output document, add the uri ### 928 ### quote the URI, because lftp supports certain shell 929 ### expansions, most notably & for backgrounding. 930 ### ' quote does nto work, must be " 931 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; 932 933 if( $DEBUG ) { 934 my $pp_str = join ' ', split $/, $str; 935 print "# lftp command: $pp_str\n"; 936 } 937 938 ### write straight to the file. 939 $fh->autoflush(1); 940 print $fh $str; 941 942 ### the command needs to be 1 string to be executed 943 push @$cmd, $fh->filename; 944 945 ### with IPC::Cmd > 0.41, this is fixed in teh library, 946 ### and there's no need for special casing any more. 947 ### DO NOT quote things for IPC::Run, it breaks stuff. 948 # $IPC::Cmd::USE_IPC_RUN 949 # ? ($to, $self->uri) 950 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 951 952 953 ### shell out ### 954 my $captured; 955 unless(run( command => $cmd, 956 buffer => \$captured, 957 verbose => $DEBUG 958 )) { 959 ### wget creates the output document always, even if the fetch 960 ### fails.. so unlink it in that case 961 1 while unlink $to; 962 963 return $self->_error(loc( "Command failed: %1", $captured || '' )); 964 } 965 966 return $to; 967 968 } else { 969 $METHOD_FAIL->{'lftp'} = 1; 970 return; 971 } 972} 973 974 975 976### /bin/ftp fetch ### 977sub _ftp_fetch { 978 my $self = shift; 979 my %hash = @_; 980 981 my ($to); 982 my $tmpl = { 983 to => { required => 1, store => \$to } 984 }; 985 check( $tmpl, \%hash ) or return; 986 987 ### see if we have a ftp binary ### 988 if( my $ftp = can_run('ftp') ) { 989 990 my $fh = FileHandle->new; 991 992 local $SIG{CHLD} = 'IGNORE'; 993 994 unless ($fh->open("|$ftp -n")) { 995 return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); 996 } 997 998 my @dialog = ( 999 "lcd " . dirname($to), 1000 "open " . $self->host, 1001 "user anonymous $FROM_EMAIL", 1002 "cd /", 1003 "cd " . $self->path, 1004 "binary", 1005 "get " . $self->file . " " . $self->output_file, 1006 "quit", 1007 ); 1008 1009 foreach (@dialog) { $fh->print($_, "\n") } 1010 $fh->close or return; 1011 1012 return $to; 1013 } 1014} 1015 1016### lynx is stupid - it decompresses any .gz file it finds to be text 1017### use /bin/lynx to fetch files 1018sub _lynx_fetch { 1019 my $self = shift; 1020 my %hash = @_; 1021 1022 my ($to); 1023 my $tmpl = { 1024 to => { required => 1, store => \$to } 1025 }; 1026 check( $tmpl, \%hash ) or return; 1027 1028 ### see if we have a lynx binary ### 1029 if( my $lynx = can_run('lynx') ) { 1030 1031 unless( IPC::Cmd->can_capture_buffer ) { 1032 $METHOD_FAIL->{'lynx'} = 1; 1033 1034 return $self->_error(loc( 1035 "Can not capture buffers. Can not use '%1' to fetch files", 1036 'lynx' )); 1037 } 1038 1039 ### check if the HTTP resource exists ### 1040 if ($self->uri =~ /^https?:\/\//i) { 1041 my $cmd = [ 1042 $lynx, 1043 '-head', 1044 '-source', 1045 "-auth=anonymous:$FROM_EMAIL", 1046 ]; 1047 1048 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1049 1050 push @$cmd, $self->uri; 1051 1052 ### shell out ### 1053 my $head; 1054 unless(run( command => $cmd, 1055 buffer => \$head, 1056 verbose => $DEBUG ) 1057 ) { 1058 return $self->_error(loc("Command failed: %1", $head || '')); 1059 } 1060 1061 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { 1062 return $self->_error(loc("Command failed: %1", $head || '')); 1063 } 1064 } 1065 1066 ### write to the output file ourselves, since lynx ass_u_mes to much 1067 my $local = FileHandle->new(">$to") 1068 or return $self->_error(loc( 1069 "Could not open '%1' for writing: %2",$to,$!)); 1070 1071 ### dump to stdout ### 1072 my $cmd = [ 1073 $lynx, 1074 '-source', 1075 "-auth=anonymous:$FROM_EMAIL", 1076 ]; 1077 1078 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1079 1080 ### DO NOT quote things for IPC::Run, it breaks stuff. 1081 push @$cmd, $self->uri; 1082 1083 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1084 ### and there's no need for special casing any more. 1085 ### DO NOT quote things for IPC::Run, it breaks stuff. 1086 # $IPC::Cmd::USE_IPC_RUN 1087 # ? $self->uri 1088 # : QUOTE. $self->uri .QUOTE; 1089 1090 1091 ### shell out ### 1092 my $captured; 1093 unless(run( command => $cmd, 1094 buffer => \$captured, 1095 verbose => $DEBUG ) 1096 ) { 1097 return $self->_error(loc("Command failed: %1", $captured || '')); 1098 } 1099 1100 ### print to local file ### 1101 ### XXX on a 404 with a special error page, $captured will actually 1102 ### hold the contents of that page, and make it *appear* like the 1103 ### request was a success, when really it wasn't :( 1104 ### there doesn't seem to be an option for lynx to change the exit 1105 ### code based on a 4XX status or so. 1106 ### the closest we can come is using --error_file and parsing that, 1107 ### which is very unreliable ;( 1108 $local->print( $captured ); 1109 $local->close or return; 1110 1111 return $to; 1112 1113 } else { 1114 $METHOD_FAIL->{'lynx'} = 1; 1115 return; 1116 } 1117} 1118 1119### use /bin/ncftp to fetch files 1120sub _ncftp_fetch { 1121 my $self = shift; 1122 my %hash = @_; 1123 1124 my ($to); 1125 my $tmpl = { 1126 to => { required => 1, store => \$to } 1127 }; 1128 check( $tmpl, \%hash ) or return; 1129 1130 ### we can only set passive mode in interactive sessions, so bail out 1131 ### if $FTP_PASSIVE is set 1132 return if $FTP_PASSIVE; 1133 1134 ### see if we have a ncftp binary ### 1135 if( my $ncftp = can_run('ncftp') ) { 1136 1137 my $cmd = [ 1138 $ncftp, 1139 '-V', # do not be verbose 1140 '-p', $FROM_EMAIL, # email as password 1141 $self->host, # hostname 1142 dirname($to), # local dir for the file 1143 # remote path to the file 1144 ### DO NOT quote things for IPC::Run, it breaks stuff. 1145 $IPC::Cmd::USE_IPC_RUN 1146 ? File::Spec::Unix->catdir( $self->path, $self->file ) 1147 : QUOTE. File::Spec::Unix->catdir( 1148 $self->path, $self->file ) .QUOTE 1149 1150 ]; 1151 1152 ### shell out ### 1153 my $captured; 1154 unless(run( command => $cmd, 1155 buffer => \$captured, 1156 verbose => $DEBUG ) 1157 ) { 1158 return $self->_error(loc("Command failed: %1", $captured || '')); 1159 } 1160 1161 return $to; 1162 1163 } else { 1164 $METHOD_FAIL->{'ncftp'} = 1; 1165 return; 1166 } 1167} 1168 1169### use /bin/curl to fetch files 1170sub _curl_fetch { 1171 my $self = shift; 1172 my %hash = @_; 1173 1174 my ($to); 1175 my $tmpl = { 1176 to => { required => 1, store => \$to } 1177 }; 1178 check( $tmpl, \%hash ) or return; 1179 1180 if (my $curl = can_run('curl')) { 1181 1182 ### these long opts are self explanatory - I like that -jmb 1183 my $cmd = [ $curl, '-q' ]; 1184 1185 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; 1186 1187 push(@$cmd, '--silent') unless $DEBUG; 1188 1189 ### curl does the right thing with passive, regardless ### 1190 if ($self->scheme eq 'ftp') { 1191 push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); 1192 } 1193 1194 ### curl doesn't follow 302 (temporarily moved) etc automatically 1195 ### so we add --location to enable that. 1196 push @$cmd, '--fail', '--location', '--output', $to, $self->uri; 1197 1198 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1199 ### and there's no need for special casing any more. 1200 ### DO NOT quote things for IPC::Run, it breaks stuff. 1201 # $IPC::Cmd::USE_IPC_RUN 1202 # ? ($to, $self->uri) 1203 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1204 1205 1206 my $captured; 1207 unless(run( command => $cmd, 1208 buffer => \$captured, 1209 verbose => $DEBUG ) 1210 ) { 1211 1212 return $self->_error(loc("Command failed: %1", $captured || '')); 1213 } 1214 1215 return $to; 1216 1217 } else { 1218 $METHOD_FAIL->{'curl'} = 1; 1219 return; 1220 } 1221} 1222 1223### /usr/bin/fetch fetch! ### 1224sub _fetch_fetch { 1225 my $self = shift; 1226 my %hash = @_; 1227 1228 my ($to); 1229 my $tmpl = { 1230 to => { required => 1, store => \$to } 1231 }; 1232 check( $tmpl, \%hash ) or return; 1233 1234 ### see if we have a wget binary ### 1235 if( HAS_FETCH and my $fetch = can_run('fetch') ) { 1236 1237 ### no verboseness, thanks ### 1238 my $cmd = [ $fetch, '-q' ]; 1239 1240 ### if a timeout is set, add it ### 1241 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; 1242 1243 ### run passive if specified ### 1244 #push @$cmd, '-p' if $FTP_PASSIVE; 1245 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; 1246 1247 ### set the output document, add the uri ### 1248 push @$cmd, '-o', $to, $self->uri; 1249 1250 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1251 ### and there's no need for special casing any more. 1252 ### DO NOT quote things for IPC::Run, it breaks stuff. 1253 # $IPC::Cmd::USE_IPC_RUN 1254 # ? ($to, $self->uri) 1255 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1256 1257 ### shell out ### 1258 my $captured; 1259 unless(run( command => $cmd, 1260 buffer => \$captured, 1261 verbose => $DEBUG 1262 )) { 1263 ### wget creates the output document always, even if the fetch 1264 ### fails.. so unlink it in that case 1265 1 while unlink $to; 1266 1267 return $self->_error(loc( "Command failed: %1", $captured || '' )); 1268 } 1269 1270 return $to; 1271 1272 } else { 1273 $METHOD_FAIL->{'wget'} = 1; 1274 return; 1275 } 1276} 1277 1278### use File::Copy for fetching file:// urls ### 1279### 1280### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) 1281### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) 1282### 1283 1284sub _file_fetch { 1285 my $self = shift; 1286 my %hash = @_; 1287 1288 my ($to); 1289 my $tmpl = { 1290 to => { required => 1, store => \$to } 1291 }; 1292 check( $tmpl, \%hash ) or return; 1293 1294 1295 1296 ### prefix a / on unix systems with a file uri, since it would 1297 ### look somewhat like this: 1298 ### file:///home/kane/file 1299 ### whereas windows file uris for 'c:\some\dir\file' might look like: 1300 ### file:///C:/some/dir/file 1301 ### file:///C|/some/dir/file 1302 ### or for a network share '\\host\share\some\dir\file': 1303 ### file:////host/share/some/dir/file 1304 ### 1305 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: 1306 ### file://vms.host.edu/disk$user/my/notes/note12345.txt 1307 ### 1308 1309 my $path = $self->path; 1310 my $vol = $self->vol; 1311 my $share = $self->share; 1312 1313 my $remote; 1314 if (!$share and $self->host) { 1315 return $self->_error(loc( 1316 "Currently %1 cannot handle hosts in %2 urls", 1317 'File::Fetch', 'file://' 1318 )); 1319 } 1320 1321 if( $vol ) { 1322 $path = File::Spec->catdir( split /\//, $path ); 1323 $remote = File::Spec->catpath( $vol, $path, $self->file); 1324 1325 } elsif( $share ) { 1326 ### win32 specific, and a share name, so we wont bother with File::Spec 1327 $path =~ s|/+|\\|g; 1328 $remote = "\\\\".$self->host."\\$share\\$path"; 1329 1330 } else { 1331 ### File::Spec on VMS can not currently handle UNIX syntax. 1332 my $file_class = ON_VMS 1333 ? 'File::Spec::Unix' 1334 : 'File::Spec'; 1335 1336 $remote = $file_class->catfile( $path, $self->file ); 1337 } 1338 1339 ### File::Copy is littered with 'die' statements :( ### 1340 my $rv = eval { File::Copy::copy( $remote, $to ) }; 1341 1342 ### something went wrong ### 1343 if( !$rv or $@ ) { 1344 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", 1345 $remote, $to, $!, $@)); 1346 } 1347 1348 return $to; 1349} 1350 1351### use /usr/bin/rsync to fetch files 1352sub _rsync_fetch { 1353 my $self = shift; 1354 my %hash = @_; 1355 1356 my ($to); 1357 my $tmpl = { 1358 to => { required => 1, store => \$to } 1359 }; 1360 check( $tmpl, \%hash ) or return; 1361 1362 if (my $rsync = can_run('rsync')) { 1363 1364 my $cmd = [ $rsync ]; 1365 1366 ### XXX: rsync has no I/O timeouts at all, by default 1367 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1368 1369 push(@$cmd, '--quiet') unless $DEBUG; 1370 1371 ### DO NOT quote things for IPC::Run, it breaks stuff. 1372 push @$cmd, $self->uri, $to; 1373 1374 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1375 ### and there's no need for special casing any more. 1376 ### DO NOT quote things for IPC::Run, it breaks stuff. 1377 # $IPC::Cmd::USE_IPC_RUN 1378 # ? ($to, $self->uri) 1379 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1380 1381 my $captured; 1382 unless(run( command => $cmd, 1383 buffer => \$captured, 1384 verbose => $DEBUG ) 1385 ) { 1386 1387 return $self->_error(loc("Command %1 failed: %2", 1388 "@$cmd" || '', $captured || '')); 1389 } 1390 1391 return $to; 1392 1393 } else { 1394 $METHOD_FAIL->{'rsync'} = 1; 1395 return; 1396 } 1397} 1398 1399################################# 1400# 1401# Error code 1402# 1403################################# 1404 1405=pod 1406 1407=head2 $ff->error([BOOL]) 1408 1409Returns the last encountered error as string. 1410Pass it a true value to get the C<Carp::longmess()> output instead. 1411 1412=cut 1413 1414### error handling the way Archive::Extract does it 1415sub _error { 1416 my $self = shift; 1417 my $error = shift; 1418 1419 $self->_error_msg( $error ); 1420 $self->_error_msg_long( Carp::longmess($error) ); 1421 1422 if( $WARN ) { 1423 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; 1424 } 1425 1426 return; 1427} 1428 1429sub error { 1430 my $self = shift; 1431 return shift() ? $self->_error_msg_long : $self->_error_msg; 1432} 1433 1434 14351; 1436 1437=pod 1438 1439=head1 HOW IT WORKS 1440 1441File::Fetch is able to fetch a variety of uris, by using several 1442external programs and modules. 1443 1444Below is a mapping of what utilities will be used in what order 1445for what schemes, if available: 1446 1447 file => LWP, lftp, file 1448 http => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock 1449 ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp 1450 rsync => rsync 1451 1452If you'd like to disable the use of one or more of these utilities 1453and/or modules, see the C<$BLACKLIST> variable further down. 1454 1455If a utility or module isn't available, it will be marked in a cache 1456(see the C<$METHOD_FAIL> variable further down), so it will not be 1457tried again. The C<fetch> method will only fail when all options are 1458exhausted, and it was not able to retrieve the file. 1459 1460The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD 1461may also have it from C<pkgsrc>. We only check for C<fetch> on those 1462three platforms. 1463 1464C<iosock> is a very limited L<IO::Socket::INET> based mechanism for 1465retrieving C<http> schemed urls. It doesn't follow redirects for instance. 1466 1467A special note about fetching files from an ftp uri: 1468 1469By default, all ftp connections are done in passive mode. To change 1470that, see the C<$FTP_PASSIVE> variable further down. 1471 1472Furthermore, ftp uris only support anonymous connections, so no 1473named user/password pair can be passed along. 1474 1475C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable 1476further down. 1477 1478=head1 GLOBAL VARIABLES 1479 1480The behaviour of File::Fetch can be altered by changing the following 1481global variables: 1482 1483=head2 $File::Fetch::FROM_EMAIL 1484 1485This is the email address that will be sent as your anonymous ftp 1486password. 1487 1488Default is C<File-Fetch@example.com>. 1489 1490=head2 $File::Fetch::USER_AGENT 1491 1492This is the useragent as C<LWP> will report it. 1493 1494Default is C<File::Fetch/$VERSION>. 1495 1496=head2 $File::Fetch::FTP_PASSIVE 1497 1498This variable controls whether the environment variable C<FTP_PASSIVE> 1499and any passive switches to commandline tools will be set to true. 1500 1501Default value is 1. 1502 1503Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch 1504files, since passive mode can only be set interactively for this binary 1505 1506=head2 $File::Fetch::TIMEOUT 1507 1508When set, controls the network timeout (counted in seconds). 1509 1510Default value is 0. 1511 1512=head2 $File::Fetch::WARN 1513 1514This variable controls whether errors encountered internally by 1515C<File::Fetch> should be C<carp>'d or not. 1516 1517Set to false to silence warnings. Inspect the output of the C<error()> 1518method manually to see what went wrong. 1519 1520Defaults to C<true>. 1521 1522=head2 $File::Fetch::DEBUG 1523 1524This enables debugging output when calling commandline utilities to 1525fetch files. 1526This also enables C<Carp::longmess> errors, instead of the regular 1527C<carp> errors. 1528 1529Good for tracking down why things don't work with your particular 1530setup. 1531 1532Default is 0. 1533 1534=head2 $File::Fetch::BLACKLIST 1535 1536This is an array ref holding blacklisted modules/utilities for fetching 1537files with. 1538 1539To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could 1540set $File::Fetch::BLACKLIST to: 1541 1542 $File::Fetch::BLACKLIST = [qw|lwp netftp|] 1543 1544The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. 1545 1546See the note on C<MAPPING> below. 1547 1548=head2 $File::Fetch::METHOD_FAIL 1549 1550This is a hashref registering what modules/utilities were known to fail 1551for fetching files (mostly because they weren't installed). 1552 1553You can reset this cache by assigning an empty hashref to it, or 1554individually remove keys. 1555 1556See the note on C<MAPPING> below. 1557 1558=head1 MAPPING 1559 1560 1561Here's a quick mapping for the utilities/modules, and their names for 1562the $BLACKLIST, $METHOD_FAIL and other internal functions. 1563 1564 LWP => lwp 1565 HTTP::Lite => httplite 1566 HTTP::Tiny => httptiny 1567 Net::FTP => netftp 1568 wget => wget 1569 lynx => lynx 1570 ncftp => ncftp 1571 ftp => ftp 1572 curl => curl 1573 rsync => rsync 1574 lftp => lftp 1575 fetch => fetch 1576 IO::Socket => iosock 1577 1578=head1 FREQUENTLY ASKED QUESTIONS 1579 1580=head2 So how do I use a proxy with File::Fetch? 1581 1582C<File::Fetch> currently only supports proxies with LWP::UserAgent. 1583You will need to set your environment variables accordingly. For 1584example, to use an ftp proxy: 1585 1586 $ENV{ftp_proxy} = 'foo.com'; 1587 1588Refer to the LWP::UserAgent manpage for more details. 1589 1590=head2 I used 'lynx' to fetch a file, but its contents is all wrong! 1591 1592C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, 1593which we in turn capture. If that content is a 'custom' error file 1594(like, say, a C<404 handler>), you will get that contents instead. 1595 1596Sadly, C<lynx> doesn't support any options to return a different exit 1597code on non-C<200 OK> status, giving us no way to tell the difference 1598between a 'successful' fetch and a custom error page. 1599 1600Therefor, we recommend to only use C<lynx> as a last resort. This is 1601why it is at the back of our list of methods to try as well. 1602 1603=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? 1604 1605C<File::Fetch> is relatively smart about things. When trying to write 1606a file to disk, it removes the C<query parameters> (see the 1607C<output_file> method for details) from the file name before creating 1608it. In most cases this suffices. 1609 1610If you have any other characters you need to escape, please install 1611the C<URI::Escape> module from CPAN, and pre-encode your URI before 1612passing it to C<File::Fetch>. You can read about the details of URIs 1613and URI encoding here: 1614 1615 http://www.faqs.org/rfcs/rfc2396.html 1616 1617=head1 TODO 1618 1619=over 4 1620 1621=item Implement $PREFER_BIN 1622 1623To indicate to rather use commandline tools than modules 1624 1625=back 1626 1627=head1 BUG REPORTS 1628 1629Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>. 1630 1631=head1 AUTHOR 1632 1633This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 1634 1635=head1 COPYRIGHT 1636 1637This library is free software; you may redistribute and/or modify it 1638under the same terms as Perl itself. 1639 1640 1641=cut 1642 1643# Local variables: 1644# c-indentation-style: bsd 1645# c-basic-offset: 4 1646# indent-tabs-mode: nil 1647# End: 1648# vim: expandtab shiftwidth=4: 1649 1650 1651 1652 1653