1b39c5158Smillertpackage File::Fetch; 2b39c5158Smillert 3b39c5158Smillertuse strict; 4b39c5158Smillertuse FileHandle; 5b39c5158Smillertuse File::Temp; 6b39c5158Smillertuse File::Copy; 7b39c5158Smillertuse File::Spec; 8b39c5158Smillertuse File::Spec::Unix; 9b39c5158Smillertuse File::Basename qw[dirname]; 10b39c5158Smillert 11b39c5158Smillertuse Cwd qw[cwd]; 12b39c5158Smillertuse Carp qw[carp]; 13b39c5158Smillertuse IPC::Cmd qw[can_run run QUOTE]; 14b39c5158Smillertuse File::Path qw[mkpath]; 15b39c5158Smillertuse File::Temp qw[tempdir]; 16b39c5158Smillertuse Params::Check qw[check]; 17b39c5158Smillertuse Module::Load::Conditional qw[can_load]; 18b39c5158Smillertuse Locale::Maketext::Simple Style => 'gettext'; 19b39c5158Smillert 20b39c5158Smillertuse vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT 21b39c5158Smillert $BLACKLIST $METHOD_FAIL $VERSION $METHODS 226fb12b70Safresh1 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4 23b39c5158Smillert ]; 24b39c5158Smillert 25*eac174f2Safresh1$VERSION = '1.04'; 26b39c5158Smillert$VERSION = eval $VERSION; # avoid warnings with development releases 27b39c5158Smillert$PREFER_BIN = 0; # XXX TODO implement 28b39c5158Smillert$FROM_EMAIL = 'File-Fetch@example.com'; 29b39c5158Smillert$USER_AGENT = "File::Fetch/$VERSION"; 30b39c5158Smillert$BLACKLIST = [qw|ftp|]; 319f11ffb7Safresh1push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux'; 32b39c5158Smillert$METHOD_FAIL = { }; 33b39c5158Smillert$FTP_PASSIVE = 1; 34b39c5158Smillert$TIMEOUT = 0; 35b39c5158Smillert$DEBUG = 0; 36b39c5158Smillert$WARN = 1; 376fb12b70Safresh1$FORCEIPV4 = 0; 38b39c5158Smillert 39b39c5158Smillert### methods available to fetch the file depending on the scheme 40b39c5158Smillert$METHODS = { 41898184e3Ssthen http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], 429f11ffb7Safresh1 https => [ qw|lwp wget curl| ], 43898184e3Ssthen ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], 44b39c5158Smillert file => [ qw|lwp lftp file| ], 456fb12b70Safresh1 rsync => [ qw|rsync| ], 466fb12b70Safresh1 git => [ qw|git| ], 47b39c5158Smillert}; 48b39c5158Smillert 49b39c5158Smillert### silly warnings ### 50b39c5158Smillertlocal $Params::Check::VERBOSE = 1; 51b39c5158Smillertlocal $Params::Check::VERBOSE = 1; 52b39c5158Smillertlocal $Module::Load::Conditional::VERBOSE = 0; 53b39c5158Smillertlocal $Module::Load::Conditional::VERBOSE = 0; 54b39c5158Smillert 55b39c5158Smillert### see what OS we are on, important for file:// uris ### 56b39c5158Smillertuse constant ON_WIN => ($^O eq 'MSWin32'); 57b39c5158Smillertuse constant ON_VMS => ($^O eq 'VMS'); 58b39c5158Smillertuse constant ON_UNIX => (!ON_WIN); 59b39c5158Smillertuse constant HAS_VOL => (ON_WIN); 60b39c5158Smillertuse constant HAS_SHARE => (ON_WIN); 61898184e3Ssthenuse constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); 62b39c5158Smillert 63b39c5158Smillert=pod 64b39c5158Smillert 65b39c5158Smillert=head1 NAME 66b39c5158Smillert 67b39c5158SmillertFile::Fetch - A generic file fetching mechanism 68b39c5158Smillert 69b39c5158Smillert=head1 SYNOPSIS 70b39c5158Smillert 71b39c5158Smillert use File::Fetch; 72b39c5158Smillert 73b39c5158Smillert ### build a File::Fetch object ### 74b39c5158Smillert my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); 75b39c5158Smillert 76b39c5158Smillert ### fetch the uri to cwd() ### 77b39c5158Smillert my $where = $ff->fetch() or die $ff->error; 78b39c5158Smillert 79b39c5158Smillert ### fetch the uri to /tmp ### 80b39c5158Smillert my $where = $ff->fetch( to => '/tmp' ); 81b39c5158Smillert 82b39c5158Smillert ### parsed bits from the uri ### 83b39c5158Smillert $ff->uri; 84b39c5158Smillert $ff->scheme; 85b39c5158Smillert $ff->host; 86b39c5158Smillert $ff->path; 87b39c5158Smillert $ff->file; 88b39c5158Smillert 89b39c5158Smillert=head1 DESCRIPTION 90b39c5158Smillert 91b39c5158SmillertFile::Fetch is a generic file fetching mechanism. 92b39c5158Smillert 93b39c5158SmillertIt allows you to fetch any file pointed to by a C<ftp>, C<http>, 946fb12b70Safresh1C<file>, C<git> or C<rsync> uri by a number of different means. 95b39c5158Smillert 96b39c5158SmillertSee the C<HOW IT WORKS> section further down for details. 97b39c5158Smillert 98b39c5158Smillert=head1 ACCESSORS 99b39c5158Smillert 100b39c5158SmillertA C<File::Fetch> object has the following accessors 101b39c5158Smillert 102b39c5158Smillert=over 4 103b39c5158Smillert 104b39c5158Smillert=item $ff->uri 105b39c5158Smillert 106b39c5158SmillertThe uri you passed to the constructor 107b39c5158Smillert 108b39c5158Smillert=item $ff->scheme 109b39c5158Smillert 110b39c5158SmillertThe scheme from the uri (like 'file', 'http', etc) 111b39c5158Smillert 112b39c5158Smillert=item $ff->host 113b39c5158Smillert 114b39c5158SmillertThe hostname in the uri. Will be empty if host was originally 115b39c5158Smillert'localhost' for a 'file://' url. 116b39c5158Smillert 117b39c5158Smillert=item $ff->vol 118b39c5158Smillert 119b39c5158SmillertOn operating systems with the concept of a volume the second element 120b39c5158Smillertof a file:// is considered to the be volume specification for the file. 121b39c5158SmillertThus on Win32 this routine returns the volume, on other operating 122b39c5158Smillertsystems this returns nothing. 123b39c5158Smillert 124b39c5158SmillertOn Windows this value may be empty if the uri is to a network share, in 125b39c5158Smillertwhich case the 'share' property will be defined. Additionally, volume 126b39c5158Smillertspecifications that use '|' as ':' will be converted on read to use ':'. 127b39c5158Smillert 128b39c5158SmillertOn VMS, which has a volume concept, this field will be empty because VMS 129b39c5158Smillertfile specifications are converted to absolute UNIX format and the volume 130b39c5158Smillertinformation is transparently included. 131b39c5158Smillert 132b39c5158Smillert=item $ff->share 133b39c5158Smillert 134b39c5158SmillertOn systems with the concept of a network share (currently only Windows) returns 135b39c5158Smillertthe sharename from a file://// url. On other operating systems returns empty. 136b39c5158Smillert 137b39c5158Smillert=item $ff->path 138b39c5158Smillert 139b39c5158SmillertThe path from the uri, will be at least a single '/'. 140b39c5158Smillert 141b39c5158Smillert=item $ff->file 142b39c5158Smillert 143b39c5158SmillertThe name of the remote file. For the local file name, the 144b39c5158Smillertresult of $ff->output_file will be used. 145b39c5158Smillert 14691f110e0Safresh1=item $ff->file_default 14791f110e0Safresh1 14891f110e0Safresh1The name of the default local file, that $ff->output_file falls back to if 14991f110e0Safresh1it would otherwise return no filename. For example when fetching a URI like 15091f110e0Safresh1http://www.abc.net.au/ the contents retrieved may be from a remote file called 15191f110e0Safresh1'index.html'. The default value of this attribute is literally 'file_default'. 15291f110e0Safresh1 153b39c5158Smillert=cut 154b39c5158Smillert 155b39c5158Smillert 156b39c5158Smillert########################## 157b39c5158Smillert### Object & Accessors ### 158b39c5158Smillert########################## 159b39c5158Smillert 160b39c5158Smillert{ 161b39c5158Smillert ### template for autogenerated accessors ### 162b39c5158Smillert my $Tmpl = { 163b39c5158Smillert scheme => { default => 'http' }, 164b39c5158Smillert host => { default => 'localhost' }, 165b39c5158Smillert path => { default => '/' }, 166b39c5158Smillert file => { required => 1 }, 167b39c5158Smillert uri => { required => 1 }, 1689f11ffb7Safresh1 userinfo => { default => '' }, 169b39c5158Smillert vol => { default => '' }, # windows for file:// uris 170b39c5158Smillert share => { default => '' }, # windows for file:// uris 17191f110e0Safresh1 file_default => { default => 'file_default' }, 17291f110e0Safresh1 tempdir_root => { required => 1 }, # Should be lazy-set at ->new() 173b39c5158Smillert _error_msg => { no_override => 1 }, 174b39c5158Smillert _error_msg_long => { no_override => 1 }, 175b39c5158Smillert }; 176b39c5158Smillert 177b39c5158Smillert for my $method ( keys %$Tmpl ) { 178b39c5158Smillert no strict 'refs'; 179b39c5158Smillert *$method = sub { 180b39c5158Smillert my $self = shift; 181b39c5158Smillert $self->{$method} = $_[0] if @_; 182b39c5158Smillert return $self->{$method}; 183b39c5158Smillert } 184b39c5158Smillert } 185b39c5158Smillert 186b39c5158Smillert sub _create { 187b39c5158Smillert my $class = shift; 188b39c5158Smillert my %hash = @_; 189b39c5158Smillert 190b39c5158Smillert my $args = check( $Tmpl, \%hash ) or return; 191b39c5158Smillert 192b39c5158Smillert bless $args, $class; 193b39c5158Smillert 194b39c5158Smillert if( lc($args->scheme) ne 'file' and not $args->host ) { 195b39c5158Smillert return $class->_error(loc( 196b39c5158Smillert "Hostname required when fetching from '%1'",$args->scheme)); 197b39c5158Smillert } 198b39c5158Smillert 19991f110e0Safresh1 for (qw[path]) { 200b39c5158Smillert unless( $args->$_() ) { # 5.5.x needs the () 201b39c5158Smillert return $class->_error(loc("No '%1' specified",$_)); 202b39c5158Smillert } 203b39c5158Smillert } 204b39c5158Smillert 205b39c5158Smillert return $args; 206b39c5158Smillert } 207b39c5158Smillert} 208b39c5158Smillert 209b39c5158Smillert=item $ff->output_file 210b39c5158Smillert 211b39c5158SmillertThe name of the output file. This is the same as $ff->file, 212b39c5158Smillertbut any query parameters are stripped off. For example: 213b39c5158Smillert 214b39c5158Smillert http://example.com/index.html?x=y 215b39c5158Smillert 216b39c5158Smillertwould make the output file be C<index.html> rather than 217b39c5158SmillertC<index.html?x=y>. 218b39c5158Smillert 219b39c5158Smillert=back 220b39c5158Smillert 221b39c5158Smillert=cut 222b39c5158Smillert 223b39c5158Smillertsub output_file { 224b39c5158Smillert my $self = shift; 225b39c5158Smillert my $file = $self->file; 226b39c5158Smillert 227b39c5158Smillert $file =~ s/\?.*$//g; 228b39c5158Smillert 22991f110e0Safresh1 $file ||= $self->file_default; 23091f110e0Safresh1 231b39c5158Smillert return $file; 232b39c5158Smillert} 233b39c5158Smillert 234b39c5158Smillert### XXX do this or just point to URI::Escape? 235b39c5158Smillert# =head2 $esc_uri = $ff->escaped_uri 236b39c5158Smillert# 237b39c5158Smillert# =cut 238b39c5158Smillert# 239b39c5158Smillert# ### most of this is stolen straight from URI::escape 240b39c5158Smillert# { ### Build a char->hex map 241b39c5158Smillert# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 242b39c5158Smillert# 243b39c5158Smillert# sub escaped_uri { 244b39c5158Smillert# my $self = shift; 245b39c5158Smillert# my $uri = $self->uri; 246b39c5158Smillert# 247b39c5158Smillert# ### Default unsafe characters. RFC 2732 ^(uric - reserved) 248b39c5158Smillert# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ 249b39c5158Smillert# $escapes{$1} || $self->_fail_hi($1)/ge; 250b39c5158Smillert# 251b39c5158Smillert# return $uri; 252b39c5158Smillert# } 253b39c5158Smillert# 254b39c5158Smillert# sub _fail_hi { 255b39c5158Smillert# my $self = shift; 256b39c5158Smillert# my $char = shift; 257b39c5158Smillert# 258b39c5158Smillert# $self->_error(loc( 259b39c5158Smillert# "Can't escape '%1', try using the '%2' module instead", 260b39c5158Smillert# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' 261b39c5158Smillert# )); 262b39c5158Smillert# } 263b39c5158Smillert# 264b39c5158Smillert# sub output_file { 265b39c5158Smillert# 266b39c5158Smillert# } 267b39c5158Smillert# 268b39c5158Smillert# 269b39c5158Smillert# } 270b39c5158Smillert 271b39c5158Smillert=head1 METHODS 272b39c5158Smillert 273b39c5158Smillert=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); 274b39c5158Smillert 275b39c5158SmillertParses the uri and creates a corresponding File::Fetch::Item object, 276b39c5158Smillertthat is ready to be C<fetch>ed and returns it. 277b39c5158Smillert 278b39c5158SmillertReturns false on failure. 279b39c5158Smillert 280b39c5158Smillert=cut 281b39c5158Smillert 282b39c5158Smillertsub new { 283b39c5158Smillert my $class = shift; 284b39c5158Smillert my %hash = @_; 285b39c5158Smillert 28691f110e0Safresh1 my ($uri, $file_default, $tempdir_root); 287b39c5158Smillert my $tmpl = { 288b39c5158Smillert uri => { required => 1, store => \$uri }, 28991f110e0Safresh1 file_default => { required => 0, store => \$file_default }, 29091f110e0Safresh1 tempdir_root => { required => 0, store => \$tempdir_root }, 291b39c5158Smillert }; 292b39c5158Smillert 293b39c5158Smillert check( $tmpl, \%hash ) or return; 294b39c5158Smillert 295b39c5158Smillert ### parse the uri to usable parts ### 296b39c5158Smillert my $href = $class->_parse_uri( $uri ) or return; 297b39c5158Smillert 29891f110e0Safresh1 $href->{file_default} = $file_default if $file_default; 29991f110e0Safresh1 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root; 30091f110e0Safresh1 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root}; 30191f110e0Safresh1 302b39c5158Smillert ### make it into a FFI object ### 303b39c5158Smillert my $ff = $class->_create( %$href ) or return; 304b39c5158Smillert 305b39c5158Smillert 306b39c5158Smillert ### return the object ### 307b39c5158Smillert return $ff; 308b39c5158Smillert} 309b39c5158Smillert 310b39c5158Smillert### parses an uri to a hash structure: 311b39c5158Smillert### 312b39c5158Smillert### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) 313b39c5158Smillert### 314b39c5158Smillert### becomes: 315b39c5158Smillert### 316b39c5158Smillert### $href = { 317b39c5158Smillert### scheme => 'ftp', 318b39c5158Smillert### host => 'ftp.cpan.org', 319b39c5158Smillert### path => '/pub/mirror', 320b39c5158Smillert### file => 'index.html' 321b39c5158Smillert### }; 322b39c5158Smillert### 323b39c5158Smillert### In the case of file:// urls there maybe be additional fields 324b39c5158Smillert### 325b39c5158Smillert### For systems with volume specifications such as Win32 there will be 326b39c5158Smillert### a volume specifier provided in the 'vol' field. 327b39c5158Smillert### 328b39c5158Smillert### 'vol' => 'volumename' 329b39c5158Smillert### 330b39c5158Smillert### For windows file shares there may be a 'share' key specified 331b39c5158Smillert### 332b39c5158Smillert### 'share' => 'sharename' 333b39c5158Smillert### 334b39c5158Smillert### Note that the rules of what a file:// url means vary by the operating system 335b39c5158Smillert### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious 336b39c5158Smillert### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 337b39c5158Smillert### not '/foo/bar.txt' 338b39c5158Smillert### 339b39c5158Smillert### Similarly if the host interpreting the url is VMS then 340b39c5158Smillert### file:///disk$user/my/notes/note12345.txt' means 341b39c5158Smillert### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as 342b39c5158Smillert### if it is unix where it means /disk$user/my/notes/note12345.txt'. 343b39c5158Smillert### Except for some cases in the File::Spec methods, Perl on VMS will generally 344b39c5158Smillert### handle UNIX format file specifications. 345b39c5158Smillert### 346b39c5158Smillert### This means it is impossible to serve certain file:// urls on certain systems. 347b39c5158Smillert### 348b39c5158Smillert### Thus are the problems with a protocol-less specification. :-( 349b39c5158Smillert### 350b39c5158Smillert 351b39c5158Smillertsub _parse_uri { 352b39c5158Smillert my $self = shift; 353b39c5158Smillert my $uri = shift or return; 354b39c5158Smillert 355b39c5158Smillert my $href = { uri => $uri }; 356b39c5158Smillert 357b39c5158Smillert ### find the scheme ### 358b39c5158Smillert $uri =~ s|^(\w+)://||; 359b39c5158Smillert $href->{scheme} = $1; 360b39c5158Smillert 361b39c5158Smillert ### See rfc 1738 section 3.10 362*eac174f2Safresh1 ### https://datatracker.ietf.org/doc/html/rfc1738#section-3.10 363b39c5158Smillert ### And wikipedia for more on windows file:// urls 364b39c5158Smillert ### http://en.wikipedia.org/wiki/File:// 365b39c5158Smillert if( $href->{scheme} eq 'file' ) { 366b39c5158Smillert 367b39c5158Smillert my @parts = split '/',$uri; 368b39c5158Smillert 369b39c5158Smillert ### file://hostname/... 370b39c5158Smillert ### file://hostname/... 371b39c5158Smillert ### normalize file://localhost with file:/// 372b39c5158Smillert $href->{host} = $parts[0] || ''; 373b39c5158Smillert 374b39c5158Smillert ### index in @parts where the path components begin; 375b39c5158Smillert my $index = 1; 376b39c5158Smillert 377b39c5158Smillert ### file:////hostname/sharename/blah.txt 378b39c5158Smillert if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { 379b39c5158Smillert 380b39c5158Smillert $href->{host} = $parts[2] || ''; # avoid warnings 381b39c5158Smillert $href->{share} = $parts[3] || ''; # avoid warnings 382b39c5158Smillert 383b39c5158Smillert $index = 4 # index after the share 384b39c5158Smillert 385b39c5158Smillert ### file:///D|/blah.txt 386b39c5158Smillert ### file:///D:/blah.txt 387b39c5158Smillert } elsif (HAS_VOL) { 388b39c5158Smillert 389b39c5158Smillert ### this code comes from dmq's patch, but: 390b39c5158Smillert ### XXX if volume is empty, wouldn't that be an error? --kane 391b39c5158Smillert ### if so, our file://localhost test needs to be fixed as wel 392b39c5158Smillert $href->{vol} = $parts[1] || ''; 393b39c5158Smillert 394b39c5158Smillert ### correct D| style colume descriptors 395b39c5158Smillert $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; 396b39c5158Smillert 397b39c5158Smillert $index = 2; # index after the volume 398b39c5158Smillert } 399b39c5158Smillert 400b39c5158Smillert ### rebuild the path from the leftover parts; 401b39c5158Smillert $href->{path} = join '/', '', splice( @parts, $index, $#parts ); 402b39c5158Smillert 403b39c5158Smillert } else { 404b39c5158Smillert ### using anything but qw() in hash slices may produce warnings 405b39c5158Smillert ### in older perls :-( 4069f11ffb7Safresh1 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s; 407b39c5158Smillert } 408b39c5158Smillert 409b39c5158Smillert ### split the path into file + dir ### 410b39c5158Smillert { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); 411b39c5158Smillert $href->{path} = $parts[1]; 412b39c5158Smillert $href->{file} = $parts[2]; 413b39c5158Smillert } 414b39c5158Smillert 415b39c5158Smillert ### host will be empty if the target was 'localhost' and the 416b39c5158Smillert ### scheme was 'file' 417b39c5158Smillert $href->{host} = '' if ($href->{host} eq 'localhost') and 418b39c5158Smillert ($href->{scheme} eq 'file'); 419b39c5158Smillert 420b39c5158Smillert return $href; 421b39c5158Smillert} 422b39c5158Smillert 423b39c5158Smillert=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) 424b39c5158Smillert 425b39c5158SmillertFetches the file you requested and returns the full path to the file. 426b39c5158Smillert 427b39c5158SmillertBy default it writes to C<cwd()>, but you can override that by specifying 428b39c5158Smillertthe C<to> argument: 429b39c5158Smillert 430b39c5158Smillert ### file fetch to /tmp, full path to the file in $where 431b39c5158Smillert $where = $ff->fetch( to => '/tmp' ); 432b39c5158Smillert 433b39c5158Smillert ### file slurped into $scalar, full path to the file in $where 434b39c5158Smillert ### file is downloaded to a temp directory and cleaned up at exit time 435b39c5158Smillert $where = $ff->fetch( to => \$scalar ); 436b39c5158Smillert 437b39c5158SmillertReturns the full path to the downloaded file on success, and false 438b39c5158Smillerton failure. 439b39c5158Smillert 440b39c5158Smillert=cut 441b39c5158Smillert 442b39c5158Smillertsub fetch { 443b39c5158Smillert my $self = shift or return; 444b39c5158Smillert my %hash = @_; 445b39c5158Smillert 446b39c5158Smillert my $target; 447b39c5158Smillert my $tmpl = { 448b39c5158Smillert to => { default => cwd(), store => \$target }, 449b39c5158Smillert }; 450b39c5158Smillert 451b39c5158Smillert check( $tmpl, \%hash ) or return; 452b39c5158Smillert 453b39c5158Smillert my ($to, $fh); 454b39c5158Smillert ### you want us to slurp the contents 455b39c5158Smillert if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 45691f110e0Safresh1 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 ); 457b39c5158Smillert 458b39c5158Smillert ### plain old fetch 459b39c5158Smillert } else { 460b39c5158Smillert $to = $target; 461b39c5158Smillert 462b39c5158Smillert ### On VMS force to VMS format so File::Spec will work. 463b39c5158Smillert $to = VMS::Filespec::vmspath($to) if ON_VMS; 464b39c5158Smillert 465b39c5158Smillert ### create the path if it doesn't exist yet ### 466b39c5158Smillert unless( -d $to ) { 467b39c5158Smillert eval { mkpath( $to ) }; 468b39c5158Smillert 469b39c5158Smillert return $self->_error(loc("Could not create path '%1'",$to)) if $@; 470b39c5158Smillert } 471b39c5158Smillert } 472b39c5158Smillert 473b39c5158Smillert ### set passive ftp if required ### 474b39c5158Smillert local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; 475b39c5158Smillert 476b39c5158Smillert ### we dont use catfile on win32 because if we are using a cygwin tool 477b39c5158Smillert ### under cmd.exe they wont understand windows style separators. 478b39c5158Smillert my $out_to = ON_WIN ? $to.'/'.$self->output_file 479b39c5158Smillert : File::Spec->catfile( $to, $self->output_file ); 480b39c5158Smillert 481b39c5158Smillert for my $method ( @{ $METHODS->{$self->scheme} } ) { 482b39c5158Smillert my $sub = '_'.$method.'_fetch'; 483b39c5158Smillert 484b39c5158Smillert unless( __PACKAGE__->can($sub) ) { 485b39c5158Smillert $self->_error(loc("Cannot call method for '%1' -- WEIRD!", 486b39c5158Smillert $method)); 487b39c5158Smillert next; 488b39c5158Smillert } 489b39c5158Smillert 490b39c5158Smillert ### method is blacklisted ### 491b39c5158Smillert next if grep { lc $_ eq $method } @$BLACKLIST; 492b39c5158Smillert 493b39c5158Smillert ### method is known to fail ### 494b39c5158Smillert next if $METHOD_FAIL->{$method}; 495b39c5158Smillert 496b39c5158Smillert ### there's serious issues with IPC::Run and quoting of command 497b39c5158Smillert ### line arguments. using quotes in the wrong place breaks things, 498b39c5158Smillert ### and in the case of say, 499b39c5158Smillert ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document 500b39c5158Smillert ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" 501b39c5158Smillert ### it doesn't matter how you quote, it always fails. 502b39c5158Smillert local $IPC::Cmd::USE_IPC_RUN = 0; 503b39c5158Smillert 504b39c5158Smillert if( my $file = $self->$sub( 505b39c5158Smillert to => $out_to 506b39c5158Smillert )){ 507b39c5158Smillert 508b39c5158Smillert unless( -e $file && -s _ ) { 509b39c5158Smillert $self->_error(loc("'%1' said it fetched '%2', ". 510b39c5158Smillert "but it was not created",$method,$file)); 511b39c5158Smillert 512b39c5158Smillert ### mark the failure ### 513b39c5158Smillert $METHOD_FAIL->{$method} = 1; 514b39c5158Smillert 515b39c5158Smillert next; 516b39c5158Smillert 517b39c5158Smillert } else { 518b39c5158Smillert 519b39c5158Smillert ### slurp mode? 520b39c5158Smillert if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 521b39c5158Smillert 522b39c5158Smillert ### open the file 523898184e3Ssthen open my $fh, "<$file" or do { 524b39c5158Smillert $self->_error( 525b39c5158Smillert loc("Could not open '%1': %2", $file, $!)); 526b39c5158Smillert return; 527b39c5158Smillert }; 528b39c5158Smillert 529b39c5158Smillert ### slurp 530b39c5158Smillert $$target = do { local $/; <$fh> }; 531b39c5158Smillert 532b39c5158Smillert } 533b39c5158Smillert 534b39c5158Smillert my $abs = File::Spec->rel2abs( $file ); 535b39c5158Smillert return $abs; 536b39c5158Smillert 537b39c5158Smillert } 538b39c5158Smillert } 539b39c5158Smillert } 540b39c5158Smillert 541b39c5158Smillert 542b39c5158Smillert ### if we got here, we looped over all methods, but we weren't able 543b39c5158Smillert ### to fetch it. 544b39c5158Smillert return; 545b39c5158Smillert} 546b39c5158Smillert 547b39c5158Smillert######################## 548b39c5158Smillert### _*_fetch methods ### 549b39c5158Smillert######################## 550b39c5158Smillert 551b39c5158Smillert### LWP fetching ### 552b39c5158Smillertsub _lwp_fetch { 553b39c5158Smillert my $self = shift; 554b39c5158Smillert my %hash = @_; 555b39c5158Smillert 556b39c5158Smillert my ($to); 557b39c5158Smillert my $tmpl = { 558b39c5158Smillert to => { required => 1, store => \$to } 559b39c5158Smillert }; 560b39c5158Smillert check( $tmpl, \%hash ) or return; 561b39c5158Smillert 562b39c5158Smillert ### modules required to download with lwp ### 563b39c5158Smillert my $use_list = { 564b39c5158Smillert LWP => '0.0', 565b39c5158Smillert 'LWP::UserAgent' => '0.0', 566b39c5158Smillert 'HTTP::Request' => '0.0', 567b39c5158Smillert 'HTTP::Status' => '0.0', 568b39c5158Smillert URI => '0.0', 569b39c5158Smillert 570b39c5158Smillert }; 571b39c5158Smillert 5729f11ffb7Safresh1 if ($self->scheme eq 'https') { 5739f11ffb7Safresh1 $use_list->{'LWP::Protocol::https'} = '0'; 5749f11ffb7Safresh1 } 5759f11ffb7Safresh1 576*eac174f2Safresh1 ### Fix CVE-2016-1238 ### 577*eac174f2Safresh1 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 57891f110e0Safresh1 unless( can_load( modules => $use_list ) ) { 57991f110e0Safresh1 $METHOD_FAIL->{'lwp'} = 1; 58091f110e0Safresh1 return; 58191f110e0Safresh1 } 582b39c5158Smillert 583b39c5158Smillert ### setup the uri object 584b39c5158Smillert my $uri = URI->new( File::Spec::Unix->catfile( 585b39c5158Smillert $self->path, $self->file 586b39c5158Smillert ) ); 587b39c5158Smillert 588b39c5158Smillert ### special rules apply for file:// uris ### 589b39c5158Smillert $uri->scheme( $self->scheme ); 590b39c5158Smillert $uri->host( $self->scheme eq 'file' ? '' : $self->host ); 5919f11ffb7Safresh1 5929f11ffb7Safresh1 if ($self->userinfo) { 5939f11ffb7Safresh1 $uri->userinfo($self->userinfo); 5949f11ffb7Safresh1 } elsif ($self->scheme ne 'file') { 5959f11ffb7Safresh1 $uri->userinfo("anonymous:$FROM_EMAIL"); 5969f11ffb7Safresh1 } 597b39c5158Smillert 598b39c5158Smillert ### set up the useragent object 599b39c5158Smillert my $ua = LWP::UserAgent->new(); 600b39c5158Smillert $ua->timeout( $TIMEOUT ) if $TIMEOUT; 601b39c5158Smillert $ua->agent( $USER_AGENT ); 602b39c5158Smillert $ua->from( $FROM_EMAIL ); 603b39c5158Smillert $ua->env_proxy; 604b39c5158Smillert 605b39c5158Smillert my $res = $ua->mirror($uri, $to) or return; 606b39c5158Smillert 607b39c5158Smillert ### uptodate or fetched ok ### 608b39c5158Smillert if ( $res->code == 304 or $res->code == 200 ) { 609b39c5158Smillert return $to; 610b39c5158Smillert 611b39c5158Smillert } else { 612b39c5158Smillert return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", 613b39c5158Smillert $res->code, HTTP::Status::status_message($res->code), 614b39c5158Smillert $res->status_line)); 615b39c5158Smillert } 616b39c5158Smillert 617b39c5158Smillert} 618b39c5158Smillert 619898184e3Ssthen### HTTP::Tiny fetching ### 620898184e3Ssthensub _httptiny_fetch { 621898184e3Ssthen my $self = shift; 622898184e3Ssthen my %hash = @_; 623898184e3Ssthen 624898184e3Ssthen my ($to); 625898184e3Ssthen my $tmpl = { 626898184e3Ssthen to => { required => 1, store => \$to } 627898184e3Ssthen }; 628898184e3Ssthen check( $tmpl, \%hash ) or return; 629898184e3Ssthen 630898184e3Ssthen my $use_list = { 631898184e3Ssthen 'HTTP::Tiny' => '0.008', 632898184e3Ssthen 633898184e3Ssthen }; 634898184e3Ssthen 635*eac174f2Safresh1 ### Fix CVE-2016-1238 ### 636*eac174f2Safresh1 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 63791f110e0Safresh1 unless( can_load(modules => $use_list) ) { 63891f110e0Safresh1 $METHOD_FAIL->{'httptiny'} = 1; 63991f110e0Safresh1 return; 64091f110e0Safresh1 } 641898184e3Ssthen 642898184e3Ssthen my $uri = $self->uri; 643898184e3Ssthen 644898184e3Ssthen my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); 645898184e3Ssthen 646898184e3Ssthen my $rc = $http->mirror( $uri, $to ); 647898184e3Ssthen 648898184e3Ssthen unless ( $rc->{success} ) { 649898184e3Ssthen 650898184e3Ssthen return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]", 651898184e3Ssthen $rc->{status}, $rc->{reason} ) ); 652898184e3Ssthen 653898184e3Ssthen } 654898184e3Ssthen 655898184e3Ssthen return $to; 656898184e3Ssthen 657898184e3Ssthen} 658898184e3Ssthen 659898184e3Ssthen### HTTP::Lite fetching ### 660898184e3Ssthensub _httplite_fetch { 661898184e3Ssthen my $self = shift; 662898184e3Ssthen my %hash = @_; 663898184e3Ssthen 664898184e3Ssthen my ($to); 665898184e3Ssthen my $tmpl = { 666898184e3Ssthen to => { required => 1, store => \$to } 667898184e3Ssthen }; 668898184e3Ssthen check( $tmpl, \%hash ) or return; 669898184e3Ssthen 670898184e3Ssthen ### modules required to download with lwp ### 671898184e3Ssthen my $use_list = { 672898184e3Ssthen 'HTTP::Lite' => '2.2', 6739f11ffb7Safresh1 'MIME::Base64' => '0', 674898184e3Ssthen }; 675898184e3Ssthen 676*eac174f2Safresh1 ### Fix CVE-2016-1238 ### 677*eac174f2Safresh1 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 67891f110e0Safresh1 unless( can_load(modules => $use_list) ) { 67991f110e0Safresh1 $METHOD_FAIL->{'httplite'} = 1; 68091f110e0Safresh1 return; 68191f110e0Safresh1 } 682898184e3Ssthen 683898184e3Ssthen my $uri = $self->uri; 684898184e3Ssthen my $retries = 0; 685898184e3Ssthen 686898184e3Ssthen RETRIES: while ( $retries++ < 5 ) { 687898184e3Ssthen 688898184e3Ssthen my $http = HTTP::Lite->new(); 689898184e3Ssthen # Naughty naughty but there isn't any accessor/setter 690898184e3Ssthen $http->{timeout} = $TIMEOUT if $TIMEOUT; 691898184e3Ssthen $http->http11_mode(1); 692898184e3Ssthen 6939f11ffb7Safresh1 if ($self->userinfo) { 6949f11ffb7Safresh1 my $encoded = MIME::Base64::encode($self->userinfo, ''); 6959f11ffb7Safresh1 $http->add_req_header("Authorization", "Basic $encoded"); 6969f11ffb7Safresh1 } 6979f11ffb7Safresh1 698898184e3Ssthen my $fh = FileHandle->new; 699898184e3Ssthen 700898184e3Ssthen unless ( $fh->open($to,'>') ) { 701898184e3Ssthen return $self->_error(loc( 702898184e3Ssthen "Could not open '%1' for writing: %2",$to,$!)); 703898184e3Ssthen } 704898184e3Ssthen 705898184e3Ssthen $fh->autoflush(1); 706898184e3Ssthen 707898184e3Ssthen binmode $fh; 708898184e3Ssthen 709898184e3Ssthen my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); 710898184e3Ssthen 711898184e3Ssthen close $fh; 712898184e3Ssthen 713898184e3Ssthen if ( $rc == 301 || $rc == 302 ) { 714898184e3Ssthen my $loc; 715898184e3Ssthen HEADERS: for ($http->headers_array) { 716898184e3Ssthen /Location: (\S+)/ and $loc = $1, last HEADERS; 717898184e3Ssthen } 718898184e3Ssthen #$loc or last; # Think we should squeal here. 719898184e3Ssthen if ($loc =~ m!^/!) { 720898184e3Ssthen $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; 721898184e3Ssthen $uri .= $loc; 722898184e3Ssthen } 723898184e3Ssthen else { 724898184e3Ssthen $uri = $loc; 725898184e3Ssthen } 726898184e3Ssthen next RETRIES; 727898184e3Ssthen } 728898184e3Ssthen elsif ( $rc == 200 ) { 729898184e3Ssthen return $to; 730898184e3Ssthen } 731898184e3Ssthen else { 732898184e3Ssthen return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", 733898184e3Ssthen $rc, $http->status_message)); 734898184e3Ssthen } 735898184e3Ssthen 736898184e3Ssthen } # Loop for 5 retries. 737898184e3Ssthen 738898184e3Ssthen return $self->_error("Fetch failed! Gave up after 5 tries"); 739898184e3Ssthen 740898184e3Ssthen} 741898184e3Ssthen 742b39c5158Smillert### Simple IO::Socket::INET fetching ### 743b39c5158Smillertsub _iosock_fetch { 744b39c5158Smillert my $self = shift; 745b39c5158Smillert my %hash = @_; 746b39c5158Smillert 747b39c5158Smillert my ($to); 748b39c5158Smillert my $tmpl = { 749b39c5158Smillert to => { required => 1, store => \$to } 750b39c5158Smillert }; 751b39c5158Smillert check( $tmpl, \%hash ) or return; 752b39c5158Smillert 753b39c5158Smillert my $use_list = { 754b39c5158Smillert 'IO::Socket::INET' => '0.0', 755b39c5158Smillert 'IO::Select' => '0.0', 756b39c5158Smillert }; 757b39c5158Smillert 758*eac174f2Safresh1 ### Fix CVE-2016-1238 ### 759*eac174f2Safresh1 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 76091f110e0Safresh1 unless( can_load(modules => $use_list) ) { 76191f110e0Safresh1 $METHOD_FAIL->{'iosock'} = 1; 76291f110e0Safresh1 return; 76391f110e0Safresh1 } 76491f110e0Safresh1 765b39c5158Smillert my $sock = IO::Socket::INET->new( 766b39c5158Smillert PeerHost => $self->host, 767b39c5158Smillert ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ), 768b39c5158Smillert ); 769b39c5158Smillert 770b39c5158Smillert unless ( $sock ) { 771b39c5158Smillert return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); 772b39c5158Smillert } 773b39c5158Smillert 774b39c5158Smillert my $fh = FileHandle->new; 775b39c5158Smillert 776b39c5158Smillert # Check open() 777b39c5158Smillert 778b39c5158Smillert unless ( $fh->open($to,'>') ) { 779b39c5158Smillert return $self->_error(loc( 780b39c5158Smillert "Could not open '%1' for writing: %2",$to,$!)); 781b39c5158Smillert } 782b39c5158Smillert 783898184e3Ssthen $fh->autoflush(1); 784898184e3Ssthen binmode $fh; 785898184e3Ssthen 786b39c5158Smillert my $path = File::Spec::Unix->catfile( $self->path, $self->file ); 787b39c5158Smillert my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; 788b39c5158Smillert $sock->send( $req ); 789b39c5158Smillert 790b39c5158Smillert my $select = IO::Select->new( $sock ); 791b39c5158Smillert 792b39c5158Smillert my $resp = ''; 793b39c5158Smillert my $normal = 0; 794b39c5158Smillert while ( $select->can_read( $TIMEOUT || 60 ) ) { 795b39c5158Smillert my $ret = $sock->sysread( $resp, 4096, length($resp) ); 796b39c5158Smillert if ( !defined $ret or $ret == 0 ) { 797b39c5158Smillert $select->remove( $sock ); 798b39c5158Smillert $normal++; 799b39c5158Smillert } 800b39c5158Smillert } 801b39c5158Smillert close $sock; 802b39c5158Smillert 803b39c5158Smillert unless ( $normal ) { 804b39c5158Smillert return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); 805b39c5158Smillert } 806b39c5158Smillert 807b39c5158Smillert # Check the "response" 808898184e3Ssthen # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) 809b39c5158Smillert $resp =~ s/^(\x0d?\x0a)+//; 810b39c5158Smillert # Check it is an HTTP response 811b39c5158Smillert unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { 812b39c5158Smillert return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); 813b39c5158Smillert } 814b39c5158Smillert 815b39c5158Smillert # Check for OK 816b39c5158Smillert my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; 817b39c5158Smillert unless ( $code eq '200' ) { 818b39c5158Smillert return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); 819b39c5158Smillert } 820b39c5158Smillert 821898184e3Ssthen { 822898184e3Ssthen local $\; 823b39c5158Smillert print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; 824898184e3Ssthen } 825b39c5158Smillert close $fh; 826b39c5158Smillert return $to; 827b39c5158Smillert} 828b39c5158Smillert 829b39c5158Smillert### Net::FTP fetching 830b39c5158Smillertsub _netftp_fetch { 831b39c5158Smillert my $self = shift; 832b39c5158Smillert my %hash = @_; 833b39c5158Smillert 834b39c5158Smillert my ($to); 835b39c5158Smillert my $tmpl = { 836b39c5158Smillert to => { required => 1, store => \$to } 837b39c5158Smillert }; 838b39c5158Smillert check( $tmpl, \%hash ) or return; 839b39c5158Smillert 840b39c5158Smillert ### required modules ### 841b39c5158Smillert my $use_list = { 'Net::FTP' => 0 }; 842b39c5158Smillert 843*eac174f2Safresh1 ### Fix CVE-2016-1238 ### 844*eac174f2Safresh1 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 84591f110e0Safresh1 unless( can_load( modules => $use_list ) ) { 84691f110e0Safresh1 $METHOD_FAIL->{'netftp'} = 1; 84791f110e0Safresh1 return; 84891f110e0Safresh1 } 849b39c5158Smillert 850b39c5158Smillert ### make connection ### 851b39c5158Smillert my $ftp; 852b39c5158Smillert my @options = ($self->host); 853b39c5158Smillert push(@options, Timeout => $TIMEOUT) if $TIMEOUT; 854b39c5158Smillert unless( $ftp = Net::FTP->new( @options ) ) { 855b39c5158Smillert return $self->_error(loc("Ftp creation failed: %1",$@)); 856b39c5158Smillert } 857b39c5158Smillert 858b39c5158Smillert ### login ### 859b39c5158Smillert unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { 860b39c5158Smillert return $self->_error(loc("Could not login to '%1'",$self->host)); 861b39c5158Smillert } 862b39c5158Smillert 863b39c5158Smillert ### set binary mode, just in case ### 864b39c5158Smillert $ftp->binary; 865b39c5158Smillert 866b39c5158Smillert ### create the remote path 867b39c5158Smillert ### remember remote paths are unix paths! [#11483] 868b39c5158Smillert my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); 869b39c5158Smillert 870b39c5158Smillert ### fetch the file ### 871b39c5158Smillert my $target; 872b39c5158Smillert unless( $target = $ftp->get( $remote, $to ) ) { 873b39c5158Smillert return $self->_error(loc("Could not fetch '%1' from '%2'", 874b39c5158Smillert $remote, $self->host)); 875b39c5158Smillert } 876b39c5158Smillert 877b39c5158Smillert ### log out ### 878b39c5158Smillert $ftp->quit; 879b39c5158Smillert 880b39c5158Smillert return $target; 881b39c5158Smillert 882b39c5158Smillert} 883b39c5158Smillert 884b39c5158Smillert### /bin/wget fetch ### 885b39c5158Smillertsub _wget_fetch { 886b39c5158Smillert my $self = shift; 887b39c5158Smillert my %hash = @_; 888b39c5158Smillert 889b39c5158Smillert my ($to); 890b39c5158Smillert my $tmpl = { 891b39c5158Smillert to => { required => 1, store => \$to } 892b39c5158Smillert }; 893b39c5158Smillert check( $tmpl, \%hash ) or return; 894b39c5158Smillert 89591f110e0Safresh1 my $wget; 896b39c5158Smillert ### see if we have a wget binary ### 89791f110e0Safresh1 unless( $wget = can_run('wget') ) { 89891f110e0Safresh1 $METHOD_FAIL->{'wget'} = 1; 89991f110e0Safresh1 return; 90091f110e0Safresh1 } 901b39c5158Smillert 902b39c5158Smillert ### no verboseness, thanks ### 903b39c5158Smillert my $cmd = [ $wget, '--quiet' ]; 904b39c5158Smillert 905b39c5158Smillert ### if a timeout is set, add it ### 906b39c5158Smillert push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 907b39c5158Smillert 908b39c5158Smillert ### run passive if specified ### 909*eac174f2Safresh1 push @$cmd, '--passive-ftp' if $self->scheme eq 'ftp' && $FTP_PASSIVE; 910b39c5158Smillert 911b39c5158Smillert ### set the output document, add the uri ### 912b39c5158Smillert push @$cmd, '--output-document', $to, $self->uri; 913b39c5158Smillert 914b39c5158Smillert ### with IPC::Cmd > 0.41, this is fixed in teh library, 915b39c5158Smillert ### and there's no need for special casing any more. 916b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 917b39c5158Smillert # $IPC::Cmd::USE_IPC_RUN 918b39c5158Smillert # ? ($to, $self->uri) 919b39c5158Smillert # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 920b39c5158Smillert 921b39c5158Smillert ### shell out ### 922b39c5158Smillert my $captured; 923b39c5158Smillert unless(run( command => $cmd, 924b39c5158Smillert buffer => \$captured, 925b39c5158Smillert verbose => $DEBUG 926b39c5158Smillert )) { 927b39c5158Smillert ### wget creates the output document always, even if the fetch 928b39c5158Smillert ### fails.. so unlink it in that case 929b39c5158Smillert 1 while unlink $to; 930b39c5158Smillert 931b39c5158Smillert return $self->_error(loc( "Command failed: %1", $captured || '' )); 932b39c5158Smillert } 933b39c5158Smillert 934b39c5158Smillert return $to; 935b39c5158Smillert} 936b39c5158Smillert 937b39c5158Smillert### /bin/lftp fetch ### 938b39c5158Smillertsub _lftp_fetch { 939b39c5158Smillert my $self = shift; 940b39c5158Smillert my %hash = @_; 941b39c5158Smillert 942b39c5158Smillert my ($to); 943b39c5158Smillert my $tmpl = { 944b39c5158Smillert to => { required => 1, store => \$to } 945b39c5158Smillert }; 946b39c5158Smillert check( $tmpl, \%hash ) or return; 947b39c5158Smillert 94891f110e0Safresh1 ### see if we have a lftp binary ### 94991f110e0Safresh1 my $lftp; 95091f110e0Safresh1 unless( $lftp = can_run('lftp') ) { 95191f110e0Safresh1 $METHOD_FAIL->{'lftp'} = 1; 95291f110e0Safresh1 return; 95391f110e0Safresh1 } 954b39c5158Smillert 955b39c5158Smillert ### no verboseness, thanks ### 956b39c5158Smillert my $cmd = [ $lftp, '-f' ]; 957b39c5158Smillert 958b39c5158Smillert my $fh = File::Temp->new; 959b39c5158Smillert 960b39c5158Smillert my $str; 961b39c5158Smillert 962b39c5158Smillert ### if a timeout is set, add it ### 963b39c5158Smillert $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; 964b39c5158Smillert 965b39c5158Smillert ### run passive if specified ### 966b39c5158Smillert $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; 967b39c5158Smillert 968b39c5158Smillert ### set the output document, add the uri ### 969b39c5158Smillert ### quote the URI, because lftp supports certain shell 970b39c5158Smillert ### expansions, most notably & for backgrounding. 971b39c5158Smillert ### ' quote does nto work, must be " 972b39c5158Smillert $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; 973b39c5158Smillert 974b39c5158Smillert if( $DEBUG ) { 975b39c5158Smillert my $pp_str = join ' ', split $/, $str; 976b39c5158Smillert print "# lftp command: $pp_str\n"; 977b39c5158Smillert } 978b39c5158Smillert 979b39c5158Smillert ### write straight to the file. 980b39c5158Smillert $fh->autoflush(1); 981b39c5158Smillert print $fh $str; 982b39c5158Smillert 983b39c5158Smillert ### the command needs to be 1 string to be executed 984b39c5158Smillert push @$cmd, $fh->filename; 985b39c5158Smillert 986b39c5158Smillert ### with IPC::Cmd > 0.41, this is fixed in teh library, 987b39c5158Smillert ### and there's no need for special casing any more. 988b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 989b39c5158Smillert # $IPC::Cmd::USE_IPC_RUN 990b39c5158Smillert # ? ($to, $self->uri) 991b39c5158Smillert # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 992b39c5158Smillert 993b39c5158Smillert 994b39c5158Smillert ### shell out ### 995b39c5158Smillert my $captured; 996b39c5158Smillert unless(run( command => $cmd, 997b39c5158Smillert buffer => \$captured, 998b39c5158Smillert verbose => $DEBUG 999b39c5158Smillert )) { 1000b39c5158Smillert ### wget creates the output document always, even if the fetch 1001b39c5158Smillert ### fails.. so unlink it in that case 1002b39c5158Smillert 1 while unlink $to; 1003b39c5158Smillert 1004b39c5158Smillert return $self->_error(loc( "Command failed: %1", $captured || '' )); 1005b39c5158Smillert } 1006b39c5158Smillert 1007b39c5158Smillert return $to; 1008b39c5158Smillert} 1009b39c5158Smillert 1010b39c5158Smillert 1011b39c5158Smillert 1012b39c5158Smillert### /bin/ftp fetch ### 1013b39c5158Smillertsub _ftp_fetch { 1014b39c5158Smillert my $self = shift; 1015b39c5158Smillert my %hash = @_; 1016b39c5158Smillert 1017b39c5158Smillert my ($to); 1018b39c5158Smillert my $tmpl = { 1019b39c5158Smillert to => { required => 1, store => \$to } 1020b39c5158Smillert }; 1021b39c5158Smillert check( $tmpl, \%hash ) or return; 1022b39c5158Smillert 1023b39c5158Smillert ### see if we have a ftp binary ### 102491f110e0Safresh1 my $ftp; 102591f110e0Safresh1 unless( $ftp = can_run('ftp') ) { 102691f110e0Safresh1 $METHOD_FAIL->{'ftp'} = 1; 102791f110e0Safresh1 return; 102891f110e0Safresh1 } 1029b39c5158Smillert 1030b39c5158Smillert my $fh = FileHandle->new; 1031b39c5158Smillert 1032b39c5158Smillert local $SIG{CHLD} = 'IGNORE'; 1033b39c5158Smillert 103491f110e0Safresh1 unless ($fh->open("$ftp -n", '|-')) { 1035b39c5158Smillert return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); 1036b39c5158Smillert } 1037b39c5158Smillert 1038b39c5158Smillert my @dialog = ( 1039b39c5158Smillert "lcd " . dirname($to), 1040b39c5158Smillert "open " . $self->host, 1041b39c5158Smillert "user anonymous $FROM_EMAIL", 1042b39c5158Smillert "cd /", 1043b39c5158Smillert "cd " . $self->path, 1044b39c5158Smillert "binary", 1045b39c5158Smillert "get " . $self->file . " " . $self->output_file, 1046b39c5158Smillert "quit", 1047b39c5158Smillert ); 1048b39c5158Smillert 1049b39c5158Smillert foreach (@dialog) { $fh->print($_, "\n") } 1050b39c5158Smillert $fh->close or return; 1051b39c5158Smillert 1052b39c5158Smillert return $to; 1053b39c5158Smillert} 1054b39c5158Smillert 1055b39c5158Smillert### lynx is stupid - it decompresses any .gz file it finds to be text 1056b39c5158Smillert### use /bin/lynx to fetch files 1057b39c5158Smillertsub _lynx_fetch { 1058b39c5158Smillert my $self = shift; 1059b39c5158Smillert my %hash = @_; 1060b39c5158Smillert 1061b39c5158Smillert my ($to); 1062b39c5158Smillert my $tmpl = { 1063b39c5158Smillert to => { required => 1, store => \$to } 1064b39c5158Smillert }; 1065b39c5158Smillert check( $tmpl, \%hash ) or return; 1066b39c5158Smillert 1067b39c5158Smillert ### see if we have a lynx binary ### 106891f110e0Safresh1 my $lynx; 106991f110e0Safresh1 unless ( $lynx = can_run('lynx') ){ 107091f110e0Safresh1 $METHOD_FAIL->{'lynx'} = 1; 107191f110e0Safresh1 return; 107291f110e0Safresh1 } 1073b39c5158Smillert 1074b39c5158Smillert unless( IPC::Cmd->can_capture_buffer ) { 1075b39c5158Smillert $METHOD_FAIL->{'lynx'} = 1; 1076b39c5158Smillert 1077b39c5158Smillert return $self->_error(loc( 1078b39c5158Smillert "Can not capture buffers. Can not use '%1' to fetch files", 1079b39c5158Smillert 'lynx' )); 1080b39c5158Smillert } 1081b39c5158Smillert 1082b39c5158Smillert ### check if the HTTP resource exists ### 1083b39c5158Smillert if ($self->uri =~ /^https?:\/\//i) { 1084b39c5158Smillert my $cmd = [ 1085b39c5158Smillert $lynx, 1086b39c5158Smillert '-head', 1087b39c5158Smillert '-source', 1088b39c5158Smillert "-auth=anonymous:$FROM_EMAIL", 1089b39c5158Smillert ]; 1090b39c5158Smillert 1091b39c5158Smillert push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1092b39c5158Smillert 1093b39c5158Smillert push @$cmd, $self->uri; 1094b39c5158Smillert 1095b39c5158Smillert ### shell out ### 1096b39c5158Smillert my $head; 1097b39c5158Smillert unless(run( command => $cmd, 1098b39c5158Smillert buffer => \$head, 1099b39c5158Smillert verbose => $DEBUG ) 1100b39c5158Smillert ) { 1101b39c5158Smillert return $self->_error(loc("Command failed: %1", $head || '')); 1102b39c5158Smillert } 1103b39c5158Smillert 1104b39c5158Smillert unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { 1105b39c5158Smillert return $self->_error(loc("Command failed: %1", $head || '')); 1106b39c5158Smillert } 1107b39c5158Smillert } 1108b39c5158Smillert 1109b39c5158Smillert ### write to the output file ourselves, since lynx ass_u_mes to much 111091f110e0Safresh1 my $local = FileHandle->new( $to, 'w' ) 1111b39c5158Smillert or return $self->_error(loc( 1112b39c5158Smillert "Could not open '%1' for writing: %2",$to,$!)); 1113b39c5158Smillert 1114b39c5158Smillert ### dump to stdout ### 1115b39c5158Smillert my $cmd = [ 1116b39c5158Smillert $lynx, 1117b39c5158Smillert '-source', 1118b39c5158Smillert "-auth=anonymous:$FROM_EMAIL", 1119b39c5158Smillert ]; 1120b39c5158Smillert 1121b39c5158Smillert push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1122b39c5158Smillert 1123b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 1124b39c5158Smillert push @$cmd, $self->uri; 1125b39c5158Smillert 1126b39c5158Smillert ### with IPC::Cmd > 0.41, this is fixed in teh library, 1127b39c5158Smillert ### and there's no need for special casing any more. 1128b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 1129b39c5158Smillert # $IPC::Cmd::USE_IPC_RUN 1130b39c5158Smillert # ? $self->uri 1131b39c5158Smillert # : QUOTE. $self->uri .QUOTE; 1132b39c5158Smillert 1133b39c5158Smillert 1134b39c5158Smillert ### shell out ### 1135b39c5158Smillert my $captured; 1136b39c5158Smillert unless(run( command => $cmd, 1137b39c5158Smillert buffer => \$captured, 1138b39c5158Smillert verbose => $DEBUG ) 1139b39c5158Smillert ) { 1140b39c5158Smillert return $self->_error(loc("Command failed: %1", $captured || '')); 1141b39c5158Smillert } 1142b39c5158Smillert 1143b39c5158Smillert ### print to local file ### 1144b39c5158Smillert ### XXX on a 404 with a special error page, $captured will actually 1145b39c5158Smillert ### hold the contents of that page, and make it *appear* like the 1146b39c5158Smillert ### request was a success, when really it wasn't :( 1147b39c5158Smillert ### there doesn't seem to be an option for lynx to change the exit 1148b39c5158Smillert ### code based on a 4XX status or so. 1149b39c5158Smillert ### the closest we can come is using --error_file and parsing that, 1150b39c5158Smillert ### which is very unreliable ;( 1151b39c5158Smillert $local->print( $captured ); 1152b39c5158Smillert $local->close or return; 1153b39c5158Smillert 1154b39c5158Smillert return $to; 1155b39c5158Smillert} 1156b39c5158Smillert 1157b39c5158Smillert### use /bin/ncftp to fetch files 1158b39c5158Smillertsub _ncftp_fetch { 1159b39c5158Smillert my $self = shift; 1160b39c5158Smillert my %hash = @_; 1161b39c5158Smillert 1162b39c5158Smillert my ($to); 1163b39c5158Smillert my $tmpl = { 1164b39c5158Smillert to => { required => 1, store => \$to } 1165b39c5158Smillert }; 1166b39c5158Smillert check( $tmpl, \%hash ) or return; 1167b39c5158Smillert 1168898184e3Ssthen ### we can only set passive mode in interactive sessions, so bail out 1169b39c5158Smillert ### if $FTP_PASSIVE is set 1170b39c5158Smillert return if $FTP_PASSIVE; 1171b39c5158Smillert 1172b39c5158Smillert ### see if we have a ncftp binary ### 117391f110e0Safresh1 my $ncftp; 117491f110e0Safresh1 unless( $ncftp = can_run('ncftp') ) { 117591f110e0Safresh1 $METHOD_FAIL->{'ncftp'} = 1; 117691f110e0Safresh1 return; 117791f110e0Safresh1 } 1178b39c5158Smillert 1179b39c5158Smillert my $cmd = [ 1180b39c5158Smillert $ncftp, 1181b39c5158Smillert '-V', # do not be verbose 1182b39c5158Smillert '-p', $FROM_EMAIL, # email as password 1183b39c5158Smillert $self->host, # hostname 1184b39c5158Smillert dirname($to), # local dir for the file 1185b39c5158Smillert # remote path to the file 1186b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 1187b39c5158Smillert $IPC::Cmd::USE_IPC_RUN 1188b39c5158Smillert ? File::Spec::Unix->catdir( $self->path, $self->file ) 1189b39c5158Smillert : QUOTE. File::Spec::Unix->catdir( 1190b39c5158Smillert $self->path, $self->file ) .QUOTE 1191b39c5158Smillert 1192b39c5158Smillert ]; 1193b39c5158Smillert 1194b39c5158Smillert ### shell out ### 1195b39c5158Smillert my $captured; 1196b39c5158Smillert unless(run( command => $cmd, 1197b39c5158Smillert buffer => \$captured, 1198b39c5158Smillert verbose => $DEBUG ) 1199b39c5158Smillert ) { 1200b39c5158Smillert return $self->_error(loc("Command failed: %1", $captured || '')); 1201b39c5158Smillert } 1202b39c5158Smillert 1203b39c5158Smillert return $to; 1204b39c5158Smillert 1205b39c5158Smillert} 1206b39c5158Smillert 1207b39c5158Smillert### use /bin/curl to fetch files 1208b39c5158Smillertsub _curl_fetch { 1209b39c5158Smillert my $self = shift; 1210b39c5158Smillert my %hash = @_; 1211b39c5158Smillert 1212b39c5158Smillert my ($to); 1213b39c5158Smillert my $tmpl = { 1214b39c5158Smillert to => { required => 1, store => \$to } 1215b39c5158Smillert }; 1216b39c5158Smillert check( $tmpl, \%hash ) or return; 121791f110e0Safresh1 my $curl; 121891f110e0Safresh1 unless ( $curl = can_run('curl') ) { 121991f110e0Safresh1 $METHOD_FAIL->{'curl'} = 1; 122091f110e0Safresh1 return; 122191f110e0Safresh1 } 1222b39c5158Smillert 1223b39c5158Smillert ### these long opts are self explanatory - I like that -jmb 1224b39c5158Smillert my $cmd = [ $curl, '-q' ]; 1225b39c5158Smillert 12266fb12b70Safresh1 push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far 12276fb12b70Safresh1 1228b39c5158Smillert push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; 1229b39c5158Smillert 1230b39c5158Smillert push(@$cmd, '--silent') unless $DEBUG; 1231b39c5158Smillert 1232b39c5158Smillert ### curl does the right thing with passive, regardless ### 1233b39c5158Smillert if ($self->scheme eq 'ftp') { 1234b39c5158Smillert push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); 1235b39c5158Smillert } 1236b39c5158Smillert 1237b39c5158Smillert ### curl doesn't follow 302 (temporarily moved) etc automatically 1238b39c5158Smillert ### so we add --location to enable that. 1239b39c5158Smillert push @$cmd, '--fail', '--location', '--output', $to, $self->uri; 1240b39c5158Smillert 1241b39c5158Smillert ### with IPC::Cmd > 0.41, this is fixed in teh library, 1242b39c5158Smillert ### and there's no need for special casing any more. 1243b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 1244b39c5158Smillert # $IPC::Cmd::USE_IPC_RUN 1245b39c5158Smillert # ? ($to, $self->uri) 1246b39c5158Smillert # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1247b39c5158Smillert 1248b39c5158Smillert 1249b39c5158Smillert my $captured; 1250b39c5158Smillert unless(run( command => $cmd, 1251b39c5158Smillert buffer => \$captured, 1252b39c5158Smillert verbose => $DEBUG ) 1253b39c5158Smillert ) { 1254b39c5158Smillert 1255b39c5158Smillert return $self->_error(loc("Command failed: %1", $captured || '')); 1256b39c5158Smillert } 1257b39c5158Smillert 1258b39c5158Smillert return $to; 1259b39c5158Smillert 1260b39c5158Smillert} 1261b39c5158Smillert 1262898184e3Ssthen### /usr/bin/fetch fetch! ### 1263898184e3Ssthensub _fetch_fetch { 1264898184e3Ssthen my $self = shift; 1265898184e3Ssthen my %hash = @_; 1266898184e3Ssthen 1267898184e3Ssthen my ($to); 1268898184e3Ssthen my $tmpl = { 1269898184e3Ssthen to => { required => 1, store => \$to } 1270898184e3Ssthen }; 1271898184e3Ssthen check( $tmpl, \%hash ) or return; 1272898184e3Ssthen 127391f110e0Safresh1 ### see if we have a fetch binary ### 127491f110e0Safresh1 my $fetch; 127591f110e0Safresh1 unless( HAS_FETCH and $fetch = can_run('fetch') ) { 127691f110e0Safresh1 $METHOD_FAIL->{'fetch'} = 1; 127791f110e0Safresh1 return; 127891f110e0Safresh1 } 1279898184e3Ssthen 1280898184e3Ssthen ### no verboseness, thanks ### 1281898184e3Ssthen my $cmd = [ $fetch, '-q' ]; 1282898184e3Ssthen 1283898184e3Ssthen ### if a timeout is set, add it ### 1284898184e3Ssthen push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; 1285898184e3Ssthen 1286898184e3Ssthen ### run passive if specified ### 1287898184e3Ssthen #push @$cmd, '-p' if $FTP_PASSIVE; 1288898184e3Ssthen local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; 1289898184e3Ssthen 1290898184e3Ssthen ### set the output document, add the uri ### 1291898184e3Ssthen push @$cmd, '-o', $to, $self->uri; 1292898184e3Ssthen 1293898184e3Ssthen ### with IPC::Cmd > 0.41, this is fixed in teh library, 1294898184e3Ssthen ### and there's no need for special casing any more. 1295898184e3Ssthen ### DO NOT quote things for IPC::Run, it breaks stuff. 1296898184e3Ssthen # $IPC::Cmd::USE_IPC_RUN 1297898184e3Ssthen # ? ($to, $self->uri) 1298898184e3Ssthen # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1299898184e3Ssthen 1300898184e3Ssthen ### shell out ### 1301898184e3Ssthen my $captured; 1302898184e3Ssthen unless(run( command => $cmd, 1303898184e3Ssthen buffer => \$captured, 1304898184e3Ssthen verbose => $DEBUG 1305898184e3Ssthen )) { 1306898184e3Ssthen ### wget creates the output document always, even if the fetch 1307898184e3Ssthen ### fails.. so unlink it in that case 1308898184e3Ssthen 1 while unlink $to; 1309898184e3Ssthen 1310898184e3Ssthen return $self->_error(loc( "Command failed: %1", $captured || '' )); 1311898184e3Ssthen } 1312898184e3Ssthen 1313898184e3Ssthen return $to; 1314898184e3Ssthen} 1315b39c5158Smillert 1316b39c5158Smillert### use File::Copy for fetching file:// urls ### 1317b39c5158Smillert### 1318*eac174f2Safresh1### See section 3.10 of RFC 1738 (https://datatracker.ietf.org/doc/html/rfc1738#section-3.10) 1319b39c5158Smillert### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) 1320b39c5158Smillert### 1321b39c5158Smillert 1322b39c5158Smillertsub _file_fetch { 1323b39c5158Smillert my $self = shift; 1324b39c5158Smillert my %hash = @_; 1325b39c5158Smillert 1326b39c5158Smillert my ($to); 1327b39c5158Smillert my $tmpl = { 1328b39c5158Smillert to => { required => 1, store => \$to } 1329b39c5158Smillert }; 1330b39c5158Smillert check( $tmpl, \%hash ) or return; 1331b39c5158Smillert 1332b39c5158Smillert 1333b39c5158Smillert 1334b39c5158Smillert ### prefix a / on unix systems with a file uri, since it would 1335b39c5158Smillert ### look somewhat like this: 1336b39c5158Smillert ### file:///home/kane/file 1337898184e3Ssthen ### whereas windows file uris for 'c:\some\dir\file' might look like: 1338b39c5158Smillert ### file:///C:/some/dir/file 1339b39c5158Smillert ### file:///C|/some/dir/file 1340b39c5158Smillert ### or for a network share '\\host\share\some\dir\file': 1341b39c5158Smillert ### file:////host/share/some/dir/file 1342b39c5158Smillert ### 1343b39c5158Smillert ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: 1344b39c5158Smillert ### file://vms.host.edu/disk$user/my/notes/note12345.txt 1345b39c5158Smillert ### 1346b39c5158Smillert 1347b39c5158Smillert my $path = $self->path; 1348b39c5158Smillert my $vol = $self->vol; 1349b39c5158Smillert my $share = $self->share; 1350b39c5158Smillert 1351b39c5158Smillert my $remote; 1352b39c5158Smillert if (!$share and $self->host) { 1353b39c5158Smillert return $self->_error(loc( 1354b39c5158Smillert "Currently %1 cannot handle hosts in %2 urls", 1355b39c5158Smillert 'File::Fetch', 'file://' 1356b39c5158Smillert )); 1357b39c5158Smillert } 1358b39c5158Smillert 1359b39c5158Smillert if( $vol ) { 1360b39c5158Smillert $path = File::Spec->catdir( split /\//, $path ); 1361b39c5158Smillert $remote = File::Spec->catpath( $vol, $path, $self->file); 1362b39c5158Smillert 1363b39c5158Smillert } elsif( $share ) { 1364b39c5158Smillert ### win32 specific, and a share name, so we wont bother with File::Spec 1365b39c5158Smillert $path =~ s|/+|\\|g; 1366b39c5158Smillert $remote = "\\\\".$self->host."\\$share\\$path"; 1367b39c5158Smillert 1368b39c5158Smillert } else { 1369b39c5158Smillert ### File::Spec on VMS can not currently handle UNIX syntax. 1370b39c5158Smillert my $file_class = ON_VMS 1371b39c5158Smillert ? 'File::Spec::Unix' 1372b39c5158Smillert : 'File::Spec'; 1373b39c5158Smillert 1374b39c5158Smillert $remote = $file_class->catfile( $path, $self->file ); 1375b39c5158Smillert } 1376b39c5158Smillert 1377b39c5158Smillert ### File::Copy is littered with 'die' statements :( ### 1378b39c5158Smillert my $rv = eval { File::Copy::copy( $remote, $to ) }; 1379b39c5158Smillert 1380b39c5158Smillert ### something went wrong ### 1381b39c5158Smillert if( !$rv or $@ ) { 1382b39c5158Smillert return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", 1383b39c5158Smillert $remote, $to, $!, $@)); 1384b39c5158Smillert } 1385b39c5158Smillert 1386b39c5158Smillert return $to; 1387b39c5158Smillert} 1388b39c5158Smillert 1389b39c5158Smillert### use /usr/bin/rsync to fetch files 1390b39c5158Smillertsub _rsync_fetch { 1391b39c5158Smillert my $self = shift; 1392b39c5158Smillert my %hash = @_; 1393b39c5158Smillert 1394b39c5158Smillert my ($to); 1395b39c5158Smillert my $tmpl = { 1396b39c5158Smillert to => { required => 1, store => \$to } 1397b39c5158Smillert }; 1398b39c5158Smillert check( $tmpl, \%hash ) or return; 139991f110e0Safresh1 my $rsync; 140091f110e0Safresh1 unless ( $rsync = can_run('rsync') ) { 140191f110e0Safresh1 $METHOD_FAIL->{'rsync'} = 1; 140291f110e0Safresh1 return; 140391f110e0Safresh1 } 1404b39c5158Smillert 1405b39c5158Smillert my $cmd = [ $rsync ]; 1406b39c5158Smillert 1407b39c5158Smillert ### XXX: rsync has no I/O timeouts at all, by default 1408b39c5158Smillert push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1409b39c5158Smillert 1410b39c5158Smillert push(@$cmd, '--quiet') unless $DEBUG; 1411b39c5158Smillert 1412b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 1413b39c5158Smillert push @$cmd, $self->uri, $to; 1414b39c5158Smillert 1415b39c5158Smillert ### with IPC::Cmd > 0.41, this is fixed in teh library, 1416b39c5158Smillert ### and there's no need for special casing any more. 1417b39c5158Smillert ### DO NOT quote things for IPC::Run, it breaks stuff. 1418b39c5158Smillert # $IPC::Cmd::USE_IPC_RUN 1419b39c5158Smillert # ? ($to, $self->uri) 1420b39c5158Smillert # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1421b39c5158Smillert 1422b39c5158Smillert my $captured; 1423b39c5158Smillert unless(run( command => $cmd, 1424b39c5158Smillert buffer => \$captured, 1425b39c5158Smillert verbose => $DEBUG ) 1426b39c5158Smillert ) { 1427b39c5158Smillert 1428b39c5158Smillert return $self->_error(loc("Command %1 failed: %2", 1429b39c5158Smillert "@$cmd" || '', $captured || '')); 1430b39c5158Smillert } 1431b39c5158Smillert 1432b39c5158Smillert return $to; 1433b39c5158Smillert 1434b39c5158Smillert} 1435b39c5158Smillert 14366fb12b70Safresh1### use git to fetch files 14376fb12b70Safresh1sub _git_fetch { 14386fb12b70Safresh1 my $self = shift; 14396fb12b70Safresh1 my %hash = @_; 14406fb12b70Safresh1 14416fb12b70Safresh1 my ($to); 14426fb12b70Safresh1 my $tmpl = { 14436fb12b70Safresh1 to => { required => 1, store => \$to } 14446fb12b70Safresh1 }; 14456fb12b70Safresh1 check( $tmpl, \%hash ) or return; 14466fb12b70Safresh1 my $git; 14476fb12b70Safresh1 unless ( $git = can_run('git') ) { 14486fb12b70Safresh1 $METHOD_FAIL->{'git'} = 1; 14496fb12b70Safresh1 return; 14506fb12b70Safresh1 } 14516fb12b70Safresh1 14526fb12b70Safresh1 my $cmd = [ $git, 'clone' ]; 14536fb12b70Safresh1 14546fb12b70Safresh1 #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 14556fb12b70Safresh1 14566fb12b70Safresh1 push(@$cmd, '--quiet') unless $DEBUG; 14576fb12b70Safresh1 14586fb12b70Safresh1 ### DO NOT quote things for IPC::Run, it breaks stuff. 14596fb12b70Safresh1 push @$cmd, $self->uri, $to; 14606fb12b70Safresh1 14616fb12b70Safresh1 ### with IPC::Cmd > 0.41, this is fixed in teh library, 14626fb12b70Safresh1 ### and there's no need for special casing any more. 14636fb12b70Safresh1 ### DO NOT quote things for IPC::Run, it breaks stuff. 14646fb12b70Safresh1 # $IPC::Cmd::USE_IPC_RUN 14656fb12b70Safresh1 # ? ($to, $self->uri) 14666fb12b70Safresh1 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 14676fb12b70Safresh1 14686fb12b70Safresh1 my $captured; 14696fb12b70Safresh1 unless(run( command => $cmd, 14706fb12b70Safresh1 buffer => \$captured, 14716fb12b70Safresh1 verbose => $DEBUG ) 14726fb12b70Safresh1 ) { 14736fb12b70Safresh1 14746fb12b70Safresh1 return $self->_error(loc("Command %1 failed: %2", 14756fb12b70Safresh1 "@$cmd" || '', $captured || '')); 14766fb12b70Safresh1 } 14776fb12b70Safresh1 14786fb12b70Safresh1 return $to; 14796fb12b70Safresh1 14806fb12b70Safresh1} 14816fb12b70Safresh1 1482b39c5158Smillert################################# 1483b39c5158Smillert# 1484b39c5158Smillert# Error code 1485b39c5158Smillert# 1486b39c5158Smillert################################# 1487b39c5158Smillert 1488b39c5158Smillert=pod 1489b39c5158Smillert 1490b39c5158Smillert=head2 $ff->error([BOOL]) 1491b39c5158Smillert 1492b39c5158SmillertReturns the last encountered error as string. 1493b39c5158SmillertPass it a true value to get the C<Carp::longmess()> output instead. 1494b39c5158Smillert 1495b39c5158Smillert=cut 1496b39c5158Smillert 1497b39c5158Smillert### error handling the way Archive::Extract does it 1498b39c5158Smillertsub _error { 1499b39c5158Smillert my $self = shift; 1500b39c5158Smillert my $error = shift; 1501b39c5158Smillert 1502b39c5158Smillert $self->_error_msg( $error ); 1503b39c5158Smillert $self->_error_msg_long( Carp::longmess($error) ); 1504b39c5158Smillert 1505b39c5158Smillert if( $WARN ) { 1506b39c5158Smillert carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; 1507b39c5158Smillert } 1508b39c5158Smillert 1509b39c5158Smillert return; 1510b39c5158Smillert} 1511b39c5158Smillert 1512b39c5158Smillertsub error { 1513b39c5158Smillert my $self = shift; 1514b39c5158Smillert return shift() ? $self->_error_msg_long : $self->_error_msg; 1515b39c5158Smillert} 1516b39c5158Smillert 1517b39c5158Smillert 1518b39c5158Smillert1; 1519b39c5158Smillert 1520b39c5158Smillert=pod 1521b39c5158Smillert 1522b39c5158Smillert=head1 HOW IT WORKS 1523b39c5158Smillert 1524b39c5158SmillertFile::Fetch is able to fetch a variety of uris, by using several 1525b39c5158Smillertexternal programs and modules. 1526b39c5158Smillert 1527b39c5158SmillertBelow is a mapping of what utilities will be used in what order 1528b39c5158Smillertfor what schemes, if available: 1529b39c5158Smillert 1530b39c5158Smillert file => LWP, lftp, file 15319f11ffb7Safresh1 http => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock 1532898184e3Ssthen ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp 1533b39c5158Smillert rsync => rsync 15346fb12b70Safresh1 git => git 1535b39c5158Smillert 1536b39c5158SmillertIf you'd like to disable the use of one or more of these utilities 1537b39c5158Smillertand/or modules, see the C<$BLACKLIST> variable further down. 1538b39c5158Smillert 1539b39c5158SmillertIf a utility or module isn't available, it will be marked in a cache 1540b39c5158Smillert(see the C<$METHOD_FAIL> variable further down), so it will not be 1541b39c5158Smillerttried again. The C<fetch> method will only fail when all options are 1542b39c5158Smillertexhausted, and it was not able to retrieve the file. 1543b39c5158Smillert 1544898184e3SsthenThe C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD 1545898184e3Ssthenmay also have it from C<pkgsrc>. We only check for C<fetch> on those 1546898184e3Ssthenthree platforms. 1547898184e3Ssthen 1548b39c5158SmillertC<iosock> is a very limited L<IO::Socket::INET> based mechanism for 1549b39c5158Smillertretrieving C<http> schemed urls. It doesn't follow redirects for instance. 1550b39c5158Smillert 15516fb12b70Safresh1C<git> only supports C<git://> style urls. 15526fb12b70Safresh1 1553b39c5158SmillertA special note about fetching files from an ftp uri: 1554b39c5158Smillert 1555b39c5158SmillertBy default, all ftp connections are done in passive mode. To change 1556b39c5158Smillertthat, see the C<$FTP_PASSIVE> variable further down. 1557b39c5158Smillert 1558b39c5158SmillertFurthermore, ftp uris only support anonymous connections, so no 1559b39c5158Smillertnamed user/password pair can be passed along. 1560b39c5158Smillert 1561b39c5158SmillertC</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable 1562b39c5158Smillertfurther down. 1563b39c5158Smillert 1564b39c5158Smillert=head1 GLOBAL VARIABLES 1565b39c5158Smillert 1566b39c5158SmillertThe behaviour of File::Fetch can be altered by changing the following 1567b39c5158Smillertglobal variables: 1568b39c5158Smillert 1569b39c5158Smillert=head2 $File::Fetch::FROM_EMAIL 1570b39c5158Smillert 1571b39c5158SmillertThis is the email address that will be sent as your anonymous ftp 1572b39c5158Smillertpassword. 1573b39c5158Smillert 1574b39c5158SmillertDefault is C<File-Fetch@example.com>. 1575b39c5158Smillert 1576b39c5158Smillert=head2 $File::Fetch::USER_AGENT 1577b39c5158Smillert 1578b39c5158SmillertThis is the useragent as C<LWP> will report it. 1579b39c5158Smillert 1580b39c5158SmillertDefault is C<File::Fetch/$VERSION>. 1581b39c5158Smillert 1582b39c5158Smillert=head2 $File::Fetch::FTP_PASSIVE 1583b39c5158Smillert 1584b39c5158SmillertThis variable controls whether the environment variable C<FTP_PASSIVE> 1585b39c5158Smillertand any passive switches to commandline tools will be set to true. 1586b39c5158Smillert 1587b39c5158SmillertDefault value is 1. 1588b39c5158Smillert 1589b39c5158SmillertNote: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch 1590b39c5158Smillertfiles, since passive mode can only be set interactively for this binary 1591b39c5158Smillert 1592b39c5158Smillert=head2 $File::Fetch::TIMEOUT 1593b39c5158Smillert 1594b39c5158SmillertWhen set, controls the network timeout (counted in seconds). 1595b39c5158Smillert 1596b39c5158SmillertDefault value is 0. 1597b39c5158Smillert 1598b39c5158Smillert=head2 $File::Fetch::WARN 1599b39c5158Smillert 1600b39c5158SmillertThis variable controls whether errors encountered internally by 1601b39c5158SmillertC<File::Fetch> should be C<carp>'d or not. 1602b39c5158Smillert 1603b39c5158SmillertSet to false to silence warnings. Inspect the output of the C<error()> 1604b39c5158Smillertmethod manually to see what went wrong. 1605b39c5158Smillert 1606b39c5158SmillertDefaults to C<true>. 1607b39c5158Smillert 1608b39c5158Smillert=head2 $File::Fetch::DEBUG 1609b39c5158Smillert 1610b39c5158SmillertThis enables debugging output when calling commandline utilities to 1611b39c5158Smillertfetch files. 1612b39c5158SmillertThis also enables C<Carp::longmess> errors, instead of the regular 1613b39c5158SmillertC<carp> errors. 1614b39c5158Smillert 1615b39c5158SmillertGood for tracking down why things don't work with your particular 1616b39c5158Smillertsetup. 1617b39c5158Smillert 1618b39c5158SmillertDefault is 0. 1619b39c5158Smillert 1620b39c5158Smillert=head2 $File::Fetch::BLACKLIST 1621b39c5158Smillert 1622b39c5158SmillertThis is an array ref holding blacklisted modules/utilities for fetching 1623b39c5158Smillertfiles with. 1624b39c5158Smillert 1625b39c5158SmillertTo disallow the use of, for example, C<LWP> and C<Net::FTP>, you could 1626b39c5158Smillertset $File::Fetch::BLACKLIST to: 1627b39c5158Smillert 1628b39c5158Smillert $File::Fetch::BLACKLIST = [qw|lwp netftp|] 1629b39c5158Smillert 1630b39c5158SmillertThe default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. 1631b39c5158Smillert 1632b39c5158SmillertSee the note on C<MAPPING> below. 1633b39c5158Smillert 1634b39c5158Smillert=head2 $File::Fetch::METHOD_FAIL 1635b39c5158Smillert 1636b39c5158SmillertThis is a hashref registering what modules/utilities were known to fail 1637b39c5158Smillertfor fetching files (mostly because they weren't installed). 1638b39c5158Smillert 1639b39c5158SmillertYou can reset this cache by assigning an empty hashref to it, or 1640b39c5158Smillertindividually remove keys. 1641b39c5158Smillert 1642b39c5158SmillertSee the note on C<MAPPING> below. 1643b39c5158Smillert 1644b39c5158Smillert=head1 MAPPING 1645b39c5158Smillert 1646b39c5158Smillert 1647b39c5158SmillertHere's a quick mapping for the utilities/modules, and their names for 1648b39c5158Smillertthe $BLACKLIST, $METHOD_FAIL and other internal functions. 1649b39c5158Smillert 1650b39c5158Smillert LWP => lwp 1651898184e3Ssthen HTTP::Lite => httplite 1652898184e3Ssthen HTTP::Tiny => httptiny 1653b39c5158Smillert Net::FTP => netftp 1654b39c5158Smillert wget => wget 1655b39c5158Smillert lynx => lynx 1656b39c5158Smillert ncftp => ncftp 1657b39c5158Smillert ftp => ftp 1658b39c5158Smillert curl => curl 1659b39c5158Smillert rsync => rsync 1660b39c5158Smillert lftp => lftp 1661898184e3Ssthen fetch => fetch 1662b39c5158Smillert IO::Socket => iosock 1663b39c5158Smillert 1664b39c5158Smillert=head1 FREQUENTLY ASKED QUESTIONS 1665b39c5158Smillert 1666b39c5158Smillert=head2 So how do I use a proxy with File::Fetch? 1667b39c5158Smillert 1668b39c5158SmillertC<File::Fetch> currently only supports proxies with LWP::UserAgent. 1669b39c5158SmillertYou will need to set your environment variables accordingly. For 1670b39c5158Smillertexample, to use an ftp proxy: 1671b39c5158Smillert 1672b39c5158Smillert $ENV{ftp_proxy} = 'foo.com'; 1673b39c5158Smillert 1674b39c5158SmillertRefer to the LWP::UserAgent manpage for more details. 1675b39c5158Smillert 1676b39c5158Smillert=head2 I used 'lynx' to fetch a file, but its contents is all wrong! 1677b39c5158Smillert 1678b39c5158SmillertC<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, 1679b39c5158Smillertwhich we in turn capture. If that content is a 'custom' error file 1680b39c5158Smillert(like, say, a C<404 handler>), you will get that contents instead. 1681b39c5158Smillert 1682b39c5158SmillertSadly, C<lynx> doesn't support any options to return a different exit 1683b39c5158Smillertcode on non-C<200 OK> status, giving us no way to tell the difference 1684898184e3Ssthenbetween a 'successful' fetch and a custom error page. 1685b39c5158Smillert 1686b39c5158SmillertTherefor, we recommend to only use C<lynx> as a last resort. This is 1687b39c5158Smillertwhy it is at the back of our list of methods to try as well. 1688b39c5158Smillert 1689b39c5158Smillert=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? 1690b39c5158Smillert 1691b39c5158SmillertC<File::Fetch> is relatively smart about things. When trying to write 1692b39c5158Smillerta file to disk, it removes the C<query parameters> (see the 1693b39c5158SmillertC<output_file> method for details) from the file name before creating 1694b39c5158Smillertit. In most cases this suffices. 1695b39c5158Smillert 1696b39c5158SmillertIf you have any other characters you need to escape, please install 1697b39c5158Smillertthe C<URI::Escape> module from CPAN, and pre-encode your URI before 1698b39c5158Smillertpassing it to C<File::Fetch>. You can read about the details of URIs 1699b39c5158Smillertand URI encoding here: 1700b39c5158Smillert 1701*eac174f2Safresh1L<https://datatracker.ietf.org/doc/html/rfc2396> 1702b39c5158Smillert 1703b39c5158Smillert=head1 TODO 1704b39c5158Smillert 1705b39c5158Smillert=over 4 1706b39c5158Smillert 1707b39c5158Smillert=item Implement $PREFER_BIN 1708b39c5158Smillert 1709b39c5158SmillertTo indicate to rather use commandline tools than modules 1710b39c5158Smillert 1711b39c5158Smillert=back 1712b39c5158Smillert 1713b39c5158Smillert=head1 BUG REPORTS 1714b39c5158Smillert 1715b39c5158SmillertPlease report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>. 1716b39c5158Smillert 1717b39c5158Smillert=head1 AUTHOR 1718b39c5158Smillert 1719b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 1720b39c5158Smillert 1721b39c5158Smillert=head1 COPYRIGHT 1722b39c5158Smillert 1723b39c5158SmillertThis library is free software; you may redistribute and/or modify it 1724b39c5158Smillertunder the same terms as Perl itself. 1725b39c5158Smillert 1726b39c5158Smillert 1727b39c5158Smillert=cut 1728b39c5158Smillert 1729b39c5158Smillert# Local variables: 1730b39c5158Smillert# c-indentation-style: bsd 1731b39c5158Smillert# c-basic-offset: 4 1732b39c5158Smillert# indent-tabs-mode: nil 1733b39c5158Smillert# End: 1734b39c5158Smillert# vim: expandtab shiftwidth=4: 1735b39c5158Smillert 1736b39c5158Smillert 1737b39c5158Smillert 1738b39c5158Smillert 1739