1# vim: ts=4 sts=4 sw=4 et: 2package HTTP::Tiny; 3use strict; 4use warnings; 5# ABSTRACT: A small, simple, correct HTTP/1.1 client 6our $VERSION = '0.025'; # VERSION 7 8use Carp (); 9 10 11my @attributes; 12BEGIN { 13 @attributes = qw(agent default_headers local_address max_redirect max_size proxy timeout SSL_options verify_SSL); 14 no strict 'refs'; 15 for my $accessor ( @attributes ) { 16 *{$accessor} = sub { 17 @_ > 1 ? $_[0]->{$accessor} = $_[1] : $_[0]->{$accessor}; 18 }; 19 } 20} 21 22sub new { 23 my($class, %args) = @_; 24 25 (my $default_agent = $class) =~ s{::}{-}g; 26 $default_agent .= "/" . ($class->VERSION || 0); 27 28 my $self = { 29 agent => $default_agent, 30 max_redirect => 5, 31 timeout => 60, 32 verify_SSL => $args{verify_SSL} || $args{verify_ssl} || 0, # no verification by default 33 }; 34 35 $args{agent} .= $default_agent 36 if defined $args{agent} && $args{agent} =~ / $/; 37 38 for my $key ( @attributes ) { 39 $self->{$key} = $args{$key} if exists $args{$key} 40 } 41 42 # Never override proxy argument as this breaks backwards compat. 43 if (!exists $self->{proxy} && (my $http_proxy = $ENV{http_proxy})) { 44 if ($http_proxy =~ m{\Ahttp://[^/?#:@]+:\d+/?\z}) { 45 $self->{proxy} = $http_proxy; 46 } 47 else { 48 Carp::croak(qq{Environment 'http_proxy' must be in format http://<host>:<port>/\n}); 49 } 50 } 51 52 return bless $self, $class; 53} 54 55 56for my $sub_name ( qw/get head put post delete/ ) { 57 my $req_method = uc $sub_name; 58 no strict 'refs'; 59 eval <<"HERE"; ## no critic 60 sub $sub_name { 61 my (\$self, \$url, \$args) = \@_; 62 \@_ == 2 || (\@_ == 3 && ref \$args eq 'HASH') 63 or Carp::croak(q/Usage: \$http->$sub_name(URL, [HASHREF])/ . "\n"); 64 return \$self->request('$req_method', \$url, \$args || {}); 65 } 66HERE 67} 68 69 70sub post_form { 71 my ($self, $url, $data, $args) = @_; 72 (@_ == 3 || @_ == 4 && ref $args eq 'HASH') 73 or Carp::croak(q/Usage: $http->post_form(URL, DATAREF, [HASHREF])/ . "\n"); 74 75 my $headers = {}; 76 while ( my ($key, $value) = each %{$args->{headers} || {}} ) { 77 $headers->{lc $key} = $value; 78 } 79 delete $args->{headers}; 80 81 return $self->request('POST', $url, { 82 %$args, 83 content => $self->www_form_urlencode($data), 84 headers => { 85 %$headers, 86 'content-type' => 'application/x-www-form-urlencoded' 87 }, 88 } 89 ); 90} 91 92 93sub mirror { 94 my ($self, $url, $file, $args) = @_; 95 @_ == 3 || (@_ == 4 && ref $args eq 'HASH') 96 or Carp::croak(q/Usage: $http->mirror(URL, FILE, [HASHREF])/ . "\n"); 97 if ( -e $file and my $mtime = (stat($file))[9] ) { 98 $args->{headers}{'if-modified-since'} ||= $self->_http_date($mtime); 99 } 100 my $tempfile = $file . int(rand(2**31)); 101 open my $fh, ">", $tempfile 102 or Carp::croak(qq/Error: Could not open temporary file $tempfile for downloading: $!\n/); 103 binmode $fh; 104 $args->{data_callback} = sub { print {$fh} $_[0] }; 105 my $response = $self->request('GET', $url, $args); 106 close $fh 107 or Carp::croak(qq/Error: Could not close temporary file $tempfile: $!\n/); 108 if ( $response->{success} ) { 109 rename $tempfile, $file 110 or Carp::croak(qq/Error replacing $file with $tempfile: $!\n/); 111 my $lm = $response->{headers}{'last-modified'}; 112 if ( $lm and my $mtime = $self->_parse_http_date($lm) ) { 113 utime $mtime, $mtime, $file; 114 } 115 } 116 $response->{success} ||= $response->{status} eq '304'; 117 unlink $tempfile; 118 return $response; 119} 120 121 122my %idempotent = map { $_ => 1 } qw/GET HEAD PUT DELETE OPTIONS TRACE/; 123 124sub request { 125 my ($self, $method, $url, $args) = @_; 126 @_ == 3 || (@_ == 4 && ref $args eq 'HASH') 127 or Carp::croak(q/Usage: $http->request(METHOD, URL, [HASHREF])/ . "\n"); 128 $args ||= {}; # we keep some state in this during _request 129 130 # RFC 2616 Section 8.1.4 mandates a single retry on broken socket 131 my $response; 132 for ( 0 .. 1 ) { 133 $response = eval { $self->_request($method, $url, $args) }; 134 last unless $@ && $idempotent{$method} 135 && $@ =~ m{^(?:Socket closed|Unexpected end)}; 136 } 137 138 if (my $e = "$@") { 139 $response = { 140 url => $url, 141 success => q{}, 142 status => 599, 143 reason => 'Internal Exception', 144 content => $e, 145 headers => { 146 'content-type' => 'text/plain', 147 'content-length' => length $e, 148 } 149 }; 150 } 151 return $response; 152} 153 154 155sub www_form_urlencode { 156 my ($self, $data) = @_; 157 (@_ == 2 && ref $data) 158 or Carp::croak(q/Usage: $http->www_form_urlencode(DATAREF)/ . "\n"); 159 (ref $data eq 'HASH' || ref $data eq 'ARRAY') 160 or Carp::croak("form data must be a hash or array reference"); 161 162 my @params = ref $data eq 'HASH' ? %$data : @$data; 163 @params % 2 == 0 164 or Carp::croak("form data reference must have an even number of terms\n"); 165 166 my @terms; 167 while( @params ) { 168 my ($key, $value) = splice(@params, 0, 2); 169 if ( ref $value eq 'ARRAY' ) { 170 unshift @params, map { $key => $_ } @$value; 171 } 172 else { 173 push @terms, join("=", map { $self->_uri_escape($_) } $key, $value); 174 } 175 } 176 177 return join("&", sort @terms); 178} 179 180#--------------------------------------------------------------------------# 181# private methods 182#--------------------------------------------------------------------------# 183 184my %DefaultPort = ( 185 http => 80, 186 https => 443, 187); 188 189sub _request { 190 my ($self, $method, $url, $args) = @_; 191 192 my ($scheme, $host, $port, $path_query) = $self->_split_url($url); 193 194 my $request = { 195 method => $method, 196 scheme => $scheme, 197 host_port => ($port == $DefaultPort{$scheme} ? $host : "$host:$port"), 198 uri => $path_query, 199 headers => {}, 200 }; 201 202 my $handle = HTTP::Tiny::Handle->new( 203 timeout => $self->{timeout}, 204 SSL_options => $self->{SSL_options}, 205 verify_SSL => $self->{verify_SSL}, 206 local_address => $self->{local_address}, 207 ); 208 209 if ($self->{proxy}) { 210 $request->{uri} = "$scheme://$request->{host_port}$path_query"; 211 die(qq/HTTPS via proxy is not supported\n/) 212 if $request->{scheme} eq 'https'; 213 $handle->connect(($self->_split_url($self->{proxy}))[0..2]); 214 } 215 else { 216 $handle->connect($scheme, $host, $port); 217 } 218 219 $self->_prepare_headers_and_cb($request, $args); 220 $handle->write_request($request); 221 222 my $response; 223 do { $response = $handle->read_response_header } 224 until (substr($response->{status},0,1) ne '1'); 225 226 if ( my @redir_args = $self->_maybe_redirect($request, $response, $args) ) { 227 $handle->close; 228 return $self->_request(@redir_args, $args); 229 } 230 231 if ($method eq 'HEAD' || $response->{status} =~ /^[23]04/) { 232 # response has no message body 233 } 234 else { 235 my $data_cb = $self->_prepare_data_cb($response, $args); 236 $handle->read_body($data_cb, $response); 237 } 238 239 $handle->close; 240 $response->{success} = substr($response->{status},0,1) eq '2'; 241 $response->{url} = $url; 242 return $response; 243} 244 245sub _prepare_headers_and_cb { 246 my ($self, $request, $args) = @_; 247 248 for ($self->{default_headers}, $args->{headers}) { 249 next unless defined; 250 while (my ($k, $v) = each %$_) { 251 $request->{headers}{lc $k} = $v; 252 } 253 } 254 $request->{headers}{'host'} = $request->{host_port}; 255 $request->{headers}{'connection'} = "close"; 256 $request->{headers}{'user-agent'} ||= $self->{agent}; 257 258 if (defined $args->{content}) { 259 $request->{headers}{'content-type'} ||= "application/octet-stream"; 260 if (ref $args->{content} eq 'CODE') { 261 $request->{headers}{'transfer-encoding'} = 'chunked' 262 unless $request->{headers}{'content-length'} 263 || $request->{headers}{'transfer-encoding'}; 264 $request->{cb} = $args->{content}; 265 } 266 else { 267 my $content = $args->{content}; 268 if ( $] ge '5.008' ) { 269 utf8::downgrade($content, 1) 270 or die(qq/Wide character in request message body\n/); 271 } 272 $request->{headers}{'content-length'} = length $content 273 unless $request->{headers}{'content-length'} 274 || $request->{headers}{'transfer-encoding'}; 275 $request->{cb} = sub { substr $content, 0, length $content, '' }; 276 } 277 $request->{trailer_cb} = $args->{trailer_callback} 278 if ref $args->{trailer_callback} eq 'CODE'; 279 } 280 return; 281} 282 283sub _prepare_data_cb { 284 my ($self, $response, $args) = @_; 285 my $data_cb = $args->{data_callback}; 286 $response->{content} = ''; 287 288 if (!$data_cb || $response->{status} !~ /^2/) { 289 if (defined $self->{max_size}) { 290 $data_cb = sub { 291 $_[1]->{content} .= $_[0]; 292 die(qq/Size of response body exceeds the maximum allowed of $self->{max_size}\n/) 293 if length $_[1]->{content} > $self->{max_size}; 294 }; 295 } 296 else { 297 $data_cb = sub { $_[1]->{content} .= $_[0] }; 298 } 299 } 300 return $data_cb; 301} 302 303sub _maybe_redirect { 304 my ($self, $request, $response, $args) = @_; 305 my $headers = $response->{headers}; 306 my ($status, $method) = ($response->{status}, $request->{method}); 307 if (($status eq '303' or ($status =~ /^30[127]/ && $method =~ /^GET|HEAD$/)) 308 and $headers->{location} 309 and ++$args->{redirects} <= $self->{max_redirect} 310 ) { 311 my $location = ($headers->{location} =~ /^\//) 312 ? "$request->{scheme}://$request->{host_port}$headers->{location}" 313 : $headers->{location} ; 314 return (($status eq '303' ? 'GET' : $method), $location); 315 } 316 return; 317} 318 319sub _split_url { 320 my $url = pop; 321 322 # URI regex adapted from the URI module 323 my ($scheme, $authority, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)> 324 or die(qq/Cannot parse URL: '$url'\n/); 325 326 $scheme = lc $scheme; 327 $path_query = "/$path_query" unless $path_query =~ m<\A/>; 328 329 my $host = (length($authority)) ? lc $authority : 'localhost'; 330 $host =~ s/\A[^@]*@//; # userinfo 331 my $port = do { 332 $host =~ s/:([0-9]*)\z// && length $1 333 ? $1 334 : ($scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef); 335 }; 336 337 return ($scheme, $host, $port, $path_query); 338} 339 340# Date conversions adapted from HTTP::Date 341my $DoW = "Sun|Mon|Tue|Wed|Thu|Fri|Sat"; 342my $MoY = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"; 343sub _http_date { 344 my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($_[1]); 345 return sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT", 346 substr($DoW,$wday*4,3), 347 $mday, substr($MoY,$mon*4,3), $year+1900, 348 $hour, $min, $sec 349 ); 350} 351 352sub _parse_http_date { 353 my ($self, $str) = @_; 354 require Time::Local; 355 my @tl_parts; 356 if ($str =~ /^[SMTWF][a-z]+, +(\d{1,2}) ($MoY) +(\d\d\d\d) +(\d\d):(\d\d):(\d\d) +GMT$/) { 357 @tl_parts = ($6, $5, $4, $1, (index($MoY,$2)/4), $3); 358 } 359 elsif ($str =~ /^[SMTWF][a-z]+, +(\d\d)-($MoY)-(\d{2,4}) +(\d\d):(\d\d):(\d\d) +GMT$/ ) { 360 @tl_parts = ($6, $5, $4, $1, (index($MoY,$2)/4), $3); 361 } 362 elsif ($str =~ /^[SMTWF][a-z]+ +($MoY) +(\d{1,2}) +(\d\d):(\d\d):(\d\d) +(?:[^0-9]+ +)?(\d\d\d\d)$/ ) { 363 @tl_parts = ($5, $4, $3, $2, (index($MoY,$1)/4), $6); 364 } 365 return eval { 366 my $t = @tl_parts ? Time::Local::timegm(@tl_parts) : -1; 367 $t < 0 ? undef : $t; 368 }; 369} 370 371# URI escaping adapted from URI::Escape 372# c.f. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 373# perl 5.6 ready UTF-8 encoding adapted from JSON::PP 374my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 375$escapes{' '}="+"; 376my $unsafe_char = qr/[^A-Za-z0-9\-\._~]/; 377 378sub _uri_escape { 379 my ($self, $str) = @_; 380 if ( $] ge '5.008' ) { 381 utf8::encode($str); 382 } 383 else { 384 $str = pack("U*", unpack("C*", $str)) # UTF-8 encode a byte string 385 if ( length $str == do { use bytes; length $str } ); 386 $str = pack("C*", unpack("C*", $str)); # clear UTF-8 flag 387 } 388 $str =~ s/($unsafe_char)/$escapes{$1}/ge; 389 return $str; 390} 391 392package 393 HTTP::Tiny::Handle; # hide from PAUSE/indexers 394use strict; 395use warnings; 396 397use Errno qw[EINTR EPIPE]; 398use IO::Socket qw[SOCK_STREAM]; 399 400sub BUFSIZE () { 32768 } ## no critic 401 402my $Printable = sub { 403 local $_ = shift; 404 s/\r/\\r/g; 405 s/\n/\\n/g; 406 s/\t/\\t/g; 407 s/([^\x20-\x7E])/sprintf('\\x%.2X', ord($1))/ge; 408 $_; 409}; 410 411my $Token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/; 412 413sub new { 414 my ($class, %args) = @_; 415 return bless { 416 rbuf => '', 417 timeout => 60, 418 max_line_size => 16384, 419 max_header_lines => 64, 420 verify_SSL => 0, 421 SSL_options => {}, 422 %args 423 }, $class; 424} 425 426sub connect { 427 @_ == 4 || die(q/Usage: $handle->connect(scheme, host, port)/ . "\n"); 428 my ($self, $scheme, $host, $port) = @_; 429 430 if ( $scheme eq 'https' ) { 431 die(qq/IO::Socket::SSL 1.56 must be installed for https support\n/) 432 unless eval {require IO::Socket::SSL; IO::Socket::SSL->VERSION(1.56)}; 433 die(qq/Net::SSLeay 1.49 must be installed for https support\n/) 434 unless eval {require Net::SSLeay; Net::SSLeay->VERSION(1.49)}; 435 } 436 elsif ( $scheme ne 'http' ) { 437 die(qq/Unsupported URL scheme '$scheme'\n/); 438 } 439 $self->{fh} = 'IO::Socket::INET'->new( 440 PeerHost => $host, 441 PeerPort => $port, 442 $self->{local_address} ? 443 ( LocalAddr => $self->{local_address} ) : (), 444 Proto => 'tcp', 445 Type => SOCK_STREAM, 446 Timeout => $self->{timeout} 447 ) or die(qq/Could not connect to '$host:$port': $@\n/); 448 449 binmode($self->{fh}) 450 or die(qq/Could not binmode() socket: '$!'\n/); 451 452 if ( $scheme eq 'https') { 453 my $ssl_args = $self->_ssl_args($host); 454 IO::Socket::SSL->start_SSL( 455 $self->{fh}, 456 %$ssl_args, 457 SSL_create_ctx_callback => sub { 458 my $ctx = shift; 459 Net::SSLeay::CTX_set_mode($ctx, Net::SSLeay::MODE_AUTO_RETRY()); 460 }, 461 ); 462 463 unless ( ref($self->{fh}) eq 'IO::Socket::SSL' ) { 464 my $ssl_err = IO::Socket::SSL->errstr; 465 die(qq/SSL connection failed for $host: $ssl_err\n/); 466 } 467 } 468 469 $self->{host} = $host; 470 $self->{port} = $port; 471 472 return $self; 473} 474 475sub close { 476 @_ == 1 || die(q/Usage: $handle->close()/ . "\n"); 477 my ($self) = @_; 478 CORE::close($self->{fh}) 479 or die(qq/Could not close socket: '$!'\n/); 480} 481 482sub write { 483 @_ == 2 || die(q/Usage: $handle->write(buf)/ . "\n"); 484 my ($self, $buf) = @_; 485 486 if ( $] ge '5.008' ) { 487 utf8::downgrade($buf, 1) 488 or die(qq/Wide character in write()\n/); 489 } 490 491 my $len = length $buf; 492 my $off = 0; 493 494 local $SIG{PIPE} = 'IGNORE'; 495 496 while () { 497 $self->can_write 498 or die(qq/Timed out while waiting for socket to become ready for writing\n/); 499 my $r = syswrite($self->{fh}, $buf, $len, $off); 500 if (defined $r) { 501 $len -= $r; 502 $off += $r; 503 last unless $len > 0; 504 } 505 elsif ($! == EPIPE) { 506 die(qq/Socket closed by remote server: $!\n/); 507 } 508 elsif ($! != EINTR) { 509 if ($self->{fh}->can('errstr')){ 510 my $err = $self->{fh}->errstr(); 511 die (qq/Could not write to SSL socket: '$err'\n /); 512 } 513 else { 514 die(qq/Could not write to socket: '$!'\n/); 515 } 516 517 } 518 } 519 return $off; 520} 521 522sub read { 523 @_ == 2 || @_ == 3 || die(q/Usage: $handle->read(len [, allow_partial])/ . "\n"); 524 my ($self, $len, $allow_partial) = @_; 525 526 my $buf = ''; 527 my $got = length $self->{rbuf}; 528 529 if ($got) { 530 my $take = ($got < $len) ? $got : $len; 531 $buf = substr($self->{rbuf}, 0, $take, ''); 532 $len -= $take; 533 } 534 535 while ($len > 0) { 536 $self->can_read 537 or die(q/Timed out while waiting for socket to become ready for reading/ . "\n"); 538 my $r = sysread($self->{fh}, $buf, $len, length $buf); 539 if (defined $r) { 540 last unless $r; 541 $len -= $r; 542 } 543 elsif ($! != EINTR) { 544 if ($self->{fh}->can('errstr')){ 545 my $err = $self->{fh}->errstr(); 546 die (qq/Could not read from SSL socket: '$err'\n /); 547 } 548 else { 549 die(qq/Could not read from socket: '$!'\n/); 550 } 551 } 552 } 553 if ($len && !$allow_partial) { 554 die(qq/Unexpected end of stream\n/); 555 } 556 return $buf; 557} 558 559sub readline { 560 @_ == 1 || die(q/Usage: $handle->readline()/ . "\n"); 561 my ($self) = @_; 562 563 while () { 564 if ($self->{rbuf} =~ s/\A ([^\x0D\x0A]* \x0D?\x0A)//x) { 565 return $1; 566 } 567 if (length $self->{rbuf} >= $self->{max_line_size}) { 568 die(qq/Line size exceeds the maximum allowed size of $self->{max_line_size}\n/); 569 } 570 $self->can_read 571 or die(qq/Timed out while waiting for socket to become ready for reading\n/); 572 my $r = sysread($self->{fh}, $self->{rbuf}, BUFSIZE, length $self->{rbuf}); 573 if (defined $r) { 574 last unless $r; 575 } 576 elsif ($! != EINTR) { 577 if ($self->{fh}->can('errstr')){ 578 my $err = $self->{fh}->errstr(); 579 die (qq/Could not read from SSL socket: '$err'\n /); 580 } 581 else { 582 die(qq/Could not read from socket: '$!'\n/); 583 } 584 } 585 } 586 die(qq/Unexpected end of stream while looking for line\n/); 587} 588 589sub read_header_lines { 590 @_ == 1 || @_ == 2 || die(q/Usage: $handle->read_header_lines([headers])/ . "\n"); 591 my ($self, $headers) = @_; 592 $headers ||= {}; 593 my $lines = 0; 594 my $val; 595 596 while () { 597 my $line = $self->readline; 598 599 if (++$lines >= $self->{max_header_lines}) { 600 die(qq/Header lines exceeds maximum number allowed of $self->{max_header_lines}\n/); 601 } 602 elsif ($line =~ /\A ([^\x00-\x1F\x7F:]+) : [\x09\x20]* ([^\x0D\x0A]*)/x) { 603 my ($field_name) = lc $1; 604 if (exists $headers->{$field_name}) { 605 for ($headers->{$field_name}) { 606 $_ = [$_] unless ref $_ eq "ARRAY"; 607 push @$_, $2; 608 $val = \$_->[-1]; 609 } 610 } 611 else { 612 $val = \($headers->{$field_name} = $2); 613 } 614 } 615 elsif ($line =~ /\A [\x09\x20]+ ([^\x0D\x0A]*)/x) { 616 $val 617 or die(qq/Unexpected header continuation line\n/); 618 next unless length $1; 619 $$val .= ' ' if length $$val; 620 $$val .= $1; 621 } 622 elsif ($line =~ /\A \x0D?\x0A \z/x) { 623 last; 624 } 625 else { 626 die(q/Malformed header line: / . $Printable->($line) . "\n"); 627 } 628 } 629 return $headers; 630} 631 632sub write_request { 633 @_ == 2 || die(q/Usage: $handle->write_request(request)/ . "\n"); 634 my($self, $request) = @_; 635 $self->write_request_header(@{$request}{qw/method uri headers/}); 636 $self->write_body($request) if $request->{cb}; 637 return; 638} 639 640my %HeaderCase = ( 641 'content-md5' => 'Content-MD5', 642 'etag' => 'ETag', 643 'te' => 'TE', 644 'www-authenticate' => 'WWW-Authenticate', 645 'x-xss-protection' => 'X-XSS-Protection', 646); 647 648sub write_header_lines { 649 (@_ == 2 && ref $_[1] eq 'HASH') || die(q/Usage: $handle->write_header_lines(headers)/ . "\n"); 650 my($self, $headers) = @_; 651 652 my $buf = ''; 653 while (my ($k, $v) = each %$headers) { 654 my $field_name = lc $k; 655 if (exists $HeaderCase{$field_name}) { 656 $field_name = $HeaderCase{$field_name}; 657 } 658 else { 659 $field_name =~ /\A $Token+ \z/xo 660 or die(q/Invalid HTTP header field name: / . $Printable->($field_name) . "\n"); 661 $field_name =~ s/\b(\w)/\u$1/g; 662 $HeaderCase{lc $field_name} = $field_name; 663 } 664 for (ref $v eq 'ARRAY' ? @$v : $v) { 665 /[^\x0D\x0A]/ 666 or die(qq/Invalid HTTP header field value ($field_name): / . $Printable->($_). "\n"); 667 $buf .= "$field_name: $_\x0D\x0A"; 668 } 669 } 670 $buf .= "\x0D\x0A"; 671 return $self->write($buf); 672} 673 674sub read_body { 675 @_ == 3 || die(q/Usage: $handle->read_body(callback, response)/ . "\n"); 676 my ($self, $cb, $response) = @_; 677 my $te = $response->{headers}{'transfer-encoding'} || ''; 678 if ( grep { /chunked/i } ( ref $te eq 'ARRAY' ? @$te : $te ) ) { 679 $self->read_chunked_body($cb, $response); 680 } 681 else { 682 $self->read_content_body($cb, $response); 683 } 684 return; 685} 686 687sub write_body { 688 @_ == 2 || die(q/Usage: $handle->write_body(request)/ . "\n"); 689 my ($self, $request) = @_; 690 if ($request->{headers}{'content-length'}) { 691 return $self->write_content_body($request); 692 } 693 else { 694 return $self->write_chunked_body($request); 695 } 696} 697 698sub read_content_body { 699 @_ == 3 || @_ == 4 || die(q/Usage: $handle->read_content_body(callback, response, [read_length])/ . "\n"); 700 my ($self, $cb, $response, $content_length) = @_; 701 $content_length ||= $response->{headers}{'content-length'}; 702 703 if ( $content_length ) { 704 my $len = $content_length; 705 while ($len > 0) { 706 my $read = ($len > BUFSIZE) ? BUFSIZE : $len; 707 $cb->($self->read($read, 0), $response); 708 $len -= $read; 709 } 710 } 711 else { 712 my $chunk; 713 $cb->($chunk, $response) while length( $chunk = $self->read(BUFSIZE, 1) ); 714 } 715 716 return; 717} 718 719sub write_content_body { 720 @_ == 2 || die(q/Usage: $handle->write_content_body(request)/ . "\n"); 721 my ($self, $request) = @_; 722 723 my ($len, $content_length) = (0, $request->{headers}{'content-length'}); 724 while () { 725 my $data = $request->{cb}->(); 726 727 defined $data && length $data 728 or last; 729 730 if ( $] ge '5.008' ) { 731 utf8::downgrade($data, 1) 732 or die(qq/Wide character in write_content()\n/); 733 } 734 735 $len += $self->write($data); 736 } 737 738 $len == $content_length 739 or die(qq/Content-Length missmatch (got: $len expected: $content_length)\n/); 740 741 return $len; 742} 743 744sub read_chunked_body { 745 @_ == 3 || die(q/Usage: $handle->read_chunked_body(callback, $response)/ . "\n"); 746 my ($self, $cb, $response) = @_; 747 748 while () { 749 my $head = $self->readline; 750 751 $head =~ /\A ([A-Fa-f0-9]+)/x 752 or die(q/Malformed chunk head: / . $Printable->($head) . "\n"); 753 754 my $len = hex($1) 755 or last; 756 757 $self->read_content_body($cb, $response, $len); 758 759 $self->read(2) eq "\x0D\x0A" 760 or die(qq/Malformed chunk: missing CRLF after chunk data\n/); 761 } 762 $self->read_header_lines($response->{headers}); 763 return; 764} 765 766sub write_chunked_body { 767 @_ == 2 || die(q/Usage: $handle->write_chunked_body(request)/ . "\n"); 768 my ($self, $request) = @_; 769 770 my $len = 0; 771 while () { 772 my $data = $request->{cb}->(); 773 774 defined $data && length $data 775 or last; 776 777 if ( $] ge '5.008' ) { 778 utf8::downgrade($data, 1) 779 or die(qq/Wide character in write_chunked_body()\n/); 780 } 781 782 $len += length $data; 783 784 my $chunk = sprintf '%X', length $data; 785 $chunk .= "\x0D\x0A"; 786 $chunk .= $data; 787 $chunk .= "\x0D\x0A"; 788 789 $self->write($chunk); 790 } 791 $self->write("0\x0D\x0A"); 792 $self->write_header_lines($request->{trailer_cb}->()) 793 if ref $request->{trailer_cb} eq 'CODE'; 794 return $len; 795} 796 797sub read_response_header { 798 @_ == 1 || die(q/Usage: $handle->read_response_header()/ . "\n"); 799 my ($self) = @_; 800 801 my $line = $self->readline; 802 803 $line =~ /\A (HTTP\/(0*\d+\.0*\d+)) [\x09\x20]+ ([0-9]{3}) [\x09\x20]+ ([^\x0D\x0A]*) \x0D?\x0A/x 804 or die(q/Malformed Status-Line: / . $Printable->($line). "\n"); 805 806 my ($protocol, $version, $status, $reason) = ($1, $2, $3, $4); 807 808 die (qq/Unsupported HTTP protocol: $protocol\n/) 809 unless $version =~ /0*1\.0*[01]/; 810 811 return { 812 status => $status, 813 reason => $reason, 814 headers => $self->read_header_lines, 815 protocol => $protocol, 816 }; 817} 818 819sub write_request_header { 820 @_ == 4 || die(q/Usage: $handle->write_request_header(method, request_uri, headers)/ . "\n"); 821 my ($self, $method, $request_uri, $headers) = @_; 822 823 return $self->write("$method $request_uri HTTP/1.1\x0D\x0A") 824 + $self->write_header_lines($headers); 825} 826 827sub _do_timeout { 828 my ($self, $type, $timeout) = @_; 829 $timeout = $self->{timeout} 830 unless defined $timeout && $timeout >= 0; 831 832 my $fd = fileno $self->{fh}; 833 defined $fd && $fd >= 0 834 or die(qq/select(2): 'Bad file descriptor'\n/); 835 836 my $initial = time; 837 my $pending = $timeout; 838 my $nfound; 839 840 vec(my $fdset = '', $fd, 1) = 1; 841 842 while () { 843 $nfound = ($type eq 'read') 844 ? select($fdset, undef, undef, $pending) 845 : select(undef, $fdset, undef, $pending) ; 846 if ($nfound == -1) { 847 $! == EINTR 848 or die(qq/select(2): '$!'\n/); 849 redo if !$timeout || ($pending = $timeout - (time - $initial)) > 0; 850 $nfound = 0; 851 } 852 last; 853 } 854 $! = 0; 855 return $nfound; 856} 857 858sub can_read { 859 @_ == 1 || @_ == 2 || die(q/Usage: $handle->can_read([timeout])/ . "\n"); 860 my $self = shift; 861 return $self->_do_timeout('read', @_) 862} 863 864sub can_write { 865 @_ == 1 || @_ == 2 || die(q/Usage: $handle->can_write([timeout])/ . "\n"); 866 my $self = shift; 867 return $self->_do_timeout('write', @_) 868} 869 870# Try to find a CA bundle to validate the SSL cert, 871# prefer Mozilla::CA or fallback to a system file 872sub _find_CA_file { 873 my $self = shift(); 874 875 return $self->{SSL_options}->{SSL_ca_file} 876 if $self->{SSL_options}->{SSL_ca_file} and -e $self->{SSL_options}->{SSL_ca_file}; 877 878 return Mozilla::CA::SSL_ca_file() 879 if eval { require Mozilla::CA }; 880 881 foreach my $ca_bundle (qw{ 882 /etc/ssl/certs/ca-certificates.crt 883 /etc/pki/tls/certs/ca-bundle.crt 884 /etc/ssl/ca-bundle.pem 885 } 886 ) { 887 return $ca_bundle if -e $ca_bundle; 888 } 889 890 die qq/Couldn't find a CA bundle with which to verify the SSL certificate.\n/ 891 . qq/Try installing Mozilla::CA from CPAN\n/; 892} 893 894sub _ssl_args { 895 my ($self, $host) = @_; 896 897 my %ssl_args = ( 898 SSL_hostname => $host, # SNI 899 ); 900 901 if ($self->{verify_SSL}) { 902 $ssl_args{SSL_verifycn_scheme} = 'http'; # enable CN validation 903 $ssl_args{SSL_verifycn_name} = $host; # set validation hostname 904 $ssl_args{SSL_verify_mode} = 0x01; # enable cert validation 905 $ssl_args{SSL_ca_file} = $self->_find_CA_file; 906 } 907 else { 908 $ssl_args{SSL_verifycn_scheme} = 'none'; # disable CN validation 909 $ssl_args{SSL_verify_mode} = 0x00; # disable cert validation 910 } 911 912 # user options override settings from verify_SSL 913 for my $k ( keys %{$self->{SSL_options}} ) { 914 $ssl_args{$k} = $self->{SSL_options}{$k} if $k =~ m/^SSL_/; 915 } 916 917 return \%ssl_args; 918} 919 9201; 921 922__END__ 923 924=pod 925 926=head1 NAME 927 928HTTP::Tiny - A small, simple, correct HTTP/1.1 client 929 930=head1 VERSION 931 932version 0.025 933 934=head1 SYNOPSIS 935 936 use HTTP::Tiny; 937 938 my $response = HTTP::Tiny->new->get('http://example.com/'); 939 940 die "Failed!\n" unless $response->{success}; 941 942 print "$response->{status} $response->{reason}\n"; 943 944 while (my ($k, $v) = each %{$response->{headers}}) { 945 for (ref $v eq 'ARRAY' ? @$v : $v) { 946 print "$k: $_\n"; 947 } 948 } 949 950 print $response->{content} if length $response->{content}; 951 952=head1 DESCRIPTION 953 954This is a very simple HTTP/1.1 client, designed for doing simple GET 955requests without the overhead of a large framework like L<LWP::UserAgent>. 956 957It is more correct and more complete than L<HTTP::Lite>. It supports 958proxies (currently only non-authenticating ones) and redirection. It 959also correctly resumes after EINTR. 960 961=head1 METHODS 962 963=head2 new 964 965 $http = HTTP::Tiny->new( %attributes ); 966 967This constructor returns a new HTTP::Tiny object. Valid attributes include: 968 969=over 4 970 971=item * 972 973C<agent> 974 975A user-agent string (defaults to 'HTTP-Tiny/$VERSION'). If C<agent> ends in a space character, the default user-agent string is appended. 976 977=item * 978 979C<default_headers> 980 981A hashref of default headers to apply to requests 982 983=item * 984 985C<local_address> 986 987The local IP address to bind to 988 989=item * 990 991C<max_redirect> 992 993Maximum number of redirects allowed (defaults to 5) 994 995=item * 996 997C<max_size> 998 999Maximum response size (only when not using a data callback). If defined, 1000responses larger than this will return an exception. 1001 1002=item * 1003 1004C<proxy> 1005 1006URL of a proxy server to use (default is C<$ENV{http_proxy}> if set) 1007 1008=item * 1009 1010C<timeout> 1011 1012Request timeout in seconds (default is 60) 1013 1014=item * 1015 1016C<verify_SSL> 1017 1018A boolean that indicates whether to validate the SSL certificate of an C<https> 1019connection (default is false) 1020 1021=item * 1022 1023C<SSL_options> 1024 1025A hashref of C<SSL_*> options to pass through to L<IO::Socket::SSL> 1026 1027=back 1028 1029Exceptions from C<max_size>, C<timeout> or other errors will result in a 1030pseudo-HTTP status code of 599 and a reason of "Internal Exception". The 1031content field in the response will contain the text of the exception. 1032 1033See L</SSL SUPPORT> for more on the C<verify_SSL> and C<SSL_options> attributes. 1034 1035=head2 get|head|put|post|delete 1036 1037 $response = $http->get($url); 1038 $response = $http->get($url, \%options); 1039 $response = $http->head($url); 1040 1041These methods are shorthand for calling C<request()> for the given method. The 1042URL must have unsafe characters escaped and international domain names encoded. 1043See C<request()> for valid options and a description of the response. 1044 1045The C<success> field of the response will be true if the status code is 2XX. 1046 1047=head2 post_form 1048 1049 $response = $http->post_form($url, $form_data); 1050 $response = $http->post_form($url, $form_data, \%options); 1051 1052This method executes a C<POST> request and sends the key/value pairs from a 1053form data hash or array reference to the given URL with a C<content-type> of 1054C<application/x-www-form-urlencoded>. See documentation for the 1055C<www_form_urlencode> method for details on the encoding. 1056 1057The URL must have unsafe characters escaped and international domain names 1058encoded. See C<request()> for valid options and a description of the response. 1059Any C<content-type> header or content in the options hashref will be ignored. 1060 1061The C<success> field of the response will be true if the status code is 2XX. 1062 1063=head2 mirror 1064 1065 $response = $http->mirror($url, $file, \%options) 1066 if ( $response->{success} ) { 1067 print "$file is up to date\n"; 1068 } 1069 1070Executes a C<GET> request for the URL and saves the response body to the file 1071name provided. The URL must have unsafe characters escaped and international 1072domain names encoded. If the file already exists, the request will includes an 1073C<If-Modified-Since> header with the modification timestamp of the file. You 1074may specify a different C<If-Modified-Since> header yourself in the C<< 1075$options->{headers} >> hash. 1076 1077The C<success> field of the response will be true if the status code is 2XX 1078or if the status code is 304 (unmodified). 1079 1080If the file was modified and the server response includes a properly 1081formatted C<Last-Modified> header, the file modification time will 1082be updated accordingly. 1083 1084=head2 request 1085 1086 $response = $http->request($method, $url); 1087 $response = $http->request($method, $url, \%options); 1088 1089Executes an HTTP request of the given method type ('GET', 'HEAD', 'POST', 1090'PUT', etc.) on the given URL. The URL must have unsafe characters escaped and 1091international domain names encoded. A hashref of options may be appended to 1092modify the request. 1093 1094Valid options are: 1095 1096=over 4 1097 1098=item * 1099 1100C<headers> 1101 1102A hashref containing headers to include with the request. If the value for 1103a header is an array reference, the header will be output multiple times with 1104each value in the array. These headers over-write any default headers. 1105 1106=item * 1107 1108C<content> 1109 1110A scalar to include as the body of the request OR a code reference 1111that will be called iteratively to produce the body of the request 1112 1113=item * 1114 1115C<trailer_callback> 1116 1117A code reference that will be called if it exists to provide a hashref 1118of trailing headers (only used with chunked transfer-encoding) 1119 1120=item * 1121 1122C<data_callback> 1123 1124A code reference that will be called for each chunks of the response 1125body received. 1126 1127=back 1128 1129If the C<content> option is a code reference, it will be called iteratively 1130to provide the content body of the request. It should return the empty 1131string or undef when the iterator is exhausted. 1132 1133If the C<data_callback> option is provided, it will be called iteratively until 1134the entire response body is received. The first argument will be a string 1135containing a chunk of the response body, the second argument will be the 1136in-progress response hash reference, as described below. (This allows 1137customizing the action of the callback based on the C<status> or C<headers> 1138received prior to the content body.) 1139 1140The C<request> method returns a hashref containing the response. The hashref 1141will have the following keys: 1142 1143=over 4 1144 1145=item * 1146 1147C<success> 1148 1149Boolean indicating whether the operation returned a 2XX status code 1150 1151=item * 1152 1153C<url> 1154 1155URL that provided the response. This is the URL of the request unless 1156there were redirections, in which case it is the last URL queried 1157in a redirection chain 1158 1159=item * 1160 1161C<status> 1162 1163The HTTP status code of the response 1164 1165=item * 1166 1167C<reason> 1168 1169The response phrase returned by the server 1170 1171=item * 1172 1173C<content> 1174 1175The body of the response. If the response does not have any content 1176or if a data callback is provided to consume the response body, 1177this will be the empty string 1178 1179=item * 1180 1181C<headers> 1182 1183A hashref of header fields. All header field names will be normalized 1184to be lower case. If a header is repeated, the value will be an arrayref; 1185it will otherwise be a scalar string containing the value 1186 1187=back 1188 1189On an exception during the execution of the request, the C<status> field will 1190contain 599, and the C<content> field will contain the text of the exception. 1191 1192=head2 www_form_urlencode 1193 1194 $params = $http->www_form_urlencode( $data ); 1195 $response = $http->get("http://example.com/query?$params"); 1196 1197This method converts the key/value pairs from a data hash or array reference 1198into a C<x-www-form-urlencoded> string. The keys and values from the data 1199reference will be UTF-8 encoded and escaped per RFC 3986. If a value is an 1200array reference, the key will be repeated with each of the values of the array 1201reference. The key/value pairs in the resulting string will be sorted by key 1202and value. 1203 1204=for Pod::Coverage agent 1205default_headers 1206local_address 1207max_redirect 1208max_size 1209proxy 1210timeout 1211verify_SSL 1212SSL_options 1213 1214=head1 SSL SUPPORT 1215 1216Direct C<https> connections are supported only if L<IO::Socket::SSL> 1.56 or 1217greater and L<Net::SSLeay> 1.49 or greater are installed. An exception will be 1218thrown if a new enough versions of these modules not installed or if the SSL 1219encryption fails. There is no support for C<https> connections via proxy (i.e. 1220RFC 2817). 1221 1222SSL provides two distinct capabilities: 1223 1224=over 4 1225 1226=item * 1227 1228Encrypted communication channel 1229 1230=item * 1231 1232Verification of server identity 1233 1234=back 1235 1236B<By default, HTTP::Tiny does not verify server identity>. 1237 1238Server identity verification is controversial and potentially tricky because it 1239depends on a (usually paid) third-party Certificate Authority (CA) trust model 1240to validate a certificate as legitimate. This discriminates against servers 1241with self-signed certificates or certificates signed by free, community-driven 1242CA's such as L<CAcert.org|http://cacert.org>. 1243 1244By default, HTTP::Tiny does not make any assumptions about your trust model, 1245threat level or risk tolerance. It just aims to give you an encrypted channel 1246when you need one. 1247 1248Setting the C<verify_SSL> attribute to a true value will make HTTP::Tiny verify 1249that an SSL connection has a valid SSL certificate corresponding to the host 1250name of the connection and that the SSL certificate has been verified by a CA. 1251Assuming you trust the CA, this will protect against a L<man-in-the-middle 1252attack|http://en.wikipedia.org/wiki/Man-in-the-middle_attack>. If you are 1253concerned about security, you should enable this option. 1254 1255Certificate verification requires a file containing trusted CA certificates. 1256If the L<Mozilla::CA> module is installed, HTTP::Tiny will use the CA file 1257included with it as a source of trusted CA's. (This means you trust Mozilla, 1258the author of Mozilla::CA, the CPAN mirror where you got Mozilla::CA, the 1259toolchain used to install it, and your operating system security, right?) 1260 1261If that module is not available, then HTTP::Tiny will search several 1262system-specific default locations for a CA certificate file: 1263 1264=over 4 1265 1266=item * 1267 1268/etc/ssl/certs/ca-certificates.crt 1269 1270=item * 1271 1272/etc/pki/tls/certs/ca-bundle.crt 1273 1274=item * 1275 1276/etc/ssl/ca-bundle.pem 1277 1278=back 1279 1280An exception will be raised if C<verify_SSL> is true and no CA certificate file 1281is available. 1282 1283If you desire complete control over SSL connections, the C<SSL_options> attribute 1284lets you provide a hash reference that will be passed through to 1285C<IO::Socket::SSL::start_SSL()>, overriding any options set by HTTP::Tiny. For 1286example, to provide your own trusted CA file: 1287 1288 SSL_options => { 1289 SSL_ca_file => $file_path, 1290 } 1291 1292The C<SSL_options> attribute could also be used for such things as providing a 1293client certificate for authentication to a server or controlling the choice of 1294cipher used for the SSL connection. See L<IO::Socket::SSL> documentation for 1295details. 1296 1297=head1 LIMITATIONS 1298 1299HTTP::Tiny is I<conditionally compliant> with the 1300L<HTTP/1.1 specification|http://www.w3.org/Protocols/rfc2616/rfc2616.html>. 1301It attempts to meet all "MUST" requirements of the specification, but does not 1302implement all "SHOULD" requirements. 1303 1304Some particular limitations of note include: 1305 1306=over 1307 1308=item * 1309 1310HTTP::Tiny focuses on correct transport. Users are responsible for ensuring 1311that user-defined headers and content are compliant with the HTTP/1.1 1312specification. 1313 1314=item * 1315 1316Users must ensure that URLs are properly escaped for unsafe characters and that 1317international domain names are properly encoded to ASCII. See L<URI::Escape>, 1318L<URI::_punycode> and L<Net::IDN::Encode>. 1319 1320=item * 1321 1322Redirection is very strict against the specification. Redirection is only 1323automatic for response codes 301, 302 and 307 if the request method is 'GET' or 1324'HEAD'. Response code 303 is always converted into a 'GET' redirection, as 1325mandated by the specification. There is no automatic support for status 305 1326("Use proxy") redirections. 1327 1328=item * 1329 1330Persistent connections are not supported. The C<Connection> header will 1331always be set to C<close>. 1332 1333=item * 1334 1335Cookies are not directly supported. Users that set a C<Cookie> header 1336should also set C<max_redirect> to zero to ensure cookies are not 1337inappropriately re-transmitted. 1338 1339=item * 1340 1341Only the C<http_proxy> environment variable is supported in the format 1342C<http://HOST:PORT/>. If a C<proxy> argument is passed to C<new> (including 1343undef), then the C<http_proxy> environment variable is ignored. 1344 1345=item * 1346 1347There is no provision for delaying a request body using an C<Expect> header. 1348Unexpected C<1XX> responses are silently ignored as per the specification. 1349 1350=item * 1351 1352Only 'chunked' C<Transfer-Encoding> is supported. 1353 1354=item * 1355 1356There is no support for a Request-URI of '*' for the 'OPTIONS' request. 1357 1358=item * 1359 1360There is no support for IPv6 of any kind. 1361 1362=back 1363 1364=head1 SEE ALSO 1365 1366=over 4 1367 1368=item * 1369 1370L<LWP::UserAgent> 1371 1372=item * 1373 1374L<IO::Socket::SSL> 1375 1376=item * 1377 1378L<Mozilla::CA> 1379 1380=item * 1381 1382L<Net::SSLeay> 1383 1384=back 1385 1386=for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan 1387 1388=head1 SUPPORT 1389 1390=head2 Bugs / Feature Requests 1391 1392Please report any bugs or feature requests through the issue tracker 1393at L<https://rt.cpan.org/Public/Dist/Display.html?Name=HTTP-Tiny>. 1394You will be notified automatically of any progress on your issue. 1395 1396=head2 Source Code 1397 1398This is open source software. The code repository is available for 1399public review and contribution under the terms of the license. 1400 1401L<https://github.com/dagolden/http-tiny> 1402 1403 git clone git://github.com/dagolden/http-tiny.git 1404 1405=head1 AUTHORS 1406 1407=over 4 1408 1409=item * 1410 1411Christian Hansen <chansen@cpan.org> 1412 1413=item * 1414 1415David Golden <dagolden@cpan.org> 1416 1417=item * 1418 1419Mike Doherty <doherty@cpan.org> 1420 1421=back 1422 1423=head1 COPYRIGHT AND LICENSE 1424 1425This software is copyright (c) 2012 by Christian Hansen. 1426 1427This is free software; you can redistribute it and/or modify it under 1428the same terms as the Perl 5 programming language system itself. 1429 1430=cut 1431