16dac1782Sespie#! /usr/bin/perl 26dac1782Sespie# ex:ts=8 sw=4: 3*de2fb8e9Sespie# $OpenBSD: HTTP.pm,v 1.16 2023/07/03 17:01:59 espie Exp $ 46dac1782Sespie# 56dac1782Sespie# Copyright (c) 2011 Marc Espie <espie@openbsd.org> 66dac1782Sespie# 76dac1782Sespie# Permission to use, copy, modify, and distribute this software for any 86dac1782Sespie# purpose with or without fee is hereby granted, provided that the above 96dac1782Sespie# copyright notice and this permission notice appear in all copies. 106dac1782Sespie# 116dac1782Sespie# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 126dac1782Sespie# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 136dac1782Sespie# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 146dac1782Sespie# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 156dac1782Sespie# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 166dac1782Sespie# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 176dac1782Sespie# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 186dac1782Sespie 19*de2fb8e9Sespieuse v5.36; 206dac1782Sespie 21a3144174Skspillneruse OpenBSD::PackageRepository::Persistent; 2245c22c0aSespie 2345c22c0aSespiepackage OpenBSD::PackageRepository::HTTP1; 24a3144174Skspillnerour @ISA = qw(OpenBSD::PackageRepository::Persistent); 25039cbdaaSespiesub urlscheme($) 266e004614Sespie{ 276e004614Sespie return 'http'; 286e004614Sespie} 296e004614Sespie 30039cbdaaSespiesub initiate($self) 316e004614Sespie{ 326e004614Sespie my ($rdfh, $wrfh); 3345c22c0aSespie pipe($self->{getfh}, $wrfh) or die; 3445c22c0aSespie pipe($rdfh, $self->{cmdfh}) or die; 3545c22c0aSespie 3645c22c0aSespie my $old =select $self->{getfh}; 3745c22c0aSespie $| = 1; 3845c22c0aSespie select $self->{cmdfh}; 3945c22c0aSespie $| = 1; 4045c22c0aSespie select $rdfh; 4145c22c0aSespie $| = 1; 4245c22c0aSespie select $wrfh; 4345c22c0aSespie $| = 1; 4445c22c0aSespie select $old; 456e004614Sespie my $pid = fork(); 466e004614Sespie if ($pid == 0) { 476e004614Sespie close($self->{getfh}); 486e004614Sespie close($self->{cmdfh}); 4945c22c0aSespie# close(STDOUT); 5045c22c0aSespie# close(STDIN); 516e004614Sespie open(STDOUT, '>&', $wrfh); 526e004614Sespie open(STDIN, '<&', $rdfh); 536e004614Sespie _Proxy::main($self); 546e004614Sespie } else { 556e004614Sespie close($rdfh); 566e004614Sespie close($wrfh); 576e004614Sespie $self->{controller} = $pid; 586e004614Sespie } 596e004614Sespie} 606e004614Sespie 611eb63decSespiepackage _Proxy::Header; 621eb63decSespie 63039cbdaaSespiesub new($class) 641eb63decSespie{ 651eb63decSespie bless {}, $class; 661eb63decSespie} 671eb63decSespie 68039cbdaaSespiesub code($self) 691eb63decSespie{ 701eb63decSespie return $self->{code}; 711eb63decSespie} 721eb63decSespie 73a62c2f70Sespiepackage _Proxy::Connection; 74039cbdaaSespiesub new($class, $host, $port) 756e004614Sespie{ 766e004614Sespie require IO::Socket::INET; 776e004614Sespie my $o = IO::Socket::INET->new( 786e004614Sespie PeerHost => $host, 796e004614Sespie PeerPort => $port); 80d364d569Sespie my $old = select($o); 81d364d569Sespie $| = 1; 82d364d569Sespie select($old); 83a62c2f70Sespie bless {fh => $o, host => $host, buffer => ''}, $class; 846e004614Sespie} 856e004614Sespie 86039cbdaaSespiesub send_header($o, $document, %extra) 871eb63decSespie{ 881eb63decSespie my $crlf="\015\012"; 891eb63decSespie $o->print("GET $document HTTP/1.1", $crlf, 901eb63decSespie "Host: ", $o->{host}, $crlf); 911eb63decSespie if (defined $extra{range}) { 921eb63decSespie my ($a, $b) = @{$extra{range}}; 931eb63decSespie $o->print("Range: bytes=$a-$b", $crlf); 941eb63decSespie } 951eb63decSespie $o->print($crlf); 961eb63decSespie} 971eb63decSespie 98039cbdaaSespiesub get_header($o) 991eb63decSespie{ 100b62674ebSespie my $l = $o->getline; 101b62674ebSespie if ($l !~ m,^HTTP/1\.1\s+(\d\d\d),) { 1021eb63decSespie return undef; 1031eb63decSespie } 1041eb63decSespie my $h = _Proxy::Header->new; 1051eb63decSespie $h->{code} = $1; 106b62674ebSespie while ($l = $o->getline) { 107b62674ebSespie last if $l =~ m/^$/; 108b62674ebSespie if ($l =~ m/^([\w\-]+)\:\s*(.*)$/) { 1091eb63decSespie $h->{$1} = $2; 1101eb63decSespie } else { 111b62674ebSespie print STDERR "unknown line: $l\n"; 1121eb63decSespie } 1131eb63decSespie } 1141eb63decSespie if (defined $h->{'Content-Length'}) { 1151eb63decSespie $h->{length} = $h->{'Content-Length'} 1161eb63decSespie } elsif (defined $h->{'Transfer-Encoding'} && 1171eb63decSespie $h->{'Transfer-Encoding'} eq 'chunked') { 1181eb63decSespie $h->{chunked} = 1; 1191eb63decSespie } 1201eb63decSespie if (defined $h->{'Content-Range'} && 1211eb63decSespie $h->{'Content-Range'} =~ m/^bytes\s+(\d+)\-(\d+)\/(\d+)/) { 1221eb63decSespie ($h->{start}, $h->{end}, $h->{size}) = ($1, $2, $3); 1231eb63decSespie } 1241eb63decSespie $o->{header} = $h; 1251eb63decSespie return $h; 1261eb63decSespie} 1271eb63decSespie 128039cbdaaSespiesub getline($self) 1296e004614Sespie{ 1306e004614Sespie while (1) { 1316e004614Sespie if ($self->{buffer} =~ s/^(.*?)\015\012//) { 1326e004614Sespie return $1; 1336e004614Sespie } 1346e004614Sespie my $buffer; 1356e004614Sespie $self->{fh}->recv($buffer, 1024); 1366e004614Sespie $self->{buffer}.=$buffer; 1376e004614Sespie } 1386e004614Sespie} 1396e004614Sespie 140039cbdaaSespiesub retrieve($self, $sz) 1416e004614Sespie{ 1426e004614Sespie while(length($self->{buffer}) < $sz) { 1436e004614Sespie my $buffer; 1446e004614Sespie $self->{fh}->recv($buffer, $sz - length($self->{buffer})); 1456e004614Sespie $self->{buffer}.=$buffer; 1466e004614Sespie } 1476e004614Sespie my $result= substr($self->{buffer}, 0, $sz); 1486e004614Sespie $self->{buffer} = substr($self->{buffer}, $sz); 1496e004614Sespie return $result; 1506e004614Sespie} 1516e004614Sespie 152039cbdaaSespiesub retrieve_and_print($self, $sz, $fh) 15345c22c0aSespie{ 15445c22c0aSespie my $result = substr($self->{buffer}, 0, $sz); 15545c22c0aSespie print $fh $result; 15645c22c0aSespie my $retrieved = length($result); 15745c22c0aSespie if ($retrieved == $sz) { 15845c22c0aSespie $self->{buffer} = substr($self->{buffer}, $sz); 15945c22c0aSespie return; 16045c22c0aSespie } else { 16145c22c0aSespie $self->{buffer} = ''; 16245c22c0aSespie } 16345c22c0aSespie while ($retrieved < $sz) { 16445c22c0aSespie $self->{fh}->recv($result, $sz - $retrieved); 16545c22c0aSespie print $fh $result; 16645c22c0aSespie $retrieved += length($result); 16745c22c0aSespie } 16845c22c0aSespie} 16945c22c0aSespie 170039cbdaaSespiesub retrieve_chunked($self) 1712b091df0Sespie{ 1722b091df0Sespie my $result = ''; 1732b091df0Sespie while (1) { 1742b091df0Sespie my $sz = $self->getline; 1752b091df0Sespie if ($sz =~ m/^([0-9a-fA-F]+)/) { 1762b091df0Sespie my $realsize = hex($1); 1772b091df0Sespie last if $realsize == 0; 1782b091df0Sespie $result .= $self->retrieve($realsize); 1792b091df0Sespie } 1802b091df0Sespie } 1812b091df0Sespie return $result; 1822b091df0Sespie} 1832b091df0Sespie 184039cbdaaSespiesub retrieve_response($self, $h) 1852d9bd7abSespie{ 1861eb63decSespie if ($h->{chunked}) { 187dc0760faSespie return $self->retrieve_chunked; 188dc0760faSespie } 1891eb63decSespie if ($h->{length}) { 1901eb63decSespie return $self->retrieve($h->{length}); 1911eb63decSespie } 1922d9bd7abSespie return undef; 1932d9bd7abSespie} 1942d9bd7abSespie 195039cbdaaSespiesub retrieve_response_and_print($self, $h, $fh) 19645c22c0aSespie{ 19745c22c0aSespie if ($h->{chunked}) { 19845c22c0aSespie print $fh $self->retrieve_chunked; 19945c22c0aSespie } 20045c22c0aSespie if ($h->{length}) { 20145c22c0aSespie $self->retrieve_and_print($h->{length}, $fh); 20245c22c0aSespie } 20345c22c0aSespie} 20445c22c0aSespie 205039cbdaaSespiesub print($self, @l) 2066e004614Sespie{ 2071eb63decSespie# print STDERR "Before print\n"; 2081eb63decSespie if (!print {$self->{fh}} @l) { 2091eb63decSespie print STDERR "network print failed with $!\n"; 2101eb63decSespie } 2111eb63decSespie# print STDERR "After print\n"; 2126e004614Sespie} 2136e004614Sespie 2146e004614Sespiepackage _Proxy; 2156e004614Sespie 2166e004614Sespiemy $pid; 2176e004614Sespiemy $token = 0; 2186e004614Sespie 219039cbdaaSespiesub batch($code) 2206e004614Sespie{ 2216e004614Sespie if (defined $pid) { 2226e004614Sespie waitpid($pid, 0); 2236e004614Sespie undef $pid; 2246e004614Sespie } 2256e004614Sespie $token++; 2266e004614Sespie $pid = fork(); 2276e004614Sespie if (!defined $pid) { 2286e004614Sespie print "ERROR: fork failed: $!\n"; 2296e004614Sespie } 2306e004614Sespie if ($pid == 0) { 2316e004614Sespie &$code(); 2326e004614Sespie exit(0); 2336e004614Sespie } 2346e004614Sespie} 2356e004614Sespie 236039cbdaaSespiesub abort_batch() 2376e004614Sespie{ 2386e004614Sespie if (defined $pid) { 23945c22c0aSespie kill HUP => $pid; 2406e004614Sespie waitpid($pid, 0); 2416e004614Sespie undef $pid; 2426e004614Sespie } 2436e004614Sespie print "\nABORTED $token\n"; 2446e004614Sespie} 2456e004614Sespie 246039cbdaaSespiesub get_directory($o, $dname) 247a62c2f70Sespie{ 248d364d569Sespie local $SIG{'HUP'} = 'IGNORE'; 2491eb63decSespie $o->send_header("$dname/"); 2501eb63decSespie my $h = $o->get_header; 2511eb63decSespie if (!defined $h) { 2521eb63decSespie print "ERROR: can't decode header\n"; 2531eb63decSespie exit 1; 2541eb63decSespie } 255a62c2f70Sespie 2562d9bd7abSespie my $r = $o->retrieve_response($h); 2572d9bd7abSespie if (!defined $r) { 2582d9bd7abSespie print "ERROR: can't decode response\n"; 2592d9bd7abSespie } 2601eb63decSespie if ($h->code != 200) { 2611eb63decSespie print "ERROR: code was ", $h->code, "\n"; 262d364d569Sespie exit 1; 2632d9bd7abSespie } 2642b091df0Sespie print "SUCCESS: directory $dname\n"; 265d63625b9Ssthen for my $pkg ($r =~ m/\<A[^>]*\s+HREF=\"(.+?)\.tgz\"/gio) { 2662b091df0Sespie $pkg = $1 if $pkg =~ m|^.*/(.*)$|; 2672b091df0Sespie # decode uri-encoding; from URI::Escape 2682b091df0Sespie $pkg =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; 2692b091df0Sespie print $pkg, "\n"; 270a62c2f70Sespie } 2712b091df0Sespie print "\n"; 2722b091df0Sespie return; 2732b091df0Sespie} 2742b091df0Sespie 275d364d569Sespieuse File::Basename; 276d364d569Sespie 277039cbdaaSespiesub get_file($o, $fname) 27834912ae4Sespie{ 279d364d569Sespie my $bailout = 0; 280d364d569Sespie $SIG{'HUP'} = sub { 281d364d569Sespie $bailout++; 282d364d569Sespie }; 283dc0760faSespie my $first = 1; 284dc0760faSespie my $start = 0; 285d364d569Sespie my $end = 2000; 286dc0760faSespie my $total_size = 0; 287dc0760faSespie 288dc0760faSespie do { 289d364d569Sespie $end *= 2; 2901eb63decSespie $o->send_header($fname, range => [$start, $end-1]); 2911eb63decSespie my $h = $o->get_header; 2921eb63decSespie if (!defined $h) { 29334912ae4Sespie print "ERROR\n"; 294d364d569Sespie exit 1; 29534912ae4Sespie } 2961eb63decSespie if (defined $h->{size}) { 2971eb63decSespie $total_size = $h->{size}; 29834912ae4Sespie } 2991eb63decSespie if ($h->code != 200 && $h->code != 206) { 3001eb63decSespie print "ERROR: code was ", $h->code, "\n"; 3011eb63decSespie my $r = $o->retrieve_response($h); 3021eb63decSespie exit 1; 303dc0760faSespie } 304dc0760faSespie if ($first) { 305dc0760faSespie print "TRANSFER: $total_size\n"; 306dc0760faSespie $first = 0; 307dc0760faSespie } 30845c22c0aSespie $o->retrieve_response_and_print($h, \*STDOUT); 309dc0760faSespie $start = $end; 310d364d569Sespie if ($bailout) { 311d364d569Sespie exit 0; 312d364d569Sespie } 313dc0760faSespie } while ($end < $total_size); 31434912ae4Sespie} 315a62c2f70Sespie 316039cbdaaSespiesub main($self) 3176e004614Sespie{ 3186e004614Sespie my $o = _Proxy::Connection->new($self->{host}, "www"); 3196e004614Sespie while (<STDIN>) { 3206e004614Sespie chomp; 3216e004614Sespie if (m/^LIST\s+(.*)$/o) { 3226e004614Sespie my $dname = $1; 323039cbdaaSespie batch(sub() {get_directory($o, $dname);}); 3246e004614Sespie } elsif (m/^GET\s+(.*)$/o) { 3256e004614Sespie my $fname = $1; 326039cbdaaSespie batch(sub() { get_file($o, $fname);}); 3276e004614Sespie } elsif (m/^BYE$/o) { 3286e004614Sespie exit(0); 3296e004614Sespie } elsif (m/^ABORT$/o) { 3306e004614Sespie abort_batch(); 3316e004614Sespie } else { 3326e004614Sespie print "ERROR: Unknown command\n"; 3336e004614Sespie } 3346e004614Sespie } 3356e004614Sespie} 3366e004614Sespie 337a62c2f70Sespie1; 338