xref: /plan9-contrib/sys/src/cmd/hget.c (revision efc4291f9554edd4b2b0148fe3a05435b4507286)
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <bio.h>
5 #include <ip.h>
6 #include <libsec.h>
7 #include <auth.h>
8 
/*
 *  a cracked URL together with what the server has told us
 *  about the object it names
 */
typedef struct URL URL;
struct URL
{
	int	method;		/* index into method[]: Other, Http, Https or Ftp */
	char	*host;		/* strdup'd by crackurl */
	char	*port;		/* points into host, or at method[].name if no port given */
	char	*page;		/* path part, strdup'd by crackurl */
	char	*etag;		/* copy of the Etag: header value */
	char	*redirect;	/* copy of the Uri: or Location: header value */
	char	*postbody;	/* body to POST; nil means GET */
	char	*cred;		/* base64 credentials for Basic authorization */
	long	mtime;		/* Last-Modified: as epoch seconds */
};
22 
/*
 *  the part of the object still to be fetched
 */
typedef struct Range Range;
struct Range
{
	long	start;	/* offset of next byte wanted; only 2 gig supported, tdb */
	long	end;	/* size of object; main starts it at -1 (unknown) */
};
29 
30 typedef struct Out Out;
31 struct Out
32 {
33 	int fd;
34 	int offset;				/* notional current offset in output */
35 	int written;			/* number of bytes successfully transferred to output */
36 	DigestState *curr;		/* digest state up to offset (if known) */
37 	DigestState *hiwat;		/* digest state of all bytes written */
38 };
39 
/* URL methods; also the indices of method[] */
enum
{
	Other	= 0,
	Http	= 1,
	Https	= 2,
	Ftp	= 3,
};

/* non-positive results of a transfer function; positive means bytes moved */
enum
{
	Eof	= 0,
	Error	= -1,
	Server	= -2,
	Changed	= -3,
};
55 
56 int debug;
57 char *ofile;
58 
59 
60 int	doftp(URL*, URL*, Range*, Out*, long);
61 int	dohttp(URL*, URL*,  Range*, Out*, long);
62 int	crackurl(URL*, char*);
63 Range*	crackrange(char*);
64 int	getheader(int, char*, int);
65 int	httpheaders(int, int, URL*, Range*);
66 int	httprcode(int);
67 int	cistrncmp(char*, char*, int);
68 int	cistrcmp(char*, char*);
69 void	initibuf(void);
70 int	readline(int, char*, int);
71 int	readibuf(int, char*, int);
72 int	dfprint(int, char*, ...);
73 void	unreadline(char*);
74 int	output(Out*, char*, int);
75 void	setoffset(Out*, int);
76 
77 int	verbose;
78 char	*net;
79 char	tcpdir[NETPATHLEN];
80 int	headerprint;
81 
82 struct {
83 	char	*name;
84 	int	(*f)(URL*, URL*, Range*, Out*, long);
85 } method[] = {
86 	[Http]	{ "http",	dohttp },
87 	[Https]	{ "https",	dohttp },
88 	[Ftp]	{ "ftp",	doftp },
89 	[Other]	{ "_______",	nil },
90 };
91 
92 void
93 usage(void)
94 {
95 	fprint(2, "usage: %s [-dhv] [-o outfile] [-p body] [-x netmtpt] url\n", argv0);
96 	exits("usage");
97 }
98 
99 void
100 main(int argc, char **argv)
101 {
102 	URL u;
103 	Range r;
104 	int errs, n;
105 	ulong mtime;
106 	Dir *d;
107 	char postbody[4096], *p, *e, *t, *hpx;
108 	URL px; // Proxy
109 	Out out;
110 
111 	ofile = nil;
112 	p = postbody;
113 	e = p + sizeof(postbody);
114 	r.start = 0;
115 	r.end = -1;
116 	mtime = 0;
117 	memset(&u, 0, sizeof(u));
118 	memset(&px, 0, sizeof(px));
119 	hpx = getenv("httpproxy");
120 
121 	ARGBEGIN {
122 	case 'o':
123 		ofile = EARGF(usage());
124 		break;
125 	case 'd':
126 		debug = 1;
127 		break;
128 	case 'h':
129 		headerprint = 1;
130 		break;
131 	case 'v':
132 		verbose = 1;
133 		break;
134 	case 'x':
135 		net = EARGF(usage());
136 		break;
137 	case 'p':
138 		t = EARGF(usage());
139 		if(p != postbody)
140 			p = seprint(p, e, "&%s", t);
141 		else
142 			p = seprint(p, e, "%s", t);
143 		u.postbody = postbody;
144 
145 		break;
146 	default:
147 		usage();
148 	} ARGEND;
149 
150 	if(net != nil){
151 		if(strlen(net) > sizeof(tcpdir)-5)
152 			sysfatal("network mount point too long");
153 		snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
154 	} else
155 		snprint(tcpdir, sizeof(tcpdir), "tcp");
156 
157 	if(argc != 1)
158 		usage();
159 
160 
161 	out.fd = 1;
162 	out.written = 0;
163 	out.offset = 0;
164 	out.curr = nil;
165 	out.hiwat = nil;
166 	if(ofile != nil){
167 		d = dirstat(ofile);
168 		if(d == nil){
169 			out.fd = create(ofile, OWRITE, 0664);
170 			if(out.fd < 0)
171 				sysfatal("creating %s: %r", ofile);
172 		} else {
173 			out.fd = open(ofile, OWRITE);
174 			if(out.fd < 0)
175 				sysfatal("can't open %s: %r", ofile);
176 			r.start = d->length;
177 			mtime = d->mtime;
178 			free(d);
179 		}
180 	}
181 
182 	errs = 0;
183 
184 	if(crackurl(&u, argv[0]) < 0)
185 		sysfatal("%r");
186 	if(hpx && crackurl(&px, hpx) < 0)
187 		sysfatal("%r");
188 
189 	for(;;){
190 		setoffset(&out, 0);
191 		/* transfer data */
192 		werrstr("");
193 		n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
194 
195 		switch(n){
196 		case Eof:
197 			exits(0);
198 			break;
199 		case Error:
200 			if(errs++ < 10)
201 				continue;
202 			sysfatal("too many errors with no progress %r");
203 			break;
204 		case Server:
205 			sysfatal("server returned: %r");
206 			break;
207 		}
208 
209 		/* forward progress */
210 		errs = 0;
211 		r.start += n;
212 		if(r.start >= r.end)
213 			break;
214 	}
215 
216 	exits(0);
217 }
218 
219 int
220 crackurl(URL *u, char *s)
221 {
222 	char *p;
223 	int i;
224 
225 	if(u->page != nil){
226 		free(u->page);
227 		u->page = nil;
228 	}
229 
230 	/* get type */
231 	for(p = s; *p; p++){
232 		if(*p == '/'){
233 			p = s;
234 			if(u->method == Other){
235 				werrstr("missing method");
236 				return -1;
237 			}
238 			if(u->host == nil){
239 				werrstr("missing host");
240 				return -1;
241 			}
242 			u->page = strdup(p);
243 			return 0;
244 		}
245 		if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
246 			*p = 0;
247 			p += 3;
248 			for(i = 0; i < nelem(method); i++){
249 				if(cistrcmp(s, method[i].name) == 0){
250 					u->method = i;
251 					break;
252 				}
253 			}
254 			break;
255 		}
256 	}
257 
258 	if(u->method == Other){
259 		werrstr("unsupported URL type %s", s);
260 		return -1;
261 	}
262 
263 	/* get system */
264 	free(u->host);
265 	s = p;
266 	p = strchr(s, '/');
267 	if(p == nil){
268 		u->host = strdup(s);
269 		u->page = strdup("/");
270 	} else {
271 		u->page = strdup(p);
272 		*p = 0;
273 		u->host = strdup(s);
274 		*p = '/';
275 	}
276 
277 	if(p = strchr(u->host, ':')) {
278 		*p++ = 0;
279 		u->port = p;
280 	} else
281 		u->port = method[u->method].name;
282 
283 	if(*(u->host) == 0){
284 		werrstr("bad url, null host");
285 		return -1;
286 	}
287 
288 	return 0;
289 }
290 
/* names for the If-range: date; indexed by Tm.wday and Tm.mon in dohttp */
char *day[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat"
};

char *month[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
298 
299 struct
300 {
301 	int	fd;
302 	long	mtime;
303 } note;
304 
305 void
306 catch(void*, char*)
307 {
308 	Dir d;
309 
310 	nulldir(&d);
311 	d.mtime = note.mtime;
312 	if(dirfwstat(note.fd, &d) < 0)
313 		sysfatal("catch: can't dirfwstat: %r");
314 	noted(NDFLT);
315 }
316 
317 int
318 dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
319 {
320 	int fd, cfd;
321 	int redirect, auth, loop;
322 	int n, rv, code;
323 	long tot, vtime;
324 	Tm *tm;
325 	char buf[1024];
326 	char err[ERRMAX];
327 
328 
329 	/*  always move back to a previous 512 byte bound because some
330 	 *  servers can't seem to deal with requests that start at the
331 	 *  end of the file
332 	 */
333 	if(r->start)
334 		r->start = ((r->start-1)/512)*512;
335 
336 	/* loop for redirects, requires reading both response code and headers */
337 	fd = -1;
338 	for(loop = 0; loop < 32; loop++){
339 		if(px->host == nil){
340 			fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
341 		} else {
342 			fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
343 		}
344 		if(fd < 0)
345 			return Error;
346 
347 		if(u->method == Https){
348 			int tfd;
349 			TLSconn conn;
350 
351 			memset(&conn, 0, sizeof conn);
352 			tfd = tlsClient(fd, &conn);
353 			if(tfd < 0){
354 				fprint(2, "tlsClient: %r\n");
355 				close(fd);
356 				return Error;
357 			}
358 			/* BUG: check cert here? */
359 			if(conn.cert)
360 				free(conn.cert);
361 			close(fd);
362 			fd = tfd;
363 		}
364 
365 		/* write request, use range if not start of file */
366 		if(u->postbody == nil){
367 			if(px->host == nil){
368 				dfprint(fd,	"GET %s HTTP/1.0\r\n"
369 						"Host: %s\r\n"
370 						"User-agent: Plan9/hget\r\n"
371 						"Cache-Control: no-cache\r\n"
372 						"Pragma: no-cache\r\n",
373 						u->page, u->host);
374 			} else {
375 				dfprint(fd,	"GET http://%s%s HTTP/1.0\r\n"
376 						"Host: %s\r\n"
377 						"User-agent: Plan9/hget\r\n"
378 						"Cache-Control: no-cache\r\n"
379 						"Pragma: no-cache\r\n",
380 						u->host, u->page, u->host);
381 			}
382 			if(u->cred)
383 				dfprint(fd,	"Authorization: Basic %s\r\n",
384 						u->cred);
385 		} else {
386 			dfprint(fd,	"POST %s HTTP/1.0\r\n"
387 					"Host: %s\r\n"
388 					"Content-type: application/x-www-form-urlencoded\r\n"
389 					"Content-length: %d\r\n"
390 					"User-agent: Plan9/hget\r\n",
391 					u->page, u->host, strlen(u->postbody));
392 			if(u->cred)
393 				dfprint(fd, "Authorization: Basic %s\r\n", u->cred);
394 		}
395 		if(r->start != 0){
396 			dfprint(fd, "Range: bytes=%d-\n", r->start);
397 			if(u->etag != nil){
398 				dfprint(fd, "If-range: %s\n", u->etag);
399 			} else {
400 				tm = gmtime(mtime);
401 				dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
402 					day[tm->wday], tm->mday, month[tm->mon],
403 					tm->year+1900, tm->hour, tm->min, tm->sec);
404 			}
405 		}
406 		if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
407 			if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
408 				while((n = read(cfd, buf, sizeof buf)) > 0){
409 					if(debug)
410 						write(2, buf, n);
411 					write(fd, buf, n);
412 				}
413 			}else{
414 				close(cfd);
415 				cfd = -1;
416 			}
417 		}
418 
419 		dfprint(fd, "\r\n", u->host);
420 		if(u->postbody)
421 			dfprint(fd,	"%s", u->postbody);
422 
423 		auth = 0;
424 		redirect = 0;
425 		initibuf();
426 		code = httprcode(fd);
427 		switch(code){
428 		case Error:	/* connection timed out */
429 		case Eof:
430 			close(fd);
431 			close(cfd);
432 			return code;
433 
434 		case 200:	/* OK */
435 		case 201:	/* Created */
436 		case 202:	/* Accepted */
437 			if(ofile == nil && r->start != 0)
438 				sysfatal("page changed underfoot");
439 			break;
440 
441 		case 204:	/* No Content */
442 			sysfatal("No Content");
443 
444 		case 206:	/* Partial Content */
445 			setoffset(out, r->start);
446 			break;
447 
448 		case 301:	/* Moved Permanently */
449 		case 302:	/* Moved Temporarily (actually Found) */
450 		case 303:	/* See Other */
451 		case 307:	/* Temporary Redirect (HTTP/1.1) */
452 			redirect = 1;
453 			u->postbody = nil;
454 			break;
455 
456 		case 304:	/* Not Modified */
457 			break;
458 
459 		case 400:	/* Bad Request */
460 			sysfatal("Bad Request");
461 
462 		case 401:	/* Unauthorized */
463 			if (auth)
464 				sysfatal("Authentication failed");
465 			auth = 1;
466 			break;
467 
468 		case 402:	/* ??? */
469 			sysfatal("Unauthorized");
470 
471 		case 403:	/* Forbidden */
472 			sysfatal("Forbidden by server");
473 
474 		case 404:	/* Not Found */
475 			sysfatal("Not found on server");
476 
477 		case 407:	/* Proxy Authentication */
478 			sysfatal("Proxy authentication required");
479 
480 		case 500:	/* Internal server error */
481 			sysfatal("Server choked");
482 
483 		case 501:	/* Not implemented */
484 			sysfatal("Server can't do it!");
485 
486 		case 502:	/* Bad gateway */
487 			sysfatal("Bad gateway");
488 
489 		case 503:	/* Service unavailable */
490 			sysfatal("Service unavailable");
491 
492 		default:
493 			sysfatal("Unknown response code %d", code);
494 		}
495 
496 		if(u->redirect != nil){
497 			free(u->redirect);
498 			u->redirect = nil;
499 		}
500 
501 		rv = httpheaders(fd, cfd, u, r);
502 		close(cfd);
503 		if(rv != 0){
504 			close(fd);
505 			return rv;
506 		}
507 
508 		if(!redirect && !auth)
509 			break;
510 
511 		if (redirect){
512 			if(u->redirect == nil)
513 				sysfatal("redirect: no URL");
514 			if(crackurl(u, u->redirect) < 0)
515 				sysfatal("redirect: %r");
516 		}
517 	}
518 
519 	/* transfer whatever you get */
520 	if(ofile != nil && u->mtime != 0){
521 		note.fd = out->fd;
522 		note.mtime = u->mtime;
523 		notify(catch);
524 	}
525 
526 	tot = 0;
527 	vtime = 0;
528 	for(;;){
529 		n = readibuf(fd, buf, sizeof(buf));
530 		if(n <= 0)
531 			break;
532 		if(output(out, buf, n) != n)
533 			break;
534 		tot += n;
535 		if(verbose && (vtime != time(0) || r->start == r->end)) {
536 			vtime = time(0);
537 			fprint(2, "%ld %ld\n", r->start+tot, r->end);
538 		}
539 	}
540 	notify(nil);
541 	close(fd);
542 
543 	if(ofile != nil && u->mtime != 0){
544 		Dir d;
545 
546 		rerrstr(err, sizeof err);
547 		nulldir(&d);
548 		d.mtime = u->mtime;
549 		if(dirfwstat(out->fd, &d) < 0)
550 			fprint(2, "couldn't set mtime: %r\n");
551 		errstr(err, sizeof err);
552 	}
553 
554 	return tot;
555 }
556 
557 /* get the http response code */
558 int
559 httprcode(int fd)
560 {
561 	int n;
562 	char *p;
563 	char buf[256];
564 
565 	n = readline(fd, buf, sizeof(buf)-1);
566 	if(n <= 0)
567 		return n;
568 	if(debug)
569 		fprint(2, "%d <- %s\n", fd, buf);
570 	p = strchr(buf, ' ');
571 	if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
572 		werrstr("bad response from server");
573 		return -1;
574 	}
575 	buf[n] = 0;
576 	return atoi(p+1);
577 }
578 
579 /* read in and crack the http headers, update u and r */
580 void	hhetag(char*, URL*, Range*);
581 void	hhmtime(char*, URL*, Range*);
582 void	hhclen(char*, URL*, Range*);
583 void	hhcrange(char*, URL*, Range*);
584 void	hhuri(char*, URL*, Range*);
585 void	hhlocation(char*, URL*, Range*);
586 void	hhauth(char*, URL*, Range*);
587 
588 struct {
589 	char *name;
590 	void (*f)(char*, URL*, Range*);
591 } headers[] = {
592 	{ "etag:", hhetag },
593 	{ "last-modified:", hhmtime },
594 	{ "content-length:", hhclen },
595 	{ "content-range:", hhcrange },
596 	{ "uri:", hhuri },
597 	{ "location:", hhlocation },
598 	{ "WWW-Authenticate:", hhauth },
599 };
600 int
601 httpheaders(int fd, int cfd, URL *u, Range *r)
602 {
603 	char buf[2048];
604 	char *p;
605 	int i, n;
606 
607 	for(;;){
608 		n = getheader(fd, buf, sizeof(buf));
609 		if(n <= 0)
610 			break;
611 		if(cfd >= 0)
612 			fprint(cfd, "%s\n", buf);
613 		for(i = 0; i < nelem(headers); i++){
614 			n = strlen(headers[i].name);
615 			if(cistrncmp(buf, headers[i].name, n) == 0){
616 				/* skip field name and leading white */
617 				p = buf + n;
618 				while(*p == ' ' || *p == '\t')
619 					p++;
620 
621 				(*headers[i].f)(p, u, r);
622 				break;
623 			}
624 		}
625 	}
626 	return n;
627 }
628 
629 /*
630  *  read a single mime header, collect continuations.
631  *
632  *  this routine assumes that there is a blank line twixt
633  *  the header and the message body, otherwise bytes will
634  *  be lost.
635  */
636 int
637 getheader(int fd, char *buf, int n)
638 {
639 	char *p, *e;
640 	int i;
641 
642 	n--;
643 	p = buf;
644 	for(e = p + n; ; p += i){
645 		i = readline(fd, p, e-p);
646 		if(i < 0)
647 			return i;
648 
649 		if(p == buf){
650 			/* first line */
651 			if(strchr(buf, ':') == nil)
652 				break;		/* end of headers */
653 		} else {
654 			/* continuation line */
655 			if(*p != ' ' && *p != '\t'){
656 				unreadline(p);
657 				*p = 0;
658 				break;		/* end of this header */
659 			}
660 		}
661 	}
662 	if(headerprint)
663 		print("%s\n", buf);
664 
665 	if(debug)
666 		fprint(2, "%d <- %s\n", fd, buf);
667 	return p-buf;
668 }
669 
670 void
671 hhetag(char *p, URL *u, Range*)
672 {
673 	if(u->etag != nil){
674 		if(strcmp(u->etag, p) != 0)
675 			sysfatal("file changed underfoot");
676 	} else
677 		u->etag = strdup(p);
678 }
679 
680 char*	monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";
681 
682 void
683 hhmtime(char *p, URL *u, Range*)
684 {
685 	char *month, *day, *yr, *hms;
686 	char *fields[6];
687 	Tm tm, now;
688 	int i;
689 
690 	i = getfields(p, fields, 6, 1, " \t");
691 	if(i < 5)
692 		return;
693 
694 	day = fields[1];
695 	month = fields[2];
696 	yr = fields[3];
697 	hms = fields[4];
698 
699 	/* default time */
700 	now = *gmtime(time(0));
701 	tm = now;
702 	tm.yday = 0;
703 
704 	/* convert ascii month to a number twixt 1 and 12 */
705 	if(*month >= '0' && *month <= '9'){
706 		tm.mon = atoi(month) - 1;
707 		if(tm.mon < 0 || tm.mon > 11)
708 			tm.mon = 5;
709 	} else {
710 		for(p = month; *p; p++)
711 			*p = tolower(*p);
712 		for(i = 0; i < 12; i++)
713 			if(strncmp(&monthchars[i*3], month, 3) == 0){
714 				tm.mon = i;
715 				break;
716 			}
717 	}
718 
719 	tm.mday = atoi(day);
720 
721 	if(hms) {
722 		tm.hour = strtoul(hms, &p, 10);
723 		if(*p == ':') {
724 			p++;
725 			tm.min = strtoul(p, &p, 10);
726 			if(*p == ':') {
727 				p++;
728 				tm.sec = strtoul(p, &p, 10);
729 			}
730 		}
731 		if(tolower(*p) == 'p')
732 			tm.hour += 12;
733 	}
734 
735 	if(yr) {
736 		tm.year = atoi(yr);
737 		if(tm.year >= 1900)
738 			tm.year -= 1900;
739 	} else {
740 		if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
741 			tm.year--;
742 	}
743 
744 	strcpy(tm.zone, "GMT");
745 	/* convert to epoch seconds */
746 	u->mtime = tm2sec(&tm);
747 }
748 
749 void
750 hhclen(char *p, URL*, Range *r)
751 {
752 	r->end = atoi(p);
753 }
754 
755 void
756 hhcrange(char *p, URL*, Range *r)
757 {
758 	char *x;
759 	vlong l;
760 
761 	l = 0;
762 	x = strchr(p, '/');
763 	if(x)
764 		l = atoll(x+1);
765 	if(l == 0) {
766 		x = strchr(p, '-');
767 		if(x)
768 			l = atoll(x+1);
769 	}
770 	if(l)
771 		r->end = l;
772 }
773 
774 void
775 hhuri(char *p, URL *u, Range*)
776 {
777 	if(*p != '<')
778 		return;
779 	u->redirect = strdup(p+1);
780 	p = strchr(u->redirect, '>');
781 	if(p != nil)
782 		*p = 0;
783 }
784 
785 void
786 hhlocation(char *p, URL *u, Range*)
787 {
788 	u->redirect = strdup(p);
789 }
790 
791 void
792 hhauth(char *p, URL *u, Range*)
793 {
794 	char *f[4];
795 	UserPasswd *up;
796 	char *s, cred[64];
797 
798 	if (cistrncmp(p, "basic ", 6) != 0)
799 		sysfatal("only Basic authentication supported");
800 
801 	if (gettokens(p, f, nelem(f), "\"") < 2)
802 		sysfatal("garbled auth data");
803 
804 	if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http server=%q realm=%q",
805 	    	u->host, f[1])) == nil)
806 			sysfatal("cannot authenticate");
807 
808 	s = smprint("%s:%s", up->user, up->passwd);
809 	if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
810 		sysfatal("enc64");
811   		free(s);
812 
813 	assert(u->cred = strdup(cred));
814 }
815 
816 enum
817 {
818 	/* ftp return codes */
819 	Extra=		1,
820 	Success=	2,
821 	Incomplete=	3,
822 	TempFail=	4,
823 	PermFail=	5,
824 
825 	Nnetdir=	64,	/* max length of network directory paths */
826 	Ndialstr=	64,		/* max length of dial strings */
827 };
828 
829 int ftpcmd(int, char*, ...);
830 int ftprcode(int, char*, int);
831 int hello(int);
832 int logon(int);
833 int xfertype(int, char*);
834 int passive(int, URL*);
835 int active(int, URL*);
836 int ftpxfer(int, Out*, Range*);
837 int terminateftp(int, int);
838 int getaddrport(char*, uchar*, uchar*);
839 int ftprestart(int, Out*, URL*, Range*, long);
840 
841 int
842 doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
843 {
844 	int pid, ctl, data, rv;
845 	Waitmsg *w;
846 	char msg[64];
847 	char conndir[NETPATHLEN];
848 	char *p;
849 
850 	/* untested, proxy doesn't work with ftp (I think) */
851 	if(px->host == nil){
852 		ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, conndir, 0);
853 	} else {
854 		ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, conndir, 0);
855 	}
856 
857 	if(ctl < 0)
858 		return Error;
859 	if(net == nil){
860 		p = strrchr(conndir, '/');
861 		*p = 0;
862 		snprint(tcpdir, sizeof(tcpdir), conndir);
863 	}
864 
865 	initibuf();
866 
867 	rv = hello(ctl);
868 	if(rv < 0)
869 		return terminateftp(ctl, rv);
870 
871 	rv = logon(ctl);
872 	if(rv < 0)
873 		return terminateftp(ctl, rv);
874 
875 	rv = xfertype(ctl, "I");
876 	if(rv < 0)
877 		return terminateftp(ctl, rv);
878 
879 	/* if file is up to date and the right size, stop */
880 	if(ftprestart(ctl, out, u, r, mtime) > 0){
881 		close(ctl);
882 		return Eof;
883 	}
884 
885 	/* first try passive mode, then active */
886 	data = passive(ctl, u);
887 	if(data < 0){
888 		data = active(ctl, u);
889 		if(data < 0)
890 			return Error;
891 	}
892 
893 	/* fork */
894 	switch(pid = rfork(RFPROC|RFFDG|RFMEM)){
895 	case -1:
896 		close(data);
897 		return terminateftp(ctl, Error);
898 	case 0:
899 		ftpxfer(data, out, r);
900 		close(data);
901 		_exits(0);
902 	default:
903 		close(data);
904 		break;
905 	}
906 
907 	/* wait for reply message */
908 	rv = ftprcode(ctl, msg, sizeof(msg));
909 	close(ctl);
910 
911 	/* wait for process to terminate */
912 	w = nil;
913 	for(;;){
914 		free(w);
915 		w = wait();
916 		if(w == nil)
917 			return Error;
918 		if(w->pid == pid){
919 			if(w->msg[0] == 0){
920 				free(w);
921 				break;
922 			}
923 			werrstr("xfer: %s", w->msg);
924 			free(w);
925 			return Error;
926 		}
927 	}
928 
929 	switch(rv){
930 	case Success:
931 		return Eof;
932 	case TempFail:
933 		return Server;
934 	default:
935 		return Error;
936 	}
937 }
938 
939 int
940 ftpcmd(int ctl, char *fmt, ...)
941 {
942 	va_list arg;
943 	char buf[2*1024], *s;
944 
945 	va_start(arg, fmt);
946 	s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
947 	va_end(arg);
948 	if(debug)
949 		fprint(2, "%d -> %s\n", ctl, buf);
950 	*s++ = '\r';
951 	*s++ = '\n';
952 	if(write(ctl, buf, s - buf) != s - buf)
953 		return -1;
954 	return 0;
955 }
956 
957 int
958 ftprcode(int ctl, char *msg, int len)
959 {
960 	int rv;
961 	int i;
962 	char *p;
963 
964 	len--;	/* room for terminating null */
965 	for(;;){
966 		*msg = 0;
967 		i = readline(ctl, msg, len);
968 		if(i < 0)
969 			break;
970 		if(debug)
971 			fprint(2, "%d <- %s\n", ctl, msg);
972 
973 		/* stop if not a continuation */
974 		rv = strtol(msg, &p, 10);
975 		if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
976 			return rv/100;
977 	}
978 	*msg = 0;
979 
980 	return -1;
981 }
982 
983 int
984 hello(int ctl)
985 {
986 	char msg[1024];
987 
988 	/* wait for hello from other side */
989 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
990 		werrstr("HELLO: %s", msg);
991 		return Server;
992 	}
993 	return 0;
994 }
995 
/*
 *  value of the n characters at p taken as decimal digits;
 *  the characters are not checked
 */
int
getdec(char *p, int n)
{
	int v;

	v = 0;
	while(n-- > 0)
		v = 10*v + (*p++ - '0');
	return v;
}
1006 
1007 int
1008 ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
1009 {
1010 	Tm tm;
1011 	char msg[1024];
1012 	long x, rmtime;
1013 
1014 	ftpcmd(ctl, "MDTM %s", u->page);
1015 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1016 		r->start = 0;
1017 		return 0;		/* need to do something */
1018 	}
1019 
1020 	/* decode modification time */
1021 	if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
1022 		r->start = 0;
1023 		return 0;		/* need to do something */
1024 	}
1025 	memset(&tm, 0, sizeof(tm));
1026 	tm.year = getdec(msg+4, 4) - 1900;
1027 	tm.mon = getdec(msg+4+4, 2) - 1;
1028 	tm.mday = getdec(msg+4+4+2, 2);
1029 	tm.hour = getdec(msg+4+4+2+2, 2);
1030 	tm.min = getdec(msg+4+4+2+2+2, 2);
1031 	tm.sec = getdec(msg+4+4+2+2+2+2, 2);
1032 	strcpy(tm.zone, "GMT");
1033 	rmtime = tm2sec(&tm);
1034 	if(rmtime > mtime)
1035 		r->start = 0;
1036 
1037 	/* get size */
1038 	ftpcmd(ctl, "SIZE %s", u->page);
1039 	if(ftprcode(ctl, msg, sizeof(msg)) == Success){
1040 		x = atol(msg+4);
1041 		if(r->start == x)
1042 			return 1;	/* we're up to date */
1043 		r->end = x;
1044 	}
1045 
1046 	/* seek to restart point */
1047 	if(r->start > 0){
1048 		ftpcmd(ctl, "REST %lud", r->start);
1049 		if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
1050 			setoffset(out, r->start);
1051 		}else
1052 			r->start = 0;
1053 	}
1054 
1055 	return 0;	/* need to do something */
1056 }
1057 
1058 int
1059 logon(int ctl)
1060 {
1061 	char msg[1024];
1062 
1063 	/* login anonymous */
1064 	ftpcmd(ctl, "USER anonymous");
1065 	switch(ftprcode(ctl, msg, sizeof(msg))){
1066 	case Success:
1067 		return 0;
1068 	case Incomplete:
1069 		break;	/* need password */
1070 	default:
1071 		werrstr("USER: %s", msg);
1072 		return Server;
1073 	}
1074 
1075 	/* send user id as password */
1076 	sprint(msg, "%s@closedmind.org", getuser());
1077 	ftpcmd(ctl, "PASS %s", msg);
1078 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1079 		werrstr("PASS: %s", msg);
1080 		return Server;
1081 	}
1082 
1083 	return 0;
1084 }
1085 
1086 int
1087 xfertype(int ctl, char *t)
1088 {
1089 	char msg[1024];
1090 
1091 	ftpcmd(ctl, "TYPE %s", t);
1092 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1093 		werrstr("TYPE %s: %s", t, msg);
1094 		return Server;
1095 	}
1096 
1097 	return 0;
1098 }
1099 
1100 int
1101 passive(int ctl, URL *u)
1102 {
1103 	char msg[1024];
1104 	char ipaddr[32];
1105 	char *f[6];
1106 	char *p;
1107 	int fd;
1108 	int port;
1109 	char aport[12];
1110 
1111 	ftpcmd(ctl, "PASV");
1112 	if(ftprcode(ctl, msg, sizeof(msg)) != Success)
1113 		return Error;
1114 
1115 	/* get address and port number from reply, this is AI */
1116 	p = strchr(msg, '(');
1117 	if(p == nil){
1118 		for(p = msg+3; *p; p++)
1119 			if(isdigit(*p))
1120 				break;
1121 	} else
1122 		p++;
1123 	if(getfields(p, f, 6, 0, ",)") < 6){
1124 		werrstr("ftp protocol botch");
1125 		return Server;
1126 	}
1127 	snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
1128 		f[0], f[1], f[2], f[3]);
1129 	port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
1130 	sprint(aport, "%d", port);
1131 
1132 	/* open data connection */
1133 	fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
1134 	if(fd < 0){
1135 		werrstr("passive mode failed: %r");
1136 		return Error;
1137 	}
1138 
1139 	/* tell remote to send a file */
1140 	ftpcmd(ctl, "RETR %s", u->page);
1141 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1142 		werrstr("RETR %s: %s", u->page, msg);
1143 		return Error;
1144 	}
1145 	return fd;
1146 }
1147 
1148 int
1149 active(int ctl, URL *u)
1150 {
1151 	char msg[1024];
1152 	char dir[40], ldir[40];
1153 	uchar ipaddr[4];
1154 	uchar port[2];
1155 	int lcfd, dfd, afd;
1156 
1157 	/* announce a port for the call back */
1158 	snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
1159 	afd = announce(msg, dir);
1160 	if(afd < 0)
1161 		return Error;
1162 
1163 	/* get a local address/port of the annoucement */
1164 	if(getaddrport(dir, ipaddr, port) < 0){
1165 		close(afd);
1166 		return Error;
1167 	}
1168 
1169 	/* tell remote side address and port*/
1170 	ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
1171 		ipaddr[3], port[0], port[1]);
1172 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1173 		close(afd);
1174 		werrstr("active: %s", msg);
1175 		return Error;
1176 	}
1177 
1178 	/* tell remote to send a file */
1179 	ftpcmd(ctl, "RETR %s", u->page);
1180 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1181 		close(afd);
1182 		werrstr("RETR: %s", msg);
1183 		return Server;
1184 	}
1185 
1186 	/* wait for a connection */
1187 	lcfd = listen(dir, ldir);
1188 	if(lcfd < 0){
1189 		close(afd);
1190 		return Error;
1191 	}
1192 	dfd = accept(lcfd, ldir);
1193 	if(dfd < 0){
1194 		close(afd);
1195 		close(lcfd);
1196 		return Error;
1197 	}
1198 	close(afd);
1199 	close(lcfd);
1200 
1201 	return dfd;
1202 }
1203 
1204 int
1205 ftpxfer(int in, Out *out, Range *r)
1206 {
1207 	char buf[1024];
1208 	long vtime;
1209 	int i, n;
1210 
1211 	vtime = 0;
1212 	for(n = 0;;n += i){
1213 		i = read(in, buf, sizeof(buf));
1214 		if(i == 0)
1215 			break;
1216 		if(i < 0)
1217 			return Error;
1218 		if(output(out, buf, i) != i)
1219 			return Error;
1220 		r->start += i;
1221 		if(verbose && (vtime != time(0) || r->start == r->end)) {
1222 			vtime = time(0);
1223 			fprint(2, "%ld %ld\n", r->start, r->end);
1224 		}
1225 	}
1226 	return n;
1227 }
1228 
/* close the control connection and hand back rv, for use in return statements */
int
terminateftp(int ctl, int rv)
{
	int result;

	result = rv;
	close(ctl);
	return result;
}
1235 
/*
 * case insensitive strcmp (why aren't these in libc?)
 *
 * compare at most n characters of a and b, ignoring case.
 * returns 0 if they match, -1 if not.  the comparison stops
 * at the end of the strings, so equal strings shorter than n
 * match without reading past their terminators.
 */
int
cistrncmp(char *a, char *b, int n)
{
	int ca, cb;

	while(n-- > 0){
		ca = tolower((unsigned char)*a++);
		cb = tolower((unsigned char)*b++);
		if(ca != cb)
			return -1;
		if(ca == 0)
			break;		/* both strings ended */
	}
	return 0;
}
1248 
/*
 * compare a and b ignoring case.  returns 0 if they are the
 * same, -1 if they differ.
 */
int
cistrcmp(char *a, char *b)
{
	for(; *a != 0 || *b != 0; a++, b++)
		if(tolower(*a) != tolower(*b))
			return -1;

	return 0;
}
1258 
/*
 *  buffered io
 *
 *  one input buffer shared by the http and ftp control
 *  connections.  unread bytes lie between rp and wp.
 */
struct
{
	char *rp;
	char *wp;
	char buf[4*1024];
} b;

/* discard anything buffered; call before reading a new connection */
void
initibuf(void)
{
	b.rp = b.wp = b.buf;
}

/*
 *  read a possibly buffered line, strip off trailing white.
 *  at most len-2 characters are stored in buf, followed by a
 *  NUL; the rest of an over-long line is consumed and dropped.
 *  returns the length of the stripped line, or -1 on a read
 *  error or on end of file with nothing read.
 */
int
readline(int fd, char *buf, int len)
{
	int n;
	char *p;
	int eof = 0;

	len--;

	for(p = buf;;){
		if(b.rp >= b.wp){
			/*
			 *  the buffer is empty, so refill it from the
			 *  start; refilling at wp would walk off the
			 *  end of b.buf after enough input.  only half
			 *  is filled, leaving room for unreadline.
			 */
			b.rp = b.wp = b.buf;
			n = read(fd, b.wp, sizeof(b.buf)/2);
			if(n < 0)
				return -1;
			if(n == 0){
				eof = 1;
				break;
			}
			b.wp += n;
		}
		n = *b.rp++;
		if(len > 0){
			*p++ = n;
			len--;
		}
		if(n == '\n')
			break;
	}

	/* drop trailing white */
	for(;;){
		if(p <= buf)
			break;
		n = *(p-1);
		if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
			break;
		p--;
	}
	*p = 0;

	if(eof && p == buf)
		return -1;

	return p-buf;
}

/*
 *  push line, followed by a newline, back in front of the
 *  unread input.  NOTE(review): assumes the line plus the unread
 *  input fits in b.buf; confirm if callers other than getheader
 *  are added.
 */
void
unreadline(char *line)
{
	int i, n;

	i = strlen(line);
	n = b.wp-b.rp;
	memmove(&b.buf[i+1], b.rp, n);
	memmove(b.buf, line, i);
	b.buf[i] = '\n';
	b.rp = b.buf;
	b.wp = b.rp + i + 1 + n;
}

/*
 *  read up to len bytes: buffered input first, then straight
 *  from fd once the buffer is empty
 */
int
readibuf(int fd, char *buf, int len)
{
	int n;

	n = b.wp-b.rp;
	if(n > 0){
		if(n > len)
			n = len;
		memmove(buf, b.rp, n);
		b.rp += n;
		return n;
	}
	return read(fd, buf, len);
}
1353 
1354 int
1355 dfprint(int fd, char *fmt, ...)
1356 {
1357 	char buf[4*1024];
1358 	va_list arg;
1359 
1360 	va_start(arg, fmt);
1361 	vseprint(buf, buf+sizeof(buf), fmt, arg);
1362 	va_end(arg);
1363 	if(debug)
1364 		fprint(2, "%d -> %s", fd, buf);
1365 	return fprint(fd, "%s", buf);
1366 }
1367 
1368 int
1369 getaddrport(char *dir, uchar *ipaddr, uchar *port)
1370 {
1371 	char buf[256];
1372 	int fd, i;
1373 	char *p;
1374 
1375 	snprint(buf, sizeof(buf), "%s/local", dir);
1376 	fd = open(buf, OREAD);
1377 	if(fd < 0)
1378 		return -1;
1379 	i = read(fd, buf, sizeof(buf)-1);
1380 	close(fd);
1381 	if(i <= 0)
1382 		return -1;
1383 	buf[i] = 0;
1384 	p = strchr(buf, '!');
1385 	if(p != nil)
1386 		*p++ = 0;
1387 	v4parseip(ipaddr, buf);
1388 	i = atoi(p);
1389 	port[0] = i>>8;
1390 	port[1] = i;
1391 	return 0;
1392 }
1393 
1394 void
1395 md5free(DigestState *state)
1396 {
1397 	uchar x[MD5dlen];
1398 	md5(nil, 0, x, state);
1399 }
1400 
1401 DigestState*
1402 md5dup(DigestState *state)
1403 {
1404 	char *p;
1405 
1406 	p = md5pickle(state);
1407 	if(p == nil)
1408 		sysfatal("md5pickle: %r");
1409 	state = md5unpickle(p);
1410 	if(state == nil)
1411 		sysfatal("md5unpickle: %r");
1412 	free(p);
1413 	return state;
1414 }
1415 
1416 void
1417 setoffset(Out *out, int offset)
1418 {
1419 	md5free(out->curr);
1420 	if(offset == 0)
1421 		out->curr = md5(nil, 0, nil, nil);
1422 	else
1423 		out->curr = nil;
1424 	out->offset = offset;
1425 	out->written = offset;
1426 	if(ofile != nil)
1427 		if(seek(out->fd, offset, 0) != offset)
1428 			sysfatal("seek: %r");
1429 }
1430 
1431 /*
1432  * write some output, discarding it (but keeping track)
1433  * if we've already written it. if we've gone backwards,
1434  * verify that everything previously written matches
1435  * that which would have been written from the current
1436  * output.
1437  */
1438 int
1439 output(Out *out, char *buf, int nb)
1440 {
1441 	int n, d;
1442 	uchar m0[MD5dlen], m1[MD5dlen];
1443 
1444 	n = nb;
1445 	d = out->written - out->offset;
1446 	assert(d >= 0);
1447 	if(d > 0){
1448 		if(n < d){
1449 			if(out->curr != nil)
1450 				md5((uchar*)buf, n, nil, out->curr);
1451 			out->offset += n;
1452 			return n;
1453 		}
1454 		if(out->curr != nil){
1455 			md5((uchar*)buf, d, m0, out->curr);
1456 			out->curr = nil;
1457 			md5(nil, 0, m1, md5dup(out->hiwat));
1458 			if(memcmp(m0, m1, MD5dlen) != 0){
1459 				fprint(2, "integrity check failure at offset %d\n", out->written);
1460 				return -1;
1461 			}
1462 		}
1463 		buf += d;
1464 		n -= d;
1465 		out->offset += d;
1466 	}
1467 	if(n > 0){
1468 		out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
1469 		n = write(out->fd, buf, n);
1470 		if(n > 0){
1471 			out->offset += n;
1472 			out->written += n;
1473 		}
1474 	}
1475 	return n + d;
1476 }
1477 
1478