xref: /plan9/sys/src/cmd/hget.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <bio.h>
5 #include <ip.h>
6 #include <libsec.h>
7 #include <auth.h>
8 
/*
 *  a cracked url plus what has been learned about the object it names
 */
typedef struct URL URL;
struct URL
{
	int	method;		/* index into method[]: Other, Http, Https or Ftp */
	char	*host;		/* host name, set by crackurl */
	char	*port;		/* explicit port from the url, else the method name */
	char	*page;		/* rest of the url, from the first '/' after the host */
	char	*etag;		/* value of the etag: header, nil if none seen */
	char	*redirect;	/* target from a uri: or location: header */
	char	*postbody;	/* body to POST; nil means GET */
	char	*cred;		/* base64 of user:password for Basic authorization */
	long	mtime;		/* time parsed from the last-modified: header */
};
22 
/*
 *  byte range still to be fetched
 */
typedef struct Range Range;
struct Range
{
	long	start;		/* first byte wanted; only 2 gig supported, tdb */
	long	end;		/* size reported by the server; main starts it at -1 */
};
29 
30 typedef struct Out Out;
31 struct Out
32 {
33 	int fd;
34 	int offset;				/* notional current offset in output */
35 	int written;			/* number of bytes successfully transferred to output */
36 	DigestState *curr;		/* digest state up to offset (if known) */
37 	DigestState *hiwat;		/* digest state of all bytes written */
38 };
39 
/*
 *  url methods; each value is an index into method[]
 */
enum
{
	Other	= 0,
	Http	= 1,
	Https	= 2,
	Ftp	= 3,
};
47 
/*
 *  results of a transfer routine that are not byte counts
 */
enum
{
	Changed	= -3,
	Server	= -2,	/* main reports "server returned: %r" and exits */
	Error	= -1,	/* main retries, giving up after repeated errors */
	Eof	= 0,	/* main exits successfully */
};
55 
56 int debug;
57 char *ofile;
58 
59 
60 int	doftp(URL*, URL*, Range*, Out*, long);
61 int	dohttp(URL*, URL*,  Range*, Out*, long);
62 int	crackurl(URL*, char*);
63 Range*	crackrange(char*);
64 int	getheader(int, char*, int);
65 int	httpheaders(int, int, URL*, Range*);
66 int	httprcode(int);
67 int	cistrncmp(char*, char*, int);
68 int	cistrcmp(char*, char*);
69 void	initibuf(void);
70 int	readline(int, char*, int);
71 int	readibuf(int, char*, int);
72 int	dfprint(int, char*, ...);
73 void	unreadline(char*);
74 int	output(Out*, char*, int);
75 void	setoffset(Out*, int);
76 
77 int	verbose;
78 char	*net;
79 char	tcpdir[NETPATHLEN];
80 int	headerprint;
81 
82 struct {
83 	char	*name;
84 	int	(*f)(URL*, URL*, Range*, Out*, long);
85 } method[] = {
86 	[Http]	{ "http",	dohttp },
87 	[Https]	{ "https",	dohttp },
88 	[Ftp]	{ "ftp",	doftp },
89 	[Other]	{ "_______",	nil },
90 };
91 
92 void
93 usage(void)
94 {
95 	fprint(2, "usage: %s [-dhv] [-o outfile] [-p body] [-x netmtpt] url\n", argv0);
96 	exits("usage");
97 }
98 
99 void
100 main(int argc, char **argv)
101 {
102 	URL u;
103 	Range r;
104 	int errs, n;
105 	ulong mtime;
106 	Dir *d;
107 	char postbody[4096], *p, *e, *t, *hpx;
108 	URL px; // Proxy
109 	Out out;
110 
111 	ofile = nil;
112 	p = postbody;
113 	e = p + sizeof(postbody);
114 	r.start = 0;
115 	r.end = -1;
116 	mtime = 0;
117 	memset(&u, 0, sizeof(u));
118 	memset(&px, 0, sizeof(px));
119 	hpx = getenv("httpproxy");
120 
121 	ARGBEGIN {
122 	case 'o':
123 		ofile = ARGF();
124 		break;
125 	case 'd':
126 		debug = 1;
127 		break;
128 	case 'h':
129 		headerprint = 1;
130 		break;
131 	case 'v':
132 		verbose = 1;
133 		break;
134 	case 'x':
135 		net = ARGF();
136 		if(net == nil)
137 			usage();
138 		break;
139 	case 'p':
140 		t = ARGF();
141 		if(t == nil)
142 			usage();
143 		if(p != postbody)
144 			p = seprint(p, e, "&%s", t);
145 		else
146 			p = seprint(p, e, "%s", t);
147 		u.postbody = postbody;
148 
149 		break;
150 	default:
151 		usage();
152 	} ARGEND;
153 
154 	if(net != nil){
155 		if(strlen(net) > sizeof(tcpdir)-5)
156 			sysfatal("network mount point too long");
157 		snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
158 	} else
159 		snprint(tcpdir, sizeof(tcpdir), "tcp");
160 
161 	if(argc != 1)
162 		usage();
163 
164 
165 	out.fd = 1;
166 	out.written = 0;
167 	out.offset = 0;
168 	out.curr = nil;
169 	out.hiwat = nil;
170 	if(ofile != nil){
171 		d = dirstat(ofile);
172 		if(d == nil){
173 			out.fd = create(ofile, OWRITE, 0664);
174 			if(out.fd < 0)
175 				sysfatal("creating %s: %r", ofile);
176 		} else {
177 			out.fd = open(ofile, OWRITE);
178 			if(out.fd < 0)
179 				sysfatal("can't open %s: %r", ofile);
180 			r.start = d->length;
181 			mtime = d->mtime;
182 			free(d);
183 		}
184 	}
185 
186 	errs = 0;
187 
188 	if(crackurl(&u, argv[0]) < 0)
189 		sysfatal("%r");
190 	if(hpx && crackurl(&px, hpx) < 0)
191 		sysfatal("%r");
192 
193 	for(;;){
194 		setoffset(&out, 0);
195 		/* transfer data */
196 		werrstr("");
197 		n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
198 
199 		switch(n){
200 		case Eof:
201 			exits(0);
202 			break;
203 		case Error:
204 			if(errs++ < 10)
205 				continue;
206 			sysfatal("too many errors with no progress %r");
207 			break;
208 		case Server:
209 			sysfatal("server returned: %r");
210 			break;
211 		}
212 
213 		/* forward progress */
214 		errs = 0;
215 		r.start += n;
216 		if(r.start >= r.end)
217 			break;
218 	}
219 
220 	exits(0);
221 }
222 
223 int
224 crackurl(URL *u, char *s)
225 {
226 	char *p;
227 	int i;
228 
229 	if(u->page != nil){
230 		free(u->page);
231 		u->page = nil;
232 	}
233 
234 	/* get type */
235 	for(p = s; *p; p++){
236 		if(*p == '/'){
237 			p = s;
238 			if(u->method == Other){
239 				werrstr("missing method");
240 				return -1;
241 			}
242 			if(u->host == nil){
243 				werrstr("missing host");
244 				return -1;
245 			}
246 			u->page = strdup(p);
247 			return 0;
248 		}
249 		if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
250 			*p = 0;
251 			p += 3;
252 			for(i = 0; i < nelem(method); i++){
253 				if(cistrcmp(s, method[i].name) == 0){
254 					u->method = i;
255 					break;
256 				}
257 			}
258 			break;
259 		}
260 	}
261 
262 	if(u->method == Other){
263 		werrstr("unsupported URL type %s", s);
264 		return -1;
265 	}
266 
267 	/* get system */
268 	free(u->host);
269 	s = p;
270 	p = strchr(s, '/');
271 	if(p == nil){
272 		u->host = strdup(s);
273 		u->page = strdup("/");
274 	} else {
275 		u->page = strdup(p);
276 		*p = 0;
277 		u->host = strdup(s);
278 		*p = '/';
279 	}
280 
281 	if(p = strchr(u->host, ':')) {
282 		*p++ = 0;
283 		u->port = p;
284 	} else
285 		u->port = method[u->method].name;
286 
287 	if(*(u->host) == 0){
288 		werrstr("bad url, null host");
289 		return -1;
290 	}
291 
292 	return 0;
293 }
294 
/*
 *  names for the If-range date sent by dohttp,
 *  indexed by Tm.wday and Tm.mon
 */
char *day[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat"
};

char *month[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
302 
/*
 *  what catch() needs to stamp the output file:
 *  filled in by dohttp before it installs the handler
 */
struct
{
	int	fd;		/* output file */
	long	mtime;		/* modification time to give it */
} note;
308 
309 void
310 catch(void*, char*)
311 {
312 	Dir d;
313 
314 	nulldir(&d);
315 	d.mtime = note.mtime;
316 	if(dirfwstat(note.fd, &d) < 0)
317 		sysfatal("catch: can't dirfwstat: %r");
318 	noted(NDFLT);
319 }
320 
321 int
322 dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
323 {
324 	int fd, cfd;
325 	int redirect, auth, loop;
326 	int n, rv, code;
327 	long tot, vtime;
328 	Tm *tm;
329 	char buf[1024];
330 	char err[ERRMAX];
331 
332 
333 	/*  always move back to a previous 512 byte bound because some
334 	 *  servers can't seem to deal with requests that start at the
335 	 *  end of the file
336 	 */
337 	if(r->start)
338 		r->start = ((r->start-1)/512)*512;
339 
340 	/* loop for redirects, requires reading both response code and headers */
341 	fd = -1;
342 	for(loop = 0; loop < 32; loop++){
343 		if(px->host == nil){
344 			fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
345 		} else {
346 			fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
347 		}
348 		if(fd < 0)
349 			return Error;
350 
351 		if(u->method == Https){
352 			int tfd;
353 			TLSconn conn;
354 
355 			memset(&conn, 0, sizeof conn);
356 			tfd = tlsClient(fd, &conn);
357 			if(tfd < 0){
358 				fprint(2, "tlsClient: %r\n");
359 				close(fd);
360 				return Error;
361 			}
362 			/* BUG: check cert here? */
363 			if(conn.cert)
364 				free(conn.cert);
365 			close(fd);
366 			fd = tfd;
367 		}
368 
369 		/* write request, use range if not start of file */
370 		if(u->postbody == nil){
371 			if(px->host == nil){
372 				dfprint(fd,	"GET %s HTTP/1.0\r\n"
373 						"Host: %s\r\n"
374 						"User-agent: Plan9/hget\r\n"
375 						"Cache-Control: no-cache\r\n"
376 						"Pragma: no-cache\r\n",
377 						u->page, u->host);
378 			} else {
379 				dfprint(fd,	"GET http://%s%s HTTP/1.0\r\n"
380 						"Host: %s\r\n"
381 						"User-agent: Plan9/hget\r\n"
382 						"Cache-Control: no-cache\r\n"
383 						"Pragma: no-cache\r\n",
384 						u->host, u->page, u->host);
385 			}
386 			if(u->cred)
387 				dfprint(fd,	"Authorization: Basic %s\r\n",
388 						u->cred);
389 		} else {
390 			dfprint(fd,	"POST %s HTTP/1.0\r\n"
391 					"Host: %s\r\n"
392 					"Content-type: application/x-www-form-urlencoded\r\n"
393 					"Content-length: %d\r\n"
394 					"User-agent: Plan9/hget\r\n",
395 					u->page, u->host, strlen(u->postbody));
396 			if(u->cred)
397 				dfprint(fd, "Authorization: Basic %s\r\n", u->cred);
398 		}
399 		if(r->start != 0){
400 			dfprint(fd, "Range: bytes=%d-\n", r->start);
401 			if(u->etag != nil){
402 				dfprint(fd, "If-range: %s\n", u->etag);
403 			} else {
404 				tm = gmtime(mtime);
405 				dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
406 					day[tm->wday], tm->mday, month[tm->mon],
407 					tm->year+1900, tm->hour, tm->min, tm->sec);
408 			}
409 		}
410 		if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
411 			if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
412 				while((n = read(cfd, buf, sizeof buf)) > 0){
413 					if(debug)
414 						write(2, buf, n);
415 					write(fd, buf, n);
416 				}
417 			}else{
418 				close(cfd);
419 				cfd = -1;
420 			}
421 		}
422 
423 		dfprint(fd, "\r\n", u->host);
424 		if(u->postbody)
425 			dfprint(fd,	"%s", u->postbody);
426 
427 		auth = 0;
428 		redirect = 0;
429 		initibuf();
430 		code = httprcode(fd);
431 		switch(code){
432 		case Error:	/* connection timed out */
433 		case Eof:
434 			close(fd);
435 			close(cfd);
436 			return code;
437 
438 		case 200:	/* OK */
439 		case 201:	/* Created */
440 		case 202:	/* Accepted */
441 			if(ofile == nil && r->start != 0)
442 				sysfatal("page changed underfoot");
443 			break;
444 
445 		case 204:	/* No Content */
446 			sysfatal("No Content");
447 
448 		case 206:	/* Partial Content */
449 			setoffset(out, r->start);
450 			break;
451 
452 		case 301:	/* Moved Permanently */
453 		case 302:	/* Moved Temporarily */
454 			redirect = 1;
455 			u->postbody = nil;
456 			break;
457 
458 		case 304:	/* Not Modified */
459 			break;
460 
461 		case 400:	/* Bad Request */
462 			sysfatal("Bad Request");
463 
464 		case 401:	/* Unauthorized */
465 			if (auth)
466 				sysfatal("Authentication failed");
467 			auth = 1;
468 			break;
469 
470 		case 402:	/* ??? */
471 			sysfatal("Unauthorized");
472 
473 		case 403:	/* Forbidden */
474 			sysfatal("Forbidden by server");
475 
476 		case 404:	/* Not Found */
477 			sysfatal("Not found on server");
478 
479 		case 407:	/* Proxy Authentication */
480 			sysfatal("Proxy authentication required");
481 
482 		case 500:	/* Internal server error */
483 			sysfatal("Server choked");
484 
485 		case 501:	/* Not implemented */
486 			sysfatal("Server can't do it!");
487 
488 		case 502:	/* Bad gateway */
489 			sysfatal("Bad gateway");
490 
491 		case 503:	/* Service unavailable */
492 			sysfatal("Service unavailable");
493 
494 		default:
495 			sysfatal("Unknown response code %d", code);
496 		}
497 
498 		if(u->redirect != nil){
499 			free(u->redirect);
500 			u->redirect = nil;
501 		}
502 
503 		rv = httpheaders(fd, cfd, u, r);
504 		close(cfd);
505 		if(rv != 0){
506 			close(fd);
507 			return rv;
508 		}
509 
510 		if(!redirect && !auth)
511 			break;
512 
513 		if (redirect){
514 			if(u->redirect == nil)
515 				sysfatal("redirect: no URL");
516 			if(crackurl(u, u->redirect) < 0)
517 				sysfatal("redirect: %r");
518 		}
519 	}
520 
521 	/* transfer whatever you get */
522 	if(ofile != nil && u->mtime != 0){
523 		note.fd = out->fd;
524 		note.mtime = u->mtime;
525 		notify(catch);
526 	}
527 
528 	tot = 0;
529 	vtime = 0;
530 	for(;;){
531 		n = readibuf(fd, buf, sizeof(buf));
532 		if(n <= 0)
533 			break;
534 		if(output(out, buf, n) != n)
535 			break;
536 		tot += n;
537 		if(verbose && vtime != time(0)) {
538 			vtime = time(0);
539 			fprint(2, "%ld %ld\n", r->start+tot, r->end);
540 		}
541 	}
542 	notify(nil);
543 	close(fd);
544 
545 	if(ofile != nil && u->mtime != 0){
546 		Dir d;
547 
548 		rerrstr(err, sizeof err);
549 		nulldir(&d);
550 		d.mtime = u->mtime;
551 		if(dirfwstat(out->fd, &d) < 0)
552 			fprint(2, "couldn't set mtime: %r\n");
553 		errstr(err, sizeof err);
554 	}
555 
556 	return tot;
557 }
558 
559 /* get the http response code */
560 int
561 httprcode(int fd)
562 {
563 	int n;
564 	char *p;
565 	char buf[256];
566 
567 	n = readline(fd, buf, sizeof(buf)-1);
568 	if(n <= 0)
569 		return n;
570 	if(debug)
571 		fprint(2, "%d <- %s\n", fd, buf);
572 	p = strchr(buf, ' ');
573 	if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
574 		werrstr("bad response from server");
575 		return -1;
576 	}
577 	buf[n] = 0;
578 	return atoi(p+1);
579 }
580 
581 /* read in and crack the http headers, update u and r */
582 void	hhetag(char*, URL*, Range*);
583 void	hhmtime(char*, URL*, Range*);
584 void	hhclen(char*, URL*, Range*);
585 void	hhcrange(char*, URL*, Range*);
586 void	hhuri(char*, URL*, Range*);
587 void	hhlocation(char*, URL*, Range*);
588 void	hhauth(char*, URL*, Range*);
589 
590 struct {
591 	char *name;
592 	void (*f)(char*, URL*, Range*);
593 } headers[] = {
594 	{ "etag:", hhetag },
595 	{ "last-modified:", hhmtime },
596 	{ "content-length:", hhclen },
597 	{ "content-range:", hhcrange },
598 	{ "uri:", hhuri },
599 	{ "location:", hhlocation },
600 	{ "WWW-Authenticate:", hhauth },
601 };
602 int
603 httpheaders(int fd, int cfd, URL *u, Range *r)
604 {
605 	char buf[2048];
606 	char *p;
607 	int i, n;
608 
609 	for(;;){
610 		n = getheader(fd, buf, sizeof(buf));
611 		if(n <= 0)
612 			break;
613 		if(cfd >= 0)
614 			fprint(cfd, "%s\n", buf);
615 		for(i = 0; i < nelem(headers); i++){
616 			n = strlen(headers[i].name);
617 			if(cistrncmp(buf, headers[i].name, n) == 0){
618 				/* skip field name and leading white */
619 				p = buf + n;
620 				while(*p == ' ' || *p == '\t')
621 					p++;
622 
623 				(*headers[i].f)(p, u, r);
624 				break;
625 			}
626 		}
627 	}
628 	return n;
629 }
630 
631 /*
632  *  read a single mime header, collect continuations.
633  *
634  *  this routine assumes that there is a blank line twixt
635  *  the header and the message body, otherwise bytes will
636  *  be lost.
637  */
638 int
639 getheader(int fd, char *buf, int n)
640 {
641 	char *p, *e;
642 	int i;
643 
644 	n--;
645 	p = buf;
646 	for(e = p + n; ; p += i){
647 		i = readline(fd, p, e-p);
648 		if(i < 0)
649 			return i;
650 
651 		if(p == buf){
652 			/* first line */
653 			if(strchr(buf, ':') == nil)
654 				break;		/* end of headers */
655 		} else {
656 			/* continuation line */
657 			if(*p != ' ' && *p != '\t'){
658 				unreadline(p);
659 				*p = 0;
660 				break;		/* end of this header */
661 			}
662 		}
663 	}
664 	if(headerprint)
665 		print("%s\n", buf);
666 
667 	if(debug)
668 		fprint(2, "%d <- %s\n", fd, buf);
669 	return p-buf;
670 }
671 
672 void
673 hhetag(char *p, URL *u, Range*)
674 {
675 	if(u->etag != nil){
676 		if(strcmp(u->etag, p) != 0)
677 			sysfatal("file changed underfoot");
678 	} else
679 		u->etag = strdup(p);
680 }
681 
682 char*	monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";
683 
684 void
685 hhmtime(char *p, URL *u, Range*)
686 {
687 	char *month, *day, *yr, *hms;
688 	char *fields[6];
689 	Tm tm, now;
690 	int i;
691 
692 	i = getfields(p, fields, 6, 1, " \t");
693 	if(i < 5)
694 		return;
695 
696 	day = fields[1];
697 	month = fields[2];
698 	yr = fields[3];
699 	hms = fields[4];
700 
701 	/* default time */
702 	now = *gmtime(time(0));
703 	tm = now;
704 	tm.yday = 0;
705 
706 	/* convert ascii month to a number twixt 1 and 12 */
707 	if(*month >= '0' && *month <= '9'){
708 		tm.mon = atoi(month) - 1;
709 		if(tm.mon < 0 || tm.mon > 11)
710 			tm.mon = 5;
711 	} else {
712 		for(p = month; *p; p++)
713 			*p = tolower(*p);
714 		for(i = 0; i < 12; i++)
715 			if(strncmp(&monthchars[i*3], month, 3) == 0){
716 				tm.mon = i;
717 				break;
718 			}
719 	}
720 
721 	tm.mday = atoi(day);
722 
723 	if(hms) {
724 		tm.hour = strtoul(hms, &p, 10);
725 		if(*p == ':') {
726 			p++;
727 			tm.min = strtoul(p, &p, 10);
728 			if(*p == ':') {
729 				p++;
730 				tm.sec = strtoul(p, &p, 10);
731 			}
732 		}
733 		if(tolower(*p) == 'p')
734 			tm.hour += 12;
735 	}
736 
737 	if(yr) {
738 		tm.year = atoi(yr);
739 		if(tm.year >= 1900)
740 			tm.year -= 1900;
741 	} else {
742 		if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
743 			tm.year--;
744 	}
745 
746 	strcpy(tm.zone, "GMT");
747 	/* convert to epoch seconds */
748 	u->mtime = tm2sec(&tm);
749 }
750 
751 void
752 hhclen(char *p, URL*, Range *r)
753 {
754 	r->end = atoi(p);
755 }
756 
757 void
758 hhcrange(char *p, URL*, Range *r)
759 {
760 	char *x;
761 	vlong l;
762 
763 	l = 0;
764 	x = strchr(p, '/');
765 	if(x)
766 		l = atoll(x+1);
767 	if(l == 0) {
768 		x = strchr(p, '-');
769 		if(x)
770 			l = atoll(x+1);
771 	}
772 	if(l)
773 		r->end = l;
774 }
775 
776 void
777 hhuri(char *p, URL *u, Range*)
778 {
779 	if(*p != '<')
780 		return;
781 	u->redirect = strdup(p+1);
782 	p = strchr(u->redirect, '>');
783 	if(p != nil)
784 		*p = 0;
785 }
786 
787 void
788 hhlocation(char *p, URL *u, Range*)
789 {
790 	u->redirect = strdup(p);
791 }
792 
793 void
794 hhauth(char *p, URL *u, Range*)
795 {
796 	char *f[4];
797 	UserPasswd *up;
798 	char *s, cred[64];
799 
800 	if (cistrncmp(p, "basic ", 6) != 0)
801 		sysfatal("only Basic authentication supported");
802 
803 	if (gettokens(p, f, nelem(f), "\"") < 2)
804 		sysfatal("garbled auth data");
805 
806 	if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http server=%q realm=%q",
807 	    	u->host, f[1])) == nil)
808 			sysfatal("cannot authenticate");
809 
810 	s = smprint("%s:%s", up->user, up->passwd);
811 	if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
812 		sysfatal("enc64");
813   		free(s);
814 
815 	assert(u->cred = strdup(cred));
816 }
817 
/*
 *  ftp return codes, as returned by ftprcode:
 *  the first digit of the three digit reply
 */
enum
{
	Extra		= 1,
	Success		= 2,
	Incomplete	= 3,
	TempFail	= 4,
	PermFail	= 5,
};

enum
{
	Nnetdir		= 64,	/* max length of network directory paths */
	Ndialstr	= 64,	/* max length of dial strings */
};
830 
831 int ftpcmd(int, char*, ...);
832 int ftprcode(int, char*, int);
833 int hello(int);
834 int logon(int);
835 int xfertype(int, char*);
836 int passive(int, URL*);
837 int active(int, URL*);
838 int ftpxfer(int, Out*, Range*);
839 int terminateftp(int, int);
840 int getaddrport(char*, uchar*, uchar*);
841 int ftprestart(int, Out*, URL*, Range*, long);
842 
843 int
844 doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
845 {
846 	int pid, ctl, data, rv;
847 	Waitmsg *w;
848 	char msg[64];
849 	char conndir[NETPATHLEN];
850 	char *p;
851 
852 	/* untested, proxy doesn't work with ftp (I think) */
853 	if(px->host == nil){
854 		ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, conndir, 0);
855 	} else {
856 		ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, conndir, 0);
857 	}
858 
859 	if(ctl < 0)
860 		return Error;
861 	if(net == nil){
862 		p = strrchr(conndir, '/');
863 		*p = 0;
864 		snprint(tcpdir, sizeof(tcpdir), conndir);
865 	}
866 
867 	initibuf();
868 
869 	rv = hello(ctl);
870 	if(rv < 0)
871 		return terminateftp(ctl, rv);
872 
873 	rv = logon(ctl);
874 	if(rv < 0)
875 		return terminateftp(ctl, rv);
876 
877 	rv = xfertype(ctl, "I");
878 	if(rv < 0)
879 		return terminateftp(ctl, rv);
880 
881 	/* if file is up to date and the right size, stop */
882 	if(ftprestart(ctl, out, u, r, mtime) > 0){
883 		close(ctl);
884 		return Eof;
885 	}
886 
887 	/* first try passive mode, then active */
888 	data = passive(ctl, u);
889 	if(data < 0){
890 		data = active(ctl, u);
891 		if(data < 0)
892 			return Error;
893 	}
894 
895 	/* fork */
896 	switch(pid = rfork(RFPROC|RFFDG|RFMEM)){
897 	case -1:
898 		close(data);
899 		return terminateftp(ctl, Error);
900 	case 0:
901 		ftpxfer(data, out, r);
902 		close(data);
903 		_exits(0);
904 	default:
905 		close(data);
906 		break;
907 	}
908 
909 	/* wait for reply message */
910 	rv = ftprcode(ctl, msg, sizeof(msg));
911 	close(ctl);
912 
913 	/* wait for process to terminate */
914 	w = nil;
915 	for(;;){
916 		free(w);
917 		w = wait();
918 		if(w == nil)
919 			return Error;
920 		if(w->pid == pid){
921 			if(w->msg[0] == 0){
922 				free(w);
923 				break;
924 			}
925 			werrstr("xfer: %s", w->msg);
926 			free(w);
927 			return Error;
928 		}
929 	}
930 
931 	switch(rv){
932 	case Success:
933 		return Eof;
934 	case TempFail:
935 		return Server;
936 	default:
937 		return Error;
938 	}
939 }
940 
941 int
942 ftpcmd(int ctl, char *fmt, ...)
943 {
944 	va_list arg;
945 	char buf[2*1024], *s;
946 
947 	va_start(arg, fmt);
948 	s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
949 	va_end(arg);
950 	if(debug)
951 		fprint(2, "%d -> %s\n", ctl, buf);
952 	*s++ = '\r';
953 	*s++ = '\n';
954 	if(write(ctl, buf, s - buf) != s - buf)
955 		return -1;
956 	return 0;
957 }
958 
959 int
960 ftprcode(int ctl, char *msg, int len)
961 {
962 	int rv;
963 	int i;
964 	char *p;
965 
966 	len--;	/* room for terminating null */
967 	for(;;){
968 		*msg = 0;
969 		i = readline(ctl, msg, len);
970 		if(i < 0)
971 			break;
972 		if(debug)
973 			fprint(2, "%d <- %s\n", ctl, msg);
974 
975 		/* stop if not a continuation */
976 		rv = strtol(msg, &p, 10);
977 		if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
978 			return rv/100;
979 	}
980 	*msg = 0;
981 
982 	return -1;
983 }
984 
985 int
986 hello(int ctl)
987 {
988 	char msg[1024];
989 
990 	/* wait for hello from other side */
991 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
992 		werrstr("HELLO: %s", msg);
993 		return Server;
994 	}
995 	return 0;
996 }
997 
/*
 *  convert exactly n characters at p to a decimal number.
 *  the characters are not checked to be digits.
 */
int
getdec(char *p, int n)
{
	int val;

	val = 0;
	while(n-- > 0)
		val = 10*val + (*p++ - '0');
	return val;
}
1008 
1009 int
1010 ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
1011 {
1012 	Tm tm;
1013 	char msg[1024];
1014 	long x, rmtime;
1015 
1016 	ftpcmd(ctl, "MDTM %s", u->page);
1017 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1018 		r->start = 0;
1019 		return 0;		/* need to do something */
1020 	}
1021 
1022 	/* decode modification time */
1023 	if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
1024 		r->start = 0;
1025 		return 0;		/* need to do something */
1026 	}
1027 	memset(&tm, 0, sizeof(tm));
1028 	tm.year = getdec(msg+4, 4) - 1900;
1029 	tm.mon = getdec(msg+4+4, 2) - 1;
1030 	tm.mday = getdec(msg+4+4+2, 2);
1031 	tm.hour = getdec(msg+4+4+2+2, 2);
1032 	tm.min = getdec(msg+4+4+2+2+2, 2);
1033 	tm.sec = getdec(msg+4+4+2+2+2+2, 2);
1034 	strcpy(tm.zone, "GMT");
1035 	rmtime = tm2sec(&tm);
1036 	if(rmtime > mtime)
1037 		r->start = 0;
1038 
1039 	/* get size */
1040 	ftpcmd(ctl, "SIZE %s", u->page);
1041 	if(ftprcode(ctl, msg, sizeof(msg)) == Success){
1042 		x = atol(msg+4);
1043 		if(r->start == x)
1044 			return 1;	/* we're up to date */
1045 		r->end = x;
1046 	}
1047 
1048 	/* seek to restart point */
1049 	if(r->start > 0){
1050 		ftpcmd(ctl, "REST %lud", r->start);
1051 		if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
1052 			setoffset(out, r->start);
1053 		}else
1054 			r->start = 0;
1055 	}
1056 
1057 	return 0;	/* need to do something */
1058 }
1059 
1060 int
1061 logon(int ctl)
1062 {
1063 	char msg[1024];
1064 
1065 	/* login anonymous */
1066 	ftpcmd(ctl, "USER anonymous");
1067 	switch(ftprcode(ctl, msg, sizeof(msg))){
1068 	case Success:
1069 		return 0;
1070 	case Incomplete:
1071 		break;	/* need password */
1072 	default:
1073 		werrstr("USER: %s", msg);
1074 		return Server;
1075 	}
1076 
1077 	/* send user id as password */
1078 	sprint(msg, "%s@closedmind.org", getuser());
1079 	ftpcmd(ctl, "PASS %s", msg);
1080 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1081 		werrstr("PASS: %s", msg);
1082 		return Server;
1083 	}
1084 
1085 	return 0;
1086 }
1087 
1088 int
1089 xfertype(int ctl, char *t)
1090 {
1091 	char msg[1024];
1092 
1093 	ftpcmd(ctl, "TYPE %s", t);
1094 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1095 		werrstr("TYPE %s: %s", t, msg);
1096 		return Server;
1097 	}
1098 
1099 	return 0;
1100 }
1101 
1102 int
1103 passive(int ctl, URL *u)
1104 {
1105 	char msg[1024];
1106 	char ipaddr[32];
1107 	char *f[6];
1108 	char *p;
1109 	int fd;
1110 	int port;
1111 	char aport[12];
1112 
1113 	ftpcmd(ctl, "PASV");
1114 	if(ftprcode(ctl, msg, sizeof(msg)) != Success)
1115 		return Error;
1116 
1117 	/* get address and port number from reply, this is AI */
1118 	p = strchr(msg, '(');
1119 	if(p == nil){
1120 		for(p = msg+3; *p; p++)
1121 			if(isdigit(*p))
1122 				break;
1123 	} else
1124 		p++;
1125 	if(getfields(p, f, 6, 0, ",)") < 6){
1126 		werrstr("ftp protocol botch");
1127 		return Server;
1128 	}
1129 	snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
1130 		f[0], f[1], f[2], f[3]);
1131 	port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
1132 	sprint(aport, "%d", port);
1133 
1134 	/* open data connection */
1135 	fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
1136 	if(fd < 0){
1137 		werrstr("passive mode failed: %r");
1138 		return Error;
1139 	}
1140 
1141 	/* tell remote to send a file */
1142 	ftpcmd(ctl, "RETR %s", u->page);
1143 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1144 		werrstr("RETR %s: %s", u->page, msg);
1145 		return Error;
1146 	}
1147 	return fd;
1148 }
1149 
1150 int
1151 active(int ctl, URL *u)
1152 {
1153 	char msg[1024];
1154 	char dir[40], ldir[40];
1155 	uchar ipaddr[4];
1156 	uchar port[2];
1157 	int lcfd, dfd, afd;
1158 
1159 	/* announce a port for the call back */
1160 	snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
1161 	afd = announce(msg, dir);
1162 	if(afd < 0)
1163 		return Error;
1164 
1165 	/* get a local address/port of the annoucement */
1166 	if(getaddrport(dir, ipaddr, port) < 0){
1167 		close(afd);
1168 		return Error;
1169 	}
1170 
1171 	/* tell remote side address and port*/
1172 	ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
1173 		ipaddr[3], port[0], port[1]);
1174 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1175 		close(afd);
1176 		werrstr("active: %s", msg);
1177 		return Error;
1178 	}
1179 
1180 	/* tell remote to send a file */
1181 	ftpcmd(ctl, "RETR %s", u->page);
1182 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1183 		close(afd);
1184 		werrstr("RETR: %s", msg);
1185 		return Server;
1186 	}
1187 
1188 	/* wait for a connection */
1189 	lcfd = listen(dir, ldir);
1190 	if(lcfd < 0){
1191 		close(afd);
1192 		return Error;
1193 	}
1194 	dfd = accept(lcfd, ldir);
1195 	if(dfd < 0){
1196 		close(afd);
1197 		close(lcfd);
1198 		return Error;
1199 	}
1200 	close(afd);
1201 	close(lcfd);
1202 
1203 	return dfd;
1204 }
1205 
1206 int
1207 ftpxfer(int in, Out *out, Range *r)
1208 {
1209 	char buf[1024];
1210 	long vtime;
1211 	int i, n;
1212 
1213 	vtime = 0;
1214 	for(n = 0;;n += i){
1215 		i = read(in, buf, sizeof(buf));
1216 		if(i == 0)
1217 			break;
1218 		if(i < 0)
1219 			return Error;
1220 		if(output(out, buf, i) != i)
1221 			return Error;
1222 		r->start += i;
1223 		if(verbose && vtime != time(0)) {
1224 			vtime = time(0);
1225 			fprint(2, "%ld %ld\n", r->start, r->end);
1226 		}
1227 	}
1228 	return n;
1229 }
1230 
/*
 *  close the control connection and hand back rv,
 *  so callers can bail out in a single return statement
 */
int
terminateftp(int ctl, int rv)
{
	close(ctl);
	return rv;
}
1237 
/*
 * case insensitive strcmp (why aren't these in libc?)
 *
 * compare at most n characters of a and b without regard
 * to case.  returns 0 if they match, -1 if not.  the
 * comparison stops at the end of the strings, so nothing
 * past the terminating nulls is examined.
 */
int
cistrncmp(char *a, char *b, int n)
{
	for(; n > 0; n--, a++, b++){
		if(tolower(*a) != tolower(*b))
			return -1;
		if(*a == 0)
			break;		/* both strings ended together */
	}
	return 0;
}
1250 
/*
 *  compare a and b without regard to case.
 *  returns 0 if they match, -1 if not.
 */
int
cistrcmp(char *a, char *b)
{
	for(; *a != 0 || *b != 0; a++, b++)
		if(tolower(*a) != tolower(*b))
			return -1;

	return 0;
}
1260 
/*
 *  buffered io: the single input buffer shared by
 *  readline, unreadline and readibuf
 */
struct
{
	char	*rp;		/* next byte to hand out */
	char	*wp;		/* end of the buffered bytes */
	char	buf[4*1024];
} b;
1270 
1271 void
1272 initibuf(void)
1273 {
1274 	b.rp = b.wp = b.buf;
1275 }
1276 
1277 /*
1278  *  read a possibly buffered line, strip off trailing while
1279  */
1280 int
1281 readline(int fd, char *buf, int len)
1282 {
1283 	int n;
1284 	char *p;
1285 	int eof = 0;
1286 
1287 	len--;
1288 
1289 	for(p = buf;;){
1290 		if(b.rp >= b.wp){
1291 			n = read(fd, b.wp, sizeof(b.buf)/2);
1292 			if(n < 0)
1293 				return -1;
1294 			if(n == 0){
1295 				eof = 1;
1296 				break;
1297 			}
1298 			b.wp += n;
1299 		}
1300 		n = *b.rp++;
1301 		if(len > 0){
1302 			*p++ = n;
1303 			len--;
1304 		}
1305 		if(n == '\n')
1306 			break;
1307 	}
1308 
1309 	/* drop trailing white */
1310 	for(;;){
1311 		if(p <= buf)
1312 			break;
1313 		n = *(p-1);
1314 		if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
1315 			break;
1316 		p--;
1317 	}
1318 	*p = 0;
1319 
1320 	if(eof && p == buf)
1321 		return -1;
1322 
1323 	return p-buf;
1324 }
1325 
1326 void
1327 unreadline(char *line)
1328 {
1329 	int i, n;
1330 
1331 	i = strlen(line);
1332 	n = b.wp-b.rp;
1333 	memmove(&b.buf[i+1], b.rp, n);
1334 	memmove(b.buf, line, i);
1335 	b.buf[i] = '\n';
1336 	b.rp = b.buf;
1337 	b.wp = b.rp + i + 1 + n;
1338 }
1339 
1340 int
1341 readibuf(int fd, char *buf, int len)
1342 {
1343 	int n;
1344 
1345 	n = b.wp-b.rp;
1346 	if(n > 0){
1347 		if(n > len)
1348 			n = len;
1349 		memmove(buf, b.rp, n);
1350 		b.rp += n;
1351 		return n;
1352 	}
1353 	return read(fd, buf, len);
1354 }
1355 
1356 int
1357 dfprint(int fd, char *fmt, ...)
1358 {
1359 	char buf[4*1024];
1360 	va_list arg;
1361 
1362 	va_start(arg, fmt);
1363 	vseprint(buf, buf+sizeof(buf), fmt, arg);
1364 	va_end(arg);
1365 	if(debug)
1366 		fprint(2, "%d -> %s", fd, buf);
1367 	return fprint(fd, "%s", buf);
1368 }
1369 
1370 int
1371 getaddrport(char *dir, uchar *ipaddr, uchar *port)
1372 {
1373 	char buf[256];
1374 	int fd, i;
1375 	char *p;
1376 
1377 	snprint(buf, sizeof(buf), "%s/local", dir);
1378 	fd = open(buf, OREAD);
1379 	if(fd < 0)
1380 		return -1;
1381 	i = read(fd, buf, sizeof(buf)-1);
1382 	close(fd);
1383 	if(i <= 0)
1384 		return -1;
1385 	buf[i] = 0;
1386 	p = strchr(buf, '!');
1387 	if(p != nil)
1388 		*p++ = 0;
1389 	v4parseip(ipaddr, buf);
1390 	i = atoi(p);
1391 	port[0] = i>>8;
1392 	port[1] = i;
1393 	return 0;
1394 }
1395 
1396 void
1397 md5free(DigestState *state)
1398 {
1399 	uchar x[MD5dlen];
1400 	md5(nil, 0, x, state);
1401 }
1402 
1403 DigestState*
1404 md5dup(DigestState *state)
1405 {
1406 	char *p;
1407 
1408 	p = md5pickle(state);
1409 	if(p == nil)
1410 		sysfatal("md5pickle: %r");
1411 	state = md5unpickle(p);
1412 	if(state == nil)
1413 		sysfatal("md5unpickle: %r");
1414 	free(p);
1415 	return state;
1416 }
1417 
1418 void
1419 setoffset(Out *out, int offset)
1420 {
1421 	md5free(out->curr);
1422 	if(offset == 0)
1423 		out->curr = md5(nil, 0, nil, nil);
1424 	else
1425 		out->curr = nil;
1426 	out->offset = offset;
1427 	out->written = offset;
1428 	if(ofile != nil)
1429 		if(seek(out->fd, offset, 0) != offset)
1430 			sysfatal("seek: %r");
1431 }
1432 
1433 /*
1434  * write some output, discarding it (but keeping track)
1435  * if we've already written it. if we've gone backwards,
1436  * verify that everything previously written matches
1437  * that which would have been written from the current
1438  * output.
1439  */
1440 int
1441 output(Out *out, char *buf, int nb)
1442 {
1443 	int n, d;
1444 	uchar m0[MD5dlen], m1[MD5dlen];
1445 
1446 	n = nb;
1447 	d = out->written - out->offset;
1448 	assert(d >= 0);
1449 	if(d > 0){
1450 		if(n < d){
1451 			if(out->curr != nil)
1452 				md5((uchar*)buf, n, nil, out->curr);
1453 			out->offset += n;
1454 			return n;
1455 		}
1456 		if(out->curr != nil){
1457 			md5((uchar*)buf, d, m0, out->curr);
1458 			out->curr = nil;
1459 			md5(nil, 0, m1, md5dup(out->hiwat));
1460 			if(memcmp(m0, m1, MD5dlen) != 0){
1461 				fprint(2, "integrity check failure at offset %d\n", out->written);
1462 				return -1;
1463 			}
1464 		}
1465 		buf += d;
1466 		n -= d;
1467 		out->offset += d;
1468 	}
1469 	if(n > 0){
1470 		out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
1471 		n = write(out->fd, buf, n);
1472 		if(n > 0){
1473 			out->offset += n;
1474 			out->written += n;
1475 		}
1476 	}
1477 	return n + d;
1478 }
1479 
1480