xref: /plan9/sys/src/cmd/hget.c (revision c54d4d90799b213ecb7bf465d0346e0b05408cc3)
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <bio.h>
5 #include <ip.h>
6 #include <libsec.h>
7 #include <auth.h>
8 
/*
 *  A cracked URL together with the per-transfer state that is
 *  collected from the server's reply headers.
 */
typedef struct URL
{
	int	method;		/* index into method[]: Other, Http, Https or Ftp */
	char	*host;		/* host name; filled in by crackurl */
	char	*port;		/* text after ':' in the host, else the method name */
	char	*page;		/* path part of the URL, "/" when none was given */
	char	*etag;		/* value of the etag: header, nil until one is seen */
	char	*redirect;	/* target taken from a location: or uri: header */
	char	*postbody;	/* body to POST; nil selects GET */
	char	*cred;		/* base64 "user:passwd" sent as Basic authorization */
	char	*rhead;		/* extra request header given with -r */
	long	mtime;		/* last-modified time of the page, epoch seconds */
} URL;
23 
/*
 *  The part of the file still to be fetched.  main starts with
 *  end == -1; the header and FTP size handlers fill it in.
 */
typedef struct Range
{
	long	start;	/* only 2 gig supported, tdb */
	long	end;
} Range;
30 
31 typedef struct Out Out;
32 struct Out
33 {
34 	int fd;
35 	int offset;				/* notional current offset in output */
36 	int written;			/* number of bytes successfully transferred to output */
37 	DigestState *curr;		/* digest state up to offset (if known) */
38 	DigestState *hiwat;		/* digest state of all bytes written */
39 };
40 
/*
 *  URL methods; the values index the method[] table.
 */
enum
{
	Other	= 0,
	Http	= 1,
	Https	= 2,
	Ftp	= 3,
};

/*
 *  Results of a transfer function that are not byte counts;
 *  main switches on these.
 */
enum
{
	Eof	= 0,
	Error	= -1,
	Server	= -2,
	Changed	= -3,
};
56 
57 int debug;
58 char *ofile;
59 
60 
61 int	doftp(URL*, URL*, Range*, Out*, long);
62 int	dohttp(URL*, URL*,  Range*, Out*, long);
63 int	crackurl(URL*, char*);
64 Range*	crackrange(char*);
65 int	getheader(int, char*, int);
66 int	httpheaders(int, int, URL*, Range*);
67 int	httprcode(int);
68 int	cistrncmp(char*, char*, int);
69 int	cistrcmp(char*, char*);
70 void	initibuf(void);
71 int	readline(int, char*, int);
72 int	readibuf(int, char*, int);
73 int	dfprint(int, char*, ...);
74 void	unreadline(char*);
75 int	output(Out*, char*, int);
76 void	setoffset(Out*, int);
77 
78 int	verbose;
79 char	*net;
80 char	tcpdir[NETPATHLEN];
81 int	headerprint;
82 
83 struct {
84 	char	*name;
85 	int	(*f)(URL*, URL*, Range*, Out*, long);
86 } method[] = {
87 	[Http]	{ "http",	dohttp },
88 	[Https]	{ "https",	dohttp },
89 	[Ftp]	{ "ftp",	doftp },
90 	[Other]	{ "_______",	nil },
91 };
92 
93 void
usage(void)94 usage(void)
95 {
96 	fprint(2, "usage: %s [-dhv] [-o outfile] [-p body] [-x netmtpt] [-r header] url\n", argv0);
97 	exits("usage");
98 }
99 
100 void
main(int argc,char ** argv)101 main(int argc, char **argv)
102 {
103 	URL u;
104 	Range r;
105 	int errs, n;
106 	ulong mtime;
107 	Dir *d;
108 	char postbody[4096], *p, *e, *t, *hpx;
109 	URL px; // Proxy
110 	Out out;
111 
112 	ofile = nil;
113 	p = postbody;
114 	e = p + sizeof(postbody);
115 	r.start = 0;
116 	r.end = -1;
117 	mtime = 0;
118 	memset(&u, 0, sizeof(u));
119 	memset(&px, 0, sizeof(px));
120 	hpx = getenv("httpproxy");
121 
122 	ARGBEGIN {
123 	case 'o':
124 		ofile = EARGF(usage());
125 		break;
126 	case 'd':
127 		debug = 1;
128 		break;
129 	case 'h':
130 		headerprint = 1;
131 		break;
132 	case 'v':
133 		verbose = 1;
134 		break;
135 	case 'x':
136 		net = EARGF(usage());
137 		break;
138 	case 'r':
139 		u.rhead = EARGF(usage());
140 		break;
141 	case 'p':
142 		t = EARGF(usage());
143 		if(p != postbody)
144 			p = seprint(p, e, "&%s", t);
145 		else
146 			p = seprint(p, e, "%s", t);
147 		u.postbody = postbody;
148 
149 		break;
150 	default:
151 		usage();
152 	} ARGEND;
153 
154 	if(net != nil){
155 		if(strlen(net) > sizeof(tcpdir)-5)
156 			sysfatal("network mount point too long");
157 		snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
158 	} else
159 		snprint(tcpdir, sizeof(tcpdir), "tcp");
160 
161 	if(argc != 1)
162 		usage();
163 
164 
165 	out.fd = 1;
166 	out.written = 0;
167 	out.offset = 0;
168 	out.curr = nil;
169 	out.hiwat = nil;
170 	if(ofile != nil){
171 		d = dirstat(ofile);
172 		if(d == nil){
173 			out.fd = create(ofile, OWRITE, 0664);
174 			if(out.fd < 0)
175 				sysfatal("creating %s: %r", ofile);
176 		} else {
177 			out.fd = open(ofile, OWRITE);
178 			if(out.fd < 0)
179 				sysfatal("can't open %s: %r", ofile);
180 			r.start = d->length;
181 			mtime = d->mtime;
182 			free(d);
183 		}
184 	}
185 
186 	errs = 0;
187 
188 	if(crackurl(&u, argv[0]) < 0)
189 		sysfatal("%r");
190 	if(hpx && crackurl(&px, hpx) < 0)
191 		sysfatal("%r");
192 
193 	for(;;){
194 		setoffset(&out, 0);
195 		/* transfer data */
196 		werrstr("");
197 		n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
198 
199 		switch(n){
200 		case Eof:
201 			exits(0);
202 			break;
203 		case Error:
204 			if(errs++ < 10)
205 				continue;
206 			sysfatal("too many errors with no progress %r");
207 			break;
208 		case Server:
209 			sysfatal("server returned: %r");
210 			break;
211 		}
212 
213 		/* forward progress */
214 		errs = 0;
215 		r.start += n;
216 		if(r.start >= r.end)
217 			break;
218 	}
219 
220 	exits(0);
221 }
222 
223 int
crackurl(URL * u,char * s)224 crackurl(URL *u, char *s)
225 {
226 	char *p;
227 	int i;
228 
229 	if(u->page != nil){
230 		free(u->page);
231 		u->page = nil;
232 	}
233 
234 	/* get type */
235 	for(p = s; *p; p++){
236 		if(*p == '/'){
237 			p = s;
238 			if(u->method == Other){
239 				werrstr("missing method");
240 				return -1;
241 			}
242 			if(u->host == nil){
243 				werrstr("missing host");
244 				return -1;
245 			}
246 			u->page = strdup(p);
247 			return 0;
248 		}
249 		if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
250 			*p = 0;
251 			p += 3;
252 			for(i = 0; i < nelem(method); i++){
253 				if(cistrcmp(s, method[i].name) == 0){
254 					u->method = i;
255 					break;
256 				}
257 			}
258 			break;
259 		}
260 	}
261 
262 	if(u->method == Other){
263 		werrstr("unsupported URL type %s", s);
264 		return -1;
265 	}
266 
267 	/* get system */
268 	free(u->host);
269 	s = p;
270 	p = strchr(s, '/');
271 	if(p == nil){
272 		u->host = strdup(s);
273 		u->page = strdup("/");
274 	} else {
275 		u->page = strdup(p);
276 		*p = 0;
277 		u->host = strdup(s);
278 		*p = '/';
279 	}
280 
281 	if(p = strchr(u->host, ':')) {
282 		*p++ = 0;
283 		u->port = p;
284 	} else
285 		u->port = method[u->method].name;
286 
287 	if(*(u->host) == 0){
288 		werrstr("bad url, null host");
289 		return -1;
290 	}
291 
292 	return 0;
293 }
294 
/* weekday names, indexed by Tm.wday when the If-range date is printed */
char *day[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat"
};

/* month names, indexed by Tm.mon when the If-range date is printed */
char *month[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
302 
/*
 *  What the note handler needs: dohttp fills this in before the
 *  transfer and catch() uses it to set the output file's mtime.
 */
struct
{
	int	fd;	/* descriptor of the output file */
	long	mtime;	/* modification time to stamp on it */
} note;
308 
309 void
catch(void *,char *)310 catch(void*, char*)
311 {
312 	Dir d;
313 
314 	nulldir(&d);
315 	d.mtime = note.mtime;
316 	if(dirfwstat(note.fd, &d) < 0)
317 		sysfatal("catch: can't dirfwstat: %r");
318 	noted(NDFLT);
319 }
320 
321 int
dohttp(URL * u,URL * px,Range * r,Out * out,long mtime)322 dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
323 {
324 	int fd, cfd;
325 	int redirect, auth, loop;
326 	int n, rv, code;
327 	long tot, vtime;
328 	Tm *tm;
329 	char buf[1024];
330 	char err[ERRMAX];
331 
332 
333 	/*  always move back to a previous 512 byte bound because some
334 	 *  servers can't seem to deal with requests that start at the
335 	 *  end of the file
336 	 */
337 	if(r->start)
338 		r->start = ((r->start-1)/512)*512;
339 
340 	/* loop for redirects, requires reading both response code and headers */
341 	fd = -1;
342 	for(loop = 0; loop < 32; loop++){
343 		if(px->host == nil){
344 			fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
345 		} else {
346 			fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
347 		}
348 		if(fd < 0)
349 			return Error;
350 
351 		if(u->method == Https){
352 			int tfd;
353 			TLSconn conn;
354 
355 			memset(&conn, 0, sizeof conn);
356 			tfd = tlsClient(fd, &conn);
357 			if(tfd < 0){
358 				fprint(2, "tlsClient: %r\n");
359 				close(fd);
360 				return Error;
361 			}
362 			/* BUG: check cert here? */
363 			if(conn.cert)
364 				free(conn.cert);
365 			close(fd);
366 			fd = tfd;
367 		}
368 
369 		/* write request, use range if not start of file */
370 		if(u->postbody == nil){
371 			if(px->host == nil){
372 				dfprint(fd,	"GET %s HTTP/1.0\r\n"
373 						"Host: %s\r\n"
374 						"User-agent: Plan9/hget\r\n"
375 						"Cache-Control: no-cache\r\n"
376 						"Pragma: no-cache\r\n",
377 						u->page, u->host);
378 			} else {
379 				dfprint(fd,	"GET http://%s%s HTTP/1.0\r\n"
380 						"Host: %s\r\n"
381 						"User-agent: Plan9/hget\r\n"
382 						"Cache-Control: no-cache\r\n"
383 						"Pragma: no-cache\r\n",
384 						u->host, u->page, u->host);
385 			}
386 		} else {
387 			dfprint(fd,	"POST %s HTTP/1.0\r\n"
388 					"Host: %s\r\n"
389 					"Content-type: application/x-www-form-urlencoded\r\n"
390 					"Content-length: %d\r\n"
391 					"User-agent: Plan9/hget\r\n",
392 					u->page, u->host, strlen(u->postbody));
393 		}
394 		if(u->cred)
395 			dfprint(fd, "Authorization: Basic %s\r\n", u->cred);
396 		if(u->rhead)
397 			dfprint(fd, "%s\r\n", u->rhead);
398 		if(r->start != 0){
399 			dfprint(fd, "Range: bytes=%d-\n", r->start);
400 			if(u->etag != nil){
401 				dfprint(fd, "If-range: %s\n", u->etag);
402 			} else {
403 				tm = gmtime(mtime);
404 				dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
405 					day[tm->wday], tm->mday, month[tm->mon],
406 					tm->year+1900, tm->hour, tm->min, tm->sec);
407 			}
408 		}
409 		if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
410 			if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
411 				while((n = read(cfd, buf, sizeof buf)) > 0){
412 					if(debug)
413 						write(2, buf, n);
414 					write(fd, buf, n);
415 				}
416 			}else{
417 				close(cfd);
418 				cfd = -1;
419 			}
420 		}
421 
422 		dfprint(fd, "\r\n", u->host);
423 		if(u->postbody)
424 			dfprint(fd,	"%s", u->postbody);
425 
426 		auth = 0;
427 		redirect = 0;
428 		initibuf();
429 		code = httprcode(fd);
430 		switch(code){
431 		case Error:	/* connection timed out */
432 		case Eof:
433 			close(fd);
434 			close(cfd);
435 			return code;
436 
437 		case 200:	/* OK */
438 		case 201:	/* Created */
439 		case 202:	/* Accepted */
440 			if(ofile == nil && r->start != 0)
441 				sysfatal("page changed underfoot");
442 			break;
443 
444 		case 204:	/* No Content */
445 			sysfatal("No Content");
446 
447 		case 206:	/* Partial Content */
448 			setoffset(out, r->start);
449 			break;
450 
451 		case 301:	/* Moved Permanently */
452 		case 302:	/* Moved Temporarily (actually Found) */
453 		case 303:	/* See Other */
454 		case 307:	/* Temporary Redirect (HTTP/1.1) */
455 			redirect = 1;
456 			u->postbody = nil;
457 			break;
458 
459 		case 304:	/* Not Modified */
460 			break;
461 
462 		case 400:	/* Bad Request */
463 			sysfatal("Bad Request");
464 
465 		case 401:	/* Unauthorized */
466 			if (auth)
467 				sysfatal("Authentication failed");
468 			auth = 1;
469 			break;
470 
471 		case 402:	/* ??? */
472 			sysfatal("Unauthorized");
473 
474 		case 403:	/* Forbidden */
475 			sysfatal("Forbidden by server");
476 
477 		case 404:	/* Not Found */
478 			sysfatal("Not found on server");
479 
480 		case 407:	/* Proxy Authentication */
481 			sysfatal("Proxy authentication required");
482 
483 		case 500:	/* Internal server error */
484 			sysfatal("Server choked");
485 
486 		case 501:	/* Not implemented */
487 			sysfatal("Server can't do it!");
488 
489 		case 502:	/* Bad gateway */
490 			sysfatal("Bad gateway");
491 
492 		case 503:	/* Service unavailable */
493 			sysfatal("Service unavailable");
494 
495 		default:
496 			sysfatal("Unknown response code %d", code);
497 		}
498 
499 		if(u->redirect != nil){
500 			free(u->redirect);
501 			u->redirect = nil;
502 		}
503 
504 		rv = httpheaders(fd, cfd, u, r);
505 		close(cfd);
506 		if(rv != 0){
507 			close(fd);
508 			return rv;
509 		}
510 
511 		if(!redirect && !auth)
512 			break;
513 
514 		if (redirect){
515 			if(u->redirect == nil)
516 				sysfatal("redirect: no URL");
517 			if(crackurl(u, u->redirect) < 0)
518 				sysfatal("redirect: %r");
519 		}
520 	}
521 
522 	/* transfer whatever you get */
523 	if(ofile != nil && u->mtime != 0){
524 		note.fd = out->fd;
525 		note.mtime = u->mtime;
526 		notify(catch);
527 	}
528 
529 	tot = 0;
530 	vtime = 0;
531 	for(;;){
532 		n = readibuf(fd, buf, sizeof(buf));
533 		if(n <= 0)
534 			break;
535 		if(output(out, buf, n) != n)
536 			break;
537 		tot += n;
538 		if(verbose && (vtime != time(0) || r->start == r->end)) {
539 			vtime = time(0);
540 			fprint(2, "%ld %ld\n", r->start+tot, r->end);
541 		}
542 	}
543 	notify(nil);
544 	close(fd);
545 
546 	if(ofile != nil && u->mtime != 0){
547 		Dir d;
548 
549 		rerrstr(err, sizeof err);
550 		nulldir(&d);
551 		d.mtime = u->mtime;
552 		if(dirfwstat(out->fd, &d) < 0)
553 			fprint(2, "couldn't set mtime: %r\n");
554 		errstr(err, sizeof err);
555 	}
556 
557 	return tot;
558 }
559 
560 /* get the http response code */
561 int
httprcode(int fd)562 httprcode(int fd)
563 {
564 	int n;
565 	char *p;
566 	char buf[256];
567 
568 	n = readline(fd, buf, sizeof(buf)-1);
569 	if(n <= 0)
570 		return n;
571 	if(debug)
572 		fprint(2, "%d <- %s\n", fd, buf);
573 	p = strchr(buf, ' ');
574 	if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
575 		werrstr("bad response from server");
576 		return -1;
577 	}
578 	buf[n] = 0;
579 	return atoi(p+1);
580 }
581 
582 /* read in and crack the http headers, update u and r */
583 void	hhetag(char*, URL*, Range*);
584 void	hhmtime(char*, URL*, Range*);
585 void	hhclen(char*, URL*, Range*);
586 void	hhcrange(char*, URL*, Range*);
587 void	hhuri(char*, URL*, Range*);
588 void	hhlocation(char*, URL*, Range*);
589 void	hhauth(char*, URL*, Range*);
590 
591 struct {
592 	char *name;
593 	void (*f)(char*, URL*, Range*);
594 } headers[] = {
595 	{ "etag:", hhetag },
596 	{ "last-modified:", hhmtime },
597 	{ "content-length:", hhclen },
598 	{ "content-range:", hhcrange },
599 	{ "uri:", hhuri },
600 	{ "location:", hhlocation },
601 	{ "WWW-Authenticate:", hhauth },
602 };
603 int
httpheaders(int fd,int cfd,URL * u,Range * r)604 httpheaders(int fd, int cfd, URL *u, Range *r)
605 {
606 	char buf[2048];
607 	char *p;
608 	int i, n;
609 
610 	for(;;){
611 		n = getheader(fd, buf, sizeof(buf));
612 		if(n <= 0)
613 			break;
614 		if(cfd >= 0)
615 			fprint(cfd, "%s\n", buf);
616 		for(i = 0; i < nelem(headers); i++){
617 			n = strlen(headers[i].name);
618 			if(cistrncmp(buf, headers[i].name, n) == 0){
619 				/* skip field name and leading white */
620 				p = buf + n;
621 				while(*p == ' ' || *p == '\t')
622 					p++;
623 
624 				(*headers[i].f)(p, u, r);
625 				break;
626 			}
627 		}
628 	}
629 	return n;
630 }
631 
632 /*
633  *  read a single mime header, collect continuations.
634  *
635  *  this routine assumes that there is a blank line twixt
636  *  the header and the message body, otherwise bytes will
637  *  be lost.
638  */
639 int
getheader(int fd,char * buf,int n)640 getheader(int fd, char *buf, int n)
641 {
642 	char *p, *e;
643 	int i;
644 
645 	n--;
646 	p = buf;
647 	for(e = p + n; ; p += i){
648 		i = readline(fd, p, e-p);
649 		if(i < 0)
650 			return i;
651 
652 		if(p == buf){
653 			/* first line */
654 			if(strchr(buf, ':') == nil)
655 				break;		/* end of headers */
656 		} else {
657 			/* continuation line */
658 			if(*p != ' ' && *p != '\t'){
659 				unreadline(p);
660 				*p = 0;
661 				break;		/* end of this header */
662 			}
663 		}
664 	}
665 	if(headerprint)
666 		print("%s\n", buf);
667 
668 	if(debug)
669 		fprint(2, "%d <- %s\n", fd, buf);
670 	return p-buf;
671 }
672 
673 void
hhetag(char * p,URL * u,Range *)674 hhetag(char *p, URL *u, Range*)
675 {
676 	if(u->etag != nil){
677 		if(strcmp(u->etag, p) != 0)
678 			sysfatal("file changed underfoot");
679 	} else
680 		u->etag = strdup(p);
681 }
682 
683 char*	monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";
684 
685 void
hhmtime(char * p,URL * u,Range *)686 hhmtime(char *p, URL *u, Range*)
687 {
688 	char *month, *day, *yr, *hms;
689 	char *fields[6];
690 	Tm tm, now;
691 	int i;
692 
693 	i = getfields(p, fields, 6, 1, " \t");
694 	if(i < 5)
695 		return;
696 
697 	day = fields[1];
698 	month = fields[2];
699 	yr = fields[3];
700 	hms = fields[4];
701 
702 	/* default time */
703 	now = *gmtime(time(0));
704 	tm = now;
705 	tm.yday = 0;
706 
707 	/* convert ascii month to a number twixt 1 and 12 */
708 	if(*month >= '0' && *month <= '9'){
709 		tm.mon = atoi(month) - 1;
710 		if(tm.mon < 0 || tm.mon > 11)
711 			tm.mon = 5;
712 	} else {
713 		for(p = month; *p; p++)
714 			*p = tolower(*p);
715 		for(i = 0; i < 12; i++)
716 			if(strncmp(&monthchars[i*3], month, 3) == 0){
717 				tm.mon = i;
718 				break;
719 			}
720 	}
721 
722 	tm.mday = atoi(day);
723 
724 	if(hms) {
725 		tm.hour = strtoul(hms, &p, 10);
726 		if(*p == ':') {
727 			p++;
728 			tm.min = strtoul(p, &p, 10);
729 			if(*p == ':') {
730 				p++;
731 				tm.sec = strtoul(p, &p, 10);
732 			}
733 		}
734 		if(tolower(*p) == 'p')
735 			tm.hour += 12;
736 	}
737 
738 	if(yr) {
739 		tm.year = atoi(yr);
740 		if(tm.year >= 1900)
741 			tm.year -= 1900;
742 	} else {
743 		if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
744 			tm.year--;
745 	}
746 
747 	strcpy(tm.zone, "GMT");
748 	/* convert to epoch seconds */
749 	u->mtime = tm2sec(&tm);
750 }
751 
752 void
hhclen(char * p,URL *,Range * r)753 hhclen(char *p, URL*, Range *r)
754 {
755 	r->end = atoi(p);
756 }
757 
758 void
hhcrange(char * p,URL *,Range * r)759 hhcrange(char *p, URL*, Range *r)
760 {
761 	char *x;
762 	vlong l;
763 
764 	l = 0;
765 	x = strchr(p, '/');
766 	if(x)
767 		l = atoll(x+1);
768 	if(l == 0) {
769 		x = strchr(p, '-');
770 		if(x)
771 			l = atoll(x+1);
772 	}
773 	if(l)
774 		r->end = l;
775 }
776 
777 void
hhuri(char * p,URL * u,Range *)778 hhuri(char *p, URL *u, Range*)
779 {
780 	if(*p != '<')
781 		return;
782 	u->redirect = strdup(p+1);
783 	p = strchr(u->redirect, '>');
784 	if(p != nil)
785 		*p = 0;
786 }
787 
788 void
hhlocation(char * p,URL * u,Range *)789 hhlocation(char *p, URL *u, Range*)
790 {
791 	u->redirect = strdup(p);
792 }
793 
794 void
hhauth(char * p,URL * u,Range *)795 hhauth(char *p, URL *u, Range*)
796 {
797 	char *f[4];
798 	UserPasswd *up;
799 	char *s, cred[64];
800 
801 	if (cistrncmp(p, "basic ", 6) != 0)
802 		sysfatal("only Basic authentication supported");
803 
804 	if (gettokens(p, f, nelem(f), "\"") < 2)
805 		sysfatal("garbled auth data");
806 
807 	if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http server=%q realm=%q",
808 	    	u->host, f[1])) == nil)
809 			sysfatal("cannot authenticate");
810 
811 	s = smprint("%s:%s", up->user, up->passwd);
812 	if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
813 		sysfatal("enc64");
814   		free(s);
815 
816 	assert(u->cred = strdup(cred));
817 }
818 
819 enum
820 {
821 	/* ftp return codes */
822 	Extra=		1,
823 	Success=	2,
824 	Incomplete=	3,
825 	TempFail=	4,
826 	PermFail=	5,
827 
828 	Nnetdir=	64,	/* max length of network directory paths */
829 	Ndialstr=	64,		/* max length of dial strings */
830 };
831 
832 int ftpcmd(int, char*, ...);
833 int ftprcode(int, char*, int);
834 int hello(int);
835 int logon(int);
836 int xfertype(int, char*);
837 int passive(int, URL*);
838 int active(int, URL*);
839 int ftpxfer(int, Out*, Range*);
840 int terminateftp(int, int);
841 int getaddrport(char*, uchar*, uchar*);
842 int ftprestart(int, Out*, URL*, Range*, long);
843 
844 int
doftp(URL * u,URL * px,Range * r,Out * out,long mtime)845 doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
846 {
847 	int pid, ctl, data, rv;
848 	Waitmsg *w;
849 	char msg[64];
850 	char conndir[NETPATHLEN];
851 	char *p;
852 
853 	/* untested, proxy doesn't work with ftp (I think) */
854 	if(px->host == nil){
855 		ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, conndir, 0);
856 	} else {
857 		ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, conndir, 0);
858 	}
859 
860 	if(ctl < 0)
861 		return Error;
862 	if(net == nil){
863 		p = strrchr(conndir, '/');
864 		*p = 0;
865 		snprint(tcpdir, sizeof(tcpdir), conndir);
866 	}
867 
868 	initibuf();
869 
870 	rv = hello(ctl);
871 	if(rv < 0)
872 		return terminateftp(ctl, rv);
873 
874 	rv = logon(ctl);
875 	if(rv < 0)
876 		return terminateftp(ctl, rv);
877 
878 	rv = xfertype(ctl, "I");
879 	if(rv < 0)
880 		return terminateftp(ctl, rv);
881 
882 	/* if file is up to date and the right size, stop */
883 	if(ftprestart(ctl, out, u, r, mtime) > 0){
884 		close(ctl);
885 		return Eof;
886 	}
887 
888 	/* first try passive mode, then active */
889 	data = passive(ctl, u);
890 	if(data < 0){
891 		data = active(ctl, u);
892 		if(data < 0)
893 			return Error;
894 	}
895 
896 	/* fork */
897 	switch(pid = rfork(RFPROC|RFFDG|RFMEM)){
898 	case -1:
899 		close(data);
900 		return terminateftp(ctl, Error);
901 	case 0:
902 		ftpxfer(data, out, r);
903 		close(data);
904 		_exits(0);
905 	default:
906 		close(data);
907 		break;
908 	}
909 
910 	/* wait for reply message */
911 	rv = ftprcode(ctl, msg, sizeof(msg));
912 	close(ctl);
913 
914 	/* wait for process to terminate */
915 	w = nil;
916 	for(;;){
917 		free(w);
918 		w = wait();
919 		if(w == nil)
920 			return Error;
921 		if(w->pid == pid){
922 			if(w->msg[0] == 0){
923 				free(w);
924 				break;
925 			}
926 			werrstr("xfer: %s", w->msg);
927 			free(w);
928 			return Error;
929 		}
930 	}
931 
932 	switch(rv){
933 	case Success:
934 		return Eof;
935 	case TempFail:
936 		return Server;
937 	default:
938 		return Error;
939 	}
940 }
941 
942 int
ftpcmd(int ctl,char * fmt,...)943 ftpcmd(int ctl, char *fmt, ...)
944 {
945 	va_list arg;
946 	char buf[2*1024], *s;
947 
948 	va_start(arg, fmt);
949 	s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
950 	va_end(arg);
951 	if(debug)
952 		fprint(2, "%d -> %s\n", ctl, buf);
953 	*s++ = '\r';
954 	*s++ = '\n';
955 	if(write(ctl, buf, s - buf) != s - buf)
956 		return -1;
957 	return 0;
958 }
959 
960 int
ftprcode(int ctl,char * msg,int len)961 ftprcode(int ctl, char *msg, int len)
962 {
963 	int rv;
964 	int i;
965 	char *p;
966 
967 	len--;	/* room for terminating null */
968 	for(;;){
969 		*msg = 0;
970 		i = readline(ctl, msg, len);
971 		if(i < 0)
972 			break;
973 		if(debug)
974 			fprint(2, "%d <- %s\n", ctl, msg);
975 
976 		/* stop if not a continuation */
977 		rv = strtol(msg, &p, 10);
978 		if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
979 			return rv/100;
980 	}
981 	*msg = 0;
982 
983 	return -1;
984 }
985 
986 int
hello(int ctl)987 hello(int ctl)
988 {
989 	char msg[1024];
990 
991 	/* wait for hello from other side */
992 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
993 		werrstr("HELLO: %s", msg);
994 		return Server;
995 	}
996 	return 0;
997 }
998 
/*
 *  Convert the n characters at p, taken as decimal digits, to an
 *  integer.  The characters are not checked.
 */
int
getdec(char *p, int n)
{
	int val;

	val = 0;
	while(n-- > 0)
		val = val*10 + (*p++ - '0');
	return val;
}
1009 
1010 int
ftprestart(int ctl,Out * out,URL * u,Range * r,long mtime)1011 ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
1012 {
1013 	Tm tm;
1014 	char msg[1024];
1015 	long x, rmtime;
1016 
1017 	ftpcmd(ctl, "MDTM %s", u->page);
1018 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1019 		r->start = 0;
1020 		return 0;		/* need to do something */
1021 	}
1022 
1023 	/* decode modification time */
1024 	if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
1025 		r->start = 0;
1026 		return 0;		/* need to do something */
1027 	}
1028 	memset(&tm, 0, sizeof(tm));
1029 	tm.year = getdec(msg+4, 4) - 1900;
1030 	tm.mon = getdec(msg+4+4, 2) - 1;
1031 	tm.mday = getdec(msg+4+4+2, 2);
1032 	tm.hour = getdec(msg+4+4+2+2, 2);
1033 	tm.min = getdec(msg+4+4+2+2+2, 2);
1034 	tm.sec = getdec(msg+4+4+2+2+2+2, 2);
1035 	strcpy(tm.zone, "GMT");
1036 	rmtime = tm2sec(&tm);
1037 	if(rmtime > mtime)
1038 		r->start = 0;
1039 
1040 	/* get size */
1041 	ftpcmd(ctl, "SIZE %s", u->page);
1042 	if(ftprcode(ctl, msg, sizeof(msg)) == Success){
1043 		x = atol(msg+4);
1044 		if(r->start == x)
1045 			return 1;	/* we're up to date */
1046 		r->end = x;
1047 	}
1048 
1049 	/* seek to restart point */
1050 	if(r->start > 0){
1051 		ftpcmd(ctl, "REST %lud", r->start);
1052 		if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
1053 			setoffset(out, r->start);
1054 		}else
1055 			r->start = 0;
1056 	}
1057 
1058 	return 0;	/* need to do something */
1059 }
1060 
1061 int
logon(int ctl)1062 logon(int ctl)
1063 {
1064 	char msg[1024];
1065 
1066 	/* login anonymous */
1067 	ftpcmd(ctl, "USER anonymous");
1068 	switch(ftprcode(ctl, msg, sizeof(msg))){
1069 	case Success:
1070 		return 0;
1071 	case Incomplete:
1072 		break;	/* need password */
1073 	default:
1074 		werrstr("USER: %s", msg);
1075 		return Server;
1076 	}
1077 
1078 	/* send user id as password */
1079 	sprint(msg, "%s@closedmind.org", getuser());
1080 	ftpcmd(ctl, "PASS %s", msg);
1081 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1082 		werrstr("PASS: %s", msg);
1083 		return Server;
1084 	}
1085 
1086 	return 0;
1087 }
1088 
1089 int
xfertype(int ctl,char * t)1090 xfertype(int ctl, char *t)
1091 {
1092 	char msg[1024];
1093 
1094 	ftpcmd(ctl, "TYPE %s", t);
1095 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1096 		werrstr("TYPE %s: %s", t, msg);
1097 		return Server;
1098 	}
1099 
1100 	return 0;
1101 }
1102 
1103 int
passive(int ctl,URL * u)1104 passive(int ctl, URL *u)
1105 {
1106 	char msg[1024];
1107 	char ipaddr[32];
1108 	char *f[6];
1109 	char *p;
1110 	int fd;
1111 	int port;
1112 	char aport[12];
1113 
1114 	ftpcmd(ctl, "PASV");
1115 	if(ftprcode(ctl, msg, sizeof(msg)) != Success)
1116 		return Error;
1117 
1118 	/* get address and port number from reply, this is AI */
1119 	p = strchr(msg, '(');
1120 	if(p == nil){
1121 		for(p = msg+3; *p; p++)
1122 			if(isdigit(*p))
1123 				break;
1124 	} else
1125 		p++;
1126 	if(getfields(p, f, 6, 0, ",)") < 6){
1127 		werrstr("ftp protocol botch");
1128 		return Server;
1129 	}
1130 	snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
1131 		f[0], f[1], f[2], f[3]);
1132 	port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
1133 	sprint(aport, "%d", port);
1134 
1135 	/* open data connection */
1136 	fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
1137 	if(fd < 0){
1138 		werrstr("passive mode failed: %r");
1139 		return Error;
1140 	}
1141 
1142 	/* tell remote to send a file */
1143 	ftpcmd(ctl, "RETR %s", u->page);
1144 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1145 		werrstr("RETR %s: %s", u->page, msg);
1146 		return Error;
1147 	}
1148 	return fd;
1149 }
1150 
1151 int
active(int ctl,URL * u)1152 active(int ctl, URL *u)
1153 {
1154 	char msg[1024];
1155 	char dir[40], ldir[40];
1156 	uchar ipaddr[4];
1157 	uchar port[2];
1158 	int lcfd, dfd, afd;
1159 
1160 	/* announce a port for the call back */
1161 	snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
1162 	afd = announce(msg, dir);
1163 	if(afd < 0)
1164 		return Error;
1165 
1166 	/* get a local address/port of the annoucement */
1167 	if(getaddrport(dir, ipaddr, port) < 0){
1168 		close(afd);
1169 		return Error;
1170 	}
1171 
1172 	/* tell remote side address and port*/
1173 	ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
1174 		ipaddr[3], port[0], port[1]);
1175 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1176 		close(afd);
1177 		werrstr("active: %s", msg);
1178 		return Error;
1179 	}
1180 
1181 	/* tell remote to send a file */
1182 	ftpcmd(ctl, "RETR %s", u->page);
1183 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1184 		close(afd);
1185 		werrstr("RETR: %s", msg);
1186 		return Server;
1187 	}
1188 
1189 	/* wait for a connection */
1190 	lcfd = listen(dir, ldir);
1191 	if(lcfd < 0){
1192 		close(afd);
1193 		return Error;
1194 	}
1195 	dfd = accept(lcfd, ldir);
1196 	if(dfd < 0){
1197 		close(afd);
1198 		close(lcfd);
1199 		return Error;
1200 	}
1201 	close(afd);
1202 	close(lcfd);
1203 
1204 	return dfd;
1205 }
1206 
1207 int
ftpxfer(int in,Out * out,Range * r)1208 ftpxfer(int in, Out *out, Range *r)
1209 {
1210 	char buf[1024];
1211 	long vtime;
1212 	int i, n;
1213 
1214 	vtime = 0;
1215 	for(n = 0;;n += i){
1216 		i = read(in, buf, sizeof(buf));
1217 		if(i == 0)
1218 			break;
1219 		if(i < 0)
1220 			return Error;
1221 		if(output(out, buf, i) != i)
1222 			return Error;
1223 		r->start += i;
1224 		if(verbose && (vtime != time(0) || r->start == r->end)) {
1225 			vtime = time(0);
1226 			fprint(2, "%ld %ld\n", r->start, r->end);
1227 		}
1228 	}
1229 	return n;
1230 }
1231 
/*
 *  Close the control connection and pass rv through, so a caller
 *  can write "return terminateftp(ctl, rv);".
 */
int
terminateftp(int ctl, int rv)
{
	close(ctl);

	return rv;
}
1238 
/*
 * case insensitive strcmp (why aren't these in libc?)
 */

/*
 *  Compare at most n characters of a and b without regard to case.
 *  Returns 0 when they match and -1 when they differ (there is no
 *  ordering).  The comparison stops at the end of the strings, so
 *  equal strings shorter than n are not read past their NUL.
 */
int
cistrncmp(char *a, char *b, int n)
{
	int ca, cb;

	while(n-- > 0){
		/* go through unsigned char so bytes above 0x7f reach tolower as positive values */
		ca = tolower((unsigned char)*a++);
		cb = tolower((unsigned char)*b++);
		if(ca != cb)
			return -1;
		if(ca == 0)
			break;
	}
	return 0;
}
1251 
/*
 *  Compare a and b without regard to case.  Returns 0 when they
 *  match and -1 when they differ (there is no ordering).
 */
int
cistrcmp(char *a, char *b)
{
	for(; *a != 0 || *b != 0; a++, b++)
		if(tolower(*a) != tolower(*b))
			return -1;

	return 0;
}
1261 
/*
 *  buffered io: one input buffer shared by readline, unreadline
 *  and readibuf
 */
struct
{
	char	*rp;		/* next byte to hand out */
	char	*wp;		/* end of the buffered bytes */
	char	buf[4*1024];
} b;
1271 
1272 void
initibuf(void)1273 initibuf(void)
1274 {
1275 	b.rp = b.wp = b.buf;
1276 }
1277 
1278 /*
1279  *  read a possibly buffered line, strip off trailing while
1280  */
1281 int
readline(int fd,char * buf,int len)1282 readline(int fd, char *buf, int len)
1283 {
1284 	int n;
1285 	char *p;
1286 	int eof = 0;
1287 
1288 	len--;
1289 
1290 	for(p = buf;;){
1291 		if(b.rp >= b.wp){
1292 			n = read(fd, b.wp, sizeof(b.buf)/2);
1293 			if(n < 0)
1294 				return -1;
1295 			if(n == 0){
1296 				eof = 1;
1297 				break;
1298 			}
1299 			b.wp += n;
1300 		}
1301 		n = *b.rp++;
1302 		if(len > 0){
1303 			*p++ = n;
1304 			len--;
1305 		}
1306 		if(n == '\n')
1307 			break;
1308 	}
1309 
1310 	/* drop trailing white */
1311 	for(;;){
1312 		if(p <= buf)
1313 			break;
1314 		n = *(p-1);
1315 		if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
1316 			break;
1317 		p--;
1318 	}
1319 	*p = 0;
1320 
1321 	if(eof && p == buf)
1322 		return -1;
1323 
1324 	return p-buf;
1325 }
1326 
1327 void
unreadline(char * line)1328 unreadline(char *line)
1329 {
1330 	int i, n;
1331 
1332 	i = strlen(line);
1333 	n = b.wp-b.rp;
1334 	memmove(&b.buf[i+1], b.rp, n);
1335 	memmove(b.buf, line, i);
1336 	b.buf[i] = '\n';
1337 	b.rp = b.buf;
1338 	b.wp = b.rp + i + 1 + n;
1339 }
1340 
1341 int
readibuf(int fd,char * buf,int len)1342 readibuf(int fd, char *buf, int len)
1343 {
1344 	int n;
1345 
1346 	n = b.wp-b.rp;
1347 	if(n > 0){
1348 		if(n > len)
1349 			n = len;
1350 		memmove(buf, b.rp, n);
1351 		b.rp += n;
1352 		return n;
1353 	}
1354 	return read(fd, buf, len);
1355 }
1356 
1357 int
dfprint(int fd,char * fmt,...)1358 dfprint(int fd, char *fmt, ...)
1359 {
1360 	char buf[4*1024];
1361 	va_list arg;
1362 
1363 	va_start(arg, fmt);
1364 	vseprint(buf, buf+sizeof(buf), fmt, arg);
1365 	va_end(arg);
1366 	if(debug)
1367 		fprint(2, "%d -> %s", fd, buf);
1368 	return fprint(fd, "%s", buf);
1369 }
1370 
1371 int
getaddrport(char * dir,uchar * ipaddr,uchar * port)1372 getaddrport(char *dir, uchar *ipaddr, uchar *port)
1373 {
1374 	char buf[256];
1375 	int fd, i;
1376 	char *p;
1377 
1378 	snprint(buf, sizeof(buf), "%s/local", dir);
1379 	fd = open(buf, OREAD);
1380 	if(fd < 0)
1381 		return -1;
1382 	i = read(fd, buf, sizeof(buf)-1);
1383 	close(fd);
1384 	if(i <= 0)
1385 		return -1;
1386 	buf[i] = 0;
1387 	p = strchr(buf, '!');
1388 	if(p != nil)
1389 		*p++ = 0;
1390 	v4parseip(ipaddr, buf);
1391 	i = atoi(p);
1392 	port[0] = i>>8;
1393 	port[1] = i;
1394 	return 0;
1395 }
1396 
1397 void
md5free(DigestState * state)1398 md5free(DigestState *state)
1399 {
1400 	uchar x[MD5dlen];
1401 	md5(nil, 0, x, state);
1402 }
1403 
1404 DigestState*
md5dup(DigestState * state)1405 md5dup(DigestState *state)
1406 {
1407 	char *p;
1408 
1409 	p = md5pickle(state);
1410 	if(p == nil)
1411 		sysfatal("md5pickle: %r");
1412 	state = md5unpickle(p);
1413 	if(state == nil)
1414 		sysfatal("md5unpickle: %r");
1415 	free(p);
1416 	return state;
1417 }
1418 
1419 void
setoffset(Out * out,int offset)1420 setoffset(Out *out, int offset)
1421 {
1422 	md5free(out->curr);
1423 	if(offset == 0)
1424 		out->curr = md5(nil, 0, nil, nil);
1425 	else
1426 		out->curr = nil;
1427 	out->offset = offset;
1428 	out->written = offset;
1429 	if(ofile != nil)
1430 		if(seek(out->fd, offset, 0) != offset)
1431 			sysfatal("seek: %r");
1432 }
1433 
1434 /*
1435  * write some output, discarding it (but keeping track)
1436  * if we've already written it. if we've gone backwards,
1437  * verify that everything previously written matches
1438  * that which would have been written from the current
1439  * output.
1440  */
1441 int
output(Out * out,char * buf,int nb)1442 output(Out *out, char *buf, int nb)
1443 {
1444 	int n, d;
1445 	uchar m0[MD5dlen], m1[MD5dlen];
1446 
1447 	n = nb;
1448 	d = out->written - out->offset;
1449 	assert(d >= 0);
1450 	if(d > 0){
1451 		if(n < d){
1452 			if(out->curr != nil)
1453 				md5((uchar*)buf, n, nil, out->curr);
1454 			out->offset += n;
1455 			return n;
1456 		}
1457 		if(out->curr != nil){
1458 			md5((uchar*)buf, d, m0, out->curr);
1459 			out->curr = nil;
1460 			md5(nil, 0, m1, md5dup(out->hiwat));
1461 			if(memcmp(m0, m1, MD5dlen) != 0){
1462 				fprint(2, "integrity check failure at offset %d\n", out->written);
1463 				return -1;
1464 			}
1465 		}
1466 		buf += d;
1467 		n -= d;
1468 		out->offset += d;
1469 	}
1470 	if(n > 0){
1471 		out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
1472 		n = write(out->fd, buf, n);
1473 		if(n > 0){
1474 			out->offset += n;
1475 			out->written += n;
1476 		}
1477 	}
1478 	return n + d;
1479 }
1480 
1481