xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 2b3d1ee8a773e028429b331332895d44f445d720)
1 /*      $NetBSD: rumpclient.c,v 1.52 2012/09/12 12:38:16 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include "rumpuser_port.h"
33 
34 /*
35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
36  * use kqueue on other BSD's too, but I haven't tested those.  We
37  * want to use kqueue because it will give us the ability to get signal
38  * notifications but defer their handling to a stage where we do not
39  * hold the communication lock.  Taking a signal while holding on to
40  * that lock may cause a deadlock.  Therefore, block signals throughout
41  * the RPC when using poll.  This unfortunately means that the normal
42  * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
44  * the above scenario correctly, I'm all ears.
45  */
46 
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50 
51 #include <sys/cdefs.h>
52 __RCSID("$NetBSD: rumpclient.c,v 1.52 2012/09/12 12:38:16 pooka Exp $");
53 
54 #include <sys/param.h>
55 #include <sys/mman.h>
56 #include <sys/socket.h>
57 #include <sys/time.h>
58 
59 #ifdef USE_KQUEUE
60 #include <sys/event.h>
61 #endif
62 
63 #include <arpa/inet.h>
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66 
67 #include <assert.h>
68 #include <dlfcn.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <link.h>
73 #include <poll.h>
74 #include <pthread.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdbool.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82 
83 #include <rump/rumpclient.h>
84 
/*
 * Pointers to the host's implementations of the calls this library
 * needs.  They are filled in by rumpclient_init() via symbol lookup.
 *
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c to learn
 * that these pointers are provided here -- confirm against that file.
 */
#define HOSTOPS

/* descriptor management */
int	(*host_close)(int fd);
int	(*host_dup)(int fd);
int	(*host_fcntl)(int fd, int cmd, ...);

/* socket setup */
int	(*host_socket)(int domain, int type, int protocol);
int	(*host_connect)(int s, const struct sockaddr *name, socklen_t namelen);
int	(*host_setsockopt)(int s, int level, int optname,
			   const void *optval, socklen_t optlen);

/* data transfer and waiting */
int	(*host_poll)(struct pollfd *fds, nfds_t nfds, int timeout);
ssize_t	(*host_read)(int fd, void *buf, size_t nbytes);
ssize_t	(*host_sendmsg)(int s, const struct msghdr *msg, int flags);

#ifdef USE_KQUEUE
/* event notification, only when kqueue is in use */
int	(*host_kqueue)(void);
int	(*host_kevent)(int kqfd, const struct kevent *changelist,
		       size_t nchanges, struct kevent *eventlist,
		       size_t nevents, const struct timespec *timeout);
#endif

/* process image replacement */
int	(*host_execve)(const char *path, char *const argv[],
		       char *const envp[]);
103 
104 #include "sp_common.c"
105 
106 static struct spclient clispc = {
107 	.spc_fd = -1,
108 };
109 
110 static int kq = -1;
111 static sigset_t fullset;
112 
113 static int doconnect(void);
114 static int handshake_req(struct spclient *, int, void *, int, bool);
115 
116 /*
117  * Default: don't retry.  Most clients can't handle it
118  * (consider e.g. fds suddenly going missing).
119  */
120 static time_t retrytimo = 0;
121 
122 /* always defined to nothingness for now */
123 #define ERRLOG(a)
124 
125 static int
126 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
127 {
128 	struct timeval starttime, curtime;
129 	time_t prevreconmsg;
130 	unsigned reconretries;
131 	int rv;
132 
133 	for (prevreconmsg = 0, reconretries = 0;;) {
134 		rv = dosend(spc, iov, iovlen);
135 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
136 			/* no persistent connections */
137 			if (retrytimo == 0) {
138 				rv = ENOTCONN;
139 				break;
140 			}
141 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
142 				_exit(1);
143 
144 			if (!prevreconmsg) {
145 				prevreconmsg = time(NULL);
146 				gettimeofday(&starttime, NULL);
147 			}
148 			if (reconretries == 1) {
149 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
150 					rv = ENOTCONN;
151 					break;
152 				}
153 				fprintf(stderr, "rump_sp: connection to "
154 				    "kernel lost, trying to reconnect ...\n");
155 			} else if (time(NULL) - prevreconmsg > 120) {
156 				fprintf(stderr, "rump_sp: still trying to "
157 				    "reconnect ...\n");
158 				prevreconmsg = time(NULL);
159 			}
160 
161 			/* check that we aren't over the limit */
162 			if (retrytimo > 0) {
163 				struct timeval tmp;
164 
165 				gettimeofday(&curtime, NULL);
166 				timersub(&curtime, &starttime, &tmp);
167 				if (tmp.tv_sec >= retrytimo) {
168 					fprintf(stderr, "rump_sp: reconnect "
169 					    "failed, %lld second timeout\n",
170 					    (long long)retrytimo);
171 					return ENOTCONN;
172 				}
173 			}
174 
175 			/* adhoc backoff timer */
176 			if (reconretries < 10) {
177 				usleep(100000 * reconretries);
178 			} else {
179 				sleep(MIN(10, reconretries-9));
180 			}
181 			reconretries++;
182 
183 			if ((rv = doconnect()) != 0)
184 				continue;
185 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
186 			    NULL, 0, true)) != 0)
187 				continue;
188 
189 			/*
190 			 * ok, reconnect succesful.  we need to return to
191 			 * the upper layer to get the entire PDU resent.
192 			 */
193 			if (reconretries != 1)
194 				fprintf(stderr, "rump_sp: reconnected!\n");
195 			rv = EAGAIN;
196 			break;
197 		} else {
198 			_DIAGASSERT(errno != EAGAIN);
199 			break;
200 		}
201 	}
202 
203 	return rv;
204 }
205 
206 static int
207 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
208 	bool keeplock)
209 {
210 	uint64_t mygen;
211 	bool imalive = true;
212 
213 	pthread_mutex_lock(&spc->spc_mtx);
214 	if (!keeplock)
215 		sendunlockl(spc);
216 	mygen = spc->spc_generation;
217 
218 	rw->rw_error = 0;
219 	while (!rw->rw_done && rw->rw_error == 0) {
220 		if (__predict_false(spc->spc_generation != mygen || !imalive))
221 			break;
222 
223 		/* are we free to receive? */
224 		if (spc->spc_istatus == SPCSTATUS_FREE) {
225 			int gotresp, dosig, rv;
226 
227 			spc->spc_istatus = SPCSTATUS_BUSY;
228 			pthread_mutex_unlock(&spc->spc_mtx);
229 
230 			dosig = 0;
231 			for (gotresp = 0; !gotresp; ) {
232 #ifdef USE_KQUEUE
233 				struct kevent kev[8];
234 				int i;
235 
236 				/*
237 				 * typically we don't have a frame waiting
238 				 * when we come in here, so call kevent now
239 				 */
240 				rv = host_kevent(kq, NULL, 0,
241 				    kev, __arraycount(kev), NULL);
242 
243 				if (__predict_false(rv == -1)) {
244 					goto activity;
245 				}
246 
247 				/*
248 				 * XXX: don't know how this can happen
249 				 * (timeout cannot expire since there
250 				 * isn't one), but it does happen.
251 				 * treat it as an expectional condition
252 				 * and go through tryread to determine
253 				 * alive status.
254 				 */
255 				if (__predict_false(rv == 0))
256 					goto activity;
257 
258 				for (i = 0; i < rv; i++) {
259 					if (kev[i].filter == EVFILT_SIGNAL)
260 						dosig++;
261 				}
262 				if (dosig)
263 					goto cleanup;
264 
265 				/*
266 				 * ok, activity.  try to read a frame to
267 				 * determine what happens next.
268 				 */
269  activity:
270 #else /* USE_KQUEUE */
271 				struct pollfd pfd;
272 
273 				pfd.fd = clispc.spc_fd;
274 				pfd.events = POLLIN;
275 
276 				rv = host_poll(&pfd, 1, -1);
277 #endif /* !USE_KQUEUE */
278 
279 				switch (readframe(spc)) {
280 				case 0:
281 					continue;
282 				case -1:
283 					imalive = false;
284 					goto cleanup;
285 				default:
286 					/* case 1 */
287 					break;
288 				}
289 
290 				switch (spc->spc_hdr.rsp_class) {
291 				case RUMPSP_RESP:
292 				case RUMPSP_ERROR:
293 					kickwaiter(spc);
294 					gotresp = spc->spc_hdr.rsp_reqno ==
295 					    rw->rw_reqno;
296 					break;
297 				case RUMPSP_REQ:
298 					handlereq(spc);
299 					break;
300 				default:
301 					/* panic */
302 					break;
303 				}
304 			}
305 
306  cleanup:
307 			pthread_mutex_lock(&spc->spc_mtx);
308 			if (spc->spc_istatus == SPCSTATUS_WANTED)
309 				kickall(spc);
310 			spc->spc_istatus = SPCSTATUS_FREE;
311 
312 			/* take one for the team */
313 			if (dosig) {
314 				pthread_mutex_unlock(&spc->spc_mtx);
315 				pthread_sigmask(SIG_SETMASK, mask, NULL);
316 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
317 				pthread_mutex_lock(&spc->spc_mtx);
318 			}
319 		} else {
320 			spc->spc_istatus = SPCSTATUS_WANTED;
321 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
322 		}
323 	}
324 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
325 	pthread_mutex_unlock(&spc->spc_mtx);
326 	pthread_cond_destroy(&rw->rw_cv);
327 
328 	if (spc->spc_generation != mygen || !imalive) {
329 		return ENOTCONN;
330 	}
331 	return rw->rw_error;
332 }
333 
334 static int
335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
336 	const void *data, size_t dlen, void **resp)
337 {
338 	struct rsp_hdr rhdr;
339 	struct respwait rw;
340 	struct iovec iov[2];
341 	int rv;
342 
343 	rhdr.rsp_len = sizeof(rhdr) + dlen;
344 	rhdr.rsp_class = RUMPSP_REQ;
345 	rhdr.rsp_type = RUMPSP_SYSCALL;
346 	rhdr.rsp_sysnum = sysnum;
347 
348 	IOVPUT(iov[0], rhdr);
349 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
350 
351 	do {
352 		putwait(spc, &rw, &rhdr);
353 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
354 			unputwait(spc, &rw);
355 			continue;
356 		}
357 
358 		rv = cliwaitresp(spc, &rw, omask, false);
359 		if (rv == ENOTCONN)
360 			rv = EAGAIN;
361 	} while (rv == EAGAIN);
362 
363 	*resp = rw.rw_data;
364 	return rv;
365 }
366 
367 static int
368 handshake_req(struct spclient *spc, int type, void *data,
369 	int cancel, bool haslock)
370 {
371 	struct handshake_fork rf;
372 	const char *myprogname = NULL; /* XXXgcc */
373 	struct rsp_hdr rhdr;
374 	struct respwait rw;
375 	sigset_t omask;
376 	size_t bonus;
377 	struct iovec iov[2];
378 	int rv;
379 
380 	if (type == HANDSHAKE_FORK) {
381 		bonus = sizeof(rf);
382 	} else {
383 #ifdef __NetBSD__
384 		/* would procfs work on NetBSD too? */
385 		myprogname = getprogname();
386 #else
387 		int fd = open("/proc/self/comm", O_RDONLY);
388 		if (fd == -1) {
389 			myprogname = "???";
390 		} else {
391 			static char commname[128];
392 
393 			memset(commname, 0, sizeof(commname));
394 			if (read(fd, commname, sizeof(commname)) > 0) {
395 				char *n;
396 
397 				n = strrchr(commname, '\n');
398 				if (n)
399 					*n = '\0';
400 				myprogname = commname;
401 			} else {
402 				myprogname = "???";
403 			}
404 			close(fd);
405 		}
406 #endif
407 		bonus = strlen(myprogname)+1;
408 	}
409 
410 	/* performs server handshake */
411 	rhdr.rsp_len = sizeof(rhdr) + bonus;
412 	rhdr.rsp_class = RUMPSP_REQ;
413 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
414 	rhdr.rsp_handshake = type;
415 
416 	IOVPUT(iov[0], rhdr);
417 
418 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
419 	if (haslock)
420 		putwait_locked(spc, &rw, &rhdr);
421 	else
422 		putwait(spc, &rw, &rhdr);
423 	if (type == HANDSHAKE_FORK) {
424 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
425 		rf.rf_cancel = cancel;
426 		IOVPUT(iov[1], rf);
427 	} else {
428 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
429 	}
430 	rv = send_with_recon(spc, iov, __arraycount(iov));
431 	if (rv || cancel) {
432 		if (haslock)
433 			unputwait_locked(spc, &rw);
434 		else
435 			unputwait(spc, &rw);
436 		if (cancel) {
437 			goto out;
438 		}
439 	} else {
440 		rv = cliwaitresp(spc, &rw, &omask, haslock);
441 	}
442 	if (rv)
443 		goto out;
444 
445 	rv = *(int *)rw.rw_data;
446 	free(rw.rw_data);
447 
448  out:
449 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
450 	return rv;
451 }
452 
453 static int
454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
455 {
456 	struct rsp_hdr rhdr;
457 	struct respwait rw;
458 	struct iovec iov[1];
459 	int rv;
460 
461 	rhdr.rsp_len = sizeof(rhdr);
462 	rhdr.rsp_class = RUMPSP_REQ;
463 	rhdr.rsp_type = RUMPSP_PREFORK;
464 	rhdr.rsp_error = 0;
465 
466 	IOVPUT(iov[0], rhdr);
467 
468 	do {
469 		putwait(spc, &rw, &rhdr);
470 		rv = send_with_recon(spc, iov, __arraycount(iov));
471 		if (rv != 0) {
472 			unputwait(spc, &rw);
473 			continue;
474 		}
475 
476 		rv = cliwaitresp(spc, &rw, omask, false);
477 		if (rv == ENOTCONN)
478 			rv = EAGAIN;
479 	} while (rv == EAGAIN);
480 
481 	*resp = rw.rw_data;
482 	return rv;
483 }
484 
485 /*
486  * prevent response code from deadlocking with reconnect code
487  */
488 static int
489 resp_sendlock(struct spclient *spc)
490 {
491 	int rv = 0;
492 
493 	pthread_mutex_lock(&spc->spc_mtx);
494 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
495 		if (__predict_false(spc->spc_reconnecting)) {
496 			rv = EBUSY;
497 			goto out;
498 		}
499 		spc->spc_ostatus = SPCSTATUS_WANTED;
500 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
501 	}
502 	spc->spc_ostatus = SPCSTATUS_BUSY;
503 
504  out:
505 	pthread_mutex_unlock(&spc->spc_mtx);
506 	return rv;
507 }
508 
509 static void
510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
511 	int wantstr)
512 {
513 	struct rsp_hdr rhdr;
514 	struct iovec iov[2];
515 
516 	if (wantstr)
517 		dlen = MIN(dlen, strlen(data)+1);
518 
519 	rhdr.rsp_len = sizeof(rhdr) + dlen;
520 	rhdr.rsp_reqno = reqno;
521 	rhdr.rsp_class = RUMPSP_RESP;
522 	rhdr.rsp_type = RUMPSP_COPYIN;
523 	rhdr.rsp_sysnum = 0;
524 
525 	IOVPUT(iov[0], rhdr);
526 	IOVPUT_WITHSIZE(iov[1], data, dlen);
527 
528 	if (resp_sendlock(spc) != 0)
529 		return;
530 	(void)SENDIOV(spc, iov);
531 	sendunlock(spc);
532 }
533 
534 static void
535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
536 {
537 	struct rsp_hdr rhdr;
538 	struct iovec iov[2];
539 
540 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
541 	rhdr.rsp_reqno = reqno;
542 	rhdr.rsp_class = RUMPSP_RESP;
543 	rhdr.rsp_type = RUMPSP_ANONMMAP;
544 	rhdr.rsp_sysnum = 0;
545 
546 	IOVPUT(iov[0], rhdr);
547 	IOVPUT(iov[1], addr);
548 
549 	if (resp_sendlock(spc) != 0)
550 		return;
551 	(void)SENDIOV(spc, iov);
552 	sendunlock(spc);
553 }
554 
555 int
556 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
557 	register_t *retval)
558 {
559 	struct rsp_sysresp *resp;
560 	sigset_t omask;
561 	void *rdata;
562 	int rv;
563 
564 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
565 
566 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
567 	    sysnum, data, dlen));
568 
569 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
570 	if (rv)
571 		goto out;
572 
573 	resp = rdata;
574 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
575 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
576 
577 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
578 	rv = resp->rsys_error;
579 	free(rdata);
580 
581  out:
582 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
583 	return rv;
584 }
585 
586 static void
587 handlereq(struct spclient *spc)
588 {
589 	struct rsp_copydata *copydata;
590 	struct rsp_hdr *rhdr = &spc->spc_hdr;
591 	void *mapaddr;
592 	size_t maplen;
593 	int reqtype = spc->spc_hdr.rsp_type;
594 
595 	switch (reqtype) {
596 	case RUMPSP_COPYIN:
597 	case RUMPSP_COPYINSTR:
598 		/*LINTED*/
599 		copydata = (struct rsp_copydata *)spc->spc_buf;
600 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
601 		    copydata->rcp_addr, copydata->rcp_len));
602 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
603 		    copydata->rcp_addr, copydata->rcp_len,
604 		    reqtype == RUMPSP_COPYINSTR);
605 		break;
606 	case RUMPSP_COPYOUT:
607 	case RUMPSP_COPYOUTSTR:
608 		/*LINTED*/
609 		copydata = (struct rsp_copydata *)spc->spc_buf;
610 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
611 		    copydata->rcp_addr, copydata->rcp_len));
612 		/*LINTED*/
613 		memcpy(copydata->rcp_addr, copydata->rcp_data,
614 		    copydata->rcp_len);
615 		break;
616 	case RUMPSP_ANONMMAP:
617 		/*LINTED*/
618 		maplen = *(size_t *)spc->spc_buf;
619 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
620 		    MAP_ANON, -1, 0);
621 		if (mapaddr == MAP_FAILED)
622 			mapaddr = NULL;
623 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
624 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
625 		break;
626 	case RUMPSP_RAISE:
627 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
628 		raise((int)rhdr->rsp_signo);
629 		/*
630 		 * We most likely have signals blocked, but the signal
631 		 * will be handled soon enough when we return.
632 		 */
633 		break;
634 	default:
635 		printf("PANIC: INVALID TYPE %d\n", reqtype);
636 		abort();
637 		break;
638 	}
639 
640 	spcfreebuf(spc);
641 }
642 
/* index into parsetab for the server URL; set by parseurl() */
static unsigned int ptab_idx;
/* address of the server; set by parseurl(), used for connecting */
static struct sockaddr *serv_sa;
645 
/*
 * dup until we get a "good" fd which does not collide with stdio.
 *
 *   myfd       - descriptor to start from; -1 is passed straight through
 *   mustchange - if non-zero, duplicate at least once even if myfd is
 *                already good, and leave the original myfd open
 *
 * Intermediate descriptors created along the way are closed.  Returns
 * the good descriptor, or -1 with errno from the failed dup.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno = 0;
	unsigned int nsaved = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		assert(nsaved < __arraycount(ofds));
		ofds[nsaved] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* forced move: do not record the old fd for closing */
			mustchange = 0;
		} else {
			nsaved++;
		}
	}

	/* closing below may clobber errno, so stash it on failure */
	if (myfd == -1 && nsaved > 0)
		sverrno = errno;

	for (; nsaved > 0; nsaved--)
		host_close(ofds[nsaved-1]);

	if (sverrno)
		errno = sverrno;

	return myfd;
}
677 
678 static int
679 doconnect(void)
680 {
681 	struct respwait rw;
682 	struct rsp_hdr rhdr;
683 	char banner[MAXBANNER];
684 	int s, error, flags;
685 	ssize_t n;
686 
687 	if (kq != -1)
688 		host_close(kq);
689 	kq = -1;
690 	s = -1;
691 
692 	if (clispc.spc_fd != -1)
693 		host_close(clispc.spc_fd);
694 	clispc.spc_fd = -1;
695 
696 	/*
697 	 * for reconnect, gate everyone out of the receiver code
698 	 */
699 	putwait_locked(&clispc, &rw, &rhdr);
700 
701 	pthread_mutex_lock(&clispc.spc_mtx);
702 	clispc.spc_reconnecting = 1;
703 	pthread_cond_broadcast(&clispc.spc_cv);
704 	clispc.spc_generation++;
705 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
706 		clispc.spc_istatus = SPCSTATUS_WANTED;
707 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
708 	}
709 	kickall(&clispc);
710 
711 	/*
712 	 * we can release it already since we hold the
713 	 * send lock during reconnect
714 	 * XXX: assert it
715 	 */
716 	clispc.spc_istatus = SPCSTATUS_FREE;
717 	pthread_mutex_unlock(&clispc.spc_mtx);
718 	unputwait_locked(&clispc, &rw);
719 
720 	free(clispc.spc_buf);
721 	clispc.spc_off = 0;
722 
723 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
724 	if (s == -1)
725 		return -1;
726 
727 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
728 		if (errno == EINTR)
729 			continue;
730 		ERRLOG(("rump_sp: client connect failed: %s\n",
731 		    strerror(errno)));
732 		return -1;
733 	}
734 
735 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
736 		ERRLOG(("rump_sp: connect hook failed\n"));
737 		return -1;
738 	}
739 
740 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
741 		ERRLOG(("rump_sp: failed to read banner\n"));
742 		return -1;
743 	}
744 
745 	if (banner[n-1] != '\n') {
746 		ERRLOG(("rump_sp: invalid banner\n"));
747 		return -1;
748 	}
749 	banner[n] = '\0';
750 	/* XXX parse the banner some day */
751 
752 	flags = host_fcntl(s, F_GETFL, 0);
753 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
754 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
755 		return -1;
756 	}
757 	clispc.spc_fd = s;
758 	clispc.spc_state = SPCSTATE_RUNNING;
759 	clispc.spc_reconnecting = 0;
760 
761 #ifdef USE_KQUEUE
762 {
763 	struct kevent kev[NSIG+1];
764 	int i;
765 
766 	/* setup kqueue, we want all signals and the fd */
767 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
768 		ERRLOG(("rump_sp: cannot setup kqueue"));
769 		return -1;
770 	}
771 
772 	for (i = 0; i < NSIG; i++) {
773 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
774 	}
775 	EV_SET(&kev[NSIG], clispc.spc_fd,
776 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
777 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
778 		ERRLOG(("rump_sp: kevent() failed"));
779 		return -1;
780 	}
781 }
782 #endif /* USE_KQUEUE */
783 
784 	return 0;
785 }
786 
787 static int
788 doinit(void)
789 {
790 
791 	TAILQ_INIT(&clispc.spc_respwait);
792 	pthread_mutex_init(&clispc.spc_mtx, NULL);
793 	pthread_cond_init(&clispc.spc_cv, NULL);
794 
795 	return 0;
796 }
797 
/*
 * Default symbol lookup: a plain wrapper around dlsym().
 */
void *rumpclient__dlsym(void *handle, const char *symbol);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}

/*
 * rumphijack_dlsym is a weak alias of the above, so it resolves to
 * rumpclient__dlsym unless another definition takes its place.
 * rumpclient_init() compares the two to tell which is the case.
 */
void *rumphijack_dlsym(void *handle, const char *symbol)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
807 
808 static pid_t init_done = 0;
809 
810 int
811 rumpclient_init(void)
812 {
813 	char *p;
814 	int error;
815 	int rv = -1;
816 	int hstype;
817 	pid_t mypid;
818 
819 	/*
820 	 * Make sure we're not riding the context of a previous
821 	 * host fork.  Note: it's *possible* that after n>1 forks
822 	 * we have the same pid as one of our exited parents, but
823 	 * I'm pretty sure there are 0 practical implications, since
824 	 * it means generations would have to skip rumpclient init.
825 	 */
826 	if (init_done == (mypid = getpid()))
827 		return 0;
828 
829 	/* kq does not traverse fork() */
830 	if (init_done != 0)
831 		kq = -1;
832 	init_done = mypid;
833 
834 	sigfillset(&fullset);
835 
836 	/*
837 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
838 	 * wann wird man je verstehen?  wann wird man je verstehen?
839 	 */
840 #define FINDSYM2(_name_,_syscall_)					\
841 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
842 	    #_syscall_)) == NULL) {					\
843 		if (rumphijack_dlsym == rumpclient__dlsym)		\
844 			host_##_name_ = _name_; /* static fallback */	\
845 		if (host_##_name_ == NULL)				\
846 			errx(1, "cannot find %s: %s", #_syscall_,	\
847 			    dlerror());					\
848 	}
849 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
850 #ifdef __NetBSD__
851 	FINDSYM2(socket,__socket30)
852 #else
853 	FINDSYM(socket)
854 #endif
855 
856 	FINDSYM(close)
857 	FINDSYM(connect)
858 	FINDSYM(fcntl)
859 	FINDSYM(poll)
860 	FINDSYM(read)
861 	FINDSYM(sendmsg)
862 	FINDSYM(setsockopt)
863 	FINDSYM(dup)
864 	FINDSYM(execve)
865 
866 #ifdef USE_KQUEUE
867 	FINDSYM(kqueue)
868 #if !__NetBSD_Prereq__(5,99,7)
869 	FINDSYM(kevent)
870 #else
871 	FINDSYM2(kevent,_sys___kevent50)
872 #endif
873 #endif /* USE_KQUEUE */
874 
875 #undef	FINDSYM
876 #undef	FINDSY2
877 
878 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
879 		if ((p = getenv("RUMP_SERVER")) == NULL) {
880 			fprintf(stderr, "error: RUMP_SERVER not set\n");
881 			errno = ENOENT;
882 			goto out;
883 		}
884 	}
885 
886 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
887 		errno = error;
888 		goto out;
889 	}
890 
891 	if (doinit() == -1)
892 		goto out;
893 
894 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
895 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
896 		unsetenv("RUMPCLIENT__EXECFD");
897 		hstype = HANDSHAKE_EXEC;
898 	} else {
899 		if (doconnect() == -1)
900 			goto out;
901 		hstype = HANDSHAKE_GUEST;
902 	}
903 
904 	error = handshake_req(&clispc, hstype, NULL, 0, false);
905 	if (error) {
906 		pthread_mutex_destroy(&clispc.spc_mtx);
907 		pthread_cond_destroy(&clispc.spc_cv);
908 		if (clispc.spc_fd != -1)
909 			host_close(clispc.spc_fd);
910 		errno = error;
911 		goto out;
912 	}
913 	rv = 0;
914 
915  out:
916 	if (rv == -1)
917 		init_done = 0;
918 	return rv;
919 }
920 
/*
 * State created by rumpclient_prefork() and consumed by the routines
 * which run after the fork.
 */
struct rumpclient_fork {
	/* auth data copied from the server's prefork response */
	uint32_t fork_auth[AUTHLEN];
	/* copy of clispc taken at prefork time */
	struct spclient fork_spc;
	/* value of kq taken at prefork time */
	int fork_kq;
};
926 
927 struct rumpclient_fork *
928 rumpclient_prefork(void)
929 {
930 	struct rumpclient_fork *rpf;
931 	sigset_t omask;
932 	void *resp;
933 	int rv;
934 
935 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
936 	rpf = malloc(sizeof(*rpf));
937 	if (rpf == NULL)
938 		goto out;
939 
940 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
941 		free(rpf);
942 		errno = rv;
943 		rpf = NULL;
944 		goto out;
945 	}
946 
947 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
948 	free(resp);
949 
950 	rpf->fork_spc = clispc;
951 	rpf->fork_kq = kq;
952 
953  out:
954 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
955 	return rpf;
956 }
957 
958 int
959 rumpclient_fork_init(struct rumpclient_fork *rpf)
960 {
961 	int error;
962 	int osock;
963 
964 	osock = clispc.spc_fd;
965 	memset(&clispc, 0, sizeof(clispc));
966 	clispc.spc_fd = osock;
967 
968 	kq = -1; /* kqueue descriptor is not copied over fork() */
969 
970 	if (doinit() == -1)
971 		return -1;
972 	if (doconnect() == -1)
973 		return -1;
974 
975 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
976 	    0, false);
977 	if (error) {
978 		pthread_mutex_destroy(&clispc.spc_mtx);
979 		pthread_cond_destroy(&clispc.spc_cv);
980 		errno = error;
981 		return -1;
982 	}
983 
984 	return 0;
985 }
986 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: currently does nothing with rpf.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
994 
995 void
996 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
997 {
998 
999 	clispc = rpf->fork_spc;
1000 	kq = rpf->fork_kq;
1001 }
1002 
1003 void
1004 rumpclient_setconnretry(time_t timeout)
1005 {
1006 
1007 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1008 		return; /* gigo */
1009 
1010 	retrytimo = timeout;
1011 }
1012 
1013 int
1014 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1015 {
1016 	int fd = *fdp;
1017 	int untilfd, rv;
1018 	int newfd;
1019 
1020 	switch (variant) {
1021 	case RUMPCLIENT_CLOSE_FCLOSEM:
1022 		untilfd = MAX(clispc.spc_fd, kq);
1023 		for (; fd <= untilfd; fd++) {
1024 			if (fd == clispc.spc_fd || fd == kq)
1025 				continue;
1026 			rv = host_close(fd);
1027 			if (rv == -1)
1028 				return -1;
1029 		}
1030 		*fdp = fd;
1031 		break;
1032 
1033 	case RUMPCLIENT_CLOSE_CLOSE:
1034 	case RUMPCLIENT_CLOSE_DUP2:
1035 		if (fd == clispc.spc_fd) {
1036 			newfd = dupgood(clispc.spc_fd, 1);
1037 			if (newfd == -1)
1038 				return -1;
1039 
1040 #ifdef USE_KQUEUE
1041 			{
1042 			struct kevent kev[2];
1043 
1044 			/*
1045 			 * now, we have a new socket number, so change
1046 			 * the file descriptor that kqueue is
1047 			 * monitoring.  remove old and add new.
1048 			 */
1049 			EV_SET(&kev[0], clispc.spc_fd,
1050 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
1051 			EV_SET(&kev[1], newfd,
1052 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1053 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
1054 				int sverrno = errno;
1055 				host_close(newfd);
1056 				errno = sverrno;
1057 				return -1;
1058 			}
1059 			clispc.spc_fd = newfd;
1060 			}
1061 		}
1062 		if (fd == kq) {
1063 			newfd = dupgood(kq, 1);
1064 			if (newfd == -1)
1065 				return -1;
1066 			kq = newfd;
1067 #else /* USE_KQUEUE */
1068 			clispc.spc_fd = newfd;
1069 #endif /* !USE_KQUEUE */
1070 		}
1071 		break;
1072 	}
1073 
1074 	return 0;
1075 }
1076 
/*
 * fork() for rump clients: runs the host fork() through
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1083 
1084 /*
1085  * Process is about to exec.  Save info about our existing connection
1086  * in the env.  rumpclient will check for this info in init().
1087  * This is mostly for the benefit of rumphijack, but regular applications
1088  * may use it as well.
1089  */
1090 int
1091 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1092 {
1093 	char buf[4096];
1094 	char **newenv;
1095 	char *envstr, *envstr2;
1096 	size_t nelem;
1097 	int rv, sverrno;
1098 
1099 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1100 	    clispc.spc_fd, kq);
1101 	envstr = malloc(strlen(buf)+1);
1102 	if (envstr == NULL) {
1103 		return ENOMEM;
1104 	}
1105 	strcpy(envstr, buf);
1106 
1107 	/* do we have a fully parsed url we want to forward in the env? */
1108 	if (*parsedurl != '\0') {
1109 		snprintf(buf, sizeof(buf),
1110 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1111 		envstr2 = malloc(strlen(buf)+1);
1112 		if (envstr2 == NULL) {
1113 			free(envstr);
1114 			return ENOMEM;
1115 		}
1116 		strcpy(envstr2, buf);
1117 	} else {
1118 		envstr2 = NULL;
1119 	}
1120 
1121 	for (nelem = 0; envp && envp[nelem]; nelem++)
1122 		continue;
1123 
1124 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1125 	if (newenv == NULL) {
1126 		free(envstr2);
1127 		free(envstr);
1128 		return ENOMEM;
1129 	}
1130 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1131 
1132 	newenv[nelem] = envstr;
1133 	newenv[nelem+1] = envstr2;
1134 	newenv[nelem+2] = NULL;
1135 
1136 	rv = host_execve(path, argv, newenv);
1137 
1138 	_DIAGASSERT(rv != 0);
1139 	sverrno = errno;
1140 	free(envstr2);
1141 	free(envstr);
1142 	free(newenv);
1143 	errno = sverrno;
1144 	return rv;
1145 }
1146 
/*
 * daemon(3) for rump clients: prepare the fork with the server, let
 * the host daemon() detach the process, then reconnect as the forked
 * process.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/* rf is left to rumpclient_fork_cancel() on this path */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/*
	 * rumpclient_fork_init() only reads the auth data from rf, so
	 * the structure is no longer needed; release it without
	 * disturbing errno.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;
}
1168