xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision ba65fde2d7fefa7d39838fa5fa855e62bd606b5e)
1 /*      $NetBSD: rumpclient.c,v 1.54 2013/01/17 20:47:44 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include "rumpuser_port.h"
33 
34 /*
35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
36  * use kqueue on other BSD's too, but I haven't tested those.  We
37  * want to use kqueue because it will give us the ability to get signal
38  * notifications but defer their handling to a stage where we do not
39  * hold the communication lock.  Taking a signal while holding on to
40  * that lock may cause a deadlock.  Therefore, block signals throughout
41  * the RPC when using poll.  This unfortunately means that the normal
42  * SIGINT way of stopping a process while it is undergoing rump kernel
43  * RPC will not work.  If anyone knows which Linux system call handles
44  * the above scenario correctly, I'm all ears.
45  */
46 
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50 
51 __RCSID("$NetBSD: rumpclient.c,v 1.54 2013/01/17 20:47:44 pooka Exp $");
52 
53 #include <sys/param.h>
54 #include <sys/mman.h>
55 #include <sys/socket.h>
56 #include <sys/time.h>
57 
58 #ifdef USE_KQUEUE
59 #include <sys/event.h>
60 #endif
61 
62 #include <arpa/inet.h>
63 #include <netinet/in.h>
64 #include <netinet/tcp.h>
65 
66 #include <assert.h>
67 #include <dlfcn.h>
68 #include <errno.h>
69 #include <fcntl.h>
70 #include <poll.h>
71 #include <pthread.h>
72 #include <signal.h>
73 #include <stdarg.h>
74 #include <stdbool.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <unistd.h>
79 
80 #include <rump/rumpclient.h>
81 
/*
 * Pointers to the host's implementations of the calls this file uses
 * to talk to the server.  They are filled in by rumpclient_init() using
 * the FINDSYM()/FINDSYM2() macros (dlsym with RTLD_NEXT where available,
 * otherwise the plain libc symbol).
 *
 * NOTE(review): HOSTOPS is defined before sp_common.c is included;
 * presumably it tells that file to call through these pointers --
 * confirm against sp_common.c.
 */
82 #define HOSTOPS
83 int	(*host_socket)(int, int, int);
84 int	(*host_close)(int);
85 int	(*host_connect)(int, const struct sockaddr *, socklen_t);
86 int	(*host_fcntl)(int, int, ...);
87 int	(*host_poll)(struct pollfd *, nfds_t, int);
88 ssize_t	(*host_read)(int, void *, size_t);
89 ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
90 int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
91 int	(*host_dup)(int);
92 
/* kqueue entry points exist only when the kqueue-based receiver is built */
93 #ifdef USE_KQUEUE
94 int	(*host_kqueue)(void);
95 int	(*host_kevent)(int, const struct kevent *, size_t,
96 		       struct kevent *, size_t, const struct timespec *);
97 #endif
98 
/* used by rumpclient_exec() */
99 int	(*host_execve)(const char *, char *const[], char *const[]);
100 
101 #include "sp_common.c"
102 
/* state of the (single) connection to the server; fd is -1 until connected */
103 static struct spclient clispc = {
104 	.spc_fd = -1,
105 };
106 
/* kqueue descriptor used by the receiver, -1 when there is none */
107 static int kq = -1;
/* all-signals mask, filled in by rumpclient_init(); installed while an RPC runs */
108 static sigset_t fullset;
109 
/* forward declarations: both are used by send_with_recon() below */
110 static int doconnect(void);
111 static int handshake_req(struct spclient *, int, void *, int, bool);
112 
113 /*
114  * Default: don't retry.  Most clients can't handle it
115  * (consider e.g. fds suddenly going missing).
116  */
/* changed only through rumpclient_setconnretry() */
117 static time_t retrytimo = 0;
118 
119 /* always defined to nothingness for now */
120 #define ERRLOG(a)
121 
122 static int
123 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
124 {
125 	struct timeval starttime, curtime;
126 	time_t prevreconmsg;
127 	unsigned reconretries;
128 	int rv;
129 
130 	for (prevreconmsg = 0, reconretries = 0;;) {
131 		rv = dosend(spc, iov, iovlen);
132 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
133 			/* no persistent connections */
134 			if (retrytimo == 0) {
135 				rv = ENOTCONN;
136 				break;
137 			}
138 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
139 				_exit(1);
140 
141 			if (!prevreconmsg) {
142 				prevreconmsg = time(NULL);
143 				gettimeofday(&starttime, NULL);
144 			}
145 			if (reconretries == 1) {
146 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
147 					rv = ENOTCONN;
148 					break;
149 				}
150 				fprintf(stderr, "rump_sp: connection to "
151 				    "kernel lost, trying to reconnect ...\n");
152 			} else if (time(NULL) - prevreconmsg > 120) {
153 				fprintf(stderr, "rump_sp: still trying to "
154 				    "reconnect ...\n");
155 				prevreconmsg = time(NULL);
156 			}
157 
158 			/* check that we aren't over the limit */
159 			if (retrytimo > 0) {
160 				time_t tdiff;
161 
162 				gettimeofday(&curtime, NULL);
163 				tdiff = curtime.tv_sec - starttime.tv_sec;
164 				if (starttime.tv_usec > curtime.tv_usec)
165 					tdiff--;
166 				if (tdiff >= retrytimo) {
167 					fprintf(stderr, "rump_sp: reconnect "
168 					    "failed, %lld second timeout\n",
169 					    (long long)retrytimo);
170 					return ENOTCONN;
171 				}
172 			}
173 
174 			/* adhoc backoff timer */
175 			if (reconretries < 10) {
176 				usleep(100000 * reconretries);
177 			} else {
178 				sleep(MIN(10, reconretries-9));
179 			}
180 			reconretries++;
181 
182 			if ((rv = doconnect()) != 0)
183 				continue;
184 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
185 			    NULL, 0, true)) != 0)
186 				continue;
187 
188 			/*
189 			 * ok, reconnect succesful.  we need to return to
190 			 * the upper layer to get the entire PDU resent.
191 			 */
192 			if (reconretries != 1)
193 				fprintf(stderr, "rump_sp: reconnected!\n");
194 			rv = EAGAIN;
195 			break;
196 		} else {
197 			_DIAGASSERT(errno != EAGAIN);
198 			break;
199 		}
200 	}
201 
202 	return rv;
203 }
204 
/*
 * Wait until the response for the request described by rw has arrived.
 *
 * At most one thread at a time acts as the receiver (spc_istatus ==
 * SPCSTATUS_BUSY): it reads frames off the connection, hands responses
 * to their waiters via kickwaiter() and services requests coming from
 * the server via handlereq().  All other threads sleep on their rw_cv.
 *
 * spc:      connection to wait on
 * rw:       wait record of the request; removed from spc_respwait and
 *           its condition variable destroyed before returning
 * mask:     signal mask to install briefly when the kqueue receiver
 *           sees a signal event (callers pass their pre-RPC mask)
 * keeplock: if false, sendunlockl() is called before waiting
 *           (presumably releasing the send lock -- confirm in
 *           sp_common.c)
 *
 * Returns ENOTCONN if the connection generation changed or reading a
 * frame failed, otherwise rw->rw_error.
 */
205 static int
206 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
207 	bool keeplock)
208 {
209 	uint64_t mygen;
210 	bool imalive = true;
211 
212 	pthread_mutex_lock(&spc->spc_mtx);
213 	if (!keeplock)
214 		sendunlockl(spc);
	/* doconnect() bumps spc_generation; remember ours to notice a reconnect */
215 	mygen = spc->spc_generation;
216 
217 	rw->rw_error = 0;
218 	while (!rw->rw_done && rw->rw_error == 0) {
219 		if (__predict_false(spc->spc_generation != mygen || !imalive))
220 			break;
221 
222 		/* are we free to receive? */
223 		if (spc->spc_istatus == SPCSTATUS_FREE) {
224 			int gotresp, dosig, rv;
225 
			/* become the receiver; the I/O below runs without the mutex */
226 			spc->spc_istatus = SPCSTATUS_BUSY;
227 			pthread_mutex_unlock(&spc->spc_mtx);
228 
229 			dosig = 0;
			/* keep receiving until the frame answering our own request shows up */
230 			for (gotresp = 0; !gotresp; ) {
231 #ifdef USE_KQUEUE
232 				struct kevent kev[8];
233 				int i;
234 
235 				/*
236 				 * typically we don't have a frame waiting
237 				 * when we come in here, so call kevent now
238 				 */
239 				rv = host_kevent(kq, NULL, 0,
240 				    kev, __arraycount(kev), NULL);
241 
				/* on error let readframe() below determine the connection state */
242 				if (__predict_false(rv == -1)) {
243 					goto activity;
244 				}
245 
246 				/*
247 				 * XXX: don't know how this can happen
248 				 * (timeout cannot expire since there
249 				 * isn't one), but it does happen.
250 				 * treat it as an exceptional condition
251 				 * and go through tryread to determine
252 				 * alive status.
253 				 */
254 				if (__predict_false(rv == 0))
255 					goto activity;
256 
				/* count signal events; they are handled after giving up the receiver role */
257 				for (i = 0; i < rv; i++) {
258 					if (kev[i].filter == EVFILT_SIGNAL)
259 						dosig++;
260 				}
261 				if (dosig)
262 					goto cleanup;
263 
264 				/*
265 				 * ok, activity.  try to read a frame to
266 				 * determine what happens next.
267 				 */
268  activity:
269 #else /* USE_KQUEUE */
270 				struct pollfd pfd;
271 
272 				pfd.fd = clispc.spc_fd;
273 				pfd.events = POLLIN;
274 
				/* the poll result is not examined; readframe() below decides */
275 				rv = host_poll(&pfd, 1, -1);
276 #endif /* !USE_KQUEUE */
277 
				/*
				 * 0: go back to waiting (presumably no complete
				 * frame yet), -1: treat the connection as dead,
				 * anything else: a frame is ready in spc.
				 */
278 				switch (readframe(spc)) {
279 				case 0:
280 					continue;
281 				case -1:
282 					imalive = false;
283 					goto cleanup;
284 				default:
285 					/* case 1 */
286 					break;
287 				}
288 
289 				switch (spc->spc_hdr.rsp_class) {
290 				case RUMPSP_RESP:
291 				case RUMPSP_ERROR:
					/* pass it to its waiter; done if that waiter is us */
292 					kickwaiter(spc);
293 					gotresp = spc->spc_hdr.rsp_reqno ==
294 					    rw->rw_reqno;
295 					break;
296 				case RUMPSP_REQ:
					/* the server wants something from us (copyin, mmap, ...) */
297 					handlereq(spc);
298 					break;
299 				default:
300 					/* panic */
301 					break;
302 				}
303 			}
304 
305  cleanup:
			/* give up the receiver role, waking others if any asked for it */
306 			pthread_mutex_lock(&spc->spc_mtx);
307 			if (spc->spc_istatus == SPCSTATUS_WANTED)
308 				kickall(spc);
309 			spc->spc_istatus = SPCSTATUS_FREE;
310 
311 			/* take one for the team */
			/*
			 * Install the caller-supplied mask for a moment so that
			 * pending signals can be delivered while no lock is
			 * held, then block everything again.
			 */
312 			if (dosig) {
313 				pthread_mutex_unlock(&spc->spc_mtx);
314 				pthread_sigmask(SIG_SETMASK, mask, NULL);
315 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
316 				pthread_mutex_lock(&spc->spc_mtx);
317 			}
318 		} else {
			/* somebody else is receiving: ask to be woken and sleep */
319 			spc->spc_istatus = SPCSTATUS_WANTED;
320 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
321 		}
322 	}
323 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
324 	pthread_mutex_unlock(&spc->spc_mtx);
325 	pthread_cond_destroy(&rw->rw_cv);
326 
327 	if (spc->spc_generation != mygen || !imalive) {
328 		return ENOTCONN;
329 	}
330 	return rw->rw_error;
331 }
332 
333 static int
334 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
335 	const void *data, size_t dlen, void **resp)
336 {
337 	struct rsp_hdr rhdr;
338 	struct respwait rw;
339 	struct iovec iov[2];
340 	int rv;
341 
342 	rhdr.rsp_len = sizeof(rhdr) + dlen;
343 	rhdr.rsp_class = RUMPSP_REQ;
344 	rhdr.rsp_type = RUMPSP_SYSCALL;
345 	rhdr.rsp_sysnum = sysnum;
346 
347 	IOVPUT(iov[0], rhdr);
348 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
349 
350 	do {
351 		putwait(spc, &rw, &rhdr);
352 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
353 			unputwait(spc, &rw);
354 			continue;
355 		}
356 
357 		rv = cliwaitresp(spc, &rw, omask, false);
358 		if (rv == ENOTCONN)
359 			rv = EAGAIN;
360 	} while (rv == EAGAIN);
361 
362 	*resp = rw.rw_data;
363 	return rv;
364 }
365 
366 static int
367 handshake_req(struct spclient *spc, int type, void *data,
368 	int cancel, bool haslock)
369 {
370 	struct handshake_fork rf;
371 	const char *myprogname = NULL; /* XXXgcc */
372 	struct rsp_hdr rhdr;
373 	struct respwait rw;
374 	sigset_t omask;
375 	size_t bonus;
376 	struct iovec iov[2];
377 	int rv;
378 
379 	if (type == HANDSHAKE_FORK) {
380 		bonus = sizeof(rf);
381 	} else {
382 #ifdef __NetBSD__
383 		/* would procfs work on NetBSD too? */
384 		myprogname = getprogname();
385 #else
386 		int fd = open("/proc/self/comm", O_RDONLY);
387 		if (fd == -1) {
388 			myprogname = "???";
389 		} else {
390 			static char commname[128];
391 
392 			memset(commname, 0, sizeof(commname));
393 			if (read(fd, commname, sizeof(commname)) > 0) {
394 				char *n;
395 
396 				n = strrchr(commname, '\n');
397 				if (n)
398 					*n = '\0';
399 				myprogname = commname;
400 			} else {
401 				myprogname = "???";
402 			}
403 			close(fd);
404 		}
405 #endif
406 		bonus = strlen(myprogname)+1;
407 	}
408 
409 	/* performs server handshake */
410 	rhdr.rsp_len = sizeof(rhdr) + bonus;
411 	rhdr.rsp_class = RUMPSP_REQ;
412 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
413 	rhdr.rsp_handshake = type;
414 
415 	IOVPUT(iov[0], rhdr);
416 
417 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
418 	if (haslock)
419 		putwait_locked(spc, &rw, &rhdr);
420 	else
421 		putwait(spc, &rw, &rhdr);
422 	if (type == HANDSHAKE_FORK) {
423 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
424 		rf.rf_cancel = cancel;
425 		IOVPUT(iov[1], rf);
426 	} else {
427 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
428 	}
429 	rv = send_with_recon(spc, iov, __arraycount(iov));
430 	if (rv || cancel) {
431 		if (haslock)
432 			unputwait_locked(spc, &rw);
433 		else
434 			unputwait(spc, &rw);
435 		if (cancel) {
436 			goto out;
437 		}
438 	} else {
439 		rv = cliwaitresp(spc, &rw, &omask, haslock);
440 	}
441 	if (rv)
442 		goto out;
443 
444 	rv = *(int *)rw.rw_data;
445 	free(rw.rw_data);
446 
447  out:
448 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
449 	return rv;
450 }
451 
452 static int
453 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
454 {
455 	struct rsp_hdr rhdr;
456 	struct respwait rw;
457 	struct iovec iov[1];
458 	int rv;
459 
460 	rhdr.rsp_len = sizeof(rhdr);
461 	rhdr.rsp_class = RUMPSP_REQ;
462 	rhdr.rsp_type = RUMPSP_PREFORK;
463 	rhdr.rsp_error = 0;
464 
465 	IOVPUT(iov[0], rhdr);
466 
467 	do {
468 		putwait(spc, &rw, &rhdr);
469 		rv = send_with_recon(spc, iov, __arraycount(iov));
470 		if (rv != 0) {
471 			unputwait(spc, &rw);
472 			continue;
473 		}
474 
475 		rv = cliwaitresp(spc, &rw, omask, false);
476 		if (rv == ENOTCONN)
477 			rv = EAGAIN;
478 	} while (rv == EAGAIN);
479 
480 	*resp = rw.rw_data;
481 	return rv;
482 }
483 
484 /*
485  * prevent response code from deadlocking with reconnect code
486  */
487 static int
488 resp_sendlock(struct spclient *spc)
489 {
490 	int rv = 0;
491 
492 	pthread_mutex_lock(&spc->spc_mtx);
493 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
494 		if (__predict_false(spc->spc_reconnecting)) {
495 			rv = EBUSY;
496 			goto out;
497 		}
498 		spc->spc_ostatus = SPCSTATUS_WANTED;
499 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
500 	}
501 	spc->spc_ostatus = SPCSTATUS_BUSY;
502 
503  out:
504 	pthread_mutex_unlock(&spc->spc_mtx);
505 	return rv;
506 }
507 
508 static void
509 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
510 	int wantstr)
511 {
512 	struct rsp_hdr rhdr;
513 	struct iovec iov[2];
514 
515 	if (wantstr)
516 		dlen = MIN(dlen, strlen(data)+1);
517 
518 	rhdr.rsp_len = sizeof(rhdr) + dlen;
519 	rhdr.rsp_reqno = reqno;
520 	rhdr.rsp_class = RUMPSP_RESP;
521 	rhdr.rsp_type = RUMPSP_COPYIN;
522 	rhdr.rsp_sysnum = 0;
523 
524 	IOVPUT(iov[0], rhdr);
525 	IOVPUT_WITHSIZE(iov[1], data, dlen);
526 
527 	if (resp_sendlock(spc) != 0)
528 		return;
529 	(void)SENDIOV(spc, iov);
530 	sendunlock(spc);
531 }
532 
533 static void
534 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
535 {
536 	struct rsp_hdr rhdr;
537 	struct iovec iov[2];
538 
539 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
540 	rhdr.rsp_reqno = reqno;
541 	rhdr.rsp_class = RUMPSP_RESP;
542 	rhdr.rsp_type = RUMPSP_ANONMMAP;
543 	rhdr.rsp_sysnum = 0;
544 
545 	IOVPUT(iov[0], rhdr);
546 	IOVPUT(iov[1], addr);
547 
548 	if (resp_sendlock(spc) != 0)
549 		return;
550 	(void)SENDIOV(spc, iov);
551 	sendunlock(spc);
552 }
553 
554 int
555 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
556 	register_t *retval)
557 {
558 	struct rsp_sysresp *resp;
559 	sigset_t omask;
560 	void *rdata;
561 	int rv;
562 
563 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
564 
565 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
566 	    sysnum, data, dlen));
567 
568 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
569 	if (rv)
570 		goto out;
571 
572 	resp = rdata;
573 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
574 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
575 
576 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
577 	rv = resp->rsys_error;
578 	free(rdata);
579 
580  out:
581 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
582 	return rv;
583 }
584 
585 static void
586 handlereq(struct spclient *spc)
587 {
588 	struct rsp_copydata *copydata;
589 	struct rsp_hdr *rhdr = &spc->spc_hdr;
590 	void *mapaddr;
591 	size_t maplen;
592 	int reqtype = spc->spc_hdr.rsp_type;
593 
594 	switch (reqtype) {
595 	case RUMPSP_COPYIN:
596 	case RUMPSP_COPYINSTR:
597 		/*LINTED*/
598 		copydata = (struct rsp_copydata *)spc->spc_buf;
599 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
600 		    copydata->rcp_addr, copydata->rcp_len));
601 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
602 		    copydata->rcp_addr, copydata->rcp_len,
603 		    reqtype == RUMPSP_COPYINSTR);
604 		break;
605 	case RUMPSP_COPYOUT:
606 	case RUMPSP_COPYOUTSTR:
607 		/*LINTED*/
608 		copydata = (struct rsp_copydata *)spc->spc_buf;
609 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
610 		    copydata->rcp_addr, copydata->rcp_len));
611 		/*LINTED*/
612 		memcpy(copydata->rcp_addr, copydata->rcp_data,
613 		    copydata->rcp_len);
614 		break;
615 	case RUMPSP_ANONMMAP:
616 		/*LINTED*/
617 		maplen = *(size_t *)spc->spc_buf;
618 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
619 		    MAP_ANON, -1, 0);
620 		if (mapaddr == MAP_FAILED)
621 			mapaddr = NULL;
622 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
623 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
624 		break;
625 	case RUMPSP_RAISE:
626 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
627 		raise((int)rhdr->rsp_signo);
628 		/*
629 		 * We most likely have signals blocked, but the signal
630 		 * will be handled soon enough when we return.
631 		 */
632 		break;
633 	default:
634 		printf("PANIC: INVALID TYPE %d\n", reqtype);
635 		abort();
636 		break;
637 	}
638 
639 	spcfreebuf(spc);
640 }
641 
/*
 * Server address and the index of its entry in parsetab.  Both are
 * filled in by parseurl() in rumpclient_init() and used by doconnect().
 */
642 static unsigned ptab_idx;
643 static struct sockaddr *serv_sa;
644 
/*
 * dup until we get a "good" fd which does not collide with stdio,
 * i.e. one that is greater than 2.
 *
 * If mustchange is set, myfd is duplicated at least once even if it is
 * already good, and the descriptor passed in is left open.  Without
 * mustchange every descriptor that turned out to be "bad", including
 * the one passed in, is closed before returning.
 *
 * Returns the new descriptor, or -1 (myfd itself was -1 or dup failed;
 * errno from the failed dup is preserved across the cleanup).
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int sverrno = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* don't keep the slot: prevents closing the old fd */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	if (myfd == -1 && nstale > 0)
		sverrno = errno;

	/* close the rejected descriptors, most recent first */
	while (nstale > 0)
		host_close(stale[--nstale]);

	if (sverrno)
		errno = sverrno;

	return myfd;
}
676 
677 static int
678 doconnect(void)
679 {
680 	struct respwait rw;
681 	struct rsp_hdr rhdr;
682 	char banner[MAXBANNER];
683 	int s, error, flags;
684 	ssize_t n;
685 
686 	if (kq != -1)
687 		host_close(kq);
688 	kq = -1;
689 	s = -1;
690 
691 	if (clispc.spc_fd != -1)
692 		host_close(clispc.spc_fd);
693 	clispc.spc_fd = -1;
694 
695 	/*
696 	 * for reconnect, gate everyone out of the receiver code
697 	 */
698 	putwait_locked(&clispc, &rw, &rhdr);
699 
700 	pthread_mutex_lock(&clispc.spc_mtx);
701 	clispc.spc_reconnecting = 1;
702 	pthread_cond_broadcast(&clispc.spc_cv);
703 	clispc.spc_generation++;
704 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
705 		clispc.spc_istatus = SPCSTATUS_WANTED;
706 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
707 	}
708 	kickall(&clispc);
709 
710 	/*
711 	 * we can release it already since we hold the
712 	 * send lock during reconnect
713 	 * XXX: assert it
714 	 */
715 	clispc.spc_istatus = SPCSTATUS_FREE;
716 	pthread_mutex_unlock(&clispc.spc_mtx);
717 	unputwait_locked(&clispc, &rw);
718 
719 	free(clispc.spc_buf);
720 	clispc.spc_off = 0;
721 
722 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
723 	if (s == -1)
724 		return -1;
725 
726 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
727 		if (errno == EINTR)
728 			continue;
729 		ERRLOG(("rump_sp: client connect failed: %s\n",
730 		    strerror(errno)));
731 		return -1;
732 	}
733 
734 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
735 		ERRLOG(("rump_sp: connect hook failed\n"));
736 		return -1;
737 	}
738 
739 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
740 		ERRLOG(("rump_sp: failed to read banner\n"));
741 		return -1;
742 	}
743 
744 	if (banner[n-1] != '\n') {
745 		ERRLOG(("rump_sp: invalid banner\n"));
746 		return -1;
747 	}
748 	banner[n] = '\0';
749 	/* XXX parse the banner some day */
750 
751 	flags = host_fcntl(s, F_GETFL, 0);
752 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
753 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
754 		return -1;
755 	}
756 	clispc.spc_fd = s;
757 	clispc.spc_state = SPCSTATE_RUNNING;
758 	clispc.spc_reconnecting = 0;
759 
760 #ifdef USE_KQUEUE
761 {
762 	struct kevent kev[NSIG+1];
763 	int i;
764 
765 	/* setup kqueue, we want all signals and the fd */
766 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
767 		ERRLOG(("rump_sp: cannot setup kqueue"));
768 		return -1;
769 	}
770 
771 	for (i = 0; i < NSIG; i++) {
772 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
773 	}
774 	EV_SET(&kev[NSIG], clispc.spc_fd,
775 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
776 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
777 		ERRLOG(("rump_sp: kevent() failed"));
778 		return -1;
779 	}
780 }
781 #endif /* USE_KQUEUE */
782 
783 	return 0;
784 }
785 
786 static int
787 doinit(void)
788 {
789 
790 	TAILQ_INIT(&clispc.spc_respwait);
791 	pthread_mutex_init(&clispc.spc_mtx, NULL);
792 	pthread_cond_init(&clispc.spc_cv, NULL);
793 
794 	return 0;
795 }
796 
#ifdef RTLD_NEXT
/*
 * Plain dlsym() wrapper.  rumphijack_dlsym is declared as a weak alias
 * of it, so rumpclient_init() resolves host symbols through this
 * function unless another definition of rumphijack_dlsym is linked in.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *sym;

	sym = dlsym(handle, symbol);
	return sym;
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
808 
/*
 * pid of the process that last ran rumpclient_init(), 0 if none has (or
 * the last attempt failed).  Comparing it with getpid() lets
 * rumpclient_init() notice that it is running in a forked child.
 */
809 static pid_t init_done = 0;
810 
811 int
812 rumpclient_init(void)
813 {
814 	char *p;
815 	int error;
816 	int rv = -1;
817 	int hstype;
818 	pid_t mypid;
819 
820 	/*
821 	 * Make sure we're not riding the context of a previous
822 	 * host fork.  Note: it's *possible* that after n>1 forks
823 	 * we have the same pid as one of our exited parents, but
824 	 * I'm pretty sure there are 0 practical implications, since
825 	 * it means generations would have to skip rumpclient init.
826 	 */
827 	if (init_done == (mypid = getpid()))
828 		return 0;
829 
830 	/* kq does not traverse fork() */
831 	if (init_done != 0)
832 		kq = -1;
833 	init_done = mypid;
834 
835 	sigfillset(&fullset);
836 
837 	/*
838 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
839 	 * wann wird man je verstehen?  wann wird man je verstehen?
840 	 */
841 #ifdef RTLD_NEXT
842 #define FINDSYM2(_name_,_syscall_)					\
843 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
844 	    #_syscall_)) == NULL) {					\
845 		if (rumphijack_dlsym == rumpclient__dlsym)		\
846 			host_##_name_ = _name_; /* static fallback */	\
847 		if (host_##_name_ == NULL) {				\
848 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
849 			    dlerror());					\
850 			exit(1);					\
851 		}							\
852 	}
853 #else
854 #define FINDSYM2(_name_,_syscall)					\
855 	host_##_name_ = _name_;
856 #endif
857 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
858 #ifdef __NetBSD__
859 	FINDSYM2(socket,__socket30)
860 #else
861 	FINDSYM(socket)
862 #endif
863 
864 	FINDSYM(close)
865 	FINDSYM(connect)
866 	FINDSYM(fcntl)
867 	FINDSYM(poll)
868 	FINDSYM(read)
869 	FINDSYM(sendmsg)
870 	FINDSYM(setsockopt)
871 	FINDSYM(dup)
872 	FINDSYM(execve)
873 
874 #ifdef USE_KQUEUE
875 	FINDSYM(kqueue)
876 #if !__NetBSD_Prereq__(5,99,7)
877 	FINDSYM(kevent)
878 #else
879 	FINDSYM2(kevent,_sys___kevent50)
880 #endif
881 #endif /* USE_KQUEUE */
882 
883 #undef	FINDSYM
884 #undef	FINDSY2
885 
886 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
887 		if ((p = getenv("RUMP_SERVER")) == NULL) {
888 			fprintf(stderr, "error: RUMP_SERVER not set\n");
889 			errno = ENOENT;
890 			goto out;
891 		}
892 	}
893 
894 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
895 		errno = error;
896 		goto out;
897 	}
898 
899 	if (doinit() == -1)
900 		goto out;
901 
902 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
903 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
904 		unsetenv("RUMPCLIENT__EXECFD");
905 		hstype = HANDSHAKE_EXEC;
906 	} else {
907 		if (doconnect() == -1)
908 			goto out;
909 		hstype = HANDSHAKE_GUEST;
910 	}
911 
912 	error = handshake_req(&clispc, hstype, NULL, 0, false);
913 	if (error) {
914 		pthread_mutex_destroy(&clispc.spc_mtx);
915 		pthread_cond_destroy(&clispc.spc_cv);
916 		if (clispc.spc_fd != -1)
917 			host_close(clispc.spc_fd);
918 		errno = error;
919 		goto out;
920 	}
921 	rv = 0;
922 
923  out:
924 	if (rv == -1)
925 		init_done = 0;
926 	return rv;
927 }
928 
/*
 * State captured by rumpclient_prefork() and consumed by
 * rumpclient_fork_init() (child) and rumpclient_fork_vparent().
 */
929 struct rumpclient_fork {
	/* copied from the server's prefork response; sent back in the fork handshake */
930 	uint32_t fork_auth[AUTHLEN];
	/* copy of clispc at prefork time, restored by rumpclient_fork_vparent() */
931 	struct spclient fork_spc;
	/* value of kq at prefork time, restored by rumpclient_fork_vparent() */
932 	int fork_kq;
933 };
934 
935 struct rumpclient_fork *
936 rumpclient_prefork(void)
937 {
938 	struct rumpclient_fork *rpf;
939 	sigset_t omask;
940 	void *resp;
941 	int rv;
942 
943 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
944 	rpf = malloc(sizeof(*rpf));
945 	if (rpf == NULL)
946 		goto out;
947 
948 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
949 		free(rpf);
950 		errno = rv;
951 		rpf = NULL;
952 		goto out;
953 	}
954 
955 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
956 	free(resp);
957 
958 	rpf->fork_spc = clispc;
959 	rpf->fork_kq = kq;
960 
961  out:
962 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
963 	return rpf;
964 }
965 
966 int
967 rumpclient_fork_init(struct rumpclient_fork *rpf)
968 {
969 	int error;
970 	int osock;
971 
972 	osock = clispc.spc_fd;
973 	memset(&clispc, 0, sizeof(clispc));
974 	clispc.spc_fd = osock;
975 
976 	kq = -1; /* kqueue descriptor is not copied over fork() */
977 
978 	if (doinit() == -1)
979 		return -1;
980 	if (doconnect() == -1)
981 		return -1;
982 
983 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
984 	    0, false);
985 	if (error) {
986 		pthread_mutex_destroy(&clispc.spc_mtx);
987 		pthread_cond_destroy(&clispc.spc_cv);
988 		errno = error;
989 		return -1;
990 	}
991 
992 	return 0;
993 }
994 
/*
 * Cancel a fork prepared with rumpclient_prefork().  Not implemented:
 * the function does nothing with rpf.
 */
995 /*ARGSUSED*/
996 void
997 rumpclient_fork_cancel(struct rumpclient_fork *rpf)
998 {
999 
1000 	/* EUNIMPL */
1001 }
1002 
1003 void
1004 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
1005 {
1006 
1007 	clispc = rpf->fork_spc;
1008 	kq = rpf->fork_kq;
1009 }
1010 
1011 void
1012 rumpclient_setconnretry(time_t timeout)
1013 {
1014 
1015 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1016 		return; /* gigo */
1017 
1018 	retrytimo = timeout;
1019 }
1020 
1021 int
1022 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1023 {
1024 	int fd = *fdp;
1025 	int untilfd, rv;
1026 	int newfd;
1027 
1028 	switch (variant) {
1029 	case RUMPCLIENT_CLOSE_FCLOSEM:
1030 		untilfd = MAX(clispc.spc_fd, kq);
1031 		for (; fd <= untilfd; fd++) {
1032 			if (fd == clispc.spc_fd || fd == kq)
1033 				continue;
1034 			rv = host_close(fd);
1035 			if (rv == -1)
1036 				return -1;
1037 		}
1038 		*fdp = fd;
1039 		break;
1040 
1041 	case RUMPCLIENT_CLOSE_CLOSE:
1042 	case RUMPCLIENT_CLOSE_DUP2:
1043 		if (fd == clispc.spc_fd) {
1044 			newfd = dupgood(clispc.spc_fd, 1);
1045 			if (newfd == -1)
1046 				return -1;
1047 
1048 #ifdef USE_KQUEUE
1049 			{
1050 			struct kevent kev[2];
1051 
1052 			/*
1053 			 * now, we have a new socket number, so change
1054 			 * the file descriptor that kqueue is
1055 			 * monitoring.  remove old and add new.
1056 			 */
1057 			EV_SET(&kev[0], clispc.spc_fd,
1058 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
1059 			EV_SET(&kev[1], newfd,
1060 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1061 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
1062 				int sverrno = errno;
1063 				host_close(newfd);
1064 				errno = sverrno;
1065 				return -1;
1066 			}
1067 			clispc.spc_fd = newfd;
1068 			}
1069 		}
1070 		if (fd == kq) {
1071 			newfd = dupgood(kq, 1);
1072 			if (newfd == -1)
1073 				return -1;
1074 			kq = newfd;
1075 #else /* USE_KQUEUE */
1076 			clispc.spc_fd = newfd;
1077 #endif /* !USE_KQUEUE */
1078 		}
1079 		break;
1080 	}
1081 
1082 	return 0;
1083 }
1084 
/*
 * fork() for rump clients: runs the host fork() through
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1091 
1092 /*
1093  * Process is about to exec.  Save info about our existing connection
1094  * in the env.  rumpclient will check for this info in init().
1095  * This is mostly for the benefit of rumphijack, but regular applications
1096  * may use it as well.
1097  */
1098 int
1099 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1100 {
1101 	char buf[4096];
1102 	char **newenv;
1103 	char *envstr, *envstr2;
1104 	size_t nelem;
1105 	int rv, sverrno;
1106 
1107 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1108 	    clispc.spc_fd, kq);
1109 	envstr = malloc(strlen(buf)+1);
1110 	if (envstr == NULL) {
1111 		return ENOMEM;
1112 	}
1113 	strcpy(envstr, buf);
1114 
1115 	/* do we have a fully parsed url we want to forward in the env? */
1116 	if (*parsedurl != '\0') {
1117 		snprintf(buf, sizeof(buf),
1118 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1119 		envstr2 = malloc(strlen(buf)+1);
1120 		if (envstr2 == NULL) {
1121 			free(envstr);
1122 			return ENOMEM;
1123 		}
1124 		strcpy(envstr2, buf);
1125 	} else {
1126 		envstr2 = NULL;
1127 	}
1128 
1129 	for (nelem = 0; envp && envp[nelem]; nelem++)
1130 		continue;
1131 
1132 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1133 	if (newenv == NULL) {
1134 		free(envstr2);
1135 		free(envstr);
1136 		return ENOMEM;
1137 	}
1138 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1139 
1140 	newenv[nelem] = envstr;
1141 	newenv[nelem+1] = envstr2;
1142 	newenv[nelem+2] = NULL;
1143 
1144 	rv = host_execve(path, argv, newenv);
1145 
1146 	_DIAGASSERT(rv != 0);
1147 	sverrno = errno;
1148 	free(envstr2);
1149 	free(envstr);
1150 	free(newenv);
1151 	errno = sverrno;
1152 	return rv;
1153 }
1154 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * Forks (the parent exits with status 0), starts a new session, changes
 * to "/" unless nochdir is set and points stdin/stdout/stderr at
 * /dev/null unless noclose is set.  Finally the child's connection to
 * the rump kernel server is set up with rumpclient_fork_init().
 *
 * Returns 0 in the daemonized child on success, -1 on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);

		/*
		 * Best effort: if /dev/null cannot be opened, leave the
		 * stdio descriptors alone instead of handing -1 to dup2().
		 */
		if (fd != -1) {
			dup2(fd, 0);
			dup2(fd, 1);
			dup2(fd, 2);
			if (fd > 2)
				close(fd);
		}
	}

	/* note: fork is either completed or cancelled by the call */
	if (rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}
1202