xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 48fb7bfab72acd4281a53bbee5ccf3f809019e75)
1 /*      $NetBSD: rumpclient.c,v 1.57 2014/02/26 02:03:40 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include <rump/rumpuser_port.h>
33 
34 /*
35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
36  * use kqueue on other BSD's too, but I haven't tested those.  We
37  * want to use kqueue because it will give us the ability to get signal
38  * notifications but defer their handling to a stage where we do not
39  * hold the communication lock.  Taking a signal while holding on to
40  * that lock may cause a deadlock.  Therefore, block signals throughout
41  * the RPC when using poll.  This unfortunately means that the normal
42  * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
 * the above scenario correctly, I'm all ears.
45  */
46 
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50 
51 __RCSID("$NetBSD: rumpclient.c,v 1.57 2014/02/26 02:03:40 pooka Exp $");
52 
53 #include <sys/param.h>
54 #include <sys/mman.h>
55 #include <sys/socket.h>
56 #include <sys/time.h>
57 
58 #ifdef USE_KQUEUE
59 #include <sys/event.h>
60 #endif
61 
62 #include <arpa/inet.h>
63 #include <netinet/in.h>
64 #include <netinet/tcp.h>
65 
66 #include <assert.h>
67 #include <dlfcn.h>
68 #include <errno.h>
69 #include <fcntl.h>
70 #include <poll.h>
71 #include <pthread.h>
72 #include <signal.h>
73 #include <stdarg.h>
74 #include <stdbool.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <unistd.h>
79 
80 #include <rump/rumpclient.h>
81 
/*
 * Pointers to the host's versions of the calls this library needs for
 * talking to the server.  They are filled in by rumpclient_init()
 * through the FINDSYM()/FINDSYM2() macros and must not be used before
 * that has happened.
 *
 * NOTE(review): HOSTOPS is presumably consumed by the included
 * sp_common.c to select these pointers -- confirm there.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue entry points exist only when kqueue is the wait mechanism */
#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

/* used by rumpclient_exec() to perform the actual exec */
int	(*host_execve)(const char *, char *const[], char *const[]);
100 
101 #include "sp_common.c"
102 #include "rumpuser_sigtrans.c"
103 
/*
 * The connection to the server used by this client.  spc_fd stays -1
 * until doconnect() stores a connected socket in it.
 */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor set up by doconnect(); -1 when none is open */
static int kq = -1;
/* set containing every signal, filled in by rumpclient_init() */
static sigset_t fullset;

/* forward declarations: both are used by send_with_recon() below */
static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
122 
123 static int
124 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
125 {
126 	struct timeval starttime, curtime;
127 	time_t prevreconmsg;
128 	unsigned reconretries;
129 	int rv;
130 
131 	for (prevreconmsg = 0, reconretries = 0;;) {
132 		rv = dosend(spc, iov, iovlen);
133 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
134 			/* no persistent connections */
135 			if (retrytimo == 0) {
136 				rv = ENOTCONN;
137 				break;
138 			}
139 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
140 				_exit(1);
141 
142 			if (!prevreconmsg) {
143 				prevreconmsg = time(NULL);
144 				gettimeofday(&starttime, NULL);
145 			}
146 			if (reconretries == 1) {
147 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
148 					rv = ENOTCONN;
149 					break;
150 				}
151 				fprintf(stderr, "rump_sp: connection to "
152 				    "kernel lost, trying to reconnect ...\n");
153 			} else if (time(NULL) - prevreconmsg > 120) {
154 				fprintf(stderr, "rump_sp: still trying to "
155 				    "reconnect ...\n");
156 				prevreconmsg = time(NULL);
157 			}
158 
159 			/* check that we aren't over the limit */
160 			if (retrytimo > 0) {
161 				time_t tdiff;
162 
163 				gettimeofday(&curtime, NULL);
164 				tdiff = curtime.tv_sec - starttime.tv_sec;
165 				if (starttime.tv_usec > curtime.tv_usec)
166 					tdiff--;
167 				if (tdiff >= retrytimo) {
168 					fprintf(stderr, "rump_sp: reconnect "
169 					    "failed, %lld second timeout\n",
170 					    (long long)retrytimo);
171 					return ENOTCONN;
172 				}
173 			}
174 
175 			/* adhoc backoff timer */
176 			if (reconretries < 10) {
177 				usleep(100000 * reconretries);
178 			} else {
179 				sleep(MIN(10, reconretries-9));
180 			}
181 			reconretries++;
182 
183 			if ((rv = doconnect()) != 0)
184 				continue;
185 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
186 			    NULL, 0, true)) != 0)
187 				continue;
188 
189 			/*
190 			 * ok, reconnect succesful.  we need to return to
191 			 * the upper layer to get the entire PDU resent.
192 			 */
193 			if (reconretries != 1)
194 				fprintf(stderr, "rump_sp: reconnected!\n");
195 			rv = EAGAIN;
196 			break;
197 		} else {
198 			_DIAGASSERT(errno != EAGAIN);
199 			break;
200 		}
201 	}
202 
203 	return rv;
204 }
205 
/*
 * Wait for the response described by rw.
 *
 * Only one thread at a time reads from the connection: whoever finds
 * spc_istatus FREE marks it BUSY and reads frames, the others sleep on
 * their rw_cv.  The reading thread handles every frame it receives
 * (kicking the matching waiter for responses, servicing requests from
 * the server) until it has seen the response with its own request
 * number.
 *
 * spc      - connection to wait on
 * rw       - wait record previously queued on spc_respwait; it is
 *            removed from the queue and its condvar destroyed here
 * mask     - signal mask to install briefly when a signal event was
 *            reported by kevent, after which fullset is installed again
 * keeplock - if false, sendunlockl() is called before waiting
 *
 * Returns ENOTCONN if the connection generation changed while waiting
 * or readframe() reported failure, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation; remember the one we started in */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			/* become the receiver; read without holding the mutex */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				/* on error let readframe() decide what to do */
				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* count signal events among the returned ones */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				/* give up the receiver role to handle signals */
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				/* the result is not examined; readframe() follows */
				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					/* no complete frame yet, wait again */
					continue;
				case -1:
					/* connection is dead */
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop once our own response has arrived */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			pthread_mutex_lock(&spc->spc_mtx);
			/* somebody else wants to receive: wake the waiters */
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * install the caller's mask for a moment,
				 * then block everything again
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* another thread is receiving; sleep until kicked */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
333 
334 static int
335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
336 	const void *data, size_t dlen, void **resp)
337 {
338 	struct rsp_hdr rhdr;
339 	struct respwait rw;
340 	struct iovec iov[2];
341 	int rv;
342 
343 	rhdr.rsp_len = sizeof(rhdr) + dlen;
344 	rhdr.rsp_class = RUMPSP_REQ;
345 	rhdr.rsp_type = RUMPSP_SYSCALL;
346 	rhdr.rsp_sysnum = sysnum;
347 
348 	IOVPUT(iov[0], rhdr);
349 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
350 
351 	do {
352 		putwait(spc, &rw, &rhdr);
353 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
354 			unputwait(spc, &rw);
355 			continue;
356 		}
357 
358 		rv = cliwaitresp(spc, &rw, omask, false);
359 		if (rv == ENOTCONN)
360 			rv = EAGAIN;
361 	} while (rv == EAGAIN);
362 
363 	*resp = rw.rw_data;
364 	return rv;
365 }
366 
367 static int
368 handshake_req(struct spclient *spc, int type, void *data,
369 	int cancel, bool haslock)
370 {
371 	struct handshake_fork rf;
372 	const char *myprogname = NULL; /* XXXgcc */
373 	struct rsp_hdr rhdr;
374 	struct respwait rw;
375 	sigset_t omask;
376 	size_t bonus;
377 	struct iovec iov[2];
378 	int rv;
379 
380 	if (type == HANDSHAKE_FORK) {
381 		bonus = sizeof(rf);
382 	} else {
383 #ifdef __NetBSD__
384 		/* would procfs work on NetBSD too? */
385 		myprogname = getprogname();
386 #else
387 		int fd = open("/proc/self/comm", O_RDONLY);
388 		if (fd == -1) {
389 			myprogname = "???";
390 		} else {
391 			static char commname[128];
392 
393 			memset(commname, 0, sizeof(commname));
394 			if (read(fd, commname, sizeof(commname)) > 0) {
395 				char *n;
396 
397 				n = strrchr(commname, '\n');
398 				if (n)
399 					*n = '\0';
400 				myprogname = commname;
401 			} else {
402 				myprogname = "???";
403 			}
404 			close(fd);
405 		}
406 #endif
407 		bonus = strlen(myprogname)+1;
408 	}
409 
410 	/* performs server handshake */
411 	rhdr.rsp_len = sizeof(rhdr) + bonus;
412 	rhdr.rsp_class = RUMPSP_REQ;
413 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
414 	rhdr.rsp_handshake = type;
415 
416 	IOVPUT(iov[0], rhdr);
417 
418 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
419 	if (haslock)
420 		putwait_locked(spc, &rw, &rhdr);
421 	else
422 		putwait(spc, &rw, &rhdr);
423 	if (type == HANDSHAKE_FORK) {
424 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
425 		rf.rf_cancel = cancel;
426 		IOVPUT(iov[1], rf);
427 	} else {
428 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
429 	}
430 	rv = send_with_recon(spc, iov, __arraycount(iov));
431 	if (rv || cancel) {
432 		if (haslock)
433 			unputwait_locked(spc, &rw);
434 		else
435 			unputwait(spc, &rw);
436 		if (cancel) {
437 			goto out;
438 		}
439 	} else {
440 		rv = cliwaitresp(spc, &rw, &omask, haslock);
441 	}
442 	if (rv)
443 		goto out;
444 
445 	rv = *(int *)rw.rw_data;
446 	free(rw.rw_data);
447 
448  out:
449 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
450 	return rv;
451 }
452 
453 static int
454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
455 {
456 	struct rsp_hdr rhdr;
457 	struct respwait rw;
458 	struct iovec iov[1];
459 	int rv;
460 
461 	rhdr.rsp_len = sizeof(rhdr);
462 	rhdr.rsp_class = RUMPSP_REQ;
463 	rhdr.rsp_type = RUMPSP_PREFORK;
464 	rhdr.rsp_error = 0;
465 
466 	IOVPUT(iov[0], rhdr);
467 
468 	do {
469 		putwait(spc, &rw, &rhdr);
470 		rv = send_with_recon(spc, iov, __arraycount(iov));
471 		if (rv != 0) {
472 			unputwait(spc, &rw);
473 			continue;
474 		}
475 
476 		rv = cliwaitresp(spc, &rw, omask, false);
477 		if (rv == ENOTCONN)
478 			rv = EAGAIN;
479 	} while (rv == EAGAIN);
480 
481 	*resp = rw.rw_data;
482 	return rv;
483 }
484 
485 /*
486  * prevent response code from deadlocking with reconnect code
487  */
488 static int
489 resp_sendlock(struct spclient *spc)
490 {
491 	int rv = 0;
492 
493 	pthread_mutex_lock(&spc->spc_mtx);
494 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
495 		if (__predict_false(spc->spc_reconnecting)) {
496 			rv = EBUSY;
497 			goto out;
498 		}
499 		spc->spc_ostatus = SPCSTATUS_WANTED;
500 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
501 	}
502 	spc->spc_ostatus = SPCSTATUS_BUSY;
503 
504  out:
505 	pthread_mutex_unlock(&spc->spc_mtx);
506 	return rv;
507 }
508 
509 static void
510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
511 	int wantstr)
512 {
513 	struct rsp_hdr rhdr;
514 	struct iovec iov[2];
515 
516 	if (wantstr)
517 		dlen = MIN(dlen, strlen(data)+1);
518 
519 	rhdr.rsp_len = sizeof(rhdr) + dlen;
520 	rhdr.rsp_reqno = reqno;
521 	rhdr.rsp_class = RUMPSP_RESP;
522 	rhdr.rsp_type = RUMPSP_COPYIN;
523 	rhdr.rsp_sysnum = 0;
524 
525 	IOVPUT(iov[0], rhdr);
526 	IOVPUT_WITHSIZE(iov[1], data, dlen);
527 
528 	if (resp_sendlock(spc) != 0)
529 		return;
530 	(void)SENDIOV(spc, iov);
531 	sendunlock(spc);
532 }
533 
534 static void
535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
536 {
537 	struct rsp_hdr rhdr;
538 	struct iovec iov[2];
539 
540 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
541 	rhdr.rsp_reqno = reqno;
542 	rhdr.rsp_class = RUMPSP_RESP;
543 	rhdr.rsp_type = RUMPSP_ANONMMAP;
544 	rhdr.rsp_sysnum = 0;
545 
546 	IOVPUT(iov[0], rhdr);
547 	IOVPUT(iov[1], addr);
548 
549 	if (resp_sendlock(spc) != 0)
550 		return;
551 	(void)SENDIOV(spc, iov);
552 	sendunlock(spc);
553 }
554 
555 int
556 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
557 	register_t *retval)
558 {
559 	struct rsp_sysresp *resp;
560 	sigset_t omask;
561 	void *rdata;
562 	int rv;
563 
564 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
565 
566 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
567 	    sysnum, data, dlen));
568 
569 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
570 	if (rv)
571 		goto out;
572 
573 	resp = rdata;
574 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
575 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
576 
577 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
578 	rv = resp->rsys_error;
579 	free(rdata);
580 
581  out:
582 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
583 	return rv;
584 }
585 
586 static void
587 handlereq(struct spclient *spc)
588 {
589 	struct rsp_copydata *copydata;
590 	struct rsp_hdr *rhdr = &spc->spc_hdr;
591 	void *mapaddr;
592 	size_t maplen;
593 	int reqtype = spc->spc_hdr.rsp_type;
594 	int sig;
595 
596 	switch (reqtype) {
597 	case RUMPSP_COPYIN:
598 	case RUMPSP_COPYINSTR:
599 		/*LINTED*/
600 		copydata = (struct rsp_copydata *)spc->spc_buf;
601 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
602 		    copydata->rcp_addr, copydata->rcp_len));
603 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
604 		    copydata->rcp_addr, copydata->rcp_len,
605 		    reqtype == RUMPSP_COPYINSTR);
606 		break;
607 	case RUMPSP_COPYOUT:
608 	case RUMPSP_COPYOUTSTR:
609 		/*LINTED*/
610 		copydata = (struct rsp_copydata *)spc->spc_buf;
611 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
612 		    copydata->rcp_addr, copydata->rcp_len));
613 		/*LINTED*/
614 		memcpy(copydata->rcp_addr, copydata->rcp_data,
615 		    copydata->rcp_len);
616 		break;
617 	case RUMPSP_ANONMMAP:
618 		/*LINTED*/
619 		maplen = *(size_t *)spc->spc_buf;
620 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
621 		    MAP_ANON|MAP_PRIVATE, -1, 0);
622 		if (mapaddr == MAP_FAILED)
623 			mapaddr = NULL;
624 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
625 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
626 		break;
627 	case RUMPSP_RAISE:
628 		sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
629 		DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
630 		raise(sig);
631 		/*
632 		 * We most likely have signals blocked, but the signal
633 		 * will be handled soon enough when we return.
634 		 */
635 		break;
636 	default:
637 		printf("PANIC: INVALID TYPE %d\n", reqtype);
638 		abort();
639 		break;
640 	}
641 
642 	spcfreebuf(spc);
643 }
644 
/*
 * Server location as produced by parseurl() in rumpclient_init():
 * index into parsetab[] and the socket address doconnect() connects to.
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
647 
/*
 * dup until we get a "good" fd which does not collide with stdio.
 *
 * If mustchange is set, the descriptor is duplicated at least once and
 * the descriptor passed in is left open.  Every other intermediate
 * descriptor is closed before returning.
 *
 * Returns the new descriptor, or -1 if myfd was -1 or dup failed; if
 * descriptors had to be closed after a failed dup, errno from the dup
 * is preserved.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int nsaved = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nsaved < __arraycount(ofds));
		ofds[nsaved] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* prevent closing old fd: don't count the slot */
			mustchange = 0;
		} else {
			nsaved++;
		}
	}

	/* closing may clobber errno, so remember the dup failure */
	sverrno = (myfd == -1 && nsaved > 0) ? errno : 0;

	while (nsaved-- > 0)
		host_close(ofds[nsaved]);

	if (sverrno)
		errno = sverrno;

	return myfd;
}
679 
680 static int
681 doconnect(void)
682 {
683 	struct respwait rw;
684 	struct rsp_hdr rhdr;
685 	char banner[MAXBANNER];
686 	int s, error, flags;
687 	ssize_t n;
688 
689 	if (kq != -1)
690 		host_close(kq);
691 	kq = -1;
692 	s = -1;
693 
694 	if (clispc.spc_fd != -1)
695 		host_close(clispc.spc_fd);
696 	clispc.spc_fd = -1;
697 
698 	/*
699 	 * for reconnect, gate everyone out of the receiver code
700 	 */
701 	putwait_locked(&clispc, &rw, &rhdr);
702 
703 	pthread_mutex_lock(&clispc.spc_mtx);
704 	clispc.spc_reconnecting = 1;
705 	pthread_cond_broadcast(&clispc.spc_cv);
706 	clispc.spc_generation++;
707 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
708 		clispc.spc_istatus = SPCSTATUS_WANTED;
709 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
710 	}
711 	kickall(&clispc);
712 
713 	/*
714 	 * we can release it already since we hold the
715 	 * send lock during reconnect
716 	 * XXX: assert it
717 	 */
718 	clispc.spc_istatus = SPCSTATUS_FREE;
719 	pthread_mutex_unlock(&clispc.spc_mtx);
720 	unputwait_locked(&clispc, &rw);
721 
722 	free(clispc.spc_buf);
723 	clispc.spc_off = 0;
724 
725 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
726 	if (s == -1)
727 		return -1;
728 
729 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
730 		if (errno == EINTR)
731 			continue;
732 		ERRLOG(("rump_sp: client connect failed: %s\n",
733 		    strerror(errno)));
734 		return -1;
735 	}
736 
737 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
738 		ERRLOG(("rump_sp: connect hook failed\n"));
739 		return -1;
740 	}
741 
742 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
743 		ERRLOG(("rump_sp: failed to read banner\n"));
744 		return -1;
745 	}
746 
747 	if (banner[n-1] != '\n') {
748 		ERRLOG(("rump_sp: invalid banner\n"));
749 		return -1;
750 	}
751 	banner[n] = '\0';
752 	/* XXX parse the banner some day */
753 
754 	flags = host_fcntl(s, F_GETFL, 0);
755 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
756 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
757 		return -1;
758 	}
759 	clispc.spc_fd = s;
760 	clispc.spc_state = SPCSTATE_RUNNING;
761 	clispc.spc_reconnecting = 0;
762 
763 #ifdef USE_KQUEUE
764 {
765 	struct kevent kev[NSIG+1];
766 	int i;
767 
768 	/* setup kqueue, we want all signals and the fd */
769 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
770 		ERRLOG(("rump_sp: cannot setup kqueue"));
771 		return -1;
772 	}
773 
774 	for (i = 0; i < NSIG; i++) {
775 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
776 	}
777 	EV_SET(&kev[NSIG], clispc.spc_fd,
778 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
779 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
780 		ERRLOG(("rump_sp: kevent() failed"));
781 		return -1;
782 	}
783 }
784 #endif /* USE_KQUEUE */
785 
786 	return 0;
787 }
788 
789 static int
790 doinit(void)
791 {
792 
793 	TAILQ_INIT(&clispc.spc_respwait);
794 	pthread_mutex_init(&clispc.spc_mtx, NULL);
795 	pthread_cond_init(&clispc.spc_cv, NULL);
796 
797 	return 0;
798 }
799 
#ifdef RTLD_NEXT
/*
 * Default symbol lookup: a plain call to dlsym().  rumphijack_dlsym is
 * a weak alias of this function, and rumpclient_init() compares the
 * two to decide whether the default lookup is the one in use.
 */
void *rumpclient__dlsym(void *, const char *);

void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *sym;

	sym = dlsym(handle, symbol);
	return sym;
}

void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
811 
812 static pid_t init_done = 0;
813 
814 int
815 rumpclient_init(void)
816 {
817 	char *p;
818 	int error;
819 	int rv = -1;
820 	int hstype;
821 	pid_t mypid;
822 
823 	/*
824 	 * Make sure we're not riding the context of a previous
825 	 * host fork.  Note: it's *possible* that after n>1 forks
826 	 * we have the same pid as one of our exited parents, but
827 	 * I'm pretty sure there are 0 practical implications, since
828 	 * it means generations would have to skip rumpclient init.
829 	 */
830 	if (init_done == (mypid = getpid()))
831 		return 0;
832 
833 	/* kq does not traverse fork() */
834 	if (init_done != 0)
835 		kq = -1;
836 	init_done = mypid;
837 
838 	sigfillset(&fullset);
839 
840 	/*
841 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
842 	 * wann wird man je verstehen?  wann wird man je verstehen?
843 	 */
844 #ifdef RTLD_NEXT
845 #define FINDSYM2(_name_,_syscall_)					\
846 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
847 	    #_syscall_)) == NULL) {					\
848 		if (rumphijack_dlsym == rumpclient__dlsym)		\
849 			host_##_name_ = _name_; /* static fallback */	\
850 		if (host_##_name_ == NULL) {				\
851 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
852 			    dlerror());					\
853 			exit(1);					\
854 		}							\
855 	}
856 #else
857 #define FINDSYM2(_name_,_syscall)					\
858 	host_##_name_ = _name_;
859 #endif
860 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
861 #ifdef __NetBSD__
862 	FINDSYM2(socket,__socket30)
863 #else
864 	FINDSYM(socket)
865 #endif
866 
867 	FINDSYM(close)
868 	FINDSYM(connect)
869 	FINDSYM(fcntl)
870 	FINDSYM(poll)
871 	FINDSYM(read)
872 	FINDSYM(sendmsg)
873 	FINDSYM(setsockopt)
874 	FINDSYM(dup)
875 	FINDSYM(execve)
876 
877 #ifdef USE_KQUEUE
878 	FINDSYM(kqueue)
879 #if !__NetBSD_Prereq__(5,99,7)
880 	FINDSYM(kevent)
881 #else
882 	FINDSYM2(kevent,_sys___kevent50)
883 #endif
884 #endif /* USE_KQUEUE */
885 
886 #undef	FINDSYM
887 #undef	FINDSY2
888 
889 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
890 		if ((p = getenv("RUMP_SERVER")) == NULL) {
891 			fprintf(stderr, "error: RUMP_SERVER not set\n");
892 			errno = ENOENT;
893 			goto out;
894 		}
895 	}
896 
897 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
898 		errno = error;
899 		goto out;
900 	}
901 
902 	if (doinit() == -1)
903 		goto out;
904 
905 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
906 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
907 		unsetenv("RUMPCLIENT__EXECFD");
908 		hstype = HANDSHAKE_EXEC;
909 	} else {
910 		if (doconnect() == -1)
911 			goto out;
912 		hstype = HANDSHAKE_GUEST;
913 	}
914 
915 	error = handshake_req(&clispc, hstype, NULL, 0, false);
916 	if (error) {
917 		pthread_mutex_destroy(&clispc.spc_mtx);
918 		pthread_cond_destroy(&clispc.spc_cv);
919 		if (clispc.spc_fd != -1)
920 			host_close(clispc.spc_fd);
921 		errno = error;
922 		goto out;
923 	}
924 	rv = 0;
925 
926  out:
927 	if (rv == -1)
928 		init_done = 0;
929 	return rv;
930 }
931 
/*
 * State collected by rumpclient_prefork() and consumed by
 * rumpclient_fork_init() and rumpclient_fork_vparent().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the prefork response */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_kq;			/* value of kq at prefork time */
};
937 
938 struct rumpclient_fork *
939 rumpclient_prefork(void)
940 {
941 	struct rumpclient_fork *rpf;
942 	sigset_t omask;
943 	void *resp;
944 	int rv;
945 
946 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
947 	rpf = malloc(sizeof(*rpf));
948 	if (rpf == NULL)
949 		goto out;
950 
951 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
952 		free(rpf);
953 		errno = rv;
954 		rpf = NULL;
955 		goto out;
956 	}
957 
958 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
959 	free(resp);
960 
961 	rpf->fork_spc = clispc;
962 	rpf->fork_kq = kq;
963 
964  out:
965 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
966 	return rpf;
967 }
968 
969 int
970 rumpclient_fork_init(struct rumpclient_fork *rpf)
971 {
972 	int error;
973 	int osock;
974 
975 	osock = clispc.spc_fd;
976 	memset(&clispc, 0, sizeof(clispc));
977 	clispc.spc_fd = osock;
978 
979 	kq = -1; /* kqueue descriptor is not copied over fork() */
980 
981 	if (doinit() == -1)
982 		return -1;
983 	if (doconnect() == -1)
984 		return -1;
985 
986 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
987 	    0, false);
988 	if (error) {
989 		pthread_mutex_destroy(&clispc.spc_mtx);
990 		pthread_cond_destroy(&clispc.spc_cv);
991 		errno = error;
992 		return -1;
993 	}
994 
995 	return 0;
996 }
997 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the call does nothing and rpf is not used.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	return;
}
1005 
1006 void
1007 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
1008 {
1009 
1010 	clispc = rpf->fork_spc;
1011 	kq = rpf->fork_kq;
1012 }
1013 
1014 void
1015 rumpclient_setconnretry(time_t timeout)
1016 {
1017 
1018 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1019 		return; /* gigo */
1020 
1021 	retrytimo = timeout;
1022 }
1023 
/*
 * Notification that the descriptor *fdp is about to be affected by a
 * close-type operation, giving the library a chance to protect the
 * descriptors it uses itself (the server socket and the kqueue).
 *
 * RUMPCLIENT_CLOSE_FCLOSEM: close the descriptors from *fdp up to the
 * higher one of our own descriptors here, skipping our own, and store
 * the first descriptor not handled here back to *fdp.
 *
 * RUMPCLIENT_CLOSE_CLOSE / RUMPCLIENT_CLOSE_DUP2: if the descriptor is
 * one of ours, move ours to a new descriptor number with dupgood().
 *
 * Returns 0 on success and -1 on failure.
 */
int
rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
{
	int fd = *fdp;
	int untilfd, rv;
	int newfd;

	switch (variant) {
	case RUMPCLIENT_CLOSE_FCLOSEM:
		untilfd = MAX(clispc.spc_fd, kq);
		for (; fd <= untilfd; fd++) {
			/* never close our own descriptors */
			if (fd == clispc.spc_fd || fd == kq)
				continue;
			rv = host_close(fd);
			if (rv == -1)
				return -1;
		}
		*fdp = fd;
		break;

	case RUMPCLIENT_CLOSE_CLOSE:
	case RUMPCLIENT_CLOSE_DUP2:
		/*
		 * Note that the braces below are split across the
		 * preprocessor branches: with kqueue there are two
		 * separate if statements (socket and kqueue descriptor),
		 * without it only the socket one.
		 */
		if (fd == clispc.spc_fd) {
			newfd = dupgood(clispc.spc_fd, 1);
			if (newfd == -1)
				return -1;

#ifdef USE_KQUEUE
			{
			struct kevent kev[2];

			/*
			 * now, we have a new socket number, so change
			 * the file descriptor that kqueue is
			 * monitoring.  remove old and add new.
			 */
			EV_SET(&kev[0], clispc.spc_fd,
			    EVFILT_READ, EV_DELETE, 0, 0, 0);
			EV_SET(&kev[1], newfd,
			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
				/* closing may clobber errno, keep kevent's */
				int sverrno = errno;
				host_close(newfd);
				errno = sverrno;
				return -1;
			}
			clispc.spc_fd = newfd;
			}
		}
		if (fd == kq) {
			/* move the kqueue descriptor out of the way */
			newfd = dupgood(kq, 1);
			if (newfd == -1)
				return -1;
			kq = newfd;
#else /* USE_KQUEUE */
			clispc.spc_fd = newfd;
#endif /* !USE_KQUEUE */
		}
		break;
	}

	return 0;
}
1087 
/*
 * fork() for rump clients: runs the host fork() through
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1094 
1095 /*
1096  * Process is about to exec.  Save info about our existing connection
1097  * in the env.  rumpclient will check for this info in init().
1098  * This is mostly for the benefit of rumphijack, but regular applications
1099  * may use it as well.
1100  */
1101 int
1102 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1103 {
1104 	char buf[4096];
1105 	char **newenv;
1106 	char *envstr, *envstr2;
1107 	size_t nelem;
1108 	int rv, sverrno;
1109 
1110 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1111 	    clispc.spc_fd, kq);
1112 	envstr = malloc(strlen(buf)+1);
1113 	if (envstr == NULL) {
1114 		return ENOMEM;
1115 	}
1116 	strcpy(envstr, buf);
1117 
1118 	/* do we have a fully parsed url we want to forward in the env? */
1119 	if (*parsedurl != '\0') {
1120 		snprintf(buf, sizeof(buf),
1121 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1122 		envstr2 = malloc(strlen(buf)+1);
1123 		if (envstr2 == NULL) {
1124 			free(envstr);
1125 			return ENOMEM;
1126 		}
1127 		strcpy(envstr2, buf);
1128 	} else {
1129 		envstr2 = NULL;
1130 	}
1131 
1132 	for (nelem = 0; envp && envp[nelem]; nelem++)
1133 		continue;
1134 
1135 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1136 	if (newenv == NULL) {
1137 		free(envstr2);
1138 		free(envstr);
1139 		return ENOMEM;
1140 	}
1141 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1142 
1143 	newenv[nelem] = envstr;
1144 	newenv[nelem+1] = envstr2;
1145 	newenv[nelem+2] = NULL;
1146 
1147 	rv = host_execve(path, argv, newenv);
1148 
1149 	_DIAGASSERT(rv != 0);
1150 	sverrno = errno;
1151 	free(envstr2);
1152 	free(envstr);
1153 	free(newenv);
1154 	errno = sverrno;
1155 	return rv;
1156 }
1157 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * The process forks and the parent exits.  The child starts a new
 * session, changes to the root directory unless nochdir is set,
 * points stdin, stdout and stderr at /dev/null unless noclose is set,
 * and finally sets up its own connection to the server.
 *
 * Returns 0 in the child on success and -1 on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	pid_t child;
	int sverrno;

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	child = fork();
	if (child == -1)
		goto daemonerr;
	if (child != 0)
		_exit(0);	/* the parent's job is done */

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int nullfd = open("/dev/null", O_RDWR);

		dup2(nullfd, 0);
		dup2(nullfd, 1);
		dup2(nullfd, 2);
		/* keep the descriptor if it is one of the stdio ones */
		if (nullfd > 2)
			close(nullfd);
	}

	/* note: fork is either completed or cancelled by the call */
	return rumpclient_fork_init(rf) == -1 ? -1 : 0;

 daemonerr:
	/* cancelling must not disturb the error being reported */
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}
1205