xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /*      $NetBSD: rumpclient.c,v 1.65 2015/01/17 19:34:50 justin Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include <rump/rumpuser_port.h>
33 
34 /*
35  * We use kqueue on the BSDs, poll elsewhere.  We
36  * want to use kqueue because it will give us the ability to get signal
37  * notifications but defer their handling to a stage where we do not
38  * hold the communication lock.  Taking a signal while holding on to
39  * that lock may cause a deadlock.  Therefore, block signals throughout
40  * the RPC when using poll.  On Linux, we use signalfd in the same role
41  * as kqueue on NetBSD to be able to take signals while waiting for a
42  * response from the server.
43  */
44 
45 #if defined(__NetBSD__) || defined(__FreeBSD__) || \
46     defined(__DragonFly__) || defined(__OpenBSD__)
47 #define USE_KQUEUE
48 #endif
49 #if defined(__linux__)
50 #define USE_SIGNALFD
51 #endif
52 
53 __RCSID("$NetBSD: rumpclient.c,v 1.65 2015/01/17 19:34:50 justin Exp $");
54 
55 #include <sys/param.h>
56 #include <sys/mman.h>
57 #include <sys/socket.h>
58 #include <sys/time.h>
59 
60 #ifdef USE_KQUEUE
61 #include <sys/event.h>
62 #endif
63 
64 #include <arpa/inet.h>
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67 
68 #include <assert.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <poll.h>
73 #include <pthread.h>
74 #include <signal.h>
75 #include <stdarg.h>
76 #include <stdbool.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 
82 #include <rump/rumpclient.h>
83 
84 #define HOSTOPS
85 int	(*host_socket)(int, int, int);
86 int	(*host_close)(int);
87 int	(*host_connect)(int, const struct sockaddr *, socklen_t);
88 int	(*host_fcntl)(int, int, ...);
89 int	(*host_poll)(struct pollfd *, nfds_t, int);
90 ssize_t	(*host_read)(int, void *, size_t);
91 ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
92 int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
93 int	(*host_dup)(int);
94 
95 #ifdef USE_KQUEUE
96 int	(*host_kqueue)(void);
97 #ifdef __NetBSD__
98 int	(*host_kevent)(int, const struct kevent *, size_t,
99 		       struct kevent *, size_t, const struct timespec *);
100 #else
101 int	(*host_kevent)(int, const struct kevent *, int,
102 		       struct kevent *, int, const struct timespec *);
103 #endif
104 #endif
105 
106 #ifdef USE_SIGNALFD
107 #include <sys/signalfd.h>
108 
109 int	(*host_signalfd)(int, const sigset_t *, int);
110 #endif
111 
112 int	(*host_execve)(const char *, char *const[], char *const[]);
113 
114 #include "sp_common.c"
115 #include "rumpuser_sigtrans.c"
116 
117 static struct spclient clispc = {
118 	.spc_fd = -1,
119 };
120 
121 static int holyfd = -1;
122 static sigset_t fullset;
123 
124 static int doconnect(void);
125 static int handshake_req(struct spclient *, int, void *, int, bool);
126 
127 /*
128  * Default: don't retry.  Most clients can't handle it
129  * (consider e.g. fds suddenly going missing).
130  */
131 static time_t retrytimo = 0;
132 
133 /* always defined to nothingness for now */
134 #define ERRLOG(a)
135 
136 static int
137 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
138 {
139 	struct timeval starttime, curtime;
140 	time_t prevreconmsg;
141 	unsigned reconretries;
142 	int rv;
143 
144 	for (prevreconmsg = 0, reconretries = 0;;) {
145 		rv = dosend(spc, iov, iovlen);
146 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
147 			/* no persistent connections */
148 			if (retrytimo == 0) {
149 				rv = ENOTCONN;
150 				break;
151 			}
152 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
153 				_exit(1);
154 
155 			if (!prevreconmsg) {
156 				prevreconmsg = time(NULL);
157 				gettimeofday(&starttime, NULL);
158 			}
159 			if (reconretries == 1) {
160 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
161 					rv = ENOTCONN;
162 					break;
163 				}
164 				fprintf(stderr, "rump_sp: connection to "
165 				    "kernel lost, trying to reconnect ...\n");
166 			} else if (time(NULL) - prevreconmsg > 120) {
167 				fprintf(stderr, "rump_sp: still trying to "
168 				    "reconnect ...\n");
169 				prevreconmsg = time(NULL);
170 			}
171 
172 			/* check that we aren't over the limit */
173 			if (retrytimo > 0) {
174 				time_t tdiff;
175 
176 				gettimeofday(&curtime, NULL);
177 				tdiff = curtime.tv_sec - starttime.tv_sec;
178 				if (starttime.tv_usec > curtime.tv_usec)
179 					tdiff--;
180 				if (tdiff >= retrytimo) {
181 					fprintf(stderr, "rump_sp: reconnect "
182 					    "failed, %lld second timeout\n",
183 					    (long long)retrytimo);
184 					return ENOTCONN;
185 				}
186 			}
187 
188 			/* adhoc backoff timer */
189 			if (reconretries < 10) {
190 				usleep(100000 * reconretries);
191 			} else {
192 				sleep(MIN(10, reconretries-9));
193 			}
194 			reconretries++;
195 
196 			if ((rv = doconnect()) != 0)
197 				continue;
198 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
199 			    NULL, 0, true)) != 0)
200 				continue;
201 
202 			/*
203 			 * ok, reconnect succesful.  we need to return to
204 			 * the upper layer to get the entire PDU resent.
205 			 */
206 			if (reconretries != 1)
207 				fprintf(stderr, "rump_sp: reconnected!\n");
208 			rv = EAGAIN;
209 			break;
210 		} else {
211 			_DIAGASSERT(errno != EAGAIN);
212 			break;
213 		}
214 	}
215 
216 	return rv;
217 }
218 
219 static int
220 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
221 	bool keeplock)
222 {
223 	uint64_t mygen;
224 	bool imalive = true;
225 
226 	pthread_mutex_lock(&spc->spc_mtx);
227 	if (!keeplock)
228 		sendunlockl(spc);
229 	mygen = spc->spc_generation;
230 
231 	rw->rw_error = 0;
232 	while (!rw->rw_done && rw->rw_error == 0) {
233 		if (__predict_false(spc->spc_generation != mygen || !imalive))
234 			break;
235 
236 		/* are we free to receive? */
237 		if (spc->spc_istatus == SPCSTATUS_FREE) {
238 			int gotresp, dosig, rv;
239 
240 			spc->spc_istatus = SPCSTATUS_BUSY;
241 			pthread_mutex_unlock(&spc->spc_mtx);
242 
243 			dosig = 0;
244 			for (gotresp = 0; !gotresp; ) {
245 #ifdef USE_KQUEUE
246 				struct kevent kev[8];
247 				int i;
248 
249 				/*
250 				 * typically we don't have a frame waiting
251 				 * when we come in here, so call kevent now
252 				 */
253 				rv = host_kevent(holyfd, NULL, 0,
254 				    kev, __arraycount(kev), NULL);
255 
256 				if (__predict_false(rv == -1)) {
257 					goto activity;
258 				}
259 
260 				/*
261 				 * XXX: don't know how this can happen
262 				 * (timeout cannot expire since there
263 				 * isn't one), but it does happen.
264 				 * treat it as an expectional condition
265 				 * and go through tryread to determine
266 				 * alive status.
267 				 */
268 				if (__predict_false(rv == 0))
269 					goto activity;
270 
271 				for (i = 0; i < rv; i++) {
272 					if (kev[i].filter == EVFILT_SIGNAL)
273 						dosig++;
274 				}
275 				if (dosig)
276 					goto cleanup;
277 
278 				/*
279 				 * ok, activity.  try to read a frame to
280 				 * determine what happens next.
281 				 */
282  activity:
283 #else /* !USE_KQUEUE */
284 				struct pollfd pfd[2];
285 
286 				pfd[0].fd = clispc.spc_fd;
287 				pfd[0].events = POLLIN;
288 				pfd[1].fd = holyfd;
289 				pfd[1].events = POLLIN;
290 
291 				rv = host_poll(pfd, 2, -1);
292 				if (rv >= 1 && pfd[1].revents & POLLIN) {
293 					dosig = 1;
294 					goto cleanup;
295 				}
296 #endif /* !USE_KQUEUE */
297 
298 				switch (readframe(spc)) {
299 				case 0:
300 					continue;
301 				case -1:
302 					imalive = false;
303 					goto cleanup;
304 				default:
305 					/* case 1 */
306 					break;
307 				}
308 
309 				switch (spc->spc_hdr.rsp_class) {
310 				case RUMPSP_RESP:
311 				case RUMPSP_ERROR:
312 					kickwaiter(spc);
313 					gotresp = spc->spc_hdr.rsp_reqno ==
314 					    rw->rw_reqno;
315 					break;
316 				case RUMPSP_REQ:
317 					handlereq(spc);
318 					break;
319 				default:
320 					/* panic */
321 					break;
322 				}
323 			}
324 
325  cleanup:
326 			pthread_mutex_lock(&spc->spc_mtx);
327 			if (spc->spc_istatus == SPCSTATUS_WANTED)
328 				kickall(spc);
329 			spc->spc_istatus = SPCSTATUS_FREE;
330 
331 			/* take one for the team */
332 			if (dosig) {
333 				pthread_mutex_unlock(&spc->spc_mtx);
334 				pthread_sigmask(SIG_SETMASK, mask, NULL);
335 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
336 				pthread_mutex_lock(&spc->spc_mtx);
337 			}
338 		} else {
339 			spc->spc_istatus = SPCSTATUS_WANTED;
340 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
341 		}
342 	}
343 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
344 	pthread_mutex_unlock(&spc->spc_mtx);
345 	pthread_cond_destroy(&rw->rw_cv);
346 
347 	if (spc->spc_generation != mygen || !imalive) {
348 		return ENOTCONN;
349 	}
350 	return rw->rw_error;
351 }
352 
353 static int
354 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
355 	const void *data, size_t dlen, void **resp)
356 {
357 	struct rsp_hdr rhdr;
358 	struct respwait rw;
359 	struct iovec iov[2];
360 	int rv;
361 
362 	rhdr.rsp_len = sizeof(rhdr) + dlen;
363 	rhdr.rsp_class = RUMPSP_REQ;
364 	rhdr.rsp_type = RUMPSP_SYSCALL;
365 	rhdr.rsp_sysnum = sysnum;
366 
367 	IOVPUT(iov[0], rhdr);
368 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
369 
370 	do {
371 		putwait(spc, &rw, &rhdr);
372 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
373 			unputwait(spc, &rw);
374 			continue;
375 		}
376 
377 		rv = cliwaitresp(spc, &rw, omask, false);
378 		if (rv == ENOTCONN)
379 			rv = EAGAIN;
380 	} while (rv == EAGAIN);
381 
382 	*resp = rw.rw_data;
383 	return rv;
384 }
385 
386 static int
387 handshake_req(struct spclient *spc, int type, void *data,
388 	int cancel, bool haslock)
389 {
390 	struct handshake_fork rf;
391 	const char *myprogname = NULL; /* XXXgcc */
392 	struct rsp_hdr rhdr;
393 	struct respwait rw;
394 	sigset_t omask;
395 	size_t bonus;
396 	struct iovec iov[2];
397 	int rv;
398 
399 	if (type == HANDSHAKE_FORK) {
400 		bonus = sizeof(rf);
401 	} else {
402 #ifdef __NetBSD__
403 		/* would procfs work on NetBSD too? */
404 		myprogname = getprogname();
405 #else
406 		int fd = open("/proc/self/comm", O_RDONLY);
407 		if (fd == -1) {
408 			myprogname = "???";
409 		} else {
410 			static char commname[128];
411 
412 			memset(commname, 0, sizeof(commname));
413 			if (read(fd, commname, sizeof(commname)) > 0) {
414 				char *n;
415 
416 				n = strrchr(commname, '\n');
417 				if (n)
418 					*n = '\0';
419 				myprogname = commname;
420 			} else {
421 				myprogname = "???";
422 			}
423 			close(fd);
424 		}
425 #endif
426 		bonus = strlen(myprogname)+1;
427 	}
428 
429 	/* performs server handshake */
430 	rhdr.rsp_len = sizeof(rhdr) + bonus;
431 	rhdr.rsp_class = RUMPSP_REQ;
432 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
433 	rhdr.rsp_handshake = type;
434 
435 	IOVPUT(iov[0], rhdr);
436 
437 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
438 	if (haslock)
439 		putwait_locked(spc, &rw, &rhdr);
440 	else
441 		putwait(spc, &rw, &rhdr);
442 	if (type == HANDSHAKE_FORK) {
443 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
444 		rf.rf_cancel = cancel;
445 		IOVPUT(iov[1], rf);
446 	} else {
447 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
448 	}
449 	rv = send_with_recon(spc, iov, __arraycount(iov));
450 	if (rv || cancel) {
451 		if (haslock)
452 			unputwait_locked(spc, &rw);
453 		else
454 			unputwait(spc, &rw);
455 		if (cancel) {
456 			goto out;
457 		}
458 	} else {
459 		rv = cliwaitresp(spc, &rw, &omask, haslock);
460 	}
461 	if (rv)
462 		goto out;
463 
464 	rv = *(int *)rw.rw_data;
465 	free(rw.rw_data);
466 
467  out:
468 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
469 	return rv;
470 }
471 
472 static int
473 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
474 {
475 	struct rsp_hdr rhdr;
476 	struct respwait rw;
477 	struct iovec iov[1];
478 	int rv;
479 
480 	rhdr.rsp_len = sizeof(rhdr);
481 	rhdr.rsp_class = RUMPSP_REQ;
482 	rhdr.rsp_type = RUMPSP_PREFORK;
483 	rhdr.rsp_error = 0;
484 
485 	IOVPUT(iov[0], rhdr);
486 
487 	do {
488 		putwait(spc, &rw, &rhdr);
489 		rv = send_with_recon(spc, iov, __arraycount(iov));
490 		if (rv != 0) {
491 			unputwait(spc, &rw);
492 			continue;
493 		}
494 
495 		rv = cliwaitresp(spc, &rw, omask, false);
496 		if (rv == ENOTCONN)
497 			rv = EAGAIN;
498 	} while (rv == EAGAIN);
499 
500 	*resp = rw.rw_data;
501 	return rv;
502 }
503 
504 /*
505  * prevent response code from deadlocking with reconnect code
506  */
507 static int
508 resp_sendlock(struct spclient *spc)
509 {
510 	int rv = 0;
511 
512 	pthread_mutex_lock(&spc->spc_mtx);
513 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
514 		if (__predict_false(spc->spc_reconnecting)) {
515 			rv = EBUSY;
516 			goto out;
517 		}
518 		spc->spc_ostatus = SPCSTATUS_WANTED;
519 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
520 	}
521 	spc->spc_ostatus = SPCSTATUS_BUSY;
522 
523  out:
524 	pthread_mutex_unlock(&spc->spc_mtx);
525 	return rv;
526 }
527 
528 static void
529 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
530 	int wantstr)
531 {
532 	struct rsp_hdr rhdr;
533 	struct iovec iov[2];
534 
535 	if (wantstr)
536 		dlen = MIN(dlen, strlen(data)+1);
537 
538 	rhdr.rsp_len = sizeof(rhdr) + dlen;
539 	rhdr.rsp_reqno = reqno;
540 	rhdr.rsp_class = RUMPSP_RESP;
541 	rhdr.rsp_type = RUMPSP_COPYIN;
542 	rhdr.rsp_sysnum = 0;
543 
544 	IOVPUT(iov[0], rhdr);
545 	IOVPUT_WITHSIZE(iov[1], data, dlen);
546 
547 	if (resp_sendlock(spc) != 0)
548 		return;
549 	(void)SENDIOV(spc, iov);
550 	sendunlock(spc);
551 }
552 
553 static void
554 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
555 {
556 	struct rsp_hdr rhdr;
557 	struct iovec iov[2];
558 
559 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
560 	rhdr.rsp_reqno = reqno;
561 	rhdr.rsp_class = RUMPSP_RESP;
562 	rhdr.rsp_type = RUMPSP_ANONMMAP;
563 	rhdr.rsp_sysnum = 0;
564 
565 	IOVPUT(iov[0], rhdr);
566 	IOVPUT(iov[1], addr);
567 
568 	if (resp_sendlock(spc) != 0)
569 		return;
570 	(void)SENDIOV(spc, iov);
571 	sendunlock(spc);
572 }
573 
574 int
575 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
576 	register_t *retval)
577 {
578 	struct rsp_sysresp *resp;
579 	sigset_t omask;
580 	void *rdata;
581 	int rv;
582 
583 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
584 
585 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
586 	    sysnum, data, dlen));
587 
588 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
589 	if (rv)
590 		goto out;
591 
592 	resp = rdata;
593 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
594 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
595 
596 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
597 	rv = resp->rsys_error;
598 	free(rdata);
599 
600  out:
601 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
602 	return rv;
603 }
604 
605 static void
606 handlereq(struct spclient *spc)
607 {
608 	struct rsp_copydata *copydata;
609 	struct rsp_hdr *rhdr = &spc->spc_hdr;
610 	void *mapaddr;
611 	size_t maplen;
612 	int reqtype = spc->spc_hdr.rsp_type;
613 	int sig;
614 
615 	switch (reqtype) {
616 	case RUMPSP_COPYIN:
617 	case RUMPSP_COPYINSTR:
618 		/*LINTED*/
619 		copydata = (struct rsp_copydata *)spc->spc_buf;
620 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
621 		    copydata->rcp_addr, copydata->rcp_len));
622 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
623 		    copydata->rcp_addr, copydata->rcp_len,
624 		    reqtype == RUMPSP_COPYINSTR);
625 		break;
626 	case RUMPSP_COPYOUT:
627 	case RUMPSP_COPYOUTSTR:
628 		/*LINTED*/
629 		copydata = (struct rsp_copydata *)spc->spc_buf;
630 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
631 		    copydata->rcp_addr, copydata->rcp_len));
632 		/*LINTED*/
633 		memcpy(copydata->rcp_addr, copydata->rcp_data,
634 		    copydata->rcp_len);
635 		break;
636 	case RUMPSP_ANONMMAP:
637 		/*LINTED*/
638 		maplen = *(size_t *)spc->spc_buf;
639 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
640 		    MAP_ANON|MAP_PRIVATE, -1, 0);
641 		if (mapaddr == MAP_FAILED)
642 			mapaddr = NULL;
643 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
644 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
645 		break;
646 	case RUMPSP_RAISE:
647 		sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
648 		DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
649 		raise(sig);
650 		/*
651 		 * We most likely have signals blocked, but the signal
652 		 * will be handled soon enough when we return.
653 		 */
654 		break;
655 	default:
656 		printf("PANIC: INVALID TYPE %d\n", reqtype);
657 		abort();
658 		break;
659 	}
660 
661 	spcfreebuf(spc);
662 }
663 
664 static unsigned ptab_idx;
665 static struct sockaddr *serv_sa;
666 
/*
 * Duplicate myfd until the result is a "good" descriptor, i.e. one
 * that does not collide with stdio (greater than 2).  If mustchange
 * is set, at least one dup is always done and the descriptor passed
 * in by the caller is left open; every other intermediate descriptor
 * is closed.
 *
 * Returns the good descriptor, or -1 if myfd was -1 or a dup failed
 * (errno from the failed dup is preserved across the closes).
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int sverrno = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* do not record the caller's fd for closing */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	if (myfd == -1 && nstale > 0)
		sverrno = errno;

	/* close the rejected descriptors, most recent first */
	while (nstale > 0)
		host_close(stale[--nstale]);

	if (sverrno)
		errno = sverrno;

	return myfd;
}
698 
#if defined(USE_KQUEUE)

/*
 * Create the descriptor used for waiting for server data and signals.
 * kqueue flavour: a kqueue (moved away from the stdio descriptors)
 * with a signal filter for signal numbers 1..NSIG and a read filter
 * for the server socket.
 *
 * Returns the descriptor or -1 on failure.
 */
static int
makeholyfd(void)
{
	struct kevent changes[NSIG+1];
	int signo, kq;

	/* setup kqueue, we want all signals and the fd */
	kq = dupgood(host_kqueue(), 0);
	if (kq == -1) {
		ERRLOG(("rump_sp: cannot setup kqueue"));
		return -1;
	}

	for (signo = 1; signo <= NSIG; signo++) {
		EV_SET(&changes[signo-1], signo, EVFILT_SIGNAL,
		    EV_ADD|EV_ENABLE, 0, 0, 0);
	}
	EV_SET(&changes[NSIG], clispc.spc_fd,
	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);

	if (host_kevent(kq, changes, NSIG+1, NULL, 0, NULL) == -1) {
		ERRLOG(("rump_sp: kevent() failed"));
		host_close(kq);
		return -1;
	}

	return kq;
}

#elif defined(USE_SIGNALFD) /* !USE_KQUEUE */

/*
 * Create the descriptor used for waiting for signals.
 * signalfd flavour: a signalfd covering every signal in fullset.
 *
 * Returns whatever signalfd() returned.
 */
static int
makeholyfd(void)
{
	int sfd;

	sfd = host_signalfd(-1, &fullset, 0);
	return sfd;
}

#else /* !USE_KQUEUE && !USE_SIGNALFD */

/*
 * No signal notification descriptor is available in this
 * configuration: always returns -1.
 */
static int
makeholyfd(void)
{

	return -1;
}

#endif
746 
747 static int
748 doconnect(void)
749 {
750 	struct respwait rw;
751 	struct rsp_hdr rhdr;
752 	char banner[MAXBANNER];
753 	int s, error, flags;
754 	ssize_t n;
755 
756 	if (holyfd != -1)
757 		host_close(holyfd);
758 	holyfd = -1;
759 	s = -1;
760 
761 	if (clispc.spc_fd != -1)
762 		host_close(clispc.spc_fd);
763 	clispc.spc_fd = -1;
764 
765 	/*
766 	 * for reconnect, gate everyone out of the receiver code
767 	 */
768 	putwait_locked(&clispc, &rw, &rhdr);
769 
770 	pthread_mutex_lock(&clispc.spc_mtx);
771 	clispc.spc_reconnecting = 1;
772 	pthread_cond_broadcast(&clispc.spc_cv);
773 	clispc.spc_generation++;
774 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
775 		clispc.spc_istatus = SPCSTATUS_WANTED;
776 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
777 	}
778 	kickall(&clispc);
779 
780 	/*
781 	 * we can release it already since we hold the
782 	 * send lock during reconnect
783 	 * XXX: assert it
784 	 */
785 	clispc.spc_istatus = SPCSTATUS_FREE;
786 	pthread_mutex_unlock(&clispc.spc_mtx);
787 	unputwait_locked(&clispc, &rw);
788 
789 	free(clispc.spc_buf);
790 	clispc.spc_off = 0;
791 
792 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
793 	if (s == -1)
794 		return -1;
795 
796 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
797 		if (errno == EINTR)
798 			continue;
799 		ERRLOG(("rump_sp: client connect failed: %s\n",
800 		    strerror(errno)));
801 		return -1;
802 	}
803 
804 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
805 		ERRLOG(("rump_sp: connect hook failed\n"));
806 		return -1;
807 	}
808 
809 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
810 		ERRLOG(("rump_sp: failed to read banner\n"));
811 		return -1;
812 	}
813 
814 	if (banner[n-1] != '\n') {
815 		ERRLOG(("rump_sp: invalid banner\n"));
816 		return -1;
817 	}
818 	banner[n] = '\0';
819 	/* XXX parse the banner some day */
820 
821 	flags = host_fcntl(s, F_GETFL, 0);
822 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
823 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
824 		return -1;
825 	}
826 	clispc.spc_fd = s;
827 	clispc.spc_state = SPCSTATE_RUNNING;
828 	clispc.spc_reconnecting = 0;
829 	holyfd = makeholyfd();
830 
831 	return 0;
832 }
833 
834 static int
835 doinit(void)
836 {
837 
838 	TAILQ_INIT(&clispc.spc_respwait);
839 	pthread_mutex_init(&clispc.spc_mtx, NULL);
840 	pthread_cond_init(&clispc.spc_cv, NULL);
841 
842 	return 0;
843 }
844 
#ifdef RTLD_NEXT
/*
 * Default symbol lookup: a plain wrapper around dlsym().
 * rumphijack_dlsym is a weak alias for it, so another definition of
 * rumphijack_dlsym can take its place at link time.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
856 
857 static pid_t init_done = 0;
858 
859 int
860 rumpclient_init(void)
861 {
862 	char *p;
863 	int error;
864 	int rv = -1;
865 	int hstype;
866 	pid_t mypid;
867 
868 	/*
869 	 * Make sure we're not riding the context of a previous
870 	 * host fork.  Note: it's *possible* that after n>1 forks
871 	 * we have the same pid as one of our exited parents, but
872 	 * I'm pretty sure there are 0 practical implications, since
873 	 * it means generations would have to skip rumpclient init.
874 	 */
875 	if (init_done == (mypid = getpid()))
876 		return 0;
877 
878 #ifdef USE_KQUEUE
879 	/* kq does not traverse fork() */
880 	holyfd = -1;
881 #endif
882 	init_done = mypid;
883 
884 	sigfillset(&fullset);
885 
886 	/*
887 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
888 	 * wann wird man je verstehen?  wann wird man je verstehen?
889 	 */
890 #ifdef RTLD_NEXT
891 #define FINDSYM2(_name_,_syscall_)					\
892 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
893 	    #_syscall_)) == NULL) {					\
894 		if (rumphijack_dlsym == rumpclient__dlsym)		\
895 			host_##_name_ = _name_; /* static fallback */	\
896 		if (host_##_name_ == NULL) {				\
897 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
898 			    dlerror());					\
899 			exit(1);					\
900 		}							\
901 	}
902 #else
903 #define FINDSYM2(_name_,_syscall)					\
904 	host_##_name_ = _name_;
905 #endif
906 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
907 #ifdef __NetBSD__
908 	FINDSYM2(socket,__socket30)
909 #else
910 	FINDSYM(socket)
911 #endif
912 
913 	FINDSYM(close)
914 	FINDSYM(connect)
915 	FINDSYM(fcntl)
916 	FINDSYM(poll)
917 	FINDSYM(read)
918 	FINDSYM(sendmsg)
919 	FINDSYM(setsockopt)
920 	FINDSYM(dup)
921 	FINDSYM(execve)
922 
923 #ifdef USE_KQUEUE
924 	FINDSYM(kqueue)
925 #ifdef __NetBSD__
926 #if !__NetBSD_Prereq__(5,99,7)
927 	FINDSYM(kevent)
928 #else
929 	FINDSYM2(kevent,_sys___kevent50)
930 #endif
931 #else
932 	FINDSYM(kevent)
933 #endif
934 #endif /* USE_KQUEUE */
935 
936 #ifdef USE_SIGNALFD
937 	FINDSYM(signalfd)
938 #endif
939 
940 #undef	FINDSYM
941 #undef	FINDSY2
942 
943 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
944 		if ((p = getenv("RUMP_SERVER")) == NULL) {
945 			fprintf(stderr, "error: RUMP_SERVER not set\n");
946 			errno = ENOENT;
947 			goto out;
948 		}
949 	}
950 
951 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
952 		errno = error;
953 		goto out;
954 	}
955 
956 	if (doinit() == -1)
957 		goto out;
958 
959 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
960 		sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
961 		unsetenv("RUMPCLIENT__EXECFD");
962 		hstype = HANDSHAKE_EXEC;
963 	} else {
964 		if (doconnect() == -1)
965 			goto out;
966 		hstype = HANDSHAKE_GUEST;
967 	}
968 
969 	error = handshake_req(&clispc, hstype, NULL, 0, false);
970 	if (error) {
971 		pthread_mutex_destroy(&clispc.spc_mtx);
972 		pthread_cond_destroy(&clispc.spc_cv);
973 		if (clispc.spc_fd != -1)
974 			host_close(clispc.spc_fd);
975 		errno = error;
976 		goto out;
977 	}
978 	rv = 0;
979 
980  out:
981 	if (rv == -1)
982 		init_done = 0;
983 	return rv;
984 }
985 
986 struct rumpclient_fork {
987 	uint32_t fork_auth[AUTHLEN];
988 	struct spclient fork_spc;
989 	int fork_holyfd;
990 };
991 
992 struct rumpclient_fork *
993 rumpclient_prefork(void)
994 {
995 	struct rumpclient_fork *rpf;
996 	sigset_t omask;
997 	void *resp;
998 	int rv;
999 
1000 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
1001 	rpf = malloc(sizeof(*rpf));
1002 	if (rpf == NULL)
1003 		goto out;
1004 
1005 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
1006 		free(rpf);
1007 		errno = rv;
1008 		rpf = NULL;
1009 		goto out;
1010 	}
1011 
1012 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
1013 	free(resp);
1014 
1015 	rpf->fork_spc = clispc;
1016 	rpf->fork_holyfd = holyfd;
1017 
1018  out:
1019 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
1020 	return rpf;
1021 }
1022 
1023 int
1024 rumpclient_fork_init(struct rumpclient_fork *rpf)
1025 {
1026 	int error;
1027 	int osock;
1028 
1029 	osock = clispc.spc_fd;
1030 	memset(&clispc, 0, sizeof(clispc));
1031 	clispc.spc_fd = osock;
1032 
1033 #ifdef USE_KQUEUE
1034 	holyfd = -1; /* kqueue descriptor is not copied over fork() */
1035 #else
1036 	if (holyfd != -1) {
1037 		host_close(holyfd);
1038 		holyfd = -1;
1039 	}
1040 #endif
1041 
1042 	if (doinit() == -1)
1043 		return -1;
1044 	if (doconnect() == -1)
1045 		return -1;
1046 
1047 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
1048 	    0, false);
1049 	if (error) {
1050 		pthread_mutex_destroy(&clispc.spc_mtx);
1051 		pthread_cond_destroy(&clispc.spc_cv);
1052 		errno = error;
1053 		return -1;
1054 	}
1055 
1056 	return 0;
1057 }
1058 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the context is currently ignored.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
1066 
1067 void
1068 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
1069 {
1070 
1071 	clispc = rpf->fork_spc;
1072 	holyfd = rpf->fork_holyfd;
1073 }
1074 
1075 void
1076 rumpclient_setconnretry(time_t timeout)
1077 {
1078 
1079 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1080 		return; /* gigo */
1081 
1082 	retrytimo = timeout;
1083 }
1084 
1085 int
1086 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1087 {
1088 	int fd = *fdp;
1089 	int untilfd, rv;
1090 	int newfd;
1091 
1092 	switch (variant) {
1093 	case RUMPCLIENT_CLOSE_FCLOSEM:
1094 		untilfd = MAX(clispc.spc_fd, holyfd);
1095 		for (; fd <= untilfd; fd++) {
1096 			if (fd == clispc.spc_fd || fd == holyfd)
1097 				continue;
1098 			rv = host_close(fd);
1099 			if (rv == -1)
1100 				return -1;
1101 		}
1102 		*fdp = fd;
1103 		break;
1104 
1105 	case RUMPCLIENT_CLOSE_CLOSE:
1106 	case RUMPCLIENT_CLOSE_DUP2:
1107 		if (fd == clispc.spc_fd) {
1108 			newfd = dupgood(clispc.spc_fd, 1);
1109 			if (newfd == -1)
1110 				return -1;
1111 
1112 #ifdef USE_KQUEUE
1113 			{
1114 			struct kevent kev[2];
1115 
1116 			/*
1117 			 * now, we have a new socket number, so change
1118 			 * the file descriptor that kqueue is
1119 			 * monitoring.  remove old and add new.
1120 			 */
1121 			EV_SET(&kev[0], clispc.spc_fd,
1122 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
1123 			EV_SET(&kev[1], newfd,
1124 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1125 			if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
1126 				int sverrno = errno;
1127 				host_close(newfd);
1128 				errno = sverrno;
1129 				return -1;
1130 			}}
1131 #endif /* !USE_KQUEUE */
1132 			clispc.spc_fd = newfd;
1133 		}
1134 		if (holyfd != -1 && fd == holyfd) {
1135 			newfd = dupgood(holyfd, 1);
1136 			if (newfd == -1)
1137 				return -1;
1138 			holyfd = newfd;
1139 		}
1140 		break;
1141 	}
1142 
1143 	return 0;
1144 }
1145 
/*
 * fork() for rump clients: hands the host's fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t child;

	child = rumpclient__dofork(fork);
	return child;
}
1152 
1153 /*
1154  * Process is about to exec.  Save info about our existing connection
1155  * in the env.  rumpclient will check for this info in init().
1156  * This is mostly for the benefit of rumphijack, but regular applications
1157  * may use it as well.
1158  */
1159 int
1160 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1161 {
1162 	char buf[4096];
1163 	char **newenv;
1164 	char *envstr, *envstr2;
1165 	size_t nelem;
1166 	int rv, sverrno;
1167 
1168 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1169 	    clispc.spc_fd, holyfd);
1170 	envstr = malloc(strlen(buf)+1);
1171 	if (envstr == NULL) {
1172 		return ENOMEM;
1173 	}
1174 	strcpy(envstr, buf);
1175 
1176 	/* do we have a fully parsed url we want to forward in the env? */
1177 	if (*parsedurl != '\0') {
1178 		snprintf(buf, sizeof(buf),
1179 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1180 		envstr2 = malloc(strlen(buf)+1);
1181 		if (envstr2 == NULL) {
1182 			free(envstr);
1183 			return ENOMEM;
1184 		}
1185 		strcpy(envstr2, buf);
1186 	} else {
1187 		envstr2 = NULL;
1188 	}
1189 
1190 	for (nelem = 0; envp && envp[nelem]; nelem++)
1191 		continue;
1192 
1193 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1194 	if (newenv == NULL) {
1195 		free(envstr2);
1196 		free(envstr);
1197 		return ENOMEM;
1198 	}
1199 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1200 
1201 	newenv[nelem] = envstr;
1202 	newenv[nelem+1] = envstr2;
1203 	newenv[nelem+2] = NULL;
1204 
1205 	rv = host_execve(path, argv, newenv);
1206 
1207 	_DIAGASSERT(rv != 0);
1208 	sverrno = errno;
1209 	free(envstr2);
1210 	free(envstr);
1211 	free(newenv);
1212 	errno = sverrno;
1213 	return rv;
1214 }
1215 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * Forks (the parent exits with status 0), starts a new session in the
 * child, changes the working directory to "/" unless nochdir is set
 * and redirects stdin/stdout/stderr to /dev/null unless noclose is
 * set.  Finally the rump client side of the fork is completed with
 * rumpclient_fork_init().
 *
 * Returns 0 in the child on success, -1 on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *forkstate;
	pid_t child;
	int sverrno;

	forkstate = rumpclient_prefork();
	if (forkstate == NULL)
		return -1;

	child = fork();
	if (child == -1)
		goto daemonerr;
	if (child != 0)
		_exit(0);	/* parent is done */

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int nullfd = open("/dev/null", O_RDWR);
		int stdfd;

		for (stdfd = 0; stdfd <= 2; stdfd++)
			dup2(nullfd, stdfd);
		if (nullfd > 2)
			close(nullfd);
	}

	/* note: fork is either completed or cancelled by the call */
	return rumpclient_fork_init(forkstate) == -1 ? -1 : 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(forkstate);
	errno = sverrno;
	return -1;
}
1263