xref: /netbsd-src/lib/librumpuser/rumpuser_sp.c (revision 4817a0b0b8fe9612e8ebe21a9bf2d97b95038a97)
1 /*      $NetBSD: rumpuser_sp.c,v 1.27 2010/12/16 17:05:44 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Sysproxy routines.  This provides system RPC support over host sockets.
30  * The most notable limitation is that the client and server must share
31  * the same ABI.  This does not mean that they have to be the same
32  * machine or that they need to run the same version of the host OS,
33  * just that they must agree on the data structures.  This even *might*
34  * work correctly from one hardware architecture to another.
35  */
36 
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.27 2010/12/16 17:05:44 pooka Exp $");
39 
40 #include <sys/types.h>
41 #include <sys/atomic.h>
42 #include <sys/mman.h>
43 #include <sys/socket.h>
44 
45 #include <arpa/inet.h>
46 #include <netinet/in.h>
47 #include <netinet/tcp.h>
48 
49 #include <assert.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <poll.h>
53 #include <pthread.h>
54 #include <stdarg.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 
60 #include <rump/rumpuser.h>
61 #include "rumpuser_int.h"
62 
63 #include "sp_common.c"
64 
/*
 * Compile-time tunables.  Each one is only defined here when the build
 * has not already provided a value.
 */
#ifndef MAXCLI
#define MAXCLI 256	/* number of slots in pfdlist[] and spclist[] */
#endif
#ifndef MAXWORKER
#define MAXWORKER 128	/* initial value of rumpsp_maxworker */
#endif
#ifndef IDLEWORKER
#define IDLEWORKER 16	/* initial value of rumpsp_idleworker */
#endif
/* handlereq() creates a new syscall worker only while nworker is below this */
int rumpsp_maxworker = MAXWORKER;
/* a syscall worker exits once at least this many workers are idle */
int rumpsp_idleworker = IDLEWORKER;

/*
 * Parallel tables indexed by client slot: pfdlist[i] is the poll entry
 * for spclist[i].  Slot 0 holds the listening socket (see spserver()).
 */
static struct pollfd pfdlist[MAXCLI];
static struct spclient spclist[MAXCLI];
/* count of torn-down clients; bumped in spcrelease(), drained in spserver() */
static unsigned int disco;
/* set by rumpuser_sp_fini(), checked by the server loop in spserver() */
static volatile int spfini;

/* callbacks copied from the caller of rumpuser_sp_init() */
static struct rumpuser_sp_ops spops;

/* greeting written to each accepted connection; built in rumpuser_sp_init() */
static char banner[MAXBANNER];

/* protocol version numbers embedded in the banner */
#define PROTOMAJOR 0
#define PROTOMINOR 0
88 
89 /*
90  * Manual wrappers, since librump does not have access to the
91  * user namespace wrapped interfaces.
92  */
93 
94 static void
95 lwproc_switch(struct lwp *l)
96 {
97 
98 	spops.spop_schedule();
99 	spops.spop_lwproc_switch(l);
100 	spops.spop_unschedule();
101 }
102 
103 static void
104 lwproc_release(void)
105 {
106 
107 	spops.spop_schedule();
108 	spops.spop_lwproc_release();
109 	spops.spop_unschedule();
110 }
111 
112 static int
113 lwproc_newproc(struct spclient *spc)
114 {
115 	int rv;
116 
117 	spops.spop_schedule();
118 	rv = spops.spop_lwproc_newproc(spc);
119 	spops.spop_unschedule();
120 
121 	return rv;
122 }
123 
124 static int
125 lwproc_newlwp(pid_t pid)
126 {
127 	int rv;
128 
129 	spops.spop_schedule();
130 	rv = spops.spop_lwproc_newlwp(pid);
131 	spops.spop_unschedule();
132 
133 	return rv;
134 }
135 
136 static struct lwp *
137 lwproc_curlwp(void)
138 {
139 	struct lwp *l;
140 
141 	spops.spop_schedule();
142 	l = spops.spop_lwproc_curlwp();
143 	spops.spop_unschedule();
144 
145 	return l;
146 }
147 
148 static pid_t
149 lwproc_getpid(void)
150 {
151 	pid_t p;
152 
153 	spops.spop_schedule();
154 	p = spops.spop_getpid();
155 	spops.spop_unschedule();
156 
157 	return p;
158 }
159 
160 static int
161 rumpsyscall(int sysnum, void *data, register_t *retval)
162 {
163 	int rv;
164 
165 	spops.spop_schedule();
166 	rv = spops.spop_syscall(sysnum, data, retval);
167 	spops.spop_unschedule();
168 
169 	return rv;
170 }
171 
172 static uint64_t
173 nextreq(struct spclient *spc)
174 {
175 	uint64_t nw;
176 
177 	pthread_mutex_lock(&spc->spc_mtx);
178 	nw = spc->spc_nextreq++;
179 	pthread_mutex_unlock(&spc->spc_mtx);
180 
181 	return nw;
182 }
183 
184 static void
185 send_error_resp(struct spclient *spc, uint64_t reqno, int error)
186 {
187 	struct rsp_hdr rhdr;
188 
189 	rhdr.rsp_len = sizeof(rhdr);
190 	rhdr.rsp_reqno = reqno;
191 	rhdr.rsp_class = RUMPSP_ERROR;
192 	rhdr.rsp_type = 0;
193 	rhdr.rsp_error = error;
194 
195 	sendlock(spc);
196 	(void)dosend(spc, &rhdr, sizeof(rhdr));
197 	sendunlock(spc);
198 }
199 
200 static int
201 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
202 {
203 	struct rsp_hdr rhdr;
204 	int rv;
205 
206 	rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
207 	rhdr.rsp_reqno = reqno;
208 	rhdr.rsp_class = RUMPSP_RESP;
209 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
210 	rhdr.rsp_error = 0;
211 
212 	sendlock(spc);
213 	rv = dosend(spc, &rhdr, sizeof(rhdr));
214 	rv = dosend(spc, &error, sizeof(error));
215 	sendunlock(spc);
216 
217 	return rv;
218 }
219 
220 static int
221 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
222 	register_t *retval)
223 {
224 	struct rsp_hdr rhdr;
225 	struct rsp_sysresp sysresp;
226 	int rv;
227 
228 	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
229 	rhdr.rsp_reqno = reqno;
230 	rhdr.rsp_class = RUMPSP_RESP;
231 	rhdr.rsp_type = RUMPSP_SYSCALL;
232 	rhdr.rsp_sysnum = 0;
233 
234 	sysresp.rsys_error = error;
235 	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
236 
237 	sendlock(spc);
238 	rv = dosend(spc, &rhdr, sizeof(rhdr));
239 	rv = dosend(spc, &sysresp, sizeof(sysresp));
240 	sendunlock(spc);
241 
242 	return rv;
243 }
244 
245 static int
246 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
247 	int wantstr, void **resp)
248 {
249 	struct rsp_hdr rhdr;
250 	struct rsp_copydata copydata;
251 	struct respwait rw;
252 	int rv;
253 
254 	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
255 
256 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
257 	rhdr.rsp_class = RUMPSP_REQ;
258 	if (wantstr)
259 		rhdr.rsp_type = RUMPSP_COPYINSTR;
260 	else
261 		rhdr.rsp_type = RUMPSP_COPYIN;
262 	rhdr.rsp_sysnum = 0;
263 
264 	copydata.rcp_addr = __UNCONST(remaddr);
265 	copydata.rcp_len = *dlen;
266 
267 	putwait(spc, &rw, &rhdr);
268 	rv = dosend(spc, &rhdr, sizeof(rhdr));
269 	rv = dosend(spc, &copydata, sizeof(copydata));
270 	if (rv) {
271 		unputwait(spc, &rw);
272 		return rv;
273 	}
274 
275 	rv = waitresp(spc, &rw);
276 
277 	DPRINTF(("copyin: response %d\n", rv));
278 
279 	*resp = rw.rw_data;
280 	if (wantstr)
281 		*dlen = rw.rw_dlen;
282 
283 	return rv;
284 
285 }
286 
287 static int
288 send_copyout_req(struct spclient *spc, const void *remaddr,
289 	const void *data, size_t dlen)
290 {
291 	struct rsp_hdr rhdr;
292 	struct rsp_copydata copydata;
293 	int rv;
294 
295 	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
296 
297 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
298 	rhdr.rsp_reqno = nextreq(spc);
299 	rhdr.rsp_class = RUMPSP_REQ;
300 	rhdr.rsp_type = RUMPSP_COPYOUT;
301 	rhdr.rsp_sysnum = 0;
302 
303 	copydata.rcp_addr = __UNCONST(remaddr);
304 	copydata.rcp_len = dlen;
305 
306 	sendlock(spc);
307 	rv = dosend(spc, &rhdr, sizeof(rhdr));
308 	rv = dosend(spc, &copydata, sizeof(copydata));
309 	rv = dosend(spc, data, dlen);
310 	sendunlock(spc);
311 
312 	return rv;
313 }
314 
315 static int
316 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
317 {
318 	struct rsp_hdr rhdr;
319 	struct respwait rw;
320 	int rv;
321 
322 	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
323 
324 	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
325 	rhdr.rsp_class = RUMPSP_REQ;
326 	rhdr.rsp_type = RUMPSP_ANONMMAP;
327 	rhdr.rsp_sysnum = 0;
328 
329 	putwait(spc, &rw, &rhdr);
330 	rv = dosend(spc, &rhdr, sizeof(rhdr));
331 	rv = dosend(spc, &howmuch, sizeof(howmuch));
332 	if (rv) {
333 		unputwait(spc, &rw);
334 		return rv;
335 	}
336 
337 	rv = waitresp(spc, &rw);
338 
339 	*resp = rw.rw_data;
340 
341 	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
342 
343 	return rv;
344 }
345 
346 static void
347 spcref(struct spclient *spc)
348 {
349 
350 	pthread_mutex_lock(&spc->spc_mtx);
351 	spc->spc_refcnt++;
352 	pthread_mutex_unlock(&spc->spc_mtx);
353 }
354 
355 static void
356 spcrelease(struct spclient *spc)
357 {
358 	int ref;
359 
360 	pthread_mutex_lock(&spc->spc_mtx);
361 	ref = --spc->spc_refcnt;
362 	pthread_mutex_unlock(&spc->spc_mtx);
363 
364 	if (ref > 0)
365 		return;
366 
367 	DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd));
368 
369 	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
370 	_DIAGASSERT(spc->spc_buf == NULL);
371 
372 	lwproc_switch(spc->spc_mainlwp);
373 	lwproc_release();
374 	spc->spc_mainlwp = NULL;
375 
376 	close(spc->spc_fd);
377 	spc->spc_fd = -1;
378 	spc->spc_state = SPCSTATE_NEW;
379 
380 	atomic_inc_uint(&disco);
381 }
382 
383 static void
384 serv_handledisco(unsigned int idx)
385 {
386 	struct spclient *spc = &spclist[idx];
387 
388 	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
389 
390 	pfdlist[idx].fd = -1;
391 	pfdlist[idx].revents = 0;
392 	pthread_mutex_lock(&spc->spc_mtx);
393 	spc->spc_state = SPCSTATE_DYING;
394 	kickall(spc);
395 	pthread_mutex_unlock(&spc->spc_mtx);
396 
397 	/*
398 	 * Nobody's going to attempt to send/receive anymore,
399 	 * so reinit info relevant to that.
400 	 */
401 	/*LINTED:pointer casts may be ok*/
402 	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);
403 
404 	spcrelease(spc);
405 }
406 
407 static void
408 serv_shutdown(void)
409 {
410 	struct spclient *spc;
411 	unsigned int i;
412 
413 	for (i = 1; i < MAXCLI; i++) {
414 		spc = &spclist[i];
415 		if (spc->spc_fd == -1)
416 			continue;
417 
418 		shutdown(spc->spc_fd, SHUT_RDWR);
419 		serv_handledisco(i);
420 
421 		spcrelease(spc);
422 	}
423 }
424 
425 static unsigned
426 serv_handleconn(int fd, connecthook_fn connhook, int busy)
427 {
428 	struct sockaddr_storage ss;
429 	socklen_t sl = sizeof(ss);
430 	int newfd, flags;
431 	unsigned i;
432 
433 	/*LINTED: cast ok */
434 	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
435 	if (newfd == -1)
436 		return 0;
437 
438 	if (busy) {
439 		close(newfd); /* EBUSY */
440 		return 0;
441 	}
442 
443 	flags = fcntl(newfd, F_GETFL, 0);
444 	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
445 		close(newfd);
446 		return 0;
447 	}
448 
449 	if (connhook(newfd) != 0) {
450 		close(newfd);
451 		return 0;
452 	}
453 
454 	/* write out a banner for the client */
455 	if (write(newfd, banner, strlen(banner)) != (ssize_t)strlen(banner)) {
456 		close(newfd);
457 		return 0;
458 	}
459 
460 	/* find empty slot the simple way */
461 	for (i = 0; i < MAXCLI; i++) {
462 		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
463 			break;
464 	}
465 
466 	if (lwproc_newproc(&spclist[i]) != 0) {
467 		close(newfd);
468 		return 0;
469 	}
470 
471 	assert(i < MAXCLI);
472 
473 	pfdlist[i].fd = newfd;
474 	spclist[i].spc_fd = newfd;
475 	spclist[i].spc_mainlwp = lwproc_curlwp();
476 	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
477 	spclist[i].spc_pid = lwproc_getpid();
478 	spclist[i].spc_refcnt = 1;
479 
480 	TAILQ_INIT(&spclist[i].spc_respwait);
481 
482 	DPRINTF(("rump_sp: added new connection fd %d at idx %u, pid %d\n",
483 	    newfd, i, lwproc_getpid()));
484 
485 	lwproc_switch(NULL);
486 
487 	return i;
488 }
489 
490 static void
491 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
492 {
493 	register_t retval[2] = {0, 0};
494 	int rv, sysnum;
495 
496 	sysnum = (int)rhdr->rsp_sysnum;
497 	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
498 	    sysnum, 0));
499 
500 	lwproc_newlwp(spc->spc_pid);
501 	rv = rumpsyscall(sysnum, data, retval);
502 	lwproc_release();
503 
504 	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
505 	    rv, retval[0], retval[1]));
506 
507 	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
508 }
509 
/*
 * One queued syscall request.  Allocated and filled in by handlereq(),
 * consumed and freed by serv_syscallbouncer().
 */
struct sysbouncearg {
	struct spclient *sba_spc;	/* client; a reference is held */
	struct rsp_hdr sba_hdr;		/* copy of the request header */
	uint8_t *sba_data;		/* request payload; freed by worker */

	TAILQ_ENTRY(sysbouncearg) sba_entries;	/* linkage on syslist */
};
/*
 * Worker pool state.  syslist and idleworker are manipulated with
 * sbamtx held; nworker is modified with sbamtx held.  sbacv is signalled
 * when a request is queued and there are idle workers.
 */
static pthread_mutex_t sbamtx;
static pthread_cond_t sbacv;
/* number of worker threads / number of those currently waiting for work */
static int nworker, idleworker;
/* queue of pending syscall requests */
static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist);
521 
522 /*ARGSUSED*/
523 static void *
524 serv_syscallbouncer(void *arg)
525 {
526 	struct sysbouncearg *sba;
527 
528 	for (;;) {
529 		pthread_mutex_lock(&sbamtx);
530 		if (idleworker >= rumpsp_idleworker) {
531 			nworker--;
532 			pthread_mutex_unlock(&sbamtx);
533 			break;
534 		}
535 		idleworker++;
536 		while (TAILQ_EMPTY(&syslist)) {
537 			pthread_cond_wait(&sbacv, &sbamtx);
538 		}
539 
540 		sba = TAILQ_FIRST(&syslist);
541 		TAILQ_REMOVE(&syslist, sba, sba_entries);
542 		idleworker--;
543 		pthread_mutex_unlock(&sbamtx);
544 
545 		serv_handlesyscall(sba->sba_spc,
546 		    &sba->sba_hdr, sba->sba_data);
547 		spcrelease(sba->sba_spc);
548 		free(sba->sba_data);
549 		free(sba);
550 	}
551 
552 	return NULL;
553 }
554 
/*
 * Common code for copyin and copyinstr: fetch data from address raddr
 * in the client (arg is the struct spclient) into local buffer laddr.
 *
 * On entry *len is the number of bytes wanted, which is also the most
 * that laddr can be trusted to hold.  For string requests (wantstr)
 * *len is updated by the request to the length the client reported.
 *
 * The reply comes from the client and is not trusted: a reported
 * length larger than what was asked for, or a missing payload, is
 * treated as a failed copy instead of being copied into laddr.
 *
 * The kernel lock is dropped around the request via
 * rumpuser__kunlock()/rumpuser__klock().
 *
 * Returns 0 on success and EFAULT on any failure.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	size_t maxlen = *len;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	if (*len > maxlen || (rdata == NULL && *len != 0)) {
		/* reply does not fit the caller's buffer or has no data */
		rv = EFAULT;
	} else if (*len != 0) {
		memcpy(laddr, rdata, *len);
	}
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
577 
/*
 * Copy len bytes from client address raddr into laddr.  arg is the
 * client handle.  Returns what sp_copyin() returns for a non-string
 * request.
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	int error;

	error = sp_copyin(arg, raddr, laddr, &len, 0);
	return error;
}
584 
/*
 * Copy a string from client address raddr into laddr.  arg is the
 * client handle and len is passed through to sp_copyin() as an in/out
 * length.  Returns what sp_copyin() returns for a string request.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	int error;

	error = sp_copyin(arg, raddr, laddr, len, 1);
	return error;
}
591 
/*
 * Send dlen bytes from local buffer laddr to address raddr in the
 * client (arg is the struct spclient).  The kernel lock is dropped
 * around the send via rumpuser__kunlock()/rumpuser__klock().
 *
 * Returns 0 if the request was sent and EFAULT otherwise.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, sendrv;

	rumpuser__kunlock(0, &nlocks, NULL);
	sendrv = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return sendrv ? EFAULT : 0;
}
606 
/*
 * Copy dlen bytes from laddr out to client address raddr.  arg is the
 * client handle.  Returns what sp_copyout() returns.
 */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
613 
/*
 * Copy a string of *dlen bytes from laddr out to client address raddr.
 * arg is the client handle.  *dlen is only read, never updated.
 * Returns what sp_copyout() returns.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
620 
/*
 * Ask the client (arg is the struct spclient) for an anonymous mapping
 * of howmuch bytes.  The kernel lock is dropped around the request via
 * rumpuser__kunlock()/rumpuser__klock().
 *
 * Returns EFAULT if the request failed; *addr is then left untouched.
 * Otherwise *addr is set to the pointer value found in the reply, and
 * the result is ENOMEM if that pointer is NULL and 0 if it is not.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *reply, *mapped;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);

	if (anonmmap_req(spc, howmuch, &reply) != 0) {
		error = EFAULT;
	} else {
		/* the reply payload holds the address of the mapping */
		mapped = *(void **)reply;
		free(reply);

		error = (mapped == NULL) ? ENOMEM : 0;
		*addr = mapped;
	}

	rumpuser__klock(nlocks, NULL);

	return error;
}
652 
653 /*
654  *
655  * Startup routines and mainloop for server.
656  *
657  */
658 
/*
 * Startup arguments for the server thread; allocated and filled in by
 * rumpuser_sp_init() and passed to spserver().
 */
struct spservarg {
	int sps_sock;			/* listening socket */
	connecthook_fn sps_connhook;	/* called on each accepted socket */
};

/* thread attributes used for syscall workers; set up in spserver() */
static pthread_attr_t pattr_detached;
665 static void
666 handlereq(struct spclient *spc)
667 {
668 	struct sysbouncearg *sba;
669 	pthread_t pt;
670 	int retries, rv;
671 
672 	if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
673 		if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
674 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAUTH);
675 			spcfreebuf(spc);
676 			return;
677 		}
678 
679 		rv = send_handshake_resp(spc, spc->spc_hdr.rsp_reqno, 0);
680 		spcfreebuf(spc);
681 		if (rv) {
682 			shutdown(spc->spc_fd, SHUT_RDWR);
683 			return;
684 		}
685 		spc->spc_state = SPCSTATE_RUNNING;
686 		return;
687 	}
688 
689 	if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
690 		send_error_resp(spc, spc->spc_hdr.rsp_reqno, EINVAL);
691 		spcfreebuf(spc);
692 		return;
693 	}
694 
695 	retries = 0;
696 	while ((sba = malloc(sizeof(*sba))) == NULL) {
697 		if (nworker == 0 || retries > 10) {
698 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAGAIN);
699 			spcfreebuf(spc);
700 			return;
701 		}
702 		/* slim chance of more memory? */
703 		usleep(10000);
704 	}
705 
706 	sba->sba_spc = spc;
707 	sba->sba_hdr = spc->spc_hdr;
708 	sba->sba_data = spc->spc_buf;
709 	spcresetbuf(spc);
710 
711 	spcref(spc);
712 
713 	pthread_mutex_lock(&sbamtx);
714 	TAILQ_INSERT_TAIL(&syslist, sba, sba_entries);
715 	if (idleworker > 0) {
716 		/* do we have a daemon's tool (i.e. idle threads)? */
717 		pthread_cond_signal(&sbacv);
718 	} else if (nworker < rumpsp_maxworker) {
719 		/*
720 		 * Else, need to create one
721 		 * (if we can, otherwise just expect another
722 		 * worker to pick up the syscall)
723 		 */
724 		if (pthread_create(&pt, &pattr_detached,
725 		    serv_syscallbouncer, NULL) == 0)
726 			nworker++;
727 	}
728 	pthread_mutex_unlock(&sbamtx);
729 }
730 
731 static void *
732 spserver(void *arg)
733 {
734 	struct spservarg *sarg = arg;
735 	struct spclient *spc;
736 	unsigned idx;
737 	int seen;
738 	int rv;
739 	unsigned int nfds, maxidx;
740 
741 	for (idx = 0; idx < MAXCLI; idx++) {
742 		pfdlist[idx].fd = -1;
743 		pfdlist[idx].events = POLLIN;
744 
745 		spc = &spclist[idx];
746 		pthread_mutex_init(&spc->spc_mtx, NULL);
747 		pthread_cond_init(&spc->spc_cv, NULL);
748 		spc->spc_fd = -1;
749 	}
750 	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
751 	pfdlist[0].events = POLLIN;
752 	nfds = 1;
753 	maxidx = 0;
754 
755 	pthread_attr_init(&pattr_detached);
756 	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
757 	/* XXX: doesn't stacksize currently work on NetBSD */
758 	pthread_attr_setstacksize(&pattr_detached, 32*1024);
759 
760 	pthread_mutex_init(&sbamtx, NULL);
761 	pthread_cond_init(&sbacv, NULL);
762 
763 	DPRINTF(("rump_sp: server mainloop\n"));
764 
765 	for (;;) {
766 		int discoed;
767 
768 		/* g/c hangarounds (eventually) */
769 		discoed = atomic_swap_uint(&disco, 0);
770 		while (discoed--) {
771 			nfds--;
772 			idx = maxidx;
773 			while (idx) {
774 				if (pfdlist[idx].fd != -1) {
775 					maxidx = idx;
776 					break;
777 				}
778 				idx--;
779 			}
780 			DPRINTF(("rump_sp: set maxidx to [%u]\n",
781 			    maxidx));
782 		}
783 
784 		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
785 		seen = 0;
786 		rv = poll(pfdlist, maxidx+1, INFTIM);
787 		assert(maxidx+1 <= MAXCLI);
788 		assert(rv != 0);
789 		if (rv == -1) {
790 			if (errno == EINTR)
791 				continue;
792 			fprintf(stderr, "rump_spserver: poll returned %d\n",
793 			    errno);
794 			break;
795 		}
796 
797 		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
798 			if ((pfdlist[idx].revents & POLLIN) == 0)
799 				continue;
800 
801 			seen++;
802 			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
803 			    idx, seen, rv));
804 			if (idx > 0) {
805 				spc = &spclist[idx];
806 				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
807 				switch (readframe(spc)) {
808 				case 0:
809 					break;
810 				case -1:
811 					serv_handledisco(idx);
812 					break;
813 				default:
814 					switch (spc->spc_hdr.rsp_class) {
815 					case RUMPSP_RESP:
816 						kickwaiter(spc);
817 						break;
818 					case RUMPSP_REQ:
819 						handlereq(spc);
820 						break;
821 					default:
822 						send_error_resp(spc,
823 						    spc->spc_hdr.rsp_reqno,
824 						    ENOENT);
825 						spcfreebuf(spc);
826 						break;
827 					}
828 					break;
829 				}
830 
831 			} else {
832 				DPRINTF(("rump_sp: mainloop new connection\n"));
833 
834 				if (__predict_false(spfini)) {
835 					close(spclist[0].spc_fd);
836 					serv_shutdown();
837 					goto out;
838 				}
839 
840 				idx = serv_handleconn(pfdlist[0].fd,
841 				    sarg->sps_connhook, nfds == MAXCLI);
842 				if (idx)
843 					nfds++;
844 				if (idx > maxidx)
845 					maxidx = idx;
846 				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
847 			}
848 		}
849 	}
850 
851  out:
852 	return NULL;
853 }
854 
855 static unsigned cleanupidx;
856 static struct sockaddr *cleanupsa;
857 int
858 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
859 	const char *ostype, const char *osrelease, const char *machine)
860 {
861 	pthread_t pt;
862 	struct spservarg *sarg;
863 	struct sockaddr *sap;
864 	char *p;
865 	unsigned idx;
866 	int error, s;
867 
868 	p = strdup(url);
869 	if (p == NULL)
870 		return ENOMEM;
871 	error = parseurl(p, &sap, &idx, 1);
872 	free(p);
873 	if (error)
874 		return error;
875 
876 	snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
877 	    PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
878 
879 	s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
880 	if (s == -1)
881 		return errno;
882 
883 	spops = *spopsp;
884 	sarg = malloc(sizeof(*sarg));
885 	if (sarg == NULL) {
886 		close(s);
887 		return ENOMEM;
888 	}
889 
890 	sarg->sps_sock = s;
891 	sarg->sps_connhook = parsetab[idx].connhook;
892 
893 	cleanupidx = idx;
894 	cleanupsa = sap;
895 
896 	/* sloppy error recovery */
897 
898 	/*LINTED*/
899 	if (bind(s, sap, sap->sa_len) == -1) {
900 		fprintf(stderr, "rump_sp: server bind failed\n");
901 		return errno;
902 	}
903 
904 	if (listen(s, MAXCLI) == -1) {
905 		fprintf(stderr, "rump_sp: server listen failed\n");
906 		return errno;
907 	}
908 
909 	if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
910 		fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
911 		return errno;
912 	}
913 	pthread_detach(pt);
914 
915 	return 0;
916 }
917 
918 void
919 rumpuser_sp_fini()
920 {
921 
922 	if (spclist[0].spc_fd) {
923 		parsetab[cleanupidx].cleanup(cleanupsa);
924 		shutdown(spclist[0].spc_fd, SHUT_RDWR);
925 		spfini = 1;
926 	}
927 }
928