xref: /netbsd-src/lib/librumpuser/rumpfiber.c (revision f89f6560d453f5e37386cc7938c072d2f528b9fa)
1 /*	$NetBSD: rumpfiber.c,v 1.12 2015/02/15 00:54:32 justin Exp $	*/
2 
3 /*
4  * Copyright (c) 2007-2013 Antti Kantee.  All Rights Reserved.
5  * Copyright (c) 2014 Justin Cormack.  All Rights Reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
17  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /* Based partly on code from Xen Minios with the following license */
30 
31 /*
32  ****************************************************************************
33  * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
34  ****************************************************************************
35  *
36  *        File: sched.c
37  *      Author: Grzegorz Milos
38  *     Changes: Robert Kaiser
39  *
40  *        Date: Aug 2005
41  *
42  * Environment: Xen Minimal OS
43  * Description: simple scheduler for Mini-Os
44  *
45  * The scheduler is non-preemptive (cooperative), and schedules according
46  * to Round Robin algorithm.
47  *
48  ****************************************************************************
49  * Permission is hereby granted, free of charge, to any person obtaining a copy
50  * of this software and associated documentation files (the "Software"), to
51  * deal in the Software without restriction, including without limitation the
52  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
53  * sell copies of the Software, and to permit persons to whom the Software is
54  * furnished to do so, subject to the following conditions:
55  *
56  * The above copyright notice and this permission notice shall be included in
57  * all copies or substantial portions of the Software.
58  *
59  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
60  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
61  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
62  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
63  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
64  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
65  * DEALINGS IN THE SOFTWARE.
66  */
67 
68 #include "rumpuser_port.h"
69 
70 #if !defined(lint)
71 __RCSID("$NetBSD: rumpfiber.c,v 1.12 2015/02/15 00:54:32 justin Exp $");
72 #endif /* !lint */
73 
74 #include <sys/mman.h>
75 #include <sys/time.h>
76 
77 #include <assert.h>
78 #include <errno.h>
79 #include <fcntl.h>
80 #include <signal.h>
81 #include <stdarg.h>
82 #include <stdint.h>
83 #include <stdio.h>
84 #include <stdlib.h>
85 #include <string.h>
86 #include <time.h>
87 #include <unistd.h>
88 
89 #include <rump/rumpuser.h>
90 
91 #include "rumpuser_int.h"
92 #include "rumpfiber.h"
93 
94 static void init_sched(void);
95 static void join_thread(struct thread *);
96 static void switch_threads(struct thread *prev, struct thread *next);
97 static struct thread *get_current(void);
98 static int64_t now(void);
99 static void msleep(uint64_t millisecs);
100 static void abssleep(uint64_t millisecs);
101 
102 TAILQ_HEAD(thread_list, thread);
103 
104 static struct thread_list exited_threads = TAILQ_HEAD_INITIALIZER(exited_threads);
105 static struct thread_list thread_list = TAILQ_HEAD_INITIALIZER(thread_list);
106 static struct thread *current_thread = NULL;
107 
108 static void (*scheduler_hook)(void *, void *);
109 
110 static void printk(const char *s);
111 
/*
 * Emit a message straight to stderr with write(2), bypassing stdio.
 * Used for last-ditch diagnostics from scheduler paths where buffered
 * I/O could itself reschedule or allocate.
 */
static void
printk(const char *msg)
{
	ssize_t nwritten __attribute__((unused));

	nwritten = write(STDERR_FILENO, msg, strlen(msg));
}
119 
/* Return the thread running on this (single, cooperative) virtual CPU. */
static struct thread *
get_current(void)
{

	return current_thread;
}
126 
127 static int64_t
128 now(void)
129 {
130 	struct timespec ts;
131 	int rv;
132 
133 	rv = clock_gettime(CLOCK_MONOTONIC, &ts);
134 	assert(rv == 0);
135 	return (ts.tv_sec * 1000LL) + (ts.tv_nsec / 1000000LL);
136 }
137 
/*
 * The round-robin cooperative scheduler.  Pick the next runnable
 * thread and switch to it; if none is runnable, nanosleep until the
 * earliest wakeup deadline (capped at 1s) and rescan.  Also reaps
 * threads parked on exited_threads by exit_thread().
 */
void
schedule(void)
{
	struct thread *prev, *next, *thread, *tmp;
	int64_t tm, wakeup;
	struct timespec sl;

	prev = get_current();

	do {
		tm = now();
		wakeup = tm + 1000; /* wake up in 1s max */
		next = NULL;
		TAILQ_FOREACH_SAFE(thread, &thread_list, thread_list, tmp) {
			/* expire timed sleeps; track the nearest future deadline */
			if (!is_runnable(thread) && thread->wakeup_time >= 0) {
				if (thread->wakeup_time <= tm) {
					thread->flags |= THREAD_TIMEDOUT;
					wake(thread);
				} else if (thread->wakeup_time < wakeup)
					wakeup = thread->wakeup_time;
			}
			if (is_runnable(thread)) {
				next = thread;
				/* Put this thread on the end of the list */
				TAILQ_REMOVE(&thread_list, thread, thread_list);
				TAILQ_INSERT_TAIL(&thread_list, thread, thread_list);
				break;
			}
		}
		if (next)
			break;
		/* nothing runnable: sleep until the nearest deadline */
		sl.tv_sec = (wakeup - tm) / 1000;
		sl.tv_nsec = ((wakeup - tm) - 1000 * sl.tv_sec) * 1000000;
#ifdef HAVE_CLOCK_NANOSLEEP
		clock_nanosleep(CLOCK_MONOTONIC, 0, &sl, NULL);
#else
		nanosleep(&sl, NULL);
#endif
	} while (1);

	if (prev != next)
		switch_threads(prev, next);

	/* reap exited threads (but never the one we are running on) */
	TAILQ_FOREACH_SAFE(thread, &exited_threads, thread_list, tmp) {
		if (thread != prev) {
			TAILQ_REMOVE(&exited_threads, thread, thread_list);
			if ((thread->flags & THREAD_EXTSTACK) == 0)
				munmap(thread->ctx.uc_stack.ss_sp, STACKSIZE);
			free(thread->name);
			free(thread);
		}
	}
}
191 
/*
 * Initialize a ucontext so that switching to it calls f(data) on the
 * supplied stack.  The context has no successor (uc_link == NULL), so
 * f must never return.
 */
static void
create_ctx(ucontext_t *ctx, void *stack, size_t stack_size,
	void (*f)(void *), void *data)
{

	getcontext(ctx);
	ctx->uc_link = NULL; /* TODO may link to main thread */
	ctx->uc_stack.ss_flags = 0;
	ctx->uc_stack.ss_sp = stack;
	ctx->uc_stack.ss_size = stack_size;
	/* may have to do bounce function to call, if args to makecontext are ints */
	makecontext(ctx, (void (*)(void))f, 1, data);
}
205 
206 /* TODO see notes in rumpuser_thread_create, have flags here */
207 struct thread *
208 create_thread(const char *name, void *cookie, void (*f)(void *), void *data,
209 	void *stack, size_t stack_size)
210 {
211 	struct thread *thread = calloc(1, sizeof(struct thread));
212 
213 	if (!thread) {
214 		return NULL;
215 	}
216 
217 	if (!stack) {
218 		assert(stack_size == 0);
219 		stack = mmap(NULL, STACKSIZE, PROT_READ | PROT_WRITE,
220 		    MAP_SHARED | MAP_ANON, -1, 0);
221 		if (stack == MAP_FAILED) {
222 			free(thread);
223 			return NULL;
224 		}
225 		stack_size = STACKSIZE;
226 	} else {
227 		thread->flags = THREAD_EXTSTACK;
228 	}
229 	create_ctx(&thread->ctx, stack, stack_size, f, data);
230 
231 	thread->name = strdup(name);
232 	thread->cookie = cookie;
233 
234 	/* Not runnable, not exited, not sleeping */
235 	thread->wakeup_time = -1;
236 	thread->lwp = NULL;
237 	set_runnable(thread);
238 	TAILQ_INSERT_TAIL(&thread_list, thread, thread_list);
239 
240 	return thread;
241 }
242 
/*
 * Make 'next' the current thread and transfer control to it with
 * swapcontext().  The scheduler hook (if set) is notified with both
 * threads' cookies before the switch.  swapcontext() failure is fatal.
 */
static void
switch_threads(struct thread *prev, struct thread *next)
{
	int ret;

	current_thread = next;
	if (scheduler_hook)
		scheduler_hook(prev->cookie, next->cookie);
	ret = swapcontext(&prev->ctx, &next->ctx);
	if (ret < 0) {
		printk("swapcontext failed\n");
		abort();
	}
}
257 
/*
 * A thread blocked in join_thread() waiting for jw_wanted to exit.
 * Entries live on the joiner's stack and are linked onto joinwq.
 */
struct join_waiter {
    struct thread *jw_thread;	/* the joining thread */
    struct thread *jw_wanted;	/* the thread being joined */
    TAILQ_ENTRY(join_waiter) jw_entries;
};
static TAILQ_HEAD(, join_waiter) joinwq = TAILQ_HEAD_INITIALIZER(joinwq);
264 
/*
 * Terminate the current thread.  A joinable thread first advertises
 * itself via THREAD_JOINED, wakes any waiting joiner, and blocks until
 * join_thread() clears THREAD_MUSTJOIN.  The thread is then moved to
 * exited_threads for schedule() to reclaim.  Never returns.
 */
void
exit_thread(void)
{
	struct thread *thread = get_current();
	struct join_waiter *jw_iter;

	/* if joinable, gate until we are allowed to exit */
	while (thread->flags & THREAD_MUSTJOIN) {
		thread->flags |= THREAD_JOINED;

		/* see if the joiner is already there */
		TAILQ_FOREACH(jw_iter, &joinwq, jw_entries) {
			if (jw_iter->jw_wanted == thread) {
				wake(jw_iter->jw_thread);
				break;
			}
		}
		block(thread);
		schedule();
	}

	/* Remove from the thread list */
	TAILQ_REMOVE(&thread_list, thread, thread_list);
	clear_runnable(thread);
	/* Put onto exited list */
	TAILQ_INSERT_HEAD(&exited_threads, thread, thread_list);

	/* Schedule will free the resources */
	while (1) {
		schedule();
		printk("schedule() returned!  Trying again\n");
	}
}
298 
/*
 * Wait for 'joinable' (which must have THREAD_MUSTJOIN set) to reach
 * exit_thread(), then release it by clearing THREAD_MUSTJOIN and
 * waking it so it can finish exiting.
 */
static void
join_thread(struct thread *joinable)
{
	struct join_waiter jw;	/* stack-allocated; unlinked before return */
	struct thread *thread = get_current();

	assert(joinable->flags & THREAD_MUSTJOIN);

	/* wait for exiting thread to hit thread_exit() */
	while (! (joinable->flags & THREAD_JOINED)) {

		jw.jw_thread = thread;
		jw.jw_wanted = joinable;
		TAILQ_INSERT_TAIL(&joinwq, &jw, jw_entries);
		block(thread);
		schedule();
		TAILQ_REMOVE(&joinwq, &jw, jw_entries);
	}

	/* signal exiting thread that we have seen it and it may now exit */
	assert(joinable->flags & THREAD_JOINED);
	joinable->flags &= ~THREAD_MUSTJOIN;

	wake(joinable);
}
324 
/*
 * Sleep the current thread for 'millisecs' relative to the current
 * monotonic time; schedule() wakes us when the deadline passes.
 */
static void msleep(uint64_t millisecs)
{
	struct thread *thread = get_current();

	thread->wakeup_time = now() + millisecs;
	clear_runnable(thread);
	schedule();
}
333 
/*
 * Sleep the current thread until an absolute deadline 'millisecs',
 * expressed on the monotonic clock used by now().
 */
static void abssleep(uint64_t millisecs)
{
	struct thread *thread = get_current();

	thread->wakeup_time = millisecs;
	clear_runnable(thread);
	schedule();
}
342 
343 /* like abssleep, except against realtime clock instead of monotonic clock */
344 int abssleep_real(uint64_t millisecs)
345 {
346 	struct thread *thread = get_current();
347 	struct timespec ts;
348 	uint64_t real_now;
349 	int rv;
350 
351 	clock_gettime(CLOCK_REALTIME, &ts);
352 	real_now = 1000*ts.tv_sec + ts.tv_nsec/(1000*1000);
353 	thread->wakeup_time = now() + (millisecs - real_now);
354 
355 	clear_runnable(thread);
356 	schedule();
357 
358 	rv = !!(thread->flags & THREAD_TIMEDOUT);
359 	thread->flags &= ~THREAD_TIMEDOUT;
360 	return rv;
361 }
362 
/* Make a thread runnable and cancel any pending sleep deadline. */
void wake(struct thread *thread)
{

	thread->wakeup_time = -1;
	set_runnable(thread);
}
369 
/* Mark a thread not-runnable with no timeout (sleep until woken). */
void block(struct thread *thread)
{

	thread->wakeup_time = -1;
	clear_runnable(thread);
}
376 
/* Nonzero iff the thread is eligible to be picked by schedule(). */
int is_runnable(struct thread *thread)
{

	return thread->flags & RUNNABLE_FLAG;
}
382 
/* Flag the thread runnable; it will be considered by schedule(). */
void set_runnable(struct thread *thread)
{

	thread->flags |= RUNNABLE_FLAG;
}
388 
/* Clear the runnable flag; schedule() will skip this thread. */
void clear_runnable(struct thread *thread)
{

	thread->flags &= ~RUNNABLE_FLAG;
}
394 
395 static void
396 init_sched(void)
397 {
398 	struct thread *thread = calloc(1, sizeof(struct thread));
399 
400 	if (!thread) {
401 		abort();
402 	}
403 
404 	thread->name = strdup("init");
405 	thread->flags = 0;
406 	thread->wakeup_time = -1;
407 	thread->lwp = NULL;
408 	set_runnable(thread);
409 	TAILQ_INSERT_TAIL(&thread_list, thread, thread_list);
410 	current_thread = thread;
411 }
412 
/*
 * Register a callback invoked on every thread switch with the cookies
 * of the outgoing and incoming threads (see switch_threads()).
 */
void
set_sched_hook(void (*f)(void *, void *))
{

	scheduler_hook = f;
}
419 
/*
 * Attach the caller's cookie to the already-created "init" thread and
 * return it.  Must be called after init_sched() has run.
 */
struct thread *
init_mainthread(void *cookie)
{

	current_thread->cookie = cookie;
	return current_thread;
}
427 
/* rump functions below */

/* Hypercall upcall table; copied from the rump kernel in rumpuser_init(). */
struct rumpuser_hyperup rumpuser__hyp;
431 
/*
 * Hypervisor initialization: verify the interface version, seed the
 * random source, store the upcall table and start the fiber scheduler.
 * Returns 0 on success or an error from the random init (via ET()).
 */
int
rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
{
	int rv;

	if (version != RUMPUSER_VERSION) {
		printk("rumpuser version mismatch\n");
		abort();
	}

	rv = rumpuser__random_init();
	if (rv != 0) {
		ET(rv);
	}

	rumpuser__hyp = *hyp;

	init_sched();

	return 0;
}
453 
454 int
455 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec)
456 {
457 	enum rumpclock rclk = enum_rumpclock;
458 	struct timespec ts;
459 	clockid_t clk;
460 	int rv;
461 
462 	switch (rclk) {
463 	case RUMPUSER_CLOCK_RELWALL:
464 		clk = CLOCK_REALTIME;
465 		break;
466 	case RUMPUSER_CLOCK_ABSMONO:
467 		clk = CLOCK_MONOTONIC;
468 		break;
469 	default:
470 		abort();
471 	}
472 
473 	if (clock_gettime(clk, &ts) == -1) {
474 		rv = errno;
475 	} else {
476 		*sec = ts.tv_sec;
477 		*nsec = ts.tv_nsec;
478 		rv = 0;
479 	}
480 
481 	ET(rv);
482 }
483 
/*
 * Sleep for the given time.  RELWALL is a relative interval; ABSMONO
 * is an absolute monotonic-clock deadline.  Both are converted to
 * milliseconds.  The rump-kernel CPU is released around the sleep.
 */
int
rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec)
{
	enum rumpclock rclk = enum_rumpclock;
	uint64_t msec;
	int nlocks;

	rumpkern_unsched(&nlocks, NULL);
	switch (rclk) {
	case RUMPUSER_CLOCK_RELWALL:
		msec = sec * 1000 + nsec / (1000*1000UL);
		msleep(msec);
		break;
	case RUMPUSER_CLOCK_ABSMONO:
		msec = sec * 1000 + nsec / (1000*1000UL);
		abssleep(msec);
		break;
	}
	rumpkern_sched(nlocks, NULL);

	return 0;
}
506 
507 int
508 rumpuser_getparam(const char *name, void *buf, size_t blen)
509 {
510 	int rv;
511 	const char *ncpu = "1";
512 
513 	if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) {
514 		strncpy(buf, ncpu, blen);
515 		rv = 0;
516 	} else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) {
517 		char tmp[MAXHOSTNAMELEN];
518 
519 		if (gethostname(tmp, sizeof(tmp)) == -1) {
520 			snprintf(buf, blen, "rump-%05d", (int)getpid());
521 		} else {
522 			snprintf(buf, blen, "rump-%05d.%s",
523 			    (int)getpid(), tmp);
524 		}
525 		rv = 0;
526 	} else if (*name == '_') {
527 		rv = EINVAL;
528 	} else {
529 		if (getenv_r(name, buf, blen) == -1)
530 			rv = errno;
531 		else
532 			rv = 0;
533 	}
534 
535 	ET(rv);
536 }
537 
/* Emit a single console character through stdio's stdout. */
void
rumpuser_putchar(int c)
{

	fputc(c, stdout);
}
544 
/*
 * Terminate the process: abort() (core dump) for RUMPUSER_PANIC,
 * otherwise a normal exit with the given status.  Never returns.
 */
__dead void
rumpuser_exit(int rv)
{

	if (rv == RUMPUSER_PANIC)
		abort();
	else
		exit(rv);
}
554 
/* Propagate a rump-kernel error value into the host errno. */
void
rumpuser_seterrno(int error)
{

	errno = error;
}
561 
/*
 * This is meant for safe debugging prints from the kernel: format
 * directly to stderr without taking any scheduler locks.
 */
void
rumpuser_dprintf(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}
574 
/*
 * Deliver a rump signal to the process.  The pid argument is ignored:
 * raise() always targets the calling process.  Unmappable signals
 * (rump2host returns <= 0) are silently dropped.
 */
int
rumpuser_kill(int64_t pid, int rumpsig)
{
	int sig;

	sig = rumpuser__sig_rump2host(rumpsig);
	if (sig > 0)
		raise(sig);
	return 0;
}
585 
586 /* thread functions */
587 
/* Wait queue used by the mutex/rwlock/condvar implementations below. */
TAILQ_HEAD(waithead, waiter);
struct waiter {
	struct thread *who;		/* blocked thread */
	TAILQ_ENTRY(waiter) entries;
	int onlist;			/* still queued => woken by timeout */
};
594 
/*
 * Block the current thread on wait queue 'wh'.  msec == 0 means wait
 * until explicitly woken; otherwise a relative timeout in ms.
 * Returns 0 if woken by wakeup_one()/wakeup_all(), ETIMEDOUT if the
 * timeout fired (we are then still on the list and must dequeue).
 */
static int
wait(struct waithead *wh, uint64_t msec)
{
	struct waiter w;	/* lives on our stack while we sleep */

	w.who = get_current();
	TAILQ_INSERT_TAIL(wh, &w, entries);
	w.onlist = 1;
	block(w.who);
	if (msec)
		w.who->wakeup_time = now() + msec;
	schedule();

	/* woken up by timeout? */
	if (w.onlist)
		TAILQ_REMOVE(wh, &w, entries);

	/*
	 * NOTE(review): THREAD_TIMEDOUT set by schedule() on timeout is
	 * not cleared here -- presumably harmless since only
	 * abssleep_real() consumes that flag, but worth confirming.
	 */
	return w.onlist ? ETIMEDOUT : 0;
}
614 
615 static void
616 wakeup_one(struct waithead *wh)
617 {
618 	struct waiter *w;
619 
620 	if ((w = TAILQ_FIRST(wh)) != NULL) {
621 		TAILQ_REMOVE(wh, w, entries);
622 		w->onlist = 0;
623 		wake(w->who);
624 	}
625 }
626 
627 static void
628 wakeup_all(struct waithead *wh)
629 {
630 	struct waiter *w;
631 
632 	while ((w = TAILQ_FIRST(wh)) != NULL) {
633 		TAILQ_REMOVE(wh, w, entries);
634 		w->onlist = 0;
635 		wake(w->who);
636 	}
637 }
638 
639 int
640 rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
641 	int joinable, int pri, int cpuidx, void **tptr)
642 {
643 	struct thread *thr;
644 
645 	thr = create_thread(thrname, NULL, (void (*)(void *))f, arg, NULL, 0);
646 
647 	if (!thr)
648 		return EINVAL;
649 
650 	/*
651 	 * XXX: should be supplied as a flag to create_thread() so as to
652 	 * _ensure_ it's set before the thread runs (and could exit).
653 	 * now we're trusting unclear semantics of create_thread()
654 	 */
655 	if (thr && joinable)
656 		thr->flags |= THREAD_MUSTJOIN;
657 
658 	*tptr = thr;
659 	return 0;
660 }
661 
/* Terminate the calling thread; thin wrapper over exit_thread(). */
void
rumpuser_thread_exit(void)
{

	exit_thread();
}
668 
/*
 * Join a thread created with joinable != 0.  p is the handle returned
 * via *tptr by rumpuser_thread_create().  Always succeeds.
 */
int
rumpuser_thread_join(void *p)
{

	join_thread(p);
	return 0;
}
676 
/* Counting mutex: v is the hold depth (recursive for the same lwp). */
struct rumpuser_mtx {
	struct waithead waiters;	/* threads blocked in mutex_enter */
	int v;				/* lock count; 0 == free */
	int flags;			/* RUMPUSER_MTX_* creation flags */
	struct lwp *o;			/* owner lwp, NULL when free */
};
683 
684 void
685 rumpuser_mutex_init(struct rumpuser_mtx **mtxp, int flags)
686 {
687 	struct rumpuser_mtx *mtx;
688 
689 	mtx = malloc(sizeof(*mtx));
690 	memset(mtx, 0, sizeof(*mtx));
691 	mtx->flags = flags;
692 	TAILQ_INIT(&mtx->waiters);
693 	*mtxp = mtx;
694 }
695 
/*
 * Acquire the mutex, releasing the rump-kernel CPU while blocked so
 * other virtual CPUs/threads can run in the meantime.
 */
void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{
	int nlocks;

	if (rumpuser_mutex_tryenter(mtx) != 0) {
		rumpkern_unsched(&nlocks, NULL);
		while (rumpuser_mutex_tryenter(mtx) != 0)
			wait(&mtx->waiters, 0);
		rumpkern_sched(nlocks, NULL);
	}
}
708 
/*
 * Acquire the mutex without releasing the rump-kernel CPU.  With one
 * VCPU and no preemption the trylock cannot fail; failure indicates a
 * corrupted invariant and is fatal.
 */
void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = rumpuser_mutex_tryenter(mtx);
	/* one VCPU supported, no preemption => must succeed */
	if (rv != 0) {
		printk("no voi ei\n");
		/* bug fix: previously continued WITHOUT the lock held */
		abort();
	}
}
720 
/*
 * Try to acquire the mutex without blocking.  The lock is recursive
 * for the owning lwp: a held lock only refuses other owners.
 * Returns 0 on success, EBUSY if held by someone else.
 */
int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	struct lwp *l = get_current()->lwp;

	if (mtx->v && mtx->o != l)
		return EBUSY;

	mtx->v++;
	mtx->o = l;

	return 0;
}
734 
/*
 * Drop one recursion level; on the final release clear the owner and
 * wake one waiter (if any).
 */
void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	assert(mtx->v > 0);
	if (--mtx->v == 0) {
		mtx->o = NULL;
		wakeup_one(&mtx->waiters);
	}
}
745 
/* Free a mutex; it must be unowned with no waiters. */
void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	assert(TAILQ_EMPTY(&mtx->waiters) && mtx->o == NULL);
	free(mtx);
}
753 
/* Report the owning lwp (NULL if the mutex is free). */
void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	*lp = mtx->o;
}
760 
/* Reader/writer lock: writer hold == non-NULL o, readers counted in v. */
struct rumpuser_rw {
	struct waithead rwait;	/* blocked readers */
	struct waithead wwait;	/* blocked writers */
	int v;			/* reader count; -1 marks a downgraded hold */
	struct lwp *o;		/* write owner, NULL otherwise */
};
767 
768 void
769 rumpuser_rw_init(struct rumpuser_rw **rwp)
770 {
771 	struct rumpuser_rw *rw;
772 
773 	rw = malloc(sizeof(*rw));
774 	memset(rw, 0, sizeof(*rw));
775 	TAILQ_INIT(&rw->rwait);
776 	TAILQ_INIT(&rw->wwait);
777 
778 	*rwp = rw;
779 }
780 
/*
 * Acquire the rwlock for reading or writing, releasing the rump-kernel
 * CPU while blocked.  Readers and writers queue on separate lists.
 * NOTE(review): an invalid lock type leaves w == NULL and would crash
 * in wait(); callers are presumably trusted to pass a valid type.
 */
void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	struct waithead *w = NULL;
	int nlocks;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		w = &rw->wwait;
		break;
	case RUMPUSER_RW_READER:
		w = &rw->rwait;
		break;
	}

	if (rumpuser_rw_tryenter(enum_rumprwlock, rw) != 0) {
		rumpkern_unsched(&nlocks, NULL);
		while (rumpuser_rw_tryenter(enum_rumprwlock, rw) != 0)
			wait(w, 0);
		rumpkern_sched(nlocks, NULL);
	}
}
804 
/*
 * Try to acquire the rwlock without blocking.  A writer needs the lock
 * completely free; a reader additionally defers to queued writers to
 * avoid starving them.  Returns 0, EBUSY, or EINVAL for a bad type.
 */
int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		if (rw->o == NULL) {
			rw->o = rumpuser_curlwp();
			rv = 0;
		} else {
			rv = EBUSY;
		}
		break;
	case RUMPUSER_RW_READER:
		/* no new readers while a writer holds or waits */
		if (rw->o == NULL && TAILQ_EMPTY(&rw->wwait)) {
			rw->v++;
			rv = 0;
		} else {
			rv = EBUSY;
		}
		break;
	default:
		rv = EINVAL;
	}

	return rv;
}
834 
/*
 * Release one hold: a write hold (non-NULL owner) is dropped entirely,
 * otherwise one reader reference.  Waiting writers are preferred over
 * readers when choosing whom to wake.
 */
void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw->o) {
		rw->o = NULL;
	} else {
		rw->v--;
	}

	/* standard procedure, don't let readers starve out writers */
	if (!TAILQ_EMPTY(&rw->wwait)) {
		if (rw->o == NULL)
			wakeup_one(&rw->wwait);
	} else if (!TAILQ_EMPTY(&rw->rwait) && rw->o == NULL) {
		wakeup_all(&rw->rwait);
	}
}
853 
/* Free an rwlock; assumed unheld with no waiters. */
void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	free(rw);
}
860 
/*
 * Report whether the lock is held in the queried mode: write-held by
 * the calling lwp, or read-held by anyone (v > 0).
 */
void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rvp)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rvp = rw->o == rumpuser_curlwp();
		break;
	case RUMPUSER_RW_READER:
		*rvp = rw->v > 0;
		break;
	}
}
875 
/*
 * Downgrade a write hold to read.  v = -1 is the special "downgraded"
 * marker consumed by rumpuser_rw_tryupgrade().
 * NOTE(review): the owner field is not cleared here, and rw_exit()
 * will then take the writer path with v still -1 -- presumably relied
 * upon by single-VCPU usage; verify against the rump locking contract.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->o == rumpuser_curlwp());
	rw->v = -1;
}
883 
/*
 * Try to upgrade a previously downgraded hold (v == -1) back to a
 * write hold.  Returns 0 on success, EBUSY otherwise.
 */
int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	if (rw->v == -1) {
		rw->v = 1;
		rw->o = rumpuser_curlwp();
		return 0;
	}

	return EBUSY;
}
896 
/* Condition variable: a wait queue plus a waiter count for has_waiters. */
struct rumpuser_cv {
	struct waithead waiters;
	int nwaiters;
};
901 
902 void
903 rumpuser_cv_init(struct rumpuser_cv **cvp)
904 {
905 	struct rumpuser_cv *cv;
906 
907 	cv = malloc(sizeof(*cv));
908 	memset(cv, 0, sizeof(*cv));
909 	TAILQ_INIT(&cv->waiters);
910 	*cvp = cv;
911 }
912 
/* Free a condition variable; it must have no waiters. */
void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	assert(cv->nwaiters == 0);
	free(cv);
}
920 
/* Release the rump-kernel CPU and then the mutex, prior to cv sleep. */
static void
cv_unsched(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	rumpuser_mutex_exit(mtx);
}
928 
/*
 * Reacquire the mutex and the rump-kernel CPU after a cv sleep.  The
 * order depends on the mutex type: kernel spin mutexes must be taken
 * with the CPU already held (see rumpuser(3)).
 */
static void
cv_resched(struct rumpuser_mtx *mtx, int nlocks)
{

	/* see rumpuser(3) */
	if ((mtx->flags & (RUMPUSER_MTX_KMUTEX | RUMPUSER_MTX_SPIN)) ==
	    (RUMPUSER_MTX_KMUTEX | RUMPUSER_MTX_SPIN)) {
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		rumpuser_mutex_enter_nowrap(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}
943 
/*
 * Wait on the cv, atomically (in this cooperative world) dropping the
 * mutex and the rump CPU, and reacquiring both before returning.
 */
void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unsched(mtx, &nlocks);
	wait(&cv->waiters, 0);
	cv_resched(mtx, nlocks);
	cv->nwaiters--;
}
955 
/*
 * Like rumpuser_cv_wait() but without releasing the rump-kernel CPU
 * around the sleep; only the mutex is dropped and retaken.
 */
void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	rumpuser_mutex_exit(mtx);
	wait(&cv->waiters, 0);
	rumpuser_mutex_enter_nowrap(mtx);
	cv->nwaiters--;
}
966 
/*
 * Wait on the cv with a relative timeout (converted to milliseconds).
 * Returns 0 if signalled/broadcast, ETIMEDOUT if the timeout fired.
 */
int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	int nlocks;
	int rv;

	cv->nwaiters++;
	cv_unsched(mtx, &nlocks);
	rv = wait(&cv->waiters, sec * 1000 + nsec / (1000*1000));
	cv_resched(mtx, nlocks);
	cv->nwaiters--;

	return rv;
}
982 
/* Wake one thread waiting on the cv (no-op if none). */
void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	wakeup_one(&cv->waiters);
}
989 
/* Wake every thread waiting on the cv. */
void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	wakeup_all(&cv->waiters);
}
996 
/* Report (via *rvp) whether any thread is currently waiting on the cv. */
void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *rvp)
{

	*rvp = cv->nwaiters != 0;
}
1003 
1004 /*
1005  * curlwp
1006  */
1007 
/*
 * Bind/unbind a rump kernel lwp to the current fiber.  CREATE/DESTROY
 * are no-ops here since the lwp pointer is simply stored per-thread.
 */
void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	struct thread *thread;
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		thread = get_current();
		thread->lwp = l;
		break;
	case RUMPUSER_LWP_CLEAR:
		thread = get_current();
		assert(thread->lwp == l);
		thread->lwp = NULL;
		break;
	}
}
1029 
/* Return the rump lwp bound to the current fiber (NULL if none). */
struct lwp *
rumpuser_curlwp(void)
{

	return get_current()->lwp;
}
1036