xref: /netbsd-src/lib/librumpuser/rumpuser_pth.c (revision 7788a0781fe6ff2cce37368b4578a7ade0850cb1)
1 /*	$NetBSD: rumpuser_pth.c,v 1.30 2013/05/15 14:52:49 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "rumpuser_port.h"
29 
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser_pth.c,v 1.30 2013/05/15 14:52:49 pooka Exp $");
32 #endif /* !lint */
33 
34 #include <sys/queue.h>
35 
36 #include <assert.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <pthread.h>
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdint.h>
#include <time.h>
44 #include <unistd.h>
45 
46 #include <rump/rumpuser.h>
47 
48 #include "rumpuser_int.h"
49 
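/*
 * pthread(3)-backed implementation of the rumpuser hypercalls for
 * threads, mutexes, rwlocks, condition variables and curlwp tracking.
 * The helpers from rumpuser_int.h are used throughout: roughly,
 * NOFAIL()/NOFAIL_ERRNO() abort on failure, KLOCK_WRAP() drops and
 * reacquires the rump kernel CPU around a potentially blocking host
 * call, and ET() translates the host error before returning it (see
 * rumpuser_int.h for the exact definitions).
 */
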
50 int
51 rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
52 	int joinable, int priority, int cpuidx, void **ptcookie)
53 {
54 	pthread_t ptid;
55 	pthread_t *ptidp;
56 	pthread_attr_t pattr;
57 	int rv;
58 
59 	if ((rv = pthread_attr_init(&pattr)) != 0)
60 		return rv;
61 
62 	if (joinable) {
63 		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
64 		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
65 	} else {
66 		ptidp = &ptid;
67 		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
68 	}
69 
70 	rv = pthread_create(ptidp, &pattr, f, arg);
71 #if defined(__NetBSD__)
72 	if (rv == 0 && thrname)
73 		pthread_setname_np(*ptidp, thrname, NULL);
74 #elif defined(__linux__)
75 	/*
76 	 * The pthread_setname_np() call varies from one Linux distro to
77 	 * another.  Comment out the call pending autoconf support.
78 	 */
79 #if 0
80 	if (rv == 0 && thrname)
81 		pthread_setname_np(*ptidp, thrname);
82 #endif
83 #endif
84 
85 	if (joinable) {
86 		assert(ptcookie);
87 		*ptcookie = ptidp;
88 	}
89 
90 	pthread_attr_destroy(&pattr);
91 
92 	ET(rv);
93 }
94 
95 __dead void
96 rumpuser_thread_exit(void)
97 {
98 
99 	pthread_exit(NULL);
100 }
101 
102 int
103 rumpuser_thread_join(void *ptcookie)
104 {
105 	pthread_t *pt = ptcookie;
106 	int rv;
107 
108 	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
109 	if (rv == 0)
110 		free(pt);
111 
112 	ET(rv);
113 }
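
/*
 * Illustrative sketch only (compiled out): creating a joinable thread
 * and joining it via the cookie.  The thread function and name here are
 * hypothetical; priority and cpuidx are accepted but not used by this
 * pthread implementation.
 */
#if 0
static void *
example_thread(void *arg)
{

	return NULL;
}

static void
example_thread_usage(void)
{
	void *cookie;

	if (rumpuser_thread_create(example_thread, NULL, "example",
	    1 /* joinable */, -1 /* priority */, -1 /* cpuidx */,
	    &cookie) == 0)
		rumpuser_thread_join(cookie);
}
#endif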
114 
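/*
 * Mutexes.  The flags given at init time control the behaviour:
 * RUMPUSER_MTX_KMUTEX means the mutex tracks its owner so that
 * rumpuser_mutex_owner() works, and RUMPUSER_MTX_SPIN means the mutex
 * is taken without releasing the rump kernel CPU.
 */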
115 struct rumpuser_mtx {
116 	pthread_mutex_t pthmtx;
117 	struct lwp *owner;
118 	int flags;
119 };
120 
121 void
122 rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
123 {
124 	pthread_mutexattr_t att;
125 
126 	NOFAIL(*mtx = malloc(sizeof(struct rumpuser_mtx)));
127 
128 	pthread_mutexattr_init(&att);
129 	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
130 	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
131 	pthread_mutexattr_destroy(&att);
132 
133 	(*mtx)->owner = NULL;
134 	assert(flags != 0);
135 	(*mtx)->flags = flags;
136 }
137 
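/*
 * mtxenter/mtxexit: maintain the owner field for KMUTEX-type mutexes
 * around lock/unlock so that rumpuser_mutex_owner() can report it.
 * They are no-ops for mutexes without RUMPUSER_MTX_KMUTEX.
 */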
138 static void
139 mtxenter(struct rumpuser_mtx *mtx)
140 {
141 
142 	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
143 		return;
144 
145 	assert(mtx->owner == NULL);
146 	mtx->owner = rumpuser_curlwp();
147 }
148 
149 static void
150 mtxexit(struct rumpuser_mtx *mtx)
151 {
152 
153 	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
154 		return;
155 
156 	assert(mtx->owner != NULL);
157 	mtx->owner = NULL;
158 }
159 
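/*
 * Lock acquisition tries a non-blocking trylock first.  Only if that
 * fails do we pay for KLOCK_WRAP(), i.e. release the rump kernel CPU
 * while blocking on the host mutex and reacquire it afterwards.
 * Spin mutexes never release the rump kernel CPU.
 */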
160 void
161 rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
162 {
163 
164 	if (mtx->flags & RUMPUSER_MTX_SPIN) {
165 		rumpuser_mutex_enter_nowrap(mtx);
166 		return;
167 	}
168 
169 	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
170 	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
171 		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
172 	mtxenter(mtx);
173 }
174 
175 void
176 rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
177 {
178 
179 	assert(mtx->flags & RUMPUSER_MTX_SPIN);
180 	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
181 	mtxenter(mtx);
182 }
183 
184 int
185 rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
186 {
187 	int rv;
188 
189 	rv = pthread_mutex_trylock(&mtx->pthmtx);
190 	if (rv == 0) {
191 		mtxenter(mtx);
192 	}
193 
194 	ET(rv);
195 }
196 
197 void
198 rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
199 {
200 
201 	mtxexit(mtx);
202 	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
203 }
204 
205 void
206 rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
207 {
208 
209 	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
210 	free(mtx);
211 }
212 
213 void
214 rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
215 {
216 
217 	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
218 		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
219 		abort();
220 	}
221 
222 	*lp = mtx->owner;
223 }
224 
225 /*
226  * rwlocks.  These are mostly simple, except that NetBSD wants to
227  * support something called downgrade, which means we need to swap
228  * our exclusive lock for a shared lock.  To accommodate this, we
229  * must check *after* acquiring the lock whether someone is in the
230  * middle of downgrading it.  If so, we did not really get the lock
231  * exclusively and may need to back off and retry later.
232  */
233 
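/*
 * The readers field encodes the lock state: -1 means write-locked
 * (with the owner recorded in "writer"), 0 means unowned, and a
 * positive value is the number of readers.  The spinlock protects
 * only "readers"; "downgrade" is written only by the thread
 * performing the downgrade.
 */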
234 struct rumpuser_rw {
235 	pthread_rwlock_t pthrw;
236 	pthread_spinlock_t spin;
237 	int readers;
238 	struct lwp *writer;
239 	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
240 };
241 
242 static int
243 rw_amwriter(struct rumpuser_rw *rw)
244 {
245 
246 	return rw->writer == rumpuser_curlwp() && rw->readers == -1;
247 }
248 
249 static int
250 rw_nreaders(struct rumpuser_rw *rw)
251 {
252 
253 	return rw->readers > 0 ? rw->readers : 0;
254 }
255 
256 static int
257 rw_setwriter(struct rumpuser_rw *rw, int retry)
258 {
259 
260 	/*
261 	 * Don't need the spinlock here, we already have an
262 	 * exclusive lock and "downgrade" is stable until complete.
263 	 */
264 	if (rw->downgrade) {
265 		pthread_rwlock_unlock(&rw->pthrw);
266 		if (retry) {
267 			struct timespec ts;
268 
269 			/* portable yield, essentially */
270 			ts.tv_sec = 0;
271 			ts.tv_nsec = 1;
272 			KLOCK_WRAP(nanosleep(&ts, NULL));
273 		}
274 		return EBUSY;
275 	}
276 	assert(rw->readers == 0);
277 	rw->writer = rumpuser_curlwp();
278 	rw->readers = -1;
279 	return 0;
280 }
281 
282 static void
283 rw_clearwriter(struct rumpuser_rw *rw)
284 {
285 
286 	assert(rw_amwriter(rw));
287 	rw->readers = 0;
288 	rw->writer = NULL;
289 }
290 
291 static void
292 rw_readup(struct rumpuser_rw *rw)
293 {
294 
295 	pthread_spin_lock(&rw->spin);
296 	assert(rw->readers >= 0);
297 	++rw->readers;
298 	pthread_spin_unlock(&rw->spin);
299 }
300 
301 static void
302 rw_readdown(struct rumpuser_rw *rw)
303 {
304 
305 	pthread_spin_lock(&rw->spin);
306 	assert(rw->readers > 0);
307 	--rw->readers;
308 	pthread_spin_unlock(&rw->spin);
309 }
310 
311 void
312 rumpuser_rw_init(struct rumpuser_rw **rw)
313 {
314 
315 	NOFAIL(*rw = malloc(sizeof(struct rumpuser_rw)));
316 	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
317 	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),PTHREAD_PROCESS_PRIVATE));
318 	(*rw)->readers = 0;
319 	(*rw)->writer = NULL;
320 	(*rw)->downgrade = 0;
321 }
322 
323 void
324 rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
325 {
326 	enum rumprwlock lk = enum_rumprwlock;
327 
328 	switch (lk) {
329 	case RUMPUSER_RW_WRITER:
330 		do {
331 			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
332 				KLOCK_WRAP(NOFAIL_ERRNO(
333 				    pthread_rwlock_wrlock(&rw->pthrw)));
334 		} while (rw_setwriter(rw, 1) != 0);
335 		break;
336 	case RUMPUSER_RW_READER:
337 		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
338 			KLOCK_WRAP(NOFAIL_ERRNO(
339 			    pthread_rwlock_rdlock(&rw->pthrw)));
340 		rw_readup(rw);
341 		break;
342 	}
343 }
344 
345 int
346 rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
347 {
348 	enum rumprwlock lk = enum_rumprwlock;
349 	int rv;
350 
351 	switch (lk) {
352 	case RUMPUSER_RW_WRITER:
353 		rv = pthread_rwlock_trywrlock(&rw->pthrw);
354 		if (rv == 0)
355 			rv = rw_setwriter(rw, 0);
356 		break;
357 	case RUMPUSER_RW_READER:
358 		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
359 		if (rv == 0)
360 			rw_readup(rw);
361 		break;
362 	default:
363 		rv = EINVAL;
364 		break;
365 	}
366 
367 	ET(rv);
368 }
369 
370 int
371 rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
372 {
373 
374 	/*
375 	 * Not supported by pthreads.  Since the caller needs to
376 	 * back off anyway to avoid deadlock, always failing
377 	 * is correct.
378 	 */
379 	ET(EBUSY);
380 }
381 
382 /*
383  * Convert an exclusive lock into a shared lock without allowing
384  * anyone to obtain an exclusive lock in between.  Strictly speaking,
385  * someone may briefly obtain the exclusive lock, but we do not allow
386  * that thread to return from the hypercall holding it.
387  */
388 void
389 rumpuser_rw_downgrade(struct rumpuser_rw *rw)
390 {
391 
392 	assert(rw->downgrade == 0);
393 	rw->downgrade = 1;
394 	rumpuser_rw_exit(rw);
395 	/*
396 	 * Although the competition cannot leave the hypervisor with the
397 	 * lock, it might have rescheduled itself after we released the
398 	 * lock, so we need a KLOCK_WRAP here.
399 	 */
400 	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
401 	rw->downgrade = 0;
402 	rw_readup(rw);
403 }
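
/*
 * Illustrative sketch only (compiled out): a typical write-then-
 * downgrade sequence using the interfaces above.  The protected data
 * is hypothetical.
 */
#if 0
static void
example_downgrade_usage(struct rumpuser_rw *rw)
{

	rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
	/* ... modify the protected data exclusively ... */
	rumpuser_rw_downgrade(rw);
	/* ... keep reading it; other readers may now enter too ... */
	rumpuser_rw_exit(rw);
}
#endif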
404 
405 void
406 rumpuser_rw_exit(struct rumpuser_rw *rw)
407 {
408 
409 	if (rw_nreaders(rw))
410 		rw_readdown(rw);
411 	else
412 		rw_clearwriter(rw);
413 	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
414 }
415 
416 void
417 rumpuser_rw_destroy(struct rumpuser_rw *rw)
418 {
419 
420 	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
421 	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
422 	free(rw);
423 }
424 
425 void
426 rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
427 {
428 	enum rumprwlock lk = enum_rumprwlock;
429 
430 	switch (lk) {
431 	case RUMPUSER_RW_WRITER:
432 		*rv = rw_amwriter(rw);
433 		break;
434 	case RUMPUSER_RW_READER:
435 		*rv = rw_nreaders(rw);
436 		break;
437 	}
438 }
439 
440 /*
441  * condvar
442  */
443 
444 struct rumpuser_cv {
445 	pthread_cond_t pthcv;
446 	int nwaiters;
447 };
448 
449 void
450 rumpuser_cv_init(struct rumpuser_cv **cv)
451 {
452 
453 	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
454 	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
455 	(*cv)->nwaiters = 0;
456 }
457 
458 void
459 rumpuser_cv_destroy(struct rumpuser_cv *cv)
460 {
461 
462 	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
463 	free(cv);
464 }
465 
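/*
 * cv_unschedule/cv_reschedule bracket the actual pthread_cond_wait():
 * the former gives up the rump kernel CPU and the mutex owner
 * bookkeeping before blocking, the latter restores them afterwards
 * (in an order that depends on the mutex type, see below).
 */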
466 static void
467 cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
468 {
469 
470 	rumpkern_unsched(nlocks, mtx);
471 	mtxexit(mtx);
472 }
473 
474 static void
475 cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
476 {
477 
478 	/*
479 	 * If the cv interlock is a spin mutex, we must first release
480 	 * the mutex that was reacquired by pthread_cond_wait(),
481 	 * acquire the CPU context and only then relock the mutex.
482 	 * This is to preserve resource allocation order so that
483 	 * we don't deadlock.  Non-spinning mutexes don't have this
484 	 * problem since they don't use a hold-and-wait approach
485 	 * to acquiring the mutex wrt the rump kernel CPU context.
486 	 *
487 	 * A better solution would be to rework rumpkern_sched() so that
488 	 * it is possible to tell the scheduler
489 	 * "if you need to block, drop this lock first", but I'm not
490 	 * going poking there without some numbers on how often this
491 	 * path is taken for spin mutexes.
492 	 */
493 	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
494 	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
495 		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
496 		rumpkern_sched(nlocks, mtx);
497 		rumpuser_mutex_enter_nowrap(mtx);
498 	} else {
499 		mtxenter(mtx);
500 		rumpkern_sched(nlocks, mtx);
501 	}
502 }
503 
504 void
505 rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
506 {
507 	int nlocks;
508 
509 	cv->nwaiters++;
510 	cv_unschedule(mtx, &nlocks);
511 	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
512 	cv_reschedule(mtx, nlocks);
513 	cv->nwaiters--;
514 }
515 
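/*
 * Like rumpuser_cv_wait(), but does not release the rump kernel CPU
 * while waiting; only the mutex owner bookkeeping is dropped.
 */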
516 void
517 rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
518 {
519 
520 	cv->nwaiters++;
521 	mtxexit(mtx);
522 	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
523 	mtxenter(mtx);
524 	cv->nwaiters--;
525 }
526 
527 int
528 rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
529 	int64_t sec, int64_t nsec)
530 {
531 	struct timespec ts;
532 	int rv, nlocks;
533 
534 	/*
535 	 * Read the clock already here, just in case we are put to sleep
536 	 * after releasing the rump kernel context.
537 	 *
538 	 * The condition variables should use CLOCK_MONOTONIC, but since
539 	 * that's not available everywhere, leave it for another day.
540 	 */
541 	clock_gettime(CLOCK_REALTIME, &ts);
542 
543 	cv->nwaiters++;
544 	cv_unschedule(mtx, &nlocks);
545 
546 	ts.tv_sec += sec;
547 	ts.tv_nsec += nsec;
548 	if (ts.tv_nsec >= 1000*1000*1000) {
549 		ts.tv_sec++;
550 		ts.tv_nsec -= 1000*1000*1000;
551 	}
552 	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);
553 
554 	cv_reschedule(mtx, nlocks);
555 	cv->nwaiters--;
556 
557 	ET(rv);
558 }
559 
560 void
561 rumpuser_cv_signal(struct rumpuser_cv *cv)
562 {
563 
564 	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
565 }
566 
567 void
568 rumpuser_cv_broadcast(struct rumpuser_cv *cv)
569 {
570 
571 	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
572 }
573 
574 void
575 rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
576 {
577 
578 	*nwaiters = cv->nwaiters;
579 }
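
/*
 * Illustrative sketch only (compiled out): the usual wait loop with
 * the mutex as interlock.  The predicate and variables here are
 * hypothetical.
 */
#if 0
static void
example_cv_usage(struct rumpuser_mtx *mtx, struct rumpuser_cv *cv,
	int *conditionp)
{

	rumpuser_mutex_enter(mtx);
	while (!*conditionp)
		rumpuser_cv_wait(cv, mtx);
	/* ... consume the now-true condition ... */
	rumpuser_mutex_exit(mtx);
}
#endif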
580 
581 /*
582  * curlwp
583  */
584 
585 static pthread_key_t curlwpkey;
586 
587 /*
588  * The #if 0'd curlwp implementation is not used by this hypervisor,
589  * but serves as test code to check that the intended usage works.
590  */
591 #if 0
592 struct rumpuser_lwp {
593 	struct lwp *l;
594 	LIST_ENTRY(rumpuser_lwp) l_entries;
595 };
596 static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
597 static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;
598 
599 void
600 rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
601 {
602 	struct rumpuser_lwp *rl, *rliter;
603 
604 	switch (op) {
605 	case RUMPUSER_LWP_CREATE:
606 		rl = malloc(sizeof(*rl));
607 		rl->l = l;
608 		pthread_mutex_lock(&lwplock);
609 		LIST_FOREACH(rliter, &lwps, l_entries) {
610 			if (rliter->l == l) {
611 				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
612 				abort();
613 			}
614 		}
615 		LIST_INSERT_HEAD(&lwps, rl, l_entries);
616 		pthread_mutex_unlock(&lwplock);
617 		break;
618 	case RUMPUSER_LWP_DESTROY:
619 		pthread_mutex_lock(&lwplock);
620 		LIST_FOREACH(rl, &lwps, l_entries) {
621 			if (rl->l == l)
622 				break;
623 		}
624 		if (!rl) {
625 			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
626 			abort();
627 		}
628 		LIST_REMOVE(rl, l_entries);
629 		pthread_mutex_unlock(&lwplock);
630 		free(rl);
631 		break;
632 	case RUMPUSER_LWP_SET:
633 		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);
634 
635 		pthread_mutex_lock(&lwplock);
636 		LIST_FOREACH(rl, &lwps, l_entries) {
637 			if (rl->l == l)
638 				break;
639 		}
640 		if (!rl) {
641 			fprintf(stderr,
642 			    "LWP_SET: %p does not exist\n", l);
643 			abort();
644 		}
645 		pthread_mutex_unlock(&lwplock);
646 
647 		pthread_setspecific(curlwpkey, rl);
648 		break;
649 	case RUMPUSER_LWP_CLEAR:
650 		assert(((struct rumpuser_lwp *)
651 		    pthread_getspecific(curlwpkey))->l == l);
652 		pthread_setspecific(curlwpkey, NULL);
653 		break;
654 	}
655 }
656 
657 struct lwp *
658 rumpuser_curlwp(void)
659 {
660 	struct rumpuser_lwp *rl;
661 
662 	rl = pthread_getspecific(curlwpkey);
663 	return rl ? rl->l : NULL;
664 }
665 
666 #else
667 
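/*
 * Production implementation: curlwp is simply the value stored in the
 * thread-specific key; LWP create/destroy need no bookkeeping here.
 */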
668 void
669 rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
670 {
671 	enum rumplwpop op = enum_rumplwpop;
672 
673 	switch (op) {
674 	case RUMPUSER_LWP_CREATE:
675 		break;
676 	case RUMPUSER_LWP_DESTROY:
677 		break;
678 	case RUMPUSER_LWP_SET:
679 		assert(pthread_getspecific(curlwpkey) == NULL);
680 		pthread_setspecific(curlwpkey, l);
681 		break;
682 	case RUMPUSER_LWP_CLEAR:
683 		assert(pthread_getspecific(curlwpkey) == l);
684 		pthread_setspecific(curlwpkey, NULL);
685 		break;
686 	}
687 }
688 
689 struct lwp *
690 rumpuser_curlwp(void)
691 {
692 
693 	return pthread_getspecific(curlwpkey);
694 }
695 #endif
696 
697 
698 void
699 rumpuser__thrinit(void)
700 {
701 	pthread_key_create(&curlwpkey, NULL);
702 }
703