/*	$NetBSD: rumpuser_pth.c,v 1.42 2014/06/23 12:38:18 pooka Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.42 2014/06/23 12:38:18 pooka Exp $");
#endif /* !lint */

#include <sys/queue.h>
#if defined(__NetBSD__)
#include <sys/param.h>
#include <sys/atomic.h>
#endif

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

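/*
 * Lock structures are allocated with aligned_alloc().  On NetBSD the
 * size is rounded up to COHERENCY_UNIT and the memory is cache line
 * aligned, so that two locks never share a cache line; other platforms
 * simply fall back to malloc().
 */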
#if defined(__NetBSD__)
static void *
aligned_alloc(size_t size)
{
	void *ptr;

	size = roundup2(size, COHERENCY_UNIT);
	return posix_memalign(&ptr, COHERENCY_UNIT, size) ? NULL : ptr;
}
#else
#define aligned_alloc(sz) malloc(sz)
#endif

int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
	int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv, i;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	for (i = 0; i < 10; i++) {
		const struct timespec ts = {0, 10*1000*1000};

		rv = pthread_create(ptidp, &pattr, f, arg);
		if (rv != EAGAIN)
			break;
		nanosleep(&ts, NULL);
	}

#if defined(HAVE_PTHREAD_SETNAME_3)
	if (rv == 0 && thrname) {
		pthread_setname_np(*ptidp, thrname, NULL);
	}
#elif defined(HAVE_PTHREAD_SETNAME_2)
	if (rv == 0 && thrname) {
		pthread_setname_np(*ptidp, thrname);
	}
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}

struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = aligned_alloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

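/*
 * Owner bookkeeping is done in mtxenter()/mtxexit() and only for
 * mutexes initialized with RUMPUSER_MTX_KMUTEX, so only those can be
 * queried for their owner.
 */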
void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_held unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}

/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared lock.  to accommodate this,
 * we need to check *after* acquiring a lock in case someone was
 * downgrading it.  if so, we couldn't actually have it and maybe
 * need to retry later.
 */

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
#if !defined(__APPLE__) && !defined(__ANDROID__)
	char pad[64 - sizeof(pthread_rwlock_t)];
	pthread_spinlock_t spin;
#endif
	unsigned int readers;
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == (unsigned)-1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{
	unsigned nreaders = rw->readers;

	return nreaders != (unsigned)-1 ? nreaders : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = (unsigned)-1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static inline void
rw_readup(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__) || defined(__ANDROID__)
	atomic_inc_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

static inline void
rw_readdown(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__) || defined(__ANDROID__)
	atomic_dec_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = aligned_alloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
#if !defined(__APPLE__) && !defined(__ANDROID__)
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),PTHREAD_PROCESS_PRIVATE));
#endif
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

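/*
 * Lock acquisition is done in two steps: first attempt the pthread
 * trylock routine, and only if the lock is contested release the rump
 * kernel CPU (KLOCK_WRAP) around the blocking call.  A write lock must
 * additionally be retried if a downgrade was in progress when we got
 * it (see rw_setwriter()).
 */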
void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}

/*
 * convert from exclusive to shared lock without allowing anyone to
 * obtain an exclusive lock in between.  actually, might allow
 * someone to obtain the lock, we just don't allow that thread to
 * return from the hypercall with it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
#if !defined(__APPLE__) && !defined(__ANDROID__)
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
#endif
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

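/*
 * cv_unschedule() and cv_reschedule() bracket the actual wait: the
 * former gives up the rump kernel CPU and clears the interlock's owner
 * before blocking, the latter restores both afterwards, with special
 * care for spin mutexes (see the comment in cv_reschedule()).
 */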
static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * The more optimal solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Get clock already here, just in case we will be put to sleep
	 * after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr,
			    "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

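/*
 * The version used by this hypervisor: the current lwp is simply
 * stored in thread-specific data, and create/destroy need no
 * bookkeeping here.
 */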
void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif


void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}