/*	$NetBSD: rumpuser_pth.c,v 1.34 2013/10/27 16:39:46 rmind Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.34 2013/10/27 16:39:46 rmind Exp $");
#endif /* !lint */

#include <sys/queue.h>
#if defined(__NetBSD__)
#include <sys/param.h>
#include <sys/atomic.h>
#endif

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

#if defined(__NetBSD__)
static void *
aligned_alloc(size_t size)
{
	void *ptr;

	size = roundup2(size, COHERENCY_UNIT);
	return posix_memalign(&ptr, COHERENCY_UNIT, size) ? NULL : ptr;
}
#else
#define aligned_alloc(sz) malloc(sz)
#endif

int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
	int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv, i;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	for (i = 0; i < 10; i++) {
		const struct timespec ts = {0, 10*1000*1000};

		rv = pthread_create(ptidp, &pattr, f, arg);
		if (rv != EAGAIN)
			break;
		nanosleep(&ts, NULL);
	}

#if defined(__NetBSD__)
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname, NULL);
#elif defined(__linux__)
	/*
	 * The pthread_setname_np() call varies from one Linux distro to
	 * another.  Comment out the call pending autoconf support.
	 */
#if 0
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname);
#endif
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}
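
/*
 * Illustrative sketch only, if0'd like the test code near the end of
 * this file: how a caller is expected to use the joinable/ptcookie
 * contract of the thread hypercalls above.  The worker function and
 * the priority/cpuidx values are made-up placeholders (this
 * implementation ignores priority and cpuidx).
 */
#if 0
static void *
example_worker(void *arg)
{

	return arg;
}

static void
example_thread_usage(void)
{
	void *ptcookie;

	if (rumpuser_thread_create(example_worker, NULL, "example",
	    1 /* joinable */, -1, -1, &ptcookie) != 0)
		return;
	/* ... later, reap the thread and its cookie */
	rumpuser_thread_join(ptcookie);
}
#endif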

struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = aligned_alloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_held unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}
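
/*
 * Illustrative sketch only (if0'd): a typical caller-side sequence for
 * the mutex hypercalls above.  The RUMPUSER_MTX_KMUTEX-only flag is one
 * plausible choice; the implementation merely requires flags != 0.
 */
#if 0
static void
example_mutex_usage(void)
{
	struct rumpuser_mtx *mtx;
	struct lwp *owner;

	rumpuser_mutex_init(&mtx, RUMPUSER_MTX_KMUTEX);
	rumpuser_mutex_enter(mtx);		/* may block outside the rump CPU */
	rumpuser_mutex_owner(mtx, &owner);	/* owner == rumpuser_curlwp() here */
	rumpuser_mutex_exit(mtx);
	rumpuser_mutex_destroy(mtx);
}
#endif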

/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared lock.  to accommodate this,
 * we need to check *after* acquiring a lock in case someone was
 * downgrading it.  if so, we couldn't actually have it and maybe
 * need to retry later.
 */

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
#if !defined(__APPLE__)
	char pad[64 - sizeof(pthread_rwlock_t)];
	pthread_spinlock_t spin;
#endif
	unsigned int readers;	/* (unsigned)-1 while held by a writer */
	struct lwp *writer;
	int downgrade;	/* someone is downgrading (hopefully lock holder ;) */
};

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == (unsigned)-1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{
	unsigned nreaders = rw->readers;

	return nreaders != (unsigned)-1 ? nreaders : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = (unsigned)-1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static inline void
rw_readup(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__)
	atomic_inc_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

static inline void
rw_readdown(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__)
	atomic_dec_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = aligned_alloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
#if !defined(__APPLE__)
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),PTHREAD_PROCESS_PRIVATE));
#endif
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}

/*
 * convert from exclusive to shared lock without allowing anyone to
 * obtain an exclusive lock in between.  actually, might allow
 * someone to obtain the lock, we just don't allow that thread to
 * return from the hypercall with it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
#if !defined(__APPLE__)
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
#endif
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}
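
/*
 * Illustrative sketch only (if0'd): the downgrade sequence described
 * above rumpuser_rw_downgrade(), as seen from a caller.
 */
#if 0
static void
example_rw_downgrade_usage(void)
{
	struct rumpuser_rw *rw;
	int held;

	rumpuser_rw_init(&rw);
	rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
	rumpuser_rw_downgrade(rw);				/* now held shared */
	rumpuser_rw_held(RUMPUSER_RW_READER, rw, &held);	/* held != 0 */
	rumpuser_rw_exit(rw);
	rumpuser_rw_destroy(rw);
}
#endif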

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * The more optimal solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Get clock already here, just in case we will be put to sleep
	 * after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}
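
/*
 * Illustrative sketch only (if0'd): the usual wait-loop pattern for the
 * condvar hypercalls, with the interlock held around the predicate.
 * "example_condition" is a made-up predicate for the example.
 */
#if 0
static int example_condition;

static void
example_cv_usage(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	rumpuser_mutex_enter(mtx);
	while (!example_condition)
		rumpuser_cv_wait(cv, mtx);	/* drops and retakes mtx */
	rumpuser_mutex_exit(mtx);
}
#endif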

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr,
			    "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif

void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}