/*	$NetBSD: rumpuser_pth.c,v 1.30 2013/05/15 14:52:49 pooka Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.30 2013/05/15 14:52:49 pooka Exp $");
#endif /* !lint */

#include <sys/queue.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
	int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	rv = pthread_create(ptidp, &pattr, f, arg);
#if defined(__NetBSD__)
	/* use *ptidp: ptid itself is left unset in the joinable case */
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname, NULL);
#elif defined(__linux__)
	/*
	 * The pthread_setname_np() call varies from one Linux distro to
	 * another.  Comment out the call pending autoconf support.
	 */
#if 0
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname);
#endif
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}
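/*
 * Illustrative sketch, not compiled: the joinable thread protocol.
 * The creator receives a cookie for a heap-allocated pthread_t and
 * later hands it back to rumpuser_thread_join(), which frees it.
 * The worker function and the priority/cpuidx hint values below are
 * hypothetical.
 */
#if 0
static void *
example_worker(void *arg)
{

	/* ... perform work ... */
	return NULL;
}

static void
example_spawn_and_join(void)
{
	void *ptcookie;

	if (rumpuser_thread_create(example_worker, NULL, "example",
	    1 /* joinable */, 0 /* priority hint */, 0 /* cpuidx hint */,
	    &ptcookie) != 0)
		abort();
	rumpuser_thread_join(ptcookie);
}
#endif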
struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = malloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}
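/*
 * Illustrative sketch, not compiled: a mutex created with
 * RUMPUSER_MTX_KMUTEX tracks its owning lwp, which is what
 * rumpuser_mutex_owner() reports.  Assumes the calling thread
 * holds rump kernel context.
 */
#if 0
static void
example_kmutex(void)
{
	struct rumpuser_mtx *mtx;
	struct lwp *owner;

	rumpuser_mutex_init(&mtx, RUMPUSER_MTX_KMUTEX);
	rumpuser_mutex_enter(mtx);
	rumpuser_mutex_owner(mtx, &owner);
	assert(owner == rumpuser_curlwp());
	rumpuser_mutex_exit(mtx);
	rumpuser_mutex_destroy(mtx);
}
#endif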
/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared lock.  to accommodate this,
 * we need to check *after* acquiring a lock in case someone was
 * downgrading it.  if so, we couldn't actually have it and maybe
 * need to retry later.
 */

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
	pthread_spinlock_t spin;
	int readers;
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == -1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{

	return rw->readers > 0 ? rw->readers : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = -1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static void
rw_readup(struct rumpuser_rw *rw)
{

	pthread_spin_lock(&rw->spin);
	assert(rw->readers >= 0);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
}

static void
rw_readdown(struct rumpuser_rw *rw)
{

	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = malloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),
	    PTHREAD_PROCESS_PRIVATE));
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}
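/*
 * Illustrative sketch, not compiled: a reader that wants exclusive
 * access cannot upgrade in place, since rumpuser_rw_tryupgrade()
 * always fails; it must drop the read lock and retake the lock as
 * a writer.
 */
#if 0
static void
example_rw_upgrade(struct rumpuser_rw *rw)
{

	rumpuser_rw_enter(RUMPUSER_RW_READER, rw);
	if (rumpuser_rw_tryupgrade(rw) != 0) {
		/* expected path: back off, then relock exclusively */
		rumpuser_rw_exit(rw);
		rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
	}
	rumpuser_rw_exit(rw);
}
#endif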
/*
 * convert from exclusive to shared lock without allowing anyone to
 * obtain an exclusive lock in between.  strictly speaking, someone
 * might obtain the lock, but we don't allow that thread to return
 * from the hypercall with it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}
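/*
 * Illustrative sketch, not compiled: a writer downgrading to a read
 * hold.  A writer that slips in between the exit and the rdlock in
 * rumpuser_rw_downgrade() is bounced by rw_setwriter() and retries,
 * so the data published under the write hold stays visible.
 */
#if 0
static void
example_rw_downgrade(struct rumpuser_rw *rw)
{

	rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
	/* ... modify the protected data ... */
	rumpuser_rw_downgrade(rw);
	/* now held shared; other readers may also enter */
	rumpuser_rw_exit(rw);
}
#endif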
/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * A better solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Get the clock here already, just in case we will be put to
	 * sleep after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}
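/*
 * Illustrative sketch, not compiled: the standard predicate loop over
 * rumpuser_cv_timedwait() with a relative one-second timeout.
 * "want_exit" is a hypothetical predicate protected by the
 * interlocking mutex.
 */
#if 0
static void
example_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int *want_exit)
{
	int rv;

	rumpuser_mutex_enter(mtx);
	while (!*want_exit) {
		rv = rumpuser_cv_timedwait(cv, mtx, 1, 0);
		if (rv == ETIMEDOUT)
			break;
	}
	rumpuser_mutex_exit(mtx);
}
#endif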
/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif

void
rumpuser__thrinit(void)
{

	pthread_key_create(&curlwpkey, NULL);
}
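/*
 * Illustrative sketch, not compiled: the binding protocol implied by
 * the asserts in rumpuser_curlwpop().  A context switch clears the
 * outgoing lwp before setting the incoming one; "l_out" and "l_in"
 * are hypothetical.
 */
#if 0
static void
example_lwp_switch(struct lwp *l_out, struct lwp *l_in)
{

	rumpuser_curlwpop(RUMPUSER_LWP_CLEAR, l_out);
	rumpuser_curlwpop(RUMPUSER_LWP_SET, l_in);
	assert(rumpuser_curlwp() == l_in);
}
#endif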