/*	$NetBSD: sys_lwp.c,v 1.75 2020/01/30 12:36:38 ad Exp $	*/

/*-
 * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lightweight process (LWP) system calls.  See kern_lwp.c for a description
 * of LWPs.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.75 2020/01/30 12:36:38 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/types.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/ptrace.h>
#include <sys/sleepq.h>
#include <sys/lwpctl.h>
#include <sys/cpu.h>

#include <uvm/uvm_extern.h>

#define	LWP_UNPARK_MAX		1024

static const stack_t lwp_ss_init = SS_INIT;

syncobj_t lwp_park_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_NULL,
	.sobj_unsleep	= sleepq_unsleep,
	.sobj_changepri	= sleepq_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= syncobj_noowner,
};

static void
mi_startlwp(void *arg)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;

	(p->p_emul->e_startlwp)(arg);

	/* If the process is traced, report LWP creation to a debugger */
	if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) ==
	    (PSL_TRACED|PSL_TRACELWP_CREATE)) {
		/* Paranoid check */
		mutex_enter(proc_lock);
		if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) !=
		    (PSL_TRACED|PSL_TRACELWP_CREATE)) {
			mutex_exit(proc_lock);
			return;
		}

		mutex_enter(p->p_lock);
		eventswitch(TRAP_LWP, PTRACE_LWP_CREATE, l->l_lid);
	}
}

int
do_lwp_create(lwp_t *l, void *arg, u_long flags, lwp_t **l2,
    const sigset_t *sigmask, const stack_t *sigstk)
{
	struct proc *p = l->l_proc;
	vaddr_t uaddr;
	int error;

	/* XXX check against resource limits */

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0))
		return ENOMEM;

	error = lwp_create(l, p, uaddr, flags & LWP_DETACHED, NULL, 0,
	    mi_startlwp, arg, l2, l->l_class, sigmask, &lwp_ss_init);
	if (__predict_false(error)) {
		uvm_uarea_free(uaddr);
		return error;
	}

	return 0;
}
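
/*
 * _lwp_create(2): create a new LWP from the ucontext supplied by the
 * caller and, unless LWP_SUSPENDED is given, start it running.  As a
 * rough sketch of the userland side (assuming the _lwp_makecontext(3)
 * helper and a caller-supplied start_routine, stack and stacksize;
 * these names are illustrative only):
 *
 *	ucontext_t uc;
 *	lwpid_t lid;
 *
 *	_lwp_makecontext(&uc, start_routine, arg, NULL, stack, stacksize);
 *	if (_lwp_create(&uc, 0, &lid) == -1)
 *		err(EXIT_FAILURE, "_lwp_create");
 */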

int
sys__lwp_create(struct lwp *l, const struct sys__lwp_create_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const ucontext_t *) ucp;
		syscallarg(u_long) flags;
		syscallarg(lwpid_t *) new_lwp;
	} */
	struct proc *p = l->l_proc;
	ucontext_t *newuc;
	lwp_t *l2;
	int error;

	newuc = kmem_alloc(sizeof(ucontext_t), KM_SLEEP);
	error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize);
	if (error)
		goto fail;

	/* validate the ucontext */
	if ((newuc->uc_flags & _UC_CPU) == 0) {
		error = EINVAL;
		goto fail;
	}
	error = cpu_mcontext_validate(l, &newuc->uc_mcontext);
	if (error)
		goto fail;

	const sigset_t *sigmask = newuc->uc_flags & _UC_SIGMASK ?
	    &newuc->uc_sigmask : &l->l_sigmask;
	error = do_lwp_create(l, newuc, SCARG(uap, flags), &l2, sigmask,
	    &SS_INIT);
	if (error)
		goto fail;

	error = copyout(&l2->l_lid, SCARG(uap, new_lwp), sizeof(l2->l_lid));
	if (error == 0) {
		lwp_start(l2, SCARG(uap, flags));
		return 0;
	}
	lwp_exit(l2);
fail:
	kmem_free(newuc, sizeof(ucontext_t));
	return error;
}

int
sys__lwp_exit(struct lwp *l, const void *v, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

int
sys__lwp_self(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

int
sys__lwp_getprivate(struct lwp *l, const void *v, register_t *retval)
{

	*retval = (uintptr_t)l->l_private;
	return 0;
}

int
sys__lwp_setprivate(struct lwp *l, const struct sys__lwp_setprivate_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) ptr;
	} */

	return lwp_setprivate(l, SCARG(uap, ptr));
}
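
/*
 * _lwp_suspend(2): stop execution of the target LWP.  The call does not
 * return until the target is effectively suspended (or has exited, or
 * the wait is interrupted); suspension of the current LWP only takes
 * effect on return to user space, hence the cv_wait_sig() loop below.
 */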

int
sys__lwp_suspend(struct lwp *l, const struct sys__lwp_suspend_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct proc *p = l->l_proc;
	struct lwp *t;
	int error;

	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	/*
	 * Check for deadlock, which is only possible when we're suspending
	 * ourselves.  XXX There is a short race here, as p_nrlwps is only
	 * incremented when an LWP suspends itself on the kernel/user
	 * boundary.  It's still possible to kill -9 the process so we
	 * don't bother checking further.
	 */
	lwp_lock(t);
	if ((t == l && p->p_nrlwps == 1) ||
	    (l->l_flag & (LW_WCORE | LW_WEXIT)) != 0) {
		lwp_unlock(t);
		mutex_exit(p->p_lock);
		return EDEADLK;
	}

	/*
	 * Suspend the LWP.  XXX If it's on a different CPU, we should wait
	 * for it to be preempted, where it will put itself to sleep.
	 *
	 * Suspension of the current LWP will happen on return to userspace.
	 */
	error = lwp_suspend(l, t);
	if (error) {
		mutex_exit(p->p_lock);
		return error;
	}

	/*
	 * Wait for:
	 *  o process exiting
	 *  o target LWP suspended
	 *  o target LWP not suspended and LW_WSUSPEND clear
	 *  o target LWP exited
	 */
	for (;;) {
		error = cv_wait_sig(&p->p_lwpcv, p->p_lock);
		if (error) {
			error = ERESTART;
			break;
		}
		if (lwp_find(p, SCARG(uap, target)) == NULL) {
			error = ESRCH;
			break;
		}
		if ((l->l_flag | t->l_flag) & (LW_WCORE | LW_WEXIT)) {
			error = ERESTART;
			break;
		}
		if (t->l_stat == LSSUSPENDED ||
		    (t->l_flag & LW_WSUSPEND) == 0)
			break;
	}
	mutex_exit(p->p_lock);

	return error;
}

int
sys__lwp_continue(struct lwp *l, const struct sys__lwp_continue_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	int error;
	struct proc *p = l->l_proc;
	struct lwp *t;

	error = 0;

	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	lwp_lock(t);
	lwp_continue(t);
	mutex_exit(p->p_lock);

	return error;
}
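
/*
 * _lwp_wakeup(2): make a sleeping LWP runnable.  Fails with ENODEV if
 * the target is not sleeping, or EBUSY if its sleep is uninterruptible.
 * LW_CANCELLED and LW_UNPARKED are set regardless, so that a racing
 * sleep or park in the target returns early.
 */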

int
sys__lwp_wakeup(struct lwp *l, const struct sys__lwp_wakeup_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct lwp *t;
	struct proc *p;
	int error;

	p = l->l_proc;
	mutex_enter(p->p_lock);

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	lwp_lock(t);
	t->l_flag |= (LW_CANCELLED | LW_UNPARKED);

	if (t->l_stat != LSSLEEP) {
		lwp_unlock(t);
		error = ENODEV;
	} else if ((t->l_flag & LW_SINTR) == 0) {
		lwp_unlock(t);
		error = EBUSY;
	} else {
		/* Wake it up.  lwp_unsleep() will release the LWP lock. */
		lwp_unsleep(t, true);
		error = 0;
	}

	mutex_exit(p->p_lock);

	return error;
}

int
sys__lwp_wait(struct lwp *l, const struct sys__lwp_wait_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) wait_for;
		syscallarg(lwpid_t *) departed;
	} */
	struct proc *p = l->l_proc;
	int error;
	lwpid_t dep;

	mutex_enter(p->p_lock);
	error = lwp_wait(l, SCARG(uap, wait_for), &dep, false);
	mutex_exit(p->p_lock);

	if (!error && SCARG(uap, departed)) {
		error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
	}

	return error;
}

int
sys__lwp_kill(struct lwp *l, const struct sys__lwp_kill_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
		syscallarg(int) signo;
	} */
	struct proc *p = l->l_proc;
	struct lwp *t;
	ksiginfo_t ksi;
	int signo = SCARG(uap, signo);
	int error = 0;

	if ((u_int)signo >= NSIG)
		return EINVAL;

	KSI_INIT(&ksi);
	ksi.ksi_signo = signo;
	ksi.ksi_code = SI_LWP;
	ksi.ksi_pid = p->p_pid;
	ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
	ksi.ksi_lid = SCARG(uap, target);

	mutex_enter(proc_lock);
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
		error = ESRCH;
	else if (signo != 0)
		kpsignal2(p, &ksi);
	mutex_exit(p->p_lock);
	mutex_exit(proc_lock);

	return error;
}

int
sys__lwp_detach(struct lwp *l, const struct sys__lwp_detach_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct proc *p;
	struct lwp *t;
	lwpid_t target;
	int error;

	target = SCARG(uap, target);
	p = l->l_proc;

	mutex_enter(p->p_lock);

	if (l->l_lid == target)
		t = l;
	else {
		/*
		 * We can't use lwp_find() here because the target might
		 * be a zombie.
		 */
		t = radix_tree_lookup_node(&p->p_lwptree,
		    (uint64_t)(target - 1));
		KASSERT(t == NULL || t->l_lid == target);
	}

	/*
	 * If the LWP is already detached, there's nothing to do.
	 * If it's a zombie, we need to clean up after it.  LSZOMB
	 * is visible with the proc mutex held.
	 *
	 * After we have detached or released the LWP, kick any
	 * other LWPs that may be sitting in _lwp_wait(), waiting
	 * for the target LWP to exit.
	 */
	if (t != NULL && t->l_stat != LSIDL) {
		if ((t->l_prflag & LPR_DETACHED) == 0) {
			p->p_ndlwps++;
			t->l_prflag |= LPR_DETACHED;
			if (t->l_stat == LSZOMB) {
				/* Releases proc mutex. */
				lwp_free(t, false, false);
				return 0;
			}
			error = 0;

			/*
			 * Have any LWPs sleeping in lwp_wait() recheck
			 * for deadlock.
			 */
			cv_broadcast(&p->p_lwpcv);
		} else
			error = EINVAL;
	} else
		error = ESRCH;

	mutex_exit(p->p_lock);

	return error;
}
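
/*
 * Unpark each LWP named in the 'tp' array.  Targets are looked up
 * directly in the per-process LWP radix tree (keyed by LID - 1) under
 * p_treelock.  A missing target yields ESRCH but does not stop the
 * remaining wakeups.
 */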

int
lwp_unpark(const lwpid_t *tp, const u_int ntargets)
{
	uint64_t id;
	u_int target;
	int error;
	proc_t *p;
	lwp_t *t;

	p = curproc;
	error = 0;

	rw_enter(&p->p_treelock, RW_READER);
	for (target = 0; target < ntargets; target++) {
		/*
		 * We don't bother excluding zombies or idle LWPs here, as
		 * setting LW_UNPARKED on them won't do any harm.
		 */
		id = (uint64_t)(tp[target] - 1);
		t = radix_tree_lookup_node(&p->p_lwptree, id);
		if (t == NULL) {
			error = ESRCH;
			continue;
		}

		lwp_lock(t);
		if (t->l_syncobj == &lwp_park_syncobj) {
			/*
			 * As expected it's parked, so wake it up.
			 * lwp_unsleep() will release the LWP lock.
			 */
			lwp_unsleep(t, true);
		} else {
			/*
			 * It hasn't parked yet because the wakeup side won
			 * the race, or something else has happened to make
			 * the thread not park.  Why doesn't really matter.
			 * Set the operation pending, so that the next call
			 * to _lwp_park() in the LWP returns early.  If it
			 * turns out to be a spurious wakeup, no harm done.
			 */
			t->l_flag |= LW_UNPARKED;
			lwp_unlock(t);
		}
	}
	rw_exit(&p->p_treelock);

	return error;
}
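
/*
 * Wait until awoken by lwp_unpark() or _lwp_wakeup(), or until the
 * optional timeout expires.  A pending unpark or cancel is consumed
 * without blocking and reported as EALREADY.  For a relative timeout
 * the remaining time is written back to 'ts' on the way out.
 */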

int
lwp_park(clockid_t clock_id, int flags, struct timespec *ts)
{
	int timo, error;
	struct timespec start;
	lwp_t *l;
	bool timeremain = !(flags & TIMER_ABSTIME) && ts;

	if (ts != NULL) {
		if ((error = ts2timo(clock_id, flags, ts, &timo,
		    timeremain ? &start : NULL)) != 0)
			return error;
		KASSERT(timo != 0);
	} else {
		timo = 0;
	}

	/*
	 * Before going the full route and blocking, check to see if an
	 * unpark op is pending.
	 */
	l = curlwp;
	lwp_lock(l);
	if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) {
		l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED);
		lwp_unlock(l);
		return EALREADY;
	}
	l->l_biglocks = 0;
	sleepq_enqueue(NULL, l, "parked", &lwp_park_syncobj);
	error = sleepq_block(timo, true);
	switch (error) {
	case EWOULDBLOCK:
		error = ETIMEDOUT;
		if (timeremain)
			memset(ts, 0, sizeof(*ts));
		break;
	case ERESTART:
		error = EINTR;
		/*FALLTHROUGH*/
	default:
		if (timeremain)
			clock_timeleft(clock_id, ts, &start);
		break;
	}
	return error;
}

/*
 * 'park' an LWP waiting on a user-level synchronisation object.  The LWP
 * will remain parked until another LWP in the same process calls in and
 * requests that it be unparked.
 */
int
sys____lwp_park60(struct lwp *l, const struct sys____lwp_park60_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(int) flags;
		syscallarg(struct timespec *) ts;
		syscallarg(lwpid_t) unpark;
		syscallarg(const void *) hint;
		syscallarg(const void *) unparkhint;
	} */
	struct timespec ts, *tsp;
	int error;

	if (SCARG(uap, ts) == NULL)
		tsp = NULL;
	else {
		error = copyin(SCARG(uap, ts), &ts, sizeof(ts));
		if (error != 0)
			return error;
		tsp = &ts;
	}

	if (SCARG(uap, unpark) != 0) {
		error = lwp_unpark(&SCARG(uap, unpark), 1);
		if (error != 0)
			return error;
	}

	error = lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp);
	if (SCARG(uap, ts) != NULL && (SCARG(uap, flags) & TIMER_ABSTIME) == 0)
		(void)copyout(tsp, SCARG(uap, ts), sizeof(*tsp));
	return error;
}

int
sys__lwp_unpark(struct lwp *l, const struct sys__lwp_unpark_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
		syscallarg(const void *) hint;
	} */

	return lwp_unpark(&SCARG(uap, target), 1);
}
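
/*
 * _lwp_unpark_all(2): unpark a batch of LWPs with a single call.  When
 * the target array is NULL, only report the per-call limit
 * (LWP_UNPARK_MAX) so that the caller can split larger sets into
 * blocks.
 */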

int
sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const lwpid_t *) targets;
		syscallarg(size_t) ntargets;
		syscallarg(const void *) hint;
	} */
	lwpid_t targets[32], *tp;
	int error;
	u_int ntargets;
	size_t sz;

	ntargets = SCARG(uap, ntargets);
	if (SCARG(uap, targets) == NULL) {
		/*
		 * Let the caller know how much we are willing to do, and
		 * let it unpark the LWPs in blocks.
		 */
		*retval = LWP_UNPARK_MAX;
		return 0;
	}
	if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
		return EINVAL;

	/*
	 * Copy in the target array.  If it's a small number of LWPs, then
	 * place the numbers on the stack.
	 */
	sz = sizeof(lwpid_t) * ntargets;
	if (sz <= sizeof(targets))
		tp = targets;
	else
		tp = kmem_alloc(sz, KM_SLEEP);
	error = copyin(SCARG(uap, targets), tp, sz);
	if (error != 0) {
		if (tp != targets) {
			kmem_free(tp, sz);
		}
		return error;
	}
	error = lwp_unpark(tp, ntargets);
	if (tp != targets)
		kmem_free(tp, sz);
	return error;
}

int
sys__lwp_setname(struct lwp *l, const struct sys__lwp_setname_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
		syscallarg(const char *) name;
	} */
	char *name, *oname;
	lwpid_t target;
	proc_t *p;
	lwp_t *t;
	int error;

	if ((target = SCARG(uap, target)) == 0)
		target = l->l_lid;

	name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
	error = copyinstr(SCARG(uap, name), name, MAXCOMLEN, NULL);
	switch (error) {
	case ENAMETOOLONG:
	case 0:
		name[MAXCOMLEN - 1] = '\0';
		break;
	default:
		kmem_free(name, MAXCOMLEN);
		return error;
	}

	p = curproc;
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, target)) == NULL) {
		mutex_exit(p->p_lock);
		kmem_free(name, MAXCOMLEN);
		return ESRCH;
	}
	lwp_lock(t);
	oname = t->l_name;
	t->l_name = name;
	lwp_unlock(t);
	mutex_exit(p->p_lock);

	if (oname != NULL)
		kmem_free(oname, MAXCOMLEN);

	return 0;
}

int
sys__lwp_getname(struct lwp *l, const struct sys__lwp_getname_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
		syscallarg(char *) name;
		syscallarg(size_t) len;
	} */
	char name[MAXCOMLEN];
	lwpid_t target;
	size_t len;
	proc_t *p;
	lwp_t *t;

	if ((target = SCARG(uap, target)) == 0)
		target = l->l_lid;

	p = curproc;
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, target)) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}
	lwp_lock(t);
	if (t->l_name == NULL)
		name[0] = '\0';
	else
		strlcpy(name, t->l_name, sizeof(name));
	lwp_unlock(t);
	mutex_exit(p->p_lock);

	len = uimin(SCARG(uap, len), sizeof(name));

	return copyoutstr(name, SCARG(uap, name), len, NULL);
}

int
sys__lwp_ctl(struct lwp *l, const struct sys__lwp_ctl_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) features;
		syscallarg(struct lwpctl **) address;
	} */
	int error, features;
	vaddr_t vaddr;

	features = SCARG(uap, features);
	features &= ~(LWPCTL_FEATURE_CURCPU | LWPCTL_FEATURE_PCTR);
	if (features != 0)
		return ENODEV;
	if ((error = lwp_ctl_alloc(&vaddr)) != 0)
		return error;
	return copyout(&vaddr, SCARG(uap, address), sizeof(void *));
}