/*	$NetBSD: sys_lwp.c,v 1.84 2023/07/17 12:54:29 riastradh Exp $	*/

/*-
 * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lightweight process (LWP) system calls.  See kern_lwp.c for a description
 * of LWPs.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.84 2023/07/17 12:54:29 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/types.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/ptrace.h>
#include <sys/sleepq.h>
#include <sys/lwpctl.h>
#include <sys/cpu.h>
#include <sys/pserialize.h>

#include <uvm/uvm_extern.h>

#define	LWP_UNPARK_MAX		1024

static const stack_t lwp_ss_init = SS_INIT;

syncobj_t lwp_park_syncobj = {
	.sobj_name	= "lwp_park",
	.sobj_flag	= SOBJ_SLEEPQ_NULL,
	.sobj_unsleep	= sleepq_unsleep,
	.sobj_changepri	= sleepq_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= syncobj_noowner,
};

static void
mi_startlwp(void *arg)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;

	(p->p_emul->e_startlwp)(arg);

	/* If the process is traced, report lwp creation to a debugger */
	if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) ==
	    (PSL_TRACED|PSL_TRACELWP_CREATE)) {
		/* Paranoid check */
		mutex_enter(&proc_lock);
		if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) !=
		    (PSL_TRACED|PSL_TRACELWP_CREATE)) {
			mutex_exit(&proc_lock);
			return;
		}

		mutex_enter(p->p_lock);
		eventswitch(TRAP_LWP, PTRACE_LWP_CREATE, l->l_lid);
	}
}

int
do_lwp_create(lwp_t *l, void *arg, u_long flags, lwp_t **l2,
    const sigset_t *sigmask, const stack_t *sigstk)
{
	struct proc *p = l->l_proc;
	vaddr_t uaddr;
	int error;

	/* XXX check against resource limits */

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0))
		return ENOMEM;

	error = lwp_create(l, p, uaddr, flags & LWP_DETACHED, NULL, 0,
	    mi_startlwp, arg, l2, l->l_class, sigmask, &lwp_ss_init);
	if (__predict_false(error)) {
		uvm_uarea_free(uaddr);
		return error;
	}

	return 0;
}
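/*
 * Create a new LWP from a user-supplied ucontext.  The context is
 * copied in and validated before the LWP is created; on success the
 * new LWP's ID is copied out and the LWP is started with the caller's
 * flags.
 */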
int
sys__lwp_create(struct lwp *l, const struct sys__lwp_create_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const ucontext_t *) ucp;
		syscallarg(u_long) flags;
		syscallarg(lwpid_t *) new_lwp;
	} */
	struct proc *p = l->l_proc;
	ucontext_t *newuc;
	lwp_t *l2;
	int error;

	newuc = kmem_alloc(sizeof(ucontext_t), KM_SLEEP);
	error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize);
	if (error)
		goto fail;

	/* validate the ucontext */
	if ((newuc->uc_flags & _UC_CPU) == 0) {
		error = EINVAL;
		goto fail;
	}
	error = cpu_mcontext_validate(l, &newuc->uc_mcontext);
	if (error)
		goto fail;

	const sigset_t *sigmask = newuc->uc_flags & _UC_SIGMASK ?
	    &newuc->uc_sigmask : &l->l_sigmask;
	error = do_lwp_create(l, newuc, SCARG(uap, flags), &l2, sigmask,
	    &SS_INIT);
	if (error)
		goto fail;

	error = copyout(&l2->l_lid, SCARG(uap, new_lwp), sizeof(l2->l_lid));
	if (error == 0) {
		lwp_start(l2, SCARG(uap, flags));
		return 0;
	}
	lwp_exit(l2);
fail:
	kmem_free(newuc, sizeof(ucontext_t));
	return error;
}

int
sys__lwp_exit(struct lwp *l, const void *v, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

int
sys__lwp_self(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

int
sys__lwp_getprivate(struct lwp *l, const void *v, register_t *retval)
{

	*retval = (uintptr_t)l->l_private;
	return 0;
}

int
sys__lwp_setprivate(struct lwp *l, const struct sys__lwp_setprivate_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) ptr;
	} */

	return lwp_setprivate(l, SCARG(uap, ptr));
}

int
sys__lwp_suspend(struct lwp *l, const struct sys__lwp_suspend_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct proc *p = l->l_proc;
	struct lwp *t;
	int error;

	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	/*
	 * Check for deadlock, which is only possible when we're suspending
	 * ourself.  XXX There is a short race here, as p_nrlwps is only
	 * incremented when an LWP suspends itself on the kernel/user
	 * boundary.  It's still possible to kill -9 the process so we
	 * don't bother checking further.
	 */
	lwp_lock(t);
	if ((t == l && p->p_nrlwps == 1) ||
	    (l->l_flag & (LW_WCORE | LW_WEXIT)) != 0) {
		lwp_unlock(t);
		mutex_exit(p->p_lock);
		return EDEADLK;
	}

	/*
	 * Suspend the LWP.  XXX If it's on a different CPU, we should wait
	 * for it to be preempted, where it will put itself to sleep.
	 *
	 * Suspension of the current LWP will happen on return to userspace.
	 */
	error = lwp_suspend(l, t);
	if (error) {
		mutex_exit(p->p_lock);
		return error;
	}

	/*
	 * Wait for:
	 *  o process exiting
	 *  o target LWP suspended
	 *  o target LWP not suspended and L_WSUSPEND clear
	 *  o target LWP exited
	 */
	for (;;) {
		error = cv_wait_sig(&p->p_lwpcv, p->p_lock);
		if (error) {
			error = ERESTART;
			break;
		}
		if (lwp_find(p, SCARG(uap, target)) == NULL) {
			error = ESRCH;
			break;
		}
		if ((l->l_flag | t->l_flag) & (LW_WCORE | LW_WEXIT)) {
			error = ERESTART;
			break;
		}
		if (t->l_stat == LSSUSPENDED ||
		    (t->l_flag & LW_WSUSPEND) == 0)
			break;
	}
	mutex_exit(p->p_lock);

	return error;
}
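/*
 * Resume execution of the target LWP, if it is currently suspended;
 * lwp_continue() does nothing for an LWP that is not suspended.
 */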
int
sys__lwp_continue(struct lwp *l, const struct sys__lwp_continue_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	int error;
	struct proc *p = l->l_proc;
	struct lwp *t;

	error = 0;

	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	lwp_lock(t);
	lwp_continue(t);
	mutex_exit(p->p_lock);

	return error;
}

int
sys__lwp_wakeup(struct lwp *l, const struct sys__lwp_wakeup_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct lwp *t;
	struct proc *p;
	int error;

	p = l->l_proc;
	mutex_enter(p->p_lock);

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	lwp_lock(t);
	t->l_flag |= (LW_CANCELLED | LW_UNPARKED);

	if (t->l_stat != LSSLEEP) {
		lwp_unlock(t);
		error = ENODEV;
	} else if ((t->l_flag & LW_SINTR) == 0) {
		lwp_unlock(t);
		error = EBUSY;
	} else {
		/* Wake it up.  lwp_unsleep() will release the LWP lock. */
		lwp_unsleep(t, true);
		error = 0;
	}

	mutex_exit(p->p_lock);

	return error;
}

int
sys__lwp_wait(struct lwp *l, const struct sys__lwp_wait_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) wait_for;
		syscallarg(lwpid_t *) departed;
	} */
	struct proc *p = l->l_proc;
	int error;
	lwpid_t dep;

	mutex_enter(p->p_lock);
	error = lwp_wait(l, SCARG(uap, wait_for), &dep, false);
	mutex_exit(p->p_lock);

	if (!error && SCARG(uap, departed)) {
		error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
	}

	return error;
}

int
sys__lwp_kill(struct lwp *l, const struct sys__lwp_kill_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)	target;
		syscallarg(int)		signo;
	} */
	struct proc *p = l->l_proc;
	struct lwp *t;
	ksiginfo_t ksi;
	int signo = SCARG(uap, signo);
	int error = 0;

	if ((u_int)signo >= NSIG)
		return EINVAL;

	KSI_INIT(&ksi);
	ksi.ksi_signo = signo;
	ksi.ksi_code = SI_LWP;
	ksi.ksi_pid = p->p_pid;
	ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
	ksi.ksi_lid = SCARG(uap, target);

	mutex_enter(&proc_lock);
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
		error = ESRCH;
	else if (signo != 0)
		kpsignal2(p, &ksi);
	mutex_exit(p->p_lock);
	mutex_exit(&proc_lock);

	return error;
}

int
sys__lwp_detach(struct lwp *l, const struct sys__lwp_detach_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)	target;
	} */
	struct proc *p;
	struct lwp *t;
	lwpid_t target;
	int error;

	target = SCARG(uap, target);
	p = l->l_proc;

	mutex_enter(p->p_lock);

	if (l->l_lid == target)
		t = l;
	else {
		/*
		 * We can't use lwp_find() here because the target might
		 * be a zombie.
		 */
		t = proc_find_lwp(p, target);
		KASSERT(t == NULL || t->l_lid == target);
	}

	/*
	 * If the LWP is already detached, there's nothing to do.
	 * If it's a zombie, we need to clean up after it.  LSZOMB
	 * is visible with the proc mutex held.
	 *
	 * After we have detached or released the LWP, kick any
	 * other LWPs that may be sitting in _lwp_wait(), waiting
	 * for the target LWP to exit.
	 */
	if (t != NULL && t->l_stat != LSIDL) {
		if ((t->l_prflag & LPR_DETACHED) == 0) {
			p->p_ndlwps++;
			t->l_prflag |= LPR_DETACHED;
			if (t->l_stat == LSZOMB) {
				/* Releases proc mutex. */
				lwp_free(t, false, false);
				return 0;
			}
			error = 0;

			/*
			 * Have any LWPs sleeping in lwp_wait() recheck
			 * for deadlock.
			 */
			cv_broadcast(&p->p_lwpcv);
		} else
			error = EINVAL;
	} else
		error = ESRCH;

	mutex_exit(p->p_lock);

	return error;
}
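/*
 * Unpark each LWP in the given list of targets.  A parked LWP is woken
 * directly; an LWP that has not parked yet has LW_UNPARKED set so that
 * its next _lwp_park() returns immediately.  Returns ESRCH if any of
 * the targets could not be found or had already exited.
 */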
int
lwp_unpark(const lwpid_t *tp, const u_int ntargets)
{
	u_int target;
	int error, s;
	proc_t *p;
	lwp_t *t;

	p = curproc;
	error = 0;

	s = pserialize_read_enter();
	for (target = 0; target < ntargets; target++) {
		t = proc_find_lwp_unlocked(p, tp[target]);
		if (__predict_false(t == NULL)) {
			error = ESRCH;
			continue;
		}

		KASSERT(lwp_locked(t, NULL));

		if (__predict_true(t->l_syncobj == &lwp_park_syncobj)) {
			/*
			 * As expected it's parked, so wake it up.
			 * lwp_unsleep() will release the LWP lock.
			 */
			lwp_unsleep(t, true);
		} else if (__predict_false(t->l_stat == LSZOMB)) {
			lwp_unlock(t);
			error = ESRCH;
		} else {
			/*
			 * It hasn't parked yet because the wakeup side won
			 * the race, or something else has happened to make
			 * the thread not park.  Why doesn't really matter.
			 * Set the operation pending, so that the next call
			 * to _lwp_park() in the LWP returns early.  If it
			 * turns out to be a spurious wakeup, no harm done.
			 */
			t->l_flag |= LW_UNPARKED;
			lwp_unlock(t);
		}
	}
	pserialize_read_exit(s);

	return error;
}
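/*
 * Put the calling LWP to sleep until it is unparked.  A pending unpark
 * (LW_UNPARKED or LW_CANCELLED) is consumed and reported as EALREADY
 * without sleeping.  A NULL timeout means sleep indefinitely; an
 * expired timeout is reported as ETIMEDOUT and an interrupted sleep
 * as EINTR.  For relative timeouts, the time remaining is written
 * back to *ts.
 */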
int
lwp_park(clockid_t clock_id, int flags, struct timespec *ts)
{
	int timo, error;
	struct timespec start;
	lwp_t *l;
	bool timeremain = !(flags & TIMER_ABSTIME) && ts;

	if (ts != NULL) {
		if ((error = ts2timo(clock_id, flags, ts, &timo,
		    timeremain ? &start : NULL)) != 0)
			return error;
		KASSERT(timo != 0);
	} else {
		timo = 0;
	}

	/*
	 * Before going the full route and blocking, check to see if an
	 * unpark op is pending.
	 */
	l = curlwp;
	lwp_lock(l);
	if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) {
		l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED);
		lwp_unlock(l);
		return EALREADY;
	}
	l->l_biglocks = 0;
	sleepq_enqueue(NULL, l, "parked", &lwp_park_syncobj, true);
	error = sleepq_block(timo, true, &lwp_park_syncobj);
	switch (error) {
	case EWOULDBLOCK:
		error = ETIMEDOUT;
		if (timeremain)
			memset(ts, 0, sizeof(*ts));
		break;
	case ERESTART:
		error = EINTR;
		/*FALLTHROUGH*/
	default:
		if (timeremain)
			clock_timeleft(clock_id, ts, &start);
		break;
	}
	return error;
}
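/*
 * Taken together, park and unpark form the kernel half of a userspace
 * sleep/wakeup primitive: one thread parks when it finds a condition
 * unavailable, and the thread that makes the condition available
 * unparks it by LID.  Illustrative userland sketch only (assumed
 * usage, not part of this file):
 *
 *	// waiter
 *	while (!flag)
 *		_lwp_park(CLOCK_MONOTONIC, 0, NULL, 0, NULL, NULL);
 *
 *	// waker
 *	flag = 1;
 *	_lwp_unpark(waiter_lid, NULL);
 *
 * The LW_UNPARKED handling above makes the lost-wakeup race benign:
 * an unpark that arrives before the corresponding park causes the
 * park to return immediately with EALREADY.
 */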
/*
 * 'park' an LWP waiting on a user-level synchronisation object.  The LWP
 * will remain parked until another LWP in the same process calls in and
 * requests that it be unparked.
 */
int
sys____lwp_park60(struct lwp *l, const struct sys____lwp_park60_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t)		clock_id;
		syscallarg(int)			flags;
		syscallarg(struct timespec *)	ts;
		syscallarg(lwpid_t)		unpark;
		syscallarg(const void *)	hint;
		syscallarg(const void *)	unparkhint;
	} */
	struct timespec ts, *tsp;
	int error;

	if (SCARG(uap, ts) == NULL)
		tsp = NULL;
	else {
		error = copyin(SCARG(uap, ts), &ts, sizeof(ts));
		if (error != 0)
			return error;
		tsp = &ts;
	}

	if (SCARG(uap, unpark) != 0) {
		error = lwp_unpark(&SCARG(uap, unpark), 1);
		if (error != 0)
			return error;
	}

	error = lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp);
	if (SCARG(uap, ts) != NULL && (SCARG(uap, flags) & TIMER_ABSTIME) == 0)
		(void)copyout(tsp, SCARG(uap, ts), sizeof(*tsp));
	return error;
}

int
sys__lwp_unpark(struct lwp *l, const struct sys__lwp_unpark_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		target;
		syscallarg(const void *)	hint;
	} */

	return lwp_unpark(&SCARG(uap, target), 1);
}

int
sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const lwpid_t *)	targets;
		syscallarg(size_t)		ntargets;
		syscallarg(const void *)	hint;
	} */
	lwpid_t targets[32], *tp;
	int error;
	u_int ntargets;
	size_t sz;

	ntargets = SCARG(uap, ntargets);
	if (SCARG(uap, targets) == NULL) {
		/*
		 * Let the caller know how much we are willing to do, and
		 * let it unpark the LWPs in blocks.
		 */
		*retval = LWP_UNPARK_MAX;
		return 0;
	}
	if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
		return EINVAL;

	/*
	 * Copy in the target array.  If it's a small number of LWPs, then
	 * place the numbers on the stack.
	 */
	sz = sizeof(lwpid_t) * ntargets;
	if (sz <= sizeof(targets))
		tp = targets;
	else
		tp = kmem_alloc(sz, KM_SLEEP);
	error = copyin(SCARG(uap, targets), tp, sz);
	if (error != 0) {
		if (tp != targets) {
			kmem_free(tp, sz);
		}
		return error;
	}
	error = lwp_unpark(tp, ntargets);
	if (tp != targets)
		kmem_free(tp, sz);
	return error;
}
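/*
 * Set the name of an LWP in the current process (a target of 0 names
 * the calling LWP).  Names longer than MAXCOMLEN - 1 characters are
 * silently truncated.
 */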
int
sys__lwp_setname(struct lwp *l, const struct sys__lwp_setname_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		target;
		syscallarg(const char *)	name;
	} */
	char *name, *oname;
	lwpid_t target;
	proc_t *p;
	lwp_t *t;
	int error;

	if ((target = SCARG(uap, target)) == 0)
		target = l->l_lid;

	name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
	error = copyinstr(SCARG(uap, name), name, MAXCOMLEN, NULL);
	switch (error) {
	case ENAMETOOLONG:
	case 0:
		name[MAXCOMLEN - 1] = '\0';
		break;
	default:
		kmem_free(name, MAXCOMLEN);
		return error;
	}

	p = curproc;
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, target)) == NULL) {
		mutex_exit(p->p_lock);
		kmem_free(name, MAXCOMLEN);
		return ESRCH;
	}
	lwp_lock(t);
	oname = t->l_name;
	t->l_name = name;
	lwp_unlock(t);
	mutex_exit(p->p_lock);

	if (oname != NULL)
		kmem_free(oname, MAXCOMLEN);

	return 0;
}

int
sys__lwp_getname(struct lwp *l, const struct sys__lwp_getname_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		target;
		syscallarg(char *)		name;
		syscallarg(size_t)		len;
	} */
	char name[MAXCOMLEN];
	lwpid_t target;
	size_t len;
	proc_t *p;
	lwp_t *t;

	if ((target = SCARG(uap, target)) == 0)
		target = l->l_lid;

	p = curproc;
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, target)) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}
	lwp_lock(t);
	if (t->l_name == NULL)
		name[0] = '\0';
	else
		strlcpy(name, t->l_name, sizeof(name));
	lwp_unlock(t);
	mutex_exit(p->p_lock);

	len = uimin(SCARG(uap, len), sizeof(name));

	return copyoutstr(name, SCARG(uap, name), len, NULL);
}

int
sys__lwp_ctl(struct lwp *l, const struct sys__lwp_ctl_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			features;
		syscallarg(struct lwpctl **)	address;
	} */
	int error, features;
	vaddr_t vaddr;

	features = SCARG(uap, features);
	features &= ~(LWPCTL_FEATURE_CURCPU | LWPCTL_FEATURE_PCTR);
	if (features != 0)
		return ENODEV;
	if ((error = lwp_ctl_alloc(&vaddr)) != 0)
		return error;
	return copyout(&vaddr, SCARG(uap, address), sizeof(void *));
}