1 /* $NetBSD: sys_lwp.c,v 1.82 2020/05/23 23:42:43 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Nathan J. Williams, and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Lightweight process (LWP) system calls. See kern_lwp.c for a description 34 * of LWPs. 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.82 2020/05/23 23:42:43 ad Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/pool.h> 43 #include <sys/proc.h> 44 #include <sys/types.h> 45 #include <sys/syscallargs.h> 46 #include <sys/kauth.h> 47 #include <sys/kmem.h> 48 #include <sys/ptrace.h> 49 #include <sys/sleepq.h> 50 #include <sys/lwpctl.h> 51 #include <sys/cpu.h> 52 #include <sys/pserialize.h> 53 54 #include <uvm/uvm_extern.h> 55 56 #define LWP_UNPARK_MAX 1024 57 58 static const stack_t lwp_ss_init = SS_INIT; 59 60 syncobj_t lwp_park_syncobj = { 61 .sobj_flag = SOBJ_SLEEPQ_NULL, 62 .sobj_unsleep = sleepq_unsleep, 63 .sobj_changepri = sleepq_changepri, 64 .sobj_lendpri = sleepq_lendpri, 65 .sobj_owner = syncobj_noowner, 66 }; 67 68 static void 69 mi_startlwp(void *arg) 70 { 71 struct lwp *l = curlwp; 72 struct proc *p = l->l_proc; 73 74 (p->p_emul->e_startlwp)(arg); 75 76 /* If the process is traced, report lwp creation to a debugger */ 77 if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) == 78 (PSL_TRACED|PSL_TRACELWP_CREATE)) { 79 /* Paranoid check */ 80 mutex_enter(&proc_lock); 81 if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) != 82 (PSL_TRACED|PSL_TRACELWP_CREATE)) { 83 mutex_exit(&proc_lock); 84 return; 85 } 86 87 mutex_enter(p->p_lock); 88 eventswitch(TRAP_LWP, PTRACE_LWP_CREATE, l->l_lid); 89 } 90 } 91 92 int 93 do_lwp_create(lwp_t *l, void *arg, u_long flags, lwp_t **l2, 94 const sigset_t *sigmask, const stack_t *sigstk) 95 { 96 struct proc *p = l->l_proc; 97 vaddr_t uaddr; 98 int error; 99 100 /* XXX check against resource limits */ 101 102 uaddr = uvm_uarea_alloc(); 103 if (__predict_false(uaddr == 0)) 104 return ENOMEM; 105 106 error = lwp_create(l, p, uaddr, flags & LWP_DETACHED, NULL, 0, 107 mi_startlwp, arg, l2, l->l_class, sigmask, &lwp_ss_init); 108 if (__predict_false(error)) { 109 uvm_uarea_free(uaddr); 110 return error; 111 } 112 113 return 0; 114 } 115 116 int 117 sys__lwp_create(struct lwp *l, const struct sys__lwp_create_args *uap, 118 register_t *retval) 119 { 120 /* { 121 syscallarg(const ucontext_t *) ucp; 122 syscallarg(u_long) flags; 123 syscallarg(lwpid_t *) new_lwp; 124 } */ 125 struct proc *p = l->l_proc; 126 ucontext_t *newuc; 127 lwp_t *l2; 128 int error; 129 130 newuc = kmem_alloc(sizeof(ucontext_t), KM_SLEEP); 131 error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize); 132 if (error) 133 goto fail; 134 135 /* validate the ucontext */ 136 if ((newuc->uc_flags & _UC_CPU) == 0) { 137 error = EINVAL; 138 goto fail; 139 } 140 error = cpu_mcontext_validate(l, &newuc->uc_mcontext); 141 if (error) 142 goto fail; 143 144 const sigset_t *sigmask = newuc->uc_flags & _UC_SIGMASK ? 145 &newuc->uc_sigmask : &l->l_sigmask; 146 error = do_lwp_create(l, newuc, SCARG(uap, flags), &l2, sigmask, 147 &SS_INIT); 148 if (error) 149 goto fail; 150 151 error = copyout(&l2->l_lid, SCARG(uap, new_lwp), sizeof(l2->l_lid)); 152 if (error == 0) { 153 lwp_start(l2, SCARG(uap, flags)); 154 return 0; 155 } 156 lwp_exit(l2); 157 fail: 158 kmem_free(newuc, sizeof(ucontext_t)); 159 return error; 160 } 161 162 int 163 sys__lwp_exit(struct lwp *l, const void *v, register_t *retval) 164 { 165 166 lwp_exit(l); 167 return 0; 168 } 169 170 int 171 sys__lwp_self(struct lwp *l, const void *v, register_t *retval) 172 { 173 174 *retval = l->l_lid; 175 return 0; 176 } 177 178 int 179 sys__lwp_getprivate(struct lwp *l, const void *v, register_t *retval) 180 { 181 182 *retval = (uintptr_t)l->l_private; 183 return 0; 184 } 185 186 int 187 sys__lwp_setprivate(struct lwp *l, const struct sys__lwp_setprivate_args *uap, 188 register_t *retval) 189 { 190 /* { 191 syscallarg(void *) ptr; 192 } */ 193 194 return lwp_setprivate(l, SCARG(uap, ptr)); 195 } 196 197 int 198 sys__lwp_suspend(struct lwp *l, const struct sys__lwp_suspend_args *uap, 199 register_t *retval) 200 { 201 /* { 202 syscallarg(lwpid_t) target; 203 } */ 204 struct proc *p = l->l_proc; 205 struct lwp *t; 206 int error; 207 208 mutex_enter(p->p_lock); 209 if ((t = lwp_find(p, SCARG(uap, target))) == NULL) { 210 mutex_exit(p->p_lock); 211 return ESRCH; 212 } 213 214 /* 215 * Check for deadlock, which is only possible when we're suspending 216 * ourself. XXX There is a short race here, as p_nrlwps is only 217 * incremented when an LWP suspends itself on the kernel/user 218 * boundary. It's still possible to kill -9 the process so we 219 * don't bother checking further. 220 */ 221 lwp_lock(t); 222 if ((t == l && p->p_nrlwps == 1) || 223 (l->l_flag & (LW_WCORE | LW_WEXIT)) != 0) { 224 lwp_unlock(t); 225 mutex_exit(p->p_lock); 226 return EDEADLK; 227 } 228 229 /* 230 * Suspend the LWP. XXX If it's on a different CPU, we should wait 231 * for it to be preempted, where it will put itself to sleep. 232 * 233 * Suspension of the current LWP will happen on return to userspace. 234 */ 235 error = lwp_suspend(l, t); 236 if (error) { 237 mutex_exit(p->p_lock); 238 return error; 239 } 240 241 /* 242 * Wait for: 243 * o process exiting 244 * o target LWP suspended 245 * o target LWP not suspended and L_WSUSPEND clear 246 * o target LWP exited 247 */ 248 for (;;) { 249 error = cv_wait_sig(&p->p_lwpcv, p->p_lock); 250 if (error) { 251 error = ERESTART; 252 break; 253 } 254 if (lwp_find(p, SCARG(uap, target)) == NULL) { 255 error = ESRCH; 256 break; 257 } 258 if ((l->l_flag | t->l_flag) & (LW_WCORE | LW_WEXIT)) { 259 error = ERESTART; 260 break; 261 } 262 if (t->l_stat == LSSUSPENDED || 263 (t->l_flag & LW_WSUSPEND) == 0) 264 break; 265 } 266 mutex_exit(p->p_lock); 267 268 return error; 269 } 270 271 int 272 sys__lwp_continue(struct lwp *l, const struct sys__lwp_continue_args *uap, 273 register_t *retval) 274 { 275 /* { 276 syscallarg(lwpid_t) target; 277 } */ 278 int error; 279 struct proc *p = l->l_proc; 280 struct lwp *t; 281 282 error = 0; 283 284 mutex_enter(p->p_lock); 285 if ((t = lwp_find(p, SCARG(uap, target))) == NULL) { 286 mutex_exit(p->p_lock); 287 return ESRCH; 288 } 289 290 lwp_lock(t); 291 lwp_continue(t); 292 mutex_exit(p->p_lock); 293 294 return error; 295 } 296 297 int 298 sys__lwp_wakeup(struct lwp *l, const struct sys__lwp_wakeup_args *uap, 299 register_t *retval) 300 { 301 /* { 302 syscallarg(lwpid_t) target; 303 } */ 304 struct lwp *t; 305 struct proc *p; 306 int error; 307 308 p = l->l_proc; 309 mutex_enter(p->p_lock); 310 311 if ((t = lwp_find(p, SCARG(uap, target))) == NULL) { 312 mutex_exit(p->p_lock); 313 return ESRCH; 314 } 315 316 lwp_lock(t); 317 t->l_flag |= (LW_CANCELLED | LW_UNPARKED); 318 319 if (t->l_stat != LSSLEEP) { 320 lwp_unlock(t); 321 error = ENODEV; 322 } else if ((t->l_flag & LW_SINTR) == 0) { 323 lwp_unlock(t); 324 error = EBUSY; 325 } else { 326 /* Wake it up. lwp_unsleep() will release the LWP lock. */ 327 lwp_unsleep(t, true); 328 error = 0; 329 } 330 331 mutex_exit(p->p_lock); 332 333 return error; 334 } 335 336 int 337 sys__lwp_wait(struct lwp *l, const struct sys__lwp_wait_args *uap, 338 register_t *retval) 339 { 340 /* { 341 syscallarg(lwpid_t) wait_for; 342 syscallarg(lwpid_t *) departed; 343 } */ 344 struct proc *p = l->l_proc; 345 int error; 346 lwpid_t dep; 347 348 mutex_enter(p->p_lock); 349 error = lwp_wait(l, SCARG(uap, wait_for), &dep, false); 350 mutex_exit(p->p_lock); 351 352 if (!error && SCARG(uap, departed)) { 353 error = copyout(&dep, SCARG(uap, departed), sizeof(dep)); 354 } 355 356 return error; 357 } 358 359 int 360 sys__lwp_kill(struct lwp *l, const struct sys__lwp_kill_args *uap, 361 register_t *retval) 362 { 363 /* { 364 syscallarg(lwpid_t) target; 365 syscallarg(int) signo; 366 } */ 367 struct proc *p = l->l_proc; 368 struct lwp *t; 369 ksiginfo_t ksi; 370 int signo = SCARG(uap, signo); 371 int error = 0; 372 373 if ((u_int)signo >= NSIG) 374 return EINVAL; 375 376 KSI_INIT(&ksi); 377 ksi.ksi_signo = signo; 378 ksi.ksi_code = SI_LWP; 379 ksi.ksi_pid = p->p_pid; 380 ksi.ksi_uid = kauth_cred_geteuid(l->l_cred); 381 ksi.ksi_lid = SCARG(uap, target); 382 383 mutex_enter(&proc_lock); 384 mutex_enter(p->p_lock); 385 if ((t = lwp_find(p, ksi.ksi_lid)) == NULL) 386 error = ESRCH; 387 else if (signo != 0) 388 kpsignal2(p, &ksi); 389 mutex_exit(p->p_lock); 390 mutex_exit(&proc_lock); 391 392 return error; 393 } 394 395 int 396 sys__lwp_detach(struct lwp *l, const struct sys__lwp_detach_args *uap, 397 register_t *retval) 398 { 399 /* { 400 syscallarg(lwpid_t) target; 401 } */ 402 struct proc *p; 403 struct lwp *t; 404 lwpid_t target; 405 int error; 406 407 target = SCARG(uap, target); 408 p = l->l_proc; 409 410 mutex_enter(p->p_lock); 411 412 if (l->l_lid == target) 413 t = l; 414 else { 415 /* 416 * We can't use lwp_find() here because the target might 417 * be a zombie. 418 */ 419 t = proc_find_lwp(p, target); 420 KASSERT(t == NULL || t->l_lid == target); 421 } 422 423 /* 424 * If the LWP is already detached, there's nothing to do. 425 * If it's a zombie, we need to clean up after it. LSZOMB 426 * is visible with the proc mutex held. 427 * 428 * After we have detached or released the LWP, kick any 429 * other LWPs that may be sitting in _lwp_wait(), waiting 430 * for the target LWP to exit. 431 */ 432 if (t != NULL && t->l_stat != LSIDL) { 433 if ((t->l_prflag & LPR_DETACHED) == 0) { 434 p->p_ndlwps++; 435 t->l_prflag |= LPR_DETACHED; 436 if (t->l_stat == LSZOMB) { 437 /* Releases proc mutex. */ 438 lwp_free(t, false, false); 439 return 0; 440 } 441 error = 0; 442 443 /* 444 * Have any LWPs sleeping in lwp_wait() recheck 445 * for deadlock. 446 */ 447 cv_broadcast(&p->p_lwpcv); 448 } else 449 error = EINVAL; 450 } else 451 error = ESRCH; 452 453 mutex_exit(p->p_lock); 454 455 return error; 456 } 457 458 int 459 lwp_unpark(const lwpid_t *tp, const u_int ntargets) 460 { 461 u_int target; 462 int error, s; 463 proc_t *p; 464 lwp_t *t; 465 466 p = curproc; 467 error = 0; 468 469 s = pserialize_read_enter(); 470 for (target = 0; target < ntargets; target++) { 471 t = proc_find_lwp_unlocked(p, tp[target]); 472 if (__predict_false(t == NULL)) { 473 error = ESRCH; 474 continue; 475 } 476 477 KASSERT(lwp_locked(t, NULL)); 478 479 if (__predict_true(t->l_syncobj == &lwp_park_syncobj)) { 480 /* 481 * As expected it's parked, so wake it up. 482 * lwp_unsleep() will release the LWP lock. 483 */ 484 lwp_unsleep(t, true); 485 } else if (__predict_false(t->l_stat == LSZOMB)) { 486 lwp_unlock(t); 487 error = ESRCH; 488 } else { 489 /* 490 * It hasn't parked yet because the wakeup side won 491 * the race, or something else has happened to make 492 * the thread not park. Why doesn't really matter. 493 * Set the operation pending, so that the next call 494 * to _lwp_park() in the LWP returns early. If it 495 * turns out to be a spurious wakeup, no harm done. 496 */ 497 t->l_flag |= LW_UNPARKED; 498 lwp_unlock(t); 499 } 500 } 501 pserialize_read_exit(s); 502 503 return error; 504 } 505 506 int 507 lwp_park(clockid_t clock_id, int flags, struct timespec *ts) 508 { 509 int timo, error; 510 struct timespec start; 511 lwp_t *l; 512 bool timeremain = !(flags & TIMER_ABSTIME) && ts; 513 514 if (ts != NULL) { 515 if ((error = ts2timo(clock_id, flags, ts, &timo, 516 timeremain ? &start : NULL)) != 0) 517 return error; 518 KASSERT(timo != 0); 519 } else { 520 timo = 0; 521 } 522 523 /* 524 * Before going the full route and blocking, check to see if an 525 * unpark op is pending. 526 */ 527 l = curlwp; 528 lwp_lock(l); 529 if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) { 530 l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED); 531 lwp_unlock(l); 532 return EALREADY; 533 } 534 l->l_biglocks = 0; 535 sleepq_enqueue(NULL, l, "parked", &lwp_park_syncobj, true); 536 error = sleepq_block(timo, true); 537 switch (error) { 538 case EWOULDBLOCK: 539 error = ETIMEDOUT; 540 if (timeremain) 541 memset(ts, 0, sizeof(*ts)); 542 break; 543 case ERESTART: 544 error = EINTR; 545 /*FALLTHROUGH*/ 546 default: 547 if (timeremain) 548 clock_timeleft(clock_id, ts, &start); 549 break; 550 } 551 return error; 552 } 553 554 /* 555 * 'park' an LWP waiting on a user-level synchronisation object. The LWP 556 * will remain parked until another LWP in the same process calls in and 557 * requests that it be unparked. 558 */ 559 int 560 sys____lwp_park60(struct lwp *l, const struct sys____lwp_park60_args *uap, 561 register_t *retval) 562 { 563 /* { 564 syscallarg(clockid_t) clock_id; 565 syscallarg(int) flags; 566 syscallarg(struct timespec *) ts; 567 syscallarg(lwpid_t) unpark; 568 syscallarg(const void *) hint; 569 syscallarg(const void *) unparkhint; 570 } */ 571 struct timespec ts, *tsp; 572 int error; 573 574 if (SCARG(uap, ts) == NULL) 575 tsp = NULL; 576 else { 577 error = copyin(SCARG(uap, ts), &ts, sizeof(ts)); 578 if (error != 0) 579 return error; 580 tsp = &ts; 581 } 582 583 if (SCARG(uap, unpark) != 0) { 584 error = lwp_unpark(&SCARG(uap, unpark), 1); 585 if (error != 0) 586 return error; 587 } 588 589 error = lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp); 590 if (SCARG(uap, ts) != NULL && (SCARG(uap, flags) & TIMER_ABSTIME) == 0) 591 (void)copyout(tsp, SCARG(uap, ts), sizeof(*tsp)); 592 return error; 593 } 594 595 int 596 sys__lwp_unpark(struct lwp *l, const struct sys__lwp_unpark_args *uap, 597 register_t *retval) 598 { 599 /* { 600 syscallarg(lwpid_t) target; 601 syscallarg(const void *) hint; 602 } */ 603 604 return lwp_unpark(&SCARG(uap, target), 1); 605 } 606 607 int 608 sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap, 609 register_t *retval) 610 { 611 /* { 612 syscallarg(const lwpid_t *) targets; 613 syscallarg(size_t) ntargets; 614 syscallarg(const void *) hint; 615 } */ 616 lwpid_t targets[32], *tp; 617 int error; 618 u_int ntargets; 619 size_t sz; 620 621 ntargets = SCARG(uap, ntargets); 622 if (SCARG(uap, targets) == NULL) { 623 /* 624 * Let the caller know how much we are willing to do, and 625 * let it unpark the LWPs in blocks. 626 */ 627 *retval = LWP_UNPARK_MAX; 628 return 0; 629 } 630 if (ntargets > LWP_UNPARK_MAX || ntargets == 0) 631 return EINVAL; 632 633 /* 634 * Copy in the target array. If it's a small number of LWPs, then 635 * place the numbers on the stack. 636 */ 637 sz = sizeof(lwpid_t) * ntargets; 638 if (sz <= sizeof(targets)) 639 tp = targets; 640 else 641 tp = kmem_alloc(sz, KM_SLEEP); 642 error = copyin(SCARG(uap, targets), tp, sz); 643 if (error != 0) { 644 if (tp != targets) { 645 kmem_free(tp, sz); 646 } 647 return error; 648 } 649 error = lwp_unpark(tp, ntargets); 650 if (tp != targets) 651 kmem_free(tp, sz); 652 return error; 653 } 654 655 int 656 sys__lwp_setname(struct lwp *l, const struct sys__lwp_setname_args *uap, 657 register_t *retval) 658 { 659 /* { 660 syscallarg(lwpid_t) target; 661 syscallarg(const char *) name; 662 } */ 663 char *name, *oname; 664 lwpid_t target; 665 proc_t *p; 666 lwp_t *t; 667 int error; 668 669 if ((target = SCARG(uap, target)) == 0) 670 target = l->l_lid; 671 672 name = kmem_alloc(MAXCOMLEN, KM_SLEEP); 673 error = copyinstr(SCARG(uap, name), name, MAXCOMLEN, NULL); 674 switch (error) { 675 case ENAMETOOLONG: 676 case 0: 677 name[MAXCOMLEN - 1] = '\0'; 678 break; 679 default: 680 kmem_free(name, MAXCOMLEN); 681 return error; 682 } 683 684 p = curproc; 685 mutex_enter(p->p_lock); 686 if ((t = lwp_find(p, target)) == NULL) { 687 mutex_exit(p->p_lock); 688 kmem_free(name, MAXCOMLEN); 689 return ESRCH; 690 } 691 lwp_lock(t); 692 oname = t->l_name; 693 t->l_name = name; 694 lwp_unlock(t); 695 mutex_exit(p->p_lock); 696 697 if (oname != NULL) 698 kmem_free(oname, MAXCOMLEN); 699 700 return 0; 701 } 702 703 int 704 sys__lwp_getname(struct lwp *l, const struct sys__lwp_getname_args *uap, 705 register_t *retval) 706 { 707 /* { 708 syscallarg(lwpid_t) target; 709 syscallarg(char *) name; 710 syscallarg(size_t) len; 711 } */ 712 char name[MAXCOMLEN]; 713 lwpid_t target; 714 size_t len; 715 proc_t *p; 716 lwp_t *t; 717 718 if ((target = SCARG(uap, target)) == 0) 719 target = l->l_lid; 720 721 p = curproc; 722 mutex_enter(p->p_lock); 723 if ((t = lwp_find(p, target)) == NULL) { 724 mutex_exit(p->p_lock); 725 return ESRCH; 726 } 727 lwp_lock(t); 728 if (t->l_name == NULL) 729 name[0] = '\0'; 730 else 731 strlcpy(name, t->l_name, sizeof(name)); 732 lwp_unlock(t); 733 mutex_exit(p->p_lock); 734 735 len = uimin(SCARG(uap, len), sizeof(name)); 736 737 return copyoutstr(name, SCARG(uap, name), len, NULL); 738 } 739 740 int 741 sys__lwp_ctl(struct lwp *l, const struct sys__lwp_ctl_args *uap, 742 register_t *retval) 743 { 744 /* { 745 syscallarg(int) features; 746 syscallarg(struct lwpctl **) address; 747 } */ 748 int error, features; 749 vaddr_t vaddr; 750 751 features = SCARG(uap, features); 752 features &= ~(LWPCTL_FEATURE_CURCPU | LWPCTL_FEATURE_PCTR); 753 if (features != 0) 754 return ENODEV; 755 if ((error = lwp_ctl_alloc(&vaddr)) != 0) 756 return error; 757 return copyout(&vaddr, SCARG(uap, address), sizeof(void *)); 758 } 759