/*	$NetBSD: linux_sched.c,v 1.83 2024/10/03 12:56:49 hannken Exp $	*/

/*-
 * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.83 2024/10/03 12:56:49 hannken Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>
#include <sys/atomic.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_machdep.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

static int linux_clone_nptl(struct lwp *, const struct linux_sys_clone_args *,
    register_t *);

/* Unlike Linux, dynamically calculate CPU mask size */
#define	LINUX_CPU_MASK_SIZE (sizeof(long) * ((ncpu + LONG_BIT - 1) / LONG_BIT))

#if DEBUG_LINUX
#define DPRINTF(x, ...) uprintf(x, __VA_ARGS__)
#else
#define DPRINTF(x, ...)
#endif
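
/*
 * Worked example of the mask sizing above (illustrative values only):
 * on an LP64 machine with LONG_BIT = 64, ncpu = 6 gives
 * sizeof(long) * ((6 + 63) / 64) = 8 * 1 = 8 bytes, while ncpu = 130
 * gives 8 * 3 = 24 bytes.  Linux, by contrast, fixes its cpumask size
 * at kernel build time.
 */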

static void
linux_child_return(void *arg)
{
	struct lwp *l = arg;
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = l->l_emuldata;
	void *ctp = led->led_child_tidptr;
	int error;

	if (ctp) {
		if ((error = copyout(&p->p_pid, ctp, sizeof(p->p_pid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d error = %d)\n",
			    __func__, ctp, p->p_pid, error);
	}
	child_return(arg);
}

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct linux_emuldata *led;
	int flags, sig, error;

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return EINVAL;

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return EINVAL;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return EINVAL;

	/*
	 * The thread group flavor is implemented totally differently.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD)
		return linux_clone_nptl(l, uap, retval);

	flags = 0;
	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return EINVAL;
	sig = linux_to_native_signo[sig];

	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) {
		led = l->l_emuldata;
		led->led_child_tidptr = SCARG(uap, child_tidptr);
	}

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a noop.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    linux_child_return, NULL, retval)) != 0) {
		DPRINTF("%s: fork1: error %d\n", __func__, error);
		return error;
	}

	return 0;
}
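
#if 0
/*
 * Illustrative only, never compiled: a Linux userland sketch of a
 * vfork-style clone(2) call that would take the fork1() path above
 * (LINUX_CLONE_VM | LINUX_CLONE_VFORK, SIGCHLD as the exit signal).
 * Names follow the Linux libc API; stack_top is a hypothetical
 * caller-provided stack pointer.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>

static int
child_fn(void *arg)
{
	return 0;	/* child work goes here */
}

static int
spawn_shared_vm(void *stack_top)
{
	/* Shares the VM; parent waits until the child execs or exits. */
	return clone(child_fn, stack_top, CLONE_VM | CLONE_VFORK | SIGCHLD,
	    NULL);
}
#endif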

int
linux_sys_clone3(struct lwp *l, const struct linux_sys_clone3_args *uap, register_t *retval)
{
	struct linux_user_clone3_args cl_args;
	struct linux_sys_clone_args clone_args;
	int error;

	if (SCARG(uap, size) != sizeof(cl_args)) {
		DPRINTF("%s: invalid size %zu (expected %zu)\n",
		    __func__, (size_t)SCARG(uap, size), sizeof(cl_args));
		return EINVAL;
	}

	error = copyin(SCARG(uap, cl_args), &cl_args, SCARG(uap, size));
	if (error) {
		DPRINTF("%s: Copyin failed: %d\n", __func__, error);
		return error;
	}

	DPRINTF("%s: Flags: %#jx\n", __func__, (intmax_t)cl_args.flags);

	/* Reject flags we do not implement or do not allow. */
	if (cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS) {
		DPRINTF("%s: Unsupported flags for clone3: %#" PRIx64 "\n",
		    __func__, cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS);
		return EOPNOTSUPP;
	}
	if (cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS) {
		DPRINTF("%s: Disallowed flags for clone3: %#" PRIx64 "\n",
		    __func__, cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS);
		return EINVAL;
	}

#if 0
	// XXX: this is wrong, exit_signal is the signal to deliver to the
	// process upon exit.
	if ((cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL) != 0) {
		DPRINTF("%s: Disallowed exit_signal for clone3: %#" PRIx64
		    "\n", __func__,
		    cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL);
		return EINVAL;
	}
#endif

	if (cl_args.stack == 0 && cl_args.stack_size != 0) {
		DPRINTF("%s: Stack is NULL but stack size is not 0\n",
		    __func__);
		return EINVAL;
	}
	if (cl_args.stack != 0 && cl_args.stack_size == 0) {
		DPRINTF("%s: Stack is not NULL but stack size is 0\n",
		    __func__);
		return EINVAL;
	}

	int flags = cl_args.flags & LINUX_CLONE_ALLOWED_FLAGS;
#if 0
	int sig = cl_args.exit_signal & LINUX_CLONE_CSIGNAL;
#endif
	// XXX: Pidfd member handling
	// XXX: we don't have cgroups
	// XXX: what to do with tid_set and tid_set_size
	// XXX: clone3 has stacksize, instead implement clone as a clone3
	// wrapper.
	SCARG(&clone_args, flags) = flags;
	SCARG(&clone_args, stack) = (void *)(uintptr_t)cl_args.stack;
	SCARG(&clone_args, parent_tidptr) =
	    (void *)(intptr_t)cl_args.parent_tid;
	SCARG(&clone_args, tls) =
	    (void *)(intptr_t)cl_args.tls;
	SCARG(&clone_args, child_tidptr) =
	    (void *)(intptr_t)cl_args.child_tid;

	return linux_sys_clone(l, &clone_args, retval);
}
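
#if 0
/*
 * Illustrative only, never compiled: a raw clone3(2) call from Linux
 * userland.  The handler above insists on size == sizeof(cl_args) and
 * rejects a stack/stack_size pair where exactly one member is zero.
 * struct clone_args and SYS_clone3 are the Linux userland names.
 */
#define _GNU_SOURCE
#include <linux/sched.h>	/* struct clone_args */
#include <sys/syscall.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

static pid_t
fork_via_clone3(void)
{
	struct clone_args ca;

	memset(&ca, 0, sizeof(ca));	/* no flags: plain fork-like child */
	ca.exit_signal = SIGCHLD;
	return syscall(SYS_clone3, &ca, sizeof(ca));
}
#endif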

static int
linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct proc *p;
	struct lwp *l2;
	struct linux_emuldata *led;
	void *parent_tidptr, *tls, *child_tidptr;
	vaddr_t uaddr;
	lwpid_t lid;
	int flags, error;

	p = l->l_proc;
	flags = SCARG(uap, flags);
	parent_tidptr = SCARG(uap, parent_tidptr);
	tls = SCARG(uap, tls);
	child_tidptr = SCARG(uap, child_tidptr);

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		return ENOMEM;
	}

	error = lwp_create(l, p, uaddr, LWP_DETACHED,
	    SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
	    &l->l_sigmask, &l->l_sigstk);
	if (__predict_false(error)) {
		DPRINTF("%s: lwp_create error=%d\n", __func__, error);
		uvm_uarea_free(uaddr);
		return error;
	}
	lid = l2->l_lid;

	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
	if (flags & LINUX_CLONE_CHILD_CLEARTID) {
		led = l2->l_emuldata;
		led->led_clear_tid = child_tidptr;
	}

	/* LINUX_CLONE_PARENT_SETTID: store child's TID in parent's memory */
	if (flags & LINUX_CLONE_PARENT_SETTID) {
		if ((error = copyout(&lid, parent_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_PARENT_SETTID "
			    "failed (parent_tidptr = %p tid = %d error = %d)\n",
			    __func__, parent_tidptr, lid, error);
	}

	/* LINUX_CLONE_CHILD_SETTID: store child's TID in child's memory */
	if (flags & LINUX_CLONE_CHILD_SETTID) {
		if ((error = copyout(&lid, child_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d error = %d)\n",
			    __func__, child_tidptr, lid, error);
	}

	if (flags & LINUX_CLONE_SETTLS) {
		error = LINUX_LWP_SETPRIVATE(l2, tls);
		if (error) {
			DPRINTF("%s: LINUX_LWP_SETPRIVATE %d\n", __func__,
			    error);
			lwp_exit(l2);
			return error;
		}
	}

	/* Set the new LWP running. */
	lwp_start(l2, 0);

	retval[0] = lid;
	retval[1] = 0;
	return 0;
}
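
/*
 * For orientation: NPTL's pthread_create() typically reaches
 * linux_clone_nptl() with a flag set along the lines of CLONE_VM |
 * CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
 * CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID |
 * CLONE_CHILD_CLEARTID (the exact set depends on the libc version),
 * which exercises every tidptr/tls branch above.
 */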

/*
 * linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   in particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

		memset(linux_params, 0, sizeof(*linux_params));

		DPRINTF("%s: native: policy %d, priority %d\n",
		    __func__, native_policy, prio);

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
		DPRINTF("%s: linux: policy %d, priority %d\n",
		    __func__, -1, linux_params->sched_priority);
	}

	return 0;
}
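
/*
 * Worked example of the rescaling above, assuming the usual NetBSD
 * range SCHED_PRI_MIN = 0 and SCHED_PRI_MAX = 63: Linux rtprio 1 maps
 * to native 0, rtprio 99 maps to 63, and rtprio 50 maps to
 * (50 - 1) * 63 / 98 = 31 (integer division).  The round trip is
 * lossy for the same reason: native 31 maps back to rtprio
 * 31 * 98 / 63 + 1 = 49, not 50.
 */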

int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, policy, lp.sched_priority);

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, SCARG(uap, policy), lp.sched_priority);

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

 out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}
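
#if 0
/*
 * Illustrative only, never compiled: a Linux userland request for
 * SCHED_FIFO at rtprio 50.  Under emulation this enters
 * linux_sys_sched_setscheduler() above and the priority is rescaled
 * by sched_linux2native().
 */
#include <sched.h>
#include <sys/types.h>

static int
go_realtime(pid_t pid)
{
	struct sched_param sp = { .sched_priority = 50 };

	return sched_setscheduler(pid, SCHED_FIFO, &sp);
}
#endif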

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_exit(struct lwp *l, const struct linux_sys_exit_args *uap, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_emuldata;
	led->led_clear_tid = SCARG(uap, tid);
	*retval = l->l_lid;

	return 0;
}

/* ARGSUSED1 */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

/*
 * The affinity syscalls assume that the layout of our cpu kcpuset is
 * the same as linux's: a linear bitmask.
 */
int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct proc *p;
	struct lwp *t;
	kcpuset_t *kcset;
	size_t size;
	cpuid_t i;
	int error;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	if (SCARG(uap, pid) == 0) {
		p = curproc;
		mutex_enter(p->p_lock);
		t = curlwp;
	} else {
		t = lwp_find2(-1, SCARG(uap, pid));
		if (__predict_false(t == NULL)) {
			return ESRCH;
		}
		p = t->l_proc;
		KASSERT(mutex_owned(p->p_lock));
	}

	/* Check the permission */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, p, NULL, NULL, NULL)) {
		mutex_exit(p->p_lock);
		return EPERM;
	}

	kcpuset_create(&kcset, true);
	lwp_lock(t);
	if (t->l_affinity != NULL)
		kcpuset_copy(kcset, t->l_affinity);
	else {
		/*
		 * All available CPUs should be masked when affinity has not
		 * been set.
		 */
		kcpuset_zero(kcset);
		for (i = 0; i < ncpu; i++)
			kcpuset_set(kcset, i);
	}
	lwp_unlock(t);
	mutex_exit(p->p_lock);
	error = kcpuset_copyout(kcset, (cpuset_t *)SCARG(uap, mask), size);
	kcpuset_unuse(kcset, NULL);
	*retval = size;
	return error;
}
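
#if 0
/*
 * Illustrative only, never compiled: Linux userland pinning of the
 * calling thread to CPU 0.  The cpu_set_t handed to the kernel is the
 * linear bitmask that the getaffinity handler above and the
 * setaffinity handler below copy out and in, so len must cover
 * LINUX_CPU_MASK_SIZE bytes.
 */
#define _GNU_SOURCE
#include <sched.h>

static int
pin_to_cpu0(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	return sched_setaffinity(0, sizeof(set), &set);
}
#endif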

int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct sys__sched_setaffinity_args ssa;
	size_t size;
	pid_t pid;
	lwpid_t lid;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	lid = SCARG(uap, pid);
	if (lid != 0) {
		/* Get the canonical PID for the process. */
		mutex_enter(&proc_lock);
		struct proc *p = proc_find_lwpid(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
		pid = p->p_pid;
		mutex_exit(&proc_lock);
	} else {
		pid = curproc->p_pid;
		lid = curlwp->l_lid;
	}

	SCARG(&ssa, pid) = pid;
	SCARG(&ssa, lid) = lid;
	SCARG(&ssa, size) = size;
	SCARG(&ssa, cpuset) = (cpuset_t *)SCARG(uap, mask);

	return sys__sched_setaffinity(l, &ssa, retval);
}