/*	$NetBSD: linux_sched.c,v 1.58 2008/10/25 23:38:28 christos Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module.  Try to deal with scheduler-related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.58 2008/10/25 23:38:28 christos Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h>	/* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_exec.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>
int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */
	int flags, sig;
	int error;
	struct proc *p;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, &p)) != 0)
		return error;

#ifdef LINUX_NPTL
	if ((SCARG(uap, flags) & LINUX_CLONE_SETTLS) != 0)
		return linux_init_thread_area(l, LIST_FIRST(&p->p_lwps));
#endif /* LINUX_NPTL */

	return 0;
}
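/*
 * Illustrative sketch of the flag translation above; not compiled in.
 * The flag combination is roughly what a glibc/NPTL pthread_create()
 * is known to pass to clone(2); the exact set varies between glibc
 * versions, and the LINUX_CLONE_PARENT_SETTID and
 * LINUX_CLONE_CHILD_CLEARTID names are assumed to be the linux_sched.h
 * spellings of the corresponding Linux flags.
 */
#if 0
	int nptl_flags = LINUX_CLONE_VM | LINUX_CLONE_FS | LINUX_CLONE_FILES |
	    LINUX_CLONE_SIGHAND | LINUX_CLONE_THREAD | LINUX_CLONE_SETTLS |
	    LINUX_CLONE_PARENT_SETTID | LINUX_CLONE_CHILD_CLEARTID;

	/*
	 * linux_sys_clone() maps these to FORK_SHAREVM | FORK_SHARECWD |
	 * FORK_SHAREFILES | FORK_SHARESIGS for fork1(), with sig == 0
	 * because no LINUX_CLONE_CSIGNAL bits are set (a plain thread
	 * exit does not signal the parent), and the LINUX_CLONE_SETTLS
	 * branch initializes the thread area of the new LWP.
	 */
#endif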
/*
 * linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   In particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

#ifdef DEBUG_LINUX
		printf("native2linux: native: policy %d, priority %d\n",
		    native_policy, prio);
#endif

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
#ifdef DEBUG_LINUX
		printf("native2linux: linux: policy %d, priority %d\n",
		    linux_policy != NULL ? *linux_policy : -1,
		    linux_params->sched_priority);
#endif
	}

	return 0;
}
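/*
 * Worked example of the rescaling above; not compiled in.  It assumes
 * a native range of SCHED_PRI_MIN == 0 and SCHED_PRI_MAX == 63; the
 * code itself only depends on the symbols, so check <sys/param.h> for
 * the actual values on a given kernel.
 */
#if 0
	struct linux_sched_param lp = { .sched_priority = 50 };
	struct sched_param sp;
	int policy;

	if (sched_linux2native(LINUX_SCHED_RR, &lp, &policy, &sp) == 0) {
		/*
		 * Linux rtprio  1 -> ( 1 - 1) * 63 / 98 + 0 ==  0
		 * Linux rtprio 50 -> (50 - 1) * 63 / 98 + 0 == 31
		 * Linux rtprio 99 -> (99 - 1) * 63 / 98 + 0 == 63
		 */
		KASSERT(sp.sched_priority == 31);
	}

	/*
	 * The integer division makes the mapping lossy: going back,
	 * native 31 yields (31 - 0) * 98 / 63 + 1 == 49, not 50, so a
	 * getparam after a setparam may not report the exact priority
	 * that was set.
	 */
#endif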
int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/*
	 * We need the current policy in Linux terms, so that the new
	 * parameters can be validated against it.
	 */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}
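/*
 * A sketch of the effect, not authoritative: for a thread currently
 * running as SCHED_OTHER, a Linux call equivalent to
 *
 *	struct linux_sched_param lp = { .sched_priority = 5 };
 *	sched_setparam(pid, &lp);
 *
 * is rejected with EINVAL, because the policy round trip above hands
 * the current policy to sched_linux2native(), which only admits a
 * priority of 0 under SCHED_OTHER.  Real-time parameters are instead
 * installed through sched_setscheduler() below, which supplies the new
 * policy explicitly.
 */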
int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("getparam: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("getparam: linux: policy %d, priority %d\n",
	    policy, lp.sched_priority);
#endif

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: linux: policy %d, priority %d\n",
	    SCARG(uap, policy), lp.sched_priority);
#endif

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with the CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple.
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		mutex_enter(proc_lock);
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group.  The emulation exit hook
		 * takes care of hiding the zombies and reporting the exit
		 * code properly.
		 */
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves */
		psignal(p, SIGKILL);
		mutex_exit(proc_lock);

		return 0;

	}
#endif /* LINUX_NPTL */

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

#ifdef LINUX_NPTL
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	led->s->flags |= LINUX_LES_USE_NPTL;

	*retval = l->l_proc->p_pid;

	return 0;
}

/* ARGSUSED */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{
	/* The Linux kernel does it exactly that way */
	*retval = l->l_proc->p_pid;
	return 0;
}

/* ARGSUSED */
int
linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
{
	struct linux_emuldata *led = l->l_proc->p_emuldata;

	if (led->s->flags & LINUX_LES_USE_NPTL) {
		/* The Linux kernel does it exactly that way */
		*retval = led->s->group_pid;
	} else {
		*retval = l->l_proc->p_pid;
	}

	return 0;
}

/* ARGSUSED */
int
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	mutex_enter(proc_lock);
	if (led->s->flags & LINUX_LES_USE_NPTL) {

		/* Find the thread group leader's parent */
		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
			/* Maybe panic... */
			printf("linux_sys_getppid: missing group leader PID"
			    " %d\n", led->s->group_pid);
			mutex_exit(proc_lock);
			return ESRCH;
		}
		pp = glp->p_pptr;

		/* If this is a Linux process too, return thread group PID */
		if (pp->p_emul == p->p_emul) {
			struct linux_emuldata *pled;

			pled = pp->p_emuldata;
			*retval = pled->s->group_pid;
		} else {
			*retval = pp->p_pid;
		}

	} else {
		*retval = p->p_pptr->p_pid;
	}
	mutex_exit(proc_lock);

	return 0;
}

int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	int error;
	int ret;
	char *data;
	int *retp;

	if (SCARG(uap, mask) == NULL)
		return EINVAL;

	if (SCARG(uap, len) < sizeof(int))
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Return a mask with all actual CPUs tagged as available; the
	 * first CPU is in the least significant bit.  XXX this assumes
	 * ncpu < 32, otherwise the shift overflows.
	 */
	ret = (1 << ncpu) - 1;
	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
	*retp = ret;

	/* Don't leak the buffer if the copyout fails. */
	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
	free(data, M_TEMP);

	return error;
}
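/*
 * Worked example of the affinity mask layout; not compiled in.  The
 * values follow directly from the code above.
 */
#if 0
	/* With ncpu == 4: */
	ret = (1 << 4) - 1;	/* 0x0f: CPUs 0-3 available, CPU 0 in bit 0 */

	/*
	 * A caller passing len == sizeof(int) receives that word at
	 * offset 0 of the copied-out buffer; for a larger len, the retp
	 * computation above places it in the last sizeof(int) bytes of
	 * the zero-filled buffer.
	 */
#endif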
int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/* Let's ignore it */
#ifdef DEBUG_LINUX
	printf("linux_sys_sched_setaffinity\n");
#endif
	return 0;
}
#endif /* LINUX_NPTL */