1 /* $NetBSD: linux_sched.c,v 1.57 2008/05/07 15:18:35 njoly Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center; by Matthias Scheler. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Linux compatibility module. Try to deal with scheduler related syscalls. 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.57 2008/05/07 15:18:35 njoly Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/mount.h> 42 #include <sys/proc.h> 43 #include <sys/systm.h> 44 #include <sys/sysctl.h> 45 #include <sys/malloc.h> 46 #include <sys/syscallargs.h> 47 #include <sys/wait.h> 48 #include <sys/kauth.h> 49 #include <sys/ptrace.h> 50 51 #include <sys/cpu.h> 52 53 #include <compat/linux/common/linux_types.h> 54 #include <compat/linux/common/linux_signal.h> 55 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */ 56 #include <compat/linux/common/linux_emuldata.h> 57 #include <compat/linux/common/linux_ipc.h> 58 #include <compat/linux/common/linux_sem.h> 59 60 #include <compat/linux/linux_syscallargs.h> 61 62 #include <compat/linux/common/linux_sched.h> 63 64 int 65 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval) 66 { 67 /* { 68 syscallarg(int) flags; 69 syscallarg(void *) stack; 70 #ifdef LINUX_NPTL 71 syscallarg(void *) parent_tidptr; 72 syscallarg(void *) child_tidptr; 73 #endif 74 } */ 75 int flags, sig; 76 int error; 77 #ifdef LINUX_NPTL 78 struct linux_emuldata *led; 79 #endif 80 81 /* 82 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags. 83 */ 84 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE)) 85 return (EINVAL); 86 87 /* 88 * Thread group implies shared signals. Shared signals 89 * imply shared VM. This matches what Linux kernel does. 90 */ 91 if (SCARG(uap, flags) & LINUX_CLONE_THREAD 92 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0) 93 return (EINVAL); 94 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND 95 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0) 96 return (EINVAL); 97 98 flags = 0; 99 100 if (SCARG(uap, flags) & LINUX_CLONE_VM) 101 flags |= FORK_SHAREVM; 102 if (SCARG(uap, flags) & LINUX_CLONE_FS) 103 flags |= FORK_SHARECWD; 104 if (SCARG(uap, flags) & LINUX_CLONE_FILES) 105 flags |= FORK_SHAREFILES; 106 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) 107 flags |= FORK_SHARESIGS; 108 if (SCARG(uap, flags) & LINUX_CLONE_VFORK) 109 flags |= FORK_PPWAIT; 110 111 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL; 112 if (sig < 0 || sig >= LINUX__NSIG) 113 return (EINVAL); 114 sig = linux_to_native_signo[sig]; 115 116 #ifdef LINUX_NPTL 117 led = (struct linux_emuldata *)l->l_proc->p_emuldata; 118 119 led->parent_tidptr = SCARG(uap, parent_tidptr); 120 led->child_tidptr = SCARG(uap, child_tidptr); 121 led->clone_flags = SCARG(uap, flags); 122 #endif /* LINUX_NPTL */ 123 124 /* 125 * Note that Linux does not provide a portable way of specifying 126 * the stack area; the caller must know if the stack grows up 127 * or down. So, we pass a stack size of 0, so that the code 128 * that makes this adjustment is a noop. 129 */ 130 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0, 131 NULL, NULL, retval, NULL)) != 0) 132 return error; 133 134 return 0; 135 } 136 137 /* 138 * linux realtime priority 139 * 140 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99]. 141 * 142 * - SCHED_OTHER tasks don't have realtime priorities. 143 * in particular, sched_param::sched_priority is always 0. 144 */ 145 146 #define LINUX_SCHED_RTPRIO_MIN 1 147 #define LINUX_SCHED_RTPRIO_MAX 99 148 149 static int 150 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params, 151 int *native_policy, struct sched_param *native_params) 152 { 153 154 switch (linux_policy) { 155 case LINUX_SCHED_OTHER: 156 if (native_policy != NULL) { 157 *native_policy = SCHED_OTHER; 158 } 159 break; 160 161 case LINUX_SCHED_FIFO: 162 if (native_policy != NULL) { 163 *native_policy = SCHED_FIFO; 164 } 165 break; 166 167 case LINUX_SCHED_RR: 168 if (native_policy != NULL) { 169 *native_policy = SCHED_RR; 170 } 171 break; 172 173 default: 174 return EINVAL; 175 } 176 177 if (linux_params != NULL) { 178 int prio = linux_params->sched_priority; 179 180 KASSERT(native_params != NULL); 181 182 if (linux_policy == LINUX_SCHED_OTHER) { 183 if (prio != 0) { 184 return EINVAL; 185 } 186 native_params->sched_priority = PRI_NONE; /* XXX */ 187 } else { 188 if (prio < LINUX_SCHED_RTPRIO_MIN || 189 prio > LINUX_SCHED_RTPRIO_MAX) { 190 return EINVAL; 191 } 192 native_params->sched_priority = 193 (prio - LINUX_SCHED_RTPRIO_MIN) 194 * (SCHED_PRI_MAX - SCHED_PRI_MIN) 195 / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN) 196 + SCHED_PRI_MIN; 197 } 198 } 199 200 return 0; 201 } 202 203 static int 204 sched_native2linux(int native_policy, struct sched_param *native_params, 205 int *linux_policy, struct linux_sched_param *linux_params) 206 { 207 208 switch (native_policy) { 209 case SCHED_OTHER: 210 if (linux_policy != NULL) { 211 *linux_policy = LINUX_SCHED_OTHER; 212 } 213 break; 214 215 case SCHED_FIFO: 216 if (linux_policy != NULL) { 217 *linux_policy = LINUX_SCHED_FIFO; 218 } 219 break; 220 221 case SCHED_RR: 222 if (linux_policy != NULL) { 223 *linux_policy = LINUX_SCHED_RR; 224 } 225 break; 226 227 default: 228 panic("%s: unknown policy %d\n", __func__, native_policy); 229 } 230 231 if (native_params != NULL) { 232 int prio = native_params->sched_priority; 233 234 KASSERT(prio >= SCHED_PRI_MIN); 235 KASSERT(prio <= SCHED_PRI_MAX); 236 KASSERT(linux_params != NULL); 237 238 #ifdef DEBUG_LINUX 239 printf("native2linux: native: policy %d, priority %d\n", 240 native_policy, prio); 241 #endif 242 243 if (native_policy == SCHED_OTHER) { 244 linux_params->sched_priority = 0; 245 } else { 246 linux_params->sched_priority = 247 (prio - SCHED_PRI_MIN) 248 * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN) 249 / (SCHED_PRI_MAX - SCHED_PRI_MIN) 250 + LINUX_SCHED_RTPRIO_MIN; 251 } 252 #ifdef DEBUG_LINUX 253 printf("native2linux: linux: policy %d, priority %d\n", 254 -1, linux_params->sched_priority); 255 #endif 256 } 257 258 return 0; 259 } 260 261 int 262 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval) 263 { 264 /* { 265 syscallarg(linux_pid_t) pid; 266 syscallarg(const struct linux_sched_param *) sp; 267 } */ 268 int error, policy; 269 struct linux_sched_param lp; 270 struct sched_param sp; 271 272 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) { 273 error = EINVAL; 274 goto out; 275 } 276 277 error = copyin(SCARG(uap, sp), &lp, sizeof(lp)); 278 if (error) 279 goto out; 280 281 /* We need the current policy in Linux terms. */ 282 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL); 283 if (error) 284 goto out; 285 error = sched_native2linux(policy, NULL, &policy, NULL); 286 if (error) 287 goto out; 288 289 error = sched_linux2native(policy, &lp, &policy, &sp); 290 if (error) 291 goto out; 292 293 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp); 294 if (error) 295 goto out; 296 297 out: 298 return error; 299 } 300 301 int 302 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval) 303 { 304 /* { 305 syscallarg(linux_pid_t) pid; 306 syscallarg(struct linux_sched_param *) sp; 307 } */ 308 struct linux_sched_param lp; 309 struct sched_param sp; 310 int error, policy; 311 312 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) { 313 error = EINVAL; 314 goto out; 315 } 316 317 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp); 318 if (error) 319 goto out; 320 #ifdef DEBUG_LINUX 321 printf("getparam: native: policy %d, priority %d\n", 322 policy, sp.sched_priority); 323 #endif 324 325 error = sched_native2linux(policy, &sp, NULL, &lp); 326 if (error) 327 goto out; 328 #ifdef DEBUG_LINUX 329 printf("getparam: linux: policy %d, priority %d\n", 330 policy, lp.sched_priority); 331 #endif 332 333 error = copyout(&lp, SCARG(uap, sp), sizeof(lp)); 334 if (error) 335 goto out; 336 337 out: 338 return error; 339 } 340 341 int 342 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval) 343 { 344 /* { 345 syscallarg(linux_pid_t) pid; 346 syscallarg(int) policy; 347 syscallarg(cont struct linux_sched_scheduler *) sp; 348 } */ 349 int error, policy; 350 struct linux_sched_param lp; 351 struct sched_param sp; 352 353 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) { 354 error = EINVAL; 355 goto out; 356 } 357 358 error = copyin(SCARG(uap, sp), &lp, sizeof(lp)); 359 if (error) 360 goto out; 361 #ifdef DEBUG_LINUX 362 printf("setscheduler: linux: policy %d, priority %d\n", 363 SCARG(uap, policy), lp.sched_priority); 364 #endif 365 366 error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp); 367 if (error) 368 goto out; 369 #ifdef DEBUG_LINUX 370 printf("setscheduler: native: policy %d, priority %d\n", 371 policy, sp.sched_priority); 372 #endif 373 374 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp); 375 if (error) 376 goto out; 377 378 out: 379 return error; 380 } 381 382 int 383 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval) 384 { 385 /* { 386 syscallarg(linux_pid_t) pid; 387 } */ 388 int error, policy; 389 390 *retval = -1; 391 392 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL); 393 if (error) 394 goto out; 395 396 error = sched_native2linux(policy, NULL, &policy, NULL); 397 if (error) 398 goto out; 399 400 *retval = policy; 401 402 out: 403 return error; 404 } 405 406 int 407 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 408 { 409 410 yield(); 411 return 0; 412 } 413 414 int 415 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval) 416 { 417 /* { 418 syscallarg(int) policy; 419 } */ 420 421 switch (SCARG(uap, policy)) { 422 case LINUX_SCHED_OTHER: 423 *retval = 0; 424 break; 425 case LINUX_SCHED_FIFO: 426 case LINUX_SCHED_RR: 427 *retval = LINUX_SCHED_RTPRIO_MAX; 428 break; 429 default: 430 return EINVAL; 431 } 432 433 return 0; 434 } 435 436 int 437 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval) 438 { 439 /* { 440 syscallarg(int) policy; 441 } */ 442 443 switch (SCARG(uap, policy)) { 444 case LINUX_SCHED_OTHER: 445 *retval = 0; 446 break; 447 case LINUX_SCHED_FIFO: 448 case LINUX_SCHED_RR: 449 *retval = LINUX_SCHED_RTPRIO_MIN; 450 break; 451 default: 452 return EINVAL; 453 } 454 455 return 0; 456 } 457 458 #ifndef __m68k__ 459 /* Present on everything but m68k */ 460 int 461 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval) 462 { 463 #ifdef LINUX_NPTL 464 /* { 465 syscallarg(int) error_code; 466 } */ 467 struct proc *p = l->l_proc; 468 struct linux_emuldata *led = p->p_emuldata; 469 struct linux_emuldata *e; 470 471 if (led->s->flags & LINUX_LES_USE_NPTL) { 472 473 #ifdef DEBUG_LINUX 474 printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__, 475 led->s->refs); 476 #endif 477 478 /* 479 * The calling thread is supposed to kill all threads 480 * in the same thread group (i.e. all threads created 481 * via clone(2) with CLONE_THREAD flag set). 482 * 483 * If there is only one thread, things are quite simple 484 */ 485 if (led->s->refs == 1) 486 return sys_exit(l, (const void *)uap, retval); 487 488 #ifdef DEBUG_LINUX 489 printf("%s:%d\n", __func__, __LINE__); 490 #endif 491 492 mutex_enter(proc_lock); 493 led->s->flags |= LINUX_LES_INEXITGROUP; 494 led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0); 495 496 /* 497 * Kill all threads in the group. The emulation exit hook takes 498 * care of hiding the zombies and reporting the exit code 499 * properly. 500 */ 501 LIST_FOREACH(e, &led->s->threads, threads) { 502 if (e->proc == p) 503 continue; 504 505 #ifdef DEBUG_LINUX 506 printf("%s: kill PID %d\n", __func__, e->proc->p_pid); 507 #endif 508 psignal(e->proc, SIGKILL); 509 } 510 511 /* Now, kill ourselves */ 512 psignal(p, SIGKILL); 513 mutex_exit(proc_lock); 514 515 return 0; 516 517 } 518 #endif /* LINUX_NPTL */ 519 520 return sys_exit(l, (const void *)uap, retval); 521 } 522 #endif /* !__m68k__ */ 523 524 #ifdef LINUX_NPTL 525 int 526 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval) 527 { 528 /* { 529 syscallarg(int *) tidptr; 530 } */ 531 struct linux_emuldata *led; 532 533 led = (struct linux_emuldata *)l->l_proc->p_emuldata; 534 led->clear_tid = SCARG(uap, tid); 535 536 led->s->flags |= LINUX_LES_USE_NPTL; 537 538 *retval = l->l_proc->p_pid; 539 540 return 0; 541 } 542 543 /* ARGUSED1 */ 544 int 545 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval) 546 { 547 /* The Linux kernel does it exactly that way */ 548 *retval = l->l_proc->p_pid; 549 return 0; 550 } 551 552 #ifdef LINUX_NPTL 553 /* ARGUSED1 */ 554 int 555 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval) 556 { 557 struct linux_emuldata *led = l->l_proc->p_emuldata; 558 559 if (led->s->flags & LINUX_LES_USE_NPTL) { 560 /* The Linux kernel does it exactly that way */ 561 *retval = led->s->group_pid; 562 } else { 563 *retval = l->l_proc->p_pid; 564 } 565 566 return 0; 567 } 568 569 /* ARGUSED1 */ 570 int 571 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval) 572 { 573 struct proc *p = l->l_proc; 574 struct linux_emuldata *led = p->p_emuldata; 575 struct proc *glp; 576 struct proc *pp; 577 578 mutex_enter(proc_lock); 579 if (led->s->flags & LINUX_LES_USE_NPTL) { 580 581 /* Find the thread group leader's parent */ 582 if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) { 583 /* Maybe panic... */ 584 printf("linux_sys_getppid: missing group leader PID" 585 " %d\n", led->s->group_pid); 586 mutex_exit(proc_lock); 587 return -1; 588 } 589 pp = glp->p_pptr; 590 591 /* If this is a Linux process too, return thread group PID */ 592 if (pp->p_emul == p->p_emul) { 593 struct linux_emuldata *pled; 594 595 pled = pp->p_emuldata; 596 *retval = pled->s->group_pid; 597 } else { 598 *retval = pp->p_pid; 599 } 600 601 } else { 602 *retval = p->p_pptr->p_pid; 603 } 604 mutex_exit(proc_lock); 605 606 return 0; 607 } 608 #endif /* LINUX_NPTL */ 609 610 int 611 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval) 612 { 613 /* { 614 syscallarg(pid_t) pid; 615 syscallarg(unsigned int) len; 616 syscallarg(unsigned long *) mask; 617 } */ 618 int error; 619 int ret; 620 char *data; 621 int *retp; 622 623 if (SCARG(uap, mask) == NULL) 624 return EINVAL; 625 626 if (SCARG(uap, len) < sizeof(int)) 627 return EINVAL; 628 629 if (pfind(SCARG(uap, pid)) == NULL) 630 return ESRCH; 631 632 /* 633 * return the actual number of CPU, tag all of them as available 634 * The result is a mask, the first CPU being in the least significant 635 * bit. 636 */ 637 ret = (1 << ncpu) - 1; 638 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO); 639 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)]; 640 *retp = ret; 641 642 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0) 643 return error; 644 645 free(data, M_TEMP); 646 647 return 0; 648 649 } 650 651 int 652 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval) 653 { 654 /* { 655 syscallarg(pid_t) pid; 656 syscallarg(unsigned int) len; 657 syscallarg(unsigned long *) mask; 658 } */ 659 660 if (pfind(SCARG(uap, pid)) == NULL) 661 return ESRCH; 662 663 /* Let's ignore it */ 664 #ifdef DEBUG_LINUX 665 printf("linux_sys_sched_setaffinity\n"); 666 #endif 667 return 0; 668 }; 669 #endif /* LINUX_NPTL */ 670