/*	$NetBSD: linux_sched.c,v 1.55 2008/05/01 16:06:17 njoly Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.55 2008/05/01 16:06:17 njoly Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h>	/* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a noop.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, NULL)) != 0)
		return error;

	return 0;
}
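
/*
 * Example of the flag translation above (illustrative, not exhaustive):
 * an NPTL pthread_create() normally calls clone(2) with
 * CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD plus the
 * tid/TLS flags, which becomes
 * FORK_SHAREVM|FORK_SHARECWD|FORK_SHAREFILES|FORK_SHARESIGS for fork1().
 * The tid pointers are stashed in the emuldata so the emulation's fork
 * and exit hooks can update them later; TLS setup is machine-dependent.
 */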

/*
 * linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   In particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99
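
/*
 * The two helpers below translate policies and rescale priorities
 * linearly between the Linux range [LINUX_SCHED_RTPRIO_MIN,
 * LINUX_SCHED_RTPRIO_MAX] and the native range [SCHED_PRI_MIN,
 * SCHED_PRI_MAX].  For example, assuming SCHED_PRI_MIN == 0 and
 * SCHED_PRI_MAX == 63, Linux priority 1 maps to 0, 50 maps to 31, and
 * 99 maps to 63; intermediate values truncate under integer division.
 */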

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

#if 0
		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);
#endif
#ifdef DEBUG_LINUX
		printf("native2linux: native: policy %d, priority %d\n",
		    native_policy, prio);
#endif

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
#ifdef DEBUG_LINUX
		printf("native2linux: linux: policy %d, priority %d\n",
		    linux_policy != NULL ? *linux_policy : -1,
		    linux_params->sched_priority);
#endif
	}

	return 0;
}
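
/*
 * Note on the call sequence in linux_sys_sched_setparam() below:
 * sched_setparam(2) changes only the priority, so the target's current
 * policy is fetched first (in native terms), converted to its Linux
 * equivalent, and then used by sched_linux2native() to validate and
 * rescale the user-supplied priority.
 */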

int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("getparam: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("getparam: linux: policy %d, priority %d\n",
	    policy, lp.sched_priority);
#endif

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: linux: policy %d, priority %d\n",
	    SCARG(uap, policy), lp.sched_priority);
#endif

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}
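
/*
 * Example of the exit_group(2) semantics implemented below: if a
 * multi-threaded Linux process calls exit_group(42), every thread in
 * the group is killed and the parent's wait(2) reports exit status 42,
 * just as if a single-threaded process had called _exit(42).
 */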

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple.
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		mutex_enter(proc_lock);
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group.  The emulation exit hook
		 * takes care of hiding the zombies and reporting the exit
		 * code properly.
		 */
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves. */
		psignal(p, SIGKILL);
		mutex_exit(proc_lock);

		return 0;

	}
#endif /* LINUX_NPTL */

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

#ifdef LINUX_NPTL
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	led->s->flags |= LINUX_LES_USE_NPTL;

	*retval = l->l_proc->p_pid;

	return 0;
}

/* ARGSUSED1 */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{
	/* The Linux kernel does it exactly that way. */
	*retval = l->l_proc->p_pid;
	return 0;
}

/* ARGSUSED1 */
int
linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
{
	struct linux_emuldata *led = l->l_proc->p_emuldata;

	if (led->s->flags & LINUX_LES_USE_NPTL) {
		/* The Linux kernel does it exactly that way. */
		*retval = led->s->group_pid;
	} else {
		*retval = l->l_proc->p_pid;
	}

	return 0;
}

/* ARGSUSED1 */
int
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	mutex_enter(proc_lock);
	if (led->s->flags & LINUX_LES_USE_NPTL) {

		/* Find the thread group leader's parent. */
		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
			/* Maybe panic... */
			printf("linux_sys_getppid: missing group leader PID"
			    " %d\n", led->s->group_pid);
			mutex_exit(proc_lock);
			return ESRCH;
		}
		pp = glp->p_pptr;

		/* If this is a Linux process too, return thread group PID. */
		if (pp->p_emul == p->p_emul) {
			struct linux_emuldata *pled;

			pled = pp->p_emuldata;
			*retval = pled->s->group_pid;
		} else {
			*retval = pp->p_pid;
		}

	} else {
		*retval = p->p_pptr->p_pid;
	}
	mutex_exit(proc_lock);

	return 0;
}
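
/*
 * XXX The affinity mask computed below assumes fewer than 32 CPUs,
 * since (1 << ncpu) overflows an int otherwise.  For example, with
 * ncpu == 2 the reported mask is 0x3: CPUs 0 and 1 available, CPU 0
 * in the least significant bit.
 */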

int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	int error;
	int ret;
	char *data;
	int *retp;

	if (SCARG(uap, mask) == NULL)
		return EINVAL;

	if (SCARG(uap, len) < sizeof(int))
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Return the actual number of CPUs, tagging all of them as
	 * available.  The result is a mask, the first CPU being in the
	 * least significant bit.
	 */
	ret = (1 << ncpu) - 1;
	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
	*retp = ret;

	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
	free(data, M_TEMP);

	return error;
}

int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/* Let's ignore it. */
#ifdef DEBUG_LINUX
	printf("linux_sys_sched_setaffinity\n");
#endif
	return 0;
}
#endif /* LINUX_NPTL */