/*	$NetBSD: linux_sched.c,v 1.54 2008/04/28 20:23:44 martin Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.54 2008/04/28 20:23:44 martin Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h>	/* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, NULL)) != 0)
		return error;

	return 0;
}
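/*
 * Illustrative sketch (an assumption about typical callers, not part
 * of this module): an NPTL pthread_create() on Linux usually issues
 * something like
 *
 *	clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
 *	    CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|
 *	    CLONE_CHILD_CLEARTID, stack, &ptid, tls, &ctid);
 *
 * With the translation above, such a call reaches fork1() with
 * FORK_SHAREVM|FORK_SHARECWD|FORK_SHAREFILES|FORK_SHARESIGS and no
 * termination signal, since a thread leaves the CSIGNAL bits clear;
 * the tid pointers are recorded in the emuldata for later use.
 */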
/*
 * Linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   In particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}
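/*
 * Worked example of the mapping above, assuming the usual NetBSD
 * values SCHED_PRI_MIN == 0 and SCHED_PRI_MAX == 63:
 *
 *	Linux rtprio  1 -> ( 0 * 63) / 98 + 0 =  0	(SCHED_PRI_MIN)
 *	Linux rtprio 50 -> (49 * 63) / 98 + 0 = 31
 *	Linux rtprio 99 -> (98 * 63) / 98 + 0 = 63	(SCHED_PRI_MAX)
 *
 * The integer division truncates, so distinct Linux priorities may
 * collapse onto the same native priority.
 */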
static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
	}

	return 0;
}

int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

out:
	return error;
}
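/*
 * Userland view (sketch): a Linux binary running under emulation that
 * does
 *
 *	struct sched_param sp = { .sched_priority = 50 };
 *	sched_setscheduler(0, SCHED_FIFO, &sp);
 *
 * ends up in linux_sys_sched_setscheduler() above, which maps
 * SCHED_FIFO/50 to a native policy and priority with
 * sched_linux2native() and applies them via do_sched_setparam().
 */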
int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with the CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple.
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		mutex_enter(proc_lock);
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group.  The emulation exit hook
		 * takes care of hiding the zombies and reporting the exit
		 * code properly.
		 */
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves. */
		psignal(p, SIGKILL);
		mutex_exit(proc_lock);

		return 0;
	}
#endif /* LINUX_NPTL */

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

#ifdef LINUX_NPTL
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	led->s->flags |= LINUX_LES_USE_NPTL;

	*retval = l->l_proc->p_pid;

	return 0;
}

/* ARGSUSED */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{
	/* The Linux kernel does it exactly this way. */
	*retval = l->l_proc->p_pid;
	return 0;
}

/* ARGSUSED */
int
linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
{
	struct linux_emuldata *led = l->l_proc->p_emuldata;

	if (led->s->flags & LINUX_LES_USE_NPTL) {
		/* The Linux kernel does it exactly this way. */
		*retval = led->s->group_pid;
	} else {
		*retval = l->l_proc->p_pid;
	}

	return 0;
}

/* ARGSUSED */
int
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	mutex_enter(proc_lock);
	if (led->s->flags & LINUX_LES_USE_NPTL) {

		/* Find the thread group leader's parent. */
		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
			/* Maybe panic... */
			printf("linux_sys_getppid: missing group leader PID"
			    " %d\n", led->s->group_pid);
			mutex_exit(proc_lock);
			return ESRCH;
		}
		pp = glp->p_pptr;

		/* If this is a Linux process too, return the thread group PID. */
		if (pp->p_emul == p->p_emul) {
			struct linux_emuldata *pled;

			pled = pp->p_emuldata;
			*retval = pled->s->group_pid;
		} else {
			*retval = pp->p_pid;
		}

	} else {
		*retval = p->p_pptr->p_pid;
	}
	mutex_exit(proc_lock);

	return 0;
}
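/*
 * Example of the ID semantics above: in a thread group whose leader
 * has PID 100 and which holds two more threads with PIDs 101 and 102,
 * gettid() returns 100, 101 or 102 depending on the calling thread,
 * while getpid() returns the shared group_pid 100 for all of them,
 * matching the Linux behaviour.
 */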
int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	int error;
	int ret;
	char *data;
	int *retp;

	if (SCARG(uap, mask) == NULL)
		return EINVAL;

	if (SCARG(uap, len) < sizeof(int))
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Return the actual number of CPUs and tag all of them as
	 * available.  The result is a mask with the first CPU in the
	 * least significant bit.
	 */
	ret = (1 << ncpu) - 1;
	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
	*retp = ret;

	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
	free(data, M_TEMP);	/* free the buffer even if the copyout failed */

	return error;
}
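/*
 * Worked example for the mask built above: with ncpu == 2,
 * ret == (1 << 2) - 1 == 0x3, i.e. CPUs 0 and 1 are both reported as
 * available, CPU 0 being in the least significant bit.
 */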
605 */ 606 ret = (1 << ncpu) - 1; 607 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO); 608 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)]; 609 *retp = ret; 610 611 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0) 612 return error; 613 614 free(data, M_TEMP); 615 616 return 0; 617 618 } 619 620 int 621 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval) 622 { 623 /* { 624 syscallarg(pid_t) pid; 625 syscallarg(unsigned int) len; 626 syscallarg(unsigned long *) mask; 627 } */ 628 629 if (pfind(SCARG(uap, pid)) == NULL) 630 return ESRCH; 631 632 /* Let's ignore it */ 633 #ifdef DEBUG_LINUX 634 printf("linux_sys_sched_setaffinity\n"); 635 #endif 636 return 0; 637 }; 638 #endif /* LINUX_NPTL */ 639