/*	$NetBSD: linux_sched.c,v 1.50 2008/02/29 14:29:06 elad Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.50 2008/02/29 14:29:06 elad Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h>	/* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, NULL)) != 0)
		return error;

	return 0;
}
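
/*
 * For illustration of the flag mapping above (not taken from this file;
 * the clone(2) flag set shown is only the typical one used by glibc/NPTL):
 * pthread_create() normally issues clone(2) with roughly
 *
 *	CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
 *	CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
 *	CLONE_SYSVSEM
 *
 * which the code above maps to
 *
 *	FORK_SHAREVM | FORK_SHARECWD | FORK_SHAREFILES | FORK_SHARESIGS
 *
 * for fork1(), with the tid pointers and the full flag word saved in the
 * emuldata; the low CSIGNAL bits (0 in this example) select the signal
 * sent to the parent on exit.
 */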

/*
 * Linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   In particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
	}

	return 0;
}
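
/*
 * Worked example of the scaling above (illustrative only; it assumes,
 * for the sake of the arithmetic, SCHED_PRI_MIN == 0 and
 * SCHED_PRI_MAX == 63):
 *
 *	Linux rtprio  1 -> native  0
 *	Linux rtprio 50 -> native 31	(49 * 63 / 98, truncated)
 *	Linux rtprio 99 -> native 63
 *
 * Because the conversion uses integer division, the round trip is not
 * always exact: native 31 maps back to Linux 49, not 50.
 */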

int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

out:
	return error;
}
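
/*
 * Example flow, for illustration: a Linux program calling
 * sched_setscheduler(pid, SCHED_FIFO, &p) with p.sched_priority = 50
 * lands in linux_sys_sched_setscheduler() above, which converts policy
 * and priority with sched_linux2native() and hands the native values to
 * do_sched_setparam(); sched_getscheduler()/sched_getparam() go the
 * other way through sched_native2linux().
 */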

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with the CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple.
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group.  The emulation exit hook
		 * takes care of hiding the zombies and reporting the exit
		 * code properly.
		 */
		mutex_enter(&proclist_mutex);
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves. */
		psignal(p, SIGKILL);
		mutex_exit(&proclist_mutex);

		return 0;

	}
#endif /* LINUX_NPTL */

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

#ifdef LINUX_NPTL
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	led->s->flags |= LINUX_LES_USE_NPTL;

	*retval = l->l_proc->p_pid;

	return 0;
}

/* ARGSUSED */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{
	/* The Linux kernel does it exactly this way. */
	*retval = l->l_proc->p_pid;
	return 0;
}

#ifdef LINUX_NPTL
/* ARGSUSED */
int
linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
{
	struct linux_emuldata *led = l->l_proc->p_emuldata;

	if (led->s->flags & LINUX_LES_USE_NPTL) {
		/* The Linux kernel does it exactly this way. */
		*retval = led->s->group_pid;
	} else {
		*retval = l->l_proc->p_pid;
	}

	return 0;
}
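
/*
 * Illustrative example of the NPTL pid/tid model used above: if a
 * process with PID 100 creates two threads that get PIDs 101 and 102,
 * led->s->group_pid is 100 in all three, so linux_sys_getpid() returns
 * 100 for each of them while linux_sys_gettid() returns 100, 101 and
 * 102 respectively.
 */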

/* ARGSUSED */
int
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

		/* Find the thread group leader's parent. */
		if ((glp = pfind(led->s->group_pid)) == NULL) {
			/* Maybe panic... */
			printf("linux_sys_getppid: missing group leader PID"
			    " %d\n", led->s->group_pid);
			return ESRCH;
		}
		pp = glp->p_pptr;

		/* If this is a Linux process too, return the thread group PID. */
		if (pp->p_emul == p->p_emul) {
			struct linux_emuldata *pled;

			pled = pp->p_emuldata;
			*retval = pled->s->group_pid;
		} else {
			*retval = pp->p_pid;
		}

	} else {
		*retval = p->p_pptr->p_pid;
	}

	return 0;
}
#endif /* LINUX_NPTL */

int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	int error;
	int ret;
	char *data;
	int *retp;

	if (SCARG(uap, mask) == NULL)
		return EINVAL;

	if (SCARG(uap, len) < sizeof(int))
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Report the actual number of CPUs and tag all of them as
	 * available.  The result is a mask with the first CPU in the
	 * least significant bit.
	 */
	ret = (1 << ncpu) - 1;
	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
	*retp = ret;

	/* Free the temporary buffer even if the copyout fails. */
	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
	free(data, M_TEMP);

	return error;
}

int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/* Silently ignore the requested mask. */
#ifdef DEBUG_LINUX
	printf("linux_sys_sched_setaffinity\n");
#endif
	return 0;
}
#endif /* LINUX_NPTL */