1 /* $NetBSD: linux_sched.c,v 1.32 2006/06/26 07:42:00 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center; by Matthias Scheler. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Linux compatibility module. Try to deal with scheduler related syscalls. 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.32 2006/06/26 07:42:00 manu Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/mount.h> 49 #include <sys/proc.h> 50 #include <sys/systm.h> 51 #include <sys/sysctl.h> 52 #include <sys/malloc.h> 53 #include <sys/sa.h> 54 #include <sys/syscallargs.h> 55 #include <sys/wait.h> 56 #include <sys/kauth.h> 57 58 #include <machine/cpu.h> 59 60 #include <compat/linux/common/linux_types.h> 61 #include <compat/linux/common/linux_signal.h> 62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */ 63 #include <compat/linux/common/linux_emuldata.h> 64 65 #include <compat/linux/linux_syscallargs.h> 66 67 #include <compat/linux/common/linux_sched.h> 68 69 int 70 linux_sys_clone(l, v, retval) 71 struct lwp *l; 72 void *v; 73 register_t *retval; 74 { 75 struct linux_sys_clone_args /* { 76 syscallarg(int) flags; 77 syscallarg(void *) stack; 78 #ifdef LINUX_NPTL 79 syscallarg(void *) parent_tidptr; 80 syscallarg(void *) child_tidptr; 81 #endif 82 } */ *uap = v; 83 int flags, sig; 84 int error; 85 #ifdef LINUX_NPTL 86 struct linux_emuldata *led; 87 #endif 88 89 /* 90 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags. 91 */ 92 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE)) 93 return (EINVAL); 94 95 /* 96 * Thread group implies shared signals. Shared signals 97 * imply shared VM. This matches what Linux kernel does. 98 */ 99 if (SCARG(uap, flags) & LINUX_CLONE_THREAD 100 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0) 101 return (EINVAL); 102 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND 103 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0) 104 return (EINVAL); 105 106 flags = 0; 107 108 if (SCARG(uap, flags) & LINUX_CLONE_VM) 109 flags |= FORK_SHAREVM; 110 if (SCARG(uap, flags) & LINUX_CLONE_FS) 111 flags |= FORK_SHARECWD; 112 if (SCARG(uap, flags) & LINUX_CLONE_FILES) 113 flags |= FORK_SHAREFILES; 114 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) 115 flags |= FORK_SHARESIGS; 116 if (SCARG(uap, flags) & LINUX_CLONE_VFORK) 117 flags |= FORK_PPWAIT; 118 119 /* Thread should not issue a SIGCHLD on termination */ 120 if (SCARG(uap, flags) & LINUX_CLONE_THREAD) { 121 sig = 0; 122 } else { 123 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL; 124 if (sig < 0 || sig >= LINUX__NSIG) 125 return (EINVAL); 126 sig = linux_to_native_signo[sig]; 127 } 128 129 #ifdef LINUX_NPTL 130 led = (struct linux_emuldata *)l->l_proc->p_emuldata; 131 132 if (SCARG(uap, flags) & LINUX_CLONE_PARENT_SETTID) { 133 if (SCARG(uap, parent_tidptr) == NULL) { 134 printf("linux_sys_clone: NULL parent_tidptr\n"); 135 return EINVAL; 136 } 137 138 if ((error = copyout(&l->l_proc->p_pid, 139 SCARG(uap, parent_tidptr), 140 sizeof(l->l_proc->p_pid))) != 0) 141 return error; 142 } 143 144 /* CLONE_CHILD_CLEARTID: TID clear in the child on exit() */ 145 if (SCARG(uap, flags) & LINUX_CLONE_CHILD_CLEARTID) 146 led->child_clear_tid = SCARG(uap, child_tidptr); 147 else 148 led->child_clear_tid = NULL; 149 150 /* CLONE_CHILD_SETTID: TID set in the child on clone() */ 151 if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) 152 led->child_set_tid = SCARG(uap, child_tidptr); 153 else 154 led->child_set_tid = NULL; 155 156 /* CLONE_SETTLS: new Thread Local Storage in the child */ 157 if (SCARG(uap, flags) & LINUX_CLONE_SETTLS) 158 led->set_tls = linux_get_newtls(l); 159 else 160 led->set_tls = 0; 161 #endif /* LINUX_NPTL */ 162 /* 163 * Note that Linux does not provide a portable way of specifying 164 * the stack area; the caller must know if the stack grows up 165 * or down. So, we pass a stack size of 0, so that the code 166 * that makes this adjustment is a noop. 167 */ 168 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0, 169 NULL, NULL, retval, NULL)) != 0) 170 return error; 171 172 return 0; 173 } 174 175 int 176 linux_sys_sched_setparam(cl, v, retval) 177 struct lwp *cl; 178 void *v; 179 register_t *retval; 180 { 181 struct linux_sys_sched_setparam_args /* { 182 syscallarg(linux_pid_t) pid; 183 syscallarg(const struct linux_sched_param *) sp; 184 } */ *uap = v; 185 struct proc *cp = cl->l_proc; 186 int error; 187 struct linux_sched_param lp; 188 struct proc *p; 189 190 /* 191 * We only check for valid parameters and return afterwards. 192 */ 193 194 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) 195 return EINVAL; 196 197 error = copyin(SCARG(uap, sp), &lp, sizeof(lp)); 198 if (error) 199 return error; 200 201 if (SCARG(uap, pid) != 0) { 202 kauth_cred_t pc = cp->p_cred; 203 204 if ((p = pfind(SCARG(uap, pid))) == NULL) 205 return ESRCH; 206 if (!(cp == p || 207 kauth_cred_geteuid(pc) == 0 || 208 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 209 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 210 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 211 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 212 return EPERM; 213 } 214 215 return 0; 216 } 217 218 int 219 linux_sys_sched_getparam(cl, v, retval) 220 struct lwp *cl; 221 void *v; 222 register_t *retval; 223 { 224 struct linux_sys_sched_getparam_args /* { 225 syscallarg(linux_pid_t) pid; 226 syscallarg(struct linux_sched_param *) sp; 227 } */ *uap = v; 228 struct proc *cp = cl->l_proc; 229 struct proc *p; 230 struct linux_sched_param lp; 231 232 /* 233 * We only check for valid parameters and return a dummy priority afterwards. 234 */ 235 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) 236 return EINVAL; 237 238 if (SCARG(uap, pid) != 0) { 239 kauth_cred_t pc = cp->p_cred; 240 241 if ((p = pfind(SCARG(uap, pid))) == NULL) 242 return ESRCH; 243 if (!(cp == p || 244 kauth_cred_geteuid(pc) == 0 || 245 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 246 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 247 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 248 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 249 return EPERM; 250 } 251 252 lp.sched_priority = 0; 253 return copyout(&lp, SCARG(uap, sp), sizeof(lp)); 254 } 255 256 int 257 linux_sys_sched_setscheduler(cl, v, retval) 258 struct lwp *cl; 259 void *v; 260 register_t *retval; 261 { 262 struct linux_sys_sched_setscheduler_args /* { 263 syscallarg(linux_pid_t) pid; 264 syscallarg(int) policy; 265 syscallarg(cont struct linux_sched_scheduler *) sp; 266 } */ *uap = v; 267 struct proc *cp = cl->l_proc; 268 int error; 269 struct linux_sched_param lp; 270 struct proc *p; 271 272 /* 273 * We only check for valid parameters and return afterwards. 274 */ 275 276 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) 277 return EINVAL; 278 279 error = copyin(SCARG(uap, sp), &lp, sizeof(lp)); 280 if (error) 281 return error; 282 283 if (SCARG(uap, pid) != 0) { 284 kauth_cred_t pc = cp->p_cred; 285 286 if ((p = pfind(SCARG(uap, pid))) == NULL) 287 return ESRCH; 288 if (!(cp == p || 289 kauth_cred_geteuid(pc) == 0 || 290 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 291 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 292 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 293 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 294 return EPERM; 295 } 296 297 /* 298 * We can't emulate anything put the default scheduling policy. 299 */ 300 if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0) 301 return EINVAL; 302 303 return 0; 304 } 305 306 int 307 linux_sys_sched_getscheduler(cl, v, retval) 308 struct lwp *cl; 309 void *v; 310 register_t *retval; 311 { 312 struct linux_sys_sched_getscheduler_args /* { 313 syscallarg(linux_pid_t) pid; 314 } */ *uap = v; 315 struct proc *cp = cl->l_proc; 316 struct proc *p; 317 318 *retval = -1; 319 /* 320 * We only check for valid parameters and return afterwards. 321 */ 322 323 if (SCARG(uap, pid) != 0) { 324 kauth_cred_t pc = cp->p_cred; 325 326 if ((p = pfind(SCARG(uap, pid))) == NULL) 327 return ESRCH; 328 if (!(cp == p || 329 kauth_cred_geteuid(pc) == 0 || 330 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 331 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 332 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 333 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 334 return EPERM; 335 } 336 337 /* 338 * We can't emulate anything put the default scheduling policy. 339 */ 340 *retval = LINUX_SCHED_OTHER; 341 return 0; 342 } 343 344 int 345 linux_sys_sched_yield(cl, v, retval) 346 struct lwp *cl; 347 void *v; 348 register_t *retval; 349 { 350 351 yield(); 352 return 0; 353 } 354 355 int 356 linux_sys_sched_get_priority_max(cl, v, retval) 357 struct lwp *cl; 358 void *v; 359 register_t *retval; 360 { 361 struct linux_sys_sched_get_priority_max_args /* { 362 syscallarg(int) policy; 363 } */ *uap = v; 364 365 /* 366 * We can't emulate anything put the default scheduling policy. 367 */ 368 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) { 369 *retval = -1; 370 return EINVAL; 371 } 372 373 *retval = 0; 374 return 0; 375 } 376 377 int 378 linux_sys_sched_get_priority_min(cl, v, retval) 379 struct lwp *cl; 380 void *v; 381 register_t *retval; 382 { 383 struct linux_sys_sched_get_priority_min_args /* { 384 syscallarg(int) policy; 385 } */ *uap = v; 386 387 /* 388 * We can't emulate anything put the default scheduling policy. 389 */ 390 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) { 391 *retval = -1; 392 return EINVAL; 393 } 394 395 *retval = 0; 396 return 0; 397 } 398 399 #ifndef __m68k__ 400 /* Present on everything but m68k */ 401 int 402 linux_sys_exit_group(l, v, retval) 403 struct lwp *l; 404 void *v; 405 register_t *retval; 406 { 407 struct linux_sys_exit_group_args /* { 408 syscallarg(int) error_code; 409 } */ *uap = v; 410 #ifdef LINUX_NPTL 411 struct proc *p = l->l_proc; 412 struct linux_emuldata *led = p->p_emuldata; 413 struct linux_emuldata *e; 414 struct lwp *sl; 415 struct proc *sp; 416 int s; 417 418 SCHED_LOCK(s); 419 /* 420 * The calling thread is supposed to kill all threads 421 * in the same thread group (i.e. all threads created 422 * via clone(2) with CLONE_THREAD flag set). 423 */ 424 LIST_FOREACH(e, &led->s->threads, threads) { 425 sp = e->proc; 426 427 if (sp == p) 428 continue; 429 #ifdef DEBUG_LINUX 430 printf("linux_sys_exit_group: kill PID %d\n", sp->p_pid); 431 #endif 432 /* wakeup any waiter */ 433 if (sp->p_sigctx.ps_sigwaited && 434 sigismember(sp->p_sigctx.ps_sigwait, SIGKILL) && 435 sp->p_stat != SSTOP) { 436 sched_wakeup(&sp->p_sigctx.ps_sigwait); 437 } 438 439 /* post SIGKILL */ 440 sigaddset(&sp->p_sigctx.ps_siglist, SIGKILL); 441 sp->p_sigctx.ps_sigcheck = 1; 442 443 /* Unblock the process if sleeping or stopped */ 444 switch(sp->p_stat) { 445 case SSTOP: 446 sl = proc_unstop(sp); 447 break; 448 case SACTIVE: 449 sl = proc_representative_lwp(sp); 450 break; 451 default: 452 sl = NULL; 453 break; 454 } 455 456 if (sl == NULL) { 457 printf("linux_sys_exit_group: no lwp for process %d\n", 458 sp->p_pid); 459 continue; 460 } 461 462 if (sl->l_priority > PUSER) 463 sl->l_priority = PUSER; 464 465 switch(sl->l_stat) { 466 case LSSUSPENDED: 467 lwp_continue(sl); 468 /* FALLTHROUGH */ 469 case LSSTOP: 470 case LSSLEEP: 471 case LSIDL: 472 setrunnable(sl); 473 /* FALLTHROUGH */ 474 default: 475 break; 476 } 477 } 478 SCHED_UNLOCK(s); 479 #endif /* LINUX_NPTL */ 480 481 exit1(l, W_EXITCODE(SCARG(uap, error_code), 0)); 482 /* NOTREACHED */ 483 return 0; 484 } 485 #endif /* !__m68k__ */ 486 487 #ifdef LINUX_NPTL 488 int 489 linux_sys_set_tid_address(l, v, retval) 490 struct lwp *l; 491 void *v; 492 register_t *retval; 493 { 494 struct linux_sys_set_tid_address_args /* { 495 syscallarg(int *) tidptr; 496 } */ *uap = v; 497 struct linux_emuldata *led; 498 499 led = (struct linux_emuldata *)l->l_proc->p_emuldata; 500 led->clear_tid = SCARG(uap, tid); 501 502 *retval = l->l_proc->p_pid; 503 504 return 0; 505 } 506 507 /* ARGUSED1 */ 508 int 509 linux_sys_gettid(l, v, retval) 510 struct lwp *l; 511 void *v; 512 register_t *retval; 513 { 514 /* The Linux kernel does it exactly that way */ 515 *retval = l->l_proc->p_pid; 516 return 0; 517 } 518 519 #ifdef LINUX_NPTL 520 /* ARGUSED1 */ 521 int 522 linux_sys_getpid(l, v, retval) 523 struct lwp *l; 524 void *v; 525 register_t *retval; 526 { 527 struct linux_emuldata *led; 528 529 led = l->l_proc->p_emuldata; 530 531 /* The Linux kernel does it exactly that way */ 532 *retval = led->s->group_pid; 533 534 return 0; 535 } 536 537 /* ARGUSED1 */ 538 int 539 linux_sys_getppid(l, v, retval) 540 struct lwp *l; 541 void *v; 542 register_t *retval; 543 { 544 struct proc *p = l->l_proc; 545 struct linux_emuldata *led = p->p_emuldata; 546 struct proc *glp; 547 struct proc *pp; 548 549 /* Find the thread group leader's parent */ 550 if ((glp = pfind(led->s->group_pid)) == NULL) { 551 /* Maybe panic... */ 552 printf("linux_sys_getppid: missing group leader PID %d\n", 553 led->s->group_pid); 554 return -1; 555 } 556 pp = glp->p_pptr; 557 558 /* If this is a Linux process too, return thread group PID */ 559 if (pp->p_emul == p->p_emul) { 560 struct linux_emuldata *pled; 561 562 pled = pp->p_emuldata; 563 *retval = pled->s->group_pid; 564 } else { 565 *retval = pp->p_pid; 566 } 567 568 return 0; 569 } 570 #endif /* LINUX_NPTL */ 571 572 int 573 linux_sys_sched_getaffinity(l, v, retval) 574 struct lwp *l; 575 void *v; 576 register_t *retval; 577 { 578 struct linux_sys_sched_getaffinity_args /* { 579 syscallarg(pid_t) pid; 580 syscallarg(unsigned int) len; 581 syscallarg(unsigned long *) mask; 582 } */ *uap = v; 583 int error; 584 int ret; 585 int ncpu; 586 int name[2]; 587 size_t sz; 588 char *data; 589 int *retp; 590 591 if (SCARG(uap, mask) == NULL) 592 return EINVAL; 593 594 if (SCARG(uap, len) < sizeof(int)) 595 return EINVAL; 596 597 if (pfind(SCARG(uap, pid)) == NULL) 598 return ESRCH; 599 600 /* 601 * return the actual number of CPU, tag all of them as available 602 * The result is a mask, the first CPU being in the least significant 603 * bit. 604 */ 605 name[0] = CTL_HW; 606 name[1] = HW_NCPU; 607 sz = sizeof(ncpu); 608 609 if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0) 610 return error; 611 612 ret = (1 << ncpu) - 1; 613 614 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO); 615 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)]; 616 *retp = ret; 617 618 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0) 619 return error; 620 621 free(data, M_TEMP); 622 623 return 0; 624 625 } 626 627 int 628 linux_sys_sched_setaffinity(l, v, retval) 629 struct lwp *l; 630 void *v; 631 register_t *retval; 632 { 633 struct linux_sys_sched_setaffinity_args /* { 634 syscallarg(pid_t) pid; 635 syscallarg(unsigned int) len; 636 syscallarg(unsigned long *) mask; 637 } */ *uap = v; 638 639 if (pfind(SCARG(uap, pid)) == NULL) 640 return ESRCH; 641 642 /* Let's ignore it */ 643 #ifdef DEBUG_LINUX 644 printf("linux_sys_sched_setaffinity\n"); 645 #endif 646 return 0; 647 }; 648 #endif /* LINUX_NPTL */ 649