1 /* $NetBSD: linux_sched.c,v 1.36 2006/10/12 01:30:48 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center; by Matthias Scheler. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Linux compatibility module. Try to deal with scheduler related syscalls. 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.36 2006/10/12 01:30:48 christos Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/mount.h> 49 #include <sys/proc.h> 50 #include <sys/systm.h> 51 #include <sys/sysctl.h> 52 #include <sys/malloc.h> 53 #include <sys/sa.h> 54 #include <sys/syscallargs.h> 55 #include <sys/wait.h> 56 #include <sys/kauth.h> 57 #include <sys/ptrace.h> 58 59 #include <machine/cpu.h> 60 61 #include <compat/linux/common/linux_types.h> 62 #include <compat/linux/common/linux_signal.h> 63 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */ 64 #include <compat/linux/common/linux_emuldata.h> 65 66 #include <compat/linux/linux_syscallargs.h> 67 68 #include <compat/linux/common/linux_sched.h> 69 70 int 71 linux_sys_clone(l, v, retval) 72 struct lwp *l; 73 void *v; 74 register_t *retval; 75 { 76 struct linux_sys_clone_args /* { 77 syscallarg(int) flags; 78 syscallarg(void *) stack; 79 #ifdef LINUX_NPTL 80 syscallarg(void *) parent_tidptr; 81 syscallarg(void *) child_tidptr; 82 #endif 83 } */ *uap = v; 84 int flags, sig; 85 int error; 86 #ifdef LINUX_NPTL 87 struct linux_emuldata *led; 88 #endif 89 90 /* 91 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags. 92 */ 93 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE)) 94 return (EINVAL); 95 96 /* 97 * Thread group implies shared signals. Shared signals 98 * imply shared VM. This matches what Linux kernel does. 99 */ 100 if (SCARG(uap, flags) & LINUX_CLONE_THREAD 101 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0) 102 return (EINVAL); 103 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND 104 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0) 105 return (EINVAL); 106 107 flags = 0; 108 109 if (SCARG(uap, flags) & LINUX_CLONE_VM) 110 flags |= FORK_SHAREVM; 111 if (SCARG(uap, flags) & LINUX_CLONE_FS) 112 flags |= FORK_SHARECWD; 113 if (SCARG(uap, flags) & LINUX_CLONE_FILES) 114 flags |= FORK_SHAREFILES; 115 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) 116 flags |= FORK_SHARESIGS; 117 if (SCARG(uap, flags) & LINUX_CLONE_VFORK) 118 flags |= FORK_PPWAIT; 119 120 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL; 121 if (sig < 0 || sig >= LINUX__NSIG) 122 return (EINVAL); 123 sig = linux_to_native_signo[sig]; 124 125 #ifdef LINUX_NPTL 126 led = (struct linux_emuldata *)l->l_proc->p_emuldata; 127 128 led->parent_tidptr = SCARG(uap, parent_tidptr); 129 led->child_tidptr = SCARG(uap, child_tidptr); 130 led->clone_flags = SCARG(uap, flags); 131 #endif /* LINUX_NPTL */ 132 133 /* 134 * Note that Linux does not provide a portable way of specifying 135 * the stack area; the caller must know if the stack grows up 136 * or down. So, we pass a stack size of 0, so that the code 137 * that makes this adjustment is a noop. 138 */ 139 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0, 140 NULL, NULL, retval, NULL)) != 0) 141 return error; 142 143 return 0; 144 } 145 146 int 147 linux_sys_sched_setparam(struct lwp *cl, void *v, register_t *retval __unused) 148 { 149 struct linux_sys_sched_setparam_args /* { 150 syscallarg(linux_pid_t) pid; 151 syscallarg(const struct linux_sched_param *) sp; 152 } */ *uap = v; 153 int error; 154 struct linux_sched_param lp; 155 struct proc *p; 156 157 /* 158 * We only check for valid parameters and return afterwards. 159 */ 160 161 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) 162 return EINVAL; 163 164 error = copyin(SCARG(uap, sp), &lp, sizeof(lp)); 165 if (error) 166 return error; 167 168 if (SCARG(uap, pid) != 0) { 169 kauth_cred_t pc = cl->l_cred; 170 171 if ((p = pfind(SCARG(uap, pid))) == NULL) 172 return ESRCH; 173 if (!(cl->l_proc == p || 174 kauth_cred_geteuid(pc) == 0 || 175 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 176 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 177 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 178 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 179 return EPERM; 180 } 181 182 return 0; 183 } 184 185 int 186 linux_sys_sched_getparam(struct lwp *cl, void *v, register_t *retval __unused) 187 { 188 struct linux_sys_sched_getparam_args /* { 189 syscallarg(linux_pid_t) pid; 190 syscallarg(struct linux_sched_param *) sp; 191 } */ *uap = v; 192 struct proc *p; 193 struct linux_sched_param lp; 194 195 /* 196 * We only check for valid parameters and return a dummy priority afterwards. 197 */ 198 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) 199 return EINVAL; 200 201 if (SCARG(uap, pid) != 0) { 202 kauth_cred_t pc = cl->l_cred; 203 204 if ((p = pfind(SCARG(uap, pid))) == NULL) 205 return ESRCH; 206 if (!(cl->l_proc == p || 207 kauth_cred_geteuid(pc) == 0 || 208 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 209 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 210 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 211 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 212 return EPERM; 213 } 214 215 lp.sched_priority = 0; 216 return copyout(&lp, SCARG(uap, sp), sizeof(lp)); 217 } 218 219 int 220 linux_sys_sched_setscheduler(struct lwp *cl, void *v, 221 register_t *retval __unused) 222 { 223 struct linux_sys_sched_setscheduler_args /* { 224 syscallarg(linux_pid_t) pid; 225 syscallarg(int) policy; 226 syscallarg(cont struct linux_sched_scheduler *) sp; 227 } */ *uap = v; 228 int error; 229 struct linux_sched_param lp; 230 struct proc *p; 231 232 /* 233 * We only check for valid parameters and return afterwards. 234 */ 235 236 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) 237 return EINVAL; 238 239 error = copyin(SCARG(uap, sp), &lp, sizeof(lp)); 240 if (error) 241 return error; 242 243 if (SCARG(uap, pid) != 0) { 244 kauth_cred_t pc = cl->l_cred; 245 246 if ((p = pfind(SCARG(uap, pid))) == NULL) 247 return ESRCH; 248 if (!(cl->l_proc == p || 249 kauth_cred_geteuid(pc) == 0 || 250 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 251 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 252 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 253 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 254 return EPERM; 255 } 256 257 return 0; 258 /* 259 * We can't emulate anything put the default scheduling policy. 260 */ 261 if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0) 262 return EINVAL; 263 264 return 0; 265 } 266 267 int 268 linux_sys_sched_getscheduler(cl, v, retval) 269 struct lwp *cl; 270 void *v; 271 register_t *retval; 272 { 273 struct linux_sys_sched_getscheduler_args /* { 274 syscallarg(linux_pid_t) pid; 275 } */ *uap = v; 276 struct proc *p; 277 278 *retval = -1; 279 /* 280 * We only check for valid parameters and return afterwards. 281 */ 282 283 if (SCARG(uap, pid) != 0) { 284 kauth_cred_t pc = cl->l_cred; 285 286 if ((p = pfind(SCARG(uap, pid))) == NULL) 287 return ESRCH; 288 if (!(cl->l_proc == p || 289 kauth_cred_geteuid(pc) == 0 || 290 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) || 291 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) || 292 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) || 293 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred))) 294 return EPERM; 295 } 296 297 /* 298 * We can't emulate anything put the default scheduling policy. 299 */ 300 *retval = LINUX_SCHED_OTHER; 301 return 0; 302 } 303 304 int 305 linux_sys_sched_yield(struct lwp *cl __unused, void *v __unused, 306 register_t *retval __unused) 307 { 308 309 yield(); 310 return 0; 311 } 312 313 int 314 linux_sys_sched_get_priority_max(struct lwp *cl __unused, void *v, 315 register_t *retval) 316 { 317 struct linux_sys_sched_get_priority_max_args /* { 318 syscallarg(int) policy; 319 } */ *uap = v; 320 321 /* 322 * We can't emulate anything put the default scheduling policy. 323 */ 324 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) { 325 *retval = -1; 326 return EINVAL; 327 } 328 329 *retval = 0; 330 return 0; 331 } 332 333 int 334 linux_sys_sched_get_priority_min(struct lwp *cl __unused, void *v, 335 register_t *retval) 336 { 337 struct linux_sys_sched_get_priority_min_args /* { 338 syscallarg(int) policy; 339 } */ *uap = v; 340 341 /* 342 * We can't emulate anything put the default scheduling policy. 343 */ 344 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) { 345 *retval = -1; 346 return EINVAL; 347 } 348 349 *retval = 0; 350 return 0; 351 } 352 353 #ifndef __m68k__ 354 /* Present on everything but m68k */ 355 int 356 linux_sys_exit_group(l, v, retval) 357 struct lwp *l; 358 void *v; 359 register_t *retval; 360 { 361 #ifdef LINUX_NPTL 362 struct linux_sys_exit_group_args /* { 363 syscallarg(int) error_code; 364 } */ *uap = v; 365 struct proc *p = l->l_proc; 366 struct linux_emuldata *led = p->p_emuldata; 367 struct linux_emuldata *e; 368 369 #ifdef DEBUG_LINUX 370 printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__, led->s->refs); 371 #endif 372 /* 373 * The calling thread is supposed to kill all threads 374 * in the same thread group (i.e. all threads created 375 * via clone(2) with CLONE_THREAD flag set). 376 * 377 * If there is only one thread, things are quite simple 378 */ 379 if (led->s->refs == 1) 380 return sys_exit(l, v, retval); 381 382 #ifdef DEBUG_LINUX 383 printf("%s:%d\n", __func__, __LINE__); 384 #endif 385 386 led->s->flags |= LINUX_LES_INEXITGROUP; 387 led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0); 388 389 /* 390 * Kill all threads in the group. The emulation exit hook takes 391 * care of hiding the zombies and reporting the exit code properly 392 */ 393 LIST_FOREACH(e, &led->s->threads, threads) { 394 if (e->proc == p) 395 continue; 396 397 #ifdef DEBUG_LINUX 398 printf("%s: kill PID %d\n", __func__, e->proc->p_pid); 399 #endif 400 psignal(e->proc, SIGKILL); 401 } 402 403 /* Now, kill ourselves */ 404 psignal(p, SIGKILL); 405 return 0; 406 #else /* LINUX_NPTL */ 407 return sys_exit(l, v, retval); 408 #endif /* LINUX_NPTL */ 409 } 410 #endif /* !__m68k__ */ 411 412 #ifdef LINUX_NPTL 413 int 414 linux_sys_set_tid_address(l, v, retval) 415 struct lwp *l; 416 void *v; 417 register_t *retval; 418 { 419 struct linux_sys_set_tid_address_args /* { 420 syscallarg(int *) tidptr; 421 } */ *uap = v; 422 struct linux_emuldata *led; 423 424 led = (struct linux_emuldata *)l->l_proc->p_emuldata; 425 led->clear_tid = SCARG(uap, tid); 426 427 *retval = l->l_proc->p_pid; 428 429 return 0; 430 } 431 432 /* ARGUSED1 */ 433 int 434 linux_sys_gettid(l, v, retval) 435 struct lwp *l; 436 void *v; 437 register_t *retval; 438 { 439 /* The Linux kernel does it exactly that way */ 440 *retval = l->l_proc->p_pid; 441 return 0; 442 } 443 444 #ifdef LINUX_NPTL 445 /* ARGUSED1 */ 446 int 447 linux_sys_getpid(l, v, retval) 448 struct lwp *l; 449 void *v; 450 register_t *retval; 451 { 452 struct linux_emuldata *led; 453 454 led = l->l_proc->p_emuldata; 455 456 /* The Linux kernel does it exactly that way */ 457 *retval = led->s->group_pid; 458 459 return 0; 460 } 461 462 /* ARGUSED1 */ 463 int 464 linux_sys_getppid(l, v, retval) 465 struct lwp *l; 466 void *v; 467 register_t *retval; 468 { 469 struct proc *p = l->l_proc; 470 struct linux_emuldata *led = p->p_emuldata; 471 struct proc *glp; 472 struct proc *pp; 473 474 /* Find the thread group leader's parent */ 475 if ((glp = pfind(led->s->group_pid)) == NULL) { 476 /* Maybe panic... */ 477 printf("linux_sys_getppid: missing group leader PID %d\n", 478 led->s->group_pid); 479 return -1; 480 } 481 pp = glp->p_pptr; 482 483 /* If this is a Linux process too, return thread group PID */ 484 if (pp->p_emul == p->p_emul) { 485 struct linux_emuldata *pled; 486 487 pled = pp->p_emuldata; 488 *retval = pled->s->group_pid; 489 } else { 490 *retval = pp->p_pid; 491 } 492 493 return 0; 494 } 495 #endif /* LINUX_NPTL */ 496 497 int 498 linux_sys_sched_getaffinity(l, v, retval) 499 struct lwp *l; 500 void *v; 501 register_t *retval; 502 { 503 struct linux_sys_sched_getaffinity_args /* { 504 syscallarg(pid_t) pid; 505 syscallarg(unsigned int) len; 506 syscallarg(unsigned long *) mask; 507 } */ *uap = v; 508 int error; 509 int ret; 510 int ncpu; 511 int name[2]; 512 size_t sz; 513 char *data; 514 int *retp; 515 516 if (SCARG(uap, mask) == NULL) 517 return EINVAL; 518 519 if (SCARG(uap, len) < sizeof(int)) 520 return EINVAL; 521 522 if (pfind(SCARG(uap, pid)) == NULL) 523 return ESRCH; 524 525 /* 526 * return the actual number of CPU, tag all of them as available 527 * The result is a mask, the first CPU being in the least significant 528 * bit. 529 */ 530 name[0] = CTL_HW; 531 name[1] = HW_NCPU; 532 sz = sizeof(ncpu); 533 534 if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0) 535 return error; 536 537 ret = (1 << ncpu) - 1; 538 539 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO); 540 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)]; 541 *retp = ret; 542 543 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0) 544 return error; 545 546 free(data, M_TEMP); 547 548 return 0; 549 550 } 551 552 int 553 linux_sys_sched_setaffinity(l, v, retval) 554 struct lwp *l; 555 void *v; 556 register_t *retval; 557 { 558 struct linux_sys_sched_setaffinity_args /* { 559 syscallarg(pid_t) pid; 560 syscallarg(unsigned int) len; 561 syscallarg(unsigned long *) mask; 562 } */ *uap = v; 563 564 if (pfind(SCARG(uap, pid)) == NULL) 565 return ESRCH; 566 567 /* Let's ignore it */ 568 #ifdef DEBUG_LINUX 569 printf("linux_sys_sched_setaffinity\n"); 570 #endif 571 return 0; 572 }; 573 #endif /* LINUX_NPTL */ 574