1 /* $NetBSD: sys_sched.c,v 1.40 2012/02/19 21:06:56 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * System calls relating to the scheduler. 
 *
 * Lock order:
 *
 *	cpu_lock ->
 *	    proc_lock ->
 *		proc_t::p_lock ->
 *		    lwp_t::lwp_lock
 *
 * TODO:
 *  - Handle pthread_setschedprio() as defined by POSIX;
 *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.40 2012/02/19 21:06:56 rmind Exp $");

#include <sys/param.h>

#include <sys/cpu.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pset.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/unistd.h>

/* Sysctl log for the nodes created in sysctl_sched_setup(). */
static struct sysctllog *sched_sysctl_log;
/* kauth listener authorizing the scheduler process actions (see below). */
static kauth_listener_t sched_listener;

/*
 * convert_pri: map a user-supplied POSIX priority into the in-kernel
 * priority range, or, when no explicit priority was given (pri == PRI_NONE),
 * derive the priority implied by the requested policy change.
 *
 * => 'pri', if not PRI_NONE, must already be validated against
 *    SCHED_PRI_MIN..SCHED_PRI_MAX by the caller (asserted below).
 * => Caller holds the LWP locked, so l_class/l_priority are stable.
 */
static pri_t
convert_pri(lwp_t *l, int policy, pri_t pri)
{

	/* Convert user priority to the in-kernel */
	if (pri != PRI_NONE) {
		/* Only for real-time threads */
		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
		KASSERT(policy != SCHED_OTHER);
		return PRI_USER_RT + pri;
	}

	/* Neither policy, nor priority change */
	if (l->l_class == policy)
		return l->l_priority;

	/* Time-sharing -> real-time: start at the bottom of the RT range */
	if (l->l_class == SCHED_OTHER) {
		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
		return PRI_USER_RT;
	}

	/* Real-time -> time-sharing: shift back into the user range */
	if (policy == SCHED_OTHER) {
		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
		return l->l_priority - PRI_USER_RT;
	}

	/* Real-time -> real-time: keep the current priority */
	return l->l_priority;
}

/*
 * do_sched_setparam: set the scheduling class and/or priority for the
 * LWP(s) of a process.
 *
 * => pid == 0 means the calling process; lid == 0 means all LWPs.
 * => policy may be SCHED_NONE to keep each LWP's current class.
 * => Returns ESRCH if no process/LWP matched, EPERM for system
 *    processes or on kauth denial, EINVAL for bad class/priority
 *    combinations, 0 on success.
 */
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = proc_find(pid);
		if (p == NULL) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		/* Hold p_lock before dropping proc_lock (lock order). */
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s) */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		/* SCHED_NONE means "keep this LWP's current class". */
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	/* No LWP matched the given lid => ESRCH per POSIX. */
	return (lcnt == 0) ? ESRCH : error;
}

/*
 * Set scheduling parameters.
192 */ 193 int 194 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 195 register_t *retval) 196 { 197 /* { 198 syscallarg(pid_t) pid; 199 syscallarg(lwpid_t) lid; 200 syscallarg(int) policy; 201 syscallarg(const struct sched_param *) params; 202 } */ 203 struct sched_param params; 204 int error; 205 206 /* Get the parameters from the user-space */ 207 error = copyin(SCARG(uap, params), ¶ms, sizeof(params)); 208 if (error) 209 goto out; 210 211 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid), 212 SCARG(uap, policy), ¶ms); 213 out: 214 return error; 215 } 216 217 int 218 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy, 219 struct sched_param *params) 220 { 221 struct sched_param lparams; 222 struct lwp *t; 223 int error, lpolicy; 224 225 /* Locks the LWP */ 226 t = lwp_find2(pid, lid); 227 if (t == NULL) 228 return ESRCH; 229 230 /* Check the permission */ 231 error = kauth_authorize_process(kauth_cred_get(), 232 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 233 if (error != 0) { 234 mutex_exit(t->l_proc->p_lock); 235 return error; 236 } 237 238 lwp_lock(t); 239 lparams.sched_priority = t->l_priority; 240 lpolicy = t->l_class; 241 242 switch (lpolicy) { 243 case SCHED_OTHER: 244 lparams.sched_priority -= PRI_USER; 245 break; 246 case SCHED_RR: 247 case SCHED_FIFO: 248 lparams.sched_priority -= PRI_USER_RT; 249 break; 250 } 251 252 if (policy != NULL) 253 *policy = lpolicy; 254 255 if (params != NULL) 256 *params = lparams; 257 258 lwp_unlock(t); 259 mutex_exit(t->l_proc->p_lock); 260 return error; 261 } 262 263 /* 264 * Get scheduling parameters. 
265 */ 266 int 267 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 268 register_t *retval) 269 { 270 /* { 271 syscallarg(pid_t) pid; 272 syscallarg(lwpid_t) lid; 273 syscallarg(int *) policy; 274 syscallarg(struct sched_param *) params; 275 } */ 276 struct sched_param params; 277 int error, policy; 278 279 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy, 280 ¶ms); 281 if (error) 282 goto out; 283 284 error = copyout(¶ms, SCARG(uap, params), sizeof(params)); 285 if (error == 0 && SCARG(uap, policy) != NULL) 286 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 287 out: 288 return error; 289 } 290 291 /* 292 * Allocate the CPU set, and get it from userspace. 293 */ 294 static int 295 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size) 296 { 297 kcpuset_t *kset; 298 int error; 299 300 kcpuset_create(&kset, false); 301 error = kcpuset_copyin(sset, kset, size); 302 if (error) { 303 kcpuset_unuse(kset, NULL); 304 } else { 305 *dset = kset; 306 } 307 return error; 308 } 309 310 /* 311 * Set affinity. 312 */ 313 int 314 sys__sched_setaffinity(struct lwp *l, 315 const struct sys__sched_setaffinity_args *uap, register_t *retval) 316 { 317 /* { 318 syscallarg(pid_t) pid; 319 syscallarg(lwpid_t) lid; 320 syscallarg(size_t) size; 321 syscallarg(const cpuset_t *) cpuset; 322 } */ 323 kcpuset_t *kcset, *kcpulst = NULL; 324 struct cpu_info *ici, *ci; 325 struct proc *p; 326 struct lwp *t; 327 CPU_INFO_ITERATOR cii; 328 bool alloff; 329 lwpid_t lid; 330 u_int lcnt; 331 int error; 332 333 error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size)); 334 if (error) 335 return error; 336 337 /* 338 * Traverse _each_ CPU to: 339 * - Check that CPUs in the mask have no assigned processor set. 340 * - Check that at least one CPU from the mask is online. 341 * - Find the first target CPU to migrate. 
342 * 343 * To avoid the race with CPU online/offline calls and processor sets, 344 * cpu_lock will be locked for the entire operation. 345 */ 346 ci = NULL; 347 alloff = false; 348 mutex_enter(&cpu_lock); 349 for (CPU_INFO_FOREACH(cii, ici)) { 350 struct schedstate_percpu *ispc; 351 352 if (!kcpuset_isset(kcset, cpu_index(ici))) { 353 continue; 354 } 355 356 ispc = &ici->ci_schedstate; 357 /* Check that CPU is not in the processor-set */ 358 if (ispc->spc_psid != PS_NONE) { 359 error = EPERM; 360 goto out; 361 } 362 /* Skip offline CPUs */ 363 if (ispc->spc_flags & SPCF_OFFLINE) { 364 alloff = true; 365 continue; 366 } 367 /* Target CPU to migrate */ 368 if (ci == NULL) { 369 ci = ici; 370 } 371 } 372 if (ci == NULL) { 373 if (alloff) { 374 /* All CPUs in the set are offline */ 375 error = EPERM; 376 goto out; 377 } 378 /* Empty set */ 379 kcpuset_unuse(kcset, &kcpulst); 380 kcset = NULL; 381 } 382 383 if (SCARG(uap, pid) != 0) { 384 /* Find the process */ 385 mutex_enter(proc_lock); 386 p = proc_find(SCARG(uap, pid)); 387 if (p == NULL) { 388 mutex_exit(proc_lock); 389 error = ESRCH; 390 goto out; 391 } 392 mutex_enter(p->p_lock); 393 mutex_exit(proc_lock); 394 /* Disallow modification of system processes. */ 395 if ((p->p_flag & PK_SYSTEM) != 0) { 396 mutex_exit(p->p_lock); 397 error = EPERM; 398 goto out; 399 } 400 } else { 401 /* Use the calling process */ 402 p = l->l_proc; 403 mutex_enter(p->p_lock); 404 } 405 406 /* 407 * Check the permission. 408 */ 409 error = kauth_authorize_process(l->l_cred, 410 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 411 if (error != 0) { 412 mutex_exit(p->p_lock); 413 goto out; 414 } 415 416 /* Iterate through LWP(s). */ 417 lcnt = 0; 418 lid = SCARG(uap, lid); 419 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 420 if (lid && lid != t->l_lid) { 421 continue; 422 } 423 lwp_lock(t); 424 /* No affinity for zombie LWPs. 
*/ 425 if (t->l_stat == LSZOMB) { 426 lwp_unlock(t); 427 continue; 428 } 429 /* First, release existing affinity, if any. */ 430 if (t->l_affinity) { 431 kcpuset_unuse(t->l_affinity, &kcpulst); 432 } 433 if (kcset) { 434 /* 435 * Hold a reference on affinity mask, assign mask to 436 * LWP and migrate it to another CPU (unlocks LWP). 437 */ 438 kcpuset_use(kcset); 439 t->l_affinity = kcset; 440 lwp_migrate(t, ci); 441 } else { 442 /* Old affinity mask is released, just clear. */ 443 t->l_affinity = NULL; 444 lwp_unlock(t); 445 } 446 lcnt++; 447 } 448 mutex_exit(p->p_lock); 449 if (lcnt == 0) { 450 error = ESRCH; 451 } 452 out: 453 mutex_exit(&cpu_lock); 454 455 /* 456 * Drop the initial reference (LWPs, if any, have the ownership now), 457 * and destroy whatever is in the G/C list, if filled. 458 */ 459 if (kcset) { 460 kcpuset_unuse(kcset, &kcpulst); 461 } 462 if (kcpulst) { 463 kcpuset_destroy(kcpulst); 464 } 465 return error; 466 } 467 468 /* 469 * Get affinity. 470 */ 471 int 472 sys__sched_getaffinity(struct lwp *l, 473 const struct sys__sched_getaffinity_args *uap, register_t *retval) 474 { 475 /* { 476 syscallarg(pid_t) pid; 477 syscallarg(lwpid_t) lid; 478 syscallarg(size_t) size; 479 syscallarg(cpuset_t *) cpuset; 480 } */ 481 struct lwp *t; 482 kcpuset_t *kcset; 483 int error; 484 485 error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size)); 486 if (error) 487 return error; 488 489 /* Locks the LWP */ 490 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); 491 if (t == NULL) { 492 error = ESRCH; 493 goto out; 494 } 495 /* Check the permission */ 496 if (kauth_authorize_process(l->l_cred, 497 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 498 mutex_exit(t->l_proc->p_lock); 499 error = EPERM; 500 goto out; 501 } 502 lwp_lock(t); 503 if (t->l_affinity) { 504 kcpuset_copy(kcset, t->l_affinity); 505 } else { 506 kcpuset_zero(kcset); 507 } 508 lwp_unlock(t); 509 mutex_exit(t->l_proc->p_lock); 510 511 error = kcpuset_copyout(kcset, 
SCARG(uap, cpuset), SCARG(uap, size)); 512 out: 513 kcpuset_unuse(kcset, NULL); 514 return error; 515 } 516 517 /* 518 * Yield. 519 */ 520 int 521 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 522 { 523 524 yield(); 525 return 0; 526 } 527 528 /* 529 * Sysctl nodes and initialization. 530 */ 531 static void 532 sysctl_sched_setup(struct sysctllog **clog) 533 { 534 const struct sysctlnode *node = NULL; 535 536 sysctl_createv(clog, 0, NULL, NULL, 537 CTLFLAG_PERMANENT, 538 CTLTYPE_NODE, "kern", NULL, 539 NULL, 0, NULL, 0, 540 CTL_KERN, CTL_EOL); 541 sysctl_createv(clog, 0, NULL, NULL, 542 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 543 CTLTYPE_INT, "posix_sched", 544 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 545 "Process Scheduling option to which the " 546 "system attempts to conform"), 547 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 548 CTL_KERN, CTL_CREATE, CTL_EOL); 549 sysctl_createv(clog, 0, NULL, &node, 550 CTLFLAG_PERMANENT, 551 CTLTYPE_NODE, "sched", 552 SYSCTL_DESCR("Scheduler options"), 553 NULL, 0, NULL, 0, 554 CTL_KERN, CTL_CREATE, CTL_EOL); 555 556 if (node == NULL) 557 return; 558 559 sysctl_createv(clog, 0, &node, NULL, 560 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 561 CTLTYPE_INT, "pri_min", 562 SYSCTL_DESCR("Minimal POSIX real-time priority"), 563 NULL, SCHED_PRI_MIN, NULL, 0, 564 CTL_CREATE, CTL_EOL); 565 sysctl_createv(clog, 0, &node, NULL, 566 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 567 CTLTYPE_INT, "pri_max", 568 SYSCTL_DESCR("Maximal POSIX real-time priority"), 569 NULL, SCHED_PRI_MAX, NULL, 0, 570 CTL_CREATE, CTL_EOL); 571 } 572 573 static int 574 sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 575 void *arg0, void *arg1, void *arg2, void *arg3) 576 { 577 struct proc *p; 578 int result; 579 580 result = KAUTH_RESULT_DEFER; 581 p = arg0; 582 583 switch (action) { 584 case KAUTH_PROCESS_SCHEDULER_GETPARAM: 585 if (kauth_cred_uidmatch(cred, p->p_cred)) 586 result = KAUTH_RESULT_ALLOW; 587 break; 588 589 
case KAUTH_PROCESS_SCHEDULER_SETPARAM: 590 if (kauth_cred_uidmatch(cred, p->p_cred)) { 591 struct lwp *l; 592 int policy; 593 pri_t priority; 594 595 l = arg1; 596 policy = (int)(unsigned long)arg2; 597 priority = (pri_t)(unsigned long)arg3; 598 599 if ((policy == l->l_class || 600 (policy != SCHED_FIFO && policy != SCHED_RR)) && 601 priority <= l->l_priority) 602 result = KAUTH_RESULT_ALLOW; 603 } 604 605 break; 606 607 case KAUTH_PROCESS_SCHEDULER_GETAFFINITY: 608 result = KAUTH_RESULT_ALLOW; 609 break; 610 611 case KAUTH_PROCESS_SCHEDULER_SETAFFINITY: 612 /* Privileged; we let the secmodel handle this. */ 613 break; 614 615 default: 616 break; 617 } 618 619 return result; 620 } 621 622 void 623 sched_init(void) 624 { 625 626 sysctl_sched_setup(&sched_sysctl_log); 627 628 sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 629 sched_listener_cb, NULL); 630 } 631