/*	$NetBSD: sys_sched.c,v 1.39 2012/01/29 22:55:40 rmind Exp $	*/

/*
 * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * System calls relating to the scheduler.
 *
 * Lock order:
 *
 *	cpu_lock ->
 *	    proc_lock ->
 *		proc_t::p_lock ->
 *		    lwp_t::lwp_lock
 *
 * TODO:
 *  - Handle pthread_setschedprio() as defined by POSIX;
 *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.39 2012/01/29 22:55:40 rmind Exp $");

#include <sys/param.h>

#include <sys/cpu.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pset.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/unistd.h>

#include "opt_sa.h"

static struct sysctllog	*sched_sysctl_log;
static kauth_listener_t	sched_listener;

/*
 * Convert a user priority to the in-kernel range or, if no priority is
 * given, convert the current priority to the appropriate range according
 * to the policy change.
 */
static pri_t
convert_pri(lwp_t *l, int policy, pri_t pri)
{

	/* Convert user priority to the in-kernel */
	if (pri != PRI_NONE) {
		/* Only for real-time threads */
		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
		KASSERT(policy != SCHED_OTHER);
		return PRI_USER_RT + pri;
	}

	/* Neither policy, nor priority change */
	if (l->l_class == policy)
		return l->l_priority;

	/* Time-sharing -> real-time */
	if (l->l_class == SCHED_OTHER) {
		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
		return PRI_USER_RT;
	}

	/* Real-time -> time-sharing */
	if (policy == SCHED_OTHER) {
		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
		return l->l_priority - PRI_USER_RT;
	}

	/* Real-time -> real-time */
	return l->l_priority;
}
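
/*
 * Illustrative cases for convert_pri() (editorial sketch; "RT" below
 * stands for SCHED_FIFO or SCHED_RR, and PRI_USER_RT is the base of the
 * user real-time range):
 *
 *	pri given, RT policy:	returns PRI_USER_RT + pri
 *	SCHED_OTHER -> RT:	returns PRI_USER_RT (lowest RT level)
 *	RT -> SCHED_OTHER:	returns l_priority - PRI_USER_RT
 *	unchanged or RT -> RT:	returns l_priority as-is
 */
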
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = proc_find(pid);
		if (p == NULL) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s) */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	return (lcnt == 0) ? ESRCH : error;
}
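
/*
 * Editorial sketch of a userland caller reaching do_sched_setparam()
 * through the syscall stub below (illustrative only; applications
 * normally use pthread_setschedparam(3) rather than the raw stub):
 *
 *	struct sched_param sp;
 *	sp.sched_priority = sched_get_priority_min(SCHED_RR);
 *	if (_sched_setparam(getpid(), 0, SCHED_RR, &sp) == -1)
 *		err(EXIT_FAILURE, "_sched_setparam");
 *
 * Passing lid == 0, as above, applies the change to every LWP of the
 * target process.
 */
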
/*
 * Set scheduling parameters.
 */
int
sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(int) policy;
		syscallarg(const struct sched_param *) params;
	} */
	struct sched_param params;
	int error;

	/* Get the parameters from the user-space */
	error = copyin(SCARG(uap, params), &params, sizeof(params));
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
	    SCARG(uap, policy), &params);
out:
	return error;
}

int
do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    struct sched_param *params)
{
	struct sched_param lparams;
	struct lwp *t;
	int error, lpolicy;

	/* Locks the LWP */
	t = lwp_find2(pid, lid);
	if (t == NULL)
		return ESRCH;

	/* Check the permission */
	error = kauth_authorize_process(kauth_cred_get(),
	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(t->l_proc->p_lock);
		return error;
	}

	lwp_lock(t);
	lparams.sched_priority = t->l_priority;
	lpolicy = t->l_class;

	switch (lpolicy) {
	case SCHED_OTHER:
		lparams.sched_priority -= PRI_USER;
		break;
	case SCHED_RR:
	case SCHED_FIFO:
		lparams.sched_priority -= PRI_USER_RT;
		break;
	}

	if (policy != NULL)
		*policy = lpolicy;

	if (params != NULL)
		*params = lparams;

	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);
	return error;
}

/*
 * Get scheduling parameters.
 */
int
sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(int *) policy;
		syscallarg(struct sched_param *) params;
	} */
	struct sched_param params;
	int error, policy;

	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
	    &params);
	if (error)
		goto out;

	error = copyout(&params, SCARG(uap, params), sizeof(params));
	if (error == 0 && SCARG(uap, policy) != NULL)
		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
out:
	return error;
}

/*
 * Allocate the CPU set, and get it from userspace.
 */
static int
genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
{
	kcpuset_t *kset;
	int error;

	kcpuset_create(&kset, false);
	error = kcpuset_copyin(sset, kset, size);
	if (error) {
		kcpuset_unuse(kset, NULL);
	} else {
		*dset = kset;
	}
	return error;
}
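
/*
 * Editorial sketch of the userland side of the cpuset handoff that
 * genkcpuset() receives (illustrative; see cpuset(3), and note that
 * pthread_setaffinity_np(3) is the usual application interface):
 *
 *	cpuset_t *cs = cpuset_create();
 *	if (cs == NULL)
 *		err(EXIT_FAILURE, "cpuset_create");
 *	cpuset_set(0, cs);			-- run on CPU 0 only
 *	if (_sched_setaffinity(getpid(), 0, cpuset_size(cs), cs) == -1)
 *		err(EXIT_FAILURE, "_sched_setaffinity");
 *	cpuset_destroy(cs);
 */
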
/*
 * Set affinity.
 */
int
sys__sched_setaffinity(struct lwp *l,
    const struct sys__sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(const cpuset_t *) cpuset;
	} */
	kcpuset_t *kcset, *kcpulst = NULL;
	struct cpu_info *ici, *ci;
	struct proc *p;
	struct lwp *t;
	CPU_INFO_ITERATOR cii;
	bool alloff;
	lwpid_t lid;
	u_int lcnt;
	int error;

	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/*
	 * Traverse _each_ CPU to:
	 *  - Check that CPUs in the mask have no assigned processor set.
	 *  - Check that at least one CPU from the mask is online.
	 *  - Find the first target CPU to migrate.
	 *
	 * To avoid the race with CPU online/offline calls and processor sets,
	 * cpu_lock will be locked for the entire operation.
	 */
	ci = NULL;
	alloff = false;
	mutex_enter(&cpu_lock);
	for (CPU_INFO_FOREACH(cii, ici)) {
		struct schedstate_percpu *ispc;

		if (!kcpuset_isset(kcset, cpu_index(ici))) {
			continue;
		}

		ispc = &ici->ci_schedstate;
		/* Check that CPU is not in the processor-set */
		if (ispc->spc_psid != PS_NONE) {
			error = EPERM;
			goto out;
		}
		/* Skip offline CPUs */
		if (ispc->spc_flags & SPCF_OFFLINE) {
			alloff = true;
			continue;
		}
		/* Target CPU to migrate */
		if (ci == NULL) {
			ci = ici;
		}
	}
	if (ci == NULL) {
		if (alloff) {
			/* All CPUs in the set are offline */
			error = EPERM;
			goto out;
		}
		/* Empty set */
		kcpuset_unuse(kcset, &kcpulst);
		kcset = NULL;
	}

	if (SCARG(uap, pid) != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = proc_find(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(proc_lock);
			error = ESRCH;
			goto out;
		}
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes. */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			error = EPERM;
			goto out;
		}
	} else {
		/* Use the calling process */
		p = l->l_proc;
		mutex_enter(p->p_lock);
	}

	/*
	 * Check the permission.
	 */
	error = kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(p->p_lock);
		goto out;
	}

#ifdef KERN_SA
	/* Changing the affinity of a SA process is not supported */
	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
		mutex_exit(p->p_lock);
		error = EINVAL;
		goto out;
	}
#endif

	/* Iterate through LWP(s). */
	lcnt = 0;
	lid = SCARG(uap, lid);
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		if (lid && lid != t->l_lid) {
			continue;
		}
		lwp_lock(t);
		/* No affinity for zombie LWPs. */
		if (t->l_stat == LSZOMB) {
			lwp_unlock(t);
			continue;
		}
		/* First, release existing affinity, if any. */
		if (t->l_affinity) {
			kcpuset_unuse(t->l_affinity, &kcpulst);
		}
		if (kcset) {
			/*
			 * Hold a reference on affinity mask, assign mask to
			 * LWP and migrate it to another CPU (unlocks LWP).
			 */
			kcpuset_use(kcset);
			t->l_affinity = kcset;
			lwp_migrate(t, ci);
		} else {
			/* Old affinity mask is released, just clear. */
			t->l_affinity = NULL;
			lwp_unlock(t);
		}
		lcnt++;
	}
	mutex_exit(p->p_lock);
	if (lcnt == 0) {
		error = ESRCH;
	}
out:
	mutex_exit(&cpu_lock);

	/*
	 * Drop the initial reference (LWPs, if any, have the ownership now),
	 * and destroy whatever is in the G/C list, if filled.
	 */
	if (kcset) {
		kcpuset_unuse(kcset, &kcpulst);
	}
	if (kcpulst) {
		kcpuset_destroy(kcpulst);
	}
	return error;
}
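
/*
 * Reference counting in sys__sched_setaffinity(), in short: the set
 * returned by genkcpuset() carries one initial reference; every LWP
 * that receives the mask takes its own via kcpuset_use(); displaced or
 * unused masks are queued on the kcpulst G/C list by kcpuset_unuse()
 * and destroyed only after cpu_lock has been dropped.
 */
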
/*
 * Get affinity.
 */
int
sys__sched_getaffinity(struct lwp *l,
    const struct sys__sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(cpuset_t *) cpuset;
	} */
	struct lwp *t;
	kcpuset_t *kcset;
	int error;

	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/* Locks the LWP */
	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
	if (t == NULL) {
		error = ESRCH;
		goto out;
	}
	/* Check the permission */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
		mutex_exit(t->l_proc->p_lock);
		error = EPERM;
		goto out;
	}
	lwp_lock(t);
	if (t->l_affinity) {
		kcpuset_copy(kcset, t->l_affinity);
	} else {
		kcpuset_zero(kcset);
	}
	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);

	error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
out:
	kcpuset_unuse(kcset, NULL);
	return error;
}

/*
 * Yield.
 */
int
sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
#ifdef KERN_SA
	if (l->l_flag & LW_SA) {
		sa_preempt(l);
	}
#endif
	return 0;
}

/*
 * Sysctl nodes and initialization.
 */
static void
sysctl_sched_setup(struct sysctllog **clog)
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_sched",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Process Scheduling option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_min",
		SYSCTL_DESCR("Minimal POSIX real-time priority"),
		NULL, SCHED_PRI_MIN, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_max",
		SYSCTL_DESCR("Maximal POSIX real-time priority"),
		NULL, SCHED_PRI_MAX, NULL, 0,
		CTL_CREATE, CTL_EOL);
}
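
/*
 * The nodes registered above surface to userland as kern.posix_sched,
 * kern.sched.pri_min and kern.sched.pri_max.  Editorial sketch of a
 * consumer (assumes the standard sysctlbyname(3) interface):
 *
 *	int min;
 *	size_t len = sizeof(min);
 *	if (sysctlbyname("kern.sched.pri_min", &min, &len, NULL, 0) == -1)
 *		err(EXIT_FAILURE, "sysctlbyname");
 */
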
static int
sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_SCHEDULER_GETPARAM:
		if (kauth_cred_uidmatch(cred, p->p_cred))
			result = KAUTH_RESULT_ALLOW;
		break;

	case KAUTH_PROCESS_SCHEDULER_SETPARAM:
		if (kauth_cred_uidmatch(cred, p->p_cred)) {
			struct lwp *l;
			int policy;
			pri_t priority;

			l = arg1;
			policy = (int)(unsigned long)arg2;
			priority = (pri_t)(unsigned long)arg3;

			if ((policy == l->l_class ||
			    (policy != SCHED_FIFO && policy != SCHED_RR)) &&
			    priority <= l->l_priority)
				result = KAUTH_RESULT_ALLOW;
		}
		break;

	case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
		result = KAUTH_RESULT_ALLOW;
		break;

	case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
		/* Privileged; we let the secmodel handle this. */
		break;

	default:
		break;
	}

	return result;
}

void
sched_init(void)
{

	sysctl_sched_setup(&sched_sysctl_log);

	sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    sched_listener_cb, NULL);
}
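
/*
 * Summary of the kauth policy implemented by sched_listener_cb() above
 * (derived from the code; the secmodel may further allow or deny):
 *
 *	GETPARAM	allowed for callers with a matching uid
 *	SETPARAM	allowed for a matching uid, provided the policy is
 *			unchanged or non-real-time, and the priority does
 *			not rise
 *	GETAFFINITY	always allowed
 *	SETAFFINITY	deferred to the secmodel (privileged)
 */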