/* $NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $ */

/*
 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * System calls relating to the scheduler.
 *
 * Lock order:
 *
 *	cpu_lock ->
 *	    proc_lock ->
 *		proc_t::p_lock ->
 *		    lwp_t::lwp_lock
 *
 * TODO:
 *  - Handle pthread_setschedprio() as defined by POSIX;
 *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $");

#include <sys/param.h>

#include <sys/cpu.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pset.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/unistd.h>

#include "opt_sa.h"

/*
 * convert_pri: compute the in-kernel priority for LWP 'l' under the new
 * scheduling class 'policy'.
 *
 * => An explicit user priority ('pri' != PRI_NONE) is only legal for the
 *    real-time classes and is offset into the kernel real-time range.
 * => With 'pri' == PRI_NONE, the LWP's current priority is kept, shifted
 *    between the time-sharing and real-time ranges when the class changes.
 */
static pri_t
convert_pri(lwp_t *l, int policy, pri_t pri)
{

	/* Convert user priority to the in-kernel one */
	if (pri != PRI_NONE) {
		/* Only real-time threads may carry a POSIX priority */
		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
		KASSERT(policy != SCHED_OTHER);
		return PRI_USER_RT + pri;
	}

	/* Neither policy, nor priority change */
	if (l->l_class == policy)
		return l->l_priority;

	/* Time-sharing -> real-time: start at the bottom of the RT range */
	if (l->l_class == SCHED_OTHER) {
		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
		return PRI_USER_RT;
	}

	/* Real-time -> time-sharing: remove the RT offset */
	if (policy == SCHED_OTHER) {
		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
		return l->l_priority - PRI_USER_RT;
	}

	/* Real-time -> real-time: priority is kept as-is */
	return l->l_priority;
}

/*
 * do_sched_setparam: set the scheduling class and/or priority of the LWPs
 * of a process.
 *
 * => pid == 0 targets the calling process; lid == 0 targets every LWP of
 *    the process, otherwise only the LWP with the matching lid.
 * => policy may be SCHED_NONE to keep each LWP's current class, and
 *    params->sched_priority may be PRI_NONE to keep the current priority.
 * => Returns EINVAL for an invalid policy/priority combination, EPERM for
 *    system processes or a kauth(9) denial, ESRCH when no LWP matched.
 */
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = p_find(pid, PFIND_LOCKED);
		if (p == NULL) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		/* Lock order: proc_lock -> p_lock, then drop proc_lock */
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s), holding p_lock across the walk */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		/* SCHED_NONE means: keep this LWP's current class */
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	/* No LWP matched the given lid => ESRCH */
	return (lcnt == 0) ? ESRCH : error;
}

/*
 * Set scheduling parameters.
193 */ 194 int 195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 196 register_t *retval) 197 { 198 /* { 199 syscallarg(pid_t) pid; 200 syscallarg(lwpid_t) lid; 201 syscallarg(int) policy; 202 syscallarg(const struct sched_param *) params; 203 } */ 204 struct sched_param params; 205 int error; 206 207 /* Get the parameters from the user-space */ 208 error = copyin(SCARG(uap, params), ¶ms, sizeof(params)); 209 if (error) 210 goto out; 211 212 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid), 213 SCARG(uap, policy), ¶ms); 214 out: 215 return error; 216 } 217 218 int 219 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy, 220 struct sched_param *params) 221 { 222 struct sched_param lparams; 223 struct lwp *t; 224 int error, lpolicy; 225 226 /* Locks the LWP */ 227 t = lwp_find2(pid, lid); 228 if (t == NULL) 229 return ESRCH; 230 231 /* Check the permission */ 232 error = kauth_authorize_process(kauth_cred_get(), 233 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 234 if (error != 0) { 235 mutex_exit(t->l_proc->p_lock); 236 return error; 237 } 238 239 lwp_lock(t); 240 lparams.sched_priority = t->l_priority; 241 lpolicy = t->l_class; 242 243 switch (lpolicy) { 244 case SCHED_OTHER: 245 lparams.sched_priority -= PRI_USER; 246 break; 247 case SCHED_RR: 248 case SCHED_FIFO: 249 lparams.sched_priority -= PRI_USER_RT; 250 break; 251 } 252 253 if (policy != NULL) 254 *policy = lpolicy; 255 256 if (params != NULL) 257 *params = lparams; 258 259 lwp_unlock(t); 260 mutex_exit(t->l_proc->p_lock); 261 return error; 262 } 263 264 /* 265 * Get scheduling parameters. 
266 */ 267 int 268 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 269 register_t *retval) 270 { 271 /* { 272 syscallarg(pid_t) pid; 273 syscallarg(lwpid_t) lid; 274 syscallarg(int *) policy; 275 syscallarg(struct sched_param *) params; 276 } */ 277 struct sched_param params; 278 int error, policy; 279 280 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy, 281 ¶ms); 282 if (error) 283 goto out; 284 285 error = copyout(¶ms, SCARG(uap, params), sizeof(params)); 286 if (error == 0 && SCARG(uap, policy) != NULL) 287 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 288 out: 289 return error; 290 } 291 292 /* 293 * Allocate the CPU set, and get it from userspace. 294 */ 295 static int 296 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size) 297 { 298 int error; 299 300 *dset = kcpuset_create(); 301 error = kcpuset_copyin(sset, *dset, size); 302 if (error != 0) 303 kcpuset_unuse(*dset, NULL); 304 return error; 305 } 306 307 /* 308 * Set affinity. 309 */ 310 int 311 sys__sched_setaffinity(struct lwp *l, 312 const struct sys__sched_setaffinity_args *uap, register_t *retval) 313 { 314 /* { 315 syscallarg(pid_t) pid; 316 syscallarg(lwpid_t) lid; 317 syscallarg(size_t) size; 318 syscallarg(const cpuset_t *) cpuset; 319 } */ 320 kcpuset_t *cpuset, *cpulst = NULL; 321 struct cpu_info *ici, *ci; 322 struct proc *p; 323 struct lwp *t; 324 CPU_INFO_ITERATOR cii; 325 bool alloff; 326 lwpid_t lid; 327 u_int lcnt; 328 int error; 329 330 error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 331 if (error) 332 return error; 333 334 /* 335 * Traverse _each_ CPU to: 336 * - Check that CPUs in the mask have no assigned processor set. 337 * - Check that at least one CPU from the mask is online. 338 * - Find the first target CPU to migrate. 339 * 340 * To avoid the race with CPU online/offline calls and processor sets, 341 * cpu_lock will be locked for the entire operation. 
342 */ 343 ci = NULL; 344 alloff = false; 345 mutex_enter(&cpu_lock); 346 for (CPU_INFO_FOREACH(cii, ici)) { 347 struct schedstate_percpu *ispc; 348 349 if (kcpuset_isset(cpu_index(ici), cpuset) == 0) 350 continue; 351 352 ispc = &ici->ci_schedstate; 353 /* Check that CPU is not in the processor-set */ 354 if (ispc->spc_psid != PS_NONE) { 355 error = EPERM; 356 goto out; 357 } 358 /* Skip offline CPUs */ 359 if (ispc->spc_flags & SPCF_OFFLINE) { 360 alloff = true; 361 continue; 362 } 363 /* Target CPU to migrate */ 364 if (ci == NULL) { 365 ci = ici; 366 } 367 } 368 if (ci == NULL) { 369 if (alloff) { 370 /* All CPUs in the set are offline */ 371 error = EPERM; 372 goto out; 373 } 374 /* Empty set */ 375 kcpuset_unuse(cpuset, &cpulst); 376 cpuset = NULL; 377 } 378 379 if (SCARG(uap, pid) != 0) { 380 /* Find the process */ 381 mutex_enter(proc_lock); 382 p = p_find(SCARG(uap, pid), PFIND_LOCKED); 383 if (p == NULL) { 384 mutex_exit(proc_lock); 385 error = ESRCH; 386 goto out; 387 } 388 mutex_enter(p->p_lock); 389 mutex_exit(proc_lock); 390 /* Disallow modification of system processes. */ 391 if ((p->p_flag & PK_SYSTEM) != 0) { 392 mutex_exit(p->p_lock); 393 error = EPERM; 394 goto out; 395 } 396 } else { 397 /* Use the calling process */ 398 p = l->l_proc; 399 mutex_enter(p->p_lock); 400 } 401 402 /* 403 * Check the permission. 
404 */ 405 error = kauth_authorize_process(l->l_cred, 406 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 407 if (error != 0) { 408 mutex_exit(p->p_lock); 409 goto out; 410 } 411 412 #ifdef KERN_SA 413 /* Changing the affinity of a SA process is not supported */ 414 if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) { 415 mutex_exit(p->p_lock); 416 error = EINVAL; 417 goto out; 418 } 419 #endif 420 421 /* Find the LWP(s) */ 422 lcnt = 0; 423 lid = SCARG(uap, lid); 424 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 425 if (lid && lid != t->l_lid) 426 continue; 427 lwp_lock(t); 428 /* It is not allowed to set the affinity for zombie LWPs */ 429 if (t->l_stat == LSZOMB) { 430 lwp_unlock(t); 431 continue; 432 } 433 if (cpuset) { 434 /* Set the affinity flag and new CPU set */ 435 t->l_flag |= LW_AFFINITY; 436 kcpuset_use(cpuset); 437 if (t->l_affinity != NULL) 438 kcpuset_unuse(t->l_affinity, &cpulst); 439 t->l_affinity = cpuset; 440 /* Migrate to another CPU, unlocks LWP */ 441 lwp_migrate(t, ci); 442 } else { 443 /* Unset the affinity flag */ 444 t->l_flag &= ~LW_AFFINITY; 445 if (t->l_affinity != NULL) 446 kcpuset_unuse(t->l_affinity, &cpulst); 447 t->l_affinity = NULL; 448 lwp_unlock(t); 449 } 450 lcnt++; 451 } 452 mutex_exit(p->p_lock); 453 if (lcnt == 0) 454 error = ESRCH; 455 out: 456 mutex_exit(&cpu_lock); 457 if (cpuset != NULL) 458 kcpuset_unuse(cpuset, &cpulst); 459 kcpuset_destroy(cpulst); 460 return error; 461 } 462 463 /* 464 * Get affinity. 
465 */ 466 int 467 sys__sched_getaffinity(struct lwp *l, 468 const struct sys__sched_getaffinity_args *uap, register_t *retval) 469 { 470 /* { 471 syscallarg(pid_t) pid; 472 syscallarg(lwpid_t) lid; 473 syscallarg(size_t) size; 474 syscallarg(cpuset_t *) cpuset; 475 } */ 476 struct lwp *t; 477 kcpuset_t *cpuset; 478 int error; 479 480 error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 481 if (error) 482 return error; 483 484 /* Locks the LWP */ 485 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); 486 if (t == NULL) { 487 error = ESRCH; 488 goto out; 489 } 490 /* Check the permission */ 491 if (kauth_authorize_process(l->l_cred, 492 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 493 mutex_exit(t->l_proc->p_lock); 494 error = EPERM; 495 goto out; 496 } 497 lwp_lock(t); 498 if (t->l_flag & LW_AFFINITY) { 499 KASSERT(t->l_affinity != NULL); 500 kcpuset_copy(cpuset, t->l_affinity); 501 } else 502 kcpuset_zero(cpuset); 503 lwp_unlock(t); 504 mutex_exit(t->l_proc->p_lock); 505 506 error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 507 out: 508 kcpuset_unuse(cpuset, NULL); 509 return error; 510 } 511 512 /* 513 * Yield. 514 */ 515 int 516 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 517 { 518 519 yield(); 520 #ifdef KERN_SA 521 if (l->l_flag & LW_SA) { 522 sa_preempt(l); 523 } 524 #endif 525 return 0; 526 } 527 528 /* 529 * Sysctl nodes and initialization. 
 */
SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	/* Top-level kern node */
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	/* kern.posix_sched: advertised POSIX conformance level */
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_sched",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Process Scheduling option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	/* kern.sched: container node for the entries below */
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Without the parent node, the children cannot be attached */
	if (node == NULL)
		return;

	/* kern.sched.pri_min / pri_max: POSIX real-time priority range */
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_min",
		SYSCTL_DESCR("Minimal POSIX real-time priority"),
		NULL, SCHED_PRI_MIN, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_max",
		SYSCTL_DESCR("Maximal POSIX real-time priority"),
		NULL, SCHED_PRI_MAX, NULL, 0,
		CTL_CREATE, CTL_EOL);
}