/*	$NetBSD: sys_sched.c,v 1.31 2008/10/31 00:36:22 rmind Exp $	*/

/*
 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * System calls relating to the scheduler.
31 * 32 * Lock order: 33 * 34 * cpu_lock -> 35 * proc_lock -> 36 * proc_t::p_lock -> 37 * lwp_t::lwp_lock 38 * 39 * TODO: 40 * - Handle pthread_setschedprio() as defined by POSIX; 41 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.31 2008/10/31 00:36:22 rmind Exp $"); 46 47 #include <sys/param.h> 48 49 #include <sys/cpu.h> 50 #include <sys/kauth.h> 51 #include <sys/kmem.h> 52 #include <sys/lwp.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/pset.h> 56 #include <sys/sa.h> 57 #include <sys/savar.h> 58 #include <sys/sched.h> 59 #include <sys/syscallargs.h> 60 #include <sys/sysctl.h> 61 #include <sys/systm.h> 62 #include <sys/types.h> 63 #include <sys/unistd.h> 64 65 #include "opt_sa.h" 66 67 /* 68 * Convert user priority or the in-kernel priority or convert the current 69 * priority to the appropriate range according to the policy change. 70 */ 71 static pri_t 72 convert_pri(lwp_t *l, int policy, pri_t pri) 73 { 74 75 /* Convert user priority to the in-kernel */ 76 if (pri != PRI_NONE) { 77 /* Only for real-time threads */ 78 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX); 79 KASSERT(policy != SCHED_OTHER); 80 return PRI_USER_RT + pri; 81 } 82 83 /* Neither policy, nor priority change */ 84 if (l->l_class == policy) 85 return l->l_priority; 86 87 /* Time-sharing -> real-time */ 88 if (l->l_class == SCHED_OTHER) { 89 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR); 90 return PRI_USER_RT; 91 } 92 93 /* Real-time -> time-sharing */ 94 if (policy == SCHED_OTHER) { 95 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR); 96 return l->l_priority - PRI_USER_RT; 97 } 98 99 /* Real-time -> real-time */ 100 return l->l_priority; 101 } 102 103 int 104 do_sched_setparam(pid_t pid, lwpid_t lid, int policy, 105 const struct sched_param *params) 106 { 107 struct proc *p; 108 struct lwp *t; 109 pri_t pri; 110 u_int lcnt; 111 int error; 112 113 
error = 0; 114 115 pri = params->sched_priority; 116 117 /* If no parameters specified, just return (this should not happen) */ 118 if (pri == PRI_NONE && policy == SCHED_NONE) 119 return 0; 120 121 /* Validate scheduling class */ 122 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR)) 123 return EINVAL; 124 125 /* Validate priority */ 126 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) 127 return EINVAL; 128 129 if (pid != 0) { 130 /* Find the process */ 131 mutex_enter(proc_lock); 132 p = p_find(pid, PFIND_LOCKED); 133 if (p == NULL) { 134 mutex_exit(proc_lock); 135 return ESRCH; 136 } 137 mutex_enter(p->p_lock); 138 mutex_exit(proc_lock); 139 /* Disallow modification of system processes */ 140 if ((p->p_flag & PK_SYSTEM) != 0) { 141 mutex_exit(p->p_lock); 142 return EPERM; 143 } 144 } else { 145 /* Use the calling process */ 146 p = curlwp->l_proc; 147 mutex_enter(p->p_lock); 148 } 149 150 /* Find the LWP(s) */ 151 lcnt = 0; 152 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 153 pri_t kpri; 154 int lpolicy; 155 156 if (lid && lid != t->l_lid) 157 continue; 158 159 lcnt++; 160 lwp_lock(t); 161 lpolicy = (policy == SCHED_NONE) ? t->l_class : policy; 162 163 /* Disallow setting of priority for SCHED_OTHER threads */ 164 if (lpolicy == SCHED_OTHER && pri != PRI_NONE) { 165 lwp_unlock(t); 166 error = EINVAL; 167 break; 168 } 169 170 /* Convert priority, if needed */ 171 kpri = convert_pri(t, lpolicy, pri); 172 173 /* Check the permission */ 174 error = kauth_authorize_process(kauth_cred_get(), 175 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy), 176 KAUTH_ARG(kpri)); 177 if (error) { 178 lwp_unlock(t); 179 break; 180 } 181 182 /* Set the scheduling class, change the priority */ 183 t->l_class = lpolicy; 184 lwp_changepri(t, kpri); 185 lwp_unlock(t); 186 } 187 mutex_exit(p->p_lock); 188 return (lcnt == 0) ? ESRCH : error; 189 } 190 191 /* 192 * Set scheduling parameters. 
193 */ 194 int 195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 196 register_t *retval) 197 { 198 /* { 199 syscallarg(pid_t) pid; 200 syscallarg(lwpid_t) lid; 201 syscallarg(int) policy; 202 syscallarg(const struct sched_param *) params; 203 } */ 204 struct sched_param params; 205 int error; 206 207 /* Get the parameters from the user-space */ 208 error = copyin(SCARG(uap, params), ¶ms, sizeof(params)); 209 if (error) 210 goto out; 211 212 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid), 213 SCARG(uap, policy), ¶ms); 214 out: 215 return error; 216 } 217 218 int 219 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy, 220 struct sched_param *params) 221 { 222 struct sched_param lparams; 223 struct lwp *t; 224 int error, lpolicy; 225 226 /* Locks the LWP */ 227 t = lwp_find2(pid, lid); 228 if (t == NULL) 229 return ESRCH; 230 231 /* Check the permission */ 232 error = kauth_authorize_process(kauth_cred_get(), 233 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 234 if (error != 0) { 235 mutex_exit(t->l_proc->p_lock); 236 return error; 237 } 238 239 lwp_lock(t); 240 lparams.sched_priority = t->l_priority; 241 lpolicy = t->l_class; 242 243 switch (lpolicy) { 244 case SCHED_OTHER: 245 lparams.sched_priority -= PRI_USER; 246 break; 247 case SCHED_RR: 248 case SCHED_FIFO: 249 lparams.sched_priority -= PRI_USER_RT; 250 break; 251 } 252 253 if (policy != NULL) 254 *policy = lpolicy; 255 256 if (params != NULL) 257 *params = lparams; 258 259 lwp_unlock(t); 260 mutex_exit(t->l_proc->p_lock); 261 return error; 262 } 263 264 /* 265 * Get scheduling parameters. 
266 */ 267 int 268 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 269 register_t *retval) 270 { 271 /* { 272 syscallarg(pid_t) pid; 273 syscallarg(lwpid_t) lid; 274 syscallarg(int *) policy; 275 syscallarg(struct sched_param *) params; 276 } */ 277 struct sched_param params; 278 int error, policy; 279 280 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy, 281 ¶ms); 282 if (error) 283 goto out; 284 285 error = copyout(¶ms, SCARG(uap, params), sizeof(params)); 286 if (error == 0 && SCARG(uap, policy) != NULL) 287 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 288 out: 289 return error; 290 } 291 292 /* 293 * Allocate the CPU set, and get it from userspace. 294 */ 295 static int 296 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size) 297 { 298 int error; 299 300 *dset = kcpuset_create(); 301 error = kcpuset_copyin(sset, *dset, size); 302 if (error != 0) 303 kcpuset_unuse(*dset, NULL); 304 return error; 305 } 306 307 /* 308 * Set affinity. 309 */ 310 int 311 sys__sched_setaffinity(struct lwp *l, 312 const struct sys__sched_setaffinity_args *uap, register_t *retval) 313 { 314 /* { 315 syscallarg(pid_t) pid; 316 syscallarg(lwpid_t) lid; 317 syscallarg(size_t) size; 318 syscallarg(const cpuset_t *) cpuset; 319 } */ 320 kcpuset_t *cpuset, *cpulst = NULL; 321 struct cpu_info *ci = NULL; 322 struct proc *p; 323 struct lwp *t; 324 CPU_INFO_ITERATOR cii; 325 bool offline_in_set; 326 lwpid_t lid; 327 u_int lcnt; 328 int error; 329 330 error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 331 if (error) 332 return error; 333 334 /* 335 * Look for a CPU in the set, however, skip offline CPUs. 336 * 337 * To avoid the race with CPU online/offline calls, cpu_lock will 338 * be locked for the entire operation. 
339 */ 340 offline_in_set = false; 341 mutex_enter(&cpu_lock); 342 for (CPU_INFO_FOREACH(cii, ci)) { 343 struct schedstate_percpu *spc; 344 345 if (kcpuset_isset(cpu_index(ci), cpuset) == 0) 346 continue; 347 spc = &ci->ci_schedstate; 348 if (spc->spc_flags & SPCF_OFFLINE) { 349 offline_in_set = true; 350 continue; 351 } 352 break; 353 } 354 if (ci == NULL) { 355 if (offline_in_set) { 356 /* All CPUs in the set are offline */ 357 error = EPERM; 358 goto out; 359 } 360 /* Empty set */ 361 kcpuset_unuse(cpuset, NULL); 362 cpuset = NULL; 363 } 364 365 if (SCARG(uap, pid) != 0) { 366 /* Find the process */ 367 mutex_enter(proc_lock); 368 p = p_find(SCARG(uap, pid), PFIND_LOCKED); 369 if (p == NULL) { 370 mutex_exit(proc_lock); 371 error = ESRCH; 372 goto out; 373 } 374 mutex_enter(p->p_lock); 375 mutex_exit(proc_lock); 376 /* Disallow modification of system processes. */ 377 if ((p->p_flag & PK_SYSTEM) != 0) { 378 mutex_exit(p->p_lock); 379 error = EPERM; 380 goto out; 381 } 382 } else { 383 /* Use the calling process */ 384 p = l->l_proc; 385 mutex_enter(p->p_lock); 386 } 387 388 /* 389 * Check the permission. 
390 */ 391 error = kauth_authorize_process(l->l_cred, 392 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 393 if (error != 0) { 394 mutex_exit(p->p_lock); 395 goto out; 396 } 397 398 #ifdef KERN_SA 399 /* Changing the affinity of a SA process is not supported */ 400 if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) { 401 mutex_exit(p->p_lock); 402 error = EINVAL; 403 goto out; 404 } 405 #endif 406 407 /* Find the LWP(s) */ 408 lcnt = 0; 409 lid = SCARG(uap, lid); 410 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 411 if (lid && lid != t->l_lid) 412 continue; 413 lwp_lock(t); 414 /* It is not allowed to set the affinity for zombie LWPs */ 415 if (t->l_stat == LSZOMB) { 416 lwp_unlock(t); 417 continue; 418 } 419 if (cpuset) { 420 /* Set the affinity flag and new CPU set */ 421 t->l_flag |= LW_AFFINITY; 422 kcpuset_use(cpuset); 423 if (t->l_affinity != NULL) 424 kcpuset_unuse(t->l_affinity, &cpulst); 425 t->l_affinity = cpuset; 426 /* Migrate to another CPU, unlocks LWP */ 427 lwp_migrate(t, ci); 428 } else { 429 /* Unset the affinity flag */ 430 t->l_flag &= ~LW_AFFINITY; 431 if (t->l_affinity != NULL) 432 kcpuset_unuse(t->l_affinity, &cpulst); 433 t->l_affinity = NULL; 434 lwp_unlock(t); 435 } 436 lcnt++; 437 } 438 mutex_exit(p->p_lock); 439 if (lcnt == 0) 440 error = ESRCH; 441 out: 442 mutex_exit(&cpu_lock); 443 if (cpuset != NULL) 444 kcpuset_unuse(cpuset, &cpulst); 445 kcpuset_destroy(cpulst); 446 return error; 447 } 448 449 /* 450 * Get affinity. 
451 */ 452 int 453 sys__sched_getaffinity(struct lwp *l, 454 const struct sys__sched_getaffinity_args *uap, register_t *retval) 455 { 456 /* { 457 syscallarg(pid_t) pid; 458 syscallarg(lwpid_t) lid; 459 syscallarg(size_t) size; 460 syscallarg(cpuset_t *) cpuset; 461 } */ 462 struct lwp *t; 463 kcpuset_t *cpuset; 464 int error; 465 466 error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 467 if (error) 468 return error; 469 470 /* Locks the LWP */ 471 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); 472 if (t == NULL) { 473 error = ESRCH; 474 goto out; 475 } 476 /* Check the permission */ 477 if (kauth_authorize_process(l->l_cred, 478 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 479 mutex_exit(t->l_proc->p_lock); 480 error = EPERM; 481 goto out; 482 } 483 lwp_lock(t); 484 if (t->l_flag & LW_AFFINITY) { 485 KASSERT(t->l_affinity != NULL); 486 kcpuset_copy(cpuset, t->l_affinity); 487 } else 488 kcpuset_zero(cpuset); 489 lwp_unlock(t); 490 mutex_exit(t->l_proc->p_lock); 491 492 error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 493 out: 494 kcpuset_unuse(cpuset, NULL); 495 return error; 496 } 497 498 /* 499 * Yield. 500 */ 501 int 502 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 503 { 504 505 yield(); 506 #ifdef KERN_SA 507 if (l->l_flag & LW_SA) { 508 sa_preempt(l); 509 } 510 #endif 511 return 0; 512 } 513 514 /* 515 * Sysctl nodes and initialization. 
516 */ 517 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 518 { 519 const struct sysctlnode *node = NULL; 520 521 sysctl_createv(clog, 0, NULL, NULL, 522 CTLFLAG_PERMANENT, 523 CTLTYPE_NODE, "kern", NULL, 524 NULL, 0, NULL, 0, 525 CTL_KERN, CTL_EOL); 526 sysctl_createv(clog, 0, NULL, NULL, 527 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 528 CTLTYPE_INT, "posix_sched", 529 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 530 "Process Scheduling option to which the " 531 "system attempts to conform"), 532 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 533 CTL_KERN, CTL_CREATE, CTL_EOL); 534 sysctl_createv(clog, 0, NULL, &node, 535 CTLFLAG_PERMANENT, 536 CTLTYPE_NODE, "sched", 537 SYSCTL_DESCR("Scheduler options"), 538 NULL, 0, NULL, 0, 539 CTL_KERN, CTL_CREATE, CTL_EOL); 540 541 if (node == NULL) 542 return; 543 544 sysctl_createv(clog, 0, &node, NULL, 545 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 546 CTLTYPE_INT, "pri_min", 547 SYSCTL_DESCR("Minimal POSIX real-time priority"), 548 NULL, SCHED_PRI_MIN, NULL, 0, 549 CTL_CREATE, CTL_EOL); 550 sysctl_createv(clog, 0, &node, NULL, 551 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 552 CTLTYPE_INT, "pri_max", 553 SYSCTL_DESCR("Maximal POSIX real-time priority"), 554 NULL, SCHED_PRI_MAX, NULL, 0, 555 CTL_CREATE, CTL_EOL); 556 } 557