1 /* $NetBSD: sys_sched.c,v 1.25 2008/06/16 01:41:20 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * System calls relating to the scheduler. 31 * 32 * TODO: 33 * - Handle pthread_setschedprio() as defined by POSIX; 34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.25 2008/06/16 01:41:20 rmind Exp $"); 39 40 #include <sys/param.h> 41 42 #include <sys/cpu.h> 43 #include <sys/kauth.h> 44 #include <sys/kmem.h> 45 #include <sys/lwp.h> 46 #include <sys/mutex.h> 47 #include <sys/proc.h> 48 #include <sys/pset.h> 49 #include <sys/sched.h> 50 #include <sys/syscallargs.h> 51 #include <sys/sysctl.h> 52 #include <sys/systm.h> 53 #include <sys/types.h> 54 #include <sys/unistd.h> 55 56 /* 57 * Convert user priority or the in-kernel priority or convert the current 58 * priority to the appropriate range according to the policy change. 59 */ 60 static pri_t 61 convert_pri(lwp_t *l, int policy, pri_t pri) 62 { 63 int delta = 0; 64 65 switch (policy) { 66 case SCHED_OTHER: 67 delta = PRI_USER; 68 break; 69 case SCHED_FIFO: 70 case SCHED_RR: 71 delta = PRI_USER_RT; 72 break; 73 default: 74 panic("upri_to_kpri"); 75 } 76 77 if (pri != PRI_NONE) { 78 /* Convert user priority to the in-kernel */ 79 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX); 80 return pri + delta; 81 } 82 if (l->l_class == policy) 83 return l->l_priority; 84 85 /* Change the current priority to the appropriate range */ 86 if (l->l_class == SCHED_OTHER) { 87 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR); 88 return delta; 89 } 90 if (policy == SCHED_OTHER) { 91 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR); 92 return l->l_priority - delta; 93 } 94 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER); 95 return l->l_class; 96 } 97 98 int 99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy, 100 const struct sched_param *params) 101 { 102 struct proc *p; 103 struct lwp *t; 104 pri_t pri; 105 u_int lcnt; 106 int error; 107 108 error = 0; 109 110 pri = params->sched_priority; 111 112 /* If no parameters specified, just return (this should not happen) */ 113 if (pri == PRI_NONE && policy == SCHED_NONE) 114 return 0; 115 116 /* Validate scheduling class */ 117 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR)) 118 return EINVAL; 119 120 /* Validate priority */ 121 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) 122 return EINVAL; 123 124 if (pid != 0) { 125 /* Find the process */ 126 mutex_enter(proc_lock); 127 p = p_find(pid, PFIND_LOCKED); 128 if (p == NULL) { 129 mutex_exit(proc_lock); 130 return ESRCH; 131 } 132 mutex_enter(p->p_lock); 133 mutex_exit(proc_lock); 134 /* Disallow modification of system processes */ 135 if ((p->p_flag & PK_SYSTEM) != 0) { 136 mutex_exit(p->p_lock); 137 return EPERM; 138 } 139 } else { 140 /* Use the calling process */ 141 p = curlwp->l_proc; 142 mutex_enter(p->p_lock); 143 } 144 145 /* Find the LWP(s) */ 146 lcnt = 0; 147 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 148 pri_t kpri; 149 int lpolicy; 150 151 if (lid && lid != t->l_lid) 152 continue; 153 lcnt++; 154 KASSERT(pri != PRI_NONE || policy != SCHED_NONE); 155 lwp_lock(t); 156 157 if (policy == SCHED_NONE) 158 lpolicy = t->l_class; 159 else 160 lpolicy = policy; 161 162 /* 163 * Note that, priority may need to be changed to get into 164 * the correct priority range of the new scheduling class. 165 */ 166 kpri = convert_pri(t, lpolicy, pri); 167 168 /* Check the permission */ 169 error = kauth_authorize_process(kauth_cred_get(), 170 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy), 171 KAUTH_ARG(kpri)); 172 if (error) { 173 lwp_unlock(t); 174 break; 175 } 176 177 /* Set the scheduling class */ 178 if (policy != SCHED_NONE) 179 t->l_class = policy; 180 181 /* Change the priority */ 182 if (t->l_priority != kpri) 183 lwp_changepri(t, kpri); 184 185 lwp_unlock(t); 186 } 187 mutex_exit(p->p_lock); 188 return (lcnt == 0) ? ESRCH : error; 189 } 190 191 /* 192 * Set scheduling parameters. 193 */ 194 int 195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 196 register_t *retval) 197 { 198 /* { 199 syscallarg(pid_t) pid; 200 syscallarg(lwpid_t) lid; 201 syscallarg(int) policy; 202 syscallarg(const struct sched_param *) params; 203 } */ 204 struct sched_param params; 205 int error; 206 207 /* Get the parameters from the user-space */ 208 error = copyin(SCARG(uap, params), ¶ms, sizeof(params)); 209 if (error) 210 goto out; 211 212 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid), 213 SCARG(uap, policy), ¶ms); 214 215 out: 216 return (error); 217 } 218 219 int 220 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy, 221 struct sched_param *params) 222 { 223 struct sched_param lparams; 224 struct lwp *t; 225 int error, lpolicy; 226 227 /* Locks the LWP */ 228 t = lwp_find2(pid, lid); 229 if (t == NULL) 230 return ESRCH; 231 232 /* Check the permission */ 233 error = kauth_authorize_process(kauth_cred_get(), 234 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 235 if (error != 0) { 236 mutex_exit(t->l_proc->p_lock); 237 return error; 238 } 239 240 lwp_lock(t); 241 lparams.sched_priority = t->l_priority; 242 lpolicy = t->l_class; 243 244 switch (lpolicy) { 245 case SCHED_OTHER: 246 lparams.sched_priority -= PRI_USER; 247 break; 248 case SCHED_RR: 249 case SCHED_FIFO: 250 lparams.sched_priority -= PRI_USER_RT; 251 break; 252 } 253 254 if (policy != NULL) 255 *policy = lpolicy; 256 257 if (params != NULL) 258 *params = lparams; 259 260 lwp_unlock(t); 261 mutex_exit(t->l_proc->p_lock); 262 return error; 263 } 264 265 /* 266 * Get scheduling parameters. 267 */ 268 int 269 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 270 register_t *retval) 271 { 272 /* { 273 syscallarg(pid_t) pid; 274 syscallarg(lwpid_t) lid; 275 syscallarg(int *) policy; 276 syscallarg(struct sched_param *) params; 277 } */ 278 struct sched_param params; 279 int error, policy; 280 281 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy, 282 ¶ms); 283 if (error) 284 goto out; 285 286 error = copyout(¶ms, SCARG(uap, params), sizeof(params)); 287 if (error == 0 && SCARG(uap, policy) != NULL) 288 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 289 290 out: 291 return (error); 292 } 293 294 /* Allocate the CPU set, and get it from userspace */ 295 static int 296 gencpuset(cpuset_t **dset, const cpuset_t *sset, size_t size) 297 { 298 int error; 299 300 *dset = cpuset_create(); 301 if (size != cpuset_size(*dset)) { 302 error = EINVAL; 303 goto out; 304 } 305 306 error = copyin(sset, *dset, size); 307 if (error) 308 goto out; 309 310 if (kcpuset_nused(*dset) != 1) { 311 error = EINVAL; 312 goto out; 313 } 314 315 return 0; 316 out: 317 kcpuset_unuse(*dset, NULL); 318 return error; 319 } 320 321 /* 322 * Set affinity. 323 */ 324 int 325 sys__sched_setaffinity(struct lwp *l, 326 const struct sys__sched_setaffinity_args *uap, register_t *retval) 327 { 328 /* { 329 syscallarg(pid_t) pid; 330 syscallarg(lwpid_t) lid; 331 syscallarg(size_t) size; 332 syscallarg(const cpuset_t *) cpuset; 333 } */ 334 cpuset_t *cpuset, *cpulst = NULL; 335 struct cpu_info *ci = NULL; 336 struct proc *p; 337 struct lwp *t; 338 CPU_INFO_ITERATOR cii; 339 lwpid_t lid; 340 u_int lcnt; 341 int error; 342 343 if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)))) 344 return error; 345 346 /* Look for a CPU in the set */ 347 for (CPU_INFO_FOREACH(cii, ci)) { 348 error = cpuset_isset(cpu_index(ci), cpuset); 349 if (error) { 350 if (error == -1) { 351 error = E2BIG; 352 goto out; 353 } 354 break; 355 } 356 } 357 358 if (ci == NULL) { 359 /* Empty set */ 360 kcpuset_unuse(cpuset, NULL); 361 cpuset = NULL; 362 } 363 364 if (SCARG(uap, pid) != 0) { 365 /* Find the process */ 366 mutex_enter(proc_lock); 367 p = p_find(SCARG(uap, pid), PFIND_LOCKED); 368 if (p == NULL) { 369 mutex_exit(proc_lock); 370 error = ESRCH; 371 goto out; 372 } 373 mutex_enter(p->p_lock); 374 mutex_exit(proc_lock); 375 /* Disallow modification of system processes. */ 376 if ((p->p_flag & PK_SYSTEM) != 0) { 377 mutex_exit(p->p_lock); 378 error = EPERM; 379 goto out; 380 } 381 } else { 382 /* Use the calling process */ 383 p = l->l_proc; 384 mutex_enter(p->p_lock); 385 } 386 387 /* 388 * Check the permission. 389 */ 390 error = kauth_authorize_process(l->l_cred, 391 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 392 if (error != 0) { 393 mutex_exit(p->p_lock); 394 goto out; 395 } 396 397 /* Find the LWP(s) */ 398 lcnt = 0; 399 lid = SCARG(uap, lid); 400 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 401 if (lid && lid != t->l_lid) 402 continue; 403 lwp_lock(t); 404 if (cpuset) { 405 /* Set the affinity flag and new CPU set */ 406 t->l_flag |= LW_AFFINITY; 407 kcpuset_use(cpuset); 408 if (t->l_affinity != NULL) 409 kcpuset_unuse(t->l_affinity, &cpulst); 410 t->l_affinity = cpuset; 411 /* Migrate to another CPU, unlocks LWP */ 412 lwp_migrate(t, ci); 413 } else { 414 /* Unset the affinity flag */ 415 t->l_flag &= ~LW_AFFINITY; 416 if (t->l_affinity != NULL) 417 kcpuset_unuse(t->l_affinity, &cpulst); 418 t->l_affinity = NULL; 419 lwp_unlock(t); 420 } 421 lcnt++; 422 } 423 mutex_exit(p->p_lock); 424 if (lcnt == 0) 425 error = ESRCH; 426 out: 427 if (cpuset != NULL) 428 kcpuset_unuse(cpuset, &cpulst); 429 cpuset_destroy(cpulst); 430 return error; 431 } 432 433 /* 434 * Get affinity. 435 */ 436 int 437 sys__sched_getaffinity(struct lwp *l, 438 const struct sys__sched_getaffinity_args *uap, register_t *retval) 439 { 440 /* { 441 syscallarg(pid_t) pid; 442 syscallarg(lwpid_t) lid; 443 syscallarg(size_t) size; 444 syscallarg(cpuset_t *) cpuset; 445 } */ 446 struct lwp *t; 447 cpuset_t *cpuset; 448 int error; 449 450 if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)))) 451 return error; 452 453 /* Locks the LWP */ 454 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); 455 if (t == NULL) { 456 error = ESRCH; 457 goto out; 458 } 459 /* Check the permission */ 460 if (kauth_authorize_process(l->l_cred, 461 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 462 mutex_exit(t->l_proc->p_lock); 463 error = EPERM; 464 goto out; 465 } 466 lwp_lock(t); 467 if (t->l_flag & LW_AFFINITY) { 468 KASSERT(t->l_affinity != NULL); 469 kcpuset_copy(cpuset, t->l_affinity); 470 } else 471 cpuset_zero(cpuset); 472 lwp_unlock(t); 473 mutex_exit(t->l_proc->p_lock); 474 475 error = copyout(cpuset, SCARG(uap, cpuset), cpuset_size(cpuset)); 476 out: 477 kcpuset_unuse(cpuset, NULL); 478 return error; 479 } 480 481 /* 482 * Yield. 483 */ 484 int 485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 486 { 487 488 yield(); 489 return 0; 490 } 491 492 /* 493 * Sysctl nodes and initialization. 494 */ 495 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 496 { 497 const struct sysctlnode *node = NULL; 498 499 sysctl_createv(clog, 0, NULL, NULL, 500 CTLFLAG_PERMANENT, 501 CTLTYPE_NODE, "kern", NULL, 502 NULL, 0, NULL, 0, 503 CTL_KERN, CTL_EOL); 504 sysctl_createv(clog, 0, NULL, NULL, 505 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 506 CTLTYPE_INT, "posix_sched", 507 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 508 "Process Scheduling option to which the " 509 "system attempts to conform"), 510 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 511 CTL_KERN, CTL_CREATE, CTL_EOL); 512 sysctl_createv(clog, 0, NULL, &node, 513 CTLFLAG_PERMANENT, 514 CTLTYPE_NODE, "sched", 515 SYSCTL_DESCR("Scheduler options"), 516 NULL, 0, NULL, 0, 517 CTL_KERN, CTL_CREATE, CTL_EOL); 518 519 if (node == NULL) 520 return; 521 522 sysctl_createv(clog, 0, &node, NULL, 523 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 524 CTLTYPE_INT, "pri_min", 525 SYSCTL_DESCR("Minimal POSIX real-time priority"), 526 NULL, SCHED_PRI_MIN, NULL, 0, 527 CTL_CREATE, CTL_EOL); 528 sysctl_createv(clog, 0, &node, NULL, 529 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 530 CTLTYPE_INT, "pri_max", 531 SYSCTL_DESCR("Maximal POSIX real-time priority"), 532 NULL, SCHED_PRI_MAX, NULL, 0, 533 CTL_CREATE, CTL_EOL); 534 } 535