/* $NetBSD: sys_sched.c,v 1.19 2008/03/05 12:47:13 njoly Exp $ */

/*
 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * System calls relating to the scheduler.
31 * 32 * TODO: 33 * - Handle pthread_setschedprio() as defined by POSIX; 34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.19 2008/03/05 12:47:13 njoly Exp $"); 39 40 #include <sys/param.h> 41 42 #include <sys/cpu.h> 43 #include <sys/kauth.h> 44 #include <sys/kmem.h> 45 #include <sys/lwp.h> 46 #include <sys/mutex.h> 47 #include <sys/proc.h> 48 #include <sys/pset.h> 49 #include <sys/sched.h> 50 #include <sys/syscallargs.h> 51 #include <sys/sysctl.h> 52 #include <sys/systm.h> 53 #include <sys/types.h> 54 #include <sys/unistd.h> 55 56 /* 57 * Convert user priority or the in-kernel priority or convert the current 58 * priority to the appropriate range according to the policy change. 59 */ 60 static pri_t 61 convert_pri(lwp_t *l, int policy, pri_t pri) 62 { 63 int delta = 0; 64 65 switch (policy) { 66 case SCHED_OTHER: 67 delta = PRI_USER; 68 break; 69 case SCHED_FIFO: 70 case SCHED_RR: 71 delta = PRI_USER_RT; 72 break; 73 default: 74 panic("upri_to_kpri"); 75 } 76 77 if (pri != PRI_NONE) { 78 /* Convert user priority to the in-kernel */ 79 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX); 80 return pri + delta; 81 } 82 if (l->l_class == policy) 83 return l->l_priority; 84 85 /* Change the current priority to the appropriate range */ 86 if (l->l_class == SCHED_OTHER) { 87 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR); 88 return l->l_priority + delta; 89 } 90 if (policy == SCHED_OTHER) { 91 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR); 92 return l->l_priority - delta; 93 } 94 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER); 95 return l->l_class; 96 } 97 98 int 99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy, 100 const struct sched_param *params) 101 { 102 struct proc *p; 103 struct lwp *t; 104 pri_t pri; 105 u_int lcnt; 106 int error; 107 108 error = 0; 109 110 pri = params->sched_priority; 111 112 /* If no parameters 
specified, just return (this should not happen) */ 113 if (pri == PRI_NONE && policy == SCHED_NONE) 114 return 0; 115 116 /* Validate scheduling class */ 117 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR)) 118 return EINVAL; 119 120 /* Validate priority */ 121 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) 122 return EINVAL; 123 124 if (pid != 0) { 125 /* Find the process */ 126 p = p_find(pid, PFIND_UNLOCK_FAIL); 127 if (p == NULL) 128 return ESRCH; 129 mutex_enter(&p->p_smutex); 130 mutex_exit(&proclist_lock); 131 /* Disallow modification of system processes */ 132 if ((p->p_flag & PK_SYSTEM) != 0) { 133 mutex_exit(&p->p_smutex); 134 return EPERM; 135 } 136 } else { 137 /* Use the calling process */ 138 p = curlwp->l_proc; 139 mutex_enter(&p->p_smutex); 140 } 141 142 /* Find the LWP(s) */ 143 lcnt = 0; 144 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 145 pri_t kpri; 146 int lpolicy; 147 148 if (lid && lid != t->l_lid) 149 continue; 150 lcnt++; 151 KASSERT(pri != PRI_NONE || policy != SCHED_NONE); 152 lwp_lock(t); 153 154 if (policy == SCHED_NONE) 155 lpolicy = t->l_class; 156 else 157 lpolicy = policy; 158 159 /* 160 * Note that, priority may need to be changed to get into 161 * the correct priority range of the new scheduling class. 162 */ 163 kpri = convert_pri(t, lpolicy, pri); 164 165 /* Check the permission */ 166 error = kauth_authorize_process(kauth_cred_get(), 167 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy), 168 KAUTH_ARG(kpri)); 169 if (error) { 170 lwp_unlock(t); 171 break; 172 } 173 174 /* Set the scheduling class */ 175 if (policy != SCHED_NONE) 176 t->l_class = policy; 177 178 /* Change the priority */ 179 if (t->l_priority != kpri) 180 lwp_changepri(t, kpri); 181 182 lwp_unlock(t); 183 } 184 mutex_exit(&p->p_smutex); 185 return (lcnt == 0) ? ESRCH : error; 186 } 187 188 /* 189 * Set scheduling parameters. 
190 */ 191 int 192 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 193 register_t *retval) 194 { 195 /* { 196 syscallarg(pid_t) pid; 197 syscallarg(lwpid_t) lid; 198 syscallarg(int) policy; 199 syscallarg(const struct sched_param *) params; 200 } */ 201 struct sched_param params; 202 int error; 203 204 /* Get the parameters from the user-space */ 205 error = copyin(SCARG(uap, params), ¶ms, sizeof(params)); 206 if (error) 207 goto out; 208 209 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid), 210 SCARG(uap, policy), ¶ms); 211 212 out: 213 return (error); 214 } 215 216 int 217 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy, 218 struct sched_param *params) 219 { 220 struct sched_param lparams; 221 struct lwp *t; 222 int error, lpolicy; 223 224 /* Locks the LWP */ 225 t = lwp_find2(pid, lid); 226 if (t == NULL) { 227 error = ESRCH; 228 goto out; 229 } 230 231 /* Check the permission */ 232 error = kauth_authorize_process(kauth_cred_get(), 233 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 234 if (error != 0) { 235 lwp_unlock(t); 236 goto out; 237 } 238 239 lparams.sched_priority = t->l_priority; 240 lpolicy = t->l_class; 241 lwp_unlock(t); 242 243 switch (lpolicy) { 244 case SCHED_OTHER: 245 lparams.sched_priority -= PRI_USER; 246 break; 247 case SCHED_RR: 248 case SCHED_FIFO: 249 lparams.sched_priority -= PRI_USER_RT; 250 break; 251 } 252 253 if (policy != NULL) 254 *policy = lpolicy; 255 256 if (params != NULL) 257 *params = lparams; 258 259 out: 260 return error; 261 } 262 263 /* 264 * Get scheduling parameters. 
265 */ 266 int 267 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 268 register_t *retval) 269 { 270 /* { 271 syscallarg(pid_t) pid; 272 syscallarg(lwpid_t) lid; 273 syscallarg(int *) policy; 274 syscallarg(struct sched_param *) params; 275 } */ 276 struct sched_param params; 277 int error, policy; 278 279 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy, 280 ¶ms); 281 if (error) 282 goto out; 283 284 error = copyout(¶ms, SCARG(uap, params), sizeof(params)); 285 if (error == 0 && SCARG(uap, policy) != NULL) 286 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 287 288 out: 289 return (error); 290 } 291 292 /* 293 * Set affinity. 294 */ 295 int 296 sys__sched_setaffinity(struct lwp *l, 297 const struct sys__sched_setaffinity_args *uap, register_t *retval) 298 { 299 /* { 300 syscallarg(pid_t) pid; 301 syscallarg(lwpid_t) lid; 302 syscallarg(size_t) size; 303 syscallarg(void *) cpuset; 304 } */ 305 cpuset_t *cpuset; 306 struct cpu_info *ci = NULL; 307 struct proc *p; 308 struct lwp *t; 309 CPU_INFO_ITERATOR cii; 310 lwpid_t lid; 311 u_int lcnt; 312 int error; 313 314 /* Allocate the CPU set, and get it from userspace */ 315 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); 316 error = copyin(SCARG(uap, cpuset), cpuset, 317 min(SCARG(uap, size), sizeof(cpuset_t))); 318 if (error) 319 goto error; 320 321 /* Look for a CPU in the set */ 322 for (CPU_INFO_FOREACH(cii, ci)) 323 if (CPU_ISSET(cpu_index(ci), cpuset)) 324 break; 325 if (ci == NULL) { 326 /* Empty set */ 327 kmem_free(cpuset, sizeof(cpuset_t)); 328 cpuset = NULL; 329 } 330 331 if (SCARG(uap, pid) != 0) { 332 /* Find the process */ 333 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL); 334 if (p == NULL) { 335 error = ESRCH; 336 goto error; 337 } 338 mutex_enter(&p->p_smutex); 339 mutex_exit(&proclist_lock); 340 /* Disallow modification of system processes. 
*/ 341 if ((p->p_flag & PK_SYSTEM) != 0) { 342 mutex_exit(&p->p_smutex); 343 error = EPERM; 344 goto error; 345 } 346 } else { 347 /* Use the calling process */ 348 p = l->l_proc; 349 mutex_enter(&p->p_smutex); 350 } 351 352 /* 353 * Check the permission. 354 */ 355 error = kauth_authorize_process(l->l_cred, 356 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 357 if (error != 0) { 358 mutex_exit(&p->p_smutex); 359 goto error; 360 } 361 362 /* Find the LWP(s) */ 363 lcnt = 0; 364 lid = SCARG(uap, lid); 365 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 366 if (lid && lid != t->l_lid) 367 continue; 368 lwp_lock(t); 369 if (cpuset) { 370 /* Set the affinity flag and new CPU set */ 371 t->l_flag |= LW_AFFINITY; 372 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t)); 373 /* Migrate to another CPU, unlocks LWP */ 374 lwp_migrate(t, ci); 375 } else { 376 /* Unset the affinity flag */ 377 t->l_flag &= ~LW_AFFINITY; 378 lwp_unlock(t); 379 } 380 lcnt++; 381 } 382 mutex_exit(&p->p_smutex); 383 if (lcnt == 0) 384 error = ESRCH; 385 error: 386 if (cpuset != NULL) 387 kmem_free(cpuset, sizeof(cpuset_t)); 388 return error; 389 } 390 391 /* 392 * Get affinity. 
393 */ 394 int 395 sys__sched_getaffinity(struct lwp *l, 396 const struct sys__sched_getaffinity_args *uap, register_t *retval) 397 { 398 /* { 399 syscallarg(pid_t) pid; 400 syscallarg(lwpid_t) lid; 401 syscallarg(size_t) size; 402 syscallarg(void *) cpuset; 403 } */ 404 struct lwp *t; 405 void *cpuset; 406 int error; 407 408 if (SCARG(uap, size) <= 0) 409 return EINVAL; 410 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); 411 412 /* Locks the LWP */ 413 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); 414 if (t == NULL) { 415 kmem_free(cpuset, sizeof(cpuset_t)); 416 return ESRCH; 417 } 418 /* Check the permission */ 419 if (kauth_authorize_process(l->l_cred, 420 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 421 lwp_unlock(t); 422 kmem_free(cpuset, sizeof(cpuset_t)); 423 return EPERM; 424 } 425 if (t->l_flag & LW_AFFINITY) 426 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t)); 427 lwp_unlock(t); 428 429 error = copyout(cpuset, SCARG(uap, cpuset), 430 min(SCARG(uap, size), sizeof(cpuset_t))); 431 432 kmem_free(cpuset, sizeof(cpuset_t)); 433 return error; 434 } 435 436 /* 437 * Yield. 438 */ 439 int 440 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 441 { 442 443 yield(); 444 return 0; 445 } 446 447 /* 448 * Sysctl nodes and initialization. 
449 */ 450 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 451 { 452 const struct sysctlnode *node = NULL; 453 454 sysctl_createv(clog, 0, NULL, NULL, 455 CTLFLAG_PERMANENT, 456 CTLTYPE_NODE, "kern", NULL, 457 NULL, 0, NULL, 0, 458 CTL_KERN, CTL_EOL); 459 sysctl_createv(clog, 0, NULL, NULL, 460 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 461 CTLTYPE_INT, "posix_sched", 462 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 463 "Process Scheduling option to which the " 464 "system attempts to conform"), 465 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 466 CTL_KERN, CTL_CREATE, CTL_EOL); 467 sysctl_createv(clog, 0, NULL, &node, 468 CTLFLAG_PERMANENT, 469 CTLTYPE_NODE, "sched", 470 SYSCTL_DESCR("Scheduler options"), 471 NULL, 0, NULL, 0, 472 CTL_KERN, CTL_CREATE, CTL_EOL); 473 474 if (node == NULL) 475 return; 476 477 sysctl_createv(clog, 0, &node, NULL, 478 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 479 CTLTYPE_INT, "pri_min", 480 SYSCTL_DESCR("Minimal POSIX real-time priority"), 481 NULL, SCHED_PRI_MIN, NULL, 0, 482 CTL_CREATE, CTL_EOL); 483 sysctl_createv(clog, 0, &node, NULL, 484 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 485 CTLTYPE_INT, "pri_max", 486 SYSCTL_DESCR("Maximal POSIX real-time priority"), 487 NULL, SCHED_PRI_MAX, NULL, 0, 488 CTL_CREATE, CTL_EOL); 489 } 490