1 /* $NetBSD: sys_sched.c,v 1.15 2008/02/19 19:38:18 drochner Exp $ */ 2 3 /* 4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * TODO: 31 * - Handle pthread_setschedprio() as defined by POSIX; 32 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.15 2008/02/19 19:38:18 drochner Exp $"); 37 38 #include <sys/param.h> 39 40 #include <sys/cpu.h> 41 #include <sys/kauth.h> 42 #include <sys/kmem.h> 43 #include <sys/lwp.h> 44 #include <sys/mutex.h> 45 #include <sys/proc.h> 46 #include <sys/pset.h> 47 #include <sys/sched.h> 48 #include <sys/syscallargs.h> 49 #include <sys/sysctl.h> 50 #include <sys/systm.h> 51 #include <sys/types.h> 52 #include <sys/unistd.h> 53 54 /* 55 * Convert user priority or the in-kernel priority or convert the current 56 * priority to the appropriate range according to the policy change. 57 */ 58 static pri_t 59 convert_pri(lwp_t *l, int policy, pri_t pri) 60 { 61 int delta = 0; 62 63 switch (policy) { 64 case SCHED_OTHER: 65 delta = PRI_USER; 66 break; 67 case SCHED_FIFO: 68 case SCHED_RR: 69 delta = PRI_USER_RT; 70 break; 71 default: 72 panic("upri_to_kpri"); 73 } 74 75 if (pri != PRI_NONE) { 76 /* Convert user priority to the in-kernel */ 77 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX); 78 return pri + delta; 79 } 80 if (l->l_class == policy) 81 return l->l_priority; 82 83 /* Change the current priority to the appropriate range */ 84 if (l->l_class == SCHED_OTHER) { 85 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR); 86 return l->l_priority + delta; 87 } 88 if (policy == SCHED_OTHER) { 89 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR); 90 return l->l_priority - delta; 91 } 92 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER); 93 return l->l_class; 94 } 95 96 /* 97 * Set scheduling parameters. 98 */ 99 int 100 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 101 register_t *retval) 102 { 103 /* { 104 syscallarg(pid_t) pid; 105 syscallarg(lwpid_t) lid; 106 syscallarg(int) policy; 107 syscallarg(const struct sched_param *) params; 108 } */ 109 struct sched_param param; 110 struct proc *p; 111 struct lwp *t; 112 lwpid_t lid; 113 u_int lcnt; 114 int policy; 115 pri_t pri; 116 int error; 117 118 /* Get the parameters from the user-space */ 119 error = copyin(SCARG(uap, params), ¶m, sizeof(param)); 120 if (error) { 121 return error; 122 } 123 pri = param.sched_priority; 124 policy = SCARG(uap, policy); 125 126 /* If no parameters specified, just return (this should not happen) */ 127 if (pri == PRI_NONE && policy == SCHED_NONE) 128 return 0; 129 130 /* Validate scheduling class */ 131 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR)) 132 return EINVAL; 133 134 /* Validate priority */ 135 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) 136 return EINVAL; 137 138 if (SCARG(uap, pid) != 0) { 139 /* Find the process */ 140 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL); 141 if (p == NULL) 142 return ESRCH; 143 mutex_enter(&p->p_smutex); 144 mutex_exit(&proclist_lock); 145 /* Disallow modification of system processes */ 146 if (p->p_flag & PK_SYSTEM) { 147 mutex_exit(&p->p_smutex); 148 return EPERM; 149 } 150 } else { 151 /* Use the calling process */ 152 p = l->l_proc; 153 mutex_enter(&p->p_smutex); 154 } 155 156 /* Find the LWP(s) */ 157 lcnt = 0; 158 lid = SCARG(uap, lid); 159 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 160 pri_t kpri; 161 int lpolicy; 162 163 if (lid && lid != t->l_lid) 164 continue; 165 lcnt++; 166 KASSERT(pri != PRI_NONE || policy != SCHED_NONE); 167 lwp_lock(t); 168 169 if (policy == SCHED_NONE) 170 lpolicy = t->l_class; 171 else 172 lpolicy = policy; 173 174 /* 175 * Note that, priority may need to be changed to get into 176 * the correct priority range of the new scheduling class. 177 */ 178 kpri = convert_pri(t, lpolicy, pri); 179 180 /* Check the permission */ 181 error = kauth_authorize_process(l->l_cred, 182 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy), 183 KAUTH_ARG(kpri)); 184 if (error) { 185 lwp_unlock(t); 186 break; 187 } 188 189 /* Set the scheduling class */ 190 if (policy != SCHED_NONE) 191 t->l_class = policy; 192 193 /* Change the priority */ 194 if (t->l_priority != kpri) 195 lwp_changepri(t, kpri); 196 197 lwp_unlock(t); 198 } 199 mutex_exit(&p->p_smutex); 200 return (lcnt == 0) ? ESRCH : error; 201 } 202 203 /* 204 * Get scheduling parameters. 205 */ 206 int 207 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 208 register_t *retval) 209 { 210 /* { 211 syscallarg(pid_t) pid; 212 syscallarg(lwpid_t) lid; 213 syscallarg(int *) policy; 214 syscallarg(struct sched_param *) params; 215 } */ 216 struct sched_param param; 217 struct lwp *t; 218 lwpid_t lid; 219 int error, policy; 220 221 /* If not specified, use the first LWP */ 222 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid); 223 224 if (SCARG(uap, pid) != 0) { 225 /* Locks the LWP */ 226 t = lwp_find2(SCARG(uap, pid), lid); 227 } else { 228 struct proc *p = l->l_proc; 229 /* Use the calling process */ 230 mutex_enter(&p->p_smutex); 231 t = lwp_find(p, lid); 232 if (t != NULL) 233 lwp_lock(t); 234 mutex_exit(&p->p_smutex); 235 } 236 if (t == NULL) { 237 error = ESRCH; 238 goto error; 239 } 240 241 /* Check the permission */ 242 error = kauth_authorize_process(l->l_cred, 243 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 244 if (error != 0) { 245 lwp_unlock(t); 246 goto error; 247 } 248 249 param.sched_priority = t->l_priority; 250 policy = t->l_class; 251 lwp_unlock(t); 252 253 switch (policy) { 254 case SCHED_OTHER: 255 param.sched_priority -= PRI_USER; 256 break; 257 case SCHED_RR: 258 case SCHED_FIFO: 259 param.sched_priority -= PRI_USER_RT; 260 break; 261 } 262 error = copyout(¶m, SCARG(uap, params), sizeof(param)); 263 if (error == 0 && SCARG(uap, policy) != NULL) 264 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 265 error: 266 return error; 267 } 268 269 /* 270 * Set affinity. 271 */ 272 int 273 sys__sched_setaffinity(struct lwp *l, 274 const struct sys__sched_setaffinity_args *uap, register_t *retval) 275 { 276 /* { 277 syscallarg(pid_t) pid; 278 syscallarg(lwpid_t) lid; 279 syscallarg(size_t) size; 280 syscallarg(void *) cpuset; 281 } */ 282 cpuset_t *cpuset; 283 struct cpu_info *ci = NULL; 284 struct proc *p; 285 struct lwp *t; 286 CPU_INFO_ITERATOR cii; 287 lwpid_t lid; 288 u_int lcnt; 289 int error; 290 291 /* Allocate the CPU set, and get it from userspace */ 292 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); 293 error = copyin(SCARG(uap, cpuset), cpuset, 294 min(SCARG(uap, size), sizeof(cpuset_t))); 295 if (error) 296 goto error; 297 298 /* Look for a CPU in the set */ 299 for (CPU_INFO_FOREACH(cii, ci)) 300 if (CPU_ISSET(cpu_index(ci), cpuset)) 301 break; 302 if (ci == NULL) { 303 /* Empty set */ 304 kmem_free(cpuset, sizeof(cpuset_t)); 305 cpuset = NULL; 306 } 307 308 if (SCARG(uap, pid) != 0) { 309 /* Find the process */ 310 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL); 311 if (p == NULL) { 312 error = ESRCH; 313 goto error; 314 } 315 mutex_enter(&p->p_smutex); 316 mutex_exit(&proclist_lock); 317 } else { 318 /* Use the calling process */ 319 p = l->l_proc; 320 mutex_enter(&p->p_smutex); 321 } 322 323 /* 324 * Check the permission. 325 * Disallow modification of system processes. 326 */ 327 error = kauth_authorize_process(l->l_cred, 328 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 329 if (error != 0) { 330 mutex_exit(&p->p_smutex); 331 goto error; 332 } 333 if ((p->p_flag & PK_SYSTEM) != 0) { 334 mutex_exit(&p->p_smutex); 335 error = EPERM; 336 goto error; 337 } 338 339 /* Find the LWP(s) */ 340 lcnt = 0; 341 lid = SCARG(uap, lid); 342 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 343 if (lid && lid != t->l_lid) 344 continue; 345 lwp_lock(t); 346 if (cpuset) { 347 /* Set the affinity flag and new CPU set */ 348 t->l_flag |= LW_AFFINITY; 349 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t)); 350 /* Migrate to another CPU, unlocks LWP */ 351 lwp_migrate(t, ci); 352 } else { 353 /* Unset the affinity flag */ 354 t->l_flag &= ~LW_AFFINITY; 355 lwp_unlock(t); 356 } 357 lcnt++; 358 } 359 mutex_exit(&p->p_smutex); 360 if (lcnt == 0) 361 error = ESRCH; 362 error: 363 if (cpuset != NULL) 364 kmem_free(cpuset, sizeof(cpuset_t)); 365 return error; 366 } 367 368 /* 369 * Get affinity. 370 */ 371 int 372 sys__sched_getaffinity(struct lwp *l, 373 const struct sys__sched_getaffinity_args *uap, register_t *retval) 374 { 375 /* { 376 syscallarg(pid_t) pid; 377 syscallarg(lwpid_t) lid; 378 syscallarg(size_t) size; 379 syscallarg(void *) cpuset; 380 } */ 381 struct lwp *t; 382 void *cpuset; 383 lwpid_t lid; 384 int error; 385 386 if (SCARG(uap, size) <= 0) 387 return EINVAL; 388 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); 389 390 /* If not specified, use the first LWP */ 391 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid); 392 393 if (SCARG(uap, pid) != 0) { 394 /* Locks the LWP */ 395 t = lwp_find2(SCARG(uap, pid), lid); 396 } else { 397 struct proc *p = l->l_proc; 398 /* Use the calling process */ 399 mutex_enter(&p->p_smutex); 400 t = lwp_find(p, lid); 401 if (t != NULL) 402 lwp_lock(t); 403 mutex_exit(&p->p_smutex); 404 } 405 if (t == NULL) { 406 kmem_free(cpuset, sizeof(cpuset_t)); 407 return ESRCH; 408 } 409 /* Check the permission */ 410 if (kauth_authorize_process(l->l_cred, 411 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 412 lwp_unlock(t); 413 kmem_free(cpuset, sizeof(cpuset_t)); 414 return EPERM; 415 } 416 if (t->l_flag & LW_AFFINITY) 417 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t)); 418 lwp_unlock(t); 419 420 error = copyout(cpuset, SCARG(uap, cpuset), 421 min(SCARG(uap, size), sizeof(cpuset_t))); 422 423 kmem_free(cpuset, sizeof(cpuset_t)); 424 return error; 425 } 426 427 /* 428 * Yield. 429 */ 430 int 431 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 432 { 433 434 yield(); 435 return 0; 436 } 437 438 /* 439 * Sysctl nodes and initialization. 440 */ 441 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 442 { 443 const struct sysctlnode *node = NULL; 444 445 sysctl_createv(clog, 0, NULL, NULL, 446 CTLFLAG_PERMANENT, 447 CTLTYPE_NODE, "kern", NULL, 448 NULL, 0, NULL, 0, 449 CTL_KERN, CTL_EOL); 450 sysctl_createv(clog, 0, NULL, NULL, 451 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 452 CTLTYPE_INT, "posix_sched", 453 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 454 "Process Scheduling option to which the " 455 "system attempts to conform"), 456 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 457 CTL_KERN, CTL_CREATE, CTL_EOL); 458 sysctl_createv(clog, 0, NULL, &node, 459 CTLFLAG_PERMANENT, 460 CTLTYPE_NODE, "sched", 461 SYSCTL_DESCR("Scheduler options"), 462 NULL, 0, NULL, 0, 463 CTL_KERN, CTL_CREATE, CTL_EOL); 464 465 if (node == NULL) 466 return; 467 468 sysctl_createv(clog, 0, &node, NULL, 469 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 470 CTLTYPE_INT, "pri_min", 471 SYSCTL_DESCR("Minimal POSIX real-time priority"), 472 NULL, SCHED_PRI_MIN, NULL, 0, 473 CTL_CREATE, CTL_EOL); 474 sysctl_createv(clog, 0, &node, NULL, 475 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 476 CTLTYPE_INT, "pri_max", 477 SYSCTL_DESCR("Minimal POSIX real-time priority"), 478 NULL, SCHED_PRI_MAX, NULL, 0, 479 CTL_CREATE, CTL_EOL); 480 } 481