1 /* $NetBSD: sys_sched.c,v 1.11 2008/02/16 16:39:34 elad Exp $ */ 2 3 /* 4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * TODO: 31 * - Handle pthread_setschedprio() as defined by POSIX; 32 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.11 2008/02/16 16:39:34 elad Exp $"); 37 38 #include <sys/param.h> 39 40 #include <sys/cpu.h> 41 #include <sys/kauth.h> 42 #include <sys/kmem.h> 43 #include <sys/lwp.h> 44 #include <sys/mutex.h> 45 #include <sys/proc.h> 46 #include <sys/pset.h> 47 #include <sys/sched.h> 48 #include <sys/syscallargs.h> 49 #include <sys/sysctl.h> 50 #include <sys/systm.h> 51 #include <sys/types.h> 52 #include <sys/unistd.h> 53 54 /* 55 * Convert user priority or the in-kernel priority or convert the current 56 * priority to the appropriate range according to the policy change. 57 */ 58 static pri_t 59 convert_pri(lwp_t *l, int policy, pri_t pri) 60 { 61 int delta = 0; 62 63 if (policy == SCHED_NONE) 64 policy = l->l_class; 65 66 switch (policy) { 67 case SCHED_OTHER: 68 delta = PRI_USER; 69 break; 70 case SCHED_FIFO: 71 case SCHED_RR: 72 delta = PRI_USER_RT; 73 break; 74 default: 75 panic("upri_to_kpri"); 76 } 77 78 if (pri != PRI_NONE) { 79 /* Convert user priority to the in-kernel */ 80 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX); 81 return pri + delta; 82 } 83 if (l->l_class == policy) 84 return l->l_priority; 85 86 /* Change the current priority to the appropriate range */ 87 if (l->l_class == SCHED_OTHER) { 88 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR); 89 return l->l_priority + delta; 90 } 91 if (policy == SCHED_OTHER) { 92 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR); 93 return l->l_priority - delta; 94 } 95 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER); 96 return l->l_class; 97 } 98 99 /* 100 * Set scheduling parameters. 101 */ 102 int 103 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 104 register_t *retval) 105 { 106 /* { 107 syscallarg(pid_t) pid; 108 syscallarg(lwpid_t) lid; 109 syscallarg(int) policy; 110 syscallarg(const struct sched_param *) params; 111 } */ 112 struct sched_param param; 113 struct proc *p; 114 struct lwp *t; 115 lwpid_t lid; 116 u_int lcnt; 117 int policy; 118 pri_t pri; 119 int error; 120 121 /* Get the parameters from the user-space */ 122 error = copyin(SCARG(uap, params), ¶m, sizeof(param)); 123 if (error) { 124 return error; 125 } 126 pri = param.sched_priority; 127 policy = SCARG(uap, policy); 128 129 /* If no parameters specified, just return (this should not happen) */ 130 if (pri == PRI_NONE && policy == SCHED_NONE) 131 return 0; 132 133 /* Validate scheduling class */ 134 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR)) 135 return EINVAL; 136 137 /* Validate priority */ 138 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) 139 return EINVAL; 140 141 if (SCARG(uap, pid) != 0) { 142 /* Find the process */ 143 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL); 144 if (p == NULL) 145 return ESRCH; 146 mutex_enter(&p->p_smutex); 147 mutex_exit(&proclist_lock); 148 /* Disallow modification of system processes */ 149 if (p->p_flag & PK_SYSTEM) { 150 mutex_exit(&p->p_smutex); 151 return EPERM; 152 } 153 } else { 154 /* Use the calling process */ 155 p = l->l_proc; 156 mutex_enter(&p->p_smutex); 157 } 158 159 /* Check the permission */ 160 if (kauth_authorize_process(l->l_cred, 161 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, NULL, NULL, NULL)) { 162 mutex_exit(&p->p_smutex); 163 return EPERM; 164 } 165 166 /* Find the LWP(s) */ 167 lcnt = 0; 168 lid = SCARG(uap, lid); 169 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 170 pri_t kpri; 171 172 if (lid && lid != t->l_lid) 173 continue; 174 KASSERT(pri != PRI_NONE || policy != SCHED_NONE); 175 lwp_lock(t); 176 177 /* 178 * Note that, priority may need to be changed to get into 179 * the correct priority range of the new scheduling class. 180 */ 181 kpri = convert_pri(t, policy, pri); 182 183 /* Set the scheduling class */ 184 if (policy != SCHED_NONE) 185 t->l_class = policy; 186 187 /* Change the priority */ 188 if (t->l_priority != kpri) 189 lwp_changepri(t, kpri); 190 191 lwp_unlock(t); 192 lcnt++; 193 } 194 mutex_exit(&p->p_smutex); 195 return (lcnt == 0) ? ESRCH : error; 196 } 197 198 /* 199 * Get scheduling parameters. 200 */ 201 int 202 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 203 register_t *retval) 204 { 205 /* { 206 syscallarg(pid_t) pid; 207 syscallarg(lwpid_t) lid; 208 syscallarg(int *) policy; 209 syscallarg(struct sched_param *) params; 210 } */ 211 struct sched_param param; 212 struct lwp *t; 213 lwpid_t lid; 214 int error, policy; 215 216 /* If not specified, use the first LWP */ 217 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid); 218 219 if (SCARG(uap, pid) != 0) { 220 /* Locks the LWP */ 221 t = lwp_find2(SCARG(uap, pid), lid); 222 } else { 223 struct proc *p = l->l_proc; 224 /* Use the calling process */ 225 mutex_enter(&p->p_smutex); 226 t = lwp_find(p, lid); 227 if (t != NULL) 228 lwp_lock(t); 229 mutex_exit(&p->p_smutex); 230 } 231 if (t == NULL) { 232 error = ESRCH; 233 goto error; 234 } 235 236 /* Check the permission */ 237 error = kauth_authorize_process(l->l_cred, 238 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 239 if (error != 0) { 240 lwp_unlock(t); 241 goto error; 242 } 243 244 param.sched_priority = t->l_priority; 245 policy = t->l_class; 246 lwp_unlock(t); 247 248 switch (policy) { 249 case SCHED_OTHER: 250 param.sched_priority -= PRI_USER; 251 break; 252 case SCHED_RR: 253 case SCHED_FIFO: 254 param.sched_priority -= PRI_USER_RT; 255 break; 256 } 257 error = copyout(¶m, SCARG(uap, params), sizeof(param)); 258 if (error == 0 && SCARG(uap, policy) != NULL) 259 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 260 error: 261 return error; 262 } 263 264 /* 265 * Set affinity. 266 */ 267 int 268 sys__sched_setaffinity(struct lwp *l, 269 const struct sys__sched_setaffinity_args *uap, register_t *retval) 270 { 271 /* { 272 syscallarg(pid_t) pid; 273 syscallarg(lwpid_t) lid; 274 syscallarg(size_t) size; 275 syscallarg(void *) cpuset; 276 } */ 277 cpuset_t *cpuset; 278 struct cpu_info *ci = NULL; 279 struct proc *p; 280 struct lwp *t; 281 CPU_INFO_ITERATOR cii; 282 lwpid_t lid; 283 u_int lcnt; 284 int error; 285 286 /* Allocate the CPU set, and get it from userspace */ 287 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); 288 error = copyin(SCARG(uap, cpuset), cpuset, 289 min(SCARG(uap, size), sizeof(cpuset_t))); 290 if (error) 291 goto error; 292 293 /* Look for a CPU in the set */ 294 for (CPU_INFO_FOREACH(cii, ci)) 295 if (CPU_ISSET(cpu_index(ci), cpuset)) 296 break; 297 if (ci == NULL) { 298 /* Empty set */ 299 kmem_free(cpuset, sizeof(cpuset_t)); 300 cpuset = NULL; 301 } 302 303 if (SCARG(uap, pid) != 0) { 304 /* Find the process */ 305 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL); 306 if (p == NULL) { 307 error = ESRCH; 308 goto error; 309 } 310 mutex_enter(&p->p_smutex); 311 mutex_exit(&proclist_lock); 312 } else { 313 /* Use the calling process */ 314 p = l->l_proc; 315 mutex_enter(&p->p_smutex); 316 } 317 318 /* 319 * Check the permission. 320 * Disallow modification of system processes. 321 */ 322 error = kauth_authorize_process(l->l_cred, 323 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 324 if (error != 0) { 325 mutex_exit(&p->p_smutex); 326 goto error; 327 } 328 if ((p->p_flag & PK_SYSTEM) != 0) { 329 mutex_exit(&p->p_smutex); 330 error = EPERM; 331 goto error; 332 } 333 334 /* Find the LWP(s) */ 335 lcnt = 0; 336 lid = SCARG(uap, lid); 337 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 338 if (lid && lid != t->l_lid) 339 continue; 340 lwp_lock(t); 341 if (cpuset) { 342 /* Set the affinity flag and new CPU set */ 343 t->l_flag |= LW_AFFINITY; 344 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t)); 345 /* Migrate to another CPU, unlocks LWP */ 346 lwp_migrate(t, ci); 347 } else { 348 /* Unset the affinity flag */ 349 t->l_flag &= ~LW_AFFINITY; 350 lwp_unlock(t); 351 } 352 lcnt++; 353 } 354 mutex_exit(&p->p_smutex); 355 if (lcnt == 0) 356 error = ESRCH; 357 error: 358 if (cpuset != NULL) 359 kmem_free(cpuset, sizeof(cpuset_t)); 360 return error; 361 } 362 363 /* 364 * Get affinity. 365 */ 366 int 367 sys__sched_getaffinity(struct lwp *l, 368 const struct sys__sched_getaffinity_args *uap, register_t *retval) 369 { 370 /* { 371 syscallarg(pid_t) pid; 372 syscallarg(lwpid_t) lid; 373 syscallarg(size_t) size; 374 syscallarg(void *) cpuset; 375 } */ 376 struct lwp *t; 377 void *cpuset; 378 lwpid_t lid; 379 int error; 380 381 if (SCARG(uap, size) <= 0) 382 return EINVAL; 383 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); 384 385 /* If not specified, use the first LWP */ 386 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid); 387 388 if (SCARG(uap, pid) != 0) { 389 /* Locks the LWP */ 390 t = lwp_find2(SCARG(uap, pid), lid); 391 } else { 392 struct proc *p = l->l_proc; 393 /* Use the calling process */ 394 mutex_enter(&p->p_smutex); 395 t = lwp_find(p, lid); 396 if (t != NULL) 397 lwp_lock(t); 398 mutex_exit(&p->p_smutex); 399 } 400 if (t == NULL) { 401 kmem_free(cpuset, sizeof(cpuset_t)); 402 return ESRCH; 403 } 404 /* Check the permission */ 405 if (kauth_authorize_process(l->l_cred, 406 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 407 lwp_unlock(t); 408 kmem_free(cpuset, sizeof(cpuset_t)); 409 return EPERM; 410 } 411 if (t->l_flag & LW_AFFINITY) 412 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t)); 413 lwp_unlock(t); 414 415 error = copyout(cpuset, SCARG(uap, cpuset), 416 min(SCARG(uap, size), sizeof(cpuset_t))); 417 418 kmem_free(cpuset, sizeof(cpuset_t)); 419 return error; 420 } 421 422 /* 423 * Yield. 424 */ 425 int 426 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 427 { 428 429 yield(); 430 return 0; 431 } 432 433 /* 434 * Sysctl nodes and initialization. 435 */ 436 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 437 { 438 const struct sysctlnode *node = NULL; 439 440 sysctl_createv(clog, 0, NULL, NULL, 441 CTLFLAG_PERMANENT, 442 CTLTYPE_NODE, "kern", NULL, 443 NULL, 0, NULL, 0, 444 CTL_KERN, CTL_EOL); 445 sysctl_createv(clog, 0, NULL, NULL, 446 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 447 CTLTYPE_INT, "posix_sched", 448 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 449 "Process Scheduling option to which the " 450 "system attempts to conform"), 451 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 452 CTL_KERN, CTL_CREATE, CTL_EOL); 453 sysctl_createv(clog, 0, NULL, &node, 454 CTLFLAG_PERMANENT, 455 CTLTYPE_NODE, "sched", 456 SYSCTL_DESCR("Scheduler options"), 457 NULL, 0, NULL, 0, 458 CTL_KERN, CTL_CREATE, CTL_EOL); 459 460 if (node == NULL) 461 return; 462 463 sysctl_createv(clog, 0, &node, NULL, 464 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 465 CTLTYPE_INT, "pri_min", 466 SYSCTL_DESCR("Minimal POSIX real-time priority"), 467 NULL, SCHED_PRI_MIN, NULL, 0, 468 CTL_CREATE, CTL_EOL); 469 sysctl_createv(clog, 0, &node, NULL, 470 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 471 CTLTYPE_INT, "pri_max", 472 SYSCTL_DESCR("Minimal POSIX real-time priority"), 473 NULL, SCHED_PRI_MAX, NULL, 0, 474 CTL_CREATE, CTL_EOL); 475 } 476