xref: /netbsd-src/sys/kern/sys_sched.c (revision fff57c5525bbe431aee7bdb3983954f0627a42cb)
1 /*	$NetBSD: sys_sched.c,v 1.25 2008/06/16 01:41:20 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * System calls relating to the scheduler.
31  *
32  * TODO:
33  *  - Handle pthread_setschedprio() as defined by POSIX;
34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.25 2008/06/16 01:41:20 rmind Exp $");
39 
40 #include <sys/param.h>
41 
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sched.h>
50 #include <sys/syscallargs.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/unistd.h>
55 
56 /*
57  * Convert user priority or the in-kernel priority or convert the current
58  * priority to the appropriate range according to the policy change.
59  */
60 static pri_t
61 convert_pri(lwp_t *l, int policy, pri_t pri)
62 {
63 	int delta = 0;
64 
65 	switch (policy) {
66 	case SCHED_OTHER:
67 		delta = PRI_USER;
68 		break;
69 	case SCHED_FIFO:
70 	case SCHED_RR:
71 		delta = PRI_USER_RT;
72 		break;
73 	default:
74 		panic("upri_to_kpri");
75 	}
76 
77 	if (pri != PRI_NONE) {
78 		/* Convert user priority to the in-kernel */
79 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
80 		return pri + delta;
81 	}
82 	if (l->l_class == policy)
83 		return l->l_priority;
84 
85 	/* Change the current priority to the appropriate range */
86 	if (l->l_class == SCHED_OTHER) {
87 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 		return delta;
89 	}
90 	if (policy == SCHED_OTHER) {
91 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
92 		return l->l_priority - delta;
93 	}
94 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
95 	return l->l_class;
96 }
97 
98 int
99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
100     const struct sched_param *params)
101 {
102 	struct proc *p;
103 	struct lwp *t;
104 	pri_t pri;
105 	u_int lcnt;
106 	int error;
107 
108 	error = 0;
109 
110 	pri = params->sched_priority;
111 
112 	/* If no parameters specified, just return (this should not happen) */
113 	if (pri == PRI_NONE && policy == SCHED_NONE)
114 		return 0;
115 
116 	/* Validate scheduling class */
117 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
118 		return EINVAL;
119 
120 	/* Validate priority */
121 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
122 		return EINVAL;
123 
124 	if (pid != 0) {
125 		/* Find the process */
126 		mutex_enter(proc_lock);
127 		p = p_find(pid, PFIND_LOCKED);
128 		if (p == NULL) {
129 			mutex_exit(proc_lock);
130 			return ESRCH;
131 		}
132 		mutex_enter(p->p_lock);
133 		mutex_exit(proc_lock);
134 		/* Disallow modification of system processes */
135 		if ((p->p_flag & PK_SYSTEM) != 0) {
136 			mutex_exit(p->p_lock);
137 			return EPERM;
138 		}
139 	} else {
140 		/* Use the calling process */
141 		p = curlwp->l_proc;
142 		mutex_enter(p->p_lock);
143 	}
144 
145 	/* Find the LWP(s) */
146 	lcnt = 0;
147 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
148 		pri_t kpri;
149 		int lpolicy;
150 
151 		if (lid && lid != t->l_lid)
152 			continue;
153 		lcnt++;
154 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
155 		lwp_lock(t);
156 
157 		if (policy == SCHED_NONE)
158 			lpolicy = t->l_class;
159 		else
160 			lpolicy = policy;
161 
162 		/*
163 		 * Note that, priority may need to be changed to get into
164 		 * the correct priority range of the new scheduling class.
165 		 */
166 		kpri = convert_pri(t, lpolicy, pri);
167 
168 		/* Check the permission */
169 		error = kauth_authorize_process(kauth_cred_get(),
170 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
171 		    KAUTH_ARG(kpri));
172 		if (error) {
173 			lwp_unlock(t);
174 			break;
175 		}
176 
177 		/* Set the scheduling class */
178 		if (policy != SCHED_NONE)
179 			t->l_class = policy;
180 
181 		/* Change the priority */
182 		if (t->l_priority != kpri)
183 			lwp_changepri(t, kpri);
184 
185 		lwp_unlock(t);
186 	}
187 	mutex_exit(p->p_lock);
188 	return (lcnt == 0) ? ESRCH : error;
189 }
190 
191 /*
192  * Set scheduling parameters.
193  */
194 int
195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
196     register_t *retval)
197 {
198 	/* {
199 		syscallarg(pid_t) pid;
200 		syscallarg(lwpid_t) lid;
201 		syscallarg(int) policy;
202 		syscallarg(const struct sched_param *) params;
203 	} */
204 	struct sched_param params;
205 	int error;
206 
207 	/* Get the parameters from the user-space */
208 	error = copyin(SCARG(uap, params), &params, sizeof(params));
209 	if (error)
210 		goto out;
211 
212 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
213 	    SCARG(uap, policy), &params);
214 
215  out:
216 	return (error);
217 }
218 
219 int
220 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
221     struct sched_param *params)
222 {
223 	struct sched_param lparams;
224 	struct lwp *t;
225 	int error, lpolicy;
226 
227 	/* Locks the LWP */
228 	t = lwp_find2(pid, lid);
229 	if (t == NULL)
230 		return ESRCH;
231 
232 	/* Check the permission */
233 	error = kauth_authorize_process(kauth_cred_get(),
234 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
235 	if (error != 0) {
236 		mutex_exit(t->l_proc->p_lock);
237 		return error;
238 	}
239 
240 	lwp_lock(t);
241 	lparams.sched_priority = t->l_priority;
242 	lpolicy = t->l_class;
243 
244 	switch (lpolicy) {
245 	case SCHED_OTHER:
246 		lparams.sched_priority -= PRI_USER;
247 		break;
248 	case SCHED_RR:
249 	case SCHED_FIFO:
250 		lparams.sched_priority -= PRI_USER_RT;
251 		break;
252 	}
253 
254 	if (policy != NULL)
255 		*policy = lpolicy;
256 
257 	if (params != NULL)
258 		*params = lparams;
259 
260 	lwp_unlock(t);
261 	mutex_exit(t->l_proc->p_lock);
262 	return error;
263 }
264 
265 /*
266  * Get scheduling parameters.
267  */
268 int
269 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
270     register_t *retval)
271 {
272 	/* {
273 		syscallarg(pid_t) pid;
274 		syscallarg(lwpid_t) lid;
275 		syscallarg(int *) policy;
276 		syscallarg(struct sched_param *) params;
277 	} */
278 	struct sched_param params;
279 	int error, policy;
280 
281 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
282 	    &params);
283 	if (error)
284 		goto out;
285 
286 	error = copyout(&params, SCARG(uap, params), sizeof(params));
287 	if (error == 0 && SCARG(uap, policy) != NULL)
288 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
289 
290  out:
291 	return (error);
292 }
293 
294 /* Allocate the CPU set, and get it from userspace */
295 static int
296 gencpuset(cpuset_t **dset, const cpuset_t *sset, size_t size)
297 {
298 	int error;
299 
300 	*dset = cpuset_create();
301 	if (size != cpuset_size(*dset)) {
302 		error = EINVAL;
303 		goto out;
304 	}
305 
306 	error = copyin(sset, *dset, size);
307 	if (error)
308 		goto out;
309 
310 	if (kcpuset_nused(*dset) != 1) {
311 		error = EINVAL;
312 		goto out;
313 	}
314 
315 	return 0;
316 out:
317 	kcpuset_unuse(*dset, NULL);
318 	return error;
319 }
320 
321 /*
322  * Set affinity.
323  */
324 int
325 sys__sched_setaffinity(struct lwp *l,
326     const struct sys__sched_setaffinity_args *uap, register_t *retval)
327 {
328 	/* {
329 		syscallarg(pid_t) pid;
330 		syscallarg(lwpid_t) lid;
331 		syscallarg(size_t) size;
332 		syscallarg(const cpuset_t *) cpuset;
333 	} */
334 	cpuset_t *cpuset, *cpulst = NULL;
335 	struct cpu_info *ci = NULL;
336 	struct proc *p;
337 	struct lwp *t;
338 	CPU_INFO_ITERATOR cii;
339 	lwpid_t lid;
340 	u_int lcnt;
341 	int error;
342 
343 	if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
344 		return error;
345 
346 	/* Look for a CPU in the set */
347 	for (CPU_INFO_FOREACH(cii, ci)) {
348 		error = cpuset_isset(cpu_index(ci), cpuset);
349 		if (error) {
350 			if (error == -1) {
351 				error = E2BIG;
352 				goto out;
353 			}
354 			break;
355 		}
356 	}
357 
358 	if (ci == NULL) {
359 		/* Empty set */
360 		kcpuset_unuse(cpuset, NULL);
361 		cpuset = NULL;
362 	}
363 
364 	if (SCARG(uap, pid) != 0) {
365 		/* Find the process */
366 		mutex_enter(proc_lock);
367 		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
368 		if (p == NULL) {
369 			mutex_exit(proc_lock);
370 			error = ESRCH;
371 			goto out;
372 		}
373 		mutex_enter(p->p_lock);
374 		mutex_exit(proc_lock);
375 		/* Disallow modification of system processes. */
376 		if ((p->p_flag & PK_SYSTEM) != 0) {
377 			mutex_exit(p->p_lock);
378 			error = EPERM;
379 			goto out;
380 		}
381 	} else {
382 		/* Use the calling process */
383 		p = l->l_proc;
384 		mutex_enter(p->p_lock);
385 	}
386 
387 	/*
388 	 * Check the permission.
389 	 */
390 	error = kauth_authorize_process(l->l_cred,
391 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
392 	if (error != 0) {
393 		mutex_exit(p->p_lock);
394 		goto out;
395 	}
396 
397 	/* Find the LWP(s) */
398 	lcnt = 0;
399 	lid = SCARG(uap, lid);
400 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
401 		if (lid && lid != t->l_lid)
402 			continue;
403 		lwp_lock(t);
404 		if (cpuset) {
405 			/* Set the affinity flag and new CPU set */
406 			t->l_flag |= LW_AFFINITY;
407 			kcpuset_use(cpuset);
408 			if (t->l_affinity != NULL)
409 				kcpuset_unuse(t->l_affinity, &cpulst);
410 			t->l_affinity = cpuset;
411 			/* Migrate to another CPU, unlocks LWP */
412 			lwp_migrate(t, ci);
413 		} else {
414 			/* Unset the affinity flag */
415 			t->l_flag &= ~LW_AFFINITY;
416 			if (t->l_affinity != NULL)
417 				kcpuset_unuse(t->l_affinity, &cpulst);
418 			t->l_affinity = NULL;
419 			lwp_unlock(t);
420 		}
421 		lcnt++;
422 	}
423 	mutex_exit(p->p_lock);
424 	if (lcnt == 0)
425 		error = ESRCH;
426 out:
427 	if (cpuset != NULL)
428 		kcpuset_unuse(cpuset, &cpulst);
429 	cpuset_destroy(cpulst);
430 	return error;
431 }
432 
433 /*
434  * Get affinity.
435  */
436 int
437 sys__sched_getaffinity(struct lwp *l,
438     const struct sys__sched_getaffinity_args *uap, register_t *retval)
439 {
440 	/* {
441 		syscallarg(pid_t) pid;
442 		syscallarg(lwpid_t) lid;
443 		syscallarg(size_t) size;
444 		syscallarg(cpuset_t *) cpuset;
445 	} */
446 	struct lwp *t;
447 	cpuset_t *cpuset;
448 	int error;
449 
450 	if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
451 		return error;
452 
453 	/* Locks the LWP */
454 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
455 	if (t == NULL) {
456 		error = ESRCH;
457 		goto out;
458 	}
459 	/* Check the permission */
460 	if (kauth_authorize_process(l->l_cred,
461 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
462 		mutex_exit(t->l_proc->p_lock);
463 		error = EPERM;
464 		goto out;
465 	}
466 	lwp_lock(t);
467 	if (t->l_flag & LW_AFFINITY) {
468 		KASSERT(t->l_affinity != NULL);
469 		kcpuset_copy(cpuset, t->l_affinity);
470 	} else
471 		cpuset_zero(cpuset);
472 	lwp_unlock(t);
473 	mutex_exit(t->l_proc->p_lock);
474 
475 	error = copyout(cpuset, SCARG(uap, cpuset), cpuset_size(cpuset));
476 out:
477 	kcpuset_unuse(cpuset, NULL);
478 	return error;
479 }
480 
481 /*
482  * Yield.
483  */
484 int
485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
486 {
487 
488 	yield();
489 	return 0;
490 }
491 
492 /*
493  * Sysctl nodes and initialization.
494  */
495 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
496 {
497 	const struct sysctlnode *node = NULL;
498 
499 	sysctl_createv(clog, 0, NULL, NULL,
500 		CTLFLAG_PERMANENT,
501 		CTLTYPE_NODE, "kern", NULL,
502 		NULL, 0, NULL, 0,
503 		CTL_KERN, CTL_EOL);
504 	sysctl_createv(clog, 0, NULL, NULL,
505 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
506 		CTLTYPE_INT, "posix_sched",
507 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
508 			     "Process Scheduling option to which the "
509 			     "system attempts to conform"),
510 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
511 		CTL_KERN, CTL_CREATE, CTL_EOL);
512 	sysctl_createv(clog, 0, NULL, &node,
513 		CTLFLAG_PERMANENT,
514 		CTLTYPE_NODE, "sched",
515 		SYSCTL_DESCR("Scheduler options"),
516 		NULL, 0, NULL, 0,
517 		CTL_KERN, CTL_CREATE, CTL_EOL);
518 
519 	if (node == NULL)
520 		return;
521 
522 	sysctl_createv(clog, 0, &node, NULL,
523 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
524 		CTLTYPE_INT, "pri_min",
525 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
526 		NULL, SCHED_PRI_MIN, NULL, 0,
527 		CTL_CREATE, CTL_EOL);
528 	sysctl_createv(clog, 0, &node, NULL,
529 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
530 		CTLTYPE_INT, "pri_max",
531 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
532 		NULL, SCHED_PRI_MAX, NULL, 0,
533 		CTL_CREATE, CTL_EOL);
534 }
535