xref: /netbsd-src/sys/kern/sys_sched.c (revision 7c3f385475147b6e1c4753f2bee961630e2dfc40)
1 /*	$NetBSD: sys_sched.c,v 1.19 2008/03/05 12:47:13 njoly Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * System calls relating to the scheduler.
31  *
32  * TODO:
33  *  - Handle pthread_setschedprio() as defined by POSIX;
34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.19 2008/03/05 12:47:13 njoly Exp $");
39 
40 #include <sys/param.h>
41 
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sched.h>
50 #include <sys/syscallargs.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/unistd.h>
55 
56 /*
57  * Convert user priority or the in-kernel priority or convert the current
58  * priority to the appropriate range according to the policy change.
59  */
60 static pri_t
61 convert_pri(lwp_t *l, int policy, pri_t pri)
62 {
63 	int delta = 0;
64 
65 	switch (policy) {
66 	case SCHED_OTHER:
67 		delta = PRI_USER;
68 		break;
69 	case SCHED_FIFO:
70 	case SCHED_RR:
71 		delta = PRI_USER_RT;
72 		break;
73 	default:
74 		panic("upri_to_kpri");
75 	}
76 
77 	if (pri != PRI_NONE) {
78 		/* Convert user priority to the in-kernel */
79 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
80 		return pri + delta;
81 	}
82 	if (l->l_class == policy)
83 		return l->l_priority;
84 
85 	/* Change the current priority to the appropriate range */
86 	if (l->l_class == SCHED_OTHER) {
87 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 		return l->l_priority + delta;
89 	}
90 	if (policy == SCHED_OTHER) {
91 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
92 		return l->l_priority - delta;
93 	}
94 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
95 	return l->l_class;
96 }
97 
98 int
99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
100     const struct sched_param *params)
101 {
102 	struct proc *p;
103 	struct lwp *t;
104 	pri_t pri;
105 	u_int lcnt;
106 	int error;
107 
108 	error = 0;
109 
110 	pri = params->sched_priority;
111 
112 	/* If no parameters specified, just return (this should not happen) */
113 	if (pri == PRI_NONE && policy == SCHED_NONE)
114 		return 0;
115 
116 	/* Validate scheduling class */
117 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
118 		return EINVAL;
119 
120 	/* Validate priority */
121 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
122 		return EINVAL;
123 
124 	if (pid != 0) {
125 		/* Find the process */
126 		p = p_find(pid, PFIND_UNLOCK_FAIL);
127 		if (p == NULL)
128 			return ESRCH;
129 		mutex_enter(&p->p_smutex);
130 		mutex_exit(&proclist_lock);
131 		/* Disallow modification of system processes */
132 		if ((p->p_flag & PK_SYSTEM) != 0) {
133 			mutex_exit(&p->p_smutex);
134 			return EPERM;
135 		}
136 	} else {
137 		/* Use the calling process */
138 		p = curlwp->l_proc;
139 		mutex_enter(&p->p_smutex);
140 	}
141 
142 	/* Find the LWP(s) */
143 	lcnt = 0;
144 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
145 		pri_t kpri;
146 		int lpolicy;
147 
148 		if (lid && lid != t->l_lid)
149 			continue;
150 		lcnt++;
151 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
152 		lwp_lock(t);
153 
154 		if (policy == SCHED_NONE)
155 			lpolicy = t->l_class;
156 		else
157 			lpolicy = policy;
158 
159 		/*
160 		 * Note that, priority may need to be changed to get into
161 		 * the correct priority range of the new scheduling class.
162 		 */
163 		kpri = convert_pri(t, lpolicy, pri);
164 
165 		/* Check the permission */
166 		error = kauth_authorize_process(kauth_cred_get(),
167 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
168 		    KAUTH_ARG(kpri));
169 		if (error) {
170 			lwp_unlock(t);
171 			break;
172 		}
173 
174 		/* Set the scheduling class */
175 		if (policy != SCHED_NONE)
176 			t->l_class = policy;
177 
178 		/* Change the priority */
179 		if (t->l_priority != kpri)
180 			lwp_changepri(t, kpri);
181 
182 		lwp_unlock(t);
183 	}
184 	mutex_exit(&p->p_smutex);
185 	return (lcnt == 0) ? ESRCH : error;
186 }
187 
188 /*
189  * Set scheduling parameters.
190  */
191 int
192 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
193     register_t *retval)
194 {
195 	/* {
196 		syscallarg(pid_t) pid;
197 		syscallarg(lwpid_t) lid;
198 		syscallarg(int) policy;
199 		syscallarg(const struct sched_param *) params;
200 	} */
201 	struct sched_param params;
202 	int error;
203 
204 	/* Get the parameters from the user-space */
205 	error = copyin(SCARG(uap, params), &params, sizeof(params));
206 	if (error)
207 		goto out;
208 
209 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
210 	    SCARG(uap, policy), &params);
211 
212  out:
213 	return (error);
214 }
215 
216 int
217 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
218     struct sched_param *params)
219 {
220 	struct sched_param lparams;
221 	struct lwp *t;
222 	int error, lpolicy;
223 
224 	/* Locks the LWP */
225 	t = lwp_find2(pid, lid);
226 	if (t == NULL) {
227 		error = ESRCH;
228 		goto out;
229 	}
230 
231 	/* Check the permission */
232 	error = kauth_authorize_process(kauth_cred_get(),
233 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
234 	if (error != 0) {
235 		lwp_unlock(t);
236 		goto out;
237 	}
238 
239 	lparams.sched_priority = t->l_priority;
240 	lpolicy = t->l_class;
241 	lwp_unlock(t);
242 
243 	switch (lpolicy) {
244 	case SCHED_OTHER:
245 		lparams.sched_priority -= PRI_USER;
246 		break;
247 	case SCHED_RR:
248 	case SCHED_FIFO:
249 		lparams.sched_priority -= PRI_USER_RT;
250 		break;
251 	}
252 
253 	if (policy != NULL)
254 		*policy = lpolicy;
255 
256 	if (params != NULL)
257 		*params = lparams;
258 
259  out:
260 	return error;
261 }
262 
263 /*
264  * Get scheduling parameters.
265  */
266 int
267 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
268     register_t *retval)
269 {
270 	/* {
271 		syscallarg(pid_t) pid;
272 		syscallarg(lwpid_t) lid;
273 		syscallarg(int *) policy;
274 		syscallarg(struct sched_param *) params;
275 	} */
276 	struct sched_param params;
277 	int error, policy;
278 
279 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
280 	    &params);
281 	if (error)
282 		goto out;
283 
284 	error = copyout(&params, SCARG(uap, params), sizeof(params));
285 	if (error == 0 && SCARG(uap, policy) != NULL)
286 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
287 
288  out:
289 	return (error);
290 }
291 
292 /*
293  * Set affinity.
294  */
295 int
296 sys__sched_setaffinity(struct lwp *l,
297     const struct sys__sched_setaffinity_args *uap, register_t *retval)
298 {
299 	/* {
300 		syscallarg(pid_t) pid;
301 		syscallarg(lwpid_t) lid;
302 		syscallarg(size_t) size;
303 		syscallarg(void *) cpuset;
304 	} */
305 	cpuset_t *cpuset;
306 	struct cpu_info *ci = NULL;
307 	struct proc *p;
308 	struct lwp *t;
309 	CPU_INFO_ITERATOR cii;
310 	lwpid_t lid;
311 	u_int lcnt;
312 	int error;
313 
314 	/* Allocate the CPU set, and get it from userspace */
315 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
316 	error = copyin(SCARG(uap, cpuset), cpuset,
317 	    min(SCARG(uap, size), sizeof(cpuset_t)));
318 	if (error)
319 		goto error;
320 
321 	/* Look for a CPU in the set */
322 	for (CPU_INFO_FOREACH(cii, ci))
323 		if (CPU_ISSET(cpu_index(ci), cpuset))
324 			break;
325 	if (ci == NULL) {
326 		/* Empty set */
327 		kmem_free(cpuset, sizeof(cpuset_t));
328 		cpuset = NULL;
329 	}
330 
331 	if (SCARG(uap, pid) != 0) {
332 		/* Find the process */
333 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
334 		if (p == NULL) {
335 			error = ESRCH;
336 			goto error;
337 		}
338 		mutex_enter(&p->p_smutex);
339 		mutex_exit(&proclist_lock);
340 		/* Disallow modification of system processes. */
341 		if ((p->p_flag & PK_SYSTEM) != 0) {
342 			mutex_exit(&p->p_smutex);
343 			error = EPERM;
344 			goto error;
345 		}
346 	} else {
347 		/* Use the calling process */
348 		p = l->l_proc;
349 		mutex_enter(&p->p_smutex);
350 	}
351 
352 	/*
353 	 * Check the permission.
354 	 */
355 	error = kauth_authorize_process(l->l_cred,
356 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
357 	if (error != 0) {
358 		mutex_exit(&p->p_smutex);
359 		goto error;
360 	}
361 
362 	/* Find the LWP(s) */
363 	lcnt = 0;
364 	lid = SCARG(uap, lid);
365 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
366 		if (lid && lid != t->l_lid)
367 			continue;
368 		lwp_lock(t);
369 		if (cpuset) {
370 			/* Set the affinity flag and new CPU set */
371 			t->l_flag |= LW_AFFINITY;
372 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
373 			/* Migrate to another CPU, unlocks LWP */
374 			lwp_migrate(t, ci);
375 		} else {
376 			/* Unset the affinity flag */
377 			t->l_flag &= ~LW_AFFINITY;
378 			lwp_unlock(t);
379 		}
380 		lcnt++;
381 	}
382 	mutex_exit(&p->p_smutex);
383 	if (lcnt == 0)
384 		error = ESRCH;
385 error:
386 	if (cpuset != NULL)
387 		kmem_free(cpuset, sizeof(cpuset_t));
388 	return error;
389 }
390 
391 /*
392  * Get affinity.
393  */
394 int
395 sys__sched_getaffinity(struct lwp *l,
396     const struct sys__sched_getaffinity_args *uap, register_t *retval)
397 {
398 	/* {
399 		syscallarg(pid_t) pid;
400 		syscallarg(lwpid_t) lid;
401 		syscallarg(size_t) size;
402 		syscallarg(void *) cpuset;
403 	} */
404 	struct lwp *t;
405 	void *cpuset;
406 	int error;
407 
408 	if (SCARG(uap, size) <= 0)
409 		return EINVAL;
410 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
411 
412 	/* Locks the LWP */
413 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
414 	if (t == NULL) {
415 		kmem_free(cpuset, sizeof(cpuset_t));
416 		return ESRCH;
417 	}
418 	/* Check the permission */
419 	if (kauth_authorize_process(l->l_cred,
420 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
421 		lwp_unlock(t);
422 		kmem_free(cpuset, sizeof(cpuset_t));
423 		return EPERM;
424 	}
425 	if (t->l_flag & LW_AFFINITY)
426 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
427 	lwp_unlock(t);
428 
429 	error = copyout(cpuset, SCARG(uap, cpuset),
430 	    min(SCARG(uap, size), sizeof(cpuset_t)));
431 
432 	kmem_free(cpuset, sizeof(cpuset_t));
433 	return error;
434 }
435 
436 /*
437  * Yield.
438  */
439 int
440 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
441 {
442 
443 	yield();
444 	return 0;
445 }
446 
447 /*
448  * Sysctl nodes and initialization.
449  */
450 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
451 {
452 	const struct sysctlnode *node = NULL;
453 
454 	sysctl_createv(clog, 0, NULL, NULL,
455 		CTLFLAG_PERMANENT,
456 		CTLTYPE_NODE, "kern", NULL,
457 		NULL, 0, NULL, 0,
458 		CTL_KERN, CTL_EOL);
459 	sysctl_createv(clog, 0, NULL, NULL,
460 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
461 		CTLTYPE_INT, "posix_sched",
462 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
463 			     "Process Scheduling option to which the "
464 			     "system attempts to conform"),
465 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
466 		CTL_KERN, CTL_CREATE, CTL_EOL);
467 	sysctl_createv(clog, 0, NULL, &node,
468 		CTLFLAG_PERMANENT,
469 		CTLTYPE_NODE, "sched",
470 		SYSCTL_DESCR("Scheduler options"),
471 		NULL, 0, NULL, 0,
472 		CTL_KERN, CTL_CREATE, CTL_EOL);
473 
474 	if (node == NULL)
475 		return;
476 
477 	sysctl_createv(clog, 0, &node, NULL,
478 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
479 		CTLTYPE_INT, "pri_min",
480 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
481 		NULL, SCHED_PRI_MIN, NULL, 0,
482 		CTL_CREATE, CTL_EOL);
483 	sysctl_createv(clog, 0, &node, NULL,
484 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
485 		CTLTYPE_INT, "pri_max",
486 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
487 		NULL, SCHED_PRI_MAX, NULL, 0,
488 		CTL_CREATE, CTL_EOL);
489 }
490