xref: /netbsd-src/sys/kern/sys_sched.c (revision 2ba84f0f9e5d2b02f627ceb7a354c970d0e4411a)
1 /*	$NetBSD: sys_sched.c,v 1.15 2008/02/19 19:38:18 drochner Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * TODO:
31  *  - Handle pthread_setschedprio() as defined by POSIX;
32  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
33  */
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.15 2008/02/19 19:38:18 drochner Exp $");
37 
38 #include <sys/param.h>
39 
40 #include <sys/cpu.h>
41 #include <sys/kauth.h>
42 #include <sys/kmem.h>
43 #include <sys/lwp.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/pset.h>
47 #include <sys/sched.h>
48 #include <sys/syscallargs.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/types.h>
52 #include <sys/unistd.h>
53 
54 /*
55  * Convert user priority or the in-kernel priority or convert the current
56  * priority to the appropriate range according to the policy change.
57  */
58 static pri_t
59 convert_pri(lwp_t *l, int policy, pri_t pri)
60 {
61 	int delta = 0;
62 
63 	switch (policy) {
64 	case SCHED_OTHER:
65 		delta = PRI_USER;
66 		break;
67 	case SCHED_FIFO:
68 	case SCHED_RR:
69 		delta = PRI_USER_RT;
70 		break;
71 	default:
72 		panic("upri_to_kpri");
73 	}
74 
75 	if (pri != PRI_NONE) {
76 		/* Convert user priority to the in-kernel */
77 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
78 		return pri + delta;
79 	}
80 	if (l->l_class == policy)
81 		return l->l_priority;
82 
83 	/* Change the current priority to the appropriate range */
84 	if (l->l_class == SCHED_OTHER) {
85 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
86 		return l->l_priority + delta;
87 	}
88 	if (policy == SCHED_OTHER) {
89 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
90 		return l->l_priority - delta;
91 	}
92 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
93 	return l->l_class;
94 }
95 
96 /*
97  * Set scheduling parameters.
98  */
99 int
100 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
101     register_t *retval)
102 {
103 	/* {
104 		syscallarg(pid_t) pid;
105 		syscallarg(lwpid_t) lid;
106 		syscallarg(int) policy;
107 		syscallarg(const struct sched_param *) params;
108 	} */
109 	struct sched_param param;
110 	struct proc *p;
111 	struct lwp *t;
112 	lwpid_t lid;
113 	u_int lcnt;
114 	int policy;
115 	pri_t pri;
116 	int error;
117 
118 	/* Get the parameters from the user-space */
119 	error = copyin(SCARG(uap, params), &param, sizeof(param));
120 	if (error) {
121 		return error;
122 	}
123 	pri = param.sched_priority;
124 	policy = SCARG(uap, policy);
125 
126 	/* If no parameters specified, just return (this should not happen) */
127 	if (pri == PRI_NONE && policy == SCHED_NONE)
128 		return 0;
129 
130 	/* Validate scheduling class */
131 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
132 		return EINVAL;
133 
134 	/* Validate priority */
135 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
136 		return EINVAL;
137 
138 	if (SCARG(uap, pid) != 0) {
139 		/* Find the process */
140 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
141 		if (p == NULL)
142 			return ESRCH;
143 		mutex_enter(&p->p_smutex);
144 		mutex_exit(&proclist_lock);
145 		/* Disallow modification of system processes */
146 		if (p->p_flag & PK_SYSTEM) {
147 			mutex_exit(&p->p_smutex);
148 			return EPERM;
149 		}
150 	} else {
151 		/* Use the calling process */
152 		p = l->l_proc;
153 		mutex_enter(&p->p_smutex);
154 	}
155 
156 	/* Find the LWP(s) */
157 	lcnt = 0;
158 	lid = SCARG(uap, lid);
159 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
160 		pri_t kpri;
161 		int lpolicy;
162 
163 		if (lid && lid != t->l_lid)
164 			continue;
165 		lcnt++;
166 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
167 		lwp_lock(t);
168 
169 		if (policy == SCHED_NONE)
170 			lpolicy = t->l_class;
171 		else
172 			lpolicy = policy;
173 
174 		/*
175 		 * Note that, priority may need to be changed to get into
176 		 * the correct priority range of the new scheduling class.
177 		 */
178 		kpri = convert_pri(t, lpolicy, pri);
179 
180 		/* Check the permission */
181 		error = kauth_authorize_process(l->l_cred,
182 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
183 		    KAUTH_ARG(kpri));
184 		if (error) {
185 			lwp_unlock(t);
186 			break;
187 		}
188 
189 		/* Set the scheduling class */
190 		if (policy != SCHED_NONE)
191 			t->l_class = policy;
192 
193 		/* Change the priority */
194 		if (t->l_priority != kpri)
195 			lwp_changepri(t, kpri);
196 
197 		lwp_unlock(t);
198 	}
199 	mutex_exit(&p->p_smutex);
200 	return (lcnt == 0) ? ESRCH : error;
201 }
202 
203 /*
204  * Get scheduling parameters.
205  */
206 int
207 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
208     register_t *retval)
209 {
210 	/* {
211 		syscallarg(pid_t) pid;
212 		syscallarg(lwpid_t) lid;
213 		syscallarg(int *) policy;
214 		syscallarg(struct sched_param *) params;
215 	} */
216 	struct sched_param param;
217 	struct lwp *t;
218 	lwpid_t lid;
219 	int error, policy;
220 
221 	/* If not specified, use the first LWP */
222 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
223 
224 	if (SCARG(uap, pid) != 0) {
225 		/* Locks the LWP */
226 		t = lwp_find2(SCARG(uap, pid), lid);
227 	} else {
228 		struct proc *p = l->l_proc;
229 		/* Use the calling process */
230 		mutex_enter(&p->p_smutex);
231 		t = lwp_find(p, lid);
232 		if (t != NULL)
233 			lwp_lock(t);
234 		mutex_exit(&p->p_smutex);
235 	}
236 	if (t == NULL) {
237 		error = ESRCH;
238 		goto error;
239 	}
240 
241 	/* Check the permission */
242 	error = kauth_authorize_process(l->l_cred,
243 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
244 	if (error != 0) {
245 		lwp_unlock(t);
246 		goto error;
247 	}
248 
249 	param.sched_priority = t->l_priority;
250 	policy = t->l_class;
251 	lwp_unlock(t);
252 
253 	switch (policy) {
254 	case SCHED_OTHER:
255 		param.sched_priority -= PRI_USER;
256 		break;
257 	case SCHED_RR:
258 	case SCHED_FIFO:
259 		param.sched_priority -= PRI_USER_RT;
260 		break;
261 	}
262 	error = copyout(&param, SCARG(uap, params), sizeof(param));
263 	if (error == 0 && SCARG(uap, policy) != NULL)
264 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
265 error:
266 	return error;
267 }
268 
269 /*
270  * Set affinity.
271  */
272 int
273 sys__sched_setaffinity(struct lwp *l,
274     const struct sys__sched_setaffinity_args *uap, register_t *retval)
275 {
276 	/* {
277 		syscallarg(pid_t) pid;
278 		syscallarg(lwpid_t) lid;
279 		syscallarg(size_t) size;
280 		syscallarg(void *) cpuset;
281 	} */
282 	cpuset_t *cpuset;
283 	struct cpu_info *ci = NULL;
284 	struct proc *p;
285 	struct lwp *t;
286 	CPU_INFO_ITERATOR cii;
287 	lwpid_t lid;
288 	u_int lcnt;
289 	int error;
290 
291 	/* Allocate the CPU set, and get it from userspace */
292 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
293 	error = copyin(SCARG(uap, cpuset), cpuset,
294 	    min(SCARG(uap, size), sizeof(cpuset_t)));
295 	if (error)
296 		goto error;
297 
298 	/* Look for a CPU in the set */
299 	for (CPU_INFO_FOREACH(cii, ci))
300 		if (CPU_ISSET(cpu_index(ci), cpuset))
301 			break;
302 	if (ci == NULL) {
303 		/* Empty set */
304 		kmem_free(cpuset, sizeof(cpuset_t));
305 		cpuset = NULL;
306 	}
307 
308 	if (SCARG(uap, pid) != 0) {
309 		/* Find the process */
310 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
311 		if (p == NULL) {
312 			error = ESRCH;
313 			goto error;
314 		}
315 		mutex_enter(&p->p_smutex);
316 		mutex_exit(&proclist_lock);
317 	} else {
318 		/* Use the calling process */
319 		p = l->l_proc;
320 		mutex_enter(&p->p_smutex);
321 	}
322 
323 	/*
324 	 * Check the permission.
325 	 * Disallow modification of system processes.
326 	 */
327 	error = kauth_authorize_process(l->l_cred,
328 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
329 	if (error != 0) {
330 		mutex_exit(&p->p_smutex);
331 		goto error;
332 	}
333 	if ((p->p_flag & PK_SYSTEM) != 0) {
334 		mutex_exit(&p->p_smutex);
335 		error = EPERM;
336 		goto error;
337 	}
338 
339 	/* Find the LWP(s) */
340 	lcnt = 0;
341 	lid = SCARG(uap, lid);
342 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
343 		if (lid && lid != t->l_lid)
344 			continue;
345 		lwp_lock(t);
346 		if (cpuset) {
347 			/* Set the affinity flag and new CPU set */
348 			t->l_flag |= LW_AFFINITY;
349 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
350 			/* Migrate to another CPU, unlocks LWP */
351 			lwp_migrate(t, ci);
352 		} else {
353 			/* Unset the affinity flag */
354 			t->l_flag &= ~LW_AFFINITY;
355 			lwp_unlock(t);
356 		}
357 		lcnt++;
358 	}
359 	mutex_exit(&p->p_smutex);
360 	if (lcnt == 0)
361 		error = ESRCH;
362 error:
363 	if (cpuset != NULL)
364 		kmem_free(cpuset, sizeof(cpuset_t));
365 	return error;
366 }
367 
368 /*
369  * Get affinity.
370  */
371 int
372 sys__sched_getaffinity(struct lwp *l,
373     const struct sys__sched_getaffinity_args *uap, register_t *retval)
374 {
375 	/* {
376 		syscallarg(pid_t) pid;
377 		syscallarg(lwpid_t) lid;
378 		syscallarg(size_t) size;
379 		syscallarg(void *) cpuset;
380 	} */
381 	struct lwp *t;
382 	void *cpuset;
383 	lwpid_t lid;
384 	int error;
385 
386 	if (SCARG(uap, size) <= 0)
387 		return EINVAL;
388 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
389 
390 	/* If not specified, use the first LWP */
391 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
392 
393 	if (SCARG(uap, pid) != 0) {
394 		/* Locks the LWP */
395 		t = lwp_find2(SCARG(uap, pid), lid);
396 	} else {
397 		struct proc *p = l->l_proc;
398 		/* Use the calling process */
399 		mutex_enter(&p->p_smutex);
400 		t = lwp_find(p, lid);
401 		if (t != NULL)
402 			lwp_lock(t);
403 		mutex_exit(&p->p_smutex);
404 	}
405 	if (t == NULL) {
406 		kmem_free(cpuset, sizeof(cpuset_t));
407 		return ESRCH;
408 	}
409 	/* Check the permission */
410 	if (kauth_authorize_process(l->l_cred,
411 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
412 		lwp_unlock(t);
413 		kmem_free(cpuset, sizeof(cpuset_t));
414 		return EPERM;
415 	}
416 	if (t->l_flag & LW_AFFINITY)
417 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
418 	lwp_unlock(t);
419 
420 	error = copyout(cpuset, SCARG(uap, cpuset),
421 	    min(SCARG(uap, size), sizeof(cpuset_t)));
422 
423 	kmem_free(cpuset, sizeof(cpuset_t));
424 	return error;
425 }
426 
427 /*
428  * Yield.
429  */
430 int
431 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
432 {
433 
434 	yield();
435 	return 0;
436 }
437 
438 /*
439  * Sysctl nodes and initialization.
440  */
441 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
442 {
443 	const struct sysctlnode *node = NULL;
444 
445 	sysctl_createv(clog, 0, NULL, NULL,
446 		CTLFLAG_PERMANENT,
447 		CTLTYPE_NODE, "kern", NULL,
448 		NULL, 0, NULL, 0,
449 		CTL_KERN, CTL_EOL);
450 	sysctl_createv(clog, 0, NULL, NULL,
451 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
452 		CTLTYPE_INT, "posix_sched",
453 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
454 			     "Process Scheduling option to which the "
455 			     "system attempts to conform"),
456 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
457 		CTL_KERN, CTL_CREATE, CTL_EOL);
458 	sysctl_createv(clog, 0, NULL, &node,
459 		CTLFLAG_PERMANENT,
460 		CTLTYPE_NODE, "sched",
461 		SYSCTL_DESCR("Scheduler options"),
462 		NULL, 0, NULL, 0,
463 		CTL_KERN, CTL_CREATE, CTL_EOL);
464 
465 	if (node == NULL)
466 		return;
467 
468 	sysctl_createv(clog, 0, &node, NULL,
469 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
470 		CTLTYPE_INT, "pri_min",
471 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
472 		NULL, SCHED_PRI_MIN, NULL, 0,
473 		CTL_CREATE, CTL_EOL);
474 	sysctl_createv(clog, 0, &node, NULL,
475 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
476 		CTLTYPE_INT, "pri_max",
477 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
478 		NULL, SCHED_PRI_MAX, NULL, 0,
479 		CTL_CREATE, CTL_EOL);
480 }
481