xref: /netbsd-src/sys/kern/sys_sched.c (revision 267197ec1eebfcb9810ea27a89625b6ddf68e3e7)
1 /*	$NetBSD: sys_sched.c,v 1.11 2008/02/16 16:39:34 elad Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * TODO:
31  *  - Handle pthread_setschedprio() as defined by POSIX;
32  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
33  */
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.11 2008/02/16 16:39:34 elad Exp $");
37 
38 #include <sys/param.h>
39 
40 #include <sys/cpu.h>
41 #include <sys/kauth.h>
42 #include <sys/kmem.h>
43 #include <sys/lwp.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/pset.h>
47 #include <sys/sched.h>
48 #include <sys/syscallargs.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/types.h>
52 #include <sys/unistd.h>
53 
54 /*
55  * Convert user priority or the in-kernel priority or convert the current
56  * priority to the appropriate range according to the policy change.
57  */
58 static pri_t
59 convert_pri(lwp_t *l, int policy, pri_t pri)
60 {
61 	int delta = 0;
62 
63 	if (policy == SCHED_NONE)
64 		policy = l->l_class;
65 
66 	switch (policy) {
67 	case SCHED_OTHER:
68 		delta = PRI_USER;
69 		break;
70 	case SCHED_FIFO:
71 	case SCHED_RR:
72 		delta = PRI_USER_RT;
73 		break;
74 	default:
75 		panic("upri_to_kpri");
76 	}
77 
78 	if (pri != PRI_NONE) {
79 		/* Convert user priority to the in-kernel */
80 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
81 		return pri + delta;
82 	}
83 	if (l->l_class == policy)
84 		return l->l_priority;
85 
86 	/* Change the current priority to the appropriate range */
87 	if (l->l_class == SCHED_OTHER) {
88 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
89 		return l->l_priority + delta;
90 	}
91 	if (policy == SCHED_OTHER) {
92 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
93 		return l->l_priority - delta;
94 	}
95 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
96 	return l->l_class;
97 }
98 
99 /*
100  * Set scheduling parameters.
101  */
102 int
103 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
104     register_t *retval)
105 {
106 	/* {
107 		syscallarg(pid_t) pid;
108 		syscallarg(lwpid_t) lid;
109 		syscallarg(int) policy;
110 		syscallarg(const struct sched_param *) params;
111 	} */
112 	struct sched_param param;
113 	struct proc *p;
114 	struct lwp *t;
115 	lwpid_t lid;
116 	u_int lcnt;
117 	int policy;
118 	pri_t pri;
119 	int error;
120 
121 	/* Get the parameters from the user-space */
122 	error = copyin(SCARG(uap, params), &param, sizeof(param));
123 	if (error) {
124 		return error;
125 	}
126 	pri = param.sched_priority;
127 	policy = SCARG(uap, policy);
128 
129 	/* If no parameters specified, just return (this should not happen) */
130 	if (pri == PRI_NONE && policy == SCHED_NONE)
131 		return 0;
132 
133 	/* Validate scheduling class */
134 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
135 		return EINVAL;
136 
137 	/* Validate priority */
138 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
139 		return EINVAL;
140 
141 	if (SCARG(uap, pid) != 0) {
142 		/* Find the process */
143 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
144 		if (p == NULL)
145 			return ESRCH;
146 		mutex_enter(&p->p_smutex);
147 		mutex_exit(&proclist_lock);
148 		/* Disallow modification of system processes */
149 		if (p->p_flag & PK_SYSTEM) {
150 			mutex_exit(&p->p_smutex);
151 			return EPERM;
152 		}
153 	} else {
154 		/* Use the calling process */
155 		p = l->l_proc;
156 		mutex_enter(&p->p_smutex);
157 	}
158 
159 	/* Check the permission */
160 	if (kauth_authorize_process(l->l_cred,
161 	    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, NULL, NULL, NULL)) {
162 		mutex_exit(&p->p_smutex);
163 		return EPERM;
164 	}
165 
166 	/* Find the LWP(s) */
167 	lcnt = 0;
168 	lid = SCARG(uap, lid);
169 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
170 		pri_t kpri;
171 
172 		if (lid && lid != t->l_lid)
173 			continue;
174 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
175 		lwp_lock(t);
176 
177 		/*
178 		 * Note that, priority may need to be changed to get into
179 		 * the correct priority range of the new scheduling class.
180 		 */
181 		kpri = convert_pri(t, policy, pri);
182 
183 		/* Set the scheduling class */
184 		if (policy != SCHED_NONE)
185 			t->l_class = policy;
186 
187 		/* Change the priority */
188 		if (t->l_priority != kpri)
189 			lwp_changepri(t, kpri);
190 
191 		lwp_unlock(t);
192 		lcnt++;
193 	}
194 	mutex_exit(&p->p_smutex);
195 	return (lcnt == 0) ? ESRCH : error;
196 }
197 
198 /*
199  * Get scheduling parameters.
200  */
201 int
202 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
203     register_t *retval)
204 {
205 	/* {
206 		syscallarg(pid_t) pid;
207 		syscallarg(lwpid_t) lid;
208 		syscallarg(int *) policy;
209 		syscallarg(struct sched_param *) params;
210 	} */
211 	struct sched_param param;
212 	struct lwp *t;
213 	lwpid_t lid;
214 	int error, policy;
215 
216 	/* If not specified, use the first LWP */
217 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
218 
219 	if (SCARG(uap, pid) != 0) {
220 		/* Locks the LWP */
221 		t = lwp_find2(SCARG(uap, pid), lid);
222 	} else {
223 		struct proc *p = l->l_proc;
224 		/* Use the calling process */
225 		mutex_enter(&p->p_smutex);
226 		t = lwp_find(p, lid);
227 		if (t != NULL)
228 			lwp_lock(t);
229 		mutex_exit(&p->p_smutex);
230 	}
231 	if (t == NULL) {
232 		error = ESRCH;
233 		goto error;
234 	}
235 
236 	/* Check the permission */
237 	error = kauth_authorize_process(l->l_cred,
238 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
239 	if (error != 0) {
240 		lwp_unlock(t);
241 		goto error;
242 	}
243 
244 	param.sched_priority = t->l_priority;
245 	policy = t->l_class;
246 	lwp_unlock(t);
247 
248 	switch (policy) {
249 	case SCHED_OTHER:
250 		param.sched_priority -= PRI_USER;
251 		break;
252 	case SCHED_RR:
253 	case SCHED_FIFO:
254 		param.sched_priority -= PRI_USER_RT;
255 		break;
256 	}
257 	error = copyout(&param, SCARG(uap, params), sizeof(param));
258 	if (error == 0 && SCARG(uap, policy) != NULL)
259 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
260 error:
261 	return error;
262 }
263 
264 /*
265  * Set affinity.
266  */
267 int
268 sys__sched_setaffinity(struct lwp *l,
269     const struct sys__sched_setaffinity_args *uap, register_t *retval)
270 {
271 	/* {
272 		syscallarg(pid_t) pid;
273 		syscallarg(lwpid_t) lid;
274 		syscallarg(size_t) size;
275 		syscallarg(void *) cpuset;
276 	} */
277 	cpuset_t *cpuset;
278 	struct cpu_info *ci = NULL;
279 	struct proc *p;
280 	struct lwp *t;
281 	CPU_INFO_ITERATOR cii;
282 	lwpid_t lid;
283 	u_int lcnt;
284 	int error;
285 
286 	/* Allocate the CPU set, and get it from userspace */
287 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
288 	error = copyin(SCARG(uap, cpuset), cpuset,
289 	    min(SCARG(uap, size), sizeof(cpuset_t)));
290 	if (error)
291 		goto error;
292 
293 	/* Look for a CPU in the set */
294 	for (CPU_INFO_FOREACH(cii, ci))
295 		if (CPU_ISSET(cpu_index(ci), cpuset))
296 			break;
297 	if (ci == NULL) {
298 		/* Empty set */
299 		kmem_free(cpuset, sizeof(cpuset_t));
300 		cpuset = NULL;
301 	}
302 
303 	if (SCARG(uap, pid) != 0) {
304 		/* Find the process */
305 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
306 		if (p == NULL) {
307 			error = ESRCH;
308 			goto error;
309 		}
310 		mutex_enter(&p->p_smutex);
311 		mutex_exit(&proclist_lock);
312 	} else {
313 		/* Use the calling process */
314 		p = l->l_proc;
315 		mutex_enter(&p->p_smutex);
316 	}
317 
318 	/*
319 	 * Check the permission.
320 	 * Disallow modification of system processes.
321 	 */
322 	error = kauth_authorize_process(l->l_cred,
323 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
324 	if (error != 0) {
325 		mutex_exit(&p->p_smutex);
326 		goto error;
327 	}
328 	if ((p->p_flag & PK_SYSTEM) != 0) {
329 		mutex_exit(&p->p_smutex);
330 		error = EPERM;
331 		goto error;
332 	}
333 
334 	/* Find the LWP(s) */
335 	lcnt = 0;
336 	lid = SCARG(uap, lid);
337 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
338 		if (lid && lid != t->l_lid)
339 			continue;
340 		lwp_lock(t);
341 		if (cpuset) {
342 			/* Set the affinity flag and new CPU set */
343 			t->l_flag |= LW_AFFINITY;
344 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
345 			/* Migrate to another CPU, unlocks LWP */
346 			lwp_migrate(t, ci);
347 		} else {
348 			/* Unset the affinity flag */
349 			t->l_flag &= ~LW_AFFINITY;
350 			lwp_unlock(t);
351 		}
352 		lcnt++;
353 	}
354 	mutex_exit(&p->p_smutex);
355 	if (lcnt == 0)
356 		error = ESRCH;
357 error:
358 	if (cpuset != NULL)
359 		kmem_free(cpuset, sizeof(cpuset_t));
360 	return error;
361 }
362 
363 /*
364  * Get affinity.
365  */
366 int
367 sys__sched_getaffinity(struct lwp *l,
368     const struct sys__sched_getaffinity_args *uap, register_t *retval)
369 {
370 	/* {
371 		syscallarg(pid_t) pid;
372 		syscallarg(lwpid_t) lid;
373 		syscallarg(size_t) size;
374 		syscallarg(void *) cpuset;
375 	} */
376 	struct lwp *t;
377 	void *cpuset;
378 	lwpid_t lid;
379 	int error;
380 
381 	if (SCARG(uap, size) <= 0)
382 		return EINVAL;
383 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
384 
385 	/* If not specified, use the first LWP */
386 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
387 
388 	if (SCARG(uap, pid) != 0) {
389 		/* Locks the LWP */
390 		t = lwp_find2(SCARG(uap, pid), lid);
391 	} else {
392 		struct proc *p = l->l_proc;
393 		/* Use the calling process */
394 		mutex_enter(&p->p_smutex);
395 		t = lwp_find(p, lid);
396 		if (t != NULL)
397 			lwp_lock(t);
398 		mutex_exit(&p->p_smutex);
399 	}
400 	if (t == NULL) {
401 		kmem_free(cpuset, sizeof(cpuset_t));
402 		return ESRCH;
403 	}
404 	/* Check the permission */
405 	if (kauth_authorize_process(l->l_cred,
406 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
407 		lwp_unlock(t);
408 		kmem_free(cpuset, sizeof(cpuset_t));
409 		return EPERM;
410 	}
411 	if (t->l_flag & LW_AFFINITY)
412 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
413 	lwp_unlock(t);
414 
415 	error = copyout(cpuset, SCARG(uap, cpuset),
416 	    min(SCARG(uap, size), sizeof(cpuset_t)));
417 
418 	kmem_free(cpuset, sizeof(cpuset_t));
419 	return error;
420 }
421 
422 /*
423  * Yield.
424  */
425 int
426 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
427 {
428 
429 	yield();
430 	return 0;
431 }
432 
433 /*
434  * Sysctl nodes and initialization.
435  */
436 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
437 {
438 	const struct sysctlnode *node = NULL;
439 
440 	sysctl_createv(clog, 0, NULL, NULL,
441 		CTLFLAG_PERMANENT,
442 		CTLTYPE_NODE, "kern", NULL,
443 		NULL, 0, NULL, 0,
444 		CTL_KERN, CTL_EOL);
445 	sysctl_createv(clog, 0, NULL, NULL,
446 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
447 		CTLTYPE_INT, "posix_sched",
448 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
449 			     "Process Scheduling option to which the "
450 			     "system attempts to conform"),
451 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
452 		CTL_KERN, CTL_CREATE, CTL_EOL);
453 	sysctl_createv(clog, 0, NULL, &node,
454 		CTLFLAG_PERMANENT,
455 		CTLTYPE_NODE, "sched",
456 		SYSCTL_DESCR("Scheduler options"),
457 		NULL, 0, NULL, 0,
458 		CTL_KERN, CTL_CREATE, CTL_EOL);
459 
460 	if (node == NULL)
461 		return;
462 
463 	sysctl_createv(clog, 0, &node, NULL,
464 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
465 		CTLTYPE_INT, "pri_min",
466 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
467 		NULL, SCHED_PRI_MIN, NULL, 0,
468 		CTL_CREATE, CTL_EOL);
469 	sysctl_createv(clog, 0, &node, NULL,
470 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
471 		CTLTYPE_INT, "pri_max",
472 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
473 		NULL, SCHED_PRI_MAX, NULL, 0,
474 		CTL_CREATE, CTL_EOL);
475 }
476