xref: /netbsd-src/sys/kern/sys_sched.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /*	$NetBSD: sys_sched.c,v 1.40 2012/02/19 21:06:56 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * System calls relating to the scheduler.
31  *
32  * Lock order:
33  *
34  *	cpu_lock ->
35  *	    proc_lock ->
36  *		proc_t::p_lock ->
37  *		    lwp_t::lwp_lock
38  *
39  * TODO:
40  *  - Handle pthread_setschedprio() as defined by POSIX;
41  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.40 2012/02/19 21:06:56 rmind Exp $");
46 
47 #include <sys/param.h>
48 
49 #include <sys/cpu.h>
50 #include <sys/kauth.h>
51 #include <sys/kmem.h>
52 #include <sys/lwp.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/pset.h>
56 #include <sys/sched.h>
57 #include <sys/syscallargs.h>
58 #include <sys/sysctl.h>
59 #include <sys/systm.h>
60 #include <sys/types.h>
61 #include <sys/unistd.h>
62 
63 static struct sysctllog *sched_sysctl_log;
64 static kauth_listener_t sched_listener;
65 
66 /*
67  * Convert user priority or the in-kernel priority or convert the current
68  * priority to the appropriate range according to the policy change.
69  */
70 static pri_t
71 convert_pri(lwp_t *l, int policy, pri_t pri)
72 {
73 
74 	/* Convert user priority to the in-kernel */
75 	if (pri != PRI_NONE) {
76 		/* Only for real-time threads */
77 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
78 		KASSERT(policy != SCHED_OTHER);
79 		return PRI_USER_RT + pri;
80 	}
81 
82 	/* Neither policy, nor priority change */
83 	if (l->l_class == policy)
84 		return l->l_priority;
85 
86 	/* Time-sharing -> real-time */
87 	if (l->l_class == SCHED_OTHER) {
88 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
89 		return PRI_USER_RT;
90 	}
91 
92 	/* Real-time -> time-sharing */
93 	if (policy == SCHED_OTHER) {
94 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
95 		return l->l_priority - PRI_USER_RT;
96 	}
97 
98 	/* Real-time -> real-time */
99 	return l->l_priority;
100 }
101 
/*
 * do_sched_setparam:
 *
 *	Set the scheduling class and/or priority of the LWPs of process
 *	"pid" (0 means the calling process).  A non-zero "lid" restricts
 *	the change to that single LWP; otherwise every LWP of the process
 *	is changed.  "policy" may be SCHED_NONE to keep each LWP's class,
 *	and params->sched_priority may be PRI_NONE to keep the priority.
 *
 *	Returns 0 on success, ESRCH if no process/LWP matched, EPERM for
 *	system processes or an authorization failure, or EINVAL for a bad
 *	policy/priority combination.
 */
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;		/* count of LWPs matched by the scan below */
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = proc_find(pid);
		if (p == NULL) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		/* Hand-over-hand: take p_lock before dropping proc_lock. */
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s) */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		/* SCHED_NONE means "keep this LWP's current class". */
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	/* ESRCH if neither the process's LWP list nor "lid" matched anything. */
	return (lcnt == 0) ? ESRCH : error;
}
189 
190 /*
191  * Set scheduling parameters.
192  */
193 int
194 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
195     register_t *retval)
196 {
197 	/* {
198 		syscallarg(pid_t) pid;
199 		syscallarg(lwpid_t) lid;
200 		syscallarg(int) policy;
201 		syscallarg(const struct sched_param *) params;
202 	} */
203 	struct sched_param params;
204 	int error;
205 
206 	/* Get the parameters from the user-space */
207 	error = copyin(SCARG(uap, params), &params, sizeof(params));
208 	if (error)
209 		goto out;
210 
211 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
212 	    SCARG(uap, policy), &params);
213 out:
214 	return error;
215 }
216 
/*
 * do_sched_getparam:
 *
 *	Fetch the scheduling class and user-visible priority of the LWP
 *	identified by "pid"/"lid".  Either "policy" or "params" may be
 *	NULL if the caller does not want that value.  The in-kernel
 *	priority is converted back to the per-class user range before
 *	being returned.
 */
int
do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    struct sched_param *params)
{
	struct sched_param lparams;
	struct lwp *t;
	int error, lpolicy;

	/* Find the LWP; on success the owning process's p_lock is held. */
	t = lwp_find2(pid, lid);
	if (t == NULL)
		return ESRCH;

	/* Check the permission */
	error = kauth_authorize_process(kauth_cred_get(),
	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(t->l_proc->p_lock);
		return error;
	}

	lwp_lock(t);
	lparams.sched_priority = t->l_priority;
	lpolicy = t->l_class;

	/* Convert the in-kernel priority into the class's user range. */
	switch (lpolicy) {
	case SCHED_OTHER:
		lparams.sched_priority -= PRI_USER;
		break;
	case SCHED_RR:
	case SCHED_FIFO:
		lparams.sched_priority -= PRI_USER_RT;
		break;
	}

	if (policy != NULL)
		*policy = lpolicy;

	if (params != NULL)
		*params = lparams;

	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);
	/* error is 0 here - authorization succeeded above. */
	return error;
}
262 
263 /*
264  * Get scheduling parameters.
265  */
266 int
267 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
268     register_t *retval)
269 {
270 	/* {
271 		syscallarg(pid_t) pid;
272 		syscallarg(lwpid_t) lid;
273 		syscallarg(int *) policy;
274 		syscallarg(struct sched_param *) params;
275 	} */
276 	struct sched_param params;
277 	int error, policy;
278 
279 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
280 	    &params);
281 	if (error)
282 		goto out;
283 
284 	error = copyout(&params, SCARG(uap, params), sizeof(params));
285 	if (error == 0 && SCARG(uap, policy) != NULL)
286 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
287 out:
288 	return error;
289 }
290 
291 /*
292  * Allocate the CPU set, and get it from userspace.
293  */
294 static int
295 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
296 {
297 	kcpuset_t *kset;
298 	int error;
299 
300 	kcpuset_create(&kset, false);
301 	error = kcpuset_copyin(sset, kset, size);
302 	if (error) {
303 		kcpuset_unuse(kset, NULL);
304 	} else {
305 		*dset = kset;
306 	}
307 	return error;
308 }
309 
310 /*
311  * Set affinity.
312  */
313 int
314 sys__sched_setaffinity(struct lwp *l,
315     const struct sys__sched_setaffinity_args *uap, register_t *retval)
316 {
317 	/* {
318 		syscallarg(pid_t) pid;
319 		syscallarg(lwpid_t) lid;
320 		syscallarg(size_t) size;
321 		syscallarg(const cpuset_t *) cpuset;
322 	} */
323 	kcpuset_t *kcset, *kcpulst = NULL;
324 	struct cpu_info *ici, *ci;
325 	struct proc *p;
326 	struct lwp *t;
327 	CPU_INFO_ITERATOR cii;
328 	bool alloff;
329 	lwpid_t lid;
330 	u_int lcnt;
331 	int error;
332 
333 	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
334 	if (error)
335 		return error;
336 
337 	/*
338 	 * Traverse _each_ CPU to:
339 	 *  - Check that CPUs in the mask have no assigned processor set.
340 	 *  - Check that at least one CPU from the mask is online.
341 	 *  - Find the first target CPU to migrate.
342 	 *
343 	 * To avoid the race with CPU online/offline calls and processor sets,
344 	 * cpu_lock will be locked for the entire operation.
345 	 */
346 	ci = NULL;
347 	alloff = false;
348 	mutex_enter(&cpu_lock);
349 	for (CPU_INFO_FOREACH(cii, ici)) {
350 		struct schedstate_percpu *ispc;
351 
352 		if (!kcpuset_isset(kcset, cpu_index(ici))) {
353 			continue;
354 		}
355 
356 		ispc = &ici->ci_schedstate;
357 		/* Check that CPU is not in the processor-set */
358 		if (ispc->spc_psid != PS_NONE) {
359 			error = EPERM;
360 			goto out;
361 		}
362 		/* Skip offline CPUs */
363 		if (ispc->spc_flags & SPCF_OFFLINE) {
364 			alloff = true;
365 			continue;
366 		}
367 		/* Target CPU to migrate */
368 		if (ci == NULL) {
369 			ci = ici;
370 		}
371 	}
372 	if (ci == NULL) {
373 		if (alloff) {
374 			/* All CPUs in the set are offline */
375 			error = EPERM;
376 			goto out;
377 		}
378 		/* Empty set */
379 		kcpuset_unuse(kcset, &kcpulst);
380 		kcset = NULL;
381 	}
382 
383 	if (SCARG(uap, pid) != 0) {
384 		/* Find the process */
385 		mutex_enter(proc_lock);
386 		p = proc_find(SCARG(uap, pid));
387 		if (p == NULL) {
388 			mutex_exit(proc_lock);
389 			error = ESRCH;
390 			goto out;
391 		}
392 		mutex_enter(p->p_lock);
393 		mutex_exit(proc_lock);
394 		/* Disallow modification of system processes. */
395 		if ((p->p_flag & PK_SYSTEM) != 0) {
396 			mutex_exit(p->p_lock);
397 			error = EPERM;
398 			goto out;
399 		}
400 	} else {
401 		/* Use the calling process */
402 		p = l->l_proc;
403 		mutex_enter(p->p_lock);
404 	}
405 
406 	/*
407 	 * Check the permission.
408 	 */
409 	error = kauth_authorize_process(l->l_cred,
410 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
411 	if (error != 0) {
412 		mutex_exit(p->p_lock);
413 		goto out;
414 	}
415 
416 	/* Iterate through LWP(s). */
417 	lcnt = 0;
418 	lid = SCARG(uap, lid);
419 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
420 		if (lid && lid != t->l_lid) {
421 			continue;
422 		}
423 		lwp_lock(t);
424 		/* No affinity for zombie LWPs. */
425 		if (t->l_stat == LSZOMB) {
426 			lwp_unlock(t);
427 			continue;
428 		}
429 		/* First, release existing affinity, if any. */
430 		if (t->l_affinity) {
431 			kcpuset_unuse(t->l_affinity, &kcpulst);
432 		}
433 		if (kcset) {
434 			/*
435 			 * Hold a reference on affinity mask, assign mask to
436 			 * LWP and migrate it to another CPU (unlocks LWP).
437 			 */
438 			kcpuset_use(kcset);
439 			t->l_affinity = kcset;
440 			lwp_migrate(t, ci);
441 		} else {
442 			/* Old affinity mask is released, just clear. */
443 			t->l_affinity = NULL;
444 			lwp_unlock(t);
445 		}
446 		lcnt++;
447 	}
448 	mutex_exit(p->p_lock);
449 	if (lcnt == 0) {
450 		error = ESRCH;
451 	}
452 out:
453 	mutex_exit(&cpu_lock);
454 
455 	/*
456 	 * Drop the initial reference (LWPs, if any, have the ownership now),
457 	 * and destroy whatever is in the G/C list, if filled.
458 	 */
459 	if (kcset) {
460 		kcpuset_unuse(kcset, &kcpulst);
461 	}
462 	if (kcpulst) {
463 		kcpuset_destroy(kcpulst);
464 	}
465 	return error;
466 }
467 
468 /*
469  * Get affinity.
470  */
471 int
472 sys__sched_getaffinity(struct lwp *l,
473     const struct sys__sched_getaffinity_args *uap, register_t *retval)
474 {
475 	/* {
476 		syscallarg(pid_t) pid;
477 		syscallarg(lwpid_t) lid;
478 		syscallarg(size_t) size;
479 		syscallarg(cpuset_t *) cpuset;
480 	} */
481 	struct lwp *t;
482 	kcpuset_t *kcset;
483 	int error;
484 
485 	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
486 	if (error)
487 		return error;
488 
489 	/* Locks the LWP */
490 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
491 	if (t == NULL) {
492 		error = ESRCH;
493 		goto out;
494 	}
495 	/* Check the permission */
496 	if (kauth_authorize_process(l->l_cred,
497 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
498 		mutex_exit(t->l_proc->p_lock);
499 		error = EPERM;
500 		goto out;
501 	}
502 	lwp_lock(t);
503 	if (t->l_affinity) {
504 		kcpuset_copy(kcset, t->l_affinity);
505 	} else {
506 		kcpuset_zero(kcset);
507 	}
508 	lwp_unlock(t);
509 	mutex_exit(t->l_proc->p_lock);
510 
511 	error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
512 out:
513 	kcpuset_unuse(kcset, NULL);
514 	return error;
515 }
516 
517 /*
518  * Yield.
519  */
520 int
521 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
522 {
523 
524 	yield();
525 	return 0;
526 }
527 
528 /*
529  * Sysctl nodes and initialization.
530  */
531 static void
532 sysctl_sched_setup(struct sysctllog **clog)
533 {
534 	const struct sysctlnode *node = NULL;
535 
536 	sysctl_createv(clog, 0, NULL, NULL,
537 		CTLFLAG_PERMANENT,
538 		CTLTYPE_NODE, "kern", NULL,
539 		NULL, 0, NULL, 0,
540 		CTL_KERN, CTL_EOL);
541 	sysctl_createv(clog, 0, NULL, NULL,
542 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
543 		CTLTYPE_INT, "posix_sched",
544 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
545 			     "Process Scheduling option to which the "
546 			     "system attempts to conform"),
547 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
548 		CTL_KERN, CTL_CREATE, CTL_EOL);
549 	sysctl_createv(clog, 0, NULL, &node,
550 		CTLFLAG_PERMANENT,
551 		CTLTYPE_NODE, "sched",
552 		SYSCTL_DESCR("Scheduler options"),
553 		NULL, 0, NULL, 0,
554 		CTL_KERN, CTL_CREATE, CTL_EOL);
555 
556 	if (node == NULL)
557 		return;
558 
559 	sysctl_createv(clog, 0, &node, NULL,
560 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
561 		CTLTYPE_INT, "pri_min",
562 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
563 		NULL, SCHED_PRI_MIN, NULL, 0,
564 		CTL_CREATE, CTL_EOL);
565 	sysctl_createv(clog, 0, &node, NULL,
566 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
567 		CTLTYPE_INT, "pri_max",
568 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
569 		NULL, SCHED_PRI_MAX, NULL, 0,
570 		CTL_CREATE, CTL_EOL);
571 }
572 
573 static int
574 sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
575     void *arg0, void *arg1, void *arg2, void *arg3)
576 {
577 	struct proc *p;
578 	int result;
579 
580 	result = KAUTH_RESULT_DEFER;
581 	p = arg0;
582 
583 	switch (action) {
584 	case KAUTH_PROCESS_SCHEDULER_GETPARAM:
585 		if (kauth_cred_uidmatch(cred, p->p_cred))
586 			result = KAUTH_RESULT_ALLOW;
587 		break;
588 
589 	case KAUTH_PROCESS_SCHEDULER_SETPARAM:
590 		if (kauth_cred_uidmatch(cred, p->p_cred)) {
591 			struct lwp *l;
592 			int policy;
593 			pri_t priority;
594 
595 			l = arg1;
596 			policy = (int)(unsigned long)arg2;
597 			priority = (pri_t)(unsigned long)arg3;
598 
599 			if ((policy == l->l_class ||
600 			    (policy != SCHED_FIFO && policy != SCHED_RR)) &&
601 			    priority <= l->l_priority)
602 				result = KAUTH_RESULT_ALLOW;
603 		}
604 
605 		break;
606 
607 	case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
608 		result = KAUTH_RESULT_ALLOW;
609 		break;
610 
611 	case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
612 		/* Privileged; we let the secmodel handle this. */
613 		break;
614 
615 	default:
616 		break;
617 	}
618 
619 	return result;
620 }
621 
/*
 * sched_init:
 *
 *	Initialization: create the sysctl nodes and register the kauth
 *	listener implementing the scheduler authorization policy.
 */
void
sched_init(void)
{

	sysctl_sched_setup(&sched_sysctl_log);

	sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    sched_listener_cb, NULL);
}
631