xref: /netbsd-src/sys/kern/sys_sched.c (revision 0dec6ba3d54d2339e16436c919b6bf3e2f335935)
1*0dec6ba3Sriastradh /*	$NetBSD: sys_sched.c,v 1.50 2023/04/09 09:18:09 riastradh Exp $	*/
2606e323bSad 
35c71a4d4Srmind /*
452b220e9Srmind  * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
5606e323bSad  * All rights reserved.
6606e323bSad  *
7606e323bSad  * Redistribution and use in source and binary forms, with or without
8606e323bSad  * modification, are permitted provided that the following conditions
9606e323bSad  * are met:
10606e323bSad  * 1. Redistributions of source code must retain the above copyright
11606e323bSad  *    notice, this list of conditions and the following disclaimer.
12606e323bSad  * 2. Redistributions in binary form must reproduce the above copyright
13606e323bSad  *    notice, this list of conditions and the following disclaimer in the
14606e323bSad  *    documentation and/or other materials provided with the distribution.
15606e323bSad  *
169850c055Srmind  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
179850c055Srmind  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
189850c055Srmind  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
199850c055Srmind  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
209850c055Srmind  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
219850c055Srmind  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
229850c055Srmind  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
239850c055Srmind  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
249850c055Srmind  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
259850c055Srmind  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
269850c055Srmind  * SUCH DAMAGE.
27606e323bSad  */
28606e323bSad 
295c71a4d4Srmind /*
309a0b455fSad  * System calls relating to the scheduler.
319a0b455fSad  *
328f1873eaSrmind  * Lock order:
338f1873eaSrmind  *
348f1873eaSrmind  *	cpu_lock ->
358f1873eaSrmind  *	    proc_lock ->
368f1873eaSrmind  *		proc_t::p_lock ->
378f1873eaSrmind  *		    lwp_t::lwp_lock
388f1873eaSrmind  *
395c71a4d4Srmind  * TODO:
405c71a4d4Srmind  *  - Handle pthread_setschedprio() as defined by POSIX;
415c71a4d4Srmind  */
425c71a4d4Srmind 
43606e323bSad #include <sys/cdefs.h>
44*0dec6ba3Sriastradh __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.50 2023/04/09 09:18:09 riastradh Exp $");
45606e323bSad 
46606e323bSad #include <sys/param.h>
47606e323bSad 
485c71a4d4Srmind #include <sys/cpu.h>
495c71a4d4Srmind #include <sys/kauth.h>
505c71a4d4Srmind #include <sys/kmem.h>
515c71a4d4Srmind #include <sys/lwp.h>
525c71a4d4Srmind #include <sys/mutex.h>
535c71a4d4Srmind #include <sys/proc.h>
545c71a4d4Srmind #include <sys/pset.h>
555c71a4d4Srmind #include <sys/sched.h>
565c71a4d4Srmind #include <sys/syscallargs.h>
575c71a4d4Srmind #include <sys/sysctl.h>
585c71a4d4Srmind #include <sys/systm.h>
595c71a4d4Srmind #include <sys/types.h>
605c71a4d4Srmind #include <sys/unistd.h>
615c71a4d4Srmind 
62b2f37683Selad static struct sysctllog *sched_sysctl_log;
63b2f37683Selad static kauth_listener_t sched_listener;
64b2f37683Selad 
655c71a4d4Srmind /*
66b5e9adddSrmind  * Convert user priority or the in-kernel priority or convert the current
67b5e9adddSrmind  * priority to the appropriate range according to the policy change.
68b5e9adddSrmind  */
69b5e9adddSrmind static pri_t
convert_pri(lwp_t * l,int policy,pri_t pri)70b5e9adddSrmind convert_pri(lwp_t *l, int policy, pri_t pri)
71b5e9adddSrmind {
72b5e9adddSrmind 
73b5e9adddSrmind 	/* Convert user priority to the in-kernel */
74d5ea013eSrmind 	if (pri != PRI_NONE) {
75d5ea013eSrmind 		/* Only for real-time threads */
76*0dec6ba3Sriastradh 		KASSERT(pri >= SCHED_PRI_MIN);
77*0dec6ba3Sriastradh 		KASSERT(pri <= SCHED_PRI_MAX);
78d5ea013eSrmind 		KASSERT(policy != SCHED_OTHER);
79d5ea013eSrmind 		return PRI_USER_RT + pri;
80b5e9adddSrmind 	}
81d5ea013eSrmind 
82d5ea013eSrmind 	/* Neither policy, nor priority change */
83b5e9adddSrmind 	if (l->l_class == policy)
84b5e9adddSrmind 		return l->l_priority;
85b5e9adddSrmind 
86d5ea013eSrmind 	/* Time-sharing -> real-time */
87b5e9adddSrmind 	if (l->l_class == SCHED_OTHER) {
88b5e9adddSrmind 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
89d5ea013eSrmind 		return PRI_USER_RT;
90b5e9adddSrmind 	}
91d5ea013eSrmind 
92d5ea013eSrmind 	/* Real-time -> time-sharing */
93b5e9adddSrmind 	if (policy == SCHED_OTHER) {
94b5e9adddSrmind 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
956e7d55c5Syamt 		/*
966e7d55c5Syamt 		 * this is a bit arbitrary because the priority is dynamic
976e7d55c5Syamt 		 * for SCHED_OTHER threads and will likely be changed by
986e7d55c5Syamt 		 * the scheduler soon anyway.
996e7d55c5Syamt 		 */
100d5ea013eSrmind 		return l->l_priority - PRI_USER_RT;
101b5e9adddSrmind 	}
102d5ea013eSrmind 
103d5ea013eSrmind 	/* Real-time -> real-time */
104d5ea013eSrmind 	return l->l_priority;
105b5e9adddSrmind }
106b5e9adddSrmind 
1075c71a4d4Srmind int
do_sched_setparam(pid_t pid,lwpid_t lid,int policy,const struct sched_param * params)10867470a76Selad do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
10967470a76Selad     const struct sched_param *params)
1105c71a4d4Srmind {
1115c71a4d4Srmind 	struct proc *p;
1125c71a4d4Srmind 	struct lwp *t;
1135c71a4d4Srmind 	pri_t pri;
11467470a76Selad 	u_int lcnt;
1155c71a4d4Srmind 	int error;
1165c71a4d4Srmind 
11767470a76Selad 	error = 0;
11867470a76Selad 
11967470a76Selad 	pri = params->sched_priority;
1205c71a4d4Srmind 
121b5e9adddSrmind 	/* If no parameters specified, just return (this should not happen) */
122b5e9adddSrmind 	if (pri == PRI_NONE && policy == SCHED_NONE)
123b5e9adddSrmind 		return 0;
124b5e9adddSrmind 
125b5e9adddSrmind 	/* Validate scheduling class */
126b5e9adddSrmind 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
127b5e9adddSrmind 		return EINVAL;
128b5e9adddSrmind 
129b5e9adddSrmind 	/* Validate priority */
130b5e9adddSrmind 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
131b5e9adddSrmind 		return EINVAL;
132b5e9adddSrmind 
13367470a76Selad 	if (pid != 0) {
1345c71a4d4Srmind 		/* Find the process */
1350eaaa024Sad 		mutex_enter(&proc_lock);
1363c507045Srmind 		p = proc_find(pid);
1376d70f903Sad 		if (p == NULL) {
1380eaaa024Sad 			mutex_exit(&proc_lock);
139b5e9adddSrmind 			return ESRCH;
1406d70f903Sad 		}
141284c2b9aSad 		mutex_enter(p->p_lock);
1420eaaa024Sad 		mutex_exit(&proc_lock);
1435c71a4d4Srmind 		/* Disallow modification of system processes */
1449a0b455fSad 		if ((p->p_flag & PK_SYSTEM) != 0) {
145284c2b9aSad 			mutex_exit(p->p_lock);
146b5e9adddSrmind 			return EPERM;
147b5e9adddSrmind 		}
148b5e9adddSrmind 	} else {
149b5e9adddSrmind 		/* Use the calling process */
15067470a76Selad 		p = curlwp->l_proc;
151284c2b9aSad 		mutex_enter(p->p_lock);
1525c71a4d4Srmind 	}
1535c71a4d4Srmind 
1545c71a4d4Srmind 	/* Find the LWP(s) */
1555c71a4d4Srmind 	lcnt = 0;
1565c71a4d4Srmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
157b5e9adddSrmind 		pri_t kpri;
1580bb7f5ccSelad 		int lpolicy;
1595c71a4d4Srmind 
1605c71a4d4Srmind 		if (lid && lid != t->l_lid)
1615c71a4d4Srmind 			continue;
162d5ea013eSrmind 
163cc0caabcSdrochner 		lcnt++;
164b5e9adddSrmind 		lwp_lock(t);
165d5ea013eSrmind 		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
166b5e9adddSrmind 
167d5ea013eSrmind 		/* Disallow setting of priority for SCHED_OTHER threads */
168a8552a3aSrmind 		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
169d5ea013eSrmind 			lwp_unlock(t);
170d5ea013eSrmind 			error = EINVAL;
171d5ea013eSrmind 			break;
172d5ea013eSrmind 		}
1730bb7f5ccSelad 
174d5ea013eSrmind 		/* Convert priority, if needed */
1750bb7f5ccSelad 		kpri = convert_pri(t, lpolicy, pri);
1760bb7f5ccSelad 
1770bb7f5ccSelad 		/* Check the permission */
17867470a76Selad 		error = kauth_authorize_process(kauth_cred_get(),
1790bb7f5ccSelad 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
1800bb7f5ccSelad 		    KAUTH_ARG(kpri));
181aa57485fSyamt 		if (error) {
182aa57485fSyamt 			lwp_unlock(t);
1830bb7f5ccSelad 			break;
184aa57485fSyamt 		}
1855c71a4d4Srmind 
186d5ea013eSrmind 		/* Set the scheduling class, change the priority */
187d5ea013eSrmind 		t->l_class = lpolicy;
188b5e9adddSrmind 		lwp_changepri(t, kpri);
1895c71a4d4Srmind 		lwp_unlock(t);
1905c71a4d4Srmind 	}
191284c2b9aSad 	mutex_exit(p->p_lock);
192b5e9adddSrmind 	return (lcnt == 0) ? ESRCH : error;
1935c71a4d4Srmind }
1945c71a4d4Srmind 
1955c71a4d4Srmind /*
19667470a76Selad  * Set scheduling parameters.
19767470a76Selad  */
19867470a76Selad int
sys__sched_setparam(struct lwp * l,const struct sys__sched_setparam_args * uap,register_t * retval)19967470a76Selad sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
20067470a76Selad     register_t *retval)
20167470a76Selad {
20267470a76Selad 	/* {
20367470a76Selad 		syscallarg(pid_t) pid;
20467470a76Selad 		syscallarg(lwpid_t) lid;
20567470a76Selad 		syscallarg(int) policy;
20667470a76Selad 		syscallarg(const struct sched_param *) params;
20767470a76Selad 	} */
20867470a76Selad 	struct sched_param params;
20967470a76Selad 	int error;
21067470a76Selad 
21167470a76Selad 	/* Get the parameters from the user-space */
21267470a76Selad 	error = copyin(SCARG(uap, params), &params, sizeof(params));
21367470a76Selad 	if (error)
21467470a76Selad 		goto out;
21567470a76Selad 
21667470a76Selad 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
21767470a76Selad 	    SCARG(uap, policy), &params);
21867470a76Selad out:
2198f1873eaSrmind 	return error;
22067470a76Selad }
22167470a76Selad 
2226e7d55c5Syamt /*
2236e7d55c5Syamt  * do_sched_getparam:
2246e7d55c5Syamt  *
2256e7d55c5Syamt  * if lid=0, returns the parameter of the first LWP in the process.
2266e7d55c5Syamt  */
22767470a76Selad int
do_sched_getparam(pid_t pid,lwpid_t lid,int * policy,struct sched_param * params)22867470a76Selad do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
22967470a76Selad     struct sched_param *params)
23067470a76Selad {
23167470a76Selad 	struct sched_param lparams;
23267470a76Selad 	struct lwp *t;
23367470a76Selad 	int error, lpolicy;
23467470a76Selad 
2356925a27fSthorpej 	if (pid < 0 || lid < 0)
2366925a27fSthorpej 		return EINVAL;
2376925a27fSthorpej 
2386e7d55c5Syamt 	t = lwp_find2(pid, lid); /* acquire p_lock */
239284c2b9aSad 	if (t == NULL)
240284c2b9aSad 		return ESRCH;
24167470a76Selad 
24267470a76Selad 	/* Check the permission */
24367470a76Selad 	error = kauth_authorize_process(kauth_cred_get(),
24467470a76Selad 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
24567470a76Selad 	if (error != 0) {
246284c2b9aSad 		mutex_exit(t->l_proc->p_lock);
247284c2b9aSad 		return error;
24867470a76Selad 	}
24967470a76Selad 
250284c2b9aSad 	lwp_lock(t);
25167470a76Selad 	lparams.sched_priority = t->l_priority;
25267470a76Selad 	lpolicy = t->l_class;
2536e7d55c5Syamt 	lwp_unlock(t);
2546e7d55c5Syamt 	mutex_exit(t->l_proc->p_lock);
25567470a76Selad 
2566e7d55c5Syamt 	/*
2576e7d55c5Syamt 	 * convert to the user-visible priority value.
2586e7d55c5Syamt 	 * it's an inversion of convert_pri().
2596e7d55c5Syamt 	 *
2606e7d55c5Syamt 	 * the SCHED_OTHER case is a bit arbitrary given that
2616e7d55c5Syamt 	 *	- we don't allow setting the priority.
2626e7d55c5Syamt 	 *	- the priority is dynamic.
2636e7d55c5Syamt 	 */
26467470a76Selad 	switch (lpolicy) {
26567470a76Selad 	case SCHED_OTHER:
26667470a76Selad 		lparams.sched_priority -= PRI_USER;
26767470a76Selad 		break;
26867470a76Selad 	case SCHED_RR:
26967470a76Selad 	case SCHED_FIFO:
27067470a76Selad 		lparams.sched_priority -= PRI_USER_RT;
27167470a76Selad 		break;
27267470a76Selad 	}
27367470a76Selad 
27467470a76Selad 	if (policy != NULL)
27567470a76Selad 		*policy = lpolicy;
27667470a76Selad 
27767470a76Selad 	if (params != NULL)
27867470a76Selad 		*params = lparams;
27967470a76Selad 
28067470a76Selad 	return error;
28167470a76Selad }
28267470a76Selad 
28367470a76Selad /*
2845c71a4d4Srmind  * Get scheduling parameters.
2855c71a4d4Srmind  */
2865c71a4d4Srmind int
sys__sched_getparam(struct lwp * l,const struct sys__sched_getparam_args * uap,register_t * retval)2875c71a4d4Srmind sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
2885c71a4d4Srmind     register_t *retval)
2895c71a4d4Srmind {
2905c71a4d4Srmind 	/* {
2915c71a4d4Srmind 		syscallarg(pid_t) pid;
2925c71a4d4Srmind 		syscallarg(lwpid_t) lid;
29316b042cbSyamt 		syscallarg(int *) policy;
2945c71a4d4Srmind 		syscallarg(struct sched_param *) params;
2955c71a4d4Srmind 	} */
29667470a76Selad 	struct sched_param params;
29716b042cbSyamt 	int error, policy;
2985c71a4d4Srmind 
29967470a76Selad 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
30067470a76Selad 	    &params);
30167470a76Selad 	if (error)
30267470a76Selad 		goto out;
30316b042cbSyamt 
30467470a76Selad 	error = copyout(&params, SCARG(uap, params), sizeof(params));
30516b042cbSyamt 	if (error == 0 && SCARG(uap, policy) != NULL)
30616b042cbSyamt 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
30767470a76Selad out:
3088f1873eaSrmind 	return error;
3095c71a4d4Srmind }
3105c71a4d4Srmind 
3118f1873eaSrmind /*
3128f1873eaSrmind  * Allocate the CPU set, and get it from userspace.
3138f1873eaSrmind  */
314f30b5785Schristos static int
genkcpuset(kcpuset_t ** dset,const cpuset_t * sset,size_t size)3151d875fc7Schristos genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
316f30b5785Schristos {
31752b220e9Srmind 	kcpuset_t *kset;
318f30b5785Schristos 	int error;
319f30b5785Schristos 
3200c794722Srmind 	kcpuset_create(&kset, true);
32152b220e9Srmind 	error = kcpuset_copyin(sset, kset, size);
32252b220e9Srmind 	if (error) {
32352b220e9Srmind 		kcpuset_unuse(kset, NULL);
32452b220e9Srmind 	} else {
32552b220e9Srmind 		*dset = kset;
32652b220e9Srmind 	}
327f30b5785Schristos 	return error;
328f30b5785Schristos }
329f30b5785Schristos 
3305c71a4d4Srmind /*
3315c71a4d4Srmind  * Set affinity.
3325c71a4d4Srmind  */
3335c71a4d4Srmind int
sys__sched_setaffinity(struct lwp * l,const struct sys__sched_setaffinity_args * uap,register_t * retval)3345c71a4d4Srmind sys__sched_setaffinity(struct lwp *l,
3355c71a4d4Srmind     const struct sys__sched_setaffinity_args *uap, register_t *retval)
3365c71a4d4Srmind {
3375c71a4d4Srmind 	/* {
3385c71a4d4Srmind 		syscallarg(pid_t) pid;
3395c71a4d4Srmind 		syscallarg(lwpid_t) lid;
3405c71a4d4Srmind 		syscallarg(size_t) size;
341f30b5785Schristos 		syscallarg(const cpuset_t *) cpuset;
3425c71a4d4Srmind 	} */
34352b220e9Srmind 	kcpuset_t *kcset, *kcpulst = NULL;
344909e7f42Srmind 	struct cpu_info *ici, *ci;
3455c71a4d4Srmind 	struct proc *p;
3465c71a4d4Srmind 	struct lwp *t;
3475c71a4d4Srmind 	CPU_INFO_ITERATOR cii;
348909e7f42Srmind 	bool alloff;
3495c71a4d4Srmind 	lwpid_t lid;
3505c71a4d4Srmind 	u_int lcnt;
3515c71a4d4Srmind 	int error;
3525c71a4d4Srmind 
35352b220e9Srmind 	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
3548f1873eaSrmind 	if (error)
355f30b5785Schristos 		return error;
3565c71a4d4Srmind 
3578f1873eaSrmind 	/*
358909e7f42Srmind 	 * Traverse _each_ CPU to:
359909e7f42Srmind 	 *  - Check that CPUs in the mask have no assigned processor set.
360909e7f42Srmind 	 *  - Check that at least one CPU from the mask is online.
361909e7f42Srmind 	 *  - Find the first target CPU to migrate.
3628f1873eaSrmind 	 *
363909e7f42Srmind 	 * To avoid the race with CPU online/offline calls and processor sets,
364909e7f42Srmind 	 * cpu_lock will be locked for the entire operation.
3658f1873eaSrmind 	 */
366909e7f42Srmind 	ci = NULL;
367909e7f42Srmind 	alloff = false;
3688f1873eaSrmind 	mutex_enter(&cpu_lock);
369909e7f42Srmind 	for (CPU_INFO_FOREACH(cii, ici)) {
370909e7f42Srmind 		struct schedstate_percpu *ispc;
3718f1873eaSrmind 
372f7666738Srmind 		if (!kcpuset_isset(kcset, cpu_index(ici))) {
3738f1873eaSrmind 			continue;
374f7666738Srmind 		}
375909e7f42Srmind 
376909e7f42Srmind 		ispc = &ici->ci_schedstate;
377909e7f42Srmind 		/* Check that CPU is not in the processor-set */
378909e7f42Srmind 		if (ispc->spc_psid != PS_NONE) {
379909e7f42Srmind 			error = EPERM;
380909e7f42Srmind 			goto out;
381909e7f42Srmind 		}
382909e7f42Srmind 		/* Skip offline CPUs */
383909e7f42Srmind 		if (ispc->spc_flags & SPCF_OFFLINE) {
384909e7f42Srmind 			alloff = true;
3858f1873eaSrmind 			continue;
386a6092d3cSrmind 		}
387909e7f42Srmind 		/* Target CPU to migrate */
388909e7f42Srmind 		if (ci == NULL) {
389909e7f42Srmind 			ci = ici;
390909e7f42Srmind 		}
391f30b5785Schristos 	}
3925c71a4d4Srmind 	if (ci == NULL) {
393909e7f42Srmind 		if (alloff) {
3948f1873eaSrmind 			/* All CPUs in the set are offline */
3958f1873eaSrmind 			error = EPERM;
3968f1873eaSrmind 			goto out;
3978f1873eaSrmind 		}
3985c71a4d4Srmind 		/* Empty set */
39952b220e9Srmind 		kcpuset_unuse(kcset, &kcpulst);
40052b220e9Srmind 		kcset = NULL;
4015c71a4d4Srmind 	}
4025c71a4d4Srmind 
403b5e9adddSrmind 	if (SCARG(uap, pid) != 0) {
4045c71a4d4Srmind 		/* Find the process */
4050eaaa024Sad 		mutex_enter(&proc_lock);
4063c507045Srmind 		p = proc_find(SCARG(uap, pid));
4075c71a4d4Srmind 		if (p == NULL) {
4080eaaa024Sad 			mutex_exit(&proc_lock);
4095c71a4d4Srmind 			error = ESRCH;
410f30b5785Schristos 			goto out;
4115c71a4d4Srmind 		}
412284c2b9aSad 		mutex_enter(p->p_lock);
4130eaaa024Sad 		mutex_exit(&proc_lock);
4149a0b455fSad 		/* Disallow modification of system processes. */
4159a0b455fSad 		if ((p->p_flag & PK_SYSTEM) != 0) {
416284c2b9aSad 			mutex_exit(p->p_lock);
4179a0b455fSad 			error = EPERM;
418f30b5785Schristos 			goto out;
4199a0b455fSad 		}
420b5e9adddSrmind 	} else {
421b5e9adddSrmind 		/* Use the calling process */
422b5e9adddSrmind 		p = l->l_proc;
423284c2b9aSad 		mutex_enter(p->p_lock);
424b5e9adddSrmind 	}
4255c71a4d4Srmind 
42616b042cbSyamt 	/*
42716b042cbSyamt 	 * Check the permission.
42816b042cbSyamt 	 */
429e99760e7Selad 	error = kauth_authorize_process(l->l_cred,
430e99760e7Selad 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
43116b042cbSyamt 	if (error != 0) {
432284c2b9aSad 		mutex_exit(p->p_lock);
433f30b5785Schristos 		goto out;
43416b042cbSyamt 	}
4355c71a4d4Srmind 
436501dd321Srmind 	/* Iterate through LWP(s). */
4375c71a4d4Srmind 	lcnt = 0;
4385c71a4d4Srmind 	lid = SCARG(uap, lid);
4395c71a4d4Srmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
440501dd321Srmind 		if (lid && lid != t->l_lid) {
4415c71a4d4Srmind 			continue;
442501dd321Srmind 		}
4435c71a4d4Srmind 		lwp_lock(t);
444501dd321Srmind 		/* No affinity for zombie LWPs. */
4454f91cff0Srmind 		if (t->l_stat == LSZOMB) {
4464f91cff0Srmind 			lwp_unlock(t);
4474f91cff0Srmind 			continue;
4484f91cff0Srmind 		}
449501dd321Srmind 		/* First, release existing affinity, if any. */
450501dd321Srmind 		if (t->l_affinity) {
45152b220e9Srmind 			kcpuset_unuse(t->l_affinity, &kcpulst);
452501dd321Srmind 		}
453501dd321Srmind 		if (kcset) {
454501dd321Srmind 			/*
455501dd321Srmind 			 * Hold a reference on affinity mask, assign mask to
456501dd321Srmind 			 * LWP and migrate it to another CPU (unlocks LWP).
457501dd321Srmind 			 */
458501dd321Srmind 			kcpuset_use(kcset);
45952b220e9Srmind 			t->l_affinity = kcset;
4605c71a4d4Srmind 			lwp_migrate(t, ci);
4615c71a4d4Srmind 		} else {
462501dd321Srmind 			/* Old affinity mask is released, just clear. */
463f30b5785Schristos 			t->l_affinity = NULL;
4645c71a4d4Srmind 			lwp_unlock(t);
4655c71a4d4Srmind 		}
4665c71a4d4Srmind 		lcnt++;
4675c71a4d4Srmind 	}
468284c2b9aSad 	mutex_exit(p->p_lock);
46952b220e9Srmind 	if (lcnt == 0) {
4705c71a4d4Srmind 		error = ESRCH;
47152b220e9Srmind 	}
472f30b5785Schristos out:
4738f1873eaSrmind 	mutex_exit(&cpu_lock);
47452b220e9Srmind 
47552b220e9Srmind 	/*
47652b220e9Srmind 	 * Drop the initial reference (LWPs, if any, have the ownership now),
47752b220e9Srmind 	 * and destroy whatever is in the G/C list, if filled.
47852b220e9Srmind 	 */
47952b220e9Srmind 	if (kcset) {
48052b220e9Srmind 		kcpuset_unuse(kcset, &kcpulst);
48152b220e9Srmind 	}
48252b220e9Srmind 	if (kcpulst) {
48352b220e9Srmind 		kcpuset_destroy(kcpulst);
48452b220e9Srmind 	}
4855c71a4d4Srmind 	return error;
4865c71a4d4Srmind }
4875c71a4d4Srmind 
4885c71a4d4Srmind /*
4895c71a4d4Srmind  * Get affinity.
4905c71a4d4Srmind  */
4915c71a4d4Srmind int
sys__sched_getaffinity(struct lwp * l,const struct sys__sched_getaffinity_args * uap,register_t * retval)4925c71a4d4Srmind sys__sched_getaffinity(struct lwp *l,
4935c71a4d4Srmind     const struct sys__sched_getaffinity_args *uap, register_t *retval)
4945c71a4d4Srmind {
4955c71a4d4Srmind 	/* {
4965c71a4d4Srmind 		syscallarg(pid_t) pid;
4975c71a4d4Srmind 		syscallarg(lwpid_t) lid;
4985c71a4d4Srmind 		syscallarg(size_t) size;
499f30b5785Schristos 		syscallarg(cpuset_t *) cpuset;
5005c71a4d4Srmind 	} */
5015c71a4d4Srmind 	struct lwp *t;
50252b220e9Srmind 	kcpuset_t *kcset;
5035c71a4d4Srmind 	int error;
5045c71a4d4Srmind 
5056925a27fSthorpej 	if (SCARG(uap, pid) < 0 || SCARG(uap, lid) < 0)
5066925a27fSthorpej 		return EINVAL;
5076925a27fSthorpej 
50852b220e9Srmind 	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
5098f1873eaSrmind 	if (error)
510f30b5785Schristos 		return error;
5115c71a4d4Srmind 
5125c71a4d4Srmind 	/* Locks the LWP */
5139850c055Srmind 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
5145c71a4d4Srmind 	if (t == NULL) {
515f30b5785Schristos 		error = ESRCH;
516f30b5785Schristos 		goto out;
5175c71a4d4Srmind 	}
51816b042cbSyamt 	/* Check the permission */
519e99760e7Selad 	if (kauth_authorize_process(l->l_cred,
520e99760e7Selad 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
521284c2b9aSad 		mutex_exit(t->l_proc->p_lock);
522f30b5785Schristos 		error = EPERM;
523f30b5785Schristos 		goto out;
52416b042cbSyamt 	}
525284c2b9aSad 	lwp_lock(t);
526501dd321Srmind 	if (t->l_affinity) {
52752b220e9Srmind 		kcpuset_copy(kcset, t->l_affinity);
52852b220e9Srmind 	} else {
52952b220e9Srmind 		kcpuset_zero(kcset);
53052b220e9Srmind 	}
5315c71a4d4Srmind 	lwp_unlock(t);
532284c2b9aSad 	mutex_exit(t->l_proc->p_lock);
5335c71a4d4Srmind 
53452b220e9Srmind 	error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
535f30b5785Schristos out:
53652b220e9Srmind 	kcpuset_unuse(kcset, NULL);
5375c71a4d4Srmind 	return error;
5385c71a4d4Srmind }
5395c71a4d4Srmind 
5405c71a4d4Srmind /*
5417cf7644fSchristos  * Priority protection for PTHREAD_PRIO_PROTECT. This is a weak
5427cf7644fSchristos  * analogue of priority inheritance: temp raise the priority
5437cf7644fSchristos  * of the caller when accessing a protected resource.
5447cf7644fSchristos  */
5457cf7644fSchristos int
sys__sched_protect(struct lwp * l,const struct sys__sched_protect_args * uap,register_t * retval)5467cf7644fSchristos sys__sched_protect(struct lwp *l,
5477cf7644fSchristos     const struct sys__sched_protect_args *uap, register_t *retval)
5487cf7644fSchristos {
5497cf7644fSchristos         /* {
5507cf7644fSchristos                 syscallarg(int) priority;
5517cf7644fSchristos 		syscallarg(int *) opriority;
5527cf7644fSchristos         } */
5537cf7644fSchristos 	int error;
5547cf7644fSchristos 	pri_t pri;
5557cf7644fSchristos 
5567cf7644fSchristos 	KASSERT(l->l_inheritedprio == -1);
5577cf7644fSchristos 	KASSERT(l->l_auxprio == -1 || l->l_auxprio == l->l_protectprio);
5587cf7644fSchristos 
5597cf7644fSchristos 	pri = SCARG(uap, priority);
5607cf7644fSchristos 	error = 0;
5617cf7644fSchristos 	lwp_lock(l);
5627cf7644fSchristos 	if (pri == -1) {
5637cf7644fSchristos 		/* back out priority changes */
5647cf7644fSchristos 		switch(l->l_protectdepth) {
5657cf7644fSchristos 		case 0:
5667cf7644fSchristos 			error = EINVAL;
5677cf7644fSchristos 			break;
5687cf7644fSchristos 		case 1:
5697cf7644fSchristos 			l->l_protectdepth = 0;
5707cf7644fSchristos 			l->l_protectprio = -1;
5717cf7644fSchristos 			l->l_auxprio = -1;
5727cf7644fSchristos 			break;
5737cf7644fSchristos 		default:
5747cf7644fSchristos 			l->l_protectdepth--;
5757cf7644fSchristos 			break;
5767cf7644fSchristos 		}
5777cf7644fSchristos 	} else if (pri < 0) {
5787cf7644fSchristos 		/* Just retrieve the current value, for debugging */
579b265873dSchristos 		if (l->l_protectprio == -1)
5807cf7644fSchristos 			error = ENOENT;
5817cf7644fSchristos 		else
5827cf7644fSchristos 			*retval = l->l_protectprio - PRI_USER_RT;
5837cf7644fSchristos 	} else if (__predict_false(pri < SCHED_PRI_MIN ||
5847cf7644fSchristos 	    pri > SCHED_PRI_MAX || l->l_priority > pri + PRI_USER_RT)) {
5857cf7644fSchristos 		/* must fail if existing priority is higher */
5867cf7644fSchristos 		error = EPERM;
5877cf7644fSchristos 	} else {
5887cf7644fSchristos 		/* play along but make no changes if not a realtime LWP. */
5897cf7644fSchristos 		l->l_protectdepth++;
5907cf7644fSchristos 		pri += PRI_USER_RT;
5917cf7644fSchristos 		if (__predict_true(l->l_class != SCHED_OTHER &&
5927cf7644fSchristos 		    pri > l->l_protectprio)) {
5937cf7644fSchristos 			l->l_protectprio = pri;
5947cf7644fSchristos 			l->l_auxprio = pri;
5957cf7644fSchristos 		}
5967cf7644fSchristos 	}
5977cf7644fSchristos 	lwp_unlock(l);
5987cf7644fSchristos 
5997cf7644fSchristos 	return error;
6007cf7644fSchristos }
6017cf7644fSchristos 
6027cf7644fSchristos /*
6035c71a4d4Srmind  * Yield.
6045c71a4d4Srmind  */
605606e323bSad int
sys_sched_yield(struct lwp * l,const void * v,register_t * retval)6067e2790cfSdsl sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
607606e323bSad {
608606e323bSad 
609606e323bSad 	yield();
610606e323bSad 	return 0;
611606e323bSad }
6125c71a4d4Srmind 
6135c71a4d4Srmind /*
6145c71a4d4Srmind  * Sysctl nodes and initialization.
6155c71a4d4Srmind  */
616b2f37683Selad static void
sysctl_sched_setup(struct sysctllog ** clog)617b2f37683Selad sysctl_sched_setup(struct sysctllog **clog)
6185c71a4d4Srmind {
6195c71a4d4Srmind 	const struct sysctlnode *node = NULL;
6205c71a4d4Srmind 
6215c71a4d4Srmind 	sysctl_createv(clog, 0, NULL, NULL,
6225c71a4d4Srmind 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
6235c71a4d4Srmind 		CTLTYPE_INT, "posix_sched",
6245c71a4d4Srmind 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
6255c71a4d4Srmind 			     "Process Scheduling option to which the "
6265c71a4d4Srmind 			     "system attempts to conform"),
6275c71a4d4Srmind 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
6285c71a4d4Srmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
6295c71a4d4Srmind 	sysctl_createv(clog, 0, NULL, &node,
6305c71a4d4Srmind 		CTLFLAG_PERMANENT,
6315c71a4d4Srmind 		CTLTYPE_NODE, "sched",
6325c71a4d4Srmind 		SYSCTL_DESCR("Scheduler options"),
6335c71a4d4Srmind 		NULL, 0, NULL, 0,
6345c71a4d4Srmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
6355c71a4d4Srmind 
6365c71a4d4Srmind 	if (node == NULL)
6375c71a4d4Srmind 		return;
6385c71a4d4Srmind 
6395c71a4d4Srmind 	sysctl_createv(clog, 0, &node, NULL,
6405c71a4d4Srmind 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
6415c71a4d4Srmind 		CTLTYPE_INT, "pri_min",
6425c71a4d4Srmind 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
6435c71a4d4Srmind 		NULL, SCHED_PRI_MIN, NULL, 0,
6445c71a4d4Srmind 		CTL_CREATE, CTL_EOL);
6455c71a4d4Srmind 	sysctl_createv(clog, 0, &node, NULL,
6465c71a4d4Srmind 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
6475c71a4d4Srmind 		CTLTYPE_INT, "pri_max",
648425fc32dSnjoly 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
6495c71a4d4Srmind 		NULL, SCHED_PRI_MAX, NULL, 0,
6505c71a4d4Srmind 		CTL_CREATE, CTL_EOL);
6515c71a4d4Srmind }
652b2f37683Selad 
653b2f37683Selad static int
sched_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)654b2f37683Selad sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
655b2f37683Selad     void *arg0, void *arg1, void *arg2, void *arg3)
656b2f37683Selad {
657b2f37683Selad 	struct proc *p;
658b2f37683Selad 	int result;
659b2f37683Selad 
660b2f37683Selad 	result = KAUTH_RESULT_DEFER;
661b2f37683Selad 	p = arg0;
662b2f37683Selad 
663b2f37683Selad 	switch (action) {
664b2f37683Selad 	case KAUTH_PROCESS_SCHEDULER_GETPARAM:
665b2f37683Selad 		if (kauth_cred_uidmatch(cred, p->p_cred))
666b2f37683Selad 			result = KAUTH_RESULT_ALLOW;
667b2f37683Selad 		break;
668b2f37683Selad 
669b2f37683Selad 	case KAUTH_PROCESS_SCHEDULER_SETPARAM:
670b2f37683Selad 		if (kauth_cred_uidmatch(cred, p->p_cred)) {
671b2f37683Selad 			struct lwp *l;
672b2f37683Selad 			int policy;
673b2f37683Selad 			pri_t priority;
674b2f37683Selad 
675b2f37683Selad 			l = arg1;
676b2f37683Selad 			policy = (int)(unsigned long)arg2;
677b2f37683Selad 			priority = (pri_t)(unsigned long)arg3;
678b2f37683Selad 
679b2f37683Selad 			if ((policy == l->l_class ||
680b2f37683Selad 			    (policy != SCHED_FIFO && policy != SCHED_RR)) &&
681b2f37683Selad 			    priority <= l->l_priority)
682b2f37683Selad 				result = KAUTH_RESULT_ALLOW;
683b2f37683Selad 		}
684b2f37683Selad 
685b2f37683Selad 		break;
686b2f37683Selad 
687b2f37683Selad 	case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
688b2f37683Selad 		result = KAUTH_RESULT_ALLOW;
689b2f37683Selad 		break;
690b2f37683Selad 
691b2f37683Selad 	case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
692b2f37683Selad 		/* Privileged; we let the secmodel handle this. */
693b2f37683Selad 		break;
694b2f37683Selad 
695b2f37683Selad 	default:
696b2f37683Selad 		break;
697b2f37683Selad 	}
698b2f37683Selad 
699b2f37683Selad 	return result;
700b2f37683Selad }
701b2f37683Selad 
702b2f37683Selad void
sched_init(void)703b2f37683Selad sched_init(void)
704b2f37683Selad {
705b2f37683Selad 
706b2f37683Selad 	sysctl_sched_setup(&sched_sysctl_log);
707b2f37683Selad 
708b2f37683Selad 	sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
709b2f37683Selad 	    sched_listener_cb, NULL);
710b2f37683Selad }
711