1*0dec6ba3Sriastradh /* $NetBSD: sys_sched.c,v 1.50 2023/04/09 09:18:09 riastradh Exp $ */
2606e323bSad
35c71a4d4Srmind /*
452b220e9Srmind * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
5606e323bSad * All rights reserved.
6606e323bSad *
7606e323bSad * Redistribution and use in source and binary forms, with or without
8606e323bSad * modification, are permitted provided that the following conditions
9606e323bSad * are met:
10606e323bSad * 1. Redistributions of source code must retain the above copyright
11606e323bSad * notice, this list of conditions and the following disclaimer.
12606e323bSad * 2. Redistributions in binary form must reproduce the above copyright
13606e323bSad * notice, this list of conditions and the following disclaimer in the
14606e323bSad * documentation and/or other materials provided with the distribution.
15606e323bSad *
169850c055Srmind * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
179850c055Srmind * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
189850c055Srmind * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
199850c055Srmind * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
209850c055Srmind * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
219850c055Srmind * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
229850c055Srmind * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
239850c055Srmind * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
249850c055Srmind * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
259850c055Srmind * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
269850c055Srmind * SUCH DAMAGE.
27606e323bSad */
28606e323bSad
/*
 * System calls relating to the scheduler.
 *
 * Lock order:
 *
 *	cpu_lock ->
 *	    proc_lock ->
 *		proc_t::p_lock ->
 *		    lwp_t::lwp_lock
 *
 * TODO:
 *  - Handle pthread_setschedprio() as defined by POSIX;
 */
425c71a4d4Srmind
43606e323bSad #include <sys/cdefs.h>
44*0dec6ba3Sriastradh __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.50 2023/04/09 09:18:09 riastradh Exp $");
45606e323bSad
46606e323bSad #include <sys/param.h>
47606e323bSad
485c71a4d4Srmind #include <sys/cpu.h>
495c71a4d4Srmind #include <sys/kauth.h>
505c71a4d4Srmind #include <sys/kmem.h>
515c71a4d4Srmind #include <sys/lwp.h>
525c71a4d4Srmind #include <sys/mutex.h>
535c71a4d4Srmind #include <sys/proc.h>
545c71a4d4Srmind #include <sys/pset.h>
555c71a4d4Srmind #include <sys/sched.h>
565c71a4d4Srmind #include <sys/syscallargs.h>
575c71a4d4Srmind #include <sys/sysctl.h>
585c71a4d4Srmind #include <sys/systm.h>
595c71a4d4Srmind #include <sys/types.h>
605c71a4d4Srmind #include <sys/unistd.h>
615c71a4d4Srmind
62b2f37683Selad static struct sysctllog *sched_sysctl_log;
63b2f37683Selad static kauth_listener_t sched_listener;
64b2f37683Selad
655c71a4d4Srmind /*
66b5e9adddSrmind * Convert user priority or the in-kernel priority or convert the current
67b5e9adddSrmind * priority to the appropriate range according to the policy change.
68b5e9adddSrmind */
69b5e9adddSrmind static pri_t
convert_pri(lwp_t * l,int policy,pri_t pri)70b5e9adddSrmind convert_pri(lwp_t *l, int policy, pri_t pri)
71b5e9adddSrmind {
72b5e9adddSrmind
73b5e9adddSrmind /* Convert user priority to the in-kernel */
74d5ea013eSrmind if (pri != PRI_NONE) {
75d5ea013eSrmind /* Only for real-time threads */
76*0dec6ba3Sriastradh KASSERT(pri >= SCHED_PRI_MIN);
77*0dec6ba3Sriastradh KASSERT(pri <= SCHED_PRI_MAX);
78d5ea013eSrmind KASSERT(policy != SCHED_OTHER);
79d5ea013eSrmind return PRI_USER_RT + pri;
80b5e9adddSrmind }
81d5ea013eSrmind
82d5ea013eSrmind /* Neither policy, nor priority change */
83b5e9adddSrmind if (l->l_class == policy)
84b5e9adddSrmind return l->l_priority;
85b5e9adddSrmind
86d5ea013eSrmind /* Time-sharing -> real-time */
87b5e9adddSrmind if (l->l_class == SCHED_OTHER) {
88b5e9adddSrmind KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
89d5ea013eSrmind return PRI_USER_RT;
90b5e9adddSrmind }
91d5ea013eSrmind
92d5ea013eSrmind /* Real-time -> time-sharing */
93b5e9adddSrmind if (policy == SCHED_OTHER) {
94b5e9adddSrmind KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
956e7d55c5Syamt /*
966e7d55c5Syamt * this is a bit arbitrary because the priority is dynamic
976e7d55c5Syamt * for SCHED_OTHER threads and will likely be changed by
986e7d55c5Syamt * the scheduler soon anyway.
996e7d55c5Syamt */
100d5ea013eSrmind return l->l_priority - PRI_USER_RT;
101b5e9adddSrmind }
102d5ea013eSrmind
103d5ea013eSrmind /* Real-time -> real-time */
104d5ea013eSrmind return l->l_priority;
105b5e9adddSrmind }
106b5e9adddSrmind
1075c71a4d4Srmind int
do_sched_setparam(pid_t pid,lwpid_t lid,int policy,const struct sched_param * params)10867470a76Selad do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
10967470a76Selad const struct sched_param *params)
1105c71a4d4Srmind {
1115c71a4d4Srmind struct proc *p;
1125c71a4d4Srmind struct lwp *t;
1135c71a4d4Srmind pri_t pri;
11467470a76Selad u_int lcnt;
1155c71a4d4Srmind int error;
1165c71a4d4Srmind
11767470a76Selad error = 0;
11867470a76Selad
11967470a76Selad pri = params->sched_priority;
1205c71a4d4Srmind
121b5e9adddSrmind /* If no parameters specified, just return (this should not happen) */
122b5e9adddSrmind if (pri == PRI_NONE && policy == SCHED_NONE)
123b5e9adddSrmind return 0;
124b5e9adddSrmind
125b5e9adddSrmind /* Validate scheduling class */
126b5e9adddSrmind if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
127b5e9adddSrmind return EINVAL;
128b5e9adddSrmind
129b5e9adddSrmind /* Validate priority */
130b5e9adddSrmind if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
131b5e9adddSrmind return EINVAL;
132b5e9adddSrmind
13367470a76Selad if (pid != 0) {
1345c71a4d4Srmind /* Find the process */
1350eaaa024Sad mutex_enter(&proc_lock);
1363c507045Srmind p = proc_find(pid);
1376d70f903Sad if (p == NULL) {
1380eaaa024Sad mutex_exit(&proc_lock);
139b5e9adddSrmind return ESRCH;
1406d70f903Sad }
141284c2b9aSad mutex_enter(p->p_lock);
1420eaaa024Sad mutex_exit(&proc_lock);
1435c71a4d4Srmind /* Disallow modification of system processes */
1449a0b455fSad if ((p->p_flag & PK_SYSTEM) != 0) {
145284c2b9aSad mutex_exit(p->p_lock);
146b5e9adddSrmind return EPERM;
147b5e9adddSrmind }
148b5e9adddSrmind } else {
149b5e9adddSrmind /* Use the calling process */
15067470a76Selad p = curlwp->l_proc;
151284c2b9aSad mutex_enter(p->p_lock);
1525c71a4d4Srmind }
1535c71a4d4Srmind
1545c71a4d4Srmind /* Find the LWP(s) */
1555c71a4d4Srmind lcnt = 0;
1565c71a4d4Srmind LIST_FOREACH(t, &p->p_lwps, l_sibling) {
157b5e9adddSrmind pri_t kpri;
1580bb7f5ccSelad int lpolicy;
1595c71a4d4Srmind
1605c71a4d4Srmind if (lid && lid != t->l_lid)
1615c71a4d4Srmind continue;
162d5ea013eSrmind
163cc0caabcSdrochner lcnt++;
164b5e9adddSrmind lwp_lock(t);
165d5ea013eSrmind lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
166b5e9adddSrmind
167d5ea013eSrmind /* Disallow setting of priority for SCHED_OTHER threads */
168a8552a3aSrmind if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
169d5ea013eSrmind lwp_unlock(t);
170d5ea013eSrmind error = EINVAL;
171d5ea013eSrmind break;
172d5ea013eSrmind }
1730bb7f5ccSelad
174d5ea013eSrmind /* Convert priority, if needed */
1750bb7f5ccSelad kpri = convert_pri(t, lpolicy, pri);
1760bb7f5ccSelad
1770bb7f5ccSelad /* Check the permission */
17867470a76Selad error = kauth_authorize_process(kauth_cred_get(),
1790bb7f5ccSelad KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
1800bb7f5ccSelad KAUTH_ARG(kpri));
181aa57485fSyamt if (error) {
182aa57485fSyamt lwp_unlock(t);
1830bb7f5ccSelad break;
184aa57485fSyamt }
1855c71a4d4Srmind
186d5ea013eSrmind /* Set the scheduling class, change the priority */
187d5ea013eSrmind t->l_class = lpolicy;
188b5e9adddSrmind lwp_changepri(t, kpri);
1895c71a4d4Srmind lwp_unlock(t);
1905c71a4d4Srmind }
191284c2b9aSad mutex_exit(p->p_lock);
192b5e9adddSrmind return (lcnt == 0) ? ESRCH : error;
1935c71a4d4Srmind }
1945c71a4d4Srmind
1955c71a4d4Srmind /*
19667470a76Selad * Set scheduling parameters.
19767470a76Selad */
19867470a76Selad int
sys__sched_setparam(struct lwp * l,const struct sys__sched_setparam_args * uap,register_t * retval)19967470a76Selad sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
20067470a76Selad register_t *retval)
20167470a76Selad {
20267470a76Selad /* {
20367470a76Selad syscallarg(pid_t) pid;
20467470a76Selad syscallarg(lwpid_t) lid;
20567470a76Selad syscallarg(int) policy;
20667470a76Selad syscallarg(const struct sched_param *) params;
20767470a76Selad } */
20867470a76Selad struct sched_param params;
20967470a76Selad int error;
21067470a76Selad
21167470a76Selad /* Get the parameters from the user-space */
21267470a76Selad error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
21367470a76Selad if (error)
21467470a76Selad goto out;
21567470a76Selad
21667470a76Selad error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
21767470a76Selad SCARG(uap, policy), ¶ms);
21867470a76Selad out:
2198f1873eaSrmind return error;
22067470a76Selad }
22167470a76Selad
2226e7d55c5Syamt /*
2236e7d55c5Syamt * do_sched_getparam:
2246e7d55c5Syamt *
2256e7d55c5Syamt * if lid=0, returns the parameter of the first LWP in the process.
2266e7d55c5Syamt */
22767470a76Selad int
do_sched_getparam(pid_t pid,lwpid_t lid,int * policy,struct sched_param * params)22867470a76Selad do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
22967470a76Selad struct sched_param *params)
23067470a76Selad {
23167470a76Selad struct sched_param lparams;
23267470a76Selad struct lwp *t;
23367470a76Selad int error, lpolicy;
23467470a76Selad
2356925a27fSthorpej if (pid < 0 || lid < 0)
2366925a27fSthorpej return EINVAL;
2376925a27fSthorpej
2386e7d55c5Syamt t = lwp_find2(pid, lid); /* acquire p_lock */
239284c2b9aSad if (t == NULL)
240284c2b9aSad return ESRCH;
24167470a76Selad
24267470a76Selad /* Check the permission */
24367470a76Selad error = kauth_authorize_process(kauth_cred_get(),
24467470a76Selad KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
24567470a76Selad if (error != 0) {
246284c2b9aSad mutex_exit(t->l_proc->p_lock);
247284c2b9aSad return error;
24867470a76Selad }
24967470a76Selad
250284c2b9aSad lwp_lock(t);
25167470a76Selad lparams.sched_priority = t->l_priority;
25267470a76Selad lpolicy = t->l_class;
2536e7d55c5Syamt lwp_unlock(t);
2546e7d55c5Syamt mutex_exit(t->l_proc->p_lock);
25567470a76Selad
2566e7d55c5Syamt /*
2576e7d55c5Syamt * convert to the user-visible priority value.
2586e7d55c5Syamt * it's an inversion of convert_pri().
2596e7d55c5Syamt *
2606e7d55c5Syamt * the SCHED_OTHER case is a bit arbitrary given that
2616e7d55c5Syamt * - we don't allow setting the priority.
2626e7d55c5Syamt * - the priority is dynamic.
2636e7d55c5Syamt */
26467470a76Selad switch (lpolicy) {
26567470a76Selad case SCHED_OTHER:
26667470a76Selad lparams.sched_priority -= PRI_USER;
26767470a76Selad break;
26867470a76Selad case SCHED_RR:
26967470a76Selad case SCHED_FIFO:
27067470a76Selad lparams.sched_priority -= PRI_USER_RT;
27167470a76Selad break;
27267470a76Selad }
27367470a76Selad
27467470a76Selad if (policy != NULL)
27567470a76Selad *policy = lpolicy;
27667470a76Selad
27767470a76Selad if (params != NULL)
27867470a76Selad *params = lparams;
27967470a76Selad
28067470a76Selad return error;
28167470a76Selad }
28267470a76Selad
28367470a76Selad /*
2845c71a4d4Srmind * Get scheduling parameters.
2855c71a4d4Srmind */
2865c71a4d4Srmind int
sys__sched_getparam(struct lwp * l,const struct sys__sched_getparam_args * uap,register_t * retval)2875c71a4d4Srmind sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
2885c71a4d4Srmind register_t *retval)
2895c71a4d4Srmind {
2905c71a4d4Srmind /* {
2915c71a4d4Srmind syscallarg(pid_t) pid;
2925c71a4d4Srmind syscallarg(lwpid_t) lid;
29316b042cbSyamt syscallarg(int *) policy;
2945c71a4d4Srmind syscallarg(struct sched_param *) params;
2955c71a4d4Srmind } */
29667470a76Selad struct sched_param params;
29716b042cbSyamt int error, policy;
2985c71a4d4Srmind
29967470a76Selad error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
30067470a76Selad ¶ms);
30167470a76Selad if (error)
30267470a76Selad goto out;
30316b042cbSyamt
30467470a76Selad error = copyout(¶ms, SCARG(uap, params), sizeof(params));
30516b042cbSyamt if (error == 0 && SCARG(uap, policy) != NULL)
30616b042cbSyamt error = copyout(&policy, SCARG(uap, policy), sizeof(int));
30767470a76Selad out:
3088f1873eaSrmind return error;
3095c71a4d4Srmind }
3105c71a4d4Srmind
3118f1873eaSrmind /*
3128f1873eaSrmind * Allocate the CPU set, and get it from userspace.
3138f1873eaSrmind */
314f30b5785Schristos static int
genkcpuset(kcpuset_t ** dset,const cpuset_t * sset,size_t size)3151d875fc7Schristos genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
316f30b5785Schristos {
31752b220e9Srmind kcpuset_t *kset;
318f30b5785Schristos int error;
319f30b5785Schristos
3200c794722Srmind kcpuset_create(&kset, true);
32152b220e9Srmind error = kcpuset_copyin(sset, kset, size);
32252b220e9Srmind if (error) {
32352b220e9Srmind kcpuset_unuse(kset, NULL);
32452b220e9Srmind } else {
32552b220e9Srmind *dset = kset;
32652b220e9Srmind }
327f30b5785Schristos return error;
328f30b5785Schristos }
329f30b5785Schristos
3305c71a4d4Srmind /*
3315c71a4d4Srmind * Set affinity.
3325c71a4d4Srmind */
3335c71a4d4Srmind int
sys__sched_setaffinity(struct lwp * l,const struct sys__sched_setaffinity_args * uap,register_t * retval)3345c71a4d4Srmind sys__sched_setaffinity(struct lwp *l,
3355c71a4d4Srmind const struct sys__sched_setaffinity_args *uap, register_t *retval)
3365c71a4d4Srmind {
3375c71a4d4Srmind /* {
3385c71a4d4Srmind syscallarg(pid_t) pid;
3395c71a4d4Srmind syscallarg(lwpid_t) lid;
3405c71a4d4Srmind syscallarg(size_t) size;
341f30b5785Schristos syscallarg(const cpuset_t *) cpuset;
3425c71a4d4Srmind } */
34352b220e9Srmind kcpuset_t *kcset, *kcpulst = NULL;
344909e7f42Srmind struct cpu_info *ici, *ci;
3455c71a4d4Srmind struct proc *p;
3465c71a4d4Srmind struct lwp *t;
3475c71a4d4Srmind CPU_INFO_ITERATOR cii;
348909e7f42Srmind bool alloff;
3495c71a4d4Srmind lwpid_t lid;
3505c71a4d4Srmind u_int lcnt;
3515c71a4d4Srmind int error;
3525c71a4d4Srmind
35352b220e9Srmind error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
3548f1873eaSrmind if (error)
355f30b5785Schristos return error;
3565c71a4d4Srmind
3578f1873eaSrmind /*
358909e7f42Srmind * Traverse _each_ CPU to:
359909e7f42Srmind * - Check that CPUs in the mask have no assigned processor set.
360909e7f42Srmind * - Check that at least one CPU from the mask is online.
361909e7f42Srmind * - Find the first target CPU to migrate.
3628f1873eaSrmind *
363909e7f42Srmind * To avoid the race with CPU online/offline calls and processor sets,
364909e7f42Srmind * cpu_lock will be locked for the entire operation.
3658f1873eaSrmind */
366909e7f42Srmind ci = NULL;
367909e7f42Srmind alloff = false;
3688f1873eaSrmind mutex_enter(&cpu_lock);
369909e7f42Srmind for (CPU_INFO_FOREACH(cii, ici)) {
370909e7f42Srmind struct schedstate_percpu *ispc;
3718f1873eaSrmind
372f7666738Srmind if (!kcpuset_isset(kcset, cpu_index(ici))) {
3738f1873eaSrmind continue;
374f7666738Srmind }
375909e7f42Srmind
376909e7f42Srmind ispc = &ici->ci_schedstate;
377909e7f42Srmind /* Check that CPU is not in the processor-set */
378909e7f42Srmind if (ispc->spc_psid != PS_NONE) {
379909e7f42Srmind error = EPERM;
380909e7f42Srmind goto out;
381909e7f42Srmind }
382909e7f42Srmind /* Skip offline CPUs */
383909e7f42Srmind if (ispc->spc_flags & SPCF_OFFLINE) {
384909e7f42Srmind alloff = true;
3858f1873eaSrmind continue;
386a6092d3cSrmind }
387909e7f42Srmind /* Target CPU to migrate */
388909e7f42Srmind if (ci == NULL) {
389909e7f42Srmind ci = ici;
390909e7f42Srmind }
391f30b5785Schristos }
3925c71a4d4Srmind if (ci == NULL) {
393909e7f42Srmind if (alloff) {
3948f1873eaSrmind /* All CPUs in the set are offline */
3958f1873eaSrmind error = EPERM;
3968f1873eaSrmind goto out;
3978f1873eaSrmind }
3985c71a4d4Srmind /* Empty set */
39952b220e9Srmind kcpuset_unuse(kcset, &kcpulst);
40052b220e9Srmind kcset = NULL;
4015c71a4d4Srmind }
4025c71a4d4Srmind
403b5e9adddSrmind if (SCARG(uap, pid) != 0) {
4045c71a4d4Srmind /* Find the process */
4050eaaa024Sad mutex_enter(&proc_lock);
4063c507045Srmind p = proc_find(SCARG(uap, pid));
4075c71a4d4Srmind if (p == NULL) {
4080eaaa024Sad mutex_exit(&proc_lock);
4095c71a4d4Srmind error = ESRCH;
410f30b5785Schristos goto out;
4115c71a4d4Srmind }
412284c2b9aSad mutex_enter(p->p_lock);
4130eaaa024Sad mutex_exit(&proc_lock);
4149a0b455fSad /* Disallow modification of system processes. */
4159a0b455fSad if ((p->p_flag & PK_SYSTEM) != 0) {
416284c2b9aSad mutex_exit(p->p_lock);
4179a0b455fSad error = EPERM;
418f30b5785Schristos goto out;
4199a0b455fSad }
420b5e9adddSrmind } else {
421b5e9adddSrmind /* Use the calling process */
422b5e9adddSrmind p = l->l_proc;
423284c2b9aSad mutex_enter(p->p_lock);
424b5e9adddSrmind }
4255c71a4d4Srmind
42616b042cbSyamt /*
42716b042cbSyamt * Check the permission.
42816b042cbSyamt */
429e99760e7Selad error = kauth_authorize_process(l->l_cred,
430e99760e7Selad KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
43116b042cbSyamt if (error != 0) {
432284c2b9aSad mutex_exit(p->p_lock);
433f30b5785Schristos goto out;
43416b042cbSyamt }
4355c71a4d4Srmind
436501dd321Srmind /* Iterate through LWP(s). */
4375c71a4d4Srmind lcnt = 0;
4385c71a4d4Srmind lid = SCARG(uap, lid);
4395c71a4d4Srmind LIST_FOREACH(t, &p->p_lwps, l_sibling) {
440501dd321Srmind if (lid && lid != t->l_lid) {
4415c71a4d4Srmind continue;
442501dd321Srmind }
4435c71a4d4Srmind lwp_lock(t);
444501dd321Srmind /* No affinity for zombie LWPs. */
4454f91cff0Srmind if (t->l_stat == LSZOMB) {
4464f91cff0Srmind lwp_unlock(t);
4474f91cff0Srmind continue;
4484f91cff0Srmind }
449501dd321Srmind /* First, release existing affinity, if any. */
450501dd321Srmind if (t->l_affinity) {
45152b220e9Srmind kcpuset_unuse(t->l_affinity, &kcpulst);
452501dd321Srmind }
453501dd321Srmind if (kcset) {
454501dd321Srmind /*
455501dd321Srmind * Hold a reference on affinity mask, assign mask to
456501dd321Srmind * LWP and migrate it to another CPU (unlocks LWP).
457501dd321Srmind */
458501dd321Srmind kcpuset_use(kcset);
45952b220e9Srmind t->l_affinity = kcset;
4605c71a4d4Srmind lwp_migrate(t, ci);
4615c71a4d4Srmind } else {
462501dd321Srmind /* Old affinity mask is released, just clear. */
463f30b5785Schristos t->l_affinity = NULL;
4645c71a4d4Srmind lwp_unlock(t);
4655c71a4d4Srmind }
4665c71a4d4Srmind lcnt++;
4675c71a4d4Srmind }
468284c2b9aSad mutex_exit(p->p_lock);
46952b220e9Srmind if (lcnt == 0) {
4705c71a4d4Srmind error = ESRCH;
47152b220e9Srmind }
472f30b5785Schristos out:
4738f1873eaSrmind mutex_exit(&cpu_lock);
47452b220e9Srmind
47552b220e9Srmind /*
47652b220e9Srmind * Drop the initial reference (LWPs, if any, have the ownership now),
47752b220e9Srmind * and destroy whatever is in the G/C list, if filled.
47852b220e9Srmind */
47952b220e9Srmind if (kcset) {
48052b220e9Srmind kcpuset_unuse(kcset, &kcpulst);
48152b220e9Srmind }
48252b220e9Srmind if (kcpulst) {
48352b220e9Srmind kcpuset_destroy(kcpulst);
48452b220e9Srmind }
4855c71a4d4Srmind return error;
4865c71a4d4Srmind }
4875c71a4d4Srmind
4885c71a4d4Srmind /*
4895c71a4d4Srmind * Get affinity.
4905c71a4d4Srmind */
4915c71a4d4Srmind int
sys__sched_getaffinity(struct lwp * l,const struct sys__sched_getaffinity_args * uap,register_t * retval)4925c71a4d4Srmind sys__sched_getaffinity(struct lwp *l,
4935c71a4d4Srmind const struct sys__sched_getaffinity_args *uap, register_t *retval)
4945c71a4d4Srmind {
4955c71a4d4Srmind /* {
4965c71a4d4Srmind syscallarg(pid_t) pid;
4975c71a4d4Srmind syscallarg(lwpid_t) lid;
4985c71a4d4Srmind syscallarg(size_t) size;
499f30b5785Schristos syscallarg(cpuset_t *) cpuset;
5005c71a4d4Srmind } */
5015c71a4d4Srmind struct lwp *t;
50252b220e9Srmind kcpuset_t *kcset;
5035c71a4d4Srmind int error;
5045c71a4d4Srmind
5056925a27fSthorpej if (SCARG(uap, pid) < 0 || SCARG(uap, lid) < 0)
5066925a27fSthorpej return EINVAL;
5076925a27fSthorpej
50852b220e9Srmind error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
5098f1873eaSrmind if (error)
510f30b5785Schristos return error;
5115c71a4d4Srmind
5125c71a4d4Srmind /* Locks the LWP */
5139850c055Srmind t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
5145c71a4d4Srmind if (t == NULL) {
515f30b5785Schristos error = ESRCH;
516f30b5785Schristos goto out;
5175c71a4d4Srmind }
51816b042cbSyamt /* Check the permission */
519e99760e7Selad if (kauth_authorize_process(l->l_cred,
520e99760e7Selad KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
521284c2b9aSad mutex_exit(t->l_proc->p_lock);
522f30b5785Schristos error = EPERM;
523f30b5785Schristos goto out;
52416b042cbSyamt }
525284c2b9aSad lwp_lock(t);
526501dd321Srmind if (t->l_affinity) {
52752b220e9Srmind kcpuset_copy(kcset, t->l_affinity);
52852b220e9Srmind } else {
52952b220e9Srmind kcpuset_zero(kcset);
53052b220e9Srmind }
5315c71a4d4Srmind lwp_unlock(t);
532284c2b9aSad mutex_exit(t->l_proc->p_lock);
5335c71a4d4Srmind
53452b220e9Srmind error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
535f30b5785Schristos out:
53652b220e9Srmind kcpuset_unuse(kcset, NULL);
5375c71a4d4Srmind return error;
5385c71a4d4Srmind }
5395c71a4d4Srmind
5405c71a4d4Srmind /*
5417cf7644fSchristos * Priority protection for PTHREAD_PRIO_PROTECT. This is a weak
5427cf7644fSchristos * analogue of priority inheritance: temp raise the priority
5437cf7644fSchristos * of the caller when accessing a protected resource.
5447cf7644fSchristos */
5457cf7644fSchristos int
sys__sched_protect(struct lwp * l,const struct sys__sched_protect_args * uap,register_t * retval)5467cf7644fSchristos sys__sched_protect(struct lwp *l,
5477cf7644fSchristos const struct sys__sched_protect_args *uap, register_t *retval)
5487cf7644fSchristos {
5497cf7644fSchristos /* {
5507cf7644fSchristos syscallarg(int) priority;
5517cf7644fSchristos syscallarg(int *) opriority;
5527cf7644fSchristos } */
5537cf7644fSchristos int error;
5547cf7644fSchristos pri_t pri;
5557cf7644fSchristos
5567cf7644fSchristos KASSERT(l->l_inheritedprio == -1);
5577cf7644fSchristos KASSERT(l->l_auxprio == -1 || l->l_auxprio == l->l_protectprio);
5587cf7644fSchristos
5597cf7644fSchristos pri = SCARG(uap, priority);
5607cf7644fSchristos error = 0;
5617cf7644fSchristos lwp_lock(l);
5627cf7644fSchristos if (pri == -1) {
5637cf7644fSchristos /* back out priority changes */
5647cf7644fSchristos switch(l->l_protectdepth) {
5657cf7644fSchristos case 0:
5667cf7644fSchristos error = EINVAL;
5677cf7644fSchristos break;
5687cf7644fSchristos case 1:
5697cf7644fSchristos l->l_protectdepth = 0;
5707cf7644fSchristos l->l_protectprio = -1;
5717cf7644fSchristos l->l_auxprio = -1;
5727cf7644fSchristos break;
5737cf7644fSchristos default:
5747cf7644fSchristos l->l_protectdepth--;
5757cf7644fSchristos break;
5767cf7644fSchristos }
5777cf7644fSchristos } else if (pri < 0) {
5787cf7644fSchristos /* Just retrieve the current value, for debugging */
579b265873dSchristos if (l->l_protectprio == -1)
5807cf7644fSchristos error = ENOENT;
5817cf7644fSchristos else
5827cf7644fSchristos *retval = l->l_protectprio - PRI_USER_RT;
5837cf7644fSchristos } else if (__predict_false(pri < SCHED_PRI_MIN ||
5847cf7644fSchristos pri > SCHED_PRI_MAX || l->l_priority > pri + PRI_USER_RT)) {
5857cf7644fSchristos /* must fail if existing priority is higher */
5867cf7644fSchristos error = EPERM;
5877cf7644fSchristos } else {
5887cf7644fSchristos /* play along but make no changes if not a realtime LWP. */
5897cf7644fSchristos l->l_protectdepth++;
5907cf7644fSchristos pri += PRI_USER_RT;
5917cf7644fSchristos if (__predict_true(l->l_class != SCHED_OTHER &&
5927cf7644fSchristos pri > l->l_protectprio)) {
5937cf7644fSchristos l->l_protectprio = pri;
5947cf7644fSchristos l->l_auxprio = pri;
5957cf7644fSchristos }
5967cf7644fSchristos }
5977cf7644fSchristos lwp_unlock(l);
5987cf7644fSchristos
5997cf7644fSchristos return error;
6007cf7644fSchristos }
6017cf7644fSchristos
6027cf7644fSchristos /*
6035c71a4d4Srmind * Yield.
6045c71a4d4Srmind */
605606e323bSad int
sys_sched_yield(struct lwp * l,const void * v,register_t * retval)6067e2790cfSdsl sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
607606e323bSad {
608606e323bSad
609606e323bSad yield();
610606e323bSad return 0;
611606e323bSad }
6125c71a4d4Srmind
6135c71a4d4Srmind /*
6145c71a4d4Srmind * Sysctl nodes and initialization.
6155c71a4d4Srmind */
616b2f37683Selad static void
sysctl_sched_setup(struct sysctllog ** clog)617b2f37683Selad sysctl_sched_setup(struct sysctllog **clog)
6185c71a4d4Srmind {
6195c71a4d4Srmind const struct sysctlnode *node = NULL;
6205c71a4d4Srmind
6215c71a4d4Srmind sysctl_createv(clog, 0, NULL, NULL,
6225c71a4d4Srmind CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
6235c71a4d4Srmind CTLTYPE_INT, "posix_sched",
6245c71a4d4Srmind SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
6255c71a4d4Srmind "Process Scheduling option to which the "
6265c71a4d4Srmind "system attempts to conform"),
6275c71a4d4Srmind NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
6285c71a4d4Srmind CTL_KERN, CTL_CREATE, CTL_EOL);
6295c71a4d4Srmind sysctl_createv(clog, 0, NULL, &node,
6305c71a4d4Srmind CTLFLAG_PERMANENT,
6315c71a4d4Srmind CTLTYPE_NODE, "sched",
6325c71a4d4Srmind SYSCTL_DESCR("Scheduler options"),
6335c71a4d4Srmind NULL, 0, NULL, 0,
6345c71a4d4Srmind CTL_KERN, CTL_CREATE, CTL_EOL);
6355c71a4d4Srmind
6365c71a4d4Srmind if (node == NULL)
6375c71a4d4Srmind return;
6385c71a4d4Srmind
6395c71a4d4Srmind sysctl_createv(clog, 0, &node, NULL,
6405c71a4d4Srmind CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
6415c71a4d4Srmind CTLTYPE_INT, "pri_min",
6425c71a4d4Srmind SYSCTL_DESCR("Minimal POSIX real-time priority"),
6435c71a4d4Srmind NULL, SCHED_PRI_MIN, NULL, 0,
6445c71a4d4Srmind CTL_CREATE, CTL_EOL);
6455c71a4d4Srmind sysctl_createv(clog, 0, &node, NULL,
6465c71a4d4Srmind CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
6475c71a4d4Srmind CTLTYPE_INT, "pri_max",
648425fc32dSnjoly SYSCTL_DESCR("Maximal POSIX real-time priority"),
6495c71a4d4Srmind NULL, SCHED_PRI_MAX, NULL, 0,
6505c71a4d4Srmind CTL_CREATE, CTL_EOL);
6515c71a4d4Srmind }
652b2f37683Selad
653b2f37683Selad static int
sched_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)654b2f37683Selad sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
655b2f37683Selad void *arg0, void *arg1, void *arg2, void *arg3)
656b2f37683Selad {
657b2f37683Selad struct proc *p;
658b2f37683Selad int result;
659b2f37683Selad
660b2f37683Selad result = KAUTH_RESULT_DEFER;
661b2f37683Selad p = arg0;
662b2f37683Selad
663b2f37683Selad switch (action) {
664b2f37683Selad case KAUTH_PROCESS_SCHEDULER_GETPARAM:
665b2f37683Selad if (kauth_cred_uidmatch(cred, p->p_cred))
666b2f37683Selad result = KAUTH_RESULT_ALLOW;
667b2f37683Selad break;
668b2f37683Selad
669b2f37683Selad case KAUTH_PROCESS_SCHEDULER_SETPARAM:
670b2f37683Selad if (kauth_cred_uidmatch(cred, p->p_cred)) {
671b2f37683Selad struct lwp *l;
672b2f37683Selad int policy;
673b2f37683Selad pri_t priority;
674b2f37683Selad
675b2f37683Selad l = arg1;
676b2f37683Selad policy = (int)(unsigned long)arg2;
677b2f37683Selad priority = (pri_t)(unsigned long)arg3;
678b2f37683Selad
679b2f37683Selad if ((policy == l->l_class ||
680b2f37683Selad (policy != SCHED_FIFO && policy != SCHED_RR)) &&
681b2f37683Selad priority <= l->l_priority)
682b2f37683Selad result = KAUTH_RESULT_ALLOW;
683b2f37683Selad }
684b2f37683Selad
685b2f37683Selad break;
686b2f37683Selad
687b2f37683Selad case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
688b2f37683Selad result = KAUTH_RESULT_ALLOW;
689b2f37683Selad break;
690b2f37683Selad
691b2f37683Selad case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
692b2f37683Selad /* Privileged; we let the secmodel handle this. */
693b2f37683Selad break;
694b2f37683Selad
695b2f37683Selad default:
696b2f37683Selad break;
697b2f37683Selad }
698b2f37683Selad
699b2f37683Selad return result;
700b2f37683Selad }
701b2f37683Selad
702b2f37683Selad void
sched_init(void)703b2f37683Selad sched_init(void)
704b2f37683Selad {
705b2f37683Selad
706b2f37683Selad sysctl_sched_setup(&sched_sysctl_log);
707b2f37683Selad
708b2f37683Selad sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
709b2f37683Selad sched_listener_cb, NULL);
710b2f37683Selad }
711