1*0eaaa024Sad /* $NetBSD: sys_pset.c,v 1.24 2020/05/23 23:42:43 ad Exp $ */
25c71a4d4Srmind
35c71a4d4Srmind /*
45c71a4d4Srmind * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
55c71a4d4Srmind * All rights reserved.
65c71a4d4Srmind *
75c71a4d4Srmind * Redistribution and use in source and binary forms, with or without
85c71a4d4Srmind * modification, are permitted provided that the following conditions
95c71a4d4Srmind * are met:
105c71a4d4Srmind * 1. Redistributions of source code must retain the above copyright
115c71a4d4Srmind * notice, this list of conditions and the following disclaimer.
125c71a4d4Srmind * 2. Redistributions in binary form must reproduce the above copyright
135c71a4d4Srmind * notice, this list of conditions and the following disclaimer in the
145c71a4d4Srmind * documentation and/or other materials provided with the distribution.
155c71a4d4Srmind *
1606171502Srmind * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1706171502Srmind * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1806171502Srmind * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1906171502Srmind * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2006171502Srmind * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2106171502Srmind * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2206171502Srmind * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2306171502Srmind * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2406171502Srmind * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2506171502Srmind * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2606171502Srmind * SUCH DAMAGE.
275c71a4d4Srmind */
285c71a4d4Srmind
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of the processor-set structures and its members are protected
 *  by the global cpu_lock.  Note that in the scheduler, the l_psid value
 *  itself might be used without the lock held.
 */
375c71a4d4Srmind
385c71a4d4Srmind #include <sys/cdefs.h>
39*0eaaa024Sad __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.24 2020/05/23 23:42:43 ad Exp $");
405c71a4d4Srmind
415c71a4d4Srmind #include <sys/param.h>
425c71a4d4Srmind
435c71a4d4Srmind #include <sys/cpu.h>
445c71a4d4Srmind #include <sys/kauth.h>
455c71a4d4Srmind #include <sys/kmem.h>
465c71a4d4Srmind #include <sys/lwp.h>
475c71a4d4Srmind #include <sys/mutex.h>
485c71a4d4Srmind #include <sys/proc.h>
495c71a4d4Srmind #include <sys/pset.h>
505c71a4d4Srmind #include <sys/sched.h>
515c71a4d4Srmind #include <sys/syscallargs.h>
525c71a4d4Srmind #include <sys/sysctl.h>
535c71a4d4Srmind #include <sys/systm.h>
545c71a4d4Srmind #include <sys/types.h>
555c71a4d4Srmind
565c71a4d4Srmind static pset_info_t ** psets;
575c71a4d4Srmind static u_int psets_max;
585c71a4d4Srmind static u_int psets_count;
5940cc528aSelad static kauth_listener_t psets_listener;
605c71a4d4Srmind
615c71a4d4Srmind static int psets_realloc(int);
625c71a4d4Srmind static int psid_validate(psetid_t, bool);
635c71a4d4Srmind static int kern_pset_create(psetid_t *);
645c71a4d4Srmind static int kern_pset_destroy(psetid_t);
655c71a4d4Srmind
6640cc528aSelad static int
psets_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)6740cc528aSelad psets_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
6840cc528aSelad void *arg0, void *arg1, void *arg2, void *arg3)
6940cc528aSelad {
7040cc528aSelad psetid_t id;
7140cc528aSelad enum kauth_system_req req;
7240cc528aSelad int result;
7340cc528aSelad
7440cc528aSelad result = KAUTH_RESULT_DEFER;
75ce578dfcSjoerg req = (enum kauth_system_req)(uintptr_t)arg0;
76ce578dfcSjoerg id = (psetid_t)(uintptr_t)arg1;
7740cc528aSelad
7840cc528aSelad if (action != KAUTH_SYSTEM_PSET)
7940cc528aSelad return result;
8040cc528aSelad
8140cc528aSelad if ((req == KAUTH_REQ_SYSTEM_PSET_ASSIGN) ||
8240cc528aSelad (req == KAUTH_REQ_SYSTEM_PSET_BIND)) {
8340cc528aSelad if (id == PS_QUERY)
8440cc528aSelad result = KAUTH_RESULT_ALLOW;
8540cc528aSelad }
8640cc528aSelad
8740cc528aSelad return result;
8840cc528aSelad }
8940cc528aSelad
905c71a4d4Srmind /*
915c71a4d4Srmind * Initialization of the processor-sets.
925c71a4d4Srmind */
935c71a4d4Srmind void
psets_init(void)945c71a4d4Srmind psets_init(void)
955c71a4d4Srmind {
965c71a4d4Srmind
97d1579b2dSriastradh psets_max = uimax(maxcpus, 32);
985c71a4d4Srmind psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
995c71a4d4Srmind psets_count = 0;
10040cc528aSelad
10140cc528aSelad psets_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
10240cc528aSelad psets_listener_cb, NULL);
1035c71a4d4Srmind }
1045c71a4d4Srmind
1055c71a4d4Srmind /*
1065c71a4d4Srmind * Reallocate the array of the processor-set structures.
1075c71a4d4Srmind */
1085c71a4d4Srmind static int
psets_realloc(int new_psets_max)1095c71a4d4Srmind psets_realloc(int new_psets_max)
1105c71a4d4Srmind {
1115c71a4d4Srmind pset_info_t **new_psets, **old_psets;
1125c71a4d4Srmind const u_int newsize = new_psets_max * sizeof(void *);
1135c71a4d4Srmind u_int i, oldsize;
1145c71a4d4Srmind
1155c71a4d4Srmind if (new_psets_max < 1)
1165c71a4d4Srmind return EINVAL;
1175c71a4d4Srmind
1185c71a4d4Srmind new_psets = kmem_zalloc(newsize, KM_SLEEP);
11953db9954Sad mutex_enter(&cpu_lock);
1205c71a4d4Srmind old_psets = psets;
1215c71a4d4Srmind oldsize = psets_max * sizeof(void *);
1225c71a4d4Srmind
1235c71a4d4Srmind /* Check if we can lower the size of the array */
1245c71a4d4Srmind if (new_psets_max < psets_max) {
1255c71a4d4Srmind for (i = new_psets_max; i < psets_max; i++) {
1265c71a4d4Srmind if (psets[i] == NULL)
1275c71a4d4Srmind continue;
12853db9954Sad mutex_exit(&cpu_lock);
1295c71a4d4Srmind kmem_free(new_psets, newsize);
1305c71a4d4Srmind return EBUSY;
1315c71a4d4Srmind }
1325c71a4d4Srmind }
1335c71a4d4Srmind
1345c71a4d4Srmind /* Copy all pointers to the new array */
1355c71a4d4Srmind memcpy(new_psets, psets, newsize);
1365c71a4d4Srmind psets_max = new_psets_max;
1375c71a4d4Srmind psets = new_psets;
13853db9954Sad mutex_exit(&cpu_lock);
1395c71a4d4Srmind
1405c71a4d4Srmind kmem_free(old_psets, oldsize);
1415c71a4d4Srmind return 0;
1425c71a4d4Srmind }
1435c71a4d4Srmind
1445c71a4d4Srmind /*
1455c71a4d4Srmind * Validate processor-set ID.
1465c71a4d4Srmind */
1475c71a4d4Srmind static int
psid_validate(psetid_t psid,bool chkps)1485c71a4d4Srmind psid_validate(psetid_t psid, bool chkps)
1495c71a4d4Srmind {
1505c71a4d4Srmind
15153db9954Sad KASSERT(mutex_owned(&cpu_lock));
1525c71a4d4Srmind
1535c71a4d4Srmind if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
1545c71a4d4Srmind return 0;
1555c71a4d4Srmind if (psid <= 0 || psid > psets_max)
1565c71a4d4Srmind return EINVAL;
1575c71a4d4Srmind if (psets[psid - 1] == NULL)
1585c71a4d4Srmind return EINVAL;
1595c71a4d4Srmind
1605c71a4d4Srmind return 0;
1615c71a4d4Srmind }
1625c71a4d4Srmind
1635c71a4d4Srmind /*
1645c71a4d4Srmind * Create a processor-set.
1655c71a4d4Srmind */
1665c71a4d4Srmind static int
kern_pset_create(psetid_t * psid)1675c71a4d4Srmind kern_pset_create(psetid_t *psid)
1685c71a4d4Srmind {
1695c71a4d4Srmind pset_info_t *pi;
1705c71a4d4Srmind u_int i;
1715c71a4d4Srmind
1725c71a4d4Srmind if (psets_count == psets_max)
1735c71a4d4Srmind return ENOMEM;
1745c71a4d4Srmind
1755c71a4d4Srmind pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
1765c71a4d4Srmind
17753db9954Sad mutex_enter(&cpu_lock);
1785c71a4d4Srmind if (psets_count == psets_max) {
17953db9954Sad mutex_exit(&cpu_lock);
1805c71a4d4Srmind kmem_free(pi, sizeof(pset_info_t));
1815c71a4d4Srmind return ENOMEM;
1825c71a4d4Srmind }
1835c71a4d4Srmind
1845c71a4d4Srmind /* Find a free entry in the array */
1855c71a4d4Srmind for (i = 0; i < psets_max; i++)
1865c71a4d4Srmind if (psets[i] == NULL)
1875c71a4d4Srmind break;
1885c71a4d4Srmind KASSERT(i != psets_max);
1895c71a4d4Srmind
1905c71a4d4Srmind psets[i] = pi;
1915c71a4d4Srmind psets_count++;
19253db9954Sad mutex_exit(&cpu_lock);
1935c71a4d4Srmind
1945c71a4d4Srmind *psid = i + 1;
1955c71a4d4Srmind return 0;
1965c71a4d4Srmind }
1975c71a4d4Srmind
1985c71a4d4Srmind /*
1995c71a4d4Srmind * Destroy a processor-set.
2005c71a4d4Srmind */
2015c71a4d4Srmind static int
kern_pset_destroy(psetid_t psid)2025c71a4d4Srmind kern_pset_destroy(psetid_t psid)
2035c71a4d4Srmind {
2045c71a4d4Srmind struct cpu_info *ci;
2055c71a4d4Srmind struct lwp *l;
2065c71a4d4Srmind CPU_INFO_ITERATOR cii;
2075c71a4d4Srmind int error;
2085c71a4d4Srmind
20953db9954Sad mutex_enter(&cpu_lock);
2105c71a4d4Srmind if (psid == PS_MYID) {
2115c71a4d4Srmind /* Use caller's processor-set ID */
2125c71a4d4Srmind psid = curlwp->l_psid;
2135c71a4d4Srmind }
2145c71a4d4Srmind error = psid_validate(psid, false);
2155c71a4d4Srmind if (error) {
21653db9954Sad mutex_exit(&cpu_lock);
2175c71a4d4Srmind return error;
2185c71a4d4Srmind }
2195c71a4d4Srmind
2205c71a4d4Srmind /* Release the processor-set from all CPUs */
2215c71a4d4Srmind for (CPU_INFO_FOREACH(cii, ci)) {
2225c71a4d4Srmind struct schedstate_percpu *spc;
2235c71a4d4Srmind
2245c71a4d4Srmind spc = &ci->ci_schedstate;
2255c71a4d4Srmind if (spc->spc_psid != psid)
2265c71a4d4Srmind continue;
2275c71a4d4Srmind spc->spc_psid = PS_NONE;
2285c71a4d4Srmind }
2295c71a4d4Srmind
2305c71a4d4Srmind /* Unmark the processor-set ID from each thread */
231*0eaaa024Sad mutex_enter(&proc_lock);
2325c71a4d4Srmind LIST_FOREACH(l, &alllwp, l_list) {
2335c71a4d4Srmind /* Safe to check and set without lock held */
2345c71a4d4Srmind if (l->l_psid != psid)
2355c71a4d4Srmind continue;
2365c71a4d4Srmind l->l_psid = PS_NONE;
2375c71a4d4Srmind }
238*0eaaa024Sad mutex_exit(&proc_lock);
2395c71a4d4Srmind
2405c71a4d4Srmind /* Destroy the processor-set */
241bec282a6Sad kmem_free(psets[psid - 1], sizeof(pset_info_t));
2425c71a4d4Srmind psets[psid - 1] = NULL;
2435c71a4d4Srmind psets_count--;
24453db9954Sad mutex_exit(&cpu_lock);
2455c71a4d4Srmind
2465c71a4d4Srmind return 0;
2475c71a4d4Srmind }
2485c71a4d4Srmind
2495c71a4d4Srmind /*
2505c71a4d4Srmind * General system calls for the processor-sets.
2515c71a4d4Srmind */
2525c71a4d4Srmind
2535c71a4d4Srmind int
sys_pset_create(struct lwp * l,const struct sys_pset_create_args * uap,register_t * retval)2545c71a4d4Srmind sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
2555c71a4d4Srmind register_t *retval)
2565c71a4d4Srmind {
2575c71a4d4Srmind /* {
2585c71a4d4Srmind syscallarg(psetid_t) *psid;
2595c71a4d4Srmind } */
2605c71a4d4Srmind psetid_t psid;
2615c71a4d4Srmind int error;
2625c71a4d4Srmind
2635c71a4d4Srmind /* Available only for super-user */
264fb37bad4Selad if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
265fb37bad4Selad KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
2665c71a4d4Srmind return EPERM;
2675c71a4d4Srmind
2685c71a4d4Srmind error = kern_pset_create(&psid);
2695c71a4d4Srmind if (error)
2705c71a4d4Srmind return error;
2715c71a4d4Srmind
2725c71a4d4Srmind error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
2735c71a4d4Srmind if (error)
2745c71a4d4Srmind (void)kern_pset_destroy(psid);
2755c71a4d4Srmind
2765c71a4d4Srmind return error;
2775c71a4d4Srmind }
2785c71a4d4Srmind
2795c71a4d4Srmind int
sys_pset_destroy(struct lwp * l,const struct sys_pset_destroy_args * uap,register_t * retval)2805c71a4d4Srmind sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
2815c71a4d4Srmind register_t *retval)
2825c71a4d4Srmind {
2835c71a4d4Srmind /* {
2845c71a4d4Srmind syscallarg(psetid_t) psid;
2855c71a4d4Srmind } */
2865c71a4d4Srmind
2875c71a4d4Srmind /* Available only for super-user */
288fb37bad4Selad if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
289fb37bad4Selad KAUTH_REQ_SYSTEM_PSET_DESTROY,
290fb37bad4Selad KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
2915c71a4d4Srmind return EPERM;
2925c71a4d4Srmind
2935c71a4d4Srmind return kern_pset_destroy(SCARG(uap, psid));
2945c71a4d4Srmind }
2955c71a4d4Srmind
2965c71a4d4Srmind int
sys_pset_assign(struct lwp * l,const struct sys_pset_assign_args * uap,register_t * retval)2975c71a4d4Srmind sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
2985c71a4d4Srmind register_t *retval)
2995c71a4d4Srmind {
3005c71a4d4Srmind /* {
3015c71a4d4Srmind syscallarg(psetid_t) psid;
3025c71a4d4Srmind syscallarg(cpuid_t) cpuid;
3035c71a4d4Srmind syscallarg(psetid_t) *opsid;
3045c71a4d4Srmind } */
305909e7f42Srmind struct cpu_info *ici, *ci = NULL;
306ae626d79Srmind struct schedstate_percpu *spc = NULL;
307909e7f42Srmind struct lwp *t;
3085c71a4d4Srmind psetid_t psid = SCARG(uap, psid), opsid = 0;
3095c71a4d4Srmind CPU_INFO_ITERATOR cii;
310ae626d79Srmind int error = 0, nnone = 0;
3115c71a4d4Srmind
3125c71a4d4Srmind /* Available only for super-user, except the case of PS_QUERY */
313fb37bad4Selad if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
314fb37bad4Selad KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
315fb37bad4Selad NULL))
3165c71a4d4Srmind return EPERM;
3175c71a4d4Srmind
3185c71a4d4Srmind /* Find the target CPU */
31953db9954Sad mutex_enter(&cpu_lock);
320909e7f42Srmind for (CPU_INFO_FOREACH(cii, ici)) {
321909e7f42Srmind struct schedstate_percpu *ispc;
322909e7f42Srmind ispc = &ici->ci_schedstate;
323909e7f42Srmind if (cpu_index(ici) == SCARG(uap, cpuid)) {
324909e7f42Srmind ci = ici;
325909e7f42Srmind spc = ispc;
32653db9954Sad }
327909e7f42Srmind nnone += (ispc->spc_psid == PS_NONE);
328909e7f42Srmind }
329909e7f42Srmind if (ci == NULL) {
33053db9954Sad mutex_exit(&cpu_lock);
33153db9954Sad return EINVAL;
33253db9954Sad }
3335c71a4d4Srmind error = psid_validate(psid, true);
3345c71a4d4Srmind if (error) {
33553db9954Sad mutex_exit(&cpu_lock);
3365c71a4d4Srmind return error;
3375c71a4d4Srmind }
3385c71a4d4Srmind opsid = spc->spc_psid;
3395c71a4d4Srmind switch (psid) {
3405c71a4d4Srmind case PS_QUERY:
3415c71a4d4Srmind break;
3425c71a4d4Srmind case PS_MYID:
3435c71a4d4Srmind psid = curlwp->l_psid;
34453db9954Sad /* FALLTHROUGH */
3455c71a4d4Srmind default:
346ae626d79Srmind /*
347784e861dSmlelstv * Just finish if old and new processor-sets are
348784e861dSmlelstv * the same.
349784e861dSmlelstv */
350784e861dSmlelstv if (spc->spc_psid == psid)
351784e861dSmlelstv break;
352784e861dSmlelstv /*
353ae626d79Srmind * Ensure at least one CPU stays in the default set,
354ae626d79Srmind * and that specified CPU is not offline.
355ae626d79Srmind */
356ae626d79Srmind if (psid != PS_NONE && ((spc->spc_flags & SPCF_OFFLINE) ||
357ae626d79Srmind (nnone == 1 && spc->spc_psid == PS_NONE))) {
35853db9954Sad mutex_exit(&cpu_lock);
35953db9954Sad return EBUSY;
3605c71a4d4Srmind }
361*0eaaa024Sad mutex_enter(&proc_lock);
362909e7f42Srmind /*
363909e7f42Srmind * Ensure that none of the threads are using affinity mask
364909e7f42Srmind * with this target CPU in it.
365909e7f42Srmind */
366909e7f42Srmind LIST_FOREACH(t, &alllwp, l_list) {
367501dd321Srmind if (t->l_affinity == NULL) {
368909e7f42Srmind continue;
369501dd321Srmind }
3704f1720c3Srmind lwp_lock(t);
371501dd321Srmind if (t->l_affinity == NULL) {
3724f1720c3Srmind lwp_unlock(t);
3734f1720c3Srmind continue;
3744f1720c3Srmind }
37552b220e9Srmind if (kcpuset_isset(t->l_affinity, cpu_index(ci))) {
3764f1720c3Srmind lwp_unlock(t);
377*0eaaa024Sad mutex_exit(&proc_lock);
378909e7f42Srmind mutex_exit(&cpu_lock);
379909e7f42Srmind return EPERM;
380909e7f42Srmind }
3813478485fSmaxv lwp_unlock(t);
382909e7f42Srmind }
383909e7f42Srmind /*
384909e7f42Srmind * Set the processor-set ID.
385909e7f42Srmind * Migrate out any threads running on this CPU.
386909e7f42Srmind */
38753db9954Sad spc->spc_psid = psid;
388909e7f42Srmind
389909e7f42Srmind LIST_FOREACH(t, &alllwp, l_list) {
390909e7f42Srmind struct cpu_info *tci;
391909e7f42Srmind if (t->l_cpu != ci)
392909e7f42Srmind continue;
393909e7f42Srmind if (t->l_pflag & (LP_BOUND | LP_INTR))
394909e7f42Srmind continue;
395909e7f42Srmind lwp_lock(t);
396909e7f42Srmind tci = sched_takecpu(t);
397909e7f42Srmind KASSERT(tci != ci);
398909e7f42Srmind lwp_migrate(t, tci);
399909e7f42Srmind }
400*0eaaa024Sad mutex_exit(&proc_lock);
40153db9954Sad break;
40253db9954Sad }
40353db9954Sad mutex_exit(&cpu_lock);
4045c71a4d4Srmind
4055c71a4d4Srmind if (SCARG(uap, opsid) != NULL)
4065c71a4d4Srmind error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
4075c71a4d4Srmind
4085c71a4d4Srmind return error;
4095c71a4d4Srmind }
4105c71a4d4Srmind
4115c71a4d4Srmind int
sys__pset_bind(struct lwp * l,const struct sys__pset_bind_args * uap,register_t * retval)4125c71a4d4Srmind sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
4135c71a4d4Srmind register_t *retval)
4145c71a4d4Srmind {
4155c71a4d4Srmind /* {
4165c71a4d4Srmind syscallarg(idtype_t) idtype;
4175c71a4d4Srmind syscallarg(id_t) first_id;
4185c71a4d4Srmind syscallarg(id_t) second_id;
4195c71a4d4Srmind syscallarg(psetid_t) psid;
4205c71a4d4Srmind syscallarg(psetid_t) *opsid;
4215c71a4d4Srmind } */
4225c71a4d4Srmind struct cpu_info *ci;
4235c71a4d4Srmind struct proc *p;
4245c71a4d4Srmind struct lwp *t;
4255c71a4d4Srmind id_t id1, id2;
4265c71a4d4Srmind pid_t pid = 0;
4275c71a4d4Srmind lwpid_t lid = 0;
4285c71a4d4Srmind psetid_t psid, opsid;
4295c71a4d4Srmind int error = 0, lcnt;
4305c71a4d4Srmind
4315c71a4d4Srmind psid = SCARG(uap, psid);
4325c71a4d4Srmind
4335c71a4d4Srmind /* Available only for super-user, except the case of PS_QUERY */
434fb37bad4Selad if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
435fb37bad4Selad KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
436fb37bad4Selad NULL))
4375c71a4d4Srmind return EPERM;
4385c71a4d4Srmind
43953db9954Sad mutex_enter(&cpu_lock);
4405c71a4d4Srmind error = psid_validate(psid, true);
4415c71a4d4Srmind if (error) {
44253db9954Sad mutex_exit(&cpu_lock);
4435c71a4d4Srmind return error;
4445c71a4d4Srmind }
4455c71a4d4Srmind if (psid == PS_MYID)
4465c71a4d4Srmind psid = curlwp->l_psid;
4475c71a4d4Srmind
4485c71a4d4Srmind /*
4495c71a4d4Srmind * Get PID and LID from the ID.
4505c71a4d4Srmind */
4515c71a4d4Srmind p = l->l_proc;
4525c71a4d4Srmind id1 = SCARG(uap, first_id);
4535c71a4d4Srmind id2 = SCARG(uap, second_id);
4545c71a4d4Srmind
455*0eaaa024Sad mutex_enter(&proc_lock);
4565c71a4d4Srmind switch (SCARG(uap, idtype)) {
4575c71a4d4Srmind case P_PID:
4585c71a4d4Srmind /*
4595c71a4d4Srmind * Process:
4605c71a4d4Srmind * First ID - PID;
4615c71a4d4Srmind * Second ID - ignored;
4625c71a4d4Srmind */
4635c71a4d4Srmind pid = (id1 == P_MYID) ? p->p_pid : id1;
4645c71a4d4Srmind lid = 0;
4655c71a4d4Srmind break;
4665c71a4d4Srmind case P_LWPID:
4675c71a4d4Srmind /*
4685c71a4d4Srmind * Thread (LWP):
4695c71a4d4Srmind * First ID - LID;
4705c71a4d4Srmind * Second ID - PID;
4715c71a4d4Srmind */
4725c71a4d4Srmind if (id1 == P_MYID) {
4735c71a4d4Srmind pid = p->p_pid;
4745c71a4d4Srmind lid = l->l_lid;
4755c71a4d4Srmind break;
4765c71a4d4Srmind }
4775c71a4d4Srmind lid = id1;
4785c71a4d4Srmind pid = (id2 == P_MYID) ? p->p_pid : id2;
4795c71a4d4Srmind break;
4805c71a4d4Srmind default:
481183f0fa9Syamt error = EINVAL;
482183f0fa9Syamt goto error;
4835c71a4d4Srmind }
4845c71a4d4Srmind
4855c71a4d4Srmind /* Find the process */
4863c507045Srmind p = proc_find(pid);
4875c71a4d4Srmind if (p == NULL) {
4885c71a4d4Srmind error = ESRCH;
4895c71a4d4Srmind goto error;
4905c71a4d4Srmind }
4915c71a4d4Srmind /* Disallow modification of the system processes */
4925c71a4d4Srmind if (p->p_flag & PK_SYSTEM) {
4935c71a4d4Srmind error = EPERM;
4945c71a4d4Srmind goto error;
4955c71a4d4Srmind }
4965c71a4d4Srmind
4975c71a4d4Srmind /* Find the LWP(s) */
4985c71a4d4Srmind lcnt = 0;
4995c71a4d4Srmind ci = NULL;
500bec282a6Sad mutex_enter(p->p_lock);
5015c71a4d4Srmind LIST_FOREACH(t, &p->p_lwps, l_sibling) {
5025c71a4d4Srmind if (lid && lid != t->l_lid)
5035c71a4d4Srmind continue;
5045c71a4d4Srmind /*
5055c71a4d4Srmind * Bind the thread to the processor-set,
5065c71a4d4Srmind * take some CPU and migrate.
5075c71a4d4Srmind */
5085c71a4d4Srmind lwp_lock(t);
5095c71a4d4Srmind opsid = t->l_psid;
5105c71a4d4Srmind t->l_psid = psid;
5119e43fad6Srmind ci = sched_takecpu(t);
5125c71a4d4Srmind /* Unlocks LWP */
5135c71a4d4Srmind lwp_migrate(t, ci);
5145c71a4d4Srmind lcnt++;
5155c71a4d4Srmind }
516284c2b9aSad mutex_exit(p->p_lock);
5175c71a4d4Srmind if (lcnt == 0) {
5185c71a4d4Srmind error = ESRCH;
5195c71a4d4Srmind }
5205c71a4d4Srmind error:
521*0eaaa024Sad mutex_exit(&proc_lock);
52253db9954Sad mutex_exit(&cpu_lock);
523bec282a6Sad if (error == 0 && SCARG(uap, opsid))
524bec282a6Sad error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
5255c71a4d4Srmind return error;
5265c71a4d4Srmind }
5275c71a4d4Srmind
5285c71a4d4Srmind /*
5295c71a4d4Srmind * Sysctl nodes and initialization.
5305c71a4d4Srmind */
5315c71a4d4Srmind
5325c71a4d4Srmind static int
sysctl_psets_max(SYSCTLFN_ARGS)5335c71a4d4Srmind sysctl_psets_max(SYSCTLFN_ARGS)
5345c71a4d4Srmind {
5355c71a4d4Srmind struct sysctlnode node;
5365c71a4d4Srmind int error, newsize;
5375c71a4d4Srmind
5385c71a4d4Srmind node = *rnode;
5395c71a4d4Srmind node.sysctl_data = &newsize;
5405c71a4d4Srmind
5415c71a4d4Srmind newsize = psets_max;
5425c71a4d4Srmind error = sysctl_lookup(SYSCTLFN_CALL(&node));
5435c71a4d4Srmind if (error || newp == NULL)
5445c71a4d4Srmind return error;
5455c71a4d4Srmind
5465c71a4d4Srmind if (newsize <= 0)
5475c71a4d4Srmind return EINVAL;
5485c71a4d4Srmind
5495c71a4d4Srmind sysctl_unlock();
5505c71a4d4Srmind error = psets_realloc(newsize);
5515c71a4d4Srmind sysctl_relock();
5525c71a4d4Srmind return error;
5535c71a4d4Srmind }
5545c71a4d4Srmind
55553db9954Sad static int
sysctl_psets_list(SYSCTLFN_ARGS)55653db9954Sad sysctl_psets_list(SYSCTLFN_ARGS)
55753db9954Sad {
55853db9954Sad const size_t bufsz = 1024;
55953db9954Sad char *buf, tbuf[16];
56053db9954Sad int i, error;
56153db9954Sad size_t len;
56253db9954Sad
56353db9954Sad sysctl_unlock();
56453db9954Sad buf = kmem_alloc(bufsz, KM_SLEEP);
56553db9954Sad snprintf(buf, bufsz, "%d:1", PS_NONE); /* XXX */
56653db9954Sad
56753db9954Sad mutex_enter(&cpu_lock);
56853db9954Sad for (i = 0; i < psets_max; i++) {
56953db9954Sad if (psets[i] == NULL)
57053db9954Sad continue;
57153db9954Sad snprintf(tbuf, sizeof(tbuf), ",%d:2", i + 1); /* XXX */
57253db9954Sad strlcat(buf, tbuf, bufsz);
57353db9954Sad }
57453db9954Sad mutex_exit(&cpu_lock);
57553db9954Sad len = strlen(buf) + 1;
57653db9954Sad error = 0;
57753db9954Sad if (oldp != NULL)
578d1579b2dSriastradh error = copyout(buf, oldp, uimin(len, *oldlenp));
57953db9954Sad *oldlenp = len;
58053db9954Sad kmem_free(buf, bufsz);
58153db9954Sad sysctl_relock();
58253db9954Sad return error;
58353db9954Sad }
58453db9954Sad
5855c71a4d4Srmind SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
5865c71a4d4Srmind {
5875c71a4d4Srmind const struct sysctlnode *node = NULL;
5885c71a4d4Srmind
5895c71a4d4Srmind sysctl_createv(clog, 0, NULL, &node,
5905c71a4d4Srmind CTLFLAG_PERMANENT,
5915c71a4d4Srmind CTLTYPE_NODE, "pset",
5925c71a4d4Srmind SYSCTL_DESCR("Processor-set options"),
5935c71a4d4Srmind NULL, 0, NULL, 0,
5945c71a4d4Srmind CTL_KERN, CTL_CREATE, CTL_EOL);
5955c71a4d4Srmind
5965c71a4d4Srmind if (node == NULL)
5975c71a4d4Srmind return;
5985c71a4d4Srmind
5995c71a4d4Srmind sysctl_createv(clog, 0, &node, NULL,
6005c71a4d4Srmind CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
6015c71a4d4Srmind CTLTYPE_INT, "psets_max",
6025c71a4d4Srmind SYSCTL_DESCR("Maximal count of the processor-sets"),
6035c71a4d4Srmind sysctl_psets_max, 0, &psets_max, 0,
6045c71a4d4Srmind CTL_CREATE, CTL_EOL);
60553db9954Sad sysctl_createv(clog, 0, &node, NULL,
60653db9954Sad CTLFLAG_PERMANENT,
60753db9954Sad CTLTYPE_STRING, "list",
60853db9954Sad SYSCTL_DESCR("List of active sets"),
60953db9954Sad sysctl_psets_list, 0, NULL, 0,
61053db9954Sad CTL_CREATE, CTL_EOL);
6115c71a4d4Srmind }
612