xref: /netbsd-src/sys/kern/sys_pset.c (revision 0eaaa024ea5a271b8f91fe07fe846a86f31dd9b9)
1*0eaaa024Sad /*	$NetBSD: sys_pset.c,v 1.24 2020/05/23 23:42:43 ad Exp $	*/
25c71a4d4Srmind 
35c71a4d4Srmind /*
45c71a4d4Srmind  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
55c71a4d4Srmind  * All rights reserved.
65c71a4d4Srmind  *
75c71a4d4Srmind  * Redistribution and use in source and binary forms, with or without
85c71a4d4Srmind  * modification, are permitted provided that the following conditions
95c71a4d4Srmind  * are met:
105c71a4d4Srmind  * 1. Redistributions of source code must retain the above copyright
115c71a4d4Srmind  *    notice, this list of conditions and the following disclaimer.
125c71a4d4Srmind  * 2. Redistributions in binary form must reproduce the above copyright
135c71a4d4Srmind  *    notice, this list of conditions and the following disclaimer in the
145c71a4d4Srmind  *    documentation and/or other materials provided with the distribution.
155c71a4d4Srmind  *
1606171502Srmind  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1706171502Srmind  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1806171502Srmind  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1906171502Srmind  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2006171502Srmind  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2106171502Srmind  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2206171502Srmind  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2306171502Srmind  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2406171502Srmind  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2506171502Srmind  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2606171502Srmind  * SUCH DAMAGE.
275c71a4d4Srmind  */
285c71a4d4Srmind 
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of processor-set structures and its members are protected
 *  by the global cpu_lock.  Note that the scheduler may read an LWP's
 *  l_psid value without holding that lock.
 */
375c71a4d4Srmind 
385c71a4d4Srmind #include <sys/cdefs.h>
39*0eaaa024Sad __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.24 2020/05/23 23:42:43 ad Exp $");
405c71a4d4Srmind 
415c71a4d4Srmind #include <sys/param.h>
425c71a4d4Srmind 
435c71a4d4Srmind #include <sys/cpu.h>
445c71a4d4Srmind #include <sys/kauth.h>
455c71a4d4Srmind #include <sys/kmem.h>
465c71a4d4Srmind #include <sys/lwp.h>
475c71a4d4Srmind #include <sys/mutex.h>
485c71a4d4Srmind #include <sys/proc.h>
495c71a4d4Srmind #include <sys/pset.h>
505c71a4d4Srmind #include <sys/sched.h>
515c71a4d4Srmind #include <sys/syscallargs.h>
525c71a4d4Srmind #include <sys/sysctl.h>
535c71a4d4Srmind #include <sys/systm.h>
545c71a4d4Srmind #include <sys/types.h>
555c71a4d4Srmind 
565c71a4d4Srmind static pset_info_t **	psets;
575c71a4d4Srmind static u_int		psets_max;
585c71a4d4Srmind static u_int		psets_count;
5940cc528aSelad static kauth_listener_t	psets_listener;
605c71a4d4Srmind 
615c71a4d4Srmind static int	psets_realloc(int);
625c71a4d4Srmind static int	psid_validate(psetid_t, bool);
635c71a4d4Srmind static int	kern_pset_create(psetid_t *);
645c71a4d4Srmind static int	kern_pset_destroy(psetid_t);
655c71a4d4Srmind 
6640cc528aSelad static int
psets_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)6740cc528aSelad psets_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
6840cc528aSelad     void *arg0, void *arg1, void *arg2, void *arg3)
6940cc528aSelad {
7040cc528aSelad 	psetid_t id;
7140cc528aSelad 	enum kauth_system_req req;
7240cc528aSelad 	int result;
7340cc528aSelad 
7440cc528aSelad 	result = KAUTH_RESULT_DEFER;
75ce578dfcSjoerg 	req = (enum kauth_system_req)(uintptr_t)arg0;
76ce578dfcSjoerg 	id = (psetid_t)(uintptr_t)arg1;
7740cc528aSelad 
7840cc528aSelad 	if (action != KAUTH_SYSTEM_PSET)
7940cc528aSelad 		return result;
8040cc528aSelad 
8140cc528aSelad 	if ((req == KAUTH_REQ_SYSTEM_PSET_ASSIGN) ||
8240cc528aSelad 	    (req == KAUTH_REQ_SYSTEM_PSET_BIND)) {
8340cc528aSelad 		if (id == PS_QUERY)
8440cc528aSelad 			result = KAUTH_RESULT_ALLOW;
8540cc528aSelad 	}
8640cc528aSelad 
8740cc528aSelad 	return result;
8840cc528aSelad }
8940cc528aSelad 
905c71a4d4Srmind /*
915c71a4d4Srmind  * Initialization of the processor-sets.
925c71a4d4Srmind  */
935c71a4d4Srmind void
psets_init(void)945c71a4d4Srmind psets_init(void)
955c71a4d4Srmind {
965c71a4d4Srmind 
97d1579b2dSriastradh 	psets_max = uimax(maxcpus, 32);
985c71a4d4Srmind 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
995c71a4d4Srmind 	psets_count = 0;
10040cc528aSelad 
10140cc528aSelad 	psets_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
10240cc528aSelad 	    psets_listener_cb, NULL);
1035c71a4d4Srmind }
1045c71a4d4Srmind 
1055c71a4d4Srmind /*
1065c71a4d4Srmind  * Reallocate the array of the processor-set structures.
1075c71a4d4Srmind  */
1085c71a4d4Srmind static int
psets_realloc(int new_psets_max)1095c71a4d4Srmind psets_realloc(int new_psets_max)
1105c71a4d4Srmind {
1115c71a4d4Srmind 	pset_info_t **new_psets, **old_psets;
1125c71a4d4Srmind 	const u_int newsize = new_psets_max * sizeof(void *);
1135c71a4d4Srmind 	u_int i, oldsize;
1145c71a4d4Srmind 
1155c71a4d4Srmind 	if (new_psets_max < 1)
1165c71a4d4Srmind 		return EINVAL;
1175c71a4d4Srmind 
1185c71a4d4Srmind 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
11953db9954Sad 	mutex_enter(&cpu_lock);
1205c71a4d4Srmind 	old_psets = psets;
1215c71a4d4Srmind 	oldsize = psets_max * sizeof(void *);
1225c71a4d4Srmind 
1235c71a4d4Srmind 	/* Check if we can lower the size of the array */
1245c71a4d4Srmind 	if (new_psets_max < psets_max) {
1255c71a4d4Srmind 		for (i = new_psets_max; i < psets_max; i++) {
1265c71a4d4Srmind 			if (psets[i] == NULL)
1275c71a4d4Srmind 				continue;
12853db9954Sad 			mutex_exit(&cpu_lock);
1295c71a4d4Srmind 			kmem_free(new_psets, newsize);
1305c71a4d4Srmind 			return EBUSY;
1315c71a4d4Srmind 		}
1325c71a4d4Srmind 	}
1335c71a4d4Srmind 
1345c71a4d4Srmind 	/* Copy all pointers to the new array */
1355c71a4d4Srmind 	memcpy(new_psets, psets, newsize);
1365c71a4d4Srmind 	psets_max = new_psets_max;
1375c71a4d4Srmind 	psets = new_psets;
13853db9954Sad 	mutex_exit(&cpu_lock);
1395c71a4d4Srmind 
1405c71a4d4Srmind 	kmem_free(old_psets, oldsize);
1415c71a4d4Srmind 	return 0;
1425c71a4d4Srmind }
1435c71a4d4Srmind 
1445c71a4d4Srmind /*
1455c71a4d4Srmind  * Validate processor-set ID.
1465c71a4d4Srmind  */
1475c71a4d4Srmind static int
psid_validate(psetid_t psid,bool chkps)1485c71a4d4Srmind psid_validate(psetid_t psid, bool chkps)
1495c71a4d4Srmind {
1505c71a4d4Srmind 
15153db9954Sad 	KASSERT(mutex_owned(&cpu_lock));
1525c71a4d4Srmind 
1535c71a4d4Srmind 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
1545c71a4d4Srmind 		return 0;
1555c71a4d4Srmind 	if (psid <= 0 || psid > psets_max)
1565c71a4d4Srmind 		return EINVAL;
1575c71a4d4Srmind 	if (psets[psid - 1] == NULL)
1585c71a4d4Srmind 		return EINVAL;
1595c71a4d4Srmind 
1605c71a4d4Srmind 	return 0;
1615c71a4d4Srmind }
1625c71a4d4Srmind 
1635c71a4d4Srmind /*
1645c71a4d4Srmind  * Create a processor-set.
1655c71a4d4Srmind  */
1665c71a4d4Srmind static int
kern_pset_create(psetid_t * psid)1675c71a4d4Srmind kern_pset_create(psetid_t *psid)
1685c71a4d4Srmind {
1695c71a4d4Srmind 	pset_info_t *pi;
1705c71a4d4Srmind 	u_int i;
1715c71a4d4Srmind 
1725c71a4d4Srmind 	if (psets_count == psets_max)
1735c71a4d4Srmind 		return ENOMEM;
1745c71a4d4Srmind 
1755c71a4d4Srmind 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
1765c71a4d4Srmind 
17753db9954Sad 	mutex_enter(&cpu_lock);
1785c71a4d4Srmind 	if (psets_count == psets_max) {
17953db9954Sad 		mutex_exit(&cpu_lock);
1805c71a4d4Srmind 		kmem_free(pi, sizeof(pset_info_t));
1815c71a4d4Srmind 		return ENOMEM;
1825c71a4d4Srmind 	}
1835c71a4d4Srmind 
1845c71a4d4Srmind 	/* Find a free entry in the array */
1855c71a4d4Srmind 	for (i = 0; i < psets_max; i++)
1865c71a4d4Srmind 		if (psets[i] == NULL)
1875c71a4d4Srmind 			break;
1885c71a4d4Srmind 	KASSERT(i != psets_max);
1895c71a4d4Srmind 
1905c71a4d4Srmind 	psets[i] = pi;
1915c71a4d4Srmind 	psets_count++;
19253db9954Sad 	mutex_exit(&cpu_lock);
1935c71a4d4Srmind 
1945c71a4d4Srmind 	*psid = i + 1;
1955c71a4d4Srmind 	return 0;
1965c71a4d4Srmind }
1975c71a4d4Srmind 
1985c71a4d4Srmind /*
1995c71a4d4Srmind  * Destroy a processor-set.
2005c71a4d4Srmind  */
2015c71a4d4Srmind static int
kern_pset_destroy(psetid_t psid)2025c71a4d4Srmind kern_pset_destroy(psetid_t psid)
2035c71a4d4Srmind {
2045c71a4d4Srmind 	struct cpu_info *ci;
2055c71a4d4Srmind 	struct lwp *l;
2065c71a4d4Srmind 	CPU_INFO_ITERATOR cii;
2075c71a4d4Srmind 	int error;
2085c71a4d4Srmind 
20953db9954Sad 	mutex_enter(&cpu_lock);
2105c71a4d4Srmind 	if (psid == PS_MYID) {
2115c71a4d4Srmind 		/* Use caller's processor-set ID */
2125c71a4d4Srmind 		psid = curlwp->l_psid;
2135c71a4d4Srmind 	}
2145c71a4d4Srmind 	error = psid_validate(psid, false);
2155c71a4d4Srmind 	if (error) {
21653db9954Sad 		mutex_exit(&cpu_lock);
2175c71a4d4Srmind 		return error;
2185c71a4d4Srmind 	}
2195c71a4d4Srmind 
2205c71a4d4Srmind 	/* Release the processor-set from all CPUs */
2215c71a4d4Srmind 	for (CPU_INFO_FOREACH(cii, ci)) {
2225c71a4d4Srmind 		struct schedstate_percpu *spc;
2235c71a4d4Srmind 
2245c71a4d4Srmind 		spc = &ci->ci_schedstate;
2255c71a4d4Srmind 		if (spc->spc_psid != psid)
2265c71a4d4Srmind 			continue;
2275c71a4d4Srmind 		spc->spc_psid = PS_NONE;
2285c71a4d4Srmind 	}
2295c71a4d4Srmind 
2305c71a4d4Srmind 	/* Unmark the processor-set ID from each thread */
231*0eaaa024Sad 	mutex_enter(&proc_lock);
2325c71a4d4Srmind 	LIST_FOREACH(l, &alllwp, l_list) {
2335c71a4d4Srmind 		/* Safe to check and set without lock held */
2345c71a4d4Srmind 		if (l->l_psid != psid)
2355c71a4d4Srmind 			continue;
2365c71a4d4Srmind 		l->l_psid = PS_NONE;
2375c71a4d4Srmind 	}
238*0eaaa024Sad 	mutex_exit(&proc_lock);
2395c71a4d4Srmind 
2405c71a4d4Srmind 	/* Destroy the processor-set */
241bec282a6Sad 	kmem_free(psets[psid - 1], sizeof(pset_info_t));
2425c71a4d4Srmind 	psets[psid - 1] = NULL;
2435c71a4d4Srmind 	psets_count--;
24453db9954Sad 	mutex_exit(&cpu_lock);
2455c71a4d4Srmind 
2465c71a4d4Srmind 	return 0;
2475c71a4d4Srmind }
2485c71a4d4Srmind 
2495c71a4d4Srmind /*
2505c71a4d4Srmind  * General system calls for the processor-sets.
2515c71a4d4Srmind  */
2525c71a4d4Srmind 
2535c71a4d4Srmind int
sys_pset_create(struct lwp * l,const struct sys_pset_create_args * uap,register_t * retval)2545c71a4d4Srmind sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
2555c71a4d4Srmind     register_t *retval)
2565c71a4d4Srmind {
2575c71a4d4Srmind 	/* {
2585c71a4d4Srmind 		syscallarg(psetid_t) *psid;
2595c71a4d4Srmind 	} */
2605c71a4d4Srmind 	psetid_t psid;
2615c71a4d4Srmind 	int error;
2625c71a4d4Srmind 
2635c71a4d4Srmind 	/* Available only for super-user */
264fb37bad4Selad 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
265fb37bad4Selad 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
2665c71a4d4Srmind 		return EPERM;
2675c71a4d4Srmind 
2685c71a4d4Srmind 	error = kern_pset_create(&psid);
2695c71a4d4Srmind 	if (error)
2705c71a4d4Srmind 		return error;
2715c71a4d4Srmind 
2725c71a4d4Srmind 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
2735c71a4d4Srmind 	if (error)
2745c71a4d4Srmind 		(void)kern_pset_destroy(psid);
2755c71a4d4Srmind 
2765c71a4d4Srmind 	return error;
2775c71a4d4Srmind }
2785c71a4d4Srmind 
2795c71a4d4Srmind int
sys_pset_destroy(struct lwp * l,const struct sys_pset_destroy_args * uap,register_t * retval)2805c71a4d4Srmind sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
2815c71a4d4Srmind     register_t *retval)
2825c71a4d4Srmind {
2835c71a4d4Srmind 	/* {
2845c71a4d4Srmind 		syscallarg(psetid_t) psid;
2855c71a4d4Srmind 	} */
2865c71a4d4Srmind 
2875c71a4d4Srmind 	/* Available only for super-user */
288fb37bad4Selad 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
289fb37bad4Selad 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
290fb37bad4Selad 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
2915c71a4d4Srmind 		return EPERM;
2925c71a4d4Srmind 
2935c71a4d4Srmind 	return kern_pset_destroy(SCARG(uap, psid));
2945c71a4d4Srmind }
2955c71a4d4Srmind 
2965c71a4d4Srmind int
sys_pset_assign(struct lwp * l,const struct sys_pset_assign_args * uap,register_t * retval)2975c71a4d4Srmind sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
2985c71a4d4Srmind     register_t *retval)
2995c71a4d4Srmind {
3005c71a4d4Srmind 	/* {
3015c71a4d4Srmind 		syscallarg(psetid_t) psid;
3025c71a4d4Srmind 		syscallarg(cpuid_t) cpuid;
3035c71a4d4Srmind 		syscallarg(psetid_t) *opsid;
3045c71a4d4Srmind 	} */
305909e7f42Srmind 	struct cpu_info *ici, *ci = NULL;
306ae626d79Srmind 	struct schedstate_percpu *spc = NULL;
307909e7f42Srmind 	struct lwp *t;
3085c71a4d4Srmind 	psetid_t psid = SCARG(uap, psid), opsid = 0;
3095c71a4d4Srmind 	CPU_INFO_ITERATOR cii;
310ae626d79Srmind 	int error = 0, nnone = 0;
3115c71a4d4Srmind 
3125c71a4d4Srmind 	/* Available only for super-user, except the case of PS_QUERY */
313fb37bad4Selad 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
314fb37bad4Selad 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
315fb37bad4Selad 	    NULL))
3165c71a4d4Srmind 		return EPERM;
3175c71a4d4Srmind 
3185c71a4d4Srmind 	/* Find the target CPU */
31953db9954Sad 	mutex_enter(&cpu_lock);
320909e7f42Srmind 	for (CPU_INFO_FOREACH(cii, ici)) {
321909e7f42Srmind 		struct schedstate_percpu *ispc;
322909e7f42Srmind 		ispc = &ici->ci_schedstate;
323909e7f42Srmind 		if (cpu_index(ici) == SCARG(uap, cpuid)) {
324909e7f42Srmind 			ci = ici;
325909e7f42Srmind 			spc = ispc;
32653db9954Sad 		}
327909e7f42Srmind 		nnone += (ispc->spc_psid == PS_NONE);
328909e7f42Srmind 	}
329909e7f42Srmind 	if (ci == NULL) {
33053db9954Sad 		mutex_exit(&cpu_lock);
33153db9954Sad 		return EINVAL;
33253db9954Sad 	}
3335c71a4d4Srmind 	error = psid_validate(psid, true);
3345c71a4d4Srmind 	if (error) {
33553db9954Sad 		mutex_exit(&cpu_lock);
3365c71a4d4Srmind 		return error;
3375c71a4d4Srmind 	}
3385c71a4d4Srmind 	opsid = spc->spc_psid;
3395c71a4d4Srmind 	switch (psid) {
3405c71a4d4Srmind 	case PS_QUERY:
3415c71a4d4Srmind 		break;
3425c71a4d4Srmind 	case PS_MYID:
3435c71a4d4Srmind 		psid = curlwp->l_psid;
34453db9954Sad 		/* FALLTHROUGH */
3455c71a4d4Srmind 	default:
346ae626d79Srmind 		/*
347784e861dSmlelstv 		 * Just finish if old and new processor-sets are
348784e861dSmlelstv 		 * the same.
349784e861dSmlelstv 		 */
350784e861dSmlelstv 		if (spc->spc_psid == psid)
351784e861dSmlelstv 			break;
352784e861dSmlelstv 		/*
353ae626d79Srmind 		 * Ensure at least one CPU stays in the default set,
354ae626d79Srmind 		 * and that specified CPU is not offline.
355ae626d79Srmind 		 */
356ae626d79Srmind 		if (psid != PS_NONE && ((spc->spc_flags & SPCF_OFFLINE) ||
357ae626d79Srmind 		    (nnone == 1 && spc->spc_psid == PS_NONE))) {
35853db9954Sad 			mutex_exit(&cpu_lock);
35953db9954Sad 			return EBUSY;
3605c71a4d4Srmind 		}
361*0eaaa024Sad 		mutex_enter(&proc_lock);
362909e7f42Srmind 		/*
363909e7f42Srmind 		 * Ensure that none of the threads are using affinity mask
364909e7f42Srmind 		 * with this target CPU in it.
365909e7f42Srmind 		 */
366909e7f42Srmind 		LIST_FOREACH(t, &alllwp, l_list) {
367501dd321Srmind 			if (t->l_affinity == NULL) {
368909e7f42Srmind 				continue;
369501dd321Srmind 			}
3704f1720c3Srmind 			lwp_lock(t);
371501dd321Srmind 			if (t->l_affinity == NULL) {
3724f1720c3Srmind 				lwp_unlock(t);
3734f1720c3Srmind 				continue;
3744f1720c3Srmind 			}
37552b220e9Srmind 			if (kcpuset_isset(t->l_affinity, cpu_index(ci))) {
3764f1720c3Srmind 				lwp_unlock(t);
377*0eaaa024Sad 				mutex_exit(&proc_lock);
378909e7f42Srmind 				mutex_exit(&cpu_lock);
379909e7f42Srmind 				return EPERM;
380909e7f42Srmind 			}
3813478485fSmaxv 			lwp_unlock(t);
382909e7f42Srmind 		}
383909e7f42Srmind 		/*
384909e7f42Srmind 		 * Set the processor-set ID.
385909e7f42Srmind 		 * Migrate out any threads running on this CPU.
386909e7f42Srmind 		 */
38753db9954Sad 		spc->spc_psid = psid;
388909e7f42Srmind 
389909e7f42Srmind 		LIST_FOREACH(t, &alllwp, l_list) {
390909e7f42Srmind 			struct cpu_info *tci;
391909e7f42Srmind 			if (t->l_cpu != ci)
392909e7f42Srmind 				continue;
393909e7f42Srmind 			if (t->l_pflag & (LP_BOUND | LP_INTR))
394909e7f42Srmind 				continue;
395909e7f42Srmind 			lwp_lock(t);
396909e7f42Srmind 			tci = sched_takecpu(t);
397909e7f42Srmind 			KASSERT(tci != ci);
398909e7f42Srmind 			lwp_migrate(t, tci);
399909e7f42Srmind 		}
400*0eaaa024Sad 		mutex_exit(&proc_lock);
40153db9954Sad 		break;
40253db9954Sad 	}
40353db9954Sad 	mutex_exit(&cpu_lock);
4045c71a4d4Srmind 
4055c71a4d4Srmind 	if (SCARG(uap, opsid) != NULL)
4065c71a4d4Srmind 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
4075c71a4d4Srmind 
4085c71a4d4Srmind 	return error;
4095c71a4d4Srmind }
4105c71a4d4Srmind 
4115c71a4d4Srmind int
sys__pset_bind(struct lwp * l,const struct sys__pset_bind_args * uap,register_t * retval)4125c71a4d4Srmind sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
4135c71a4d4Srmind     register_t *retval)
4145c71a4d4Srmind {
4155c71a4d4Srmind 	/* {
4165c71a4d4Srmind 		syscallarg(idtype_t) idtype;
4175c71a4d4Srmind 		syscallarg(id_t) first_id;
4185c71a4d4Srmind 		syscallarg(id_t) second_id;
4195c71a4d4Srmind 		syscallarg(psetid_t) psid;
4205c71a4d4Srmind 		syscallarg(psetid_t) *opsid;
4215c71a4d4Srmind 	} */
4225c71a4d4Srmind 	struct cpu_info *ci;
4235c71a4d4Srmind 	struct proc *p;
4245c71a4d4Srmind 	struct lwp *t;
4255c71a4d4Srmind 	id_t id1, id2;
4265c71a4d4Srmind 	pid_t pid = 0;
4275c71a4d4Srmind 	lwpid_t lid = 0;
4285c71a4d4Srmind 	psetid_t psid, opsid;
4295c71a4d4Srmind 	int error = 0, lcnt;
4305c71a4d4Srmind 
4315c71a4d4Srmind 	psid = SCARG(uap, psid);
4325c71a4d4Srmind 
4335c71a4d4Srmind 	/* Available only for super-user, except the case of PS_QUERY */
434fb37bad4Selad 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
435fb37bad4Selad 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
436fb37bad4Selad 	    NULL))
4375c71a4d4Srmind 		return EPERM;
4385c71a4d4Srmind 
43953db9954Sad 	mutex_enter(&cpu_lock);
4405c71a4d4Srmind 	error = psid_validate(psid, true);
4415c71a4d4Srmind 	if (error) {
44253db9954Sad 		mutex_exit(&cpu_lock);
4435c71a4d4Srmind 		return error;
4445c71a4d4Srmind 	}
4455c71a4d4Srmind 	if (psid == PS_MYID)
4465c71a4d4Srmind 		psid = curlwp->l_psid;
4475c71a4d4Srmind 
4485c71a4d4Srmind 	/*
4495c71a4d4Srmind 	 * Get PID and LID from the ID.
4505c71a4d4Srmind 	 */
4515c71a4d4Srmind 	p = l->l_proc;
4525c71a4d4Srmind 	id1 = SCARG(uap, first_id);
4535c71a4d4Srmind 	id2 = SCARG(uap, second_id);
4545c71a4d4Srmind 
455*0eaaa024Sad 	mutex_enter(&proc_lock);
4565c71a4d4Srmind 	switch (SCARG(uap, idtype)) {
4575c71a4d4Srmind 	case P_PID:
4585c71a4d4Srmind 		/*
4595c71a4d4Srmind 		 * Process:
4605c71a4d4Srmind 		 *  First ID	- PID;
4615c71a4d4Srmind 		 *  Second ID	- ignored;
4625c71a4d4Srmind 		 */
4635c71a4d4Srmind 		pid = (id1 == P_MYID) ? p->p_pid : id1;
4645c71a4d4Srmind 		lid = 0;
4655c71a4d4Srmind 		break;
4665c71a4d4Srmind 	case P_LWPID:
4675c71a4d4Srmind 		/*
4685c71a4d4Srmind 		 * Thread (LWP):
4695c71a4d4Srmind 		 *  First ID	- LID;
4705c71a4d4Srmind 		 *  Second ID	- PID;
4715c71a4d4Srmind 		 */
4725c71a4d4Srmind 		if (id1 == P_MYID) {
4735c71a4d4Srmind 			pid = p->p_pid;
4745c71a4d4Srmind 			lid = l->l_lid;
4755c71a4d4Srmind 			break;
4765c71a4d4Srmind 		}
4775c71a4d4Srmind 		lid = id1;
4785c71a4d4Srmind 		pid = (id2 == P_MYID) ? p->p_pid : id2;
4795c71a4d4Srmind 		break;
4805c71a4d4Srmind 	default:
481183f0fa9Syamt 		error = EINVAL;
482183f0fa9Syamt 		goto error;
4835c71a4d4Srmind 	}
4845c71a4d4Srmind 
4855c71a4d4Srmind 	/* Find the process */
4863c507045Srmind 	p = proc_find(pid);
4875c71a4d4Srmind 	if (p == NULL) {
4885c71a4d4Srmind 		error = ESRCH;
4895c71a4d4Srmind 		goto error;
4905c71a4d4Srmind 	}
4915c71a4d4Srmind 	/* Disallow modification of the system processes */
4925c71a4d4Srmind 	if (p->p_flag & PK_SYSTEM) {
4935c71a4d4Srmind 		error = EPERM;
4945c71a4d4Srmind 		goto error;
4955c71a4d4Srmind 	}
4965c71a4d4Srmind 
4975c71a4d4Srmind 	/* Find the LWP(s) */
4985c71a4d4Srmind 	lcnt = 0;
4995c71a4d4Srmind 	ci = NULL;
500bec282a6Sad 	mutex_enter(p->p_lock);
5015c71a4d4Srmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
5025c71a4d4Srmind 		if (lid && lid != t->l_lid)
5035c71a4d4Srmind 			continue;
5045c71a4d4Srmind 		/*
5055c71a4d4Srmind 		 * Bind the thread to the processor-set,
5065c71a4d4Srmind 		 * take some CPU and migrate.
5075c71a4d4Srmind 		 */
5085c71a4d4Srmind 		lwp_lock(t);
5095c71a4d4Srmind 		opsid = t->l_psid;
5105c71a4d4Srmind 		t->l_psid = psid;
5119e43fad6Srmind 		ci = sched_takecpu(t);
5125c71a4d4Srmind 		/* Unlocks LWP */
5135c71a4d4Srmind 		lwp_migrate(t, ci);
5145c71a4d4Srmind 		lcnt++;
5155c71a4d4Srmind 	}
516284c2b9aSad 	mutex_exit(p->p_lock);
5175c71a4d4Srmind 	if (lcnt == 0) {
5185c71a4d4Srmind 		error = ESRCH;
5195c71a4d4Srmind 	}
5205c71a4d4Srmind error:
521*0eaaa024Sad 	mutex_exit(&proc_lock);
52253db9954Sad 	mutex_exit(&cpu_lock);
523bec282a6Sad 	if (error == 0 && SCARG(uap, opsid))
524bec282a6Sad 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
5255c71a4d4Srmind 	return error;
5265c71a4d4Srmind }
5275c71a4d4Srmind 
5285c71a4d4Srmind /*
5295c71a4d4Srmind  * Sysctl nodes and initialization.
5305c71a4d4Srmind  */
5315c71a4d4Srmind 
5325c71a4d4Srmind static int
sysctl_psets_max(SYSCTLFN_ARGS)5335c71a4d4Srmind sysctl_psets_max(SYSCTLFN_ARGS)
5345c71a4d4Srmind {
5355c71a4d4Srmind 	struct sysctlnode node;
5365c71a4d4Srmind 	int error, newsize;
5375c71a4d4Srmind 
5385c71a4d4Srmind 	node = *rnode;
5395c71a4d4Srmind 	node.sysctl_data = &newsize;
5405c71a4d4Srmind 
5415c71a4d4Srmind 	newsize = psets_max;
5425c71a4d4Srmind 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
5435c71a4d4Srmind 	if (error || newp == NULL)
5445c71a4d4Srmind 		return error;
5455c71a4d4Srmind 
5465c71a4d4Srmind 	if (newsize <= 0)
5475c71a4d4Srmind 		return EINVAL;
5485c71a4d4Srmind 
5495c71a4d4Srmind 	sysctl_unlock();
5505c71a4d4Srmind 	error = psets_realloc(newsize);
5515c71a4d4Srmind 	sysctl_relock();
5525c71a4d4Srmind 	return error;
5535c71a4d4Srmind }
5545c71a4d4Srmind 
55553db9954Sad static int
sysctl_psets_list(SYSCTLFN_ARGS)55653db9954Sad sysctl_psets_list(SYSCTLFN_ARGS)
55753db9954Sad {
55853db9954Sad 	const size_t bufsz = 1024;
55953db9954Sad 	char *buf, tbuf[16];
56053db9954Sad 	int i, error;
56153db9954Sad 	size_t len;
56253db9954Sad 
56353db9954Sad 	sysctl_unlock();
56453db9954Sad 	buf = kmem_alloc(bufsz, KM_SLEEP);
56553db9954Sad 	snprintf(buf, bufsz, "%d:1", PS_NONE);	/* XXX */
56653db9954Sad 
56753db9954Sad 	mutex_enter(&cpu_lock);
56853db9954Sad 	for (i = 0; i < psets_max; i++) {
56953db9954Sad 		if (psets[i] == NULL)
57053db9954Sad 			continue;
57153db9954Sad 		snprintf(tbuf, sizeof(tbuf), ",%d:2", i + 1);	/* XXX */
57253db9954Sad 		strlcat(buf, tbuf, bufsz);
57353db9954Sad 	}
57453db9954Sad 	mutex_exit(&cpu_lock);
57553db9954Sad 	len = strlen(buf) + 1;
57653db9954Sad 	error = 0;
57753db9954Sad 	if (oldp != NULL)
578d1579b2dSriastradh 		error = copyout(buf, oldp, uimin(len, *oldlenp));
57953db9954Sad 	*oldlenp = len;
58053db9954Sad 	kmem_free(buf, bufsz);
58153db9954Sad 	sysctl_relock();
58253db9954Sad 	return error;
58353db9954Sad }
58453db9954Sad 
5855c71a4d4Srmind SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
5865c71a4d4Srmind {
5875c71a4d4Srmind 	const struct sysctlnode *node = NULL;
5885c71a4d4Srmind 
5895c71a4d4Srmind 	sysctl_createv(clog, 0, NULL, &node,
5905c71a4d4Srmind 		CTLFLAG_PERMANENT,
5915c71a4d4Srmind 		CTLTYPE_NODE, "pset",
5925c71a4d4Srmind 		SYSCTL_DESCR("Processor-set options"),
5935c71a4d4Srmind 		NULL, 0, NULL, 0,
5945c71a4d4Srmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
5955c71a4d4Srmind 
5965c71a4d4Srmind 	if (node == NULL)
5975c71a4d4Srmind 		return;
5985c71a4d4Srmind 
5995c71a4d4Srmind 	sysctl_createv(clog, 0, &node, NULL,
6005c71a4d4Srmind 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
6015c71a4d4Srmind 		CTLTYPE_INT, "psets_max",
6025c71a4d4Srmind 		SYSCTL_DESCR("Maximal count of the processor-sets"),
6035c71a4d4Srmind 		sysctl_psets_max, 0, &psets_max, 0,
6045c71a4d4Srmind 		CTL_CREATE, CTL_EOL);
60553db9954Sad 	sysctl_createv(clog, 0, &node, NULL,
60653db9954Sad 		CTLFLAG_PERMANENT,
60753db9954Sad 		CTLTYPE_STRING, "list",
60853db9954Sad 		SYSCTL_DESCR("List of active sets"),
60953db9954Sad 		sysctl_psets_list, 0, NULL, 0,
61053db9954Sad 		CTL_CREATE, CTL_EOL);
6115c71a4d4Srmind }
612