xref: /netbsd-src/sys/kern/sys_pset.c (revision ce099b40997c43048fb78bd578195f81d2456523)
1 /*	$NetBSD: sys_pset.c,v 1.6 2008/04/24 18:39:24 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of the processor-set structures and its members are protected
 *  by the global psets_lock.  Note that the scheduler may read the l_psid
 *  value of an LWP without holding the lock.
 */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.6 2008/04/24 18:39:24 ad Exp $");
40 
41 #include <sys/param.h>
42 
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 
/* Array of processor-set pointers; the set with ID n is kept in slot n - 1 */
static pset_info_t **	psets;
/* Lock taken around accesses to the array and its bookkeeping below */
static kmutex_t		psets_lock;
/* Number of slots in the psets array */
static u_int		psets_max;
/* Number of slots currently holding a processor-set */
static u_int		psets_count;

/* Helpers private to this file */
static int	psets_realloc(int);
static int	psid_validate(psetid_t, bool);
static int	kern_pset_create(psetid_t *);
static int	kern_pset_destroy(psetid_t);
65 
66 /*
67  * Initialization of the processor-sets.
68  */
69 void
70 psets_init(void)
71 {
72 
73 	psets_max = max(MAXCPUS, 32);
74 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
75 	mutex_init(&psets_lock, MUTEX_DEFAULT, IPL_NONE);
76 	psets_count = 0;
77 }
78 
79 /*
80  * Reallocate the array of the processor-set structures.
81  */
82 static int
83 psets_realloc(int new_psets_max)
84 {
85 	pset_info_t **new_psets, **old_psets;
86 	const u_int newsize = new_psets_max * sizeof(void *);
87 	u_int i, oldsize;
88 
89 	if (new_psets_max < 1)
90 		return EINVAL;
91 
92 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
93 	mutex_enter(&psets_lock);
94 	old_psets = psets;
95 	oldsize = psets_max * sizeof(void *);
96 
97 	/* Check if we can lower the size of the array */
98 	if (new_psets_max < psets_max) {
99 		for (i = new_psets_max; i < psets_max; i++) {
100 			if (psets[i] == NULL)
101 				continue;
102 			mutex_exit(&psets_lock);
103 			kmem_free(new_psets, newsize);
104 			return EBUSY;
105 		}
106 	}
107 
108 	/* Copy all pointers to the new array */
109 	memcpy(new_psets, psets, newsize);
110 	psets_max = new_psets_max;
111 	psets = new_psets;
112 	mutex_exit(&psets_lock);
113 
114 	kmem_free(old_psets, oldsize);
115 	return 0;
116 }
117 
118 /*
119  * Validate processor-set ID.
120  */
121 static int
122 psid_validate(psetid_t psid, bool chkps)
123 {
124 
125 	KASSERT(mutex_owned(&psets_lock));
126 
127 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
128 		return 0;
129 	if (psid <= 0 || psid > psets_max)
130 		return EINVAL;
131 	if (psets[psid - 1] == NULL)
132 		return EINVAL;
133 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
134 		return EBUSY;
135 
136 	return 0;
137 }
138 
139 /*
140  * Create a processor-set.
141  */
142 static int
143 kern_pset_create(psetid_t *psid)
144 {
145 	pset_info_t *pi;
146 	u_int i;
147 
148 	if (psets_count == psets_max)
149 		return ENOMEM;
150 
151 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
152 
153 	mutex_enter(&psets_lock);
154 	if (psets_count == psets_max) {
155 		mutex_exit(&psets_lock);
156 		kmem_free(pi, sizeof(pset_info_t));
157 		return ENOMEM;
158 	}
159 
160 	/* Find a free entry in the array */
161 	for (i = 0; i < psets_max; i++)
162 		if (psets[i] == NULL)
163 			break;
164 	KASSERT(i != psets_max);
165 
166 	psets[i] = pi;
167 	psets_count++;
168 	mutex_exit(&psets_lock);
169 
170 	*psid = i + 1;
171 	return 0;
172 }
173 
174 /*
175  * Destroy a processor-set.
176  */
177 static int
178 kern_pset_destroy(psetid_t psid)
179 {
180 	struct cpu_info *ci;
181 	pset_info_t *pi;
182 	struct lwp *l;
183 	CPU_INFO_ITERATOR cii;
184 	int error;
185 
186 	mutex_enter(&psets_lock);
187 	if (psid == PS_MYID) {
188 		/* Use caller's processor-set ID */
189 		psid = curlwp->l_psid;
190 	}
191 	error = psid_validate(psid, false);
192 	if (error) {
193 		mutex_exit(&psets_lock);
194 		return error;
195 	}
196 
197 	/* Release the processor-set from all CPUs */
198 	for (CPU_INFO_FOREACH(cii, ci)) {
199 		struct schedstate_percpu *spc;
200 
201 		spc = &ci->ci_schedstate;
202 		if (spc->spc_psid != psid)
203 			continue;
204 		spc->spc_psid = PS_NONE;
205 	}
206 	/* Mark that processor-set is going to be destroyed */
207 	pi = psets[psid - 1];
208 	pi->ps_flags |= PSET_BUSY;
209 	mutex_exit(&psets_lock);
210 
211 	/* Unmark the processor-set ID from each thread */
212 	mutex_enter(proc_lock);
213 	LIST_FOREACH(l, &alllwp, l_list) {
214 		/* Safe to check and set without lock held */
215 		if (l->l_psid != psid)
216 			continue;
217 		l->l_psid = PS_NONE;
218 	}
219 	mutex_exit(proc_lock);
220 
221 	/* Destroy the processor-set */
222 	mutex_enter(&psets_lock);
223 	psets[psid - 1] = NULL;
224 	psets_count--;
225 	mutex_exit(&psets_lock);
226 
227 	kmem_free(pi, sizeof(pset_info_t));
228 	return 0;
229 }
230 
231 /*
232  * General system calls for the processor-sets.
233  */
234 
235 int
236 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
237     register_t *retval)
238 {
239 	/* {
240 		syscallarg(psetid_t) *psid;
241 	} */
242 	psetid_t psid;
243 	int error;
244 
245 	/* Available only for super-user */
246 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
247 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
248 		return EPERM;
249 
250 	error = kern_pset_create(&psid);
251 	if (error)
252 		return error;
253 
254 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
255 	if (error)
256 		(void)kern_pset_destroy(psid);
257 
258 	return error;
259 }
260 
261 int
262 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
263     register_t *retval)
264 {
265 	/* {
266 		syscallarg(psetid_t) psid;
267 	} */
268 
269 	/* Available only for super-user */
270 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
271 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
272 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
273 		return EPERM;
274 
275 	return kern_pset_destroy(SCARG(uap, psid));
276 }
277 
278 int
279 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
280     register_t *retval)
281 {
282 	/* {
283 		syscallarg(psetid_t) psid;
284 		syscallarg(cpuid_t) cpuid;
285 		syscallarg(psetid_t) *opsid;
286 	} */
287 	struct cpu_info *ci;
288 	struct schedstate_percpu *spc;
289 	psetid_t psid = SCARG(uap, psid), opsid = 0;
290 	CPU_INFO_ITERATOR cii;
291 	int error = 0;
292 
293 	/* Available only for super-user, except the case of PS_QUERY */
294 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
295 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
296 	    NULL))
297 		return EPERM;
298 
299 	/* Find the target CPU */
300 	for (CPU_INFO_FOREACH(cii, ci))
301 		if (cpu_index(ci) == SCARG(uap, cpuid))
302 			break;
303 	if (ci == NULL)
304 		return EINVAL;
305 	spc = &ci->ci_schedstate;
306 
307 	mutex_enter(&psets_lock);
308 	error = psid_validate(psid, true);
309 	if (error) {
310 		mutex_exit(&psets_lock);
311 		return error;
312 	}
313 	opsid = spc->spc_psid;
314 	switch (psid) {
315 	case PS_QUERY:
316 		break;
317 	case PS_MYID:
318 		psid = curlwp->l_psid;
319 	default:
320 		spc->spc_psid = psid;
321 	}
322 	mutex_exit(&psets_lock);
323 
324 	if (SCARG(uap, opsid) != NULL)
325 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
326 
327 	return error;
328 }
329 
330 int
331 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
332     register_t *retval)
333 {
334 	/* {
335 		syscallarg(idtype_t) idtype;
336 		syscallarg(id_t) first_id;
337 		syscallarg(id_t) second_id;
338 		syscallarg(psetid_t) psid;
339 		syscallarg(psetid_t) *opsid;
340 	} */
341 	struct cpu_info *ci;
342 	struct proc *p;
343 	struct lwp *t;
344 	id_t id1, id2;
345 	pid_t pid = 0;
346 	lwpid_t lid = 0;
347 	psetid_t psid, opsid;
348 	int error = 0, lcnt;
349 
350 	psid = SCARG(uap, psid);
351 
352 	/* Available only for super-user, except the case of PS_QUERY */
353 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
354 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
355 	    NULL))
356 		return EPERM;
357 
358 	mutex_enter(&psets_lock);
359 	error = psid_validate(psid, true);
360 	if (error) {
361 		mutex_exit(&psets_lock);
362 		return error;
363 	}
364 	if (psid == PS_MYID)
365 		psid = curlwp->l_psid;
366 	if (psid != PS_QUERY && psid != PS_NONE)
367 		psets[psid - 1]->ps_flags |= PSET_BUSY;
368 	mutex_exit(&psets_lock);
369 
370 	/*
371 	 * Get PID and LID from the ID.
372 	 */
373 	p = l->l_proc;
374 	id1 = SCARG(uap, first_id);
375 	id2 = SCARG(uap, second_id);
376 
377 	switch (SCARG(uap, idtype)) {
378 	case P_PID:
379 		/*
380 		 * Process:
381 		 *  First ID	- PID;
382 		 *  Second ID	- ignored;
383 		 */
384 		pid = (id1 == P_MYID) ? p->p_pid : id1;
385 		lid = 0;
386 		break;
387 	case P_LWPID:
388 		/*
389 		 * Thread (LWP):
390 		 *  First ID	- LID;
391 		 *  Second ID	- PID;
392 		 */
393 		if (id1 == P_MYID) {
394 			pid = p->p_pid;
395 			lid = l->l_lid;
396 			break;
397 		}
398 		lid = id1;
399 		pid = (id2 == P_MYID) ? p->p_pid : id2;
400 		break;
401 	default:
402 		error = EINVAL;
403 		goto error;
404 	}
405 
406 	/* Find the process */
407 	mutex_enter(proc_lock);
408 	p = p_find(pid, PFIND_LOCKED);
409 	if (p == NULL) {
410 		mutex_exit(proc_lock);
411 		error = ESRCH;
412 		goto error;
413 	}
414 	mutex_enter(p->p_lock);
415 	mutex_exit(proc_lock);
416 
417 	/* Disallow modification of the system processes */
418 	if (p->p_flag & PK_SYSTEM) {
419 		mutex_exit(p->p_lock);
420 		error = EPERM;
421 		goto error;
422 	}
423 
424 	/* Find the LWP(s) */
425 	lcnt = 0;
426 	ci = NULL;
427 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
428 		if (lid && lid != t->l_lid)
429 			continue;
430 		/*
431 		 * Bind the thread to the processor-set,
432 		 * take some CPU and migrate.
433 		 */
434 		lwp_lock(t);
435 		opsid = t->l_psid;
436 		t->l_psid = psid;
437 		ci = sched_takecpu(l);
438 		/* Unlocks LWP */
439 		lwp_migrate(t, ci);
440 		lcnt++;
441 	}
442 	mutex_exit(p->p_lock);
443 	if (lcnt == 0) {
444 		error = ESRCH;
445 		goto error;
446 	}
447 	if (SCARG(uap, opsid))
448 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
449 error:
450 	if (psid != PS_QUERY && psid != PS_NONE) {
451 		mutex_enter(&psets_lock);
452 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
453 		mutex_exit(&psets_lock);
454 	}
455 	return error;
456 }
457 
458 /*
459  * Sysctl nodes and initialization.
460  */
461 
462 static int
463 sysctl_psets_max(SYSCTLFN_ARGS)
464 {
465 	struct sysctlnode node;
466 	int error, newsize;
467 
468 	node = *rnode;
469 	node.sysctl_data = &newsize;
470 
471 	newsize = psets_max;
472 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
473 	if (error || newp == NULL)
474 		return error;
475 
476 	if (newsize <= 0)
477 		return EINVAL;
478 
479 	sysctl_unlock();
480 	error = psets_realloc(newsize);
481 	sysctl_relock();
482 	return error;
483 }
484 
485 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
486 {
487 	const struct sysctlnode *node = NULL;
488 
489 	sysctl_createv(clog, 0, NULL, NULL,
490 		CTLFLAG_PERMANENT,
491 		CTLTYPE_NODE, "kern", NULL,
492 		NULL, 0, NULL, 0,
493 		CTL_KERN, CTL_EOL);
494 	sysctl_createv(clog, 0, NULL, &node,
495 		CTLFLAG_PERMANENT,
496 		CTLTYPE_NODE, "pset",
497 		SYSCTL_DESCR("Processor-set options"),
498 		NULL, 0, NULL, 0,
499 		CTL_KERN, CTL_CREATE, CTL_EOL);
500 
501 	if (node == NULL)
502 		return;
503 
504 	sysctl_createv(clog, 0, &node, NULL,
505 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
506 		CTLTYPE_INT, "psets_max",
507 		SYSCTL_DESCR("Maximal count of the processor-sets"),
508 		sysctl_psets_max, 0, &psets_max, 0,
509 		CTL_CREATE, CTL_EOL);
510 }
511