xref: /netbsd-src/sys/kern/sys_pset.c (revision b78992537496bc71ee3d761f9fe0be0fc0a9a001)
1 /*	$NetBSD: sys_pset.c,v 1.8 2008/06/22 12:59:18 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Implementation of the Processor Sets.
31  *
32  * Locking
33  *  The array of the processor-set structures and its members are protected
34  *  by the global cpu_lock.  Note that in scheduler, the very l_psid value
35  *  might be used without lock held.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.8 2008/06/22 12:59:18 ad Exp $");
40 
41 #include <sys/param.h>
42 
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 
56 static pset_info_t **	psets;
57 static u_int		psets_max;
58 static u_int		psets_count;
59 
60 static int	psets_realloc(int);
61 static int	psid_validate(psetid_t, bool);
62 static int	kern_pset_create(psetid_t *);
63 static int	kern_pset_destroy(psetid_t);
64 
65 /*
66  * Initialization of the processor-sets.
67  */
68 void
69 psets_init(void)
70 {
71 
72 	psets_max = max(MAXCPUS, 32);
73 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
74 	psets_count = 0;
75 }
76 
77 /*
78  * Reallocate the array of the processor-set structures.
79  */
80 static int
81 psets_realloc(int new_psets_max)
82 {
83 	pset_info_t **new_psets, **old_psets;
84 	const u_int newsize = new_psets_max * sizeof(void *);
85 	u_int i, oldsize;
86 
87 	if (new_psets_max < 1)
88 		return EINVAL;
89 
90 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
91 	mutex_enter(&cpu_lock);
92 	old_psets = psets;
93 	oldsize = psets_max * sizeof(void *);
94 
95 	/* Check if we can lower the size of the array */
96 	if (new_psets_max < psets_max) {
97 		for (i = new_psets_max; i < psets_max; i++) {
98 			if (psets[i] == NULL)
99 				continue;
100 			mutex_exit(&cpu_lock);
101 			kmem_free(new_psets, newsize);
102 			return EBUSY;
103 		}
104 	}
105 
106 	/* Copy all pointers to the new array */
107 	memcpy(new_psets, psets, newsize);
108 	psets_max = new_psets_max;
109 	psets = new_psets;
110 	mutex_exit(&cpu_lock);
111 
112 	kmem_free(old_psets, oldsize);
113 	return 0;
114 }
115 
116 /*
117  * Validate processor-set ID.
118  */
119 static int
120 psid_validate(psetid_t psid, bool chkps)
121 {
122 
123 	KASSERT(mutex_owned(&cpu_lock));
124 
125 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
126 		return 0;
127 	if (psid <= 0 || psid > psets_max)
128 		return EINVAL;
129 	if (psets[psid - 1] == NULL)
130 		return EINVAL;
131 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
132 		return EBUSY;
133 
134 	return 0;
135 }
136 
137 /*
138  * Create a processor-set.
139  */
140 static int
141 kern_pset_create(psetid_t *psid)
142 {
143 	pset_info_t *pi;
144 	u_int i;
145 
146 	if (psets_count == psets_max)
147 		return ENOMEM;
148 
149 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
150 
151 	mutex_enter(&cpu_lock);
152 	if (psets_count == psets_max) {
153 		mutex_exit(&cpu_lock);
154 		kmem_free(pi, sizeof(pset_info_t));
155 		return ENOMEM;
156 	}
157 
158 	/* Find a free entry in the array */
159 	for (i = 0; i < psets_max; i++)
160 		if (psets[i] == NULL)
161 			break;
162 	KASSERT(i != psets_max);
163 
164 	psets[i] = pi;
165 	psets_count++;
166 	mutex_exit(&cpu_lock);
167 
168 	*psid = i + 1;
169 	return 0;
170 }
171 
172 /*
173  * Destroy a processor-set.
174  */
175 static int
176 kern_pset_destroy(psetid_t psid)
177 {
178 	struct cpu_info *ci;
179 	pset_info_t *pi;
180 	struct lwp *l;
181 	CPU_INFO_ITERATOR cii;
182 	int error;
183 
184 	mutex_enter(&cpu_lock);
185 	if (psid == PS_MYID) {
186 		/* Use caller's processor-set ID */
187 		psid = curlwp->l_psid;
188 	}
189 	error = psid_validate(psid, false);
190 	if (error) {
191 		mutex_exit(&cpu_lock);
192 		return error;
193 	}
194 
195 	/* Release the processor-set from all CPUs */
196 	for (CPU_INFO_FOREACH(cii, ci)) {
197 		struct schedstate_percpu *spc;
198 
199 		spc = &ci->ci_schedstate;
200 		if (spc->spc_psid != psid)
201 			continue;
202 		spc->spc_psid = PS_NONE;
203 	}
204 	/* Mark that processor-set is going to be destroyed */
205 	pi = psets[psid - 1];
206 	pi->ps_flags |= PSET_BUSY;
207 	mutex_exit(&cpu_lock);
208 
209 	/* Unmark the processor-set ID from each thread */
210 	mutex_enter(proc_lock);
211 	LIST_FOREACH(l, &alllwp, l_list) {
212 		/* Safe to check and set without lock held */
213 		if (l->l_psid != psid)
214 			continue;
215 		l->l_psid = PS_NONE;
216 	}
217 	mutex_exit(proc_lock);
218 
219 	/* Destroy the processor-set */
220 	mutex_enter(&cpu_lock);
221 	psets[psid - 1] = NULL;
222 	psets_count--;
223 	mutex_exit(&cpu_lock);
224 
225 	kmem_free(pi, sizeof(pset_info_t));
226 	return 0;
227 }
228 
229 /*
230  * General system calls for the processor-sets.
231  */
232 
233 int
234 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
235     register_t *retval)
236 {
237 	/* {
238 		syscallarg(psetid_t) *psid;
239 	} */
240 	psetid_t psid;
241 	int error;
242 
243 	/* Available only for super-user */
244 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
245 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
246 		return EPERM;
247 
248 	error = kern_pset_create(&psid);
249 	if (error)
250 		return error;
251 
252 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
253 	if (error)
254 		(void)kern_pset_destroy(psid);
255 
256 	return error;
257 }
258 
259 int
260 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
261     register_t *retval)
262 {
263 	/* {
264 		syscallarg(psetid_t) psid;
265 	} */
266 
267 	/* Available only for super-user */
268 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
269 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
270 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
271 		return EPERM;
272 
273 	return kern_pset_destroy(SCARG(uap, psid));
274 }
275 
276 int
277 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
278     register_t *retval)
279 {
280 	/* {
281 		syscallarg(psetid_t) psid;
282 		syscallarg(cpuid_t) cpuid;
283 		syscallarg(psetid_t) *opsid;
284 	} */
285 	struct cpu_info *ci;
286 	struct schedstate_percpu *spc;
287 	psetid_t psid = SCARG(uap, psid), opsid = 0;
288 	CPU_INFO_ITERATOR cii;
289 	int error = 0, nnone;
290 
291 	/* Available only for super-user, except the case of PS_QUERY */
292 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
293 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
294 	    NULL))
295 		return EPERM;
296 
297 	/* Find the target CPU */
298 	mutex_enter(&cpu_lock);
299 	spc = NULL;
300 	nnone = 0;
301 	for (CPU_INFO_FOREACH(cii, ci)) {
302 		if (cpu_index(ci) == SCARG(uap, cpuid))
303 			spc = &ci->ci_schedstate;
304 		nnone += (ci->ci_schedstate.spc_psid == PS_NONE);
305 	}
306 	if (spc == NULL) {
307 		mutex_exit(&cpu_lock);
308 		return EINVAL;
309 	}
310 	error = psid_validate(psid, true);
311 	if (error) {
312 		mutex_exit(&cpu_lock);
313 		return error;
314 	}
315 	opsid = spc->spc_psid;
316 	switch (psid) {
317 	case PS_QUERY:
318 		break;
319 	case PS_MYID:
320 		psid = curlwp->l_psid;
321 		/* FALLTHROUGH */
322 	default:
323 		/* Ensure at least one CPU stays in the default set. */
324 		if (nnone == 1 && spc->spc_psid == PS_NONE &&
325 		    psid != PS_NONE) {
326 			mutex_exit(&cpu_lock);
327 			return EBUSY;
328 		}
329 		spc->spc_psid = psid;
330 		break;
331 	}
332 	mutex_exit(&cpu_lock);
333 
334 	if (SCARG(uap, opsid) != NULL)
335 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
336 
337 	return error;
338 }
339 
340 int
341 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
342     register_t *retval)
343 {
344 	/* {
345 		syscallarg(idtype_t) idtype;
346 		syscallarg(id_t) first_id;
347 		syscallarg(id_t) second_id;
348 		syscallarg(psetid_t) psid;
349 		syscallarg(psetid_t) *opsid;
350 	} */
351 	struct cpu_info *ci;
352 	struct proc *p;
353 	struct lwp *t;
354 	id_t id1, id2;
355 	pid_t pid = 0;
356 	lwpid_t lid = 0;
357 	psetid_t psid, opsid;
358 	int error = 0, lcnt;
359 
360 	psid = SCARG(uap, psid);
361 
362 	/* Available only for super-user, except the case of PS_QUERY */
363 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
364 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
365 	    NULL))
366 		return EPERM;
367 
368 	mutex_enter(&cpu_lock);
369 	error = psid_validate(psid, true);
370 	if (error) {
371 		mutex_exit(&cpu_lock);
372 		return error;
373 	}
374 	if (psid == PS_MYID)
375 		psid = curlwp->l_psid;
376 	if (psid != PS_QUERY && psid != PS_NONE)
377 		psets[psid - 1]->ps_flags |= PSET_BUSY;
378 	mutex_exit(&cpu_lock);
379 
380 	/*
381 	 * Get PID and LID from the ID.
382 	 */
383 	p = l->l_proc;
384 	id1 = SCARG(uap, first_id);
385 	id2 = SCARG(uap, second_id);
386 
387 	switch (SCARG(uap, idtype)) {
388 	case P_PID:
389 		/*
390 		 * Process:
391 		 *  First ID	- PID;
392 		 *  Second ID	- ignored;
393 		 */
394 		pid = (id1 == P_MYID) ? p->p_pid : id1;
395 		lid = 0;
396 		break;
397 	case P_LWPID:
398 		/*
399 		 * Thread (LWP):
400 		 *  First ID	- LID;
401 		 *  Second ID	- PID;
402 		 */
403 		if (id1 == P_MYID) {
404 			pid = p->p_pid;
405 			lid = l->l_lid;
406 			break;
407 		}
408 		lid = id1;
409 		pid = (id2 == P_MYID) ? p->p_pid : id2;
410 		break;
411 	default:
412 		error = EINVAL;
413 		goto error;
414 	}
415 
416 	/* Find the process */
417 	mutex_enter(proc_lock);
418 	p = p_find(pid, PFIND_LOCKED);
419 	if (p == NULL) {
420 		mutex_exit(proc_lock);
421 		error = ESRCH;
422 		goto error;
423 	}
424 	mutex_enter(p->p_lock);
425 	mutex_exit(proc_lock);
426 
427 	/* Disallow modification of the system processes */
428 	if (p->p_flag & PK_SYSTEM) {
429 		mutex_exit(p->p_lock);
430 		error = EPERM;
431 		goto error;
432 	}
433 
434 	/* Find the LWP(s) */
435 	lcnt = 0;
436 	ci = NULL;
437 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
438 		if (lid && lid != t->l_lid)
439 			continue;
440 		/*
441 		 * Bind the thread to the processor-set,
442 		 * take some CPU and migrate.
443 		 */
444 		lwp_lock(t);
445 		opsid = t->l_psid;
446 		t->l_psid = psid;
447 		ci = sched_takecpu(l);
448 		/* Unlocks LWP */
449 		lwp_migrate(t, ci);
450 		lcnt++;
451 	}
452 	mutex_exit(p->p_lock);
453 	if (lcnt == 0) {
454 		error = ESRCH;
455 		goto error;
456 	}
457 	if (SCARG(uap, opsid))
458 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
459 error:
460 	if (psid != PS_QUERY && psid != PS_NONE) {
461 		mutex_enter(&cpu_lock);
462 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
463 		mutex_exit(&cpu_lock);
464 	}
465 	return error;
466 }
467 
468 /*
469  * Sysctl nodes and initialization.
470  */
471 
472 static int
473 sysctl_psets_max(SYSCTLFN_ARGS)
474 {
475 	struct sysctlnode node;
476 	int error, newsize;
477 
478 	node = *rnode;
479 	node.sysctl_data = &newsize;
480 
481 	newsize = psets_max;
482 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
483 	if (error || newp == NULL)
484 		return error;
485 
486 	if (newsize <= 0)
487 		return EINVAL;
488 
489 	sysctl_unlock();
490 	error = psets_realloc(newsize);
491 	sysctl_relock();
492 	return error;
493 }
494 
495 static int
496 sysctl_psets_list(SYSCTLFN_ARGS)
497 {
498 	const size_t bufsz = 1024;
499 	char *buf, tbuf[16];
500 	int i, error;
501 	size_t len;
502 
503 	sysctl_unlock();
504 	buf = kmem_alloc(bufsz, KM_SLEEP);
505 	snprintf(buf, bufsz, "%d:1", PS_NONE);	/* XXX */
506 
507 	mutex_enter(&cpu_lock);
508 	for (i = 0; i < psets_max; i++) {
509 		if (psets[i] == NULL)
510 			continue;
511 		snprintf(tbuf, sizeof(tbuf), ",%d:2", i + 1);	/* XXX */
512 		strlcat(buf, tbuf, bufsz);
513 	}
514 	mutex_exit(&cpu_lock);
515 	len = strlen(buf) + 1;
516 	error = 0;
517 	if (oldp != NULL)
518 		error = copyout(buf, oldp, min(len, *oldlenp));
519 	*oldlenp = len;
520 	kmem_free(buf, bufsz);
521 	sysctl_relock();
522 	return error;
523 }
524 
525 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
526 {
527 	const struct sysctlnode *node = NULL;
528 
529 	sysctl_createv(clog, 0, NULL, NULL,
530 		CTLFLAG_PERMANENT,
531 		CTLTYPE_NODE, "kern", NULL,
532 		NULL, 0, NULL, 0,
533 		CTL_KERN, CTL_EOL);
534 	sysctl_createv(clog, 0, NULL, &node,
535 		CTLFLAG_PERMANENT,
536 		CTLTYPE_NODE, "pset",
537 		SYSCTL_DESCR("Processor-set options"),
538 		NULL, 0, NULL, 0,
539 		CTL_KERN, CTL_CREATE, CTL_EOL);
540 
541 	if (node == NULL)
542 		return;
543 
544 	sysctl_createv(clog, 0, &node, NULL,
545 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
546 		CTLTYPE_INT, "psets_max",
547 		SYSCTL_DESCR("Maximal count of the processor-sets"),
548 		sysctl_psets_max, 0, &psets_max, 0,
549 		CTL_CREATE, CTL_EOL);
550 	sysctl_createv(clog, 0, &node, NULL,
551 		CTLFLAG_PERMANENT,
552 		CTLTYPE_STRING, "list",
553 		SYSCTL_DESCR("List of active sets"),
554 		sysctl_psets_list, 0, NULL, 0,
555 		CTL_CREATE, CTL_EOL);
556 }
557