xref: /netbsd-src/sys/kern/sys_pset.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: sys_pset.c,v 1.9 2008/09/30 16:28:45 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Implementation of the Processor Sets.
31  *
32  * Locking
 *  The array of the processor-set structures and its members are protected
 *  by the global cpu_lock.  Note that the scheduler may use an LWP's
 *  l_psid value without holding this lock.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.9 2008/09/30 16:28:45 rmind Exp $");
40 
41 #include <sys/param.h>
42 
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 
56 static pset_info_t **	psets;
57 static u_int		psets_max;
58 static u_int		psets_count;
59 
60 static int	psets_realloc(int);
61 static int	psid_validate(psetid_t, bool);
62 static int	kern_pset_create(psetid_t *);
63 static int	kern_pset_destroy(psetid_t);
64 
65 /*
66  * Initialization of the processor-sets.
67  */
68 void
69 psets_init(void)
70 {
71 
72 	psets_max = max(MAXCPUS, 32);
73 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
74 	psets_count = 0;
75 }
76 
77 /*
78  * Reallocate the array of the processor-set structures.
79  */
80 static int
81 psets_realloc(int new_psets_max)
82 {
83 	pset_info_t **new_psets, **old_psets;
84 	const u_int newsize = new_psets_max * sizeof(void *);
85 	u_int i, oldsize;
86 
87 	if (new_psets_max < 1)
88 		return EINVAL;
89 
90 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
91 	mutex_enter(&cpu_lock);
92 	old_psets = psets;
93 	oldsize = psets_max * sizeof(void *);
94 
95 	/* Check if we can lower the size of the array */
96 	if (new_psets_max < psets_max) {
97 		for (i = new_psets_max; i < psets_max; i++) {
98 			if (psets[i] == NULL)
99 				continue;
100 			mutex_exit(&cpu_lock);
101 			kmem_free(new_psets, newsize);
102 			return EBUSY;
103 		}
104 	}
105 
106 	/* Copy all pointers to the new array */
107 	memcpy(new_psets, psets, newsize);
108 	psets_max = new_psets_max;
109 	psets = new_psets;
110 	mutex_exit(&cpu_lock);
111 
112 	kmem_free(old_psets, oldsize);
113 	return 0;
114 }
115 
116 /*
117  * Validate processor-set ID.
118  */
119 static int
120 psid_validate(psetid_t psid, bool chkps)
121 {
122 
123 	KASSERT(mutex_owned(&cpu_lock));
124 
125 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
126 		return 0;
127 	if (psid <= 0 || psid > psets_max)
128 		return EINVAL;
129 	if (psets[psid - 1] == NULL)
130 		return EINVAL;
131 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
132 		return EBUSY;
133 
134 	return 0;
135 }
136 
137 /*
138  * Create a processor-set.
139  */
140 static int
141 kern_pset_create(psetid_t *psid)
142 {
143 	pset_info_t *pi;
144 	u_int i;
145 
146 	if (psets_count == psets_max)
147 		return ENOMEM;
148 
149 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
150 
151 	mutex_enter(&cpu_lock);
152 	if (psets_count == psets_max) {
153 		mutex_exit(&cpu_lock);
154 		kmem_free(pi, sizeof(pset_info_t));
155 		return ENOMEM;
156 	}
157 
158 	/* Find a free entry in the array */
159 	for (i = 0; i < psets_max; i++)
160 		if (psets[i] == NULL)
161 			break;
162 	KASSERT(i != psets_max);
163 
164 	psets[i] = pi;
165 	psets_count++;
166 	mutex_exit(&cpu_lock);
167 
168 	*psid = i + 1;
169 	return 0;
170 }
171 
172 /*
173  * Destroy a processor-set.
174  */
175 static int
176 kern_pset_destroy(psetid_t psid)
177 {
178 	struct cpu_info *ci;
179 	pset_info_t *pi;
180 	struct lwp *l;
181 	CPU_INFO_ITERATOR cii;
182 	int error;
183 
184 	mutex_enter(&cpu_lock);
185 	if (psid == PS_MYID) {
186 		/* Use caller's processor-set ID */
187 		psid = curlwp->l_psid;
188 	}
189 	error = psid_validate(psid, false);
190 	if (error) {
191 		mutex_exit(&cpu_lock);
192 		return error;
193 	}
194 
195 	/* Release the processor-set from all CPUs */
196 	for (CPU_INFO_FOREACH(cii, ci)) {
197 		struct schedstate_percpu *spc;
198 
199 		spc = &ci->ci_schedstate;
200 		if (spc->spc_psid != psid)
201 			continue;
202 		spc->spc_psid = PS_NONE;
203 	}
204 	/* Mark that processor-set is going to be destroyed */
205 	pi = psets[psid - 1];
206 	pi->ps_flags |= PSET_BUSY;
207 	mutex_exit(&cpu_lock);
208 
209 	/* Unmark the processor-set ID from each thread */
210 	mutex_enter(proc_lock);
211 	LIST_FOREACH(l, &alllwp, l_list) {
212 		/* Safe to check and set without lock held */
213 		if (l->l_psid != psid)
214 			continue;
215 		l->l_psid = PS_NONE;
216 	}
217 	mutex_exit(proc_lock);
218 
219 	/* Destroy the processor-set */
220 	mutex_enter(&cpu_lock);
221 	psets[psid - 1] = NULL;
222 	psets_count--;
223 	mutex_exit(&cpu_lock);
224 
225 	kmem_free(pi, sizeof(pset_info_t));
226 	return 0;
227 }
228 
229 /*
230  * General system calls for the processor-sets.
231  */
232 
233 int
234 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
235     register_t *retval)
236 {
237 	/* {
238 		syscallarg(psetid_t) *psid;
239 	} */
240 	psetid_t psid;
241 	int error;
242 
243 	/* Available only for super-user */
244 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
245 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
246 		return EPERM;
247 
248 	error = kern_pset_create(&psid);
249 	if (error)
250 		return error;
251 
252 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
253 	if (error)
254 		(void)kern_pset_destroy(psid);
255 
256 	return error;
257 }
258 
259 int
260 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
261     register_t *retval)
262 {
263 	/* {
264 		syscallarg(psetid_t) psid;
265 	} */
266 
267 	/* Available only for super-user */
268 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
269 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
270 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
271 		return EPERM;
272 
273 	return kern_pset_destroy(SCARG(uap, psid));
274 }
275 
276 int
277 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
278     register_t *retval)
279 {
280 	/* {
281 		syscallarg(psetid_t) psid;
282 		syscallarg(cpuid_t) cpuid;
283 		syscallarg(psetid_t) *opsid;
284 	} */
285 	struct cpu_info *ci;
286 	struct schedstate_percpu *spc = NULL;
287 	psetid_t psid = SCARG(uap, psid), opsid = 0;
288 	CPU_INFO_ITERATOR cii;
289 	int error = 0, nnone = 0;
290 
291 	/* Available only for super-user, except the case of PS_QUERY */
292 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
293 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
294 	    NULL))
295 		return EPERM;
296 
297 	/* Find the target CPU */
298 	mutex_enter(&cpu_lock);
299 	for (CPU_INFO_FOREACH(cii, ci)) {
300 		if (cpu_index(ci) == SCARG(uap, cpuid))
301 			spc = &ci->ci_schedstate;
302 		nnone += (ci->ci_schedstate.spc_psid == PS_NONE);
303 	}
304 	if (spc == NULL) {
305 		mutex_exit(&cpu_lock);
306 		return EINVAL;
307 	}
308 	error = psid_validate(psid, true);
309 	if (error) {
310 		mutex_exit(&cpu_lock);
311 		return error;
312 	}
313 	opsid = spc->spc_psid;
314 	switch (psid) {
315 	case PS_QUERY:
316 		break;
317 	case PS_MYID:
318 		psid = curlwp->l_psid;
319 		/* FALLTHROUGH */
320 	default:
321 		/*
322 		 * Ensure at least one CPU stays in the default set,
323 		 * and that specified CPU is not offline.
324 		 */
325 		if (psid != PS_NONE && ((spc->spc_flags & SPCF_OFFLINE) ||
326 		    (nnone == 1 && spc->spc_psid == PS_NONE))) {
327 			mutex_exit(&cpu_lock);
328 			return EBUSY;
329 		}
330 		spc->spc_psid = psid;
331 		break;
332 	}
333 	mutex_exit(&cpu_lock);
334 
335 	if (SCARG(uap, opsid) != NULL)
336 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
337 
338 	return error;
339 }
340 
341 int
342 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
343     register_t *retval)
344 {
345 	/* {
346 		syscallarg(idtype_t) idtype;
347 		syscallarg(id_t) first_id;
348 		syscallarg(id_t) second_id;
349 		syscallarg(psetid_t) psid;
350 		syscallarg(psetid_t) *opsid;
351 	} */
352 	struct cpu_info *ci;
353 	struct proc *p;
354 	struct lwp *t;
355 	id_t id1, id2;
356 	pid_t pid = 0;
357 	lwpid_t lid = 0;
358 	psetid_t psid, opsid;
359 	int error = 0, lcnt;
360 
361 	psid = SCARG(uap, psid);
362 
363 	/* Available only for super-user, except the case of PS_QUERY */
364 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
365 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
366 	    NULL))
367 		return EPERM;
368 
369 	mutex_enter(&cpu_lock);
370 	error = psid_validate(psid, true);
371 	if (error) {
372 		mutex_exit(&cpu_lock);
373 		return error;
374 	}
375 	if (psid == PS_MYID)
376 		psid = curlwp->l_psid;
377 	if (psid != PS_QUERY && psid != PS_NONE)
378 		psets[psid - 1]->ps_flags |= PSET_BUSY;
379 	mutex_exit(&cpu_lock);
380 
381 	/*
382 	 * Get PID and LID from the ID.
383 	 */
384 	p = l->l_proc;
385 	id1 = SCARG(uap, first_id);
386 	id2 = SCARG(uap, second_id);
387 
388 	switch (SCARG(uap, idtype)) {
389 	case P_PID:
390 		/*
391 		 * Process:
392 		 *  First ID	- PID;
393 		 *  Second ID	- ignored;
394 		 */
395 		pid = (id1 == P_MYID) ? p->p_pid : id1;
396 		lid = 0;
397 		break;
398 	case P_LWPID:
399 		/*
400 		 * Thread (LWP):
401 		 *  First ID	- LID;
402 		 *  Second ID	- PID;
403 		 */
404 		if (id1 == P_MYID) {
405 			pid = p->p_pid;
406 			lid = l->l_lid;
407 			break;
408 		}
409 		lid = id1;
410 		pid = (id2 == P_MYID) ? p->p_pid : id2;
411 		break;
412 	default:
413 		error = EINVAL;
414 		goto error;
415 	}
416 
417 	/* Find the process */
418 	mutex_enter(proc_lock);
419 	p = p_find(pid, PFIND_LOCKED);
420 	if (p == NULL) {
421 		mutex_exit(proc_lock);
422 		error = ESRCH;
423 		goto error;
424 	}
425 	mutex_enter(p->p_lock);
426 	mutex_exit(proc_lock);
427 
428 	/* Disallow modification of the system processes */
429 	if (p->p_flag & PK_SYSTEM) {
430 		mutex_exit(p->p_lock);
431 		error = EPERM;
432 		goto error;
433 	}
434 
435 	/* Find the LWP(s) */
436 	lcnt = 0;
437 	ci = NULL;
438 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
439 		if (lid && lid != t->l_lid)
440 			continue;
441 		/*
442 		 * Bind the thread to the processor-set,
443 		 * take some CPU and migrate.
444 		 */
445 		lwp_lock(t);
446 		opsid = t->l_psid;
447 		t->l_psid = psid;
448 		ci = sched_takecpu(l);
449 		/* Unlocks LWP */
450 		lwp_migrate(t, ci);
451 		lcnt++;
452 	}
453 	mutex_exit(p->p_lock);
454 	if (lcnt == 0) {
455 		error = ESRCH;
456 		goto error;
457 	}
458 	if (SCARG(uap, opsid))
459 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
460 error:
461 	if (psid != PS_QUERY && psid != PS_NONE) {
462 		mutex_enter(&cpu_lock);
463 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
464 		mutex_exit(&cpu_lock);
465 	}
466 	return error;
467 }
468 
469 /*
470  * Sysctl nodes and initialization.
471  */
472 
473 static int
474 sysctl_psets_max(SYSCTLFN_ARGS)
475 {
476 	struct sysctlnode node;
477 	int error, newsize;
478 
479 	node = *rnode;
480 	node.sysctl_data = &newsize;
481 
482 	newsize = psets_max;
483 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
484 	if (error || newp == NULL)
485 		return error;
486 
487 	if (newsize <= 0)
488 		return EINVAL;
489 
490 	sysctl_unlock();
491 	error = psets_realloc(newsize);
492 	sysctl_relock();
493 	return error;
494 }
495 
496 static int
497 sysctl_psets_list(SYSCTLFN_ARGS)
498 {
499 	const size_t bufsz = 1024;
500 	char *buf, tbuf[16];
501 	int i, error;
502 	size_t len;
503 
504 	sysctl_unlock();
505 	buf = kmem_alloc(bufsz, KM_SLEEP);
506 	snprintf(buf, bufsz, "%d:1", PS_NONE);	/* XXX */
507 
508 	mutex_enter(&cpu_lock);
509 	for (i = 0; i < psets_max; i++) {
510 		if (psets[i] == NULL)
511 			continue;
512 		snprintf(tbuf, sizeof(tbuf), ",%d:2", i + 1);	/* XXX */
513 		strlcat(buf, tbuf, bufsz);
514 	}
515 	mutex_exit(&cpu_lock);
516 	len = strlen(buf) + 1;
517 	error = 0;
518 	if (oldp != NULL)
519 		error = copyout(buf, oldp, min(len, *oldlenp));
520 	*oldlenp = len;
521 	kmem_free(buf, bufsz);
522 	sysctl_relock();
523 	return error;
524 }
525 
526 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
527 {
528 	const struct sysctlnode *node = NULL;
529 
530 	sysctl_createv(clog, 0, NULL, NULL,
531 		CTLFLAG_PERMANENT,
532 		CTLTYPE_NODE, "kern", NULL,
533 		NULL, 0, NULL, 0,
534 		CTL_KERN, CTL_EOL);
535 	sysctl_createv(clog, 0, NULL, &node,
536 		CTLFLAG_PERMANENT,
537 		CTLTYPE_NODE, "pset",
538 		SYSCTL_DESCR("Processor-set options"),
539 		NULL, 0, NULL, 0,
540 		CTL_KERN, CTL_CREATE, CTL_EOL);
541 
542 	if (node == NULL)
543 		return;
544 
545 	sysctl_createv(clog, 0, &node, NULL,
546 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
547 		CTLTYPE_INT, "psets_max",
548 		SYSCTL_DESCR("Maximal count of the processor-sets"),
549 		sysctl_psets_max, 0, &psets_max, 0,
550 		CTL_CREATE, CTL_EOL);
551 	sysctl_createv(clog, 0, &node, NULL,
552 		CTLFLAG_PERMANENT,
553 		CTLTYPE_STRING, "list",
554 		SYSCTL_DESCR("List of active sets"),
555 		sysctl_psets_list, 0, NULL, 0,
556 		CTL_CREATE, CTL_EOL);
557 }
558