xref: /netbsd-src/sys/kern/sys_pset.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: sys_pset.c,v 1.12 2009/03/03 21:55:06 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of processor-set structures and its members are protected
 *  by the global cpu_lock.  Note that the scheduler may use an LWP's
 *  l_psid value without holding the lock.
 */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.12 2009/03/03 21:55:06 rmind Exp $");
40 
41 #include <sys/param.h>
42 
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 
56 static pset_info_t **	psets;
57 static u_int		psets_max;
58 static u_int		psets_count;
59 
60 static int	psets_realloc(int);
61 static int	psid_validate(psetid_t, bool);
62 static int	kern_pset_create(psetid_t *);
63 static int	kern_pset_destroy(psetid_t);
64 
65 /*
66  * Initialization of the processor-sets.
67  */
68 void
69 psets_init(void)
70 {
71 
72 	psets_max = max(MAXCPUS, 32);
73 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
74 	psets_count = 0;
75 }
76 
77 /*
78  * Reallocate the array of the processor-set structures.
79  */
80 static int
81 psets_realloc(int new_psets_max)
82 {
83 	pset_info_t **new_psets, **old_psets;
84 	const u_int newsize = new_psets_max * sizeof(void *);
85 	u_int i, oldsize;
86 
87 	if (new_psets_max < 1)
88 		return EINVAL;
89 
90 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
91 	mutex_enter(&cpu_lock);
92 	old_psets = psets;
93 	oldsize = psets_max * sizeof(void *);
94 
95 	/* Check if we can lower the size of the array */
96 	if (new_psets_max < psets_max) {
97 		for (i = new_psets_max; i < psets_max; i++) {
98 			if (psets[i] == NULL)
99 				continue;
100 			mutex_exit(&cpu_lock);
101 			kmem_free(new_psets, newsize);
102 			return EBUSY;
103 		}
104 	}
105 
106 	/* Copy all pointers to the new array */
107 	memcpy(new_psets, psets, newsize);
108 	psets_max = new_psets_max;
109 	psets = new_psets;
110 	mutex_exit(&cpu_lock);
111 
112 	kmem_free(old_psets, oldsize);
113 	return 0;
114 }
115 
116 /*
117  * Validate processor-set ID.
118  */
119 static int
120 psid_validate(psetid_t psid, bool chkps)
121 {
122 
123 	KASSERT(mutex_owned(&cpu_lock));
124 
125 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
126 		return 0;
127 	if (psid <= 0 || psid > psets_max)
128 		return EINVAL;
129 	if (psets[psid - 1] == NULL)
130 		return EINVAL;
131 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
132 		return EBUSY;
133 
134 	return 0;
135 }
136 
137 /*
138  * Create a processor-set.
139  */
140 static int
141 kern_pset_create(psetid_t *psid)
142 {
143 	pset_info_t *pi;
144 	u_int i;
145 
146 	if (psets_count == psets_max)
147 		return ENOMEM;
148 
149 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
150 
151 	mutex_enter(&cpu_lock);
152 	if (psets_count == psets_max) {
153 		mutex_exit(&cpu_lock);
154 		kmem_free(pi, sizeof(pset_info_t));
155 		return ENOMEM;
156 	}
157 
158 	/* Find a free entry in the array */
159 	for (i = 0; i < psets_max; i++)
160 		if (psets[i] == NULL)
161 			break;
162 	KASSERT(i != psets_max);
163 
164 	psets[i] = pi;
165 	psets_count++;
166 	mutex_exit(&cpu_lock);
167 
168 	*psid = i + 1;
169 	return 0;
170 }
171 
172 /*
173  * Destroy a processor-set.
174  */
175 static int
176 kern_pset_destroy(psetid_t psid)
177 {
178 	struct cpu_info *ci;
179 	pset_info_t *pi;
180 	struct lwp *l;
181 	CPU_INFO_ITERATOR cii;
182 	int error;
183 
184 	mutex_enter(&cpu_lock);
185 	if (psid == PS_MYID) {
186 		/* Use caller's processor-set ID */
187 		psid = curlwp->l_psid;
188 	}
189 	error = psid_validate(psid, false);
190 	if (error) {
191 		mutex_exit(&cpu_lock);
192 		return error;
193 	}
194 
195 	/* Release the processor-set from all CPUs */
196 	for (CPU_INFO_FOREACH(cii, ci)) {
197 		struct schedstate_percpu *spc;
198 
199 		spc = &ci->ci_schedstate;
200 		if (spc->spc_psid != psid)
201 			continue;
202 		spc->spc_psid = PS_NONE;
203 	}
204 	/* Mark that processor-set is going to be destroyed */
205 	pi = psets[psid - 1];
206 	pi->ps_flags |= PSET_BUSY;
207 	mutex_exit(&cpu_lock);
208 
209 	/* Unmark the processor-set ID from each thread */
210 	mutex_enter(proc_lock);
211 	LIST_FOREACH(l, &alllwp, l_list) {
212 		/* Safe to check and set without lock held */
213 		if (l->l_psid != psid)
214 			continue;
215 		l->l_psid = PS_NONE;
216 	}
217 	mutex_exit(proc_lock);
218 
219 	/* Destroy the processor-set */
220 	mutex_enter(&cpu_lock);
221 	psets[psid - 1] = NULL;
222 	psets_count--;
223 	mutex_exit(&cpu_lock);
224 
225 	kmem_free(pi, sizeof(pset_info_t));
226 	return 0;
227 }
228 
229 /*
230  * General system calls for the processor-sets.
231  */
232 
233 int
234 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
235     register_t *retval)
236 {
237 	/* {
238 		syscallarg(psetid_t) *psid;
239 	} */
240 	psetid_t psid;
241 	int error;
242 
243 	/* Available only for super-user */
244 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
245 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
246 		return EPERM;
247 
248 	error = kern_pset_create(&psid);
249 	if (error)
250 		return error;
251 
252 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
253 	if (error)
254 		(void)kern_pset_destroy(psid);
255 
256 	return error;
257 }
258 
259 int
260 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
261     register_t *retval)
262 {
263 	/* {
264 		syscallarg(psetid_t) psid;
265 	} */
266 
267 	/* Available only for super-user */
268 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
269 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
270 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
271 		return EPERM;
272 
273 	return kern_pset_destroy(SCARG(uap, psid));
274 }
275 
276 int
277 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
278     register_t *retval)
279 {
280 	/* {
281 		syscallarg(psetid_t) psid;
282 		syscallarg(cpuid_t) cpuid;
283 		syscallarg(psetid_t) *opsid;
284 	} */
285 	struct cpu_info *ici, *ci = NULL;
286 	struct schedstate_percpu *spc = NULL;
287 	struct lwp *t;
288 	psetid_t psid = SCARG(uap, psid), opsid = 0;
289 	CPU_INFO_ITERATOR cii;
290 	int error = 0, nnone = 0;
291 
292 	/* Available only for super-user, except the case of PS_QUERY */
293 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
294 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
295 	    NULL))
296 		return EPERM;
297 
298 	/* Find the target CPU */
299 	mutex_enter(&cpu_lock);
300 	for (CPU_INFO_FOREACH(cii, ici)) {
301 		struct schedstate_percpu *ispc;
302 		ispc = &ici->ci_schedstate;
303 		if (cpu_index(ici) == SCARG(uap, cpuid)) {
304 			ci = ici;
305 			spc = ispc;
306 		}
307 		nnone += (ispc->spc_psid == PS_NONE);
308 	}
309 	if (ci == NULL) {
310 		mutex_exit(&cpu_lock);
311 		return EINVAL;
312 	}
313 	error = psid_validate(psid, true);
314 	if (error) {
315 		mutex_exit(&cpu_lock);
316 		return error;
317 	}
318 	opsid = spc->spc_psid;
319 	switch (psid) {
320 	case PS_QUERY:
321 		break;
322 	case PS_MYID:
323 		psid = curlwp->l_psid;
324 		/* FALLTHROUGH */
325 	default:
326 		/*
327 		 * Ensure at least one CPU stays in the default set,
328 		 * and that specified CPU is not offline.
329 		 */
330 		if (psid != PS_NONE && ((spc->spc_flags & SPCF_OFFLINE) ||
331 		    (nnone == 1 && spc->spc_psid == PS_NONE))) {
332 			mutex_exit(&cpu_lock);
333 			return EBUSY;
334 		}
335 		mutex_enter(proc_lock);
336 		/*
337 		 * Ensure that none of the threads are using affinity mask
338 		 * with this target CPU in it.
339 		 */
340 		LIST_FOREACH(t, &alllwp, l_list) {
341 			if ((t->l_flag & LW_AFFINITY) == 0)
342 				continue;
343 			lwp_lock(t);
344 			if ((t->l_flag & LW_AFFINITY) == 0) {
345 				lwp_unlock(t);
346 				continue;
347 			}
348 			if (kcpuset_isset(cpu_index(ci), t->l_affinity)) {
349 				lwp_unlock(t);
350 				mutex_exit(proc_lock);
351 				mutex_exit(&cpu_lock);
352 				return EPERM;
353 			}
354 		}
355 		/*
356 		 * Set the processor-set ID.
357 		 * Migrate out any threads running on this CPU.
358 		 */
359 		spc->spc_psid = psid;
360 
361 		LIST_FOREACH(t, &alllwp, l_list) {
362 			struct cpu_info *tci;
363 			if (t->l_cpu != ci)
364 				continue;
365 			if (t->l_pflag & (LP_BOUND | LP_INTR))
366 				continue;
367 			lwp_lock(t);
368 			tci = sched_takecpu(t);
369 			KASSERT(tci != ci);
370 			lwp_migrate(t, tci);
371 		}
372 		mutex_exit(proc_lock);
373 		break;
374 	}
375 	mutex_exit(&cpu_lock);
376 
377 	if (SCARG(uap, opsid) != NULL)
378 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
379 
380 	return error;
381 }
382 
383 int
384 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
385     register_t *retval)
386 {
387 	/* {
388 		syscallarg(idtype_t) idtype;
389 		syscallarg(id_t) first_id;
390 		syscallarg(id_t) second_id;
391 		syscallarg(psetid_t) psid;
392 		syscallarg(psetid_t) *opsid;
393 	} */
394 	struct cpu_info *ci;
395 	struct proc *p;
396 	struct lwp *t;
397 	id_t id1, id2;
398 	pid_t pid = 0;
399 	lwpid_t lid = 0;
400 	psetid_t psid, opsid;
401 	int error = 0, lcnt;
402 
403 	psid = SCARG(uap, psid);
404 
405 	/* Available only for super-user, except the case of PS_QUERY */
406 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
407 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
408 	    NULL))
409 		return EPERM;
410 
411 	mutex_enter(&cpu_lock);
412 	error = psid_validate(psid, true);
413 	if (error) {
414 		mutex_exit(&cpu_lock);
415 		return error;
416 	}
417 	if (psid == PS_MYID)
418 		psid = curlwp->l_psid;
419 	if (psid != PS_QUERY && psid != PS_NONE)
420 		psets[psid - 1]->ps_flags |= PSET_BUSY;
421 	mutex_exit(&cpu_lock);
422 
423 	/*
424 	 * Get PID and LID from the ID.
425 	 */
426 	p = l->l_proc;
427 	id1 = SCARG(uap, first_id);
428 	id2 = SCARG(uap, second_id);
429 
430 	switch (SCARG(uap, idtype)) {
431 	case P_PID:
432 		/*
433 		 * Process:
434 		 *  First ID	- PID;
435 		 *  Second ID	- ignored;
436 		 */
437 		pid = (id1 == P_MYID) ? p->p_pid : id1;
438 		lid = 0;
439 		break;
440 	case P_LWPID:
441 		/*
442 		 * Thread (LWP):
443 		 *  First ID	- LID;
444 		 *  Second ID	- PID;
445 		 */
446 		if (id1 == P_MYID) {
447 			pid = p->p_pid;
448 			lid = l->l_lid;
449 			break;
450 		}
451 		lid = id1;
452 		pid = (id2 == P_MYID) ? p->p_pid : id2;
453 		break;
454 	default:
455 		error = EINVAL;
456 		goto error;
457 	}
458 
459 	/* Find the process */
460 	mutex_enter(proc_lock);
461 	p = p_find(pid, PFIND_LOCKED);
462 	if (p == NULL) {
463 		mutex_exit(proc_lock);
464 		error = ESRCH;
465 		goto error;
466 	}
467 	mutex_enter(p->p_lock);
468 	mutex_exit(proc_lock);
469 
470 	/* Disallow modification of the system processes */
471 	if (p->p_flag & PK_SYSTEM) {
472 		mutex_exit(p->p_lock);
473 		error = EPERM;
474 		goto error;
475 	}
476 
477 	/* Find the LWP(s) */
478 	lcnt = 0;
479 	ci = NULL;
480 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
481 		if (lid && lid != t->l_lid)
482 			continue;
483 		/*
484 		 * Bind the thread to the processor-set,
485 		 * take some CPU and migrate.
486 		 */
487 		lwp_lock(t);
488 		opsid = t->l_psid;
489 		t->l_psid = psid;
490 		ci = sched_takecpu(t);
491 		/* Unlocks LWP */
492 		lwp_migrate(t, ci);
493 		lcnt++;
494 	}
495 	mutex_exit(p->p_lock);
496 	if (lcnt == 0) {
497 		error = ESRCH;
498 		goto error;
499 	}
500 	if (SCARG(uap, opsid))
501 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
502 error:
503 	if (psid != PS_QUERY && psid != PS_NONE) {
504 		mutex_enter(&cpu_lock);
505 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
506 		mutex_exit(&cpu_lock);
507 	}
508 	return error;
509 }
510 
511 /*
512  * Sysctl nodes and initialization.
513  */
514 
515 static int
516 sysctl_psets_max(SYSCTLFN_ARGS)
517 {
518 	struct sysctlnode node;
519 	int error, newsize;
520 
521 	node = *rnode;
522 	node.sysctl_data = &newsize;
523 
524 	newsize = psets_max;
525 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
526 	if (error || newp == NULL)
527 		return error;
528 
529 	if (newsize <= 0)
530 		return EINVAL;
531 
532 	sysctl_unlock();
533 	error = psets_realloc(newsize);
534 	sysctl_relock();
535 	return error;
536 }
537 
538 static int
539 sysctl_psets_list(SYSCTLFN_ARGS)
540 {
541 	const size_t bufsz = 1024;
542 	char *buf, tbuf[16];
543 	int i, error;
544 	size_t len;
545 
546 	sysctl_unlock();
547 	buf = kmem_alloc(bufsz, KM_SLEEP);
548 	snprintf(buf, bufsz, "%d:1", PS_NONE);	/* XXX */
549 
550 	mutex_enter(&cpu_lock);
551 	for (i = 0; i < psets_max; i++) {
552 		if (psets[i] == NULL)
553 			continue;
554 		snprintf(tbuf, sizeof(tbuf), ",%d:2", i + 1);	/* XXX */
555 		strlcat(buf, tbuf, bufsz);
556 	}
557 	mutex_exit(&cpu_lock);
558 	len = strlen(buf) + 1;
559 	error = 0;
560 	if (oldp != NULL)
561 		error = copyout(buf, oldp, min(len, *oldlenp));
562 	*oldlenp = len;
563 	kmem_free(buf, bufsz);
564 	sysctl_relock();
565 	return error;
566 }
567 
568 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
569 {
570 	const struct sysctlnode *node = NULL;
571 
572 	sysctl_createv(clog, 0, NULL, NULL,
573 		CTLFLAG_PERMANENT,
574 		CTLTYPE_NODE, "kern", NULL,
575 		NULL, 0, NULL, 0,
576 		CTL_KERN, CTL_EOL);
577 	sysctl_createv(clog, 0, NULL, &node,
578 		CTLFLAG_PERMANENT,
579 		CTLTYPE_NODE, "pset",
580 		SYSCTL_DESCR("Processor-set options"),
581 		NULL, 0, NULL, 0,
582 		CTL_KERN, CTL_CREATE, CTL_EOL);
583 
584 	if (node == NULL)
585 		return;
586 
587 	sysctl_createv(clog, 0, &node, NULL,
588 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
589 		CTLTYPE_INT, "psets_max",
590 		SYSCTL_DESCR("Maximal count of the processor-sets"),
591 		sysctl_psets_max, 0, &psets_max, 0,
592 		CTL_CREATE, CTL_EOL);
593 	sysctl_createv(clog, 0, &node, NULL,
594 		CTLFLAG_PERMANENT,
595 		CTLTYPE_STRING, "list",
596 		SYSCTL_DESCR("List of active sets"),
597 		sysctl_psets_list, 0, NULL, 0,
598 		CTL_CREATE, CTL_EOL);
599 }
600