xref: /netbsd-src/sys/kern/sys_pset.c (revision 0d9ab2b40eafdd033d0c720bc373cbc79e301d63)
1 /*	$NetBSD: sys_pset.c,v 1.11 2009/01/23 13:58:08 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of the processor-set structures and its members are protected
 *  by the global cpu_lock.  Note that the scheduler may read the l_psid
 *  value of an LWP without holding the lock.
 */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.11 2009/01/23 13:58:08 rmind Exp $");
40 
41 #include <sys/param.h>
42 
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 
56 static pset_info_t **	psets;
57 static u_int		psets_max;
58 static u_int		psets_count;
59 
60 static int	psets_realloc(int);
61 static int	psid_validate(psetid_t, bool);
62 static int	kern_pset_create(psetid_t *);
63 static int	kern_pset_destroy(psetid_t);
64 
65 /*
66  * Initialization of the processor-sets.
67  */
68 void
69 psets_init(void)
70 {
71 
72 	psets_max = max(MAXCPUS, 32);
73 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
74 	psets_count = 0;
75 }
76 
77 /*
78  * Reallocate the array of the processor-set structures.
79  */
80 static int
81 psets_realloc(int new_psets_max)
82 {
83 	pset_info_t **new_psets, **old_psets;
84 	const u_int newsize = new_psets_max * sizeof(void *);
85 	u_int i, oldsize;
86 
87 	if (new_psets_max < 1)
88 		return EINVAL;
89 
90 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
91 	mutex_enter(&cpu_lock);
92 	old_psets = psets;
93 	oldsize = psets_max * sizeof(void *);
94 
95 	/* Check if we can lower the size of the array */
96 	if (new_psets_max < psets_max) {
97 		for (i = new_psets_max; i < psets_max; i++) {
98 			if (psets[i] == NULL)
99 				continue;
100 			mutex_exit(&cpu_lock);
101 			kmem_free(new_psets, newsize);
102 			return EBUSY;
103 		}
104 	}
105 
106 	/* Copy all pointers to the new array */
107 	memcpy(new_psets, psets, newsize);
108 	psets_max = new_psets_max;
109 	psets = new_psets;
110 	mutex_exit(&cpu_lock);
111 
112 	kmem_free(old_psets, oldsize);
113 	return 0;
114 }
115 
116 /*
117  * Validate processor-set ID.
118  */
119 static int
120 psid_validate(psetid_t psid, bool chkps)
121 {
122 
123 	KASSERT(mutex_owned(&cpu_lock));
124 
125 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
126 		return 0;
127 	if (psid <= 0 || psid > psets_max)
128 		return EINVAL;
129 	if (psets[psid - 1] == NULL)
130 		return EINVAL;
131 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
132 		return EBUSY;
133 
134 	return 0;
135 }
136 
137 /*
138  * Create a processor-set.
139  */
140 static int
141 kern_pset_create(psetid_t *psid)
142 {
143 	pset_info_t *pi;
144 	u_int i;
145 
146 	if (psets_count == psets_max)
147 		return ENOMEM;
148 
149 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
150 
151 	mutex_enter(&cpu_lock);
152 	if (psets_count == psets_max) {
153 		mutex_exit(&cpu_lock);
154 		kmem_free(pi, sizeof(pset_info_t));
155 		return ENOMEM;
156 	}
157 
158 	/* Find a free entry in the array */
159 	for (i = 0; i < psets_max; i++)
160 		if (psets[i] == NULL)
161 			break;
162 	KASSERT(i != psets_max);
163 
164 	psets[i] = pi;
165 	psets_count++;
166 	mutex_exit(&cpu_lock);
167 
168 	*psid = i + 1;
169 	return 0;
170 }
171 
172 /*
173  * Destroy a processor-set.
174  */
175 static int
176 kern_pset_destroy(psetid_t psid)
177 {
178 	struct cpu_info *ci;
179 	pset_info_t *pi;
180 	struct lwp *l;
181 	CPU_INFO_ITERATOR cii;
182 	int error;
183 
184 	mutex_enter(&cpu_lock);
185 	if (psid == PS_MYID) {
186 		/* Use caller's processor-set ID */
187 		psid = curlwp->l_psid;
188 	}
189 	error = psid_validate(psid, false);
190 	if (error) {
191 		mutex_exit(&cpu_lock);
192 		return error;
193 	}
194 
195 	/* Release the processor-set from all CPUs */
196 	for (CPU_INFO_FOREACH(cii, ci)) {
197 		struct schedstate_percpu *spc;
198 
199 		spc = &ci->ci_schedstate;
200 		if (spc->spc_psid != psid)
201 			continue;
202 		spc->spc_psid = PS_NONE;
203 	}
204 	/* Mark that processor-set is going to be destroyed */
205 	pi = psets[psid - 1];
206 	pi->ps_flags |= PSET_BUSY;
207 	mutex_exit(&cpu_lock);
208 
209 	/* Unmark the processor-set ID from each thread */
210 	mutex_enter(proc_lock);
211 	LIST_FOREACH(l, &alllwp, l_list) {
212 		/* Safe to check and set without lock held */
213 		if (l->l_psid != psid)
214 			continue;
215 		l->l_psid = PS_NONE;
216 	}
217 	mutex_exit(proc_lock);
218 
219 	/* Destroy the processor-set */
220 	mutex_enter(&cpu_lock);
221 	psets[psid - 1] = NULL;
222 	psets_count--;
223 	mutex_exit(&cpu_lock);
224 
225 	kmem_free(pi, sizeof(pset_info_t));
226 	return 0;
227 }
228 
229 /*
230  * General system calls for the processor-sets.
231  */
232 
233 int
234 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
235     register_t *retval)
236 {
237 	/* {
238 		syscallarg(psetid_t) *psid;
239 	} */
240 	psetid_t psid;
241 	int error;
242 
243 	/* Available only for super-user */
244 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
245 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
246 		return EPERM;
247 
248 	error = kern_pset_create(&psid);
249 	if (error)
250 		return error;
251 
252 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
253 	if (error)
254 		(void)kern_pset_destroy(psid);
255 
256 	return error;
257 }
258 
259 int
260 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
261     register_t *retval)
262 {
263 	/* {
264 		syscallarg(psetid_t) psid;
265 	} */
266 
267 	/* Available only for super-user */
268 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
269 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
270 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
271 		return EPERM;
272 
273 	return kern_pset_destroy(SCARG(uap, psid));
274 }
275 
276 int
277 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
278     register_t *retval)
279 {
280 	/* {
281 		syscallarg(psetid_t) psid;
282 		syscallarg(cpuid_t) cpuid;
283 		syscallarg(psetid_t) *opsid;
284 	} */
285 	struct cpu_info *ici, *ci = NULL;
286 	struct schedstate_percpu *spc = NULL;
287 	struct lwp *t;
288 	psetid_t psid = SCARG(uap, psid), opsid = 0;
289 	CPU_INFO_ITERATOR cii;
290 	int error = 0, nnone = 0;
291 
292 	/* Available only for super-user, except the case of PS_QUERY */
293 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
294 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
295 	    NULL))
296 		return EPERM;
297 
298 	/* Find the target CPU */
299 	mutex_enter(&cpu_lock);
300 	for (CPU_INFO_FOREACH(cii, ici)) {
301 		struct schedstate_percpu *ispc;
302 		ispc = &ici->ci_schedstate;
303 		if (cpu_index(ici) == SCARG(uap, cpuid)) {
304 			ci = ici;
305 			spc = ispc;
306 		}
307 		nnone += (ispc->spc_psid == PS_NONE);
308 	}
309 	if (ci == NULL) {
310 		mutex_exit(&cpu_lock);
311 		return EINVAL;
312 	}
313 	error = psid_validate(psid, true);
314 	if (error) {
315 		mutex_exit(&cpu_lock);
316 		return error;
317 	}
318 	opsid = spc->spc_psid;
319 	switch (psid) {
320 	case PS_QUERY:
321 		break;
322 	case PS_MYID:
323 		psid = curlwp->l_psid;
324 		/* FALLTHROUGH */
325 	default:
326 		/*
327 		 * Ensure at least one CPU stays in the default set,
328 		 * and that specified CPU is not offline.
329 		 */
330 		if (psid != PS_NONE && ((spc->spc_flags & SPCF_OFFLINE) ||
331 		    (nnone == 1 && spc->spc_psid == PS_NONE))) {
332 			mutex_exit(&cpu_lock);
333 			return EBUSY;
334 		}
335 		mutex_enter(proc_lock);
336 		/*
337 		 * Ensure that none of the threads are using affinity mask
338 		 * with this target CPU in it.
339 		 */
340 		LIST_FOREACH(t, &alllwp, l_list) {
341 			if ((t->l_flag & LW_AFFINITY) == 0)
342 				continue;
343 			if (kcpuset_isset(cpu_index(ci), t->l_affinity)) {
344 				mutex_exit(proc_lock);
345 				mutex_exit(&cpu_lock);
346 				return EPERM;
347 			}
348 		}
349 		/*
350 		 * Set the processor-set ID.
351 		 * Migrate out any threads running on this CPU.
352 		 */
353 		spc->spc_psid = psid;
354 
355 		LIST_FOREACH(t, &alllwp, l_list) {
356 			struct cpu_info *tci;
357 			if (t->l_cpu != ci)
358 				continue;
359 			if (t->l_pflag & (LP_BOUND | LP_INTR))
360 				continue;
361 			lwp_lock(t);
362 			tci = sched_takecpu(t);
363 			KASSERT(tci != ci);
364 			lwp_migrate(t, tci);
365 		}
366 		mutex_exit(proc_lock);
367 		break;
368 	}
369 	mutex_exit(&cpu_lock);
370 
371 	if (SCARG(uap, opsid) != NULL)
372 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
373 
374 	return error;
375 }
376 
377 int
378 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
379     register_t *retval)
380 {
381 	/* {
382 		syscallarg(idtype_t) idtype;
383 		syscallarg(id_t) first_id;
384 		syscallarg(id_t) second_id;
385 		syscallarg(psetid_t) psid;
386 		syscallarg(psetid_t) *opsid;
387 	} */
388 	struct cpu_info *ci;
389 	struct proc *p;
390 	struct lwp *t;
391 	id_t id1, id2;
392 	pid_t pid = 0;
393 	lwpid_t lid = 0;
394 	psetid_t psid, opsid;
395 	int error = 0, lcnt;
396 
397 	psid = SCARG(uap, psid);
398 
399 	/* Available only for super-user, except the case of PS_QUERY */
400 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
401 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
402 	    NULL))
403 		return EPERM;
404 
405 	mutex_enter(&cpu_lock);
406 	error = psid_validate(psid, true);
407 	if (error) {
408 		mutex_exit(&cpu_lock);
409 		return error;
410 	}
411 	if (psid == PS_MYID)
412 		psid = curlwp->l_psid;
413 	if (psid != PS_QUERY && psid != PS_NONE)
414 		psets[psid - 1]->ps_flags |= PSET_BUSY;
415 	mutex_exit(&cpu_lock);
416 
417 	/*
418 	 * Get PID and LID from the ID.
419 	 */
420 	p = l->l_proc;
421 	id1 = SCARG(uap, first_id);
422 	id2 = SCARG(uap, second_id);
423 
424 	switch (SCARG(uap, idtype)) {
425 	case P_PID:
426 		/*
427 		 * Process:
428 		 *  First ID	- PID;
429 		 *  Second ID	- ignored;
430 		 */
431 		pid = (id1 == P_MYID) ? p->p_pid : id1;
432 		lid = 0;
433 		break;
434 	case P_LWPID:
435 		/*
436 		 * Thread (LWP):
437 		 *  First ID	- LID;
438 		 *  Second ID	- PID;
439 		 */
440 		if (id1 == P_MYID) {
441 			pid = p->p_pid;
442 			lid = l->l_lid;
443 			break;
444 		}
445 		lid = id1;
446 		pid = (id2 == P_MYID) ? p->p_pid : id2;
447 		break;
448 	default:
449 		error = EINVAL;
450 		goto error;
451 	}
452 
453 	/* Find the process */
454 	mutex_enter(proc_lock);
455 	p = p_find(pid, PFIND_LOCKED);
456 	if (p == NULL) {
457 		mutex_exit(proc_lock);
458 		error = ESRCH;
459 		goto error;
460 	}
461 	mutex_enter(p->p_lock);
462 	mutex_exit(proc_lock);
463 
464 	/* Disallow modification of the system processes */
465 	if (p->p_flag & PK_SYSTEM) {
466 		mutex_exit(p->p_lock);
467 		error = EPERM;
468 		goto error;
469 	}
470 
471 	/* Find the LWP(s) */
472 	lcnt = 0;
473 	ci = NULL;
474 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
475 		if (lid && lid != t->l_lid)
476 			continue;
477 		/*
478 		 * Bind the thread to the processor-set,
479 		 * take some CPU and migrate.
480 		 */
481 		lwp_lock(t);
482 		opsid = t->l_psid;
483 		t->l_psid = psid;
484 		ci = sched_takecpu(t);
485 		/* Unlocks LWP */
486 		lwp_migrate(t, ci);
487 		lcnt++;
488 	}
489 	mutex_exit(p->p_lock);
490 	if (lcnt == 0) {
491 		error = ESRCH;
492 		goto error;
493 	}
494 	if (SCARG(uap, opsid))
495 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
496 error:
497 	if (psid != PS_QUERY && psid != PS_NONE) {
498 		mutex_enter(&cpu_lock);
499 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
500 		mutex_exit(&cpu_lock);
501 	}
502 	return error;
503 }
504 
505 /*
506  * Sysctl nodes and initialization.
507  */
508 
509 static int
510 sysctl_psets_max(SYSCTLFN_ARGS)
511 {
512 	struct sysctlnode node;
513 	int error, newsize;
514 
515 	node = *rnode;
516 	node.sysctl_data = &newsize;
517 
518 	newsize = psets_max;
519 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
520 	if (error || newp == NULL)
521 		return error;
522 
523 	if (newsize <= 0)
524 		return EINVAL;
525 
526 	sysctl_unlock();
527 	error = psets_realloc(newsize);
528 	sysctl_relock();
529 	return error;
530 }
531 
532 static int
533 sysctl_psets_list(SYSCTLFN_ARGS)
534 {
535 	const size_t bufsz = 1024;
536 	char *buf, tbuf[16];
537 	int i, error;
538 	size_t len;
539 
540 	sysctl_unlock();
541 	buf = kmem_alloc(bufsz, KM_SLEEP);
542 	snprintf(buf, bufsz, "%d:1", PS_NONE);	/* XXX */
543 
544 	mutex_enter(&cpu_lock);
545 	for (i = 0; i < psets_max; i++) {
546 		if (psets[i] == NULL)
547 			continue;
548 		snprintf(tbuf, sizeof(tbuf), ",%d:2", i + 1);	/* XXX */
549 		strlcat(buf, tbuf, bufsz);
550 	}
551 	mutex_exit(&cpu_lock);
552 	len = strlen(buf) + 1;
553 	error = 0;
554 	if (oldp != NULL)
555 		error = copyout(buf, oldp, min(len, *oldlenp));
556 	*oldlenp = len;
557 	kmem_free(buf, bufsz);
558 	sysctl_relock();
559 	return error;
560 }
561 
562 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
563 {
564 	const struct sysctlnode *node = NULL;
565 
566 	sysctl_createv(clog, 0, NULL, NULL,
567 		CTLFLAG_PERMANENT,
568 		CTLTYPE_NODE, "kern", NULL,
569 		NULL, 0, NULL, 0,
570 		CTL_KERN, CTL_EOL);
571 	sysctl_createv(clog, 0, NULL, &node,
572 		CTLFLAG_PERMANENT,
573 		CTLTYPE_NODE, "pset",
574 		SYSCTL_DESCR("Processor-set options"),
575 		NULL, 0, NULL, 0,
576 		CTL_KERN, CTL_CREATE, CTL_EOL);
577 
578 	if (node == NULL)
579 		return;
580 
581 	sysctl_createv(clog, 0, &node, NULL,
582 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
583 		CTLTYPE_INT, "psets_max",
584 		SYSCTL_DESCR("Maximal count of the processor-sets"),
585 		sysctl_psets_max, 0, &psets_max, 0,
586 		CTL_CREATE, CTL_EOL);
587 	sysctl_createv(clog, 0, &node, NULL,
588 		CTLFLAG_PERMANENT,
589 		CTLTYPE_STRING, "list",
590 		SYSCTL_DESCR("List of active sets"),
591 		sysctl_psets_list, 0, NULL, 0,
592 		CTL_CREATE, CTL_EOL);
593 }
594