xref: /onnv-gate/usr/src/uts/common/os/pg.c (revision 11172:a792f425ae2e)
13434Sesaxe /*
23434Sesaxe  * CDDL HEADER START
33434Sesaxe  *
43434Sesaxe  * The contents of this file are subject to the terms of the
53434Sesaxe  * Common Development and Distribution License (the "License").
63434Sesaxe  * You may not use this file except in compliance with the License.
73434Sesaxe  *
83434Sesaxe  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93434Sesaxe  * or http://www.opensolaris.org/os/licensing.
103434Sesaxe  * See the License for the specific language governing permissions
113434Sesaxe  * and limitations under the License.
123434Sesaxe  *
133434Sesaxe  * When distributing Covered Code, include this CDDL HEADER in each
143434Sesaxe  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153434Sesaxe  * If applicable, add the following below this CDDL HEADER, with the
163434Sesaxe  * fields enclosed by brackets "[]" replaced with your own identifying
173434Sesaxe  * information: Portions Copyright [yyyy] [name of copyright owner]
183434Sesaxe  *
193434Sesaxe  * CDDL HEADER END
203434Sesaxe  */
213434Sesaxe /*
228906SEric.Saxe@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
233434Sesaxe  * Use is subject to license terms.
243434Sesaxe  */
253434Sesaxe 
263434Sesaxe #include <sys/systm.h>
273434Sesaxe #include <sys/types.h>
283434Sesaxe #include <sys/param.h>
293434Sesaxe #include <sys/thread.h>
303434Sesaxe #include <sys/cpuvar.h>
313434Sesaxe #include <sys/cpupart.h>
323434Sesaxe #include <sys/kmem.h>
333434Sesaxe #include <sys/cmn_err.h>
343434Sesaxe #include <sys/kstat.h>
353434Sesaxe #include <sys/processor.h>
363434Sesaxe #include <sys/disp.h>
373434Sesaxe #include <sys/group.h>
383434Sesaxe #include <sys/pg.h>
393434Sesaxe 
403434Sesaxe /*
413434Sesaxe  * Processor groups
423434Sesaxe  *
433434Sesaxe  * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
443434Sesaxe  * it is no longer necessarily true that a given physical processor module
453434Sesaxe  * will present itself as a single schedulable entity (cpu_t). Rather, each
463434Sesaxe  * chip and/or processor core may present itself as one or more "logical" CPUs.
473434Sesaxe  *
483434Sesaxe  * The logical CPUs presented may share physical components such as caches,
493434Sesaxe  * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
503434Sesaxe  * kernel be aware of the relationships existing between logical CPUs so that
 * the appropriate optimizations may be employed.
523434Sesaxe  *
533434Sesaxe  * The processor group abstraction represents a set of logical CPUs that
543434Sesaxe  * generally share some sort of physical or characteristic relationship.
553434Sesaxe  *
563434Sesaxe  * In the case of a physical sharing relationship, the CPUs in the group may
573434Sesaxe  * share a pipeline, cache or floating point unit. In the case of a logical
583434Sesaxe  * relationship, a PG may represent the set of CPUs in a processor set, or the
593434Sesaxe  * set of CPUs running at a particular clock speed.
603434Sesaxe  *
613434Sesaxe  * The generic processor group structure, pg_t, contains the elements generic
623434Sesaxe  * to a group of CPUs. Depending on the nature of the CPU relationship
633434Sesaxe  * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
643434Sesaxe  * PG where more specific data is represented.
653434Sesaxe  *
663434Sesaxe  * As an example, a PG representing a PHYSICAL relationship, may be recast to
673434Sesaxe  * a pghw_t, where data further describing the hardware sharing relationship
683434Sesaxe  * is maintained. See pghw.c and pghw.h for details on physical PGs.
693434Sesaxe  *
703434Sesaxe  * At this time a more specialized casting of a PG representing a LOGICAL
713434Sesaxe  * relationship has not been implemented, but the architecture allows for this
723434Sesaxe  * in the future.
733434Sesaxe  *
743434Sesaxe  * Processor Group Classes
753434Sesaxe  *
763434Sesaxe  * Processor group consumers may wish to maintain and associate specific
773434Sesaxe  * data with the PGs they create. For this reason, a mechanism for creating
783434Sesaxe  * class specific PGs exists. Classes may overload the default functions for
793434Sesaxe  * creating, destroying, and associating CPUs with PGs, and may also register
803434Sesaxe  * class specific callbacks to be invoked when the CPU related system
813434Sesaxe  * configuration changes. Class specific data is stored/associated with
823434Sesaxe  * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
833434Sesaxe  * element of a class specific PG object. In memory, such a structure may look
843434Sesaxe  * like:
853434Sesaxe  *
863434Sesaxe  * ----------------------- - - -
873434Sesaxe  * | common              | | | |  <--(pg_t *)
883434Sesaxe  * ----------------------- | | -
893434Sesaxe  * | HW specific         | | | <-----(pghw_t *)
903434Sesaxe  * ----------------------- | -
913434Sesaxe  * | class specific      | | <-------(pg_cmt_t *)
923434Sesaxe  * ----------------------- -
933434Sesaxe  *
943434Sesaxe  * Access to the PG class specific data can be had by casting a pointer to
 * its class specific view.
963434Sesaxe  */
973434Sesaxe 
983434Sesaxe static pg_t		*pg_alloc_default(pg_class_t);
993434Sesaxe static void		pg_free_default(pg_t *);
1008906SEric.Saxe@Sun.COM static void		pg_null_op();
1013434Sesaxe 
1023434Sesaxe /*
1033434Sesaxe  * Bootstrap CPU specific PG data
1043434Sesaxe  * See pg_cpu_bootstrap()
1053434Sesaxe  */
1063434Sesaxe static cpu_pg_t		bootstrap_pg_data;
1073434Sesaxe 
1083434Sesaxe /*
1093434Sesaxe  * Bitset of allocated PG ids (they are sequential)
1103434Sesaxe  * and the next free id in the set.
1113434Sesaxe  */
1123434Sesaxe static bitset_t		pg_id_set;
1133434Sesaxe static pgid_t		pg_id_next = 0;
1143434Sesaxe 
1153434Sesaxe /*
1163434Sesaxe  * Default and externed PG ops vectors
1173434Sesaxe  */
1183434Sesaxe static struct pg_ops pg_ops_default = {
1193434Sesaxe 	pg_alloc_default,	/* alloc */
1203434Sesaxe 	pg_free_default,	/* free */
1213434Sesaxe 	NULL,			/* cpu_init */
1223434Sesaxe 	NULL,			/* cpu_fini */
1233434Sesaxe 	NULL,			/* cpu_active */
1243434Sesaxe 	NULL,			/* cpu_inactive */
1253434Sesaxe 	NULL,			/* cpupart_in */
1263434Sesaxe 	NULL,			/* cpupart_out */
1273434Sesaxe 	NULL,			/* cpupart_move */
1283434Sesaxe 	NULL,			/* cpu_belongs */
1298906SEric.Saxe@Sun.COM 	NULL,			/* policy_name */
1308906SEric.Saxe@Sun.COM };
1318906SEric.Saxe@Sun.COM 
1328906SEric.Saxe@Sun.COM static struct pg_cb_ops pg_cb_ops_default = {
1338906SEric.Saxe@Sun.COM 	pg_null_op,		/* thread_swtch */
1348906SEric.Saxe@Sun.COM 	pg_null_op,		/* thread_remain */
1353434Sesaxe };
1363434Sesaxe 
/*
 * PG allocation / de-allocation dispatch.
 *
 * Each macro invokes the class's own routine when one is registered and
 * otherwise falls back to the routine of the default class.
 */
#define	PG_ALLOC(cid)							\
	((pg_classes[cid].pgc_ops->alloc != NULL) ?			\
	    pg_classes[cid].pgc_ops->alloc() :				\
	    pg_classes[pg_default_cid].pgc_ops->alloc())

#define	PG_FREE(pg)							\
	(((pg)->pg_class->pgc_ops->free != NULL) ?			\
	    (pg)->pg_class->pgc_ops->free(pg) :				\
	    pg_classes[pg_default_cid].pgc_ops->free(pg))
1493434Sesaxe 
1503434Sesaxe 
/*
 * Policy name dispatch: evaluates to the result of the class's
 * policy_name op, or to NULL when the class does not provide one.
 */
#define	PG_POLICY_NAME(pg)						\
	(((pg)->pg_class->pgc_ops->policy_name != NULL) ?		\
	    (pg)->pg_class->pgc_ops->policy_name(pg) : NULL)
1578906SEric.Saxe@Sun.COM 
/*
 * Membership test dispatch: evaluates to the result of the class's
 * cpu_belongs op, or to 0 when the class does not provide one.
 */
#define	PG_CPU_BELONGS(pg, cp)						\
	(((pg)->pg_class->pgc_ops->cpu_belongs != NULL) ?		\
	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)
1643434Sesaxe 
/*
 * CPU configuration callback dispatch.
 *
 * Each macro calls the named op of class "cid" if that class registered
 * one, and does nothing otherwise. The expansion is a braced block, so
 * these are only suitable for use as a complete statement.
 */
#define	PG_CPU_INIT(cid, cp, cpu_pg)					\
{									\
	if (pg_classes[cid].pgc_ops->cpu_init != NULL)			\
		pg_classes[cid].pgc_ops->cpu_init(cp, cpu_pg);		\
}

#define	PG_CPU_FINI(cid, cp, cpu_pg)					\
{									\
	if (pg_classes[cid].pgc_ops->cpu_fini != NULL)			\
		pg_classes[cid].pgc_ops->cpu_fini(cp, cpu_pg);		\
}

#define	PG_CPU_ACTIVE(cid, cp)						\
{									\
	if (pg_classes[cid].pgc_ops->cpu_active != NULL)		\
		pg_classes[cid].pgc_ops->cpu_active(cp);		\
}

#define	PG_CPU_INACTIVE(cid, cp)					\
{									\
	if (pg_classes[cid].pgc_ops->cpu_inactive != NULL)		\
		pg_classes[cid].pgc_ops->cpu_inactive(cp);		\
}
1913434Sesaxe 
/*
 * CPU partition callback dispatch.
 *
 * Each macro calls the named op of class "cid" if that class registered
 * one, and does nothing otherwise. The expansion is a braced block, so
 * these are only suitable for use as a complete statement.
 */
#define	PG_CPUPART_IN(cid, cp, pp)					\
{									\
	if (pg_classes[cid].pgc_ops->cpupart_in != NULL)		\
		pg_classes[cid].pgc_ops->cpupart_in(cp, pp);		\
}

#define	PG_CPUPART_OUT(cid, cp, pp)					\
{									\
	if (pg_classes[cid].pgc_ops->cpupart_out != NULL)		\
		pg_classes[cid].pgc_ops->cpupart_out(cp, pp);		\
}

#define	PG_CPUPART_MOVE(cid, cp, old, new)				\
{									\
	if (pg_classes[cid].pgc_ops->cpupart_move != NULL)		\
		pg_classes[cid].pgc_ops->cpupart_move(cp, old, new);	\
}
2123434Sesaxe 
2133434Sesaxe 
2143434Sesaxe 
2153434Sesaxe static pg_class_t	*pg_classes;
2163434Sesaxe static int		pg_nclasses;
2173434Sesaxe 
2183434Sesaxe static pg_cid_t		pg_default_cid;
2193434Sesaxe 
2203434Sesaxe /*
2218906SEric.Saxe@Sun.COM  * Initialze common PG subsystem.
2223434Sesaxe  */
2233434Sesaxe void
2243434Sesaxe pg_init(void)
2253434Sesaxe {
2268906SEric.Saxe@Sun.COM 	extern void pg_cmt_class_init();
2279478SEric.Saxe@Sun.COM 	extern void pg_cmt_cpu_startup();
2288906SEric.Saxe@Sun.COM 
2293434Sesaxe 	pg_default_cid =
2303434Sesaxe 	    pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
2318906SEric.Saxe@Sun.COM 
2328906SEric.Saxe@Sun.COM 	/*
2338906SEric.Saxe@Sun.COM 	 * Initialize classes to allow them to register with the framework
2348906SEric.Saxe@Sun.COM 	 */
2358906SEric.Saxe@Sun.COM 	pg_cmt_class_init();
2368906SEric.Saxe@Sun.COM 
2378906SEric.Saxe@Sun.COM 	pg_cpu0_init();
2389478SEric.Saxe@Sun.COM 	pg_cmt_cpu_startup(CPU);
2393434Sesaxe }
2403434Sesaxe 
2413434Sesaxe /*
2423434Sesaxe  * Perform CPU 0 initialization
2433434Sesaxe  */
2443434Sesaxe void
2453434Sesaxe pg_cpu0_init(void)
2463434Sesaxe {
2473434Sesaxe 	extern void pghw_physid_create();
2483434Sesaxe 
2493434Sesaxe 	/*
2503434Sesaxe 	 * Create the physical ID cache for the boot CPU
2513434Sesaxe 	 */
2523434Sesaxe 	pghw_physid_create(CPU);
2533434Sesaxe 
2543434Sesaxe 	/*
2553434Sesaxe 	 * pg_cpu_* require that cpu_lock be held
2563434Sesaxe 	 */
2573434Sesaxe 	mutex_enter(&cpu_lock);
2583434Sesaxe 
259*11172SHaik.Aftandilian@Sun.COM 	(void) pg_cpu_init(CPU, B_FALSE);
2603434Sesaxe 	pg_cpupart_in(CPU, &cp_default);
2613434Sesaxe 	pg_cpu_active(CPU);
2623434Sesaxe 
2633434Sesaxe 	mutex_exit(&cpu_lock);
2643434Sesaxe }
2653434Sesaxe 
2663434Sesaxe /*
2673676Sesaxe  * Invoked when topology for CPU0 changes
2683676Sesaxe  * post pg_cpu0_init().
2693676Sesaxe  *
2703676Sesaxe  * Currently happens as a result of null_proc_lpa
2713676Sesaxe  * on Starcat.
2723676Sesaxe  */
2733676Sesaxe void
2743676Sesaxe pg_cpu0_reinit(void)
2753676Sesaxe {
2763676Sesaxe 	mutex_enter(&cpu_lock);
2773676Sesaxe 	pg_cpu_inactive(CPU);
2783676Sesaxe 	pg_cpupart_out(CPU, &cp_default);
279*11172SHaik.Aftandilian@Sun.COM 	pg_cpu_fini(CPU, NULL);
2803676Sesaxe 
281*11172SHaik.Aftandilian@Sun.COM 	(void) pg_cpu_init(CPU, B_FALSE);
2823676Sesaxe 	pg_cpupart_in(CPU, &cp_default);
2833676Sesaxe 	pg_cpu_active(CPU);
2843676Sesaxe 	mutex_exit(&cpu_lock);
2853676Sesaxe }
2863676Sesaxe 
2873676Sesaxe /*
2883434Sesaxe  * Register a new PG class
2893434Sesaxe  */
2903434Sesaxe pg_cid_t
2913434Sesaxe pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
2923434Sesaxe {
2933434Sesaxe 	pg_class_t	*newclass;
2943434Sesaxe 	pg_class_t	*classes_old;
2953434Sesaxe 	id_t		cid;
2963434Sesaxe 
2973434Sesaxe 	mutex_enter(&cpu_lock);
2983434Sesaxe 
2993434Sesaxe 	/*
3003434Sesaxe 	 * Allocate a new pg_class_t in the pg_classes array
3013434Sesaxe 	 */
3023434Sesaxe 	if (pg_nclasses == 0) {
3033434Sesaxe 		pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
3043434Sesaxe 	} else {
3053434Sesaxe 		classes_old = pg_classes;
3063434Sesaxe 		pg_classes =
3073434Sesaxe 		    kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
3088906SEric.Saxe@Sun.COM 		    KM_SLEEP);
3093434Sesaxe 		(void) kcopy(classes_old, pg_classes,
3103434Sesaxe 		    sizeof (pg_class_t) * pg_nclasses);
3113434Sesaxe 		kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
3123434Sesaxe 	}
3133434Sesaxe 
3143434Sesaxe 	cid = pg_nclasses++;
3153434Sesaxe 	newclass = &pg_classes[cid];
3163434Sesaxe 
3173434Sesaxe 	(void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
3183434Sesaxe 	newclass->pgc_id = cid;
3193434Sesaxe 	newclass->pgc_ops = ops;
3203434Sesaxe 	newclass->pgc_relation = relation;
3213434Sesaxe 
3223434Sesaxe 	mutex_exit(&cpu_lock);
3233434Sesaxe 
3243434Sesaxe 	return (cid);
3253434Sesaxe }
3263434Sesaxe 
3273434Sesaxe /*
3283434Sesaxe  * Try to find an existing pg in set in which to place cp.
3293434Sesaxe  * Returns the pg if found, and NULL otherwise.
3303434Sesaxe  * In the event that the CPU could belong to multiple
3313434Sesaxe  * PGs in the set, the first matching PG will be returned.
3323434Sesaxe  */
3333434Sesaxe pg_t *
3343434Sesaxe pg_cpu_find_pg(cpu_t *cp, group_t *set)
3353434Sesaxe {
3363434Sesaxe 	pg_t		*pg;
3373434Sesaxe 	group_iter_t	i;
3383434Sesaxe 
3393434Sesaxe 	group_iter_init(&i);
3403434Sesaxe 	while ((pg = group_iterate(set, &i)) != NULL) {
3413434Sesaxe 		/*
3423434Sesaxe 		 * Ask the class if the CPU belongs here
3433434Sesaxe 		 */
3443434Sesaxe 		if (PG_CPU_BELONGS(pg, cp))
3453434Sesaxe 			return (pg);
3463434Sesaxe 	}
3473434Sesaxe 	return (NULL);
3483434Sesaxe }
3493434Sesaxe 
3503434Sesaxe /*
3513434Sesaxe  * Iterate over the CPUs in a PG after initializing
3523434Sesaxe  * the iterator with PG_CPU_ITR_INIT()
3533434Sesaxe  */
3543434Sesaxe cpu_t *
3553434Sesaxe pg_cpu_next(pg_cpu_itr_t *itr)
3563434Sesaxe {
3573434Sesaxe 	cpu_t		*cpu;
3583434Sesaxe 	pg_t		*pg = itr->pg;
3593434Sesaxe 
3603434Sesaxe 	cpu = group_iterate(&pg->pg_cpus, &itr->position);
3613434Sesaxe 	return (cpu);
3623434Sesaxe }
3633434Sesaxe 
3643434Sesaxe /*
3658906SEric.Saxe@Sun.COM  * Test if a given PG contains a given CPU
3668906SEric.Saxe@Sun.COM  */
3678906SEric.Saxe@Sun.COM boolean_t
3688906SEric.Saxe@Sun.COM pg_cpu_find(pg_t *pg, cpu_t *cp)
3698906SEric.Saxe@Sun.COM {
3708906SEric.Saxe@Sun.COM 	if (group_find(&pg->pg_cpus, cp) == (uint_t)-1)
3718906SEric.Saxe@Sun.COM 		return (B_FALSE);
3728906SEric.Saxe@Sun.COM 
3738906SEric.Saxe@Sun.COM 	return (B_TRUE);
3748906SEric.Saxe@Sun.COM }
3758906SEric.Saxe@Sun.COM 
3768906SEric.Saxe@Sun.COM /*
3778906SEric.Saxe@Sun.COM  * Set the PGs callbacks to the default
3788906SEric.Saxe@Sun.COM  */
3798906SEric.Saxe@Sun.COM void
3808906SEric.Saxe@Sun.COM pg_callback_set_defaults(pg_t *pg)
3818906SEric.Saxe@Sun.COM {
3828906SEric.Saxe@Sun.COM 	bcopy(&pg_cb_ops_default, &pg->pg_cb, sizeof (struct pg_cb_ops));
3838906SEric.Saxe@Sun.COM }
3848906SEric.Saxe@Sun.COM 
3858906SEric.Saxe@Sun.COM /*
3863434Sesaxe  * Create a PG of a given class.
3873434Sesaxe  * This routine may block.
3883434Sesaxe  */
3893434Sesaxe pg_t *
3903434Sesaxe pg_create(pg_cid_t cid)
3913434Sesaxe {
3923434Sesaxe 	pg_t	*pg;
3933434Sesaxe 	pgid_t	id;
3943434Sesaxe 
3953434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
3963434Sesaxe 
3973434Sesaxe 	/*
3983434Sesaxe 	 * Call the class specific PG allocation routine
3993434Sesaxe 	 */
4003434Sesaxe 	pg = PG_ALLOC(cid);
4013434Sesaxe 	pg->pg_class = &pg_classes[cid];
4023434Sesaxe 	pg->pg_relation = pg->pg_class->pgc_relation;
4033434Sesaxe 
4043434Sesaxe 	/*
4053434Sesaxe 	 * Find the next free sequential pg id
4063434Sesaxe 	 */
4073434Sesaxe 	do {
4083434Sesaxe 		if (pg_id_next >= bitset_capacity(&pg_id_set))
4093434Sesaxe 			bitset_resize(&pg_id_set, pg_id_next + 1);
4103434Sesaxe 		id = pg_id_next++;
4113434Sesaxe 	} while (bitset_in_set(&pg_id_set, id));
4123434Sesaxe 
4133434Sesaxe 	pg->pg_id = id;
4143434Sesaxe 	bitset_add(&pg_id_set, pg->pg_id);
4153434Sesaxe 
4163434Sesaxe 	/*
4173434Sesaxe 	 * Create the PG's CPU group
4183434Sesaxe 	 */
4193434Sesaxe 	group_create(&pg->pg_cpus);
4203434Sesaxe 
4218906SEric.Saxe@Sun.COM 	/*
4228906SEric.Saxe@Sun.COM 	 * Initialize the events ops vector
4238906SEric.Saxe@Sun.COM 	 */
4248906SEric.Saxe@Sun.COM 	pg_callback_set_defaults(pg);
4258906SEric.Saxe@Sun.COM 
4263434Sesaxe 	return (pg);
4273434Sesaxe }
4283434Sesaxe 
4293434Sesaxe /*
4303434Sesaxe  * Destroy a PG.
4313434Sesaxe  * This routine may block.
4323434Sesaxe  */
4333434Sesaxe void
4343434Sesaxe pg_destroy(pg_t *pg)
4353434Sesaxe {
4363434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
4373434Sesaxe 
4383434Sesaxe 	group_destroy(&pg->pg_cpus);
4393434Sesaxe 
4403434Sesaxe 	/*
4413434Sesaxe 	 * Unassign the pg_id
4423434Sesaxe 	 */
4433434Sesaxe 	if (pg_id_next > pg->pg_id)
4443434Sesaxe 		pg_id_next = pg->pg_id;
4453434Sesaxe 	bitset_del(&pg_id_set, pg->pg_id);
4463434Sesaxe 
4473434Sesaxe 	/*
4483434Sesaxe 	 * Invoke the class specific de-allocation routine
4493434Sesaxe 	 */
4503434Sesaxe 	PG_FREE(pg);
4513434Sesaxe }
4523434Sesaxe 
4533434Sesaxe /*
4543434Sesaxe  * Add the CPU "cp" to processor group "pg"
4553434Sesaxe  * This routine may block.
4563434Sesaxe  */
4573434Sesaxe void
4589352SEric.Saxe@Sun.COM pg_cpu_add(pg_t *pg, cpu_t *cp, cpu_pg_t *cpu_pg)
4593434Sesaxe {
4603434Sesaxe 	int	err;
4613434Sesaxe 
4623434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
4633434Sesaxe 
4643434Sesaxe 	/* This adds the CPU to the PG's CPU group */
4653434Sesaxe 	err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
4663434Sesaxe 	ASSERT(err == 0);
4673434Sesaxe 
4689352SEric.Saxe@Sun.COM 	/*
4699352SEric.Saxe@Sun.COM 	 * The CPU should be referencing the bootstrap PG data still
4709352SEric.Saxe@Sun.COM 	 * at this point, since this routine may block causing us to
4719352SEric.Saxe@Sun.COM 	 * enter the dispatcher.
4729352SEric.Saxe@Sun.COM 	 */
4739438SEric.Saxe@Sun.COM 	ASSERT(pg_cpu_is_bootstrapped(cp));
4749352SEric.Saxe@Sun.COM 
4753434Sesaxe 	/* This adds the PG to the CPUs PG group */
4769352SEric.Saxe@Sun.COM 	err = group_add(&cpu_pg->pgs, pg, GRP_RESIZE);
4773434Sesaxe 	ASSERT(err == 0);
4783434Sesaxe }
4793434Sesaxe 
4803434Sesaxe /*
4813434Sesaxe  * Remove "cp" from "pg".
4823434Sesaxe  * This routine may block.
4833434Sesaxe  */
4843434Sesaxe void
4859352SEric.Saxe@Sun.COM pg_cpu_delete(pg_t *pg, cpu_t *cp, cpu_pg_t *cpu_pg)
4863434Sesaxe {
4873434Sesaxe 	int	err;
4883434Sesaxe 
4893434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
4903434Sesaxe 
4913434Sesaxe 	/* Remove the CPU from the PG */
4923434Sesaxe 	err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
4933434Sesaxe 	ASSERT(err == 0);
4943434Sesaxe 
4959352SEric.Saxe@Sun.COM 	/*
4969352SEric.Saxe@Sun.COM 	 * The CPU should be referencing the bootstrap PG data still
4979352SEric.Saxe@Sun.COM 	 * at this point, since this routine may block causing us to
4989352SEric.Saxe@Sun.COM 	 * enter the dispatcher.
4999352SEric.Saxe@Sun.COM 	 */
5009438SEric.Saxe@Sun.COM 	ASSERT(pg_cpu_is_bootstrapped(cp));
5019352SEric.Saxe@Sun.COM 
5023434Sesaxe 	/* Remove the PG from the CPU's PG group */
5039352SEric.Saxe@Sun.COM 	err = group_remove(&cpu_pg->pgs, pg, GRP_RESIZE);
5043434Sesaxe 	ASSERT(err == 0);
5053434Sesaxe }
5063434Sesaxe 
5073434Sesaxe /*
5083434Sesaxe  * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
5093434Sesaxe  */
5103434Sesaxe static cpu_pg_t *
5113434Sesaxe pg_cpu_data_alloc(void)
5123434Sesaxe {
5133434Sesaxe 	cpu_pg_t	*pgd;
5143434Sesaxe 
5153434Sesaxe 	pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
5163434Sesaxe 	group_create(&pgd->pgs);
5173434Sesaxe 	group_create(&pgd->cmt_pgs);
5183434Sesaxe 
5193434Sesaxe 	return (pgd);
5203434Sesaxe }
5213434Sesaxe 
5223434Sesaxe /*
5233434Sesaxe  * Free the CPU's PG data.
5243434Sesaxe  */
5253434Sesaxe static void
5263434Sesaxe pg_cpu_data_free(cpu_pg_t *pgd)
5273434Sesaxe {
5283434Sesaxe 	group_destroy(&pgd->pgs);
5293434Sesaxe 	group_destroy(&pgd->cmt_pgs);
5303434Sesaxe 	kmem_free(pgd, sizeof (cpu_pg_t));
5313434Sesaxe }
5323434Sesaxe 
5333434Sesaxe /*
534*11172SHaik.Aftandilian@Sun.COM  * Called when either a new CPU is coming into the system (either
535*11172SHaik.Aftandilian@Sun.COM  * via booting or DR) or when the CPU's PG data is being recalculated.
536*11172SHaik.Aftandilian@Sun.COM  * Allocate its PG data, and notify all registered classes about
5373434Sesaxe  * the new CPU.
5383434Sesaxe  *
539*11172SHaik.Aftandilian@Sun.COM  * If "deferred_init" is B_TRUE, the CPU's PG data will be allocated
540*11172SHaik.Aftandilian@Sun.COM  * and returned, but the "bootstrap" structure will be left in place.
541*11172SHaik.Aftandilian@Sun.COM  * The deferred_init option is used when all CPUs in the system are
542*11172SHaik.Aftandilian@Sun.COM  * using the bootstrap structure as part of the process of recalculating
543*11172SHaik.Aftandilian@Sun.COM  * all PG data. The caller must replace the bootstrap structure with the
544*11172SHaik.Aftandilian@Sun.COM  * allocated PG data before pg_cpu_active is called.
545*11172SHaik.Aftandilian@Sun.COM  *
5463434Sesaxe  * This routine may block.
5473434Sesaxe  */
548*11172SHaik.Aftandilian@Sun.COM cpu_pg_t *
549*11172SHaik.Aftandilian@Sun.COM pg_cpu_init(cpu_t *cp, boolean_t deferred_init)
5503434Sesaxe {
5513434Sesaxe 	pg_cid_t	i;
5529352SEric.Saxe@Sun.COM 	cpu_pg_t	*cpu_pg;
5533434Sesaxe 
5543434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
5553434Sesaxe 
5563434Sesaxe 	/*
5573434Sesaxe 	 * Allocate and size the per CPU pg data
5589352SEric.Saxe@Sun.COM 	 *
5599352SEric.Saxe@Sun.COM 	 * The CPU's PG data will be populated by the various
5609352SEric.Saxe@Sun.COM 	 * PG classes during the invocation of the PG_CPU_INIT()
5619352SEric.Saxe@Sun.COM 	 * callback below.
5629352SEric.Saxe@Sun.COM 	 *
5639352SEric.Saxe@Sun.COM 	 * Since the we could block and enter the dispatcher during
5649352SEric.Saxe@Sun.COM 	 * this process, the CPU will continue to reference the bootstrap
5659352SEric.Saxe@Sun.COM 	 * PG data until all the initialization completes.
5663434Sesaxe 	 */
5679438SEric.Saxe@Sun.COM 	ASSERT(pg_cpu_is_bootstrapped(cp));
5689352SEric.Saxe@Sun.COM 
5699352SEric.Saxe@Sun.COM 	cpu_pg = pg_cpu_data_alloc();
5703434Sesaxe 
5713434Sesaxe 	/*
5723434Sesaxe 	 * Notify all registered classes about the new CPU
5733434Sesaxe 	 */
5743434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
5759352SEric.Saxe@Sun.COM 		PG_CPU_INIT(i, cp, cpu_pg);
5769352SEric.Saxe@Sun.COM 
5779352SEric.Saxe@Sun.COM 	/*
5789352SEric.Saxe@Sun.COM 	 * The CPU's PG data is now ready to use.
5799352SEric.Saxe@Sun.COM 	 */
580*11172SHaik.Aftandilian@Sun.COM 	if (deferred_init == B_FALSE)
581*11172SHaik.Aftandilian@Sun.COM 		cp->cpu_pg = cpu_pg;
582*11172SHaik.Aftandilian@Sun.COM 
583*11172SHaik.Aftandilian@Sun.COM 	return (cpu_pg);
5843434Sesaxe }
5853434Sesaxe 
5863434Sesaxe /*
587*11172SHaik.Aftandilian@Sun.COM  * Either this CPU is being deleted from the system or its PG data is
588*11172SHaik.Aftandilian@Sun.COM  * being recalculated. Notify the classes and free up the CPU's PG data.
589*11172SHaik.Aftandilian@Sun.COM  *
590*11172SHaik.Aftandilian@Sun.COM  * If "cpu_pg_deferred" is non-NULL, it points to the CPU's PG data and
591*11172SHaik.Aftandilian@Sun.COM  * serves to indicate that this CPU is already using the bootstrap
592*11172SHaik.Aftandilian@Sun.COM  * stucture. Used as part of the process to recalculate the PG data for
593*11172SHaik.Aftandilian@Sun.COM  * all CPUs in the system.
5943434Sesaxe  */
5953434Sesaxe void
596*11172SHaik.Aftandilian@Sun.COM pg_cpu_fini(cpu_t *cp, cpu_pg_t *cpu_pg_deferred)
5973434Sesaxe {
5983434Sesaxe 	pg_cid_t	i;
5999352SEric.Saxe@Sun.COM 	cpu_pg_t	*cpu_pg;
6003434Sesaxe 
6013434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
6023434Sesaxe 
603*11172SHaik.Aftandilian@Sun.COM 	if (cpu_pg_deferred == NULL) {
604*11172SHaik.Aftandilian@Sun.COM 		cpu_pg = cp->cpu_pg;
605*11172SHaik.Aftandilian@Sun.COM 
606*11172SHaik.Aftandilian@Sun.COM 		/*
607*11172SHaik.Aftandilian@Sun.COM 		 * This can happen if the CPU coming into the system
608*11172SHaik.Aftandilian@Sun.COM 		 * failed to power on.
609*11172SHaik.Aftandilian@Sun.COM 		 */
610*11172SHaik.Aftandilian@Sun.COM 		if (cpu_pg == NULL || pg_cpu_is_bootstrapped(cp))
611*11172SHaik.Aftandilian@Sun.COM 			return;
6129352SEric.Saxe@Sun.COM 
613*11172SHaik.Aftandilian@Sun.COM 		/*
614*11172SHaik.Aftandilian@Sun.COM 		 * Have the CPU reference the bootstrap PG data to survive
615*11172SHaik.Aftandilian@Sun.COM 		 * the dispatcher should it block from here on out.
616*11172SHaik.Aftandilian@Sun.COM 		 */
617*11172SHaik.Aftandilian@Sun.COM 		pg_cpu_bootstrap(cp);
618*11172SHaik.Aftandilian@Sun.COM 	} else {
619*11172SHaik.Aftandilian@Sun.COM 		ASSERT(pg_cpu_is_bootstrapped(cp));
620*11172SHaik.Aftandilian@Sun.COM 		cpu_pg = cpu_pg_deferred;
621*11172SHaik.Aftandilian@Sun.COM 	}
6229352SEric.Saxe@Sun.COM 
6233434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
6249352SEric.Saxe@Sun.COM 		PG_CPU_FINI(i, cp, cpu_pg);
6253434Sesaxe 
6269352SEric.Saxe@Sun.COM 	pg_cpu_data_free(cpu_pg);
6273434Sesaxe }
6283434Sesaxe 
6293434Sesaxe /*
6303434Sesaxe  * This CPU is becoming active (online)
6313434Sesaxe  * This routine may not block as it is called from paused CPUs
6323434Sesaxe  * context.
6333434Sesaxe  */
6343434Sesaxe void
6353434Sesaxe pg_cpu_active(cpu_t *cp)
6363434Sesaxe {
6373434Sesaxe 	pg_cid_t	i;
6383434Sesaxe 
6393434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
6403434Sesaxe 
6413434Sesaxe 	/*
6423434Sesaxe 	 * Notify all registered classes about the new CPU
6433434Sesaxe 	 */
6443434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
6453434Sesaxe 		PG_CPU_ACTIVE(i, cp);
6463434Sesaxe }
6473434Sesaxe 
6483434Sesaxe /*
6493434Sesaxe  * This CPU is going inactive (offline)
6503434Sesaxe  * This routine may not block, as it is called from paused
6513434Sesaxe  * CPUs context.
6523434Sesaxe  */
6533434Sesaxe void
6543434Sesaxe pg_cpu_inactive(cpu_t *cp)
6553434Sesaxe {
6563434Sesaxe 	pg_cid_t	i;
6573434Sesaxe 
6583434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
6593434Sesaxe 
6603434Sesaxe 	/*
6613434Sesaxe 	 * Notify all registered classes about the new CPU
6623434Sesaxe 	 */
6633434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
6643434Sesaxe 		PG_CPU_INACTIVE(i, cp);
6653434Sesaxe }
6663434Sesaxe 
6673434Sesaxe /*
6683434Sesaxe  * Invoked when the CPU is about to move into the partition
6693434Sesaxe  * This routine may block.
6703434Sesaxe  */
6713434Sesaxe void
6723434Sesaxe pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
6733434Sesaxe {
6743434Sesaxe 	int	i;
6753434Sesaxe 
6763434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
6773434Sesaxe 
6783434Sesaxe 	/*
6793434Sesaxe 	 * Notify all registered classes that the
6803434Sesaxe 	 * CPU is about to enter the CPU partition
6813434Sesaxe 	 */
6823434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
6833434Sesaxe 		PG_CPUPART_IN(i, cp, pp);
6843434Sesaxe }
6853434Sesaxe 
6863434Sesaxe /*
6873434Sesaxe  * Invoked when the CPU is about to move out of the partition
6883434Sesaxe  * This routine may block.
6893434Sesaxe  */
6903434Sesaxe /*ARGSUSED*/
6913434Sesaxe void
6923434Sesaxe pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
6933434Sesaxe {
6943434Sesaxe 	int	i;
6953434Sesaxe 
6963434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
6973434Sesaxe 
6983434Sesaxe 	/*
6993434Sesaxe 	 * Notify all registered classes that the
7003434Sesaxe 	 * CPU is about to leave the CPU partition
7013434Sesaxe 	 */
7023434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
7033434Sesaxe 		PG_CPUPART_OUT(i, cp, pp);
7043434Sesaxe }
7053434Sesaxe 
7063434Sesaxe /*
7073434Sesaxe  * Invoked when the CPU is *moving* partitions.
7083434Sesaxe  *
7093434Sesaxe  * This routine may not block, as it is called from paused CPUs
7103434Sesaxe  * context.
7113434Sesaxe  */
7123434Sesaxe void
7133434Sesaxe pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
7143434Sesaxe {
7153434Sesaxe 	int	i;
7163434Sesaxe 
7173434Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
7183434Sesaxe 
7193434Sesaxe 	/*
7203434Sesaxe 	 * Notify all registered classes that the
7213434Sesaxe 	 * CPU is about to leave the CPU partition
7223434Sesaxe 	 */
7233434Sesaxe 	for (i = 0; i < pg_nclasses; i++)
7243434Sesaxe 		PG_CPUPART_MOVE(i, cp, oldpp, newpp);
7253434Sesaxe }
7263434Sesaxe 
7273434Sesaxe /*
7288906SEric.Saxe@Sun.COM  * Return a class specific string describing a policy implemented
7298906SEric.Saxe@Sun.COM  * across this PG
7308906SEric.Saxe@Sun.COM  */
7318906SEric.Saxe@Sun.COM char *
7328906SEric.Saxe@Sun.COM pg_policy_name(pg_t *pg)
7338906SEric.Saxe@Sun.COM {
7348906SEric.Saxe@Sun.COM 	char *str;
7358906SEric.Saxe@Sun.COM 	if ((str = PG_POLICY_NAME(pg)) != NULL)
7368906SEric.Saxe@Sun.COM 		return (str);
7378906SEric.Saxe@Sun.COM 
7388906SEric.Saxe@Sun.COM 	return ("N/A");
7398906SEric.Saxe@Sun.COM }
7408906SEric.Saxe@Sun.COM 
7418906SEric.Saxe@Sun.COM /*
7423434Sesaxe  * Provide the specified CPU a bootstrap pg
7433434Sesaxe  * This is needed to allow sane behaviour if any PG consuming
7443434Sesaxe  * code needs to deal with a partially initialized CPU
7453434Sesaxe  */
7463434Sesaxe void
7473434Sesaxe pg_cpu_bootstrap(cpu_t *cp)
7483434Sesaxe {
7493434Sesaxe 	cp->cpu_pg = &bootstrap_pg_data;
7503434Sesaxe }
7513434Sesaxe 
7529438SEric.Saxe@Sun.COM /*
7539438SEric.Saxe@Sun.COM  * Return non-zero if the specified CPU is bootstrapped,
7549438SEric.Saxe@Sun.COM  * which means it's CPU specific PG data has not yet been
7559438SEric.Saxe@Sun.COM  * fully constructed.
7569438SEric.Saxe@Sun.COM  */
7579438SEric.Saxe@Sun.COM int
7589438SEric.Saxe@Sun.COM pg_cpu_is_bootstrapped(cpu_t *cp)
7599438SEric.Saxe@Sun.COM {
7609438SEric.Saxe@Sun.COM 	return (cp->cpu_pg == &bootstrap_pg_data);
7619438SEric.Saxe@Sun.COM }
7629438SEric.Saxe@Sun.COM 
7633434Sesaxe /*ARGSUSED*/
7643434Sesaxe static pg_t *
7653434Sesaxe pg_alloc_default(pg_class_t class)
7663434Sesaxe {
7673434Sesaxe 	return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
7683434Sesaxe }
7693434Sesaxe 
7703434Sesaxe /*ARGSUSED*/
7713434Sesaxe static void
7723434Sesaxe pg_free_default(struct pg *pg)
7733434Sesaxe {
7743434Sesaxe 	kmem_free(pg, sizeof (pg_t));
7753434Sesaxe }
7768906SEric.Saxe@Sun.COM 
/*
 * Do-nothing callback used to fill every slot of the default event
 * callback vector. It is declared without a parameter list because the
 * slots it fills are invoked with differing arguments.
 */
static void
pg_null_op()
{
	/* Intentionally empty */
}
7818906SEric.Saxe@Sun.COM 
7828906SEric.Saxe@Sun.COM /*
7838906SEric.Saxe@Sun.COM  * Invoke the "thread switch" callback for each of the CPU's PGs
7848906SEric.Saxe@Sun.COM  * This is invoked from the dispatcher swtch() routine, which is called
7858906SEric.Saxe@Sun.COM  * when a thread running an a CPU should switch to another thread.
7868906SEric.Saxe@Sun.COM  * "cp" is the CPU on which the thread switch is happening
7878906SEric.Saxe@Sun.COM  * "now" is an unscaled hrtime_t timestamp taken in swtch()
7888906SEric.Saxe@Sun.COM  * "old" and "new" are the outgoing and incoming threads, respectively.
7898906SEric.Saxe@Sun.COM  */
7908906SEric.Saxe@Sun.COM void
7918906SEric.Saxe@Sun.COM pg_ev_thread_swtch(struct cpu *cp, hrtime_t now, kthread_t *old, kthread_t *new)
7928906SEric.Saxe@Sun.COM {
7938906SEric.Saxe@Sun.COM 	int	i, sz;
7948906SEric.Saxe@Sun.COM 	group_t	*grp;
7958906SEric.Saxe@Sun.COM 	pg_t	*pg;
7968906SEric.Saxe@Sun.COM 
7978906SEric.Saxe@Sun.COM 	grp = &cp->cpu_pg->pgs;
7988906SEric.Saxe@Sun.COM 	sz = GROUP_SIZE(grp);
7998906SEric.Saxe@Sun.COM 	for (i = 0; i < sz; i++) {
8008906SEric.Saxe@Sun.COM 		pg = GROUP_ACCESS(grp, i);
8018906SEric.Saxe@Sun.COM 		pg->pg_cb.thread_swtch(pg, cp, now, old, new);
8028906SEric.Saxe@Sun.COM 	}
8038906SEric.Saxe@Sun.COM }
8048906SEric.Saxe@Sun.COM 
8058906SEric.Saxe@Sun.COM /*
8068906SEric.Saxe@Sun.COM  * Invoke the "thread remain" callback for each of the CPU's PGs.
8078906SEric.Saxe@Sun.COM  * This is called from the dispatcher's swtch() routine when a thread
8088906SEric.Saxe@Sun.COM  * running on the CPU "cp" is switching to itself, which can happen as an
8098906SEric.Saxe@Sun.COM  * artifact of the thread's timeslice expiring.
8108906SEric.Saxe@Sun.COM  */
8118906SEric.Saxe@Sun.COM void
8128906SEric.Saxe@Sun.COM pg_ev_thread_remain(struct cpu *cp, kthread_t *t)
8138906SEric.Saxe@Sun.COM {
8148906SEric.Saxe@Sun.COM 	int	i, sz;
8158906SEric.Saxe@Sun.COM 	group_t	*grp;
8168906SEric.Saxe@Sun.COM 	pg_t	*pg;
8178906SEric.Saxe@Sun.COM 
8188906SEric.Saxe@Sun.COM 	grp = &cp->cpu_pg->pgs;
8198906SEric.Saxe@Sun.COM 	sz = GROUP_SIZE(grp);
8208906SEric.Saxe@Sun.COM 	for (i = 0; i < sz; i++) {
8218906SEric.Saxe@Sun.COM 		pg = GROUP_ACCESS(grp, i);
8228906SEric.Saxe@Sun.COM 		pg->pg_cb.thread_remain(pg, cp, t);
8238906SEric.Saxe@Sun.COM 	}
8248906SEric.Saxe@Sun.COM }
825