13434Sesaxe /*
23434Sesaxe * CDDL HEADER START
33434Sesaxe *
43434Sesaxe * The contents of this file are subject to the terms of the
53434Sesaxe * Common Development and Distribution License (the "License").
63434Sesaxe * You may not use this file except in compliance with the License.
73434Sesaxe *
83434Sesaxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93434Sesaxe * or http://www.opensolaris.org/os/licensing.
103434Sesaxe * See the License for the specific language governing permissions
113434Sesaxe * and limitations under the License.
123434Sesaxe *
133434Sesaxe * When distributing Covered Code, include this CDDL HEADER in each
143434Sesaxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153434Sesaxe * If applicable, add the following below this CDDL HEADER, with the
163434Sesaxe * fields enclosed by brackets "[]" replaced with your own identifying
173434Sesaxe * information: Portions Copyright [yyyy] [name of copyright owner]
183434Sesaxe *
193434Sesaxe * CDDL HEADER END
203434Sesaxe */
213434Sesaxe /*
22*13124SAlexander.Kolbasov@Sun.COM * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
233434Sesaxe */
243434Sesaxe
253434Sesaxe #include <sys/systm.h>
263434Sesaxe #include <sys/types.h>
273434Sesaxe #include <sys/param.h>
283434Sesaxe #include <sys/thread.h>
293434Sesaxe #include <sys/cpuvar.h>
303434Sesaxe #include <sys/cpupart.h>
313434Sesaxe #include <sys/kmem.h>
323434Sesaxe #include <sys/cmn_err.h>
333434Sesaxe #include <sys/kstat.h>
343434Sesaxe #include <sys/processor.h>
353434Sesaxe #include <sys/disp.h>
363434Sesaxe #include <sys/group.h>
373434Sesaxe #include <sys/pghw.h>
383434Sesaxe #include <sys/bitset.h>
393434Sesaxe #include <sys/lgrp.h>
403434Sesaxe #include <sys/cmt.h>
418906SEric.Saxe@Sun.COM #include <sys/cpu_pm.h>
423434Sesaxe
433434Sesaxe /*
443434Sesaxe * CMT scheduler / dispatcher support
453434Sesaxe *
463434Sesaxe * This file implements CMT scheduler support using Processor Groups.
473434Sesaxe * The CMT processor group class creates and maintains the CMT class
483434Sesaxe * specific processor group pg_cmt_t.
493434Sesaxe *
503434Sesaxe * ---------------------------- <-- pg_cmt_t *
513434Sesaxe * | pghw_t |
523434Sesaxe * ----------------------------
533434Sesaxe * | CMT class specific data |
543434Sesaxe * | - hierarchy linkage |
553434Sesaxe * | - CMT load balancing data|
563434Sesaxe * | - active CPU group/bitset|
573434Sesaxe * ----------------------------
583434Sesaxe *
593434Sesaxe * The scheduler/dispatcher leverages knowledge of the performance
603434Sesaxe * relevant CMT sharing relationships existing between cpus to implement
618906SEric.Saxe@Sun.COM * optimized affinity, load balancing, and coalescence policies.
623434Sesaxe *
 * Load balancing policy seeks to improve performance by minimizing
 * contention over shared processor resources / facilities. Affinity
 * policies seek to improve cache and TLB utilization. Coalescence
 * policies improve resource utilization and ultimately power efficiency.
673434Sesaxe *
683434Sesaxe * The CMT PGs created by this class are already arranged into a
693434Sesaxe * hierarchy (which is done in the pghw layer). To implement the top-down
703434Sesaxe * CMT load balancing algorithm, the CMT PGs additionally maintain
713434Sesaxe * parent, child and sibling hierarchy relationships.
 * Parent PGs always contain a superset of their children's resources,
 * each PG can have at most one parent, and siblings are the group of PGs
 * sharing the same parent.
753434Sesaxe *
769746SEric.Saxe@Sun.COM * On UMA based systems, the CMT load balancing algorithm begins by balancing
779746SEric.Saxe@Sun.COM * load across the group of top level PGs in the system hierarchy.
789746SEric.Saxe@Sun.COM * On NUMA systems, the CMT load balancing algorithm balances load across the
799746SEric.Saxe@Sun.COM * group of top level PGs in each leaf lgroup...but for root homed threads,
809746SEric.Saxe@Sun.COM * is willing to balance against all the top level PGs in the system.
819746SEric.Saxe@Sun.COM *
829746SEric.Saxe@Sun.COM * Groups of top level PGs are maintained to implement the above, one for each
839746SEric.Saxe@Sun.COM * leaf lgroup (containing the top level PGs in that lgroup), and one (for the
849746SEric.Saxe@Sun.COM * root lgroup) that contains all the top level PGs in the system.
853434Sesaxe */
863676Sesaxe static cmt_lgrp_t *cmt_lgrps = NULL; /* cmt_lgrps list head */
873676Sesaxe static cmt_lgrp_t *cpu0_lgrp = NULL; /* boot CPU's initial lgrp */
883676Sesaxe /* used for null_proc_lpa */
898906SEric.Saxe@Sun.COM cmt_lgrp_t *cmt_root = NULL; /* Reference to root cmt pg */
903434Sesaxe
913676Sesaxe static int is_cpu0 = 1; /* true if this is boot CPU context */
923676Sesaxe
933676Sesaxe /*
948906SEric.Saxe@Sun.COM * Array of hardware sharing relationships that are blacklisted.
959746SEric.Saxe@Sun.COM * CMT scheduling optimizations won't be performed for blacklisted sharing
969746SEric.Saxe@Sun.COM * relationships.
978906SEric.Saxe@Sun.COM */
988906SEric.Saxe@Sun.COM static int cmt_hw_blacklisted[PGHW_NUM_COMPONENTS];
998906SEric.Saxe@Sun.COM
1008906SEric.Saxe@Sun.COM /*
1013676Sesaxe * Set this to non-zero to disable CMT scheduling
1023676Sesaxe * This must be done via kmdb -d, as /etc/system will be too late
1033676Sesaxe */
1048906SEric.Saxe@Sun.COM int cmt_sched_disabled = 0;
1053434Sesaxe
/*
 * Status codes produced by CMT lineage validation.
 * See pg_cmt_lineage_validate().
 */
enum cmt_lineage_validation {
	CMT_LINEAGE_VALID,
	CMT_LINEAGE_NON_CONCENTRIC,
	CMT_LINEAGE_PG_SPANS_LGRPS,
	CMT_LINEAGE_NON_PROMOTABLE,
	CMT_LINEAGE_REPAIRED,
	CMT_LINEAGE_UNRECOVERABLE
};
typedef enum cmt_lineage_validation cmt_lineage_validation_t;

/*
 * Status of the lineage currently under construction.
 * cpu_lock must be held to change this.
 */
cmt_lineage_validation_t cmt_lineage_status = CMT_LINEAGE_VALID;
1249036SEric.Saxe@Sun.COM
1259036SEric.Saxe@Sun.COM /*
1269036SEric.Saxe@Sun.COM * Power domain definitions (on x86) are defined by ACPI, and
1279036SEric.Saxe@Sun.COM * therefore may be subject to BIOS bugs.
1289036SEric.Saxe@Sun.COM */
1299036SEric.Saxe@Sun.COM #define PG_CMT_HW_SUSPECT(hw) PGHW_IS_PM_DOMAIN(hw)
1309036SEric.Saxe@Sun.COM
1319036SEric.Saxe@Sun.COM /*
1329036SEric.Saxe@Sun.COM * Macro to test if PG is managed by the CMT PG class
1339036SEric.Saxe@Sun.COM */
1349036SEric.Saxe@Sun.COM #define IS_CMT_PG(pg) (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)
1359036SEric.Saxe@Sun.COM
1363434Sesaxe static pg_cid_t pg_cmt_class_id; /* PG class id */
1373434Sesaxe
1383434Sesaxe static pg_t *pg_cmt_alloc();
1393434Sesaxe static void pg_cmt_free(pg_t *);
1409352SEric.Saxe@Sun.COM static void pg_cmt_cpu_init(cpu_t *, cpu_pg_t *);
1419352SEric.Saxe@Sun.COM static void pg_cmt_cpu_fini(cpu_t *, cpu_pg_t *);
1423434Sesaxe static void pg_cmt_cpu_active(cpu_t *);
1433434Sesaxe static void pg_cmt_cpu_inactive(cpu_t *);
1443434Sesaxe static void pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
1453434Sesaxe static void pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
1468906SEric.Saxe@Sun.COM static char *pg_cmt_policy_name(pg_t *);
1478906SEric.Saxe@Sun.COM static void pg_cmt_hier_sort(pg_cmt_t **, int);
1488906SEric.Saxe@Sun.COM static pg_cmt_t *pg_cmt_hier_rank(pg_cmt_t *, pg_cmt_t *);
1493434Sesaxe static int pg_cmt_cpu_belongs(pg_t *, cpu_t *);
1503434Sesaxe static int pg_cmt_hw(pghw_type_t);
1513434Sesaxe static cmt_lgrp_t *pg_cmt_find_lgrp(lgrp_handle_t);
1523676Sesaxe static cmt_lgrp_t *pg_cmt_lgrp_create(lgrp_handle_t);
1538906SEric.Saxe@Sun.COM static void cmt_ev_thread_swtch(pg_t *, cpu_t *, hrtime_t,
1548906SEric.Saxe@Sun.COM kthread_t *, kthread_t *);
1558906SEric.Saxe@Sun.COM static void cmt_ev_thread_swtch_pwr(pg_t *, cpu_t *, hrtime_t,
1568906SEric.Saxe@Sun.COM kthread_t *, kthread_t *);
1578906SEric.Saxe@Sun.COM static void cmt_ev_thread_remain_pwr(pg_t *, cpu_t *, kthread_t *);
1589438SEric.Saxe@Sun.COM static cmt_lineage_validation_t pg_cmt_lineage_validate(pg_cmt_t **, int *,
1599438SEric.Saxe@Sun.COM cpu_pg_t *);
1603434Sesaxe
1618906SEric.Saxe@Sun.COM /*
1623434Sesaxe * CMT PG ops
1633434Sesaxe */
1643434Sesaxe struct pg_ops pg_ops_cmt = {
1653434Sesaxe pg_cmt_alloc,
1663434Sesaxe pg_cmt_free,
1673434Sesaxe pg_cmt_cpu_init,
1683434Sesaxe pg_cmt_cpu_fini,
1693434Sesaxe pg_cmt_cpu_active,
1703434Sesaxe pg_cmt_cpu_inactive,
1713434Sesaxe pg_cmt_cpupart_in,
1723434Sesaxe NULL, /* cpupart_out */
1733434Sesaxe pg_cmt_cpupart_move,
1743434Sesaxe pg_cmt_cpu_belongs,
1758906SEric.Saxe@Sun.COM pg_cmt_policy_name,
1763434Sesaxe };
1773434Sesaxe
1783434Sesaxe /*
1793434Sesaxe * Initialize the CMT PG class
1803434Sesaxe */
1813434Sesaxe void
pg_cmt_class_init(void)1823434Sesaxe pg_cmt_class_init(void)
1833434Sesaxe {
1843434Sesaxe if (cmt_sched_disabled)
1853434Sesaxe return;
1863434Sesaxe
1873434Sesaxe pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
1883434Sesaxe }
1893434Sesaxe
1903434Sesaxe /*
1913434Sesaxe * Called to indicate a new CPU has started up so
1923434Sesaxe * that either t0 or the slave startup thread can
1933434Sesaxe * be accounted for.
1943434Sesaxe */
1953434Sesaxe void
pg_cmt_cpu_startup(cpu_t * cp)1963434Sesaxe pg_cmt_cpu_startup(cpu_t *cp)
1973434Sesaxe {
1988906SEric.Saxe@Sun.COM pg_ev_thread_swtch(cp, gethrtime_unscaled(), cp->cpu_idle_thread,
1998906SEric.Saxe@Sun.COM cp->cpu_thread);
2003434Sesaxe }
2013434Sesaxe
2023434Sesaxe /*
2033434Sesaxe * Return non-zero if thread can migrate between "from" and "to"
2043434Sesaxe * without a performance penalty
2053434Sesaxe */
2063434Sesaxe int
pg_cmt_can_migrate(cpu_t * from,cpu_t * to)2073434Sesaxe pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
2083434Sesaxe {
2093434Sesaxe if (from->cpu_physid->cpu_cacheid ==
2103434Sesaxe to->cpu_physid->cpu_cacheid)
2113434Sesaxe return (1);
2123434Sesaxe return (0);
2133434Sesaxe }
2143434Sesaxe
2153434Sesaxe /*
2163434Sesaxe * CMT class specific PG allocation
2173434Sesaxe */
2183434Sesaxe static pg_t *
pg_cmt_alloc(void)2193434Sesaxe pg_cmt_alloc(void)
2203434Sesaxe {
2213434Sesaxe return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
2223434Sesaxe }
2233434Sesaxe
2243434Sesaxe /*
2253434Sesaxe * Class specific PG de-allocation
2263434Sesaxe */
2273434Sesaxe static void
pg_cmt_free(pg_t * pg)2283434Sesaxe pg_cmt_free(pg_t *pg)
2293434Sesaxe {
2303434Sesaxe ASSERT(pg != NULL);
2313434Sesaxe ASSERT(IS_CMT_PG(pg));
2323434Sesaxe
2333434Sesaxe kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t));
2343434Sesaxe }
2353434Sesaxe
2363434Sesaxe /*
2378906SEric.Saxe@Sun.COM * Given a hardware sharing relationship, return which dispatcher
2388906SEric.Saxe@Sun.COM * policies should be implemented to optimize performance and efficiency
2398906SEric.Saxe@Sun.COM */
2408906SEric.Saxe@Sun.COM static pg_cmt_policy_t
pg_cmt_policy(pghw_type_t hw)2418906SEric.Saxe@Sun.COM pg_cmt_policy(pghw_type_t hw)
2428906SEric.Saxe@Sun.COM {
2438906SEric.Saxe@Sun.COM pg_cmt_policy_t p;
2448906SEric.Saxe@Sun.COM
2458906SEric.Saxe@Sun.COM /*
2468906SEric.Saxe@Sun.COM * Give the platform a chance to override the default
2478906SEric.Saxe@Sun.COM */
2488906SEric.Saxe@Sun.COM if ((p = pg_plat_cmt_policy(hw)) != CMT_NO_POLICY)
2498906SEric.Saxe@Sun.COM return (p);
2508906SEric.Saxe@Sun.COM
2518906SEric.Saxe@Sun.COM switch (hw) {
2528906SEric.Saxe@Sun.COM case PGHW_IPIPE:
2538906SEric.Saxe@Sun.COM case PGHW_FPU:
25410947SSrihari.Venkatesan@Sun.COM case PGHW_PROCNODE:
2558906SEric.Saxe@Sun.COM case PGHW_CHIP:
2568906SEric.Saxe@Sun.COM return (CMT_BALANCE);
2578906SEric.Saxe@Sun.COM case PGHW_CACHE:
258*13124SAlexander.Kolbasov@Sun.COM return (CMT_AFFINITY | CMT_BALANCE);
2598906SEric.Saxe@Sun.COM case PGHW_POW_ACTIVE:
2608906SEric.Saxe@Sun.COM case PGHW_POW_IDLE:
2618906SEric.Saxe@Sun.COM return (CMT_BALANCE);
2628906SEric.Saxe@Sun.COM default:
2638906SEric.Saxe@Sun.COM return (CMT_NO_POLICY);
2648906SEric.Saxe@Sun.COM }
2658906SEric.Saxe@Sun.COM }
2668906SEric.Saxe@Sun.COM
2678906SEric.Saxe@Sun.COM /*
2688906SEric.Saxe@Sun.COM * Rank the importance of optimizing for the pg1 relationship vs.
2698906SEric.Saxe@Sun.COM * the pg2 relationship.
2708906SEric.Saxe@Sun.COM */
2718906SEric.Saxe@Sun.COM static pg_cmt_t *
pg_cmt_hier_rank(pg_cmt_t * pg1,pg_cmt_t * pg2)2728906SEric.Saxe@Sun.COM pg_cmt_hier_rank(pg_cmt_t *pg1, pg_cmt_t *pg2)
2738906SEric.Saxe@Sun.COM {
2748906SEric.Saxe@Sun.COM pghw_type_t hw1 = ((pghw_t *)pg1)->pghw_hw;
2758906SEric.Saxe@Sun.COM pghw_type_t hw2 = ((pghw_t *)pg2)->pghw_hw;
2768906SEric.Saxe@Sun.COM
2778906SEric.Saxe@Sun.COM /*
2788906SEric.Saxe@Sun.COM * A power domain is only important if CPUPM is enabled.
2798906SEric.Saxe@Sun.COM */
2808906SEric.Saxe@Sun.COM if (cpupm_get_policy() == CPUPM_POLICY_DISABLED) {
2818906SEric.Saxe@Sun.COM if (PGHW_IS_PM_DOMAIN(hw1) && !PGHW_IS_PM_DOMAIN(hw2))
2828906SEric.Saxe@Sun.COM return (pg2);
2838906SEric.Saxe@Sun.COM if (PGHW_IS_PM_DOMAIN(hw2) && !PGHW_IS_PM_DOMAIN(hw1))
2848906SEric.Saxe@Sun.COM return (pg1);
2858906SEric.Saxe@Sun.COM }
2868906SEric.Saxe@Sun.COM
2878906SEric.Saxe@Sun.COM /*
2888906SEric.Saxe@Sun.COM * Otherwise, ask the platform
2898906SEric.Saxe@Sun.COM */
2908906SEric.Saxe@Sun.COM if (pg_plat_hw_rank(hw1, hw2) == hw1)
2918906SEric.Saxe@Sun.COM return (pg1);
2928906SEric.Saxe@Sun.COM else
2938906SEric.Saxe@Sun.COM return (pg2);
2948906SEric.Saxe@Sun.COM }
2958906SEric.Saxe@Sun.COM
2968906SEric.Saxe@Sun.COM /*
2978906SEric.Saxe@Sun.COM * Initialize CMT callbacks for the given PG
2988906SEric.Saxe@Sun.COM */
2998906SEric.Saxe@Sun.COM static void
cmt_callback_init(pg_t * pg)3008906SEric.Saxe@Sun.COM cmt_callback_init(pg_t *pg)
3018906SEric.Saxe@Sun.COM {
3029746SEric.Saxe@Sun.COM /*
3039746SEric.Saxe@Sun.COM * Stick with the default callbacks if there isn't going to be
3049746SEric.Saxe@Sun.COM * any CMT thread placement optimizations implemented.
3059746SEric.Saxe@Sun.COM */
3069746SEric.Saxe@Sun.COM if (((pg_cmt_t *)pg)->cmt_policy == CMT_NO_POLICY)
3079746SEric.Saxe@Sun.COM return;
3089746SEric.Saxe@Sun.COM
3098906SEric.Saxe@Sun.COM switch (((pghw_t *)pg)->pghw_hw) {
3108906SEric.Saxe@Sun.COM case PGHW_POW_ACTIVE:
3118906SEric.Saxe@Sun.COM pg->pg_cb.thread_swtch = cmt_ev_thread_swtch_pwr;
3128906SEric.Saxe@Sun.COM pg->pg_cb.thread_remain = cmt_ev_thread_remain_pwr;
3138906SEric.Saxe@Sun.COM break;
3148906SEric.Saxe@Sun.COM default:
3158906SEric.Saxe@Sun.COM pg->pg_cb.thread_swtch = cmt_ev_thread_swtch;
3168906SEric.Saxe@Sun.COM
3178906SEric.Saxe@Sun.COM }
3188906SEric.Saxe@Sun.COM }
3198906SEric.Saxe@Sun.COM
3208906SEric.Saxe@Sun.COM /*
3218906SEric.Saxe@Sun.COM * Promote PG above it's current parent.
3229438SEric.Saxe@Sun.COM * This is only legal if PG has an equal or greater number of CPUs than its
3239438SEric.Saxe@Sun.COM * parent.
3249438SEric.Saxe@Sun.COM *
3259438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPUs
3269438SEric.Saxe@Sun.COM * in the PG being promoted), and may be invoked from a context where one CPU's
3279438SEric.Saxe@Sun.COM * PG data is under construction. In this case the argument "pgdata", if not
3289438SEric.Saxe@Sun.COM * NULL, is a reference to the CPU's under-construction PG data.
3293434Sesaxe */
3308906SEric.Saxe@Sun.COM static void
cmt_hier_promote(pg_cmt_t * pg,cpu_pg_t * pgdata)3319438SEric.Saxe@Sun.COM cmt_hier_promote(pg_cmt_t *pg, cpu_pg_t *pgdata)
3323434Sesaxe {
3338906SEric.Saxe@Sun.COM pg_cmt_t *parent;
3348906SEric.Saxe@Sun.COM group_t *children;
3358906SEric.Saxe@Sun.COM cpu_t *cpu;
3368906SEric.Saxe@Sun.COM group_iter_t iter;
3378906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter;
3388906SEric.Saxe@Sun.COM int r;
3398906SEric.Saxe@Sun.COM int err;
34011263SEric.Saxe@Sun.COM int nchildren;
3418906SEric.Saxe@Sun.COM
3428906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
3438906SEric.Saxe@Sun.COM
3448906SEric.Saxe@Sun.COM parent = pg->cmt_parent;
3458906SEric.Saxe@Sun.COM if (parent == NULL) {
3468906SEric.Saxe@Sun.COM /*
3478906SEric.Saxe@Sun.COM * Nothing to do
3488906SEric.Saxe@Sun.COM */
3498906SEric.Saxe@Sun.COM return;
3508906SEric.Saxe@Sun.COM }
3518906SEric.Saxe@Sun.COM
3528906SEric.Saxe@Sun.COM ASSERT(PG_NUM_CPUS((pg_t *)pg) >= PG_NUM_CPUS((pg_t *)parent));
3538906SEric.Saxe@Sun.COM
3548906SEric.Saxe@Sun.COM /*
3558906SEric.Saxe@Sun.COM * We're changing around the hierarchy, which is actively traversed
3568906SEric.Saxe@Sun.COM * by the dispatcher. Pause CPUS to ensure exclusivity.
3578906SEric.Saxe@Sun.COM */
3588906SEric.Saxe@Sun.COM pause_cpus(NULL);
3598906SEric.Saxe@Sun.COM
3608906SEric.Saxe@Sun.COM /*
3618906SEric.Saxe@Sun.COM * If necessary, update the parent's sibling set, replacing parent
3628906SEric.Saxe@Sun.COM * with PG.
3638906SEric.Saxe@Sun.COM */
3648906SEric.Saxe@Sun.COM if (parent->cmt_siblings) {
3658906SEric.Saxe@Sun.COM if (group_remove(parent->cmt_siblings, parent, GRP_NORESIZE)
3668906SEric.Saxe@Sun.COM != -1) {
3678906SEric.Saxe@Sun.COM r = group_add(parent->cmt_siblings, pg, GRP_NORESIZE);
3688906SEric.Saxe@Sun.COM ASSERT(r != -1);
3698906SEric.Saxe@Sun.COM }
3708906SEric.Saxe@Sun.COM }
3718906SEric.Saxe@Sun.COM
3728906SEric.Saxe@Sun.COM /*
3738906SEric.Saxe@Sun.COM * If the parent is at the top of the hierarchy, replace it's entry
3748906SEric.Saxe@Sun.COM * in the root lgroup's group of top level PGs.
3758906SEric.Saxe@Sun.COM */
3768906SEric.Saxe@Sun.COM if (parent->cmt_parent == NULL &&
3778906SEric.Saxe@Sun.COM parent->cmt_siblings != &cmt_root->cl_pgs) {
3788906SEric.Saxe@Sun.COM if (group_remove(&cmt_root->cl_pgs, parent, GRP_NORESIZE)
3798906SEric.Saxe@Sun.COM != -1) {
3808906SEric.Saxe@Sun.COM r = group_add(&cmt_root->cl_pgs, pg, GRP_NORESIZE);
3818906SEric.Saxe@Sun.COM ASSERT(r != -1);
3828906SEric.Saxe@Sun.COM }
3838906SEric.Saxe@Sun.COM }
3848906SEric.Saxe@Sun.COM
3858906SEric.Saxe@Sun.COM /*
3868906SEric.Saxe@Sun.COM * We assume (and therefore assert) that the PG being promoted is an
3878906SEric.Saxe@Sun.COM * only child of it's parent. Update the parent's children set
3888906SEric.Saxe@Sun.COM * replacing PG's entry with the parent (since the parent is becoming
38911263SEric.Saxe@Sun.COM * the child). Then have PG and the parent swap children sets and
39011263SEric.Saxe@Sun.COM * children counts.
3918906SEric.Saxe@Sun.COM */
3928906SEric.Saxe@Sun.COM ASSERT(GROUP_SIZE(parent->cmt_children) <= 1);
3938906SEric.Saxe@Sun.COM if (group_remove(parent->cmt_children, pg, GRP_NORESIZE) != -1) {
3948906SEric.Saxe@Sun.COM r = group_add(parent->cmt_children, parent, GRP_NORESIZE);
3958906SEric.Saxe@Sun.COM ASSERT(r != -1);
3968906SEric.Saxe@Sun.COM }
3978906SEric.Saxe@Sun.COM
3988906SEric.Saxe@Sun.COM children = pg->cmt_children;
3998906SEric.Saxe@Sun.COM pg->cmt_children = parent->cmt_children;
4008906SEric.Saxe@Sun.COM parent->cmt_children = children;
4018906SEric.Saxe@Sun.COM
40211263SEric.Saxe@Sun.COM nchildren = pg->cmt_nchildren;
40311263SEric.Saxe@Sun.COM pg->cmt_nchildren = parent->cmt_nchildren;
40411263SEric.Saxe@Sun.COM parent->cmt_nchildren = nchildren;
40511263SEric.Saxe@Sun.COM
4068906SEric.Saxe@Sun.COM /*
4078906SEric.Saxe@Sun.COM * Update the sibling references for PG and it's parent
4088906SEric.Saxe@Sun.COM */
4098906SEric.Saxe@Sun.COM pg->cmt_siblings = parent->cmt_siblings;
4108906SEric.Saxe@Sun.COM parent->cmt_siblings = pg->cmt_children;
4118906SEric.Saxe@Sun.COM
4128906SEric.Saxe@Sun.COM /*
4138906SEric.Saxe@Sun.COM * Update any cached lineages in the per CPU pg data.
4148906SEric.Saxe@Sun.COM */
4158906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT(pg, cpu_iter);
4168906SEric.Saxe@Sun.COM while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) {
4178906SEric.Saxe@Sun.COM int idx;
41811263SEric.Saxe@Sun.COM int sz;
4198906SEric.Saxe@Sun.COM pg_cmt_t *cpu_pg;
4209438SEric.Saxe@Sun.COM cpu_pg_t *pgd; /* CPU's PG data */
4219438SEric.Saxe@Sun.COM
4229438SEric.Saxe@Sun.COM /*
4239438SEric.Saxe@Sun.COM * The CPU's whose lineage is under construction still
4249438SEric.Saxe@Sun.COM * references the bootstrap CPU PG data structure.
4259438SEric.Saxe@Sun.COM */
4269438SEric.Saxe@Sun.COM if (pg_cpu_is_bootstrapped(cpu))
4279438SEric.Saxe@Sun.COM pgd = pgdata;
4289438SEric.Saxe@Sun.COM else
4299438SEric.Saxe@Sun.COM pgd = cpu->cpu_pg;
4308906SEric.Saxe@Sun.COM
4318906SEric.Saxe@Sun.COM /*
4328906SEric.Saxe@Sun.COM * Iterate over the CPU's PGs updating the children
4338906SEric.Saxe@Sun.COM * of the PG being promoted, since they have a new parent.
4348906SEric.Saxe@Sun.COM */
4358906SEric.Saxe@Sun.COM group_iter_init(&iter);
4369438SEric.Saxe@Sun.COM while ((cpu_pg = group_iterate(&pgd->cmt_pgs, &iter)) != NULL) {
4378906SEric.Saxe@Sun.COM if (cpu_pg->cmt_parent == pg) {
4388906SEric.Saxe@Sun.COM cpu_pg->cmt_parent = parent;
4398906SEric.Saxe@Sun.COM }
4408906SEric.Saxe@Sun.COM }
4418906SEric.Saxe@Sun.COM
4428906SEric.Saxe@Sun.COM /*
4438906SEric.Saxe@Sun.COM * Update the CMT load balancing lineage
4448906SEric.Saxe@Sun.COM */
4459438SEric.Saxe@Sun.COM if ((idx = group_find(&pgd->cmt_pgs, (void *)pg)) == -1) {
4468906SEric.Saxe@Sun.COM /*
4478906SEric.Saxe@Sun.COM * Unless this is the CPU who's lineage is being
4488906SEric.Saxe@Sun.COM * constructed, the PG being promoted should be
4498906SEric.Saxe@Sun.COM * in the lineage.
4508906SEric.Saxe@Sun.COM */
4519438SEric.Saxe@Sun.COM ASSERT(pg_cpu_is_bootstrapped(cpu));
4528906SEric.Saxe@Sun.COM continue;
4538906SEric.Saxe@Sun.COM }
4548906SEric.Saxe@Sun.COM
45511263SEric.Saxe@Sun.COM ASSERT(idx > 0);
4569438SEric.Saxe@Sun.COM ASSERT(GROUP_ACCESS(&pgd->cmt_pgs, idx - 1) == parent);
4578906SEric.Saxe@Sun.COM
4588906SEric.Saxe@Sun.COM /*
4598906SEric.Saxe@Sun.COM * Have the child and the parent swap places in the CPU's
4608906SEric.Saxe@Sun.COM * lineage
4618906SEric.Saxe@Sun.COM */
4629438SEric.Saxe@Sun.COM group_remove_at(&pgd->cmt_pgs, idx);
4639438SEric.Saxe@Sun.COM group_remove_at(&pgd->cmt_pgs, idx - 1);
4649438SEric.Saxe@Sun.COM err = group_add_at(&pgd->cmt_pgs, parent, idx);
4658906SEric.Saxe@Sun.COM ASSERT(err == 0);
4669438SEric.Saxe@Sun.COM err = group_add_at(&pgd->cmt_pgs, pg, idx - 1);
4678906SEric.Saxe@Sun.COM ASSERT(err == 0);
46811263SEric.Saxe@Sun.COM
46911263SEric.Saxe@Sun.COM /*
47011263SEric.Saxe@Sun.COM * Ensure cmt_lineage references CPU's leaf PG.
47111263SEric.Saxe@Sun.COM * Since cmt_pgs is top-down ordered, the bottom is the last
47211263SEric.Saxe@Sun.COM * element.
47311263SEric.Saxe@Sun.COM */
47411263SEric.Saxe@Sun.COM if ((sz = GROUP_SIZE(&pgd->cmt_pgs)) > 0)
47511263SEric.Saxe@Sun.COM pgd->cmt_lineage = GROUP_ACCESS(&pgd->cmt_pgs, sz - 1);
4768906SEric.Saxe@Sun.COM }
4778906SEric.Saxe@Sun.COM
4788906SEric.Saxe@Sun.COM /*
4798906SEric.Saxe@Sun.COM * Update the parent references for PG and it's parent
4808906SEric.Saxe@Sun.COM */
4818906SEric.Saxe@Sun.COM pg->cmt_parent = parent->cmt_parent;
4828906SEric.Saxe@Sun.COM parent->cmt_parent = pg;
4838906SEric.Saxe@Sun.COM
4848906SEric.Saxe@Sun.COM start_cpus();
4853434Sesaxe }
4863434Sesaxe
4873434Sesaxe /*
4883434Sesaxe * CMT class callback for a new CPU entering the system
4899438SEric.Saxe@Sun.COM *
4909438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPU
4919438SEric.Saxe@Sun.COM * being initialized). The argument "pgdata" is a reference to the CPU's PG
4929438SEric.Saxe@Sun.COM * data to be constructed.
4939438SEric.Saxe@Sun.COM *
 * cp->cpu_pg, which the dispatcher uses to access the CPU's PG data,
 * references a "bootstrap" structure while the CPU is being initialized.
 * pg_cmt_cpu_init() and the routines it calls must be careful to operate
 * only on the "pgdata" argument, and not cp->cpu_pg.
4983434Sesaxe */
4993434Sesaxe static void
pg_cmt_cpu_init(cpu_t * cp,cpu_pg_t * pgdata)5009438SEric.Saxe@Sun.COM pg_cmt_cpu_init(cpu_t *cp, cpu_pg_t *pgdata)
5013434Sesaxe {
5023434Sesaxe pg_cmt_t *pg;
5033434Sesaxe group_t *cmt_pgs;
5048906SEric.Saxe@Sun.COM int levels, level;
5053434Sesaxe pghw_type_t hw;
5063434Sesaxe pg_t *pg_cache = NULL;
5073434Sesaxe pg_cmt_t *cpu_cmt_hier[PGHW_NUM_COMPONENTS];
5083434Sesaxe lgrp_handle_t lgrp_handle;
5093434Sesaxe cmt_lgrp_t *lgrp;
5109036SEric.Saxe@Sun.COM cmt_lineage_validation_t lineage_status;
5113434Sesaxe
5123434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
5139438SEric.Saxe@Sun.COM ASSERT(pg_cpu_is_bootstrapped(cp));
5143434Sesaxe
5158906SEric.Saxe@Sun.COM if (cmt_sched_disabled)
5168906SEric.Saxe@Sun.COM return;
5178906SEric.Saxe@Sun.COM
5183434Sesaxe /*
5193434Sesaxe * A new CPU is coming into the system.
5203434Sesaxe * Interrogate the platform to see if the CPU
5218906SEric.Saxe@Sun.COM * has any performance or efficiency relevant
5228906SEric.Saxe@Sun.COM * sharing relationships
5233434Sesaxe */
5249438SEric.Saxe@Sun.COM cmt_pgs = &pgdata->cmt_pgs;
5259438SEric.Saxe@Sun.COM pgdata->cmt_lineage = NULL;
5263434Sesaxe
5273434Sesaxe bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
5288906SEric.Saxe@Sun.COM levels = 0;
5293434Sesaxe for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {
5303434Sesaxe
5318906SEric.Saxe@Sun.COM pg_cmt_policy_t policy;
5328906SEric.Saxe@Sun.COM
5333434Sesaxe /*
5348906SEric.Saxe@Sun.COM * We're only interested in the hw sharing relationships
5358906SEric.Saxe@Sun.COM * for which we know how to optimize.
5363434Sesaxe */
5378906SEric.Saxe@Sun.COM policy = pg_cmt_policy(hw);
5388906SEric.Saxe@Sun.COM if (policy == CMT_NO_POLICY ||
5398906SEric.Saxe@Sun.COM pg_plat_hw_shared(cp, hw) == 0)
5403434Sesaxe continue;
5413434Sesaxe
5423434Sesaxe /*
5439746SEric.Saxe@Sun.COM * We will still create the PGs for hardware sharing
5449746SEric.Saxe@Sun.COM * relationships that have been blacklisted, but won't
5459746SEric.Saxe@Sun.COM * implement CMT thread placement optimizations against them.
5468906SEric.Saxe@Sun.COM */
5479746SEric.Saxe@Sun.COM if (cmt_hw_blacklisted[hw] == 1)
5489746SEric.Saxe@Sun.COM policy = CMT_NO_POLICY;
5498906SEric.Saxe@Sun.COM
5508906SEric.Saxe@Sun.COM /*
5513434Sesaxe * Find (or create) the PG associated with
5523434Sesaxe * the hw sharing relationship in which cp
5533434Sesaxe * belongs.
5543434Sesaxe *
5553434Sesaxe * Determine if a suitable PG already
5563434Sesaxe * exists, or if one needs to be created.
5573434Sesaxe */
5583434Sesaxe pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
5593434Sesaxe if (pg == NULL) {
5603434Sesaxe /*
5613434Sesaxe * Create a new one.
5623434Sesaxe * Initialize the common...
5633434Sesaxe */
5643434Sesaxe pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);
5653434Sesaxe
5663434Sesaxe /* ... physical ... */
5673434Sesaxe pghw_init((pghw_t *)pg, cp, hw);
5683434Sesaxe
5693434Sesaxe /*
5703434Sesaxe * ... and CMT specific portions of the
5713434Sesaxe * structure.
5723434Sesaxe */
5738906SEric.Saxe@Sun.COM pg->cmt_policy = policy;
5748906SEric.Saxe@Sun.COM
5758906SEric.Saxe@Sun.COM /* CMT event callbacks */
5768906SEric.Saxe@Sun.COM cmt_callback_init((pg_t *)pg);
5778906SEric.Saxe@Sun.COM
5783434Sesaxe bitset_init(&pg->cmt_cpus_actv_set);
5793434Sesaxe group_create(&pg->cmt_cpus_actv);
5803434Sesaxe } else {
5813434Sesaxe ASSERT(IS_CMT_PG(pg));
5823434Sesaxe }
5833434Sesaxe
58411389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++;
58511389SAlexander.Kolbasov@Sun.COM
5863434Sesaxe /* Add the CPU to the PG */
5879438SEric.Saxe@Sun.COM pg_cpu_add((pg_t *)pg, cp, pgdata);
5883434Sesaxe
5893434Sesaxe /*
5908408SEric.Saxe@Sun.COM * Ensure capacity of the active CPU group/bitset
5913434Sesaxe */
5923434Sesaxe group_expand(&pg->cmt_cpus_actv,
5933434Sesaxe GROUP_SIZE(&((pg_t *)pg)->pg_cpus));
5943434Sesaxe
5953434Sesaxe if (cp->cpu_seqid >=
5963434Sesaxe bitset_capacity(&pg->cmt_cpus_actv_set)) {
5973434Sesaxe bitset_resize(&pg->cmt_cpus_actv_set,
5983434Sesaxe cp->cpu_seqid + 1);
5993434Sesaxe }
6003434Sesaxe
6013434Sesaxe /*
6028906SEric.Saxe@Sun.COM * Build a lineage of CMT PGs for load balancing / coalescence
6033434Sesaxe */
6048906SEric.Saxe@Sun.COM if (policy & (CMT_BALANCE | CMT_COALESCE)) {
6058906SEric.Saxe@Sun.COM cpu_cmt_hier[levels++] = pg;
6063434Sesaxe }
6073434Sesaxe
6083434Sesaxe /* Cache this for later */
6093434Sesaxe if (hw == PGHW_CACHE)
6103434Sesaxe pg_cache = (pg_t *)pg;
6113434Sesaxe }
6123434Sesaxe
6138906SEric.Saxe@Sun.COM group_expand(cmt_pgs, levels);
6148408SEric.Saxe@Sun.COM
6158408SEric.Saxe@Sun.COM if (cmt_root == NULL)
6168408SEric.Saxe@Sun.COM cmt_root = pg_cmt_lgrp_create(lgrp_plat_root_hand());
6173434Sesaxe
6183434Sesaxe /*
6198906SEric.Saxe@Sun.COM * Find the lgrp that encapsulates this CPU's CMT hierarchy
6208408SEric.Saxe@Sun.COM */
6218408SEric.Saxe@Sun.COM lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
6228408SEric.Saxe@Sun.COM if ((lgrp = pg_cmt_find_lgrp(lgrp_handle)) == NULL)
6238408SEric.Saxe@Sun.COM lgrp = pg_cmt_lgrp_create(lgrp_handle);
6248408SEric.Saxe@Sun.COM
6258408SEric.Saxe@Sun.COM /*
6268906SEric.Saxe@Sun.COM * Ascendingly sort the PGs in the lineage by number of CPUs
6278906SEric.Saxe@Sun.COM */
6288906SEric.Saxe@Sun.COM pg_cmt_hier_sort(cpu_cmt_hier, levels);
6298906SEric.Saxe@Sun.COM
6308906SEric.Saxe@Sun.COM /*
6318906SEric.Saxe@Sun.COM * Examine the lineage and validate it.
6328906SEric.Saxe@Sun.COM * This routine will also try to fix the lineage along with the
6338906SEric.Saxe@Sun.COM * rest of the PG hierarchy should it detect an issue.
6348906SEric.Saxe@Sun.COM *
6359036SEric.Saxe@Sun.COM * If it returns anything other than VALID or REPAIRED, an
6369036SEric.Saxe@Sun.COM * unrecoverable error has occurred, and we cannot proceed.
6378906SEric.Saxe@Sun.COM */
6389438SEric.Saxe@Sun.COM lineage_status = pg_cmt_lineage_validate(cpu_cmt_hier, &levels, pgdata);
6399036SEric.Saxe@Sun.COM if ((lineage_status != CMT_LINEAGE_VALID) &&
6409438SEric.Saxe@Sun.COM (lineage_status != CMT_LINEAGE_REPAIRED)) {
6419438SEric.Saxe@Sun.COM /*
6429438SEric.Saxe@Sun.COM * In the case of an unrecoverable error where CMT scheduling
6439438SEric.Saxe@Sun.COM * has been disabled, assert that the under construction CPU's
6449438SEric.Saxe@Sun.COM * PG data has an empty CMT load balancing lineage.
6459438SEric.Saxe@Sun.COM */
6469438SEric.Saxe@Sun.COM ASSERT((cmt_sched_disabled == 0) ||
6479438SEric.Saxe@Sun.COM (GROUP_SIZE(&(pgdata->cmt_pgs)) == 0));
6488906SEric.Saxe@Sun.COM return;
6499438SEric.Saxe@Sun.COM }
6508906SEric.Saxe@Sun.COM
6518906SEric.Saxe@Sun.COM /*
6528906SEric.Saxe@Sun.COM * For existing PGs in the lineage, verify that the parent is
6538906SEric.Saxe@Sun.COM * correct, as the generation in the lineage may have changed
6548906SEric.Saxe@Sun.COM * as a result of the sorting. Start the traversal at the top
6558906SEric.Saxe@Sun.COM * of the lineage, moving down.
6568906SEric.Saxe@Sun.COM */
6578906SEric.Saxe@Sun.COM for (level = levels - 1; level >= 0; ) {
6588906SEric.Saxe@Sun.COM int reorg;
6598906SEric.Saxe@Sun.COM
6608906SEric.Saxe@Sun.COM reorg = 0;
6618906SEric.Saxe@Sun.COM pg = cpu_cmt_hier[level];
6628906SEric.Saxe@Sun.COM
6638906SEric.Saxe@Sun.COM /*
6648906SEric.Saxe@Sun.COM * Promote PGs at an incorrect generation into place.
6658906SEric.Saxe@Sun.COM */
6668906SEric.Saxe@Sun.COM while (pg->cmt_parent &&
6678906SEric.Saxe@Sun.COM pg->cmt_parent != cpu_cmt_hier[level + 1]) {
6689438SEric.Saxe@Sun.COM cmt_hier_promote(pg, pgdata);
6698906SEric.Saxe@Sun.COM reorg++;
6708906SEric.Saxe@Sun.COM }
6718906SEric.Saxe@Sun.COM if (reorg > 0)
6728906SEric.Saxe@Sun.COM level = levels - 1;
6738906SEric.Saxe@Sun.COM else
6748906SEric.Saxe@Sun.COM level--;
6758906SEric.Saxe@Sun.COM }
6768906SEric.Saxe@Sun.COM
6778906SEric.Saxe@Sun.COM /*
6788408SEric.Saxe@Sun.COM * For each of the PGs in the CPU's lineage:
6798906SEric.Saxe@Sun.COM * - Add an entry in the CPU sorted CMT PG group
6808906SEric.Saxe@Sun.COM * which is used for top down CMT load balancing
6813434Sesaxe * - Tie the PG into the CMT hierarchy by connecting
6823434Sesaxe * it to it's parent and siblings.
6833434Sesaxe */
6848906SEric.Saxe@Sun.COM for (level = 0; level < levels; level++) {
6853434Sesaxe uint_t children;
6863434Sesaxe int err;
6873434Sesaxe
6883434Sesaxe pg = cpu_cmt_hier[level];
6898906SEric.Saxe@Sun.COM err = group_add_at(cmt_pgs, pg, levels - level - 1);
6903434Sesaxe ASSERT(err == 0);
6913434Sesaxe
6923434Sesaxe if (level == 0)
6939438SEric.Saxe@Sun.COM pgdata->cmt_lineage = (pg_t *)pg;
6943434Sesaxe
6953434Sesaxe if (pg->cmt_siblings != NULL) {
6963434Sesaxe /* Already initialized */
6973434Sesaxe ASSERT(pg->cmt_parent == NULL ||
6983434Sesaxe pg->cmt_parent == cpu_cmt_hier[level + 1]);
6993434Sesaxe ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
7005933Sjb145095 ((pg->cmt_parent != NULL) &&
7015933Sjb145095 pg->cmt_siblings == pg->cmt_parent->cmt_children));
7023434Sesaxe continue;
7033434Sesaxe }
7043434Sesaxe
7058906SEric.Saxe@Sun.COM if ((level + 1) == levels) {
7063434Sesaxe pg->cmt_parent = NULL;
7078408SEric.Saxe@Sun.COM
7083434Sesaxe pg->cmt_siblings = &lgrp->cl_pgs;
7093434Sesaxe children = ++lgrp->cl_npgs;
7108906SEric.Saxe@Sun.COM if (cmt_root != lgrp)
7118906SEric.Saxe@Sun.COM cmt_root->cl_npgs++;
7123434Sesaxe } else {
7133434Sesaxe pg->cmt_parent = cpu_cmt_hier[level + 1];
7143434Sesaxe
7153434Sesaxe /*
7163434Sesaxe * A good parent keeps track of their children.
7173434Sesaxe * The parent's children group is also the PG's
7183434Sesaxe * siblings.
7193434Sesaxe */
7203434Sesaxe if (pg->cmt_parent->cmt_children == NULL) {
7213434Sesaxe pg->cmt_parent->cmt_children =
7223434Sesaxe kmem_zalloc(sizeof (group_t), KM_SLEEP);
7233434Sesaxe group_create(pg->cmt_parent->cmt_children);
7243434Sesaxe }
7253434Sesaxe pg->cmt_siblings = pg->cmt_parent->cmt_children;
7263434Sesaxe children = ++pg->cmt_parent->cmt_nchildren;
7273434Sesaxe }
7288408SEric.Saxe@Sun.COM
7293434Sesaxe group_expand(pg->cmt_siblings, children);
7308408SEric.Saxe@Sun.COM group_expand(&cmt_root->cl_pgs, cmt_root->cl_npgs);
7313434Sesaxe }
7323434Sesaxe
7333434Sesaxe /*
7343434Sesaxe * Cache the chip and core IDs in the cpu_t->cpu_physid structure
7353434Sesaxe * for fast lookups later.
7363434Sesaxe */
7373434Sesaxe if (cp->cpu_physid) {
7383434Sesaxe cp->cpu_physid->cpu_chipid =
7393434Sesaxe pg_plat_hw_instance_id(cp, PGHW_CHIP);
7403434Sesaxe cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);
7413434Sesaxe
7423434Sesaxe /*
7433434Sesaxe * If this cpu has a PG representing shared cache, then set
7443434Sesaxe * cpu_cacheid to that PG's logical id
7453434Sesaxe */
7463434Sesaxe if (pg_cache)
7473434Sesaxe cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
7483434Sesaxe }
7493434Sesaxe
7503434Sesaxe /* CPU0 only initialization */
7513434Sesaxe if (is_cpu0) {
7523434Sesaxe is_cpu0 = 0;
7533676Sesaxe cpu0_lgrp = lgrp;
7543434Sesaxe }
7553434Sesaxe
7563434Sesaxe }
7573434Sesaxe
7583434Sesaxe /*
7593434Sesaxe * Class callback when a CPU is leaving the system (deletion)
7609438SEric.Saxe@Sun.COM *
7619438SEric.Saxe@Sun.COM * "pgdata" is a reference to the CPU's PG data to be deconstructed.
7629438SEric.Saxe@Sun.COM *
7639438SEric.Saxe@Sun.COM * cp->cpu_pg is used by the dispatcher to access the CPU's PG data
7649438SEric.Saxe@Sun.COM * references a "bootstrap" structure across this function's invocation.
76511389SAlexander.Kolbasov@Sun.COM * pg_cmt_cpu_fini() and the routines it calls must be careful to operate only
7669438SEric.Saxe@Sun.COM * on the "pgdata" argument, and not cp->cpu_pg.
7673434Sesaxe */
7683434Sesaxe static void
pg_cmt_cpu_fini(cpu_t * cp,cpu_pg_t * pgdata)7699438SEric.Saxe@Sun.COM pg_cmt_cpu_fini(cpu_t *cp, cpu_pg_t *pgdata)
7703434Sesaxe {
7713434Sesaxe group_iter_t i;
7723434Sesaxe pg_cmt_t *pg;
7733434Sesaxe group_t *pgs, *cmt_pgs;
7743434Sesaxe lgrp_handle_t lgrp_handle;
7753434Sesaxe cmt_lgrp_t *lgrp;
7763434Sesaxe
7778906SEric.Saxe@Sun.COM if (cmt_sched_disabled)
7788906SEric.Saxe@Sun.COM return;
7798906SEric.Saxe@Sun.COM
7809438SEric.Saxe@Sun.COM ASSERT(pg_cpu_is_bootstrapped(cp));
7819438SEric.Saxe@Sun.COM
7829438SEric.Saxe@Sun.COM pgs = &pgdata->pgs;
7839438SEric.Saxe@Sun.COM cmt_pgs = &pgdata->cmt_pgs;
7843434Sesaxe
7853434Sesaxe /*
7863434Sesaxe * Find the lgroup that encapsulates this CPU's CMT hierarchy
7873434Sesaxe */
7883434Sesaxe lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
7893676Sesaxe
7903434Sesaxe lgrp = pg_cmt_find_lgrp(lgrp_handle);
7918689SEric.Saxe@Sun.COM if (ncpus == 1 && lgrp != cpu0_lgrp) {
7923676Sesaxe /*
7938689SEric.Saxe@Sun.COM * One might wonder how we could be deconfiguring the
7948689SEric.Saxe@Sun.COM * only CPU in the system.
7953676Sesaxe *
7968689SEric.Saxe@Sun.COM * On Starcat systems when null_proc_lpa is detected,
7978689SEric.Saxe@Sun.COM * the boot CPU (which is already configured into a leaf
7988689SEric.Saxe@Sun.COM * lgroup), is moved into the root lgroup. This is done by
7998689SEric.Saxe@Sun.COM * deconfiguring it from both lgroups and processor
8008689SEric.Saxe@Sun.COM * groups), and then later reconfiguring it back in. This
8018689SEric.Saxe@Sun.COM * call to pg_cmt_cpu_fini() is part of that deconfiguration.
8028689SEric.Saxe@Sun.COM *
8038689SEric.Saxe@Sun.COM * This special case is detected by noting that the platform
8048689SEric.Saxe@Sun.COM * has changed the CPU's lgrp affiliation (since it now
8058689SEric.Saxe@Sun.COM * belongs in the root). In this case, use the cmt_lgrp_t
8068689SEric.Saxe@Sun.COM * cached for the boot CPU, since this is what needs to be
8078689SEric.Saxe@Sun.COM * torn down.
8083676Sesaxe */
8093676Sesaxe lgrp = cpu0_lgrp;
8103676Sesaxe }
8113434Sesaxe
8128689SEric.Saxe@Sun.COM ASSERT(lgrp != NULL);
8138689SEric.Saxe@Sun.COM
8143434Sesaxe /*
8153434Sesaxe * First, clean up anything load balancing specific for each of
8163434Sesaxe * the CPU's PGs that participated in CMT load balancing
8173434Sesaxe */
8189438SEric.Saxe@Sun.COM pg = (pg_cmt_t *)pgdata->cmt_lineage;
8193434Sesaxe while (pg != NULL) {
8203434Sesaxe
82111389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++;
82211389SAlexander.Kolbasov@Sun.COM
8233434Sesaxe /*
8243434Sesaxe * Remove the PG from the CPU's load balancing lineage
8253434Sesaxe */
8263434Sesaxe (void) group_remove(cmt_pgs, pg, GRP_RESIZE);
8273434Sesaxe
8283434Sesaxe /*
8293434Sesaxe * If it's about to become empty, destroy it's children
8303434Sesaxe * group, and remove it's reference from it's siblings.
8313434Sesaxe * This is done here (rather than below) to avoid removing
8323434Sesaxe * our reference from a PG that we just eliminated.
8333434Sesaxe */
8343434Sesaxe if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
8353434Sesaxe if (pg->cmt_children != NULL)
8363434Sesaxe group_destroy(pg->cmt_children);
8373434Sesaxe if (pg->cmt_siblings != NULL) {
8383434Sesaxe if (pg->cmt_siblings == &lgrp->cl_pgs)
8393434Sesaxe lgrp->cl_npgs--;
8403434Sesaxe else
8413434Sesaxe pg->cmt_parent->cmt_nchildren--;
8423434Sesaxe }
8433434Sesaxe }
8443434Sesaxe pg = pg->cmt_parent;
8453434Sesaxe }
8463434Sesaxe ASSERT(GROUP_SIZE(cmt_pgs) == 0);
8473434Sesaxe
8483434Sesaxe /*
8493434Sesaxe * Now that the load balancing lineage updates have happened,
8503434Sesaxe * remove the CPU from all it's PGs (destroying any that become
8513434Sesaxe * empty).
8523434Sesaxe */
8533434Sesaxe group_iter_init(&i);
8543434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) {
8553434Sesaxe if (IS_CMT_PG(pg) == 0)
8563434Sesaxe continue;
8573434Sesaxe
8589438SEric.Saxe@Sun.COM pg_cpu_delete((pg_t *)pg, cp, pgdata);
8593434Sesaxe /*
8603434Sesaxe * Deleting the CPU from the PG changes the CPU's
8613434Sesaxe * PG group over which we are actively iterating
8623434Sesaxe * Re-initialize the iteration
8633434Sesaxe */
8643434Sesaxe group_iter_init(&i);
8653434Sesaxe
8663434Sesaxe if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {
8673434Sesaxe
8683434Sesaxe /*
8693434Sesaxe * The PG has become zero sized, so destroy it.
8703434Sesaxe */
8713434Sesaxe group_destroy(&pg->cmt_cpus_actv);
8723434Sesaxe bitset_fini(&pg->cmt_cpus_actv_set);
8733434Sesaxe pghw_fini((pghw_t *)pg);
8743434Sesaxe
8753434Sesaxe pg_destroy((pg_t *)pg);
8763434Sesaxe }
8773434Sesaxe }
8783434Sesaxe }
8793434Sesaxe
8803434Sesaxe /*
8813434Sesaxe * Class callback when a CPU is entering a cpu partition
8823434Sesaxe */
8833434Sesaxe static void
pg_cmt_cpupart_in(cpu_t * cp,cpupart_t * pp)8843434Sesaxe pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
8853434Sesaxe {
8863434Sesaxe group_t *pgs;
8873434Sesaxe pg_t *pg;
8883434Sesaxe group_iter_t i;
8893434Sesaxe
8903434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
8913434Sesaxe
8928906SEric.Saxe@Sun.COM if (cmt_sched_disabled)
8938906SEric.Saxe@Sun.COM return;
8948906SEric.Saxe@Sun.COM
8953434Sesaxe pgs = &cp->cpu_pg->pgs;
8963434Sesaxe
8973434Sesaxe /*
8983434Sesaxe * Ensure that the new partition's PG bitset
8993434Sesaxe * is large enough for all CMT PG's to which cp
9003434Sesaxe * belongs
9013434Sesaxe */
9023434Sesaxe group_iter_init(&i);
9033434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) {
9043434Sesaxe if (IS_CMT_PG(pg) == 0)
9053434Sesaxe continue;
9063434Sesaxe
9073434Sesaxe if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id)
9083434Sesaxe bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
9093434Sesaxe }
9103434Sesaxe }
9113434Sesaxe
9123434Sesaxe /*
9133434Sesaxe * Class callback when a CPU is actually moving partitions
9143434Sesaxe */
9153434Sesaxe static void
pg_cmt_cpupart_move(cpu_t * cp,cpupart_t * oldpp,cpupart_t * newpp)9163434Sesaxe pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
9173434Sesaxe {
9183434Sesaxe cpu_t *cpp;
9193434Sesaxe group_t *pgs;
9203434Sesaxe pg_t *pg;
9213434Sesaxe group_iter_t pg_iter;
9223434Sesaxe pg_cpu_itr_t cpu_iter;
9233434Sesaxe boolean_t found;
9243434Sesaxe
9253434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
9263434Sesaxe
9278906SEric.Saxe@Sun.COM if (cmt_sched_disabled)
9288906SEric.Saxe@Sun.COM return;
9298906SEric.Saxe@Sun.COM
9303434Sesaxe pgs = &cp->cpu_pg->pgs;
9313434Sesaxe group_iter_init(&pg_iter);
9323434Sesaxe
9333434Sesaxe /*
9343434Sesaxe * Iterate over the CPUs CMT PGs
9353434Sesaxe */
9363434Sesaxe while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {
9373434Sesaxe
9383434Sesaxe if (IS_CMT_PG(pg) == 0)
9393434Sesaxe continue;
9403434Sesaxe
9413434Sesaxe /*
9423434Sesaxe * Add the PG to the bitset in the new partition.
9433434Sesaxe */
9443434Sesaxe bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);
9453434Sesaxe
9463434Sesaxe /*
9473434Sesaxe * Remove the PG from the bitset in the old partition
9483434Sesaxe * if the last of the PG's CPUs have left.
9493434Sesaxe */
9503434Sesaxe found = B_FALSE;
9513434Sesaxe PG_CPU_ITR_INIT(pg, cpu_iter);
9523434Sesaxe while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
9533434Sesaxe if (cpp == cp)
9543434Sesaxe continue;
9553676Sesaxe if (CPU_ACTIVE(cpp) &&
9563676Sesaxe cpp->cpu_part->cp_id == oldpp->cp_id) {
9573434Sesaxe found = B_TRUE;
9583434Sesaxe break;
9593434Sesaxe }
9603434Sesaxe }
9613434Sesaxe if (!found)
9623434Sesaxe bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id);
9633434Sesaxe }
9643434Sesaxe }
9653434Sesaxe
9663434Sesaxe /*
9673434Sesaxe * Class callback when a CPU becomes active (online)
9683434Sesaxe *
9693434Sesaxe * This is called in a context where CPUs are paused
9703434Sesaxe */
9713434Sesaxe static void
pg_cmt_cpu_active(cpu_t * cp)9723434Sesaxe pg_cmt_cpu_active(cpu_t *cp)
9733434Sesaxe {
9743434Sesaxe int err;
9753434Sesaxe group_iter_t i;
9763434Sesaxe pg_cmt_t *pg;
9773434Sesaxe group_t *pgs;
9783434Sesaxe
9793434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
9803434Sesaxe
9818906SEric.Saxe@Sun.COM if (cmt_sched_disabled)
9828906SEric.Saxe@Sun.COM return;
9838906SEric.Saxe@Sun.COM
9843434Sesaxe pgs = &cp->cpu_pg->pgs;
9853434Sesaxe group_iter_init(&i);
9863434Sesaxe
9873434Sesaxe /*
9883434Sesaxe * Iterate over the CPU's PGs
9893434Sesaxe */
9903434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) {
9913434Sesaxe
9923434Sesaxe if (IS_CMT_PG(pg) == 0)
9933434Sesaxe continue;
9943434Sesaxe
99511389SAlexander.Kolbasov@Sun.COM /*
99611389SAlexander.Kolbasov@Sun.COM * Move to the next generation since topology is changing
99711389SAlexander.Kolbasov@Sun.COM */
99811389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++;
99911389SAlexander.Kolbasov@Sun.COM
10003434Sesaxe err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
10013434Sesaxe ASSERT(err == 0);
10023434Sesaxe
10033434Sesaxe /*
10043434Sesaxe * If this is the first active CPU in the PG, and it
10053434Sesaxe * represents a hardware sharing relationship over which
10063434Sesaxe * CMT load balancing is performed, add it as a candidate
10073434Sesaxe * for balancing with it's siblings.
10083434Sesaxe */
10093434Sesaxe if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
10108906SEric.Saxe@Sun.COM (pg->cmt_policy & (CMT_BALANCE | CMT_COALESCE))) {
10113434Sesaxe err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
10123434Sesaxe ASSERT(err == 0);
10138408SEric.Saxe@Sun.COM
10148408SEric.Saxe@Sun.COM /*
10158408SEric.Saxe@Sun.COM * If this is a top level PG, add it as a balancing
10168906SEric.Saxe@Sun.COM * candidate when balancing within the root lgroup.
10178408SEric.Saxe@Sun.COM */
10188906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL &&
10198906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) {
10208408SEric.Saxe@Sun.COM err = group_add(&cmt_root->cl_pgs, pg,
10218408SEric.Saxe@Sun.COM GRP_NORESIZE);
10228408SEric.Saxe@Sun.COM ASSERT(err == 0);
10238408SEric.Saxe@Sun.COM }
10243434Sesaxe }
10253434Sesaxe
10263434Sesaxe /*
10273434Sesaxe * Notate the CPU in the PGs active CPU bitset.
10283434Sesaxe * Also notate the PG as being active in it's associated
10293434Sesaxe * partition
10303434Sesaxe */
10313434Sesaxe bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
10323434Sesaxe bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
10333434Sesaxe }
10343434Sesaxe }
10353434Sesaxe
10363434Sesaxe /*
10373434Sesaxe * Class callback when a CPU goes inactive (offline)
10383434Sesaxe *
10393434Sesaxe * This is called in a context where CPUs are paused
10403434Sesaxe */
10413434Sesaxe static void
pg_cmt_cpu_inactive(cpu_t * cp)10423434Sesaxe pg_cmt_cpu_inactive(cpu_t *cp)
10433434Sesaxe {
10443434Sesaxe int err;
10453434Sesaxe group_t *pgs;
10463434Sesaxe pg_cmt_t *pg;
10473434Sesaxe cpu_t *cpp;
10483434Sesaxe group_iter_t i;
10493434Sesaxe pg_cpu_itr_t cpu_itr;
10503434Sesaxe boolean_t found;
10513434Sesaxe
10523434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
10533434Sesaxe
10548906SEric.Saxe@Sun.COM if (cmt_sched_disabled)
10558906SEric.Saxe@Sun.COM return;
10568906SEric.Saxe@Sun.COM
10573434Sesaxe pgs = &cp->cpu_pg->pgs;
10583434Sesaxe group_iter_init(&i);
10593434Sesaxe
10603434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) {
10613434Sesaxe
10623434Sesaxe if (IS_CMT_PG(pg) == 0)
10633434Sesaxe continue;
10643434Sesaxe
10653434Sesaxe /*
106611389SAlexander.Kolbasov@Sun.COM * Move to the next generation since topology is changing
106711389SAlexander.Kolbasov@Sun.COM */
106811389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++;
106911389SAlexander.Kolbasov@Sun.COM
107011389SAlexander.Kolbasov@Sun.COM /*
10713434Sesaxe * Remove the CPU from the CMT PGs active CPU group
10723434Sesaxe * bitmap
10733434Sesaxe */
10743434Sesaxe err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
10753434Sesaxe ASSERT(err == 0);
10763434Sesaxe
10773434Sesaxe bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
10783434Sesaxe
10793434Sesaxe /*
10803434Sesaxe * If there are no more active CPUs in this PG over which
10813434Sesaxe * load was balanced, remove it as a balancing candidate.
10823434Sesaxe */
10833434Sesaxe if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
10848906SEric.Saxe@Sun.COM (pg->cmt_policy & (CMT_BALANCE | CMT_COALESCE))) {
10853434Sesaxe err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
10863434Sesaxe ASSERT(err == 0);
10878408SEric.Saxe@Sun.COM
10888906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL &&
10898906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) {
10908408SEric.Saxe@Sun.COM err = group_remove(&cmt_root->cl_pgs, pg,
10918408SEric.Saxe@Sun.COM GRP_NORESIZE);
10928408SEric.Saxe@Sun.COM ASSERT(err == 0);
10938408SEric.Saxe@Sun.COM }
10943434Sesaxe }
10953434Sesaxe
10963434Sesaxe /*
10973434Sesaxe * Assert the number of active CPUs does not exceed
10983434Sesaxe * the total number of CPUs in the PG
10993434Sesaxe */
11003434Sesaxe ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
11013434Sesaxe GROUP_SIZE(&((pg_t *)pg)->pg_cpus));
11023434Sesaxe
11033434Sesaxe /*
11043434Sesaxe * Update the PG bitset in the CPU's old partition
11053434Sesaxe */
11063434Sesaxe found = B_FALSE;
11073434Sesaxe PG_CPU_ITR_INIT(pg, cpu_itr);
11083434Sesaxe while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
11093434Sesaxe if (cpp == cp)
11103434Sesaxe continue;
11113676Sesaxe if (CPU_ACTIVE(cpp) &&
11123676Sesaxe cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
11133434Sesaxe found = B_TRUE;
11143434Sesaxe break;
11153434Sesaxe }
11163434Sesaxe }
11173434Sesaxe if (!found) {
11183434Sesaxe bitset_del(&cp->cpu_part->cp_cmt_pgs,
11193434Sesaxe ((pg_t *)pg)->pg_id);
11203434Sesaxe }
11213434Sesaxe }
11223434Sesaxe }
11233434Sesaxe
11243434Sesaxe /*
11253434Sesaxe * Return non-zero if the CPU belongs in the given PG
11263434Sesaxe */
11273434Sesaxe static int
pg_cmt_cpu_belongs(pg_t * pg,cpu_t * cp)11283434Sesaxe pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
11293434Sesaxe {
11303434Sesaxe cpu_t *pg_cpu;
11313434Sesaxe
11323434Sesaxe pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0);
11333434Sesaxe
11343434Sesaxe ASSERT(pg_cpu != NULL);
11353434Sesaxe
11363434Sesaxe /*
11373434Sesaxe * The CPU belongs if, given the nature of the hardware sharing
11383434Sesaxe * relationship represented by the PG, the CPU has that
11393434Sesaxe * relationship with some other CPU already in the PG
11403434Sesaxe */
11413434Sesaxe if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw))
11423434Sesaxe return (1);
11433434Sesaxe
11443434Sesaxe return (0);
11453434Sesaxe }
11463434Sesaxe
11473434Sesaxe /*
11488906SEric.Saxe@Sun.COM * Sort the CPUs CMT hierarchy, where "size" is the number of levels.
11493434Sesaxe */
11503434Sesaxe static void
pg_cmt_hier_sort(pg_cmt_t ** hier,int size)11518906SEric.Saxe@Sun.COM pg_cmt_hier_sort(pg_cmt_t **hier, int size)
11523434Sesaxe {
115310947SSrihari.Venkatesan@Sun.COM int i, j, inc, sz;
115410947SSrihari.Venkatesan@Sun.COM int start, end;
11558906SEric.Saxe@Sun.COM pg_t *tmp;
11568906SEric.Saxe@Sun.COM pg_t **h = (pg_t **)hier;
11573434Sesaxe
11588906SEric.Saxe@Sun.COM /*
11598906SEric.Saxe@Sun.COM * First sort by number of CPUs
11608906SEric.Saxe@Sun.COM */
11618906SEric.Saxe@Sun.COM inc = size / 2;
11628906SEric.Saxe@Sun.COM while (inc > 0) {
11638906SEric.Saxe@Sun.COM for (i = inc; i < size; i++) {
11648906SEric.Saxe@Sun.COM j = i;
11658906SEric.Saxe@Sun.COM tmp = h[i];
11668906SEric.Saxe@Sun.COM while ((j >= inc) &&
11678906SEric.Saxe@Sun.COM (PG_NUM_CPUS(h[j - inc]) > PG_NUM_CPUS(tmp))) {
11688906SEric.Saxe@Sun.COM h[j] = h[j - inc];
11698906SEric.Saxe@Sun.COM j = j - inc;
11703434Sesaxe }
11718906SEric.Saxe@Sun.COM h[j] = tmp;
11723434Sesaxe }
11738906SEric.Saxe@Sun.COM if (inc == 2)
11748906SEric.Saxe@Sun.COM inc = 1;
11758906SEric.Saxe@Sun.COM else
11768906SEric.Saxe@Sun.COM inc = (inc * 5) / 11;
11778906SEric.Saxe@Sun.COM }
11788906SEric.Saxe@Sun.COM
11798906SEric.Saxe@Sun.COM /*
11808906SEric.Saxe@Sun.COM * Break ties by asking the platform.
11818906SEric.Saxe@Sun.COM * Determine if h[i] outranks h[i + 1] and if so, swap them.
11828906SEric.Saxe@Sun.COM */
118310947SSrihari.Venkatesan@Sun.COM for (start = 0; start < size; start++) {
118410947SSrihari.Venkatesan@Sun.COM
118510947SSrihari.Venkatesan@Sun.COM /*
118610947SSrihari.Venkatesan@Sun.COM * Find various contiguous sets of elements,
118710947SSrihari.Venkatesan@Sun.COM * in the array, with the same number of cpus
118810947SSrihari.Venkatesan@Sun.COM */
118910947SSrihari.Venkatesan@Sun.COM end = start;
119010947SSrihari.Venkatesan@Sun.COM sz = PG_NUM_CPUS(h[start]);
119110947SSrihari.Venkatesan@Sun.COM while ((end < size) && (sz == PG_NUM_CPUS(h[end])))
119210947SSrihari.Venkatesan@Sun.COM end++;
119310947SSrihari.Venkatesan@Sun.COM /*
119410947SSrihari.Venkatesan@Sun.COM * Sort each such set of the array by rank
119510947SSrihari.Venkatesan@Sun.COM */
119610947SSrihari.Venkatesan@Sun.COM for (i = start + 1; i < end; i++) {
119710947SSrihari.Venkatesan@Sun.COM j = i - 1;
11988906SEric.Saxe@Sun.COM tmp = h[i];
119910947SSrihari.Venkatesan@Sun.COM while (j >= start &&
120010947SSrihari.Venkatesan@Sun.COM pg_cmt_hier_rank(hier[j],
120110947SSrihari.Venkatesan@Sun.COM (pg_cmt_t *)tmp) == hier[j]) {
120210947SSrihari.Venkatesan@Sun.COM h[j + 1] = h[j];
120310947SSrihari.Venkatesan@Sun.COM j--;
120410947SSrihari.Venkatesan@Sun.COM }
120510947SSrihari.Venkatesan@Sun.COM h[j + 1] = tmp;
12068906SEric.Saxe@Sun.COM }
12073434Sesaxe }
12083434Sesaxe }
12093434Sesaxe
12103434Sesaxe /*
12113434Sesaxe * Return a cmt_lgrp_t * given an lgroup handle.
12123434Sesaxe */
12133434Sesaxe static cmt_lgrp_t *
pg_cmt_find_lgrp(lgrp_handle_t hand)12143434Sesaxe pg_cmt_find_lgrp(lgrp_handle_t hand)
12153434Sesaxe {
12163434Sesaxe cmt_lgrp_t *lgrp;
12173434Sesaxe
12183434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
12193434Sesaxe
12203434Sesaxe lgrp = cmt_lgrps;
12213434Sesaxe while (lgrp != NULL) {
12223434Sesaxe if (lgrp->cl_hand == hand)
12233676Sesaxe break;
12243434Sesaxe lgrp = lgrp->cl_next;
12253434Sesaxe }
12263676Sesaxe return (lgrp);
12273676Sesaxe }
12283434Sesaxe
12293676Sesaxe /*
12303676Sesaxe * Create a cmt_lgrp_t with the specified handle.
12313676Sesaxe */
12323676Sesaxe static cmt_lgrp_t *
pg_cmt_lgrp_create(lgrp_handle_t hand)12333676Sesaxe pg_cmt_lgrp_create(lgrp_handle_t hand)
12343676Sesaxe {
12353676Sesaxe cmt_lgrp_t *lgrp;
12363676Sesaxe
12373676Sesaxe ASSERT(MUTEX_HELD(&cpu_lock));
12383676Sesaxe
12393434Sesaxe lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);
12403434Sesaxe
12413434Sesaxe lgrp->cl_hand = hand;
12423434Sesaxe lgrp->cl_npgs = 0;
12433434Sesaxe lgrp->cl_next = cmt_lgrps;
12443434Sesaxe cmt_lgrps = lgrp;
12453434Sesaxe group_create(&lgrp->cl_pgs);
12463434Sesaxe
12473434Sesaxe return (lgrp);
12483434Sesaxe }
12498408SEric.Saxe@Sun.COM
12508408SEric.Saxe@Sun.COM /*
12518906SEric.Saxe@Sun.COM * Interfaces to enable and disable power aware dispatching
12528906SEric.Saxe@Sun.COM * The caller must be holding cpu_lock.
12538408SEric.Saxe@Sun.COM *
12548906SEric.Saxe@Sun.COM * Return 0 on success and -1 on failure.
12558408SEric.Saxe@Sun.COM */
12568906SEric.Saxe@Sun.COM int
cmt_pad_enable(pghw_type_t type)12578906SEric.Saxe@Sun.COM cmt_pad_enable(pghw_type_t type)
12588408SEric.Saxe@Sun.COM {
12598906SEric.Saxe@Sun.COM group_t *hwset;
12608906SEric.Saxe@Sun.COM group_iter_t iter;
12618906SEric.Saxe@Sun.COM pg_cmt_t *pg;
12628906SEric.Saxe@Sun.COM
12638906SEric.Saxe@Sun.COM ASSERT(PGHW_IS_PM_DOMAIN(type));
12648906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
12658408SEric.Saxe@Sun.COM
12668906SEric.Saxe@Sun.COM if ((hwset = pghw_set_lookup(type)) == NULL ||
12678906SEric.Saxe@Sun.COM cmt_hw_blacklisted[type]) {
12688906SEric.Saxe@Sun.COM /*
12698906SEric.Saxe@Sun.COM * Unable to find any instances of the specified type
12708906SEric.Saxe@Sun.COM * of power domain, or the power domains have been blacklisted.
12718906SEric.Saxe@Sun.COM */
12728906SEric.Saxe@Sun.COM return (-1);
12738906SEric.Saxe@Sun.COM }
12748408SEric.Saxe@Sun.COM
12758408SEric.Saxe@Sun.COM /*
12768906SEric.Saxe@Sun.COM * Iterate over the power domains, setting the default dispatcher
12778906SEric.Saxe@Sun.COM * policy for power/performance optimization.
12788906SEric.Saxe@Sun.COM *
12798906SEric.Saxe@Sun.COM * Simply setting the policy isn't enough in the case where the power
12808906SEric.Saxe@Sun.COM * domain is an only child of another PG. Because the dispatcher walks
12818906SEric.Saxe@Sun.COM * the PG hierarchy in a top down fashion, the higher up PG's policy
12828906SEric.Saxe@Sun.COM * will dominate. So promote the power domain above it's parent if both
12838906SEric.Saxe@Sun.COM * PG and it's parent have the same CPUs to ensure it's policy
12848906SEric.Saxe@Sun.COM * dominates.
12858408SEric.Saxe@Sun.COM */
12868906SEric.Saxe@Sun.COM group_iter_init(&iter);
12878906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &iter)) != NULL) {
12888906SEric.Saxe@Sun.COM /*
12898906SEric.Saxe@Sun.COM * If the power domain is an only child to a parent
12908906SEric.Saxe@Sun.COM * not implementing the same policy, promote the child
12918906SEric.Saxe@Sun.COM * above the parent to activate the policy.
12928906SEric.Saxe@Sun.COM */
12938906SEric.Saxe@Sun.COM pg->cmt_policy = pg_cmt_policy(((pghw_t *)pg)->pghw_hw);
12948906SEric.Saxe@Sun.COM while ((pg->cmt_parent != NULL) &&
12958906SEric.Saxe@Sun.COM (pg->cmt_parent->cmt_policy != pg->cmt_policy) &&
12968906SEric.Saxe@Sun.COM (PG_NUM_CPUS((pg_t *)pg) ==
12978906SEric.Saxe@Sun.COM PG_NUM_CPUS((pg_t *)pg->cmt_parent))) {
12989438SEric.Saxe@Sun.COM cmt_hier_promote(pg, NULL);
12998906SEric.Saxe@Sun.COM }
13008906SEric.Saxe@Sun.COM }
13018906SEric.Saxe@Sun.COM
13028906SEric.Saxe@Sun.COM return (0);
13038906SEric.Saxe@Sun.COM }
13048408SEric.Saxe@Sun.COM
13058906SEric.Saxe@Sun.COM int
cmt_pad_disable(pghw_type_t type)13068906SEric.Saxe@Sun.COM cmt_pad_disable(pghw_type_t type)
13078906SEric.Saxe@Sun.COM {
13088906SEric.Saxe@Sun.COM group_t *hwset;
13098906SEric.Saxe@Sun.COM group_iter_t iter;
13108906SEric.Saxe@Sun.COM pg_cmt_t *pg;
13118906SEric.Saxe@Sun.COM pg_cmt_t *child;
13128906SEric.Saxe@Sun.COM
13138906SEric.Saxe@Sun.COM ASSERT(PGHW_IS_PM_DOMAIN(type));
13148906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
13158906SEric.Saxe@Sun.COM
13168906SEric.Saxe@Sun.COM if ((hwset = pghw_set_lookup(type)) == NULL) {
13178906SEric.Saxe@Sun.COM /*
13188906SEric.Saxe@Sun.COM * Unable to find any instances of the specified type of
13198906SEric.Saxe@Sun.COM * power domain.
13208906SEric.Saxe@Sun.COM */
13218906SEric.Saxe@Sun.COM return (-1);
13228906SEric.Saxe@Sun.COM }
13238408SEric.Saxe@Sun.COM /*
13248906SEric.Saxe@Sun.COM * Iterate over the power domains, setting the default dispatcher
13258906SEric.Saxe@Sun.COM * policy for performance optimization (load balancing).
13268408SEric.Saxe@Sun.COM */
13278906SEric.Saxe@Sun.COM group_iter_init(&iter);
13288906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &iter)) != NULL) {
13298408SEric.Saxe@Sun.COM
13308408SEric.Saxe@Sun.COM /*
13318906SEric.Saxe@Sun.COM * If the power domain has an only child that implements
13328906SEric.Saxe@Sun.COM * policy other than load balancing, promote the child
13338906SEric.Saxe@Sun.COM * above the power domain to ensure it's policy dominates.
13348408SEric.Saxe@Sun.COM */
13358969SEric.Saxe@Sun.COM if (pg->cmt_children != NULL &&
13368969SEric.Saxe@Sun.COM GROUP_SIZE(pg->cmt_children) == 1) {
13378906SEric.Saxe@Sun.COM child = GROUP_ACCESS(pg->cmt_children, 0);
13388906SEric.Saxe@Sun.COM if ((child->cmt_policy & CMT_BALANCE) == 0) {
13399438SEric.Saxe@Sun.COM cmt_hier_promote(child, NULL);
13408906SEric.Saxe@Sun.COM }
13418906SEric.Saxe@Sun.COM }
13428906SEric.Saxe@Sun.COM pg->cmt_policy = CMT_BALANCE;
13438906SEric.Saxe@Sun.COM }
13448906SEric.Saxe@Sun.COM return (0);
13458906SEric.Saxe@Sun.COM }
13468906SEric.Saxe@Sun.COM
13478906SEric.Saxe@Sun.COM /* ARGSUSED */
13488906SEric.Saxe@Sun.COM static void
cmt_ev_thread_swtch(pg_t * pg,cpu_t * cp,hrtime_t now,kthread_t * old,kthread_t * new)13498906SEric.Saxe@Sun.COM cmt_ev_thread_swtch(pg_t *pg, cpu_t *cp, hrtime_t now, kthread_t *old,
13508906SEric.Saxe@Sun.COM kthread_t *new)
13518906SEric.Saxe@Sun.COM {
13528906SEric.Saxe@Sun.COM pg_cmt_t *cmt_pg = (pg_cmt_t *)pg;
13538906SEric.Saxe@Sun.COM
13548906SEric.Saxe@Sun.COM if (old == cp->cpu_idle_thread) {
13558906SEric.Saxe@Sun.COM atomic_add_32(&cmt_pg->cmt_utilization, 1);
13568906SEric.Saxe@Sun.COM } else if (new == cp->cpu_idle_thread) {
13578906SEric.Saxe@Sun.COM atomic_add_32(&cmt_pg->cmt_utilization, -1);
13588906SEric.Saxe@Sun.COM }
13598906SEric.Saxe@Sun.COM }
13608906SEric.Saxe@Sun.COM
/*
 * Evaluates to non-zero when thread "t" is in the TS_RUN state and the
 * dispatch queue it is on belongs to a CPU that is a member of the active
 * CPU bitset of "pg". Note that "t" is evaluated more than once.
 */
#define	THREAD_RUNNABLE_IN_PG(t, pg)					\
	(((t)->t_state == TS_RUN) &&					\
	    ((t)->t_disp_queue->disp_cpu != NULL) &&			\
	    bitset_in_set(&(pg)->cmt_cpus_actv_set,			\
	    (t)->t_disp_queue->disp_cpu->cpu_seqid))
13708906SEric.Saxe@Sun.COM static void
cmt_ev_thread_swtch_pwr(pg_t * pg,cpu_t * cp,hrtime_t now,kthread_t * old,kthread_t * new)13718906SEric.Saxe@Sun.COM cmt_ev_thread_swtch_pwr(pg_t *pg, cpu_t *cp, hrtime_t now, kthread_t *old,
13728906SEric.Saxe@Sun.COM kthread_t *new)
13738906SEric.Saxe@Sun.COM {
13748906SEric.Saxe@Sun.COM pg_cmt_t *cmt = (pg_cmt_t *)pg;
13758906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
13768906SEric.Saxe@Sun.COM uint32_t u;
13778906SEric.Saxe@Sun.COM
13788906SEric.Saxe@Sun.COM if (old == cp->cpu_idle_thread) {
13798906SEric.Saxe@Sun.COM ASSERT(new != cp->cpu_idle_thread);
13808906SEric.Saxe@Sun.COM u = atomic_add_32_nv(&cmt->cmt_utilization, 1);
13818906SEric.Saxe@Sun.COM if (u == 1) {
13828906SEric.Saxe@Sun.COM /*
13838906SEric.Saxe@Sun.COM * Notify the CPU power manager that the domain
13848906SEric.Saxe@Sun.COM * is non-idle.
13858906SEric.Saxe@Sun.COM */
13868906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle;
13878906SEric.Saxe@Sun.COM cpupm_utilization_event(cp, now, dom,
13888906SEric.Saxe@Sun.COM CPUPM_DOM_BUSY_FROM_IDLE);
13898906SEric.Saxe@Sun.COM }
13908906SEric.Saxe@Sun.COM } else if (new == cp->cpu_idle_thread) {
13918906SEric.Saxe@Sun.COM ASSERT(old != cp->cpu_idle_thread);
13928906SEric.Saxe@Sun.COM u = atomic_add_32_nv(&cmt->cmt_utilization, -1);
13938906SEric.Saxe@Sun.COM if (u == 0) {
13948906SEric.Saxe@Sun.COM /*
13958906SEric.Saxe@Sun.COM * The domain is idle, notify the CPU power
13968906SEric.Saxe@Sun.COM * manager.
13978906SEric.Saxe@Sun.COM *
13988906SEric.Saxe@Sun.COM * Avoid notifying if the thread is simply migrating
13998906SEric.Saxe@Sun.COM * between CPUs in the domain.
14008906SEric.Saxe@Sun.COM */
14018906SEric.Saxe@Sun.COM if (!THREAD_RUNNABLE_IN_PG(old, cmt)) {
14028906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle;
14038906SEric.Saxe@Sun.COM cpupm_utilization_event(cp, now, dom,
14048906SEric.Saxe@Sun.COM CPUPM_DOM_IDLE_FROM_BUSY);
14058906SEric.Saxe@Sun.COM }
14068906SEric.Saxe@Sun.COM }
14078906SEric.Saxe@Sun.COM }
14088906SEric.Saxe@Sun.COM }
14098906SEric.Saxe@Sun.COM
14108906SEric.Saxe@Sun.COM /* ARGSUSED */
14118906SEric.Saxe@Sun.COM static void
cmt_ev_thread_remain_pwr(pg_t * pg,cpu_t * cp,kthread_t * t)14128906SEric.Saxe@Sun.COM cmt_ev_thread_remain_pwr(pg_t *pg, cpu_t *cp, kthread_t *t)
14138906SEric.Saxe@Sun.COM {
14148906SEric.Saxe@Sun.COM pg_cmt_t *cmt = (pg_cmt_t *)pg;
14158906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
14168906SEric.Saxe@Sun.COM
14178906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle;
14188906SEric.Saxe@Sun.COM cpupm_utilization_event(cp, (hrtime_t)0, dom, CPUPM_DOM_REMAIN_BUSY);
14198906SEric.Saxe@Sun.COM }
14208906SEric.Saxe@Sun.COM
14218906SEric.Saxe@Sun.COM /*
14228906SEric.Saxe@Sun.COM * Return the name of the CMT scheduling policy
14238906SEric.Saxe@Sun.COM * being implemented across this PG
14248906SEric.Saxe@Sun.COM */
14258906SEric.Saxe@Sun.COM static char *
pg_cmt_policy_name(pg_t * pg)14268906SEric.Saxe@Sun.COM pg_cmt_policy_name(pg_t *pg)
14278906SEric.Saxe@Sun.COM {
14288906SEric.Saxe@Sun.COM pg_cmt_policy_t policy;
14298906SEric.Saxe@Sun.COM
14308906SEric.Saxe@Sun.COM policy = ((pg_cmt_t *)pg)->cmt_policy;
14318906SEric.Saxe@Sun.COM
14328906SEric.Saxe@Sun.COM if (policy & CMT_AFFINITY) {
14338906SEric.Saxe@Sun.COM if (policy & CMT_BALANCE)
14348906SEric.Saxe@Sun.COM return ("Load Balancing & Affinity");
14358906SEric.Saxe@Sun.COM else if (policy & CMT_COALESCE)
14368906SEric.Saxe@Sun.COM return ("Load Coalescence & Affinity");
14378906SEric.Saxe@Sun.COM else
14388906SEric.Saxe@Sun.COM return ("Affinity");
14398906SEric.Saxe@Sun.COM } else {
14408906SEric.Saxe@Sun.COM if (policy & CMT_BALANCE)
14418906SEric.Saxe@Sun.COM return ("Load Balancing");
14428906SEric.Saxe@Sun.COM else if (policy & CMT_COALESCE)
14438906SEric.Saxe@Sun.COM return ("Load Coalescence");
14448906SEric.Saxe@Sun.COM else
14458906SEric.Saxe@Sun.COM return ("None");
14468906SEric.Saxe@Sun.COM }
14478906SEric.Saxe@Sun.COM }
14488906SEric.Saxe@Sun.COM
14498906SEric.Saxe@Sun.COM /*
14508906SEric.Saxe@Sun.COM * Prune PG, and all other instances of PG's hardware sharing relationship
14519746SEric.Saxe@Sun.COM * from the CMT PG hierarchy.
14529438SEric.Saxe@Sun.COM *
14539438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPUs
14549438SEric.Saxe@Sun.COM * in the PG being pruned), and may be invoked from a context where one CPU's
14559438SEric.Saxe@Sun.COM * PG data is under construction. In this case the argument "pgdata", if not
14569438SEric.Saxe@Sun.COM * NULL, is a reference to the CPU's under-construction PG data.
14578906SEric.Saxe@Sun.COM */
14588906SEric.Saxe@Sun.COM static int
pg_cmt_prune(pg_cmt_t * pg_bad,pg_cmt_t ** lineage,int * sz,cpu_pg_t * pgdata)14599438SEric.Saxe@Sun.COM pg_cmt_prune(pg_cmt_t *pg_bad, pg_cmt_t **lineage, int *sz, cpu_pg_t *pgdata)
14608906SEric.Saxe@Sun.COM {
14618906SEric.Saxe@Sun.COM group_t *hwset, *children;
14628906SEric.Saxe@Sun.COM int i, j, r, size = *sz;
14638906SEric.Saxe@Sun.COM group_iter_t hw_iter, child_iter;
14648906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter;
14658906SEric.Saxe@Sun.COM pg_cmt_t *pg, *child;
14668906SEric.Saxe@Sun.COM cpu_t *cpu;
14678906SEric.Saxe@Sun.COM int cap_needed;
14688906SEric.Saxe@Sun.COM pghw_type_t hw;
14698906SEric.Saxe@Sun.COM
14708906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
14718906SEric.Saxe@Sun.COM
1472*13124SAlexander.Kolbasov@Sun.COM /*
1473*13124SAlexander.Kolbasov@Sun.COM * Inform pghw layer that this PG is pruned.
1474*13124SAlexander.Kolbasov@Sun.COM */
1475*13124SAlexander.Kolbasov@Sun.COM pghw_cmt_fini((pghw_t *)pg_bad);
1476*13124SAlexander.Kolbasov@Sun.COM
14778906SEric.Saxe@Sun.COM hw = ((pghw_t *)pg_bad)->pghw_hw;
14788906SEric.Saxe@Sun.COM
14798906SEric.Saxe@Sun.COM if (hw == PGHW_POW_ACTIVE) {
14808906SEric.Saxe@Sun.COM cmn_err(CE_NOTE, "!Active CPUPM domain groups look suspect. "
14818906SEric.Saxe@Sun.COM "Event Based CPUPM Unavailable");
14828906SEric.Saxe@Sun.COM } else if (hw == PGHW_POW_IDLE) {
14838906SEric.Saxe@Sun.COM cmn_err(CE_NOTE, "!Idle CPUPM domain groups look suspect. "
14848906SEric.Saxe@Sun.COM "Dispatcher assisted CPUPM disabled.");
14858906SEric.Saxe@Sun.COM }
14868906SEric.Saxe@Sun.COM
14878906SEric.Saxe@Sun.COM /*
14888906SEric.Saxe@Sun.COM * Find and eliminate the PG from the lineage.
14898906SEric.Saxe@Sun.COM */
14908906SEric.Saxe@Sun.COM for (i = 0; i < size; i++) {
14918906SEric.Saxe@Sun.COM if (lineage[i] == pg_bad) {
14928906SEric.Saxe@Sun.COM for (j = i; j < size - 1; j++)
14938906SEric.Saxe@Sun.COM lineage[j] = lineage[j + 1];
14948906SEric.Saxe@Sun.COM *sz = size - 1;
14958906SEric.Saxe@Sun.COM break;
14968906SEric.Saxe@Sun.COM }
14978906SEric.Saxe@Sun.COM }
14988906SEric.Saxe@Sun.COM
14998906SEric.Saxe@Sun.COM /*
15008906SEric.Saxe@Sun.COM * We'll prune all instances of the hardware sharing relationship
15018906SEric.Saxe@Sun.COM * represented by pg. But before we do that (and pause CPUs) we need
15028906SEric.Saxe@Sun.COM * to ensure the hierarchy's groups are properly sized.
15038906SEric.Saxe@Sun.COM */
15048906SEric.Saxe@Sun.COM hwset = pghw_set_lookup(hw);
15058906SEric.Saxe@Sun.COM
15068906SEric.Saxe@Sun.COM /*
15079746SEric.Saxe@Sun.COM * Blacklist the hardware so future processor groups of this type won't
15089746SEric.Saxe@Sun.COM * participate in CMT thread placement.
15099746SEric.Saxe@Sun.COM *
15109746SEric.Saxe@Sun.COM * XXX
15119746SEric.Saxe@Sun.COM * For heterogeneous system configurations, this might be overkill.
15129746SEric.Saxe@Sun.COM * We may only need to blacklist the illegal PGs, and other instances
15139746SEric.Saxe@Sun.COM * of this hardware sharing relationship may be ok.
15148906SEric.Saxe@Sun.COM */
15158906SEric.Saxe@Sun.COM cmt_hw_blacklisted[hw] = 1;
15168906SEric.Saxe@Sun.COM
15178906SEric.Saxe@Sun.COM /*
15188906SEric.Saxe@Sun.COM * For each of the PGs being pruned, ensure sufficient capacity in
15198906SEric.Saxe@Sun.COM * the siblings set for the PG's children
15208906SEric.Saxe@Sun.COM */
15218906SEric.Saxe@Sun.COM group_iter_init(&hw_iter);
15228906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &hw_iter)) != NULL) {
15238906SEric.Saxe@Sun.COM /*
15248906SEric.Saxe@Sun.COM * PG is being pruned, but if it is bringing up more than
15258906SEric.Saxe@Sun.COM * one child, ask for more capacity in the siblings group.
15268906SEric.Saxe@Sun.COM */
15278906SEric.Saxe@Sun.COM cap_needed = 0;
15288906SEric.Saxe@Sun.COM if (pg->cmt_children &&
15298906SEric.Saxe@Sun.COM GROUP_SIZE(pg->cmt_children) > 1) {
15308906SEric.Saxe@Sun.COM cap_needed = GROUP_SIZE(pg->cmt_children) - 1;
15318906SEric.Saxe@Sun.COM
15328906SEric.Saxe@Sun.COM group_expand(pg->cmt_siblings,
15338906SEric.Saxe@Sun.COM GROUP_SIZE(pg->cmt_siblings) + cap_needed);
15348408SEric.Saxe@Sun.COM
15358408SEric.Saxe@Sun.COM /*
15368906SEric.Saxe@Sun.COM * If this is a top level group, also ensure the
15378906SEric.Saxe@Sun.COM * capacity in the root lgrp level CMT grouping.
15388408SEric.Saxe@Sun.COM */
15398906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL &&
15408906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) {
15418906SEric.Saxe@Sun.COM group_expand(&cmt_root->cl_pgs,
15428906SEric.Saxe@Sun.COM GROUP_SIZE(&cmt_root->cl_pgs) + cap_needed);
15439746SEric.Saxe@Sun.COM cmt_root->cl_npgs += cap_needed;
15448408SEric.Saxe@Sun.COM }
15458906SEric.Saxe@Sun.COM }
15468906SEric.Saxe@Sun.COM }
15478408SEric.Saxe@Sun.COM
15488906SEric.Saxe@Sun.COM /*
15498906SEric.Saxe@Sun.COM * We're operating on the PG hierarchy. Pause CPUs to ensure
15508906SEric.Saxe@Sun.COM * exclusivity with respect to the dispatcher.
15518906SEric.Saxe@Sun.COM */
15528906SEric.Saxe@Sun.COM pause_cpus(NULL);
15538408SEric.Saxe@Sun.COM
15548906SEric.Saxe@Sun.COM /*
15558906SEric.Saxe@Sun.COM * Prune all PG instances of the hardware sharing relationship
15568906SEric.Saxe@Sun.COM * represented by pg.
15578906SEric.Saxe@Sun.COM */
15588906SEric.Saxe@Sun.COM group_iter_init(&hw_iter);
15598906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &hw_iter)) != NULL) {
15608408SEric.Saxe@Sun.COM
15618408SEric.Saxe@Sun.COM /*
15628906SEric.Saxe@Sun.COM * Remove PG from it's group of siblings, if it's there.
15638906SEric.Saxe@Sun.COM */
15648906SEric.Saxe@Sun.COM if (pg->cmt_siblings) {
15658906SEric.Saxe@Sun.COM (void) group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
15668906SEric.Saxe@Sun.COM }
15678906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL &&
15688906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) {
15698906SEric.Saxe@Sun.COM (void) group_remove(&cmt_root->cl_pgs, pg,
15708906SEric.Saxe@Sun.COM GRP_NORESIZE);
15718906SEric.Saxe@Sun.COM }
15729746SEric.Saxe@Sun.COM
15739746SEric.Saxe@Sun.COM /*
15749746SEric.Saxe@Sun.COM * Indicate that no CMT policy will be implemented across
15759746SEric.Saxe@Sun.COM * this PG.
15769746SEric.Saxe@Sun.COM */
15779746SEric.Saxe@Sun.COM pg->cmt_policy = CMT_NO_POLICY;
15789746SEric.Saxe@Sun.COM
15798906SEric.Saxe@Sun.COM /*
15809036SEric.Saxe@Sun.COM * Move PG's children from it's children set to it's parent's
15819036SEric.Saxe@Sun.COM * children set. Note that the parent's children set, and PG's
15829036SEric.Saxe@Sun.COM * siblings set are the same thing.
15839036SEric.Saxe@Sun.COM *
15849036SEric.Saxe@Sun.COM * Because we are iterating over the same group that we are
15859036SEric.Saxe@Sun.COM * operating on (removing the children), first add all of PG's
15869036SEric.Saxe@Sun.COM * children to the parent's children set, and once we are done
15879036SEric.Saxe@Sun.COM * iterating, empty PG's children set.
15888906SEric.Saxe@Sun.COM */
15898906SEric.Saxe@Sun.COM if (pg->cmt_children != NULL) {
15908906SEric.Saxe@Sun.COM children = pg->cmt_children;
15918906SEric.Saxe@Sun.COM
15928906SEric.Saxe@Sun.COM group_iter_init(&child_iter);
15938906SEric.Saxe@Sun.COM while ((child = group_iterate(children, &child_iter))
15948906SEric.Saxe@Sun.COM != NULL) {
15959036SEric.Saxe@Sun.COM if (pg->cmt_siblings != NULL) {
15968906SEric.Saxe@Sun.COM r = group_add(pg->cmt_siblings, child,
15978906SEric.Saxe@Sun.COM GRP_NORESIZE);
15988906SEric.Saxe@Sun.COM ASSERT(r == 0);
15999746SEric.Saxe@Sun.COM
16009746SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL &&
16019746SEric.Saxe@Sun.COM pg->cmt_siblings !=
16029746SEric.Saxe@Sun.COM &cmt_root->cl_pgs) {
16039746SEric.Saxe@Sun.COM r = group_add(&cmt_root->cl_pgs,
16049746SEric.Saxe@Sun.COM child, GRP_NORESIZE);
16059746SEric.Saxe@Sun.COM ASSERT(r == 0);
16069746SEric.Saxe@Sun.COM }
16078906SEric.Saxe@Sun.COM }
16088906SEric.Saxe@Sun.COM }
16099036SEric.Saxe@Sun.COM group_empty(pg->cmt_children);
16108906SEric.Saxe@Sun.COM }
16118906SEric.Saxe@Sun.COM
16128906SEric.Saxe@Sun.COM /*
16138906SEric.Saxe@Sun.COM * Reset the callbacks to the defaults
16148906SEric.Saxe@Sun.COM */
16158906SEric.Saxe@Sun.COM pg_callback_set_defaults((pg_t *)pg);
16168906SEric.Saxe@Sun.COM
16178906SEric.Saxe@Sun.COM /*
16188906SEric.Saxe@Sun.COM * Update all the CPU lineages in each of PG's CPUs
16198408SEric.Saxe@Sun.COM */
16208906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT(pg, cpu_iter);
16218906SEric.Saxe@Sun.COM while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) {
16228906SEric.Saxe@Sun.COM pg_cmt_t *cpu_pg;
16238906SEric.Saxe@Sun.COM group_iter_t liter; /* Iterator for the lineage */
16249438SEric.Saxe@Sun.COM cpu_pg_t *cpd; /* CPU's PG data */
16259438SEric.Saxe@Sun.COM
16269438SEric.Saxe@Sun.COM /*
16279438SEric.Saxe@Sun.COM * The CPU's lineage is under construction still
16289438SEric.Saxe@Sun.COM * references the bootstrap CPU PG data structure.
16299438SEric.Saxe@Sun.COM */
16309438SEric.Saxe@Sun.COM if (pg_cpu_is_bootstrapped(cpu))
16319438SEric.Saxe@Sun.COM cpd = pgdata;
16329438SEric.Saxe@Sun.COM else
16339438SEric.Saxe@Sun.COM cpd = cpu->cpu_pg;
16348906SEric.Saxe@Sun.COM
16358906SEric.Saxe@Sun.COM /*
16368906SEric.Saxe@Sun.COM * Iterate over the CPU's PGs updating the children
16378906SEric.Saxe@Sun.COM * of the PG being promoted, since they have a new
16388906SEric.Saxe@Sun.COM * parent and siblings set.
16398906SEric.Saxe@Sun.COM */
16408906SEric.Saxe@Sun.COM group_iter_init(&liter);
16419438SEric.Saxe@Sun.COM while ((cpu_pg = group_iterate(&cpd->pgs,
16429438SEric.Saxe@Sun.COM &liter)) != NULL) {
16438906SEric.Saxe@Sun.COM if (cpu_pg->cmt_parent == pg) {
16448906SEric.Saxe@Sun.COM cpu_pg->cmt_parent = pg->cmt_parent;
16458906SEric.Saxe@Sun.COM cpu_pg->cmt_siblings = pg->cmt_siblings;
16468906SEric.Saxe@Sun.COM }
16478906SEric.Saxe@Sun.COM }
16488906SEric.Saxe@Sun.COM
16498906SEric.Saxe@Sun.COM /*
16508906SEric.Saxe@Sun.COM * Update the CPU's lineages
16519746SEric.Saxe@Sun.COM *
16529746SEric.Saxe@Sun.COM * Remove the PG from the CPU's group used for CMT
16539746SEric.Saxe@Sun.COM * scheduling.
16548906SEric.Saxe@Sun.COM */
16559438SEric.Saxe@Sun.COM (void) group_remove(&cpd->cmt_pgs, pg, GRP_NORESIZE);
16568408SEric.Saxe@Sun.COM }
16578906SEric.Saxe@Sun.COM }
16588906SEric.Saxe@Sun.COM start_cpus();
16598906SEric.Saxe@Sun.COM return (0);
16608906SEric.Saxe@Sun.COM }
16618906SEric.Saxe@Sun.COM
16628906SEric.Saxe@Sun.COM /*
16638906SEric.Saxe@Sun.COM * Disable CMT scheduling
16648906SEric.Saxe@Sun.COM */
16658906SEric.Saxe@Sun.COM static void
pg_cmt_disable(void)16668906SEric.Saxe@Sun.COM pg_cmt_disable(void)
16678906SEric.Saxe@Sun.COM {
16689438SEric.Saxe@Sun.COM cpu_t *cpu;
16699438SEric.Saxe@Sun.COM
16709438SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
16718906SEric.Saxe@Sun.COM
16728906SEric.Saxe@Sun.COM pause_cpus(NULL);
16738906SEric.Saxe@Sun.COM cpu = cpu_list;
16748906SEric.Saxe@Sun.COM
16758906SEric.Saxe@Sun.COM do {
16768906SEric.Saxe@Sun.COM if (cpu->cpu_pg)
16778906SEric.Saxe@Sun.COM group_empty(&cpu->cpu_pg->cmt_pgs);
16788906SEric.Saxe@Sun.COM } while ((cpu = cpu->cpu_next) != cpu_list);
16798906SEric.Saxe@Sun.COM
16808906SEric.Saxe@Sun.COM cmt_sched_disabled = 1;
16818906SEric.Saxe@Sun.COM start_cpus();
16828906SEric.Saxe@Sun.COM cmn_err(CE_NOTE, "!CMT thread placement optimizations unavailable");
16838906SEric.Saxe@Sun.COM }
16848408SEric.Saxe@Sun.COM
16859036SEric.Saxe@Sun.COM /*
16869036SEric.Saxe@Sun.COM * CMT lineage validation
16879036SEric.Saxe@Sun.COM *
16889036SEric.Saxe@Sun.COM * This routine is invoked by pg_cmt_cpu_init() to validate the integrity
16899036SEric.Saxe@Sun.COM * of the PGs in a CPU's lineage. This is necessary because it's possible that
16909036SEric.Saxe@Sun.COM * some groupings (power domain groupings in particular) may be defined by
16919036SEric.Saxe@Sun.COM * sources that are buggy (e.g. BIOS bugs). In such cases, it may not be
16929036SEric.Saxe@Sun.COM * possible to integrate those groupings into the CMT PG hierarchy, if doing
16939036SEric.Saxe@Sun.COM * so would violate the subset invariant of the hierarchy, which says that
16949036SEric.Saxe@Sun.COM * a PG must be subset of its parent (if it has one).
16959036SEric.Saxe@Sun.COM *
16969036SEric.Saxe@Sun.COM * pg_cmt_lineage_validate()'s purpose is to detect grouping definitions that
16979036SEric.Saxe@Sun.COM * would result in a violation of this invariant. If a violation is found,
16989036SEric.Saxe@Sun.COM * and the PG is of a grouping type who's definition is known to originate from
16999036SEric.Saxe@Sun.COM * suspect sources (BIOS), then pg_cmt_prune() will be invoked to prune the
170011263SEric.Saxe@Sun.COM * PG (and all other instances PG's sharing relationship type) from the CMT
17019036SEric.Saxe@Sun.COM * hierarchy. Further, future instances of that sharing relationship type won't
170211263SEric.Saxe@Sun.COM * be added. If the grouping definition doesn't originate from suspect
17039036SEric.Saxe@Sun.COM * sources, then pg_cmt_disable() will be invoked to log an error, and disable
17049036SEric.Saxe@Sun.COM * CMT scheduling altogether.
17059036SEric.Saxe@Sun.COM *
17069036SEric.Saxe@Sun.COM * This routine is invoked after the CPU has been added to the PGs in which
17079036SEric.Saxe@Sun.COM * it belongs, but before those PGs have been added to (or had their place
17089036SEric.Saxe@Sun.COM * adjusted in) the CMT PG hierarchy.
17099036SEric.Saxe@Sun.COM *
17109036SEric.Saxe@Sun.COM * The first argument is the CPUs PG lineage (essentially an array of PGs in
17119036SEric.Saxe@Sun.COM * which the CPU belongs) that has already been sorted in ascending order
17129036SEric.Saxe@Sun.COM * by CPU count. Some of the PGs in the CPUs lineage may already have other
17139036SEric.Saxe@Sun.COM * CPUs in them, and have already been integrated into the CMT hierarchy.
17149036SEric.Saxe@Sun.COM *
17159036SEric.Saxe@Sun.COM * The addition of this new CPU to these pre-existing PGs means that those
17169036SEric.Saxe@Sun.COM * PGs may need to be promoted up in the hierarchy to satisfy the subset
17179036SEric.Saxe@Sun.COM * invariant. In additon to testing the subset invariant for the lineage,
17189036SEric.Saxe@Sun.COM * this routine also verifies that the addition of the new CPU to the
17199036SEric.Saxe@Sun.COM * existing PGs wouldn't cause the subset invariant to be violated in
17209036SEric.Saxe@Sun.COM * the exiting lineages.
17219036SEric.Saxe@Sun.COM *
17229036SEric.Saxe@Sun.COM * This routine will normally return one of the following:
17239036SEric.Saxe@Sun.COM * CMT_LINEAGE_VALID - There were no problems detected with the lineage.
17249036SEric.Saxe@Sun.COM * CMT_LINEAGE_REPAIRED - Problems were detected, but repaired via pruning.
17259036SEric.Saxe@Sun.COM *
17269036SEric.Saxe@Sun.COM * Otherwise, this routine will return a value indicating which error it
17279036SEric.Saxe@Sun.COM * was unable to recover from (and set cmt_lineage_status along the way).
17289438SEric.Saxe@Sun.COM *
17299438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPU
17309438SEric.Saxe@Sun.COM * whose lineage is being validated), which is under-construction.
17319438SEric.Saxe@Sun.COM * "pgdata" is a reference to the CPU's under-construction PG data.
17329438SEric.Saxe@Sun.COM * This routine must be careful to operate only on "pgdata", and not cp->cpu_pg.
17339036SEric.Saxe@Sun.COM */
17349036SEric.Saxe@Sun.COM static cmt_lineage_validation_t
pg_cmt_lineage_validate(pg_cmt_t ** lineage,int * sz,cpu_pg_t * pgdata)17359438SEric.Saxe@Sun.COM pg_cmt_lineage_validate(pg_cmt_t **lineage, int *sz, cpu_pg_t *pgdata)
17368906SEric.Saxe@Sun.COM {
17379036SEric.Saxe@Sun.COM int i, j, size;
173811263SEric.Saxe@Sun.COM pg_cmt_t *pg, *pg_next, *pg_bad, *pg_tmp, *parent;
17398906SEric.Saxe@Sun.COM cpu_t *cp;
17408906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter;
17419036SEric.Saxe@Sun.COM lgrp_handle_t lgrp;
17428906SEric.Saxe@Sun.COM
17438906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
17448906SEric.Saxe@Sun.COM
17458906SEric.Saxe@Sun.COM revalidate:
17468906SEric.Saxe@Sun.COM size = *sz;
17478906SEric.Saxe@Sun.COM pg_bad = NULL;
17489036SEric.Saxe@Sun.COM lgrp = LGRP_NULL_HANDLE;
17499036SEric.Saxe@Sun.COM for (i = 0; i < size; i++) {
17508906SEric.Saxe@Sun.COM
17518906SEric.Saxe@Sun.COM pg = lineage[i];
17529036SEric.Saxe@Sun.COM if (i < size - 1)
17539036SEric.Saxe@Sun.COM pg_next = lineage[i + 1];
17549036SEric.Saxe@Sun.COM else
17559036SEric.Saxe@Sun.COM pg_next = NULL;
17568408SEric.Saxe@Sun.COM
17578906SEric.Saxe@Sun.COM /*
17588906SEric.Saxe@Sun.COM * We assume that the lineage has already been sorted
17598906SEric.Saxe@Sun.COM * by the number of CPUs. In fact, we depend on it.
17608906SEric.Saxe@Sun.COM */
17619036SEric.Saxe@Sun.COM ASSERT(pg_next == NULL ||
17629036SEric.Saxe@Sun.COM (PG_NUM_CPUS((pg_t *)pg) <= PG_NUM_CPUS((pg_t *)pg_next)));
17638906SEric.Saxe@Sun.COM
17648906SEric.Saxe@Sun.COM /*
176511263SEric.Saxe@Sun.COM * The CPUs PG lineage was passed as the first argument to
176611263SEric.Saxe@Sun.COM * this routine and contains the sorted list of the CPU's
176711263SEric.Saxe@Sun.COM * PGs. Ultimately, the ordering of the PGs in that list, and
176811263SEric.Saxe@Sun.COM * the ordering as traversed by the cmt_parent list must be
176911263SEric.Saxe@Sun.COM * the same. PG promotion will be used as the mechanism to
177011263SEric.Saxe@Sun.COM * achieve this, but first we need to look for cases where
177111263SEric.Saxe@Sun.COM * promotion will be necessary, and validate that will be
177211263SEric.Saxe@Sun.COM * possible without violating the subset invarient described
177311263SEric.Saxe@Sun.COM * above.
17749036SEric.Saxe@Sun.COM *
17759036SEric.Saxe@Sun.COM * Since the PG topology is in the middle of being changed, we
17769036SEric.Saxe@Sun.COM * need to check whether the PG's existing parent (if any) is
177711263SEric.Saxe@Sun.COM * part of this CPU's lineage (and therefore should contain
177811263SEric.Saxe@Sun.COM * the new CPU). If not, it means that the addition of the
177911263SEric.Saxe@Sun.COM * new CPU should have made this PG have more CPUs than its
178011263SEric.Saxe@Sun.COM * parent (and other ancestors not in the same lineage) and
178111263SEric.Saxe@Sun.COM * will need to be promoted into place.
178211263SEric.Saxe@Sun.COM *
178311263SEric.Saxe@Sun.COM * We need to verify all of this to defend against a buggy
17849036SEric.Saxe@Sun.COM * BIOS giving bad power domain CPU groupings. Sigh.
17859036SEric.Saxe@Sun.COM */
178611263SEric.Saxe@Sun.COM parent = pg->cmt_parent;
178711263SEric.Saxe@Sun.COM while (parent != NULL) {
17889036SEric.Saxe@Sun.COM /*
178911263SEric.Saxe@Sun.COM * Determine if the parent/ancestor is in this lineage
17909036SEric.Saxe@Sun.COM */
179111263SEric.Saxe@Sun.COM pg_tmp = NULL;
179211263SEric.Saxe@Sun.COM for (j = 0; (j < size) && (pg_tmp != parent); j++) {
17939036SEric.Saxe@Sun.COM pg_tmp = lineage[j];
17949036SEric.Saxe@Sun.COM }
179511263SEric.Saxe@Sun.COM if (pg_tmp == parent) {
17969036SEric.Saxe@Sun.COM /*
179711263SEric.Saxe@Sun.COM * It's in the lineage. The concentricity
179811263SEric.Saxe@Sun.COM * checks will handle the rest.
17999036SEric.Saxe@Sun.COM */
180011263SEric.Saxe@Sun.COM break;
18019036SEric.Saxe@Sun.COM }
180211263SEric.Saxe@Sun.COM /*
180311263SEric.Saxe@Sun.COM * If it is not in the lineage, PG will eventually
180411263SEric.Saxe@Sun.COM * need to be promoted above it. Verify the ancestor
180511263SEric.Saxe@Sun.COM * is a proper subset. There is still an error if
180611263SEric.Saxe@Sun.COM * the ancestor has the same number of CPUs as PG,
180711263SEric.Saxe@Sun.COM * since that would imply it should be in the lineage,
180811263SEric.Saxe@Sun.COM * and we already know it isn't.
180911263SEric.Saxe@Sun.COM */
181011263SEric.Saxe@Sun.COM if (PG_NUM_CPUS((pg_t *)parent) >=
181111263SEric.Saxe@Sun.COM PG_NUM_CPUS((pg_t *)pg)) {
181211263SEric.Saxe@Sun.COM /*
181311263SEric.Saxe@Sun.COM * Not a proper subset if the parent/ancestor
181411263SEric.Saxe@Sun.COM * has the same or more CPUs than PG.
181511263SEric.Saxe@Sun.COM */
181611263SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_NON_PROMOTABLE;
181711263SEric.Saxe@Sun.COM goto handle_error;
181811263SEric.Saxe@Sun.COM }
181911263SEric.Saxe@Sun.COM parent = parent->cmt_parent;
18209036SEric.Saxe@Sun.COM }
18219036SEric.Saxe@Sun.COM
18229036SEric.Saxe@Sun.COM /*
18239036SEric.Saxe@Sun.COM * Walk each of the CPUs in the PGs group and perform
18249036SEric.Saxe@Sun.COM * consistency checks along the way.
18258906SEric.Saxe@Sun.COM */
18268906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT((pg_t *)pg, cpu_iter);
18278906SEric.Saxe@Sun.COM while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
18289036SEric.Saxe@Sun.COM /*
18299036SEric.Saxe@Sun.COM * Verify that there aren't any CPUs contained in PG
18309036SEric.Saxe@Sun.COM * that the next PG in the lineage (which is larger
18319036SEric.Saxe@Sun.COM * or same size) doesn't also contain.
18329036SEric.Saxe@Sun.COM */
18339036SEric.Saxe@Sun.COM if (pg_next != NULL &&
18349036SEric.Saxe@Sun.COM pg_cpu_find((pg_t *)pg_next, cp) == B_FALSE) {
18358906SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_NON_CONCENTRIC;
18368906SEric.Saxe@Sun.COM goto handle_error;
18378906SEric.Saxe@Sun.COM }
18389036SEric.Saxe@Sun.COM
18399036SEric.Saxe@Sun.COM /*
18409036SEric.Saxe@Sun.COM * Verify that all the CPUs in the PG are in the same
18419036SEric.Saxe@Sun.COM * lgroup.
18429036SEric.Saxe@Sun.COM */
18439036SEric.Saxe@Sun.COM if (lgrp == LGRP_NULL_HANDLE) {
18449036SEric.Saxe@Sun.COM lgrp = lgrp_plat_cpu_to_hand(cp->cpu_id);
18459036SEric.Saxe@Sun.COM } else if (lgrp_plat_cpu_to_hand(cp->cpu_id) != lgrp) {
18469036SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_PG_SPANS_LGRPS;
18479036SEric.Saxe@Sun.COM goto handle_error;
18489036SEric.Saxe@Sun.COM }
18498906SEric.Saxe@Sun.COM }
18508408SEric.Saxe@Sun.COM }
18518408SEric.Saxe@Sun.COM
18528906SEric.Saxe@Sun.COM handle_error:
18539036SEric.Saxe@Sun.COM /*
18549036SEric.Saxe@Sun.COM * Some of these validation errors can result when the CPU grouping
18559036SEric.Saxe@Sun.COM * information is derived from buggy sources (for example, incorrect
18569036SEric.Saxe@Sun.COM * ACPI tables on x86 systems).
18579036SEric.Saxe@Sun.COM *
18589036SEric.Saxe@Sun.COM * We'll try to recover in such cases by pruning out the illegal
18599036SEric.Saxe@Sun.COM * groupings from the PG hierarchy, which means that we won't optimize
18609036SEric.Saxe@Sun.COM * for those levels, but we will for the remaining ones.
18619036SEric.Saxe@Sun.COM */
18628906SEric.Saxe@Sun.COM switch (cmt_lineage_status) {
18638906SEric.Saxe@Sun.COM case CMT_LINEAGE_VALID:
18648906SEric.Saxe@Sun.COM case CMT_LINEAGE_REPAIRED:
18658906SEric.Saxe@Sun.COM break;
18669036SEric.Saxe@Sun.COM case CMT_LINEAGE_PG_SPANS_LGRPS:
18679036SEric.Saxe@Sun.COM /*
18689036SEric.Saxe@Sun.COM * We've detected a PG whose CPUs span lgroups.
18699036SEric.Saxe@Sun.COM *
18709036SEric.Saxe@Sun.COM * This isn't supported, as the dispatcher isn't allowed to
18719036SEric.Saxe@Sun.COM * to do CMT thread placement across lgroups, as this would
18729036SEric.Saxe@Sun.COM * conflict with policies implementing MPO thread affinity.
18739036SEric.Saxe@Sun.COM *
18749746SEric.Saxe@Sun.COM * If the PG is of a sharing relationship type known to
18759746SEric.Saxe@Sun.COM * legitimately span lgroups, specify that no CMT thread
18769746SEric.Saxe@Sun.COM * placement policy should be implemented, and prune the PG
18779746SEric.Saxe@Sun.COM * from the existing CMT PG hierarchy.
18789746SEric.Saxe@Sun.COM *
18799746SEric.Saxe@Sun.COM * Otherwise, fall though to the case below for handling.
18809036SEric.Saxe@Sun.COM */
18819746SEric.Saxe@Sun.COM if (((pghw_t *)pg)->pghw_hw == PGHW_CHIP) {
18829746SEric.Saxe@Sun.COM if (pg_cmt_prune(pg, lineage, sz, pgdata) == 0) {
18839746SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_REPAIRED;
18849746SEric.Saxe@Sun.COM goto revalidate;
18859746SEric.Saxe@Sun.COM }
18869746SEric.Saxe@Sun.COM }
18879746SEric.Saxe@Sun.COM /*LINTED*/
18889036SEric.Saxe@Sun.COM case CMT_LINEAGE_NON_PROMOTABLE:
18899036SEric.Saxe@Sun.COM /*
18909036SEric.Saxe@Sun.COM * We've detected a PG that already exists in another CPU's
18919036SEric.Saxe@Sun.COM * lineage that cannot cannot legally be promoted into place
18929036SEric.Saxe@Sun.COM * without breaking the invariants of the hierarchy.
18939036SEric.Saxe@Sun.COM */
18949036SEric.Saxe@Sun.COM if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) {
18959438SEric.Saxe@Sun.COM if (pg_cmt_prune(pg, lineage, sz, pgdata) == 0) {
18969036SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_REPAIRED;
18979036SEric.Saxe@Sun.COM goto revalidate;
18989036SEric.Saxe@Sun.COM }
18999036SEric.Saxe@Sun.COM }
19009036SEric.Saxe@Sun.COM /*
19019036SEric.Saxe@Sun.COM * Something went wrong trying to prune out the bad level.
19029036SEric.Saxe@Sun.COM * Disable CMT scheduling altogether.
19039036SEric.Saxe@Sun.COM */
19049036SEric.Saxe@Sun.COM pg_cmt_disable();
19059036SEric.Saxe@Sun.COM break;
19068906SEric.Saxe@Sun.COM case CMT_LINEAGE_NON_CONCENTRIC:
19078408SEric.Saxe@Sun.COM /*
19089036SEric.Saxe@Sun.COM * We've detected a non-concentric PG lineage, which means that
19099036SEric.Saxe@Sun.COM * there's a PG in the lineage that has CPUs that the next PG
19109036SEric.Saxe@Sun.COM * over in the lineage (which is the same size or larger)
19119036SEric.Saxe@Sun.COM * doesn't have.
19128906SEric.Saxe@Sun.COM *
19139036SEric.Saxe@Sun.COM * In this case, we examine the two PGs to see if either
19149036SEric.Saxe@Sun.COM * grouping is defined by potentially buggy sources.
19158906SEric.Saxe@Sun.COM *
19168906SEric.Saxe@Sun.COM * If one has less CPUs than the other, and contains CPUs
19178906SEric.Saxe@Sun.COM * not found in the parent, and it is an untrusted enumeration,
19188906SEric.Saxe@Sun.COM * then prune it. If both have the same number of CPUs, then
19198906SEric.Saxe@Sun.COM * prune the one that is untrusted.
19208906SEric.Saxe@Sun.COM *
19218906SEric.Saxe@Sun.COM * This process repeats until we have a concentric lineage,
19228906SEric.Saxe@Sun.COM * or we would have to prune out level derived from what we
19238906SEric.Saxe@Sun.COM * thought was a reliable source, in which case CMT scheduling
19249036SEric.Saxe@Sun.COM * is disabled altogether.
19258408SEric.Saxe@Sun.COM */
19269036SEric.Saxe@Sun.COM if ((PG_NUM_CPUS((pg_t *)pg) < PG_NUM_CPUS((pg_t *)pg_next)) &&
19278906SEric.Saxe@Sun.COM (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw))) {
19288906SEric.Saxe@Sun.COM pg_bad = pg;
19298906SEric.Saxe@Sun.COM } else if (PG_NUM_CPUS((pg_t *)pg) ==
19309036SEric.Saxe@Sun.COM PG_NUM_CPUS((pg_t *)pg_next)) {
19319036SEric.Saxe@Sun.COM if (PG_CMT_HW_SUSPECT(((pghw_t *)pg_next)->pghw_hw)) {
19329036SEric.Saxe@Sun.COM pg_bad = pg_next;
19338906SEric.Saxe@Sun.COM } else if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) {
19348906SEric.Saxe@Sun.COM pg_bad = pg;
19358906SEric.Saxe@Sun.COM }
19368906SEric.Saxe@Sun.COM }
19378906SEric.Saxe@Sun.COM if (pg_bad) {
19389438SEric.Saxe@Sun.COM if (pg_cmt_prune(pg_bad, lineage, sz, pgdata) == 0) {
19398906SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_REPAIRED;
19408906SEric.Saxe@Sun.COM goto revalidate;
19418408SEric.Saxe@Sun.COM }
19428906SEric.Saxe@Sun.COM }
19439036SEric.Saxe@Sun.COM /*
19449036SEric.Saxe@Sun.COM * Something went wrong trying to identify and/or prune out
19459036SEric.Saxe@Sun.COM * the bad level. Disable CMT scheduling altogether.
19469036SEric.Saxe@Sun.COM */
19479036SEric.Saxe@Sun.COM pg_cmt_disable();
19489036SEric.Saxe@Sun.COM break;
19498906SEric.Saxe@Sun.COM default:
19508906SEric.Saxe@Sun.COM /*
19519036SEric.Saxe@Sun.COM * If we're here, we've encountered a validation error for
19529036SEric.Saxe@Sun.COM * which we don't know how to recover. In this case, disable
19539036SEric.Saxe@Sun.COM * CMT scheduling altogether.
19548906SEric.Saxe@Sun.COM */
19559036SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_UNRECOVERABLE;
19568906SEric.Saxe@Sun.COM pg_cmt_disable();
19578408SEric.Saxe@Sun.COM }
19589036SEric.Saxe@Sun.COM return (cmt_lineage_status);
19598408SEric.Saxe@Sun.COM }
1960