13434Sesaxe /* 23434Sesaxe * CDDL HEADER START 33434Sesaxe * 43434Sesaxe * The contents of this file are subject to the terms of the 53434Sesaxe * Common Development and Distribution License (the "License"). 63434Sesaxe * You may not use this file except in compliance with the License. 73434Sesaxe * 83434Sesaxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 93434Sesaxe * or http://www.opensolaris.org/os/licensing. 103434Sesaxe * See the License for the specific language governing permissions 113434Sesaxe * and limitations under the License. 123434Sesaxe * 133434Sesaxe * When distributing Covered Code, include this CDDL HEADER in each 143434Sesaxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 153434Sesaxe * If applicable, add the following below this CDDL HEADER, with the 163434Sesaxe * fields enclosed by brackets "[]" replaced with your own identifying 173434Sesaxe * information: Portions Copyright [yyyy] [name of copyright owner] 183434Sesaxe * 193434Sesaxe * CDDL HEADER END 203434Sesaxe */ 213434Sesaxe /* 228689SEric.Saxe@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 233434Sesaxe * Use is subject to license terms. 
243434Sesaxe */ 253434Sesaxe 263434Sesaxe #include <sys/systm.h> 273434Sesaxe #include <sys/types.h> 283434Sesaxe #include <sys/param.h> 293434Sesaxe #include <sys/thread.h> 303434Sesaxe #include <sys/cpuvar.h> 313434Sesaxe #include <sys/cpupart.h> 323434Sesaxe #include <sys/kmem.h> 333434Sesaxe #include <sys/cmn_err.h> 343434Sesaxe #include <sys/kstat.h> 353434Sesaxe #include <sys/processor.h> 363434Sesaxe #include <sys/disp.h> 373434Sesaxe #include <sys/group.h> 383434Sesaxe #include <sys/pghw.h> 393434Sesaxe #include <sys/bitset.h> 403434Sesaxe #include <sys/lgrp.h> 413434Sesaxe #include <sys/cmt.h> 428906SEric.Saxe@Sun.COM #include <sys/cpu_pm.h> 433434Sesaxe 443434Sesaxe /* 453434Sesaxe * CMT scheduler / dispatcher support 463434Sesaxe * 473434Sesaxe * This file implements CMT scheduler support using Processor Groups. 483434Sesaxe * The CMT processor group class creates and maintains the CMT class 493434Sesaxe * specific processor group pg_cmt_t. 503434Sesaxe * 513434Sesaxe * ---------------------------- <-- pg_cmt_t * 523434Sesaxe * | pghw_t | 533434Sesaxe * ---------------------------- 543434Sesaxe * | CMT class specific data | 553434Sesaxe * | - hierarchy linkage | 563434Sesaxe * | - CMT load balancing data| 573434Sesaxe * | - active CPU group/bitset| 583434Sesaxe * ---------------------------- 593434Sesaxe * 603434Sesaxe * The scheduler/dispatcher leverages knowledge of the performance 613434Sesaxe * relevant CMT sharing relationships existing between cpus to implement 628906SEric.Saxe@Sun.COM * optimized affinity, load balancing, and coalescence policies. 633434Sesaxe * 643434Sesaxe * Load balancing policy seeks to improve performance by minimizing 658906SEric.Saxe@Sun.COM * contention over shared processor resources / facilities, Affinity 668906SEric.Saxe@Sun.COM * policies seek to improve cache and TLB utilization. Coalescence 678906SEric.Saxe@Sun.COM * policies improve resource utilization and ultimately power efficiency. 
683434Sesaxe * 693434Sesaxe * The CMT PGs created by this class are already arranged into a 703434Sesaxe * hierarchy (which is done in the pghw layer). To implement the top-down 713434Sesaxe * CMT load balancing algorithm, the CMT PGs additionally maintain 723434Sesaxe * parent, child and sibling hierarchy relationships. 733434Sesaxe * Parent PGs always contain a superset of their children(s) resources, 743434Sesaxe * each PG can have at most one parent, and siblings are the group of PGs 753434Sesaxe * sharing the same parent. 763434Sesaxe * 779746SEric.Saxe@Sun.COM * On UMA based systems, the CMT load balancing algorithm begins by balancing 789746SEric.Saxe@Sun.COM * load across the group of top level PGs in the system hierarchy. 799746SEric.Saxe@Sun.COM * On NUMA systems, the CMT load balancing algorithm balances load across the 809746SEric.Saxe@Sun.COM * group of top level PGs in each leaf lgroup...but for root homed threads, 819746SEric.Saxe@Sun.COM * is willing to balance against all the top level PGs in the system. 829746SEric.Saxe@Sun.COM * 839746SEric.Saxe@Sun.COM * Groups of top level PGs are maintained to implement the above, one for each 849746SEric.Saxe@Sun.COM * leaf lgroup (containing the top level PGs in that lgroup), and one (for the 859746SEric.Saxe@Sun.COM * root lgroup) that contains all the top level PGs in the system. 863434Sesaxe */ 873676Sesaxe static cmt_lgrp_t *cmt_lgrps = NULL; /* cmt_lgrps list head */ 883676Sesaxe static cmt_lgrp_t *cpu0_lgrp = NULL; /* boot CPU's initial lgrp */ 893676Sesaxe /* used for null_proc_lpa */ 908906SEric.Saxe@Sun.COM cmt_lgrp_t *cmt_root = NULL; /* Reference to root cmt pg */ 913434Sesaxe 923676Sesaxe static int is_cpu0 = 1; /* true if this is boot CPU context */ 933676Sesaxe 943676Sesaxe /* 958906SEric.Saxe@Sun.COM * Array of hardware sharing relationships that are blacklisted. 
969746SEric.Saxe@Sun.COM * CMT scheduling optimizations won't be performed for blacklisted sharing 979746SEric.Saxe@Sun.COM * relationships. 988906SEric.Saxe@Sun.COM */ 998906SEric.Saxe@Sun.COM static int cmt_hw_blacklisted[PGHW_NUM_COMPONENTS]; 1008906SEric.Saxe@Sun.COM 1018906SEric.Saxe@Sun.COM /* 1023676Sesaxe * Set this to non-zero to disable CMT scheduling 1033676Sesaxe * This must be done via kmdb -d, as /etc/system will be too late 1043676Sesaxe */ 1058906SEric.Saxe@Sun.COM int cmt_sched_disabled = 0; 1063434Sesaxe 1079036SEric.Saxe@Sun.COM /* 1089036SEric.Saxe@Sun.COM * Status codes for CMT lineage validation 1099036SEric.Saxe@Sun.COM * See pg_cmt_lineage_validate() below 1109036SEric.Saxe@Sun.COM */ 1119036SEric.Saxe@Sun.COM typedef enum cmt_lineage_validation { 1129036SEric.Saxe@Sun.COM CMT_LINEAGE_VALID, 1139036SEric.Saxe@Sun.COM CMT_LINEAGE_NON_CONCENTRIC, 1149036SEric.Saxe@Sun.COM CMT_LINEAGE_PG_SPANS_LGRPS, 1159036SEric.Saxe@Sun.COM CMT_LINEAGE_NON_PROMOTABLE, 1169036SEric.Saxe@Sun.COM CMT_LINEAGE_REPAIRED, 1179036SEric.Saxe@Sun.COM CMT_LINEAGE_UNRECOVERABLE 1189036SEric.Saxe@Sun.COM } cmt_lineage_validation_t; 1199036SEric.Saxe@Sun.COM 1209036SEric.Saxe@Sun.COM /* 1219036SEric.Saxe@Sun.COM * Status of the current lineage under construction. 1229036SEric.Saxe@Sun.COM * One must be holding cpu_lock to change this. 1239036SEric.Saxe@Sun.COM */ 1249036SEric.Saxe@Sun.COM cmt_lineage_validation_t cmt_lineage_status = CMT_LINEAGE_VALID; 1259036SEric.Saxe@Sun.COM 1269036SEric.Saxe@Sun.COM /* 1279036SEric.Saxe@Sun.COM * Power domain definitions (on x86) are defined by ACPI, and 1289036SEric.Saxe@Sun.COM * therefore may be subject to BIOS bugs. 
1299036SEric.Saxe@Sun.COM */ 1309036SEric.Saxe@Sun.COM #define PG_CMT_HW_SUSPECT(hw) PGHW_IS_PM_DOMAIN(hw) 1319036SEric.Saxe@Sun.COM 1329036SEric.Saxe@Sun.COM /* 1339036SEric.Saxe@Sun.COM * Macro to test if PG is managed by the CMT PG class 1349036SEric.Saxe@Sun.COM */ 1359036SEric.Saxe@Sun.COM #define IS_CMT_PG(pg) (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id) 1369036SEric.Saxe@Sun.COM 1373434Sesaxe static pg_cid_t pg_cmt_class_id; /* PG class id */ 1383434Sesaxe 1393434Sesaxe static pg_t *pg_cmt_alloc(); 1403434Sesaxe static void pg_cmt_free(pg_t *); 1419352SEric.Saxe@Sun.COM static void pg_cmt_cpu_init(cpu_t *, cpu_pg_t *); 1429352SEric.Saxe@Sun.COM static void pg_cmt_cpu_fini(cpu_t *, cpu_pg_t *); 1433434Sesaxe static void pg_cmt_cpu_active(cpu_t *); 1443434Sesaxe static void pg_cmt_cpu_inactive(cpu_t *); 1453434Sesaxe static void pg_cmt_cpupart_in(cpu_t *, cpupart_t *); 1463434Sesaxe static void pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *); 1478906SEric.Saxe@Sun.COM static char *pg_cmt_policy_name(pg_t *); 1488906SEric.Saxe@Sun.COM static void pg_cmt_hier_sort(pg_cmt_t **, int); 1498906SEric.Saxe@Sun.COM static pg_cmt_t *pg_cmt_hier_rank(pg_cmt_t *, pg_cmt_t *); 1503434Sesaxe static int pg_cmt_cpu_belongs(pg_t *, cpu_t *); 1513434Sesaxe static int pg_cmt_hw(pghw_type_t); 1523434Sesaxe static cmt_lgrp_t *pg_cmt_find_lgrp(lgrp_handle_t); 1533676Sesaxe static cmt_lgrp_t *pg_cmt_lgrp_create(lgrp_handle_t); 1548906SEric.Saxe@Sun.COM static void cmt_ev_thread_swtch(pg_t *, cpu_t *, hrtime_t, 1558906SEric.Saxe@Sun.COM kthread_t *, kthread_t *); 1568906SEric.Saxe@Sun.COM static void cmt_ev_thread_swtch_pwr(pg_t *, cpu_t *, hrtime_t, 1578906SEric.Saxe@Sun.COM kthread_t *, kthread_t *); 1588906SEric.Saxe@Sun.COM static void cmt_ev_thread_remain_pwr(pg_t *, cpu_t *, kthread_t *); 1599438SEric.Saxe@Sun.COM static cmt_lineage_validation_t pg_cmt_lineage_validate(pg_cmt_t **, int *, 1609438SEric.Saxe@Sun.COM cpu_pg_t *); 1613434Sesaxe 
1628906SEric.Saxe@Sun.COM /* 1633434Sesaxe * CMT PG ops 1643434Sesaxe */ 1653434Sesaxe struct pg_ops pg_ops_cmt = { 1663434Sesaxe pg_cmt_alloc, 1673434Sesaxe pg_cmt_free, 1683434Sesaxe pg_cmt_cpu_init, 1693434Sesaxe pg_cmt_cpu_fini, 1703434Sesaxe pg_cmt_cpu_active, 1713434Sesaxe pg_cmt_cpu_inactive, 1723434Sesaxe pg_cmt_cpupart_in, 1733434Sesaxe NULL, /* cpupart_out */ 1743434Sesaxe pg_cmt_cpupart_move, 1753434Sesaxe pg_cmt_cpu_belongs, 1768906SEric.Saxe@Sun.COM pg_cmt_policy_name, 1773434Sesaxe }; 1783434Sesaxe 1793434Sesaxe /* 1803434Sesaxe * Initialize the CMT PG class 1813434Sesaxe */ 1823434Sesaxe void 1833434Sesaxe pg_cmt_class_init(void) 1843434Sesaxe { 1853434Sesaxe if (cmt_sched_disabled) 1863434Sesaxe return; 1873434Sesaxe 1883434Sesaxe pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL); 1893434Sesaxe } 1903434Sesaxe 1913434Sesaxe /* 1923434Sesaxe * Called to indicate a new CPU has started up so 1933434Sesaxe * that either t0 or the slave startup thread can 1943434Sesaxe * be accounted for. 
1953434Sesaxe */ 1963434Sesaxe void 1973434Sesaxe pg_cmt_cpu_startup(cpu_t *cp) 1983434Sesaxe { 1998906SEric.Saxe@Sun.COM pg_ev_thread_swtch(cp, gethrtime_unscaled(), cp->cpu_idle_thread, 2008906SEric.Saxe@Sun.COM cp->cpu_thread); 2013434Sesaxe } 2023434Sesaxe 2033434Sesaxe /* 2043434Sesaxe * Return non-zero if thread can migrate between "from" and "to" 2053434Sesaxe * without a performance penalty 2063434Sesaxe */ 2073434Sesaxe int 2083434Sesaxe pg_cmt_can_migrate(cpu_t *from, cpu_t *to) 2093434Sesaxe { 2103434Sesaxe if (from->cpu_physid->cpu_cacheid == 2113434Sesaxe to->cpu_physid->cpu_cacheid) 2123434Sesaxe return (1); 2133434Sesaxe return (0); 2143434Sesaxe } 2153434Sesaxe 2163434Sesaxe /* 2173434Sesaxe * CMT class specific PG allocation 2183434Sesaxe */ 2193434Sesaxe static pg_t * 2203434Sesaxe pg_cmt_alloc(void) 2213434Sesaxe { 2223434Sesaxe return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP)); 2233434Sesaxe } 2243434Sesaxe 2253434Sesaxe /* 2263434Sesaxe * Class specific PG de-allocation 2273434Sesaxe */ 2283434Sesaxe static void 2293434Sesaxe pg_cmt_free(pg_t *pg) 2303434Sesaxe { 2313434Sesaxe ASSERT(pg != NULL); 2323434Sesaxe ASSERT(IS_CMT_PG(pg)); 2333434Sesaxe 2343434Sesaxe kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t)); 2353434Sesaxe } 2363434Sesaxe 2373434Sesaxe /* 2388906SEric.Saxe@Sun.COM * Given a hardware sharing relationship, return which dispatcher 2398906SEric.Saxe@Sun.COM * policies should be implemented to optimize performance and efficiency 2408906SEric.Saxe@Sun.COM */ 2418906SEric.Saxe@Sun.COM static pg_cmt_policy_t 2428906SEric.Saxe@Sun.COM pg_cmt_policy(pghw_type_t hw) 2438906SEric.Saxe@Sun.COM { 2448906SEric.Saxe@Sun.COM pg_cmt_policy_t p; 2458906SEric.Saxe@Sun.COM 2468906SEric.Saxe@Sun.COM /* 2478906SEric.Saxe@Sun.COM * Give the platform a chance to override the default 2488906SEric.Saxe@Sun.COM */ 2498906SEric.Saxe@Sun.COM if ((p = pg_plat_cmt_policy(hw)) != CMT_NO_POLICY) 2508906SEric.Saxe@Sun.COM return (p); 2518906SEric.Saxe@Sun.COM 
2528906SEric.Saxe@Sun.COM switch (hw) { 2538906SEric.Saxe@Sun.COM case PGHW_IPIPE: 2548906SEric.Saxe@Sun.COM case PGHW_FPU: 25510947SSrihari.Venkatesan@Sun.COM case PGHW_PROCNODE: 2568906SEric.Saxe@Sun.COM case PGHW_CHIP: 2578906SEric.Saxe@Sun.COM return (CMT_BALANCE); 2588906SEric.Saxe@Sun.COM case PGHW_CACHE: 2598906SEric.Saxe@Sun.COM return (CMT_AFFINITY); 2608906SEric.Saxe@Sun.COM case PGHW_POW_ACTIVE: 2618906SEric.Saxe@Sun.COM case PGHW_POW_IDLE: 2628906SEric.Saxe@Sun.COM return (CMT_BALANCE); 2638906SEric.Saxe@Sun.COM default: 2648906SEric.Saxe@Sun.COM return (CMT_NO_POLICY); 2658906SEric.Saxe@Sun.COM } 2668906SEric.Saxe@Sun.COM } 2678906SEric.Saxe@Sun.COM 2688906SEric.Saxe@Sun.COM /* 2698906SEric.Saxe@Sun.COM * Rank the importance of optimizing for the pg1 relationship vs. 2708906SEric.Saxe@Sun.COM * the pg2 relationship. 2718906SEric.Saxe@Sun.COM */ 2728906SEric.Saxe@Sun.COM static pg_cmt_t * 2738906SEric.Saxe@Sun.COM pg_cmt_hier_rank(pg_cmt_t *pg1, pg_cmt_t *pg2) 2748906SEric.Saxe@Sun.COM { 2758906SEric.Saxe@Sun.COM pghw_type_t hw1 = ((pghw_t *)pg1)->pghw_hw; 2768906SEric.Saxe@Sun.COM pghw_type_t hw2 = ((pghw_t *)pg2)->pghw_hw; 2778906SEric.Saxe@Sun.COM 2788906SEric.Saxe@Sun.COM /* 2798906SEric.Saxe@Sun.COM * A power domain is only important if CPUPM is enabled. 
2808906SEric.Saxe@Sun.COM */ 2818906SEric.Saxe@Sun.COM if (cpupm_get_policy() == CPUPM_POLICY_DISABLED) { 2828906SEric.Saxe@Sun.COM if (PGHW_IS_PM_DOMAIN(hw1) && !PGHW_IS_PM_DOMAIN(hw2)) 2838906SEric.Saxe@Sun.COM return (pg2); 2848906SEric.Saxe@Sun.COM if (PGHW_IS_PM_DOMAIN(hw2) && !PGHW_IS_PM_DOMAIN(hw1)) 2858906SEric.Saxe@Sun.COM return (pg1); 2868906SEric.Saxe@Sun.COM } 2878906SEric.Saxe@Sun.COM 2888906SEric.Saxe@Sun.COM /* 2898906SEric.Saxe@Sun.COM * Otherwise, ask the platform 2908906SEric.Saxe@Sun.COM */ 2918906SEric.Saxe@Sun.COM if (pg_plat_hw_rank(hw1, hw2) == hw1) 2928906SEric.Saxe@Sun.COM return (pg1); 2938906SEric.Saxe@Sun.COM else 2948906SEric.Saxe@Sun.COM return (pg2); 2958906SEric.Saxe@Sun.COM } 2968906SEric.Saxe@Sun.COM 2978906SEric.Saxe@Sun.COM /* 2988906SEric.Saxe@Sun.COM * Initialize CMT callbacks for the given PG 2998906SEric.Saxe@Sun.COM */ 3008906SEric.Saxe@Sun.COM static void 3018906SEric.Saxe@Sun.COM cmt_callback_init(pg_t *pg) 3028906SEric.Saxe@Sun.COM { 3039746SEric.Saxe@Sun.COM /* 3049746SEric.Saxe@Sun.COM * Stick with the default callbacks if there isn't going to be 3059746SEric.Saxe@Sun.COM * any CMT thread placement optimizations implemented. 3069746SEric.Saxe@Sun.COM */ 3079746SEric.Saxe@Sun.COM if (((pg_cmt_t *)pg)->cmt_policy == CMT_NO_POLICY) 3089746SEric.Saxe@Sun.COM return; 3099746SEric.Saxe@Sun.COM 3108906SEric.Saxe@Sun.COM switch (((pghw_t *)pg)->pghw_hw) { 3118906SEric.Saxe@Sun.COM case PGHW_POW_ACTIVE: 3128906SEric.Saxe@Sun.COM pg->pg_cb.thread_swtch = cmt_ev_thread_swtch_pwr; 3138906SEric.Saxe@Sun.COM pg->pg_cb.thread_remain = cmt_ev_thread_remain_pwr; 3148906SEric.Saxe@Sun.COM break; 3158906SEric.Saxe@Sun.COM default: 3168906SEric.Saxe@Sun.COM pg->pg_cb.thread_swtch = cmt_ev_thread_swtch; 3178906SEric.Saxe@Sun.COM 3188906SEric.Saxe@Sun.COM } 3198906SEric.Saxe@Sun.COM } 3208906SEric.Saxe@Sun.COM 3218906SEric.Saxe@Sun.COM /* 3228906SEric.Saxe@Sun.COM * Promote PG above it's current parent. 
3239438SEric.Saxe@Sun.COM * This is only legal if PG has an equal or greater number of CPUs than its 3249438SEric.Saxe@Sun.COM * parent. 3259438SEric.Saxe@Sun.COM * 3269438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPUs 3279438SEric.Saxe@Sun.COM * in the PG being promoted), and may be invoked from a context where one CPU's 3289438SEric.Saxe@Sun.COM * PG data is under construction. In this case the argument "pgdata", if not 3299438SEric.Saxe@Sun.COM * NULL, is a reference to the CPU's under-construction PG data. 3303434Sesaxe */ 3318906SEric.Saxe@Sun.COM static void 3329438SEric.Saxe@Sun.COM cmt_hier_promote(pg_cmt_t *pg, cpu_pg_t *pgdata) 3333434Sesaxe { 3348906SEric.Saxe@Sun.COM pg_cmt_t *parent; 3358906SEric.Saxe@Sun.COM group_t *children; 3368906SEric.Saxe@Sun.COM cpu_t *cpu; 3378906SEric.Saxe@Sun.COM group_iter_t iter; 3388906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter; 3398906SEric.Saxe@Sun.COM int r; 3408906SEric.Saxe@Sun.COM int err; 34111263SEric.Saxe@Sun.COM int nchildren; 3428906SEric.Saxe@Sun.COM 3438906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 3448906SEric.Saxe@Sun.COM 3458906SEric.Saxe@Sun.COM parent = pg->cmt_parent; 3468906SEric.Saxe@Sun.COM if (parent == NULL) { 3478906SEric.Saxe@Sun.COM /* 3488906SEric.Saxe@Sun.COM * Nothing to do 3498906SEric.Saxe@Sun.COM */ 3508906SEric.Saxe@Sun.COM return; 3518906SEric.Saxe@Sun.COM } 3528906SEric.Saxe@Sun.COM 3538906SEric.Saxe@Sun.COM ASSERT(PG_NUM_CPUS((pg_t *)pg) >= PG_NUM_CPUS((pg_t *)parent)); 3548906SEric.Saxe@Sun.COM 3558906SEric.Saxe@Sun.COM /* 3568906SEric.Saxe@Sun.COM * We're changing around the hierarchy, which is actively traversed 3578906SEric.Saxe@Sun.COM * by the dispatcher. Pause CPUS to ensure exclusivity. 
3588906SEric.Saxe@Sun.COM */ 3598906SEric.Saxe@Sun.COM pause_cpus(NULL); 3608906SEric.Saxe@Sun.COM 3618906SEric.Saxe@Sun.COM /* 3628906SEric.Saxe@Sun.COM * If necessary, update the parent's sibling set, replacing parent 3638906SEric.Saxe@Sun.COM * with PG. 3648906SEric.Saxe@Sun.COM */ 3658906SEric.Saxe@Sun.COM if (parent->cmt_siblings) { 3668906SEric.Saxe@Sun.COM if (group_remove(parent->cmt_siblings, parent, GRP_NORESIZE) 3678906SEric.Saxe@Sun.COM != -1) { 3688906SEric.Saxe@Sun.COM r = group_add(parent->cmt_siblings, pg, GRP_NORESIZE); 3698906SEric.Saxe@Sun.COM ASSERT(r != -1); 3708906SEric.Saxe@Sun.COM } 3718906SEric.Saxe@Sun.COM } 3728906SEric.Saxe@Sun.COM 3738906SEric.Saxe@Sun.COM /* 3748906SEric.Saxe@Sun.COM * If the parent is at the top of the hierarchy, replace it's entry 3758906SEric.Saxe@Sun.COM * in the root lgroup's group of top level PGs. 3768906SEric.Saxe@Sun.COM */ 3778906SEric.Saxe@Sun.COM if (parent->cmt_parent == NULL && 3788906SEric.Saxe@Sun.COM parent->cmt_siblings != &cmt_root->cl_pgs) { 3798906SEric.Saxe@Sun.COM if (group_remove(&cmt_root->cl_pgs, parent, GRP_NORESIZE) 3808906SEric.Saxe@Sun.COM != -1) { 3818906SEric.Saxe@Sun.COM r = group_add(&cmt_root->cl_pgs, pg, GRP_NORESIZE); 3828906SEric.Saxe@Sun.COM ASSERT(r != -1); 3838906SEric.Saxe@Sun.COM } 3848906SEric.Saxe@Sun.COM } 3858906SEric.Saxe@Sun.COM 3868906SEric.Saxe@Sun.COM /* 3878906SEric.Saxe@Sun.COM * We assume (and therefore assert) that the PG being promoted is an 3888906SEric.Saxe@Sun.COM * only child of it's parent. Update the parent's children set 3898906SEric.Saxe@Sun.COM * replacing PG's entry with the parent (since the parent is becoming 39011263SEric.Saxe@Sun.COM * the child). Then have PG and the parent swap children sets and 39111263SEric.Saxe@Sun.COM * children counts. 
3928906SEric.Saxe@Sun.COM */ 3938906SEric.Saxe@Sun.COM ASSERT(GROUP_SIZE(parent->cmt_children) <= 1); 3948906SEric.Saxe@Sun.COM if (group_remove(parent->cmt_children, pg, GRP_NORESIZE) != -1) { 3958906SEric.Saxe@Sun.COM r = group_add(parent->cmt_children, parent, GRP_NORESIZE); 3968906SEric.Saxe@Sun.COM ASSERT(r != -1); 3978906SEric.Saxe@Sun.COM } 3988906SEric.Saxe@Sun.COM 3998906SEric.Saxe@Sun.COM children = pg->cmt_children; 4008906SEric.Saxe@Sun.COM pg->cmt_children = parent->cmt_children; 4018906SEric.Saxe@Sun.COM parent->cmt_children = children; 4028906SEric.Saxe@Sun.COM 40311263SEric.Saxe@Sun.COM nchildren = pg->cmt_nchildren; 40411263SEric.Saxe@Sun.COM pg->cmt_nchildren = parent->cmt_nchildren; 40511263SEric.Saxe@Sun.COM parent->cmt_nchildren = nchildren; 40611263SEric.Saxe@Sun.COM 4078906SEric.Saxe@Sun.COM /* 4088906SEric.Saxe@Sun.COM * Update the sibling references for PG and it's parent 4098906SEric.Saxe@Sun.COM */ 4108906SEric.Saxe@Sun.COM pg->cmt_siblings = parent->cmt_siblings; 4118906SEric.Saxe@Sun.COM parent->cmt_siblings = pg->cmt_children; 4128906SEric.Saxe@Sun.COM 4138906SEric.Saxe@Sun.COM /* 4148906SEric.Saxe@Sun.COM * Update any cached lineages in the per CPU pg data. 4158906SEric.Saxe@Sun.COM */ 4168906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT(pg, cpu_iter); 4178906SEric.Saxe@Sun.COM while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) { 4188906SEric.Saxe@Sun.COM int idx; 41911263SEric.Saxe@Sun.COM int sz; 4208906SEric.Saxe@Sun.COM pg_cmt_t *cpu_pg; 4219438SEric.Saxe@Sun.COM cpu_pg_t *pgd; /* CPU's PG data */ 4229438SEric.Saxe@Sun.COM 4239438SEric.Saxe@Sun.COM /* 4249438SEric.Saxe@Sun.COM * The CPU's whose lineage is under construction still 4259438SEric.Saxe@Sun.COM * references the bootstrap CPU PG data structure. 
4269438SEric.Saxe@Sun.COM */ 4279438SEric.Saxe@Sun.COM if (pg_cpu_is_bootstrapped(cpu)) 4289438SEric.Saxe@Sun.COM pgd = pgdata; 4299438SEric.Saxe@Sun.COM else 4309438SEric.Saxe@Sun.COM pgd = cpu->cpu_pg; 4318906SEric.Saxe@Sun.COM 4328906SEric.Saxe@Sun.COM /* 4338906SEric.Saxe@Sun.COM * Iterate over the CPU's PGs updating the children 4348906SEric.Saxe@Sun.COM * of the PG being promoted, since they have a new parent. 4358906SEric.Saxe@Sun.COM */ 4368906SEric.Saxe@Sun.COM group_iter_init(&iter); 4379438SEric.Saxe@Sun.COM while ((cpu_pg = group_iterate(&pgd->cmt_pgs, &iter)) != NULL) { 4388906SEric.Saxe@Sun.COM if (cpu_pg->cmt_parent == pg) { 4398906SEric.Saxe@Sun.COM cpu_pg->cmt_parent = parent; 4408906SEric.Saxe@Sun.COM } 4418906SEric.Saxe@Sun.COM } 4428906SEric.Saxe@Sun.COM 4438906SEric.Saxe@Sun.COM /* 4448906SEric.Saxe@Sun.COM * Update the CMT load balancing lineage 4458906SEric.Saxe@Sun.COM */ 4469438SEric.Saxe@Sun.COM if ((idx = group_find(&pgd->cmt_pgs, (void *)pg)) == -1) { 4478906SEric.Saxe@Sun.COM /* 4488906SEric.Saxe@Sun.COM * Unless this is the CPU who's lineage is being 4498906SEric.Saxe@Sun.COM * constructed, the PG being promoted should be 4508906SEric.Saxe@Sun.COM * in the lineage. 
4518906SEric.Saxe@Sun.COM */ 4529438SEric.Saxe@Sun.COM ASSERT(pg_cpu_is_bootstrapped(cpu)); 4538906SEric.Saxe@Sun.COM continue; 4548906SEric.Saxe@Sun.COM } 4558906SEric.Saxe@Sun.COM 45611263SEric.Saxe@Sun.COM ASSERT(idx > 0); 4579438SEric.Saxe@Sun.COM ASSERT(GROUP_ACCESS(&pgd->cmt_pgs, idx - 1) == parent); 4588906SEric.Saxe@Sun.COM 4598906SEric.Saxe@Sun.COM /* 4608906SEric.Saxe@Sun.COM * Have the child and the parent swap places in the CPU's 4618906SEric.Saxe@Sun.COM * lineage 4628906SEric.Saxe@Sun.COM */ 4639438SEric.Saxe@Sun.COM group_remove_at(&pgd->cmt_pgs, idx); 4649438SEric.Saxe@Sun.COM group_remove_at(&pgd->cmt_pgs, idx - 1); 4659438SEric.Saxe@Sun.COM err = group_add_at(&pgd->cmt_pgs, parent, idx); 4668906SEric.Saxe@Sun.COM ASSERT(err == 0); 4679438SEric.Saxe@Sun.COM err = group_add_at(&pgd->cmt_pgs, pg, idx - 1); 4688906SEric.Saxe@Sun.COM ASSERT(err == 0); 46911263SEric.Saxe@Sun.COM 47011263SEric.Saxe@Sun.COM /* 47111263SEric.Saxe@Sun.COM * Ensure cmt_lineage references CPU's leaf PG. 47211263SEric.Saxe@Sun.COM * Since cmt_pgs is top-down ordered, the bottom is the last 47311263SEric.Saxe@Sun.COM * element. 47411263SEric.Saxe@Sun.COM */ 47511263SEric.Saxe@Sun.COM if ((sz = GROUP_SIZE(&pgd->cmt_pgs)) > 0) 47611263SEric.Saxe@Sun.COM pgd->cmt_lineage = GROUP_ACCESS(&pgd->cmt_pgs, sz - 1); 4778906SEric.Saxe@Sun.COM } 4788906SEric.Saxe@Sun.COM 4798906SEric.Saxe@Sun.COM /* 4808906SEric.Saxe@Sun.COM * Update the parent references for PG and it's parent 4818906SEric.Saxe@Sun.COM */ 4828906SEric.Saxe@Sun.COM pg->cmt_parent = parent->cmt_parent; 4838906SEric.Saxe@Sun.COM parent->cmt_parent = pg; 4848906SEric.Saxe@Sun.COM 4858906SEric.Saxe@Sun.COM start_cpus(); 4863434Sesaxe } 4873434Sesaxe 4883434Sesaxe /* 4893434Sesaxe * CMT class callback for a new CPU entering the system 4909438SEric.Saxe@Sun.COM * 4919438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPU 4929438SEric.Saxe@Sun.COM * being initialized). 
The argument "pgdata" is a reference to the CPU's PG 4939438SEric.Saxe@Sun.COM * data to be constructed. 4949438SEric.Saxe@Sun.COM * 4959438SEric.Saxe@Sun.COM * cp->cpu_pg is used by the dispatcher to access the CPU's PG data 4969438SEric.Saxe@Sun.COM * references a "bootstrap" structure. pg_cmt_cpu_init() and the routines it 4979438SEric.Saxe@Sun.COM * calls must be careful to operate only on the "pgdata" argument, and not 4989438SEric.Saxe@Sun.COM * cp->cpu_pg. 4993434Sesaxe */ 5003434Sesaxe static void 5019438SEric.Saxe@Sun.COM pg_cmt_cpu_init(cpu_t *cp, cpu_pg_t *pgdata) 5023434Sesaxe { 5033434Sesaxe pg_cmt_t *pg; 5043434Sesaxe group_t *cmt_pgs; 5058906SEric.Saxe@Sun.COM int levels, level; 5063434Sesaxe pghw_type_t hw; 5073434Sesaxe pg_t *pg_cache = NULL; 5083434Sesaxe pg_cmt_t *cpu_cmt_hier[PGHW_NUM_COMPONENTS]; 5093434Sesaxe lgrp_handle_t lgrp_handle; 5103434Sesaxe cmt_lgrp_t *lgrp; 5119036SEric.Saxe@Sun.COM cmt_lineage_validation_t lineage_status; 5123434Sesaxe 5133434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 5149438SEric.Saxe@Sun.COM ASSERT(pg_cpu_is_bootstrapped(cp)); 5153434Sesaxe 5168906SEric.Saxe@Sun.COM if (cmt_sched_disabled) 5178906SEric.Saxe@Sun.COM return; 5188906SEric.Saxe@Sun.COM 5193434Sesaxe /* 5203434Sesaxe * A new CPU is coming into the system. 
5213434Sesaxe * Interrogate the platform to see if the CPU 5228906SEric.Saxe@Sun.COM * has any performance or efficiency relevant 5238906SEric.Saxe@Sun.COM * sharing relationships 5243434Sesaxe */ 5259438SEric.Saxe@Sun.COM cmt_pgs = &pgdata->cmt_pgs; 5269438SEric.Saxe@Sun.COM pgdata->cmt_lineage = NULL; 5273434Sesaxe 5283434Sesaxe bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier)); 5298906SEric.Saxe@Sun.COM levels = 0; 5303434Sesaxe for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) { 5313434Sesaxe 5328906SEric.Saxe@Sun.COM pg_cmt_policy_t policy; 5338906SEric.Saxe@Sun.COM 5343434Sesaxe /* 5358906SEric.Saxe@Sun.COM * We're only interested in the hw sharing relationships 5368906SEric.Saxe@Sun.COM * for which we know how to optimize. 5373434Sesaxe */ 5388906SEric.Saxe@Sun.COM policy = pg_cmt_policy(hw); 5398906SEric.Saxe@Sun.COM if (policy == CMT_NO_POLICY || 5408906SEric.Saxe@Sun.COM pg_plat_hw_shared(cp, hw) == 0) 5413434Sesaxe continue; 5423434Sesaxe 5433434Sesaxe /* 5449746SEric.Saxe@Sun.COM * We will still create the PGs for hardware sharing 5459746SEric.Saxe@Sun.COM * relationships that have been blacklisted, but won't 5469746SEric.Saxe@Sun.COM * implement CMT thread placement optimizations against them. 5478906SEric.Saxe@Sun.COM */ 5489746SEric.Saxe@Sun.COM if (cmt_hw_blacklisted[hw] == 1) 5499746SEric.Saxe@Sun.COM policy = CMT_NO_POLICY; 5508906SEric.Saxe@Sun.COM 5518906SEric.Saxe@Sun.COM /* 5523434Sesaxe * Find (or create) the PG associated with 5533434Sesaxe * the hw sharing relationship in which cp 5543434Sesaxe * belongs. 5553434Sesaxe * 5563434Sesaxe * Determine if a suitable PG already 5573434Sesaxe * exists, or if one needs to be created. 5583434Sesaxe */ 5593434Sesaxe pg = (pg_cmt_t *)pghw_place_cpu(cp, hw); 5603434Sesaxe if (pg == NULL) { 5613434Sesaxe /* 5623434Sesaxe * Create a new one. 5633434Sesaxe * Initialize the common... 5643434Sesaxe */ 5653434Sesaxe pg = (pg_cmt_t *)pg_create(pg_cmt_class_id); 5663434Sesaxe 5673434Sesaxe /* ... physical ... 
*/ 5683434Sesaxe pghw_init((pghw_t *)pg, cp, hw); 5693434Sesaxe 5703434Sesaxe /* 5713434Sesaxe * ... and CMT specific portions of the 5723434Sesaxe * structure. 5733434Sesaxe */ 5748906SEric.Saxe@Sun.COM pg->cmt_policy = policy; 5758906SEric.Saxe@Sun.COM 5768906SEric.Saxe@Sun.COM /* CMT event callbacks */ 5778906SEric.Saxe@Sun.COM cmt_callback_init((pg_t *)pg); 5788906SEric.Saxe@Sun.COM 5793434Sesaxe bitset_init(&pg->cmt_cpus_actv_set); 5803434Sesaxe group_create(&pg->cmt_cpus_actv); 5813434Sesaxe } else { 5823434Sesaxe ASSERT(IS_CMT_PG(pg)); 5833434Sesaxe } 5843434Sesaxe 585*11389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++; 586*11389SAlexander.Kolbasov@Sun.COM 5873434Sesaxe /* Add the CPU to the PG */ 5889438SEric.Saxe@Sun.COM pg_cpu_add((pg_t *)pg, cp, pgdata); 5893434Sesaxe 5903434Sesaxe /* 5918408SEric.Saxe@Sun.COM * Ensure capacity of the active CPU group/bitset 5923434Sesaxe */ 5933434Sesaxe group_expand(&pg->cmt_cpus_actv, 5943434Sesaxe GROUP_SIZE(&((pg_t *)pg)->pg_cpus)); 5953434Sesaxe 5963434Sesaxe if (cp->cpu_seqid >= 5973434Sesaxe bitset_capacity(&pg->cmt_cpus_actv_set)) { 5983434Sesaxe bitset_resize(&pg->cmt_cpus_actv_set, 5993434Sesaxe cp->cpu_seqid + 1); 6003434Sesaxe } 6013434Sesaxe 6023434Sesaxe /* 6038906SEric.Saxe@Sun.COM * Build a lineage of CMT PGs for load balancing / coalescence 6043434Sesaxe */ 6058906SEric.Saxe@Sun.COM if (policy & (CMT_BALANCE | CMT_COALESCE)) { 6068906SEric.Saxe@Sun.COM cpu_cmt_hier[levels++] = pg; 6073434Sesaxe } 6083434Sesaxe 6093434Sesaxe /* Cache this for later */ 6103434Sesaxe if (hw == PGHW_CACHE) 6113434Sesaxe pg_cache = (pg_t *)pg; 6123434Sesaxe } 6133434Sesaxe 6148906SEric.Saxe@Sun.COM group_expand(cmt_pgs, levels); 6158408SEric.Saxe@Sun.COM 6168408SEric.Saxe@Sun.COM if (cmt_root == NULL) 6178408SEric.Saxe@Sun.COM cmt_root = pg_cmt_lgrp_create(lgrp_plat_root_hand()); 6183434Sesaxe 6193434Sesaxe /* 6208906SEric.Saxe@Sun.COM * Find the lgrp that encapsulates this CPU's CMT hierarchy 
6218408SEric.Saxe@Sun.COM */ 6228408SEric.Saxe@Sun.COM lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id); 6238408SEric.Saxe@Sun.COM if ((lgrp = pg_cmt_find_lgrp(lgrp_handle)) == NULL) 6248408SEric.Saxe@Sun.COM lgrp = pg_cmt_lgrp_create(lgrp_handle); 6258408SEric.Saxe@Sun.COM 6268408SEric.Saxe@Sun.COM /* 6278906SEric.Saxe@Sun.COM * Ascendingly sort the PGs in the lineage by number of CPUs 6288906SEric.Saxe@Sun.COM */ 6298906SEric.Saxe@Sun.COM pg_cmt_hier_sort(cpu_cmt_hier, levels); 6308906SEric.Saxe@Sun.COM 6318906SEric.Saxe@Sun.COM /* 6328906SEric.Saxe@Sun.COM * Examine the lineage and validate it. 6338906SEric.Saxe@Sun.COM * This routine will also try to fix the lineage along with the 6348906SEric.Saxe@Sun.COM * rest of the PG hierarchy should it detect an issue. 6358906SEric.Saxe@Sun.COM * 6369036SEric.Saxe@Sun.COM * If it returns anything other than VALID or REPAIRED, an 6379036SEric.Saxe@Sun.COM * unrecoverable error has occurred, and we cannot proceed. 6388906SEric.Saxe@Sun.COM */ 6399438SEric.Saxe@Sun.COM lineage_status = pg_cmt_lineage_validate(cpu_cmt_hier, &levels, pgdata); 6409036SEric.Saxe@Sun.COM if ((lineage_status != CMT_LINEAGE_VALID) && 6419438SEric.Saxe@Sun.COM (lineage_status != CMT_LINEAGE_REPAIRED)) { 6429438SEric.Saxe@Sun.COM /* 6439438SEric.Saxe@Sun.COM * In the case of an unrecoverable error where CMT scheduling 6449438SEric.Saxe@Sun.COM * has been disabled, assert that the under construction CPU's 6459438SEric.Saxe@Sun.COM * PG data has an empty CMT load balancing lineage. 
6469438SEric.Saxe@Sun.COM */ 6479438SEric.Saxe@Sun.COM ASSERT((cmt_sched_disabled == 0) || 6489438SEric.Saxe@Sun.COM (GROUP_SIZE(&(pgdata->cmt_pgs)) == 0)); 6498906SEric.Saxe@Sun.COM return; 6509438SEric.Saxe@Sun.COM } 6518906SEric.Saxe@Sun.COM 6528906SEric.Saxe@Sun.COM /* 6538906SEric.Saxe@Sun.COM * For existing PGs in the lineage, verify that the parent is 6548906SEric.Saxe@Sun.COM * correct, as the generation in the lineage may have changed 6558906SEric.Saxe@Sun.COM * as a result of the sorting. Start the traversal at the top 6568906SEric.Saxe@Sun.COM * of the lineage, moving down. 6578906SEric.Saxe@Sun.COM */ 6588906SEric.Saxe@Sun.COM for (level = levels - 1; level >= 0; ) { 6598906SEric.Saxe@Sun.COM int reorg; 6608906SEric.Saxe@Sun.COM 6618906SEric.Saxe@Sun.COM reorg = 0; 6628906SEric.Saxe@Sun.COM pg = cpu_cmt_hier[level]; 6638906SEric.Saxe@Sun.COM 6648906SEric.Saxe@Sun.COM /* 6658906SEric.Saxe@Sun.COM * Promote PGs at an incorrect generation into place. 6668906SEric.Saxe@Sun.COM */ 6678906SEric.Saxe@Sun.COM while (pg->cmt_parent && 6688906SEric.Saxe@Sun.COM pg->cmt_parent != cpu_cmt_hier[level + 1]) { 6699438SEric.Saxe@Sun.COM cmt_hier_promote(pg, pgdata); 6708906SEric.Saxe@Sun.COM reorg++; 6718906SEric.Saxe@Sun.COM } 6728906SEric.Saxe@Sun.COM if (reorg > 0) 6738906SEric.Saxe@Sun.COM level = levels - 1; 6748906SEric.Saxe@Sun.COM else 6758906SEric.Saxe@Sun.COM level--; 6768906SEric.Saxe@Sun.COM } 6778906SEric.Saxe@Sun.COM 6788906SEric.Saxe@Sun.COM /* 6798408SEric.Saxe@Sun.COM * For each of the PGs in the CPU's lineage: 6808906SEric.Saxe@Sun.COM * - Add an entry in the CPU sorted CMT PG group 6818906SEric.Saxe@Sun.COM * which is used for top down CMT load balancing 6823434Sesaxe * - Tie the PG into the CMT hierarchy by connecting 6833434Sesaxe * it to it's parent and siblings. 
6843434Sesaxe */ 6858906SEric.Saxe@Sun.COM for (level = 0; level < levels; level++) { 6863434Sesaxe uint_t children; 6873434Sesaxe int err; 6883434Sesaxe 6893434Sesaxe pg = cpu_cmt_hier[level]; 6908906SEric.Saxe@Sun.COM err = group_add_at(cmt_pgs, pg, levels - level - 1); 6913434Sesaxe ASSERT(err == 0); 6923434Sesaxe 6933434Sesaxe if (level == 0) 6949438SEric.Saxe@Sun.COM pgdata->cmt_lineage = (pg_t *)pg; 6953434Sesaxe 6963434Sesaxe if (pg->cmt_siblings != NULL) { 6973434Sesaxe /* Already initialized */ 6983434Sesaxe ASSERT(pg->cmt_parent == NULL || 6993434Sesaxe pg->cmt_parent == cpu_cmt_hier[level + 1]); 7003434Sesaxe ASSERT(pg->cmt_siblings == &lgrp->cl_pgs || 7015933Sjb145095 ((pg->cmt_parent != NULL) && 7025933Sjb145095 pg->cmt_siblings == pg->cmt_parent->cmt_children)); 7033434Sesaxe continue; 7043434Sesaxe } 7053434Sesaxe 7068906SEric.Saxe@Sun.COM if ((level + 1) == levels) { 7073434Sesaxe pg->cmt_parent = NULL; 7088408SEric.Saxe@Sun.COM 7093434Sesaxe pg->cmt_siblings = &lgrp->cl_pgs; 7103434Sesaxe children = ++lgrp->cl_npgs; 7118906SEric.Saxe@Sun.COM if (cmt_root != lgrp) 7128906SEric.Saxe@Sun.COM cmt_root->cl_npgs++; 7133434Sesaxe } else { 7143434Sesaxe pg->cmt_parent = cpu_cmt_hier[level + 1]; 7153434Sesaxe 7163434Sesaxe /* 7173434Sesaxe * A good parent keeps track of their children. 7183434Sesaxe * The parent's children group is also the PG's 7193434Sesaxe * siblings. 
7203434Sesaxe */ 7213434Sesaxe if (pg->cmt_parent->cmt_children == NULL) { 7223434Sesaxe pg->cmt_parent->cmt_children = 7233434Sesaxe kmem_zalloc(sizeof (group_t), KM_SLEEP); 7243434Sesaxe group_create(pg->cmt_parent->cmt_children); 7253434Sesaxe } 7263434Sesaxe pg->cmt_siblings = pg->cmt_parent->cmt_children; 7273434Sesaxe children = ++pg->cmt_parent->cmt_nchildren; 7283434Sesaxe } 7298408SEric.Saxe@Sun.COM 7303434Sesaxe group_expand(pg->cmt_siblings, children); 7318408SEric.Saxe@Sun.COM group_expand(&cmt_root->cl_pgs, cmt_root->cl_npgs); 7323434Sesaxe } 7333434Sesaxe 7343434Sesaxe /* 7353434Sesaxe * Cache the chip and core IDs in the cpu_t->cpu_physid structure 7363434Sesaxe * for fast lookups later. 7373434Sesaxe */ 7383434Sesaxe if (cp->cpu_physid) { 7393434Sesaxe cp->cpu_physid->cpu_chipid = 7403434Sesaxe pg_plat_hw_instance_id(cp, PGHW_CHIP); 7413434Sesaxe cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp); 7423434Sesaxe 7433434Sesaxe /* 7443434Sesaxe * If this cpu has a PG representing shared cache, then set 7453434Sesaxe * cpu_cacheid to that PG's logical id 7463434Sesaxe */ 7473434Sesaxe if (pg_cache) 7483434Sesaxe cp->cpu_physid->cpu_cacheid = pg_cache->pg_id; 7493434Sesaxe } 7503434Sesaxe 7513434Sesaxe /* CPU0 only initialization */ 7523434Sesaxe if (is_cpu0) { 7533434Sesaxe is_cpu0 = 0; 7543676Sesaxe cpu0_lgrp = lgrp; 7553434Sesaxe } 7563434Sesaxe 7573434Sesaxe } 7583434Sesaxe 7593434Sesaxe /* 7603434Sesaxe * Class callback when a CPU is leaving the system (deletion) 7619438SEric.Saxe@Sun.COM * 7629438SEric.Saxe@Sun.COM * "pgdata" is a reference to the CPU's PG data to be deconstructed. 7639438SEric.Saxe@Sun.COM * 7649438SEric.Saxe@Sun.COM * cp->cpu_pg is used by the dispatcher to access the CPU's PG data 7659438SEric.Saxe@Sun.COM * references a "bootstrap" structure across this function's invocation. 
766*11389SAlexander.Kolbasov@Sun.COM * pg_cmt_cpu_fini() and the routines it calls must be careful to operate only 7679438SEric.Saxe@Sun.COM * on the "pgdata" argument, and not cp->cpu_pg. 7683434Sesaxe */ 7693434Sesaxe static void 7709438SEric.Saxe@Sun.COM pg_cmt_cpu_fini(cpu_t *cp, cpu_pg_t *pgdata) 7713434Sesaxe { 7723434Sesaxe group_iter_t i; 7733434Sesaxe pg_cmt_t *pg; 7743434Sesaxe group_t *pgs, *cmt_pgs; 7753434Sesaxe lgrp_handle_t lgrp_handle; 7763434Sesaxe cmt_lgrp_t *lgrp; 7773434Sesaxe 7788906SEric.Saxe@Sun.COM if (cmt_sched_disabled) 7798906SEric.Saxe@Sun.COM return; 7808906SEric.Saxe@Sun.COM 7819438SEric.Saxe@Sun.COM ASSERT(pg_cpu_is_bootstrapped(cp)); 7829438SEric.Saxe@Sun.COM 7839438SEric.Saxe@Sun.COM pgs = &pgdata->pgs; 7849438SEric.Saxe@Sun.COM cmt_pgs = &pgdata->cmt_pgs; 7853434Sesaxe 7863434Sesaxe /* 7873434Sesaxe * Find the lgroup that encapsulates this CPU's CMT hierarchy 7883434Sesaxe */ 7893434Sesaxe lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id); 7903676Sesaxe 7913434Sesaxe lgrp = pg_cmt_find_lgrp(lgrp_handle); 7928689SEric.Saxe@Sun.COM if (ncpus == 1 && lgrp != cpu0_lgrp) { 7933676Sesaxe /* 7948689SEric.Saxe@Sun.COM * One might wonder how we could be deconfiguring the 7958689SEric.Saxe@Sun.COM * only CPU in the system. 7963676Sesaxe * 7978689SEric.Saxe@Sun.COM * On Starcat systems when null_proc_lpa is detected, 7988689SEric.Saxe@Sun.COM * the boot CPU (which is already configured into a leaf 7998689SEric.Saxe@Sun.COM * lgroup), is moved into the root lgroup. This is done by 8008689SEric.Saxe@Sun.COM * deconfiguring it from both lgroups and processor 8018689SEric.Saxe@Sun.COM * groups), and then later reconfiguring it back in. This 8028689SEric.Saxe@Sun.COM * call to pg_cmt_cpu_fini() is part of that deconfiguration. 
8038689SEric.Saxe@Sun.COM * 8048689SEric.Saxe@Sun.COM * This special case is detected by noting that the platform 8058689SEric.Saxe@Sun.COM * has changed the CPU's lgrp affiliation (since it now 8068689SEric.Saxe@Sun.COM * belongs in the root). In this case, use the cmt_lgrp_t 8078689SEric.Saxe@Sun.COM * cached for the boot CPU, since this is what needs to be 8088689SEric.Saxe@Sun.COM * torn down. 8093676Sesaxe */ 8103676Sesaxe lgrp = cpu0_lgrp; 8113676Sesaxe } 8123434Sesaxe 8138689SEric.Saxe@Sun.COM ASSERT(lgrp != NULL); 8148689SEric.Saxe@Sun.COM 8153434Sesaxe /* 8163434Sesaxe * First, clean up anything load balancing specific for each of 8173434Sesaxe * the CPU's PGs that participated in CMT load balancing 8183434Sesaxe */ 8199438SEric.Saxe@Sun.COM pg = (pg_cmt_t *)pgdata->cmt_lineage; 8203434Sesaxe while (pg != NULL) { 8213434Sesaxe 822*11389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++; 823*11389SAlexander.Kolbasov@Sun.COM 8243434Sesaxe /* 8253434Sesaxe * Remove the PG from the CPU's load balancing lineage 8263434Sesaxe */ 8273434Sesaxe (void) group_remove(cmt_pgs, pg, GRP_RESIZE); 8283434Sesaxe 8293434Sesaxe /* 8303434Sesaxe * If it's about to become empty, destroy it's children 8313434Sesaxe * group, and remove it's reference from it's siblings. 8323434Sesaxe * This is done here (rather than below) to avoid removing 8333434Sesaxe * our reference from a PG that we just eliminated. 
8343434Sesaxe */ 8353434Sesaxe if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) { 8363434Sesaxe if (pg->cmt_children != NULL) 8373434Sesaxe group_destroy(pg->cmt_children); 8383434Sesaxe if (pg->cmt_siblings != NULL) { 8393434Sesaxe if (pg->cmt_siblings == &lgrp->cl_pgs) 8403434Sesaxe lgrp->cl_npgs--; 8413434Sesaxe else 8423434Sesaxe pg->cmt_parent->cmt_nchildren--; 8433434Sesaxe } 8443434Sesaxe } 8453434Sesaxe pg = pg->cmt_parent; 8463434Sesaxe } 8473434Sesaxe ASSERT(GROUP_SIZE(cmt_pgs) == 0); 8483434Sesaxe 8493434Sesaxe /* 8503434Sesaxe * Now that the load balancing lineage updates have happened, 8513434Sesaxe * remove the CPU from all it's PGs (destroying any that become 8523434Sesaxe * empty). 8533434Sesaxe */ 8543434Sesaxe group_iter_init(&i); 8553434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) { 8563434Sesaxe if (IS_CMT_PG(pg) == 0) 8573434Sesaxe continue; 8583434Sesaxe 8599438SEric.Saxe@Sun.COM pg_cpu_delete((pg_t *)pg, cp, pgdata); 8603434Sesaxe /* 8613434Sesaxe * Deleting the CPU from the PG changes the CPU's 8623434Sesaxe * PG group over which we are actively iterating 8633434Sesaxe * Re-initialize the iteration 8643434Sesaxe */ 8653434Sesaxe group_iter_init(&i); 8663434Sesaxe 8673434Sesaxe if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) { 8683434Sesaxe 8693434Sesaxe /* 8703434Sesaxe * The PG has become zero sized, so destroy it. 
8713434Sesaxe */ 8723434Sesaxe group_destroy(&pg->cmt_cpus_actv); 8733434Sesaxe bitset_fini(&pg->cmt_cpus_actv_set); 8743434Sesaxe pghw_fini((pghw_t *)pg); 8753434Sesaxe 8763434Sesaxe pg_destroy((pg_t *)pg); 8773434Sesaxe } 8783434Sesaxe } 8793434Sesaxe } 8803434Sesaxe 8813434Sesaxe /* 8823434Sesaxe * Class callback when a CPU is entering a cpu partition 8833434Sesaxe */ 8843434Sesaxe static void 8853434Sesaxe pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp) 8863434Sesaxe { 8873434Sesaxe group_t *pgs; 8883434Sesaxe pg_t *pg; 8893434Sesaxe group_iter_t i; 8903434Sesaxe 8913434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 8923434Sesaxe 8938906SEric.Saxe@Sun.COM if (cmt_sched_disabled) 8948906SEric.Saxe@Sun.COM return; 8958906SEric.Saxe@Sun.COM 8963434Sesaxe pgs = &cp->cpu_pg->pgs; 8973434Sesaxe 8983434Sesaxe /* 8993434Sesaxe * Ensure that the new partition's PG bitset 9003434Sesaxe * is large enough for all CMT PG's to which cp 9013434Sesaxe * belongs 9023434Sesaxe */ 9033434Sesaxe group_iter_init(&i); 9043434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) { 9053434Sesaxe if (IS_CMT_PG(pg) == 0) 9063434Sesaxe continue; 9073434Sesaxe 9083434Sesaxe if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id) 9093434Sesaxe bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1); 9103434Sesaxe } 9113434Sesaxe } 9123434Sesaxe 9133434Sesaxe /* 9143434Sesaxe * Class callback when a CPU is actually moving partitions 9153434Sesaxe */ 9163434Sesaxe static void 9173434Sesaxe pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) 9183434Sesaxe { 9193434Sesaxe cpu_t *cpp; 9203434Sesaxe group_t *pgs; 9213434Sesaxe pg_t *pg; 9223434Sesaxe group_iter_t pg_iter; 9233434Sesaxe pg_cpu_itr_t cpu_iter; 9243434Sesaxe boolean_t found; 9253434Sesaxe 9263434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 9273434Sesaxe 9288906SEric.Saxe@Sun.COM if (cmt_sched_disabled) 9298906SEric.Saxe@Sun.COM return; 9308906SEric.Saxe@Sun.COM 9313434Sesaxe pgs = &cp->cpu_pg->pgs; 9323434Sesaxe group_iter_init(&pg_iter); 9333434Sesaxe 
9343434Sesaxe /* 9353434Sesaxe * Iterate over the CPUs CMT PGs 9363434Sesaxe */ 9373434Sesaxe while ((pg = group_iterate(pgs, &pg_iter)) != NULL) { 9383434Sesaxe 9393434Sesaxe if (IS_CMT_PG(pg) == 0) 9403434Sesaxe continue; 9413434Sesaxe 9423434Sesaxe /* 9433434Sesaxe * Add the PG to the bitset in the new partition. 9443434Sesaxe */ 9453434Sesaxe bitset_add(&newpp->cp_cmt_pgs, pg->pg_id); 9463434Sesaxe 9473434Sesaxe /* 9483434Sesaxe * Remove the PG from the bitset in the old partition 9493434Sesaxe * if the last of the PG's CPUs have left. 9503434Sesaxe */ 9513434Sesaxe found = B_FALSE; 9523434Sesaxe PG_CPU_ITR_INIT(pg, cpu_iter); 9533434Sesaxe while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) { 9543434Sesaxe if (cpp == cp) 9553434Sesaxe continue; 9563676Sesaxe if (CPU_ACTIVE(cpp) && 9573676Sesaxe cpp->cpu_part->cp_id == oldpp->cp_id) { 9583434Sesaxe found = B_TRUE; 9593434Sesaxe break; 9603434Sesaxe } 9613434Sesaxe } 9623434Sesaxe if (!found) 9633434Sesaxe bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id); 9643434Sesaxe } 9653434Sesaxe } 9663434Sesaxe 9673434Sesaxe /* 9683434Sesaxe * Class callback when a CPU becomes active (online) 9693434Sesaxe * 9703434Sesaxe * This is called in a context where CPUs are paused 9713434Sesaxe */ 9723434Sesaxe static void 9733434Sesaxe pg_cmt_cpu_active(cpu_t *cp) 9743434Sesaxe { 9753434Sesaxe int err; 9763434Sesaxe group_iter_t i; 9773434Sesaxe pg_cmt_t *pg; 9783434Sesaxe group_t *pgs; 9793434Sesaxe 9803434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 9813434Sesaxe 9828906SEric.Saxe@Sun.COM if (cmt_sched_disabled) 9838906SEric.Saxe@Sun.COM return; 9848906SEric.Saxe@Sun.COM 9853434Sesaxe pgs = &cp->cpu_pg->pgs; 9863434Sesaxe group_iter_init(&i); 9873434Sesaxe 9883434Sesaxe /* 9893434Sesaxe * Iterate over the CPU's PGs 9903434Sesaxe */ 9913434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) { 9923434Sesaxe 9933434Sesaxe if (IS_CMT_PG(pg) == 0) 9943434Sesaxe continue; 9953434Sesaxe 996*11389SAlexander.Kolbasov@Sun.COM /* 
997*11389SAlexander.Kolbasov@Sun.COM * Move to the next generation since topology is changing 998*11389SAlexander.Kolbasov@Sun.COM */ 999*11389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++; 1000*11389SAlexander.Kolbasov@Sun.COM 10013434Sesaxe err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE); 10023434Sesaxe ASSERT(err == 0); 10033434Sesaxe 10043434Sesaxe /* 10053434Sesaxe * If this is the first active CPU in the PG, and it 10063434Sesaxe * represents a hardware sharing relationship over which 10073434Sesaxe * CMT load balancing is performed, add it as a candidate 10083434Sesaxe * for balancing with it's siblings. 10093434Sesaxe */ 10103434Sesaxe if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 && 10118906SEric.Saxe@Sun.COM (pg->cmt_policy & (CMT_BALANCE | CMT_COALESCE))) { 10123434Sesaxe err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE); 10133434Sesaxe ASSERT(err == 0); 10148408SEric.Saxe@Sun.COM 10158408SEric.Saxe@Sun.COM /* 10168408SEric.Saxe@Sun.COM * If this is a top level PG, add it as a balancing 10178906SEric.Saxe@Sun.COM * candidate when balancing within the root lgroup. 10188408SEric.Saxe@Sun.COM */ 10198906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL && 10208906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) { 10218408SEric.Saxe@Sun.COM err = group_add(&cmt_root->cl_pgs, pg, 10228408SEric.Saxe@Sun.COM GRP_NORESIZE); 10238408SEric.Saxe@Sun.COM ASSERT(err == 0); 10248408SEric.Saxe@Sun.COM } 10253434Sesaxe } 10263434Sesaxe 10273434Sesaxe /* 10283434Sesaxe * Notate the CPU in the PGs active CPU bitset. 
10293434Sesaxe * Also notate the PG as being active in it's associated 10303434Sesaxe * partition 10313434Sesaxe */ 10323434Sesaxe bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid); 10333434Sesaxe bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id); 10343434Sesaxe } 10353434Sesaxe } 10363434Sesaxe 10373434Sesaxe /* 10383434Sesaxe * Class callback when a CPU goes inactive (offline) 10393434Sesaxe * 10403434Sesaxe * This is called in a context where CPUs are paused 10413434Sesaxe */ 10423434Sesaxe static void 10433434Sesaxe pg_cmt_cpu_inactive(cpu_t *cp) 10443434Sesaxe { 10453434Sesaxe int err; 10463434Sesaxe group_t *pgs; 10473434Sesaxe pg_cmt_t *pg; 10483434Sesaxe cpu_t *cpp; 10493434Sesaxe group_iter_t i; 10503434Sesaxe pg_cpu_itr_t cpu_itr; 10513434Sesaxe boolean_t found; 10523434Sesaxe 10533434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 10543434Sesaxe 10558906SEric.Saxe@Sun.COM if (cmt_sched_disabled) 10568906SEric.Saxe@Sun.COM return; 10578906SEric.Saxe@Sun.COM 10583434Sesaxe pgs = &cp->cpu_pg->pgs; 10593434Sesaxe group_iter_init(&i); 10603434Sesaxe 10613434Sesaxe while ((pg = group_iterate(pgs, &i)) != NULL) { 10623434Sesaxe 10633434Sesaxe if (IS_CMT_PG(pg) == 0) 10643434Sesaxe continue; 10653434Sesaxe 10663434Sesaxe /* 1067*11389SAlexander.Kolbasov@Sun.COM * Move to the next generation since topology is changing 1068*11389SAlexander.Kolbasov@Sun.COM */ 1069*11389SAlexander.Kolbasov@Sun.COM ((pghw_t *)pg)->pghw_generation++; 1070*11389SAlexander.Kolbasov@Sun.COM 1071*11389SAlexander.Kolbasov@Sun.COM /* 10723434Sesaxe * Remove the CPU from the CMT PGs active CPU group 10733434Sesaxe * bitmap 10743434Sesaxe */ 10753434Sesaxe err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE); 10763434Sesaxe ASSERT(err == 0); 10773434Sesaxe 10783434Sesaxe bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid); 10793434Sesaxe 10803434Sesaxe /* 10813434Sesaxe * If there are no more active CPUs in this PG over which 10823434Sesaxe * load was balanced, remove it as a balancing 
candidate. 10833434Sesaxe */ 10843434Sesaxe if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 && 10858906SEric.Saxe@Sun.COM (pg->cmt_policy & (CMT_BALANCE | CMT_COALESCE))) { 10863434Sesaxe err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE); 10873434Sesaxe ASSERT(err == 0); 10888408SEric.Saxe@Sun.COM 10898906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL && 10908906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) { 10918408SEric.Saxe@Sun.COM err = group_remove(&cmt_root->cl_pgs, pg, 10928408SEric.Saxe@Sun.COM GRP_NORESIZE); 10938408SEric.Saxe@Sun.COM ASSERT(err == 0); 10948408SEric.Saxe@Sun.COM } 10953434Sesaxe } 10963434Sesaxe 10973434Sesaxe /* 10983434Sesaxe * Assert the number of active CPUs does not exceed 10993434Sesaxe * the total number of CPUs in the PG 11003434Sesaxe */ 11013434Sesaxe ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <= 11023434Sesaxe GROUP_SIZE(&((pg_t *)pg)->pg_cpus)); 11033434Sesaxe 11043434Sesaxe /* 11053434Sesaxe * Update the PG bitset in the CPU's old partition 11063434Sesaxe */ 11073434Sesaxe found = B_FALSE; 11083434Sesaxe PG_CPU_ITR_INIT(pg, cpu_itr); 11093434Sesaxe while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) { 11103434Sesaxe if (cpp == cp) 11113434Sesaxe continue; 11123676Sesaxe if (CPU_ACTIVE(cpp) && 11133676Sesaxe cpp->cpu_part->cp_id == cp->cpu_part->cp_id) { 11143434Sesaxe found = B_TRUE; 11153434Sesaxe break; 11163434Sesaxe } 11173434Sesaxe } 11183434Sesaxe if (!found) { 11193434Sesaxe bitset_del(&cp->cpu_part->cp_cmt_pgs, 11203434Sesaxe ((pg_t *)pg)->pg_id); 11213434Sesaxe } 11223434Sesaxe } 11233434Sesaxe } 11243434Sesaxe 11253434Sesaxe /* 11263434Sesaxe * Return non-zero if the CPU belongs in the given PG 11273434Sesaxe */ 11283434Sesaxe static int 11293434Sesaxe pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp) 11303434Sesaxe { 11313434Sesaxe cpu_t *pg_cpu; 11323434Sesaxe 11333434Sesaxe pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0); 11343434Sesaxe 11353434Sesaxe ASSERT(pg_cpu != NULL); 11363434Sesaxe 11373434Sesaxe /* 11383434Sesaxe * The CPU 
belongs if, given the nature of the hardware sharing 11393434Sesaxe * relationship represented by the PG, the CPU has that 11403434Sesaxe * relationship with some other CPU already in the PG 11413434Sesaxe */ 11423434Sesaxe if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw)) 11433434Sesaxe return (1); 11443434Sesaxe 11453434Sesaxe return (0); 11463434Sesaxe } 11473434Sesaxe 11483434Sesaxe /* 11498906SEric.Saxe@Sun.COM * Sort the CPUs CMT hierarchy, where "size" is the number of levels. 11503434Sesaxe */ 11513434Sesaxe static void 11528906SEric.Saxe@Sun.COM pg_cmt_hier_sort(pg_cmt_t **hier, int size) 11533434Sesaxe { 115410947SSrihari.Venkatesan@Sun.COM int i, j, inc, sz; 115510947SSrihari.Venkatesan@Sun.COM int start, end; 11568906SEric.Saxe@Sun.COM pg_t *tmp; 11578906SEric.Saxe@Sun.COM pg_t **h = (pg_t **)hier; 11583434Sesaxe 11598906SEric.Saxe@Sun.COM /* 11608906SEric.Saxe@Sun.COM * First sort by number of CPUs 11618906SEric.Saxe@Sun.COM */ 11628906SEric.Saxe@Sun.COM inc = size / 2; 11638906SEric.Saxe@Sun.COM while (inc > 0) { 11648906SEric.Saxe@Sun.COM for (i = inc; i < size; i++) { 11658906SEric.Saxe@Sun.COM j = i; 11668906SEric.Saxe@Sun.COM tmp = h[i]; 11678906SEric.Saxe@Sun.COM while ((j >= inc) && 11688906SEric.Saxe@Sun.COM (PG_NUM_CPUS(h[j - inc]) > PG_NUM_CPUS(tmp))) { 11698906SEric.Saxe@Sun.COM h[j] = h[j - inc]; 11708906SEric.Saxe@Sun.COM j = j - inc; 11713434Sesaxe } 11728906SEric.Saxe@Sun.COM h[j] = tmp; 11733434Sesaxe } 11748906SEric.Saxe@Sun.COM if (inc == 2) 11758906SEric.Saxe@Sun.COM inc = 1; 11768906SEric.Saxe@Sun.COM else 11778906SEric.Saxe@Sun.COM inc = (inc * 5) / 11; 11788906SEric.Saxe@Sun.COM } 11798906SEric.Saxe@Sun.COM 11808906SEric.Saxe@Sun.COM /* 11818906SEric.Saxe@Sun.COM * Break ties by asking the platform. 11828906SEric.Saxe@Sun.COM * Determine if h[i] outranks h[i + 1] and if so, swap them. 
11838906SEric.Saxe@Sun.COM */ 118410947SSrihari.Venkatesan@Sun.COM for (start = 0; start < size; start++) { 118510947SSrihari.Venkatesan@Sun.COM 118610947SSrihari.Venkatesan@Sun.COM /* 118710947SSrihari.Venkatesan@Sun.COM * Find various contiguous sets of elements, 118810947SSrihari.Venkatesan@Sun.COM * in the array, with the same number of cpus 118910947SSrihari.Venkatesan@Sun.COM */ 119010947SSrihari.Venkatesan@Sun.COM end = start; 119110947SSrihari.Venkatesan@Sun.COM sz = PG_NUM_CPUS(h[start]); 119210947SSrihari.Venkatesan@Sun.COM while ((end < size) && (sz == PG_NUM_CPUS(h[end]))) 119310947SSrihari.Venkatesan@Sun.COM end++; 119410947SSrihari.Venkatesan@Sun.COM /* 119510947SSrihari.Venkatesan@Sun.COM * Sort each such set of the array by rank 119610947SSrihari.Venkatesan@Sun.COM */ 119710947SSrihari.Venkatesan@Sun.COM for (i = start + 1; i < end; i++) { 119810947SSrihari.Venkatesan@Sun.COM j = i - 1; 11998906SEric.Saxe@Sun.COM tmp = h[i]; 120010947SSrihari.Venkatesan@Sun.COM while (j >= start && 120110947SSrihari.Venkatesan@Sun.COM pg_cmt_hier_rank(hier[j], 120210947SSrihari.Venkatesan@Sun.COM (pg_cmt_t *)tmp) == hier[j]) { 120310947SSrihari.Venkatesan@Sun.COM h[j + 1] = h[j]; 120410947SSrihari.Venkatesan@Sun.COM j--; 120510947SSrihari.Venkatesan@Sun.COM } 120610947SSrihari.Venkatesan@Sun.COM h[j + 1] = tmp; 12078906SEric.Saxe@Sun.COM } 12083434Sesaxe } 12093434Sesaxe } 12103434Sesaxe 12113434Sesaxe /* 12123434Sesaxe * Return a cmt_lgrp_t * given an lgroup handle. 
12133434Sesaxe */ 12143434Sesaxe static cmt_lgrp_t * 12153434Sesaxe pg_cmt_find_lgrp(lgrp_handle_t hand) 12163434Sesaxe { 12173434Sesaxe cmt_lgrp_t *lgrp; 12183434Sesaxe 12193434Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 12203434Sesaxe 12213434Sesaxe lgrp = cmt_lgrps; 12223434Sesaxe while (lgrp != NULL) { 12233434Sesaxe if (lgrp->cl_hand == hand) 12243676Sesaxe break; 12253434Sesaxe lgrp = lgrp->cl_next; 12263434Sesaxe } 12273676Sesaxe return (lgrp); 12283676Sesaxe } 12293434Sesaxe 12303676Sesaxe /* 12313676Sesaxe * Create a cmt_lgrp_t with the specified handle. 12323676Sesaxe */ 12333676Sesaxe static cmt_lgrp_t * 12343676Sesaxe pg_cmt_lgrp_create(lgrp_handle_t hand) 12353676Sesaxe { 12363676Sesaxe cmt_lgrp_t *lgrp; 12373676Sesaxe 12383676Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 12393676Sesaxe 12403434Sesaxe lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP); 12413434Sesaxe 12423434Sesaxe lgrp->cl_hand = hand; 12433434Sesaxe lgrp->cl_npgs = 0; 12443434Sesaxe lgrp->cl_next = cmt_lgrps; 12453434Sesaxe cmt_lgrps = lgrp; 12463434Sesaxe group_create(&lgrp->cl_pgs); 12473434Sesaxe 12483434Sesaxe return (lgrp); 12493434Sesaxe } 12508408SEric.Saxe@Sun.COM 12518408SEric.Saxe@Sun.COM /* 12528906SEric.Saxe@Sun.COM * Interfaces to enable and disable power aware dispatching 12538906SEric.Saxe@Sun.COM * The caller must be holding cpu_lock. 12548408SEric.Saxe@Sun.COM * 12558906SEric.Saxe@Sun.COM * Return 0 on success and -1 on failure. 
12568408SEric.Saxe@Sun.COM */ 12578906SEric.Saxe@Sun.COM int 12588906SEric.Saxe@Sun.COM cmt_pad_enable(pghw_type_t type) 12598408SEric.Saxe@Sun.COM { 12608906SEric.Saxe@Sun.COM group_t *hwset; 12618906SEric.Saxe@Sun.COM group_iter_t iter; 12628906SEric.Saxe@Sun.COM pg_cmt_t *pg; 12638906SEric.Saxe@Sun.COM 12648906SEric.Saxe@Sun.COM ASSERT(PGHW_IS_PM_DOMAIN(type)); 12658906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 12668408SEric.Saxe@Sun.COM 12678906SEric.Saxe@Sun.COM if ((hwset = pghw_set_lookup(type)) == NULL || 12688906SEric.Saxe@Sun.COM cmt_hw_blacklisted[type]) { 12698906SEric.Saxe@Sun.COM /* 12708906SEric.Saxe@Sun.COM * Unable to find any instances of the specified type 12718906SEric.Saxe@Sun.COM * of power domain, or the power domains have been blacklisted. 12728906SEric.Saxe@Sun.COM */ 12738906SEric.Saxe@Sun.COM return (-1); 12748906SEric.Saxe@Sun.COM } 12758408SEric.Saxe@Sun.COM 12768408SEric.Saxe@Sun.COM /* 12778906SEric.Saxe@Sun.COM * Iterate over the power domains, setting the default dispatcher 12788906SEric.Saxe@Sun.COM * policy for power/performance optimization. 12798906SEric.Saxe@Sun.COM * 12808906SEric.Saxe@Sun.COM * Simply setting the policy isn't enough in the case where the power 12818906SEric.Saxe@Sun.COM * domain is an only child of another PG. Because the dispatcher walks 12828906SEric.Saxe@Sun.COM * the PG hierarchy in a top down fashion, the higher up PG's policy 12838906SEric.Saxe@Sun.COM * will dominate. So promote the power domain above it's parent if both 12848906SEric.Saxe@Sun.COM * PG and it's parent have the same CPUs to ensure it's policy 12858906SEric.Saxe@Sun.COM * dominates. 
12868408SEric.Saxe@Sun.COM */ 12878906SEric.Saxe@Sun.COM group_iter_init(&iter); 12888906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &iter)) != NULL) { 12898906SEric.Saxe@Sun.COM /* 12908906SEric.Saxe@Sun.COM * If the power domain is an only child to a parent 12918906SEric.Saxe@Sun.COM * not implementing the same policy, promote the child 12928906SEric.Saxe@Sun.COM * above the parent to activate the policy. 12938906SEric.Saxe@Sun.COM */ 12948906SEric.Saxe@Sun.COM pg->cmt_policy = pg_cmt_policy(((pghw_t *)pg)->pghw_hw); 12958906SEric.Saxe@Sun.COM while ((pg->cmt_parent != NULL) && 12968906SEric.Saxe@Sun.COM (pg->cmt_parent->cmt_policy != pg->cmt_policy) && 12978906SEric.Saxe@Sun.COM (PG_NUM_CPUS((pg_t *)pg) == 12988906SEric.Saxe@Sun.COM PG_NUM_CPUS((pg_t *)pg->cmt_parent))) { 12999438SEric.Saxe@Sun.COM cmt_hier_promote(pg, NULL); 13008906SEric.Saxe@Sun.COM } 13018906SEric.Saxe@Sun.COM } 13028906SEric.Saxe@Sun.COM 13038906SEric.Saxe@Sun.COM return (0); 13048906SEric.Saxe@Sun.COM } 13058408SEric.Saxe@Sun.COM 13068906SEric.Saxe@Sun.COM int 13078906SEric.Saxe@Sun.COM cmt_pad_disable(pghw_type_t type) 13088906SEric.Saxe@Sun.COM { 13098906SEric.Saxe@Sun.COM group_t *hwset; 13108906SEric.Saxe@Sun.COM group_iter_t iter; 13118906SEric.Saxe@Sun.COM pg_cmt_t *pg; 13128906SEric.Saxe@Sun.COM pg_cmt_t *child; 13138906SEric.Saxe@Sun.COM 13148906SEric.Saxe@Sun.COM ASSERT(PGHW_IS_PM_DOMAIN(type)); 13158906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 13168906SEric.Saxe@Sun.COM 13178906SEric.Saxe@Sun.COM if ((hwset = pghw_set_lookup(type)) == NULL) { 13188906SEric.Saxe@Sun.COM /* 13198906SEric.Saxe@Sun.COM * Unable to find any instances of the specified type of 13208906SEric.Saxe@Sun.COM * power domain. 
13218906SEric.Saxe@Sun.COM */ 13228906SEric.Saxe@Sun.COM return (-1); 13238906SEric.Saxe@Sun.COM } 13248408SEric.Saxe@Sun.COM /* 13258906SEric.Saxe@Sun.COM * Iterate over the power domains, setting the default dispatcher 13268906SEric.Saxe@Sun.COM * policy for performance optimization (load balancing). 13278408SEric.Saxe@Sun.COM */ 13288906SEric.Saxe@Sun.COM group_iter_init(&iter); 13298906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &iter)) != NULL) { 13308408SEric.Saxe@Sun.COM 13318408SEric.Saxe@Sun.COM /* 13328906SEric.Saxe@Sun.COM * If the power domain has an only child that implements 13338906SEric.Saxe@Sun.COM * policy other than load balancing, promote the child 13348906SEric.Saxe@Sun.COM * above the power domain to ensure it's policy dominates. 13358408SEric.Saxe@Sun.COM */ 13368969SEric.Saxe@Sun.COM if (pg->cmt_children != NULL && 13378969SEric.Saxe@Sun.COM GROUP_SIZE(pg->cmt_children) == 1) { 13388906SEric.Saxe@Sun.COM child = GROUP_ACCESS(pg->cmt_children, 0); 13398906SEric.Saxe@Sun.COM if ((child->cmt_policy & CMT_BALANCE) == 0) { 13409438SEric.Saxe@Sun.COM cmt_hier_promote(child, NULL); 13418906SEric.Saxe@Sun.COM } 13428906SEric.Saxe@Sun.COM } 13438906SEric.Saxe@Sun.COM pg->cmt_policy = CMT_BALANCE; 13448906SEric.Saxe@Sun.COM } 13458906SEric.Saxe@Sun.COM return (0); 13468906SEric.Saxe@Sun.COM } 13478906SEric.Saxe@Sun.COM 13488906SEric.Saxe@Sun.COM /* ARGSUSED */ 13498906SEric.Saxe@Sun.COM static void 13508906SEric.Saxe@Sun.COM cmt_ev_thread_swtch(pg_t *pg, cpu_t *cp, hrtime_t now, kthread_t *old, 13518906SEric.Saxe@Sun.COM kthread_t *new) 13528906SEric.Saxe@Sun.COM { 13538906SEric.Saxe@Sun.COM pg_cmt_t *cmt_pg = (pg_cmt_t *)pg; 13548906SEric.Saxe@Sun.COM 13558906SEric.Saxe@Sun.COM if (old == cp->cpu_idle_thread) { 13568906SEric.Saxe@Sun.COM atomic_add_32(&cmt_pg->cmt_utilization, 1); 13578906SEric.Saxe@Sun.COM } else if (new == cp->cpu_idle_thread) { 13588906SEric.Saxe@Sun.COM atomic_add_32(&cmt_pg->cmt_utilization, -1); 
13598906SEric.Saxe@Sun.COM } 13608906SEric.Saxe@Sun.COM } 13618906SEric.Saxe@Sun.COM 13628906SEric.Saxe@Sun.COM /* 13638906SEric.Saxe@Sun.COM * Macro to test whether a thread is currently runnable on a CPU in a PG. 13648906SEric.Saxe@Sun.COM */ 13658906SEric.Saxe@Sun.COM #define THREAD_RUNNABLE_IN_PG(t, pg) \ 13668906SEric.Saxe@Sun.COM ((t)->t_state == TS_RUN && \ 13678906SEric.Saxe@Sun.COM (t)->t_disp_queue->disp_cpu && \ 13688906SEric.Saxe@Sun.COM bitset_in_set(&(pg)->cmt_cpus_actv_set, \ 13698906SEric.Saxe@Sun.COM (t)->t_disp_queue->disp_cpu->cpu_seqid)) 13708906SEric.Saxe@Sun.COM 13718906SEric.Saxe@Sun.COM static void 13728906SEric.Saxe@Sun.COM cmt_ev_thread_swtch_pwr(pg_t *pg, cpu_t *cp, hrtime_t now, kthread_t *old, 13738906SEric.Saxe@Sun.COM kthread_t *new) 13748906SEric.Saxe@Sun.COM { 13758906SEric.Saxe@Sun.COM pg_cmt_t *cmt = (pg_cmt_t *)pg; 13768906SEric.Saxe@Sun.COM cpupm_domain_t *dom; 13778906SEric.Saxe@Sun.COM uint32_t u; 13788906SEric.Saxe@Sun.COM 13798906SEric.Saxe@Sun.COM if (old == cp->cpu_idle_thread) { 13808906SEric.Saxe@Sun.COM ASSERT(new != cp->cpu_idle_thread); 13818906SEric.Saxe@Sun.COM u = atomic_add_32_nv(&cmt->cmt_utilization, 1); 13828906SEric.Saxe@Sun.COM if (u == 1) { 13838906SEric.Saxe@Sun.COM /* 13848906SEric.Saxe@Sun.COM * Notify the CPU power manager that the domain 13858906SEric.Saxe@Sun.COM * is non-idle. 
13868906SEric.Saxe@Sun.COM */ 13878906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle; 13888906SEric.Saxe@Sun.COM cpupm_utilization_event(cp, now, dom, 13898906SEric.Saxe@Sun.COM CPUPM_DOM_BUSY_FROM_IDLE); 13908906SEric.Saxe@Sun.COM } 13918906SEric.Saxe@Sun.COM } else if (new == cp->cpu_idle_thread) { 13928906SEric.Saxe@Sun.COM ASSERT(old != cp->cpu_idle_thread); 13938906SEric.Saxe@Sun.COM u = atomic_add_32_nv(&cmt->cmt_utilization, -1); 13948906SEric.Saxe@Sun.COM if (u == 0) { 13958906SEric.Saxe@Sun.COM /* 13968906SEric.Saxe@Sun.COM * The domain is idle, notify the CPU power 13978906SEric.Saxe@Sun.COM * manager. 13988906SEric.Saxe@Sun.COM * 13998906SEric.Saxe@Sun.COM * Avoid notifying if the thread is simply migrating 14008906SEric.Saxe@Sun.COM * between CPUs in the domain. 14018906SEric.Saxe@Sun.COM */ 14028906SEric.Saxe@Sun.COM if (!THREAD_RUNNABLE_IN_PG(old, cmt)) { 14038906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle; 14048906SEric.Saxe@Sun.COM cpupm_utilization_event(cp, now, dom, 14058906SEric.Saxe@Sun.COM CPUPM_DOM_IDLE_FROM_BUSY); 14068906SEric.Saxe@Sun.COM } 14078906SEric.Saxe@Sun.COM } 14088906SEric.Saxe@Sun.COM } 14098906SEric.Saxe@Sun.COM } 14108906SEric.Saxe@Sun.COM 14118906SEric.Saxe@Sun.COM /* ARGSUSED */ 14128906SEric.Saxe@Sun.COM static void 14138906SEric.Saxe@Sun.COM cmt_ev_thread_remain_pwr(pg_t *pg, cpu_t *cp, kthread_t *t) 14148906SEric.Saxe@Sun.COM { 14158906SEric.Saxe@Sun.COM pg_cmt_t *cmt = (pg_cmt_t *)pg; 14168906SEric.Saxe@Sun.COM cpupm_domain_t *dom; 14178906SEric.Saxe@Sun.COM 14188906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle; 14198906SEric.Saxe@Sun.COM cpupm_utilization_event(cp, (hrtime_t)0, dom, CPUPM_DOM_REMAIN_BUSY); 14208906SEric.Saxe@Sun.COM } 14218906SEric.Saxe@Sun.COM 14228906SEric.Saxe@Sun.COM /* 14238906SEric.Saxe@Sun.COM * Return the name of the CMT scheduling policy 14248906SEric.Saxe@Sun.COM * being implemented across this PG 14258906SEric.Saxe@Sun.COM */ 
14268906SEric.Saxe@Sun.COM static char * 14278906SEric.Saxe@Sun.COM pg_cmt_policy_name(pg_t *pg) 14288906SEric.Saxe@Sun.COM { 14298906SEric.Saxe@Sun.COM pg_cmt_policy_t policy; 14308906SEric.Saxe@Sun.COM 14318906SEric.Saxe@Sun.COM policy = ((pg_cmt_t *)pg)->cmt_policy; 14328906SEric.Saxe@Sun.COM 14338906SEric.Saxe@Sun.COM if (policy & CMT_AFFINITY) { 14348906SEric.Saxe@Sun.COM if (policy & CMT_BALANCE) 14358906SEric.Saxe@Sun.COM return ("Load Balancing & Affinity"); 14368906SEric.Saxe@Sun.COM else if (policy & CMT_COALESCE) 14378906SEric.Saxe@Sun.COM return ("Load Coalescence & Affinity"); 14388906SEric.Saxe@Sun.COM else 14398906SEric.Saxe@Sun.COM return ("Affinity"); 14408906SEric.Saxe@Sun.COM } else { 14418906SEric.Saxe@Sun.COM if (policy & CMT_BALANCE) 14428906SEric.Saxe@Sun.COM return ("Load Balancing"); 14438906SEric.Saxe@Sun.COM else if (policy & CMT_COALESCE) 14448906SEric.Saxe@Sun.COM return ("Load Coalescence"); 14458906SEric.Saxe@Sun.COM else 14468906SEric.Saxe@Sun.COM return ("None"); 14478906SEric.Saxe@Sun.COM } 14488906SEric.Saxe@Sun.COM } 14498906SEric.Saxe@Sun.COM 14508906SEric.Saxe@Sun.COM /* 14518906SEric.Saxe@Sun.COM * Prune PG, and all other instances of PG's hardware sharing relationship 14529746SEric.Saxe@Sun.COM * from the CMT PG hierarchy. 14539438SEric.Saxe@Sun.COM * 14549438SEric.Saxe@Sun.COM * This routine operates on the CPU specific processor group data (for the CPUs 14559438SEric.Saxe@Sun.COM * in the PG being pruned), and may be invoked from a context where one CPU's 14569438SEric.Saxe@Sun.COM * PG data is under construction. In this case the argument "pgdata", if not 14579438SEric.Saxe@Sun.COM * NULL, is a reference to the CPU's under-construction PG data. 
14588906SEric.Saxe@Sun.COM */ 14598906SEric.Saxe@Sun.COM static int 14609438SEric.Saxe@Sun.COM pg_cmt_prune(pg_cmt_t *pg_bad, pg_cmt_t **lineage, int *sz, cpu_pg_t *pgdata) 14618906SEric.Saxe@Sun.COM { 14628906SEric.Saxe@Sun.COM group_t *hwset, *children; 14638906SEric.Saxe@Sun.COM int i, j, r, size = *sz; 14648906SEric.Saxe@Sun.COM group_iter_t hw_iter, child_iter; 14658906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter; 14668906SEric.Saxe@Sun.COM pg_cmt_t *pg, *child; 14678906SEric.Saxe@Sun.COM cpu_t *cpu; 14688906SEric.Saxe@Sun.COM int cap_needed; 14698906SEric.Saxe@Sun.COM pghw_type_t hw; 14708906SEric.Saxe@Sun.COM 14718906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 14728906SEric.Saxe@Sun.COM 14738906SEric.Saxe@Sun.COM hw = ((pghw_t *)pg_bad)->pghw_hw; 14748906SEric.Saxe@Sun.COM 14758906SEric.Saxe@Sun.COM if (hw == PGHW_POW_ACTIVE) { 14768906SEric.Saxe@Sun.COM cmn_err(CE_NOTE, "!Active CPUPM domain groups look suspect. " 14778906SEric.Saxe@Sun.COM "Event Based CPUPM Unavailable"); 14788906SEric.Saxe@Sun.COM } else if (hw == PGHW_POW_IDLE) { 14798906SEric.Saxe@Sun.COM cmn_err(CE_NOTE, "!Idle CPUPM domain groups look suspect. " 14808906SEric.Saxe@Sun.COM "Dispatcher assisted CPUPM disabled."); 14818906SEric.Saxe@Sun.COM } 14828906SEric.Saxe@Sun.COM 14838906SEric.Saxe@Sun.COM /* 14848906SEric.Saxe@Sun.COM * Find and eliminate the PG from the lineage. 14858906SEric.Saxe@Sun.COM */ 14868906SEric.Saxe@Sun.COM for (i = 0; i < size; i++) { 14878906SEric.Saxe@Sun.COM if (lineage[i] == pg_bad) { 14888906SEric.Saxe@Sun.COM for (j = i; j < size - 1; j++) 14898906SEric.Saxe@Sun.COM lineage[j] = lineage[j + 1]; 14908906SEric.Saxe@Sun.COM *sz = size - 1; 14918906SEric.Saxe@Sun.COM break; 14928906SEric.Saxe@Sun.COM } 14938906SEric.Saxe@Sun.COM } 14948906SEric.Saxe@Sun.COM 14958906SEric.Saxe@Sun.COM /* 14968906SEric.Saxe@Sun.COM * We'll prune all instances of the hardware sharing relationship 14978906SEric.Saxe@Sun.COM * represented by pg. 
But before we do that (and pause CPUs) we need 14988906SEric.Saxe@Sun.COM * to ensure the hierarchy's groups are properly sized. 14998906SEric.Saxe@Sun.COM */ 15008906SEric.Saxe@Sun.COM hwset = pghw_set_lookup(hw); 15018906SEric.Saxe@Sun.COM 15028906SEric.Saxe@Sun.COM /* 15039746SEric.Saxe@Sun.COM * Blacklist the hardware so future processor groups of this type won't 15049746SEric.Saxe@Sun.COM * participate in CMT thread placement. 15059746SEric.Saxe@Sun.COM * 15069746SEric.Saxe@Sun.COM * XXX 15079746SEric.Saxe@Sun.COM * For heterogeneous system configurations, this might be overkill. 15089746SEric.Saxe@Sun.COM * We may only need to blacklist the illegal PGs, and other instances 15099746SEric.Saxe@Sun.COM * of this hardware sharing relationship may be ok. 15108906SEric.Saxe@Sun.COM */ 15118906SEric.Saxe@Sun.COM cmt_hw_blacklisted[hw] = 1; 15128906SEric.Saxe@Sun.COM 15138906SEric.Saxe@Sun.COM /* 15148906SEric.Saxe@Sun.COM * For each of the PGs being pruned, ensure sufficient capacity in 15158906SEric.Saxe@Sun.COM * the siblings set for the PG's children 15168906SEric.Saxe@Sun.COM */ 15178906SEric.Saxe@Sun.COM group_iter_init(&hw_iter); 15188906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &hw_iter)) != NULL) { 15198906SEric.Saxe@Sun.COM /* 15208906SEric.Saxe@Sun.COM * PG is being pruned, but if it is bringing up more than 15218906SEric.Saxe@Sun.COM * one child, ask for more capacity in the siblings group. 
15228906SEric.Saxe@Sun.COM */ 15238906SEric.Saxe@Sun.COM cap_needed = 0; 15248906SEric.Saxe@Sun.COM if (pg->cmt_children && 15258906SEric.Saxe@Sun.COM GROUP_SIZE(pg->cmt_children) > 1) { 15268906SEric.Saxe@Sun.COM cap_needed = GROUP_SIZE(pg->cmt_children) - 1; 15278906SEric.Saxe@Sun.COM 15288906SEric.Saxe@Sun.COM group_expand(pg->cmt_siblings, 15298906SEric.Saxe@Sun.COM GROUP_SIZE(pg->cmt_siblings) + cap_needed); 15308408SEric.Saxe@Sun.COM 15318408SEric.Saxe@Sun.COM /* 15328906SEric.Saxe@Sun.COM * If this is a top level group, also ensure the 15338906SEric.Saxe@Sun.COM * capacity in the root lgrp level CMT grouping. 15348408SEric.Saxe@Sun.COM */ 15358906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL && 15368906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) { 15378906SEric.Saxe@Sun.COM group_expand(&cmt_root->cl_pgs, 15388906SEric.Saxe@Sun.COM GROUP_SIZE(&cmt_root->cl_pgs) + cap_needed); 15399746SEric.Saxe@Sun.COM cmt_root->cl_npgs += cap_needed; 15408408SEric.Saxe@Sun.COM } 15418906SEric.Saxe@Sun.COM } 15428906SEric.Saxe@Sun.COM } 15438408SEric.Saxe@Sun.COM 15448906SEric.Saxe@Sun.COM /* 15458906SEric.Saxe@Sun.COM * We're operating on the PG hierarchy. Pause CPUs to ensure 15468906SEric.Saxe@Sun.COM * exclusivity with respect to the dispatcher. 15478906SEric.Saxe@Sun.COM */ 15488906SEric.Saxe@Sun.COM pause_cpus(NULL); 15498408SEric.Saxe@Sun.COM 15508906SEric.Saxe@Sun.COM /* 15518906SEric.Saxe@Sun.COM * Prune all PG instances of the hardware sharing relationship 15528906SEric.Saxe@Sun.COM * represented by pg. 15538906SEric.Saxe@Sun.COM */ 15548906SEric.Saxe@Sun.COM group_iter_init(&hw_iter); 15558906SEric.Saxe@Sun.COM while ((pg = group_iterate(hwset, &hw_iter)) != NULL) { 15568408SEric.Saxe@Sun.COM 15578408SEric.Saxe@Sun.COM /* 15588906SEric.Saxe@Sun.COM * Remove PG from it's group of siblings, if it's there. 
15598906SEric.Saxe@Sun.COM */ 15608906SEric.Saxe@Sun.COM if (pg->cmt_siblings) { 15618906SEric.Saxe@Sun.COM (void) group_remove(pg->cmt_siblings, pg, GRP_NORESIZE); 15628906SEric.Saxe@Sun.COM } 15638906SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL && 15648906SEric.Saxe@Sun.COM pg->cmt_siblings != &cmt_root->cl_pgs) { 15658906SEric.Saxe@Sun.COM (void) group_remove(&cmt_root->cl_pgs, pg, 15668906SEric.Saxe@Sun.COM GRP_NORESIZE); 15678906SEric.Saxe@Sun.COM } 15689746SEric.Saxe@Sun.COM 15699746SEric.Saxe@Sun.COM /* 15709746SEric.Saxe@Sun.COM * Indicate that no CMT policy will be implemented across 15719746SEric.Saxe@Sun.COM * this PG. 15729746SEric.Saxe@Sun.COM */ 15739746SEric.Saxe@Sun.COM pg->cmt_policy = CMT_NO_POLICY; 15749746SEric.Saxe@Sun.COM 15758906SEric.Saxe@Sun.COM /* 15769036SEric.Saxe@Sun.COM * Move PG's children from it's children set to it's parent's 15779036SEric.Saxe@Sun.COM * children set. Note that the parent's children set, and PG's 15789036SEric.Saxe@Sun.COM * siblings set are the same thing. 15799036SEric.Saxe@Sun.COM * 15809036SEric.Saxe@Sun.COM * Because we are iterating over the same group that we are 15819036SEric.Saxe@Sun.COM * operating on (removing the children), first add all of PG's 15829036SEric.Saxe@Sun.COM * children to the parent's children set, and once we are done 15839036SEric.Saxe@Sun.COM * iterating, empty PG's children set. 
15848906SEric.Saxe@Sun.COM */ 15858906SEric.Saxe@Sun.COM if (pg->cmt_children != NULL) { 15868906SEric.Saxe@Sun.COM children = pg->cmt_children; 15878906SEric.Saxe@Sun.COM 15888906SEric.Saxe@Sun.COM group_iter_init(&child_iter); 15898906SEric.Saxe@Sun.COM while ((child = group_iterate(children, &child_iter)) 15908906SEric.Saxe@Sun.COM != NULL) { 15919036SEric.Saxe@Sun.COM if (pg->cmt_siblings != NULL) { 15928906SEric.Saxe@Sun.COM r = group_add(pg->cmt_siblings, child, 15938906SEric.Saxe@Sun.COM GRP_NORESIZE); 15948906SEric.Saxe@Sun.COM ASSERT(r == 0); 15959746SEric.Saxe@Sun.COM 15969746SEric.Saxe@Sun.COM if (pg->cmt_parent == NULL && 15979746SEric.Saxe@Sun.COM pg->cmt_siblings != 15989746SEric.Saxe@Sun.COM &cmt_root->cl_pgs) { 15999746SEric.Saxe@Sun.COM r = group_add(&cmt_root->cl_pgs, 16009746SEric.Saxe@Sun.COM child, GRP_NORESIZE); 16019746SEric.Saxe@Sun.COM ASSERT(r == 0); 16029746SEric.Saxe@Sun.COM } 16038906SEric.Saxe@Sun.COM } 16048906SEric.Saxe@Sun.COM } 16059036SEric.Saxe@Sun.COM group_empty(pg->cmt_children); 16068906SEric.Saxe@Sun.COM } 16078906SEric.Saxe@Sun.COM 16088906SEric.Saxe@Sun.COM /* 16098906SEric.Saxe@Sun.COM * Reset the callbacks to the defaults 16108906SEric.Saxe@Sun.COM */ 16118906SEric.Saxe@Sun.COM pg_callback_set_defaults((pg_t *)pg); 16128906SEric.Saxe@Sun.COM 16138906SEric.Saxe@Sun.COM /* 16148906SEric.Saxe@Sun.COM * Update all the CPU lineages in each of PG's CPUs 16158408SEric.Saxe@Sun.COM */ 16168906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT(pg, cpu_iter); 16178906SEric.Saxe@Sun.COM while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) { 16188906SEric.Saxe@Sun.COM pg_cmt_t *cpu_pg; 16198906SEric.Saxe@Sun.COM group_iter_t liter; /* Iterator for the lineage */ 16209438SEric.Saxe@Sun.COM cpu_pg_t *cpd; /* CPU's PG data */ 16219438SEric.Saxe@Sun.COM 16229438SEric.Saxe@Sun.COM /* 16239438SEric.Saxe@Sun.COM * The CPU's lineage is under construction still 16249438SEric.Saxe@Sun.COM * references the bootstrap CPU PG data structure. 
16259438SEric.Saxe@Sun.COM */ 16269438SEric.Saxe@Sun.COM if (pg_cpu_is_bootstrapped(cpu)) 16279438SEric.Saxe@Sun.COM cpd = pgdata; 16289438SEric.Saxe@Sun.COM else 16299438SEric.Saxe@Sun.COM cpd = cpu->cpu_pg; 16308906SEric.Saxe@Sun.COM 16318906SEric.Saxe@Sun.COM /* 16328906SEric.Saxe@Sun.COM * Iterate over the CPU's PGs updating the children 16338906SEric.Saxe@Sun.COM * of the PG being promoted, since they have a new 16348906SEric.Saxe@Sun.COM * parent and siblings set. 16358906SEric.Saxe@Sun.COM */ 16368906SEric.Saxe@Sun.COM group_iter_init(&liter); 16379438SEric.Saxe@Sun.COM while ((cpu_pg = group_iterate(&cpd->pgs, 16389438SEric.Saxe@Sun.COM &liter)) != NULL) { 16398906SEric.Saxe@Sun.COM if (cpu_pg->cmt_parent == pg) { 16408906SEric.Saxe@Sun.COM cpu_pg->cmt_parent = pg->cmt_parent; 16418906SEric.Saxe@Sun.COM cpu_pg->cmt_siblings = pg->cmt_siblings; 16428906SEric.Saxe@Sun.COM } 16438906SEric.Saxe@Sun.COM } 16448906SEric.Saxe@Sun.COM 16458906SEric.Saxe@Sun.COM /* 16468906SEric.Saxe@Sun.COM * Update the CPU's lineages 16479746SEric.Saxe@Sun.COM * 16489746SEric.Saxe@Sun.COM * Remove the PG from the CPU's group used for CMT 16499746SEric.Saxe@Sun.COM * scheduling. 
16508906SEric.Saxe@Sun.COM */ 16519438SEric.Saxe@Sun.COM (void) group_remove(&cpd->cmt_pgs, pg, GRP_NORESIZE); 16528408SEric.Saxe@Sun.COM } 16538906SEric.Saxe@Sun.COM } 16548906SEric.Saxe@Sun.COM start_cpus(); 16558906SEric.Saxe@Sun.COM return (0); 16568906SEric.Saxe@Sun.COM } 16578906SEric.Saxe@Sun.COM 16588906SEric.Saxe@Sun.COM /* 16598906SEric.Saxe@Sun.COM * Disable CMT scheduling 16608906SEric.Saxe@Sun.COM */ 16618906SEric.Saxe@Sun.COM static void 16628906SEric.Saxe@Sun.COM pg_cmt_disable(void) 16638906SEric.Saxe@Sun.COM { 16649438SEric.Saxe@Sun.COM cpu_t *cpu; 16659438SEric.Saxe@Sun.COM 16669438SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 16678906SEric.Saxe@Sun.COM 16688906SEric.Saxe@Sun.COM pause_cpus(NULL); 16698906SEric.Saxe@Sun.COM cpu = cpu_list; 16708906SEric.Saxe@Sun.COM 16718906SEric.Saxe@Sun.COM do { 16728906SEric.Saxe@Sun.COM if (cpu->cpu_pg) 16738906SEric.Saxe@Sun.COM group_empty(&cpu->cpu_pg->cmt_pgs); 16748906SEric.Saxe@Sun.COM } while ((cpu = cpu->cpu_next) != cpu_list); 16758906SEric.Saxe@Sun.COM 16768906SEric.Saxe@Sun.COM cmt_sched_disabled = 1; 16778906SEric.Saxe@Sun.COM start_cpus(); 16788906SEric.Saxe@Sun.COM cmn_err(CE_NOTE, "!CMT thread placement optimizations unavailable"); 16798906SEric.Saxe@Sun.COM } 16808408SEric.Saxe@Sun.COM 16819036SEric.Saxe@Sun.COM /* 16829036SEric.Saxe@Sun.COM * CMT lineage validation 16839036SEric.Saxe@Sun.COM * 16849036SEric.Saxe@Sun.COM * This routine is invoked by pg_cmt_cpu_init() to validate the integrity 16859036SEric.Saxe@Sun.COM * of the PGs in a CPU's lineage. This is necessary because it's possible that 16869036SEric.Saxe@Sun.COM * some groupings (power domain groupings in particular) may be defined by 16879036SEric.Saxe@Sun.COM * sources that are buggy (e.g. BIOS bugs). 
 * In such cases, it may not be
 * possible to integrate those groupings into the CMT PG hierarchy, if doing
 * so would violate the subset invariant of the hierarchy, which says that
 * a PG must be a subset of its parent (if it has one).
 *
 * pg_cmt_lineage_validate()'s purpose is to detect grouping definitions that
 * would result in a violation of this invariant. If a violation is found,
 * and the PG is of a grouping type whose definition is known to originate from
 * suspect sources (BIOS), then pg_cmt_prune() will be invoked to prune the
 * PG (and all other instances of PG's sharing relationship type) from the CMT
 * hierarchy. Further, future instances of that sharing relationship type won't
 * be added. If the grouping definition doesn't originate from suspect
 * sources, then pg_cmt_disable() will be invoked to log an error, and disable
 * CMT scheduling altogether.
 *
 * This routine is invoked after the CPU has been added to the PGs in which
 * it belongs, but before those PGs have been added to (or had their place
 * adjusted in) the CMT PG hierarchy.
 *
 * The first argument is the CPU's PG lineage (essentially an array of PGs in
 * which the CPU belongs) that has already been sorted in ascending order
 * by CPU count. Some of the PGs in the CPU's lineage may already have other
 * CPUs in them, and have already been integrated into the CMT hierarchy.
 *
 * The addition of this new CPU to these pre-existing PGs means that those
 * PGs may need to be promoted up in the hierarchy to satisfy the subset
 * invariant. In addition to testing the subset invariant for the lineage,
 * this routine also verifies that the addition of the new CPU to the
 * existing PGs wouldn't cause the subset invariant to be violated in
 * the existing lineages.
 *
 * This routine will normally return one of the following:
 * CMT_LINEAGE_VALID - There were no problems detected with the lineage.
 * CMT_LINEAGE_REPAIRED - Problems were detected, but repaired via pruning.
 *
 * Otherwise, this routine will return a value indicating which error it
 * was unable to recover from (and set cmt_lineage_status along the way).
 *
 * This routine operates on the CPU specific processor group data (for the CPU
 * whose lineage is being validated), which is under-construction.
 * "pgdata" is a reference to the CPU's under-construction PG data.
 * This routine must be careful to operate only on "pgdata", and not cp->cpu_pg.
17299036SEric.Saxe@Sun.COM */ 17309036SEric.Saxe@Sun.COM static cmt_lineage_validation_t 17319438SEric.Saxe@Sun.COM pg_cmt_lineage_validate(pg_cmt_t **lineage, int *sz, cpu_pg_t *pgdata) 17328906SEric.Saxe@Sun.COM { 17339036SEric.Saxe@Sun.COM int i, j, size; 173411263SEric.Saxe@Sun.COM pg_cmt_t *pg, *pg_next, *pg_bad, *pg_tmp, *parent; 17358906SEric.Saxe@Sun.COM cpu_t *cp; 17368906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter; 17379036SEric.Saxe@Sun.COM lgrp_handle_t lgrp; 17388906SEric.Saxe@Sun.COM 17398906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 17408906SEric.Saxe@Sun.COM 17418906SEric.Saxe@Sun.COM revalidate: 17428906SEric.Saxe@Sun.COM size = *sz; 17438906SEric.Saxe@Sun.COM pg_bad = NULL; 17449036SEric.Saxe@Sun.COM lgrp = LGRP_NULL_HANDLE; 17459036SEric.Saxe@Sun.COM for (i = 0; i < size; i++) { 17468906SEric.Saxe@Sun.COM 17478906SEric.Saxe@Sun.COM pg = lineage[i]; 17489036SEric.Saxe@Sun.COM if (i < size - 1) 17499036SEric.Saxe@Sun.COM pg_next = lineage[i + 1]; 17509036SEric.Saxe@Sun.COM else 17519036SEric.Saxe@Sun.COM pg_next = NULL; 17528408SEric.Saxe@Sun.COM 17538906SEric.Saxe@Sun.COM /* 17548906SEric.Saxe@Sun.COM * We assume that the lineage has already been sorted 17558906SEric.Saxe@Sun.COM * by the number of CPUs. In fact, we depend on it. 17568906SEric.Saxe@Sun.COM */ 17579036SEric.Saxe@Sun.COM ASSERT(pg_next == NULL || 17589036SEric.Saxe@Sun.COM (PG_NUM_CPUS((pg_t *)pg) <= PG_NUM_CPUS((pg_t *)pg_next))); 17598906SEric.Saxe@Sun.COM 17608906SEric.Saxe@Sun.COM /* 176111263SEric.Saxe@Sun.COM * The CPUs PG lineage was passed as the first argument to 176211263SEric.Saxe@Sun.COM * this routine and contains the sorted list of the CPU's 176311263SEric.Saxe@Sun.COM * PGs. Ultimately, the ordering of the PGs in that list, and 176411263SEric.Saxe@Sun.COM * the ordering as traversed by the cmt_parent list must be 176511263SEric.Saxe@Sun.COM * the same. 
PG promotion will be used as the mechanism to 176611263SEric.Saxe@Sun.COM * achieve this, but first we need to look for cases where 176711263SEric.Saxe@Sun.COM * promotion will be necessary, and validate that will be 176811263SEric.Saxe@Sun.COM * possible without violating the subset invarient described 176911263SEric.Saxe@Sun.COM * above. 17709036SEric.Saxe@Sun.COM * 17719036SEric.Saxe@Sun.COM * Since the PG topology is in the middle of being changed, we 17729036SEric.Saxe@Sun.COM * need to check whether the PG's existing parent (if any) is 177311263SEric.Saxe@Sun.COM * part of this CPU's lineage (and therefore should contain 177411263SEric.Saxe@Sun.COM * the new CPU). If not, it means that the addition of the 177511263SEric.Saxe@Sun.COM * new CPU should have made this PG have more CPUs than its 177611263SEric.Saxe@Sun.COM * parent (and other ancestors not in the same lineage) and 177711263SEric.Saxe@Sun.COM * will need to be promoted into place. 177811263SEric.Saxe@Sun.COM * 177911263SEric.Saxe@Sun.COM * We need to verify all of this to defend against a buggy 17809036SEric.Saxe@Sun.COM * BIOS giving bad power domain CPU groupings. Sigh. 17819036SEric.Saxe@Sun.COM */ 178211263SEric.Saxe@Sun.COM parent = pg->cmt_parent; 178311263SEric.Saxe@Sun.COM while (parent != NULL) { 17849036SEric.Saxe@Sun.COM /* 178511263SEric.Saxe@Sun.COM * Determine if the parent/ancestor is in this lineage 17869036SEric.Saxe@Sun.COM */ 178711263SEric.Saxe@Sun.COM pg_tmp = NULL; 178811263SEric.Saxe@Sun.COM for (j = 0; (j < size) && (pg_tmp != parent); j++) { 17899036SEric.Saxe@Sun.COM pg_tmp = lineage[j]; 17909036SEric.Saxe@Sun.COM } 179111263SEric.Saxe@Sun.COM if (pg_tmp == parent) { 17929036SEric.Saxe@Sun.COM /* 179311263SEric.Saxe@Sun.COM * It's in the lineage. The concentricity 179411263SEric.Saxe@Sun.COM * checks will handle the rest. 
17959036SEric.Saxe@Sun.COM */ 179611263SEric.Saxe@Sun.COM break; 17979036SEric.Saxe@Sun.COM } 179811263SEric.Saxe@Sun.COM /* 179911263SEric.Saxe@Sun.COM * If it is not in the lineage, PG will eventually 180011263SEric.Saxe@Sun.COM * need to be promoted above it. Verify the ancestor 180111263SEric.Saxe@Sun.COM * is a proper subset. There is still an error if 180211263SEric.Saxe@Sun.COM * the ancestor has the same number of CPUs as PG, 180311263SEric.Saxe@Sun.COM * since that would imply it should be in the lineage, 180411263SEric.Saxe@Sun.COM * and we already know it isn't. 180511263SEric.Saxe@Sun.COM */ 180611263SEric.Saxe@Sun.COM if (PG_NUM_CPUS((pg_t *)parent) >= 180711263SEric.Saxe@Sun.COM PG_NUM_CPUS((pg_t *)pg)) { 180811263SEric.Saxe@Sun.COM /* 180911263SEric.Saxe@Sun.COM * Not a proper subset if the parent/ancestor 181011263SEric.Saxe@Sun.COM * has the same or more CPUs than PG. 181111263SEric.Saxe@Sun.COM */ 181211263SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_NON_PROMOTABLE; 181311263SEric.Saxe@Sun.COM goto handle_error; 181411263SEric.Saxe@Sun.COM } 181511263SEric.Saxe@Sun.COM parent = parent->cmt_parent; 18169036SEric.Saxe@Sun.COM } 18179036SEric.Saxe@Sun.COM 18189036SEric.Saxe@Sun.COM /* 18199036SEric.Saxe@Sun.COM * Walk each of the CPUs in the PGs group and perform 18209036SEric.Saxe@Sun.COM * consistency checks along the way. 18218906SEric.Saxe@Sun.COM */ 18228906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT((pg_t *)pg, cpu_iter); 18238906SEric.Saxe@Sun.COM while ((cp = pg_cpu_next(&cpu_iter)) != NULL) { 18249036SEric.Saxe@Sun.COM /* 18259036SEric.Saxe@Sun.COM * Verify that there aren't any CPUs contained in PG 18269036SEric.Saxe@Sun.COM * that the next PG in the lineage (which is larger 18279036SEric.Saxe@Sun.COM * or same size) doesn't also contain. 
18289036SEric.Saxe@Sun.COM */ 18299036SEric.Saxe@Sun.COM if (pg_next != NULL && 18309036SEric.Saxe@Sun.COM pg_cpu_find((pg_t *)pg_next, cp) == B_FALSE) { 18318906SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_NON_CONCENTRIC; 18328906SEric.Saxe@Sun.COM goto handle_error; 18338906SEric.Saxe@Sun.COM } 18349036SEric.Saxe@Sun.COM 18359036SEric.Saxe@Sun.COM /* 18369036SEric.Saxe@Sun.COM * Verify that all the CPUs in the PG are in the same 18379036SEric.Saxe@Sun.COM * lgroup. 18389036SEric.Saxe@Sun.COM */ 18399036SEric.Saxe@Sun.COM if (lgrp == LGRP_NULL_HANDLE) { 18409036SEric.Saxe@Sun.COM lgrp = lgrp_plat_cpu_to_hand(cp->cpu_id); 18419036SEric.Saxe@Sun.COM } else if (lgrp_plat_cpu_to_hand(cp->cpu_id) != lgrp) { 18429036SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_PG_SPANS_LGRPS; 18439036SEric.Saxe@Sun.COM goto handle_error; 18449036SEric.Saxe@Sun.COM } 18458906SEric.Saxe@Sun.COM } 18468408SEric.Saxe@Sun.COM } 18478408SEric.Saxe@Sun.COM 18488906SEric.Saxe@Sun.COM handle_error: 18499036SEric.Saxe@Sun.COM /* 18509036SEric.Saxe@Sun.COM * Some of these validation errors can result when the CPU grouping 18519036SEric.Saxe@Sun.COM * information is derived from buggy sources (for example, incorrect 18529036SEric.Saxe@Sun.COM * ACPI tables on x86 systems). 18539036SEric.Saxe@Sun.COM * 18549036SEric.Saxe@Sun.COM * We'll try to recover in such cases by pruning out the illegal 18559036SEric.Saxe@Sun.COM * groupings from the PG hierarchy, which means that we won't optimize 18569036SEric.Saxe@Sun.COM * for those levels, but we will for the remaining ones. 18579036SEric.Saxe@Sun.COM */ 18588906SEric.Saxe@Sun.COM switch (cmt_lineage_status) { 18598906SEric.Saxe@Sun.COM case CMT_LINEAGE_VALID: 18608906SEric.Saxe@Sun.COM case CMT_LINEAGE_REPAIRED: 18618906SEric.Saxe@Sun.COM break; 18629036SEric.Saxe@Sun.COM case CMT_LINEAGE_PG_SPANS_LGRPS: 18639036SEric.Saxe@Sun.COM /* 18649036SEric.Saxe@Sun.COM * We've detected a PG whose CPUs span lgroups. 
18659036SEric.Saxe@Sun.COM * 18669036SEric.Saxe@Sun.COM * This isn't supported, as the dispatcher isn't allowed to 18679036SEric.Saxe@Sun.COM * to do CMT thread placement across lgroups, as this would 18689036SEric.Saxe@Sun.COM * conflict with policies implementing MPO thread affinity. 18699036SEric.Saxe@Sun.COM * 18709746SEric.Saxe@Sun.COM * If the PG is of a sharing relationship type known to 18719746SEric.Saxe@Sun.COM * legitimately span lgroups, specify that no CMT thread 18729746SEric.Saxe@Sun.COM * placement policy should be implemented, and prune the PG 18739746SEric.Saxe@Sun.COM * from the existing CMT PG hierarchy. 18749746SEric.Saxe@Sun.COM * 18759746SEric.Saxe@Sun.COM * Otherwise, fall though to the case below for handling. 18769036SEric.Saxe@Sun.COM */ 18779746SEric.Saxe@Sun.COM if (((pghw_t *)pg)->pghw_hw == PGHW_CHIP) { 18789746SEric.Saxe@Sun.COM if (pg_cmt_prune(pg, lineage, sz, pgdata) == 0) { 18799746SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_REPAIRED; 18809746SEric.Saxe@Sun.COM goto revalidate; 18819746SEric.Saxe@Sun.COM } 18829746SEric.Saxe@Sun.COM } 18839746SEric.Saxe@Sun.COM /*LINTED*/ 18849036SEric.Saxe@Sun.COM case CMT_LINEAGE_NON_PROMOTABLE: 18859036SEric.Saxe@Sun.COM /* 18869036SEric.Saxe@Sun.COM * We've detected a PG that already exists in another CPU's 18879036SEric.Saxe@Sun.COM * lineage that cannot cannot legally be promoted into place 18889036SEric.Saxe@Sun.COM * without breaking the invariants of the hierarchy. 18899036SEric.Saxe@Sun.COM */ 18909036SEric.Saxe@Sun.COM if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) { 18919438SEric.Saxe@Sun.COM if (pg_cmt_prune(pg, lineage, sz, pgdata) == 0) { 18929036SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_REPAIRED; 18939036SEric.Saxe@Sun.COM goto revalidate; 18949036SEric.Saxe@Sun.COM } 18959036SEric.Saxe@Sun.COM } 18969036SEric.Saxe@Sun.COM /* 18979036SEric.Saxe@Sun.COM * Something went wrong trying to prune out the bad level. 
18989036SEric.Saxe@Sun.COM * Disable CMT scheduling altogether. 18999036SEric.Saxe@Sun.COM */ 19009036SEric.Saxe@Sun.COM pg_cmt_disable(); 19019036SEric.Saxe@Sun.COM break; 19028906SEric.Saxe@Sun.COM case CMT_LINEAGE_NON_CONCENTRIC: 19038408SEric.Saxe@Sun.COM /* 19049036SEric.Saxe@Sun.COM * We've detected a non-concentric PG lineage, which means that 19059036SEric.Saxe@Sun.COM * there's a PG in the lineage that has CPUs that the next PG 19069036SEric.Saxe@Sun.COM * over in the lineage (which is the same size or larger) 19079036SEric.Saxe@Sun.COM * doesn't have. 19088906SEric.Saxe@Sun.COM * 19099036SEric.Saxe@Sun.COM * In this case, we examine the two PGs to see if either 19109036SEric.Saxe@Sun.COM * grouping is defined by potentially buggy sources. 19118906SEric.Saxe@Sun.COM * 19128906SEric.Saxe@Sun.COM * If one has less CPUs than the other, and contains CPUs 19138906SEric.Saxe@Sun.COM * not found in the parent, and it is an untrusted enumeration, 19148906SEric.Saxe@Sun.COM * then prune it. If both have the same number of CPUs, then 19158906SEric.Saxe@Sun.COM * prune the one that is untrusted. 19168906SEric.Saxe@Sun.COM * 19178906SEric.Saxe@Sun.COM * This process repeats until we have a concentric lineage, 19188906SEric.Saxe@Sun.COM * or we would have to prune out level derived from what we 19198906SEric.Saxe@Sun.COM * thought was a reliable source, in which case CMT scheduling 19209036SEric.Saxe@Sun.COM * is disabled altogether. 
19218408SEric.Saxe@Sun.COM */ 19229036SEric.Saxe@Sun.COM if ((PG_NUM_CPUS((pg_t *)pg) < PG_NUM_CPUS((pg_t *)pg_next)) && 19238906SEric.Saxe@Sun.COM (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw))) { 19248906SEric.Saxe@Sun.COM pg_bad = pg; 19258906SEric.Saxe@Sun.COM } else if (PG_NUM_CPUS((pg_t *)pg) == 19269036SEric.Saxe@Sun.COM PG_NUM_CPUS((pg_t *)pg_next)) { 19279036SEric.Saxe@Sun.COM if (PG_CMT_HW_SUSPECT(((pghw_t *)pg_next)->pghw_hw)) { 19289036SEric.Saxe@Sun.COM pg_bad = pg_next; 19298906SEric.Saxe@Sun.COM } else if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) { 19308906SEric.Saxe@Sun.COM pg_bad = pg; 19318906SEric.Saxe@Sun.COM } 19328906SEric.Saxe@Sun.COM } 19338906SEric.Saxe@Sun.COM if (pg_bad) { 19349438SEric.Saxe@Sun.COM if (pg_cmt_prune(pg_bad, lineage, sz, pgdata) == 0) { 19358906SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_REPAIRED; 19368906SEric.Saxe@Sun.COM goto revalidate; 19378408SEric.Saxe@Sun.COM } 19388906SEric.Saxe@Sun.COM } 19399036SEric.Saxe@Sun.COM /* 19409036SEric.Saxe@Sun.COM * Something went wrong trying to identify and/or prune out 19419036SEric.Saxe@Sun.COM * the bad level. Disable CMT scheduling altogether. 19429036SEric.Saxe@Sun.COM */ 19439036SEric.Saxe@Sun.COM pg_cmt_disable(); 19449036SEric.Saxe@Sun.COM break; 19458906SEric.Saxe@Sun.COM default: 19468906SEric.Saxe@Sun.COM /* 19479036SEric.Saxe@Sun.COM * If we're here, we've encountered a validation error for 19489036SEric.Saxe@Sun.COM * which we don't know how to recover. In this case, disable 19499036SEric.Saxe@Sun.COM * CMT scheduling altogether. 19508906SEric.Saxe@Sun.COM */ 19519036SEric.Saxe@Sun.COM cmt_lineage_status = CMT_LINEAGE_UNRECOVERABLE; 19528906SEric.Saxe@Sun.COM pg_cmt_disable(); 19538408SEric.Saxe@Sun.COM } 19549036SEric.Saxe@Sun.COM return (cmt_lineage_status); 19558408SEric.Saxe@Sun.COM } 1956