/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/processor.h>
#include <sys/disp.h>
#include <sys/group.h>
#include <sys/pghw.h>
#include <sys/bitset.h>
#include <sys/lgrp.h>
#include <sys/cmt.h>
#include <sys/cpu_pm.h>

/*
 * CMT scheduler / dispatcher support
 *
 * This file implements CMT scheduler support using Processor Groups.
 * The CMT processor group class creates and maintains the CMT class
 * specific processor group pg_cmt_t.
 *
 * ----------------------------  <-- pg_cmt_t *
 * | pghw_t                   |
 * ----------------------------
 * | CMT class specific data  |
 * | - hierarchy linkage      |
 * | - CMT load balancing data|
 * | - active CPU group/bitset|
 * ----------------------------
 *
 * The scheduler/dispatcher leverages knowledge of the performance
 * relevant CMT sharing relationships existing between CPUs to implement
 * optimized affinity, load balancing, and coalescence policies.
 *
 * Load balancing policy seeks to improve performance by minimizing
 * contention over shared processor resources / facilities. Affinity
 * policies seek to improve cache and TLB utilization. Coalescence
 * policies improve resource utilization and ultimately power efficiency.
 *
 * The CMT PGs created by this class are already arranged into a
 * hierarchy (which is done in the pghw layer). To implement the top-down
 * CMT load balancing algorithm, the CMT PGs additionally maintain
 * parent, child and sibling hierarchy relationships.
 * Parent PGs always contain a superset of their children's resources,
 * each PG can have at most one parent, and siblings are the group of PGs
 * sharing the same parent.
 *
 * On NUMA systems, the CMT load balancing algorithm balances across the
 * CMT PGs within their respective lgroups. On UMA based systems, there
 * exists a single top level group of PGs to balance across. On NUMA systems,
 * multiple top level groups are instantiated, and the top level balancing
 * begins by balancing across the CMT PGs within their respective (per lgroup)
 * top level groups.
 */
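/*
 * For example (purely illustrative; the actual set of PGs depends on what
 * sharing relationships the platform reports), a CPU on a chip with a
 * shared last level cache and per core execution pipelines could end up
 * with a lineage like:
 *
 *	chip                    <-- largest PG, top of the lineage
 *	  `--> cache            <-- CPUs sharing the last level cache
 *	         `--> ipipe     <-- CPUs sharing an execution pipeline
 *
 * where each child PG's CPUs are a subset of its parent's CPUs.
 */
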
static cmt_lgrp_t *cmt_lgrps = NULL;    /* cmt_lgrps list head */
static cmt_lgrp_t *cpu0_lgrp = NULL;    /* boot CPU's initial lgrp */
                                        /* used for null_proc_lpa */
cmt_lgrp_t *cmt_root = NULL;            /* Reference to root cmt pg */

static int is_cpu0 = 1;                 /* true if this is boot CPU context */

/*
 * Array of hardware sharing relationships that are blacklisted.
 * PGs won't be instantiated for blacklisted hardware sharing relationships.
 */
static int cmt_hw_blacklisted[PGHW_NUM_COMPONENTS];

/*
 * Set this to non-zero to disable CMT scheduling
 * This must be done via kmdb -d, as /etc/system will be too late
 */
int cmt_sched_disabled = 0;

static pg_cid_t pg_cmt_class_id;        /* PG class id */

static pg_t *pg_cmt_alloc();
static void pg_cmt_free(pg_t *);
static void pg_cmt_cpu_init(cpu_t *);
static void pg_cmt_cpu_fini(cpu_t *);
static void pg_cmt_cpu_active(cpu_t *);
static void pg_cmt_cpu_inactive(cpu_t *);
static void pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
static void pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
static char *pg_cmt_policy_name(pg_t *);
static void pg_cmt_hier_sort(pg_cmt_t **, int);
static pg_cmt_t *pg_cmt_hier_rank(pg_cmt_t *, pg_cmt_t *);
static int pg_cmt_cpu_belongs(pg_t *, cpu_t *);
static int pg_cmt_hw(pghw_type_t);
static cmt_lgrp_t *pg_cmt_find_lgrp(lgrp_handle_t);
static cmt_lgrp_t *pg_cmt_lgrp_create(lgrp_handle_t);
static int pg_cmt_lineage_validate(pg_cmt_t **, int *);
static void cmt_ev_thread_swtch(pg_t *, cpu_t *, hrtime_t,
    kthread_t *, kthread_t *);
static void cmt_ev_thread_swtch_pwr(pg_t *, cpu_t *, hrtime_t,
    kthread_t *, kthread_t *);
static void cmt_ev_thread_remain_pwr(pg_t *, cpu_t *, kthread_t *);

/*
 * Macro to test if PG is managed by the CMT PG class
 */
#define	IS_CMT_PG(pg)	(((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)

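/*
 * For example, the CPU related callbacks below iterate over a CPU's entire
 * PG set and skip PGs belonging to other classes with:
 *
 *	if (IS_CMT_PG(pg) == 0)
 *		continue;
 */
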
/*
 * Status codes for CMT lineage validation
 * See pg_cmt_lineage_validate() below
 */
typedef enum cmt_lineage_validation {
    CMT_LINEAGE_VALID,
    CMT_LINEAGE_NON_CONCENTRIC,
    CMT_LINEAGE_REPAIRED,
    CMT_LINEAGE_UNRECOVERABLE
} cmt_lineage_validation_t;

/*
 * Status of the current lineage under construction.
 * One must be holding cpu_lock to change this.
 */
static cmt_lineage_validation_t cmt_lineage_status = CMT_LINEAGE_VALID;

/*
 * Power domain definitions (on x86) are defined by ACPI, and
 * therefore may be subject to BIOS bugs.
 */
#define	PG_CMT_HW_SUSPECT(hw)	PGHW_IS_PM_DOMAIN(hw)

/*
 * CMT PG ops
 */
struct pg_ops pg_ops_cmt = {
    pg_cmt_alloc,
    pg_cmt_free,
    pg_cmt_cpu_init,
    pg_cmt_cpu_fini,
    pg_cmt_cpu_active,
    pg_cmt_cpu_inactive,
    pg_cmt_cpupart_in,
    NULL,			/* cpupart_out */
    pg_cmt_cpupart_move,
    pg_cmt_cpu_belongs,
    pg_cmt_policy_name,
};

/*
 * Initialize the CMT PG class
 */
void
pg_cmt_class_init(void)
{
    if (cmt_sched_disabled)
        return;

    pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
}

/*
 * Called to indicate a new CPU has started up so
 * that either t0 or the slave startup thread can
 * be accounted for.
 */
void
pg_cmt_cpu_startup(cpu_t *cp)
{
    pg_ev_thread_swtch(cp, gethrtime_unscaled(), cp->cpu_idle_thread,
        cp->cpu_thread);
}

/*
 * Return non-zero if thread can migrate between "from" and "to"
 * without a performance penalty
 */
int
pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
{
    if (from->cpu_physid->cpu_cacheid ==
        to->cpu_physid->cpu_cacheid)
        return (1);
    return (0);
}

/*
 * CMT class specific PG allocation
 */
static pg_t *
pg_cmt_alloc(void)
{
    return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
}

/*
 * Class specific PG de-allocation
 */
static void
pg_cmt_free(pg_t *pg)
{
    ASSERT(pg != NULL);
    ASSERT(IS_CMT_PG(pg));

    kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t));
}

/*
 * Given a hardware sharing relationship, return which dispatcher
 * policies should be implemented to optimize performance and efficiency
 */
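/*
 * For example, in the absence of a platform override, the defaults below
 * load balance across CPUs that share a physical chip or an execution
 * pipeline, while CPUs sharing a cache are given an affinity policy so
 * that threads tend to stay where their cache state is warm.
 */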
static pg_cmt_policy_t
pg_cmt_policy(pghw_type_t hw)
{
    pg_cmt_policy_t p;

    /*
     * Give the platform a chance to override the default
     */
    if ((p = pg_plat_cmt_policy(hw)) != CMT_NO_POLICY)
        return (p);

    switch (hw) {
    case PGHW_IPIPE:
    case PGHW_FPU:
    case PGHW_CHIP:
        return (CMT_BALANCE);
    case PGHW_CACHE:
        return (CMT_AFFINITY);
    case PGHW_POW_ACTIVE:
    case PGHW_POW_IDLE:
        return (CMT_BALANCE);
    default:
        return (CMT_NO_POLICY);
    }
}

/*
 * Rank the importance of optimizing for the pg1 relationship vs.
 * the pg2 relationship.
 */
static pg_cmt_t *
pg_cmt_hier_rank(pg_cmt_t *pg1, pg_cmt_t *pg2)
{
    pghw_type_t hw1 = ((pghw_t *)pg1)->pghw_hw;
    pghw_type_t hw2 = ((pghw_t *)pg2)->pghw_hw;

    /*
     * A power domain is only important if CPUPM is enabled.
     */
    if (cpupm_get_policy() == CPUPM_POLICY_DISABLED) {
        if (PGHW_IS_PM_DOMAIN(hw1) && !PGHW_IS_PM_DOMAIN(hw2))
            return (pg2);
        if (PGHW_IS_PM_DOMAIN(hw2) && !PGHW_IS_PM_DOMAIN(hw1))
            return (pg1);
    }

    /*
     * Otherwise, ask the platform
     */
    if (pg_plat_hw_rank(hw1, hw2) == hw1)
        return (pg1);
    else
        return (pg2);
}

/*
 * Initialize CMT callbacks for the given PG
 */
static void
cmt_callback_init(pg_t *pg)
{
    switch (((pghw_t *)pg)->pghw_hw) {
    case PGHW_POW_ACTIVE:
        pg->pg_cb.thread_swtch = cmt_ev_thread_swtch_pwr;
        pg->pg_cb.thread_remain = cmt_ev_thread_remain_pwr;
        break;
    default:
        pg->pg_cb.thread_swtch = cmt_ev_thread_swtch;
        break;
    }
}

/*
 * Promote PG above its current parent.
 * This is only legal if PG has an equal or greater number of CPUs
 * than its parent.
 */
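/*
 * Purely illustrative example of a promotion: if a power domain PG "P" is
 * the only child of a chip PG "C" and both contain the same CPUs,
 *
 *	before:	lgrp/top <- C <- P <- ... (P's children)
 *	after:	lgrp/top <- P <- C <- ... (P's former children)
 *
 * i.e. PG and its parent swap places (and swap their children/sibling
 * group references), so that PG's policy is encountered first in the
 * dispatcher's top down traversal of the hierarchy.
 */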
static void
cmt_hier_promote(pg_cmt_t *pg)
{
    pg_cmt_t	*parent;
    group_t	*children;
    cpu_t	*cpu;
    group_iter_t iter;
    pg_cpu_itr_t cpu_iter;
    int		r;
    int		err;

    ASSERT(MUTEX_HELD(&cpu_lock));

    parent = pg->cmt_parent;
    if (parent == NULL) {
        /*
         * Nothing to do
         */
        return;
    }

    ASSERT(PG_NUM_CPUS((pg_t *)pg) >= PG_NUM_CPUS((pg_t *)parent));

    /*
     * We're changing around the hierarchy, which is actively traversed
     * by the dispatcher. Pause CPUs to ensure exclusivity.
     */
    pause_cpus(NULL);

    /*
     * If necessary, update the parent's sibling set, replacing parent
     * with PG.
     */
    if (parent->cmt_siblings) {
        if (group_remove(parent->cmt_siblings, parent, GRP_NORESIZE)
            != -1) {
            r = group_add(parent->cmt_siblings, pg, GRP_NORESIZE);
            ASSERT(r != -1);
        }
    }

    /*
     * If the parent is at the top of the hierarchy, replace its entry
     * in the root lgroup's group of top level PGs.
     */
    if (parent->cmt_parent == NULL &&
        parent->cmt_siblings != &cmt_root->cl_pgs) {
        if (group_remove(&cmt_root->cl_pgs, parent, GRP_NORESIZE)
            != -1) {
            r = group_add(&cmt_root->cl_pgs, pg, GRP_NORESIZE);
            ASSERT(r != -1);
        }
    }

    /*
     * We assume (and therefore assert) that the PG being promoted is an
     * only child of its parent. Update the parent's children set,
     * replacing PG's entry with the parent (since the parent is becoming
     * the child). Then have PG and the parent swap children sets.
     */
    ASSERT(GROUP_SIZE(parent->cmt_children) <= 1);
    if (group_remove(parent->cmt_children, pg, GRP_NORESIZE) != -1) {
        r = group_add(parent->cmt_children, parent, GRP_NORESIZE);
        ASSERT(r != -1);
    }

    children = pg->cmt_children;
    pg->cmt_children = parent->cmt_children;
    parent->cmt_children = children;

    /*
     * Update the sibling references for PG and its parent
     */
    pg->cmt_siblings = parent->cmt_siblings;
    parent->cmt_siblings = pg->cmt_children;

    /*
     * Update any cached lineages in the per CPU pg data.
     */
    PG_CPU_ITR_INIT(pg, cpu_iter);
    while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) {
        int		idx;
        group_t		*pgs;
        pg_cmt_t	*cpu_pg;

        /*
         * Iterate over the CPU's PGs updating the children
         * of the PG being promoted, since they have a new parent.
         */
        pgs = &cpu->cpu_pg->pgs;
        group_iter_init(&iter);
        while ((cpu_pg = group_iterate(pgs, &iter)) != NULL) {
            if (cpu_pg->cmt_parent == pg) {
                cpu_pg->cmt_parent = parent;
            }
        }

        /*
         * Update the CMT load balancing lineage
         */
        pgs = &cpu->cpu_pg->cmt_pgs;
        if ((idx = group_find(pgs, (void *)pg)) == -1) {
            /*
             * Unless this is the CPU whose lineage is being
             * constructed, the PG being promoted should be
             * in the lineage.
             */
            ASSERT(GROUP_SIZE(pgs) == 0);
            continue;
        }

        ASSERT(GROUP_ACCESS(pgs, idx - 1) == parent);
        ASSERT(idx > 0);

        /*
         * Have the child and the parent swap places in the CPU's
         * lineage
         */
        group_remove_at(pgs, idx);
        group_remove_at(pgs, idx - 1);
        err = group_add_at(pgs, parent, idx);
        ASSERT(err == 0);
        err = group_add_at(pgs, pg, idx - 1);
        ASSERT(err == 0);
    }

    /*
     * Update the parent references for PG and its parent
     */
    pg->cmt_parent = parent->cmt_parent;
    parent->cmt_parent = pg;

    start_cpus();
}

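/*
 * Promotion is used by pg_cmt_cpu_init() below, to repair a lineage whose
 * PGs sorted into a different order than the existing hierarchy, and by
 * cmt_pad_enable()/cmt_pad_disable(), to let a power domain's dispatcher
 * policy dominate that of a same sized parent PG.
 */
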
/*
 * CMT class callback for a new CPU entering the system
 */
static void
pg_cmt_cpu_init(cpu_t *cp)
{
    pg_cmt_t	*pg;
    group_t	*cmt_pgs;
    int		levels, level;
    pghw_type_t	hw;
    pg_t	*pg_cache = NULL;
    pg_cmt_t	*cpu_cmt_hier[PGHW_NUM_COMPONENTS];
    lgrp_handle_t lgrp_handle;
    cmt_lgrp_t	*lgrp;

    ASSERT(MUTEX_HELD(&cpu_lock));

    if (cmt_sched_disabled)
        return;

    /*
     * A new CPU is coming into the system.
     * Interrogate the platform to see if the CPU
     * has any performance or efficiency relevant
     * sharing relationships
     */
    cmt_pgs = &cp->cpu_pg->cmt_pgs;
    cp->cpu_pg->cmt_lineage = NULL;

    bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
    levels = 0;
    for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {

        pg_cmt_policy_t	policy;

        /*
         * We're only interested in the hw sharing relationships
         * for which we know how to optimize.
         */
        policy = pg_cmt_policy(hw);
        if (policy == CMT_NO_POLICY ||
            pg_plat_hw_shared(cp, hw) == 0)
            continue;

        /*
         * Continue if the hardware sharing relationship has been
         * blacklisted.
         */
        if (cmt_hw_blacklisted[hw]) {
            continue;
        }

        /*
         * Find (or create) the PG associated with
         * the hw sharing relationship in which cp
         * belongs.
         *
         * Determine if a suitable PG already
         * exists, or if one needs to be created.
         */
        pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
        if (pg == NULL) {
            /*
             * Create a new one.
             * Initialize the common...
             */
            pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);

            /* ... physical ... */
            pghw_init((pghw_t *)pg, cp, hw);

            /*
             * ... and CMT specific portions of the
             * structure.
             */
            pg->cmt_policy = policy;

            /* CMT event callbacks */
            cmt_callback_init((pg_t *)pg);

            bitset_init(&pg->cmt_cpus_actv_set);
            group_create(&pg->cmt_cpus_actv);
        } else {
            ASSERT(IS_CMT_PG(pg));
        }

        /* Add the CPU to the PG */
        pg_cpu_add((pg_t *)pg, cp);

        /*
         * Ensure capacity of the active CPU group/bitset
         */
        group_expand(&pg->cmt_cpus_actv,
            GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

        if (cp->cpu_seqid >=
            bitset_capacity(&pg->cmt_cpus_actv_set)) {
            bitset_resize(&pg->cmt_cpus_actv_set,
                cp->cpu_seqid + 1);
        }

        /*
         * Build a lineage of CMT PGs for load balancing / coalescence
         */
        if (policy & (CMT_BALANCE | CMT_COALESCE)) {
            cpu_cmt_hier[levels++] = pg;
        }

        /* Cache this for later */
        if (hw == PGHW_CACHE)
            pg_cache = (pg_t *)pg;
    }

    group_expand(cmt_pgs, levels);

    if (cmt_root == NULL)
        cmt_root = pg_cmt_lgrp_create(lgrp_plat_root_hand());

    /*
     * Find the lgrp that encapsulates this CPU's CMT hierarchy
     */
    lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
    if ((lgrp = pg_cmt_find_lgrp(lgrp_handle)) == NULL)
        lgrp = pg_cmt_lgrp_create(lgrp_handle);

    /*
     * Sort the PGs in the lineage by number of CPUs, in ascending order
     */
    pg_cmt_hier_sort(cpu_cmt_hier, levels);

    /*
     * Examine the lineage and validate it.
     * This routine will also try to fix the lineage along with the
     * rest of the PG hierarchy should it detect an issue.
     *
     * If it returns -1, an unrecoverable error has happened and we
     * need to return.
     */
    if (pg_cmt_lineage_validate(cpu_cmt_hier, &levels) < 0)
        return;

    /*
     * For existing PGs in the lineage, verify that the parent is
     * correct, as the generation in the lineage may have changed
     * as a result of the sorting. Start the traversal at the top
     * of the lineage, moving down.
     */
    for (level = levels - 1; level >= 0; ) {
        int reorg;

        reorg = 0;
        pg = cpu_cmt_hier[level];

        /*
         * Promote PGs at an incorrect generation into place.
         */
        while (pg->cmt_parent &&
            pg->cmt_parent != cpu_cmt_hier[level + 1]) {
            cmt_hier_promote(pg);
            reorg++;
        }
        if (reorg > 0)
            level = levels - 1;
        else
            level--;
    }

    /*
     * For each of the PGs in the CPU's lineage:
     *	- Add an entry in the CPU sorted CMT PG group
     *	  which is used for top down CMT load balancing
     *	- Tie the PG into the CMT hierarchy by connecting
     *	  it to its parent and siblings.
     */
    for (level = 0; level < levels; level++) {
        uint_t	children;
        int	err;

        pg = cpu_cmt_hier[level];
        err = group_add_at(cmt_pgs, pg, levels - level - 1);
        ASSERT(err == 0);

        if (level == 0)
            cp->cpu_pg->cmt_lineage = (pg_t *)pg;

        if (pg->cmt_siblings != NULL) {
            /* Already initialized */
            ASSERT(pg->cmt_parent == NULL ||
                pg->cmt_parent == cpu_cmt_hier[level + 1]);
            ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
                ((pg->cmt_parent != NULL) &&
                pg->cmt_siblings == pg->cmt_parent->cmt_children));
            continue;
        }

        if ((level + 1) == levels) {
            pg->cmt_parent = NULL;

            pg->cmt_siblings = &lgrp->cl_pgs;
            children = ++lgrp->cl_npgs;
            if (cmt_root != lgrp)
                cmt_root->cl_npgs++;
        } else {
            pg->cmt_parent = cpu_cmt_hier[level + 1];

            /*
             * A good parent keeps track of their children.
             * The parent's children group is also the PG's
             * siblings.
             */
            if (pg->cmt_parent->cmt_children == NULL) {
                pg->cmt_parent->cmt_children =
                    kmem_zalloc(sizeof (group_t), KM_SLEEP);
                group_create(pg->cmt_parent->cmt_children);
            }
            pg->cmt_siblings = pg->cmt_parent->cmt_children;
            children = ++pg->cmt_parent->cmt_nchildren;
        }

        group_expand(pg->cmt_siblings, children);
        group_expand(&cmt_root->cl_pgs, cmt_root->cl_npgs);
    }

    /*
     * Cache the chip and core IDs in the cpu_t->cpu_physid structure
     * for fast lookups later.
     */
    if (cp->cpu_physid) {
        cp->cpu_physid->cpu_chipid =
            pg_plat_hw_instance_id(cp, PGHW_CHIP);
        cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);

        /*
         * If this cpu has a PG representing shared cache, then set
         * cpu_cacheid to that PG's logical id
         */
        if (pg_cache)
            cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
    }

    /* CPU0 only initialization */
    if (is_cpu0) {
        pg_cmt_cpu_startup(cp);
        is_cpu0 = 0;
        cpu0_lgrp = lgrp;
    }
}

/*
 * Class callback when a CPU is leaving the system (deletion)
 */
static void
pg_cmt_cpu_fini(cpu_t *cp)
{
    group_iter_t	i;
    pg_cmt_t		*pg;
    group_t		*pgs, *cmt_pgs;
    lgrp_handle_t	lgrp_handle;
    cmt_lgrp_t		*lgrp;

    if (cmt_sched_disabled)
        return;

    pgs = &cp->cpu_pg->pgs;
    cmt_pgs = &cp->cpu_pg->cmt_pgs;

    /*
     * Find the lgroup that encapsulates this CPU's CMT hierarchy
     */
    lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);

    lgrp = pg_cmt_find_lgrp(lgrp_handle);
    if (ncpus == 1 && lgrp != cpu0_lgrp) {
        /*
         * One might wonder how we could be deconfiguring the
         * only CPU in the system.
         *
         * On Starcat systems when null_proc_lpa is detected,
         * the boot CPU (which is already configured into a leaf
         * lgroup) is moved into the root lgroup. This is done by
         * deconfiguring it from both lgroups and processor
         * groups, and then later reconfiguring it back in. This
         * call to pg_cmt_cpu_fini() is part of that deconfiguration.
         *
         * This special case is detected by noting that the platform
         * has changed the CPU's lgrp affiliation (since it now
         * belongs in the root). In this case, use the cmt_lgrp_t
         * cached for the boot CPU, since this is what needs to be
         * torn down.
         */
        lgrp = cpu0_lgrp;
    }

    ASSERT(lgrp != NULL);

    /*
     * First, clean up anything load balancing specific for each of
     * the CPU's PGs that participated in CMT load balancing
     */
    pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
    while (pg != NULL) {

        /*
         * Remove the PG from the CPU's load balancing lineage
         */
        (void) group_remove(cmt_pgs, pg, GRP_RESIZE);

        /*
         * If it's about to become empty, destroy its children
         * group, and remove its reference from its siblings.
         * This is done here (rather than below) to avoid removing
         * our reference from a PG that we just eliminated.
         */
        if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
            if (pg->cmt_children != NULL)
                group_destroy(pg->cmt_children);
            if (pg->cmt_siblings != NULL) {
                if (pg->cmt_siblings == &lgrp->cl_pgs)
                    lgrp->cl_npgs--;
                else
                    pg->cmt_parent->cmt_nchildren--;
            }
        }
        pg = pg->cmt_parent;
    }
    ASSERT(GROUP_SIZE(cmt_pgs) == 0);

    /*
     * Now that the load balancing lineage updates have happened,
     * remove the CPU from all its PGs (destroying any that become
     * empty).
     */
    group_iter_init(&i);
    while ((pg = group_iterate(pgs, &i)) != NULL) {
        if (IS_CMT_PG(pg) == 0)
            continue;

        pg_cpu_delete((pg_t *)pg, cp);
        /*
         * Deleting the CPU from the PG changes the CPU's
         * PG group over which we are actively iterating.
         * Re-initialize the iteration.
         */
        group_iter_init(&i);

        if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {

            /*
             * The PG has become zero sized, so destroy it.
             */
            group_destroy(&pg->cmt_cpus_actv);
            bitset_fini(&pg->cmt_cpus_actv_set);
            pghw_fini((pghw_t *)pg);

            pg_destroy((pg_t *)pg);
        }
    }
}

/*
 * Class callback when a CPU is entering a cpu partition
 */
static void
pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
{
    group_t	*pgs;
    pg_t	*pg;
    group_iter_t i;

    ASSERT(MUTEX_HELD(&cpu_lock));

    if (cmt_sched_disabled)
        return;

    pgs = &cp->cpu_pg->pgs;

    /*
     * Ensure that the new partition's PG bitset
     * is large enough for all CMT PGs to which cp
     * belongs
     */
    group_iter_init(&i);
    while ((pg = group_iterate(pgs, &i)) != NULL) {
        if (IS_CMT_PG(pg) == 0)
            continue;

        if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id)
            bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
    }
}

/*
 * Class callback when a CPU is actually moving partitions
 */
static void
pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
{
    cpu_t	*cpp;
    group_t	*pgs;
    pg_t	*pg;
    group_iter_t pg_iter;
    pg_cpu_itr_t cpu_iter;
    boolean_t	found;

    ASSERT(MUTEX_HELD(&cpu_lock));

    if (cmt_sched_disabled)
        return;

    pgs = &cp->cpu_pg->pgs;
    group_iter_init(&pg_iter);

    /*
     * Iterate over the CPU's CMT PGs
     */
    while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {

        if (IS_CMT_PG(pg) == 0)
            continue;

        /*
         * Add the PG to the bitset in the new partition.
         */
        bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);

        /*
         * Remove the PG from the bitset in the old partition
         * if the last of the PG's CPUs have left.
         */
        found = B_FALSE;
        PG_CPU_ITR_INIT(pg, cpu_iter);
        while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
            if (cpp == cp)
                continue;
            if (CPU_ACTIVE(cpp) &&
                cpp->cpu_part->cp_id == oldpp->cp_id) {
                found = B_TRUE;
                break;
            }
        }
        if (!found)
            bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id);
    }
}

/*
 * Class callback when a CPU becomes active (online)
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_active(cpu_t *cp)
{
    int		err;
    group_iter_t i;
    pg_cmt_t	*pg;
    group_t	*pgs;

    ASSERT(MUTEX_HELD(&cpu_lock));

    if (cmt_sched_disabled)
        return;

    pgs = &cp->cpu_pg->pgs;
    group_iter_init(&i);

    /*
     * Iterate over the CPU's PGs
     */
    while ((pg = group_iterate(pgs, &i)) != NULL) {

        if (IS_CMT_PG(pg) == 0)
            continue;

        err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
        ASSERT(err == 0);

        /*
         * If this is the first active CPU in the PG, and it
         * represents a hardware sharing relationship over which
         * CMT load balancing is performed, add it as a candidate
         * for balancing with its siblings.
         */
        if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
            (pg->cmt_policy & (CMT_BALANCE | CMT_COALESCE))) {
            err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
            ASSERT(err == 0);

            /*
             * If this is a top level PG, add it as a balancing
             * candidate when balancing within the root lgroup.
             */
            if (pg->cmt_parent == NULL &&
                pg->cmt_siblings != &cmt_root->cl_pgs) {
                err = group_add(&cmt_root->cl_pgs, pg,
                    GRP_NORESIZE);
                ASSERT(err == 0);
            }
        }

        /*
         * Notate the CPU in the PG's active CPU bitset.
         * Also notate the PG as being active in its associated
         * partition.
         */
        bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
        bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
    }
}

/*
 * Class callback when a CPU goes inactive (offline)
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_inactive(cpu_t *cp)
{
    int		err;
    group_t	*pgs;
    pg_cmt_t	*pg;
    cpu_t	*cpp;
    group_iter_t i;
    pg_cpu_itr_t cpu_itr;
    boolean_t	found;

    ASSERT(MUTEX_HELD(&cpu_lock));

    if (cmt_sched_disabled)
        return;

    pgs = &cp->cpu_pg->pgs;
    group_iter_init(&i);

    while ((pg = group_iterate(pgs, &i)) != NULL) {

        if (IS_CMT_PG(pg) == 0)
            continue;

        /*
         * Remove the CPU from the CMT PG's active CPU group
         * bitmap
         */
        err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
        ASSERT(err == 0);

        bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);

        /*
         * If there are no more active CPUs in this PG over which
         * load was balanced, remove it as a balancing candidate.
         */
        if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
            (pg->cmt_policy & (CMT_BALANCE | CMT_COALESCE))) {
            err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
            ASSERT(err == 0);

            if (pg->cmt_parent == NULL &&
                pg->cmt_siblings != &cmt_root->cl_pgs) {
                err = group_remove(&cmt_root->cl_pgs, pg,
                    GRP_NORESIZE);
                ASSERT(err == 0);
            }
        }

        /*
         * Assert the number of active CPUs does not exceed
         * the total number of CPUs in the PG
         */
        ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
            GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

        /*
         * Update the PG bitset in the CPU's old partition
         */
        found = B_FALSE;
        PG_CPU_ITR_INIT(pg, cpu_itr);
        while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
            if (cpp == cp)
                continue;
            if (CPU_ACTIVE(cpp) &&
                cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
                found = B_TRUE;
                break;
            }
        }
        if (!found) {
            bitset_del(&cp->cpu_part->cp_cmt_pgs,
                ((pg_t *)pg)->pg_id);
        }
    }
}

/*
 * Return non-zero if the CPU belongs in the given PG
 */
static int
pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
{
    cpu_t	*pg_cpu;

    pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0);

    ASSERT(pg_cpu != NULL);

    /*
     * The CPU belongs if, given the nature of the hardware sharing
     * relationship represented by the PG, the CPU has that
     * relationship with some other CPU already in the PG
     */
    if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw))
        return (1);

    return (0);
}

/*
 * Sort the CPU's CMT hierarchy, where "size" is the number of levels.
 */
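/*
 * The sort below is a simple shell style insertion sort on PG_NUM_CPUS(),
 * with a shrinking gap (size / 2, then roughly inc * 5 / 11, ending at 1),
 * followed by a single pass that lets the platform break ties between
 * equally sized PGs via pg_cmt_hier_rank().
 */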
static void
pg_cmt_hier_sort(pg_cmt_t **hier, int size)
{
    int		i, j, inc;
    pg_t	*tmp;
    pg_t	**h = (pg_t **)hier;

    /*
     * First sort by number of CPUs
     */
    inc = size / 2;
    while (inc > 0) {
        for (i = inc; i < size; i++) {
            j = i;
            tmp = h[i];
            while ((j >= inc) &&
                (PG_NUM_CPUS(h[j - inc]) > PG_NUM_CPUS(tmp))) {
                h[j] = h[j - inc];
                j = j - inc;
            }
            h[j] = tmp;
        }
        if (inc == 2)
            inc = 1;
        else
            inc = (inc * 5) / 11;
    }

    /*
     * Break ties by asking the platform.
     * Determine if h[i] outranks h[i + 1] and if so, swap them.
     */
    for (i = 0; i < size - 1; i++) {
        if ((PG_NUM_CPUS(h[i]) == PG_NUM_CPUS(h[i + 1])) &&
            pg_cmt_hier_rank(hier[i], hier[i + 1]) == hier[i]) {
            tmp = h[i];
            h[i] = h[i + 1];
            h[i + 1] = tmp;
        }
    }
}

/*
 * Return a cmt_lgrp_t * given an lgroup handle.
 */
static cmt_lgrp_t *
pg_cmt_find_lgrp(lgrp_handle_t hand)
{
    cmt_lgrp_t	*lgrp;

    ASSERT(MUTEX_HELD(&cpu_lock));

    lgrp = cmt_lgrps;
    while (lgrp != NULL) {
        if (lgrp->cl_hand == hand)
            break;
        lgrp = lgrp->cl_next;
    }
    return (lgrp);
}

/*
 * Create a cmt_lgrp_t with the specified handle.
 */
static cmt_lgrp_t *
pg_cmt_lgrp_create(lgrp_handle_t hand)
{
    cmt_lgrp_t	*lgrp;

    ASSERT(MUTEX_HELD(&cpu_lock));

    lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);

    lgrp->cl_hand = hand;
    lgrp->cl_npgs = 0;
    lgrp->cl_next = cmt_lgrps;
    cmt_lgrps = lgrp;
    group_create(&lgrp->cl_pgs);

    return (lgrp);
}

/*
 * Interfaces to enable and disable power aware dispatching
 * The caller must be holding cpu_lock.
 *
 * Return 0 on success and -1 on failure.
 */
int
cmt_pad_enable(pghw_type_t type)
{
    group_t	*hwset;
    group_iter_t iter;
    pg_cmt_t	*pg;

    ASSERT(PGHW_IS_PM_DOMAIN(type));
    ASSERT(MUTEX_HELD(&cpu_lock));

    if ((hwset = pghw_set_lookup(type)) == NULL ||
        cmt_hw_blacklisted[type]) {
        /*
         * Unable to find any instances of the specified type
         * of power domain, or the power domains have been blacklisted.
         */
        return (-1);
    }

    /*
     * Iterate over the power domains, setting the default dispatcher
     * policy for power/performance optimization.
     *
     * Simply setting the policy isn't enough in the case where the power
     * domain is an only child of another PG. Because the dispatcher walks
     * the PG hierarchy in a top down fashion, the higher up PG's policy
     * will dominate. So promote the power domain above its parent if both
     * the PG and its parent have the same CPUs, to ensure its policy
     * dominates.
     */
    group_iter_init(&iter);
    while ((pg = group_iterate(hwset, &iter)) != NULL) {
        /*
         * If the power domain is an only child to a parent
         * not implementing the same policy, promote the child
         * above the parent to activate the policy.
         */
        pg->cmt_policy = pg_cmt_policy(((pghw_t *)pg)->pghw_hw);
        while ((pg->cmt_parent != NULL) &&
            (pg->cmt_parent->cmt_policy != pg->cmt_policy) &&
            (PG_NUM_CPUS((pg_t *)pg) ==
            PG_NUM_CPUS((pg_t *)pg->cmt_parent))) {
            cmt_hier_promote(pg);
        }
    }

    return (0);
}

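/*
 * Illustrative usage (not taken from an actual caller): with cpu_lock held,
 * a power management policy engine could enable dispatcher assistance for
 * idle power domains via
 *
 *	if (cmt_pad_enable(PGHW_POW_IDLE) == -1)
 *		... fall back to non dispatcher assisted CPUPM ...
 *
 * and later undo it with cmt_pad_disable(PGHW_POW_IDLE).
 */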
int
cmt_pad_disable(pghw_type_t type)
{
    group_t	*hwset;
    group_iter_t iter;
    pg_cmt_t	*pg;
    pg_cmt_t	*child;

    ASSERT(PGHW_IS_PM_DOMAIN(type));
    ASSERT(MUTEX_HELD(&cpu_lock));

    if ((hwset = pghw_set_lookup(type)) == NULL) {
        /*
         * Unable to find any instances of the specified type of
         * power domain.
         */
        return (-1);
    }
    /*
     * Iterate over the power domains, setting the default dispatcher
     * policy for performance optimization (load balancing).
     */
    group_iter_init(&iter);
    while ((pg = group_iterate(hwset, &iter)) != NULL) {

        /*
         * If the power domain has an only child that implements
         * a policy other than load balancing, promote the child
         * above the power domain to ensure its policy dominates.
         */
        if (pg->cmt_children != NULL &&
            GROUP_SIZE(pg->cmt_children) == 1) {
            child = GROUP_ACCESS(pg->cmt_children, 0);
            if ((child->cmt_policy & CMT_BALANCE) == 0) {
                cmt_hier_promote(child);
            }
        }
        pg->cmt_policy = CMT_BALANCE;
    }
    return (0);
}

/* ARGSUSED */
static void
cmt_ev_thread_swtch(pg_t *pg, cpu_t *cp, hrtime_t now, kthread_t *old,
    kthread_t *new)
{
    pg_cmt_t	*cmt_pg = (pg_cmt_t *)pg;

    if (old == cp->cpu_idle_thread) {
        atomic_add_32(&cmt_pg->cmt_utilization, 1);
    } else if (new == cp->cpu_idle_thread) {
        atomic_add_32(&cmt_pg->cmt_utilization, -1);
    }
}

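/*
 * Note that cmt_utilization above is simply a count of the non-idle CPUs
 * in the PG: it is incremented when a CPU leaves its idle thread and
 * decremented when a CPU switches back to it. The power aware variant
 * below additionally notifies the CPU power manager on the 0 <-> 1
 * transitions of that count.
 */
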
/*
 * Macro to test whether a thread is currently runnable on a CPU in a PG.
 */
#define	THREAD_RUNNABLE_IN_PG(t, pg)				\
	((t)->t_state == TS_RUN &&				\
	    (t)->t_disp_queue->disp_cpu &&			\
	    bitset_in_set(&(pg)->cmt_cpus_actv_set,		\
	    (t)->t_disp_queue->disp_cpu->cpu_seqid))

static void
cmt_ev_thread_swtch_pwr(pg_t *pg, cpu_t *cp, hrtime_t now, kthread_t *old,
    kthread_t *new)
{
    pg_cmt_t	*cmt = (pg_cmt_t *)pg;
    cpupm_domain_t *dom;
    uint32_t	u;

    if (old == cp->cpu_idle_thread) {
        ASSERT(new != cp->cpu_idle_thread);
        u = atomic_add_32_nv(&cmt->cmt_utilization, 1);
        if (u == 1) {
            /*
             * Notify the CPU power manager that the domain
             * is non-idle.
             */
            dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle;
            cpupm_utilization_event(cp, now, dom,
                CPUPM_DOM_BUSY_FROM_IDLE);
        }
    } else if (new == cp->cpu_idle_thread) {
        ASSERT(old != cp->cpu_idle_thread);
        u = atomic_add_32_nv(&cmt->cmt_utilization, -1);
        if (u == 0) {
            /*
             * The domain is idle, notify the CPU power
             * manager.
             *
             * Avoid notifying if the thread is simply migrating
             * between CPUs in the domain.
             */
            if (!THREAD_RUNNABLE_IN_PG(old, cmt)) {
                dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle;
                cpupm_utilization_event(cp, now, dom,
                    CPUPM_DOM_IDLE_FROM_BUSY);
            }
        }
    }
}

/* ARGSUSED */
static void
cmt_ev_thread_remain_pwr(pg_t *pg, cpu_t *cp, kthread_t *t)
{
    pg_cmt_t	*cmt = (pg_cmt_t *)pg;
    cpupm_domain_t *dom;

    dom = (cpupm_domain_t *)cmt->cmt_pg.pghw_handle;
    cpupm_utilization_event(cp, (hrtime_t)0, dom, CPUPM_DOM_REMAIN_BUSY);
}

/*
 * Return the name of the CMT scheduling policy
 * being implemented across this PG
 */
static char *
pg_cmt_policy_name(pg_t *pg)
{
    pg_cmt_policy_t policy;

    policy = ((pg_cmt_t *)pg)->cmt_policy;

    if (policy & CMT_AFFINITY) {
        if (policy & CMT_BALANCE)
            return ("Load Balancing & Affinity");
        else if (policy & CMT_COALESCE)
            return ("Load Coalescence & Affinity");
        else
            return ("Affinity");
    } else {
        if (policy & CMT_BALANCE)
            return ("Load Balancing");
        else if (policy & CMT_COALESCE)
            return ("Load Coalescence");
        else
            return ("None");
    }
}

/*
 * Prune PG, and all other instances of PG's hardware sharing relationship,
 * from the PG hierarchy.
 */
static int
pg_cmt_prune(pg_cmt_t *pg_bad, pg_cmt_t **lineage, int *sz)
{
    group_t	*hwset, *children;
    int		i, j, r, size = *sz;
    group_iter_t hw_iter, child_iter;
    pg_cpu_itr_t cpu_iter;
    pg_cmt_t	*pg, *child;
    cpu_t	*cpu;
    int		cap_needed;
    pghw_type_t	hw;

    ASSERT(MUTEX_HELD(&cpu_lock));

    hw = ((pghw_t *)pg_bad)->pghw_hw;

    if (hw == PGHW_POW_ACTIVE) {
        cmn_err(CE_NOTE, "!Active CPUPM domain groups look suspect. "
            "Event Based CPUPM Unavailable");
    } else if (hw == PGHW_POW_IDLE) {
        cmn_err(CE_NOTE, "!Idle CPUPM domain groups look suspect. "
            "Dispatcher assisted CPUPM disabled.");
    }

    /*
     * Find and eliminate the PG from the lineage.
     */
    for (i = 0; i < size; i++) {
        if (lineage[i] == pg_bad) {
            for (j = i; j < size - 1; j++)
                lineage[j] = lineage[j + 1];
            *sz = size - 1;
            break;
        }
    }

    /*
     * We'll prune all instances of the hardware sharing relationship
     * represented by pg. But before we do that (and pause CPUs) we need
     * to ensure the hierarchy's groups are properly sized.
     */
    hwset = pghw_set_lookup(hw);

    /*
     * Blacklist the hardware so that future groups won't be created.
     */
    cmt_hw_blacklisted[hw] = 1;

    /*
     * For each of the PGs being pruned, ensure sufficient capacity in
     * the siblings set for the PG's children
     */
    group_iter_init(&hw_iter);
    while ((pg = group_iterate(hwset, &hw_iter)) != NULL) {
        /*
         * PG is being pruned, but if it is bringing up more than
         * one child, ask for more capacity in the siblings group.
         */
        cap_needed = 0;
        if (pg->cmt_children &&
            GROUP_SIZE(pg->cmt_children) > 1) {
            cap_needed = GROUP_SIZE(pg->cmt_children) - 1;

            group_expand(pg->cmt_siblings,
                GROUP_SIZE(pg->cmt_siblings) + cap_needed);

            /*
             * If this is a top level group, also ensure the
             * capacity in the root lgrp level CMT grouping.
             */
            if (pg->cmt_parent == NULL &&
                pg->cmt_siblings != &cmt_root->cl_pgs) {
                group_expand(&cmt_root->cl_pgs,
                    GROUP_SIZE(&cmt_root->cl_pgs) + cap_needed);
            }
        }
    }

    /*
     * We're operating on the PG hierarchy. Pause CPUs to ensure
     * exclusivity with respect to the dispatcher.
     */
    pause_cpus(NULL);

    /*
     * Prune all PG instances of the hardware sharing relationship
     * represented by pg.
14388906SEric.Saxe@Sun.COM 	 */
14398906SEric.Saxe@Sun.COM 	group_iter_init(&hw_iter);
14408906SEric.Saxe@Sun.COM 	while ((pg = group_iterate(hwset, &hw_iter)) != NULL) {
14418408SEric.Saxe@Sun.COM 
14428408SEric.Saxe@Sun.COM 		/*
14438906SEric.Saxe@Sun.COM 		 * Remove PG from its group of siblings, if it's there.
14448906SEric.Saxe@Sun.COM 		 */
14458906SEric.Saxe@Sun.COM 		if (pg->cmt_siblings) {
14468906SEric.Saxe@Sun.COM 			(void) group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
14478906SEric.Saxe@Sun.COM 		}
14488906SEric.Saxe@Sun.COM 		if (pg->cmt_parent == NULL &&
14498906SEric.Saxe@Sun.COM 		    pg->cmt_siblings != &cmt_root->cl_pgs) {
14508906SEric.Saxe@Sun.COM 			(void) group_remove(&cmt_root->cl_pgs, pg,
14518906SEric.Saxe@Sun.COM 			    GRP_NORESIZE);
14528906SEric.Saxe@Sun.COM 		}
14538906SEric.Saxe@Sun.COM 		/*
14548906SEric.Saxe@Sun.COM 		 * Add PG's children to its group of siblings.
14558906SEric.Saxe@Sun.COM 		 */
14568906SEric.Saxe@Sun.COM 		if (pg->cmt_children != NULL) {
14578906SEric.Saxe@Sun.COM 			children = pg->cmt_children;
14588906SEric.Saxe@Sun.COM 
14598906SEric.Saxe@Sun.COM 			group_iter_init(&child_iter);
14608906SEric.Saxe@Sun.COM 			while ((child = group_iterate(children, &child_iter))
14618906SEric.Saxe@Sun.COM 			    != NULL) {
14628906SEric.Saxe@Sun.COM 				/*
14638906SEric.Saxe@Sun.COM 				 * Transplant child from its siblings set to
14648906SEric.Saxe@Sun.COM 				 * PG's.
14658906SEric.Saxe@Sun.COM 				 */
14668906SEric.Saxe@Sun.COM 				if (pg->cmt_siblings != NULL &&
14678906SEric.Saxe@Sun.COM 				    child->cmt_siblings != NULL &&
14688906SEric.Saxe@Sun.COM 				    group_remove(child->cmt_siblings, child,
14698906SEric.Saxe@Sun.COM 				    GRP_NORESIZE) != -1) {
14708906SEric.Saxe@Sun.COM 					r = group_add(pg->cmt_siblings, child,
14718906SEric.Saxe@Sun.COM 					    GRP_NORESIZE);
14728906SEric.Saxe@Sun.COM 					ASSERT(r == 0);
14738906SEric.Saxe@Sun.COM 				}
14748906SEric.Saxe@Sun.COM 			}
14758906SEric.Saxe@Sun.COM 		}
14768906SEric.Saxe@Sun.COM 
14778906SEric.Saxe@Sun.COM 		/*
14788906SEric.Saxe@Sun.COM 		 * Reset the callbacks to the defaults
14798906SEric.Saxe@Sun.COM 		 */
14808906SEric.Saxe@Sun.COM 		pg_callback_set_defaults((pg_t *)pg);
14818906SEric.Saxe@Sun.COM 
14828906SEric.Saxe@Sun.COM 		/*
14838906SEric.Saxe@Sun.COM 		 * Update all the CPU lineages in each of PG's CPUs
14848408SEric.Saxe@Sun.COM 		 */
14858906SEric.Saxe@Sun.COM 		PG_CPU_ITR_INIT(pg, cpu_iter);
14868906SEric.Saxe@Sun.COM 		while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) {
14878906SEric.Saxe@Sun.COM 			group_t		*pgs;
14888906SEric.Saxe@Sun.COM 			pg_cmt_t	*cpu_pg;
14898906SEric.Saxe@Sun.COM 			group_iter_t	liter;	/* Iterator for the lineage */
14908906SEric.Saxe@Sun.COM 
14918906SEric.Saxe@Sun.COM 			/*
14928906SEric.Saxe@Sun.COM 			 * Iterate over the CPU's PGs, updating the children
14938906SEric.Saxe@Sun.COM 			 * of the PG being pruned, since they now have a new
14948906SEric.Saxe@Sun.COM 			 * parent and siblings set.
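			 * (Each child inherits the pruned PG's own parent
			 * and siblings set, which is what the reassignment
			 * below reflects.)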
14958906SEric.Saxe@Sun.COM 			 */
14968906SEric.Saxe@Sun.COM 			pgs = &cpu->cpu_pg->pgs;
14978906SEric.Saxe@Sun.COM 			group_iter_init(&liter);
14988906SEric.Saxe@Sun.COM 			while ((cpu_pg = group_iterate(pgs, &liter)) != NULL) {
14998906SEric.Saxe@Sun.COM 				if (cpu_pg->cmt_parent == pg) {
15008906SEric.Saxe@Sun.COM 					cpu_pg->cmt_parent = pg->cmt_parent;
15018906SEric.Saxe@Sun.COM 					cpu_pg->cmt_siblings = pg->cmt_siblings;
15028906SEric.Saxe@Sun.COM 				}
15038906SEric.Saxe@Sun.COM 			}
15048906SEric.Saxe@Sun.COM 
15058906SEric.Saxe@Sun.COM 			/*
15068906SEric.Saxe@Sun.COM 			 * Update the CPU's lineages
15078906SEric.Saxe@Sun.COM 			 */
15088906SEric.Saxe@Sun.COM 			pgs = &cpu->cpu_pg->cmt_pgs;
15098906SEric.Saxe@Sun.COM 			(void) group_remove(pgs, pg, GRP_NORESIZE);
15108906SEric.Saxe@Sun.COM 			pgs = &cpu->cpu_pg->pgs;
15118906SEric.Saxe@Sun.COM 			(void) group_remove(pgs, pg, GRP_NORESIZE);
15128408SEric.Saxe@Sun.COM 		}
15138906SEric.Saxe@Sun.COM 	}
15148906SEric.Saxe@Sun.COM 	start_cpus();
15158906SEric.Saxe@Sun.COM 	return (0);
15168906SEric.Saxe@Sun.COM }
15178906SEric.Saxe@Sun.COM 
15188906SEric.Saxe@Sun.COM /*
15198906SEric.Saxe@Sun.COM  * Disable CMT scheduling
15208906SEric.Saxe@Sun.COM  */
15218906SEric.Saxe@Sun.COM static void
15228906SEric.Saxe@Sun.COM pg_cmt_disable(void)
15238906SEric.Saxe@Sun.COM {
15248906SEric.Saxe@Sun.COM 	cpu_t	*cpu;
15258906SEric.Saxe@Sun.COM 
15268906SEric.Saxe@Sun.COM 	pause_cpus(NULL);
15278906SEric.Saxe@Sun.COM 	cpu = cpu_list;
15288906SEric.Saxe@Sun.COM 
15298906SEric.Saxe@Sun.COM 	do {
15308906SEric.Saxe@Sun.COM 		if (cpu->cpu_pg)
15318906SEric.Saxe@Sun.COM 			group_empty(&cpu->cpu_pg->cmt_pgs);
15328906SEric.Saxe@Sun.COM 	} while ((cpu = cpu->cpu_next) != cpu_list);
15338906SEric.Saxe@Sun.COM 
15348906SEric.Saxe@Sun.COM 	cmt_sched_disabled = 1;
15358906SEric.Saxe@Sun.COM 	start_cpus();
15368906SEric.Saxe@Sun.COM 	cmn_err(CE_NOTE, "!CMT thread placement optimizations unavailable");
15378906SEric.Saxe@Sun.COM }
15388408SEric.Saxe@Sun.COM 
15398906SEric.Saxe@Sun.COM static int
15408906SEric.Saxe@Sun.COM pg_cmt_lineage_validate(pg_cmt_t **lineage, int *sz)
15418906SEric.Saxe@Sun.COM {
15428906SEric.Saxe@Sun.COM 	int		i, size;
15438906SEric.Saxe@Sun.COM 	pg_cmt_t	*pg, *parent, *pg_bad;
15448906SEric.Saxe@Sun.COM 	cpu_t		*cp;
15458906SEric.Saxe@Sun.COM 	pg_cpu_itr_t	cpu_iter;
15468906SEric.Saxe@Sun.COM 
15478906SEric.Saxe@Sun.COM 	ASSERT(MUTEX_HELD(&cpu_lock));
15488906SEric.Saxe@Sun.COM 
15498906SEric.Saxe@Sun.COM revalidate:
15508906SEric.Saxe@Sun.COM 	size = *sz;
15518906SEric.Saxe@Sun.COM 	pg_bad = NULL;
15528906SEric.Saxe@Sun.COM 	for (i = 0; i < size - 1; i++) {
15538906SEric.Saxe@Sun.COM 
15548906SEric.Saxe@Sun.COM 		pg = lineage[i];
15558906SEric.Saxe@Sun.COM 		parent = lineage[i + 1];
15568408SEric.Saxe@Sun.COM 
15578906SEric.Saxe@Sun.COM 		/*
15588906SEric.Saxe@Sun.COM 		 * We assume that the lineage has already been sorted
15598906SEric.Saxe@Sun.COM 		 * by the number of CPUs. In fact, we depend on it.
15608906SEric.Saxe@Sun.COM 		 */
15618906SEric.Saxe@Sun.COM 		ASSERT(PG_NUM_CPUS((pg_t *)pg) <= PG_NUM_CPUS((pg_t *)parent));
15628906SEric.Saxe@Sun.COM 
15638906SEric.Saxe@Sun.COM 		/*
15648906SEric.Saxe@Sun.COM 		 * Walk each of the CPUs in the PG's group, and verify that
15658906SEric.Saxe@Sun.COM 		 * the next larger PG contains at least the CPUs in this one.
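		 * (In other words, each PG's CPU set must be a subset of
		 * its parent's; a lineage with that property is what the
		 * code below calls "concentric".)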
15668906SEric.Saxe@Sun.COM 		 */
15678906SEric.Saxe@Sun.COM 		PG_CPU_ITR_INIT((pg_t *)pg, cpu_iter);
15688906SEric.Saxe@Sun.COM 		while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
15698906SEric.Saxe@Sun.COM 			if (pg_cpu_find((pg_t *)parent, cp) == B_FALSE) {
15708906SEric.Saxe@Sun.COM 				cmt_lineage_status = CMT_LINEAGE_NON_CONCENTRIC;
15718906SEric.Saxe@Sun.COM 				goto handle_error;
15728906SEric.Saxe@Sun.COM 			}
15738906SEric.Saxe@Sun.COM 		}
15748408SEric.Saxe@Sun.COM 	}
15758408SEric.Saxe@Sun.COM 
15768906SEric.Saxe@Sun.COM handle_error:
15778906SEric.Saxe@Sun.COM 	switch (cmt_lineage_status) {
15788906SEric.Saxe@Sun.COM 	case CMT_LINEAGE_VALID:
15798906SEric.Saxe@Sun.COM 	case CMT_LINEAGE_REPAIRED:
15808906SEric.Saxe@Sun.COM 		break;
15818906SEric.Saxe@Sun.COM 	case CMT_LINEAGE_NON_CONCENTRIC:
15828408SEric.Saxe@Sun.COM 		/*
15838906SEric.Saxe@Sun.COM 		 * We've detected a non-concentric PG lineage.
15848906SEric.Saxe@Sun.COM 		 *
15858906SEric.Saxe@Sun.COM 		 * This can happen when some of the CPU grouping information
15868906SEric.Saxe@Sun.COM 		 * is derived from buggy sources (for example, incorrect ACPI
15878906SEric.Saxe@Sun.COM 		 * tables on x86 systems).
15888906SEric.Saxe@Sun.COM 		 *
15898906SEric.Saxe@Sun.COM 		 * We attempt to recover from this by pruning out the
15908906SEric.Saxe@Sun.COM 		 * illegal groupings from the PG hierarchy, which means that
15918906SEric.Saxe@Sun.COM 		 * we won't optimize for those levels, but we will for the
15928906SEric.Saxe@Sun.COM 		 * remaining ones.
15938906SEric.Saxe@Sun.COM 		 *
15948906SEric.Saxe@Sun.COM 		 * If a given level has CPUs not found in its parent, then
15958906SEric.Saxe@Sun.COM 		 * we examine the PG and its parent to see if either grouping
15968906SEric.Saxe@Sun.COM 		 * is enumerated from potentially buggy sources.
15978906SEric.Saxe@Sun.COM 		 *
15988906SEric.Saxe@Sun.COM 		 * If one has fewer CPUs than the other, and contains CPUs
15998906SEric.Saxe@Sun.COM 		 * not found in the parent, and it is an untrusted enumeration,
16008906SEric.Saxe@Sun.COM 		 * then prune it. If both have the same number of CPUs, then
16018906SEric.Saxe@Sun.COM 		 * prune the one that is untrusted.
16028906SEric.Saxe@Sun.COM 		 *
16038906SEric.Saxe@Sun.COM 		 * This process repeats until we have a concentric lineage,
16048906SEric.Saxe@Sun.COM 		 * or we would have to prune out a level derived from what we
16058906SEric.Saxe@Sun.COM 		 * thought was a reliable source, in which case CMT scheduling
16068906SEric.Saxe@Sun.COM 		 * is disabled altogether.
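		 * (For instance, an ACPI-derived CPU power management
		 * domain might claim a set of CPUs that straddles two
		 * physical caches; the two groupings cannot be nested, and
		 * the PM domain enumeration is the one treated as suspect.)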
16078408SEric.Saxe@Sun.COM 		 */
16088906SEric.Saxe@Sun.COM 		if ((PG_NUM_CPUS((pg_t *)pg) < PG_NUM_CPUS((pg_t *)parent)) &&
16098906SEric.Saxe@Sun.COM 		    (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw))) {
16108906SEric.Saxe@Sun.COM 			pg_bad = pg;
16118906SEric.Saxe@Sun.COM 		} else if (PG_NUM_CPUS((pg_t *)pg) ==
16128906SEric.Saxe@Sun.COM 		    PG_NUM_CPUS((pg_t *)parent)) {
16138906SEric.Saxe@Sun.COM 			if (PG_CMT_HW_SUSPECT(((pghw_t *)parent)->pghw_hw)) {
16148906SEric.Saxe@Sun.COM 				pg_bad = parent;
16158906SEric.Saxe@Sun.COM 			} else if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) {
16168906SEric.Saxe@Sun.COM 				pg_bad = pg;
16178906SEric.Saxe@Sun.COM 			}
16188906SEric.Saxe@Sun.COM 		}
16198906SEric.Saxe@Sun.COM 		if (pg_bad) {
16208906SEric.Saxe@Sun.COM 			if (pg_cmt_prune(pg_bad, lineage, sz) == 0) {
16218906SEric.Saxe@Sun.COM 				cmt_lineage_status = CMT_LINEAGE_REPAIRED;
16228906SEric.Saxe@Sun.COM 				goto revalidate;
16238408SEric.Saxe@Sun.COM 			}
16248906SEric.Saxe@Sun.COM 		}
16258906SEric.Saxe@Sun.COM 		/*FALLTHROUGH*/
16268906SEric.Saxe@Sun.COM 	default:
16278906SEric.Saxe@Sun.COM 		/*
16288906SEric.Saxe@Sun.COM 		 * If we're here, something has gone wrong in trying to
16298906SEric.Saxe@Sun.COM 		 * recover from an illegal PG hierarchy, or we've encountered
16308906SEric.Saxe@Sun.COM 		 * a validation error for which we don't know how to recover.
16318906SEric.Saxe@Sun.COM 		 * In this case, disable CMT scheduling altogether.
16328906SEric.Saxe@Sun.COM 		 */
16338906SEric.Saxe@Sun.COM 		pg_cmt_disable();
16348906SEric.Saxe@Sun.COM 		cmt_lineage_status = CMT_LINEAGE_UNRECOVERABLE;
16358906SEric.Saxe@Sun.COM 		return (-1);
16368408SEric.Saxe@Sun.COM 	}
16378906SEric.Saxe@Sun.COM 	return (0);
16388408SEric.Saxe@Sun.COM }
1639