13434Sesaxe /* 23434Sesaxe * CDDL HEADER START 33434Sesaxe * 43434Sesaxe * The contents of this file are subject to the terms of the 53434Sesaxe * Common Development and Distribution License (the "License"). 63434Sesaxe * You may not use this file except in compliance with the License. 73434Sesaxe * 83434Sesaxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 93434Sesaxe * or http://www.opensolaris.org/os/licensing. 103434Sesaxe * See the License for the specific language governing permissions 113434Sesaxe * and limitations under the License. 123434Sesaxe * 133434Sesaxe * When distributing Covered Code, include this CDDL HEADER in each 143434Sesaxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 153434Sesaxe * If applicable, add the following below this CDDL HEADER, with the 163434Sesaxe * fields enclosed by brackets "[]" replaced with your own identifying 173434Sesaxe * information: Portions Copyright [yyyy] [name of copyright owner] 183434Sesaxe * 193434Sesaxe * CDDL HEADER END 203434Sesaxe */ 213434Sesaxe /* 228906SEric.Saxe@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 233434Sesaxe * Use is subject to license terms. 243434Sesaxe */ 253434Sesaxe 263434Sesaxe #include <sys/systm.h> 273434Sesaxe #include <sys/types.h> 283434Sesaxe #include <sys/param.h> 293434Sesaxe #include <sys/thread.h> 303434Sesaxe #include <sys/cpuvar.h> 313434Sesaxe #include <sys/kmem.h> 323434Sesaxe #include <sys/cmn_err.h> 333434Sesaxe #include <sys/group.h> 343434Sesaxe #include <sys/pg.h> 353434Sesaxe #include <sys/pghw.h> 368906SEric.Saxe@Sun.COM #include <sys/cpu_pm.h> 37*11389SAlexander.Kolbasov@Sun.COM #include <sys/cap_util.h> 383434Sesaxe 393434Sesaxe /* 403434Sesaxe * Processor Groups: Hardware sharing relationship layer 413434Sesaxe * 423434Sesaxe * This file implements an extension to Processor Groups to capture 433434Sesaxe * hardware sharing relationships existing between logical CPUs. 
* Examples of hardware sharing relationships include shared caches on some CMT
 * processor architectures, or shared local memory controllers on NUMA
 * based system architectures.
 *
 * The pghw_t structure represents the extended PG. The first member
 * of the structure is the generic pg_t with the pghw specific members
 * following. The generic pg_t *must* remain the first member of the
 * structure as the code uses casting of structure references to access
 * the generic pg_t structure elements.
 *
 * In addition to the generic CPU grouping, physical PGs have a hardware
 * sharing relationship enumerated "type", and an instance id. The enumerated
 * type is defined by the pghw_type_t enumeration, while the instance id
 * uniquely identifies the sharing instance from among others of the same
 * hardware sharing type.
 *
 * The physical PGs are organized into an overall hierarchy, and are tracked
 * in a number of different per CPU, and per pghw_type_t type groups.
623434Sesaxe * As an example: 633434Sesaxe * 643434Sesaxe * ------------- 653434Sesaxe * | pg_hw | 663434Sesaxe * | (group_t) | 673434Sesaxe * ------------- 683434Sesaxe * || ============================ 693434Sesaxe * ||\\-----------------------// \\ \\ 703434Sesaxe * || | hwset (PGC_HW_CHIP) | ------------- ------------- 713434Sesaxe * || | (group_t) | | pghw_t | | pghw_t | 723434Sesaxe * || ----------------------- | chip 0 | | chip 1 | 733434Sesaxe * || ------------- ------------- 743434Sesaxe * || \\ \\ \\ \\ \\ \\ \\ \\ 753434Sesaxe * || cpu cpu cpu cpu cpu cpu cpu cpu 763434Sesaxe * || 773434Sesaxe * || ============================ 783434Sesaxe * ||\\-----------------------// \\ \\ 793434Sesaxe * || | hwset (PGC_HW_IPIPE)| ------------- ------------- 803434Sesaxe * || | (group_t) | | pghw_t | | pghw_t | 813434Sesaxe * || ----------------------- | ipipe 0 | | ipipe 1 | 823434Sesaxe * || ------------- ------------- 833434Sesaxe * || \\ \\ \\ \\ 843434Sesaxe * || cpu cpu cpu cpu 853434Sesaxe * ... 863434Sesaxe * 873434Sesaxe * 883434Sesaxe * The top level pg_hw is a group of "hwset" groups. Each hwset holds of group 893434Sesaxe * of physical PGs of the same hardware sharing type. Within each hwset, the 903434Sesaxe * PG's instance id uniquely identifies the grouping relationshsip among other 913434Sesaxe * groupings of the same sharing type. The instance id for a grouping is 923434Sesaxe * platform defined, and in some cases may be used by platform code as a handle 933434Sesaxe * to search for a particular relationship instance. 943434Sesaxe * 953434Sesaxe * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs 963434Sesaxe * that participate in the sharing relationship. Each CPU also has associated 973434Sesaxe * with it a grouping tracking the PGs in which the CPU belongs. This can be 983434Sesaxe * used to iterate over the various relationships in which the CPU participates 993434Sesaxe * (the CPU's chip, cache, lgroup, etc.). 
*
 * The hwsets are created dynamically as new hardware sharing relationship types
 * are instantiated. They are never destroyed, as once a given relationship
 * type appears in the system, it is quite likely that at least one instance of
 * that relationship will always persist as long as the system is running.
 */

static group_t		*pg_hw;		/* top level pg hw group */

/*
 * Physical PG kstats
 *
 * A single instance of this structure serves as the ks_data for every
 * physical PG's "pg" kstat; it is refilled by pghw_kstat_update() for the PG
 * being read. Each such kstat uses pghw_kstat_lock as its ks_lock.
 */
struct pghw_kstat {
	kstat_named_t	pg_id;
	kstat_named_t	pg_class;
	kstat_named_t	pg_ncpus;
	kstat_named_t	pg_instance_id;
	kstat_named_t	pg_hw;
	kstat_named_t	pg_policy;
} pghw_kstat = {
	{ "id",			KSTAT_DATA_UINT32 },
	{ "pg_class",		KSTAT_DATA_STRING },
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "instance_id",	KSTAT_DATA_UINT32 },
	{ "hardware",		KSTAT_DATA_STRING },
	{ "policy",		KSTAT_DATA_STRING },
};

/*
 * Installed as ks_lock for both the "pg" and the capacity and utilization
 * kstats of every physical PG.
 */
kmutex_t		pghw_kstat_lock;

/*
 * Capacity and Utilization PG kstats
 *
 * These kstats are updated one at a time, so we can have a single scratch space
 * to fill the data.
 *
 * kstat fields (structure member, exported kstat name in parentheses):
 *
 *   pg_id ("id")
 *			PG ID for PG described by this kstat
 *
 *   pg_ncpus ("ncpus")
 *			Number of CPUs within this PG
 *
 *   pg_cpus ("cpus")
 *			String describing CPUs within this PG
 *
 *   pg_sharing ("sharing_relation")
 *			Name of sharing relationship for this PG
 *
 *   pg_generation ("generation")
 *			Generation value that increases whenever any CPU leaves
 *			or joins PG. Two kstat snapshots for the same
 *			PG may only be compared if they have the same
 *			generation
 *
 *   pg_hw_util ("hw_util")
 *			Running value of PG utilization for the sharing
 *			relationship
 *
 *   pg_hw_util_time_running ("hw_util_time_running")
 *			Total time spent collecting CU data. The time may be
 *			less than wall time if CU counters were stopped for
 *			some time.
 *
 *   pg_hw_util_time_stopped ("hw_util_time_stopped")
 *			Total time the CU counters were stopped.
 *
 *   pg_hw_util_rate ("hw_util_rate")
 *			Utilization rate, expressed in operations per second.
 *
 *   pg_hw_util_rate_max ("hw_util_rate_max")
 *			Maximum observed value of utilization rate.
 */
struct pghw_cu_kstat {
	kstat_named_t	pg_id;
	kstat_named_t	pg_ncpus;
	kstat_named_t	pg_generation;
	kstat_named_t	pg_hw_util;
	kstat_named_t	pg_hw_util_time_running;
	kstat_named_t	pg_hw_util_time_stopped;
	kstat_named_t	pg_hw_util_rate;
	kstat_named_t	pg_hw_util_rate_max;
	kstat_named_t	pg_cpus;
	kstat_named_t	pg_sharing;
} pghw_cu_kstat = {
	{ "id",			KSTAT_DATA_UINT32 },
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "generation",		KSTAT_DATA_UINT32 },
	{ "hw_util",		KSTAT_DATA_UINT64 },
	{ "hw_util_time_running",	KSTAT_DATA_UINT64 },
	{ "hw_util_time_stopped",	KSTAT_DATA_UINT64 },
	{ "hw_util_rate",	KSTAT_DATA_UINT64 },
	{ "hw_util_rate_max",	KSTAT_DATA_UINT64 },
	{ "cpus",		KSTAT_DATA_STRING },
	{ "sharing_relation",	KSTAT_DATA_STRING },
};

/*
 * Calculate the string size to represent NCPUS. Allow 5 digits for each CPU ID
 * plus one space per CPU plus NUL byte in the end.
This is only an estimate, 192*11389SAlexander.Kolbasov@Sun.COM * since we try to compress CPU ranges as x-y. In the worst case the string 193*11389SAlexander.Kolbasov@Sun.COM * representation of CPUs may be truncated. 194*11389SAlexander.Kolbasov@Sun.COM */ 195*11389SAlexander.Kolbasov@Sun.COM #define CPUSTR_LEN(ncpus) ((ncpus) * 6) 196*11389SAlexander.Kolbasov@Sun.COM 197*11389SAlexander.Kolbasov@Sun.COM /* 198*11389SAlexander.Kolbasov@Sun.COM * Maximum length of the string that represents list of CPUs 199*11389SAlexander.Kolbasov@Sun.COM */ 200*11389SAlexander.Kolbasov@Sun.COM static int pg_cpulist_maxlen = 0; 201*11389SAlexander.Kolbasov@Sun.COM 202*11389SAlexander.Kolbasov@Sun.COM static void pghw_kstat_create(pghw_t *); 203*11389SAlexander.Kolbasov@Sun.COM static int pghw_kstat_update(kstat_t *, int); 204*11389SAlexander.Kolbasov@Sun.COM static int pghw_cu_kstat_update(kstat_t *, int); 205*11389SAlexander.Kolbasov@Sun.COM static int cpu2id(void *); 206*11389SAlexander.Kolbasov@Sun.COM 207*11389SAlexander.Kolbasov@Sun.COM /* 2083434Sesaxe * hwset operations 2093434Sesaxe */ 2103434Sesaxe static group_t *pghw_set_create(pghw_type_t); 2113434Sesaxe static void pghw_set_add(group_t *, pghw_t *); 2123434Sesaxe static void pghw_set_remove(group_t *, pghw_t *); 2133434Sesaxe 214*11389SAlexander.Kolbasov@Sun.COM static void pghw_cpulist_alloc(pghw_t *); 215*11389SAlexander.Kolbasov@Sun.COM static int cpu2id(void *); 216*11389SAlexander.Kolbasov@Sun.COM 2173434Sesaxe /* 2188906SEric.Saxe@Sun.COM * Initialize the physical portion of a hardware PG 2193434Sesaxe */ 2203434Sesaxe void 2213434Sesaxe pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw) 2223434Sesaxe { 2233434Sesaxe group_t *hwset; 2243434Sesaxe 2253434Sesaxe if ((hwset = pghw_set_lookup(hw)) == NULL) { 2263434Sesaxe /* 2273434Sesaxe * Haven't seen this hardware type yet 2283434Sesaxe */ 2293434Sesaxe hwset = pghw_set_create(hw); 2303434Sesaxe } 2313434Sesaxe 2323434Sesaxe pghw_set_add(hwset, pg); 2333434Sesaxe 
pg->pghw_hw = hw; 234*11389SAlexander.Kolbasov@Sun.COM pg->pghw_generation = 0; 2353434Sesaxe pg->pghw_instance = 2363434Sesaxe pg_plat_hw_instance_id(cp, hw); 2373434Sesaxe pghw_kstat_create(pg); 2388906SEric.Saxe@Sun.COM 2398906SEric.Saxe@Sun.COM /* 2408906SEric.Saxe@Sun.COM * Hardware sharing relationship specific initialization 2418906SEric.Saxe@Sun.COM */ 2428906SEric.Saxe@Sun.COM switch (pg->pghw_hw) { 2438906SEric.Saxe@Sun.COM case PGHW_POW_ACTIVE: 2448906SEric.Saxe@Sun.COM pg->pghw_handle = 2458906SEric.Saxe@Sun.COM (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_ACTIVE); 2468906SEric.Saxe@Sun.COM break; 2478906SEric.Saxe@Sun.COM case PGHW_POW_IDLE: 2488906SEric.Saxe@Sun.COM pg->pghw_handle = 2498906SEric.Saxe@Sun.COM (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_IDLE); 2508906SEric.Saxe@Sun.COM break; 2518906SEric.Saxe@Sun.COM default: 2528906SEric.Saxe@Sun.COM pg->pghw_handle = (pghw_handle_t)NULL; 2538906SEric.Saxe@Sun.COM } 2543434Sesaxe } 2553434Sesaxe 2563434Sesaxe /* 2573434Sesaxe * Teardown the physical portion of a physical PG 2583434Sesaxe */ 2593434Sesaxe void 2603434Sesaxe pghw_fini(pghw_t *pg) 2613434Sesaxe { 2623434Sesaxe group_t *hwset; 2633434Sesaxe 2643434Sesaxe hwset = pghw_set_lookup(pg->pghw_hw); 2653434Sesaxe ASSERT(hwset != NULL); 2663434Sesaxe 2673434Sesaxe pghw_set_remove(hwset, pg); 2683434Sesaxe pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON; 2693434Sesaxe pg->pghw_hw = (pghw_type_t)-1; 2703434Sesaxe 271*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_kstat != NULL) 2723434Sesaxe kstat_delete(pg->pghw_kstat); 273*11389SAlexander.Kolbasov@Sun.COM 274*11389SAlexander.Kolbasov@Sun.COM /* 275*11389SAlexander.Kolbasov@Sun.COM * Destroy string representation of CPUs 276*11389SAlexander.Kolbasov@Sun.COM */ 277*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cpulist != NULL) { 278*11389SAlexander.Kolbasov@Sun.COM kmem_free(pg->pghw_cpulist, 279*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cpulist_len); 280*11389SAlexander.Kolbasov@Sun.COM 
pg->pghw_cpulist = NULL; 281*11389SAlexander.Kolbasov@Sun.COM } 282*11389SAlexander.Kolbasov@Sun.COM 283*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cu_kstat != NULL) 284*11389SAlexander.Kolbasov@Sun.COM kstat_delete(pg->pghw_cu_kstat); 2853434Sesaxe } 2863434Sesaxe 2873434Sesaxe /* 2883434Sesaxe * Find an existing physical PG in which to place 2893434Sesaxe * the given CPU for the specified hardware sharing 2903434Sesaxe * relationship 2913434Sesaxe */ 2923434Sesaxe pghw_t * 2933434Sesaxe pghw_place_cpu(cpu_t *cp, pghw_type_t hw) 2943434Sesaxe { 2953434Sesaxe group_t *hwset; 2963434Sesaxe 2973434Sesaxe if ((hwset = pghw_set_lookup(hw)) == NULL) { 2983434Sesaxe return (NULL); 2993434Sesaxe } 3003434Sesaxe 3013434Sesaxe return ((pghw_t *)pg_cpu_find_pg(cp, hwset)); 3023434Sesaxe } 3033434Sesaxe 3043434Sesaxe /* 3053434Sesaxe * Find the pg representing the hw sharing relationship in which 3063434Sesaxe * cp belongs 3073434Sesaxe */ 3083434Sesaxe pghw_t * 3093434Sesaxe pghw_find_pg(cpu_t *cp, pghw_type_t hw) 3103434Sesaxe { 3113434Sesaxe group_iter_t i; 3123434Sesaxe pghw_t *pg; 3133434Sesaxe 3143434Sesaxe group_iter_init(&i); 3153434Sesaxe while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) { 3163434Sesaxe if (pg->pghw_hw == hw) 3173434Sesaxe return (pg); 3183434Sesaxe } 3193434Sesaxe return (NULL); 3203434Sesaxe } 3213434Sesaxe 3223434Sesaxe /* 3233434Sesaxe * Find the PG of the given hardware sharing relationship 3243434Sesaxe * type with the given instance id 3253434Sesaxe */ 3263434Sesaxe pghw_t * 3273434Sesaxe pghw_find_by_instance(id_t id, pghw_type_t hw) 3283434Sesaxe { 3293434Sesaxe group_iter_t i; 3303434Sesaxe group_t *set; 3313434Sesaxe pghw_t *pg; 3323434Sesaxe 3333434Sesaxe set = pghw_set_lookup(hw); 3343434Sesaxe if (!set) 3353434Sesaxe return (NULL); 3363434Sesaxe 3373434Sesaxe group_iter_init(&i); 3383434Sesaxe while ((pg = group_iterate(set, &i)) != NULL) { 3393434Sesaxe if (pg->pghw_instance == id) 3403434Sesaxe return (pg); 3413434Sesaxe } 
3423434Sesaxe return (NULL); 3433434Sesaxe } 3443434Sesaxe 3453434Sesaxe /* 3463434Sesaxe * CPUs physical ID cache creation / destruction 3473434Sesaxe * The cache's elements are initialized to the CPU's id 3483434Sesaxe */ 3493434Sesaxe void 3503434Sesaxe pghw_physid_create(cpu_t *cp) 3513434Sesaxe { 3523434Sesaxe int i; 3533434Sesaxe 3543434Sesaxe cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP); 3553434Sesaxe 3563434Sesaxe for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) { 3573434Sesaxe ((id_t *)cp->cpu_physid)[i] = cp->cpu_id; 3583434Sesaxe } 3593434Sesaxe } 3603434Sesaxe 3613434Sesaxe void 3623434Sesaxe pghw_physid_destroy(cpu_t *cp) 3633434Sesaxe { 3643434Sesaxe if (cp->cpu_physid) { 3653434Sesaxe kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); 3663434Sesaxe cp->cpu_physid = NULL; 3673434Sesaxe } 3683434Sesaxe } 3693434Sesaxe 3703434Sesaxe /* 3713434Sesaxe * Create a new, empty hwset. 3723434Sesaxe * This routine may block, and must not be called from any 3733434Sesaxe * paused CPU context. 3743434Sesaxe */ 3753434Sesaxe static group_t * 3763434Sesaxe pghw_set_create(pghw_type_t hw) 3773434Sesaxe { 3783434Sesaxe group_t *g; 3793434Sesaxe int ret; 3803434Sesaxe 3813434Sesaxe /* 3823434Sesaxe * Create the top level PG hw group if it doesn't already exist 3833434Sesaxe * This is a "set" of hardware sets, that is ordered (and indexed) 3843434Sesaxe * by the pghw_type_t enum. 3853434Sesaxe */ 3863434Sesaxe if (pg_hw == NULL) { 3873434Sesaxe pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP); 3883434Sesaxe group_create(pg_hw); 3893434Sesaxe group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS); 3903434Sesaxe } 3913434Sesaxe 3923434Sesaxe /* 3933434Sesaxe * Create the new hwset 3943434Sesaxe * Add it to the top level pg_hw group. 
3953434Sesaxe */ 3963434Sesaxe g = kmem_alloc(sizeof (group_t), KM_SLEEP); 3973434Sesaxe group_create(g); 3983434Sesaxe 3993434Sesaxe ret = group_add_at(pg_hw, g, (uint_t)hw); 4003434Sesaxe ASSERT(ret == 0); 4013434Sesaxe 4023434Sesaxe return (g); 4033434Sesaxe } 4043434Sesaxe 4053434Sesaxe /* 4063434Sesaxe * Find the hwset associated with the given hardware sharing type 4073434Sesaxe */ 4083434Sesaxe group_t * 4093434Sesaxe pghw_set_lookup(pghw_type_t hw) 4103434Sesaxe { 4113434Sesaxe group_t *hwset; 4123434Sesaxe 4133434Sesaxe if (pg_hw == NULL) 4143434Sesaxe return (NULL); 4153434Sesaxe 4163434Sesaxe hwset = GROUP_ACCESS(pg_hw, (uint_t)hw); 4173434Sesaxe return (hwset); 4183434Sesaxe } 4193434Sesaxe 4203434Sesaxe /* 4213434Sesaxe * Add a PG to a hwset 4223434Sesaxe */ 4233434Sesaxe static void 4243434Sesaxe pghw_set_add(group_t *hwset, pghw_t *pg) 4253434Sesaxe { 4263434Sesaxe (void) group_add(hwset, pg, GRP_RESIZE); 4273434Sesaxe } 4283434Sesaxe 4293434Sesaxe /* 4303434Sesaxe * Remove a PG from a hwset 4313434Sesaxe */ 4323434Sesaxe static void 4333434Sesaxe pghw_set_remove(group_t *hwset, pghw_t *pg) 4343434Sesaxe { 4353434Sesaxe int result; 4363434Sesaxe 4373434Sesaxe result = group_remove(hwset, pg, GRP_RESIZE); 4383434Sesaxe ASSERT(result == 0); 4393434Sesaxe } 4403434Sesaxe 4413434Sesaxe /* 4423434Sesaxe * Return a string name given a pg_hw sharing type 4433434Sesaxe */ 444*11389SAlexander.Kolbasov@Sun.COM char * 4453434Sesaxe pghw_type_string(pghw_type_t hw) 4463434Sesaxe { 4473434Sesaxe switch (hw) { 4483434Sesaxe case PGHW_IPIPE: 4498906SEric.Saxe@Sun.COM return ("Integer Pipeline"); 4503434Sesaxe case PGHW_CACHE: 4518906SEric.Saxe@Sun.COM return ("Cache"); 4523434Sesaxe case PGHW_FPU: 4538906SEric.Saxe@Sun.COM return ("Floating Point Unit"); 4545079Sjc25722 case PGHW_MPIPE: 4558906SEric.Saxe@Sun.COM return ("Data Pipe to memory"); 4563434Sesaxe case PGHW_CHIP: 4578906SEric.Saxe@Sun.COM return ("Socket"); 4583434Sesaxe case PGHW_MEMORY: 
4598906SEric.Saxe@Sun.COM return ("Memory"); 4608906SEric.Saxe@Sun.COM case PGHW_POW_ACTIVE: 4618906SEric.Saxe@Sun.COM return ("CPU PM Active Power Domain"); 4628906SEric.Saxe@Sun.COM case PGHW_POW_IDLE: 4638906SEric.Saxe@Sun.COM return ("CPU PM Idle Power Domain"); 4643434Sesaxe default: 4653434Sesaxe return ("unknown"); 4663434Sesaxe } 4673434Sesaxe } 4683434Sesaxe 4693434Sesaxe /* 470*11389SAlexander.Kolbasov@Sun.COM * Return a short string name given a pg_hw sharing type 471*11389SAlexander.Kolbasov@Sun.COM */ 472*11389SAlexander.Kolbasov@Sun.COM char * 473*11389SAlexander.Kolbasov@Sun.COM pghw_type_shortstring(pghw_type_t hw) 474*11389SAlexander.Kolbasov@Sun.COM { 475*11389SAlexander.Kolbasov@Sun.COM switch (hw) { 476*11389SAlexander.Kolbasov@Sun.COM case PGHW_IPIPE: 477*11389SAlexander.Kolbasov@Sun.COM return ("instr_pipeline"); 478*11389SAlexander.Kolbasov@Sun.COM case PGHW_CACHE: 479*11389SAlexander.Kolbasov@Sun.COM return ("Cache"); 480*11389SAlexander.Kolbasov@Sun.COM case PGHW_FPU: 481*11389SAlexander.Kolbasov@Sun.COM return ("FPU"); 482*11389SAlexander.Kolbasov@Sun.COM case PGHW_MPIPE: 483*11389SAlexander.Kolbasov@Sun.COM return ("memory_pipeline"); 484*11389SAlexander.Kolbasov@Sun.COM case PGHW_CHIP: 485*11389SAlexander.Kolbasov@Sun.COM return ("Socket"); 486*11389SAlexander.Kolbasov@Sun.COM case PGHW_MEMORY: 487*11389SAlexander.Kolbasov@Sun.COM return ("Memory"); 488*11389SAlexander.Kolbasov@Sun.COM case PGHW_POW_ACTIVE: 489*11389SAlexander.Kolbasov@Sun.COM return ("CPU_PM_Active"); 490*11389SAlexander.Kolbasov@Sun.COM case PGHW_POW_IDLE: 491*11389SAlexander.Kolbasov@Sun.COM return ("CPU_PM_Idle"); 492*11389SAlexander.Kolbasov@Sun.COM default: 493*11389SAlexander.Kolbasov@Sun.COM return ("unknown"); 494*11389SAlexander.Kolbasov@Sun.COM } 495*11389SAlexander.Kolbasov@Sun.COM } 496*11389SAlexander.Kolbasov@Sun.COM 497*11389SAlexander.Kolbasov@Sun.COM /* 4983434Sesaxe * Create / Update routines for PG hw kstats 4993434Sesaxe * 5003434Sesaxe * It is the 
intention of these kstats to provide some level 5013434Sesaxe * of informational / debugging observability into the types 5023434Sesaxe * and nature of the system's detected hardware sharing relationships 5033434Sesaxe */ 5043434Sesaxe void 5053434Sesaxe pghw_kstat_create(pghw_t *pg) 5063434Sesaxe { 507*11389SAlexander.Kolbasov@Sun.COM char *class = pghw_type_string(pg->pghw_hw); 508*11389SAlexander.Kolbasov@Sun.COM 5093434Sesaxe /* 5103434Sesaxe * Create a physical pg kstat 5113434Sesaxe */ 5123434Sesaxe if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, 513*11389SAlexander.Kolbasov@Sun.COM "pg", "pg", 514*11389SAlexander.Kolbasov@Sun.COM KSTAT_TYPE_NAMED, 5153434Sesaxe sizeof (pghw_kstat) / sizeof (kstat_named_t), 5163434Sesaxe KSTAT_FLAG_VIRTUAL)) != NULL) { 5178906SEric.Saxe@Sun.COM /* Class string, hw string, and policy string */ 5183434Sesaxe pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX; 5198906SEric.Saxe@Sun.COM pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; 5208906SEric.Saxe@Sun.COM pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; 5213434Sesaxe pg->pghw_kstat->ks_lock = &pghw_kstat_lock; 5223434Sesaxe pg->pghw_kstat->ks_data = &pghw_kstat; 5233434Sesaxe pg->pghw_kstat->ks_update = pghw_kstat_update; 5243434Sesaxe pg->pghw_kstat->ks_private = pg; 5253434Sesaxe kstat_install(pg->pghw_kstat); 5263434Sesaxe } 527*11389SAlexander.Kolbasov@Sun.COM 528*11389SAlexander.Kolbasov@Sun.COM if (pg_cpulist_maxlen == 0) 529*11389SAlexander.Kolbasov@Sun.COM pg_cpulist_maxlen = CPUSTR_LEN(max_ncpus); 530*11389SAlexander.Kolbasov@Sun.COM 531*11389SAlexander.Kolbasov@Sun.COM /* 532*11389SAlexander.Kolbasov@Sun.COM * Create a physical pg kstat 533*11389SAlexander.Kolbasov@Sun.COM */ 534*11389SAlexander.Kolbasov@Sun.COM if ((pg->pghw_cu_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, 535*11389SAlexander.Kolbasov@Sun.COM "hardware", class, 536*11389SAlexander.Kolbasov@Sun.COM KSTAT_TYPE_NAMED, 537*11389SAlexander.Kolbasov@Sun.COM sizeof 
(pghw_cu_kstat) / sizeof (kstat_named_t), 538*11389SAlexander.Kolbasov@Sun.COM KSTAT_FLAG_VIRTUAL)) != NULL) { 539*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_lock = &pghw_kstat_lock; 540*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_data = &pghw_cu_kstat; 541*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_update = pghw_cu_kstat_update; 542*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_private = pg; 543*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_data_size += strlen(class) + 1; 544*11389SAlexander.Kolbasov@Sun.COM /* Allow space for CPU strings */ 545*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; 546*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cu_kstat->ks_data_size += pg_cpulist_maxlen; 547*11389SAlexander.Kolbasov@Sun.COM kstat_install(pg->pghw_cu_kstat); 548*11389SAlexander.Kolbasov@Sun.COM } 5493434Sesaxe } 5503434Sesaxe 5513434Sesaxe int 5523434Sesaxe pghw_kstat_update(kstat_t *ksp, int rw) 5533434Sesaxe { 5543434Sesaxe struct pghw_kstat *pgsp = &pghw_kstat; 5553434Sesaxe pghw_t *pg = ksp->ks_private; 5563434Sesaxe 5573434Sesaxe if (rw == KSTAT_WRITE) 5583434Sesaxe return (EACCES); 5593434Sesaxe 560*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id; 561*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); 562*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_instance_id.value.ui32 = pg->pghw_instance; 5633434Sesaxe kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name); 5643434Sesaxe kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw)); 5658906SEric.Saxe@Sun.COM kstat_named_setstr(&pgsp->pg_policy, pg_policy_name((pg_t *)pg)); 5663434Sesaxe return (0); 5673434Sesaxe } 568*11389SAlexander.Kolbasov@Sun.COM 569*11389SAlexander.Kolbasov@Sun.COM int 570*11389SAlexander.Kolbasov@Sun.COM pghw_cu_kstat_update(kstat_t *ksp, int rw) 571*11389SAlexander.Kolbasov@Sun.COM { 
572*11389SAlexander.Kolbasov@Sun.COM struct pghw_cu_kstat *pgsp = &pghw_cu_kstat; 573*11389SAlexander.Kolbasov@Sun.COM pghw_t *pg = ksp->ks_private; 574*11389SAlexander.Kolbasov@Sun.COM pghw_util_t *hw_util = &pg->pghw_stats; 575*11389SAlexander.Kolbasov@Sun.COM 576*11389SAlexander.Kolbasov@Sun.COM if (rw == KSTAT_WRITE) 577*11389SAlexander.Kolbasov@Sun.COM return (EACCES); 578*11389SAlexander.Kolbasov@Sun.COM 579*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id; 580*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); 581*11389SAlexander.Kolbasov@Sun.COM 582*11389SAlexander.Kolbasov@Sun.COM /* 583*11389SAlexander.Kolbasov@Sun.COM * Allocate memory for the string representing the list of CPUs in PG. 584*11389SAlexander.Kolbasov@Sun.COM * This memory should persist past the call to pghw_cu_kstat_update() 585*11389SAlexander.Kolbasov@Sun.COM * since the kstat snapshot routine will reference this memory. 586*11389SAlexander.Kolbasov@Sun.COM */ 587*11389SAlexander.Kolbasov@Sun.COM pghw_cpulist_alloc(pg); 588*11389SAlexander.Kolbasov@Sun.COM 589*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_kstat_gen != pg->pghw_generation) { 590*11389SAlexander.Kolbasov@Sun.COM /* 591*11389SAlexander.Kolbasov@Sun.COM * PG kstat generation number is out of sync with PG's 592*11389SAlexander.Kolbasov@Sun.COM * generation mumber. It means that some CPUs could have joined 593*11389SAlexander.Kolbasov@Sun.COM * or left PG and it is not possible to compare the numbers 594*11389SAlexander.Kolbasov@Sun.COM * obtained before and after the generation change. 595*11389SAlexander.Kolbasov@Sun.COM * 596*11389SAlexander.Kolbasov@Sun.COM * Reset the maximum utilization rate and start computing it 597*11389SAlexander.Kolbasov@Sun.COM * from scratch. 
598*11389SAlexander.Kolbasov@Sun.COM */ 599*11389SAlexander.Kolbasov@Sun.COM hw_util->pghw_util = 0; 600*11389SAlexander.Kolbasov@Sun.COM hw_util->pghw_rate_max = 0; 601*11389SAlexander.Kolbasov@Sun.COM pg->pghw_kstat_gen = pg->pghw_generation; 602*11389SAlexander.Kolbasov@Sun.COM } 603*11389SAlexander.Kolbasov@Sun.COM 604*11389SAlexander.Kolbasov@Sun.COM /* 605*11389SAlexander.Kolbasov@Sun.COM * We can't block on CPU lock because when PG is destroyed (under 606*11389SAlexander.Kolbasov@Sun.COM * cpu_lock) it tries to delete this kstat and it will wait for us to 607*11389SAlexander.Kolbasov@Sun.COM * complete which will never happen since we are waiting for cpu_lock to 608*11389SAlexander.Kolbasov@Sun.COM * drop. Deadlocks are fun! 609*11389SAlexander.Kolbasov@Sun.COM */ 610*11389SAlexander.Kolbasov@Sun.COM if (mutex_tryenter(&cpu_lock)) { 611*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cpulist != NULL && 612*11389SAlexander.Kolbasov@Sun.COM *(pg->pghw_cpulist) == '\0') { 613*11389SAlexander.Kolbasov@Sun.COM (void) group2intlist(&(((pg_t *)pg)->pg_cpus), 614*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cpulist, pg->pghw_cpulist_len, cpu2id); 615*11389SAlexander.Kolbasov@Sun.COM } 616*11389SAlexander.Kolbasov@Sun.COM cu_pg_update(pg); 617*11389SAlexander.Kolbasov@Sun.COM mutex_exit(&cpu_lock); 618*11389SAlexander.Kolbasov@Sun.COM } 619*11389SAlexander.Kolbasov@Sun.COM 620*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_generation.value.ui32 = pg->pghw_kstat_gen; 621*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util; 622*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_hw_util_time_running.value.ui64 = hw_util->pghw_time_running; 623*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_hw_util_time_stopped.value.ui64 = hw_util->pghw_time_stopped; 624*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate; 625*11389SAlexander.Kolbasov@Sun.COM pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max; 
626*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cpulist != NULL) 627*11389SAlexander.Kolbasov@Sun.COM kstat_named_setstr(&pgsp->pg_cpus, pg->pghw_cpulist); 628*11389SAlexander.Kolbasov@Sun.COM else 629*11389SAlexander.Kolbasov@Sun.COM kstat_named_setstr(&pgsp->pg_cpus, ""); 630*11389SAlexander.Kolbasov@Sun.COM 631*11389SAlexander.Kolbasov@Sun.COM kstat_named_setstr(&pgsp->pg_sharing, pghw_type_string(pg->pghw_hw)); 632*11389SAlexander.Kolbasov@Sun.COM 633*11389SAlexander.Kolbasov@Sun.COM return (0); 634*11389SAlexander.Kolbasov@Sun.COM } 635*11389SAlexander.Kolbasov@Sun.COM 636*11389SAlexander.Kolbasov@Sun.COM /* 637*11389SAlexander.Kolbasov@Sun.COM * Update the string representation of CPUs in PG (pg->pghw_cpulist). 638*11389SAlexander.Kolbasov@Sun.COM * The string representation is used for kstats. 639*11389SAlexander.Kolbasov@Sun.COM * 640*11389SAlexander.Kolbasov@Sun.COM * The string is allocated if it has not already been or if it is already 641*11389SAlexander.Kolbasov@Sun.COM * allocated and PG has more CPUs now. If PG has smaller or equal number of 642*11389SAlexander.Kolbasov@Sun.COM * CPUs, but the actual CPUs may have changed, the string is reset to the empty 643*11389SAlexander.Kolbasov@Sun.COM * string causes the string representation to be recreated. The pghw_generation 644*11389SAlexander.Kolbasov@Sun.COM * field is used to detect whether CPUs within the pg may have changed. 
645*11389SAlexander.Kolbasov@Sun.COM */ 646*11389SAlexander.Kolbasov@Sun.COM static void 647*11389SAlexander.Kolbasov@Sun.COM pghw_cpulist_alloc(pghw_t *pg) 648*11389SAlexander.Kolbasov@Sun.COM { 649*11389SAlexander.Kolbasov@Sun.COM uint_t ncpus = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); 650*11389SAlexander.Kolbasov@Sun.COM size_t len = CPUSTR_LEN(ncpus); 651*11389SAlexander.Kolbasov@Sun.COM 652*11389SAlexander.Kolbasov@Sun.COM /* 653*11389SAlexander.Kolbasov@Sun.COM * If the pghw_cpulist string is already allocated we need to make sure 654*11389SAlexander.Kolbasov@Sun.COM * that it has sufficient length. Also if the set of CPUs may have 655*11389SAlexander.Kolbasov@Sun.COM * changed, we need to re-generate the string. 656*11389SAlexander.Kolbasov@Sun.COM */ 657*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cpulist != NULL && 658*11389SAlexander.Kolbasov@Sun.COM pg->pghw_kstat_gen != pg->pghw_generation) { 659*11389SAlexander.Kolbasov@Sun.COM if (len <= pg->pghw_cpulist_len) { 660*11389SAlexander.Kolbasov@Sun.COM /* 661*11389SAlexander.Kolbasov@Sun.COM * There is sufficient space in the pghw_cpulist for 662*11389SAlexander.Kolbasov@Sun.COM * the new set of CPUs. Just clear the string to trigger 663*11389SAlexander.Kolbasov@Sun.COM * re-generation of list of CPUs 664*11389SAlexander.Kolbasov@Sun.COM */ 665*11389SAlexander.Kolbasov@Sun.COM *(pg->pghw_cpulist) = '\0'; 666*11389SAlexander.Kolbasov@Sun.COM } else { 667*11389SAlexander.Kolbasov@Sun.COM /* 668*11389SAlexander.Kolbasov@Sun.COM * There is, potentially, insufficient space in 669*11389SAlexander.Kolbasov@Sun.COM * pghw_cpulist, so reallocate the string. 
670*11389SAlexander.Kolbasov@Sun.COM */ 671*11389SAlexander.Kolbasov@Sun.COM ASSERT(strlen(pg->pghw_cpulist) < pg->pghw_cpulist_len); 672*11389SAlexander.Kolbasov@Sun.COM kmem_free(pg->pghw_cpulist, pg->pghw_cpulist_len); 673*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cpulist = NULL; 674*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cpulist_len = 0; 675*11389SAlexander.Kolbasov@Sun.COM } 676*11389SAlexander.Kolbasov@Sun.COM } 677*11389SAlexander.Kolbasov@Sun.COM 678*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cpulist == NULL) { 679*11389SAlexander.Kolbasov@Sun.COM /* 680*11389SAlexander.Kolbasov@Sun.COM * Allocate space to hold cpulist. 681*11389SAlexander.Kolbasov@Sun.COM * 682*11389SAlexander.Kolbasov@Sun.COM * Length can not be bigger that the maximum space we have 683*11389SAlexander.Kolbasov@Sun.COM * allowed for the kstat buffer 684*11389SAlexander.Kolbasov@Sun.COM */ 685*11389SAlexander.Kolbasov@Sun.COM if (len > pg_cpulist_maxlen) 686*11389SAlexander.Kolbasov@Sun.COM len = pg_cpulist_maxlen; 687*11389SAlexander.Kolbasov@Sun.COM if (len > 0) { 688*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cpulist = kmem_zalloc(len, KM_NOSLEEP); 689*11389SAlexander.Kolbasov@Sun.COM if (pg->pghw_cpulist != NULL) 690*11389SAlexander.Kolbasov@Sun.COM pg->pghw_cpulist_len = len; 691*11389SAlexander.Kolbasov@Sun.COM } 692*11389SAlexander.Kolbasov@Sun.COM } 693*11389SAlexander.Kolbasov@Sun.COM } 694*11389SAlexander.Kolbasov@Sun.COM 695*11389SAlexander.Kolbasov@Sun.COM static int 696*11389SAlexander.Kolbasov@Sun.COM cpu2id(void *v) 697*11389SAlexander.Kolbasov@Sun.COM { 698*11389SAlexander.Kolbasov@Sun.COM cpu_t *cp = (cpu_t *)v; 699*11389SAlexander.Kolbasov@Sun.COM 700*11389SAlexander.Kolbasov@Sun.COM ASSERT(v != NULL); 701*11389SAlexander.Kolbasov@Sun.COM 702*11389SAlexander.Kolbasov@Sun.COM return (cp->cpu_id); 703*11389SAlexander.Kolbasov@Sun.COM } 704