/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/var.h>
#include <sys/cyclic.h>
#include <sys/lgrp.h>
#include <sys/chip.h>
#include <sys/loadavg.h>
#include <sys/class.h>
#include <sys/fss.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/policy.h>

/*
 * Calling pool_lock() protects the pools configuration, which includes
 * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
 * partitions from being created or destroyed while the lock is held.
 * The lock ordering with respect to related locks is:
 *
 *	pool_lock() ---> cpu_lock ---> pidlock --> p_lock
 *
 * Blocking memory allocations may be made while holding "pool_lock"
 * or cpu_lock.
 */
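
/*
 * For example, a caller that needs all four locks (as the pset binding
 * path does; see cpupart_bind_thread() below) would nest them as:
 *
 *	pool_lock();
 *	mutex_enter(&cpu_lock);
 *	mutex_enter(&pidlock);
 *	mutex_enter(&p->p_lock);
 *	...
 *	mutex_exit(&p->p_lock);
 *	mutex_exit(&pidlock);
 *	mutex_exit(&cpu_lock);
 *	pool_unlock();
 */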

/*
 * The cp_default partition is allocated statically, but its lgroup load
 * average (lpl) list is allocated dynamically after the kmem subsystem is
 * initialized.  This saves some memory since the space allocated reflects
 * the actual number of lgroups supported by the platform.  The lgrp
 * facility provides a temporary space to hold lpl information during
 * system bootstrap.
 */

cpupart_t		*cp_list_head;
cpupart_t		cp_default;
static cpupartid_t	cp_id_next;
uint_t			cp_numparts;
uint_t			cp_numparts_nonempty;

/*
 * Need to limit total number of partitions to avoid slowing down the
 * clock code too much.  The clock code traverses the list of
 * partitions and needs to be able to execute in a reasonable amount
 * of time (less than 1/hz seconds).  The maximum is sized based on
 * max_ncpus so it shouldn't be a problem unless there are large
 * numbers of empty partitions.
 */
static uint_t		cp_max_numparts;

/*
 * Processor sets and CPU partitions are different but related concepts.
 * A processor set is a user-level abstraction allowing users to create
 * sets of CPUs and bind threads exclusively to those sets.  A CPU
 * partition is a kernel dispatcher object consisting of a set of CPUs
 * and a global dispatch queue.  The processor set abstraction is
 * implemented via a CPU partition, and currently there is a 1-1
 * mapping between processor sets and partitions (excluding the default
 * partition, which is not visible as a processor set).  Hence, the
 * numbering for processor sets and CPU partitions is identical.  This
 * may not always be true in the future, and these macros could become
 * less trivial if we support e.g. a processor set containing multiple
 * CPU partitions.
 */
#define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
#define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
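
/*
 * The only interesting translation is for the default partition:
 * PSTOCP(PS_NONE) yields CP_DEFAULT and CPTOPS(CP_DEFAULT) yields
 * PS_NONE; every other ID passes through unchanged.
 */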

/*
 * Find a CPU partition given a processor set ID.
 */
static cpupart_t *
cpupart_find_all(psetid_t psid)
{
	cpupart_t *cp;
	cpupartid_t cpid = PSTOCP(psid);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* default partition not visible as a processor set */
	if (psid == CP_DEFAULT)
		return (NULL);

	if (psid == PS_MYID)
		return (curthread->t_cpupart);

	cp = cp_list_head;
	do {
		if (cp->cp_id == cpid)
			return (cp);
		cp = cp->cp_next;
	} while (cp != cp_list_head);
	return (NULL);
}

/*
 * Find a CPU partition given a processor set ID if the processor set
 * should be visible from the calling zone.
 */
cpupart_t *
cpupart_find(psetid_t psid)
{
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpupart_find_all(psid);
	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
		return (NULL);
	return (cp);
}
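
/*
 * kstat update routine for a partition's "pset" kstat: snapshots the
 * partition's cumulative runnable/waiting counts, CPU count, and
 * high-precision load averages (scaled down to the traditional FSHIFT
 * fixed-point format).  The kstat is read-only; writes return EACCES.
 */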
static int
cpupart_kstat_update(kstat_t *ksp, int rw)
{
	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
	cpupart_kstat_t *cpksp = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
	    (16 - FSHIFT);
	return (0);
}

static void
cpupart_kstat_create(cpupart_t *cp)
{
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * We have a bit of a chicken-egg problem since this code will
	 * get called to create the kstats for CP_DEFAULT before the
	 * pools framework gets initialized.  We circumvent the problem
	 * by special-casing cp_default.
	 */
	if (cp != &cp_default && pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
	if (ksp != NULL) {
		cpupart_kstat_t *cpksp = ksp->ks_data;

		kstat_named_init(&cpksp->cpk_updates, "updates",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_runnable, "runnable",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_waiting, "waiting",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
		    KSTAT_DATA_UINT32);

		ksp->ks_update = cpupart_kstat_update;
		ksp->ks_private = cp;

		kstat_install(ksp);
	}
	cp->cp_kstat = ksp;
}
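
/*
 * From userland, the installed kstat can be read with, e.g.,
 * "kstat -m unix -n pset" (module "unix", instance cp_id, name "pset",
 * class "misc", matching the kstat_create_zone() call above).
 */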

/*
 * Initialize the default partition and kpreempt disp queue.
 */
void
cpupart_initialize_default(void)
{
	lgrp_id_t i;

	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	CPUSET_ZERO(cp_default.cp_haltset);
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
	/*
	 * Allocate space for cp_default list of lgrploads
	 */
	cp_default.cp_nlgrploads = lgrp_plat_max_lgrps();
	cp_default.cp_lgrploads = kmem_zalloc(sizeof (lpl_t) *
	    cp_default.cp_nlgrploads, KM_SLEEP);

	/*
	 * The initial lpl topology is created in a special lpl list,
	 * lpl_bootstrap, and is copied to cp_default here.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0's cpu_lpl pointer to
	 *	 point to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);

	for (i = 0; i < cp_default.cp_nlgrploads; i++) {
		cp_default.cp_lgrploads[i].lpl_lgrpid = i;
	}
	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;
	/*
	 * Set t0's home
	 */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];
}
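
/*
 * Move CPU cp from its current partition into newpp, rehoming any threads
 * whose home lgroup or last-run CPU is affected.  Fails with EBUSY if
 * bound threads or bound cyclics prevent the move; a forced move first
 * attempts cpu_unbind().  Caller must hold cpu_lock.
 */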
static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t *ncp, *newlist;
	kthread_t *t;
	int move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t *p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (EBUSY);
	}

	if (forced && (ret = cpu_unbind(cp->cpu_id)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads from weak binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}
	}

	/*
	 * Update the set of chips being spanned.
	 */
	chip_cpu_move_part(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;

	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {
		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(CPUSET_ISNULL(newpp->cp_haltset));
	ASSERT(CPUSET_ISNULL(oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk through the active process list looking for
		 * threads that need a new home lgroup, or whose last-run
		 * CPU is the one being moved out of the partition.
		 */
		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {
				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different
				 * lpl.
				 */
				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				    t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * If we didn't find any threads in the same lgroup
			 * as this CPU with a different lpl, remove the
			 * lgroup from the process lgroup bitmask.
			 */
			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk the thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */
		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose the best lgroup for home when
			 * the thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
			    t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}

/*
 * Check if thread can be moved to a new cpu partition.  Called by
 * cpupart_move_thread() and pset_bind_start().
 */
int
cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(cp != NULL);
	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * CPU-bound threads can't be moved.
	 */
	if (!ignore) {
		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
		    tp->t_weakbound_cpu;
		if (boundcpu != NULL && boundcpu->cpu_part != cp)
			return (EBUSY);
	}
	return (0);
}

/*
 * Move a thread to a new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp, &tp->t_cpupart->
			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
		} else {
			/*
			 * The thread's lgroup has no CPUs in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
		    tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}

/*
 * This function binds a thread to a partition.  Must be called with the
 * p_lock of the containing process held (to keep the thread from going
 * away), and thus also with cpu_lock held (since cpu_lock must be
 * acquired before p_lock).  If ignore is non-zero, then CPU bindings
 * should be ignored (this is used when destroying a partition).
 */
int
cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
    void *zonebuf)
{
	cpupart_t	*newpp;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	if (psid == PS_NONE)
		newpp = &cp_default;
	else {
		newpp = cpupart_find(psid);
		if (newpp == NULL) {
			return (EINVAL);
		}
	}
	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
}
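
/*
 * Note that projbuf and zonebuf above are pre-allocated by callers with
 * fss_allocbuf() before pidlock and p_lock are taken; see
 * cpupart_destroy() below for the full calling sequence.
 */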

/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;
	lgrp_id_t	i;

	ASSERT(pool_lock_held());

	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	CPUSET_ZERO(pp->cp_haltset);
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	for (i = 0; i < pp->cp_nlgrploads; i++) {
		pp->cp_lgrploads[i].lpl_lgrpid = i;
	}
	CHIP_SET_ZERO(pp->cp_chipset);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}

/*
 * Destroy a partition.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;
	void	*projbuf, *zonebuf;
	kthread_t *t;
	proc_t	*p;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	/*
	 * We first need to unbind all the threads currently bound to the
	 * partition.  Then do the actual destroy (which moves the CPUs).
	 */
	mutex_enter(&pidlock);
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}
			err = cpupart_bind_thread(t, PS_NONE, 1,
			    projbuf, zonebuf);
			if (err) {
				mutex_exit(&p->p_lock);
				mutex_exit(&pidlock);
				mutex_exit(&cpu_lock);
				fss_freebuf(projbuf, FSS_ALLOC_PROJ);
				fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
				return (err);
			}
			t->t_bind_pset = PS_NONE;
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);

	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if ((err = cpupart_move_cpu(cp, newpp, 0)) != 0) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	ASSERT(CHIP_SET_ISNULL(pp->cp_chipset));
	ASSERT(CPUSET_ISNULL(pp->cp_haltset));

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're brought
	 * back online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);
	kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
	pp->cp_lgrploads = NULL;
	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}

/*
 * Return the ID of the partition to which the specified processor belongs.
 */
psetid_t
cpupart_query_cpu(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (CPTOPS(cp->cpu_part->cp_id));
}

/*
 * Attach a processor to an existing partition.
 */
int
cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
{
	cpupart_t	*pp;
	int		err;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	pp = cpupart_find(psid);
	if (pp == NULL)
		return (EINVAL);
	if (cp->cpu_flags & CPU_OFFLINE)
		return (EINVAL);

	err = cpupart_move_cpu(cp, pp, forced);
	return (err);
}

/*
 * Get a list of cpus belonging to the partition.  If numcpus is NULL,
 * this just checks for a valid partition.  If numcpus is non-NULL but
 * cpulist is NULL, the current number of cpus is stored in *numcpus.
 * If both are non-NULL, the current number of cpus is stored in *numcpus,
 * and a list of those cpus up to the size originally in *numcpus is
 * stored in cpulist[].  Also, store the processor set id in *psid.
 * This is useful in case the processor set id passed in was PS_MYID.
 */
int
cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
{
	cpupart_t	*pp;
	uint_t		ncpus;
	cpu_t		*c;
	int		i;

	mutex_enter(&cpu_lock);
	pp = cpupart_find(*psid);
	if (pp == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*psid = CPTOPS(pp->cp_id);
	ncpus = pp->cp_ncpus;
	if (numcpus) {
		if (ncpus > *numcpus) {
			/*
			 * Only copy as many cpus as were passed in, but
			 * pass back the real number.
			 */
			uint_t t = ncpus;
			ncpus = *numcpus;
			*numcpus = t;
		} else
			*numcpus = ncpus;

		if (cpulist) {
			c = pp->cp_cpulist;
			for (i = 0; i < ncpus; i++) {
				ASSERT(c != NULL);
				cpulist[i] = c->cpu_id;
				c = c->cpu_next_part;
			}
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}
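
/*
 * A typical caller sizes the list in two passes (sketch):
 *
 *	uint_t ncpu = 0;
 *	(void) cpupart_get_cpus(&psid, NULL, &ncpu);
 *	cpulist = kmem_alloc(ncpu * sizeof (processorid_t), KM_SLEEP);
 *	(void) cpupart_get_cpus(&psid, cpulist, &ncpu);
 *
 * and rechecks *numcpus afterwards, since the partition may have grown
 * between the two calls.
 */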

/*
 * Reallocate kpreempt queues for each CPU partition.  Called from
 * disp_setup when a new scheduling class is loaded that increases the
 * number of priorities in the system.
 */
void
cpupart_kpqalloc(pri_t npri)
{
	cpupart_t *cpp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cpp = cp_list_head;
	do {
		disp_kp_alloc(&cpp->cp_kp_queue, npri);
		cpp = cpp->cp_next;
	} while (cpp != cp_list_head);
}

int
cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
{
	cpupart_t *cp;
	int i;

	ASSERT(nelem >= 0);
	ASSERT(nelem <= LOADAVG_NSTATS);
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpupart_find(psid);
	if (cp == NULL)
		return (EINVAL);
	for (i = 0; i < nelem; i++)
		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);

	return (0);
}

uint_t
cpupart_list(psetid_t *list, uint_t nelem, int flag)
{
	uint_t	numpart = 0;
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);

	if (list != NULL) {
		cp = cp_list_head;
		do {
			if (((flag == CP_ALL) && (cp != &cp_default)) ||
			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
				if (numpart == nelem)
					break;
				list[numpart++] = CPTOPS(cp->cp_id);
			}
			cp = cp->cp_next;
		} while (cp != cp_list_head);
	}

	ASSERT(numpart < cp_numparts);

	if (flag == CP_ALL)
		numpart = cp_numparts - 1; /* leave out default partition */
	else if (flag == CP_NONEMPTY)
		numpart = cp_numparts_nonempty;

	return (numpart);
}
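
/*
 * Set or get the attribute word of the partition identified by psid.
 * The default partition always keeps PSET_NOESCAPE set, so an attempt
 * to clear it there fails with EINVAL.
 */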
int
cpupart_setattr(psetid_t psid, uint_t attr)
{
	cpupart_t *cp;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	/*
	 * The PSET_NOESCAPE attribute is always set for the default
	 * cpu partition.
	 */
	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	cp->cp_attr = attr;
	mutex_exit(&cpu_lock);
	return (0);
}

int
cpupart_getattr(psetid_t psid, uint_t *attrp)
{
	cpupart_t *cp;

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*attrp = cp->cp_attr;
	mutex_exit(&cpu_lock);
	return (0);
}