/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * vm_usage
 *
 * This file implements the getvmusage() private system call.
 * getvmusage() counts the amount of resident memory pages and swap
 * reserved by the specified process collective.  A "process collective" is
 * the set of processes owned by a particular zone, project, task, or user.
 *
 * rss and swap are counted so that for a given process collective, a page is
 * only counted once.  For example, this means that if multiple processes in
 * the same project map the same page, then the project will only be charged
 * once for that page.  On the other hand, if two processes in different
 * projects map the same page, then both projects will be charged
 * for the page.
 *
 * The vm_getusage() calculation is implemented so that the first thread
 * performs the rss/swap counting.  Other callers will wait for that thread
 * to finish, copying the results.  This enables multiple rcapds and prstats
 * to consume data from the same calculation.  The results are also cached so
 * that a caller interested in recent results can just copy them instead of
 * starting a new calculation.  The caller passes the maximum age (in
 * seconds) of the data.  If the cached data is young enough, the cache is
 * copied, otherwise, a new calculation is executed and the cache is replaced
 * with the new data.
 *
 * The rss calculation for each process collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *		- If seg is private:
 *			- Lookup anons in the amp.
 *			- For incore pages not previously visited for each of
 *			  the proc's collectives, add incore pagesize to each
 *			  collective.
 *			  Anons with a refcnt of 1 can be assumed to be not
 *			  previously visited.
 *			- For address ranges without anons in the amp:
 *				- Lookup pages in underlying vnode.
 *				- For incore pages not previously visited for
 *				  each of the proc's collectives, add incore
 *				  pagesize to each collective.
 *		- If seg is shared:
 *			- Lookup pages in the shared amp or vnode.
 *			- For incore pages not previously visited for each of
 *			  the proc's collectives, add incore pagesize to each
 *			  collective.
 *
 * Swap is reserved by private segments, and shared anonymous segments.
 * The only shared anon segments which do not reserve swap are ISM segments
 * and schedctl segments, both of which can be identified by having
 * amp->swresv == 0.
 *
 * The swap calculation for each collective is as follows:
 *
 *   - Inspect flags, determine if counting swap for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *		- If seg is private:
 *			- Add svd->swresv pages to swap count for each of the
 *			  proc's collectives.
 *		- If seg is anon, shared, and amp->swresv != 0:
 *			- For address ranges in amp not previously visited for
 *			  each of the proc's collectives, add size of address
 *			  range to the swap count for each collective.
 *
 * These two calculations are done simultaneously, with most of the work
 * being done in vmu_calculate_seg().  The results of the calculation are
 * copied into "vmu_data.vmu_cache_results".
 *
 * To perform the calculation, various things are tracked and cached:
 *
 *    - incore/not-incore page ranges for all vnodes.
 *	(vmu_data.vmu_all_vnodes_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - incore/not-incore page ranges for all shared amps.
 *	(vmu_data.vmu_all_amps_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - visited page ranges for each collective.
 *	- per vnode (entity->vme_vnode_hash)
 *	- per shared amp (entity->vme_amp_hash)
 *	For accurate counting of map-shared and cow-shared pages.
 *
 *    - visited private anons (refcnt > 1) for each collective.
 *	(entity->vme_anon_hash)
 *	For accurate counting of cow-shared pages.
 *
 * The common accounting structure is the vmu_entity_t, which represents
 * collectives:
 *
 *    - A zone.
 *    - A project, task, or user within a zone.
 *    - The entire system (vmu_data.vmu_system).
 *    - Each collapsed (col) project and user.  This means a given projid or
 *	uid, regardless of which zone the process is in.  For instance,
 *	project 0 in the global zone and project 0 in a non-global zone are
 *	the same collapsed project.
 *
 * Each entity structure tracks which pages have been already visited for
 * that entity (via previously inspected processes) so that these pages are
 * not double counted.
 */
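/*
 * Illustrative sketch (not part of this file): a consumer such as prstat
 * or rcapd would typically reach this code through the getvmusage(2)
 * wrapper, passing flags, the maximum acceptable age of cached results,
 * and a result buffer.  The wrapper signature shown here is an assumption
 * for illustration; see <sys/vm_usage.h> for the actual interface.
 *
 *	vmusage_t res[32];
 *	size_t nres = 32;
 *
 *	// Usage per zone, accepting cached results up to 5 seconds old.
 *	if (getvmusage(VMUSAGE_ALL_ZONES, 5, res, &nres) == 0) {
 *		// res[0 .. nres-1] each identify one entity (vmu_id,
 *		// vmu_zoneid, vmu_type) and carry its rss/swap counters.
 *	}
 */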

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/zone.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/task.h>
#include <sys/thread.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/modhash.h>
#include <sys/modhash_impl.h>
#include <sys/shm.h>
#include <sys/swap.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vm_usage.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>

#define	VMUSAGE_HASH_SIZE		512

#define	VMUSAGE_TYPE_VNODE		1
#define	VMUSAGE_TYPE_AMP		2
#define	VMUSAGE_TYPE_ANON		3

#define	VMUSAGE_BOUND_UNKNOWN		0
#define	VMUSAGE_BOUND_INCORE		1
#define	VMUSAGE_BOUND_NOT_INCORE	2

/*
 * Bounds for vnodes and shared amps.
 * Each bound is either entirely incore, entirely not in core, or
 * entirely unknown.  Bounds are stored in order by offset.
 */
typedef struct vmu_bound {
	struct vmu_bound *vmb_next;
	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
	pgcnt_t	vmb_end;    /* page offset in vnode/amp on which bound ends */
	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
} vmu_bound_t;

/*
 * Hash of visited objects (vnodes or shared amps).
 * Key is address of vnode or amp.  Bounds list known incore/not-incore
 * bounds for the vnode/amp.
 */
typedef struct vmu_object {
	struct vmu_object *vmo_next;	/* free list */
	caddr_t		vmo_key;
	short		vmo_type;
	vmu_bound_t	*vmo_bounds;
} vmu_object_t;
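/*
 * Worked example (illustrative): suppose pages [0, 9] of a vnode have
 * been inspected, and pages 0-3 were found resident while 4-9 were not.
 * The object's bounds list is then the ordered pair of bounds
 *
 *	[0, 3] VMUSAGE_BOUND_INCORE -> [4, 9] VMUSAGE_BOUND_NOT_INCORE
 *
 * and a later lookup of, say, [2, 5] is answered entirely from this list
 * without touching the page hash again.
 */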
/*
 * Entity by which to count results.
 *
 * The entity structure keeps the current rss/swap counts for each entity
 * (zone, project, etc), and hashes of vm structures that have already
 * been visited for the entity.
 *
 * vme_next:	links the list of all entities currently being counted by
 *		vmu_calculate().
 *
 * vme_next_calc: links the list of entities related to the current process
 *		being counted by vmu_calculate_proc().
 *
 * vmu_calculate_proc() walks all processes.  For each process, it makes a
 * list of the entities related to that process using vme_next_calc.  This
 * list changes each time vmu_calculate_proc() is called.
 */
typedef struct vmu_entity {
	struct vmu_entity *vme_next;
	struct vmu_entity *vme_next_calc;
	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
	mod_hash_t	*vme_anon_hash;	 /* cow anons visited for entity */
	vmusage_t	vme_result;	 /* identifies entity and results */
} vmu_entity_t;

/*
 * Hash of entities visited within a zone, and an entity for the zone
 * itself.
 */
typedef struct vmu_zone {
	struct vmu_zone	*vmz_next;	/* free list */
	id_t		vmz_id;
	vmu_entity_t	*vmz_zone;
	mod_hash_t	*vmz_projects_hash;
	mod_hash_t	*vmz_tasks_hash;
	mod_hash_t	*vmz_rusers_hash;
	mod_hash_t	*vmz_eusers_hash;
} vmu_zone_t;
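/*
 * Illustrative example of the two entity lists: while vmu_calculate_proc()
 * is visiting a process in zone 1, project 10, the vme_next_calc chain for
 * that process might be
 *
 *	project-10-entity -> zone-1-entity -> system-entity
 *
 * and is rebuilt for the next process, while vme_next permanently links
 * every entity allocated during the calculation.
 */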
/*
 * Cache of results from last calculation
 */
typedef struct vmu_cache {
	vmusage_t	*vmc_results;	/* Results from last call to */
					/* vm_getusage() */
	uint64_t	vmc_nresults;	/* Count of cached results */
	uint64_t	vmc_refcnt;	/* refcnt for free */
	uint_t		vmc_flags;	/* Flags for vm_getusage() */
	hrtime_t	vmc_timestamp;	/* when cache was created */
} vmu_cache_t;

/*
 * Top level rss info for the system
 */
typedef struct vmu_data {
	kmutex_t	vmu_lock;		/* Protects vmu_data */
	kcondvar_t	vmu_cv;			/* Used to signal threads */
						/* waiting for the rss */
						/* calc thread to finish */
	vmu_entity_t	*vmu_system;		/* Entity for tracking */
						/* rss/swap for all */
						/* processes in all zones */
	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
	mod_hash_t	*vmu_projects_col_hash;	/* These *_col_hash hashes */
	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
						/* to implement VMUSAGE_COL_* */
						/* flags, which aggregate by */
						/* project or user regardless */
						/* of zoneid. */
	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
						/* to track incore/not-incore */
	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
						/* amps to track incore/not- */
						/* incore */
	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
	size_t		vmu_nentities;		/* Count of entities in list */
	vmu_cache_t	*vmu_cache;		/* Cached results */
	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
						/* vmu_calculate() */
	uint_t		vmu_calc_flags;		/* Flags being used by */
						/* currently running calc */
						/* thread */
	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
						/* threads waiting for */
						/* calc thread to finish */
	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
						/* for calc thread */
	vmu_bound_t	*vmu_free_bounds;
	vmu_object_t	*vmu_free_objects;
	vmu_entity_t	*vmu_free_entities;
	vmu_zone_t	*vmu_free_zones;
} vmu_data_t;

extern struct as kas;
extern proc_t *practive;
extern zone_t *global_zone;
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

static vmu_data_t vmu_data;
static kmem_cache_t *vmu_bound_cache;
static kmem_cache_t *vmu_object_cache;

/*
 * Save a bound on the free list
 */
static void
vmu_free_bound(vmu_bound_t *bound)
{
	bound->vmb_next = vmu_data.vmu_free_bounds;
	vmu_data.vmu_free_bounds = bound;
}
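/*
 * Bounds, objects, entities, and zones are recycled through the
 * vmu_free_* lists above rather than returned to kmem, so repeated
 * calculations mostly reuse earlier allocations.  The free lists live in
 * vmu_data and so, like the rest of vmu_data, are protected by vmu_lock.
 */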
/*
 * Free an object, and all visited bound info.
 */
static void
vmu_free_object(mod_hash_val_t val)
{
	vmu_object_t *obj = (vmu_object_t *)val;
	vmu_bound_t *bound = obj->vmo_bounds;
	vmu_bound_t *tmp;

	while (bound != NULL) {
		tmp = bound;
		bound = bound->vmb_next;
		vmu_free_bound(tmp);
	}
	obj->vmo_next = vmu_data.vmu_free_objects;
	vmu_data.vmu_free_objects = obj;
}

/*
 * Free an entity, and hashes of visited objects for that entity.
 */
static void
vmu_free_entity(mod_hash_val_t val)
{
	vmu_entity_t *entity = (vmu_entity_t *)val;

	if (entity->vme_vnode_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_vnode_hash);
	if (entity->vme_amp_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_amp_hash);
	if (entity->vme_anon_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_anon_hash);

	entity->vme_next = vmu_data.vmu_free_entities;
	vmu_data.vmu_free_entities = entity;
}

/*
 * Free zone entity, and all hashes of entities inside that zone,
 * which are projects, tasks, and users.
 */
static void
vmu_free_zone(mod_hash_val_t val)
{
	vmu_zone_t *zone = (vmu_zone_t *)val;

	if (zone->vmz_zone != NULL) {
		vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
		zone->vmz_zone = NULL;
	}
	if (zone->vmz_projects_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_projects_hash);
	if (zone->vmz_tasks_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
	if (zone->vmz_rusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
	if (zone->vmz_eusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
	zone->vmz_next = vmu_data.vmu_free_zones;
	vmu_data.vmu_free_zones = zone;
}
/*
 * Initialize synchronization primitives and hashes for system-wide tracking
 * of visited vnodes and shared amps.  Initialize results cache.
 */
void
vm_usage_init()
{
	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);

	vmu_data.vmu_system = NULL;
	vmu_data.vmu_zones_hash = NULL;
	vmu_data.vmu_projects_col_hash = NULL;
	vmu_data.vmu_rusers_col_hash = NULL;
	vmu_data.vmu_eusers_col_hash = NULL;

	vmu_data.vmu_free_bounds = NULL;
	vmu_data.vmu_free_objects = NULL;
	vmu_data.vmu_free_entities = NULL;
	vmu_data.vmu_free_zones = NULL;

	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (vnode_t));
	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (struct anon_map));
	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed euser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);

	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	vmu_object_cache = kmem_cache_create("vmu_object_cache",
	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vmu_data.vmu_entities = NULL;
	vmu_data.vmu_nentities = 0;

	vmu_data.vmu_cache = NULL;
	vmu_data.vmu_calc_thread = NULL;
	vmu_data.vmu_calc_flags = 0;
	vmu_data.vmu_pending_flags = 0;
	vmu_data.vmu_pending_waiters = 0;
}
/*
 * Allocate hashes for tracking vm objects visited for an entity.
 * Update list of entities.
 */
static vmu_entity_t *
vmu_alloc_entity(id_t id, int type, id_t zoneid)
{
	vmu_entity_t *entity;

	if (vmu_data.vmu_free_entities != NULL) {
		entity = vmu_data.vmu_free_entities;
		vmu_data.vmu_free_entities =
		    vmu_data.vmu_free_entities->vme_next;
		bzero(&entity->vme_result, sizeof (vmusage_t));
	} else {
		entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
	}
	entity->vme_result.vmu_id = id;
	entity->vme_result.vmu_zoneid = zoneid;
	entity->vme_result.vmu_type = type;

	if (entity->vme_vnode_hash == NULL)
		entity->vme_vnode_hash = mod_hash_create_ptrhash(
		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (vnode_t));

	if (entity->vme_amp_hash == NULL)
		entity->vme_amp_hash = mod_hash_create_ptrhash(
		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (struct anon_map));

	if (entity->vme_anon_hash == NULL)
		entity->vme_anon_hash = mod_hash_create_ptrhash(
		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
		    mod_hash_null_valdtor, sizeof (struct anon));

	entity->vme_next = vmu_data.vmu_entities;
	vmu_data.vmu_entities = entity;
	vmu_data.vmu_nentities++;

	return (entity);
}
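/*
 * Illustrative example: an entity is identified by the triple stored in
 * its vme_result (vmu_type, vmu_id, vmu_zoneid).  Project 10 in zone 1 is
 * (VMUSAGE_PROJECTS, 10, 1), while the collapsed project 10 across all
 * zones, used for the VMUSAGE_COL_* flags, is
 * (VMUSAGE_PROJECTS, 10, ALL_ZONES).
 */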
/*
 * Allocate a zone entity, and hashes for tracking visited vm objects
 * for projects, tasks, and users within that zone.
 */
static vmu_zone_t *
vmu_alloc_zone(id_t id)
{
	vmu_zone_t *zone;

	if (vmu_data.vmu_free_zones != NULL) {
		zone = vmu_data.vmu_free_zones;
		vmu_data.vmu_free_zones =
		    vmu_data.vmu_free_zones->vmz_next;
		zone->vmz_next = NULL;
		zone->vmz_zone = NULL;
	} else {
		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
	}

	zone->vmz_id = id;

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
	    VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
		zone->vmz_projects_hash = mod_hash_create_idhash(
		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
	    != 0 && zone->vmz_tasks_hash == NULL)
		zone->vmz_tasks_hash = mod_hash_create_idhash(
		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
	    != 0 && zone->vmz_rusers_hash == NULL)
		zone->vmz_rusers_hash = mod_hash_create_idhash(
		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
	    != 0 && zone->vmz_eusers_hash == NULL)
		zone->vmz_eusers_hash = mod_hash_create_idhash(
		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	return (zone);
}

/*
 * Allocate a structure for tracking visited bounds for a vm object.
 */
static vmu_object_t *
vmu_alloc_object(caddr_t key, int type)
{
	vmu_object_t *object;

	if (vmu_data.vmu_free_objects != NULL) {
		object = vmu_data.vmu_free_objects;
		vmu_data.vmu_free_objects =
		    vmu_data.vmu_free_objects->vmo_next;
	} else {
		object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
	}

	object->vmo_key = key;
	object->vmo_type = type;
	object->vmo_bounds = NULL;

	return (object);
}
/*
 * Allocate and return a bound structure.
 */
static vmu_bound_t *
vmu_alloc_bound()
{
	vmu_bound_t *bound;

	if (vmu_data.vmu_free_bounds != NULL) {
		bound = vmu_data.vmu_free_bounds;
		vmu_data.vmu_free_bounds =
		    vmu_data.vmu_free_bounds->vmb_next;
	} else {
		bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
	}
	bzero(bound, sizeof (vmu_bound_t));
	return (bound);
}

/*
 * vmu_find_insert_* functions implement hash lookup or allocate and
 * insert operations.
 */
static vmu_object_t *
vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
{
	int ret;
	vmu_object_t *object;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&object);
	if (ret != 0) {
		object = vmu_alloc_object(key, type);
		ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
		    (mod_hash_val_t)object, (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (object);
}

static int
vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
{
	int ret;
	caddr_t val;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&val);

	if (ret == 0)
		return (0);

	ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);

	ASSERT(ret == 0);

	return (1);
}

static vmu_entity_t *
vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
{
	int ret;
	vmu_entity_t *entity;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&entity);
	if (ret != 0) {
		entity = vmu_alloc_entity(id, type, zoneid);
		ret = i_mod_hash_insert_nosync(hash,
		    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
		    (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (entity);
}
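/*
 * The _nosync mod_hash interfaces are used throughout, presumably because
 * these hashes are only manipulated by the single running calculation
 * thread, so the hash-internal locking would be pure overhead.
 */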
/*
 * Returns list of object bounds between start and end.  New bounds inserted
 * by this call are given type.
 *
 * Returns the number of pages covered if new bounds are created.  Returns 0
 * if the region between start/end is covered entirely by existing bounds.
 */
static pgcnt_t
vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t
    end, char type, vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *prev = NULL;
	vmu_bound_t *tmp = NULL;
	pgcnt_t ret = 0;

	*first = *last = NULL;

	for (next = ro->vmo_bounds; next != NULL; next = next->vmb_next) {
		/*
		 * Find bounds overlapping or overlapped by range [start,end].
		 */
		if (start > next->vmb_end) {
			/* bound is before new bound */
			prev = next;
			continue;
		}
		if (next->vmb_start > end) {
			/* bound is after new bound */
			break;
		}
		if (*first == NULL)
			*first = next;
		*last = next;
	}

	if (*first == NULL) {
		ASSERT(*last == NULL);
		/*
		 * No bounds overlapping range [start,end], so create new
		 * bound
		 */
		tmp = vmu_alloc_bound();
		tmp->vmb_start = start;
		tmp->vmb_end = end;
		tmp->vmb_type = type;
		if (prev == NULL) {
			tmp->vmb_next = ro->vmo_bounds;
			ro->vmo_bounds = tmp;
		} else {
			tmp->vmb_next = prev->vmb_next;
			prev->vmb_next = tmp;
		}
		*first = tmp;
		*last = tmp;
		ASSERT(tmp->vmb_end >= tmp->vmb_start);
		ret = tmp->vmb_end - tmp->vmb_start + 1;
		return (ret);
	}

	/* Check to see if start is before first known bound */
	ASSERT(first != NULL && last != NULL);
	next = (*first);
	if (start < (*first)->vmb_start) {
		/* Create new bound before first bound */
		tmp = vmu_alloc_bound();
		tmp->vmb_start = start;
		tmp->vmb_end = (*first)->vmb_start - 1;
		tmp->vmb_type = type;
		tmp->vmb_next = *first;
		if (*first == ro->vmo_bounds)
			ro->vmo_bounds = tmp;
		if (prev != NULL)
			prev->vmb_next = tmp;
		ASSERT(tmp->vmb_end >= tmp->vmb_start);
		ret += tmp->vmb_end - tmp->vmb_start + 1;
		*first = tmp;
	}
	/*
	 * Between start and end, search for gaps between and after existing
	 * bounds.  Create new bounds to fill gaps if they exist.
	 */
	while (end > next->vmb_end) {
		/*
		 * Check for gap between bound and next bound.  If no gap,
		 * continue.
		 */
		if ((next != *last) &&
		    ((next->vmb_end + 1) == next->vmb_next->vmb_start)) {
			next = next->vmb_next;
			continue;
		}
		/*
		 * Insert new bound in gap after bound, and before next
		 * bound if next bound exists.
		 */
		tmp = vmu_alloc_bound();
		tmp->vmb_type = type;
		tmp->vmb_next = next->vmb_next;
		tmp->vmb_start = next->vmb_end + 1;

		if (next != *last) {
			tmp->vmb_end = next->vmb_next->vmb_start - 1;
			ASSERT(tmp->vmb_end >= tmp->vmb_start);
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			next->vmb_next = tmp;
			next = tmp->vmb_next;
		} else {
			tmp->vmb_end = end;
			ASSERT(tmp->vmb_end >= tmp->vmb_start);
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			next->vmb_next = tmp;
			*last = tmp;
			break;
		}
	}
	return (ret);
}
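/*
 * Worked example: if the existing bounds are [4, 6] and [10, 12] and the
 * caller asks for [0, 12] with type VMUSAGE_BOUND_UNKNOWN, the function
 * inserts [0, 3] and [7, 9], returns 7 (the number of newly covered
 * pages), and sets *first = [0, 3] and *last = [10, 12].
 */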
/*
 * vmu_update_bounds()
 *
 * first, last:	list of contiguous bounds, of which zero or more are of
 *		type VMUSAGE_BOUND_UNKNOWN.
 *
 * new_first, new_last:	list of contiguous bounds, of which none are of
 *			type VMUSAGE_BOUND_UNKNOWN.  These bounds are used to
 *			update the types of bounds in (first,last) with
 *			type VMUSAGE_BOUND_UNKNOWN.
 *
 * For the list of bounds (first,last), this function updates any bounds
 * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound
 * in the list (new_first, new_last).
 *
 * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
 * (new_first, new_last), it will be split into multiple bounds.
 *
 * Return value:
 *	The number of pages in the list of bounds (first,last) that were of
 *	type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
 *	VMUSAGE_BOUND_INCORE.
 */
static pgcnt_t
vmu_update_bounds(vmu_bound_t **first, vmu_bound_t **last,
    vmu_bound_t *new_first, vmu_bound_t *new_last)
{
	vmu_bound_t *next, *new_next, *tmp;
	pgcnt_t rss = 0;

	next = *first;
	new_next = new_first;

	/* verify bounds span same pages */
	ASSERT((*first)->vmb_start >= new_next->vmb_start);
	ASSERT((*last)->vmb_end <= new_last->vmb_end);
	for (;;) {
		/* If bound already has type, proceed to next bound */
		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}
		while (new_next->vmb_end < next->vmb_start)
			new_next = new_next->vmb_next;
		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
		next->vmb_type = new_next->vmb_type;
		if (new_next->vmb_end < next->vmb_end) {
			/* need to split bound */
			tmp = vmu_alloc_bound();
			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
			tmp->vmb_start = new_next->vmb_end + 1;
			tmp->vmb_end = next->vmb_end;
			tmp->vmb_next = next->vmb_next;
			next->vmb_end = new_next->vmb_end;
			next->vmb_next = tmp;
			if (*last == next)
				*last = tmp;
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			next = tmp;
		} else {
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			if (next == *last)
				break;
			next = next->vmb_next;
		}
	}
	return (rss);
}
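/*
 * Worked example: with (first,last) = [0, 9] UNKNOWN and
 * (new_first,new_last) = [0, 3] INCORE -> [4, 9] NOT_INCORE, the UNKNOWN
 * bound is split into [0, 3] INCORE and [4, 9] NOT_INCORE, and the
 * function returns 4, the number of pages newly known to be incore.
 */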
/*
 * Merges adjacent bounds with the same type between the first and last
 * bound.  After the merge, the last pointer may change, as the last bound
 * may be merged away.
 */
static void
vmu_merge_bounds(vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;

	ASSERT(*first != NULL);
	ASSERT(*last != NULL);

	next = *first;
	while (next != *last) {

		/* If bounds are adjacent and have same type, merge them */
		if (((next->vmb_end + 1) == next->vmb_next->vmb_start) &&
		    (next->vmb_type == next->vmb_next->vmb_type)) {
			tmp = next->vmb_next;
			next->vmb_end = tmp->vmb_end;
			next->vmb_next = tmp->vmb_next;
			vmu_free_bound(tmp);
			if (tmp == *last)
				*last = next;
		} else {
			next = next->vmb_next;
		}
	}
}
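/*
 * Worked example: [0, 3] INCORE -> [4, 9] INCORE -> [10, 12] NOT_INCORE
 * merges to [0, 9] INCORE -> [10, 12] NOT_INCORE.  The two INCORE bounds
 * are adjacent (3 + 1 == 4) and of equal type; the NOT_INCORE bound is
 * adjacent but of a different type, so it is left alone.
 */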
/*
 * Given an amp and a list of bounds, updates each bound's type with
 * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
 *
 * If a bound is partially incore, it will be split into two bounds.
 * first and last may be modified, as bounds may be split into multiple
 * bounds if they are partially incore/not-incore.
 *
 * Set incore to B_TRUE if the bounds are already known to be incore.
 */
static void
vmu_amp_update_incore_bounds(struct anon_map *amp, vmu_bound_t **first,
    vmu_bound_t **last, boolean_t incore)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;
	vnode_t *vn;
	anoff_t off;
	struct anon *ap;

	next = *first;
	/* Shared anon slots don't change once set */
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (;;) {
		if (incore == B_TRUE)
			next->vmb_type = VMUSAGE_BOUND_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}
		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			ap = anon_get_ptr(amp->ahp, index);
			if (ap != NULL)
				swap_xlate(ap, &vn, &off);

			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
			    (page = page_exists(vn, off)) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				tmp->vmb_next = next->vmb_next;
				next->vmb_end = index - 1;
				next->vmb_next = tmp;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = next->vmb_next;
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
}

/*
 * Same as vmu_amp_update_incore_bounds(), except for tracking
 * incore/not-incore for vnodes.
 */
static void
vmu_vnode_update_incore_bounds(vnode_t *vnode, vmu_bound_t **first,
    vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;

	next = *first;
	for (;;) {
		if (vnode->v_pages == NULL)
			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}

		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			if (vnode->v_pages != NULL &&
			    (page = page_exists(vnode, ptob(index))) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				tmp->vmb_next = next->vmb_next;
				next->vmb_end = index - 1;
				next->vmb_next = tmp;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = next->vmb_next;
	}
}
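/*
 * Worked example of the large-page skip in the two functions above: with
 * an 8-page large page (p_szc such that pgcnt == 8, pgmsk == 7), finding
 * the large page at index 13 advances index to (13 & ~7) + 8 == 16, the
 * first page of the next large page, so constituent pages 14 and 15 are
 * not looked up individually.
 */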
/*
 * Calculate the rss and swap consumed by a segment.  vmu_entities is the
 * list of entities to visit.  For shared segments, the vnode or amp is
 * looked up in each entity to see if it has already been counted.  Private
 * anon pages are checked per entity to ensure that cow pages are not
 * double counted.
 *
 * For private mapped files, first the amp is checked for private pages.
 * Bounds not backed by the amp are looked up in the vnode for each entity
 * to avoid double counting of private COW vnode pages.
 */
static void
vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
{
	struct segvn_data *svd;
	struct shm_data *shmd;
	struct spt_data *sptd;
	vmu_object_t *shared_object = NULL;
	vmu_object_t *entity_object = NULL;
	vmu_entity_t *entity;
	vmusage_t *result;
	vmu_bound_t *first = NULL;
	vmu_bound_t *last = NULL;
	vmu_bound_t *cur = NULL;
	vmu_bound_t *e_first = NULL;
	vmu_bound_t *e_last = NULL;
	vmu_bound_t *tmp;
	pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
	struct anon_map *private_amp = NULL;
	boolean_t incore = B_FALSE;
	boolean_t shared = B_FALSE;
	int file = 0;
	pgcnt_t swresv = 0;
	pgcnt_t panon = 0;

	/* Can zero-length segments exist?  Not sure, so paranoia. */
	if (seg->s_size <= 0)
		return;

	/*
	 * Figure out if there is a shared object (such as a named vnode or
	 * a shared amp), then figure out if there is a private amp, which
	 * identifies private pages.
	 */
	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_SHARED)
			shared = B_TRUE;
		else
			swresv = svd->swresv;

		if (svd->vp != NULL) {
			file = 1;
			shared_object = vmu_find_insert_object(
			    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
			    VMUSAGE_TYPE_VNODE);
			s_start = btop(svd->offset);
			s_end = btop(svd->offset + seg->s_size) - 1;
		}
		if (svd->amp != NULL && svd->type == MAP_SHARED) {
			ASSERT(shared_object == NULL);
			shared_object = vmu_find_insert_object(
			    vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
			    VMUSAGE_TYPE_AMP);
			s_start = svd->anon_index;
			s_end = svd->anon_index + btop(seg->s_size) - 1;
			/* schedctl mappings are always in core */
			if (svd->amp->swresv == 0)
				incore = B_TRUE;
		}
		if (svd->amp != NULL && svd->type == MAP_PRIVATE) {
			private_amp = svd->amp;
			p_start = svd->anon_index;
			p_end = svd->anon_index + btop(seg->s_size) - 1;
		}
	} else if (seg->s_ops == &segspt_shmops) {
		shared = B_TRUE;
		shmd = (struct shm_data *)seg->s_data;
		shared_object = vmu_find_insert_object(
		    vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
		    VMUSAGE_TYPE_AMP);
		s_start = 0;
		s_end = btop(seg->s_size) - 1;
		sptd = shmd->shm_sptseg->s_data;

		/* ism segments are always incore and do not reserve swap */
		if (sptd->spt_flags & SHM_SHARE_MMU)
			incore = B_TRUE;

	} else {
		return;
	}

	/*
	 * If there is a private amp, count anon pages that exist.  If an
	 * anon has a refcnt > 1 (cow sharing), then save the anon in a
	 * hash so that it is not double counted.
	 *
	 * If there is also a shared object, figure out the bounds
	 * which are not mapped by the private amp.
	 */
	if (private_amp != NULL) {

		/* Enter as writer to prevent cow anons from being freed */
		ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);

		p_index = p_start;
		s_index = s_start;

		while (p_index <= p_end) {

			pgcnt_t p_index_next;
			pgcnt_t p_bound_size;
			int cnt;
			anoff_t off;
			struct vnode *vn;
			struct anon *ap;
			page_t *page;		/* For handling of large */
			pgcnt_t pgcnt = 1;	/* pages */
			pgcnt_t pgstart;
			pgcnt_t pgend;
			uint_t pgshft;
			pgcnt_t pgmsk;

			p_index_next = p_index;
			ap = anon_get_next_ptr(private_amp->ahp,
			    &p_index_next);

			/*
			 * If next anon is past end of mapping, simulate
			 * end of anon so loop terminates.
			 */
			if (p_index_next > p_end) {
				p_index_next = p_end + 1;
				ap = NULL;
			}
			/*
			 * For cow segments, keep track of bounds not
			 * backed by private amp so they can be looked
			 * up in the backing vnode
			 */
			if (p_index_next != p_index) {

				/*
				 * Compute index difference between anon and
				 * previous anon.
				 */
				p_bound_size = p_index_next - p_index - 1;

				if (shared_object != NULL) {
					cur = vmu_alloc_bound();
					cur->vmb_next = NULL;
					cur->vmb_start = s_index;
					cur->vmb_end = s_index + p_bound_size;
					cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
					if (first == NULL) {
						first = cur;
						last = cur;
					} else {
						last->vmb_next = cur;
						last = cur;
					}
				}
				p_index = p_index + p_bound_size + 1;
				s_index = s_index + p_bound_size + 1;
			}

			/* Detect end of anons in amp */
			if (ap == NULL)
				break;

			cnt = ap->an_refcnt;
			swap_xlate(ap, &vn, &off);

			if (vn == NULL || vn->v_pages == NULL ||
			    (page = page_exists(vn, off)) == NULL) {
				p_index++;
				s_index++;
				continue;
			}

			/*
			 * If large page is found, compute portion of large
			 * page in mapping, and increment indices to the next
			 * large page.
			 */
			if (page->p_szc > 0) {

				pgcnt = page_get_pagecnt(page->p_szc);
				pgshft = page_get_shift(page->p_szc);
				pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;

				/* First page in large page */
				pgstart = p_index & ~pgmsk;
				/* Last page in large page */
				pgend = pgstart + pgcnt - 1;
				/*
				 * Artificially end page if page extends past
				 * end of mapping.
				 */
				if (pgend > p_end)
					pgend = p_end;

				/*
				 * Compute number of pages from large page
				 * which are mapped.
				 */
				pgcnt = pgend - p_index + 1;

				/*
				 * Point indices at page after large page,
				 * or at page after end of mapping.
				 */
				p_index += pgcnt;
				s_index += pgcnt;
			} else {
				p_index++;
				s_index++;
			}

			/*
			 * Assume anon structs with a refcnt
			 * of 1 are not cow shared, so there
			 * is no reason to track them per entity.
			 */
			if (cnt == 1) {
				panon += pgcnt;
				continue;
			}
			for (entity = vmu_entities; entity != NULL;
			    entity = entity->vme_next_calc) {

				result = &entity->vme_result;
				/*
				 * Track cow anons per entity so
				 * they are not double counted.
				 */
				if (vmu_find_insert_anon(entity->vme_anon_hash,
				    (caddr_t)ap) == 0)
					continue;

				result->vmu_rss_all += (pgcnt << PAGESHIFT);
				result->vmu_rss_private +=
				    (pgcnt << PAGESHIFT);
			}
		}
		ANON_LOCK_EXIT(&private_amp->a_rwlock);
	}

	/* Add up resident anon and swap reserved for private mappings */
	if (swresv > 0 || panon > 0) {
		for (entity = vmu_entities; entity != NULL;
		    entity = entity->vme_next_calc) {
			result = &entity->vme_result;
			result->vmu_swap_all += swresv;
			result->vmu_swap_private += swresv;
			result->vmu_rss_all += (panon << PAGESHIFT);
			result->vmu_rss_private += (panon << PAGESHIFT);
		}
	}

	/* Compute resident pages backing shared amp or named vnode */
	if (shared_object != NULL) {
		if (first == NULL) {
			/*
			 * No private amp, or private amp has no anon
			 * structs.  This means entire segment is backed by
			 * the shared object.
			 */
			first = vmu_alloc_bound();
			first->vmb_next = NULL;
			first->vmb_start = s_start;
			first->vmb_end = s_end;
			first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
		}
		/*
		 * Iterate bounds not backed by private amp, and compute
		 * resident pages.
		 */
		cur = first;
		while (cur != NULL) {

			if (vmu_insert_lookup_object_bounds(shared_object,
			    cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
			    &first, &last) > 0) {
				/* new bounds, find incore/not-incore */
				if (shared_object->vmo_type ==
				    VMUSAGE_TYPE_VNODE)
					vmu_vnode_update_incore_bounds(
					    (vnode_t *)
					    shared_object->vmo_key, &first,
					    &last);
				else
					vmu_amp_update_incore_bounds(
					    (struct anon_map *)
					    shared_object->vmo_key, &first,
					    &last, incore);
				vmu_merge_bounds(&first, &last);
			}
			for (entity = vmu_entities; entity != NULL;
			    entity = entity->vme_next_calc) {

				result = &entity->vme_result;

				entity_object = vmu_find_insert_object(
				    shared_object->vmo_type ==
				    VMUSAGE_TYPE_VNODE ?
1345*3247Sgjelinek                     entity->vme_amp_hash,
1346*3247Sgjelinek                     shared_object->vmo_key,
1347*3247Sgjelinek                     shared_object->vmo_type);
1348*3247Sgjelinek 
1349*3247Sgjelinek                 virt = vmu_insert_lookup_object_bounds(
1350*3247Sgjelinek                     entity_object, cur->vmb_start, cur->vmb_end,
1351*3247Sgjelinek                     VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
1352*3247Sgjelinek 
1353*3247Sgjelinek                 if (virt == 0)
1354*3247Sgjelinek                     continue;
1355*3247Sgjelinek                 /*
1356*3247Sgjelinek                  * Range visited for this entity
1357*3247Sgjelinek                  */
1358*3247Sgjelinek                 rss = vmu_update_bounds(&e_first,
1359*3247Sgjelinek                     &e_last, first, last);
1360*3247Sgjelinek                 result->vmu_rss_all += (rss << PAGESHIFT);
1361*3247Sgjelinek                 if (shared == B_TRUE && file == B_FALSE) {
1362*3247Sgjelinek                     /* shared anon mapping */
1363*3247Sgjelinek                     result->vmu_swap_all +=
1364*3247Sgjelinek                         (virt << PAGESHIFT);
1365*3247Sgjelinek                     result->vmu_swap_shared +=
1366*3247Sgjelinek                         (virt << PAGESHIFT);
1367*3247Sgjelinek                     result->vmu_rss_shared +=
1368*3247Sgjelinek                         (rss << PAGESHIFT);
1369*3247Sgjelinek                 } else if (shared == B_TRUE && file == B_TRUE) {
1370*3247Sgjelinek                     /* shared file mapping */
1371*3247Sgjelinek                     result->vmu_rss_shared +=
1372*3247Sgjelinek                         (rss << PAGESHIFT);
1373*3247Sgjelinek                 } else if (shared == B_FALSE &&
1374*3247Sgjelinek                     file == B_TRUE) {
1375*3247Sgjelinek                     /* private file mapping */
1376*3247Sgjelinek                     result->vmu_rss_private +=
1377*3247Sgjelinek                         (rss << PAGESHIFT);
1378*3247Sgjelinek                 }
1379*3247Sgjelinek                 vmu_merge_bounds(&e_first, &e_last);
1380*3247Sgjelinek             }
1381*3247Sgjelinek             tmp = cur;
1382*3247Sgjelinek             cur = cur->vmb_next;
1383*3247Sgjelinek             vmu_free_bound(tmp);
1384*3247Sgjelinek         }
1385*3247Sgjelinek     }
1386*3247Sgjelinek }
1387*3247Sgjelinek 
1388*3247Sgjelinek /*
1389*3247Sgjelinek  * Based on the current calculation flags, find the entities which are
1390*3247Sgjelinek  * relevant to the process.  Then calculate each segment in the
1391*3247Sgjelinek  * process's address space for each relevant entity.
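 *
 * For example, with calculation flags of (VMUSAGE_ALL_ZONES |
 * VMUSAGE_ALL_PROJECTS), a process in zone 2 and project 10 would
 * typically yield an entity list of the project 10 entity followed by
 * the zone 2 entity, and vmu_calculate_seg() is then applied once per
 * segment for that whole list.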
1392*3247Sgjelinek  */
1393*3247Sgjelinek static void
1394*3247Sgjelinek vmu_calculate_proc(proc_t *p)
1395*3247Sgjelinek {
1396*3247Sgjelinek     vmu_entity_t *entities = NULL;
1397*3247Sgjelinek     vmu_zone_t *zone;
1398*3247Sgjelinek     vmu_entity_t *tmp;
1399*3247Sgjelinek     struct as *as;
1400*3247Sgjelinek     struct seg *seg;
1401*3247Sgjelinek     int ret;
1402*3247Sgjelinek 
1403*3247Sgjelinek     /* Figure out which entities are being computed */
1404*3247Sgjelinek     if ((vmu_data.vmu_system) != NULL) {
1405*3247Sgjelinek         tmp = vmu_data.vmu_system;
1406*3247Sgjelinek         tmp->vme_next_calc = entities;
1407*3247Sgjelinek         entities = tmp;
1408*3247Sgjelinek     }
1409*3247Sgjelinek     if (vmu_data.vmu_calc_flags &
1410*3247Sgjelinek         (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
1411*3247Sgjelinek         VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
1412*3247Sgjelinek         VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
1413*3247Sgjelinek         VMUSAGE_ALL_EUSERS)) {
1414*3247Sgjelinek         ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
1415*3247Sgjelinek             (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
1416*3247Sgjelinek             (mod_hash_val_t *)&zone);
1417*3247Sgjelinek         if (ret != 0) {
1418*3247Sgjelinek             zone = vmu_alloc_zone(p->p_zone->zone_id);
1419*3247Sgjelinek             ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
1420*3247Sgjelinek                 (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
1421*3247Sgjelinek                 (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
1422*3247Sgjelinek             ASSERT(ret == 0);
1423*3247Sgjelinek         }
1424*3247Sgjelinek         if (zone->vmz_zone != NULL) {
1425*3247Sgjelinek             tmp = zone->vmz_zone;
1426*3247Sgjelinek             tmp->vme_next_calc = entities;
1427*3247Sgjelinek             entities = tmp;
1428*3247Sgjelinek         }
1429*3247Sgjelinek         if (vmu_data.vmu_calc_flags &
1430*3247Sgjelinek             (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
1431*3247Sgjelinek             tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
1432*3247Sgjelinek                 p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
1433*3247Sgjelinek                 zone->vmz_id);
1434*3247Sgjelinek             tmp->vme_next_calc = entities;
1435*3247Sgjelinek             entities = tmp;
1436*3247Sgjelinek         }
1437*3247Sgjelinek         if (vmu_data.vmu_calc_flags &
1438*3247Sgjelinek             (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
1439*3247Sgjelinek             tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
1440*3247Sgjelinek                 p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
1441*3247Sgjelinek             tmp->vme_next_calc = entities;
1442*3247Sgjelinek             entities = tmp;
1443*3247Sgjelinek         }
1444*3247Sgjelinek         if (vmu_data.vmu_calc_flags &
1445*3247Sgjelinek             (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
1446*3247Sgjelinek             tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
1447*3247Sgjelinek                 crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
1448*3247Sgjelinek             tmp->vme_next_calc = entities;
1449*3247Sgjelinek             entities = tmp;
1450*3247Sgjelinek         }
1451*3247Sgjelinek         if (vmu_data.vmu_calc_flags &
1452*3247Sgjelinek             (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
1453*3247Sgjelinek             tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
1454*3247Sgjelinek                 crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
1455*3247Sgjelinek             tmp->vme_next_calc = entities;
1456*3247Sgjelinek             entities = tmp;
1457*3247Sgjelinek         }
1458*3247Sgjelinek     }
1459*3247Sgjelinek     /* Entities which collapse projects and users for all zones */
1460*3247Sgjelinek     if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
1461*3247Sgjelinek         tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
1462*3247Sgjelinek             p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
1463*3247Sgjelinek         tmp->vme_next_calc = entities;
1464*3247Sgjelinek         entities = tmp;
1465*3247Sgjelinek     }
1466*3247Sgjelinek     if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
1467*3247Sgjelinek         tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
1468*3247Sgjelinek             crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
1469*3247Sgjelinek         tmp->vme_next_calc = entities;
1470*3247Sgjelinek         entities = tmp;
1471*3247Sgjelinek     }
1472*3247Sgjelinek     if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
1473*3247Sgjelinek         tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
1474*3247Sgjelinek             crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
1475*3247Sgjelinek         tmp->vme_next_calc = entities;
1476*3247Sgjelinek         entities = tmp;
1477*3247Sgjelinek     }
1478*3247Sgjelinek 
1479*3247Sgjelinek     ASSERT(entities != NULL);
1480*3247Sgjelinek     /* process all segs in process's address space */
1481*3247Sgjelinek     as = p->p_as;
1482*3247Sgjelinek     AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1483*3247Sgjelinek     for (seg = AS_SEGFIRST(as); seg != NULL;
1484*3247Sgjelinek         seg = AS_SEGNEXT(as, seg)) {
1485*3247Sgjelinek         vmu_calculate_seg(entities, seg);
1486*3247Sgjelinek     }
1487*3247Sgjelinek     AS_LOCK_EXIT(as, &as->a_lock);
1488*3247Sgjelinek }
1489*3247Sgjelinek 
1490*3247Sgjelinek /*
1491*3247Sgjelinek  * Free data created by a previous call to vmu_calculate().
1492*3247Sgjelinek  */
1493*3247Sgjelinek static void
1494*3247Sgjelinek vmu_clear_calc()
1495*3247Sgjelinek {
1496*3247Sgjelinek     if (vmu_data.vmu_system != NULL)
1497*3247Sgjelinek         vmu_free_entity(vmu_data.vmu_system);
1498*3247Sgjelinek     vmu_data.vmu_system = NULL;
1499*3247Sgjelinek     if (vmu_data.vmu_zones_hash != NULL)
1500*3247Sgjelinek         i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
1501*3247Sgjelinek     if (vmu_data.vmu_projects_col_hash != NULL)
1502*3247Sgjelinek         i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
1503*3247Sgjelinek     if (vmu_data.vmu_rusers_col_hash != NULL)
1504*3247Sgjelinek         i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
1505*3247Sgjelinek     if (vmu_data.vmu_eusers_col_hash != NULL)
1506*3247Sgjelinek         i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
1507*3247Sgjelinek 
1508*3247Sgjelinek     i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
1509*3247Sgjelinek     i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
1510*3247Sgjelinek }
1511*3247Sgjelinek 
1512*3247Sgjelinek /*
1513*3247Sgjelinek  * Free unused data structures.  These can result if the system workload
1514*3247Sgjelinek  * decreases between calculations.
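 * For example, if an earlier calculation tracked more vnodes and amps
 * than the current one visited, the surplus vmu_bound_t and vmu_object_t
 * structures are left on the vmu_data free lists; they are returned to
 * their kmem caches here.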
1515*3247Sgjelinek  */
1516*3247Sgjelinek static void
1517*3247Sgjelinek vmu_free_extra()
1518*3247Sgjelinek {
1519*3247Sgjelinek     vmu_bound_t *tb;
1520*3247Sgjelinek     vmu_object_t *to;
1521*3247Sgjelinek     vmu_entity_t *te;
1522*3247Sgjelinek     vmu_zone_t *tz;
1523*3247Sgjelinek 
1524*3247Sgjelinek     while (vmu_data.vmu_free_bounds != NULL) {
1525*3247Sgjelinek         tb = vmu_data.vmu_free_bounds;
1526*3247Sgjelinek         vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
1527*3247Sgjelinek         kmem_cache_free(vmu_bound_cache, tb);
1528*3247Sgjelinek     }
1529*3247Sgjelinek     while (vmu_data.vmu_free_objects != NULL) {
1530*3247Sgjelinek         to = vmu_data.vmu_free_objects;
1531*3247Sgjelinek         vmu_data.vmu_free_objects =
1532*3247Sgjelinek             vmu_data.vmu_free_objects->vmo_next;
1533*3247Sgjelinek         kmem_cache_free(vmu_object_cache, to);
1534*3247Sgjelinek     }
1535*3247Sgjelinek     while (vmu_data.vmu_free_entities != NULL) {
1536*3247Sgjelinek         te = vmu_data.vmu_free_entities;
1537*3247Sgjelinek         vmu_data.vmu_free_entities =
1538*3247Sgjelinek             vmu_data.vmu_free_entities->vme_next;
1539*3247Sgjelinek         if (te->vme_vnode_hash != NULL)
1540*3247Sgjelinek             mod_hash_destroy_hash(te->vme_vnode_hash);
1541*3247Sgjelinek         if (te->vme_amp_hash != NULL)
1542*3247Sgjelinek             mod_hash_destroy_hash(te->vme_amp_hash);
1543*3247Sgjelinek         if (te->vme_anon_hash != NULL)
1544*3247Sgjelinek             mod_hash_destroy_hash(te->vme_anon_hash);
1545*3247Sgjelinek         kmem_free(te, sizeof (vmu_entity_t));
1546*3247Sgjelinek     }
1547*3247Sgjelinek     while (vmu_data.vmu_free_zones != NULL) {
1548*3247Sgjelinek         tz = vmu_data.vmu_free_zones;
1549*3247Sgjelinek         vmu_data.vmu_free_zones =
1550*3247Sgjelinek             vmu_data.vmu_free_zones->vmz_next;
1551*3247Sgjelinek         if (tz->vmz_projects_hash != NULL)
1552*3247Sgjelinek             mod_hash_destroy_hash(tz->vmz_projects_hash);
1553*3247Sgjelinek         if (tz->vmz_tasks_hash != NULL)
1554*3247Sgjelinek             mod_hash_destroy_hash(tz->vmz_tasks_hash);
1555*3247Sgjelinek         if (tz->vmz_rusers_hash != NULL)
1556*3247Sgjelinek             mod_hash_destroy_hash(tz->vmz_rusers_hash);
1557*3247Sgjelinek         if (tz->vmz_eusers_hash != NULL)
1558*3247Sgjelinek             mod_hash_destroy_hash(tz->vmz_eusers_hash);
1559*3247Sgjelinek         kmem_free(tz, sizeof (vmu_zone_t));
1560*3247Sgjelinek     }
1561*3247Sgjelinek }
1562*3247Sgjelinek 
1563*3247Sgjelinek extern kcondvar_t *pr_pid_cv;
1564*3247Sgjelinek 
1565*3247Sgjelinek /*
1566*3247Sgjelinek  * Determine which entity types are relevant and allocate the hashes to
1567*3247Sgjelinek  * track them.  Then walk the process table and count rss and swap
1568*3247Sgjelinek  * for each process's address space.  Address space objects such as
1569*3247Sgjelinek  * vnodes, amps, and anons are tracked per entity, so that they are
1570*3247Sgjelinek  * not double counted in the results.
1571*3247Sgjelinek  *
1572*3247Sgjelinek  */
1573*3247Sgjelinek static void
1574*3247Sgjelinek vmu_calculate()
1575*3247Sgjelinek {
1576*3247Sgjelinek     int i = 0;
1577*3247Sgjelinek     int ret;
1578*3247Sgjelinek     proc_t *p;
1579*3247Sgjelinek 
1580*3247Sgjelinek     vmu_clear_calc();
1581*3247Sgjelinek 
1582*3247Sgjelinek     if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
1583*3247Sgjelinek         vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
1584*3247Sgjelinek             ALL_ZONES);
1585*3247Sgjelinek 
1586*3247Sgjelinek     /*
1587*3247Sgjelinek      * Walk process table and calculate rss of each proc.
1588*3247Sgjelinek      *
1589*3247Sgjelinek      * Pidlock and p_lock cannot be held while doing the rss calculation.
1590*3247Sgjelinek      * This is because:
1591*3247Sgjelinek      * 1. The calculation allocates using KM_SLEEP.
1592*3247Sgjelinek      * 2. The calculation grabs a_lock, which cannot be grabbed
1593*3247Sgjelinek      *    after p_lock.
1594*3247Sgjelinek      *
1595*3247Sgjelinek      * Since pidlock must be dropped, we cannot simply walk the
1596*3247Sgjelinek      * practive list.  Instead, we walk the process table and sprlock
1597*3247Sgjelinek      * each process to ensure that it does not exit during the
1598*3247Sgjelinek      * calculation.
1599*3247Sgjelinek      */
1600*3247Sgjelinek 
1601*3247Sgjelinek     mutex_enter(&pidlock);
1602*3247Sgjelinek     for (i = 0; i < v.v_proc; i++) {
1603*3247Sgjelinek again:
1604*3247Sgjelinek         p = pid_entry(i);
1605*3247Sgjelinek         if (p == NULL)
1606*3247Sgjelinek             continue;
1607*3247Sgjelinek 
1608*3247Sgjelinek         mutex_enter(&p->p_lock);
1609*3247Sgjelinek         mutex_exit(&pidlock);
1610*3247Sgjelinek 
1611*3247Sgjelinek         if (panicstr) {
1612*3247Sgjelinek             mutex_exit(&p->p_lock);
1613*3247Sgjelinek             return;
1614*3247Sgjelinek         }
1615*3247Sgjelinek 
1616*3247Sgjelinek         /* Try to set P_PR_LOCK */
1617*3247Sgjelinek         ret = sprtrylock_proc(p);
1618*3247Sgjelinek         if (ret == -1) {
1619*3247Sgjelinek             /* Process in invalid state */
1620*3247Sgjelinek             mutex_exit(&p->p_lock);
1621*3247Sgjelinek             mutex_enter(&pidlock);
1622*3247Sgjelinek             continue;
1623*3247Sgjelinek         } else if (ret == 1) {
1624*3247Sgjelinek             /*
1625*3247Sgjelinek              * P_PR_LOCK is already set.  Wait and try again.
1626*3247Sgjelinek              * This also drops p_lock.
1627*3247Sgjelinek              */
1628*3247Sgjelinek             sprwaitlock_proc(p);
1629*3247Sgjelinek             mutex_enter(&pidlock);
1630*3247Sgjelinek             goto again;
1631*3247Sgjelinek         }
1632*3247Sgjelinek         mutex_exit(&p->p_lock);
1633*3247Sgjelinek 
1634*3247Sgjelinek         vmu_calculate_proc(p);
1635*3247Sgjelinek 
1636*3247Sgjelinek         mutex_enter(&p->p_lock);
1637*3247Sgjelinek         sprunlock(p);
1638*3247Sgjelinek         mutex_enter(&pidlock);
1639*3247Sgjelinek     }
1640*3247Sgjelinek     mutex_exit(&pidlock);
1641*3247Sgjelinek 
1642*3247Sgjelinek     vmu_free_extra();
1643*3247Sgjelinek }
1644*3247Sgjelinek 
1645*3247Sgjelinek /*
1646*3247Sgjelinek  * allocate a new cache for N results satisfying flags
1647*3247Sgjelinek  */
1648*3247Sgjelinek vmu_cache_t *
1649*3247Sgjelinek vmu_cache_alloc(size_t nres, uint_t flags)
1650*3247Sgjelinek {
1651*3247Sgjelinek     vmu_cache_t *cache;
1652*3247Sgjelinek 
1653*3247Sgjelinek     cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
1654*3247Sgjelinek     cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
1655*3247Sgjelinek     cache->vmc_nresults = nres;
1656*3247Sgjelinek     cache->vmc_flags = flags;
1657*3247Sgjelinek     cache->vmc_refcnt = 1;
1658*3247Sgjelinek     return (cache);
1659*3247Sgjelinek }
1660*3247Sgjelinek 
1661*3247Sgjelinek /*
1662*3247Sgjelinek  * Make sure cached results are not freed
1663*3247Sgjelinek  */
1664*3247Sgjelinek static void
1665*3247Sgjelinek vmu_cache_hold(vmu_cache_t *cache)
1666*3247Sgjelinek {
1667*3247Sgjelinek     ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
1668*3247Sgjelinek     cache->vmc_refcnt++;
1669*3247Sgjelinek }
1670*3247Sgjelinek 
1671*3247Sgjelinek /*
1672*3247Sgjelinek  * free cache data
1673*3247Sgjelinek  */
1674*3247Sgjelinek static void
1675*3247Sgjelinek vmu_cache_rele(vmu_cache_t *cache)
1676*3247Sgjelinek {
1677*3247Sgjelinek     ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
1678*3247Sgjelinek     ASSERT(cache->vmc_refcnt > 0);
1679*3247Sgjelinek     cache->vmc_refcnt--;
1680*3247Sgjelinek     if (cache->vmc_refcnt == 0) {
1681*3247Sgjelinek         kmem_free(cache->vmc_results, sizeof (vmusage_t) *
1682*3247Sgjelinek             cache->vmc_nresults);
1683*3247Sgjelinek         kmem_free(cache, sizeof (vmu_cache_t));
1684*3247Sgjelinek     }
1685*3247Sgjelinek }
1686*3247Sgjelinek 
1687*3247Sgjelinek /*
1688*3247Sgjelinek  * Copy out the cached results to a caller.  Inspect the caller's flags
1689*3247Sgjelinek  * and zone to determine which cached results should be copied.
1690*3247Sgjelinek  */
1691*3247Sgjelinek static int
1692*3247Sgjelinek vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
1693*3247Sgjelinek     uint_t flags)
1694*3247Sgjelinek {
1695*3247Sgjelinek     vmusage_t *result, *out_result;
1696*3247Sgjelinek     vmusage_t dummy;
1697*3247Sgjelinek     size_t i, count = 0;
1698*3247Sgjelinek     size_t bufsize;
1699*3247Sgjelinek     int ret = 0;
1700*3247Sgjelinek     uint_t types = 0;
1701*3247Sgjelinek 
1702*3247Sgjelinek     if (nres != NULL) {
1703*3247Sgjelinek         if (copyin((caddr_t)nres, &bufsize, sizeof (size_t)))
1704*3247Sgjelinek             return (set_errno(EFAULT));
1705*3247Sgjelinek     } else {
1706*3247Sgjelinek         bufsize = 0;
1707*3247Sgjelinek     }
1708*3247Sgjelinek 
1709*3247Sgjelinek     /* figure out what results the caller is interested in. */
1710*3247Sgjelinek     if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
1711*3247Sgjelinek         types |= VMUSAGE_SYSTEM;
1712*3247Sgjelinek     if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
1713*3247Sgjelinek         types |= VMUSAGE_ZONE;
1714*3247Sgjelinek     if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
1715*3247Sgjelinek         VMUSAGE_COL_PROJECTS))
1716*3247Sgjelinek         types |= VMUSAGE_PROJECTS;
1717*3247Sgjelinek     if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
1718*3247Sgjelinek         types |= VMUSAGE_TASKS;
1719*3247Sgjelinek     if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
1720*3247Sgjelinek         types |= VMUSAGE_RUSERS;
1721*3247Sgjelinek     if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
1722*3247Sgjelinek         types |= VMUSAGE_EUSERS;
1723*3247Sgjelinek 
1724*3247Sgjelinek     /* count results for current zone */
1725*3247Sgjelinek     out_result = buf;
1726*3247Sgjelinek     for (result = cache->vmc_results, i = 0;
1727*3247Sgjelinek         i < cache->vmc_nresults; result++, i++) {
1728*3247Sgjelinek 
1729*3247Sgjelinek         /* Do not return "other-zone" results to non-global zones */
1730*3247Sgjelinek         if (curproc->p_zone != global_zone &&
1731*3247Sgjelinek             curproc->p_zone->zone_id != result->vmu_zoneid)
1732*3247Sgjelinek             continue;
1733*3247Sgjelinek 
1734*3247Sgjelinek         /*
1735*3247Sgjelinek          * If a non-global zone requests VMUSAGE_SYSTEM, fake up
1736*3247Sgjelinek          * its VMUSAGE_ZONE result as the VMUSAGE_SYSTEM result.
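         * For example, a caller in zoneid 3 asking for VMUSAGE_SYSTEM
         * receives a copy of its own VMUSAGE_ZONE result with vmu_type
         * rewritten to VMUSAGE_SYSTEM, vmu_zoneid set to ALL_ZONES, and
         * vmu_id set to 0, as constructed in "dummy" below.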
1737*3247Sgjelinek          */
1738*3247Sgjelinek         if (curproc->p_zone != global_zone &&
1739*3247Sgjelinek             (flags & VMUSAGE_SYSTEM) != 0 &&
1740*3247Sgjelinek             result->vmu_type == VMUSAGE_ZONE) {
1741*3247Sgjelinek             count++;
1742*3247Sgjelinek             if (out_result != NULL) {
1743*3247Sgjelinek                 if (bufsize < count) {
1744*3247Sgjelinek                     ret = set_errno(EOVERFLOW);
1745*3247Sgjelinek                 } else {
1746*3247Sgjelinek                     dummy = *result;
1747*3247Sgjelinek                     dummy.vmu_zoneid = ALL_ZONES;
1748*3247Sgjelinek                     dummy.vmu_id = 0;
1749*3247Sgjelinek                     dummy.vmu_type = VMUSAGE_SYSTEM;
1750*3247Sgjelinek                     if (copyout(&dummy, out_result,
1751*3247Sgjelinek                         sizeof (vmusage_t)))
1752*3247Sgjelinek                         return (set_errno(
1753*3247Sgjelinek                             EFAULT));
1754*3247Sgjelinek                     out_result++;
1755*3247Sgjelinek                 }
1756*3247Sgjelinek             }
1757*3247Sgjelinek         }
1758*3247Sgjelinek 
1759*3247Sgjelinek         /* Skip results that do not match requested type */
1760*3247Sgjelinek         if ((result->vmu_type & types) == 0)
1761*3247Sgjelinek             continue;
1762*3247Sgjelinek 
1763*3247Sgjelinek         /* Skip collated results if not requested */
1764*3247Sgjelinek         if (result->vmu_zoneid == ALL_ZONES) {
1765*3247Sgjelinek             if (result->vmu_type == VMUSAGE_PROJECTS &&
1766*3247Sgjelinek                 (flags & VMUSAGE_COL_PROJECTS) == 0)
1767*3247Sgjelinek                 continue;
1768*3247Sgjelinek             if (result->vmu_type == VMUSAGE_EUSERS &&
1769*3247Sgjelinek                 (flags & VMUSAGE_COL_EUSERS) == 0)
1770*3247Sgjelinek                 continue;
1771*3247Sgjelinek             if (result->vmu_type == VMUSAGE_RUSERS &&
1772*3247Sgjelinek                 (flags & VMUSAGE_COL_RUSERS) == 0)
1773*3247Sgjelinek                 continue;
1774*3247Sgjelinek         }
1775*3247Sgjelinek 
1776*3247Sgjelinek         /* Skip "other zone" results if not requested */
1777*3247Sgjelinek         if (result->vmu_zoneid != curproc->p_zone->zone_id) {
1778*3247Sgjelinek             if (result->vmu_type == VMUSAGE_ZONE &&
1779*3247Sgjelinek                 (flags & VMUSAGE_ALL_ZONES) == 0)
1780*3247Sgjelinek                 continue;
1781*3247Sgjelinek             if (result->vmu_type == VMUSAGE_PROJECTS &&
1782*3247Sgjelinek                 (flags & (VMUSAGE_ALL_PROJECTS |
1783*3247Sgjelinek                 VMUSAGE_COL_PROJECTS)) == 0)
1784*3247Sgjelinek                 continue;
1785*3247Sgjelinek             if (result->vmu_type == VMUSAGE_TASKS &&
1786*3247Sgjelinek                 (flags & VMUSAGE_ALL_TASKS) == 0)
1787*3247Sgjelinek                 continue;
1788*3247Sgjelinek             if (result->vmu_type == VMUSAGE_RUSERS &&
1789*3247Sgjelinek                 (flags & (VMUSAGE_ALL_RUSERS |
1790*3247Sgjelinek                 VMUSAGE_COL_RUSERS)) == 0)
1791*3247Sgjelinek                 continue;
1792*3247Sgjelinek             if (result->vmu_type == VMUSAGE_EUSERS &&
1793*3247Sgjelinek                 (flags & (VMUSAGE_ALL_EUSERS |
1794*3247Sgjelinek                 VMUSAGE_COL_EUSERS)) == 0)
1795*3247Sgjelinek                 continue;
1796*3247Sgjelinek         }
1797*3247Sgjelinek         count++;
1798*3247Sgjelinek         if (out_result != NULL) {
1799*3247Sgjelinek             if (bufsize < count) {
1800*3247Sgjelinek                 ret = set_errno(EOVERFLOW);
1801*3247Sgjelinek             } else {
1802*3247Sgjelinek                 if (copyout(result, out_result,
1803*3247Sgjelinek                     sizeof (vmusage_t)))
1804*3247Sgjelinek                     return (set_errno(EFAULT));
1805*3247Sgjelinek                 out_result++;
1806*3247Sgjelinek             }
1807*3247Sgjelinek         }
1808*3247Sgjelinek     }
1809*3247Sgjelinek     if (nres != NULL)
1810*3247Sgjelinek         if (copyout(&count, (void *)nres, sizeof (size_t)))
1811*3247Sgjelinek             return (set_errno(EFAULT));
1812*3247Sgjelinek 
1813*3247Sgjelinek     return (ret);
1814*3247Sgjelinek }
1815*3247Sgjelinek 
1816*3247Sgjelinek /*
1817*3247Sgjelinek  * vm_getusage()
1818*3247Sgjelinek  *
1819*3247Sgjelinek  * Counts rss and swap by zone, project, task, and/or user.  The flags argument
1820*3247Sgjelinek  * determines the type of result structures returned.  Flags requesting
1821*3247Sgjelinek  * results from more than one zone are "flattened" to the local zone if the
1822*3247Sgjelinek  * caller is not the global zone.
1823*3247Sgjelinek  *
1824*3247Sgjelinek  * args:
1825*3247Sgjelinek  *	flags:	bitmap consisting of one or more of VMUSAGE_*.
1826*3247Sgjelinek  *	age:	maximum allowable age (time since counting was done) in
1827*3247Sgjelinek  *		seconds of the results.  Results from previous callers are
1828*3247Sgjelinek  *		cached in the kernel.
1829*3247Sgjelinek  *	buf:	pointer to buffer array of vmusage_t.  If NULL, then only
1830*3247Sgjelinek  *		nres is set on success.
1831*3247Sgjelinek  *	nres:	Set to the number of vmusage_t structures pointed to by buf
1832*3247Sgjelinek  *		before calling vm_getusage().
1833*3247Sgjelinek  *		On a return of 0 (success) or EOVERFLOW, it is set to the
1834*3247Sgjelinek  *		number of result structures returned or attempted to be returned.
1835*3247Sgjelinek  *
1836*3247Sgjelinek  * returns 0 on success, -1 on failure:
1837*3247Sgjelinek  *	EINTR (interrupted)
1838*3247Sgjelinek  *	EOVERFLOW (nres too small for results; nres set to needed value)
1839*3247Sgjelinek  *	EINVAL (flags invalid)
1840*3247Sgjelinek  *	EFAULT (bad address for buf or nres)
 *
 * An illustrative usage sketch appears at the end of this file.
1841*3247Sgjelinek  */
1842*3247Sgjelinek int
1843*3247Sgjelinek vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres)
1844*3247Sgjelinek {
1845*3247Sgjelinek     vmu_entity_t *entity;
1846*3247Sgjelinek     vmusage_t *result;
1847*3247Sgjelinek     int ret = 0;
1848*3247Sgjelinek     int cacherecent = 0;
1849*3247Sgjelinek     hrtime_t now;
1850*3247Sgjelinek     uint_t flags_orig;
1851*3247Sgjelinek 
1852*3247Sgjelinek     /*
1853*3247Sgjelinek      * Non-global zones cannot request system-wide or collated
1854*3247Sgjelinek      * results, nor the system result, so munge the flags accordingly.
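     *
     * For example, a non-global caller passing
     * (VMUSAGE_ALL_PROJECTS | VMUSAGE_SYSTEM) is treated below as if it
     * had passed (VMUSAGE_PROJECTS | VMUSAGE_ZONE).  flags_orig
     * preserves the original request for vmu_copyout_results().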
1855*3247Sgjelinek      */
1856*3247Sgjelinek     flags_orig = flags;
1857*3247Sgjelinek     if (curproc->p_zone != global_zone) {
1858*3247Sgjelinek         if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
1859*3247Sgjelinek             flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
1860*3247Sgjelinek             flags |= VMUSAGE_PROJECTS;
1861*3247Sgjelinek         }
1862*3247Sgjelinek         if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
1863*3247Sgjelinek             flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
1864*3247Sgjelinek             flags |= VMUSAGE_RUSERS;
1865*3247Sgjelinek         }
1866*3247Sgjelinek         if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
1867*3247Sgjelinek             flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
1868*3247Sgjelinek             flags |= VMUSAGE_EUSERS;
1869*3247Sgjelinek         }
1870*3247Sgjelinek         if (flags & VMUSAGE_SYSTEM) {
1871*3247Sgjelinek             flags &= ~VMUSAGE_SYSTEM;
1872*3247Sgjelinek             flags |= VMUSAGE_ZONE;
1873*3247Sgjelinek         }
1874*3247Sgjelinek     }
1875*3247Sgjelinek 
1876*3247Sgjelinek     /* Check for unknown flags */
1877*3247Sgjelinek     if ((flags & (~VMUSAGE_MASK)) != 0)
1878*3247Sgjelinek         return (set_errno(EINVAL));
1879*3247Sgjelinek 
1880*3247Sgjelinek     /* Check for no flags */
1881*3247Sgjelinek     if ((flags & VMUSAGE_MASK) == 0)
1882*3247Sgjelinek         return (set_errno(EINVAL));
1883*3247Sgjelinek 
1884*3247Sgjelinek     mutex_enter(&vmu_data.vmu_lock);
1885*3247Sgjelinek     now = gethrtime();
1886*3247Sgjelinek 
1887*3247Sgjelinek start:
1888*3247Sgjelinek     if (vmu_data.vmu_cache != NULL) {
1889*3247Sgjelinek 
1890*3247Sgjelinek         vmu_cache_t *cache;
1891*3247Sgjelinek 
1892*3247Sgjelinek         if ((vmu_data.vmu_cache->vmc_timestamp +
1893*3247Sgjelinek             ((hrtime_t)age * NANOSEC)) > now)
1894*3247Sgjelinek             cacherecent = 1;
1895*3247Sgjelinek 
1896*3247Sgjelinek         if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
1897*3247Sgjelinek             cacherecent == 1) {
1898*3247Sgjelinek             cache = vmu_data.vmu_cache;
1899*3247Sgjelinek             vmu_cache_hold(cache);
1900*3247Sgjelinek             mutex_exit(&vmu_data.vmu_lock);
1901*3247Sgjelinek 
1902*3247Sgjelinek             ret = vmu_copyout_results(cache, buf, nres, flags_orig);
1903*3247Sgjelinek             mutex_enter(&vmu_data.vmu_lock);
1904*3247Sgjelinek             vmu_cache_rele(cache);
1905*3247Sgjelinek             if (vmu_data.vmu_pending_waiters > 0)
1906*3247Sgjelinek                 cv_broadcast(&vmu_data.vmu_cv);
1907*3247Sgjelinek             mutex_exit(&vmu_data.vmu_lock);
1908*3247Sgjelinek             return (ret);
1909*3247Sgjelinek         }
1910*3247Sgjelinek         /*
1911*3247Sgjelinek          * If the cache is recent, it is likely that there are other
1912*3247Sgjelinek          * consumers of vm_getusage running, so add their flags to the
1913*3247Sgjelinek          * desired flags for the calculation.
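         *
         * For example, if the recent cache was computed with
         * VMUSAGE_ZONE and this caller wants VMUSAGE_PROJECTS, the new
         * calculation runs with (VMUSAGE_ZONE | VMUSAGE_PROJECTS) so
         * both sets of consumers can keep hitting the cache.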
1914*3247Sgjelinek          */
1915*3247Sgjelinek         if (cacherecent == 1)
1916*3247Sgjelinek             flags = vmu_data.vmu_cache->vmc_flags | flags;
1917*3247Sgjelinek     }
1918*3247Sgjelinek     if (vmu_data.vmu_calc_thread == NULL) {
1919*3247Sgjelinek 
1920*3247Sgjelinek         vmu_cache_t *cache;
1921*3247Sgjelinek 
1922*3247Sgjelinek         vmu_data.vmu_calc_thread = curthread;
1923*3247Sgjelinek         vmu_data.vmu_calc_flags = flags;
1924*3247Sgjelinek         vmu_data.vmu_entities = NULL;
1925*3247Sgjelinek         vmu_data.vmu_nentities = 0;
1926*3247Sgjelinek         if (vmu_data.vmu_pending_waiters > 0)
1927*3247Sgjelinek             vmu_data.vmu_calc_flags |=
1928*3247Sgjelinek                 vmu_data.vmu_pending_flags;
1929*3247Sgjelinek 
1930*3247Sgjelinek         vmu_data.vmu_pending_flags = 0;
1931*3247Sgjelinek         mutex_exit(&vmu_data.vmu_lock);
1932*3247Sgjelinek         vmu_calculate();
1933*3247Sgjelinek         mutex_enter(&vmu_data.vmu_lock);
1934*3247Sgjelinek         /* copy results to cache */
1935*3247Sgjelinek         if (vmu_data.vmu_cache != NULL)
1936*3247Sgjelinek             vmu_cache_rele(vmu_data.vmu_cache);
1937*3247Sgjelinek         cache = vmu_data.vmu_cache =
1938*3247Sgjelinek             vmu_cache_alloc(vmu_data.vmu_nentities,
1939*3247Sgjelinek             vmu_data.vmu_calc_flags);
1940*3247Sgjelinek 
1941*3247Sgjelinek         result = cache->vmc_results;
1942*3247Sgjelinek         for (entity = vmu_data.vmu_entities; entity != NULL;
1943*3247Sgjelinek             entity = entity->vme_next) {
1944*3247Sgjelinek             *result = entity->vme_result;
1945*3247Sgjelinek             result++;
1946*3247Sgjelinek         }
1947*3247Sgjelinek         cache->vmc_timestamp = gethrtime();
1948*3247Sgjelinek         vmu_cache_hold(cache);
1949*3247Sgjelinek 
1950*3247Sgjelinek         vmu_data.vmu_calc_flags = 0;
1951*3247Sgjelinek         vmu_data.vmu_calc_thread = NULL;
1952*3247Sgjelinek 
1953*3247Sgjelinek         if (vmu_data.vmu_pending_waiters > 0)
1954*3247Sgjelinek             cv_broadcast(&vmu_data.vmu_cv);
1955*3247Sgjelinek 
1956*3247Sgjelinek         mutex_exit(&vmu_data.vmu_lock);
1957*3247Sgjelinek 
1958*3247Sgjelinek         /* copy cache */
1959*3247Sgjelinek         ret = vmu_copyout_results(cache, buf, nres, flags_orig);
1960*3247Sgjelinek         mutex_enter(&vmu_data.vmu_lock);
1961*3247Sgjelinek         vmu_cache_rele(cache);
1962*3247Sgjelinek         mutex_exit(&vmu_data.vmu_lock);
1963*3247Sgjelinek 
1964*3247Sgjelinek         return (ret);
1965*3247Sgjelinek     }
1966*3247Sgjelinek     vmu_data.vmu_pending_flags |= flags;
1967*3247Sgjelinek     vmu_data.vmu_pending_waiters++;
1968*3247Sgjelinek     while (vmu_data.vmu_calc_thread != NULL) {
1969*3247Sgjelinek         if (cv_wait_sig(&vmu_data.vmu_cv,
1970*3247Sgjelinek             &vmu_data.vmu_lock) == 0) {
1971*3247Sgjelinek             vmu_data.vmu_pending_waiters--;
1972*3247Sgjelinek             mutex_exit(&vmu_data.vmu_lock);
1973*3247Sgjelinek             return (set_errno(EINTR));
1974*3247Sgjelinek         }
1975*3247Sgjelinek     }
1976*3247Sgjelinek     vmu_data.vmu_pending_waiters--;
1977*3247Sgjelinek     goto start;
1978*3247Sgjelinek }
1979
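
/*
 * A minimal userland sketch of consuming this interface, assuming the
 * private libc wrapper getvmusage(2), which takes the same arguments
 * and sets the same errno values as vm_getusage() above.  The flags,
 * age, and initial buffer size here are illustrative only, not a
 * definitive usage.
 *
 *	#include <sys/vm_usage.h>
 *	#include <stdlib.h>
 *	#include <errno.h>
 *
 *	size_t nres = 8;
 *	vmusage_t *buf = malloc(nres * sizeof (vmusage_t));
 *
 *	// Results at most 5 seconds old, for this zone and its projects.
 *	while (getvmusage(VMUSAGE_ZONE | VMUSAGE_PROJECTS, 5,
 *	    buf, &nres) != 0) {
 *		if (errno != EOVERFLOW)
 *			break;			// EINVAL, EFAULT, or EINTR
 *		// nres was updated to the needed count; grow and retry.
 *		buf = realloc(buf, nres * sizeof (vmusage_t));
 *	}
 */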