/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * vm_usage
 *
 * This file implements the getvmusage() private system call.
 * getvmusage() counts the amount of resident memory pages and swap
 * reserved by the specified process collective. A "process collective" is
 * the set of processes owned by a particular zone, project, task, or user.
 *
 * rss and swap are counted so that for a given process collective, a page is
 * only counted once. For example, this means that if multiple processes in
 * the same project map the same page, then the project will only be charged
 * once for that page. On the other hand, if two processes in different
 * projects map the same page, then both projects will be charged
 * for the page.
 *
 * The vm_getusage() calculation is implemented so that the first thread
 * performs the rss/swap counting. Other callers will wait for that thread to
 * finish, copying the results. This enables multiple rcapds and prstats to
 * consume data from the same calculation. The results are also cached so that
 * a caller interested in recent results can just copy them instead of starting
 * a new calculation. The caller passes the maximum age (in seconds) of the
 * data. If the cached data is young enough, the cache is copied, otherwise,
 * a new calculation is executed and the cache is replaced with the new
 * data.
 *
 * The rss calculation for each process collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *      - Figure out proc's collectives (zone, project, task, and/or user).
 *      - For each seg in proc's address space:
 *          - If seg is private:
 *              - Lookup anons in the amp.
 *              - For incore pages not previously visited for each of the
 *                proc's collectives, add incore pagesize to each collective.
 *                Anons with a refcnt of 1 can be assumed to be not
 *                previously visited.
 *              - For address ranges without anons in the amp:
 *                  - Lookup pages in underlying vnode.
 *                  - For incore pages not previously visited for
 *                    each of the proc's collectives, add incore
 *                    pagesize to each collective.
 *          - If seg is shared:
 *              - Lookup pages in the shared amp or vnode.
 *              - For incore pages not previously visited for each of
 *                the proc's collectives, add incore pagesize to each
 *                collective.
 *
 * Swap is reserved by private segments, and shared anonymous segments.
 * The only shared anon segments which do not reserve swap are ISM segments
 * and schedctl segments, both of which can be identified by having
 * amp->swresv == 0.
 *
 * The swap calculation for each collective is as follows:
 *
 *   - Inspect flags, determine if counting swap for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *      - Figure out proc's collectives (zone, project, task, and/or user).
 *      - For each seg in proc's address space:
 *          - If seg is private:
 *              - Add svd->swresv pages to swap count for each of the
 *                proc's collectives.
 *          - If seg is anon, shared, and amp->swresv != 0
 *              - For address ranges in amp not previously visited for
 *                each of the proc's collectives, add size of address
 *                range to the swap count for each collective.
 *
 * These two calculations are done simultaneously, with most of the work
 * being done in vmu_calculate_seg(). The results of the calculation are
 * copied into "vmu_data.vmu_cache->vmc_results".
 *
 * To perform the calculation, various things are tracked and cached:
 *
 *   - incore/not-incore page ranges for all vnodes.
 *     (vmu_data.vmu_all_vnodes_hash)
 *     This eliminates looking up the same page more than once.
 *
 *   - incore/not-incore page ranges for all shared amps.
 *     (vmu_data.vmu_all_amps_hash)
 *     This eliminates looking up the same page more than once.
 *
 *   - visited page ranges for each collective.
 *     - per vnode (entity->vme_vnode_hash)
 *     - per shared amp (entity->vme_amp_hash)
 *     For accurate counting of map-shared and COW-shared pages.
 *
 *   - visited private anons (refcnt > 1) for each collective.
 *     (entity->vme_anon_hash)
 *     For accurate counting of COW-shared pages.
 *
 * The common accounting structure is the vmu_entity_t, which represents
 * collectives:
 *
 *   - A zone.
 *   - A project, task, or user within a zone.
 *   - The entire system (vmu_data.vmu_system).
 *   - Each collapsed (col) project and user. This means a given projid or
 *     uid, regardless of which zone the process is in. For instance,
 *     project 0 in the global zone and project 0 in a non-global zone are
 *     the same collapsed project.
 *
 * Each entity structure tracks which pages have already been visited for
 * that entity (via previously inspected processes) so that these pages are
 * not double counted.
 */
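/*
 * Illustrative usage (a sketch, not part of the original source): callers
 * such as prstat(1M) and rcapd(1M) reach this code through the getvmusage(2)
 * wrapper declared in <sys/vm_usage.h>. Assuming a caller interested in
 * per-zone usage and willing to accept cached results up to 5 seconds old:
 *
 *	vmusage_t buf[32];
 *	size_t nres = 32;
 *
 *	if (getvmusage(VMUSAGE_ALL_ZONES, 5, buf, &nres) != 0)
 *		return (errno);
 *
 * On success, the first MIN(nres, 32) entries of buf are valid results; a
 * value of nres larger than the capacity passed in indicates the buffer was
 * too small to hold all results.
 */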
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/zone.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/task.h>
#include <sys/thread.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/modhash.h>
#include <sys/modhash_impl.h>
#include <sys/shm.h>
#include <sys/swap.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vm_usage.h>
#include <sys/sunddi.h>
#include <sys/avl.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>

#define	VMUSAGE_HASH_SIZE		512

#define	VMUSAGE_TYPE_VNODE		1
#define	VMUSAGE_TYPE_AMP		2
#define	VMUSAGE_TYPE_ANON		3

#define	VMUSAGE_BOUND_UNKNOWN		0
#define	VMUSAGE_BOUND_INCORE		1
#define	VMUSAGE_BOUND_NOT_INCORE	2

#define	ISWITHIN(node, addr)	((node)->vmb_start <= (addr) && \
				(node)->vmb_end >= (addr) ? 1 : 0)

/*
 * bounds for vnodes and shared amps
 * Each bound is either entirely incore, entirely not in core, or
 * entirely unknown. Bounds are stored in an AVL tree sorted by the
 * vmb_start member when in use; otherwise (on the free or temporary
 * lists) they are strung together off of vmb_next.
 */
typedef struct vmu_bound {
	avl_node_t vmb_node;
	struct vmu_bound *vmb_next; /* NULL in tree else on free or temp list */
	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
	pgcnt_t vmb_end;    /* page offset in vnode/amp on which bound ends */
	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
} vmu_bound_t;
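/*
 * Worked example (illustrative): if pages [0, 9] of an object are known to
 * be resident and pages [10, 24] are known not to be, the object's tree
 * holds two bounds:
 *
 *	{ vmb_start = 0,  vmb_end = 9,  vmb_type = VMUSAGE_BOUND_INCORE }
 *	{ vmb_start = 10, vmb_end = 24, vmb_type = VMUSAGE_BOUND_NOT_INCORE }
 *
 * A range that has not been inspected yet is first inserted as
 * VMUSAGE_BOUND_UNKNOWN and later refined by the *_update_incore_bounds()
 * routines below.
 */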
/*
 * hash of visited objects (vnodes or shared amps)
 * key is address of vnode or amp. Bounds lists known incore/non-incore
 * bounds for vnode/amp.
 */
typedef struct vmu_object {
	struct vmu_object *vmo_next;	/* free list */
	caddr_t		vmo_key;
	short		vmo_type;
	avl_tree_t	vmo_bounds;
} vmu_object_t;

/*
 * Entity by which to count results.
 *
 * The entity structure keeps the current rss/swap counts for each entity
 * (zone, project, etc), and hashes of vm structures that have already
 * been visited for the entity.
 *
 * vme_next:	links the list of all entities currently being counted by
 *		vmu_calculate().
 *
 * vme_next_calc: links the list of entities related to the current process
 *		being counted by vmu_calculate_proc().
 *
 * vmu_calculate_proc() walks all processes. For each process, it makes a
 * list of the entities related to that process using vme_next_calc. This
 * list changes each time vmu_calculate_proc() is called.
 */
typedef struct vmu_entity {
	struct vmu_entity *vme_next;
	struct vmu_entity *vme_next_calc;
	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
	mod_hash_t	*vme_anon_hash;	 /* COW anons visited for entity */
	vmusage_t	vme_result;	 /* identifies entity and results */
} vmu_entity_t;

/*
 * Hash of entities visited within a zone, and an entity for the zone
 * itself.
 */
typedef struct vmu_zone {
	struct vmu_zone	*vmz_next;	/* free list */
	id_t		vmz_id;
	vmu_entity_t	*vmz_zone;
	mod_hash_t	*vmz_projects_hash;
	mod_hash_t	*vmz_tasks_hash;
	mod_hash_t	*vmz_rusers_hash;
	mod_hash_t	*vmz_eusers_hash;
} vmu_zone_t;

/*
 * Cache of results from last calculation
 */
typedef struct vmu_cache {
	vmusage_t	*vmc_results;	/* Results from last call to */
					/* vm_getusage(). */
	uint64_t	vmc_nresults;	/* Count of cached results */
	uint64_t	vmc_refcnt;	/* refcnt for free */
	uint_t		vmc_flags;	/* Flags for vm_getusage() */
	hrtime_t	vmc_timestamp;	/* when cache was created */
} vmu_cache_t;
/*
 * top level rss info for the system
 */
typedef struct vmu_data {
	kmutex_t	vmu_lock;		/* Protects vmu_data */
	kcondvar_t	vmu_cv;			/* Used to signal threads */
						/* waiting for the calc */
						/* thread to finish */
	vmu_entity_t	*vmu_system;		/* Entity for tracking */
						/* rss/swap for all processes */
						/* in all zones */
	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
	mod_hash_t	*vmu_projects_col_hash;	/* These *_col_hash hashes */
	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
						/* to implement VMUSAGE_COL_* */
						/* flags, which aggregate by */
						/* project or user regardless */
						/* of zoneid. */
	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
						/* to track incore/not-incore */
	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
						/* amps to track incore/not- */
						/* incore */
	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
	size_t		vmu_nentities;		/* Count of entities in list */
	vmu_cache_t	*vmu_cache;		/* Cached results */
	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
						/* vmu_calculate() */
	uint_t		vmu_calc_flags;		/* Flags being used by */
						/* currently running calc */
						/* thread */
	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
						/* threads waiting for */
						/* calc thread to finish */
	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
						/* for calc thread */
	vmu_bound_t	*vmu_free_bounds;
	vmu_object_t	*vmu_free_objects;
	vmu_entity_t	*vmu_free_entities;
	vmu_zone_t	*vmu_free_zones;
} vmu_data_t;

extern struct as kas;
extern proc_t *practive;
extern zone_t *global_zone;
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

static vmu_data_t vmu_data;
static kmem_cache_t *vmu_bound_cache;
static kmem_cache_t *vmu_object_cache;
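/*
 * Worked example (illustrative): with VMUSAGE_COL_PROJECTS set, a process
 * in project 10 of zone A and a process in project 10 of zone B both map to
 * the single entity stored under key 10 in vmu_projects_col_hash, so their
 * usage is aggregated. Without the COL flag, each zone's project 10 gets
 * its own entity in that zone's vmz_projects_hash.
 */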
/*
 * Comparison routine for AVL tree. We base our comparison on vmb_start.
 */
static int
bounds_cmp(const void *bnd1, const void *bnd2)
{
	const vmu_bound_t *bound1 = bnd1;
	const vmu_bound_t *bound2 = bnd2;

	if (bound1->vmb_start == bound2->vmb_start) {
		return (0);
	}
	if (bound1->vmb_start < bound2->vmb_start) {
		return (-1);
	}

	return (1);
}

/*
 * Save a bound on the free list.
 */
static void
vmu_free_bound(vmu_bound_t *bound)
{
	bound->vmb_next = vmu_data.vmu_free_bounds;
	bound->vmb_start = 0;
	bound->vmb_end = 0;
	bound->vmb_type = 0;
	vmu_data.vmu_free_bounds = bound;
}

/*
 * Free an object, and all visited bound info.
 */
static void
vmu_free_object(mod_hash_val_t val)
{
	vmu_object_t *obj = (vmu_object_t *)val;
	avl_tree_t *tree = &(obj->vmo_bounds);
	vmu_bound_t *bound;
	void *cookie = NULL;

	while ((bound = avl_destroy_nodes(tree, &cookie)) != NULL)
		vmu_free_bound(bound);
	avl_destroy(tree);

	obj->vmo_type = 0;
	obj->vmo_next = vmu_data.vmu_free_objects;
	vmu_data.vmu_free_objects = obj;
}

/*
 * Free an entity, and hashes of visited objects for that entity.
 */
static void
vmu_free_entity(mod_hash_val_t val)
{
	vmu_entity_t *entity = (vmu_entity_t *)val;

	if (entity->vme_vnode_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_vnode_hash);
	if (entity->vme_amp_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_amp_hash);
	if (entity->vme_anon_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_anon_hash);

	entity->vme_next = vmu_data.vmu_free_entities;
	vmu_data.vmu_free_entities = entity;
}
/*
 * Free zone entity, and all hashes of entities inside that zone,
 * which are projects, tasks, and users.
 */
static void
vmu_free_zone(mod_hash_val_t val)
{
	vmu_zone_t *zone = (vmu_zone_t *)val;

	if (zone->vmz_zone != NULL) {
		vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
		zone->vmz_zone = NULL;
	}
	if (zone->vmz_projects_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_projects_hash);
	if (zone->vmz_tasks_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
	if (zone->vmz_rusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
	if (zone->vmz_eusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
	zone->vmz_next = vmu_data.vmu_free_zones;
	vmu_data.vmu_free_zones = zone;
}

/*
 * Initialize synchronization primitives and hashes for system-wide tracking
 * of visited vnodes and shared amps. Initialize results cache.
 */
void
vm_usage_init()
{
	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);

	vmu_data.vmu_system = NULL;
	vmu_data.vmu_zones_hash = NULL;
	vmu_data.vmu_projects_col_hash = NULL;
	vmu_data.vmu_rusers_col_hash = NULL;
	vmu_data.vmu_eusers_col_hash = NULL;

	vmu_data.vmu_free_bounds = NULL;
	vmu_data.vmu_free_objects = NULL;
	vmu_data.vmu_free_entities = NULL;
	vmu_data.vmu_free_zones = NULL;

	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (vnode_t));
	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (struct anon_map));
	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed euser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);

	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	vmu_object_cache = kmem_cache_create("vmu_object_cache",
	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vmu_data.vmu_entities = NULL;
	vmu_data.vmu_nentities = 0;

	vmu_data.vmu_cache = NULL;
	vmu_data.vmu_calc_thread = NULL;
	vmu_data.vmu_calc_flags = 0;
	vmu_data.vmu_pending_flags = 0;
	vmu_data.vmu_pending_waiters = 0;
}

/*
 * Allocate hashes for tracking vm objects visited for an entity.
 * Update list of entities.
 */
static vmu_entity_t *
vmu_alloc_entity(id_t id, int type, id_t zoneid)
{
	vmu_entity_t *entity;

	if (vmu_data.vmu_free_entities != NULL) {
		entity = vmu_data.vmu_free_entities;
		vmu_data.vmu_free_entities =
		    vmu_data.vmu_free_entities->vme_next;
		bzero(&entity->vme_result, sizeof (vmusage_t));
	} else {
		entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
	}
	entity->vme_result.vmu_id = id;
	entity->vme_result.vmu_zoneid = zoneid;
	entity->vme_result.vmu_type = type;

	if (entity->vme_vnode_hash == NULL)
		entity->vme_vnode_hash = mod_hash_create_ptrhash(
		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (vnode_t));

	if (entity->vme_amp_hash == NULL)
		entity->vme_amp_hash = mod_hash_create_ptrhash(
		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (struct anon_map));

	if (entity->vme_anon_hash == NULL)
		entity->vme_anon_hash = mod_hash_create_ptrhash(
		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
		    mod_hash_null_valdtor, sizeof (struct anon));

	entity->vme_next = vmu_data.vmu_entities;
	vmu_data.vmu_entities = entity;
	vmu_data.vmu_nentities++;

	return (entity);
}
/*
 * Allocate a zone entity, and hashes for tracking visited vm objects
 * for projects, tasks, and users within that zone.
 */
static vmu_zone_t *
vmu_alloc_zone(id_t id)
{
	vmu_zone_t *zone;

	if (vmu_data.vmu_free_zones != NULL) {
		zone = vmu_data.vmu_free_zones;
		vmu_data.vmu_free_zones =
		    vmu_data.vmu_free_zones->vmz_next;
		zone->vmz_next = NULL;
		zone->vmz_zone = NULL;
	} else {
		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
	}

	zone->vmz_id = id;

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
	    VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
		zone->vmz_projects_hash = mod_hash_create_idhash(
		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
	    != 0 && zone->vmz_tasks_hash == NULL)
		zone->vmz_tasks_hash = mod_hash_create_idhash(
		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
	    != 0 && zone->vmz_rusers_hash == NULL)
		zone->vmz_rusers_hash = mod_hash_create_idhash(
		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
	    != 0 && zone->vmz_eusers_hash == NULL)
		zone->vmz_eusers_hash = mod_hash_create_idhash(
		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	return (zone);
}

/*
 * Allocate a structure for tracking visited bounds for a vm object.
 */
static vmu_object_t *
vmu_alloc_object(caddr_t key, int type)
{
	vmu_object_t *object;

	if (vmu_data.vmu_free_objects != NULL) {
		object = vmu_data.vmu_free_objects;
		vmu_data.vmu_free_objects =
		    vmu_data.vmu_free_objects->vmo_next;
	} else {
		object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
	}

	object->vmo_next = NULL;
	object->vmo_key = key;
	object->vmo_type = type;
	/* Offset 0: the avl_node_t is the first member of vmu_bound_t. */
	avl_create(&(object->vmo_bounds), bounds_cmp, sizeof (vmu_bound_t), 0);

	return (object);
}
/*
 * Allocate and return a bound structure.
 */
static vmu_bound_t *
vmu_alloc_bound()
{
	vmu_bound_t *bound;

	if (vmu_data.vmu_free_bounds != NULL) {
		bound = vmu_data.vmu_free_bounds;
		vmu_data.vmu_free_bounds =
		    vmu_data.vmu_free_bounds->vmb_next;
	} else {
		bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
	}

	bound->vmb_next = NULL;
	bound->vmb_start = 0;
	bound->vmb_end = 0;
	bound->vmb_type = 0;
	return (bound);
}

/*
 * vmu_find_insert_* functions implement hash lookup or allocate and
 * insert operations.
 */
static vmu_object_t *
vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
{
	int ret;
	vmu_object_t *object;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&object);
	if (ret != 0) {
		object = vmu_alloc_object(key, type);
		ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
		    (mod_hash_val_t)object, (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (object);
}

static int
vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
{
	int ret;
	caddr_t val;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&val);

	if (ret == 0)
		return (0);

	ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);

	ASSERT(ret == 0);

	return (1);
}

static vmu_entity_t *
vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
{
	int ret;
	vmu_entity_t *entity;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&entity);
	if (ret != 0) {
		entity = vmu_alloc_entity(id, type, zoneid);
		ret = i_mod_hash_insert_nosync(hash,
		    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
		    (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (entity);
}
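/*
 * Note (illustrative): the _nosync mod_hash variants are safe here because
 * only the single calculation thread mutates these hashes; concurrent
 * vm_getusage() callers block on vmu_cv rather than touching the hashes
 * themselves. A typical call, as made from vmu_calculate_seg() below:
 *
 *	shared_object = vmu_find_insert_object(
 *	    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
 *	    VMUSAGE_TYPE_VNODE);
 */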
/*
 * Returns list of object bounds between start and end. New bounds inserted
 * by this call are given type.
 *
 * Returns the number of pages covered if new bounds are created. Returns 0
 * if region between start/end consists of all existing bounds.
 */
static pgcnt_t
vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t end,
    char type, vmu_bound_t **first, vmu_bound_t **last)
{
	avl_tree_t	*tree = &(ro->vmo_bounds);
	avl_index_t	where;
	vmu_bound_t	*walker, *tmp;
	pgcnt_t		ret = 0;

	ASSERT(start <= end);

	*first = *last = NULL;

	tmp = vmu_alloc_bound();
	tmp->vmb_start = start;
	tmp->vmb_type = type;

	/* Hopelessly optimistic case. */
	if ((walker = avl_find(tree, tmp, &where)) != NULL) {
		/* We got lucky. */
		vmu_free_bound(tmp);
		*first = walker;
	}

	if (walker == NULL) {
		/* Is start in the previous node? */
		walker = avl_nearest(tree, where, AVL_BEFORE);
		if (walker != NULL) {
			if (ISWITHIN(walker, start)) {
				/* We found start. */
				vmu_free_bound(tmp);
				*first = walker;
			}
		}
	}

	/*
	 * At this point, if *first is still NULL, then we
	 * didn't get a direct hit and start isn't covered
	 * by the previous node. We know that the next node
	 * must have a greater start value than we require
	 * because avl_find tells us where the AVL routines would
	 * insert our new node. We have some gap between the
	 * start we want and the next node.
	 */
	if (*first == NULL) {
		walker = avl_nearest(tree, where, AVL_AFTER);
		if (walker != NULL && walker->vmb_start <= end) {
			/* Fill the gap. */
			tmp->vmb_end = walker->vmb_start - 1;
			*first = tmp;
		} else {
			/* We have a gap over [start, end]. */
			tmp->vmb_end = end;
			*first = *last = tmp;
		}
		ret += tmp->vmb_end - tmp->vmb_start + 1;
		avl_insert(tree, tmp, where);
	}

	ASSERT(*first != NULL);

	if (*last != NULL) {
		/* We're done. */
		return (ret);
	}

	/*
	 * If we are here we still need to set *last and
	 * that may involve filling in some gaps.
	 */
	*last = *first;
	for (;;) {
		if (ISWITHIN(*last, end)) {
			/* We're done. */
			break;
		}
		walker = AVL_NEXT(tree, *last);
		if (walker == NULL || walker->vmb_start > end) {
			/* Bottom or mid tree with gap. */
			tmp = vmu_alloc_bound();
			tmp->vmb_start = (*last)->vmb_end + 1;
			tmp->vmb_end = end;
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			avl_insert_here(tree, tmp, *last, AVL_AFTER);
			*last = tmp;
			break;
		} else {
			if ((*last)->vmb_end + 1 != walker->vmb_start) {
				/* Non-contiguous. */
				tmp = vmu_alloc_bound();
				tmp->vmb_start = (*last)->vmb_end + 1;
				tmp->vmb_end = walker->vmb_start - 1;
				ret += tmp->vmb_end - tmp->vmb_start + 1;
				avl_insert_here(tree, tmp, *last, AVL_AFTER);
				*last = tmp;
			} else {
				*last = walker;
			}
		}
	}

	return (ret);
}
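/*
 * Worked example (illustrative): suppose an object's tree already holds a
 * single bound [5, 10] and vmu_insert_lookup_object_bounds(ro, 0, 15,
 * VMUSAGE_BOUND_UNKNOWN, &first, &last) is called. Two gap bounds are
 * created and inserted, [0, 4] and [11, 15], both of the given type; first
 * points at [0, 4], last at [11, 15], and the return value is 10, the
 * number of newly covered pages. A second identical call would return 0,
 * since [0, 15] is then fully covered by existing bounds.
 */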
/*
 * vmu_update_bounds()
 *
 * tree: avl_tree in which first and last hang.
 *
 * first, last: list of continuous bounds, of which zero or more are of
 * type VMUSAGE_BOUND_UNKNOWN.
 *
 * new_tree: avl_tree in which new_first and new_last hang.
 *
 * new_first, new_last: list of continuous bounds, of which none are of
 * type VMUSAGE_BOUND_UNKNOWN. These bounds are used to update the types
 * of bounds in (first,last) with type VMUSAGE_BOUND_UNKNOWN.
 *
 * For the list of bounds (first,last), this function updates any bounds
 * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in
 * the list (new_first, new_last).
 *
 * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
 * (new_first, new_last), it will be split into multiple bounds.
 *
 * Return value:
 *	The number of pages in the list of bounds (first,last) that were of
 *	type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
 *	VMUSAGE_BOUND_INCORE.
 */
static pgcnt_t
vmu_update_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last,
    avl_tree_t *new_tree, vmu_bound_t *new_first, vmu_bound_t *new_last)
{
	vmu_bound_t *next, *new_next, *tmp;
	pgcnt_t rss = 0;

	next = *first;
	new_next = new_first;

	/*
	 * Verify first and last bound are covered by new bounds if they
	 * have unknown type.
	 */
	ASSERT((*first)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*first)->vmb_start >= new_first->vmb_start);
	ASSERT((*last)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*last)->vmb_end <= new_last->vmb_end);
	for (;;) {
		/* If bound already has type, proceed to next bound. */
		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}
		while (new_next->vmb_end < next->vmb_start)
			new_next = AVL_NEXT(new_tree, new_next);
		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
		next->vmb_type = new_next->vmb_type;
		if (new_next->vmb_end < next->vmb_end) {
			/* need to split bound */
			tmp = vmu_alloc_bound();
			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
			tmp->vmb_start = new_next->vmb_end + 1;
			tmp->vmb_end = next->vmb_end;
			avl_insert_here(tree, tmp, next, AVL_AFTER);
			next->vmb_end = new_next->vmb_end;
			if (*last == next)
				*last = tmp;
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			next = tmp;
		} else {
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
		}
	}
	return (rss);
}
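/*
 * Worked example (illustrative): if (first,last) is a single unknown bound
 * [0, 9] and (new_first,new_last) is [0, 4] INCORE followed by [5, 9]
 * NOT_INCORE, the unknown bound is split into [0, 4] INCORE and [5, 9]
 * NOT_INCORE, and the function returns 5: only the pages resolved to
 * VMUSAGE_BOUND_INCORE count toward rss.
 */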
/*
 * Merges adjacent bounds with same type between first and last bound.
 * After merge, last pointer is no longer valid, as last bound may be
 * merged away.
 */
static void
vmu_merge_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *current;
	vmu_bound_t *next;

	ASSERT(tree != NULL);
	ASSERT(*first != NULL);
	ASSERT(*last != NULL);

	current = *first;
	while (current != *last) {
		next = AVL_NEXT(tree, current);
		if ((current->vmb_end + 1) == next->vmb_start &&
		    current->vmb_type == next->vmb_type) {
			current->vmb_end = next->vmb_end;
			avl_remove(tree, next);
			vmu_free_bound(next);
			if (next == *last) {
				break;
			}
		} else {
			/*
			 * Only advance when no merge occurred, so a bound
			 * that absorbed its neighbor is retried against the
			 * following bound.
			 */
			current = AVL_NEXT(tree, current);
		}
	}
}
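/*
 * Worked example (illustrative): after an update pass, a tree holding
 * [0, 4] INCORE, [5, 9] INCORE, [10, 14] NOT_INCORE collapses to
 * [0, 9] INCORE, [10, 14] NOT_INCORE; the second INCORE bound is removed
 * from the tree and returned to the free list.
 */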
/*
 * Given an amp and a list of bounds, updates each bound's type with
 * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
 *
 * If a bound is partially incore, it will be split into two bounds.
 * first and last may be modified, as bounds may be split into multiple
 * bounds if they are partially incore/not-incore.
 *
 * Set incore to non-zero if bounds are already known to be incore.
 */
static void
vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
    vmu_bound_t **first, vmu_bound_t **last, boolean_t incore)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;
	vnode_t *vn;
	anoff_t off;
	struct anon *ap;

	next = *first;
	/* Shared anon slots don't change once set. */
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (;;) {
		if (incore == B_TRUE)
			next->vmb_type = VMUSAGE_BOUND_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}
		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			ap = anon_get_ptr(amp->ahp, index);
			if (ap != NULL)
				swap_xlate(ap, &vn, &off);

			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
			    (page = page_exists(vn, off)) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
				bound_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				avl_insert_here(tree, tmp, next, AVL_AFTER);
				next->vmb_end = index - 1;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = AVL_NEXT(tree, next);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
}

/*
 * Same as vmu_amp_update_incore_bounds(), except for tracking
 * incore-/not-incore for vnodes.
 */
static void
vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
    vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;

	next = *first;
	for (;;) {
		if (vnode->v_pages == NULL)
			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}

		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			if (vnode->v_pages != NULL &&
			    (page = page_exists(vnode, ptob(index))) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
				bound_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				avl_insert_here(tree, tmp, next, AVL_AFTER);
				next->vmb_end = index - 1;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = AVL_NEXT(tree, next);
	}
}
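/*
 * Worked example (illustrative): the large-page skip in the loops above,
 * index = (index & ~pgmsk) + pgcnt, rounds index down to the start of the
 * enclosing large page and then advances one full large page. For example,
 * with 8K base pages and a 4M large page (pgcnt = 512, pgmsk = 511), an
 * index of 520 rounds down to 512 and advances to 1024, the first base
 * page of the next large page; all 512 constituent pages share one
 * residency state, so none of them needs an individual page_exists()
 * lookup.
 */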
/*
 * Calculate the rss and swap consumed by a segment. vmu_entities is the
 * list of entities to visit. For shared segments, the vnode or amp
 * is looked up in each entity to see if it has been already counted. Private
 * anon pages are checked per entity to ensure that COW pages are not
 * double counted.
 *
 * For private mapped files, first the amp is checked for private pages.
 * Bounds not backed by the amp are looked up in the vnode for each entity
 * to avoid double counting of private COW vnode pages.
 */
static void
vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
{
	struct segvn_data *svd;
	struct shm_data *shmd;
	struct spt_data *sptd;
	vmu_object_t *shared_object = NULL;
	vmu_object_t *entity_object = NULL;
	vmu_entity_t *entity;
	vmusage_t *result;
	avl_tree_t *tree;
	vmu_bound_t *first = NULL;
	vmu_bound_t *last = NULL;
	vmu_bound_t *cur = NULL;
	vmu_bound_t *e_first = NULL;
	vmu_bound_t *e_last = NULL;
	vmu_bound_t *tmp;
	pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
	struct anon_map *private_amp = NULL;
	boolean_t incore = B_FALSE;
	boolean_t shared = B_FALSE;
	int file = 0;
	pgcnt_t swresv = 0;
	pgcnt_t panon = 0;

	/* Can zero-length segments exist? Not sure, so paranoia. */
	if (seg->s_size <= 0)
		return;

	/*
	 * Figure out if there is a shared object (such as a named vnode or
	 * a shared amp), then figure out if there is a private amp, which
	 * identifies private pages.
	 */
	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_SHARED) {
			shared = B_TRUE;
		} else {
			swresv = svd->swresv;

			if (SEGVN_LOCK_TRYENTER(seg->s_as, &svd->lock,
			    RW_READER) != 0) {
				/*
				 * Text replication anon maps can be shared
				 * across all zones. Space used for text
				 * replication is typically capped as a small %
				 * of memory. To keep it simple for now we
				 * don't account for swap and memory space used
				 * for text replication.
				 */
				if (svd->tr_state == SEGVN_TR_OFF &&
				    svd->amp != NULL) {
					private_amp = svd->amp;
					p_start = svd->anon_index;
					p_end = svd->anon_index +
					    btop(seg->s_size) - 1;
				}
				SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
			}
		}
		if (svd->vp != NULL) {
			file = 1;
			shared_object = vmu_find_insert_object(
			    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
			    VMUSAGE_TYPE_VNODE);
			s_start = btop(svd->offset);
			s_end = btop(svd->offset + seg->s_size) - 1;
		}
		if (svd->amp != NULL && svd->type == MAP_SHARED) {
			ASSERT(shared_object == NULL);
			shared_object = vmu_find_insert_object(
			    vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
			    VMUSAGE_TYPE_AMP);
			s_start = svd->anon_index;
			s_end = svd->anon_index + btop(seg->s_size) - 1;
			/* schedctl mappings are always in core */
			if (svd->amp->swresv == 0)
				incore = B_TRUE;
		}
	} else if (seg->s_ops == &segspt_shmops) {
		shared = B_TRUE;
		shmd = (struct shm_data *)seg->s_data;
		shared_object = vmu_find_insert_object(
		    vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
		    VMUSAGE_TYPE_AMP);
		s_start = 0;
		s_end = btop(seg->s_size) - 1;
		sptd = shmd->shm_sptseg->s_data;

		/* ism segments are always incore and do not reserve swap */
		if (sptd->spt_flags & SHM_SHARE_MMU)
			incore = B_TRUE;

	} else {
		return;
	}

	/*
	 * If there is a private amp, count anon pages that exist. If an
	 * anon has a refcnt > 1 (COW sharing), then save the anon in a
	 * hash so that it is not double counted.
	 *
	 * If there is also a shared object, then figure out the bounds
	 * which are not mapped by the private amp.
	 */
	if (private_amp != NULL) {

		/* Enter as writer to prevent COW anons from being freed */
		ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);

		p_index = p_start;
		s_index = s_start;

		while (p_index <= p_end) {

			pgcnt_t p_index_next;
			pgcnt_t p_bound_size;
			int cnt;
			anoff_t off;
			struct vnode *vn;
			struct anon *ap;
			page_t *page;		/* For handling of large */
			pgcnt_t pgcnt = 1;	/* pages */
			pgcnt_t pgstart;
			pgcnt_t pgend;
			uint_t pgshft;
			pgcnt_t pgmsk;

			p_index_next = p_index;
			ap = anon_get_next_ptr(private_amp->ahp,
			    &p_index_next);

			/*
			 * If next anon is past end of mapping, simulate
			 * end of anon so loop terminates.
			 */
			if (p_index_next > p_end) {
				p_index_next = p_end + 1;
				ap = NULL;
			}
			/*
			 * For COW segments, keep track of bounds not
			 * backed by private amp so they can be looked
			 * up in the backing vnode
			 */
			if (p_index_next != p_index) {

				/*
				 * Compute index difference between anon and
				 * previous anon.
				 */
				p_bound_size = p_index_next - p_index - 1;

				if (shared_object != NULL) {
					cur = vmu_alloc_bound();
					cur->vmb_start = s_index;
					cur->vmb_end = s_index + p_bound_size;
					cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
					if (first == NULL) {
						first = cur;
						last = cur;
					} else {
						last->vmb_next = cur;
						last = cur;
					}
				}
				p_index = p_index + p_bound_size + 1;
				s_index = s_index + p_bound_size + 1;
			}

			/* Detect end of anons in amp */
			if (ap == NULL)
				break;

			cnt = ap->an_refcnt;
			swap_xlate(ap, &vn, &off);

			if (vn == NULL || vn->v_pages == NULL ||
			    (page = page_exists(vn, off)) == NULL) {
				p_index++;
				s_index++;
				continue;
			}

			/*
			 * If large page is found, compute portion of large
			 * page in mapping, and increment indices to the next
			 * large page.
			 */
			/*
			 * If a large page is found, compute the portion
			 * of the large page that is in the mapping, and
			 * increment the indices to the next large page.
			 */
			if (page->p_szc > 0) {

				pgcnt = page_get_pagecnt(page->p_szc);
				pgshft = page_get_shift(page->p_szc);
				pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;

				/* First page in large page */
				pgstart = p_index & ~pgmsk;
				/* Last page in large page */
				pgend = pgstart + pgcnt - 1;
				/*
				 * Artificially end the page if it extends
				 * past the end of the mapping.
				 */
				if (pgend > p_end)
					pgend = p_end;

				/*
				 * Compute the number of pages from the
				 * large page which are mapped.
				 */
				pgcnt = pgend - p_index + 1;

				/*
				 * Point the indices at the page after the
				 * large page, or at the page after the end
				 * of the mapping.
				 */
				p_index += pgcnt;
				s_index += pgcnt;
			} else {
				p_index++;
				s_index++;
			}
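			/*
			 * Worked example of the arithmetic above (a sketch;
			 * the numbers assume 8K base pages, PAGESHIFT == 13,
			 * and a 4M large page, so pgshft == 22):
			 *
			 *	pgcnt   = 4M / 8K = 512
			 *	pgmsk   = (1 << (22 - 13)) - 1 = 0x1ff
			 *	p_index = 700
			 *	pgstart = 700 & ~0x1ff = 512
			 *	pgend   = 512 + 512 - 1 = 1023
			 *	pgcnt   = 1023 - 700 + 1 = 324
			 *
			 * So 324 base pages of the large page lie at or
			 * after p_index, and p_index advances to 1024, the
			 * first page of the next large page.
			 */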
			/*
			 * Assume anon structs with a refcnt
			 * of 1 are not COW shared, so there
			 * is no reason to track them per entity.
			 */
			if (cnt == 1) {
				panon += pgcnt;
				continue;
			}
			for (entity = vmu_entities; entity != NULL;
			    entity = entity->vme_next_calc) {

				result = &entity->vme_result;
				/*
				 * Track COW anons per entity so
				 * they are not double counted.
				 */
				if (vmu_find_insert_anon(entity->vme_anon_hash,
				    (caddr_t)ap) == 0)
					continue;

				result->vmu_rss_all += (pgcnt << PAGESHIFT);
				result->vmu_rss_private +=
				    (pgcnt << PAGESHIFT);
			}
		}
		ANON_LOCK_EXIT(&private_amp->a_rwlock);
	}

	/* Add up resident anon and swap reserved for private mappings */
	if (swresv > 0 || panon > 0) {
		for (entity = vmu_entities; entity != NULL;
		    entity = entity->vme_next_calc) {
			result = &entity->vme_result;
			result->vmu_swap_all += swresv;
			result->vmu_swap_private += swresv;
			result->vmu_rss_all += (panon << PAGESHIFT);
			result->vmu_rss_private += (panon << PAGESHIFT);
		}
	}

	/* Compute resident pages backing shared amp or named vnode */
	if (shared_object != NULL) {
		if (first == NULL) {
			/*
			 * No private amp, or private amp has no anon
			 * structs.  This means the entire segment is backed
			 * by the shared object.
			 */
			first = vmu_alloc_bound();
			first->vmb_start = s_start;
			first->vmb_end = s_end;
			first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
		}
		/*
		 * Iterate the bounds not backed by the private amp, and
		 * compute resident pages.
		 */
		cur = first;
		while (cur != NULL) {

			if (vmu_insert_lookup_object_bounds(shared_object,
			    cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
			    &first, &last) > 0) {
				/* new bounds, find incore/not-incore */
				tree = &(shared_object->vmo_bounds);
				if (shared_object->vmo_type ==
				    VMUSAGE_TYPE_VNODE) {
					vmu_vnode_update_incore_bounds(
					    tree, (vnode_t *)
					    shared_object->vmo_key, &first,
					    &last);
				} else {
					vmu_amp_update_incore_bounds(
					    tree, (struct anon_map *)
					    shared_object->vmo_key, &first,
					    &last, incore);
				}
				vmu_merge_bounds(tree, &first, &last);
			}
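			/*
			 * Sketch of the bound lifecycle driven above (an
			 * illustration; the incore/not-incore bound types
			 * are defined earlier in this file).  A range is
			 * inserted as VMUSAGE_BOUND_UNKNOWN, the incore
			 * update pass splits it by residency, and merging
			 * coalesces neighbors of equal type:
			 *
			 *	insert: [0 .................. 9] unknown
			 *	update: [0..3] incore  [4..6] not-incore
			 *	        [7..9] incore
			 *	merge:  adjacent bounds of the same type
			 *	        become a single bound
			 */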
			for (entity = vmu_entities; entity != NULL;
			    entity = entity->vme_next_calc) {
				avl_tree_t *e_tree;

				result = &entity->vme_result;

				entity_object = vmu_find_insert_object(
				    shared_object->vmo_type ==
				    VMUSAGE_TYPE_VNODE ?
				    entity->vme_vnode_hash :
				    entity->vme_amp_hash,
				    shared_object->vmo_key,
				    shared_object->vmo_type);

				virt = vmu_insert_lookup_object_bounds(
				    entity_object, cur->vmb_start, cur->vmb_end,
				    VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);

				if (virt == 0)
					continue;
				/*
				 * Range visited for this entity
				 */
				e_tree = &(entity_object->vmo_bounds);
				rss = vmu_update_bounds(e_tree, &e_first,
				    &e_last, tree, first, last);
				result->vmu_rss_all += (rss << PAGESHIFT);
				if (shared == B_TRUE && file == B_FALSE) {
					/* shared anon mapping */
					result->vmu_swap_all +=
					    (virt << PAGESHIFT);
					result->vmu_swap_shared +=
					    (virt << PAGESHIFT);
					result->vmu_rss_shared +=
					    (rss << PAGESHIFT);
				} else if (shared == B_TRUE && file == B_TRUE) {
					/* shared file mapping */
					result->vmu_rss_shared +=
					    (rss << PAGESHIFT);
				} else if (shared == B_FALSE &&
				    file == B_TRUE) {
					/* private file mapping */
					result->vmu_rss_private +=
					    (rss << PAGESHIFT);
				}
				vmu_merge_bounds(e_tree, &e_first, &e_last);
			}
			tmp = cur;
			cur = cur->vmb_next;
			vmu_free_bound(tmp);
		}
	}
}
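/*
 * Summary of the shared-object accounting at the end of vmu_calculate_seg()
 * (a reading aid restating the if/else chain above, not new behavior):
 *
 *	all cases:			vmu_rss_all	+= rss
 *	shared anon (shared, !file):	vmu_swap_all	+= virt
 *					vmu_swap_shared	+= virt
 *					vmu_rss_shared	+= rss
 *	shared file (shared, file):	vmu_rss_shared	+= rss
 *	private file (!shared, file):	vmu_rss_private	+= rss
 */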
/*
 * Based on the current calculation flags, find the entities which are
 * relevant to the process.  Then calculate each segment in the process's
 * address space for each relevant entity.
 */
static void
vmu_calculate_proc(proc_t *p)
{
	vmu_entity_t *entities = NULL;
	vmu_zone_t *zone;
	vmu_entity_t *tmp;
	struct as *as;
	struct seg *seg;
	int ret;

	/* Figure out which entities are being computed */
	if (vmu_data.vmu_system != NULL) {
		tmp = vmu_data.vmu_system;
		tmp->vme_next_calc = entities;
		entities = tmp;
	}
	if (vmu_data.vmu_calc_flags &
	    (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
	    VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
	    VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
	    VMUSAGE_ALL_EUSERS)) {
		ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
		    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
		    (mod_hash_val_t *)&zone);
		if (ret != 0) {
			zone = vmu_alloc_zone(p->p_zone->zone_id);
			ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
			    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
			    (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
			ASSERT(ret == 0);
		}
		if (zone->vmz_zone != NULL) {
			tmp = zone->vmz_zone;
			tmp->vme_next_calc = entities;
			entities = tmp;
		}
		if (vmu_data.vmu_calc_flags &
		    (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
			tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
			    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
			    zone->vmz_id);
			tmp->vme_next_calc = entities;
			entities = tmp;
		}
		if (vmu_data.vmu_calc_flags &
		    (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
			tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
			    p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
			tmp->vme_next_calc = entities;
			entities = tmp;
		}
		if (vmu_data.vmu_calc_flags &
		    (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
			tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
			    crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
			tmp->vme_next_calc = entities;
			entities = tmp;
		}
		if (vmu_data.vmu_calc_flags &
		    (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
			tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
			    crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
			tmp->vme_next_calc = entities;
			entities = tmp;
		}
	}
	/* Entities which collapse projects and users for all zones */
	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
		tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
		    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
		tmp->vme_next_calc = entities;
		entities = tmp;
	}
	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
		tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
		    crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
		tmp->vme_next_calc = entities;
		entities = tmp;
	}
	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
		tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
		    crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
		tmp->vme_next_calc = entities;
		entities = tmp;
	}

	ASSERT(entities != NULL);
	/* process all segs in the process's address space */
	as = p->p_as;
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	for (seg = AS_SEGFIRST(as); seg != NULL;
	    seg = AS_SEGNEXT(as, seg)) {
		vmu_calculate_seg(entities, seg);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
}

/*
 * Free data created by a previous call to vmu_calculate().
 */
static void
vmu_clear_calc()
{
	if (vmu_data.vmu_system != NULL)
		vmu_free_entity(vmu_data.vmu_system);
	vmu_data.vmu_system = NULL;
	if (vmu_data.vmu_zones_hash != NULL)
		i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
	if (vmu_data.vmu_projects_col_hash != NULL)
		i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
	if (vmu_data.vmu_rusers_col_hash != NULL)
		i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
	if (vmu_data.vmu_eusers_col_hash != NULL)
		i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);

	i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
	i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
}

/*
 * Free unused data structures.  These can result if the system workload
 * decreases between calculations.
 */
static void
vmu_free_extra()
{
	vmu_bound_t *tb;
	vmu_object_t *to;
	vmu_entity_t *te;
	vmu_zone_t *tz;

	while (vmu_data.vmu_free_bounds != NULL) {
		tb = vmu_data.vmu_free_bounds;
		vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
		kmem_cache_free(vmu_bound_cache, tb);
	}
	while (vmu_data.vmu_free_objects != NULL) {
		to = vmu_data.vmu_free_objects;
		vmu_data.vmu_free_objects =
		    vmu_data.vmu_free_objects->vmo_next;
		kmem_cache_free(vmu_object_cache, to);
	}
	while (vmu_data.vmu_free_entities != NULL) {
		te = vmu_data.vmu_free_entities;
		vmu_data.vmu_free_entities =
		    vmu_data.vmu_free_entities->vme_next;
		if (te->vme_vnode_hash != NULL)
			mod_hash_destroy_hash(te->vme_vnode_hash);
		if (te->vme_amp_hash != NULL)
			mod_hash_destroy_hash(te->vme_amp_hash);
		if (te->vme_anon_hash != NULL)
			mod_hash_destroy_hash(te->vme_anon_hash);
		kmem_free(te, sizeof (vmu_entity_t));
	}
	while (vmu_data.vmu_free_zones != NULL) {
		tz = vmu_data.vmu_free_zones;
		vmu_data.vmu_free_zones =
		    vmu_data.vmu_free_zones->vmz_next;
		if (tz->vmz_projects_hash != NULL)
			mod_hash_destroy_hash(tz->vmz_projects_hash);
		if (tz->vmz_tasks_hash != NULL)
			mod_hash_destroy_hash(tz->vmz_tasks_hash);
		if (tz->vmz_rusers_hash != NULL)
			mod_hash_destroy_hash(tz->vmz_rusers_hash);
		if (tz->vmz_eusers_hash != NULL)
			mod_hash_destroy_hash(tz->vmz_eusers_hash);
		kmem_free(tz, sizeof (vmu_zone_t));
	}
}

extern kcondvar_t *pr_pid_cv;

/*
 * Determine which entity types are relevant and allocate the hashes to
 * track them.  Then walk the process table and count rss and swap
 * for each process's address space.  Address space objects such as
 * vnodes, amps, and anons are tracked per entity, so that they are
 * not double counted in the results.
 */
static void
vmu_calculate()
{
	int i = 0;
	int ret;
	proc_t *p;

	vmu_clear_calc();

	if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
		vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
		    ALL_ZONES);

	/*
	 * Walk the process table and calculate the rss of each proc.
	 *
	 * Pidlock and p_lock cannot be held while doing the rss calculation.
	 * This is because:
	 * 1. The calculation allocates using KM_SLEEP.
	 * 2. The calculation grabs a_lock, which cannot be grabbed
	 *    after p_lock.
	 *
	 * Since pidlock must be dropped, we cannot simply walk the
	 * practive list.  Instead, we walk the process table, and sprlock
	 * each process to ensure that it does not exit during the
	 * calculation.
	 */

	mutex_enter(&pidlock);
	for (i = 0; i < v.v_proc; i++) {
again:
		p = pid_entry(i);
		if (p == NULL)
			continue;

		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);

		if (panicstr) {
			mutex_exit(&p->p_lock);
			return;
		}

		/* Try to set P_PR_LOCK */
		ret = sprtrylock_proc(p);
		if (ret == -1) {
			/* Process in invalid state */
			mutex_exit(&p->p_lock);
			mutex_enter(&pidlock);
			continue;
		} else if (ret == 1) {
			/*
			 * P_PR_LOCK is already set.  Wait and try again.
			 * This also drops p_lock.
			 */
			sprwaitlock_proc(p);
			mutex_enter(&pidlock);
			goto again;
		}
		mutex_exit(&p->p_lock);

		vmu_calculate_proc(p);

		mutex_enter(&p->p_lock);
		sprunlock(p);
		mutex_enter(&pidlock);
	}
	mutex_exit(&pidlock);

	vmu_free_extra();
}

/*
 * Allocate a new cache for nres results satisfying flags.
 */
vmu_cache_t *
vmu_cache_alloc(size_t nres, uint_t flags)
{
	vmu_cache_t *cache;

	cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
	cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
	cache->vmc_nresults = nres;
	cache->vmc_flags = flags;
	cache->vmc_refcnt = 1;
	return (cache);
}

/*
 * Make sure cached results are not freed.
 */
static void
vmu_cache_hold(vmu_cache_t *cache)
{
	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
	cache->vmc_refcnt++;
}

/*
 * Free cache data.
 */
static void
vmu_cache_rele(vmu_cache_t *cache)
{
	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
	ASSERT(cache->vmc_refcnt > 0);
	cache->vmc_refcnt--;
	if (cache->vmc_refcnt == 0) {
		kmem_free(cache->vmc_results, sizeof (vmusage_t) *
		    cache->vmc_nresults);
		kmem_free(cache, sizeof (vmu_cache_t));
	}
}
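/*
 * Sketch of the reference protocol these three functions implement, as
 * used by vm_getusage() below (illustrative; holds and releases must
 * happen under vmu_data.vmu_lock):
 *
 *	mutex_enter(&vmu_data.vmu_lock);
 *	cache = vmu_data.vmu_cache;
 *	vmu_cache_hold(cache);			(pin the results)
 *	mutex_exit(&vmu_data.vmu_lock);
 *	(copy the results out without holding the lock)
 *	mutex_enter(&vmu_data.vmu_lock);
 *	vmu_cache_rele(cache);			(frees on last reference)
 *	mutex_exit(&vmu_data.vmu_lock);
 *
 * The cache itself starts with one reference (vmc_refcnt == 1), so the
 * results remain valid until the cache has both been replaced as
 * vmu_data.vmu_cache and been released by all copying readers.
 */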
/*
 * Copy out the cached results to a caller.  Inspect the caller's flags
 * and zone to determine which cached results should be copied.
 */
static int
vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
    uint_t flags, int cpflg)
{
	vmusage_t *result, *out_result;
	vmusage_t dummy;
	size_t i, count = 0;
	size_t bufsize;
	int ret = 0;
	uint_t types = 0;

	if (nres != NULL) {
		if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg))
			return (set_errno(EFAULT));
	} else {
		bufsize = 0;
	}

	/* Figure out which results the caller is interested in. */
	if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
		types |= VMUSAGE_SYSTEM;
	if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
		types |= VMUSAGE_ZONE;
	if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
	    VMUSAGE_COL_PROJECTS))
		types |= VMUSAGE_PROJECTS;
	if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
		types |= VMUSAGE_TASKS;
	if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
		types |= VMUSAGE_RUSERS;
	if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
		types |= VMUSAGE_EUSERS;

	/* Count results for the current zone */
	out_result = buf;
	for (result = cache->vmc_results, i = 0;
	    i < cache->vmc_nresults; result++, i++) {

		/* Do not return "other-zone" results to non-global zones */
		if (curproc->p_zone != global_zone &&
		    curproc->p_zone->zone_id != result->vmu_zoneid)
			continue;

		/*
		 * If a non-global zone requests VMUSAGE_SYSTEM, fake
		 * up the VMUSAGE_ZONE result as the VMUSAGE_SYSTEM result.
		 */
		if (curproc->p_zone != global_zone &&
		    (flags & VMUSAGE_SYSTEM) != 0 &&
		    result->vmu_type == VMUSAGE_ZONE) {
			count++;
			if (out_result != NULL) {
				if (bufsize < count) {
					ret = set_errno(EOVERFLOW);
				} else {
					dummy = *result;
					dummy.vmu_zoneid = ALL_ZONES;
					dummy.vmu_id = 0;
					dummy.vmu_type = VMUSAGE_SYSTEM;
					if (ddi_copyout(&dummy, out_result,
					    sizeof (vmusage_t), cpflg))
						return (set_errno(EFAULT));
					out_result++;
				}
			}
		}

		/* Skip results that do not match the requested type */
		if ((result->vmu_type & types) == 0)
			continue;

		/* Skip collated results if not requested */
		if (result->vmu_zoneid == ALL_ZONES) {
			if (result->vmu_type == VMUSAGE_PROJECTS &&
			    (flags & VMUSAGE_COL_PROJECTS) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_EUSERS &&
			    (flags & VMUSAGE_COL_EUSERS) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_RUSERS &&
			    (flags & VMUSAGE_COL_RUSERS) == 0)
				continue;
		}

		/* Skip "other zone" results if not requested */
		if (result->vmu_zoneid != curproc->p_zone->zone_id) {
			if (result->vmu_type == VMUSAGE_ZONE &&
			    (flags & VMUSAGE_ALL_ZONES) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_PROJECTS &&
			    (flags & (VMUSAGE_ALL_PROJECTS |
			    VMUSAGE_COL_PROJECTS)) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_TASKS &&
			    (flags & VMUSAGE_ALL_TASKS) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_RUSERS &&
			    (flags & (VMUSAGE_ALL_RUSERS |
			    VMUSAGE_COL_RUSERS)) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_EUSERS &&
			    (flags & (VMUSAGE_ALL_EUSERS |
			    VMUSAGE_COL_EUSERS)) == 0)
				continue;
		}
		count++;
		if (out_result != NULL) {
			if (bufsize < count) {
				ret = set_errno(EOVERFLOW);
			} else {
				if (ddi_copyout(result, out_result,
				    sizeof (vmusage_t), cpflg))
					return (set_errno(EFAULT));
				out_result++;
			}
		}
	}
	if (nres != NULL)
		if (ddi_copyout(&count, (void *)nres, sizeof (size_t), cpflg))
			return (set_errno(EFAULT));

	return (ret);
}
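/*
 * A minimal userland sketch of the consumer side.  This is an
 * illustration, not part of this file: it assumes libc exposes the
 * syscall as getvmusage() with the prototype in <sys/vm_usage.h>, and
 * it trims error handling.  The first call passes buf == NULL so that
 * only nres is set; the second call fetches that many results.
 *
 *	#include <sys/vm_usage.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t nres = 0;
 *		size_t i;
 *		vmusage_t *buf;
 *
 *		(void) getvmusage(VMUSAGE_ZONE, 10, NULL, &nres);
 *		buf = calloc(nres, sizeof (vmusage_t));
 *		if (buf == NULL ||
 *		    getvmusage(VMUSAGE_ZONE, 10, buf, &nres) != 0) {
 *			perror("getvmusage");
 *			return (1);
 *		}
 *		for (i = 0; i < nres; i++)
 *			(void) printf("zone %d rss %llu swap %llu\n",
 *			    (int)buf[i].vmu_zoneid,
 *			    (u_longlong_t)buf[i].vmu_rss_all,
 *			    (u_longlong_t)buf[i].vmu_swap_all);
 *		return (0);
 *	}
 */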
/*
 * vm_getusage()
 *
 * Counts rss and swap by zone, project, task, and/or user.  The flags argument
 * determines the types of result structures returned.  Flags requesting
 * results from more than one zone are "flattened" to the local zone if the
 * caller is not the global zone.
 *
 * args:
 *	flags:	bitmap consisting of one or more of VMUSAGE_*.
 *	age:	maximum allowable age (time since counting was done) in
 *		seconds of the results.  Results from previous callers are
 *		cached in the kernel.
 *	buf:	pointer to buffer array of vmusage_t.  If NULL, then only nres
 *		is set on success.
 *	nres:	Set to the number of vmusage_t structures pointed to by buf
 *		before calling vm_getusage().
 *		On return 0 (success) or EOVERFLOW, set to the number of result
 *		structures returned or attempted to be returned.
 *
 * returns 0 on success, -1 on failure:
 *	EINTR (interrupted)
 *	EOVERFLOW (nres too small for results; nres is set to the value
 *	    needed for success)
 *	EINVAL (flags invalid)
 *	EFAULT (bad address for buf or nres)
 */
int
vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
{
	vmu_entity_t *entity;
	vmusage_t *result;
	int ret = 0;
	int cacherecent = 0;
	hrtime_t now;
	uint_t flags_orig;

	/*
	 * Non-global zones cannot request system-wide and/or collated
	 * results, or the system result, so munge the flags accordingly.
	 */
	flags_orig = flags;
	if (curproc->p_zone != global_zone) {
		if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
			flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
			flags |= VMUSAGE_PROJECTS;
		}
		if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
			flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
			flags |= VMUSAGE_RUSERS;
		}
		if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
			flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
			flags |= VMUSAGE_EUSERS;
		}
		if (flags & VMUSAGE_SYSTEM) {
			flags &= ~VMUSAGE_SYSTEM;
			flags |= VMUSAGE_ZONE;
		}
	}
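	/*
	 * Example of the munging above (an illustration, not additional
	 * code): a non-global-zone caller passing
	 *
	 *	VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_RUSERS | VMUSAGE_SYSTEM
	 *
	 * computes with
	 *
	 *	VMUSAGE_PROJECTS | VMUSAGE_RUSERS | VMUSAGE_ZONE
	 *
	 * while flags_orig still carries the original request, so that
	 * vmu_copyout_results() can fake up a VMUSAGE_SYSTEM result from
	 * the zone's own result.
	 */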
	/* Check for unknown flags */
	if ((flags & (~VMUSAGE_MASK)) != 0)
		return (set_errno(EINVAL));

	/* Check for no flags */
	if ((flags & VMUSAGE_MASK) == 0)
		return (set_errno(EINVAL));

	mutex_enter(&vmu_data.vmu_lock);
	now = gethrtime();

start:
	if (vmu_data.vmu_cache != NULL) {

		vmu_cache_t *cache;

		if ((vmu_data.vmu_cache->vmc_timestamp +
		    ((hrtime_t)age * NANOSEC)) > now)
			cacherecent = 1;

		if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
		    cacherecent == 1) {
			cache = vmu_data.vmu_cache;
			vmu_cache_hold(cache);
			mutex_exit(&vmu_data.vmu_lock);

			ret = vmu_copyout_results(cache, buf, nres, flags_orig,
			    cpflg);
			mutex_enter(&vmu_data.vmu_lock);
			vmu_cache_rele(cache);
			if (vmu_data.vmu_pending_waiters > 0)
				cv_broadcast(&vmu_data.vmu_cv);
			mutex_exit(&vmu_data.vmu_lock);
			return (ret);
		}
		/*
		 * If the cache is recent, it is likely that there are other
		 * consumers of vm_getusage running, so add their flags to the
		 * desired flags for the calculation.
		 */
		if (cacherecent == 1)
			flags = vmu_data.vmu_cache->vmc_flags | flags;
	}
	if (vmu_data.vmu_calc_thread == NULL) {

		vmu_cache_t *cache;

		vmu_data.vmu_calc_thread = curthread;
		vmu_data.vmu_calc_flags = flags;
		vmu_data.vmu_entities = NULL;
		vmu_data.vmu_nentities = 0;
		if (vmu_data.vmu_pending_waiters > 0)
			vmu_data.vmu_calc_flags |=
			    vmu_data.vmu_pending_flags;

		vmu_data.vmu_pending_flags = 0;
		mutex_exit(&vmu_data.vmu_lock);
		vmu_calculate();
		mutex_enter(&vmu_data.vmu_lock);
		/* copy results to cache */
		if (vmu_data.vmu_cache != NULL)
			vmu_cache_rele(vmu_data.vmu_cache);
		cache = vmu_data.vmu_cache =
		    vmu_cache_alloc(vmu_data.vmu_nentities,
		    vmu_data.vmu_calc_flags);

		result = cache->vmc_results;
		for (entity = vmu_data.vmu_entities; entity != NULL;
		    entity = entity->vme_next) {
			*result = entity->vme_result;
			result++;
		}
		cache->vmc_timestamp = gethrtime();
		vmu_cache_hold(cache);

		vmu_data.vmu_calc_flags = 0;
		vmu_data.vmu_calc_thread = NULL;

		if (vmu_data.vmu_pending_waiters > 0)
			cv_broadcast(&vmu_data.vmu_cv);

		mutex_exit(&vmu_data.vmu_lock);

		/* copy cache */
		ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg);
		mutex_enter(&vmu_data.vmu_lock);
		vmu_cache_rele(cache);
		mutex_exit(&vmu_data.vmu_lock);

		return (ret);
	}
	vmu_data.vmu_pending_flags |= flags;
	vmu_data.vmu_pending_waiters++;
	while (vmu_data.vmu_calc_thread != NULL) {
		if (cv_wait_sig(&vmu_data.vmu_cv,
		    &vmu_data.vmu_lock) == 0) {
			vmu_data.vmu_pending_waiters--;
			mutex_exit(&vmu_data.vmu_lock);
			return (set_errno(EINTR));
		}
	}
	vmu_data.vmu_pending_waiters--;
	goto start;
}