13247Sgjelinek /* 23247Sgjelinek * CDDL HEADER START 33247Sgjelinek * 43247Sgjelinek * The contents of this file are subject to the terms of the 53247Sgjelinek * Common Development and Distribution License (the "License"). 63247Sgjelinek * You may not use this file except in compliance with the License. 73247Sgjelinek * 83247Sgjelinek * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 93247Sgjelinek * or http://www.opensolaris.org/os/licensing. 103247Sgjelinek * See the License for the specific language governing permissions 113247Sgjelinek * and limitations under the License. 123247Sgjelinek * 133247Sgjelinek * When distributing Covered Code, include this CDDL HEADER in each 143247Sgjelinek * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 153247Sgjelinek * If applicable, add the following below this CDDL HEADER, with the 163247Sgjelinek * fields enclosed by brackets "[]" replaced with your own identifying 173247Sgjelinek * information: Portions Copyright [yyyy] [name of copyright owner] 183247Sgjelinek * 193247Sgjelinek * CDDL HEADER END 203247Sgjelinek */ 213247Sgjelinek 223247Sgjelinek /* 2310093SPeter.Telford@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 243247Sgjelinek * Use is subject to license terms. 253247Sgjelinek */ 263247Sgjelinek 273247Sgjelinek /* 283247Sgjelinek * vm_usage 293247Sgjelinek * 303247Sgjelinek * This file implements the getvmusage() private system call. 313247Sgjelinek * getvmusage() counts the amount of resident memory pages and swap 323247Sgjelinek * reserved by the specified process collective. A "process collective" is 333247Sgjelinek * the set of processes owned by a particular, zone, project, task, or user. 343247Sgjelinek * 353247Sgjelinek * rss and swap are counted so that for a given process collective, a page is 363247Sgjelinek * only counted once. 
 * For example, this means that if multiple processes in
 * the same project map the same page, then the project will only be charged
 * once for that page.  On the other hand, if two processes in different
 * projects map the same page, then both projects will be charged
 * for the page.
 *
 * The vm_getusage() calculation is implemented so that the first thread
 * performs the rss/swap counting.  Other callers will wait for that thread to
 * finish, copying the results.  This enables multiple rcapds and prstats to
 * consume data from the same calculation.  The results are also cached so that
 * a caller interested in recent results can just copy them instead of starting
 * a new calculation.  The caller passes the maximum age (in seconds) of the
 * data.  If the cached data is young enough, the cache is copied; otherwise,
 * a new calculation is executed and the cache is replaced with the new
 * data.
 *
 * The rss calculation for each process collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *		- If seg is private:
 *			- Lookup anons in the amp.
 *			- For incore pages not previously visited for each of
 *			  the proc's collectives, add incore pagesize to each
 *			  collective.
 *			  Anons with a refcnt of 1 can be assumed to be not
 *			  previously visited.
 *			- For address ranges without anons in the amp:
 *				- Lookup pages in underlying vnode.
 *				- For incore pages not previously visited for
 *				  each of the proc's collectives, add incore
 *				  pagesize to each collective.
 *		- If seg is shared:
 *			- Lookup pages in the shared amp or vnode.
 *			- For incore pages not previously visited for each of
 *			  the proc's collectives, add incore pagesize to each
 *			  collective.
 *
 * Swap is reserved by private segments, and shared anonymous segments.
 * The only shared anon segments which do not reserve swap are ISM segments
 * and schedctl segments, both of which can be identified by having
 * amp->swresv == 0.
 *
 * The swap calculation for each collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *		- If seg is private:
 *			- Add svd->swresv pages to swap count for each of the
 *			  proc's collectives.
 *		- If seg is anon, shared, and amp->swresv != 0
 *			- For address ranges in amp not previously visited for
 *			  each of the proc's collectives, add size of address
 *			  range to the swap count for each collective.
 *
 * These two calculations are done simultaneously, with most of the work
 * being done in vmu_calculate_seg().  The results of the calculation are
 * copied into the results cache ("vmu_data.vmu_cache").
1003247Sgjelinek * 1013247Sgjelinek * To perform the calculation, various things are tracked and cached: 1023247Sgjelinek * 1033247Sgjelinek * - incore/not-incore page ranges for all vnodes. 1043247Sgjelinek * (vmu_data.vmu_all_vnodes_hash) 1053247Sgjelinek * This eliminates looking up the same page more than once. 1063247Sgjelinek * 1073247Sgjelinek * - incore/not-incore page ranges for all shared amps. 1083247Sgjelinek * (vmu_data.vmu_all_amps_hash) 1093247Sgjelinek * This eliminates looking up the same page more than once. 1103247Sgjelinek * 1113247Sgjelinek * - visited page ranges for each collective. 1123247Sgjelinek * - per vnode (entity->vme_vnode_hash) 1133247Sgjelinek * - per shared amp (entity->vme_amp_hash) 11410093SPeter.Telford@Sun.COM * For accurate counting of map-shared and COW-shared pages. 1153247Sgjelinek * 1163247Sgjelinek * - visited private anons (refcnt > 1) for each collective. 1173247Sgjelinek * (entity->vme_anon_hash) 11810093SPeter.Telford@Sun.COM * For accurate counting of COW-shared pages. 1193247Sgjelinek * 1203247Sgjelinek * The common accounting structure is the vmu_entity_t, which represents 1213247Sgjelinek * collectives: 1223247Sgjelinek * 1233247Sgjelinek * - A zone. 1243247Sgjelinek * - A project, task, or user within a zone. 1253247Sgjelinek * - The entire system (vmu_data.vmu_system). 1263247Sgjelinek * - Each collapsed (col) project and user. This means a given projid or 1273247Sgjelinek * uid, regardless of which zone the process is in. For instance, 1283247Sgjelinek * project 0 in the global zone and project 0 in a non global zone are 1293247Sgjelinek * the same collapsed project. 1303247Sgjelinek * 1313247Sgjelinek * Each entity structure tracks which pages have been already visited for 1323247Sgjelinek * that entity (via previously inspected processes) so that these pages are 1333247Sgjelinek * not double counted. 
1343247Sgjelinek */ 1353247Sgjelinek 1363247Sgjelinek #include <sys/errno.h> 1373247Sgjelinek #include <sys/types.h> 1383247Sgjelinek #include <sys/zone.h> 1393247Sgjelinek #include <sys/proc.h> 1403247Sgjelinek #include <sys/project.h> 1413247Sgjelinek #include <sys/task.h> 1423247Sgjelinek #include <sys/thread.h> 1433247Sgjelinek #include <sys/time.h> 1443247Sgjelinek #include <sys/mman.h> 1453247Sgjelinek #include <sys/modhash.h> 1463247Sgjelinek #include <sys/modhash_impl.h> 1473247Sgjelinek #include <sys/shm.h> 1483247Sgjelinek #include <sys/swap.h> 1493247Sgjelinek #include <sys/synch.h> 1503247Sgjelinek #include <sys/systm.h> 1513247Sgjelinek #include <sys/var.h> 1523247Sgjelinek #include <sys/vm_usage.h> 1533247Sgjelinek #include <sys/zone.h> 1547884Sgerald.jelinek@sun.com #include <sys/sunddi.h> 15510093SPeter.Telford@Sun.COM #include <sys/avl.h> 1563247Sgjelinek #include <vm/anon.h> 1573247Sgjelinek #include <vm/as.h> 1583247Sgjelinek #include <vm/seg_vn.h> 1593247Sgjelinek #include <vm/seg_spt.h> 1603247Sgjelinek 1613247Sgjelinek #define VMUSAGE_HASH_SIZE 512 1623247Sgjelinek 1633247Sgjelinek #define VMUSAGE_TYPE_VNODE 1 1643247Sgjelinek #define VMUSAGE_TYPE_AMP 2 1653247Sgjelinek #define VMUSAGE_TYPE_ANON 3 1663247Sgjelinek 1673247Sgjelinek #define VMUSAGE_BOUND_UNKNOWN 0 1683247Sgjelinek #define VMUSAGE_BOUND_INCORE 1 1693247Sgjelinek #define VMUSAGE_BOUND_NOT_INCORE 2 1703247Sgjelinek 17110093SPeter.Telford@Sun.COM #define ISWITHIN(node, addr) ((node)->vmb_start <= addr && \ 17210093SPeter.Telford@Sun.COM (node)->vmb_end >= addr ? 1 : 0) 17310093SPeter.Telford@Sun.COM 1743247Sgjelinek /* 1753247Sgjelinek * bounds for vnodes and shared amps 1763247Sgjelinek * Each bound is either entirely incore, entirely not in core, or 17710093SPeter.Telford@Sun.COM * entirely unknown. 
bounds are stored in an avl tree sorted by start member 17810093SPeter.Telford@Sun.COM * when in use, otherwise (free or temporary lists) they're strung 17910093SPeter.Telford@Sun.COM * together off of vmb_next. 1803247Sgjelinek */ 1813247Sgjelinek typedef struct vmu_bound { 18210093SPeter.Telford@Sun.COM avl_node_t vmb_node; 18310093SPeter.Telford@Sun.COM struct vmu_bound *vmb_next; /* NULL in tree else on free or temp list */ 1843247Sgjelinek pgcnt_t vmb_start; /* page offset in vnode/amp on which bound starts */ 1853247Sgjelinek pgcnt_t vmb_end; /* page offset in vnode/amp on which bound ends */ 1863247Sgjelinek char vmb_type; /* One of VMUSAGE_BOUND_* */ 1873247Sgjelinek } vmu_bound_t; 1883247Sgjelinek 1893247Sgjelinek /* 1903247Sgjelinek * hash of visited objects (vnodes or shared amps) 1913247Sgjelinek * key is address of vnode or amp. Bounds lists known incore/non-incore 1923247Sgjelinek * bounds for vnode/amp. 1933247Sgjelinek */ 1943247Sgjelinek typedef struct vmu_object { 1953247Sgjelinek struct vmu_object *vmo_next; /* free list */ 1963247Sgjelinek caddr_t vmo_key; 1973247Sgjelinek short vmo_type; 19810093SPeter.Telford@Sun.COM avl_tree_t vmo_bounds; 1993247Sgjelinek } vmu_object_t; 2003247Sgjelinek 2013247Sgjelinek /* 2023247Sgjelinek * Entity by which to count results. 2033247Sgjelinek * 2043247Sgjelinek * The entity structure keeps the current rss/swap counts for each entity 2053247Sgjelinek * (zone, project, etc), and hashes of vm structures that have already 2063247Sgjelinek * been visited for the entity. 2073247Sgjelinek * 2083247Sgjelinek * vme_next: links the list of all entities currently being counted by 2093247Sgjelinek * vmu_calculate(). 2103247Sgjelinek * 2113247Sgjelinek * vme_next_calc: links the list of entities related to the current process 2123247Sgjelinek * being counted by vmu_calculate_proc(). 2133247Sgjelinek * 2143247Sgjelinek * vmu_calculate_proc() walks all processes. 
For each process, it makes a 2153247Sgjelinek * list of the entities related to that process using vme_next_calc. This 2163247Sgjelinek * list changes each time vmu_calculate_proc() is called. 2173247Sgjelinek * 2183247Sgjelinek */ 2193247Sgjelinek typedef struct vmu_entity { 2203247Sgjelinek struct vmu_entity *vme_next; 2213247Sgjelinek struct vmu_entity *vme_next_calc; 2223247Sgjelinek mod_hash_t *vme_vnode_hash; /* vnodes visited for entity */ 2233247Sgjelinek mod_hash_t *vme_amp_hash; /* shared amps visited for entity */ 22410093SPeter.Telford@Sun.COM mod_hash_t *vme_anon_hash; /* COW anons visited for entity */ 2253247Sgjelinek vmusage_t vme_result; /* identifies entity and results */ 2263247Sgjelinek } vmu_entity_t; 2273247Sgjelinek 2283247Sgjelinek /* 2293247Sgjelinek * Hash of entities visited within a zone, and an entity for the zone 2303247Sgjelinek * itself. 2313247Sgjelinek */ 2323247Sgjelinek typedef struct vmu_zone { 2333247Sgjelinek struct vmu_zone *vmz_next; /* free list */ 2343247Sgjelinek id_t vmz_id; 2353247Sgjelinek vmu_entity_t *vmz_zone; 2363247Sgjelinek mod_hash_t *vmz_projects_hash; 2373247Sgjelinek mod_hash_t *vmz_tasks_hash; 2383247Sgjelinek mod_hash_t *vmz_rusers_hash; 2393247Sgjelinek mod_hash_t *vmz_eusers_hash; 2403247Sgjelinek } vmu_zone_t; 2413247Sgjelinek 2423247Sgjelinek /* 2433247Sgjelinek * Cache of results from last calculation 2443247Sgjelinek */ 2453247Sgjelinek typedef struct vmu_cache { 2463247Sgjelinek vmusage_t *vmc_results; /* Results from last call to */ 2473247Sgjelinek /* vm_getusage(). 
*/ 2483247Sgjelinek uint64_t vmc_nresults; /* Count of cached results */ 2493247Sgjelinek uint64_t vmc_refcnt; /* refcnt for free */ 2503247Sgjelinek uint_t vmc_flags; /* Flags for vm_getusage() */ 2513247Sgjelinek hrtime_t vmc_timestamp; /* when cache was created */ 2523247Sgjelinek } vmu_cache_t; 2533247Sgjelinek 2543247Sgjelinek /* 2553247Sgjelinek * top level rss info for the system 2563247Sgjelinek */ 2573247Sgjelinek typedef struct vmu_data { 2583247Sgjelinek kmutex_t vmu_lock; /* Protects vmu_data */ 2593247Sgjelinek kcondvar_t vmu_cv; /* Used to signal threads */ 2603247Sgjelinek /* Waiting for */ 2613247Sgjelinek /* Rss_calc_thread to finish */ 2623247Sgjelinek vmu_entity_t *vmu_system; /* Entity for tracking */ 2633247Sgjelinek /* rss/swap for all processes */ 2643247Sgjelinek /* in all zones */ 2653247Sgjelinek mod_hash_t *vmu_zones_hash; /* Zones visited */ 2663247Sgjelinek mod_hash_t *vmu_projects_col_hash; /* These *_col_hash hashes */ 2673247Sgjelinek mod_hash_t *vmu_rusers_col_hash; /* keep track of entities, */ 2683247Sgjelinek mod_hash_t *vmu_eusers_col_hash; /* ignoring zoneid, in order */ 2693247Sgjelinek /* to implement VMUSAGE_COL_* */ 2703247Sgjelinek /* flags, which aggregate by */ 2713247Sgjelinek /* project or user regardless */ 2723247Sgjelinek /* of zoneid. 
*/ 2733247Sgjelinek mod_hash_t *vmu_all_vnodes_hash; /* System wide visited vnodes */ 2743247Sgjelinek /* to track incore/not-incore */ 2753247Sgjelinek mod_hash_t *vmu_all_amps_hash; /* System wide visited shared */ 2763247Sgjelinek /* amps to track incore/not- */ 2773247Sgjelinek /* incore */ 2783247Sgjelinek vmu_entity_t *vmu_entities; /* Linked list of entities */ 2793247Sgjelinek size_t vmu_nentities; /* Count of entities in list */ 2803247Sgjelinek vmu_cache_t *vmu_cache; /* Cached results */ 2813247Sgjelinek kthread_t *vmu_calc_thread; /* NULL, or thread running */ 2823247Sgjelinek /* vmu_calculate() */ 2833247Sgjelinek uint_t vmu_calc_flags; /* Flags being using by */ 2843247Sgjelinek /* currently running calc */ 2853247Sgjelinek /* thread */ 2863247Sgjelinek uint_t vmu_pending_flags; /* Flags of vm_getusage() */ 2873247Sgjelinek /* threads waiting for */ 2883247Sgjelinek /* calc thread to finish */ 2893247Sgjelinek uint_t vmu_pending_waiters; /* Number of threads waiting */ 2903247Sgjelinek /* for calc thread */ 2913247Sgjelinek vmu_bound_t *vmu_free_bounds; 2923247Sgjelinek vmu_object_t *vmu_free_objects; 2933247Sgjelinek vmu_entity_t *vmu_free_entities; 2943247Sgjelinek vmu_zone_t *vmu_free_zones; 2953247Sgjelinek } vmu_data_t; 2963247Sgjelinek 2973247Sgjelinek extern struct as kas; 2983247Sgjelinek extern proc_t *practive; 2993247Sgjelinek extern zone_t *global_zone; 3003247Sgjelinek extern struct seg_ops segvn_ops; 3013247Sgjelinek extern struct seg_ops segspt_shmops; 3023247Sgjelinek 3033247Sgjelinek static vmu_data_t vmu_data; 3043247Sgjelinek static kmem_cache_t *vmu_bound_cache; 3053247Sgjelinek static kmem_cache_t *vmu_object_cache; 3063247Sgjelinek 3073247Sgjelinek /* 30810093SPeter.Telford@Sun.COM * Comparison routine for AVL tree. We base our comparison on vmb_start. 
30910093SPeter.Telford@Sun.COM */ 31010093SPeter.Telford@Sun.COM static int 31110093SPeter.Telford@Sun.COM bounds_cmp(const void *bnd1, const void *bnd2) 31210093SPeter.Telford@Sun.COM { 31310093SPeter.Telford@Sun.COM const vmu_bound_t *bound1 = bnd1; 31410093SPeter.Telford@Sun.COM const vmu_bound_t *bound2 = bnd2; 31510093SPeter.Telford@Sun.COM 31610093SPeter.Telford@Sun.COM if (bound1->vmb_start == bound2->vmb_start) { 31710093SPeter.Telford@Sun.COM return (0); 31810093SPeter.Telford@Sun.COM } 31910093SPeter.Telford@Sun.COM if (bound1->vmb_start < bound2->vmb_start) { 32010093SPeter.Telford@Sun.COM return (-1); 32110093SPeter.Telford@Sun.COM } 32210093SPeter.Telford@Sun.COM 32310093SPeter.Telford@Sun.COM return (1); 32410093SPeter.Telford@Sun.COM } 32510093SPeter.Telford@Sun.COM 32610093SPeter.Telford@Sun.COM /* 32710093SPeter.Telford@Sun.COM * Save a bound on the free list. 3283247Sgjelinek */ 3293247Sgjelinek static void 3303247Sgjelinek vmu_free_bound(vmu_bound_t *bound) 3313247Sgjelinek { 3323247Sgjelinek bound->vmb_next = vmu_data.vmu_free_bounds; 33310093SPeter.Telford@Sun.COM bound->vmb_start = 0; 33410093SPeter.Telford@Sun.COM bound->vmb_end = 0; 33510093SPeter.Telford@Sun.COM bound->vmb_type = 0; 3363247Sgjelinek vmu_data.vmu_free_bounds = bound; 3373247Sgjelinek } 3383247Sgjelinek 3393247Sgjelinek /* 3403247Sgjelinek * Free an object, and all visited bound info. 
3413247Sgjelinek */ 3423247Sgjelinek static void 3433247Sgjelinek vmu_free_object(mod_hash_val_t val) 3443247Sgjelinek { 3453247Sgjelinek vmu_object_t *obj = (vmu_object_t *)val; 34610093SPeter.Telford@Sun.COM avl_tree_t *tree = &(obj->vmo_bounds); 34710093SPeter.Telford@Sun.COM vmu_bound_t *bound; 34810093SPeter.Telford@Sun.COM void *cookie = NULL; 3493247Sgjelinek 35010093SPeter.Telford@Sun.COM while ((bound = avl_destroy_nodes(tree, &cookie)) != NULL) 35110093SPeter.Telford@Sun.COM vmu_free_bound(bound); 35210093SPeter.Telford@Sun.COM avl_destroy(tree); 35310093SPeter.Telford@Sun.COM 35410093SPeter.Telford@Sun.COM obj->vmo_type = 0; 3553247Sgjelinek obj->vmo_next = vmu_data.vmu_free_objects; 3563247Sgjelinek vmu_data.vmu_free_objects = obj; 3573247Sgjelinek } 3583247Sgjelinek 3593247Sgjelinek /* 3603247Sgjelinek * Free an entity, and hashes of visited objects for that entity. 3613247Sgjelinek */ 3623247Sgjelinek static void 3633247Sgjelinek vmu_free_entity(mod_hash_val_t val) 3643247Sgjelinek { 3653247Sgjelinek vmu_entity_t *entity = (vmu_entity_t *)val; 3663247Sgjelinek 3673247Sgjelinek if (entity->vme_vnode_hash != NULL) 3683247Sgjelinek i_mod_hash_clear_nosync(entity->vme_vnode_hash); 3693247Sgjelinek if (entity->vme_amp_hash != NULL) 3703247Sgjelinek i_mod_hash_clear_nosync(entity->vme_amp_hash); 3713247Sgjelinek if (entity->vme_anon_hash != NULL) 3723247Sgjelinek i_mod_hash_clear_nosync(entity->vme_anon_hash); 3733247Sgjelinek 3743247Sgjelinek entity->vme_next = vmu_data.vmu_free_entities; 3753247Sgjelinek vmu_data.vmu_free_entities = entity; 3763247Sgjelinek } 3773247Sgjelinek 3783247Sgjelinek /* 3793247Sgjelinek * Free zone entity, and all hashes of entities inside that zone, 3803247Sgjelinek * which are projects, tasks, and users. 
3813247Sgjelinek */ 3823247Sgjelinek static void 3833247Sgjelinek vmu_free_zone(mod_hash_val_t val) 3843247Sgjelinek { 3853247Sgjelinek vmu_zone_t *zone = (vmu_zone_t *)val; 3863247Sgjelinek 3873247Sgjelinek if (zone->vmz_zone != NULL) { 3883247Sgjelinek vmu_free_entity((mod_hash_val_t)zone->vmz_zone); 3893247Sgjelinek zone->vmz_zone = NULL; 3903247Sgjelinek } 3913247Sgjelinek if (zone->vmz_projects_hash != NULL) 3923247Sgjelinek i_mod_hash_clear_nosync(zone->vmz_projects_hash); 3933247Sgjelinek if (zone->vmz_tasks_hash != NULL) 3943247Sgjelinek i_mod_hash_clear_nosync(zone->vmz_tasks_hash); 3953247Sgjelinek if (zone->vmz_rusers_hash != NULL) 3963247Sgjelinek i_mod_hash_clear_nosync(zone->vmz_rusers_hash); 3973247Sgjelinek if (zone->vmz_eusers_hash != NULL) 3983247Sgjelinek i_mod_hash_clear_nosync(zone->vmz_eusers_hash); 3993247Sgjelinek zone->vmz_next = vmu_data.vmu_free_zones; 4003247Sgjelinek vmu_data.vmu_free_zones = zone; 4013247Sgjelinek } 4023247Sgjelinek 4033247Sgjelinek /* 4043247Sgjelinek * Initialize synchronization primitives and hashes for system-wide tracking 4053247Sgjelinek * of visited vnodes and shared amps. Initialize results cache. 
4063247Sgjelinek */ 4073247Sgjelinek void 4083247Sgjelinek vm_usage_init() 4093247Sgjelinek { 4103247Sgjelinek mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL); 4113247Sgjelinek cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL); 4123247Sgjelinek 4133247Sgjelinek vmu_data.vmu_system = NULL; 4143247Sgjelinek vmu_data.vmu_zones_hash = NULL; 4153247Sgjelinek vmu_data.vmu_projects_col_hash = NULL; 4163247Sgjelinek vmu_data.vmu_rusers_col_hash = NULL; 4173247Sgjelinek vmu_data.vmu_eusers_col_hash = NULL; 4183247Sgjelinek 4193247Sgjelinek vmu_data.vmu_free_bounds = NULL; 4203247Sgjelinek vmu_data.vmu_free_objects = NULL; 4213247Sgjelinek vmu_data.vmu_free_entities = NULL; 4223247Sgjelinek vmu_data.vmu_free_zones = NULL; 4233247Sgjelinek 4243247Sgjelinek vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash( 4253247Sgjelinek "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object, 4263247Sgjelinek sizeof (vnode_t)); 4273247Sgjelinek vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash( 4283247Sgjelinek "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object, 4293247Sgjelinek sizeof (struct anon_map)); 4303247Sgjelinek vmu_data.vmu_projects_col_hash = mod_hash_create_idhash( 4313247Sgjelinek "vmusage collapsed project hash", VMUSAGE_HASH_SIZE, 4323247Sgjelinek vmu_free_entity); 4333247Sgjelinek vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash( 4343247Sgjelinek "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE, 4353247Sgjelinek vmu_free_entity); 4363247Sgjelinek vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash( 4373247Sgjelinek "vmusage collpased euser hash", VMUSAGE_HASH_SIZE, 4383247Sgjelinek vmu_free_entity); 4393247Sgjelinek vmu_data.vmu_zones_hash = mod_hash_create_idhash( 4403247Sgjelinek "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone); 4413247Sgjelinek 4423247Sgjelinek vmu_bound_cache = kmem_cache_create("vmu_bound_cache", 4433247Sgjelinek sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 4443247Sgjelinek vmu_object_cache = 
kmem_cache_create("vmu_object_cache", 4453247Sgjelinek sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 4463247Sgjelinek 4473247Sgjelinek vmu_data.vmu_entities = NULL; 4483247Sgjelinek vmu_data.vmu_nentities = 0; 4493247Sgjelinek 4503247Sgjelinek vmu_data.vmu_cache = NULL; 4513247Sgjelinek vmu_data.vmu_calc_thread = NULL; 4523247Sgjelinek vmu_data.vmu_calc_flags = 0; 4533247Sgjelinek vmu_data.vmu_pending_flags = 0; 4543247Sgjelinek vmu_data.vmu_pending_waiters = 0; 4553247Sgjelinek } 4563247Sgjelinek 4573247Sgjelinek /* 4583247Sgjelinek * Allocate hashes for tracking vm objects visited for an entity. 4593247Sgjelinek * Update list of entities. 4603247Sgjelinek */ 4613247Sgjelinek static vmu_entity_t * 4623247Sgjelinek vmu_alloc_entity(id_t id, int type, id_t zoneid) 4633247Sgjelinek { 4643247Sgjelinek vmu_entity_t *entity; 4653247Sgjelinek 4663247Sgjelinek if (vmu_data.vmu_free_entities != NULL) { 4673247Sgjelinek entity = vmu_data.vmu_free_entities; 4683247Sgjelinek vmu_data.vmu_free_entities = 4693247Sgjelinek vmu_data.vmu_free_entities->vme_next; 4703247Sgjelinek bzero(&entity->vme_result, sizeof (vmusage_t)); 4713247Sgjelinek } else { 4723247Sgjelinek entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP); 4733247Sgjelinek } 4743247Sgjelinek entity->vme_result.vmu_id = id; 4753247Sgjelinek entity->vme_result.vmu_zoneid = zoneid; 4763247Sgjelinek entity->vme_result.vmu_type = type; 4773247Sgjelinek 4783247Sgjelinek if (entity->vme_vnode_hash == NULL) 4793247Sgjelinek entity->vme_vnode_hash = mod_hash_create_ptrhash( 4803247Sgjelinek "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object, 4813247Sgjelinek sizeof (vnode_t)); 4823247Sgjelinek 4833247Sgjelinek if (entity->vme_amp_hash == NULL) 4843247Sgjelinek entity->vme_amp_hash = mod_hash_create_ptrhash( 4853247Sgjelinek "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object, 4863247Sgjelinek sizeof (struct anon_map)); 4873247Sgjelinek 4883247Sgjelinek if (entity->vme_anon_hash == NULL) 
4893247Sgjelinek entity->vme_anon_hash = mod_hash_create_ptrhash( 4903247Sgjelinek "vmusage anon hash", VMUSAGE_HASH_SIZE, 4913247Sgjelinek mod_hash_null_valdtor, sizeof (struct anon)); 4923247Sgjelinek 4933247Sgjelinek entity->vme_next = vmu_data.vmu_entities; 4943247Sgjelinek vmu_data.vmu_entities = entity; 4953247Sgjelinek vmu_data.vmu_nentities++; 4963247Sgjelinek 4973247Sgjelinek return (entity); 4983247Sgjelinek } 4993247Sgjelinek 5003247Sgjelinek /* 5013247Sgjelinek * Allocate a zone entity, and hashes for tracking visited vm objects 5023247Sgjelinek * for projects, tasks, and users within that zone. 5033247Sgjelinek */ 5043247Sgjelinek static vmu_zone_t * 5053247Sgjelinek vmu_alloc_zone(id_t id) 5063247Sgjelinek { 5073247Sgjelinek vmu_zone_t *zone; 5083247Sgjelinek 5093247Sgjelinek if (vmu_data.vmu_free_zones != NULL) { 5103247Sgjelinek zone = vmu_data.vmu_free_zones; 5113247Sgjelinek vmu_data.vmu_free_zones = 5123247Sgjelinek vmu_data.vmu_free_zones->vmz_next; 5133247Sgjelinek zone->vmz_next = NULL; 5143247Sgjelinek zone->vmz_zone = NULL; 5153247Sgjelinek } else { 5163247Sgjelinek zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP); 5173247Sgjelinek } 5183247Sgjelinek 5193247Sgjelinek zone->vmz_id = id; 5203247Sgjelinek 5213247Sgjelinek if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0) 5223247Sgjelinek zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id); 5233247Sgjelinek 5243247Sgjelinek if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS | 5253247Sgjelinek VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL) 5263247Sgjelinek zone->vmz_projects_hash = mod_hash_create_idhash( 5273247Sgjelinek "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity); 5283247Sgjelinek 5293247Sgjelinek if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) 5303247Sgjelinek != 0 && zone->vmz_tasks_hash == NULL) 5313247Sgjelinek zone->vmz_tasks_hash = mod_hash_create_idhash( 5323247Sgjelinek "vmusage task hash", VMUSAGE_HASH_SIZE, 
vmu_free_entity); 5333247Sgjelinek 5343247Sgjelinek if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) 5353247Sgjelinek != 0 && zone->vmz_rusers_hash == NULL) 5363247Sgjelinek zone->vmz_rusers_hash = mod_hash_create_idhash( 5373247Sgjelinek "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity); 5383247Sgjelinek 5393247Sgjelinek if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) 5403247Sgjelinek != 0 && zone->vmz_eusers_hash == NULL) 5413247Sgjelinek zone->vmz_eusers_hash = mod_hash_create_idhash( 5423247Sgjelinek "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity); 5433247Sgjelinek 5443247Sgjelinek return (zone); 5453247Sgjelinek } 5463247Sgjelinek 5473247Sgjelinek /* 5483247Sgjelinek * Allocate a structure for tracking visited bounds for a vm object. 5493247Sgjelinek */ 5503247Sgjelinek static vmu_object_t * 5513247Sgjelinek vmu_alloc_object(caddr_t key, int type) 5523247Sgjelinek { 5533247Sgjelinek vmu_object_t *object; 5543247Sgjelinek 5553247Sgjelinek if (vmu_data.vmu_free_objects != NULL) { 5563247Sgjelinek object = vmu_data.vmu_free_objects; 5573247Sgjelinek vmu_data.vmu_free_objects = 5583247Sgjelinek vmu_data.vmu_free_objects->vmo_next; 5593247Sgjelinek } else { 5603247Sgjelinek object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP); 5613247Sgjelinek } 5623247Sgjelinek 56310093SPeter.Telford@Sun.COM object->vmo_next = NULL; 5643247Sgjelinek object->vmo_key = key; 5653247Sgjelinek object->vmo_type = type; 56610093SPeter.Telford@Sun.COM avl_create(&(object->vmo_bounds), bounds_cmp, sizeof (vmu_bound_t), 0); 5673247Sgjelinek 5683247Sgjelinek return (object); 5693247Sgjelinek } 5703247Sgjelinek 5713247Sgjelinek /* 5723247Sgjelinek * Allocate and return a bound structure. 
5733247Sgjelinek */ 5743247Sgjelinek static vmu_bound_t * 5753247Sgjelinek vmu_alloc_bound() 5763247Sgjelinek { 5773247Sgjelinek vmu_bound_t *bound; 5783247Sgjelinek 5793247Sgjelinek if (vmu_data.vmu_free_bounds != NULL) { 5803247Sgjelinek bound = vmu_data.vmu_free_bounds; 5813247Sgjelinek vmu_data.vmu_free_bounds = 5823247Sgjelinek vmu_data.vmu_free_bounds->vmb_next; 5833247Sgjelinek } else { 5843247Sgjelinek bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP); 5853247Sgjelinek } 58610093SPeter.Telford@Sun.COM 58710093SPeter.Telford@Sun.COM bound->vmb_next = NULL; 58810093SPeter.Telford@Sun.COM bound->vmb_start = 0; 58910093SPeter.Telford@Sun.COM bound->vmb_end = 0; 59010093SPeter.Telford@Sun.COM bound->vmb_type = 0; 5913247Sgjelinek return (bound); 5923247Sgjelinek } 5933247Sgjelinek 5943247Sgjelinek /* 5953247Sgjelinek * vmu_find_insert_* functions implement hash lookup or allocate and 5963247Sgjelinek * insert operations. 5973247Sgjelinek */ 5983247Sgjelinek static vmu_object_t * 5993247Sgjelinek vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type) 6003247Sgjelinek { 6013247Sgjelinek int ret; 6023247Sgjelinek vmu_object_t *object; 6033247Sgjelinek 6043247Sgjelinek ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key, 6053247Sgjelinek (mod_hash_val_t *)&object); 6063247Sgjelinek if (ret != 0) { 6073247Sgjelinek object = vmu_alloc_object(key, type); 6083247Sgjelinek ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key, 6093247Sgjelinek (mod_hash_val_t)object, (mod_hash_hndl_t)0); 6103247Sgjelinek ASSERT(ret == 0); 6113247Sgjelinek } 6123247Sgjelinek return (object); 6133247Sgjelinek } 6143247Sgjelinek 6153247Sgjelinek static int 6163247Sgjelinek vmu_find_insert_anon(mod_hash_t *hash, caddr_t key) 6173247Sgjelinek { 6183247Sgjelinek int ret; 6193247Sgjelinek caddr_t val; 6203247Sgjelinek 6213247Sgjelinek ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key, 6223247Sgjelinek (mod_hash_val_t *)&val); 6233247Sgjelinek 6243247Sgjelinek if (ret == 
0) 6253247Sgjelinek return (0); 6263247Sgjelinek 6273247Sgjelinek ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key, 6283247Sgjelinek (mod_hash_val_t)key, (mod_hash_hndl_t)0); 6293247Sgjelinek 6303247Sgjelinek ASSERT(ret == 0); 6313247Sgjelinek 6323247Sgjelinek return (1); 6333247Sgjelinek } 6343247Sgjelinek 6353247Sgjelinek static vmu_entity_t * 6363247Sgjelinek vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid) 6373247Sgjelinek { 6383247Sgjelinek int ret; 6393247Sgjelinek vmu_entity_t *entity; 6403247Sgjelinek 6413247Sgjelinek ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id, 6423247Sgjelinek (mod_hash_val_t *)&entity); 6433247Sgjelinek if (ret != 0) { 6443247Sgjelinek entity = vmu_alloc_entity(id, type, zoneid); 6453247Sgjelinek ret = i_mod_hash_insert_nosync(hash, 6463247Sgjelinek (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity, 6473247Sgjelinek (mod_hash_hndl_t)0); 6483247Sgjelinek ASSERT(ret == 0); 6493247Sgjelinek } 6503247Sgjelinek return (entity); 6513247Sgjelinek } 6523247Sgjelinek 6533247Sgjelinek 6543247Sgjelinek 6553247Sgjelinek 6563247Sgjelinek /* 6573247Sgjelinek * Returns list of object bounds between start and end. New bounds inserted 6583247Sgjelinek * by this call are given type. 6593247Sgjelinek * 6603247Sgjelinek * Returns the number of pages covered if new bounds are created. Returns 0 6613247Sgjelinek * if region between start/end consists of all existing bounds. 
6623247Sgjelinek */ 6633247Sgjelinek static pgcnt_t 6643247Sgjelinek vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t 6653247Sgjelinek end, char type, vmu_bound_t **first, vmu_bound_t **last) 6663247Sgjelinek { 66710093SPeter.Telford@Sun.COM avl_tree_t *tree = &(ro->vmo_bounds); 66810093SPeter.Telford@Sun.COM avl_index_t where; 66910093SPeter.Telford@Sun.COM vmu_bound_t *walker, *tmp; 67010093SPeter.Telford@Sun.COM pgcnt_t ret = 0; 67110093SPeter.Telford@Sun.COM 67210093SPeter.Telford@Sun.COM ASSERT(start <= end); 6733247Sgjelinek 6743247Sgjelinek *first = *last = NULL; 6753247Sgjelinek 67610093SPeter.Telford@Sun.COM tmp = vmu_alloc_bound(); 67710093SPeter.Telford@Sun.COM tmp->vmb_start = start; 67810093SPeter.Telford@Sun.COM tmp->vmb_type = type; 67910093SPeter.Telford@Sun.COM 68010093SPeter.Telford@Sun.COM /* Hopelessly optimistic case. */ 68110093SPeter.Telford@Sun.COM if (walker = avl_find(tree, tmp, &where)) { 68210093SPeter.Telford@Sun.COM /* We got lucky. */ 68310093SPeter.Telford@Sun.COM vmu_free_bound(tmp); 68410093SPeter.Telford@Sun.COM *first = walker; 68510093SPeter.Telford@Sun.COM } 68610093SPeter.Telford@Sun.COM 68710093SPeter.Telford@Sun.COM if (walker == NULL) { 68810093SPeter.Telford@Sun.COM /* Is start in the previous node? */ 68910093SPeter.Telford@Sun.COM walker = avl_nearest(tree, where, AVL_BEFORE); 69010093SPeter.Telford@Sun.COM if (walker != NULL) { 69110093SPeter.Telford@Sun.COM if (ISWITHIN(walker, start)) { 69210093SPeter.Telford@Sun.COM /* We found start. */ 69310093SPeter.Telford@Sun.COM vmu_free_bound(tmp); 69410093SPeter.Telford@Sun.COM *first = walker; 69510093SPeter.Telford@Sun.COM } 6963247Sgjelinek } 6973247Sgjelinek } 6983247Sgjelinek 69910093SPeter.Telford@Sun.COM /* 70010093SPeter.Telford@Sun.COM * At this point, if *first is still NULL, then we 70110093SPeter.Telford@Sun.COM * didn't get a direct hit and start isn't covered 70210093SPeter.Telford@Sun.COM * by the previous node. 
We know that the next node 70310093SPeter.Telford@Sun.COM * must have a greater start value than we require 70410093SPeter.Telford@Sun.COM * because avl_find tells us where the AVL routines would 70510093SPeter.Telford@Sun.COM * insert our new node. We have some gap between the 70610093SPeter.Telford@Sun.COM * start we want and the next node. 70710093SPeter.Telford@Sun.COM */ 7083247Sgjelinek if (*first == NULL) { 70910093SPeter.Telford@Sun.COM walker = avl_nearest(tree, where, AVL_AFTER); 71010093SPeter.Telford@Sun.COM if (walker != NULL && walker->vmb_start <= end) { 71110093SPeter.Telford@Sun.COM /* Fill the gap. */ 71210093SPeter.Telford@Sun.COM tmp->vmb_end = walker->vmb_start - 1; 71310093SPeter.Telford@Sun.COM *first = tmp; 7143247Sgjelinek } else { 71510093SPeter.Telford@Sun.COM /* We have a gap over [start, end]. */ 71610093SPeter.Telford@Sun.COM tmp->vmb_end = end; 71710093SPeter.Telford@Sun.COM *first = *last = tmp; 7183247Sgjelinek } 71910093SPeter.Telford@Sun.COM ret += tmp->vmb_end - tmp->vmb_start + 1; 72010093SPeter.Telford@Sun.COM avl_insert(tree, tmp, where); 72110093SPeter.Telford@Sun.COM } 72210093SPeter.Telford@Sun.COM 72310093SPeter.Telford@Sun.COM ASSERT(*first != NULL); 72410093SPeter.Telford@Sun.COM 72510093SPeter.Telford@Sun.COM if (*last != NULL) { 72610093SPeter.Telford@Sun.COM /* We're done. */ 7273247Sgjelinek return (ret); 7283247Sgjelinek } 7293247Sgjelinek 7303247Sgjelinek /* 73110093SPeter.Telford@Sun.COM * If we are here we still need to set *last and 73210093SPeter.Telford@Sun.COM * that may involve filling in some gaps. 7333247Sgjelinek */ 73410093SPeter.Telford@Sun.COM *last = *first; 73510093SPeter.Telford@Sun.COM for (;;) { 73610093SPeter.Telford@Sun.COM if (ISWITHIN(*last, end)) { 73710093SPeter.Telford@Sun.COM /* We're done. 
*/ 73810093SPeter.Telford@Sun.COM break; 7393247Sgjelinek } 74010093SPeter.Telford@Sun.COM walker = AVL_NEXT(tree, *last); 74110093SPeter.Telford@Sun.COM if (walker == NULL || walker->vmb_start > end) { 74210093SPeter.Telford@Sun.COM /* Bottom or mid tree with gap. */ 74310093SPeter.Telford@Sun.COM tmp = vmu_alloc_bound(); 74410093SPeter.Telford@Sun.COM tmp->vmb_start = (*last)->vmb_end + 1; 74510093SPeter.Telford@Sun.COM tmp->vmb_end = end; 746*10543SPeter.Telford@Sun.COM tmp->vmb_type = type; 7473247Sgjelinek ret += tmp->vmb_end - tmp->vmb_start + 1; 74810093SPeter.Telford@Sun.COM avl_insert_here(tree, tmp, *last, AVL_AFTER); 7493247Sgjelinek *last = tmp; 7503247Sgjelinek break; 75110093SPeter.Telford@Sun.COM } else { 75210093SPeter.Telford@Sun.COM if ((*last)->vmb_end + 1 != walker->vmb_start) { 75310093SPeter.Telford@Sun.COM /* Non-contiguous. */ 75410093SPeter.Telford@Sun.COM tmp = vmu_alloc_bound(); 75510093SPeter.Telford@Sun.COM tmp->vmb_start = (*last)->vmb_end + 1; 75610093SPeter.Telford@Sun.COM tmp->vmb_end = walker->vmb_start - 1; 757*10543SPeter.Telford@Sun.COM tmp->vmb_type = type; 75810093SPeter.Telford@Sun.COM ret += tmp->vmb_end - tmp->vmb_start + 1; 75910093SPeter.Telford@Sun.COM avl_insert_here(tree, tmp, *last, AVL_AFTER); 76010093SPeter.Telford@Sun.COM *last = tmp; 76110093SPeter.Telford@Sun.COM } else { 76210093SPeter.Telford@Sun.COM *last = walker; 76310093SPeter.Telford@Sun.COM } 7643247Sgjelinek } 7653247Sgjelinek } 76610093SPeter.Telford@Sun.COM 7673247Sgjelinek return (ret); 7683247Sgjelinek } 7693247Sgjelinek 7703247Sgjelinek /* 7713247Sgjelinek * vmu_update_bounds() 7723247Sgjelinek * 77310093SPeter.Telford@Sun.COM * tree: avl_tree in which first and last hang. 77410093SPeter.Telford@Sun.COM * 7753247Sgjelinek * first, last: list of continuous bounds, of which zero or more are of 7763247Sgjelinek * type VMUSAGE_BOUND_UNKNOWN. 7773247Sgjelinek * 77810093SPeter.Telford@Sun.COM * new_tree: avl_tree in which new_first and new_last hang. 
77910093SPeter.Telford@Sun.COM * 7803247Sgjelinek * new_first, new_last: list of continuous bounds, of which none are of 7813247Sgjelinek * type VMUSAGE_BOUND_UNKNOWN. These bounds are used to 7823247Sgjelinek * update the types of bounds in (first,last) with 7833247Sgjelinek * type VMUSAGE_BOUND_UNKNOWN. 7843247Sgjelinek * 7853247Sgjelinek * For the list of bounds (first,last), this function updates any bounds 7863247Sgjelinek * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in 7873247Sgjelinek * the list (new_first, new_last). 7883247Sgjelinek * 7893247Sgjelinek * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list 7903247Sgjelinek * (new_first, new_last), it will be split into multiple bounds. 7913247Sgjelinek * 7923247Sgjelinek * Return value: 7933247Sgjelinek * The number of pages in the list of bounds (first,last) that were of 7943247Sgjelinek * type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type 7953247Sgjelinek * VMUSAGE_BOUND_INCORE. 7963247Sgjelinek * 7973247Sgjelinek */ 7983247Sgjelinek static pgcnt_t 79910093SPeter.Telford@Sun.COM vmu_update_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last, 80010093SPeter.Telford@Sun.COM avl_tree_t *new_tree, vmu_bound_t *new_first, vmu_bound_t *new_last) 8013247Sgjelinek { 8023247Sgjelinek vmu_bound_t *next, *new_next, *tmp; 8033247Sgjelinek pgcnt_t rss = 0; 8043247Sgjelinek 8053247Sgjelinek next = *first; 8063247Sgjelinek new_next = new_first; 8073247Sgjelinek 8083671Ssl108498 /* 8093671Ssl108498 * Verify first and last bound are covered by new bounds if they 8103671Ssl108498 * have unknown type. 
8113671Ssl108498 */ 8123671Ssl108498 ASSERT((*first)->vmb_type != VMUSAGE_BOUND_UNKNOWN || 81310093SPeter.Telford@Sun.COM (*first)->vmb_start >= new_first->vmb_start); 8143671Ssl108498 ASSERT((*last)->vmb_type != VMUSAGE_BOUND_UNKNOWN || 8153671Ssl108498 (*last)->vmb_end <= new_last->vmb_end); 8163247Sgjelinek for (;;) { 81710093SPeter.Telford@Sun.COM /* If bound already has type, proceed to next bound. */ 8183247Sgjelinek if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) { 8193247Sgjelinek if (next == *last) 8203247Sgjelinek break; 82110093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, next); 8223247Sgjelinek continue; 8233247Sgjelinek } 8243247Sgjelinek while (new_next->vmb_end < next->vmb_start) 82510093SPeter.Telford@Sun.COM new_next = AVL_NEXT(new_tree, new_next); 8263247Sgjelinek ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN); 8273247Sgjelinek next->vmb_type = new_next->vmb_type; 8283247Sgjelinek if (new_next->vmb_end < next->vmb_end) { 8293247Sgjelinek /* need to split bound */ 8303247Sgjelinek tmp = vmu_alloc_bound(); 8313247Sgjelinek tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN; 8323247Sgjelinek tmp->vmb_start = new_next->vmb_end + 1; 8333247Sgjelinek tmp->vmb_end = next->vmb_end; 83410093SPeter.Telford@Sun.COM avl_insert_here(tree, tmp, next, AVL_AFTER); 8353247Sgjelinek next->vmb_end = new_next->vmb_end; 8363247Sgjelinek if (*last == next) 8373247Sgjelinek *last = tmp; 8383247Sgjelinek if (next->vmb_type == VMUSAGE_BOUND_INCORE) 8393247Sgjelinek rss += next->vmb_end - next->vmb_start + 1; 8403247Sgjelinek next = tmp; 8413247Sgjelinek } else { 8423247Sgjelinek if (next->vmb_type == VMUSAGE_BOUND_INCORE) 8433247Sgjelinek rss += next->vmb_end - next->vmb_start + 1; 8443247Sgjelinek if (next == *last) 8453247Sgjelinek break; 84610093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, next); 8473247Sgjelinek } 8483247Sgjelinek } 8493247Sgjelinek return (rss); 8503247Sgjelinek } 8513247Sgjelinek 8523247Sgjelinek /* 85310093SPeter.Telford@Sun.COM * Merges adjacent bounds with 
same type between first and last bound. 854*10543SPeter.Telford@Sun.COM * After merge, last pointer may point to a different bound, as (incoming) 855*10543SPeter.Telford@Sun.COM * last bound may have been merged away. 8563247Sgjelinek */ 8573247Sgjelinek static void 85810093SPeter.Telford@Sun.COM vmu_merge_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last) 8593247Sgjelinek { 86010093SPeter.Telford@Sun.COM vmu_bound_t *current; 8613247Sgjelinek vmu_bound_t *next; 8623247Sgjelinek 86310093SPeter.Telford@Sun.COM ASSERT(tree != NULL); 8643247Sgjelinek ASSERT(*first != NULL); 8653247Sgjelinek ASSERT(*last != NULL); 8663247Sgjelinek 86710093SPeter.Telford@Sun.COM current = *first; 86810093SPeter.Telford@Sun.COM while (current != *last) { 86910093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, current); 87010093SPeter.Telford@Sun.COM if ((current->vmb_end + 1) == next->vmb_start && 87110093SPeter.Telford@Sun.COM current->vmb_type == next->vmb_type) { 87210093SPeter.Telford@Sun.COM current->vmb_end = next->vmb_end; 87310093SPeter.Telford@Sun.COM avl_remove(tree, next); 87410093SPeter.Telford@Sun.COM vmu_free_bound(next); 87510093SPeter.Telford@Sun.COM if (next == *last) { 876*10543SPeter.Telford@Sun.COM *last = current; 87710093SPeter.Telford@Sun.COM } 878*10543SPeter.Telford@Sun.COM } else { 879*10543SPeter.Telford@Sun.COM current = AVL_NEXT(tree, current); 8803247Sgjelinek } 8813247Sgjelinek } 8823247Sgjelinek } 8833247Sgjelinek 8843247Sgjelinek /* 8853247Sgjelinek * Given an amp and a list of bounds, updates each bound's type with 8863247Sgjelinek * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE. 8873247Sgjelinek * 8883247Sgjelinek * If a bound is partially incore, it will be split into two bounds. 8893247Sgjelinek * first and last may be modified, as bounds may be split into multiple 89010093SPeter.Telford@Sun.COM * bounds if they are partially incore/not-incore. 
8913247Sgjelinek * 89210093SPeter.Telford@Sun.COM * Set incore to non-zero if bounds are already known to be incore. 8933247Sgjelinek * 8943247Sgjelinek */ 8953247Sgjelinek static void 89610093SPeter.Telford@Sun.COM vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp, 89710093SPeter.Telford@Sun.COM vmu_bound_t **first, vmu_bound_t **last, boolean_t incore) 8983247Sgjelinek { 8993247Sgjelinek vmu_bound_t *next; 9003247Sgjelinek vmu_bound_t *tmp; 9013247Sgjelinek pgcnt_t index; 9023247Sgjelinek short bound_type; 9033247Sgjelinek short page_type; 9043247Sgjelinek vnode_t *vn; 9053247Sgjelinek anoff_t off; 9063247Sgjelinek struct anon *ap; 9073247Sgjelinek 9083247Sgjelinek next = *first; 90910093SPeter.Telford@Sun.COM /* Shared anon slots don't change once set. */ 9103247Sgjelinek ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 9113247Sgjelinek for (;;) { 9123247Sgjelinek if (incore == B_TRUE) 9133247Sgjelinek next->vmb_type = VMUSAGE_BOUND_INCORE; 9143247Sgjelinek 9153247Sgjelinek if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) { 9163247Sgjelinek if (next == *last) 9173247Sgjelinek break; 91810093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, next); 9193247Sgjelinek continue; 9203247Sgjelinek } 9213247Sgjelinek bound_type = next->vmb_type; 9223247Sgjelinek index = next->vmb_start; 9233247Sgjelinek while (index <= next->vmb_end) { 9243247Sgjelinek 9253247Sgjelinek /* 9263247Sgjelinek * These are used to determine how much to increment 9273247Sgjelinek * index when a large page is found. 
9283247Sgjelinek */ 9293247Sgjelinek page_t *page; 9303247Sgjelinek pgcnt_t pgcnt = 1; 9313247Sgjelinek uint_t pgshft; 9323247Sgjelinek pgcnt_t pgmsk; 9333247Sgjelinek 9343247Sgjelinek ap = anon_get_ptr(amp->ahp, index); 9353247Sgjelinek if (ap != NULL) 9363247Sgjelinek swap_xlate(ap, &vn, &off); 9373247Sgjelinek 9383247Sgjelinek if (ap != NULL && vn != NULL && vn->v_pages != NULL && 9393247Sgjelinek (page = page_exists(vn, off)) != NULL) { 9403247Sgjelinek page_type = VMUSAGE_BOUND_INCORE; 9413247Sgjelinek if (page->p_szc > 0) { 9423247Sgjelinek pgcnt = page_get_pagecnt(page->p_szc); 9433247Sgjelinek pgshft = page_get_shift(page->p_szc); 9443247Sgjelinek pgmsk = (0x1 << (pgshft - PAGESHIFT)) 9453247Sgjelinek - 1; 9463247Sgjelinek } 9473247Sgjelinek } else { 9483247Sgjelinek page_type = VMUSAGE_BOUND_NOT_INCORE; 9493247Sgjelinek } 9503247Sgjelinek if (bound_type == VMUSAGE_BOUND_UNKNOWN) { 9513247Sgjelinek next->vmb_type = page_type; 9523247Sgjelinek } else if (next->vmb_type != page_type) { 9533247Sgjelinek /* 95410093SPeter.Telford@Sun.COM * If current bound type does not match page 9553247Sgjelinek * type, need to split off new bound. 
9563247Sgjelinek */ 9573247Sgjelinek tmp = vmu_alloc_bound(); 9583247Sgjelinek tmp->vmb_type = page_type; 9593247Sgjelinek tmp->vmb_start = index; 9603247Sgjelinek tmp->vmb_end = next->vmb_end; 96110093SPeter.Telford@Sun.COM avl_insert_here(tree, tmp, next, AVL_AFTER); 9623247Sgjelinek next->vmb_end = index - 1; 9633247Sgjelinek if (*last == next) 9643247Sgjelinek *last = tmp; 9653247Sgjelinek next = tmp; 9663247Sgjelinek } 9673247Sgjelinek if (pgcnt > 1) { 9683247Sgjelinek /* 9693247Sgjelinek * If inside large page, jump to next large 9703247Sgjelinek * page 9713247Sgjelinek */ 9723247Sgjelinek index = (index & ~pgmsk) + pgcnt; 9733247Sgjelinek } else { 9743247Sgjelinek index++; 9753247Sgjelinek } 9763247Sgjelinek } 9773247Sgjelinek if (next == *last) { 9783247Sgjelinek ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN); 9793247Sgjelinek break; 9803247Sgjelinek } else 98110093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, next); 9823247Sgjelinek } 9833247Sgjelinek ANON_LOCK_EXIT(&->a_rwlock); 9843247Sgjelinek } 9853247Sgjelinek 9863247Sgjelinek /* 9873247Sgjelinek * Same as vmu_amp_update_incore_bounds(), except for tracking 9883247Sgjelinek * incore-/not-incore for vnodes. 
9893247Sgjelinek */ 9903247Sgjelinek static void 99110093SPeter.Telford@Sun.COM vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode, 99210093SPeter.Telford@Sun.COM vmu_bound_t **first, vmu_bound_t **last) 9933247Sgjelinek { 9943247Sgjelinek vmu_bound_t *next; 9953247Sgjelinek vmu_bound_t *tmp; 9963247Sgjelinek pgcnt_t index; 9973247Sgjelinek short bound_type; 9983247Sgjelinek short page_type; 9993247Sgjelinek 10003247Sgjelinek next = *first; 10013247Sgjelinek for (;;) { 10023247Sgjelinek if (vnode->v_pages == NULL) 10033247Sgjelinek next->vmb_type = VMUSAGE_BOUND_NOT_INCORE; 10043247Sgjelinek 10053247Sgjelinek if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) { 10063247Sgjelinek if (next == *last) 10073247Sgjelinek break; 100810093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, next); 10093247Sgjelinek continue; 10103247Sgjelinek } 10113247Sgjelinek 10123247Sgjelinek bound_type = next->vmb_type; 10133247Sgjelinek index = next->vmb_start; 10143247Sgjelinek while (index <= next->vmb_end) { 10153247Sgjelinek 10163247Sgjelinek /* 10173247Sgjelinek * These are used to determine how much to increment 10183247Sgjelinek * index when a large page is found. 
10193247Sgjelinek */ 10203247Sgjelinek page_t *page; 10213247Sgjelinek pgcnt_t pgcnt = 1; 10223247Sgjelinek uint_t pgshft; 10233247Sgjelinek pgcnt_t pgmsk; 10243247Sgjelinek 10253247Sgjelinek if (vnode->v_pages != NULL && 10263247Sgjelinek (page = page_exists(vnode, ptob(index))) != NULL) { 10273247Sgjelinek page_type = VMUSAGE_BOUND_INCORE; 10283247Sgjelinek if (page->p_szc > 0) { 10293247Sgjelinek pgcnt = page_get_pagecnt(page->p_szc); 10303247Sgjelinek pgshft = page_get_shift(page->p_szc); 10313247Sgjelinek pgmsk = (0x1 << (pgshft - PAGESHIFT)) 10323247Sgjelinek - 1; 10333247Sgjelinek } 10343247Sgjelinek } else { 10353247Sgjelinek page_type = VMUSAGE_BOUND_NOT_INCORE; 10363247Sgjelinek } 10373247Sgjelinek if (bound_type == VMUSAGE_BOUND_UNKNOWN) { 10383247Sgjelinek next->vmb_type = page_type; 10393247Sgjelinek } else if (next->vmb_type != page_type) { 10403247Sgjelinek /* 104110093SPeter.Telford@Sun.COM * If current bound type does not match page 10423247Sgjelinek * type, need to split off new bound. 
10433247Sgjelinek */ 10443247Sgjelinek tmp = vmu_alloc_bound(); 10453247Sgjelinek tmp->vmb_type = page_type; 10463247Sgjelinek tmp->vmb_start = index; 10473247Sgjelinek tmp->vmb_end = next->vmb_end; 104810093SPeter.Telford@Sun.COM avl_insert_here(tree, tmp, next, AVL_AFTER); 10493247Sgjelinek next->vmb_end = index - 1; 10503247Sgjelinek if (*last == next) 10513247Sgjelinek *last = tmp; 10523247Sgjelinek next = tmp; 10533247Sgjelinek } 10543247Sgjelinek if (pgcnt > 1) { 10553247Sgjelinek /* 10563247Sgjelinek * If inside large page, jump to next large 10573247Sgjelinek * page 10583247Sgjelinek */ 10593247Sgjelinek index = (index & ~pgmsk) + pgcnt; 10603247Sgjelinek } else { 10613247Sgjelinek index++; 10623247Sgjelinek } 10633247Sgjelinek } 10643247Sgjelinek if (next == *last) { 10653247Sgjelinek ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN); 10663247Sgjelinek break; 10673247Sgjelinek } else 106810093SPeter.Telford@Sun.COM next = AVL_NEXT(tree, next); 10693247Sgjelinek } 10703247Sgjelinek } 10713247Sgjelinek 10723247Sgjelinek /* 10733247Sgjelinek * Calculate the rss and swap consumed by a segment. vmu_entities is the 10743247Sgjelinek * list of entities to visit. For shared segments, the vnode or amp 107510093SPeter.Telford@Sun.COM * is looked up in each entity to see if it has been already counted. Private 107610093SPeter.Telford@Sun.COM * anon pages are checked per entity to ensure that COW pages are not 10773247Sgjelinek * double counted. 10783247Sgjelinek * 10793247Sgjelinek * For private mapped files, first the amp is checked for private pages. 10803247Sgjelinek * Bounds not backed by the amp are looked up in the vnode for each entity 10813247Sgjelinek * to avoid double counting of private COW vnode pages. 
10823247Sgjelinek */ 10833247Sgjelinek static void 10843247Sgjelinek vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg) 10853247Sgjelinek { 10863247Sgjelinek struct segvn_data *svd; 10873247Sgjelinek struct shm_data *shmd; 10883247Sgjelinek struct spt_data *sptd; 10893247Sgjelinek vmu_object_t *shared_object = NULL; 10903247Sgjelinek vmu_object_t *entity_object = NULL; 10913247Sgjelinek vmu_entity_t *entity; 10923247Sgjelinek vmusage_t *result; 10933247Sgjelinek vmu_bound_t *first = NULL; 10943247Sgjelinek vmu_bound_t *last = NULL; 10953247Sgjelinek vmu_bound_t *cur = NULL; 10963247Sgjelinek vmu_bound_t *e_first = NULL; 10973247Sgjelinek vmu_bound_t *e_last = NULL; 10983247Sgjelinek vmu_bound_t *tmp; 10993247Sgjelinek pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt; 11003247Sgjelinek struct anon_map *private_amp = NULL; 11013247Sgjelinek boolean_t incore = B_FALSE; 11023247Sgjelinek boolean_t shared = B_FALSE; 11033247Sgjelinek int file = 0; 11043247Sgjelinek pgcnt_t swresv = 0; 11053247Sgjelinek pgcnt_t panon = 0; 11063247Sgjelinek 110710093SPeter.Telford@Sun.COM /* Can zero-length segments exist? Not sure, so paranoia. */ 11083247Sgjelinek if (seg->s_size <= 0) 11093247Sgjelinek return; 11103247Sgjelinek 11113247Sgjelinek /* 11123247Sgjelinek * Figure out if there is a shared object (such as a named vnode or 11133247Sgjelinek * a shared amp, then figure out if there is a private amp, which 11143247Sgjelinek * identifies private pages. 
11153247Sgjelinek */ 11163247Sgjelinek if (seg->s_ops == &segvn_ops) { 11173247Sgjelinek svd = (struct segvn_data *)seg->s_data; 111810093SPeter.Telford@Sun.COM if (svd->type == MAP_SHARED) { 11193247Sgjelinek shared = B_TRUE; 112010093SPeter.Telford@Sun.COM } else { 11213247Sgjelinek swresv = svd->swresv; 11223247Sgjelinek 112310093SPeter.Telford@Sun.COM if (SEGVN_LOCK_TRYENTER(seg->s_as, &svd->lock, 112410093SPeter.Telford@Sun.COM RW_READER) != 0) { 112510093SPeter.Telford@Sun.COM /* 112610093SPeter.Telford@Sun.COM * Text replication anon maps can be shared 112710093SPeter.Telford@Sun.COM * across all zones. Space used for text 112810093SPeter.Telford@Sun.COM * replication is typically capped as a small % 112910093SPeter.Telford@Sun.COM * of memory. To keep it simple for now we 113010093SPeter.Telford@Sun.COM * don't account for swap and memory space used 113110093SPeter.Telford@Sun.COM * for text replication. 113210093SPeter.Telford@Sun.COM */ 113310093SPeter.Telford@Sun.COM if (svd->tr_state == SEGVN_TR_OFF && 113410093SPeter.Telford@Sun.COM svd->amp != NULL) { 113510093SPeter.Telford@Sun.COM private_amp = svd->amp; 113610093SPeter.Telford@Sun.COM p_start = svd->anon_index; 113710093SPeter.Telford@Sun.COM p_end = svd->anon_index + 113810093SPeter.Telford@Sun.COM btop(seg->s_size) - 1; 113910093SPeter.Telford@Sun.COM } 114010093SPeter.Telford@Sun.COM SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); 114110093SPeter.Telford@Sun.COM } 114210093SPeter.Telford@Sun.COM } 11433247Sgjelinek if (svd->vp != NULL) { 11443247Sgjelinek file = 1; 11453247Sgjelinek shared_object = vmu_find_insert_object( 11463247Sgjelinek vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp, 11473247Sgjelinek VMUSAGE_TYPE_VNODE); 11483247Sgjelinek s_start = btop(svd->offset); 11493247Sgjelinek s_end = btop(svd->offset + seg->s_size) - 1; 11503247Sgjelinek } 11513247Sgjelinek if (svd->amp != NULL && svd->type == MAP_SHARED) { 11523247Sgjelinek ASSERT(shared_object == NULL); 11533247Sgjelinek shared_object = 
vmu_find_insert_object( 11543247Sgjelinek vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp, 11553247Sgjelinek VMUSAGE_TYPE_AMP); 11563247Sgjelinek s_start = svd->anon_index; 11573247Sgjelinek s_end = svd->anon_index + btop(seg->s_size) - 1; 11583247Sgjelinek /* schedctl mappings are always in core */ 11593247Sgjelinek if (svd->amp->swresv == 0) 11603247Sgjelinek incore = B_TRUE; 11613247Sgjelinek } 11623247Sgjelinek } else if (seg->s_ops == &segspt_shmops) { 11633247Sgjelinek shared = B_TRUE; 11643247Sgjelinek shmd = (struct shm_data *)seg->s_data; 11653247Sgjelinek shared_object = vmu_find_insert_object( 11663247Sgjelinek vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp, 11673247Sgjelinek VMUSAGE_TYPE_AMP); 11683247Sgjelinek s_start = 0; 11693247Sgjelinek s_end = btop(seg->s_size) - 1; 11703247Sgjelinek sptd = shmd->shm_sptseg->s_data; 11713247Sgjelinek 11723247Sgjelinek /* ism segments are always incore and do not reserve swap */ 11733247Sgjelinek if (sptd->spt_flags & SHM_SHARE_MMU) 11743247Sgjelinek incore = B_TRUE; 11753247Sgjelinek 11763247Sgjelinek } else { 11773247Sgjelinek return; 11783247Sgjelinek } 11793247Sgjelinek 11803247Sgjelinek /* 11813247Sgjelinek * If there is a private amp, count anon pages that exist. If an 118210093SPeter.Telford@Sun.COM * anon has a refcnt > 1 (COW sharing), then save the anon in a 11833247Sgjelinek * hash so that it is not double counted. 11843247Sgjelinek * 118510093SPeter.Telford@Sun.COM * If there is also a shared object, then figure out the bounds 11863247Sgjelinek * which are not mapped by the private amp. 
11873247Sgjelinek */ 11883247Sgjelinek if (private_amp != NULL) { 11893247Sgjelinek 119010093SPeter.Telford@Sun.COM /* Enter as writer to prevent COW anons from being freed */ 11913247Sgjelinek ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER); 11923247Sgjelinek 11933247Sgjelinek p_index = p_start; 11943247Sgjelinek s_index = s_start; 11953247Sgjelinek 11963247Sgjelinek while (p_index <= p_end) { 11973247Sgjelinek 11983247Sgjelinek pgcnt_t p_index_next; 11993247Sgjelinek pgcnt_t p_bound_size; 12003247Sgjelinek int cnt; 12013247Sgjelinek anoff_t off; 12023247Sgjelinek struct vnode *vn; 12033247Sgjelinek struct anon *ap; 12043247Sgjelinek page_t *page; /* For handling of large */ 12053247Sgjelinek pgcnt_t pgcnt = 1; /* pages */ 12063247Sgjelinek pgcnt_t pgstart; 12073247Sgjelinek pgcnt_t pgend; 12083247Sgjelinek uint_t pgshft; 12093247Sgjelinek pgcnt_t pgmsk; 12103247Sgjelinek 12113247Sgjelinek p_index_next = p_index; 12123247Sgjelinek ap = anon_get_next_ptr(private_amp->ahp, 12133247Sgjelinek &p_index_next); 12143247Sgjelinek 12153247Sgjelinek /* 12163247Sgjelinek * If next anon is past end of mapping, simulate 12173247Sgjelinek * end of anon so loop terminates. 12183247Sgjelinek */ 12193247Sgjelinek if (p_index_next > p_end) { 12203247Sgjelinek p_index_next = p_end + 1; 12213247Sgjelinek ap = NULL; 12223247Sgjelinek } 12233247Sgjelinek /* 122410093SPeter.Telford@Sun.COM * For COW segments, keep track of bounds not 12253247Sgjelinek * backed by private amp so they can be looked 12263247Sgjelinek * up in the backing vnode 12273247Sgjelinek */ 12283247Sgjelinek if (p_index_next != p_index) { 12293247Sgjelinek 12303247Sgjelinek /* 12313247Sgjelinek * Compute index difference between anon and 12323247Sgjelinek * previous anon. 
12333247Sgjelinek */ 12343247Sgjelinek p_bound_size = p_index_next - p_index - 1; 12353247Sgjelinek 12363247Sgjelinek if (shared_object != NULL) { 12373247Sgjelinek cur = vmu_alloc_bound(); 12383247Sgjelinek cur->vmb_start = s_index; 12393247Sgjelinek cur->vmb_end = s_index + p_bound_size; 12403247Sgjelinek cur->vmb_type = VMUSAGE_BOUND_UNKNOWN; 12413247Sgjelinek if (first == NULL) { 12423247Sgjelinek first = cur; 12433247Sgjelinek last = cur; 12443247Sgjelinek } else { 12453247Sgjelinek last->vmb_next = cur; 12463247Sgjelinek last = cur; 12473247Sgjelinek } 12483247Sgjelinek } 12493247Sgjelinek p_index = p_index + p_bound_size + 1; 12503247Sgjelinek s_index = s_index + p_bound_size + 1; 12513247Sgjelinek } 12523247Sgjelinek 12533247Sgjelinek /* Detect end of anons in amp */ 12543247Sgjelinek if (ap == NULL) 12553247Sgjelinek break; 12563247Sgjelinek 12573247Sgjelinek cnt = ap->an_refcnt; 12583247Sgjelinek swap_xlate(ap, &vn, &off); 12593247Sgjelinek 12603247Sgjelinek if (vn == NULL || vn->v_pages == NULL || 12613247Sgjelinek (page = page_exists(vn, off)) == NULL) { 12623247Sgjelinek p_index++; 12633247Sgjelinek s_index++; 12643247Sgjelinek continue; 12653247Sgjelinek } 12663247Sgjelinek 12673247Sgjelinek /* 12683247Sgjelinek * If large page is found, compute portion of large 12693247Sgjelinek * page in mapping, and increment indicies to the next 12703247Sgjelinek * large page. 12713247Sgjelinek */ 12723247Sgjelinek if (page->p_szc > 0) { 12733247Sgjelinek 12743247Sgjelinek pgcnt = page_get_pagecnt(page->p_szc); 12753247Sgjelinek pgshft = page_get_shift(page->p_szc); 12763247Sgjelinek pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1; 12773247Sgjelinek 12783247Sgjelinek /* First page in large page */ 12793247Sgjelinek pgstart = p_index & ~pgmsk; 12803247Sgjelinek /* Last page in large page */ 12813247Sgjelinek pgend = pgstart + pgcnt - 1; 12823247Sgjelinek /* 12833247Sgjelinek * Artifically end page if page extends past 12843247Sgjelinek * end of mapping. 
12853247Sgjelinek */ 12863247Sgjelinek if (pgend > p_end) 12873247Sgjelinek pgend = p_end; 12883247Sgjelinek 12893247Sgjelinek /* 12903247Sgjelinek * Compute number of pages from large page 12913247Sgjelinek * which are mapped. 12923247Sgjelinek */ 12933247Sgjelinek pgcnt = pgend - p_index + 1; 12943247Sgjelinek 12953247Sgjelinek /* 12963247Sgjelinek * Point indicies at page after large page, 12973247Sgjelinek * or at page after end of mapping. 12983247Sgjelinek */ 12993247Sgjelinek p_index += pgcnt; 13003247Sgjelinek s_index += pgcnt; 13013247Sgjelinek } else { 13023247Sgjelinek p_index++; 13033247Sgjelinek s_index++; 13043247Sgjelinek } 13053247Sgjelinek 13063247Sgjelinek /* 13073247Sgjelinek * Assume anon structs with a refcnt 130810093SPeter.Telford@Sun.COM * of 1 are not COW shared, so there 13093247Sgjelinek * is no reason to track them per entity. 13103247Sgjelinek */ 13113247Sgjelinek if (cnt == 1) { 13123247Sgjelinek panon += pgcnt; 13133247Sgjelinek continue; 13143247Sgjelinek } 13153247Sgjelinek for (entity = vmu_entities; entity != NULL; 13163247Sgjelinek entity = entity->vme_next_calc) { 13173247Sgjelinek 13183247Sgjelinek result = &entity->vme_result; 13193247Sgjelinek /* 132010093SPeter.Telford@Sun.COM * Track COW anons per entity so 13213247Sgjelinek * they are not double counted. 
13223247Sgjelinek */ 13233247Sgjelinek if (vmu_find_insert_anon(entity->vme_anon_hash, 13243247Sgjelinek (caddr_t)ap) == 0) 13253247Sgjelinek continue; 13263247Sgjelinek 13273247Sgjelinek result->vmu_rss_all += (pgcnt << PAGESHIFT); 13283247Sgjelinek result->vmu_rss_private += 13293247Sgjelinek (pgcnt << PAGESHIFT); 13303247Sgjelinek } 13313247Sgjelinek } 13323247Sgjelinek ANON_LOCK_EXIT(&private_amp->a_rwlock); 13333247Sgjelinek } 13343247Sgjelinek 13353247Sgjelinek /* Add up resident anon and swap reserved for private mappings */ 13363247Sgjelinek if (swresv > 0 || panon > 0) { 13373247Sgjelinek for (entity = vmu_entities; entity != NULL; 13383247Sgjelinek entity = entity->vme_next_calc) { 13393247Sgjelinek result = &entity->vme_result; 13403247Sgjelinek result->vmu_swap_all += swresv; 13413247Sgjelinek result->vmu_swap_private += swresv; 13423247Sgjelinek result->vmu_rss_all += (panon << PAGESHIFT); 13433247Sgjelinek result->vmu_rss_private += (panon << PAGESHIFT); 13443247Sgjelinek } 13453247Sgjelinek } 13463247Sgjelinek 13473247Sgjelinek /* Compute resident pages backing shared amp or named vnode */ 13483247Sgjelinek if (shared_object != NULL) { 1349*10543SPeter.Telford@Sun.COM avl_tree_t *tree = &(shared_object->vmo_bounds); 1350*10543SPeter.Telford@Sun.COM 13513247Sgjelinek if (first == NULL) { 13523247Sgjelinek /* 13533247Sgjelinek * No private amp, or private amp has no anon 13543247Sgjelinek * structs. This means entire segment is backed by 13553247Sgjelinek * the shared object. 13563247Sgjelinek */ 13573247Sgjelinek first = vmu_alloc_bound(); 13583247Sgjelinek first->vmb_start = s_start; 13593247Sgjelinek first->vmb_end = s_end; 13603247Sgjelinek first->vmb_type = VMUSAGE_BOUND_UNKNOWN; 13613247Sgjelinek } 13623247Sgjelinek /* 13633247Sgjelinek * Iterate bounds not backed by private amp, and compute 13643247Sgjelinek * resident pages. 
13653247Sgjelinek */ 13663247Sgjelinek cur = first; 13673247Sgjelinek while (cur != NULL) { 13683247Sgjelinek 13693247Sgjelinek if (vmu_insert_lookup_object_bounds(shared_object, 13703247Sgjelinek cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN, 13713247Sgjelinek &first, &last) > 0) { 13723247Sgjelinek /* new bounds, find incore/not-incore */ 13733247Sgjelinek if (shared_object->vmo_type == 137410093SPeter.Telford@Sun.COM VMUSAGE_TYPE_VNODE) { 13753247Sgjelinek vmu_vnode_update_incore_bounds( 137610093SPeter.Telford@Sun.COM tree, 13773247Sgjelinek (vnode_t *) 13783247Sgjelinek shared_object->vmo_key, &first, 13793247Sgjelinek &last); 138010093SPeter.Telford@Sun.COM } else { 13813247Sgjelinek vmu_amp_update_incore_bounds( 138210093SPeter.Telford@Sun.COM tree, 13833247Sgjelinek (struct anon_map *) 13843247Sgjelinek shared_object->vmo_key, &first, 13853247Sgjelinek &last, incore); 138610093SPeter.Telford@Sun.COM } 138710093SPeter.Telford@Sun.COM vmu_merge_bounds(tree, &first, &last); 13883247Sgjelinek } 13893247Sgjelinek for (entity = vmu_entities; entity != NULL; 13903247Sgjelinek entity = entity->vme_next_calc) { 139110093SPeter.Telford@Sun.COM avl_tree_t *e_tree; 13923247Sgjelinek 13933247Sgjelinek result = &entity->vme_result; 13943247Sgjelinek 13953247Sgjelinek entity_object = vmu_find_insert_object( 13963247Sgjelinek shared_object->vmo_type == 13973247Sgjelinek VMUSAGE_TYPE_VNODE ? 
entity->vme_vnode_hash: 13987884Sgerald.jelinek@sun.com entity->vme_amp_hash, 13997884Sgerald.jelinek@sun.com shared_object->vmo_key, 14007884Sgerald.jelinek@sun.com shared_object->vmo_type); 14013247Sgjelinek 14023247Sgjelinek virt = vmu_insert_lookup_object_bounds( 14033247Sgjelinek entity_object, cur->vmb_start, cur->vmb_end, 14043247Sgjelinek VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last); 14053247Sgjelinek 14063247Sgjelinek if (virt == 0) 14073247Sgjelinek continue; 14083247Sgjelinek /* 14093247Sgjelinek * Range visited for this entity 14103247Sgjelinek */ 141110093SPeter.Telford@Sun.COM e_tree = &(entity_object->vmo_bounds); 141210093SPeter.Telford@Sun.COM rss = vmu_update_bounds(e_tree, &e_first, 141310093SPeter.Telford@Sun.COM &e_last, tree, first, last); 14143247Sgjelinek result->vmu_rss_all += (rss << PAGESHIFT); 14153247Sgjelinek if (shared == B_TRUE && file == B_FALSE) { 14163247Sgjelinek /* shared anon mapping */ 14173247Sgjelinek result->vmu_swap_all += 14183247Sgjelinek (virt << PAGESHIFT); 14193247Sgjelinek result->vmu_swap_shared += 14203247Sgjelinek (virt << PAGESHIFT); 14213247Sgjelinek result->vmu_rss_shared += 14223247Sgjelinek (rss << PAGESHIFT); 14233247Sgjelinek } else if (shared == B_TRUE && file == B_TRUE) { 14243247Sgjelinek /* shared file mapping */ 14253247Sgjelinek result->vmu_rss_shared += 14263247Sgjelinek (rss << PAGESHIFT); 14273247Sgjelinek } else if (shared == B_FALSE && 14283247Sgjelinek file == B_TRUE) { 14293247Sgjelinek /* private file mapping */ 14303247Sgjelinek result->vmu_rss_private += 14313247Sgjelinek (rss << PAGESHIFT); 14323247Sgjelinek } 143310093SPeter.Telford@Sun.COM vmu_merge_bounds(e_tree, &e_first, &e_last); 14343247Sgjelinek } 14353247Sgjelinek tmp = cur; 14363247Sgjelinek cur = cur->vmb_next; 14373247Sgjelinek vmu_free_bound(tmp); 14383247Sgjelinek } 14393247Sgjelinek } 14403247Sgjelinek } 14413247Sgjelinek 14423247Sgjelinek /* 14433247Sgjelinek * Based on the current calculation flags, find the relevant entities 
14443247Sgjelinek * which are relative to the process. Then calculate each segment 14453247Sgjelinek * in the process'es address space for each relevant entity. 14463247Sgjelinek */ 14473247Sgjelinek static void 14483247Sgjelinek vmu_calculate_proc(proc_t *p) 14493247Sgjelinek { 14503247Sgjelinek vmu_entity_t *entities = NULL; 14513247Sgjelinek vmu_zone_t *zone; 14523247Sgjelinek vmu_entity_t *tmp; 14533247Sgjelinek struct as *as; 14543247Sgjelinek struct seg *seg; 14553247Sgjelinek int ret; 14563247Sgjelinek 14573247Sgjelinek /* Figure out which entities are being computed */ 14583247Sgjelinek if ((vmu_data.vmu_system) != NULL) { 14593247Sgjelinek tmp = vmu_data.vmu_system; 14603247Sgjelinek tmp->vme_next_calc = entities; 14613247Sgjelinek entities = tmp; 14623247Sgjelinek } 14633247Sgjelinek if (vmu_data.vmu_calc_flags & 14643247Sgjelinek (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS | 14653247Sgjelinek VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS | 14663247Sgjelinek VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS | 14673247Sgjelinek VMUSAGE_ALL_EUSERS)) { 14683247Sgjelinek ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash, 14693247Sgjelinek (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id, 14703247Sgjelinek (mod_hash_val_t *)&zone); 14713247Sgjelinek if (ret != 0) { 14723247Sgjelinek zone = vmu_alloc_zone(p->p_zone->zone_id); 14733247Sgjelinek ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash, 14743247Sgjelinek (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id, 14753247Sgjelinek (mod_hash_val_t)zone, (mod_hash_hndl_t)0); 14763247Sgjelinek ASSERT(ret == 0); 14773247Sgjelinek } 14783247Sgjelinek if (zone->vmz_zone != NULL) { 14793247Sgjelinek tmp = zone->vmz_zone; 14803247Sgjelinek tmp->vme_next_calc = entities; 14813247Sgjelinek entities = tmp; 14823247Sgjelinek } 14833247Sgjelinek if (vmu_data.vmu_calc_flags & 14843247Sgjelinek (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) { 14853247Sgjelinek tmp = 
vmu_find_insert_entity(zone->vmz_projects_hash, 14863247Sgjelinek p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, 14873247Sgjelinek zone->vmz_id); 14883247Sgjelinek tmp->vme_next_calc = entities; 14893247Sgjelinek entities = tmp; 14903247Sgjelinek } 14913247Sgjelinek if (vmu_data.vmu_calc_flags & 14923247Sgjelinek (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) { 14933247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_tasks_hash, 14943247Sgjelinek p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id); 14953247Sgjelinek tmp->vme_next_calc = entities; 14963247Sgjelinek entities = tmp; 14973247Sgjelinek } 14983247Sgjelinek if (vmu_data.vmu_calc_flags & 14993247Sgjelinek (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) { 15003247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_rusers_hash, 15013247Sgjelinek crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id); 15023247Sgjelinek tmp->vme_next_calc = entities; 15033247Sgjelinek entities = tmp; 15043247Sgjelinek } 15053247Sgjelinek if (vmu_data.vmu_calc_flags & 15063247Sgjelinek (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) { 15073247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_eusers_hash, 15083247Sgjelinek crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id); 15093247Sgjelinek tmp->vme_next_calc = entities; 15103247Sgjelinek entities = tmp; 15113247Sgjelinek } 15123247Sgjelinek } 15133247Sgjelinek /* Entities which collapse projects and users for all zones */ 15143247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) { 15153247Sgjelinek tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash, 15163247Sgjelinek p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES); 15173247Sgjelinek tmp->vme_next_calc = entities; 15183247Sgjelinek entities = tmp; 15193247Sgjelinek } 15203247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) { 15213247Sgjelinek tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash, 15223247Sgjelinek crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES); 15233247Sgjelinek tmp->vme_next_calc = entities; 
15243247Sgjelinek entities = tmp; 15253247Sgjelinek } 15263247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) { 15273247Sgjelinek tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash, 15283247Sgjelinek crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES); 15293247Sgjelinek tmp->vme_next_calc = entities; 15303247Sgjelinek entities = tmp; 15313247Sgjelinek } 15323247Sgjelinek 15333247Sgjelinek ASSERT(entities != NULL); 15343247Sgjelinek /* process all segs in process's address space */ 15353247Sgjelinek as = p->p_as; 15363247Sgjelinek AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 15373247Sgjelinek for (seg = AS_SEGFIRST(as); seg != NULL; 15383247Sgjelinek seg = AS_SEGNEXT(as, seg)) { 15393247Sgjelinek vmu_calculate_seg(entities, seg); 15403247Sgjelinek } 15413247Sgjelinek AS_LOCK_EXIT(as, &as->a_lock); 15423247Sgjelinek } 15433247Sgjelinek 15443247Sgjelinek /* 15453247Sgjelinek * Free data created by previous call to vmu_calculate(). 15463247Sgjelinek */ 15473247Sgjelinek static void 15483247Sgjelinek vmu_clear_calc() 15493247Sgjelinek { 15503247Sgjelinek if (vmu_data.vmu_system != NULL) 15513247Sgjelinek vmu_free_entity(vmu_data.vmu_system); 15523247Sgjelinek vmu_data.vmu_system = NULL; 15533247Sgjelinek if (vmu_data.vmu_zones_hash != NULL) 15543247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash); 15553247Sgjelinek if (vmu_data.vmu_projects_col_hash != NULL) 15563247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash); 15573247Sgjelinek if (vmu_data.vmu_rusers_col_hash != NULL) 15583247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash); 15593247Sgjelinek if (vmu_data.vmu_eusers_col_hash != NULL) 15603247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash); 15613247Sgjelinek 15623247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash); 15633247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash); 15643247Sgjelinek } 15653247Sgjelinek 15663247Sgjelinek /* 15673247Sgjelinek * Free unused data 
structures. These can result if the system workload 15683247Sgjelinek * decreases between calculations. 15693247Sgjelinek */ 15703247Sgjelinek static void 15713247Sgjelinek vmu_free_extra() 15723247Sgjelinek { 15733247Sgjelinek vmu_bound_t *tb; 15743247Sgjelinek vmu_object_t *to; 15753247Sgjelinek vmu_entity_t *te; 15763247Sgjelinek vmu_zone_t *tz; 15773247Sgjelinek 15783247Sgjelinek while (vmu_data.vmu_free_bounds != NULL) { 15793247Sgjelinek tb = vmu_data.vmu_free_bounds; 15803247Sgjelinek vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next; 15813247Sgjelinek kmem_cache_free(vmu_bound_cache, tb); 15823247Sgjelinek } 15833247Sgjelinek while (vmu_data.vmu_free_objects != NULL) { 15843247Sgjelinek to = vmu_data.vmu_free_objects; 15853247Sgjelinek vmu_data.vmu_free_objects = 15863247Sgjelinek vmu_data.vmu_free_objects->vmo_next; 15873247Sgjelinek kmem_cache_free(vmu_object_cache, to); 15883247Sgjelinek } 15893247Sgjelinek while (vmu_data.vmu_free_entities != NULL) { 15903247Sgjelinek te = vmu_data.vmu_free_entities; 15913247Sgjelinek vmu_data.vmu_free_entities = 15923247Sgjelinek vmu_data.vmu_free_entities->vme_next; 15933247Sgjelinek if (te->vme_vnode_hash != NULL) 15943247Sgjelinek mod_hash_destroy_hash(te->vme_vnode_hash); 15953247Sgjelinek if (te->vme_amp_hash != NULL) 15963247Sgjelinek mod_hash_destroy_hash(te->vme_amp_hash); 15973247Sgjelinek if (te->vme_anon_hash != NULL) 15983247Sgjelinek mod_hash_destroy_hash(te->vme_anon_hash); 15993247Sgjelinek kmem_free(te, sizeof (vmu_entity_t)); 16003247Sgjelinek } 16013247Sgjelinek while (vmu_data.vmu_free_zones != NULL) { 16023247Sgjelinek tz = vmu_data.vmu_free_zones; 16033247Sgjelinek vmu_data.vmu_free_zones = 16043247Sgjelinek vmu_data.vmu_free_zones->vmz_next; 16053247Sgjelinek if (tz->vmz_projects_hash != NULL) 16063247Sgjelinek mod_hash_destroy_hash(tz->vmz_projects_hash); 16073247Sgjelinek if (tz->vmz_tasks_hash != NULL) 16083247Sgjelinek mod_hash_destroy_hash(tz->vmz_tasks_hash); 16093247Sgjelinek if 
(tz->vmz_rusers_hash != NULL) 16103247Sgjelinek mod_hash_destroy_hash(tz->vmz_rusers_hash); 16113247Sgjelinek if (tz->vmz_eusers_hash != NULL) 16123247Sgjelinek mod_hash_destroy_hash(tz->vmz_eusers_hash); 16133247Sgjelinek kmem_free(tz, sizeof (vmu_zone_t)); 16143247Sgjelinek } 16153247Sgjelinek } 16163247Sgjelinek 16173247Sgjelinek extern kcondvar_t *pr_pid_cv; 16183247Sgjelinek 16193247Sgjelinek /* 16203247Sgjelinek * Determine which entity types are relevant and allocate the hashes to 16213247Sgjelinek * track them. Then walk the process table and count rss and swap 16223247Sgjelinek * for each process'es address space. Address space object such as 16233247Sgjelinek * vnodes, amps and anons are tracked per entity, so that they are 16243247Sgjelinek * not double counted in the results. 16253247Sgjelinek * 16263247Sgjelinek */ 16273247Sgjelinek static void 16283247Sgjelinek vmu_calculate() 16293247Sgjelinek { 16303247Sgjelinek int i = 0; 16313247Sgjelinek int ret; 16323247Sgjelinek proc_t *p; 16333247Sgjelinek 16343247Sgjelinek vmu_clear_calc(); 16353247Sgjelinek 16363247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM) 16373247Sgjelinek vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM, 16383247Sgjelinek ALL_ZONES); 16393247Sgjelinek 16403247Sgjelinek /* 16413247Sgjelinek * Walk process table and calculate rss of each proc. 16423247Sgjelinek * 16433247Sgjelinek * Pidlock and p_lock cannot be held while doing the rss calculation. 16443247Sgjelinek * This is because: 16453247Sgjelinek * 1. The calculation allocates using KM_SLEEP. 16463247Sgjelinek * 2. The calculation grabs a_lock, which cannot be grabbed 16473247Sgjelinek * after p_lock. 16483247Sgjelinek * 16493247Sgjelinek * Since pidlock must be dropped, we cannot simply just walk the 16503247Sgjelinek * practive list. Instead, we walk the process table, and sprlock 16513247Sgjelinek * each process to ensure that it does not exit during the 16523247Sgjelinek * calculation. 
16533247Sgjelinek */ 16543247Sgjelinek 16553247Sgjelinek mutex_enter(&pidlock); 16563247Sgjelinek for (i = 0; i < v.v_proc; i++) { 16573247Sgjelinek again: 16583247Sgjelinek p = pid_entry(i); 16593247Sgjelinek if (p == NULL) 16603247Sgjelinek continue; 16613247Sgjelinek 16623247Sgjelinek mutex_enter(&p->p_lock); 16633247Sgjelinek mutex_exit(&pidlock); 16643247Sgjelinek 16653247Sgjelinek if (panicstr) { 16663247Sgjelinek mutex_exit(&p->p_lock); 16673247Sgjelinek return; 16683247Sgjelinek } 16693247Sgjelinek 16703247Sgjelinek /* Try to set P_PR_LOCK */ 16713247Sgjelinek ret = sprtrylock_proc(p); 16723247Sgjelinek if (ret == -1) { 16733247Sgjelinek /* Process in invalid state */ 16743247Sgjelinek mutex_exit(&p->p_lock); 16753247Sgjelinek mutex_enter(&pidlock); 16763247Sgjelinek continue; 16773247Sgjelinek } else if (ret == 1) { 16783247Sgjelinek /* 16793247Sgjelinek * P_PR_LOCK is already set. Wait and try again. 16803247Sgjelinek * This also drops p_lock. 16813247Sgjelinek */ 16823247Sgjelinek sprwaitlock_proc(p); 16833247Sgjelinek mutex_enter(&pidlock); 16843247Sgjelinek goto again; 16853247Sgjelinek } 16863247Sgjelinek mutex_exit(&p->p_lock); 16873247Sgjelinek 16883247Sgjelinek vmu_calculate_proc(p); 16893247Sgjelinek 16903247Sgjelinek mutex_enter(&p->p_lock); 16913247Sgjelinek sprunlock(p); 16923247Sgjelinek mutex_enter(&pidlock); 16933247Sgjelinek } 16943247Sgjelinek mutex_exit(&pidlock); 16953247Sgjelinek 16963247Sgjelinek vmu_free_extra(); 16973247Sgjelinek } 16983247Sgjelinek 16993247Sgjelinek /* 17003247Sgjelinek * allocate a new cache for N results satisfying flags 17013247Sgjelinek */ 17023247Sgjelinek vmu_cache_t * 17033247Sgjelinek vmu_cache_alloc(size_t nres, uint_t flags) 17043247Sgjelinek { 17053247Sgjelinek vmu_cache_t *cache; 17063247Sgjelinek 17073247Sgjelinek cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP); 17083247Sgjelinek cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP); 17093247Sgjelinek cache->vmc_nresults = nres; 
17103247Sgjelinek cache->vmc_flags = flags; 17113247Sgjelinek cache->vmc_refcnt = 1; 17123247Sgjelinek return (cache); 17133247Sgjelinek } 17143247Sgjelinek 17153247Sgjelinek /* 17163247Sgjelinek * Make sure cached results are not freed 17173247Sgjelinek */ 17183247Sgjelinek static void 17193247Sgjelinek vmu_cache_hold(vmu_cache_t *cache) 17203247Sgjelinek { 17213247Sgjelinek ASSERT(MUTEX_HELD(&vmu_data.vmu_lock)); 17223247Sgjelinek cache->vmc_refcnt++; 17233247Sgjelinek } 17243247Sgjelinek 17253247Sgjelinek /* 17263247Sgjelinek * free cache data 17273247Sgjelinek */ 17283247Sgjelinek static void 17293247Sgjelinek vmu_cache_rele(vmu_cache_t *cache) 17303247Sgjelinek { 17313247Sgjelinek ASSERT(MUTEX_HELD(&vmu_data.vmu_lock)); 17323247Sgjelinek ASSERT(cache->vmc_refcnt > 0); 17333247Sgjelinek cache->vmc_refcnt--; 17343247Sgjelinek if (cache->vmc_refcnt == 0) { 17353247Sgjelinek kmem_free(cache->vmc_results, sizeof (vmusage_t) * 17367884Sgerald.jelinek@sun.com cache->vmc_nresults); 17373247Sgjelinek kmem_free(cache, sizeof (vmu_cache_t)); 17383247Sgjelinek } 17393247Sgjelinek } 17403247Sgjelinek 17413247Sgjelinek /* 17423247Sgjelinek * Copy out the cached results to a caller. Inspect the callers flags 17433247Sgjelinek * and zone to determine which cached results should be copied. 
17443247Sgjelinek */ 17453247Sgjelinek static int 17463247Sgjelinek vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres, 17477884Sgerald.jelinek@sun.com uint_t flags, int cpflg) 17483247Sgjelinek { 17493247Sgjelinek vmusage_t *result, *out_result; 17503247Sgjelinek vmusage_t dummy; 17513247Sgjelinek size_t i, count = 0; 17523247Sgjelinek size_t bufsize; 17533247Sgjelinek int ret = 0; 17543247Sgjelinek uint_t types = 0; 17553247Sgjelinek 17563247Sgjelinek if (nres != NULL) { 17577884Sgerald.jelinek@sun.com if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg)) 17583247Sgjelinek return (set_errno(EFAULT)); 17593247Sgjelinek } else { 17603247Sgjelinek bufsize = 0; 17613247Sgjelinek } 17623247Sgjelinek 17633247Sgjelinek /* figure out what results the caller is interested in. */ 17643247Sgjelinek if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone) 17653247Sgjelinek types |= VMUSAGE_SYSTEM; 17663247Sgjelinek if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) 17673247Sgjelinek types |= VMUSAGE_ZONE; 17683247Sgjelinek if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS | 17693247Sgjelinek VMUSAGE_COL_PROJECTS)) 17703247Sgjelinek types |= VMUSAGE_PROJECTS; 17713247Sgjelinek if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) 17723247Sgjelinek types |= VMUSAGE_TASKS; 17733247Sgjelinek if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) 17743247Sgjelinek types |= VMUSAGE_RUSERS; 17753247Sgjelinek if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) 17763247Sgjelinek types |= VMUSAGE_EUSERS; 17773247Sgjelinek 17783247Sgjelinek /* count results for current zone */ 17793247Sgjelinek out_result = buf; 17803247Sgjelinek for (result = cache->vmc_results, i = 0; 17813247Sgjelinek i < cache->vmc_nresults; result++, i++) { 17823247Sgjelinek 17833247Sgjelinek /* Do not return "other-zone" results to non-global zones */ 17843247Sgjelinek if (curproc->p_zone != global_zone && 17853247Sgjelinek 
curproc->p_zone->zone_id != result->vmu_zoneid) 17863247Sgjelinek continue; 17873247Sgjelinek 17883247Sgjelinek /* 17893247Sgjelinek * If non-global zone requests VMUSAGE_SYSTEM, fake 17903247Sgjelinek * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result. 17913247Sgjelinek */ 17923247Sgjelinek if (curproc->p_zone != global_zone && 17933247Sgjelinek (flags & VMUSAGE_SYSTEM) != 0 && 17943247Sgjelinek result->vmu_type == VMUSAGE_ZONE) { 17953247Sgjelinek count++; 17963247Sgjelinek if (out_result != NULL) { 17973247Sgjelinek if (bufsize < count) { 17983247Sgjelinek ret = set_errno(EOVERFLOW); 17993247Sgjelinek } else { 18003247Sgjelinek dummy = *result; 18013247Sgjelinek dummy.vmu_zoneid = ALL_ZONES; 18023247Sgjelinek dummy.vmu_id = 0; 18033247Sgjelinek dummy.vmu_type = VMUSAGE_SYSTEM; 18047884Sgerald.jelinek@sun.com if (ddi_copyout(&dummy, out_result, 18057884Sgerald.jelinek@sun.com sizeof (vmusage_t), cpflg)) 18067884Sgerald.jelinek@sun.com return (set_errno(EFAULT)); 18073247Sgjelinek out_result++; 18083247Sgjelinek } 18093247Sgjelinek } 18103247Sgjelinek } 18113247Sgjelinek 18123247Sgjelinek /* Skip results that do not match requested type */ 18133247Sgjelinek if ((result->vmu_type & types) == 0) 18143247Sgjelinek continue; 18153247Sgjelinek 18163247Sgjelinek /* Skip collated results if not requested */ 18173247Sgjelinek if (result->vmu_zoneid == ALL_ZONES) { 18183247Sgjelinek if (result->vmu_type == VMUSAGE_PROJECTS && 18193247Sgjelinek (flags & VMUSAGE_COL_PROJECTS) == 0) 18203247Sgjelinek continue; 18213247Sgjelinek if (result->vmu_type == VMUSAGE_EUSERS && 18223247Sgjelinek (flags & VMUSAGE_COL_EUSERS) == 0) 18233247Sgjelinek continue; 18243247Sgjelinek if (result->vmu_type == VMUSAGE_RUSERS && 18253247Sgjelinek (flags & VMUSAGE_COL_RUSERS) == 0) 18263247Sgjelinek continue; 18273247Sgjelinek } 18283247Sgjelinek 18293247Sgjelinek /* Skip "other zone" results if not requested */ 18303247Sgjelinek if (result->vmu_zoneid != curproc->p_zone->zone_id) { 
18313247Sgjelinek if (result->vmu_type == VMUSAGE_ZONE && 18323247Sgjelinek (flags & VMUSAGE_ALL_ZONES) == 0) 18333247Sgjelinek continue; 18343247Sgjelinek if (result->vmu_type == VMUSAGE_PROJECTS && 18353247Sgjelinek (flags & (VMUSAGE_ALL_PROJECTS | 18363247Sgjelinek VMUSAGE_COL_PROJECTS)) == 0) 18373247Sgjelinek continue; 18383247Sgjelinek if (result->vmu_type == VMUSAGE_TASKS && 18393247Sgjelinek (flags & VMUSAGE_ALL_TASKS) == 0) 18403247Sgjelinek continue; 18413247Sgjelinek if (result->vmu_type == VMUSAGE_RUSERS && 18423247Sgjelinek (flags & (VMUSAGE_ALL_RUSERS | 18433247Sgjelinek VMUSAGE_COL_RUSERS)) == 0) 18443247Sgjelinek continue; 18453247Sgjelinek if (result->vmu_type == VMUSAGE_EUSERS && 18463247Sgjelinek (flags & (VMUSAGE_ALL_EUSERS | 18473247Sgjelinek VMUSAGE_COL_EUSERS)) == 0) 18483247Sgjelinek continue; 18493247Sgjelinek } 18503247Sgjelinek count++; 18513247Sgjelinek if (out_result != NULL) { 18523247Sgjelinek if (bufsize < count) { 18533247Sgjelinek ret = set_errno(EOVERFLOW); 18543247Sgjelinek } else { 18557884Sgerald.jelinek@sun.com if (ddi_copyout(result, out_result, 18567884Sgerald.jelinek@sun.com sizeof (vmusage_t), cpflg)) 18573247Sgjelinek return (set_errno(EFAULT)); 18583247Sgjelinek out_result++; 18593247Sgjelinek } 18603247Sgjelinek } 18613247Sgjelinek } 18623247Sgjelinek if (nres != NULL) 18637884Sgerald.jelinek@sun.com if (ddi_copyout(&count, (void *)nres, sizeof (size_t), cpflg)) 18643247Sgjelinek return (set_errno(EFAULT)); 18653247Sgjelinek 18663247Sgjelinek return (ret); 18673247Sgjelinek } 18683247Sgjelinek 18693247Sgjelinek /* 18703247Sgjelinek * vm_getusage() 18713247Sgjelinek * 18723247Sgjelinek * Counts rss and swap by zone, project, task, and/or user. The flags argument 18733247Sgjelinek * determines the type of results structures returned. Flags requesting 18743247Sgjelinek * results from more than one zone are "flattened" to the local zone if the 18753247Sgjelinek * caller is not the global zone. 
18763247Sgjelinek * 18773247Sgjelinek * args: 18783247Sgjelinek * flags: bitmap consisting of one or more of VMUSAGE_*. 18793247Sgjelinek * age: maximum allowable age (time since counting was done) in 18803247Sgjelinek * seconds of the results. Results from previous callers are 18813247Sgjelinek * cached in kernel. 18823247Sgjelinek * buf: pointer to buffer array of vmusage_t. If NULL, then only nres 18833247Sgjelinek * set on success. 18843247Sgjelinek * nres: Set to number of vmusage_t structures pointed to by buf 18853247Sgjelinek * before calling vm_getusage(). 18863247Sgjelinek * On return 0 (success) or ENOSPC, is set to the number of result 18873247Sgjelinek * structures returned or attempted to return. 18883247Sgjelinek * 18893247Sgjelinek * returns 0 on success, -1 on failure: 18903247Sgjelinek * EINTR (interrupted) 18913247Sgjelinek * ENOSPC (nres to small for results, nres set to needed value for success) 18923247Sgjelinek * EINVAL (flags invalid) 18933247Sgjelinek * EFAULT (bad address for buf or nres) 18943247Sgjelinek */ 18953247Sgjelinek int 18967884Sgerald.jelinek@sun.com vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg) 18973247Sgjelinek { 18983247Sgjelinek vmu_entity_t *entity; 18993247Sgjelinek vmusage_t *result; 19003247Sgjelinek int ret = 0; 19013247Sgjelinek int cacherecent = 0; 19023247Sgjelinek hrtime_t now; 19033247Sgjelinek uint_t flags_orig; 19043247Sgjelinek 19053247Sgjelinek /* 19063247Sgjelinek * Non-global zones cannot request system wide and/or collated 19073247Sgjelinek * results, or the system result, so munge the flags accordingly. 
19083247Sgjelinek */ 19093247Sgjelinek flags_orig = flags; 19103247Sgjelinek if (curproc->p_zone != global_zone) { 19113247Sgjelinek if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) { 19123247Sgjelinek flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS); 19133247Sgjelinek flags |= VMUSAGE_PROJECTS; 19143247Sgjelinek } 19153247Sgjelinek if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) { 19163247Sgjelinek flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS); 19173247Sgjelinek flags |= VMUSAGE_RUSERS; 19183247Sgjelinek } 19193247Sgjelinek if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) { 19203247Sgjelinek flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS); 19213247Sgjelinek flags |= VMUSAGE_EUSERS; 19223247Sgjelinek } 19233247Sgjelinek if (flags & VMUSAGE_SYSTEM) { 19243247Sgjelinek flags &= ~VMUSAGE_SYSTEM; 19253247Sgjelinek flags |= VMUSAGE_ZONE; 19263247Sgjelinek } 19273247Sgjelinek } 19283247Sgjelinek 19293247Sgjelinek /* Check for unknown flags */ 19303247Sgjelinek if ((flags & (~VMUSAGE_MASK)) != 0) 19313247Sgjelinek return (set_errno(EINVAL)); 19323247Sgjelinek 19333247Sgjelinek /* Check for no flags */ 19343247Sgjelinek if ((flags & VMUSAGE_MASK) == 0) 19353247Sgjelinek return (set_errno(EINVAL)); 19363247Sgjelinek 19373247Sgjelinek mutex_enter(&vmu_data.vmu_lock); 19383247Sgjelinek now = gethrtime(); 19393247Sgjelinek 19403247Sgjelinek start: 19413247Sgjelinek if (vmu_data.vmu_cache != NULL) { 19423247Sgjelinek 19433247Sgjelinek vmu_cache_t *cache; 19443247Sgjelinek 19453247Sgjelinek if ((vmu_data.vmu_cache->vmc_timestamp + 19463247Sgjelinek ((hrtime_t)age * NANOSEC)) > now) 19473247Sgjelinek cacherecent = 1; 19483247Sgjelinek 19493247Sgjelinek if ((vmu_data.vmu_cache->vmc_flags & flags) == flags && 19503247Sgjelinek cacherecent == 1) { 19513247Sgjelinek cache = vmu_data.vmu_cache; 19523247Sgjelinek vmu_cache_hold(cache); 19533247Sgjelinek mutex_exit(&vmu_data.vmu_lock); 19543247Sgjelinek 19557884Sgerald.jelinek@sun.com ret = 
vmu_copyout_results(cache, buf, nres, flags_orig, 19567884Sgerald.jelinek@sun.com cpflg); 19573247Sgjelinek mutex_enter(&vmu_data.vmu_lock); 19583247Sgjelinek vmu_cache_rele(cache); 19593247Sgjelinek if (vmu_data.vmu_pending_waiters > 0) 19603247Sgjelinek cv_broadcast(&vmu_data.vmu_cv); 19613247Sgjelinek mutex_exit(&vmu_data.vmu_lock); 19623247Sgjelinek return (ret); 19633247Sgjelinek } 19643247Sgjelinek /* 19653247Sgjelinek * If the cache is recent, it is likely that there are other 19663247Sgjelinek * consumers of vm_getusage running, so add their flags to the 19673247Sgjelinek * desired flags for the calculation. 19683247Sgjelinek */ 19693247Sgjelinek if (cacherecent == 1) 19703247Sgjelinek flags = vmu_data.vmu_cache->vmc_flags | flags; 19713247Sgjelinek } 19723247Sgjelinek if (vmu_data.vmu_calc_thread == NULL) { 19733247Sgjelinek 19743247Sgjelinek vmu_cache_t *cache; 19753247Sgjelinek 19763247Sgjelinek vmu_data.vmu_calc_thread = curthread; 19773247Sgjelinek vmu_data.vmu_calc_flags = flags; 19783247Sgjelinek vmu_data.vmu_entities = NULL; 19793247Sgjelinek vmu_data.vmu_nentities = 0; 19803247Sgjelinek if (vmu_data.vmu_pending_waiters > 0) 19813247Sgjelinek vmu_data.vmu_calc_flags |= 19823247Sgjelinek vmu_data.vmu_pending_flags; 19833247Sgjelinek 19843247Sgjelinek vmu_data.vmu_pending_flags = 0; 19853247Sgjelinek mutex_exit(&vmu_data.vmu_lock); 19863247Sgjelinek vmu_calculate(); 19873247Sgjelinek mutex_enter(&vmu_data.vmu_lock); 19883247Sgjelinek /* copy results to cache */ 19893247Sgjelinek if (vmu_data.vmu_cache != NULL) 19903247Sgjelinek vmu_cache_rele(vmu_data.vmu_cache); 19913247Sgjelinek cache = vmu_data.vmu_cache = 19923247Sgjelinek vmu_cache_alloc(vmu_data.vmu_nentities, 19937884Sgerald.jelinek@sun.com vmu_data.vmu_calc_flags); 19943247Sgjelinek 19953247Sgjelinek result = cache->vmc_results; 19963247Sgjelinek for (entity = vmu_data.vmu_entities; entity != NULL; 19973247Sgjelinek entity = entity->vme_next) { 19983247Sgjelinek *result = entity->vme_result; 
19993247Sgjelinek result++; 20003247Sgjelinek } 20013247Sgjelinek cache->vmc_timestamp = gethrtime(); 20023247Sgjelinek vmu_cache_hold(cache); 20033247Sgjelinek 20043247Sgjelinek vmu_data.vmu_calc_flags = 0; 20053247Sgjelinek vmu_data.vmu_calc_thread = NULL; 20063247Sgjelinek 20073247Sgjelinek if (vmu_data.vmu_pending_waiters > 0) 20083247Sgjelinek cv_broadcast(&vmu_data.vmu_cv); 20093247Sgjelinek 20103247Sgjelinek mutex_exit(&vmu_data.vmu_lock); 20113247Sgjelinek 20123247Sgjelinek /* copy cache */ 20137884Sgerald.jelinek@sun.com ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg); 20143247Sgjelinek mutex_enter(&vmu_data.vmu_lock); 20153247Sgjelinek vmu_cache_rele(cache); 20163247Sgjelinek mutex_exit(&vmu_data.vmu_lock); 20173247Sgjelinek 20183247Sgjelinek return (ret); 20193247Sgjelinek } 20203247Sgjelinek vmu_data.vmu_pending_flags |= flags; 20213247Sgjelinek vmu_data.vmu_pending_waiters++; 20223247Sgjelinek while (vmu_data.vmu_calc_thread != NULL) { 20233247Sgjelinek if (cv_wait_sig(&vmu_data.vmu_cv, 20243247Sgjelinek &vmu_data.vmu_lock) == 0) { 20253247Sgjelinek vmu_data.vmu_pending_waiters--; 20263247Sgjelinek mutex_exit(&vmu_data.vmu_lock); 20273247Sgjelinek return (set_errno(EINTR)); 20283247Sgjelinek } 20293247Sgjelinek } 20303247Sgjelinek vmu_data.vmu_pending_waiters--; 20313247Sgjelinek goto start; 20323247Sgjelinek } 2033