/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * vm_usage
 *
 * This file implements the getvmusage() private system call.
 * getvmusage() counts the amount of resident memory pages and swap
 * reserved by the specified process collective.  A "process collective" is
 * the set of processes owned by a particular zone, project, task, or user.
 *
 * rss and swap are counted so that for a given process collective, a page is
 * only counted once.  For example, this means that if multiple processes in
 * the same project map the same page, then the project will only be charged
 * once for that page.  On the other hand, if two processes in different
 * projects map the same page, then both projects will be charged
 * for the page.
 *
 * The vm_getusage() calculation is implemented so that the first thread
 * performs the rss/swap counting.  Other callers will wait for that thread
 * to finish, copying the results.  This enables multiple rcapds and prstats
 * to consume data from the same calculation.  The results are also cached
 * so that a caller interested in recent results can just copy them instead
 * of starting a new calculation.  The caller passes the maximum age (in
 * seconds) of the data.  If the cached data is young enough, the cache is
 * copied, otherwise, a new calculation is executed and the cache is
 * replaced with the new data.
 *
 * The rss calculation for each process collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *	    - If seg is private:
 *		- Lookup anons in the amp.
 *		- For incore pages not previously visited for each of the
 *		  proc's collectives, add incore pagesize to each collective.
 *		  Anons with a refcnt of 1 can be assumed to be not
 *		  previously visited.
 *		- For address ranges without anons in the amp:
 *		    - Lookup pages in underlying vnode.
 *		    - For incore pages not previously visited for each of
 *		      the proc's collectives, add incore pagesize to each
 *		      collective.
 *	    - If seg is shared:
 *		- Lookup pages in the shared amp or vnode.
 *		- For incore pages not previously visited for each of
 *		  the proc's collectives, add incore pagesize to each
 *		  collective.
 *
 * Swap is reserved by private segments, and shared anonymous segments.
 * The only shared anon segments which do not reserve swap are ISM segments
 * and schedctl segments, both of which can be identified by having
 * amp->swresv == 0.
 *
 * The swap calculation for each collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *	    - If seg is private:
 *		- Add svd->swresv pages to swap count for each of the
 *		  proc's collectives.
 *	    - If seg is anon, shared, and amp->swresv != 0
 *		- For address ranges in amp not previously visited for
 *		  each of the proc's collectives, add size of address
 *		  range to the swap count for each collective.
 *
 * These two calculations are done simultaneously, with most of the work
 * being done in vmu_calculate_seg().  The results of the calculation are
 * copied into "vmu_data.vmu_cache_results".
 *
 * To perform the calculation, various things are tracked and cached:
 *
 *    - incore/not-incore page ranges for all vnodes.
 *	(vmu_data.vmu_all_vnodes_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - incore/not-incore page ranges for all shared amps.
 *	(vmu_data.vmu_all_amps_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - visited page ranges for each collective.
 *	  - per vnode (entity->vme_vnode_hash)
 *	  - per shared amp (entity->vme_amp_hash)
 *	For accurate counting of map-shared and COW-shared pages.
 *
 *    - visited private anons (refcnt > 1) for each collective.
 *	(entity->vme_anon_hash)
 *	For accurate counting of COW-shared pages.
 *
 * The common accounting structure is the vmu_entity_t, which represents
 * collectives:
 *
 *    - A zone.
 *    - A project, task, or user within a zone.
 *    - The entire system (vmu_data.vmu_system).
 *    - Each collapsed (col) project and user.  This means a given projid or
 *	uid, regardless of which zone the process is in.  For instance,
 *	project 0 in the global zone and project 0 in a non global zone are
 *	the same collapsed project.
 *
 * Each entity structure tracks which pages have been already visited for
 * that entity (via previously inspected processes) so that these pages are
 * not double counted.
 */
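
/*
 * Usage sketch (illustration only; not compiled as part of this file):
 * userland consumers such as prstat(1M) and rcapd(1M) reach this code
 * through the private getvmusage(2) interface declared in <sys/vm_usage.h>.
 * Assuming that declaration, a caller asking for results at most 5 seconds
 * old might look roughly like this (error handling elided); if the result
 * buffer is too small, the call is assumed to fail after updating nres, so
 * the caller resizes and retries:
 *
 *	size_t nres = 8;
 *	vmusage_t *buf = malloc(sizeof (vmusage_t) * nres);
 *
 *	while (getvmusage(VMUSAGE_ALL_ZONES, 5, buf, &nres) != 0) {
 *		free(buf);
 *		buf = malloc(sizeof (vmusage_t) * nres);
 *	}
 *
 * Each returned vmusage_t identifies its entity (vmu_id, vmu_type,
 * vmu_zoneid) and carries that entity's rss and swap counts.
 */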

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/zone.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/task.h>
#include <sys/thread.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/modhash.h>
#include <sys/modhash_impl.h>
#include <sys/shm.h>
#include <sys/swap.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vm_usage.h>
#include <sys/sunddi.h>
#include <sys/avl.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>

#define	VMUSAGE_HASH_SIZE		512

#define	VMUSAGE_TYPE_VNODE		1
#define	VMUSAGE_TYPE_AMP		2
#define	VMUSAGE_TYPE_ANON		3

#define	VMUSAGE_BOUND_UNKNOWN		0
#define	VMUSAGE_BOUND_INCORE		1
#define	VMUSAGE_BOUND_NOT_INCORE	2

#define	ISWITHIN(node, addr)	((node)->vmb_start <= (addr) && \
				(node)->vmb_end >= (addr) ? 1 : 0)

/*
 * bounds for vnodes and shared amps
 * Each bound is either entirely incore, entirely not in core, or
 * entirely unknown.  Bounds are stored in an avl tree sorted by the start
 * member when in use; otherwise (on the free or temporary lists) they're
 * strung together off of vmb_next.
 */
typedef struct vmu_bound {
	avl_node_t vmb_node;
	struct vmu_bound *vmb_next; /* NULL in tree else on free or temp list */
	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
	pgcnt_t	vmb_end;    /* page offset in vnode/amp on which bound ends */
	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
} vmu_bound_t;
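
/*
 * For example, if pages [0, 9] of an object are known to be resident and
 * pages [10, 19] are known not to be, the object's bounds tree holds:
 *
 *	{ vmb_start = 0,  vmb_end = 9,  vmb_type = VMUSAGE_BOUND_INCORE }
 *	{ vmb_start = 10, vmb_end = 19, vmb_type = VMUSAGE_BOUND_NOT_INCORE }
 *
 * Ranges that have not been inspected yet carry VMUSAGE_BOUND_UNKNOWN and
 * are resolved (and possibly split) by later passes.
 */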

/*
 * hash of visited objects (vnodes or shared amps)
 * key is address of vnode or amp.  Bounds lists known incore/non-incore
 * bounds for vnode/amp.
 */
typedef struct vmu_object {
	struct vmu_object *vmo_next;	/* free list */
	caddr_t		vmo_key;
	short		vmo_type;
	avl_tree_t	vmo_bounds;
} vmu_object_t;

/*
 * Entity by which to count results.
 *
 * The entity structure keeps the current rss/swap counts for each entity
 * (zone, project, etc), and hashes of vm structures that have already
 * been visited for the entity.
 *
 * vme_next:	links the list of all entities currently being counted by
 *		vmu_calculate().
 *
 * vme_next_calc: links the list of entities related to the current process
 *		being counted by vmu_calculate_proc().
 *
 * vmu_calculate_proc() walks all processes.  For each process, it makes a
 * list of the entities related to that process using vme_next_calc.  This
 * list changes each time vmu_calculate_proc() is called.
 */
typedef struct vmu_entity {
	struct vmu_entity *vme_next;
	struct vmu_entity *vme_next_calc;
	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
	mod_hash_t	*vme_anon_hash;	 /* COW anons visited for entity */
	vmusage_t	vme_result;	 /* identifies entity and results */
} vmu_entity_t;

/*
 * Hash of entities visited within a zone, and an entity for the zone
 * itself.
 */
typedef struct vmu_zone {
	struct vmu_zone	*vmz_next;	/* free list */
	id_t		vmz_id;
	vmu_entity_t	*vmz_zone;
	mod_hash_t	*vmz_projects_hash;
	mod_hash_t	*vmz_tasks_hash;
	mod_hash_t	*vmz_rusers_hash;
	mod_hash_t	*vmz_eusers_hash;
} vmu_zone_t;

/*
 * Cache of results from last calculation
 */
typedef struct vmu_cache {
	vmusage_t	*vmc_results;	/* Results from last call to */
					/* vm_getusage(). */
	uint64_t	vmc_nresults;	/* Count of cached results */
	uint64_t	vmc_refcnt;	/* refcnt for free */
	uint_t		vmc_flags;	/* Flags for vm_getusage() */
	hrtime_t	vmc_timestamp;	/* when cache was created */
} vmu_cache_t;
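
/*
 * Freshness sketch:  conceptually, the caller-supplied maximum age (in
 * seconds) is honored by comparing vmc_timestamp with the current hrtime.
 * The actual check lives in the vm_getusage() path (not part of this
 * excerpt), but amounts to something like:
 *
 *	if (cache != NULL && cache->vmc_timestamp +
 *	    (hrtime_t)age * NANOSEC > gethrtime())
 *		copy cache->vmc_results instead of recalculating;
 */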

/*
 * top level rss info for the system
 */
typedef struct vmu_data {
	kmutex_t	vmu_lock;		/* Protects vmu_data */
	kcondvar_t	vmu_cv;			/* Used to signal threads */
						/* waiting for the calc */
						/* thread to finish */
	vmu_entity_t	*vmu_system;		/* Entity for tracking */
						/* rss/swap for all processes */
						/* in all zones */
	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
	mod_hash_t	*vmu_projects_col_hash; /* These *_col_hash hashes */
	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
						/* to implement VMUSAGE_COL_* */
						/* flags, which aggregate by */
						/* project or user regardless */
						/* of zoneid. */
	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
						/* to track incore/not-incore */
	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
						/* amps to track incore/not- */
						/* incore */
	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
	size_t		vmu_nentities;		/* Count of entities in list */
	vmu_cache_t	*vmu_cache;		/* Cached results */
	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
						/* vmu_calculate() */
	uint_t		vmu_calc_flags;		/* Flags being used by */
						/* currently running calc */
						/* thread */
	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
						/* threads waiting for */
						/* calc thread to finish */
	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
						/* for calc thread */
	vmu_bound_t	*vmu_free_bounds;
	vmu_object_t	*vmu_free_objects;
	vmu_entity_t	*vmu_free_entities;
	vmu_zone_t	*vmu_free_zones;
} vmu_data_t;

extern struct as kas;
extern proc_t *practive;
extern zone_t *global_zone;
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

static vmu_data_t vmu_data;
static kmem_cache_t *vmu_bound_cache;
static kmem_cache_t *vmu_object_cache;

/*
 * Comparison routine for AVL tree.  We base our comparison on vmb_start.
 */
static int
bounds_cmp(const void *bnd1, const void *bnd2)
{
	const vmu_bound_t *bound1 = bnd1;
	const vmu_bound_t *bound2 = bnd2;

	if (bound1->vmb_start == bound2->vmb_start) {
		return (0);
	}
	if (bound1->vmb_start < bound2->vmb_start) {
		return (-1);
	}

	return (1);
}

/*
 * Save a bound on the free list.
 */
static void
vmu_free_bound(vmu_bound_t *bound)
{
	bound->vmb_next = vmu_data.vmu_free_bounds;
	bound->vmb_start = 0;
	bound->vmb_end = 0;
	bound->vmb_type = 0;
	vmu_data.vmu_free_bounds = bound;
}

/*
 * Free an object, and all visited bound info.
 */
static void
vmu_free_object(mod_hash_val_t val)
{
	vmu_object_t *obj = (vmu_object_t *)val;
	avl_tree_t *tree = &(obj->vmo_bounds);
	vmu_bound_t *bound;
	void *cookie = NULL;

	while ((bound = avl_destroy_nodes(tree, &cookie)) != NULL)
		vmu_free_bound(bound);
	avl_destroy(tree);

	obj->vmo_type = 0;
	obj->vmo_next = vmu_data.vmu_free_objects;
	vmu_data.vmu_free_objects = obj;
}

/*
 * Free an entity, and hashes of visited objects for that entity.
 */
static void
vmu_free_entity(mod_hash_val_t val)
{
	vmu_entity_t *entity = (vmu_entity_t *)val;

	if (entity->vme_vnode_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_vnode_hash);
	if (entity->vme_amp_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_amp_hash);
	if (entity->vme_anon_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_anon_hash);

	entity->vme_next = vmu_data.vmu_free_entities;
	vmu_data.vmu_free_entities = entity;
}

/*
 * Free zone entity, and all hashes of entities inside that zone,
 * which are projects, tasks, and users.
 */
static void
vmu_free_zone(mod_hash_val_t val)
{
	vmu_zone_t *zone = (vmu_zone_t *)val;

	if (zone->vmz_zone != NULL) {
		vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
		zone->vmz_zone = NULL;
	}
	if (zone->vmz_projects_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_projects_hash);
	if (zone->vmz_tasks_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
	if (zone->vmz_rusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
	if (zone->vmz_eusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
	zone->vmz_next = vmu_data.vmu_free_zones;
	vmu_data.vmu_free_zones = zone;
}

/*
 * Initialize synchronization primitives and hashes for system-wide tracking
 * of visited vnodes and shared amps.  Initialize results cache.
 */
void
vm_usage_init()
{
	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);

	vmu_data.vmu_system = NULL;
	vmu_data.vmu_zones_hash = NULL;
	vmu_data.vmu_projects_col_hash = NULL;
	vmu_data.vmu_rusers_col_hash = NULL;
	vmu_data.vmu_eusers_col_hash = NULL;

	vmu_data.vmu_free_bounds = NULL;
	vmu_data.vmu_free_objects = NULL;
	vmu_data.vmu_free_entities = NULL;
	vmu_data.vmu_free_zones = NULL;

	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (vnode_t));
	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (struct anon_map));
	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed euser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);

	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	vmu_object_cache = kmem_cache_create("vmu_object_cache",
	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vmu_data.vmu_entities = NULL;
	vmu_data.vmu_nentities = 0;

	vmu_data.vmu_cache = NULL;
	vmu_data.vmu_calc_thread = NULL;
	vmu_data.vmu_calc_flags = 0;
	vmu_data.vmu_pending_flags = 0;
	vmu_data.vmu_pending_waiters = 0;
}

/*
 * Allocate hashes for tracking vm objects visited for an entity.
 * Update list of entities.
 */
static vmu_entity_t *
vmu_alloc_entity(id_t id, int type, id_t zoneid)
{
	vmu_entity_t *entity;

	if (vmu_data.vmu_free_entities != NULL) {
		entity = vmu_data.vmu_free_entities;
		vmu_data.vmu_free_entities =
		    vmu_data.vmu_free_entities->vme_next;
		bzero(&entity->vme_result, sizeof (vmusage_t));
	} else {
		entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
	}
	entity->vme_result.vmu_id = id;
	entity->vme_result.vmu_zoneid = zoneid;
	entity->vme_result.vmu_type = type;

	if (entity->vme_vnode_hash == NULL)
		entity->vme_vnode_hash = mod_hash_create_ptrhash(
		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (vnode_t));

	if (entity->vme_amp_hash == NULL)
		entity->vme_amp_hash = mod_hash_create_ptrhash(
		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (struct anon_map));

	if (entity->vme_anon_hash == NULL)
		entity->vme_anon_hash = mod_hash_create_ptrhash(
		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
		    mod_hash_null_valdtor, sizeof (struct anon));

	entity->vme_next = vmu_data.vmu_entities;
	vmu_data.vmu_entities = entity;
	vmu_data.vmu_nentities++;

	return (entity);
}

/*
 * Allocate a zone entity, and hashes for tracking visited vm objects
 * for projects, tasks, and users within that zone.
 */
static vmu_zone_t *
vmu_alloc_zone(id_t id)
{
	vmu_zone_t *zone;

	if (vmu_data.vmu_free_zones != NULL) {
		zone = vmu_data.vmu_free_zones;
		vmu_data.vmu_free_zones =
		    vmu_data.vmu_free_zones->vmz_next;
		zone->vmz_next = NULL;
		zone->vmz_zone = NULL;
	} else {
		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
	}

	zone->vmz_id = id;

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
	    VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
		zone->vmz_projects_hash = mod_hash_create_idhash(
		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
	    != 0 && zone->vmz_tasks_hash == NULL)
		zone->vmz_tasks_hash = mod_hash_create_idhash(
		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
	    != 0 && zone->vmz_rusers_hash == NULL)
		zone->vmz_rusers_hash = mod_hash_create_idhash(
		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
	    != 0 && zone->vmz_eusers_hash == NULL)
		zone->vmz_eusers_hash = mod_hash_create_idhash(
		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	return (zone);
}

/*
 * Allocate a structure for tracking visited bounds for a vm object.
 */
static vmu_object_t *
vmu_alloc_object(caddr_t key, int type)
{
	vmu_object_t *object;

	if (vmu_data.vmu_free_objects != NULL) {
		object = vmu_data.vmu_free_objects;
		vmu_data.vmu_free_objects =
		    vmu_data.vmu_free_objects->vmo_next;
	} else {
		object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
	}

	object->vmo_next = NULL;
	object->vmo_key = key;
	object->vmo_type = type;
	avl_create(&(object->vmo_bounds), bounds_cmp, sizeof (vmu_bound_t), 0);

	return (object);
}

/*
 * Allocate and return a bound structure.
 */
static vmu_bound_t *
vmu_alloc_bound()
{
	vmu_bound_t *bound;

	if (vmu_data.vmu_free_bounds != NULL) {
		bound = vmu_data.vmu_free_bounds;
		vmu_data.vmu_free_bounds =
		    vmu_data.vmu_free_bounds->vmb_next;
	} else {
		bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
	}

	bound->vmb_next = NULL;
	bound->vmb_start = 0;
	bound->vmb_end = 0;
	bound->vmb_type = 0;
	return (bound);
}

/*
 * vmu_find_insert_* functions implement hash lookup or allocate and
 * insert operations.
 */
static vmu_object_t *
vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
{
	int ret;
	vmu_object_t *object;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&object);
	if (ret != 0) {
		object = vmu_alloc_object(key, type);
		ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
		    (mod_hash_val_t)object, (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (object);
}

static int
vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
{
	int ret;
	caddr_t val;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&val);

	if (ret == 0)
		return (0);

	ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);

	ASSERT(ret == 0);

	return (1);
}

static vmu_entity_t *
vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
{
	int ret;
	vmu_entity_t *entity;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&entity);
	if (ret != 0) {
		entity = vmu_alloc_entity(id, type, zoneid);
		ret = i_mod_hash_insert_nosync(hash,
		    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
		    (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (entity);
}

/*
 * Returns list of object bounds between start and end.  New bounds inserted
 * by this call are given type.
 *
 * Returns the number of pages covered if new bounds are created.  Returns 0
 * if region between start/end consists of all existing bounds.
 */
static pgcnt_t
vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t
    end, char type, vmu_bound_t **first, vmu_bound_t **last)
{
	avl_tree_t *tree = &(ro->vmo_bounds);
	avl_index_t where;
	vmu_bound_t *walker, *tmp;
	pgcnt_t ret = 0;

	ASSERT(start <= end);

	*first = *last = NULL;

	tmp = vmu_alloc_bound();
	tmp->vmb_start = start;
	tmp->vmb_type = type;

	/* Hopelessly optimistic case. */
	if ((walker = avl_find(tree, tmp, &where)) != NULL) {
		/* We got lucky. */
		vmu_free_bound(tmp);
		*first = walker;
	}

	if (walker == NULL) {
		/* Is start in the previous node? */
		walker = avl_nearest(tree, where, AVL_BEFORE);
		if (walker != NULL) {
			if (ISWITHIN(walker, start)) {
				/* We found start. */
				vmu_free_bound(tmp);
				*first = walker;
			}
		}
	}

	/*
	 * At this point, if *first is still NULL, then we
	 * didn't get a direct hit and start isn't covered
	 * by the previous node.  We know that the next node
	 * must have a greater start value than we require
	 * because avl_find tells us where the AVL routines would
	 * insert our new node.  We have some gap between the
	 * start we want and the next node.
	 */
	if (*first == NULL) {
		walker = avl_nearest(tree, where, AVL_AFTER);
		if (walker != NULL && walker->vmb_start <= end) {
			/* Fill the gap. */
			tmp->vmb_end = walker->vmb_start - 1;
			*first = tmp;
		} else {
			/* We have a gap over [start, end]. */
			tmp->vmb_end = end;
			*first = *last = tmp;
		}
		ret += tmp->vmb_end - tmp->vmb_start + 1;
		avl_insert(tree, tmp, where);
	}

	ASSERT(*first != NULL);

	if (*last != NULL) {
		/* We're done. */
		return (ret);
	}

	/*
	 * If we are here we still need to set *last and
	 * that may involve filling in some gaps.
	 */
	*last = *first;
	for (;;) {
		if (ISWITHIN(*last, end)) {
			/* We're done. */
			break;
		}
		walker = AVL_NEXT(tree, *last);
		if (walker == NULL || walker->vmb_start > end) {
			/* Bottom or mid tree with gap. */
			tmp = vmu_alloc_bound();
			tmp->vmb_start = (*last)->vmb_end + 1;
			tmp->vmb_end = end;
			tmp->vmb_type = type;
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			avl_insert_here(tree, tmp, *last, AVL_AFTER);
			*last = tmp;
			break;
		} else {
			if ((*last)->vmb_end + 1 != walker->vmb_start) {
				/* Non-contiguous. */
				tmp = vmu_alloc_bound();
				tmp->vmb_start = (*last)->vmb_end + 1;
				tmp->vmb_end = walker->vmb_start - 1;
				tmp->vmb_type = type;
				ret += tmp->vmb_end - tmp->vmb_start + 1;
				avl_insert_here(tree, tmp, *last, AVL_AFTER);
				*last = tmp;
			} else {
				*last = walker;
			}
		}
	}

	return (ret);
}
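
/*
 * Worked example:  suppose the tree already holds bounds [5, 9] and
 * [15, 19], and the caller asks for [7, 17].  *first becomes the existing
 * [5, 9] bound, since start falls within it; the gap [10, 14] is filled
 * with a new bound of the caller's type; and *last becomes the existing
 * [15, 19] bound.  The return value is 5, as only the 5 pages of [10, 14]
 * were not already tracked.
 */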

/*
 * vmu_update_bounds()
 *
 * tree: avl_tree in which first and last hang.
 *
 * first, last: list of contiguous bounds, of which zero or more are of
 * type VMUSAGE_BOUND_UNKNOWN.
 *
 * new_tree: avl_tree in which new_first and new_last hang.
 *
 * new_first, new_last: list of contiguous bounds, of which none are of
 * type VMUSAGE_BOUND_UNKNOWN.  These bounds are used to update the types
 * of bounds in (first, last) with type VMUSAGE_BOUND_UNKNOWN.
 *
 * For the list of bounds (first, last), this function updates any bounds
 * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in
 * the list (new_first, new_last).
 *
 * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
 * (new_first, new_last), it will be split into multiple bounds.
 *
 * Return value:
 *	The number of pages in the list of bounds (first, last) that were of
 *	type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
 *	VMUSAGE_BOUND_INCORE.
 */
static pgcnt_t
vmu_update_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last,
    avl_tree_t *new_tree, vmu_bound_t *new_first, vmu_bound_t *new_last)
{
	vmu_bound_t *next, *new_next, *tmp;
	pgcnt_t rss = 0;

	next = *first;
	new_next = new_first;

	/*
	 * Verify first and last bound are covered by new bounds if they
	 * have unknown type.
	 */
	ASSERT((*first)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*first)->vmb_start >= new_first->vmb_start);
	ASSERT((*last)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*last)->vmb_end <= new_last->vmb_end);
	for (;;) {
		/* If bound already has type, proceed to next bound. */
		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}
		while (new_next->vmb_end < next->vmb_start)
			new_next = AVL_NEXT(new_tree, new_next);
		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
		next->vmb_type = new_next->vmb_type;
		if (new_next->vmb_end < next->vmb_end) {
			/* need to split bound */
			tmp = vmu_alloc_bound();
			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
			tmp->vmb_start = new_next->vmb_end + 1;
			tmp->vmb_end = next->vmb_end;
			avl_insert_here(tree, tmp, next, AVL_AFTER);
			next->vmb_end = new_next->vmb_end;
			if (*last == next)
				*last = tmp;
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			next = tmp;
		} else {
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
		}
	}
	return (rss);
}
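
/*
 * For example, if (first, last) is a single UNKNOWN bound [0, 9] and
 * (new_first, new_last) holds [0, 3] INCORE followed by [4, 9] NOT_INCORE,
 * the unknown bound is split into [0, 3] INCORE and [4, 9] NOT_INCORE and
 * 4 is returned, since only pages resolved to INCORE count toward rss.
 */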

/*
 * Merges adjacent bounds with same type between first and last bound.
 * After the merge, the last pointer may point to a different bound, as
 * the incoming last bound may have been merged away.
 */
static void
vmu_merge_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *current;
	vmu_bound_t *next;

	ASSERT(tree != NULL);
	ASSERT(*first != NULL);
	ASSERT(*last != NULL);

	current = *first;
	while (current != *last) {
		next = AVL_NEXT(tree, current);
		if ((current->vmb_end + 1) == next->vmb_start &&
		    current->vmb_type == next->vmb_type) {
			current->vmb_end = next->vmb_end;
			avl_remove(tree, next);
			vmu_free_bound(next);
			if (next == *last) {
				*last = current;
			}
		} else {
			current = AVL_NEXT(tree, current);
		}
	}
}
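
/*
 * For example, adjacent bounds [0, 3], [4, 7] and [8, 9], all of type
 * VMUSAGE_BOUND_INCORE, collapse into the single bound [0, 9]; since the
 * incoming *last ([8, 9]) is merged away, *last is moved back to the
 * surviving bound.
 */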

/*
 * Given an amp and a list of bounds, updates each bound's type with
 * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
 *
 * If a bound is partially incore, it will be split into two bounds.
 * first and last may be modified, as bounds may be split into multiple
 * bounds if they are partially incore/not-incore.
 *
 * Set incore to non-zero if bounds are already known to be incore.
 */
static void
vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
    vmu_bound_t **first, vmu_bound_t **last, boolean_t incore)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;
	vnode_t *vn;
	anoff_t off;
	struct anon *ap;

	next = *first;
	/* Shared anon slots don't change once set. */
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (;;) {
		if (incore == B_TRUE)
			next->vmb_type = VMUSAGE_BOUND_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}
		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			ap = anon_get_ptr(amp->ahp, index);
			if (ap != NULL)
				swap_xlate(ap, &vn, &off);

			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
			    (page = page_exists(vn, off)) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
				/* Remember the settled type for this bound. */
				bound_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				avl_insert_here(tree, tmp, next, AVL_AFTER);
				next->vmb_end = index - 1;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = AVL_NEXT(tree, next);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
}

/*
 * Same as vmu_amp_update_incore_bounds(), except for tracking
 * incore-/not-incore for vnodes.
 */
static void
vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
    vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;

	next = *first;
	for (;;) {
		if (vnode->v_pages == NULL)
			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}

		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			if (vnode->v_pages != NULL &&
			    (page = page_exists(vnode, ptob(index))) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
				/* Remember the settled type for this bound. */
				bound_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				avl_insert_here(tree, tmp, next, AVL_AFTER);
				next->vmb_end = index - 1;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = AVL_NEXT(tree, next);
	}
}
10713247Sgjelinek
10723247Sgjelinek /*
10733247Sgjelinek * Calculate the rss and swap consumed by a segment. vmu_entities is the
10743247Sgjelinek * list of entities to visit. For shared segments, the vnode or amp
107510093SPeter.Telford@Sun.COM * is looked up in each entity to see if it has been already counted. Private
107610093SPeter.Telford@Sun.COM * anon pages are checked per entity to ensure that COW pages are not
10773247Sgjelinek * double counted.
10783247Sgjelinek *
10793247Sgjelinek * For private mapped files, first the amp is checked for private pages.
10803247Sgjelinek * Bounds not backed by the amp are looked up in the vnode for each entity
10813247Sgjelinek * to avoid double counting of private COW vnode pages.
10823247Sgjelinek */
10833247Sgjelinek static void
vmu_calculate_seg(vmu_entity_t * vmu_entities,struct seg * seg)10843247Sgjelinek vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
10853247Sgjelinek {
10863247Sgjelinek struct segvn_data *svd;
10873247Sgjelinek struct shm_data *shmd;
10883247Sgjelinek struct spt_data *sptd;
10893247Sgjelinek vmu_object_t *shared_object = NULL;
10903247Sgjelinek vmu_object_t *entity_object = NULL;
10913247Sgjelinek vmu_entity_t *entity;
10923247Sgjelinek vmusage_t *result;
10933247Sgjelinek vmu_bound_t *first = NULL;
10943247Sgjelinek vmu_bound_t *last = NULL;
10953247Sgjelinek vmu_bound_t *cur = NULL;
10963247Sgjelinek vmu_bound_t *e_first = NULL;
10973247Sgjelinek vmu_bound_t *e_last = NULL;
10983247Sgjelinek vmu_bound_t *tmp;
10993247Sgjelinek pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
11003247Sgjelinek struct anon_map *private_amp = NULL;
11013247Sgjelinek boolean_t incore = B_FALSE;
11023247Sgjelinek boolean_t shared = B_FALSE;
11033247Sgjelinek int file = 0;
11043247Sgjelinek pgcnt_t swresv = 0;
11053247Sgjelinek pgcnt_t panon = 0;
11063247Sgjelinek
110710093SPeter.Telford@Sun.COM /* Can zero-length segments exist? Not sure, so paranoia. */
11083247Sgjelinek if (seg->s_size <= 0)
11093247Sgjelinek return;
11103247Sgjelinek
11113247Sgjelinek /*
11123247Sgjelinek * Figure out if there is a shared object (such as a named vnode or
11133247Sgjelinek * a shared amp, then figure out if there is a private amp, which
11143247Sgjelinek * identifies private pages.
11153247Sgjelinek */
11163247Sgjelinek if (seg->s_ops == &segvn_ops) {
11173247Sgjelinek svd = (struct segvn_data *)seg->s_data;
111810093SPeter.Telford@Sun.COM if (svd->type == MAP_SHARED) {
11193247Sgjelinek shared = B_TRUE;
112010093SPeter.Telford@Sun.COM } else {
11213247Sgjelinek swresv = svd->swresv;
11223247Sgjelinek
112310093SPeter.Telford@Sun.COM if (SEGVN_LOCK_TRYENTER(seg->s_as, &svd->lock,
112410093SPeter.Telford@Sun.COM RW_READER) != 0) {
112510093SPeter.Telford@Sun.COM /*
112610093SPeter.Telford@Sun.COM * Text replication anon maps can be shared
112710093SPeter.Telford@Sun.COM * across all zones. Space used for text
112810093SPeter.Telford@Sun.COM * replication is typically capped as a small %
112910093SPeter.Telford@Sun.COM * of memory. To keep it simple for now we
113010093SPeter.Telford@Sun.COM * don't account for swap and memory space used
113110093SPeter.Telford@Sun.COM * for text replication.
113210093SPeter.Telford@Sun.COM */
113310093SPeter.Telford@Sun.COM if (svd->tr_state == SEGVN_TR_OFF &&
113410093SPeter.Telford@Sun.COM svd->amp != NULL) {
113510093SPeter.Telford@Sun.COM private_amp = svd->amp;
113610093SPeter.Telford@Sun.COM p_start = svd->anon_index;
113710093SPeter.Telford@Sun.COM p_end = svd->anon_index +
113810093SPeter.Telford@Sun.COM btop(seg->s_size) - 1;
113910093SPeter.Telford@Sun.COM }
114010093SPeter.Telford@Sun.COM SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
114110093SPeter.Telford@Sun.COM }
114210093SPeter.Telford@Sun.COM }
11433247Sgjelinek if (svd->vp != NULL) {
11443247Sgjelinek file = 1;
11453247Sgjelinek shared_object = vmu_find_insert_object(
11463247Sgjelinek vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
11473247Sgjelinek VMUSAGE_TYPE_VNODE);
11483247Sgjelinek s_start = btop(svd->offset);
11493247Sgjelinek s_end = btop(svd->offset + seg->s_size) - 1;
11503247Sgjelinek }
11513247Sgjelinek if (svd->amp != NULL && svd->type == MAP_SHARED) {
11523247Sgjelinek ASSERT(shared_object == NULL);
11533247Sgjelinek shared_object = vmu_find_insert_object(
11543247Sgjelinek vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
11553247Sgjelinek VMUSAGE_TYPE_AMP);
11563247Sgjelinek s_start = svd->anon_index;
11573247Sgjelinek s_end = svd->anon_index + btop(seg->s_size) - 1;
11583247Sgjelinek /* schedctl mappings are always in core */
11593247Sgjelinek if (svd->amp->swresv == 0)
11603247Sgjelinek incore = B_TRUE;
11613247Sgjelinek }
11623247Sgjelinek } else if (seg->s_ops == &segspt_shmops) {
11633247Sgjelinek shared = B_TRUE;
11643247Sgjelinek shmd = (struct shm_data *)seg->s_data;
11653247Sgjelinek shared_object = vmu_find_insert_object(
11663247Sgjelinek vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
11673247Sgjelinek VMUSAGE_TYPE_AMP);
11683247Sgjelinek s_start = 0;
11693247Sgjelinek s_end = btop(seg->s_size) - 1;
11703247Sgjelinek sptd = shmd->shm_sptseg->s_data;
11713247Sgjelinek
11723247Sgjelinek /* ism segments are always incore and do not reserve swap */
11733247Sgjelinek if (sptd->spt_flags & SHM_SHARE_MMU)
11743247Sgjelinek incore = B_TRUE;
11753247Sgjelinek
11763247Sgjelinek } else {
11773247Sgjelinek return;
11783247Sgjelinek }
11793247Sgjelinek
11803247Sgjelinek /*
11813247Sgjelinek * If there is a private amp, count anon pages that exist. If an
118210093SPeter.Telford@Sun.COM * anon has a refcnt > 1 (COW sharing), then save the anon in a
11833247Sgjelinek * hash so that it is not double counted.
11843247Sgjelinek *
118510093SPeter.Telford@Sun.COM * If there is also a shared object, then figure out the bounds
11863247Sgjelinek * which are not mapped by the private amp.
11873247Sgjelinek */
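	/*
	 * For example (a sketch of the accounting below): if two processes
	 * in the same project share a COW anon (an_refcnt == 2), the first
	 * visit inserts the anon into the project's vme_anon_hash and
	 * charges the page; the second visit finds it already present and
	 * skips it, so the project is charged exactly once.  Anons with
	 * an_refcnt == 1 are simply summed in panon, as no other mapping
	 * can reference them.
	 */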
11883247Sgjelinek if (private_amp != NULL) {
11893247Sgjelinek
119010093SPeter.Telford@Sun.COM /* Enter as writer to prevent COW anons from being freed */
11913247Sgjelinek ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);
11923247Sgjelinek
11933247Sgjelinek p_index = p_start;
11943247Sgjelinek s_index = s_start;
11953247Sgjelinek
11963247Sgjelinek while (p_index <= p_end) {
11973247Sgjelinek
11983247Sgjelinek pgcnt_t p_index_next;
11993247Sgjelinek pgcnt_t p_bound_size;
12003247Sgjelinek int cnt;
12013247Sgjelinek anoff_t off;
12023247Sgjelinek struct vnode *vn;
12033247Sgjelinek struct anon *ap;
12043247Sgjelinek page_t *page; /* For handling of large */
12053247Sgjelinek pgcnt_t pgcnt = 1; /* pages */
12063247Sgjelinek pgcnt_t pgstart;
12073247Sgjelinek pgcnt_t pgend;
12083247Sgjelinek uint_t pgshft;
12093247Sgjelinek pgcnt_t pgmsk;
12103247Sgjelinek
12113247Sgjelinek p_index_next = p_index;
12123247Sgjelinek ap = anon_get_next_ptr(private_amp->ahp,
12133247Sgjelinek &p_index_next);
12143247Sgjelinek
12153247Sgjelinek /*
12163247Sgjelinek * If next anon is past end of mapping, simulate
12173247Sgjelinek * end of anon so loop terminates.
12183247Sgjelinek */
12193247Sgjelinek if (p_index_next > p_end) {
12203247Sgjelinek p_index_next = p_end + 1;
12213247Sgjelinek ap = NULL;
12223247Sgjelinek }
12233247Sgjelinek /*
122410093SPeter.Telford@Sun.COM * For COW segments, keep track of bounds not
12253247Sgjelinek * backed by private amp so they can be looked
12263247Sgjelinek * up in the backing vnode
12273247Sgjelinek */
12283247Sgjelinek if (p_index_next != p_index) {
12293247Sgjelinek
12303247Sgjelinek /*
12313247Sgjelinek * Compute index difference between anon and
12323247Sgjelinek * previous anon.
12333247Sgjelinek */
12343247Sgjelinek p_bound_size = p_index_next - p_index - 1;
12353247Sgjelinek
12363247Sgjelinek if (shared_object != NULL) {
12373247Sgjelinek cur = vmu_alloc_bound();
12383247Sgjelinek cur->vmb_start = s_index;
12393247Sgjelinek cur->vmb_end = s_index + p_bound_size;
12403247Sgjelinek cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
12413247Sgjelinek if (first == NULL) {
12423247Sgjelinek first = cur;
12433247Sgjelinek last = cur;
12443247Sgjelinek } else {
12453247Sgjelinek last->vmb_next = cur;
12463247Sgjelinek last = cur;
12473247Sgjelinek }
12483247Sgjelinek }
12493247Sgjelinek p_index = p_index + p_bound_size + 1;
12503247Sgjelinek s_index = s_index + p_bound_size + 1;
12513247Sgjelinek }
12523247Sgjelinek
12533247Sgjelinek /* Detect end of anons in amp */
12543247Sgjelinek if (ap == NULL)
12553247Sgjelinek break;
12563247Sgjelinek
12573247Sgjelinek cnt = ap->an_refcnt;
12583247Sgjelinek swap_xlate(ap, &vn, &off);
12593247Sgjelinek
12603247Sgjelinek if (vn == NULL || vn->v_pages == NULL ||
12613247Sgjelinek (page = page_exists(vn, off)) == NULL) {
12623247Sgjelinek p_index++;
12633247Sgjelinek s_index++;
12643247Sgjelinek continue;
12653247Sgjelinek }
12663247Sgjelinek
12673247Sgjelinek /*
12683247Sgjelinek 			 * If a large page is found, compute the portion
12693247Sgjelinek 			 * of the large page in the mapping, and advance the
12703247Sgjelinek 			 * indices to the next large page.
12713247Sgjelinek */
12723247Sgjelinek if (page->p_szc > 0) {
12733247Sgjelinek
12743247Sgjelinek pgcnt = page_get_pagecnt(page->p_szc);
12753247Sgjelinek pgshft = page_get_shift(page->p_szc);
12763247Sgjelinek pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;
12773247Sgjelinek
12783247Sgjelinek /* First page in large page */
12793247Sgjelinek pgstart = p_index & ~pgmsk;
12803247Sgjelinek /* Last page in large page */
12813247Sgjelinek pgend = pgstart + pgcnt - 1;
12823247Sgjelinek /*
12833247Sgjelinek 				 * Artificially end the page if it extends
12843247Sgjelinek 				 * past the end of the mapping.
12853247Sgjelinek */
12863247Sgjelinek if (pgend > p_end)
12873247Sgjelinek pgend = p_end;
12883247Sgjelinek
12893247Sgjelinek /*
12903247Sgjelinek * Compute number of pages from large page
12913247Sgjelinek * which are mapped.
12923247Sgjelinek */
12933247Sgjelinek pgcnt = pgend - p_index + 1;
12943247Sgjelinek
12953247Sgjelinek /*
12963247Sgjelinek 				 * Point indices at the page after the large
12973247Sgjelinek 				 * page, or at the page after the end of the mapping.
12983247Sgjelinek */
12993247Sgjelinek p_index += pgcnt;
13003247Sgjelinek s_index += pgcnt;
13013247Sgjelinek } else {
13023247Sgjelinek p_index++;
13033247Sgjelinek s_index++;
13043247Sgjelinek }
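			/*
			 * Illustration of the large-page math above (assuming
			 * 4K base pages and a 64K large page, so pgcnt == 16
			 * and pgmsk == 0xf): for p_index == 0x25,
			 * pgstart = 0x25 & ~0xf = 0x20 and pgend = 0x2f, so
			 * pgcnt becomes 0x2f - 0x25 + 1 = 11 pages charged,
			 * and both indexes advance to 0x30, the first page of
			 * the next large page.
			 */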
13053247Sgjelinek
13063247Sgjelinek /*
13073247Sgjelinek * Assume anon structs with a refcnt
130810093SPeter.Telford@Sun.COM * of 1 are not COW shared, so there
13093247Sgjelinek * is no reason to track them per entity.
13103247Sgjelinek */
13113247Sgjelinek if (cnt == 1) {
13123247Sgjelinek panon += pgcnt;
13133247Sgjelinek continue;
13143247Sgjelinek }
13153247Sgjelinek for (entity = vmu_entities; entity != NULL;
13163247Sgjelinek entity = entity->vme_next_calc) {
13173247Sgjelinek
13183247Sgjelinek result = &entity->vme_result;
13193247Sgjelinek /*
132010093SPeter.Telford@Sun.COM * Track COW anons per entity so
13213247Sgjelinek * they are not double counted.
13223247Sgjelinek */
13233247Sgjelinek if (vmu_find_insert_anon(entity->vme_anon_hash,
13243247Sgjelinek (caddr_t)ap) == 0)
13253247Sgjelinek continue;
13263247Sgjelinek
13273247Sgjelinek result->vmu_rss_all += (pgcnt << PAGESHIFT);
13283247Sgjelinek result->vmu_rss_private +=
13293247Sgjelinek (pgcnt << PAGESHIFT);
13303247Sgjelinek }
13313247Sgjelinek }
13323247Sgjelinek ANON_LOCK_EXIT(&private_amp->a_rwlock);
13333247Sgjelinek }
13343247Sgjelinek
13353247Sgjelinek /* Add up resident anon and swap reserved for private mappings */
13363247Sgjelinek if (swresv > 0 || panon > 0) {
13373247Sgjelinek for (entity = vmu_entities; entity != NULL;
13383247Sgjelinek entity = entity->vme_next_calc) {
13393247Sgjelinek result = &entity->vme_result;
13403247Sgjelinek result->vmu_swap_all += swresv;
13413247Sgjelinek result->vmu_swap_private += swresv;
13423247Sgjelinek result->vmu_rss_all += (panon << PAGESHIFT);
13433247Sgjelinek result->vmu_rss_private += (panon << PAGESHIFT);
13443247Sgjelinek }
13453247Sgjelinek }
13463247Sgjelinek
13473247Sgjelinek /* Compute resident pages backing shared amp or named vnode */
13483247Sgjelinek if (shared_object != NULL) {
1349*10543SPeter.Telford@Sun.COM avl_tree_t *tree = &(shared_object->vmo_bounds);
1350*10543SPeter.Telford@Sun.COM
13513247Sgjelinek if (first == NULL) {
13523247Sgjelinek /*
13533247Sgjelinek * No private amp, or private amp has no anon
13543247Sgjelinek * structs. This means entire segment is backed by
13553247Sgjelinek * the shared object.
13563247Sgjelinek */
13573247Sgjelinek first = vmu_alloc_bound();
13583247Sgjelinek first->vmb_start = s_start;
13593247Sgjelinek first->vmb_end = s_end;
13603247Sgjelinek first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
13613247Sgjelinek }
13623247Sgjelinek /*
13633247Sgjelinek * Iterate bounds not backed by private amp, and compute
13643247Sgjelinek * resident pages.
13653247Sgjelinek */
13663247Sgjelinek cur = first;
13673247Sgjelinek while (cur != NULL) {
13683247Sgjelinek
13693247Sgjelinek if (vmu_insert_lookup_object_bounds(shared_object,
13703247Sgjelinek cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
13713247Sgjelinek &first, &last) > 0) {
13723247Sgjelinek /* new bounds, find incore/not-incore */
13733247Sgjelinek if (shared_object->vmo_type ==
137410093SPeter.Telford@Sun.COM VMUSAGE_TYPE_VNODE) {
13753247Sgjelinek vmu_vnode_update_incore_bounds(
137610093SPeter.Telford@Sun.COM tree,
13773247Sgjelinek (vnode_t *)
13783247Sgjelinek shared_object->vmo_key, &first,
13793247Sgjelinek &last);
138010093SPeter.Telford@Sun.COM } else {
13813247Sgjelinek vmu_amp_update_incore_bounds(
138210093SPeter.Telford@Sun.COM tree,
13833247Sgjelinek (struct anon_map *)
13843247Sgjelinek shared_object->vmo_key, &first,
13853247Sgjelinek &last, incore);
138610093SPeter.Telford@Sun.COM }
138710093SPeter.Telford@Sun.COM vmu_merge_bounds(tree, &first, &last);
13883247Sgjelinek }
13893247Sgjelinek for (entity = vmu_entities; entity != NULL;
13903247Sgjelinek entity = entity->vme_next_calc) {
139110093SPeter.Telford@Sun.COM avl_tree_t *e_tree;
13923247Sgjelinek
13933247Sgjelinek result = &entity->vme_result;
13943247Sgjelinek
13953247Sgjelinek entity_object = vmu_find_insert_object(
13963247Sgjelinek shared_object->vmo_type ==
13973247Sgjelinek VMUSAGE_TYPE_VNODE ? entity->vme_vnode_hash:
13987884Sgerald.jelinek@sun.com entity->vme_amp_hash,
13997884Sgerald.jelinek@sun.com shared_object->vmo_key,
14007884Sgerald.jelinek@sun.com shared_object->vmo_type);
14013247Sgjelinek
14023247Sgjelinek virt = vmu_insert_lookup_object_bounds(
14033247Sgjelinek entity_object, cur->vmb_start, cur->vmb_end,
14043247Sgjelinek VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
14053247Sgjelinek
14063247Sgjelinek if (virt == 0)
14073247Sgjelinek continue;
14083247Sgjelinek /*
14093247Sgjelinek * Range visited for this entity
14103247Sgjelinek */
141110093SPeter.Telford@Sun.COM e_tree = &(entity_object->vmo_bounds);
141210093SPeter.Telford@Sun.COM rss = vmu_update_bounds(e_tree, &e_first,
141310093SPeter.Telford@Sun.COM &e_last, tree, first, last);
14143247Sgjelinek result->vmu_rss_all += (rss << PAGESHIFT);
14153247Sgjelinek if (shared == B_TRUE && file == B_FALSE) {
14163247Sgjelinek /* shared anon mapping */
14173247Sgjelinek result->vmu_swap_all +=
14183247Sgjelinek (virt << PAGESHIFT);
14193247Sgjelinek result->vmu_swap_shared +=
14203247Sgjelinek (virt << PAGESHIFT);
14213247Sgjelinek result->vmu_rss_shared +=
14223247Sgjelinek (rss << PAGESHIFT);
14233247Sgjelinek } else if (shared == B_TRUE && file == B_TRUE) {
14243247Sgjelinek /* shared file mapping */
14253247Sgjelinek result->vmu_rss_shared +=
14263247Sgjelinek (rss << PAGESHIFT);
14273247Sgjelinek } else if (shared == B_FALSE &&
14283247Sgjelinek file == B_TRUE) {
14293247Sgjelinek /* private file mapping */
14303247Sgjelinek result->vmu_rss_private +=
14313247Sgjelinek (rss << PAGESHIFT);
14323247Sgjelinek }
143310093SPeter.Telford@Sun.COM vmu_merge_bounds(e_tree, &e_first, &e_last);
14343247Sgjelinek }
14353247Sgjelinek tmp = cur;
14363247Sgjelinek cur = cur->vmb_next;
14373247Sgjelinek vmu_free_bound(tmp);
14383247Sgjelinek }
14393247Sgjelinek }
14403247Sgjelinek }
14413247Sgjelinek
14423247Sgjelinek /*
14433247Sgjelinek  * Based on the current calculation flags, find the entities which are
14443247Sgjelinek  * relevant to the process. Then calculate each segment
14453247Sgjelinek  * in the process's address space for each relevant entity.
14463247Sgjelinek */
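/*
 * For example (illustrative): with VMUSAGE_ALL_ZONES | VMUSAGE_ALL_PROJECTS
 * in vmu_calc_flags, a proc in zone 1 and project 10 produces an entity
 * chain of [project 10 (zone 1), zone 1] linked through vme_next_calc, and
 * every segment in the proc's address space is charged to each entity on
 * that chain.
 */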
14473247Sgjelinek static void
14483247Sgjelinek vmu_calculate_proc(proc_t *p)
14493247Sgjelinek {
14503247Sgjelinek vmu_entity_t *entities = NULL;
14513247Sgjelinek vmu_zone_t *zone;
14523247Sgjelinek vmu_entity_t *tmp;
14533247Sgjelinek struct as *as;
14543247Sgjelinek struct seg *seg;
14553247Sgjelinek int ret;
14563247Sgjelinek
14573247Sgjelinek /* Figure out which entities are being computed */
14583247Sgjelinek if ((vmu_data.vmu_system) != NULL) {
14593247Sgjelinek tmp = vmu_data.vmu_system;
14603247Sgjelinek tmp->vme_next_calc = entities;
14613247Sgjelinek entities = tmp;
14623247Sgjelinek }
14633247Sgjelinek if (vmu_data.vmu_calc_flags &
14643247Sgjelinek (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
14653247Sgjelinek VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
14663247Sgjelinek VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
14673247Sgjelinek VMUSAGE_ALL_EUSERS)) {
14683247Sgjelinek ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
14693247Sgjelinek (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
14703247Sgjelinek (mod_hash_val_t *)&zone);
14713247Sgjelinek if (ret != 0) {
14723247Sgjelinek zone = vmu_alloc_zone(p->p_zone->zone_id);
14733247Sgjelinek ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
14743247Sgjelinek (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
14753247Sgjelinek (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
14763247Sgjelinek ASSERT(ret == 0);
14773247Sgjelinek }
14783247Sgjelinek if (zone->vmz_zone != NULL) {
14793247Sgjelinek tmp = zone->vmz_zone;
14803247Sgjelinek tmp->vme_next_calc = entities;
14813247Sgjelinek entities = tmp;
14823247Sgjelinek }
14833247Sgjelinek if (vmu_data.vmu_calc_flags &
14843247Sgjelinek (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
14853247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
14863247Sgjelinek p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
14873247Sgjelinek zone->vmz_id);
14883247Sgjelinek tmp->vme_next_calc = entities;
14893247Sgjelinek entities = tmp;
14903247Sgjelinek }
14913247Sgjelinek if (vmu_data.vmu_calc_flags &
14923247Sgjelinek (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
14933247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
14943247Sgjelinek p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
14953247Sgjelinek tmp->vme_next_calc = entities;
14963247Sgjelinek entities = tmp;
14973247Sgjelinek }
14983247Sgjelinek if (vmu_data.vmu_calc_flags &
14993247Sgjelinek (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
15003247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
15013247Sgjelinek crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
15023247Sgjelinek tmp->vme_next_calc = entities;
15033247Sgjelinek entities = tmp;
15043247Sgjelinek }
15053247Sgjelinek if (vmu_data.vmu_calc_flags &
15063247Sgjelinek (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
15073247Sgjelinek tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
15083247Sgjelinek crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
15093247Sgjelinek tmp->vme_next_calc = entities;
15103247Sgjelinek entities = tmp;
15113247Sgjelinek }
15123247Sgjelinek }
15133247Sgjelinek /* Entities which collapse projects and users for all zones */
15143247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
15153247Sgjelinek tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
15163247Sgjelinek p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
15173247Sgjelinek tmp->vme_next_calc = entities;
15183247Sgjelinek entities = tmp;
15193247Sgjelinek }
15203247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
15213247Sgjelinek tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
15223247Sgjelinek crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
15233247Sgjelinek tmp->vme_next_calc = entities;
15243247Sgjelinek entities = tmp;
15253247Sgjelinek }
15263247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
15273247Sgjelinek tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
15283247Sgjelinek crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
15293247Sgjelinek tmp->vme_next_calc = entities;
15303247Sgjelinek entities = tmp;
15313247Sgjelinek }
15323247Sgjelinek
15333247Sgjelinek ASSERT(entities != NULL);
15343247Sgjelinek /* process all segs in process's address space */
15353247Sgjelinek as = p->p_as;
15363247Sgjelinek AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
15373247Sgjelinek for (seg = AS_SEGFIRST(as); seg != NULL;
15383247Sgjelinek seg = AS_SEGNEXT(as, seg)) {
15393247Sgjelinek vmu_calculate_seg(entities, seg);
15403247Sgjelinek }
15413247Sgjelinek AS_LOCK_EXIT(as, &as->a_lock);
15423247Sgjelinek }
15433247Sgjelinek
15443247Sgjelinek /*
15453247Sgjelinek * Free data created by previous call to vmu_calculate().
15463247Sgjelinek */
15473247Sgjelinek static void
15483247Sgjelinek vmu_clear_calc()
15493247Sgjelinek {
15503247Sgjelinek if (vmu_data.vmu_system != NULL)
15513247Sgjelinek vmu_free_entity(vmu_data.vmu_system);
15523247Sgjelinek vmu_data.vmu_system = NULL;
15533247Sgjelinek if (vmu_data.vmu_zones_hash != NULL)
15543247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
15553247Sgjelinek if (vmu_data.vmu_projects_col_hash != NULL)
15563247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
15573247Sgjelinek if (vmu_data.vmu_rusers_col_hash != NULL)
15583247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
15593247Sgjelinek if (vmu_data.vmu_eusers_col_hash != NULL)
15603247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
15613247Sgjelinek
15623247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
15633247Sgjelinek i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
15643247Sgjelinek }
15653247Sgjelinek
15663247Sgjelinek /*
15673247Sgjelinek * Free unused data structures. These can result if the system workload
15683247Sgjelinek * decreases between calculations.
15693247Sgjelinek */
15703247Sgjelinek static void
15713247Sgjelinek vmu_free_extra()
15723247Sgjelinek {
15733247Sgjelinek vmu_bound_t *tb;
15743247Sgjelinek vmu_object_t *to;
15753247Sgjelinek vmu_entity_t *te;
15763247Sgjelinek vmu_zone_t *tz;
15773247Sgjelinek
15783247Sgjelinek while (vmu_data.vmu_free_bounds != NULL) {
15793247Sgjelinek tb = vmu_data.vmu_free_bounds;
15803247Sgjelinek vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
15813247Sgjelinek kmem_cache_free(vmu_bound_cache, tb);
15823247Sgjelinek }
15833247Sgjelinek while (vmu_data.vmu_free_objects != NULL) {
15843247Sgjelinek to = vmu_data.vmu_free_objects;
15853247Sgjelinek vmu_data.vmu_free_objects =
15863247Sgjelinek vmu_data.vmu_free_objects->vmo_next;
15873247Sgjelinek kmem_cache_free(vmu_object_cache, to);
15883247Sgjelinek }
15893247Sgjelinek while (vmu_data.vmu_free_entities != NULL) {
15903247Sgjelinek te = vmu_data.vmu_free_entities;
15913247Sgjelinek vmu_data.vmu_free_entities =
15923247Sgjelinek vmu_data.vmu_free_entities->vme_next;
15933247Sgjelinek if (te->vme_vnode_hash != NULL)
15943247Sgjelinek mod_hash_destroy_hash(te->vme_vnode_hash);
15953247Sgjelinek if (te->vme_amp_hash != NULL)
15963247Sgjelinek mod_hash_destroy_hash(te->vme_amp_hash);
15973247Sgjelinek if (te->vme_anon_hash != NULL)
15983247Sgjelinek mod_hash_destroy_hash(te->vme_anon_hash);
15993247Sgjelinek kmem_free(te, sizeof (vmu_entity_t));
16003247Sgjelinek }
16013247Sgjelinek while (vmu_data.vmu_free_zones != NULL) {
16023247Sgjelinek tz = vmu_data.vmu_free_zones;
16033247Sgjelinek vmu_data.vmu_free_zones =
16043247Sgjelinek vmu_data.vmu_free_zones->vmz_next;
16053247Sgjelinek if (tz->vmz_projects_hash != NULL)
16063247Sgjelinek mod_hash_destroy_hash(tz->vmz_projects_hash);
16073247Sgjelinek if (tz->vmz_tasks_hash != NULL)
16083247Sgjelinek mod_hash_destroy_hash(tz->vmz_tasks_hash);
16093247Sgjelinek if (tz->vmz_rusers_hash != NULL)
16103247Sgjelinek mod_hash_destroy_hash(tz->vmz_rusers_hash);
16113247Sgjelinek if (tz->vmz_eusers_hash != NULL)
16123247Sgjelinek mod_hash_destroy_hash(tz->vmz_eusers_hash);
16133247Sgjelinek kmem_free(tz, sizeof (vmu_zone_t));
16143247Sgjelinek }
16153247Sgjelinek }
16163247Sgjelinek
16173247Sgjelinek extern kcondvar_t *pr_pid_cv;
16183247Sgjelinek
16193247Sgjelinek /*
16203247Sgjelinek * Determine which entity types are relevant and allocate the hashes to
16213247Sgjelinek * track them. Then walk the process table and count rss and swap
16223247Sgjelinek  * for each process's address space. Address space objects such as
16233247Sgjelinek  * vnodes, amps, and anons are tracked per entity, so that they are
16243247Sgjelinek * not double counted in the results.
16253247Sgjelinek *
16263247Sgjelinek */
16273247Sgjelinek static void
16283247Sgjelinek vmu_calculate()
16293247Sgjelinek {
16303247Sgjelinek int i = 0;
16313247Sgjelinek int ret;
16323247Sgjelinek proc_t *p;
16333247Sgjelinek
16343247Sgjelinek vmu_clear_calc();
16353247Sgjelinek
16363247Sgjelinek if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
16373247Sgjelinek vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
16383247Sgjelinek ALL_ZONES);
16393247Sgjelinek
16403247Sgjelinek /*
16413247Sgjelinek * Walk process table and calculate rss of each proc.
16423247Sgjelinek *
16433247Sgjelinek * Pidlock and p_lock cannot be held while doing the rss calculation.
16443247Sgjelinek * This is because:
16453247Sgjelinek * 1. The calculation allocates using KM_SLEEP.
16463247Sgjelinek * 2. The calculation grabs a_lock, which cannot be grabbed
16473247Sgjelinek * after p_lock.
16483247Sgjelinek *
16493247Sgjelinek  * Since pidlock must be dropped, we cannot simply walk the
16503247Sgjelinek  * practive list. Instead, we walk the process table, and sprlock
16513247Sgjelinek * each process to ensure that it does not exit during the
16523247Sgjelinek * calculation.
16533247Sgjelinek */
16543247Sgjelinek
16553247Sgjelinek mutex_enter(&pidlock);
16563247Sgjelinek for (i = 0; i < v.v_proc; i++) {
16573247Sgjelinek again:
16583247Sgjelinek p = pid_entry(i);
16593247Sgjelinek if (p == NULL)
16603247Sgjelinek continue;
16613247Sgjelinek
16623247Sgjelinek mutex_enter(&p->p_lock);
16633247Sgjelinek mutex_exit(&pidlock);
16643247Sgjelinek
16653247Sgjelinek if (panicstr) {
16663247Sgjelinek mutex_exit(&p->p_lock);
16673247Sgjelinek return;
16683247Sgjelinek }
16693247Sgjelinek
16703247Sgjelinek /* Try to set P_PR_LOCK */
16713247Sgjelinek ret = sprtrylock_proc(p);
16723247Sgjelinek if (ret == -1) {
16733247Sgjelinek /* Process in invalid state */
16743247Sgjelinek mutex_exit(&p->p_lock);
16753247Sgjelinek mutex_enter(&pidlock);
16763247Sgjelinek continue;
16773247Sgjelinek } else if (ret == 1) {
16783247Sgjelinek /*
16793247Sgjelinek * P_PR_LOCK is already set. Wait and try again.
16803247Sgjelinek * This also drops p_lock.
16813247Sgjelinek */
16823247Sgjelinek sprwaitlock_proc(p);
16833247Sgjelinek mutex_enter(&pidlock);
16843247Sgjelinek goto again;
16853247Sgjelinek }
16863247Sgjelinek mutex_exit(&p->p_lock);
16873247Sgjelinek
16883247Sgjelinek vmu_calculate_proc(p);
16893247Sgjelinek
16903247Sgjelinek mutex_enter(&p->p_lock);
16913247Sgjelinek sprunlock(p);
16923247Sgjelinek mutex_enter(&pidlock);
16933247Sgjelinek }
16943247Sgjelinek mutex_exit(&pidlock);
16953247Sgjelinek
16963247Sgjelinek vmu_free_extra();
16973247Sgjelinek }
16983247Sgjelinek
16993247Sgjelinek /*
17003247Sgjelinek * allocate a new cache for N results satisfying flags
17013247Sgjelinek */
17023247Sgjelinek vmu_cache_t *
17033247Sgjelinek vmu_cache_alloc(size_t nres, uint_t flags)
17043247Sgjelinek {
17053247Sgjelinek vmu_cache_t *cache;
17063247Sgjelinek
17073247Sgjelinek cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
17083247Sgjelinek cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
17093247Sgjelinek cache->vmc_nresults = nres;
17103247Sgjelinek cache->vmc_flags = flags;
17113247Sgjelinek cache->vmc_refcnt = 1;
17123247Sgjelinek return (cache);
17133247Sgjelinek }
17143247Sgjelinek
17153247Sgjelinek /*
17163247Sgjelinek * Make sure cached results are not freed
17173247Sgjelinek */
17183247Sgjelinek static void
17193247Sgjelinek vmu_cache_hold(vmu_cache_t *cache)
17203247Sgjelinek {
17213247Sgjelinek ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
17223247Sgjelinek cache->vmc_refcnt++;
17233247Sgjelinek }
17243247Sgjelinek
17253247Sgjelinek /*
17263247Sgjelinek * free cache data
17273247Sgjelinek */
17283247Sgjelinek static void
17293247Sgjelinek vmu_cache_rele(vmu_cache_t *cache)
17303247Sgjelinek {
17313247Sgjelinek ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
17323247Sgjelinek ASSERT(cache->vmc_refcnt > 0);
17333247Sgjelinek cache->vmc_refcnt--;
17343247Sgjelinek if (cache->vmc_refcnt == 0) {
17353247Sgjelinek kmem_free(cache->vmc_results, sizeof (vmusage_t) *
17367884Sgerald.jelinek@sun.com cache->vmc_nresults);
17373247Sgjelinek kmem_free(cache, sizeof (vmu_cache_t));
17383247Sgjelinek }
17393247Sgjelinek }
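/*
 * Typical cache lifetime (a sketch of the flow in vm_getusage() below):
 * the calculating thread creates the cache with vmc_refcnt == 1 (the
 * vmu_data.vmu_cache reference) and takes a second hold before dropping
 * vmu_lock to copy results out.  Later readers likewise vmu_cache_hold()
 * under vmu_lock and vmu_cache_rele() when done.  The creator's reference
 * is released when a newer calculation replaces the cache, so whichever
 * thread performs the final vmu_cache_rele() frees the memory.
 */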
17403247Sgjelinek
17413247Sgjelinek /*
17423247Sgjelinek  * Copy out the cached results to a caller. Inspect the caller's flags
17433247Sgjelinek  * and zone to determine which cached results should be copied.
17443247Sgjelinek */
17453247Sgjelinek static int
17463247Sgjelinek vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
17477884Sgerald.jelinek@sun.com uint_t flags, int cpflg)
17483247Sgjelinek {
17493247Sgjelinek vmusage_t *result, *out_result;
17503247Sgjelinek vmusage_t dummy;
17513247Sgjelinek size_t i, count = 0;
17523247Sgjelinek size_t bufsize;
17533247Sgjelinek int ret = 0;
17543247Sgjelinek uint_t types = 0;
17553247Sgjelinek
17563247Sgjelinek if (nres != NULL) {
17577884Sgerald.jelinek@sun.com if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg))
17583247Sgjelinek return (set_errno(EFAULT));
17593247Sgjelinek } else {
17603247Sgjelinek bufsize = 0;
17613247Sgjelinek }
17623247Sgjelinek
17633247Sgjelinek /* figure out what results the caller is interested in. */
17643247Sgjelinek if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
17653247Sgjelinek types |= VMUSAGE_SYSTEM;
17663247Sgjelinek if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
17673247Sgjelinek types |= VMUSAGE_ZONE;
17683247Sgjelinek if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
17693247Sgjelinek VMUSAGE_COL_PROJECTS))
17703247Sgjelinek types |= VMUSAGE_PROJECTS;
17713247Sgjelinek if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
17723247Sgjelinek types |= VMUSAGE_TASKS;
17733247Sgjelinek if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
17743247Sgjelinek types |= VMUSAGE_RUSERS;
17753247Sgjelinek if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
17763247Sgjelinek types |= VMUSAGE_EUSERS;
17773247Sgjelinek
17783247Sgjelinek /* count results for current zone */
17793247Sgjelinek out_result = buf;
17803247Sgjelinek for (result = cache->vmc_results, i = 0;
17813247Sgjelinek i < cache->vmc_nresults; result++, i++) {
17823247Sgjelinek
17833247Sgjelinek /* Do not return "other-zone" results to non-global zones */
17843247Sgjelinek if (curproc->p_zone != global_zone &&
17853247Sgjelinek curproc->p_zone->zone_id != result->vmu_zoneid)
17863247Sgjelinek continue;
17873247Sgjelinek
17883247Sgjelinek /*
17893247Sgjelinek * If non-global zone requests VMUSAGE_SYSTEM, fake
17903247Sgjelinek * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result.
17913247Sgjelinek */
17923247Sgjelinek if (curproc->p_zone != global_zone &&
17933247Sgjelinek (flags & VMUSAGE_SYSTEM) != 0 &&
17943247Sgjelinek result->vmu_type == VMUSAGE_ZONE) {
17953247Sgjelinek count++;
17963247Sgjelinek if (out_result != NULL) {
17973247Sgjelinek if (bufsize < count) {
17983247Sgjelinek ret = set_errno(EOVERFLOW);
17993247Sgjelinek } else {
18003247Sgjelinek dummy = *result;
18013247Sgjelinek dummy.vmu_zoneid = ALL_ZONES;
18023247Sgjelinek dummy.vmu_id = 0;
18033247Sgjelinek dummy.vmu_type = VMUSAGE_SYSTEM;
18047884Sgerald.jelinek@sun.com if (ddi_copyout(&dummy, out_result,
18057884Sgerald.jelinek@sun.com sizeof (vmusage_t), cpflg))
18067884Sgerald.jelinek@sun.com return (set_errno(EFAULT));
18073247Sgjelinek out_result++;
18083247Sgjelinek }
18093247Sgjelinek }
18103247Sgjelinek }
18113247Sgjelinek
18123247Sgjelinek /* Skip results that do not match requested type */
18133247Sgjelinek if ((result->vmu_type & types) == 0)
18143247Sgjelinek continue;
18153247Sgjelinek
18163247Sgjelinek /* Skip collated results if not requested */
18173247Sgjelinek if (result->vmu_zoneid == ALL_ZONES) {
18183247Sgjelinek if (result->vmu_type == VMUSAGE_PROJECTS &&
18193247Sgjelinek (flags & VMUSAGE_COL_PROJECTS) == 0)
18203247Sgjelinek continue;
18213247Sgjelinek if (result->vmu_type == VMUSAGE_EUSERS &&
18223247Sgjelinek (flags & VMUSAGE_COL_EUSERS) == 0)
18233247Sgjelinek continue;
18243247Sgjelinek if (result->vmu_type == VMUSAGE_RUSERS &&
18253247Sgjelinek (flags & VMUSAGE_COL_RUSERS) == 0)
18263247Sgjelinek continue;
18273247Sgjelinek }
18283247Sgjelinek
18293247Sgjelinek /* Skip "other zone" results if not requested */
18303247Sgjelinek if (result->vmu_zoneid != curproc->p_zone->zone_id) {
18313247Sgjelinek if (result->vmu_type == VMUSAGE_ZONE &&
18323247Sgjelinek (flags & VMUSAGE_ALL_ZONES) == 0)
18333247Sgjelinek continue;
18343247Sgjelinek if (result->vmu_type == VMUSAGE_PROJECTS &&
18353247Sgjelinek (flags & (VMUSAGE_ALL_PROJECTS |
18363247Sgjelinek VMUSAGE_COL_PROJECTS)) == 0)
18373247Sgjelinek continue;
18383247Sgjelinek if (result->vmu_type == VMUSAGE_TASKS &&
18393247Sgjelinek (flags & VMUSAGE_ALL_TASKS) == 0)
18403247Sgjelinek continue;
18413247Sgjelinek if (result->vmu_type == VMUSAGE_RUSERS &&
18423247Sgjelinek (flags & (VMUSAGE_ALL_RUSERS |
18433247Sgjelinek VMUSAGE_COL_RUSERS)) == 0)
18443247Sgjelinek continue;
18453247Sgjelinek if (result->vmu_type == VMUSAGE_EUSERS &&
18463247Sgjelinek (flags & (VMUSAGE_ALL_EUSERS |
18473247Sgjelinek VMUSAGE_COL_EUSERS)) == 0)
18483247Sgjelinek continue;
18493247Sgjelinek }
18503247Sgjelinek count++;
18513247Sgjelinek if (out_result != NULL) {
18523247Sgjelinek if (bufsize < count) {
18533247Sgjelinek ret = set_errno(EOVERFLOW);
18543247Sgjelinek } else {
18557884Sgerald.jelinek@sun.com if (ddi_copyout(result, out_result,
18567884Sgerald.jelinek@sun.com sizeof (vmusage_t), cpflg))
18573247Sgjelinek return (set_errno(EFAULT));
18583247Sgjelinek out_result++;
18593247Sgjelinek }
18603247Sgjelinek }
18613247Sgjelinek }
18623247Sgjelinek if (nres != NULL)
18637884Sgerald.jelinek@sun.com if (ddi_copyout(&count, (void *)nres, sizeof (size_t), cpflg))
18643247Sgjelinek return (set_errno(EFAULT));
18653247Sgjelinek
18663247Sgjelinek return (ret);
18673247Sgjelinek }
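/*
 * Sizing example (illustrative): if the caller passes *nres == 3 but 5
 * results match its flags, the first 3 are copied out, the call fails
 * with EOVERFLOW, and *nres is set to 5 so the caller can retry with a
 * buffer large enough to hold every result.
 */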
18683247Sgjelinek
18693247Sgjelinek /*
18703247Sgjelinek * vm_getusage()
18713247Sgjelinek *
18723247Sgjelinek * Counts rss and swap by zone, project, task, and/or user. The flags argument
18733247Sgjelinek  * determines the type of result structures returned. Flags requesting
18743247Sgjelinek * results from more than one zone are "flattened" to the local zone if the
18753247Sgjelinek * caller is not the global zone.
18763247Sgjelinek *
18773247Sgjelinek * args:
18783247Sgjelinek * flags: bitmap consisting of one or more of VMUSAGE_*.
18793247Sgjelinek * age: maximum allowable age (time since counting was done) in
18803247Sgjelinek * seconds of the results. Results from previous callers are
18813247Sgjelinek * cached in kernel.
18823247Sgjelinek  *	buf: pointer to array of vmusage_t structures. If NULL, then only
18833247Sgjelinek  *	nres is set on success.
18843247Sgjelinek  *	nres: set to the number of vmusage_t structures pointed to by buf
18853247Sgjelinek  *	before calling vm_getusage().
18863247Sgjelinek  *	On return of 0 (success) or EOVERFLOW, set to the number of result
18873247Sgjelinek  *	structures returned, or that would have been returned.
18883247Sgjelinek *
18893247Sgjelinek * returns 0 on success, -1 on failure:
18903247Sgjelinek * EINTR (interrupted)
18913247Sgjelinek  * EOVERFLOW (nres too small for results; nres set to needed value for success)
18923247Sgjelinek * EINVAL (flags invalid)
18933247Sgjelinek * EFAULT (bad address for buf or nres)
18943247Sgjelinek */
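/*
 * Illustrative usage from userland (a sketch, not compiled here), via the
 * getvmusage() wrapper declared in <sys/vm_usage.h>; error handling and
 * includes are elided:
 *
 *	size_t nres = 0;
 *	vmusage_t *buf;
 *
 *	(void) getvmusage(VMUSAGE_ZONE, 5, NULL, &nres);
 *	buf = malloc(sizeof (vmusage_t) * nres);
 *	if (getvmusage(VMUSAGE_ZONE, 5, buf, &nres) != 0)
 *		err(1, "getvmusage");
 *
 * The first call passes buf == NULL so that only nres is set; the second
 * copies out up to nres results that are at most 5 seconds old, starting
 * a fresh calculation only if the cached results are older than that.
 */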
18953247Sgjelinek int
18967884Sgerald.jelinek@sun.com vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
18973247Sgjelinek {
18983247Sgjelinek vmu_entity_t *entity;
18993247Sgjelinek vmusage_t *result;
19003247Sgjelinek int ret = 0;
19013247Sgjelinek int cacherecent = 0;
19023247Sgjelinek hrtime_t now;
19033247Sgjelinek uint_t flags_orig;
19043247Sgjelinek
19053247Sgjelinek /*
19063247Sgjelinek 	 * Non-global zones cannot request system-wide or collated
19073247Sgjelinek 	 * results, nor the VMUSAGE_SYSTEM result, so munge the flags accordingly.
19083247Sgjelinek */
19093247Sgjelinek flags_orig = flags;
19103247Sgjelinek if (curproc->p_zone != global_zone) {
19113247Sgjelinek if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
19123247Sgjelinek flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
19133247Sgjelinek flags |= VMUSAGE_PROJECTS;
19143247Sgjelinek }
19153247Sgjelinek if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
19163247Sgjelinek flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
19173247Sgjelinek flags |= VMUSAGE_RUSERS;
19183247Sgjelinek }
19193247Sgjelinek if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
19203247Sgjelinek flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
19213247Sgjelinek flags |= VMUSAGE_EUSERS;
19223247Sgjelinek }
19233247Sgjelinek if (flags & VMUSAGE_SYSTEM) {
19243247Sgjelinek flags &= ~VMUSAGE_SYSTEM;
19253247Sgjelinek flags |= VMUSAGE_ZONE;
19263247Sgjelinek }
19273247Sgjelinek }
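	/*
	 * For example, a non-global zone asking for VMUSAGE_SYSTEM has
	 * VMUSAGE_ZONE calculated instead; vmu_copyout_results() then
	 * re-labels that zone's result as a VMUSAGE_SYSTEM result, so the
	 * caller still sees the shape of data it asked for, limited to its
	 * own zone.
	 */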
19283247Sgjelinek
19293247Sgjelinek /* Check for unknown flags */
19303247Sgjelinek if ((flags & (~VMUSAGE_MASK)) != 0)
19313247Sgjelinek return (set_errno(EINVAL));
19323247Sgjelinek
19333247Sgjelinek /* Check for no flags */
19343247Sgjelinek if ((flags & VMUSAGE_MASK) == 0)
19353247Sgjelinek return (set_errno(EINVAL));
19363247Sgjelinek
19373247Sgjelinek mutex_enter(&vmu_data.vmu_lock);
19383247Sgjelinek now = gethrtime();
19393247Sgjelinek
19403247Sgjelinek start:
19413247Sgjelinek if (vmu_data.vmu_cache != NULL) {
19423247Sgjelinek
19433247Sgjelinek vmu_cache_t *cache;
19443247Sgjelinek
19453247Sgjelinek if ((vmu_data.vmu_cache->vmc_timestamp +
19463247Sgjelinek ((hrtime_t)age * NANOSEC)) > now)
19473247Sgjelinek cacherecent = 1;
19483247Sgjelinek
19493247Sgjelinek if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
19503247Sgjelinek cacherecent == 1) {
19513247Sgjelinek cache = vmu_data.vmu_cache;
19523247Sgjelinek vmu_cache_hold(cache);
19533247Sgjelinek mutex_exit(&vmu_data.vmu_lock);
19543247Sgjelinek
19557884Sgerald.jelinek@sun.com ret = vmu_copyout_results(cache, buf, nres, flags_orig,
19567884Sgerald.jelinek@sun.com cpflg);
19573247Sgjelinek mutex_enter(&vmu_data.vmu_lock);
19583247Sgjelinek vmu_cache_rele(cache);
19593247Sgjelinek if (vmu_data.vmu_pending_waiters > 0)
19603247Sgjelinek cv_broadcast(&vmu_data.vmu_cv);
19613247Sgjelinek mutex_exit(&vmu_data.vmu_lock);
19623247Sgjelinek return (ret);
19633247Sgjelinek }
19643247Sgjelinek /*
19653247Sgjelinek * If the cache is recent, it is likely that there are other
19663247Sgjelinek * consumers of vm_getusage running, so add their flags to the
19673247Sgjelinek * desired flags for the calculation.
19683247Sgjelinek */
19693247Sgjelinek if (cacherecent == 1)
19703247Sgjelinek flags = vmu_data.vmu_cache->vmc_flags | flags;
19713247Sgjelinek }
19723247Sgjelinek if (vmu_data.vmu_calc_thread == NULL) {
19733247Sgjelinek
19743247Sgjelinek vmu_cache_t *cache;
19753247Sgjelinek
19763247Sgjelinek vmu_data.vmu_calc_thread = curthread;
19773247Sgjelinek vmu_data.vmu_calc_flags = flags;
19783247Sgjelinek vmu_data.vmu_entities = NULL;
19793247Sgjelinek vmu_data.vmu_nentities = 0;
19803247Sgjelinek if (vmu_data.vmu_pending_waiters > 0)
19813247Sgjelinek vmu_data.vmu_calc_flags |=
19823247Sgjelinek vmu_data.vmu_pending_flags;
19833247Sgjelinek
19843247Sgjelinek vmu_data.vmu_pending_flags = 0;
19853247Sgjelinek mutex_exit(&vmu_data.vmu_lock);
19863247Sgjelinek vmu_calculate();
19873247Sgjelinek mutex_enter(&vmu_data.vmu_lock);
19883247Sgjelinek /* copy results to cache */
19893247Sgjelinek if (vmu_data.vmu_cache != NULL)
19903247Sgjelinek vmu_cache_rele(vmu_data.vmu_cache);
19913247Sgjelinek cache = vmu_data.vmu_cache =
19923247Sgjelinek vmu_cache_alloc(vmu_data.vmu_nentities,
19937884Sgerald.jelinek@sun.com vmu_data.vmu_calc_flags);
19943247Sgjelinek
19953247Sgjelinek result = cache->vmc_results;
19963247Sgjelinek for (entity = vmu_data.vmu_entities; entity != NULL;
19973247Sgjelinek entity = entity->vme_next) {
19983247Sgjelinek *result = entity->vme_result;
19993247Sgjelinek result++;
20003247Sgjelinek }
20013247Sgjelinek cache->vmc_timestamp = gethrtime();
20023247Sgjelinek vmu_cache_hold(cache);
20033247Sgjelinek
20043247Sgjelinek vmu_data.vmu_calc_flags = 0;
20053247Sgjelinek vmu_data.vmu_calc_thread = NULL;
20063247Sgjelinek
20073247Sgjelinek if (vmu_data.vmu_pending_waiters > 0)
20083247Sgjelinek cv_broadcast(&vmu_data.vmu_cv);
20093247Sgjelinek
20103247Sgjelinek mutex_exit(&vmu_data.vmu_lock);
20113247Sgjelinek
20123247Sgjelinek /* copy cache */
20137884Sgerald.jelinek@sun.com ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg);
20143247Sgjelinek mutex_enter(&vmu_data.vmu_lock);
20153247Sgjelinek vmu_cache_rele(cache);
20163247Sgjelinek mutex_exit(&vmu_data.vmu_lock);
20173247Sgjelinek
20183247Sgjelinek return (ret);
20193247Sgjelinek }
20203247Sgjelinek vmu_data.vmu_pending_flags |= flags;
20213247Sgjelinek vmu_data.vmu_pending_waiters++;
20223247Sgjelinek while (vmu_data.vmu_calc_thread != NULL) {
20233247Sgjelinek if (cv_wait_sig(&vmu_data.vmu_cv,
20243247Sgjelinek &vmu_data.vmu_lock) == 0) {
20253247Sgjelinek vmu_data.vmu_pending_waiters--;
20263247Sgjelinek mutex_exit(&vmu_data.vmu_lock);
20273247Sgjelinek return (set_errno(EINTR));
20283247Sgjelinek }
20293247Sgjelinek }
20303247Sgjelinek vmu_data.vmu_pending_waiters--;
20313247Sgjelinek goto start;
20323247Sgjelinek }
2033