xref: /onnv-gate/usr/src/uts/common/vm/vm_usage.c (revision 7884:66b5b9486f94)
13247Sgjelinek /*
23247Sgjelinek  * CDDL HEADER START
33247Sgjelinek  *
43247Sgjelinek  * The contents of this file are subject to the terms of the
53247Sgjelinek  * Common Development and Distribution License (the "License").
63247Sgjelinek  * You may not use this file except in compliance with the License.
73247Sgjelinek  *
83247Sgjelinek  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93247Sgjelinek  * or http://www.opensolaris.org/os/licensing.
103247Sgjelinek  * See the License for the specific language governing permissions
113247Sgjelinek  * and limitations under the License.
123247Sgjelinek  *
133247Sgjelinek  * When distributing Covered Code, include this CDDL HEADER in each
143247Sgjelinek  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153247Sgjelinek  * If applicable, add the following below this CDDL HEADER, with the
163247Sgjelinek  * fields enclosed by brackets "[]" replaced with your own identifying
173247Sgjelinek  * information: Portions Copyright [yyyy] [name of copyright owner]
183247Sgjelinek  *
193247Sgjelinek  * CDDL HEADER END
203247Sgjelinek  */
213247Sgjelinek 
223247Sgjelinek /*
23*7884Sgerald.jelinek@sun.com  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
243247Sgjelinek  * Use is subject to license terms.
253247Sgjelinek  */
263247Sgjelinek 
273247Sgjelinek /*
283247Sgjelinek  * vm_usage
293247Sgjelinek  *
303247Sgjelinek  * This file implements the getvmusage() private system call.
313247Sgjelinek  * getvmusage() counts the amount of resident memory pages and swap
323247Sgjelinek  * reserved by the specified process collective. A "process collective" is
 * the set of processes owned by a particular zone, project, task, or user.
343247Sgjelinek  *
353247Sgjelinek  * rss and swap are counted so that for a given process collective, a page is
363247Sgjelinek  * only counted once.  For example, this means that if multiple processes in
373247Sgjelinek  * the same project map the same page, then the project will only be charged
383247Sgjelinek  * once for that page.  On the other hand, if two processes in different
393247Sgjelinek  * projects map the same page, then both projects will be charged
403247Sgjelinek  * for the page.
413247Sgjelinek  *
423247Sgjelinek  * The vm_getusage() calculation is implemented so that the first thread
433247Sgjelinek  * performs the rss/swap counting. Other callers will wait for that thread to
443247Sgjelinek  * finish, copying the results.  This enables multiple rcapds and prstats to
453247Sgjelinek  * consume data from the same calculation.  The results are also cached so that
463247Sgjelinek  * a caller interested in recent results can just copy them instead of starting
 * a new calculation. The caller passes the maximum age (in seconds) of the
483247Sgjelinek  * data.  If the cached data is young enough, the cache is copied, otherwise,
493247Sgjelinek  * a new calculation is executed and the cache is replaced with the new
503247Sgjelinek  * data.
513247Sgjelinek  *
523247Sgjelinek  * The rss calculation for each process collective is as follows:
533247Sgjelinek  *
543247Sgjelinek  *   - Inspect flags, determine if counting rss for zones, projects, tasks,
553247Sgjelinek  *     and/or users.
563247Sgjelinek  *   - For each proc:
573247Sgjelinek  *	- Figure out proc's collectives (zone, project, task, and/or user).
583247Sgjelinek  *	- For each seg in proc's address space:
593247Sgjelinek  *		- If seg is private:
603247Sgjelinek  *			- Lookup anons in the amp.
 *			- For incore pages not previously visited for each
 *			  of the proc's collectives, add incore pagesize to
 *			  each collective.  Anons with a refcnt of 1 can be
 *			  assumed to be not previously visited.
663247Sgjelinek  *			- For address ranges without anons in the amp:
673247Sgjelinek  *				- Lookup pages in underlying vnode.
 *				- For incore pages not previously visited for
 *				  each of the proc's collectives, add incore
 *				  pagesize to each collective.
713247Sgjelinek  *		- If seg is shared:
723247Sgjelinek  *			- Lookup pages in the shared amp or vnode.
733247Sgjelinek  *			- For incore pages not previously visited for each of
743247Sgjelinek  *			  the proc's collectives, add incore pagesize to each
753247Sgjelinek  *			  collective.
763247Sgjelinek  *
773247Sgjelinek  * Swap is reserved by private segments, and shared anonymous segments.
783247Sgjelinek  * The only shared anon segments which do not reserve swap are ISM segments
793247Sgjelinek  * and schedctl segments, both of which can be identified by having
803247Sgjelinek  * amp->swresv == 0.
813247Sgjelinek  *
823247Sgjelinek  * The swap calculation for each collective is as follows:
833247Sgjelinek  *
843247Sgjelinek  *   - Inspect flags, determine if counting rss for zones, projects, tasks,
853247Sgjelinek  *     and/or users.
863247Sgjelinek  *   - For each proc:
873247Sgjelinek  *	- Figure out proc's collectives (zone, project, task, and/or user).
883247Sgjelinek  *	- For each seg in proc's address space:
893247Sgjelinek  *		- If seg is private:
903247Sgjelinek  *			- Add svd->swresv pages to swap count for each of the
913247Sgjelinek  *			  proc's collectives.
923247Sgjelinek  *		- If seg is anon, shared, and amp->swresv != 0
933247Sgjelinek  *			- For address ranges in amp not previously visited for
943247Sgjelinek  *			  each of the proc's collectives, add size of address
953247Sgjelinek  *			  range to the swap count for each collective.
963247Sgjelinek  *
973247Sgjelinek  * These two calculations are done simultaneously, with most of the work
983247Sgjelinek  * being done in vmu_calculate_seg().  The results of the calculation are
993247Sgjelinek  * copied into "vmu_data.vmu_cache_results".
1003247Sgjelinek  *
1013247Sgjelinek  * To perform the calculation, various things are tracked and cached:
1023247Sgjelinek  *
1033247Sgjelinek  *    - incore/not-incore page ranges for all vnodes.
1043247Sgjelinek  *	(vmu_data.vmu_all_vnodes_hash)
1053247Sgjelinek  *	This eliminates looking up the same page more than once.
1063247Sgjelinek  *
1073247Sgjelinek  *    - incore/not-incore page ranges for all shared amps.
1083247Sgjelinek  *	(vmu_data.vmu_all_amps_hash)
1093247Sgjelinek  *	This eliminates looking up the same page more than once.
1103247Sgjelinek  *
1113247Sgjelinek  *    - visited page ranges for each collective.
1123247Sgjelinek  *	   - per vnode (entity->vme_vnode_hash)
1133247Sgjelinek  *	   - per shared amp (entity->vme_amp_hash)
1143247Sgjelinek  *	For accurate counting of map-shared and cow-shared pages.
1153247Sgjelinek  *
1163247Sgjelinek  *    - visited private anons (refcnt > 1) for each collective.
1173247Sgjelinek  *	(entity->vme_anon_hash)
1183247Sgjelinek  *	For accurate counting of cow-shared pages.
1193247Sgjelinek  *
1203247Sgjelinek  * The common accounting structure is the vmu_entity_t, which represents
1213247Sgjelinek  * collectives:
1223247Sgjelinek  *
1233247Sgjelinek  *    - A zone.
1243247Sgjelinek  *    - A project, task, or user within a zone.
1253247Sgjelinek  *    - The entire system (vmu_data.vmu_system).
1263247Sgjelinek  *    - Each collapsed (col) project and user.  This means a given projid or
1273247Sgjelinek  *	uid, regardless of which zone the process is in.  For instance,
1283247Sgjelinek  *      project 0 in the global zone and project 0 in a non global zone are
1293247Sgjelinek  *	the same collapsed project.
1303247Sgjelinek  *
1313247Sgjelinek  *  Each entity structure tracks which pages have been already visited for
1323247Sgjelinek  *  that entity (via previously inspected processes) so that these pages are
1333247Sgjelinek  *  not double counted.
1343247Sgjelinek  */
1353247Sgjelinek 
1363247Sgjelinek #include <sys/errno.h>
1373247Sgjelinek #include <sys/types.h>
1383247Sgjelinek #include <sys/zone.h>
1393247Sgjelinek #include <sys/proc.h>
1403247Sgjelinek #include <sys/project.h>
1413247Sgjelinek #include <sys/task.h>
1423247Sgjelinek #include <sys/thread.h>
1433247Sgjelinek #include <sys/time.h>
1443247Sgjelinek #include <sys/mman.h>
1453247Sgjelinek #include <sys/modhash.h>
1463247Sgjelinek #include <sys/modhash_impl.h>
1473247Sgjelinek #include <sys/shm.h>
1483247Sgjelinek #include <sys/swap.h>
1493247Sgjelinek #include <sys/synch.h>
1503247Sgjelinek #include <sys/systm.h>
1513247Sgjelinek #include <sys/var.h>
1523247Sgjelinek #include <sys/vm_usage.h>
1533247Sgjelinek #include <sys/zone.h>
154*7884Sgerald.jelinek@sun.com #include <sys/sunddi.h>
1553247Sgjelinek #include <vm/anon.h>
1563247Sgjelinek #include <vm/as.h>
1573247Sgjelinek #include <vm/seg_vn.h>
1583247Sgjelinek #include <vm/seg_spt.h>
1593247Sgjelinek 
1603247Sgjelinek #define	VMUSAGE_HASH_SIZE		512
1613247Sgjelinek 
1623247Sgjelinek #define	VMUSAGE_TYPE_VNODE		1
1633247Sgjelinek #define	VMUSAGE_TYPE_AMP		2
1643247Sgjelinek #define	VMUSAGE_TYPE_ANON		3
1653247Sgjelinek 
1663247Sgjelinek #define	VMUSAGE_BOUND_UNKNOWN		0
1673247Sgjelinek #define	VMUSAGE_BOUND_INCORE		1
1683247Sgjelinek #define	VMUSAGE_BOUND_NOT_INCORE	2
1693247Sgjelinek 
1703247Sgjelinek /*
1713247Sgjelinek  * bounds for vnodes and shared amps
1723247Sgjelinek  * Each bound is either entirely incore, entirely not in core, or
1733247Sgjelinek  * entirely unknown.  bounds are stored in order by offset.
1743247Sgjelinek  */
1753247Sgjelinek typedef struct vmu_bound {
1763247Sgjelinek 	struct  vmu_bound *vmb_next;
1773247Sgjelinek 	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
1783247Sgjelinek 	pgcnt_t	vmb_end;    /* page offset in vnode/amp on which bound ends */
1793247Sgjelinek 	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
1803247Sgjelinek } vmu_bound_t;
1813247Sgjelinek 
1823247Sgjelinek /*
1833247Sgjelinek  * hash of visited objects (vnodes or shared amps)
1843247Sgjelinek  * key is address of vnode or amp.  Bounds lists known incore/non-incore
1853247Sgjelinek  * bounds for vnode/amp.
1863247Sgjelinek  */
1873247Sgjelinek typedef struct vmu_object {
1883247Sgjelinek 	struct vmu_object	*vmo_next;	/* free list */
1893247Sgjelinek 	caddr_t		vmo_key;
1903247Sgjelinek 	short		vmo_type;
1913247Sgjelinek 	vmu_bound_t	*vmo_bounds;
1923247Sgjelinek } vmu_object_t;
1933247Sgjelinek 
1943247Sgjelinek /*
1953247Sgjelinek  * Entity by which to count results.
1963247Sgjelinek  *
1973247Sgjelinek  * The entity structure keeps the current rss/swap counts for each entity
1983247Sgjelinek  * (zone, project, etc), and hashes of vm structures that have already
1993247Sgjelinek  * been visited for the entity.
2003247Sgjelinek  *
2013247Sgjelinek  * vme_next:	links the list of all entities currently being counted by
2023247Sgjelinek  *		vmu_calculate().
2033247Sgjelinek  *
2043247Sgjelinek  * vme_next_calc: links the list of entities related to the current process
2053247Sgjelinek  *		 being counted by vmu_calculate_proc().
2063247Sgjelinek  *
2073247Sgjelinek  * vmu_calculate_proc() walks all processes.  For each process, it makes a
2083247Sgjelinek  * list of the entities related to that process using vme_next_calc.  This
2093247Sgjelinek  * list changes each time vmu_calculate_proc() is called.
2103247Sgjelinek  *
2113247Sgjelinek  */
2123247Sgjelinek typedef struct vmu_entity {
2133247Sgjelinek 	struct vmu_entity *vme_next;
2143247Sgjelinek 	struct vmu_entity *vme_next_calc;
2153247Sgjelinek 	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
2163247Sgjelinek 	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
2173247Sgjelinek 	mod_hash_t	*vme_anon_hash;	 /* cow anons visited for entity */
2183247Sgjelinek 	vmusage_t	vme_result;	 /* identifies entity and results */
2193247Sgjelinek } vmu_entity_t;
2203247Sgjelinek 
2213247Sgjelinek /*
2223247Sgjelinek  * Hash of entities visited within a zone, and an entity for the zone
2233247Sgjelinek  * itself.
2243247Sgjelinek  */
2253247Sgjelinek typedef struct vmu_zone {
2263247Sgjelinek 	struct vmu_zone	*vmz_next;	/* free list */
2273247Sgjelinek 	id_t		vmz_id;
2283247Sgjelinek 	vmu_entity_t	*vmz_zone;
2293247Sgjelinek 	mod_hash_t	*vmz_projects_hash;
2303247Sgjelinek 	mod_hash_t	*vmz_tasks_hash;
2313247Sgjelinek 	mod_hash_t	*vmz_rusers_hash;
2323247Sgjelinek 	mod_hash_t	*vmz_eusers_hash;
2333247Sgjelinek } vmu_zone_t;
2343247Sgjelinek 
2353247Sgjelinek /*
2363247Sgjelinek  * Cache of results from last calculation
2373247Sgjelinek  */
2383247Sgjelinek typedef struct vmu_cache {
2393247Sgjelinek 	vmusage_t	*vmc_results;	/* Results from last call to */
2403247Sgjelinek 					/* vm_getusage(). */
2413247Sgjelinek 	uint64_t	vmc_nresults;	/* Count of cached results */
2423247Sgjelinek 	uint64_t	vmc_refcnt;	/* refcnt for free */
2433247Sgjelinek 	uint_t		vmc_flags;	/* Flags for vm_getusage() */
2443247Sgjelinek 	hrtime_t	vmc_timestamp;	/* when cache was created */
2453247Sgjelinek } vmu_cache_t;
2463247Sgjelinek 
2473247Sgjelinek /*
2483247Sgjelinek  * top level rss info for the system
2493247Sgjelinek  */
2503247Sgjelinek typedef struct vmu_data {
2513247Sgjelinek 	kmutex_t	vmu_lock;		/* Protects vmu_data */
2523247Sgjelinek 	kcondvar_t	vmu_cv;			/* Used to signal threads */
2533247Sgjelinek 						/* Waiting for */
2543247Sgjelinek 						/* Rss_calc_thread to finish */
2553247Sgjelinek 	vmu_entity_t	*vmu_system;		/* Entity for tracking */
2563247Sgjelinek 						/* rss/swap for all processes */
2573247Sgjelinek 						/* in all zones */
2583247Sgjelinek 	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
2593247Sgjelinek 	mod_hash_t	*vmu_projects_col_hash; /* These *_col_hash hashes */
2603247Sgjelinek 	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
2613247Sgjelinek 	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
2623247Sgjelinek 						/* to implement VMUSAGE_COL_* */
2633247Sgjelinek 						/* flags, which aggregate by */
2643247Sgjelinek 						/* project or user regardless */
2653247Sgjelinek 						/* of zoneid. */
2663247Sgjelinek 	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
2673247Sgjelinek 						/* to track incore/not-incore */
2683247Sgjelinek 	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
2693247Sgjelinek 						/* amps to track incore/not- */
2703247Sgjelinek 						/* incore */
2713247Sgjelinek 	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
2723247Sgjelinek 	size_t		vmu_nentities;		/* Count of entities in list */
2733247Sgjelinek 	vmu_cache_t	*vmu_cache;		/* Cached results */
2743247Sgjelinek 	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
2753247Sgjelinek 						/* vmu_calculate() */
2763247Sgjelinek 	uint_t		vmu_calc_flags;		/* Flags being using by */
2773247Sgjelinek 						/* currently running calc */
2783247Sgjelinek 						/* thread */
2793247Sgjelinek 	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
2803247Sgjelinek 						/* threads waiting for */
2813247Sgjelinek 						/* calc thread to finish */
2823247Sgjelinek 	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
2833247Sgjelinek 						/* for calc thread */
2843247Sgjelinek 	vmu_bound_t	*vmu_free_bounds;
2853247Sgjelinek 	vmu_object_t	*vmu_free_objects;
2863247Sgjelinek 	vmu_entity_t	*vmu_free_entities;
2873247Sgjelinek 	vmu_zone_t	*vmu_free_zones;
2883247Sgjelinek } vmu_data_t;
2893247Sgjelinek 
2903247Sgjelinek extern struct as kas;
2913247Sgjelinek extern proc_t *practive;
2923247Sgjelinek extern zone_t *global_zone;
2933247Sgjelinek extern struct seg_ops segvn_ops;
2943247Sgjelinek extern struct seg_ops segspt_shmops;
2953247Sgjelinek 
2963247Sgjelinek static vmu_data_t vmu_data;
2973247Sgjelinek static kmem_cache_t *vmu_bound_cache;
2983247Sgjelinek static kmem_cache_t *vmu_object_cache;
2993247Sgjelinek 
3003247Sgjelinek /*
3013247Sgjelinek  * Save a bound on the free list
3023247Sgjelinek  */
3033247Sgjelinek static void
3043247Sgjelinek vmu_free_bound(vmu_bound_t *bound)
3053247Sgjelinek {
3063247Sgjelinek 	bound->vmb_next = vmu_data.vmu_free_bounds;
3073247Sgjelinek 	vmu_data.vmu_free_bounds = bound;
3083247Sgjelinek }
3093247Sgjelinek 
3103247Sgjelinek /*
3113247Sgjelinek  * Free an object, and all visited bound info.
3123247Sgjelinek  */
3133247Sgjelinek static void
3143247Sgjelinek vmu_free_object(mod_hash_val_t val)
3153247Sgjelinek {
3163247Sgjelinek 	vmu_object_t *obj = (vmu_object_t *)val;
3173247Sgjelinek 	vmu_bound_t *bound = obj->vmo_bounds;
3183247Sgjelinek 	vmu_bound_t *tmp;
3193247Sgjelinek 
3203247Sgjelinek 	while (bound != NULL) {
3213247Sgjelinek 		tmp = bound;
3223247Sgjelinek 		bound = bound->vmb_next;
3233247Sgjelinek 		vmu_free_bound(tmp);
3243247Sgjelinek 	}
3253247Sgjelinek 	obj->vmo_next = vmu_data.vmu_free_objects;
3263247Sgjelinek 	vmu_data.vmu_free_objects = obj;
3273247Sgjelinek }
3283247Sgjelinek 
3293247Sgjelinek /*
3303247Sgjelinek  * Free an entity, and hashes of visited objects for that entity.
3313247Sgjelinek  */
3323247Sgjelinek static void
3333247Sgjelinek vmu_free_entity(mod_hash_val_t val)
3343247Sgjelinek {
3353247Sgjelinek 	vmu_entity_t *entity = (vmu_entity_t *)val;
3363247Sgjelinek 
3373247Sgjelinek 	if (entity->vme_vnode_hash != NULL)
3383247Sgjelinek 		i_mod_hash_clear_nosync(entity->vme_vnode_hash);
3393247Sgjelinek 	if (entity->vme_amp_hash != NULL)
3403247Sgjelinek 		i_mod_hash_clear_nosync(entity->vme_amp_hash);
3413247Sgjelinek 	if (entity->vme_anon_hash != NULL)
3423247Sgjelinek 		i_mod_hash_clear_nosync(entity->vme_anon_hash);
3433247Sgjelinek 
3443247Sgjelinek 	entity->vme_next = vmu_data.vmu_free_entities;
3453247Sgjelinek 	vmu_data.vmu_free_entities = entity;
3463247Sgjelinek }
3473247Sgjelinek 
3483247Sgjelinek /*
3493247Sgjelinek  * Free zone entity, and all hashes of entities inside that zone,
3503247Sgjelinek  * which are projects, tasks, and users.
3513247Sgjelinek  */
3523247Sgjelinek static void
3533247Sgjelinek vmu_free_zone(mod_hash_val_t val)
3543247Sgjelinek {
3553247Sgjelinek 	vmu_zone_t *zone = (vmu_zone_t *)val;
3563247Sgjelinek 
3573247Sgjelinek 	if (zone->vmz_zone != NULL) {
3583247Sgjelinek 		vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
3593247Sgjelinek 		zone->vmz_zone = NULL;
3603247Sgjelinek 	}
3613247Sgjelinek 	if (zone->vmz_projects_hash != NULL)
3623247Sgjelinek 		i_mod_hash_clear_nosync(zone->vmz_projects_hash);
3633247Sgjelinek 	if (zone->vmz_tasks_hash != NULL)
3643247Sgjelinek 		i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
3653247Sgjelinek 	if (zone->vmz_rusers_hash != NULL)
3663247Sgjelinek 		i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
3673247Sgjelinek 	if (zone->vmz_eusers_hash != NULL)
3683247Sgjelinek 		i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
3693247Sgjelinek 	zone->vmz_next = vmu_data.vmu_free_zones;
3703247Sgjelinek 	vmu_data.vmu_free_zones = zone;
3713247Sgjelinek }
3723247Sgjelinek 
3733247Sgjelinek /*
3743247Sgjelinek  * Initialize synchronization primitives and hashes for system-wide tracking
3753247Sgjelinek  * of visited vnodes and shared amps.  Initialize results cache.
3763247Sgjelinek  */
3773247Sgjelinek void
3783247Sgjelinek vm_usage_init()
3793247Sgjelinek {
3803247Sgjelinek 	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
3813247Sgjelinek 	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);
3823247Sgjelinek 
3833247Sgjelinek 	vmu_data.vmu_system = NULL;
3843247Sgjelinek 	vmu_data.vmu_zones_hash = NULL;
3853247Sgjelinek 	vmu_data.vmu_projects_col_hash = NULL;
3863247Sgjelinek 	vmu_data.vmu_rusers_col_hash = NULL;
3873247Sgjelinek 	vmu_data.vmu_eusers_col_hash = NULL;
3883247Sgjelinek 
3893247Sgjelinek 	vmu_data.vmu_free_bounds = NULL;
3903247Sgjelinek 	vmu_data.vmu_free_objects = NULL;
3913247Sgjelinek 	vmu_data.vmu_free_entities = NULL;
3923247Sgjelinek 	vmu_data.vmu_free_zones = NULL;
3933247Sgjelinek 
3943247Sgjelinek 	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
3953247Sgjelinek 	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
3963247Sgjelinek 	    sizeof (vnode_t));
3973247Sgjelinek 	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
3983247Sgjelinek 	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
3993247Sgjelinek 	    sizeof (struct anon_map));
4003247Sgjelinek 	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
4013247Sgjelinek 	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
4023247Sgjelinek 	    vmu_free_entity);
4033247Sgjelinek 	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
4043247Sgjelinek 	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
4053247Sgjelinek 	    vmu_free_entity);
4063247Sgjelinek 	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
4073247Sgjelinek 	    "vmusage collpased euser hash", VMUSAGE_HASH_SIZE,
4083247Sgjelinek 	    vmu_free_entity);
4093247Sgjelinek 	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
4103247Sgjelinek 	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);
4113247Sgjelinek 
4123247Sgjelinek 	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
4133247Sgjelinek 	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
4143247Sgjelinek 	vmu_object_cache = kmem_cache_create("vmu_object_cache",
4153247Sgjelinek 	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
4163247Sgjelinek 
4173247Sgjelinek 	vmu_data.vmu_entities = NULL;
4183247Sgjelinek 	vmu_data.vmu_nentities = 0;
4193247Sgjelinek 
4203247Sgjelinek 	vmu_data.vmu_cache = NULL;
4213247Sgjelinek 	vmu_data.vmu_calc_thread = NULL;
4223247Sgjelinek 	vmu_data.vmu_calc_flags = 0;
4233247Sgjelinek 	vmu_data.vmu_pending_flags = 0;
4243247Sgjelinek 	vmu_data.vmu_pending_waiters = 0;
4253247Sgjelinek }
4263247Sgjelinek 
4273247Sgjelinek /*
4283247Sgjelinek  * Allocate hashes for tracking vm objects visited for an entity.
4293247Sgjelinek  * Update list of entities.
4303247Sgjelinek  */
4313247Sgjelinek static vmu_entity_t *
4323247Sgjelinek vmu_alloc_entity(id_t id, int type, id_t zoneid)
4333247Sgjelinek {
4343247Sgjelinek 	vmu_entity_t *entity;
4353247Sgjelinek 
4363247Sgjelinek 	if (vmu_data.vmu_free_entities != NULL) {
4373247Sgjelinek 		entity = vmu_data.vmu_free_entities;
4383247Sgjelinek 		vmu_data.vmu_free_entities =
4393247Sgjelinek 		    vmu_data.vmu_free_entities->vme_next;
4403247Sgjelinek 		bzero(&entity->vme_result, sizeof (vmusage_t));
4413247Sgjelinek 	} else {
4423247Sgjelinek 		entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
4433247Sgjelinek 	}
4443247Sgjelinek 	entity->vme_result.vmu_id = id;
4453247Sgjelinek 	entity->vme_result.vmu_zoneid = zoneid;
4463247Sgjelinek 	entity->vme_result.vmu_type = type;
4473247Sgjelinek 
4483247Sgjelinek 	if (entity->vme_vnode_hash == NULL)
4493247Sgjelinek 		entity->vme_vnode_hash = mod_hash_create_ptrhash(
4503247Sgjelinek 		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
4513247Sgjelinek 		    sizeof (vnode_t));
4523247Sgjelinek 
4533247Sgjelinek 	if (entity->vme_amp_hash == NULL)
4543247Sgjelinek 		entity->vme_amp_hash = mod_hash_create_ptrhash(
4553247Sgjelinek 		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
4563247Sgjelinek 		    sizeof (struct anon_map));
4573247Sgjelinek 
4583247Sgjelinek 	if (entity->vme_anon_hash == NULL)
4593247Sgjelinek 		entity->vme_anon_hash = mod_hash_create_ptrhash(
4603247Sgjelinek 		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
4613247Sgjelinek 		    mod_hash_null_valdtor, sizeof (struct anon));
4623247Sgjelinek 
4633247Sgjelinek 	entity->vme_next = vmu_data.vmu_entities;
4643247Sgjelinek 	vmu_data.vmu_entities = entity;
4653247Sgjelinek 	vmu_data.vmu_nentities++;
4663247Sgjelinek 
4673247Sgjelinek 	return (entity);
4683247Sgjelinek }
4693247Sgjelinek 
4703247Sgjelinek /*
4713247Sgjelinek  * Allocate a zone entity, and hashes for tracking visited vm objects
4723247Sgjelinek  * for projects, tasks, and users within that zone.
4733247Sgjelinek  */
4743247Sgjelinek static vmu_zone_t *
4753247Sgjelinek vmu_alloc_zone(id_t id)
4763247Sgjelinek {
4773247Sgjelinek 	vmu_zone_t *zone;
4783247Sgjelinek 
4793247Sgjelinek 	if (vmu_data.vmu_free_zones != NULL) {
4803247Sgjelinek 		zone = vmu_data.vmu_free_zones;
4813247Sgjelinek 		vmu_data.vmu_free_zones =
4823247Sgjelinek 		    vmu_data.vmu_free_zones->vmz_next;
4833247Sgjelinek 		zone->vmz_next = NULL;
4843247Sgjelinek 		zone->vmz_zone = NULL;
4853247Sgjelinek 	} else {
4863247Sgjelinek 		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
4873247Sgjelinek 	}
4883247Sgjelinek 
4893247Sgjelinek 	zone->vmz_id = id;
4903247Sgjelinek 
4913247Sgjelinek 	if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
4923247Sgjelinek 		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);
4933247Sgjelinek 
4943247Sgjelinek 	if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
4953247Sgjelinek 	    VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
4963247Sgjelinek 		zone->vmz_projects_hash = mod_hash_create_idhash(
4973247Sgjelinek 		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
4983247Sgjelinek 
4993247Sgjelinek 	if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
5003247Sgjelinek 	    != 0 && zone->vmz_tasks_hash == NULL)
5013247Sgjelinek 		zone->vmz_tasks_hash = mod_hash_create_idhash(
5023247Sgjelinek 		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
5033247Sgjelinek 
5043247Sgjelinek 	if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
5053247Sgjelinek 	    != 0 && zone->vmz_rusers_hash == NULL)
5063247Sgjelinek 		zone->vmz_rusers_hash = mod_hash_create_idhash(
5073247Sgjelinek 		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
5083247Sgjelinek 
5093247Sgjelinek 	if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
5103247Sgjelinek 	    != 0 && zone->vmz_eusers_hash == NULL)
5113247Sgjelinek 		zone->vmz_eusers_hash = mod_hash_create_idhash(
5123247Sgjelinek 		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
5133247Sgjelinek 
5143247Sgjelinek 	return (zone);
5153247Sgjelinek }
5163247Sgjelinek 
5173247Sgjelinek /*
5183247Sgjelinek  * Allocate a structure for tracking visited bounds for a vm object.
5193247Sgjelinek  */
5203247Sgjelinek static vmu_object_t *
5213247Sgjelinek vmu_alloc_object(caddr_t key, int type)
5223247Sgjelinek {
5233247Sgjelinek 	vmu_object_t *object;
5243247Sgjelinek 
5253247Sgjelinek 	if (vmu_data.vmu_free_objects != NULL) {
5263247Sgjelinek 		object = vmu_data.vmu_free_objects;
5273247Sgjelinek 		vmu_data.vmu_free_objects =
5283247Sgjelinek 		    vmu_data.vmu_free_objects->vmo_next;
5293247Sgjelinek 	} else {
5303247Sgjelinek 		object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
5313247Sgjelinek 	}
5323247Sgjelinek 
5333247Sgjelinek 	object->vmo_key = key;
5343247Sgjelinek 	object->vmo_type = type;
5353247Sgjelinek 	object->vmo_bounds = NULL;
5363247Sgjelinek 
5373247Sgjelinek 	return (object);
5383247Sgjelinek }
5393247Sgjelinek 
5403247Sgjelinek /*
5413247Sgjelinek  * Allocate and return a bound structure.
5423247Sgjelinek  */
5433247Sgjelinek static vmu_bound_t *
5443247Sgjelinek vmu_alloc_bound()
5453247Sgjelinek {
5463247Sgjelinek 	vmu_bound_t *bound;
5473247Sgjelinek 
5483247Sgjelinek 	if (vmu_data.vmu_free_bounds != NULL) {
5493247Sgjelinek 		bound = vmu_data.vmu_free_bounds;
5503247Sgjelinek 		vmu_data.vmu_free_bounds =
5513247Sgjelinek 		    vmu_data.vmu_free_bounds->vmb_next;
5523247Sgjelinek 		bzero(bound, sizeof (vmu_bound_t));
5533247Sgjelinek 	} else {
5543247Sgjelinek 		bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
5553247Sgjelinek 		bzero(bound, sizeof (vmu_bound_t));
5563247Sgjelinek 	}
5573247Sgjelinek 	return (bound);
5583247Sgjelinek }
5593247Sgjelinek 
5603247Sgjelinek /*
5613247Sgjelinek  * vmu_find_insert_* functions implement hash lookup or allocate and
5623247Sgjelinek  * insert operations.
5633247Sgjelinek  */
5643247Sgjelinek static vmu_object_t *
5653247Sgjelinek vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
5663247Sgjelinek {
5673247Sgjelinek 	int ret;
5683247Sgjelinek 	vmu_object_t *object;
5693247Sgjelinek 
5703247Sgjelinek 	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
5713247Sgjelinek 	    (mod_hash_val_t *)&object);
5723247Sgjelinek 	if (ret != 0) {
5733247Sgjelinek 		object = vmu_alloc_object(key, type);
5743247Sgjelinek 		ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
5753247Sgjelinek 		    (mod_hash_val_t)object, (mod_hash_hndl_t)0);
5763247Sgjelinek 		ASSERT(ret == 0);
5773247Sgjelinek 	}
5783247Sgjelinek 	return (object);
5793247Sgjelinek }
5803247Sgjelinek 
5813247Sgjelinek static int
5823247Sgjelinek vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
5833247Sgjelinek {
5843247Sgjelinek 	int ret;
5853247Sgjelinek 	caddr_t val;
5863247Sgjelinek 
5873247Sgjelinek 	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
5883247Sgjelinek 	    (mod_hash_val_t *)&val);
5893247Sgjelinek 
5903247Sgjelinek 	if (ret == 0)
5913247Sgjelinek 		return (0);
5923247Sgjelinek 
5933247Sgjelinek 	ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
5943247Sgjelinek 	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);
5953247Sgjelinek 
5963247Sgjelinek 	ASSERT(ret == 0);
5973247Sgjelinek 
5983247Sgjelinek 	return (1);
5993247Sgjelinek }
6003247Sgjelinek 
6013247Sgjelinek static vmu_entity_t *
6023247Sgjelinek vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
6033247Sgjelinek {
6043247Sgjelinek 	int ret;
6053247Sgjelinek 	vmu_entity_t *entity;
6063247Sgjelinek 
6073247Sgjelinek 	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
6083247Sgjelinek 	    (mod_hash_val_t *)&entity);
6093247Sgjelinek 	if (ret != 0) {
6103247Sgjelinek 		entity = vmu_alloc_entity(id, type, zoneid);
6113247Sgjelinek 		ret = i_mod_hash_insert_nosync(hash,
6123247Sgjelinek 		    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
6133247Sgjelinek 		    (mod_hash_hndl_t)0);
6143247Sgjelinek 		ASSERT(ret == 0);
6153247Sgjelinek 	}
6163247Sgjelinek 	return (entity);
6173247Sgjelinek }
6183247Sgjelinek 
6193247Sgjelinek 
6203247Sgjelinek 
6213247Sgjelinek 
6223247Sgjelinek /*
6233247Sgjelinek  * Returns list of object bounds between start and end.  New bounds inserted
6243247Sgjelinek  * by this call are given type.
6253247Sgjelinek  *
6263247Sgjelinek  * Returns the number of pages covered if new bounds are created.  Returns 0
6273247Sgjelinek  * if region between start/end consists of all existing bounds.
6283247Sgjelinek  */
6293247Sgjelinek static pgcnt_t
6303247Sgjelinek vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t
6313247Sgjelinek     end, char type, vmu_bound_t **first, vmu_bound_t **last)
6323247Sgjelinek {
6333247Sgjelinek 	vmu_bound_t *next;
6343247Sgjelinek 	vmu_bound_t *prev = NULL;
6353247Sgjelinek 	vmu_bound_t *tmp = NULL;
6363247Sgjelinek 	pgcnt_t ret = 0;
6373247Sgjelinek 
6383247Sgjelinek 	*first = *last = NULL;
6393247Sgjelinek 
6403247Sgjelinek 	for (next = ro->vmo_bounds; next != NULL; next = next->vmb_next) {
6413247Sgjelinek 		/*
6423247Sgjelinek 		 * Find bounds overlapping or overlapped by range [start,end].
6433247Sgjelinek 		 */
6443247Sgjelinek 		if (start > next->vmb_end) {
6453247Sgjelinek 			/* bound is before new bound */
6463247Sgjelinek 			prev = next;
6473247Sgjelinek 			continue;
6483247Sgjelinek 		}
6493247Sgjelinek 		if (next->vmb_start > end) {
6503247Sgjelinek 			/* bound is after new bound */
6513247Sgjelinek 			break;
6523247Sgjelinek 		}
6533247Sgjelinek 		if (*first == NULL)
6543247Sgjelinek 			*first = next;
6553247Sgjelinek 		*last = next;
6563247Sgjelinek 	}
6573247Sgjelinek 
6583247Sgjelinek 	if (*first == NULL) {
6593247Sgjelinek 		ASSERT(*last == NULL);
6603247Sgjelinek 		/*
6613247Sgjelinek 		 * No bounds overlapping range [start,end], so create new
6623247Sgjelinek 		 * bound
6633247Sgjelinek 		 */
6643247Sgjelinek 		tmp = vmu_alloc_bound();
6653247Sgjelinek 		tmp->vmb_start = start;
6663247Sgjelinek 		tmp->vmb_end = end;
6673247Sgjelinek 		tmp->vmb_type = type;
6683247Sgjelinek 		if (prev == NULL) {
6693247Sgjelinek 			tmp->vmb_next = ro->vmo_bounds;
6703247Sgjelinek 			ro->vmo_bounds = tmp;
6713247Sgjelinek 		} else {
6723247Sgjelinek 			tmp->vmb_next = prev->vmb_next;
6733247Sgjelinek 			prev->vmb_next = tmp;
6743247Sgjelinek 		}
6753247Sgjelinek 		*first = tmp;
6763247Sgjelinek 		*last = tmp;
6773247Sgjelinek 		ASSERT(tmp->vmb_end >= tmp->vmb_start);
6783247Sgjelinek 		ret = tmp->vmb_end - tmp->vmb_start + 1;
6793247Sgjelinek 		return (ret);
6803247Sgjelinek 	}
6813247Sgjelinek 
6823247Sgjelinek 	/* Check to see if start is before first known bound */
6833247Sgjelinek 	ASSERT(first != NULL && last != NULL);
6843247Sgjelinek 	next = (*first);
6853247Sgjelinek 	if (start < (*first)->vmb_start) {
6863247Sgjelinek 		/* Create new bound before first bound */
6873247Sgjelinek 		tmp = vmu_alloc_bound();
6883247Sgjelinek 		tmp->vmb_start = start;
6893247Sgjelinek 		tmp->vmb_end = (*first)->vmb_start - 1;
6903247Sgjelinek 		tmp->vmb_type = type;
6913247Sgjelinek 		tmp->vmb_next = *first;
6923247Sgjelinek 		if (*first == ro->vmo_bounds)
6933247Sgjelinek 			ro->vmo_bounds = tmp;
6943247Sgjelinek 		if (prev != NULL)
6953247Sgjelinek 			prev->vmb_next = tmp;
6963247Sgjelinek 		ASSERT(tmp->vmb_end >= tmp->vmb_start);
6973247Sgjelinek 		ret += tmp->vmb_end - tmp->vmb_start + 1;
6983247Sgjelinek 		*first = tmp;
6993247Sgjelinek 	}
7003247Sgjelinek 	/*
7013247Sgjelinek 	 * Between start and end, search for gaps between and after existing
7023247Sgjelinek 	 * bounds.  Create new bounds to fill gaps if they exist.
7033247Sgjelinek 	 */
7043247Sgjelinek 	while (end > next->vmb_end) {
7053247Sgjelinek 		/*
7063247Sgjelinek 		 * Check for gap between bound and next bound. if no gap,
7073247Sgjelinek 		 * continue.
7083247Sgjelinek 		 */
7093247Sgjelinek 		if ((next != *last) &&
7103247Sgjelinek 		    ((next->vmb_end + 1) == next->vmb_next->vmb_start)) {
7113247Sgjelinek 			next = next->vmb_next;
7123247Sgjelinek 			continue;
7133247Sgjelinek 		}
7143247Sgjelinek 		/*
7153247Sgjelinek 		 * Insert new bound in gap after bound, and before next
7163247Sgjelinek 		 * bound if next bound exists.
7173247Sgjelinek 		 */
7183247Sgjelinek 		tmp = vmu_alloc_bound();
7193247Sgjelinek 		tmp->vmb_type = type;
7203247Sgjelinek 		tmp->vmb_next = next->vmb_next;
7213247Sgjelinek 		tmp->vmb_start = next->vmb_end + 1;
7223247Sgjelinek 
7233247Sgjelinek 		if (next != *last) {
7243247Sgjelinek 			tmp->vmb_end = next->vmb_next->vmb_start - 1;
7253247Sgjelinek 			ASSERT(tmp->vmb_end >= tmp->vmb_start);
7263247Sgjelinek 			ret += tmp->vmb_end - tmp->vmb_start + 1;
7273247Sgjelinek 			next->vmb_next = tmp;
7283247Sgjelinek 			next = tmp->vmb_next;
7293247Sgjelinek 		} else {
7303247Sgjelinek 			tmp->vmb_end = end;
7313247Sgjelinek 			ASSERT(tmp->vmb_end >= tmp->vmb_start);
7323247Sgjelinek 			ret += tmp->vmb_end - tmp->vmb_start + 1;
7333247Sgjelinek 			next->vmb_next = tmp;
7343247Sgjelinek 			*last = tmp;
7353247Sgjelinek 			break;
7363247Sgjelinek 		}
7373247Sgjelinek 	}
7383247Sgjelinek 	return (ret);
7393247Sgjelinek }
7403247Sgjelinek 
7413247Sgjelinek /*
7423247Sgjelinek  * vmu_update_bounds()
7433247Sgjelinek  *
 * first, last:	list of contiguous bounds, of which zero or more are of
 * 		type VMUSAGE_BOUND_UNKNOWN.
 *
 * new_first, new_last:	list of contiguous bounds, of which none are of
 *			type VMUSAGE_BOUND_UNKNOWN.  These bounds are used to
 *			update the types of bounds in (first,last) with
 *			type VMUSAGE_BOUND_UNKNOWN.
7513247Sgjelinek  *
7523247Sgjelinek  * For the list of bounds (first,last), this function updates any bounds
7533247Sgjelinek  * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in
7543247Sgjelinek  * the list (new_first, new_last).
7553247Sgjelinek  *
7563247Sgjelinek  * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
7573247Sgjelinek  * (new_first, new_last), it will be split into multiple bounds.
7583247Sgjelinek  *
7593247Sgjelinek  * Return value:
7603247Sgjelinek  * 	The number of pages in the list of bounds (first,last) that were of
7613247Sgjelinek  *	type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
7623247Sgjelinek  *	VMUSAGE_BOUND_INCORE.
7633247Sgjelinek  *
7643247Sgjelinek  */
static pgcnt_t
vmu_update_bounds(vmu_bound_t **first, vmu_bound_t **last,
    vmu_bound_t *new_first, vmu_bound_t *new_last)
{
	vmu_bound_t *next, *new_next, *tmp;
	pgcnt_t rss = 0;

	next = *first;
	new_next = new_first;

	/*
	 * Verify first and last bound are covered by new bounds if they
	 * have unknown type.
	 */
	ASSERT((*first)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*first)->vmb_start >= new_next->vmb_start);
	ASSERT((*last)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*last)->vmb_end <= new_last->vmb_end);
	for (;;) {
		/* If bound already has type, proceed to next bound */
		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}
		/*
		 * Advance through the new list until reaching the bound
		 * that overlaps the start of the current unknown bound.
		 */
		while (new_next->vmb_end < next->vmb_start)
			new_next = new_next->vmb_next;
		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
		next->vmb_type = new_next->vmb_type;
		if (new_next->vmb_end < next->vmb_end) {
			/*
			 * need to split bound: the new bound only covers a
			 * prefix of the unknown bound, so split off the
			 * uncovered tail as a fresh unknown bound and
			 * revisit it on the next iteration.
			 */
			tmp = vmu_alloc_bound();
			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
			tmp->vmb_start = new_next->vmb_end + 1;
			tmp->vmb_end = next->vmb_end;
			tmp->vmb_next = next->vmb_next;
			next->vmb_end = new_next->vmb_end;
			next->vmb_next = tmp;
			/* Keep *last pointing at the tail of the list */
			if (*last == next)
				*last = tmp;
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			next = tmp;
		} else {
			/* Bound fully covered; count it if resident */
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			if (next == *last)
				break;
			next = next->vmb_next;
		}
	}
	return (rss);
}
8193247Sgjelinek 
8203247Sgjelinek /*
8213247Sgjelinek  * merges adjacent bounds with same type between first and last bound.
8223247Sgjelinek  * After merge, last pointer is no longer valid, as last bound may be
8233247Sgjelinek  * merged away.
8243247Sgjelinek  */
8253247Sgjelinek static void
8263247Sgjelinek vmu_merge_bounds(vmu_bound_t **first, vmu_bound_t **last)
8273247Sgjelinek {
8283247Sgjelinek 	vmu_bound_t *next;
8293247Sgjelinek 	vmu_bound_t *tmp;
8303247Sgjelinek 
8313247Sgjelinek 	ASSERT(*first != NULL);
8323247Sgjelinek 	ASSERT(*last != NULL);
8333247Sgjelinek 
8343247Sgjelinek 	next = *first;
8353247Sgjelinek 	while (next != *last) {
8363247Sgjelinek 
8373247Sgjelinek 		/* If bounds are adjacent and have same type, merge them */
8383247Sgjelinek 		if (((next->vmb_end + 1) == next->vmb_next->vmb_start) &&
8393247Sgjelinek 		    (next->vmb_type == next->vmb_next->vmb_type)) {
8403247Sgjelinek 			tmp = next->vmb_next;
8413247Sgjelinek 			next->vmb_end = tmp->vmb_end;
8423247Sgjelinek 			next->vmb_next = tmp->vmb_next;
8433247Sgjelinek 			vmu_free_bound(tmp);
8443247Sgjelinek 			if (tmp == *last)
8453247Sgjelinek 				*last = next;
8463247Sgjelinek 		} else {
8473247Sgjelinek 			next = next->vmb_next;
8483247Sgjelinek 		}
8493247Sgjelinek 	}
8503247Sgjelinek }
8513247Sgjelinek 
8523247Sgjelinek /*
8533247Sgjelinek  * Given an amp and a list of bounds, updates each bound's type with
8543247Sgjelinek  * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
8553247Sgjelinek  *
8563247Sgjelinek  * If a bound is partially incore, it will be split into two bounds.
8573247Sgjelinek  * first and last may be modified, as bounds may be split into multiple
 * bounds if they are partially incore/not-incore.
8593247Sgjelinek  *
8603247Sgjelinek  * Set incore to non-zero if bounds are already known to be incore
8613247Sgjelinek  *
8623247Sgjelinek  */
8633247Sgjelinek static void
8643247Sgjelinek vmu_amp_update_incore_bounds(struct anon_map *amp, vmu_bound_t **first,
8653247Sgjelinek     vmu_bound_t **last, boolean_t incore)
8663247Sgjelinek {
8673247Sgjelinek 	vmu_bound_t *next;
8683247Sgjelinek 	vmu_bound_t *tmp;
8693247Sgjelinek 	pgcnt_t index;
8703247Sgjelinek 	short bound_type;
8713247Sgjelinek 	short page_type;
8723247Sgjelinek 	vnode_t *vn;
8733247Sgjelinek 	anoff_t off;
8743247Sgjelinek 	struct anon *ap;
8753247Sgjelinek 
8763247Sgjelinek 	next = *first;
8773247Sgjelinek 	/* Shared anon slots don't change once set */
8783247Sgjelinek 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
8793247Sgjelinek 	for (;;) {
8803247Sgjelinek 		if (incore == B_TRUE)
8813247Sgjelinek 			next->vmb_type = VMUSAGE_BOUND_INCORE;
8823247Sgjelinek 
8833247Sgjelinek 		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
8843247Sgjelinek 			if (next == *last)
8853247Sgjelinek 				break;
8863247Sgjelinek 			next = next->vmb_next;
8873247Sgjelinek 			continue;
8883247Sgjelinek 		}
8893247Sgjelinek 		bound_type = next->vmb_type;
8903247Sgjelinek 		index = next->vmb_start;
8913247Sgjelinek 		while (index <= next->vmb_end) {
8923247Sgjelinek 
8933247Sgjelinek 			/*
8943247Sgjelinek 			 * These are used to determine how much to increment
8953247Sgjelinek 			 * index when a large page is found.
8963247Sgjelinek 			 */
8973247Sgjelinek 			page_t *page;
8983247Sgjelinek 			pgcnt_t pgcnt = 1;
8993247Sgjelinek 			uint_t pgshft;
9003247Sgjelinek 			pgcnt_t pgmsk;
9013247Sgjelinek 
9023247Sgjelinek 			ap = anon_get_ptr(amp->ahp, index);
9033247Sgjelinek 			if (ap != NULL)
9043247Sgjelinek 				swap_xlate(ap, &vn, &off);
9053247Sgjelinek 
9063247Sgjelinek 			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
9073247Sgjelinek 			    (page = page_exists(vn, off)) != NULL) {
9083247Sgjelinek 				page_type = VMUSAGE_BOUND_INCORE;
9093247Sgjelinek 				if (page->p_szc > 0) {
9103247Sgjelinek 					pgcnt = page_get_pagecnt(page->p_szc);
9113247Sgjelinek 					pgshft = page_get_shift(page->p_szc);
9123247Sgjelinek 					pgmsk = (0x1 << (pgshft - PAGESHIFT))
9133247Sgjelinek 					    - 1;
9143247Sgjelinek 				}
9153247Sgjelinek 			} else {
9163247Sgjelinek 				page_type = VMUSAGE_BOUND_NOT_INCORE;
9173247Sgjelinek 			}
9183247Sgjelinek 			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
9193247Sgjelinek 				next->vmb_type = page_type;
9203247Sgjelinek 			} else if (next->vmb_type != page_type) {
9213247Sgjelinek 				/*
9223247Sgjelinek 				 * if current bound type does not match page
9233247Sgjelinek 				 * type, need to split off new bound.
9243247Sgjelinek 				 */
9253247Sgjelinek 				tmp = vmu_alloc_bound();
9263247Sgjelinek 				tmp->vmb_type = page_type;
9273247Sgjelinek 				tmp->vmb_start = index;
9283247Sgjelinek 				tmp->vmb_end = next->vmb_end;
9293247Sgjelinek 				tmp->vmb_next = next->vmb_next;
9303247Sgjelinek 				next->vmb_end = index - 1;
9313247Sgjelinek 				next->vmb_next = tmp;
9323247Sgjelinek 				if (*last == next)
9333247Sgjelinek 					*last = tmp;
9343247Sgjelinek 				next = tmp;
9353247Sgjelinek 			}
9363247Sgjelinek 			if (pgcnt > 1) {
9373247Sgjelinek 				/*
9383247Sgjelinek 				 * If inside large page, jump to next large
9393247Sgjelinek 				 * page
9403247Sgjelinek 				 */
9413247Sgjelinek 				index = (index & ~pgmsk) + pgcnt;
9423247Sgjelinek 			} else {
9433247Sgjelinek 				index++;
9443247Sgjelinek 			}
9453247Sgjelinek 		}
9463247Sgjelinek 		if (next == *last) {
9473247Sgjelinek 			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
9483247Sgjelinek 			break;
9493247Sgjelinek 		} else
9503247Sgjelinek 			next = next->vmb_next;
9513247Sgjelinek 	}
9523247Sgjelinek 	ANON_LOCK_EXIT(&amp->a_rwlock);
9533247Sgjelinek }
9543247Sgjelinek 
9553247Sgjelinek /*
9563247Sgjelinek  * Same as vmu_amp_update_incore_bounds(), except for tracking
9573247Sgjelinek  * incore-/not-incore for vnodes.
9583247Sgjelinek  */
9593247Sgjelinek static void
9603247Sgjelinek vmu_vnode_update_incore_bounds(vnode_t *vnode, vmu_bound_t **first,
9613247Sgjelinek     vmu_bound_t **last)
9623247Sgjelinek {
9633247Sgjelinek 	vmu_bound_t *next;
9643247Sgjelinek 	vmu_bound_t *tmp;
9653247Sgjelinek 	pgcnt_t index;
9663247Sgjelinek 	short bound_type;
9673247Sgjelinek 	short page_type;
9683247Sgjelinek 
9693247Sgjelinek 	next = *first;
9703247Sgjelinek 	for (;;) {
9713247Sgjelinek 		if (vnode->v_pages == NULL)
9723247Sgjelinek 			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;
9733247Sgjelinek 
9743247Sgjelinek 		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
9753247Sgjelinek 			if (next == *last)
9763247Sgjelinek 				break;
9773247Sgjelinek 			next = next->vmb_next;
9783247Sgjelinek 			continue;
9793247Sgjelinek 		}
9803247Sgjelinek 
9813247Sgjelinek 		bound_type = next->vmb_type;
9823247Sgjelinek 		index = next->vmb_start;
9833247Sgjelinek 		while (index <= next->vmb_end) {
9843247Sgjelinek 
9853247Sgjelinek 			/*
9863247Sgjelinek 			 * These are used to determine how much to increment
9873247Sgjelinek 			 * index when a large page is found.
9883247Sgjelinek 			 */
9893247Sgjelinek 			page_t *page;
9903247Sgjelinek 			pgcnt_t pgcnt = 1;
9913247Sgjelinek 			uint_t pgshft;
9923247Sgjelinek 			pgcnt_t pgmsk;
9933247Sgjelinek 
9943247Sgjelinek 			if (vnode->v_pages != NULL &&
9953247Sgjelinek 			    (page = page_exists(vnode, ptob(index))) != NULL) {
9963247Sgjelinek 				page_type = VMUSAGE_BOUND_INCORE;
9973247Sgjelinek 				if (page->p_szc > 0) {
9983247Sgjelinek 					pgcnt = page_get_pagecnt(page->p_szc);
9993247Sgjelinek 					pgshft = page_get_shift(page->p_szc);
10003247Sgjelinek 					pgmsk = (0x1 << (pgshft - PAGESHIFT))
10013247Sgjelinek 					    - 1;
10023247Sgjelinek 				}
10033247Sgjelinek 			} else {
10043247Sgjelinek 				page_type = VMUSAGE_BOUND_NOT_INCORE;
10053247Sgjelinek 			}
10063247Sgjelinek 			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
10073247Sgjelinek 				next->vmb_type = page_type;
10083247Sgjelinek 			} else if (next->vmb_type != page_type) {
10093247Sgjelinek 				/*
10103247Sgjelinek 				 * if current bound type does not match page
10113247Sgjelinek 				 * type, need to split off new bound.
10123247Sgjelinek 				 */
10133247Sgjelinek 				tmp = vmu_alloc_bound();
10143247Sgjelinek 				tmp->vmb_type = page_type;
10153247Sgjelinek 				tmp->vmb_start = index;
10163247Sgjelinek 				tmp->vmb_end = next->vmb_end;
10173247Sgjelinek 				tmp->vmb_next = next->vmb_next;
10183247Sgjelinek 				next->vmb_end = index - 1;
10193247Sgjelinek 				next->vmb_next = tmp;
10203247Sgjelinek 				if (*last == next)
10213247Sgjelinek 					*last = tmp;
10223247Sgjelinek 				next = tmp;
10233247Sgjelinek 			}
10243247Sgjelinek 			if (pgcnt > 1) {
10253247Sgjelinek 				/*
10263247Sgjelinek 				 * If inside large page, jump to next large
10273247Sgjelinek 				 * page
10283247Sgjelinek 				 */
10293247Sgjelinek 				index = (index & ~pgmsk) + pgcnt;
10303247Sgjelinek 			} else {
10313247Sgjelinek 				index++;
10323247Sgjelinek 			}
10333247Sgjelinek 		}
10343247Sgjelinek 		if (next == *last) {
10353247Sgjelinek 			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
10363247Sgjelinek 			break;
10373247Sgjelinek 		} else
10383247Sgjelinek 			next = next->vmb_next;
10393247Sgjelinek 	}
10403247Sgjelinek }
10413247Sgjelinek 
10423247Sgjelinek /*
10433247Sgjelinek  * Calculate the rss and swap consumed by a segment.  vmu_entities is the
10443247Sgjelinek  * list of entities to visit.  For shared segments, the vnode or amp
 * is looked up in each entity to see if it has already been counted.  Private
10463247Sgjelinek  * anon pages are checked per entity to ensure that cow pages are not
10473247Sgjelinek  * double counted.
10483247Sgjelinek  *
10493247Sgjelinek  * For private mapped files, first the amp is checked for private pages.
10503247Sgjelinek  * Bounds not backed by the amp are looked up in the vnode for each entity
10513247Sgjelinek  * to avoid double counting of private COW vnode pages.
10523247Sgjelinek  */
10533247Sgjelinek static void
10543247Sgjelinek vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
10553247Sgjelinek {
10563247Sgjelinek 	struct segvn_data *svd;
10573247Sgjelinek 	struct shm_data *shmd;
10583247Sgjelinek 	struct spt_data *sptd;
10593247Sgjelinek 	vmu_object_t *shared_object = NULL;
10603247Sgjelinek 	vmu_object_t *entity_object = NULL;
10613247Sgjelinek 	vmu_entity_t *entity;
10623247Sgjelinek 	vmusage_t *result;
10633247Sgjelinek 	vmu_bound_t *first = NULL;
10643247Sgjelinek 	vmu_bound_t *last = NULL;
10653247Sgjelinek 	vmu_bound_t *cur = NULL;
10663247Sgjelinek 	vmu_bound_t *e_first = NULL;
10673247Sgjelinek 	vmu_bound_t *e_last = NULL;
10683247Sgjelinek 	vmu_bound_t *tmp;
10693247Sgjelinek 	pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
10703247Sgjelinek 	struct anon_map *private_amp = NULL;
10713247Sgjelinek 	boolean_t incore = B_FALSE;
10723247Sgjelinek 	boolean_t shared = B_FALSE;
10733247Sgjelinek 	int file = 0;
10743247Sgjelinek 	pgcnt_t swresv = 0;
10753247Sgjelinek 	pgcnt_t panon = 0;
10763247Sgjelinek 
10773247Sgjelinek 	/* Can zero-length segments exist?  Not sure, so parenoia */
10783247Sgjelinek 	if (seg->s_size <= 0)
10793247Sgjelinek 		return;
10803247Sgjelinek 
10813247Sgjelinek 	/*
10823247Sgjelinek 	 * Figure out if there is a shared object (such as a named vnode or
10833247Sgjelinek 	 * a shared amp, then figure out if there is a private amp, which
10843247Sgjelinek 	 * identifies private pages.
10853247Sgjelinek 	 */
10863247Sgjelinek 	if (seg->s_ops == &segvn_ops) {
10873247Sgjelinek 		svd = (struct segvn_data *)seg->s_data;
10883247Sgjelinek 		if (svd->type == MAP_SHARED)
10893247Sgjelinek 			shared = B_TRUE;
10903247Sgjelinek 		else
10913247Sgjelinek 			swresv = svd->swresv;
10923247Sgjelinek 
10933247Sgjelinek 		if (svd->vp != NULL) {
10943247Sgjelinek 			file = 1;
10953247Sgjelinek 			shared_object = vmu_find_insert_object(
10963247Sgjelinek 			    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
10973247Sgjelinek 			    VMUSAGE_TYPE_VNODE);
10983247Sgjelinek 			s_start = btop(svd->offset);
10993247Sgjelinek 			s_end = btop(svd->offset + seg->s_size) - 1;
11003247Sgjelinek 		}
11013247Sgjelinek 		if (svd->amp != NULL && svd->type == MAP_SHARED) {
11023247Sgjelinek 			ASSERT(shared_object == NULL);
11033247Sgjelinek 			shared_object = vmu_find_insert_object(
11043247Sgjelinek 			    vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
11053247Sgjelinek 			    VMUSAGE_TYPE_AMP);
11063247Sgjelinek 			s_start = svd->anon_index;
11073247Sgjelinek 			s_end = svd->anon_index + btop(seg->s_size) - 1;
11083247Sgjelinek 			/* schedctl mappings are always in core */
11093247Sgjelinek 			if (svd->amp->swresv == 0)
11103247Sgjelinek 				incore = B_TRUE;
11113247Sgjelinek 		}
11124426Saguzovsk 		SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
11134426Saguzovsk 		/*
11144426Saguzovsk 		 * Text replication anon maps can be shared across all zones.
11154426Saguzovsk 		 * Space used for text replication is typically capped as
11164426Saguzovsk 		 * small % of memory.  To keep it simple for now we don't
11174426Saguzovsk 		 * account for swap and memory space used for text replication.
11184426Saguzovsk 		 */
11194426Saguzovsk 		if (svd->tr_state == SEGVN_TR_OFF && svd->amp != NULL &&
11204426Saguzovsk 		    svd->type == MAP_PRIVATE) {
11213247Sgjelinek 			private_amp = svd->amp;
11223247Sgjelinek 			p_start = svd->anon_index;
11233247Sgjelinek 			p_end = svd->anon_index + btop(seg->s_size) - 1;
11243247Sgjelinek 		}
11254426Saguzovsk 		SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
11263247Sgjelinek 	} else if (seg->s_ops == &segspt_shmops) {
11273247Sgjelinek 		shared = B_TRUE;
11283247Sgjelinek 		shmd = (struct shm_data *)seg->s_data;
11293247Sgjelinek 		shared_object = vmu_find_insert_object(
11303247Sgjelinek 		    vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
11313247Sgjelinek 		    VMUSAGE_TYPE_AMP);
11323247Sgjelinek 		s_start = 0;
11333247Sgjelinek 		s_end = btop(seg->s_size) - 1;
11343247Sgjelinek 		sptd = shmd->shm_sptseg->s_data;
11353247Sgjelinek 
11363247Sgjelinek 		/* ism segments are always incore and do not reserve swap */
11373247Sgjelinek 		if (sptd->spt_flags & SHM_SHARE_MMU)
11383247Sgjelinek 			incore = B_TRUE;
11393247Sgjelinek 
11403247Sgjelinek 	} else {
11413247Sgjelinek 		return;
11423247Sgjelinek 	}
11433247Sgjelinek 
11443247Sgjelinek 	/*
11453247Sgjelinek 	 * If there is a private amp, count anon pages that exist.  If an
11463247Sgjelinek 	 * anon has a refcnt > 1 (cow sharing), then save the anon in a
11473247Sgjelinek 	 * hash so that it is not double counted.
11483247Sgjelinek 	 *
11493247Sgjelinek 	 * If there is also a shared object, they figure out the bounds
11503247Sgjelinek 	 * which are not mapped by the private amp.
11513247Sgjelinek 	 */
11523247Sgjelinek 	if (private_amp != NULL) {
11533247Sgjelinek 
11543247Sgjelinek 		/* Enter as writer to prevent cow anons from being freed */
11553247Sgjelinek 		ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);
11563247Sgjelinek 
11573247Sgjelinek 		p_index = p_start;
11583247Sgjelinek 		s_index = s_start;
11593247Sgjelinek 
11603247Sgjelinek 		while (p_index <= p_end) {
11613247Sgjelinek 
11623247Sgjelinek 			pgcnt_t p_index_next;
11633247Sgjelinek 			pgcnt_t p_bound_size;
11643247Sgjelinek 			int cnt;
11653247Sgjelinek 			anoff_t off;
11663247Sgjelinek 			struct vnode *vn;
11673247Sgjelinek 			struct anon *ap;
11683247Sgjelinek 			page_t *page;		/* For handling of large */
11693247Sgjelinek 			pgcnt_t pgcnt = 1;	/* pages */
11703247Sgjelinek 			pgcnt_t pgstart;
11713247Sgjelinek 			pgcnt_t pgend;
11723247Sgjelinek 			uint_t pgshft;
11733247Sgjelinek 			pgcnt_t pgmsk;
11743247Sgjelinek 
11753247Sgjelinek 			p_index_next = p_index;
11763247Sgjelinek 			ap = anon_get_next_ptr(private_amp->ahp,
11773247Sgjelinek 			    &p_index_next);
11783247Sgjelinek 
11793247Sgjelinek 			/*
11803247Sgjelinek 			 * If next anon is past end of mapping, simulate
11813247Sgjelinek 			 * end of anon so loop terminates.
11823247Sgjelinek 			 */
11833247Sgjelinek 			if (p_index_next > p_end) {
11843247Sgjelinek 				p_index_next = p_end + 1;
11853247Sgjelinek 				ap = NULL;
11863247Sgjelinek 			}
11873247Sgjelinek 			/*
11883247Sgjelinek 			 * For cow segments, keep track of bounds not
11893247Sgjelinek 			 * backed by private amp so they can be looked
11903247Sgjelinek 			 * up in the backing vnode
11913247Sgjelinek 			 */
11923247Sgjelinek 			if (p_index_next != p_index) {
11933247Sgjelinek 
11943247Sgjelinek 				/*
11953247Sgjelinek 				 * Compute index difference between anon and
11963247Sgjelinek 				 * previous anon.
11973247Sgjelinek 				 */
11983247Sgjelinek 				p_bound_size = p_index_next - p_index - 1;
11993247Sgjelinek 
12003247Sgjelinek 				if (shared_object != NULL) {
12013247Sgjelinek 					cur = vmu_alloc_bound();
12023247Sgjelinek 					cur->vmb_next = NULL;
12033247Sgjelinek 					cur->vmb_start = s_index;
12043247Sgjelinek 					cur->vmb_end = s_index + p_bound_size;
12053247Sgjelinek 					cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
12063247Sgjelinek 					if (first == NULL) {
12073247Sgjelinek 						first = cur;
12083247Sgjelinek 						last = cur;
12093247Sgjelinek 					} else {
12103247Sgjelinek 						last->vmb_next = cur;
12113247Sgjelinek 						last = cur;
12123247Sgjelinek 					}
12133247Sgjelinek 				}
12143247Sgjelinek 				p_index = p_index + p_bound_size + 1;
12153247Sgjelinek 				s_index = s_index + p_bound_size + 1;
12163247Sgjelinek 			}
12173247Sgjelinek 
12183247Sgjelinek 			/* Detect end of anons in amp */
12193247Sgjelinek 			if (ap == NULL)
12203247Sgjelinek 				break;
12213247Sgjelinek 
12223247Sgjelinek 			cnt = ap->an_refcnt;
12233247Sgjelinek 			swap_xlate(ap, &vn, &off);
12243247Sgjelinek 
12253247Sgjelinek 			if (vn == NULL || vn->v_pages == NULL ||
12263247Sgjelinek 			    (page = page_exists(vn, off)) == NULL) {
12273247Sgjelinek 				p_index++;
12283247Sgjelinek 				s_index++;
12293247Sgjelinek 				continue;
12303247Sgjelinek 			}
12313247Sgjelinek 
12323247Sgjelinek 			/*
12333247Sgjelinek 			 * If large page is found, compute portion of large
12343247Sgjelinek 			 * page in mapping, and increment indicies to the next
12353247Sgjelinek 			 * large page.
12363247Sgjelinek 			 */
12373247Sgjelinek 			if (page->p_szc > 0) {
12383247Sgjelinek 
12393247Sgjelinek 				pgcnt = page_get_pagecnt(page->p_szc);
12403247Sgjelinek 				pgshft = page_get_shift(page->p_szc);
12413247Sgjelinek 				pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;
12423247Sgjelinek 
12433247Sgjelinek 				/* First page in large page */
12443247Sgjelinek 				pgstart = p_index & ~pgmsk;
12453247Sgjelinek 				/* Last page in large page */
12463247Sgjelinek 				pgend = pgstart + pgcnt - 1;
12473247Sgjelinek 				/*
12483247Sgjelinek 				 * Artifically end page if page extends past
12493247Sgjelinek 				 * end of mapping.
12503247Sgjelinek 				 */
12513247Sgjelinek 				if (pgend > p_end)
12523247Sgjelinek 					pgend = p_end;
12533247Sgjelinek 
12543247Sgjelinek 				/*
12553247Sgjelinek 				 * Compute number of pages from large page
12563247Sgjelinek 				 * which are mapped.
12573247Sgjelinek 				 */
12583247Sgjelinek 				pgcnt = pgend - p_index + 1;
12593247Sgjelinek 
12603247Sgjelinek 				/*
12613247Sgjelinek 				 * Point indicies at page after large page,
12623247Sgjelinek 				 * or at page after end of mapping.
12633247Sgjelinek 				 */
12643247Sgjelinek 				p_index += pgcnt;
12653247Sgjelinek 				s_index += pgcnt;
12663247Sgjelinek 			} else {
12673247Sgjelinek 				p_index++;
12683247Sgjelinek 				s_index++;
12693247Sgjelinek 			}
12703247Sgjelinek 
12713247Sgjelinek 			/*
12723247Sgjelinek 			 * Assume anon structs with a refcnt
12733247Sgjelinek 			 * of 1 are not cow shared, so there
12743247Sgjelinek 			 * is no reason to track them per entity.
12753247Sgjelinek 			 */
12763247Sgjelinek 			if (cnt == 1) {
12773247Sgjelinek 				panon += pgcnt;
12783247Sgjelinek 				continue;
12793247Sgjelinek 			}
12803247Sgjelinek 			for (entity = vmu_entities; entity != NULL;
12813247Sgjelinek 			    entity = entity->vme_next_calc) {
12823247Sgjelinek 
12833247Sgjelinek 				result = &entity->vme_result;
12843247Sgjelinek 				/*
12853247Sgjelinek 				 * Track cow anons per entity so
12863247Sgjelinek 				 * they are not double counted.
12873247Sgjelinek 				 */
12883247Sgjelinek 				if (vmu_find_insert_anon(entity->vme_anon_hash,
12893247Sgjelinek 				    (caddr_t)ap) == 0)
12903247Sgjelinek 					continue;
12913247Sgjelinek 
12923247Sgjelinek 				result->vmu_rss_all += (pgcnt << PAGESHIFT);
12933247Sgjelinek 				result->vmu_rss_private +=
12943247Sgjelinek 				    (pgcnt << PAGESHIFT);
12953247Sgjelinek 			}
12963247Sgjelinek 		}
12973247Sgjelinek 		ANON_LOCK_EXIT(&private_amp->a_rwlock);
12983247Sgjelinek 	}
12993247Sgjelinek 
13003247Sgjelinek 	/* Add up resident anon and swap reserved for private mappings */
13013247Sgjelinek 	if (swresv > 0 || panon > 0) {
13023247Sgjelinek 		for (entity = vmu_entities; entity != NULL;
13033247Sgjelinek 		    entity = entity->vme_next_calc) {
13043247Sgjelinek 			result = &entity->vme_result;
13053247Sgjelinek 			result->vmu_swap_all += swresv;
13063247Sgjelinek 			result->vmu_swap_private += swresv;
13073247Sgjelinek 			result->vmu_rss_all += (panon << PAGESHIFT);
13083247Sgjelinek 			result->vmu_rss_private += (panon << PAGESHIFT);
13093247Sgjelinek 		}
13103247Sgjelinek 	}
13113247Sgjelinek 
13123247Sgjelinek 	/* Compute resident pages backing shared amp or named vnode */
13133247Sgjelinek 	if (shared_object != NULL) {
13143247Sgjelinek 		if (first == NULL) {
13153247Sgjelinek 			/*
13163247Sgjelinek 			 * No private amp, or private amp has no anon
13173247Sgjelinek 			 * structs.  This means entire segment is backed by
13183247Sgjelinek 			 * the shared object.
13193247Sgjelinek 			 */
13203247Sgjelinek 			first = vmu_alloc_bound();
13213247Sgjelinek 			first->vmb_next = NULL;
13223247Sgjelinek 			first->vmb_start = s_start;
13233247Sgjelinek 			first->vmb_end = s_end;
13243247Sgjelinek 			first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
13253247Sgjelinek 		}
13263247Sgjelinek 		/*
13273247Sgjelinek 		 * Iterate bounds not backed by private amp, and compute
13283247Sgjelinek 		 * resident pages.
13293247Sgjelinek 		 */
13303247Sgjelinek 		cur = first;
13313247Sgjelinek 		while (cur != NULL) {
13323247Sgjelinek 
13333247Sgjelinek 			if (vmu_insert_lookup_object_bounds(shared_object,
13343247Sgjelinek 			    cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
13353247Sgjelinek 			    &first, &last) > 0) {
13363247Sgjelinek 				/* new bounds, find incore/not-incore */
13373247Sgjelinek 				if (shared_object->vmo_type ==
13383247Sgjelinek 				    VMUSAGE_TYPE_VNODE)
13393247Sgjelinek 					vmu_vnode_update_incore_bounds(
13403247Sgjelinek 					    (vnode_t *)
13413247Sgjelinek 					    shared_object->vmo_key, &first,
13423247Sgjelinek 					    &last);
13433247Sgjelinek 				else
13443247Sgjelinek 					vmu_amp_update_incore_bounds(
13453247Sgjelinek 					    (struct anon_map *)
13463247Sgjelinek 					    shared_object->vmo_key, &first,
13473247Sgjelinek 					    &last, incore);
13483247Sgjelinek 				vmu_merge_bounds(&first, &last);
13493247Sgjelinek 			}
13503247Sgjelinek 			for (entity = vmu_entities; entity != NULL;
13513247Sgjelinek 			    entity = entity->vme_next_calc) {
13523247Sgjelinek 
13533247Sgjelinek 				result = &entity->vme_result;
13543247Sgjelinek 
13553247Sgjelinek 				entity_object = vmu_find_insert_object(
13563247Sgjelinek 				    shared_object->vmo_type ==
13573247Sgjelinek 				    VMUSAGE_TYPE_VNODE ? entity->vme_vnode_hash:
1358*7884Sgerald.jelinek@sun.com 				    entity->vme_amp_hash,
1359*7884Sgerald.jelinek@sun.com 				    shared_object->vmo_key,
1360*7884Sgerald.jelinek@sun.com 				    shared_object->vmo_type);
13613247Sgjelinek 
13623247Sgjelinek 				virt = vmu_insert_lookup_object_bounds(
13633247Sgjelinek 				    entity_object, cur->vmb_start, cur->vmb_end,
13643247Sgjelinek 				    VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
13653247Sgjelinek 
13663247Sgjelinek 				if (virt == 0)
13673247Sgjelinek 					continue;
13683247Sgjelinek 				/*
13693247Sgjelinek 				 * Range visited for this entity
13703247Sgjelinek 				 */
13713247Sgjelinek 				rss = vmu_update_bounds(&e_first,
13723247Sgjelinek 				    &e_last, first, last);
13733247Sgjelinek 				result->vmu_rss_all += (rss << PAGESHIFT);
13743247Sgjelinek 				if (shared == B_TRUE && file == B_FALSE) {
13753247Sgjelinek 					/* shared anon mapping */
13763247Sgjelinek 					result->vmu_swap_all +=
13773247Sgjelinek 					    (virt << PAGESHIFT);
13783247Sgjelinek 					result->vmu_swap_shared +=
13793247Sgjelinek 					    (virt << PAGESHIFT);
13803247Sgjelinek 					result->vmu_rss_shared +=
13813247Sgjelinek 					    (rss << PAGESHIFT);
13823247Sgjelinek 				} else if (shared == B_TRUE && file == B_TRUE) {
13833247Sgjelinek 					/* shared file mapping */
13843247Sgjelinek 					result->vmu_rss_shared +=
13853247Sgjelinek 					    (rss << PAGESHIFT);
13863247Sgjelinek 				} else if (shared == B_FALSE &&
13873247Sgjelinek 				    file == B_TRUE) {
13883247Sgjelinek 					/* private file mapping */
13893247Sgjelinek 					result->vmu_rss_private +=
13903247Sgjelinek 					    (rss << PAGESHIFT);
13913247Sgjelinek 				}
13923247Sgjelinek 				vmu_merge_bounds(&e_first, &e_last);
13933247Sgjelinek 			}
13943247Sgjelinek 			tmp = cur;
13953247Sgjelinek 			cur = cur->vmb_next;
13963247Sgjelinek 			vmu_free_bound(tmp);
13973247Sgjelinek 		}
13983247Sgjelinek 	}
13993247Sgjelinek }
14003247Sgjelinek 
14013247Sgjelinek /*
14023247Sgjelinek  * Based on the current calculation flags, find the relevant entities
14033247Sgjelinek  * which are relative to the process.  Then calculate each segment
14043247Sgjelinek  * in the process'es address space for each relevant entity.
14053247Sgjelinek  */
14063247Sgjelinek static void
14073247Sgjelinek vmu_calculate_proc(proc_t *p)
14083247Sgjelinek {
14093247Sgjelinek 	vmu_entity_t *entities = NULL;
14103247Sgjelinek 	vmu_zone_t *zone;
14113247Sgjelinek 	vmu_entity_t *tmp;
14123247Sgjelinek 	struct as *as;
14133247Sgjelinek 	struct seg *seg;
14143247Sgjelinek 	int ret;
14153247Sgjelinek 
14163247Sgjelinek 	/* Figure out which entities are being computed */
14173247Sgjelinek 	if ((vmu_data.vmu_system) != NULL) {
14183247Sgjelinek 		tmp = vmu_data.vmu_system;
14193247Sgjelinek 		tmp->vme_next_calc = entities;
14203247Sgjelinek 		entities = tmp;
14213247Sgjelinek 	}
14223247Sgjelinek 	if (vmu_data.vmu_calc_flags &
14233247Sgjelinek 	    (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
14243247Sgjelinek 	    VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
14253247Sgjelinek 	    VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
14263247Sgjelinek 	    VMUSAGE_ALL_EUSERS)) {
14273247Sgjelinek 		ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
14283247Sgjelinek 		    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
14293247Sgjelinek 		    (mod_hash_val_t *)&zone);
14303247Sgjelinek 		if (ret != 0) {
14313247Sgjelinek 			zone = vmu_alloc_zone(p->p_zone->zone_id);
14323247Sgjelinek 			ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
14333247Sgjelinek 			    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
14343247Sgjelinek 			    (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
14353247Sgjelinek 			ASSERT(ret == 0);
14363247Sgjelinek 		}
14373247Sgjelinek 		if (zone->vmz_zone != NULL) {
14383247Sgjelinek 			tmp = zone->vmz_zone;
14393247Sgjelinek 			tmp->vme_next_calc = entities;
14403247Sgjelinek 			entities = tmp;
14413247Sgjelinek 		}
14423247Sgjelinek 		if (vmu_data.vmu_calc_flags &
14433247Sgjelinek 		    (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
14443247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
14453247Sgjelinek 			    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
14463247Sgjelinek 			    zone->vmz_id);
14473247Sgjelinek 			tmp->vme_next_calc = entities;
14483247Sgjelinek 			entities = tmp;
14493247Sgjelinek 		}
14503247Sgjelinek 		if (vmu_data.vmu_calc_flags &
14513247Sgjelinek 		    (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
14523247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
14533247Sgjelinek 			    p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
14543247Sgjelinek 			tmp->vme_next_calc = entities;
14553247Sgjelinek 			entities = tmp;
14563247Sgjelinek 		}
14573247Sgjelinek 		if (vmu_data.vmu_calc_flags &
14583247Sgjelinek 		    (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
14593247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
14603247Sgjelinek 			    crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
14613247Sgjelinek 			tmp->vme_next_calc = entities;
14623247Sgjelinek 			entities = tmp;
14633247Sgjelinek 		}
14643247Sgjelinek 		if (vmu_data.vmu_calc_flags &
14653247Sgjelinek 		    (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
14663247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
14673247Sgjelinek 			    crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
14683247Sgjelinek 			tmp->vme_next_calc = entities;
14693247Sgjelinek 			entities = tmp;
14703247Sgjelinek 		}
14713247Sgjelinek 	}
14723247Sgjelinek 	/* Entities which collapse projects and users for all zones */
14733247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
14743247Sgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
14753247Sgjelinek 		    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
14763247Sgjelinek 		tmp->vme_next_calc = entities;
14773247Sgjelinek 		entities = tmp;
14783247Sgjelinek 	}
14793247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
14803247Sgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
14813247Sgjelinek 		    crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
14823247Sgjelinek 		tmp->vme_next_calc = entities;
14833247Sgjelinek 		entities = tmp;
14843247Sgjelinek 	}
14853247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
14863247Sgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
14873247Sgjelinek 		    crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
14883247Sgjelinek 		tmp->vme_next_calc = entities;
14893247Sgjelinek 		entities = tmp;
14903247Sgjelinek 	}
14913247Sgjelinek 
14923247Sgjelinek 	ASSERT(entities != NULL);
14933247Sgjelinek 	/* process all segs in process's address space */
14943247Sgjelinek 	as = p->p_as;
14953247Sgjelinek 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
14963247Sgjelinek 	for (seg = AS_SEGFIRST(as); seg != NULL;
14973247Sgjelinek 	    seg = AS_SEGNEXT(as, seg)) {
14983247Sgjelinek 		vmu_calculate_seg(entities, seg);
14993247Sgjelinek 	}
15003247Sgjelinek 	AS_LOCK_EXIT(as, &as->a_lock);
15013247Sgjelinek }
15023247Sgjelinek 
15033247Sgjelinek /*
15043247Sgjelinek  * Free data created by previous call to vmu_calculate().
15053247Sgjelinek  */
15063247Sgjelinek static void
15073247Sgjelinek vmu_clear_calc()
15083247Sgjelinek {
15093247Sgjelinek 	if (vmu_data.vmu_system != NULL)
15103247Sgjelinek 		vmu_free_entity(vmu_data.vmu_system);
15113247Sgjelinek 		vmu_data.vmu_system = NULL;
15123247Sgjelinek 	if (vmu_data.vmu_zones_hash != NULL)
15133247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
15143247Sgjelinek 	if (vmu_data.vmu_projects_col_hash != NULL)
15153247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
15163247Sgjelinek 	if (vmu_data.vmu_rusers_col_hash != NULL)
15173247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
15183247Sgjelinek 	if (vmu_data.vmu_eusers_col_hash != NULL)
15193247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
15203247Sgjelinek 
15213247Sgjelinek 	i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
15223247Sgjelinek 	i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
15233247Sgjelinek }
15243247Sgjelinek 
15253247Sgjelinek /*
15263247Sgjelinek  * Free unused data structures.  These can result if the system workload
15273247Sgjelinek  * decreases between calculations.
15283247Sgjelinek  */
15293247Sgjelinek static void
15303247Sgjelinek vmu_free_extra()
15313247Sgjelinek {
15323247Sgjelinek 	vmu_bound_t *tb;
15333247Sgjelinek 	vmu_object_t *to;
15343247Sgjelinek 	vmu_entity_t *te;
15353247Sgjelinek 	vmu_zone_t *tz;
15363247Sgjelinek 
15373247Sgjelinek 	while (vmu_data.vmu_free_bounds != NULL) {
15383247Sgjelinek 		tb = vmu_data.vmu_free_bounds;
15393247Sgjelinek 		vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
15403247Sgjelinek 		kmem_cache_free(vmu_bound_cache, tb);
15413247Sgjelinek 	}
15423247Sgjelinek 	while (vmu_data.vmu_free_objects != NULL) {
15433247Sgjelinek 		to = vmu_data.vmu_free_objects;
15443247Sgjelinek 		vmu_data.vmu_free_objects =
15453247Sgjelinek 		    vmu_data.vmu_free_objects->vmo_next;
15463247Sgjelinek 		kmem_cache_free(vmu_object_cache, to);
15473247Sgjelinek 	}
15483247Sgjelinek 	while (vmu_data.vmu_free_entities != NULL) {
15493247Sgjelinek 		te = vmu_data.vmu_free_entities;
15503247Sgjelinek 		vmu_data.vmu_free_entities =
15513247Sgjelinek 		    vmu_data.vmu_free_entities->vme_next;
15523247Sgjelinek 		if (te->vme_vnode_hash != NULL)
15533247Sgjelinek 			mod_hash_destroy_hash(te->vme_vnode_hash);
15543247Sgjelinek 		if (te->vme_amp_hash != NULL)
15553247Sgjelinek 			mod_hash_destroy_hash(te->vme_amp_hash);
15563247Sgjelinek 		if (te->vme_anon_hash != NULL)
15573247Sgjelinek 			mod_hash_destroy_hash(te->vme_anon_hash);
15583247Sgjelinek 		kmem_free(te, sizeof (vmu_entity_t));
15593247Sgjelinek 	}
15603247Sgjelinek 	while (vmu_data.vmu_free_zones != NULL) {
15613247Sgjelinek 		tz = vmu_data.vmu_free_zones;
15623247Sgjelinek 		vmu_data.vmu_free_zones =
15633247Sgjelinek 		    vmu_data.vmu_free_zones->vmz_next;
15643247Sgjelinek 		if (tz->vmz_projects_hash != NULL)
15653247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_projects_hash);
15663247Sgjelinek 		if (tz->vmz_tasks_hash != NULL)
15673247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_tasks_hash);
15683247Sgjelinek 		if (tz->vmz_rusers_hash != NULL)
15693247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_rusers_hash);
15703247Sgjelinek 		if (tz->vmz_eusers_hash != NULL)
15713247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_eusers_hash);
15723247Sgjelinek 		kmem_free(tz, sizeof (vmu_zone_t));
15733247Sgjelinek 	}
15743247Sgjelinek }
15753247Sgjelinek 
15763247Sgjelinek extern kcondvar_t *pr_pid_cv;
15773247Sgjelinek 
15783247Sgjelinek /*
15793247Sgjelinek  * Determine which entity types are relevant and allocate the hashes to
15803247Sgjelinek  * track them.  Then walk the process table and count rss and swap
 * for each process's address space.  Address space objects such as
15823247Sgjelinek  * vnodes, amps and anons are tracked per entity, so that they are
15833247Sgjelinek  * not double counted in the results.
15843247Sgjelinek  *
15853247Sgjelinek  */
static void
vmu_calculate()
{
	int i = 0;
	int ret;
	proc_t *p;

	/* Discard results from any previous calculation. */
	vmu_clear_calc();

	/* The system-wide entity is tracked separately from the hashes. */
	if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
		vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
		    ALL_ZONES);

	/*
	 * Walk process table and calculate rss of each proc.
	 *
	 * Pidlock and p_lock cannot be held while doing the rss calculation.
	 * This is because:
	 *	1.  The calculation allocates using KM_SLEEP.
	 *	2.  The calculation grabs a_lock, which cannot be grabbed
	 *	    after p_lock.
	 *
	 * Since pidlock must be dropped, we cannot simply just walk the
	 * practive list.  Instead, we walk the process table, and sprlock
	 * each process to ensure that it does not exit during the
	 * calculation.
	 */

	mutex_enter(&pidlock);
	for (i = 0; i < v.v_proc; i++) {
again:
		p = pid_entry(i);
		/* Empty process-table slot. */
		if (p == NULL)
			continue;

		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);

		/* Abandon the walk if the system is panicking. */
		if (panicstr) {
			mutex_exit(&p->p_lock);
			return;
		}

		/* Try to set P_PR_LOCK */
		ret = sprtrylock_proc(p);
		if (ret == -1) {
			/* Process in invalid state */
			mutex_exit(&p->p_lock);
			mutex_enter(&pidlock);
			continue;
		} else if (ret == 1) {
			/*
			 * P_PR_LOCK is already set.  Wait and try again.
			 * This also drops p_lock.
			 */
			sprwaitlock_proc(p);
			mutex_enter(&pidlock);
			goto again;
		}
		mutex_exit(&p->p_lock);

		/*
		 * Process is held via P_PR_LOCK (cannot exit); safe to
		 * walk its address space with no process locks held.
		 */
		vmu_calculate_proc(p);

		mutex_enter(&p->p_lock);
		sprunlock(p);
		mutex_enter(&pidlock);
	}
	mutex_exit(&pidlock);

	/* Release tracking structures no longer needed by this pass. */
	vmu_free_extra();
}
16573247Sgjelinek 
16583247Sgjelinek /*
16593247Sgjelinek  * allocate a new cache for N results satisfying flags
16603247Sgjelinek  */
16613247Sgjelinek vmu_cache_t *
16623247Sgjelinek vmu_cache_alloc(size_t nres, uint_t flags)
16633247Sgjelinek {
16643247Sgjelinek 	vmu_cache_t *cache;
16653247Sgjelinek 
16663247Sgjelinek 	cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
16673247Sgjelinek 	cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
16683247Sgjelinek 	cache->vmc_nresults = nres;
16693247Sgjelinek 	cache->vmc_flags = flags;
16703247Sgjelinek 	cache->vmc_refcnt = 1;
16713247Sgjelinek 	return (cache);
16723247Sgjelinek }
16733247Sgjelinek 
16743247Sgjelinek /*
16753247Sgjelinek  * Make sure cached results are not freed
16763247Sgjelinek  */
16773247Sgjelinek static void
16783247Sgjelinek vmu_cache_hold(vmu_cache_t *cache)
16793247Sgjelinek {
16803247Sgjelinek 	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
16813247Sgjelinek 	cache->vmc_refcnt++;
16823247Sgjelinek }
16833247Sgjelinek 
16843247Sgjelinek /*
16853247Sgjelinek  * free cache data
16863247Sgjelinek  */
16873247Sgjelinek static void
16883247Sgjelinek vmu_cache_rele(vmu_cache_t *cache)
16893247Sgjelinek {
16903247Sgjelinek 	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
16913247Sgjelinek 	ASSERT(cache->vmc_refcnt > 0);
16923247Sgjelinek 	cache->vmc_refcnt--;
16933247Sgjelinek 	if (cache->vmc_refcnt == 0) {
16943247Sgjelinek 		kmem_free(cache->vmc_results, sizeof (vmusage_t) *
1695*7884Sgerald.jelinek@sun.com 		    cache->vmc_nresults);
16963247Sgjelinek 		kmem_free(cache, sizeof (vmu_cache_t));
16973247Sgjelinek 	}
16983247Sgjelinek }
16993247Sgjelinek 
/*
 * Copy out the cached results to a caller.  Inspect the caller's flags
 * and zone to determine which cached results should be copied.
 * Returns 0, or -1 with errno set (EFAULT, EOVERFLOW).
 */
static int
vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
    uint_t flags, int cpflg)
{
	vmusage_t *result, *out_result;
	vmusage_t dummy;
	size_t i, count = 0;
	size_t bufsize;
	int ret = 0;
	uint_t types = 0;

	/* Read the caller's buffer capacity (number of vmusage_t slots). */
	if (nres != NULL) {
		if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg))
			return (set_errno(EFAULT));
	} else {
		bufsize = 0;
	}

	/* figure out what results the caller is interested in. */
	if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
		types |= VMUSAGE_SYSTEM;
	if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
		types |= VMUSAGE_ZONE;
	if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
	    VMUSAGE_COL_PROJECTS))
		types |= VMUSAGE_PROJECTS;
	if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
		types |= VMUSAGE_TASKS;
	if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
		types |= VMUSAGE_RUSERS;
	if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
		types |= VMUSAGE_EUSERS;

	/* count results for current zone */
	out_result = buf;
	for (result = cache->vmc_results, i = 0;
	    i < cache->vmc_nresults; result++, i++) {

		/* Do not return "other-zone" results to non-global zones */
		if (curproc->p_zone != global_zone &&
		    curproc->p_zone->zone_id != result->vmu_zoneid)
			continue;

		/*
		 * If non-global zone requests VMUSAGE_SYSTEM, fake
		 * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result.
		 */
		if (curproc->p_zone != global_zone &&
		    (flags & VMUSAGE_SYSTEM) != 0 &&
		    result->vmu_type == VMUSAGE_ZONE) {
			count++;
			if (out_result != NULL) {
				if (bufsize < count) {
					/*
					 * Buffer full: keep counting so
					 * *nres reports the needed size.
					 * NOTE(review): overflow is
					 * reported as EOVERFLOW here, but
					 * the vm_getusage() block comment
					 * says ENOSPC -- confirm which is
					 * intended.
					 */
					ret = set_errno(EOVERFLOW);
				} else {
					dummy = *result;
					dummy.vmu_zoneid = ALL_ZONES;
					dummy.vmu_id = 0;
					dummy.vmu_type = VMUSAGE_SYSTEM;
					if (ddi_copyout(&dummy, out_result,
					    sizeof (vmusage_t), cpflg))
						return (set_errno(EFAULT));
					out_result++;
				}
			}
		}

		/* Skip results that do not match requested type */
		if ((result->vmu_type & types) == 0)
			continue;

		/* Skip collated results if not requested */
		if (result->vmu_zoneid == ALL_ZONES) {
			if (result->vmu_type == VMUSAGE_PROJECTS &&
			    (flags & VMUSAGE_COL_PROJECTS) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_EUSERS &&
			    (flags & VMUSAGE_COL_EUSERS) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_RUSERS &&
			    (flags & VMUSAGE_COL_RUSERS) == 0)
				continue;
		}

		/* Skip "other zone" results if not requested */
		if (result->vmu_zoneid != curproc->p_zone->zone_id) {
			if (result->vmu_type == VMUSAGE_ZONE &&
			    (flags & VMUSAGE_ALL_ZONES) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_PROJECTS &&
			    (flags & (VMUSAGE_ALL_PROJECTS |
			    VMUSAGE_COL_PROJECTS)) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_TASKS &&
			    (flags & VMUSAGE_ALL_TASKS) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_RUSERS &&
			    (flags & (VMUSAGE_ALL_RUSERS |
			    VMUSAGE_COL_RUSERS)) == 0)
				continue;
			if (result->vmu_type == VMUSAGE_EUSERS &&
			    (flags & (VMUSAGE_ALL_EUSERS |
			    VMUSAGE_COL_EUSERS)) == 0)
				continue;
		}
		count++;
		if (out_result != NULL) {
			if (bufsize < count) {
				/* Out of room; note overflow, keep counting. */
				ret = set_errno(EOVERFLOW);
			} else {
				if (ddi_copyout(result, out_result,
				    sizeof (vmusage_t), cpflg))
					return (set_errno(EFAULT));
				out_result++;
			}
		}
	}
	/* Report the number of matching results back to the caller. */
	if (nres != NULL)
		if (ddi_copyout(&count, (void *)nres, sizeof (size_t), cpflg))
			return (set_errno(EFAULT));

	return (ret);
}
18273247Sgjelinek 
18283247Sgjelinek /*
18293247Sgjelinek  * vm_getusage()
18303247Sgjelinek  *
18313247Sgjelinek  * Counts rss and swap by zone, project, task, and/or user.  The flags argument
18323247Sgjelinek  * determines the type of results structures returned.  Flags requesting
18333247Sgjelinek  * results from more than one zone are "flattened" to the local zone if the
18343247Sgjelinek  * caller is not the global zone.
18353247Sgjelinek  *
18363247Sgjelinek  * args:
18373247Sgjelinek  *	flags:	bitmap consisting of one or more of VMUSAGE_*.
18383247Sgjelinek  *	age:	maximum allowable age (time since counting was done) in
18393247Sgjelinek  *		seconds of the results.  Results from previous callers are
18403247Sgjelinek  *		cached in kernel.
18413247Sgjelinek  *	buf:	pointer to buffer array of vmusage_t.  If NULL, then only nres
18423247Sgjelinek  *		set on success.
18433247Sgjelinek  *	nres:	Set to number of vmusage_t structures pointed to by buf
18443247Sgjelinek  *		before calling vm_getusage().
18453247Sgjelinek  *		On return 0 (success) or ENOSPC, is set to the number of result
18463247Sgjelinek  *		structures returned or attempted to return.
18473247Sgjelinek  *
18483247Sgjelinek  * returns 0 on success, -1 on failure:
18493247Sgjelinek  *	EINTR (interrupted)
 *	ENOSPC (nres too small for results, nres set to needed value for
 *	success)
18513247Sgjelinek  *	EINVAL (flags invalid)
18523247Sgjelinek  *	EFAULT (bad address for buf or nres)
18533247Sgjelinek  */
int
vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
{
	vmu_entity_t *entity;
	vmusage_t *result;
	int ret = 0;
	int cacherecent = 0;
	hrtime_t now;
	uint_t flags_orig;		/* caller's flags, pre-munging */

	/*
	 * Non-global zones cannot request system wide and/or collated
	 * results, or the system result, so munge the flags accordingly.
	 */
	flags_orig = flags;
	if (curproc->p_zone != global_zone) {
		if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
			flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
			flags |= VMUSAGE_PROJECTS;
		}
		if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
			flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
			flags |= VMUSAGE_RUSERS;
		}
		if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
			flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
			flags |= VMUSAGE_EUSERS;
		}
		if (flags & VMUSAGE_SYSTEM) {
			flags &= ~VMUSAGE_SYSTEM;
			flags |= VMUSAGE_ZONE;
		}
	}

	/* Check for unknown flags */
	if ((flags & (~VMUSAGE_MASK)) != 0)
		return (set_errno(EINVAL));

	/* Check for no flags */
	if ((flags & VMUSAGE_MASK) == 0)
		return (set_errno(EINVAL));

	mutex_enter(&vmu_data.vmu_lock);
	now = gethrtime();

start:
	if (vmu_data.vmu_cache != NULL) {

		vmu_cache_t *cache;

		/* Cache is fresh if no older than the caller's max age. */
		if ((vmu_data.vmu_cache->vmc_timestamp +
		    ((hrtime_t)age * NANOSEC)) > now)
			cacherecent = 1;

		/*
		 * A fresh cache covering every requested flag can be
		 * copied out directly.  Hold it across the copyout, since
		 * vmu_lock must be dropped while touching user memory.
		 */
		if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
		    cacherecent == 1) {
			cache = vmu_data.vmu_cache;
			vmu_cache_hold(cache);
			mutex_exit(&vmu_data.vmu_lock);

			ret = vmu_copyout_results(cache, buf, nres, flags_orig,
			    cpflg);
			mutex_enter(&vmu_data.vmu_lock);
			vmu_cache_rele(cache);
			if (vmu_data.vmu_pending_waiters > 0)
				cv_broadcast(&vmu_data.vmu_cv);
			mutex_exit(&vmu_data.vmu_lock);
			return (ret);
		}
		/*
		 * If the cache is recent, it is likely that there are other
		 * consumers of vm_getusage running, so add their flags to the
		 * desired flags for the calculation.
		 */
		if (cacherecent == 1)
			flags = vmu_data.vmu_cache->vmc_flags | flags;
	}

	/* No calculation in progress: become the calculation thread. */
	if (vmu_data.vmu_calc_thread == NULL) {

		vmu_cache_t *cache;

		vmu_data.vmu_calc_thread = curthread;
		vmu_data.vmu_calc_flags = flags;
		vmu_data.vmu_entities = NULL;
		vmu_data.vmu_nentities = 0;
		/* Fold in flags requested by threads waiting on us. */
		if (vmu_data.vmu_pending_waiters > 0)
			vmu_data.vmu_calc_flags |=
			    vmu_data.vmu_pending_flags;

		vmu_data.vmu_pending_flags = 0;
		mutex_exit(&vmu_data.vmu_lock);
		vmu_calculate();
		mutex_enter(&vmu_data.vmu_lock);
		/* copy results to cache */
		if (vmu_data.vmu_cache != NULL)
			vmu_cache_rele(vmu_data.vmu_cache);
		cache = vmu_data.vmu_cache =
		    vmu_cache_alloc(vmu_data.vmu_nentities,
		    vmu_data.vmu_calc_flags);

		result = cache->vmc_results;
		for (entity = vmu_data.vmu_entities; entity != NULL;
		    entity = entity->vme_next) {
			*result = entity->vme_result;
			result++;
		}
		cache->vmc_timestamp = gethrtime();
		/* Extra hold for our copyout, done after vmu_lock drops. */
		vmu_cache_hold(cache);

		vmu_data.vmu_calc_flags = 0;
		vmu_data.vmu_calc_thread = NULL;

		if (vmu_data.vmu_pending_waiters > 0)
			cv_broadcast(&vmu_data.vmu_cv);

		mutex_exit(&vmu_data.vmu_lock);

		/* copy cache */
		ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg);
		mutex_enter(&vmu_data.vmu_lock);
		vmu_cache_rele(cache);
		mutex_exit(&vmu_data.vmu_lock);

		return (ret);
	}
	/*
	 * Another thread is calculating.  Record our flags for it, wait
	 * for it to finish, then retry from the top against the new cache.
	 */
	vmu_data.vmu_pending_flags |= flags;
	vmu_data.vmu_pending_waiters++;
	while (vmu_data.vmu_calc_thread != NULL) {
		if (cv_wait_sig(&vmu_data.vmu_cv,
		    &vmu_data.vmu_lock) == 0) {
			/* Interrupted by a signal. */
			vmu_data.vmu_pending_waiters--;
			mutex_exit(&vmu_data.vmu_lock);
			return (set_errno(EINTR));
		}
	}
	vmu_data.vmu_pending_waiters--;
	goto start;
}
1992