/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * vm_usage
 *
 * This file implements the getvmusage() private system call.
 * getvmusage() counts the amount of resident memory pages and swap
 * reserved by the specified process collective.  A "process collective"
 * is the set of processes owned by a particular zone, project, task, or
 * user.
 *
 * rss and swap are counted so that for a given process collective, a page
 * is only counted once.  For example, this means that if multiple
 * processes in the same project map the same page, then the project will
 * only be charged once for that page.  On the other hand, if two
 * processes in different projects map the same page, then both projects
 * will be charged for the page.
 *
 * The vm_getusage() calculation is implemented so that the first thread
 * performs the rss/swap counting.  Other callers wait for that thread to
 * finish and then copy the results.  This enables multiple rcapd and
 * prstat instances to consume data from the same calculation.  The
 * results are also cached so that a caller interested in recent results
 * can just copy them instead of starting a new calculation.  The caller
 * passes the maximum age (in seconds) of the data.  If the cached data is
 * young enough, the cache is copied; otherwise, a new calculation is
 * executed and the cache is replaced with the new data.
 *
 * The rss calculation for each process collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out the proc's collectives (zone, project, task, and/or
 *	  user).
 *	- For each seg in the proc's address space:
 *		- If seg is private:
 *			- Lookup anons in the amp.
 *			- For incore pages not previously visited for each
 *			  of the proc's collectives, add the incore pagesize
 *			  to each collective.  Anons with a refcnt of 1 can
 *			  be assumed to be not previously visited.
 *			- For address ranges without anons in the amp:
 *				- Lookup pages in the underlying vnode.
 *				- For incore pages not previously visited
 *				  for each of the proc's collectives, add
 *				  the incore pagesize to each collective.
 *		- If seg is shared:
 *			- Lookup pages in the shared amp or vnode.
 *			- For incore pages not previously visited for each
 *			  of the proc's collectives, add the incore pagesize
 *			  to each collective.
 *
 * Swap is reserved by private segments and shared anonymous segments.
 * The only shared anon segments which do not reserve swap are ISM segments
 * and schedctl segments, both of which can be identified by having
 * amp->swresv == 0.
 *
 * The swap calculation for each collective is as follows:
 *
 *   - Inspect flags, determine if counting swap for zones, projects,
 *     tasks, and/or users.
 *   - For each proc:
 *	- Figure out the proc's collectives (zone, project, task, and/or
 *	  user).
 *	- For each seg in the proc's address space:
 *		- If seg is private:
 *			- Add svd->swresv pages to the swap count for each
 *			  of the proc's collectives.
 *		- If seg is anon, shared, and amp->swresv != 0:
 *			- For address ranges in the amp not previously
 *			  visited for each of the proc's collectives, add
 *			  the size of the address range to the swap count
 *			  for each collective.
 *
 * These two calculations are done simultaneously, with most of the work
 * being done in vmu_calculate_seg().  The results of the calculation are
 * copied into the cache, "vmu_data.vmu_cache->vmc_results".
 *
 * To perform the calculation, various things are tracked and cached:
 *
 *    - incore/not-incore page ranges for all vnodes.
 *	(vmu_data.vmu_all_vnodes_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - incore/not-incore page ranges for all shared amps.
 *	(vmu_data.vmu_all_amps_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - visited page ranges for each collective.
 *	   - per vnode (entity->vme_vnode_hash)
 *	   - per shared amp (entity->vme_amp_hash)
 *	For accurate counting of map-shared and cow-shared pages.
 *
 *    - visited private anons (refcnt > 1) for each collective.
 *	(entity->vme_anon_hash)
 *	For accurate counting of cow-shared pages.
 *
 * The common accounting structure is the vmu_entity_t, which represents
 * collectives:
 *
 *    - A zone.
 *    - A project, task, or user within a zone.
 *    - The entire system (vmu_data.vmu_system).
 *    - Each collapsed (col) project and user.  This means a given projid
 *	or uid, regardless of which zone the process is in.  For instance,
 *	project 0 in the global zone and project 0 in a non-global zone are
 *	the same collapsed project.
 *
 *  Each entity structure tracks which pages have been already visited for
 *  that entity (via previously inspected processes) so that these pages
 *  are not double counted.
 */

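/*
 * Illustrative userland sketch (not part of this file; assumes the
 * getvmusage() wrapper and vmusage_t definition in <sys/vm_usage.h>,
 * where the wrapper is assumed to have the signature
 * int getvmusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres)):
 *
 *	#include <sys/vm_usage.h>
 *	#include <stdio.h>
 *
 *	void
 *	print_zone_rss(void)
 *	{
 *		vmusage_t buf[32];
 *		size_t i, nres = 32;
 *
 *		// Accept cached results up to 5 seconds old.
 *		if (getvmusage(VMUSAGE_ALL_ZONES, 5, buf, &nres) != 0)
 *			return;
 *		for (i = 0; i < nres && i < 32; i++)
 *			(void) printf("zone %d: rss %llu bytes\n",
 *			    (int)buf[i].vmu_id,
 *			    (u_longlong_t)buf[i].vmu_rss_all);
 *	}
 */
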
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/zone.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/task.h>
#include <sys/thread.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/modhash.h>
#include <sys/modhash_impl.h>
#include <sys/shm.h>
#include <sys/swap.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vm_usage.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>

#define	VMUSAGE_HASH_SIZE		512

#define	VMUSAGE_TYPE_VNODE		1
#define	VMUSAGE_TYPE_AMP		2
#define	VMUSAGE_TYPE_ANON		3

#define	VMUSAGE_BOUND_UNKNOWN		0
#define	VMUSAGE_BOUND_INCORE		1
#define	VMUSAGE_BOUND_NOT_INCORE	2

/*
 * Bounds for vnodes and shared amps.
 * Each bound is either entirely incore, entirely not incore, or
 * entirely unknown.  Bounds are stored in order by offset.
 */
typedef struct vmu_bound {
	struct  vmu_bound *vmb_next;
	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
	pgcnt_t	vmb_end;    /* page offset in vnode/amp on which bound ends */
	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
} vmu_bound_t;
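
/*
 * For example, an object whose pages 0 through 9 were found incore and
 * whose pages 10 through 24 were found paged out would have the bounds
 * list:
 *
 *	[0, 9]:VMUSAGE_BOUND_INCORE -> [10, 24]:VMUSAGE_BOUND_NOT_INCORE
 *
 * Ranges whose residency has not been determined yet are kept as
 * VMUSAGE_BOUND_UNKNOWN until vmu_amp_update_incore_bounds() or
 * vmu_vnode_update_incore_bounds() resolves them.
 */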

/*
 * Hash of visited objects (vnodes or shared amps).
 * The key is the address of the vnode or amp.  The bounds list records
 * the known incore/not-incore bounds for the vnode/amp.
 */
typedef struct vmu_object {
	struct vmu_object	*vmo_next;	/* free list */
	caddr_t		vmo_key;
	short		vmo_type;
	vmu_bound_t	*vmo_bounds;
} vmu_object_t;

/*
 * Entity by which to count results.
 *
 * The entity structure keeps the current rss/swap counts for each entity
 * (zone, project, etc), and hashes of vm structures that have already
 * been visited for the entity.
 *
 * vme_next:	links the list of all entities currently being counted by
 *		vmu_calculate().
 *
 * vme_next_calc: links the list of entities related to the current process
 *		 being counted by vmu_calculate_proc().
 *
 * vmu_calculate_proc() walks all processes.  For each process, it makes a
 * list of the entities related to that process using vme_next_calc.  This
 * list changes each time vmu_calculate_proc() is called.
 */
typedef struct vmu_entity {
	struct vmu_entity *vme_next;
	struct vmu_entity *vme_next_calc;
	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
	mod_hash_t	*vme_anon_hash;	 /* cow anons visited for entity */
	vmusage_t	vme_result;	 /* identifies entity and results */
} vmu_entity_t;

/*
 * Hash of entities visited within a zone, and an entity for the zone
 * itself.
 */
typedef struct vmu_zone {
	struct vmu_zone	*vmz_next;	/* free list */
	id_t		vmz_id;
	vmu_entity_t	*vmz_zone;
	mod_hash_t	*vmz_projects_hash;
	mod_hash_t	*vmz_tasks_hash;
	mod_hash_t	*vmz_rusers_hash;
	mod_hash_t	*vmz_eusers_hash;
} vmu_zone_t;

/*
 * Cache of results from the last calculation.
 */
typedef struct vmu_cache {
	vmusage_t	*vmc_results;	/* Results from last call to */
					/* vm_getusage(). */
	uint64_t	vmc_nresults;	/* Count of cached results */
	uint64_t	vmc_refcnt;	/* refcnt for free */
	uint_t		vmc_flags;	/* Flags for vm_getusage() */
	hrtime_t	vmc_timestamp;	/* when cache was created */
} vmu_cache_t;

/*
 * Top level rss info for the system.
 */
typedef struct vmu_data {
	kmutex_t	vmu_lock;		/* Protects vmu_data */
	kcondvar_t	vmu_cv;			/* Used to signal threads */
						/* waiting for the calc */
						/* thread to finish */
	vmu_entity_t	*vmu_system;		/* Entity for tracking */
						/* rss/swap for all processes */
						/* in all zones */
	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
	mod_hash_t	*vmu_projects_col_hash; /* These *_col_hash hashes */
	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
						/* to implement VMUSAGE_COL_* */
						/* flags, which aggregate by */
						/* project or user regardless */
						/* of zoneid. */
	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
						/* to track incore/not-incore */
	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
						/* amps to track incore/not- */
						/* incore */
	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
	size_t		vmu_nentities;		/* Count of entities in list */
	vmu_cache_t	*vmu_cache;		/* Cached results */
	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
						/* vmu_calculate() */
	uint_t		vmu_calc_flags;		/* Flags being used by */
						/* currently running calc */
						/* thread */
	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
						/* threads waiting for */
						/* calc thread to finish */
	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
						/* for calc thread */
	vmu_bound_t	*vmu_free_bounds;
	vmu_object_t	*vmu_free_objects;
	vmu_entity_t	*vmu_free_entities;
	vmu_zone_t	*vmu_free_zones;
} vmu_data_t;

extern struct as kas;
extern proc_t *practive;
extern zone_t *global_zone;
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

static vmu_data_t vmu_data;
static kmem_cache_t *vmu_bound_cache;
static kmem_cache_t *vmu_object_cache;

/*
 * Save a bound on the free list.
 */
static void
vmu_free_bound(vmu_bound_t *bound)
{
	bound->vmb_next = vmu_data.vmu_free_bounds;
	vmu_data.vmu_free_bounds = bound;
}

/*
 * Free an object, and all visited bound info.
 */
static void
vmu_free_object(mod_hash_val_t val)
{
	vmu_object_t *obj = (vmu_object_t *)val;
	vmu_bound_t *bound = obj->vmo_bounds;
	vmu_bound_t *tmp;

	while (bound != NULL) {
		tmp = bound;
		bound = bound->vmb_next;
		vmu_free_bound(tmp);
	}
	obj->vmo_next = vmu_data.vmu_free_objects;
	vmu_data.vmu_free_objects = obj;
}

/*
 * Free an entity, and hashes of visited objects for that entity.
 */
static void
vmu_free_entity(mod_hash_val_t val)
{
	vmu_entity_t *entity = (vmu_entity_t *)val;

	if (entity->vme_vnode_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_vnode_hash);
	if (entity->vme_amp_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_amp_hash);
	if (entity->vme_anon_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_anon_hash);

	entity->vme_next = vmu_data.vmu_free_entities;
	vmu_data.vmu_free_entities = entity;
}

/*
 * Free a zone entity, and all hashes of entities inside that zone,
 * which are projects, tasks, and users.
 */
static void
vmu_free_zone(mod_hash_val_t val)
{
	vmu_zone_t *zone = (vmu_zone_t *)val;

	if (zone->vmz_zone != NULL) {
		vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
		zone->vmz_zone = NULL;
	}
	if (zone->vmz_projects_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_projects_hash);
	if (zone->vmz_tasks_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
	if (zone->vmz_rusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
	if (zone->vmz_eusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
	zone->vmz_next = vmu_data.vmu_free_zones;
	vmu_data.vmu_free_zones = zone;
}

/*
 * Initialize synchronization primitives and hashes for system-wide tracking
 * of visited vnodes and shared amps.  Initialize results cache.
 */
void
vm_usage_init()
{
	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);

	vmu_data.vmu_system = NULL;
	vmu_data.vmu_zones_hash = NULL;
	vmu_data.vmu_projects_col_hash = NULL;
	vmu_data.vmu_rusers_col_hash = NULL;
	vmu_data.vmu_eusers_col_hash = NULL;

	vmu_data.vmu_free_bounds = NULL;
	vmu_data.vmu_free_objects = NULL;
	vmu_data.vmu_free_entities = NULL;
	vmu_data.vmu_free_zones = NULL;

	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (vnode_t));
	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (struct anon_map));
	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed euser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);

	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	vmu_object_cache = kmem_cache_create("vmu_object_cache",
	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vmu_data.vmu_entities = NULL;
	vmu_data.vmu_nentities = 0;

	vmu_data.vmu_cache = NULL;
	vmu_data.vmu_calc_thread = NULL;
	vmu_data.vmu_calc_flags = 0;
	vmu_data.vmu_pending_flags = 0;
	vmu_data.vmu_pending_waiters = 0;
}

/*
 * Allocate hashes for tracking vm objects visited for an entity.
 * Update the list of entities.
 */
static vmu_entity_t *
vmu_alloc_entity(id_t id, int type, id_t zoneid)
{
	vmu_entity_t *entity;

	if (vmu_data.vmu_free_entities != NULL) {
		entity = vmu_data.vmu_free_entities;
		vmu_data.vmu_free_entities =
		    vmu_data.vmu_free_entities->vme_next;
		bzero(&entity->vme_result, sizeof (vmusage_t));
	} else {
		entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
	}
	entity->vme_result.vmu_id = id;
	entity->vme_result.vmu_zoneid = zoneid;
	entity->vme_result.vmu_type = type;

	if (entity->vme_vnode_hash == NULL)
		entity->vme_vnode_hash = mod_hash_create_ptrhash(
		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (vnode_t));

	if (entity->vme_amp_hash == NULL)
		entity->vme_amp_hash = mod_hash_create_ptrhash(
		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (struct anon_map));

	if (entity->vme_anon_hash == NULL)
		entity->vme_anon_hash = mod_hash_create_ptrhash(
		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
		    mod_hash_null_valdtor, sizeof (struct anon));

	entity->vme_next = vmu_data.vmu_entities;
	vmu_data.vmu_entities = entity;
	vmu_data.vmu_nentities++;

	return (entity);
}

/*
 * Allocate a zone entity, and hashes for tracking visited vm objects
 * for projects, tasks, and users within that zone.
 */
static vmu_zone_t *
vmu_alloc_zone(id_t id)
{
	vmu_zone_t *zone;

	if (vmu_data.vmu_free_zones != NULL) {
		zone = vmu_data.vmu_free_zones;
		vmu_data.vmu_free_zones =
		    vmu_data.vmu_free_zones->vmz_next;
		zone->vmz_next = NULL;
		zone->vmz_zone = NULL;
	} else {
		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
	}

	zone->vmz_id = id;

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
	    VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
		zone->vmz_projects_hash = mod_hash_create_idhash(
		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
	    != 0 && zone->vmz_tasks_hash == NULL)
		zone->vmz_tasks_hash = mod_hash_create_idhash(
		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
	    != 0 && zone->vmz_rusers_hash == NULL)
		zone->vmz_rusers_hash = mod_hash_create_idhash(
		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
	    != 0 && zone->vmz_eusers_hash == NULL)
		zone->vmz_eusers_hash = mod_hash_create_idhash(
		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	return (zone);
}

/*
 * Allocate a structure for tracking visited bounds for a vm object.
 */
static vmu_object_t *
vmu_alloc_object(caddr_t key, int type)
{
	vmu_object_t *object;

	if (vmu_data.vmu_free_objects != NULL) {
		object = vmu_data.vmu_free_objects;
		vmu_data.vmu_free_objects =
		    vmu_data.vmu_free_objects->vmo_next;
	} else {
		object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
	}

	object->vmo_key = key;
	object->vmo_type = type;
	object->vmo_bounds = NULL;

	return (object);
}

/*
 * Allocate and return a zeroed bound structure.
 */
static vmu_bound_t *
vmu_alloc_bound()
{
	vmu_bound_t *bound;

	if (vmu_data.vmu_free_bounds != NULL) {
		bound = vmu_data.vmu_free_bounds;
		vmu_data.vmu_free_bounds =
		    vmu_data.vmu_free_bounds->vmb_next;
	} else {
		bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
	}
	bzero(bound, sizeof (vmu_bound_t));
	return (bound);
}

/*
 * vmu_find_insert_* functions implement hash lookup or allocate and
 * insert operations.
 */
static vmu_object_t *
vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
{
	int ret;
	vmu_object_t *object;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&object);
	if (ret != 0) {
		object = vmu_alloc_object(key, type);
		ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
		    (mod_hash_val_t)object, (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (object);
}

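/*
 * Returns non-zero if the given anon was not already in the hash and was
 * inserted (i.e. this is the first visit for the entity), and 0 if the
 * anon had already been visited.
 */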
static int
vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
{
	int ret;
	caddr_t val;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&val);

	if (ret == 0)
		return (0);

	ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);

	ASSERT(ret == 0);

	return (1);
}

static vmu_entity_t *
vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
{
	int ret;
	vmu_entity_t *entity;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&entity);
	if (ret != 0) {
		entity = vmu_alloc_entity(id, type, zoneid);
		ret = i_mod_hash_insert_nosync(hash,
		    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
		    (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (entity);
}

/*
 * Returns the list of object bounds between start and end.  New bounds
 * inserted by this call are given the specified type.
 *
 * Returns the number of pages covered by newly created bounds.  Returns 0
 * if the region between start and end consists entirely of existing
 * bounds.
 */
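/*
 * For example, if the existing bounds list is [5, 8] and the requested
 * range is [0, 11], two new bounds of the given type, [0, 4] and [9, 11],
 * are created, *first/*last are set to span [0, 4] through [9, 11], and
 * 8 (the count of newly covered pages) is returned.
 */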
static pgcnt_t
vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t end,
    char type, vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *prev = NULL;
	vmu_bound_t *tmp = NULL;
	pgcnt_t ret = 0;

	*first = *last = NULL;

	for (next = ro->vmo_bounds; next != NULL; next = next->vmb_next) {
		/*
		 * Find bounds overlapping or overlapped by range [start,end].
		 */
		if (start > next->vmb_end) {
			/* bound is before new bound */
			prev = next;
			continue;
		}
		if (next->vmb_start > end) {
			/* bound is after new bound */
			break;
		}
		if (*first == NULL)
			*first = next;
		*last = next;
	}

	if (*first == NULL) {
		ASSERT(*last == NULL);
		/*
		 * No bounds overlapping range [start,end], so create new
		 * bound
		 */
		tmp = vmu_alloc_bound();
		tmp->vmb_start = start;
		tmp->vmb_end = end;
		tmp->vmb_type = type;
		if (prev == NULL) {
			tmp->vmb_next = ro->vmo_bounds;
			ro->vmo_bounds = tmp;
		} else {
			tmp->vmb_next = prev->vmb_next;
			prev->vmb_next = tmp;
		}
		*first = tmp;
		*last = tmp;
		ASSERT(tmp->vmb_end >= tmp->vmb_start);
		ret = tmp->vmb_end - tmp->vmb_start + 1;
		return (ret);
	}

	/* Check to see if start is before first known bound */
	ASSERT(*first != NULL && *last != NULL);
	next = (*first);
	if (start < (*first)->vmb_start) {
		/* Create new bound before first bound */
		tmp = vmu_alloc_bound();
		tmp->vmb_start = start;
		tmp->vmb_end = (*first)->vmb_start - 1;
		tmp->vmb_type = type;
		tmp->vmb_next = *first;
		if (*first == ro->vmo_bounds)
			ro->vmo_bounds = tmp;
		if (prev != NULL)
			prev->vmb_next = tmp;
		ASSERT(tmp->vmb_end >= tmp->vmb_start);
		ret += tmp->vmb_end - tmp->vmb_start + 1;
		*first = tmp;
	}
	/*
	 * Between start and end, search for gaps between and after existing
	 * bounds.  Create new bounds to fill gaps if they exist.
	 */
	while (end > next->vmb_end) {
		/*
		 * Check for gap between bound and next bound.  If no gap,
		 * continue.
		 */
		if ((next != *last) &&
		    ((next->vmb_end + 1) == next->vmb_next->vmb_start)) {
			next = next->vmb_next;
			continue;
		}
		/*
		 * Insert new bound in gap after bound, and before next
		 * bound if next bound exists.
		 */
		tmp = vmu_alloc_bound();
		tmp->vmb_type = type;
		tmp->vmb_next = next->vmb_next;
		tmp->vmb_start = next->vmb_end + 1;

		if (next != *last) {
			tmp->vmb_end = next->vmb_next->vmb_start - 1;
			ASSERT(tmp->vmb_end >= tmp->vmb_start);
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			next->vmb_next = tmp;
			next = tmp->vmb_next;
		} else {
			tmp->vmb_end = end;
			ASSERT(tmp->vmb_end >= tmp->vmb_start);
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			next->vmb_next = tmp;
			*last = tmp;
			break;
		}
	}
	return (ret);
}

/*
 * vmu_update_bounds()
 *
 * first, last:	list of contiguous bounds, of which zero or more are of
 *		type VMUSAGE_BOUND_UNKNOWN.
 *
 * new_first, new_last: list of contiguous bounds, of which none are of
 *			type VMUSAGE_BOUND_UNKNOWN.  These bounds are used
 *			to update the types of bounds in (first, last) with
 *			type VMUSAGE_BOUND_UNKNOWN.
 *
 * For the list of bounds (first, last), this function updates any bounds
 * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding
 * bound in the list (new_first, new_last).
 *
 * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the
 * list (new_first, new_last), it will be split into multiple bounds.
 *
 * Return value:
 *	The number of pages in the list of bounds (first, last) that were
 *	of type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of
 *	type VMUSAGE_BOUND_INCORE.
 */
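/*
 * For example, if (first, last) is the single bound [0, 5]:UNKNOWN and
 * (new_first, new_last) is [0, 2]:INCORE -> [3, 5]:NOT_INCORE, then the
 * unknown bound is split into [0, 2]:INCORE -> [3, 5]:NOT_INCORE and 3
 * (the number of pages newly known to be incore) is returned.
 */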
static pgcnt_t
vmu_update_bounds(vmu_bound_t **first, vmu_bound_t **last,
    vmu_bound_t *new_first, vmu_bound_t *new_last)
{
	vmu_bound_t *next, *new_next, *tmp;
	pgcnt_t rss = 0;

	next = *first;
	new_next = new_first;

	/* verify bounds span same pages */
	ASSERT((*first)->vmb_start >= new_next->vmb_start);
	ASSERT((*last)->vmb_end <= new_last->vmb_end);
	for (;;) {
		/* If bound already has type, proceed to next bound */
		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}
		while (new_next->vmb_end < next->vmb_start)
			new_next = new_next->vmb_next;
		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
		next->vmb_type = new_next->vmb_type;
		if (new_next->vmb_end < next->vmb_end) {
			/* need to split bound */
			tmp = vmu_alloc_bound();
			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
			tmp->vmb_start = new_next->vmb_end + 1;
			tmp->vmb_end = next->vmb_end;
			tmp->vmb_next = next->vmb_next;
			next->vmb_end = new_next->vmb_end;
			next->vmb_next = tmp;
			if (*last == next)
				*last = tmp;
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			next = tmp;
		} else {
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			if (next == *last)
				break;
			next = next->vmb_next;
		}
	}
	return (rss);
}

/*
 * Merges adjacent bounds with the same type between the first and last
 * bound.  After the merge, the last pointer is no longer valid, as the
 * last bound may have been merged away.
 */
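/*
 * For example, [0, 3]:INCORE -> [4, 9]:INCORE merges into [0, 9]:INCORE,
 * while [0, 3]:INCORE -> [5, 9]:INCORE is left unchanged because the two
 * bounds are not adjacent.
 */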
static void
vmu_merge_bounds(vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;

	ASSERT(*first != NULL);
	ASSERT(*last != NULL);

	next = *first;
	while (next != *last) {

		/* If bounds are adjacent and have same type, merge them */
		if (((next->vmb_end + 1) == next->vmb_next->vmb_start) &&
		    (next->vmb_type == next->vmb_next->vmb_type)) {
			tmp = next->vmb_next;
			next->vmb_end = tmp->vmb_end;
			next->vmb_next = tmp->vmb_next;
			vmu_free_bound(tmp);
			if (tmp == *last)
				*last = next;
		} else {
			next = next->vmb_next;
		}
	}
}

/*
 * Given an amp and a list of bounds, updates each bound's type with
 * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
 *
 * If a bound is partially incore, it will be split into two bounds.
 * first and last may be modified, as bounds may be split into multiple
 * bounds if they are partially incore/not-incore.
 *
 * Set incore to B_TRUE if the bounds are already known to be incore.
 */
static void
vmu_amp_update_incore_bounds(struct anon_map *amp, vmu_bound_t **first,
    vmu_bound_t **last, boolean_t incore)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short page_type;
	vnode_t *vn;
	anoff_t off;
	struct anon *ap;

	next = *first;
	/* Shared anon slots don't change once set */
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (;;) {
		if (incore == B_TRUE)
			next->vmb_type = VMUSAGE_BOUND_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			ap = anon_get_ptr(amp->ahp, index);
			if (ap != NULL)
				swap_xlate(ap, &vn, &off);

			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
			    (page = page_exists(vn, off)) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (next->vmb_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If the current bound type does not match
				 * the page type, need to split off a new
				 * bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				tmp->vmb_next = next->vmb_next;
				next->vmb_end = index - 1;
				next->vmb_next = tmp;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
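				/*
				 * (e.g. with 8K base pages and a 64K large
				 * page, pgcnt == 8 and pgmsk == 0x7, so
				 * index 0xb becomes (0x8 + 8) == 0x10, the
				 * first page of the next large page.)
				 */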
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = next->vmb_next;
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
}

/*
 * Same as vmu_amp_update_incore_bounds(), except for tracking
 * incore/not-incore for vnodes.
 */
static void
vmu_vnode_update_incore_bounds(vnode_t *vnode, vmu_bound_t **first,
    vmu_bound_t **last)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short page_type;

	next = *first;
	for (;;) {
		if (vnode->v_pages == NULL)
			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = next->vmb_next;
			continue;
		}

		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			if (vnode->v_pages != NULL &&
			    (page = page_exists(vnode, ptob(index))) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (next->vmb_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If the current bound type does not match
				 * the page type, need to split off a new
				 * bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				tmp->vmb_next = next->vmb_next;
				next->vmb_end = index - 1;
				next->vmb_next = tmp;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside large page, jump to next large
				 * page
				 */
				index = (index & ~pgmsk) + pgcnt;
			} else {
				index++;
			}
		}
		if (next == *last) {
			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
			break;
		} else
			next = next->vmb_next;
	}
}

/*
 * Calculate the rss and swap consumed by a segment.  vmu_entities is the
 * list of entities to visit.  For shared segments, the vnode or amp
 * is looked up in each entity to see if it has already been counted.
 * Private anon pages are checked per entity to ensure that cow pages are
 * not double counted.
 *
 * For private mapped files, first the amp is checked for private pages.
 * Bounds not backed by the amp are looked up in the vnode for each entity
 * to avoid double counting of private COW vnode pages.
 */
static void
vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
{
	struct segvn_data *svd;
	struct shm_data *shmd;
	struct spt_data *sptd;
	vmu_object_t *shared_object = NULL;
	vmu_object_t *entity_object = NULL;
	vmu_entity_t *entity;
	vmusage_t *result;
	vmu_bound_t *first = NULL;
	vmu_bound_t *last = NULL;
	vmu_bound_t *cur = NULL;
	vmu_bound_t *e_first = NULL;
	vmu_bound_t *e_last = NULL;
	vmu_bound_t *tmp;
	pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
	struct anon_map *private_amp = NULL;
	boolean_t incore = B_FALSE;
	boolean_t shared = B_FALSE;
	int file = 0;
	pgcnt_t swresv = 0;
	pgcnt_t panon = 0;

	/* Can zero-length segments exist?  Not sure, so paranoia. */
	if (seg->s_size <= 0)
		return;

	/*
	 * Figure out if there is a shared object (such as a named vnode or
	 * a shared amp), then figure out if there is a private amp, which
	 * identifies private pages.
	 */
	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_SHARED)
			shared = B_TRUE;
		else
			swresv = svd->swresv;

		if (svd->vp != NULL) {
			file = 1;
			shared_object = vmu_find_insert_object(
			    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
			    VMUSAGE_TYPE_VNODE);
			s_start = btop(svd->offset);
			s_end = btop(svd->offset + seg->s_size) - 1;
		}
		if (svd->amp != NULL && svd->type == MAP_SHARED) {
			ASSERT(shared_object == NULL);
			shared_object = vmu_find_insert_object(
			    vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
			    VMUSAGE_TYPE_AMP);
			s_start = svd->anon_index;
			s_end = svd->anon_index + btop(seg->s_size) - 1;
			/* schedctl mappings are always in core */
			if (svd->amp->swresv == 0)
				incore = B_TRUE;
		}
		if (svd->amp != NULL && svd->type == MAP_PRIVATE) {
			private_amp = svd->amp;
			p_start = svd->anon_index;
			p_end = svd->anon_index + btop(seg->s_size) - 1;
		}
	} else if (seg->s_ops == &segspt_shmops) {
		shared = B_TRUE;
		shmd = (struct shm_data *)seg->s_data;
		shared_object = vmu_find_insert_object(
		    vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
		    VMUSAGE_TYPE_AMP);
		s_start = 0;
		s_end = btop(seg->s_size) - 1;
		sptd = shmd->shm_sptseg->s_data;

		/* ism segments are always incore and do not reserve swap */
		if (sptd->spt_flags & SHM_SHARE_MMU)
			incore = B_TRUE;

	} else {
		return;
	}

	/*
	 * If there is a private amp, count anon pages that exist.  If an
	 * anon has a refcnt > 1 (cow sharing), then save the anon in a
	 * hash so that it is not double counted.
	 *
	 * If there is also a shared object, we figure out the bounds
	 * which are not mapped by the private amp.
	 */
	if (private_amp != NULL) {

		/* Enter as writer to prevent cow anons from being freed */
		ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);

		p_index = p_start;
		s_index = s_start;

		while (p_index <= p_end) {

			pgcnt_t p_index_next;
			pgcnt_t p_bound_size;
			int cnt;
			anoff_t off;
			struct vnode *vn;
			struct anon *ap;
			page_t *page;		/* For handling of large */
			pgcnt_t pgcnt = 1;	/* pages */
			pgcnt_t pgstart;
			pgcnt_t pgend;
			uint_t pgshft;
			pgcnt_t pgmsk;

			p_index_next = p_index;
			ap = anon_get_next_ptr(private_amp->ahp,
			    &p_index_next);

			/*
			 * If next anon is past end of mapping, simulate
			 * end of anon so loop terminates.
			 */
			if (p_index_next > p_end) {
				p_index_next = p_end + 1;
				ap = NULL;
			}
			/*
			 * For cow segments, keep track of bounds not
			 * backed by private amp so they can be looked
			 * up in the backing vnode
			 */
			if (p_index_next != p_index) {

				/*
				 * Compute index difference between anon and
				 * previous anon.
				 */
				p_bound_size = p_index_next - p_index - 1;

				if (shared_object != NULL) {
					cur = vmu_alloc_bound();
					cur->vmb_next = NULL;
					cur->vmb_start = s_index;
					cur->vmb_end = s_index + p_bound_size;
					cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
					if (first == NULL) {
						first = cur;
						last = cur;
					} else {
						last->vmb_next = cur;
						last = cur;
					}
				}
				p_index = p_index + p_bound_size + 1;
				s_index = s_index + p_bound_size + 1;
			}

			/* Detect end of anons in amp */
			if (ap == NULL)
				break;

			cnt = ap->an_refcnt;
			swap_xlate(ap, &vn, &off);

			if (vn == NULL || vn->v_pages == NULL ||
			    (page = page_exists(vn, off)) == NULL) {
				p_index++;
				s_index++;
				continue;
			}
1218*3247Sgjelinek 
1219*3247Sgjelinek 			/*
1220*3247Sgjelinek 			 * If a large page is found, compute the portion of
1221*3247Sgjelinek 			 * the large page in the mapping, and increment the
1222*3247Sgjelinek 			 * indices to the next large page.  (A worked example
1223*3247Sgjelinek 			 * follows this if/else block.)
1224*3247Sgjelinek 			 */
1224*3247Sgjelinek 			if (page->p_szc > 0) {
1225*3247Sgjelinek 
1226*3247Sgjelinek 				pgcnt = page_get_pagecnt(page->p_szc);
1227*3247Sgjelinek 				pgshft = page_get_shift(page->p_szc);
1228*3247Sgjelinek 				pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;
1229*3247Sgjelinek 
1230*3247Sgjelinek 				/* First page in large page */
1231*3247Sgjelinek 				pgstart = p_index & ~pgmsk;
1232*3247Sgjelinek 				/* Last page in large page */
1233*3247Sgjelinek 				pgend = pgstart + pgcnt - 1;
1234*3247Sgjelinek 				/*
1235*3247Sgjelinek 				 * Artificially end the page if it extends
1236*3247Sgjelinek 				 * past the end of the mapping.
1237*3247Sgjelinek 				 */
1238*3247Sgjelinek 				if (pgend > p_end)
1239*3247Sgjelinek 					pgend = p_end;
1240*3247Sgjelinek 
1241*3247Sgjelinek 				/*
1242*3247Sgjelinek 				 * Compute number of pages from large page
1243*3247Sgjelinek 				 * which are mapped.
1244*3247Sgjelinek 				 */
1245*3247Sgjelinek 				pgcnt = pgend - p_index + 1;
1246*3247Sgjelinek 
1247*3247Sgjelinek 				/*
1248*3247Sgjelinek 				 * Point indices at the page after the large
1249*3247Sgjelinek 				 * page, or at the page after the end of the
1250*3247Sgjelinek 				 * mapping.
1251*3247Sgjelinek 				 */
1251*3247Sgjelinek 				p_index += pgcnt;
1252*3247Sgjelinek 				s_index += pgcnt;
1253*3247Sgjelinek 			} else {
1254*3247Sgjelinek 				p_index++;
1255*3247Sgjelinek 				s_index++;
1256*3247Sgjelinek 			}
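			/*
			 * Worked example for the large-page case above: with
			 * a large page spanning eight base pages (pgmsk ==
			 * 7), if p_index is 21 then pgstart is 16 and pgend
			 * is 23, so pgcnt becomes 3 (pages 21-23 fall inside
			 * the mapping) and p_index advances to 24, the first
			 * page past the large page.
			 */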
1257*3247Sgjelinek 
1258*3247Sgjelinek 			/*
1259*3247Sgjelinek 			 * Assume anon structs with a refcnt
1260*3247Sgjelinek 			 * of 1 are not cow shared, so there
1261*3247Sgjelinek 			 * is no reason to track them per entity.
1262*3247Sgjelinek 			 */
1263*3247Sgjelinek 			if (cnt == 1) {
1264*3247Sgjelinek 				panon += pgcnt;
1265*3247Sgjelinek 				continue;
1266*3247Sgjelinek 			}
1267*3247Sgjelinek 			for (entity = vmu_entities; entity != NULL;
1268*3247Sgjelinek 			    entity = entity->vme_next_calc) {
1269*3247Sgjelinek 
1270*3247Sgjelinek 				result = &entity->vme_result;
1271*3247Sgjelinek 				/*
1272*3247Sgjelinek 				 * Track cow anons per entity so
1273*3247Sgjelinek 				 * they are not double counted.
1274*3247Sgjelinek 				 */
1275*3247Sgjelinek 				if (vmu_find_insert_anon(entity->vme_anon_hash,
1276*3247Sgjelinek 				    (caddr_t)ap) == 0)
1277*3247Sgjelinek 					continue;
1278*3247Sgjelinek 
1279*3247Sgjelinek 				result->vmu_rss_all += (pgcnt << PAGESHIFT);
1280*3247Sgjelinek 				result->vmu_rss_private +=
1281*3247Sgjelinek 				    (pgcnt << PAGESHIFT);
1282*3247Sgjelinek 			}
1283*3247Sgjelinek 		}
1284*3247Sgjelinek 		ANON_LOCK_EXIT(&private_amp->a_rwlock);
1285*3247Sgjelinek 	}
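	/*
	 * Sketch of the cow dedup above: if two processes in the same
	 * project cow-share an anon (an_refcnt == 2), the first process
	 * calculated inserts the anon into the project entity's
	 * vme_anon_hash and charges the page.  When the second process is
	 * calculated, vmu_find_insert_anon() finds the anon already
	 * present, returns 0, and the project is not charged again.
	 */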
1286*3247Sgjelinek 
1287*3247Sgjelinek 	/* Add up resident anon and swap reserved for private mappings */
1288*3247Sgjelinek 	if (swresv > 0 || panon > 0) {
1289*3247Sgjelinek 		for (entity = vmu_entities; entity != NULL;
1290*3247Sgjelinek 		    entity = entity->vme_next_calc) {
1291*3247Sgjelinek 			result = &entity->vme_result;
1292*3247Sgjelinek 			result->vmu_swap_all += swresv;
1293*3247Sgjelinek 			result->vmu_swap_private += swresv;
1294*3247Sgjelinek 			result->vmu_rss_all += (panon << PAGESHIFT);
1295*3247Sgjelinek 			result->vmu_rss_private += (panon << PAGESHIFT);
1296*3247Sgjelinek 		}
1297*3247Sgjelinek 	}
1298*3247Sgjelinek 
1299*3247Sgjelinek 	/* Compute resident pages backing shared amp or named vnode */
1300*3247Sgjelinek 	if (shared_object != NULL) {
1301*3247Sgjelinek 		if (first == NULL) {
1302*3247Sgjelinek 			/*
1303*3247Sgjelinek 			 * No private amp, or private amp has no anon
1304*3247Sgjelinek 			 * structs.  This means entire segment is backed by
1305*3247Sgjelinek 			 * the shared object.
1306*3247Sgjelinek 			 */
1307*3247Sgjelinek 			first = vmu_alloc_bound();
1308*3247Sgjelinek 			first->vmb_next = NULL;
1309*3247Sgjelinek 			first->vmb_start = s_start;
1310*3247Sgjelinek 			first->vmb_end = s_end;
1311*3247Sgjelinek 			first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
1312*3247Sgjelinek 		}
1313*3247Sgjelinek 		/*
1314*3247Sgjelinek 		 * Iterate bounds not backed by private amp, and compute
1315*3247Sgjelinek 		 * resident pages.
1316*3247Sgjelinek 		 */
1317*3247Sgjelinek 		cur = first;
1318*3247Sgjelinek 		while (cur != NULL) {
1319*3247Sgjelinek 
1320*3247Sgjelinek 			if (vmu_insert_lookup_object_bounds(shared_object,
1321*3247Sgjelinek 			    cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
1322*3247Sgjelinek 			    &first, &last) > 0) {
1323*3247Sgjelinek 				/* new bounds, find incore/not-incore */
1324*3247Sgjelinek 				if (shared_object->vmo_type ==
1325*3247Sgjelinek 				    VMUSAGE_TYPE_VNODE)
1326*3247Sgjelinek 					vmu_vnode_update_incore_bounds(
1327*3247Sgjelinek 					    (vnode_t *)
1328*3247Sgjelinek 					    shared_object->vmo_key, &first,
1329*3247Sgjelinek 					    &last);
1330*3247Sgjelinek 				else
1331*3247Sgjelinek 					vmu_amp_update_incore_bounds(
1332*3247Sgjelinek 					    (struct anon_map *)
1333*3247Sgjelinek 					    shared_object->vmo_key, &first,
1334*3247Sgjelinek 					    &last, incore);
1335*3247Sgjelinek 				vmu_merge_bounds(&first, &last);
1336*3247Sgjelinek 			}
1337*3247Sgjelinek 			for (entity = vmu_entities; entity != NULL;
1338*3247Sgjelinek 			    entity = entity->vme_next_calc) {
1339*3247Sgjelinek 
1340*3247Sgjelinek 				result = &entity->vme_result;
1341*3247Sgjelinek 
1342*3247Sgjelinek 				entity_object = vmu_find_insert_object(
1343*3247Sgjelinek 				    shared_object->vmo_type ==
1344*3247Sgjelinek 				    VMUSAGE_TYPE_VNODE ? entity->vme_vnode_hash:
1345*3247Sgjelinek 				    entity->vme_amp_hash,
1346*3247Sgjelinek 				    shared_object->vmo_key,
1347*3247Sgjelinek 				    shared_object->vmo_type);
1348*3247Sgjelinek 
1349*3247Sgjelinek 				virt = vmu_insert_lookup_object_bounds(
1350*3247Sgjelinek 				    entity_object, cur->vmb_start, cur->vmb_end,
1351*3247Sgjelinek 				    VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
1352*3247Sgjelinek 
1353*3247Sgjelinek 				if (virt == 0)
1354*3247Sgjelinek 					continue;
1355*3247Sgjelinek 				/*
1356*3247Sgjelinek 				 * Range visited for this entity
1357*3247Sgjelinek 				 */
1358*3247Sgjelinek 				rss = vmu_update_bounds(&e_first,
1359*3247Sgjelinek 				    &e_last, first, last);
1360*3247Sgjelinek 				result->vmu_rss_all += (rss << PAGESHIFT);
1361*3247Sgjelinek 				if (shared == B_TRUE && file == B_FALSE) {
1362*3247Sgjelinek 					/* shared anon mapping */
1363*3247Sgjelinek 					result->vmu_swap_all +=
1364*3247Sgjelinek 					    (virt << PAGESHIFT);
1365*3247Sgjelinek 					result->vmu_swap_shared +=
1366*3247Sgjelinek 					    (virt << PAGESHIFT);
1367*3247Sgjelinek 					result->vmu_rss_shared +=
1368*3247Sgjelinek 					    (rss << PAGESHIFT);
1369*3247Sgjelinek 				} else if (shared == B_TRUE && file == B_TRUE) {
1370*3247Sgjelinek 					/* shared file mapping */
1371*3247Sgjelinek 					result->vmu_rss_shared +=
1372*3247Sgjelinek 					    (rss << PAGESHIFT);
1373*3247Sgjelinek 				} else if (shared == B_FALSE &&
1374*3247Sgjelinek 				    file == B_TRUE) {
1375*3247Sgjelinek 					/* private file mapping */
1376*3247Sgjelinek 					result->vmu_rss_private +=
1377*3247Sgjelinek 					    (rss << PAGESHIFT);
1378*3247Sgjelinek 				}
1379*3247Sgjelinek 				vmu_merge_bounds(&e_first, &e_last);
1380*3247Sgjelinek 			}
1381*3247Sgjelinek 			tmp = cur;
1382*3247Sgjelinek 			cur = cur->vmb_next;
1383*3247Sgjelinek 			vmu_free_bound(tmp);
1384*3247Sgjelinek 		}
1385*3247Sgjelinek 	}
1386*3247Sgjelinek }
1387*3247Sgjelinek 
1388*3247Sgjelinek /*
1389*3247Sgjelinek  * Based on the current calculation flags, find the entities which are
1390*3247Sgjelinek  * relevant to the process.  Then calculate each segment in the
1391*3247Sgjelinek  * process's address space for each relevant entity.
1392*3247Sgjelinek  */
1393*3247Sgjelinek static void
1394*3247Sgjelinek vmu_calculate_proc(proc_t *p)
1395*3247Sgjelinek {
1396*3247Sgjelinek 	vmu_entity_t *entities = NULL;
1397*3247Sgjelinek 	vmu_zone_t *zone;
1398*3247Sgjelinek 	vmu_entity_t *tmp;
1399*3247Sgjelinek 	struct as *as;
1400*3247Sgjelinek 	struct seg *seg;
1401*3247Sgjelinek 	int ret;
1402*3247Sgjelinek 
1403*3247Sgjelinek 	/* Figure out which entities are being computed */
1404*3247Sgjelinek 	if (vmu_data.vmu_system != NULL) {
1405*3247Sgjelinek 		tmp = vmu_data.vmu_system;
1406*3247Sgjelinek 		tmp->vme_next_calc = entities;
1407*3247Sgjelinek 		entities = tmp;
1408*3247Sgjelinek 	}
1409*3247Sgjelinek 	if (vmu_data.vmu_calc_flags &
1410*3247Sgjelinek 	    (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
1411*3247Sgjelinek 	    VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
1412*3247Sgjelinek 	    VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
1413*3247Sgjelinek 	    VMUSAGE_ALL_EUSERS)) {
1414*3247Sgjelinek 		ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
1415*3247Sgjelinek 		    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
1416*3247Sgjelinek 		    (mod_hash_val_t *)&zone);
1417*3247Sgjelinek 		if (ret != 0) {
1418*3247Sgjelinek 			zone = vmu_alloc_zone(p->p_zone->zone_id);
1419*3247Sgjelinek 			ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
1420*3247Sgjelinek 			    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
1421*3247Sgjelinek 			    (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
1422*3247Sgjelinek 			ASSERT(ret == 0);
1423*3247Sgjelinek 		}
1424*3247Sgjelinek 		if (zone->vmz_zone != NULL) {
1425*3247Sgjelinek 			tmp = zone->vmz_zone;
1426*3247Sgjelinek 			tmp->vme_next_calc = entities;
1427*3247Sgjelinek 			entities = tmp;
1428*3247Sgjelinek 		}
1429*3247Sgjelinek 		if (vmu_data.vmu_calc_flags &
1430*3247Sgjelinek 		    (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
1431*3247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
1432*3247Sgjelinek 			    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
1433*3247Sgjelinek 			    zone->vmz_id);
1434*3247Sgjelinek 			tmp->vme_next_calc = entities;
1435*3247Sgjelinek 			entities = tmp;
1436*3247Sgjelinek 		}
1437*3247Sgjelinek 		if (vmu_data.vmu_calc_flags &
1438*3247Sgjelinek 		    (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
1439*3247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
1440*3247Sgjelinek 			    p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
1441*3247Sgjelinek 			tmp->vme_next_calc = entities;
1442*3247Sgjelinek 			entities = tmp;
1443*3247Sgjelinek 		}
1444*3247Sgjelinek 		if (vmu_data.vmu_calc_flags &
1445*3247Sgjelinek 		    (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
1446*3247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
1447*3247Sgjelinek 			    crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
1448*3247Sgjelinek 			tmp->vme_next_calc = entities;
1449*3247Sgjelinek 			entities = tmp;
1450*3247Sgjelinek 		}
1451*3247Sgjelinek 		if (vmu_data.vmu_calc_flags &
1452*3247Sgjelinek 		    (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
1453*3247Sgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
1454*3247Sgjelinek 			    crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
1455*3247Sgjelinek 			tmp->vme_next_calc = entities;
1456*3247Sgjelinek 			entities = tmp;
1457*3247Sgjelinek 		}
1458*3247Sgjelinek 	}
1459*3247Sgjelinek 	/* Entities which collapse projects and users for all zones */
1460*3247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
1461*3247Sgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
1462*3247Sgjelinek 		    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
1463*3247Sgjelinek 		tmp->vme_next_calc = entities;
1464*3247Sgjelinek 		entities = tmp;
1465*3247Sgjelinek 	}
1466*3247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
1467*3247Sgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
1468*3247Sgjelinek 		    crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
1469*3247Sgjelinek 		tmp->vme_next_calc = entities;
1470*3247Sgjelinek 		entities = tmp;
1471*3247Sgjelinek 	}
1472*3247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
1473*3247Sgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
1474*3247Sgjelinek 		    crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
1475*3247Sgjelinek 		tmp->vme_next_calc = entities;
1476*3247Sgjelinek 		entities = tmp;
1477*3247Sgjelinek 	}
1478*3247Sgjelinek 
1479*3247Sgjelinek 	ASSERT(entities != NULL);
1480*3247Sgjelinek 	/* process all segs in process's address space */
1481*3247Sgjelinek 	as = p->p_as;
1482*3247Sgjelinek 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1483*3247Sgjelinek 	for (seg = AS_SEGFIRST(as); seg != NULL;
1484*3247Sgjelinek 	    seg = AS_SEGNEXT(as, seg)) {
1485*3247Sgjelinek 		vmu_calculate_seg(entities, seg);
1486*3247Sgjelinek 	}
1487*3247Sgjelinek 	AS_LOCK_EXIT(as, &as->a_lock);
1488*3247Sgjelinek }
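
/*
 * For example, with flags (VMUSAGE_ZONE | VMUSAGE_PROJECTS), the code
 * above typically builds the list "project entity -> zone entity" for
 * each proc (entities are prepended, so the most recently found entity
 * is at the head), and vmu_calculate_seg() then charges every segment
 * in the proc's address space to both entities.
 */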
1489*3247Sgjelinek 
1490*3247Sgjelinek /*
1491*3247Sgjelinek  * Free data created by previous call to vmu_calculate().
1492*3247Sgjelinek  */
1493*3247Sgjelinek static void
1494*3247Sgjelinek vmu_clear_calc()
1495*3247Sgjelinek {
1496*3247Sgjelinek 	if (vmu_data.vmu_system != NULL)
1497*3247Sgjelinek 		vmu_free_entity(vmu_data.vmu_system);
1498*3247Sgjelinek 	vmu_data.vmu_system = NULL;
1499*3247Sgjelinek 	if (vmu_data.vmu_zones_hash != NULL)
1500*3247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
1501*3247Sgjelinek 	if (vmu_data.vmu_projects_col_hash != NULL)
1502*3247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
1503*3247Sgjelinek 	if (vmu_data.vmu_rusers_col_hash != NULL)
1504*3247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
1505*3247Sgjelinek 	if (vmu_data.vmu_eusers_col_hash != NULL)
1506*3247Sgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
1507*3247Sgjelinek 
1508*3247Sgjelinek 	i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
1509*3247Sgjelinek 	i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
1510*3247Sgjelinek }
1511*3247Sgjelinek 
1512*3247Sgjelinek /*
1513*3247Sgjelinek  * Free unused data structures.  These can result if the system workload
1514*3247Sgjelinek  * decreases between calculations.
1515*3247Sgjelinek  */
1516*3247Sgjelinek static void
1517*3247Sgjelinek vmu_free_extra()
1518*3247Sgjelinek {
1519*3247Sgjelinek 	vmu_bound_t *tb;
1520*3247Sgjelinek 	vmu_object_t *to;
1521*3247Sgjelinek 	vmu_entity_t *te;
1522*3247Sgjelinek 	vmu_zone_t *tz;
1523*3247Sgjelinek 
1524*3247Sgjelinek 	while (vmu_data.vmu_free_bounds != NULL) {
1525*3247Sgjelinek 		tb = vmu_data.vmu_free_bounds;
1526*3247Sgjelinek 		vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
1527*3247Sgjelinek 		kmem_cache_free(vmu_bound_cache, tb);
1528*3247Sgjelinek 	}
1529*3247Sgjelinek 	while (vmu_data.vmu_free_objects != NULL) {
1530*3247Sgjelinek 		to = vmu_data.vmu_free_objects;
1531*3247Sgjelinek 		vmu_data.vmu_free_objects =
1532*3247Sgjelinek 		    vmu_data.vmu_free_objects->vmo_next;
1533*3247Sgjelinek 		kmem_cache_free(vmu_object_cache, to);
1534*3247Sgjelinek 	}
1535*3247Sgjelinek 	while (vmu_data.vmu_free_entities != NULL) {
1536*3247Sgjelinek 		te = vmu_data.vmu_free_entities;
1537*3247Sgjelinek 		vmu_data.vmu_free_entities =
1538*3247Sgjelinek 		    vmu_data.vmu_free_entities->vme_next;
1539*3247Sgjelinek 		if (te->vme_vnode_hash != NULL)
1540*3247Sgjelinek 			mod_hash_destroy_hash(te->vme_vnode_hash);
1541*3247Sgjelinek 		if (te->vme_amp_hash != NULL)
1542*3247Sgjelinek 			mod_hash_destroy_hash(te->vme_amp_hash);
1543*3247Sgjelinek 		if (te->vme_anon_hash != NULL)
1544*3247Sgjelinek 			mod_hash_destroy_hash(te->vme_anon_hash);
1545*3247Sgjelinek 		kmem_free(te, sizeof (vmu_entity_t));
1546*3247Sgjelinek 	}
1547*3247Sgjelinek 	while (vmu_data.vmu_free_zones != NULL) {
1548*3247Sgjelinek 		tz = vmu_data.vmu_free_zones;
1549*3247Sgjelinek 		vmu_data.vmu_free_zones =
1550*3247Sgjelinek 		    vmu_data.vmu_free_zones->vmz_next;
1551*3247Sgjelinek 		if (tz->vmz_projects_hash != NULL)
1552*3247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_projects_hash);
1553*3247Sgjelinek 		if (tz->vmz_tasks_hash != NULL)
1554*3247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_tasks_hash);
1555*3247Sgjelinek 		if (tz->vmz_rusers_hash != NULL)
1556*3247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_rusers_hash);
1557*3247Sgjelinek 		if (tz->vmz_eusers_hash != NULL)
1558*3247Sgjelinek 			mod_hash_destroy_hash(tz->vmz_eusers_hash);
1559*3247Sgjelinek 		kmem_free(tz, sizeof (vmu_zone_t));
1560*3247Sgjelinek 	}
1561*3247Sgjelinek }
1562*3247Sgjelinek 
1563*3247Sgjelinek extern kcondvar_t *pr_pid_cv;
1564*3247Sgjelinek 
1565*3247Sgjelinek /*
1566*3247Sgjelinek  * Determine which entity types are relevant and allocate the hashes to
1567*3247Sgjelinek  * track them.  Then walk the process table and count rss and swap
1568*3247Sgjelinek  * for each process's address space.  Address space objects such as
1569*3247Sgjelinek  * vnodes, amps, and anons are tracked per entity, so that they are
1570*3247Sgjelinek  * not double counted in the results.
1571*3247Sgjelinek  */
1573*3247Sgjelinek static void
1574*3247Sgjelinek vmu_calculate()
1575*3247Sgjelinek {
1576*3247Sgjelinek 	int i = 0;
1577*3247Sgjelinek 	int ret;
1578*3247Sgjelinek 	proc_t *p;
1579*3247Sgjelinek 
1580*3247Sgjelinek 	vmu_clear_calc();
1581*3247Sgjelinek 
1582*3247Sgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
1583*3247Sgjelinek 		vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
1584*3247Sgjelinek 		    ALL_ZONES);
1585*3247Sgjelinek 
1586*3247Sgjelinek 	/*
1587*3247Sgjelinek 	 * Walk process table and calculate rss of each proc.
1588*3247Sgjelinek 	 *
1589*3247Sgjelinek 	 * Pidlock and p_lock cannot be held while doing the rss calculation.
1590*3247Sgjelinek 	 * This is because:
1591*3247Sgjelinek 	 *	1.  The calculation allocates using KM_SLEEP.
1592*3247Sgjelinek 	 *	2.  The calculation grabs a_lock, which cannot be grabbed
1593*3247Sgjelinek 	 *	    after p_lock.
1594*3247Sgjelinek 	 *
1595*3247Sgjelinek 	 * Since pidlock must be dropped, we cannot simply just walk the
1596*3247Sgjelinek 	 * practive list.  Instead, we walk the process table, and sprlock
1597*3247Sgjelinek 	 * each process to ensure that it does not exit during the
1598*3247Sgjelinek 	 * calculation.
1599*3247Sgjelinek 	 */
1600*3247Sgjelinek 
1601*3247Sgjelinek 	mutex_enter(&pidlock);
1602*3247Sgjelinek 	for (i = 0; i < v.v_proc; i++) {
1603*3247Sgjelinek again:
1604*3247Sgjelinek 		p = pid_entry(i);
1605*3247Sgjelinek 		if (p == NULL)
1606*3247Sgjelinek 			continue;
1607*3247Sgjelinek 
1608*3247Sgjelinek 		mutex_enter(&p->p_lock);
1609*3247Sgjelinek 		mutex_exit(&pidlock);
1610*3247Sgjelinek 
1611*3247Sgjelinek 		if (panicstr) {
1612*3247Sgjelinek 			mutex_exit(&p->p_lock);
1613*3247Sgjelinek 			return;
1614*3247Sgjelinek 		}
1615*3247Sgjelinek 
1616*3247Sgjelinek 		/* Try to set P_PR_LOCK */
1617*3247Sgjelinek 		ret = sprtrylock_proc(p);
1618*3247Sgjelinek 		if (ret == -1) {
1619*3247Sgjelinek 			/* Process in invalid state */
1620*3247Sgjelinek 			mutex_exit(&p->p_lock);
1621*3247Sgjelinek 			mutex_enter(&pidlock);
1622*3247Sgjelinek 			continue;
1623*3247Sgjelinek 		} else if (ret == 1) {
1624*3247Sgjelinek 			/*
1625*3247Sgjelinek 			 * P_PR_LOCK is already set.  Wait and try again.
1626*3247Sgjelinek 			 * This also drops p_lock.
1627*3247Sgjelinek 			 */
1628*3247Sgjelinek 			sprwaitlock_proc(p);
1629*3247Sgjelinek 			mutex_enter(&pidlock);
1630*3247Sgjelinek 			goto again;
1631*3247Sgjelinek 		}
1632*3247Sgjelinek 		mutex_exit(&p->p_lock);
1633*3247Sgjelinek 
1634*3247Sgjelinek 		vmu_calculate_proc(p);
1635*3247Sgjelinek 
1636*3247Sgjelinek 		mutex_enter(&p->p_lock);
1637*3247Sgjelinek 		sprunlock(p);
1638*3247Sgjelinek 		mutex_enter(&pidlock);
1639*3247Sgjelinek 	}
1640*3247Sgjelinek 	mutex_exit(&pidlock);
1641*3247Sgjelinek 
1642*3247Sgjelinek 	vmu_free_extra();
1643*3247Sgjelinek }
1644*3247Sgjelinek 
1645*3247Sgjelinek /*
1646*3247Sgjelinek  * Allocate a new cache with room for nres results satisfying flags.
1647*3247Sgjelinek  */
1648*3247Sgjelinek vmu_cache_t *
1649*3247Sgjelinek vmu_cache_alloc(size_t nres, uint_t flags)
1650*3247Sgjelinek {
1651*3247Sgjelinek 	vmu_cache_t *cache;
1652*3247Sgjelinek 
1653*3247Sgjelinek 	cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
1654*3247Sgjelinek 	cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
1655*3247Sgjelinek 	cache->vmc_nresults = nres;
1656*3247Sgjelinek 	cache->vmc_flags = flags;
1657*3247Sgjelinek 	cache->vmc_refcnt = 1;
1658*3247Sgjelinek 	return (cache);
1659*3247Sgjelinek }
1660*3247Sgjelinek 
1661*3247Sgjelinek /*
1662*3247Sgjelinek  * Take a hold on the cache so that the cached results are not freed.
1663*3247Sgjelinek  */
1664*3247Sgjelinek static void
1665*3247Sgjelinek vmu_cache_hold(vmu_cache_t *cache)
1666*3247Sgjelinek {
1667*3247Sgjelinek 	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
1668*3247Sgjelinek 	cache->vmc_refcnt++;
1669*3247Sgjelinek }
1670*3247Sgjelinek 
1671*3247Sgjelinek /*
1672*3247Sgjelinek  * Release a hold on the cache; free it when the last hold is released.
1673*3247Sgjelinek  */
1674*3247Sgjelinek static void
1675*3247Sgjelinek vmu_cache_rele(vmu_cache_t *cache)
1676*3247Sgjelinek {
1677*3247Sgjelinek 	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
1678*3247Sgjelinek 	ASSERT(cache->vmc_refcnt > 0);
1679*3247Sgjelinek 	cache->vmc_refcnt--;
1680*3247Sgjelinek 	if (cache->vmc_refcnt == 0) {
1681*3247Sgjelinek 		kmem_free(cache->vmc_results, sizeof (vmusage_t) *
1682*3247Sgjelinek 		    cache->vmc_nresults);
1683*3247Sgjelinek 		kmem_free(cache, sizeof (vmu_cache_t));
1684*3247Sgjelinek 	}
1685*3247Sgjelinek }
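
/*
 * Typical hold/rele pattern (as used in vm_getusage() below): hold the
 * cache under vmu_lock, drop the lock for the potentially long copyout,
 * then re-take the lock to release the hold:
 *
 *	cache = vmu_data.vmu_cache;
 *	vmu_cache_hold(cache);
 *	mutex_exit(&vmu_data.vmu_lock);
 *	ret = vmu_copyout_results(cache, buf, nres, flags_orig);
 *	mutex_enter(&vmu_data.vmu_lock);
 *	vmu_cache_rele(cache);
 */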
1686*3247Sgjelinek 
1687*3247Sgjelinek /*
1688*3247Sgjelinek  * Copy out the cached results to a caller.  Inspect the caller's flags
1689*3247Sgjelinek  * and zone to determine which cached results should be copied.
1690*3247Sgjelinek  */
1691*3247Sgjelinek static int
1692*3247Sgjelinek vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
1693*3247Sgjelinek     uint_t flags)
1694*3247Sgjelinek {
1695*3247Sgjelinek 	vmusage_t *result, *out_result;
1696*3247Sgjelinek 	vmusage_t dummy;
1697*3247Sgjelinek 	size_t i, count = 0;
1698*3247Sgjelinek 	size_t bufsize;
1699*3247Sgjelinek 	int ret = 0;
1700*3247Sgjelinek 	uint_t types = 0;
1701*3247Sgjelinek 
1702*3247Sgjelinek 	if (nres != NULL) {
1703*3247Sgjelinek 		if (copyin((caddr_t)nres, &bufsize, sizeof (size_t)))
1704*3247Sgjelinek 			return (set_errno(EFAULT));
1705*3247Sgjelinek 	} else {
1706*3247Sgjelinek 		bufsize = 0;
1707*3247Sgjelinek 	}
1708*3247Sgjelinek 
1709*3247Sgjelinek 	/* figure out what results the caller is interested in. */
1710*3247Sgjelinek 	if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
1711*3247Sgjelinek 		types |= VMUSAGE_SYSTEM;
1712*3247Sgjelinek 	if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
1713*3247Sgjelinek 		types |= VMUSAGE_ZONE;
1714*3247Sgjelinek 	if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
1715*3247Sgjelinek 	    VMUSAGE_COL_PROJECTS))
1716*3247Sgjelinek 		types |= VMUSAGE_PROJECTS;
1717*3247Sgjelinek 	if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
1718*3247Sgjelinek 		types |= VMUSAGE_TASKS;
1719*3247Sgjelinek 	if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
1720*3247Sgjelinek 		types |= VMUSAGE_RUSERS;
1721*3247Sgjelinek 	if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
1722*3247Sgjelinek 		types |= VMUSAGE_EUSERS;
1723*3247Sgjelinek 
1724*3247Sgjelinek 	/* count results for current zone */
1725*3247Sgjelinek 	out_result = buf;
1726*3247Sgjelinek 	for (result = cache->vmc_results, i = 0;
1727*3247Sgjelinek 	    i < cache->vmc_nresults; result++, i++) {
1728*3247Sgjelinek 
1729*3247Sgjelinek 		/* Do not return "other-zone" results to non-global zones */
1730*3247Sgjelinek 		if (curproc->p_zone != global_zone &&
1731*3247Sgjelinek 		    curproc->p_zone->zone_id != result->vmu_zoneid)
1732*3247Sgjelinek 			continue;
1733*3247Sgjelinek 
1734*3247Sgjelinek 		/*
1735*3247Sgjelinek 		 * If non-global zone requests VMUSAGE_SYSTEM, fake
1736*3247Sgjelinek 		 * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result.
1737*3247Sgjelinek 		 */
1738*3247Sgjelinek 		if (curproc->p_zone != global_zone &&
1739*3247Sgjelinek 		    (flags & VMUSAGE_SYSTEM) != 0 &&
1740*3247Sgjelinek 		    result->vmu_type == VMUSAGE_ZONE) {
1741*3247Sgjelinek 			count++;
1742*3247Sgjelinek 			if (out_result != NULL) {
1743*3247Sgjelinek 				if (bufsize < count) {
1744*3247Sgjelinek 					ret = set_errno(EOVERFLOW);
1745*3247Sgjelinek 				} else {
1746*3247Sgjelinek 					dummy = *result;
1747*3247Sgjelinek 					dummy.vmu_zoneid = ALL_ZONES;
1748*3247Sgjelinek 					dummy.vmu_id = 0;
1749*3247Sgjelinek 					dummy.vmu_type = VMUSAGE_SYSTEM;
1750*3247Sgjelinek 					if (copyout(&dummy, out_result,
1751*3247Sgjelinek 					    sizeof (vmusage_t)))
1752*3247Sgjelinek 						return (set_errno(
1753*3247Sgjelinek 						    EFAULT));
1754*3247Sgjelinek 					out_result++;
1755*3247Sgjelinek 				}
1756*3247Sgjelinek 			}
1757*3247Sgjelinek 		}
1758*3247Sgjelinek 
1759*3247Sgjelinek 		/* Skip results that do not match requested type */
1760*3247Sgjelinek 		if ((result->vmu_type & types) == 0)
1761*3247Sgjelinek 			continue;
1762*3247Sgjelinek 
1763*3247Sgjelinek 		/* Skip collated results if not requested */
1764*3247Sgjelinek 		if (result->vmu_zoneid == ALL_ZONES) {
1765*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_PROJECTS &&
1766*3247Sgjelinek 			    (flags & VMUSAGE_COL_PROJECTS) == 0)
1767*3247Sgjelinek 				continue;
1768*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_EUSERS &&
1769*3247Sgjelinek 			    (flags & VMUSAGE_COL_EUSERS) == 0)
1770*3247Sgjelinek 				continue;
1771*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_RUSERS &&
1772*3247Sgjelinek 			    (flags & VMUSAGE_COL_RUSERS) == 0)
1773*3247Sgjelinek 				continue;
1774*3247Sgjelinek 		}
1775*3247Sgjelinek 
1776*3247Sgjelinek 		/* Skip "other zone" results if not requested */
1777*3247Sgjelinek 		if (result->vmu_zoneid != curproc->p_zone->zone_id) {
1778*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_ZONE &&
1779*3247Sgjelinek 			    (flags & VMUSAGE_ALL_ZONES) == 0)
1780*3247Sgjelinek 				continue;
1781*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_PROJECTS &&
1782*3247Sgjelinek 			    (flags & (VMUSAGE_ALL_PROJECTS |
1783*3247Sgjelinek 			    VMUSAGE_COL_PROJECTS)) == 0)
1784*3247Sgjelinek 				continue;
1785*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_TASKS &&
1786*3247Sgjelinek 			    (flags & VMUSAGE_ALL_TASKS) == 0)
1787*3247Sgjelinek 				continue;
1788*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_RUSERS &&
1789*3247Sgjelinek 			    (flags & (VMUSAGE_ALL_RUSERS |
1790*3247Sgjelinek 			    VMUSAGE_COL_RUSERS)) == 0)
1791*3247Sgjelinek 				continue;
1792*3247Sgjelinek 			if (result->vmu_type == VMUSAGE_EUSERS &&
1793*3247Sgjelinek 			    (flags & (VMUSAGE_ALL_EUSERS |
1794*3247Sgjelinek 			    VMUSAGE_COL_EUSERS)) == 0)
1795*3247Sgjelinek 				continue;
1796*3247Sgjelinek 		}
1797*3247Sgjelinek 		count++;
1798*3247Sgjelinek 		if (out_result != NULL) {
1799*3247Sgjelinek 			if (bufsize < count) {
1800*3247Sgjelinek 				ret = set_errno(EOVERFLOW);
1801*3247Sgjelinek 			} else {
1802*3247Sgjelinek 				if (copyout(result, out_result,
1803*3247Sgjelinek 				    sizeof (vmusage_t)))
1804*3247Sgjelinek 					return (set_errno(EFAULT));
1805*3247Sgjelinek 				out_result++;
1806*3247Sgjelinek 			}
1807*3247Sgjelinek 		}
1808*3247Sgjelinek 	}
1809*3247Sgjelinek 	if (nres != NULL)
1810*3247Sgjelinek 		if (copyout(&count, (void *)nres, sizeof (size_t)))
1811*3247Sgjelinek 			return (set_errno(EFAULT));
1812*3247Sgjelinek 
1813*3247Sgjelinek 	return (ret);
1814*3247Sgjelinek }
1815*3247Sgjelinek 
1816*3247Sgjelinek /*
1817*3247Sgjelinek  * vm_getusage()
1818*3247Sgjelinek  *
1819*3247Sgjelinek  * Counts rss and swap by zone, project, task, and/or user.  The flags argument
1820*3247Sgjelinek  * determines the type of results structures returned.  Flags requesting
1821*3247Sgjelinek  * results from more than one zone are "flattened" to the local zone if the
1822*3247Sgjelinek  * caller is not the global zone.
1823*3247Sgjelinek  *
1824*3247Sgjelinek  * args:
1825*3247Sgjelinek  *	flags:	bitmap consisting of one or more of VMUSAGE_*.
1826*3247Sgjelinek  *	age:	maximum allowable age (time since counting was done) in
1827*3247Sgjelinek  *		seconds of the results.  Results from previous callers are
1828*3247Sgjelinek  *		cached in the kernel.
1829*3247Sgjelinek  *	buf:	pointer to buffer array of vmusage_t.  If NULL, then only nres
1830*3247Sgjelinek  *		is set on success.
1831*3247Sgjelinek  *	nres:	Set to the number of vmusage_t structures pointed to by buf
1832*3247Sgjelinek  *		before calling vm_getusage().
1833*3247Sgjelinek  *		On return of 0 (success) or EOVERFLOW, it is set to the number
1834*3247Sgjelinek  *		of result structures returned or attempted to be returned.
1835*3247Sgjelinek  *
1836*3247Sgjelinek  * returns 0 on success, -1 on failure:
1837*3247Sgjelinek  *	EINTR (interrupted)
1838*3247Sgjelinek  *	EOVERFLOW (nres too small for results; nres is set to the value needed
1839*3247Sgjelinek  *	    for success)
1840*3247Sgjelinek  *	EINVAL (flags invalid)
1841*3247Sgjelinek  *	EFAULT (bad address for buf or nres)
1841*3247Sgjelinek  */
1842*3247Sgjelinek int
1843*3247Sgjelinek vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres)
1844*3247Sgjelinek {
1845*3247Sgjelinek 	vmu_entity_t *entity;
1846*3247Sgjelinek 	vmusage_t *result;
1847*3247Sgjelinek 	int ret = 0;
1848*3247Sgjelinek 	int cacherecent = 0;
1849*3247Sgjelinek 	hrtime_t now;
1850*3247Sgjelinek 	uint_t flags_orig;
1851*3247Sgjelinek 
1852*3247Sgjelinek 	/*
1853*3247Sgjelinek 	 * Non-global zones cannot request system-wide and/or collated
1854*3247Sgjelinek 	 * results, or the system result, so munge the flags accordingly.
1855*3247Sgjelinek 	 */
1856*3247Sgjelinek 	flags_orig = flags;
1857*3247Sgjelinek 	if (curproc->p_zone != global_zone) {
1858*3247Sgjelinek 		if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
1859*3247Sgjelinek 			flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
1860*3247Sgjelinek 			flags |= VMUSAGE_PROJECTS;
1861*3247Sgjelinek 		}
1862*3247Sgjelinek 		if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
1863*3247Sgjelinek 			flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
1864*3247Sgjelinek 			flags |= VMUSAGE_RUSERS;
1865*3247Sgjelinek 		}
1866*3247Sgjelinek 		if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
1867*3247Sgjelinek 			flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
1868*3247Sgjelinek 			flags |= VMUSAGE_EUSERS;
1869*3247Sgjelinek 		}
1870*3247Sgjelinek 		if (flags & VMUSAGE_SYSTEM) {
1871*3247Sgjelinek 			flags &= ~VMUSAGE_SYSTEM;
1872*3247Sgjelinek 			flags |= VMUSAGE_ZONE;
1873*3247Sgjelinek 		}
1874*3247Sgjelinek 	}
1875*3247Sgjelinek 
1876*3247Sgjelinek 	/* Check for unknown flags */
1877*3247Sgjelinek 	if ((flags & (~VMUSAGE_MASK)) != 0)
1878*3247Sgjelinek 		return (set_errno(EINVAL));
1879*3247Sgjelinek 
1880*3247Sgjelinek 	/* Check for no flags */
1881*3247Sgjelinek 	if ((flags & VMUSAGE_MASK) == 0)
1882*3247Sgjelinek 		return (set_errno(EINVAL));
1883*3247Sgjelinek 
1884*3247Sgjelinek 	mutex_enter(&vmu_data.vmu_lock);
1885*3247Sgjelinek 	now = gethrtime();
1886*3247Sgjelinek 
1887*3247Sgjelinek start:
1888*3247Sgjelinek 	if (vmu_data.vmu_cache != NULL) {
1889*3247Sgjelinek 
1890*3247Sgjelinek 		vmu_cache_t *cache;
1891*3247Sgjelinek 
1892*3247Sgjelinek 		if ((vmu_data.vmu_cache->vmc_timestamp +
1893*3247Sgjelinek 		    ((hrtime_t)age * NANOSEC)) > now)
1894*3247Sgjelinek 			cacherecent = 1;
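		/*
		 * For example, with age == 5, a cache whose calculation
		 * finished three seconds ago has vmc_timestamp + 5 *
		 * NANOSEC still in the future, so it is recent enough
		 * for this caller.
		 */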
1895*3247Sgjelinek 
1896*3247Sgjelinek 		if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
1897*3247Sgjelinek 		    cacherecent == 1) {
1898*3247Sgjelinek 			cache = vmu_data.vmu_cache;
1899*3247Sgjelinek 			vmu_cache_hold(cache);
1900*3247Sgjelinek 			mutex_exit(&vmu_data.vmu_lock);
1901*3247Sgjelinek 
1902*3247Sgjelinek 			ret = vmu_copyout_results(cache, buf, nres, flags_orig);
1903*3247Sgjelinek 			mutex_enter(&vmu_data.vmu_lock);
1904*3247Sgjelinek 			vmu_cache_rele(cache);
1905*3247Sgjelinek 			if (vmu_data.vmu_pending_waiters > 0)
1906*3247Sgjelinek 				cv_broadcast(&vmu_data.vmu_cv);
1907*3247Sgjelinek 			mutex_exit(&vmu_data.vmu_lock);
1908*3247Sgjelinek 			return (ret);
1909*3247Sgjelinek 		}
1910*3247Sgjelinek 		/*
1911*3247Sgjelinek 		 * If the cache is recent, it is likely that there are other
1912*3247Sgjelinek 		 * consumers of vm_getusage running, so add their flags to the
1913*3247Sgjelinek 		 * desired flags for the calculation.
1914*3247Sgjelinek 		 */
1915*3247Sgjelinek 		if (cacherecent == 1)
1916*3247Sgjelinek 			flags = vmu_data.vmu_cache->vmc_flags | flags;
1917*3247Sgjelinek 	}
1918*3247Sgjelinek 	if (vmu_data.vmu_calc_thread == NULL) {
1919*3247Sgjelinek 
1920*3247Sgjelinek 		vmu_cache_t *cache;
1921*3247Sgjelinek 
1922*3247Sgjelinek 		vmu_data.vmu_calc_thread = curthread;
1923*3247Sgjelinek 		vmu_data.vmu_calc_flags = flags;
1924*3247Sgjelinek 		vmu_data.vmu_entities = NULL;
1925*3247Sgjelinek 		vmu_data.vmu_nentities = 0;
1926*3247Sgjelinek 		if (vmu_data.vmu_pending_waiters > 0)
1927*3247Sgjelinek 			vmu_data.vmu_calc_flags |=
1928*3247Sgjelinek 			    vmu_data.vmu_pending_flags;
1929*3247Sgjelinek 
1930*3247Sgjelinek 		vmu_data.vmu_pending_flags = 0;
1931*3247Sgjelinek 		mutex_exit(&vmu_data.vmu_lock);
1932*3247Sgjelinek 		vmu_calculate();
1933*3247Sgjelinek 		mutex_enter(&vmu_data.vmu_lock);
1934*3247Sgjelinek 		/* copy results to cache */
1935*3247Sgjelinek 		if (vmu_data.vmu_cache != NULL)
1936*3247Sgjelinek 			vmu_cache_rele(vmu_data.vmu_cache);
1937*3247Sgjelinek 		cache = vmu_data.vmu_cache =
1938*3247Sgjelinek 		    vmu_cache_alloc(vmu_data.vmu_nentities,
1939*3247Sgjelinek 		    vmu_data.vmu_calc_flags);
1940*3247Sgjelinek 
1941*3247Sgjelinek 		result = cache->vmc_results;
1942*3247Sgjelinek 		for (entity = vmu_data.vmu_entities; entity != NULL;
1943*3247Sgjelinek 		    entity = entity->vme_next) {
1944*3247Sgjelinek 			*result = entity->vme_result;
1945*3247Sgjelinek 			result++;
1946*3247Sgjelinek 		}
1947*3247Sgjelinek 		cache->vmc_timestamp = gethrtime();
1948*3247Sgjelinek 		vmu_cache_hold(cache);
1949*3247Sgjelinek 
1950*3247Sgjelinek 		vmu_data.vmu_calc_flags = 0;
1951*3247Sgjelinek 		vmu_data.vmu_calc_thread = NULL;
1952*3247Sgjelinek 
1953*3247Sgjelinek 		if (vmu_data.vmu_pending_waiters > 0)
1954*3247Sgjelinek 			cv_broadcast(&vmu_data.vmu_cv);
1955*3247Sgjelinek 
1956*3247Sgjelinek 		mutex_exit(&vmu_data.vmu_lock);
1957*3247Sgjelinek 
1958*3247Sgjelinek 		/* copy cache */
1959*3247Sgjelinek 		ret = vmu_copyout_results(cache, buf, nres, flags_orig);
1960*3247Sgjelinek 		mutex_enter(&vmu_data.vmu_lock);
1961*3247Sgjelinek 		vmu_cache_rele(cache);
1962*3247Sgjelinek 		mutex_exit(&vmu_data.vmu_lock);
1963*3247Sgjelinek 
1964*3247Sgjelinek 		return (ret);
1965*3247Sgjelinek 	}
1966*3247Sgjelinek 	vmu_data.vmu_pending_flags |= flags;
1967*3247Sgjelinek 	vmu_data.vmu_pending_waiters++;
1968*3247Sgjelinek 	while (vmu_data.vmu_calc_thread != NULL) {
1969*3247Sgjelinek 		if (cv_wait_sig(&vmu_data.vmu_cv,
1970*3247Sgjelinek 		    &vmu_data.vmu_lock) == 0) {
1971*3247Sgjelinek 			vmu_data.vmu_pending_waiters--;
1972*3247Sgjelinek 			mutex_exit(&vmu_data.vmu_lock);
1973*3247Sgjelinek 			return (set_errno(EINTR));
1974*3247Sgjelinek 		}
1975*3247Sgjelinek 	}
1976*3247Sgjelinek 	vmu_data.vmu_pending_waiters--;
1977*3247Sgjelinek 	goto start;
1978*3247Sgjelinek }
1979