xref: /openbsd-src/sys/uvm/uvm_aobj.c (revision 0b4f1452d8df729a522c5216b33d28622069a3aa)
1*0b4f1452Smpi /*	$OpenBSD: uvm_aobj.c,v 1.115 2024/12/27 12:04:40 mpi Exp $	*/
21414b0faSart /*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/
3cd7ee8acSart 
4cd7ee8acSart /*
5cd7ee8acSart  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
6cd7ee8acSart  *                    Washington University.
7cd7ee8acSart  * All rights reserved.
8cd7ee8acSart  *
9cd7ee8acSart  * Redistribution and use in source and binary forms, with or without
10cd7ee8acSart  * modification, are permitted provided that the following conditions
11cd7ee8acSart  * are met:
12cd7ee8acSart  * 1. Redistributions of source code must retain the above copyright
13cd7ee8acSart  *    notice, this list of conditions and the following disclaimer.
14cd7ee8acSart  * 2. Redistributions in binary form must reproduce the above copyright
15cd7ee8acSart  *    notice, this list of conditions and the following disclaimer in the
16cd7ee8acSart  *    documentation and/or other materials provided with the distribution.
17cd7ee8acSart  *
18cd7ee8acSart  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19cd7ee8acSart  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20cd7ee8acSart  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21cd7ee8acSart  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22cd7ee8acSart  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23cd7ee8acSart  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24cd7ee8acSart  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25cd7ee8acSart  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26cd7ee8acSart  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27cd7ee8acSart  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28cd7ee8acSart  *
29cd7ee8acSart  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
30cd7ee8acSart  */
31cd7ee8acSart /*
32cd7ee8acSart  * uvm_aobj.c: anonymous memory uvm_object pager
33cd7ee8acSart  *
34cd7ee8acSart  * author: Chuck Silvers <chuq@chuq.com>
35cd7ee8acSart  * started: Jan-1998
36cd7ee8acSart  *
37cd7ee8acSart  * - design mostly from Chuck Cranor
38cd7ee8acSart  */
39cd7ee8acSart 
40cd7ee8acSart #include <sys/param.h>
41cd7ee8acSart #include <sys/systm.h>
42cd7ee8acSart #include <sys/malloc.h>
43fd628a11Sart #include <sys/kernel.h>
44cd7ee8acSart #include <sys/pool.h>
451731322cSespie #include <sys/stdint.h>
4603d1830dStedu #include <sys/atomic.h>
47cd7ee8acSart 
48cd7ee8acSart #include <uvm/uvm.h>
49cd7ee8acSart 
50cd7ee8acSart /*
5152887a38Smpi  * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
5252887a38Smpi  * keeping the list of resident pages, it may also keep a list of allocated
5352887a38Smpi  * swap blocks.  Depending on the size of the object, this list is either
5452887a38Smpi  * stored in an array (small objects) or in a hash table (large objects).
55cd7ee8acSart  */
56cd7ee8acSart 
57cd7ee8acSart /*
5852887a38Smpi  * Note: for hash tables, we break the address space of the aobj into blocks
5952887a38Smpi  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
60cd7ee8acSart  */
61cd7ee8acSart #define	UAO_SWHASH_CLUSTER_SHIFT	4
62cd7ee8acSart #define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)
63cd7ee8acSart 
6452887a38Smpi /* Get the "tag" for this page index. */
6539c73ac7Smpi #define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
6639c73ac7Smpi #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
6739c73ac7Smpi     ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))
68cd7ee8acSart 
6952887a38Smpi /* Given an ELT and a page index, find the swap slot. */
7039c73ac7Smpi #define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
7139c73ac7Smpi     ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])
72cd7ee8acSart 
7352887a38Smpi /* Given an ELT, return its pageidx base. */
7439c73ac7Smpi #define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
7539c73ac7Smpi     ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)
76cd7ee8acSart 
7752887a38Smpi /* The hash function. */
7839c73ac7Smpi #define	UAO_SWHASH_HASH(aobj, idx) \
7939c73ac7Smpi     (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
8039c73ac7Smpi     & (aobj)->u_swhashmask)])
81cd7ee8acSart 
82cd7ee8acSart /*
8352887a38Smpi  * The threshold which determines whether we will use an array or a
84cd7ee8acSart  * hash table to store the list of allocated swap blocks.
85cd7ee8acSart  */
86cd7ee8acSart #define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
8739c73ac7Smpi #define	UAO_USES_SWHASH(aobj) \
8839c73ac7Smpi     ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)
89cd7ee8acSart 
9052887a38Smpi /* The number of buckets in a hash, with an upper bound. */
91cd7ee8acSart #define	UAO_SWHASH_MAXBUCKETS		256
921731322cSespie #define	UAO_SWHASH_BUCKETS(pages) \
931731322cSespie     (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
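
/*
 * Worked example (illustrative only, derived from the macros above):
 * with UAO_SWHASH_CLUSTER_SHIFT == 4, a cluster holds 16 slots and
 * UAO_SWHASH_THRESHOLD is 64 pages, so an aobj of 512 pages uses a
 * hash with UAO_SWHASH_BUCKETS(512) == min(512 >> 4, 256) == 32
 * buckets.  For page index 0x123 in such an object:
 *
 *	UAO_SWHASH_ELT_TAG(0x123)		== 0x12	(0x123 >> 4)
 *	UAO_SWHASH_ELT_PAGESLOT_IDX(0x123)	== 0x3	(0x123 & 0xf)
 *
 * i.e. the swap slot lives in slots[3] of the element tagged 0x12,
 * in the bucket picked by UAO_SWHASH_HASH(aobj, 0x123).
 */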
94cd7ee8acSart 
95cd7ee8acSart 
96cd7ee8acSart /*
97cd7ee8acSart  * uao_swhash_elt: when a hash table is being used, this structure defines
98cd7ee8acSart  * the format of an entry in the bucket list.
99cd7ee8acSart  */
100cd7ee8acSart struct uao_swhash_elt {
101cd7ee8acSart 	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
10240cf655dSart 	voff_t tag;				/* our 'tag' */
103cd7ee8acSart 	int count;				/* our number of active slots */
104cd7ee8acSart 	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
105cd7ee8acSart };
106cd7ee8acSart 
107cd7ee8acSart /*
108cd7ee8acSart  * uao_swhash: the swap hash table structure
109cd7ee8acSart  */
110cd7ee8acSart LIST_HEAD(uao_swhash, uao_swhash_elt);
111cd7ee8acSart 
112cd7ee8acSart /*
113cd7ee8acSart  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
114cd7ee8acSart  */
115cd7ee8acSart struct pool uao_swhash_elt_pool;
116cd7ee8acSart 
117cd7ee8acSart /*
118cd7ee8acSart  * uvm_aobj: the actual anon-backed uvm_object
119cd7ee8acSart  *
120cd7ee8acSart  * => the uvm_object is at the top of the structure, which allows
1210b0fe1a1Soga  *   (struct uvm_aobj *) == (struct uvm_object *)
122cd7ee8acSart  * => only one of u_swslots and u_swhash is used in any given aobj
123cd7ee8acSart  */
124cd7ee8acSart struct uvm_aobj {
125b8a635f6Stedu 	struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
126cd7ee8acSart 	int u_pages;		 /* number of pages in entire object */
127cd7ee8acSart 	int u_flags;		 /* the flags (see uvm_aobj.h) */
128cd7ee8acSart 	/*
129139fed43Soga 	 * Either an array or hashtable (array of bucket heads) of
130139fed43Soga 	 * offset -> swapslot mappings for the aobj.
131cd7ee8acSart 	 */
132139fed43Soga #define u_swslots	u_swap.slot_array
133139fed43Soga #define u_swhash	u_swap.slot_hash
134139fed43Soga 	union swslots {
135139fed43Soga 		int			*slot_array;
136139fed43Soga 		struct uao_swhash	*slot_hash;
137139fed43Soga 	} u_swap;
138cd7ee8acSart 	u_long u_swhashmask;		/* mask for hashtable */
139cd7ee8acSart 	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
140cd7ee8acSart };
141cd7ee8acSart 
142cd7ee8acSart struct pool uvm_aobj_pool;
143cd7ee8acSart 
1441e3e475dSoga static struct uao_swhash_elt	*uao_find_swhash_elt(struct uvm_aobj *, int,
1452023d591Soga 				     boolean_t);
1461e3e475dSoga static boolean_t		 uao_flush(struct uvm_object *, voff_t,
1471e3e475dSoga 				     voff_t, int);
1481e3e475dSoga static void			 uao_free(struct uvm_aobj *);
1491e3e475dSoga static int			 uao_get(struct uvm_object *, voff_t,
1501e3e475dSoga 				     vm_page_t *, int *, int, vm_prot_t,
1511e3e475dSoga 				     int, int);
1521e3e475dSoga static boolean_t		 uao_pagein(struct uvm_aobj *, int, int);
1531e3e475dSoga static boolean_t		 uao_pagein_page(struct uvm_aobj *, int);
154cd7ee8acSart 
1551731322cSespie void	uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
1561731322cSespie void	uao_shrink_flush(struct uvm_object *, int, int);
1571731322cSespie int	uao_shrink_hash(struct uvm_object *, int);
1581731322cSespie int	uao_shrink_array(struct uvm_object *, int);
1591731322cSespie int	uao_shrink_convert(struct uvm_object *, int);
1601731322cSespie 
1611731322cSespie int	uao_grow_hash(struct uvm_object *, int);
1621731322cSespie int	uao_grow_array(struct uvm_object *, int);
1631731322cSespie int	uao_grow_convert(struct uvm_object *, int);
1641731322cSespie 
165cd7ee8acSart /*
166cd7ee8acSart  * aobj_pager
167cd7ee8acSart  *
168cd7ee8acSart  * note that some functions (e.g. put) are handled elsewhere
169cd7ee8acSart  */
1709f7b7ef0Smpi const struct uvm_pagerops aobj_pager = {
1719f7b7ef0Smpi 	.pgo_reference = uao_reference,
1729f7b7ef0Smpi 	.pgo_detach = uao_detach,
1739f7b7ef0Smpi 	.pgo_flush = uao_flush,
1749f7b7ef0Smpi 	.pgo_get = uao_get,
175cd7ee8acSart };
176cd7ee8acSart 
177cd7ee8acSart /*
178cd7ee8acSart  * uao_list: global list of active aobjs, locked by uao_list_lock
17969ba976bSoga  *
18069ba976bSoga  * Lock ordering: generally the locking order is object lock, then list lock.
18169ba976bSoga  * In the case of swap off we have to iterate over the list, and thus the
18269ba976bSoga  * ordering is reversed. In that case we must use trylocking to prevent
18369ba976bSoga  * deadlock.
184cd7ee8acSart  */
185bd69ae14Soga static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
18669c04514Smpi static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
187cd7ee8acSart 
188cd7ee8acSart 
189cd7ee8acSart /*
190cd7ee8acSart  * functions
191cd7ee8acSart  */
192cd7ee8acSart /*
193cd7ee8acSart  * hash table/array related functions
194cd7ee8acSart  */
195cd7ee8acSart /*
196cd7ee8acSart  * uao_find_swhash_elt: find (or create) a hash table entry for a page
197cd7ee8acSart  * offset.
198cd7ee8acSart  */
1991e3e475dSoga static struct uao_swhash_elt *
2002023d591Soga uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
201cd7ee8acSart {
202cd7ee8acSart 	struct uao_swhash *swhash;
203cd7ee8acSart 	struct uao_swhash_elt *elt;
20440cf655dSart 	voff_t page_tag;
205cd7ee8acSart 
2061414b0faSart 	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
2071414b0faSart 	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	/* tag to search for */
208cd7ee8acSart 
20952887a38Smpi 	/*
21052887a38Smpi 	 * now search the bucket for the requested tag
21152887a38Smpi 	 */
212fd628a11Sart 	LIST_FOREACH(elt, swhash, list) {
2131414b0faSart 		if (elt->tag == page_tag)
214b9df1565Smpi 			return elt;
215cd7ee8acSart 	}
2161414b0faSart 
2171414b0faSart 	if (!create)
218cd7ee8acSart 		return NULL;
2191414b0faSart 
22052887a38Smpi 	/*
22152887a38Smpi 	 * allocate a new entry for the bucket and init/insert it in
22252887a38Smpi 	 */
223e7d50abdSkettenis 	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
224e7d50abdSkettenis 	/*
225e7d50abdSkettenis 	 * XXX We cannot sleep here as the hash table might disappear
226e7d50abdSkettenis 	 * from under our feet.  And we run the risk of deadlocking
227e7d50abdSkettenis 	 * the pagedaemon.  In fact this code will only be called by
228e7d50abdSkettenis 	 * the pagedaemon and allocation will only fail if we
229e7d50abdSkettenis 	 * exhausted the pagedaemon reserve.  In that case we're
230e7d50abdSkettenis 	 * doomed anyway, so panic.
231e7d50abdSkettenis 	 */
232e7d50abdSkettenis 	if (elt == NULL)
233e7d50abdSkettenis 		panic("%s: can't allocate entry", __func__);
234cd7ee8acSart 	LIST_INSERT_HEAD(swhash, elt, list);
235cd7ee8acSart 	elt->tag = page_tag;
2361414b0faSart 
237b9df1565Smpi 	return elt;
238cd7ee8acSart }
239cd7ee8acSart 
240cd7ee8acSart /*
241cd7ee8acSart  * uao_find_swslot: find the swap slot number for an aobj/pageidx
242cd7ee8acSart  */
243a375eb79Smpi int
24457296fa7Smpi uao_find_swslot(struct uvm_object *uobj, int pageidx)
245cd7ee8acSart {
24657296fa7Smpi 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
24757296fa7Smpi 
24857296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
249cd7ee8acSart 
25052887a38Smpi 	/*
25152887a38Smpi 	 * if noswap flag is set, then we never return a slot
25252887a38Smpi 	 */
253cd7ee8acSart 	if (aobj->u_flags & UAO_FLAG_NOSWAP)
254b9df1565Smpi 		return 0;
255cd7ee8acSart 
25652887a38Smpi 	/*
25752887a38Smpi 	 * if hashing, look in hash table.
25852887a38Smpi 	 */
25939c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
260cd7ee8acSart 		struct uao_swhash_elt *elt =
261cd7ee8acSart 		    uao_find_swhash_elt(aobj, pageidx, FALSE);
262cd7ee8acSart 
263cd7ee8acSart 		if (elt)
264b9df1565Smpi 			return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
265cd7ee8acSart 		else
266b9df1565Smpi 			return 0;
267cd7ee8acSart 	}
268cd7ee8acSart 
26952887a38Smpi 	/*
27052887a38Smpi 	 * otherwise, look in the array
27152887a38Smpi 	 */
272b9df1565Smpi 	return aobj->u_swslots[pageidx];
273cd7ee8acSart }
274cd7ee8acSart 
275cd7ee8acSart /*
276cd7ee8acSart  * uao_set_swslot: set the swap slot for a page in an aobj.
277cd7ee8acSart  *
278cd7ee8acSart  * => setting a slot to zero frees the slot
27969c04514Smpi  * => object must be locked by caller
28052887a38Smpi  * => we return the old slot number, or -1 if we failed to allocate
28152887a38Smpi  *    memory to record the new slot number
282cd7ee8acSart  */
283cd7ee8acSart int
2842023d591Soga uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
285cd7ee8acSart {
286cd7ee8acSart 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
287cd7ee8acSart 	int oldslot;
288cd7ee8acSart 
28969c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
29057296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
291ac25e10fSmpi 
29252887a38Smpi 	/*
29352887a38Smpi 	 * if noswap flag is set, then we can't set a slot
29452887a38Smpi 	 */
295cd7ee8acSart 	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
296cd7ee8acSart 		if (slot == 0)
297b9df1565Smpi 			return 0;		/* a clear is ok */
298cd7ee8acSart 
299cd7ee8acSart 		/* but a set is not */
300cd7ee8acSart 		printf("uao_set_swslot: uobj = %p\n", uobj);
30152887a38Smpi 	    	panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
302cd7ee8acSart 	}
303cd7ee8acSart 
30452887a38Smpi 	/*
30552887a38Smpi 	 * are we using a hash table?  if so, add it in the hash.
30652887a38Smpi 	 */
30739c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
308cd7ee8acSart 		/*
309cd7ee8acSart 		 * Avoid allocating an entry just to free it again if
310cd7ee8acSart 		 * the page had no swap slot in the first place, and
311cd7ee8acSart 		 * we are freeing.
312cd7ee8acSart 		 */
3131414b0faSart 		struct uao_swhash_elt *elt =
3141414b0faSart 		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
315cd7ee8acSart 		if (elt == NULL) {
3161414b0faSart 			KASSERT(slot == 0);
317b9df1565Smpi 			return 0;
318cd7ee8acSart 		}
319cd7ee8acSart 
320cd7ee8acSart 		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
321cd7ee8acSart 		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
322cd7ee8acSart 
323cd7ee8acSart 		/*
324cd7ee8acSart 		 * now adjust the elt's reference counter and free it if we've
325cd7ee8acSart 		 * dropped it to zero.
326cd7ee8acSart 		 */
327cd7ee8acSart 		if (slot) {
328cd7ee8acSart 			if (oldslot == 0)
329cd7ee8acSart 				elt->count++;
33052887a38Smpi 		} else {
33152887a38Smpi 			if (oldslot)
332cd7ee8acSart 				elt->count--;
333cd7ee8acSart 
334cd7ee8acSart 			if (elt->count == 0) {
335cd7ee8acSart 				LIST_REMOVE(elt, list);
336cd7ee8acSart 				pool_put(&uao_swhash_elt_pool, elt);
337cd7ee8acSart 			}
338cd7ee8acSart 		}
339cd7ee8acSart 	} else {
340cd7ee8acSart 		/* we are using an array */
341cd7ee8acSart 		oldslot = aobj->u_swslots[pageidx];
342cd7ee8acSart 		aobj->u_swslots[pageidx] = slot;
343cd7ee8acSart 	}
344b9df1565Smpi 	return oldslot;
345cd7ee8acSart }
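
/*
 * Usage sketch (illustrative, not compiled): releasing whatever swap
 * backs page `pageidx' of an aobj follows the pattern used by
 * uao_dropswap() later in this file:
 *
 *	slot = uao_set_swslot(uobj, pageidx, 0);
 *	if (slot)
 *		uvm_swap_free(slot, 1);
 */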
346cd7ee8acSart /*
347cd7ee8acSart  * end of hash/array functions
348cd7ee8acSart  */
349cd7ee8acSart 
350cd7ee8acSart /*
351cd7ee8acSart  * uao_free: free all resources held by an aobj, and then free the aobj
352cd7ee8acSart  *
353cd7ee8acSart  * => the aobj should be dead
354cd7ee8acSart  */
3551e3e475dSoga static void
3562023d591Soga uao_free(struct uvm_aobj *aobj)
357cd7ee8acSart {
3584b17baa2Smpi 	struct uvm_object *uobj = &aobj->u_obj;
3594b17baa2Smpi 
36057296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
36169c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
3624b17baa2Smpi 	uao_dropswap_range(uobj, 0, 0);
36369c04514Smpi 	rw_exit(uobj->vmobjlock);
364cd7ee8acSart 
36539c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
366cd7ee8acSart 		/*
3674b17baa2Smpi 		 * free the hash table itself.
368cd7ee8acSart 		 */
3696540f983Stedu 		hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
370cd7ee8acSart 	} else {
371cd500754Sdhill 		free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
372cd7ee8acSart 	}
373cd7ee8acSart 
37452887a38Smpi 	/*
37552887a38Smpi 	 * finally free the aobj itself
37652887a38Smpi 	 */
3772c850ee8Smpi 	uvm_obj_destroy(uobj);
378cd7ee8acSart 	pool_put(&uvm_aobj_pool, aobj);
379cd7ee8acSart }
380cd7ee8acSart 
381cd7ee8acSart /*
382cd7ee8acSart  * pager functions
383cd7ee8acSart  */
384cd7ee8acSart 
385a4b88e66Smpi #ifdef TMPFS
386cd7ee8acSart /*
3871731322cSespie  * Shrink an aobj to a given number of pages. The procedure is always the same:
3881731322cSespie  * assess the necessity of data structure conversion (hash to array), secure
3891731322cSespie  * resources, flush pages and drop swap slots.
3911731322cSespie  */
3921731322cSespie 
3931731322cSespie void
3941731322cSespie uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
3951731322cSespie {
3961731322cSespie 	KASSERT(startpg < endpg);
3971731322cSespie 	KASSERT(uobj->uo_refs == 1);
39836d5d901Skettenis 	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
39936d5d901Skettenis 	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
4001731322cSespie 	uao_dropswap_range(uobj, startpg, endpg);
4011731322cSespie }
4021731322cSespie 
4031731322cSespie int
4041731322cSespie uao_shrink_hash(struct uvm_object *uobj, int pages)
4051731322cSespie {
4061731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
4071731322cSespie 	struct uao_swhash *new_swhash;
408810f6bbdSkettenis 	struct uao_swhash_elt *elt;
4091731322cSespie 	unsigned long new_hashmask;
4101731322cSespie 	int i;
4111731322cSespie 
41239c73ac7Smpi 	KASSERT(UAO_USES_SWHASH(aobj));
4131731322cSespie 
4141731322cSespie 	/*
4151731322cSespie 	 * If the size of the hash table doesn't change, all we need to do is
4161731322cSespie 	 * to adjust the page count.
4171731322cSespie 	 */
4181731322cSespie 	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
4194f7816f6Skettenis 		uao_shrink_flush(uobj, pages, aobj->u_pages);
4201731322cSespie 		aobj->u_pages = pages;
4211731322cSespie 		return 0;
4221731322cSespie 	}
4231731322cSespie 
4241731322cSespie 	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
4251731322cSespie 	    M_WAITOK | M_CANFAIL, &new_hashmask);
4261731322cSespie 	if (new_swhash == NULL)
4271731322cSespie 		return ENOMEM;
4281731322cSespie 
4291731322cSespie 	uao_shrink_flush(uobj, pages, aobj->u_pages);
4301731322cSespie 
4311731322cSespie 	/*
4321731322cSespie 	 * Even though the hash table size is changing, the hash of the buckets
4331731322cSespie 	 * we are interested in copying should not change.
4341731322cSespie 	 */
435810f6bbdSkettenis 	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
436810f6bbdSkettenis 		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
437810f6bbdSkettenis 			elt = LIST_FIRST(&aobj->u_swhash[i]);
438810f6bbdSkettenis 			LIST_REMOVE(elt, list);
439810f6bbdSkettenis 			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
440810f6bbdSkettenis 		}
441810f6bbdSkettenis 	}
4421731322cSespie 
4436540f983Stedu 	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
4441731322cSespie 
4451731322cSespie 	aobj->u_swhash = new_swhash;
4461731322cSespie 	aobj->u_pages = pages;
4471731322cSespie 	aobj->u_swhashmask = new_hashmask;
4481731322cSespie 
4491731322cSespie 	return 0;
4501731322cSespie }
4511731322cSespie 
4521731322cSespie int
4531731322cSespie uao_shrink_convert(struct uvm_object *uobj, int pages)
4541731322cSespie {
4551731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
4561731322cSespie 	struct uao_swhash_elt *elt;
4571731322cSespie 	int i, *new_swslots;
4581731322cSespie 
459540e394aSdoug 	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
4601731322cSespie 	    M_WAITOK | M_CANFAIL | M_ZERO);
4611731322cSespie 	if (new_swslots == NULL)
4621731322cSespie 		return ENOMEM;
4631731322cSespie 
4641731322cSespie 	uao_shrink_flush(uobj, pages, aobj->u_pages);
4651731322cSespie 
46635164244Stedu 	/* Convert swap slots from hash to array.  */
4671731322cSespie 	for (i = 0; i < pages; i++) {
4681731322cSespie 		elt = uao_find_swhash_elt(aobj, i, FALSE);
4691731322cSespie 		if (elt != NULL) {
4701731322cSespie 			new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
4711731322cSespie 			if (new_swslots[i] != 0)
4721731322cSespie 				elt->count--;
4731731322cSespie 			if (elt->count == 0) {
4741731322cSespie 				LIST_REMOVE(elt, list);
4751731322cSespie 				pool_put(&uao_swhash_elt_pool, elt);
4761731322cSespie 			}
4771731322cSespie 		}
4781731322cSespie 	}
4791731322cSespie 
4806540f983Stedu 	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
4811731322cSespie 
4821731322cSespie 	aobj->u_swslots = new_swslots;
4831731322cSespie 	aobj->u_pages = pages;
4841731322cSespie 
4851731322cSespie 	return 0;
4861731322cSespie }
4871731322cSespie 
4881731322cSespie int
4891731322cSespie uao_shrink_array(struct uvm_object *uobj, int pages)
4901731322cSespie {
4911731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
4921731322cSespie 	int i, *new_swslots;
4931731322cSespie 
494540e394aSdoug 	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
4951731322cSespie 	    M_WAITOK | M_CANFAIL | M_ZERO);
4961731322cSespie 	if (new_swslots == NULL)
4971731322cSespie 		return ENOMEM;
4981731322cSespie 
4991731322cSespie 	uao_shrink_flush(uobj, pages, aobj->u_pages);
5001731322cSespie 
5011731322cSespie 	for (i = 0; i < pages; i++)
5021731322cSespie 		new_swslots[i] = aobj->u_swslots[i];
5031731322cSespie 
504cd500754Sdhill 	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
5051731322cSespie 
5061731322cSespie 	aobj->u_swslots = new_swslots;
5071731322cSespie 	aobj->u_pages = pages;
5081731322cSespie 
5091731322cSespie 	return 0;
5101731322cSespie }
5111731322cSespie 
5121731322cSespie int
5131731322cSespie uao_shrink(struct uvm_object *uobj, int pages)
5141731322cSespie {
5151731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
5161731322cSespie 
5171731322cSespie 	KASSERT(pages < aobj->u_pages);
5181731322cSespie 
5191731322cSespie 	/*
5201731322cSespie 	 * Distinguish between three possible cases:
5211731322cSespie 	 * 1. aobj uses hash and must be converted to array.
5221731322cSespie 	 * 2. aobj uses array and array size needs to be adjusted.
5231731322cSespie 	 * 3. aobj uses hash and hash size needs to be adjusted.
5241731322cSespie 	 */
5251731322cSespie 	if (pages > UAO_SWHASH_THRESHOLD)
5261731322cSespie 		return uao_shrink_hash(uobj, pages);	/* case 3 */
5271731322cSespie 	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
5281731322cSespie 		return uao_shrink_convert(uobj, pages);	/* case 1 */
5291731322cSespie 	else
5301731322cSespie 		return uao_shrink_array(uobj, pages);	/* case 2 */
5311731322cSespie }
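
/*
 * Concrete numbers (illustrative): with UAO_SWHASH_THRESHOLD == 64,
 * shrinking 128 -> 96 pages resizes the hash (case 3), 128 -> 32
 * converts the hash to an array (case 1), and 48 -> 32 resizes the
 * array (case 2).
 */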
5321731322cSespie 
5331731322cSespie /*
5341731322cSespie  * Grow an aobj to a given number of pages. Right now we only adjust the swap
5351731322cSespie  * slots. We could additionally handle page allocation directly, so that they
5361731322cSespie  * don't happen through uvm_fault(). That would allow us to use another
5371731322cSespie  * mechanism for the swap slots other than malloc(). It is thus mandatory that
5381731322cSespie  * the caller of these functions does not allow faults to happen in case of
5391731322cSespie  * growth error.
5401731322cSespie  */
5411731322cSespie int
5421731322cSespie uao_grow_array(struct uvm_object *uobj, int pages)
5431731322cSespie {
5441731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
5451731322cSespie 	int i, *new_swslots;
5461731322cSespie 
5471731322cSespie 	KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);
5481731322cSespie 
549540e394aSdoug 	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
5501731322cSespie 	    M_WAITOK | M_CANFAIL | M_ZERO);
5511731322cSespie 	if (new_swslots == NULL)
5521731322cSespie 		return ENOMEM;
5531731322cSespie 
5541731322cSespie 	for (i = 0; i < aobj->u_pages; i++)
5551731322cSespie 		new_swslots[i] = aobj->u_swslots[i];
5561731322cSespie 
557cd500754Sdhill 	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
5581731322cSespie 
5591731322cSespie 	aobj->u_swslots = new_swslots;
5601731322cSespie 	aobj->u_pages = pages;
5611731322cSespie 
5621731322cSespie 	return 0;
5631731322cSespie }
5641731322cSespie 
5651731322cSespie int
5661731322cSespie uao_grow_hash(struct uvm_object *uobj, int pages)
5671731322cSespie {
5681731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
5691731322cSespie 	struct uao_swhash *new_swhash;
5701731322cSespie 	struct uao_swhash_elt *elt;
5711731322cSespie 	unsigned long new_hashmask;
5721731322cSespie 	int i;
5731731322cSespie 
5741731322cSespie 	KASSERT(pages > UAO_SWHASH_THRESHOLD);
5751731322cSespie 
5761731322cSespie 	/*
5771731322cSespie 	 * If the size of the hash table doesn't change, all we need to do is
5781731322cSespie 	 * to adjust the page count.
5791731322cSespie 	 */
5801731322cSespie 	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
5811731322cSespie 		aobj->u_pages = pages;
5821731322cSespie 		return 0;
5831731322cSespie 	}
5841731322cSespie 
5851731322cSespie 	KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));
5861731322cSespie 
5871731322cSespie 	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
5881731322cSespie 	    M_WAITOK | M_CANFAIL, &new_hashmask);
5891731322cSespie 	if (new_swhash == NULL)
5901731322cSespie 		return ENOMEM;
5911731322cSespie 
5921731322cSespie 	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
5931731322cSespie 		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
5941731322cSespie 			elt = LIST_FIRST(&aobj->u_swhash[i]);
5951731322cSespie 			LIST_REMOVE(elt, list);
5961731322cSespie 			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
5971731322cSespie 		}
5981731322cSespie 	}
5991731322cSespie 
6006540f983Stedu 	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
6011731322cSespie 
6021731322cSespie 	aobj->u_swhash = new_swhash;
6031731322cSespie 	aobj->u_pages = pages;
6041731322cSespie 	aobj->u_swhashmask = new_hashmask;
6051731322cSespie 
6061731322cSespie 	return 0;
6071731322cSespie }
6081731322cSespie 
6091731322cSespie int
6101731322cSespie uao_grow_convert(struct uvm_object *uobj, int pages)
6111731322cSespie {
6121731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
6131731322cSespie 	struct uao_swhash *new_swhash;
6141731322cSespie 	struct uao_swhash_elt *elt;
6151731322cSespie 	unsigned long new_hashmask;
6161731322cSespie 	int i, *old_swslots;
6171731322cSespie 
6181731322cSespie 	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
6191731322cSespie 	    M_WAITOK | M_CANFAIL, &new_hashmask);
6201731322cSespie 	if (new_swhash == NULL)
6211731322cSespie 		return ENOMEM;
6221731322cSespie 
62335164244Stedu 	/* Set these now, so we can use uao_find_swhash_elt(). */
6241731322cSespie 	old_swslots = aobj->u_swslots;
6251731322cSespie 	aobj->u_swhash = new_swhash;
6261731322cSespie 	aobj->u_swhashmask = new_hashmask;
6271731322cSespie 
6281731322cSespie 	for (i = 0; i < aobj->u_pages; i++) {
6291731322cSespie 		if (old_swslots[i] != 0) {
6301731322cSespie 			elt = uao_find_swhash_elt(aobj, i, TRUE);
6311731322cSespie 			elt->count++;
6321731322cSespie 			UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
6331731322cSespie 		}
6341731322cSespie 	}
6351731322cSespie 
636cd500754Sdhill 	free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
6371731322cSespie 	aobj->u_pages = pages;
6381731322cSespie 
6391731322cSespie 	return 0;
6401731322cSespie }
6411731322cSespie 
6421731322cSespie int
6431731322cSespie uao_grow(struct uvm_object *uobj, int pages)
6441731322cSespie {
6451731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
6461731322cSespie 
6471731322cSespie 	KASSERT(pages > aobj->u_pages);
6481731322cSespie 
6491731322cSespie 	/*
6501731322cSespie 	 * Distinguish between three possible cases:
6511731322cSespie 	 * 1. aobj uses hash and hash size needs to be adjusted.
6521731322cSespie 	 * 2. aobj uses array and array size needs to be adjusted.
6531731322cSespie 	 * 3. aobj uses array and must be converted to hash.
6541731322cSespie 	 */
6551731322cSespie 	if (pages <= UAO_SWHASH_THRESHOLD)
6561731322cSespie 		return uao_grow_array(uobj, pages);	/* case 2 */
6571731322cSespie 	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
6581731322cSespie 		return uao_grow_hash(uobj, pages);	/* case 1 */
6591731322cSespie 	else
6601731322cSespie 		return uao_grow_convert(uobj, pages);	/* case 3 */
6611731322cSespie }
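
/*
 * Concrete numbers (illustrative, mirroring uao_shrink() above): with
 * UAO_SWHASH_THRESHOLD == 64, growing 32 -> 48 pages resizes the array
 * (case 2), 96 -> 192 resizes the hash (case 1), and 32 -> 128
 * converts the array to a hash (case 3).
 */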
662a4b88e66Smpi #endif /* TMPFS */
6631731322cSespie 
6641731322cSespie /*
665cd7ee8acSart  * uao_create: create an aobj of the given size and return its uvm_object.
666cd7ee8acSart  *
6671731322cSespie  * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
668cd7ee8acSart  * => for the kernel object, the flags are:
669cd7ee8acSart  *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
670cd7ee8acSart  *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
671cd7ee8acSart  */
672cd7ee8acSart struct uvm_object *
6732023d591Soga uao_create(vsize_t size, int flags)
674cd7ee8acSart {
67552887a38Smpi 	static struct uvm_aobj kernel_object_store;
67669c04514Smpi 	static struct rwlock bootstrap_kernel_object_lock;
67752887a38Smpi 	static int kobj_alloced = 0;
678cd7ee8acSart 	int pages = round_page(size) >> PAGE_SHIFT;
679cd7ee8acSart 	struct uvm_aobj *aobj;
6806894b7cfSmpi 	int refs;
681cd7ee8acSart 
68252887a38Smpi 	/*
68352887a38Smpi 	 * Allocate a new aobj, unless kernel object is requested.
68452887a38Smpi 	 */
68552887a38Smpi 	if (flags & UAO_FLAG_KERNOBJ) {
6866894b7cfSmpi 		KASSERT(!kobj_alloced);
687cd7ee8acSart 		aobj = &kernel_object_store;
688cd7ee8acSart 		aobj->u_pages = pages;
68952887a38Smpi 		aobj->u_flags = UAO_FLAG_NOSWAP;
6906894b7cfSmpi 		refs = UVM_OBJ_KERN;
691cd7ee8acSart 		kobj_alloced = UAO_FLAG_KERNOBJ;
692cd7ee8acSart 	} else if (flags & UAO_FLAG_KERNSWAP) {
6936894b7cfSmpi 		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
694cd7ee8acSart 		aobj = &kernel_object_store;
695cd7ee8acSart 		kobj_alloced = UAO_FLAG_KERNSWAP;
69652887a38Smpi 	} else {
697cd7ee8acSart 		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
698cd7ee8acSart 		aobj->u_pages = pages;
69952887a38Smpi 		aobj->u_flags = 0;
70052887a38Smpi 		refs = 1;
701cd7ee8acSart 	}
702cd7ee8acSart 
70352887a38Smpi 	/*
70452887a38Smpi 	 * allocate hash/array if necessary
70552887a38Smpi 	 */
7061731322cSespie  	if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
7076894b7cfSmpi 		int mflags;
7086894b7cfSmpi 
7091731322cSespie 		if (flags)
7101731322cSespie 			mflags = M_NOWAIT;
7111731322cSespie 		else
7121731322cSespie 			mflags = M_WAITOK;
713cd7ee8acSart 
714cd7ee8acSart 		/* allocate hash table or array depending on object size */
71539c73ac7Smpi 		if (UAO_USES_SWHASH(aobj)) {
7161731322cSespie 			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
717cd7ee8acSart 			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
7181731322cSespie 			if (aobj->u_swhash == NULL) {
7191731322cSespie 				if (flags & UAO_FLAG_CANFAIL) {
7201731322cSespie 					pool_put(&uvm_aobj_pool, aobj);
721b9df1565Smpi 					return NULL;
7221731322cSespie 				}
723cd7ee8acSart 				panic("uao_create: hashinit swhash failed");
7241731322cSespie 			}
725cd7ee8acSart 		} else {
726540e394aSdoug 			aobj->u_swslots = mallocarray(pages, sizeof(int),
72728a8f404Sart 			    M_UVMAOBJ, mflags|M_ZERO);
7281731322cSespie 			if (aobj->u_swslots == NULL) {
7291731322cSespie 				if (flags & UAO_FLAG_CANFAIL) {
7301731322cSespie 					pool_put(&uvm_aobj_pool, aobj);
731b9df1565Smpi 					return NULL;
7321731322cSespie 				}
733cd7ee8acSart 				panic("uao_create: malloc swslots failed");
734cd7ee8acSart 			}
7351731322cSespie 		}
736cd7ee8acSart 
7371731322cSespie 		if (flags & UAO_FLAG_KERNSWAP) {
738cd7ee8acSart 			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
739b9df1565Smpi 			return &aobj->u_obj;
740cd7ee8acSart 			/* done! */
741cd7ee8acSart 		}
742cd7ee8acSart 	}
743cd7ee8acSart 
74452887a38Smpi 	/*
74552887a38Smpi 	 * Initialise UVM object.
74652887a38Smpi 	 */
747da3d0110Smpi 	uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
74869c04514Smpi 	if (flags & UAO_FLAG_KERNOBJ) {
74969c04514Smpi 		/* Use a temporary static lock for kernel_object. */
75069c04514Smpi 		rw_init(&bootstrap_kernel_object_lock, "kobjlk");
75169c04514Smpi 		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
75269c04514Smpi 	}
753cd7ee8acSart 
75452887a38Smpi 	/*
75552887a38Smpi  	 * now that aobj is ready, add it to the global list
75652887a38Smpi  	 */
757bd69ae14Soga 	mtx_enter(&uao_list_lock);
758cd7ee8acSart 	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
759bd69ae14Soga 	mtx_leave(&uao_list_lock);
760cd7ee8acSart 
761b9df1565Smpi 	return &aobj->u_obj;
762cd7ee8acSart }
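
/*
 * Usage sketch (illustrative, not part of this file): a normal
 * anonymous object is created with zero flags and released with
 * uao_detach() when its owner is done with it:
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(16 * PAGE_SIZE, 0);
 *	...
 *	uao_detach(uobj);
 *
 * With zero flags uao_create() sleeps until memory is available;
 * UAO_FLAG_CANFAIL makes it return NULL on allocation failure instead.
 */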
763cd7ee8acSart 
764cd7ee8acSart 
765cd7ee8acSart 
766cd7ee8acSart /*
767cd7ee8acSart  * uao_init: set up aobj pager subsystem
768cd7ee8acSart  *
769cd7ee8acSart  * => called at boot time from uvm_pager_init()
770cd7ee8acSart  */
77128fbabcfSart void
7722023d591Soga uao_init(void)
773cd7ee8acSart {
774cd7ee8acSart 	/*
7750b0fe1a1Soga 	 * NOTE: Pages for this pool must not come from a pageable
776cd7ee8acSart 	 * kernel map!
777cd7ee8acSart 	 */
7781378bae2Sdlg 	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
7791378bae2Sdlg 	    IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
7801378bae2Sdlg 	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
7811378bae2Sdlg 	    IPL_NONE, PR_WAITOK, "aobjpl", NULL);
782cd7ee8acSart }
783cd7ee8acSart 
784cd7ee8acSart /*
78552887a38Smpi  * uao_reference: hold a reference to an anonymous UVM object.
786cd7ee8acSart  */
787cd7ee8acSart void
7882023d591Soga uao_reference(struct uvm_object *uobj)
789cd7ee8acSart {
79052887a38Smpi 	/* Kernel object is persistent. */
7917cb53682Sart 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
792cd7ee8acSart 		return;
793cd7ee8acSart 
7941af74310Smpi 	atomic_inc_int(&uobj->uo_refs);
795cd7ee8acSart }
796cd7ee8acSart 
79728fbabcfSart 
798cd7ee8acSart /*
79952887a38Smpi  * uao_detach: drop a reference to an anonymous UVM object.
800cd7ee8acSart  */
801cd7ee8acSart void
8022023d591Soga uao_detach(struct uvm_object *uobj)
803cd7ee8acSart {
804cd7ee8acSart 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
8050b0fe1a1Soga 	struct vm_page *pg;
806cd7ee8acSart 
80752887a38Smpi 	/*
80852887a38Smpi 	 * Detaching from kernel_object is a NOP.
80952887a38Smpi 	 */
8101af74310Smpi 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
811cd7ee8acSart 		return;
812cd7ee8acSart 
81352887a38Smpi 	/*
81452887a38Smpi 	 * Drop the reference.  If it was the last one, destroy the object.
81552887a38Smpi 	 */
8161af74310Smpi 	if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
817cd7ee8acSart 		return;
818cd7ee8acSart 	}
819cd7ee8acSart 
82052887a38Smpi 	/*
82152887a38Smpi 	 * Remove the aobj from the global list.
82252887a38Smpi 	 */
823bd69ae14Soga 	mtx_enter(&uao_list_lock);
824cd7ee8acSart 	LIST_REMOVE(aobj, u_list);
825bd69ae14Soga 	mtx_leave(&uao_list_lock);
826cd7ee8acSart 
827cd7ee8acSart 	/*
82852887a38Smpi 	 * Free all the pages left in the aobj.  For each page, when the
82952887a38Smpi 	 * page is no longer busy (and thus after any disk I/O that it is
83052887a38Smpi 	 * involved in is complete), release any swap resources and free
83152887a38Smpi 	 * the page itself.
832cd7ee8acSart 	 */
83369c04514Smpi 	rw_enter(uobj->vmobjlock, RW_WRITE);
834262a556aSdlg 	while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
83569c04514Smpi 		pmap_page_protect(pg, PROT_NONE);
8369662fca4Sart 		if (pg->pg_flags & PG_BUSY) {
8375b4619eaSmpi 			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
8385b4619eaSmpi 			rw_enter(uobj->vmobjlock, RW_WRITE);
839cd7ee8acSart 			continue;
840cd7ee8acSart 		}
8418a42ed70Sart 		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
84269c04514Smpi 		uvm_lock_pageq();
843cd7ee8acSart 		uvm_pagefree(pg);
844cd7ee8acSart 		uvm_unlock_pageq();
84569c04514Smpi 	}
846cd7ee8acSart 
84752887a38Smpi 	/*
84852887a38Smpi 	 * Finally, free the anonymous UVM object itself.
84952887a38Smpi 	 */
850cd7ee8acSart 	uao_free(aobj);
851cd7ee8acSart }
852cd7ee8acSart 
853cd7ee8acSart /*
85452887a38Smpi  * uao_flush: flush pages out of a uvm object
855e920f2c9Ssmart  *
856b8a635f6Stedu  * => if PGO_CLEANIT is not set, then we will not block.
857e920f2c9Ssmart  * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
858e920f2c9Ssmart  *	for flushing.
859e920f2c9Ssmart  * => NOTE: we are allowed to lock the page queues, so the caller
860e920f2c9Ssmart  *	must not be holding the lock on them [e.g. pagedaemon had
861e920f2c9Ssmart  *	better not call us with the queues locked]
862e920f2c9Ssmart  * => we return TRUE unless we encountered some sort of I/O error
863e920f2c9Ssmart  *	XXXJRT currently never happens, as we never directly initiate
864e920f2c9Ssmart  *	XXXJRT I/O
865cd7ee8acSart  */
866cd7ee8acSart boolean_t
8672023d591Soga uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
868cd7ee8acSart {
869e920f2c9Ssmart 	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
87018725a33Smpi 	struct vm_page *pg;
87140cf655dSart 	voff_t curoff;
872e920f2c9Ssmart 
87357296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
87469c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
8759f7b7ef0Smpi 
876e920f2c9Ssmart 	if (flags & PGO_ALLPAGES) {
877e920f2c9Ssmart 		start = 0;
87836d5d901Skettenis 		stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
879e920f2c9Ssmart 	} else {
880e920f2c9Ssmart 		start = trunc_page(start);
881e920f2c9Ssmart 		stop = round_page(stop);
88236d5d901Skettenis 		if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
883e920f2c9Ssmart 			printf("uao_flush: strange, got an out of range "
884e920f2c9Ssmart 			    "flush (fixed)\n");
88536d5d901Skettenis 			stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
886e920f2c9Ssmart 		}
887e920f2c9Ssmart 	}
888e920f2c9Ssmart 
889cd7ee8acSart 	/*
890e920f2c9Ssmart 	 * Don't need to do any work here if we're not freeing
891e920f2c9Ssmart 	 * or deactivating pages.
892cd7ee8acSart 	 */
8936894b7cfSmpi 	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
894b9df1565Smpi 		return TRUE;
8956894b7cfSmpi 	}
896e920f2c9Ssmart 
897e920f2c9Ssmart 	curoff = start;
8980b0fe1a1Soga 	for (;;) {
8990b0fe1a1Soga 		if (curoff < stop) {
90018725a33Smpi 			pg = uvm_pagelookup(uobj, curoff);
9010b0fe1a1Soga 			curoff += PAGE_SIZE;
90218725a33Smpi 			if (pg == NULL)
903e920f2c9Ssmart 				continue;
904e920f2c9Ssmart 		} else {
9050b0fe1a1Soga 			break;
9060b0fe1a1Soga 		}
907e920f2c9Ssmart 
9080b0fe1a1Soga 		/* Make sure page is unbusy, else wait for it. */
90918725a33Smpi 		if (pg->pg_flags & PG_BUSY) {
9105b4619eaSmpi 			uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
9115b4619eaSmpi 			rw_enter(uobj->vmobjlock, RW_WRITE);
9120b0fe1a1Soga 			curoff -= PAGE_SIZE;
913e920f2c9Ssmart 			continue;
914e920f2c9Ssmart 		}
915e920f2c9Ssmart 
916e920f2c9Ssmart 		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
917e920f2c9Ssmart 		/*
918e920f2c9Ssmart 		 * XXX In these first 3 cases, we always just
919e920f2c9Ssmart 		 * XXX deactivate the page.  We may want to
920e920f2c9Ssmart 		 * XXX handle the different cases more specifically
921e920f2c9Ssmart 		 * XXX in the future.
922e920f2c9Ssmart 		 */
923e920f2c9Ssmart 		case PGO_CLEANIT|PGO_FREE:
9240b0fe1a1Soga 			/* FALLTHROUGH */
925e920f2c9Ssmart 		case PGO_CLEANIT|PGO_DEACTIVATE:
9260b0fe1a1Soga 			/* FALLTHROUGH */
927e920f2c9Ssmart 		case PGO_DEACTIVATE:
928e920f2c9Ssmart  deactivate_it:
92918725a33Smpi 			if (pg->wire_count != 0)
930e920f2c9Ssmart 				continue;
931e920f2c9Ssmart 
9320b0fe1a1Soga 			uvm_lock_pageq();
93318725a33Smpi 			uvm_pagedeactivate(pg);
9340b0fe1a1Soga 			uvm_unlock_pageq();
935e920f2c9Ssmart 
936e920f2c9Ssmart 			continue;
937e920f2c9Ssmart 		case PGO_FREE:
938e920f2c9Ssmart 			/*
939e920f2c9Ssmart 			 * If there are multiple references to
940e920f2c9Ssmart 			 * the object, just deactivate the page.
941e920f2c9Ssmart 			 */
942e920f2c9Ssmart 			if (uobj->uo_refs > 1)
943e920f2c9Ssmart 				goto deactivate_it;
944e920f2c9Ssmart 
9456f909936Svisa 			/* XXX skip the page if it's wired */
94618725a33Smpi 			if (pg->wire_count != 0)
947e920f2c9Ssmart 				continue;
948e920f2c9Ssmart 
94952887a38Smpi 			/*
95052887a38Smpi 			 * free the swap slot and the page.
95152887a38Smpi 			 */
95218725a33Smpi 			pmap_page_protect(pg, PROT_NONE);
953e920f2c9Ssmart 
95452887a38Smpi 			/*
95552887a38Smpi 			 * freeing swapslot here is not strictly necessary.
95652887a38Smpi 			 * however, leaving it here doesn't save much
95752887a38Smpi 			 * because we need to update swap accounting anyway.
95852887a38Smpi 			 */
95918725a33Smpi 			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
9600b0fe1a1Soga 			uvm_lock_pageq();
96118725a33Smpi 			uvm_pagefree(pg);
9620b0fe1a1Soga 			uvm_unlock_pageq();
963e920f2c9Ssmart 
964e920f2c9Ssmart 			continue;
965e920f2c9Ssmart 		default:
966e920f2c9Ssmart 			panic("uao_flush: weird flags");
967e920f2c9Ssmart 		}
968e920f2c9Ssmart 	}
969e920f2c9Ssmart 
970b9df1565Smpi 	return TRUE;
971cd7ee8acSart }
972cd7ee8acSart 
973cd7ee8acSart /*
974cd7ee8acSart  * uao_get: fetch me a page
975cd7ee8acSart  *
976cd7ee8acSart  * we have three cases:
977cd7ee8acSart  * 1: page is resident     -> just return the page.
978cd7ee8acSart  * 2: page is zero-fill    -> allocate a new page and zero it.
979cd7ee8acSart  * 3: page is swapped out  -> fetch the page from swap.
980cd7ee8acSart  *
98169c04514Smpi  * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
982cd7ee8acSart  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
9831414b0faSart  * then we will need to return VM_PAGER_UNLOCK.
984cd7ee8acSart  *
985cd7ee8acSart  * => flags: PGO_ALLPAGES: get all of the pages
986cd7ee8acSart  *           PGO_LOCKED: fault data structures are locked
987cd7ee8acSart  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
988cd7ee8acSart  * => NOTE: caller must check for released pages!!
989cd7ee8acSart  */
9901e3e475dSoga static int
9912023d591Soga uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
9922023d591Soga     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
993cd7ee8acSart {
994cd7ee8acSart 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
99540cf655dSart 	voff_t current_offset;
9961414b0faSart 	vm_page_t ptmp;
99728fbabcfSart 	int lcv, gotpages, maxpages, swslot, rv, pageidx;
998cd7ee8acSart 	boolean_t done;
999cd7ee8acSart 
100057296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1001f3e62b59Smpi 	KASSERT(rw_lock_held(uobj->vmobjlock));
1002f3e62b59Smpi 	KASSERT(rw_write_held(uobj->vmobjlock) ||
1003f3e62b59Smpi 	    ((flags & PGO_LOCKED) != 0 && (access_type & PROT_WRITE) == 0));
10049f7b7ef0Smpi 
100552887a38Smpi 	/*
100652887a38Smpi  	 * get number of pages
100752887a38Smpi  	 */
1008cd7ee8acSart 	maxpages = *npagesp;
1009cd7ee8acSart 
1010cd7ee8acSart 	if (flags & PGO_LOCKED) {
101152887a38Smpi 		/*
101252887a38Smpi  		 * step 1a: get pages that are already resident.   only do
101352887a38Smpi 		 * this if the data structures are locked (i.e. the first
101452887a38Smpi 		 * time through).
101552887a38Smpi  		 */
1016cd7ee8acSart 		done = TRUE;	/* be optimistic */
1017cd7ee8acSart 		gotpages = 0;	/* # of pages we got so far */
1018cd7ee8acSart 
1019cd7ee8acSart 		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1020cd7ee8acSart 		    lcv++, current_offset += PAGE_SIZE) {
1021cd7ee8acSart 			/* do we care about this page?  if not, skip it */
1022cd7ee8acSart 			if (pps[lcv] == PGO_DONTCARE)
1023cd7ee8acSart 				continue;
1024cd7ee8acSart 
1025a8f4448aSmpi 			/* lookup page */
1026cd7ee8acSart 			ptmp = uvm_pagelookup(uobj, current_offset);
1027cd7ee8acSart 
1028cd7ee8acSart 			/*
102952887a38Smpi 			 * to be useful we must get a non-busy page
103052887a38Smpi 			 */
1031a8f4448aSmpi 			if (ptmp == NULL || (ptmp->pg_flags & PG_BUSY) != 0) {
1032cd7ee8acSart 				if (lcv == centeridx ||
1033cd7ee8acSart 				    (flags & PGO_ALLPAGES) != 0)
1034cd7ee8acSart 					/* need to do a wait or I/O! */
1035cd7ee8acSart 					done = FALSE;
1036cd7ee8acSart 				continue;
1037cd7ee8acSart 			}
1038cd7ee8acSart 
1039cd7ee8acSart 			/*
104052887a38Smpi 			 * useful page: plug it in our result array
1041cd7ee8acSart 			 */
1042cd7ee8acSart 			pps[lcv] = ptmp;
1043cd7ee8acSart 			gotpages++;
104435164244Stedu 		}
1045cd7ee8acSart 
1046cd7ee8acSart 		/*
1047cd7ee8acSart  		 * step 1b: now we've either done everything needed or we need
1048cd7ee8acSart 		 * to unlock and do some waiting or I/O.
1049cd7ee8acSart  		 */
1050cd7ee8acSart 		*npagesp = gotpages;
1051a8f4448aSmpi 		return done ? VM_PAGER_OK : VM_PAGER_UNLOCK;
1052cd7ee8acSart 	}
1053cd7ee8acSart 
1054cd7ee8acSart 	/*
1055cd7ee8acSart  	 * step 2: get non-resident or busy pages.
1056b8a635f6Stedu  	 * data structures are unlocked.
1057cd7ee8acSart  	 */
1058cd7ee8acSart 	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1059cd7ee8acSart 	    lcv++, current_offset += PAGE_SIZE) {
1060cd7ee8acSart 		/*
1061cd7ee8acSart 		 * - skip over pages we've already gotten or don't want
1062cd7ee8acSart 		 * - skip over pages we don't _have_ to get
1063cd7ee8acSart 		 */
1064cd7ee8acSart 		if (pps[lcv] != NULL ||
1065cd7ee8acSart 		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1066cd7ee8acSart 			continue;
1067cd7ee8acSart 
106828fbabcfSart 		pageidx = current_offset >> PAGE_SHIFT;
106928fbabcfSart 
1070cd7ee8acSart 		/*
1071cd7ee8acSart  		 * we have yet to locate the current page (pps[lcv]).   we
1072cd7ee8acSart 		 * first look for a page that is already at the current offset.
1073cd7ee8acSart 		 * if we find a page, we check to see if it is busy or
1074cd7ee8acSart 		 * released.  if that is the case, then we sleep on the page
1075cd7ee8acSart 		 * until it is no longer busy or released and repeat the lookup.
1076cd7ee8acSart 		 * if the page we found is neither busy nor released, then we
1077cd7ee8acSart 		 * busy it (so we own it) and plug it into pps[lcv].   this
1078cd7ee8acSart 		 * 'break's the following while loop and indicates we are
1079cd7ee8acSart 		 * ready to move on to the next page in the "lcv" loop above.
1080cd7ee8acSart  		 *
1081cd7ee8acSart  		 * if we exit the while loop with pps[lcv] still set to NULL,
1082cd7ee8acSart 		 * then it means that we allocated a new busy/fake/clean page
1083cd7ee8acSart 		 * ptmp in the object and we need to do I/O to fill in the data.
1084cd7ee8acSart  		 */
1085cd7ee8acSart 
1086cd7ee8acSart 		/* top of "pps" while loop */
1087cd7ee8acSart 		while (pps[lcv] == NULL) {
1088cd7ee8acSart 			/* look for a resident page */
1089cd7ee8acSart 			ptmp = uvm_pagelookup(uobj, current_offset);
1090cd7ee8acSart 
1091cd7ee8acSart 			/* not resident?   allocate one now (if we can) */
1092cd7ee8acSart 			if (ptmp == NULL) {
1093cd7ee8acSart 
1094cd7ee8acSart 				ptmp = uvm_pagealloc(uobj, current_offset,
10958a42ed70Sart 				    NULL, 0);
1096cd7ee8acSart 
1097cd7ee8acSart 				/* out of RAM? */
1098cd7ee8acSart 				if (ptmp == NULL) {
109969c04514Smpi 					rw_exit(uobj->vmobjlock);
1100cd7ee8acSart 					uvm_wait("uao_getpage");
110169c04514Smpi 					rw_enter(uobj->vmobjlock, RW_WRITE);
110269c04514Smpi 					/* goto top of pps while loop */
1103cd7ee8acSart 					continue;
1104cd7ee8acSart 				}
1105cd7ee8acSart 
1106cd7ee8acSart 				/*
1107cd7ee8acSart 				 * safe with PQ's unlocked: because we just
1108cd7ee8acSart 				 * alloc'd the page
1109cd7ee8acSart 				 */
111065d6360cSart 				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
1111cd7ee8acSart 
1112cd7ee8acSart 				/*
1113cd7ee8acSart 				 * got new page ready for I/O.  break pps while
1114cd7ee8acSart 				 * loop.  pps[lcv] is still NULL.
1115cd7ee8acSart 				 */
1116cd7ee8acSart 				break;
1117cd7ee8acSart 			}
1118cd7ee8acSart 
1119cd7ee8acSart 			/* page is there, see if we need to wait on it */
11200b0fe1a1Soga 			if ((ptmp->pg_flags & PG_BUSY) != 0) {
11215b4619eaSmpi 				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
11225b4619eaSmpi 				rw_enter(uobj->vmobjlock, RW_WRITE);
1123cd7ee8acSart 				continue;	/* goto top of pps while loop */
1124cd7ee8acSart 			}
1125cd7ee8acSart 
1126cd7ee8acSart 			/*
112752887a38Smpi  			 * if we get here then the page is resident and
112852887a38Smpi 			 * unbusy.  we busy it now (so we own it).
1129cd7ee8acSart  			 */
1130cd7ee8acSart 			/* we own it, caller must un-busy */
113165d6360cSart 			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1132cd7ee8acSart 			UVM_PAGE_OWN(ptmp, "uao_get2");
1133cd7ee8acSart 			pps[lcv] = ptmp;
1134cd7ee8acSart 		}
1135cd7ee8acSart 
1136cd7ee8acSart 		/*
1137cd7ee8acSart  		 * if we own the valid page at the correct offset, pps[lcv] will
1138cd7ee8acSart  		 * point to it.   nothing more to do except go to the next page.
1139cd7ee8acSart  		 */
1140cd7ee8acSart 		if (pps[lcv])
1141cd7ee8acSart 			continue;			/* next lcv */
1142cd7ee8acSart 
1143cd7ee8acSart 		/*
1144cd7ee8acSart  		 * we have a "fake/busy/clean" page that we just allocated.
1145cd7ee8acSart  		 * do the needed "i/o", either reading from swap or zeroing.
1146cd7ee8acSart  		 */
114757296fa7Smpi 		swslot = uao_find_swslot(uobj, pageidx);
1148cd7ee8acSart 
114935164244Stedu 		/* just zero the page if there's nothing in swap.  */
11500b0fe1a1Soga 		if (swslot == 0) {
115135164244Stedu 			/* page hasn't existed before, just zero it. */
1152cd7ee8acSart 			uvm_pagezero(ptmp);
115328fbabcfSart 		} else {
115452887a38Smpi 			/*
115552887a38Smpi 			 * page in the swapped-out page.
115669c04514Smpi 			 * unlock object for i/o, relock when done.
115752887a38Smpi 			 */
115869c04514Smpi 
115969c04514Smpi 			rw_exit(uobj->vmobjlock);
1160cd7ee8acSart 			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
116169c04514Smpi 			rw_enter(uobj->vmobjlock, RW_WRITE);
1162cd7ee8acSart 
116352887a38Smpi 			/*
116452887a38Smpi 			 * I/O done.  check for errors.
116552887a38Smpi 			 */
116635164244Stedu 			if (rv != VM_PAGER_OK) {
116728fbabcfSart 				/*
116828fbabcfSart 				 * remove the swap slot from the aobj
116928fbabcfSart 				 * and mark the aobj as having no real slot.
117028fbabcfSart 				 * don't free the swap slot, thus preventing
117128fbabcfSart 				 * it from being used again.
117228fbabcfSart 				 */
117328fbabcfSart 				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
117428fbabcfSart 							SWSLOT_BAD);
117528fbabcfSart 				uvm_swap_markbad(swslot, 1);
117628fbabcfSart 
1177934ce9c8Skettenis 				if (ptmp->pg_flags & PG_WANTED)
1178934ce9c8Skettenis 					wakeup(ptmp);
117965d6360cSart 				atomic_clearbits_int(&ptmp->pg_flags,
118065d6360cSart 				    PG_WANTED|PG_BUSY);
1181cd7ee8acSart 				UVM_PAGE_OWN(ptmp, NULL);
1182cd7ee8acSart 				uvm_lock_pageq();
1183cd7ee8acSart 				uvm_pagefree(ptmp);
1184cd7ee8acSart 				uvm_unlock_pageq();
118569c04514Smpi 				rw_exit(uobj->vmobjlock);
118628fbabcfSart 
1187b9df1565Smpi 				return rv;
1188cd7ee8acSart 			}
1189cd7ee8acSart 		}
1190cd7ee8acSart 
1191cd7ee8acSart 		/*
1192cd7ee8acSart  		 * we got the page!   clear the fake flag (indicates valid
1193cd7ee8acSart 		 * data now in page) and plug into our result array.   note
1194cd7ee8acSart 		 * that page is still busy.
1195cd7ee8acSart  		 *
1196cd7ee8acSart  		 * it is the callers job to:
1197cd7ee8acSart  		 * => check if the page is released
1198cd7ee8acSart  		 * => unbusy the page
1199cd7ee8acSart  		 * => activate the page
1200cd7ee8acSart  		 */
120165d6360cSart 		atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
12022c7adcb7Sart 		pmap_clear_modify(ptmp);		/* ... and clean */
1203cd7ee8acSart 		pps[lcv] = ptmp;
1204cd7ee8acSart 
1205cd7ee8acSart 	}	/* lcv loop */
1206cd7ee8acSart 
120769c04514Smpi 	rw_exit(uobj->vmobjlock);
1208b9df1565Smpi 	return VM_PAGER_OK;
1209cd7ee8acSart }
1210cd7ee8acSart 
1211cd7ee8acSart /*
12128a42ed70Sart  * uao_dropswap:  release any swap resources from this aobj page.
121369c04514Smpi  *
121469c04514Smpi  * => aobj must be locked or have a reference count of 0.
12158a42ed70Sart  */
121606be59c1Soga int
12172023d591Soga uao_dropswap(struct uvm_object *uobj, int pageidx)
12188a42ed70Sart {
12198a42ed70Sart 	int slot;
12208a42ed70Sart 
122157296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
122257296fa7Smpi 
12238a42ed70Sart 	slot = uao_set_swslot(uobj, pageidx, 0);
12248a42ed70Sart 	if (slot) {
12258a42ed70Sart 		uvm_swap_free(slot, 1);
12268a42ed70Sart 	}
1227b9df1565Smpi 	return slot;
12288a42ed70Sart }
122928fbabcfSart 
123028fbabcfSart /*
123128fbabcfSart  * page in every page in every aobj that is paged-out to a range of swslots.
123228fbabcfSart  *
123369c04514Smpi  * => aobj must be locked and is returned locked.
123428fbabcfSart  * => returns TRUE if pagein was aborted due to lack of memory.
123528fbabcfSart  */
123628fbabcfSart boolean_t
12372023d591Soga uao_swap_off(int startslot, int endslot)
123828fbabcfSart {
12391af74310Smpi 	struct uvm_aobj *aobj;
124028fbabcfSart 
124152887a38Smpi 	/*
12421af74310Smpi 	 * Walk the list of all anonymous UVM objects.  Grab the first.
124352887a38Smpi 	 */
1244bd69ae14Soga 	mtx_enter(&uao_list_lock);
12451af74310Smpi 	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
12461af74310Smpi 		mtx_leave(&uao_list_lock);
12471af74310Smpi 		return FALSE;
12481af74310Smpi 	}
12491af74310Smpi 	uao_reference(&aobj->u_obj);
125028fbabcfSart 
12511af74310Smpi 	do {
12521af74310Smpi 		struct uvm_aobj *nextaobj;
125328fbabcfSart 		boolean_t rv;
125428fbabcfSart 
125528fbabcfSart 		/*
12561af74310Smpi 		 * Prefetch the next object and immediately hold a reference
12571af74310Smpi 		 * on it, so neither the current nor the next entry could
12581af74310Smpi 		 * disappear while we are iterating.
125928fbabcfSart 		 */
12601af74310Smpi 		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
12611af74310Smpi 			uao_reference(&nextaobj->u_obj);
12621af74310Smpi 		}
1263bd69ae14Soga 		mtx_leave(&uao_list_lock);
126428fbabcfSart 
126528fbabcfSart 		/*
12661af74310Smpi 		 * Page in all pages in the swap slot range.
126728fbabcfSart 		 */
126869c04514Smpi 		rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
126928fbabcfSart 		rv = uao_pagein(aobj, startslot, endslot);
127069c04514Smpi 		rw_exit(aobj->u_obj.vmobjlock);
12711af74310Smpi 
12721af74310Smpi 		/* Drop the reference of the current object. */
12731af74310Smpi 		uao_detach(&aobj->u_obj);
127428fbabcfSart 		if (rv) {
12751af74310Smpi 			if (nextaobj) {
12761af74310Smpi 				uao_detach(&nextaobj->u_obj);
12771af74310Smpi 			}
127828fbabcfSart 			return rv;
127928fbabcfSart 		}
128028fbabcfSart 
12811af74310Smpi 		aobj = nextaobj;
1282bd69ae14Soga 		mtx_enter(&uao_list_lock);
12831af74310Smpi 	} while (aobj);
128428fbabcfSart 
128552887a38Smpi 	/*
128652887a38Smpi 	 * done with traversal, unlock the list
128752887a38Smpi 	 */
1288bd69ae14Soga 	mtx_leave(&uao_list_lock);
128928fbabcfSart 	return FALSE;
129028fbabcfSart }
129128fbabcfSart 
129228fbabcfSart /*
129328fbabcfSart  * page in any pages from aobj in the given range.
129428fbabcfSart  *
129528fbabcfSart  * => returns TRUE if pagein was aborted due to lack of memory.
129628fbabcfSart  */
12971e3e475dSoga static boolean_t
12982023d591Soga uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
129928fbabcfSart {
130028fbabcfSart 	boolean_t rv;
130128fbabcfSart 
130239c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
130328fbabcfSart 		struct uao_swhash_elt *elt;
130428fbabcfSart 		int bucket;
130528fbabcfSart 
130628fbabcfSart restart:
130728fbabcfSart 		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
130828fbabcfSart 			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
130928fbabcfSart 			     elt != NULL;
131028fbabcfSart 			     elt = LIST_NEXT(elt, list)) {
131128fbabcfSart 				int i;
131228fbabcfSart 
131328fbabcfSart 				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
131428fbabcfSart 					int slot = elt->slots[i];
131528fbabcfSart 
131652887a38Smpi 					/*
131752887a38Smpi 					 * if the slot isn't in range, skip it.
131852887a38Smpi 					 */
131928fbabcfSart 					if (slot < startslot ||
132028fbabcfSart 					    slot >= endslot) {
132128fbabcfSart 						continue;
132228fbabcfSart 					}
132328fbabcfSart 
132428fbabcfSart 					/*
132528fbabcfSart 					 * process the page,
132628fbabcfSart 					 * then start over on this object
132728fbabcfSart 					 * since the swhash elt
132828fbabcfSart 					 * may have been freed.
132928fbabcfSart 					 */
133028fbabcfSart 					rv = uao_pagein_page(aobj,
133128fbabcfSart 					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
133228fbabcfSart 					if (rv) {
133328fbabcfSart 						return rv;
133428fbabcfSart 					}
133528fbabcfSart 					goto restart;
133628fbabcfSart 				}
133728fbabcfSart 			}
133828fbabcfSart 		}
133928fbabcfSart 	} else {
134028fbabcfSart 		int i;
134128fbabcfSart 
134228fbabcfSart 		for (i = 0; i < aobj->u_pages; i++) {
134328fbabcfSart 			int slot = aobj->u_swslots[i];
134428fbabcfSart 
134552887a38Smpi 			/*
134652887a38Smpi 			 * if the slot isn't in range, skip it
134752887a38Smpi 			 */
134828fbabcfSart 			if (slot < startslot || slot >= endslot) {
134928fbabcfSart 				continue;
135028fbabcfSart 			}
135128fbabcfSart 
135252887a38Smpi 			/*
135352887a38Smpi 			 * process the page.
135452887a38Smpi 			 */
135528fbabcfSart 			rv = uao_pagein_page(aobj, i);
135628fbabcfSart 			if (rv) {
135728fbabcfSart 				return rv;
135828fbabcfSart 			}
135928fbabcfSart 		}
136028fbabcfSart 	}
136128fbabcfSart 
136228fbabcfSart 	return FALSE;
136328fbabcfSart }
136428fbabcfSart 
136528fbabcfSart /*
136652887a38Smpi  * uao_pagein_page: page in a single page from an anonymous UVM object.
136752887a38Smpi  *
136852887a38Smpi  * => Returns TRUE if pagein was aborted due to lack of memory.
136928fbabcfSart  */
13701e3e475dSoga static boolean_t
13712023d591Soga uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
137228fbabcfSart {
137369c04514Smpi 	struct uvm_object *uobj = &aobj->u_obj;
137428fbabcfSart 	struct vm_page *pg;
13756ec37434Smpi 	int rv, npages;
137628fbabcfSart 
137728fbabcfSart 	pg = NULL;
137828fbabcfSart 	npages = 1;
137969c04514Smpi 
138069c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
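	/*
	 * Fetch the page.  uao_get() releases the object lock for the
	 * duration of the synchronous I/O, hence the relock below.
	 */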
138136d5d901Skettenis 	rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
13821e8cdc2eSderaadt 	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
138328fbabcfSart 
138469c04514Smpi 	/*
138569c04514Smpi 	 * relock and finish up.
138669c04514Smpi 	 */
138769c04514Smpi 	rw_enter(uobj->vmobjlock, RW_WRITE);
138828fbabcfSart 	switch (rv) {
13891414b0faSart 	case VM_PAGER_OK:
139028fbabcfSart 		break;
139128fbabcfSart 
13921414b0faSart 	case VM_PAGER_ERROR:
13931414b0faSart 	case VM_PAGER_REFAULT:
139428fbabcfSart 		/*
139528fbabcfSart 		 * nothing more to do on errors.
13961414b0faSart 		 * VM_PAGER_REFAULT can only mean that the anon was freed,
139728fbabcfSart 		 * so again there's nothing to do.
139828fbabcfSart 		 */
139928fbabcfSart 		return FALSE;
140028fbabcfSart 	}
140128fbabcfSart 
140228fbabcfSart 	/*
140328fbabcfSart 	 * ok, we've got the page now.
140428fbabcfSart 	 * mark it as dirty, clear its swslot and un-busy it.
140528fbabcfSart 	 */
14066ec37434Smpi 	uao_dropswap(&aobj->u_obj, pageidx);
140765d6360cSart 	atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
140828fbabcfSart 	UVM_PAGE_OWN(pg, NULL);
140928fbabcfSart 
141052887a38Smpi 	/*
141152887a38Smpi 	 * deactivate the page (to put it on a page queue).
141252887a38Smpi 	 */
141328fbabcfSart 	uvm_lock_pageq();
141428fbabcfSart 	uvm_pagedeactivate(pg);
141528fbabcfSart 	uvm_unlock_pageq();
141628fbabcfSart 
141728fbabcfSart 	return FALSE;
141828fbabcfSart }
14191731322cSespie 
14201731322cSespie /*
14211731322cSespie  * uao_dropswap_range: drop swap slots in the given range.
14221731322cSespie  *
14231731322cSespie  * => aobj must be locked and is returned locked.
14241731322cSespie  * => start is inclusive.  end is exclusive.
14251731322cSespie  */
14261731322cSespie void
14271731322cSespie uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
14281731322cSespie {
14291731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
14301731322cSespie 	int swpgonlydelta = 0;
14311731322cSespie 
143257296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
143369c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
14341731322cSespie 
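	/* An end of 0 means "to the end of the object". */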
14351731322cSespie 	if (end == 0) {
14361731322cSespie 		end = INT64_MAX;
14371731322cSespie 	}
14381731322cSespie 
143939c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
14401731322cSespie 		int i, hashbuckets = aobj->u_swhashmask + 1;
14411731322cSespie 		voff_t taghi;
14421731322cSespie 		voff_t taglo;
14431731322cSespie 
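		/*
		 * Compute the hash tags of the clusters that contain the
		 * first and the last page of the range.
		 */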
14441731322cSespie 		taglo = UAO_SWHASH_ELT_TAG(start);
14451731322cSespie 		taghi = UAO_SWHASH_ELT_TAG(end);
14461731322cSespie 
14471731322cSespie 		for (i = 0; i < hashbuckets; i++) {
14481731322cSespie 			struct uao_swhash_elt *elt, *next;
14491731322cSespie 
14501731322cSespie 			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
14511731322cSespie 			     elt != NULL;
14521731322cSespie 			     elt = next) {
14531731322cSespie 				int startidx, endidx;
14541731322cSespie 				int j;
14551731322cSespie 
14561731322cSespie 				next = LIST_NEXT(elt, list);
14571731322cSespie 
14581731322cSespie 				if (elt->tag < taglo || taghi < elt->tag) {
14591731322cSespie 					continue;
14601731322cSespie 				}
14611731322cSespie 
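				/*
				 * Clip the slot range to the part of it
				 * that overlaps this cluster.
				 */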
14621731322cSespie 				if (elt->tag == taglo) {
14631731322cSespie 					startidx =
14641731322cSespie 					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
14651731322cSespie 				} else {
14661731322cSespie 					startidx = 0;
14671731322cSespie 				}
14681731322cSespie 
14691731322cSespie 				if (elt->tag == taghi) {
14701731322cSespie 					endidx =
14711731322cSespie 					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
14721731322cSespie 				} else {
14731731322cSespie 					endidx = UAO_SWHASH_CLUSTER_SIZE;
14741731322cSespie 				}
14751731322cSespie 
14761731322cSespie 				for (j = startidx; j < endidx; j++) {
14771731322cSespie 					int slot = elt->slots[j];
14781731322cSespie 
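					/*
					 * The page must already be gone;
					 * only the swap slot is released
					 * here.
					 */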
14791731322cSespie 					KASSERT(uvm_pagelookup(&aobj->u_obj,
148036d5d901Skettenis 					    (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
14811731322cSespie 					    + j) << PAGE_SHIFT) == NULL);
14821731322cSespie 
14831731322cSespie 					if (slot > 0) {
14841731322cSespie 						uvm_swap_free(slot, 1);
14851731322cSespie 						swpgonlydelta++;
14861731322cSespie 						KASSERT(elt->count > 0);
14871731322cSespie 						elt->slots[j] = 0;
14881731322cSespie 						elt->count--;
14891731322cSespie 					}
14901731322cSespie 				}
14911731322cSespie 
14921731322cSespie 				if (elt->count == 0) {
14931731322cSespie 					LIST_REMOVE(elt, list);
14941731322cSespie 					pool_put(&uao_swhash_elt_pool, elt);
14951731322cSespie 				}
14961731322cSespie 			}
14971731322cSespie 		}
14981731322cSespie 	} else {
14991731322cSespie 		int i;
15001731322cSespie 
15011731322cSespie 		if (aobj->u_pages < end) {
15021731322cSespie 			end = aobj->u_pages;
15031731322cSespie 		}
15041731322cSespie 		for (i = start; i < end; i++) {
15051731322cSespie 			int slot = aobj->u_swslots[i];
15061731322cSespie 
15071731322cSespie 			if (slot > 0) {
15081731322cSespie 				uvm_swap_free(slot, 1);
15091731322cSespie 				swpgonlydelta++;
15101731322cSespie 			}
15111731322cSespie 		}
15121731322cSespie 	}
15131731322cSespie 
15141731322cSespie 	/*
15151731322cSespie 	 * Decrease the counter of pages that live only in swap by the
15161731322cSespie 	 * number of swap slots we've freed.
15171731322cSespie 	 */
15181731322cSespie 	if (swpgonlydelta > 0) {
15191731322cSespie 		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1520c4a864baSmpi 		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
15211731322cSespie 	}
15221731322cSespie }
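
/*
 * Usage sketch (illustrative, not part of the original file): since
 * an end of 0 means "to the end of the object", all swap slots
 * backing an aobj could be released with:
 *
 *	rw_enter(uobj->vmobjlock, RW_WRITE);
 *	uao_dropswap_range(uobj, 0, 0);
 *	rw_exit(uobj->vmobjlock);
 */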
1523