xref: /csrg-svn/sys/vm/swap_pager.c (revision 65231)
145749Smckusick /*
245749Smckusick  * Copyright (c) 1990 University of Utah.
363379Sbostic  * Copyright (c) 1991, 1993
463379Sbostic  *	The Regents of the University of California.  All rights reserved.
545749Smckusick  *
645749Smckusick  * This code is derived from software contributed to Berkeley by
745749Smckusick  * the Systems Programming Group of the University of Utah Computer
845749Smckusick  * Science Department.
945749Smckusick  *
1045749Smckusick  * %sccs.include.redist.c%
1145749Smckusick  *
1249289Shibler  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
1349289Shibler  *
14*65231Smckusick  *	@(#)swap_pager.c	8.5 (Berkeley) 12/30/93
1545749Smckusick  */
1645749Smckusick 
1745749Smckusick /*
1845749Smckusick  * Quick hack to page to dedicated partition(s).
1945749Smckusick  * TODO:
2045749Smckusick  *	Add multiprocessor locks
2145749Smckusick  *	Deal with async writes in a better fashion
2245749Smckusick  */
2345749Smckusick 
2453341Sbostic #include <sys/param.h>
2553341Sbostic #include <sys/systm.h>
2653341Sbostic #include <sys/proc.h>
2753341Sbostic #include <sys/buf.h>
2853341Sbostic #include <sys/map.h>
2953496Sheideman #include <sys/vnode.h>
3053341Sbostic #include <sys/malloc.h>
3145749Smckusick 
3255051Spendry #include <miscfs/specfs/specdev.h>
3355051Spendry 
3453341Sbostic #include <vm/vm.h>
3553341Sbostic #include <vm/vm_page.h>
3653341Sbostic #include <vm/vm_pageout.h>
3753341Sbostic #include <vm/swap_pager.h>
3845749Smckusick 
3945749Smckusick #define NSWSIZES	16	/* size of swtab */
4045749Smckusick #define NPENDINGIO	64	/* max # of pending cleans */
4145749Smckusick #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
4245749Smckusick 
4345749Smckusick #ifdef DEBUG
4445749Smckusick int	swpagerdebug = 0x100;
4545749Smckusick #define	SDB_FOLLOW	0x001
4645749Smckusick #define SDB_INIT	0x002
4745749Smckusick #define SDB_ALLOC	0x004
4845749Smckusick #define SDB_IO		0x008
4945749Smckusick #define SDB_WRITE	0x010
5045749Smckusick #define SDB_FAIL	0x020
5145749Smckusick #define SDB_ALLOCBLK	0x040
5245749Smckusick #define SDB_FULL	0x080
5345749Smckusick #define SDB_ANOM	0x100
5445749Smckusick #define SDB_ANOMPANIC	0x200
5545749Smckusick #endif
5645749Smckusick 
/*
 * Bookkeeping record for one asynchronous pageout ("clean") in flight.
 * A fixed pool of NPENDINGIO records lives in swcleanlist[]; each record
 * sits on either swap_pager_free or swap_pager_inuse.
 *
 *	spc_list	linkage on the free or inuse list
 *	spc_flags	SPC_* state bits
 *	spc_bp		buffer header carrying the write
 *	spc_swp		swap pager the write was issued for
 *	spc_kva		kernel virtual address the page was mapped at for the IO
 *	spc_m		the page being written
 */
57*65231Smckusick TAILQ_HEAD(swpclean, swpagerclean);
58*65231Smckusick 
5945749Smckusick struct swpagerclean {
60*65231Smckusick 	TAILQ_ENTRY(swpagerclean)	spc_list;
61*65231Smckusick 	int				spc_flags;
62*65231Smckusick 	struct buf			*spc_bp;
63*65231Smckusick 	sw_pager_t			spc_swp;
64*65231Smckusick 	vm_offset_t			spc_kva;
65*65231Smckusick 	vm_page_t			spc_m;
6645749Smckusick } swcleanlist[NPENDINGIO];
6753341Sbostic typedef struct swpagerclean *swp_clean_t;
6845749Smckusick 
6953341Sbostic 
7045749Smckusick /* spc_flags values */
7145749Smckusick #define SPC_FREE	0x00
7245749Smckusick #define SPC_BUSY	0x01
7345749Smckusick #define SPC_DONE	0x02
7445749Smckusick #define SPC_ERROR	0x04
7545749Smckusick #define SPC_DIRTY	0x08
7645749Smckusick 
/*
 * Table relating object size to the swap block size used for it.
 * swap_pager_init() fills it in with successively doubled block sizes
 * and ends it with an entry whose st_osize is 0; swap_pager_alloc()
 * takes the first entry whose st_osize is at least the object size,
 * falling through to the terminating entry for larger objects.
 */
7745749Smckusick struct swtab {
7845749Smckusick 	vm_size_t st_osize;	/* size of object (bytes) */
7945749Smckusick 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
8045749Smckusick #ifdef DEBUG
8145749Smckusick 	u_long	  st_inuse;	/* number in this range in use */
8245749Smckusick 	u_long	  st_usecnt;	/* total used of this size */
8345749Smckusick #endif
8445749Smckusick } swtab[NSWSIZES+1];
8545749Smckusick 
8645749Smckusick #ifdef DEBUG
8745749Smckusick int		swap_pager_pendingio;	/* max pending async "clean" ops */
8845749Smckusick int		swap_pager_poip;	/* pageouts in progress */
8945749Smckusick int		swap_pager_piip;	/* pageins in progress */
9045749Smckusick #endif
9145749Smckusick 
92*65231Smckusick struct swpclean	swap_pager_inuse;	/* list of pending page cleans */
93*65231Smckusick struct swpclean	swap_pager_free;	/* list of free pager clean structs */
94*65231Smckusick struct pagerlst	swap_pager_list;	/* list of "named" anon regions */
9545749Smckusick 
9653341Sbostic static int		swap_pager_finish __P((swp_clean_t));
9753341Sbostic static void 		swap_pager_init __P((void));
9864827Storek static vm_pager_t	swap_pager_alloc
9964827Storek 			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
10053341Sbostic static boolean_t	swap_pager_clean __P((vm_page_t, int));
10153341Sbostic static void		swap_pager_dealloc __P((vm_pager_t));
10253341Sbostic static int		swap_pager_getpage
10353341Sbostic 			    __P((vm_pager_t, vm_page_t, boolean_t));
10453341Sbostic static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
10553341Sbostic static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
10653341Sbostic static void		swap_pager_iodone __P((struct buf *));
10753341Sbostic static int		swap_pager_putpage
10853341Sbostic 			    __P((vm_pager_t, vm_page_t, boolean_t));
10953341Sbostic 
/*
 * Operations vector for the swap pager, listing its init, alloc,
 * dealloc, getpage, putpage and haspage routines in that order.
 * swap_pager_init() installs it as dfltpagerops and swap_pager_alloc()
 * stores it in the pg_ops field of every pager it creates.
 */
11053341Sbostic struct pagerops swappagerops = {
11153341Sbostic 	swap_pager_init,
11253341Sbostic 	swap_pager_alloc,
11353341Sbostic 	swap_pager_dealloc,
11453341Sbostic 	swap_pager_getpage,
11553341Sbostic 	swap_pager_putpage,
11653341Sbostic 	swap_pager_haspage
11753341Sbostic };
11853341Sbostic 
11953341Sbostic static void
12045749Smckusick swap_pager_init()
12145749Smckusick {
12245749Smckusick 	register swp_clean_t spc;
12345749Smckusick 	register int i, bsize;
12445749Smckusick 	extern int dmmin, dmmax;
12545749Smckusick 	int maxbsize;
12645749Smckusick 
12745749Smckusick #ifdef DEBUG
12845749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
12945749Smckusick 		printf("swpg_init()\n");
13045749Smckusick #endif
13145749Smckusick 	dfltpagerops = &swappagerops;
132*65231Smckusick 	TAILQ_INIT(&swap_pager_list);
13345749Smckusick 
13445749Smckusick 	/*
13545749Smckusick 	 * Initialize clean lists
13645749Smckusick 	 */
137*65231Smckusick 	TAILQ_INIT(&swap_pager_inuse);
138*65231Smckusick 	TAILQ_INIT(&swap_pager_free);
13945749Smckusick 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
140*65231Smckusick 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
14145749Smckusick 		spc->spc_flags = SPC_FREE;
14245749Smckusick 	}
14345749Smckusick 
14445749Smckusick 	/*
14545749Smckusick 	 * Calculate the swap allocation constants.
14645749Smckusick 	 */
14745749Smckusick         if (dmmin == 0) {
14845749Smckusick                 dmmin = DMMIN;
14945749Smckusick 		if (dmmin < CLBYTES/DEV_BSIZE)
15045749Smckusick 			dmmin = CLBYTES/DEV_BSIZE;
15145749Smckusick 	}
15245749Smckusick         if (dmmax == 0)
15345749Smckusick                 dmmax = DMMAX;
15445749Smckusick 
15545749Smckusick 	/*
15645749Smckusick 	 * Fill in our table of object size vs. allocation size
15745749Smckusick 	 */
15845749Smckusick 	bsize = btodb(PAGE_SIZE);
15945749Smckusick 	if (bsize < dmmin)
16045749Smckusick 		bsize = dmmin;
16145749Smckusick 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
16245749Smckusick 	if (maxbsize > dmmax)
16345749Smckusick 		maxbsize = dmmax;
16445749Smckusick 	for (i = 0; i < NSWSIZES; i++) {
16545749Smckusick 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
16645749Smckusick 		swtab[i].st_bsize = bsize;
16745749Smckusick #ifdef DEBUG
16845749Smckusick 		if (swpagerdebug & SDB_INIT)
16945749Smckusick 			printf("swpg_init: ix %d, size %x, bsize %x\n",
17045749Smckusick 			       i, swtab[i].st_osize, swtab[i].st_bsize);
17145749Smckusick #endif
17245749Smckusick 		if (bsize >= maxbsize)
17345749Smckusick 			break;
17445749Smckusick 		bsize *= 2;
17545749Smckusick 	}
17645749Smckusick 	swtab[i].st_osize = 0;
17745749Smckusick 	swtab[i].st_bsize = bsize;
17845749Smckusick }
17945749Smckusick 
18045749Smckusick /*
18145749Smckusick  * Allocate a pager structure and associated resources.
18245749Smckusick  * Note that if we are called from the pageout daemon (handle == NULL)
18345749Smckusick  * we should not wait for memory as it could resulting in deadlock.
18445749Smckusick  */
18553341Sbostic static vm_pager_t
18664827Storek swap_pager_alloc(handle, size, prot, foff)
18745749Smckusick 	caddr_t handle;
18845749Smckusick 	register vm_size_t size;
18945749Smckusick 	vm_prot_t prot;
19064827Storek 	vm_offset_t foff;
19145749Smckusick {
19245749Smckusick 	register vm_pager_t pager;
19345749Smckusick 	register sw_pager_t swp;
19445749Smckusick 	struct swtab *swt;
19545749Smckusick 	int waitok;
19645749Smckusick 
19745749Smckusick #ifdef DEBUG
19845749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
19945749Smckusick 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
20045749Smckusick #endif
20145749Smckusick 	/*
20245749Smckusick 	 * If this is a "named" anonymous region, look it up and
20345749Smckusick 	 * return the appropriate pager if it exists.
20445749Smckusick 	 */
20545749Smckusick 	if (handle) {
20645749Smckusick 		pager = vm_pager_lookup(&swap_pager_list, handle);
20748386Skarels 		if (pager != NULL) {
20845749Smckusick 			/*
20945749Smckusick 			 * Use vm_object_lookup to gain a reference
21045749Smckusick 			 * to the object and also to remove from the
21145749Smckusick 			 * object cache.
21245749Smckusick 			 */
21348386Skarels 			if (vm_object_lookup(pager) == NULL)
21445749Smckusick 				panic("swap_pager_alloc: bad object");
21545749Smckusick 			return(pager);
21645749Smckusick 		}
21745749Smckusick 	}
21845749Smckusick 	/*
21945749Smckusick 	 * Pager doesn't exist, allocate swap management resources
22045749Smckusick 	 * and initialize.
22145749Smckusick 	 */
22245749Smckusick 	waitok = handle ? M_WAITOK : M_NOWAIT;
22345749Smckusick 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
22448386Skarels 	if (pager == NULL)
22548386Skarels 		return(NULL);
22645749Smckusick 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
22745749Smckusick 	if (swp == NULL) {
22845749Smckusick #ifdef DEBUG
22945749Smckusick 		if (swpagerdebug & SDB_FAIL)
23045749Smckusick 			printf("swpg_alloc: swpager malloc failed\n");
23145749Smckusick #endif
23245749Smckusick 		free((caddr_t)pager, M_VMPAGER);
23348386Skarels 		return(NULL);
23445749Smckusick 	}
23545749Smckusick 	size = round_page(size);
23645749Smckusick 	for (swt = swtab; swt->st_osize; swt++)
23745749Smckusick 		if (size <= swt->st_osize)
23845749Smckusick 			break;
23945749Smckusick #ifdef DEBUG
24045749Smckusick 	swt->st_inuse++;
24145749Smckusick 	swt->st_usecnt++;
24245749Smckusick #endif
24345749Smckusick 	swp->sw_osize = size;
24445749Smckusick 	swp->sw_bsize = swt->st_bsize;
24545749Smckusick 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
24645749Smckusick 	swp->sw_blocks = (sw_blk_t)
24745749Smckusick 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
24845749Smckusick 		       M_VMPGDATA, M_NOWAIT);
24945749Smckusick 	if (swp->sw_blocks == NULL) {
25045749Smckusick 		free((caddr_t)swp, M_VMPGDATA);
25145749Smckusick 		free((caddr_t)pager, M_VMPAGER);
25245749Smckusick #ifdef DEBUG
25345749Smckusick 		if (swpagerdebug & SDB_FAIL)
25445749Smckusick 			printf("swpg_alloc: sw_blocks malloc failed\n");
25545749Smckusick 		swt->st_inuse--;
25645749Smckusick 		swt->st_usecnt--;
25745749Smckusick #endif
25845749Smckusick 		return(FALSE);
25945749Smckusick 	}
26045749Smckusick 	bzero((caddr_t)swp->sw_blocks,
26145749Smckusick 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
26245749Smckusick 	swp->sw_poip = 0;
26345749Smckusick 	if (handle) {
26445749Smckusick 		vm_object_t object;
26545749Smckusick 
26645749Smckusick 		swp->sw_flags = SW_NAMED;
267*65231Smckusick 		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
26845749Smckusick 		/*
26945749Smckusick 		 * Consistant with other pagers: return with object
27045749Smckusick 		 * referenced.  Can't do this with handle == NULL
27145749Smckusick 		 * since it might be the pageout daemon calling.
27245749Smckusick 		 */
27345749Smckusick 		object = vm_object_allocate(size);
27445749Smckusick 		vm_object_enter(object, pager);
27545749Smckusick 		vm_object_setpager(object, pager, 0, FALSE);
27645749Smckusick 	} else {
27745749Smckusick 		swp->sw_flags = 0;
278*65231Smckusick 		pager->pg_list.tqe_next = NULL;
279*65231Smckusick 		pager->pg_list.tqe_prev = NULL;
28045749Smckusick 	}
28145749Smckusick 	pager->pg_handle = handle;
28245749Smckusick 	pager->pg_ops = &swappagerops;
28345749Smckusick 	pager->pg_type = PG_SWAP;
28464860Shibler 	pager->pg_data = swp;
28545749Smckusick 
28645749Smckusick #ifdef DEBUG
28745749Smckusick 	if (swpagerdebug & SDB_ALLOC)
28845749Smckusick 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
28945749Smckusick 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
29045749Smckusick #endif
29145749Smckusick 	return(pager);
29245749Smckusick }
29345749Smckusick 
29453341Sbostic static void
29545749Smckusick swap_pager_dealloc(pager)
29645749Smckusick 	vm_pager_t pager;
29745749Smckusick {
29845749Smckusick 	register int i;
29945749Smckusick 	register sw_blk_t bp;
30045749Smckusick 	register sw_pager_t swp;
30145749Smckusick 	struct swtab *swt;
30245749Smckusick 	int s;
30345749Smckusick 
30445749Smckusick #ifdef DEBUG
30545749Smckusick 	/* save panic time state */
30645749Smckusick 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
30745749Smckusick 		return;
30845749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
30945749Smckusick 		printf("swpg_dealloc(%x)\n", pager);
31045749Smckusick #endif
31145749Smckusick 	/*
31245749Smckusick 	 * Remove from list right away so lookups will fail if we
31345749Smckusick 	 * block for pageout completion.
31445749Smckusick 	 */
31545749Smckusick 	swp = (sw_pager_t) pager->pg_data;
31645749Smckusick 	if (swp->sw_flags & SW_NAMED) {
317*65231Smckusick 		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
31845749Smckusick 		swp->sw_flags &= ~SW_NAMED;
31945749Smckusick 	}
32045749Smckusick #ifdef DEBUG
32145749Smckusick 	for (swt = swtab; swt->st_osize; swt++)
32245749Smckusick 		if (swp->sw_osize <= swt->st_osize)
32345749Smckusick 			break;
32445749Smckusick 	swt->st_inuse--;
32545749Smckusick #endif
32645749Smckusick 
32745749Smckusick 	/*
32845749Smckusick 	 * Wait for all pageouts to finish and remove
32945749Smckusick 	 * all entries from cleaning list.
33045749Smckusick 	 */
33145749Smckusick 	s = splbio();
33245749Smckusick 	while (swp->sw_poip) {
33345749Smckusick 		swp->sw_flags |= SW_WANTED;
33453341Sbostic 		assert_wait((int)swp, 0);
33545749Smckusick 		thread_block();
33645749Smckusick 	}
33745749Smckusick 	splx(s);
33848386Skarels 	(void) swap_pager_clean(NULL, B_WRITE);
33945749Smckusick 
34045749Smckusick 	/*
34145749Smckusick 	 * Free left over swap blocks
34245749Smckusick 	 */
34345749Smckusick 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
34445749Smckusick 		if (bp->swb_block) {
34545749Smckusick #ifdef DEBUG
34645749Smckusick 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
34745749Smckusick 				printf("swpg_dealloc: blk %x\n",
34845749Smckusick 				       bp->swb_block);
34945749Smckusick #endif
35045749Smckusick 			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
35145749Smckusick 		}
35245749Smckusick 	/*
35345749Smckusick 	 * Free swap management resources
35445749Smckusick 	 */
35545749Smckusick 	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
35645749Smckusick 	free((caddr_t)swp, M_VMPGDATA);
35745749Smckusick 	free((caddr_t)pager, M_VMPAGER);
35845749Smckusick }
35945749Smckusick 
36053341Sbostic static int
36145749Smckusick swap_pager_getpage(pager, m, sync)
36245749Smckusick 	vm_pager_t pager;
36345749Smckusick 	vm_page_t m;
36445749Smckusick 	boolean_t sync;
36545749Smckusick {
36645749Smckusick #ifdef DEBUG
36745749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
36845749Smckusick 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
36945749Smckusick #endif
37045749Smckusick 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
37145749Smckusick }
37245749Smckusick 
37353341Sbostic static int
37445749Smckusick swap_pager_putpage(pager, m, sync)
37545749Smckusick 	vm_pager_t pager;
37645749Smckusick 	vm_page_t m;
37745749Smckusick 	boolean_t sync;
37845749Smckusick {
37945749Smckusick 	int flags;
38045749Smckusick 
38145749Smckusick #ifdef DEBUG
38245749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
38345749Smckusick 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
38445749Smckusick #endif
38548386Skarels 	if (pager == NULL) {
38648386Skarels 		(void) swap_pager_clean(NULL, B_WRITE);
38754817Storek 		return (VM_PAGER_OK);		/* ??? */
38845749Smckusick 	}
38945749Smckusick 	flags = B_WRITE;
39045749Smckusick 	if (!sync)
39145749Smckusick 		flags |= B_ASYNC;
39245749Smckusick 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
39345749Smckusick }
39445749Smckusick 
39553341Sbostic static boolean_t
39645749Smckusick swap_pager_haspage(pager, offset)
39745749Smckusick 	vm_pager_t pager;
39845749Smckusick 	vm_offset_t offset;
39945749Smckusick {
40045749Smckusick 	register sw_pager_t swp;
40145749Smckusick 	register sw_blk_t swb;
40245749Smckusick 	int ix;
40345749Smckusick 
40445749Smckusick #ifdef DEBUG
40545749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
40645749Smckusick 		printf("swpg_haspage(%x, %x) ", pager, offset);
40745749Smckusick #endif
40845749Smckusick 	swp = (sw_pager_t) pager->pg_data;
40945749Smckusick 	ix = offset / dbtob(swp->sw_bsize);
41045749Smckusick 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
41145749Smckusick #ifdef DEBUG
41245749Smckusick 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
41345749Smckusick 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
41445749Smckusick 			       swp->sw_blocks, offset, ix);
41545749Smckusick #endif
41645749Smckusick 		return(FALSE);
41745749Smckusick 	}
41845749Smckusick 	swb = &swp->sw_blocks[ix];
41945749Smckusick 	if (swb->swb_block)
42045749Smckusick 		ix = atop(offset % dbtob(swp->sw_bsize));
42145749Smckusick #ifdef DEBUG
42245749Smckusick 	if (swpagerdebug & SDB_ALLOCBLK)
42345749Smckusick 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
42445749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
42545749Smckusick 		printf("-> %c\n",
42645749Smckusick 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
42745749Smckusick #endif
42845749Smckusick 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
42945749Smckusick 		return(TRUE);
43045749Smckusick 	return(FALSE);
43145749Smckusick }
43245749Smckusick 
43345749Smckusick /*
43445749Smckusick  * Scaled down version of swap().
43545749Smckusick  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
43645749Smckusick  * BOGUS:  lower level IO routines expect a KVA so we have to map our
43745749Smckusick  * provided physical page into the KVA to keep them happy.
43845749Smckusick  */
43953341Sbostic static int
44045749Smckusick swap_pager_io(swp, m, flags)
44145749Smckusick 	register sw_pager_t swp;
44245749Smckusick 	vm_page_t m;
44345749Smckusick 	int flags;
44445749Smckusick {
44545749Smckusick 	register struct buf *bp;
44645749Smckusick 	register sw_blk_t swb;
44745749Smckusick 	register int s;
44845749Smckusick 	int ix;
44945749Smckusick 	boolean_t rv;
45045749Smckusick 	vm_offset_t kva, off;
45145749Smckusick 	swp_clean_t spc;
45245749Smckusick 
45345749Smckusick #ifdef DEBUG
45445749Smckusick 	/* save panic time state */
45545749Smckusick 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
45653341Sbostic 		return (VM_PAGER_FAIL);		/* XXX: correct return? */
45745749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
45845749Smckusick 		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
45964860Shibler 	if ((flags & (B_READ|B_ASYNC)) == (B_READ|B_ASYNC))
46064860Shibler 		panic("swap_pager_io: cannot do ASYNC reads");
46145749Smckusick #endif
46245749Smckusick 
46345749Smckusick 	/*
46464860Shibler 	 * First determine if the page exists in the pager if this is
46564860Shibler 	 * a sync read.  This quickly handles cases where we are
46664860Shibler 	 * following shadow chains looking for the top level object
46764860Shibler 	 * with the page.
46864860Shibler 	 */
46964860Shibler 	off = m->offset + m->object->paging_offset;
47064860Shibler 	ix = off / dbtob(swp->sw_bsize);
47164860Shibler 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
47264860Shibler 		return(VM_PAGER_FAIL);
47364860Shibler 	swb = &swp->sw_blocks[ix];
47464860Shibler 	off = off % dbtob(swp->sw_bsize);
47564860Shibler 	if ((flags & B_READ) &&
47664860Shibler 	    (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
47764860Shibler 		return(VM_PAGER_FAIL);
47864860Shibler 
47964860Shibler 	/*
48045749Smckusick 	 * For reads (pageins) and synchronous writes, we clean up
48149289Shibler 	 * all completed async pageouts.
48245749Smckusick 	 */
48345749Smckusick 	if ((flags & B_ASYNC) == 0) {
48445749Smckusick 		s = splbio();
48549289Shibler #ifdef DEBUG
48649289Shibler 		/*
48749289Shibler 		 * Check to see if this page is currently being cleaned.
48849289Shibler 		 * If it is, we just wait til the operation is done before
48949289Shibler 		 * continuing.
49049289Shibler 		 */
49145749Smckusick 		while (swap_pager_clean(m, flags&B_READ)) {
49249289Shibler 			if (swpagerdebug & SDB_ANOM)
49349289Shibler 				printf("swap_pager_io: page %x cleaning\n", m);
49449289Shibler 
49545749Smckusick 			swp->sw_flags |= SW_WANTED;
49653341Sbostic 			assert_wait((int)swp, 0);
49745749Smckusick 			thread_block();
49845749Smckusick 		}
49949289Shibler #else
50049289Shibler 		(void) swap_pager_clean(m, flags&B_READ);
50149289Shibler #endif
50245749Smckusick 		splx(s);
50345749Smckusick 	}
50445749Smckusick 	/*
50545749Smckusick 	 * For async writes (pageouts), we cleanup completed pageouts so
50645749Smckusick 	 * that all available resources are freed.  Also tells us if this
50745749Smckusick 	 * page is already being cleaned.  If it is, or no resources
50845749Smckusick 	 * are available, we try again later.
50945749Smckusick 	 */
51049289Shibler 	else if (swap_pager_clean(m, B_WRITE) ||
511*65231Smckusick 		 swap_pager_free.tqh_first == NULL) {
51249289Shibler #ifdef DEBUG
51349289Shibler 		if ((swpagerdebug & SDB_ANOM) &&
514*65231Smckusick 		    swap_pager_free.tqh_first != NULL)
51549289Shibler 			printf("swap_pager_io: page %x already cleaning\n", m);
51649289Shibler #endif
51745749Smckusick 		return(VM_PAGER_FAIL);
51849289Shibler 	}
51945749Smckusick 
52045749Smckusick 	/*
52164860Shibler 	 * Allocate a swap block if necessary.
52245749Smckusick 	 */
52364860Shibler 	if (swb->swb_block == 0) {
52445749Smckusick 		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
52545749Smckusick 		if (swb->swb_block == 0) {
52645749Smckusick #ifdef DEBUG
52745749Smckusick 			if (swpagerdebug & SDB_FAIL)
52845749Smckusick 				printf("swpg_io: rmalloc of %x failed\n",
52945749Smckusick 				       swp->sw_bsize);
53045749Smckusick #endif
53145749Smckusick 			return(VM_PAGER_FAIL);
53245749Smckusick 		}
53345749Smckusick #ifdef DEBUG
53445749Smckusick 		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
53545749Smckusick 			printf("swpg_io: %x alloc blk %x at ix %x\n",
53645749Smckusick 			       swp->sw_blocks, swb->swb_block, ix);
53745749Smckusick #endif
53845749Smckusick 	}
53945749Smckusick 
54045749Smckusick 	/*
54145749Smckusick 	 * Allocate a kernel virtual address and initialize so that PTE
54245749Smckusick 	 * is available for lower level IO drivers.
54345749Smckusick 	 */
54445749Smckusick 	kva = vm_pager_map_page(m);
54545749Smckusick 
54645749Smckusick 	/*
54745749Smckusick 	 * Get a swap buffer header and perform the IO
54845749Smckusick 	 */
54945749Smckusick 	s = splbio();
55056393Smckusick 	while (bswlist.b_actf == NULL) {
55145749Smckusick #ifdef DEBUG
55245749Smckusick 		if (swpagerdebug & SDB_ANOM)
55349289Shibler 			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
55445749Smckusick 			       m, flags);
55545749Smckusick #endif
55645749Smckusick 		bswlist.b_flags |= B_WANTED;
55764860Shibler 		tsleep((caddr_t)&bswlist, PSWP+1, "swpgio", 0);
55845749Smckusick 	}
55956393Smckusick 	bp = bswlist.b_actf;
56056393Smckusick 	bswlist.b_actf = bp->b_actf;
56145749Smckusick 	splx(s);
56245749Smckusick 	bp->b_flags = B_BUSY | (flags & B_READ);
56348386Skarels 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
56464546Sbostic 	bp->b_data = (caddr_t)kva;
56545749Smckusick 	bp->b_blkno = swb->swb_block + btodb(off);
56645749Smckusick 	VHOLD(swapdev_vp);
56745749Smckusick 	bp->b_vp = swapdev_vp;
56846985Smckusick 	if (swapdev_vp->v_type == VBLK)
56946985Smckusick 		bp->b_dev = swapdev_vp->v_rdev;
57045749Smckusick 	bp->b_bcount = PAGE_SIZE;
57153213Smckusick 	if ((bp->b_flags & B_READ) == 0) {
57253213Smckusick 		bp->b_dirtyoff = 0;
57353213Smckusick 		bp->b_dirtyend = PAGE_SIZE;
57445749Smckusick 		swapdev_vp->v_numoutput++;
57553213Smckusick 	}
57645749Smckusick 
57745749Smckusick 	/*
57845749Smckusick 	 * If this is an async write we set up additional buffer fields
57945749Smckusick 	 * and place a "cleaning" entry on the inuse queue.
58045749Smckusick 	 */
58145749Smckusick 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
58245749Smckusick #ifdef DEBUG
583*65231Smckusick 		if (swap_pager_free.tqh_first == NULL)
58445749Smckusick 			panic("swpg_io: lost spc");
58545749Smckusick #endif
586*65231Smckusick 		spc = swap_pager_free.tqh_first;
587*65231Smckusick 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
58845749Smckusick #ifdef DEBUG
58945749Smckusick 		if (spc->spc_flags != SPC_FREE)
59045749Smckusick 			panic("swpg_io: bad free spc");
59145749Smckusick #endif
59245749Smckusick 		spc->spc_flags = SPC_BUSY;
59345749Smckusick 		spc->spc_bp = bp;
59445749Smckusick 		spc->spc_swp = swp;
59545749Smckusick 		spc->spc_kva = kva;
59645749Smckusick 		spc->spc_m = m;
59745749Smckusick 		bp->b_flags |= B_CALL;
59845749Smckusick 		bp->b_iodone = swap_pager_iodone;
59945749Smckusick 		s = splbio();
60045749Smckusick 		swp->sw_poip++;
601*65231Smckusick 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
60245749Smckusick 
60345749Smckusick #ifdef DEBUG
60445749Smckusick 		swap_pager_poip++;
60545749Smckusick 		if (swpagerdebug & SDB_WRITE)
60645749Smckusick 			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
60745749Smckusick 			       bp, swp, spc, swp->sw_poip);
60845749Smckusick 		if ((swpagerdebug & SDB_ALLOCBLK) &&
60945749Smckusick 		    (swb->swb_mask & (1 << atop(off))) == 0)
61045749Smckusick 			printf("swpg_io: %x write blk %x+%x\n",
61145749Smckusick 			       swp->sw_blocks, swb->swb_block, atop(off));
61245749Smckusick #endif
61345749Smckusick 		swb->swb_mask |= (1 << atop(off));
61445749Smckusick 		splx(s);
61545749Smckusick 	}
61645749Smckusick #ifdef DEBUG
61745749Smckusick 	if (swpagerdebug & SDB_IO)
61845749Smckusick 		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
61945749Smckusick 		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
62045749Smckusick #endif
62145749Smckusick 	VOP_STRATEGY(bp);
62245749Smckusick 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
62345749Smckusick #ifdef DEBUG
62445749Smckusick 		if (swpagerdebug & SDB_IO)
62545749Smckusick 			printf("swpg_io:  IO started: bp %x\n", bp);
62645749Smckusick #endif
62745749Smckusick 		return(VM_PAGER_PEND);
62845749Smckusick 	}
62945749Smckusick 	s = splbio();
63045749Smckusick #ifdef DEBUG
63145749Smckusick 	if (flags & B_READ)
63245749Smckusick 		swap_pager_piip++;
63345749Smckusick 	else
63445749Smckusick 		swap_pager_poip++;
63545749Smckusick #endif
63645749Smckusick 	while ((bp->b_flags & B_DONE) == 0) {
63753341Sbostic 		assert_wait((int)bp, 0);
63845749Smckusick 		thread_block();
63945749Smckusick 	}
64045749Smckusick #ifdef DEBUG
64145749Smckusick 	if (flags & B_READ)
64245749Smckusick 		--swap_pager_piip;
64345749Smckusick 	else
64445749Smckusick 		--swap_pager_poip;
64545749Smckusick #endif
64656320Shibler 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
64745749Smckusick 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
64856393Smckusick 	bp->b_actf = bswlist.b_actf;
64956393Smckusick 	bswlist.b_actf = bp;
65045749Smckusick 	if (bp->b_vp)
65145749Smckusick 		brelvp(bp);
65245749Smckusick 	if (bswlist.b_flags & B_WANTED) {
65345749Smckusick 		bswlist.b_flags &= ~B_WANTED;
65445749Smckusick 		thread_wakeup((int)&bswlist);
65545749Smckusick 	}
65645749Smckusick 	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
65756382Smckusick 		m->flags |= PG_CLEAN;
65845749Smckusick 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
65945749Smckusick 	}
66045749Smckusick 	splx(s);
66145749Smckusick #ifdef DEBUG
66245749Smckusick 	if (swpagerdebug & SDB_IO)
66345749Smckusick 		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
66456320Shibler 	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
66545749Smckusick 		printf("swpg_io: IO error\n");
66645749Smckusick #endif
66745749Smckusick 	vm_pager_unmap_page(kva);
66845749Smckusick 	return(rv);
66945749Smckusick }
67045749Smckusick 
67153341Sbostic static boolean_t
67245749Smckusick swap_pager_clean(m, rw)
67345749Smckusick 	vm_page_t m;
67445749Smckusick 	int rw;
67545749Smckusick {
67645749Smckusick 	register swp_clean_t spc, tspc;
67745749Smckusick 	register int s;
67845749Smckusick 
67945749Smckusick #ifdef DEBUG
68045749Smckusick 	/* save panic time state */
68145749Smckusick 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
68254817Storek 		return (FALSE);			/* ??? */
68345749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
68445749Smckusick 		printf("swpg_clean(%x, %d)\n", m, rw);
68545749Smckusick #endif
68648386Skarels 	tspc = NULL;
68745749Smckusick 	for (;;) {
68845749Smckusick 		/*
68945749Smckusick 		 * Look up and removal from inuse list must be done
69045749Smckusick 		 * at splbio() to avoid conflicts with swap_pager_iodone.
69145749Smckusick 		 */
69245749Smckusick 		s = splbio();
693*65231Smckusick 		for (spc = swap_pager_inuse.tqh_first;
694*65231Smckusick 		     spc != NULL;
695*65231Smckusick 		     spc = spc->spc_list.tqe_next) {
69645749Smckusick 			if ((spc->spc_flags & SPC_DONE) &&
69745749Smckusick 			    swap_pager_finish(spc)) {
698*65231Smckusick 				TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
69945749Smckusick 				break;
70045749Smckusick 			}
70145749Smckusick 			if (m && m == spc->spc_m) {
70245749Smckusick #ifdef DEBUG
70345749Smckusick 				if (swpagerdebug & SDB_ANOM)
70449289Shibler 					printf("swap_pager_clean: page %x on list, flags %x\n",
70545749Smckusick 					       m, spc->spc_flags);
70645749Smckusick #endif
70745749Smckusick 				tspc = spc;
70845749Smckusick 			}
70945749Smckusick 		}
71045749Smckusick 
71145749Smckusick 		/*
71245749Smckusick 		 * No operations done, thats all we can do for now.
71345749Smckusick 		 */
714*65231Smckusick 		if (spc == NULL)
71545749Smckusick 			break;
71645749Smckusick 		splx(s);
71745749Smckusick 
71845749Smckusick 		/*
71945749Smckusick 		 * The desired page was found to be busy earlier in
72045749Smckusick 		 * the scan but has since completed.
72145749Smckusick 		 */
72245749Smckusick 		if (tspc && tspc == spc) {
72345749Smckusick #ifdef DEBUG
72445749Smckusick 			if (swpagerdebug & SDB_ANOM)
72549289Shibler 				printf("swap_pager_clean: page %x done while looking\n",
72645749Smckusick 				       m);
72745749Smckusick #endif
72848386Skarels 			tspc = NULL;
72945749Smckusick 		}
73045749Smckusick 		spc->spc_flags = SPC_FREE;
73145749Smckusick 		vm_pager_unmap_page(spc->spc_kva);
732*65231Smckusick 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
73345749Smckusick #ifdef DEBUG
73445749Smckusick 		if (swpagerdebug & SDB_WRITE)
73545749Smckusick 			printf("swpg_clean: free spc %x\n", spc);
73645749Smckusick #endif
73745749Smckusick 	}
73849289Shibler #ifdef DEBUG
73945749Smckusick 	/*
74045749Smckusick 	 * If we found that the desired page is already being cleaned
74145749Smckusick 	 * mark it so that swap_pager_iodone() will not set the clean
74245749Smckusick 	 * flag before the pageout daemon has another chance to clean it.
74345749Smckusick 	 */
74445749Smckusick 	if (tspc && rw == B_WRITE) {
74545749Smckusick 		if (swpagerdebug & SDB_ANOM)
74649289Shibler 			printf("swap_pager_clean: page %x on clean list\n",
74749289Shibler 			       tspc);
74845749Smckusick 		tspc->spc_flags |= SPC_DIRTY;
74945749Smckusick 	}
75049289Shibler #endif
75145749Smckusick 	splx(s);
75245749Smckusick 
75345749Smckusick #ifdef DEBUG
75445749Smckusick 	if (swpagerdebug & SDB_WRITE)
75545749Smckusick 		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
75645749Smckusick 	if ((swpagerdebug & SDB_ANOM) && tspc)
75745749Smckusick 		printf("swpg_clean: %s of cleaning page %x\n",
75845749Smckusick 		       rw == B_READ ? "get" : "put", m);
75945749Smckusick #endif
76045749Smckusick 	return(tspc ? TRUE : FALSE);
76145749Smckusick }
76245749Smckusick 
76353341Sbostic static int
76445749Smckusick swap_pager_finish(spc)
76545749Smckusick 	register swp_clean_t spc;
76645749Smckusick {
76745749Smckusick 	vm_object_t object = spc->spc_m->object;
76845749Smckusick 
76945749Smckusick 	/*
77045749Smckusick 	 * Mark the paging operation as done.
77145749Smckusick 	 * (XXX) If we cannot get the lock, leave it til later.
77245749Smckusick 	 * (XXX) Also we are assuming that an async write is a
77345749Smckusick 	 *       pageout operation that has incremented the counter.
77445749Smckusick 	 */
77545749Smckusick 	if (!vm_object_lock_try(object))
77645749Smckusick 		return(0);
77745749Smckusick 
77845749Smckusick 	if (--object->paging_in_progress == 0)
77945749Smckusick 		thread_wakeup((int) object);
78045749Smckusick 
78149289Shibler #ifdef DEBUG
78245749Smckusick 	/*
78345749Smckusick 	 * XXX: this isn't even close to the right thing to do,
78445749Smckusick 	 * introduces a variety of race conditions.
78545749Smckusick 	 *
78645749Smckusick 	 * If dirty, vm_pageout() has attempted to clean the page
78745749Smckusick 	 * again.  In this case we do not do anything as we will
78849289Shibler 	 * see the page again shortly.
78945749Smckusick 	 */
79049289Shibler 	if (spc->spc_flags & SPC_DIRTY) {
79149289Shibler 		if (swpagerdebug & SDB_ANOM)
79249289Shibler 			printf("swap_pager_finish: page %x dirty again\n",
79349289Shibler 			       spc->spc_m);
79456382Smckusick 		spc->spc_m->flags &= ~PG_BUSY;
79549289Shibler 		PAGE_WAKEUP(spc->spc_m);
79649289Shibler 		vm_object_unlock(object);
79749289Shibler 		return(1);
79845749Smckusick 	}
79949289Shibler #endif
80045749Smckusick 	/*
80149289Shibler 	 * If no error mark as clean and inform the pmap system.
80249289Shibler 	 * If error, mark as dirty so we will try again.
80349289Shibler 	 * (XXX could get stuck doing this, should give up after awhile)
80445749Smckusick 	 */
80549289Shibler 	if (spc->spc_flags & SPC_ERROR) {
80649289Shibler 		printf("swap_pager_finish: clean of page %x failed\n",
80749289Shibler 		       VM_PAGE_TO_PHYS(spc->spc_m));
80856382Smckusick 		spc->spc_m->flags |= PG_LAUNDRY;
80949289Shibler 	} else {
81056382Smckusick 		spc->spc_m->flags |= PG_CLEAN;
81149289Shibler 		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
81249289Shibler 	}
81356382Smckusick 	spc->spc_m->flags &= ~PG_BUSY;
81445749Smckusick 	PAGE_WAKEUP(spc->spc_m);
81545749Smckusick 
81645749Smckusick 	vm_object_unlock(object);
81745749Smckusick 	return(1);
81845749Smckusick }
81945749Smckusick 
82053341Sbostic static void
82145749Smckusick swap_pager_iodone(bp)
82245749Smckusick 	register struct buf *bp;
82345749Smckusick {
82445749Smckusick 	register swp_clean_t spc;
82545749Smckusick 	daddr_t blk;
82645749Smckusick 	int s;
82745749Smckusick 
82845749Smckusick #ifdef DEBUG
82945749Smckusick 	/* save panic time state */
83045749Smckusick 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
83145749Smckusick 		return;
83245749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
83345749Smckusick 		printf("swpg_iodone(%x)\n", bp);
83445749Smckusick #endif
83545749Smckusick 	s = splbio();
836*65231Smckusick 	for (spc = swap_pager_inuse.tqh_first;
837*65231Smckusick 	     spc != NULL;
838*65231Smckusick 	     spc = spc->spc_list.tqe_next)
83945749Smckusick 		if (spc->spc_bp == bp)
84045749Smckusick 			break;
84145749Smckusick #ifdef DEBUG
842*65231Smckusick 	if (spc == NULL)
84349289Shibler 		panic("swap_pager_iodone: bp not found");
84445749Smckusick #endif
84545749Smckusick 
84645749Smckusick 	spc->spc_flags &= ~SPC_BUSY;
84745749Smckusick 	spc->spc_flags |= SPC_DONE;
84845749Smckusick 	if (bp->b_flags & B_ERROR)
84945749Smckusick 		spc->spc_flags |= SPC_ERROR;
85045749Smckusick 	spc->spc_bp = NULL;
85145749Smckusick 	blk = bp->b_blkno;
85245749Smckusick 
85345749Smckusick #ifdef DEBUG
85445749Smckusick 	--swap_pager_poip;
85545749Smckusick 	if (swpagerdebug & SDB_WRITE)
85645749Smckusick 		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
85745749Smckusick 		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
85845749Smckusick 		       spc, spc->spc_swp->sw_poip);
85945749Smckusick #endif
86045749Smckusick 
86145749Smckusick 	spc->spc_swp->sw_poip--;
86245749Smckusick 	if (spc->spc_swp->sw_flags & SW_WANTED) {
86345749Smckusick 		spc->spc_swp->sw_flags &= ~SW_WANTED;
86445749Smckusick 		thread_wakeup((int)spc->spc_swp);
86545749Smckusick 	}
86645749Smckusick 
86745749Smckusick 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
86856393Smckusick 	bp->b_actf = bswlist.b_actf;
86956393Smckusick 	bswlist.b_actf = bp;
87045749Smckusick 	if (bp->b_vp)
87145749Smckusick 		brelvp(bp);
87245749Smckusick 	if (bswlist.b_flags & B_WANTED) {
87345749Smckusick 		bswlist.b_flags &= ~B_WANTED;
87445749Smckusick 		thread_wakeup((int)&bswlist);
87545749Smckusick 	}
87656917Shibler 	/*
87756917Shibler 	 * Only kick the pageout daemon if we are really hurting
87856917Shibler 	 * for pages, otherwise this page will be picked up later.
87956917Shibler 	 */
88056917Shibler 	if (cnt.v_free_count < cnt.v_free_min)
88156917Shibler 		thread_wakeup((int) &vm_pages_needed);
88245749Smckusick 	splx(s);
88345749Smckusick }
884