xref: /csrg-svn/sys/vm/swap_pager.c (revision 46985)
145749Smckusick /*
245749Smckusick  * Copyright (c) 1990 University of Utah.
345749Smckusick  * Copyright (c) 1991 The Regents of the University of California.
445749Smckusick  * All rights reserved.
545749Smckusick  *
645749Smckusick  * This code is derived from software contributed to Berkeley by
745749Smckusick  * the Systems Programming Group of the University of Utah Computer
845749Smckusick  * Science Department.
945749Smckusick  *
1045749Smckusick  * %sccs.include.redist.c%
1145749Smckusick  *
12*46985Smckusick  *	@(#)swap_pager.c	7.2 (Berkeley) 03/04/91
1345749Smckusick  */
1445749Smckusick 
1545749Smckusick /*
1645749Smckusick  * Quick hack to page to dedicated partition(s).
1745749Smckusick  * TODO:
1845749Smckusick  *	Add multiprocessor locks
1945749Smckusick  *	Deal with async writes in a better fashion
2045749Smckusick  */
2145749Smckusick 
2245749Smckusick #include "swappager.h"
2345749Smckusick #if NSWAPPAGER > 0
2445749Smckusick 
2545749Smckusick #include "param.h"
2645749Smckusick #include "user.h"
2745749Smckusick #include "proc.h"
2845749Smckusick #include "buf.h"
2945749Smckusick #include "map.h"
3045749Smckusick #include "systm.h"
3145749Smckusick #include "specdev.h"
3245749Smckusick #include "vnode.h"
3345749Smckusick #include "malloc.h"
3445749Smckusick #include "queue.h"
3545749Smckusick 
3645749Smckusick #include "../vm/vm_param.h"
3745749Smckusick #include "../vm/vm_pager.h"
3845749Smckusick #include "../vm/vm_page.h"
3945749Smckusick #include "../vm/vm_pageout.h"
4045749Smckusick #include "../vm/swap_pager.h"
4145749Smckusick 
4245749Smckusick #define NSWSIZES	16	/* size of swtab */
4345749Smckusick #define NPENDINGIO	64	/* max # of pending cleans */
4445749Smckusick #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
4545749Smckusick 
4645749Smckusick #ifdef DEBUG
4745749Smckusick int	swpagerdebug = 0x100;
4845749Smckusick #define	SDB_FOLLOW	0x001
4945749Smckusick #define SDB_INIT	0x002
5045749Smckusick #define SDB_ALLOC	0x004
5145749Smckusick #define SDB_IO		0x008
5245749Smckusick #define SDB_WRITE	0x010
5345749Smckusick #define SDB_FAIL	0x020
5445749Smckusick #define SDB_ALLOCBLK	0x040
5545749Smckusick #define SDB_FULL	0x080
5645749Smckusick #define SDB_ANOM	0x100
5745749Smckusick #define SDB_ANOMPANIC	0x200
5845749Smckusick #endif
5945749Smckusick 
6045749Smckusick struct swpagerclean {
6145749Smckusick 	queue_head_t		spc_list;
6245749Smckusick 	int			spc_flags;
6345749Smckusick 	struct buf		*spc_bp;
6445749Smckusick 	sw_pager_t		spc_swp;
6545749Smckusick 	vm_offset_t		spc_kva;
6645749Smckusick 	vm_page_t		spc_m;
6745749Smckusick } swcleanlist[NPENDINGIO];
6845749Smckusick typedef	struct swpagerclean	*swp_clean_t;
6945749Smckusick 
7045749Smckusick #define SWP_CLEAN_NULL		((swp_clean_t)0)
7145749Smckusick 
7245749Smckusick /* spc_flags values */
7345749Smckusick #define SPC_FREE	0x00
7445749Smckusick #define SPC_BUSY	0x01
7545749Smckusick #define SPC_DONE	0x02
7645749Smckusick #define SPC_ERROR	0x04
7745749Smckusick #define SPC_DIRTY	0x08
7845749Smckusick 
7945749Smckusick struct swtab {
8045749Smckusick 	vm_size_t st_osize;	/* size of object (bytes) */
8145749Smckusick 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
8245749Smckusick #ifdef DEBUG
8345749Smckusick 	u_long	  st_inuse;	/* number in this range in use */
8445749Smckusick 	u_long	  st_usecnt;	/* total used of this size */
8545749Smckusick #endif
8645749Smckusick } swtab[NSWSIZES+1];
8745749Smckusick 
8845749Smckusick #ifdef DEBUG
8945749Smckusick int		swap_pager_pendingio;	/* max pending async "clean" ops */
9045749Smckusick int		swap_pager_poip;	/* pageouts in progress */
9145749Smckusick int		swap_pager_piip;	/* pageins in progress */
9245749Smckusick #endif
9345749Smckusick 
9445749Smckusick queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
9545749Smckusick queue_head_t	swap_pager_free;	/* list of free pager clean structs */
9645749Smckusick queue_head_t	swap_pager_list;	/* list of "named" anon regions */
9745749Smckusick 
/*
 * One-time initialization of the swap pager.  Installs the swap
 * pager as the default pager, initializes the free/inuse lists of
 * async "clean" (pageout) entries, establishes the swap allocation
 * constants dmmin/dmmax, and fills in swtab, the table mapping
 * object size to per-object swap block size.
 */
void
swap_pager_init()
{
	register swp_clean_t spc;
	register int i, bsize;
	extern int dmmin, dmmax;	/* swap allocation constants */
	int maxbsize;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
		printf("swpg_init()\n");
#endif
	dfltpagerops = &swappagerops;
	queue_init(&swap_pager_list);

	/*
	 * Initialize clean lists: all NPENDINGIO statically allocated
	 * entries start out on the free list.
	 */
	queue_init(&swap_pager_inuse);
	queue_init(&swap_pager_free);
	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
		spc->spc_flags = SPC_FREE;
	}

	/*
	 * Calculate the swap allocation constants.
	 * dmmin must cover at least one page cluster.
	 */
        if (dmmin == 0) {
                dmmin = DMMIN;
		if (dmmin < CLBYTES/DEV_BSIZE)
			dmmin = CLBYTES/DEV_BSIZE;
	}
        if (dmmax == 0)
                dmmax = DMMAX;

	/*
	 * Fill in our table of object size vs. allocation size.
	 * Block sizes run in powers of two from at least one page
	 * (and at least dmmin) up to maxbsize, which is capped both
	 * by dmmax and by the number of pages representable in a
	 * single sw_bm_t bitmask.
	 */
	bsize = btodb(PAGE_SIZE);
	if (bsize < dmmin)
		bsize = dmmin;
	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
	if (maxbsize > dmmax)
		maxbsize = dmmax;
	for (i = 0; i < NSWSIZES; i++) {
		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
		swtab[i].st_bsize = bsize;
#ifdef DEBUG
		if (swpagerdebug & SDB_INIT)
			printf("swpg_init: ix %d, size %x, bsize %x\n",
			       i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
		if (bsize >= maxbsize)
			break;
		bsize *= 2;
	}
	/* terminal entry: osize == 0 catches all larger objects */
	swtab[i].st_osize = 0;
	swtab[i].st_bsize = bsize;
}
15845749Smckusick 
15945749Smckusick /*
16045749Smckusick  * Allocate a pager structure and associated resources.
16145749Smckusick  * Note that if we are called from the pageout daemon (handle == NULL)
16245749Smckusick  * we should not wait for memory as it could result in deadlock.
16345749Smckusick  */
16445749Smckusick vm_pager_t
16545749Smckusick swap_pager_alloc(handle, size, prot)
16645749Smckusick 	caddr_t handle;
16745749Smckusick 	register vm_size_t size;
16845749Smckusick 	vm_prot_t prot;
16945749Smckusick {
17045749Smckusick 	register vm_pager_t pager;
17145749Smckusick 	register sw_pager_t swp;
17245749Smckusick 	struct swtab *swt;
17345749Smckusick 	int waitok;
17445749Smckusick 
17545749Smckusick #ifdef DEBUG
17645749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
17745749Smckusick 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
17845749Smckusick #endif
17945749Smckusick 	/*
18045749Smckusick 	 * If this is a "named" anonymous region, look it up and
18145749Smckusick 	 * return the appropriate pager if it exists.
18245749Smckusick 	 */
18345749Smckusick 	if (handle) {
18445749Smckusick 		pager = vm_pager_lookup(&swap_pager_list, handle);
18545749Smckusick 		if (pager != VM_PAGER_NULL) {
18645749Smckusick 			/*
18745749Smckusick 			 * Use vm_object_lookup to gain a reference
18845749Smckusick 			 * to the object and also to remove from the
18945749Smckusick 			 * object cache.
19045749Smckusick 			 */
19145749Smckusick 			if (vm_object_lookup(pager) == VM_OBJECT_NULL)
19245749Smckusick 				panic("swap_pager_alloc: bad object");
19345749Smckusick 			return(pager);
19445749Smckusick 		}
19545749Smckusick 	}
19645749Smckusick 	/*
19745749Smckusick 	 * Pager doesn't exist, allocate swap management resources
19845749Smckusick 	 * and initialize.
19945749Smckusick 	 */
20045749Smckusick 	waitok = handle ? M_WAITOK : M_NOWAIT;
20145749Smckusick 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
20245749Smckusick 	if (pager == VM_PAGER_NULL)
20345749Smckusick 		return(VM_PAGER_NULL);
20445749Smckusick 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
20545749Smckusick 	if (swp == NULL) {
20645749Smckusick #ifdef DEBUG
20745749Smckusick 		if (swpagerdebug & SDB_FAIL)
20845749Smckusick 			printf("swpg_alloc: swpager malloc failed\n");
20945749Smckusick #endif
21045749Smckusick 		free((caddr_t)pager, M_VMPAGER);
21145749Smckusick 		return(VM_PAGER_NULL);
21245749Smckusick 	}
21345749Smckusick 	size = round_page(size);
21445749Smckusick 	for (swt = swtab; swt->st_osize; swt++)
21545749Smckusick 		if (size <= swt->st_osize)
21645749Smckusick 			break;
21745749Smckusick #ifdef DEBUG
21845749Smckusick 	swt->st_inuse++;
21945749Smckusick 	swt->st_usecnt++;
22045749Smckusick #endif
22145749Smckusick 	swp->sw_osize = size;
22245749Smckusick 	swp->sw_bsize = swt->st_bsize;
22345749Smckusick 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
22445749Smckusick 	swp->sw_blocks = (sw_blk_t)
22545749Smckusick 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
22645749Smckusick 		       M_VMPGDATA, M_NOWAIT);
22745749Smckusick 	if (swp->sw_blocks == NULL) {
22845749Smckusick 		free((caddr_t)swp, M_VMPGDATA);
22945749Smckusick 		free((caddr_t)pager, M_VMPAGER);
23045749Smckusick #ifdef DEBUG
23145749Smckusick 		if (swpagerdebug & SDB_FAIL)
23245749Smckusick 			printf("swpg_alloc: sw_blocks malloc failed\n");
23345749Smckusick 		swt->st_inuse--;
23445749Smckusick 		swt->st_usecnt--;
23545749Smckusick #endif
23645749Smckusick 		return(FALSE);
23745749Smckusick 	}
23845749Smckusick 	bzero((caddr_t)swp->sw_blocks,
23945749Smckusick 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
24045749Smckusick 	swp->sw_poip = 0;
24145749Smckusick 	if (handle) {
24245749Smckusick 		vm_object_t object;
24345749Smckusick 
24445749Smckusick 		swp->sw_flags = SW_NAMED;
24545749Smckusick 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
24645749Smckusick 		/*
24745749Smckusick 		 * Consistant with other pagers: return with object
24845749Smckusick 		 * referenced.  Can't do this with handle == NULL
24945749Smckusick 		 * since it might be the pageout daemon calling.
25045749Smckusick 		 */
25145749Smckusick 		object = vm_object_allocate(size);
25245749Smckusick 		vm_object_enter(object, pager);
25345749Smckusick 		vm_object_setpager(object, pager, 0, FALSE);
25445749Smckusick 	} else {
25545749Smckusick 		swp->sw_flags = 0;
25645749Smckusick 		queue_init(&pager->pg_list);
25745749Smckusick 	}
25845749Smckusick 	pager->pg_handle = handle;
25945749Smckusick 	pager->pg_ops = &swappagerops;
26045749Smckusick 	pager->pg_type = PG_SWAP;
26145749Smckusick 	pager->pg_data = (caddr_t)swp;
26245749Smckusick 
26345749Smckusick #ifdef DEBUG
26445749Smckusick 	if (swpagerdebug & SDB_ALLOC)
26545749Smckusick 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
26645749Smckusick 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
26745749Smckusick #endif
26845749Smckusick 	return(pager);
26945749Smckusick }
27045749Smckusick 
/*
 * Release a swap pager: remove it from the named-region list,
 * wait for all pageouts in progress to drain, return its swap
 * blocks to the swap resource map and free the descriptor
 * structures.
 */
void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* drop the in-use count for this object's size class */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.  swap_pager_iodone()
	 * wakes us via SW_WANTED when sw_poip drops.
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(VM_PAGE_NULL, B_WRITE);

	/*
	 * Free left over swap blocks
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
33645749Smckusick 
33745749Smckusick swap_pager_getpage(pager, m, sync)
33845749Smckusick 	vm_pager_t pager;
33945749Smckusick 	vm_page_t m;
34045749Smckusick 	boolean_t sync;
34145749Smckusick {
34245749Smckusick #ifdef DEBUG
34345749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
34445749Smckusick 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
34545749Smckusick #endif
34645749Smckusick 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
34745749Smckusick }
34845749Smckusick 
34945749Smckusick swap_pager_putpage(pager, m, sync)
35045749Smckusick 	vm_pager_t pager;
35145749Smckusick 	vm_page_t m;
35245749Smckusick 	boolean_t sync;
35345749Smckusick {
35445749Smckusick 	int flags;
35545749Smckusick 
35645749Smckusick #ifdef DEBUG
35745749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
35845749Smckusick 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
35945749Smckusick #endif
36045749Smckusick 	if (pager == VM_PAGER_NULL) {
36145749Smckusick 		(void) swap_pager_clean(VM_PAGE_NULL, B_WRITE);
36245749Smckusick 		return;
36345749Smckusick 	}
36445749Smckusick 	flags = B_WRITE;
36545749Smckusick 	if (!sync)
36645749Smckusick 		flags |= B_ASYNC;
36745749Smckusick 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
36845749Smckusick }
36945749Smckusick 
37045749Smckusick boolean_t
37145749Smckusick swap_pager_haspage(pager, offset)
37245749Smckusick 	vm_pager_t pager;
37345749Smckusick 	vm_offset_t offset;
37445749Smckusick {
37545749Smckusick 	register sw_pager_t swp;
37645749Smckusick 	register sw_blk_t swb;
37745749Smckusick 	int ix;
37845749Smckusick 
37945749Smckusick #ifdef DEBUG
38045749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
38145749Smckusick 		printf("swpg_haspage(%x, %x) ", pager, offset);
38245749Smckusick #endif
38345749Smckusick 	swp = (sw_pager_t) pager->pg_data;
38445749Smckusick 	ix = offset / dbtob(swp->sw_bsize);
38545749Smckusick 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
38645749Smckusick #ifdef DEBUG
38745749Smckusick 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
38845749Smckusick 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
38945749Smckusick 			       swp->sw_blocks, offset, ix);
39045749Smckusick #endif
39145749Smckusick 		return(FALSE);
39245749Smckusick 	}
39345749Smckusick 	swb = &swp->sw_blocks[ix];
39445749Smckusick 	if (swb->swb_block)
39545749Smckusick 		ix = atop(offset % dbtob(swp->sw_bsize));
39645749Smckusick #ifdef DEBUG
39745749Smckusick 	if (swpagerdebug & SDB_ALLOCBLK)
39845749Smckusick 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
39945749Smckusick 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
40045749Smckusick 		printf("-> %c\n",
40145749Smckusick 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
40245749Smckusick #endif
40345749Smckusick 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
40445749Smckusick 		return(TRUE);
40545749Smckusick 	return(FALSE);
40645749Smckusick }
40745749Smckusick 
40845749Smckusick /*
40945749Smckusick  * Scaled down version of swap().
41045749Smckusick  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
41145749Smckusick  * BOGUS:  lower level IO routines expect a KVA so we have to map our
41245749Smckusick  * provided physical page into the KVA to keep them happy.
41345749Smckusick  */
/*
 * Perform a pagein (B_READ) or pageout (B_WRITE, optionally
 * B_ASYNC) of the single page "m" against this pager's swap area.
 * Returns VM_PAGER_OK, VM_PAGER_FAIL, or VM_PAGER_PEND (async
 * write successfully started).
 */
swap_pager_io(swp, m, flags)
	register sw_pager_t swp;
	vm_page_t m;
	int flags;
{
	register struct buf *bp;
	register sw_blk_t swb;
	register int s;
	int ix;
	boolean_t rv;
	vm_offset_t kva, off;
	swp_clean_t spc;

#ifdef DEBUG
	/* save panic time state */
	/*
	 * NOTE(review): bare "return;" in a function whose (implicit
	 * int) result is used by callers -- the value is undefined on
	 * this panic-time path.
	 */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
#endif

	/*
	 * For reads (pageins) and synchronous writes, we clean up
	 * all completed async pageouts and check to see if this
	 * page is currently being cleaned.  If it is, we just wait
	 * til the operation is done before continuing.
	 */
	if ((flags & B_ASYNC) == 0) {
		s = splbio();
		while (swap_pager_clean(m, flags&B_READ)) {
			swp->sw_flags |= SW_WANTED;
			assert_wait((int)swp);
			thread_block();
		}
		splx(s);
	}
	/*
	 * For async writes (pageouts), we cleanup completed pageouts so
	 * that all available resources are freed.  Also tells us if this
	 * page is already being cleaned.  If it is, or no resources
	 * are available, we try again later.
	 */
	else if (swap_pager_clean(m, B_WRITE) || queue_empty(&swap_pager_free))
		return(VM_PAGER_FAIL);

	/*
	 * Determine swap block and allocate as necessary.
	 * "off" is the page's byte offset within the paging object.
	 */
	off = m->offset + m->object->paging_offset;
	ix = off / dbtob(swp->sw_bsize);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
		if (swpagerdebug & SDB_FAIL)
			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
			       m->offset, m->object->paging_offset,
			       ix, swp->sw_blocks);
#endif
		return(VM_PAGER_FAIL);
	}
	swb = &swp->sw_blocks[ix];
	/* reuse "off" as the byte offset within the swap block */
	off = off % dbtob(swp->sw_bsize);
	if (flags & B_READ) {
		/* reading a page that was never written out fails */
		if (swb->swb_block == 0 ||
		    (swb->swb_mask & (1 << atop(off))) == 0) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
				       swp->sw_blocks,
				       swb->swb_block, atop(off),
				       swb->swb_mask,
				       m->offset, m->object->paging_offset);
#endif
			/* XXX: should we zero page here?? */
			return(VM_PAGER_FAIL);
		}
	} else if (swb->swb_block == 0) {
		/* first write into this range: allocate the swap block */
		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
		if (swb->swb_block == 0) {
#ifdef DEBUG
			if (swpagerdebug & SDB_FAIL)
				printf("swpg_io: rmalloc of %x failed\n",
				       swp->sw_bsize);
#endif
			return(VM_PAGER_FAIL);
		}
#ifdef DEBUG
		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
			printf("swpg_io: %x alloc blk %x at ix %x\n",
			       swp->sw_blocks, swb->swb_block, ix);
#endif
	}

	/*
	 * Allocate a kernel virtual address and initialize so that PTE
	 * is available for lower level IO drivers.
	 */
	kva = vm_pager_map_page(m);

	/*
	 * Get a swap buffer header and perform the IO.
	 * bswlist manipulation must be done at splbio() since
	 * buffers are returned to the list from interrupt level.
	 */
	s = splbio();
	while (bswlist.av_forw == NULL) {
#ifdef DEBUG
		if (swpagerdebug & SDB_ANOM)
			printf("swpg_io: wait on swbuf for %x (%d)\n",
			       m, flags);
#endif
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);
	bp->b_flags = B_BUSY | (flags & B_READ);
	bp->b_proc = &proc[0];	/* XXX (but without B_PHYS set this is ok) */
	bp->b_un.b_addr = (caddr_t)kva;
	bp->b_blkno = swb->swb_block + btodb(off);
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
	bp->b_bcount = PAGE_SIZE;
	if ((bp->b_flags & B_READ) == 0)
		swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (queue_empty(&swap_pager_free))
			panic("swpg_io: lost spc");
#endif
		queue_remove_first(&swap_pager_free,
				   spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (spc->spc_flags != SPC_FREE)
			panic("swpg_io: bad free spc");
#endif
		spc->spc_flags = SPC_BUSY;
		spc->spc_bp = bp;
		spc->spc_swp = swp;
		spc->spc_kva = kva;
		spc->spc_m = m;
#ifdef DEBUG
		m->pagerowned = 1;
#endif
		/* completion handled at interrupt time via b_iodone */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		s = splbio();
		swp->sw_poip++;
		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);

#ifdef DEBUG
		swap_pager_poip++;
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
			       bp, swp, spc, swp->sw_poip);
		if ((swpagerdebug & SDB_ALLOCBLK) &&
		    (swb->swb_mask & (1 << atop(off))) == 0)
			printf("swpg_io: %x write blk %x+%x\n",
			       swp->sw_blocks, swb->swb_block, atop(off));
#endif
		/* mark the page as present on swap */
		swb->swb_mask |= (1 << atop(off));
		/*
		 * XXX: Block write faults til we are done.
		 */
		m->page_lock = VM_PROT_WRITE;
		m->unlock_request = VM_PROT_ALL;
		pmap_copy_on_write(VM_PAGE_TO_PHYS(m));
		splx(s);
	}
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
		/* async write: completion is reported later via PEND */
#ifdef DEBUG
		if (swpagerdebug & SDB_IO)
			printf("swpg_io:  IO started: bp %x\n", bp);
#endif
		return(VM_PAGER_PEND);
	}
	/*
	 * Synchronous case: wait for the I/O to complete, then
	 * return the buffer to bswlist and report the result.
	 */
	s = splbio();
#ifdef DEBUG
	if (flags & B_READ)
		swap_pager_piip++;
	else
		swap_pager_poip++;
#endif
	while ((bp->b_flags & B_DONE) == 0) {
		assert_wait((int)bp);
		thread_block();
	}
#ifdef DEBUG
	if (flags & B_READ)
		--swap_pager_piip;
	else
		--swap_pager_poip;
#endif
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
		/* successful pageout: page now matches its swap copy */
		m->clean = 1;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	}
	splx(s);
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
		printf("swpg_io: IO error\n");
#endif
	vm_pager_unmap_page(kva);
	return(rv);
}
64245749Smckusick 
/*
 * Reap completed async pageout ("clean") operations, returning
 * their entries to the free list.  If "m" is non-null, also check
 * whether that page currently has a cleaning operation outstanding;
 * returns TRUE iff it does.  "rw" is B_READ or B_WRITE and only
 * affects the SPC_DIRTY marking below.
 */
boolean_t
swap_pager_clean(m, rw)
	vm_page_t m;
	int rw;
{
	register swp_clean_t spc, tspc;
	register int s;

#ifdef DEBUG
	/* save panic time state */
	/*
	 * NOTE(review): bare "return;" in a boolean_t function --
	 * the value is undefined on this panic-time path.
	 */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_clean(%x, %d)\n", m, rw);
#endif
	tspc = SWP_CLEAN_NULL;
	for (;;) {
		/*
		 * Look up and removal from inuse list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		s = splbio();
		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
			/*
			 * Entry is done and its object lock could be
			 * taken: pull it off the inuse list for freeing
			 * below.
			 */
			if ((spc->spc_flags & SPC_DONE) &&
			    swap_pager_finish(spc)) {
				queue_remove(&swap_pager_inuse, spc,
					     swp_clean_t, spc_list);
				break;
			}
			/* remember if the page of interest is in flight */
			if (m && m == spc->spc_m) {
#ifdef DEBUG
				if (swpagerdebug & SDB_ANOM)
					printf("swpg_clean: %x on list, flags %x\n",
					       m, spc->spc_flags);
#endif
				tspc = spc;
			}
			spc = (swp_clean_t) queue_next(&spc->spc_list);
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
			break;
		splx(s);

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
		if (tspc && tspc == spc) {
#ifdef DEBUG
			if (swpagerdebug & SDB_ANOM)
				printf("swpg_clean: %x done while looking\n",
				       m);
#endif
			tspc = SWP_CLEAN_NULL;
		}
		/* recycle the completed entry and its KVA mapping */
		spc->spc_flags = SPC_FREE;
		vm_pager_unmap_page(spc->spc_kva);
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_clean: free spc %x\n", spc);
#endif
	}
	/*
	 * If we found that the desired page is already being cleaned
	 * mark it so that swap_pager_iodone() will not set the clean
	 * flag before the pageout daemon has another chance to clean it.
	 */
	if (tspc && rw == B_WRITE) {
#ifdef DEBUG
		if (swpagerdebug & SDB_ANOM)
			printf("swpg_clean: %x on clean list\n", tspc);
#endif
		tspc->spc_flags |= SPC_DIRTY;
	}
	splx(s);

#ifdef DEBUG
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
	if ((swpagerdebug & SDB_ANOM) && tspc)
		printf("swpg_clean: %s of cleaning page %x\n",
		       rw == B_READ ? "get" : "put", m);
#endif
	return(tspc ? TRUE : FALSE);
}
73445749Smckusick 
73545749Smckusick swap_pager_finish(spc)
73645749Smckusick 	register swp_clean_t spc;
73745749Smckusick {
73845749Smckusick 	vm_object_t object = spc->spc_m->object;
73945749Smckusick 
74045749Smckusick 	/*
74145749Smckusick 	 * Mark the paging operation as done.
74245749Smckusick 	 * (XXX) If we cannot get the lock, leave it til later.
74345749Smckusick 	 * (XXX) Also we are assuming that an async write is a
74445749Smckusick 	 *       pageout operation that has incremented the counter.
74545749Smckusick 	 */
74645749Smckusick 	if (!vm_object_lock_try(object))
74745749Smckusick 		return(0);
74845749Smckusick 
74945749Smckusick #ifdef DEBUG
75045749Smckusick 	spc->spc_m->pagerowned = 0;
75145749Smckusick #endif
75245749Smckusick 
75345749Smckusick 	if (--object->paging_in_progress == 0)
75445749Smckusick 		thread_wakeup((int) object);
75545749Smckusick 
75645749Smckusick 	/*
75745749Smckusick 	 * XXX: this isn't even close to the right thing to do,
75845749Smckusick 	 * introduces a variety of race conditions.
75945749Smckusick 	 *
76045749Smckusick 	 * If dirty, vm_pageout() has attempted to clean the page
76145749Smckusick 	 * again.  In this case we do not do anything as we will
76245749Smckusick 	 * see the page again shortly.  Otherwise, if no error mark
76345749Smckusick 	 * as clean and inform the pmap system.  If error, mark as
76445749Smckusick 	 * dirty so we will try again (XXX: could get stuck doing
76545749Smckusick 	 * this, should give up after awhile).
76645749Smckusick 	 */
76745749Smckusick 	if ((spc->spc_flags & SPC_DIRTY) == 0) {
76845749Smckusick 		if (spc->spc_flags & SPC_ERROR) {
76945749Smckusick 			printf("swap_pager: clean of %x failed\n",
77045749Smckusick 			       VM_PAGE_TO_PHYS(spc->spc_m));
77145749Smckusick 			spc->spc_m->laundry = TRUE;
77245749Smckusick 		} else {
77345749Smckusick 			spc->spc_m->clean = TRUE;
77445749Smckusick 			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
77545749Smckusick 		}
77645749Smckusick 	}
77745749Smckusick 	/*
77845749Smckusick 	 * XXX: allow blocked write faults to continue
77945749Smckusick 	 */
78045749Smckusick 	spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
78145749Smckusick 	PAGE_WAKEUP(spc->spc_m);
78245749Smckusick 
78345749Smckusick 	vm_object_unlock(object);
78445749Smckusick 	return(1);
78545749Smckusick }
78645749Smckusick 
78745749Smckusick swap_pager_iodone(bp)
78845749Smckusick 	register struct buf *bp;
78945749Smckusick {
79045749Smckusick 	register swp_clean_t spc;
79145749Smckusick 	daddr_t blk;
79245749Smckusick 	int s;
79345749Smckusick 
79445749Smckusick #ifdef DEBUG
79545749Smckusick 	/* save panic time state */
79645749Smckusick 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
79745749Smckusick 		return;
79845749Smckusick 	if (swpagerdebug & SDB_FOLLOW)
79945749Smckusick 		printf("swpg_iodone(%x)\n", bp);
80045749Smckusick #endif
80145749Smckusick 	s = splbio();
80245749Smckusick 	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
80345749Smckusick 	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
80445749Smckusick 		if (spc->spc_bp == bp)
80545749Smckusick 			break;
80645749Smckusick 		spc = (swp_clean_t) queue_next(&spc->spc_list);
80745749Smckusick 	}
80845749Smckusick #ifdef DEBUG
80945749Smckusick 	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
81045749Smckusick 		panic("swpg_iodone: bp not found");
81145749Smckusick #endif
81245749Smckusick 
81345749Smckusick 	spc->spc_flags &= ~SPC_BUSY;
81445749Smckusick 	spc->spc_flags |= SPC_DONE;
81545749Smckusick 	if (bp->b_flags & B_ERROR)
81645749Smckusick 		spc->spc_flags |= SPC_ERROR;
81745749Smckusick 	spc->spc_bp = NULL;
81845749Smckusick 	blk = bp->b_blkno;
81945749Smckusick 
82045749Smckusick #ifdef DEBUG
82145749Smckusick 	--swap_pager_poip;
82245749Smckusick 	if (swpagerdebug & SDB_WRITE)
82345749Smckusick 		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
82445749Smckusick 		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
82545749Smckusick 		       spc, spc->spc_swp->sw_poip);
82645749Smckusick #endif
82745749Smckusick 
82845749Smckusick 	spc->spc_swp->sw_poip--;
82945749Smckusick 	if (spc->spc_swp->sw_flags & SW_WANTED) {
83045749Smckusick 		spc->spc_swp->sw_flags &= ~SW_WANTED;
83145749Smckusick 		thread_wakeup((int)spc->spc_swp);
83245749Smckusick 	}
83345749Smckusick 
83445749Smckusick 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
83545749Smckusick 	bp->av_forw = bswlist.av_forw;
83645749Smckusick 	bswlist.av_forw = bp;
83745749Smckusick 	if (bp->b_vp)
83845749Smckusick 		brelvp(bp);
83945749Smckusick 	if (bswlist.b_flags & B_WANTED) {
84045749Smckusick 		bswlist.b_flags &= ~B_WANTED;
84145749Smckusick 		thread_wakeup((int)&bswlist);
84245749Smckusick 	}
84345749Smckusick #if 0
84445749Smckusick 	/*
84545749Smckusick 	 * XXX: this isn't even close to the right thing to do,
84645749Smckusick 	 * introduces a variety of race conditions.
84745749Smckusick 	 *
84845749Smckusick 	 * If dirty, vm_pageout() has attempted to clean the page
84945749Smckusick 	 * again.  In this case we do not do anything as we will
85045749Smckusick 	 * see the page again shortly.  Otherwise, if no error mark
85145749Smckusick 	 * as clean and inform the pmap system.  If error, mark as
85245749Smckusick 	 * dirty so we will try again (XXX: could get stuck doing
85345749Smckusick 	 * this, should give up after awhile).
85445749Smckusick 	 */
85545749Smckusick 	if ((spc->spc_flags & SPC_DIRTY) == 0) {
85645749Smckusick 		if (spc->spc_flags & SPC_ERROR) {
85745749Smckusick 			printf("swap_pager: clean of %x (block %x) failed\n",
85845749Smckusick 			       VM_PAGE_TO_PHYS(spc->spc_m), blk);
85945749Smckusick 			spc->spc_m->laundry = TRUE;
86045749Smckusick 		} else {
86145749Smckusick 			spc->spc_m->clean = TRUE;
86245749Smckusick 			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
86345749Smckusick 		}
86445749Smckusick 	}
86545749Smckusick 	/*
86645749Smckusick 	 * XXX: allow blocked write faults to continue
86745749Smckusick 	 */
86845749Smckusick 	spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
86945749Smckusick 	PAGE_WAKEUP(spc->spc_m);
87045749Smckusick #endif
87145749Smckusick 
87245749Smckusick 	thread_wakeup((int) &vm_pages_needed);
87345749Smckusick 	splx(s);
87445749Smckusick }
87545749Smckusick #endif
876